1/****************************************************************************
2**
3** Copyright (C) 2017 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtNetwork module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qhsts_p.h"
41
42#include "QtCore/private/qipaddress_p.h"
43#include "QtCore/qlist.h"
44
45#if QT_CONFIG(settings)
46#include "qhstsstore_p.h"
47#endif // QT_CONFIG(settings)
48
49QT_BEGIN_NAMESPACE
50
51static bool is_valid_domain_name(const QString &host)
52{
53 if (!host.size())
54 return false;
55
56 // RFC6797 8.1.1
57 // If the substring matching the host production from the Request-URI
58 // (of the message to which the host responded) syntactically matches
59 //the IP-literal or IPv4address productions from Section 3.2.2 of
60 //[RFC3986], then the UA MUST NOT note this host as a Known HSTS Host.
61 using namespace QIPAddressUtils;
62
63 IPv4Address ipv4Addr = {};
64 if (parseIp4(ipv4Addr, host.constBegin(), host.constEnd()))
65 return false;
66
67 IPv6Address ipv6Addr = {};
68 // Unlike parseIp4, parseIp6 returns nullptr if it managed to parse IPv6
69 // address successfully.
70 if (!parseIp6(ipv6Addr, host.constBegin(), host.constEnd()))
71 return false;
72
73 // TODO: for now we do not test IPvFuture address, it must be addressed
74 // by introducing parseIpFuture (actually, there is an implementation
75 // in QUrl that can be adopted/modified/moved to QIPAddressUtils).
76 return true;
77}
78
79void QHstsCache::updateFromHeaders(const QList<QPair<QByteArray, QByteArray>> &headers,
80 const QUrl &url)
81{
82 if (!url.isValid())
83 return;
84
85 QHstsHeaderParser parser;
86 if (parser.parse(headers)) {
87 updateKnownHost(url.host(), parser.expirationDate(), parser.includeSubDomains());
88#if QT_CONFIG(settings)
89 if (hstsStore)
90 hstsStore->synchronize();
91#endif // QT_CONFIG(settings)
92 }
93}
94
95void QHstsCache::updateFromPolicies(const QList<QHstsPolicy> &policies)
96{
97 for (const auto &policy : policies)
98 updateKnownHost(policy.host(), policy.expiry(), policy.includesSubDomains());
99
100#if QT_CONFIG(settings)
101 if (hstsStore && policies.size()) {
102 // These policies are coming either from store or from QNAM's setter
103 // function. As a result we can notice expired or new policies, time
104 // to sync ...
105 hstsStore->synchronize();
106 }
107#endif // QT_CONFIG(settings)
108}
109
110void QHstsCache::updateKnownHost(const QUrl &url, const QDateTime &expires,
111 bool includeSubDomains)
112{
113 if (!url.isValid())
114 return;
115
116 updateKnownHost(url.host(), expires, includeSubDomains);
117#if QT_CONFIG(settings)
118 if (hstsStore)
119 hstsStore->synchronize();
120#endif // QT_CONFIG(settings)
121}
122
123void QHstsCache::updateKnownHost(const QString &host, const QDateTime &expires,
124 bool includeSubDomains)
125{
126 if (!is_valid_domain_name(host))
127 return;
128
129 // HSTS is a per-host policy, regardless of protocol, port or any of the other
130 // details in an URL; so we only want the host part. QUrl::host handles
131 // IDNA 2003 (RFC3490) for us, as required by HSTS (RFC6797, section 10).
132 const HostName hostName(host);
133 const auto pos = knownHosts.find(hostName);
134 QHstsPolicy::PolicyFlags flags;
135 if (includeSubDomains)
136 flags = QHstsPolicy::IncludeSubDomains;
137
138 const QHstsPolicy newPolicy(expires, flags, hostName.name);
139 if (pos == knownHosts.end()) {
140 // A new, previously unknown host.
141 if (newPolicy.isExpired()) {
142 // Nothing to do at all - we did not know this host previously,
143 // we do not have to - since its policy expired.
144 return;
145 }
146
147 knownHosts.insert({hostName, newPolicy});
148#if QT_CONFIG(settings)
149 if (hstsStore)
150 hstsStore->addToObserved(newPolicy);
151#endif // QT_CONFIG(settings)
152 return;
153 }
154
155 if (newPolicy.isExpired())
156 knownHosts.erase(pos);
157 else if (pos->second != newPolicy)
158 pos->second = newPolicy;
159 else
160 return;
161
162#if QT_CONFIG(settings)
163 if (hstsStore)
164 hstsStore->addToObserved(newPolicy);
165#endif // QT_CONFIG(settings)
166}
167
168bool QHstsCache::isKnownHost(const QUrl &url) const
169{
170 if (!url.isValid() || !is_valid_domain_name(url.host()))
171 return false;
172
173 /*
174 RFC6797, 8.2. Known HSTS Host Domain Name Matching
175
176 * Superdomain Match
177 If a label-for-label match between an entire Known HSTS Host's
178 domain name and a right-hand portion of the given domain name
179 is found, then this Known HSTS Host's domain name is a
180 superdomain match for the given domain name. There could be
181 multiple superdomain matches for a given domain name.
182 * Congruent Match
183 If a label-for-label match between a Known HSTS Host's domain
184 name and the given domain name is found -- i.e., there are no
185 further labels to compare -- then the given domain name
186 congruently matches this Known HSTS Host.
187
188 We start from the congruent match, and then chop labels and dots and
189 proceed with superdomain match. While RFC6797 recommends to start from
190 superdomain, the result is the same - some valid policy will make a host
191 known.
192 */
193
194 bool superDomainMatch = false;
195 const QString hostNameAsString(url.host());
196 HostName nameToTest(QStringView{hostNameAsString});
197 while (nameToTest.fragment.size()) {
198 auto const pos = knownHosts.find(nameToTest);
199 if (pos != knownHosts.end()) {
200 if (pos->second.isExpired()) {
201 knownHosts.erase(pos);
202#if QT_CONFIG(settings)
203 if (hstsStore) {
204 // Inform our store that this policy has expired.
205 hstsStore->addToObserved(pos->second);
206 }
207#endif // QT_CONFIG(settings)
208 } else if (!superDomainMatch || pos->second.includesSubDomains()) {
209 return true;
210 }
211 }
212
213 const int dot = nameToTest.fragment.indexOf(QLatin1Char('.'));
214 if (dot == -1)
215 break;
216
217 nameToTest.fragment = nameToTest.fragment.mid(dot + 1);
218 superDomainMatch = true;
219 }
220
221 return false;
222}
223
224void QHstsCache::clear()
225{
226 knownHosts.clear();
227}
228
229QList<QHstsPolicy> QHstsCache::policies() const
230{
231 QList<QHstsPolicy> values;
232 values.reserve(int(knownHosts.size()));
233 for (const auto &host : knownHosts)
234 values << host.second;
235 return values;
236}
237
238#if QT_CONFIG(settings)
239void QHstsCache::setStore(QHstsStore *store)
240{
241 // Caller retains ownership of store, which must outlive this cache.
242 if (store != hstsStore) {
243 hstsStore = store;
244
245 if (!hstsStore)
246 return;
247
248 // First we augment our store with the policies we already know about
249 // (and thus the cached policy takes priority over whatever policy we
250 // had in the store for the same host, if any).
251 if (knownHosts.size()) {
252 const QList<QHstsPolicy> observed(policies());
253 for (const auto &policy : observed)
254 hstsStore->addToObserved(policy);
255 hstsStore->synchronize();
256 }
257
258 // Now we update the cache with anything we have not observed yet, but
259 // the store knows about (well, it can happen we synchronize again as a
260 // result if some policies managed to expire or if we add a new one
261 // from the store to cache):
262 const QList<QHstsPolicy> restored(store->readPolicies());
263 updateFromPolicies(restored);
264 }
265}
266#endif // QT_CONFIG(settings)
267
// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
// are valid and it will return false if something else was found; then
// we immediately stop parsing. 'parseDirective' knows how these tokens can
// be combined into a valid directive and if some weird combination of
// valid tokens is found - we immediately stop.
// And finally we call parseDirective again and again until some error is found
// or we have no more bytes in the header.
275
276// The following isXXX functions are based on RFC2616, 2.2 Basic Rules.
277
// Returns true if 'c' lies in the US-ASCII range.
static bool isCHAR(int c)
{
    // CHAR = <any US-ASCII character (octets 0 - 127)>
    constexpr int firstAscii = 0;
    constexpr int lastAscii = 127;
    return !(c < firstAscii || c > lastAscii);
}
283
// Returns true if 'c' is a US-ASCII control character.
static bool isCTL(int c)
{
    // CTL = <any US-ASCII control character
    //       (octets 0 - 31) and DEL (127)>
    if (c == 127)
        return true;
    return 0 <= c && c < 32;
}
290
291
// Returns true if 'c' is a space or a horizontal tab.
static bool isLWS(int c)
{
    // LWS = [CRLF] 1*( SP | HT )
    //
    // CRLF = CR LF
    // CR = <US-ASCII CR, carriage return (13)>
    // LF = <US-ASCII LF, linefeed (10)>
    // SP = <US-ASCII SP, space (32)>
    // HT = <US-ASCII HT, horizontal-tab (9)>
    //
    // CRLF is handled by the time we parse a header (they were replaced with
    // spaces). We only have to deal with remaining SP|HT
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
306
307static bool isTEXT(char c)
308{
309 // TEXT = <any OCTET except CTLs,
310 // but including LWS>
311 return !isCTL(c) || isLWS(c);
312}
313
314static bool isSeparator(char c)
315{
316 // separators = "(" | ")" | "<" | ">" | "@"
317 // | "," | ";" | ":" | "\" | <">
318 // | "/" | "[" | "]" | "?" | "="
319 // | "{" | "}" | SP | HT
320 static const char separators[] = "()<>@,;:\\\"/[]?={}";
321 static const char *end = separators + sizeof separators - 1;
322 return isLWS(c) || std::find(separators, end, c) != end;
323}
324
325static QByteArray unescapeMaxAge(const QByteArray &value)
326{
327 if (value.size() < 2 || value[0] != '"')
328 return value;
329
330 Q_ASSERT(value[value.size() - 1] == '"');
331 return value.mid(1, value.size() - 2);
332}
333
334static bool isTOKEN(char c)
335{
336 // token = 1*<any CHAR except CTLs or separators>
337 return isCHAR(c) && !isCTL(c) && !isSeparator(c);
338}
339
340/*
341
342RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field.
343Syntax:
344
Strict-Transport-Security = "Strict-Transport-Security" ":"
346 [ directive ] *( ";" [ directive ] )
347
348directive = directive-name [ "=" directive-value ]
349directive-name = token
350directive-value = token | quoted-string
351
352RFC 2616, 2.2 Basic Rules.
353
354token = 1*<any CHAR except CTLs or separators>
355quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
356
357
358qdtext = <any TEXT except <">>
359quoted-pair = "\" CHAR
360
361*/
362
363bool QHstsHeaderParser::parse(const QList<QPair<QByteArray, QByteArray>> &headers)
364{
365 for (const auto &h : headers) {
366 // We use '==' since header name was already 'trimmed' for us:
367 if (h.first == "Strict-Transport-Security") {
368 header = h.second;
369 // RFC6797, 8.1:
370 //
371 // The UA MUST ignore any STS header fields not conforming to the
372 // grammar specified in Section 6.1 ("Strict-Transport-Security HTTP
373 // Response Header Field").
374 //
375 // If a UA receives more than one STS header field in an HTTP
376 // response message over secure transport, then the UA MUST process
377 // only the first such header field.
378 //
379 // We read this as: ignore all invalid headers and take the first valid:
380 if (parseSTSHeader() && maxAgeFound) {
381 expiry = QDateTime::currentDateTimeUtc().addSecs(maxAge);
382 return true;
383 }
384 }
385 }
386
387 // In case it was set by a syntactically correct header (but without
388 // REQUIRED max-age directive):
389 subDomainsFound = false;
390
391 return false;
392}
393
394bool QHstsHeaderParser::parseSTSHeader()
395{
396 expiry = QDateTime();
397 maxAgeFound = false;
398 subDomainsFound = false;
399 maxAge = 0;
400 tokenPos = 0;
401 token.clear();
402
403 while (tokenPos < header.size()) {
404 if (!parseDirective())
405 return false;
406
407 if (token.size() && token != ";") {
408 // After a directive we can only have a ";" or no more tokens.
409 // Invalid syntax.
410 return false;
411 }
412 }
413
414 return true;
415}
416
417bool QHstsHeaderParser::parseDirective()
418{
419 // RFC 6797, 6.1:
420 //
421 // directive = directive-name [ "=" directive-value ]
422 // directive-name = token
423 // directive-value = token | quoted-string
424
425
426 // RFC 2616, 2.2:
427 //
428 // token = 1*<any CHAR except CTLs or separators>
429
430 if (!nextToken())
431 return false;
432
433 if (!token.size()) // No more data, but no error.
434 return true;
435
436 if (token == ";") // That's a weird grammar, but that's what it is.
437 return true;
438
439 if (!isTOKEN(token[0])) // Not a valid directive-name.
440 return false;
441
442 const QByteArray directiveName = token;
443 // 2. Try to read "=" or ";".
444 if (!nextToken())
445 return false;
446
447 QByteArray directiveValue;
448 if (token == ";") // No directive-value
449 return processDirective(directiveName, directiveValue);
450
451 if (token == "=") {
452 // We expect a directive-value now:
453 if (!nextToken() || !token.size())
454 return false;
455 directiveValue = token;
456 } else if (token.size()) {
457 // Invalid syntax:
458 return false;
459 }
460
461 if (!processDirective(directiveName, directiveValue))
462 return false;
463
464 // Read either ";", or 'end of header', or some invalid token.
465 return nextToken();
466}
467
468bool QHstsHeaderParser::processDirective(const QByteArray &name, const QByteArray &value)
469{
470 Q_ASSERT(name.size());
471 // RFC6797 6.1/3 Directive names are case-insensitive
472 if (name.compare("max-age", Qt::CaseInsensitive) == 0) {
473 // RFC 6797, 6.1.1
474 // The syntax of the max-age directive's REQUIRED value (after
475 // quoted-string unescaping, if necessary) is defined as:
476 //
477 // max-age-value = delta-seconds
478 if (maxAgeFound) {
479 // RFC 6797, 6.1/2:
480 // All directives MUST appear only once in an STS header field.
481 return false;
482 }
483
484 const QByteArray unescapedValue = unescapeMaxAge(value);
485 if (!unescapedValue.size())
486 return false;
487
488 bool ok = false;
489 const qint64 age = unescapedValue.toLongLong(&ok);
490 if (!ok || age < 0)
491 return false;
492
493 maxAge = age;
494 maxAgeFound = true;
495 } else if (name.compare("includesubdomains", Qt::CaseInsensitive) == 0) {
496 // RFC 6797, 6.1.2. The includeSubDomains Directive.
497 // The OPTIONAL "includeSubDomains" directive is a valueless directive.
498
499 if (subDomainsFound) {
500 // RFC 6797, 6.1/2:
501 // All directives MUST appear only once in an STS header field.
502 return false;
503 }
504
505 subDomainsFound = true;
506 } // else we do nothing, skip unknown directives (RFC 6797, 6.1/5)
507
508 return true;
509}
510
511bool QHstsHeaderParser::nextToken()
512{
513 // Returns true if we found a valid token or we have no more data (token is
514 // empty then).
515
516 token.clear();
517
518 // Fortunately enough, by this point qhttpnetworkreply already got rid of
519 // [CRLF] parts, but we can have 1*(SP|HT) yet.
520 while (tokenPos < header.size() && isLWS(header[tokenPos]))
521 ++tokenPos;
522
523 if (tokenPos == header.size())
524 return true;
525
526 const char ch = header[tokenPos];
527 if (ch == ';' || ch == '=') {
528 token.append(ch);
529 ++tokenPos;
530 return true;
531 }
532
533 // RFC 2616, 2.2.
534 //
535 // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
536 // qdtext = <any TEXT except <">>
537 if (ch == '"') {
538 int last = tokenPos + 1;
539 while (last < header.size()) {
540 if (header[last] == '"') {
541 // The end of a quoted-string.
542 break;
543 } else if (header[last] == '\\') {
544 // quoted-pair = "\" CHAR
545 if (last + 1 < header.size() && isCHAR(header[last + 1]))
546 last += 2;
547 else
548 return false;
549 } else {
550 if (!isTEXT(header[last]))
551 return false;
552 ++last;
553 }
554 }
555
556 if (last >= header.size()) // no closing '"':
557 return false;
558
559 token = header.mid(tokenPos, last - tokenPos + 1);
560 tokenPos = last + 1;
561 return true;
562 }
563
564 // RFC 2616, 2.2:
565 //
566 // token = 1*<any CHAR except CTLs or separators>
567 if (!isTOKEN(ch))
568 return false;
569
570 int last = tokenPos + 1;
571 while (last < header.size() && isTOKEN(header[last]))
572 ++last;
573
574 token = header.mid(tokenPos, last - tokenPos);
575 tokenPos = last;
576
577 return true;
578}
579
580QT_END_NAMESPACE
581