1/****************************************************************************
2**
3** Copyright (C) 2016 Intel Corporation.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qurl.h"
41#include "private/qstringconverter_p.h"
42#include "private/qtools_p.h"
43#include "private/qsimd_p.h"
44
45QT_BEGIN_NAMESPACE
46
47// ### move to qurl_p.h
// What the recoder should do with a given ASCII character. The numeric values
// are stored as plain bytes in the action tables and converted back with
// EncodingAction(...), so they must stay exactly 0, 1 and 2.
enum EncodingAction {
    // wanted in literal form: a "%XX" spelling of it gets decoded
    DecodeCharacter = 0,
    // kept in whichever form (literal or "%XX") it arrived in
    LeaveCharacter = 1,
    // wanted in "%XX" form: a literal occurrence gets percent-encoded
    EncodeCharacter = 2
};
53
// From RFC 3986, Appendix A Collected ABNF for URI
55// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
56// reserved = gen-delims / sub-delims
57// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
58// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
59// / "*" / "+" / "," / ";" / "="
// Default action for every character from U+0020 to U+007F, indexed by
// (c - ' '). Each entry is an EncodingAction value:
//   0 = DecodeCharacter, 1 = LeaveCharacter, 2 = EncodeCharacter
// The note next to each entry gives the character and, where it has one,
// its class in the RFC 3986 grammar.
static const uchar defaultActionTable[96] = {
    2, // space
    1, // '!' (sub-delim)
    2, // '"'
    1, // '#' (gen-delim)
    1, // '$' (sub-delim)
    2, // '%' (percent)
    1, // '&' (sub-delim)
    1, // "'" (sub-delim)
    1, // '(' (sub-delim)
    1, // ')' (sub-delim)
    1, // '*' (sub-delim)
    1, // '+' (sub-delim)
    1, // ',' (sub-delim)
    0, // '-' (unreserved)
    0, // '.' (unreserved)
    1, // '/' (gen-delim)

    0, 0, 0, 0, 0, // '0' to '4' (unreserved)
    0, 0, 0, 0, 0, // '5' to '9' (unreserved)
    1, // ':' (gen-delim)
    1, // ';' (sub-delim)
    2, // '<'
    1, // '=' (sub-delim)
    2, // '>'
    1, // '?' (gen-delim)

    1, // '@' (gen-delim)
    0, 0, 0, 0, 0, // 'A' to 'E' (unreserved)
    0, 0, 0, 0, 0, // 'F' to 'J' (unreserved)
    0, 0, 0, 0, 0, // 'K' to 'O' (unreserved)
    0, 0, 0, 0, 0, // 'P' to 'T' (unreserved)
    0, 0, 0, 0, 0, 0, // 'U' to 'Z' (unreserved)
    1, // '[' (gen-delim)
    2, // '\'
    1, // ']' (gen-delim)
    2, // '^'
    0, // '_' (unreserved)

    2, // '`'
    0, 0, 0, 0, 0, // 'a' to 'e' (unreserved)
    0, 0, 0, 0, 0, // 'f' to 'j' (unreserved)
    0, 0, 0, 0, 0, // 'k' to 'o' (unreserved)
    0, 0, 0, 0, 0, // 'p' to 't' (unreserved)
    0, 0, 0, 0, 0, 0, // 'u' to 'z' (unreserved)
    2, // '{'
    2, // '|'
    2, // '}'
    0, // '~' (unreserved)

    2 // DEL (0x7F)
};
112
113// mask tables, in negative polarity
114// 0x00 if it belongs to this category
115// 0xff if it doesn't
116
// Mask applied to the action table by qt_urlRecode() when
// QUrl::DecodeReserved is set. Entries are AND-ed pairwise (see maskTable()),
// so 0x00 turns the action into DecodeCharacter (0) while 0xff keeps whatever
// the table already held. Indexed by (c - ' '), like defaultActionTable.
static const uchar reservedMask[96] = {
    0xff, // space
    0xff, // '!' (sub-delim)
    0x00, // '"'
    0xff, // '#' (gen-delim)
    0xff, // '$' (sub-delim)
    0xff, // '%' (percent)
    0xff, // '&' (sub-delim)
    0xff, // "'" (sub-delim)
    0xff, // '(' (sub-delim)
    0xff, // ')' (sub-delim)
    0xff, // '*' (sub-delim)
    0xff, // '+' (sub-delim)
    0xff, // ',' (sub-delim)
    0xff, // '-' (unreserved)
    0xff, // '.' (unreserved)
    0xff, // '/' (gen-delim)

    0xff, 0xff, 0xff, 0xff, 0xff, // '0' to '4' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // '5' to '9' (unreserved)
    0xff, // ':' (gen-delim)
    0xff, // ';' (sub-delim)
    0x00, // '<'
    0xff, // '=' (sub-delim)
    0x00, // '>'
    0xff, // '?' (gen-delim)

    0xff, // '@' (gen-delim)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'A' to 'E' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'F' to 'J' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'K' to 'O' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'P' to 'T' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'U' to 'Z' (unreserved)
    0xff, // '[' (gen-delim)
    0x00, // '\'
    0xff, // ']' (gen-delim)
    0x00, // '^'
    0xff, // '_' (unreserved)

    0x00, // '`'
    0xff, 0xff, 0xff, 0xff, 0xff, // 'a' to 'e' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'f' to 'j' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'k' to 'o' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'p' to 't' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'u' to 'z' (unreserved)
    0x00, // '{'
    0x00, // '|'
    0x00, // '}'
    0xff, // '~' (unreserved)

    0xff // DEL (0x7F)
};
169
// Returns true if \a c is an ASCII hexadecimal digit: '0'-'9', 'A'-'F' or 'a'-'f'.
static inline bool isHex(ushort c)
{
    if (c >= 'a' && c <= 'f')
        return true;
    if (c >= 'A' && c <= 'F')
        return true;
    return c >= '0' && c <= '9';
}
176
// Returns true if the hexadecimal digit \a c needs no uppercasing, that is,
// it is a decimal digit or one of 'A'-'F'. The result is only meaningful for
// characters that are hex digits: the test merely checks that c lies below
// U+0060, the column where the lowercase letters start.
static inline bool isUpperHex(ushort c)
{
    const ushort lowercaseColumn = 0x60;    // '`', the character right before 'a'
    return !(c >= lowercaseColumn);
}
182
183static inline ushort toUpperHex(ushort c)
184{
185 return isUpperHex(c) ? c : c - 0x20;
186}
187
// Converts the hexadecimal digit \a c to its value (0-15). The ranges are
// tested from the highest code points down ('a'-'f', then 'A'-'F', then
// digits); the caller is expected to have validated \a c with isHex().
static inline ushort decodeNibble(ushort c)
{
    if (c >= 'a')
        return c - 'a' + 0xA;
    if (c >= 'A')
        return c - 'A' + 0xA;
    return c - '0';
}
193
194// if the sequence at input is 2*HEXDIG, returns its decoding
195// returns -1 if it isn't.
196// assumes that the range has been checked already
197static inline ushort decodePercentEncoding(const ushort *input)
198{
199 ushort c1 = input[1];
200 ushort c2 = input[2];
201 if (!isHex(c1) || !isHex(c2))
202 return ushort(-1);
203 return decodeNibble(c1) << 4 | decodeNibble(c2);
204}
205
206static inline ushort encodeNibble(ushort c)
207{
208 return ushort(QtMiscUtils::toHexUpper(c));
209}
210
211static void ensureDetached(QString &result, ushort *&output, const ushort *begin, const ushort *input, const ushort *end,
212 int add = 0)
213{
214 if (!output) {
215 // now detach
216 // create enough space if the rest of the string needed to be percent-encoded
217 int charsProcessed = input - begin;
218 int charsRemaining = end - input;
219 int spaceNeeded = end - begin + 2 * charsRemaining + add;
220 int origSize = result.size();
221 result.resize(origSize + spaceNeeded);
222
223 // we know that resize() above detached, so we bypass the reference count check
224 output = const_cast<ushort *>(reinterpret_cast<const ushort *>(result.constData()))
225 + origSize;
226
227 // copy the chars we've already processed
228 int i;
229 for (i = 0; i < charsProcessed; ++i)
230 output[i] = begin[i];
231 output += i;
232 }
233}
234
235namespace {
236struct QUrlUtf8Traits : public QUtf8BaseTraitsNoAscii
237{
238 // From RFC 3987:
239 // iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
240 //
241 // ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
242 // / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
243 // / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
244 // / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
245 // / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
246 // / %xD0000-DFFFD / %xE1000-EFFFD
247 //
248 // iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
249 //
250 // That RFC allows iprivate only as part of iquery, but we don't know here
251 // whether we're looking at a query or another part of an URI, so we accept
252 // them too. The definition above excludes U+FFF0 to U+FFFD from appearing
253 // unencoded, but we see no reason for its exclusion, so we allow them to
254 // be decoded (and we need U+FFFD the replacement character to indicate
255 // failure to decode).
256 //
257 // That means we must disallow:
258 // * unpaired surrogates (QUtf8Functions takes care of that for us)
259 // * non-characters
260 static const bool allowNonCharacters = false;
261
262 // override: our "bytes" are three percent-encoded UTF-16 characters
263 static void appendByte(ushort *&ptr, uchar b)
264 {
265 // b >= 0x80, by construction, so percent-encode
266 *ptr++ = '%';
267 *ptr++ = encodeNibble(b >> 4);
268 *ptr++ = encodeNibble(b & 0xf);
269 }
270
271 static uchar peekByte(const ushort *ptr, qsizetype n = 0)
272 {
273 // decodePercentEncoding returns ushort(-1) if it can't decode,
274 // which means we return 0xff, which is not a valid continuation byte.
275 // If ptr[i * 3] is not '%', we'll multiply by zero and return 0,
276 // also not a valid continuation byte (if it's '%', we multiply by 1).
277 return uchar(decodePercentEncoding(ptr + n * 3))
278 * uchar(ptr[n * 3] == '%');
279 }
280
281 static qptrdiff availableBytes(const ushort *ptr, const ushort *end)
282 {
283 return (end - ptr) / 3;
284 }
285
286 static void advanceByte(const ushort *&ptr, int n = 1)
287 {
288 ptr += n * 3;
289 }
290};
291}
292
293// returns true if we performed an UTF-8 decoding
294static bool encodedUtf8ToUtf16(QString &result, ushort *&output, const ushort *begin, const ushort *&input,
295 const ushort *end, ushort decoded)
296{
297 uint ucs4 = 0, *dst = &ucs4;
298 const ushort *src = input + 3;// skip the %XX that yielded \a decoded
299 int charsNeeded = QUtf8Functions::fromUtf8<QUrlUtf8Traits>(decoded, dst, src, end);
300 if (charsNeeded < 0)
301 return false;
302
303 if (!QChar::requiresSurrogates(ucs4)) {
304 // UTF-8 decoded and no surrogates are required
305 // detach if necessary
306 // possibilities are: 6 chars (%XX%XX) -> one char; 9 chars (%XX%XX%XX) -> one char
307 ensureDetached(result, output, begin, input, end, -3 * charsNeeded + 1);
308 *output++ = ucs4;
309 } else {
310 // UTF-8 decoded to something that requires a surrogate pair
311 // compressing from %XX%XX%XX%XX (12 chars) to two
312 ensureDetached(result, output, begin, input, end, -10);
313 *output++ = QChar::highSurrogate(ucs4);
314 *output++ = QChar::lowSurrogate(ucs4);
315 }
316
317 input = src - 1;
318 return true;
319}
320
321static void unicodeToEncodedUtf8(QString &result, ushort *&output, const ushort *begin,
322 const ushort *&input, const ushort *end, ushort decoded)
323{
324 // calculate the utf8 length and ensure enough space is available
325 int utf8len = QChar::isHighSurrogate(decoded) ? 4 : decoded >= 0x800 ? 3 : 2;
326
327 // detach
328 if (!output) {
329 // we need 3 * utf8len for the encoded UTF-8 sequence
330 // but ensureDetached already adds 3 for the char we're processing
331 ensureDetached(result, output, begin, input, end, 3*utf8len - 3);
332 } else {
333 // verify that there's enough space or expand
334 int charsRemaining = end - input - 1; // not including this one
335 int pos = output - reinterpret_cast<const ushort *>(result.constData());
336 int spaceRemaining = result.size() - pos;
337 if (spaceRemaining < 3*charsRemaining + 3*utf8len) {
338 // must resize
339 result.resize(result.size() + 3*utf8len);
340
341 // we know that resize() above detached, so we bypass the reference count check
342 output = const_cast<ushort *>(reinterpret_cast<const ushort *>(result.constData()));
343 output += pos;
344 }
345 }
346
347 ++input;
348 int res = QUtf8Functions::toUtf8<QUrlUtf8Traits>(decoded, output, input, end);
349 --input;
350 if (res < 0) {
351 // bad surrogate pair sequence
352 // we will encode bad UTF-16 to UTF-8
353 // but they don't get decoded back
354
355 // first of three bytes
356 uchar c = 0xe0 | uchar(decoded >> 12);
357 *output++ = '%';
358 *output++ = 'E';
359 *output++ = encodeNibble(c & 0xf);
360
361 // second byte
362 c = 0x80 | (uchar(decoded >> 6) & 0x3f);
363 *output++ = '%';
364 *output++ = encodeNibble(c >> 4);
365 *output++ = encodeNibble(c & 0xf);
366
367 // third byte
368 c = 0x80 | (decoded & 0x3f);
369 *output++ = '%';
370 *output++ = encodeNibble(c >> 4);
371 *output++ = encodeNibble(c & 0xf);
372 }
373}
374
// Recodes the UTF-16 range [begin, end) according to \a actionTable and
// \a encoding.
//
// Nothing is written to \a result until the first character that has to
// change is found; at that point ensureDetached() grows \a result and copies
// over what was skipped so far. Returns the number of characters appended to
// \a result, or 0 if \a result was never written to.
//
// \a actionTable holds one EncodingAction per character from U+0020 to
// U+007F, indexed by (c - ' '). Characters below U+0020 are always encoded.
//
// \a retryBadEncoding selects how '%' is treated. The first pass runs with
// false: on the first '%' that is not followed by two hex digits, \a result
// is resized back to its original size and the function calls itself with
// true, in which mode every '%' is written out as "%25".
//
// NOTE(review): sizes are kept in int here, whereas decode() and
// qt_urlRecode() work in qsizetype -- confirm whether that is intended.
static int recode(QString &result, const ushort *begin, const ushort *end, QUrl::ComponentFormattingOptions encoding,
                  const uchar *actionTable, bool retryBadEncoding)
{
    const int origSize = result.size();
    const ushort *input = begin;
    ushort *output = nullptr;   // stays null until something must be changed

    EncodingAction action = EncodeCharacter;
    for ( ; input != end; ++input) {
        ushort c;
        // try a run where no change is necessary
        for ( ; input != end; ++input) {
            c = *input;
            if (c < 0x20U)
                action = EncodeCharacter;
            if (c < 0x20U || c >= 0x80U) // equivalently: !(uint(c - 0x20) < 0x60U)
                goto non_trivial;
            action = EncodingAction(actionTable[c - ' ']);
            if (action == EncodeCharacter)
                goto non_trivial;
            // literal character that may stay literal: copy only if detached
            if (output)
                *output++ = c;
        }
        break;

non_trivial:
        // c is a control character, a non-ASCII character, or an ASCII
        // character whose table action is EncodeCharacter ('%' included, as
        // long as the table keeps its default entry)
        uint decoded;
        if (c == '%' && retryBadEncoding) {
            // always write "%25"
            ensureDetached(result, output, begin, input, end);
            *output++ = '%';
            *output++ = '2';
            *output++ = '5';
            continue;
        } else if (c == '%') {
            // check if the input is valid
            if (input + 2 >= end || (decoded = decodePercentEncoding(input)) == ushort(-1)) {
                // not valid, retry
                result.resize(origSize);
                return recode(result, begin, end, encoding, actionTable, true);
            }

            if (decoded >= 0x80) {
                // decode the UTF-8 sequence
                if (!(encoding & QUrl::EncodeUnicode) &&
                        encodedUtf8ToUtf16(result, output, begin, input, end, decoded))
                    continue;

                // decoding the encoded UTF-8 failed
                action = LeaveCharacter;
            } else if (decoded >= 0x20) {
                action = EncodingAction(actionTable[decoded - ' ']);
            }
            // decoded < 0x20: action is still the one looked up for '%' itself
        } else {
            decoded = c;
            if (decoded >= 0x80 && encoding & QUrl::EncodeUnicode) {
                // encode the UTF-8 sequence
                unicodeToEncodedUtf8(result, output, begin, input, end, decoded);
                continue;
            } else if (decoded >= 0x80) {
                // non-ASCII stays as it is
                if (output)
                    *output++ = c;
                continue;
            }
        }

        // there are six possibilities:
        //  current \ action  | DecodeCharacter | LeaveCharacter | EncodeCharacter
        //      decoded       |    1:leave      |    2:leave     |    3:encode
        //      encoded       |    4:decode     |    5:leave     |    6:leave
        // cases 1 and 2 were handled before this section

        if (c == '%' && action != DecodeCharacter) {
            // cases 5 and 6: it's encoded and we're leaving it as it is
            // except we're pedantic and we'll uppercase the hex
            // (if nothing is written here, the two hex digits are visited by
            // the following loop iterations like any other character)
            if (output || !isUpperHex(input[1]) || !isUpperHex(input[2])) {
                ensureDetached(result, output, begin, input, end);
                *output++ = '%';
                *output++ = toUpperHex(*++input);
                *output++ = toUpperHex(*++input);
            }
        } else if (c == '%' && action == DecodeCharacter) {
            // case 4: we need to decode
            ensureDetached(result, output, begin, input, end);
            *output++ = decoded;
            input += 2;
        } else {
            // must be case 3: we need to encode
            ensureDetached(result, output, begin, input, end);
            *output++ = '%';
            *output++ = encodeNibble(c >> 4);
            *output++ = encodeNibble(c & 0xf);
        }
    }

    if (output) {
        // drop the part of the worst-case allocation that was not used
        int len = output - reinterpret_cast<const ushort *>(result.constData());
        result.truncate(len);
        return len - origSize;
    }
    return 0;
}
477
/*
 * Copies characters from \a input to \a output using SSE2 (or AVX2) loads
 * and stores while looking for a '%'. Both pointers are advanced past the
 * characters that were checked.
 *
 * Returns true if the input it checked (if it checked anything) is not
 * encoded. A return of false indicates there's a percent at \a input that
 * needs to be decoded.
 *
 * A true return does not mean that \a input reached \a end: fewer than four
 * characters are never checked and a tail may be left over, so the caller
 * has to continue with a scalar loop.
 *
 * Whole vectors are stored, so characters at and after a '%' that was found
 * may already have been written beyond the returned \a output position.
 *
 * Without SSE2 the function is a stub that checks nothing and returns true.
 */
#ifdef __SSE2__
static bool simdCheckNonEncoded(QChar *&output, const char16_t *&input, const char16_t *end)
{
#  ifdef __AVX2__
    const __m256i percents256 = _mm256_broadcastw_epi16(_mm_cvtsi32_si128('%'));
    const __m128i percents = _mm256_castsi256_si128(percents256);
#  else
    const __m128i percents = _mm_set1_epi16('%');
#  endif

    uint idx = 0;       // index of the first '%' within the last chunk checked
    quint32 mask = 0;   // movemask of the comparison: two bits per UTF-16 character
    if (input + 16 <= end) {
        qptrdiff offset = 0;
        for ( ; input + offset + 16 <= end; offset += 16) {
#  ifdef __AVX2__
            // do 32 bytes at a time using AVX2
            __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(input + offset));
            __m256i comparison = _mm256_cmpeq_epi16(data, percents256);
            mask = _mm256_movemask_epi8(comparison);
            _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + offset), data);
#  else
            // do 32 bytes at a time using unrolled SSE2
            __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + offset));
            __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + offset + 8));
            __m128i comparison1 = _mm_cmpeq_epi16(data1, percents);
            __m128i comparison2 = _mm_cmpeq_epi16(data2, percents);
            uint mask1 = _mm_movemask_epi8(comparison1);
            uint mask2 = _mm_movemask_epi8(comparison2);

            _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset), data1);
            // the second half is only worth storing if the first had no '%'
            if (!mask1)
                _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset + 8), data2);
            mask = mask1 | (mask2 << 16);
#  endif

            if (mask) {
                // each character contributes two mask bits, hence the division
                idx = qCountTrailingZeroBits(mask) / 2;
                break;
            }
        }

        // account for the full chunks that held no '%'
        input += offset;
        if (output)
            output += offset;
    } else if (input + 8 <= end) {
        // do 16 bytes at a time
        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input));
        __m128i comparison = _mm_cmpeq_epi16(data, percents);
        mask = _mm_movemask_epi8(comparison);
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data);
        // with an empty mask this yields 16 / 2 = 8, i.e. the whole chunk
        idx = qCountTrailingZeroBits(quint16(mask)) / 2;
    } else if (input + 4 <= end) {
        // do 8 bytes only
        __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(input));
        __m128i comparison = _mm_cmpeq_epi16(data, percents);
        // only the low 8 bytes were loaded, so only the low 8 mask bits count
        mask = _mm_movemask_epi8(comparison) & 0xffu;
        _mm_storel_epi64(reinterpret_cast<__m128i *>(output), data);
        // with an empty mask this yields 8 / 2 = 4, i.e. the whole chunk
        idx = qCountTrailingZeroBits(quint8(mask)) / 2;
    } else {
        // no percents found (because we didn't check)
        return true;
    }

    // advance to the next non-encoded
    input += idx;
    output += idx;

    return !mask;
}
#else
static bool simdCheckNonEncoded(...)
{
    // nothing was checked: the caller's scalar loop does all the work
    return true;
}
#endif
559
560/*!
561 \since 5.0
562 \internal
563
564 This function decodes a percent-encoded string located in \a in
565 by appending each character to \a appendTo. It returns the number of
566 characters appended. Each percent-encoded sequence is decoded as follows:
567
568 \list
569 \li from %00 to %7F: the exact decoded value is appended;
570 \li from %80 to %FF: QChar::ReplacementCharacter is appended;
571 \li bad encoding: original input is copied to the output, undecoded.
572 \endlist
573
574 Given the above, it's important for the input to already have all UTF-8
575 percent sequences decoded by qt_urlRecode (that is, the input should not
576 have been processed with QUrl::EncodeUnicode).
577
578 The input should also be a valid percent-encoded sequence (the output of
579 qt_urlRecode is always valid).
580*/
581static qsizetype decode(QString &appendTo, QStringView in)
582{
583 const char16_t *begin = in.utf16();
584 const char16_t *end = begin + in.size();
585
586 // fast check whether there's anything to be decoded in the first place
587 const char16_t *input = QtPrivate::qustrchr(in, '%');
588
589 if (Q_LIKELY(input == end))
590 return 0; // nothing to do, it was already decoded!
591
592 // detach
593 const int origSize = appendTo.size();
594 appendTo.resize(origSize + (end - begin));
595 QChar *output = appendTo.data() + origSize;
596 memcpy(static_cast<void *>(output), static_cast<const void *>(begin), (input - begin) * sizeof(QChar));
597 output += input - begin;
598
599 while (input != end) {
600 // something was encoded
601 Q_ASSERT(*input == '%');
602
603 if (Q_UNLIKELY(end - input < 3 || !isHex(input[1]) || !isHex(input[2]))) {
604 // badly-encoded data
605 appendTo.resize(origSize + (end - begin));
606 memcpy(static_cast<void *>(appendTo.begin() + origSize), static_cast<const void *>(begin), (end - begin) * sizeof(ushort));
607 return end - begin;
608 }
609
610 ++input;
611 *output++ = QChar::fromUcs2(decodeNibble(input[0]) << 4 | decodeNibble(input[1]));
612 if (output[-1].unicode() >= 0x80)
613 output[-1] = QChar::ReplacementCharacter;
614 input += 2;
615
616 // search for the next percent, copying from input to output
617 if (simdCheckNonEncoded(output, input, end)) {
618 while (input != end) {
619 const char16_t uc = *input;
620 if (uc == '%')
621 break;
622 *output++ = uc;
623 ++input;
624 }
625 }
626 }
627
628 const qsizetype len = output - appendTo.begin();
629 appendTo.truncate(len);
630 return len - origSize;
631}
632
633template <size_t N>
634static void maskTable(uchar (&table)[N], const uchar (&mask)[N])
635{
636 for (size_t i = 0; i < N; ++i)
637 table[i] &= mask[i];
638}
639
640/*!
641 \internal
642
643 Recodes the string from \a begin to \a end. If any transformations are
644 done, append them to \a appendTo and return the number of characters added.
645 If no transformations were required, return 0.
646
647 The \a encoding option modifies the default behaviour:
648 \list
649 \li QUrl::DecodeReserved: if set, reserved characters will be decoded;
650 if unset, reserved characters will be encoded
651 \li QUrl::EncodeSpaces: if set, spaces will be encoded to "%20"; if unset, they will be " "
652 \li QUrl::EncodeUnicode: if set, characters above U+0080 will be encoded to their UTF-8
653 percent-encoded form; if unset, they will be decoded to UTF-16
654 \li QUrl::FullyDecoded: if set, this function will decode all percent-encoded sequences,
655 including that of the percent character. The resulting string
656 will not be percent-encoded anymore. Use with caution!
657 In this mode, the behaviour is undefined if the input string
658 contains any percent-encoding sequences above %80.
659 Also, the function will not correct bad % sequences.
660 \endlist
661
662 Other flags are ignored (including QUrl::EncodeReserved).
663
664 The \a tableModifications argument can be used to supply extra
665 modifications to the tables, to be applied after the flags above are
666 handled. It consists of a sequence of 16-bit values, where the low 8 bits
667 indicate the character in question and the high 8 bits are either \c
668 EncodeCharacter, \c LeaveCharacter or \c DecodeCharacter.
669
670 This function corrects percent-encoded errors by interpreting every '%' as
671 meaning "%25" (all percents in the same content).
672 */
673
674Q_AUTOTEST_EXPORT qsizetype
675qt_urlRecode(QString &appendTo, QStringView in,
676 QUrl::ComponentFormattingOptions encoding, const ushort *tableModifications)
677{
678 uchar actionTable[sizeof defaultActionTable];
679 if ((encoding & QUrl::FullyDecoded) == QUrl::FullyDecoded) {
680 return int(decode(appendTo, in));
681 }
682
683 memcpy(actionTable, defaultActionTable, sizeof actionTable);
684 if (encoding & QUrl::DecodeReserved)
685 maskTable(actionTable, reservedMask);
686 if (!(encoding & QUrl::EncodeSpaces))
687 actionTable[0] = DecodeCharacter; // decode
688
689 if (tableModifications) {
690 for (const ushort *p = tableModifications; *p; ++p)
691 actionTable[uchar(*p) - ' '] = *p >> 8;
692 }
693
694 return recode(appendTo, reinterpret_cast<const ushort *>(in.begin()), reinterpret_cast<const ushort *>(in.end()),
695 encoding, actionTable, false);
696}
697
698QT_END_NAMESPACE
699