1/****************************************************************************
2**
3** Copyright (C) 2020 The Qt Company Ltd.
4** Copyright (C) 2020 Intel Corporation.
5** Copyright (C) 2019 Mail.ru Group.
6** Contact: https://www.qt.io/licensing/
7**
8** This file is part of the QtCore module of the Qt Toolkit.
9**
10** $QT_BEGIN_LICENSE:LGPL$
11** Commercial License Usage
12** Licensees holding valid commercial Qt licenses may use this file in
13** accordance with the commercial license agreement provided with the
14** Software or, alternatively, in accordance with the terms contained in
15** a written agreement between you and The Qt Company. For licensing terms
16** and conditions see https://www.qt.io/terms-conditions. For further
17** information use the contact form at https://www.qt.io/contact-us.
18**
19** GNU Lesser General Public License Usage
20** Alternatively, this file may be used under the terms of the GNU Lesser
21** General Public License version 3 as published by the Free Software
22** Foundation and appearing in the file LICENSE.LGPL3 included in the
23** packaging of this file. Please review the following information to
24** ensure the GNU Lesser General Public License version 3 requirements
25** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
26**
27** GNU General Public License Usage
28** Alternatively, this file may be used under the terms of the GNU
29** General Public License version 2.0 or (at your option) the GNU General
30** Public license version 3 or any later version approved by the KDE Free
31** Qt Foundation. The licenses are as published by the Free Software
32** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
33** included in the packaging of this file. Please review the following
34** information to ensure the GNU General Public License requirements will
35** be met: https://www.gnu.org/licenses/gpl-2.0.html and
36** https://www.gnu.org/licenses/gpl-3.0.html.
37**
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qstringlist.h"
43#if QT_CONFIG(regularexpression)
44#include "qregularexpression.h"
45#endif
46#include "qunicodetables_p.h"
47#include <private/qstringconverter_p.h>
48#include "qlocale_tools_p.h"
49#include "private/qsimd_p.h"
50#include <qnumeric.h>
51#include <qdatastream.h>
52#include <qlist.h>
53#include "qlocale.h"
54#include "qlocale_p.h"
55#include "qstringbuilder.h"
56#include "qstringmatcher.h"
57#include "qvarlengtharray.h"
58#include "qdebug.h"
59#include "qendian.h"
60#include "qcollator.h"
61
62#ifdef Q_OS_MAC
63#include <private/qcore_mac_p.h>
64#endif
65
66#include <private/qfunctions_p.h>
67
68#include <limits.h>
69#include <string.h>
70#include <stdlib.h>
71#include <stdio.h>
72#include <stdarg.h>
73#include <wchar.h>
74
75#include "qchar.cpp"
76#include "qstringmatcher.cpp"
77#include "qstringiterator_p.h"
78#include "qstringalgorithms_p.h"
79#include "qthreadstorage.h"
80
81#ifdef Q_OS_WIN
82# include <qt_windows.h>
83#endif
84
// Remove any preprocessor definition of `truncate` that an earlier header may
// have introduced, so the plain identifier can be used in this file.
#ifdef truncate
# undef truncate
#endif

// Fallback definitions of the 64-bit integer limits, used only when no earlier
// header has defined them.
#ifndef LLONG_MAX
#define LLONG_MAX qint64_C(9223372036854775807)
#endif
#ifndef LLONG_MIN
#define LLONG_MIN (-LLONG_MAX - qint64_C(1))
#endif
#ifndef ULLONG_MAX
#define ULLONG_MAX quint64_C(18446744073709551615)
#endif

// Evaluates to non-zero when the QArrayData::RawDataType flag is set in d.d->flags.
#define IS_RAW_DATA(d) ((d.d)->flags & QArrayData::RawDataType)
100
101QT_BEGIN_NAMESPACE
102
/*
 * Returns true when pointer p lies inside the half-open range [b, e),
 * using the supplied ordering predicate (std::less<> by default).
 * An empty range (b == e) never contains p.
 */
template <typename T, typename Cmp = std::less<>>
static constexpr bool points_into_range(const T *p, const T *b, const T *e, Cmp less = {}) noexcept
{
    if (less(p, b))
        return false;   // p sorts before the first element
    return less(p, e);  // inside only when strictly before the end
}
108
// Out-of-line definition of the static member QString::_empty: a single null
// UTF-16 code unit.
const char16_t QString::_empty = 0;
110
111/*
112 * Note on the use of SIMD in qstring.cpp:
113 *
114 * Several operations with strings are improved with the use of SIMD code,
115 * since they are repetitive. For MIPS, we have hand-written assembly code
116 * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for
117 * x86, we can only use intrinsics and therefore everything is contained in
118 * qstring.cpp. We need to use intrinsics only for those platforms due to the
119 * different compilers and toolchains used, which have different syntax for
120 * assembly sources.
121 *
122 * ** SSE notes: **
123 *
124 * Whenever multiple alternatives are equivalent or near so, we prefer the one
125 * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all
126 * 64-bit builds and we enable it for 32-bit builds by default. Use of higher
127 * SSE versions should be done when there is a clear performance benefit and
128 * requires fallback code to SSE2, if it exists.
129 *
130 * Performance measurement in the past shows that most strings are short in
131 * size and, therefore, do not benefit from alignment prologues. That is,
132 * trying to find a 16-byte-aligned boundary to operate on is often more
133 * expensive than executing the unaligned operation directly. In addition, note
134 * that the QString private data is designed so that the data is stored on
135 * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers
136 * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them
137 * 50% of the time), so skipping the alignment prologue is actually optimizing
138 * for the common case.
139 */
140
#if defined(__mips_dsp)
// From qstring_mips_dsp_asm.S
// Assembly routines with C linkage, only declared when building for MIPS DSP.
// The two qt_fromlatin1_* variants are called from qt_from_latin1() and
// qt_toLatin1_mips_dsp_asm from qt_to_latin1_internal() later in this file.
extern "C" void qt_fromlatin1_mips_asm_unroll4 (char16_t*, const char*, uint);
extern "C" void qt_fromlatin1_mips_asm_unroll8 (char16_t*, const char*, uint);
extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const char16_t *src, int length);
#endif
147
// internal
// Forward declarations of the search helpers; their definitions are not part
// of this section of the file.
qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs);
static inline qsizetype qFindChar(QStringView str, QChar ch, qsizetype from, Qt::CaseSensitivity cs) noexcept;
template <typename Haystack>
static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle, qsizetype from, Qt::CaseSensitivity cs) noexcept;
// The QString instantiation is explicitly deleted so that it cannot be
// selected by accident (see the trailing note on the declaration).
template <>
inline qsizetype qLastIndexOf(QString haystack, QChar needle,
                              qsizetype from, Qt::CaseSensitivity cs) noexcept = delete; // unwanted, would detach

// Forward declarations of the prefix/suffix test helpers, overloaded on the
// needle type (QStringView, QLatin1String or a single QChar).
static inline bool qt_starts_with(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs);
static inline bool qt_starts_with(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs);
static inline bool qt_starts_with(QStringView haystack, QChar needle, Qt::CaseSensitivity cs);
static inline bool qt_ends_with(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs);
static inline bool qt_ends_with(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs);
static inline bool qt_ends_with(QStringView haystack, QChar needle, Qt::CaseSensitivity cs);
163
// QtPrivate::qustrlen: returns the number of char16_t code units that precede
// the first null code unit in \a str. The SSE2 variant reads whole aligned
// 16-byte blocks, which is why address sanitizing is switched off for it.
#if defined(__SSE2__) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
# if defined(__SANITIZE_ADDRESS__) && Q_CC_GNU < 800 && !defined(Q_CC_CLANG)
# warning "The __attribute__ on below will likely cause a build failure with your GCC version. Your choices are:"
# warning "1) disable ASan;"
# warning "2) disable the optimized code in qustrlen (change __SSE2__ to anything else);"
# warning "3) upgrade your compiler (preferred)."
# endif

// We may overrun the buffer, but that's a false positive:
// this won't crash nor produce incorrect results
__attribute__((__no_sanitize_address__))
#endif
qsizetype QtPrivate::qustrlen(const char16_t *str) noexcept
{
    qsizetype result = 0;

#if defined(__SSE2__) && !(defined(__SANITIZE_ADDRESS__) || QT_HAS_FEATURE(address_sanitizer))
    // find the 16-byte alignment immediately prior or equal to str
    quintptr misalignment = quintptr(str) & 0xf;
    // str must be at least 2-byte aligned, otherwise the division below is lossy
    Q_ASSERT((misalignment & 1) == 0);
    const char16_t *ptr = str - (misalignment / 2);

    // load 16 bytes and see if we have a null
    // (aligned loads can never segfault)
    const __m128i zeroes = _mm_setzero_si128();
    __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
    __m128i comparison = _mm_cmpeq_epi16(data, zeroes);
    // one mask bit per byte, so each matching code unit contributes two bits
    quint32 mask = _mm_movemask_epi8(comparison);

    // ignore the result prior to the beginning of str
    // (misalignment is a byte count, matching the one-bit-per-byte mask)
    mask >>= misalignment;

    // Have we found something in the first block? Need to handle it now
    // because of the right shift above.
    if (mask)
        return qCountTrailingZeroBits(quint32(mask)) / 2;

    // Every following block is fully inside the string's aligned stream, so no
    // shifting is needed: scan until a block contains a null code unit.
    do {
        ptr += 8;
        data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));

        comparison = _mm_cmpeq_epi16(data, zeroes);
        mask = _mm_movemask_epi8(comparison);
    } while (mask == 0);

    // found a null
    uint idx = qCountTrailingZeroBits(quint32(mask));
    // idx is a byte offset within the block; halve it to get code units
    return ptr - str + idx / 2;
#endif

    // Generic fallback. When the SSE2 block above is compiled in it always
    // returns, so the code below is only reached without it.
    if (sizeof(wchar_t) == sizeof(char16_t))
        return wcslen(reinterpret_cast<const wchar_t *>(str));

    while (*str++)
        ++result;
    return result;
}
221
#if !defined(__OPTIMIZE_SIZE__)
namespace {
/*
 * Compile-time unrolled loop for short tails: runs at most MaxCount
 * iterations by recursing through UnrollTailLoop<MaxCount - 1> down to the
 * UnrollTailLoop<0> specialization, which ends the recursion.
 */
template <uint MaxCount> struct UnrollTailLoop
{
    /*
     * Behaves like:
     *   for ( ; count; --count, ++i) {
     *       if (loopCheck(i))
     *           return returnIfFailed(i);
     *   }
     *   return returnIfExited;
     * except that no more than MaxCount iterations are ever performed.
     */
    template <typename RetType, typename Functor1, typename Functor2, typename Number>
    static inline RetType exec(Number count, RetType returnIfExited, Functor1 loopCheck, Functor2 returnIfFailed, Number i = 0)
    {
        if (count) {
            if (loopCheck(i))
                return returnIfFailed(i);

            // hand the remaining iterations to the next-smaller unroll level
            return UnrollTailLoop<MaxCount - 1>::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
        }
        return returnIfExited;
    }

    /*
     * Behaves like:
     *   for (Number i = 0; i < count; ++i)
     *       code(i);
     * implemented on top of the four-functor overload with a check that
     * never reports a hit.
     */
    template <typename Functor, typename Number>
    static inline void exec(Number count, Functor code)
    {
        const auto runAndContinue = [=](Number i) -> bool { code(i); return false; };
        const auto neverCalled = [](Number) { return 0; };
        exec(count, 0, runAndContinue, neverCalled);
    }
};
// Recursion anchor: with no iterations left to unroll, report "loop exited".
template <> template <typename RetType, typename Functor1, typename Functor2, typename Number>
inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1, Functor2, Number)
{
    return returnIfExited;
}
}
#endif
264
/*!
 * \internal
 *
 * Searches for character \a c in the string \a str and returns a pointer to
 * it. Unlike strchr() and wcschr() (but like glibc's strchrnul()), if the
 * character is not found, this function returns a pointer to the end of the
 * string -- that is, \c{str.end()}.
 *
 * The SSE2/AVX2 and AArch64 NEON paths compare several code units per
 * iteration and only ever load blocks that fit entirely before the end of
 * \a str; whatever remains is handled by progressively smaller steps.
 */
const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
{
    const char16_t *n = str.utf16();
    const char16_t *e = n + str.size();

#ifdef __SSE2__
    // When the AVX2 loop below has run, fewer than 16 code units remain, so
    // the 8-unit loop must execute at most once; "loops" records that.
    bool loops = true;
    // Using the PMOVMSKB instruction, we get two bits for each character
    // we compare.
# if defined(__AVX2__) && !defined(__OPTIMIZE_SIZE__)
    // we're going to read n[0..15] (32 bytes)
    __m256i mch256 = _mm256_set1_epi32(c | (c << 16));
    for (const char16_t *next = n + 16; next <= e; n = next, next += 16) {
        __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
        __m256i result = _mm256_cmpeq_epi16(data, mch256);
        uint mask = uint(_mm256_movemask_epi8(result));
        if (mask) {
            // lowest set bit is a byte offset; halve it for the code unit index
            uint idx = qCountTrailingZeroBits(mask);
            return n + idx / 2;
        }
    }
    loops = false;
    __m128i mch = _mm256_castsi256_si128(mch256);
# else
    __m128i mch = _mm_set1_epi32(c | (c << 16));
# endif

    // Compares the 16-bit lanes of data against c. validityMask selects the
    // mask bits that correspond to bytes that were really loaded. On a match,
    // advances n (captured by reference) to the matching code unit.
    auto hasMatch = [mch, &n](__m128i data, ushort validityMask) {
        __m128i result = _mm_cmpeq_epi16(data, mch);
        uint mask = uint(_mm_movemask_epi8(result));
        if ((mask & validityMask) == 0)
            return false;
        uint idx = qCountTrailingZeroBits(mask);
        n += idx / 2;
        return true;
    };

    // we're going to read n[0..7] (16 bytes)
    for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(n));
        if (hasMatch(data, 0xffff))
            return n;

        if (!loops) {
            // single pass only: step over the block just tested and stop
            n += 8;
            break;
        }
    }

# if !defined(__OPTIMIZE_SIZE__)
    // we're going to read n[0..3] (8 bytes)
    if (e - n > 3) {
        __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(n));
        // only the low 8 mask bits describe loaded data
        if (hasMatch(data, 0xff))
            return n;

        n += 4;
    }

    // at most three code units are left: test them with an unrolled loop,
    // returning e when none of them matches
    return UnrollTailLoop<3>::exec(e - n, e,
                                   [=](int i) { return n[i] == c; },
                                   [=](int i) { return n + i; });
# endif
#elif defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64) // vaddv is only available on Aarch64
    // one distinct bit per lane, so the horizontal add below yields a bitmask
    // of the lanes that matched
    const uint16x8_t vmask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
    const uint16x8_t ch_vec = vdupq_n_u16(c);
    for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
        uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
        uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
        if (ushort(mask)) {
            // found a match
            return n + qCountTrailingZeroBits(mask);
        }
    }
#endif // aarch64

    // scalar scan over whatever the vector code did not cover
    --n;
    while (++n != e)
        if (*n == c)
            return n;

    return n;
}
356
357#ifdef __SSE2__
// Scans from \a ptr to \a end for a 16-bit word that has any of the bits in
// \a maskval set. Returns true if no such word was found in the part that was
// scanned; fewer than 8 trailing bytes may be left unscanned and must be
// checked by the caller. Returns false and updates \a ptr to point to the
// first 16-bit word that has any bit set (note: if the input is 8-bit, \a ptr
// may be updated to one byte short).
static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval)
{
    // result holds one bit per byte, set where the masked 16-bit word was
    // zero; the first clear bit therefore marks the first offending word.
    auto updatePtr = [&](uint result) {
        // found a character matching the mask
        uint idx = qCountTrailingZeroBits(~result);
        ptr += idx;
        return false;
    };

# if defined(__SSE4_1__)
    __m128i mask;
    // Recomputes the per-word comparison for a 16-byte block already known to
    // contain a hit, then lets updatePtr advance ptr to it.
    auto updatePtrSimd = [&](__m128i data) {
        __m128i masked = _mm_and_si128(mask, data);
        __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
        uint result = _mm_movemask_epi8(comparison);
        return updatePtr(result);
    };

# if defined(__AVX2__)
    // AVX2 implementation: test 32 bytes at a time
    const __m256i mask256 = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(maskval));
    while (ptr + 32 <= end) {
        __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
        if (!_mm256_testz_si256(mask256, data)) {
            // found a character matching the mask
            __m256i masked256 = _mm256_and_si256(mask256, data);
            __m256i comparison256 = _mm256_cmpeq_epi16(masked256, _mm256_setzero_si256());
            return updatePtr(_mm256_movemask_epi8(comparison256));
        }
        ptr += 32;
    }

    mask = _mm256_castsi256_si128(mask256);
# else
    // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte
    // comparisons, unrolled)
    mask = _mm_set1_epi32(maskval);
    while (ptr + 32 <= end) {
        __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
        __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
        if (!_mm_testz_si128(mask, data1))
            return updatePtrSimd(data1);

        // first half was clean; ptr must already point at the second half
        // before updatePtrSimd adds the in-block offset
        ptr += 16;
        if (!_mm_testz_si128(mask, data2))
            return updatePtrSimd(data2);
        ptr += 16;
    }
# endif

    // AVX2 and SSE4.1: final 16-byte comparison
    if (ptr + 16 <= end) {
        __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
        if (!_mm_testz_si128(mask, data1))
            return updatePtrSimd(data1);
        ptr += 16;
    }

    // and final 8-byte comparison
    if (ptr + 8 <= end) {
        __m128i data1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
        if (!_mm_testz_si128(mask, data1))
            return updatePtrSimd(data1);
        ptr += 8;
    }

# else
    // SSE2 implementation: test 16 bytes at a time.
    const __m128i mask = _mm_set1_epi32(maskval);
    while (ptr + 16 <= end) {
        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
        __m128i masked = _mm_and_si128(mask, data);
        __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
        quint16 result = _mm_movemask_epi8(comparison);
        // all 16 bits set means every masked word was zero
        if (result != 0xffff)
            return updatePtr(result);
        ptr += 16;
    }

    // and one 8-byte comparison
    if (ptr + 8 <= end) {
        __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
        __m128i masked = _mm_and_si128(mask, data);
        __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
        // only the low 8 bits describe the bytes that were loaded
        quint8 result = _mm_movemask_epi8(comparison);
        if (result != 0xff)
            return updatePtr(result);
        ptr += 8;
    }
# endif

    return true;
}
455
456static Q_ALWAYS_INLINE __m128i mm_load8_zero_extend(const void *ptr)
457{
458 const __m128i *dataptr = static_cast<const __m128i *>(ptr);
459#if defined(__SSE4_1__)
460 // use a MOVQ followed by PMOVZXBW
461 // if AVX2 is present, these should combine into a single VPMOVZXBW instruction
462 __m128i data = _mm_loadl_epi64(dataptr);
463 return _mm_cvtepu8_epi16(data);
464# else
465 // use MOVQ followed by PUNPCKLBW
466 __m128i data = _mm_loadl_epi64(dataptr);
467 return _mm_unpacklo_epi8(data, _mm_setzero_si128());
468# endif
469}
470#endif
471
// Checks whether every byte in [ptr, end) has its high bit clear (US-ASCII).
// Returns true when that is the case, leaving ptr equal to end. Returns false
// as soon as a byte with the high bit set is found, with ptr advanced towards
// that byte.
// Note: ptr on output may be off by one and point to a preceding US-ASCII
// character. Usually harmless.
bool qt_is_ascii(const char *&ptr, const char *end) noexcept
{
#if defined(__SSE2__)
    // Testing for the high bit can be done efficiently with just PMOVMSKB
# if defined(__AVX2__)
    while (ptr + 32 <= end) {
        __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
        // one bit per byte, taken from each byte's most significant bit
        quint32 mask = _mm256_movemask_epi8(data);
        if (mask) {
            uint idx = qCountTrailingZeroBits(mask);
            ptr += idx;
            return false;
        }
        ptr += 32;
    }
# endif
    while (ptr + 16 <= end) {
        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
        quint32 mask = _mm_movemask_epi8(data);
        if (mask) {
            uint idx = qCountTrailingZeroBits(mask);
            ptr += idx;
            return false;
        }
        ptr += 16;
    }
    if (ptr + 8 <= end) {
        __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
        // only the low 8 mask bits correspond to loaded bytes
        quint8 mask = _mm_movemask_epi8(data);
        if (mask) {
            uint idx = qCountTrailingZeroBits(mask);
            ptr += idx;
            return false;
        }
        ptr += 8;
    }
#endif

    // portable path: test four bytes at a time
    while (ptr + 4 <= end) {
        quint32 data = qFromUnaligned<quint32>(ptr);
        if (data &= 0x80808080U) {
            // locate the first offending byte in memory order, which depends
            // on the byte order of the 32-bit load
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
            uint idx = qCountLeadingZeroBits(data);
#else
            uint idx = qCountTrailingZeroBits(data);
#endif
            // idx is a bit position; convert it to a byte offset
            ptr += idx / 8;
            return false;
        }
        ptr += 4;
    }

    // remaining bytes, one at a time
    while (ptr != end) {
        if (quint8(*ptr) & 0x80)
            return false;
        ++ptr;
    }
    return true;
}
533
534bool QtPrivate::isAscii(QLatin1String s) noexcept
535{
536 const char *ptr = s.begin();
537 const char *end = s.end();
538
539 return qt_is_ascii(ptr, end);
540}
541
542static bool isAscii(const QChar *&ptr, const QChar *end)
543{
544#ifdef __SSE2__
545 const char *ptr8 = reinterpret_cast<const char *>(ptr);
546 const char *end8 = reinterpret_cast<const char *>(end);
547 bool ok = simdTestMask(ptr8, end8, 0xff80ff80);
548 ptr = reinterpret_cast<const QChar *>(ptr8);
549 if (!ok)
550 return false;
551#endif
552
553 while (ptr != end) {
554 if (ptr->unicode() & 0xff80)
555 return false;
556 ++ptr;
557 }
558 return true;
559}
560
561bool QtPrivate::isAscii(QStringView s) noexcept
562{
563 const QChar *ptr = s.begin();
564 const QChar *end = s.end();
565
566 return isAscii(ptr, end);
567}
568
569bool QtPrivate::isLatin1(QStringView s) noexcept
570{
571 const QChar *ptr = s.begin();
572 const QChar *end = s.end();
573
574#ifdef __SSE2__
575 const char *ptr8 = reinterpret_cast<const char *>(ptr);
576 const char *end8 = reinterpret_cast<const char *>(end);
577 if (!simdTestMask(ptr8, end8, 0xff00ff00))
578 return false;
579 ptr = reinterpret_cast<const QChar *>(ptr8);
580#endif
581
582 while (ptr != end) {
583 if ((*ptr++).unicode() > 0xff)
584 return false;
585 }
586 return true;
587}
588
589bool QtPrivate::isValidUtf16(QStringView s) noexcept
590{
591 constexpr char32_t InvalidCodePoint = UINT_MAX;
592
593 QStringIterator i(s);
594 while (i.hasNext()) {
595 const char32_t c = i.next(InvalidCodePoint);
596 if (c == InvalidCodePoint)
597 return false;
598 }
599
600 return true;
601}
602
// conversion between Latin 1 and UTF-16
// Writes \a size UTF-16 code units to \a dst, each one the zero-extended value
// of the corresponding byte of \a str.
Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
{
    /* SIMD:
     * Unpacking with SSE has been shown to improve performance on recent CPUs
     * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization
     * itself in exactly the same way as one would do it with intrinsics.
     */
#if defined(__SSE2__)
    const char *e = str + size;
    qptrdiff offset = 0;

    // we're going to read str[offset..offset+15] (16 bytes)
    for ( ; str + offset + 15 < e; offset += 16) {
        const __m128i chunk = _mm_loadu_si128((const __m128i*)(str + offset)); // load
#ifdef __AVX2__
        // zero extend to an YMM register
        const __m256i extended = _mm256_cvtepu8_epi16(chunk);

        // store
        _mm256_storeu_si256((__m256i*)(dst + offset), extended);
#else
        const __m128i nullMask = _mm_set1_epi32(0);

        // unpack the first 8 bytes, padding with zeros
        const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
        _mm_storeu_si128((__m128i*)(dst + offset), firstHalf); // store

        // unpack the last 8 bytes, padding with zeros
        const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
        _mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store
#endif
    }

    // we're going to read str[offset..offset+7] (8 bytes)
    if (str + offset + 7 < e) {
        const __m128i unpacked = mm_load8_zero_extend(str + offset);
        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), unpacked);
        offset += 8;
    }

    // the vector code above consumed everything except the last size % 8 bytes
    size = size % 8;
    dst += offset;
    str += offset;
# if !defined(__OPTIMIZE_SIZE__)
    // at most seven bytes left: convert them with an unrolled loop
    return UnrollTailLoop<7>::exec(int(size), [=](int i) { dst[i] = (uchar)str[i]; });
# endif
#endif
#if defined(__mips_dsp)
    // pick the assembly variant by input length
    if (size > 20)
        qt_fromlatin1_mips_asm_unroll8(dst, str, size);
    else
        qt_fromlatin1_mips_asm_unroll4(dst, str, size);
#else
    // scalar conversion; the cast to uchar keeps bytes >= 0x80 from being
    // sign-extended
    while (size--)
        *dst++ = (uchar)*str++;
#endif
}
661
// Narrows \a length UTF-16 code units from \a src into bytes at \a dst.
// With Checked == true, every code unit above 0xff is replaced by '?'.
// With Checked == false, no such replacement is made.
// NOTE(review): the unchecked variant presumably expects the caller to pass
// only values that fit into one byte -- confirm against the callers.
template <bool Checked>
static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
{
#if defined(__SSE2__)
    uchar *e = dst + length;
    qptrdiff offset = 0;

# ifdef __AVX2__
    const __m256i questionMark256 = _mm256_broadcastw_epi16(_mm_cvtsi32_si128('?'));
    const __m256i outOfRange256 = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(0x100));
    const __m128i questionMark = _mm256_castsi256_si128(questionMark256);
    const __m128i outOfRange = _mm256_castsi256_si128(outOfRange256);
# else
    const __m128i questionMark = _mm_set1_epi16('?');
    const __m128i outOfRange = _mm_set1_epi16(0x100);
# endif

    // Returns chunk with every 16-bit lane above 0xff replaced by '?'.
    auto mergeQuestionMarks = [=](__m128i chunk) {
        // SSE has no compare instruction for unsigned comparison.
# ifdef __SSE4_1__
        // We use an unsigned uc = qMin(uc, 0x100) and then compare for equality.
        chunk = _mm_min_epu16(chunk, outOfRange);
        const __m128i offLimitMask = _mm_cmpeq_epi16(chunk, outOfRange);
        chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
# else
        // The values must be shifted by 0x8000 so that they can be compared
        // with the signed comparison instruction
        const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
        const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));

        const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
        const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);

        // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
        // the 16 bits that were correct contains zeros
        const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);

        // correctBytes contains the bytes that were in limit
        // the 16 bits that were off limits contains zeros
        const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);

        // merge offLimitQuestionMark and correctBytes to have the result
        chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);

        Q_UNUSED(outOfRange);
# endif
        return chunk;
    };

    // we're going to write to dst[offset..offset+15] (16 bytes)
    for ( ; dst + offset + 15 < e; offset += 16) {
# if defined(__AVX2__)
        __m256i chunk = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + offset));
        if (Checked) {
            // See mergeQuestionMarks lambda above for details
            chunk = _mm256_min_epu16(chunk, outOfRange256);
            const __m256i offLimitMask = _mm256_cmpeq_epi16(chunk, outOfRange256);
            chunk = _mm256_blendv_epi8(chunk, questionMark256, offLimitMask);
        }

        // split the 256-bit register into its upper and lower halves
        const __m128i chunk2 = _mm256_extracti128_si256(chunk, 1);
        const __m128i chunk1 = _mm256_castsi256_si128(chunk);
# else
        __m128i chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load
        if (Checked)
            chunk1 = mergeQuestionMarks(chunk1);

        __m128i chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load
        if (Checked)
            chunk2 = mergeQuestionMarks(chunk2);
# endif

        // pack the two vector to 16 x 8bits elements
        const __m128i result = _mm_packus_epi16(chunk1, chunk2);
        _mm_storeu_si128((__m128i*)(dst + offset), result); // store
    }

# if !defined(__OPTIMIZE_SIZE__)
    // we're going to write to dst[offset..offset+7] (8 bytes)
    if (dst + offset + 7 < e) {
        __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + offset));
        if (Checked)
            chunk = mergeQuestionMarks(chunk);

        // pack, where the upper half is ignored
        const __m128i result = _mm_packus_epi16(chunk, chunk);
        _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + offset), result);
        offset += 8;
    }

    // we're going to write to dst[offset..offset+3] (4 bytes)
    if (dst + offset + 3 < e) {
        __m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + offset));
        if (Checked)
            chunk = mergeQuestionMarks(chunk);

        // pack, where the upper three quarters are ignored
        const __m128i result = _mm_packus_epi16(chunk, chunk);
        qToUnaligned(_mm_cvtsi128_si32(result), dst + offset);
        offset += 4;
    }

    // everything but the last length % 4 code units has been written
    length = length % 4;
# else
    // only the 16-unit loop ran, so up to 15 code units remain
    length = length % 16;
# endif // optimize size

    // advance dst, src for tail processing
    dst += offset;
    src += offset;

# if !defined(__OPTIMIZE_SIZE__)
    // at most three code units left: convert them with an unrolled loop
    return UnrollTailLoop<3>::exec(length, [=](int i) {
        if (Checked)
            dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
        else
            dst[i] = src[i];
    });
# endif
#elif defined(__ARM_NEON__)
    // Refer to the documentation of the SSE2 implementation
    // this uses exactly the same method as for SSE except:
    // 1) neon has unsigned comparison
    // 2) packing is done to 64 bits (8 x 8bits component).
    if (length >= 16) {
        const int chunkCount = length >> 3; // divided by 8
        const uint16x8_t questionMark = vdupq_n_u16('?'); // set
        const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
        for (int i = 0; i < chunkCount; ++i) {
            uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
            src += 8;

            if (Checked) {
                const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
                const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
                const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
                chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
            }
            const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
            vst1_u8(dst, result); // store
            dst += 8;
        }
        // the loop handled every full group of eight
        length = length % 8;
    }
#endif
#if defined(__mips_dsp)
    qt_toLatin1_mips_dsp_asm(dst, src, length);
#else
    // scalar conversion of whatever the vector code left over (or of the
    // whole input when no vector path is compiled in)
    while (length--) {
        if (Checked)
            *dst++ = (*src>0xff) ? '?' : (uchar) *src;
        else
            *dst++ = *src;
        ++src;
    }
#endif
}
818
819static void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
820{
821 qt_to_latin1_internal<true>(dst, src, length);
822}
823
824void qt_to_latin1_unchecked(uchar *dst, const char16_t *src, qsizetype length)
825{
826 qt_to_latin1_internal<false>(dst, src, length);
827}
828
829// Unicode case-insensitive comparison
830static int ucstricmp(const QChar *a, const QChar *ae, const QChar *b, const QChar *be)
831{
832 if (a == b)
833 return (ae - be);
834
835 const QChar *e = ae;
836 if (be - b < ae - a)
837 e = a + (be - b);
838
839 char32_t alast = 0;
840 char32_t blast = 0;
841 while (a < e) {
842// qDebug() << Qt::hex << alast << blast;
843// qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast);
844// qDebug() << Qt::hex << "*b=" << *b << "blast=" << blast << "folded=" << foldCase (*b, blast);
845 int diff = foldCase(a->unicode(), alast) - foldCase(b->unicode(), blast);
846 if ((diff))
847 return diff;
848 ++a;
849 ++b;
850 }
851 if (a == ae) {
852 if (b == be)
853 return 0;
854 return -1;
855 }
856 return 1;
857}
858
859// Case-insensitive comparison between a Unicode string and a QLatin1String
860static int ucstricmp(const QChar *a, const QChar *ae, const char *b, const char *be)
861{
862 auto e = ae;
863 if (be - b < ae - a)
864 e = a + (be - b);
865
866 while (a < e) {
867 int diff = foldCase(a->unicode()) - foldCase(char16_t{uchar(*b)});
868 if ((diff))
869 return diff;
870 ++a;
871 ++b;
872 }
873 if (a == ae) {
874 if (b == be)
875 return 0;
876 return -1;
877 }
878 return 1;
879}
880
881// Case-insensitive comparison between a Unicode string and a UTF-8 string
882static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
883{
884 auto src1 = reinterpret_cast<const uchar *>(utf8);
885 auto end1 = reinterpret_cast<const uchar *>(utf8end);
886 QStringIterator src2(utf16, utf16end);
887
888 while (src1 < end1 && src2.hasNext()) {
889 uint uc1 = 0;
890 uint *output = &uc1;
891 uchar b = *src1++;
892 int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
893 if (res < 0) {
894 // decoding error
895 uc1 = QChar::ReplacementCharacter;
896 } else {
897 uc1 = QChar::toCaseFolded(uc1);
898 }
899
900 uint uc2 = QChar::toCaseFolded(src2.next());
901 int diff = uc1 - uc2; // can't underflow
902 if (diff)
903 return diff;
904 }
905
906 // the shorter string sorts first
907 return (end1 > src1) - int(src2.hasNext());
908}
909
#if defined(__mips_dsp)
// From qstring_mips_dsp_asm.S
// Assembly comparison routine with C linkage, only declared when building for
// MIPS DSP; ucstrncmp() below hands it inputs of eight or more code units.
extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
                                         const char16_t *b,
                                         unsigned len);
#endif
916
// Unicode case-sensitive compare two same-sized strings
//
// Compares the first l UTF-16 code units of a and b. Returns 0 when all of
// them match; otherwise returns a's code unit minus b's code unit at the
// first position where they differ. The implementation that actually runs
// is selected at compile time by the preprocessor checks below.
static int ucstrncmp(const QChar *a, const QChar *b, size_t l)
{
#ifdef __OPTIMIZE_SIZE__
    // Size-optimized build: one plain scalar loop and nothing else.
    const QChar *end = a + l;
    while (a < end) {
        if (int diff = (int)a->unicode() - (int)b->unicode())
            return diff;
        ++a;
        ++b;
    }
    return 0;
#else
#if defined(__mips_dsp)
    // The assembly routine takes its length as 'unsigned', so make sure
    // size_t is not wider than that on this target.
    static_assert(sizeof(uint) == sizeof(size_t));
    // Shorter inputs fall through to the code below.
    if (l >= 8) {
        return qt_ucstrncmp_mips_dsp_asm(reinterpret_cast<const char16_t*>(a),
                                         reinterpret_cast<const char16_t*>(b),
                                         l);
    }
#endif // __mips_dsp
#ifdef __SSE2__
    // This branch always returns (see the UnrollTailLoop call at its end),
    // so nothing after its #endif runs when __SSE2__ is defined.
    const QChar *end = a + l;
    qptrdiff offset = 0; // index of the first code unit not compared yet

    // Using the PMOVMSKB instruction, we get two bits for each character
    // we compare.
    int retval; // only written by isDifferent() when it returns true
    // Compares two 128-bit chunks that the caller loaded from a + offset and
    // b + offset. On a mismatch it stores the code unit difference in
    // retval and returns true; otherwise it returns false.
    auto isDifferent = [a, b, &offset, &retval](__m128i a_data, __m128i b_data) {
        __m128i result = _mm_cmpeq_epi16(a_data, b_data);
        // inverted, so that set bits mark bytes that differ
        uint mask = ~uint(_mm_movemask_epi8(result));
        // only the low 16 bits carry comparison results for a 128-bit vector
        if (ushort(mask) == 0)
            return false;
        // idx is a byte index; halve it to get the code unit index
        uint idx = qCountTrailingZeroBits(mask);
        retval = a[offset + idx / 2].unicode() - b[offset + idx / 2].unicode();
        return true;
    };

    // we're going to read a[0..15] and b[0..15] (32 bytes)
    for ( ; end - a >= offset + 16; offset += 16) {
#ifdef __AVX2__
        // one 256-bit load per string covers all 16 code units
        __m256i a_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(a + offset));
        __m256i b_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(b + offset));
        __m256i result = _mm256_cmpeq_epi16(a_data, b_data);
        uint mask = _mm256_movemask_epi8(result);
#else
        // two 128-bit loads per string; the two 16-bit masks are merged
        // into one 32-bit mask (second half in the upper 16 bits)
        __m128i a_data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(a + offset));
        __m128i a_data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(a + offset + 8));
        __m128i b_data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
        __m128i b_data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset + 8));
        __m128i result1 = _mm_cmpeq_epi16(a_data1, b_data1);
        __m128i result2 = _mm_cmpeq_epi16(a_data2, b_data2);
        uint mask = _mm_movemask_epi8(result1) | (_mm_movemask_epi8(result2) << 16);
#endif
        // flip the "equal" mask so that set bits mark differing bytes
        mask = ~mask;
        if (mask) {
            // found a different character
            uint idx = qCountTrailingZeroBits(mask);
            return a[offset + idx / 2].unicode() - b[offset + idx / 2].unicode();
        }
    }

    // we're going to read a[0..7] and b[0..7] (16 bytes)
    if (end - a >= offset + 8) {
        __m128i a_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(a + offset));
        __m128i b_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
        if (isDifferent(a_data, b_data))
            return retval;

        offset += 8;
    }

    // we're going to read a[0..3] and b[0..3] (8 bytes)
    if (end - a >= offset + 4) {
        // 64-bit loads: the upper half of each register is zero in both
        // operands, so it compares equal and cannot produce a false mismatch
        __m128i a_data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(a + offset));
        __m128i b_data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(b + offset));
        if (isDifferent(a_data, b_data))
            return retval;

        offset += 4;
    }

    // reset l
    // (every step above consumed a multiple of 4 code units, so what is
    // left is the original length modulo 4)
    l &= 3;

    const auto lambda = [=](size_t i) -> int {
        return a[offset + i].unicode() - b[offset + i].unicode();
    };
    // NOTE(review): UnrollTailLoop is defined elsewhere; presumably it
    // evaluates lambda(i) for i in [0, l) and returns the first non-zero
    // result, or 0 if there is none -- confirm against its definition.
    return UnrollTailLoop<3>::exec(l, 0, lambda, lambda);
#endif
#if defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64) // vaddv is only available on Aarch64
    // This branch also always returns, so the generic code after its #endif
    // is not reached on such builds.
    if (l >= 8) {
        const QChar *end = a + l;
        // one distinct bit per lane, so the horizontal add below yields a
        // bitmask of the lanes that compared equal
        const uint16x8_t mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
        while (end - a > 7) {
            uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
            uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));

            // inverted: set bits in r mark lanes that differ
            uint8_t r = ~(uint8_t)vaddvq_u16(vandq_u16(vceqq_u16(da, db), mask));
            if (r) {
                // found a different QChar
                uint idx = qCountTrailingZeroBits(r);
                return (int)a[idx].unicode() - (int)b[idx].unicode();
            }
            a += 8;
            b += 8;
        }
        // fewer than 8 code units remain
        l &= 7;
    }
    const auto lambda = [=](size_t i) -> int {
        return a[i].unicode() - b[i].unicode();
    };
    // NOTE(review): same assumption about UnrollTailLoop as in the SSE2 branch.
    return UnrollTailLoop<7>::exec(l, 0, lambda, lambda);
#endif // __ARM_NEON__
    // Generic fallback.
    if (!l)
        return 0;

    // check alignment
    if ((reinterpret_cast<quintptr>(a) & 2) == (reinterpret_cast<quintptr>(b) & 2)) {
        // both addresses have the same alignment
        if (reinterpret_cast<quintptr>(a) & 2) {
            // both addresses are not aligned to 4-bytes boundaries
            // compare the first character
            if (*a != *b)
                return a->unicode() - b->unicode();
            --l;
            ++a;
            ++b;

            // now both addresses are 4-bytes aligned
        }

        // both addresses are 4-bytes aligned
        // do a fast 32-bit comparison
        const quint32 *da = reinterpret_cast<const quint32 *>(a);
        const quint32 *db = reinterpret_cast<const quint32 *>(b);
        const quint32 *e = da + (l >> 1); // l / 2 pairs of code units
        for ( ; da != e; ++da, ++db) {
            if (*da != *db) {
                // the pair differs: work out which of its two code units
                // is the first one that does
                a = reinterpret_cast<const QChar *>(da);
                b = reinterpret_cast<const QChar *>(db);
                if (*a != *b)
                    return a->unicode() - b->unicode();
                return a[1].unicode() - b[1].unicode();
            }
        }

        // do we have a tail?
        a = reinterpret_cast<const QChar *>(da);
        b = reinterpret_cast<const QChar *>(db);
        // an odd l leaves exactly one code unit that was not part of a pair
        return (l & 1) ? a->unicode() - b->unicode() : 0;
    } else {
        // one of the addresses isn't 4-byte aligned but the other is
        const QChar *e = a + l;
        for ( ; a != e; ++a, ++b) {
            if (*a != *b)
                return a->unicode() - b->unicode();
        }
    }
    return 0;
#endif
}
1079
// Compares the first l UTF-16 code units of a with the first l bytes of c,
// each byte being widened to 16 bits without sign extension. Returns 0 when
// all of them match; otherwise returns the code unit minus the byte at the
// first position where they differ.
static int ucstrncmp(const QChar *a, const uchar *c, size_t l)
{
    const char16_t *uc = reinterpret_cast<const char16_t *>(a);
    const char16_t *e = uc + l; // one past the last code unit to compare

#ifdef __SSE2__
    __m128i nullmask = _mm_setzero_si128(); // zero bytes to interleave when widening
    qptrdiff offset = 0; // index of the first element not compared yet

# if !defined(__OPTIMIZE_SIZE__)
    // Using the PMOVMSKB instruction, we get two bits for each character
    // we compare.
    int retval; // only written by isDifferent() when it returns true
    // Compares two 128-bit chunks of 16-bit values that the caller prepared
    // from uc + offset and c + offset. On a mismatch it stores the
    // difference in retval and returns true; otherwise it returns false.
    auto isDifferent = [uc, c, &offset, &retval](__m128i a_data, __m128i b_data) {
        __m128i result = _mm_cmpeq_epi16(a_data, b_data);
        // inverted, so that set bits mark bytes that differ
        uint mask = ~uint(_mm_movemask_epi8(result));
        // only the low 16 bits carry comparison results for a 128-bit vector
        if (ushort(mask) == 0)
            return false;
        // idx is a byte index into the 16-bit data; halve it for the element index
        uint idx = qCountTrailingZeroBits(mask);
        retval = uc[offset + idx / 2] - c[offset + idx / 2];
        return true;
    };
# endif

    // we're going to read uc[offset..offset+15] (32 bytes)
    // and c[offset..offset+15] (16 bytes)
    for ( ; uc + offset + 15 < e; offset += 16) {
        // similar to fromLatin1_helper:
        // load 16 bytes of Latin 1 data
        __m128i chunk = _mm_loadu_si128((const __m128i*)(c + offset));

# ifdef __AVX2__
        // expand Latin 1 data via zero extension
        __m256i ldata = _mm256_cvtepu8_epi16(chunk);

        // load UTF-16 data and compare
        __m256i ucdata = _mm256_loadu_si256((const __m256i*)(uc + offset));
        __m256i result = _mm256_cmpeq_epi16(ldata, ucdata);

        // inverted: set bits mark bytes that differ
        uint mask = ~_mm256_movemask_epi8(result);
# else
        // expand via unpacking
        __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullmask);
        __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask);

        // load UTF-16 data and compare
        __m128i ucdata1 = _mm_loadu_si128((const __m128i*)(uc + offset));
        __m128i ucdata2 = _mm_loadu_si128((const __m128i*)(uc + offset + 8));
        __m128i result1 = _mm_cmpeq_epi16(firstHalf, ucdata1);
        __m128i result2 = _mm_cmpeq_epi16(secondHalf, ucdata2);

        // merge both 16-bit masks (second half in the upper bits), then
        // invert so that set bits mark bytes that differ
        uint mask = ~(_mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16);
# endif
        if (mask) {
            // found a different character
            uint idx = qCountTrailingZeroBits(mask);
            return uc[offset + idx / 2] - c[offset + idx / 2];
        }
    }

# if !defined(__OPTIMIZE_SIZE__)
    // we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes)
    if (uc + offset + 7 < e) {
        // same, but we're using an 8-byte load
        // NOTE(review): mm_load8_zero_extend is defined elsewhere; going by
        // its name it loads 8 bytes and widens them to 16 bits -- confirm.
        __m128i secondHalf = mm_load8_zero_extend(c + offset);

        __m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset));
        if (isDifferent(ucdata, secondHalf))
            return retval;

        // still matched
        offset += 8;
    }

    enum { MaxTailLength = 3 }; // at most 3 elements are left after the 4-element step below
    // we'll read uc[offset..offset+3] (8 bytes) and c[offset..offset+3] (4 bytes)
    if (uc + offset + 3 < e) {
        // 4 bytes are read as one (possibly unaligned) int and widened by
        // interleaving them with zero bytes
        __m128i chunk = _mm_cvtsi32_si128(qFromUnaligned<int>(c + offset));
        __m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask);

        __m128i ucdata = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(uc + offset));
        if (isDifferent(ucdata, secondHalf))
            return retval;

        // still matched
        offset += 4;
    }
# endif // optimize size

    // reset uc and c
    uc += offset;
    c += offset;

# if !defined(__OPTIMIZE_SIZE__)
    const auto lambda = [=](size_t i) { return uc[i] - char16_t(c[i]); };
    // NOTE(review): UnrollTailLoop is defined elsewhere; presumably it
    // evaluates lambda(i) for i in [0, e - uc) and returns the first
    // non-zero result, or 0 if there is none -- confirm against its definition.
    return UnrollTailLoop<MaxTailLength>::exec(e - uc, 0, lambda, lambda);
# endif
#endif

    // Scalar loop: reached when __SSE2__ is not defined, or when it is
    // defined together with __OPTIMIZE_SIZE__ (to finish what the 16-element
    // loop above left over).
    while (uc < e) {
        int diff = *uc - *c;
        if (diff)
            return diff;
        uc++, c++;
    }

    return 0;
}
1188
1189constexpr int lencmp(qsizetype lhs, qsizetype rhs) noexcept
1190{
1191 return lhs == rhs ? 0 :
1192 lhs > rhs ? 1 :
1193 /* else */ -1 ;
1194}
1195
1196// Unicode case-sensitive comparison
1197static int ucstrcmp(const QChar *a, size_t alen, const QChar *b, size_t blen)
1198{
1199 if (a == b && alen == blen)
1200 return 0;
1201 const size_t l = qMin(alen, blen);
1202 int cmp = ucstrncmp(a, b, l);
1203 return cmp ? cmp : lencmp(alen, blen);
1204}
1205
1206static int ucstrcmp(const QChar *a, size_t alen, const char *b, size_t blen)
1207{
1208 const size_t l = qMin(alen, blen);
1209 const int cmp = ucstrncmp(a, reinterpret_cast<const uchar*>(b), l);
1210 return cmp ? cmp : lencmp(alen, blen);
1211}
1212
1213static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
1214{
1215 constexpr uchar latin1Lower[256] = {
1216 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
1217 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
1218 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
1219 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
1220 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
1221 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
1222 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
1223 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
1224 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
1225 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
1226 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
1227 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
1228 // 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life
1229 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
1230 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
1231 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
1232 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
1233 };
1234 // We're called with QLatin1String's .data() and .size():
1235 Q_ASSERT(lSize >= 0 && rSize >= 0);
1236 if (!lSize)
1237 return rSize ? -1 : 0;
1238 if (!rSize)
1239 return 1;
1240 const qsizetype size = std::min(lSize, rSize);
1241
1242 const uchar *lhs = reinterpret_cast<const uchar *>(lhsChar);
1243 const uchar *rhs = reinterpret_cast<const uchar *>(rhsChar);
1244 Q_ASSERT(lhs && rhs); // since both lSize and rSize are positive
1245 for (qsizetype i = 0; i < size; i++) {
1246 Q_ASSERT(lhs[i] && rhs[i]);
1247 if (int res = latin1Lower[lhs[i]] - latin1Lower[rhs[i]])
1248 return res;
1249 }
1250 return lencmp(lSize, rSize);
1251}
1252bool QtPrivate::equalStrings(QStringView lhs, QStringView rhs) noexcept
1253{
1254 return ucstrcmp(lhs.begin(), lhs.size(), rhs.begin(), rhs.size()) == 0;
1255}
1256
1257bool QtPrivate::equalStrings(QStringView lhs, QLatin1String rhs) noexcept
1258{
1259 return ucstrcmp(lhs.begin(), lhs.size(), rhs.begin(), rhs.size()) == 0;
1260}
1261
// Equality does not depend on operand order, so this simply calls the
// (QStringView, QLatin1String) overload with the arguments swapped.
bool QtPrivate::equalStrings(QLatin1String lhs, QStringView rhs) noexcept
{
    return QtPrivate::equalStrings(rhs, lhs);
}
1266
1267bool QtPrivate::equalStrings(QLatin1String lhs, QLatin1String rhs) noexcept
1268{
1269 return lhs.size() == rhs.size() && (!lhs.size() || qstrncmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1270}
1271
// UTF-8 against UTF-16: the strings are equal when QUtf8::compareUtf8()
// reports no difference, i.e. returns 0.
bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QStringView rhs) noexcept
{
    return QUtf8::compareUtf8(lhs, rhs) == 0;
}
1276
// Equality does not depend on operand order, so this simply calls the
// (QBasicUtf8StringView, QStringView) overload with the arguments swapped.
bool QtPrivate::equalStrings(QStringView lhs, QBasicUtf8StringView<false> rhs) noexcept
{
    return QtPrivate::equalStrings(rhs, lhs);
}
1281
1282bool QtPrivate::equalStrings(QLatin1String lhs, QBasicUtf8StringView<false> rhs) noexcept
1283{
1284 QString r = rhs.toString();
1285 return QtPrivate::equalStrings(lhs, r); // ### optimize!
1286}
1287
// Equality does not depend on operand order, so this simply calls the
// (QLatin1String, QBasicUtf8StringView) overload with the arguments swapped.
bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QLatin1String rhs) noexcept
{
    return QtPrivate::equalStrings(rhs, lhs);
}
1292
1293bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs) noexcept
1294{
1295 return lhs.size() == rhs.size() && (!lhs.size() || qstrncmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1296}
1297
1298bool QAnyStringView::equal(QAnyStringView lhs, QAnyStringView rhs) noexcept
1299{
1300 if (lhs.size() != rhs.size() && lhs.isUtf8() == rhs.isUtf8())
1301 return false;
1302 return lhs.visit([rhs](auto lhs) {
1303 return rhs.visit([lhs](auto rhs) {
1304 return QtPrivate::equalStrings(lhs, rhs);
1305 });
1306 });
1307}
1308
1309/*!
1310 \relates QStringView
1311 \internal
1312 \since 5.10
1313
1314 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1315
1316 If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive;
1317 otherwise the comparison is case-insensitive.
1318
1319 Case-sensitive comparison is based exclusively on the numeric Unicode values
1320 of the characters and is very fast, but is not what a human would expect.
1321 Consider sorting user-visible strings with QString::localeAwareCompare().
1322
1323 \sa {Comparing Strings}
1324*/
1325int QtPrivate::compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1326{
1327 if (cs == Qt::CaseSensitive)
1328 return ucstrcmp(lhs.begin(), lhs.size(), rhs.begin(), rhs.size());
1329 return ucstricmp(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
1330}
1331
1332/*!
1333 \relates QStringView
1334 \internal
1335 \since 5.10
1336 \overload
1337
1338 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1339
1340 If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive;
1341 otherwise the comparison is case-insensitive.
1342
1343 Case-sensitive comparison is based exclusively on the numeric Unicode values
1344 of the characters and is very fast, but is not what a human would expect.
1345 Consider sorting user-visible strings with QString::localeAwareCompare().
1346
1347 \sa {Comparing Strings}
1348*/
1349int QtPrivate::compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensitivity cs) noexcept
1350{
1351 if (cs == Qt::CaseSensitive)
1352 return ucstrcmp(lhs.begin(), lhs.size(), rhs.begin(), rhs.size());
1353 return ucstricmp(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
1354}
1355
1356/*!
1357 \relates QStringView
1358 \internal
1359 \since 6.0
1360 \overload
1361*/
int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
{
    // Swapping the operands reverses the ordering, so call the
    // (QBasicUtf8StringView, QStringView) overload and negate its result.
    return -compareStrings(rhs, lhs, cs);
}
1366
1367/*!
1368 \relates QStringView
1369 \internal
1370 \since 5.10
1371 \overload
1372*/
int QtPrivate::compareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
{
    // Swapping the operands reverses the ordering, so call the
    // (QStringView, QLatin1String) overload and negate its result.
    return -compareStrings(rhs, lhs, cs);
}
1377
1378/*!
1379 \relates QStringView
1380 \internal
1381 \since 5.10
1382 \overload
1383
1384 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1385
1386 If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive;
1387 otherwise the comparison is case-insensitive.
1388
1389 Case-sensitive comparison is based exclusively on the numeric Latin-1 values
1390 of the characters and is very fast, but is not what a human would expect.
1391 Consider sorting user-visible strings with QString::localeAwareCompare().
1392
1393 \sa {Comparing Strings}
1394*/
1395int QtPrivate::compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSensitivity cs) noexcept
1396{
1397 if (lhs.isEmpty())
1398 return lencmp(qsizetype(0), rhs.size());
1399 if (cs == Qt::CaseInsensitive)
1400 return latin1nicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
1401 const auto l = std::min(lhs.size(), rhs.size());
1402 int r = qstrncmp(lhs.data(), rhs.data(), l);
1403 return r ? r : lencmp(lhs.size(), rhs.size());
1404}
1405
1406/*!
1407 \relates QStringView
1408 \internal
1409 \since 6.0
1410 \overload
1411*/
int QtPrivate::compareStrings(QLatin1String lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
{
    // No direct Latin-1/UTF-8 comparison here: rhs is converted with
    // toString() and the comparison is done against that temporary.
    return compareStrings(lhs, rhs.toString(), cs); // ### optimize!
}
1416
1417/*!
1418 \relates QStringView
1419 \internal
1420 \since 6.0
1421 \overload
1422*/
1423int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1424{
1425 if (cs == Qt::CaseSensitive)
1426 return QUtf8::compareUtf8(lhs, rhs);
1427 return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
1428}
1429
1430/*!
1431 \relates QStringView
1432 \internal
1433 \since 6.0
1434 \overload
1435*/
int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1String rhs, Qt::CaseSensitivity cs) noexcept
{
    // Swapping the operands reverses the ordering, so call the
    // (QLatin1String, QBasicUtf8StringView) overload and negate its result.
    return -compareStrings(rhs, lhs, cs);
}
1440
1441/*!
1442 \relates QStringView
1443 \internal
1444 \since 6.0
1445 \overload
1446*/
1447int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1448{
1449 if (lhs.isEmpty())
1450 return lencmp(0, rhs.size());
1451 if (cs == Qt::CaseInsensitive)
1452 return compareStrings(lhs.toString(), rhs.toString(), cs); // ### optimize!
1453 const auto l = std::min(lhs.size(), rhs.size());
1454 int r = qstrncmp(lhs.data(), rhs.data(), l);
1455 return r ? r : lencmp(lhs.size(), rhs.size());
1456}
1457
1458int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
1459{
1460 return lhs.visit([rhs, cs](auto lhs) {
1461 return rhs.visit([lhs, cs](auto rhs) {
1462 return QtPrivate::compareStrings(lhs, rhs, cs);
1463 });
1464 });
1465}
1466
// Updates a hash kept in a variable named hashHaystack, which must exist in
// the scope where the macro is expanded, together with sl_minus_1:
// subtracts `a` shifted left by sl_minus_1 bits (skipped when that shift
// would be as wide as std::size_t or wider), then shifts the hash left by
// one bit.
// The expansion is an unbraced `if` followed by a second statement without
// its terminating semicolon, so use it only as a statement of its own,
// `REHASH(x);`, never as the body of an unbraced if/else or loop.
#define REHASH(a) \
    if (sl_minus_1 < sizeof(std::size_t) * CHAR_BIT)  \
        hashHaystack -= std::size_t(a) << sl_minus_1; \
    hashHaystack <<= 1
1471
// Returns true if ch is one of the characters 'A' to 'Z'.
inline bool qIsUpper(char ch)
{
    if (ch < 'A')
        return false;
    return ch <= 'Z';
}
1476
// Returns true if ch is one of the characters '0' to '9'.
inline bool qIsDigit(char ch)
{
    if (ch < '0')
        return false;
    return ch <= '9';
}
1481
// Maps 'A'..'Z' to 'a'..'z'; every other character is returned unchanged.
inline char qToLower(char ch)
{
    const bool isCapital = ch >= 'A' && ch <= 'Z';
    if (!isCapital)
        return ch;
    // move the letter by the distance between the two alphabets
    return ch + ('a' - 'A');
}
1489
1490
1491/*!
1492 \macro QT_RESTRICTED_CAST_FROM_ASCII
1493 \relates QString
1494
1495 Defining this macro disables most automatic conversions from source
1496 literals and 8-bit data to unicode QStrings, but allows the use of
    the \c{QChar(char)} and \c{QString(const char (&ch)[N])} constructors,
1498 and the \c{QString::operator=(const char (&ch)[N])} assignment operator
1499 giving most of the type-safety benefits of \c QT_NO_CAST_FROM_ASCII
1500 but does not require user code to wrap character and string literals
1501 with QLatin1Char, QLatin1String or similar.
1502
1503 Using this macro together with source strings outside the 7-bit range,
1504 non-literals, or literals with embedded NUL characters is undefined.
1505
1506 \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_TO_ASCII
1507*/
1508
1509/*!
1510 \macro QT_NO_CAST_FROM_ASCII
1511 \relates QString
1512
    Disables automatic conversions from 8-bit strings (char *) to Unicode QStrings.
1514
1515 \sa QT_NO_CAST_TO_ASCII, QT_RESTRICTED_CAST_FROM_ASCII, QT_NO_CAST_FROM_BYTEARRAY
1516*/
1517
1518/*!
1519 \macro QT_NO_CAST_TO_ASCII
1520 \relates QString
1521
    Disables automatic conversion from QString to 8-bit strings (char *).
1523
1524 \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII, QT_NO_CAST_FROM_BYTEARRAY
1525*/
1526
1527/*!
1528 \macro QT_ASCII_CAST_WARNINGS
1529 \internal
1530 \relates QString
1531
1532 This macro can be defined to force a warning whenever a function is
1533 called that automatically converts between unicode and 8-bit encodings.
1534
1535 Note: This only works for compilers that support warnings for
1536 deprecated API.
1537
1538 \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
1539*/
1540
1541/*!
1542 \class QString
1543 \inmodule QtCore
1544 \reentrant
1545
1546 \brief The QString class provides a Unicode character string.
1547
1548 \ingroup tools
1549 \ingroup shared
1550 \ingroup string-processing
1551
1552 QString stores a string of 16-bit \l{QChar}s, where each QChar
1553 corresponds to one UTF-16 code unit. (Unicode characters
1554 with code values above 65535 are stored using surrogate pairs,
1555 i.e., two consecutive \l{QChar}s.)
1556
1557 \l{Unicode} is an international standard that supports most of the
1558 writing systems in use today. It is a superset of US-ASCII (ANSI
1559 X3.4-1986) and Latin-1 (ISO 8859-1), and all the US-ASCII/Latin-1
1560 characters are available at the same code positions.
1561
1562 Behind the scenes, QString uses \l{implicit sharing}
1563 (copy-on-write) to reduce memory usage and to avoid the needless
1564 copying of data. This also helps reduce the inherent overhead of
1565 storing 16-bit characters instead of 8-bit characters.
1566
1567 In addition to QString, Qt also provides the QByteArray class to
1568 store raw bytes and traditional 8-bit '\\0'-terminated strings.
1569 For most purposes, QString is the class you want to use. It is
1570 used throughout the Qt API, and the Unicode support ensures that
1571 your applications will be easy to translate if you want to expand
1572 your application's market at some point. The two main cases where
1573 QByteArray is appropriate are when you need to store raw binary
1574 data, and when memory conservation is critical (like in embedded
1575 systems).
1576
1577 \tableofcontents
1578
1579 \section1 Initializing a String
1580
1581 One way to initialize a QString is simply to pass a \c{const char
1582 *} to its constructor. For example, the following code creates a
1583 QString of size 5 containing the data "Hello":
1584
1585 \snippet qstring/main.cpp 0
1586
1587 QString converts the \c{const char *} data into Unicode using the
1588 fromUtf8() function.
1589
1590 In all of the QString functions that take \c{const char *}
1591 parameters, the \c{const char *} is interpreted as a classic
1592 C-style '\\0'-terminated string encoded in UTF-8. It is legal for
1593 the \c{const char *} parameter to be \nullptr.
1594
1595 You can also provide string data as an array of \l{QChar}s:
1596
1597 \snippet qstring/main.cpp 1
1598
1599 QString makes a deep copy of the QChar data, so you can modify it
1600 later without experiencing side effects. (If for performance
1601 reasons you don't want to take a deep copy of the character data,
1602 use QString::fromRawData() instead.)
1603
1604 Another approach is to set the size of the string using resize()
1605 and to initialize the data character per character. QString uses
1606 0-based indexes, just like C++ arrays. To access the character at
1607 a particular index position, you can use \l operator[](). On
1608 non-const strings, \l operator[]() returns a reference to a
1609 character that can be used on the left side of an assignment. For
1610 example:
1611
1612 \snippet qstring/main.cpp 2
1613
1614 For read-only access, an alternative syntax is to use the at()
1615 function:
1616
1617 \snippet qstring/main.cpp 3
1618
1619 The at() function can be faster than \l operator[](), because it
1620 never causes a \l{deep copy} to occur. Alternatively, use the
1621 left(), right(), or mid() functions to extract several characters
1622 at a time.
1623
1624 A QString can embed '\\0' characters (QChar::Null). The size()
1625 function always returns the size of the whole string, including
1626 embedded '\\0' characters.
1627
1628 After a call to the resize() function, newly allocated characters
1629 have undefined values. To set all the characters in the string to
1630 a particular value, use the fill() function.
1631
1632 QString provides dozens of overloads designed to simplify string
1633 usage. For example, if you want to compare a QString with a string
1634 literal, you can write code like this and it will work as expected:
1635
1636 \snippet qstring/main.cpp 4
1637
1638 You can also pass string literals to functions that take QStrings
1639 as arguments, invoking the QString(const char *)
1640 constructor. Similarly, you can pass a QString to a function that
1641 takes a \c{const char *} argument using the \l qPrintable() macro
1642 which returns the given QString as a \c{const char *}. This is
1643 equivalent to calling <QString>.toLocal8Bit().constData().
1644
1645 \section1 Manipulating String Data
1646
1647 QString provides the following basic functions for modifying the
1648 character data: append(), prepend(), insert(), replace(), and
1649 remove(). For example:
1650
1651 \snippet qstring/main.cpp 5
1652
1653 If you are building a QString gradually and know in advance
1654 approximately how many characters the QString will contain, you
1655 can call reserve(), asking QString to preallocate a certain amount
1656 of memory. You can also call capacity() to find out how much
1657 memory QString actually allocated.
1658
1659 The replace() and remove() functions' first two arguments are the
1660 position from which to start erasing and the number of characters
1661 that should be erased. If you want to replace all occurrences of
1662 a particular substring with another, use one of the two-parameter
1663 replace() overloads.
1664
1665 A frequent requirement is to remove whitespace characters from a
1666 string ('\\n', '\\t', ' ', etc.). If you want to remove whitespace
1667 from both ends of a QString, use the trimmed() function. If you
1668 want to remove whitespace from both ends and replace multiple
1669 consecutive whitespaces with a single space character within the
1670 string, use simplified().
1671
1672 If you want to find all occurrences of a particular character or
1673 substring in a QString, use the indexOf() or lastIndexOf()
1674 functions. The former searches forward starting from a given index
1675 position, the latter searches backward. Both return the index
1676 position of the character or substring if they find it; otherwise,
1677 they return -1. For example, here is a typical loop that finds all
1678 occurrences of a particular substring:
1679
1680 \snippet qstring/main.cpp 6
1681
1682 QString provides many functions for converting numbers into
1683 strings and strings into numbers. See the arg() functions, the
1684 setNum() functions, the number() static functions, and the
1685 toInt(), toDouble(), and similar functions.
1686
1687 To get an upper- or lowercase version of a string use toUpper() or
1688 toLower().
1689
1690 Lists of strings are handled by the QStringList class. You can
1691 split a string into a list of strings using the split() function,
1692 and join a list of strings into a single string with an optional
1693 separator using QStringList::join(). You can obtain a list of
1694 strings from a string list that contain a particular substring or
1695 that match a particular QRegularExpression using the QStringList::filter()
1696 function.
1697
1698 \section1 Querying String Data
1699
1700 If you want to see if a QString starts or ends with a particular
1701 substring use startsWith() or endsWith(). If you simply want to
1702 check whether a QString contains a particular character or
1703 substring, use the contains() function. If you want to find out
1704 how many times a particular character or substring occurs in the
1705 string, use count().
1706
1707 To obtain a pointer to the actual character data, call data() or
1708 constData(). These functions return a pointer to the beginning of
1709 the QChar data. The pointer is guaranteed to remain valid until a
1710 non-const function is called on the QString.
1711
1712 \section2 Comparing Strings
1713
1714 QStrings can be compared using overloaded operators such as \l
1715 operator<(), \l operator<=(), \l operator==(), \l operator>=(),
1716 and so on. Note that the comparison is based exclusively on the
1717 numeric Unicode values of the characters. It is very fast, but is
1718 not what a human would expect; the QString::localeAwareCompare()
1719 function is usually a better choice for sorting user-interface
1720 strings, when such a comparison is available.
1721
1722 On Unix-like platforms (including Linux, \macos and iOS), when Qt
1723 is linked with the ICU library (which it usually is), its
1724 locale-aware sorting is used. Otherwise, on \macos and iOS, \l
    localeAwareCompare() compares according to the "Order for sorted
    lists" setting in the International preferences panel. On other
    Unix-like systems without ICU, the comparison falls back to the
    system library's \c strcoll().
1729
1730 \section1 Converting Between encoded strings data and QString
1731
1732 QString provides the following three functions that return a
1733 \c{const char *} version of the string as QByteArray: toUtf8(),
1734 toLatin1(), and toLocal8Bit().
1735
1736 \list
1737 \li toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
1738 \li toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
1739 superset of US-ASCII (ANSI X3.4-1986) that supports the entire
1740 Unicode character set through multibyte sequences.
1741 \li toLocal8Bit() returns an 8-bit string using the system's local
1742 encoding. This is the same as toUtf8() on Unix systems.
1743 \endlist
1744
1745 To convert from one of these encodings, QString provides
1746 fromLatin1(), fromUtf8(), and fromLocal8Bit(). Other
1747 encodings are supported through the QStringEncoder and QStringDecoder
1748 classes.
1749
1750 As mentioned above, QString provides a lot of functions and
1751 operators that make it easy to interoperate with \c{const char *}
1752 strings. But this functionality is a double-edged sword: It makes
1753 QString more convenient to use if all strings are US-ASCII or
1754 Latin-1, but there is always the risk that an implicit conversion
1755 from or to \c{const char *} is done using the wrong 8-bit
    encoding. To minimize these risks, you can turn off these implicit
    conversions by defining some of the following preprocessor symbols:
1758
1759 \list
1760 \li \c QT_NO_CAST_FROM_ASCII disables automatic conversions from
1761 C string literals and pointers to Unicode.
1762 \li \c QT_RESTRICTED_CAST_FROM_ASCII allows automatic conversions
1763 from C characters and character arrays, but disables automatic
1764 conversions from character pointers to Unicode.
1765 \li \c QT_NO_CAST_TO_ASCII disables automatic conversion from QString
1766 to C strings.
1767 \endlist
1768
1769 One way to define these preprocessor symbols globally for your
1770 application is to add the following entry to your \l {Creating Project Files}{qmake project file}:
1771
1772 \snippet code/src_corelib_text_qstring.cpp 0
1773
1774 You then need to explicitly call fromUtf8(), fromLatin1(),
1775 or fromLocal8Bit() to construct a QString from an
1776 8-bit string, or use the lightweight QLatin1String class, for
1777 example:
1778
1779 \snippet code/src_corelib_text_qstring.cpp 1
1780
1781 Similarly, you must call toLatin1(), toUtf8(), or
1782 toLocal8Bit() explicitly to convert the QString to an 8-bit
1783 string.
1784
1785 \table 100 %
1786 \header
1787 \li Note for C Programmers
1788
1789 \row
1790 \li
1791 Due to C++'s type system and the fact that QString is
1792 \l{implicitly shared}, QStrings may be treated like \c{int}s or
1793 other basic types. For example:
1794
1795 \snippet qstring/main.cpp 7
1796
    The \c result variable is a normal variable allocated on the
1798 stack. When \c return is called, and because we're returning by
1799 value, the copy constructor is called and a copy of the string is
1800 returned. No actual copying takes place thanks to the implicit
1801 sharing.
1802
1803 \endtable
1804
1805 \section1 Distinction Between Null and Empty Strings
1806
1807 For historical reasons, QString distinguishes between a null
1808 string and an empty string. A \e null string is a string that is
1809 initialized using QString's default constructor or by passing
1810 (const char *)0 to the constructor. An \e empty string is any
1811 string with size 0. A null string is always empty, but an empty
1812 string isn't necessarily null:
1813
1814 \snippet qstring/main.cpp 8
1815
1816 All functions except isNull() treat null strings the same as empty
1817 strings. For example, toUtf8().constData() returns a valid pointer
1818 (\e not nullptr) to a '\\0' character for a null string. We
1819 recommend that you always use the isEmpty() function and avoid isNull().
1820
1821 \section1 Argument Formats
1822
1823 In member functions where an argument \e format can be specified
1824 (e.g., arg(), number()), the argument \e format can be one of the
1825 following:
1826
1827 \table
1828 \header \li Format \li Meaning
1829 \row \li \c e \li format as [-]9.9e[+|-]999
1830 \row \li \c E \li format as [-]9.9E[+|-]999
1831 \row \li \c f \li format as [-]9.9
1832 \row \li \c g \li use \c e or \c f format, whichever is the most concise
1833 \row \li \c G \li use \c E or \c f format, whichever is the most concise
1834 \endtable
1835
1836 A \e precision is also specified with the argument \e format. For
1837 the 'e', 'E', and 'f' formats, the \e precision represents the
1838 number of digits \e after the decimal point. For the 'g' and 'G'
1839 formats, the \e precision represents the maximum number of
1840 significant digits (trailing zeroes are omitted).
1841
1842 \section1 More Efficient String Construction
1843
1844 Many strings are known at compile time. But the trivial
    constructor QString("Hello") will copy the contents of the string,
1846 treating the contents as Latin-1. To avoid this one can use the
1847 QStringLiteral macro to directly create the required data at compile
1848 time. Constructing a QString out of the literal does then not cause
1849 any overhead at runtime.
1850
1851 A slightly less efficient way is to use QLatin1String. This class wraps
    a C string literal, precalculates its length at compile time and can
1853 then be used for faster comparison with QStrings and conversion to
1854 QStrings than a regular C string literal.
1855
1856 Using the QString \c{'+'} operator, it is easy to construct a
1857 complex string from multiple substrings. You will often write code
1858 like this:
1859
1860 \snippet qstring/stringbuilder.cpp 0
1861
1862 There is nothing wrong with either of these string constructions,
1863 but there are a few hidden inefficiencies. Beginning with Qt 4.6,
1864 you can eliminate them.
1865
    First, multiple uses of the \c{'+'} operator usually mean
1867 multiple memory allocations. When concatenating \e{n} substrings,
1868 where \e{n > 2}, there can be as many as \e{n - 1} calls to the
1869 memory allocator.
1870
1871 In 4.6, an internal template class \c{QStringBuilder} has been
1872 added along with a few helper functions. This class is marked
1873 internal and does not appear in the documentation, because you
1874 aren't meant to instantiate it in your code. Its use will be
1875 automatic, as described below. The class is found in
1876 \c {src/corelib/tools/qstringbuilder.cpp} if you want to have a
1877 look at it.
1878
1879 \c{QStringBuilder} uses expression templates and reimplements the
1880 \c{'%'} operator so that when you use \c{'%'} for string
1881 concatenation instead of \c{'+'}, multiple substring
1882 concatenations will be postponed until the final result is about
1883 to be assigned to a QString. At this point, the amount of memory
1884 required for the final result is known. The memory allocator is
1885 then called \e{once} to get the required space, and the substrings
1886 are copied into it one by one.
1887
1888 Additional efficiency is gained by inlining and reduced reference
1889 counting (the QString created from a \c{QStringBuilder} typically
1890 has a ref count of 1, whereas QString::append() needs an extra
1891 test).
1892
1893 There are two ways you can access this improved method of string
1894 construction. The straightforward way is to include
1895 \c{QStringBuilder} wherever you want to use it, and use the
1896 \c{'%'} operator instead of \c{'+'} when concatenating strings:
1897
1898 \snippet qstring/stringbuilder.cpp 5
1899
    A more global approach, which is the most convenient but
    not entirely source compatible, is to define this in your
    .pro file:
1903
1904 \snippet qstring/stringbuilder.cpp 3
1905
1906 and the \c{'+'} will automatically be performed as the
1907 \c{QStringBuilder} \c{'%'} everywhere.
1908
1909 \section1 Maximum size and out-of-memory conditions
1910
1911 In case memory allocation fails, QString will throw a \c std::bad_alloc
1912 exception. Out of memory conditions in the Qt containers are the only case
1913 where Qt will throw exceptions.
1914
1915 Note that the operating system may impose further limits on applications
1916 holding a lot of allocated memory, especially large, contiguous blocks.
1917 Such considerations, the configuration of such behavior or any mitigation
1918 are outside the scope of the Qt API.
1919
1920 \sa fromRawData(), QChar, QLatin1String, QByteArray
1921*/
1922
1923/*! \typedef QString::ConstIterator
1924
1925 Qt-style synonym for QString::const_iterator.
1926*/
1927
1928/*! \typedef QString::Iterator
1929
1930 Qt-style synonym for QString::iterator.
1931*/
1932
1933/*! \typedef QString::const_iterator
1934
1935 \sa QString::iterator
1936*/
1937
1938/*! \typedef QString::iterator
1939
1940 \sa QString::const_iterator
1941*/
1942
1943/*! \typedef QString::const_reverse_iterator
1944 \since 5.6
1945
1946 \sa QString::reverse_iterator, QString::const_iterator
1947*/
1948
1949/*! \typedef QString::reverse_iterator
1950 \since 5.6
1951
1952 \sa QString::const_reverse_iterator, QString::iterator
1953*/
1954
1955/*!
1956 \typedef QString::size_type
1957*/
1958
1959/*!
1960 \typedef QString::difference_type
1961*/
1962
1963/*!
1964 \typedef QString::const_reference
1965*/
1966/*!
1967 \typedef QString::reference
1968*/
1969
1970/*!
1971 \typedef QString::const_pointer
1972
1973 The QString::const_pointer typedef provides an STL-style
1974 const pointer to a QString element (QChar).
1975*/
1976/*!
1977 \typedef QString::pointer
1978
    The QString::pointer typedef provides an STL-style
1980 pointer to a QString element (QChar).
1981*/
1982
1983/*!
1984 \typedef QString::value_type
1985*/
1986
1987/*! \fn QString::iterator QString::begin()
1988
1989 Returns an \l{STL-style iterators}{STL-style iterator} pointing to the first character in
1990 the string.
1991
1992//! [iterator-invalidation-func-desc]
1993 \warning The returned iterator is invalidated on detachment or when the
1994 QString is modified.
1995//! [iterator-invalidation-func-desc]
1996
1997 \sa constBegin(), end()
1998*/
1999
2000/*! \fn QString::const_iterator QString::begin() const
2001
2002 \overload begin()
2003*/
2004
2005/*! \fn QString::const_iterator QString::cbegin() const
2006 \since 5.0
2007
2008 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the first character
2009 in the string.
2010
2011 \include qstring.cpp iterator-invalidation-func-desc
2012
2013 \sa begin(), cend()
2014*/
2015
2016/*! \fn QString::const_iterator QString::constBegin() const
2017
2018 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the first character
2019 in the string.
2020
2021 \include qstring.cpp iterator-invalidation-func-desc
2022
2023 \sa begin(), constEnd()
2024*/
2025
2026/*! \fn QString::iterator QString::end()
2027
2028 Returns an \l{STL-style iterators}{STL-style iterator} pointing to the imaginary character
2029 after the last character in the string.
2030
2031 \include qstring.cpp iterator-invalidation-func-desc
2032
2033 \sa begin(), constEnd()
2034*/
2035
2036/*! \fn QString::const_iterator QString::end() const
2037
2038 \overload end()
2039*/
2040
2041/*! \fn QString::const_iterator QString::cend() const
2042 \since 5.0
2043
2044 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the imaginary
    character after the last character in the string.
2046
2047 \include qstring.cpp iterator-invalidation-func-desc
2048
2049 \sa cbegin(), end()
2050*/
2051
2052/*! \fn QString::const_iterator QString::constEnd() const
2053
2054 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the imaginary
    character after the last character in the string.
2056
2057 \include qstring.cpp iterator-invalidation-func-desc
2058
2059 \sa constBegin(), end()
2060*/
2061
2062/*! \fn QString::reverse_iterator QString::rbegin()
2063 \since 5.6
2064
2065 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to the first
2066 character in the string, in reverse order.
2067
2068 \include qstring.cpp iterator-invalidation-func-desc
2069
2070 \sa begin(), crbegin(), rend()
2071*/
2072
2073/*! \fn QString::const_reverse_iterator QString::rbegin() const
2074 \since 5.6
2075 \overload
2076*/
2077
2078/*! \fn QString::const_reverse_iterator QString::crbegin() const
2079 \since 5.6
2080
2081 Returns a const \l{STL-style iterators}{STL-style} reverse iterator pointing to the first
2082 character in the string, in reverse order.
2083
2084 \include qstring.cpp iterator-invalidation-func-desc
2085
2086 \sa begin(), rbegin(), rend()
2087*/
2088
2089/*! \fn QString::reverse_iterator QString::rend()
2090 \since 5.6
2091
2092 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to one past
2093 the last character in the string, in reverse order.
2094
2095 \include qstring.cpp iterator-invalidation-func-desc
2096
2097 \sa end(), crend(), rbegin()
2098*/
2099
2100/*! \fn QString::const_reverse_iterator QString::rend() const
2101 \since 5.6
2102 \overload
2103*/
2104
2105/*! \fn QString::const_reverse_iterator QString::crend() const
2106 \since 5.6
2107
2108 Returns a const \l{STL-style iterators}{STL-style} reverse iterator pointing to one
2109 past the last character in the string, in reverse order.
2110
2111 \include qstring.cpp iterator-invalidation-func-desc
2112
2113 \sa end(), rend(), rbegin()
2114*/
2115
2116/*!
2117 \fn QString::QString()
2118
2119 Constructs a null string. Null strings are also empty.
2120
2121 \sa isEmpty()
2122*/
2123
2124/*!
2125 \fn QString::QString(QString &&other)
2126
2127 Move-constructs a QString instance, making it point at the same
2128 object that \a other was pointing to.
2129
2130 \since 5.2
2131*/
2132
2133/*! \fn QString::QString(const char *str)
2134
2135 Constructs a string initialized with the 8-bit string \a str. The
2136 given const char pointer is converted to Unicode using the
2137 fromUtf8() function.
2138
2139 You can disable this constructor by defining \c
2140 QT_NO_CAST_FROM_ASCII when you compile your applications. This
2141 can be useful if you want to ensure that all user-visible strings
2142 go through QObject::tr(), for example.
2143
2144 \note Defining \c QT_RESTRICTED_CAST_FROM_ASCII also disables
2145 this constructor, but enables a \c{QString(const char (&ch)[N])}
2146 constructor instead. Using non-literal input, or input with
2147 embedded NUL characters, or non-7-bit characters is undefined
2148 in this case.
2149
2150 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
2151*/
2152
2153/*! \fn QString QString::fromStdString(const std::string &str)
2154
2155 Returns a copy of the \a str string. The given string is converted
2156 to Unicode using the fromUtf8() function.
2157
2158 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString()
2159*/
2160
2161/*! \fn QString QString::fromStdWString(const std::wstring &str)
2162
2163 Returns a copy of the \a str string. The given string is assumed
2164 to be encoded in utf16 if the size of wchar_t is 2 bytes (e.g. on
2165 windows) and ucs4 if the size of wchar_t is 4 bytes (most Unix
2166 systems).
2167
2168 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(), fromStdU16String(), fromStdU32String()
2169*/
2170
2171/*! \fn QString QString::fromWCharArray(const wchar_t *string, qsizetype size)
2172 \since 4.2
2173
    Returns a copy of the \a string, where the encoding of \a string depends on
    the size of wchar_t. If wchar_t is 4 bytes, the \a string is interpreted as UCS-4,
    if wchar_t is 2 bytes it is interpreted as UTF-16.

    If \a size is -1 (default), the \a string has to be '\\0'-terminated.
2179
2180 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(), fromStdWString()
2181*/
2182
2183/*! \fn std::wstring QString::toStdWString() const
2184
2185 Returns a std::wstring object with the data contained in this
2186 QString. The std::wstring is encoded in utf16 on platforms where
2187 wchar_t is 2 bytes wide (e.g. windows) and in ucs4 on platforms
2188 where wchar_t is 4 bytes wide (most Unix systems).
2189
2190 This method is mostly useful to pass a QString to a function
2191 that accepts a std::wstring object.
2192
2193 \sa utf16(), toLatin1(), toUtf8(), toLocal8Bit(), toStdU16String(), toStdU32String()
2194*/
2195
2196qsizetype QString::toUcs4_helper(const ushort *uc, qsizetype length, uint *out)
2197{
2198 qsizetype count = 0;
2199
2200 QStringIterator i(QStringView(uc, length));
2201 while (i.hasNext())
2202 out[count++] = i.next();
2203
2204 return count;
2205}
2206
2207/*! \fn QString::toWCharArray(wchar_t *array) const
2208 \since 4.2
2209
2210 Fills the \a array with the data contained in this QString object.
2211 The array is encoded in UTF-16 on platforms where
2212 wchar_t is 2 bytes wide (e.g. windows) and in UCS-4 on platforms
2213 where wchar_t is 4 bytes wide (most Unix systems).
2214
2215 \a array has to be allocated by the caller and contain enough space to
2216 hold the complete string (allocating the array with the same length as the
2217 string is always sufficient).
2218
2219 This function returns the actual length of the string in \a array.
2220
2221 \note This function does not append a null character to the array.
2222
2223 \sa utf16(), toUcs4(), toLatin1(), toUtf8(), toLocal8Bit(), toStdWString(), QStringView::toWCharArray()
2224*/
2225
2226/*! \fn QString::QString(const QString &other)
2227
2228 Constructs a copy of \a other.
2229
2230 This operation takes \l{constant time}, because QString is
2231 \l{implicitly shared}. This makes returning a QString from a
2232 function very fast. If a shared instance is modified, it will be
2233 copied (copy-on-write), and that takes \l{linear time}.
2234
2235 \sa operator=()
2236*/
2237
2238/*!
2239 Constructs a string initialized with the first \a size characters
2240 of the QChar array \a unicode.
2241
2242 If \a unicode is 0, a null string is constructed.
2243
    If \a size is negative, \a unicode is assumed to point to a '\\0'-terminated
2245 array and its length is determined dynamically. The terminating
2246 null character is not considered part of the string.
2247
2248 QString makes a deep copy of the string data. The unicode data is copied as
2249 is and the Byte Order Mark is preserved if present.
2250
2251 \sa fromRawData()
2252*/
2253QString::QString(const QChar *unicode, qsizetype size)
2254{
2255 if (!unicode) {
2256 d.clear();
2257 } else {
2258 if (size < 0) {
2259 size = 0;
2260 while (!unicode[size].isNull())
2261 ++size;
2262 }
2263 if (!size) {
2264 d = DataPointer::fromRawData(&_empty, 0);
2265 } else {
2266 d = DataPointer(Data::allocate(size), size);
2267 memcpy(d.data(), unicode, size * sizeof(QChar));
2268 d.data()[size] = '\0';
2269 }
2270 }
2271}
2272
2273/*!
2274 Constructs a string of the given \a size with every character set
2275 to \a ch.
2276
2277 \sa fill()
2278*/
2279QString::QString(qsizetype size, QChar ch)
2280{
2281 if (size <= 0) {
2282 d = DataPointer::fromRawData(&_empty, 0);
2283 } else {
2284 d = DataPointer(Data::allocate(size), size);
2285 d.data()[size] = '\0';
2286 char16_t *i = d.data() + size;
2287 char16_t *b = d.data();
2288 const char16_t value = ch.unicode();
2289 while (i != b)
2290 *--i = value;
2291 }
2292}
2293
2294/*! \fn QString::QString(qsizetype size, Qt::Initialization)
2295 \internal
2296
2297 Constructs a string of the given \a size without initializing the
2298 characters. This is only used in \c QStringBuilder::toString().
2299*/
2300QString::QString(qsizetype size, Qt::Initialization)
2301{
2302 if (size <= 0) {
2303 d = DataPointer::fromRawData(&_empty, 0);
2304 } else {
2305 d = DataPointer(Data::allocate(size), size);
2306 d.data()[size] = '\0';
2307 }
2308}
2309
2310/*! \fn QString::QString(QLatin1String str)
2311
2312 Constructs a copy of the Latin-1 string \a str.
2313
2314 \sa fromLatin1()
2315*/
2316
2317/*!
2318 Constructs a string of size 1 containing the character \a ch.
2319*/
2320QString::QString(QChar ch)
2321{
2322 d = DataPointer(Data::allocate(1), 1);
2323 d.data()[0] = ch.unicode();
2324 d.data()[1] = '\0';
2325}
2326
2327/*! \fn QString::QString(const QByteArray &ba)
2328
2329 Constructs a string initialized with the byte array \a ba. The
2330 given byte array is converted to Unicode using fromUtf8(). Stops
2331 copying at the first 0 character, otherwise copies the entire byte
2332 array.
2333
2334 You can disable this constructor by defining \c
2335 QT_NO_CAST_FROM_ASCII when you compile your applications. This
2336 can be useful if you want to ensure that all user-visible strings
2337 go through QObject::tr(), for example.
2338
2339 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QT_NO_CAST_FROM_ASCII
2340*/
2341
2342/*! \fn QString::QString(const Null &)
2343 \internal
2344*/
2345
2346/*! \fn QString::QString(QStringPrivate)
2347 \internal
2348*/
2349
2350/*! \fn QString &QString::operator=(const QString::Null &)
2351 \internal
2352*/
2353
2354/*!
2355 \fn QString::~QString()
2356
2357 Destroys the string.
2358*/
2359
2360
2361/*! \fn void QString::swap(QString &other)
2362 \since 4.8
2363
2364 Swaps string \a other with this string. This operation is very fast and
2365 never fails.
2366*/
2367
2368/*! \fn void QString::detach()
2369
2370 \internal
2371*/
2372
2373/*! \fn bool QString::isDetached() const
2374
2375 \internal
2376*/
2377
2378/*! \fn bool QString::isSharedWith(const QString &other) const
2379
2380 \internal
2381*/
2382
2383/*!
2384 Sets the size of the string to \a size characters.
2385
2386 If \a size is greater than the current size, the string is
2387 extended to make it \a size characters long with the extra
2388 characters added to the end. The new characters are uninitialized.
2389
2390 If \a size is less than the current size, characters are removed
2391 from the end.
2392
2393 Example:
2394
2395 \snippet qstring/main.cpp 45
2396
2397 If you want to append a certain number of identical characters to
2398 the string, use the \l {QString::}{resize(qsizetype, QChar)} overload.
2399
2400 If you want to expand the string so that it reaches a certain
2401 width and fill the new positions with a particular character, use
2402 the leftJustified() function:
2403
    \snippet qstring/main.cpp 47

    If \a size is negative, it is equivalent to passing zero.
2407
2408 \sa truncate(), reserve()
2409*/
2410
2411void QString::resize(qsizetype size)
2412{
2413 if (size < 0)
2414 size = 0;
2415
2416 const auto capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2417 if (d->needsDetach() || size > capacityAtEnd)
2418 reallocData(size, d->detachFlags() | Data::GrowsForward);
2419 d.size = size;
2420 if (d->allocatedCapacity())
2421 d.data()[size] = 0;
2422}
2423
2424/*!
2425 \overload
2426 \since 5.7
2427
2428 Unlike \l {QString::}{resize(qsizetype)}, this overload
2429 initializes the new characters to \a fillChar:
2430
2431 \snippet qstring/main.cpp 46
2432*/
2433
2434void QString::resize(qsizetype size, QChar fillChar)
2435{
2436 const qsizetype oldSize = length();
2437 resize(size);
2438 const qsizetype difference = length() - oldSize;
2439 if (difference > 0)
2440 std::fill_n(d.data() + oldSize, difference, fillChar.unicode());
2441}
2442
2443/*! \fn qsizetype QString::capacity() const
2444
2445 Returns the maximum number of characters that can be stored in
2446 the string without forcing a reallocation.
2447
2448 The sole purpose of this function is to provide a means of fine
2449 tuning QString's memory usage. In general, you will rarely ever
2450 need to call this function. If you want to know how many
2451 characters are in the string, call size().
2452
2453 \note a statically allocated string will report a capacity of 0,
2454 even if it's not empty.
2455
2456 \note The free space position in the allocated memory block is undefined. In
2457 other words, one should not assume that the free memory is always located
2458 after the initialized elements.
2459
2460 \sa reserve(), squeeze()
2461*/
2462
2463/*!
2464 \fn void QString::reserve(qsizetype size)
2465
2466 Attempts to allocate memory for at least \a size characters. If
2467 you know in advance how large the string will be, you can call
2468 this function, and if you resize the string often you are likely
2469 to get better performance. If \a size is an underestimate, the
2470 worst that will happen is that the QString will be a bit slower.
2471
2472 The sole purpose of this function is to provide a means of fine
2473 tuning QString's memory usage. In general, you will rarely ever
2474 need to call this function. If you want to change the size of the
2475 string, call resize().
2476
2477 This function is useful for code that needs to build up a long
2478 string and wants to avoid repeated reallocation. In this example,
2479 we want to add to the string until some condition is \c true, and
2480 we're fairly sure that size is large enough to make a call to
2481 reserve() worthwhile:
2482
2483 \snippet qstring/main.cpp 44
2484
2485 \sa squeeze(), capacity()
2486*/
2487
2488/*!
2489 \fn void QString::squeeze()
2490
2491 Releases any memory not required to store the character data.
2492
2493 The sole purpose of this function is to provide a means of fine
2494 tuning QString's memory usage. In general, you will rarely ever
2495 need to call this function.
2496
2497 \sa reserve(), capacity()
2498*/
2499
2500void QString::reallocData(qsizetype alloc, Data::ArrayOptions allocOptions)
2501{
2502 if (!alloc) {
2503 d = DataPointer::fromRawData(&_empty, 0);
2504 return;
2505 }
2506
2507 // there's a case of slow reallocate path where we need to memmove the data
2508 // before a call to ::realloc(), meaning that there's an extra "heavy"
2509 // operation. just prefer ::malloc() branch in this case
2510 const bool slowReallocatePath = d.freeSpaceAtBegin() > 0;
2511
2512 if (d->needsDetach() || slowReallocatePath) {
2513 DataPointer dd(Data::allocate(alloc, allocOptions), qMin(alloc, d.size));
2514 if (dd.size > 0)
2515 ::memcpy(dd.data(), d.data(), dd.size * sizeof(QChar));
2516 dd.data()[dd.size] = 0;
2517 d = dd;
2518 } else {
2519 d->reallocate(alloc, allocOptions);
2520 }
2521}
2522
2523void QString::reallocGrowData(qsizetype alloc, Data::ArrayOptions options)
2524{
2525 if (!alloc) // expected to always allocate
2526 alloc = 1;
2527
2528 if (d->needsDetach()) {
2529 const auto newSize = qMin(alloc, d.size);
2530 DataPointer dd(DataPointer::allocateGrow(d, alloc, newSize, options));
2531 dd->copyAppend(d.data(), d.data() + newSize);
2532 dd.data()[dd.size] = 0;
2533 d = dd;
2534 } else {
2535 d->reallocate(alloc, options);
2536 }
2537}
2538
2539/*! \fn void QString::clear()
2540
2541 Clears the contents of the string and makes it null.
2542
2543 \sa resize(), isNull()
2544*/
2545
2546/*! \fn QString &QString::operator=(const QString &other)
2547
2548 Assigns \a other to this string and returns a reference to this
2549 string.
2550*/
2551
2552QString &QString::operator=(const QString &other) noexcept
2553{
2554 d = other.d;
2555 return *this;
2556}
2557
2558/*!
2559 \fn QString &QString::operator=(QString &&other)
2560
2561 Move-assigns \a other to this QString instance.
2562
2563 \since 5.2
2564*/
2565
2566/*! \fn QString &QString::operator=(QLatin1String str)
2567
2568 \overload operator=()
2569
2570 Assigns the Latin-1 string \a str to this string.
2571*/
2572QString &QString::operator=(QLatin1String other)
2573{
2574 const qsizetype capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2575 if (isDetached() && other.size() <= capacityAtEnd) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
2576 d.size = other.size();
2577 d.data()[other.size()] = 0;
2578 qt_from_latin1(d.data(), other.latin1(), other.size());
2579 } else {
2580 *this = fromLatin1(other.latin1(), other.size());
2581 }
2582 return *this;
2583}
2584
2585/*! \fn QString &QString::operator=(const QByteArray &ba)
2586
2587 \overload operator=()
2588
2589 Assigns \a ba to this string. The byte array is converted to Unicode
2590 using the fromUtf8() function. This function stops conversion at the
2591 first NUL character found, or the end of the \a ba byte array.
2592
2593 You can disable this operator by defining \c
2594 QT_NO_CAST_FROM_ASCII when you compile your applications. This
2595 can be useful if you want to ensure that all user-visible strings
2596 go through QObject::tr(), for example.
2597
2598 \sa QT_NO_CAST_FROM_ASCII
2599*/
2600
2601/*! \fn QString &QString::operator=(const char *str)
2602
2603 \overload operator=()
2604
2605 Assigns \a str to this string. The const char pointer is converted
2606 to Unicode using the fromUtf8() function.
2607
2608 You can disable this operator by defining \c QT_NO_CAST_FROM_ASCII
2609 or \c QT_RESTRICTED_CAST_FROM_ASCII when you compile your applications.
2610 This can be useful if you want to ensure that all user-visible strings
2611 go through QObject::tr(), for example.
2612
2613 \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
2614*/
2615
2616/*!
2617 \overload operator=()
2618
2619 Sets the string to contain the single character \a ch.
2620*/
2621QString &QString::operator=(QChar ch)
2622{
2623 const qsizetype capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2624 if (isDetached() && capacityAtEnd >= 1) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
2625 // re-use existing capacity:
2626 d.data()[0] = ch.unicode();
2627 d.data()[1] = 0;
2628 d.size = 1;
2629 } else {
2630 operator=(QString(ch));
2631 }
2632 return *this;
2633}
2634
2635/*!
2636 \fn QString& QString::insert(qsizetype position, const QString &str)
2637
2638 Inserts the string \a str at the given index \a position and
2639 returns a reference to this string.
2640
2641 Example:
2642
2643 \snippet qstring/main.cpp 26
2644
2645 If the given \a position is greater than size(), this string is extended.
2646
2647 \sa append(), prepend(), replace(), remove()
2648*/
2649
2650/*!
2651 \fn QString& QString::insert(qsizetype position, QStringView str)
2652 \since 6.0
2653 \overload insert()
2654
2655 Inserts the string view \a str at the given index \a position and
2656 returns a reference to this string.
2657
2658 If the given \a position is greater than size(), this string is extended.
2659*/
2660
2661
2662/*!
2663 \fn QString& QString::insert(qsizetype position, const char *str)
2664 \since 5.5
2665 \overload insert()
2666
2667 Inserts the C string \a str at the given index \a position and
2668 returns a reference to this string.
2669
2670 If the given \a position is greater than size(), this string is extended.
2671
2672 This function is not available when \c QT_NO_CAST_FROM_ASCII is
2673 defined.
2674
2675 \sa QT_NO_CAST_FROM_ASCII
2676*/
2677
2678
2679/*!
2680 \fn QString& QString::insert(qsizetype position, const QByteArray &str)
2681 \since 5.5
2682 \overload insert()
2683
2684 Inserts the byte array \a str at the given index \a position and
2685 returns a reference to this string.
2686
2687 If the given \a position is greater than size(), this string is extended.
2688
2689 This function is not available when \c QT_NO_CAST_FROM_ASCII is
2690 defined.
2691
2692 \sa QT_NO_CAST_FROM_ASCII
2693*/
2694
2695
2696/*!
2697 \fn QString &QString::insert(qsizetype position, QLatin1String str)
2698 \overload insert()
2699
2700 Inserts the Latin-1 string \a str at the given index \a position.
2701*/
2702QString &QString::insert(qsizetype i, QLatin1String str)
2703{
2704 const char *s = str.latin1();
2705 if (i < 0 || !s || !(*s))
2706 return *this;
2707
2708 qsizetype len = str.size();
2709 if (Q_UNLIKELY(i > size()))
2710 resize(i + len, QLatin1Char(' '));
2711 else
2712 resize(size() + len);
2713
2714 ::memmove(d.data() + i + len, d.data() + i, (d.size - i - len) * sizeof(QChar));
2715 qt_from_latin1(d.data() + i, s, size_t(len));
2716 return *this;
2717}
2718
2719/*!
2720 \fn QString& QString::insert(qsizetype position, const QChar *unicode, qsizetype size)
2721 \overload insert()
2722
2723 Inserts the first \a size characters of the QChar array \a unicode
2724 at the given index \a position in the string.
2725*/
2726QString& QString::insert(qsizetype i, const QChar *unicode, qsizetype size)
2727{
2728 if (i < 0 || size <= 0)
2729 return *this;
2730
2731 const auto s = reinterpret_cast<const char16_t *>(unicode);
2732 if (points_into_range(s, d.data(), d.data() + d.size))
2733 return insert(i, QStringView{QVarLengthArray(s, s + size)});
2734
2735 const auto oldSize = this->size();
2736 const auto newSize = qMax(i, oldSize) + size;
2737 const bool shouldGrow = d->shouldGrowBeforeInsert(d.begin() + qMin(i, oldSize), size);
2738
2739 auto flags = d->detachFlags() | Data::GrowsForward;
2740 if (oldSize != 0 && i <= oldSize / 4)
2741 flags |= Data::GrowsBackwards;
2742
2743 // ### optimize me
2744 if (d->needsDetach() || newSize > capacity() || shouldGrow)
2745 reallocGrowData(newSize, flags);
2746
2747 if (i > oldSize) // set spaces in the uninitialized gap
2748 d->copyAppend(i - oldSize, u' ');
2749
2750 d->insert(d.begin() + i, s, s + size);
2751 d.data()[d.size] = '\0';
2752 return *this;
2753}
2754
2755/*!
2756 \fn QString& QString::insert(qsizetype position, QChar ch)
2757 \overload insert()
2758
2759 Inserts \a ch at the given index \a position in the string.
2760*/
2761
2762QString& QString::insert(qsizetype i, QChar ch)
2763{
2764 if (i < 0)
2765 i += d.size;
2766 if (i < 0)
2767 return *this;
2768
2769 const auto oldSize = size();
2770 const auto newSize = qMax(i, oldSize) + 1;
2771 const bool shouldGrow = d->shouldGrowBeforeInsert(d.begin() + qMin(i, oldSize), 1);
2772
2773 auto flags = d->detachFlags() | Data::GrowsForward;
2774 if (oldSize != 0 && i <= oldSize / 4)
2775 flags |= Data::GrowsBackwards;
2776
2777 // ### optimize me
2778 if (d->needsDetach() || newSize > capacity() || shouldGrow)
2779 reallocGrowData(newSize, flags);
2780
2781 if (i > oldSize) // set spaces in the uninitialized gap
2782 d->copyAppend(i - oldSize, u' ');
2783
2784 d->insert(d.begin() + i, 1, ch.unicode());
2785 d.data()[d.size] = '\0';
2786 return *this;
2787}
2788
2789/*!
2790 Appends the string \a str onto the end of this string.
2791
2792 Example:
2793
2794 \snippet qstring/main.cpp 9
2795
2796 This is the same as using the insert() function:
2797
2798 \snippet qstring/main.cpp 10
2799
2800 The append() function is typically very fast (\l{constant time}),
2801 because QString preallocates extra space at the end of the string
2802 data so it can grow without reallocating the entire string each
2803 time.
2804
2805 \sa operator+=(), prepend(), insert()
2806*/
2807QString &QString::append(const QString &str)
2808{
2809 if (!str.isNull()) {
2810 if (isNull()) {
2811 operator=(str);
2812 } else {
2813 const bool shouldGrow = d->shouldGrowBeforeInsert(d.end(), str.d.size);
2814 if (d->needsDetach() || size() + str.size() > capacity() || shouldGrow)
2815 reallocGrowData(size() + str.size(),
2816 d->detachFlags() | Data::GrowsForward);
2817 d->copyAppend(str.d.data(), str.d.data() + str.d.size);
2818 d.data()[d.size] = '\0';
2819 }
2820 }
2821 return *this;
2822}
2823
2824/*!
2825 \overload append()
2826 \since 5.0
2827
2828 Appends \a len characters from the QChar array \a str to this string.
2829*/
2830QString &QString::append(const QChar *str, qsizetype len)
2831{
2832 if (str && len > 0) {
2833 const bool shouldGrow = d->shouldGrowBeforeInsert(d.end(), len);
2834 if (d->needsDetach() || size() + len > capacity() || shouldGrow)
2835 reallocGrowData(size() + len, d->detachFlags() | Data::GrowsForward);
2836 static_assert(sizeof(QChar) == sizeof(char16_t), "Unexpected difference in sizes");
2837 // the following should be safe as QChar uses char16_t as underlying data
2838 const char16_t *char16String = reinterpret_cast<const char16_t *>(str);
2839 d->copyAppend(char16String, char16String + len);
2840 d.data()[d.size] = '\0';
2841 }
2842 return *this;
2843}
2844
2845/*!
2846 \overload append()
2847
2848 Appends the Latin-1 string \a str to this string.
2849*/
2850QString &QString::append(QLatin1String str)
2851{
2852 const char *s = str.latin1();
2853 if (s) {
2854 qsizetype len = str.size();
2855 const bool shouldGrow = d->shouldGrowBeforeInsert(d.end(), len);
2856 if (d->needsDetach() || size() + len > capacity() || shouldGrow)
2857 reallocGrowData(size() + len, d->detachFlags() | Data::GrowsForward);
2858
2859 if (d.freeSpaceAtBegin() == 0) { // fast path
2860 char16_t *i = d.data() + d.size;
2861 qt_from_latin1(i, s, size_t(len));
2862 d.size += len;
2863 } else { // slow path
2864 d->copyAppend(s, s + len);
2865 }
2866 d.data()[d.size] = '\0';
2867 }
2868 return *this;
2869}
2870
2871/*! \fn QString &QString::append(const QByteArray &ba)
2872
2873 \overload append()
2874
2875 Appends the byte array \a ba to this string. The given byte array
2876 is converted to Unicode using the fromUtf8() function.
2877
2878 You can disable this function by defining \c QT_NO_CAST_FROM_ASCII
2879 when you compile your applications. This can be useful if you want
2880 to ensure that all user-visible strings go through QObject::tr(),
2881 for example.
2882
2883 \sa QT_NO_CAST_FROM_ASCII
2884*/
2885
2886/*! \fn QString &QString::append(const char *str)
2887
2888 \overload append()
2889
2890 Appends the string \a str to this string. The given const char
2891 pointer is converted to Unicode using the fromUtf8() function.
2892
2893 You can disable this function by defining \c QT_NO_CAST_FROM_ASCII
2894 when you compile your applications. This can be useful if you want
2895 to ensure that all user-visible strings go through QObject::tr(),
2896 for example.
2897
2898 \sa QT_NO_CAST_FROM_ASCII
2899*/
2900
2901/*!
2902 \overload append()
2903
2904 Appends the character \a ch to this string.
2905*/
2906QString &QString::append(QChar ch)
2907{
2908 const bool shouldGrow = d->shouldGrowBeforeInsert(d.end(), 1);
2909 if (d->needsDetach() || size() + 1 > capacity() || shouldGrow)
2910 reallocGrowData(d.size + 1u, d->detachFlags() | Data::GrowsForward);
2911 d->copyAppend(1, ch.unicode());
2912 d.data()[d.size] = '\0';
2913 return *this;
2914}
2915
2916/*! \fn QString &QString::prepend(const QString &str)
2917
2918 Prepends the string \a str to the beginning of this string and
2919 returns a reference to this string.
2920
2921 Example:
2922
2923 \snippet qstring/main.cpp 36
2924
2925 \sa append(), insert()
2926*/
2927
2928/*! \fn QString &QString::prepend(QLatin1String str)
2929
2930 \overload prepend()
2931
2932 Prepends the Latin-1 string \a str to this string.
2933*/
2934
2935/*! \fn QString &QString::prepend(const QChar *str, qsizetype len)
2936 \since 5.5
2937 \overload prepend()
2938
2939 Prepends \a len characters from the QChar array \a str to this string and
2940 returns a reference to this string.
2941*/
2942
2943/*! \fn QString &QString::prepend(QStringView str)
2944 \since 6.0
2945 \overload prepend()
2946
2947 Prepends the string view \a str to the beginning of this string and
2948 returns a reference to this string.
2949*/
2950
2951/*! \fn QString &QString::prepend(const QByteArray &ba)
2952
2953 \overload prepend()
2954
2955 Prepends the byte array \a ba to this string. The byte array is
2956 converted to Unicode using the fromUtf8() function.
2957
2958 You can disable this function by defining \c
2959 QT_NO_CAST_FROM_ASCII when you compile your applications. This
2960 can be useful if you want to ensure that all user-visible strings
2961 go through QObject::tr(), for example.
2962
2963 \sa QT_NO_CAST_FROM_ASCII
2964*/
2965
2966/*! \fn QString &QString::prepend(const char *str)
2967
2968 \overload prepend()
2969
2970 Prepends the string \a str to this string. The const char pointer
2971 is converted to Unicode using the fromUtf8() function.
2972
2973 You can disable this function by defining \c
2974 QT_NO_CAST_FROM_ASCII when you compile your applications. This
2975 can be useful if you want to ensure that all user-visible strings
2976 go through QObject::tr(), for example.
2977
2978 \sa QT_NO_CAST_FROM_ASCII
2979*/
2980
2981/*! \fn QString &QString::prepend(QChar ch)
2982
2983 \overload prepend()
2984
2985 Prepends the character \a ch to this string.
2986*/
2987
2988/*!
2989 \fn QString &QString::remove(qsizetype position, qsizetype n)
2990
2991 Removes \a n characters from the string, starting at the given \a
2992 position index, and returns a reference to the string.
2993
2994 If the specified \a position index is within the string, but \a
2995 position + \a n is beyond the end of the string, the string is
2996 truncated at the specified \a position.
2997
2998 \snippet qstring/main.cpp 37
2999
3000 \sa insert(), replace()
3001*/
3002QString &QString::remove(qsizetype pos, qsizetype len)
3003{
3004 if (pos < 0) // count from end of string
3005 pos += size();
3006 if (size_t(pos) >= size_t(size())) {
3007 // range problems
3008 } else if (len >= size() - pos) {
3009 resize(pos); // truncate
3010 } else if (len > 0) {
3011 detach();
3012 d->erase(d.begin() + pos, d.begin() + pos + len);
3013 d.data()[d.size] = u'\0';
3014 }
3015 return *this;
3016}
3017
3018template<typename T>
3019static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
3020{
3021 const auto needleSize = needle.size();
3022 if (!needleSize)
3023 return;
3024
3025 // avoid detach if nothing to do:
3026 qsizetype i = s.indexOf(needle, 0, cs);
3027 if (i < 0)
3028 return;
3029
3030 const auto beg = s.begin(); // detaches
3031 auto dst = beg + i;
3032 auto src = beg + i + needleSize;
3033 const auto end = s.end();
3034 // loop invariant: [beg, dst[ is partial result
3035 // [src, end[ still to be checked for needles
3036 while (src < end) {
3037 const auto i = s.indexOf(needle, src - beg, cs);
3038 const auto hit = i == -1 ? end : beg + i;
3039 const auto skipped = hit - src;
3040 memmove(dst, src, skipped * sizeof(QChar));
3041 dst += skipped;
3042 src = hit + needleSize;
3043 }
3044 s.truncate(dst - beg);
3045}
3046
3047/*!
3048 Removes every occurrence of the given \a str string in this
3049 string, and returns a reference to this string.
3050
3051 If \a cs is Qt::CaseSensitive (default), the search is
3052 case sensitive; otherwise the search is case insensitive.
3053
3054 This is the same as \c replace(str, "", cs).
3055
3056 \sa replace()
3057*/
3058QString &QString::remove(const QString &str, Qt::CaseSensitivity cs)
3059{
3060 const auto s = str.d.data();
3061 if (points_into_range(s, d.data(), d.data() + d.size))
3062 removeStringImpl(*this, QStringView{QVarLengthArray(s, s + str.size())}, cs);
3063 else
3064 removeStringImpl(*this, qToStringViewIgnoringNull(str), cs);
3065 return *this;
3066}
3067
3068/*!
3069 \since 5.11
3070 \overload
3071
3072 Removes every occurrence of the given \a str string in this
3073 string, and returns a reference to this string.
3074
3075 If \a cs is Qt::CaseSensitive (default), the search is
3076 case sensitive; otherwise the search is case insensitive.
3077
3078 This is the same as \c replace(str, "", cs).
3079
3080 \sa replace()
3081*/
QString &QString::remove(QLatin1String str, Qt::CaseSensitivity cs)
{
    // Delegates to the shared removal helper, searching for the Latin-1
    // needle directly.
    removeStringImpl(*this, str, cs);
    return *this;
}
3087
3088/*!
3089 Removes every occurrence of the character \a ch in this string, and
3090 returns a reference to this string.
3091
3092 If \a cs is Qt::CaseSensitive (default), the search is case
3093 sensitive; otherwise the search is case insensitive.
3094
3095 Example:
3096
3097 \snippet qstring/main.cpp 38
3098
3099 This is the same as \c replace(ch, "", cs).
3100
3101 \sa replace()
3102*/
3103QString &QString::remove(QChar ch, Qt::CaseSensitivity cs)
3104{
3105 const qsizetype idx = indexOf(ch, 0, cs);
3106 if (idx != -1) {
3107 const auto first = begin(); // implicit detach()
3108 auto last = end();
3109 if (cs == Qt::CaseSensitive) {
3110 last = std::remove(first + idx, last, ch);
3111 } else {
3112 const QChar c = ch.toCaseFolded();
3113 auto caseInsensEqual = [c](QChar x) {
3114 return c == x.toCaseFolded();
3115 };
3116 last = std::remove_if(first + idx, last, caseInsensEqual);
3117 }
3118 resize(last - first);
3119 }
3120 return *this;
3121}
3122
3123/*!
3124 \fn QString &QString::remove(const QRegularExpression &re)
3125 \since 5.0
3126
3127 Removes every occurrence of the regular expression \a re in the
3128 string, and returns a reference to the string. For example:
3129
3130 \snippet qstring/main.cpp 96
3131
3132 \sa indexOf(), lastIndexOf(), replace()
3133*/
3134
3135/*!
3136 \fn QString &QString::replace(qsizetype position, qsizetype n, const QString &after)
3137
3138 Replaces \a n characters beginning at index \a position with
3139 the string \a after and returns a reference to this string.
3140
    \note If the specified \a position index is within the string,
    but \a position + \a n goes outside the string's range,
    then \a n will be adjusted to stop at the end of the string.
3144
3145 Example:
3146
3147 \snippet qstring/main.cpp 40
3148
3149 \sa insert(), remove()
3150*/
QString &QString::replace(qsizetype pos, qsizetype len, const QString &after)
{
    // Forwards to the (position, length, QChar array, size) overload using
    // the replacement string's data pointer and length.
    return replace(pos, len, after.constData(), after.length());
}
3155
3156/*!
3157 \fn QString &QString::replace(qsizetype position, qsizetype n, const QChar *unicode, qsizetype size)
3158 \overload replace()
3159 Replaces \a n characters beginning at index \a position with the
3160 first \a size characters of the QChar array \a unicode and returns a
3161 reference to this string.
3162*/
3163QString &QString::replace(qsizetype pos, qsizetype len, const QChar *unicode, qsizetype size)
3164{
3165 if (size_t(pos) > size_t(this->size()))
3166 return *this;
3167 if (len > this->size() - pos)
3168 len = this->size() - pos;
3169
3170 size_t index = pos;
3171 replace_helper(&index, 1, len, unicode, size);
3172 return *this;
3173}
3174
3175/*!
3176 \fn QString &QString::replace(qsizetype position, qsizetype n, QChar after)
3177 \overload replace()
3178
3179 Replaces \a n characters beginning at index \a position with the
3180 character \a after and returns a reference to this string.
3181*/
QString &QString::replace(qsizetype pos, qsizetype len, QChar after)
{
    // Treats the single character as a QChar array of length 1.
    return replace(pos, len, &after, 1);
}
3186
3187/*!
3188 \overload replace()
3189 Replaces every occurrence of the string \a before with the string \a
3190 after and returns a reference to this string.
3191
3192 If \a cs is Qt::CaseSensitive (default), the search is case
3193 sensitive; otherwise the search is case insensitive.
3194
3195 Example:
3196
3197 \snippet qstring/main.cpp 41
3198
3199 \note The replacement text is not rescanned after it is inserted.
3200
3201 Example:
3202
3203 \snippet qstring/main.cpp 86
3204*/
QString &QString::replace(const QString &before, const QString &after, Qt::CaseSensitivity cs)
{
    // Forwards to the QChar-array overload with both strings' data and sizes.
    return replace(before.constData(), before.size(), after.constData(), after.size(), cs);
}
3209
3210namespace { // helpers for replace and its helper:
3211QChar *textCopy(const QChar *start, qsizetype len)
3212{
3213 const size_t size = len * sizeof(QChar);
3214 QChar *const copy = static_cast<QChar *>(::malloc(size));
3215 Q_CHECK_PTR(copy);
3216 ::memcpy(copy, start, size);
3217 return copy;
3218}
3219
3220static bool pointsIntoRange(const QChar *ptr, const char16_t *base, qsizetype len)
3221{
3222 const QChar *const start = reinterpret_cast<const QChar *>(base);
3223 const std::less<const QChar *> less;
3224 return !less(ptr, start) && less(ptr, start + len);
3225}
3226} // end namespace
3227
3228/*!
3229 \internal
3230 */
void QString::replace_helper(size_t *indices, qsizetype nIndices, qsizetype blen, const QChar *after, qsizetype alen)
{
    // For each of the nIndices positions stored in indices, replaces the blen
    // characters starting at that position with the alen characters at after.
    // NOTE(review): the move arithmetic below appears to rely on the indices
    // being in ascending order with non-overlapping ranges - confirm against
    // the callers.

    // Copy after if it lies inside our own character buffer (which we could
    // possibly invalidate via a realloc or modify by replacement).
    QChar *afterBuffer = nullptr;
    if (pointsIntoRange(after, d.data(), d.size)) // Use copy in place of vulnerable original:
        after = afterBuffer = textCopy(after, alen);

    QT_TRY {
        if (blen == alen) {
            // replace in place: the size does not change, so each match is
            // simply overwritten
            detach();
            for (qsizetype i = 0; i < nIndices; ++i)
                memcpy(d.data() + indices[i], after, alen * sizeof(QChar));
        } else if (alen < blen) {
            // replace from front: the string shrinks, so text is compacted
            // towards the start while walking the matches in order
            detach();
            // to: next write position; movestart: start of the text that
            // follows the most recently handled match
            size_t to = indices[0];
            if (alen)
                memcpy(d.data()+to, after, alen*sizeof(QChar));
            to += alen;
            size_t movestart = indices[0] + blen;
            for (qsizetype i = 1; i < nIndices; ++i) {
                // msize: number of characters between the previous match and
                // this one; they are moved down to the write position
                qsizetype msize = indices[i] - movestart;
                if (msize > 0) {
                    memmove(d.data() + to, d.data() + movestart, msize * sizeof(QChar));
                    to += msize;
                }
                if (alen) {
                    memcpy(d.data() + to, after, alen * sizeof(QChar));
                    to += alen;
                }
                movestart = indices[i] + blen;
            }
            // Move the text after the last match, then cut off the surplus.
            qsizetype msize = d.size - movestart;
            if (msize > 0)
                memmove(d.data() + to, d.data() + movestart, msize * sizeof(QChar));
            resize(d.size - nIndices*(blen-alen));
        } else {
            // replace from back: the string grows, so it is resized first and
            // the matches are then processed from the last index to the first
            qsizetype adjust = nIndices*(alen-blen);
            qsizetype newLen = d.size + adjust;
            // moveend: end (exclusive) of the text still to be moved,
            // expressed in pre-resize positions
            qsizetype moveend = d.size;
            resize(newLen);

            while (nIndices) {
                --nIndices;
                // movestart: first character after this match (old position)
                qsizetype movestart = indices[nIndices] + blen;
                // insertstart: this match's position shifted by the growth
                // contributed by all matches in front of it
                qsizetype insertstart = indices[nIndices] + nIndices*(alen-blen);
                qsizetype moveto = insertstart + alen;
                memmove(d.data() + moveto, d.data() + movestart,
                        (moveend - movestart)*sizeof(QChar));
                memcpy(d.data() + insertstart, after, alen * sizeof(QChar));
                // The next (earlier) match moves the text that ends where
                // this match began.
                moveend = movestart-blen;
            }
        }
    } QT_CATCH(const std::bad_alloc &) {
        // Release the temporary copy of after before passing the error on.
        ::free(afterBuffer);
        QT_RETHROW;
    }
    ::free(afterBuffer);
}
3293
3294/*!
3295 \since 4.5
3296 \overload replace()
3297
3298 Replaces each occurrence in this string of the first \a blen
3299 characters of \a before with the first \a alen characters of \a
3300 after and returns a reference to this string.
3301
3302 If \a cs is Qt::CaseSensitive (default), the search is case
3303 sensitive; otherwise the search is case insensitive.
3304*/
QString &QString::replace(const QChar *before, qsizetype blen,
                          const QChar *after, qsizetype alen,
                          Qt::CaseSensitivity cs)
{
    if (d.size == 0) {
        // A non-empty needle cannot occur in an empty string.
        if (blen)
            return *this;
    } else {
        // Same pointer and same length with a case-sensitive search:
        // the text would be replaced by itself.
        if (cs == Qt::CaseSensitive && before == after && blen == alen)
            return *this;
    }
    // Replacing nothing with nothing changes nothing.
    if (alen == 0 && blen == 0)
        return *this;

    QStringMatcher matcher(before, blen, cs);
    // Private copies of before/after, made only if they alias this string's
    // data and more than one batch is needed; released at the end.
    // NOTE(review): these are not freed if replace_helper() throws - confirm
    // whether that path needs handling.
    QChar *beforeBuffer = nullptr, *afterBuffer = nullptr;

    qsizetype index = 0;
    while (1) {
        // Collect up to 1024 match positions, then replace them in one go.
        size_t indices[1024];
        size_t pos = 0;
        while (pos < 1024) {
            index = matcher.indexIn(*this, index);
            if (index == -1)
                break;
            indices[pos++] = index;
            if (blen) // Step over before:
                index += blen;
            else // Only count one instance of empty between any two characters:
                index++;
        }
        if (!pos) // Nothing to replace
            break;

        // index != -1 here means the batch filled up before the search ran
        // out of matches, so another batch will follow.
        if (Q_UNLIKELY(index != -1)) {
            /*
              We're about to change data, that before and after might point
              into, and we'll need that data for our next batch of indices.
            */
            if (!afterBuffer && pointsIntoRange(after, d.data(), d.size))
                after = afterBuffer = textCopy(after, alen);

            if (!beforeBuffer && pointsIntoRange(before, d.data(), d.size)) {
                beforeBuffer = textCopy(before, blen);
                matcher = QStringMatcher(beforeBuffer, blen, cs);
            }
        }

        replace_helper(indices, pos, blen, after, alen);

        if (Q_LIKELY(index == -1)) // Nothing left to replace
            break;
        // The call to replace_helper just moved what index points at:
        index += pos*(alen-blen);
    }
    ::free(afterBuffer);
    ::free(beforeBuffer);

    return *this;
}
3365
3366/*!
3367 \overload replace()
3368 Replaces every occurrence of the character \a ch in the string with
3369 \a after and returns a reference to this string.
3370
3371 If \a cs is Qt::CaseSensitive (default), the search is case
3372 sensitive; otherwise the search is case insensitive.
3373*/
3374QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs)
3375{
3376 if (after.size() == 0)
3377 return remove(ch, cs);
3378
3379 if (after.size() == 1)
3380 return replace(ch, after.front(), cs);
3381
3382 if (size() == 0)
3383 return *this;
3384
3385 char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
3386
3387 qsizetype index = 0;
3388 while (1) {
3389 size_t indices[1024];
3390 size_t pos = 0;
3391 if (cs == Qt::CaseSensitive) {
3392 while (pos < 1024 && index < size()) {
3393 if (d.data()[index] == cc)
3394 indices[pos++] = index;
3395 index++;
3396 }
3397 } else {
3398 while (pos < 1024 && index < size()) {
3399 if (QChar::toCaseFolded(d.data()[index]) == cc)
3400 indices[pos++] = index;
3401 index++;
3402 }
3403 }
3404 if (!pos) // Nothing to replace
3405 break;
3406
3407 replace_helper(indices, pos, 1, after.constData(), after.size());
3408
3409 if (Q_LIKELY(index == -1)) // Nothing left to replace
3410 break;
3411 // The call to replace_helper just moved what index points at:
3412 index += pos*(after.size() - 1);
3413 }
3414 return *this;
3415}
3416
3417/*!
3418 \overload replace()
3419 Replaces every occurrence of the character \a before with the
3420 character \a after and returns a reference to this string.
3421
3422 If \a cs is Qt::CaseSensitive (default), the search is case
3423 sensitive; otherwise the search is case insensitive.
3424*/
3425QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
3426{
3427 if (d.size) {
3428 const qsizetype idx = indexOf(before, 0, cs);
3429 if (idx != -1) {
3430 detach();
3431 const char16_t a = after.unicode();
3432 char16_t *i = d.data();
3433 char16_t *const e = i + d.size;
3434 i += idx;
3435 *i = a;
3436 if (cs == Qt::CaseSensitive) {
3437 const char16_t b = before.unicode();
3438 while (++i != e) {
3439 if (*i == b)
3440 *i = a;
3441 }
3442 } else {
3443 const char16_t b = foldCase(before.unicode());
3444 while (++i != e) {
3445 if (foldCase(*i) == b)
3446 *i = a;
3447 }
3448 }
3449 }
3450 }
3451 return *this;
3452}
3453
3454/*!
3455 \since 4.5
3456 \overload replace()
3457
3458 Replaces every occurrence of the string \a before with the string \a
3459 after and returns a reference to this string.
3460
3461 If \a cs is Qt::CaseSensitive (default), the search is case
3462 sensitive; otherwise the search is case insensitive.
3463
3464 \note The text is not rescanned after a replacement.
3465*/
3466QString &QString::replace(QLatin1String before, QLatin1String after, Qt::CaseSensitivity cs)
3467{
3468 qsizetype alen = after.size();
3469 qsizetype blen = before.size();
3470 QVarLengthArray<char16_t> a(alen);
3471 QVarLengthArray<char16_t> b(blen);
3472 qt_from_latin1(a.data(), after.latin1(), alen);
3473 qt_from_latin1(b.data(), before.latin1(), blen);
3474 return replace((const QChar *)b.data(), blen, (const QChar *)a.data(), alen, cs);
3475}
3476
3477/*!
3478 \since 4.5
3479 \overload replace()
3480
3481 Replaces every occurrence of the string \a before with the string \a
3482 after and returns a reference to this string.
3483
3484 If \a cs is Qt::CaseSensitive (default), the search is case
3485 sensitive; otherwise the search is case insensitive.
3486
3487 \note The text is not rescanned after a replacement.
3488*/
3489QString &QString::replace(QLatin1String before, const QString &after, Qt::CaseSensitivity cs)
3490{
3491 qsizetype blen = before.size();
3492 QVarLengthArray<char16_t> b(blen);
3493 qt_from_latin1(b.data(), before.latin1(), blen);
3494 return replace((const QChar *)b.data(), blen, after.constData(), after.d.size, cs);
3495}
3496
3497/*!
3498 \since 4.5
3499 \overload replace()
3500
3501 Replaces every occurrence of the string \a before with the string \a
3502 after and returns a reference to this string.
3503
3504 If \a cs is Qt::CaseSensitive (default), the search is case
3505 sensitive; otherwise the search is case insensitive.
3506
3507 \note The text is not rescanned after a replacement.
3508*/
3509QString &QString::replace(const QString &before, QLatin1String after, Qt::CaseSensitivity cs)
3510{
3511 qsizetype alen = after.size();
3512 QVarLengthArray<char16_t> a(alen);
3513 qt_from_latin1(a.data(), after.latin1(), alen);
3514 return replace(before.constData(), before.d.size, (const QChar *)a.data(), alen, cs);
3515}
3516
3517/*!
3518 \since 4.5
3519 \overload replace()
3520
3521 Replaces every occurrence of the character \a c with the string \a
3522 after and returns a reference to this string.
3523
3524 If \a cs is Qt::CaseSensitive (default), the search is case
3525 sensitive; otherwise the search is case insensitive.
3526
3527 \note The text is not rescanned after a replacement.
3528*/
3529QString &QString::replace(QChar c, QLatin1String after, Qt::CaseSensitivity cs)
3530{
3531 qsizetype alen = after.size();
3532 QVarLengthArray<char16_t> a(alen);
3533 qt_from_latin1(a.data(), after.latin1(), alen);
3534 return replace(&c, 1, (const QChar *)a.data(), alen, cs);
3535}
3536
3537
3538/*!
3539 \relates QString
3540 Returns \c true if string \a s1 is equal to string \a s2; otherwise
3541 returns \c false.
3542
3543 \sa {Comparing Strings}
3544*/
3545bool operator==(const QString &s1, const QString &s2) noexcept
3546{
3547 if (s1.d.size != s2.d.size)
3548 return false;
3549
3550 return QtPrivate::compareStrings(s1, s2, Qt::CaseSensitive) == 0;
3551}
3552
3553/*!
3554 \overload operator==()
3555 Returns \c true if this string is equal to \a other; otherwise
3556 returns \c false.
3557*/
3558bool QString::operator==(QLatin1String other) const noexcept
3559{
3560 if (size() != other.size())
3561 return false;
3562
3563 return QtPrivate::compareStrings(*this, other, Qt::CaseSensitive) == 0;
3564}
3565
3566/*! \fn bool QString::operator==(const QByteArray &other) const
3567
3568 \overload operator==()
3569
3570 The \a other byte array is converted to a QString using the
3571 fromUtf8() function. This function stops conversion at the
3572 first NUL character found, or the end of the byte array.
3573
3574 You can disable this operator by defining \c
3575 QT_NO_CAST_FROM_ASCII when you compile your applications. This
3576 can be useful if you want to ensure that all user-visible strings
3577 go through QObject::tr(), for example.
3578
3579 Returns \c true if this string is lexically equal to the parameter
3580 string \a other. Otherwise returns \c false.
3581
3582 \sa QT_NO_CAST_FROM_ASCII
3583*/
3584
3585/*! \fn bool QString::operator==(const char *other) const
3586
3587 \overload operator==()
3588
3589 The \a other const char pointer is converted to a QString using
3590 the fromUtf8() function.
3591
3592 You can disable this operator by defining \c
3593 QT_NO_CAST_FROM_ASCII when you compile your applications. This
3594 can be useful if you want to ensure that all user-visible strings
3595 go through QObject::tr(), for example.
3596
3597 \sa QT_NO_CAST_FROM_ASCII
3598*/
3599
3600/*!
3601 \relates QString
3602 Returns \c true if string \a s1 is lexically less than string
3603 \a s2; otherwise returns \c false.
3604
3605 \sa {Comparing Strings}
3606*/
3607bool operator<(const QString &s1, const QString &s2) noexcept
3608{
3609 return QtPrivate::compareStrings(s1, s2, Qt::CaseSensitive) < 0;
3610}
3611
3612/*!
3613 \overload operator<()
3614
3615 Returns \c true if this string is lexically less than the parameter
3616 string called \a other; otherwise returns \c false.
3617*/
3618bool QString::operator<(QLatin1String other) const noexcept
3619{
3620 return QtPrivate::compareStrings(*this, other, Qt::CaseSensitive) < 0;
3621}
3622
3623/*! \fn bool QString::operator<(const QByteArray &other) const
3624
3625 \overload operator<()
3626
3627 The \a other byte array is converted to a QString using the
3628 fromUtf8() function. If any NUL characters ('\\0') are embedded
3629 in the byte array, they will be included in the transformation.
3630
3631 You can disable this operator by defining \c
3632 QT_NO_CAST_FROM_ASCII when you compile your applications. This
3633 can be useful if you want to ensure that all user-visible strings
3634 go through QObject::tr(), for example.
3635
3636 \sa QT_NO_CAST_FROM_ASCII
3637*/
3638
3639/*! \fn bool QString::operator<(const char *other) const
3640
3641 Returns \c true if this string is lexically less than string \a other.
3642 Otherwise returns \c false.
3643
3644 \overload operator<()
3645
3646 The \a other const char pointer is converted to a QString using
3647 the fromUtf8() function.
3648
3649 You can disable this operator by defining \c
3650 QT_NO_CAST_FROM_ASCII when you compile your applications. This
3651 can be useful if you want to ensure that all user-visible strings
3652 go through QObject::tr(), for example.
3653
3654 \sa QT_NO_CAST_FROM_ASCII
3655*/
3656
3657/*! \fn bool QString::operator<=(const QString &s1, const QString &s2)
3658
3659 Returns \c true if string \a s1 is lexically less than or equal to
3660 string \a s2; otherwise returns \c false.
3661
3662 \sa {Comparing Strings}
3663*/
3664
3665/*! \fn bool QString::operator<=(QLatin1String other) const
3666
3667 Returns \c true if this string is lexically less than or equal to
3668 parameter string \a other. Otherwise returns \c false.
3669
3670 \overload operator<=()
3671*/
3672
3673/*! \fn bool QString::operator<=(const QByteArray &other) const
3674
3675 \overload operator<=()
3676
3677 The \a other byte array is converted to a QString using the
3678 fromUtf8() function. If any NUL characters ('\\0') are embedded
3679 in the byte array, they will be included in the transformation.
3680
3681 You can disable this operator by defining \c
3682 QT_NO_CAST_FROM_ASCII when you compile your applications. This
3683 can be useful if you want to ensure that all user-visible strings
3684 go through QObject::tr(), for example.
3685
3686 \sa QT_NO_CAST_FROM_ASCII
3687*/
3688
3689/*! \fn bool QString::operator<=(const char *other) const
3690
3691 \overload operator<=()
3692
3693 The \a other const char pointer is converted to a QString using
3694 the fromUtf8() function.
3695
3696 You can disable this operator by defining \c
3697 QT_NO_CAST_FROM_ASCII when you compile your applications. This
3698 can be useful if you want to ensure that all user-visible strings
3699 go through QObject::tr(), for example.
3700
3701 \sa QT_NO_CAST_FROM_ASCII
3702*/
3703
3704/*! \fn bool QString::operator>(const QString &s1, const QString &s2)
3705
3706 Returns \c true if string \a s1 is lexically greater than string \a s2;
3707 otherwise returns \c false.
3708
3709 \sa {Comparing Strings}
3710*/
3711
3712/*!
3713 \overload operator>()
3714
3715 Returns \c true if this string is lexically greater than the parameter
3716 string \a other; otherwise returns \c false.
3717*/
3718bool QString::operator>(QLatin1String other) const noexcept
3719{
3720 return QtPrivate::compareStrings(*this, other, Qt::CaseSensitive) > 0;
3721}
3722
3723/*! \fn bool QString::operator>(const QByteArray &other) const
3724
3725 \overload operator>()
3726
3727 The \a other byte array is converted to a QString using the
3728 fromUtf8() function. If any NUL characters ('\\0') are embedded
3729 in the byte array, they will be included in the transformation.
3730
3731 You can disable this operator by defining \c
3732 QT_NO_CAST_FROM_ASCII when you compile your applications. This
3733 can be useful if you want to ensure that all user-visible strings
3734 go through QObject::tr(), for example.
3735
3736 \sa QT_NO_CAST_FROM_ASCII
3737*/
3738
3739/*! \fn bool QString::operator>(const char *other) const
3740
3741 \overload operator>()
3742
3743 The \a other const char pointer is converted to a QString using
3744 the fromUtf8() function.
3745
3746 You can disable this operator by defining \c QT_NO_CAST_FROM_ASCII
3747 when you compile your applications. This can be useful if you want
3748 to ensure that all user-visible strings go through QObject::tr(),
3749 for example.
3750
3751 \sa QT_NO_CAST_FROM_ASCII
3752*/
3753
3754/*! \fn bool QString::operator>=(const QString &s1, const QString &s2)
3755
3756 Returns \c true if string \a s1 is lexically greater than or equal to
3757 string \a s2; otherwise returns \c false.
3758
3759 \sa {Comparing Strings}
3760*/
3761
3762/*! \fn bool QString::operator>=(QLatin1String other) const
3763
3764 Returns \c true if this string is lexically greater than or equal to parameter
3765 string \a other. Otherwise returns \c false.
3766
3767 \overload operator>=()
3768*/
3769
3770/*! \fn bool QString::operator>=(const QByteArray &other) const
3771
3772 \overload operator>=()
3773
3774 The \a other byte array is converted to a QString using the
3775 fromUtf8() function. If any NUL characters ('\\0') are embedded in
3776 the byte array, they will be included in the transformation.
3777
3778 You can disable this operator by defining \c QT_NO_CAST_FROM_ASCII
3779 when you compile your applications. This can be useful if you want
3780 to ensure that all user-visible strings go through QObject::tr(),
3781 for example.
3782
3783 \sa QT_NO_CAST_FROM_ASCII
3784*/
3785
3786/*! \fn bool QString::operator>=(const char *other) const
3787
3788 \overload operator>=()
3789
3790 The \a other const char pointer is converted to a QString using
3791 the fromUtf8() function.
3792
3793 You can disable this operator by defining \c QT_NO_CAST_FROM_ASCII
3794 when you compile your applications. This can be useful if you want
3795 to ensure that all user-visible strings go through QObject::tr(),
3796 for example.
3797
3798 \sa QT_NO_CAST_FROM_ASCII
3799*/
3800
3801/*! \fn bool QString::operator!=(const QString &s1, const QString &s2)
3802
3803 Returns \c true if string \a s1 is not equal to string \a s2;
3804 otherwise returns \c false.
3805
3806 \sa {Comparing Strings}
3807*/
3808
3809/*! \fn bool QString::operator!=(QLatin1String other) const
3810
3811 Returns \c true if this string is not equal to parameter string \a other.
3812 Otherwise returns \c false.
3813
3814 \overload operator!=()
3815*/
3816
3817/*! \fn bool QString::operator!=(const QByteArray &other) const
3818
3819 \overload operator!=()
3820
3821 The \a other byte array is converted to a QString using the
3822 fromUtf8() function. If any NUL characters ('\\0') are embedded
3823 in the byte array, they will be included in the transformation.
3824
3825 You can disable this operator by defining \c QT_NO_CAST_FROM_ASCII
3826 when you compile your applications. This can be useful if you want
3827 to ensure that all user-visible strings go through QObject::tr(),
3828 for example.
3829
3830 \sa QT_NO_CAST_FROM_ASCII
3831*/
3832
3833/*! \fn bool QString::operator!=(const char *other) const
3834
3835 \overload operator!=()
3836
3837 The \a other const char pointer is converted to a QString using
3838 the fromUtf8() function.
3839
3840 You can disable this operator by defining \c
3841 QT_NO_CAST_FROM_ASCII when you compile your applications. This
3842 can be useful if you want to ensure that all user-visible strings
3843 go through QObject::tr(), for example.
3844
3845 \sa QT_NO_CAST_FROM_ASCII
3846*/
3847
3848#if QT_STRINGVIEW_LEVEL < 2
3849/*!
3850 Returns the index position of the first occurrence of the string \a
3851 str in this string, searching forward from index position \a
3852 from. Returns -1 if \a str is not found.
3853
3854 If \a cs is Qt::CaseSensitive (default), the search is case
3855 sensitive; otherwise the search is case insensitive.
3856
3857 Example:
3858
3859 \snippet qstring/main.cpp 24
3860
3861 If \a from is -1, the search starts at the last character; if it is
3862 -2, at the next to last character and so on.
3863
3864 \sa lastIndexOf(), contains(), count()
3865*/
3866qsizetype QString::indexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
3867{
3868 return QtPrivate::findString(QStringView(unicode(), length()), from, QStringView(str.unicode(), str.length()), cs);
3869}
3870#endif // QT_STRINGVIEW_LEVEL < 2
3871
3872/*!
3873 \fn qsizetype QString::indexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
3874 \since 5.14
3875 \overload indexOf()
3876
3877 Returns the index position of the first occurrence of the string view \a str
3878 in this string, searching forward from index position \a from.
3879 Returns -1 if \a str is not found.
3880
3881 If \a cs is Qt::CaseSensitive (default), the search is case
3882 sensitive; otherwise the search is case insensitive.
3883
3884 If \a from is -1, the search starts at the last character; if it is
3885 -2, at the next to last character and so on.
3886
3887 \sa QStringView::indexOf(), lastIndexOf(), contains(), count()
3888*/
3889
3890/*!
3891 \since 4.5
3892 Returns the index position of the first occurrence of the string \a
3893 str in this string, searching forward from index position \a
3894 from. Returns -1 if \a str is not found.
3895
3896 If \a cs is Qt::CaseSensitive (default), the search is case
3897 sensitive; otherwise the search is case insensitive.
3898
3899 Example:
3900
3901 \snippet qstring/main.cpp 24
3902
3903 If \a from is -1, the search starts at the last character; if it is
3904 -2, at the next to last character and so on.
3905
3906 \sa lastIndexOf(), contains(), count()
3907*/
3908
3909qsizetype QString::indexOf(QLatin1String str, qsizetype from, Qt::CaseSensitivity cs) const
3910{
3911 return QtPrivate::findString(QStringView(unicode(), size()), from, str, cs);
3912}
3913
3914/*!
3915 \overload indexOf()
3916
3917 Returns the index position of the first occurrence of the
3918 character \a ch in the string, searching forward from index
3919 position \a from. Returns -1 if \a ch could not be found.
3920*/
3921qsizetype QString::indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
3922{
3923 return qFindChar(QStringView(unicode(), length()), ch, from, cs);
3924}
3925
3926#if QT_STRINGVIEW_LEVEL < 2
3927/*!
3928 Returns the index position of the last occurrence of the string \a
3929 str in this string, searching backward from index position \a
3930 from. If \a from is -1 (default), the search starts at the last
3931 character; if \a from is -2, at the next to last character and so
3932 on. Returns -1 if \a str is not found.
3933
3934 If \a cs is Qt::CaseSensitive (default), the search is case
3935 sensitive; otherwise the search is case insensitive.
3936
3937 Example:
3938
3939 \snippet qstring/main.cpp 29
3940
3941 \sa indexOf(), contains(), count()
3942*/
3943qsizetype QString::lastIndexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
3944{
3945 return QtPrivate::lastIndexOf(QStringView(*this), from, str, cs);
3946}
3947
3948#endif // QT_STRINGVIEW_LEVEL < 2
3949
3950/*!
3951 \since 4.5
3952 \overload lastIndexOf()
3953
3954 Returns the index position of the last occurrence of the string \a
3955 str in this string, searching backward from index position \a
3956 from. If \a from is -1 (default), the search starts at the last
3957 character; if \a from is -2, at the next to last character and so
3958 on. Returns -1 if \a str is not found.
3959
3960 If \a cs is Qt::CaseSensitive (default), the search is case
3961 sensitive; otherwise the search is case insensitive.
3962
3963 Example:
3964
3965 \snippet qstring/main.cpp 29
3966
3967 \sa indexOf(), contains(), count()
3968*/
3969qsizetype QString::lastIndexOf(QLatin1String str, qsizetype from, Qt::CaseSensitivity cs) const
3970{
3971 return QtPrivate::lastIndexOf(*this, from, str, cs);
3972}
3973
3974/*!
3975 \overload lastIndexOf()
3976
3977 Returns the index position of the last occurrence of the character
3978 \a ch, searching backward from position \a from.
3979*/
3980qsizetype QString::lastIndexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
3981{
3982 return qLastIndexOf(QStringView(*this), ch, from, cs);
3983}
3984
3985/*!
3986 \fn qsizetype QString::lastIndexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
3987 \since 5.14
3988 \overload lastIndexOf()
3989
3990 Returns the index position of the last occurrence of the string view \a
3991 str in this string, searching backward from index position \a
3992 from. If \a from is -1 (default), the search starts at the last
3993 character; if \a from is -2, at the next to last character and so
3994 on. Returns -1 if \a str is not found.
3995
3996 If \a cs is Qt::CaseSensitive (default), the search is case
3997 sensitive; otherwise the search is case insensitive.
3998
3999 \sa indexOf(), contains(), count()
4000*/
4001
4002
4003#if QT_CONFIG(regularexpression)
// Describes one backreference (such as "\1") found in the replacement string
// passed to QString::replace(const QRegularExpression &, const QString &).
struct QStringCapture
{
    qsizetype pos; // offset of the backslash within the replacement string
    qsizetype len; // characters occupied by the backreference (backslash plus digits)
    int no;        // number of the capturing group being referenced
};
Q_DECLARE_TYPEINFO(QStringCapture, Q_PRIMITIVE_TYPE);
4011
4012/*!
4013 \overload replace()
4014 \since 5.0
4015
4016 Replaces every occurrence of the regular expression \a re in the
4017 string with \a after. Returns a reference to the string. For
4018 example:
4019
4020 \snippet qstring/main.cpp 87
4021
4022 For regular expressions containing capturing groups,
4023 occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
4024 with the string captured by the corresponding capturing group.
4025
4026 \snippet qstring/main.cpp 88
4027
4028 \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch
4029*/
4030QString &QString::replace(const QRegularExpression &re, const QString &after)
4031{
4032 if (!re.isValid()) {
4033 qWarning("QString::replace: invalid QRegularExpression object");
4034 return *this;
4035 }
4036
4037 const QString copy(*this);
4038 QRegularExpressionMatchIterator iterator = re.globalMatch(copy);
4039 if (!iterator.hasNext()) // no matches at all
4040 return *this;
4041
4042 reallocData(d.size, d->detachFlags());
4043
4044 qsizetype numCaptures = re.captureCount();
4045
4046 // 1. build the backreferences list, holding where the backreferences
4047 // are in the replacement string
4048 QList<QStringCapture> backReferences;
4049 const qsizetype al = after.length();
4050 const QChar *ac = after.unicode();
4051
4052 for (qsizetype i = 0; i < al - 1; i++) {
4053 if (ac[i] == QLatin1Char('\\')) {
4054 int no = ac[i + 1].digitValue();
4055 if (no > 0 && no <= numCaptures) {
4056 QStringCapture backReference;
4057 backReference.pos = i;
4058 backReference.len = 2;
4059
4060 if (i < al - 2) {
4061 int secondDigit = ac[i + 2].digitValue();
4062 if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
4063 no = (no * 10) + secondDigit;
4064 ++backReference.len;
4065 }
4066 }
4067
4068 backReference.no = no;
4069 backReferences.append(backReference);
4070 }
4071 }
4072 }
4073
4074 // 2. iterate on the matches. For every match, copy in chunks
4075 // - the part before the match
4076 // - the after string, with the proper replacements for the backreferences
4077
4078 qsizetype newLength = 0; // length of the new string, with all the replacements
4079 qsizetype lastEnd = 0;
4080 QList<QStringView> chunks;
4081 const QStringView copyView{ copy }, afterView{ after };
4082 while (iterator.hasNext()) {
4083 QRegularExpressionMatch match = iterator.next();
4084 qsizetype len;
4085 // add the part before the match
4086 len = match.capturedStart() - lastEnd;
4087 if (len > 0) {
4088 chunks << copyView.mid(lastEnd, len);
4089 newLength += len;
4090 }
4091
4092 lastEnd = 0;
4093 // add the after string, with replacements for the backreferences
4094 for (const QStringCapture &backReference : qAsConst(backReferences)) {
4095 // part of "after" before the backreference
4096 len = backReference.pos - lastEnd;
4097 if (len > 0) {
4098 chunks << afterView.mid(lastEnd, len);
4099 newLength += len;
4100 }
4101
4102 // backreference itself
4103 len = match.capturedLength(backReference.no);
4104 if (len > 0) {
4105 chunks << copyView.mid(match.capturedStart(backReference.no), len);
4106 newLength += len;
4107 }
4108
4109 lastEnd = backReference.pos + backReference.len;
4110 }
4111
4112 // add the last part of the after string
4113 len = afterView.length() - lastEnd;
4114 if (len > 0) {
4115 chunks << afterView.mid(lastEnd, len);
4116 newLength += len;
4117 }
4118
4119 lastEnd = match.capturedEnd();
4120 }
4121
4122 // 3. trailing string after the last match
4123 if (copyView.length() > lastEnd) {
4124 chunks << copyView.mid(lastEnd);
4125 newLength += copyView.length() - lastEnd;
4126 }
4127
4128 // 4. assemble the chunks together
4129 resize(newLength);
4130 qsizetype i = 0;
4131 QChar *uc = data();
4132 for (const QStringView &chunk : qAsConst(chunks)) {
4133 qsizetype len = chunk.length();
4134 memcpy(uc + i, chunk.constData(), len * sizeof(QChar));
4135 i += len;
4136 }
4137
4138 return *this;
4139}
4140#endif // QT_CONFIG(regularexpression)
4141
4142/*!
4143 Returns the number of (potentially overlapping) occurrences of
4144 the string \a str in this string.
4145
4146 If \a cs is Qt::CaseSensitive (default), the search is
4147 case sensitive; otherwise the search is case insensitive.
4148
4149 \sa contains(), indexOf()
4150*/
4151
4152qsizetype QString::count(const QString &str, Qt::CaseSensitivity cs) const
4153{
4154 return QtPrivate::count(QStringView(unicode(), size()), QStringView(str.unicode(), str.size()), cs);
4155}
4156
4157/*!
4158 \overload count()
4159
4160 Returns the number of occurrences of character \a ch in the string.
4161
4162 If \a cs is Qt::CaseSensitive (default), the search is
4163 case sensitive; otherwise the search is case insensitive.
4164
4165 \sa contains(), indexOf()
4166*/
4167
4168qsizetype QString::count(QChar ch, Qt::CaseSensitivity cs) const
4169{
4170 return QtPrivate::count(QStringView(unicode(), size()), ch, cs);
4171}
4172
4173/*!
4174 \since 6.0
4175 \overload count()
    Returns the number of (potentially overlapping) occurrences of the
    string view \a str in this string.
4178
4179 If \a cs is Qt::CaseSensitive (default), the search is
4180 case sensitive; otherwise the search is case insensitive.
4181
4182 \sa contains(), indexOf()
4183*/
4184qsizetype QString::count(QStringView str, Qt::CaseSensitivity cs) const
4185{
4186 return QtPrivate::count(*this, str, cs);
4187}
4188
4189#if QT_STRINGVIEW_LEVEL < 2
4190/*! \fn bool QString::contains(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4191
4192 Returns \c true if this string contains an occurrence of the string
4193 \a str; otherwise returns \c false.
4194
4195 If \a cs is Qt::CaseSensitive (default), the search is
4196 case sensitive; otherwise the search is case insensitive.
4197
4198 Example:
4199 \snippet qstring/main.cpp 17
4200
4201 \sa indexOf(), count()
4202*/
4203#endif // QT_STRINGVIEW_LEVEL < 2
4204
4205/*! \fn bool QString::contains(QLatin1String str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4206 \since 5.3
4207
4208 \overload contains()
4209
4210 Returns \c true if this string contains an occurrence of the latin-1 string
4211 \a str; otherwise returns \c false.
4212*/
4213
4214/*! \fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4215
4216 \overload contains()
4217
4218 Returns \c true if this string contains an occurrence of the
4219 character \a ch; otherwise returns \c false.
4220*/
4221
4222/*! \fn bool QString::contains(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4223 \since 5.14
4224 \overload contains()
4225
4226 Returns \c true if this string contains an occurrence of the string view
4227 \a str; otherwise returns \c false.
4228
4229 If \a cs is Qt::CaseSensitive (default), the search is
4230 case sensitive; otherwise the search is case insensitive.
4231
4232 \sa indexOf(), count()
4233*/
4234
4235#if QT_CONFIG(regularexpression)
4236/*!
4237 \since 5.5
4238
4239 Returns the index position of the first match of the regular
4240 expression \a re in the string, searching forward from index
4241 position \a from. Returns -1 if \a re didn't match anywhere.
4242
4243 If the match is successful and \a rmatch is not \nullptr, it also
4244 writes the results of the match into the QRegularExpressionMatch object
4245 pointed to by \a rmatch.
4246
4247 Example:
4248
4249 \snippet qstring/main.cpp 93
4250*/
4251qsizetype QString::indexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4252{
4253 if (!re.isValid()) {
4254 qWarning("QString::indexOf: invalid QRegularExpression object");
4255 return -1;
4256 }
4257
4258 QRegularExpressionMatch match = re.match(*this, from);
4259 if (match.hasMatch()) {
4260 const qsizetype ret = match.capturedStart();
4261 if (rmatch)
4262 *rmatch = std::move(match);
4263 return ret;
4264 }
4265
4266 return -1;
4267}
4268
4269/*!
4270 \since 5.5
4271
    Returns the index position of the last match of the regular
    expression \a re in the string, which starts at or before the index
    position \a from. Returns -1 if \a re didn't match anywhere.
4275
4276 If the match is successful and \a rmatch is not \nullptr, it also
4277 writes the results of the match into the QRegularExpressionMatch object
4278 pointed to by \a rmatch.
4279
4280 Example:
4281
4282 \snippet qstring/main.cpp 94
4283*/
4284qsizetype QString::lastIndexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4285{
4286 if (!re.isValid()) {
4287 qWarning("QString::lastIndexOf: invalid QRegularExpression object");
4288 return -1;
4289 }
4290
4291 qsizetype endpos = (from < 0) ? (size() + from + 1) : (from + 1);
4292 QRegularExpressionMatchIterator iterator = re.globalMatch(*this);
4293 qsizetype lastIndex = -1;
4294 while (iterator.hasNext()) {
4295 QRegularExpressionMatch match = iterator.next();
4296 qsizetype start = match.capturedStart();
4297 if (start < endpos) {
4298 lastIndex = start;
4299 if (rmatch)
4300 *rmatch = std::move(match);
4301 } else {
4302 break;
4303 }
4304 }
4305
4306 return lastIndex;
4307}
4308
4309/*!
4310 \since 5.1
4311
4312 Returns \c true if the regular expression \a re matches somewhere in this
4313 string; otherwise returns \c false.
4314
4315 If the match is successful and \a rmatch is not \nullptr, it also
4316 writes the results of the match into the QRegularExpressionMatch object
4317 pointed to by \a rmatch.
4318
4319 \sa QRegularExpression::match()
4320*/
4321
4322bool QString::contains(const QRegularExpression &re, QRegularExpressionMatch *rmatch) const
4323{
4324 if (!re.isValid()) {
4325 qWarning("QString::contains: invalid QRegularExpression object");
4326 return false;
4327 }
4328 QRegularExpressionMatch m = re.match(*this);
4329 bool hasMatch = m.hasMatch();
4330 if (hasMatch && rmatch)
4331 *rmatch = std::move(m);
4332 return hasMatch;
4333}
4334
4335/*!
4336 \overload count()
4337 \since 5.0
4338
4339 Returns the number of times the regular expression \a re matches
4340 in the string.
4341
4342 This function counts overlapping matches, so in the example
4343 below, there are four instances of "ana" or "ama":
4344
4345 \snippet qstring/main.cpp 95
4346*/
4347qsizetype QString::count(const QRegularExpression &re) const
4348{
4349 if (!re.isValid()) {
4350 qWarning("QString::count: invalid QRegularExpression object");
4351 return 0;
4352 }
4353 qsizetype count = 0;
4354 qsizetype index = -1;
4355 qsizetype len = length();
4356 while (index < len - 1) {
4357 QRegularExpressionMatch match = re.match(*this, index + 1);
4358 if (!match.hasMatch())
4359 break;
4360 index = match.capturedStart();
4361 count++;
4362 }
4363 return count;
4364}
4365#endif // QT_CONFIG(regularexpression)
4366
4367/*! \fn qsizetype QString::count() const
4368
4369 \overload count()
4370
4371 Same as size().
4372*/
4373
4374
4375/*!
4376 \enum QString::SectionFlag
4377
4378 This enum specifies flags that can be used to affect various
4379 aspects of the section() function's behavior with respect to
4380 separators and empty fields.
4381
4382 \value SectionDefault Empty fields are counted, leading and
4383 trailing separators are not included, and the separator is
4384 compared case sensitively.
4385
4386 \value SectionSkipEmpty Treat empty fields as if they don't exist,
4387 i.e. they are not considered as far as \e start and \e end are
4388 concerned.
4389
4390 \value SectionIncludeLeadingSep Include the leading separator (if
4391 any) in the result string.
4392
4393 \value SectionIncludeTrailingSep Include the trailing separator
4394 (if any) in the result string.
4395
4396 \value SectionCaseInsensitiveSeps Compare the separator
4397 case-insensitively.
4398
4399 \sa section()
4400*/
4401
4402/*!
4403 \fn QString QString::section(QChar sep, qsizetype start, qsizetype end = -1, SectionFlags flags) const
4404
4405 This function returns a section of the string.
4406
4407 This string is treated as a sequence of fields separated by the
4408 character, \a sep. The returned string consists of the fields from
4409 position \a start to position \a end inclusive. If \a end is not
4410 specified, all fields from position \a start to the end of the
4411 string are included. Fields are numbered 0, 1, 2, etc., counting
4412 from the left, and -1, -2, etc., counting from right to left.
4413
4414 The \a flags argument can be used to affect some aspects of the
4415 function's behavior, e.g. whether to be case sensitive, whether
4416 to skip empty fields and how to deal with leading and trailing
4417 separators; see \l{SectionFlags}.
4418
4419 \snippet qstring/main.cpp 52
4420
    If \a start or \a end is negative, we count fields from the right
    of the string, the right-most field being -1, the next-to-right-most
    field being -2, and so on.
4424
4425 \snippet qstring/main.cpp 53
4426
4427 \sa split()
4428*/
4429
4430/*!
4431 \overload section()
4432
4433 \snippet qstring/main.cpp 51
4434 \snippet qstring/main.cpp 54
4435
4436 \sa split()
4437*/
4438
4439QString QString::section(const QString &sep, qsizetype start, qsizetype end, SectionFlags flags) const
4440{
4441 const QList<QStringView> sections = QStringView{ *this }.split(
4442 sep, Qt::KeepEmptyParts, (flags & SectionCaseInsensitiveSeps) ? Qt::CaseInsensitive : Qt::CaseSensitive);
4443 const qsizetype sectionsSize = sections.size();
4444 if (!(flags & SectionSkipEmpty)) {
4445 if (start < 0)
4446 start += sectionsSize;
4447 if (end < 0)
4448 end += sectionsSize;
4449 } else {
4450 qsizetype skip = 0;
4451 for (qsizetype k = 0; k < sectionsSize; ++k) {
4452 if (sections.at(k).isEmpty())
4453 skip++;
4454 }
4455 if (start < 0)
4456 start += sectionsSize - skip;
4457 if (end < 0)
4458 end += sectionsSize - skip;
4459 }
4460 if (start >= sectionsSize || end < 0 || start > end)
4461 return QString();
4462
4463 QString ret;
4464 qsizetype first_i = start, last_i = end;
4465 for (qsizetype x = 0, i = 0; x <= end && i < sectionsSize; ++i) {
4466 const QStringView &section = sections.at(i);
4467 const bool empty = section.isEmpty();
4468 if (x >= start) {
4469 if(x == start)
4470 first_i = i;
4471 if(x == end)
4472 last_i = i;
4473 if (x > start && i > 0)
4474 ret += sep;
4475 ret += section;
4476 }
4477 if (!empty || !(flags & SectionSkipEmpty))
4478 x++;
4479 }
4480 if ((flags & SectionIncludeLeadingSep) && first_i > 0)
4481 ret.prepend(sep);
4482 if ((flags & SectionIncludeTrailingSep) && last_i < sectionsSize - 1)
4483 ret += sep;
4484 return ret;
4485}
4486
4487#if !(defined(QT_NO_REGEXP) && !QT_CONFIG(regularexpression))
4488class qt_section_chunk {
4489public:
4490 qt_section_chunk() {}
4491 qt_section_chunk(qsizetype l, QStringView s) : length(l), string(std::move(s)) {}
4492 qsizetype length;
4493 QStringView string;
4494};
4495Q_DECLARE_TYPEINFO(qt_section_chunk, Q_MOVABLE_TYPE);
4496
4497static QString extractSections(const QList<qt_section_chunk> &sections, qsizetype start, qsizetype end,
4498 QString::SectionFlags flags)
4499{
4500 const qsizetype sectionsSize = sections.size();
4501
4502 if (!(flags & QString::SectionSkipEmpty)) {
4503 if (start < 0)
4504 start += sectionsSize;
4505 if (end < 0)
4506 end += sectionsSize;
4507 } else {
4508 qsizetype skip = 0;
4509 for (qsizetype k = 0; k < sectionsSize; ++k) {
4510 const qt_section_chunk &section = sections.at(k);
4511 if (section.length == section.string.length())
4512 skip++;
4513 }
4514 if (start < 0)
4515 start += sectionsSize - skip;
4516 if (end < 0)
4517 end += sectionsSize - skip;
4518 }
4519 if (start >= sectionsSize || end < 0 || start > end)
4520 return QString();
4521
4522 QString ret;
4523 qsizetype x = 0;
4524 qsizetype first_i = start, last_i = end;
4525 for (qsizetype i = 0; x <= end && i < sectionsSize; ++i) {
4526 const qt_section_chunk &section = sections.at(i);
4527 const bool empty = (section.length == section.string.length());
4528 if (x >= start) {
4529 if (x == start)
4530 first_i = i;
4531 if (x == end)
4532 last_i = i;
4533 if (x != start)
4534 ret += section.string;
4535 else
4536 ret += section.string.mid(section.length);
4537 }
4538 if (!empty || !(flags & QString::SectionSkipEmpty))
4539 x++;
4540 }
4541
4542 if ((flags & QString::SectionIncludeLeadingSep) && first_i >= 0) {
4543 const qt_section_chunk &section = sections.at(first_i);
4544 ret.prepend(section.string.left(section.length));
4545 }
4546
4547 if ((flags & QString::SectionIncludeTrailingSep)
4548 && last_i < sectionsSize - 1) {
4549 const qt_section_chunk &section = sections.at(last_i+1);
4550 ret += section.string.left(section.length);
4551 }
4552
4553 return ret;
4554}
4555#endif
4556
4557#if QT_CONFIG(regularexpression)
4558/*!
4559 \overload section()
4560 \since 5.0
4561
4562 This string is treated as a sequence of fields separated by the
4563 regular expression, \a re.
4564
4565 \snippet qstring/main.cpp 89
4566
4567 \warning Using this QRegularExpression version is much more expensive than
4568 the overloaded string and character versions.
4569
4570 \sa split(), simplified()
4571*/
4572QString QString::section(const QRegularExpression &re, qsizetype start, qsizetype end, SectionFlags flags) const
4573{
4574 if (!re.isValid()) {
4575 qWarning("QString::section: invalid QRegularExpression object");
4576 return QString();
4577 }
4578
4579 const QChar *uc = unicode();
4580 if (!uc)
4581 return QString();
4582
4583 QRegularExpression sep(re);
4584 if (flags & SectionCaseInsensitiveSeps)
4585 sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption);
4586
4587 QList<qt_section_chunk> sections;
4588 qsizetype n = length(), m = 0, last_m = 0, last_len = 0;
4589 QRegularExpressionMatchIterator iterator = sep.globalMatch(*this);
4590 while (iterator.hasNext()) {
4591 QRegularExpressionMatch match = iterator.next();
4592 m = match.capturedStart();
4593 sections.append(qt_section_chunk(last_len, QStringView{ *this }.mid(last_m, m - last_m)));
4594 last_m = m;
4595 last_len = match.capturedLength();
4596 }
4597 sections.append(qt_section_chunk(last_len, QStringView{ *this }.mid(last_m, n - last_m)));
4598
4599 return extractSections(sections, start, end, flags);
4600}
4601#endif // QT_CONFIG(regularexpression)
4602
4603/*!
4604 Returns a substring that contains the \a n leftmost characters
4605 of the string.
4606
4607 \obsolete Use first() instead in new code.
4608
4609 The entire string is returned if \a n is greater than or equal
4610 to size(), or less than zero.
4611
4612 \snippet qstring/main.cpp 31
4613
4614 \sa first(), last(), startsWith(), chopped(), chop(), truncate()
4615*/
4616QString QString::left(qsizetype n) const
4617{
4618 if (size_t(n) >= size_t(size()))
4619 return *this;
4620 return QString((const QChar*) d.data(), n);
4621}
4622
4623/*!
4624 Returns a substring that contains the \a n rightmost characters
4625 of the string.
4626
4627 \obsolete Use last() instead in new code.
4628
4629 The entire string is returned if \a n is greater than or equal
4630 to size(), or less than zero.
4631
4632 \snippet qstring/main.cpp 48
4633
4634 \sa endsWith(), last(), first(), sliced(), chopped(), chop(), truncate()
4635*/
4636QString QString::right(qsizetype n) const
4637{
4638 if (size_t(n) >= size_t(size()))
4639 return *this;
4640 return QString(constData() + size() - n, n);
4641}
4642
4643/*!
4644 Returns a string that contains \a n characters of this string,
4645 starting at the specified \a position index.
4646
4647 \obsolete Use sliced() instead in new code.
4648
    Returns a null string if the \a position index exceeds the
    length of the string. If there are fewer than \a n characters
    available in the string starting at the given \a position, or if
    \a n is -1 (default), the function returns all characters that
    are available from the specified \a position.
4654
4655 Example:
4656
4657 \snippet qstring/main.cpp 34
4658
4659 \sa first(), last(), sliced(), chopped(), chop(), truncate()
4660*/
4661
4662QString QString::mid(qsizetype position, qsizetype n) const
4663{
4664 qsizetype p = position;
4665 qsizetype l = n;
4666 using namespace QtPrivate;
4667 switch (QContainerImplHelper::mid(size(), &p, &l)) {
4668 case QContainerImplHelper::Null:
4669 return QString();
4670 case QContainerImplHelper::Empty:
4671 return QString(DataPointer::fromRawData(&_empty, 0));
4672 case QContainerImplHelper::Full:
4673 return *this;
4674 case QContainerImplHelper::Subset:
4675 return QString(constData() + p, l);
4676 }
4677 Q_UNREACHABLE();
4678 return QString();
4679}
4680
4681/*!
4682 \fn QString QString::first(qsizetype n) const
4683 \since 6.0
4684
4685 Returns a string that contains the first \a n characters
4686 of this string.
4687
4688 \note The behavior is undefined when \a n < 0 or \a n > size().
4689
4690 \sa last(), sliced(), startsWith(), chopped(), chop(), truncate()
4691*/
4692
4693/*!
4694 \fn QString QString::last(qsizetype n) const
4695 \since 6.0
4696
4697 Returns the string that contains the last \a n characters of this string.
4698
4699 \note The behavior is undefined when \a n < 0 or \a n > size().
4700
4701 \sa first(), sliced(), endsWith(), chopped(), chop(), truncate()
4702*/
4703
4704/*!
4705 \fn QString QString::sliced(qsizetype pos, qsizetype n) const
4706 \since 6.0
4707
4708 Returns a string that contains \a n characters of this string,
4709 starting at position \a pos.
4710
4711 \note The behavior is undefined when \a pos < 0, \a n < 0,
4712 or \a pos + \a n > size().
4713
4714 \sa first(), last(), chopped(), chop(), truncate()
4715*/
4716
4717/*!
4718 \fn QString QString::sliced(qsizetype pos) const
4719 \since 6.0
4720 \overload
4721
4722 Returns a string that contains the portion of this string starting at
4723 position \a pos and extending to its end.
4724
4725 \note The behavior is undefined when \a pos < 0 or \a pos > size().
4726
4727 \sa first(), last(), sliced(), chopped(), chop(), truncate()
4728*/
4729
4730/*!
4731 \fn QString QString::chopped(qsizetype len) const
4732 \since 5.10
4733
4734 Returns a string that contains the size() - \a len leftmost characters
4735 of this string.
4736
4737 \note The behavior is undefined if \a len is negative or greater than size().
4738
4739 \sa endsWith(), left(), right(), mid(), chop(), truncate()
4740*/
4741
4742#if QT_STRINGVIEW_LEVEL < 2
4743/*!
4744 Returns \c true if the string starts with \a s; otherwise returns
4745 \c false.
4746
4747 If \a cs is Qt::CaseSensitive (default), the search is
4748 case sensitive; otherwise the search is case insensitive.
4749
4750 \snippet qstring/main.cpp 65
4751
4752 \sa endsWith()
4753*/
4754bool QString::startsWith(const QString& s, Qt::CaseSensitivity cs) const
4755{
4756 return qt_starts_with(*this, s, cs);
4757}
4758#endif
4759
4760/*!
4761 \overload startsWith()
4762 */
4763bool QString::startsWith(QLatin1String s, Qt::CaseSensitivity cs) const
4764{
4765 return qt_starts_with(*this, s, cs);
4766}
4767
4768/*!
4769 \overload startsWith()
4770
4771 Returns \c true if the string starts with \a c; otherwise returns
4772 \c false.
4773*/
4774bool QString::startsWith(QChar c, Qt::CaseSensitivity cs) const
4775{
4776 return qt_starts_with(*this, c, cs);
4777}
4778
4779/*!
4780 \fn bool QString::startsWith(QStringView str, Qt::CaseSensitivity cs) const
4781 \since 5.10
4782 \overload
4783
4784 Returns \c true if the string starts with the string-view \a str;
4785 otherwise returns \c false.
4786
4787 If \a cs is Qt::CaseSensitive (default), the search is case-sensitive;
4788 otherwise the search is case insensitive.
4789
4790 \sa endsWith()
4791*/
4792
4793#if QT_STRINGVIEW_LEVEL < 2
4794/*!
4795 Returns \c true if the string ends with \a s; otherwise returns
4796 \c false.
4797
4798 If \a cs is Qt::CaseSensitive (default), the search is case
4799 sensitive; otherwise the search is case insensitive.
4800
4801 \snippet qstring/main.cpp 20
4802
4803 \sa startsWith()
4804*/
4805bool QString::endsWith(const QString &s, Qt::CaseSensitivity cs) const
4806{
4807 return qt_ends_with(*this, s, cs);
4808}
4809#endif // QT_STRINGVIEW_LEVEL < 2
4810
4811/*!
4812 \fn bool QString::endsWith(QStringView str, Qt::CaseSensitivity cs) const
4813 \since 5.10
4814 \overload endsWith()
4815 Returns \c true if the string ends with the string view \a str;
4816 otherwise returns \c false.
4817
4818 If \a cs is Qt::CaseSensitive (default), the search is case
4819 sensitive; otherwise the search is case insensitive.
4820
4821 \sa startsWith()
4822*/
4823
4824/*!
4825 \overload endsWith()
4826*/
4827bool QString::endsWith(QLatin1String s, Qt::CaseSensitivity cs) const
4828{
4829 return qt_ends_with(*this, s, cs);
4830}
4831
4832/*!
4833 Returns \c true if the string ends with \a c; otherwise returns
4834 \c false.
4835
4836 \overload endsWith()
4837 */
4838bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
4839{
4840 return qt_ends_with(*this, c, cs);
4841}
4842
4843/*!
4844 Returns \c true if the string is uppercase, that is, it's identical
4845 to its toUpper() folding.
4846
    Note that this does \e not mean that the string does not contain
    lowercase letters (some lowercase letters do not have an uppercase
    folding; they are left unchanged by toUpper()).
4850 For more information, refer to the Unicode standard, section 3.13.
4851
4852 \since 5.12
4853
4854 \sa QChar::toUpper(), isLower()
4855*/
4856bool QString::isUpper() const
4857{
4858 QStringIterator it(*this);
4859
4860 while (it.hasNext()) {
4861 const char32_t uc = it.next();
4862 if (qGetProp(uc)->cases[QUnicodeTables::UpperCase].diff)
4863 return false;
4864 }
4865
4866 return true;
4867}
4868
4869/*!
4870 Returns \c true if the string is lowercase, that is, it's identical
4871 to its toLower() folding.
4872
4873 Note that this does \e not mean that the string does not contain
4874 uppercase letters (some uppercase letters do not have a lowercase
4875 folding; they are left unchanged by toLower()).
4876 For more information, refer to the Unicode standard, section 3.13.
4877
4878 \since 5.12
4879
4880 \sa QChar::toLower(), isUpper()
4881 */
4882bool QString::isLower() const
4883{
4884 QStringIterator it(*this);
4885
4886 while (it.hasNext()) {
4887 const char32_t uc = it.next();
4888 if (qGetProp(uc)->cases[QUnicodeTables::LowerCase].diff)
4889 return false;
4890 }
4891
4892 return true;
4893}
4894
// Forward declaration: the definition follows the public wrappers
// (QString::toLatin1_helper() and QtPrivate::convertToLatin1()) that call it.
static QByteArray qt_convert_to_latin1(QStringView string);
4896
4897QByteArray QString::toLatin1_helper(const QString &string)
4898{
4899 return qt_convert_to_latin1(string);
4900}
4901
4902/*!
4903 \since 6.0
4904 \internal
4905 \relates QAnyStringView
4906
4907 Returns a UTF-16 representation of \a string as a QString.
4908
4909 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
4910 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
4911*/
4912QString QtPrivate::convertToQString(QAnyStringView string)
4913{
4914 return string.visit([] (auto string) { return string.toString(); });
4915}
4916
4917/*!
4918 \since 5.10
4919 \internal
4920 \relates QStringView
4921
4922 Returns a Latin-1 representation of \a string as a QByteArray.
4923
4924 The behavior is undefined if \a string contains non-Latin1 characters.
4925
4926 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
4927 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
4928*/
4929QByteArray QtPrivate::convertToLatin1(QStringView string)
4930{
4931 return qt_convert_to_latin1(string);
4932}
4933
4934static QByteArray qt_convert_to_latin1(QStringView string)
4935{
4936 if (Q_UNLIKELY(string.isNull()))
4937 return QByteArray();
4938
4939 QByteArray ba(string.length(), Qt::Uninitialized);
4940
4941 // since we own the only copy, we're going to const_cast the constData;
4942 // that avoids an unnecessary call to detach() and expansion code that will never get used
4943 qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
4944 string.utf16(), string.size());
4945 return ba;
4946}
4947
// Converts \a s to Latin-1. When \a s is detached, its own buffer is handed
// over to the returned QByteArray and \a s is cleared as a side effect;
// otherwise a separate byte array is produced by the copying conversion.
QByteArray QString::toLatin1_helper_inplace(QString &s)
{
    // Not detached: fall back to the copying conversion.
    if (!s.isDetached())
        return qt_convert_to_latin1(s);

    // We can return our own buffer to the caller.
    // Conversion to Latin-1 always shrinks the buffer by half.
    // Remember the UTF-16 source pointer and length before s is cleared below.
    const char16_t *data = s.d.data();
    qsizetype length = s.d.size;

    // Move the d pointer over to the bytearray.
    // Kids, avert your eyes. Don't try this at home.

    // this relies on the fact that we use QArrayData for everything behind the scenes which has the same layout
    static_assert(sizeof(QByteArray::DataPointer) == sizeof(QString::DataPointer), "sizes have to be equal");
    // Build a byte-array data pointer over the string's header and storage.
    QByteArray::DataPointer ba_d(reinterpret_cast<QByteArray::Data *>(s.d.d_ptr()), reinterpret_cast<char *>(s.d.data()), length);
    // NOTE(review): presumably the extra reference keeps the allocation alive
    // when s.clear() drops the string's own reference -- confirm.
    ba_d.ref();
    s.clear();

    // Destination of the conversion: the same storage, now viewed as bytes.
    char *ddata = ba_d.data();

    // multiply the allocated capacity by sizeof(char16_t)
    ba_d.d_ptr()->alloc *= sizeof(char16_t);

    // do the in-place conversion
    qt_to_latin1(reinterpret_cast<uchar *>(ddata), data, length);
    // Terminate the converted byte string.
    ddata[length] = '\0';
    return QByteArray(ba_d);
}
4977
4978/*!
4979 \fn QByteArray QString::toLatin1() const
4980
4981 Returns a Latin-1 representation of the string as a QByteArray.
4982
4983 The returned byte array is undefined if the string contains non-Latin1
4984 characters. Those characters may be suppressed or replaced with a
4985 question mark.
4986
4987 \sa fromLatin1(), toUtf8(), toLocal8Bit(), QStringEncoder
4988*/
4989
// Forward declaration: defined below, after QString::toLocal8Bit_helper(),
// which calls it.
static QByteArray qt_convert_to_local_8bit(QStringView string);
4991
4992/*!
4993 \fn QByteArray QString::toLocal8Bit() const
4994
4995 Returns the local 8-bit representation of the string as a
4996 QByteArray. The returned byte array is undefined if the string
4997 contains characters not supported by the local 8-bit encoding.
4998
    On Unix systems this is equivalent to toUtf8(); on Windows the system's
    current code page is used.
5001
5002 If this string contains any characters that cannot be encoded in the
5003 locale, the returned byte array is undefined. Those characters may be
5004 suppressed or replaced by another.
5005
5006 \sa fromLocal8Bit(), toLatin1(), toUtf8(), QStringEncoder
5007*/
5008
5009QByteArray QString::toLocal8Bit_helper(const QChar *data, qsizetype size)
5010{
5011 return qt_convert_to_local_8bit(QStringView(data, size));
5012}
5013
5014static QByteArray qt_convert_to_local_8bit(QStringView string)
5015{
5016 if (string.isNull())
5017 return QByteArray();
5018 QStringEncoder fromUtf16(QStringEncoder::System, QStringEncoder::Flag::Stateless);
5019 return fromUtf16(string);
5020}
5021
5022/*!
5023 \since 5.10
5024 \internal
5025 \relates QStringView
5026
5027 Returns a local 8-bit representation of \a string as a QByteArray.
5028
    On Unix systems this is equivalent to toUtf8(); on Windows the system's
    current code page is used.
5031
5032 The behavior is undefined if \a string contains characters not
5033 supported by the locale's 8-bit encoding.
5034
5035 \sa QString::toLocal8Bit(), QStringView::toLocal8Bit()
5036*/
5037QByteArray QtPrivate::convertToLocal8Bit(QStringView string)
5038{
5039 return qt_convert_to_local_8bit(string);
5040}
5041
// Forward declaration: defined below, after QString::toUtf8_helper(),
// which calls it.
static QByteArray qt_convert_to_utf8(QStringView str);
5043
5044/*!
5045 \fn QByteArray QString::toUtf8() const
5046
5047 Returns a UTF-8 representation of the string as a QByteArray.
5048
5049 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5050 string like QString.
5051
5052 \sa fromUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder
5053*/
5054
5055QByteArray QString::toUtf8_helper(const QString &str)
5056{
5057 return qt_convert_to_utf8(str);
5058}
5059
5060static QByteArray qt_convert_to_utf8(QStringView str)
5061{
5062 if (str.isNull())
5063 return QByteArray();
5064
5065 return QUtf8::convertFromUnicode(str);
5066}
5067
5068/*!
5069 \since 5.10
5070 \internal
5071 \relates QStringView
5072
5073 Returns a UTF-8 representation of \a string as a QByteArray.
5074
5075 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5076 string like QStringView.
5077
5078 \sa QString::toUtf8(), QStringView::toUtf8()
5079*/
5080QByteArray QtPrivate::convertToUtf8(QStringView string)
5081{
5082 return qt_convert_to_utf8(string);
5083}
5084
// Forward declaration: defined below, after QString::toUcs4(), which calls it.
static QList<uint> qt_convert_to_ucs4(QStringView string);
5086
5087/*!
5088 \since 4.2
5089
5090 Returns a UCS-4/UTF-32 representation of the string as a QList<uint>.
5091
5092 UCS-4 is a Unicode codec and therefore it is lossless. All characters from
5093 this string will be encoded in UCS-4. Any invalid sequence of code units in
5094 this string is replaced by the Unicode's replacement character
5095 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5096
    The returned list is not '\\0'-terminated.
5098
5099 \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder, fromUcs4(), toWCharArray()
5100*/
5101QList<uint> QString::toUcs4() const
5102{
5103 return qt_convert_to_ucs4(*this);
5104}
5105
5106static QList<uint> qt_convert_to_ucs4(QStringView string)
5107{
5108 QList<uint> v(string.length());
5109 uint *a = const_cast<uint*>(v.constData());
5110 QStringIterator it(string);
5111 while (it.hasNext())
5112 *a++ = it.next();
5113 v.resize(a - v.constData());
5114 return v;
5115}
5116
5117/*!
5118 \since 5.10
5119 \internal
5120 \relates QStringView
5121
5122 Returns a UCS-4/UTF-32 representation of \a string as a QList<uint>.
5123
5124 UCS-4 is a Unicode codec and therefore it is lossless. All characters from
5125 this string will be encoded in UCS-4. Any invalid sequence of code units in
5126 this string is replaced by the Unicode's replacement character
5127 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5128
    The returned list is not '\\0'-terminated.
5130
5131 \sa QString::toUcs4(), QStringView::toUcs4(), QtPrivate::convertToLatin1(),
5132 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUtf8()
5133*/
5134QList<uint> QtPrivate::convertToUcs4(QStringView string)
5135{
5136 return qt_convert_to_ucs4(string);
5137}
5138
5139/*!
5140 \fn QString QString::fromLatin1(QByteArrayView str)
5141 \overload
5142 \since 6.0
5143
5144 Returns a QString initialized with the Latin-1 string \a str.
5145*/
5146QString QString::fromLatin1(QByteArrayView ba)
5147{
5148 DataPointer d;
5149 if (!ba.data()) {
5150 // nothing to do
5151 } else if (ba.size() == 0) {
5152 d = DataPointer::fromRawData(&_empty, 0);
5153 } else {
5154 d = DataPointer(Data::allocate(ba.size()), ba.size());
5155 d.data()[ba.size()] = '\0';
5156 char16_t *dst = d.data();
5157
5158 qt_from_latin1(dst, ba.data(), size_t(ba.size()));
5159 }
5160 return QString(std::move(d));
5161}
5162
5163/*!
5164 \fn QString QString::fromLatin1(const char *str, qsizetype size)
5165 Returns a QString initialized with the first \a size characters
5166 of the Latin-1 string \a str.
5167
5168 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5169
5170 \sa toLatin1(), fromUtf8(), fromLocal8Bit()
5171*/
5172
5173/*!
5174 \fn QString QString::fromLatin1(const QByteArray &str)
5175 \overload
5176 \since 5.0
5177
5178 Returns a QString initialized with the Latin-1 string \a str.
5179*/
5180
5181/*!
5182 \fn QString QString::fromLocal8Bit(const char *str, qsizetype size)
5183 Returns a QString initialized with the first \a size characters
5184 of the 8-bit string \a str.
5185
5186 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5187
    On Unix systems this is equivalent to fromUtf8(); on Windows the system's
    current code page is used.
5190
5191 \sa toLocal8Bit(), fromLatin1(), fromUtf8()
5192*/
5193
5194/*!
5195 \fn QString QString::fromLocal8Bit(const QByteArray &str)
5196 \overload
5197 \since 5.0
5198
5199 Returns a QString initialized with the 8-bit string \a str.
5200*/
5201
5202/*!
5203 \fn QString QString::fromLocal8Bit(QByteArrayView str)
5204 \overload
5205 \since 6.0
5206
5207 Returns a QString initialized with the 8-bit string \a str.
5208*/
5209QString QString::fromLocal8Bit(QByteArrayView ba)
5210{
5211 if (ba.isNull())
5212 return QString();
5213 if (ba.isEmpty())
5214 return QString(DataPointer::fromRawData(&_empty, 0));
5215 QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
5216 return toUtf16(ba);
5217}
5218
5219/*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
5220 Returns a QString initialized with the first \a size bytes
5221 of the UTF-8 string \a str.
5222
5223 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5224
5225 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5226 string like QString. However, invalid sequences are possible with UTF-8
5227 and, if any such are found, they will be replaced with one or more
5228 "replacement characters", or suppressed. These include non-Unicode
5229 sequences, non-characters, overlong sequences or surrogate codepoints
5230 encoded into UTF-8.
5231
5232 This function can be used to process incoming data incrementally as long as
5233 all UTF-8 characters are terminated within the incoming data. Any
5234 unterminated characters at the end of the string will be replaced or
5235 suppressed. In order to do stateful decoding, please use \l QStringDecoder.
5236
5237 \sa toUtf8(), fromLatin1(), fromLocal8Bit()
5238*/
5239
5240/*!
5241 \fn QString QString::fromUtf8(const char8_t *str, qsizetype size)
5242 \overload
5243 \since 6.0
5244
5245 This overload is only available when compiling in C++20 mode.
5246*/
5247
5248/*!
5249 \fn QString QString::fromUtf8(const QByteArray &str)
5250 \overload
5251 \since 5.0
5252
5253 Returns a QString initialized with the UTF-8 string \a str.
5254*/
5255
5256/*!
5257 \fn QString QString::fromUtf8(QByteArrayView str)
5258 \overload
5259 \since 6.0
5260
5261 Returns a QString initialized with the UTF-8 string \a str.
5262*/
5263QString QString::fromUtf8(QByteArrayView ba)
5264{
5265 if (ba.isNull())
5266 return QString();
5267 if (ba.isEmpty())
5268 return QString(DataPointer::fromRawData(&_empty, 0));
5269 return QUtf8::convertToUnicode(ba);
5270}
5271
5272/*!
5273 \since 5.3
5274 Returns a QString initialized with the first \a size characters
5275 of the Unicode string \a unicode (ISO-10646-UTF-16 encoded).
5276
    If \a size is -1 (default), \a unicode must be '\\0'-terminated.
5278
5279 This function checks for a Byte Order Mark (BOM). If it is missing,
5280 host byte order is assumed.
5281
5282 This function is slow compared to the other Unicode conversions.
5283 Use QString(const QChar *, int) or QString(const QChar *) if possible.
5284
5285 QString makes a deep copy of the Unicode data.
5286
5287 \sa utf16(), setUtf16(), fromStdU16String()
5288*/
5289QString QString::fromUtf16(const char16_t *unicode, qsizetype size)
5290{
5291 if (!unicode)
5292 return QString();
5293 if (size < 0) {
5294 size = 0;
5295 while (unicode[size] != 0)
5296 ++size;
5297 }
5298 QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless);
5299 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 2));
5300}
5301
5302/*!
5303 \fn QString QString::fromUtf16(const ushort *str, qsizetype size)
5304 \obsolete
5305
5306 Use the \c char16_t overload.
5307*/
5308
5309/*!
5310 \fn QString QString::fromUcs4(const uint *str, qsizetype size)
5311 \since 4.2
5312 \obsolete
5313
5314 Use the \c char32_t overload instead.
5315*/
5316
5317/*!
5318 \since 5.3
5319
5320 Returns a QString initialized with the first \a size characters
5321 of the Unicode string \a unicode (ISO-10646-UCS-4 encoded).
5322
    If \a size is -1 (default), \a unicode must be '\\0'-terminated.
5324
5325 \sa toUcs4(), fromUtf16(), utf16(), setUtf16(), fromWCharArray(), fromStdU32String()
5326*/
5327QString QString::fromUcs4(const char32_t *unicode, qsizetype size)
5328{
5329 if (!unicode)
5330 return QString();
5331 if (size < 0) {
5332 size = 0;
5333 while (unicode[size] != 0)
5334 ++size;
5335 }
5336 QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless);
5337 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 4));
5338}
5339
5340
5341/*!
5342 Resizes the string to \a size characters and copies \a unicode
5343 into the string.
5344
5345 If \a unicode is 0, nothing is copied, but the string is still
5346 resized to \a size.
5347
5348 \sa unicode(), setUtf16()
5349*/
5350QString& QString::setUnicode(const QChar *unicode, qsizetype size)
5351{
5352 resize(size);
5353 if (unicode && size)
5354 memcpy(d.data(), unicode, size * sizeof(QChar));
5355 return *this;
5356}
5357
5358/*!
5359 \fn QString &QString::setUtf16(const ushort *unicode, qsizetype size)
5360
5361 Resizes the string to \a size characters and copies \a unicode
5362 into the string.
5363
5364 If \a unicode is 0, nothing is copied, but the string is still
5365 resized to \a size.
5366
5367 Note that unlike fromUtf16(), this function does not consider BOMs and
5368 possibly differing byte ordering.
5369
5370 \sa utf16(), setUnicode()
5371*/
5372
5373/*!
5374 \fn QString QString::simplified() const
5375
5376 Returns a string that has whitespace removed from the start
5377 and the end, and that has each sequence of internal whitespace
5378 replaced with a single space.
5379
5380 Whitespace means any character for which QChar::isSpace() returns
5381 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
5382 '\\f', '\\r', and ' '.
5383
5384 Example:
5385
5386 \snippet qstring/main.cpp 57
5387
5388 \sa trimmed()
5389*/
5390QString QString::simplified_helper(const QString &str)
5391{
5392 return QStringAlgorithms<const QString>::simplified_helper(str);
5393}
5394
5395QString QString::simplified_helper(QString &str)
5396{
5397 return QStringAlgorithms<QString>::simplified_helper(str);
5398}
5399
5400namespace {
5401 template <typename StringView>
5402 StringView qt_trimmed(StringView s) noexcept
5403 {
5404 auto begin = s.begin();
5405 auto end = s.end();
5406 QStringAlgorithms<const StringView>::trimmed_helper_positions(begin, end);
5407 return StringView{begin, end};
5408 }
5409}
5410
5411/*!
5412 \fn QStringView QtPrivate::trimmed(QStringView s)
5413 \fn QLatin1String QtPrivate::trimmed(QLatin1String s)
5414 \internal
5415 \relates QStringView
5416 \since 5.10
5417
5418 Returns \a s with whitespace removed from the start and the end.
5419
5420 Whitespace means any character for which QChar::isSpace() returns
5421 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
5422 '\\f', '\\r', and ' '.
5423
5424 \sa QString::trimmed(), QStringView::trimmed(), QLatin1String::trimmed()
5425*/
5426QStringView QtPrivate::trimmed(QStringView s) noexcept
5427{
5428 return qt_trimmed(s);
5429}
5430
5431QLatin1String QtPrivate::trimmed(QLatin1String s) noexcept
5432{
5433 return qt_trimmed(s);
5434}
5435
5436/*!
5437 \fn QString QString::trimmed() const
5438
5439 Returns a string that has whitespace removed from the start and
5440 the end.
5441
5442 Whitespace means any character for which QChar::isSpace() returns
5443 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
5444 '\\f', '\\r', and ' '.
5445
5446 Example:
5447
5448 \snippet qstring/main.cpp 82
5449
5450 Unlike simplified(), trimmed() leaves internal whitespace alone.
5451
5452 \sa simplified()
5453*/
5454QString QString::trimmed_helper(const QString &str)
5455{
5456 return QStringAlgorithms<const QString>::trimmed_helper(str);
5457}
5458
5459QString QString::trimmed_helper(QString &str)
5460{
5461 return QStringAlgorithms<QString>::trimmed_helper(str);
5462}
5463
5464/*! \fn const QChar QString::at(qsizetype position) const
5465
5466 Returns the character at the given index \a position in the
5467 string.
5468
5469 The \a position must be a valid index position in the string
5470 (i.e., 0 <= \a position < size()).
5471
5472 \sa operator[]()
5473*/
5474
5475/*!
5476 \fn QChar &QString::operator[](qsizetype position)
5477
5478 Returns the character at the specified \a position in the string as a
5479 modifiable reference.
5480
5481 Example:
5482
5483 \snippet qstring/main.cpp 85
5484
5485 \sa at()
5486*/
5487
5488/*!
5489 \fn const QChar QString::operator[](qsizetype position) const
5490
5491 \overload operator[]()
5492*/
5493
5494/*!
5495 \fn QChar QString::front() const
5496 \since 5.10
5497
5498 Returns the first character in the string.
5499 Same as \c{at(0)}.
5500
5501 This function is provided for STL compatibility.
5502
5503 \warning Calling this function on an empty string constitutes
5504 undefined behavior.
5505
5506 \sa back(), at(), operator[]()
5507*/
5508
5509/*!
5510 \fn QChar QString::back() const
5511 \since 5.10
5512
5513 Returns the last character in the string.
5514 Same as \c{at(size() - 1)}.
5515
5516 This function is provided for STL compatibility.
5517
5518 \warning Calling this function on an empty string constitutes
5519 undefined behavior.
5520
5521 \sa front(), at(), operator[]()
5522*/
5523
5524/*!
5525 \fn QChar &QString::front()
5526 \since 5.10
5527
5528 Returns a reference to the first character in the string.
5529 Same as \c{operator[](0)}.
5530
5531 This function is provided for STL compatibility.
5532
5533 \warning Calling this function on an empty string constitutes
5534 undefined behavior.
5535
5536 \sa back(), at(), operator[]()
5537*/
5538
5539/*!
5540 \fn QChar &QString::back()
5541 \since 5.10
5542
5543 Returns a reference to the last character in the string.
5544 Same as \c{operator[](size() - 1)}.
5545
5546 This function is provided for STL compatibility.
5547
5548 \warning Calling this function on an empty string constitutes
5549 undefined behavior.
5550
5551 \sa front(), at(), operator[]()
5552*/
5553
5554/*!
5555 \fn void QString::truncate(qsizetype position)
5556
5557 Truncates the string at the given \a position index.
5558
5559 If the specified \a position index is beyond the end of the
5560 string, nothing happens.
5561
5562 Example:
5563
5564 \snippet qstring/main.cpp 83
5565
5566 If \a position is negative, it is equivalent to passing zero.
5567
5568 \sa chop(), resize(), left(), QStringView::truncate()
5569*/
5570
5571void QString::truncate(qsizetype pos)
5572{
5573 if (pos < size())
5574 resize(pos);
5575}
5576
5577
5578/*!
5579 Removes \a n characters from the end of the string.
5580
5581 If \a n is greater than or equal to size(), the result is an
5582 empty string; if \a n is negative, it is equivalent to passing zero.
5583
5584 Example:
5585 \snippet qstring/main.cpp 15
5586
5587 If you want to remove characters from the \e beginning of the
5588 string, use remove() instead.
5589
5590 \sa truncate(), resize(), remove(), QStringView::chop()
5591*/
5592void QString::chop(qsizetype n)
5593{
5594 if (n > 0)
5595 resize(d.size - n);
5596}
5597
5598/*!
5599 Sets every character in the string to character \a ch. If \a size
5600 is different from -1 (default), the string is resized to \a
5601 size beforehand.
5602
5603 Example:
5604
5605 \snippet qstring/main.cpp 21
5606
5607 \sa resize()
5608*/
5609
5610QString& QString::fill(QChar ch, qsizetype size)
5611{
5612 resize(size < 0 ? d.size : size);
5613 if (d.size) {
5614 QChar *i = (QChar*)d.data() + d.size;
5615 QChar *b = (QChar*)d.data();
5616 while (i != b)
5617 *--i = ch;
5618 }
5619 return *this;
5620}
5621
5622/*!
5623 \fn qsizetype QString::length() const
5624
5625 Returns the number of characters in this string. Equivalent to
5626 size().
5627
5628 \sa resize()
5629*/
5630
5631/*!
5632 \fn qsizetype QString::size() const
5633
5634 Returns the number of characters in this string.
5635
5636 The last character in the string is at position size() - 1.
5637
5638 Example:
5639 \snippet qstring/main.cpp 58
5640
5641 \sa isEmpty(), resize()
5642*/
5643
5644/*! \fn bool QString::isNull() const
5645
5646 Returns \c true if this string is null; otherwise returns \c false.
5647
5648 Example:
5649
5650 \snippet qstring/main.cpp 28
5651
5652 Qt makes a distinction between null strings and empty strings for
5653 historical reasons. For most applications, what matters is
5654 whether or not a string contains any data, and this can be
5655 determined using the isEmpty() function.
5656
5657 \sa isEmpty()
5658*/
5659
5660/*! \fn bool QString::isEmpty() const
5661
5662 Returns \c true if the string has no characters; otherwise returns
5663 \c false.
5664
5665 Example:
5666
5667 \snippet qstring/main.cpp 27
5668
5669 \sa size()
5670*/
5671
5672/*! \fn QString &QString::operator+=(const QString &other)
5673
5674 Appends the string \a other onto the end of this string and
5675 returns a reference to this string.
5676
5677 Example:
5678
5679 \snippet qstring/main.cpp 84
5680
5681 This operation is typically very fast (\l{constant time}),
5682 because QString preallocates extra space at the end of the string
5683 data so it can grow without reallocating the entire string each
5684 time.
5685
5686 \sa append(), prepend()
5687*/
5688
5689/*! \fn QString &QString::operator+=(QLatin1String str)
5690
5691 \overload operator+=()
5692
5693 Appends the Latin-1 string \a str to this string.
5694*/
5695
5696/*! \fn QString &QString::operator+=(const QByteArray &ba)
5697
5698 \overload operator+=()
5699
5700 Appends the byte array \a ba to this string. The byte array is converted
5701 to Unicode using the fromUtf8() function. If any NUL characters ('\\0')
5702 are embedded in the \a ba byte array, they will be included in the
5703 transformation.
5704
5705 You can disable this function by defining \c
5706 QT_NO_CAST_FROM_ASCII when you compile your applications. This
5707 can be useful if you want to ensure that all user-visible strings
5708 go through QObject::tr(), for example.
5709
5710 \sa QT_NO_CAST_FROM_ASCII
5711*/
5712
5713/*! \fn QString &QString::operator+=(const char *str)
5714
5715 \overload operator+=()
5716
5717 Appends the string \a str to this string. The const char pointer
5718 is converted to Unicode using the fromUtf8() function.
5719
5720 You can disable this function by defining \c QT_NO_CAST_FROM_ASCII
5721 when you compile your applications. This can be useful if you want
5722 to ensure that all user-visible strings go through QObject::tr(),
5723 for example.
5724
5725 \sa QT_NO_CAST_FROM_ASCII
5726*/
5727
5728/*! \fn QString &QString::operator+=(QStringView str)
5729 \since 6.0
5730 \overload operator+=()
5731
5732 Appends the string view \a str to this string.
5733*/
5734
5735/*! \fn QString &QString::operator+=(QChar ch)
5736
5737 \overload operator+=()
5738
5739 Appends the character \a ch to the string.
5740*/
5741
5742/*!
5743 \fn bool operator==(const char *s1, const QString &s2)
5744
5745 \overload operator==()
5746 \relates QString
5747
5748 Returns \c true if \a s1 is equal to \a s2; otherwise returns \c false.
5749 Note that no string is equal to \a s1 being 0.
5750
5751 Equivalent to \c {s1 != 0 && compare(s1, s2) == 0}.
5752*/
5753
5754/*!
5755 \fn bool operator!=(const char *s1, const QString &s2)
5756 \relates QString
5757
5758 Returns \c true if \a s1 is not equal to \a s2; otherwise returns
5759 \c false.
5760
5761 For \a s1 != 0, this is equivalent to \c {compare(} \a s1, \a s2
5762 \c {) != 0}. Note that no string is equal to \a s1 being 0.
5763*/
5764
5765/*!
5766 \fn bool operator<(const char *s1, const QString &s2)
5767 \relates QString
5768
5769 Returns \c true if \a s1 is lexically less than \a s2; otherwise
5770 returns \c false. For \a s1 != 0, this is equivalent to \c
5771 {compare(s1, s2) < 0}.
5772
5773 \sa {Comparing Strings}
5774*/
5775
5776/*!
5777 \fn bool operator<=(const char *s1, const QString &s2)
5778 \relates QString
5779
5780 Returns \c true if \a s1 is lexically less than or equal to \a s2;
5781 otherwise returns \c false. For \a s1 != 0, this is equivalent to \c
5782 {compare(s1, s2) <= 0}.
5783
5784 \sa {Comparing Strings}
5785*/
5786
5787/*!
5788 \fn bool operator>(const char *s1, const QString &s2)
5789 \relates QString
5790
5791 Returns \c true if \a s1 is lexically greater than \a s2; otherwise
5792 returns \c false. Equivalent to \c {compare(s1, s2) > 0}.
5793
5794 \sa {Comparing Strings}
5795*/
5796
5797/*!
5798 \fn bool operator>=(const char *s1, const QString &s2)
5799 \relates QString
5800
5801 Returns \c true if \a s1 is lexically greater than or equal to \a s2;
5802 otherwise returns \c false. For \a s1 != 0, this is equivalent to \c
5803 {compare(s1, s2) >= 0}.
5804
5805 \sa {Comparing Strings}
5806*/
5807
5808/*!
5809 \fn const QString operator+(const QString &s1, const QString &s2)
5810 \relates QString
5811
5812 Returns a string which is the result of concatenating \a s1 and \a
5813 s2.
5814*/
5815
5816/*!
5817 \fn const QString operator+(const QString &s1, const char *s2)
5818 \relates QString
5819
5820 Returns a string which is the result of concatenating \a s1 and \a
5821 s2 (\a s2 is converted to Unicode using the QString::fromUtf8()
5822 function).
5823
5824 \sa QString::fromUtf8()
5825*/
5826
5827/*!
5828 \fn const QString operator+(const char *s1, const QString &s2)
5829 \relates QString
5830
5831 Returns a string which is the result of concatenating \a s1 and \a
5832 s2 (\a s1 is converted to Unicode using the QString::fromUtf8()
5833 function).
5834
5835 \sa QString::fromUtf8()
5836*/
5837
5838/*!
5839 \fn int QString::compare(const QString &s1, const QString &s2, Qt::CaseSensitivity cs)
5840 \since 4.2
5841
5842 Compares \a s1 with \a s2 and returns an integer less than, equal
5843 to, or greater than zero if \a s1 is less than, equal to, or
5844 greater than \a s2.
5845
5846 If \a cs is Qt::CaseSensitive, the comparison is case sensitive;
5847 otherwise the comparison is case insensitive.
5848
5849 Case sensitive comparison is based exclusively on the numeric
5850 Unicode values of the characters and is very fast, but is not what
5851 a human would expect. Consider sorting user-visible strings with
5852 localeAwareCompare().
5853
5854 \snippet qstring/main.cpp 16
5855
5856 \sa operator==(), operator<(), operator>(), {Comparing Strings}
5857*/
5858
5859/*!
5860 \fn int QString::compare(const QString &s1, QLatin1String s2, Qt::CaseSensitivity cs)
5861 \since 4.2
5862 \overload compare()
5863
5864 Performs a comparison of \a s1 and \a s2, using the case
5865 sensitivity setting \a cs.
5866*/
5867
5868/*!
5869 \fn int QString::compare(QLatin1String s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
5870
5871 \since 4.2
5872 \overload compare()
5873
5874 Performs a comparison of \a s1 and \a s2, using the case
5875 sensitivity setting \a cs.
5876*/
5877
5878/*!
5879 \fn int QString::compare(QStringView s, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
5880
5881 \since 5.12
5882 \overload compare()
5883
5884 Performs a comparison of this with \a s, using the case
5885 sensitivity setting \a cs.
5886*/
5887
5888/*!
5889 \fn int QString::compare(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
5890
5891 \since 5.14
5892 \overload compare()
5893
5894 Performs a comparison of this with \a ch, using the case
5895 sensitivity setting \a cs.
5896*/
5897
5898#if QT_STRINGVIEW_LEVEL < 2
5899/*!
5900 \overload compare()
5901 \since 4.2
5902
5903 Lexically compares this string with the \a other string and
5904 returns an integer less than, equal to, or greater than zero if
5905 this string is less than, equal to, or greater than the other
5906 string.
5907
5908 Same as compare(*this, \a other, \a cs).
5909*/
5910int QString::compare(const QString &other, Qt::CaseSensitivity cs) const noexcept
5911{
5912 return QtPrivate::compareStrings(*this, other, cs);
5913}
5914#endif
5915
5916/*!
5917 \internal
5918 \since 4.5
5919*/
5920int QString::compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2,
5921 Qt::CaseSensitivity cs) noexcept
5922{
5923 Q_ASSERT(length1 >= 0);
5924 Q_ASSERT(length2 >= 0);
5925 Q_ASSERT(data1 || length1 == 0);
5926 Q_ASSERT(data2 || length2 == 0);
5927 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2), cs);
5928}
5929
5930/*!
5931 \overload compare()
5932 \since 4.2
5933
5934 Same as compare(*this, \a other, \a cs).
5935*/
5936int QString::compare(QLatin1String other, Qt::CaseSensitivity cs) const noexcept
5937{
5938 return QtPrivate::compareStrings(*this, other, cs);
5939}
5940
5941/*!
5942 \internal
5943 \since 5.0
5944*/
5945int QString::compare_helper(const QChar *data1, qsizetype length1, const char *data2, qsizetype length2,
5946 Qt::CaseSensitivity cs)
5947{
5948 Q_ASSERT(length1 >= 0);
5949 Q_ASSERT(data1 || length1 == 0);
5950 if (!data2)
5951 return length1;
5952 if (Q_UNLIKELY(length2 < 0))
5953 length2 = qsizetype(strlen(data2));
5954 // ### make me nothrow in all cases
5955 QVarLengthArray<ushort> s2(length2);
5956 const auto beg = reinterpret_cast<QChar *>(s2.data());
5957 const auto end = QUtf8::convertToUnicode(beg, QByteArrayView(data2, length2));
5958 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(beg, end - beg), cs);
5959}
5960
5961/*!
5962 \fn int QString::compare(const QString &s1, QStringView s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
5963 \overload compare()
5964*/
5965
5966/*!
5967 \fn int QString::compare(QStringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
5968 \overload compare()
5969*/
5970
5971/*!
5972 \internal
5973 \since 4.5
5974*/
5975int QString::compare_helper(const QChar *data1, qsizetype length1, QLatin1String s2,
5976 Qt::CaseSensitivity cs) noexcept
5977{
5978 Q_ASSERT(length1 >= 0);
5979 Q_ASSERT(data1 || length1 == 0);
5980 return QtPrivate::compareStrings(QStringView(data1, length1), s2, cs);
5981}
5982
5983/*!
5984 \fn int QString::localeAwareCompare(const QString & s1, const QString & s2)
5985
5986 Compares \a s1 with \a s2 and returns an integer less than, equal
5987 to, or greater than zero if \a s1 is less than, equal to, or
5988 greater than \a s2.
5989
5990 The comparison is performed in a locale- and also
5991 platform-dependent manner. Use this function to present sorted
5992 lists of strings to the user.
5993
5994 \sa compare(), QLocale, {Comparing Strings}
5995*/
5996
5997/*!
5998 \fn int QString::localeAwareCompare(QStringView other) const
5999 \since 6.0
6000 \overload localeAwareCompare()
6001
6002 Compares this string with the \a other string and returns an
6003 integer less than, equal to, or greater than zero if this string
6004 is less than, equal to, or greater than the \a other string.
6005
6006 The comparison is performed in a locale- and also
6007 platform-dependent manner. Use this function to present sorted
6008 lists of strings to the user.
6009
6010 Same as \c {localeAwareCompare(*this, other)}.
6011
6012 \sa {Comparing Strings}
6013*/
6014
6015/*!
6016 \fn int QString::localeAwareCompare(QStringView s1, QStringView s2)
6017 \since 6.0
6018 \overload localeAwareCompare()
6019
6020 Compares \a s1 with \a s2 and returns an integer less than, equal
6021 to, or greater than zero if \a s1 is less than, equal to, or
6022 greater than \a s2.
6023
6024 The comparison is performed in a locale- and also
6025 platform-dependent manner. Use this function to present sorted
6026 lists of strings to the user.
6027
6028 \sa {Comparing Strings}
6029*/
6030
6031
// Fallback definitions of the comparison result codes that the Q_OS_WIN
// branch of localeAwareCompare_helper() switches on. They are only defined
// here when CSTR_LESS_THAN is not already defined.
#if !defined(CSTR_LESS_THAN)
#define CSTR_LESS_THAN 1
#define CSTR_EQUAL 2
#define CSTR_GREATER_THAN 3
#endif
6037
6038/*!
6039 \overload localeAwareCompare()
6040
6041 Compares this string with the \a other string and returns an
6042 integer less than, equal to, or greater than zero if this string
6043 is less than, equal to, or greater than the \a other string.
6044
6045 The comparison is performed in a locale- and also
6046 platform-dependent manner. Use this function to present sorted
6047 lists of strings to the user.
6048
6049 Same as \c {localeAwareCompare(*this, other)}.
6050
6051 \sa {Comparing Strings}
6052*/
int QString::localeAwareCompare(const QString &other) const
{
    // Hand both strings, as pointer/length pairs, to the shared helper.
    return localeAwareCompare_helper(constData(), length(), other.constData(), other.length());
}
6057
#if QT_CONFIG(icu)
// Global QThreadStorage holding the QCollator that localeAwareCompare_helper()
// uses in ICU builds; the helper stores a default-constructed QCollator in it
// when hasLocalData() reports none.
Q_GLOBAL_STATIC(QThreadStorage<QCollator>, defaultCollator)
#endif
6061
6062/*!
6063 \internal
6064 \since 4.5
6065*/
int QString::localeAwareCompare_helper(const QChar *data1, qsizetype length1,
                                       const QChar *data2, qsizetype length2)
{
    // Compares the UTF-16 ranges [data1, data1 + length1) and
    // [data2, data2 + length2). The backend is selected at build time:
    // QCollator when ICU is configured, otherwise a platform API.
    Q_ASSERT(length1 >= 0);
    Q_ASSERT(data1 || length1 == 0);
    Q_ASSERT(length2 >= 0);
    Q_ASSERT(data2 || length2 == 0);

    // do the right thing for null and empty
    if (length1 == 0 || length2 == 0)
        return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2),
                                         Qt::CaseSensitive);

#if QT_CONFIG(icu)
    // Create this thread's collator on first use, then compare with it.
    if (!defaultCollator()->hasLocalData())
        defaultCollator()->setLocalData(QCollator());
    return defaultCollator()->localData().compare(data1, length1, data2, length2);
#else
    // All non-ICU backends compare the NFC-normalized forms of both inputs.
    const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
    const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
#  if defined(Q_OS_WIN)
    int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);

    // Map the CSTR_* result codes onto -1/0/1; any other value
    // (including CSTR_EQUAL) is reported as 0.
    switch (res) {
    case CSTR_LESS_THAN:
        return -1;
    case CSTR_GREATER_THAN:
        return 1;
    default:
        return 0;
    }
#  elif defined (Q_OS_DARWIN)
    // Use CFStringCompare for comparing strings on Mac. This makes Qt order
    // strings the same way as native applications do, and also respects
    // the "Order for sorted lists" setting in the International preferences
    // panel.
    const CFStringRef thisString =
        CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
            reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
    const CFStringRef otherString =
        CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
            reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);

    const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
    // Release both CFString wrappers before returning the result.
    CFRelease(thisString);
    CFRelease(otherString);
    return result;
#  elif defined(Q_OS_UNIX)
    // declared in <string.h> (no better than QtPrivate::compareStrings() on Android, sadly)
    return strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
#  else
#     error "This case shouldn't happen"
    return QtPrivate::compareStrings(lhs, rhs, Qt::CaseSensitive);
#  endif
#endif // !QT_CONFIG(icu)
}
6122
6123
6124/*!
6125 \fn const QChar *QString::unicode() const
6126
6127 Returns a Unicode representation of the string.
6128 The result remains valid until the string is modified.
6129
6130 \note The returned string may not be '\\0'-terminated.
6131 Use size() to determine the length of the array.
6132
6133 \sa utf16(), fromRawData()
6134*/
6135
6136/*!
6137 \fn const ushort *QString::utf16() const
6138
    Returns the QString as a '\\0'-terminated array of unsigned
    shorts. The result remains valid until the string is modified.
6141
6142 The returned string is in host byte order.
6143
6144 \sa unicode()
6145*/
6146
const ushort *QString::utf16() const
{
    // This is a const member, so the reallocation below has to go through a
    // const_cast; it is only done when the data is reported as not mutable.
    if (!d->isMutable()) {
        // ensure '\0'-termination for ::fromRawData strings
        const_cast<QString*>(this)->reallocData(d.size, d->detachFlags());
    }
    return reinterpret_cast<const ushort *>(d.data());
}
6155
6156/*!
6157 Returns a string of size \a width that contains this string
6158 padded by the \a fill character.
6159
6160 If \a truncate is \c false and the size() of the string is more than
6161 \a width, then the returned string is a copy of the string.
6162
6163 \snippet qstring/main.cpp 32
6164
6165 If \a truncate is \c true and the size() of the string is more than
6166 \a width, then any characters in a copy of the string after
6167 position \a width are removed, and the copy is returned.
6168
6169 \snippet qstring/main.cpp 33
6170
6171 \sa rightJustified()
6172*/
6173
6174QString QString::leftJustified(qsizetype width, QChar fill, bool truncate) const
6175{
6176 QString result;
6177 qsizetype len = length();
6178 qsizetype padlen = width - len;
6179 if (padlen > 0) {
6180 result.resize(len+padlen);
6181 if (len)
6182 memcpy(result.d.data(), d.data(), sizeof(QChar)*len);
6183 QChar *uc = (QChar*)result.d.data() + len;
6184 while (padlen--)
6185 * uc++ = fill;
6186 } else {
6187 if (truncate)
6188 result = left(width);
6189 else
6190 result = *this;
6191 }
6192 return result;
6193}
6194
6195/*!
    Returns a string of size \a width that contains the \a fill
    character followed by the string. For example:
6198
6199 \snippet qstring/main.cpp 49
6200
6201 If \a truncate is \c false and the size() of the string is more than
6202 \a width, then the returned string is a copy of the string.
6203
6204 If \a truncate is true and the size() of the string is more than
6205 \a width, then the resulting string is truncated at position \a
6206 width.
6207
6208 \snippet qstring/main.cpp 50
6209
6210 \sa leftJustified()
6211*/
6212
6213QString QString::rightJustified(qsizetype width, QChar fill, bool truncate) const
6214{
6215 QString result;
6216 qsizetype len = length();
6217 qsizetype padlen = width - len;
6218 if (padlen > 0) {
6219 result.resize(len+padlen);
6220 QChar *uc = (QChar*)result.d.data();
6221 while (padlen--)
6222 * uc++ = fill;
6223 if (len)
6224 memcpy(static_cast<void *>(uc), static_cast<const void *>(d.data()), sizeof(QChar)*len);
6225 } else {
6226 if (truncate)
6227 result = left(width);
6228 else
6229 result = *this;
6230 }
6231 return result;
6232}
6233
6234/*!
6235 \fn QString QString::toLower() const
6236
6237 Returns a lowercase copy of the string.
6238
6239 \snippet qstring/main.cpp 75
6240
6241 The case conversion will always happen in the 'C' locale. For locale dependent
6242 case folding use QLocale::toLower()
6243
6244 \sa toUpper(), QLocale::toLower()
6245*/
6246
namespace QUnicodeTables {
/*
    \internal
    Converts the \a str string starting from the position pointed to by the \a
    it iterator, using the Unicode case traits \c Traits, and returns the
    result. The input string must not be empty (the convertCase function below
    guarantees that).

    The string type \c{T} is also a template and is either \c{const QString} or
    \c{QString}. This function can do both copy-conversion and in-place
    conversion depending on the state of the \a str parameter:
    \list
       \li \c{T} is \c{const QString}: copy-convert
       \li \c{T} is \c{QString} and its refcount != 1: copy-convert
       \li \c{T} is \c{QString} and its refcount == 1: in-place convert
    \endlist

    In copy-convert mode, the local variable \c{s} is detached from the input
    \a str. In the in-place convert mode, \a str is in moved-from state (which
    this function requires to be a valid, empty string) and \c{s} contains the
    only copy of the string, without reallocation (thus, \a it is still valid).

    There is one pathological case left: when the in-place conversion needs to
    reallocate memory to grow the buffer. In that case, we need to adjust the \a
    it pointer.
 */
template <typename T>
Q_NEVER_INLINE
static QString detachAndConvertCase(T &str, QStringIterator it, QUnicodeTables::Case which)
{
    Q_ASSERT(!str.isEmpty());
    QString s = std::move(str);             // will copy if T is const QString
    QChar *pp = s.begin() + it.index();     // will detach if necessary

    // 'it' reads code points from the input while 'pp' is the write cursor
    // into s; both advance together unless a mapping changes the length.
    do {
        const auto folded = fullConvertCase(it.next(), which);
        if (Q_UNLIKELY(folded.size() > 1)) {
            if (folded.chars[0] == *pp && folded.size() == 2) {
                // special case: only second actually changed (e.g. surrogate pairs),
                // avoid slow case
                ++pp;
                *pp++ = folded.chars[1];
            } else {
                // slow path: the string is growing
                qsizetype inpos = it.index() - 1;
                qsizetype outpos = pp - s.constBegin();

                // Replace the single unit at outpos with the whole mapping,
                // then re-derive pp, since replace() may have moved s's data.
                s.replace(outpos, 1, reinterpret_cast<const QChar *>(folded.data()), folded.size());
                pp = const_cast<QChar *>(s.constBegin()) + outpos + folded.size();

                // do we need to adjust the input iterator too?
                // if it is pointing to s's data, str is empty
                if (str.isEmpty())
                    it = QStringIterator(s.constBegin(), inpos + folded.size(), s.constEnd());
            }
        } else {
            *pp++ = folded.chars[0];
        }
    } while (it.hasNext());

    return s;
}

// Scans \a str for the first code point whose case properties for \a which
// have a non-zero diff. If one is found, the conversion is delegated to
// detachAndConvertCase() starting at that code point; otherwise \a str is
// returned as is (via std::move, which copies when T is const QString).
template <typename T>
static QString convertCase(T &str, QUnicodeTables::Case which)
{
    const QChar *p = str.constBegin();
    const QChar *e = p + str.size();

    // this avoids out of bounds check in the loop
    while (e != p && e[-1].isHighSurrogate())
        --e;

    QStringIterator it(p, e);
    while (it.hasNext()) {
        const char32_t uc = it.next();
        if (qGetProp(uc)->cases[which].diff) {
            // Step back so the helper starts at the code point just read.
            it.recede();
            return detachAndConvertCase(str, it, which);
        }
    }
    return std::move(str);
}
} // namespace QUnicodeTables
6331
QString QString::toLower_helper(const QString &str)
{
    // Overload for a const string: convertCase() is instantiated with
    // T = const QString and uses the LowerCase mapping.
    return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
}
6336
QString QString::toLower_helper(QString &str)
{
    // Overload for a non-const string: convertCase() is instantiated with
    // T = QString and uses the LowerCase mapping; \a str may be moved from.
    return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
}
6341
6342/*!
6343 \fn QString QString::toCaseFolded() const
6344
6345 Returns the case folded equivalent of the string. For most Unicode
6346 characters this is the same as toLower().
6347*/
6348
QString QString::toCaseFolded_helper(const QString &str)
{
    // Overload for a const string: convertCase() is instantiated with
    // T = const QString and uses the CaseFold mapping.
    return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
}
6353
QString QString::toCaseFolded_helper(QString &str)
{
    // Overload for a non-const string: convertCase() is instantiated with
    // T = QString and uses the CaseFold mapping; \a str may be moved from.
    return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
}
6358
6359/*!
6360 \fn QString QString::toUpper() const
6361
6362 Returns an uppercase copy of the string.
6363
6364 \snippet qstring/main.cpp 81
6365
6366 The case conversion will always happen in the 'C' locale. For locale dependent
6367 case folding use QLocale::toUpper()
6368
    \sa toLower(), QLocale::toUpper()
6370*/
6371
QString QString::toUpper_helper(const QString &str)
{
    // Overload for a const string: convertCase() is instantiated with
    // T = const QString and uses the UpperCase mapping.
    return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
}
6376
QString QString::toUpper_helper(QString &str)
{
    // Overload for a non-const string: convertCase() is instantiated with
    // T = QString and uses the UpperCase mapping; \a str may be moved from.
    return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
}
6381
6382/*!
6383 \since 5.5
6384
6385 Safely builds a formatted string from the format string \a cformat
6386 and an arbitrary list of arguments.
6387
6388 The format string supports the conversion specifiers, length modifiers,
6389 and flags provided by printf() in the standard C++ library. The \a cformat
6390 string and \c{%s} arguments must be UTF-8 encoded.
6391
6392 \note The \c{%lc} escape sequence expects a unicode character of type
6393 \c char16_t, or \c ushort (as returned by QChar::unicode()).
6394 The \c{%ls} escape sequence expects a pointer to a zero-terminated array
6395 of unicode characters of type \c char16_t, or ushort (as returned by
6396 QString::utf16()). This is at odds with the printf() in the standard C++
6397 library, which defines \c {%lc} to print a wchar_t and \c{%ls} to print
6398 a \c{wchar_t*}, and might also produce compiler warnings on platforms
6399 where the size of \c {wchar_t} is not 16 bits.
6400
6401 \warning We do not recommend using QString::asprintf() in new Qt
6402 code. Instead, consider using QTextStream or arg(), both of
6403 which support Unicode strings seamlessly and are type-safe.
6404 Here is an example that uses QTextStream:
6405
6406 \snippet qstring/main.cpp 64
6407
    For \l {QObject::tr()}{translations}, especially if the string
    contains more than one escape sequence, you should consider using
    the arg() function instead. This allows the order of the
    replacements to be controlled by the translator.
6412
6413 \sa arg()
6414*/
6415
QString QString::asprintf(const char *cformat, ...)
{
    // Collect the variadic arguments and let vasprintf() do the formatting.
    va_list ap;
    va_start(ap, cformat);
    const QString s = vasprintf(cformat, ap);
    // va_end() is called here, after vasprintf() has returned.
    va_end(ap);
    return s;
}
6424
6425static void append_utf8(QString &qs, const char *cs, int len)
6426{
6427 const int oldSize = qs.size();
6428 qs.resize(oldSize + len);
6429 const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, QByteArrayView(cs, len));
6430 qs.resize(newEnd - qs.constData());
6431}
6432
6433static uint parse_flag_characters(const char * &c) noexcept
6434{
6435 uint flags = QLocaleData::ZeroPadExponent;
6436 while (true) {
6437 switch (*c) {
6438 case '#':
6439 flags |= QLocaleData::ShowBase | QLocaleData::AddTrailingZeroes
6440 | QLocaleData::ForcePoint;
6441 break;
6442 case '0': flags |= QLocaleData::ZeroPadded; break;
6443 case '-': flags |= QLocaleData::LeftAdjusted; break;
6444 case ' ': flags |= QLocaleData::BlankBeforePositive; break;
6445 case '+': flags |= QLocaleData::AlwaysShowSign; break;
6446 case '\'': flags |= QLocaleData::GroupDigits; break;
6447 default: return flags;
6448 }
6449 ++c;
6450 }
6451}
6452
6453static int parse_field_width(const char * &c)
6454{
6455 Q_ASSERT(qIsDigit(*c));
6456
6457 // can't be negative - started with a digit
6458 // contains at least one digit
6459 const char *endp;
6460 bool ok;
6461 const qulonglong result = qstrtoull(c, &endp, 10, &ok);
6462 c = endp;
6463 while (qIsDigit(*c)) // preserve Qt 5.5 behavior of consuming all digits, no matter how many
6464 ++c;
6465 return ok && result < qulonglong(std::numeric_limits<int>::max()) ? int(result) : 0;
6466}
6467
// Length modifiers returned by parse_length_modifier(): lm_none when no
// modifier is present, otherwise the value for hh, h, l, ll, L, j, z (or Z) and t.
enum LengthMod { lm_none, lm_hh, lm_h, lm_l, lm_ll, lm_L, lm_j, lm_z, lm_t };
6469
// Advances \a c by one and returns true when it points at \a ch; otherwise
// leaves \a c untouched and returns false.
static inline bool can_consume(const char * &c, char ch) noexcept
{
    const bool matched = (*c == ch);
    if (matched)
        ++c;
    return matched;
}
6478
6479static LengthMod parse_length_modifier(const char * &c) noexcept
6480{
6481 switch (*c++) {
6482 case 'h': return can_consume(c, 'h') ? lm_hh : lm_h;
6483 case 'l': return can_consume(c, 'l') ? lm_ll : lm_l;
6484 case 'L': return lm_L;
6485 case 'j': return lm_j;
6486 case 'z':
6487 case 'Z': return lm_z;
6488 case 't': return lm_t;
6489 }
6490 --c; // don't consume *c - it wasn't a flag
6491 return lm_none;
6492}
6493
6494/*!
6495 \fn QString QString::vasprintf(const char *cformat, va_list ap)
6496 \since 5.5
6497
    Equivalent method to asprintf(), but takes a va_list \a ap
    instead of a list of variable arguments. See the asprintf()
    documentation for an explanation of \a cformat.
6501
    This method does not call the va_end macro; the caller
    is responsible for calling va_end on \a ap.
6504
6505 \sa asprintf()
6506*/
6507
QString QString::vasprintf(const char *cformat, va_list ap)
{
    // A null or empty format yields an empty (non-null) string.
    if (!cformat || !*cformat) {
        // Qt 1.x compat
        return fromLatin1("");
    }

    // Parse cformat

    QString result;
    const char *c = cformat;
    // Each iteration copies one run of literal text and then handles at most
    // one '%' escape: flags, width, precision, length modifier, conversion.
    for (;;) {
        // Copy non-escape chars to result
        const char *cb = c;
        while (*c != '\0' && *c != '%')
            c++;
        append_utf8(result, cb, qsizetype(c - cb));

        if (*c == '\0')
            break;

        // Found '%'
        const char *escape_start = c;
        ++c;

        if (*c == '\0') {
            result.append(QLatin1Char('%')); // a % at the end of the string - treat as non-escape text
            break;
        }
        if (*c == '%') {
            result.append(QLatin1Char('%')); // %%
            ++c;
            continue;
        }

        uint flags = parse_flag_characters(c);

        if (*c == '\0') {
            result.append(QLatin1String(escape_start)); // incomplete escape, treat as non-escape text
            break;
        }

        // Parse field width
        int width = -1; // -1 means unspecified
        if (qIsDigit(*c)) {
            width = parse_field_width(c);
        } else if (*c == '*') { // can't parse this in another function, not portably, at least
            width = va_arg(ap, int);
            if (width < 0)
                width = -1; // treat all negative numbers as unspecified
            ++c;
        }

        if (*c == '\0') {
            result.append(QLatin1String(escape_start)); // incomplete escape, treat as non-escape text
            break;
        }

        // Parse precision
        int precision = -1; // -1 means unspecified
        if (*c == '.') {
            ++c;
            if (qIsDigit(*c)) {
                precision = parse_field_width(c);
            } else if (*c == '*') { // can't parse this in another function, not portably, at least
                precision = va_arg(ap, int);
                if (precision < 0)
                    precision = -1; // treat all negative numbers as unspecified
                ++c;
            }
        }

        if (*c == '\0') {
            result.append(QLatin1String(escape_start)); // incomplete escape, treat as non-escape text
            break;
        }

        const LengthMod length_mod = parse_length_modifier(c);

        if (*c == '\0') {
            result.append(QLatin1String(escape_start)); // incomplete escape, treat as non-escape text
            break;
        }

        // Parse the conversion specifier and do the conversion
        QString subst;
        switch (*c) {
            case 'd':
            case 'i': {
                // Signed integers: the argument is read with the type selected
                // by the length modifier and widened to qint64.
                qint64 i;
                switch (length_mod) {
                    case lm_none: i = va_arg(ap, int); break;
                    case lm_hh: i = va_arg(ap, int); break;
                    case lm_h: i = va_arg(ap, int); break;
                    case lm_l: i = va_arg(ap, long int); break;
                    case lm_ll: i = va_arg(ap, qint64); break;
                    // NOTE(review): the 'j' modifier is read as long int here - confirm
                    // this is intended on platforms where intmax_t is wider than long.
                    case lm_j: i = va_arg(ap, long int); break;

                    /* ptrdiff_t actually, but it should be the same for us */
                    case lm_z: i = va_arg(ap, qsizetype); break;
                    case lm_t: i = va_arg(ap, qsizetype); break;
                    default: i = 0; break;
                }
                subst = QLocaleData::c()->longLongToString(i, precision, 10, width, flags);
                ++c;
                break;
            }
            case 'o':
            case 'u':
            case 'x':
            case 'X': {
                // Unsigned integers: same scheme, widened to quint64.
                quint64 u;
                switch (length_mod) {
                    case lm_none: u = va_arg(ap, uint); break;
                    case lm_hh: u = va_arg(ap, uint); break;
                    case lm_h: u = va_arg(ap, uint); break;
                    case lm_l: u = va_arg(ap, ulong); break;
                    case lm_ll: u = va_arg(ap, quint64); break;
                    case lm_t: u = va_arg(ap, size_t); break;
                    case lm_z: u = va_arg(ap, size_t); break;
                    // NOTE(review): lm_j and lm_L end up here; no argument is
                    // consumed for them and the value is 0 - confirm intended.
                    default: u = 0; break;
                }

                if (qIsUpper(*c))
                    flags |= QLocaleData::CapitalEorX;

                int base = 10;
                switch (qToLower(*c)) {
                    case 'o':
                        base = 8; break;
                    case 'u':
                        base = 10; break;
                    case 'x':
                        base = 16; break;
                    default: break;
                }
                subst = QLocaleData::c()->unsLongLongToString(u, precision, base, width, flags);
                ++c;
                break;
            }
            case 'E':
            case 'e':
            case 'F':
            case 'f':
            case 'G':
            case 'g':
            case 'A':
            case 'a': {
                // Floating point: always formatted from a double.
                double d;
                if (length_mod == lm_L)
                    d = va_arg(ap, long double); // not supported - converted to a double
                else
                    d = va_arg(ap, double);

                if (qIsUpper(*c))
                    flags |= QLocaleData::CapitalEorX;

                QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
                switch (qToLower(*c)) {
                    case 'e': form = QLocaleData::DFExponent; break;
                    case 'a':                             // not supported - decimal form used instead
                    case 'f': form = QLocaleData::DFDecimal; break;
                    case 'g': form = QLocaleData::DFSignificantDigits; break;
                    default: break;
                }
                subst = QLocaleData::c()->doubleToString(d, precision, form, width, flags);
                ++c;
                break;
            }
            case 'c': {
                // %lc takes a UCS-2 code unit, plain %c a Latin-1 character;
                // both are read from the argument list as int.
                if (length_mod == lm_l)
                    subst = QChar::fromUcs2(va_arg(ap, int));
                else
                    subst = QLatin1Char((uchar) va_arg(ap, int));
                ++c;
                break;
            }
            case 's': {
                if (length_mod == lm_l) {
                    // %ls: zero-terminated array of ushort; find its length first.
                    const ushort *buff = va_arg(ap, const ushort*);
                    const ushort *ch = buff;
                    while (*ch != 0)
                        ++ch;
                    subst.setUtf16(buff, ch - buff);
                } else
                    subst = QString::fromUtf8(va_arg(ap, const char*));
                // A precision limits the length of the substituted string.
                if (precision != -1)
                    subst.truncate(precision);
                ++c;
                break;
            }
            case 'p': {
                // Pointers are formatted as base-16 numbers with ShowBase set.
                void *arg = va_arg(ap, void*);
                const quint64 i = reinterpret_cast<quintptr>(arg);
                flags |= QLocaleData::ShowBase;
                subst = QLocaleData::c()->unsLongLongToString(i, precision, 16, width, flags);
                ++c;
                break;
            }
            case 'n':
                // %n stores the length of the output produced so far through
                // the pointer argument, whose type depends on the length modifier.
                switch (length_mod) {
                    case lm_hh: {
                        signed char *n = va_arg(ap, signed char*);
                        *n = result.length();
                        break;
                    }
                    case lm_h: {
                        short int *n = va_arg(ap, short int*);
                        *n = result.length();
                        break;
                    }
                    case lm_l: {
                        long int *n = va_arg(ap, long int*);
                        *n = result.length();
                        break;
                    }
                    case lm_ll: {
                        qint64 *n = va_arg(ap, qint64*);
                        *n = result.length();
                        break;
                    }
                    default: {
                        int *n = va_arg(ap, int*);
                        *n = result.length();
                        break;
                    }
                }
                ++c;
                break;

            default: // bad escape, treat as non-escape text
                // Copy what was parsed of the escape verbatim; *c itself is
                // left for the next iteration to handle.
                for (const char *cc = escape_start; cc != c; ++cc)
                    result.append(QLatin1Char(*cc));
                continue;
        }

        // Pad the substitution to the field width on the side chosen by the '-' flag.
        if (flags & QLocaleData::LeftAdjusted)
            result.append(subst.leftJustified(width));
        else
            result.append(subst.rightJustified(width));
    }

    return result;
}
6752
6753/*!
6754 Returns the string converted to a \c{long long} using base \a
6755 base, which is 10 by default and must be between 2 and 36, or 0.
6756 Returns 0 if the conversion fails.
6757
6758 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
6759 to \c false, and success by setting *\a{ok} to \c true.
6760
6761 If \a base is 0, the C language convention is used: If the string
6762 begins with "0x", base 16 is used; if the string begins with "0",
6763 base 8 is used; otherwise, base 10 is used.
6764
6765 The string conversion will always happen in the 'C' locale. For locale
6766 dependent conversion use QLocale::toLongLong()
6767
6768 Example:
6769
6770 \snippet qstring/main.cpp 74
6771
6772 This function ignores leading and trailing whitespace.
6773
6774 \sa number(), toULongLong(), toInt(), QLocale::toLongLong()
6775*/
6776
qint64 QString::toLongLong(bool *ok, int base) const
{
    // Delegate to the integral conversion helper, instantiated for qlonglong.
    return toIntegral_helper<qlonglong>(*this, ok, base);
}
6781
6782qlonglong QString::toIntegral_helper(QStringView string, bool *ok, int base)
6783{
6784#if defined(QT_CHECK_RANGE)
6785 if (base != 0 && (base < 2 || base > 36)) {
6786 qWarning("QString::toULongLong: Invalid base (%d)", base);
6787 base = 10;
6788 }
6789#endif
6790
6791 return QLocaleData::c()->stringToLongLong(string, base, ok, QLocale::RejectGroupSeparator);
6792}
6793
6794
6795/*!
6796 Returns the string converted to an \c{unsigned long long} using base \a
6797 base, which is 10 by default and must be between 2 and 36, or 0.
6798 Returns 0 if the conversion fails.
6799
6800 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
6801 to \c false, and success by setting *\a{ok} to \c true.
6802
6803 If \a base is 0, the C language convention is used: If the string
6804 begins with "0x", base 16 is used; if the string begins with "0",
6805 base 8 is used; otherwise, base 10 is used.
6806
6807 The string conversion will always happen in the 'C' locale. For locale
6808 dependent conversion use QLocale::toULongLong()
6809
6810 Example:
6811
6812 \snippet qstring/main.cpp 79
6813
6814 This function ignores leading and trailing whitespace.
6815
6816 \sa number(), toLongLong(), QLocale::toULongLong()
6817*/
6818
quint64 QString::toULongLong(bool *ok, int base) const
{
    // Delegate to the integral conversion helper, instantiated for qulonglong.
    return toIntegral_helper<qulonglong>(*this, ok, base);
}
6823
6824qulonglong QString::toIntegral_helper(QStringView string, bool *ok, uint base)
6825{
6826#if defined(QT_CHECK_RANGE)
6827 if (base != 0 && (base < 2 || base > 36)) {
6828 qWarning("QString::toULongLong: Invalid base (%d)", base);
6829 base = 10;
6830 }
6831#endif
6832
6833 return QLocaleData::c()->stringToUnsLongLong(string, base, ok, QLocale::RejectGroupSeparator);
6834}
6835
6836/*!
6837 \fn long QString::toLong(bool *ok, int base) const
6838
6839 Returns the string converted to a \c long using base \a
6840 base, which is 10 by default and must be between 2 and 36, or 0.
6841 Returns 0 if the conversion fails.
6842
6843 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
6844 to \c false, and success by setting *\a{ok} to \c true.
6845
6846 If \a base is 0, the C language convention is used: If the string
6847 begins with "0x", base 16 is used; if the string begins with "0",
6848 base 8 is used; otherwise, base 10 is used.
6849
6850 The string conversion will always happen in the 'C' locale. For locale
6851 dependent conversion use QLocale::toLongLong()
6852
6853 Example:
6854
6855 \snippet qstring/main.cpp 73
6856
6857 This function ignores leading and trailing whitespace.
6858
6859 \sa number(), toULong(), toInt(), QLocale::toInt()
6860*/
6861
6862/*!
6863 \fn ulong QString::toULong(bool *ok, int base) const
6864
6865 Returns the string converted to an \c{unsigned long} using base \a
6866 base, which is 10 by default and must be between 2 and 36, or 0.
6867 Returns 0 if the conversion fails.
6868
6869 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
6870 to \c false, and success by setting *\a{ok} to \c true.
6871
6872 If \a base is 0, the C language convention is used: If the string
6873 begins with "0x", base 16 is used; if the string begins with "0",
6874 base 8 is used; otherwise, base 10 is used.
6875
6876 The string conversion will always happen in the 'C' locale. For locale
6877 dependent conversion use QLocale::toULongLong()
6878
6879 Example:
6880
6881 \snippet qstring/main.cpp 78
6882
6883 This function ignores leading and trailing whitespace.
6884
6885 \sa number(), QLocale::toUInt()
6886*/
6887
6888/*!
6889 \fn int QString::toInt(bool *ok, int base) const
6890 Returns the string converted to an \c int using base \a
6891 base, which is 10 by default and must be between 2 and 36, or 0.
6892 Returns 0 if the conversion fails.
6893
6894 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
6895 to \c false, and success by setting *\a{ok} to \c true.
6896
6897 If \a base is 0, the C language convention is used: If the string
6898 begins with "0x", base 16 is used; if the string begins with "0",
6899 base 8 is used; otherwise, base 10 is used.
6900
6901 The string conversion will always happen in the 'C' locale. For locale
6902 dependent conversion use QLocale::toInt()
6903
6904 Example:
6905
6906 \snippet qstring/main.cpp 72
6907
6908 This function ignores leading and trailing whitespace.
6909
6910 \sa number(), toUInt(), toDouble(), QLocale::toInt()
6911*/
6912
6913/*!
6914 \fn uint QString::toUInt(bool *ok, int base) const
6915 Returns the string converted to an \c{unsigned int} using base \a
6916 base, which is 10 by default and must be between 2 and 36, or 0.
6917 Returns 0 if the conversion fails.
6918
6919 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
6920 to \c false, and success by setting *\a{ok} to \c true.
6921
6922 If \a base is 0, the C language convention is used: If the string
6923 begins with "0x", base 16 is used; if the string begins with "0",
6924 base 8 is used; otherwise, base 10 is used.
6925
6926 The string conversion will always happen in the 'C' locale. For locale
6927 dependent conversion use QLocale::toUInt()
6928
6929 Example:
6930
6931 \snippet qstring/main.cpp 77
6932
6933 This function ignores leading and trailing whitespace.
6934
6935 \sa number(), toInt(), QLocale::toUInt()
6936*/
6937
6938/*!
6939 \fn short QString::toShort(bool *ok, int base) const
6940
6941 Returns the string converted to a \c short using base \a
6942 base, which is 10 by default and must be between 2 and 36, or 0.
6943 Returns 0 if the conversion fails.
6944
6945 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
6946 to \c false, and success by setting *\a{ok} to \c true.
6947
6948 If \a base is 0, the C language convention is used: If the string
6949 begins with "0x", base 16 is used; if the string begins with "0",
6950 base 8 is used; otherwise, base 10 is used.
6951
6952 The string conversion will always happen in the 'C' locale. For locale
6953 dependent conversion use QLocale::toShort()
6954
6955 Example:
6956
6957 \snippet qstring/main.cpp 76
6958
6959 This function ignores leading and trailing whitespace.
6960
6961 \sa number(), toUShort(), toInt(), QLocale::toShort()
6962*/
6963
6964/*!
6965 \fn ushort QString::toUShort(bool *ok, int base) const
6966
6967 Returns the string converted to an \c{unsigned short} using base \a
6968 base, which is 10 by default and must be between 2 and 36, or 0.
6969 Returns 0 if the conversion fails.
6970
6971 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
6972 to \c false, and success by setting *\a{ok} to \c true.
6973
6974 If \a base is 0, the C language convention is used: If the string
6975 begins with "0x", base 16 is used; if the string begins with "0",
6976 base 8 is used; otherwise, base 10 is used.
6977
6978 The string conversion will always happen in the 'C' locale. For locale
6979 dependent conversion use QLocale::toUShort()
6980
6981 Example:
6982
6983 \snippet qstring/main.cpp 80
6984
6985 This function ignores leading and trailing whitespace.
6986
6987 \sa number(), toShort(), QLocale::toUShort()
6988*/
6989
6990/*!
6991 Returns the string converted to a \c double value.
6992
6993 Returns an infinity if the conversion overflows or 0.0 if the
6994 conversion fails for other reasons (e.g. underflow).
6995
6996 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
6997 to \c false, and success by setting *\a{ok} to \c true.
6998
6999 \snippet qstring/main.cpp 66
7000
7001 \warning The QString content may only contain valid numerical characters
7002 which includes the plus/minus sign, the character e used in scientific
7003 notation, and the decimal point. Including the unit or additional characters
7004 leads to a conversion error.
7005
7006 \snippet qstring/main.cpp 67
7007
7008 The string conversion will always happen in the 'C' locale. For locale
7009 dependent conversion use QLocale::toDouble()
7010
7011 \snippet qstring/main.cpp 68
7012
7013 For historical reasons, this function does not handle
7014 thousands group separators. If you need to convert such numbers,
7015 use QLocale::toDouble().
7016
7017 \snippet qstring/main.cpp 69
7018
7019 This function ignores leading and trailing whitespace.
7020
7021 \sa number(), QLocale::setDefault(), QLocale::toDouble(), trimmed()
7022*/
7023
7024double QString::toDouble(bool *ok) const
7025{
7026 return QLocaleData::c()->stringToDouble(*this, ok, QLocale::RejectGroupSeparator);
7027}
7028
7029/*!
7030 Returns the string converted to a \c float value.
7031
7032 Returns an infinity if the conversion overflows or 0.0 if the
7033 conversion fails for other reasons (e.g. underflow).
7034
7035 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7036 to \c false, and success by setting *\a{ok} to \c true.
7037
7038 \warning The QString content may only contain valid numerical characters
7039 which includes the plus/minus sign, the character e used in scientific
7040 notation, and the decimal point. Including the unit or additional characters
7041 leads to a conversion error.
7042
    The string conversion will always happen in the 'C' locale. For
    locale-dependent conversion, use QLocale::toFloat().
7045
7046 For historical reasons, this function does not handle
7047 thousands group separators. If you need to convert such numbers,
7048 use QLocale::toFloat().
7049
7050 Example:
7051
7052 \snippet qstring/main.cpp 71
7053
7054 This function ignores leading and trailing whitespace.
7055
7056 \sa number(), toDouble(), toInt(), QLocale::toFloat(), trimmed()
7057*/
7058
7059float QString::toFloat(bool *ok) const
7060{
7061 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
7062}
7063
7064/*! \fn QString &QString::setNum(int n, int base)
7065
7066 Sets the string to the printed value of \a n in the specified \a
7067 base, and returns a reference to the string.
7068
7069 The base is 10 by default and must be between 2 and 36. For bases
7070 other than 10, \a n is treated as an unsigned integer.
7071
7072 \snippet qstring/main.cpp 56
7073
7074 The formatting always uses QLocale::C, i.e., English/UnitedStates.
7075 To get a localized string representation of a number, use
7076 QLocale::toString() with the appropriate locale.
7077
7078 \sa number()
7079*/
7080
7081/*! \fn QString &QString::setNum(uint n, int base)
7082
7083 \overload
7084*/
7085
7086/*! \fn QString &QString::setNum(long n, int base)
7087
7088 \overload
7089*/
7090
7091/*! \fn QString &QString::setNum(ulong n, int base)
7092
7093 \overload
7094*/
7095
7096/*!
7097 \overload
7098*/
7099QString &QString::setNum(qlonglong n, int base)
7100{
7101 return *this = number(n, base);
7102}
7103
7104/*!
7105 \overload
7106*/
7107QString &QString::setNum(qulonglong n, int base)
7108{
7109 return *this = number(n, base);
7110}
7111
7112/*! \fn QString &QString::setNum(short n, int base)
7113
7114 \overload
7115*/
7116
7117/*! \fn QString &QString::setNum(ushort n, int base)
7118
7119 \overload
7120*/
7121
7122/*!
7123 \fn QString &QString::setNum(double n, char format, int precision)
7124 \overload
7125
7126 Sets the string to the printed value of \a n, formatted according
7127 to the given \a format and \a precision, and returns a reference
7128 to the string.
7129
7130 The \a format can be 'e', 'E', 'f', 'g' or 'G' (see
7131 \l{Argument Formats} for an explanation of the formats).
7132
7133 The formatting always uses QLocale::C, i.e., English/UnitedStates.
7134 To get a localized string representation of a number, use
7135 QLocale::toString() with the appropriate locale.
7136
7137 \sa number()
7138*/
7139
7140QString &QString::setNum(double n, char f, int prec)
7141{
7142 return *this = number(n, f, prec);
7143}
7144
7145/*!
7146 \fn QString &QString::setNum(float n, char format, int precision)
7147 \overload
7148
7149 Sets the string to the printed value of \a n, formatted according
7150 to the given \a format and \a precision, and returns a reference
7151 to the string.
7152
7153 The formatting always uses QLocale::C, i.e., English/UnitedStates.
7154 To get a localized string representation of a number, use
7155 QLocale::toString() with the appropriate locale.
7156
7157 \sa number()
7158*/
7159
7160
7161/*!
7162 \fn QString QString::number(long n, int base)
7163
7164 Returns a string equivalent of the number \a n according to the
7165 specified \a base.
7166
7167 The base is 10 by default and must be between 2
7168 and 36. For bases other than 10, \a n is treated as an
7169 unsigned integer.
7170
7171 The formatting always uses QLocale::C, i.e., English/UnitedStates.
7172 To get a localized string representation of a number, use
7173 QLocale::toString() with the appropriate locale.
7174
7175 \snippet qstring/main.cpp 35
7176
7177 \sa setNum()
7178*/
7179
7180QString QString::number(long n, int base)
7181{
7182 return number(qlonglong(n), base);
7183}
7184
7185/*!
7186 \fn QString QString::number(ulong n, int base)
7187
7188 \overload
7189*/
7190QString QString::number(ulong n, int base)
7191{
7192 return number(qulonglong(n), base);
7193}
7194
7195/*!
7196 \overload
7197*/
7198QString QString::number(int n, int base)
7199{
7200 return number(qlonglong(n), base);
7201}
7202
7203/*!
7204 \overload
7205*/
7206QString QString::number(uint n, int base)
7207{
7208 return number(qulonglong(n), base);
7209}
7210
7211/*!
7212 \overload
7213*/
7214QString QString::number(qlonglong n, int base)
7215{
7216#if defined(QT_CHECK_RANGE)
7217 if (base < 2 || base > 36) {
7218 qWarning("QString::setNum: Invalid base (%d)", base);
7219 base = 10;
7220 }
7221#endif
7222 return QLocaleData::c()->longLongToString(n, -1, base);
7223}
7224
7225/*!
7226 \overload
7227*/
7228QString QString::number(qulonglong n, int base)
7229{
7230#if defined(QT_CHECK_RANGE)
7231 if (base < 2 || base > 36) {
7232 qWarning("QString::setNum: Invalid base (%d)", base);
7233 base = 10;
7234 }
7235#endif
7236 return QLocaleData::c()->unsLongLongToString(n, -1, base);
7237}
7238
7239
7240/*!
7241 \fn QString QString::number(double n, char format, int precision)
7242
7243 Returns a string equivalent of the number \a n, formatted
7244 according to the specified \a format and \a precision. See
7245 \l{Argument Formats} for details.
7246
7247 Unlike QLocale::toString(), this function does not honor the
7248 user's locale settings.
7249
7250 \sa setNum(), QLocale::toString()
7251*/
7252QString QString::number(double n, char f, int prec)
7253{
7254 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
7255 uint flags = QLocaleData::ZeroPadExponent;
7256
7257 if (qIsUpper(f))
7258 flags |= QLocaleData::CapitalEorX;
7259
7260 switch (qToLower(f)) {
7261 case 'f':
7262 form = QLocaleData::DFDecimal;
7263 break;
7264 case 'e':
7265 form = QLocaleData::DFExponent;
7266 break;
7267 case 'g':
7268 form = QLocaleData::DFSignificantDigits;
7269 break;
7270 default:
7271#if defined(QT_CHECK_RANGE)
7272 qWarning("QString::setNum: Invalid format char '%c'", f);
7273#endif
7274 break;
7275 }
7276
7277 return QLocaleData::c()->doubleToString(n, prec, form, -1, flags);
7278}
7279
7280namespace {
7281template<class ResultList, class StringSource>
7282static ResultList splitString(const StringSource &source, QStringView sep,
7283 Qt::SplitBehavior behavior, Qt::CaseSensitivity cs)
7284{
7285 ResultList list;
7286 typename StringSource::size_type start = 0;
7287 typename StringSource::size_type end;
7288 typename StringSource::size_type extra = 0;
7289 while ((end = QtPrivate::findString(QStringView(source.constData(), source.size()), start + extra, sep, cs)) != -1) {
7290 if (start != end || behavior == Qt::KeepEmptyParts)
7291 list.append(source.mid(start, end - start));
7292 start = end + sep.size();
7293 extra = (sep.size() == 0 ? 1 : 0);
7294 }
7295 if (start != source.size() || behavior == Qt::KeepEmptyParts)
7296 list.append(source.mid(start));
7297 return list;
7298}
7299
7300} // namespace
7301
7302/*!
7303 Splits the string into substrings wherever \a sep occurs, and
7304 returns the list of those strings. If \a sep does not match
7305 anywhere in the string, split() returns a single-element list
7306 containing this string.
7307
7308 \a cs specifies whether \a sep should be matched case
7309 sensitively or case insensitively.
7310
7311 If \a behavior is Qt::SkipEmptyParts, empty entries don't
7312 appear in the result. By default, empty entries are kept.
7313
7314 Example:
7315
7316 \snippet qstring/main.cpp 62
7317
7318 If \a sep is empty, split() returns an empty string, followed
7319 by each of the string's characters, followed by another empty string:
7320
7321 \snippet qstring/main.cpp 62-empty
7322
7323 To understand this behavior, recall that the empty string matches
7324 everywhere, so the above is qualitatively the same as:
7325
7326 \snippet qstring/main.cpp 62-slashes
7327
7328 \sa QStringList::join(), section()
7329
7330 \since 5.14
7331*/
7332QStringList QString::split(const QString &sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
7333{
7334 return splitString<QStringList>(*this, sep, behavior, cs);
7335}
7336
7337/*!
7338 \overload
7339 \since 5.14
7340*/
7341QStringList QString::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
7342{
7343 return splitString<QStringList>(*this, QStringView(&sep, 1), behavior, cs);
7344}
7345
7346/*!
7347 \fn QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
7348 \fn QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
7349
7350
7351 Splits the string into substring references wherever \a sep occurs, and
7352 returns the list of those strings.
7353
7354 See QString::split() for how \a sep, \a behavior and \a cs interact to form
7355 the result.
7356
    \note All references are valid as long as this string is alive. Destroying this
    string will cause all references to be dangling pointers.
7359
7360 \since 6.0
7361*/
7362QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
7363{
7364 return splitString<QList<QStringView>>(QStringView(*this), sep, behavior, cs);
7365}
7366
7367QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
7368{
7369 return split(QStringView(&sep, 1), behavior, cs);
7370}
7371
7372#if QT_CONFIG(regularexpression)
7373namespace {
7374template<class ResultList, typename String>
7375static ResultList splitString(const String &source, const QRegularExpression &re,
7376 Qt::SplitBehavior behavior)
7377{
7378 ResultList list;
7379 if (!re.isValid()) {
7380 qWarning("QString::split: invalid QRegularExpression object");
7381 return list;
7382 }
7383
7384 qsizetype start = 0;
7385 qsizetype end = 0;
7386 QRegularExpressionMatchIterator iterator = re.globalMatch(source);
7387 while (iterator.hasNext()) {
7388 QRegularExpressionMatch match = iterator.next();
7389 end = match.capturedStart();
7390 if (start != end || behavior == Qt::KeepEmptyParts)
7391 list.append(source.mid(start, end - start));
7392 start = match.capturedEnd();
7393 }
7394
7395 if (start != source.size() || behavior == Qt::KeepEmptyParts)
7396 list.append(source.mid(start));
7397
7398 return list;
7399}
7400} // namespace
7401
7402/*!
7403 \overload
7404 \since 5.14
7405
7406 Splits the string into substrings wherever the regular expression
7407 \a re matches, and returns the list of those strings. If \a re
7408 does not match anywhere in the string, split() returns a
7409 single-element list containing this string.
7410
7411 Here is an example where we extract the words in a sentence
7412 using one or more whitespace characters as the separator:
7413
7414 \snippet qstring/main.cpp 90
7415
7416 Here is a similar example, but this time we use any sequence of
7417 non-word characters as the separator:
7418
7419 \snippet qstring/main.cpp 91
7420
7421 Here is a third example where we use a zero-length assertion,
7422 \b{\\b} (word boundary), to split the string into an
7423 alternating sequence of non-word and word tokens:
7424
7425 \snippet qstring/main.cpp 92
7426
7427 \sa QStringList::join(), section()
7428*/
7429QStringList QString::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
7430{
7431 return splitString<QStringList>(*this, re, behavior);
7432}
7433
7434/*!
7435 \since 6.0
7436
7437 Splits the string into substring views wherever the regular expression \a re
7438 matches, and returns the list of those strings. If \a re does not match
7439 anywhere in the string, split() returns a single-element list containing
7440 this string as view.
7441
7442 \note The views in the returned list are sub-views of this view; as such,
7443 they reference the same data as it and only remain valid for as long as that
7444 data remains live.
7445*/
7446QList<QStringView> QStringView::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
7447{
7448 return splitString<QList<QStringView>>(*this, re, behavior);
7449}
7450
7451#endif // QT_CONFIG(regularexpression)
7452
7453/*!
7454 \enum QString::NormalizationForm
7455
7456 This enum describes the various normalized forms of Unicode text.
7457
7458 \value NormalizationForm_D Canonical Decomposition
7459 \value NormalizationForm_C Canonical Decomposition followed by Canonical Composition
7460 \value NormalizationForm_KD Compatibility Decomposition
7461 \value NormalizationForm_KC Compatibility Decomposition followed by Canonical Composition
7462
7463 \sa normalized(),
7464 {http://www.unicode.org/reports/tr15/}{Unicode Standard Annex #15}
7465*/
7466
7467/*!
7468 \since 4.5
7469
7470 Returns a copy of this string repeated the specified number of \a times.
7471
7472 If \a times is less than 1, an empty string is returned.
7473
7474 Example:
7475
7476 \snippet code/src_corelib_text_qstring.cpp 8
7477*/
7478QString QString::repeated(qsizetype times) const
7479{
7480 if (d.size == 0)
7481 return *this;
7482
7483 if (times <= 1) {
7484 if (times == 1)
7485 return *this;
7486 return QString();
7487 }
7488
7489 const qsizetype resultSize = times * d.size;
7490
7491 QString result;
7492 result.reserve(resultSize);
7493 if (result.capacity() != resultSize)
7494 return QString(); // not enough memory
7495
7496 memcpy(result.d.data(), d.data(), d.size * sizeof(QChar));
7497
7498 qsizetype sizeSoFar = d.size;
7499 char16_t *end = result.d.data() + sizeSoFar;
7500
7501 const qsizetype halfResultSize = resultSize >> 1;
7502 while (sizeSoFar <= halfResultSize) {
7503 memcpy(end, result.d.data(), sizeSoFar * sizeof(QChar));
7504 end += sizeSoFar;
7505 sizeSoFar <<= 1;
7506 }
7507 memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(QChar));
7508 result.d.data()[resultSize] = '\0';
7509 result.d.size = resultSize;
7510 return result;
7511}
7512
// Normalizes *data in place to the Unicode normalization form \a mode, starting
// at index \a from and following the rules of Unicode \a version.
//
// The steps, in order: ASCII early-out, optional rollback of normalization
// corrections newer than \a version, quick check, decomposition, canonical
// ordering and, unless a decomposed form (D or KD) was requested, composition.
void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
{
    const QChar *p = data->constData() + from;
    // Nothing to do when isAscii() reports the whole tail [from, length) as ASCII.
    if (isAscii(p, p + data->length() - from))
        return;
    // NOTE(review): the comparison below is only meaningful if isAscii() moved p
    // forward (presumably to the first non-ASCII character) - confirm against
    // its signature.
    if (p > data->constData() + from)
        from = p - data->constData() - 1; // need one before the non-ASCII to perform NFC

    if (version == QChar::Unicode_Unassigned) {
        // No explicit version requested: use the current one.
        version = QChar::currentUnicodeVersion();
    } else if (int(version) <= NormalizationCorrectionsVersionMax) {
        // An older version was requested: for every correction newer than that
        // version, put the old mapping back in place of the corrected code point.
        const QString &s = *data;
        // Writable buffer, fetched only once a replacement is actually needed
        // (presumably to avoid touching strings that need no change - confirm).
        QChar *d = nullptr;
        for (int i = 0; i < NumNormalizationCorrections; ++i) {
            const NormalizationCorrection &n = uc_normalization_corrections[i];
            if (n.version > version) {
                qsizetype pos = from;
                if (QChar::requiresSurrogates(n.ucs4)) {
                    // The code point occupies two UTF-16 units: look for the
                    // high/low pair and overwrite both units.
                    char16_t ucs4High = QChar::highSurrogate(n.ucs4);
                    char16_t ucs4Low = QChar::lowSurrogate(n.ucs4);
                    char16_t oldHigh = QChar::highSurrogate(n.old_mapping);
                    char16_t oldLow = QChar::lowSurrogate(n.old_mapping);
                    // Stop one short of the end so that pos + 1 is always valid.
                    while (pos < s.length() - 1) {
                        if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) {
                            if (!d)
                                d = data->data();
                            d[pos] = QChar(oldHigh);
                            d[++pos] = QChar(oldLow); // also skips the low surrogate
                        }
                        ++pos;
                    }
                } else {
                    // The code point fits in a single UTF-16 unit.
                    while (pos < s.length()) {
                        if (s.at(pos).unicode() == n.ucs4) {
                            if (!d)
                                d = data->data();
                            d[pos] = QChar(n.old_mapping);
                        }
                        ++pos;
                    }
                }
            }
        }
    }

    // The quick check may decide that no further work is needed; it also
    // receives &from and may update the start index.
    if (normalizationQuickCheckHelper(data, mode, from, &from))
        return;

    // The flag is true for the forms that order before NormalizationForm_KD
    // (presumably the canonical forms D and C - confirm against the enum).
    decomposeHelper(data, mode < QString::NormalizationForm_KD, version, from);

    canonicalOrderHelper(data, version, from);

    // The decomposed forms are finished here; the others still need composing.
    if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
        return;

    composeHelper(data, version, from);
}
7570
7571/*!
7572 Returns the string in the given Unicode normalization \a mode,
7573 according to the given \a version of the Unicode standard.
7574*/
7575QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const
7576{
7577 QString copy = *this;
7578 qt_string_normalize(&copy, mode, version, 0);
7579 return copy;
7580}
7581
7582
// Summary of the place markers (%N and %LN) in a format string, restricted to
// the lowest marker number present. Filled in by findArgEscapes() and read by
// replaceArgEscapes().
struct ArgEscapeData
{
    int min_escape;            // lowest place-marker number found (INT_MAX if none was found)
    int occurrences;           // how many markers carry that lowest number
    int locale_occurrences;    // how many of those occurrences carry the 'L' flag
                               // (i.e. are written %LN)
    int escape_len;            // summed length, in QChars, of those occurrences,
                               // each counted from its '%' through its last digit
};
7591
7592static ArgEscapeData findArgEscapes(QStringView s)
7593{
7594 const QChar *uc_begin = s.begin();
7595 const QChar *uc_end = s.end();
7596
7597 ArgEscapeData d;
7598
7599 d.min_escape = INT_MAX;
7600 d.occurrences = 0;
7601 d.escape_len = 0;
7602 d.locale_occurrences = 0;
7603
7604 const QChar *c = uc_begin;
7605 while (c != uc_end) {
7606 while (c != uc_end && c->unicode() != '%')
7607 ++c;
7608
7609 if (c == uc_end)
7610 break;
7611 const QChar *escape_start = c;
7612 if (++c == uc_end)
7613 break;
7614
7615 bool locale_arg = false;
7616 if (c->unicode() == 'L') {
7617 locale_arg = true;
7618 if (++c == uc_end)
7619 break;
7620 }
7621
7622 int escape = c->digitValue();
7623 if (escape == -1)
7624 continue;
7625
7626 ++c;
7627
7628 if (c != uc_end) {
7629 int next_escape = c->digitValue();
7630 if (next_escape != -1) {
7631 escape = (10 * escape) + next_escape;
7632 ++c;
7633 }
7634 }
7635
7636 if (escape > d.min_escape)
7637 continue;
7638
7639 if (escape < d.min_escape) {
7640 d.min_escape = escape;
7641 d.occurrences = 0;
7642 d.escape_len = 0;
7643 d.locale_occurrences = 0;
7644 }
7645
7646 ++d.occurrences;
7647 if (locale_arg)
7648 ++d.locale_occurrences;
7649 d.escape_len += c - escape_start;
7650 }
7651 return d;
7652}
7653
// Builds the result of one arg() call: returns a copy of s in which every place
// marker whose number equals d.min_escape is replaced by arg, or by larg when
// the marker carries the 'L' flag. Each replacement is padded with fillChar to
// at least |field_width| characters: before the text when field_width is
// positive, after it when field_width is negative.
//
// d must describe s (the callers in this file obtain it from findArgEscapes())
// and d.occurrences must be greater than zero: the scanning loop relies on
// that to stay inside s.
static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, int field_width,
                                 QStringView arg, QStringView larg, QChar fillChar)
{
    const QChar *uc_begin = s.begin();
    const QChar *uc_end = s.end();

    // Exact length of the result: the input minus the markers being replaced,
    // plus one (possibly padded) replacement per marker.
    int abs_field_width = qAbs(field_width);
    qsizetype result_len = s.length()
                           - d.escape_len
                           + (d.occurrences - d.locale_occurrences)
                           *qMax(abs_field_width, arg.length())
                           + d.locale_occurrences
                           *qMax(abs_field_width, larg.length());

    // The buffer is created uninitialized and written through rc below; the
    // assertions at the end check that exactly result_len characters were written.
    QString result(result_len, Qt::Uninitialized);
    QChar *result_buff = const_cast<QChar *>(result.unicode());

    QChar *rc = result_buff;        // write position in the result
    const QChar *c = uc_begin;      // read position in s
    int repl_cnt = 0;               // replacements done so far
    while (c != uc_end) {
        /* We don't have to check if we run off the end of the string with c,
           because as long as d.occurrences > 0 we KNOW there are valid escape
           sequences. */

        const QChar *text_start = c;

        while (c->unicode() != '%')
            ++c;

        const QChar *escape_start = c++;

        bool locale_arg = false;
        if (c->unicode() == 'L') {
            locale_arg = true;
            ++c;
        }

        // Parse a one- or two-digit marker number; c is left on its last digit.
        int escape = c->digitValue();
        if (escape != -1) {
            if (c + 1 != uc_end && (c + 1)->digitValue() != -1) {
                escape = (10 * escape) + (c + 1)->digitValue();
                ++c;
            }
        }

        if (escape != d.min_escape) {
            // Not the marker being replaced (or not a marker at all): copy
            // everything read so far verbatim and carry on scanning from c.
            memcpy(rc, text_start, (c - text_start)*sizeof(QChar));
            rc += c - text_start;
        }
        else {
            // Step past the marker's last digit.
            ++c;

            // Copy the plain text that precedes the marker.
            memcpy(rc, text_start, (escape_start - text_start)*sizeof(QChar));
            rc += escape_start - text_start;

            // Number of fill characters needed to reach the field width
            // (zero when the replacement is already wide enough).
            uint pad_chars;
            if (locale_arg)
                pad_chars = qMax(abs_field_width, larg.length()) - larg.length();
            else
                pad_chars = qMax(abs_field_width, arg.length()) - arg.length();

            if (field_width > 0) { // left padded
                for (uint i = 0; i < pad_chars; ++i)
                    *rc++ = fillChar;
            }

            if (locale_arg) {
                memcpy(rc, larg.data(), larg.length()*sizeof(QChar));
                rc += larg.length();
            }
            else {
                memcpy(rc, arg.data(), arg.length()*sizeof(QChar));
                rc += arg.length();
            }

            if (field_width < 0) { // right padded
                for (uint i = 0; i < pad_chars; ++i)
                    *rc++ = fillChar;
            }

            // After the last replacement the rest of s is copied in one go,
            // which also ends the loop.
            if (++repl_cnt == d.occurrences) {
                memcpy(rc, c, (uc_end - c)*sizeof(QChar));
                rc += uc_end - c;
                Q_ASSERT(rc - result_buff == result_len);
                c = uc_end;
            }
        }
    }
    Q_ASSERT(rc == result_buff + result_len);

    return result;
}
7747
7748#if QT_STRINGVIEW_LEVEL < 2
7749/*!
7750 Returns a copy of this string with the lowest numbered place marker
7751 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
7752
7753 \a fieldWidth specifies the minimum amount of space that argument \a
7754 a shall occupy. If \a a requires less space than \a fieldWidth, it
7755 is padded to \a fieldWidth with character \a fillChar. A positive
7756 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
7757 produces left-aligned text.
7758
7759 This example shows how we might create a \c status string for
7760 reporting progress while processing a list of files:
7761
7762 \snippet qstring/main.cpp 11
7763
7764 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
7765 %2. Finally, \c arg(fileName) replaces \c %3.
7766
7767 One advantage of using arg() over asprintf() is that the order of the
7768 numbered place markers can change, if the application's strings are
7769 translated into other languages, but each arg() will still replace
7770 the lowest numbered unreplaced place marker, no matter where it
    appears. Also, if place marker \c %i appears more than once in the
    string, arg() replaces all of them.
7773
7774 If there is no unreplaced place marker remaining, a warning message
7775 is output and the result is undefined. Place marker numbers must be
7776 in the range 1 to 99.
7777*/
7778QString QString::arg(const QString &a, int fieldWidth, QChar fillChar) const
7779{
7780 return arg(qToStringViewIgnoringNull(a), fieldWidth, fillChar);
7781}
7782#endif // QT_STRINGVIEW_LEVEL < 2
7783
7784/*!
7785 \overload
7786 \since 5.10
7787
7788 Returns a copy of this string with the lowest-numbered place-marker
7789 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
7790
7791 \a fieldWidth specifies the minimum amount of space that \a a
7792 shall occupy. If \a a requires less space than \a fieldWidth, it
7793 is padded to \a fieldWidth with character \a fillChar. A positive
7794 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
7795 produces left-aligned text.
7796
7797 This example shows how we might create a \c status string for
7798 reporting progress while processing a list of files:
7799
7800 \snippet qstring/main.cpp 11-qstringview
7801
7802 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
7803 %2. Finally, \c arg(fileName) replaces \c %3.
7804
7805 One advantage of using arg() over asprintf() is that the order of the
7806 numbered place markers can change, if the application's strings are
7807 translated into other languages, but each arg() will still replace
7808 the lowest-numbered unreplaced place-marker, no matter where it
7809 appears. Also, if place-marker \c %i appears more than once in the
7810 string, arg() replaces all of them.
7811
7812 If there is no unreplaced place-marker remaining, a warning message
7813 is printed and the result is undefined. Place-marker numbers must be
7814 in the range 1 to 99.
7815*/
7816QString QString::arg(QStringView a, int fieldWidth, QChar fillChar) const
7817{
7818 ArgEscapeData d = findArgEscapes(*this);
7819
7820 if (Q_UNLIKELY(d.occurrences == 0)) {
7821 qWarning("QString::arg: Argument missing: %ls, %ls", qUtf16Printable(*this),
7822 qUtf16Printable(a.toString()));
7823 return *this;
7824 }
7825 return replaceArgEscapes(*this, d, fieldWidth, a, a, fillChar);
7826}
7827
7828/*!
7829 \overload
7830 \since 5.10
7831
7832 Returns a copy of this string with the lowest-numbered place-marker
7833 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
7834
7835 \a fieldWidth specifies the minimum amount of space that \a a
7836 shall occupy. If \a a requires less space than \a fieldWidth, it
7837 is padded to \a fieldWidth with character \a fillChar. A positive
7838 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
7839 produces left-aligned text.
7840
7841 One advantage of using arg() over asprintf() is that the order of the
7842 numbered place markers can change, if the application's strings are
7843 translated into other languages, but each arg() will still replace
7844 the lowest-numbered unreplaced place-marker, no matter where it
7845 appears. Also, if place-marker \c %i appears more than once in the
7846 string, arg() replaces all of them.
7847
7848 If there is no unreplaced place-marker remaining, a warning message
7849 is printed and the result is undefined. Place-marker numbers must be
7850 in the range 1 to 99.
7851*/
7852QString QString::arg(QLatin1String a, int fieldWidth, QChar fillChar) const
7853{
7854 QVarLengthArray<char16_t> utf16(a.size());
7855 qt_from_latin1(utf16.data(), a.data(), a.size());
7856 return arg(QStringView(utf16.data(), utf16.size()), fieldWidth, fillChar);
7857}
7858
7859/*! \fn QString QString::arg(int a, int fieldWidth, int base, QChar fillChar) const
7860 \overload arg()
7861
7862 The \a a argument is expressed in base \a base, which is 10 by
7863 default and must be between 2 and 36. For bases other than 10, \a a
7864 is treated as an unsigned integer.
7865
7866 \a fieldWidth specifies the minimum amount of space that \a a is
7867 padded to and filled with the character \a fillChar. A positive
7868 value produces right-aligned text; a negative value produces
7869 left-aligned text.
7870
7871 The '%' can be followed by an 'L', in which case the sequence is
7872 replaced with a localized representation of \a a. The conversion
7873 uses the default locale, set by QLocale::setDefault(). If no default
7874 locale was specified, the "C" locale is used. The 'L' flag is
7875 ignored if \a base is not 10.
7876
7877 \snippet qstring/main.cpp 12
7878 \snippet qstring/main.cpp 14
7879
7880 If \a fillChar is '0' (the number 0, ASCII 48), the locale's zero is
7881 used. For negative numbers, zero padding might appear before the
7882 minus sign.
7883*/
7884
7885/*! \fn QString QString::arg(uint a, int fieldWidth, int base, QChar fillChar) const
7886 \overload arg()
7887
7888 The \a base argument specifies the base to use when converting the
7889 integer \a a into a string. The base must be between 2 and 36.
7890
7891 If \a fillChar is '0' (the number 0, ASCII 48), the locale's zero is
7892 used. For negative numbers, zero padding might appear before the
7893 minus sign.
7894*/
7895
7896/*! \fn QString QString::arg(long a, int fieldWidth, int base, QChar fillChar) const
7897 \overload arg()
7898
7899 \a fieldWidth specifies the minimum amount of space that \a a is
7900 padded to and filled with the character \a fillChar. A positive
7901 value produces right-aligned text; a negative value produces
7902 left-aligned text.
7903
7904 The \a a argument is expressed in the given \a base, which is 10 by
7905 default and must be between 2 and 36.
7906
7907 The '%' can be followed by an 'L', in which case the sequence is
7908 replaced with a localized representation of \a a. The conversion
7909 uses the default locale. The default locale is determined from the
7910 system's locale settings at application startup. It can be changed
7911 using QLocale::setDefault(). The 'L' flag is ignored if \a base is
7912 not 10.
7913
7914 \snippet qstring/main.cpp 12
7915 \snippet qstring/main.cpp 14
7916
7917 If \a fillChar is '0' (the number 0, ASCII 48), the locale's zero is
7918 used. For negative numbers, zero padding might appear before the
7919 minus sign.
7920*/
7921
7922/*! \fn QString QString::arg(ulong a, int fieldWidth, int base, QChar fillChar) const
7923 \overload arg()
7924
7925 \a fieldWidth specifies the minimum amount of space that \a a is
7926 padded to and filled with the character \a fillChar. A positive
7927 value produces right-aligned text; a negative value produces
7928 left-aligned text.
7929
7930 The \a base argument specifies the base to use when converting the
7931 integer \a a to a string. The base must be between 2 and 36, with 8
7932 giving octal, 10 decimal, and 16 hexadecimal numbers.
7933
7934 If \a fillChar is '0' (the number 0, ASCII 48), the locale's zero is
7935 used. For negative numbers, zero padding might appear before the
7936 minus sign.
7937*/
7938
7939/*!
7940 \overload arg()
7941
7942 \a fieldWidth specifies the minimum amount of space that \a a is
7943 padded to and filled with the character \a fillChar. A positive
7944 value produces right-aligned text; a negative value produces
7945 left-aligned text.
7946
7947 The \a base argument specifies the base to use when converting the
7948 integer \a a into a string. The base must be between 2 and 36, with
7949 8 giving octal, 10 decimal, and 16 hexadecimal numbers.
7950
7951 If \a fillChar is '0' (the number 0, ASCII 48), the locale's zero is
7952 used. For negative numbers, zero padding might appear before the
7953 minus sign.
7954*/
7955QString QString::arg(qlonglong a, int fieldWidth, int base, QChar fillChar) const
7956{
7957 ArgEscapeData d = findArgEscapes(*this);
7958
7959 if (d.occurrences == 0) {
7960 qWarning() << "QString::arg: Argument missing:" << *this << ',' << a;
7961 return *this;
7962 }
7963
7964 unsigned flags = QLocaleData::NoFlags;
7965 if (fillChar == QLatin1Char('0'))
7966 flags = QLocaleData::ZeroPadded;
7967
7968 QString arg;
7969 if (d.occurrences > d.locale_occurrences)
7970 arg = QLocaleData::c()->longLongToString(a, -1, base, fieldWidth, flags);
7971
7972 QString locale_arg;
7973 if (d.locale_occurrences > 0) {
7974 QLocale locale;
7975 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
7976 flags |= QLocaleData::GroupDigits;
7977 locale_arg = locale.d->m_data->longLongToString(a, -1, base, fieldWidth, flags);
7978 }
7979
7980 return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar);
7981}
7982
7983/*!
7984 \overload arg()
7985
7986 \a fieldWidth specifies the minimum amount of space that \a a is
7987 padded to and filled with the character \a fillChar. A positive
7988 value produces right-aligned text; a negative value produces
7989 left-aligned text.
7990
7991 The \a base argument specifies the base to use when converting the
7992 integer \a a into a string. \a base must be between 2 and 36, with 8
7993 giving octal, 10 decimal, and 16 hexadecimal numbers.
7994
7995 If \a fillChar is '0' (the number 0, ASCII 48), the locale's zero is
7996 used. For negative numbers, zero padding might appear before the
7997 minus sign.
7998*/
7999QString QString::arg(qulonglong a, int fieldWidth, int base, QChar fillChar) const
8000{
8001 ArgEscapeData d = findArgEscapes(*this);
8002
8003 if (d.occurrences == 0) {
8004 qWarning() << "QString::arg: Argument missing:" << *this << ',' << a;
8005 return *this;
8006 }
8007
8008 unsigned flags = QLocaleData::NoFlags;
8009 if (fillChar == QLatin1Char('0'))
8010 flags = QLocaleData::ZeroPadded;
8011
8012 QString arg;
8013 if (d.occurrences > d.locale_occurrences)
8014 arg = QLocaleData::c()->unsLongLongToString(a, -1, base, fieldWidth, flags);
8015
8016 QString locale_arg;
8017 if (d.locale_occurrences > 0) {
8018 QLocale locale;
8019 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8020 flags |= QLocaleData::GroupDigits;
8021 locale_arg = locale.d->m_data->unsLongLongToString(a, -1, base, fieldWidth, flags);
8022 }
8023
8024 return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar);
8025}
8026
8027/*!
8028 \overload arg()
8029
8030 \fn QString QString::arg(short a, int fieldWidth, int base, QChar fillChar) const
8031
8032 \a fieldWidth specifies the minimum amount of space that \a a is
8033 padded to and filled with the character \a fillChar. A positive
8034 value produces right-aligned text; a negative value produces
8035 left-aligned text.
8036
8037 The \a base argument specifies the base to use when converting the
8038 integer \a a into a string. The base must be between 2 and 36, with
8039 8 giving octal, 10 decimal, and 16 hexadecimal numbers.
8040
8041 If \a fillChar is '0' (the number 0, ASCII 48), the locale's zero is
8042 used. For negative numbers, zero padding might appear before the
8043 minus sign.
8044*/
8045
8046/*!
8047 \fn QString QString::arg(ushort a, int fieldWidth, int base, QChar fillChar) const
8048 \overload arg()
8049
8050 \a fieldWidth specifies the minimum amount of space that \a a is
8051 padded to and filled with the character \a fillChar. A positive
8052 value produces right-aligned text; a negative value produces
8053 left-aligned text.
8054
8055 The \a base argument specifies the base to use when converting the
8056 integer \a a into a string. The base must be between 2 and 36, with
8057 8 giving octal, 10 decimal, and 16 hexadecimal numbers.
8058
8059 If \a fillChar is '0' (the number 0, ASCII 48), the locale's zero is
8060 used. For negative numbers, zero padding might appear before the
8061 minus sign.
8062*/
8063
8064/*!
8065 \overload arg()
8066*/
8067QString QString::arg(QChar a, int fieldWidth, QChar fillChar) const
8068{
8069 return arg(QStringView{&a, 1}, fieldWidth, fillChar);
8070}
8071
8072/*!
8073 \overload arg()
8074
8075 The \a a argument is interpreted as a Latin-1 character.
8076*/
8077QString QString::arg(char a, int fieldWidth, QChar fillChar) const
8078{
8079 return arg(QLatin1Char(a), fieldWidth, fillChar);
8080}
8081
8082/*!
8083 \fn QString QString::arg(double a, int fieldWidth, char format, int precision, QChar fillChar) const
8084 \overload arg()
8085
8086 Argument \a a is formatted according to the specified \a format and
8087 \a precision. See \l{Argument Formats} for details.
8088
8089 \a fieldWidth specifies the minimum amount of space that \a a is
8090 padded to and filled with the character \a fillChar. A positive
8091 value produces right-aligned text; a negative value produces
8092 left-aligned text.
8093
8094 \snippet code/src_corelib_text_qstring.cpp 2
8095
8096 The '%' can be followed by an 'L', in which case the sequence is
8097 replaced with a localized representation of \a a. The conversion
8098 uses the default locale, set by QLocale::setDefault(). If no
8099 default locale was specified, the "C" locale is used.
8100
8101 If \a fillChar is '0' (the number 0, ASCII 48), this function will
8102 use the locale's zero to pad. For negative numbers, the zero padding
8103 will probably appear before the minus sign.
8104
8105 \sa QLocale::toString()
8106*/
8107QString QString::arg(double a, int fieldWidth, char fmt, int prec, QChar fillChar) const
8108{
8109 ArgEscapeData d = findArgEscapes(*this);
8110
8111 if (d.occurrences == 0) {
8112 qWarning("QString::arg: Argument missing: %s, %g", toLocal8Bit().data(), a);
8113 return *this;
8114 }
8115
8116 unsigned flags = QLocaleData::NoFlags;
8117 if (fillChar == QLatin1Char('0'))
8118 flags |= QLocaleData::ZeroPadded;
8119
8120 if (qIsUpper(fmt))
8121 flags |= QLocaleData::CapitalEorX;
8122
8123 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8124 switch (qToLower(fmt)) {
8125 case 'f':
8126 form = QLocaleData::DFDecimal;
8127 break;
8128 case 'e':
8129 form = QLocaleData::DFExponent;
8130 break;
8131 case 'g':
8132 form = QLocaleData::DFSignificantDigits;
8133 break;
8134 default:
8135#if defined(QT_CHECK_RANGE)
8136 qWarning("QString::arg: Invalid format char '%c'", fmt);
8137#endif
8138 break;
8139 }
8140
8141 QString arg;
8142 if (d.occurrences > d.locale_occurrences)
8143 arg = QLocaleData::c()->doubleToString(a, prec, form, fieldWidth, flags | QLocaleData::ZeroPadExponent);
8144
8145 QString locale_arg;
8146 if (d.locale_occurrences > 0) {
8147 QLocale locale;
8148
8149 const QLocale::NumberOptions numberOptions = locale.numberOptions();
8150 if (!(numberOptions & QLocale::OmitGroupSeparator))
8151 flags |= QLocaleData::GroupDigits;
8152 if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
8153 flags |= QLocaleData::ZeroPadExponent;
8154 if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)
8155 flags |= QLocaleData::AddTrailingZeroes;
8156 locale_arg = locale.d->m_data->doubleToString(a, prec, form, fieldWidth, flags);
8157 }
8158
8159 return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar);
8160}
8161
8162static inline char16_t to_unicode(const QChar c) { return c.unicode(); }
8163static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
8164
8165template <typename Char>
8166static int getEscape(const Char *uc, qsizetype *pos, qsizetype len, int maxNumber = 999)
8167{
8168 int i = *pos;
8169 ++i;
8170 if (i < len && uc[i] == QLatin1Char('L'))
8171 ++i;
8172 if (i < len) {
8173 int escape = to_unicode(uc[i]) - '0';
8174 if (uint(escape) >= 10U)
8175 return -1;
8176 ++i;
8177 while (i < len) {
8178 int digit = to_unicode(uc[i]) - '0';
8179 if (uint(digit) >= 10U)
8180 break;
8181 escape = (escape * 10) + digit;
8182 ++i;
8183 }
8184 if (escape <= maxNumber) {
8185 *pos = i;
8186 return escape;
8187 }
8188 }
8189 return -1;
8190}
8191
8192/*
8193 Algorithm for multiArg:
8194
8195 1. Parse the string as a sequence of verbatim text and placeholders (%L?\d{,3}).
8196 The L is parsed and accepted for compatibility with non-multi-arg, but since
8197 multiArg only accepts strings as replacements, the localization request can
8198 be safely ignored.
8199 2. The result of step (1) is a list of (string-ref,int)-tuples. The string-ref
8200 either points at text to be copied verbatim (in which case the int is -1),
8201 or, initially, at the textual representation of the placeholder. In that case,
8202 the int contains the numerical number as parsed from the placeholder.
8203 3. Next, collect all the non-negative ints found, sort them in ascending order and
8204 remove duplicates.
  3a. If the result has more entries than multiArg() was given replacement strings,
8206 we have found placeholders we can't satisfy with replacement strings. That is
8207 fine (there could be another .arg() call coming after this one), so just
8208 truncate the result to the number of actual multiArg() replacement strings.
  3b. If the result has fewer entries than multiArg() was given replacement strings,
8210 the string is missing placeholders. This is an error that the user should be
8211 warned about.
8212 4. The result of step (3) is a mapping from the index of any replacement string to
8213 placeholder number. This is the wrong way around, but since placeholder
8214 numbers could get as large as 999, while we typically don't have more than 9
8215 replacement strings, we trade 4K of sparsely-used memory for doing a reverse lookup
8216 each time we need to map a placeholder number to a replacement string index
8217 (that's a linear search; but still *much* faster than using an associative container).
8218 5. Next, for each of the tuples found in step (1), do the following:
8219 5a. If the int is negative, do nothing.
8220 5b. Otherwise, if the int is found in the result of step (3) at index I, replace
8221 the string-ref with a string-ref for the (complete) I'th replacement string.
8222 5c. Otherwise, do nothing.
8223 6. Concatenate all string refs into a single result string.
8224*/
8225
8226namespace {
8227struct Part
8228{
8229 Part() = default; // for QVarLengthArray; do not use
8230 constexpr Part(QStringView s, int num = -1)
8231 : tag{QtPrivate::ArgBase::U16}, number{num}, data{s.utf16()}, size{s.size()} {}
8232 constexpr Part(QLatin1String s, int num = -1)
8233 : tag{QtPrivate::ArgBase::L1}, number{num}, data{s.data()}, size{s.size()} {}
8234
8235 void reset(QStringView s) noexcept { *this = {s, number}; }
8236 void reset(QLatin1String s) noexcept { *this = {s, number}; }
8237
8238 QtPrivate::ArgBase::Tag tag;
8239 int number;
8240 const void *data;
8241 qsizetype size;
8242};
8243} // unnamed namespace
8244
8245Q_DECLARE_TYPEINFO(Part, Q_PRIMITIVE_TYPE);
8246
8247namespace {
8248
8249enum { ExpectedParts = 32 };
8250
8251typedef QVarLengthArray<Part, ExpectedParts> ParseResult;
8252typedef QVarLengthArray<int, ExpectedParts/2> ArgIndexToPlaceholderMap;
8253
8254template <typename StringView>
8255static ParseResult parseMultiArgFormatString(StringView s)
8256{
8257 ParseResult result;
8258
8259 const auto uc = s.data();
8260 const auto len = s.size();
8261 const auto end = len - 1;
8262 qsizetype i = 0;
8263 qsizetype last = 0;
8264
8265 while (i < end) {
8266 if (uc[i] == QLatin1Char('%')) {
8267 qsizetype percent = i;
8268 int number = getEscape(uc, &i, len);
8269 if (number != -1) {
8270 if (last != percent)
8271 result.push_back(Part{s.mid(last, percent - last)}); // literal text (incl. failed placeholders)
8272 result.push_back(Part{s.mid(percent, i - percent), number}); // parsed placeholder
8273 last = i;
8274 continue;
8275 }
8276 }
8277 ++i;
8278 }
8279
8280 if (last < len)
8281 result.push_back(Part{s.mid(last, len - last)}); // trailing literal text
8282
8283 return result;
8284}
8285
8286static ArgIndexToPlaceholderMap makeArgIndexToPlaceholderMap(const ParseResult &parts)
8287{
8288 ArgIndexToPlaceholderMap result;
8289
8290 for (Part part : parts) {
8291 if (part.number >= 0)
8292 result.push_back(part.number);
8293 }
8294
8295 std::sort(result.begin(), result.end());
8296 result.erase(std::unique(result.begin(), result.end()),
8297 result.end());
8298
8299 return result;
8300}
8301
8302static qsizetype resolveStringRefsAndReturnTotalSize(ParseResult &parts, const ArgIndexToPlaceholderMap &argIndexToPlaceholderMap, const QtPrivate::ArgBase *args[])
8303{
8304 using namespace QtPrivate;
8305 qsizetype totalSize = 0;
8306 for (Part &part : parts) {
8307 if (part.number != -1) {
8308 const auto it = std::find(argIndexToPlaceholderMap.begin(), argIndexToPlaceholderMap.end(), part.number);
8309 if (it != argIndexToPlaceholderMap.end()) {
8310 const auto &arg = *args[it - argIndexToPlaceholderMap.begin()];
8311 switch (arg.tag) {
8312 case ArgBase::L1:
8313 part.reset(static_cast<const QLatin1StringArg&>(arg).string);
8314 break;
8315 case ArgBase::U8:
8316 Q_UNREACHABLE(); // waiting for QUtf8String...
8317 break;
8318 case ArgBase::U16:
8319 part.reset(static_cast<const QStringViewArg&>(arg).string);
8320 break;
8321 }
8322 }
8323 }
8324 totalSize += part.size;
8325 }
8326 return totalSize;
8327}
8328
8329} // unnamed namespace
8330
8331Q_ALWAYS_INLINE QString to_string(QLatin1String s) noexcept { return s; }
8332Q_ALWAYS_INLINE QString to_string(QStringView s) noexcept { return s.toString(); }
8333
8334template <typename StringView>
8335static QString argToQStringImpl(StringView pattern, size_t numArgs, const QtPrivate::ArgBase **args)
8336{
8337 // Step 1-2 above
8338 ParseResult parts = parseMultiArgFormatString(pattern);
8339
8340 // 3-4
8341 ArgIndexToPlaceholderMap argIndexToPlaceholderMap = makeArgIndexToPlaceholderMap(parts);
8342
8343 if (static_cast<size_t>(argIndexToPlaceholderMap.size()) > numArgs) // 3a
8344 argIndexToPlaceholderMap.resize(qsizetype(numArgs));
8345 else if (Q_UNLIKELY(static_cast<size_t>(argIndexToPlaceholderMap.size()) < numArgs)) // 3b
8346 qWarning("QString::arg: %d argument(s) missing in %ls",
8347 int(numArgs - argIndexToPlaceholderMap.size()), qUtf16Printable(to_string(pattern)));
8348
8349 // 5
8350 const qsizetype totalSize = resolveStringRefsAndReturnTotalSize(parts, argIndexToPlaceholderMap, args);
8351
8352 // 6:
8353 QString result(totalSize, Qt::Uninitialized);
8354 auto out = const_cast<QChar*>(result.constData());
8355
8356 for (Part part : parts) {
8357 switch (part.tag) {
8358 case QtPrivate::ArgBase::L1:
8359 if (part.size) {
8360 qt_from_latin1(reinterpret_cast<char16_t*>(out),
8361 reinterpret_cast<const char*>(part.data), part.size);
8362 }
8363 break;
8364 case QtPrivate::ArgBase::U8:
8365 Q_UNREACHABLE(); // waiting for QUtf8String
8366 break;
8367 case QtPrivate::ArgBase::U16:
8368 if (part.size)
8369 memcpy(out, part.data, part.size * sizeof(QChar));
8370 break;
8371 }
8372 out += part.size;
8373 }
8374
8375 return result;
8376}
8377
8378QString QtPrivate::argToQString(QStringView pattern, size_t n, const ArgBase **args)
8379{
8380 return argToQStringImpl(pattern, n, args);
8381}
8382
8383QString QtPrivate::argToQString(QLatin1String pattern, size_t n, const ArgBase **args)
8384{
8385 return argToQStringImpl(pattern, n, args);
8386}
8387
8388/*! \fn bool QString::isSimpleText() const
8389
8390 \internal
8391*/
8392bool QString::isSimpleText() const
8393{
8394 const char16_t *p = d.data();
8395 const char16_t * const end = p + d.size;
8396 while (p < end) {
8397 char16_t uc = *p;
8398 // sort out regions of complex text formatting
8399 if (uc > 0x058f && (uc < 0x1100 || uc > 0xfb0f)) {
8400 return false;
8401 }
8402 p++;
8403 }
8404
8405 return true;
8406}
8407
8408/*! \fn bool QString::isRightToLeft() const
8409
8410 Returns \c true if the string is read right to left.
8411
8412 \sa QStringView::isRightToLeft()
8413*/
8414bool QString::isRightToLeft() const
8415{
8416 return QtPrivate::isRightToLeft(QStringView(*this));
8417}
8418
8419/*!
8420 \fn bool QString::isValidUtf16() const noexcept
8421 \since 5.15
8422
8423 Returns \c true if the string contains valid UTF-16 encoded data,
8424 or \c false otherwise.
8425
8426 Note that this function does not perform any special validation of the
8427 data; it merely checks if it can be successfully decoded from UTF-16.
8428 The data is assumed to be in host byte order; the presence of a BOM
8429 is meaningless.
8430
8431 \sa QStringView::isValidUtf16()
8432*/
8433
8434/*! \fn QChar *QString::data()
8435
8436 Returns a pointer to the data stored in the QString. The pointer
8437 can be used to access and modify the characters that compose the
8438 string.
8439
8440 Unlike constData() and unicode(), the returned data is always
8441 '\\0'-terminated.
8442
8443 Example:
8444
8445 \snippet qstring/main.cpp 19
8446
8447 Note that the pointer remains valid only as long as the string is
8448 not modified by other means. For read-only access, constData() is
8449 faster because it never causes a \l{deep copy} to occur.
8450
8451 \sa constData(), operator[]()
8452*/
8453
8454/*! \fn const QChar *QString::data() const
8455
8456 \overload
8457
8458 \note The returned string may not be '\\0'-terminated.
8459 Use size() to determine the length of the array.
8460
8461 \sa fromRawData()
8462*/
8463
8464/*! \fn const QChar *QString::constData() const
8465
8466 Returns a pointer to the data stored in the QString. The pointer
8467 can be used to access the characters that compose the string.
8468
8469 Note that the pointer remains valid only as long as the string is
8470 not modified.
8471
8472 \note The returned string may not be '\\0'-terminated.
8473 Use size() to determine the length of the array.
8474
8475 \sa data(), operator[](), fromRawData()
8476*/
8477
8478/*! \fn void QString::push_front(const QString &other)
8479
8480 This function is provided for STL compatibility, prepending the
8481 given \a other string to the beginning of this string. It is
8482 equivalent to \c prepend(other).
8483
8484 \sa prepend()
8485*/
8486
8487/*! \fn void QString::push_front(QChar ch)
8488
8489 \overload
8490
8491 Prepends the given \a ch character to the beginning of this string.
8492*/
8493
8494/*! \fn void QString::push_back(const QString &other)
8495
8496 This function is provided for STL compatibility, appending the
8497 given \a other string onto the end of this string. It is
8498 equivalent to \c append(other).
8499
8500 \sa append()
8501*/
8502
8503/*! \fn void QString::push_back(QChar ch)
8504
8505 \overload
8506
8507 Appends the given \a ch character onto the end of this string.
8508*/
8509
8510/*! \fn void QString::shrink_to_fit()
8511 \since 5.10
8512
8513 This function is provided for STL compatibility. It is
8514 equivalent to squeeze().
8515
8516 \sa squeeze()
8517*/
8518
8519/*!
8520 \fn std::string QString::toStdString() const
8521
8522 Returns a std::string object with the data contained in this
8523 QString. The Unicode data is converted into 8-bit characters using
8524 the toUtf8() function.
8525
8526 This method is mostly useful to pass a QString to a function
8527 that accepts a std::string object.
8528
8529 \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString()
8530*/
8531
8532/*!
8533 Constructs a QString that uses the first \a size Unicode characters
8534 in the array \a unicode. The data in \a unicode is \e not
8535 copied. The caller must be able to guarantee that \a unicode will
8536 not be deleted or modified as long as the QString (or an
8537 unmodified copy of it) exists.
8538
8539 Any attempts to modify the QString or copies of it will cause it
8540 to create a deep copy of the data, ensuring that the raw data
8541 isn't modified.
8542
8543 Here is an example of how we can use a QRegularExpression on raw data in
8544 memory without requiring to copy the data into a QString:
8545
8546 \snippet qstring/main.cpp 22
8547 \snippet qstring/main.cpp 23
8548
8549 \warning A string created with fromRawData() is \e not
8550 '\\0'-terminated, unless the raw data contains a '\\0' character
8551 at position \a size. This means unicode() will \e not return a
8552 '\\0'-terminated string (although utf16() does, at the cost of
8553 copying the raw data).
8554
8555 \sa fromUtf16(), setRawData()
8556*/
8557QString QString::fromRawData(const QChar *unicode, qsizetype size)
8558{
8559 return QString(DataPointer::fromRawData(const_cast<char16_t *>(reinterpret_cast<const char16_t *>(unicode)), size));
8560}
8561
8562/*!
8563 \since 4.7
8564
8565 Resets the QString to use the first \a size Unicode characters
8566 in the array \a unicode. The data in \a unicode is \e not
8567 copied. The caller must be able to guarantee that \a unicode will
8568 not be deleted or modified as long as the QString (or an
8569 unmodified copy of it) exists.
8570
    This function can be used instead of fromRawData() to re-use
    existing QString objects to save memory re-allocations.
8573
8574 \sa fromRawData()
8575*/
8576QString &QString::setRawData(const QChar *unicode, qsizetype size)
8577{
8578 if (!unicode || !size) {
8579 clear();
8580 }
8581 *this = fromRawData(unicode, size);
8582 return *this;
8583}
8584
8585/*! \fn QString QString::fromStdU16String(const std::u16string &str)
8586 \since 5.5
8587
8588 Returns a copy of the \a str string. The given string is assumed
8589 to be encoded in UTF-16.
8590
8591 \sa fromUtf16(), fromStdWString(), fromStdU32String()
8592*/
8593
8594/*!
8595 \fn std::u16string QString::toStdU16String() const
8596 \since 5.5
8597
8598 Returns a std::u16string object with the data contained in this
8599 QString. The Unicode data is the same as returned by the utf16()
8600 method.
8601
8602 \sa utf16(), toStdWString(), toStdU32String()
8603*/
8604
8605/*! \fn QString QString::fromStdU32String(const std::u32string &str)
8606 \since 5.5
8607
8608 Returns a copy of the \a str string. The given string is assumed
8609 to be encoded in UCS-4.
8610
8611 \sa fromUcs4(), fromStdWString(), fromStdU16String()
8612*/
8613
8614/*!
8615 \fn std::u32string QString::toStdU32String() const
8616 \since 5.5
8617
8618 Returns a std::u32string object with the data contained in this
8619 QString. The Unicode data is the same as returned by the toUcs4()
8620 method.
8621
8622 \sa toUcs4(), toStdWString(), toStdU16String()
8623*/
8624
8625/*! \class QLatin1String
8626 \inmodule QtCore
    \brief The QLatin1String class provides a thin wrapper around a US-ASCII/Latin-1 encoded string literal.
8628
8629 \ingroup string-processing
8630 \reentrant
8631
8632 Many of QString's member functions are overloaded to accept
8633 \c{const char *} instead of QString. This includes the copy
8634 constructor, the assignment operator, the comparison operators,
8635 and various other functions such as \l{QString::insert()}{insert()}, \l{QString::replace()}{replace()},
8636 and \l{QString::indexOf()}{indexOf()}. These functions
8637 are usually optimized to avoid constructing a QString object for
8638 the \c{const char *} data. For example, assuming \c str is a
8639 QString,
8640
8641 \snippet code/src_corelib_text_qstring.cpp 3
8642
8643 is much faster than
8644
8645 \snippet code/src_corelib_text_qstring.cpp 4
8646
8647 because it doesn't construct four temporary QString objects and
8648 make a deep copy of the character data.
8649
8650 Applications that define \c QT_NO_CAST_FROM_ASCII (as explained
8651 in the QString documentation) don't have access to QString's
8652 \c{const char *} API. To provide an efficient way of specifying
8653 constant Latin-1 strings, Qt provides the QLatin1String, which is
8654 just a very thin wrapper around a \c{const char *}. Using
8655 QLatin1String, the example code above becomes
8656
8657 \snippet code/src_corelib_text_qstring.cpp 5
8658
8659 This is a bit longer to type, but it provides exactly the same
8660 benefits as the first version of the code, and is faster than
8661 converting the Latin-1 strings using QString::fromLatin1().
8662
8663 Thanks to the QString(QLatin1String) constructor,
8664 QLatin1String can be used everywhere a QString is expected. For
8665 example:
8666
8667 \snippet code/src_corelib_text_qstring.cpp 6
8668
8669 \note If the function you're calling with a QLatin1String
8670 argument isn't actually overloaded to take QLatin1String, the
8671 implicit conversion to QString will trigger a memory allocation,
8672 which is usually what you want to avoid by using QLatin1String
8673 in the first place. In those cases, using QStringLiteral may be
8674 the better option.
8675
8676 \sa QString, QLatin1Char, {QStringLiteral()}{QStringLiteral}, QT_NO_CAST_FROM_ASCII
8677*/
8678
8679/*!
8680 \typedef QLatin1String::value_type
8681 \since 5.10
8682
8683 Alias for \c{const char}. Provided for compatibility with the STL.
8684*/
8685
8686/*!
8687 \typedef QLatin1String::difference_type
8688 \since 5.10
8689
8690 Alias for \c{qsizetype}. Provided for compatibility with the STL.
8691*/
8692
8693/*!
8694 \typedef QLatin1String::size_type
8695 \since 5.10
8696
8697 Alias for \c{qsizetype}. Provided for compatibility with the STL.
8698
    \note In versions prior to Qt 6, this was an alias for \c{int},
8700 restricting the amount of data that could be held in a QLatin1String
8701 on 64-bit architectures.
8702*/
8703
8704/*!
8705 \typedef QLatin1String::reference
8706 \since 5.10
8707
8708 Alias for \c{value_type &}. Provided for compatibility with the STL.
8709*/
8710
8711/*!
8712 \typedef QLatin1String::const_reference
8713 \since 5.11
8714
8715 Alias for \c{reference}. Provided for compatibility with the STL.
8716*/
8717
8718/*!
8719 \typedef QLatin1String::iterator
8720 \since 5.10
8721
8722 QLatin1String does not support mutable iterators, so this is the same
8723 as const_iterator.
8724
8725 \sa const_iterator, reverse_iterator
8726*/
8727
8728/*!
8729 \typedef QLatin1String::const_iterator
8730 \since 5.10
8731
8732 \sa iterator, const_reverse_iterator
8733*/
8734
8735/*!
8736 \typedef QLatin1String::reverse_iterator
8737 \since 5.10
8738
8739 QLatin1String does not support mutable reverse iterators, so this is the
8740 same as const_reverse_iterator.
8741
8742 \sa const_reverse_iterator, iterator
8743*/
8744
8745/*!
8746 \typedef QLatin1String::const_reverse_iterator
8747 \since 5.10
8748
8749 \sa reverse_iterator, const_iterator
8750*/
8751
8752/*! \fn QLatin1String::QLatin1String()
8753 \since 5.6
8754
8755 Constructs a QLatin1String object that stores a nullptr.
8756*/
8757
8758/*! \fn QLatin1String::QLatin1String(const char *str)
8759
8760 Constructs a QLatin1String object that stores \a str.
8761
8762 The string data is \e not copied. The caller must be able to
8763 guarantee that \a str will not be deleted or modified as long as
8764 the QLatin1String object exists.
8765
8766 \sa latin1()
8767*/
8768
8769/*! \fn QLatin1String::QLatin1String(const char *str, qsizetype size)
8770
8771 Constructs a QLatin1String object that stores \a str with \a size.
8772
8773 The string data is \e not copied. The caller must be able to
8774 guarantee that \a str will not be deleted or modified as long as
8775 the QLatin1String object exists.
8776
8777 \sa latin1()
8778*/
8779
8780/*!
8781 \fn QLatin1String::QLatin1String(const char *first, const char *last)
8782 \since 5.10
8783
8784 Constructs a QLatin1String object that stores \a first with length
8785 (\a last - \a first).
8786
8787 The range \c{[first,last)} must remain valid for the lifetime of
8788 this Latin-1 string object.
8789
8790 Passing \nullptr as \a first is safe if \a last is \nullptr,
8791 too, and results in a null Latin-1 string.
8792
8793 The behavior is undefined if \a last precedes \a first, \a first
8794 is \nullptr and \a last is not, or if \c{last - first >
8795 INT_MAX}.
8796*/
8797
8798/*! \fn QLatin1String::QLatin1String(const QByteArray &str)
8799
8800 Constructs a QLatin1String object that stores \a str.
8801
8802 The string data is \e not copied. The caller must be able to
8803 guarantee that \a str will not be deleted or modified as long as
8804 the QLatin1String object exists.
8805
8806 \sa latin1()
8807*/
8808
8809/*!
8810 \fn QLatin1String::toString() const
8811 \since 6.0
8812
8813 Converts this Latin-1 string into a QString. Equivalent to
8814 \code
8815 return QString(*this);
8816 \endcode
8817*/
8818
8819/*! \fn const char *QLatin1String::latin1() const
8820
8821 Returns the Latin-1 string stored in this object.
8822*/
8823
8824/*! \fn const char *QLatin1String::data() const
8825
8826 Returns the Latin-1 string stored in this object.
8827*/
8828
8829/*! \fn QLatin1String::size() const
8830
8831 Returns the size of the Latin-1 string stored in this object.
8832
    \note In versions prior to Qt 6, this function returned \c{int},
8834 restricting the amount of data that could be held in a QLatin1String
8835 on 64-bit architectures.
8836*/
8837
8838/*! \fn bool QLatin1String::isNull() const
8839 \since 5.10
8840
8841 Returns whether the Latin-1 string stored in this object is null
8842 (\c{data() == nullptr}) or not.
8843
8844 \sa isEmpty(), data()
8845*/
8846
8847/*! \fn bool QLatin1String::isEmpty() const
8848 \since 5.10
8849
8850 Returns whether the Latin-1 string stored in this object is empty
8851 (\c{size() == 0}) or not.
8852
8853 \sa isNull(), size()
8854*/
8855
8856/*! \fn QLatin1Char QLatin1String::at(qsizetype pos) const
8857 \since 5.8
8858
8859 Returns the character at position \a pos in this object.
8860
8861 \note This function performs no error checking.
8862 The behavior is undefined when \a pos < 0 or \a pos >= size().
8863
8864 \sa operator[]()
8865*/
8866
8867/*! \fn QLatin1Char QLatin1String::operator[](qsizetype pos) const
8868 \since 5.8
8869
8870 Returns the character at position \a pos in this object.
8871
8872 \note This function performs no error checking.
8873 The behavior is undefined when \a pos < 0 or \a pos >= size().
8874
8875 \sa at()
8876*/
8877
8878/*!
8879 \fn QLatin1Char QLatin1String::front() const
8880 \since 5.10
8881
8882 Returns the first character in the string.
8883 Same as \c{at(0)}.
8884
8885 This function is provided for STL compatibility.
8886
8887 \warning Calling this function on an empty string constitutes
8888 undefined behavior.
8889
8890 \sa back(), at(), operator[]()
8891*/
8892
8893/*!
8894 \fn QLatin1Char QLatin1String::back() const
8895 \since 5.10
8896
8897 Returns the last character in the string.
8898 Same as \c{at(size() - 1)}.
8899
8900 This function is provided for STL compatibility.
8901
8902 \warning Calling this function on an empty string constitutes
8903 undefined behavior.
8904
8905 \sa front(), at(), operator[]()
8906*/
8907
8908/*!
8909 \fn int QLatin1String::compare(QStringView str, Qt::CaseSensitivity cs) const
8910 \fn int QLatin1String::compare(QLatin1String l1, Qt::CaseSensitivity cs) const
8911 \fn int QLatin1String::compare(QChar ch) const
8912 \fn int QLatin1String::compare(QChar ch, Qt::CaseSensitivity cs) const
8913 \since 5.14
8914
8915 Returns an integer that compares to zero as this Latin-1 string compares to the
8916 string-view \a str, Latin-1 string \a l1, or character \a ch, respectively.
8917
8918 If \a cs is Qt::CaseSensitive (the default), the comparison is case sensitive;
8919 otherwise the comparison is case-insensitive.
8920
8921 \sa operator==(), operator<(), operator>()
8922*/
8923
8924
8925/*!
8926 \fn bool QLatin1String::startsWith(QStringView str, Qt::CaseSensitivity cs) const
8927 \since 5.10
8928 \fn bool QLatin1String::startsWith(QLatin1String l1, Qt::CaseSensitivity cs) const
8929 \since 5.10
8930 \fn bool QLatin1String::startsWith(QChar ch) const
8931 \since 5.10
8932 \fn bool QLatin1String::startsWith(QChar ch, Qt::CaseSensitivity cs) const
8933 \since 5.10
8934
8935 Returns \c true if this Latin-1 string starts with string-view \a str,
8936 Latin-1 string \a l1, or character \a ch, respectively;
8937 otherwise returns \c false.
8938
8939 If \a cs is Qt::CaseSensitive (the default), the search is case-sensitive;
8940 otherwise the search is case-insensitive.
8941
8942 \sa endsWith()
8943*/
8944
8945/*!
8946 \fn bool QLatin1String::endsWith(QStringView str, Qt::CaseSensitivity cs) const
8947 \since 5.10
8948 \fn bool QLatin1String::endsWith(QLatin1String l1, Qt::CaseSensitivity cs) const
8949 \since 5.10
8950 \fn bool QLatin1String::endsWith(QChar ch) const
8951 \since 5.10
8952 \fn bool QLatin1String::endsWith(QChar ch, Qt::CaseSensitivity cs) const
8953 \since 5.10
8954
8955 Returns \c true if this Latin-1 string ends with string-view \a str,
8956 Latin-1 string \a l1, or character \a ch, respectively;
8957 otherwise returns \c false.
8958
8959 If \a cs is Qt::CaseSensitive (the default), the search is case-sensitive;
8960 otherwise the search is case-insensitive.
8961
8962 \sa startsWith()
8963*/
8964
8965/*!
8966 \fn int QLatin1String::indexOf(QStringView str, qsizetype from = 0, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
8967 \fn int QLatin1String::indexOf(QLatin1String l1, qsizetype from = 0, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
8968 \fn int QLatin1String::indexOf(QChar c, qsizetype from = 0, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
8969 \since 5.14
8970
    Returns the index position of the first occurrence of the string-view \a str,
    Latin-1 string \a l1, or character \a c, respectively, in this Latin-1 string,
    searching forward from index position \a from. Returns -1 if \a str, \a l1
    or \a c, respectively, is not found.
8974
8975 If \a cs is Qt::CaseSensitive (default), the search is case
8976 sensitive; otherwise the search is case insensitive.
8977
8978 If \a from is -1, the search starts at the last character; if it is
8979 -2, at the next to last character and so on.
8980
8981 \sa QString::indexOf()
8982*/
8983
8984/*!
8985 \fn bool QLatin1String::contains(QStringView str, Qt::CaseSensitivity cs) const
8986 \fn bool QLatin1String::contains(QLatin1String l1, Qt::CaseSensitivity cs) const
8987 \fn bool QLatin1String::contains(QChar c, Qt::CaseSensitivity cs) const
8988 \since 5.14
8989
 Returns \c true if this Latin-1 string contains an occurrence of the string-view
 \a str, Latin-1 string \a l1, or character \a c; otherwise returns \c false.
8992
8993 If \a cs is Qt::CaseSensitive (the default), the search is
8994 case-sensitive; otherwise the search is case-insensitive.
8995
8996 \sa indexOf(), QStringView::contains(), QStringView::indexOf(), QString::indexOf()
8997*/
8998
8999/*!
9000 \fn int QLatin1String::lastIndexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
9001 \fn int QLatin1String::lastIndexOf(QLatin1String l1, qsizetype from, Qt::CaseSensitivity cs) const
9002 \fn int QLatin1String::lastIndexOf(QChar c, qsizetype from, Qt::CaseSensitivity cs) const
9003 \since 5.14
9004
 Returns the index position of the last occurrence of the string-view \a str,
 Latin-1 string \a l1, or character \a c, respectively, in this Latin-1 string,
 searching backward from index position \a from. If \a from is -1 (default),
 the search starts at the last character; if \a from is -2, at the next to last
 character and so on. Returns -1 if \a str, \a l1 or \a c is not found,
 respectively.
9010
9011 If \a cs is Qt::CaseSensitive (default), the search is case
9012 sensitive; otherwise the search is case insensitive.
9013
9014 \sa indexOf(), QStringView::lastIndexOf(), QStringView::indexOf(), QString::indexOf()
9015*/
9016
9017/*!
9018 \fn QLatin1String::const_iterator QLatin1String::begin() const
9019 \since 5.10
9020
9021 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the first character in
9022 the string.
9023
9024 This function is provided for STL compatibility.
9025
9026 \sa end(), cbegin(), rbegin(), data()
9027*/
9028
9029/*!
9030 \fn QLatin1String::const_iterator QLatin1String::cbegin() const
9031 \since 5.10
9032
9033 Same as begin().
9034
9035 This function is provided for STL compatibility.
9036
9037 \sa cend(), begin(), crbegin(), data()
9038*/
9039
9040/*!
9041 \fn QLatin1String::const_iterator QLatin1String::end() const
9042 \since 5.10
9043
 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the imaginary
 character after the last character in the string.
9046
9047 This function is provided for STL compatibility.
9048
9049 \sa begin(), cend(), rend()
9050*/
9051
9052/*! \fn QLatin1String::const_iterator QLatin1String::cend() const
9053 \since 5.10
9054
9055 Same as end().
9056
9057 This function is provided for STL compatibility.
9058
9059 \sa cbegin(), end(), crend()
9060*/
9061
9062/*!
9063 \fn QLatin1String::const_reverse_iterator QLatin1String::rbegin() const
9064 \since 5.10
9065
9066 Returns a const \l{STL-style iterators}{STL-style} reverse iterator pointing to the first
9067 character in the string, in reverse order.
9068
9069 This function is provided for STL compatibility.
9070
9071 \sa rend(), crbegin(), begin()
9072*/
9073
9074/*!
9075 \fn QLatin1String::const_reverse_iterator QLatin1String::crbegin() const
9076 \since 5.10
9077
9078 Same as rbegin().
9079
9080 This function is provided for STL compatibility.
9081
9082 \sa crend(), rbegin(), cbegin()
9083*/
9084
9085/*!
9086 \fn QLatin1String::const_reverse_iterator QLatin1String::rend() const
9087 \since 5.10
9088
 Returns a const \l{STL-style iterators}{STL-style} reverse iterator pointing to one past
 the last character in the string, in reverse order.
9091
9092 This function is provided for STL compatibility.
9093
9094 \sa rbegin(), crend(), end()
9095*/
9096
9097/*!
9098 \fn QLatin1String::const_reverse_iterator QLatin1String::crend() const
9099 \since 5.10
9100
9101 Same as rend().
9102
9103 This function is provided for STL compatibility.
9104
9105 \sa crbegin(), rend(), cend()
9106*/
9107
9108/*!
9109 \fn QLatin1String QLatin1String::mid(qsizetype start, qsizetype length) const
9110 \since 5.8
9111
9112 Returns the substring of length \a length starting at position
9113 \a start in this Latin-1 string.
9114
9115 \obsolete Use sliced() instead in new code.
9116
9117 Returns an empty Latin-1 string if \a start exceeds the
 length of this Latin-1 string. If there are fewer than \a length characters
9119 available in this Latin-1 string starting at \a start, or if
9120 \a length is negative (default), the function returns all characters that
9121 are available from \a start.
9122
9123 \sa first(), last(), sliced(), chopped(), chop(), truncate()
9124*/
9125
9126/*!
9127 \fn QLatin1String QLatin1String::left(qsizetype length) const
9128 \since 5.8
9129
9130 \obsolete Use first() instead in new code.
9131
9132 Returns the substring of length \a length starting at position
9133 0 in this Latin-1 string.
9134
9135 The entire Latin-1 string is returned if \a length is greater than or equal
9136 to size(), or less than zero.
9137
9138 \sa first(), last(), sliced(), startsWith(), chopped(), chop(), truncate()
9139*/
9140
9141/*!
9142 \fn QLatin1String QLatin1String::right(qsizetype length) const
9143 \since 5.8
9144
9145 \obsolete Use last() instead in new code.
9146
9147 Returns the substring of length \a length starting at position
9148 size() - \a length in this Latin-1 string.
9149
9150 The entire Latin-1 string is returned if \a length is greater than or equal
9151 to size(), or less than zero.
9152
9153 \sa first(), last(), sliced(), endsWith(), chopped(), chop(), truncate()
9154*/
9155
9156/*!
9157 \fn QLatin1String::first(qsizetype n) const
9158 \since 6.0
9159
9160 Returns a Latin-1 string that contains the first \a n characters
9161 of this Latin-1 string.
9162
9163 \note The behavior is undefined when \a n < 0 or \a n > size().
9164
9165 \sa last(), startsWith(), chopped(), chop(), truncate()
9166*/
9167
9168/*!
9169 \fn QLatin1String::last(qsizetype n) const
9170 \since 6.0
9171
9172 Returns a Latin-1 string that contains the last \a n characters
9173 of this Latin-1 string.
9174
9175 \note The behavior is undefined when \a n < 0 or \a n > size().
9176
9177 \sa first(), endsWith(), chopped(), chop(), truncate()
9178*/
9179
9180/*!
9181 \fn QLatin1String::sliced(qsizetype pos, qsizetype n) const
9182 \since 6.0
9183
9184 Returns a Latin-1 string that points to \a n characters of this
9185 Latin-1 string, starting at position \a pos.
9186
9187 \note The behavior is undefined when \a pos < 0, \a n < 0,
9188 or \c{pos + n > size()}.
9189
9190 \sa first(), last(), chopped(), chop(), truncate()
9191*/
9192
9193/*!
9194 \fn QLatin1String::sliced(qsizetype pos) const
9195 \since 6.0
9196
9197 Returns a Latin-1 string starting at position \a pos in this
9198 Latin-1 string, and extending to its end.
9199
9200 \note The behavior is undefined when \a pos < 0 or \a pos > size().
9201
9202 \sa first(), last(), chopped(), chop(), truncate()
9203*/
9204
9205/*!
9206 \fn QLatin1String QLatin1String::chopped(qsizetype length) const
9207 \since 5.10
9208
9209 Returns the substring of length size() - \a length starting at the
9210 beginning of this object.
9211
9212 Same as \c{left(size() - length)}.
9213
9214 \note The behavior is undefined when \a length < 0 or \a length > size().
9215
9216 \sa mid(), left(), right(), chop(), truncate()
9217*/
9218
9219/*!
9220 \fn void QLatin1String::truncate(qsizetype length)
9221 \since 5.10
9222
9223 Truncates this string to length \a length.
9224
9225 Same as \c{*this = left(length)}.
9226
9227 \note The behavior is undefined when \a length < 0 or \a length > size().
9228
9229 \sa mid(), left(), right(), chopped(), chop()
9230*/
9231
9232/*!
9233 \fn void QLatin1String::chop(qsizetype length)
9234 \since 5.10
9235
9236 Truncates this string by \a length characters.
9237
9238 Same as \c{*this = left(size() - length)}.
9239
9240 \note The behavior is undefined when \a length < 0 or \a length > size().
9241
9242 \sa mid(), left(), right(), chopped(), truncate()
9243*/
9244
9245/*!
9246 \fn QLatin1String QLatin1String::trimmed() const
9247 \since 5.10
9248
9249 Strips leading and trailing whitespace and returns the result.
9250
9251 Whitespace means any character for which QChar::isSpace() returns
9252 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
9253 '\\f', '\\r', and ' '.
9254*/
9255
9256/*! \fn bool QLatin1String::operator==(const QString &other) const
9257
9258 Returns \c true if this string is equal to string \a other;
9259 otherwise returns \c false.
9260
9261 \sa {Comparing Strings}
9262*/
9263
9264/*!
9265 \fn bool QLatin1String::operator==(const char *other) const
9266 \since 4.3
9267 \overload
9268
9269 The \a other const char pointer is converted to a QString using
9270 the QString::fromUtf8() function.
9271
9272 You can disable this operator by defining \c
9273 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9274 can be useful if you want to ensure that all user-visible strings
9275 go through QObject::tr(), for example.
9276
9277 \sa QT_NO_CAST_FROM_ASCII
9278*/
9279
9280/*!
9281 \fn bool QLatin1String::operator==(const QByteArray &other) const
9282 \since 5.0
9283 \overload
9284
9285 The \a other byte array is converted to a QString using
9286 the QString::fromUtf8() function.
9287
9288 You can disable this operator by defining \c
9289 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9290 can be useful if you want to ensure that all user-visible strings
9291 go through QObject::tr(), for example.
9292
9293 \sa QT_NO_CAST_FROM_ASCII
9294*/
9295
9296/*! \fn bool QLatin1String::operator!=(const QString &other) const
9297
9298 Returns \c true if this string is not equal to string \a other;
9299 otherwise returns \c false.
9300
9301 \sa {Comparing Strings}
9302*/
9303
9304/*!
9305 \fn bool QLatin1String::operator!=(const char *other) const
9306 \since 4.3
9307 \overload operator!=()
9308
9309 The \a other const char pointer is converted to a QString using
9310 the QString::fromUtf8() function.
9311
9312 You can disable this operator by defining \c
9313 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9314 can be useful if you want to ensure that all user-visible strings
9315 go through QObject::tr(), for example.
9316
9317 \sa QT_NO_CAST_FROM_ASCII
9318*/
9319
9320/*!
9321 \fn bool QLatin1String::operator!=(const QByteArray &other) const
9322 \since 5.0
9323 \overload operator!=()
9324
9325 The \a other byte array is converted to a QString using
9326 the QString::fromUtf8() function.
9327
9328 You can disable this operator by defining \c
9329 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9330 can be useful if you want to ensure that all user-visible strings
9331 go through QObject::tr(), for example.
9332
9333 \sa QT_NO_CAST_FROM_ASCII
9334*/
9335
9336/*!
9337 \fn bool QLatin1String::operator>(const QString &other) const
9338
9339 Returns \c true if this string is lexically greater than string \a
9340 other; otherwise returns \c false.
9341
9342 \sa {Comparing Strings}
9343*/
9344
9345/*!
9346 \fn bool QLatin1String::operator>(const char *other) const
9347 \since 4.3
9348 \overload
9349
9350 The \a other const char pointer is converted to a QString using
9351 the QString::fromUtf8() function.
9352
9353 You can disable this operator by defining \c QT_NO_CAST_FROM_ASCII
9354 when you compile your applications. This can be useful if you want
9355 to ensure that all user-visible strings go through QObject::tr(),
9356 for example.
9357
9358 \sa QT_NO_CAST_FROM_ASCII
9359*/
9360
9361/*!
9362 \fn bool QLatin1String::operator>(const QByteArray &other) const
9363 \since 5.0
9364 \overload
9365
 The \a other byte array is converted to a QString using
 the QString::fromUtf8() function.
9368
9369 You can disable this operator by defining \c QT_NO_CAST_FROM_ASCII
9370 when you compile your applications. This can be useful if you want
9371 to ensure that all user-visible strings go through QObject::tr(),
9372 for example.
9373
9374 \sa QT_NO_CAST_FROM_ASCII
9375*/
9376
9377/*!
9378 \fn bool QLatin1String::operator<(const QString &other) const
9379
9380 Returns \c true if this string is lexically less than the \a other
9381 string; otherwise returns \c false.
9382
9383 \sa {Comparing Strings}
9384*/
9385
9386/*!
9387 \fn bool QLatin1String::operator<(const char *other) const
9388 \since 4.3
9389 \overload
9390
9391 The \a other const char pointer is converted to a QString using
9392 the QString::fromUtf8() function.
9393
9394 You can disable this operator by defining \c
9395 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9396 can be useful if you want to ensure that all user-visible strings
9397 go through QObject::tr(), for example.
9398
9399 \sa QT_NO_CAST_FROM_ASCII
9400*/
9401
9402/*!
9403 \fn bool QLatin1String::operator<(const QByteArray &other) const
9404 \since 5.0
9405 \overload
9406
 The \a other byte array is converted to a QString using
 the QString::fromUtf8() function.
9409
9410 You can disable this operator by defining \c
9411 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9412 can be useful if you want to ensure that all user-visible strings
9413 go through QObject::tr(), for example.
9414
9415 \sa QT_NO_CAST_FROM_ASCII
9416*/
9417
9418/*!
9419 \fn bool QLatin1String::operator>=(const QString &other) const
9420
9421 Returns \c true if this string is lexically greater than or equal
9422 to string \a other; otherwise returns \c false.
9423
9424 \sa {Comparing Strings}
9425*/
9426
9427/*!
9428 \fn bool QLatin1String::operator>=(const char *other) const
9429 \since 4.3
9430 \overload
9431
9432 The \a other const char pointer is converted to a QString using
9433 the QString::fromUtf8() function.
9434
9435 You can disable this operator by defining \c
9436 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9437 can be useful if you want to ensure that all user-visible strings
9438 go through QObject::tr(), for example.
9439
9440 \sa QT_NO_CAST_FROM_ASCII
9441*/
9442
9443/*!
9444 \fn bool QLatin1String::operator>=(const QByteArray &other) const
9445 \since 5.0
9446 \overload
9447
 The \a other byte array is converted to a QString using
 the QString::fromUtf8() function.
9450
9451 You can disable this operator by defining \c
9452 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9453 can be useful if you want to ensure that all user-visible strings
9454 go through QObject::tr(), for example.
9455
9456 \sa QT_NO_CAST_FROM_ASCII
9457*/
9458
9459/*! \fn bool QLatin1String::operator<=(const QString &other) const
9460
9461 Returns \c true if this string is lexically less than or equal
9462 to string \a other; otherwise returns \c false.
9463
9464 \sa {Comparing Strings}
9465*/
9466
9467/*!
9468 \fn bool QLatin1String::operator<=(const char *other) const
9469 \since 4.3
9470 \overload
9471
9472 The \a other const char pointer is converted to a QString using
9473 the QString::fromUtf8() function.
9474
9475 You can disable this operator by defining \c
9476 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9477 can be useful if you want to ensure that all user-visible strings
9478 go through QObject::tr(), for example.
9479
9480 \sa QT_NO_CAST_FROM_ASCII
9481*/
9482
9483/*!
9484 \fn bool QLatin1String::operator<=(const QByteArray &other) const
9485 \since 5.0
9486 \overload
9487
 The \a other byte array is converted to a QString using
 the QString::fromUtf8() function.
9490
9491 You can disable this operator by defining \c
9492 QT_NO_CAST_FROM_ASCII when you compile your applications. This
9493 can be useful if you want to ensure that all user-visible strings
9494 go through QObject::tr(), for example.
9495
9496 \sa QT_NO_CAST_FROM_ASCII
9497*/
9498
9499
9500/*! \fn bool operator==(QLatin1String s1, QLatin1String s2)
9501 \relates QLatin1String
9502
9503 Returns \c true if string \a s1 is lexically equal to string \a s2; otherwise
9504 returns \c false.
9505*/
9506/*! \fn bool operator!=(QLatin1String s1, QLatin1String s2)
9507 \relates QLatin1String
9508
9509 Returns \c true if string \a s1 is lexically unequal to string \a s2; otherwise
9510 returns \c false.
9511*/
9512/*! \fn bool operator<(QLatin1String s1, QLatin1String s2)
9513 \relates QLatin1String
9514
9515 Returns \c true if string \a s1 is lexically smaller than string \a s2; otherwise
9516 returns \c false.
9517*/
9518/*! \fn bool operator<=(QLatin1String s1, QLatin1String s2)
9519 \relates QLatin1String
9520
9521 Returns \c true if string \a s1 is lexically smaller than or equal to string \a s2; otherwise
9522 returns \c false.
9523*/
9524/*! \fn bool operator>(QLatin1String s1, QLatin1String s2)
9525 \relates QLatin1String
9526
9527 Returns \c true if string \a s1 is lexically greater than string \a s2; otherwise
9528 returns \c false.
9529*/
9530/*! \fn bool operator>=(QLatin1String s1, QLatin1String s2)
9531 \relates QLatin1String
9532
9533 Returns \c true if string \a s1 is lexically greater than or equal to
9534 string \a s2; otherwise returns \c false.
9535*/
9536
9537
9538#if !defined(QT_NO_DATASTREAM) || (defined(QT_BOOTSTRAPPED) && !defined(QT_BUILD_QMAKE))
9539/*!
9540 \fn QDataStream &operator<<(QDataStream &stream, const QString &string)
9541 \relates QString
9542
9543 Writes the given \a string to the specified \a stream.
9544
9545 \sa {Serializing Qt Data Types}
9546*/
9547
9548QDataStream &operator<<(QDataStream &out, const QString &str)
9549{
9550 if (out.version() == 1) {
9551 out << str.toLatin1();
9552 } else {
9553 if (!str.isNull() || out.version() < 3) {
9554 if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9555 out.writeBytes(reinterpret_cast<const char *>(str.unicode()), size_t(sizeof(QChar) * str.length()));
9556 } else {
9557 QVarLengthArray<char16_t> buffer(str.length());
9558 qbswap<sizeof(char16_t)>(str.constData(), str.length(), buffer.data());
9559 out.writeBytes(reinterpret_cast<const char *>(buffer.data()), size_t(sizeof(char16_t) * buffer.size()));
9560 }
9561 } else {
9562 // write null marker
9563 out << (quint32)0xffffffff;
9564 }
9565 }
9566 return out;
9567}
9568
9569/*!
9570 \fn QDataStream &operator>>(QDataStream &stream, QString &string)
9571 \relates QString
9572
9573 Reads a string from the specified \a stream into the given \a string.
9574
9575 \sa {Serializing Qt Data Types}
9576*/
9577
9578QDataStream &operator>>(QDataStream &in, QString &str)
9579{
9580 if (in.version() == 1) {
9581 QByteArray l;
9582 in >> l;
9583 str = QString::fromLatin1(l);
9584 } else {
9585 quint32 bytes = 0;
9586 in >> bytes; // read size of string
9587 if (bytes == 0xffffffff) { // null string
9588 str.clear();
9589 } else if (bytes > 0) { // not empty
9590 if (bytes & 0x1) {
9591 str.clear();
9592 in.setStatus(QDataStream::ReadCorruptData);
9593 return in;
9594 }
9595
9596 const quint32 Step = 1024 * 1024;
9597 quint32 len = bytes / 2;
9598 quint32 allocated = 0;
9599
9600 while (allocated < len) {
9601 int blockSize = qMin(Step, len - allocated);
9602 str.resize(allocated + blockSize);
9603 if (in.readRawData(reinterpret_cast<char *>(str.data()) + allocated * 2,
9604 blockSize * 2) != blockSize * 2) {
9605 str.clear();
9606 in.setStatus(QDataStream::ReadPastEnd);
9607 return in;
9608 }
9609 allocated += blockSize;
9610 }
9611
9612 if ((in.byteOrder() == QDataStream::BigEndian)
9613 != (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9614 char16_t *data = reinterpret_cast<char16_t *>(str.data());
9615 qbswap<sizeof(*data)>(data, len, data);
9616 }
9617 } else {
9618 str = QString(QLatin1String(""));
9619 }
9620 }
9621 return in;
9622}
9623#endif // QT_NO_DATASTREAM
9624
9625/*!
9626 \typedef QString::Data
9627 \internal
9628*/
9629
9630/*!
9631 \typedef QString::DataPtr
9632 \internal
9633*/
9634
9635/*!
9636 \fn DataPtr & QString::data_ptr()
9637 \internal
9638*/
9639
9640/*!
9641 \since 5.11
9642 \internal
9643 \relates QStringView
9644
9645 Returns \c true if the string is read right to left.
9646
9647 \sa QString::isRightToLeft()
9648*/
9649bool QtPrivate::isRightToLeft(QStringView string) noexcept
9650{
9651 const char16_t *p = string.utf16();
9652 const char16_t * const end = p + string.size();
9653 int isolateLevel = 0;
9654 while (p < end) {
9655 uint ucs4 = *p;
9656 if (QChar::isHighSurrogate(ucs4) && p < end - 1) {
9657 char16_t low = p[1];
9658 if (QChar::isLowSurrogate(low)) {
9659 ucs4 = QChar::surrogateToUcs4(ucs4, low);
9660 ++p;
9661 }
9662 }
9663 switch (QChar::direction(ucs4))
9664 {
9665 case QChar::DirRLI:
9666 case QChar::DirLRI:
9667 case QChar::DirFSI:
9668 ++isolateLevel;
9669 break;
9670 case QChar::DirPDI:
9671 if (isolateLevel)
9672 --isolateLevel;
9673 break;
9674 case QChar::DirL:
9675 if (isolateLevel)
9676 break;
9677 return false;
9678 case QChar::DirR:
9679 case QChar::DirAL:
9680 if (isolateLevel)
9681 break;
9682 return true;
9683 default:
9684 break;
9685 }
9686 ++p;
9687 }
9688 return false;
9689}
9690
9691qsizetype QtPrivate::count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9692{
9693 qsizetype num = 0;
9694 qsizetype i = -1;
9695 if (haystack.size() > 500 && needle.size() > 5) {
9696 QStringMatcher matcher(needle, cs);
9697 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9698 ++num;
9699 } else {
9700 while ((i = QtPrivate::findString(haystack, i + 1, needle, cs)) != -1)
9701 ++num;
9702 }
9703 return num;
9704}
9705
9706qsizetype QtPrivate::count(QStringView haystack, QChar ch, Qt::CaseSensitivity cs) noexcept
9707{
9708 qsizetype num = 0;
9709 if (cs == Qt::CaseSensitive) {
9710 for (QChar c : haystack) {
9711 if (c == ch)
9712 ++num;
9713 }
9714 } else {
9715 ch = foldCase(ch);
9716 for (QChar c : haystack) {
9717 if (foldCase(c) == ch)
9718 ++num;
9719 }
9720 }
9721 return num;
9722}
9723
9724template <typename Haystack, typename Needle>
9725bool qt_starts_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
9726{
9727 if (haystack.isNull())
9728 return needle.isNull();
9729 const auto haystackLen = haystack.size();
9730 const auto needleLen = needle.size();
9731 if (haystackLen == 0)
9732 return needleLen == 0;
9733 if (needleLen > haystackLen)
9734 return false;
9735
9736 return QtPrivate::compareStrings(haystack.left(needleLen), needle, cs) == 0;
9737}
9738
// File-local overload for a QStringView needle; forwards its arguments
// unchanged to qt_starts_with_impl().
static inline bool qt_starts_with(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
{
 return qt_starts_with_impl(haystack, needle, cs);
}
9743
// File-local overload for a QLatin1String needle; forwards its arguments
// unchanged to qt_starts_with_impl().
static inline bool qt_starts_with(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs)
{
 return qt_starts_with_impl(haystack, needle, cs);
}
9748
9749static inline bool qt_starts_with(QStringView haystack, QChar needle, Qt::CaseSensitivity cs)
9750{
9751 return haystack.size()
9752 && (cs == Qt::CaseSensitive ? haystack.front() == needle
9753 : foldCase(haystack.front()) == foldCase(needle));
9754}
9755
9756/*!
9757 \fn bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9758 \since 5.10
9759 \fn bool QtPrivate::startsWith(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs)
9760 \since 5.10
9761 \fn bool QtPrivate::startsWith(QLatin1String haystack, QStringView needle, Qt::CaseSensitivity cs)
9762 \since 5.10
9763 \fn bool QtPrivate::startsWith(QLatin1String haystack, QLatin1String needle, Qt::CaseSensitivity cs)
9764 \since 5.10
9765 \internal
9766 \relates QStringView
9767
9768 Returns \c true if \a haystack starts with \a needle,
9769 otherwise returns \c false.
9770
9771 If \a cs is Qt::CaseSensitive (the default), the search is case-sensitive;
9772 otherwise the search is case-insensitive.
9773
9774 \sa QtPrivate::endsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1String::endsWith()
9775*/
9776
// QStringView haystack, QStringView needle: delegates to qt_starts_with_impl().
bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
{
 return qt_starts_with_impl(haystack, needle, cs);
}
9781
// QStringView haystack, QLatin1String needle: delegates to qt_starts_with_impl().
bool QtPrivate::startsWith(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
{
 return qt_starts_with_impl(haystack, needle, cs);
}
9786
// QLatin1String haystack, QStringView needle: delegates to qt_starts_with_impl().
bool QtPrivate::startsWith(QLatin1String haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
{
 return qt_starts_with_impl(haystack, needle, cs);
}
9791
// QLatin1String haystack, QLatin1String needle: delegates to qt_starts_with_impl().
bool QtPrivate::startsWith(QLatin1String haystack, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
{
 return qt_starts_with_impl(haystack, needle, cs);
}
9796
9797template <typename Haystack, typename Needle>
9798bool qt_ends_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
9799{
9800 if (haystack.isNull())
9801 return needle.isNull();
9802 const auto haystackLen = haystack.size();
9803 const auto needleLen = needle.size();
9804 if (haystackLen == 0)
9805 return needleLen == 0;
9806 if (haystackLen < needleLen)
9807 return false;
9808
9809 return QtPrivate::compareStrings(haystack.right(needleLen), needle, cs) == 0;
9810}
9811
// File-local overload for a QStringView needle; forwards its arguments
// unchanged to qt_ends_with_impl().
static inline bool qt_ends_with(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
{
 return qt_ends_with_impl(haystack, needle, cs);
}
9816
// File-local overload for a QLatin1String needle; forwards its arguments
// unchanged to qt_ends_with_impl().
static inline bool qt_ends_with(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs)
{
 return qt_ends_with_impl(haystack, needle, cs);
}
9821
9822static inline bool qt_ends_with(QStringView haystack, QChar needle, Qt::CaseSensitivity cs)
9823{
9824 return haystack.size()
9825 && (cs == Qt::CaseSensitive ? haystack.back() == needle
9826 : foldCase(haystack.back()) == foldCase(needle));
9827}
9828
9829/*!
9830 \fn bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9831 \since 5.10
9832 \fn bool QtPrivate::endsWith(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs)
9833 \since 5.10
9834 \fn bool QtPrivate::endsWith(QLatin1String haystack, QStringView needle, Qt::CaseSensitivity cs)
9835 \since 5.10
9836 \fn bool QtPrivate::endsWith(QLatin1String haystack, QLatin1String needle, Qt::CaseSensitivity cs)
9837 \since 5.10
9838 \internal
9839 \relates QStringView
9840
9841 Returns \c true if \a haystack ends with \a needle,
9842 otherwise returns \c false.
9843
9844 If \a cs is Qt::CaseSensitive (the default), the search is case-sensitive;
9845 otherwise the search is case-insensitive.
9846
9847 \sa QtPrivate::startsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1String::endsWith()
9848*/
9849
// QStringView haystack, QStringView needle: delegates to qt_ends_with_impl().
bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
{
 return qt_ends_with_impl(haystack, needle, cs);
}
9854
// QStringView haystack, QLatin1String needle: delegates to qt_ends_with_impl().
bool QtPrivate::endsWith(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
{
 return qt_ends_with_impl(haystack, needle, cs);
}
9859
// QLatin1String haystack, QStringView needle: delegates to qt_ends_with_impl().
bool QtPrivate::endsWith(QLatin1String haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
{
 return qt_ends_with_impl(haystack, needle, cs);
}
9864
// QLatin1String haystack, QLatin1String needle: delegates to qt_ends_with_impl().
bool QtPrivate::endsWith(QLatin1String haystack, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
{
 return qt_ends_with_impl(haystack, needle, cs);
}
9869
9870namespace {
9871template <typename Pointer>
9872char32_t foldCaseHelper(Pointer ch, Pointer start) = delete;
9873
9874template <>
9875char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
9876{
9877 return foldCase(reinterpret_cast<const char16_t*>(ch),
9878 reinterpret_cast<const char16_t*>(start));
9879}
9880
9881template <>
9882char32_t foldCaseHelper<const char*>(const char* ch, const char*)
9883{
9884 return foldCase(char16_t(uchar(*ch)));
9885}
9886
9887template <typename T>
9888char16_t valueTypeToUtf16(T t) = delete;
9889
9890template <>
9891char16_t valueTypeToUtf16<QChar>(QChar t)
9892{
9893 return t.unicode();
9894}
9895
9896template <>
9897char16_t valueTypeToUtf16<char>(char t)
9898{
9899 return char16_t{uchar(t)};
9900}
9901}
9902
9903/*!
9904 \internal
9905
 Returns the index position of the first occurrence of the
 character \a ch in the string view \a str, searching forward from
 index position \a from. A negative \a from counts back from the end
 of \a str, stopping at the start. The comparison honors \a cs.
 Returns -1 if \a ch could not be found.
9910*/
9911
9912static inline qsizetype qFindChar(QStringView str, QChar ch, qsizetype from, Qt::CaseSensitivity cs) noexcept
9913{
9914 if (from < 0)
9915 from = qMax(from + str.size(), qsizetype(0));
9916 if (from < str.size()) {
9917 const char16_t *s = str.utf16();
9918 char16_t c = ch.unicode();
9919 const char16_t *n = s + from;
9920 const char16_t *e = s + str.size();
9921 if (cs == Qt::CaseSensitive) {
9922 n = QtPrivate::qustrchr(QStringView(n, e), c);
9923 if (n != e)
9924 return n - s;
9925 } else {
9926 c = foldCase(c);
9927 --n;
9928 while (++n != e)
9929 if (foldCase(*n) == c)
9930 return n - s;
9931 }
9932 }
9933 return -1;
9934}
9935
9936qsizetype QtPrivate::findString(QStringView haystack0, qsizetype from, QStringView needle0, Qt::CaseSensitivity cs) noexcept
9937{
9938 const qsizetype l = haystack0.size();
9939 const qsizetype sl = needle0.size();
9940 if (from < 0)
9941 from += l;
9942 if (std::size_t(sl + from) > std::size_t(l))
9943 return -1;
9944 if (!sl)
9945 return from;
9946 if (!l)
9947 return -1;
9948
9949 if (sl == 1)
9950 return qFindChar(haystack0, needle0[0], from, cs);
9951
9952 /*
9953 We use the Boyer-Moore algorithm in cases where the overhead
9954 for the skip table should pay off, otherwise we use a simple
9955 hash function.
9956 */
9957 if (l > 500 && sl > 5)
9958 return qFindStringBoyerMoore(haystack0, from, needle0, cs);
9959
9960 auto sv = [sl](const char16_t *v) { return QStringView(v, sl); };
9961 /*
9962 We use some hashing for efficiency's sake. Instead of
9963 comparing strings, we compare the hash value of str with that
9964 of a part of this QString. Only if that matches, we call
9965 qt_string_compare().
9966 */
9967 const char16_t *needle = needle0.utf16();
9968 const char16_t *haystack = haystack0.utf16() + from;
9969 const char16_t *end = haystack0.utf16() + (l - sl);
9970 const std::size_t sl_minus_1 = sl - 1;
9971 std::size_t hashNeedle = 0, hashHaystack = 0;
9972 qsizetype idx;
9973
9974 if (cs == Qt::CaseSensitive) {
9975 for (idx = 0; idx < sl; ++idx) {
9976 hashNeedle = ((hashNeedle<<1) + needle[idx]);
9977 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
9978 }
9979 hashHaystack -= haystack[sl_minus_1];
9980
9981 while (haystack <= end) {
9982 hashHaystack += haystack[sl_minus_1];
9983 if (hashHaystack == hashNeedle
9984 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
9985 return haystack - haystack0.utf16();
9986
9987 REHASH(*haystack);
9988 ++haystack;
9989 }
9990 } else {
9991 const char16_t *haystack_start = haystack0.utf16();
9992 for (idx = 0; idx < sl; ++idx) {
9993 hashNeedle = (hashNeedle<<1) + foldCase(needle + idx, needle);
9994 hashHaystack = (hashHaystack<<1) + foldCase(haystack + idx, haystack_start);
9995 }
9996 hashHaystack -= foldCase(haystack + sl_minus_1, haystack_start);
9997
9998 while (haystack <= end) {
9999 hashHaystack += foldCase(haystack + sl_minus_1, haystack_start);
10000 if (hashHaystack == hashNeedle
10001 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseInsensitive) == 0)
10002 return haystack - haystack0.utf16();
10003
10004 REHASH(foldCase(haystack, haystack_start));
10005 ++haystack;
10006 }
10007 }
10008 return -1;
10009}
10010
10011template <typename Haystack>
10012static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle,
10013 qsizetype from, Qt::CaseSensitivity cs) noexcept
10014{
10015 if (from < 0)
10016 from += haystack.size();
10017 if (std::size_t(from) >= std::size_t(haystack.size()))
10018 return -1;
10019 if (from >= 0) {
10020 char16_t c = needle.unicode();
10021 const auto b = haystack.data();
10022 auto n = b + from;
10023 if (cs == Qt::CaseSensitive) {
10024 for (; n >= b; --n)
10025 if (valueTypeToUtf16(*n) == c)
10026 return n - b;
10027 } else {
10028 c = foldCase(c);
10029 for (; n >= b; --n)
10030 if (foldCase(valueTypeToUtf16(*n)) == c)
10031 return n - b;
10032 }
10033 }
10034 return -1;
10035}
10036
10037template<typename Haystack, typename Needle>
10038static qsizetype qLastIndexOf(Haystack haystack0, qsizetype from,
10039 Needle needle0, Qt::CaseSensitivity cs) noexcept
10040{
10041 const qsizetype sl = needle0.size();
10042 if (sl == 1)
10043 return qLastIndexOf(haystack0, needle0.front(), from, cs);
10044
10045 const qsizetype l = haystack0.size();
10046 if (from < 0)
10047 from += l;
10048 if (from == l && sl == 0)
10049 return from;
10050 const qsizetype delta = l - sl;
10051 if (std::size_t(from) >= std::size_t(l) || delta < 0)
10052 return -1;
10053 if (from > delta)
10054 from = delta;
10055
10056 auto sv = [sl](const typename Haystack::value_type *v) { return Haystack(v, sl); };
10057
10058 auto haystack = haystack0.data();
10059 const auto needle = needle0.data();
10060 const auto *end = haystack;
10061 haystack += from;
10062 const std::size_t sl_minus_1 = sl - 1;
10063 const auto *n = needle + sl_minus_1;
10064 const auto *h = haystack + sl_minus_1;
10065 std::size_t hashNeedle = 0, hashHaystack = 0;
10066 qsizetype idx;
10067
10068 if (cs == Qt::CaseSensitive) {
10069 for (idx = 0; idx < sl; ++idx) {
10070 hashNeedle = (hashNeedle << 1) + valueTypeToUtf16(*(n - idx));
10071 hashHaystack = (hashHaystack << 1) + valueTypeToUtf16(*(h - idx));
10072 }
10073 hashHaystack -= valueTypeToUtf16(*haystack);
10074
10075 while (haystack >= end) {
10076 hashHaystack += valueTypeToUtf16(*haystack);
10077 if (hashHaystack == hashNeedle
10078 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
10079 return haystack - end;
10080 --haystack;
10081 REHASH(valueTypeToUtf16(haystack[sl]));
10082 }
10083 } else {
10084 for (idx = 0; idx < sl; ++idx) {
10085 hashNeedle = (hashNeedle << 1) + foldCaseHelper(n - idx, needle);
10086 hashHaystack = (hashHaystack << 1) + foldCaseHelper(h - idx, end);
10087 }
10088 hashHaystack -= foldCaseHelper(haystack, end);
10089
10090 while (haystack >= end) {
10091 hashHaystack += foldCaseHelper(haystack, end);
10092 if (hashHaystack == hashNeedle
10093 && QtPrivate::compareStrings(sv(haystack), needle0, Qt::CaseInsensitive) == 0)
10094 return haystack - end;
10095 --haystack;
10096 REHASH(foldCaseHelper(haystack + sl, end));
10097 }
10098 }
10099 return -1;
10100}
10101
10102qsizetype QtPrivate::findString(QStringView haystack, qsizetype from, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
10103{
10104 if (haystack.size() < needle.size())
10105 return -1;
10106
10107 QVarLengthArray<char16_t> s(needle.size());
10108 qt_from_latin1(s.data(), needle.latin1(), needle.size());
10109 return QtPrivate::findString(haystack, from, QStringView(reinterpret_cast<const QChar*>(s.constData()), s.size()), cs);
10110}
10111
10112qsizetype QtPrivate::findString(QLatin1String haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10113{
10114 if (haystack.size() < needle.size())
10115 return -1;
10116
10117 QVarLengthArray<char16_t> s(haystack.size());
10118 qt_from_latin1(s.data(), haystack.latin1(), haystack.size());
10119 return QtPrivate::findString(QStringView(reinterpret_cast<const QChar*>(s.constData()), s.size()), from, needle, cs);
10120}
10121
10122qsizetype QtPrivate::findString(QLatin1String haystack, qsizetype from, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
10123{
10124 if (haystack.size() < needle.size())
10125 return -1;
10126
10127 QVarLengthArray<char16_t> h(haystack.size());
10128 qt_from_latin1(h.data(), haystack.latin1(), haystack.size());
10129 QVarLengthArray<char16_t> n(needle.size());
10130 qt_from_latin1(n.data(), needle.latin1(), needle.size());
10131 return QtPrivate::findString(QStringView(reinterpret_cast<const QChar*>(h.constData()), h.size()), from,
10132 QStringView(reinterpret_cast<const QChar*>(n.constData()), n.size()), cs);
10133}
10134
10135qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10136{
10137 return qLastIndexOf(haystack, from, needle, cs);
10138}
10139
10140qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
10141{
10142 return qLastIndexOf(haystack, from, needle, cs);
10143}
10144
10145qsizetype QtPrivate::lastIndexOf(QLatin1String haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10146{
10147 return qLastIndexOf(haystack, from, needle, cs);
10148}
10149
10150qsizetype QtPrivate::lastIndexOf(QLatin1String haystack, qsizetype from, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
10151{
10152 return qLastIndexOf(haystack, from, needle, cs);
10153}
10154
10155/*!
10156 \since 5.0
10157
10158 Converts a plain text string to an HTML string with
10159 HTML metacharacters \c{<}, \c{>}, \c{&}, and \c{"} replaced by HTML
10160 entities.
10161
10162 Example:
10163
10164 \snippet code/src_corelib_text_qstring.cpp 7
10165*/
10166QString QString::toHtmlEscaped() const
10167{
10168 QString rich;
10169 const int len = length();
10170 rich.reserve(qsizetype(len * 1.1));
10171 for (int i = 0; i < len; ++i) {
10172 if (at(i) == QLatin1Char('<'))
10173 rich += QLatin1String("&lt;");
10174 else if (at(i) == QLatin1Char('>'))
10175 rich += QLatin1String("&gt;");
10176 else if (at(i) == QLatin1Char('&'))
10177 rich += QLatin1String("&amp;");
10178 else if (at(i) == QLatin1Char('"'))
10179 rich += QLatin1String("&quot;");
10180 else
10181 rich += at(i);
10182 }
10183 rich.squeeze();
10184 return rich;
10185}
10186
10187/*!
10188 \macro QStringLiteral(str)
10189 \relates QString
10190
10191 The macro generates the data for a QString out of the string literal \a str
10192 at compile time. Creating a QString from it is free in this case, and the
10193 generated string data is stored in the read-only segment of the compiled
10194 object file.
10195
10196 If you have code that looks like this:
10197
10198 \snippet code/src_corelib_text_qstring.cpp 9
10199
10200 then a temporary QString will be created to be passed as the \c{hasAttribute}
10201 function parameter. This can be quite expensive, as it involves a memory
10202 allocation and the copy/conversion of the data into QString's internal
10203 encoding.
10204
10205 This cost can be avoided by using QStringLiteral instead:
10206
10207 \snippet code/src_corelib_text_qstring.cpp 10
10208
10209 In this case, QString's internal data will be generated at compile time; no
10210 conversion or allocation will occur at runtime.
10211
10212 Using QStringLiteral instead of a double quoted plain C++ string literal can
10213 significantly speed up creation of QString instances from data known at
10214 compile time.
10215
10216 \note QLatin1String can still be more efficient than QStringLiteral
10217 when the string is passed to a function that has an overload taking
10218 QLatin1String and this overload avoids conversion to QString. For
10219 instance, QString::operator==() can compare to a QLatin1String
10220 directly:
10221
10222 \snippet code/src_corelib_text_qstring.cpp 11
10223
10224 \note Some compilers have bugs encoding strings containing characters outside
10225 the US-ASCII character set. Make sure you prefix your string with \c{u} in
10226 those cases. It is optional otherwise.
10227
10228 \sa QByteArrayLiteral
10229*/
10230
10231/*!
10232 \internal
10233 */
10234void QAbstractConcatenable::appendLatin1To(QLatin1String in, QChar *out) noexcept
10235{
10236 qt_from_latin1(reinterpret_cast<char16_t *>(out), in.data(), size_t(in.size()));
10237}
10238
10239double QStringView::toDouble(bool *ok) const
10240{
10241 return QLocaleData::c()->stringToDouble(*this, ok, QLocale::RejectGroupSeparator);
10242}
10243
10244float QStringView::toFloat(bool *ok) const
10245{
10246 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
10247}
10248
10249QT_END_NAMESPACE
10250