1/**************************************************************************/
2/* ustring.cpp */
3/**************************************************************************/
4/* This file is part of: */
5/* GODOT ENGINE */
6/* https://godotengine.org */
7/**************************************************************************/
8/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10/* */
11/* Permission is hereby granted, free of charge, to any person obtaining */
12/* a copy of this software and associated documentation files (the */
13/* "Software"), to deal in the Software without restriction, including */
14/* without limitation the rights to use, copy, modify, merge, publish, */
15/* distribute, sublicense, and/or sell copies of the Software, and to */
16/* permit persons to whom the Software is furnished to do so, subject to */
17/* the following conditions: */
18/* */
19/* The above copyright notice and this permission notice shall be */
20/* included in all copies or substantial portions of the Software. */
21/* */
22/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29/**************************************************************************/
30
31#include "ustring.h"
32
33#include "core/crypto/crypto_core.h"
34#include "core/math/color.h"
35#include "core/math/math_funcs.h"
36#include "core/os/memory.h"
37#include "core/string/print_string.h"
38#include "core/string/string_name.h"
39#include "core/string/translation.h"
40#include "core/string/ucaps.h"
41#include "core/variant/variant.h"
42#include "core/version_generated.gen.h"
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <cstdint>
47
48#ifdef _MSC_VER
49#define _CRT_SECURE_NO_WARNINGS // to disable build-time warning which suggested to use strcpy_s instead strcpy
50#endif
51
52#if defined(MINGW_ENABLED) || defined(_MSC_VER)
53#define snprintf _snprintf_s
54#endif
55
// File-local limit of 32 decimals; the code that consumes it lies outside this section.
static constexpr int MAX_DECIMALS = 32;
57
58static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
59 return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c);
60}
61
62const char CharString::_null = 0;
63const char16_t Char16String::_null = 0;
64const char32_t String::_null = 0;
65const char32_t String::_replacement_char = 0xfffd;
66
67bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
68 const String &s = p_s;
69 int beg = CLAMP(p_col, 0, s.length());
70 int end = beg;
71
72 if (s[beg] > 32 || beg == s.length()) {
73 bool symbol = beg < s.length() && is_symbol(s[beg]);
74
75 while (beg > 0 && s[beg - 1] > 32 && (symbol == is_symbol(s[beg - 1]))) {
76 beg--;
77 }
78 while (end < s.length() && s[end + 1] > 32 && (symbol == is_symbol(s[end + 1]))) {
79 end++;
80 }
81
82 if (end < s.length()) {
83 end += 1;
84 }
85
86 r_beg = beg;
87 r_end = end;
88
89 return true;
90 } else {
91 return false;
92 }
93}
94
95/*************************************************************************/
96/* Char16String */
97/*************************************************************************/
98
99bool Char16String::operator<(const Char16String &p_right) const {
100 if (length() == 0) {
101 return p_right.length() != 0;
102 }
103
104 return is_str_less(get_data(), p_right.get_data());
105}
106
107Char16String &Char16String::operator+=(char16_t p_char) {
108 const int lhs_len = length();
109 resize(lhs_len + 2);
110
111 char16_t *dst = ptrw();
112 dst[lhs_len] = p_char;
113 dst[lhs_len + 1] = 0;
114
115 return *this;
116}
117
118void Char16String::operator=(const char16_t *p_cstr) {
119 copy_from(p_cstr);
120}
121
122const char16_t *Char16String::get_data() const {
123 if (size()) {
124 return &operator[](0);
125 } else {
126 return u"";
127 }
128}
129
130void Char16String::copy_from(const char16_t *p_cstr) {
131 if (!p_cstr) {
132 resize(0);
133 return;
134 }
135
136 const char16_t *s = p_cstr;
137 for (; *s; s++) {
138 }
139 size_t len = s - p_cstr;
140
141 if (len == 0) {
142 resize(0);
143 return;
144 }
145
146 Error err = resize(++len); // include terminating null char
147
148 ERR_FAIL_COND_MSG(err != OK, "Failed to copy char16_t string.");
149
150 memcpy(ptrw(), p_cstr, len * sizeof(char16_t));
151}
152
153/*************************************************************************/
154/* CharString */
155/*************************************************************************/
156
157bool CharString::operator<(const CharString &p_right) const {
158 if (length() == 0) {
159 return p_right.length() != 0;
160 }
161
162 return is_str_less(get_data(), p_right.get_data());
163}
164
165bool CharString::operator==(const CharString &p_right) const {
166 if (length() == 0) {
167 // True if both have length 0, false if only p_right has a length
168 return p_right.length() == 0;
169 } else if (p_right.length() == 0) {
170 // False due to unequal length
171 return false;
172 }
173
174 return strcmp(ptr(), p_right.ptr()) == 0;
175}
176
177CharString &CharString::operator+=(char p_char) {
178 const int lhs_len = length();
179 resize(lhs_len + 2);
180
181 char *dst = ptrw();
182 dst[lhs_len] = p_char;
183 dst[lhs_len + 1] = 0;
184
185 return *this;
186}
187
188void CharString::operator=(const char *p_cstr) {
189 copy_from(p_cstr);
190}
191
192const char *CharString::get_data() const {
193 if (size()) {
194 return &operator[](0);
195 } else {
196 return "";
197 }
198}
199
200void CharString::copy_from(const char *p_cstr) {
201 if (!p_cstr) {
202 resize(0);
203 return;
204 }
205
206 size_t len = strlen(p_cstr);
207
208 if (len == 0) {
209 resize(0);
210 return;
211 }
212
213 Error err = resize(++len); // include terminating null char
214
215 ERR_FAIL_COND_MSG(err != OK, "Failed to copy C-string.");
216
217 memcpy(ptrw(), p_cstr, len);
218}
219
220/*************************************************************************/
221/* String */
222/*************************************************************************/
223
224Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const {
225 // Splits the URL into scheme, host, port, path. Strip credentials when present.
226 String base = *this;
227 r_scheme = "";
228 r_host = "";
229 r_port = 0;
230 r_path = "";
231 int pos = base.find("://");
232 // Scheme
233 if (pos != -1) {
234 r_scheme = base.substr(0, pos + 3).to_lower();
235 base = base.substr(pos + 3, base.length() - pos - 3);
236 }
237 pos = base.find("/");
238 // Path
239 if (pos != -1) {
240 r_path = base.substr(pos, base.length() - pos);
241 base = base.substr(0, pos);
242 }
243 // Host
244 pos = base.find("@");
245 if (pos != -1) {
246 // Strip credentials
247 base = base.substr(pos + 1, base.length() - pos - 1);
248 }
249 if (base.begins_with("[")) {
250 // Literal IPv6
251 pos = base.rfind("]");
252 if (pos == -1) {
253 return ERR_INVALID_PARAMETER;
254 }
255 r_host = base.substr(1, pos - 1);
256 base = base.substr(pos + 1, base.length() - pos - 1);
257 } else {
258 // Anything else
259 if (base.get_slice_count(":") > 2) {
260 return ERR_INVALID_PARAMETER;
261 }
262 pos = base.rfind(":");
263 if (pos == -1) {
264 r_host = base;
265 base = "";
266 } else {
267 r_host = base.substr(0, pos);
268 base = base.substr(pos, base.length() - pos);
269 }
270 }
271 if (r_host.is_empty()) {
272 return ERR_INVALID_PARAMETER;
273 }
274 r_host = r_host.to_lower();
275 // Port
276 if (base.begins_with(":")) {
277 base = base.substr(1, base.length() - 1);
278 if (!base.is_valid_int()) {
279 return ERR_INVALID_PARAMETER;
280 }
281 r_port = base.to_int();
282 if (r_port < 1 || r_port > 65535) {
283 return ERR_INVALID_PARAMETER;
284 }
285 }
286 return OK;
287}
288
289void String::copy_from(const char *p_cstr) {
290 // copy Latin-1 encoded c-string directly
291 if (!p_cstr) {
292 resize(0);
293 return;
294 }
295
296 const size_t len = strlen(p_cstr);
297
298 if (len == 0) {
299 resize(0);
300 return;
301 }
302
303 resize(len + 1); // include 0
304
305 char32_t *dst = this->ptrw();
306
307 for (size_t i = 0; i <= len; i++) {
308 uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
309 if (c == 0 && i < len) {
310 print_unicode_error("NUL character", true);
311 dst[i] = _replacement_char;
312 } else {
313 dst[i] = c;
314 }
315 }
316}
317
318void String::copy_from(const char *p_cstr, const int p_clip_to) {
319 // copy Latin-1 encoded c-string directly
320 if (!p_cstr) {
321 resize(0);
322 return;
323 }
324
325 int len = 0;
326 const char *ptr = p_cstr;
327 while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) {
328 len++;
329 }
330
331 if (len == 0) {
332 resize(0);
333 return;
334 }
335
336 resize(len + 1); // include 0
337
338 char32_t *dst = this->ptrw();
339
340 for (int i = 0; i < len; i++) {
341 uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
342 if (c == 0) {
343 print_unicode_error("NUL character", true);
344 dst[i] = _replacement_char;
345 } else {
346 dst[i] = c;
347 }
348 }
349 dst[len] = 0;
350}
351
352void String::copy_from(const wchar_t *p_cstr) {
353#ifdef WINDOWS_ENABLED
354 // wchar_t is 16-bit, parse as UTF-16
355 parse_utf16((const char16_t *)p_cstr);
356#else
357 // wchar_t is 32-bit, copy directly
358 copy_from((const char32_t *)p_cstr);
359#endif
360}
361
362void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
363#ifdef WINDOWS_ENABLED
364 // wchar_t is 16-bit, parse as UTF-16
365 parse_utf16((const char16_t *)p_cstr, p_clip_to);
366#else
367 // wchar_t is 32-bit, copy directly
368 copy_from((const char32_t *)p_cstr, p_clip_to);
369#endif
370}
371
372void String::copy_from(const char32_t &p_char) {
373 if (p_char == 0) {
374 print_unicode_error("NUL character", true);
375 return;
376 }
377
378 resize(2);
379
380 char32_t *dst = ptrw();
381
382 if ((p_char & 0xfffff800) == 0xd800) {
383 print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
384 dst[0] = _replacement_char;
385 } else if (p_char > 0x10ffff) {
386 print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
387 dst[0] = _replacement_char;
388 } else {
389 dst[0] = p_char;
390 }
391
392 dst[1] = 0;
393}
394
395void String::copy_from(const char32_t *p_cstr) {
396 if (!p_cstr) {
397 resize(0);
398 return;
399 }
400
401 int len = 0;
402 const char32_t *ptr = p_cstr;
403 while (*(ptr++) != 0) {
404 len++;
405 }
406
407 if (len == 0) {
408 resize(0);
409 return;
410 }
411
412 copy_from_unchecked(p_cstr, len);
413}
414
415void String::copy_from(const char32_t *p_cstr, const int p_clip_to) {
416 if (!p_cstr) {
417 resize(0);
418 return;
419 }
420
421 int len = 0;
422 const char32_t *ptr = p_cstr;
423 while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) {
424 len++;
425 }
426
427 if (len == 0) {
428 resize(0);
429 return;
430 }
431
432 copy_from_unchecked(p_cstr, len);
433}
434
435// assumes the following have already been validated:
436// p_char != nullptr
437// p_length > 0
438// p_length <= p_char strlen
439void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
440 resize(p_length + 1);
441 char32_t *dst = ptrw();
442 dst[p_length] = 0;
443
444 for (int i = 0; i < p_length; i++) {
445 if (p_char[i] == 0) {
446 print_unicode_error("NUL character", true);
447 dst[i] = _replacement_char;
448 continue;
449 }
450 if ((p_char[i] & 0xfffff800) == 0xd800) {
451 print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i]));
452 dst[i] = _replacement_char;
453 continue;
454 }
455 if (p_char[i] > 0x10ffff) {
456 print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i]));
457 dst[i] = _replacement_char;
458 continue;
459 }
460 dst[i] = p_char[i];
461 }
462}
463
464void String::operator=(const char *p_str) {
465 copy_from(p_str);
466}
467
468void String::operator=(const char32_t *p_str) {
469 copy_from(p_str);
470}
471
472void String::operator=(const wchar_t *p_str) {
473 copy_from(p_str);
474}
475
476String String::operator+(const String &p_str) const {
477 String res = *this;
478 res += p_str;
479 return res;
480}
481
482String String::operator+(char32_t p_char) const {
483 String res = *this;
484 res += p_char;
485 return res;
486}
487
488String operator+(const char *p_chr, const String &p_str) {
489 String tmp = p_chr;
490 tmp += p_str;
491 return tmp;
492}
493
494String operator+(const wchar_t *p_chr, const String &p_str) {
495#ifdef WINDOWS_ENABLED
496 // wchar_t is 16-bit
497 String tmp = String::utf16((const char16_t *)p_chr);
498#else
499 // wchar_t is 32-bit
500 String tmp = (const char32_t *)p_chr;
501#endif
502 tmp += p_str;
503 return tmp;
504}
505
506String operator+(char32_t p_chr, const String &p_str) {
507 return (String::chr(p_chr) + p_str);
508}
509
510String &String::operator+=(const String &p_str) {
511 const int lhs_len = length();
512 if (lhs_len == 0) {
513 *this = p_str;
514 return *this;
515 }
516
517 const int rhs_len = p_str.length();
518 if (rhs_len == 0) {
519 return *this;
520 }
521
522 resize(lhs_len + rhs_len + 1);
523
524 const char32_t *src = p_str.ptr();
525 char32_t *dst = ptrw() + lhs_len;
526
527 // Don't copy the terminating null with `memcpy` to avoid undefined behavior when string is being added to itself (it would overlap the destination).
528 memcpy(dst, src, rhs_len * sizeof(char32_t));
529 *(dst + rhs_len) = _null;
530
531 return *this;
532}
533
534String &String::operator+=(const char *p_str) {
535 if (!p_str || p_str[0] == 0) {
536 return *this;
537 }
538
539 const int lhs_len = length();
540 const size_t rhs_len = strlen(p_str);
541
542 resize(lhs_len + rhs_len + 1);
543
544 char32_t *dst = ptrw() + lhs_len;
545
546 for (size_t i = 0; i <= rhs_len; i++) {
547 uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]);
548 if (c == 0 && i < rhs_len) {
549 print_unicode_error("NUL character", true);
550 dst[i] = _replacement_char;
551 } else {
552 dst[i] = c;
553 }
554 }
555
556 return *this;
557}
558
559String &String::operator+=(const wchar_t *p_str) {
560#ifdef WINDOWS_ENABLED
561 // wchar_t is 16-bit
562 *this += String::utf16((const char16_t *)p_str);
563#else
564 // wchar_t is 32-bit
565 *this += String((const char32_t *)p_str);
566#endif
567 return *this;
568}
569
570String &String::operator+=(const char32_t *p_str) {
571 *this += String(p_str);
572 return *this;
573}
574
575String &String::operator+=(char32_t p_char) {
576 if (p_char == 0) {
577 print_unicode_error("NUL character", true);
578 return *this;
579 }
580
581 const int lhs_len = length();
582 resize(lhs_len + 2);
583 char32_t *dst = ptrw();
584
585 if ((p_char & 0xfffff800) == 0xd800) {
586 print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
587 dst[lhs_len] = _replacement_char;
588 } else if (p_char > 0x10ffff) {
589 print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
590 dst[lhs_len] = _replacement_char;
591 } else {
592 dst[lhs_len] = p_char;
593 }
594
595 dst[lhs_len + 1] = 0;
596
597 return *this;
598}
599
600bool String::operator==(const char *p_str) const {
601 // compare Latin-1 encoded c-string
602 int len = 0;
603 const char *aux = p_str;
604
605 while (*(aux++) != 0) {
606 len++;
607 }
608
609 if (length() != len) {
610 return false;
611 }
612 if (is_empty()) {
613 return true;
614 }
615
616 int l = length();
617
618 const char32_t *dst = get_data();
619
620 // Compare char by char
621 for (int i = 0; i < l; i++) {
622 if ((char32_t)p_str[i] != dst[i]) {
623 return false;
624 }
625 }
626
627 return true;
628}
629
630bool String::operator==(const wchar_t *p_str) const {
631#ifdef WINDOWS_ENABLED
632 // wchar_t is 16-bit, parse as UTF-16
633 return *this == String::utf16((const char16_t *)p_str);
634#else
635 // wchar_t is 32-bit, compare char by char
636 return *this == (const char32_t *)p_str;
637#endif
638}
639
640bool String::operator==(const char32_t *p_str) const {
641 int len = 0;
642 const char32_t *aux = p_str;
643
644 while (*(aux++) != 0) {
645 len++;
646 }
647
648 if (length() != len) {
649 return false;
650 }
651 if (is_empty()) {
652 return true;
653 }
654
655 int l = length();
656
657 const char32_t *dst = get_data();
658
659 /* Compare char by char */
660 for (int i = 0; i < l; i++) {
661 if (p_str[i] != dst[i]) {
662 return false;
663 }
664 }
665
666 return true;
667}
668
669bool String::operator==(const String &p_str) const {
670 if (length() != p_str.length()) {
671 return false;
672 }
673 if (is_empty()) {
674 return true;
675 }
676
677 int l = length();
678
679 const char32_t *src = get_data();
680 const char32_t *dst = p_str.get_data();
681
682 /* Compare char by char */
683 for (int i = 0; i < l; i++) {
684 if (src[i] != dst[i]) {
685 return false;
686 }
687 }
688
689 return true;
690}
691
692bool String::operator==(const StrRange &p_str_range) const {
693 int len = p_str_range.len;
694
695 if (length() != len) {
696 return false;
697 }
698 if (is_empty()) {
699 return true;
700 }
701
702 const char32_t *c_str = p_str_range.c_str;
703 const char32_t *dst = &operator[](0);
704
705 /* Compare char by char */
706 for (int i = 0; i < len; i++) {
707 if (c_str[i] != dst[i]) {
708 return false;
709 }
710 }
711
712 return true;
713}
714
715bool operator==(const char *p_chr, const String &p_str) {
716 return p_str == p_chr;
717}
718
719bool operator==(const wchar_t *p_chr, const String &p_str) {
720#ifdef WINDOWS_ENABLED
721 // wchar_t is 16-bit
722 return p_str == String::utf16((const char16_t *)p_chr);
723#else
724 // wchar_t is 32-bi
725 return p_str == String((const char32_t *)p_chr);
726#endif
727}
728
729bool operator!=(const char *p_chr, const String &p_str) {
730 return !(p_str == p_chr);
731}
732
733bool operator!=(const wchar_t *p_chr, const String &p_str) {
734#ifdef WINDOWS_ENABLED
735 // wchar_t is 16-bit
736 return !(p_str == String::utf16((const char16_t *)p_chr));
737#else
738 // wchar_t is 32-bi
739 return !(p_str == String((const char32_t *)p_chr));
740#endif
741}
742
743bool String::operator!=(const char *p_str) const {
744 return (!(*this == p_str));
745}
746
747bool String::operator!=(const wchar_t *p_str) const {
748 return (!(*this == p_str));
749}
750
751bool String::operator!=(const char32_t *p_str) const {
752 return (!(*this == p_str));
753}
754
755bool String::operator!=(const String &p_str) const {
756 return !((*this == p_str));
757}
758
759bool String::operator<=(const String &p_str) const {
760 return !(p_str < *this);
761}
762
763bool String::operator>(const String &p_str) const {
764 return p_str < *this;
765}
766
767bool String::operator>=(const String &p_str) const {
768 return !(*this < p_str);
769}
770
771bool String::operator<(const char *p_str) const {
772 if (is_empty() && p_str[0] == 0) {
773 return false;
774 }
775 if (is_empty()) {
776 return true;
777 }
778 return is_str_less(get_data(), p_str);
779}
780
781bool String::operator<(const wchar_t *p_str) const {
782 if (is_empty() && p_str[0] == 0) {
783 return false;
784 }
785 if (is_empty()) {
786 return true;
787 }
788
789#ifdef WINDOWS_ENABLED
790 // wchar_t is 16-bit
791 return is_str_less(get_data(), String::utf16((const char16_t *)p_str).get_data());
792#else
793 // wchar_t is 32-bit
794 return is_str_less(get_data(), (const char32_t *)p_str);
795#endif
796}
797
798bool String::operator<(const char32_t *p_str) const {
799 if (is_empty() && p_str[0] == 0) {
800 return false;
801 }
802 if (is_empty()) {
803 return true;
804 }
805
806 return is_str_less(get_data(), p_str);
807}
808
809bool String::operator<(const String &p_str) const {
810 return operator<(p_str.get_data());
811}
812
813signed char String::nocasecmp_to(const String &p_str) const {
814 if (is_empty() && p_str.is_empty()) {
815 return 0;
816 }
817 if (is_empty()) {
818 return -1;
819 }
820 if (p_str.is_empty()) {
821 return 1;
822 }
823
824 const char32_t *that_str = p_str.get_data();
825 const char32_t *this_str = get_data();
826
827 while (true) {
828 if (*that_str == 0 && *this_str == 0) { // If both strings are at the end, they are equal.
829 return 0;
830 } else if (*this_str == 0) { // If at the end of this, and not of other, we are less.
831 return -1;
832 } else if (*that_str == 0) { // If at end of other, and not of this, we are greater.
833 return 1;
834 } else if (_find_upper(*this_str) < _find_upper(*that_str)) { // If current character in this is less, we are less.
835 return -1;
836 } else if (_find_upper(*this_str) > _find_upper(*that_str)) { // If current character in this is greater, we are greater.
837 return 1;
838 }
839
840 this_str++;
841 that_str++;
842 }
843}
844
845signed char String::casecmp_to(const String &p_str) const {
846 if (is_empty() && p_str.is_empty()) {
847 return 0;
848 }
849 if (is_empty()) {
850 return -1;
851 }
852 if (p_str.is_empty()) {
853 return 1;
854 }
855
856 const char32_t *that_str = p_str.get_data();
857 const char32_t *this_str = get_data();
858
859 while (true) {
860 if (*that_str == 0 && *this_str == 0) { // If both strings are at the end, they are equal.
861 return 0;
862 } else if (*this_str == 0) { // If at the end of this, and not of other, we are less.
863 return -1;
864 } else if (*that_str == 0) { // If at end of other, and not of this, we are greater.
865 return 1;
866 } else if (*this_str < *that_str) { // If current character in this is less, we are less.
867 return -1;
868 } else if (*this_str > *that_str) { // If current character in this is greater, we are greater.
869 return 1;
870 }
871
872 this_str++;
873 that_str++;
874 }
875}
876
877static _FORCE_INLINE_ signed char natural_cmp_common(const char32_t *&r_this_str, const char32_t *&r_that_str) {
878 // Keep ptrs to start of numerical sequences.
879 const char32_t *this_substr = r_this_str;
880 const char32_t *that_substr = r_that_str;
881
882 // Compare lengths of both numerical sequences, ignoring leading zeros.
883 while (is_digit(*r_this_str)) {
884 r_this_str++;
885 }
886 while (is_digit(*r_that_str)) {
887 r_that_str++;
888 }
889 while (*this_substr == '0') {
890 this_substr++;
891 }
892 while (*that_substr == '0') {
893 that_substr++;
894 }
895 int this_len = r_this_str - this_substr;
896 int that_len = r_that_str - that_substr;
897
898 if (this_len < that_len) {
899 return -1;
900 } else if (this_len > that_len) {
901 return 1;
902 }
903
904 // If lengths equal, compare lexicographically.
905 while (this_substr != r_this_str && that_substr != r_that_str) {
906 if (*this_substr < *that_substr) {
907 return -1;
908 } else if (*this_substr > *that_substr) {
909 return 1;
910 }
911 this_substr++;
912 that_substr++;
913 }
914
915 return 0;
916}
917
918signed char String::naturalcasecmp_to(const String &p_str) const {
919 const char32_t *this_str = get_data();
920 const char32_t *that_str = p_str.get_data();
921
922 if (this_str && that_str) {
923 while (*this_str == '.' || *that_str == '.') {
924 if (*this_str++ != '.') {
925 return 1;
926 }
927 if (*that_str++ != '.') {
928 return -1;
929 }
930 if (!*that_str) {
931 return 1;
932 }
933 if (!*this_str) {
934 return -1;
935 }
936 }
937
938 while (*this_str) {
939 if (!*that_str) {
940 return 1;
941 } else if (is_digit(*this_str)) {
942 if (!is_digit(*that_str)) {
943 return -1;
944 }
945
946 signed char ret = natural_cmp_common(this_str, that_str);
947 if (ret) {
948 return ret;
949 }
950 } else if (is_digit(*that_str)) {
951 return 1;
952 } else {
953 if (*this_str < *that_str) { // If current character in this is less, we are less.
954 return -1;
955 } else if (*this_str > *that_str) { // If current character in this is greater, we are greater.
956 return 1;
957 }
958
959 this_str++;
960 that_str++;
961 }
962 }
963 if (*that_str) {
964 return -1;
965 }
966 }
967
968 return 0;
969}
970
971signed char String::naturalnocasecmp_to(const String &p_str) const {
972 const char32_t *this_str = get_data();
973 const char32_t *that_str = p_str.get_data();
974
975 if (this_str && that_str) {
976 while (*this_str == '.' || *that_str == '.') {
977 if (*this_str++ != '.') {
978 return 1;
979 }
980 if (*that_str++ != '.') {
981 return -1;
982 }
983 if (!*that_str) {
984 return 1;
985 }
986 if (!*this_str) {
987 return -1;
988 }
989 }
990
991 while (*this_str) {
992 if (!*that_str) {
993 return 1;
994 } else if (is_digit(*this_str)) {
995 if (!is_digit(*that_str)) {
996 return -1;
997 }
998
999 signed char ret = natural_cmp_common(this_str, that_str);
1000 if (ret) {
1001 return ret;
1002 }
1003 } else if (is_digit(*that_str)) {
1004 return 1;
1005 } else {
1006 if (_find_upper(*this_str) < _find_upper(*that_str)) { // If current character in this is less, we are less.
1007 return -1;
1008 } else if (_find_upper(*this_str) > _find_upper(*that_str)) { // If current character in this is greater, we are greater.
1009 return 1;
1010 }
1011
1012 this_str++;
1013 that_str++;
1014 }
1015 }
1016 if (*that_str) {
1017 return -1;
1018 }
1019 }
1020
1021 return 0;
1022}
1023
1024const char32_t *String::get_data() const {
1025 static const char32_t zero = 0;
1026 return size() ? &operator[](0) : &zero;
1027}
1028
1029String String::_camelcase_to_underscore() const {
1030 const char32_t *cstr = get_data();
1031 String new_string;
1032 int start_index = 0;
1033
1034 for (int i = 1; i < this->size(); i++) {
1035 bool is_prev_upper = is_ascii_upper_case(cstr[i - 1]);
1036 bool is_prev_lower = is_ascii_lower_case(cstr[i - 1]);
1037 bool is_prev_digit = is_digit(cstr[i - 1]);
1038
1039 bool is_curr_upper = is_ascii_upper_case(cstr[i]);
1040 bool is_curr_lower = is_ascii_lower_case(cstr[i]);
1041 bool is_curr_digit = is_digit(cstr[i]);
1042
1043 bool is_next_lower = false;
1044 if (i + 1 < this->size()) {
1045 is_next_lower = is_ascii_lower_case(cstr[i + 1]);
1046 }
1047
1048 const bool cond_a = is_prev_lower && is_curr_upper; // aA
1049 const bool cond_b = (is_prev_upper || is_prev_digit) && is_curr_upper && is_next_lower; // AAa, 2Aa
1050 const bool cond_c = is_prev_digit && is_curr_lower && is_next_lower; // 2aa
1051 const bool cond_d = (is_prev_upper || is_prev_lower) && is_curr_digit; // A2, a2
1052
1053 if (cond_a || cond_b || cond_c || cond_d) {
1054 new_string += this->substr(start_index, i - start_index) + "_";
1055 start_index = i;
1056 }
1057 }
1058
1059 new_string += this->substr(start_index, this->size() - start_index);
1060 return new_string.to_lower();
1061}
1062
1063String String::capitalize() const {
1064 String aux = this->_camelcase_to_underscore().replace("_", " ").strip_edges();
1065 String cap;
1066 for (int i = 0; i < aux.get_slice_count(" "); i++) {
1067 String slice = aux.get_slicec(' ', i);
1068 if (slice.length() > 0) {
1069 slice[0] = _find_upper(slice[0]);
1070 if (i > 0) {
1071 cap += " ";
1072 }
1073 cap += slice;
1074 }
1075 }
1076
1077 return cap;
1078}
1079
1080String String::to_camel_case() const {
1081 String s = this->to_pascal_case();
1082 if (!s.is_empty()) {
1083 s[0] = _find_lower(s[0]);
1084 }
1085 return s;
1086}
1087
1088String String::to_pascal_case() const {
1089 return this->capitalize().replace(" ", "");
1090}
1091
1092String String::to_snake_case() const {
1093 return this->_camelcase_to_underscore().replace(" ", "_").strip_edges();
1094}
1095
1096String String::get_with_code_lines() const {
1097 const Vector<String> lines = split("\n");
1098 String ret;
1099 for (int i = 0; i < lines.size(); i++) {
1100 if (i > 0) {
1101 ret += "\n";
1102 }
1103 ret += vformat("%4d | %s", i + 1, lines[i]);
1104 }
1105 return ret;
1106}
1107
1108int String::get_slice_count(String p_splitter) const {
1109 if (is_empty()) {
1110 return 0;
1111 }
1112 if (p_splitter.is_empty()) {
1113 return 0;
1114 }
1115
1116 int pos = 0;
1117 int slices = 1;
1118
1119 while ((pos = find(p_splitter, pos)) >= 0) {
1120 slices++;
1121 pos += p_splitter.length();
1122 }
1123
1124 return slices;
1125}
1126
1127String String::get_slice(String p_splitter, int p_slice) const {
1128 if (is_empty() || p_splitter.is_empty()) {
1129 return "";
1130 }
1131
1132 int pos = 0;
1133 int prev_pos = 0;
1134 //int slices=1;
1135 if (p_slice < 0) {
1136 return "";
1137 }
1138 if (find(p_splitter) == -1) {
1139 return *this;
1140 }
1141
1142 int i = 0;
1143 while (true) {
1144 pos = find(p_splitter, pos);
1145 if (pos == -1) {
1146 pos = length(); //reached end
1147 }
1148
1149 int from = prev_pos;
1150 //int to=pos;
1151
1152 if (p_slice == i) {
1153 return substr(from, pos - from);
1154 }
1155
1156 if (pos == length()) { //reached end and no find
1157 break;
1158 }
1159 pos += p_splitter.length();
1160 prev_pos = pos;
1161 i++;
1162 }
1163
1164 return ""; //no find!
1165}
1166
1167String String::get_slicec(char32_t p_splitter, int p_slice) const {
1168 if (is_empty()) {
1169 return String();
1170 }
1171
1172 if (p_slice < 0) {
1173 return String();
1174 }
1175
1176 const char32_t *c = this->ptr();
1177 int i = 0;
1178 int prev = 0;
1179 int count = 0;
1180 while (true) {
1181 if (c[i] == 0 || c[i] == p_splitter) {
1182 if (p_slice == count) {
1183 return substr(prev, i - prev);
1184 } else if (c[i] == 0) {
1185 return String();
1186 } else {
1187 count++;
1188 prev = i + 1;
1189 }
1190 }
1191
1192 i++;
1193 }
1194}
1195
1196Vector<String> String::split_spaces() const {
1197 Vector<String> ret;
1198 int from = 0;
1199 int i = 0;
1200 int len = length();
1201 if (len == 0) {
1202 return ret;
1203 }
1204
1205 bool inside = false;
1206
1207 while (true) {
1208 bool empty = operator[](i) < 33;
1209
1210 if (i == 0) {
1211 inside = !empty;
1212 }
1213
1214 if (!empty && !inside) {
1215 inside = true;
1216 from = i;
1217 }
1218
1219 if (empty && inside) {
1220 ret.push_back(substr(from, i - from));
1221 inside = false;
1222 }
1223
1224 if (i == len) {
1225 break;
1226 }
1227 i++;
1228 }
1229
1230 return ret;
1231}
1232
1233Vector<String> String::split(const String &p_splitter, bool p_allow_empty, int p_maxsplit) const {
1234 Vector<String> ret;
1235
1236 if (is_empty()) {
1237 if (p_allow_empty) {
1238 ret.push_back("");
1239 }
1240 return ret;
1241 }
1242
1243 int from = 0;
1244 int len = length();
1245
1246 while (true) {
1247 int end;
1248 if (p_splitter.is_empty()) {
1249 end = from + 1;
1250 } else {
1251 end = find(p_splitter, from);
1252 if (end < 0) {
1253 end = len;
1254 }
1255 }
1256 if (p_allow_empty || (end > from)) {
1257 if (p_maxsplit <= 0) {
1258 ret.push_back(substr(from, end - from));
1259 } else {
1260 // Put rest of the string and leave cycle.
1261 if (p_maxsplit == ret.size()) {
1262 ret.push_back(substr(from, len));
1263 break;
1264 }
1265
1266 // Otherwise, push items until positive limit is reached.
1267 ret.push_back(substr(from, end - from));
1268 }
1269 }
1270
1271 if (end == len) {
1272 break;
1273 }
1274
1275 from = end + p_splitter.length();
1276 }
1277
1278 return ret;
1279}
1280
1281Vector<String> String::rsplit(const String &p_splitter, bool p_allow_empty, int p_maxsplit) const {
1282 Vector<String> ret;
1283 const int len = length();
1284 int remaining_len = len;
1285
1286 while (true) {
1287 if (remaining_len < p_splitter.length() || (p_maxsplit > 0 && p_maxsplit == ret.size())) {
1288 // no room for another splitter or hit max splits, push what's left and we're done
1289 if (p_allow_empty || remaining_len > 0) {
1290 ret.push_back(substr(0, remaining_len));
1291 }
1292 break;
1293 }
1294
1295 int left_edge;
1296 if (p_splitter.is_empty()) {
1297 left_edge = remaining_len - 1;
1298 if (left_edge == 0) {
1299 left_edge--; // Skip to the < 0 condition.
1300 }
1301 } else {
1302 left_edge = rfind(p_splitter, remaining_len - p_splitter.length());
1303 }
1304
1305 if (left_edge < 0) {
1306 // no more splitters, we're done
1307 ret.push_back(substr(0, remaining_len));
1308 break;
1309 }
1310
1311 int substr_start = left_edge + p_splitter.length();
1312 if (p_allow_empty || substr_start < remaining_len) {
1313 ret.push_back(substr(substr_start, remaining_len - substr_start));
1314 }
1315
1316 remaining_len = left_edge;
1317 }
1318
1319 ret.reverse();
1320 return ret;
1321}
1322
1323Vector<double> String::split_floats(const String &p_splitter, bool p_allow_empty) const {
1324 Vector<double> ret;
1325 int from = 0;
1326 int len = length();
1327
1328 while (true) {
1329 int end = find(p_splitter, from);
1330 if (end < 0) {
1331 end = len;
1332 }
1333 if (p_allow_empty || (end > from)) {
1334 ret.push_back(String::to_float(&get_data()[from]));
1335 }
1336
1337 if (end == len) {
1338 break;
1339 }
1340
1341 from = end + p_splitter.length();
1342 }
1343
1344 return ret;
1345}
1346
1347Vector<float> String::split_floats_mk(const Vector<String> &p_splitters, bool p_allow_empty) const {
1348 Vector<float> ret;
1349 int from = 0;
1350 int len = length();
1351
1352 while (true) {
1353 int idx;
1354 int end = findmk(p_splitters, from, &idx);
1355 int spl_len = 1;
1356 if (end < 0) {
1357 end = len;
1358 } else {
1359 spl_len = p_splitters[idx].length();
1360 }
1361
1362 if (p_allow_empty || (end > from)) {
1363 ret.push_back(String::to_float(&get_data()[from]));
1364 }
1365
1366 if (end == len) {
1367 break;
1368 }
1369
1370 from = end + spl_len;
1371 }
1372
1373 return ret;
1374}
1375
1376Vector<int> String::split_ints(const String &p_splitter, bool p_allow_empty) const {
1377 Vector<int> ret;
1378 int from = 0;
1379 int len = length();
1380
1381 while (true) {
1382 int end = find(p_splitter, from);
1383 if (end < 0) {
1384 end = len;
1385 }
1386 if (p_allow_empty || (end > from)) {
1387 ret.push_back(String::to_int(&get_data()[from], end - from));
1388 }
1389
1390 if (end == len) {
1391 break;
1392 }
1393
1394 from = end + p_splitter.length();
1395 }
1396
1397 return ret;
1398}
1399
1400Vector<int> String::split_ints_mk(const Vector<String> &p_splitters, bool p_allow_empty) const {
1401 Vector<int> ret;
1402 int from = 0;
1403 int len = length();
1404
1405 while (true) {
1406 int idx;
1407 int end = findmk(p_splitters, from, &idx);
1408 int spl_len = 1;
1409 if (end < 0) {
1410 end = len;
1411 } else {
1412 spl_len = p_splitters[idx].length();
1413 }
1414
1415 if (p_allow_empty || (end > from)) {
1416 ret.push_back(String::to_int(&get_data()[from], end - from));
1417 }
1418
1419 if (end == len) {
1420 break;
1421 }
1422
1423 from = end + spl_len;
1424 }
1425
1426 return ret;
1427}
1428
1429String String::join(Vector<String> parts) const {
1430 String ret;
1431 for (int i = 0; i < parts.size(); ++i) {
1432 if (i > 0) {
1433 ret += *this;
1434 }
1435 ret += parts[i];
1436 }
1437 return ret;
1438}
1439
1440char32_t String::char_uppercase(char32_t p_char) {
1441 return _find_upper(p_char);
1442}
1443
1444char32_t String::char_lowercase(char32_t p_char) {
1445 return _find_lower(p_char);
1446}
1447
1448String String::to_upper() const {
1449 String upper = *this;
1450
1451 for (int i = 0; i < upper.size(); i++) {
1452 const char32_t s = upper[i];
1453 const char32_t t = _find_upper(s);
1454 if (s != t) { // avoid copy on write
1455 upper[i] = t;
1456 }
1457 }
1458
1459 return upper;
1460}
1461
1462String String::to_lower() const {
1463 String lower = *this;
1464
1465 for (int i = 0; i < lower.size(); i++) {
1466 const char32_t s = lower[i];
1467 const char32_t t = _find_lower(s);
1468 if (s != t) { // avoid copy on write
1469 lower[i] = t;
1470 }
1471 }
1472
1473 return lower;
1474}
1475
1476String String::chr(char32_t p_char) {
1477 char32_t c[2] = { p_char, 0 };
1478 return String(c);
1479}
1480
1481String String::num(double p_num, int p_decimals) {
1482 if (Math::is_nan(p_num)) {
1483 return "nan";
1484 }
1485
1486 if (Math::is_inf(p_num)) {
1487 if (signbit(p_num)) {
1488 return "-inf";
1489 } else {
1490 return "inf";
1491 }
1492 }
1493
1494 if (p_decimals < 0) {
1495 p_decimals = 14;
1496 const double abs_num = Math::abs(p_num);
1497 if (abs_num > 10) {
1498 // We want to align the digits to the above reasonable default, so we only
1499 // need to subtract log10 for numbers with a positive power of ten.
1500 p_decimals -= (int)floor(log10(abs_num));
1501 }
1502 }
1503 if (p_decimals > MAX_DECIMALS) {
1504 p_decimals = MAX_DECIMALS;
1505 }
1506
1507 char fmt[7];
1508 fmt[0] = '%';
1509 fmt[1] = '.';
1510
1511 if (p_decimals < 0) {
1512 fmt[1] = 'l';
1513 fmt[2] = 'f';
1514 fmt[3] = 0;
1515 } else if (p_decimals < 10) {
1516 fmt[2] = '0' + p_decimals;
1517 fmt[3] = 'l';
1518 fmt[4] = 'f';
1519 fmt[5] = 0;
1520 } else {
1521 fmt[2] = '0' + (p_decimals / 10);
1522 fmt[3] = '0' + (p_decimals % 10);
1523 fmt[4] = 'l';
1524 fmt[5] = 'f';
1525 fmt[6] = 0;
1526 }
1527 // if we want to convert a double with as much decimal places as as
1528 // DBL_MAX or DBL_MIN then we would theoretically need a buffer of at least
1529 // DBL_MAX_10_EXP + 2 for DBL_MAX and DBL_MAX_10_EXP + 4 for DBL_MIN.
1530 // BUT those values where still giving me exceptions, so I tested from
1531 // DBL_MAX_10_EXP + 10 incrementing one by one and DBL_MAX_10_EXP + 17 (325)
1532 // was the first buffer size not to throw an exception
1533 char buf[325];
1534
1535#if defined(__GNUC__) || defined(_MSC_VER)
1536 // PLEASE NOTE that, albeit vcrt online reference states that snprintf
1537 // should safely truncate the output to the given buffer size, we have
1538 // found a case where this is not true, so we should create a buffer
1539 // as big as needed
1540 snprintf(buf, 325, fmt, p_num);
1541#else
1542 sprintf(buf, fmt, p_num);
1543#endif
1544
1545 buf[324] = 0;
1546 //destroy trailing zeroes
1547 {
1548 bool period = false;
1549 int z = 0;
1550 while (buf[z]) {
1551 if (buf[z] == '.') {
1552 period = true;
1553 }
1554 z++;
1555 }
1556
1557 if (period) {
1558 z--;
1559 while (z > 0) {
1560 if (buf[z] == '0') {
1561 buf[z] = 0;
1562 } else if (buf[z] == '.') {
1563 buf[z] = 0;
1564 break;
1565 } else {
1566 break;
1567 }
1568
1569 z--;
1570 }
1571 }
1572 }
1573
1574 return buf;
1575}
1576
1577String String::num_int64(int64_t p_num, int base, bool capitalize_hex) {
1578 bool sign = p_num < 0;
1579
1580 int64_t n = p_num;
1581
1582 int chars = 0;
1583 do {
1584 n /= base;
1585 chars++;
1586 } while (n);
1587
1588 if (sign) {
1589 chars++;
1590 }
1591 String s;
1592 s.resize(chars + 1);
1593 char32_t *c = s.ptrw();
1594 c[chars] = 0;
1595 n = p_num;
1596 do {
1597 int mod = ABS(n % base);
1598 if (mod >= 10) {
1599 char a = (capitalize_hex ? 'A' : 'a');
1600 c[--chars] = a + (mod - 10);
1601 } else {
1602 c[--chars] = '0' + mod;
1603 }
1604
1605 n /= base;
1606 } while (n);
1607
1608 if (sign) {
1609 c[0] = '-';
1610 }
1611
1612 return s;
1613}
1614
1615String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
1616 uint64_t n = p_num;
1617
1618 int chars = 0;
1619 do {
1620 n /= base;
1621 chars++;
1622 } while (n);
1623
1624 String s;
1625 s.resize(chars + 1);
1626 char32_t *c = s.ptrw();
1627 c[chars] = 0;
1628 n = p_num;
1629 do {
1630 int mod = n % base;
1631 if (mod >= 10) {
1632 char a = (capitalize_hex ? 'A' : 'a');
1633 c[--chars] = a + (mod - 10);
1634 } else {
1635 c[--chars] = '0' + mod;
1636 }
1637
1638 n /= base;
1639 } while (n);
1640
1641 return s;
1642}
1643
1644String String::num_real(double p_num, bool p_trailing) {
1645 if (p_num == (double)(int64_t)p_num) {
1646 if (p_trailing) {
1647 return num_int64((int64_t)p_num) + ".0";
1648 } else {
1649 return num_int64((int64_t)p_num);
1650 }
1651 }
1652#ifdef REAL_T_IS_DOUBLE
1653 int decimals = 14;
1654#else
1655 int decimals = 6;
1656#endif
1657 // We want to align the digits to the above sane default, so we only need
1658 // to subtract log10 for numbers with a positive power of ten magnitude.
1659 double abs_num = Math::abs(p_num);
1660 if (abs_num > 10) {
1661 decimals -= (int)floor(log10(abs_num));
1662 }
1663 return num(p_num, decimals);
1664}
1665
1666String String::num_scientific(double p_num) {
1667 if (Math::is_nan(p_num)) {
1668 return "nan";
1669 }
1670
1671 if (Math::is_inf(p_num)) {
1672 if (signbit(p_num)) {
1673 return "-inf";
1674 } else {
1675 return "inf";
1676 }
1677 }
1678
1679 char buf[256];
1680
1681#if defined(__GNUC__) || defined(_MSC_VER)
1682
1683#if defined(__MINGW32__) && defined(_TWO_DIGIT_EXPONENT) && !defined(_UCRT)
1684 // MinGW requires _set_output_format() to conform to C99 output for printf
1685 unsigned int old_exponent_format = _set_output_format(_TWO_DIGIT_EXPONENT);
1686#endif
1687 snprintf(buf, 256, "%lg", p_num);
1688
1689#if defined(__MINGW32__) && defined(_TWO_DIGIT_EXPONENT) && !defined(_UCRT)
1690 _set_output_format(old_exponent_format);
1691#endif
1692
1693#else
1694 sprintf(buf, "%.16lg", p_num);
1695#endif
1696
1697 buf[255] = 0;
1698
1699 return buf;
1700}
1701
1702String String::md5(const uint8_t *p_md5) {
1703 return String::hex_encode_buffer(p_md5, 16);
1704}
1705
1706String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
1707 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
1708
1709 String ret;
1710 char v[2] = { 0, 0 };
1711
1712 for (int i = 0; i < p_len; i++) {
1713 v[0] = hex[p_buffer[i] >> 4];
1714 ret += v;
1715 v[0] = hex[p_buffer[i] & 0xF];
1716 ret += v;
1717 }
1718
1719 return ret;
1720}
1721
1722Vector<uint8_t> String::hex_decode() const {
1723 ERR_FAIL_COND_V_MSG(length() % 2 != 0, Vector<uint8_t>(), "Hexadecimal string of uneven length.");
1724
1725#define HEX_TO_BYTE(m_output, m_index) \
1726 uint8_t m_output; \
1727 c = operator[](m_index); \
1728 if (is_digit(c)) { \
1729 m_output = c - '0'; \
1730 } else if (c >= 'a' && c <= 'f') { \
1731 m_output = c - 'a' + 10; \
1732 } else if (c >= 'A' && c <= 'F') { \
1733 m_output = c - 'A' + 10; \
1734 } else { \
1735 ERR_FAIL_V_MSG(Vector<uint8_t>(), "Invalid hexadecimal character \"" + chr(c) + "\" at index " + m_index + "."); \
1736 }
1737
1738 Vector<uint8_t> out;
1739 int len = length() / 2;
1740 out.resize(len);
1741 for (int i = 0; i < len; i++) {
1742 char32_t c;
1743 HEX_TO_BYTE(first, i * 2);
1744 HEX_TO_BYTE(second, i * 2 + 1);
1745 out.write[i] = first * 16 + second;
1746 }
1747 return out;
1748#undef HEX_TO_BYTE
1749}
1750
1751void String::print_unicode_error(const String &p_message, bool p_critical) const {
1752 if (p_critical) {
1753 print_error(vformat(U"Unicode parsing error, some characters were replaced with � (U+FFFD): %s", p_message));
1754 } else {
1755 print_error(vformat("Unicode parsing error: %s", p_message));
1756 }
1757}
1758
1759CharString String::ascii(bool p_allow_extended) const {
1760 if (!length()) {
1761 return CharString();
1762 }
1763
1764 CharString cs;
1765 cs.resize(size());
1766
1767 for (int i = 0; i < size(); i++) {
1768 char32_t c = operator[](i);
1769 if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
1770 cs[i] = c;
1771 } else {
1772 print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
1773 cs[i] = 0x20; // ascii doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane
1774 }
1775 }
1776
1777 return cs;
1778}
1779
1780String String::utf8(const char *p_utf8, int p_len) {
1781 String ret;
1782 ret.parse_utf8(p_utf8, p_len);
1783
1784 return ret;
1785}
1786
1787Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
1788 if (!p_utf8) {
1789 return ERR_INVALID_DATA;
1790 }
1791
1792 String aux;
1793
1794 int cstr_size = 0;
1795 int str_size = 0;
1796
1797 /* HANDLE BOM (Byte Order Mark) */
1798 if (p_len < 0 || p_len >= 3) {
1799 bool has_bom = uint8_t(p_utf8[0]) == 0xef && uint8_t(p_utf8[1]) == 0xbb && uint8_t(p_utf8[2]) == 0xbf;
1800 if (has_bom) {
1801 //8-bit encoding, byte order has no meaning in UTF-8, just skip it
1802 if (p_len >= 0) {
1803 p_len -= 3;
1804 }
1805 p_utf8 += 3;
1806 }
1807 }
1808
1809 bool decode_error = false;
1810 bool decode_failed = false;
1811 {
1812 const char *ptrtmp = p_utf8;
1813 const char *ptrtmp_limit = &p_utf8[p_len];
1814 int skip = 0;
1815 uint8_t c_start = 0;
1816 while (ptrtmp != ptrtmp_limit && *ptrtmp) {
1817 uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
1818
1819 if (skip == 0) {
1820 if (p_skip_cr && c == '\r') {
1821 ptrtmp++;
1822 continue;
1823 }
1824 /* Determine the number of characters in sequence */
1825 if ((c & 0x80) == 0) {
1826 skip = 0;
1827 } else if ((c & 0xe0) == 0xc0) {
1828 skip = 1;
1829 } else if ((c & 0xf0) == 0xe0) {
1830 skip = 2;
1831 } else if ((c & 0xf8) == 0xf0) {
1832 skip = 3;
1833 } else if ((c & 0xfc) == 0xf8) {
1834 skip = 4;
1835 } else if ((c & 0xfe) == 0xfc) {
1836 skip = 5;
1837 } else {
1838 skip = 0;
1839 print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true);
1840 decode_failed = true;
1841 }
1842 c_start = c;
1843
1844 if (skip == 1 && (c & 0x1e) == 0) {
1845 print_unicode_error(vformat("Overlong encoding (%x ...)", c));
1846 decode_error = true;
1847 }
1848 str_size++;
1849 } else {
1850 if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) {
1851 print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c));
1852 decode_error = true;
1853 }
1854 if (c < 0x80 || c > 0xbf) {
1855 print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true);
1856 decode_failed = true;
1857 skip = 0;
1858 } else {
1859 --skip;
1860 }
1861 }
1862
1863 cstr_size++;
1864 ptrtmp++;
1865 }
1866
1867 if (skip) {
1868 print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true);
1869 decode_failed = true;
1870 }
1871 }
1872
1873 if (str_size == 0) {
1874 clear();
1875 return OK; // empty string
1876 }
1877
1878 resize(str_size + 1);
1879 char32_t *dst = ptrw();
1880 dst[str_size] = 0;
1881
1882 int skip = 0;
1883 uint32_t unichar = 0;
1884 while (cstr_size) {
1885 uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8);
1886
1887 if (skip == 0) {
1888 if (p_skip_cr && c == '\r') {
1889 p_utf8++;
1890 continue;
1891 }
1892 /* Determine the number of characters in sequence */
1893 if ((c & 0x80) == 0) {
1894 *(dst++) = c;
1895 unichar = 0;
1896 skip = 0;
1897 } else if ((c & 0xe0) == 0xc0) {
1898 unichar = (0xff >> 3) & c;
1899 skip = 1;
1900 } else if ((c & 0xf0) == 0xe0) {
1901 unichar = (0xff >> 4) & c;
1902 skip = 2;
1903 } else if ((c & 0xf8) == 0xf0) {
1904 unichar = (0xff >> 5) & c;
1905 skip = 3;
1906 } else if ((c & 0xfc) == 0xf8) {
1907 unichar = (0xff >> 6) & c;
1908 skip = 4;
1909 } else if ((c & 0xfe) == 0xfc) {
1910 unichar = (0xff >> 7) & c;
1911 skip = 5;
1912 } else {
1913 *(dst++) = _replacement_char;
1914 unichar = 0;
1915 skip = 0;
1916 }
1917 } else {
1918 if (c < 0x80 || c > 0xbf) {
1919 *(dst++) = _replacement_char;
1920 skip = 0;
1921 } else {
1922 unichar = (unichar << 6) | (c & 0x3f);
1923 --skip;
1924 if (skip == 0) {
1925 if (unichar == 0) {
1926 print_unicode_error("NUL character", true);
1927 decode_failed = true;
1928 unichar = _replacement_char;
1929 } else if ((unichar & 0xfffff800) == 0xd800) {
1930 print_unicode_error(vformat("Unpaired surrogate (%x)", unichar), true);
1931 decode_failed = true;
1932 unichar = _replacement_char;
1933 } else if (unichar > 0x10ffff) {
1934 print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar), true);
1935 decode_failed = true;
1936 unichar = _replacement_char;
1937 }
1938 *(dst++) = unichar;
1939 }
1940 }
1941 }
1942
1943 cstr_size--;
1944 p_utf8++;
1945 }
1946 if (skip) {
1947 *(dst++) = 0x20;
1948 }
1949
1950 if (decode_failed) {
1951 return ERR_INVALID_DATA;
1952 } else if (decode_error) {
1953 return ERR_PARSE_ERROR;
1954 } else {
1955 return OK;
1956 }
1957}
1958
1959CharString String::utf8() const {
1960 int l = length();
1961 if (!l) {
1962 return CharString();
1963 }
1964
1965 const char32_t *d = &operator[](0);
1966 int fl = 0;
1967 for (int i = 0; i < l; i++) {
1968 uint32_t c = d[i];
1969 if (c <= 0x7f) { // 7 bits.
1970 fl += 1;
1971 } else if (c <= 0x7ff) { // 11 bits
1972 fl += 2;
1973 } else if (c <= 0xffff) { // 16 bits
1974 fl += 3;
1975 } else if (c <= 0x001fffff) { // 21 bits
1976 fl += 4;
1977 } else if (c <= 0x03ffffff) { // 26 bits
1978 fl += 5;
1979 print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
1980 } else if (c <= 0x7fffffff) { // 31 bits
1981 fl += 6;
1982 print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
1983 } else {
1984 fl += 1;
1985 print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
1986 }
1987 }
1988
1989 CharString utf8s;
1990 if (fl == 0) {
1991 return utf8s;
1992 }
1993
1994 utf8s.resize(fl + 1);
1995 uint8_t *cdst = (uint8_t *)utf8s.get_data();
1996
1997#define APPEND_CHAR(m_c) *(cdst++) = m_c
1998
1999 for (int i = 0; i < l; i++) {
2000 uint32_t c = d[i];
2001
2002 if (c <= 0x7f) { // 7 bits.
2003 APPEND_CHAR(c);
2004 } else if (c <= 0x7ff) { // 11 bits
2005 APPEND_CHAR(uint32_t(0xc0 | ((c >> 6) & 0x1f))); // Top 5 bits.
2006 APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
2007 } else if (c <= 0xffff) { // 16 bits
2008 APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits.
2009 APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits.
2010 APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
2011 } else if (c <= 0x001fffff) { // 21 bits
2012 APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits.
2013 APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits.
2014 APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
2015 APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
2016 } else if (c <= 0x03ffffff) { // 26 bits
2017 APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits.
2018 APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits.
2019 APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits.
2020 APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
2021 APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
2022 } else if (c <= 0x7fffffff) { // 31 bits
2023 APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit.
2024 APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits.
2025 APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits.
2026 APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits.
2027 APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
2028 APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
2029 } else {
2030 // the string is a valid UTF32, so it should never happen ...
2031 print_unicode_error(vformat("Non scalar value (%x)", c), true);
2032 APPEND_CHAR(uint32_t(0xe0 | ((_replacement_char >> 12) & 0x0f))); // Top 4 bits.
2033 APPEND_CHAR(uint32_t(0x80 | ((_replacement_char >> 6) & 0x3f))); // Middle 6 bits.
2034 APPEND_CHAR(uint32_t(0x80 | (_replacement_char & 0x3f))); // Bottom 6 bits.
2035 }
2036 }
2037#undef APPEND_CHAR
2038 *cdst = 0; //trailing zero
2039
2040 return utf8s;
2041}
2042
2043String String::utf16(const char16_t *p_utf16, int p_len) {
2044 String ret;
2045 ret.parse_utf16(p_utf16, p_len);
2046
2047 return ret;
2048}
2049
2050Error String::parse_utf16(const char16_t *p_utf16, int p_len) {
2051 if (!p_utf16) {
2052 return ERR_INVALID_DATA;
2053 }
2054
2055 String aux;
2056
2057 int cstr_size = 0;
2058 int str_size = 0;
2059
2060 /* HANDLE BOM (Byte Order Mark) */
2061 bool byteswap = false; // assume correct endianness if no BOM found
2062 if (p_len < 0 || p_len >= 1) {
2063 bool has_bom = false;
2064 if (uint16_t(p_utf16[0]) == 0xfeff) { // correct BOM, read as is
2065 has_bom = true;
2066 byteswap = false;
2067 } else if (uint16_t(p_utf16[0]) == 0xfffe) { // backwards BOM, swap bytes
2068 has_bom = true;
2069 byteswap = true;
2070 }
2071 if (has_bom) {
2072 if (p_len >= 0) {
2073 p_len -= 1;
2074 }
2075 p_utf16 += 1;
2076 }
2077 }
2078
2079 bool decode_error = false;
2080 {
2081 const char16_t *ptrtmp = p_utf16;
2082 const char16_t *ptrtmp_limit = &p_utf16[p_len];
2083 uint32_t c_prev = 0;
2084 bool skip = false;
2085 while (ptrtmp != ptrtmp_limit && *ptrtmp) {
2086 uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp;
2087
2088 if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
2089 if (skip) {
2090 print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c));
2091 decode_error = true;
2092 }
2093 skip = true;
2094 } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
2095 if (skip) {
2096 str_size--;
2097 } else {
2098 print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c));
2099 decode_error = true;
2100 }
2101 skip = false;
2102 } else {
2103 skip = false;
2104 }
2105
2106 c_prev = c;
2107 str_size++;
2108 cstr_size++;
2109 ptrtmp++;
2110 }
2111
2112 if (skip) {
2113 print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev));
2114 decode_error = true;
2115 }
2116 }
2117
2118 if (str_size == 0) {
2119 clear();
2120 return OK; // empty string
2121 }
2122
2123 resize(str_size + 1);
2124 char32_t *dst = ptrw();
2125 dst[str_size] = 0;
2126
2127 bool skip = false;
2128 uint32_t c_prev = 0;
2129 while (cstr_size) {
2130 uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16;
2131
2132 if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
2133 if (skip) {
2134 *(dst++) = c_prev; // unpaired, store as is
2135 }
2136 skip = true;
2137 } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
2138 if (skip) {
2139 *(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair
2140 } else {
2141 *(dst++) = c; // unpaired, store as is
2142 }
2143 skip = false;
2144 } else {
2145 *(dst++) = c;
2146 skip = false;
2147 }
2148
2149 cstr_size--;
2150 p_utf16++;
2151 c_prev = c;
2152 }
2153
2154 if (skip) {
2155 *(dst++) = c_prev;
2156 }
2157
2158 if (decode_error) {
2159 return ERR_PARSE_ERROR;
2160 } else {
2161 return OK;
2162 }
2163}
2164
2165Char16String String::utf16() const {
2166 int l = length();
2167 if (!l) {
2168 return Char16String();
2169 }
2170
2171 const char32_t *d = &operator[](0);
2172 int fl = 0;
2173 for (int i = 0; i < l; i++) {
2174 uint32_t c = d[i];
2175 if (c <= 0xffff) { // 16 bits.
2176 fl += 1;
2177 if ((c & 0xfffff800) == 0xd800) {
2178 print_unicode_error(vformat("Unpaired surrogate (%x)", c));
2179 }
2180 } else if (c <= 0x10ffff) { // 32 bits.
2181 fl += 2;
2182 } else {
2183 print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true);
2184 fl += 1;
2185 }
2186 }
2187
2188 Char16String utf16s;
2189 if (fl == 0) {
2190 return utf16s;
2191 }
2192
2193 utf16s.resize(fl + 1);
2194 uint16_t *cdst = (uint16_t *)utf16s.get_data();
2195
2196#define APPEND_CHAR(m_c) *(cdst++) = m_c
2197
2198 for (int i = 0; i < l; i++) {
2199 uint32_t c = d[i];
2200
2201 if (c <= 0xffff) { // 16 bits.
2202 APPEND_CHAR(c);
2203 } else if (c <= 0x10ffff) { // 32 bits.
2204 APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
2205 APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
2206 } else {
2207 // the string is a valid UTF32, so it should never happen ...
2208 APPEND_CHAR(uint32_t((_replacement_char >> 10) + 0xd7c0));
2209 APPEND_CHAR(uint32_t((_replacement_char & 0x3ff) | 0xdc00));
2210 }
2211 }
2212#undef APPEND_CHAR
2213 *cdst = 0; //trailing zero
2214
2215 return utf16s;
2216}
2217
// Constructs a string from a `const char *` by delegating to copy_from().
String::String(const char *p_str) {
	copy_from(p_str);
}

// Constructs a string from a `const wchar_t *` by delegating to copy_from().
String::String(const wchar_t *p_str) {
	copy_from(p_str);
}

// Constructs a string from a `const char32_t *` by delegating to copy_from().
String::String(const char32_t *p_str) {
	copy_from(p_str);
}

// Constructs a string from a `const char *`, forwarding p_clip_to_len to
// copy_from() (presumably the maximum number of characters to take; see
// copy_from()).
String::String(const char *p_str, int p_clip_to_len) {
	copy_from(p_str, p_clip_to_len);
}

// Constructs a string from a `const wchar_t *`, forwarding p_clip_to_len to
// copy_from() (presumably the maximum number of characters to take; see
// copy_from()).
String::String(const wchar_t *p_str, int p_clip_to_len) {
	copy_from(p_str, p_clip_to_len);
}

// Constructs a string from a `const char32_t *`, forwarding p_clip_to_len to
// copy_from() (presumably the maximum number of characters to take; see
// copy_from()).
String::String(const char32_t *p_str, int p_clip_to_len) {
	copy_from(p_str, p_clip_to_len);
}
2241
2242String::String(const StrRange &p_range) {
2243 if (!p_range.c_str) {
2244 return;
2245 }
2246 copy_from(p_range.c_str, p_range.len);
2247}
2248
2249int64_t String::hex_to_int() const {
2250 int len = length();
2251 if (len == 0) {
2252 return 0;
2253 }
2254
2255 const char32_t *s = ptr();
2256
2257 int64_t sign = s[0] == '-' ? -1 : 1;
2258
2259 if (sign < 0) {
2260 s++;
2261 }
2262
2263 if (len > 2 && s[0] == '0' && lower_case(s[1]) == 'x') {
2264 s += 2;
2265 }
2266
2267 int64_t hex = 0;
2268
2269 while (*s) {
2270 char32_t c = lower_case(*s);
2271 int64_t n;
2272 if (is_digit(c)) {
2273 n = c - '0';
2274 } else if (c >= 'a' && c <= 'f') {
2275 n = (c - 'a') + 10;
2276 } else {
2277 ERR_FAIL_V_MSG(0, vformat(R"(Invalid hexadecimal notation character "%c" (U+%04X) in string "%s".)", *s, static_cast<int32_t>(*s), *this));
2278 }
2279 // Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
2280 bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0');
2281 ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
2282 hex *= 16;
2283 hex += n;
2284 s++;
2285 }
2286
2287 return hex * sign;
2288}
2289
2290int64_t String::bin_to_int() const {
2291 int len = length();
2292 if (len == 0) {
2293 return 0;
2294 }
2295
2296 const char32_t *s = ptr();
2297
2298 int64_t sign = s[0] == '-' ? -1 : 1;
2299
2300 if (sign < 0) {
2301 s++;
2302 }
2303
2304 if (len > 2 && s[0] == '0' && lower_case(s[1]) == 'b') {
2305 s += 2;
2306 }
2307
2308 int64_t binary = 0;
2309
2310 while (*s) {
2311 char32_t c = lower_case(*s);
2312 int64_t n;
2313 if (c == '0' || c == '1') {
2314 n = c - '0';
2315 } else {
2316 return 0;
2317 }
2318 // Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
2319 bool overflow = ((binary > INT64_MAX / 2) && (sign == 1 || (sign == -1 && binary != (INT64_MAX >> 1) + 1))) || (sign == -1 && binary == (INT64_MAX >> 1) + 1 && c > '0');
2320 ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
2321 binary *= 2;
2322 binary += n;
2323 s++;
2324 }
2325
2326 return binary * sign;
2327}
2328
2329int64_t String::to_int() const {
2330 if (length() == 0) {
2331 return 0;
2332 }
2333
2334 int to = (find(".") >= 0) ? find(".") : length();
2335
2336 int64_t integer = 0;
2337 int64_t sign = 1;
2338
2339 for (int i = 0; i < to; i++) {
2340 char32_t c = operator[](i);
2341 if (is_digit(c)) {
2342 bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
2343 ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
2344 integer *= 10;
2345 integer += c - '0';
2346
2347 } else if (integer == 0 && c == '-') {
2348 sign = -sign;
2349 }
2350 }
2351
2352 return integer * sign;
2353}
2354
2355int64_t String::to_int(const char *p_str, int p_len) {
2356 int to = 0;
2357 if (p_len >= 0) {
2358 to = p_len;
2359 } else {
2360 while (p_str[to] != 0 && p_str[to] != '.') {
2361 to++;
2362 }
2363 }
2364
2365 int64_t integer = 0;
2366 int64_t sign = 1;
2367
2368 for (int i = 0; i < to; i++) {
2369 char c = p_str[i];
2370 if (is_digit(c)) {
2371 bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
2372 ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
2373 integer *= 10;
2374 integer += c - '0';
2375
2376 } else if (c == '-' && integer == 0) {
2377 sign = -sign;
2378 } else if (c != ' ') {
2379 break;
2380 }
2381 }
2382
2383 return integer * sign;
2384}
2385
2386int64_t String::to_int(const wchar_t *p_str, int p_len) {
2387 int to = 0;
2388 if (p_len >= 0) {
2389 to = p_len;
2390 } else {
2391 while (p_str[to] != 0 && p_str[to] != '.') {
2392 to++;
2393 }
2394 }
2395
2396 int64_t integer = 0;
2397 int64_t sign = 1;
2398
2399 for (int i = 0; i < to; i++) {
2400 wchar_t c = p_str[i];
2401 if (is_digit(c)) {
2402 bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
2403 ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
2404 integer *= 10;
2405 integer += c - '0';
2406
2407 } else if (c == '-' && integer == 0) {
2408 sign = -sign;
2409 } else if (c != ' ') {
2410 break;
2411 }
2412 }
2413
2414 return integer * sign;
2415}
2416
2417bool String::is_numeric() const {
2418 if (length() == 0) {
2419 return false;
2420 }
2421
2422 int s = 0;
2423 if (operator[](0) == '-') {
2424 ++s;
2425 }
2426 bool dot = false;
2427 for (int i = s; i < length(); i++) {
2428 char32_t c = operator[](i);
2429 if (c == '.') {
2430 if (dot) {
2431 return false;
2432 }
2433 dot = true;
2434 } else if (!is_digit(c)) {
2435 return false;
2436 }
2437 }
2438
2439 return true; // TODO: Use the parser below for this instead
2440}
2441
2442template <class C>
2443static double built_in_strtod(
2444 /* A decimal ASCII floating-point number,
2445 * optionally preceded by white space. Must
2446 * have form "-I.FE-X", where I is the integer
2447 * part of the mantissa, F is the fractional
2448 * part of the mantissa, and X is the
2449 * exponent. Either of the signs may be "+",
2450 * "-", or omitted. Either I or F may be
2451 * omitted, or both. The decimal point isn't
2452 * necessary unless F is present. The "E" may
2453 * actually be an "e". E and X may both be
2454 * omitted (but not just one). */
2455 const C *string,
2456 /* If non-nullptr, store terminating Cacter's
2457 * address here. */
2458 C **endPtr = nullptr) {
2459 /* Largest possible base 10 exponent. Any
2460 * exponent larger than this will already
2461 * produce underflow or overflow, so there's
2462 * no need to worry about additional digits. */
2463 static const int maxExponent = 511;
2464 /* Table giving binary powers of 10. Entry
2465 * is 10^2^i. Used to convert decimal
2466 * exponents into floating-point numbers. */
2467 static const double powersOf10[] = {
2468 10.,
2469 100.,
2470 1.0e4,
2471 1.0e8,
2472 1.0e16,
2473 1.0e32,
2474 1.0e64,
2475 1.0e128,
2476 1.0e256
2477 };
2478
2479 bool sign, expSign = false;
2480 double fraction, dblExp;
2481 const double *d;
2482 const C *p;
2483 int c;
2484 /* Exponent read from "EX" field. */
2485 int exp = 0;
2486 /* Exponent that derives from the fractional
2487 * part. Under normal circumstances, it is
2488 * the negative of the number of digits in F.
2489 * However, if I is very long, the last digits
2490 * of I get dropped (otherwise a long I with a
2491 * large negative exponent could cause an
2492 * unnecessary overflow on I alone). In this
2493 * case, fracExp is incremented one for each
2494 * dropped digit. */
2495 int fracExp = 0;
2496 /* Number of digits in mantissa. */
2497 int mantSize;
2498 /* Number of mantissa digits BEFORE decimal point. */
2499 int decPt;
2500 /* Temporarily holds location of exponent in string. */
2501 const C *pExp;
2502
2503 /*
2504 * Strip off leading blanks and check for a sign.
2505 */
2506
2507 p = string;
2508 while (*p == ' ' || *p == '\t' || *p == '\n') {
2509 p += 1;
2510 }
2511 if (*p == '-') {
2512 sign = true;
2513 p += 1;
2514 } else {
2515 if (*p == '+') {
2516 p += 1;
2517 }
2518 sign = false;
2519 }
2520
2521 /*
2522 * Count the number of digits in the mantissa (including the decimal
2523 * point), and also locate the decimal point.
2524 */
2525
2526 decPt = -1;
2527 for (mantSize = 0;; mantSize += 1) {
2528 c = *p;
2529 if (!is_digit(c)) {
2530 if ((c != '.') || (decPt >= 0)) {
2531 break;
2532 }
2533 decPt = mantSize;
2534 }
2535 p += 1;
2536 }
2537
2538 /*
2539 * Now suck up the digits in the mantissa. Use two integers to collect 9
2540 * digits each (this is faster than using floating-point). If the mantissa
2541 * has more than 18 digits, ignore the extras, since they can't affect the
2542 * value anyway.
2543 */
2544
2545 pExp = p;
2546 p -= mantSize;
2547 if (decPt < 0) {
2548 decPt = mantSize;
2549 } else {
2550 mantSize -= 1; /* One of the digits was the point. */
2551 }
2552 if (mantSize > 18) {
2553 fracExp = decPt - 18;
2554 mantSize = 18;
2555 } else {
2556 fracExp = decPt - mantSize;
2557 }
2558 if (mantSize == 0) {
2559 fraction = 0.0;
2560 p = string;
2561 goto done;
2562 } else {
2563 int frac1, frac2;
2564
2565 frac1 = 0;
2566 for (; mantSize > 9; mantSize -= 1) {
2567 c = *p;
2568 p += 1;
2569 if (c == '.') {
2570 c = *p;
2571 p += 1;
2572 }
2573 frac1 = 10 * frac1 + (c - '0');
2574 }
2575 frac2 = 0;
2576 for (; mantSize > 0; mantSize -= 1) {
2577 c = *p;
2578 p += 1;
2579 if (c == '.') {
2580 c = *p;
2581 p += 1;
2582 }
2583 frac2 = 10 * frac2 + (c - '0');
2584 }
2585 fraction = (1.0e9 * frac1) + frac2;
2586 }
2587
2588 /*
2589 * Skim off the exponent.
2590 */
2591
2592 p = pExp;
2593 if ((*p == 'E') || (*p == 'e')) {
2594 p += 1;
2595 if (*p == '-') {
2596 expSign = true;
2597 p += 1;
2598 } else {
2599 if (*p == '+') {
2600 p += 1;
2601 }
2602 expSign = false;
2603 }
2604 if (!is_digit(char32_t(*p))) {
2605 p = pExp;
2606 goto done;
2607 }
2608 while (is_digit(char32_t(*p))) {
2609 exp = exp * 10 + (*p - '0');
2610 p += 1;
2611 }
2612 }
2613 if (expSign) {
2614 exp = fracExp - exp;
2615 } else {
2616 exp = fracExp + exp;
2617 }
2618
2619 /*
2620 * Generate a floating-point number that represents the exponent. Do this
2621 * by processing the exponent one bit at a time to combine many powers of
2622 * 2 of 10. Then combine the exponent with the fraction.
2623 */
2624
2625 if (exp < 0) {
2626 expSign = true;
2627 exp = -exp;
2628 } else {
2629 expSign = false;
2630 }
2631
2632 if (exp > maxExponent) {
2633 exp = maxExponent;
2634 WARN_PRINT("Exponent too high");
2635 }
2636 dblExp = 1.0;
2637 for (d = powersOf10; exp != 0; exp >>= 1, ++d) {
2638 if (exp & 01) {
2639 dblExp *= *d;
2640 }
2641 }
2642 if (expSign) {
2643 fraction /= dblExp;
2644 } else {
2645 fraction *= dblExp;
2646 }
2647
2648done:
2649 if (endPtr != nullptr) {
2650 *endPtr = (C *)p;
2651 }
2652
2653 if (sign) {
2654 return -fraction;
2655 }
2656 return fraction;
2657}
2658
// State identifiers for the hand-written, character-by-character number
// parsers in this file (String::to_int() walks SIGN -> INT -> DONE).
// NOTE(review): READING_DEC and READING_EXP are not referenced in the part of
// the file reviewed here; confirm they are still used before removing them.
#define READING_SIGN 0
#define READING_INT 1
#define READING_DEC 2
#define READING_EXP 3
#define READING_DONE 4
2664
2665double String::to_float(const char *p_str) {
2666 return built_in_strtod<char>(p_str);
2667}
2668
2669double String::to_float(const char32_t *p_str, const char32_t **r_end) {
2670 return built_in_strtod<char32_t>(p_str, (char32_t **)r_end);
2671}
2672
2673double String::to_float(const wchar_t *p_str, const wchar_t **r_end) {
2674 return built_in_strtod<wchar_t>(p_str, (wchar_t **)r_end);
2675}
2676
2677uint32_t String::num_characters(int64_t p_int) {
2678 int r = 1;
2679 if (p_int < 0) {
2680 r += 1;
2681 if (p_int == INT64_MIN) {
2682 p_int = INT64_MAX;
2683 } else {
2684 p_int = -p_int;
2685 }
2686 }
2687 while (p_int >= 10) {
2688 p_int /= 10;
2689 r++;
2690 }
2691 return r;
2692}
2693
2694int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) {
2695 if (p_len == 0 || !p_str[0]) {
2696 return 0;
2697 }
2698 ///@todo make more exact so saving and loading does not lose precision
2699
2700 int64_t integer = 0;
2701 int64_t sign = 1;
2702 int reading = READING_SIGN;
2703
2704 const char32_t *str = p_str;
2705 const char32_t *limit = &p_str[p_len];
2706
2707 while (*str && reading != READING_DONE && str != limit) {
2708 char32_t c = *(str++);
2709 switch (reading) {
2710 case READING_SIGN: {
2711 if (is_digit(c)) {
2712 reading = READING_INT;
2713 // let it fallthrough
2714 } else if (c == '-') {
2715 sign = -1;
2716 reading = READING_INT;
2717 break;
2718 } else if (c == '+') {
2719 sign = 1;
2720 reading = READING_INT;
2721 break;
2722 } else {
2723 break;
2724 }
2725 [[fallthrough]];
2726 }
2727 case READING_INT: {
2728 if (is_digit(c)) {
2729 if (integer > INT64_MAX / 10) {
2730 String number("");
2731 str = p_str;
2732 while (*str && str != limit) {
2733 number += *(str++);
2734 }
2735 if (p_clamp) {
2736 if (sign == 1) {
2737 return INT64_MAX;
2738 } else {
2739 return INT64_MIN;
2740 }
2741 } else {
2742 ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
2743 }
2744 }
2745 integer *= 10;
2746 integer += c - '0';
2747 } else {
2748 reading = READING_DONE;
2749 }
2750
2751 } break;
2752 }
2753 }
2754
2755 return sign * integer;
2756}
2757
2758double String::to_float() const {
2759 if (is_empty()) {
2760 return 0;
2761 }
2762 return built_in_strtod<char32_t>(get_data());
2763}
2764
2765uint32_t String::hash(const char *p_cstr) {
2766 uint32_t hashv = 5381;
2767 uint32_t c = *p_cstr++;
2768
2769 while (c) {
2770 hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
2771 c = *p_cstr++;
2772 }
2773
2774 return hashv;
2775}
2776
2777uint32_t String::hash(const char *p_cstr, int p_len) {
2778 uint32_t hashv = 5381;
2779 for (int i = 0; i < p_len; i++) {
2780 hashv = ((hashv << 5) + hashv) + p_cstr[i]; /* hash * 33 + c */
2781 }
2782
2783 return hashv;
2784}
2785
2786uint32_t String::hash(const wchar_t *p_cstr, int p_len) {
2787 uint32_t hashv = 5381;
2788 for (int i = 0; i < p_len; i++) {
2789 hashv = ((hashv << 5) + hashv) + p_cstr[i]; /* hash * 33 + c */
2790 }
2791
2792 return hashv;
2793}
2794
2795uint32_t String::hash(const wchar_t *p_cstr) {
2796 uint32_t hashv = 5381;
2797 uint32_t c = *p_cstr++;
2798
2799 while (c) {
2800 hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
2801 c = *p_cstr++;
2802 }
2803
2804 return hashv;
2805}
2806
2807uint32_t String::hash(const char32_t *p_cstr, int p_len) {
2808 uint32_t hashv = 5381;
2809 for (int i = 0; i < p_len; i++) {
2810 hashv = ((hashv << 5) + hashv) + p_cstr[i]; /* hash * 33 + c */
2811 }
2812
2813 return hashv;
2814}
2815
2816uint32_t String::hash(const char32_t *p_cstr) {
2817 uint32_t hashv = 5381;
2818 uint32_t c = *p_cstr++;
2819
2820 while (c) {
2821 hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
2822 c = *p_cstr++;
2823 }
2824
2825 return hashv;
2826}
2827
2828uint32_t String::hash() const {
2829 /* simple djb2 hashing */
2830
2831 const char32_t *chr = get_data();
2832 uint32_t hashv = 5381;
2833 uint32_t c = *chr++;
2834
2835 while (c) {
2836 hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
2837 c = *chr++;
2838 }
2839
2840 return hashv;
2841}
2842
2843uint64_t String::hash64() const {
2844 /* simple djb2 hashing */
2845
2846 const char32_t *chr = get_data();
2847 uint64_t hashv = 5381;
2848 uint64_t c = *chr++;
2849
2850 while (c) {
2851 hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
2852 c = *chr++;
2853 }
2854
2855 return hashv;
2856}
2857
2858String String::md5_text() const {
2859 CharString cs = utf8();
2860 unsigned char hash[16];
2861 CryptoCore::md5((unsigned char *)cs.ptr(), cs.length(), hash);
2862 return String::hex_encode_buffer(hash, 16);
2863}
2864
2865String String::sha1_text() const {
2866 CharString cs = utf8();
2867 unsigned char hash[20];
2868 CryptoCore::sha1((unsigned char *)cs.ptr(), cs.length(), hash);
2869 return String::hex_encode_buffer(hash, 20);
2870}
2871
2872String String::sha256_text() const {
2873 CharString cs = utf8();
2874 unsigned char hash[32];
2875 CryptoCore::sha256((unsigned char *)cs.ptr(), cs.length(), hash);
2876 return String::hex_encode_buffer(hash, 32);
2877}
2878
2879Vector<uint8_t> String::md5_buffer() const {
2880 CharString cs = utf8();
2881 unsigned char hash[16];
2882 CryptoCore::md5((unsigned char *)cs.ptr(), cs.length(), hash);
2883
2884 Vector<uint8_t> ret;
2885 ret.resize(16);
2886 for (int i = 0; i < 16; i++) {
2887 ret.write[i] = hash[i];
2888 }
2889 return ret;
2890}
2891
2892Vector<uint8_t> String::sha1_buffer() const {
2893 CharString cs = utf8();
2894 unsigned char hash[20];
2895 CryptoCore::sha1((unsigned char *)cs.ptr(), cs.length(), hash);
2896
2897 Vector<uint8_t> ret;
2898 ret.resize(20);
2899 for (int i = 0; i < 20; i++) {
2900 ret.write[i] = hash[i];
2901 }
2902
2903 return ret;
2904}
2905
2906Vector<uint8_t> String::sha256_buffer() const {
2907 CharString cs = utf8();
2908 unsigned char hash[32];
2909 CryptoCore::sha256((unsigned char *)cs.ptr(), cs.length(), hash);
2910
2911 Vector<uint8_t> ret;
2912 ret.resize(32);
2913 for (int i = 0; i < 32; i++) {
2914 ret.write[i] = hash[i];
2915 }
2916 return ret;
2917}
2918
2919String String::insert(int p_at_pos, const String &p_string) const {
2920 if (p_at_pos < 0) {
2921 return *this;
2922 }
2923
2924 if (p_at_pos > length()) {
2925 p_at_pos = length();
2926 }
2927
2928 String pre;
2929 if (p_at_pos > 0) {
2930 pre = substr(0, p_at_pos);
2931 }
2932
2933 String post;
2934 if (p_at_pos < length()) {
2935 post = substr(p_at_pos, length() - p_at_pos);
2936 }
2937
2938 return pre + p_string + post;
2939}
2940
2941String String::erase(int p_pos, int p_chars) const {
2942 ERR_FAIL_COND_V_MSG(p_pos < 0, "", vformat("Invalid starting position for `String.erase()`: %d. Starting position must be positive or zero.", p_pos));
2943 ERR_FAIL_COND_V_MSG(p_chars < 0, "", vformat("Invalid character count for `String.erase()`: %d. Character count must be positive or zero.", p_chars));
2944 return left(p_pos) + substr(p_pos + p_chars);
2945}
2946
2947String String::substr(int p_from, int p_chars) const {
2948 if (p_chars == -1) {
2949 p_chars = length() - p_from;
2950 }
2951
2952 if (is_empty() || p_from < 0 || p_from >= length() || p_chars <= 0) {
2953 return "";
2954 }
2955
2956 if ((p_from + p_chars) > length()) {
2957 p_chars = length() - p_from;
2958 }
2959
2960 if (p_from == 0 && p_chars >= length()) {
2961 return String(*this);
2962 }
2963
2964 String s;
2965 s.copy_from_unchecked(&get_data()[p_from], p_chars);
2966 return s;
2967}
2968
2969int String::find(const String &p_str, int p_from) const {
2970 if (p_from < 0) {
2971 return -1;
2972 }
2973
2974 const int src_len = p_str.length();
2975
2976 const int len = length();
2977
2978 if (src_len == 0 || len == 0) {
2979 return -1; // won't find anything!
2980 }
2981
2982 const char32_t *src = get_data();
2983 const char32_t *str = p_str.get_data();
2984
2985 for (int i = p_from; i <= (len - src_len); i++) {
2986 bool found = true;
2987 for (int j = 0; j < src_len; j++) {
2988 int read_pos = i + j;
2989
2990 if (read_pos >= len) {
2991 ERR_PRINT("read_pos>=len");
2992 return -1;
2993 }
2994
2995 if (src[read_pos] != str[j]) {
2996 found = false;
2997 break;
2998 }
2999 }
3000
3001 if (found) {
3002 return i;
3003 }
3004 }
3005
3006 return -1;
3007}
3008
3009int String::find(const char *p_str, int p_from) const {
3010 if (p_from < 0) {
3011 return -1;
3012 }
3013
3014 const int len = length();
3015
3016 if (len == 0) {
3017 return -1; // won't find anything!
3018 }
3019
3020 const char32_t *src = get_data();
3021
3022 int src_len = 0;
3023 while (p_str[src_len] != '\0') {
3024 src_len++;
3025 }
3026
3027 if (src_len == 1) {
3028 const char32_t needle = p_str[0];
3029
3030 for (int i = p_from; i < len; i++) {
3031 if (src[i] == needle) {
3032 return i;
3033 }
3034 }
3035
3036 } else {
3037 for (int i = p_from; i <= (len - src_len); i++) {
3038 bool found = true;
3039 for (int j = 0; j < src_len; j++) {
3040 int read_pos = i + j;
3041
3042 if (read_pos >= len) {
3043 ERR_PRINT("read_pos>=len");
3044 return -1;
3045 }
3046
3047 if (src[read_pos] != (char32_t)p_str[j]) {
3048 found = false;
3049 break;
3050 }
3051 }
3052
3053 if (found) {
3054 return i;
3055 }
3056 }
3057 }
3058
3059 return -1;
3060}
3061
3062int String::find_char(const char32_t &p_char, int p_from) const {
3063 return _cowdata.find(p_char, p_from);
3064}
3065
3066int String::findmk(const Vector<String> &p_keys, int p_from, int *r_key) const {
3067 if (p_from < 0) {
3068 return -1;
3069 }
3070 if (p_keys.size() == 0) {
3071 return -1;
3072 }
3073
3074 //int src_len=p_str.length();
3075 const String *keys = &p_keys[0];
3076 int key_count = p_keys.size();
3077 int len = length();
3078
3079 if (len == 0) {
3080 return -1; // won't find anything!
3081 }
3082
3083 const char32_t *src = get_data();
3084
3085 for (int i = p_from; i < len; i++) {
3086 bool found = true;
3087 for (int k = 0; k < key_count; k++) {
3088 found = true;
3089 if (r_key) {
3090 *r_key = k;
3091 }
3092 const char32_t *cmp = keys[k].get_data();
3093 int l = keys[k].length();
3094
3095 for (int j = 0; j < l; j++) {
3096 int read_pos = i + j;
3097
3098 if (read_pos >= len) {
3099 found = false;
3100 break;
3101 }
3102
3103 if (src[read_pos] != cmp[j]) {
3104 found = false;
3105 break;
3106 }
3107 }
3108 if (found) {
3109 break;
3110 }
3111 }
3112
3113 if (found) {
3114 return i;
3115 }
3116 }
3117
3118 return -1;
3119}
3120
3121int String::findn(const String &p_str, int p_from) const {
3122 if (p_from < 0) {
3123 return -1;
3124 }
3125
3126 int src_len = p_str.length();
3127
3128 if (src_len == 0 || length() == 0) {
3129 return -1; // won't find anything!
3130 }
3131
3132 const char32_t *srcd = get_data();
3133
3134 for (int i = p_from; i <= (length() - src_len); i++) {
3135 bool found = true;
3136 for (int j = 0; j < src_len; j++) {
3137 int read_pos = i + j;
3138
3139 if (read_pos >= length()) {
3140 ERR_PRINT("read_pos>=length()");
3141 return -1;
3142 }
3143
3144 char32_t src = _find_lower(srcd[read_pos]);
3145 char32_t dst = _find_lower(p_str[j]);
3146
3147 if (src != dst) {
3148 found = false;
3149 break;
3150 }
3151 }
3152
3153 if (found) {
3154 return i;
3155 }
3156 }
3157
3158 return -1;
3159}
3160
3161int String::rfind(const String &p_str, int p_from) const {
3162 // establish a limit
3163 int limit = length() - p_str.length();
3164 if (limit < 0) {
3165 return -1;
3166 }
3167
3168 // establish a starting point
3169 if (p_from < 0) {
3170 p_from = limit;
3171 } else if (p_from > limit) {
3172 p_from = limit;
3173 }
3174
3175 int src_len = p_str.length();
3176 int len = length();
3177
3178 if (src_len == 0 || len == 0) {
3179 return -1; // won't find anything!
3180 }
3181
3182 const char32_t *src = get_data();
3183
3184 for (int i = p_from; i >= 0; i--) {
3185 bool found = true;
3186 for (int j = 0; j < src_len; j++) {
3187 int read_pos = i + j;
3188
3189 if (read_pos >= len) {
3190 ERR_PRINT("read_pos>=len");
3191 return -1;
3192 }
3193
3194 if (src[read_pos] != p_str[j]) {
3195 found = false;
3196 break;
3197 }
3198 }
3199
3200 if (found) {
3201 return i;
3202 }
3203 }
3204
3205 return -1;
3206}
3207
3208int String::rfindn(const String &p_str, int p_from) const {
3209 // establish a limit
3210 int limit = length() - p_str.length();
3211 if (limit < 0) {
3212 return -1;
3213 }
3214
3215 // establish a starting point
3216 if (p_from < 0) {
3217 p_from = limit;
3218 } else if (p_from > limit) {
3219 p_from = limit;
3220 }
3221
3222 int src_len = p_str.length();
3223 int len = length();
3224
3225 if (src_len == 0 || len == 0) {
3226 return -1; // won't find anything!
3227 }
3228
3229 const char32_t *src = get_data();
3230
3231 for (int i = p_from; i >= 0; i--) {
3232 bool found = true;
3233 for (int j = 0; j < src_len; j++) {
3234 int read_pos = i + j;
3235
3236 if (read_pos >= len) {
3237 ERR_PRINT("read_pos>=len");
3238 return -1;
3239 }
3240
3241 char32_t srcc = _find_lower(src[read_pos]);
3242 char32_t dstc = _find_lower(p_str[j]);
3243
3244 if (srcc != dstc) {
3245 found = false;
3246 break;
3247 }
3248 }
3249
3250 if (found) {
3251 return i;
3252 }
3253 }
3254
3255 return -1;
3256}
3257
3258bool String::ends_with(const String &p_string) const {
3259 int l = p_string.length();
3260 if (l > length()) {
3261 return false;
3262 }
3263
3264 if (l == 0) {
3265 return true;
3266 }
3267
3268 const char32_t *p = &p_string[0];
3269 const char32_t *s = &operator[](length() - l);
3270
3271 for (int i = 0; i < l; i++) {
3272 if (p[i] != s[i]) {
3273 return false;
3274 }
3275 }
3276
3277 return true;
3278}
3279
3280bool String::begins_with(const String &p_string) const {
3281 int l = p_string.length();
3282 if (l > length()) {
3283 return false;
3284 }
3285
3286 if (l == 0) {
3287 return true;
3288 }
3289
3290 const char32_t *p = &p_string[0];
3291 const char32_t *s = &operator[](0);
3292
3293 for (int i = 0; i < l; i++) {
3294 if (p[i] != s[i]) {
3295 return false;
3296 }
3297 }
3298
3299 return true;
3300}
3301
3302bool String::begins_with(const char *p_string) const {
3303 int l = length();
3304 if (l == 0 || !p_string) {
3305 return false;
3306 }
3307
3308 const char32_t *str = &operator[](0);
3309 int i = 0;
3310
3311 while (*p_string && i < l) {
3312 if ((char32_t)*p_string != str[i]) {
3313 return false;
3314 }
3315 i++;
3316 p_string++;
3317 }
3318
3319 return *p_string == 0;
3320}
3321
3322bool String::is_enclosed_in(const String &p_string) const {
3323 return begins_with(p_string) && ends_with(p_string);
3324}
3325
3326bool String::is_subsequence_of(const String &p_string) const {
3327 return _base_is_subsequence_of(p_string, false);
3328}
3329
3330bool String::is_subsequence_ofn(const String &p_string) const {
3331 return _base_is_subsequence_of(p_string, true);
3332}
3333
3334bool String::is_quoted() const {
3335 return is_enclosed_in("\"") || is_enclosed_in("'");
3336}
3337
3338int String::_count(const String &p_string, int p_from, int p_to, bool p_case_insensitive) const {
3339 if (p_string.is_empty()) {
3340 return 0;
3341 }
3342 int len = length();
3343 int slen = p_string.length();
3344 if (len < slen) {
3345 return 0;
3346 }
3347 String str;
3348 if (p_from >= 0 && p_to >= 0) {
3349 if (p_to == 0) {
3350 p_to = len;
3351 } else if (p_from >= p_to) {
3352 return 0;
3353 }
3354 if (p_from == 0 && p_to == len) {
3355 str = String();
3356 str.copy_from_unchecked(&get_data()[0], len);
3357 } else {
3358 str = substr(p_from, p_to - p_from);
3359 }
3360 } else {
3361 return 0;
3362 }
3363 int c = 0;
3364 int idx = -1;
3365 do {
3366 idx = p_case_insensitive ? str.findn(p_string) : str.find(p_string);
3367 if (idx != -1) {
3368 str = str.substr(idx + slen, str.length() - slen);
3369 ++c;
3370 }
3371 } while (idx != -1);
3372 return c;
3373}
3374
3375int String::count(const String &p_string, int p_from, int p_to) const {
3376 return _count(p_string, p_from, p_to, false);
3377}
3378
3379int String::countn(const String &p_string, int p_from, int p_to) const {
3380 return _count(p_string, p_from, p_to, true);
3381}
3382
3383bool String::_base_is_subsequence_of(const String &p_string, bool case_insensitive) const {
3384 int len = length();
3385 if (len == 0) {
3386 // Technically an empty string is subsequence of any string
3387 return true;
3388 }
3389
3390 if (len > p_string.length()) {
3391 return false;
3392 }
3393
3394 const char32_t *src = &operator[](0);
3395 const char32_t *tgt = &p_string[0];
3396
3397 for (; *src && *tgt; tgt++) {
3398 bool match = false;
3399 if (case_insensitive) {
3400 char32_t srcc = _find_lower(*src);
3401 char32_t tgtc = _find_lower(*tgt);
3402 match = srcc == tgtc;
3403 } else {
3404 match = *src == *tgt;
3405 }
3406 if (match) {
3407 src++;
3408 if (!*src) {
3409 return true;
3410 }
3411 }
3412 }
3413
3414 return false;
3415}
3416
3417Vector<String> String::bigrams() const {
3418 int n_pairs = length() - 1;
3419 Vector<String> b;
3420 if (n_pairs <= 0) {
3421 return b;
3422 }
3423 b.resize(n_pairs);
3424 for (int i = 0; i < n_pairs; i++) {
3425 b.write[i] = substr(i, 2);
3426 }
3427 return b;
3428}
3429
3430// Similarity according to Sorensen-Dice coefficient
3431float String::similarity(const String &p_string) const {
3432 if (operator==(p_string)) {
3433 // Equal strings are totally similar
3434 return 1.0f;
3435 }
3436 if (length() < 2 || p_string.length() < 2) {
3437 // No way to calculate similarity without a single bigram
3438 return 0.0f;
3439 }
3440
3441 Vector<String> src_bigrams = bigrams();
3442 Vector<String> tgt_bigrams = p_string.bigrams();
3443
3444 int src_size = src_bigrams.size();
3445 int tgt_size = tgt_bigrams.size();
3446
3447 int sum = src_size + tgt_size;
3448 int inter = 0;
3449 for (int i = 0; i < src_size; i++) {
3450 for (int j = 0; j < tgt_size; j++) {
3451 if (src_bigrams[i] == tgt_bigrams[j]) {
3452 inter++;
3453 break;
3454 }
3455 }
3456 }
3457
3458 return (2.0f * inter) / sum;
3459}
3460
3461static bool _wildcard_match(const char32_t *p_pattern, const char32_t *p_string, bool p_case_sensitive) {
3462 switch (*p_pattern) {
3463 case '\0':
3464 return !*p_string;
3465 case '*':
3466 return _wildcard_match(p_pattern + 1, p_string, p_case_sensitive) || (*p_string && _wildcard_match(p_pattern, p_string + 1, p_case_sensitive));
3467 case '?':
3468 return *p_string && (*p_string != '.') && _wildcard_match(p_pattern + 1, p_string + 1, p_case_sensitive);
3469 default:
3470
3471 return (p_case_sensitive ? (*p_string == *p_pattern) : (_find_upper(*p_string) == _find_upper(*p_pattern))) && _wildcard_match(p_pattern + 1, p_string + 1, p_case_sensitive);
3472 }
3473}
3474
3475bool String::match(const String &p_wildcard) const {
3476 if (!p_wildcard.length() || !length()) {
3477 return false;
3478 }
3479
3480 return _wildcard_match(p_wildcard.get_data(), get_data(), true);
3481}
3482
3483bool String::matchn(const String &p_wildcard) const {
3484 if (!p_wildcard.length() || !length()) {
3485 return false;
3486 }
3487 return _wildcard_match(p_wildcard.get_data(), get_data(), false);
3488}
3489
// Returns a copy of this string with placeholders substituted from `values`.
//
// `placeholder` is a template in which "_" stands for the key (or index).
// `values` must be an Array or a Dictionary; any other type prints an error
// and the string is returned unchanged.
//  - Array of two-element Arrays: each inner array is a [key, value] pair.
//  - Flat Array: element i replaces the placeholder built from index i, or, if
//    the placeholder contains no "_", the first remaining occurrence of the
//    placeholder itself.
//  - Dictionary: every key/value pair is substituted.
String String::format(const Variant &values, String placeholder) const {
	// Work on a copy built from the raw character data.
	String new_string = String(this->ptr());

	if (values.get_type() == Variant::ARRAY) {
		Array values_arr = values;

		for (int i = 0; i < values_arr.size(); i++) {
			String i_as_str = String::num_int64(i);

			if (values_arr[i].get_type() == Variant::ARRAY) { // Array-in-Array structure, e.g. [["name","RobotGuy"],[0,"godot"],["strength",9000.91]].
				Array value_arr = values_arr[i];

				if (value_arr.size() == 2) {
					Variant v_key = value_arr[0];
					String key = v_key;

					Variant v_val = value_arr[1];
					String val = v_val;

					new_string = new_string.replace(placeholder.replace("_", key), val);
				} else {
					// Inner arrays that are not [key, value] pairs are reported and skipped.
					ERR_PRINT(String("STRING.format Inner Array size != 2 ").ascii().get_data());
				}
			} else { // Flat Array structure, e.g. ["RobotGuy","Logis","rookie"].
				Variant v_val = values_arr[i];
				String val = v_val;

				if (placeholder.find("_") > -1) {
					// Indexed placeholder: "_" is replaced by the element index.
					new_string = new_string.replace(placeholder.replace("_", i_as_str), val);
				} else {
					// Positional placeholder: consume one occurrence per element.
					new_string = new_string.replace_first(placeholder, val);
				}
			}
		}
	} else if (values.get_type() == Variant::DICTIONARY) {
		Dictionary d = values;
		List<Variant> keys;
		d.get_key_list(&keys);

		for (const Variant &key : keys) {
			new_string = new_string.replace(placeholder.replace("_", key), d[key]);
		}
	} else {
		ERR_PRINT(String("Invalid type: use Array or Dictionary.").ascii().get_data());
	}

	return new_string;
}
3538
3539String String::replace(const String &p_key, const String &p_with) const {
3540 String new_string;
3541 int search_from = 0;
3542 int result = 0;
3543
3544 while ((result = find(p_key, search_from)) >= 0) {
3545 new_string += substr(search_from, result - search_from);
3546 new_string += p_with;
3547 search_from = result + p_key.length();
3548 }
3549
3550 if (search_from == 0) {
3551 return *this;
3552 }
3553
3554 new_string += substr(search_from, length() - search_from);
3555
3556 return new_string;
3557}
3558
3559String String::replace(const char *p_key, const char *p_with) const {
3560 String new_string;
3561 int search_from = 0;
3562 int result = 0;
3563
3564 while ((result = find(p_key, search_from)) >= 0) {
3565 new_string += substr(search_from, result - search_from);
3566 new_string += p_with;
3567 int k = 0;
3568 while (p_key[k] != '\0') {
3569 k++;
3570 }
3571 search_from = result + k;
3572 }
3573
3574 if (search_from == 0) {
3575 return *this;
3576 }
3577
3578 new_string += substr(search_from, length() - search_from);
3579
3580 return new_string;
3581}
3582
3583String String::replace_first(const String &p_key, const String &p_with) const {
3584 int pos = find(p_key);
3585 if (pos >= 0) {
3586 return substr(0, pos) + p_with + substr(pos + p_key.length(), length());
3587 }
3588
3589 return *this;
3590}
3591
3592String String::replacen(const String &p_key, const String &p_with) const {
3593 String new_string;
3594 int search_from = 0;
3595 int result = 0;
3596
3597 while ((result = findn(p_key, search_from)) >= 0) {
3598 new_string += substr(search_from, result - search_from);
3599 new_string += p_with;
3600 search_from = result + p_key.length();
3601 }
3602
3603 if (search_from == 0) {
3604 return *this;
3605 }
3606
3607 new_string += substr(search_from, length() - search_from);
3608 return new_string;
3609}
3610
3611String String::repeat(int p_count) const {
3612 ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number.");
3613
3614 if (p_count == 0) {
3615 return "";
3616 }
3617
3618 if (p_count == 1) {
3619 return *this;
3620 }
3621
3622 int len = length();
3623 String new_string = *this;
3624 new_string.resize(p_count * len + 1);
3625
3626 char32_t *dst = new_string.ptrw();
3627 int offset = 1;
3628 int stride = 1;
3629 while (offset < p_count) {
3630 memcpy(dst + offset * len, dst, stride * len * sizeof(char32_t));
3631 offset += stride;
3632 stride = MIN(stride * 2, p_count - offset);
3633 }
3634 dst[p_count * len] = _null;
3635 return new_string;
3636}
3637
3638String String::reverse() const {
3639 int len = length();
3640 if (len <= 1) {
3641 return *this;
3642 }
3643 String new_string;
3644 new_string.resize(len + 1);
3645
3646 const char32_t *src = ptr();
3647 char32_t *dst = new_string.ptrw();
3648 for (int i = 0; i < len; i++) {
3649 dst[i] = src[len - i - 1];
3650 }
3651 dst[len] = _null;
3652 return new_string;
3653}
3654
3655String String::left(int p_len) const {
3656 if (p_len < 0) {
3657 p_len = length() + p_len;
3658 }
3659
3660 if (p_len <= 0) {
3661 return "";
3662 }
3663
3664 if (p_len >= length()) {
3665 return *this;
3666 }
3667
3668 String s;
3669 s.copy_from_unchecked(&get_data()[0], p_len);
3670 return s;
3671}
3672
3673String String::right(int p_len) const {
3674 if (p_len < 0) {
3675 p_len = length() + p_len;
3676 }
3677
3678 if (p_len <= 0) {
3679 return "";
3680 }
3681
3682 if (p_len >= length()) {
3683 return *this;
3684 }
3685
3686 String s;
3687 s.copy_from_unchecked(&get_data()[length() - p_len], p_len);
3688 return s;
3689}
3690
3691char32_t String::unicode_at(int p_idx) const {
3692 ERR_FAIL_INDEX_V(p_idx, length(), 0);
3693 return operator[](p_idx);
3694}
3695
3696String String::indent(const String &p_prefix) const {
3697 String new_string;
3698 int line_start = 0;
3699
3700 for (int i = 0; i < length(); i++) {
3701 const char32_t c = operator[](i);
3702 if (c == '\n') {
3703 if (i == line_start) {
3704 new_string += c; // Leave empty lines empty.
3705 } else {
3706 new_string += p_prefix + substr(line_start, i - line_start + 1);
3707 }
3708 line_start = i + 1;
3709 }
3710 }
3711 if (line_start != length()) {
3712 new_string += p_prefix + substr(line_start);
3713 }
3714 return new_string;
3715}
3716
// Returns a copy with the common leading indentation removed from each line.
//
// The reference indentation is the leading run of characters (code point
// <= 32) of the first line that contains text. On every line, the leading
// characters are stripped for as long as they match that reference. Lines
// without any text are reduced to a bare newline.
String String::dedent() const {
	String new_string;
	String indent; // Reference indentation, captured from the first line with text.
	bool has_indent = false; // True once `indent` has been captured.
	bool has_text = false; // True once the current line has a character > 32.
	int line_start = 0; // Index of the first character of the current line.
	int indent_stop = -1; // Index where the current line's kept content starts; -1 while undecided.

	for (int i = 0; i < length(); i++) {
		char32_t c = operator[](i);
		if (c == '\n') {
			// End of line: emit the kept part (if the line had text), then reset per-line state.
			if (has_text) {
				new_string += substr(indent_stop, i - indent_stop);
			}
			new_string += "\n";
			has_text = false;
			line_start = i + 1;
			indent_stop = -1;
		} else if (!has_text) {
			// Still inside the leading part of the line.
			if (c > 32) {
				has_text = true;
				if (!has_indent) {
					// First text seen in the whole string: everything before it is the reference indentation.
					has_indent = true;
					indent = substr(line_start, i - line_start);
					indent_stop = i;
				}
			}
			if (has_indent && indent_stop < 0) {
				// Stop stripping as soon as this line runs past the reference
				// indentation or stops matching it.
				int j = i - line_start;
				if (j >= indent.length() || c != indent[j]) {
					indent_stop = i;
				}
			}
		}
	}

	// Flush the last line when the string does not end with a newline.
	if (has_text) {
		new_string += substr(indent_stop, length() - indent_stop);
	}

	return new_string;
}
3759
3760String String::strip_edges(bool left, bool right) const {
3761 int len = length();
3762 int beg = 0, end = len;
3763
3764 if (left) {
3765 for (int i = 0; i < len; i++) {
3766 if (operator[](i) <= 32) {
3767 beg++;
3768 } else {
3769 break;
3770 }
3771 }
3772 }
3773
3774 if (right) {
3775 for (int i = len - 1; i >= 0; i--) {
3776 if (operator[](i) <= 32) {
3777 end--;
3778 } else {
3779 break;
3780 }
3781 }
3782 }
3783
3784 if (beg == 0 && end == len) {
3785 return *this;
3786 }
3787
3788 return substr(beg, end - beg);
3789}
3790
3791String String::strip_escapes() const {
3792 String new_string;
3793 for (int i = 0; i < length(); i++) {
3794 // Escape characters on first page of the ASCII table, before 32 (Space).
3795 if (operator[](i) < 32) {
3796 continue;
3797 }
3798 new_string += operator[](i);
3799 }
3800
3801 return new_string;
3802}
3803
3804String String::lstrip(const String &p_chars) const {
3805 int len = length();
3806 int beg;
3807
3808 for (beg = 0; beg < len; beg++) {
3809 if (p_chars.find_char(get(beg)) == -1) {
3810 break;
3811 }
3812 }
3813
3814 if (beg == 0) {
3815 return *this;
3816 }
3817
3818 return substr(beg, len - beg);
3819}
3820
3821String String::rstrip(const String &p_chars) const {
3822 int len = length();
3823 int end;
3824
3825 for (end = len - 1; end >= 0; end--) {
3826 if (p_chars.find_char(get(end)) == -1) {
3827 break;
3828 }
3829 }
3830
3831 if (end == len - 1) {
3832 return *this;
3833 }
3834
3835 return substr(0, end + 1);
3836}
3837
3838bool String::is_network_share_path() const {
3839 return begins_with("//") || begins_with("\\\\");
3840}
3841
// Returns a normalized version of this path.
//
// A leading "drive" part (a `scheme://` prefix, a network-share or root
// slash, or a Windows-style `X:/` prefix) is split off and kept verbatim.
// In the remainder, backslashes become forward slashes, repeated slashes are
// collapsed, "." segments are dropped and ".." segments remove the preceding
// segment. The result never ends with a slash unless it is part of the drive.
String String::simplify_path() const {
	String s = *this;
	String drive;

	// Check if we have a special path (like res://) or a protocol identifier.
	// The part before "://" must be non-empty and purely ASCII alphanumeric.
	int p = s.find("://");
	bool found = false;
	if (p > 0) {
		bool only_chars = true;
		for (int i = 0; i < p; i++) {
			if (!is_ascii_alphanumeric_char(s[i])) {
				only_chars = false;
				break;
			}
		}
		if (only_chars) {
			found = true;
			drive = s.substr(0, p + 3);
			s = s.substr(p + 3);
		}
	}
	if (!found) {
		if (is_network_share_path()) {
			// Network path, beginning with // or \\.
			drive = s.substr(0, 2);
			s = s.substr(2);
		} else if (s.begins_with("/") || s.begins_with("\\")) {
			// Absolute path.
			drive = s.substr(0, 1);
			s = s.substr(1);
		} else {
			// Windows-style drive path, like C:/ or C:\.
			p = s.find(":/");
			if (p == -1) {
				p = s.find(":\\");
			}
			// NOTE(review): when the path contains no forward slash at all,
			// s.find("/") is -1 and this condition is false, so a pure
			// backslash path such as C:\dir keeps its drive inside `s` — confirm
			// whether that is intended.
			if (p != -1 && p < s.find("/")) {
				drive = s.substr(0, p + 2);
				s = s.substr(p + 2);
			}
		}
	}

	s = s.replace("\\", "/");
	while (true) { // Collapse runs of two or more slashes until nothing changes.
		String compare = s.replace("//", "/");
		if (s == compare) {
			break;
		} else {
			s = compare;
		}
	}
	// Split into segments; the `false` argument is passed so that empty
	// segments are presumably not returned — verify against String::split().
	Vector<String> dirs = s.split("/", false);

	// Resolve "." and ".." in place. After each removal the index is moved
	// back so the loop's i++ lands on the element that slid into the gap.
	for (int i = 0; i < dirs.size(); i++) {
		String d = dirs[i];
		if (d == ".") {
			dirs.remove_at(i);
			i--;
		} else if (d == "..") {
			if (i == 0) {
				// Nothing to go up from: a leading ".." is simply dropped.
				dirs.remove_at(i);
				i--;
			} else {
				// Remove ".." together with the segment before it.
				dirs.remove_at(i);
				dirs.remove_at(i - 1);
				i -= 2;
			}
		}
	}

	s = "";

	// Re-join the remaining segments with single slashes.
	for (int i = 0; i < dirs.size(); i++) {
		if (i > 0) {
			s += "/";
		}
		s += dirs[i];
	}

	return drive + s;
}
3924
// Number of decimals to show for a value: 2 below 100, 1 below 1024, else 0.
static int _humanize_digits(int p_num) {
	if (p_num >= 1024) {
		return 0;
	}
	return p_num < 100 ? 2 : 1;
}
3934
3935String String::humanize_size(uint64_t p_size) {
3936 uint64_t _div = 1;
3937 Vector<String> prefixes;
3938 prefixes.push_back(RTR("B"));
3939 prefixes.push_back(RTR("KiB"));
3940 prefixes.push_back(RTR("MiB"));
3941 prefixes.push_back(RTR("GiB"));
3942 prefixes.push_back(RTR("TiB"));
3943 prefixes.push_back(RTR("PiB"));
3944 prefixes.push_back(RTR("EiB"));
3945
3946 int prefix_idx = 0;
3947
3948 while (prefix_idx < prefixes.size() - 1 && p_size > (_div * 1024)) {
3949 _div *= 1024;
3950 prefix_idx++;
3951 }
3952
3953 const int digits = prefix_idx > 0 ? _humanize_digits(p_size / _div) : 0;
3954 const double divisor = prefix_idx > 0 ? _div : 1;
3955
3956 return String::num(p_size / divisor).pad_decimals(digits) + " " + prefixes[prefix_idx];
3957}
3958
3959bool String::is_absolute_path() const {
3960 if (length() > 1) {
3961 return (operator[](0) == '/' || operator[](0) == '\\' || find(":/") != -1 || find(":\\") != -1);
3962 } else if ((length()) == 1) {
3963 return (operator[](0) == '/' || operator[](0) == '\\');
3964 } else {
3965 return false;
3966 }
3967}
3968
3969static _FORCE_INLINE_ bool _is_valid_identifier_bit(int p_index, char32_t p_char) {
3970 if (p_index == 0 && is_digit(p_char)) {
3971 return false; // No start with number plz.
3972 }
3973 return is_ascii_identifier_char(p_char);
3974}
3975
3976String String::validate_identifier() const {
3977 if (is_empty()) {
3978 return "_"; // Empty string is not a valid identifier;
3979 }
3980
3981 String result = *this;
3982 int len = result.length();
3983 char32_t *buffer = result.ptrw();
3984
3985 for (int i = 0; i < len; i++) {
3986 if (!_is_valid_identifier_bit(i, buffer[i])) {
3987 buffer[i] = '_';
3988 }
3989 }
3990
3991 return result;
3992}
3993
3994bool String::is_valid_identifier() const {
3995 int len = length();
3996
3997 if (len == 0) {
3998 return false;
3999 }
4000
4001 const char32_t *str = &operator[](0);
4002
4003 for (int i = 0; i < len; i++) {
4004 if (!_is_valid_identifier_bit(i, str[i])) {
4005 return false;
4006 }
4007 }
4008
4009 return true;
4010}
4011
4012bool String::is_valid_string() const {
4013 int l = length();
4014 const char32_t *src = get_data();
4015 bool valid = true;
4016 for (int i = 0; i < l; i++) {
4017 valid = valid && (src[i] < 0xd800 || (src[i] > 0xdfff && src[i] <= 0x10ffff));
4018 }
4019 return valid;
4020}
4021
4022String String::uri_encode() const {
4023 const CharString temp = utf8();
4024 String res;
4025 for (int i = 0; i < temp.length(); ++i) {
4026 uint8_t ord = temp[i];
4027 if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) {
4028 res += ord;
4029 } else {
4030 char p[4] = { '%', 0, 0, 0 };
4031 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
4032 p[1] = hex[ord >> 4];
4033 p[2] = hex[ord & 0xF];
4034 res += p;
4035 }
4036 }
4037 return res;
4038}
4039
4040String String::uri_decode() const {
4041 CharString src = utf8();
4042 CharString res;
4043 for (int i = 0; i < src.length(); ++i) {
4044 if (src[i] == '%' && i + 2 < src.length()) {
4045 char ord1 = src[i + 1];
4046 if (is_digit(ord1) || is_ascii_upper_case(ord1)) {
4047 char ord2 = src[i + 2];
4048 if (is_digit(ord2) || is_ascii_upper_case(ord2)) {
4049 char bytes[3] = { (char)ord1, (char)ord2, 0 };
4050 res += (char)strtol(bytes, nullptr, 16);
4051 i += 2;
4052 }
4053 } else {
4054 res += src[i];
4055 }
4056 } else if (src[i] == '+') {
4057 res += ' ';
4058 } else {
4059 res += src[i];
4060 }
4061 }
4062 return String::utf8(res);
4063}
4064
4065String String::c_unescape() const {
4066 String escaped = *this;
4067 escaped = escaped.replace("\\a", "\a");
4068 escaped = escaped.replace("\\b", "\b");
4069 escaped = escaped.replace("\\f", "\f");
4070 escaped = escaped.replace("\\n", "\n");
4071 escaped = escaped.replace("\\r", "\r");
4072 escaped = escaped.replace("\\t", "\t");
4073 escaped = escaped.replace("\\v", "\v");
4074 escaped = escaped.replace("\\'", "\'");
4075 escaped = escaped.replace("\\\"", "\"");
4076 escaped = escaped.replace("\\\\", "\\");
4077
4078 return escaped;
4079}
4080
4081String String::c_escape() const {
4082 String escaped = *this;
4083 escaped = escaped.replace("\\", "\\\\");
4084 escaped = escaped.replace("\a", "\\a");
4085 escaped = escaped.replace("\b", "\\b");
4086 escaped = escaped.replace("\f", "\\f");
4087 escaped = escaped.replace("\n", "\\n");
4088 escaped = escaped.replace("\r", "\\r");
4089 escaped = escaped.replace("\t", "\\t");
4090 escaped = escaped.replace("\v", "\\v");
4091 escaped = escaped.replace("\'", "\\'");
4092 escaped = escaped.replace("\"", "\\\"");
4093
4094 return escaped;
4095}
4096
4097String String::c_escape_multiline() const {
4098 String escaped = *this;
4099 escaped = escaped.replace("\\", "\\\\");
4100 escaped = escaped.replace("\"", "\\\"");
4101
4102 return escaped;
4103}
4104
4105String String::json_escape() const {
4106 String escaped = *this;
4107 escaped = escaped.replace("\\", "\\\\");
4108 escaped = escaped.replace("\b", "\\b");
4109 escaped = escaped.replace("\f", "\\f");
4110 escaped = escaped.replace("\n", "\\n");
4111 escaped = escaped.replace("\r", "\\r");
4112 escaped = escaped.replace("\t", "\\t");
4113 escaped = escaped.replace("\v", "\\v");
4114 escaped = escaped.replace("\"", "\\\"");
4115
4116 return escaped;
4117}
4118
4119String String::xml_escape(bool p_escape_quotes) const {
4120 String str = *this;
4121 str = str.replace("&", "&amp;");
4122 str = str.replace("<", "&lt;");
4123 str = str.replace(">", "&gt;");
4124 if (p_escape_quotes) {
4125 str = str.replace("'", "&apos;");
4126 str = str.replace("\"", "&quot;");
4127 }
4128 /*
4129for (int i=1;i<32;i++) {
4130 char chr[2]={i,0};
4131 str=str.replace(chr,"&#"+String::num(i)+";");
4132}*/
4133 return str;
4134}
4135
4136static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, char32_t *p_dst) {
4137 int len = 0;
4138 while (p_src_len) {
4139 if (*p_src == '&') {
4140 int eat = 0;
4141
4142 if (p_src_len >= 4 && p_src[1] == '#') {
4143 char32_t c = 0;
4144 bool overflow = false;
4145 if (p_src[2] == 'x') {
4146 // Hex entity &#x<num>;
4147 for (int i = 3; i < p_src_len; i++) {
4148 eat = i + 1;
4149 char32_t ct = p_src[i];
4150 if (ct == ';') {
4151 break;
4152 } else if (is_digit(ct)) {
4153 ct = ct - '0';
4154 } else if (ct >= 'a' && ct <= 'f') {
4155 ct = (ct - 'a') + 10;
4156 } else if (ct >= 'A' && ct <= 'F') {
4157 ct = (ct - 'A') + 10;
4158 } else {
4159 break;
4160 }
4161 if (c > (UINT32_MAX >> 4)) {
4162 overflow = true;
4163 break;
4164 }
4165 c <<= 4;
4166 c |= ct;
4167 }
4168 } else {
4169 // Decimal entity &#<num>;
4170 for (int i = 2; i < p_src_len; i++) {
4171 eat = i + 1;
4172 char32_t ct = p_src[i];
4173 if (ct == ';' || !is_digit(ct)) {
4174 break;
4175 }
4176 }
4177 if (p_src[eat - 1] == ';') {
4178 int64_t val = String::to_int(p_src + 2, eat - 3);
4179 if (val > 0 && val <= UINT32_MAX) {
4180 c = (char32_t)val;
4181 } else {
4182 overflow = true;
4183 }
4184 }
4185 }
4186
4187 // Value must be non-zero, in the range of char32_t,
4188 // actually end with ';'. If invalid, leave the entity as-is
4189 if (c == '\0' || overflow || p_src[eat - 1] != ';') {
4190 eat = 1;
4191 c = *p_src;
4192 }
4193 if (p_dst) {
4194 *p_dst = c;
4195 }
4196
4197 } else if (p_src_len >= 4 && p_src[1] == 'g' && p_src[2] == 't' && p_src[3] == ';') {
4198 if (p_dst) {
4199 *p_dst = '>';
4200 }
4201 eat = 4;
4202 } else if (p_src_len >= 4 && p_src[1] == 'l' && p_src[2] == 't' && p_src[3] == ';') {
4203 if (p_dst) {
4204 *p_dst = '<';
4205 }
4206 eat = 4;
4207 } else if (p_src_len >= 5 && p_src[1] == 'a' && p_src[2] == 'm' && p_src[3] == 'p' && p_src[4] == ';') {
4208 if (p_dst) {
4209 *p_dst = '&';
4210 }
4211 eat = 5;
4212 } else if (p_src_len >= 6 && p_src[1] == 'q' && p_src[2] == 'u' && p_src[3] == 'o' && p_src[4] == 't' && p_src[5] == ';') {
4213 if (p_dst) {
4214 *p_dst = '"';
4215 }
4216 eat = 6;
4217 } else if (p_src_len >= 6 && p_src[1] == 'a' && p_src[2] == 'p' && p_src[3] == 'o' && p_src[4] == 's' && p_src[5] == ';') {
4218 if (p_dst) {
4219 *p_dst = '\'';
4220 }
4221 eat = 6;
4222 } else {
4223 if (p_dst) {
4224 *p_dst = *p_src;
4225 }
4226 eat = 1;
4227 }
4228
4229 if (p_dst) {
4230 p_dst++;
4231 }
4232
4233 len++;
4234 p_src += eat;
4235 p_src_len -= eat;
4236 } else {
4237 if (p_dst) {
4238 *p_dst = *p_src;
4239 p_dst++;
4240 }
4241 len++;
4242 p_src++;
4243 p_src_len--;
4244 }
4245 }
4246
4247 return len;
4248}
4249
4250String String::xml_unescape() const {
4251 String str;
4252 int l = length();
4253 int len = _xml_unescape(get_data(), l, nullptr);
4254 if (len == 0) {
4255 return String();
4256 }
4257 str.resize(len + 1);
4258 _xml_unescape(get_data(), l, str.ptrw());
4259 str[len] = 0;
4260 return str;
4261}
4262
4263String String::pad_decimals(int p_digits) const {
4264 String s = *this;
4265 int c = s.find(".");
4266
4267 if (c == -1) {
4268 if (p_digits <= 0) {
4269 return s;
4270 }
4271 s += ".";
4272 c = s.length() - 1;
4273 } else {
4274 if (p_digits <= 0) {
4275 return s.substr(0, c);
4276 }
4277 }
4278
4279 if (s.length() - (c + 1) > p_digits) {
4280 return s.substr(0, c + p_digits + 1);
4281 } else {
4282 int zeros_to_add = p_digits - s.length() + (c + 1);
4283 return s + String("0").repeat(zeros_to_add);
4284 }
4285}
4286
4287String String::pad_zeros(int p_digits) const {
4288 String s = *this;
4289 int end = s.find(".");
4290
4291 if (end == -1) {
4292 end = s.length();
4293 }
4294
4295 if (end == 0) {
4296 return s;
4297 }
4298
4299 int begin = 0;
4300
4301 while (begin < end && !is_digit(s[begin])) {
4302 begin++;
4303 }
4304
4305 int zeros_to_add = p_digits - (end - begin);
4306
4307 if (zeros_to_add <= 0) {
4308 return s;
4309 } else {
4310 return s.insert(begin, String("0").repeat(zeros_to_add));
4311 }
4312}
4313
4314String String::trim_prefix(const String &p_prefix) const {
4315 String s = *this;
4316 if (s.begins_with(p_prefix)) {
4317 return s.substr(p_prefix.length(), s.length() - p_prefix.length());
4318 }
4319 return s;
4320}
4321
4322String String::trim_suffix(const String &p_suffix) const {
4323 String s = *this;
4324 if (s.ends_with(p_suffix)) {
4325 return s.substr(0, s.length() - p_suffix.length());
4326 }
4327 return s;
4328}
4329
4330bool String::is_valid_int() const {
4331 int len = length();
4332
4333 if (len == 0) {
4334 return false;
4335 }
4336
4337 int from = 0;
4338 if (len != 1 && (operator[](0) == '+' || operator[](0) == '-')) {
4339 from++;
4340 }
4341
4342 for (int i = from; i < len; i++) {
4343 if (!is_digit(operator[](i))) {
4344 return false; // no start with number plz
4345 }
4346 }
4347
4348 return true;
4349}
4350
4351bool String::is_valid_hex_number(bool p_with_prefix) const {
4352 int len = length();
4353
4354 if (len == 0) {
4355 return false;
4356 }
4357
4358 int from = 0;
4359 if (len != 1 && (operator[](0) == '+' || operator[](0) == '-')) {
4360 from++;
4361 }
4362
4363 if (p_with_prefix) {
4364 if (len < 3) {
4365 return false;
4366 }
4367 if (operator[](from) != '0' || operator[](from + 1) != 'x') {
4368 return false;
4369 }
4370 from += 2;
4371 }
4372
4373 for (int i = from; i < len; i++) {
4374 char32_t c = operator[](i);
4375 if (is_hex_digit(c)) {
4376 continue;
4377 }
4378 return false;
4379 }
4380
4381 return true;
4382}
4383
4384bool String::is_valid_float() const {
4385 int len = length();
4386
4387 if (len == 0) {
4388 return false;
4389 }
4390
4391 int from = 0;
4392 if (operator[](0) == '+' || operator[](0) == '-') {
4393 from++;
4394 }
4395
4396 bool exponent_found = false;
4397 bool period_found = false;
4398 bool sign_found = false;
4399 bool exponent_values_found = false;
4400 bool numbers_found = false;
4401
4402 for (int i = from; i < len; i++) {
4403 if (is_digit(operator[](i))) {
4404 if (exponent_found) {
4405 exponent_values_found = true;
4406 } else {
4407 numbers_found = true;
4408 }
4409 } else if (numbers_found && !exponent_found && operator[](i) == 'e') {
4410 exponent_found = true;
4411 } else if (!period_found && !exponent_found && operator[](i) == '.') {
4412 period_found = true;
4413 } else if ((operator[](i) == '-' || operator[](i) == '+') && exponent_found && !exponent_values_found && !sign_found) {
4414 sign_found = true;
4415 } else {
4416 return false; // no start with number plz
4417 }
4418 }
4419
4420 return numbers_found;
4421}
4422
4423String String::path_to_file(const String &p_path) const {
4424 // Don't get base dir for src, this is expected to be a dir already.
4425 String src = this->replace("\\", "/");
4426 String dst = p_path.replace("\\", "/").get_base_dir();
4427 String rel = src.path_to(dst);
4428 if (rel == dst) { // failed
4429 return p_path;
4430 } else {
4431 return rel + p_path.get_file();
4432 }
4433}
4434
4435String String::path_to(const String &p_path) const {
4436 String src = this->replace("\\", "/");
4437 String dst = p_path.replace("\\", "/");
4438 if (!src.ends_with("/")) {
4439 src += "/";
4440 }
4441 if (!dst.ends_with("/")) {
4442 dst += "/";
4443 }
4444
4445 if (src.begins_with("res://") && dst.begins_with("res://")) {
4446 src = src.replace("res://", "/");
4447 dst = dst.replace("res://", "/");
4448
4449 } else if (src.begins_with("user://") && dst.begins_with("user://")) {
4450 src = src.replace("user://", "/");
4451 dst = dst.replace("user://", "/");
4452
4453 } else if (src.begins_with("/") && dst.begins_with("/")) {
4454 //nothing
4455 } else {
4456 //dos style
4457 String src_begin = src.get_slicec('/', 0);
4458 String dst_begin = dst.get_slicec('/', 0);
4459
4460 if (src_begin != dst_begin) {
4461 return p_path; //impossible to do this
4462 }
4463
4464 src = src.substr(src_begin.length(), src.length());
4465 dst = dst.substr(dst_begin.length(), dst.length());
4466 }
4467
4468 //remove leading and trailing slash and split
4469 Vector<String> src_dirs = src.substr(1, src.length() - 2).split("/");
4470 Vector<String> dst_dirs = dst.substr(1, dst.length() - 2).split("/");
4471
4472 //find common parent
4473 int common_parent = 0;
4474
4475 while (true) {
4476 if (src_dirs.size() == common_parent) {
4477 break;
4478 }
4479 if (dst_dirs.size() == common_parent) {
4480 break;
4481 }
4482 if (src_dirs[common_parent] != dst_dirs[common_parent]) {
4483 break;
4484 }
4485 common_parent++;
4486 }
4487
4488 common_parent--;
4489
4490 int dirs_to_backtrack = (src_dirs.size() - 1) - common_parent;
4491 String dir = String("../").repeat(dirs_to_backtrack);
4492
4493 for (int i = common_parent + 1; i < dst_dirs.size(); i++) {
4494 dir += dst_dirs[i] + "/";
4495 }
4496
4497 if (dir.length() == 0) {
4498 dir = "./";
4499 }
4500 return dir;
4501}
4502
4503bool String::is_valid_html_color() const {
4504 return Color::html_is_valid(*this);
4505}
4506
4507// Changes made to the set of invalid filename characters must also be reflected in the String documentation for is_valid_filename.
4508static const char *invalid_filename_characters = ": / \\ ? * \" | % < >";
4509
4510bool String::is_valid_filename() const {
4511 String stripped = strip_edges();
4512 if (*this != stripped) {
4513 return false;
4514 }
4515
4516 if (stripped.is_empty()) {
4517 return false;
4518 }
4519
4520 Vector<String> chars = String(invalid_filename_characters).split(" ");
4521 for (const String &ch : chars) {
4522 if (contains(ch)) {
4523 return false;
4524 }
4525 }
4526 return true;
4527}
4528
4529String String::validate_filename() const {
4530 Vector<String> chars = String(invalid_filename_characters).split(" ");
4531 String name = strip_edges();
4532 for (int i = 0; i < chars.size(); i++) {
4533 name = name.replace(chars[i], "_");
4534 }
4535 return name;
4536}
4537
4538bool String::is_valid_ip_address() const {
4539 if (find(":") >= 0) {
4540 Vector<String> ip = split(":");
4541 for (int i = 0; i < ip.size(); i++) {
4542 String n = ip[i];
4543 if (n.is_empty()) {
4544 continue;
4545 }
4546 if (n.is_valid_hex_number(false)) {
4547 int64_t nint = n.hex_to_int();
4548 if (nint < 0 || nint > 0xffff) {
4549 return false;
4550 }
4551 continue;
4552 }
4553 if (!n.is_valid_ip_address()) {
4554 return false;
4555 }
4556 }
4557
4558 } else {
4559 Vector<String> ip = split(".");
4560 if (ip.size() != 4) {
4561 return false;
4562 }
4563 for (int i = 0; i < ip.size(); i++) {
4564 String n = ip[i];
4565 if (!n.is_valid_int()) {
4566 return false;
4567 }
4568 int val = n.to_int();
4569 if (val < 0 || val > 255) {
4570 return false;
4571 }
4572 }
4573 }
4574
4575 return true;
4576}
4577
4578bool String::is_resource_file() const {
4579 return begins_with("res://") && find("::") == -1;
4580}
4581
4582bool String::is_relative_path() const {
4583 return !is_absolute_path();
4584}
4585
4586String String::get_base_dir() const {
4587 int end = 0;
4588
4589 // URL scheme style base.
4590 int basepos = find("://");
4591 if (basepos != -1) {
4592 end = basepos + 3;
4593 }
4594
4595 // Windows top level directory base.
4596 if (end == 0) {
4597 basepos = find(":/");
4598 if (basepos == -1) {
4599 basepos = find(":\\");
4600 }
4601 if (basepos != -1) {
4602 end = basepos + 2;
4603 }
4604 }
4605
4606 // Windows UNC network share path.
4607 if (end == 0) {
4608 if (is_network_share_path()) {
4609 basepos = find("/", 2);
4610 if (basepos == -1) {
4611 basepos = find("\\", 2);
4612 }
4613 int servpos = find("/", basepos + 1);
4614 if (servpos == -1) {
4615 servpos = find("\\", basepos + 1);
4616 }
4617 if (servpos != -1) {
4618 end = servpos + 1;
4619 }
4620 }
4621 }
4622
4623 // Unix root directory base.
4624 if (end == 0) {
4625 if (begins_with("/")) {
4626 end = 1;
4627 }
4628 }
4629
4630 String rs;
4631 String base;
4632 if (end != 0) {
4633 rs = substr(end, length());
4634 base = substr(0, end);
4635 } else {
4636 rs = *this;
4637 }
4638
4639 int sep = MAX(rs.rfind("/"), rs.rfind("\\"));
4640 if (sep == -1) {
4641 return base;
4642 }
4643
4644 return base + rs.substr(0, sep);
4645}
4646
4647String String::get_file() const {
4648 int sep = MAX(rfind("/"), rfind("\\"));
4649 if (sep == -1) {
4650 return *this;
4651 }
4652
4653 return substr(sep + 1, length());
4654}
4655
4656String String::get_extension() const {
4657 int pos = rfind(".");
4658 if (pos < 0 || pos < MAX(rfind("/"), rfind("\\"))) {
4659 return "";
4660 }
4661
4662 return substr(pos + 1, length());
4663}
4664
4665String String::path_join(const String &p_file) const {
4666 if (is_empty()) {
4667 return p_file;
4668 }
4669 if (operator[](length() - 1) == '/' || (p_file.size() > 0 && p_file.operator[](0) == '/')) {
4670 return *this + p_file;
4671 }
4672 return *this + "/" + p_file;
4673}
4674
4675String String::property_name_encode() const {
4676 // Escape and quote strings with extended ASCII or further Unicode characters
4677 // as well as '"', '=' or ' ' (32)
4678 const char32_t *cstr = get_data();
4679 for (int i = 0; cstr[i]; i++) {
4680 if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] == ';' || cstr[i] == '[' || cstr[i] == ']' || cstr[i] < 33 || cstr[i] > 126) {
4681 return "\"" + c_escape_multiline() + "\"";
4682 }
4683 }
4684 // Keep as is
4685 return *this;
4686}
4687
4688// Changes made to the set of invalid characters must also be reflected in the String documentation.
4689
4690static const char32_t invalid_node_name_characters[] = { '.', ':', '@', '/', '\"', UNIQUE_NODE_PREFIX[0], 0 };
4691
4692String String::get_invalid_node_name_characters() {
4693 // Do not use this function for critical validation.
4694 String r;
4695 const char32_t *c = invalid_node_name_characters;
4696 while (*c) {
4697 if (c != invalid_node_name_characters) {
4698 r += " ";
4699 }
4700 r += String::chr(*c);
4701 c++;
4702 }
4703 return r;
4704}
4705
4706String String::validate_node_name() const {
4707 // This is a critical validation in node addition, so it must be optimized.
4708 const char32_t *cn = ptr();
4709 if (cn == nullptr) {
4710 return String();
4711 }
4712 bool valid = true;
4713 uint32_t idx = 0;
4714 while (cn[idx]) {
4715 const char32_t *c = invalid_node_name_characters;
4716 while (*c) {
4717 if (cn[idx] == *c) {
4718 valid = false;
4719 break;
4720 }
4721 c++;
4722 }
4723 if (!valid) {
4724 break;
4725 }
4726 idx++;
4727 }
4728
4729 if (valid) {
4730 return *this;
4731 }
4732
4733 String validated = *this;
4734 char32_t *nn = validated.ptrw();
4735 while (nn[idx]) {
4736 const char32_t *c = invalid_node_name_characters;
4737 while (*c) {
4738 if (nn[idx] == *c) {
4739 nn[idx] = '_';
4740 break;
4741 }
4742 c++;
4743 }
4744 idx++;
4745 }
4746
4747 return validated;
4748}
4749
4750String String::get_basename() const {
4751 int pos = rfind(".");
4752 if (pos < 0 || pos < MAX(rfind("/"), rfind("\\"))) {
4753 return *this;
4754 }
4755
4756 return substr(0, pos);
4757}
4758
4759String itos(int64_t p_val) {
4760 return String::num_int64(p_val);
4761}
4762
4763String uitos(uint64_t p_val) {
4764 return String::num_uint64(p_val);
4765}
4766
4767String rtos(double p_val) {
4768 return String::num(p_val);
4769}
4770
4771String rtoss(double p_val) {
4772 return String::num_scientific(p_val);
4773}
4774
4775// Right-pad with a character.
4776String String::rpad(int min_length, const String &character) const {
4777 String s = *this;
4778 int padding = min_length - s.length();
4779 if (padding > 0) {
4780 s += character.repeat(padding);
4781 }
4782 return s;
4783}
4784
4785// Left-pad with a character.
4786String String::lpad(int min_length, const String &character) const {
4787 String s = *this;
4788 int padding = min_length - s.length();
4789 if (padding > 0) {
4790 s = character.repeat(padding) + s;
4791 }
4792 return s;
4793}
4794
4795// sprintf is implemented in GDScript via:
4796// "fish %s pie" % "frog"
4797// "fish %s %d pie" % ["frog", 12]
4798// In case of an error, the string returned is the error description and "error" is true.
4799String String::sprintf(const Array &values, bool *error) const {
4800 String formatted;
4801 char32_t *self = (char32_t *)get_data();
4802 bool in_format = false;
4803 int value_index = 0;
4804 int min_chars = 0;
4805 int min_decimals = 0;
4806 bool in_decimals = false;
4807 bool pad_with_zeros = false;
4808 bool left_justified = false;
4809 bool show_sign = false;
4810
4811 if (error) {
4812 *error = true;
4813 }
4814
4815 for (; *self; self++) {
4816 const char32_t c = *self;
4817
4818 if (in_format) { // We have % - let's see what else we get.
4819 switch (c) {
4820 case '%': { // Replace %% with %
4821 formatted += chr(c);
4822 in_format = false;
4823 break;
4824 }
4825 case 'd': // Integer (signed)
4826 case 'o': // Octal
4827 case 'x': // Hexadecimal (lowercase)
4828 case 'X': { // Hexadecimal (uppercase)
4829 if (value_index >= values.size()) {
4830 return "not enough arguments for format string";
4831 }
4832
4833 if (!values[value_index].is_num()) {
4834 return "a number is required";
4835 }
4836
4837 int64_t value = values[value_index];
4838 int base = 16;
4839 bool capitalize = false;
4840 switch (c) {
4841 case 'd':
4842 base = 10;
4843 break;
4844 case 'o':
4845 base = 8;
4846 break;
4847 case 'x':
4848 break;
4849 case 'X':
4850 base = 16;
4851 capitalize = true;
4852 break;
4853 }
4854 // Get basic number.
4855 String str = String::num_int64(ABS(value), base, capitalize);
4856 int number_len = str.length();
4857
4858 // Padding.
4859 int pad_chars_count = (value < 0 || show_sign) ? min_chars - 1 : min_chars;
4860 String pad_char = pad_with_zeros ? String("0") : String(" ");
4861 if (left_justified) {
4862 str = str.rpad(pad_chars_count, pad_char);
4863 } else {
4864 str = str.lpad(pad_chars_count, pad_char);
4865 }
4866
4867 // Sign.
4868 if (show_sign || value < 0) {
4869 String sign_char = value < 0 ? "-" : "+";
4870 if (left_justified) {
4871 str = str.insert(0, sign_char);
4872 } else {
4873 str = str.insert(pad_with_zeros ? 0 : str.length() - number_len, sign_char);
4874 }
4875 }
4876
4877 formatted += str;
4878 ++value_index;
4879 in_format = false;
4880
4881 break;
4882 }
4883 case 'f': { // Float
4884 if (value_index >= values.size()) {
4885 return "not enough arguments for format string";
4886 }
4887
4888 if (!values[value_index].is_num()) {
4889 return "a number is required";
4890 }
4891
4892 double value = values[value_index];
4893 bool is_negative = signbit(value);
4894 String str = String::num(Math::abs(value), min_decimals);
4895 const bool is_finite = Math::is_finite(value);
4896
4897 // Pad decimals out.
4898 if (is_finite) {
4899 str = str.pad_decimals(min_decimals);
4900 }
4901
4902 int initial_len = str.length();
4903
4904 // Padding. Leave room for sign later if required.
4905 int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars;
4906 String pad_char = (pad_with_zeros && is_finite) ? String("0") : String(" "); // Never pad NaN or inf with zeros
4907 if (left_justified) {
4908 str = str.rpad(pad_chars_count, pad_char);
4909 } else {
4910 str = str.lpad(pad_chars_count, pad_char);
4911 }
4912
4913 // Add sign if needed.
4914 if (show_sign || is_negative) {
4915 String sign_char = is_negative ? "-" : "+";
4916 if (left_justified) {
4917 str = str.insert(0, sign_char);
4918 } else {
4919 str = str.insert(pad_with_zeros ? 0 : str.length() - initial_len, sign_char);
4920 }
4921 }
4922
4923 formatted += str;
4924 ++value_index;
4925 in_format = false;
4926 break;
4927 }
4928 case 'v': { // Vector2/3/4/2i/3i/4i
4929 if (value_index >= values.size()) {
4930 return "not enough arguments for format string";
4931 }
4932
4933 int count;
4934 switch (values[value_index].get_type()) {
4935 case Variant::VECTOR2:
4936 case Variant::VECTOR2I: {
4937 count = 2;
4938 } break;
4939 case Variant::VECTOR3:
4940 case Variant::VECTOR3I: {
4941 count = 3;
4942 } break;
4943 case Variant::VECTOR4:
4944 case Variant::VECTOR4I: {
4945 count = 4;
4946 } break;
4947 default: {
4948 return "%v requires a vector type (Vector2/3/4/2i/3i/4i)";
4949 }
4950 }
4951
4952 Vector4 vec = values[value_index];
4953 String str = "(";
4954 for (int i = 0; i < count; i++) {
4955 double val = vec[i];
4956 String number_str = String::num(Math::abs(val), min_decimals);
4957 const bool is_finite = Math::is_finite(val);
4958
4959 // Pad decimals out.
4960 if (is_finite) {
4961 number_str = number_str.pad_decimals(min_decimals);
4962 }
4963
4964 int initial_len = number_str.length();
4965
4966 // Padding. Leave room for sign later if required.
4967 int pad_chars_count = val < 0 ? min_chars - 1 : min_chars;
4968 String pad_char = (pad_with_zeros && is_finite) ? String("0") : String(" "); // Never pad NaN or inf with zeros
4969 if (left_justified) {
4970 number_str = number_str.rpad(pad_chars_count, pad_char);
4971 } else {
4972 number_str = number_str.lpad(pad_chars_count, pad_char);
4973 }
4974
4975 // Add sign if needed.
4976 if (val < 0) {
4977 if (left_justified) {
4978 number_str = number_str.insert(0, "-");
4979 } else {
4980 number_str = number_str.insert(pad_with_zeros ? 0 : number_str.length() - initial_len, "-");
4981 }
4982 }
4983
4984 // Add number to combined string
4985 str += number_str;
4986
4987 if (i < count - 1) {
4988 str += ", ";
4989 }
4990 }
4991 str += ")";
4992
4993 formatted += str;
4994 ++value_index;
4995 in_format = false;
4996 break;
4997 }
4998 case 's': { // String
4999 if (value_index >= values.size()) {
5000 return "not enough arguments for format string";
5001 }
5002
5003 String str = values[value_index];
5004 // Padding.
5005 if (left_justified) {
5006 str = str.rpad(min_chars);
5007 } else {
5008 str = str.lpad(min_chars);
5009 }
5010
5011 formatted += str;
5012 ++value_index;
5013 in_format = false;
5014 break;
5015 }
5016 case 'c': {
5017 if (value_index >= values.size()) {
5018 return "not enough arguments for format string";
5019 }
5020
5021 // Convert to character.
5022 String str;
5023 if (values[value_index].is_num()) {
5024 int value = values[value_index];
5025 if (value < 0) {
5026 return "unsigned integer is lower than minimum";
5027 } else if (value >= 0xd800 && value <= 0xdfff) {
5028 return "unsigned integer is invalid Unicode character";
5029 } else if (value > 0x10ffff) {
5030 return "unsigned integer is greater than maximum";
5031 }
5032 str = chr(values[value_index]);
5033 } else if (values[value_index].get_type() == Variant::STRING) {
5034 str = values[value_index];
5035 if (str.length() != 1) {
5036 return "%c requires number or single-character string";
5037 }
5038 } else {
5039 return "%c requires number or single-character string";
5040 }
5041
5042 // Padding.
5043 if (left_justified) {
5044 str = str.rpad(min_chars);
5045 } else {
5046 str = str.lpad(min_chars);
5047 }
5048
5049 formatted += str;
5050 ++value_index;
5051 in_format = false;
5052 break;
5053 }
5054 case '-': { // Left justify
5055 left_justified = true;
5056 break;
5057 }
5058 case '+': { // Show + if positive.
5059 show_sign = true;
5060 break;
5061 }
5062 case '0':
5063 case '1':
5064 case '2':
5065 case '3':
5066 case '4':
5067 case '5':
5068 case '6':
5069 case '7':
5070 case '8':
5071 case '9': {
5072 int n = c - '0';
5073 if (in_decimals) {
5074 min_decimals *= 10;
5075 min_decimals += n;
5076 } else {
5077 if (c == '0' && min_chars == 0) {
5078 if (left_justified) {
5079 WARN_PRINT("'0' flag ignored with '-' flag in string format");
5080 } else {
5081 pad_with_zeros = true;
5082 }
5083 } else {
5084 min_chars *= 10;
5085 min_chars += n;
5086 }
5087 }
5088 break;
5089 }
5090 case '.': { // Float/Vector separator.
5091 if (in_decimals) {
5092 return "too many decimal points in format";
5093 }
5094 in_decimals = true;
5095 min_decimals = 0; // We want to add the value manually.
5096 break;
5097 }
5098
5099 case '*': { // Dynamic width, based on value.
5100 if (value_index >= values.size()) {
5101 return "not enough arguments for format string";
5102 }
5103
5104 Variant::Type value_type = values[value_index].get_type();
5105 if (!values[value_index].is_num() &&
5106 value_type != Variant::VECTOR2 && value_type != Variant::VECTOR2I &&
5107 value_type != Variant::VECTOR3 && value_type != Variant::VECTOR3I &&
5108 value_type != Variant::VECTOR4 && value_type != Variant::VECTOR4I) {
5109 return "* wants number or vector";
5110 }
5111
5112 int size = values[value_index];
5113
5114 if (in_decimals) {
5115 min_decimals = size;
5116 } else {
5117 min_chars = size;
5118 }
5119
5120 ++value_index;
5121 break;
5122 }
5123
5124 default: {
5125 return "unsupported format character";
5126 }
5127 }
5128 } else { // Not in format string.
5129 switch (c) {
5130 case '%':
5131 in_format = true;
5132 // Back to defaults:
5133 min_chars = 0;
5134 min_decimals = 6;
5135 pad_with_zeros = false;
5136 left_justified = false;
5137 show_sign = false;
5138 in_decimals = false;
5139 break;
5140 default:
5141 formatted += chr(c);
5142 }
5143 }
5144 }
5145
5146 if (in_format) {
5147 return "incomplete format";
5148 }
5149
5150 if (value_index != values.size()) {
5151 return "not all arguments converted during string formatting";
5152 }
5153
5154 if (error) {
5155 *error = false;
5156 }
5157 return formatted;
5158}
5159
5160String String::quote(String quotechar) const {
5161 return quotechar + *this + quotechar;
5162}
5163
5164String String::unquote() const {
5165 if (!is_quoted()) {
5166 return *this;
5167 }
5168
5169 return substr(1, length() - 2);
5170}
5171
5172Vector<uint8_t> String::to_ascii_buffer() const {
5173 const String *s = this;
5174 if (s->is_empty()) {
5175 return Vector<uint8_t>();
5176 }
5177 CharString charstr = s->ascii();
5178
5179 Vector<uint8_t> retval;
5180 size_t len = charstr.length();
5181 retval.resize(len);
5182 uint8_t *w = retval.ptrw();
5183 memcpy(w, charstr.ptr(), len);
5184
5185 return retval;
5186}
5187
5188Vector<uint8_t> String::to_utf8_buffer() const {
5189 const String *s = this;
5190 if (s->is_empty()) {
5191 return Vector<uint8_t>();
5192 }
5193 CharString charstr = s->utf8();
5194
5195 Vector<uint8_t> retval;
5196 size_t len = charstr.length();
5197 retval.resize(len);
5198 uint8_t *w = retval.ptrw();
5199 memcpy(w, charstr.ptr(), len);
5200
5201 return retval;
5202}
5203
5204Vector<uint8_t> String::to_utf16_buffer() const {
5205 const String *s = this;
5206 if (s->is_empty()) {
5207 return Vector<uint8_t>();
5208 }
5209 Char16String charstr = s->utf16();
5210
5211 Vector<uint8_t> retval;
5212 size_t len = charstr.length() * sizeof(char16_t);
5213 retval.resize(len);
5214 uint8_t *w = retval.ptrw();
5215 memcpy(w, (const void *)charstr.ptr(), len);
5216
5217 return retval;
5218}
5219
5220Vector<uint8_t> String::to_utf32_buffer() const {
5221 const String *s = this;
5222 if (s->is_empty()) {
5223 return Vector<uint8_t>();
5224 }
5225
5226 Vector<uint8_t> retval;
5227 size_t len = s->length() * sizeof(char32_t);
5228 retval.resize(len);
5229 uint8_t *w = retval.ptrw();
5230 memcpy(w, (const void *)s->ptr(), len);
5231
5232 return retval;
5233}
5234
5235Vector<uint8_t> String::to_wchar_buffer() const {
5236#ifdef WINDOWS_ENABLED
5237 return to_utf16_buffer();
5238#else
5239 return to_utf32_buffer();
5240#endif
5241}
5242
5243#ifdef TOOLS_ENABLED
5244/**
5245 * "Tools TRanslate". Performs string replacement for internationalization
5246 * within the editor. A translation context can optionally be specified to
5247 * disambiguate between identical source strings in translations. When
5248 * placeholders are desired, use `vformat(TTR("Example: %s"), some_string)`.
5249 * If a string mentions a quantity (and may therefore need a dynamic plural form),
5250 * use `TTRN()` instead of `TTR()`.
5251 *
5252 * NOTE: Only use `TTR()` in editor-only code (typically within the `editor/` folder).
5253 * For translations that can be supplied by exported projects, use `RTR()` instead.
5254 */
5255String TTR(const String &p_text, const String &p_context) {
5256 if (TranslationServer::get_singleton()) {
5257 return TranslationServer::get_singleton()->tool_translate(p_text, p_context);
5258 }
5259
5260 return p_text;
5261}
5262
5263/**
5264 * "Tools TRanslate for N items". Performs string replacement for
5265 * internationalization within the editor. A translation context can optionally
5266 * be specified to disambiguate between identical source strings in
5267 * translations. Use `TTR()` if the string doesn't need dynamic plural form.
5268 * When placeholders are desired, use
5269 * `vformat(TTRN("%d item", "%d items", some_integer), some_integer)`.
5270 * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
5271 *
5272 * NOTE: Only use `TTRN()` in editor-only code (typically within the `editor/` folder).
5273 * For translations that can be supplied by exported projects, use `RTRN()` instead.
5274 */
5275String TTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
5276 if (TranslationServer::get_singleton()) {
5277 return TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
5278 }
5279
5280 // Return message based on English plural rule if translation is not possible.
5281 if (p_n == 1) {
5282 return p_text;
5283 }
5284 return p_text_plural;
5285}
5286
5287/**
5288 * "Docs TRanslate". Used for the editor class reference documentation,
5289 * handling descriptions extracted from the XML.
5290 * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
5291 * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
5292 */
5293String DTR(const String &p_text, const String &p_context) {
5294 // Comes straight from the XML, so remove indentation and any trailing whitespace.
5295 const String text = p_text.dedent().strip_edges();
5296
5297 if (TranslationServer::get_singleton()) {
5298 return String(TranslationServer::get_singleton()->doc_translate(text, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
5299 }
5300
5301 return text.replace("$DOCS_URL", VERSION_DOCS_URL);
5302}
5303
5304/**
5305 * "Docs TRanslate for N items". Used for the editor class reference documentation
5306 * (with support for plurals), handling descriptions extracted from the XML.
5307 * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
5308 * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
5309 */
5310String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
5311 const String text = p_text.dedent().strip_edges();
5312 const String text_plural = p_text_plural.dedent().strip_edges();
5313
5314 if (TranslationServer::get_singleton()) {
5315 return String(TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
5316 }
5317
5318 // Return message based on English plural rule if translation is not possible.
5319 if (p_n == 1) {
5320 return text.replace("$DOCS_URL", VERSION_DOCS_URL);
5321 }
5322 return text_plural.replace("$DOCS_URL", VERSION_DOCS_URL);
5323}
5324#endif
5325
5326/**
5327 * "Run-time TRanslate". Performs string replacement for internationalization
5328 * within a running project. The translation string must be supplied by the
5329 * project, as Godot does not provide built-in translations for `RTR()` strings
5330 * to keep binary size low. A translation context can optionally be specified to
5331 * disambiguate between identical source strings in translations. When
5332 * placeholders are desired, use `vformat(RTR("Example: %s"), some_string)`.
5333 * If a string mentions a quantity (and may therefore need a dynamic plural form),
5334 * use `RTRN()` instead of `RTR()`.
5335 *
5336 * NOTE: Do not use `RTR()` in editor-only code (typically within the `editor/`
5337 * folder). For editor translations, use `TTR()` instead.
5338 */
5339String RTR(const String &p_text, const String &p_context) {
5340 if (TranslationServer::get_singleton()) {
5341 String rtr = TranslationServer::get_singleton()->tool_translate(p_text, p_context);
5342 if (rtr.is_empty() || rtr == p_text) {
5343 return TranslationServer::get_singleton()->translate(p_text, p_context);
5344 } else {
5345 return rtr;
5346 }
5347 }
5348
5349 return p_text;
5350}
5351
5352/**
5353 * "Run-time TRanslate for N items". Performs string replacement for
5354 * internationalization within a running project. The translation string must be
5355 * supplied by the project, as Godot does not provide built-in translations for
5356 * `RTRN()` strings to keep binary size low. A translation context can
5357 * optionally be specified to disambiguate between identical source strings in
5358 * translations. Use `RTR()` if the string doesn't need dynamic plural form.
5359 * When placeholders are desired, use
5360 * `vformat(RTRN("%d item", "%d items", some_integer), some_integer)`.
5361 * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
5362 *
5363 * NOTE: Do not use `RTRN()` in editor-only code (typically within the `editor/`
5364 * folder). For editor translations, use `TTRN()` instead.
5365 */
5366String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
5367 if (TranslationServer::get_singleton()) {
5368 String rtr = TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
5369 if (rtr.is_empty() || rtr == p_text || rtr == p_text_plural) {
5370 return TranslationServer::get_singleton()->translate_plural(p_text, p_text_plural, p_n, p_context);
5371 } else {
5372 return rtr;
5373 }
5374 }
5375
5376 // Return message based on English plural rule if translation is not possible.
5377 if (p_n == 1) {
5378 return p_text;
5379 }
5380 return p_text_plural;
5381}
5382