unistr.cpp source code [Godot/thirdparty/icu4c/common/unistr.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	******************************************************************************
5	* Copyright (C) 1999-2016, International Business Machines Corporation and
6	* others. All Rights Reserved.
7	******************************************************************************
8	*
9	* File unistr.cpp
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 09/25/98 stephen Creation.
15	* 04/20/99 stephen Overhauled per 4/16 code review.
16	* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17	* 11/18/99 aliu Added handleReplaceBetween() to make inherit from
18	* Replaceable.
19	* 06/25/01 grhoten Removed the dependency on iostream
20	******************************************************************************
21	*/
22
23	#include "unicode/utypes.h"
24	#include "unicode/appendable.h"
25	#include "unicode/putil.h"
26	#include "cstring.h"
27	#include "cmemory.h"
28	#include "unicode/ustring.h"
29	#include "unicode/unistr.h"
30	#include "unicode/utf.h"
31	#include "unicode/utf16.h"
32	#include "uelement.h"
33	#include "ustr_imp.h"
34	#include "umutex.h"
35	#include "uassert.h"
36
37	#if 0
38
39	#include <iostream>
40	using namespace std;
41
42	//DEBUGGING
43	void
44	print(const UnicodeString& s,
45	const char *name)
46	{
47	char16_t c;
48	cout << name << ":\|";
49	for(int i = `0`; i < s.length(); ++i) {
50	c = s[i];
51	if(c>= `0x007E` \|\| c < `0x0020`)
52	cout << "[0x" << hex << s[i] << "]";
53	else
54	cout << (char) s[i];
55	}
56	cout << `'\|'` << endl;
57	}
58
59	void
60	print(const char16_t *s,
61	int32_t len,
62	const char *name)
63	{
64	char16_t c;
65	cout << name << ":\|";
66	for(int i = `0`; i < len; ++i) {
67	c = s[i];
68	if(c>= `0x007E` \|\| c < `0x0020`)
69	cout << "[0x" << hex << s[i] << "]";
70	else
71	cout << (char) s[i];
72	}
73	cout << `'\|'` << endl;
74	}
75	// END DEBUGGING
76	#endif
77
78	// Local function definitions for now
79
80	// need to copy areas that may overlap
81	static
82	inline void
83	us_arrayCopy(const char16_t *src, int32_t srcStart,
84	char16_t *dst, int32_t dstStart, int32_t count)
85	{
86	if(count>`0`) {
87	uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
88	}
89	}
90
91	// u_unescapeAt() callback to get a char16_t from a UnicodeString
92	U_CDECL_BEGIN
93	static char16_t U_CALLCONV
94	UnicodeString_charAt(int32_t offset, void *context) {
95	return ((icu::UnicodeString*) context)->charAt(offset);
96	}
97	U_CDECL_END
98
99	U_NAMESPACE_BEGIN
100
/* The Replaceable virtual destructor can't be defined in the header
   due to how AIX works with multiple definitions of virtual functions.
*/
Replaceable::~Replaceable() {}
105
106	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
107
108	UnicodeString U_EXPORT2
109	operator+ (const UnicodeString &s1, const UnicodeString &s2) {
110	return
111	UnicodeString (s1.length()+s2.length()+`1`, (UChar32)`0`, `0`).
112	append(s1).
113	append(s2);
114	}
115
116	//========================================
117	// Reference Counting functions, put at top of file so that optimizing compilers
118	// have a chance to automatically inline.
119	//========================================
120
121	void
122	UnicodeString::addRef() {
123	umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - `1`);
124	}
125
126	int32_t
127	UnicodeString::removeRef() {
128	return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - `1`);
129	}
130
131	int32_t
132	UnicodeString::refCount() const {
133	return umtx_loadAcquire(((u_atomic_int32_t )fUnion.fFields.fArray - `1`));
134	}
135
136	void
137	UnicodeString::releaseArray() {
138	if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == `0`) {
139	uprv_free((int32_t *)fUnion.fFields.fArray - `1`);
140	}
141	}
142
143
144
145	//========================================
146	// Constructors
147	//========================================
148
149	// The default constructor is inline in unistr.h.
150
151	UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
152	fUnion.fFields.fLengthAndFlags = `0`;
153	if(count <= `0` \|\| (uint32_t)c > `0x10ffff`) {
154	// just allocate and do not do anything else
155	allocate(capacity);
156	} else if(c <= `0xffff`) {
157	int32_t length = count;
158	if(capacity < length) {
159	capacity = length;
160	}
161	if(allocate(capacity)) {
162	char16_t *array = getArrayStart();
163	char16_t unit = (char16_t)c;
164	for(int32_t i = `0`; i < length; ++i) {
165	array[i] = unit;
166	}
167	setLength(length);
168	}
169	} else { // supplementary code point, write surrogate pairs
170	if(count > (INT32_MAX / `2`)) {
171	// We would get more than 2G UChars.
172	allocate(capacity);
173	return;
174	}
175	int32_t length = count * `2`;
176	if(capacity < length) {
177	capacity = length;
178	}
179	if(allocate(capacity)) {
180	char16_t *array = getArrayStart();
181	char16_t lead = U16_LEAD(c);
182	char16_t trail = U16_TRAIL(c);
183	for(int32_t i = `0`; i < length; i += `2`) {
184	array[i] = lead;
185	array[i + `1`] = trail;
186	}
187	setLength(length);
188	}
189	}
190	}
191
192	UnicodeString::UnicodeString(char16_t ch) {
193	fUnion.fFields.fLengthAndFlags = kLength1 \| kShortString;
194	fUnion.fStackFields.fBuffer[`0`] = ch;
195	}
196
197	UnicodeString::UnicodeString(UChar32 ch) {
198	fUnion.fFields.fLengthAndFlags = kShortString;
199	int32_t i = `0`;
200	UBool isError = false;
201	U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
202	// We test isError so that the compiler does not complain that we don't.
203	// If isError then i==0 which is what we want anyway.
204	if(!isError) {
205	setShortLength(i);
206	}
207	}
208
209	UnicodeString::UnicodeString(const char16_t *text) {
210	fUnion.fFields.fLengthAndFlags = kShortString;
211	doAppend(text, `0`, -`1`);
212	}
213
214	UnicodeString::UnicodeString(const char16_t *text,
215	int32_t textLength) {
216	fUnion.fFields.fLengthAndFlags = kShortString;
217	doAppend(text, `0`, textLength);
218	}
219
220	UnicodeString::UnicodeString(UBool isTerminated,
221	ConstChar16Ptr textPtr,
222	int32_t textLength) {
223	fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
224	const char16_t *text = textPtr;
225	if(text == nullptr) {
226	// treat as an empty string, do not alias
227	setToEmpty();
228	} else if(textLength < -`1` \|\|
229	(textLength == -`1` && !isTerminated) \|\|
230	(textLength >= `0` && isTerminated && text[textLength] != `0`)
231	) {
232	setToBogus();
233	} else {
234	if(textLength == -`1`) {
235	// text is terminated, or else it would have failed the above test
236	textLength = u_strlen(text);
237	}
238	setArray(const_cast<char16_t *>(text), textLength,
239	isTerminated ? textLength + `1` : textLength);
240	}
241	}
242
243	UnicodeString::UnicodeString(char16_t *buff,
244	int32_t buffLength,
245	int32_t buffCapacity) {
246	fUnion.fFields.fLengthAndFlags = kWritableAlias;
247	if(buff == nullptr) {
248	// treat as an empty string, do not alias
249	setToEmpty();
250	} else if(buffLength < -`1` \|\| buffCapacity < `0` \|\| buffLength > buffCapacity) {
251	setToBogus();
252	} else {
253	if(buffLength == -`1`) {
254	// fLength = u_strlen(buff); but do not look beyond buffCapacity
255	const char16_t p = buff, limit = buff + buffCapacity;
256	while(p != limit && *p != `0`) {
257	++p;
258	}
259	buffLength = (int32_t)(p - buff);
260	}
261	setArray(buff, buffLength, buffCapacity);
262	}
263	}
264
265	UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
266	fUnion.fFields.fLengthAndFlags = kShortString;
267	if(src==nullptr) {
268	// treat as an empty string
269	} else {
270	if(length<`0`) {
271	length=(int32_t)uprv_strlen(src);
272	}
273	if(cloneArrayIfNeeded(length, length, false)) {
274	u_charsToUChars(src, getArrayStart(), length);
275	setLength(length);
276	} else {
277	setToBogus();
278	}
279	}
280	}
281
282	#if U_CHARSET_IS_UTF8
283
284	UnicodeString::UnicodeString(const char *codepageData) {
285	fUnion.fFields.fLengthAndFlags = kShortString;
286	if(codepageData != `0`) {
287	setToUTF8(codepageData);
288	}
289	}
290
291	UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
292	fUnion.fFields.fLengthAndFlags = kShortString;
293	// if there's nothing to convert, do nothing
294	if(codepageData == `0` \|\| dataLength == `0` \|\| dataLength < -`1`) {
295	return;
296	}
297	if(dataLength == -`1`) {
298	dataLength = (int32_t)uprv_strlen(codepageData);
299	}
300	setToUTF8(StringPiece (codepageData, dataLength));
301	}
302
303	// else see unistr_cnv.cpp
304	#endif
305
306	UnicodeString::UnicodeString(const UnicodeString& that) {
307	fUnion.fFields.fLengthAndFlags = kShortString;
308	copyFrom(that);
309	}
310
311	UnicodeString::UnicodeString(UnicodeString &&src) noexcept {
312	copyFieldsFrom(src, true);
313	}
314
315	UnicodeString::UnicodeString(const UnicodeString& that,
316	int32_t srcStart) {
317	fUnion.fFields.fLengthAndFlags = kShortString;
318	setTo(that, srcStart);
319	}
320
321	UnicodeString::UnicodeString(const UnicodeString& that,
322	int32_t srcStart,
323	int32_t srcLength) {
324	fUnion.fFields.fLengthAndFlags = kShortString;
325	setTo(that, srcStart, srcLength);
326	}
327
328	// Replaceable base class clone() default implementation, does not clone
329	Replaceable *
330	Replaceable::clone() const {
331	return nullptr;
332	}
333
334	// UnicodeString overrides clone() with a real implementation
335	UnicodeString *
336	UnicodeString::clone() const {
337	LocalPointer<UnicodeString> clonedString(new UnicodeString (*this));
338	return clonedString.isValid() && !clonedString ->isBogus() ? clonedString.orphan() : nullptr;
339	}
340
341	//========================================
342	// array allocation
343	//========================================
344
namespace {

// Fixed part of the extra capacity added when a string grows.
const int32_t kGrowSize = 128;

// The number of bytes for one int32_t reference counter and capacity UChars
// must fit into a 32-bit size_t (at least when on a 32-bit platform).
// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
// and round up to a multiple of 16 bytes.
// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns the capacity to request for a string of newLength code units:
// newLength plus a quarter of newLength plus kGrowSize, capped at kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t extra = (newLength >> 2) + kGrowSize;
  const int32_t headroom = kMaxCapacity - newLength;
  return extra <= headroom ? newLength + extra : kMaxCapacity;
}

}  // namespace
368
369	UBool
370	UnicodeString::allocate(int32_t capacity) {
371	if(capacity <= US_STACKBUF_SIZE) {
372	fUnion.fFields.fLengthAndFlags = kShortString;
373	return true;
374	}
375	if(capacity <= kMaxCapacity) {
376	++capacity; // for the NUL
377	// Switch to size_t which is unsigned so that we can allocate up to 4GB.
378	// Reference counter + UChars.
379	size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
380	// Round up to a multiple of 16.
381	numBytes = (numBytes + `15`) & ~`15`;
382	int32_t array = (int32_t ) uprv_malloc(numBytes);
383	if(array != nullptr) {
384	// set initial refCount and point behind the refCount
385	*array++ = `1`;
386	numBytes -= sizeof(int32_t);
387
388	// have fArray point to the first char16_t
389	fUnion.fFields.fArray = (char16_t *)array;
390	fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
391	fUnion.fFields.fLengthAndFlags = kLongString;
392	return true;
393	}
394	}
395	fUnion.fFields.fLengthAndFlags = kIsBogus;
396	fUnion.fFields.fArray = `0`;
397	fUnion.fFields.fCapacity = `0`;
398	return false;
399	}
400
401	//========================================
402	// Destructor
403	//========================================
404
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
// Per-length counters filled in by the destructor (index = short length).
static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
// Counter for strings that do not have a short length.
static u_atomic_int32_t beyondCount(0);

// Prints one line per length 0..59, then a single ">59" line that sums the
// remaining per-length counters and beyondCount.
U_CAPI void unistr_printLengths() {
  int32_t i = 0;
  while(i <= 59) {
    printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]);
    ++i;
  }
  int32_t beyond = beyondCount;
  while(i < UPRV_LENGTHOF(finalLengthCounts)) {
    beyond += finalLengthCounts[i];
    ++i;
  }
  printf(">59, %9d\n", beyond);
}
#endif
421
422	UnicodeString::~UnicodeString()
423	{
424	#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
425	// Count lengths of strings at the end of their lifetime.
426	// Useful for discussion of a desirable stack buffer size.
427	// Count the contents length, not the optional NUL terminator nor further capacity.
428	// Ignore open-buffer strings and strings which alias external storage.
429	if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kReadonlyAlias\|kWritableAlias)) == `0`) {
430	if(hasShortLength()) {
431	umtx_atomic_inc(finalLengthCounts + getShortLength());
432	} else {
433	umtx_atomic_inc(&beyondCount);
434	}
435	}
436	#endif
437
438	releaseArray();
439	}
440
441	//========================================
442	// Factory methods
443	//========================================
444
445	UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
446	UnicodeString result;
447	result.setToUTF8(utf8);
448	return result;
449	}
450
451	UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
452	UnicodeString result;
453	int32_t capacity;
454	// Most UTF-32 strings will be BMP-only and result in a same-length
455	// UTF-16 string. We overestimate the capacity just slightly,
456	// just in case there are a few supplementary characters.
457	if(length <= US_STACKBUF_SIZE) {
458	capacity = US_STACKBUF_SIZE;
459	} else {
460	capacity = length + (length >> `4`) + `4`;
461	}
462	do {
463	char16_t *utf16 = result.getBuffer(capacity);
464	int32_t length16;
465	UErrorCode errorCode = U_ZERO_ERROR;
466	u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
467	utf32, length,
468	`0xfffd`, // Substitution character.
469	nullptr, // Don't care about number of substitutions.
470	&errorCode);
471	result.releaseBuffer(length16);
472	if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
473	capacity = length16 + `1`; // +1 for the terminating NUL.
474	continue;
475	} else if(U_FAILURE(errorCode)) {
476	result.setToBogus();
477	}
478	break;
479	} while(true);
480	return result;
481	}
482
483	//========================================
484	// Assignment
485	//========================================
486
487	UnicodeString &
488	UnicodeString::operator=(const UnicodeString &src) {
489	return copyFrom(src);
490	}
491
492	UnicodeString &
493	UnicodeString::fastCopyFrom(const UnicodeString &src) {
494	return copyFrom(src, true);
495	}
496
497	UnicodeString &
498	UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
499	// if assigning to ourselves, do nothing
500	if(this == &src) {
501	return *this;
502	}
503
504	// is the right side bogus?
505	if(src.isBogus()) {
506	setToBogus();
507	return *this;
508	}
509
510	// delete the current contents
511	releaseArray();
512
513	if(src.isEmpty()) {
514	// empty string - use the stack buffer
515	setToEmpty();
516	return *this;
517	}
518
519	// fLength>0 and not an "open" src.getBuffer(minCapacity)
520	fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
521	switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
522	case kShortString:
523	// short string using the stack buffer, do the same
524	uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
525	getShortLength() * U_SIZEOF_UCHAR);
526	break;
527	case kLongString:
528	// src uses a refCounted string buffer, use that buffer with refCount
529	// src is const, use a cast - we don't actually change it
530	const_cast<UnicodeString &>(src).addRef();
531	// copy all fields, share the reference-counted buffer
532	fUnion.fFields.fArray = src.fUnion.fFields.fArray;
533	fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
534	if(!hasShortLength()) {
535	fUnion.fFields.fLength = src.fUnion.fFields.fLength;
536	}
537	break;
538	case kReadonlyAlias:
539	if(fastCopy) {
540	// src is a readonly alias, do the same
541	// -> maintain the readonly alias as such
542	fUnion.fFields.fArray = src.fUnion.fFields.fArray;
543	fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
544	if(!hasShortLength()) {
545	fUnion.fFields.fLength = src.fUnion.fFields.fLength;
546	}
547	break;
548	}
549	// else if(!fastCopy) fall through to case kWritableAlias
550	// -> allocate a new buffer and copy the contents
551	U_FALLTHROUGH;
552	case kWritableAlias: {
553	// src is a writable alias; we make a copy of that instead
554	int32_t srcLength = src.length();
555	if(allocate(srcLength)) {
556	u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
557	setLength(srcLength);
558	break;
559	}
560	// if there is not enough memory, then fall through to setting to bogus
561	U_FALLTHROUGH;
562	}
563	default:
564	// if src is bogus, set ourselves to bogus
565	// do not call setToBogus() here because fArray and flags are not consistent here
566	fUnion.fFields.fLengthAndFlags = kIsBogus;
567	fUnion.fFields.fArray = `0`;
568	fUnion.fFields.fCapacity = `0`;
569	break;
570	}
571
572	return *this;
573	}
574
575	UnicodeString &UnicodeString::operator=(UnicodeString &&src) noexcept {
576	// No explicit check for self move assignment, consistent with standard library.
577	// Self move assignment causes no crash nor leak but might make the object bogus.
578	releaseArray();
579	copyFieldsFrom(src, true);
580	return *this;
581	}
582
583	// Same as move assignment except without memory management.
584	void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept {
585	int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
586	if(lengthAndFlags & kUsingStackBuffer) {
587	// Short string using the stack buffer, copy the contents.
588	// Check for self assignment to prevent "overlap in memcpy" warnings,
589	// although it should be harmless to copy a buffer to itself exactly.
590	if(this != &src) {
591	uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
592	getShortLength() * U_SIZEOF_UCHAR);
593	}
594	} else {
595	// In all other cases, copy all fields.
596	fUnion.fFields.fArray = src.fUnion.fFields.fArray;
597	fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
598	if(!hasShortLength()) {
599	fUnion.fFields.fLength = src.fUnion.fFields.fLength;
600	}
601	if(setSrcToBogus) {
602	// Set src to bogus without releasing any memory.
603	src.fUnion.fFields.fLengthAndFlags = kIsBogus;
604	src.fUnion.fFields.fArray = nullptr;
605	src.fUnion.fFields.fCapacity = `0`;
606	}
607	}
608	}
609
610	void UnicodeString::swap(UnicodeString &other) noexcept {
611	UnicodeString temp; // Empty short string: Known not to need releaseArray().
612	// Copy fields without resetting source values in between.
613	temp.copyFieldsFrom(*this, false);
614	this->copyFieldsFrom(other, false);
615	other.copyFieldsFrom(temp, false);
616	// Set temp to an empty string so that other's memory is not released twice.
617	temp.fUnion.fFields.fLengthAndFlags = kShortString;
618	}
619
620	//========================================
621	// Miscellaneous operations
622	//========================================
623
624	UnicodeString UnicodeString::unescape() const {
625	UnicodeString result(length(), (UChar32)`0`, (int32_t)`0`); // construct with capacity
626	if (result.isBogus()) {
627	return result;
628	}
629	const char16_t *array = getBuffer();
630	int32_t len = length();
631	int32_t prev = `0`;
632	for (int32_t i=`0`;;) {
633	if (i == len) {
634	result.append(array, prev, len - prev);
635	break;
636	}
637	if (array[i++] == `0x5C` /'\\'/) {
638	result.append(array, prev, (i - `1`) - prev);
639	UChar32 c = unescapeAt(i); // advances i
640	if (c < `0`) {
641	result.remove(); // return empty string
642	break; // invalid escape sequence
643	}
644	result.append(c);
645	prev = i;
646	}
647	}
648	return result;
649	}
650
651	UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
652	return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void)this*);
653	}
654
655	//========================================
656	// Read-only implementation
657	//========================================
658	UBool
659	UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
660	// Requires: this & text not bogus and have same lengths.
661	// Byte-wise comparison works for equality regardless of endianness.
662	return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == `0`;
663	}
664
665	UBool
666	UnicodeString::doEqualsSubstring( int32_t start,
667	int32_t length,
668	const char16_t *srcChars,
669	int32_t srcStart,
670	int32_t srcLength) const
671	{
672	// compare illegal string values
673	if(isBogus()) {
674	return false;
675	}
676
677	// pin indices to legal values
678	pinIndices(start, length);
679
680	if(srcChars == nullptr) {
681	// treat const char16_t srcChars==nullptr as an empty string*
682	return length == `0` ? true : false;
683	}
684
685	// get the correct pointer
686	const char16_t *chars = getArrayStart();
687
688	chars += start;
689	srcChars += srcStart;
690
691	// get the srcLength if necessary
692	if(srcLength < `0`) {
693	srcLength = u_strlen(srcChars + srcStart);
694	}
695
696	if (length != srcLength) {
697	return false;
698	}
699
700	if(length == `0` \|\| chars == srcChars) {
701	return true;
702	}
703
704	return u_memcmp(chars, srcChars, srcLength) == `0`;
705	}
706
707	int8_t
708	UnicodeString::doCompare( int32_t start,
709	int32_t length,
710	const char16_t *srcChars,
711	int32_t srcStart,
712	int32_t srcLength) const
713	{
714	// compare illegal string values
715	if(isBogus()) {
716	return -`1`;
717	}
718
719	// pin indices to legal values
720	pinIndices(start, length);
721
722	if(srcChars == nullptr) {
723	// treat const char16_t srcChars==nullptr as an empty string*
724	return length == `0` ? `0` : `1`;
725	}
726
727	// get the correct pointer
728	const char16_t *chars = getArrayStart();
729
730	chars += start;
731	srcChars += srcStart;
732
733	int32_t minLength;
734	int8_t lengthResult;
735
736	// get the srcLength if necessary
737	if(srcLength < `0`) {
738	srcLength = u_strlen(srcChars + srcStart);
739	}
740
741	// are we comparing different lengths?
742	if(length != srcLength) {
743	if(length < srcLength) {
744	minLength = length;
745	lengthResult = -`1`;
746	} else {
747	minLength = srcLength;
748	lengthResult = `1`;
749	}
750	} else {
751	minLength = length;
752	lengthResult = `0`;
753	}
754
755	/*
756	* note that uprv_memcmp() returns an int but we return an int8_t;
757	* we need to take care not to truncate the result -
758	* one way to do this is to right-shift the value to
759	* move the sign bit into the lower 8 bits and making sure that this
760	* does not become 0 itself
761	*/
762
763	if(minLength > `0` && chars != srcChars) {
764	int32_t result;
765
766	# if U_IS_BIG_ENDIAN
767	// big-endian: byte comparison works
768	result = uprv_memcmp(chars, srcChars, minLength * sizeof(char16_t));
769	if(result != `0`) {
770	return (int8_t)(result >> `15` \| `1`);
771	}
772	# else
773	// little-endian: compare char16_t units
774	do {
775	result = ((int32_t)(chars++) - (int32_t)(srcChars++));
776	if(result != `0`) {
777	return (int8_t)(result >> `15` \| `1`);
778	}
779	} while(--minLength > `0`);
780	# endif
781	}
782	return lengthResult;
783	}
784
785	/ String compare in code point order - doCompare() compares in code unit order. /
786	int8_t
787	UnicodeString::doCompareCodePointOrder(int32_t start,
788	int32_t length,
789	const char16_t *srcChars,
790	int32_t srcStart,
791	int32_t srcLength) const
792	{
793	// compare illegal string values
794	// treat const char16_t srcChars==nullptr as an empty string*
795	if(isBogus()) {
796	return -`1`;
797	}
798
799	// pin indices to legal values
800	pinIndices(start, length);
801
802	if(srcChars == nullptr) {
803	srcStart = srcLength = `0`;
804	}
805
806	int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=nullptr)?(srcChars + srcStart):nullptr, srcLength, false, true);
807	/ translate the 32-bit result into an 8-bit one /
808	if(diff!=`0`) {
809	return (int8_t)(diff >> `15` \| `1`);
810	} else {
811	return `0`;
812	}
813	}
814
815	int32_t
816	UnicodeString::getLength() const {
817	return length();
818	}
819
820	char16_t
821	UnicodeString::getCharAt(int32_t offset) const {
822	return charAt(offset);
823	}
824
825	UChar32
826	UnicodeString::getChar32At(int32_t offset) const {
827	return char32At(offset);
828	}
829
830	UChar32
831	UnicodeString::char32At(int32_t offset) const
832	{
833	int32_t len = length();
834	if((uint32_t)offset < (uint32_t)len) {
835	const char16_t *array = getArrayStart();
836	UChar32 c;
837	U16_GET(array, `0`, offset, len, c);
838	return c;
839	} else {
840	return kInvalidUChar;
841	}
842	}
843
844	int32_t
845	UnicodeString::getChar32Start(int32_t offset) const {
846	if((uint32_t)offset < (uint32_t)length()) {
847	const char16_t *array = getArrayStart();
848	U16_SET_CP_START(array, `0`, offset);
849	return offset;
850	} else {
851	return `0`;
852	}
853	}
854
855	int32_t
856	UnicodeString::getChar32Limit(int32_t offset) const {
857	int32_t len = length();
858	if((uint32_t)offset < (uint32_t)len) {
859	const char16_t *array = getArrayStart();
860	U16_SET_CP_LIMIT(array, `0`, offset, len);
861	return offset;
862	} else {
863	return len;
864	}
865	}
866
867	int32_t
868	UnicodeString::countChar32(int32_t start, int32_t length) const {
869	pinIndices(start, length);
870	// if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for nullptr
871	return u_countChar32(getArrayStart()+start, length);
872	}
873
874	UBool
875	UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
876	pinIndices(start, length);
877	// if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for nullptr
878	return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
879	}
880
881	int32_t
882	UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
883	// pin index
884	int32_t len = length();
885	if(index<`0`) {
886	index=`0`;
887	} else if(index>len) {
888	index=len;
889	}
890
891	const char16_t *array = getArrayStart();
892	if(delta>`0`) {
893	U16_FWD_N(array, index, len, delta);
894	} else {
895	U16_BACK_N(array, `0`, index, -delta);
896	}
897
898	return index;
899	}
900
901	void
902	UnicodeString::doExtract(int32_t start,
903	int32_t length,
904	char16_t *dst,
905	int32_t dstStart) const
906	{
907	// pin indices to legal values
908	pinIndices(start, length);
909
910	// do not copy anything if we alias dst itself
911	const char16_t *array = getArrayStart();
912	if(array + start != dst + dstStart) {
913	us_arrayCopy(array, start, dst, dstStart, length);
914	}
915	}
916
917	int32_t
918	UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
919	UErrorCode &errorCode) const {
920	int32_t len = length();
921	if(U_SUCCESS(errorCode)) {
922	if(isBogus() \|\| destCapacity<`0` \|\| (destCapacity>`0` && dest==`0`)) {
923	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
924	} else {
925	const char16_t *array = getArrayStart();
926	if(len>`0` && len<=destCapacity && array!=dest) {
927	u_memcpy(dest, array, len);
928	}
929	return u_terminateUChars(dest, destCapacity, len, &errorCode);
930	}
931	}
932
933	return len;
934	}
935
936	int32_t
937	UnicodeString::extract(int32_t start,
938	int32_t length,
939	char *target,
940	int32_t targetCapacity,
941	enum EInvariant) const
942	{
943	// if the arguments are illegal, then do nothing
944	if(targetCapacity < `0` \|\| (targetCapacity > `0` && target == nullptr)) {
945	return `0`;
946	}
947
948	// pin the indices to legal values
949	pinIndices(start, length);
950
951	if(length <= targetCapacity) {
952	u_UCharsToChars(getArrayStart() + start, target, length);
953	}
954	UErrorCode status = U_ZERO_ERROR;
955	return u_terminateChars(target, targetCapacity, length, &status);
956	}
957
958	UnicodeString
959	UnicodeString::tempSubString(int32_t start, int32_t len) const {
960	pinIndices(start, len);
961	const char16_t array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer*
962	if(array==nullptr) {
963	array=fUnion.fStackFields.fBuffer; // anything not nullptr because that would make an empty string
964	len=-`2`; // bogus result string
965	}
966	return UnicodeString (false, array + start, len);
967	}
968
969	int32_t
970	UnicodeString::toUTF8(int32_t start, int32_t len,
971	char target, int32_t capacity) const* {
972	pinIndices(start, len);
973	int32_t length8;
974	UErrorCode errorCode = U_ZERO_ERROR;
975	u_strToUTF8WithSub(target, capacity, &length8,
976	getBuffer() + start, len,
977	`0xFFFD`, // Standard substitution character.
978	nullptr, // Don't care about number of substitutions.
979	&errorCode);
980	return length8;
981	}
982
983	#if U_CHARSET_IS_UTF8
984
985	int32_t
986	UnicodeString::extract(int32_t start, int32_t len,
987	char target, uint32_t dstSize) const* {
988	// if the arguments are illegal, then do nothing
989	if(/dstSize < 0 \|\| /(dstSize > `0` && target == `0`)) {
990	return `0`;
991	}
992	return toUTF8(start, len, target, dstSize <= `0x7fffffff` ? (int32_t)dstSize : `0x7fffffff`);
993	}
994
995	// else see unistr_cnv.cpp
996	#endif
997
998	void
999	UnicodeString::extractBetween(int32_t start,
1000	int32_t limit,
1001	UnicodeString& target) const {
1002	pinIndex(start);
1003	pinIndex(limit);
1004	doExtract(start, limit - start, target);
1005	}
1006
1007	// When converting from UTF-16 to UTF-8, the result will have at most 3 times
1008	// as many bytes as the source has UChars.
1009	// The "worst cases" are writing systems like Indic, Thai and CJK with
1010	// 3:1 bytes:UChars.
1011	void
1012	UnicodeString::toUTF8(ByteSink &sink) const {
1013	int32_t length16 = length();
1014	if(length16 != `0`) {
1015	char stackBuffer[`1024`];
1016	int32_t capacity = (int32_t)sizeof(stackBuffer);
1017	UBool utf8IsOwned = false;
1018	char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
1019	`3`*length16,
1020	stackBuffer, capacity,
1021	&capacity);
1022	int32_t length8 = `0`;
1023	UErrorCode errorCode = U_ZERO_ERROR;
1024	u_strToUTF8WithSub(utf8, capacity, &length8,
1025	getBuffer(), length16,
1026	`0xFFFD`, // Standard substitution character.
1027	nullptr, // Don't care about number of substitutions.
1028	&errorCode);
1029	if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
1030	utf8 = (char *)uprv_malloc(length8);
1031	if(utf8 != nullptr) {
1032	utf8IsOwned = true;
1033	errorCode = U_ZERO_ERROR;
1034	u_strToUTF8WithSub(utf8, length8, &length8,
1035	getBuffer(), length16,
1036	`0xFFFD`, // Standard substitution character.
1037	nullptr, // Don't care about number of substitutions.
1038	&errorCode);
1039	} else {
1040	errorCode = U_MEMORY_ALLOCATION_ERROR;
1041	}
1042	}
1043	if(U_SUCCESS(errorCode)) {
1044	sink.Append(utf8, length8);
1045	sink.Flush();
1046	}
1047	if(utf8IsOwned) {
1048	uprv_free(utf8);
1049	}
1050	}
1051	}
1052
1053	int32_t
1054	UnicodeString::toUTF32(UChar32 utf32, int32_t capacity, UErrorCode &errorCode) const* {
1055	int32_t length32=`0`;
1056	if(U_SUCCESS(errorCode)) {
1057	// getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1058	u_strToUTF32WithSub(utf32, capacity, &length32,
1059	getBuffer(), length(),
1060	`0xfffd`, // Substitution character.
1061	nullptr, // Don't care about number of substitutions.
1062	&errorCode);
1063	}
1064	return length32;
1065	}
1066
1067	int32_t
1068	UnicodeString::indexOf(const char16_t *srcChars,
1069	int32_t srcStart,
1070	int32_t srcLength,
1071	int32_t start,
1072	int32_t length) const
1073	{
1074	if(isBogus() \|\| srcChars == `0` \|\| srcStart < `0` \|\| srcLength == `0`) {
1075	return -`1`;
1076	}
1077
1078	// UnicodeString does not find empty substrings
1079	if(srcLength < `0` && srcChars[srcStart] == `0`) {
1080	return -`1`;
1081	}
1082
1083	// get the indices within bounds
1084	pinIndices(start, length);
1085
1086	// find the first occurrence of the substring
1087	const char16_t *array = getArrayStart();
1088	const char16_t *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
1089	if(match == nullptr) {
1090	return -`1`;
1091	} else {
1092	return (int32_t)(match - array);
1093	}
1094	}
1095
1096	int32_t
1097	UnicodeString::doIndexOf(char16_t c,
1098	int32_t start,
1099	int32_t length) const
1100	{
1101	// pin indices
1102	pinIndices(start, length);
1103
1104	// find the first occurrence of c
1105	const char16_t *array = getArrayStart();
1106	const char16_t *match = u_memchr(array + start, c, length);
1107	if(match == nullptr) {
1108	return -`1`;
1109	} else {
1110	return (int32_t)(match - array);
1111	}
1112	}
1113
1114	int32_t
1115	UnicodeString::doIndexOf(UChar32 c,
1116	int32_t start,
1117	int32_t length) const {
1118	// pin indices
1119	pinIndices(start, length);
1120
1121	// find the first occurrence of c
1122	const char16_t *array = getArrayStart();
1123	const char16_t *match = u_memchr32(array + start, c, length);
1124	if(match == nullptr) {
1125	return -`1`;
1126	} else {
1127	return (int32_t)(match - array);
1128	}
1129	}
1130
1131	int32_t
1132	UnicodeString::lastIndexOf(const char16_t *srcChars,
1133	int32_t srcStart,
1134	int32_t srcLength,
1135	int32_t start,
1136	int32_t length) const
1137	{
1138	if(isBogus() \|\| srcChars == `0` \|\| srcStart < `0` \|\| srcLength == `0`) {
1139	return -`1`;
1140	}
1141
1142	// UnicodeString does not find empty substrings
1143	if(srcLength < `0` && srcChars[srcStart] == `0`) {
1144	return -`1`;
1145	}
1146
1147	// get the indices within bounds
1148	pinIndices(start, length);
1149
1150	// find the last occurrence of the substring
1151	const char16_t *array = getArrayStart();
1152	const char16_t *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1153	if(match == nullptr) {
1154	return -`1`;
1155	} else {
1156	return (int32_t)(match - array);
1157	}
1158	}
1159
1160	int32_t
1161	UnicodeString::doLastIndexOf(char16_t c,
1162	int32_t start,
1163	int32_t length) const
1164	{
1165	if(isBogus()) {
1166	return -`1`;
1167	}
1168
1169	// pin indices
1170	pinIndices(start, length);
1171
1172	// find the last occurrence of c
1173	const char16_t *array = getArrayStart();
1174	const char16_t *match = u_memrchr(array + start, c, length);
1175	if(match == nullptr) {
1176	return -`1`;
1177	} else {
1178	return (int32_t)(match - array);
1179	}
1180	}
1181
1182	int32_t
1183	UnicodeString::doLastIndexOf(UChar32 c,
1184	int32_t start,
1185	int32_t length) const {
1186	// pin indices
1187	pinIndices(start, length);
1188
1189	// find the last occurrence of c
1190	const char16_t *array = getArrayStart();
1191	const char16_t *match = u_memrchr32(array + start, c, length);
1192	if(match == nullptr) {
1193	return -`1`;
1194	} else {
1195	return (int32_t)(match - array);
1196	}
1197	}
1198
//========================================
// Write implementation
//========================================
1202
1203	UnicodeString&
1204	UnicodeString::findAndReplace(int32_t start,
1205	int32_t length,
1206	const UnicodeString& oldText,
1207	int32_t oldStart,
1208	int32_t oldLength,
1209	const UnicodeString& newText,
1210	int32_t newStart,
1211	int32_t newLength)
1212	{
1213	if(isBogus() \|\| oldText.isBogus() \|\| newText.isBogus()) {
1214	return *this;
1215	}
1216
1217	pinIndices(start, length);
1218	oldText.pinIndices(oldStart, oldLength);
1219	newText.pinIndices(newStart, newLength);
1220
1221	if(oldLength == `0`) {
1222	return *this;
1223	}
1224
1225	while(length > `0` && length >= oldLength) {
1226	int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1227	if(pos < `0`) {
1228	// no more oldText's here: done
1229	break;
1230	} else {
1231	// we found oldText, replace it by newText and go beyond it
1232	replace(pos, oldLength, newText, newStart, newLength);
1233	length -= pos + oldLength - start;
1234	start = pos + newLength;
1235	}
1236	}
1237
1238	return *this;
1239	}
1240
1241
1242	void
1243	UnicodeString::setToBogus()
1244	{
1245	releaseArray();
1246
1247	fUnion.fFields.fLengthAndFlags = kIsBogus;
1248	fUnion.fFields.fArray = `0`;
1249	fUnion.fFields.fCapacity = `0`;
1250	}
1251
1252	// turn a bogus string into an empty one
1253	void
1254	UnicodeString::unBogus() {
1255	if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
1256	setToEmpty();
1257	}
1258	}
1259
1260	const char16_t *
1261	UnicodeString::getTerminatedBuffer() {
1262	if(!isWritable()) {
1263	return nullptr;
1264	}
1265	char16_t *array = getArrayStart();
1266	int32_t len = length();
1267	if(len < getCapacity()) {
1268	if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
1269	// If len<capacity on a read-only alias, then array[len] is
1270	// either the original NUL (if constructed with (true, s, length))
1271	// or one of the original string contents characters (if later truncated),
1272	// therefore we can assume that array[len] is initialized memory.
1273	if(array[len] == `0`) {
1274	return array;
1275	}
1276	} else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == `0` \|\| refCount() == `1`)) {
1277	// kRefCounted: Do not write the NUL if the buffer is shared.
1278	// That is mostly safe, except when the length of one copy was modified
1279	// without copy-on-write, e.g., via truncate(newLength) or remove().
1280	// Then the NUL would be written into the middle of another copy's string.
1281
1282	// Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1283	// Do not test if there is a NUL already because it might be uninitialized memory.
1284	// (That would be safe, but tools like valgrind & Purify would complain.)
1285	array[len] = `0`;
1286	return array;
1287	}
1288	}
1289	if(len<INT32_MAX && cloneArrayIfNeeded(len+`1`)) {
1290	array = getArrayStart();
1291	array[len] = `0`;
1292	return array;
1293	} else {
1294	return nullptr;
1295	}
1296	}
1297
1298	// setTo() analogous to the readonly-aliasing constructor with the same signature
1299	UnicodeString &
1300	UnicodeString::setTo(UBool isTerminated,
1301	ConstChar16Ptr textPtr,
1302	int32_t textLength)
1303	{
1304	if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1305	// do not modify a string that has an "open" getBuffer(minCapacity)
1306	return *this;
1307	}
1308
1309	const char16_t *text = textPtr;
1310	if(text == nullptr) {
1311	// treat as an empty string, do not alias
1312	releaseArray();
1313	setToEmpty();
1314	return *this;
1315	}
1316
1317	if( textLength < -`1` \|\|
1318	(textLength == -`1` && !isTerminated) \|\|
1319	(textLength >= `0` && isTerminated && text[textLength] != `0`)
1320	) {
1321	setToBogus();
1322	return *this;
1323	}
1324
1325	releaseArray();
1326
1327	if(textLength == -`1`) {
1328	// text is terminated, or else it would have failed the above test
1329	textLength = u_strlen(text);
1330	}
1331	fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
1332	setArray((char16_t *)text, textLength, isTerminated ? textLength + `1` : textLength);
1333	return *this;
1334	}
1335
1336	// setTo() analogous to the writable-aliasing constructor with the same signature
1337	UnicodeString &
1338	UnicodeString::setTo(char16_t *buffer,
1339	int32_t buffLength,
1340	int32_t buffCapacity) {
1341	if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1342	// do not modify a string that has an "open" getBuffer(minCapacity)
1343	return *this;
1344	}
1345
1346	if(buffer == nullptr) {
1347	// treat as an empty string, do not alias
1348	releaseArray();
1349	setToEmpty();
1350	return *this;
1351	}
1352
1353	if(buffLength < -`1` \|\| buffCapacity < `0` \|\| buffLength > buffCapacity) {
1354	setToBogus();
1355	return *this;
1356	} else if(buffLength == -`1`) {
1357	// buffLength = u_strlen(buff); but do not look beyond buffCapacity
1358	const char16_t p = buffer, limit = buffer + buffCapacity;
1359	while(p != limit && *p != `0`) {
1360	++p;
1361	}
1362	buffLength = (int32_t)(p - buffer);
1363	}
1364
1365	releaseArray();
1366
1367	fUnion.fFields.fLengthAndFlags = kWritableAlias;
1368	setArray(buffer, buffLength, buffCapacity);
1369	return *this;
1370	}
1371
1372	UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
1373	unBogus();
1374	int32_t length = utf8.length();
1375	int32_t capacity;
1376	// The UTF-16 string will be at most as long as the UTF-8 string.
1377	if(length <= US_STACKBUF_SIZE) {
1378	capacity = US_STACKBUF_SIZE;
1379	} else {
1380	capacity = length + `1`; // +1 for the terminating NUL.
1381	}
1382	char16_t *utf16 = getBuffer(capacity);
1383	int32_t length16;
1384	UErrorCode errorCode = U_ZERO_ERROR;
1385	u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1386	utf8.data(), length,
1387	`0xfffd`, // Substitution character.
1388	nullptr, // Don't care about number of substitutions.
1389	&errorCode);
1390	releaseBuffer(length16);
1391	if(U_FAILURE(errorCode)) {
1392	setToBogus();
1393	}
1394	return *this;
1395	}
1396
1397	UnicodeString&
1398	UnicodeString::setCharAt(int32_t offset,
1399	char16_t c)
1400	{
1401	int32_t len = length();
1402	if(cloneArrayIfNeeded() && len > `0`) {
1403	if(offset < `0`) {
1404	offset = `0`;
1405	} else if(offset >= len) {
1406	offset = len - `1`;
1407	}
1408
1409	getArrayStart()[offset] = c;
1410	}
1411	return *this;
1412	}
1413
1414	UnicodeString&
1415	UnicodeString::replace(int32_t start,
1416	int32_t _length,
1417	UChar32 srcChar) {
1418	char16_t buffer[U16_MAX_LENGTH];
1419	int32_t count = `0`;
1420	UBool isError = false;
1421	U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1422	// We test isError so that the compiler does not complain that we don't.
1423	// If isError (srcChar is not a valid code point) then count==0 which means
1424	// we remove the source segment rather than replacing it with srcChar.
1425	return doReplace(start, _length, buffer, `0`, isError ? `0` : count);
1426	}
1427
1428	UnicodeString&
1429	UnicodeString::append(UChar32 srcChar) {
1430	char16_t buffer[U16_MAX_LENGTH];
1431	int32_t _length = `0`;
1432	UBool isError = false;
1433	U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1434	// We test isError so that the compiler does not complain that we don't.
1435	// If isError then _length==0 which turns the doAppend() into a no-op anyway.
1436	return isError ? *this : doAppend(buffer, `0`, _length);
1437	}
1438
1439	UnicodeString&
1440	UnicodeString::doReplace( int32_t start,
1441	int32_t length,
1442	const UnicodeString& src,
1443	int32_t srcStart,
1444	int32_t srcLength)
1445	{
1446	// pin the indices to legal values
1447	src.pinIndices(srcStart, srcLength);
1448
1449	// get the characters from src
1450	// and replace the range in ourselves with them
1451	return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1452	}
1453
1454	UnicodeString&
1455	UnicodeString::doReplace(int32_t start,
1456	int32_t length,
1457	const char16_t *srcChars,
1458	int32_t srcStart,
1459	int32_t srcLength)
1460	{
1461	if(!isWritable()) {
1462	return *this;
1463	}
1464
1465	int32_t oldLength = this->length();
1466
1467	// optimize (read-only alias).remove(0, start) and .remove(start, end)
1468	if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == `0`) {
1469	if(start == `0`) {
1470	// remove prefix by adjusting the array pointer
1471	pinIndex(length);
1472	fUnion.fFields.fArray += length;
1473	fUnion.fFields.fCapacity -= length;
1474	setLength(oldLength - length);
1475	return *this;
1476	} else {
1477	pinIndex(start);
1478	if(length >= (oldLength - start)) {
1479	// remove suffix by reducing the length (like truncate())
1480	setLength(start);
1481	fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1482	return *this;
1483	}
1484	}
1485	}
1486
1487	if(start == oldLength) {
1488	return doAppend(srcChars, srcStart, srcLength);
1489	}
1490
1491	if(srcChars == `0`) {
1492	srcLength = `0`;
1493	} else {
1494	// Perform all remaining operations relative to srcChars + srcStart.
1495	// From this point forward, do not use srcStart.
1496	srcChars += srcStart;
1497	if (srcLength < `0`) {
1498	// get the srcLength if necessary
1499	srcLength = u_strlen(srcChars);
1500	}
1501	}
1502
1503	// pin the indices to legal values
1504	pinIndices(start, length);
1505
1506	// Calculate the size of the string after the replace.
1507	// Avoid int32_t overflow.
1508	int32_t newLength = oldLength - length;
1509	if(srcLength > (INT32_MAX - newLength)) {
1510	setToBogus();
1511	return *this;
1512	}
1513	newLength += srcLength;
1514
1515	// Check for insertion into ourself
1516	const char16_t *oldArray = getArrayStart();
1517	if (isBufferWritable() &&
1518	oldArray < srcChars + srcLength &&
1519	srcChars < oldArray + oldLength) {
1520	// Copy into a new UnicodeString and start over
1521	UnicodeString copy(srcChars, srcLength);
1522	if (copy.isBogus()) {
1523	setToBogus();
1524	return *this;
1525	}
1526	return doReplace(start, length, copy.getArrayStart(), `0`, srcLength);
1527	}
1528
1529	// cloneArrayIfNeeded(doCopyArray=false) may change fArray but will not copy the current contents;
1530	// therefore we need to keep the current fArray
1531	char16_t oldStackBuffer[US_STACKBUF_SIZE];
1532	if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1533	// copy the stack buffer contents because it will be overwritten with
1534	// fUnion.fFields values
1535	u_memcpy(oldStackBuffer, oldArray, oldLength);
1536	oldArray = oldStackBuffer;
1537	}
1538
1539	// clone our array and allocate a bigger array if needed
1540	int32_t *bufferToDelete = `0`;
1541	if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
1542	false, &bufferToDelete)
1543	) {
1544	return *this;
1545	}
1546
1547	// now do the replace
1548
1549	char16_t *newArray = getArrayStart();
1550	if(newArray != oldArray) {
1551	// if fArray changed, then we need to copy everything except what will change
1552	us_arrayCopy(oldArray, `0`, newArray, `0`, start);
1553	us_arrayCopy(oldArray, start + length,
1554	newArray, start + srcLength,
1555	oldLength - (start + length));
1556	} else if(length != srcLength) {
1557	// fArray did not change; copy only the portion that isn't changing, leaving a hole
1558	us_arrayCopy(oldArray, start + length,
1559	newArray, start + srcLength,
1560	oldLength - (start + length));
1561	}
1562
1563	// now fill in the hole with the new string
1564	us_arrayCopy(srcChars, `0`, newArray, start, srcLength);
1565
1566	setLength(newLength);
1567
1568	// delayed delete in case srcChars == fArray when we started, and
1569	// to keep oldArray alive for the above operations
1570	if (bufferToDelete) {
1571	uprv_free(bufferToDelete);
1572	}
1573
1574	return *this;
1575	}
1576
// Versions of doReplace() only for append() variants.
// doReplace() and doAppend() optimize for different cases.
1579
1580	UnicodeString&
1581	UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
1582	if(srcLength == `0`) {
1583	return *this;
1584	}
1585
1586	// pin the indices to legal values
1587	src.pinIndices(srcStart, srcLength);
1588	return doAppend(src.getArrayStart(), srcStart, srcLength);
1589	}
1590
1591	UnicodeString&
1592	UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) {
1593	if(!isWritable() \|\| srcLength == `0` \|\| srcChars == nullptr) {
1594	return *this;
1595	}
1596
1597	// Perform all remaining operations relative to srcChars + srcStart.
1598	// From this point forward, do not use srcStart.
1599	srcChars += srcStart;
1600
1601	if(srcLength < `0`) {
1602	// get the srcLength if necessary
1603	if((srcLength = u_strlen(srcChars)) == `0`) {
1604	return *this;
1605	}
1606	}
1607
1608	int32_t oldLength = length();
1609	int32_t newLength;
1610	if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
1611	setToBogus();
1612	return *this;
1613	}
1614
1615	// Check for append onto ourself
1616	const char16_t* oldArray = getArrayStart();
1617	if (isBufferWritable() &&
1618	oldArray < srcChars + srcLength &&
1619	srcChars < oldArray + oldLength) {
1620	// Copy into a new UnicodeString and start over
1621	UnicodeString copy(srcChars, srcLength);
1622	if (copy.isBogus()) {
1623	setToBogus();
1624	return *this;
1625	}
1626	return doAppend(copy.getArrayStart(), `0`, srcLength);
1627	}
1628
1629	// optimize append() onto a large-enough, owned string
1630	if((newLength <= getCapacity() && isBufferWritable()) \|\|
1631	cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
1632	char16_t *newArray = getArrayStart();
1633	// Do not copy characters when
1634	// char16_t buffer=str.getAppendBuffer(...);*
1635	// is followed by
1636	// str.append(buffer, length);
1637	// or
1638	// str.appendString(buffer, length)
1639	// or similar.
1640	if(srcChars != newArray + oldLength) {
1641	us_arrayCopy(srcChars, `0`, newArray, oldLength, srcLength);
1642	}
1643	setLength(newLength);
1644	}
1645	return *this;
1646	}
1647
1648	/**
1649	* Replaceable API
1650	*/
1651	void
1652	UnicodeString::handleReplaceBetween(int32_t start,
1653	int32_t limit,
1654	const UnicodeString& text) {
1655	replaceBetween(start, limit, text);
1656	}
1657
1658	/**
1659	* Replaceable API
1660	*/
1661	void
1662	UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1663	if (limit <= start) {
1664	return; // Nothing to do; avoid bogus malloc call
1665	}
1666	char16_t* text = (char16_t) uprv_malloc( sizeof(char16_t) (limit - start) );
1667	// Check to make sure text is not null.
1668	if (text != nullptr) {
1669	extractBetween(start, limit, text, `0`);
1670	insert(dest, text, `0`, limit - start);
1671	uprv_free(text);
1672	}
1673	}
1674
1675	/**
1676	* Replaceable API
1677	*
1678	* NOTE: This is for the Replaceable class. There is no rep.cpp,
1679	* so we implement this function here.
1680	*/
1681	UBool Replaceable::hasMetaData() const {
1682	return true;
1683	}
1684
1685	/**
1686	* Replaceable API
1687	*/
1688	UBool UnicodeString::hasMetaData() const {
1689	return false;
1690	}
1691
1692	UnicodeString&
1693	UnicodeString::doReverse(int32_t start, int32_t length) {
1694	if(length <= `1` \|\| !cloneArrayIfNeeded()) {
1695	return *this;
1696	}
1697
1698	// pin the indices to legal values
1699	pinIndices(start, length);
1700	if(length <= `1`) { // pinIndices() might have shrunk the length
1701	return *this;
1702	}
1703
1704	char16_t *left = getArrayStart() + start;
1705	char16_t right = left + length - `1`; // -1 for inclusive boundary (length>=2)*
1706	char16_t swap;
1707	UBool hasSupplementary = false;
1708
1709	// Before the loop we know left<right because length>=2.
1710	do {
1711	hasSupplementary \|= (UBool)U16_IS_LEAD(swap = *left);
1712	hasSupplementary \|= (UBool)U16_IS_LEAD(left++ = right);
1713	*right-- = swap;
1714	} while(left < right);
1715	// Make sure to test the middle code unit of an odd-length string.
1716	// Redundant if the length is even.
1717	hasSupplementary \|= (UBool)U16_IS_LEAD(*left);
1718
1719	/ if there are supplementary code points in the reversed range, then re-swap their surrogates /
1720	if(hasSupplementary) {
1721	char16_t swap2;
1722
1723	left = getArrayStart() + start;
1724	right = left + length - `1`; // -1 so that we can look at (left+1) if left<right*
1725	while(left < right) {
1726	if(U16_IS_TRAIL(swap = left) && U16_IS_LEAD(swap2 = (left + `1`))) {
1727	*left++ = swap2;
1728	*left++ = swap;
1729	} else {
1730	++left;
1731	}
1732	}
1733	}
1734
1735	return *this;
1736	}
1737
1738	UBool
1739	UnicodeString::padLeading(int32_t targetLength,
1740	char16_t padChar)
1741	{
1742	int32_t oldLength = length();
1743	if(oldLength >= targetLength \|\| !cloneArrayIfNeeded(targetLength)) {
1744	return false;
1745	} else {
1746	// move contents up by padding width
1747	char16_t *array = getArrayStart();
1748	int32_t start = targetLength - oldLength;
1749	us_arrayCopy(array, `0`, array, start, oldLength);
1750
1751	// fill in padding character
1752	while(--start >= `0`) {
1753	array[start] = padChar;
1754	}
1755	setLength(targetLength);
1756	return true;
1757	}
1758	}
1759
1760	UBool
1761	UnicodeString::padTrailing(int32_t targetLength,
1762	char16_t padChar)
1763	{
1764	int32_t oldLength = length();
1765	if(oldLength >= targetLength \|\| !cloneArrayIfNeeded(targetLength)) {
1766	return false;
1767	} else {
1768	// fill in padding character
1769	char16_t *array = getArrayStart();
1770	int32_t length = targetLength;
1771	while(--length >= oldLength) {
1772	array[length] = padChar;
1773	}
1774	setLength(targetLength);
1775	return true;
1776	}
1777	}
1778
//========================================
// Hashing
//========================================
1782	int32_t
1783	UnicodeString::doHashCode() const
1784	{
1785	/ Delegate hash computation to uhash. This makes UnicodeString*
1786	* hashing consistent with char16_t* hashing. */
1787	int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1788	if (hashCode == kInvalidHashCode) {
1789	hashCode = kEmptyHashCode;
1790	}
1791	return hashCode;
1792	}
1793
//========================================
// External Buffer
//========================================
1797
1798	char16_t *
1799	UnicodeString::getBuffer(int32_t minCapacity) {
1800	if(minCapacity>=-`1` && cloneArrayIfNeeded(minCapacity)) {
1801	fUnion.fFields.fLengthAndFlags\|=kOpenGetBuffer;
1802	setZeroLength();
1803	return getArrayStart();
1804	} else {
1805	return nullptr;
1806	}
1807	}
1808
1809	void
1810	UnicodeString::releaseBuffer(int32_t newLength) {
1811	if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-`1`) {
1812	// set the new fLength
1813	int32_t capacity=getCapacity();
1814	if(newLength==-`1`) {
1815	// the new length is the string length, capped by fCapacity
1816	const char16_t array=getArrayStart(), p=array, *limit=array+capacity;
1817	while(p<limit && *p!=`0`) {
1818	++p;
1819	}
1820	newLength=(int32_t)(p-array);
1821	} else if(newLength>capacity) {
1822	newLength=capacity;
1823	}
1824	setLength(newLength);
1825	fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
1826	}
1827	}
1828
//========================================
// Miscellaneous
//========================================
1832	UBool
1833	UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1834	int32_t growCapacity,
1835	UBool doCopyArray,
1836	int32_t **pBufferToDelete,
1837	UBool forceClone) {
1838	// default parameters need to be static, therefore
1839	// the defaults are -1 to have convenience defaults
1840	if(newCapacity == -`1`) {
1841	newCapacity = getCapacity();
1842	}
1843
1844	// while a getBuffer(minCapacity) is "open",
1845	// prevent any modifications of the string by returning false here
1846	// if the string is bogus, then only an assignment or similar can revive it
1847	if(!isWritable()) {
1848	return false;
1849	}
1850
1851	/*
1852	* We need to make a copy of the array if
1853	* the buffer is read-only, or
1854	* the buffer is refCounted (shared), and refCount>1, or
1855	* the buffer is too small.
1856	* Return false if memory could not be allocated.
1857	*/
1858	if(forceClone \|\|
1859	fUnion.fFields.fLengthAndFlags & kBufferIsReadonly \|\|
1860	(fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > `1`) \|\|
1861	newCapacity > getCapacity()
1862	) {
1863	// check growCapacity for default value and use of the stack buffer
1864	if(growCapacity < `0`) {
1865	growCapacity = newCapacity;
1866	} else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1867	growCapacity = US_STACKBUF_SIZE;
1868	}
1869
1870	// save old values
1871	char16_t oldStackBuffer[US_STACKBUF_SIZE];
1872	char16_t *oldArray;
1873	int32_t oldLength = length();
1874	int16_t flags = fUnion.fFields.fLengthAndFlags;
1875
1876	if(flags&kUsingStackBuffer) {
1877	U_ASSERT(!(flags&kRefCounted)); / kRefCounted and kUsingStackBuffer are mutally exclusive /
1878	if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1879	// copy the stack buffer contents because it will be overwritten with
1880	// fUnion.fFields values
1881	us_arrayCopy(fUnion.fStackFields.fBuffer, `0`, oldStackBuffer, `0`, oldLength);
1882	oldArray = oldStackBuffer;
1883	} else {
1884	oldArray = nullptr; // no need to copy from the stack buffer to itself
1885	}
1886	} else {
1887	oldArray = fUnion.fFields.fArray;
1888	U_ASSERT(oldArray!=nullptr); / when stack buffer is not used, oldArray must have a non-nullptr reference /
1889	}
1890
1891	// allocate a new array
1892	if(allocate(growCapacity) \|\|
1893	(newCapacity < growCapacity && allocate(newCapacity))
1894	) {
1895	if(doCopyArray) {
1896	// copy the contents
1897	// do not copy more than what fits - it may be smaller than before
1898	int32_t minLength = oldLength;
1899	newCapacity = getCapacity();
1900	if(newCapacity < minLength) {
1901	minLength = newCapacity;
1902	}
1903	if(oldArray != nullptr) {
1904	us_arrayCopy(oldArray, `0`, getArrayStart(), `0`, minLength);
1905	}
1906	setLength(minLength);
1907	} else {
1908	setZeroLength();
1909	}
1910
1911	// release the old array
1912	if(flags & kRefCounted) {
1913	// the array is refCounted; decrement and release if 0
1914	u_atomic_int32_t pRefCount = ((u_atomic_int32_t )oldArray - `1`);
1915	if(umtx_atomic_dec(pRefCount) == `0`) {
1916	if(pBufferToDelete == `0`) {
1917	// Note: cast to (void ) is needed with MSVC, where u_atomic_int32_t*
1918	// is defined as volatile. (Volatile has useful non-standard behavior
1919	// with this compiler.)
1920	uprv_free((void *)pRefCount);
1921	} else {
1922	// the caller requested to delete it himself
1923	pBufferToDelete = (int32_t )pRefCount;
1924	}
1925	}
1926	}
1927	} else {
1928	// not enough memory for growCapacity and not even for the smaller newCapacity
1929	// reset the old values for setToBogus() to release the array
1930	if(!(flags&kUsingStackBuffer)) {
1931	fUnion.fFields.fArray = oldArray;
1932	}
1933	fUnion.fFields.fLengthAndFlags = flags;
1934	setToBogus();
1935	return false;
1936	}
1937	}
1938	return true;
1939	}
1940
// UnicodeStringAppendable ------------------------------------------------- ***
1942
1943	UnicodeStringAppendable::~UnicodeStringAppendable() {}
1944
1945	UBool
1946	UnicodeStringAppendable::appendCodeUnit(char16_t c) {
1947	return str.doAppend(&c, `0`, `1`).isWritable();
1948	}
1949
1950	UBool
1951	UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1952	char16_t buffer[U16_MAX_LENGTH];
1953	int32_t cLength = `0`;
1954	UBool isError = false;
1955	U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1956	return !isError && str.doAppend(buffer, `0`, cLength).isWritable();
1957	}
1958
1959	UBool
1960	UnicodeStringAppendable::appendString(const char16_t *s, int32_t length) {
1961	return str.doAppend(s, `0`, length).isWritable();
1962	}
1963
1964	UBool
1965	UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1966	return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1967	}
1968
1969	char16_t *
1970	UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1971	int32_t desiredCapacityHint,
1972	char16_t *scratch, int32_t scratchCapacity,
1973	int32_t *resultCapacity) {
1974	if(minCapacity < `1` \|\| scratchCapacity < minCapacity) {
1975	*resultCapacity = `0`;
1976	return nullptr;
1977	}
1978	int32_t oldLength = str.length();
1979	if(minCapacity <= (kMaxCapacity - oldLength) &&
1980	desiredCapacityHint <= (kMaxCapacity - oldLength) &&
1981	str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1982	*resultCapacity = str.getCapacity() - oldLength;
1983	return str.getArrayStart() + oldLength;
1984	}
1985	*resultCapacity = scratchCapacity;
1986	return scratch;
1987	}
1988
1989	U_NAMESPACE_END
1990
1991	U_NAMESPACE_USE
1992
1993	U_CAPI int32_t U_EXPORT2
1994	uhash_hashUnicodeString(const UElement key) {
1995	const UnicodeString str = (const* UnicodeString*) key.pointer;
1996	return (str == nullptr) ? `0` : str->hashCode();
1997	}
1998
1999	// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
2000	// does not depend on hashtable code.
2001	U_CAPI UBool U_EXPORT2
2002	uhash_compareUnicodeString(const UElement key1, const UElement key2) {
2003	const UnicodeString str1 = (const* UnicodeString*) key1.pointer;
2004	const UnicodeString str2 = (const* UnicodeString*) key2.pointer;
2005	if (str1 == str2) {
2006	return true;
2007	}
2008	if (str1 == nullptr \|\| str2 == nullptr) {
2009	return false;
2010	}
2011	return str1 == str2;
2012	}
2013
#ifdef U_STATIC_IMPLEMENTATION
/*
This should never be called. It is defined here to make sure that the
virtual vector deleting destructor is defined within unistr.cpp.
The vector deleting destructor is already a part of UObject,
but defining it here makes sure that it is included with this object file.
This makes sure that static library dependencies are kept to a minimum.
*/
#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
// The function is intentionally unused, so silence -Wunused-function for it.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
static void uprv_UnicodeStringDummy() {
    delete [] (new UnicodeString[2]);
}
#pragma GCC diagnostic pop
#endif
#endif
2031

// Source: Godot/thirdparty/icu4c/common/unistr.cpp