unistr.h source code [ClickHouse/contrib/icu/icu4c/source/common/unicode/unistr.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1998-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File unistr.h
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 09/25/98 stephen Creation.
15	* 11/11/98 stephen Changed per 11/9 code review.
16	* 04/20/99 stephen Overhauled per 4/16 code review.
17	* 11/18/99 aliu Made to inherit from Replaceable. Added method
18	* handleReplaceBetween(); other methods unchanged.
19	* 06/25/01 grhoten Remove dependency on iostream.
20	******************************************************************************
21	*/
22
23	#ifndef UNISTR_H
24	#define UNISTR_H
25
26	/**
27	* \file
28	* \brief C++ API: Unicode String
29	*/
30
31	#include "unicode/utypes.h"
32
33	#if U_SHOW_CPLUSPLUS_API
34
35	#include <cstddef>
36	#include "unicode/char16ptr.h"
37	#include "unicode/rep.h"
38	#include "unicode/std_string.h"
39	#include "unicode/stringpiece.h"
40	#include "unicode/bytestream.h"
41
42	struct UConverter; // unicode/ucnv.h
43
44	#ifndef USTRING_H
45	/**
46	* \ingroup ustring_ustrlen
47	*/
48	U_STABLE int32_t U_EXPORT2
49	u_strlen(const UChar *s);
50	#endif
51
52	U_NAMESPACE_BEGIN
53
54	#if !UCONFIG_NO_BREAK_ITERATION
55	class BreakIterator; // unicode/brkiter.h
56	#endif
57	class Edits;
58
59	U_NAMESPACE_END
60
61	// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
62	/**
63	* Internal string case mapping function type.
64	* All error checking must be done.
65	* src and dest must not overlap.
66	* @internal
67	*/
68	typedef int32_t U_CALLCONV
69	UStringCaseMapper(int32_t caseLocale, uint32_t options,
70	#if !UCONFIG_NO_BREAK_ITERATION
71	icu::BreakIterator *iter,
72	#endif
73	char16_t *dest, int32_t destCapacity,
74	const char16_t *src, int32_t srcLength,
75	icu::Edits *edits,
76	UErrorCode &errorCode);
77
78	U_NAMESPACE_BEGIN
79
80	class Locale; // unicode/locid.h
81	class StringCharacterIterator;
82	class UnicodeStringAppendable; // unicode/appendable.h
83
84	/* The <iostream> include has been moved to unicode/ustream.h */
85
86	/**
87	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
88	* which constructs a Unicode string from an invariant-character char * string.
89	* About invariant characters see utypes.h.
90	* This constructor has no runtime dependency on conversion code and is
91	* therefore recommended over ones taking a charset name string
92	* (where the empty string "" indicates invariant-character conversion).
93	*
94	* @stable ICU 3.2
95	*/
96	#define US_INV icu::UnicodeString::kInvariant
97
98	/**
99	* Unicode String literals in C++.
100	*
101	* Note: these macros are not recommended for new code.
102	* Prior to the availability of C++11 and u"unicode string literals",
103	* these macros were provided for portability and efficiency when
104	* initializing UnicodeStrings from literals.
105	*
106	* They work only for strings that contain "invariant characters", i.e.,
107	* only latin letters, digits, and some punctuation.
108	* See utypes.h for details.
109	*
110	* The string parameter must be a C string literal.
111	* The length of the string, not including the terminating
112	* `NUL`, must be specified as a constant.
113	* @stable ICU 2.0
114	*/
115	#if !U_CHAR16_IS_TYPEDEF
116	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
117	#else
118	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length)
119	#endif
120
121	/**
122	* Unicode String literals in C++.
123	* Dependent on the platform properties, different UnicodeString
124	* constructors should be used to create a UnicodeString object from
125	* a string literal.
126	* The macros are defined for improved performance.
127	* They work only for strings that contain "invariant characters", i.e.,
128	* only latin letters, digits, and some punctuation.
129	* See utypes.h for details.
130	*
131	* The string parameter must be a C string literal.
132	* @stable ICU 2.0
133	*/
134	#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
135
136	/**
137	* \def UNISTR_FROM_CHAR_EXPLICIT
138	* This can be defined to be empty or "explicit".
139	* If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
140	* constructors are marked as explicit, preventing their inadvertent use.
141	* @stable ICU 49
142	*/
143	#ifndef UNISTR_FROM_CHAR_EXPLICIT
144	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
145	// Auto-"explicit" when compiling ICU library code itself.
146	# define UNISTR_FROM_CHAR_EXPLICIT explicit
147	# else
148	// Empty by default (when not predefined) for source code compatibility.
149	# define UNISTR_FROM_CHAR_EXPLICIT
150	# endif
151	#endif
152
153	/**
154	* \def UNISTR_FROM_STRING_EXPLICIT
155	* This can be defined to be empty or "explicit".
156	* If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
157	* constructors are marked as explicit, preventing their inadvertent use.
158	*
159	* In particular, this helps prevent accidentally depending on ICU conversion code
160	* by passing a string literal into an API with a const UnicodeString & parameter.
161	* @stable ICU 49
162	*/
163	#ifndef UNISTR_FROM_STRING_EXPLICIT
164	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
165	// Auto-"explicit" when compiling ICU library code itself.
166	# define UNISTR_FROM_STRING_EXPLICIT explicit
167	# else
168	// Empty by default (when not predefined) for source code compatibility.
169	# define UNISTR_FROM_STRING_EXPLICIT
170	# endif
171	#endif
172
173	/**
174	* \def UNISTR_OBJECT_SIZE
175	* Desired sizeof(UnicodeString) in bytes.
176	* It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
177	* The object size may want to be a multiple of 16 bytes,
178	* which is a common granularity for heap allocation.
179	*
180	* Any space inside the object beyond sizeof(vtable pointer) + 2
181	* is available for storing short strings inside the object.
182	* The bigger the object, the longer a string that can be stored inside the object,
183	* without additional heap allocation.
184	*
185	* Depending on a platform's pointer size, pointer alignment requirements,
186	* and struct padding, the compiler will usually round up sizeof(UnicodeString)
187	* to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
188	* to hold the fields for heap-allocated strings.
189	* Such a minimum size also ensures that the object is easily large enough
190	* to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
191	*
192	* sizeof(UnicodeString) >= 48 should work for all known platforms.
193	*
194	* For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
195	* sizeof(UnicodeString) = 64 would leave space for
196	* (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
197	* char16_ts stored inside the object.
198	*
199	* The minimum object size on a 64-bit machine would be
200	* 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
201	* and the internal buffer would hold up to 11 char16_ts in that case.
202	*
203	* @see U16_MAX_LENGTH
204	* @stable ICU 56
205	*/
206	#ifndef UNISTR_OBJECT_SIZE
207	# define UNISTR_OBJECT_SIZE 64
208	#endif
209
210	/**
211	* UnicodeString is a string class that stores Unicode characters directly and provides
212	* similar functionality as the Java String and StringBuffer/StringBuilder classes.
213	* It is a concrete implementation of the abstract class Replaceable (for transliteration).
214	*
215	* The UnicodeString equivalent of std::string’s clear() is remove().
216	*
217	* A UnicodeString may "alias" an external array of characters
218	* (that is, point to it, rather than own the array)
219	* whose lifetime must then at least match the lifetime of the aliasing object.
220	* This aliasing may be preserved when returning a UnicodeString by value,
221	* depending on the compiler and the function implementation,
222	* via Return Value Optimization (RVO) or the move assignment operator.
223	* (However, the copy assignment operator does not preserve aliasing.)
224	* For details see the description of storage models at the end of the class API docs
225	* and in the User Guide chapter linked from there.
226	*
227	* The UnicodeString class is not suitable for subclassing.
228	*
229	* For an overview of Unicode strings in C and C++ see the
230	* [User Guide Strings chapter](http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-).
231	*
232	* In ICU, a Unicode string consists of 16-bit Unicode code units.
233	* A Unicode character may be stored with either one code unit
234	* (the most common case) or with a matched pair of special code units
235	* ("surrogates"). The data type for code units is char16_t.
236	* For single-character handling, a Unicode character code point is a value
237	* in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
238	*
239	* Indexes and offsets into and lengths of strings always count code units, not code points.
240	* This is the same as with multi-byte char* strings in traditional string handling.
241	* Operations on partial strings typically do not test for code point boundaries.
242	* If necessary, the user needs to take care of such boundaries by testing for the code unit
243	* values or by using functions like
244	* UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
245	* (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
246	*
247	* UnicodeString methods are more lenient with regard to input parameter values
248	* than other ICU APIs. In particular:
249	* - If indexes are out of bounds for a UnicodeString object
250	* (< 0 or > length()) then they are "pinned" to the nearest boundary.
251	* - If the buffer passed to an insert/append/replace operation is owned by the
252	* target object, e.g., calling str.append(str), an extra copy may take place
253	* to ensure safety.
254	* - If primitive string pointer values (e.g., const char16_t * or char *)
255	* for input strings are NULL, then those input string parameters are treated
256	* as if they pointed to an empty string.
257	* However, this is not the case for char * parameters for charset names
258	* or other IDs.
259	* - Most UnicodeString methods do not take a UErrorCode parameter because
260	* there are usually very few opportunities for failure other than a shortage
261	* of memory, error codes in low-level C++ string methods would be inconvenient,
262	* and the error code as the last parameter (ICU convention) would prevent
263	* the use of default parameter values.
264	* Instead, such methods set the UnicodeString into a "bogus" state
265	* (see isBogus()) if an error occurs.
266	*
267	* In string comparisons, two UnicodeString objects that are both "bogus"
268	* compare equal (to be transitive and prevent endless loops in sorting),
269	* and a "bogus" string compares less than any non-"bogus" one.
270	*
271	* Const UnicodeString methods are thread-safe. Multiple threads can use
272	* const methods on the same UnicodeString object simultaneously,
273	* but non-const methods must not be called concurrently (in multiple threads)
274	* with any other (const or non-const) methods.
275	*
276	* Similarly, const UnicodeString & parameters are thread-safe.
277	* One object may be passed in as such a parameter concurrently in multiple threads.
278	* This includes the const UnicodeString & parameters for
279	* copy construction, assignment, and cloning.
280	*
281	* UnicodeString uses several storage methods.
282	* String contents can be stored inside the UnicodeString object itself,
283	* in an allocated and shared buffer, or in an outside buffer that is "aliased".
284	* Most of this is done transparently, but careful aliasing in particular provides
285	* significant performance improvements.
286	* Also, the internal buffer is accessible via special functions.
287	* For details see the
288	* [User Guide Strings chapter](http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model).
289	*
290	* @see utf.h
291	* @see CharacterIterator
292	* @stable ICU 2.0
293	*/
294	class U_COMMON_API UnicodeString : public Replaceable
295	{
296	public:
297
298	/**
299	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
300	* which constructs a Unicode string from an invariant-character char * string.
301	* Use the macro US_INV instead of the full qualification for this value.
302	*
303	* @see US_INV
304	* @stable ICU 3.2
305	*/
306	enum EInvariant {
307	/**
308	* @see EInvariant
309	* @stable ICU 3.2
310	*/
311	kInvariant
312	};
313
314	//========================================
315	// Read-only operations
316	//========================================
317
318	/* Comparison - bitwise only - for international comparison use collation */
319
320	/**
321	* Equality operator. Performs only bitwise comparison.
322	* @param text The UnicodeString to compare to this one.
323	* @return TRUE if `text` contains the same characters as this one,
324	* FALSE otherwise.
325	* @stable ICU 2.0
326	*/
327	inline UBool operator== (const UnicodeString& text) const;
328
329	/**
330	* Inequality operator. Performs only bitwise comparison.
331	* @param text The UnicodeString to compare to this one.
332	* @return FALSE if `text` contains the same characters as this one,
333	* TRUE otherwise.
334	* @stable ICU 2.0
335	*/
336	inline UBool operator!= (const UnicodeString& text) const;
337
338	/**
339	* Greater than operator. Performs only bitwise comparison.
340	* @param text The UnicodeString to compare to this one.
341	* @return TRUE if the characters in this are bitwise
342	* greater than the characters in `text`, FALSE otherwise
343	* @stable ICU 2.0
344	*/
345	inline UBool operator> (const UnicodeString& text) const;
346
347	/**
348	* Less than operator. Performs only bitwise comparison.
349	* @param text The UnicodeString to compare to this one.
350	* @return TRUE if the characters in this are bitwise
351	* less than the characters in `text`, FALSE otherwise
352	* @stable ICU 2.0
353	*/
354	inline UBool operator< (const UnicodeString& text) const;
355
356	/**
357	* Greater than or equal operator. Performs only bitwise comparison.
358	* @param text The UnicodeString to compare to this one.
359	* @return TRUE if the characters in this are bitwise
360	* greater than or equal to the characters in `text`, FALSE otherwise
361	* @stable ICU 2.0
362	*/
363	inline UBool operator>= (const UnicodeString& text) const;
364
365	/**
366	* Less than or equal operator. Performs only bitwise comparison.
367	* @param text The UnicodeString to compare to this one.
368	* @return TRUE if the characters in this are bitwise
369	* less than or equal to the characters in `text`, FALSE otherwise
370	* @stable ICU 2.0
371	*/
372	inline UBool operator<= (const UnicodeString& text) const;
373
374	/**
375	* Compare the characters bitwise in this UnicodeString to
376	* the characters in `text`.
377	* @param text The UnicodeString to compare to this one.
378	* @return The result of bitwise character comparison: 0 if this
379	* contains the same characters as `text`, -1 if the characters in
380	* this are bitwise less than the characters in `text`, +1 if the
381	* characters in this are bitwise greater than the characters
382	* in `text`.
383	* @stable ICU 2.0
384	*/
385	inline int8_t compare(const UnicodeString& text) const;
386
387	/**
388	* Compare the characters bitwise in the range
389	* [`start`, `start + length`) with the characters
390	* in the entire string `text`.
391	* (The parameters "start" and "length" are not applied to the other text "text".)
392	* @param start the offset at which the compare operation begins
393	* @param length the number of characters of text to compare.
394	* @param text the other text to be compared against this string.
395	* @return The result of bitwise character comparison: 0 if this
396	* contains the same characters as `text`, -1 if the characters in
397	* this are bitwise less than the characters in `text`, +1 if the
398	* characters in this are bitwise greater than the characters
399	* in `text`.
400	* @stable ICU 2.0
401	*/
402	inline int8_t compare(int32_t start,
403	int32_t length,
404	const UnicodeString& text) const;
405
406	/**
407	* Compare the characters bitwise in the range
408	* [`start`, `start + length`) with the characters
409	* in `srcText` in the range
410	* [`srcStart`, `srcStart + srcLength`).
411	* @param start the offset at which the compare operation begins
412	* @param length the number of characters in this to compare.
413	* @param srcText the text to be compared
414	* @param srcStart the offset into `srcText` to start comparison
415	* @param srcLength the number of characters in `srcText` to compare
416	* @return The result of bitwise character comparison: 0 if this
417	* contains the same characters as `srcText`, -1 if the characters in
418	* this are bitwise less than the characters in `srcText`, +1 if the
419	* characters in this are bitwise greater than the characters
420	* in `srcText`.
421	* @stable ICU 2.0
422	*/
423	inline int8_t compare(int32_t start,
424	int32_t length,
425	const UnicodeString& srcText,
426	int32_t srcStart,
427	int32_t srcLength) const;
428
429	/**
430	* Compare the characters bitwise in this UnicodeString with the first
431	* `srcLength` characters in `srcChars`.
432	* @param srcChars The characters to compare to this UnicodeString.
433	* @param srcLength the number of characters in `srcChars` to compare
434	* @return The result of bitwise character comparison: 0 if this
435	* contains the same characters as `srcChars`, -1 if the characters in
436	* this are bitwise less than the characters in `srcChars`, +1 if the
437	* characters in this are bitwise greater than the characters
438	* in `srcChars`.
439	* @stable ICU 2.0
440	*/
441	inline int8_t compare(ConstChar16Ptr srcChars,
442	int32_t srcLength) const;
443
444	/**
445	* Compare the characters bitwise in the range
446	* [`start`, `start + length`) with the first
447	* `length` characters in `srcChars`.
448	* @param start the offset at which the compare operation begins
449	* @param length the number of characters to compare.
450	* @param srcChars pointer to the characters to be compared
451	* @return The result of bitwise character comparison: 0 if this
452	* contains the same characters as `srcChars`, -1 if the characters in
453	* this are bitwise less than the characters in `srcChars`, +1 if the
454	* characters in this are bitwise greater than the characters
455	* in `srcChars`.
456	* @stable ICU 2.0
457	*/
458	inline int8_t compare(int32_t start,
459	int32_t length,
460	const char16_t *srcChars) const;
461
462	/**
463	* Compare the characters bitwise in the range
464	* [`start`, `start + length`) with the characters
465	* in `srcChars` in the range
466	* [`srcStart`, `srcStart + srcLength`).
467	* @param start the offset at which the compare operation begins
468	* @param length the number of characters in this to compare
469	* @param srcChars the characters to be compared
470	* @param srcStart the offset into `srcChars` to start comparison
471	* @param srcLength the number of characters in `srcChars` to compare
472	* @return The result of bitwise character comparison: 0 if this
473	* contains the same characters as `srcChars`, -1 if the characters in
474	* this are bitwise less than the characters in `srcChars`, +1 if the
475	* characters in this are bitwise greater than the characters
476	* in `srcChars`.
477	* @stable ICU 2.0
478	*/
479	inline int8_t compare(int32_t start,
480	int32_t length,
481	const char16_t *srcChars,
482	int32_t srcStart,
483	int32_t srcLength) const;
484
485	/**
486	* Compare the characters bitwise in the range
487	* [`start`, `limit`) with the characters
488	* in `srcText` in the range
489	* [`srcStart`, `srcLimit`).
490	* @param start the offset at which the compare operation begins
491	* @param limit the offset immediately following the compare operation
492	* @param srcText the text to be compared
493	* @param srcStart the offset into `srcText` to start comparison
494	* @param srcLimit the offset into `srcText` to limit comparison
495	* @return The result of bitwise character comparison: 0 if this
496	* contains the same characters as `srcText`, -1 if the characters in
497	* this are bitwise less than the characters in `srcText`, +1 if the
498	* characters in this are bitwise greater than the characters
499	* in `srcText`.
500	* @stable ICU 2.0
501	*/
502	inline int8_t compareBetween(int32_t start,
503	int32_t limit,
504	const UnicodeString& srcText,
505	int32_t srcStart,
506	int32_t srcLimit) const;
507
508	/**
509	* Compare two Unicode strings in code point order.
510	* The result may be different from the results of compare(), operator<, etc.
511	* if supplementary characters are present:
512	*
513	* In UTF-16, supplementary characters (with code points U+10000 and above) are
514	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
515	* which means that they compare as less than some other BMP characters like U+feff.
516	* This function compares Unicode strings in code point order.
517	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
518	*
519	* @param text Another string to compare this one to.
520	* @return a negative/zero/positive integer corresponding to whether
521	* this string is less than/equal to/greater than the second one
522	* in code point order
523	* @stable ICU 2.0
524	*/
525	inline int8_t compareCodePointOrder(const UnicodeString& text) const;
526
527	/**
528	* Compare two Unicode strings in code point order.
529	* The result may be different from the results of compare(), operator<, etc.
530	* if supplementary characters are present:
531	*
532	* In UTF-16, supplementary characters (with code points U+10000 and above) are
533	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
534	* which means that they compare as less than some other BMP characters like U+feff.
535	* This function compares Unicode strings in code point order.
536	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
537	*
538	* @param start The start offset in this string at which the compare operation begins.
539	* @param length The number of code units from this string to compare.
540	* @param srcText Another string to compare this one to.
541	* @return a negative/zero/positive integer corresponding to whether
542	* this string is less than/equal to/greater than the second one
543	* in code point order
544	* @stable ICU 2.0
545	*/
546	inline int8_t compareCodePointOrder(int32_t start,
547	int32_t length,
548	const UnicodeString& srcText) const;
549
550	/**
551	* Compare two Unicode strings in code point order.
552	* The result may be different from the results of compare(), operator<, etc.
553	* if supplementary characters are present:
554	*
555	* In UTF-16, supplementary characters (with code points U+10000 and above) are
556	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
557	* which means that they compare as less than some other BMP characters like U+feff.
558	* This function compares Unicode strings in code point order.
559	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
560	*
561	* @param start The start offset in this string at which the compare operation begins.
562	* @param length The number of code units from this string to compare.
563	* @param srcText Another string to compare this one to.
564	* @param srcStart The start offset in that string at which the compare operation begins.
565	* @param srcLength The number of code units from that string to compare.
566	* @return a negative/zero/positive integer corresponding to whether
567	* this string is less than/equal to/greater than the second one
568	* in code point order
569	* @stable ICU 2.0
570	*/
571	inline int8_t compareCodePointOrder(int32_t start,
572	int32_t length,
573	const UnicodeString& srcText,
574	int32_t srcStart,
575	int32_t srcLength) const;
576
577	/**
578	* Compare two Unicode strings in code point order.
579	* The result may be different from the results of compare(), operator<, etc.
580	* if supplementary characters are present:
581	*
582	* In UTF-16, supplementary characters (with code points U+10000 and above) are
583	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
584	* which means that they compare as less than some other BMP characters like U+feff.
585	* This function compares Unicode strings in code point order.
586	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
587	*
588	* @param srcChars A pointer to another string to compare this one to.
589	* @param srcLength The number of code units from that string to compare.
590	* @return a negative/zero/positive integer corresponding to whether
591	* this string is less than/equal to/greater than the second one
592	* in code point order
593	* @stable ICU 2.0
594	*/
595	inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
596	int32_t srcLength) const;
597
598	/**
599	* Compare two Unicode strings in code point order.
600	* The result may be different from the results of compare(), operator<, etc.
601	* if supplementary characters are present:
602	*
603	* In UTF-16, supplementary characters (with code points U+10000 and above) are
604	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
605	* which means that they compare as less than some other BMP characters like U+feff.
606	* This function compares Unicode strings in code point order.
607	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
608	*
609	* @param start The start offset in this string at which the compare operation begins.
610	* @param length The number of code units from this string to compare.
611	* @param srcChars A pointer to another string to compare this one to.
612	* @return a negative/zero/positive integer corresponding to whether
613	* this string is less than/equal to/greater than the second one
614	* in code point order
615	* @stable ICU 2.0
616	*/
617	inline int8_t compareCodePointOrder(int32_t start,
618	int32_t length,
619	const char16_t *srcChars) const;
620
621	/**
622	* Compare two Unicode strings in code point order.
623	* The result may be different from the results of compare(), operator<, etc.
624	* if supplementary characters are present:
625	*
626	* In UTF-16, supplementary characters (with code points U+10000 and above) are
627	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
628	* which means that they compare as less than some other BMP characters like U+feff.
629	* This function compares Unicode strings in code point order.
630	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
631	*
632	* @param start The start offset in this string at which the compare operation begins.
633	* @param length The number of code units from this string to compare.
634	* @param srcChars A pointer to another string to compare this one to.
635	* @param srcStart The start offset in that string at which the compare operation begins.
636	* @param srcLength The number of code units from that string to compare.
637	* @return a negative/zero/positive integer corresponding to whether
638	* this string is less than/equal to/greater than the second one
639	* in code point order
640	* @stable ICU 2.0
641	*/
642	inline int8_t compareCodePointOrder(int32_t start,
643	int32_t length,
644	const char16_t *srcChars,
645	int32_t srcStart,
646	int32_t srcLength) const;
647
648	/**
649	* Compare two Unicode strings in code point order.
650	* The result may be different from the results of compare(), operator<, etc.
651	* if supplementary characters are present:
652	*
653	* In UTF-16, supplementary characters (with code points U+10000 and above) are
654	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
655	* which means that they compare as less than some other BMP characters like U+feff.
656	* This function compares Unicode strings in code point order.
657	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
658	*
659	* @param start The start offset in this string at which the compare operation begins.
660	* @param limit The offset after the last code unit from this string to compare.
661	* @param srcText Another string to compare this one to.
662	* @param srcStart The start offset in that string at which the compare operation begins.
663	* @param srcLimit The offset after the last code unit from that string to compare.
664	* @return a negative/zero/positive integer corresponding to whether
665	* this string is less than/equal to/greater than the second one
666	* in code point order
667	* @stable ICU 2.0
668	*/
669	inline int8_t compareCodePointOrderBetween(int32_t start,
670	int32_t limit,
671	const UnicodeString& srcText,
672	int32_t srcStart,
673	int32_t srcLimit) const;
674
675	/**
676	* Compare two strings case-insensitively using full case folding.
677	* This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
678	*
679	* @param text Another string to compare this one to.
680	* @param options A bit set of options:
681	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
682	* Comparison in code unit order with default case folding.
683	*
684	* - U_COMPARE_CODE_POINT_ORDER
685	* Set to choose code point order instead of code unit order
686	* (see u_strCompare for details).
687	*
688	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
689	*
690	* @return A negative, zero, or positive integer indicating the comparison result.
691	* @stable ICU 2.0
692	*/
693	inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
694
695	/**
696	* Compare two strings case-insensitively using full case folding.
697	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
698	*
699	* @param start The start offset in this string at which the compare operation begins.
700	* @param length The number of code units from this string to compare.
701	* @param srcText Another string to compare this one to.
702	* @param options A bit set of options:
703	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
704	* Comparison in code unit order with default case folding.
705	*
706	* - U_COMPARE_CODE_POINT_ORDER
707	* Set to choose code point order instead of code unit order
708	* (see u_strCompare for details).
709	*
710	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
711	*
712	* @return A negative, zero, or positive integer indicating the comparison result.
713	* @stable ICU 2.0
714	*/
715	inline int8_t caseCompare(int32_t start,
716	int32_t length,
717	const UnicodeString& srcText,
718	uint32_t options) const;
719
720	/**
721	* Compare two strings case-insensitively using full case folding.
722	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
723	*
724	* @param start The start offset in this string at which the compare operation begins.
725	* @param length The number of code units from this string to compare.
726	* @param srcText Another string to compare this one to.
727	* @param srcStart The start offset in that string at which the compare operation begins.
728	* @param srcLength The number of code units from that string to compare.
729	* @param options A bit set of options:
730	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
731	* Comparison in code unit order with default case folding.
732	*
733	* - U_COMPARE_CODE_POINT_ORDER
734	* Set to choose code point order instead of code unit order
735	* (see u_strCompare for details).
736	*
737	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
738	*
739	* @return A negative, zero, or positive integer indicating the comparison result.
740	* @stable ICU 2.0
741	*/
742	inline int8_t caseCompare(int32_t start,
743	int32_t length,
744	const UnicodeString& srcText,
745	int32_t srcStart,
746	int32_t srcLength,
747	uint32_t options) const;
748
749	/**
750	* Compare two strings case-insensitively using full case folding.
751	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
752	*
753	* @param srcChars A pointer to another string to compare this one to.
754	* @param srcLength The number of code units from that string to compare.
755	* @param options A bit set of options:
756	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
757	* Comparison in code unit order with default case folding.
758	*
759	* - U_COMPARE_CODE_POINT_ORDER
760	* Set to choose code point order instead of code unit order
761	* (see u_strCompare for details).
762	*
763	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
764	*
765	* @return A negative, zero, or positive integer indicating the comparison result.
766	* @stable ICU 2.0
767	*/
768	inline int8_t caseCompare(ConstChar16Ptr srcChars,
769	int32_t srcLength,
770	uint32_t options) const;
771
772	/**
773	* Compare two strings case-insensitively using full case folding.
774	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
775	*
776	* @param start The start offset in this string at which the compare operation begins.
777	* @param length The number of code units from this string to compare.
778	* @param srcChars A pointer to another string to compare this one to.
779	* @param options A bit set of options:
780	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
781	* Comparison in code unit order with default case folding.
782	*
783	* - U_COMPARE_CODE_POINT_ORDER
784	* Set to choose code point order instead of code unit order
785	* (see u_strCompare for details).
786	*
787	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
788	*
789	* @return A negative, zero, or positive integer indicating the comparison result.
790	* @stable ICU 2.0
791	*/
792	inline int8_t caseCompare(int32_t start,
793	int32_t length,
794	const char16_t *srcChars,
795	uint32_t options) const;
796
797	/**
798	* Compare two strings case-insensitively using full case folding.
799	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
800	*
801	* @param start The start offset in this string at which the compare operation begins.
802	* @param length The number of code units from this string to compare.
803	* @param srcChars A pointer to another string to compare this one to.
804	* @param srcStart The start offset in that string at which the compare operation begins.
805	* @param srcLength The number of code units from that string to compare.
806	* @param options A bit set of options:
807	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
808	* Comparison in code unit order with default case folding.
809	*
810	* - U_COMPARE_CODE_POINT_ORDER
811	* Set to choose code point order instead of code unit order
812	* (see u_strCompare for details).
813	*
814	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
815	*
816	* @return A negative, zero, or positive integer indicating the comparison result.
817	* @stable ICU 2.0
818	*/
819	inline int8_t caseCompare(int32_t start,
820	int32_t length,
821	const char16_t *srcChars,
822	int32_t srcStart,
823	int32_t srcLength,
824	uint32_t options) const;
825
826	/**
827	* Compare two strings case-insensitively using full case folding.
828	* This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
829	*
830	* @param start The start offset in this string at which the compare operation begins.
831	* @param limit The offset after the last code unit from this string to compare.
832	* @param srcText Another string to compare this one to.
833	* @param srcStart The start offset in that string at which the compare operation begins.
834	* @param srcLimit The offset after the last code unit from that string to compare.
835	* @param options A bit set of options:
836	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
837	* Comparison in code unit order with default case folding.
838	*
839	* - U_COMPARE_CODE_POINT_ORDER
840	* Set to choose code point order instead of code unit order
841	* (see u_strCompare for details).
842	*
843	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
844	*
845	* @return A negative, zero, or positive integer indicating the comparison result.
846	* @stable ICU 2.0
847	*/
848	inline int8_t caseCompareBetween(int32_t start,
849	int32_t limit,
850	const UnicodeString& srcText,
851	int32_t srcStart,
852	int32_t srcLimit,
853	uint32_t options) const;
854
855	/**
856	* Determine if this starts with the characters in `text`
857	* @param text The text to match.
858	* @return TRUE if this starts with the characters in `text`,
859	* FALSE otherwise
860	* @stable ICU 2.0
861	*/
862	inline UBool startsWith(const UnicodeString& text) const;
863
864	/**
865	* Determine if this starts with the characters in `srcText`
866	* in the range [`srcStart`, `srcStart + srcLength`).
867	* @param srcText The text to match.
868	* @param srcStart the offset into `srcText` to start matching
869	* @param srcLength the number of characters in `srcText` to match
870	* @return TRUE if this starts with the characters in `srcText`,
871	* FALSE otherwise
872	* @stable ICU 2.0
873	*/
874	inline UBool startsWith(const UnicodeString& srcText,
875	int32_t srcStart,
876	int32_t srcLength) const;
877
878	/**
879	* Determine if this starts with the characters in `srcChars`
880	* @param srcChars The characters to match.
881	* @param srcLength the number of characters in `srcChars`
882	* @return TRUE if this starts with the characters in `srcChars`,
883	* FALSE otherwise
884	* @stable ICU 2.0
885	*/
886	inline UBool startsWith(ConstChar16Ptr srcChars,
887	int32_t srcLength) const;
888
889	/**
890	* Determine if this starts with the characters in `srcChars`
891	* in the range [`srcStart`, `srcStart + srcLength`).
892	* @param srcChars The characters to match.
893	* @param srcStart the offset into `srcChars` to start matching
894	* @param srcLength the number of characters in `srcChars` to match
895	* @return TRUE if this starts with the characters in `srcChars`, FALSE otherwise
896	* @stable ICU 2.0
897	*/
898	inline UBool startsWith(const char16_t *srcChars,
899	int32_t srcStart,
900	int32_t srcLength) const;
901
902	/**
903	* Determine if this ends with the characters in `text`
904	* @param text The text to match.
905	* @return TRUE if this ends with the characters in `text`,
906	* FALSE otherwise
907	* @stable ICU 2.0
908	*/
909	inline UBool endsWith(const UnicodeString& text) const;
910
911	/**
912	* Determine if this ends with the characters in `srcText`
913	* in the range [`srcStart`, `srcStart + srcLength`).
914	* @param srcText The text to match.
915	* @param srcStart the offset into `srcText` to start matching
916	* @param srcLength the number of characters in `srcText` to match
917	* @return TRUE if this ends with the characters in `srcText`,
918	* FALSE otherwise
919	* @stable ICU 2.0
920	*/
921	inline UBool endsWith(const UnicodeString& srcText,
922	int32_t srcStart,
923	int32_t srcLength) const;
924
925	/**
926	* Determine if this ends with the characters in `srcChars`
927	* @param srcChars The characters to match.
928	* @param srcLength the number of characters in `srcChars`
929	* @return TRUE if this ends with the characters in `srcChars`,
930	* FALSE otherwise
931	* @stable ICU 2.0
932	*/
933	inline UBool endsWith(ConstChar16Ptr srcChars,
934	int32_t srcLength) const;
935
936	/**
937	* Determine if this ends with the characters in `srcChars`
938	* in the range [`srcStart`, `srcStart + srcLength`).
939	* @param srcChars The characters to match.
940	* @param srcStart the offset into `srcChars` to start matching
941	* @param srcLength the number of characters in `srcChars` to match
942	* @return TRUE if this ends with the characters in `srcChars`,
943	* FALSE otherwise
944	* @stable ICU 2.0
945	*/
946	inline UBool endsWith(const char16_t *srcChars,
947	int32_t srcStart,
948	int32_t srcLength) const;
949
950
951	/* Searching - bitwise only */
952
953	/**
954	* Locate in this the first occurrence of the characters in `text`,
955	* using bitwise comparison.
956	* @param text The text to search for.
957	* @return The offset into this of the start of `text`,
958	* or -1 if not found.
959	* @stable ICU 2.0
960	*/
961	inline int32_t indexOf(const UnicodeString& text) const;
962
963	/**
964	* Locate in this the first occurrence of the characters in `text`
965	* starting at offset `start`, using bitwise comparison.
966	* @param text The text to search for.
967	* @param start The offset at which searching will start.
968	* @return The offset into this of the start of `text`,
969	* or -1 if not found.
970	* @stable ICU 2.0
971	*/
972	inline int32_t indexOf(const UnicodeString& text,
973	int32_t start) const;
974
975	/**
976	* Locate in this the first occurrence in the range
977	* [`start`, `start + length`) of the characters
978	* in `text`, using bitwise comparison.
979	* @param text The text to search for.
980	* @param start The offset at which searching will start.
981	* @param length The number of characters to search
982	* @return The offset into this of the start of `text`,
983	* or -1 if not found.
984	* @stable ICU 2.0
985	*/
986	inline int32_t indexOf(const UnicodeString& text,
987	int32_t start,
988	int32_t length) const;
989
990	/**
991	* Locate in this the first occurrence in the range
992	* [`start`, `start + length`) of the characters
993	* in `srcText` in the range
994	* [`srcStart`, `srcStart + srcLength`),
995	* using bitwise comparison.
996	* @param srcText The text to search for.
997	* @param srcStart the offset into `srcText` at which
998	* to start matching
999	* @param srcLength the number of characters in `srcText` to match
1000	* @param start the offset into this at which to start matching
1001	* @param length the number of characters in this to search
1002	* @return The offset into this of the start of `srcText`,
1003	* or -1 if not found.
1004	* @stable ICU 2.0
1005	*/
1006	inline int32_t indexOf(const UnicodeString& srcText,
1007	int32_t srcStart,
1008	int32_t srcLength,
1009	int32_t start,
1010	int32_t length) const;
1011
1012	/**
1013	* Locate in this the first occurrence of the characters in
1014	* `srcChars`
1015	* starting at offset `start`, using bitwise comparison.
1016	* @param srcChars The text to search for.
1017	* @param srcLength the number of characters in `srcChars` to match
1018	* @param start the offset into this at which to start matching
1019	* @return The offset into this of the start of `srcChars`,
1020	* or -1 if not found.
1021	* @stable ICU 2.0
1022	*/
1023	inline int32_t indexOf(const char16_t *srcChars,
1024	int32_t srcLength,
1025	int32_t start) const;
1026
1027	/**
1028	* Locate in this the first occurrence in the range
1029	* [`start`, `start + length`) of the characters
1030	* in `srcChars`, using bitwise comparison.
1031	* @param srcChars The text to search for.
1032	* @param srcLength the number of characters in `srcChars`
1033	* @param start The offset at which searching will start.
1034	* @param length The number of characters to search
1035	* @return The offset into this of the start of `srcChars`,
1036	* or -1 if not found.
1037	* @stable ICU 2.0
1038	*/
1039	inline int32_t indexOf(ConstChar16Ptr srcChars,
1040	int32_t srcLength,
1041	int32_t start,
1042	int32_t length) const;
1043
1044	/**
1045	* Locate in this the first occurrence in the range
1046	* [`start`, `start + length`) of the characters
1047	* in `srcChars` in the range
1048	* [`srcStart`, `srcStart + srcLength`),
1049	* using bitwise comparison.
1050	* @param srcChars The text to search for.
1051	* @param srcStart the offset into `srcChars` at which
1052	* to start matching
1053	* @param srcLength the number of characters in `srcChars` to match
1054	* @param start the offset into this at which to start matching
1055	* @param length the number of characters in this to search
1056	* @return The offset into this of the start of `srcChars`,
1057	* or -1 if not found.
1058	* @stable ICU 2.0
1059	*/
1060	int32_t indexOf(const char16_t *srcChars,
1061	int32_t srcStart,
1062	int32_t srcLength,
1063	int32_t start,
1064	int32_t length) const;
1065
1066	/**
1067	* Locate in this the first occurrence of the BMP code point `c`,
1068	* using bitwise comparison.
1069	* @param c The code unit to search for.
1070	* @return The offset into this of `c`, or -1 if not found.
1071	* @stable ICU 2.0
1072	*/
1073	inline int32_t indexOf(char16_t c) const;
1074
1075	/**
1076	* Locate in this the first occurrence of the code point `c`,
1077	* using bitwise comparison.
1078	*
1079	* @param c The code point to search for.
1080	* @return The offset into this of `c`, or -1 if not found.
1081	* @stable ICU 2.0
1082	*/
1083	inline int32_t indexOf(UChar32 c) const;
1084
1085	/**
1086	* Locate in this the first occurrence of the BMP code point `c`,
1087	* starting at offset `start`, using bitwise comparison.
1088	* @param c The code unit to search for.
1089	* @param start The offset at which searching will start.
1090	* @return The offset into this of `c`, or -1 if not found.
1091	* @stable ICU 2.0
1092	*/
1093	inline int32_t indexOf(char16_t c,
1094	int32_t start) const;
1095
1096	/**
1097	* Locate in this the first occurrence of the code point `c`
1098	* starting at offset `start`, using bitwise comparison.
1099	*
1100	* @param c The code point to search for.
1101	* @param start The offset at which searching will start.
1102	* @return The offset into this of `c`, or -1 if not found.
1103	* @stable ICU 2.0
1104	*/
1105	inline int32_t indexOf(UChar32 c,
1106	int32_t start) const;
1107
1108	/**
1109	* Locate in this the first occurrence of the BMP code point `c`
1110	* in the range [`start`, `start + length`),
1111	* using bitwise comparison.
1112	* @param c The code unit to search for.
1113	* @param start the offset into this at which to start matching
1114	* @param length the number of characters in this to search
1115	* @return The offset into this of `c`, or -1 if not found.
1116	* @stable ICU 2.0
1117	*/
1118	inline int32_t indexOf(char16_t c,
1119	int32_t start,
1120	int32_t length) const;
1121
1122	/**
1123	* Locate in this the first occurrence of the code point `c`
1124	* in the range [`start`, `start + length`),
1125	* using bitwise comparison.
1126	*
1127	* @param c The code point to search for.
1128	* @param start the offset into this at which to start matching
1129	* @param length the number of characters in this to search
1130	* @return The offset into this of `c`, or -1 if not found.
1131	* @stable ICU 2.0
1132	*/
1133	inline int32_t indexOf(UChar32 c,
1134	int32_t start,
1135	int32_t length) const;
1136
1137	/**
1138	* Locate in this the last occurrence of the characters in `text`,
1139	* using bitwise comparison.
1140	* @param text The text to search for.
1141	* @return The offset into this of the start of `text`,
1142	* or -1 if not found.
1143	* @stable ICU 2.0
1144	*/
1145	inline int32_t lastIndexOf(const UnicodeString& text) const;
1146
1147	/**
1148	* Locate in this the last occurrence of the characters in `text`
1149	* starting at offset `start`, using bitwise comparison.
1150	* @param text The text to search for.
1151	* @param start The offset at which searching will start.
1152	* @return The offset into this of the start of `text`,
1153	* or -1 if not found.
1154	* @stable ICU 2.0
1155	*/
1156	inline int32_t lastIndexOf(const UnicodeString& text,
1157	int32_t start) const;
1158
1159	/**
1160	* Locate in this the last occurrence in the range
1161	* [`start`, `start + length`) of the characters
1162	* in `text`, using bitwise comparison.
1163	* @param text The text to search for.
1164	* @param start The offset at which searching will start.
1165	* @param length The number of characters to search
1166	* @return The offset into this of the start of `text`,
1167	* or -1 if not found.
1168	* @stable ICU 2.0
1169	*/
1170	inline int32_t lastIndexOf(const UnicodeString& text,
1171	int32_t start,
1172	int32_t length) const;
1173
1174	/**
1175	* Locate in this the last occurrence in the range
1176	* [`start`, `start + length`) of the characters
1177	* in `srcText` in the range
1178	* [`srcStart`, `srcStart + srcLength`),
1179	* using bitwise comparison.
1180	* @param srcText The text to search for.
1181	* @param srcStart the offset into `srcText` at which
1182	* to start matching
1183	* @param srcLength the number of characters in `srcText` to match
1184	* @param start the offset into this at which to start matching
1185	* @param length the number of characters in this to search
1186	* @return The offset into this of the start of `srcText`,
1187	* or -1 if not found.
1188	* @stable ICU 2.0
1189	*/
1190	inline int32_t lastIndexOf(const UnicodeString& srcText,
1191	int32_t srcStart,
1192	int32_t srcLength,
1193	int32_t start,
1194	int32_t length) const;
1195
1196	/**
1197	* Locate in this the last occurrence of the characters in `srcChars`
1198	* starting at offset `start`, using bitwise comparison.
1199	* @param srcChars The text to search for.
1200	* @param srcLength the number of characters in `srcChars` to match
1201	* @param start the offset into this at which to start matching
1202	* @return The offset into this of the start of `srcChars`,
1203	* or -1 if not found.
1204	* @stable ICU 2.0
1205	*/
1206	inline int32_t lastIndexOf(const char16_t *srcChars,
1207	int32_t srcLength,
1208	int32_t start) const;
1209
1210	/**
1211	* Locate in this the last occurrence in the range
1212	* [`start`, `start + length`) of the characters
1213	* in `srcChars`, using bitwise comparison.
1214	* @param srcChars The text to search for.
1215	* @param srcLength the number of characters in `srcChars`
1216	* @param start The offset at which searching will start.
1217	* @param length The number of characters to search
1218	* @return The offset into this of the start of `srcChars`,
1219	* or -1 if not found.
1220	* @stable ICU 2.0
1221	*/
1222	inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1223	int32_t srcLength,
1224	int32_t start,
1225	int32_t length) const;
1226
1227	/**
1228	* Locate in this the last occurrence in the range
1229	* [`start`, `start + length`) of the characters
1230	* in `srcChars` in the range
1231	* [`srcStart`, `srcStart + srcLength`),
1232	* using bitwise comparison.
1233	* @param srcChars The text to search for.
1234	* @param srcStart the offset into `srcChars` at which
1235	* to start matching
1236	* @param srcLength the number of characters in `srcChars` to match
1237	* @param start the offset into this at which to start matching
1238	* @param length the number of characters in this to search
1239	* @return The offset into this of the start of `srcChars`,
1240	* or -1 if not found.
1241	* @stable ICU 2.0
1242	*/
1243	int32_t lastIndexOf(const char16_t *srcChars,
1244	int32_t srcStart,
1245	int32_t srcLength,
1246	int32_t start,
1247	int32_t length) const;
1248
1249	/**
1250	* Locate in this the last occurrence of the BMP code point `c`,
1251	* using bitwise comparison.
1252	* @param c The code unit to search for.
1253	* @return The offset into this of `c`, or -1 if not found.
1254	* @stable ICU 2.0
1255	*/
1256	inline int32_t lastIndexOf(char16_t c) const;
1257
1258	/**
1259	* Locate in this the last occurrence of the code point `c`,
1260	* using bitwise comparison.
1261	*
1262	* @param c The code point to search for.
1263	* @return The offset into this of `c`, or -1 if not found.
1264	* @stable ICU 2.0
1265	*/
1266	inline int32_t lastIndexOf(UChar32 c) const;
1267
1268	/**
1269	* Locate in this the last occurrence of the BMP code point `c`
1270	* starting at offset `start`, using bitwise comparison.
1271	* @param c The code unit to search for.
1272	* @param start The offset at which searching will start.
1273	* @return The offset into this of `c`, or -1 if not found.
1274	* @stable ICU 2.0
1275	*/
1276	inline int32_t lastIndexOf(char16_t c,
1277	int32_t start) const;
1278
1279	/**
1280	* Locate in this the last occurrence of the code point `c`
1281	* starting at offset `start`, using bitwise comparison.
1282	*
1283	* @param c The code point to search for.
1284	* @param start The offset at which searching will start.
1285	* @return The offset into this of `c`, or -1 if not found.
1286	* @stable ICU 2.0
1287	*/
1288	inline int32_t lastIndexOf(UChar32 c,
1289	int32_t start) const;
1290
1291	/**
1292	* Locate in this the last occurrence of the BMP code point `c`
1293	* in the range [`start`, `start + length`),
1294	* using bitwise comparison.
1295	* @param c The code unit to search for.
1296	* @param start the offset into this at which to start matching
1297	* @param length the number of characters in this to search
1298	* @return The offset into this of `c`, or -1 if not found.
1299	* @stable ICU 2.0
1300	*/
1301	inline int32_t lastIndexOf(char16_t c,
1302	int32_t start,
1303	int32_t length) const;
1304
1305	/**
1306	* Locate in this the last occurrence of the code point `c`
1307	* in the range [`start`, `start + length`),
1308	* using bitwise comparison.
1309	*
1310	* @param c The code point to search for.
1311	* @param start the offset into this at which to start matching
1312	* @param length the number of characters in this to search
1313	* @return The offset into this of `c`, or -1 if not found.
1314	* @stable ICU 2.0
1315	*/
1316	inline int32_t lastIndexOf(UChar32 c,
1317	int32_t start,
1318	int32_t length) const;
1319
1320
1321	/* Character access */
1322
1323	/**
1324	* Return the code unit at offset `offset`.
1325	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1326	* @param offset a valid offset into the text
1327	* @return the code unit at offset `offset`
1328	* or 0xffff if the offset is not valid for this string
1329	* @stable ICU 2.0
1330	*/
1331	inline char16_t charAt(int32_t offset) const;
1332
1333	/**
1334	* Return the code unit at offset `offset`.
1335	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1336	* @param offset a valid offset into the text
1337	* @return the code unit at offset `offset`
1338	* @stable ICU 2.0
1339	*/
1340	inline char16_t operator[] (int32_t offset) const;
1341
1342	/**
1343	* Return the code point that contains the code unit
1344	* at offset `offset`.
1345	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1346	* @param offset a valid offset into the text
1347	* that indicates the text offset of any of the code units
1348	* that will be assembled into a code point (21-bit value) and returned
1349	* @return the code point of text at `offset`
1350	* or 0xffff if the offset is not valid for this string
1351	* @stable ICU 2.0
1352	*/
1353	UChar32 char32At(int32_t offset) const;
1354
1355	/**
1356	* Adjust a random-access offset so that
1357	* it points to the beginning of a Unicode character.
1358	* The offset that is passed in points to
1359	* any code unit of a code point,
1360	* while the returned offset will point to the first code unit
1361	* of the same code point.
1362	* In UTF-16, if the input offset points to a second surrogate
1363	* of a surrogate pair, then the returned offset will point
1364	* to the first surrogate.
1365	* @param offset a valid offset into one code point of the text
1366	* @return offset of the first code unit of the same code point
1367	* @see U16_SET_CP_START
1368	* @stable ICU 2.0
1369	*/
1370	int32_t getChar32Start(int32_t offset) const;
1371
1372	/**
1373	* Adjust a random-access offset so that
1374	* it points behind a Unicode character.
1375	* The offset that is passed in points behind
1376	* any code unit of a code point,
1377	* while the returned offset will point behind the last code unit
1378	* of the same code point.
1379	* In UTF-16, if the input offset points behind the first surrogate
1380	* (i.e., to the second surrogate)
1381	* of a surrogate pair, then the returned offset will point
1382	* behind the second surrogate (i.e., behind the whole surrogate pair).
1383	* @param offset a valid offset after any code unit of a code point of the text
1384	* @return offset of the first code unit after the same code point
1385	* @see U16_SET_CP_LIMIT
1386	* @stable ICU 2.0
1387	*/
1388	int32_t getChar32Limit(int32_t offset) const;
1389
1390	/**
1391	* Move the code unit index along the string by delta code points.
1392	* Interpret the input index as a code unit-based offset into the string,
1393	* move the index forward or backward by delta code points, and
1394	* return the resulting index.
1395	* The input index should point to the first code unit of a code point,
1396	* if there is more than one.
1397	*
1398	* Both input and output indexes are code unit-based as for all
1399	* string indexes/offsets in ICU (and other libraries, like MBCS char*).
1400	* If delta<0 then the index is moved backward (toward the start of the string).
1401	* If delta>0 then the index is moved forward (toward the end of the string).
1402	*
1403	* This behaves like CharacterIterator::move32(delta, kCurrent).
1404	*
1405	* Behavior for out-of-bounds indexes:
1406	* `moveIndex32` pins the input index to 0..length(), i.e.,
1407	* if the input index<0 then it is pinned to 0;
1408	* if it is index>length() then it is pinned to length().
1409	* Afterwards, the index is moved by `delta` code points
1410	* forward or backward,
1411	* but no further backward than to 0 and no further forward than to length().
1412	* The resulting index return value will be in between 0 and length(), inclusively.
1413	*
1414	* Examples:
1415	* \code
1416	* // s has code points 'a' U+10000 'b' U+10ffff U+2029
1417	* UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
1418	*
1419	* // initial index: position of U+10000
1420	* int32_t index=1;
1421	*
1422	* // the following examples will all result in index==4, position of U+10ffff
1423	*
1424	* // skip 2 code points from some position in the string
1425	* index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1426	*
1427	* // go to the 3rd code point from the start of s (0-based)
1428	* index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1429	*
1430	* // go to the next-to-last code point of s
1431	* index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1432	* \endcode
1433	*
1434	* @param index input code unit index
1435	* @param delta (signed) code point count to move the index forward or backward
1436	* in the string
1437	* @return the resulting code unit index
1438	* @stable ICU 2.0
1439	*/
1440	int32_t moveIndex32(int32_t index, int32_t delta) const;
1441
1442	/* Substring extraction */
1443
1444	/**
1445	* Copy the characters in the range
1446	* [`start`, `start + length`) into the array `dst`,
1447	* beginning at `dstStart`.
1448	* If the string aliases to `dst` itself as an external buffer,
1449	* then extract() will not copy the contents.
1450	*
1451	* @param start offset of first character which will be copied into the array
1452	* @param length the number of characters to extract
1453	* @param dst array in which to copy characters. The length of `dst`
1454	* must be at least (`dstStart + length`).
1455	* @param dstStart the offset in `dst` where the first character
1456	* will be extracted; defaults to 0
1457	* @stable ICU 2.0
1458	*/
1459	inline void extract(int32_t start,
1460	int32_t length,
1461	Char16Ptr dst,
1462	int32_t dstStart = 0) const;
1463
1464	/**
1465	* Copy the contents of the string into dest.
1466	* This is a convenience function that
1467	* checks if there is enough space in dest,
1468	* extracts the entire string if possible,
1469	* and NUL-terminates dest if possible.
1470	*
1471	* If the string fits into dest but cannot be NUL-terminated
1472	* (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1473	* If the string itself does not fit into dest
1474	* (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1475	*
1476	* If the string aliases to `dest` itself as an external buffer,
1477	* then extract() will not copy the contents.
1478	*
1479	* @param dest Destination string buffer.
1480	* @param destCapacity Number of char16_ts available at dest.
1481	* @param errorCode ICU error code.
1482	* @return length()
1483	* @stable ICU 2.0
1484	*/
1485	int32_t
1486	extract(Char16Ptr dest, int32_t destCapacity,
1487	UErrorCode &errorCode) const;
1488
1489	/**
1490	* Copy the characters in the range
1491	* [`start`, `start + length`) into the UnicodeString
1492	* `target`.
1493	* @param start offset of first character which will be copied
1494	* @param length the number of characters to extract
1495	* @param target UnicodeString into which to copy characters.
1496	* @stable ICU 2.0
1497	*/
1498	inline void extract(int32_t start,
1499	int32_t length,
1500	UnicodeString& target) const;
1501
1502	/**
1503	* Copy the characters in the range [`start`, `limit`)
1504	* into the array `dst`, beginning at `dstStart`.
1505	* @param start offset of first character which will be copied into the array
1506	* @param limit offset immediately following the last character to be copied
1507	* @param dst array in which to copy characters. The length of `dst`
1508	* must be at least (`dstStart + (limit - start)`).
1509	* @param dstStart the offset in `dst` where the first character
1510	* will be extracted; defaults to 0
1511	* @stable ICU 2.0
1512	*/
1513	inline void extractBetween(int32_t start,
1514	int32_t limit,
1515	char16_t *dst,
1516	int32_t dstStart = 0) const;
1517
1518	/**
1519	* Copy the characters in the range [`start`, `limit`)
1520	* into the UnicodeString `target`. Replaceable API.
1521	* @param start offset of first character which will be copied
1522	* @param limit offset immediately following the last character to be copied
1523	* @param target UnicodeString into which to copy characters.
1524	* @stable ICU 2.0
1525	*/
1526	virtual void extractBetween(int32_t start,
1527	int32_t limit,
1528	UnicodeString& target) const;
1529
1530	/**
1531	* Copy the characters in the range
1532	* [`start`, `start + startLength`) into an array of characters.
1533	* All characters must be invariant (see utypes.h).
1534	* Use US_INV as the last, signature-distinguishing parameter.
1535	*
1536	* This function does not write any more than `targetCapacity`
1537	* characters but returns the length of the entire output string
1538	* so that one can allocate a larger buffer and call the function again
1539	* if necessary.
1540	* The output string is NUL-terminated if possible.
1541	*
1542	* @param start offset of first character which will be copied
1543	* @param startLength the number of characters to extract
1544	* @param target the target buffer for extraction, can be NULL
1545	* if targetCapacity is 0
1546	* @param targetCapacity the length of the target buffer
1547	* @param inv Signature-distinguishing parameter, use US_INV.
1548	* @return the output string length, not including the terminating NUL
1549	* @stable ICU 3.2
1550	*/
1551	int32_t extract(int32_t start,
1552	int32_t startLength,
1553	char *target,
1554	int32_t targetCapacity,
1555	enum EInvariant inv) const;
1556
1557	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1558
1559	/**
1560	* Copy the characters in the range
1561	* [`start`, `start + startLength`) into an array of characters
1562	* in the platform's default codepage.
1563	* This function does not write any more than `targetLength`
1564	* characters but returns the length of the entire output string
1565	* so that one can allocate a larger buffer and call the function again
1566	* if necessary.
1567	* The output string is NUL-terminated if possible.
1568	*
1569	* @param start offset of first character which will be copied
1570	* @param startLength the number of characters to extract
1571	* @param target the target buffer for extraction
1572	* @param targetLength the length of the target buffer
1573	* If `target` is NULL, then the number of bytes required for
1574	* `target` is returned.
1575	* @return the output string length, not including the terminating NUL
1576	* @stable ICU 2.0
1577	*/
1578	int32_t extract(int32_t start,
1579	int32_t startLength,
1580	char *target,
1581	uint32_t targetLength) const;
1582
1583	#endif
1584
1585	#if !UCONFIG_NO_CONVERSION
1586
1587	/**
1588	* Copy the characters in the range
1589	* [`start`, `start + startLength`) into an array of characters
1590	* in a specified codepage.
1591	* The output string is NUL-terminated.
1592	*
1593	* Recommendation: For invariant-character strings use
1594	* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1595	* because it avoids object code dependencies of UnicodeString on
1596	* the conversion code.
1597	*
1598	* @param start offset of first character which will be copied
1599	* @param startLength the number of characters to extract
1600	* @param target the target buffer for extraction
1601	* @param codepage the desired codepage for the characters. 0 has
1602	* the special meaning of the default codepage
1603	* If `codepage` is an empty string (`""`),
1604	* then a simple conversion is performed on the codepage-invariant
1605	* subset ("invariant characters") of the platform encoding. See utypes.h.
1606	* If `target` is NULL, then the number of bytes required for
1607	* `target` is returned. It is assumed that the target is big enough
1608	* to fit all of the characters.
1609	* @return the output string length, not including the terminating NUL
1610	* @stable ICU 2.0
1611	*/
1612	inline int32_t extract(int32_t start,
1613	int32_t startLength,
1614	char *target,
1615	const char *codepage = 0) const; // 0 selects the default codepage
1616
1617	/**
1618	* Copy the characters in the range
1619	* [`start`, `start + startLength`) into an array of characters
1620	* in a specified codepage.
1621	* This function does not write any more than `targetLength`
1622	* characters but returns the length of the entire output string
1623	* so that one can allocate a larger buffer and call the function again
1624	* if necessary.
1625	* The output string is NUL-terminated if possible.
1626	*
1627	* Recommendation: For invariant-character strings use
1628	* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1629	* because it avoids object code dependencies of UnicodeString on
1630	* the conversion code.
1631	*
1632	* @param start offset of first character which will be copied
1633	* @param startLength the number of characters to extract
1634	* @param target the target buffer for extraction
1635	* @param targetLength the length of the target buffer
1636	* @param codepage the desired codepage for the characters. 0 has
1637	* the special meaning of the default codepage
1638	* If `codepage` is an empty string (`""`),
1639	* then a simple conversion is performed on the codepage-invariant
1640	* subset ("invariant characters") of the platform encoding. See utypes.h.
1641	* If `target` is NULL, then the number of bytes required for
1642	* `target` is returned.
1643	* @return the output string length, not including the terminating NUL
1644	* @stable ICU 2.0
1645	*/
1646	int32_t extract(int32_t start,
1647	int32_t startLength,
1648	char *target,
1649	uint32_t targetLength,
1650	const char *codepage) const; // 0 = default codepage, "" = invariant-character conversion
1651
1652	/**
1653	* Convert the UnicodeString into a codepage string using an existing UConverter.
1654	* The output string is NUL-terminated if possible.
1655	*
1656	* This function avoids the overhead of opening and closing a converter if
1657	* multiple strings are extracted.
1658	*
1659	* @param dest destination string buffer, can be NULL if destCapacity==0
1660	* @param destCapacity the number of chars available at dest
1661	* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1662	* or NULL for the default converter
1663	* @param errorCode normal ICU error code
1664	* @return the length of the output string, not counting the terminating NUL;
1665	* if the length is greater than destCapacity, then the string will not fit
1666	* and a buffer of the indicated length would need to be passed in
1667	* @stable ICU 2.0
1668	*/
1669	int32_t extract(char *dest, int32_t destCapacity,
1670	UConverter *cnv,
1671	UErrorCode &errorCode) const;
1672
1673	#endif
1674
1675	/**
1676	* Create a temporary substring for the specified range.
1677	* Unlike the substring constructor and setTo() functions,
1678	* the object returned here will be a read-only alias (using getBuffer())
1679	* rather than copying the text.
1680	* As a result, this substring operation is much faster but requires
1681	* that the original string not be modified or deleted during the lifetime
1682	* of the returned substring object.
1683	* @param start offset of the first character visible in the substring
1684	* @param length length of the substring
1685	* @return a read-only alias UnicodeString object for the substring
1686	* @stable ICU 4.4
1687	*/
1688	UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1689
1690	/**
1691	* Create a temporary substring for the specified range.
1692	* Same as tempSubString(start, length) except that the substring range
1693	* is specified as a (start, limit) pair (with an exclusive limit index)
1694	* rather than a (start, length) pair.
1695	* @param start offset of the first character visible in the substring
1696	* @param limit offset immediately following the last character visible in the substring
1697	* @return a read-only alias UnicodeString object for the substring
1698	* @stable ICU 4.4
1699	*/
1700	inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1701
1702	/**
1703	* Convert the UnicodeString to UTF-8 and write the result
1704	* to a ByteSink. This is called by toUTF8String().
1705	* Unpaired surrogates are replaced with U+FFFD.
1706	* Calls u_strToUTF8WithSub().
1707	*
1708	* @param sink A ByteSink to which the UTF-8 version of the string is written.
1709	* sink.Flush() is called at the end.
1710	* @stable ICU 4.2
1711	* @see toUTF8String
1712	*/
1713	void toUTF8(ByteSink &sink) const;
1714
1715	/**
1716	* Convert the UnicodeString to UTF-8 and append the result
1717	* to a standard string.
1718	* Unpaired surrogates are replaced with U+FFFD.
1719	* Calls toUTF8().
1720	*
1721	* @param result A standard string (or a compatible object)
1722	* to which the UTF-8 version of the string is appended.
1723	* @return The string object.
1724	* @stable ICU 4.2
1725	* @see toUTF8
1726	*/
1727	template<typename StringClass>
1728	StringClass &toUTF8String(StringClass &result) const {
1729	StringByteSink<StringClass> utf8Sink(&result, length()); // sink bound to the caller's string
1730	toUTF8(utf8Sink); // the UTF-8 conversion is appended to result through the sink
1731	return result; // hand the same string object back to the caller
1732	}
1733
1734	/**
1735	* Convert the UnicodeString to UTF-32.
1736	* Unpaired surrogates are replaced with U+FFFD.
1737	* Calls u_strToUTF32WithSub().
1738	*
1739	* @param utf32 destination string buffer, can be NULL if capacity==0
1740	* @param capacity the number of UChar32s available at utf32
1741	* @param errorCode Standard ICU error code. Its input value must
1742	* pass the U_SUCCESS() test, or else the function returns
1743	* immediately. Check for U_FAILURE() on output or use with
1744	* function chaining. (See User Guide for details.)
1745	* @return The length of the UTF-32 string.
1746	* @see fromUTF32
1747	* @stable ICU 4.2
1748	*/
1749	int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1750
1751	/* Length operations */
1752
1753	/**
1754	* Return the length of the UnicodeString object.
1755	* The length is the number of char16_t code units that are in the UnicodeString.
1756	* If you want the number of code points, please use countChar32().
1757	* @return the length of the UnicodeString object
1758	* @see countChar32
1759	* @stable ICU 2.0
1760	*/
1761	inline int32_t length(void) const;
1762
1763	/**
1764	* Count Unicode code points in the length char16_t code units of the string.
1765	* A code point may occupy either one or two char16_t code units.
1766	* Counting code points involves reading all code units.
1767	*
1768	* This function is basically the inverse of moveIndex32().
1769	*
1770	* @param start the index of the first code unit to check
1771	* @param length the number of char16_t code units to check
1772	* @return the number of code points in the specified code units
1773	* @see length
1774	* @stable ICU 2.0
1775	*/
1776	int32_t
1777	countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1778
1779	/**
1780	* Check if the length char16_t code units of the string
1781	* contain more Unicode code points than a certain number.
1782	* This is more efficient than counting all code points in this part of the string
1783	* and comparing that number with a threshold.
1784	* This function may not need to scan the string at all if the length
1785	* falls within a certain range, and
1786	* never needs to count more than 'number+1' code points.
1787	* Logically equivalent to (countChar32(start, length)>number).
1788	* A Unicode code point may occupy either one or two char16_t code units.
1789	*
1790	* @param start the index of the first code unit to check (0 for the entire string)
1791	* @param length the number of char16_t code units to check
1792	* (use INT32_MAX for the entire string; remember that start/length
1793	* values are pinned)
1794	* @param number The number of code points in the (sub)string is compared against
1795	* the 'number' parameter.
1796	* @return Boolean value for whether the string contains more Unicode code points
1797	* than 'number'. Same as (u_countChar32(s, length)>number).
1798	* @see countChar32
1799	* @see u_strHasMoreChar32Than
1800	* @stable ICU 2.4
1801	*/
1802	UBool
1803	hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1804
1805	/**
1806	* Determine if this string is empty.
1807	* @return TRUE if this string contains 0 characters, FALSE otherwise.
1808	* @stable ICU 2.0
1809	*/
1810	inline UBool isEmpty(void) const;
1811
1812	/**
1813	* Return the capacity of the internal buffer of the UnicodeString object.
1814	* This is useful together with the getBuffer functions.
1815	* See there for details.
1816	*
1817	* @return the number of char16_ts available in the internal buffer
1818	* @see getBuffer
1819	* @stable ICU 2.0
1820	*/
1821	inline int32_t getCapacity(void) const;
1822
1823	/* Other operations */
1824
1825	/**
1826	* Generate a hash code for this object.
1827	* @return The hash code of this UnicodeString.
1828	* @stable ICU 2.0
1829	*/
1830	inline int32_t hashCode(void) const;
1831
1832	/**
1833	* Determine if this object contains a valid string.
1834	* A bogus string has no value. It is different from an empty string,
1835	* although in both cases isEmpty() returns TRUE and length() returns 0.
1836	* setToBogus() and isBogus() can be used to indicate that no string value is available.
1837	* For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1838	* length() returns 0.
1839	*
1840	* @return TRUE if the string is bogus/invalid, FALSE otherwise
1841	* @see setToBogus()
1842	* @stable ICU 2.0
1843	*/
1844	inline UBool isBogus(void) const;
1845
1846
1847	//========================================
1848	// Write operations
1849	//========================================
1850
1851	/* Assignment operations */
1852
1853	/**
1854	* Assignment operator. Replace the characters in this UnicodeString
1855	* with the characters from `srcText`.
1856	*
1857	* Starting with ICU 2.4, the assignment operator and the copy constructor
1858	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1859	* By contrast, the fastCopyFrom() function implements the old,
1860	* more efficient but less safe behavior
1861	* of making this string also a readonly alias to the same buffer.
1862	*
1863	* If the source object has an "open" buffer from getBuffer(minCapacity),
1864	* then the copy is an empty string.
1865	*
1866	* @param srcText The text containing the characters to replace
1867	* @return a reference to this
1868	* @stable ICU 2.0
1869	* @see fastCopyFrom
1870	*/
1871	UnicodeString &operator=(const UnicodeString &srcText);
1872
1873	/**
1874	* Almost the same as the assignment operator.
1875	* Replace the characters in this UnicodeString
1876	* with the characters from `srcText`.
1877	*
1878	* This function works the same as the assignment operator
1879	* for all strings except for ones that are readonly aliases.
1880	*
1881	* Starting with ICU 2.4, the assignment operator and the copy constructor
1882	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1883	* This function implements the old, more efficient but less safe behavior
1884	* of making this string also a readonly alias to the same buffer.
1885	*
1886	* The fastCopyFrom function must be used only if it is known that the lifetime of
1887	* this UnicodeString does not exceed the lifetime of the aliased buffer
1888	* including its contents, for example for strings from resource bundles
1889	* or aliases to string constants.
1890	*
1891	* If the source object has an "open" buffer from getBuffer(minCapacity),
1892	* then the copy is an empty string.
1893	*
1894	* @param src The text containing the characters to replace.
1895	* @return a reference to this
1896	* @stable ICU 2.4
1897	*/
1898	UnicodeString &fastCopyFrom(const UnicodeString &src);
1899
1900	/**
1901	* Move assignment operator; might leave src in bogus state.
1902	* This string will have the same contents and state that the source string had.
1903	* The behavior is undefined if *this and src are the same object.
1904	* @param src source string
1905	* @return *this
1906	* @stable ICU 56
1907	*/
1908	UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT;
1909
1910	/**
1911	* Swap strings.
1912	* @param other other string
1913	* @stable ICU 56
1914	*/
1915	void swap(UnicodeString &other) U_NOEXCEPT;
1916
1917	/**
1918	* Non-member UnicodeString swap function.
1919	* @param s1 will get s2's contents and state
1920	* @param s2 will get s1's contents and state
1921	* @stable ICU 56
1922	*/
1923	friend inline void U_EXPORT2
1924	swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
1925	s1.swap(s2); // forwards to the member swap(), exchanging the two strings
1926	}
1927
1928	/**
1929	* Assignment operator. Replace the characters in this UnicodeString
1930	* with the code unit `ch`.
1931	* @param ch the code unit to replace
1932	* @return a reference to this
1933	* @stable ICU 2.0
1934	*/
1935	inline UnicodeString& operator= (char16_t ch);
1936
1937	/**
1938	* Assignment operator. Replace the characters in this UnicodeString
1939	* with the code point `ch`.
1940	* @param ch the code point to replace
1941	* @return a reference to this
1942	* @stable ICU 2.0
1943	*/
1944	inline UnicodeString& operator= (UChar32 ch);
1945
1946	/**
1947	* Set the text in the UnicodeString object to the characters
1948	* in `srcText` in the range
1949	* [`srcStart`, `srcText.length()`).
1950	* `srcText` is not modified.
1951	* @param srcText the source for the new characters
1952	* @param srcStart the offset into `srcText` where new characters
1953	* will be obtained
1954	* @return a reference to this
1955	* @stable ICU 2.2
1956	*/
1957	inline UnicodeString& setTo(const UnicodeString& srcText,
1958	int32_t srcStart);
1959
1960	/**
1961	* Set the text in the UnicodeString object to the characters
1962	* in `srcText` in the range
1963	* [`srcStart`, `srcStart + srcLength`).
1964	* `srcText` is not modified.
1965	* @param srcText the source for the new characters
1966	* @param srcStart the offset into `srcText` where new characters
1967	* will be obtained
1968	* @param srcLength the number of characters in `srcText` in the
1969	* replace string.
1970	* @return a reference to this
1971	* @stable ICU 2.0
1972	*/
1973	inline UnicodeString& setTo(const UnicodeString& srcText,
1974	int32_t srcStart,
1975	int32_t srcLength);
1976
1977	/**
1978	* Set the text in the UnicodeString object to the characters in
1979	* `srcText`.
1980	* `srcText` is not modified.
1981	* @param srcText the source for the new characters
1982	* @return a reference to this
1983	* @stable ICU 2.0
1984	*/
1985	inline UnicodeString& setTo(const UnicodeString& srcText);
1986
1987	/**
1988	* Set the characters in the UnicodeString object to the characters
1989	* in `srcChars`. `srcChars` is not modified.
1990	* @param srcChars the source for the new characters
1991	* @param srcLength the number of Unicode characters in srcChars.
1992	* @return a reference to this
1993	* @stable ICU 2.0
1994	*/
1995	inline UnicodeString& setTo(const char16_t *srcChars,
1996	int32_t srcLength);
1997
1998	/**
1999	* Set the characters in the UnicodeString object to the code unit
2000	* `srcChar`.
2001	* @param srcChar the code unit which becomes the UnicodeString's character
2002	* content
2003	* @return a reference to this
2004	* @stable ICU 2.0
2005	*/
2006	inline UnicodeString& setTo(char16_t srcChar);
2007
2008	/**
2009	* Set the characters in the UnicodeString object to the code point
2010	* `srcChar`.
2011	* @param srcChar the code point which becomes the UnicodeString's character
2012	* content
2013	* @return a reference to this
2014	* @stable ICU 2.0
2015	*/
2016	inline UnicodeString& setTo(UChar32 srcChar);
2017
2018	/**
2019	* Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
2020	* The text will be used for the UnicodeString object, but
2021	* it will not be released when the UnicodeString is destroyed.
2022	* This has copy-on-write semantics:
2023	* When the string is modified, then the buffer is first copied into
2024	* newly allocated memory.
2025	* The aliased buffer is never modified.
2026	*
2027	* In an assignment to another UnicodeString, when using the copy constructor
2028	* or the assignment operator, the text will be copied.
2029	* When using fastCopyFrom(), the text will be aliased again,
2030	* so that both strings then alias the same readonly-text.
2031	*
2032	* @param isTerminated specifies if `text` is `NUL`-terminated.
2033	* This must be true if `textLength==-1`.
2034	* @param text The characters to alias for the UnicodeString.
2035	* @param textLength The number of Unicode characters in `text` to alias.
2036	* If -1, then this function will determine the length
2037	* by calling `u_strlen()`.
2038	* @return a reference to this
2039	* @stable ICU 2.0
2040	*/
2041	UnicodeString &setTo(UBool isTerminated,
2042	ConstChar16Ptr text,
2043	int32_t textLength);
2044
2045	/**
2046	* Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
2047	* The text will be used for the UnicodeString object, but
2048	* it will not be released when the UnicodeString is destroyed.
2049	* This has write-through semantics:
2050	* For as long as the capacity of the buffer is sufficient, write operations
2051	* will directly affect the buffer. When more capacity is necessary, then
2052	* a new buffer will be allocated and the contents copied as with regularly
2053	* constructed strings.
2054	* In an assignment to another UnicodeString, the buffer will be copied.
2055	* The extract(Char16Ptr dst) function detects whether the dst pointer is the same
2056	* as the string buffer itself and will in this case not copy the contents.
2057	*
2058	* @param buffer The characters to alias for the UnicodeString.
2059	* @param buffLength The number of Unicode characters in `buffer` to alias.
2060	* @param buffCapacity The size of `buffer` in char16_ts.
2061	* @return a reference to this
2062	* @stable ICU 2.0
2063	*/
2064	UnicodeString &setTo(char16_t *buffer,
2065	int32_t buffLength,
2066	int32_t buffCapacity);
2067
2068	/**
2069	* Make this UnicodeString object invalid.
2070	* The string will test TRUE with isBogus().
2071	*
2072	* A bogus string has no value. It is different from an empty string.
2073	* It can be used to indicate that no string value is available.
2074	* getBuffer() and getTerminatedBuffer() return NULL, and
2075	* length() returns 0.
2076	*
2077	* This utility function is used throughout the UnicodeString
2078	* implementation to indicate that a UnicodeString operation failed,
2079	* and may be used in other functions,
2080	* especially but not exclusively when such functions do not
2081	* take a UErrorCode for simplicity.
2082	*
2083	* The following methods, and no others, will clear a string object's bogus flag:
2084	* - remove()
2085	* - remove(0, INT32_MAX)
2086	* - truncate(0)
2087	* - operator=() (assignment operator)
2088	* - setTo(...)
2089	*
2090	* The simplest way to turn a bogus string into an empty one
2091	* is to use the remove() function.
2092	* Examples for other functions that are equivalent to "set to empty string":
2093	* \code
2094	* if(s.isBogus()) {
2095	* s.remove(); // set to an empty string (remove all), or
2096	* s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2097	* s.truncate(0); // set to an empty string (complete truncation), or
2098	* s=UnicodeString(); // assign an empty string, or
2099	* s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2100	* s.setTo(u"", 0); // set to an empty C Unicode string
2101	* }
2102	* \endcode
2103	*
2104	* @see isBogus()
2105	* @stable ICU 2.0
2106	*/
2107	void setToBogus();
2108
2109	/**
2110	* Set the character at the specified offset to the specified character.
2111	* @param offset A valid offset into the text of the character to set
2112	* @param ch The new character
2113	* @return A reference to this
2114	* @stable ICU 2.0
2115	*/
2116	UnicodeString& setCharAt(int32_t offset,
2117	char16_t ch);
2118
2119
2120	/* Append operations */
2121
2122	/**
2123	* Append operator. Append the code unit `ch` to the UnicodeString
2124	* object.
2125	* @param ch the code unit to be appended
2126	* @return a reference to this
2127	* @stable ICU 2.0
2128	*/
2129	inline UnicodeString& operator+= (char16_t ch);
2130
2131	/**
2132	* Append operator. Append the code point `ch` to the UnicodeString
2133	* object.
2134	* @param ch the code point to be appended
2135	* @return a reference to this
2136	* @stable ICU 2.0
2137	*/
2138	inline UnicodeString& operator+= (UChar32 ch);
2139
2140	/**
2141	* Append operator. Append the characters in `srcText` to the
2142	* UnicodeString object. `srcText` is not modified.
2143	* @param srcText the source for the new characters
2144	* @return a reference to this
2145	* @stable ICU 2.0
2146	*/
2147	inline UnicodeString& operator+= (const UnicodeString& srcText);
2148
2149	/**
2150	* Append the characters
2151	* in `srcText` in the range
2152	* [`srcStart`, `srcStart + srcLength`) to the
2153	* UnicodeString object. `srcText`
2154	* is not modified.
2155	* @param srcText the source for the new characters
2156	* @param srcStart the offset into `srcText` where new characters
2157	* will be obtained
2158	* @param srcLength the number of characters in `srcText` in
2159	* the append string
2160	* @return a reference to this
2161	* @stable ICU 2.0
2162	*/
2163	inline UnicodeString& append(const UnicodeString& srcText,
2164	int32_t srcStart,
2165	int32_t srcLength);
2166
2167	/**
2168	* Append the characters in `srcText` to the UnicodeString object.
2169	* `srcText` is not modified.
2170	* @param srcText the source for the new characters
2171	* @return a reference to this
2172	* @stable ICU 2.0
2173	*/
2174	inline UnicodeString& append(const UnicodeString& srcText);
2175
2176	/**
2177	* Append the characters in `srcChars` in the range
2178	* [`srcStart`, `srcStart + srcLength`) to the UnicodeString
2179	* object.
2180	* `srcChars` is not modified.
2181	* @param srcChars the source for the new characters
2182	* @param srcStart the offset into `srcChars` where new characters
2183	* will be obtained
2184	* @param srcLength the number of characters in `srcChars` in
2185	* the append string; can be -1 if `srcChars` is NUL-terminated
2186	* @return a reference to this
2187	* @stable ICU 2.0
2188	*/
2189	inline UnicodeString& append(const char16_t *srcChars,
2190	int32_t srcStart,
2191	int32_t srcLength);
2192
2193	/**
2194	* Append the characters in `srcChars` to the UnicodeString object.
2195	* `srcChars` is not modified.
2196	* @param srcChars the source for the new characters
2197	* @param srcLength the number of Unicode characters in `srcChars`;
2198	* can be -1 if `srcChars` is NUL-terminated
2199	* @return a reference to this
2200	* @stable ICU 2.0
2201	*/
2202	inline UnicodeString& append(ConstChar16Ptr srcChars,
2203	int32_t srcLength);
2204
2205	/**
2206	* Append the code unit `srcChar` to the UnicodeString object.
2207	* @param srcChar the code unit to append
2208	* @return a reference to this
2209	* @stable ICU 2.0
2210	*/
2211	inline UnicodeString& append(char16_t srcChar);
2212
2213	/**
2214	* Append the code point `srcChar` to the UnicodeString object.
2215	* @param srcChar the code point to append
2216	* @return a reference to this
2217	* @stable ICU 2.0
2218	*/
2219	UnicodeString& append(UChar32 srcChar);
2220
2221
2222	/* Insert operations */
2223
2224	/**
2225	* Insert the characters in `srcText` in the range
2226	* [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2227	* object at offset `start`. `srcText` is not modified.
2228	* @param start the offset where the insertion begins
2229	* @param srcText the source for the new characters
2230	* @param srcStart the offset into `srcText` where new characters
2231	* will be obtained
2232	* @param srcLength the number of characters in `srcText` in
2233	* the insert string
2234	* @return a reference to this
2235	* @stable ICU 2.0
2236	*/
2237	inline UnicodeString& insert(int32_t start,
2238	const UnicodeString& srcText,
2239	int32_t srcStart,
2240	int32_t srcLength);
2241
2242	/**
2243	* Insert the characters in `srcText` into the UnicodeString object
2244	* at offset `start`. `srcText` is not modified.
2245	* @param start the offset where the insertion begins
2246	* @param srcText the source for the new characters
2247	* @return a reference to this
2248	* @stable ICU 2.0
2249	*/
2250	inline UnicodeString& insert(int32_t start,
2251	const UnicodeString& srcText);
2252
2253	/**
2254	* Insert the characters in `srcChars` in the range
2255	* [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2256	* object at offset `start`. `srcChars` is not modified.
2257	* @param start the offset at which the insertion begins
2258	* @param srcChars the source for the new characters
2259	* @param srcStart the offset into `srcChars` where new characters
2260	* will be obtained
2261	* @param srcLength the number of characters in `srcChars`
2262	* in the insert string
2263	* @return a reference to this
2264	* @stable ICU 2.0
2265	*/
2266	inline UnicodeString& insert(int32_t start,
2267	const char16_t *srcChars,
2268	int32_t srcStart,
2269	int32_t srcLength);
2270
2271	/**
2272	* Insert the characters in `srcChars` into the UnicodeString object
2273	* at offset `start`. `srcChars` is not modified.
2274	* @param start the offset where the insertion begins
2275	* @param srcChars the source for the new characters
2276	* @param srcLength the number of Unicode characters in srcChars.
2277	* @return a reference to this
2278	* @stable ICU 2.0
2279	*/
2280	inline UnicodeString& insert(int32_t start,
2281	ConstChar16Ptr srcChars,
2282	int32_t srcLength);
2283
2284	/**
2285	* Insert the code unit `srcChar` into the UnicodeString object at
2286	* offset `start`.
2287	* @param start the offset at which the insertion occurs
2288	* @param srcChar the code unit to insert
2289	* @return a reference to this
2290	* @stable ICU 2.0
2291	*/
2292	inline UnicodeString& insert(int32_t start,
2293	char16_t srcChar);
2294
2295	/**
2296	* Insert the code point `srcChar` into the UnicodeString object at
2297	* offset `start`.
2298	* @param start the offset at which the insertion occurs
2299	* @param srcChar the code point to insert
2300	* @return a reference to this
2301	* @stable ICU 2.0
2302	*/
2303	inline UnicodeString& insert(int32_t start,
2304	UChar32 srcChar);
2305
2306
2307	/* Replace operations */
2308
2309	/**
2310	* Replace the characters in the range
2311	* [`start`, `start + length`) with the characters in
2312	* `srcText` in the range
2313	* [`srcStart`, `srcStart + srcLength`).
2314	* `srcText` is not modified.
2315	* @param start the offset at which the replace operation begins
2316	* @param length the number of characters to replace. The character at
2317	* `start + length` is not modified.
2318	* @param srcText the source for the new characters
2319	* @param srcStart the offset into `srcText` where new characters
2320	* will be obtained
2321	* @param srcLength the number of characters in `srcText` in
2322	* the replace string
2323	* @return a reference to this
2324	* @stable ICU 2.0
2325	*/
2326	inline UnicodeString& replace(int32_t start,
2327	int32_t length,
2328	const UnicodeString& srcText,
2329	int32_t srcStart,
2330	int32_t srcLength);
2331
2332	/**
2333	* Replace the characters in the range
2334	* [`start`, `start + length`)
2335	* with the characters in `srcText`. `srcText` is
2336	* not modified.
2337	* @param start the offset at which the replace operation begins
2338	* @param length the number of characters to replace. The character at
2339	* `start + length` is not modified.
2340	* @param srcText the source for the new characters
2341	* @return a reference to this
2342	* @stable ICU 2.0
2343	*/
2344	inline UnicodeString& replace(int32_t start,
2345	int32_t length,
2346	const UnicodeString& srcText);
2347
2348	/**
2349	* Replace the characters in the range
2350	* [`start`, `start + length`) with the characters in
2351	* `srcChars` in the range
2352	* [`srcStart`, `srcStart + srcLength`). `srcChars`
2353	* is not modified.
2354	* @param start the offset at which the replace operation begins
2355	* @param length the number of characters to replace. The character at
2356	* `start + length` is not modified.
2357	* @param srcChars the source for the new characters
2358	* @param srcStart the offset into `srcChars` where new characters
2359	* will be obtained
2360	* @param srcLength the number of characters in `srcChars`
2361	* in the replace string
2362	* @return a reference to this
2363	* @stable ICU 2.0
2364	*/
2365	inline UnicodeString& replace(int32_t start,
2366	int32_t length,
2367	const char16_t *srcChars,
2368	int32_t srcStart,
2369	int32_t srcLength);
2370
2371	/**
2372	* Replace the characters in the range
2373	* [`start`, `start + length`) with the characters in
2374	* `srcChars`. `srcChars` is not modified.
2375	* @param start the offset at which the replace operation begins
2376	* @param length number of characters to replace. The character at
2377	* `start + length` is not modified.
2378	* @param srcChars the source for the new characters
2379	* @param srcLength the number of Unicode characters in srcChars
2380	* @return a reference to this
2381	* @stable ICU 2.0
2382	*/
2383	inline UnicodeString& replace(int32_t start,
2384	int32_t length,
2385	ConstChar16Ptr srcChars,
2386	int32_t srcLength);
2387
2388	/**
2389	* Replace the characters in the range
2390	* [`start`, `start + length`) with the code unit
2391	* `srcChar`.
2392	* @param start the offset at which the replace operation begins
2393	* @param length the number of characters to replace. The character at
2394	* `start + length` is not modified.
2395	* @param srcChar the new code unit
2396	* @return a reference to this
2397	* @stable ICU 2.0
2398	*/
2399	inline UnicodeString& replace(int32_t start,
2400	int32_t length,
2401	char16_t srcChar);
2402
2403	/**
2404	* Replace the characters in the range
2405	* [`start`, `start + length`) with the code point
2406	* `srcChar`.
2407	* @param start the offset at which the replace operation begins
2408	* @param length the number of characters to replace. The character at
2409	* `start + length` is not modified.
2410	* @param srcChar the new code point
2411	* @return a reference to this
2412	* @stable ICU 2.0
2413	*/
2414	UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2415
2416	/**
2417	* Replace the characters in the range [`start`, `limit`)
2418	* with the characters in `srcText`. `srcText` is not modified.
2419	* @param start the offset at which the replace operation begins
2420	* @param limit the offset immediately following the replace range
2421	* @param srcText the source for the new characters
2422	* @return a reference to this
2423	* @stable ICU 2.0
2424	*/
2425	inline UnicodeString& replaceBetween(int32_t start,
2426	int32_t limit,
2427	const UnicodeString& srcText);
2428
2429	/**
2430	* Replace the characters in the range [`start`, `limit`)
2431	* with the characters in `srcText` in the range
2432	* [`srcStart`, `srcLimit`). `srcText` is not modified.
2433	* @param start the offset at which the replace operation begins
2434	* @param limit the offset immediately following the replace range
2435	* @param srcText the source for the new characters
2436	* @param srcStart the offset into `srcText` where new characters
2437	* will be obtained
2438	* @param srcLimit the offset immediately following the range to copy
2439	* in `srcText`
2440	* @return a reference to this
2441	* @stable ICU 2.0
2442	*/
2443	inline UnicodeString& replaceBetween(int32_t start,
2444	int32_t limit,
2445	const UnicodeString& srcText,
2446	int32_t srcStart,
2447	int32_t srcLimit);
2448
2449	/**
2450	* Replace a substring of this object with the given text.
2451	* @param start the beginning index, inclusive; `0 <= start <= limit`.
2452	* @param limit the ending index, exclusive; `start <= limit <= length()`.
2453	* @param text the text to replace characters `start` to `limit - 1`
2454	* @stable ICU 2.0
2455	*/
2456	virtual void handleReplaceBetween(int32_t start,
2457	int32_t limit,
2458	const UnicodeString& text);
2459
2460	/**
2461	* Replaceable API
2462	* @return TRUE if it has MetaData
2463	* @stable ICU 2.4
2464	*/
2465	virtual UBool hasMetaData() const;
2466
2467	/**
2468	* Copy a substring of this object, retaining attribute (out-of-band)
2469	* information. This method is used to duplicate or reorder substrings.
2470	* The destination index must not overlap the source range.
2471	*
2472	* @param start the beginning index, inclusive; `0 <= start <= limit`.
2473	* @param limit the ending index, exclusive; `start <= limit <= length()`.
2474	* @param dest the destination index. The characters from
2475	* `start..limit-1` will be copied to `dest`.
2476	* Implementations of this method may assume that `dest <= start ||
2477	* dest >= limit`.
2478	* @stable ICU 2.0
2479	*/
2480	virtual void copy(int32_t start, int32_t limit, int32_t dest);
2481
2482	/* Search and replace operations */
2483
2484	/**
2485	* Replace all occurrences of characters in oldText with the characters
2486	* in newText
2487	* @param oldText the text containing the search text
2488	* @param newText the text containing the replacement text
2489	* @return a reference to this
2490	* @stable ICU 2.0
2491	*/
2492	inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2493	const UnicodeString& newText);
2494
2495	/**
2496	* Replace all occurrences of characters in oldText with characters
2497	* in newText
2498	* in the range [`start`, `start + length`).
2499	* @param start the start of the range in which replace will be performed
2500	* @param length the length of the range in which replace will be performed
2501	* @param oldText the text containing the search text
2502	* @param newText the text containing the replacement text
2503	* @return a reference to this
2504	* @stable ICU 2.0
2505	*/
2506	inline UnicodeString& findAndReplace(int32_t start,
2507	int32_t length,
2508	const UnicodeString& oldText,
2509	const UnicodeString& newText);
2510
2511	/**
2512	* Replace all occurrences of characters in oldText in the range
2513	* [`oldStart`, `oldStart + oldLength`) with the characters
2514	* in newText in the range
2515	* [`newStart`, `newStart + newLength`)
2516	* in the range [`start`, `start + length`).
2517	* @param start the start of the range in which replace will be performed
2518	* @param length the length of the range in which replace will be performed
2519	* @param oldText the text containing the search text
2520	* @param oldStart the start of the search range in `oldText`
2521	* @param oldLength the length of the search range in `oldText`
2522	* @param newText the text containing the replacement text
2523	* @param newStart the start of the replacement range in `newText`
2524	* @param newLength the length of the replacement range in `newText`
2525	* @return a reference to this
2526	* @stable ICU 2.0
2527	*/
2528	UnicodeString& findAndReplace(int32_t start,
2529	int32_t length,
2530	const UnicodeString& oldText,
2531	int32_t oldStart,
2532	int32_t oldLength,
2533	const UnicodeString& newText,
2534	int32_t newStart,
2535	int32_t newLength);
2536
2537
2538	/* Remove operations */
2539
2540	/**
2541	* Removes all characters from the UnicodeString object and clears the bogus flag.
2542	* This is the UnicodeString equivalent of std::string’s clear().
2543	*
2544	* @return a reference to this
2545	* @see setToBogus
2546	* @stable ICU 2.0
2547	*/
2548	inline UnicodeString& remove();
2549
2550	/**
2551	* Remove the characters in the range
2552	* [`start`, `start + length`) from the UnicodeString object.
2553	* @param start the offset of the first character to remove
2554	* @param length the number of characters to remove
2555	* @return a reference to this
2556	* @stable ICU 2.0
2557	*/
2558	inline UnicodeString& remove(int32_t start,
2559	int32_t length = (int32_t)INT32_MAX);
2560
2561	/**
2562	* Remove the characters in the range
2563	* [`start`, `limit`) from the UnicodeString object.
2564	* @param start the offset of the first character to remove
2565	* @param limit the offset immediately following the range to remove
2566	* @return a reference to this
2567	* @stable ICU 2.0
2568	*/
2569	inline UnicodeString& removeBetween(int32_t start,
2570	int32_t limit = (int32_t)INT32_MAX);
2571
2572	/**
2573	* Retain only the characters in the range
2574	* [`start`, `limit`) from the UnicodeString object.
2575	* Removes characters before `start` and at and after `limit`.
2576	* @param start the offset of the first character to retain
2577	* @param limit the offset immediately following the range to retain
2578	* @return a reference to this
2579	* @stable ICU 4.4
2580	*/
2581	inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2582
2583	/* Length operations */
2584
2585	/**
2586	* Pad the start of this UnicodeString with the character `padChar`.
2587	* If the length of this UnicodeString is less than targetLength,
2588	* targetLength - length() copies of padChar will be added to the
2589	* beginning of this UnicodeString.
2590	* @param targetLength the desired length of the string
2591	* @param padChar the character to use for padding. Defaults to
2592	* space (U+0020)
2593	* @return TRUE if the text was padded, FALSE otherwise.
2594	* @stable ICU 2.0
2595	*/
2596	UBool padLeading(int32_t targetLength,
2597	char16_t padChar = 0x0020); // default pad character: U+0020 SPACE
2598
2599	/**
2600	* Pad the end of this UnicodeString with the character `padChar`.
2601	* If the length of this UnicodeString is less than targetLength,
2602	* targetLength - length() copies of padChar will be added to the
2603	* end of this UnicodeString.
2604	* @param targetLength the desired length of the string
2605	* @param padChar the character to use for padding. Defaults to
2606	* space (U+0020)
2607	* @return TRUE if the text was padded, FALSE otherwise.
2608	* @stable ICU 2.0
2609	*/
2610	UBool padTrailing(int32_t targetLength,
2611	char16_t padChar = 0x0020); // default pad character: U+0020 SPACE
2612
2613	/**
2614	* Truncate this UnicodeString to the `targetLength`.
2615	* @param targetLength the desired length of this UnicodeString.
2616	* @return TRUE if the text was truncated, FALSE otherwise
2617	* @stable ICU 2.0
2618	*/
2619	inline UBool truncate(int32_t targetLength);
2620
2621	/**
2622	* Trims leading and trailing whitespace from this UnicodeString.
2623	* @return a reference to this
2624	* @stable ICU 2.0
2625	*/
2626	UnicodeString& trim(void);
2627
2628
2629	/* Miscellaneous operations */
2630
2631	/**
2632	* Reverse this UnicodeString in place.
2633	* @return a reference to this
2634	* @stable ICU 2.0
2635	*/
2636	inline UnicodeString& reverse(void);
2637
2638	/**
2639	* Reverse the range [`start`, `start + length`) in
2640	* this UnicodeString.
2641	* @param start the start of the range to reverse
2642	* @param length the number of characters to reverse
2643	* @return a reference to this
2644	* @stable ICU 2.0
2645	*/
2646	inline UnicodeString& reverse(int32_t start,
2647	int32_t length);
2648
2649	/**
2650	* Convert the characters in this to UPPER CASE following the conventions of
2651	* the default locale.
2652	* @return A reference to this.
2653	* @stable ICU 2.0
2654	*/
2655	UnicodeString& toUpper(void);
2656
2657	/**
2658	* Convert the characters in this to UPPER CASE following the conventions of
2659	* a specific locale.
2660	* @param locale The locale containing the conventions to use.
2661	* @return A reference to this.
2662	* @stable ICU 2.0
2663	*/
2664	UnicodeString& toUpper(const Locale& locale);
2665
2666	/**
2667	* Convert the characters in this to lower case following the conventions of
2668	* the default locale.
2669	* @return A reference to this.
2670	* @stable ICU 2.0
2671	*/
2672	UnicodeString& toLower(void);
2673
2674	/**
2675	* Convert the characters in this to lower case following the conventions of
2676	* a specific locale.
2677	* @param locale The locale containing the conventions to use.
2678	* @return A reference to this.
2679	* @stable ICU 2.0
2680	*/
2681	UnicodeString& toLower(const Locale& locale);
2682
2683	#if !UCONFIG_NO_BREAK_ITERATION
2684
2685	/**
2686	* Titlecase this string, convenience function using the default locale.
2687	*
2688	* Casing is locale-dependent and context-sensitive.
2689	* Titlecasing uses a break iterator to find the first characters of words
2690	* that are to be titlecased. It titlecases those characters and lowercases
2691	* all others.
2692	*
2693	* The titlecase break iterator can be provided to customize for arbitrary
2694	* styles, using rules and dictionaries beyond the standard iterators.
2695	* It may be more efficient to always provide an iterator to avoid
2696	* opening and closing one for each string.
2697	* The standard titlecase iterator for the root locale implements the
2698	* algorithm of Unicode TR 21.
2699	*
2700	* This function uses only the setText(), first() and next() methods of the
2701	* provided break iterator.
2702	*
2703	* @param titleIter A break iterator to find the first characters of words
2704	* that are to be titlecased.
2705	* If none is provided (0), then a standard titlecase
2706	* break iterator is opened.
2707	* Otherwise the provided iterator is set to the string's text.
2708	* @return A reference to this.
2709	* @stable ICU 2.1
2710	*/
2711	UnicodeString &toTitle(BreakIterator *titleIter);
2712
2713	/**
2714	* Titlecase this string.
2715	*
2716	* Casing is locale-dependent and context-sensitive.
2717	* Titlecasing uses a break iterator to find the first characters of words
2718	* that are to be titlecased. It titlecases those characters and lowercases
2719	* all others.
2720	*
2721	* The titlecase break iterator can be provided to customize for arbitrary
2722	* styles, using rules and dictionaries beyond the standard iterators.
2723	* It may be more efficient to always provide an iterator to avoid
2724	* opening and closing one for each string.
2725	* The standard titlecase iterator for the root locale implements the
2726	* algorithm of Unicode TR 21.
2727	*
2728	* This function uses only the setText(), first() and next() methods of the
2729	* provided break iterator.
2730	*
2731	* @param titleIter A break iterator to find the first characters of words
2732	* that are to be titlecased.
2733	* If none is provided (0), then a standard titlecase
2734	* break iterator is opened.
2735	* Otherwise the provided iterator is set to the string's text.
2736	* @param locale The locale to consider.
2737	* @return A reference to this.
2738	* @stable ICU 2.1
2739	*/
2740	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2741
2742	/**
2743	* Titlecase this string, with options.
2744	*
2745	* Casing is locale-dependent and context-sensitive.
2746	* Titlecasing uses a break iterator to find the first characters of words
2747	* that are to be titlecased. It titlecases those characters and lowercases
2748	* all others. (This can be modified with options.)
2749	*
2750	* The titlecase break iterator can be provided to customize for arbitrary
2751	* styles, using rules and dictionaries beyond the standard iterators.
2752	* It may be more efficient to always provide an iterator to avoid
2753	* opening and closing one for each string.
2754	* The standard titlecase iterator for the root locale implements the
2755	* algorithm of Unicode TR 21.
2756	*
2757	* This function uses only the setText(), first() and next() methods of the
2758	* provided break iterator.
2759	*
2760	* @param titleIter A break iterator to find the first characters of words
2761	* that are to be titlecased.
2762	* If none is provided (0), then a standard titlecase
2763	* break iterator is opened.
2764	* Otherwise the provided iterator is set to the string's text.
2765	* @param locale The locale to consider.
2766	* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
2767	* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
2768	* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
2769	* See also ucasemap_open().
2770	* @return A reference to this.
2771	* @stable ICU 3.8
2772	*/
2773	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2774
2775	#endif
2776
2777	/**
2778	* Case-folds the characters in this string.
2779	*
2780	* Case-folding is locale-independent and not context-sensitive,
2781	* but there is an option for whether to include or exclude mappings for dotted I
2782	* and dotless i that are marked with 'T' in CaseFolding.txt.
2783	*
2784	* The result may be longer or shorter than the original.
2785	*
2786	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2787	* @return A reference to this.
2788	* @stable ICU 2.0
2789	*/
2790	UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2791
2792	//========================================
2793	// Access to the internal buffer
2794	//========================================
2795
2796	/**
2797	* Get a read/write pointer to the internal buffer.
2798	* The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
2799	* writable, and is still owned by the UnicodeString object.
2800	* Calls to getBuffer(minCapacity) must not be nested, and
2801	* must be matched with calls to releaseBuffer(newLength).
2802	* If the string buffer was read-only or shared,
2803	* then it will be reallocated and copied.
2804	*
2805	* An attempted nested call will return 0, and will not further modify the
2806	* state of the UnicodeString object.
2807	* It also returns 0 if the string is bogus.
2808	*
2809	* The actual capacity of the string buffer may be larger than minCapacity.
2810	* getCapacity() returns the actual capacity.
2811	* For many operations, the full capacity should be used to avoid reallocations.
2812	*
2813	* While the buffer is "open" between getBuffer(minCapacity)
2814	* and releaseBuffer(newLength), the following applies:
2815	* - The string length is set to 0.
2816	* - Any read API call on the UnicodeString object will behave like on a 0-length string.
2817	* - Any write API call on the UnicodeString object is disallowed and will have no effect.
2818	* - You can read from and write to the returned buffer.
2819	* - The previous string contents will still be in the buffer;
2820	* if you want to use it, then you need to call length() before getBuffer(minCapacity).
2821	* If the length() was greater than minCapacity, then any contents after minCapacity
2822	* may be lost.
2823	* The buffer contents is not NUL-terminated by getBuffer().
2824	* If length() < getCapacity() then you can terminate it by writing a NUL
2825	* at index length().
2826	* - You must call releaseBuffer(newLength) before and in order to
2827	* return to normal UnicodeString operation.
2828	*
2829	* @param minCapacity the minimum number of char16_ts that are to be available
2830	* in the buffer, starting at the returned pointer;
2831	* default to the current string capacity if minCapacity==-1
2832	* @return a writable pointer to the internal string buffer,
2833	* or nullptr if an error occurs (nested calls, out of memory)
2834	*
2835	* @see releaseBuffer
2836	* @see getTerminatedBuffer()
2837	* @stable ICU 2.0
2838	*/
2839	char16_t *getBuffer(int32_t minCapacity);
2840
2841	/**
2842	* Release a read/write buffer on a UnicodeString object with an
2843	* "open" getBuffer(minCapacity).
2844	* This function must be called in a matched pair with getBuffer(minCapacity).
2845	* releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2846	*
2847	* It will set the string length to newLength, at most to the current capacity.
2848	* If newLength==-1 then it will set the length according to the
2849	* first NUL in the buffer, or to the capacity if there is no NUL.
2850	*
2851	* After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2852	*
2853	* @param newLength the new length of the UnicodeString object;
2854	* defaults to the current capacity if newLength is greater than that;
2855	* if newLength==-1, it defaults to u_strlen(buffer) but not more than
2856	* the current capacity of the string
2857	*
2858	* @see getBuffer(int32_t minCapacity)
2859	* @stable ICU 2.0
2860	*/
2861	void releaseBuffer(int32_t newLength=-1); // -1: length is taken from the first NUL in the buffer
2862
2863	/**
2864	* Get a read-only pointer to the internal buffer.
2865	* This can be called at any time on a valid UnicodeString.
2866	*
2867	* It returns 0 if the string is bogus, or
2868	* during an "open" getBuffer(minCapacity).
2869	*
2870	* It can be called as many times as desired.
2871	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2872	* at which time the pointer is semantically invalidated and must not be used any more.
2873	*
2874	* The capacity of the buffer can be determined with getCapacity().
2875	* The part after length() may or may not be initialized and valid,
2876	* depending on the history of the UnicodeString object.
2877	*
2878	* The buffer contents is (probably) not NUL-terminated.
2879	* You can check if it is with
2880	* `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
2881	* (See getTerminatedBuffer().)
2882	*
2883	* The buffer may reside in read-only memory. Its contents must not
2884	* be modified.
2885	*
2886	* @return a read-only pointer to the internal string buffer,
2887	* or nullptr if the string is empty or bogus
2888	*
2889	* @see getBuffer(int32_t minCapacity)
2890	* @see getTerminatedBuffer()
2891	* @stable ICU 2.0
2892	*/
2893	inline const char16_t *getBuffer() const;
2894
2895	/**
2896	* Get a read-only pointer to the internal buffer,
2897	* making sure that it is NUL-terminated.
2898	* This can be called at any time on a valid UnicodeString.
2899	*
2900	* It returns 0 if the string is bogus, or
2901	* during an "open" getBuffer(minCapacity), or if the buffer cannot
2902	* be NUL-terminated (because memory allocation failed).
2903	*
2904	* It can be called as many times as desired.
2905	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2906	* at which time the pointer is semantically invalidated and must not be used any more.
2907	*
2908	* The capacity of the buffer can be determined with getCapacity().
2909	* The part after length()+1 may or may not be initialized and valid,
2910	* depending on the history of the UnicodeString object.
2911	*
2912	* The buffer contents is guaranteed to be NUL-terminated.
2913	* getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2914	* is written.
2915	* For this reason, this function is not const, unlike getBuffer().
2916	* Note that a UnicodeString may also contain NUL characters as part of its contents.
2917	*
2918	* The buffer may reside in read-only memory. Its contents must not
2919	* be modified.
2920	*
2921	* @return a read-only pointer to the internal string buffer,
2922	* or 0 if the string is empty or bogus
2923	*
2924	* @see getBuffer(int32_t minCapacity)
2925	* @see getBuffer()
2926	* @stable ICU 2.2
2927	*/
2928	const char16_t *getTerminatedBuffer();
2929
2930	//========================================
2931	// Constructors
2932	//========================================
2933
2934	/** Construct an empty UnicodeString.
2935	* @stable ICU 2.0
2936	*/
2937	inline UnicodeString();
2938
2939	/**
2940	* Construct a UnicodeString with capacity to hold `capacity` char16_ts
2941	* @param capacity the number of char16_ts this UnicodeString should hold
2942	* before a resize is necessary; if count is greater than 0 and count
2943	* code points c take up more space than capacity, then capacity is adjusted
2944	* accordingly.
2945	* @param c is used to initially fill the string
2946	* @param count specifies how many code points c are to be written in the
2947	* string
2948	* @stable ICU 2.0
2949	*/
2950	UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2951
2952	/**
2953	* Single char16_t (code unit) constructor.
2954	*
2955	* It is recommended to mark this constructor "explicit" by
2956	* `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
2957	* on the compiler command line or similar.
2958	* @param ch the character to place in the UnicodeString
2959	* @stable ICU 2.0
2960	*/
2961	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
2962
2963	/**
2964	* Single UChar32 (code point) constructor.
2965	*
2966	* It is recommended to mark this constructor "explicit" by
2967	* `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
2968	* on the compiler command line or similar.
2969	* @param ch the character to place in the UnicodeString
2970	* @stable ICU 2.0
2971	*/
2972	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2973
2974	/**
2975	* char16_t* constructor.
2976	*
2977	* It is recommended to mark this constructor "explicit" by
2978	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
2979	* on the compiler command line or similar.
2980	* @param text The characters to place in the UnicodeString. `text`
2981	* must be NUL (U+0000) terminated.
2982	* @stable ICU 2.0
2983	*/
2984	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
2985
2986	#if !U_CHAR16_IS_TYPEDEF
2987	/**
2988	* uint16_t * constructor.
2989	* Delegates to UnicodeString(const char16_t *).
2990	*
2991	* It is recommended to mark this constructor "explicit" by
2992	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
2993	* on the compiler command line or similar.
2994	* @param text NUL-terminated UTF-16 string
2995	* @stable ICU 59
2996	*/
2997	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text)
2998	: UnicodeString(ConstChar16Ptr(text)) {} // wrap the pointer, then delegate
2999	#endif
3000
3001	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3002	/**
3003	* wchar_t * constructor.
3004	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3005	* Delegates to UnicodeString(const char16_t *).
3006	*
3007	* It is recommended to mark this constructor "explicit" by
3008	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3009	* on the compiler command line or similar.
3010	* @param text NUL-terminated UTF-16 string
3011	* @stable ICU 59
3012	*/
3013	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text)
3014	: UnicodeString(ConstChar16Ptr(text)) {} // wrap the pointer, then delegate
3015	#endif
3016
3017	/**
3018	* nullptr_t constructor.
3019	* Effectively the same as the default constructor, makes an empty string object.
3020	*
3021	* It is recommended to mark this constructor "explicit" by
3022	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3023	* on the compiler command line or similar.
3024	* @param text nullptr
3025	* @stable ICU 59
3026	*/
3027	UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3028
3029	/**
3030	* char16_t* constructor.
3031	* @param text The characters to place in the UnicodeString.
3032	* @param textLength The number of Unicode characters in `text`
3033	* to copy.
3034	* @stable ICU 2.0
3035	*/
3036	UnicodeString(const char16_t *text,
3037	int32_t textLength);
3038
3039	#if !U_CHAR16_IS_TYPEDEF
3040	/**
3041	* uint16_t * constructor.
3042	* Delegates to UnicodeString(const char16_t *, int32_t).
3043	* @param text UTF-16 string
3044	* @param textLength string length
3045	* @stable ICU 59
3046	*/
3047	UnicodeString(const uint16_t *text, int32_t textLength)
3048	: UnicodeString(ConstChar16Ptr(text), textLength) {} // wrap the pointer, then delegate
3049	#endif
3050
3051	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3052	/**
3053	* wchar_t * constructor.
3054	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3055	* Delegates to UnicodeString(const char16_t *, int32_t).
3056	* @param text UTF-16 string
3057	* @param textLength string length
3058	* @stable ICU 59
3059	*/
3060	UnicodeString(const wchar_t *text, int32_t textLength)
3061	: UnicodeString(ConstChar16Ptr(text), textLength) {} // wrap the pointer, then delegate
3062	#endif
3063
3064	/**
3065	* nullptr_t constructor.
3066	* Effectively the same as the default constructor, makes an empty string object.
3067	* @param text nullptr
3068	* @param textLength ignored
3069	* @stable ICU 59
3070	*/
3071	inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3072
3073	/**
3074	* Readonly-aliasing char16_t* constructor.
3075	* The text will be used for the UnicodeString object, but
3076	* it will not be released when the UnicodeString is destroyed.
3077	* This has copy-on-write semantics:
3078	* When the string is modified, then the buffer is first copied into
3079	* newly allocated memory.
3080	* The aliased buffer is never modified.
3081	*
3082	* In an assignment to another UnicodeString, when using the copy constructor
3083	* or the assignment operator, the text will be copied.
3084	* When using fastCopyFrom(), the text will be aliased again,
3085	* so that both strings then alias the same readonly-text.
3086	*
3087	* @param isTerminated specifies if `text` is `NUL`-terminated.
3088	* This must be true if `textLength==-1`.
3089	* @param text The characters to alias for the UnicodeString.
3090	* @param textLength The number of Unicode characters in `text` to alias.
3091	* If -1, then this constructor will determine the length
3092	* by calling `u_strlen()`.
3093	* @stable ICU 2.0
3094	*/
3095	UnicodeString(UBool isTerminated,
3096	ConstChar16Ptr text,
3097	int32_t textLength);
3098
3099	/**
3100	* Writable-aliasing char16_t* constructor.
3101	* The text will be used for the UnicodeString object, but
3102	* it will not be released when the UnicodeString is destroyed.
3103	* This has write-through semantics:
3104	* For as long as the capacity of the buffer is sufficient, write operations
3105	* will directly affect the buffer. When more capacity is necessary, then
3106	* a new buffer will be allocated and the contents copied as with regularly
3107	* constructed strings.
3108	* In an assignment to another UnicodeString, the buffer will be copied.
3109	* The extract(Char16Ptr dst) function detects whether the dst pointer is the same
3110	* as the string buffer itself and will in this case not copy the contents.
3111	*
3112	* @param buffer The characters to alias for the UnicodeString.
3113	* @param buffLength The number of Unicode characters in `buffer` to alias.
3114	* @param buffCapacity The size of `buffer` in char16_ts.
3115	* @stable ICU 2.0
3116	*/
3117	UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3118
3119	#if !U_CHAR16_IS_TYPEDEF
3120	/**
3121	* Writable-aliasing uint16_t * constructor.
3122	* Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3123	* @param buffer writable buffer of/for UTF-16 text
3124	* @param buffLength length of the current buffer contents
3125	* @param buffCapacity buffer capacity
3126	* @stable ICU 59
3127	*/
3128	UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
3129	: UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} // wrap the pointer, then delegate
3130	#endif
3131
3132	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3133	/**
3134	* Writable-aliasing wchar_t * constructor.
3135	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3136	* Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3137	* @param buffer writable buffer of/for UTF-16 text
3138	* @param buffLength length of the current buffer contents
3139	* @param buffCapacity buffer capacity
3140	* @stable ICU 59
3141	*/
3142	UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
3143	: UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} // wrap the pointer, then delegate
3144	#endif
3145
3146	/**
3147	* Writable-aliasing nullptr_t constructor.
3148	* Effectively the same as the default constructor, makes an empty string object.
3149	* @param buffer nullptr
3150	* @param buffLength ignored
3151	* @param buffCapacity ignored
3152	* @stable ICU 59
3153	*/
3154	inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3155
3156	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3157
3158	/**
3159	* char* constructor.
3160	* Uses the default converter (and thus depends on the ICU conversion code)
3161	* unless U_CHARSET_IS_UTF8 is set to 1.
3162	*
3163	* For ASCII (really "invariant character") strings it is more efficient to use
3164	* the constructor that takes a US_INV (for its enum EInvariant).
3165	* For ASCII (invariant-character) string literals, see UNICODE_STRING and
3166	* UNICODE_STRING_SIMPLE.
3167	*
3168	* It is recommended to mark this constructor "explicit" by
3169	* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3170	* on the compiler command line or similar.
3171	* @param codepageData an array of bytes, null-terminated,
3172	* in the platform's default codepage.
3173	* @stable ICU 2.0
3174	* @see UNICODE_STRING
3175	* @see UNICODE_STRING_SIMPLE
3176	*/
3177	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3178
3179	/**
3180	* char* constructor.
3181	* Uses the default converter (and thus depends on the ICU conversion code)
3182	* unless U_CHARSET_IS_UTF8 is set to 1.
3183	* @param codepageData an array of bytes in the platform's default codepage.
3184	* @param dataLength The number of bytes in `codepageData`.
3185	* @stable ICU 2.0
3186	*/
3187	UnicodeString(const char *codepageData, int32_t dataLength);
3188
3189	#endif
3190
3191	#if !UCONFIG_NO_CONVERSION
3192
3193	/**
3194	* char* constructor.
3195	* @param codepageData an array of bytes, null-terminated
3196	* @param codepage the encoding of `codepageData`. The special
3197	* value 0 for `codepage` indicates that the text is in the
3198	* platform's default codepage.
3199	*
3200	* If `codepage` is an empty string (`""`),
3201	* then a simple conversion is performed on the codepage-invariant
3202	* subset ("invariant characters") of the platform encoding. See utypes.h.
3203	* Recommendation: For invariant-character strings use the constructor
3204	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3205	* because it avoids object code dependencies of UnicodeString on
3206	* the conversion code.
3207	*
3208	* @stable ICU 2.0
3209	*/
3210	UnicodeString(const char codepageData, const* char *codepage);
3211
3212	/**
3213	* char* constructor.
3214	* @param codepageData an array of bytes.
3215	* @param dataLength The number of bytes in `codepageData`.
3216	* @param codepage the encoding of `codepageData`. The special
3217	* value 0 for `codepage` indicates that the text is in the
3218	* platform's default codepage.
3219	* If `codepage` is an empty string (`""`),
3220	* then a simple conversion is performed on the codepage-invariant
3221	* subset ("invariant characters") of the platform encoding. See utypes.h.
3222	* Recommendation: For invariant-character strings use the constructor
3223	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3224	* because it avoids object code dependencies of UnicodeString on
3225	* the conversion code.
3226	*
3227	* @stable ICU 2.0
3228	*/
3229	UnicodeString(const char codepageData, int32_t dataLength, const* char *codepage);
3230
3231	/**
3232	* char * / UConverter constructor.
3233	* This constructor uses an existing UConverter object to
3234	* convert the codepage string to Unicode and construct a UnicodeString
3235	* from that.
3236	*
3237	* The converter is reset at first.
3238	* If the error code indicates a failure before this constructor is called,
3239	* or if an error occurs during conversion or construction,
3240	* then the string will be bogus.
3241	*
3242	* This function avoids the overhead of opening and closing a converter if
3243	* multiple strings are constructed.
3244	*
3245	* @param src input codepage string
3246	* @param srcLength length of the input string, can be -1 for NUL-terminated strings
3247	* @param cnv converter object (ucnv_resetToUnicode() will be called),
3248	* can be NULL for the default converter
3249	* @param errorCode normal ICU error code
3250	* @stable ICU 2.0
3251	*/
3252	UnicodeString(
3253	const char *src, int32_t srcLength,
3254	UConverter *cnv,
3255	UErrorCode &errorCode);
3256
3257	#endif
3258
3259	/**
3260	* Constructs a Unicode string from an invariant-character char * string.
3261	* About invariant characters see utypes.h.
3262	* This constructor has no runtime dependency on conversion code and is
3263	* therefore recommended over ones taking a charset name string
3264	* (where the empty string "" indicates invariant-character conversion).
3265	*
3266	* Use the macro US_INV as the third, signature-distinguishing parameter.
3267	*
3268	* For example:
3269	* \code
3270	* void fn(const char *s) {
3271	* UnicodeString ustr(s, -1, US_INV);
3272	* // use ustr ...
3273	* }
3274	* \endcode
3275	* @param src String using only invariant characters.
3276	* @param textLength Length of src, or -1 if NUL-terminated.
3277	* @param inv Signature-distinguishing paramater, use US_INV.
3278	*
3279	* @see US_INV
3280	* @stable ICU 3.2
3281	*/
3282	UnicodeString(const char src, int32_t textLength, enum* EInvariant inv);
3283
3284
3285	/**
3286	* Copy constructor.
3287	*
3288	* Starting with ICU 2.4, the assignment operator and the copy constructor
3289	* allocate a new buffer and copy the buffer contents even for readonly aliases.
3290	* By contrast, the fastCopyFrom() function implements the old,
3291	* more efficient but less safe behavior
3292	* of making this string also a readonly alias to the same buffer.
3293	*
3294	* If the source object has an "open" buffer from getBuffer(minCapacity),
3295	* then the copy is an empty string.
3296	*
3297	* @param that The UnicodeString object to copy.
3298	* @stable ICU 2.0
3299	* @see fastCopyFrom
3300	*/
3301	UnicodeString(const UnicodeString& that);
3302
3303	/**
3304	* Move constructor; might leave src in bogus state.
3305	* This string will have the same contents and state that the source string had.
3306	* @param src source string
3307	* @stable ICU 56
3308	*/
3309	UnicodeString(UnicodeString &&src) U_NOEXCEPT;
3310
3311	/**
3312	* 'Substring' constructor from tail of source string.
3313	* @param src The UnicodeString object to copy.
3314	* @param srcStart The offset into `src` at which to start copying.
3315	* @stable ICU 2.2
3316	*/
3317	UnicodeString(const UnicodeString& src, int32_t srcStart);
3318
3319	/**
3320	* 'Substring' constructor from subrange of source string.
3321	* @param src The UnicodeString object to copy.
3322	* @param srcStart The offset into `src` at which to start copying.
3323	* @param srcLength The number of characters from `src` to copy.
3324	* @stable ICU 2.2
3325	*/
3326	UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3327
3328	/**
3329	* Clone this object, an instance of a subclass of Replaceable.
3330	* Clones can be used concurrently in multiple threads.
3331	* If a subclass does not implement clone(), or if an error occurs,
3332	* then NULL is returned.
3333	* The caller must delete the clone.
3334	*
3335	* @return a clone of this object
3336	*
3337	* @see Replaceable::clone
3338	* @see getDynamicClassID
3339	* @stable ICU 2.6
3340	*/
3341	virtual UnicodeString clone() const*;
3342
3343	/* Destructor.*
3344	* @stable ICU 2.0
3345	*/
3346	virtual ~UnicodeString();
3347
3348	/**
3349	* Create a UnicodeString from a UTF-8 string.
3350	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3351	* Calls u_strFromUTF8WithSub().
3352	*
3353	* @param utf8 UTF-8 input string.
3354	* Note that a StringPiece can be implicitly constructed
3355	* from a std::string or a NUL-terminated const char * string.
3356	* @return A UnicodeString with equivalent UTF-16 contents.
3357	* @see toUTF8
3358	* @see toUTF8String
3359	* @stable ICU 4.2
3360	*/
3361	static UnicodeString fromUTF8(StringPiece utf8);
3362
3363	/**
3364	* Create a UnicodeString from a UTF-32 string.
3365	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3366	* Calls u_strFromUTF32WithSub().
3367	*
3368	* @param utf32 UTF-32 input string. Must not be NULL.
3369	* @param length Length of the input string, or -1 if NUL-terminated.
3370	* @return A UnicodeString with equivalent UTF-16 contents.
3371	* @see toUTF32
3372	* @stable ICU 4.2
3373	*/
3374	static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3375
/* Miscellaneous operations */
3377
3378	/**
3379	* Unescape a string of characters and return a string containing
3380	* the result. The following escape sequences are recognized:
3381	*
3382	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
3383	* \\Uhhhhhhhh 8 hex digits
3384	* \\xhh 1-2 hex digits
3385	* \\ooo 1-3 octal digits; o in [0-7]
3386	* \\cX control-X; X is masked with 0x1F
3387	*
3388	* as well as the standard ANSI C escapes:
3389	*
3390	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3391	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3392	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3393	*
3394	* Anything else following a backslash is generically escaped. For
3395	* example, "[a\\-z]" returns "[a-z]".
3396	*
3397	* If an escape sequence is ill-formed, this method returns an empty
3398	* string. An example of an ill-formed sequence is "\\u" followed by
3399	* fewer than 4 hex digits.
3400	*
3401	* This function is similar to u_unescape() but not identical to it.
3402	* The latter takes a source char*, so it does escape recognition
3403	* and also invariant conversion.
3404	*
3405	* @return a string with backslash escapes interpreted, or an
3406	* empty string on error.
3407	* @see UnicodeString#unescapeAt()
3408	* @see u_unescape()
3409	* @see u_unescapeAt()
3410	* @stable ICU 2.0
3411	*/
3412	UnicodeString unescape() const;
3413
3414	/**
3415	* Unescape a single escape sequence and return the represented
3416	* character. See unescape() for a listing of the recognized escape
3417	* sequences. The character at offset-1 is assumed (without
3418	* checking) to be a backslash. If the escape sequence is
3419	* ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3420	* returned.
3421	*
3422	* @param offset an input output parameter. On input, it is the
3423	* offset into this string where the escape sequence is located,
3424	* after the initial backslash. On output, it is advanced after the
3425	* last character parsed. On error, it is not advanced at all.
3426	* @return the character represented by the escape sequence at
3427	* offset, or U_SENTINEL=-1 on error.
3428	* @see UnicodeString#unescape()
3429	* @see u_unescape()
3430	* @see u_unescapeAt()
3431	* @stable ICU 2.0
3432	*/
3433	UChar32 unescapeAt(int32_t &offset) const;
3434
3435	/**
3436	* ICU "poor man's RTTI", returns a UClassID for this class.
3437	*
3438	* @stable ICU 2.2
3439	*/
3440	static UClassID U_EXPORT2 getStaticClassID();
3441
3442	/**
3443	* ICU "poor man's RTTI", returns a UClassID for the actual class.
3444	*
3445	* @stable ICU 2.2
3446	*/
3447	virtual UClassID getDynamicClassID() const;
3448
3449	//========================================
3450	// Implementation methods
3451	//========================================
3452
3453	protected:
3454	/**
3455	* Implement Replaceable::getLength() (see jitterbug 1027).
3456	* @stable ICU 2.4
3457	*/
3458	virtual int32_t getLength() const;
3459
3460	/**
3461	* The change in Replaceable to use virtual getCharAt() allows
3462	* UnicodeString::charAt() to be inline again (see jitterbug 709).
3463	* @stable ICU 2.4
3464	*/
3465	virtual char16_t getCharAt(int32_t offset) const;
3466
3467	/**
3468	* The change in Replaceable to use virtual getChar32At() allows
3469	* UnicodeString::char32At() to be inline again (see jitterbug 709).
3470	* @stable ICU 2.4
3471	*/
3472	virtual UChar32 getChar32At(int32_t offset) const;
3473
3474	private:
3475	// For char constructors. Could be made public.*
3476	UnicodeString &setToUTF8(StringPiece utf8);
3477	// For extract(char).*
3478	// We could make a toUTF8(target, capacity, errorCode) public but not
3479	// this version: New API will be cleaner if we make callers create substrings
3480	// rather than having start+length on every method,
3481	// and it should take a UErrorCode&.
3482	int32_t
3483	toUTF8(int32_t start, int32_t len,
3484	char target, int32_t capacity) const*;
3485
3486	/**
3487	* Internal string contents comparison, called by operator==.
3488	* Requires: this & text not bogus and have same lengths.
3489	*/
3490	UBool doEquals(const UnicodeString &text, int32_t len) const;
3491
3492	inline int8_t
3493	doCompare(int32_t start,
3494	int32_t length,
3495	const UnicodeString& srcText,
3496	int32_t srcStart,
3497	int32_t srcLength) const;
3498
3499	int8_t doCompare(int32_t start,
3500	int32_t length,
3501	const char16_t *srcChars,
3502	int32_t srcStart,
3503	int32_t srcLength) const;
3504
3505	inline int8_t
3506	doCompareCodePointOrder(int32_t start,
3507	int32_t length,
3508	const UnicodeString& srcText,
3509	int32_t srcStart,
3510	int32_t srcLength) const;
3511
3512	int8_t doCompareCodePointOrder(int32_t start,
3513	int32_t length,
3514	const char16_t *srcChars,
3515	int32_t srcStart,
3516	int32_t srcLength) const;
3517
3518	inline int8_t
3519	doCaseCompare(int32_t start,
3520	int32_t length,
3521	const UnicodeString &srcText,
3522	int32_t srcStart,
3523	int32_t srcLength,
3524	uint32_t options) const;
3525
3526	int8_t
3527	doCaseCompare(int32_t start,
3528	int32_t length,
3529	const char16_t *srcChars,
3530	int32_t srcStart,
3531	int32_t srcLength,
3532	uint32_t options) const;
3533
3534	int32_t doIndexOf(char16_t c,
3535	int32_t start,
3536	int32_t length) const;
3537
3538	int32_t doIndexOf(UChar32 c,
3539	int32_t start,
3540	int32_t length) const;
3541
3542	int32_t doLastIndexOf(char16_t c,
3543	int32_t start,
3544	int32_t length) const;
3545
3546	int32_t doLastIndexOf(UChar32 c,
3547	int32_t start,
3548	int32_t length) const;
3549
3550	void doExtract(int32_t start,
3551	int32_t length,
3552	char16_t *dst,
3553	int32_t dstStart) const;
3554
3555	inline void doExtract(int32_t start,
3556	int32_t length,
3557	UnicodeString& target) const;
3558
3559	inline char16_t doCharAt(int32_t offset) const;
3560
3561	UnicodeString& doReplace(int32_t start,
3562	int32_t length,
3563	const UnicodeString& srcText,
3564	int32_t srcStart,
3565	int32_t srcLength);
3566
3567	UnicodeString& doReplace(int32_t start,
3568	int32_t length,
3569	const char16_t *srcChars,
3570	int32_t srcStart,
3571	int32_t srcLength);
3572
3573	UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3574	UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3575
3576	UnicodeString& doReverse(int32_t start,
3577	int32_t length);
3578
3579	// calculate hash code
3580	int32_t doHashCode(void) const;
3581
3582	// get pointer to start of array
3583	// these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3584	inline char16_t* getArrayStart(void);
3585	inline const char16_t* getArrayStart(void) const;
3586
3587	inline UBool hasShortLength() const;
3588	inline int32_t getShortLength() const;
3589
3590	// A UnicodeString object (not necessarily its current buffer)
3591	// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3592	inline UBool isWritable() const;
3593
3594	// Is the current buffer writable?
3595	inline UBool isBufferWritable() const;
3596
3597	// None of the following does releaseArray().
3598	inline void setZeroLength();
3599	inline void setShortLength(int32_t len);
3600	inline void setLength(int32_t len);
3601	inline void setToEmpty();
3602	inline void setArray(char16_t array, int32_t len, int32_t capacity); // sets length but not flags*
3603
3604	// allocate the array; result may be the stack buffer
3605	// sets refCount to 1 if appropriate
3606	// sets fArray, fCapacity, and flags
3607	// sets length to 0
3608	// returns boolean for success or failure
3609	UBool allocate(int32_t capacity);
3610
3611	// release the array if owned
3612	void releaseArray(void);
3613
3614	// turn a bogus string into an empty one
3615	void unBogus();
3616
3617	// implements assigment operator, copy constructor, and fastCopyFrom()
3618	UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3619
3620	// Copies just the fields without memory management.
3621	void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
3622
3623	// Pin start and limit to acceptable values.
3624	inline void pinIndex(int32_t& start) const;
3625	inline void pinIndices(int32_t& start,
3626	int32_t& length) const;
3627
3628	#if !UCONFIG_NO_CONVERSION
3629
3630	/ Internal extract() using UConverter. /
3631	int32_t doExtract(int32_t start, int32_t length,
3632	char *dest, int32_t destCapacity,
3633	UConverter *cnv,
3634	UErrorCode &errorCode) const;
3635
3636	/*
3637	* Real constructor for converting from codepage data.
3638	* It assumes that it is called with !fRefCounted.
3639	*
3640	* If `codepage==0`, then the default converter
3641	* is used for the platform encoding.
3642	* If `codepage` is an empty string (`""`),
3643	* then a simple conversion is performed on the codepage-invariant
3644	* subset ("invariant characters") of the platform encoding. See utypes.h.
3645	*/
3646	void doCodepageCreate(const char *codepageData,
3647	int32_t dataLength,
3648	const char *codepage);
3649
3650	/*
3651	* Worker function for creating a UnicodeString from
3652	* a codepage string using a UConverter.
3653	*/
3654	void
3655	doCodepageCreate(const char *codepageData,
3656	int32_t dataLength,
3657	UConverter *converter,
3658	UErrorCode &status);
3659
3660	#endif
3661
3662	/*
3663	* This function is called when write access to the array
3664	* is necessary.
3665	*
3666	* We need to make a copy of the array if
3667	* the buffer is read-only, or
3668	* the buffer is refCounted (shared), and refCount>1, or
3669	* the buffer is too small.
3670	*
3671	* Return FALSE if memory could not be allocated.
3672	*/
3673	UBool cloneArrayIfNeeded(int32_t newCapacity = -`1`,
3674	int32_t growCapacity = -`1`,
3675	UBool doCopyArray = TRUE,
3676	int32_t **pBufferToDelete = `0`,
3677	UBool forceClone = FALSE);
3678
3679	/**
3680	* Common function for UnicodeString case mappings.
3681	* The stringCaseMapper has the same type UStringCaseMapper
3682	* as in ustr_imp.h for ustrcase_map().
3683	*/
3684	UnicodeString &
3685	caseMap(int32_t caseLocale, uint32_t options,
3686	#if !UCONFIG_NO_BREAK_ITERATION
3687	BreakIterator *iter,
3688	#endif
3689	UStringCaseMapper *stringCaseMapper);
3690
3691	// ref counting
3692	void addRef(void);
3693	int32_t removeRef(void);
3694	int32_t refCount(void) const;
3695
3696	// constants
3697	enum {
3698	/**
3699	* Size of stack buffer for short strings.
3700	* Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
3701	* @see UNISTR_OBJECT_SIZE
3702	*/
3703	US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-`2`)/U_SIZEOF_UCHAR,
3704	kInvalidUChar=`0xffff`, // U+FFFF returned by charAt(invalid index)
3705	kInvalidHashCode=`0`, // invalid hash code
3706	kEmptyHashCode=`1`, // hash code for empty string
3707
3708	// bit flag values for fLengthAndFlags
3709	kIsBogus=`1`, // this string is bogus, i.e., not valid or NULL
3710	kUsingStackBuffer=`2`,// using fUnion.fStackFields instead of fUnion.fFields
3711	kRefCounted=`4`, // there is a refCount field before the characters in fArray
3712	kBufferIsReadonly=`8`,// do not write to this buffer
3713	kOpenGetBuffer=`16`, // getBuffer(minCapacity) was called (is "open"),
3714	// and releaseBuffer(newLength) must be called
3715	kAllStorageFlags=`0x1f`,
3716
3717	kLengthShift=`5`, // remaining 11 bits for non-negative short length, or negative if long
3718	kLength1=`1`<<kLengthShift,
3719	kMaxShortLength=`0x3ff`, // max non-negative short length (leaves top bit 0)
3720	kLengthIsLarge=`0xffe0`, // short length < 0, real length is in fUnion.fFields.fLength
3721
3722	// combined values for convenience
3723	kShortString=kUsingStackBuffer,
3724	kLongString=kRefCounted,
3725	kReadonlyAlias=kBufferIsReadonly,
3726	kWritableAlias=`0`
3727	};
3728
3729	friend class UnicodeStringAppendable;
3730
3731	union StackBufferOrFields; // forward declaration necessary before friend declaration
3732	friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3733
3734	/*
3735	* The following are all the class fields that are stored
3736	* in each UnicodeString object.
3737	* Note that UnicodeString has virtual functions,
3738	* therefore there is an implicit vtable pointer
3739	* as the first real field.
3740	* The fields should be aligned such that no padding is necessary.
3741	* On 32-bit machines, the size should be 32 bytes,
3742	* on 64-bit machines (8-byte pointers), it should be 40 bytes.
3743	*
3744	* We use a hack to achieve this.
3745	*
3746	* With at least some compilers, each of the following is forced to
3747	* a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3748	* rounded up with additional padding if the fields do not already fit that requirement:
3749	* - sizeof(class UnicodeString)
3750	* - offsetof(UnicodeString, fUnion)
3751	* - sizeof(fUnion)
3752	* - sizeof(fStackFields)
3753	*
3754	* We optimize for the longest possible internal buffer for short strings.
3755	* fUnion.fStackFields begins with 2 bytes for storage flags
3756	* and the length of relatively short strings,
3757	* followed by the buffer for short string contents.
3758	* There is no padding inside fStackFields.
3759	*
3760	* Heap-allocated and aliased strings use fUnion.fFields.
3761	* Both fStackFields and fFields must begin with the same fields for flags and short length,
3762	* that is, those must have the same memory offsets inside the object,
3763	* because the flags must be inspected in order to decide which half of fUnion is being used.
3764	* We assume that the compiler does not reorder the fields.
3765	*
3766	* (Padding at the end of fFields is ok:
3767	* As long as it is no larger than fStackFields, it is not wasted space.)
3768	*
3769	* For some of the history of the UnicodeString class fields layout, see
3770	* - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3771	* - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3772	* - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3773	*/
3774	// (implicit) vtable;*
3775	union StackBufferOrFields {
3776	// fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3777	// Each struct of the union must begin with fLengthAndFlags.
3778	struct {
3779	int16_t fLengthAndFlags; // bit fields: see constants above
3780	char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
3781	} fStackFields;
3782	struct {
3783	int16_t fLengthAndFlags; // bit fields: see constants above
3784	int32_t fLength; // number of characters in fArray if >127; else undefined
3785	int32_t fCapacity; // capacity of fArray (in char16_ts)
3786	// array pointer last to minimize padding for machines with P128 data model
3787	// or pointer sizes that are not a power of 2
3788	char16_t fArray; // the Unicode data*
3789	} fFields;
3790	} fUnion;
3791	};
3792
3793	/**
3794	* Create a new UnicodeString with the concatenation of two others.
3795	*
3796	* @param s1 The first string to be copied to the new one.
3797	* @param s2 The second string to be copied to the new one, after s1.
3798	* @return UnicodeString(s1).append(s2)
3799	* @stable ICU 2.8
3800	*/
3801	U_COMMON_API UnicodeString U_EXPORT2
3802	operator+ (const UnicodeString &s1, const UnicodeString &s2);
3803
3804	//========================================
3805	// Inline members
3806	//========================================
3807
3808	//========================================
3809	// Privates
3810	//========================================
3811
3812	inline void
3813	UnicodeString::pinIndex(int32_t& start) const
3814	{
3815	// pin index
3816	if(start < `0`) {
3817	start = `0`;
3818	} else if(start > length()) {
3819	start = length();
3820	}
3821	}
3822
3823	inline void
3824	UnicodeString::pinIndices(int32_t& start,
3825	int32_t& _length) const
3826	{
3827	// pin indices
3828	int32_t len = length();
3829	if(start < `0`) {
3830	start = `0`;
3831	} else if(start > len) {
3832	start = len;
3833	}
3834	if(_length < `0`) {
3835	_length = `0`;
3836	} else if(_length > (len - start)) {
3837	_length = (len - start);
3838	}
3839	}
3840
3841	inline char16_t*
3842	UnicodeString::getArrayStart() {
3843	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3844	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3845	}
3846
3847	inline const char16_t*
3848	UnicodeString::getArrayStart() const {
3849	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3850	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3851	}
3852
3853	//========================================
3854	// Default constructor
3855	//========================================
3856
3857	inline
3858	UnicodeString::UnicodeString() {
3859	fUnion.fStackFields.fLengthAndFlags=kShortString;
3860	}
3861
3862	inline UnicodeString::UnicodeString(const std::nullptr_t /text/) {
3863	fUnion.fStackFields.fLengthAndFlags=kShortString;
3864	}
3865
3866	inline UnicodeString::UnicodeString(const std::nullptr_t /text/, int32_t /length/) {
3867	fUnion.fStackFields.fLengthAndFlags=kShortString;
3868	}
3869
3870	inline UnicodeString::UnicodeString(std::nullptr_t /buffer/, int32_t /buffLength/, int32_t /buffCapacity/) {
3871	fUnion.fStackFields.fLengthAndFlags=kShortString;
3872	}
3873
3874	//========================================
3875	// Read-only implementation methods
3876	//========================================
3877	inline UBool
3878	UnicodeString::hasShortLength() const {
3879	return fUnion.fFields.fLengthAndFlags>=`0`;
3880	}
3881
3882	inline int32_t
3883	UnicodeString::getShortLength() const {
3884	// fLengthAndFlags must be non-negative -> short length >= 0
3885	// and arithmetic or logical shift does not matter.
3886	return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3887	}
3888
3889	inline int32_t
3890	UnicodeString::length() const {
3891	return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3892	}
3893
3894	inline int32_t
3895	UnicodeString::getCapacity() const {
3896	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3897	US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3898	}
3899
3900	inline int32_t
3901	UnicodeString::hashCode() const
3902	{ return doHashCode(); }
3903
3904	inline UBool
3905	UnicodeString::isBogus() const
3906	{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3907
3908	inline UBool
3909	UnicodeString::isWritable() const
3910	{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus)); }
3911
3912	inline UBool
3913	UnicodeString::isBufferWritable() const
3914	{
3915	return (UBool)(
3916	!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus\|kBufferIsReadonly)) &&
3917	(!(fUnion.fFields.fLengthAndFlags&kRefCounted) \|\| refCount()==`1`));
3918	}
3919
3920	inline const char16_t *
3921	UnicodeString::getBuffer() const {
3922	if(fUnion.fFields.fLengthAndFlags&(kIsBogus\|kOpenGetBuffer)) {
3923	return nullptr;
3924	} else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3925	return fUnion.fStackFields.fBuffer;
3926	} else {
3927	return fUnion.fFields.fArray;
3928	}
3929	}
3930
3931	//========================================
3932	// Read-only alias methods
3933	//========================================
3934	inline int8_t
3935	UnicodeString::doCompare(int32_t start,
3936	int32_t thisLength,
3937	const UnicodeString& srcText,
3938	int32_t srcStart,
3939	int32_t srcLength) const
3940	{
3941	if(srcText.isBogus()) {
3942	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3943	} else {
3944	srcText.pinIndices(srcStart, srcLength);
3945	return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3946	}
3947	}
3948
3949	inline UBool
3950	UnicodeString::operator== (const UnicodeString& text) const
3951	{
3952	if(isBogus()) {
3953	return text.isBogus();
3954	} else {
3955	int32_t len = length(), textLength = text.length();
3956	return !text.isBogus() && len == textLength && doEquals(text, len);
3957	}
3958	}
3959
3960	inline UBool
3961	UnicodeString::operator!= (const UnicodeString& text) const
3962	{ return (! operator==(text)); }
3963
3964	inline UBool
3965	UnicodeString::operator> (const UnicodeString& text) const
3966	{ return doCompare(`0`, length(), text, `0`, text.length()) == `1`; }
3967
3968	inline UBool
3969	UnicodeString::operator< (const UnicodeString& text) const
3970	{ return doCompare(`0`, length(), text, `0`, text.length()) == -`1`; }
3971
3972	inline UBool
3973	UnicodeString::operator>= (const UnicodeString& text) const
3974	{ return doCompare(`0`, length(), text, `0`, text.length()) != -`1`; }
3975
3976	inline UBool
3977	UnicodeString::operator<= (const UnicodeString& text) const
3978	{ return doCompare(`0`, length(), text, `0`, text.length()) != `1`; }
3979
3980	inline int8_t
3981	UnicodeString::compare(const UnicodeString& text) const
3982	{ return doCompare(`0`, length(), text, `0`, text.length()); }
3983
3984	inline int8_t
3985	UnicodeString::compare(int32_t start,
3986	int32_t _length,
3987	const UnicodeString& srcText) const
3988	{ return doCompare(start, _length, srcText, `0`, srcText.length()); }
3989
3990	inline int8_t
3991	UnicodeString::compare(ConstChar16Ptr srcChars,
3992	int32_t srcLength) const
3993	{ return doCompare(`0`, length(), srcChars, `0`, srcLength); }
3994
3995	inline int8_t
3996	UnicodeString::compare(int32_t start,
3997	int32_t _length,
3998	const UnicodeString& srcText,
3999	int32_t srcStart,
4000	int32_t srcLength) const
4001	{ return doCompare(start, _length, srcText, srcStart, srcLength); }
4002
4003	inline int8_t
4004	UnicodeString::compare(int32_t start,
4005	int32_t _length,
4006	const char16_t srcChars) const*
4007	{ return doCompare(start, _length, srcChars, `0`, _length); }
4008
4009	inline int8_t
4010	UnicodeString::compare(int32_t start,
4011	int32_t _length,
4012	const char16_t *srcChars,
4013	int32_t srcStart,
4014	int32_t srcLength) const
4015	{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
4016
4017	inline int8_t
4018	UnicodeString::compareBetween(int32_t start,
4019	int32_t limit,
4020	const UnicodeString& srcText,
4021	int32_t srcStart,
4022	int32_t srcLimit) const
4023	{ return doCompare(start, limit - start,
4024	srcText, srcStart, srcLimit - srcStart); }
4025
4026	inline int8_t
4027	UnicodeString::doCompareCodePointOrder(int32_t start,
4028	int32_t thisLength,
4029	const UnicodeString& srcText,
4030	int32_t srcStart,
4031	int32_t srcLength) const
4032	{
4033	if(srcText.isBogus()) {
4034	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4035	} else {
4036	srcText.pinIndices(srcStart, srcLength);
4037	return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4038	}
4039	}
4040
4041	inline int8_t
4042	UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4043	{ return doCompareCodePointOrder(`0`, length(), text, `0`, text.length()); }
4044
4045	inline int8_t
4046	UnicodeString::compareCodePointOrder(int32_t start,
4047	int32_t _length,
4048	const UnicodeString& srcText) const
4049	{ return doCompareCodePointOrder(start, _length, srcText, `0`, srcText.length()); }
4050
4051	inline int8_t
4052	UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4053	int32_t srcLength) const
4054	{ return doCompareCodePointOrder(`0`, length(), srcChars, `0`, srcLength); }
4055
4056	inline int8_t
4057	UnicodeString::compareCodePointOrder(int32_t start,
4058	int32_t _length,
4059	const UnicodeString& srcText,
4060	int32_t srcStart,
4061	int32_t srcLength) const
4062	{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4063
4064	inline int8_t
4065	UnicodeString::compareCodePointOrder(int32_t start,
4066	int32_t _length,
4067	const char16_t srcChars) const*
4068	{ return doCompareCodePointOrder(start, _length, srcChars, `0`, _length); }
4069
4070	inline int8_t
4071	UnicodeString::compareCodePointOrder(int32_t start,
4072	int32_t _length,
4073	const char16_t *srcChars,
4074	int32_t srcStart,
4075	int32_t srcLength) const
4076	{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4077
4078	inline int8_t
4079	UnicodeString::compareCodePointOrderBetween(int32_t start,
4080	int32_t limit,
4081	const UnicodeString& srcText,
4082	int32_t srcStart,
4083	int32_t srcLimit) const
4084	{ return doCompareCodePointOrder(start, limit - start,
4085	srcText, srcStart, srcLimit - srcStart); }
4086
4087	inline int8_t
4088	UnicodeString::doCaseCompare(int32_t start,
4089	int32_t thisLength,
4090	const UnicodeString &srcText,
4091	int32_t srcStart,
4092	int32_t srcLength,
4093	uint32_t options) const
4094	{
4095	if(srcText.isBogus()) {
4096	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4097	} else {
4098	srcText.pinIndices(srcStart, srcLength);
4099	return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4100	}
4101	}
4102
4103	inline int8_t
4104	UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4105	return doCaseCompare(`0`, length(), text, `0`, text.length(), options);
4106	}
4107
4108	inline int8_t
4109	UnicodeString::caseCompare(int32_t start,
4110	int32_t _length,
4111	const UnicodeString &srcText,
4112	uint32_t options) const {
4113	return doCaseCompare(start, _length, srcText, `0`, srcText.length(), options);
4114	}
4115
4116	inline int8_t
4117	UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4118	int32_t srcLength,
4119	uint32_t options) const {
4120	return doCaseCompare(`0`, length(), srcChars, `0`, srcLength, options);
4121	}
4122
4123	inline int8_t
4124	UnicodeString::caseCompare(int32_t start,
4125	int32_t _length,
4126	const UnicodeString &srcText,
4127	int32_t srcStart,
4128	int32_t srcLength,
4129	uint32_t options) const {
4130	return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4131	}
4132
4133	inline int8_t
4134	UnicodeString::caseCompare(int32_t start,
4135	int32_t _length,
4136	const char16_t *srcChars,
4137	uint32_t options) const {
4138	return doCaseCompare(start, _length, srcChars, `0`, _length, options);
4139	}
4140
4141	inline int8_t
4142	UnicodeString::caseCompare(int32_t start,
4143	int32_t _length,
4144	const char16_t *srcChars,
4145	int32_t srcStart,
4146	int32_t srcLength,
4147	uint32_t options) const {
4148	return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4149	}
4150
4151	inline int8_t
4152	UnicodeString::caseCompareBetween(int32_t start,
4153	int32_t limit,
4154	const UnicodeString &srcText,
4155	int32_t srcStart,
4156	int32_t srcLimit,
4157	uint32_t options) const {
4158	return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4159	}
4160
4161	inline int32_t
4162	UnicodeString::indexOf(const UnicodeString& srcText,
4163	int32_t srcStart,
4164	int32_t srcLength,
4165	int32_t start,
4166	int32_t _length) const
4167	{
4168	if(!srcText.isBogus()) {
4169	srcText.pinIndices(srcStart, srcLength);
4170	if(srcLength > `0`) {
4171	return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4172	}
4173	}
4174	return -`1`;
4175	}
4176
4177	inline int32_t
4178	UnicodeString::indexOf(const UnicodeString& text) const
4179	{ return indexOf(text, `0`, text.length(), `0`, length()); }
4180
4181	inline int32_t
4182	UnicodeString::indexOf(const UnicodeString& text,
4183	int32_t start) const {
4184	pinIndex(start);
4185	return indexOf(text, `0`, text.length(), start, length() - start);
4186	}
4187
4188	inline int32_t
4189	UnicodeString::indexOf(const UnicodeString& text,
4190	int32_t start,
4191	int32_t _length) const
4192	{ return indexOf(text, `0`, text.length(), start, _length); }
4193
4194	inline int32_t
4195	UnicodeString::indexOf(const char16_t *srcChars,
4196	int32_t srcLength,
4197	int32_t start) const {
4198	pinIndex(start);
4199	return indexOf(srcChars, `0`, srcLength, start, length() - start);
4200	}
4201
4202	inline int32_t
4203	UnicodeString::indexOf(ConstChar16Ptr srcChars,
4204	int32_t srcLength,
4205	int32_t start,
4206	int32_t _length) const
4207	{ return indexOf(srcChars, `0`, srcLength, start, _length); }
4208
4209	inline int32_t
4210	UnicodeString::indexOf(char16_t c,
4211	int32_t start,
4212	int32_t _length) const
4213	{ return doIndexOf(c, start, _length); }
4214
4215	inline int32_t
4216	UnicodeString::indexOf(UChar32 c,
4217	int32_t start,
4218	int32_t _length) const
4219	{ return doIndexOf(c, start, _length); }
4220
4221	inline int32_t
4222	UnicodeString::indexOf(char16_t c) const
4223	{ return doIndexOf(c, `0`, length()); }
4224
4225	inline int32_t
4226	UnicodeString::indexOf(UChar32 c) const
4227	{ return indexOf(c, `0`, length()); }
4228
4229	inline int32_t
4230	UnicodeString::indexOf(char16_t c,
4231	int32_t start) const {
4232	pinIndex(start);
4233	return doIndexOf(c, start, length() - start);
4234	}
4235
4236	inline int32_t
4237	UnicodeString::indexOf(UChar32 c,
4238	int32_t start) const {
4239	pinIndex(start);
4240	return indexOf(c, start, length() - start);
4241	}
4242
4243	inline int32_t
4244	UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4245	int32_t srcLength,
4246	int32_t start,
4247	int32_t _length) const
4248	{ return lastIndexOf(srcChars, `0`, srcLength, start, _length); }
4249
4250	inline int32_t
4251	UnicodeString::lastIndexOf(const char16_t *srcChars,
4252	int32_t srcLength,
4253	int32_t start) const {
4254	pinIndex(start);
4255	return lastIndexOf(srcChars, `0`, srcLength, start, length() - start);
4256	}
4257
4258	inline int32_t
4259	UnicodeString::lastIndexOf(const UnicodeString& srcText,
4260	int32_t srcStart,
4261	int32_t srcLength,
4262	int32_t start,
4263	int32_t _length) const
4264	{
4265	if(!srcText.isBogus()) {
4266	srcText.pinIndices(srcStart, srcLength);
4267	if(srcLength > `0`) {
4268	return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4269	}
4270	}
4271	return -`1`;
4272	}
4273
4274	inline int32_t
4275	UnicodeString::lastIndexOf(const UnicodeString& text,
4276	int32_t start,
4277	int32_t _length) const
4278	{ return lastIndexOf(text, `0`, text.length(), start, _length); }
4279
4280	inline int32_t
4281	UnicodeString::lastIndexOf(const UnicodeString& text,
4282	int32_t start) const {
4283	pinIndex(start);
4284	return lastIndexOf(text, `0`, text.length(), start, length() - start);
4285	}
4286
4287	inline int32_t
4288	UnicodeString::lastIndexOf(const UnicodeString& text) const
4289	{ return lastIndexOf(text, `0`, text.length(), `0`, length()); }
4290
4291	inline int32_t
4292	UnicodeString::lastIndexOf(char16_t c,
4293	int32_t start,
4294	int32_t _length) const
4295	{ return doLastIndexOf(c, start, _length); }
4296
4297	inline int32_t
4298	UnicodeString::lastIndexOf(UChar32 c,
4299	int32_t start,
4300	int32_t _length) const {
4301	return doLastIndexOf(c, start, _length);
4302	}
4303
4304	inline int32_t
4305	UnicodeString::lastIndexOf(char16_t c) const
4306	{ return doLastIndexOf(c, `0`, length()); }
4307
4308	inline int32_t
4309	UnicodeString::lastIndexOf(UChar32 c) const {
4310	return lastIndexOf(c, `0`, length());
4311	}
4312
4313	inline int32_t
4314	UnicodeString::lastIndexOf(char16_t c,
4315	int32_t start) const {
4316	pinIndex(start);
4317	return doLastIndexOf(c, start, length() - start);
4318	}
4319
4320	inline int32_t
4321	UnicodeString::lastIndexOf(UChar32 c,
4322	int32_t start) const {
4323	pinIndex(start);
4324	return lastIndexOf(c, start, length() - start);
4325	}
4326
4327	inline UBool
4328	UnicodeString::startsWith(const UnicodeString& text) const
4329	{ return compare(`0`, text.length(), text, `0`, text.length()) == `0`; }
4330
4331	inline UBool
4332	UnicodeString::startsWith(const UnicodeString& srcText,
4333	int32_t srcStart,
4334	int32_t srcLength) const
4335	{ return doCompare(`0`, srcLength, srcText, srcStart, srcLength) == `0`; }
4336
4337	inline UBool
4338	UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4339	if(srcLength < `0`) {
4340	srcLength = u_strlen(toUCharPtr(srcChars));
4341	}
4342	return doCompare(`0`, srcLength, srcChars, `0`, srcLength) == `0`;
4343	}
4344
4345	inline UBool
4346	UnicodeString::startsWith(const char16_t srcChars, int32_t srcStart, int32_t srcLength) const* {
4347	if(srcLength < `0`) {
4348	srcLength = u_strlen(toUCharPtr(srcChars));
4349	}
4350	return doCompare(`0`, srcLength, srcChars, srcStart, srcLength) == `0`;
4351	}
4352
4353	inline UBool
4354	UnicodeString::endsWith(const UnicodeString& text) const
4355	{ return doCompare(length() - text.length(), text.length(),
4356	text, `0`, text.length()) == `0`; }
4357
4358	inline UBool
4359	UnicodeString::endsWith(const UnicodeString& srcText,
4360	int32_t srcStart,
4361	int32_t srcLength) const {
4362	srcText.pinIndices(srcStart, srcLength);
4363	return doCompare(length() - srcLength, srcLength,
4364	srcText, srcStart, srcLength) == `0`;
4365	}
4366
4367	inline UBool
4368	UnicodeString::endsWith(ConstChar16Ptr srcChars,
4369	int32_t srcLength) const {
4370	if(srcLength < `0`) {
4371	srcLength = u_strlen(toUCharPtr(srcChars));
4372	}
4373	return doCompare(length() - srcLength, srcLength,
4374	srcChars, `0`, srcLength) == `0`;
4375	}
4376
4377	inline UBool
4378	UnicodeString::endsWith(const char16_t *srcChars,
4379	int32_t srcStart,
4380	int32_t srcLength) const {
4381	if(srcLength < `0`) {
4382	srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4383	}
4384	return doCompare(length() - srcLength, srcLength,
4385	srcChars, srcStart, srcLength) == `0`;
4386	}
4387
4388	//========================================
4389	// replace
4390	//========================================
4391	inline UnicodeString&
4392	UnicodeString::replace(int32_t start,
4393	int32_t _length,
4394	const UnicodeString& srcText)
4395	{ return doReplace(start, _length, srcText, `0`, srcText.length()); }
4396
4397	inline UnicodeString&
4398	UnicodeString::replace(int32_t start,
4399	int32_t _length,
4400	const UnicodeString& srcText,
4401	int32_t srcStart,
4402	int32_t srcLength)
4403	{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4404
4405	inline UnicodeString&
4406	UnicodeString::replace(int32_t start,
4407	int32_t _length,
4408	ConstChar16Ptr srcChars,
4409	int32_t srcLength)
4410	{ return doReplace(start, _length, srcChars, `0`, srcLength); }
4411
4412	inline UnicodeString&
4413	UnicodeString::replace(int32_t start,
4414	int32_t _length,
4415	const char16_t *srcChars,
4416	int32_t srcStart,
4417	int32_t srcLength)
4418	{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4419
4420	inline UnicodeString&
4421	UnicodeString::replace(int32_t start,
4422	int32_t _length,
4423	char16_t srcChar)
4424	{ return doReplace(start, _length, &srcChar, `0`, `1`); }
4425
4426	inline UnicodeString&
4427	UnicodeString::replaceBetween(int32_t start,
4428	int32_t limit,
4429	const UnicodeString& srcText)
4430	{ return doReplace(start, limit - start, srcText, `0`, srcText.length()); }
4431
4432	inline UnicodeString&
4433	UnicodeString::replaceBetween(int32_t start,
4434	int32_t limit,
4435	const UnicodeString& srcText,
4436	int32_t srcStart,
4437	int32_t srcLimit)
4438	{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4439
4440	inline UnicodeString&
4441	UnicodeString::findAndReplace(const UnicodeString& oldText,
4442	const UnicodeString& newText)
4443	{ return findAndReplace(`0`, length(), oldText, `0`, oldText.length(),
4444	newText, `0`, newText.length()); }
4445
4446	inline UnicodeString&
4447	UnicodeString::findAndReplace(int32_t start,
4448	int32_t _length,
4449	const UnicodeString& oldText,
4450	const UnicodeString& newText)
4451	{ return findAndReplace(start, _length, oldText, `0`, oldText.length(),
4452	newText, `0`, newText.length()); }
4453
4454	// ============================
4455	// extract
4456	// ============================
4457	inline void
4458	UnicodeString::doExtract(int32_t start,
4459	int32_t _length,
4460	UnicodeString& target) const
4461	{ target.replace(`0`, target.length(), *this, start, _length); }
4462
4463	inline void
4464	UnicodeString::extract(int32_t start,
4465	int32_t _length,
4466	Char16Ptr target,
4467	int32_t targetStart) const
4468	{ doExtract(start, _length, target, targetStart); }
4469
4470	inline void
4471	UnicodeString::extract(int32_t start,
4472	int32_t _length,
4473	UnicodeString& target) const
4474	{ doExtract(start, _length, target); }
4475
4476	#if !UCONFIG_NO_CONVERSION
4477
4478	inline int32_t
4479	UnicodeString::extract(int32_t start,
4480	int32_t _length,
4481	char *dst,
4482	const char codepage) const*
4483
4484	{
4485	// This dstSize value will be checked explicitly
4486	return extract(start, _length, dst, dst!=`0` ? `0xffffffff` : `0`, codepage);
4487	}
4488
4489	#endif
4490
4491	inline void
4492	UnicodeString::extractBetween(int32_t start,
4493	int32_t limit,
4494	char16_t *dst,
4495	int32_t dstStart) const {
4496	pinIndex(start);
4497	pinIndex(limit);
4498	doExtract(start, limit - start, dst, dstStart);
4499	}
4500
4501	inline UnicodeString
4502	UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4503	return tempSubString(start, limit - start);
4504	}
4505
4506	inline char16_t
4507	UnicodeString::doCharAt(int32_t offset) const
4508	{
4509	if((uint32_t)offset < (uint32_t)length()) {
4510	return getArrayStart()[offset];
4511	} else {
4512	return kInvalidUChar;
4513	}
4514	}
4515
4516	inline char16_t
4517	UnicodeString::charAt(int32_t offset) const
4518	{ return doCharAt(offset); }
4519
4520	inline char16_t
4521	UnicodeString::operator[] (int32_t offset) const
4522	{ return doCharAt(offset); }
4523
4524	inline UBool
4525	UnicodeString::isEmpty() const {
4526	// Arithmetic or logical right shift does not matter: only testing for 0.
4527	return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == `0`;
4528	}
4529
4530	//========================================
4531	// Write implementation methods
4532	//========================================
4533	inline void
4534	UnicodeString::setZeroLength() {
4535	fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4536	}
4537
4538	inline void
4539	UnicodeString::setShortLength(int32_t len) {
4540	// requires 0 <= len <= kMaxShortLength
4541	fUnion.fFields.fLengthAndFlags =
4542	(int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) \| (len << kLengthShift));
4543	}
4544
4545	inline void
4546	UnicodeString::setLength(int32_t len) {
4547	if(len <= kMaxShortLength) {
4548	setShortLength(len);
4549	} else {
4550	fUnion.fFields.fLengthAndFlags \|= kLengthIsLarge;
4551	fUnion.fFields.fLength = len;
4552	}
4553	}
4554
4555	inline void
4556	UnicodeString::setToEmpty() {
4557	fUnion.fFields.fLengthAndFlags = kShortString;
4558	}
4559
4560	inline void
4561	UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4562	setLength(len);
4563	fUnion.fFields.fArray = array;
4564	fUnion.fFields.fCapacity = capacity;
4565	}
4566
4567	inline UnicodeString&
4568	UnicodeString::operator= (char16_t ch)
4569	{ return doReplace(`0`, length(), &ch, `0`, `1`); }
4570
4571	inline UnicodeString&
4572	UnicodeString::operator= (UChar32 ch)
4573	{ return replace(`0`, length(), ch); }
4574
4575	inline UnicodeString&
4576	UnicodeString::setTo(const UnicodeString& srcText,
4577	int32_t srcStart,
4578	int32_t srcLength)
4579	{
4580	unBogus();
4581	return doReplace(`0`, length(), srcText, srcStart, srcLength);
4582	}
4583
4584	inline UnicodeString&
4585	UnicodeString::setTo(const UnicodeString& srcText,
4586	int32_t srcStart)
4587	{
4588	unBogus();
4589	srcText.pinIndex(srcStart);
4590	return doReplace(`0`, length(), srcText, srcStart, srcText.length() - srcStart);
4591	}
4592
4593	inline UnicodeString&
4594	UnicodeString::setTo(const UnicodeString& srcText)
4595	{
4596	return copyFrom(srcText);
4597	}
4598
4599	inline UnicodeString&
4600	UnicodeString::setTo(const char16_t *srcChars,
4601	int32_t srcLength)
4602	{
4603	unBogus();
4604	return doReplace(`0`, length(), srcChars, `0`, srcLength);
4605	}
4606
4607	inline UnicodeString&
4608	UnicodeString::setTo(char16_t srcChar)
4609	{
4610	unBogus();
4611	return doReplace(`0`, length(), &srcChar, `0`, `1`);
4612	}
4613
4614	inline UnicodeString&
4615	UnicodeString::setTo(UChar32 srcChar)
4616	{
4617	unBogus();
4618	return replace(`0`, length(), srcChar);
4619	}
4620
4621	inline UnicodeString&
4622	UnicodeString::append(const UnicodeString& srcText,
4623	int32_t srcStart,
4624	int32_t srcLength)
4625	{ return doAppend(srcText, srcStart, srcLength); }
4626
4627	inline UnicodeString&
4628	UnicodeString::append(const UnicodeString& srcText)
4629	{ return doAppend(srcText, `0`, srcText.length()); }
4630
4631	inline UnicodeString&
4632	UnicodeString::append(const char16_t *srcChars,
4633	int32_t srcStart,
4634	int32_t srcLength)
4635	{ return doAppend(srcChars, srcStart, srcLength); }
4636
4637	inline UnicodeString&
4638	UnicodeString::append(ConstChar16Ptr srcChars,
4639	int32_t srcLength)
4640	{ return doAppend(srcChars, `0`, srcLength); }
4641
4642	inline UnicodeString&
4643	UnicodeString::append(char16_t srcChar)
4644	{ return doAppend(&srcChar, `0`, `1`); }
4645
4646	inline UnicodeString&
4647	UnicodeString::operator+= (char16_t ch)
4648	{ return doAppend(&ch, `0`, `1`); }
4649
4650	inline UnicodeString&
4651	UnicodeString::operator+= (UChar32 ch) {
4652	return append(ch);
4653	}
4654
4655	inline UnicodeString&
4656	UnicodeString::operator+= (const UnicodeString& srcText)
4657	{ return doAppend(srcText, `0`, srcText.length()); }
4658
4659	inline UnicodeString&
4660	UnicodeString::insert(int32_t start,
4661	const UnicodeString& srcText,
4662	int32_t srcStart,
4663	int32_t srcLength)
4664	{ return doReplace(start, `0`, srcText, srcStart, srcLength); }
4665
4666	inline UnicodeString&
4667	UnicodeString::insert(int32_t start,
4668	const UnicodeString& srcText)
4669	{ return doReplace(start, `0`, srcText, `0`, srcText.length()); }
4670
4671	inline UnicodeString&
4672	UnicodeString::insert(int32_t start,
4673	const char16_t *srcChars,
4674	int32_t srcStart,
4675	int32_t srcLength)
4676	{ return doReplace(start, `0`, srcChars, srcStart, srcLength); }
4677
4678	inline UnicodeString&
4679	UnicodeString::insert(int32_t start,
4680	ConstChar16Ptr srcChars,
4681	int32_t srcLength)
4682	{ return doReplace(start, `0`, srcChars, `0`, srcLength); }
4683
4684	inline UnicodeString&
4685	UnicodeString::insert(int32_t start,
4686	char16_t srcChar)
4687	{ return doReplace(start, `0`, &srcChar, `0`, `1`); }
4688
4689	inline UnicodeString&
4690	UnicodeString::insert(int32_t start,
4691	UChar32 srcChar)
4692	{ return replace(start, `0`, srcChar); }
4693
4694
4695	inline UnicodeString&
4696	UnicodeString::remove()
4697	{
4698	// remove() of a bogus string makes the string empty and non-bogus
4699	if(isBogus()) {
4700	setToEmpty();
4701	} else {
4702	setZeroLength();
4703	}
4704	return *this;
4705	}
4706
4707	inline UnicodeString&
4708	UnicodeString::remove(int32_t start,
4709	int32_t _length)
4710	{
4711	if(start <= `0` && _length == INT32_MAX) {
4712	// remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4713	return remove();
4714	}
4715	return doReplace(start, _length, NULL, `0`, `0`);
4716	}
4717
4718	inline UnicodeString&
4719	UnicodeString::removeBetween(int32_t start,
4720	int32_t limit)
4721	{ return doReplace(start, limit - start, NULL, `0`, `0`); }
4722
4723	inline UnicodeString &
4724	UnicodeString::retainBetween(int32_t start, int32_t limit) {
4725	truncate(limit);
4726	return doReplace(`0`, start, NULL, `0`, `0`);
4727	}
4728
4729	inline UBool
4730	UnicodeString::truncate(int32_t targetLength)
4731	{
4732	if(isBogus() && targetLength == `0`) {
4733	// truncate(0) of a bogus string makes the string empty and non-bogus
4734	unBogus();
4735	return FALSE;
4736	} else if((uint32_t)targetLength < (uint32_t)length()) {
4737	setLength(targetLength);
4738	return TRUE;
4739	} else {
4740	return FALSE;
4741	}
4742	}
4743
4744	inline UnicodeString&
4745	UnicodeString::reverse()
4746	{ return doReverse(`0`, length()); }
4747
4748	inline UnicodeString&
4749	UnicodeString::reverse(int32_t start,
4750	int32_t _length)
4751	{ return doReverse(start, _length); }
4752
4753	U_NAMESPACE_END
4754
4755	#endif /* U_SHOW_CPLUSPLUS_API */
4756
4757	#endif
4758

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/unicode/unistr.h