unistr.h source code [include/unicode/unistr.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1998-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File unistr.h
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 09/25/98 stephen Creation.
15	* 11/11/98 stephen Changed per 11/9 code review.
16	* 04/20/99 stephen Overhauled per 4/16 code review.
17	* 11/18/99 aliu Made to inherit from Replaceable. Added method
18	* handleReplaceBetween(); other methods unchanged.
19	* 06/25/01 grhoten Remove dependency on iostream.
20	******************************************************************************
21	*/
22
23	#ifndef UNISTR_H
24	#define UNISTR_H
25
26	/**
27	* \file
28	* \brief C++ API: Unicode String
29	*/
30
31	#include <cstddef>
32	#include "unicode/utypes.h"
33	#include "unicode/char16ptr.h"
34	#include "unicode/rep.h"
35	#include "unicode/std_string.h"
36	#include "unicode/stringpiece.h"
37	#include "unicode/bytestream.h"
38
39	struct UConverter; // unicode/ucnv.h
40
41	#ifndef USTRING_H
42	/**
43	* \ingroup ustring_ustrlen
44	*/
45	U_STABLE int32_t U_EXPORT2
46	u_strlen(const UChar *s);
47	#endif
48
49	U_NAMESPACE_BEGIN
50
51	#if !UCONFIG_NO_BREAK_ITERATION
52	class BreakIterator; // unicode/brkiter.h
53	#endif
54	class Edits;
55
56	U_NAMESPACE_END
57
58	// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
59	/**
60	* Internal string case mapping function type.
61	* All error checking must be done.
62	* src and dest must not overlap.
63	* @internal
64	*/
65	typedef int32_t U_CALLCONV
66	UStringCaseMapper(int32_t caseLocale, uint32_t options,
67	#if !UCONFIG_NO_BREAK_ITERATION
68	icu::BreakIterator *iter,
69	#endif
70	char16_t *dest, int32_t destCapacity,
71	const char16_t *src, int32_t srcLength,
72	icu::Edits *edits,
73	UErrorCode &errorCode);
74
75	U_NAMESPACE_BEGIN
76
77	class Locale; // unicode/locid.h
78	class StringCharacterIterator;
79	class UnicodeStringAppendable; // unicode/appendable.h
80
81	/* The <iostream> include has been moved to unicode/ustream.h */
82
83	/**
84	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
85	* which constructs a Unicode string from an invariant-character char * string.
86	* About invariant characters see utypes.h.
87	* This constructor has no runtime dependency on conversion code and is
88	* therefore recommended over ones taking a charset name string
89	* (where the empty string "" indicates invariant-character conversion).
90	*
91	* @stable ICU 3.2
92	*/
93	#define US_INV icu::UnicodeString::kInvariant
94
95	/**
96	* Unicode String literals in C++.
97	*
98	* Note: these macros are not recommended for new code.
99	* Prior to the availability of C++11 and u"unicode string literals",
100	* these macros were provided for portability and efficiency when
101	* initializing UnicodeStrings from literals.
102	*
103	* They work only for strings that contain "invariant characters", i.e.,
104	* only latin letters, digits, and some punctuation.
105	* See utypes.h for details.
106	*
107	* The string parameter must be a C string literal.
108	* The length of the string, not including the terminating
109	* <code>NUL</code>, must be specified as a constant.
110	* @stable ICU 2.0
111	*/
112	#if !U_CHAR16_IS_TYPEDEF
113	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
114	#else
115	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length)
116	#endif
117
118	/**
119	* Unicode String literals in C++.
120	* Dependent on the platform properties, different UnicodeString
121	* constructors should be used to create a UnicodeString object from
122	* a string literal.
123	* The macros are defined for improved performance.
124	* They work only for strings that contain "invariant characters", i.e.,
125	* only latin letters, digits, and some punctuation.
126	* See utypes.h for details.
127	*
128	* The string parameter must be a C string literal; it is forwarded to UNICODE_STRING with -1 as the length.
129	* @stable ICU 2.0
130	*/
131	#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
132
133	/**
134	* \def UNISTR_FROM_CHAR_EXPLICIT
135	* This can be defined (before this header is included) to be empty or "explicit".
136	* If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
137	* constructors are marked as explicit, preventing their inadvertent use.
138	* @stable ICU 49
139	*/
140	#ifndef UNISTR_FROM_CHAR_EXPLICIT
141	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
142	// Auto-"explicit" in ICU library code (one of the *_IMPLEMENTATION macros is defined).
143	# define UNISTR_FROM_CHAR_EXPLICIT explicit
144	# else
145	// Empty by default for source code compatibility.
146	# define UNISTR_FROM_CHAR_EXPLICIT
147	# endif
148	#endif
149
150	/**
151	* \def UNISTR_FROM_STRING_EXPLICIT
152	* This can be defined (before this header is included) to be empty or "explicit".
153	* If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
154	* constructors are marked as explicit, preventing their inadvertent use.
155	*
156	* In particular, this helps prevent accidentally depending on ICU conversion code
157	* by passing a string literal into an API with a const UnicodeString & parameter.
158	* @stable ICU 49
159	*/
160	#ifndef UNISTR_FROM_STRING_EXPLICIT
161	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
162	// Auto-"explicit" in ICU library code (one of the *_IMPLEMENTATION macros is defined).
163	# define UNISTR_FROM_STRING_EXPLICIT explicit
164	# else
165	// Empty by default for source code compatibility.
166	# define UNISTR_FROM_STRING_EXPLICIT
167	# endif
168	#endif
169
170	/**
171	* \def UNISTR_OBJECT_SIZE
172	* Desired sizeof(UnicodeString) in bytes.
173	* It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
174	* The object size may want to be a multiple of 16 bytes,
175	* which is a common granularity for heap allocation.
176	*
177	* Any space inside the object beyond sizeof(vtable pointer) + 2
178	* is available for storing short strings inside the object.
179	* The bigger the object, the longer a string that can be stored inside the object,
180	* without additional heap allocation.
181	*
182	* Depending on a platform's pointer size, pointer alignment requirements,
183	* and struct padding, the compiler will usually round up sizeof(UnicodeString)
184	* to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
185	* to hold the fields for heap-allocated strings.
186	* Such a minimum size also ensures that the object is easily large enough
187	* to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
188	*
189	* sizeof(UnicodeString) >= 48 should work for all known platforms.
190	*
191	* For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
192	* sizeof(UnicodeString) = 64 would leave space for
193	* (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
194	* char16_ts stored inside the object.
195	*
196	* The minimum object size on a 64-bit machine would be
197	* 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
198	* and the internal buffer would hold up to 11 char16_ts in that case.
199	*
200	* @see U16_MAX_LENGTH
201	* @stable ICU 56
202	*/
203	#ifndef UNISTR_OBJECT_SIZE
204	# define UNISTR_OBJECT_SIZE 64
205	#endif
206
207	/**
208	* UnicodeString is a string class that stores Unicode characters directly and provides
209	* similar functionality as the Java String and StringBuffer/StringBuilder classes.
210	* It is a concrete implementation of the abstract class Replaceable (for transliteration).
211	*
212	* A UnicodeString may also "alias" an external array of characters
213	* (that is, point to it, rather than own the array)
214	* whose lifetime must then at least match the lifetime of the aliasing object.
215	* This aliasing may be preserved when returning a UnicodeString by value,
216	* depending on the compiler and the function implementation,
217	* via Return Value Optimization (RVO) or the move assignment operator.
218	* (However, the copy assignment operator does not preserve aliasing.)
219	* For details see the description of storage models at the end of the class API docs
220	* and in the User Guide chapter linked from there.
221	*
222	* The UnicodeString class is not suitable for subclassing.
223	*
224	* <p>For an overview of Unicode strings in C and C++ see the
225	* <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
226	*
227	* <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
228	* A Unicode character may be stored with either one code unit
229	* (the most common case) or with a matched pair of special code units
230	* ("surrogates"). The data type for code units is char16_t.
231	* For single-character handling, a Unicode character code <em>point</em> is a value
232	* in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
233	*
234	* <p>Indexes and offsets into and lengths of strings always count code units, not code points.
235	* This is the same as with multi-byte char* strings in traditional string handling.
236	* Operations on partial strings typically do not test for code point boundaries.
237	* If necessary, the user needs to take care of such boundaries by testing for the code unit
238	* values or by using functions like
239	* UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
240	* (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
241	*
242	* UnicodeString methods are more lenient with regard to input parameter values
243	* than other ICU APIs. In particular:
244	* - If indexes are out of bounds for a UnicodeString object
245	* (<0 or >length()) then they are "pinned" to the nearest boundary.
246	* - If the buffer passed to an insert/append/replace operation is owned by the
247	* target object, e.g., calling str.append(str), an extra copy may take place
248	* to ensure safety.
249	* - If primitive string pointer values (e.g., const char16_t * or char *)
250	* for input strings are NULL, then those input string parameters are treated
251	* as if they pointed to an empty string.
252	* However, this is <em>not</em> the case for char * parameters for charset names
253	* or other IDs.
254	* - Most UnicodeString methods do not take a UErrorCode parameter because
255	* there are usually very few opportunities for failure other than a shortage
256	* of memory, error codes in low-level C++ string methods would be inconvenient,
257	* and the error code as the last parameter (ICU convention) would prevent
258	* the use of default parameter values.
259	* Instead, such methods set the UnicodeString into a "bogus" state
260	* (see isBogus()) if an error occurs.
261	*
262	* In string comparisons, two UnicodeString objects that are both "bogus"
263	* compare equal (to be transitive and prevent endless loops in sorting),
264	* and a "bogus" string compares less than any non-"bogus" one.
265	*
266	* Const UnicodeString methods are thread-safe. Multiple threads can use
267	* const methods on the same UnicodeString object simultaneously,
268	* but non-const methods must not be called concurrently (in multiple threads)
269	* with any other (const or non-const) methods.
270	*
271	* Similarly, const UnicodeString & parameters are thread-safe.
272	* One object may be passed in as such a parameter concurrently in multiple threads.
273	* This includes the const UnicodeString & parameters for
274	* copy construction, assignment, and cloning.
275	*
276	* <p>UnicodeString uses several storage methods.
277	* String contents can be stored inside the UnicodeString object itself,
278	* in an allocated and shared buffer, or in an outside buffer that is "aliased".
279	* Most of this is done transparently, but careful aliasing in particular provides
280	* significant performance improvements.
281	* Also, the internal buffer is accessible via special functions.
282	* For details see the
283	* <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p>
284	*
285	* @see utf.h
286	* @see CharacterIterator
287	* @stable ICU 2.0
288	*/
289	class U_COMMON_API UnicodeString : public Replaceable
290	{
291	public:
292
293	/**
294	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
295	* which constructs a Unicode string from an invariant-character char * string.
296	* Use the macro US_INV instead of the full qualification for this value.
297	*
298	* @see US_INV
299	* @stable ICU 3.2
300	*/
301	enum EInvariant {
302	/**
303	* @see EInvariant
304	* @stable ICU 3.2
305	*/
306	kInvariant
307	};
308
309	//========================================
310	// Read-only operations
311	//========================================
312
313	/* Comparison - bitwise only - for international comparison use collation */
314
315	/**
316	* Equality operator. Performs only bitwise comparison.
317	* @param text The UnicodeString to compare to this one.
318	* @return TRUE if <TT>text</TT> contains the same characters as this one,
319	* FALSE otherwise.
320	* @stable ICU 2.0
321	*/
322	inline UBool operator== (const UnicodeString& text) const;
323
324	/**
325	* Inequality operator. Performs only bitwise comparison.
326	* @param text The UnicodeString to compare to this one.
327	* @return FALSE if <TT>text</TT> contains the same characters as this one,
328	* TRUE otherwise.
329	* @stable ICU 2.0
330	*/
331	inline UBool operator!= (const UnicodeString& text) const;
332
333	/**
334	* Greater than operator. Performs only bitwise comparison.
335	* @param text The UnicodeString to compare to this one.
336	* @return TRUE if the characters in this are bitwise
337	* greater than the characters in <code>text</code>, FALSE otherwise
338	* @stable ICU 2.0
339	*/
340	inline UBool operator> (const UnicodeString& text) const;
341
342	/**
343	* Less than operator. Performs only bitwise comparison.
344	* @param text The UnicodeString to compare to this one.
345	* @return TRUE if the characters in this are bitwise
346	* less than the characters in <code>text</code>, FALSE otherwise
347	* @stable ICU 2.0
348	*/
349	inline UBool operator< (const UnicodeString& text) const;
350
351	/**
352	* Greater than or equal operator. Performs only bitwise comparison.
353	* @param text The UnicodeString to compare to this one.
354	* @return TRUE if the characters in this are bitwise
355	* greater than or equal to the characters in <code>text</code>, FALSE otherwise
356	* @stable ICU 2.0
357	*/
358	inline UBool operator>= (const UnicodeString& text) const;
359
360	/**
361	* Less than or equal operator. Performs only bitwise comparison.
362	* @param text The UnicodeString to compare to this one.
363	* @return TRUE if the characters in this are bitwise
364	* less than or equal to the characters in <code>text</code>, FALSE otherwise
365	* @stable ICU 2.0
366	*/
367	inline UBool operator<= (const UnicodeString& text) const;
368
369	/**
370	* Compare the characters bitwise in this UnicodeString to
371	* the characters in <code>text</code>.
372	* @param text The UnicodeString to compare to this one.
373	* @return The result of bitwise character comparison: 0 if this
374	* contains the same characters as <code>text</code>, -1 if the characters in
375	* this are bitwise less than the characters in <code>text</code>, +1 if the
376	* characters in this are bitwise greater than the characters
377	* in <code>text</code>.
378	* @stable ICU 2.0
379	*/
380	inline int8_t compare(const UnicodeString& text) const;
381
382	/**
383	* Compare the characters bitwise in the range
384	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
385	* in the <b>entire string</b> <TT>text</TT>.
386	* (The parameters "start" and "length" are not applied to the other text "text".)
387	* @param start the offset at which the compare operation begins
388	* @param length the number of characters of text to compare.
389	* @param text the other text to be compared against this string.
390	* @return The result of bitwise character comparison: 0 if this
391	* contains the same characters as <code>text</code>, -1 if the characters in
392	* this are bitwise less than the characters in <code>text</code>, +1 if the
393	* characters in this are bitwise greater than the characters
394	* in <code>text</code>.
395	* @stable ICU 2.0
396	*/
397	inline int8_t compare(int32_t start,
398	int32_t length,
399	const UnicodeString& text) const;
400
401	/**
402	* Compare the characters bitwise in the range
403	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
404	* in <TT>srcText</TT> in the range
405	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
406	* @param start the offset at which the compare operation begins
407	* @param length the number of characters in this to compare.
408	* @param srcText the text to be compared
409	* @param srcStart the offset into <TT>srcText</TT> to start comparison
410	* @param srcLength the number of characters in <TT>srcText</TT> to compare
411	* @return The result of bitwise character comparison: 0 if this
412	* contains the same characters as <code>srcText</code>, -1 if the characters in
413	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
414	* characters in this are bitwise greater than the characters
415	* in <code>srcText</code>.
416	* @stable ICU 2.0
417	*/
418	inline int8_t compare(int32_t start,
419	int32_t length,
420	const UnicodeString& srcText,
421	int32_t srcStart,
422	int32_t srcLength) const;
423
424	/**
425	* Compare the characters bitwise in this UnicodeString with the first
426	* <TT>srcLength</TT> characters in <TT>srcChars</TT>.
427	* @param srcChars The characters to compare to this UnicodeString.
428	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
429	* @return The result of bitwise character comparison: 0 if this
430	* contains the same characters as <code>srcChars</code>, -1 if the characters in
431	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
432	* characters in this are bitwise greater than the characters
433	* in <code>srcChars</code>.
434	* @stable ICU 2.0
435	*/
436	inline int8_t compare(ConstChar16Ptr srcChars,
437	int32_t srcLength) const;
438
439	/**
440	* Compare the characters bitwise in the range
441	* [<TT>start</TT>, <TT>start + length</TT>) with the first
442	* <TT>length</TT> characters in <TT>srcChars</TT>.
443	* @param start the offset at which the compare operation begins
444	* @param length the number of characters to compare.
445	* @param srcChars pointer to the characters to be compared
446	* @return The result of bitwise character comparison: 0 if this
447	* contains the same characters as <code>srcChars</code>, -1 if the characters in
448	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
449	* characters in this are bitwise greater than the characters
450	* in <code>srcChars</code>.
451	* @stable ICU 2.0
452	*/
453	inline int8_t compare(int32_t start,
454	int32_t length,
455	const char16_t *srcChars) const;
456
457	/**
458	* Compare the characters bitwise in the range
459	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
460	* in <TT>srcChars</TT> in the range
461	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
462	* @param start the offset at which the compare operation begins
463	* @param length the number of characters in this to compare
464	* @param srcChars the characters to be compared
465	* @param srcStart the offset into <TT>srcChars</TT> to start comparison
466	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
467	* @return The result of bitwise character comparison: 0 if this
468	* contains the same characters as <code>srcChars</code>, -1 if the characters in
469	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
470	* characters in this are bitwise greater than the characters
471	* in <code>srcChars</code>.
472	* @stable ICU 2.0
473	*/
474	inline int8_t compare(int32_t start,
475	int32_t length,
476	const char16_t *srcChars,
477	int32_t srcStart,
478	int32_t srcLength) const;
479
480	/**
481	* Compare the characters bitwise in the range
482	* [<TT>start</TT>, <TT>limit</TT>) with the characters
483	* in <TT>srcText</TT> in the range
484	* [<TT>srcStart</TT>, <TT>srcLimit</TT>).
485	* @param start the offset at which the compare operation begins
486	* @param limit the offset immediately following the compare operation
487	* @param srcText the text to be compared
488	* @param srcStart the offset into <TT>srcText</TT> to start comparison
489	* @param srcLimit the offset into <TT>srcText</TT> to limit comparison
490	* @return The result of bitwise character comparison: 0 if this
491	* contains the same characters as <code>srcText</code>, -1 if the characters in
492	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
493	* characters in this are bitwise greater than the characters
494	* in <code>srcText</code>.
495	* @stable ICU 2.0
496	*/
497	inline int8_t compareBetween(int32_t start,
498	int32_t limit,
499	const UnicodeString& srcText,
500	int32_t srcStart,
501	int32_t srcLimit) const;
502
503	/**
504	* Compare two Unicode strings in code point order.
505	* The result may be different from the results of compare(), operator<, etc.
506	* if supplementary characters are present:
507	*
508	* In UTF-16, supplementary characters (with code points U+10000 and above) are
509	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
510	* which means that they compare as less than some other BMP characters like U+feff.
511	* This function compares Unicode strings in code point order.
512	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
513	*
514	* @param text Another string to compare this one to.
515	* @return a negative/zero/positive integer corresponding to whether
516	* this string is less than/equal to/greater than the second one
517	* in code point order
518	* @stable ICU 2.0
519	*/
520	inline int8_t compareCodePointOrder(const UnicodeString& text) const;
521
522	/**
523	* Compare two Unicode strings in code point order.
524	* The result may be different from the results of compare(), operator<, etc.
525	* if supplementary characters are present:
526	*
527	* In UTF-16, supplementary characters (with code points U+10000 and above) are
528	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
529	* which means that they compare as less than some other BMP characters like U+feff.
530	* This function compares Unicode strings in code point order.
531	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
532	*
533	* @param start The start offset in this string at which the compare operation begins.
534	* @param length The number of code units from this string to compare.
535	* @param srcText Another string to compare this one to.
536	* @return a negative/zero/positive integer corresponding to whether
537	* this string is less than/equal to/greater than the second one
538	* in code point order
539	* @stable ICU 2.0
540	*/
541	inline int8_t compareCodePointOrder(int32_t start,
542	int32_t length,
543	const UnicodeString& srcText) const;
544
545	/**
546	* Compare two Unicode strings in code point order.
547	* The result may be different from the results of compare(), operator<, etc.
548	* if supplementary characters are present:
549	*
550	* In UTF-16, supplementary characters (with code points U+10000 and above) are
551	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
552	* which means that they compare as less than some other BMP characters like U+feff.
553	* This function compares Unicode strings in code point order.
554	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
555	*
556	* @param start The start offset in this string at which the compare operation begins.
557	* @param length The number of code units from this string to compare.
558	* @param srcText Another string to compare this one to.
559	* @param srcStart The start offset in that string at which the compare operation begins.
560	* @param srcLength The number of code units from that string to compare.
561	* @return a negative/zero/positive integer corresponding to whether
562	* this string is less than/equal to/greater than the second one
563	* in code point order
564	* @stable ICU 2.0
565	*/
566	inline int8_t compareCodePointOrder(int32_t start,
567	int32_t length,
568	const UnicodeString& srcText,
569	int32_t srcStart,
570	int32_t srcLength) const;
571
572	/**
573	* Compare two Unicode strings in code point order.
574	* The result may be different from the results of compare(), operator<, etc.
575	* if supplementary characters are present:
576	*
577	* In UTF-16, supplementary characters (with code points U+10000 and above) are
578	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
579	* which means that they compare as less than some other BMP characters like U+feff.
580	* This function compares Unicode strings in code point order.
581	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
582	*
583	* @param srcChars A pointer to another string to compare this one to.
584	* @param srcLength The number of code units from that string to compare.
585	* @return a negative/zero/positive integer corresponding to whether
586	* this string is less than/equal to/greater than the second one
587	* in code point order
588	* @stable ICU 2.0
589	*/
590	inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
591	int32_t srcLength) const;
592
593	/**
594	* Compare two Unicode strings in code point order.
595	* The result may be different from the results of compare(), operator<, etc.
596	* if supplementary characters are present:
597	*
598	* In UTF-16, supplementary characters (with code points U+10000 and above) are
599	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
600	* which means that they compare as less than some other BMP characters like U+feff.
601	* This function compares Unicode strings in code point order.
602	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
603	*
604	* @param start The start offset in this string at which the compare operation begins.
605	* @param length The number of code units from this string to compare.
606	* @param srcChars A pointer to another string to compare this one to.
607	* @return a negative/zero/positive integer corresponding to whether
608	* this string is less than/equal to/greater than the second one
609	* in code point order
610	* @stable ICU 2.0
611	*/
612	inline int8_t compareCodePointOrder(int32_t start,
613	int32_t length,
614	const char16_t *srcChars) const;
615
616	/**
617	* Compare two Unicode strings in code point order.
618	* The result may be different from the results of compare(), operator<, etc.
619	* if supplementary characters are present:
620	*
621	* In UTF-16, supplementary characters (with code points U+10000 and above) are
622	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
623	* which means that they compare as less than some other BMP characters like U+feff.
624	* This function compares Unicode strings in code point order.
625	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
626	*
627	* @param start The start offset in this string at which the compare operation begins.
628	* @param length The number of code units from this string to compare.
629	* @param srcChars A pointer to another string to compare this one to.
630	* @param srcStart The start offset in that string at which the compare operation begins.
631	* @param srcLength The number of code units from that string to compare.
632	* @return a negative/zero/positive integer corresponding to whether
633	* this string is less than/equal to/greater than the second one
634	* in code point order
635	* @stable ICU 2.0
636	*/
637	inline int8_t compareCodePointOrder(int32_t start,
638	int32_t length,
639	const char16_t *srcChars,
640	int32_t srcStart,
641	int32_t srcLength) const;
642
643	/**
644	* Compare two Unicode strings in code point order.
645	* The result may be different from the results of compare(), operator<, etc.
646	* if supplementary characters are present:
647	*
648	* In UTF-16, supplementary characters (with code points U+10000 and above) are
649	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
650	* which means that they compare as less than some other BMP characters like U+feff.
651	* This function compares Unicode strings in code point order.
652	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
653	*
654	* @param start The start offset in this string at which the compare operation begins.
655	* @param limit The offset after the last code unit from this string to compare.
656	* @param srcText Another string to compare this one to.
657	* @param srcStart The start offset in that string at which the compare operation begins.
658	* @param srcLimit The offset after the last code unit from that string to compare.
659	* @return a negative/zero/positive integer corresponding to whether
660	* this string is less than/equal to/greater than the second one
661	* in code point order
662	* @stable ICU 2.0
663	*/
664	inline int8_t compareCodePointOrderBetween(int32_t start,
665	int32_t limit,
666	const UnicodeString& srcText,
667	int32_t srcStart,
668	int32_t srcLimit) const;
669
670	/**
671	* Compare two strings case-insensitively using full case folding.
672	* This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
673	*
674	* @param text Another string to compare this one to.
675	* @param options A bit set of options:
676	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
677	* Comparison in code unit order with default case folding.
678	*
679	* - U_COMPARE_CODE_POINT_ORDER
680	* Set to choose code point order instead of code unit order
681	* (see u_strCompare for details).
682	*
683	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
684	*
685	* @return A negative, zero, or positive integer indicating the comparison result.
686	* @stable ICU 2.0
687	*/
688	inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
689
690	/**
691	* Compare two strings case-insensitively using full case folding.
692	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
693	*
694	* @param start The start offset in this string at which the compare operation begins.
695	* @param length The number of code units from this string to compare.
696	* @param srcText Another string to compare this one to.
697	* @param options A bit set of options:
698	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
699	* Comparison in code unit order with default case folding.
700	*
701	* - U_COMPARE_CODE_POINT_ORDER
702	* Set to choose code point order instead of code unit order
703	* (see u_strCompare for details).
704	*
705	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
706	*
707	* @return A negative, zero, or positive integer indicating the comparison result.
708	* @stable ICU 2.0
709	*/
710	inline int8_t caseCompare(int32_t start,
711	int32_t length,
712	const UnicodeString& srcText,
713	uint32_t options) const;
714
715	/**
716	* Compare two strings case-insensitively using full case folding.
717	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
718	*
719	* @param start The start offset in this string at which the compare operation begins.
720	* @param length The number of code units from this string to compare.
721	* @param srcText Another string to compare this one to.
722	* @param srcStart The start offset in that string at which the compare operation begins.
723	* @param srcLength The number of code units from that string to compare.
724	* @param options A bit set of options:
725	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
726	* Comparison in code unit order with default case folding.
727	*
728	* - U_COMPARE_CODE_POINT_ORDER
729	* Set to choose code point order instead of code unit order
730	* (see u_strCompare for details).
731	*
732	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
733	*
734	* @return A negative, zero, or positive integer indicating the comparison result.
735	* @stable ICU 2.0
736	*/
737	inline int8_t caseCompare(int32_t start,
738	int32_t length,
739	const UnicodeString& srcText,
740	int32_t srcStart,
741	int32_t srcLength,
742	uint32_t options) const;
743
744	/**
745	* Compare two strings case-insensitively using full case folding.
746	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
747	*
748	* @param srcChars A pointer to another string to compare this one to.
749	* @param srcLength The number of code units from that string to compare.
750	* @param options A bit set of options:
751	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
752	* Comparison in code unit order with default case folding.
753	*
754	* - U_COMPARE_CODE_POINT_ORDER
755	* Set to choose code point order instead of code unit order
756	* (see u_strCompare for details).
757	*
758	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
759	*
760	* @return A negative, zero, or positive integer indicating the comparison result.
761	* @stable ICU 2.0
762	*/
763	inline int8_t caseCompare(ConstChar16Ptr srcChars,
764	int32_t srcLength,
765	uint32_t options) const;
766
767	/**
768	* Compare two strings case-insensitively using full case folding.
769	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
770	*
771	* @param start The start offset in this string at which the compare operation begins.
772	* @param length The number of code units from this string to compare.
773	* @param srcChars A pointer to another string to compare this one to.
774	* @param options A bit set of options:
775	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
776	* Comparison in code unit order with default case folding.
777	*
778	* - U_COMPARE_CODE_POINT_ORDER
779	* Set to choose code point order instead of code unit order
780	* (see u_strCompare for details).
781	*
782	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
783	*
784	* @return A negative, zero, or positive integer indicating the comparison result.
785	* @stable ICU 2.0
786	*/
787	inline int8_t caseCompare(int32_t start,
788	int32_t length,
789	const char16_t *srcChars,
790	uint32_t options) const;
791
792	/**
793	* Compare two strings case-insensitively using full case folding.
794	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
795	*
796	* @param start The start offset in this string at which the compare operation begins.
797	* @param length The number of code units from this string to compare.
798	* @param srcChars A pointer to another string to compare this one to.
799	* @param srcStart The start offset in that string at which the compare operation begins.
800	* @param srcLength The number of code units from that string to compare.
801	* @param options A bit set of options:
802	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
803	* Comparison in code unit order with default case folding.
804	*
805	* - U_COMPARE_CODE_POINT_ORDER
806	* Set to choose code point order instead of code unit order
807	* (see u_strCompare for details).
808	*
809	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
810	*
811	* @return A negative, zero, or positive integer indicating the comparison result.
812	* @stable ICU 2.0
813	*/
814	inline int8_t caseCompare(int32_t start,
815	int32_t length,
816	const char16_t *srcChars,
817	int32_t srcStart,
818	int32_t srcLength,
819	uint32_t options) const;
820
821	/**
822	* Compare two strings case-insensitively using full case folding.
823	* This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
824	*
825	* @param start The start offset in this string at which the compare operation begins.
826	* @param limit The offset after the last code unit from this string to compare.
827	* @param srcText Another string to compare this one to.
828	* @param srcStart The start offset in that string at which the compare operation begins.
829	* @param srcLimit The offset after the last code unit from that string to compare.
830	* @param options A bit set of options:
831	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
832	* Comparison in code unit order with default case folding.
833	*
834	* - U_COMPARE_CODE_POINT_ORDER
835	* Set to choose code point order instead of code unit order
836	* (see u_strCompare for details).
837	*
838	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
839	*
840	* @return A negative, zero, or positive integer indicating the comparison result.
841	* @stable ICU 2.0
842	*/
843	inline int8_t caseCompareBetween(int32_t start,
844	int32_t limit,
845	const UnicodeString& srcText,
846	int32_t srcStart,
847	int32_t srcLimit,
848	uint32_t options) const;
849
850	/**
851	* Determine if this starts with the characters in <TT>text</TT>
852	* @param text The text to match.
853	* @return TRUE if this starts with the characters in <TT>text</TT>,
854	* FALSE otherwise
855	* @stable ICU 2.0
856	*/
857	inline UBool startsWith(const UnicodeString& text) const;
858
859	/**
860	* Determine if this starts with the characters in <TT>srcText</TT>
861	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
862	* @param srcText The text to match.
863	* @param srcStart the offset into <TT>srcText</TT> to start matching
864	* @param srcLength the number of characters in <TT>srcText</TT> to match
865	* @return TRUE if this starts with the characters in <TT>srcText</TT>,
866	* FALSE otherwise
867	* @stable ICU 2.0
868	*/
869	inline UBool startsWith(const UnicodeString& srcText,
870	int32_t srcStart,
871	int32_t srcLength) const;
872
873	/**
874	* Determine if this starts with the characters in <TT>srcChars</TT>
875	* @param srcChars The characters to match.
876	* @param srcLength the number of characters in <TT>srcChars</TT>
877	* @return TRUE if this starts with the characters in <TT>srcChars</TT>,
878	* FALSE otherwise
879	* @stable ICU 2.0
880	*/
881	inline UBool startsWith(ConstChar16Ptr srcChars,
882	int32_t srcLength) const;
883
884	/**
885	* Determine if this starts with the characters in <TT>srcChars</TT>
886	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
887	* @param srcChars The characters to match.
888	* @param srcStart the offset into <TT>srcChars</TT> to start matching
889	* @param srcLength the number of characters in <TT>srcChars</TT> to match
890	* @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
891	* @stable ICU 2.0
892	*/
893	inline UBool startsWith(const char16_t *srcChars,
894	int32_t srcStart,
895	int32_t srcLength) const;
896
897	/**
898	* Determine if this ends with the characters in <TT>text</TT>
899	* @param text The text to match.
900	* @return TRUE if this ends with the characters in <TT>text</TT>,
901	* FALSE otherwise
902	* @stable ICU 2.0
903	*/
904	inline UBool endsWith(const UnicodeString& text) const;
905
906	/**
907	* Determine if this ends with the characters in <TT>srcText</TT>
908	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
909	* @param srcText The text to match.
910	* @param srcStart the offset into <TT>srcText</TT> to start matching
911	* @param srcLength the number of characters in <TT>srcText</TT> to match
912	* @return TRUE if this ends with the characters in <TT>srcText</TT>,
913	* FALSE otherwise
914	* @stable ICU 2.0
915	*/
916	inline UBool endsWith(const UnicodeString& srcText,
917	int32_t srcStart,
918	int32_t srcLength) const;
919
920	/**
921	* Determine if this ends with the characters in <TT>srcChars</TT>
922	* @param srcChars The characters to match.
923	* @param srcLength the number of characters in <TT>srcChars</TT>
924	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
925	* FALSE otherwise
926	* @stable ICU 2.0
927	*/
928	inline UBool endsWith(ConstChar16Ptr srcChars,
929	int32_t srcLength) const;
930
931	/**
932	* Determine if this ends with the characters in <TT>srcChars</TT>
933	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
934	* @param srcChars The characters to match.
935	* @param srcStart the offset into <TT>srcChars</TT> to start matching
936	* @param srcLength the number of characters in <TT>srcChars</TT> to match
937	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
938	* FALSE otherwise
939	* @stable ICU 2.0
940	*/
941	inline UBool endsWith(const char16_t *srcChars,
942	int32_t srcStart,
943	int32_t srcLength) const;
944
945
946	/* Searching - bitwise only */
947
948	/**
949	* Locate in this the first occurrence of the characters in <TT>text</TT>,
950	* using bitwise comparison.
951	* @param text The text to search for.
952	* @return The offset into this of the start of <TT>text</TT>,
953	* or -1 if not found.
954	* @stable ICU 2.0
955	*/
956	inline int32_t indexOf(const UnicodeString& text) const;
957
958	/**
959	* Locate in this the first occurrence of the characters in <TT>text</TT>
960	* starting at offset <TT>start</TT>, using bitwise comparison.
961	* @param text The text to search for.
962	* @param start The offset at which searching will start.
963	* @return The offset into this of the start of <TT>text</TT>,
964	* or -1 if not found.
965	* @stable ICU 2.0
966	*/
967	inline int32_t indexOf(const UnicodeString& text,
968	int32_t start) const;
969
970	/**
971	* Locate in this the first occurrence in the range
972	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
973	* in <TT>text</TT>, using bitwise comparison.
974	* @param text The text to search for.
975	* @param start The offset at which searching will start.
976	* @param length The number of characters to search
977	* @return The offset into this of the start of <TT>text</TT>,
978	* or -1 if not found.
979	* @stable ICU 2.0
980	*/
981	inline int32_t indexOf(const UnicodeString& text,
982	int32_t start,
983	int32_t length) const;
984
985	/**
986	* Locate in this the first occurrence in the range
987	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
988	* in <TT>srcText</TT> in the range
989	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
990	* using bitwise comparison.
991	* @param srcText The text to search for.
992	* @param srcStart the offset into <TT>srcText</TT> at which
993	* to start matching
994	* @param srcLength the number of characters in <TT>srcText</TT> to match
995	* @param start the offset into this at which to start matching
996	* @param length the number of characters in this to search
997	* @return The offset into this of the start of <TT>srcText</TT>,
998	* or -1 if not found.
999	* @stable ICU 2.0
1000	*/
1001	inline int32_t indexOf(const UnicodeString& srcText,
1002	int32_t srcStart,
1003	int32_t srcLength,
1004	int32_t start,
1005	int32_t length) const;
1006
1007	/**
1008	* Locate in this the first occurrence of the characters in
1009	* <TT>srcChars</TT>
1010	* starting at offset <TT>start</TT>, using bitwise comparison.
1011	* @param srcChars The text to search for.
1012	* @param srcLength the number of characters in <TT>srcChars</TT> to match
1013	* @param start the offset into this at which to start matching
1014	* @return The offset into this of the start of <TT>srcChars</TT>,
1015	* or -1 if not found.
1016	* @stable ICU 2.0
1017	*/
1018	inline int32_t indexOf(const char16_t *srcChars,
1019	int32_t srcLength,
1020	int32_t start) const;
1021
1022	/**
1023	* Locate in this the first occurrence in the range
1024	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1025	* in <TT>srcChars</TT>, using bitwise comparison.
1026	* @param srcChars The text to search for.
1027	* @param srcLength the number of characters in <TT>srcChars</TT>
1028	* @param start The offset at which searching will start.
1029	* @param length The number of characters to search
1030	* @return The offset into this of the start of <TT>srcChars</TT>,
1031	* or -1 if not found.
1032	* @stable ICU 2.0
1033	*/
1034	inline int32_t indexOf(ConstChar16Ptr srcChars,
1035	int32_t srcLength,
1036	int32_t start,
1037	int32_t length) const;
1038
1039	/**
1040	* Locate in this the first occurrence in the range
1041	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1042	* in <TT>srcChars</TT> in the range
1043	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1044	* using bitwise comparison.
1045	* @param srcChars The text to search for.
1046	* @param srcStart the offset into <TT>srcChars</TT> at which
1047	* to start matching
1048	* @param srcLength the number of characters in <TT>srcChars</TT> to match
1049	* @param start the offset into this at which to start matching
1050	* @param length the number of characters in this to search
1051	* @return The offset into this of the start of <TT>srcChars</TT>,
1052	* or -1 if not found.
1053	* @stable ICU 2.0
1054	*/
1055	int32_t indexOf(const char16_t *srcChars,
1056	int32_t srcStart,
1057	int32_t srcLength,
1058	int32_t start,
1059	int32_t length) const;
1060
1061	/**
1062	* Locate in this the first occurrence of the BMP code point <code>c</code>,
1063	* using bitwise comparison.
1064	* @param c The code unit to search for.
1065	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1066	* @stable ICU 2.0
1067	*/
1068	inline int32_t indexOf(char16_t c) const;
1069
1070	/**
1071	* Locate in this the first occurrence of the code point <TT>c</TT>,
1072	* using bitwise comparison.
1073	*
1074	* @param c The code point to search for.
1075	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1076	* @stable ICU 2.0
1077	*/
1078	inline int32_t indexOf(UChar32 c) const;
1079
1080	/**
1081	* Locate in this the first occurrence of the BMP code point <code>c</code>,
1082	* starting at offset <TT>start</TT>, using bitwise comparison.
1083	* @param c The code unit to search for.
1084	* @param start The offset at which searching will start.
1085	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1086	* @stable ICU 2.0
1087	*/
1088	inline int32_t indexOf(char16_t c,
1089	int32_t start) const;
1090
1091	/**
1092	* Locate in this the first occurrence of the code point <TT>c</TT>
1093	* starting at offset <TT>start</TT>, using bitwise comparison.
1094	*
1095	* @param c The code point to search for.
1096	* @param start The offset at which searching will start.
1097	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1098	* @stable ICU 2.0
1099	*/
1100	inline int32_t indexOf(UChar32 c,
1101	int32_t start) const;
1102
1103	/**
1104	* Locate in this the first occurrence of the BMP code point <code>c</code>
1105	* in the range [<TT>start</TT>, <TT>start + length</TT>),
1106	* using bitwise comparison.
1107	* @param c The code unit to search for.
1108	* @param start the offset into this at which to start matching
1109	* @param length the number of characters in this to search
1110	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1111	* @stable ICU 2.0
1112	*/
1113	inline int32_t indexOf(char16_t c,
1114	int32_t start,
1115	int32_t length) const;
1116
1117	/**
1118	* Locate in this the first occurrence of the code point <TT>c</TT>
1119	* in the range [<TT>start</TT>, <TT>start + length</TT>),
1120	* using bitwise comparison.
1121	*
1122	* @param c The code point to search for.
1123	* @param start the offset into this at which to start matching
1124	* @param length the number of characters in this to search
1125	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1126	* @stable ICU 2.0
1127	*/
1128	inline int32_t indexOf(UChar32 c,
1129	int32_t start,
1130	int32_t length) const;
1131
1132	/**
1133	* Locate in this the last occurrence of the characters in <TT>text</TT>,
1134	* using bitwise comparison.
1135	* @param text The text to search for.
1136	* @return The offset into this of the start of <TT>text</TT>,
1137	* or -1 if not found.
1138	* @stable ICU 2.0
1139	*/
1140	inline int32_t lastIndexOf(const UnicodeString& text) const;
1141
1142	/**
1143	* Locate in this the last occurrence of the characters in <TT>text</TT>
1144	* starting at offset <TT>start</TT>, using bitwise comparison.
1145	* @param text The text to search for.
1146	* @param start The offset at which searching will start.
1147	* @return The offset into this of the start of <TT>text</TT>,
1148	* or -1 if not found.
1149	* @stable ICU 2.0
1150	*/
1151	inline int32_t lastIndexOf(const UnicodeString& text,
1152	int32_t start) const;
1153
1154	/**
1155	* Locate in this the last occurrence in the range
1156	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1157	* in <TT>text</TT>, using bitwise comparison.
1158	* @param text The text to search for.
1159	* @param start The offset at which searching will start.
1160	* @param length The number of characters to search
1161	* @return The offset into this of the start of <TT>text</TT>,
1162	* or -1 if not found.
1163	* @stable ICU 2.0
1164	*/
1165	inline int32_t lastIndexOf(const UnicodeString& text,
1166	int32_t start,
1167	int32_t length) const;
1168
1169	/**
1170	* Locate in this the last occurrence in the range
1171	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1172	* in <TT>srcText</TT> in the range
1173	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1174	* using bitwise comparison.
1175	* @param srcText The text to search for.
1176	* @param srcStart the offset into <TT>srcText</TT> at which
1177	* to start matching
1178	* @param srcLength the number of characters in <TT>srcText</TT> to match
1179	* @param start the offset into this at which to start matching
1180	* @param length the number of characters in this to search
1181	* @return The offset into this of the start of <TT>srcText</TT>,
1182	* or -1 if not found.
1183	* @stable ICU 2.0
1184	*/
1185	inline int32_t lastIndexOf(const UnicodeString& srcText,
1186	int32_t srcStart,
1187	int32_t srcLength,
1188	int32_t start,
1189	int32_t length) const;
1190
1191	/**
1192	* Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1193	* starting at offset <TT>start</TT>, using bitwise comparison.
1194	* @param srcChars The text to search for.
1195	* @param srcLength the number of characters in <TT>srcChars</TT> to match
1196	* @param start the offset into this at which to start matching
1197	* @return The offset into this of the start of <TT>srcChars</TT>,
1198	* or -1 if not found.
1199	* @stable ICU 2.0
1200	*/
1201	inline int32_t lastIndexOf(const char16_t *srcChars,
1202	int32_t srcLength,
1203	int32_t start) const;
1204
1205	/**
1206	* Locate in this the last occurrence in the range
1207	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1208	* in <TT>srcChars</TT>, using bitwise comparison.
1209	* @param srcChars The text to search for.
1210	* @param srcLength the number of characters in <TT>srcChars</TT>
1211	* @param start The offset at which searching will start.
1212	* @param length The number of characters to search
1213	* @return The offset into this of the start of <TT>srcChars</TT>,
1214	* or -1 if not found.
1215	* @stable ICU 2.0
1216	*/
1217	inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1218	int32_t srcLength,
1219	int32_t start,
1220	int32_t length) const;
1221
1222	/**
1223	* Locate in this the last occurrence in the range
1224	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1225	* in <TT>srcChars</TT> in the range
1226	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1227	* using bitwise comparison.
1228	* @param srcChars The text to search for.
1229	* @param srcStart the offset into <TT>srcChars</TT> at which
1230	* to start matching
1231	* @param srcLength the number of characters in <TT>srcChars</TT> to match
1232	* @param start the offset into this at which to start matching
1233	* @param length the number of characters in this to search
1234	* @return The offset into this of the start of <TT>srcChars</TT>,
1235	* or -1 if not found.
1236	* @stable ICU 2.0
1237	*/
1238	int32_t lastIndexOf(const char16_t *srcChars,
1239	int32_t srcStart,
1240	int32_t srcLength,
1241	int32_t start,
1242	int32_t length) const;
1243
1244	/**
1245	* Locate in this the last occurrence of the BMP code point <code>c</code>,
1246	* using bitwise comparison.
1247	* @param c The code unit to search for.
1248	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1249	* @stable ICU 2.0
1250	*/
1251	inline int32_t lastIndexOf(char16_t c) const;
1252
1253	/**
1254	* Locate in this the last occurrence of the code point <TT>c</TT>,
1255	* using bitwise comparison.
1256	*
1257	* @param c The code point to search for.
1258	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1259	* @stable ICU 2.0
1260	*/
1261	inline int32_t lastIndexOf(UChar32 c) const;
1262
1263	/**
1264	* Locate in this the last occurrence of the BMP code point <code>c</code>
1265	* starting at offset <TT>start</TT>, using bitwise comparison.
1266	* @param c The code unit to search for.
1267	* @param start The offset at which searching will start.
1268	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1269	* @stable ICU 2.0
1270	*/
1271	inline int32_t lastIndexOf(char16_t c,
1272	int32_t start) const;
1273
1274	/**
1275	* Locate in this the last occurrence of the code point <TT>c</TT>
1276	* starting at offset <TT>start</TT>, using bitwise comparison.
1277	*
1278	* @param c The code point to search for.
1279	* @param start The offset at which searching will start.
1280	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1281	* @stable ICU 2.0
1282	*/
1283	inline int32_t lastIndexOf(UChar32 c,
1284	int32_t start) const;
1285
1286	/**
1287	* Locate in this the last occurrence of the BMP code point <code>c</code>
1288	* in the range [<TT>start</TT>, <TT>start + length</TT>),
1289	* using bitwise comparison.
1290	* @param c The code unit to search for.
1291	* @param start the offset into this at which to start matching
1292	* @param length the number of characters in this to search
1293	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1294	* @stable ICU 2.0
1295	*/
1296	inline int32_t lastIndexOf(char16_t c,
1297	int32_t start,
1298	int32_t length) const;
1299
1300	/**
1301	* Locate in this the last occurrence of the code point <TT>c</TT>
1302	* in the range [<TT>start</TT>, <TT>start + length</TT>),
1303	* using bitwise comparison.
1304	*
1305	* @param c The code point to search for.
1306	* @param start the offset into this at which to start matching
1307	* @param length the number of characters in this to search
1308	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1309	* @stable ICU 2.0
1310	*/
1311	inline int32_t lastIndexOf(UChar32 c,
1312	int32_t start,
1313	int32_t length) const;
1314
1315
1316	/* Character access */
1317
1318	/**
1319	* Return the code unit at offset <tt>offset</tt>.
1320	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1321	* @param offset a valid offset into the text
1322	* @return the code unit at offset <tt>offset</tt>
1323	* or 0xffff if the offset is not valid for this string
1324	* @stable ICU 2.0
1325	*/
1326	inline char16_t charAt(int32_t offset) const;
1327
1328	/**
1329	* Return the code unit at offset <tt>offset</tt>.
1330	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1331	* @param offset a valid offset into the text
1332	* @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
1333	* @stable ICU 2.0
1334	*/
1335	inline char16_t operator[] (int32_t offset) const;
1336
1337	/**
1338	* Return the code point that contains the code unit
1339	* at offset <tt>offset</tt>.
1340	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1341	* @param offset a valid offset into the text
1342	* that indicates the text offset of any of the code units
1343	* that will be assembled into a code point (21-bit value) and returned
1344	* @return the code point of text at <tt>offset</tt>
1345	* or 0xffff if the offset is not valid for this string
1346	* @stable ICU 2.0
1347	*/
1348	UChar32 char32At(int32_t offset) const;
1349
1350	/**
1351	* Adjust a random-access offset so that
1352	* it points to the beginning of a Unicode character.
1353	* The offset that is passed in points to
1354	* any code unit of a code point,
1355	* while the returned offset will point to the first code unit
1356	* of the same code point.
1357	* In UTF-16, if the input offset points to a second surrogate
1358	* of a surrogate pair, then the returned offset will point
1359	* to the first surrogate.
1360	* @param offset a valid offset into one code point of the text
1361	* @return offset of the first code unit of the same code point
1362	* @see U16_SET_CP_START
1363	* @stable ICU 2.0
1364	*/
1365	int32_t getChar32Start(int32_t offset) const;
1366
1367	/**
1368	* Adjust a random-access offset so that
1369	* it points behind a Unicode character.
1370	* The offset that is passed in points behind
1371	* any code unit of a code point,
1372	* while the returned offset will point behind the last code unit
1373	* of the same code point.
1374	* In UTF-16, if the input offset points behind the first surrogate
1375	* (i.e., to the second surrogate)
1376	* of a surrogate pair, then the returned offset will point
1377	* behind the second surrogate (i.e., just past the surrogate pair).
1378	* @param offset a valid offset after any code unit of a code point of the text
1379	* @return offset of the first code unit after the same code point
1380	* @see U16_SET_CP_LIMIT
1381	* @stable ICU 2.0
1382	*/
1383	int32_t getChar32Limit(int32_t offset) const;
1384
1385	/**
1386	* Move the code unit index along the string by delta code points.
1387	* Interpret the input index as a code unit-based offset into the string,
1388	* move the index forward or backward by delta code points, and
1389	* return the resulting index.
1390	* The input index should point to the first code unit of a code point,
1391	* if there is more than one.
1392	*
1393	* Both input and output indexes are code unit-based as for all
1394	* string indexes/offsets in ICU (and other libraries, like MBCS char*).
1395	* If delta<0 then the index is moved backward (toward the start of the string).
1396	* If delta>0 then the index is moved forward (toward the end of the string).
1397	*
1398	* This behaves like CharacterIterator::move32(delta, kCurrent).
1399	*
1400	* Behavior for out-of-bounds indexes:
1401	* <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1402	* if the input index<0 then it is pinned to 0;
1403	* if the input index>length() then it is pinned to length().
1404	* Afterwards, the index is moved by <code>delta</code> code points
1405	* forward or backward,
1406	* but no further backward than to 0 and no further forward than to length().
1407	* The resulting index return value will be between 0 and length(), inclusive.
1408	*
1409	* Examples:
1410	* <pre>
1411	* // s has code points 'a' U+10000 'b' U+10ffff U+2029
1412	* UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1413	*
1414	* // initial index: position of U+10000
1415	* int32_t index=1;
1416	*
1417	* // the following examples will all result in index==4, position of U+10ffff
1418	*
1419	* // skip 2 code points from some position in the string
1420	* index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1421	*
1422	* // go to the 3rd code point from the start of s (0-based)
1423	* index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1424	*
1425	* // go to the next-to-last code point of s
1426	* index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1427	* </pre>
1428	*
1429	* @param index input code unit index
1430	* @param delta (signed) code point count to move the index forward or backward
1431	* in the string
1432	* @return the resulting code unit index
1433	* @stable ICU 2.0
1434	*/
1435	int32_t moveIndex32(int32_t index, int32_t delta) const;
1436
1437	/* Substring extraction */
1438
1439	/**
1440	* Copy the characters in the range
1441	* [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1442	* beginning at <tt>dstStart</tt>.
1443	* If the string aliases to <code>dst</code> itself as an external buffer,
1444	* then extract() will not copy the contents.
1445	*
1446	* @param start offset of first character which will be copied into the array
1447	* @param length the number of characters to extract
1448	* @param dst array in which to copy characters. The length of <tt>dst</tt>
1449	* must be at least (<tt>dstStart + length</tt>).
1450	* @param dstStart the offset in <TT>dst</TT> where the first character
1451	* will be extracted; defaults to 0 (the beginning of <tt>dst</tt>)
1452	* @stable ICU 2.0
1453	*/
1454	inline void extract(int32_t start,
1455	int32_t length,
1456	Char16Ptr dst,
1457	int32_t dstStart = 0) const;
1458
1459	/**
1460	* Copy the contents of the string into dest.
1461	* This is a convenience function that
1462	* checks if there is enough space in dest,
1463	* extracts the entire string if possible,
1464	* and NUL-terminates dest if possible.
1465	*
1466	* If the string fits into dest but cannot be NUL-terminated
1467	* (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1468	* If the string itself does not fit into dest
1469	* (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1470	*
1471	* If the string aliases to <code>dest</code> itself as an external buffer,
1472	* then extract() will not copy the contents.
1473	*
1474	* @param dest Destination string buffer.
1475	* @param destCapacity Number of char16_ts available at dest.
1476	* @param errorCode ICU error code.
1477	* @return length()
1478	* @stable ICU 2.0
1479	*/
1480	int32_t
1481	extract(Char16Ptr dest, int32_t destCapacity,
1482	UErrorCode &errorCode) const;
1483
1484	/**
1485	* Copy the characters in the range
1486	* [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1487	* <tt>target</tt>.
1488	* @param start offset of first character which will be copied
1489	* @param length the number of characters to extract
1490	* @param target UnicodeString into which to copy characters.
1491	* @note No value is returned; the result is delivered through <TT>target</TT>.
1492	* @stable ICU 2.0
1493	*/
1494	inline void extract(int32_t start,
1495	int32_t length,
1496	UnicodeString& target) const;
1497
1498	/**
1499	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1500	* into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1501	* @param start offset of first character which will be copied into the array
1502	* @param limit offset immediately following the last character to be copied
1503	* @param dst array in which to copy characters. The length of <tt>dst</tt>
1504	* must be at least (<tt>dstStart + (limit - start)</tt>).
1505	* @param dstStart the offset in <TT>dst</TT> where the first character
1506	* will be extracted; defaults to 0 (the beginning of <tt>dst</tt>)
1507	* @stable ICU 2.0
1508	*/
1509	inline void extractBetween(int32_t start,
1510	int32_t limit,
1511	char16_t *dst,
1512	int32_t dstStart = 0) const;
1513
1514	/**
1515	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1516	* into the UnicodeString <tt>target</tt>. Replaceable API.
1517	* @param start offset of first character which will be copied
1518	* @param limit offset immediately following the last character to be copied
1519	* @param target UnicodeString into which to copy characters.
1520	* @note No value is returned; the result is delivered through <TT>target</TT>.
1521	* @stable ICU 2.0
1522	*/
1523	virtual void extractBetween(int32_t start,
1524	int32_t limit,
1525	UnicodeString& target) const;
1526
1527	/**
1528	* Copy the characters in the range
1529	* [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters.
1530	* All characters must be invariant (see utypes.h).
1531	* Use US_INV as the last, signature-distinguishing parameter.
1532	*
1533	* This function does not write any more than <code>targetCapacity</code>
1534	* characters but returns the length of the entire output string
1535	* so that one can allocate a larger buffer and call the function again
1536	* if necessary.
1537	* The output string is NUL-terminated if possible.
1538	*
1539	* @param start offset of first character which will be copied
1540	* @param startLength the number of characters to extract
1541	* @param target the target buffer for extraction, can be NULL
1542	* if targetCapacity is 0
1543	* @param targetCapacity the length of the target buffer
1544	* @param inv Signature-distinguishing parameter, use US_INV.
1545	* @return the output string length, not including the terminating NUL
1546	* @stable ICU 3.2
1547	*/
1548	int32_t extract(int32_t start,
1549	int32_t startLength,
1550	char *target,
1551	int32_t targetCapacity,
1552	enum EInvariant inv) const;
1553
1554	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1555
1556	/**
1557	* Copy the characters in the range
1558	* [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1559	* in the platform's default codepage.
1560	* This function does not write any more than <code>targetLength</code>
1561	* characters but returns the length of the entire output string
1562	* so that one can allocate a larger buffer and call the function again
1563	* if necessary.
1564	* The output string is NUL-terminated if possible.
1565	*
1566	* @param start offset of first character which will be copied
1567	* @param startLength the number of characters to extract
1568	* @param target the target buffer for extraction
1569	* @param targetLength the length of the target buffer
1570	* If <TT>target</TT> is NULL, then the number of bytes required for
1571	* <TT>target</TT> is returned.
1572	* @return the output string length, not including the terminating NUL
1573	* @stable ICU 2.0
1574	*/
1575	int32_t extract(int32_t start,
1576	int32_t startLength,
1577	char *target,
1578	uint32_t targetLength) const;
1579
1580	#endif
1581
1582	#if !UCONFIG_NO_CONVERSION
1583
1584	/**
1585	* Copy the characters in the range
1586	* [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1587	* in a specified codepage.
1588	* The output string is NUL-terminated.
1589	*
1590	* Recommendation: For invariant-character strings use
1591	* extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1592	* because it avoids object code dependencies of UnicodeString on
1593	* the conversion code.
1594	*
1595	* @param start offset of first character which will be copied
1596	* @param startLength the number of characters to extract
1597	* @param target the target buffer for extraction
1598	* @param codepage the desired codepage for the characters. 0 has
1599	* the special meaning of the default codepage
1600	* If <code>codepage</code> is an empty string (<code>""</code>),
1601	* then a simple conversion is performed on the codepage-invariant
1602	* subset ("invariant characters") of the platform encoding. See utypes.h.
1603	* If <TT>target</TT> is NULL, then the number of bytes required for
1604	* <TT>target</TT> is returned. It is assumed that the target is big enough
1605	* to fit all of the characters.
1606	* @return the output string length, not including the terminating NUL
1607	* @stable ICU 2.0
1608	*/
1609	inline int32_t extract(int32_t start,
1610	int32_t startLength,
1611	char *target,
1612	const char *codepage = 0) const;
1613
1614	/**
1615	* Copy the characters in the range
1616	* [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1617	* in a specified codepage.
1618	* This function does not write any more than <code>targetLength</code>
1619	* characters but returns the length of the entire output string
1620	* so that one can allocate a larger buffer and call the function again
1621	* if necessary.
1622	* The output string is NUL-terminated if possible.
1623	*
1624	* Recommendation: For invariant-character strings use
1625	* extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1626	* because it avoids object code dependencies of UnicodeString on
1627	* the conversion code.
1628	*
1629	* @param start offset of first character which will be copied
1630	* @param startLength the number of characters to extract
1631	* @param target the target buffer for extraction
1632	* @param targetLength the length of the target buffer
1633	* @param codepage the desired codepage for the characters. 0 has
1634	* the special meaning of the default codepage
1635	* If <code>codepage</code> is an empty string (<code>""</code>),
1636	* then a simple conversion is performed on the codepage-invariant
1637	* subset ("invariant characters") of the platform encoding. See utypes.h.
1638	* If <TT>target</TT> is NULL, then the number of bytes required for
1639	* <TT>target</TT> is returned.
1640	* @return the output string length, not including the terminating NUL
1641	* @stable ICU 2.0
1642	*/
1643	int32_t extract(int32_t start,
1644	int32_t startLength,
1645	char *target,
1646	uint32_t targetLength,
1647	const char *codepage) const;
1648
1649	/**
1650	* Convert the UnicodeString into a codepage string using an existing UConverter.
1651	* The output string is NUL-terminated if possible.
1652	*
1653	* This function avoids the overhead of opening and closing a converter if
1654	* multiple strings are extracted.
1655	*
1656	* @param dest destination string buffer, can be NULL if destCapacity==0
1657	* @param destCapacity the number of chars available at dest
1658	* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1659	* or NULL for the default converter
1660	* @param errorCode normal ICU error code
1661	* @return the length of the output string, not counting the terminating NUL;
1662	* if the length is greater than destCapacity, then the string will not fit
1663	* and a buffer of the indicated length would need to be passed in
1664	* @stable ICU 2.0
1665	*/
1666	int32_t extract(char *dest, int32_t destCapacity,
1667	UConverter *cnv,
1668	UErrorCode &errorCode) const;
1669
1670	#endif
1671
1672	/**
1673	* Create a temporary substring for the specified range.
1674	* Unlike the substring constructor and setTo() functions,
1675	* the object returned here will be a read-only alias (using getBuffer())
1676	* rather than copying the text.
1677	* As a result, this substring operation is much faster but requires
1678	* that the original string not be modified or deleted during the lifetime
1679	* of the returned substring object.
1680	* @param start offset of the first character visible in the substring; defaults to 0
1681	* @param length length of the substring; defaults to INT32_MAX
1682	* @return a read-only alias UnicodeString object for the substring
1683	* @stable ICU 4.4
1684	*/
1685	UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1686
1687	/**
1688	* Create a temporary substring for the specified range.
1689	* Same as tempSubString(start, length) except that the substring range
1690	* is specified as a (start, limit) pair (with an exclusive limit index)
1691	* rather than a (start, length) pair.
1692	* @param start offset of the first character visible in the substring
1693	* @param limit offset immediately following the last character visible in the substring
1694	* @return a read-only alias UnicodeString object for the substring
1695	* @stable ICU 4.4
1696	*/
1697	inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1698
1699	/**
1700	* Convert the UnicodeString to UTF-8 and write the result
1701	* to a ByteSink. This is called by toUTF8String().
1702	* Unpaired surrogates are replaced with U+FFFD.
1703	* Calls u_strToUTF8WithSub().
1704	*
1705	* @param sink A ByteSink to which the UTF-8 version of the string is written.
1706	* sink.Flush() is called at the end.
1707	* @stable ICU 4.2
1708	* @see toUTF8String
1709	*/
1710	void toUTF8(ByteSink &sink) const;
1711
1712	/**
1713	* Append the UTF-8 form of this UnicodeString to a standard string
1714	* (or a compatible string-like object).
1715	* Any unpaired surrogate is written as U+FFFD.
1716	* Implemented by wrapping <code>result</code> in a StringByteSink and calling toUTF8().
1717	*
1718	* @param result The standard string (or compatible object) that
1719	* receives the appended UTF-8 bytes.
1720	* @return A reference to <code>result</code>.
1721	* @stable ICU 4.2
1722	* @see toUTF8
1723	*/
1724	template<typename StringClass>
1725	StringClass &toUTF8String(StringClass &result) const {
1726	StringByteSink<StringClass> utf8Sink(&result, length());
1727	toUTF8(utf8Sink);
1728	return result;
1729	}
1730
1731	/**
1732	* Convert the UnicodeString to UTF-32.
1733	* Unpaired surrogates are replaced with U+FFFD.
1734	* Calls u_strToUTF32WithSub().
1735	*
1736	* @param utf32 destination string buffer, can be NULL if capacity==0
1737	* @param capacity the number of UChar32s available at utf32
1738	* @param errorCode Standard ICU error code. Its input value must
1739	* pass the U_SUCCESS() test, or else the function returns
1740	* immediately. Check for U_FAILURE() on output or use with
1741	* function chaining. (See User Guide for details.)
1742	* @return The length of the UTF-32 string.
1743	* @see fromUTF32
1744	* @stable ICU 4.2
1745	*/
1746	int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1747
1748	/* Length operations */
1749
1750	/**
1751	* Return the length of the UnicodeString object.
1752	* The length is the number of char16_t code units in the UnicodeString.
1753	* If you want the number of code points, please use countChar32().
1754	* @return the length of the UnicodeString object
1755	* @see countChar32
1756	* @stable ICU 2.0
1757	*/
1758	inline int32_t length(void) const;
1759
1760	/**
1761	* Count Unicode code points in the length char16_t code units of the string.
1762	* A code point may occupy either one or two char16_t code units.
1763	* Counting code points involves reading all code units.
1764	*
1765	* This function is basically the inverse of moveIndex32().
1766	*
1767	* @param start the index of the first code unit to check; defaults to 0
1768	* @param length the number of char16_t code units to check; defaults to INT32_MAX
1769	* @return the number of code points in the specified code units
1770	* @see length
1771	* @stable ICU 2.0
1772	*/
1773	int32_t
1774	countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1775
1776	/**
1777	* Check if the length char16_t code units of the string
1778	* contain more Unicode code points than a certain number.
1779	* This is more efficient than counting all code points in this part of the string
1780	* and comparing that number with a threshold.
1781	* This function may not need to scan the string at all if the length
1782	* falls within a certain range, and
1783	* never needs to count more than 'number+1' code points.
1784	* Logically equivalent to (countChar32(start, length)>number).
1785	* A Unicode code point may occupy either one or two char16_t code units.
1786	*
1787	* @param start the index of the first code unit to check (0 for the entire string)
1788	* @param length the number of char16_t code units to check
1789	* (use INT32_MAX for the entire string; remember that start/length
1790	* values are pinned)
1791	* @param number The number of code points in the (sub)string is compared against
1792	* the 'number' parameter.
1793	* @return Boolean value for whether the string contains more Unicode code points
1794	* than 'number'. Same as (u_countChar32(s, length)>number).
1795	* @see countChar32
1796	* @see u_strHasMoreChar32Than
1797	* @stable ICU 2.4
1798	*/
1799	UBool
1800	hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1801
1802	/**
1803	* Determine if this string is empty.
1804	* @return TRUE if this string contains 0 characters, FALSE otherwise.
1805	* @stable ICU 2.0
1806	*/
1807	inline UBool isEmpty(void) const;
1808
1809	/**
1810	* Return the capacity of the internal buffer of the UnicodeString object.
1811	* This is useful together with the getBuffer functions.
1812	* See there for details.
1813	*
1814	* @return the number of char16_ts available in the internal buffer
1815	* @see getBuffer
1816	* @stable ICU 2.0
1817	*/
1818	inline int32_t getCapacity(void) const;
1819
1820	/* Other operations */
1821
1822	/**
1823	* Generate a hash code for this object.
1824	* @return The hash code of this UnicodeString.
1825	* @stable ICU 2.0
1826	*/
1827	inline int32_t hashCode(void) const;
1828
1829	/**
1830	* Determine if this object contains a valid string.
1831	* A bogus string has no value. It is different from an empty string,
1832	* although in both cases isEmpty() returns TRUE and length() returns 0.
1833	* setToBogus() and isBogus() can be used to indicate that no string value is available.
1834	* For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1835	* length() returns 0.
1836	*
1837	* @return TRUE if the string is bogus/invalid, FALSE otherwise
1838	* @see setToBogus()
1839	* @stable ICU 2.0
1840	*/
1841	inline UBool isBogus(void) const;
1842
1843
1844	//========================================
1845	// Write operations
1846	//========================================
1847
1848	/* Assignment operations */
1849
1850	/**
1851	* Assignment operator. Replace the characters in this UnicodeString
1852	* with the characters from <TT>srcText</TT>.
1853	*
1854	* Starting with ICU 2.4, the assignment operator and the copy constructor
1855	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1856	* By contrast, the fastCopyFrom() function implements the old,
1857	* more efficient but less safe behavior
1858	* of making this string also a readonly alias to the same buffer.
1859	*
1860	* If the source object has an "open" buffer from getBuffer(minCapacity),
1861	* then the copy is an empty string.
1862	*
1863	* @param srcText The text containing the characters to replace
1864	* @return a reference to this
1865	* @stable ICU 2.0
1866	* @see fastCopyFrom
1867	*/
1868	UnicodeString &operator=(const UnicodeString &srcText);
1869
1870	/**
1871	* Almost the same as the assignment operator.
1872	* Replace the characters in this UnicodeString
1873	* with the characters from <code>srcText</code>.
1874	*
1875	* This function works the same as the assignment operator
1876	* for all strings except for ones that are readonly aliases.
1877	*
1878	* Starting with ICU 2.4, the assignment operator and the copy constructor
1879	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1880	* This function implements the old, more efficient but less safe behavior
1881	* of making this string also a readonly alias to the same buffer.
1882	*
1883	* The fastCopyFrom function must be used only if it is known that the lifetime of
1884	* this UnicodeString does not exceed the lifetime of the aliased buffer
1885	* including its contents, for example for strings from resource bundles
1886	* or aliases to string constants.
1887	*
1888	* If the source object has an "open" buffer from getBuffer(minCapacity),
1889	* then the copy is an empty string.
1890	*
1891	* @param src The text containing the characters to replace.
1892	* @return a reference to this
1893	* @stable ICU 2.4
1894	*/
1895	UnicodeString &fastCopyFrom(const UnicodeString &src);
1896
1897	/**
1898	* Move assignment operator. Afterwards src might be in the bogus state.
1899	* This string takes over the contents and state that src had before the call.
1900	* Assigning an object to itself this way has undefined behavior.
1901	* @param src the string to move from
1902	* @return *this
1903	* @stable ICU 56
1904	*/
1905	UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
1906	return this->moveFrom(src);
1907	}
1908
1909	// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
1910	/**
1911	* Move assignment; might leave src in bogus state.
1912	* This string will have the same contents and state that the source string had.
1913	* The behavior is undefined if *this and src are the same object.
1914	*
1915	* Can be called explicitly, does not need C++11 support.
1916	* @param src source string
1917	* @return *this
1918	* @draft ICU 56
1919	*/
1920	UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
1921
1922	/**
1923	* Swap strings.
1924	* @param other other string
1925	* @stable ICU 56
1926	*/
1927	void swap(UnicodeString &other) U_NOEXCEPT;
1928
1929	/**
1930	* Non-member UnicodeString swap function.
1931	* Defined inline as a friend; it simply forwards to the member function s1.swap(s2).
1932	* @param s1 will get s2's contents and state
1933	* @param s2 will get s1's contents and state
1934	* @stable ICU 56
1935	*/
1936	friend U_COMMON_API inline void U_EXPORT2
1937	swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
1938	s1.swap(s2); // delegate to the member swap(UnicodeString &other)
1939	}
1939
1940	/**
1941	* Assignment operator. Replace the characters in this UnicodeString
1942	* with the code unit <TT>ch</TT>.
1943	* @param ch the code unit to replace
1944	* @return a reference to this
1945	* @stable ICU 2.0
1946	*/
1947	inline UnicodeString& operator= (char16_t ch);
1948
1949	/**
1950	* Assignment operator. Replace the characters in this UnicodeString
1951	* with the code point <TT>ch</TT>.
1952	* @param ch the code point to replace
1953	* @return a reference to this
1954	* @stable ICU 2.0
1955	*/
1956	inline UnicodeString& operator= (UChar32 ch);
1957
1958	/**
1959	* Set the text in the UnicodeString object to the characters
1960	* in <TT>srcText</TT> in the range
1961	* [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1962	* <TT>srcText</TT> is not modified.
1963	* @param srcText the source for the new characters
1964	* @param srcStart the offset into <TT>srcText</TT> where new characters
1965	* will be obtained
1966	* @return a reference to this
1967	* @stable ICU 2.2
1968	*/
1969	inline UnicodeString& setTo(const UnicodeString& srcText,
1970	int32_t srcStart);
1971
1972	/**
1973	* Set the text in the UnicodeString object to the characters
1974	* in <TT>srcText</TT> in the range
1975	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1976	* <TT>srcText</TT> is not modified.
1977	* @param srcText the source for the new characters
1978	* @param srcStart the offset into <TT>srcText</TT> where new characters
1979	* will be obtained
1980	* @param srcLength the number of characters in <TT>srcText</TT> in the
1981	* replace string.
1982	* @return a reference to this
1983	* @stable ICU 2.0
1984	*/
1985	inline UnicodeString& setTo(const UnicodeString& srcText,
1986	int32_t srcStart,
1987	int32_t srcLength);
1988
1989	/**
1990	* Set the text in the UnicodeString object to the characters in
1991	* <TT>srcText</TT>.
1992	* <TT>srcText</TT> is not modified.
1993	* @param srcText the source for the new characters
1994	* @return a reference to this
1995	* @stable ICU 2.0
1996	*/
1997	inline UnicodeString& setTo(const UnicodeString& srcText);
1998
1999	/**
2000	* Set the characters in the UnicodeString object to the characters
2001	* in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2002	* @param srcChars the source for the new characters
2003	* @param srcLength the number of Unicode characters in srcChars.
2004	* @return a reference to this
2005	* @stable ICU 2.0
2006	*/
2007	inline UnicodeString& setTo(const char16_t *srcChars,
2008	int32_t srcLength);
2009
2010	/**
2011	* Set the characters in the UnicodeString object to the code unit
2012	* <TT>srcChar</TT>.
2013	* @param srcChar the code unit which becomes the UnicodeString's character
2014	* content
2015	* @return a reference to this
2016	* @stable ICU 2.0
2017	*/
2018	UnicodeString& setTo(char16_t srcChar);
2019
2020	/**
2021	* Set the characters in the UnicodeString object to the code point
2022	* <TT>srcChar</TT>.
2023	* @param srcChar the code point which becomes the UnicodeString's character
2024	* content
2025	* @return a reference to this
2026	* @stable ICU 2.0
2027	*/
2028	UnicodeString& setTo(UChar32 srcChar);
2029
2030	/**
2031	* Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
2032	* The text will be used for the UnicodeString object, but
2033	* it will not be released when the UnicodeString is destroyed.
2034	* This has copy-on-write semantics:
2035	* When the string is modified, then the buffer is first copied into
2036	* newly allocated memory.
2037	* The aliased buffer is never modified.
2038	*
2039	* In an assignment to another UnicodeString, when using the copy constructor
2040	* or the assignment operator, the text will be copied.
2041	* When using fastCopyFrom(), the text will be aliased again,
2042	* so that both strings then alias the same readonly-text.
2043	*
2044	* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2045	* This must be true if <code>textLength==-1</code>.
2046	* @param text The characters to alias for the UnicodeString.
2047	* @param textLength The number of Unicode characters in <code>text</code> to alias.
2048	* If -1, then this function will determine the length
2049	* by calling <code>u_strlen()</code>.
2050	* @return a reference to this
2051	* @stable ICU 2.0
2052	*/
2053	UnicodeString &setTo(UBool isTerminated,
2054	ConstChar16Ptr text,
2055	int32_t textLength);
2056
2057	/**
2058	* Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
2059	* The text will be used for the UnicodeString object, but
2060	* it will not be released when the UnicodeString is destroyed.
2061	* This has write-through semantics:
2062	* For as long as the capacity of the buffer is sufficient, write operations
2063	* will directly affect the buffer. When more capacity is necessary, then
2064	* a new buffer will be allocated and the contents copied as with regularly
2065	* constructed strings.
2066	* In an assignment to another UnicodeString, the buffer will be copied.
2067	* The extract(Char16Ptr dst) function detects whether the dst pointer is the same
2068	* as the string buffer itself and will in this case not copy the contents.
2069	*
2070	* @param buffer The characters to alias for the UnicodeString.
2071	* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2072	* @param buffCapacity The size of <code>buffer</code> in char16_ts.
2073	* @return a reference to this
2074	* @stable ICU 2.0
2075	*/
2076	UnicodeString &setTo(char16_t *buffer,
2077	int32_t buffLength,
2078	int32_t buffCapacity);
2079
2080	/**
2081	* Make this UnicodeString object invalid.
2082	* The string will test TRUE with isBogus().
2083	*
2084	* A bogus string has no value. It is different from an empty string.
2085	* It can be used to indicate that no string value is available.
2086	* getBuffer() and getTerminatedBuffer() return NULL, and
2087	* length() returns 0.
2088	*
2089	* This utility function is used throughout the UnicodeString
2090	* implementation to indicate that a UnicodeString operation failed,
2091	* and may be used in other functions,
2092	* especially but not exclusively when such functions do not
2093	* take a UErrorCode for simplicity.
2094	*
2095	* The following methods, and no others, will clear a string object's bogus flag:
2096	* - remove()
2097	* - remove(0, INT32_MAX)
2098	* - truncate(0)
2099	* - operator=() (assignment operator)
2100	* - setTo(...)
2101	*
2102	* The simplest way to turn a bogus string into an empty one
2103	* is to use the remove() function.
2104	* Examples for other functions that are equivalent to "set to empty string":
2105	* \code
2106	* if(s.isBogus()) {
2107	* s.remove(); // set to an empty string (remove all), or
2108	* s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2109	* s.truncate(0); // set to an empty string (complete truncation), or
2110	* s=UnicodeString(); // assign an empty string, or
2111	* s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2112	* static const char16_t nul=0;
2113	* s.setTo(&nul, 0); // set to an empty C Unicode string
2114	* }
2115	* \endcode
2116	*
2117	* @see isBogus()
2118	* @stable ICU 2.0
2119	*/
2120	void setToBogus();
2121
2122	/**
2123	* Set the character at the specified offset to the specified character.
2124	* @param offset A valid offset into the text of the character to set
2125	* @param ch The new character
2126	* @return A reference to this
2127	* @stable ICU 2.0
2128	*/
2129	UnicodeString& setCharAt(int32_t offset,
2130	char16_t ch);
2131
2132
2133	/* Append operations */
2134
2135	/**
2136	* Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
2137	* object.
2138	* @param ch the code unit to be appended
2139	* @return a reference to this
2140	* @stable ICU 2.0
2141	*/
2142	inline UnicodeString& operator+= (char16_t ch);
2143
2144	/**
2145	* Append operator. Append the code point <TT>ch</TT> to the UnicodeString
2146	* object.
2147	* @param ch the code point to be appended
2148	* @return a reference to this
2149	* @stable ICU 2.0
2150	*/
2151	inline UnicodeString& operator+= (UChar32 ch);
2152
2153	/**
2154	* Append operator. Append the characters in <TT>srcText</TT> to the
2155	* UnicodeString object. <TT>srcText</TT> is not modified.
2156	* @param srcText the source for the new characters
2157	* @return a reference to this
2158	* @stable ICU 2.0
2159	*/
2160	inline UnicodeString& operator+= (const UnicodeString& srcText);
2161
2162	/**
2163	* Append the characters
2164	* in <TT>srcText</TT> in the range
2165	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2166	* end of the UnicodeString object. <TT>srcText</TT>
2167	* is not modified.
2168	* @param srcText the source for the new characters
2169	* @param srcStart the offset into <TT>srcText</TT> where new characters
2170	* will be obtained
2171	* @param srcLength the number of characters in <TT>srcText</TT> in
2172	* the append string
2173	* @return a reference to this
2174	* @stable ICU 2.0
2175	*/
2176	inline UnicodeString& append(const UnicodeString& srcText,
2177	int32_t srcStart,
2178	int32_t srcLength);
2179
2180	/**
2181	* Append the characters in <TT>srcText</TT> to the UnicodeString object.
2182	* <TT>srcText</TT> is not modified.
2183	* @param srcText the source for the new characters
2184	* @return a reference to this
2185	* @stable ICU 2.0
2186	*/
2187	inline UnicodeString& append(const UnicodeString& srcText);
2188
2189	/**
2190	* Append the characters in <TT>srcChars</TT> in the range
2191	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2192	* object.
2193	* <TT>srcChars</TT> is not modified.
2194	* @param srcChars the source for the new characters
2195	* @param srcStart the offset into <TT>srcChars</TT> where new characters
2196	* will be obtained
2197	* @param srcLength the number of characters in <TT>srcChars</TT> in
2198	* the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2199	* @return a reference to this
2200	* @stable ICU 2.0
2201	*/
2202	inline UnicodeString& append(const char16_t *srcChars,
2203	int32_t srcStart,
2204	int32_t srcLength);
2205
2206	/**
2207	* Append the characters in <TT>srcChars</TT> to the end of the UnicodeString
2208	* object. <TT>srcChars</TT> is not modified.
2209	* @param srcChars the source for the new characters
2210	* @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2211	* can be -1 if <TT>srcChars</TT> is NUL-terminated
2212	* @return a reference to this
2213	* @stable ICU 2.0
2214	*/
2215	inline UnicodeString& append(ConstChar16Ptr srcChars,
2216	int32_t srcLength);
2217
2218	/**
2219	* Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2220	* @param srcChar the code unit to append
2221	* @return a reference to this
2222	* @stable ICU 2.0
2223	*/
2224	inline UnicodeString& append(char16_t srcChar);
2225
2226	/**
2227	* Append the code point <TT>srcChar</TT> to the UnicodeString object.
2228	* @param srcChar the code point to append
2229	* @return a reference to this
2230	* @stable ICU 2.0
2231	*/
2232	UnicodeString& append(UChar32 srcChar);
2233
2234
2235	/* Insert operations */
2236
2237	/**
2238	* Insert the characters in <TT>srcText</TT> in the range
2239	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2240	* object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2241	* @param start the offset where the insertion begins
2242	* @param srcText the source for the new characters
2243	* @param srcStart the offset into <TT>srcText</TT> where new characters
2244	* will be obtained
2245	* @param srcLength the number of characters in <TT>srcText</TT> in
2246	* the insert string
2247	* @return a reference to this
2248	* @stable ICU 2.0
2249	*/
2250	inline UnicodeString& insert(int32_t start,
2251	const UnicodeString& srcText,
2252	int32_t srcStart,
2253	int32_t srcLength);
2254
2255	/**
2256	* Insert the characters in <TT>srcText</TT> into the UnicodeString object
2257	* at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2258	* @param start the offset where the insertion begins
2259	* @param srcText the source for the new characters
2260	* @return a reference to this
2261	* @stable ICU 2.0
2262	*/
2263	inline UnicodeString& insert(int32_t start,
2264	const UnicodeString& srcText);
2265
2266	/**
2267	* Insert the characters in <TT>srcChars</TT> in the range
2268	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2269	* object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2270	* @param start the offset at which the insertion begins
2271	* @param srcChars the source for the new characters
2272	* @param srcStart the offset into <TT>srcChars</TT> where new characters
2273	* will be obtained
2274	* @param srcLength the number of characters in <TT>srcChars</TT>
2275	* in the insert string
2276	* @return a reference to this
2277	* @stable ICU 2.0
2278	*/
2279	inline UnicodeString& insert(int32_t start,
2280	const char16_t *srcChars,
2281	int32_t srcStart,
2282	int32_t srcLength);
2283
2284	/**
2285	* Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2286	* at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2287	* @param start the offset where the insertion begins
2288	* @param srcChars the source for the new characters
2289	* @param srcLength the number of Unicode characters in srcChars.
2290	* @return a reference to this
2291	* @stable ICU 2.0
2292	*/
2293	inline UnicodeString& insert(int32_t start,
2294	ConstChar16Ptr srcChars,
2295	int32_t srcLength);
2296
2297	/**
2298	* Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2299	* offset <TT>start</TT>.
2300	* @param start the offset at which the insertion occurs
2301	* @param srcChar the code unit to insert
2302	* @return a reference to this
2303	* @stable ICU 2.0
2304	*/
2305	inline UnicodeString& insert(int32_t start,
2306	char16_t srcChar);
2307
2308	/**
2309	* Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2310	* offset <TT>start</TT>.
2311	* @param start the offset at which the insertion occurs
2312	* @param srcChar the code point to insert
2313	* @return a reference to this
2314	* @stable ICU 2.0
2315	*/
2316	inline UnicodeString& insert(int32_t start,
2317	UChar32 srcChar);
2318
2319
2320	/* Replace operations */
2321
2322	/**
2323	* Replace the characters in the range
2324	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2325	* <TT>srcText</TT> in the range
2326	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2327	* <TT>srcText</TT> is not modified.
2328	* @param start the offset at which the replace operation begins
2329	* @param length the number of characters to replace. The character at
2330	* <TT>start + length</TT> is not modified.
2331	* @param srcText the source for the new characters
2332	* @param srcStart the offset into <TT>srcText</TT> where new characters
2333	* will be obtained
2334	* @param srcLength the number of characters in <TT>srcText</TT> in
2335	* the replace string
2336	* @return a reference to this
2337	* @stable ICU 2.0
2338	*/
2339	UnicodeString& replace(int32_t start,
2340	int32_t length,
2341	const UnicodeString& srcText,
2342	int32_t srcStart,
2343	int32_t srcLength);
2344
2345	/**
2346	* Replace the characters in the range
2347	* [<TT>start</TT>, <TT>start + length</TT>)
2348	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2349	* not modified.
2350	* @param start the offset at which the replace operation begins
2351	* @param length the number of characters to replace. The character at
2352	* <TT>start + length</TT> is not modified.
2353	* @param srcText the source for the new characters
2354	* @return a reference to this
2355	* @stable ICU 2.0
2356	*/
2357	UnicodeString& replace(int32_t start,
2358	int32_t length,
2359	const UnicodeString& srcText);
2360
2361	/**
2362	* Replace the characters in the range
2363	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2364	* <TT>srcChars</TT> in the range
2365	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2366	* is not modified.
2367	* @param start the offset at which the replace operation begins
2368	* @param length the number of characters to replace. The character at
2369	* <TT>start + length</TT> is not modified.
2370	* @param srcChars the source for the new characters
2371	* @param srcStart the offset into <TT>srcChars</TT> where new characters
2372	* will be obtained
2373	* @param srcLength the number of characters in <TT>srcChars</TT>
2374	* in the replace string
2375	* @return a reference to this
2376	* @stable ICU 2.0
2377	*/
2378	UnicodeString& replace(int32_t start,
2379	int32_t length,
2380	const char16_t *srcChars,
2381	int32_t srcStart,
2382	int32_t srcLength);
2383
2384	/**
2385	* Replace the characters in the range
2386	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2387	* <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2388	* @param start the offset at which the replace operation begins
2389	* @param length number of characters to replace. The character at
2390	* <TT>start + length</TT> is not modified.
2391	* @param srcChars the source for the new characters
2392	* @param srcLength the number of Unicode characters in srcChars
2393	* @return a reference to this
2394	* @stable ICU 2.0
2395	*/
2396	inline UnicodeString& replace(int32_t start,
2397	int32_t length,
2398	ConstChar16Ptr srcChars,
2399	int32_t srcLength);
2400
2401	/**
2402	* Replace the characters in the range
2403	* [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2404	* <TT>srcChar</TT>.
2405	* @param start the offset at which the replace operation begins
2406	* @param length the number of characters to replace. The character at
2407	* <TT>start + length</TT> is not modified.
2408	* @param srcChar the new code unit
2409	* @return a reference to this
2410	* @stable ICU 2.0
2411	*/
2412	inline UnicodeString& replace(int32_t start,
2413	int32_t length,
2414	char16_t srcChar);
2415
2416	/**
2417	* Replace the characters in the range
2418	* [<TT>start</TT>, <TT>start + length</TT>) with the code point
2419	* <TT>srcChar</TT>.
2420	* @param start the offset at which the replace operation begins
2421	* @param length the number of characters to replace. The character at
2422	* <TT>start + length</TT> is not modified.
2423	* @param srcChar the new code point
2424	* @return a reference to this
2425	* @stable ICU 2.0
2426	*/
2427	UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2428
2429	/**
2430	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2431	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2432	* @param start the offset at which the replace operation begins
2433	* @param limit the offset immediately following the replace range
2434	* @param srcText the source for the new characters
2435	* @return a reference to this
2436	* @stable ICU 2.0
2437	*/
2438	inline UnicodeString& replaceBetween(int32_t start,
2439	int32_t limit,
2440	const UnicodeString& srcText);
2441
2442	/**
2443	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2444	* with the characters in <TT>srcText</TT> in the range
2445	* [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2446	* @param start the offset at which the replace operation begins
2447	* @param limit the offset immediately following the replace range
2448	* @param srcText the source for the new characters
2449	* @param srcStart the offset into <TT>srcText</TT> where new characters
2450	* will be obtained
2451	* @param srcLimit the offset immediately following the range to copy
2452	* in <TT>srcText</TT>
2453	* @return a reference to this
2454	* @stable ICU 2.0
2455	*/
2456	inline UnicodeString& replaceBetween(int32_t start,
2457	int32_t limit,
2458	const UnicodeString& srcText,
2459	int32_t srcStart,
2460	int32_t srcLimit);
2461
2462	/**
2463	* Replace a substring of this object with the given text.
2464	* @param start the beginning index, inclusive; <code>0 <= start
2465	* <= limit</code>.
2466	* @param limit the ending index, exclusive; <code>start <= limit
2467	* <= length()</code>.
2468	* @param text the text to replace characters <code>start</code>
2469	* to <code>limit - 1</code>
2470	* @stable ICU 2.0
2471	*/
2472	virtual void handleReplaceBetween(int32_t start,
2473	int32_t limit,
2474	const UnicodeString& text);
2475
2476	/**
2477	* Replaceable API
2478	* @return TRUE if it has MetaData
2479	* @stable ICU 2.4
2480	*/
2481	virtual UBool hasMetaData() const;
2482
2483	/**
2484	* Copy a substring of this object, retaining attribute (out-of-band)
2485	* information. This method is used to duplicate or reorder substrings.
2486	* The destination index must not overlap the source range.
2487	*
2488	* @param start the beginning index, inclusive; <code>0 <= start <=
2489	* limit</code>.
2490	* @param limit the ending index, exclusive; <code>start <= limit <=
2491	* length()</code>.
2492	* @param dest the destination index. The characters from
2493	* <code>start..limit-1</code> will be copied to <code>dest</code>.
2494	* Implementations of this method may assume that <code>dest <= start ||
2495	* dest >= limit</code>.
2496	* @stable ICU 2.0
2497	*/
2498	virtual void copy(int32_t start, int32_t limit, int32_t dest);
2499
2500	/* Search and replace operations */
2501
2502	/**
2503	* Replace all occurrences of characters in oldText with the characters
2504	* in newText
2505	* @param oldText the text containing the search text
2506	* @param newText the text containing the replacement text
2507	* @return a reference to this
2508	* @stable ICU 2.0
2509	*/
2510	inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2511	const UnicodeString& newText);
2512
2513	/**
2514	* Replace all occurrences of characters in oldText with characters
2515	* in newText
2516	* in the range [<TT>start</TT>, <TT>start + length</TT>).
2517	* @param start the start of the range in which replace will be performed
2518	* @param length the length of the range in which replace will be performed
2519	* @param oldText the text containing the search text
2520	* @param newText the text containing the replacement text
2521	* @return a reference to this
2522	* @stable ICU 2.0
2523	*/
2524	inline UnicodeString& findAndReplace(int32_t start,
2525	int32_t length,
2526	const UnicodeString& oldText,
2527	const UnicodeString& newText);
2528
2529	/**
2530	* Replace all occurrences of characters in oldText in the range
2531	* [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2532	* in newText in the range
2533	* [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2534	* in the range [<TT>start</TT>, <TT>start + length</TT>).
2535	* @param start the start of the range in which replace will be performed
2536	* @param length the length of the range in which replace will be performed
2537	* @param oldText the text containing the search text
2538	* @param oldStart the start of the search range in <TT>oldText</TT>
2539	* @param oldLength the length of the search range in <TT>oldText</TT>
2540	* @param newText the text containing the replacement text
2541	* @param newStart the start of the replacement range in <TT>newText</TT>
2542	* @param newLength the length of the replacement range in <TT>newText</TT>
2543	* @return a reference to this
2544	* @stable ICU 2.0
2545	*/
2546	UnicodeString& findAndReplace(int32_t start,
2547	int32_t length,
2548	const UnicodeString& oldText,
2549	int32_t oldStart,
2550	int32_t oldLength,
2551	const UnicodeString& newText,
2552	int32_t newStart,
2553	int32_t newLength);
2554
2555
2556	/* Remove operations */
2557
2558	/**
2559	* Remove all characters from the UnicodeString object.
2560	* @return a reference to this
2561	* @stable ICU 2.0
2562	*/
2563	inline UnicodeString& remove(void);
2564
2565	/**
2566	* Delete the characters of this UnicodeString that lie in the range
2567	* [<TT>start</TT>, <TT>start + length</TT>).
2568	* @param start the offset of the first character to remove
2569	* @param length the number of characters to remove; defaults to INT32_MAX
2570	* @return a reference to this
2571	* @stable ICU 2.0
2572	*/
2573	inline UnicodeString& remove(int32_t start,
2574	int32_t length = static_cast<int32_t>(INT32_MAX));
2575
2576	/**
2577	* Delete the characters of this UnicodeString that lie in the range
2578	* [<TT>start</TT>, <TT>limit</TT>).
2579	* @param start the offset of the first character to remove
2580	* @param limit the offset immediately following the range to remove; defaults to INT32_MAX
2581	* @return a reference to this
2582	* @stable ICU 2.0
2583	*/
2584	inline UnicodeString& removeBetween(int32_t start,
2585	int32_t limit = static_cast<int32_t>(INT32_MAX));
2586
2587	/**
2588	* Retain only the characters in the range
2589	* [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2590	* Removes characters before <code>start</code> and at and after <code>limit</code>.
2591	* @param start the offset of the first character to retain
2592	* @param limit the offset immediately following the range to retain
2593	* @return a reference to this
2594	* @stable ICU 4.4
2595	*/
2596	inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2597
2598	/* Length operations */
2599
2600	/**
2601	* Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2602	* If the length of this UnicodeString is less than targetLength,
2603	* targetLength - length() copies of padChar will be added to the
2604	* beginning of this UnicodeString.
2605	* @param targetLength the desired length of the string
2606	* @param padChar the character to use for padding. Defaults to
2607	* space (U+0020)
2608	* @return TRUE if the text was padded, FALSE otherwise.
2609	* @stable ICU 2.0
2610	*/
2611	UBool padLeading(int32_t targetLength,
2612	char16_t padChar = 0x0020);
2613
2614	/**
2615	* Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2616	* If the length of this UnicodeString is less than targetLength,
2617	* targetLength - length() copies of padChar will be added to the
2618	* end of this UnicodeString.
2619	* @param targetLength the desired length of the string
2620	* @param padChar the character to use for padding. Defaults to
2621	* space (U+0020)
2622	* @return TRUE if the text was padded, FALSE otherwise.
2623	* @stable ICU 2.0
2624	*/
2625	UBool padTrailing(int32_t targetLength,
2626	char16_t padChar = 0x0020);
2627
2628	/**
2629	* Truncate this UnicodeString to the <TT>targetLength</TT>.
2630	* @param targetLength the desired length of this UnicodeString.
2631	* @return TRUE if the text was truncated, FALSE otherwise
2632	* @stable ICU 2.0
2633	*/
2634	inline UBool truncate(int32_t targetLength);
2635
2636	/**
2637	* Trims leading and trailing whitespace from this UnicodeString.
2638	* @return a reference to this
2639	* @stable ICU 2.0
2640	*/
2641	UnicodeString& trim(void);
2642
2643
2644	/* Miscellaneous operations */
2645
2646	/**
2647	* Reverse this UnicodeString in place.
2648	* @return a reference to this
2649	* @stable ICU 2.0
2650	*/
2651	inline UnicodeString& reverse(void);
2652
2653	/**
2654	* Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2655	* this UnicodeString.
2656	* @param start the start of the range to reverse
2657	* @param length the number of characters to reverse
2658	* @return a reference to this
2659	* @stable ICU 2.0
2660	*/
2661	inline UnicodeString& reverse(int32_t start,
2662	int32_t length);
2663
2664	/**
2665	* Convert the characters in this to UPPER CASE following the conventions of
2666	* the default locale.
2667	* @return A reference to this.
2668	* @stable ICU 2.0
2669	*/
2670	UnicodeString& toUpper(void);
2671
2672	/**
2673	* Convert the characters in this to UPPER CASE following the conventions of
2674	* a specific locale.
2675	* @param locale The locale containing the conventions to use.
2676	* @return A reference to this.
2677	* @stable ICU 2.0
2678	*/
2679	UnicodeString& toUpper(const Locale& locale);
2680
2681	/**
2682	* Convert the characters in this to lower case following the conventions of
2683	* the default locale.
2684	* @return A reference to this.
2685	* @stable ICU 2.0
2686	*/
2687	UnicodeString& toLower(void);
2688
2689	/**
2690	* Convert the characters in this to lower case following the conventions of
2691	* a specific locale.
2692	* @param locale The locale containing the conventions to use.
2693	* @return A reference to this.
2694	* @stable ICU 2.0
2695	*/
2696	UnicodeString& toLower(const Locale& locale);
2697
2698	#if !UCONFIG_NO_BREAK_ITERATION
2699
2700	/**
2701	* Titlecase this string, convenience function using the default locale.
2702	*
2703	* Casing is locale-dependent and context-sensitive.
2704	* Titlecasing uses a break iterator to find the first characters of words
2705	* that are to be titlecased. It titlecases those characters and lowercases
2706	* all others.
2707	*
2708	* The titlecase break iterator can be provided to customize for arbitrary
2709	* styles, using rules and dictionaries beyond the standard iterators.
2710	* It may be more efficient to always provide an iterator to avoid
2711	* opening and closing one for each string.
2712	* The standard titlecase iterator for the root locale implements the
2713	* algorithm of Unicode TR 21.
2714	*
2715	* This function uses only the setText(), first() and next() methods of the
2716	* provided break iterator.
2717	*
2718	* @param titleIter A break iterator to find the first characters of words
2719	* that are to be titlecased.
2720	* If none is provided (0), then a standard titlecase
2721	* break iterator is opened.
2722	* Otherwise the provided iterator is set to the string's text.
2723	* @return A reference to this.
2724	* @stable ICU 2.1
2725	*/
2726	UnicodeString &toTitle(BreakIterator *titleIter);
2727
2728	/**
2729	* Titlecase this string.
2730	*
2731	* Casing is locale-dependent and context-sensitive.
2732	* Titlecasing uses a break iterator to find the first characters of words
2733	* that are to be titlecased. It titlecases those characters and lowercases
2734	* all others.
2735	*
2736	* The titlecase break iterator can be provided to customize for arbitrary
2737	* styles, using rules and dictionaries beyond the standard iterators.
2738	* It may be more efficient to always provide an iterator to avoid
2739	* opening and closing one for each string.
2740	* The standard titlecase iterator for the root locale implements the
2741	* algorithm of Unicode TR 21.
2742	*
2743	* This function uses only the setText(), first() and next() methods of the
2744	* provided break iterator.
2745	*
2746	* @param titleIter A break iterator to find the first characters of words
2747	* that are to be titlecased.
2748	* If none is provided (0), then a standard titlecase
2749	* break iterator is opened.
2750	* Otherwise the provided iterator is set to the string's text.
2751	* @param locale The locale to consider.
2752	* @return A reference to this.
2753	* @stable ICU 2.1
2754	*/
2755	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2756
2757	/**
2758	* Titlecase this string, with options.
2759	*
2760	* Casing is locale-dependent and context-sensitive.
2761	* Titlecasing uses a break iterator to find the first characters of words
2762	* that are to be titlecased. It titlecases those characters and lowercases
2763	* all others. (This can be modified with options.)
2764	*
2765	* The titlecase break iterator can be provided to customize for arbitrary
2766	* styles, using rules and dictionaries beyond the standard iterators.
2767	* It may be more efficient to always provide an iterator to avoid
2768	* opening and closing one for each string.
2769	* The standard titlecase iterator for the root locale implements the
2770	* algorithm of Unicode TR 21.
2771	*
2772	* This function uses only the setText(), first() and next() methods of the
2773	* provided break iterator.
2774	*
2775	* @param titleIter A break iterator to find the first characters of words
2776	* that are to be titlecased.
2777	* If none is provided (0), then a standard titlecase
2778	* break iterator is opened.
2779	* Otherwise the provided iterator is set to the string's text.
2780	* @param locale The locale to consider.
2781	* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
2782	* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
2783	* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
2784	* See also ucasemap_open().
2785	* @return A reference to this.
2786	* @stable ICU 3.8
2787	*/
2788	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2789
2790	#endif
2791
2792	/**
2793	* Case-folds the characters in this string.
2794	*
2795	* Case-folding is locale-independent and not context-sensitive,
2796	* but there is an option for whether to include or exclude mappings for dotted I
2797	* and dotless i that are marked with 'T' in CaseFolding.txt.
2798	*
2799	* The result may be longer or shorter than the original.
2800	*
2801	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2802	* @return A reference to this.
2803	* @stable ICU 2.0
2804	*/
2805	UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2806
2807	//========================================
2808	// Access to the internal buffer
2809	//========================================
2810
2811	/**
2812	* Get a read/write pointer to the internal buffer.
2813	* The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
2814	* writable, and is still owned by the UnicodeString object.
2815	* Calls to getBuffer(minCapacity) must not be nested, and
2816	* must be matched with calls to releaseBuffer(newLength).
2817	* If the string buffer was read-only or shared,
2818	* then it will be reallocated and copied.
2819	*
2820	* An attempted nested call will return 0, and will not further modify the
2821	* state of the UnicodeString object.
2822	* It also returns 0 if the string is bogus.
2823	*
2824	* The actual capacity of the string buffer may be larger than minCapacity.
2825	* getCapacity() returns the actual capacity.
2826	* For many operations, the full capacity should be used to avoid reallocations.
2827	*
2828	* While the buffer is "open" between getBuffer(minCapacity)
2829	* and releaseBuffer(newLength), the following applies:
2830	* - The string length is set to 0.
2831	* - Any read API call on the UnicodeString object will behave like on a 0-length string.
2832	* - Any write API call on the UnicodeString object is disallowed and will have no effect.
2833	* - You can read from and write to the returned buffer.
2834	* - The previous string contents will still be in the buffer;
2835	* if you want to use it, then you need to call length() before getBuffer(minCapacity).
2836	* If the length() was greater than minCapacity, then any contents after minCapacity
2837	* may be lost.
2838	* The buffer contents is not NUL-terminated by getBuffer().
2839	* If length()<getCapacity() then you can terminate it by writing a NUL
2840	* at index length().
2841	* - You must call releaseBuffer(newLength) in order to
2842	* return to normal UnicodeString operation.
2843	*
2844	* @param minCapacity the minimum number of char16_ts that are to be available
2845	* in the buffer, starting at the returned pointer;
2846	* default to the current string capacity if minCapacity==-1
2847	* @return a writable pointer to the internal string buffer,
2848	* or nullptr if an error occurs (nested calls, out of memory)
2849	*
2850	* @see releaseBuffer
2851	* @see getTerminatedBuffer()
2852	* @stable ICU 2.0
2853	*/
2854	char16_t *getBuffer(int32_t minCapacity);
2855
2856	/**
2857	* Release a read/write buffer on a UnicodeString object with an
2858	* "open" getBuffer(minCapacity).
2859	* This function must be called in a matched pair with getBuffer(minCapacity).
2860	* releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2861	*
2862	* It will set the string length to newLength, at most to the current capacity.
2863	* If newLength==-1 then it will set the length according to the
2864	* first NUL in the buffer, or to the capacity if there is no NUL.
2865	*
2866	* After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2867	*
2868	* @param newLength the new length of the UnicodeString object;
2869	* defaults to the current capacity if newLength is greater than that;
2870	* if newLength==-1, it defaults to u_strlen(buffer) but not more than
2871	* the current capacity of the string
2872	*
2873	* @see getBuffer(int32_t minCapacity)
2874	* @stable ICU 2.0
2875	*/
2876	void releaseBuffer(int32_t newLength=-1);
2877
2878	/**
2879	* Get a read-only pointer to the internal buffer.
2880	* This can be called at any time on a valid UnicodeString.
2881	*
2882	* It returns 0 if the string is bogus, or
2883	* during an "open" getBuffer(minCapacity).
2884	*
2885	* It can be called as many times as desired.
2886	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2887	* at which time the pointer is semantically invalidated and must not be used any more.
2888	*
2889	* The capacity of the buffer can be determined with getCapacity().
2890	* The part after length() may or may not be initialized and valid,
2891	* depending on the history of the UnicodeString object.
2892	*
2893	* The buffer contents is (probably) not NUL-terminated.
2894	* You can check if it is with
2895	* <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2896	* (See getTerminatedBuffer().)
2897	*
2898	* The buffer may reside in read-only memory. Its contents must not
2899	* be modified.
2900	*
2901	* @return a read-only pointer to the internal string buffer,
2902	* or nullptr if the string is empty or bogus
2903	*
2904	* @see getBuffer(int32_t minCapacity)
2905	* @see getTerminatedBuffer()
2906	* @stable ICU 2.0
2907	*/
2908	inline const char16_t *getBuffer() const;
2909
2910	/**
2911	* Get a read-only pointer to the internal buffer,
2912	* making sure that it is NUL-terminated.
2913	* This can be called at any time on a valid UnicodeString.
2914	*
2915	* It returns 0 if the string is bogus, or
2916	* during an "open" getBuffer(minCapacity), or if the buffer cannot
2917	* be NUL-terminated (because memory allocation failed).
2918	*
2919	* It can be called as many times as desired.
2920	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2921	* at which time the pointer is semantically invalidated and must not be used any more.
2922	*
2923	* The capacity of the buffer can be determined with getCapacity().
2924	* The part after length()+1 may or may not be initialized and valid,
2925	* depending on the history of the UnicodeString object.
2926	*
2927	* The buffer contents is guaranteed to be NUL-terminated.
2928	* getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2929	* is written.
2930	* For this reason, this function is not const, unlike getBuffer().
2931	* Note that a UnicodeString may also contain NUL characters as part of its contents.
2932	*
2933	* The buffer may reside in read-only memory. Its contents must not
2934	* be modified.
2935	*
2936	* @return a read-only pointer to the internal string buffer,
2937	* or 0 if the string is empty or bogus
2938	*
2939	* @see getBuffer(int32_t minCapacity)
2940	* @see getBuffer()
2941	* @stable ICU 2.2
2942	*/
2943	const char16_t *getTerminatedBuffer();
2944
2945	//========================================
2946	// Constructors
2947	//========================================
2948
2949	/** Construct an empty UnicodeString.
2950	* @stable ICU 2.0
2951	*/
2952	inline UnicodeString();
2953
2954	/**
2955	* Construct a UnicodeString with capacity to hold <TT>capacity</TT> char16_ts
2956	* @param capacity the number of char16_ts this UnicodeString should hold
2957	* before a resize is necessary; if count is greater than 0 and count
2958	* code points c take up more space than capacity, then capacity is adjusted
2959	* accordingly.
2960	* @param c is used to initially fill the string
2961	* @param count specifies how many code points c are to be written in the
2962	* string
2963	* @stable ICU 2.0
2964	*/
2965	UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2966
2967	/**
2968	* Single char16_t (code unit) constructor.
2969	*
2970	* It is recommended to mark this constructor "explicit" by
2971	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2972	* on the compiler command line or similar.
2973	* @param ch the character to place in the UnicodeString
2974	* @stable ICU 2.0
2975	*/
2976	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
2977
2978	/**
2979	* Single UChar32 (code point) constructor.
2980	*
2981	* It is recommended to mark this constructor "explicit" by
2982	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2983	* on the compiler command line or similar.
2984	* @param ch the character to place in the UnicodeString
2985	* @stable ICU 2.0
2986	*/
2987	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2988
2989	/**
2990	* char16_t* constructor.
2991	*
2992	* It is recommended to mark this constructor "explicit" by
2993	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2994	* on the compiler command line or similar.
2995	* @param text The characters to place in the UnicodeString. <TT>text</TT>
2996	* must be NUL (U+0000) terminated.
2997	* @stable ICU 2.0
2998	*/
2999	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
3000
3001	#if !U_CHAR16_IS_TYPEDEF
3002	/**
3003	* Constructor from a const uint16_t pointer.
3004	* Forwards to UnicodeString(const char16_t *) via ConstChar16Ptr.
3005	*
3006	* Defining <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3007	* on the compiler command line (or similar) is recommended, so that
3008	* this constructor is marked "explicit".
3009	* @param text NUL-terminated UTF-16 string
3010	* @stable ICU 59
3011	*/
3012	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text)
3013	: UnicodeString(ConstChar16Ptr(text)) {}
3014	#endif
3015
3016	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3017	/**
3018	* wchar_t * constructor.
3019	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3020	* Delegates to UnicodeString(const char16_t *).
3021	*
3022	* It is recommended to mark this constructor "explicit" by
3023	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3024	* on the compiler command line or similar.
3025	* @param text NUL-terminated UTF-16 string
3026	* @stable ICU 59
3027	*/
3028	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3029	UnicodeString(ConstChar16Ptr(text)) {}
3030	#endif
3031
3032	/**
3033	* nullptr_t constructor.
3034	* Effectively the same as the default constructor, makes an empty string object.
3035	*
3036	* It is recommended to mark this constructor "explicit" by
3037	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3038	* on the compiler command line or similar.
3039	* @param text nullptr
3040	* @stable ICU 59
3041	*/
3042	UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3043
3044	/**
3045	* char16_t* constructor.
3046	* @param text The characters to place in the UnicodeString.
3047	* @param textLength The number of Unicode characters in <TT>text</TT>
3048	* to copy.
3049	* @stable ICU 2.0
3050	*/
3051	UnicodeString(const char16_t *text,
3052	int32_t textLength);
3053
3054	#if !U_CHAR16_IS_TYPEDEF
3055	/**
3056	* Constructor from a const uint16_t pointer plus a length.
3057	* Forwards to UnicodeString(const char16_t *, int32_t) via ConstChar16Ptr.
3058	* @param text UTF-16 string
3059	* @param length string length
3060	* @stable ICU 59
3061	*/
3062	UnicodeString(const uint16_t *text, int32_t length)
3063	: UnicodeString(ConstChar16Ptr(text), length) {}
3064	#endif
3065
3066	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3067	/**
3068	* wchar_t * constructor.
3069	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3070	* Delegates to UnicodeString(const char16_t *, int32_t).
3071	* @param text NUL-terminated UTF-16 string
3072	* @param length string length
3073	* @stable ICU 59
3074	*/
3075	UnicodeString(const wchar_t *text, int32_t length) :
3076	UnicodeString(ConstChar16Ptr(text), length) {}
3077	#endif
3078
3079	/**
3080	* nullptr_t constructor.
3081	* Effectively the same as the default constructor, makes an empty string object.
3082	* @param text nullptr
3083	* @param length ignored
3084	* @stable ICU 59
3085	*/
3086	inline UnicodeString(const std::nullptr_t text, int32_t length);
3087
3088	/**
3089	* Readonly-aliasing char16_t* constructor.
3090	* The text will be used for the UnicodeString object, but
3091	* it will not be released when the UnicodeString is destroyed.
3092	* This has copy-on-write semantics:
3093	* When the string is modified, then the buffer is first copied into
3094	* newly allocated memory.
3095	* The aliased buffer is never modified.
3096	*
3097	* In an assignment to another UnicodeString, when using the copy constructor
3098	* or the assignment operator, the text will be copied.
3099	* When using fastCopyFrom(), the text will be aliased again,
3100	* so that both strings then alias the same readonly-text.
3101	*
3102	* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
3103	* This must be true if <code>textLength==-1</code>.
3104	* @param text The characters to alias for the UnicodeString.
3105	* @param textLength The number of Unicode characters in <code>text</code> to alias.
3106	* If -1, then this constructor will determine the length
3107	* by calling <code>u_strlen()</code>.
3108	* @stable ICU 2.0
3109	*/
3110	UnicodeString(UBool isTerminated,
3111	ConstChar16Ptr text,
3112	int32_t textLength);
3113
3114	/**
3115	* Writable-aliasing char16_t* constructor.
3116	* The text will be used for the UnicodeString object, but
3117	* it will not be released when the UnicodeString is destroyed.
3118	* This has write-through semantics:
3119	* For as long as the capacity of the buffer is sufficient, write operations
3120	* will directly affect the buffer. When more capacity is necessary, then
3121	* a new buffer will be allocated and the contents copied as with regularly
3122	* constructed strings.
3123	* In an assignment to another UnicodeString, the buffer will be copied.
3124	* The extract(Char16Ptr dst) function detects whether the dst pointer is the same
3125	* as the string buffer itself and will in this case not copy the contents.
3126	*
3127	* @param buffer The characters to alias for the UnicodeString.
3128	* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
3129	* @param buffCapacity The size of <code>buffer</code> in char16_ts.
3130	* @stable ICU 2.0
3131	*/
3132	UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3133
3134	#if !U_CHAR16_IS_TYPEDEF
3135	/**
3136	* Writable-aliasing uint16_t * constructor.
3137	* Delegates to UnicodeString(const char16_t *, int32_t, int32_t).
3138	* @param buffer writable buffer of/for UTF-16 text
3139	* @param buffLength length of the current buffer contents
3140	* @param buffCapacity buffer capacity
3141	* @stable ICU 59
3142	*/
3143	UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3144	UnicodeString (Char16Ptr (buffer), buffLength, buffCapacity) {}
3145	#endif
3146
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
 * Writable-aliasing wchar_t * constructor.
 * (Only defined if U_SIZEOF_WCHAR_T==2.)
 * Wraps the pointer in a Char16Ptr and delegates to the writable-aliasing
 * constructor UnicodeString(char16_t *, int32_t, int32_t).
 * @param buffer writable buffer of/for UTF-16 text
 * @param buffLength length of the current buffer contents
 * @param buffCapacity buffer capacity
 * @stable ICU 59
 */
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
    UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
#endif
3160
3161	/**
3162	* Writable-aliasing nullptr_t constructor.
3163	* Effectively the same as the default constructor, makes an empty string object.
3164	* @param buffer nullptr
3165	* @param buffLength ignored
3166	* @param buffCapacity ignored
3167	* @stable ICU 59
3168	*/
3169	inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3170
3171	#if U_CHARSET_IS_UTF8 \|\| !UCONFIG_NO_CONVERSION
3172
3173	/**
3174	* char* constructor.
3175	* Uses the default converter (and thus depends on the ICU conversion code)
3176	* unless U_CHARSET_IS_UTF8 is set to 1.
3177	*
3178	* For ASCII (really "invariant character") strings it is more efficient to use
3179	* the constructor that takes a US_INV (for its enum EInvariant).
3180	* For ASCII (invariant-character) string literals, see UNICODE_STRING and
3181	* UNICODE_STRING_SIMPLE.
3182	*
3183	* It is recommended to mark this constructor "explicit" by
3184	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3185	* on the compiler command line or similar.
3186	* @param codepageData an array of bytes, null-terminated,
3187	* in the platform's default codepage.
3188	* @stable ICU 2.0
3189	* @see UNICODE_STRING
3190	* @see UNICODE_STRING_SIMPLE
3191	*/
3192	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3193
3194	/**
3195	* char* constructor.
3196	* Uses the default converter (and thus depends on the ICU conversion code)
3197	* unless U_CHARSET_IS_UTF8 is set to 1.
3198	* @param codepageData an array of bytes in the platform's default codepage.
3199	* @param dataLength The number of bytes in <TT>codepageData</TT>.
3200	* @stable ICU 2.0
3201	*/
3202	UnicodeString(const char *codepageData, int32_t dataLength);
3203
3204	#endif
3205
3206	#if !UCONFIG_NO_CONVERSION
3207
3208	/**
3209	* char* constructor.
3210	* @param codepageData an array of bytes, null-terminated
3211	* @param codepage the encoding of <TT>codepageData</TT>. The special
3212	* value 0 for <TT>codepage</TT> indicates that the text is in the
3213	* platform's default codepage.
3214	*
3215	* If <code>codepage</code> is an empty string (<code>""</code>),
3216	* then a simple conversion is performed on the codepage-invariant
3217	* subset ("invariant characters") of the platform encoding. See utypes.h.
3218	* Recommendation: For invariant-character strings use the constructor
3219	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3220	* because it avoids object code dependencies of UnicodeString on
3221	* the conversion code.
3222	*
3223	* @stable ICU 2.0
3224	*/
3225	UnicodeString(const char codepageData, const* char *codepage);
3226
3227	/**
3228	* char* constructor.
3229	* @param codepageData an array of bytes.
3230	* @param dataLength The number of bytes in <TT>codepageData</TT>.
3231	* @param codepage the encoding of <TT>codepageData</TT>. The special
3232	* value 0 for <TT>codepage</TT> indicates that the text is in the
3233	* platform's default codepage.
3234	* If <code>codepage</code> is an empty string (<code>""</code>),
3235	* then a simple conversion is performed on the codepage-invariant
3236	* subset ("invariant characters") of the platform encoding. See utypes.h.
3237	* Recommendation: For invariant-character strings use the constructor
3238	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3239	* because it avoids object code dependencies of UnicodeString on
3240	* the conversion code.
3241	*
3242	* @stable ICU 2.0
3243	*/
3244	UnicodeString(const char codepageData, int32_t dataLength, const* char *codepage);
3245
3246	/**
3247	* char * / UConverter constructor.
3248	* This constructor uses an existing UConverter object to
3249	* convert the codepage string to Unicode and construct a UnicodeString
3250	* from that.
3251	*
3252	* The converter is reset at first.
3253	* If the error code indicates a failure before this constructor is called,
3254	* or if an error occurs during conversion or construction,
3255	* then the string will be bogus.
3256	*
3257	* This function avoids the overhead of opening and closing a converter if
3258	* multiple strings are constructed.
3259	*
3260	* @param src input codepage string
3261	* @param srcLength length of the input string, can be -1 for NUL-terminated strings
3262	* @param cnv converter object (ucnv_resetToUnicode() will be called),
3263	* can be NULL for the default converter
3264	* @param errorCode normal ICU error code
3265	* @stable ICU 2.0
3266	*/
3267	UnicodeString(
3268	const char *src, int32_t srcLength,
3269	UConverter *cnv,
3270	UErrorCode &errorCode);
3271
3272	#endif
3273
3274	/**
3275	* Constructs a Unicode string from an invariant-character char * string.
3276	* About invariant characters see utypes.h.
3277	* This constructor has no runtime dependency on conversion code and is
3278	* therefore recommended over ones taking a charset name string
3279	* (where the empty string "" indicates invariant-character conversion).
3280	*
3281	* Use the macro US_INV as the third, signature-distinguishing parameter.
3282	*
3283	* For example:
3284	* \code
3285	* void fn(const char *s) {
3286	* UnicodeString ustr(s, -1, US_INV);
3287	* // use ustr ...
3288	* }
3289	* \endcode
3290	*
3291	* @param src String using only invariant characters.
3292	* @param length Length of src, or -1 if NUL-terminated.
3293	* @param inv Signature-distinguishing paramater, use US_INV.
3294	*
3295	* @see US_INV
3296	* @stable ICU 3.2
3297	*/
3298	UnicodeString(const char src, int32_t length, enum* EInvariant inv);
3299
3300
3301	/**
3302	* Copy constructor.
3303	*
3304	* Starting with ICU 2.4, the assignment operator and the copy constructor
3305	* allocate a new buffer and copy the buffer contents even for readonly aliases.
3306	* By contrast, the fastCopyFrom() function implements the old,
3307	* more efficient but less safe behavior
3308	* of making this string also a readonly alias to the same buffer.
3309	*
3310	* If the source object has an "open" buffer from getBuffer(minCapacity),
3311	* then the copy is an empty string.
3312	*
3313	* @param that The UnicodeString object to copy.
3314	* @stable ICU 2.0
3315	* @see fastCopyFrom
3316	*/
3317	UnicodeString(const UnicodeString& that);
3318
3319	/**
3320	* Move constructor; might leave src in bogus state.
3321	* This string will have the same contents and state that the source string had.
3322	* @param src source string
3323	* @stable ICU 56
3324	*/
3325	UnicodeString(UnicodeString &&src) U_NOEXCEPT;
3326
3327	/**
3328	* 'Substring' constructor from tail of source string.
3329	* @param src The UnicodeString object to copy.
3330	* @param srcStart The offset into <tt>src</tt> at which to start copying.
3331	* @stable ICU 2.2
3332	*/
3333	UnicodeString(const UnicodeString& src, int32_t srcStart);
3334
3335	/**
3336	* 'Substring' constructor from subrange of source string.
3337	* @param src The UnicodeString object to copy.
3338	* @param srcStart The offset into <tt>src</tt> at which to start copying.
3339	* @param srcLength The number of characters from <tt>src</tt> to copy.
3340	* @stable ICU 2.2
3341	*/
3342	UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3343
3344	/**
3345	* Clone this object, an instance of a subclass of Replaceable.
3346	* Clones can be used concurrently in multiple threads.
3347	* If a subclass does not implement clone(), or if an error occurs,
3348	* then NULL is returned.
3349	* The clone functions in all subclasses return a pointer to a Replaceable
3350	* because some compilers do not support covariant (same-as-this)
3351	* return types; cast to the appropriate subclass if necessary.
3352	* The caller must delete the clone.
3353	*
3354	* @return a clone of this object
3355	*
3356	* @see Replaceable::clone
3357	* @see getDynamicClassID
3358	* @stable ICU 2.6
3359	*/
3360	virtual Replaceable clone() const*;
3361
3362	/* Destructor.*
3363	* @stable ICU 2.0
3364	*/
3365	virtual ~UnicodeString();
3366
3367	/**
3368	* Create a UnicodeString from a UTF-8 string.
3369	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3370	* Calls u_strFromUTF8WithSub().
3371	*
3372	* @param utf8 UTF-8 input string.
3373	* Note that a StringPiece can be implicitly constructed
3374	* from a std::string or a NUL-terminated const char * string.
3375	* @return A UnicodeString with equivalent UTF-16 contents.
3376	* @see toUTF8
3377	* @see toUTF8String
3378	* @stable ICU 4.2
3379	*/
3380	static UnicodeString fromUTF8(StringPiece utf8);
3381
3382	/**
3383	* Create a UnicodeString from a UTF-32 string.
3384	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3385	* Calls u_strFromUTF32WithSub().
3386	*
3387	* @param utf32 UTF-32 input string. Must not be NULL.
3388	* @param length Length of the input string, or -1 if NUL-terminated.
3389	* @return A UnicodeString with equivalent UTF-16 contents.
3390	* @see toUTF32
3391	* @stable ICU 4.2
3392	*/
3393	static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3394
3395	/* Miscellaneous operations */
3396
3397	/**
3398	* Unescape a string of characters and return a string containing
3399	* the result. The following escape sequences are recognized:
3400	*
3401	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
3402	* \\Uhhhhhhhh 8 hex digits
3403	* \\xhh 1-2 hex digits
3404	* \\ooo 1-3 octal digits; o in [0-7]
3405	* \\cX control-X; X is masked with 0x1F
3406	*
3407	* as well as the standard ANSI C escapes:
3408	*
3409	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3410	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3411	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3412	*
3413	* Anything else following a backslash is generically escaped. For
3414	* example, "[a\\-z]" returns "[a-z]".
3415	*
3416	* If an escape sequence is ill-formed, this method returns an empty
3417	* string. An example of an ill-formed sequence is "\\u" followed by
3418	* fewer than 4 hex digits.
3419	*
3420	* This function is similar to u_unescape() but not identical to it.
3421	* The latter takes a source char*, so it does escape recognition
3422	* and also invariant conversion.
3423	*
3424	* @return a string with backslash escapes interpreted, or an
3425	* empty string on error.
3426	* @see UnicodeString#unescapeAt()
3427	* @see u_unescape()
3428	* @see u_unescapeAt()
3429	* @stable ICU 2.0
3430	*/
3431	UnicodeString unescape() const;
3432
3433	/**
3434	* Unescape a single escape sequence and return the represented
3435	* character. See unescape() for a listing of the recognized escape
3436	* sequences. The character at offset-1 is assumed (without
3437	* checking) to be a backslash. If the escape sequence is
3438	* ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3439	* returned.
3440	*
3441	* @param offset an input output parameter. On input, it is the
3442	* offset into this string where the escape sequence is located,
3443	* after the initial backslash. On output, it is advanced after the
3444	* last character parsed. On error, it is not advanced at all.
3445	* @return the character represented by the escape sequence at
3446	* offset, or U_SENTINEL=-1 on error.
3447	* @see UnicodeString#unescape()
3448	* @see u_unescape()
3449	* @see u_unescapeAt()
3450	* @stable ICU 2.0
3451	*/
3452	UChar32 unescapeAt(int32_t &offset) const;
3453
3454	/**
3455	* ICU "poor man's RTTI", returns a UClassID for this class.
3456	*
3457	* @stable ICU 2.2
3458	*/
3459	static UClassID U_EXPORT2 getStaticClassID();
3460
3461	/**
3462	* ICU "poor man's RTTI", returns a UClassID for the actual class.
3463	*
3464	* @stable ICU 2.2
3465	*/
3466	virtual UClassID getDynamicClassID() const;
3467
3468	//========================================
3469	// Implementation methods
3470	//========================================
3471
3472	protected:
3473	/**
3474	* Implement Replaceable::getLength() (see jitterbug 1027).
3475	* @stable ICU 2.4
3476	*/
3477	virtual int32_t getLength() const;
3478
3479	/**
3480	* The change in Replaceable to use virtual getCharAt() allows
3481	* UnicodeString::charAt() to be inline again (see jitterbug 709).
3482	* @stable ICU 2.4
3483	*/
3484	virtual char16_t getCharAt(int32_t offset) const;
3485
3486	/**
3487	* The change in Replaceable to use virtual getChar32At() allows
3488	* UnicodeString::char32At() to be inline again (see jitterbug 709).
3489	* @stable ICU 2.4
3490	*/
3491	virtual UChar32 getChar32At(int32_t offset) const;
3492
3493	private:
3494	// For char constructors. Could be made public.*
3495	UnicodeString &setToUTF8(StringPiece utf8);
3496	// For extract(char).*
3497	// We could make a toUTF8(target, capacity, errorCode) public but not
3498	// this version: New API will be cleaner if we make callers create substrings
3499	// rather than having start+length on every method,
3500	// and it should take a UErrorCode&.
3501	int32_t
3502	toUTF8(int32_t start, int32_t len,
3503	char target, int32_t capacity) const*;
3504
3505	/**
3506	* Internal string contents comparison, called by operator==.
3507	* Requires: this & text not bogus and have same lengths.
3508	*/
3509	UBool doEquals(const UnicodeString &text, int32_t len) const;
3510
3511	inline int8_t
3512	doCompare(int32_t start,
3513	int32_t length,
3514	const UnicodeString& srcText,
3515	int32_t srcStart,
3516	int32_t srcLength) const;
3517
3518	int8_t doCompare(int32_t start,
3519	int32_t length,
3520	const char16_t *srcChars,
3521	int32_t srcStart,
3522	int32_t srcLength) const;
3523
3524	inline int8_t
3525	doCompareCodePointOrder(int32_t start,
3526	int32_t length,
3527	const UnicodeString& srcText,
3528	int32_t srcStart,
3529	int32_t srcLength) const;
3530
3531	int8_t doCompareCodePointOrder(int32_t start,
3532	int32_t length,
3533	const char16_t *srcChars,
3534	int32_t srcStart,
3535	int32_t srcLength) const;
3536
3537	inline int8_t
3538	doCaseCompare(int32_t start,
3539	int32_t length,
3540	const UnicodeString &srcText,
3541	int32_t srcStart,
3542	int32_t srcLength,
3543	uint32_t options) const;
3544
3545	int8_t
3546	doCaseCompare(int32_t start,
3547	int32_t length,
3548	const char16_t *srcChars,
3549	int32_t srcStart,
3550	int32_t srcLength,
3551	uint32_t options) const;
3552
3553	int32_t doIndexOf(char16_t c,
3554	int32_t start,
3555	int32_t length) const;
3556
3557	int32_t doIndexOf(UChar32 c,
3558	int32_t start,
3559	int32_t length) const;
3560
3561	int32_t doLastIndexOf(char16_t c,
3562	int32_t start,
3563	int32_t length) const;
3564
3565	int32_t doLastIndexOf(UChar32 c,
3566	int32_t start,
3567	int32_t length) const;
3568
3569	void doExtract(int32_t start,
3570	int32_t length,
3571	char16_t *dst,
3572	int32_t dstStart) const;
3573
3574	inline void doExtract(int32_t start,
3575	int32_t length,
3576	UnicodeString& target) const;
3577
3578	inline char16_t doCharAt(int32_t offset) const;
3579
3580	UnicodeString& doReplace(int32_t start,
3581	int32_t length,
3582	const UnicodeString& srcText,
3583	int32_t srcStart,
3584	int32_t srcLength);
3585
3586	UnicodeString& doReplace(int32_t start,
3587	int32_t length,
3588	const char16_t *srcChars,
3589	int32_t srcStart,
3590	int32_t srcLength);
3591
3592	UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3593	UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3594
3595	UnicodeString& doReverse(int32_t start,
3596	int32_t length);
3597
3598	// calculate hash code
3599	int32_t doHashCode(void) const;
3600
3601	// get pointer to start of array
3602	// these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3603	inline char16_t* getArrayStart(void);
3604	inline const char16_t* getArrayStart(void) const;
3605
3606	inline UBool hasShortLength() const;
3607	inline int32_t getShortLength() const;
3608
3609	// A UnicodeString object (not necessarily its current buffer)
3610	// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3611	inline UBool isWritable() const;
3612
3613	// Is the current buffer writable?
3614	inline UBool isBufferWritable() const;
3615
// None of the following does releaseArray().
inline void setZeroLength();
inline void setShortLength(int32_t len);
inline void setLength(int32_t len);
inline void setToEmpty();
inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3622
3623	// allocate the array; result may be the stack buffer
3624	// sets refCount to 1 if appropriate
3625	// sets fArray, fCapacity, and flags
3626	// sets length to 0
3627	// returns boolean for success or failure
3628	UBool allocate(int32_t capacity);
3629
// Releases the array, but only if this string owns it.
void releaseArray();
3632
// Turns a bogus string into an empty one.
void
unBogus();
3635
3636	// implements assigment operator, copy constructor, and fastCopyFrom()
3637	UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3638
3639	// Copies just the fields without memory management.
3640	void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
3641
3642	// Pin start and limit to acceptable values.
3643	inline void pinIndex(int32_t& start) const;
3644	inline void pinIndices(int32_t& start,
3645	int32_t& length) const;
3646
3647	#if !UCONFIG_NO_CONVERSION
3648
3649	/ Internal extract() using UConverter. /
3650	int32_t doExtract(int32_t start, int32_t length,
3651	char *dest, int32_t destCapacity,
3652	UConverter *cnv,
3653	UErrorCode &errorCode) const;
3654
/*
 * The real constructor for converting from codepage data.
 * Assumes that it is called with !fRefCounted.
 *
 * <code>codepage==0</code> selects the default converter
 * for the platform encoding.
 * An empty <code>codepage</code> string (<code>""</code>) selects
 * a simple conversion on the codepage-invariant
 * subset ("invariant characters") of the platform encoding. See utypes.h.
 */
void
doCodepageCreate(const char *codepageData, int32_t dataLength,
                 const char *codepage);
3668
3669	/*
3670	* Worker function for creating a UnicodeString from
3671	* a codepage string using a UConverter.
3672	*/
3673	void
3674	doCodepageCreate(const char *codepageData,
3675	int32_t dataLength,
3676	UConverter *converter,
3677	UErrorCode &status);
3678
3679	#endif
3680
3681	/*
3682	* This function is called when write access to the array
3683	* is necessary.
3684	*
3685	* We need to make a copy of the array if
3686	* the buffer is read-only, or
3687	* the buffer is refCounted (shared), and refCount>1, or
3688	* the buffer is too small.
3689	*
3690	* Return FALSE if memory could not be allocated.
3691	*/
3692	UBool cloneArrayIfNeeded(int32_t newCapacity = -`1`,
3693	int32_t growCapacity = -`1`,
3694	UBool doCopyArray = TRUE,
3695	int32_t **pBufferToDelete = `0`,
3696	UBool forceClone = FALSE);
3697
3698	/**
3699	* Common function for UnicodeString case mappings.
3700	* The stringCaseMapper has the same type UStringCaseMapper
3701	* as in ustr_imp.h for ustrcase_map().
3702	*/
3703	UnicodeString &
3704	caseMap(int32_t caseLocale, uint32_t options,
3705	#if !UCONFIG_NO_BREAK_ITERATION
3706	BreakIterator *iter,
3707	#endif
3708	UStringCaseMapper *stringCaseMapper);
3709
3710	// ref counting
3711	void addRef(void);
3712	int32_t removeRef(void);
3713	int32_t refCount(void) const;
3714
3715	// constants
3716	enum {
3717	/**
3718	* Size of stack buffer for short strings.
3719	* Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
3720	* @see UNISTR_OBJECT_SIZE
3721	*/
3722	US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-`2`)/U_SIZEOF_UCHAR,
3723	kInvalidUChar=`0xffff`, // U+FFFF returned by charAt(invalid index)
3724	kInvalidHashCode=`0`, // invalid hash code
3725	kEmptyHashCode=`1`, // hash code for empty string
3726
3727	// bit flag values for fLengthAndFlags
3728	kIsBogus=`1`, // this string is bogus, i.e., not valid or NULL
3729	kUsingStackBuffer=`2`,// using fUnion.fStackFields instead of fUnion.fFields
3730	kRefCounted=`4`, // there is a refCount field before the characters in fArray
3731	kBufferIsReadonly=`8`,// do not write to this buffer
3732	kOpenGetBuffer=`16`, // getBuffer(minCapacity) was called (is "open"),
3733	// and releaseBuffer(newLength) must be called
3734	kAllStorageFlags=`0x1f`,
3735
3736	kLengthShift=`5`, // remaining 11 bits for non-negative short length, or negative if long
3737	kLength1=`1`<<kLengthShift,
3738	kMaxShortLength=`0x3ff`, // max non-negative short length (leaves top bit 0)
3739	kLengthIsLarge=`0xffe0`, // short length < 0, real length is in fUnion.fFields.fLength
3740
3741	// combined values for convenience
3742	kShortString=kUsingStackBuffer,
3743	kLongString=kRefCounted,
3744	kReadonlyAlias=kBufferIsReadonly,
3745	kWritableAlias=`0`
3746	};
3747
3748	friend class UnicodeStringAppendable;
3749
3750	union StackBufferOrFields; // forward declaration necessary before friend declaration
3751	friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3752
3753	/*
3754	* The following are all the class fields that are stored
3755	* in each UnicodeString object.
3756	* Note that UnicodeString has virtual functions,
3757	* therefore there is an implicit vtable pointer
3758	* as the first real field.
3759	* The fields should be aligned such that no padding is necessary.
3760	* On 32-bit machines, the size should be 32 bytes,
3761	* on 64-bit machines (8-byte pointers), it should be 40 bytes.
3762	*
3763	* We use a hack to achieve this.
3764	*
3765	* With at least some compilers, each of the following is forced to
3766	* a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3767	* rounded up with additional padding if the fields do not already fit that requirement:
3768	* - sizeof(class UnicodeString)
3769	* - offsetof(UnicodeString, fUnion)
3770	* - sizeof(fUnion)
3771	* - sizeof(fStackFields)
3772	*
3773	* We optimize for the longest possible internal buffer for short strings.
3774	* fUnion.fStackFields begins with 2 bytes for storage flags
3775	* and the length of relatively short strings,
3776	* followed by the buffer for short string contents.
3777	* There is no padding inside fStackFields.
3778	*
3779	* Heap-allocated and aliased strings use fUnion.fFields.
3780	* Both fStackFields and fFields must begin with the same fields for flags and short length,
3781	* that is, those must have the same memory offsets inside the object,
3782	* because the flags must be inspected in order to decide which half of fUnion is being used.
3783	* We assume that the compiler does not reorder the fields.
3784	*
3785	* (Padding at the end of fFields is ok:
3786	* As long as it is no larger than fStackFields, it is not wasted space.)
3787	*
3788	* For some of the history of the UnicodeString class fields layout, see
3789	* - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3790	* - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3791	* - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3792	*/
3793	// (implicit) vtable;*
3794	union StackBufferOrFields {
3795	// fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3796	// Each struct of the union must begin with fLengthAndFlags.
3797	struct {
3798	int16_t fLengthAndFlags; // bit fields: see constants above
3799	char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
3800	} fStackFields;
3801	struct {
3802	int16_t fLengthAndFlags; // bit fields: see constants above
3803	int32_t fLength; // number of characters in fArray if >127; else undefined
3804	int32_t fCapacity; // capacity of fArray (in char16_ts)
3805	// array pointer last to minimize padding for machines with P128 data model
3806	// or pointer sizes that are not a power of 2
3807	char16_t fArray; // the Unicode data*
3808	} fFields;
3809	} fUnion;
3810	};
3811
3812	/**
3813	* Create a new UnicodeString with the concatenation of two others.
3814	*
3815	* @param s1 The first string to be copied to the new one.
3816	* @param s2 The second string to be copied to the new one, after s1.
3817	* @return UnicodeString(s1).append(s2)
3818	* @stable ICU 2.8
3819	*/
3820	U_COMMON_API UnicodeString U_EXPORT2
3821	operator+ (const UnicodeString &s1, const UnicodeString &s2);
3822
3823	//========================================
3824	// Inline members
3825	//========================================
3826
3827	//========================================
3828	// Privates
3829	//========================================
3830
3831	inline void
3832	UnicodeString::pinIndex(int32_t& start) const
3833	{
3834	// pin index
3835	if(start < `0`) {
3836	start = `0`;
3837	} else if(start > length()) {
3838	start = length();
3839	}
3840	}
3841
3842	inline void
3843	UnicodeString::pinIndices(int32_t& start,
3844	int32_t& _length) const
3845	{
3846	// pin indices
3847	int32_t len = length();
3848	if(start < `0`) {
3849	start = `0`;
3850	} else if(start > len) {
3851	start = len;
3852	}
3853	if(_length < `0`) {
3854	_length = `0`;
3855	} else if(_length > (len - start)) {
3856	_length = (len - start);
3857	}
3858	}
3859
3860	inline char16_t*
3861	UnicodeString::getArrayStart() {
3862	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3863	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3864	}
3865
3866	inline const char16_t*
3867	UnicodeString::getArrayStart() const {
3868	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3869	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3870	}
3871
3872	//========================================
3873	// Default constructor
3874	//========================================
3875
3876	inline
3877	UnicodeString::UnicodeString() {
3878	fUnion.fStackFields.fLengthAndFlags=kShortString;
3879	}
3880
3881	inline UnicodeString::UnicodeString(const std::nullptr_t /text/) {
3882	fUnion.fStackFields.fLengthAndFlags=kShortString;
3883	}
3884
3885	inline UnicodeString::UnicodeString(const std::nullptr_t /text/, int32_t /length/) {
3886	fUnion.fStackFields.fLengthAndFlags=kShortString;
3887	}
3888
3889	inline UnicodeString::UnicodeString(std::nullptr_t /buffer/, int32_t /buffLength/, int32_t /buffCapacity/) {
3890	fUnion.fStackFields.fLengthAndFlags=kShortString;
3891	}
3892
3893	//========================================
3894	// Read-only implementation methods
3895	//========================================
3896	inline UBool
3897	UnicodeString::hasShortLength() const {
3898	return fUnion.fFields.fLengthAndFlags>=`0`;
3899	}
3900
3901	inline int32_t
3902	UnicodeString::getShortLength() const {
3903	// fLengthAndFlags must be non-negative -> short length >= 0
3904	// and arithmetic or logical shift does not matter.
3905	return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3906	}
3907
3908	inline int32_t
3909	UnicodeString::length() const {
3910	return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3911	}
3912
3913	inline int32_t
3914	UnicodeString::getCapacity() const {
3915	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3916	US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3917	}
3918
3919	inline int32_t
3920	UnicodeString::hashCode() const
3921	{ return doHashCode(); }
3922
3923	inline UBool
3924	UnicodeString::isBogus() const
3925	{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3926
3927	inline UBool
3928	UnicodeString::isWritable() const
3929	{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus)); }
3930
3931	inline UBool
3932	UnicodeString::isBufferWritable() const
3933	{
3934	return (UBool)(
3935	!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus\|kBufferIsReadonly)) &&
3936	(!(fUnion.fFields.fLengthAndFlags&kRefCounted) \|\| refCount()==`1`));
3937	}
3938
3939	inline const char16_t *
3940	UnicodeString::getBuffer() const {
3941	if(fUnion.fFields.fLengthAndFlags&(kIsBogus\|kOpenGetBuffer)) {
3942	return nullptr;
3943	} else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3944	return fUnion.fStackFields.fBuffer;
3945	} else {
3946	return fUnion.fFields.fArray;
3947	}
3948	}
3949
3950	//========================================
3951	// Read-only alias methods
3952	//========================================
3953	inline int8_t
3954	UnicodeString::doCompare(int32_t start,
3955	int32_t thisLength,
3956	const UnicodeString& srcText,
3957	int32_t srcStart,
3958	int32_t srcLength) const
3959	{
3960	if(srcText.isBogus()) {
3961	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3962	} else {
3963	srcText.pinIndices(srcStart, srcLength);
3964	return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3965	}
3966	}
3967
3968	inline UBool
3969	UnicodeString::operator== (const UnicodeString& text) const
3970	{
3971	if(isBogus()) {
3972	return text.isBogus();
3973	} else {
3974	int32_t len = length(), textLength = text.length();
3975	return !text.isBogus() && len == textLength && doEquals(text, len);
3976	}
3977	}
3978
3979	inline UBool
3980	UnicodeString::operator!= (const UnicodeString& text) const
3981	{ return (! operator==(text)); }
3982
3983	inline UBool
3984	UnicodeString::operator> (const UnicodeString& text) const
3985	{ return doCompare(`0`, length(), text, `0`, text.length()) == `1`; }
3986
3987	inline UBool
3988	UnicodeString::operator< (const UnicodeString& text) const
3989	{ return doCompare(`0`, length(), text, `0`, text.length()) == -`1`; }
3990
3991	inline UBool
3992	UnicodeString::operator>= (const UnicodeString& text) const
3993	{ return doCompare(`0`, length(), text, `0`, text.length()) != -`1`; }
3994
3995	inline UBool
3996	UnicodeString::operator<= (const UnicodeString& text) const
3997	{ return doCompare(`0`, length(), text, `0`, text.length()) != `1`; }
3998
3999	inline int8_t
4000	UnicodeString::compare(const UnicodeString& text) const
4001	{ return doCompare(`0`, length(), text, `0`, text.length()); }
4002
4003	inline int8_t
4004	UnicodeString::compare(int32_t start,
4005	int32_t _length,
4006	const UnicodeString& srcText) const
4007	{ return doCompare(start, _length, srcText, `0`, srcText.length()); }
4008
4009	inline int8_t
4010	UnicodeString::compare(ConstChar16Ptr srcChars,
4011	int32_t srcLength) const
4012	{ return doCompare(`0`, length(), srcChars, `0`, srcLength); }
4013
4014	inline int8_t
4015	UnicodeString::compare(int32_t start,
4016	int32_t _length,
4017	const UnicodeString& srcText,
4018	int32_t srcStart,
4019	int32_t srcLength) const
4020	{ return doCompare(start, _length, srcText, srcStart, srcLength); }
4021
4022	inline int8_t
4023	UnicodeString::compare(int32_t start,
4024	int32_t _length,
4025	const char16_t srcChars) const*
4026	{ return doCompare(start, _length, srcChars, `0`, _length); }
4027
4028	inline int8_t
4029	UnicodeString::compare(int32_t start,
4030	int32_t _length,
4031	const char16_t *srcChars,
4032	int32_t srcStart,
4033	int32_t srcLength) const
4034	{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
4035
4036	inline int8_t
4037	UnicodeString::compareBetween(int32_t start,
4038	int32_t limit,
4039	const UnicodeString& srcText,
4040	int32_t srcStart,
4041	int32_t srcLimit) const
4042	{ return doCompare(start, limit - start,
4043	srcText, srcStart, srcLimit - srcStart); }
4044
4045	inline int8_t
4046	UnicodeString::doCompareCodePointOrder(int32_t start,
4047	int32_t thisLength,
4048	const UnicodeString& srcText,
4049	int32_t srcStart,
4050	int32_t srcLength) const
4051	{
4052	if(srcText.isBogus()) {
4053	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4054	} else {
4055	srcText.pinIndices(srcStart, srcLength);
4056	return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4057	}
4058	}
4059
4060	inline int8_t
4061	UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4062	{ return doCompareCodePointOrder(`0`, length(), text, `0`, text.length()); }
4063
4064	inline int8_t
4065	UnicodeString::compareCodePointOrder(int32_t start,
4066	int32_t _length,
4067	const UnicodeString& srcText) const
4068	{ return doCompareCodePointOrder(start, _length, srcText, `0`, srcText.length()); }
4069
4070	inline int8_t
4071	UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4072	int32_t srcLength) const
4073	{ return doCompareCodePointOrder(`0`, length(), srcChars, `0`, srcLength); }
4074
4075	inline int8_t
4076	UnicodeString::compareCodePointOrder(int32_t start,
4077	int32_t _length,
4078	const UnicodeString& srcText,
4079	int32_t srcStart,
4080	int32_t srcLength) const
4081	{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4082
4083	inline int8_t
4084	UnicodeString::compareCodePointOrder(int32_t start,
4085	int32_t _length,
4086	const char16_t srcChars) const*
4087	{ return doCompareCodePointOrder(start, _length, srcChars, `0`, _length); }
4088
4089	inline int8_t
4090	UnicodeString::compareCodePointOrder(int32_t start,
4091	int32_t _length,
4092	const char16_t *srcChars,
4093	int32_t srcStart,
4094	int32_t srcLength) const
4095	{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4096
4097	inline int8_t
4098	UnicodeString::compareCodePointOrderBetween(int32_t start,
4099	int32_t limit,
4100	const UnicodeString& srcText,
4101	int32_t srcStart,
4102	int32_t srcLimit) const
4103	{ return doCompareCodePointOrder(start, limit - start,
4104	srcText, srcStart, srcLimit - srcStart); }
4105
4106	inline int8_t
4107	UnicodeString::doCaseCompare(int32_t start,
4108	int32_t thisLength,
4109	const UnicodeString &srcText,
4110	int32_t srcStart,
4111	int32_t srcLength,
4112	uint32_t options) const
4113	{
4114	if(srcText.isBogus()) {
4115	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4116	} else {
4117	srcText.pinIndices(srcStart, srcLength);
4118	return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4119	}
4120	}
4121
4122	inline int8_t
4123	UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4124	return doCaseCompare(`0`, length(), text, `0`, text.length(), options);
4125	}
4126
4127	inline int8_t
4128	UnicodeString::caseCompare(int32_t start,
4129	int32_t _length,
4130	const UnicodeString &srcText,
4131	uint32_t options) const {
4132	return doCaseCompare(start, _length, srcText, `0`, srcText.length(), options);
4133	}
4134
4135	inline int8_t
4136	UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4137	int32_t srcLength,
4138	uint32_t options) const {
4139	return doCaseCompare(`0`, length(), srcChars, `0`, srcLength, options);
4140	}
4141
4142	inline int8_t
4143	UnicodeString::caseCompare(int32_t start,
4144	int32_t _length,
4145	const UnicodeString &srcText,
4146	int32_t srcStart,
4147	int32_t srcLength,
4148	uint32_t options) const {
4149	return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4150	}
4151
4152	inline int8_t
4153	UnicodeString::caseCompare(int32_t start,
4154	int32_t _length,
4155	const char16_t *srcChars,
4156	uint32_t options) const {
4157	return doCaseCompare(start, _length, srcChars, `0`, _length, options);
4158	}
4159
4160	inline int8_t
4161	UnicodeString::caseCompare(int32_t start,
4162	int32_t _length,
4163	const char16_t *srcChars,
4164	int32_t srcStart,
4165	int32_t srcLength,
4166	uint32_t options) const {
4167	return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4168	}
4169
4170	inline int8_t
4171	UnicodeString::caseCompareBetween(int32_t start,
4172	int32_t limit,
4173	const UnicodeString &srcText,
4174	int32_t srcStart,
4175	int32_t srcLimit,
4176	uint32_t options) const {
4177	return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4178	}
4179
4180	inline int32_t
4181	UnicodeString::indexOf(const UnicodeString& srcText,
4182	int32_t srcStart,
4183	int32_t srcLength,
4184	int32_t start,
4185	int32_t _length) const
4186	{
4187	if(!srcText.isBogus()) {
4188	srcText.pinIndices(srcStart, srcLength);
4189	if(srcLength > `0`) {
4190	return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4191	}
4192	}
4193	return -`1`;
4194	}
4195
4196	inline int32_t
4197	UnicodeString::indexOf(const UnicodeString& text) const
4198	{ return indexOf(text, `0`, text.length(), `0`, length()); }
4199
4200	inline int32_t
4201	UnicodeString::indexOf(const UnicodeString& text,
4202	int32_t start) const {
4203	pinIndex(start);
4204	return indexOf(text, `0`, text.length(), start, length() - start);
4205	}
4206
4207	inline int32_t
4208	UnicodeString::indexOf(const UnicodeString& text,
4209	int32_t start,
4210	int32_t _length) const
4211	{ return indexOf(text, `0`, text.length(), start, _length); }
4212
4213	inline int32_t
4214	UnicodeString::indexOf(const char16_t *srcChars,
4215	int32_t srcLength,
4216	int32_t start) const {
4217	pinIndex(start);
4218	return indexOf(srcChars, `0`, srcLength, start, length() - start);
4219	}
4220
4221	inline int32_t
4222	UnicodeString::indexOf(ConstChar16Ptr srcChars,
4223	int32_t srcLength,
4224	int32_t start,
4225	int32_t _length) const
4226	{ return indexOf(srcChars, `0`, srcLength, start, _length); }
4227
4228	inline int32_t
4229	UnicodeString::indexOf(char16_t c,
4230	int32_t start,
4231	int32_t _length) const
4232	{ return doIndexOf(c, start, _length); }
4233
4234	inline int32_t
4235	UnicodeString::indexOf(UChar32 c,
4236	int32_t start,
4237	int32_t _length) const
4238	{ return doIndexOf(c, start, _length); }
4239
4240	inline int32_t
4241	UnicodeString::indexOf(char16_t c) const
4242	{ return doIndexOf(c, `0`, length()); }
4243
4244	inline int32_t
4245	UnicodeString::indexOf(UChar32 c) const
4246	{ return indexOf(c, `0`, length()); }
4247
4248	inline int32_t
4249	UnicodeString::indexOf(char16_t c,
4250	int32_t start) const {
4251	pinIndex(start);
4252	return doIndexOf(c, start, length() - start);
4253	}
4254
4255	inline int32_t
4256	UnicodeString::indexOf(UChar32 c,
4257	int32_t start) const {
4258	pinIndex(start);
4259	return indexOf(c, start, length() - start);
4260	}
4261
4262	inline int32_t
4263	UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4264	int32_t srcLength,
4265	int32_t start,
4266	int32_t _length) const
4267	{ return lastIndexOf(srcChars, `0`, srcLength, start, _length); }
4268
4269	inline int32_t
4270	UnicodeString::lastIndexOf(const char16_t *srcChars,
4271	int32_t srcLength,
4272	int32_t start) const {
4273	pinIndex(start);
4274	return lastIndexOf(srcChars, `0`, srcLength, start, length() - start);
4275	}
4276
4277	inline int32_t
4278	UnicodeString::lastIndexOf(const UnicodeString& srcText,
4279	int32_t srcStart,
4280	int32_t srcLength,
4281	int32_t start,
4282	int32_t _length) const
4283	{
4284	if(!srcText.isBogus()) {
4285	srcText.pinIndices(srcStart, srcLength);
4286	if(srcLength > `0`) {
4287	return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4288	}
4289	}
4290	return -`1`;
4291	}
4292
4293	inline int32_t
4294	UnicodeString::lastIndexOf(const UnicodeString& text,
4295	int32_t start,
4296	int32_t _length) const
4297	{ return lastIndexOf(text, `0`, text.length(), start, _length); }
4298
4299	inline int32_t
4300	UnicodeString::lastIndexOf(const UnicodeString& text,
4301	int32_t start) const {
4302	pinIndex(start);
4303	return lastIndexOf(text, `0`, text.length(), start, length() - start);
4304	}
4305
4306	inline int32_t
4307	UnicodeString::lastIndexOf(const UnicodeString& text) const
4308	{ return lastIndexOf(text, `0`, text.length(), `0`, length()); }
4309
4310	inline int32_t
4311	UnicodeString::lastIndexOf(char16_t c,
4312	int32_t start,
4313	int32_t _length) const
4314	{ return doLastIndexOf(c, start, _length); }
4315
4316	inline int32_t
4317	UnicodeString::lastIndexOf(UChar32 c,
4318	int32_t start,
4319	int32_t _length) const {
4320	return doLastIndexOf(c, start, _length);
4321	}
4322
4323	inline int32_t
4324	UnicodeString::lastIndexOf(char16_t c) const
4325	{ return doLastIndexOf(c, `0`, length()); }
4326
4327	inline int32_t
4328	UnicodeString::lastIndexOf(UChar32 c) const {
4329	return lastIndexOf(c, `0`, length());
4330	}
4331
4332	inline int32_t
4333	UnicodeString::lastIndexOf(char16_t c,
4334	int32_t start) const {
4335	pinIndex(start);
4336	return doLastIndexOf(c, start, length() - start);
4337	}
4338
4339	inline int32_t
4340	UnicodeString::lastIndexOf(UChar32 c,
4341	int32_t start) const {
4342	pinIndex(start);
4343	return lastIndexOf(c, start, length() - start);
4344	}
4345
4346	inline UBool
4347	UnicodeString::startsWith(const UnicodeString& text) const
4348	{ return compare(`0`, text.length(), text, `0`, text.length()) == `0`; }
4349
4350	inline UBool
4351	UnicodeString::startsWith(const UnicodeString& srcText,
4352	int32_t srcStart,
4353	int32_t srcLength) const
4354	{ return doCompare(`0`, srcLength, srcText, srcStart, srcLength) == `0`; }
4355
4356	inline UBool
4357	UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4358	if(srcLength < `0`) {
4359	srcLength = u_strlen(toUCharPtr(srcChars));
4360	}
4361	return doCompare(`0`, srcLength, srcChars, `0`, srcLength) == `0`;
4362	}
4363
4364	inline UBool
4365	UnicodeString::startsWith(const char16_t srcChars, int32_t srcStart, int32_t srcLength) const* {
4366	if(srcLength < `0`) {
4367	srcLength = u_strlen(toUCharPtr(srcChars));
4368	}
4369	return doCompare(`0`, srcLength, srcChars, srcStart, srcLength) == `0`;
4370	}
4371
4372	inline UBool
4373	UnicodeString::endsWith(const UnicodeString& text) const
4374	{ return doCompare(length() - text.length(), text.length(),
4375	text, `0`, text.length()) == `0`; }
4376
4377	inline UBool
4378	UnicodeString::endsWith(const UnicodeString& srcText,
4379	int32_t srcStart,
4380	int32_t srcLength) const {
4381	srcText.pinIndices(srcStart, srcLength);
4382	return doCompare(length() - srcLength, srcLength,
4383	srcText, srcStart, srcLength) == `0`;
4384	}
4385
4386	inline UBool
4387	UnicodeString::endsWith(ConstChar16Ptr srcChars,
4388	int32_t srcLength) const {
4389	if(srcLength < `0`) {
4390	srcLength = u_strlen(toUCharPtr(srcChars));
4391	}
4392	return doCompare(length() - srcLength, srcLength,
4393	srcChars, `0`, srcLength) == `0`;
4394	}
4395
4396	inline UBool
4397	UnicodeString::endsWith(const char16_t *srcChars,
4398	int32_t srcStart,
4399	int32_t srcLength) const {
4400	if(srcLength < `0`) {
4401	srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4402	}
4403	return doCompare(length() - srcLength, srcLength,
4404	srcChars, srcStart, srcLength) == `0`;
4405	}
4406
4407	//========================================
4408	// replace
4409	//========================================
4410	inline UnicodeString&
4411	UnicodeString::replace(int32_t start,
4412	int32_t _length,
4413	const UnicodeString& srcText)
4414	{ return doReplace(start, _length, srcText, `0`, srcText.length()); }
4415
4416	inline UnicodeString&
4417	UnicodeString::replace(int32_t start,
4418	int32_t _length,
4419	const UnicodeString& srcText,
4420	int32_t srcStart,
4421	int32_t srcLength)
4422	{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4423
4424	inline UnicodeString&
4425	UnicodeString::replace(int32_t start,
4426	int32_t _length,
4427	ConstChar16Ptr srcChars,
4428	int32_t srcLength)
4429	{ return doReplace(start, _length, srcChars, `0`, srcLength); }
4430
4431	inline UnicodeString&
4432	UnicodeString::replace(int32_t start,
4433	int32_t _length,
4434	const char16_t *srcChars,
4435	int32_t srcStart,
4436	int32_t srcLength)
4437	{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4438
4439	inline UnicodeString&
4440	UnicodeString::replace(int32_t start,
4441	int32_t _length,
4442	char16_t srcChar)
4443	{ return doReplace(start, _length, &srcChar, `0`, `1`); }
4444
4445	inline UnicodeString&
4446	UnicodeString::replaceBetween(int32_t start,
4447	int32_t limit,
4448	const UnicodeString& srcText)
4449	{ return doReplace(start, limit - start, srcText, `0`, srcText.length()); }
4450
4451	inline UnicodeString&
4452	UnicodeString::replaceBetween(int32_t start,
4453	int32_t limit,
4454	const UnicodeString& srcText,
4455	int32_t srcStart,
4456	int32_t srcLimit)
4457	{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4458
4459	inline UnicodeString&
4460	UnicodeString::findAndReplace(const UnicodeString& oldText,
4461	const UnicodeString& newText)
4462	{ return findAndReplace(`0`, length(), oldText, `0`, oldText.length(),
4463	newText, `0`, newText.length()); }
4464
4465	inline UnicodeString&
4466	UnicodeString::findAndReplace(int32_t start,
4467	int32_t _length,
4468	const UnicodeString& oldText,
4469	const UnicodeString& newText)
4470	{ return findAndReplace(start, _length, oldText, `0`, oldText.length(),
4471	newText, `0`, newText.length()); }
4472
4473	// ============================
4474	// extract
4475	// ============================
4476	inline void
4477	UnicodeString::doExtract(int32_t start,
4478	int32_t _length,
4479	UnicodeString& target) const
4480	{ target.replace(`0`, target.length(), *this, start, _length); }
4481
4482	inline void
4483	UnicodeString::extract(int32_t start,
4484	int32_t _length,
4485	Char16Ptr target,
4486	int32_t targetStart) const
4487	{ doExtract(start, _length, target, targetStart); }
4488
4489	inline void
4490	UnicodeString::extract(int32_t start,
4491	int32_t _length,
4492	UnicodeString& target) const
4493	{ doExtract(start, _length, target); }
4494
4495	#if !UCONFIG_NO_CONVERSION
4496
4497	inline int32_t
4498	UnicodeString::extract(int32_t start,
4499	int32_t _length,
4500	char *dst,
4501	const char codepage) const*
4502
4503	{
4504	// This dstSize value will be checked explicitly
4505	return extract(start, _length, dst, dst!=`0` ? `0xffffffff` : `0`, codepage);
4506	}
4507
4508	#endif
4509
4510	inline void
4511	UnicodeString::extractBetween(int32_t start,
4512	int32_t limit,
4513	char16_t *dst,
4514	int32_t dstStart) const {
4515	pinIndex(start);
4516	pinIndex(limit);
4517	doExtract(start, limit - start, dst, dstStart);
4518	}
4519
4520	inline UnicodeString
4521	UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4522	return tempSubString(start, limit - start);
4523	}
4524
4525	inline char16_t
4526	UnicodeString::doCharAt(int32_t offset) const
4527	{
4528	if((uint32_t)offset < (uint32_t)length()) {
4529	return getArrayStart()[offset];
4530	} else {
4531	return kInvalidUChar;
4532	}
4533	}
4534
4535	inline char16_t
4536	UnicodeString::charAt(int32_t offset) const
4537	{ return doCharAt(offset); }
4538
4539	inline char16_t
4540	UnicodeString::operator[] (int32_t offset) const
4541	{ return doCharAt(offset); }
4542
4543	inline UBool
4544	UnicodeString::isEmpty() const {
4545	// Arithmetic or logical right shift does not matter: only testing for 0.
4546	return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == `0`;
4547	}
4548
4549	//========================================
4550	// Write implementation methods
4551	//========================================
4552	inline void
4553	UnicodeString::setZeroLength() {
4554	fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4555	}
4556
4557	inline void
4558	UnicodeString::setShortLength(int32_t len) {
4559	// requires 0 <= len <= kMaxShortLength
4560	fUnion.fFields.fLengthAndFlags =
4561	(int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) \| (len << kLengthShift));
4562	}
4563
4564	inline void
4565	UnicodeString::setLength(int32_t len) {
4566	if(len <= kMaxShortLength) {
4567	setShortLength(len);
4568	} else {
4569	fUnion.fFields.fLengthAndFlags \|= kLengthIsLarge;
4570	fUnion.fFields.fLength = len;
4571	}
4572	}
4573
4574	inline void
4575	UnicodeString::setToEmpty() {
4576	fUnion.fFields.fLengthAndFlags = kShortString;
4577	}
4578
4579	inline void
4580	UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4581	setLength(len);
4582	fUnion.fFields.fArray = array;
4583	fUnion.fFields.fCapacity = capacity;
4584	}
4585
4586	inline UnicodeString&
4587	UnicodeString::operator= (char16_t ch)
4588	{ return doReplace(`0`, length(), &ch, `0`, `1`); }
4589
4590	inline UnicodeString&
4591	UnicodeString::operator= (UChar32 ch)
4592	{ return replace(`0`, length(), ch); }
4593
4594	inline UnicodeString&
4595	UnicodeString::setTo(const UnicodeString& srcText,
4596	int32_t srcStart,
4597	int32_t srcLength)
4598	{
4599	unBogus();
4600	return doReplace(`0`, length(), srcText, srcStart, srcLength);
4601	}
4602
4603	inline UnicodeString&
4604	UnicodeString::setTo(const UnicodeString& srcText,
4605	int32_t srcStart)
4606	{
4607	unBogus();
4608	srcText.pinIndex(srcStart);
4609	return doReplace(`0`, length(), srcText, srcStart, srcText.length() - srcStart);
4610	}
4611
4612	inline UnicodeString&
4613	UnicodeString::setTo(const UnicodeString& srcText)
4614	{
4615	return copyFrom(srcText);
4616	}
4617
4618	inline UnicodeString&
4619	UnicodeString::setTo(const char16_t *srcChars,
4620	int32_t srcLength)
4621	{
4622	unBogus();
4623	return doReplace(`0`, length(), srcChars, `0`, srcLength);
4624	}
4625
4626	inline UnicodeString&
4627	UnicodeString::setTo(char16_t srcChar)
4628	{
4629	unBogus();
4630	return doReplace(`0`, length(), &srcChar, `0`, `1`);
4631	}
4632
4633	inline UnicodeString&
4634	UnicodeString::setTo(UChar32 srcChar)
4635	{
4636	unBogus();
4637	return replace(`0`, length(), srcChar);
4638	}
4639
4640	inline UnicodeString&
4641	UnicodeString::append(const UnicodeString& srcText,
4642	int32_t srcStart,
4643	int32_t srcLength)
4644	{ return doAppend(srcText, srcStart, srcLength); }
4645
4646	inline UnicodeString&
4647	UnicodeString::append(const UnicodeString& srcText)
4648	{ return doAppend(srcText, `0`, srcText.length()); }
4649
4650	inline UnicodeString&
4651	UnicodeString::append(const char16_t *srcChars,
4652	int32_t srcStart,
4653	int32_t srcLength)
4654	{ return doAppend(srcChars, srcStart, srcLength); }
4655
4656	inline UnicodeString&
4657	UnicodeString::append(ConstChar16Ptr srcChars,
4658	int32_t srcLength)
4659	{ return doAppend(srcChars, `0`, srcLength); }
4660
4661	inline UnicodeString&
4662	UnicodeString::append(char16_t srcChar)
4663	{ return doAppend(&srcChar, `0`, `1`); }
4664
4665	inline UnicodeString&
4666	UnicodeString::operator+= (char16_t ch)
4667	{ return doAppend(&ch, `0`, `1`); }
4668
4669	inline UnicodeString&
4670	UnicodeString::operator+= (UChar32 ch) {
4671	return append(ch);
4672	}
4673
4674	inline UnicodeString&
4675	UnicodeString::operator+= (const UnicodeString& srcText)
4676	{ return doAppend(srcText, `0`, srcText.length()); }
4677
4678	inline UnicodeString&
4679	UnicodeString::insert(int32_t start,
4680	const UnicodeString& srcText,
4681	int32_t srcStart,
4682	int32_t srcLength)
4683	{ return doReplace(start, `0`, srcText, srcStart, srcLength); }
4684
4685	inline UnicodeString&
4686	UnicodeString::insert(int32_t start,
4687	const UnicodeString& srcText)
4688	{ return doReplace(start, `0`, srcText, `0`, srcText.length()); }
4689
4690	inline UnicodeString&
4691	UnicodeString::insert(int32_t start,
4692	const char16_t *srcChars,
4693	int32_t srcStart,
4694	int32_t srcLength)
4695	{ return doReplace(start, `0`, srcChars, srcStart, srcLength); }
4696
4697	inline UnicodeString&
4698	UnicodeString::insert(int32_t start,
4699	ConstChar16Ptr srcChars,
4700	int32_t srcLength)
4701	{ return doReplace(start, `0`, srcChars, `0`, srcLength); }
4702
4703	inline UnicodeString&
4704	UnicodeString::insert(int32_t start,
4705	char16_t srcChar)
4706	{ return doReplace(start, `0`, &srcChar, `0`, `1`); }
4707
4708	inline UnicodeString&
4709	UnicodeString::insert(int32_t start,
4710	UChar32 srcChar)
4711	{ return replace(start, `0`, srcChar); }
4712
4713
4714	inline UnicodeString&
4715	UnicodeString::remove()
4716	{
4717	// remove() of a bogus string makes the string empty and non-bogus
4718	if(isBogus()) {
4719	setToEmpty();
4720	} else {
4721	setZeroLength();
4722	}
4723	return *this;
4724	}
4725
4726	inline UnicodeString&
4727	UnicodeString::remove(int32_t start,
4728	int32_t _length)
4729	{
4730	if(start <= `0` && _length == INT32_MAX) {
4731	// remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4732	return remove();
4733	}
4734	return doReplace(start, _length, NULL, `0`, `0`);
4735	}
4736
4737	inline UnicodeString&
4738	UnicodeString::removeBetween(int32_t start,
4739	int32_t limit)
4740	{ return doReplace(start, limit - start, NULL, `0`, `0`); }
4741
4742	inline UnicodeString &
4743	UnicodeString::retainBetween(int32_t start, int32_t limit) {
4744	truncate(limit);
4745	return doReplace(`0`, start, NULL, `0`, `0`);
4746	}
4747
4748	inline UBool
4749	UnicodeString::truncate(int32_t targetLength)
4750	{
4751	if(isBogus() && targetLength == `0`) {
4752	// truncate(0) of a bogus string makes the string empty and non-bogus
4753	unBogus();
4754	return FALSE;
4755	} else if((uint32_t)targetLength < (uint32_t)length()) {
4756	setLength(targetLength);
4757	return TRUE;
4758	} else {
4759	return FALSE;
4760	}
4761	}
4762
4763	inline UnicodeString&
4764	UnicodeString::reverse()
4765	{ return doReverse(`0`, length()); }
4766
4767	inline UnicodeString&
4768	UnicodeString::reverse(int32_t start,
4769	int32_t _length)
4770	{ return doReverse(start, _length); }
4771
4772	U_NAMESPACE_END
4773
4774	#endif
4775

Browse the source code of include/unicode/unistr.h