unistr.h source code [include/unicode/unistr.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1998-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File unistr.h
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 09/25/98 stephen Creation.
15	* 11/11/98 stephen Changed per 11/9 code review.
16	* 04/20/99 stephen Overhauled per 4/16 code review.
17	* 11/18/99 aliu Made to inherit from Replaceable. Added method
18	* handleReplaceBetween(); other methods unchanged.
19	* 06/25/01 grhoten Remove dependency on iostream.
20	******************************************************************************
21	*/
22
23	#ifndef UNISTR_H
24	#define UNISTR_H
25
26	/**
27	* \file
28	* \brief C++ API: Unicode String
29	*/
30
31	#include <cstddef>
32	#include "unicode/utypes.h"
33	#include "unicode/char16ptr.h"
34	#include "unicode/rep.h"
35	#include "unicode/std_string.h"
36	#include "unicode/stringpiece.h"
37	#include "unicode/bytestream.h"
38
39	struct UConverter; // unicode/ucnv.h
40
41	#ifndef USTRING_H
42	/**
43	* \ingroup ustring_ustrlen
44	*/
45	U_STABLE int32_t U_EXPORT2
46	u_strlen(const UChar *s);
47	#endif
48
49	U_NAMESPACE_BEGIN
50
51	#if !UCONFIG_NO_BREAK_ITERATION
52	class BreakIterator; // unicode/brkiter.h
53	#endif
54	class Edits;
55
56	U_NAMESPACE_END
57
58	// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
59	/**
60	* Internal string case mapping function type.
61	* All error checking must be done.
62	* src and dest must not overlap.
63	* @internal
64	*/
65	typedef int32_t U_CALLCONV
66	UStringCaseMapper(int32_t caseLocale, uint32_t options,
67	#if !UCONFIG_NO_BREAK_ITERATION
68	icu::BreakIterator *iter,
69	#endif
70	char16_t *dest, int32_t destCapacity,
71	const char16_t *src, int32_t srcLength,
72	icu::Edits *edits,
73	UErrorCode &errorCode);
74
75	U_NAMESPACE_BEGIN
76
77	class Locale; // unicode/locid.h
78	class StringCharacterIterator;
79	class UnicodeStringAppendable; // unicode/appendable.h
80
81	/* The <iostream> include has been moved to unicode/ustream.h */
82
83	/**
84	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
85	* which constructs a Unicode string from an invariant-character char * string.
86	* About invariant characters see utypes.h.
87	* This constructor has no runtime dependency on conversion code and is
88	* therefore recommended over ones taking a charset name string
89	* (where the empty string "" indicates invariant-character conversion).
90	*
91	* @stable ICU 3.2
92	*/
93	#define US_INV icu::UnicodeString::kInvariant
94
95	/**
96	* Unicode String literals in C++.
97	*
98	* Note: these macros are not recommended for new code.
99	* Prior to the availability of C++11 and u"unicode string literals",
100	* these macros were provided for portability and efficiency when
101	* initializing UnicodeStrings from literals.
102	*
103	* They work only for strings that contain "invariant characters", i.e.,
104	* only latin letters, digits, and some punctuation.
105	* See utypes.h for details.
106	*
107	* The string parameter must be a C string literal.
108	* The length of the string, not including the terminating
109	* <code>NUL</code>, must be specified as a constant.
110	* @stable ICU 2.0
111	*/
112	#if !U_CHAR16_IS_TYPEDEF
113	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
114	#else
115	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length)
116	#endif
117
118	/**
119	* Unicode String literals in C++.
120	* Dependent on the platform properties, different UnicodeString
121	* constructors should be used to create a UnicodeString object from
122	* a string literal.
123	* The macros are defined for improved performance.
124	* They work only for strings that contain "invariant characters", i.e.,
125	* only latin letters, digits, and some punctuation.
126	* See utypes.h for details.
127	*
128	* The string parameter must be a C string literal.
129	* @stable ICU 2.0
130	*/
131	#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
132
133	/**
134	* \def UNISTR_FROM_CHAR_EXPLICIT
135	* This can be defined to be empty or "explicit".
136	* If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
137	* constructors are marked as explicit, preventing their inadvertent use.
138	* @stable ICU 49
139	*/
140	#ifndef UNISTR_FROM_CHAR_EXPLICIT
141	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
142	// Auto-"explicit" in ICU library code.
143	# define UNISTR_FROM_CHAR_EXPLICIT explicit
144	# else
145	// Empty by default for source code compatibility.
146	# define UNISTR_FROM_CHAR_EXPLICIT
147	# endif
148	#endif
149
150	/**
151	* \def UNISTR_FROM_STRING_EXPLICIT
152	* This can be defined to be empty or "explicit".
153	* If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
154	* constructors are marked as explicit, preventing their inadvertent use.
155	*
156	* In particular, this helps prevent accidentally depending on ICU conversion code
157	* by passing a string literal into an API with a const UnicodeString & parameter.
158	* @stable ICU 49
159	*/
160	#ifndef UNISTR_FROM_STRING_EXPLICIT
161	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
162	// Auto-"explicit" in ICU library code.
163	# define UNISTR_FROM_STRING_EXPLICIT explicit
164	# else
165	// Empty by default for source code compatibility.
166	# define UNISTR_FROM_STRING_EXPLICIT
167	# endif
168	#endif
169
170	/**
171	* \def UNISTR_OBJECT_SIZE
172	* Desired sizeof(UnicodeString) in bytes.
173	* It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
174	* The object size may want to be a multiple of 16 bytes,
175	* which is a common granularity for heap allocation.
176	*
177	* Any space inside the object beyond sizeof(vtable pointer) + 2
178	* is available for storing short strings inside the object.
179	* The bigger the object, the longer a string that can be stored inside the object,
180	* without additional heap allocation.
181	*
182	* Depending on a platform's pointer size, pointer alignment requirements,
183	* and struct padding, the compiler will usually round up sizeof(UnicodeString)
184	* to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
185	* to hold the fields for heap-allocated strings.
186	* Such a minimum size also ensures that the object is easily large enough
187	* to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
188	*
189	* sizeof(UnicodeString) >= 48 should work for all known platforms.
190	*
191	* For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
192	* sizeof(UnicodeString) = 64 would leave space for
193	* (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
194	* char16_ts stored inside the object.
195	*
196	* The minimum object size on a 64-bit machine would be
197	* 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
198	* and the internal buffer would hold up to 11 char16_ts in that case.
199	*
200	* @see U16_MAX_LENGTH
201	* @stable ICU 56
202	*/
203	#ifndef UNISTR_OBJECT_SIZE
204	# define UNISTR_OBJECT_SIZE 64
205	#endif
206
207	/**
208	* UnicodeString is a string class that stores Unicode characters directly and provides
209	* similar functionality as the Java String and StringBuffer/StringBuilder classes.
210	* It is a concrete implementation of the abstract class Replaceable (for transliteration).
211	*
212	* A UnicodeString may also "alias" an external array of characters
213	* (that is, point to it, rather than own the array)
214	* whose lifetime must then at least match the lifetime of the aliasing object.
215	* This aliasing may be preserved when returning a UnicodeString by value,
216	* depending on the compiler and the function implementation,
217	* via Return Value Optimization (RVO) or the move assignment operator.
218	* (However, the copy assignment operator does not preserve aliasing.)
219	* For details see the description of storage models at the end of the class API docs
220	* and in the User Guide chapter linked from there.
221	*
222	* The UnicodeString class is not suitable for subclassing.
223	*
224	* <p>For an overview of Unicode strings in C and C++ see the
225	* <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
226	*
227	* <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
228	* A Unicode character may be stored with either one code unit
229	* (the most common case) or with a matched pair of special code units
230	* ("surrogates"). The data type for code units is char16_t.
231	* For single-character handling, a Unicode character code <em>point</em> is a value
232	* in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
233	*
234	* <p>Indexes and offsets into and lengths of strings always count code units, not code points.
235	* This is the same as with multi-byte char* strings in traditional string handling.
236	* Operations on partial strings typically do not test for code point boundaries.
237	* If necessary, the user needs to take care of such boundaries by testing for the code unit
238	* values or by using functions like
239	* UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
240	* (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
241	*
242	* UnicodeString methods are more lenient with regard to input parameter values
243	* than other ICU APIs. In particular:
244	* - If indexes are out of bounds for a UnicodeString object
245	* (<0 or >length()) then they are "pinned" to the nearest boundary.
246	* - If primitive string pointer values (e.g., const char16_t * or char *)
247	* for input strings are NULL, then those input string parameters are treated
248	* as if they pointed to an empty string.
249	* However, this is <em>not</em> the case for char * parameters for charset names
250	* or other IDs.
251	* - Most UnicodeString methods do not take a UErrorCode parameter because
252	* there are usually very few opportunities for failure other than a shortage
253	* of memory, error codes in low-level C++ string methods would be inconvenient,
254	* and the error code as the last parameter (ICU convention) would prevent
255	* the use of default parameter values.
256	* Instead, such methods set the UnicodeString into a "bogus" state
257	* (see isBogus()) if an error occurs.
258	*
259	* In string comparisons, two UnicodeString objects that are both "bogus"
260	* compare equal (to be transitive and prevent endless loops in sorting),
261	* and a "bogus" string compares less than any non-"bogus" one.
262	*
263	* Const UnicodeString methods are thread-safe. Multiple threads can use
264	* const methods on the same UnicodeString object simultaneously,
265	* but non-const methods must not be called concurrently (in multiple threads)
266	* with any other (const or non-const) methods.
267	*
268	* Similarly, const UnicodeString & parameters are thread-safe.
269	* One object may be passed in as such a parameter concurrently in multiple threads.
270	* This includes the const UnicodeString & parameters for
271	* copy construction, assignment, and cloning.
272	*
273	* <p>UnicodeString uses several storage methods.
274	* String contents can be stored inside the UnicodeString object itself,
275	* in an allocated and shared buffer, or in an outside buffer that is "aliased".
276	* Most of this is done transparently, but careful aliasing in particular provides
277	* significant performance improvements.
278	* Also, the internal buffer is accessible via special functions.
279	* For details see the
280	* <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p>
281	*
282	* @see utf.h
283	* @see CharacterIterator
284	* @stable ICU 2.0
285	*/
286	class U_COMMON_API UnicodeString : public Replaceable
287	{
288	public:
289
290	/**
291	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
292	* which constructs a Unicode string from an invariant-character char * string.
293	* Use the macro US_INV instead of the full qualification for this value.
294	*
295	* @see US_INV
296	* @stable ICU 3.2
297	*/
298	enum EInvariant {
299	/**
300	* @see EInvariant
301	* @stable ICU 3.2
302	*/
303	kInvariant
304	};
305
306	//========================================
307	// Read-only operations
308	//========================================
309
310	/* Comparison - bitwise only - for international comparison use collation */
311
312	/**
313	* Equality operator. Performs only bitwise comparison.
314	* @param text The UnicodeString to compare to this one.
315	* @return TRUE if <TT>text</TT> contains the same characters as this one,
316	* FALSE otherwise.
317	* @stable ICU 2.0
318	*/
319	inline UBool operator== (const UnicodeString& text) const;
320
321	/**
322	* Inequality operator. Performs only bitwise comparison.
323	* @param text The UnicodeString to compare to this one.
324	* @return FALSE if <TT>text</TT> contains the same characters as this one,
325	* TRUE otherwise.
326	* @stable ICU 2.0
327	*/
328	inline UBool operator!= (const UnicodeString& text) const;
329
330	/**
331	* Greater than operator. Performs only bitwise comparison.
332	* @param text The UnicodeString to compare to this one.
333	* @return TRUE if the characters in this are bitwise
334	* greater than the characters in <code>text</code>, FALSE otherwise
335	* @stable ICU 2.0
336	*/
337	inline UBool operator> (const UnicodeString& text) const;
338
339	/**
340	* Less than operator. Performs only bitwise comparison.
341	* @param text The UnicodeString to compare to this one.
342	* @return TRUE if the characters in this are bitwise
343	* less than the characters in <code>text</code>, FALSE otherwise
344	* @stable ICU 2.0
345	*/
346	inline UBool operator< (const UnicodeString& text) const;
347
348	/**
349	* Greater than or equal operator. Performs only bitwise comparison.
350	* @param text The UnicodeString to compare to this one.
351	* @return TRUE if the characters in this are bitwise
352	* greater than or equal to the characters in <code>text</code>, FALSE otherwise
353	* @stable ICU 2.0
354	*/
355	inline UBool operator>= (const UnicodeString& text) const;
356
357	/**
358	* Less than or equal operator. Performs only bitwise comparison.
359	* @param text The UnicodeString to compare to this one.
360	* @return TRUE if the characters in this are bitwise
361	* less than or equal to the characters in <code>text</code>, FALSE otherwise
362	* @stable ICU 2.0
363	*/
364	inline UBool operator<= (const UnicodeString& text) const;
365
366	/**
367	* Compare the characters bitwise in this UnicodeString to
368	* the characters in <code>text</code>.
369	* @param text The UnicodeString to compare to this one.
370	* @return The result of bitwise character comparison: 0 if this
371	* contains the same characters as <code>text</code>, -1 if the characters in
372	* this are bitwise less than the characters in <code>text</code>, +1 if the
373	* characters in this are bitwise greater than the characters
374	* in <code>text</code>.
375	* @stable ICU 2.0
376	*/
377	inline int8_t compare(const UnicodeString& text) const;
378
379	/**
380	* Compare the characters bitwise in the range
381	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
382	* in the <b>entire string</b> <TT>text</TT>.
383	* (The parameters "start" and "length" are not applied to the other text "text".)
384	* @param start the offset at which the compare operation begins
385	* @param length the number of characters of text to compare.
386	* @param text the other text to be compared against this string.
387	* @return The result of bitwise character comparison: 0 if this
388	* contains the same characters as <code>text</code>, -1 if the characters in
389	* this are bitwise less than the characters in <code>text</code>, +1 if the
390	* characters in this are bitwise greater than the characters
391	* in <code>text</code>.
392	* @stable ICU 2.0
393	*/
394	inline int8_t compare(int32_t start,
395	int32_t length,
396	const UnicodeString& text) const;
397
398	/**
399	* Compare the characters bitwise in the range
400	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
401	* in <TT>srcText</TT> in the range
402	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
403	* @param start the offset at which the compare operation begins
404	* @param length the number of characters in this to compare.
405	* @param srcText the text to be compared
406	* @param srcStart the offset into <TT>srcText</TT> to start comparison
407	* @param srcLength the number of characters in <TT>srcText</TT> to compare
408	* @return The result of bitwise character comparison: 0 if this
409	* contains the same characters as <code>srcText</code>, -1 if the characters in
410	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
411	* characters in this are bitwise greater than the characters
412	* in <code>srcText</code>.
413	* @stable ICU 2.0
414	*/
415	inline int8_t compare(int32_t start,
416	int32_t length,
417	const UnicodeString& srcText,
418	int32_t srcStart,
419	int32_t srcLength) const;
420
421	/**
422	* Compare the characters bitwise in this UnicodeString with the first
423	* <TT>srcLength</TT> characters in <TT>srcChars</TT>.
424	* @param srcChars The characters to compare to this UnicodeString.
425	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
426	* @return The result of bitwise character comparison: 0 if this
427	* contains the same characters as <code>srcChars</code>, -1 if the characters in
428	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
429	* characters in this are bitwise greater than the characters
430	* in <code>srcChars</code>.
431	* @stable ICU 2.0
432	*/
433	inline int8_t compare(ConstChar16Ptr srcChars,
434	int32_t srcLength) const;
435
436	/**
437	* Compare the characters bitwise in the range
438	* [<TT>start</TT>, <TT>start + length</TT>) with the first
439	* <TT>length</TT> characters in <TT>srcChars</TT>.
440	* @param start the offset at which the compare operation begins
441	* @param length the number of characters to compare.
442	* @param srcChars the characters to be compared
443	* @return The result of bitwise character comparison: 0 if this
444	* contains the same characters as <code>srcChars</code>, -1 if the characters in
445	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
446	* characters in this are bitwise greater than the characters
447	* in <code>srcChars</code>.
448	* @stable ICU 2.0
449	*/
450	inline int8_t compare(int32_t start,
451	int32_t length,
452	const char16_t *srcChars) const;
453
454	/**
455	* Compare the characters bitwise in the range
456	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
457	* in <TT>srcChars</TT> in the range
458	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
459	* @param start the offset at which the compare operation begins
460	* @param length the number of characters in this to compare
461	* @param srcChars the characters to be compared
462	* @param srcStart the offset into <TT>srcChars</TT> to start comparison
463	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
464	* @return The result of bitwise character comparison: 0 if this
465	* contains the same characters as <code>srcChars</code>, -1 if the characters in
466	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
467	* characters in this are bitwise greater than the characters
468	* in <code>srcChars</code>.
469	* @stable ICU 2.0
470	*/
471	inline int8_t compare(int32_t start,
472	int32_t length,
473	const char16_t *srcChars,
474	int32_t srcStart,
475	int32_t srcLength) const;
476
477	/**
478	* Compare the characters bitwise in the range
479	* [<TT>start</TT>, <TT>limit</TT>) with the characters
480	* in <TT>srcText</TT> in the range
481	* [<TT>srcStart</TT>, <TT>srcLimit</TT>).
482	* @param start the offset at which the compare operation begins
483	* @param limit the offset immediately following the compare operation
484	* @param srcText the text to be compared
485	* @param srcStart the offset into <TT>srcText</TT> to start comparison
486	* @param srcLimit the offset into <TT>srcText</TT> to limit comparison
487	* @return The result of bitwise character comparison: 0 if this
488	* contains the same characters as <code>srcText</code>, -1 if the characters in
489	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
490	* characters in this are bitwise greater than the characters
491	* in <code>srcText</code>.
492	* @stable ICU 2.0
493	*/
494	inline int8_t compareBetween(int32_t start,
495	int32_t limit,
496	const UnicodeString& srcText,
497	int32_t srcStart,
498	int32_t srcLimit) const;
499
500	/**
501	* Compare two Unicode strings in code point order.
502	* The result may be different from the results of compare(), operator<, etc.
503	* if supplementary characters are present:
504	*
505	* In UTF-16, supplementary characters (with code points U+10000 and above) are
506	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
507	* which means that they compare as less than some other BMP characters like U+feff.
508	* This function compares Unicode strings in code point order.
509	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
510	*
511	* @param text Another string to compare this one to.
512	* @return a negative/zero/positive integer corresponding to whether
513	* this string is less than/equal to/greater than the second one
514	* in code point order
515	* @stable ICU 2.0
516	*/
517	inline int8_t compareCodePointOrder(const UnicodeString& text) const;
518
519	/**
520	* Compare two Unicode strings in code point order.
521	* The result may be different from the results of compare(), operator<, etc.
522	* if supplementary characters are present:
523	*
524	* In UTF-16, supplementary characters (with code points U+10000 and above) are
525	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
526	* which means that they compare as less than some other BMP characters like U+feff.
527	* This function compares Unicode strings in code point order.
528	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
529	*
530	* @param start The start offset in this string at which the compare operation begins.
531	* @param length The number of code units from this string to compare.
532	* @param srcText Another string to compare this one to.
533	* @return a negative/zero/positive integer corresponding to whether
534	* this string is less than/equal to/greater than the second one
535	* in code point order
536	* @stable ICU 2.0
537	*/
538	inline int8_t compareCodePointOrder(int32_t start,
539	int32_t length,
540	const UnicodeString& srcText) const;
541
542	/**
543	* Compare two Unicode strings in code point order.
544	* The result may be different from the results of compare(), operator<, etc.
545	* if supplementary characters are present:
546	*
547	* In UTF-16, supplementary characters (with code points U+10000 and above) are
548	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
549	* which means that they compare as less than some other BMP characters like U+feff.
550	* This function compares Unicode strings in code point order.
551	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
552	*
553	* @param start The start offset in this string at which the compare operation begins.
554	* @param length The number of code units from this string to compare.
555	* @param srcText Another string to compare this one to.
556	* @param srcStart The start offset in that string at which the compare operation begins.
557	* @param srcLength The number of code units from that string to compare.
558	* @return a negative/zero/positive integer corresponding to whether
559	* this string is less than/equal to/greater than the second one
560	* in code point order
561	* @stable ICU 2.0
562	*/
563	inline int8_t compareCodePointOrder(int32_t start,
564	int32_t length,
565	const UnicodeString& srcText,
566	int32_t srcStart,
567	int32_t srcLength) const;
568
569	/**
570	* Compare two Unicode strings in code point order.
571	* The result may be different from the results of compare(), operator<, etc.
572	* if supplementary characters are present:
573	*
574	* In UTF-16, supplementary characters (with code points U+10000 and above) are
575	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
576	* which means that they compare as less than some other BMP characters like U+feff.
577	* This function compares Unicode strings in code point order.
578	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
579	*
580	* @param srcChars A pointer to another string to compare this one to.
581	* @param srcLength The number of code units from that string to compare.
582	* @return a negative/zero/positive integer corresponding to whether
583	* this string is less than/equal to/greater than the second one
584	* in code point order
585	* @stable ICU 2.0
586	*/
587	inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
588	int32_t srcLength) const;
589
590	/**
591	* Compare two Unicode strings in code point order.
592	* The result may be different from the results of compare(), operator<, etc.
593	* if supplementary characters are present:
594	*
595	* In UTF-16, supplementary characters (with code points U+10000 and above) are
596	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
597	* which means that they compare as less than some other BMP characters like U+feff.
598	* This function compares Unicode strings in code point order.
599	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
600	*
601	* @param start The start offset in this string at which the compare operation begins.
602	* @param length The number of code units from this string to compare.
603	* @param srcChars A pointer to another string to compare this one to.
604	* @return a negative/zero/positive integer corresponding to whether
605	* this string is less than/equal to/greater than the second one
606	* in code point order
607	* @stable ICU 2.0
608	*/
609	inline int8_t compareCodePointOrder(int32_t start,
610	int32_t length,
611	const char16_t *srcChars) const;
612
613	/**
614	* Compare two Unicode strings in code point order.
615	* The result may be different from the results of compare(), operator<, etc.
616	* if supplementary characters are present:
617	*
618	* In UTF-16, supplementary characters (with code points U+10000 and above) are
619	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
620	* which means that they compare as less than some other BMP characters like U+feff.
621	* This function compares Unicode strings in code point order.
622	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
623	*
624	* @param start The start offset in this string at which the compare operation begins.
625	* @param length The number of code units from this string to compare.
626	* @param srcChars A pointer to another string to compare this one to.
627	* @param srcStart The start offset in that string at which the compare operation begins.
628	* @param srcLength The number of code units from that string to compare.
629	* @return a negative/zero/positive integer corresponding to whether
630	* this string is less than/equal to/greater than the second one
631	* in code point order
632	* @stable ICU 2.0
633	*/
634	inline int8_t compareCodePointOrder(int32_t start,
635	int32_t length,
636	const char16_t *srcChars,
637	int32_t srcStart,
638	int32_t srcLength) const;
639
640	/**
641	* Compare two Unicode strings in code point order.
642	* The result may be different from the results of compare(), operator<, etc.
643	* if supplementary characters are present:
644	*
645	* In UTF-16, supplementary characters (with code points U+10000 and above) are
646	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
647	* which means that they compare as less than some other BMP characters like U+feff.
648	* This function compares Unicode strings in code point order.
649	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
650	*
651	* @param start The start offset in this string at which the compare operation begins.
652	* @param limit The offset after the last code unit from this string to compare.
653	* @param srcText Another string to compare this one to.
654	* @param srcStart The start offset in that string at which the compare operation begins.
655	* @param srcLimit The offset after the last code unit from that string to compare.
656	* @return a negative/zero/positive integer corresponding to whether
657	* this string is less than/equal to/greater than the second one
658	* in code point order
659	* @stable ICU 2.0
660	*/
661	inline int8_t compareCodePointOrderBetween(int32_t start,
662	int32_t limit,
663	const UnicodeString& srcText,
664	int32_t srcStart,
665	int32_t srcLimit) const;
666
667	/**
668	* Compare two strings case-insensitively using full case folding.
669	* This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
670	*
671	* @param text Another string to compare this one to.
672	* @param options A bit set of options:
673	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
674	* Comparison in code unit order with default case folding.
675	*
676	* - U_COMPARE_CODE_POINT_ORDER
677	* Set to choose code point order instead of code unit order
678	* (see u_strCompare for details).
679	*
680	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
681	*
682	* @return A negative, zero, or positive integer indicating the comparison result.
683	* @stable ICU 2.0
684	*/
685	inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
686
687	/**
688	* Compare two strings case-insensitively using full case folding.
689	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
690	*
691	* @param start The start offset in this string at which the compare operation begins.
692	* @param length The number of code units from this string to compare.
693	* @param srcText Another string to compare this one to.
694	* @param options A bit set of options:
695	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
696	* Comparison in code unit order with default case folding.
697	*
698	* - U_COMPARE_CODE_POINT_ORDER
699	* Set to choose code point order instead of code unit order
700	* (see u_strCompare for details).
701	*
702	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
703	*
704	* @return A negative, zero, or positive integer indicating the comparison result.
705	* @stable ICU 2.0
706	*/
707	inline int8_t caseCompare(int32_t start,
708	int32_t length,
709	const UnicodeString& srcText,
710	uint32_t options) const;
711
712	/**
713	* Compare two strings case-insensitively using full case folding.
714	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
715	*
716	* @param start The start offset in this string at which the compare operation begins.
717	* @param length The number of code units from this string to compare.
718	* @param srcText Another string to compare this one to.
719	* @param srcStart The start offset in that string at which the compare operation begins.
720	* @param srcLength The number of code units from that string to compare.
721	* @param options A bit set of options:
722	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
723	* Comparison in code unit order with default case folding.
724	*
725	* - U_COMPARE_CODE_POINT_ORDER
726	* Set to choose code point order instead of code unit order
727	* (see u_strCompare for details).
728	*
729	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
730	*
731	* @return A negative, zero, or positive integer indicating the comparison result.
732	* @stable ICU 2.0
733	*/
734	inline int8_t caseCompare(int32_t start,
735	int32_t length,
736	const UnicodeString& srcText,
737	int32_t srcStart,
738	int32_t srcLength,
739	uint32_t options) const;
740
741	/**
742	* Compare two strings case-insensitively using full case folding.
743	* This is equivalent to case-folding this string and the srcChars text with the given options, then comparing the results.
744	*
745	* @param srcChars A pointer to another string to compare this one to.
746	* @param srcLength The number of code units from that string to compare.
747	* @param options A bit set of options:
748	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
749	* Comparison in code unit order with default case folding.
750	*
751	* - U_COMPARE_CODE_POINT_ORDER
752	* Set to choose code point order instead of code unit order
753	* (see u_strCompare for details).
754	*
755	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
756	*
757	* @return A negative, zero, or positive integer indicating the comparison result.
758	* @stable ICU 2.0
759	*/
760	inline int8_t caseCompare(ConstChar16Ptr srcChars,
761	int32_t srcLength,
762	uint32_t options) const;
763
764	/**
765	* Compare two strings case-insensitively using full case folding.
766	* This is equivalent to case-folding the specified range of this string and the srcChars text with the given options, then comparing the results.
767	*
768	* @param start The start offset in this string at which the compare operation begins.
769	* @param length The number of code units from this string to compare.
770	* @param srcChars A pointer to another string to compare this one to.
771	* @param options A bit set of options:
772	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
773	* Comparison in code unit order with default case folding.
774	*
775	* - U_COMPARE_CODE_POINT_ORDER
776	* Set to choose code point order instead of code unit order
777	* (see u_strCompare for details).
778	*
779	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
780	*
781	* @return A negative, zero, or positive integer indicating the comparison result.
782	* @stable ICU 2.0
783	*/
784	inline int8_t caseCompare(int32_t start,
785	int32_t length,
786	const char16_t *srcChars,
787	uint32_t options) const;
788
789	/**
790	* Compare two strings case-insensitively using full case folding.
791	* This is equivalent to case-folding the specified ranges of this string and of srcChars with the given options, then comparing the results.
792	*
793	* @param start The start offset in this string at which the compare operation begins.
794	* @param length The number of code units from this string to compare.
795	* @param srcChars A pointer to another string to compare this one to.
796	* @param srcStart The start offset in that string at which the compare operation begins.
797	* @param srcLength The number of code units from that string to compare.
798	* @param options A bit set of options:
799	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
800	* Comparison in code unit order with default case folding.
801	*
802	* - U_COMPARE_CODE_POINT_ORDER
803	* Set to choose code point order instead of code unit order
804	* (see u_strCompare for details).
805	*
806	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
807	*
808	* @return A negative, zero, or positive integer indicating the comparison result.
809	* @stable ICU 2.0
810	*/
811	inline int8_t caseCompare(int32_t start,
812	int32_t length,
813	const char16_t *srcChars,
814	int32_t srcStart,
815	int32_t srcLength,
816	uint32_t options) const;
817
818	/**
819	* Compare two strings case-insensitively using full case folding.
820	* This is equivalent to case-folding the specified ranges of this string and of srcText with the given options, then comparing the results.
821	*
822	* @param start The start offset in this string at which the compare operation begins.
823	* @param limit The offset after the last code unit from this string to compare.
824	* @param srcText Another string to compare this one to.
825	* @param srcStart The start offset in that string at which the compare operation begins.
826	* @param srcLimit The offset after the last code unit from that string to compare.
827	* @param options A bit set of options:
828	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
829	* Comparison in code unit order with default case folding.
830	*
831	* - U_COMPARE_CODE_POINT_ORDER
832	* Set to choose code point order instead of code unit order
833	* (see u_strCompare for details).
834	*
835	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
836	*
837	* @return A negative, zero, or positive integer indicating the comparison result.
838	* @stable ICU 2.0
839	*/
840	inline int8_t caseCompareBetween(int32_t start,
841	int32_t limit,
842	const UnicodeString& srcText,
843	int32_t srcStart,
844	int32_t srcLimit,
845	uint32_t options) const;
846
847	/**
848	* Determine if this starts with the characters in <TT>text</TT>
849	* @param text The text to match.
850	* @return TRUE if this starts with the characters in <TT>text</TT>,
851	* FALSE otherwise
852	* @stable ICU 2.0
853	*/
854	inline UBool startsWith(const UnicodeString& text) const;
855
856	/**
857	* Determine if this starts with the characters in <TT>srcText</TT>
858	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
859	* @param srcText The text to match.
860	* @param srcStart the offset into <TT>srcText</TT> to start matching
861	* @param srcLength the number of characters in <TT>srcText</TT> to match
862	* @return TRUE if this starts with the characters in <TT>srcText</TT>,
863	* FALSE otherwise
864	* @stable ICU 2.0
865	*/
866	inline UBool startsWith(const UnicodeString& srcText,
867	int32_t srcStart,
868	int32_t srcLength) const;
869
870	/**
871	* Determine if this starts with the characters in <TT>srcChars</TT>
872	* @param srcChars The characters to match.
873	* @param srcLength the number of characters in <TT>srcChars</TT>
874	* @return TRUE if this starts with the characters in <TT>srcChars</TT>,
875	* FALSE otherwise
876	* @stable ICU 2.0
877	*/
878	inline UBool startsWith(ConstChar16Ptr srcChars,
879	int32_t srcLength) const;
880
881	/**
882	* Determine if this starts with the characters in <TT>srcChars</TT>
883	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
884	* @param srcChars The characters to match.
885	* @param srcStart the offset into <TT>srcChars</TT> to start matching
886	* @param srcLength the number of characters in <TT>srcChars</TT> to match
887	* @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
888	* @stable ICU 2.0
889	*/
890	inline UBool startsWith(const char16_t *srcChars,
891	int32_t srcStart,
892	int32_t srcLength) const;
893
894	/**
895	* Determine if this ends with the characters in <TT>text</TT>
896	* @param text The text to match.
897	* @return TRUE if this ends with the characters in <TT>text</TT>,
898	* FALSE otherwise
899	* @stable ICU 2.0
900	*/
901	inline UBool endsWith(const UnicodeString& text) const;
902
903	/**
904	* Determine if this ends with the characters in <TT>srcText</TT>
905	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
906	* @param srcText The text to match.
907	* @param srcStart the offset into <TT>srcText</TT> to start matching
908	* @param srcLength the number of characters in <TT>srcText</TT> to match
909	* @return TRUE if this ends with the characters in <TT>srcText</TT>,
910	* FALSE otherwise
911	* @stable ICU 2.0
912	*/
913	inline UBool endsWith(const UnicodeString& srcText,
914	int32_t srcStart,
915	int32_t srcLength) const;
916
917	/**
918	* Determine if this ends with the characters in <TT>srcChars</TT>
919	* @param srcChars The characters to match.
920	* @param srcLength the number of characters in <TT>srcChars</TT>
921	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
922	* FALSE otherwise
923	* @stable ICU 2.0
924	*/
925	inline UBool endsWith(ConstChar16Ptr srcChars,
926	int32_t srcLength) const;
927
928	/**
929	* Determine if this ends with the characters in <TT>srcChars</TT>
930	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
931	* @param srcChars The characters to match.
932	* @param srcStart the offset into <TT>srcChars</TT> to start matching
933	* @param srcLength the number of characters in <TT>srcChars</TT> to match
934	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
935	* FALSE otherwise
936	* @stable ICU 2.0
937	*/
938	inline UBool endsWith(const char16_t *srcChars,
939	int32_t srcStart,
940	int32_t srcLength) const;
941
942
943	/* Searching - bitwise only */
944
945	/**
946	* Locate in this the first occurrence of the characters in <TT>text</TT>,
947	* using bitwise comparison.
948	* @param text The text to search for.
949	* @return The offset into this of the start of <TT>text</TT>,
950	* or -1 if not found.
951	* @stable ICU 2.0
952	*/
953	inline int32_t indexOf(const UnicodeString& text) const;
954
955	/**
956	* Locate in this the first occurrence of the characters in <TT>text</TT>
957	* starting at offset <TT>start</TT>, using bitwise comparison.
958	* @param text The text to search for.
959	* @param start The offset at which searching will start.
960	* @return The offset into this of the start of <TT>text</TT>,
961	* or -1 if not found.
962	* @stable ICU 2.0
963	*/
964	inline int32_t indexOf(const UnicodeString& text,
965	int32_t start) const;
966
967	/**
968	* Locate in this the first occurrence in the range
969	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
970	* in <TT>text</TT>, using bitwise comparison.
971	* @param text The text to search for.
972	* @param start The offset at which searching will start.
973	* @param length The number of characters to search
974	* @return The offset into this of the start of <TT>text</TT>,
975	* or -1 if not found.
976	* @stable ICU 2.0
977	*/
978	inline int32_t indexOf(const UnicodeString& text,
979	int32_t start,
980	int32_t length) const;
981
982	/**
983	* Locate in this the first occurrence in the range
984	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
985	* in <TT>srcText</TT> in the range
986	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
987	* using bitwise comparison.
988	* @param srcText The text to search for.
989	* @param srcStart the offset into <TT>srcText</TT> at which
990	* to start matching
991	* @param srcLength the number of characters in <TT>srcText</TT> to match
992	* @param start the offset into this at which to start matching
993	* @param length the number of characters in this to search
994	* @return The offset into this of the start of <TT>srcText</TT>,
995	* or -1 if not found.
996	* @stable ICU 2.0
997	*/
998	inline int32_t indexOf(const UnicodeString& srcText,
999	int32_t srcStart,
1000	int32_t srcLength,
1001	int32_t start,
1002	int32_t length) const;
1003
1004	/**
1005	* Locate in this the first occurrence of the characters in
1006	* <TT>srcChars</TT>
1007	* starting at offset <TT>start</TT>, using bitwise comparison.
1008	* @param srcChars The text to search for.
1009	* @param srcLength the number of characters in <TT>srcChars</TT> to match
1010	* @param start the offset into this at which to start matching
1011	* @return The offset into this of the start of <TT>srcChars</TT>,
1012	* or -1 if not found.
1013	* @stable ICU 2.0
1014	*/
1015	inline int32_t indexOf(const char16_t *srcChars,
1016	int32_t srcLength,
1017	int32_t start) const;
1018
1019	/**
1020	* Locate in this the first occurrence in the range
1021	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1022	* in <TT>srcChars</TT>, using bitwise comparison.
1023	* @param srcChars The text to search for.
1024	* @param srcLength the number of characters in <TT>srcChars</TT>
1025	* @param start The offset at which searching will start.
1026	* @param length The number of characters to search
1027	* @return The offset into this of the start of <TT>srcChars</TT>,
1028	* or -1 if not found.
1029	* @stable ICU 2.0
1030	*/
1031	inline int32_t indexOf(ConstChar16Ptr srcChars,
1032	int32_t srcLength,
1033	int32_t start,
1034	int32_t length) const;
1035
1036	/**
1037	* Locate in this the first occurrence in the range
1038	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1039	* in <TT>srcChars</TT> in the range
1040	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1041	* using bitwise comparison.
1042	* @param srcChars The text to search for.
1043	* @param srcStart the offset into <TT>srcChars</TT> at which
1044	* to start matching
1045	* @param srcLength the number of characters in <TT>srcChars</TT> to match
1046	* @param start the offset into this at which to start matching
1047	* @param length the number of characters in this to search
1048	* @return The offset into this of the start of <TT>srcChars</TT>,
1049	* or -1 if not found.
1050	* @stable ICU 2.0
1051	*/
1052	int32_t indexOf(const char16_t *srcChars,
1053	int32_t srcStart,
1054	int32_t srcLength,
1055	int32_t start,
1056	int32_t length) const;
1057
1058	/**
1059	* Locate in this the first occurrence of the BMP code point <code>c</code>,
1060	* using bitwise comparison.
1061	* @param c The code unit to search for.
1062	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1063	* @stable ICU 2.0
1064	*/
1065	inline int32_t indexOf(char16_t c) const;
1066
1067	/**
1068	* Locate in this the first occurrence of the code point <TT>c</TT>,
1069	* using bitwise comparison.
1070	*
1071	* @param c The code point to search for.
1072	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1073	* @stable ICU 2.0
1074	*/
1075	inline int32_t indexOf(UChar32 c) const;
1076
1077	/**
1078	* Locate in this the first occurrence of the BMP code point <code>c</code>,
1079	* starting at offset <TT>start</TT>, using bitwise comparison.
1080	* @param c The code unit to search for.
1081	* @param start The offset at which searching will start.
1082	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1083	* @stable ICU 2.0
1084	*/
1085	inline int32_t indexOf(char16_t c,
1086	int32_t start) const;
1087
1088	/**
1089	* Locate in this the first occurrence of the code point <TT>c</TT>
1090	* starting at offset <TT>start</TT>, using bitwise comparison.
1091	*
1092	* @param c The code point to search for.
1093	* @param start The offset at which searching will start.
1094	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1095	* @stable ICU 2.0
1096	*/
1097	inline int32_t indexOf(UChar32 c,
1098	int32_t start) const;
1099
1100	/**
1101	* Locate in this the first occurrence of the BMP code point <code>c</code>
1102	* in the range [<TT>start</TT>, <TT>start + length</TT>),
1103	* using bitwise comparison.
1104	* @param c The code unit to search for.
1105	* @param start the offset into this at which to start matching
1106	* @param length the number of characters in this to search
1107	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1108	* @stable ICU 2.0
1109	*/
1110	inline int32_t indexOf(char16_t c,
1111	int32_t start,
1112	int32_t length) const;
1113
1114	/**
1115	* Locate in this the first occurrence of the code point <TT>c</TT>
1116	* in the range [<TT>start</TT>, <TT>start + length</TT>),
1117	* using bitwise comparison.
1118	*
1119	* @param c The code point to search for.
1120	* @param start the offset into this at which to start matching
1121	* @param length the number of characters in this to search
1122	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1123	* @stable ICU 2.0
1124	*/
1125	inline int32_t indexOf(UChar32 c,
1126	int32_t start,
1127	int32_t length) const;
1128
1129	/**
1130	* Locate in this the last occurrence of the characters in <TT>text</TT>,
1131	* using bitwise comparison.
1132	* @param text The text to search for.
1133	* @return The offset into this of the start of <TT>text</TT>,
1134	* or -1 if not found.
1135	* @stable ICU 2.0
1136	*/
1137	inline int32_t lastIndexOf(const UnicodeString& text) const;
1138
1139	/**
1140	* Locate in this the last occurrence of the characters in <TT>text</TT>
1141	* starting at offset <TT>start</TT>, using bitwise comparison.
1142	* @param text The text to search for.
1143	* @param start The offset at which searching will start.
1144	* @return The offset into this of the start of <TT>text</TT>,
1145	* or -1 if not found.
1146	* @stable ICU 2.0
1147	*/
1148	inline int32_t lastIndexOf(const UnicodeString& text,
1149	int32_t start) const;
1150
1151	/**
1152	* Locate in this the last occurrence in the range
1153	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1154	* in <TT>text</TT>, using bitwise comparison.
1155	* @param text The text to search for.
1156	* @param start The offset at which searching will start.
1157	* @param length The number of characters to search
1158	* @return The offset into this of the start of <TT>text</TT>,
1159	* or -1 if not found.
1160	* @stable ICU 2.0
1161	*/
1162	inline int32_t lastIndexOf(const UnicodeString& text,
1163	int32_t start,
1164	int32_t length) const;
1165
1166	/**
1167	* Locate in this the last occurrence in the range
1168	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1169	* in <TT>srcText</TT> in the range
1170	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1171	* using bitwise comparison.
1172	* @param srcText The text to search for.
1173	* @param srcStart the offset into <TT>srcText</TT> at which
1174	* to start matching
1175	* @param srcLength the number of characters in <TT>srcText</TT> to match
1176	* @param start the offset into this at which to start matching
1177	* @param length the number of characters in this to search
1178	* @return The offset into this of the start of <TT>srcText</TT>,
1179	* or -1 if not found.
1180	* @stable ICU 2.0
1181	*/
1182	inline int32_t lastIndexOf(const UnicodeString& srcText,
1183	int32_t srcStart,
1184	int32_t srcLength,
1185	int32_t start,
1186	int32_t length) const;
1187
1188	/**
1189	* Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1190	* starting at offset <TT>start</TT>, using bitwise comparison.
1191	* @param srcChars The text to search for.
1192	* @param srcLength the number of characters in <TT>srcChars</TT> to match
1193	* @param start the offset into this at which to start matching
1194	* @return The offset into this of the start of <TT>srcChars</TT>,
1195	* or -1 if not found.
1196	* @stable ICU 2.0
1197	*/
1198	inline int32_t lastIndexOf(const char16_t *srcChars,
1199	int32_t srcLength,
1200	int32_t start) const;
1201
1202	/**
1203	* Locate in this the last occurrence in the range
1204	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1205	* in <TT>srcChars</TT>, using bitwise comparison.
1206	* @param srcChars The text to search for.
1207	* @param srcLength the number of characters in <TT>srcChars</TT>
1208	* @param start The offset at which searching will start.
1209	* @param length The number of characters to search
1210	* @return The offset into this of the start of <TT>srcChars</TT>,
1211	* or -1 if not found.
1212	* @stable ICU 2.0
1213	*/
1214	inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1215	int32_t srcLength,
1216	int32_t start,
1217	int32_t length) const;
1218
1219	/**
1220	* Locate in this the last occurrence in the range
1221	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
1222	* in <TT>srcChars</TT> in the range
1223	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1224	* using bitwise comparison.
1225	* @param srcChars The text to search for.
1226	* @param srcStart the offset into <TT>srcChars</TT> at which
1227	* to start matching
1228	* @param srcLength the number of characters in <TT>srcChars</TT> to match
1229	* @param start the offset into this at which to start matching
1230	* @param length the number of characters in this to search
1231	* @return The offset into this of the start of <TT>srcChars</TT>,
1232	* or -1 if not found.
1233	* @stable ICU 2.0
1234	*/
1235	int32_t lastIndexOf(const char16_t *srcChars,
1236	int32_t srcStart,
1237	int32_t srcLength,
1238	int32_t start,
1239	int32_t length) const;
1240
1241	/**
1242	* Locate in this the last occurrence of the BMP code point <code>c</code>,
1243	* using bitwise comparison.
1244	* @param c The code unit to search for.
1245	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1246	* @stable ICU 2.0
1247	*/
1248	inline int32_t lastIndexOf(char16_t c) const;
1249
1250	/**
1251	* Locate in this the last occurrence of the code point <TT>c</TT>,
1252	* using bitwise comparison.
1253	*
1254	* @param c The code point to search for.
1255	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1256	* @stable ICU 2.0
1257	*/
1258	inline int32_t lastIndexOf(UChar32 c) const;
1259
1260	/**
1261	* Locate in this the last occurrence of the BMP code point <code>c</code>
1262	* starting at offset <TT>start</TT>, using bitwise comparison.
1263	* @param c The code unit to search for.
1264	* @param start The offset at which searching will start.
1265	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1266	* @stable ICU 2.0
1267	*/
1268	inline int32_t lastIndexOf(char16_t c,
1269	int32_t start) const;
1270
1271	/**
1272	* Locate in this the last occurrence of the code point <TT>c</TT>
1273	* starting at offset <TT>start</TT>, using bitwise comparison.
1274	*
1275	* @param c The code point to search for.
1276	* @param start The offset at which searching will start.
1277	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1278	* @stable ICU 2.0
1279	*/
1280	inline int32_t lastIndexOf(UChar32 c,
1281	int32_t start) const;
1282
1283	/**
1284	* Locate in this the last occurrence of the BMP code point <code>c</code>
1285	* in the range [<TT>start</TT>, <TT>start + length</TT>),
1286	* using bitwise comparison.
1287	* @param c The code unit to search for.
1288	* @param start the offset into this at which to start matching
1289	* @param length the number of characters in this to search
1290	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1291	* @stable ICU 2.0
1292	*/
1293	inline int32_t lastIndexOf(char16_t c,
1294	int32_t start,
1295	int32_t length) const;
1296
1297	/**
1298	* Locate in this the last occurrence of the code point <TT>c</TT>
1299	* in the range [<TT>start</TT>, <TT>start + length</TT>),
1300	* using bitwise comparison.
1301	*
1302	* @param c The code point to search for.
1303	* @param start the offset into this at which to start matching
1304	* @param length the number of characters in this to search
1305	* @return The offset into this of <TT>c</TT>, or -1 if not found.
1306	* @stable ICU 2.0
1307	*/
1308	inline int32_t lastIndexOf(UChar32 c,
1309	int32_t start,
1310	int32_t length) const;
1311
1312
1313	/* Character access */
1314
1315	/**
1316	* Return the code unit at offset <tt>offset</tt>.
1317	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1318	* @param offset a valid offset into the text
1319	* @return the code unit at offset <tt>offset</tt>
1320	* or 0xffff if the offset is not valid for this string
1321	* @stable ICU 2.0
1322	*/
1323	inline char16_t charAt(int32_t offset) const;
1324
1325	/**
1326	* Return the code unit at offset <tt>offset</tt>.
1327	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1328	* @param offset a valid offset into the text
1329	* @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
1330	* @stable ICU 2.0
1331	*/
1332	inline char16_t operator[] (int32_t offset) const;
1333
1334	/**
1335	* Return the code point that contains the code unit
1336	* at offset <tt>offset</tt>.
1337	* If the offset is not valid (0..length()-1) then U+ffff is returned.
1338	* @param offset a valid offset into the text
1339	* that indicates the text offset of any of the code units
1340	* that will be assembled into a code point (21-bit value) and returned
1341	* @return the code point of text at <tt>offset</tt>
1342	* or 0xffff if the offset is not valid for this string
1343	* @stable ICU 2.0
1344	*/
1345	UChar32 char32At(int32_t offset) const;
1346
1347	/**
1348	* Adjust a random-access offset so that
1349	* it points to the beginning of a Unicode character.
1350	* The offset that is passed in points to
1351	* any code unit of a code point,
1352	* while the returned offset will point to the first code unit
1353	* of the same code point.
1354	* In UTF-16, if the input offset points to a second surrogate
1355	* of a surrogate pair, then the returned offset will point
1356	* to the first surrogate.
1357	* @param offset a valid offset into one code point of the text
1358	* @return offset of the first code unit of the same code point
1359	* @see U16_SET_CP_START
1360	* @stable ICU 2.0
1361	*/
1362	int32_t getChar32Start(int32_t offset) const;
1363
1364	/**
1365	* Adjust a random-access offset so that
1366	* it points behind a Unicode character.
1367	* The offset that is passed in points behind
1368	* any code unit of a code point,
1369	* while the returned offset will point behind the last code unit
1370	* of the same code point.
1371	* In UTF-16, if the input offset points behind the first surrogate
1372	* (i.e., to the second surrogate)
1373	* of a surrogate pair, then the returned offset will point
1374	* behind the second surrogate (i.e., to the first code unit after the pair).
1375	* @param offset a valid offset after any code unit of a code point of the text
1376	* @return offset of the first code unit after the same code point
1377	* @see U16_SET_CP_LIMIT
1378	* @stable ICU 2.0
1379	*/
1380	int32_t getChar32Limit(int32_t offset) const;
1381
1382	/**
1383	* Move the code unit index along the string by delta code points.
1384	* Interpret the input index as a code unit-based offset into the string,
1385	* move the index forward or backward by delta code points, and
1386	* return the resulting index.
1387	* The input index should point to the first code unit of a code point,
1388	* if there is more than one.
1389	*
1390	* Both input and output indexes are code unit-based as for all
1391	* string indexes/offsets in ICU (and other libraries, like MBCS char*).
1392	* If delta<0 then the index is moved backward (toward the start of the string).
1393	* If delta>0 then the index is moved forward (toward the end of the string).
1394	*
1395	* This behaves like CharacterIterator::move32(delta, kCurrent).
1396	*
1397	* Behavior for out-of-bounds indexes:
1398	* <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1399	* if the input index<0 then it is pinned to 0;
1400	* if it is index>length() then it is pinned to length().
1401	* Afterwards, the index is moved by <code>delta</code> code points
1402	* forward or backward,
1403	* but no further backward than to 0 and no further forward than to length().
1404	* The resulting index return value will be in between 0 and length(), inclusively.
1405	*
1406	* Examples:
1407	* <pre>
1408	* // s has code points 'a' U+10000 'b' U+10ffff U+2029
1409	* UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1410	*
1411	* // initial index: position of U+10000
1412	* int32_t index=1;
1413	*
1414	* // the following examples will all result in index==4, position of U+10ffff
1415	*
1416	* // skip 2 code points from some position in the string
1417	* index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1418	*
1419	* // go to the 3rd code point from the start of s (0-based)
1420	* index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1421	*
1422	* // go to the next-to-last code point of s
1423	* index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1424	* </pre>
1425	*
1426	* @param index input code unit index
1427	* @param delta (signed) code point count to move the index forward or backward
1428	* in the string
1429	* @return the resulting code unit index
1430	* @stable ICU 2.0
1431	*/
1432	int32_t moveIndex32(int32_t index, int32_t delta) const;
1433
1434	/* Substring extraction */
1435
1436	/**
1437	* Copy the characters in the range
1438	* [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1439	* beginning at <tt>dstStart</tt>.
1440	* If the string aliases to <code>dst</code> itself as an external buffer,
1441	* then extract() will not copy the contents.
1442	*
1443	* @param start offset of first character which will be copied into the array
1444	* @param length the number of characters to extract
1445	* @param dst array in which to copy characters. The length of <tt>dst</tt>
1446	* must be at least (<tt>dstStart + length</tt>).
1447	* @param dstStart the offset in <TT>dst</TT> where the first character
1448	* will be extracted; defaults to 0
1449	* @stable ICU 2.0
1450	*/
1451	inline void extract(int32_t start,
1452	int32_t length,
1453	Char16Ptr dst,
1454	int32_t dstStart = 0) const;
1455
1456	/**
1457	* Copy the contents of the string into dest.
1458	* This is a convenience function that
1459	* checks if there is enough space in dest,
1460	* extracts the entire string if possible,
1461	* and NUL-terminates dest if possible.
1462	*
1463	* If the string fits into dest but cannot be NUL-terminated
1464	* (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1465	* If the string itself does not fit into dest
1466	* (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1467	*
1468	* If the string aliases to <code>dest</code> itself as an external buffer,
1469	* then extract() will not copy the contents.
1470	*
1471	* @param dest Destination string buffer.
1472	* @param destCapacity Number of char16_ts available at dest.
1473	* @param errorCode ICU error code.
1474	* @return length()
1475	* @stable ICU 2.0
1476	*/
1477	int32_t
1478	extract(Char16Ptr dest, int32_t destCapacity,
1479	UErrorCode &errorCode) const;
1480
1481	/**
1482	* Copy the characters in the range
1483	* [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1484	* <tt>target</tt>.
1485	* @param start offset of first character which will be copied
1486	* @param length the number of characters to extract
1487	* @param target UnicodeString into which to copy characters.
1488	* (No return value; the result is delivered through <TT>target</TT>.)
1489	* @stable ICU 2.0
1490	*/
1491	inline void extract(int32_t start,
1492	int32_t length,
1493	UnicodeString& target) const;
1494
1495	/**
1496	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1497	* into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1498	* @param start offset of first character which will be copied into the array
1499	* @param limit offset immediately following the last character to be copied
1500	* @param dst array in which to copy characters. The length of <tt>dst</tt>
1501	* must be at least (<tt>dstStart + (limit - start)</tt>).
1502	* @param dstStart the offset in <TT>dst</TT> where the first character
1503	* will be extracted; defaults to 0
1504	* @stable ICU 2.0
1505	*/
1506	inline void extractBetween(int32_t start,
1507	int32_t limit,
1508	char16_t *dst,
1509	int32_t dstStart = 0) const;
1510
1511	/**
1512	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1513	* into the UnicodeString <tt>target</tt>. Replaceable API.
1514	* @param start offset of first character which will be copied
1515	* @param limit offset immediately following the last character to be copied
1516	* @param target UnicodeString into which to copy characters.
1517	* (No return value; the result is delivered through <TT>target</TT>.)
1518	* @stable ICU 2.0
1519	*/
1520	virtual void extractBetween(int32_t start,
1521	int32_t limit,
1522	UnicodeString& target) const;
1523
1524	/**
1525	* Copy the characters in the range
1526	* [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1527	* All characters must be invariant (see utypes.h).
1528	* Use US_INV as the last, signature-distinguishing parameter.
1529	*
1530	* This function does not write any more than <code>targetCapacity</code>
1531	* characters but returns the length of the entire output string
1532	* so that one can allocate a larger buffer and call the function again
1533	* if necessary.
1534	* The output string is NUL-terminated if possible.
1535	*
1536	* @param start offset of first character which will be copied
1537	* @param startLength the number of characters to extract
1538	* @param target the target buffer for extraction, can be NULL
1539	* if targetCapacity is 0
1540	* @param targetCapacity the length of the target buffer
1541	* @param inv Signature-distinguishing parameter, use US_INV.
1542	* @return the output string length, not including the terminating NUL
1543	* @stable ICU 3.2
1544	*/
1545	int32_t extract(int32_t start,
1546	int32_t startLength,
1547	char *target,
1548	int32_t targetCapacity,
1549	enum EInvariant inv) const;
1550
1551	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1552
1553	/**
1554	* Copy the characters in the range
1555	* [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1556	* in the platform's default codepage.
1557	* This function does not write any more than <code>targetLength</code>
1558	* characters but returns the length of the entire output string
1559	* so that one can allocate a larger buffer and call the function again
1560	* if necessary.
1561	* The output string is NUL-terminated if possible.
1562	*
1563	* @param start offset of first character which will be copied
1564	* @param startLength the number of characters to extract
1565	* @param target the target buffer for extraction
1566	* @param targetLength the length of the target buffer
1567	* If <tt>target</tt> is NULL, then the number of bytes required for
1568	* <tt>target</tt> is returned.
1569	* @return the output string length, not including the terminating NUL
1570	* @stable ICU 2.0
1571	*/
1572	int32_t extract(int32_t start,
1573	int32_t startLength,
1574	char *target,
1575	uint32_t targetLength) const;
1576
1577	#endif
1578
1579	#if !UCONFIG_NO_CONVERSION
1580
1581	/**
1582	* Copy the characters in the range
1583	* [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1584	* in a specified codepage.
1585	* The output string is NUL-terminated.
1586	*
1587	* Recommendation: For invariant-character strings use
1588	* extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1589	* because it avoids object code dependencies of UnicodeString on
1590	* the conversion code.
1591	*
1592	* @param start offset of first character which will be copied
1593	* @param startLength the number of characters to extract
1594	* @param target the target buffer for extraction
1595	* @param codepage the desired codepage for the characters. 0 has
1596	* the special meaning of the default codepage
1597	* If <code>codepage</code> is an empty string (<code>""</code>),
1598	* then a simple conversion is performed on the codepage-invariant
1599	* subset ("invariant characters") of the platform encoding. See utypes.h.
1600	* If <tt>target</tt> is NULL, then the number of bytes required for
1601	* <tt>target</tt> is returned. It is assumed that the target is big enough
1602	* to fit all of the characters.
1603	* @return the output string length, not including the terminating NUL
1604	* @stable ICU 2.0
1605	*/
1606	inline int32_t extract(int32_t start,
1607	int32_t startLength,
1608	char *target,
1609	const char *codepage = 0) const;
1610
1611	/**
1612	* Copy the characters in the range
1613	* [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1614	* in a specified codepage.
1615	* This function does not write any more than <code>targetLength</code>
1616	* characters but returns the length of the entire output string
1617	* so that one can allocate a larger buffer and call the function again
1618	* if necessary.
1619	* The output string is NUL-terminated if possible.
1620	*
1621	* Recommendation: For invariant-character strings use
1622	* extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1623	* because it avoids object code dependencies of UnicodeString on
1624	* the conversion code.
1625	*
1626	* @param start offset of first character which will be copied
1627	* @param startLength the number of characters to extract
1628	* @param target the target buffer for extraction
1629	* @param targetLength the length of the target buffer
1630	* @param codepage the desired codepage for the characters. 0 has
1631	* the special meaning of the default codepage
1632	* If <code>codepage</code> is an empty string (<code>""</code>),
1633	* then a simple conversion is performed on the codepage-invariant
1634	* subset ("invariant characters") of the platform encoding. See utypes.h.
1635	* If <tt>target</tt> is NULL, then the number of bytes required for
1636	* <tt>target</tt> is returned.
1637	* @return the output string length, not including the terminating NUL
1638	* @stable ICU 2.0
1639	*/
1640	int32_t extract(int32_t start,
1641	int32_t startLength,
1642	char *target,
1643	uint32_t targetLength,
1644	const char *codepage) const;
1645
1646	/**
1647	* Convert the UnicodeString into a codepage string using an existing UConverter.
1648	* The output string is NUL-terminated if possible.
1649	*
1650	* This function avoids the overhead of opening and closing a converter if
1651	* multiple strings are extracted.
1652	*
1653	* @param dest destination string buffer, can be NULL if destCapacity==0
1654	* @param destCapacity the number of chars available at dest
1655	* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1656	* or NULL for the default converter
1657	* @param errorCode normal ICU error code
1658	* @return the length of the output string, not counting the terminating NUL;
1659	* if the length is greater than destCapacity, then the string will not fit
1660	* and a buffer of the indicated length would need to be passed in
1661	* @stable ICU 2.0
1662	*/
1663	int32_t extract(char *dest, int32_t destCapacity,
1664	UConverter *cnv,
1665	UErrorCode &errorCode) const;
1666
1667	#endif
1668
1669	/**
1670	* Create a temporary substring for the specified range.
1671	* Unlike the substring constructor and setTo() functions,
1672	* the object returned here will be a read-only alias (using getBuffer())
1673	* rather than copying the text.
1674	* As a result, this substring operation is much faster but requires
1675	* that the original string not be modified or deleted during the lifetime
1676	* of the returned substring object.
1677	* @param start offset of the first character visible in the substring; defaults to 0
1678	* @param length length of the substring; defaults to INT32_MAX
1679	* @return a read-only alias UnicodeString object for the substring
1680	* @stable ICU 4.4
1681	*/
1682	UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1683
1684	/**
1685	* Create a temporary substring for the specified range.
1686	* Same as tempSubString(start, length) except that the substring range
1687	* is specified as a (start, limit) pair (with an exclusive limit index)
1688	* rather than a (start, length) pair.
1689	* @param start offset of the first character visible in the substring
1690	* @param limit offset immediately following the last character visible in the substring
1691	* @return a read-only alias UnicodeString object for the substring
1692	* @stable ICU 4.4
1693	*/
1694	inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1695
1696	/**
1697	* Convert the UnicodeString to UTF-8 and write the result
1698	* to a ByteSink. This is called by toUTF8String().
1699	* Unpaired surrogates are replaced with U+FFFD.
1700	* Calls u_strToUTF8WithSub().
1701	*
1702	* @param sink A ByteSink to which the UTF-8 version of the string is written.
1703	* sink.Flush() is called at the end.
1704	* @stable ICU 4.2
1705	* @see toUTF8String
1706	*/
1707	void toUTF8(ByteSink &sink) const;
1708
1709	/**
1710	* Append this string, converted to UTF-8, to a standard string.
1711	* The conversion itself is delegated to toUTF8(), which writes through
1712	* a StringByteSink wrapped around <code>result</code>.
1713	* Unpaired surrogates are replaced with U+FFFD.
1714	*
1715	* @param result A standard string (or a compatible object)
1716	* that receives the appended UTF-8 version of this string.
1717	* @return The same object that was passed in as <code>result</code>.
1718	* @stable ICU 4.2
1719	* @see toUTF8
1720	*/
1721	template<typename StringClass>
1722	StringClass &toUTF8String(StringClass &result) const {
1723	StringByteSink<StringClass> utf8Sink(&result, this->length());
1724	this->toUTF8(utf8Sink);
1725	return result;
1726	}
1727
1728	/**
1729	* Convert the UnicodeString to UTF-32.
1730	* Unpaired surrogates are replaced with U+FFFD.
1731	* Calls u_strToUTF32WithSub().
1732	*
1733	* @param utf32 destination string buffer, can be NULL if capacity==0
1734	* @param capacity the number of UChar32s available at utf32
1735	* @param errorCode Standard ICU error code. Its input value must
1736	* pass the U_SUCCESS() test, or else the function returns
1737	* immediately. Check for U_FAILURE() on output or use with
1738	* function chaining. (See User Guide for details.)
1739	* @return The length of the UTF-32 string.
1740	* @see fromUTF32
1741	* @stable ICU 4.2
1742	*/
1743	int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1744
1745	/* Length operations */
1746
1747	/**
1748	* Return the length of the UnicodeString object.
1749	* The length is the number of char16_t code units in the UnicodeString.
1750	* If you want the number of code points, please use countChar32().
1751	* @return the length of the UnicodeString object
1752	* @see countChar32
1753	* @stable ICU 2.0
1754	*/
1755	inline int32_t length(void) const;
1756
1757	/**
1758	* Count Unicode code points in the length char16_t code units of the string.
1759	* A code point may occupy either one or two char16_t code units.
1760	* Counting code points involves reading all code units.
1761	*
1762	* This function is basically the inverse of moveIndex32().
1763	*
1764	* @param start the index of the first code unit to check; defaults to 0
1765	* @param length the number of char16_t code units to check; defaults to INT32_MAX
1766	* @return the number of code points in the specified code units
1767	* @see length
1768	* @stable ICU 2.0
1769	*/
1770	int32_t
1771	countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1772
1773	/**
1774	* Check if the length char16_t code units of the string
1775	* contain more Unicode code points than a certain number.
1776	* This is more efficient than counting all code points in this part of the string
1777	* and comparing that number with a threshold.
1778	* This function may not need to scan the string at all if the length
1779	* falls within a certain range, and
1780	* never needs to count more than 'number+1' code points.
1781	* Logically equivalent to (countChar32(start, length)>number).
1782	* A Unicode code point may occupy either one or two char16_t code units.
1783	*
1784	* @param start the index of the first code unit to check (0 for the entire string)
1785	* @param length the number of char16_t code units to check
1786	* (use INT32_MAX for the entire string; remember that start/length
1787	* values are pinned)
1788	* @param number The number of code points in the (sub)string is compared against
1789	* the 'number' parameter.
1790	* @return Boolean value for whether the string contains more Unicode code points
1791	* than 'number'. Same as (u_countChar32(s, length)>number).
1792	* @see countChar32
1793	* @see u_strHasMoreChar32Than
1794	* @stable ICU 2.4
1795	*/
1796	UBool
1797	hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1798
1799	/**
1800	* Determine if this string is empty.
1801	* @return TRUE if this string contains 0 characters, FALSE otherwise.
1802	* @stable ICU 2.0
1803	*/
1804	inline UBool isEmpty(void) const;
1805
1806	/**
1807	* Return the capacity of the internal buffer of the UnicodeString object.
1808	* This is useful together with the getBuffer functions.
1809	* See there for details.
1810	*
1811	* @return the number of char16_ts available in the internal buffer
1812	* @see getBuffer
1813	* @stable ICU 2.0
1814	*/
1815	inline int32_t getCapacity(void) const;
1816
1817	/* Other operations */
1818
1819	/**
1820	* Generate a hash code for this object.
1821	* @return The hash code of this UnicodeString.
1822	* @stable ICU 2.0
1823	*/
1824	inline int32_t hashCode(void) const;
1825
1826	/**
1827	* Determine if this object contains a valid string.
1828	* A bogus string has no value. It is different from an empty string,
1829	* although in both cases isEmpty() returns TRUE and length() returns 0.
1830	* setToBogus() and isBogus() can be used to indicate that no string value is available.
1831	* For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1832	* length() returns 0.
1833	*
1834	* @return TRUE if the string is bogus/invalid, FALSE otherwise
1835	* @see setToBogus()
1836	* @stable ICU 2.0
1837	*/
1838	inline UBool isBogus(void) const;
1839
1840
1841	//========================================
1842	// Write operations
1843	//========================================
1844
1845	/* Assignment operations */
1846
1847	/**
1848	* Assignment operator. Replace the characters in this UnicodeString
1849	* with the characters from <TT>srcText</TT>.
1850	*
1851	* Starting with ICU 2.4, the assignment operator and the copy constructor
1852	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1853	* By contrast, the fastCopyFrom() function implements the old,
1854	* more efficient but less safe behavior
1855	* of making this string also a readonly alias to the same buffer.
1856	*
1857	* If the source object has an "open" buffer from getBuffer(minCapacity),
1858	* then the copy is an empty string.
1859	*
1860	* @param srcText The text containing the characters to replace
1861	* @return a reference to this
1862	* @stable ICU 2.0
1863	* @see fastCopyFrom
1864	*/
1865	UnicodeString &operator=(const UnicodeString &srcText);
1866
1867	/**
1868	* Almost the same as the assignment operator.
1869	* Replace the characters in this UnicodeString
1870	* with the characters from <code>srcText</code>.
1871	*
1872	* This function works the same as the assignment operator
1873	* for all strings except for ones that are readonly aliases.
1874	*
1875	* Starting with ICU 2.4, the assignment operator and the copy constructor
1876	* allocate a new buffer and copy the buffer contents even for readonly aliases.
1877	* This function implements the old, more efficient but less safe behavior
1878	* of making this string also a readonly alias to the same buffer.
1879	*
1880	* The fastCopyFrom function must be used only if it is known that the lifetime of
1881	* this UnicodeString does not exceed the lifetime of the aliased buffer
1882	* including its contents, for example for strings from resource bundles
1883	* or aliases to string constants.
1884	*
1885	* If the source object has an "open" buffer from getBuffer(minCapacity),
1886	* then the copy is an empty string.
1887	*
1888	* @param src The text containing the characters to replace.
1889	* @return a reference to this
1890	* @stable ICU 2.4
1891	*/
1892	UnicodeString &fastCopyFrom(const UnicodeString &src);
1893
1894	#if U_HAVE_RVALUE_REFERENCES
1895	/**
1896	* Move assignment operator; forwards to moveFrom(), so src might be left in bogus state.
1897	* Afterwards this string has the contents and state that the source string had.
1898	* The behavior is undefined if *this and src are the same object.
1899	* @param src source string
1900	* @return *this
1901	* @stable ICU 56
1902	*/
1903	UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
1904	return this->moveFrom(src);
1905	}
1906	#endif
1907	// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
1908	/**
1909	* Move assignment, might leave src in bogus state.
1910	* This string will have the same contents and state that the source string had.
1911	* The behavior is undefined if *this and src are the same object.
1912	*
1913	* Can be called explicitly, does not need C++11 support.
1914	* @param src source string
1915	* @return *this
1916	* @draft ICU 56
1917	*/
1918	UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
1919
1920	/**
1921	* Swap strings.
1922	* @param other other string
1923	* @stable ICU 56
1924	*/
1925	void swap(UnicodeString &other) U_NOEXCEPT;
1926
1927	/**
1928	* Non-member swap for UnicodeString; delegates to the member swap().
1929	* @param s1 will get s2's contents and state
1930	* @param s2 will get s1's contents and state
1931	* @stable ICU 56
1932	*/
1933	friend U_COMMON_API inline void U_EXPORT2
1934	swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
1935	s1.UnicodeString::swap(s2);
1936	}
1937
1938	/**
1939	* Assignment operator. Replace the characters in this UnicodeString
1940	* with the code unit <TT>ch</TT>.
1941	* @param ch the code unit to replace
1942	* @return a reference to this
1943	* @stable ICU 2.0
1944	*/
1945	inline UnicodeString& operator= (char16_t ch);
1946
1947	/**
1948	* Assignment operator. Replace the characters in this UnicodeString
1949	* with the code point <TT>ch</TT>.
1950	* @param ch the code point to replace
1951	* @return a reference to this
1952	* @stable ICU 2.0
1953	*/
1954	inline UnicodeString& operator= (UChar32 ch);
1955
1956	/**
1957	* Set the text in the UnicodeString object to the characters
1958	* in <TT>srcText</TT> in the range
1959	* [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1960	* <TT>srcText</TT> is not modified.
1961	* @param srcText the source for the new characters
1962	* @param srcStart the offset into <TT>srcText</TT> where new characters
1963	* will be obtained
1964	* @return a reference to this
1965	* @stable ICU 2.2
1966	*/
1967	inline UnicodeString& setTo(const UnicodeString& srcText,
1968	int32_t srcStart);
1969
1970	/**
1971	* Set the text in the UnicodeString object to the characters
1972	* in <TT>srcText</TT> in the range
1973	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1974	* <TT>srcText</TT> is not modified.
1975	* @param srcText the source for the new characters
1976	* @param srcStart the offset into <TT>srcText</TT> where new characters
1977	* will be obtained
1978	* @param srcLength the number of characters in <TT>srcText</TT> in the
1979	* replace string.
1980	* @return a reference to this
1981	* @stable ICU 2.0
1982	*/
1983	inline UnicodeString& setTo(const UnicodeString& srcText,
1984	int32_t srcStart,
1985	int32_t srcLength);
1986
1987	/**
1988	* Set the text in the UnicodeString object to the characters in
1989	* <TT>srcText</TT>.
1990	* <TT>srcText</TT> is not modified.
1991	* @param srcText the source for the new characters
1992	* @return a reference to this
1993	* @stable ICU 2.0
1994	*/
1995	inline UnicodeString& setTo(const UnicodeString& srcText);
1996
1997	/**
1998	* Set the characters in the UnicodeString object to the characters
1999	* in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2000	* @param srcChars the source for the new characters
2001	* @param srcLength the number of Unicode characters in srcChars.
2002	* @return a reference to this
2003	* @stable ICU 2.0
2004	*/
2005	inline UnicodeString& setTo(const char16_t *srcChars,
2006	int32_t srcLength);
2007
2008	/**
2009	* Set the characters in the UnicodeString object to the code unit
2010	* <TT>srcChar</TT>.
2011	* @param srcChar the code unit which becomes the UnicodeString's character
2012	* content
2013	* @return a reference to this
2014	* @stable ICU 2.0
2015	*/
2016	UnicodeString& setTo(char16_t srcChar);
2017
2018	/**
2019	* Set the characters in the UnicodeString object to the code point
2020	* <TT>srcChar</TT>.
2021	* @param srcChar the code point which becomes the UnicodeString's character
2022	* content
2023	* @return a reference to this
2024	* @stable ICU 2.0
2025	*/
2026	UnicodeString& setTo(UChar32 srcChar);
2027
2028	/**
2029	* Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
2030	* The text will be used for the UnicodeString object, but
2031	* it will not be released when the UnicodeString is destroyed.
2032	* This has copy-on-write semantics:
2033	* When the string is modified, then the buffer is first copied into
2034	* newly allocated memory.
2035	* The aliased buffer is never modified.
2036	*
2037	* In an assignment to another UnicodeString, when using the copy constructor
2038	* or the assignment operator, the text will be copied.
2039	* When using fastCopyFrom(), the text will be aliased again,
2040	* so that both strings then alias the same readonly-text.
2041	*
2042	* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2043	* This must be true if <code>textLength==-1</code>.
2044	* @param text The characters to alias for the UnicodeString.
2045	* @param textLength The number of Unicode characters in <code>text</code> to alias.
2046	* If -1, then this function will determine the length
2047	* by calling <code>u_strlen()</code>.
2048	* @return a reference to this
2049	* @stable ICU 2.0
2050	*/
2051	UnicodeString &setTo(UBool isTerminated,
2052	ConstChar16Ptr text,
2053	int32_t textLength);
2054
2055	/**
2056	* Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
2057	* The text will be used for the UnicodeString object, but
2058	* it will not be released when the UnicodeString is destroyed.
2059	* This has write-through semantics:
2060	* For as long as the capacity of the buffer is sufficient, write operations
2061	* will directly affect the buffer. When more capacity is necessary, then
2062	* a new buffer will be allocated and the contents copied as with regularly
2063	* constructed strings.
2064	* In an assignment to another UnicodeString, the buffer will be copied.
2065	* The extract(Char16Ptr dst) function detects whether the dst pointer is the same
2066	* as the string buffer itself and will in this case not copy the contents.
2067	*
2068	* @param buffer The characters to alias for the UnicodeString.
2069	* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2070	* @param buffCapacity The size of <code>buffer</code> in char16_ts.
2071	* @return a reference to this
2072	* @stable ICU 2.0
2073	*/
2074	UnicodeString &setTo(char16_t *buffer,
2075	int32_t buffLength,
2076	int32_t buffCapacity);
2077
2078	/**
2079	* Make this UnicodeString object invalid.
2080	* The string will test TRUE with isBogus().
2081	*
2082	* A bogus string has no value. It is different from an empty string.
2083	* It can be used to indicate that no string value is available.
2084	* getBuffer() and getTerminatedBuffer() return NULL, and
2085	* length() returns 0.
2086	*
2087	* This utility function is used throughout the UnicodeString
2088	* implementation to indicate that a UnicodeString operation failed,
2089	* and may be used in other functions,
2090	* especially but not exclusively when such functions do not
2091	* take a UErrorCode for simplicity.
2092	*
2093	* The following methods, and no others, will clear a string object's bogus flag:
2094	* - remove()
2095	* - remove(0, INT32_MAX)
2096	* - truncate(0)
2097	* - operator=() (assignment operator)
2098	* - setTo(...)
2099	*
2100	* The simplest way to turn a bogus string into an empty one
2101	* is to use the remove() function.
2102	* Examples for other functions that are equivalent to "set to empty string":
2103	* \code
2104	* if(s.isBogus()) {
2105	* s.remove(); // set to an empty string (remove all), or
2106	* s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2107	* s.truncate(0); // set to an empty string (complete truncation), or
2108	* s=UnicodeString(); // assign an empty string, or
2109	* s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2110	* static const char16_t nul=0;
2111	* s.setTo(&nul, 0); // set to an empty C Unicode string
2112	* }
2113	* \endcode
2114	*
2115	* @see isBogus()
2116	* @stable ICU 2.0
2117	*/
2118	void setToBogus();
2119
2120	/**
2121	* Set the character at the specified offset to the specified character.
2122	* @param offset A valid offset into the text of the character to set
2123	* @param ch The new character
2124	* @return A reference to this
2125	* @stable ICU 2.0
2126	*/
2127	UnicodeString& setCharAt(int32_t offset,
2128	char16_t ch);
2129
2130
2131	/* Append operations */
2132
2133	/**
2134	* Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
2135	* object.
2136	* @param ch the code unit to be appended
2137	* @return a reference to this
2138	* @stable ICU 2.0
2139	*/
2140	inline UnicodeString& operator+= (char16_t ch);
2141
2142	/**
2143	* Append operator. Append the code point <TT>ch</TT> to the UnicodeString
2144	* object.
2145	* @param ch the code point to be appended
2146	* @return a reference to this
2147	* @stable ICU 2.0
2148	*/
2149	inline UnicodeString& operator+= (UChar32 ch);
2150
2151	/**
2152	* Append operator. Append the characters in <TT>srcText</TT> to the
2153	* UnicodeString object. <TT>srcText</TT> is not modified.
2154	* @param srcText the source for the new characters
2155	* @return a reference to this
2156	* @stable ICU 2.0
2157	*/
2158	inline UnicodeString& operator+= (const UnicodeString& srcText);
2159
2160	/**
2161	* Append the characters
2162	* in <TT>srcText</TT> in the range
2163	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2164	* UnicodeString object. <TT>srcText</TT>
2165	* is not modified.
2166	* @param srcText the source for the new characters
2167	* @param srcStart the offset into <TT>srcText</TT> where new characters
2168	* will be obtained
2169	* @param srcLength the number of characters in <TT>srcText</TT> in
2170	* the append string
2171	* @return a reference to this
2172	* @stable ICU 2.0
2173	*/
2174	inline UnicodeString& append(const UnicodeString& srcText,
2175	int32_t srcStart,
2176	int32_t srcLength);
2177
2178	/**
2179	* Append the characters in <TT>srcText</TT> to the UnicodeString object.
2180	* <TT>srcText</TT> is not modified.
2181	* @param srcText the source for the new characters
2182	* @return a reference to this
2183	* @stable ICU 2.0
2184	*/
2185	inline UnicodeString& append(const UnicodeString& srcText);
2186
2187	/**
2188	* Append the characters in <TT>srcChars</TT> in the range
2189	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2190	* object.
2191	* <TT>srcChars</TT> is not modified.
2192	* @param srcChars the source for the new characters
2193	* @param srcStart the offset into <TT>srcChars</TT> where new characters
2194	* will be obtained
2195	* @param srcLength the number of characters in <TT>srcChars</TT> in
2196	* the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2197	* @return a reference to this
2198	* @stable ICU 2.0
2199	*/
2200	inline UnicodeString& append(const char16_t *srcChars,
2201	int32_t srcStart,
2202	int32_t srcLength);
2203
2204	/**
2205	* Append the characters in <TT>srcChars</TT> to the UnicodeString object.
2206	* <TT>srcChars</TT> is not modified.
2207	* @param srcChars the source for the new characters
2208	* @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2209	* can be -1 if <TT>srcChars</TT> is NUL-terminated
2210	* @return a reference to this
2211	* @stable ICU 2.0
2212	*/
2213	inline UnicodeString& append(ConstChar16Ptr srcChars,
2214	int32_t srcLength);
2215
2216	/**
2217	* Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2218	* @param srcChar the code unit to append
2219	* @return a reference to this
2220	* @stable ICU 2.0
2221	*/
2222	inline UnicodeString& append(char16_t srcChar);
2223
2224	/**
2225	* Append the code point <TT>srcChar</TT> to the UnicodeString object.
2226	* @param srcChar the code point to append
2227	* @return a reference to this
2228	* @stable ICU 2.0
2229	*/
2230	UnicodeString& append(UChar32 srcChar);
2231
2232
2233	/* Insert operations */
2234
2235	/**
2236	* Insert the characters in <TT>srcText</TT> in the range
2237	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2238	* object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2239	* @param start the offset where the insertion begins
2240	* @param srcText the source for the new characters
2241	* @param srcStart the offset into <TT>srcText</TT> where new characters
2242	* will be obtained
2243	* @param srcLength the number of characters in <TT>srcText</TT> in
2244	* the insert string
2245	* @return a reference to this
2246	* @stable ICU 2.0
2247	*/
2248	inline UnicodeString& insert(int32_t start,
2249	const UnicodeString& srcText,
2250	int32_t srcStart,
2251	int32_t srcLength);
2252
2253	/**
2254	* Insert the characters in <TT>srcText</TT> into the UnicodeString object
2255	* at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2256	* @param start the offset where the insertion begins
2257	* @param srcText the source for the new characters
2258	* @return a reference to this
2259	* @stable ICU 2.0
2260	*/
2261	inline UnicodeString& insert(int32_t start,
2262	const UnicodeString& srcText);
2263
2264	/**
2265	* Insert the characters in <TT>srcChars</TT> in the range
2266	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2267	* object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2268	* @param start the offset at which the insertion begins
2269	* @param srcChars the source for the new characters
2270	* @param srcStart the offset into <TT>srcChars</TT> where new characters
2271	* will be obtained
2272	* @param srcLength the number of characters in <TT>srcChars</TT>
2273	* in the insert string
2274	* @return a reference to this
2275	* @stable ICU 2.0
2276	*/
2277	inline UnicodeString& insert(int32_t start,
2278	const char16_t *srcChars,
2279	int32_t srcStart,
2280	int32_t srcLength);
2281
2282	/**
2283	* Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2284	* at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2285	* @param start the offset where the insertion begins
2286	* @param srcChars the source for the new characters
2287	* @param srcLength the number of Unicode characters in srcChars.
2288	* @return a reference to this
2289	* @stable ICU 2.0
2290	*/
2291	inline UnicodeString& insert(int32_t start,
2292	ConstChar16Ptr srcChars,
2293	int32_t srcLength);
2294
2295	/**
2296	* Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2297	* offset <TT>start</TT>.
2298	* @param start the offset at which the insertion occurs
2299	* @param srcChar the code unit to insert
2300	* @return a reference to this
2301	* @stable ICU 2.0
2302	*/
2303	inline UnicodeString& insert(int32_t start,
2304	char16_t srcChar);
2305
2306	/**
2307	* Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2308	* offset <TT>start</TT>.
2309	* @param start the offset at which the insertion occurs
2310	* @param srcChar the code point to insert
2311	* @return a reference to this
2312	* @stable ICU 2.0
2313	*/
2314	inline UnicodeString& insert(int32_t start,
2315	UChar32 srcChar);
2316
2317
2318	/* Replace operations */
2319
2320	/**
2321	* Replace the characters in the range
2322	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2323	* <TT>srcText</TT> in the range
2324	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2325	* <TT>srcText</TT> is not modified.
2326	* @param start the offset at which the replace operation begins
2327	* @param length the number of characters to replace. The character at
2328	* <TT>start + length</TT> is not modified.
2329	* @param srcText the source for the new characters
2330	* @param srcStart the offset into <TT>srcText</TT> where new characters
2331	* will be obtained
2332	* @param srcLength the number of characters in <TT>srcText</TT> in
2333	* the replace string
2334	* @return a reference to this
2335	* @stable ICU 2.0
2336	*/
2337	UnicodeString& replace(int32_t start,
2338	int32_t length,
2339	const UnicodeString& srcText,
2340	int32_t srcStart,
2341	int32_t srcLength);
2342
2343	/**
2344	* Replace the characters in the range
2345	* [<TT>start</TT>, <TT>start + length</TT>)
2346	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2347	* not modified.
2348	* @param start the offset at which the replace operation begins
2349	* @param length the number of characters to replace. The character at
2350	* <TT>start + length</TT> is not modified.
2351	* @param srcText the source for the new characters
2352	* @return a reference to this
2353	* @stable ICU 2.0
2354	*/
2355	UnicodeString& replace(int32_t start,
2356	int32_t length,
2357	const UnicodeString& srcText);
2358
2359	/**
2360	* Replace the characters in the range
2361	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2362	* <TT>srcChars</TT> in the range
2363	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2364	* is not modified.
2365	* @param start the offset at which the replace operation begins
2366	* @param length the number of characters to replace. The character at
2367	* <TT>start + length</TT> is not modified.
2368	* @param srcChars the source for the new characters
2369	* @param srcStart the offset into <TT>srcChars</TT> where new characters
2370	* will be obtained
2371	* @param srcLength the number of characters in <TT>srcChars</TT>
2372	* in the replace string
2373	* @return a reference to this
2374	* @stable ICU 2.0
2375	*/
2376	UnicodeString& replace(int32_t start,
2377	int32_t length,
2378	const char16_t *srcChars,
2379	int32_t srcStart,
2380	int32_t srcLength);
2381
2382	/**
2383	* Replace the characters in the range
2384	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2385	* <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2386	* @param start the offset at which the replace operation begins
2387	* @param length number of characters to replace. The character at
2388	* <TT>start + length</TT> is not modified.
2389	* @param srcChars the source for the new characters
2390	* @param srcLength the number of Unicode characters in srcChars
2391	* @return a reference to this
2392	* @stable ICU 2.0
2393	*/
2394	inline UnicodeString& replace(int32_t start,
2395	int32_t length,
2396	ConstChar16Ptr srcChars,
2397	int32_t srcLength);
2398
2399	/**
2400	* Replace the characters in the range
2401	* [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2402	* <TT>srcChar</TT>.
2403	* @param start the offset at which the replace operation begins
2404	* @param length the number of characters to replace. The character at
2405	* <TT>start + length</TT> is not modified.
2406	* @param srcChar the new code unit
2407	* @return a reference to this
2408	* @stable ICU 2.0
2409	*/
2410	inline UnicodeString& replace(int32_t start,
2411	int32_t length,
2412	char16_t srcChar);
2413
2414	/**
2415	* Replace the characters in the range
2416	* [<TT>start</TT>, <TT>start + length</TT>) with the code point
2417	* <TT>srcChar</TT>.
2418	* @param start the offset at which the replace operation begins
2419	* @param length the number of characters to replace. The character at
2420	* <TT>start + length</TT> is not modified.
2421	* @param srcChar the new code point
2422	* @return a reference to this
2423	* @stable ICU 2.0
2424	*/
2425	UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2426
2427	/**
2428	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2429	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2430	* @param start the offset at which the replace operation begins
2431	* @param limit the offset immediately following the replace range
2432	* @param srcText the source for the new characters
2433	* @return a reference to this
2434	* @stable ICU 2.0
2435	*/
2436	inline UnicodeString& replaceBetween(int32_t start,
2437	int32_t limit,
2438	const UnicodeString& srcText);
2439
2440	/**
2441	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2442	* with the characters in <TT>srcText</TT> in the range
2443	* [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2444	* @param start the offset at which the replace operation begins
2445	* @param limit the offset immediately following the replace range
2446	* @param srcText the source for the new characters
2447	* @param srcStart the offset into <TT>srcText</TT> where new characters
2448	* will be obtained
2449	* @param srcLimit the offset immediately following the range to copy
2450	* in <TT>srcText</TT>
2451	* @return a reference to this
2452	* @stable ICU 2.0
2453	*/
2454	inline UnicodeString& replaceBetween(int32_t start,
2455	int32_t limit,
2456	const UnicodeString& srcText,
2457	int32_t srcStart,
2458	int32_t srcLimit);
2459
2460	/**
2461	* Replace a substring of this object with the given text.
2462	* @param start the beginning index, inclusive; <code>0 <= start
2463	* <= limit</code>.
2464	* @param limit the ending index, exclusive; <code>start <= limit
2465	* <= length()</code>.
2466	* @param text the text to replace characters <code>start</code>
2467	* to <code>limit - 1</code>
2468	* @stable ICU 2.0
2469	*/
2470	virtual void handleReplaceBetween(int32_t start,
2471	int32_t limit,
2472	const UnicodeString& text);
2473
2474	/**
2475	* Replaceable API
2476	* @return TRUE if it has MetaData
2477	* @stable ICU 2.4
2478	*/
2479	virtual UBool hasMetaData() const;
2480
2481	/**
2482	* Copy a substring of this object, retaining attribute (out-of-band)
2483	* information. This method is used to duplicate or reorder substrings.
2484	* The destination index must not overlap the source range.
2485	*
2486	* @param start the beginning index, inclusive; <code>0 <= start <=
2487	* limit</code>.
2488	* @param limit the ending index, exclusive; <code>start <= limit <=
2489	* length()</code>.
2490	* @param dest the destination index. The characters from
2491	* <code>start..limit-1</code> will be copied to <code>dest</code>.
2492	* Implementations of this method may assume that <code>dest <= start ||
2493	* dest >= limit</code>.
2494	* @stable ICU 2.0
2495	*/
2496	virtual void copy(int32_t start, int32_t limit, int32_t dest);
2497
2498	/* Search and replace operations */
2499
2500	/**
2501	* Replace all occurrences of characters in oldText with the characters
2502	* in newText
2503	* @param oldText the text containing the search text
2504	* @param newText the text containing the replacement text
2505	* @return a reference to this
2506	* @stable ICU 2.0
2507	*/
2508	inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2509	const UnicodeString& newText);
2510
2511	/**
2512	* Replace all occurrences of characters in oldText with characters
2513	* in newText
2514	* in the range [<TT>start</TT>, <TT>start + length</TT>).
2515	* @param start the start of the range in which replace will be performed
2516	* @param length the length of the range in which replace will be performed
2517	* @param oldText the text containing the search text
2518	* @param newText the text containing the replacement text
2519	* @return a reference to this
2520	* @stable ICU 2.0
2521	*/
2522	inline UnicodeString& findAndReplace(int32_t start,
2523	int32_t length,
2524	const UnicodeString& oldText,
2525	const UnicodeString& newText);
2526
2527	/**
2528	* Replace all occurrences of characters in oldText in the range
2529	* [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2530	* in newText in the range
2531	* [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2532	* in the range [<TT>start</TT>, <TT>start + length</TT>).
2533	* @param start the start of the range in which replace will be performed
2534	* @param length the length of the range in which replace will be performed
2535	* @param oldText the text containing the search text
2536	* @param oldStart the start of the search range in <TT>oldText</TT>
2537	* @param oldLength the length of the search range in <TT>oldText</TT>
2538	* @param newText the text containing the replacement text
2539	* @param newStart the start of the replacement range in <TT>newText</TT>
2540	* @param newLength the length of the replacement range in <TT>newText</TT>
2541	* @return a reference to this
2542	* @stable ICU 2.0
2543	*/
2544	UnicodeString& findAndReplace(int32_t start,
2545	int32_t length,
2546	const UnicodeString& oldText,
2547	int32_t oldStart,
2548	int32_t oldLength,
2549	const UnicodeString& newText,
2550	int32_t newStart,
2551	int32_t newLength);
2552
2553
2554	/* Remove operations */
2555
2556	/**
2557	* Remove all characters from the UnicodeString object.
2558	* @return a reference to this
2559	* @stable ICU 2.0
2560	*/
2561	inline UnicodeString& remove(void);
2562
2563	/**
2564	* Remove the characters in the range
2565	* [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2566	* @param start the offset of the first character to remove
2567	* @param length the number of characters to remove
2568	* @return a reference to this
2569	* @stable ICU 2.0
2570	*/
2571	inline UnicodeString& remove(int32_t start,
2572	int32_t length = (int32_t)INT32_MAX);
2573
2574	/**
2575	* Remove the characters in the range
2576	* [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2577	* @param start the offset of the first character to remove
2578	* @param limit the offset immediately following the range to remove
2579	* @return a reference to this
2580	* @stable ICU 2.0
2581	*/
2582	inline UnicodeString& removeBetween(int32_t start,
2583	int32_t limit = (int32_t)INT32_MAX);
2584
2585	/**
2586	* Retain only the characters in the range
2587	* [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2588	* Removes characters before <code>start</code> and at and after <code>limit</code>.
2589	* @param start the offset of the first character to retain
2590	* @param limit the offset immediately following the range to retain
2591	* @return a reference to this
2592	* @stable ICU 4.4
2593	*/
2594	inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2595
2596	/* Length operations */
2597
2598	/**
2599	* Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2600	* If the length of this UnicodeString is less than targetLength,
2601	* targetLength - length() copies of padChar will be added to the
2602	* beginning of this UnicodeString.
2603	* @param targetLength the desired length of the string
2604	* @param padChar the character to use for padding. Defaults to
2605	* space (U+0020)
2606	* @return TRUE if the text was padded, FALSE otherwise.
2607	* @stable ICU 2.0
2608	*/
2609	UBool padLeading(int32_t targetLength,
2610	char16_t padChar = 0x0020);
2611
2612	/**
2613	* Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2614	* If the length of this UnicodeString is less than targetLength,
2615	* targetLength - length() copies of padChar will be added to the
2616	* end of this UnicodeString.
2617	* @param targetLength the desired length of the string
2618	* @param padChar the character to use for padding. Defaults to
2619	* space (U+0020)
2620	* @return TRUE if the text was padded, FALSE otherwise.
2621	* @stable ICU 2.0
2622	*/
2623	UBool padTrailing(int32_t targetLength,
2624	char16_t padChar = 0x0020);
2625
2626	/**
2627	* Truncate this UnicodeString to the <TT>targetLength</TT>.
2628	* @param targetLength the desired length of this UnicodeString.
2629	* @return TRUE if the text was truncated, FALSE otherwise
2630	* @stable ICU 2.0
2631	*/
2632	inline UBool truncate(int32_t targetLength);
2633
2634	/**
2635	* Trims leading and trailing whitespace from this UnicodeString.
2636	* @return a reference to this
2637	* @stable ICU 2.0
2638	*/
2639	UnicodeString& trim(void);
2640
2641
2642	/* Miscellaneous operations */
2643
2644	/**
2645	* Reverse this UnicodeString in place.
2646	* @return a reference to this
2647	* @stable ICU 2.0
2648	*/
2649	inline UnicodeString& reverse(void);
2650
2651	/**
2652	* Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2653	* this UnicodeString.
2654	* @param start the start of the range to reverse
2655	* @param length the number of characters to reverse
2656	* @return a reference to this
2657	* @stable ICU 2.0
2658	*/
2659	inline UnicodeString& reverse(int32_t start,
2660	int32_t length);
2661
2662	/**
2663	* Convert the characters in this to UPPER CASE following the conventions of
2664	* the default locale.
2665	* @return A reference to this.
2666	* @stable ICU 2.0
2667	*/
2668	UnicodeString& toUpper(void);
2669
2670	/**
2671	* Convert the characters in this to UPPER CASE following the conventions of
2672	* a specific locale.
2673	* @param locale The locale containing the conventions to use.
2674	* @return A reference to this.
2675	* @stable ICU 2.0
2676	*/
2677	UnicodeString& toUpper(const Locale& locale);
2678
2679	/**
2680	* Convert the characters in this to lower case following the conventions of
2681	* the default locale.
2682	* @return A reference to this.
2683	* @stable ICU 2.0
2684	*/
2685	UnicodeString& toLower(void);
2686
2687	/**
2688	* Convert the characters in this to lower case following the conventions of
2689	* a specific locale.
2690	* @param locale The locale containing the conventions to use.
2691	* @return A reference to this.
2692	* @stable ICU 2.0
2693	*/
2694	UnicodeString& toLower(const Locale& locale);
2695
2696	#if !UCONFIG_NO_BREAK_ITERATION
2697
2698	/**
2699	* Titlecase this string, convenience function using the default locale.
2700	*
2701	* Casing is locale-dependent and context-sensitive.
2702	* Titlecasing uses a break iterator to find the first characters of words
2703	* that are to be titlecased. It titlecases those characters and lowercases
2704	* all others.
2705	*
2706	* The titlecase break iterator can be provided to customize for arbitrary
2707	* styles, using rules and dictionaries beyond the standard iterators.
2708	* It may be more efficient to always provide an iterator to avoid
2709	* opening and closing one for each string.
2710	* The standard titlecase iterator for the root locale implements the
2711	* algorithm of Unicode TR 21.
2712	*
2713	* This function uses only the setText(), first() and next() methods of the
2714	* provided break iterator.
2715	*
2716	* @param titleIter A break iterator to find the first characters of words
2717	* that are to be titlecased.
2718	* If none is provided (0), then a standard titlecase
2719	* break iterator is opened.
2720	* Otherwise the provided iterator is set to the string's text.
2721	* @return A reference to this.
2722	* @stable ICU 2.1
2723	*/
2724	UnicodeString &toTitle(BreakIterator *titleIter);
2725
2726	/**
2727	* Titlecase this string.
2728	*
2729	* Casing is locale-dependent and context-sensitive.
2730	* Titlecasing uses a break iterator to find the first characters of words
2731	* that are to be titlecased. It titlecases those characters and lowercases
2732	* all others.
2733	*
2734	* The titlecase break iterator can be provided to customize for arbitrary
2735	* styles, using rules and dictionaries beyond the standard iterators.
2736	* It may be more efficient to always provide an iterator to avoid
2737	* opening and closing one for each string.
2738	* The standard titlecase iterator for the root locale implements the
2739	* algorithm of Unicode TR 21.
2740	*
2741	* This function uses only the setText(), first() and next() methods of the
2742	* provided break iterator.
2743	*
2744	* @param titleIter A break iterator to find the first characters of words
2745	* that are to be titlecased.
2746	* If none is provided (0), then a standard titlecase
2747	* break iterator is opened.
2748	* Otherwise the provided iterator is set to the string's text.
2749	* @param locale The locale to consider.
2750	* @return A reference to this.
2751	* @stable ICU 2.1
2752	*/
2753	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2754
2755	/**
2756	* Titlecase this string, with options.
2757	*
2758	* Casing is locale-dependent and context-sensitive.
2759	* Titlecasing uses a break iterator to find the first characters of words
2760	* that are to be titlecased. It titlecases those characters and lowercases
2761	* all others. (This can be modified with options.)
2762	*
2763	* The titlecase break iterator can be provided to customize for arbitrary
2764	* styles, using rules and dictionaries beyond the standard iterators.
2765	* It may be more efficient to always provide an iterator to avoid
2766	* opening and closing one for each string.
2767	* The standard titlecase iterator for the root locale implements the
2768	* algorithm of Unicode TR 21.
2769	*
2770	* This function uses only the setText(), first() and next() methods of the
2771	* provided break iterator.
2772	*
2773	* @param titleIter A break iterator to find the first characters of words
2774	* that are to be titlecased.
2775	* If none is provided (0), then a standard titlecase
2776	* break iterator is opened.
2777	* Otherwise the provided iterator is set to the string's text.
2778	* @param locale The locale to consider.
2779	* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
2780	* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
2781	* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
2782	* See also ucasemap_open().
2783	* @return A reference to this.
2784	* @stable ICU 3.8
2785	*/
2786	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2787
2788	#endif
2789
2790	/**
2791	* Case-folds the characters in this string.
2792	*
2793	* Case-folding is locale-independent and not context-sensitive,
2794	* but there is an option for whether to include or exclude mappings for dotted I
2795	* and dotless i that are marked with 'T' in CaseFolding.txt.
2796	*
2797	* The result may be longer or shorter than the original.
2798	*
2799	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2800	* @return A reference to this.
2801	* @stable ICU 2.0
2802	*/
2803	UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2804
2805	//========================================
2806	// Access to the internal buffer
2807	//========================================
2808
2809	/**
2810	* Get a read/write pointer to the internal buffer.
2811	* The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
2812	* writable, and is still owned by the UnicodeString object.
2813	* Calls to getBuffer(minCapacity) must not be nested, and
2814	* must be matched with calls to releaseBuffer(newLength).
2815	* If the string buffer was read-only or shared,
2816	* then it will be reallocated and copied.
2817	*
2818	* An attempted nested call will return 0, and will not further modify the
2819	* state of the UnicodeString object.
2820	* It also returns 0 if the string is bogus.
2821	*
2822	* The actual capacity of the string buffer may be larger than minCapacity.
2823	* getCapacity() returns the actual capacity.
2824	* For many operations, the full capacity should be used to avoid reallocations.
2825	*
2826	* While the buffer is "open" between getBuffer(minCapacity)
2827	* and releaseBuffer(newLength), the following applies:
2828	* - The string length is set to 0.
2829	* - Any read API call on the UnicodeString object will behave like on a 0-length string.
2830	* - Any write API call on the UnicodeString object is disallowed and will have no effect.
2831	* - You can read from and write to the returned buffer.
2832	* - The previous string contents will still be in the buffer;
2833	* if you want to use it, then you need to call length() before getBuffer(minCapacity).
2834	* If the length() was greater than minCapacity, then any contents after minCapacity
2835	* may be lost.
2836	* The buffer contents is not NUL-terminated by getBuffer().
2837	* If length()<getCapacity() then you can terminate it by writing a NUL
2838	* at index length().
2839	* - You must call releaseBuffer(newLength) before and in order to
2840	* return to normal UnicodeString operation.
2841	*
2842	* @param minCapacity the minimum number of char16_ts that are to be available
2843	* in the buffer, starting at the returned pointer;
2844	* default to the current string capacity if minCapacity==-1
2845	* @return a writable pointer to the internal string buffer,
2846	* or nullptr if an error occurs (nested calls, out of memory)
2847	*
2848	* @see releaseBuffer
2849	* @see getTerminatedBuffer()
2850	* @stable ICU 2.0
2851	*/
2852	char16_t *getBuffer(int32_t minCapacity);
2853
2854	/**
2855	* Release a read/write buffer on a UnicodeString object with an
2856	* "open" getBuffer(minCapacity).
2857	* This function must be called in a matched pair with getBuffer(minCapacity).
2858	* releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2859	*
2860	* It will set the string length to newLength, at most to the current capacity.
2861	* If newLength==-1 then it will set the length according to the
2862	* first NUL in the buffer, or to the capacity if there is no NUL.
2863	*
2864	* After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2865	*
2866	* @param newLength the new length of the UnicodeString object;
2867	* defaults to the current capacity if newLength is greater than that;
2868	* if newLength==-1, it defaults to u_strlen(buffer) but not more than
2869	* the current capacity of the string
2870	*
2871	* @see getBuffer(int32_t minCapacity)
2872	* @stable ICU 2.0
2873	*/
2874	void releaseBuffer(int32_t newLength=-1);
2875
2876	/**
2877	* Get a read-only pointer to the internal buffer.
2878	* This can be called at any time on a valid UnicodeString.
2879	*
2880	* It returns 0 if the string is bogus, or
2881	* during an "open" getBuffer(minCapacity).
2882	*
2883	* It can be called as many times as desired.
2884	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2885	* at which time the pointer is semantically invalidated and must not be used any more.
2886	*
2887	* The capacity of the buffer can be determined with getCapacity().
2888	* The part after length() may or may not be initialized and valid,
2889	* depending on the history of the UnicodeString object.
2890	*
2891	* The buffer contents is (probably) not NUL-terminated.
2892	* You can check if it is with
2893	* <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2894	* (See getTerminatedBuffer().)
2895	*
2896	* The buffer may reside in read-only memory. Its contents must not
2897	* be modified.
2898	*
2899	* @return a read-only pointer to the internal string buffer,
2900	* or nullptr if the string is empty or bogus
2901	*
2902	* @see getBuffer(int32_t minCapacity)
2903	* @see getTerminatedBuffer()
2904	* @stable ICU 2.0
2905	*/
2906	inline const char16_t *getBuffer() const;
2907
2908	/**
2909	* Get a read-only pointer to the internal buffer,
2910	* making sure that it is NUL-terminated.
2911	* This can be called at any time on a valid UnicodeString.
2912	*
2913	* It returns 0 if the string is bogus, or
2914	* during an "open" getBuffer(minCapacity), or if the buffer cannot
2915	* be NUL-terminated (because memory allocation failed).
2916	*
2917	* It can be called as many times as desired.
2918	* The pointer that it returns will remain valid until the UnicodeString object is modified,
2919	* at which time the pointer is semantically invalidated and must not be used any more.
2920	*
2921	* The capacity of the buffer can be determined with getCapacity().
2922	* The part after length()+1 may or may not be initialized and valid,
2923	* depending on the history of the UnicodeString object.
2924	*
2925	* The buffer contents is guaranteed to be NUL-terminated.
2926	* getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2927	* is written.
2928	* For this reason, this function is not const, unlike getBuffer().
2929	* Note that a UnicodeString may also contain NUL characters as part of its contents.
2930	*
2931	* The buffer may reside in read-only memory. Its contents must not
2932	* be modified.
2933	*
2934	* @return a read-only pointer to the internal string buffer,
2935	* or 0 if the string is empty or bogus
2936	*
2937	* @see getBuffer(int32_t minCapacity)
2938	* @see getBuffer()
2939	* @stable ICU 2.2
2940	*/
2941	const char16_t *getTerminatedBuffer();
2942
2943	//========================================
2944	// Constructors
2945	//========================================
2946
2947	/** Construct an empty UnicodeString.
2948	* @stable ICU 2.0
2949	*/
2950	inline UnicodeString();
2951
2952	/**
2953	* Construct a UnicodeString with capacity to hold <TT>capacity</TT> char16_ts
2954	* @param capacity the number of char16_ts this UnicodeString should hold
2955	* before a resize is necessary; if count is greater than 0 and count
2956	* code points c take up more space than capacity, then capacity is adjusted
2957	* accordingly.
2958	* @param c is used to initially fill the string
2959	* @param count specifies how many code points c are to be written in the
2960	* string
2961	* @stable ICU 2.0
2962	*/
2963	UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2964
2965	/**
2966	* Single char16_t (code unit) constructor.
2967	*
2968	* It is recommended to mark this constructor "explicit" by
2969	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2970	* on the compiler command line or similar.
2971	* @param ch the character to place in the UnicodeString
2972	* @stable ICU 2.0
2973	*/
2974	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
2975
2976	/**
2977	* Single UChar32 (code point) constructor.
2978	*
2979	* It is recommended to mark this constructor "explicit" by
2980	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2981	* on the compiler command line or similar.
2982	* @param ch the character to place in the UnicodeString
2983	* @stable ICU 2.0
2984	*/
2985	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2986
2987	/**
2988	* char16_t* constructor.
2989	*
2990	* It is recommended to mark this constructor "explicit" by
2991	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2992	* on the compiler command line or similar.
2993	* @param text The characters to place in the UnicodeString. <TT>text</TT>
2994	* must be NUL (U+0000) terminated.
2995	* @stable ICU 2.0
2996	*/
2997	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
2998
2999	/*
3000	* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3001	* it should always be available regardless of U_HIDE_DRAFT_API status
3002	*/
3003	#if !U_CHAR16_IS_TYPEDEF
3004	/**
3005	* Constructor taking a pointer to uint16_t code units.
3006	* Forwards to UnicodeString(const char16_t *) via ConstChar16Ptr.
3007	*
3008	* Defining <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3009	* on the compiler command line (or similar) is recommended so that
3010	* this constructor is marked "explicit".
3011	* @param text NUL-terminated UTF-16 string
3012	* @draft ICU 59
3013	*/
3014	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text)
3015	: UnicodeString(ConstChar16Ptr(text)) {}
3016	#endif
3017
3018	/*
3019	* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3020	* it should always be available regardless of U_HIDE_DRAFT_API status
3021	*/
3022	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3023	/**
3024	* wchar_t * constructor.
3025	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3026	* Delegates to UnicodeString(const char16_t *).
3027	*
3028	* It is recommended to mark this constructor "explicit" by
3029	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3030	* on the compiler command line or similar.
3031	* @param text NUL-terminated UTF-16 string
3032	* @draft ICU 59
3033	*/
3034	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3035	UnicodeString(ConstChar16Ptr(text)) {}
3036	#endif
3037
3038	/*
3039	* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3040	* it should always be available regardless of U_HIDE_DRAFT_API status
3041	*/
3042	/**
3043	* nullptr_t constructor.
3044	* Effectively the same as the default constructor, makes an empty string object.
3045	*
3046	* It is recommended to mark this constructor "explicit" by
3047	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3048	* on the compiler command line or similar.
3049	* @param text nullptr
3050	* @draft ICU 59
3051	*/
3052	UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3053
3054	/**
3055	* char16_t* constructor.
3056	* @param text The characters to place in the UnicodeString.
3057	* @param textLength The number of Unicode characters in <TT>text</TT>
3058	* to copy.
3059	* @stable ICU 2.0
3060	*/
3061	UnicodeString(const char16_t *text,
3062	int32_t textLength);
3063
3064	/*
3065	* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3066	* it should always be available regardless of U_HIDE_DRAFT_API status
3067	*/
3068	#if !U_CHAR16_IS_TYPEDEF
3069	/**
3070	* Constructor taking uint16_t code units and an explicit length.
3071	* Forwards to UnicodeString(const char16_t *, int32_t) via ConstChar16Ptr.
3072	* @param text UTF-16 string
3073	* @param length string length
3074	* @draft ICU 59
3075	*/
3076	UnicodeString(const uint16_t *text, int32_t length)
3077	: UnicodeString(ConstChar16Ptr(text), length) {}
3078	#endif
3079
3080	/*
3081	* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3082	* it should always be available regardless of U_HIDE_DRAFT_API status
3083	*/
3084	#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3085	/**
3086	* wchar_t * constructor.
3087	* (Only defined if U_SIZEOF_WCHAR_T==2.)
3088	* Delegates to UnicodeString(const char16_t *, int32_t).
3089	* @param text NUL-terminated UTF-16 string
3090	* @param length string length
3091	* @draft ICU 59
3092	*/
3093	UnicodeString(const wchar_t *text, int32_t length) :
3094	UnicodeString(ConstChar16Ptr(text), length) {}
3095	#endif
3096
3097	/*
3098	* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3099	* it should always be available regardless of U_HIDE_DRAFT_API status
3100	*/
3101	/**
3102	* nullptr_t constructor.
3103	* Effectively the same as the default constructor, makes an empty string object.
3104	* @param text nullptr
3105	* @param length ignored
3106	* @draft ICU 59
3107	*/
3108	inline UnicodeString(const std::nullptr_t text, int32_t length);
3109
/**
 * Readonly-aliasing char16_t* constructor.
 * The text will be used for the UnicodeString object, but
 * it will not be released when the UnicodeString is destroyed.
 * This has copy-on-write semantics:
 * When the string is modified, then the buffer is first copied into
 * newly allocated memory.
 * The aliased buffer is never modified.
 *
 * In an assignment to another UnicodeString, when using the copy constructor
 * or the assignment operator, the text will be copied.
 * When using fastCopyFrom(), the text will be aliased again,
 * so that both strings then alias the same readonly-text.
 *
 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
 *                     This must be true if <code>textLength==-1</code>.
 * @param text The characters to alias for the UnicodeString.
 * @param textLength The number of Unicode characters in <code>text</code> to alias.
 *                   If -1, then this constructor will determine the length
 *                   by calling <code>u_strlen()</code>.
 * @stable ICU 2.0
 */
UnicodeString(UBool isTerminated,
              ConstChar16Ptr text,
              int32_t textLength);
3135
/**
 * Writable-aliasing char16_t* constructor.
 * The text will be used for the UnicodeString object, but
 * it will not be released when the UnicodeString is destroyed.
 * This has write-through semantics:
 * For as long as the capacity of the buffer is sufficient, write operations
 * will directly affect the buffer. When more capacity is necessary, then
 * a new buffer will be allocated and the contents copied as with regularly
 * constructed strings.
 * In an assignment to another UnicodeString, the buffer will be copied.
 * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
 * as the string buffer itself and will in this case not copy the contents.
 *
 * @param buffer The characters to alias for the UnicodeString.
 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
 * @param buffCapacity The size of <code>buffer</code> in char16_ts.
 * @stable ICU 2.0
 */
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3155
/*
 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
 * it should always be available regardless of U_HIDE_DRAFT_API status
 */
#if !U_CHAR16_IS_TYPEDEF
/**
 * Writable-aliasing uint16_t * constructor.
 * (Only defined if U_CHAR16_IS_TYPEDEF is 0.)
 * Wraps the pointer in a Char16Ptr and delegates to the writable-aliasing
 * UnicodeString(char16_t *, int32_t, int32_t) constructor.
 * @param buffer writable buffer of/for UTF-16 text
 * @param buffLength length of the current buffer contents
 * @param buffCapacity buffer capacity
 * @draft ICU 59
 */
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
    UnicodeString (Char16Ptr (buffer), buffLength, buffCapacity) {}
#endif
3172
/*
 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
 * it should always be available regardless of U_HIDE_DRAFT_API status
 */
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
 * Writable-aliasing wchar_t * constructor.
 * (Only defined if U_SIZEOF_WCHAR_T==2.)
 * Wraps the pointer in a Char16Ptr and delegates to the writable-aliasing
 * UnicodeString(char16_t *, int32_t, int32_t) constructor.
 * @param buffer writable buffer of/for UTF-16 text
 * @param buffLength length of the current buffer contents
 * @param buffCapacity buffer capacity
 * @draft ICU 59
 */
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
    UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
#endif
3190
/*
 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
 * it should always be available regardless of U_HIDE_DRAFT_API status
 */
/**
 * Writable-aliasing nullptr_t constructor.
 * Effectively the same as the default constructor, makes an empty string object.
 * The arguments only select this overload; the inline definition later in
 * this header leaves all three parameters unnamed and unused.
 * @param buffer nullptr
 * @param buffLength ignored
 * @param buffCapacity ignored
 * @draft ICU 59
 */
inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3204
3205	#if U_CHARSET_IS_UTF8 \|\| !UCONFIG_NO_CONVERSION
3206
3207	/**
3208	* char* constructor.
3209	* Uses the default converter (and thus depends on the ICU conversion code)
3210	* unless U_CHARSET_IS_UTF8 is set to 1.
3211	*
3212	* For ASCII (really "invariant character") strings it is more efficient to use
3213	* the constructor that takes a US_INV (for its enum EInvariant).
3214	* For ASCII (invariant-character) string literals, see UNICODE_STRING and
3215	* UNICODE_STRING_SIMPLE.
3216	*
3217	* It is recommended to mark this constructor "explicit" by
3218	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3219	* on the compiler command line or similar.
3220	* @param codepageData an array of bytes, null-terminated,
3221	* in the platform's default codepage.
3222	* @stable ICU 2.0
3223	* @see UNICODE_STRING
3224	* @see UNICODE_STRING_SIMPLE
3225	*/
3226	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3227
3228	/**
3229	* char* constructor.
3230	* Uses the default converter (and thus depends on the ICU conversion code)
3231	* unless U_CHARSET_IS_UTF8 is set to 1.
3232	* @param codepageData an array of bytes in the platform's default codepage.
3233	* @param dataLength The number of bytes in <TT>codepageData</TT>.
3234	* @stable ICU 2.0
3235	*/
3236	UnicodeString(const char *codepageData, int32_t dataLength);
3237
3238	#endif
3239
3240	#if !UCONFIG_NO_CONVERSION
3241
3242	/**
3243	* char* constructor.
3244	* @param codepageData an array of bytes, null-terminated
3245	* @param codepage the encoding of <TT>codepageData</TT>. The special
3246	* value 0 for <TT>codepage</TT> indicates that the text is in the
3247	* platform's default codepage.
3248	*
3249	* If <code>codepage</code> is an empty string (<code>""</code>),
3250	* then a simple conversion is performed on the codepage-invariant
3251	* subset ("invariant characters") of the platform encoding. See utypes.h.
3252	* Recommendation: For invariant-character strings use the constructor
3253	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3254	* because it avoids object code dependencies of UnicodeString on
3255	* the conversion code.
3256	*
3257	* @stable ICU 2.0
3258	*/
3259	UnicodeString(const char codepageData, const* char *codepage);
3260
3261	/**
3262	* char* constructor.
3263	* @param codepageData an array of bytes.
3264	* @param dataLength The number of bytes in <TT>codepageData</TT>.
3265	* @param codepage the encoding of <TT>codepageData</TT>. The special
3266	* value 0 for <TT>codepage</TT> indicates that the text is in the
3267	* platform's default codepage.
3268	* If <code>codepage</code> is an empty string (<code>""</code>),
3269	* then a simple conversion is performed on the codepage-invariant
3270	* subset ("invariant characters") of the platform encoding. See utypes.h.
3271	* Recommendation: For invariant-character strings use the constructor
3272	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3273	* because it avoids object code dependencies of UnicodeString on
3274	* the conversion code.
3275	*
3276	* @stable ICU 2.0
3277	*/
3278	UnicodeString(const char codepageData, int32_t dataLength, const* char *codepage);
3279
/**
 * char * / UConverter constructor.
 * This constructor uses an existing UConverter object to
 * convert the codepage string to Unicode and construct a UnicodeString
 * from that.
 *
 * The converter is reset at first.
 * If the error code indicates a failure before this constructor is called,
 * or if an error occurs during conversion or construction,
 * then the string will be bogus.
 *
 * This function avoids the overhead of opening and closing a converter if
 * multiple strings are constructed.
 *
 * @param src input codepage string
 * @param srcLength length of the input string, can be -1 for NUL-terminated strings
 * @param cnv converter object (ucnv_resetToUnicode() will be called),
 *            can be NULL for the default converter
 * @param errorCode normal ICU error code
 * @stable ICU 2.0
 */
UnicodeString(
    const char *src, int32_t srcLength,
    UConverter *cnv,
    UErrorCode &errorCode);
3305
3306	#endif
3307
3308	/**
3309	* Constructs a Unicode string from an invariant-character char * string.
3310	* About invariant characters see utypes.h.
3311	* This constructor has no runtime dependency on conversion code and is
3312	* therefore recommended over ones taking a charset name string
3313	* (where the empty string "" indicates invariant-character conversion).
3314	*
3315	* Use the macro US_INV as the third, signature-distinguishing parameter.
3316	*
3317	* For example:
3318	* \code
3319	* void fn(const char *s) {
3320	* UnicodeString ustr(s, -1, US_INV);
3321	* // use ustr ...
3322	* }
3323	* \endcode
3324	*
3325	* @param src String using only invariant characters.
3326	* @param length Length of src, or -1 if NUL-terminated.
3327	* @param inv Signature-distinguishing paramater, use US_INV.
3328	*
3329	* @see US_INV
3330	* @stable ICU 3.2
3331	*/
3332	UnicodeString(const char src, int32_t length, enum* EInvariant inv);
3333
3334
/**
 * Copy constructor.
 *
 * Starting with ICU 2.4, the assignment operator and the copy constructor
 * allocate a new buffer and copy the buffer contents even for readonly aliases.
 * By contrast, the fastCopyFrom() function implements the old,
 * more efficient but less safe behavior
 * of making this string also a readonly alias to the same buffer.
 *
 * If the source object has an "open" buffer from getBuffer(minCapacity),
 * then the copy is an empty string.
 *
 * @param that The UnicodeString object to copy.
 * @stable ICU 2.0
 * @see fastCopyFrom
 */
UnicodeString(const UnicodeString& that);
3352
#if U_HAVE_RVALUE_REFERENCES
/**
 * Move constructor, might leave src in bogus state.
 * This string will have the same contents and state that the source string had.
 * (Only declared if U_HAVE_RVALUE_REFERENCES is nonzero.)
 * @param src source string
 * @stable ICU 56
 */
UnicodeString(UnicodeString &&src) U_NOEXCEPT;
#endif
3362
/**
 * 'Substring' constructor from tail of source string.
 * @param src The UnicodeString object to copy.
 * @param srcStart The offset into <tt>src</tt> at which to start copying.
 * @stable ICU 2.2
 */
UnicodeString(const UnicodeString& src, int32_t srcStart);

/**
 * 'Substring' constructor from subrange of source string.
 * @param src The UnicodeString object to copy.
 * @param srcStart The offset into <tt>src</tt> at which to start copying.
 * @param srcLength The number of characters from <tt>src</tt> to copy.
 * @stable ICU 2.2
 */
UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3379
3380	/**
3381	* Clone this object, an instance of a subclass of Replaceable.
3382	* Clones can be used concurrently in multiple threads.
3383	* If a subclass does not implement clone(), or if an error occurs,
3384	* then NULL is returned.
3385	* The clone functions in all subclasses return a pointer to a Replaceable
3386	* because some compilers do not support covariant (same-as-this)
3387	* return types; cast to the appropriate subclass if necessary.
3388	* The caller must delete the clone.
3389	*
3390	* @return a clone of this object
3391	*
3392	* @see Replaceable::clone
3393	* @see getDynamicClassID
3394	* @stable ICU 2.6
3395	*/
3396	virtual Replaceable clone() const*;
3397
/**
 * Destructor.
 * @stable ICU 2.0
 */
virtual ~UnicodeString();
3402
/**
 * Create a UnicodeString from a UTF-8 string.
 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
 * Calls u_strFromUTF8WithSub().
 *
 * @param utf8 UTF-8 input string.
 *             Note that a StringPiece can be implicitly constructed
 *             from a std::string or a NUL-terminated const char * string.
 * @return A UnicodeString with equivalent UTF-16 contents.
 * @see toUTF8
 * @see toUTF8String
 * @stable ICU 4.2
 */
static UnicodeString fromUTF8(StringPiece utf8);
3417
/**
 * Create a UnicodeString from a UTF-32 string.
 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
 * Calls u_strFromUTF32WithSub().
 *
 * @param utf32 UTF-32 input string. Must not be NULL.
 * @param length Length of the input string, or -1 if NUL-terminated.
 * @return A UnicodeString with equivalent UTF-16 contents.
 * @see toUTF32
 * @stable ICU 4.2
 */
static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3430
/* Miscellaneous operations */
3432
/**
 * Unescape a string of characters and return a string containing
 * the result. The following escape sequences are recognized:
 *
 * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
 * \\Uhhhhhhhh   8 hex digits
 * \\xhh         1-2 hex digits
 * \\ooo         1-3 octal digits; o in [0-7]
 * \\cX          control-X; X is masked with 0x1F
 *
 * as well as the standard ANSI C escapes:
 *
 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
 *
 * Anything else following a backslash is generically escaped. For
 * example, "[a\\-z]" returns "[a-z]".
 *
 * If an escape sequence is ill-formed, this method returns an empty
 * string. An example of an ill-formed sequence is "\\u" followed by
 * fewer than 4 hex digits.
 *
 * This function is similar to u_unescape() but not identical to it.
 * The latter takes a source char*, so it does escape recognition
 * and also invariant conversion.
 *
 * @return a string with backslash escapes interpreted, or an
 *         empty string on error.
 * @see UnicodeString#unescapeAt()
 * @see u_unescape()
 * @see u_unescapeAt()
 * @stable ICU 2.0
 */
UnicodeString unescape() const;
3468
/**
 * Unescape a single escape sequence and return the represented
 * character. See unescape() for a listing of the recognized escape
 * sequences. The character at offset-1 is assumed (without
 * checking) to be a backslash. If the escape sequence is
 * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
 * returned.
 *
 * @param offset an input output parameter. On input, it is the
 *               offset into this string where the escape sequence is located,
 *               after the initial backslash. On output, it is advanced after the
 *               last character parsed. On error, it is not advanced at all.
 * @return the character represented by the escape sequence at
 *         offset, or U_SENTINEL=-1 on error.
 * @see UnicodeString#unescape()
 * @see u_unescape()
 * @see u_unescapeAt()
 * @stable ICU 2.0
 */
UChar32 unescapeAt(int32_t &offset) const;
3489
/**
 * ICU "poor man's RTTI", returns a UClassID for this class.
 *
 * @return the UClassID for this class
 * @stable ICU 2.2
 */
static UClassID U_EXPORT2 getStaticClassID();

/**
 * ICU "poor man's RTTI", returns a UClassID for the actual class.
 *
 * @return the UClassID for the actual class of this object
 * @stable ICU 2.2
 */
virtual UClassID getDynamicClassID() const;
3503
3504	//========================================
3505	// Implementation methods
3506	//========================================
3507
3508	protected:
/**
 * Implement Replaceable::getLength() (see jitterbug 1027).
 * @stable ICU 2.4
 */
virtual int32_t getLength() const;

/**
 * The change in Replaceable to use virtual getCharAt() allows
 * UnicodeString::charAt() to be inline again (see jitterbug 709).
 * @param offset index passed through from the Replaceable interface
 * @stable ICU 2.4
 */
virtual char16_t getCharAt(int32_t offset) const;

/**
 * The change in Replaceable to use virtual getChar32At() allows
 * UnicodeString::char32At() to be inline again (see jitterbug 709).
 * @param offset index passed through from the Replaceable interface
 * @stable ICU 2.4
 */
virtual UChar32 getChar32At(int32_t offset) const;
3528
3529	private:
// For char* constructors. Could be made public.
UnicodeString &setToUTF8(StringPiece utf8);
3532	// For extract(char).*
3533	// We could make a toUTF8(target, capacity, errorCode) public but not
3534	// this version: New API will be cleaner if we make callers create substrings
3535	// rather than having start+length on every method,
3536	// and it should take a UErrorCode&.
3537	int32_t
3538	toUTF8(int32_t start, int32_t len,
3539	char target, int32_t capacity) const*;
3540
/**
 * Internal string contents comparison, called by operator==.
 * Requires: this & text not bogus and have same lengths.
 * @param text the other string
 * @param len the common length; operator== passes this string's length()
 */
UBool doEquals(const UnicodeString &text, int32_t len) const;
3546
// Comparison workers. Each comes as a pair of overloads: one declared inline
// that takes a UnicodeString source, and one that takes a char16_t array.

// The inline definition later in this header returns early when srcText is
// bogus; otherwise it pins srcStart/srcLength against srcText and forwards
// to the char16_t * overload below.
inline int8_t
doCompare(int32_t start,
          int32_t length,
          const UnicodeString& srcText,
          int32_t srcStart,
          int32_t srcLength) const;

int8_t doCompare(int32_t start,
                 int32_t length,
                 const char16_t *srcChars,
                 int32_t srcStart,
                 int32_t srcLength) const;

// NOTE(review): presumably the code point order counterparts of doCompare();
// their definitions are not visible in this part of the header.
inline int8_t
doCompareCodePointOrder(int32_t start,
                        int32_t length,
                        const UnicodeString& srcText,
                        int32_t srcStart,
                        int32_t srcLength) const;

int8_t doCompareCodePointOrder(int32_t start,
                               int32_t length,
                               const char16_t *srcChars,
                               int32_t srcStart,
                               int32_t srcLength) const;

// NOTE(review): presumably the case-insensitive counterparts of doCompare(),
// with an extra options bit set; confirm against the definitions.
inline int8_t
doCaseCompare(int32_t start,
              int32_t length,
              const UnicodeString &srcText,
              int32_t srcStart,
              int32_t srcLength,
              uint32_t options) const;

int8_t
doCaseCompare(int32_t start,
              int32_t length,
              const char16_t *srcChars,
              int32_t srcStart,
              int32_t srcLength,
              uint32_t options) const;
3588
// Search workers, overloaded on the type of the searched-for character
// (char16_t vs. UChar32), each taking a start/length range.
// NOTE(review): exact range pinning and "not found" return value are defined
// in the implementation file, not visible here.
int32_t doIndexOf(char16_t c,
                  int32_t start,
                  int32_t length) const;

int32_t doIndexOf(UChar32 c,
                  int32_t start,
                  int32_t length) const;

int32_t doLastIndexOf(char16_t c,
                      int32_t start,
                      int32_t length) const;

int32_t doLastIndexOf(UChar32 c,
                      int32_t start,
                      int32_t length) const;
3604
// Extraction workers: one writes into a char16_t array at dstStart,
// the other (declared inline) writes into another UnicodeString.
void doExtract(int32_t start,
               int32_t length,
               char16_t *dst,
               int32_t dstStart) const;

inline void doExtract(int32_t start,
                      int32_t length,
                      UnicodeString& target) const;

// Inline single-code-unit accessor; defined outside this part of the header.
inline char16_t doCharAt(int32_t offset) const;
3615
// Mutation workers. Each of doReplace() and doAppend() has one overload
// taking a UnicodeString source and one taking a char16_t array;
// all return a UnicodeString reference.
UnicodeString& doReplace(int32_t start,
                         int32_t length,
                         const UnicodeString& srcText,
                         int32_t srcStart,
                         int32_t srcLength);

UnicodeString& doReplace(int32_t start,
                         int32_t length,
                         const char16_t *srcChars,
                         int32_t srcStart,
                         int32_t srcLength);

UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);

UnicodeString& doReverse(int32_t start,
                         int32_t length);
3633
// calculate hash code; the public inline hashCode() simply returns this value
int32_t doHashCode(void) const;

// get pointer to start of array
// these do not check for kOpenGetBuffer, unlike the public getBuffer() function
inline char16_t* getArrayStart(void);
inline const char16_t* getArrayStart(void) const;

// hasShortLength(): TRUE if fLengthAndFlags is non-negative, i.e. the length
// is stored in fLengthAndFlags itself.
// getShortLength(): fLengthAndFlags shifted right by kLengthShift;
// only meaningful when hasShortLength() is TRUE.
inline UBool hasShortLength() const;
inline int32_t getShortLength() const;

// A UnicodeString object (not necessarily its current buffer)
// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
inline UBool isWritable() const;

// Is the current buffer writable?
inline UBool isBufferWritable() const;
3651
// None of the following does releaseArray().
inline void setZeroLength();
inline void setShortLength(int32_t len);
inline void setLength(int32_t len);
inline void setToEmpty();
inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3658
// allocate the array; result may be the stack buffer
// sets refCount to 1 if appropriate
// sets fArray, fCapacity, and flags
// sets length to 0
// returns boolean for success or failure
UBool allocate(int32_t capacity);

// release the array if owned
void releaseArray(void);

// turn a bogus string into an empty one
void unBogus();

// implements assignment operator, copy constructor, and fastCopyFrom()
UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);

// Copies just the fields without memory management.
void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
3675	// Copies just the fields without memory management.
// Pin start and limit to acceptable values.
// pinIndex() clamps start into [0, length()];
// pinIndices() additionally clamps length into [0, length() - start].
inline void pinIndex(int32_t& start) const;
inline void pinIndices(int32_t& start,
                       int32_t& length) const;
3682
3683	#if !UCONFIG_NO_CONVERSION
3684
3685	/ Internal extract() using UConverter. /
3686	int32_t doExtract(int32_t start, int32_t length,
3687	char *dest, int32_t destCapacity,
3688	UConverter *cnv,
3689	UErrorCode &errorCode) const;
3690
/*
 * Real constructor for converting from codepage data.
 * It assumes that it is called with !fRefCounted.
 *
 * If <code>codepage==0</code>, then the default converter
 * is used for the platform encoding.
 * If <code>codepage</code> is an empty string (<code>""</code>),
 * then a simple conversion is performed on the codepage-invariant
 * subset ("invariant characters") of the platform encoding. See utypes.h.
 */
void doCodepageCreate(const char *codepageData,
                      int32_t dataLength,
                      const char *codepage);

/*
 * Worker function for creating a UnicodeString from
 * a codepage string using a UConverter.
 * Takes the converter and a UErrorCode reference instead of a codepage name.
 */
void
doCodepageCreate(const char *codepageData,
                 int32_t dataLength,
                 UConverter *converter,
                 UErrorCode &status);
3714
3715	#endif
3716
3717	/*
3718	* This function is called when write access to the array
3719	* is necessary.
3720	*
3721	* We need to make a copy of the array if
3722	* the buffer is read-only, or
3723	* the buffer is refCounted (shared), and refCount>1, or
3724	* the buffer is too small.
3725	*
3726	* Return FALSE if memory could not be allocated.
3727	*/
3728	UBool cloneArrayIfNeeded(int32_t newCapacity = -`1`,
3729	int32_t growCapacity = -`1`,
3730	UBool doCopyArray = TRUE,
3731	int32_t **pBufferToDelete = `0`,
3732	UBool forceClone = FALSE);
3733
/**
 * Common function for UnicodeString case mappings.
 * The stringCaseMapper has the same type UStringCaseMapper
 * as in ustr_imp.h for ustrcase_map().
 * The BreakIterator parameter exists only when UCONFIG_NO_BREAK_ITERATION is 0.
 */
UnicodeString &
caseMap(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
        BreakIterator *iter,
#endif
        UStringCaseMapper *stringCaseMapper);
3745
// ref counting
// (kRefCounted: "there is a refCount field before the characters in fArray")
void addRef(void);
int32_t removeRef(void);
int32_t refCount(void) const;
3750
3751	// constants
3752	enum {
3753	/**
3754	* Size of stack buffer for short strings.
3755	* Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
3756	* @see UNISTR_OBJECT_SIZE
3757	*/
3758	US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-`2`)/U_SIZEOF_UCHAR,
3759	kInvalidUChar=`0xffff`, // U+FFFF returned by charAt(invalid index)
3760	kInvalidHashCode=`0`, // invalid hash code
3761	kEmptyHashCode=`1`, // hash code for empty string
3762
3763	// bit flag values for fLengthAndFlags
3764	kIsBogus=`1`, // this string is bogus, i.e., not valid or NULL
3765	kUsingStackBuffer=`2`,// using fUnion.fStackFields instead of fUnion.fFields
3766	kRefCounted=`4`, // there is a refCount field before the characters in fArray
3767	kBufferIsReadonly=`8`,// do not write to this buffer
3768	kOpenGetBuffer=`16`, // getBuffer(minCapacity) was called (is "open"),
3769	// and releaseBuffer(newLength) must be called
3770	kAllStorageFlags=`0x1f`,
3771
3772	kLengthShift=`5`, // remaining 11 bits for non-negative short length, or negative if long
3773	kLength1=`1`<<kLengthShift,
3774	kMaxShortLength=`0x3ff`, // max non-negative short length (leaves top bit 0)
3775	kLengthIsLarge=`0xffe0`, // short length < 0, real length is in fUnion.fFields.fLength
3776
3777	// combined values for convenience
3778	kShortString=kUsingStackBuffer,
3779	kLongString=kRefCounted,
3780	kReadonlyAlias=kBufferIsReadonly,
3781	kWritableAlias=`0`
3782	};
3783
// Friend class declared elsewhere; gains access to the private members.
friend class UnicodeStringAppendable;

union StackBufferOrFields; // forward declaration necessary before friend declaration
friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3788
3789	/*
3790	* The following are all the class fields that are stored
3791	* in each UnicodeString object.
3792	* Note that UnicodeString has virtual functions,
3793	* therefore there is an implicit vtable pointer
3794	* as the first real field.
3795	* The fields should be aligned such that no padding is necessary.
3796	* On 32-bit machines, the size should be 32 bytes,
3797	* on 64-bit machines (8-byte pointers), it should be 40 bytes.
3798	*
3799	* We use a hack to achieve this.
3800	*
3801	* With at least some compilers, each of the following is forced to
3802	* a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3803	* rounded up with additional padding if the fields do not already fit that requirement:
3804	* - sizeof(class UnicodeString)
3805	* - offsetof(UnicodeString, fUnion)
3806	* - sizeof(fUnion)
3807	* - sizeof(fStackFields)
3808	*
3809	* We optimize for the longest possible internal buffer for short strings.
3810	* fUnion.fStackFields begins with 2 bytes for storage flags
3811	* and the length of relatively short strings,
3812	* followed by the buffer for short string contents.
3813	* There is no padding inside fStackFields.
3814	*
3815	* Heap-allocated and aliased strings use fUnion.fFields.
3816	* Both fStackFields and fFields must begin with the same fields for flags and short length,
3817	* that is, those must have the same memory offsets inside the object,
3818	* because the flags must be inspected in order to decide which half of fUnion is being used.
3819	* We assume that the compiler does not reorder the fields.
3820	*
3821	* (Padding at the end of fFields is ok:
3822	* As long as it is no larger than fStackFields, it is not wasted space.)
3823	*
3824	* For some of the history of the UnicodeString class fields layout, see
3825	* - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3826	* - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3827	* - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3828	*/
3829	// (implicit) vtable;*
3830	union StackBufferOrFields {
3831	// fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3832	// Each struct of the union must begin with fLengthAndFlags.
3833	struct {
3834	int16_t fLengthAndFlags; // bit fields: see constants above
3835	char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
3836	} fStackFields;
3837	struct {
3838	int16_t fLengthAndFlags; // bit fields: see constants above
3839	int32_t fLength; // number of characters in fArray if >127; else undefined
3840	int32_t fCapacity; // capacity of fArray (in char16_ts)
3841	// array pointer last to minimize padding for machines with P128 data model
3842	// or pointer sizes that are not a power of 2
3843	char16_t fArray; // the Unicode data*
3844	} fFields;
3845	} fUnion;
3846	};
3847
/**
 * Create a new UnicodeString with the concatenation of two others.
 * Declared here as a free function; neither argument is modified.
 *
 * @param s1 The first string to be copied to the new one.
 * @param s2 The second string to be copied to the new one, after s1.
 * @return UnicodeString(s1).append(s2)
 * @stable ICU 2.8
 */
U_COMMON_API UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2);
3858
3859	//========================================
3860	// Inline members
3861	//========================================
3862
3863	//========================================
3864	// Privates
3865	//========================================
3866
3867	inline void
3868	UnicodeString::pinIndex(int32_t& start) const
3869	{
3870	// pin index
3871	if(start < `0`) {
3872	start = `0`;
3873	} else if(start > length()) {
3874	start = length();
3875	}
3876	}
3877
3878	inline void
3879	UnicodeString::pinIndices(int32_t& start,
3880	int32_t& _length) const
3881	{
3882	// pin indices
3883	int32_t len = length();
3884	if(start < `0`) {
3885	start = `0`;
3886	} else if(start > len) {
3887	start = len;
3888	}
3889	if(_length < `0`) {
3890	_length = `0`;
3891	} else if(_length > (len - start)) {
3892	_length = (len - start);
3893	}
3894	}
3895
3896	inline char16_t*
3897	UnicodeString::getArrayStart() {
3898	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3899	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3900	}
3901
3902	inline const char16_t*
3903	UnicodeString::getArrayStart() const {
3904	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3905	fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3906	}
3907
3908	//========================================
3909	// Default constructor
3910	//========================================
3911
3912	inline
3913	UnicodeString::UnicodeString() {
3914	fUnion.fStackFields.fLengthAndFlags=kShortString;
3915	}
3916
3917	inline UnicodeString::UnicodeString(const std::nullptr_t /text/) {
3918	fUnion.fStackFields.fLengthAndFlags=kShortString;
3919	}
3920
3921	inline UnicodeString::UnicodeString(const std::nullptr_t /text/, int32_t /length/) {
3922	fUnion.fStackFields.fLengthAndFlags=kShortString;
3923	}
3924
3925	inline UnicodeString::UnicodeString(std::nullptr_t /buffer/, int32_t /buffLength/, int32_t /buffCapacity/) {
3926	fUnion.fStackFields.fLengthAndFlags=kShortString;
3927	}
3928
3929	//========================================
3930	// Read-only implementation methods
3931	//========================================
3932	inline UBool
3933	UnicodeString::hasShortLength() const {
3934	return fUnion.fFields.fLengthAndFlags>=`0`;
3935	}
3936
3937	inline int32_t
3938	UnicodeString::getShortLength() const {
3939	// fLengthAndFlags must be non-negative -> short length >= 0
3940	// and arithmetic or logical shift does not matter.
3941	return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3942	}
3943
3944	inline int32_t
3945	UnicodeString::length() const {
3946	return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3947	}
3948
3949	inline int32_t
3950	UnicodeString::getCapacity() const {
3951	return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3952	US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3953	}
3954
3955	inline int32_t
3956	UnicodeString::hashCode() const
3957	{ return doHashCode(); }
3958
3959	inline UBool
3960	UnicodeString::isBogus() const
3961	{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3962
3963	inline UBool
3964	UnicodeString::isWritable() const
3965	{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus)); }
3966
3967	inline UBool
3968	UnicodeString::isBufferWritable() const
3969	{
3970	return (UBool)(
3971	!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kIsBogus\|kBufferIsReadonly)) &&
3972	(!(fUnion.fFields.fLengthAndFlags&kRefCounted) \|\| refCount()==`1`));
3973	}
3974
3975	inline const char16_t *
3976	UnicodeString::getBuffer() const {
3977	if(fUnion.fFields.fLengthAndFlags&(kIsBogus\|kOpenGetBuffer)) {
3978	return nullptr;
3979	} else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3980	return fUnion.fStackFields.fBuffer;
3981	} else {
3982	return fUnion.fFields.fArray;
3983	}
3984	}
3985
3986	//========================================
3987	// Read-only alias methods
3988	//========================================
3989	inline int8_t
3990	UnicodeString::doCompare(int32_t start,
3991	int32_t thisLength,
3992	const UnicodeString& srcText,
3993	int32_t srcStart,
3994	int32_t srcLength) const
3995	{
3996	if(srcText.isBogus()) {
3997	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3998	} else {
3999	srcText.pinIndices(srcStart, srcLength);
4000	return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4001	}
4002	}
4003
4004	inline UBool
4005	UnicodeString::operator== (const UnicodeString& text) const
4006	{
4007	if(isBogus()) {
4008	return text.isBogus();
4009	} else {
4010	int32_t len = length(), textLength = text.length();
4011	return !text.isBogus() && len == textLength && doEquals(text, len);
4012	}
4013	}
4014
4015	inline UBool
4016	UnicodeString::operator!= (const UnicodeString& text) const
4017	{ return (! operator==(text)); }
4018
4019	inline UBool
4020	UnicodeString::operator> (const UnicodeString& text) const
4021	{ return doCompare(`0`, length(), text, `0`, text.length()) == `1`; }
4022
4023	inline UBool
4024	UnicodeString::operator< (const UnicodeString& text) const
4025	{ return doCompare(`0`, length(), text, `0`, text.length()) == -`1`; }
4026
4027	inline UBool
4028	UnicodeString::operator>= (const UnicodeString& text) const
4029	{ return doCompare(`0`, length(), text, `0`, text.length()) != -`1`; }
4030
4031	inline UBool
4032	UnicodeString::operator<= (const UnicodeString& text) const
4033	{ return doCompare(`0`, length(), text, `0`, text.length()) != `1`; }
4034
4035	inline int8_t
4036	UnicodeString::compare(const UnicodeString& text) const
4037	{ return doCompare(`0`, length(), text, `0`, text.length()); }
4038
4039	inline int8_t
4040	UnicodeString::compare(int32_t start,
4041	int32_t _length,
4042	const UnicodeString& srcText) const
4043	{ return doCompare(start, _length, srcText, `0`, srcText.length()); }
4044
4045	inline int8_t
4046	UnicodeString::compare(ConstChar16Ptr srcChars,
4047	int32_t srcLength) const
4048	{ return doCompare(`0`, length(), srcChars, `0`, srcLength); }
4049
4050	inline int8_t
4051	UnicodeString::compare(int32_t start,
4052	int32_t _length,
4053	const UnicodeString& srcText,
4054	int32_t srcStart,
4055	int32_t srcLength) const
4056	{ return doCompare(start, _length, srcText, srcStart, srcLength); }
4057
4058	inline int8_t
4059	UnicodeString::compare(int32_t start,
4060	int32_t _length,
4061	const char16_t srcChars) const*
4062	{ return doCompare(start, _length, srcChars, `0`, _length); }
4063
4064	inline int8_t
4065	UnicodeString::compare(int32_t start,
4066	int32_t _length,
4067	const char16_t *srcChars,
4068	int32_t srcStart,
4069	int32_t srcLength) const
4070	{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
4071
4072	inline int8_t
4073	UnicodeString::compareBetween(int32_t start,
4074	int32_t limit,
4075	const UnicodeString& srcText,
4076	int32_t srcStart,
4077	int32_t srcLimit) const
4078	{ return doCompare(start, limit - start,
4079	srcText, srcStart, srcLimit - srcStart); }
4080
4081	inline int8_t
4082	UnicodeString::doCompareCodePointOrder(int32_t start,
4083	int32_t thisLength,
4084	const UnicodeString& srcText,
4085	int32_t srcStart,
4086	int32_t srcLength) const
4087	{
4088	if(srcText.isBogus()) {
4089	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4090	} else {
4091	srcText.pinIndices(srcStart, srcLength);
4092	return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4093	}
4094	}
4095
4096	inline int8_t
4097	UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4098	{ return doCompareCodePointOrder(`0`, length(), text, `0`, text.length()); }
4099
4100	inline int8_t
4101	UnicodeString::compareCodePointOrder(int32_t start,
4102	int32_t _length,
4103	const UnicodeString& srcText) const
4104	{ return doCompareCodePointOrder(start, _length, srcText, `0`, srcText.length()); }
4105
4106	inline int8_t
4107	UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4108	int32_t srcLength) const
4109	{ return doCompareCodePointOrder(`0`, length(), srcChars, `0`, srcLength); }
4110
4111	inline int8_t
4112	UnicodeString::compareCodePointOrder(int32_t start,
4113	int32_t _length,
4114	const UnicodeString& srcText,
4115	int32_t srcStart,
4116	int32_t srcLength) const
4117	{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4118
4119	inline int8_t
4120	UnicodeString::compareCodePointOrder(int32_t start,
4121	int32_t _length,
4122	const char16_t srcChars) const*
4123	{ return doCompareCodePointOrder(start, _length, srcChars, `0`, _length); }
4124
4125	inline int8_t
4126	UnicodeString::compareCodePointOrder(int32_t start,
4127	int32_t _length,
4128	const char16_t *srcChars,
4129	int32_t srcStart,
4130	int32_t srcLength) const
4131	{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4132
4133	inline int8_t
4134	UnicodeString::compareCodePointOrderBetween(int32_t start,
4135	int32_t limit,
4136	const UnicodeString& srcText,
4137	int32_t srcStart,
4138	int32_t srcLimit) const
4139	{ return doCompareCodePointOrder(start, limit - start,
4140	srcText, srcStart, srcLimit - srcStart); }
4141
4142	inline int8_t
4143	UnicodeString::doCaseCompare(int32_t start,
4144	int32_t thisLength,
4145	const UnicodeString &srcText,
4146	int32_t srcStart,
4147	int32_t srcLength,
4148	uint32_t options) const
4149	{
4150	if(srcText.isBogus()) {
4151	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4152	} else {
4153	srcText.pinIndices(srcStart, srcLength);
4154	return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4155	}
4156	}
4157
4158	inline int8_t
4159	UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4160	return doCaseCompare(`0`, length(), text, `0`, text.length(), options);
4161	}
4162
4163	inline int8_t
4164	UnicodeString::caseCompare(int32_t start,
4165	int32_t _length,
4166	const UnicodeString &srcText,
4167	uint32_t options) const {
4168	return doCaseCompare(start, _length, srcText, `0`, srcText.length(), options);
4169	}
4170
4171	inline int8_t
4172	UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4173	int32_t srcLength,
4174	uint32_t options) const {
4175	return doCaseCompare(`0`, length(), srcChars, `0`, srcLength, options);
4176	}
4177
4178	inline int8_t
4179	UnicodeString::caseCompare(int32_t start,
4180	int32_t _length,
4181	const UnicodeString &srcText,
4182	int32_t srcStart,
4183	int32_t srcLength,
4184	uint32_t options) const {
4185	return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4186	}
4187
4188	inline int8_t
4189	UnicodeString::caseCompare(int32_t start,
4190	int32_t _length,
4191	const char16_t *srcChars,
4192	uint32_t options) const {
4193	return doCaseCompare(start, _length, srcChars, `0`, _length, options);
4194	}
4195
4196	inline int8_t
4197	UnicodeString::caseCompare(int32_t start,
4198	int32_t _length,
4199	const char16_t *srcChars,
4200	int32_t srcStart,
4201	int32_t srcLength,
4202	uint32_t options) const {
4203	return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4204	}
4205
4206	inline int8_t
4207	UnicodeString::caseCompareBetween(int32_t start,
4208	int32_t limit,
4209	const UnicodeString &srcText,
4210	int32_t srcStart,
4211	int32_t srcLimit,
4212	uint32_t options) const {
4213	return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4214	}
4215
4216	inline int32_t
4217	UnicodeString::indexOf(const UnicodeString& srcText,
4218	int32_t srcStart,
4219	int32_t srcLength,
4220	int32_t start,
4221	int32_t _length) const
4222	{
4223	if(!srcText.isBogus()) {
4224	srcText.pinIndices(srcStart, srcLength);
4225	if(srcLength > `0`) {
4226	return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4227	}
4228	}
4229	return -`1`;
4230	}
4231
4232	inline int32_t
4233	UnicodeString::indexOf(const UnicodeString& text) const
4234	{ return indexOf(text, `0`, text.length(), `0`, length()); }
4235
4236	inline int32_t
4237	UnicodeString::indexOf(const UnicodeString& text,
4238	int32_t start) const {
4239	pinIndex(start);
4240	return indexOf(text, `0`, text.length(), start, length() - start);
4241	}
4242
4243	inline int32_t
4244	UnicodeString::indexOf(const UnicodeString& text,
4245	int32_t start,
4246	int32_t _length) const
4247	{ return indexOf(text, `0`, text.length(), start, _length); }
4248
4249	inline int32_t
4250	UnicodeString::indexOf(const char16_t *srcChars,
4251	int32_t srcLength,
4252	int32_t start) const {
4253	pinIndex(start);
4254	return indexOf(srcChars, `0`, srcLength, start, length() - start);
4255	}
4256
4257	inline int32_t
4258	UnicodeString::indexOf(ConstChar16Ptr srcChars,
4259	int32_t srcLength,
4260	int32_t start,
4261	int32_t _length) const
4262	{ return indexOf(srcChars, `0`, srcLength, start, _length); }
4263
4264	inline int32_t
4265	UnicodeString::indexOf(char16_t c,
4266	int32_t start,
4267	int32_t _length) const
4268	{ return doIndexOf(c, start, _length); }
4269
4270	inline int32_t
4271	UnicodeString::indexOf(UChar32 c,
4272	int32_t start,
4273	int32_t _length) const
4274	{ return doIndexOf(c, start, _length); }
4275
4276	inline int32_t
4277	UnicodeString::indexOf(char16_t c) const
4278	{ return doIndexOf(c, `0`, length()); }
4279
4280	inline int32_t
4281	UnicodeString::indexOf(UChar32 c) const
4282	{ return indexOf(c, `0`, length()); }
4283
4284	inline int32_t
4285	UnicodeString::indexOf(char16_t c,
4286	int32_t start) const {
4287	pinIndex(start);
4288	return doIndexOf(c, start, length() - start);
4289	}
4290
4291	inline int32_t
4292	UnicodeString::indexOf(UChar32 c,
4293	int32_t start) const {
4294	pinIndex(start);
4295	return indexOf(c, start, length() - start);
4296	}
4297
4298	inline int32_t
4299	UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4300	int32_t srcLength,
4301	int32_t start,
4302	int32_t _length) const
4303	{ return lastIndexOf(srcChars, `0`, srcLength, start, _length); }
4304
4305	inline int32_t
4306	UnicodeString::lastIndexOf(const char16_t *srcChars,
4307	int32_t srcLength,
4308	int32_t start) const {
4309	pinIndex(start);
4310	return lastIndexOf(srcChars, `0`, srcLength, start, length() - start);
4311	}
4312
4313	inline int32_t
4314	UnicodeString::lastIndexOf(const UnicodeString& srcText,
4315	int32_t srcStart,
4316	int32_t srcLength,
4317	int32_t start,
4318	int32_t _length) const
4319	{
4320	if(!srcText.isBogus()) {
4321	srcText.pinIndices(srcStart, srcLength);
4322	if(srcLength > `0`) {
4323	return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4324	}
4325	}
4326	return -`1`;
4327	}
4328
4329	inline int32_t
4330	UnicodeString::lastIndexOf(const UnicodeString& text,
4331	int32_t start,
4332	int32_t _length) const
4333	{ return lastIndexOf(text, `0`, text.length(), start, _length); }
4334
4335	inline int32_t
4336	UnicodeString::lastIndexOf(const UnicodeString& text,
4337	int32_t start) const {
4338	pinIndex(start);
4339	return lastIndexOf(text, `0`, text.length(), start, length() - start);
4340	}
4341
4342	inline int32_t
4343	UnicodeString::lastIndexOf(const UnicodeString& text) const
4344	{ return lastIndexOf(text, `0`, text.length(), `0`, length()); }
4345
4346	inline int32_t
4347	UnicodeString::lastIndexOf(char16_t c,
4348	int32_t start,
4349	int32_t _length) const
4350	{ return doLastIndexOf(c, start, _length); }
4351
4352	inline int32_t
4353	UnicodeString::lastIndexOf(UChar32 c,
4354	int32_t start,
4355	int32_t _length) const {
4356	return doLastIndexOf(c, start, _length);
4357	}
4358
4359	inline int32_t
4360	UnicodeString::lastIndexOf(char16_t c) const
4361	{ return doLastIndexOf(c, `0`, length()); }
4362
4363	inline int32_t
4364	UnicodeString::lastIndexOf(UChar32 c) const {
4365	return lastIndexOf(c, `0`, length());
4366	}
4367
4368	inline int32_t
4369	UnicodeString::lastIndexOf(char16_t c,
4370	int32_t start) const {
4371	pinIndex(start);
4372	return doLastIndexOf(c, start, length() - start);
4373	}
4374
4375	inline int32_t
4376	UnicodeString::lastIndexOf(UChar32 c,
4377	int32_t start) const {
4378	pinIndex(start);
4379	return lastIndexOf(c, start, length() - start);
4380	}
4381
4382	inline UBool
4383	UnicodeString::startsWith(const UnicodeString& text) const
4384	{ return compare(`0`, text.length(), text, `0`, text.length()) == `0`; }
4385
4386	inline UBool
4387	UnicodeString::startsWith(const UnicodeString& srcText,
4388	int32_t srcStart,
4389	int32_t srcLength) const
4390	{ return doCompare(`0`, srcLength, srcText, srcStart, srcLength) == `0`; }
4391
4392	inline UBool
4393	UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4394	if(srcLength < `0`) {
4395	srcLength = u_strlen(toUCharPtr(srcChars));
4396	}
4397	return doCompare(`0`, srcLength, srcChars, `0`, srcLength) == `0`;
4398	}
4399
4400	inline UBool
4401	UnicodeString::startsWith(const char16_t srcChars, int32_t srcStart, int32_t srcLength) const* {
4402	if(srcLength < `0`) {
4403	srcLength = u_strlen(toUCharPtr(srcChars));
4404	}
4405	return doCompare(`0`, srcLength, srcChars, srcStart, srcLength) == `0`;
4406	}
4407
4408	inline UBool
4409	UnicodeString::endsWith(const UnicodeString& text) const
4410	{ return doCompare(length() - text.length(), text.length(),
4411	text, `0`, text.length()) == `0`; }
4412
4413	inline UBool
4414	UnicodeString::endsWith(const UnicodeString& srcText,
4415	int32_t srcStart,
4416	int32_t srcLength) const {
4417	srcText.pinIndices(srcStart, srcLength);
4418	return doCompare(length() - srcLength, srcLength,
4419	srcText, srcStart, srcLength) == `0`;
4420	}
4421
4422	inline UBool
4423	UnicodeString::endsWith(ConstChar16Ptr srcChars,
4424	int32_t srcLength) const {
4425	if(srcLength < `0`) {
4426	srcLength = u_strlen(toUCharPtr(srcChars));
4427	}
4428	return doCompare(length() - srcLength, srcLength,
4429	srcChars, `0`, srcLength) == `0`;
4430	}
4431
4432	inline UBool
4433	UnicodeString::endsWith(const char16_t *srcChars,
4434	int32_t srcStart,
4435	int32_t srcLength) const {
4436	if(srcLength < `0`) {
4437	srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4438	}
4439	return doCompare(length() - srcLength, srcLength,
4440	srcChars, srcStart, srcLength) == `0`;
4441	}
4442
4443	//========================================
4444	// replace
4445	//========================================
4446	inline UnicodeString&
4447	UnicodeString::replace(int32_t start,
4448	int32_t _length,
4449	const UnicodeString& srcText)
4450	{ return doReplace(start, _length, srcText, `0`, srcText.length()); }
4451
4452	inline UnicodeString&
4453	UnicodeString::replace(int32_t start,
4454	int32_t _length,
4455	const UnicodeString& srcText,
4456	int32_t srcStart,
4457	int32_t srcLength)
4458	{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4459
4460	inline UnicodeString&
4461	UnicodeString::replace(int32_t start,
4462	int32_t _length,
4463	ConstChar16Ptr srcChars,
4464	int32_t srcLength)
4465	{ return doReplace(start, _length, srcChars, `0`, srcLength); }
4466
4467	inline UnicodeString&
4468	UnicodeString::replace(int32_t start,
4469	int32_t _length,
4470	const char16_t *srcChars,
4471	int32_t srcStart,
4472	int32_t srcLength)
4473	{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4474
4475	inline UnicodeString&
4476	UnicodeString::replace(int32_t start,
4477	int32_t _length,
4478	char16_t srcChar)
4479	{ return doReplace(start, _length, &srcChar, `0`, `1`); }
4480
4481	inline UnicodeString&
4482	UnicodeString::replaceBetween(int32_t start,
4483	int32_t limit,
4484	const UnicodeString& srcText)
4485	{ return doReplace(start, limit - start, srcText, `0`, srcText.length()); }
4486
4487	inline UnicodeString&
4488	UnicodeString::replaceBetween(int32_t start,
4489	int32_t limit,
4490	const UnicodeString& srcText,
4491	int32_t srcStart,
4492	int32_t srcLimit)
4493	{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4494
4495	inline UnicodeString&
4496	UnicodeString::findAndReplace(const UnicodeString& oldText,
4497	const UnicodeString& newText)
4498	{ return findAndReplace(`0`, length(), oldText, `0`, oldText.length(),
4499	newText, `0`, newText.length()); }
4500
4501	inline UnicodeString&
4502	UnicodeString::findAndReplace(int32_t start,
4503	int32_t _length,
4504	const UnicodeString& oldText,
4505	const UnicodeString& newText)
4506	{ return findAndReplace(start, _length, oldText, `0`, oldText.length(),
4507	newText, `0`, newText.length()); }
4508
4509	// ============================
4510	// extract
4511	// ============================
4512	inline void
4513	UnicodeString::doExtract(int32_t start,
4514	int32_t _length,
4515	UnicodeString& target) const
4516	{ target.replace(`0`, target.length(), *this, start, _length); }
4517
4518	inline void
4519	UnicodeString::extract(int32_t start,
4520	int32_t _length,
4521	Char16Ptr target,
4522	int32_t targetStart) const
4523	{ doExtract(start, _length, target, targetStart); }
4524
4525	inline void
4526	UnicodeString::extract(int32_t start,
4527	int32_t _length,
4528	UnicodeString& target) const
4529	{ doExtract(start, _length, target); }
4530
4531	#if !UCONFIG_NO_CONVERSION
4532
4533	inline int32_t
4534	UnicodeString::extract(int32_t start,
4535	int32_t _length,
4536	char *dst,
4537	const char codepage) const*
4538
4539	{
4540	// This dstSize value will be checked explicitly
4541	return extract(start, _length, dst, dst!=`0` ? `0xffffffff` : `0`, codepage);
4542	}
4543
4544	#endif
4545
4546	inline void
4547	UnicodeString::extractBetween(int32_t start,
4548	int32_t limit,
4549	char16_t *dst,
4550	int32_t dstStart) const {
4551	pinIndex(start);
4552	pinIndex(limit);
4553	doExtract(start, limit - start, dst, dstStart);
4554	}
4555
4556	inline UnicodeString
4557	UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4558	return tempSubString(start, limit - start);
4559	}
4560
4561	inline char16_t
4562	UnicodeString::doCharAt(int32_t offset) const
4563	{
4564	if((uint32_t)offset < (uint32_t)length()) {
4565	return getArrayStart()[offset];
4566	} else {
4567	return kInvalidUChar;
4568	}
4569	}
4570
4571	inline char16_t
4572	UnicodeString::charAt(int32_t offset) const
4573	{ return doCharAt(offset); }
4574
4575	inline char16_t
4576	UnicodeString::operator[] (int32_t offset) const
4577	{ return doCharAt(offset); }
4578
4579	inline UBool
4580	UnicodeString::isEmpty() const {
4581	// Arithmetic or logical right shift does not matter: only testing for 0.
4582	return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == `0`;
4583	}
4584
4585	//========================================
4586	// Write implementation methods
4587	//========================================
4588	inline void
4589	UnicodeString::setZeroLength() {
4590	fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4591	}
4592
4593	inline void
4594	UnicodeString::setShortLength(int32_t len) {
4595	// requires 0 <= len <= kMaxShortLength
4596	fUnion.fFields.fLengthAndFlags =
4597	(int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) \| (len << kLengthShift));
4598	}
4599
4600	inline void
4601	UnicodeString::setLength(int32_t len) {
4602	if(len <= kMaxShortLength) {
4603	setShortLength(len);
4604	} else {
4605	fUnion.fFields.fLengthAndFlags \|= kLengthIsLarge;
4606	fUnion.fFields.fLength = len;
4607	}
4608	}
4609
4610	inline void
4611	UnicodeString::setToEmpty() {
4612	fUnion.fFields.fLengthAndFlags = kShortString;
4613	}
4614
4615	inline void
4616	UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4617	setLength(len);
4618	fUnion.fFields.fArray = array;
4619	fUnion.fFields.fCapacity = capacity;
4620	}
4621
4622	inline UnicodeString&
4623	UnicodeString::operator= (char16_t ch)
4624	{ return doReplace(`0`, length(), &ch, `0`, `1`); }
4625
4626	inline UnicodeString&
4627	UnicodeString::operator= (UChar32 ch)
4628	{ return replace(`0`, length(), ch); }
4629
4630	inline UnicodeString&
4631	UnicodeString::setTo(const UnicodeString& srcText,
4632	int32_t srcStart,
4633	int32_t srcLength)
4634	{
4635	unBogus();
4636	return doReplace(`0`, length(), srcText, srcStart, srcLength);
4637	}
4638
4639	inline UnicodeString&
4640	UnicodeString::setTo(const UnicodeString& srcText,
4641	int32_t srcStart)
4642	{
4643	unBogus();
4644	srcText.pinIndex(srcStart);
4645	return doReplace(`0`, length(), srcText, srcStart, srcText.length() - srcStart);
4646	}
4647
4648	inline UnicodeString&
4649	UnicodeString::setTo(const UnicodeString& srcText)
4650	{
4651	return copyFrom(srcText);
4652	}
4653
4654	inline UnicodeString&
4655	UnicodeString::setTo(const char16_t *srcChars,
4656	int32_t srcLength)
4657	{
4658	unBogus();
4659	return doReplace(`0`, length(), srcChars, `0`, srcLength);
4660	}
4661
4662	inline UnicodeString&
4663	UnicodeString::setTo(char16_t srcChar)
4664	{
4665	unBogus();
4666	return doReplace(`0`, length(), &srcChar, `0`, `1`);
4667	}
4668
4669	inline UnicodeString&
4670	UnicodeString::setTo(UChar32 srcChar)
4671	{
4672	unBogus();
4673	return replace(`0`, length(), srcChar);
4674	}
4675
4676	inline UnicodeString&
4677	UnicodeString::append(const UnicodeString& srcText,
4678	int32_t srcStart,
4679	int32_t srcLength)
4680	{ return doAppend(srcText, srcStart, srcLength); }
4681
4682	inline UnicodeString&
4683	UnicodeString::append(const UnicodeString& srcText)
4684	{ return doAppend(srcText, `0`, srcText.length()); }
4685
4686	inline UnicodeString&
4687	UnicodeString::append(const char16_t *srcChars,
4688	int32_t srcStart,
4689	int32_t srcLength)
4690	{ return doAppend(srcChars, srcStart, srcLength); }
4691
4692	inline UnicodeString&
4693	UnicodeString::append(ConstChar16Ptr srcChars,
4694	int32_t srcLength)
4695	{ return doAppend(srcChars, `0`, srcLength); }
4696
4697	inline UnicodeString&
4698	UnicodeString::append(char16_t srcChar)
4699	{ return doAppend(&srcChar, `0`, `1`); }
4700
4701	inline UnicodeString&
4702	UnicodeString::operator+= (char16_t ch)
4703	{ return doAppend(&ch, `0`, `1`); }
4704
4705	inline UnicodeString&
4706	UnicodeString::operator+= (UChar32 ch) {
4707	return append(ch);
4708	}
4709
4710	inline UnicodeString&
4711	UnicodeString::operator+= (const UnicodeString& srcText)
4712	{ return doAppend(srcText, `0`, srcText.length()); }
4713
4714	inline UnicodeString&
4715	UnicodeString::insert(int32_t start,
4716	const UnicodeString& srcText,
4717	int32_t srcStart,
4718	int32_t srcLength)
4719	{ return doReplace(start, `0`, srcText, srcStart, srcLength); }
4720
4721	inline UnicodeString&
4722	UnicodeString::insert(int32_t start,
4723	const UnicodeString& srcText)
4724	{ return doReplace(start, `0`, srcText, `0`, srcText.length()); }
4725
4726	inline UnicodeString&
4727	UnicodeString::insert(int32_t start,
4728	const char16_t *srcChars,
4729	int32_t srcStart,
4730	int32_t srcLength)
4731	{ return doReplace(start, `0`, srcChars, srcStart, srcLength); }
4732
4733	inline UnicodeString&
4734	UnicodeString::insert(int32_t start,
4735	ConstChar16Ptr srcChars,
4736	int32_t srcLength)
4737	{ return doReplace(start, `0`, srcChars, `0`, srcLength); }
4738
4739	inline UnicodeString&
4740	UnicodeString::insert(int32_t start,
4741	char16_t srcChar)
4742	{ return doReplace(start, `0`, &srcChar, `0`, `1`); }
4743
4744	inline UnicodeString&
4745	UnicodeString::insert(int32_t start,
4746	UChar32 srcChar)
4747	{ return replace(start, `0`, srcChar); }
4748
4749
4750	inline UnicodeString&
4751	UnicodeString::remove()
4752	{
4753	// remove() of a bogus string makes the string empty and non-bogus
4754	if(isBogus()) {
4755	setToEmpty();
4756	} else {
4757	setZeroLength();
4758	}
4759	return *this;
4760	}
4761
4762	inline UnicodeString&
4763	UnicodeString::remove(int32_t start,
4764	int32_t _length)
4765	{
4766	if(start <= `0` && _length == INT32_MAX) {
4767	// remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4768	return remove();
4769	}
4770	return doReplace(start, _length, NULL, `0`, `0`);
4771	}
4772
4773	inline UnicodeString&
4774	UnicodeString::removeBetween(int32_t start,
4775	int32_t limit)
4776	{ return doReplace(start, limit - start, NULL, `0`, `0`); }
4777
4778	inline UnicodeString &
4779	UnicodeString::retainBetween(int32_t start, int32_t limit) {
4780	truncate(limit);
4781	return doReplace(`0`, start, NULL, `0`, `0`);
4782	}
4783
4784	inline UBool
4785	UnicodeString::truncate(int32_t targetLength)
4786	{
4787	if(isBogus() && targetLength == `0`) {
4788	// truncate(0) of a bogus string makes the string empty and non-bogus
4789	unBogus();
4790	return FALSE;
4791	} else if((uint32_t)targetLength < (uint32_t)length()) {
4792	setLength(targetLength);
4793	return TRUE;
4794	} else {
4795	return FALSE;
4796	}
4797	}
4798
4799	inline UnicodeString&
4800	UnicodeString::reverse()
4801	{ return doReverse(`0`, length()); }
4802
4803	inline UnicodeString&
4804	UnicodeString::reverse(int32_t start,
4805	int32_t _length)
4806	{ return doReverse(start, _length); }
4807
4808	U_NAMESPACE_END
4809
4810	#endif
4811

Browse the source code of include/unicode/unistr.h