ustring.h source code [include/unicode/ustring.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1998-2014, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File ustring.h
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 12/07/98 bertrand Creation.
15	******************************************************************************
16	*/
17
18	#ifndef USTRING_H
19	#define USTRING_H
20
21	#include "unicode/utypes.h"
22	#include "unicode/putil.h"
23	#include "unicode/uiter.h"
24
25	/**
26	* \def UBRK_TYPEDEF_UBREAK_ITERATOR
27	* @internal
28	*/
29
30	#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
31	# define UBRK_TYPEDEF_UBREAK_ITERATOR
32	/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
33	typedef struct UBreakIterator UBreakIterator;
34	#endif
35
36	/**
37	* \file
38	* \brief C API: Unicode string handling functions
39	*
40	* These C API functions provide general Unicode string handling.
41	*
42	* Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
43	* functions. (For example, they do not check for bad arguments like NULL string pointers.)
44	* In some cases, only the thread-safe variant of such a function is implemented here
45	* (see u_strtok_r()).
46	*
47	* Other functions provide more Unicode-specific functionality like locale-specific
48	* upper/lower-casing and string comparison in code point order.
49	*
50	* ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
51	* UTF-16 encodes each Unicode code point with either one or two UChar code units.
52	* (This is the default form of Unicode, and a forward-compatible extension of the original,
53	* fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
54	* in 1996.)
55	*
56	* Some APIs accept a 32-bit UChar32 value for a single code point.
57	*
58	* ICU also handles 16-bit Unicode text with unpaired surrogates.
59	* Such text is not well-formed UTF-16.
60	* Code-point-related functions treat unpaired surrogates as surrogate code points,
61	* i.e., as separate units.
62	*
63	* Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
64	* it is much more efficient even for random access because the code unit values
65	* for single-unit characters vs. lead units vs. trail units are completely disjoint.
66	* This means that it is easy to determine character (code point) boundaries from
67	* random offsets in the string.
68	*
69	* Unicode (UTF-16) string processing is optimized for the single-unit case.
70	* Although it is important to support supplementary characters
71	* (which use pairs of lead/trail code units called "surrogates"),
72	* their occurrence is rare. Almost all characters in modern use require only
73	* a single UChar code unit (i.e., their code point values are <=0xffff).
74	*
75	* For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html).
76	* For a discussion of the handling of unpaired surrogates see also
77	* Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
78	*/
79
80	/**
81	* \defgroup ustring_ustrlen String Length
82	* \ingroup ustring_strlen
83	*/
84	/*@{*/
85	/**
86	* Determine the length of an array of UChar.
87	*
88	* @param s The array of UChars, NUL (U+0000) terminated.
89	* @return The number of UChars in <code>s</code>, minus the terminator.
90	* @stable ICU 2.0
91	*/
92	U_STABLE int32_t U_EXPORT2
93	u_strlen(const UChar *s);
94	/*@}*/
95
96	/**
97	* Count Unicode code points in the length UChar code units of the string.
98	* A code point may occupy either one or two UChar code units.
99	* Counting code points involves reading all code units.
100	*
101	* This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
102	*
103	* @param s The input string.
104	* @param length The number of UChar code units to be checked, or -1 to count all
105	* code points before the first NUL (U+0000).
106	* @return The number of code points in the specified code units.
107	* @stable ICU 2.0
108	*/
109	U_STABLE int32_t U_EXPORT2
110	u_countChar32(const UChar *s, int32_t length);
111
112	/**
113	* Check if the string contains more Unicode code points than a certain number.
114	* This is more efficient than counting all code points in the entire string
115	* and comparing that number with a threshold.
116	* This function may not need to scan the string at all if the length is known
117	* (not -1 for NUL-termination) and falls within a certain range, and
118	* never needs to count more than 'number+1' code points.
119	* Logically equivalent to (u_countChar32(s, length)>number).
120	* A Unicode code point may occupy either one or two UChar code units.
121	*
122	* @param s The input string.
123	* @param length The length of the string, or -1 if it is NUL-terminated.
124	* @param number The number of code points in the string is compared against
125	* the 'number' parameter.
126	* @return Boolean value for whether the string contains more Unicode code points
127	* than 'number'. Same as (u_countChar32(s, length)>number).
128	* @stable ICU 2.4
129	*/
130	U_STABLE UBool U_EXPORT2
131	u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
132
133	/**
134	* Concatenate two ustrings. Appends a copy of <code>src</code>,
135	* including the null terminator, to <code>dst</code>. The initial copied
136	* character from <code>src</code> overwrites the null terminator in <code>dst</code>.
137	*
138	* @param dst The destination string.
139	* @param src The source string.
140	* @return A pointer to <code>dst</code>.
141	* @stable ICU 2.0
142	*/
143	U_STABLE UChar* U_EXPORT2
144	u_strcat(UChar *dst,
145	const UChar *src);
146
147	/**
148	* Concatenate two ustrings.
149	* Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
150	* Adds a terminating NUL.
151	* If src is too long, then only <code>n-1</code> characters will be copied
152	* before the terminating NUL.
153	* If <code>n<=0</code> then dst is not modified.
154	*
155	* @param dst The destination string.
156	* @param src The source string (can be NULL/invalid if n<=0).
157	* @param n The maximum number of characters to append; no-op if <=0.
158	* @return A pointer to <code>dst</code>.
159	* @stable ICU 2.0
160	*/
161	U_STABLE UChar* U_EXPORT2
162	u_strncat(UChar *dst,
163	const UChar *src,
164	int32_t n);
165
166	/**
167	* Find the first occurrence of a substring in a string.
168	* The substring is found at code point boundaries.
169	* That means that if the substring begins with
170	* a trail surrogate or ends with a lead surrogate,
171	* then it is found only if these surrogates stand alone in the text.
172	* Otherwise, the substring edge units would be matched against
173	* halves of surrogate pairs.
174	*
175	* @param s The string to search (NUL-terminated).
176	* @param substring The substring to find (NUL-terminated).
177	* @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
178	* or <code>s</code> itself if the <code>substring</code> is empty,
179	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
180	* @stable ICU 2.0
181	*
182	* @see u_strrstr
183	* @see u_strFindFirst
184	* @see u_strFindLast
185	*/
186	U_STABLE UChar * U_EXPORT2
187	u_strstr(const UChar *s, const UChar *substring);
188
189	/**
190	* Find the first occurrence of a substring in a string.
191	* The substring is found at code point boundaries.
192	* That means that if the substring begins with
193	* a trail surrogate or ends with a lead surrogate,
194	* then it is found only if these surrogates stand alone in the text.
195	* Otherwise, the substring edge units would be matched against
196	* halves of surrogate pairs.
197	*
198	* @param s The string to search.
199	* @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
200	* @param substring The substring to find (NUL-terminated).
201	* @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
202	* @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
203	* or <code>s</code> itself if the <code>substring</code> is empty,
204	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
205	* @stable ICU 2.4
206	*
207	* @see u_strstr
208	* @see u_strFindLast
209	*/
210	U_STABLE UChar * U_EXPORT2
211	u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
212
213	/**
214	* Find the first occurrence of a BMP code point in a string.
215	* A surrogate code point is found only if its match in the text is not
216	* part of a surrogate pair.
217	* A NUL character is found at the string terminator.
218	*
219	* @param s The string to search (NUL-terminated).
220	* @param c The BMP code point to find.
221	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
222	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
223	* @stable ICU 2.0
224	*
225	* @see u_strchr32
226	* @see u_memchr
227	* @see u_strstr
228	* @see u_strFindFirst
229	*/
230	U_STABLE UChar * U_EXPORT2
231	u_strchr(const UChar *s, UChar c);
232
233	/**
234	* Find the first occurrence of a code point in a string.
235	* A surrogate code point is found only if its match in the text is not
236	* part of a surrogate pair.
237	* A NUL character is found at the string terminator.
238	*
239	* @param s The string to search (NUL-terminated).
240	* @param c The code point to find.
241	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
242	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
243	* @stable ICU 2.0
244	*
245	* @see u_strchr
246	* @see u_memchr32
247	* @see u_strstr
248	* @see u_strFindFirst
249	*/
250	U_STABLE UChar * U_EXPORT2
251	u_strchr32(const UChar *s, UChar32 c);
252
253	/**
254	* Find the last occurrence of a substring in a string.
255	* The substring is found at code point boundaries.
256	* That means that if the substring begins with
257	* a trail surrogate or ends with a lead surrogate,
258	* then it is found only if these surrogates stand alone in the text.
259	* Otherwise, the substring edge units would be matched against
260	* halves of surrogate pairs.
261	*
262	* @param s The string to search (NUL-terminated).
263	* @param substring The substring to find (NUL-terminated).
264	* @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
265	* or <code>s</code> itself if the <code>substring</code> is empty,
266	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
267	* @stable ICU 2.4
268	*
269	* @see u_strstr
270	* @see u_strFindFirst
271	* @see u_strFindLast
272	*/
273	U_STABLE UChar * U_EXPORT2
274	u_strrstr(const UChar *s, const UChar *substring);
275
276	/**
277	* Find the last occurrence of a substring in a string.
278	* The substring is found at code point boundaries.
279	* That means that if the substring begins with
280	* a trail surrogate or ends with a lead surrogate,
281	* then it is found only if these surrogates stand alone in the text.
282	* Otherwise, the substring edge units would be matched against
283	* halves of surrogate pairs.
284	*
285	* @param s The string to search.
286	* @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
287	* @param substring The substring to find (NUL-terminated).
288	* @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
289	* @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
290	* or <code>s</code> itself if the <code>substring</code> is empty,
291	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
292	* @stable ICU 2.4
293	*
294	* @see u_strstr
295	* @see u_strFindFirst
296	*/
297	U_STABLE UChar * U_EXPORT2
298	u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
299
300	/**
301	* Find the last occurrence of a BMP code point in a string.
302	* A surrogate code point is found only if its match in the text is not
303	* part of a surrogate pair.
304	* A NUL character is found at the string terminator.
305	*
306	* @param s The string to search (NUL-terminated).
307	* @param c The BMP code point to find.
308	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
309	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
310	* @stable ICU 2.4
311	*
312	* @see u_strrchr32
313	* @see u_memrchr
314	* @see u_strrstr
315	* @see u_strFindLast
316	*/
317	U_STABLE UChar * U_EXPORT2
318	u_strrchr(const UChar *s, UChar c);
319
320	/**
321	* Find the last occurrence of a code point in a string.
322	* A surrogate code point is found only if its match in the text is not
323	* part of a surrogate pair.
324	* A NUL character is found at the string terminator.
325	*
326	* @param s The string to search (NUL-terminated).
327	* @param c The code point to find.
328	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
329	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
330	* @stable ICU 2.4
331	*
332	* @see u_strrchr
333	* @see u_memrchr32
334	* @see u_strrstr
335	* @see u_strFindLast
336	*/
337	U_STABLE UChar * U_EXPORT2
338	u_strrchr32(const UChar *s, UChar32 c);
339
340	/**
341	* Locates the first occurrence in the string <code>string</code> of any of the characters
342	* in the string <code>matchSet</code>.
343	* Works just like C's strpbrk but with Unicode.
344	*
345	* @param string The string in which to search, NUL-terminated.
346	* @param matchSet A NUL-terminated string defining a set of code points
347	* for which to search in the text string.
348	* @return A pointer to the character in <code>string</code> that matches one of the
349	* characters in <code>matchSet</code>, or NULL if no such character is found.
350	* @stable ICU 2.0
351	*/
352	U_STABLE UChar * U_EXPORT2
353	u_strpbrk(const UChar *string, const UChar *matchSet);
354
355	/**
356	* Returns the number of consecutive characters in <code>string</code>,
357	* beginning with the first, that do not occur somewhere in <code>matchSet</code>.
358	* Works just like C's strcspn but with Unicode.
359	*
360	* @param string The string in which to search, NUL-terminated.
361	* @param matchSet A NUL-terminated string defining a set of code points
362	* for which to search in the text string.
363	* @return The number of initial characters in <code>string</code> that do not
364	* occur in <code>matchSet</code>.
365	* @see u_strspn
366	* @stable ICU 2.0
367	*/
368	U_STABLE int32_t U_EXPORT2
369	u_strcspn(const UChar *string, const UChar *matchSet);
370
371	/**
372	* Returns the number of consecutive characters in <code>string</code>,
373	* beginning with the first, that occur somewhere in <code>matchSet</code>.
374	* Works just like C's strspn but with Unicode.
375	*
376	* @param string The string in which to search, NUL-terminated.
377	* @param matchSet A NUL-terminated string defining a set of code points
378	* for which to search in the text string.
379	* @return The number of initial characters in <code>string</code> that do
380	* occur in <code>matchSet</code>.
381	* @see u_strcspn
382	* @stable ICU 2.0
383	*/
384	U_STABLE int32_t U_EXPORT2
385	u_strspn(const UChar *string, const UChar *matchSet);
386
387	/**
388	* The string tokenizer API allows an application to break a string into
389	* tokens. Unlike strtok(), the saveState (the current pointer within the
390	* original string) is maintained in saveState. In the first call, the
391	* argument src is a pointer to the string. In subsequent calls to
392	* return successive tokens of that string, src must be specified as
393	* NULL. The value saveState is set by this function to maintain the
394	* function's position within the string, and on each subsequent call
395	* you must give this argument the same variable. This function does
396	* handle surrogate pairs. This function is similar to strtok_r() from
397	* the POSIX Threads Extension (1003.1c-1995).
398	*
399	* @param src String containing token(s). This string will be modified.
400	* After the first call to u_strtok_r(), this argument must
401	* be NULL to get to the next token.
402	* @param delim Set of delimiter characters (Unicode code points).
403	* @param saveState The current pointer within the original string,
404	* which is set by this function. The saveState
405	* parameter should be the address of a local variable of type
406	* UChar *. (i.e. define "UChar *myLocalSaveState" and use
407	* &myLocalSaveState for this parameter).
408	* @return A pointer to the next token found in src, or NULL
409	* when there are no more tokens.
410	* @stable ICU 2.0
411	*/
412	U_STABLE UChar * U_EXPORT2
413	u_strtok_r(UChar *src,
414	const UChar *delim,
415	UChar **saveState);
416
417	/**
418	* Compare two Unicode strings for bitwise equality (code unit order).
419	*
420	* @param s1 A string to compare.
421	* @param s2 A string to compare.
422	* @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
423	* value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
424	* value if <code>s1</code> is bitwise greater than <code>s2</code>.
425	* @stable ICU 2.0
426	*/
427	U_STABLE int32_t U_EXPORT2
428	u_strcmp(const UChar *s1,
429	const UChar *s2);
430
431	/**
432	* Compare two Unicode strings in code point order.
433	* See u_strCompare for details.
434	*
435	* @param s1 A string to compare.
436	* @param s2 A string to compare.
437	* @return a negative/zero/positive integer corresponding to whether
438	* the first string is less than/equal to/greater than the second one
439	* in code point order
440	* @stable ICU 2.0
441	*/
442	U_STABLE int32_t U_EXPORT2
443	u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
444
445	/**
446	* Compare two Unicode strings (binary order).
447	*
448	* The comparison can be done in code unit order or in code point order.
449	* They differ only in UTF-16 when
450	* comparing supplementary code points (U+10000..U+10ffff)
451	* to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
452	* In code unit order, high BMP code points sort after supplementary code points
453	* because they are stored as pairs of surrogates which are at U+d800..U+dfff.
454	*
455	* This function works with strings of different explicitly specified lengths
456	* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
457	* NUL-terminated strings are possible with length arguments of -1.
458	*
459	* @param s1 First source string.
460	* @param length1 Length of first source string, or -1 if NUL-terminated.
461	*
462	* @param s2 Second source string.
463	* @param length2 Length of second source string, or -1 if NUL-terminated.
464	*
465	* @param codePointOrder Choose between code unit order (FALSE)
466	* and code point order (TRUE).
467	*
468	* @return <0 or 0 or >0 as usual for string comparisons
469	*
470	* @stable ICU 2.2
471	*/
472	U_STABLE int32_t U_EXPORT2
473	u_strCompare(const UChar *s1, int32_t length1,
474	const UChar *s2, int32_t length2,
475	UBool codePointOrder);
476
477	/**
478	* Compare two Unicode strings (binary order)
479	* as presented by UCharIterator objects.
480	* Works otherwise just like u_strCompare().
481	*
482	* Both iterators are reset to their start positions.
483	* When the function returns, it is undefined where the iterators
484	* have stopped.
485	*
486	* @param iter1 First source string iterator.
487	* @param iter2 Second source string iterator.
488	* @param codePointOrder Choose between code unit order (FALSE)
489	* and code point order (TRUE).
490	*
491	* @return <0 or 0 or >0 as usual for string comparisons
492	*
493	* @see u_strCompare
494	*
495	* @stable ICU 2.6
496	*/
497	U_STABLE int32_t U_EXPORT2
498	u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
499
500	/**
501	* Compare two strings case-insensitively using full case folding.
502	* This is equivalent to
503	* u_strCompare(u_strFoldCase(s1, options),
504	* u_strFoldCase(s2, options),
505	* (options&U_COMPARE_CODE_POINT_ORDER)!=0).
506	*
507	* The comparison can be done in UTF-16 code unit order or in code point order.
508	* They differ only when comparing supplementary code points (U+10000..U+10ffff)
509	* to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
510	* In code unit order, high BMP code points sort after supplementary code points
511	* because they are stored as pairs of surrogates which are at U+d800..U+dfff.
512	*
513	* This function works with strings of different explicitly specified lengths
514	* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
515	* NUL-terminated strings are possible with length arguments of -1.
516	*
517	* @param s1 First source string.
518	* @param length1 Length of first source string, or -1 if NUL-terminated.
519	*
520	* @param s2 Second source string.
521	* @param length2 Length of second source string, or -1 if NUL-terminated.
522	*
523	* @param options A bit set of options:
524	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
525	* Comparison in code unit order with default case folding.
526	*
527	* - U_COMPARE_CODE_POINT_ORDER
528	* Set to choose code point order instead of code unit order
529	* (see u_strCompare for details).
530	*
531	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
532	*
533	* @param pErrorCode Must be a valid pointer to an error code value,
534	* which must not indicate a failure before the function call.
535	*
536	* @return <0 or 0 or >0 as usual for string comparisons
537	*
538	* @stable ICU 2.2
539	*/
540	U_STABLE int32_t U_EXPORT2
541	u_strCaseCompare(const UChar *s1, int32_t length1,
542	const UChar *s2, int32_t length2,
543	uint32_t options,
544	UErrorCode *pErrorCode);
545
546	/**
547	* Compare two ustrings for bitwise equality.
548	* Compares at most <code>n</code> characters.
549	*
550	* @param ucs1 A string to compare (can be NULL/invalid if n<=0).
551	* @param ucs2 A string to compare (can be NULL/invalid if n<=0).
552	* @param n The maximum number of characters to compare; always returns 0 if n<=0.
553	* @return 0 if <code>ucs1</code> and <code>ucs2</code> are bitwise equal; a negative
554	* value if <code>ucs1</code> is bitwise less than <code>ucs2</code>; a positive
555	* value if <code>ucs1</code> is bitwise greater than <code>ucs2</code>.
556	* @stable ICU 2.0
557	*/
558	U_STABLE int32_t U_EXPORT2
559	u_strncmp(const UChar *ucs1,
560	const UChar *ucs2,
561	int32_t n);
562
563	/**
564	* Compare two Unicode strings in code point order.
565	* This is different in UTF-16 from u_strncmp() if supplementary characters are present.
566	* For details, see u_strCompare().
567	*
568	* @param s1 A string to compare.
569	* @param s2 A string to compare.
570	* @param n The maximum number of characters to compare.
571	* @return a negative/zero/positive integer corresponding to whether
572	* the first string is less than/equal to/greater than the second one
573	* in code point order
574	* @stable ICU 2.0
575	*/
576	U_STABLE int32_t U_EXPORT2
577	u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
578
579	/**
580	* Compare two strings case-insensitively using full case folding.
581	* This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
582	*
583	* @param s1 A string to compare.
584	* @param s2 A string to compare.
585	* @param options A bit set of options:
586	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
587	* Comparison in code unit order with default case folding.
588	*
589	* - U_COMPARE_CODE_POINT_ORDER
590	* Set to choose code point order instead of code unit order
591	* (see u_strCompare for details).
592	*
593	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
594	*
595	* @return A negative, zero, or positive integer indicating the comparison result.
596	* @stable ICU 2.0
597	*/
598	U_STABLE int32_t U_EXPORT2
599	u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
600
601	/**
602	* Compare two strings case-insensitively using full case folding.
603	* This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
604	* u_strFoldCase(s2, at most n, options)).
605	*
606	* @param s1 A string to compare.
607	* @param s2 A string to compare.
608	* @param n The maximum number of characters in each string to case-fold and then compare.
609	* @param options A bit set of options:
610	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
611	* Comparison in code unit order with default case folding.
612	*
613	* - U_COMPARE_CODE_POINT_ORDER
614	* Set to choose code point order instead of code unit order
615	* (see u_strCompare for details).
616	*
617	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
618	*
619	* @return A negative, zero, or positive integer indicating the comparison result.
620	* @stable ICU 2.0
621	*/
622	U_STABLE int32_t U_EXPORT2
623	u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
624
625	/**
626	* Compare two strings case-insensitively using full case folding.
627	* This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
628	* u_strFoldCase(s2, n, options)).
629	*
630	* @param s1 A string to compare.
631	* @param s2 A string to compare.
632	* @param length The number of characters in each string to case-fold and then compare.
633	* @param options A bit set of options:
634	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
635	* Comparison in code unit order with default case folding.
636	*
637	* - U_COMPARE_CODE_POINT_ORDER
638	* Set to choose code point order instead of code unit order
639	* (see u_strCompare for details).
640	*
641	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
642	*
643	* @return A negative, zero, or positive integer indicating the comparison result.
644	* @stable ICU 2.0
645	*/
646	U_STABLE int32_t U_EXPORT2
647	u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
648
649	/**
650	* Copy a ustring. Adds a null terminator.
651	*
652	* @param dst The destination string.
653	* @param src The source string.
654	* @return A pointer to <code>dst</code>.
655	* @stable ICU 2.0
656	*/
657	U_STABLE UChar* U_EXPORT2
658	u_strcpy(UChar *dst,
659	const UChar *src);
660
661	/**
662	* Copy a ustring.
663	* Copies at most <code>n</code> characters. The result will be null terminated
664	* if the length of <code>src</code> is less than <code>n</code>.
665	*
666	* @param dst The destination string.
667	* @param src The source string (can be NULL/invalid if n<=0).
668	* @param n The maximum number of characters to copy; no-op if <=0.
669	* @return A pointer to <code>dst</code>.
670	* @stable ICU 2.0
671	*/
672	U_STABLE UChar* U_EXPORT2
673	u_strncpy(UChar *dst,
674	const UChar *src,
675	int32_t n);
676
677	#if !UCONFIG_NO_CONVERSION
678
679	/**
680	* Copy a byte string encoded in the default codepage to a ustring.
681	* Adds a null terminator.
682	* Performs a host byte to UChar conversion
683	*
684	* @param dst The destination string.
685	* @param src The source string.
686	* @return A pointer to <code>dst</code>.
687	* @stable ICU 2.0
688	*/
689	U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
690	const char *src );
691
692	/**
693	* Copy a byte string encoded in the default codepage to a ustring.
694	* Copies at most <code>n</code> characters. The result will be null terminated
695	* if the length of <code>src</code> is less than <code>n</code>.
696	* Performs a host byte to UChar conversion
697	*
698	* @param dst The destination string.
699	* @param src The source string.
700	* @param n The maximum number of characters to copy.
701	* @return A pointer to <code>dst</code>.
702	* @stable ICU 2.0
703	*/
704	U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
705	const char *src,
706	int32_t n);
707
708	/**
709	* Copy ustring to a byte string encoded in the default codepage.
710	* Adds a null terminator.
711	* Performs a UChar to host byte conversion
712	*
713	* @param dst The destination string.
714	* @param src The source string.
715	* @return A pointer to <code>dst</code>.
716	* @stable ICU 2.0
717	*/
718	U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
719	const UChar *src );
720
721	/**
722	* Copy ustring to a byte string encoded in the default codepage.
723	* Copies at most <code>n</code> characters. The result will be null terminated
724	* if the length of <code>src</code> is less than <code>n</code>.
725	* Performs a UChar to host byte conversion
726	*
727	* @param dst The destination string.
728	* @param src The source string.
729	* @param n The maximum number of characters to copy.
730	* @return A pointer to <code>dst</code>.
731	* @stable ICU 2.0
732	*/
733	U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
734	const UChar *src,
735	int32_t n );
736
737	#endif
738
739	/**
740	* Synonym for memcpy(), but with UChars only.
741	* @param dest The destination string
742	* @param src The source string (can be NULL/invalid if count<=0)
743	* @param count The number of characters to copy; no-op if <=0
744	* @return A pointer to <code>dest</code>
745	* @stable ICU 2.0
746	*/
747	U_STABLE UChar* U_EXPORT2
748	u_memcpy(UChar *dest, const UChar *src, int32_t count);
749
750	/**
751	* Synonym for memmove(), but with UChars only.
752	* @param dest The destination string
753	* @param src The source string (can be NULL/invalid if count<=0)
754	* @param count The number of characters to move; no-op if <=0
755	* @return A pointer to <code>dest</code>
756	* @stable ICU 2.0
757	*/
758	U_STABLE UChar* U_EXPORT2
759	u_memmove(UChar *dest, const UChar *src, int32_t count);
760
761	/**
762	* Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
763	*
764	* @param dest The destination string.
765	* @param c The character to initialize the string.
766	* @param count The maximum number of characters to set.
767	* @return A pointer to <code>dest</code>.
768	* @stable ICU 2.0
769	*/
770	U_STABLE UChar* U_EXPORT2
771	u_memset(UChar *dest, UChar c, int32_t count);
772
773	/**
774	* Compare the first <code>count</code> UChars of each buffer.
775	*
776	* @param buf1 The first string to compare.
777	* @param buf2 The second string to compare.
778	* @param count The maximum number of UChars to compare.
779	* @return When buf1 < buf2, a negative number is returned.
780	* When buf1 == buf2, 0 is returned.
781	* When buf1 > buf2, a positive number is returned.
782	* @stable ICU 2.0
783	*/
784	U_STABLE int32_t U_EXPORT2
785	u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
786
787	/**
788	* Compare two Unicode strings in code point order.
789	* This is different in UTF-16 from u_memcmp() if supplementary characters are present.
790	* For details, see u_strCompare().
791	*
792	* @param s1 A string to compare.
793	* @param s2 A string to compare.
794	* @param count The maximum number of characters to compare.
795	* @return a negative/zero/positive integer corresponding to whether
796	* the first string is less than/equal to/greater than the second one
797	* in code point order
798	* @stable ICU 2.0
799	*/
800	U_STABLE int32_t U_EXPORT2
801	u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
802
803	/**
804	* Find the first occurrence of a BMP code point in a string.
805	* A surrogate code point is found only if its match in the text is not
806	* part of a surrogate pair.
807	* A NUL character is found at the string terminator.
808	*
809	* @param s The string to search (contains <code>count</code> UChars).
810	* @param c The BMP code point to find.
811	* @param count The length of the string, in UChars.
812	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>,
813	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>. The result is not const-qualified even though <code>s</code> is.
814	* @stable ICU 2.0
815	*
816	* @see u_strchr
817	* @see u_memchr32
818	* @see u_strFindFirst
819	*/
820	U_STABLE UChar* U_EXPORT2
821	u_memchr(const UChar *s, UChar c, int32_t count);
822
823	/**
824	* Find the first occurrence of a code point in a string.
825	* A surrogate code point is found only if its match in the text is not
826	* part of a surrogate pair.
827	* A NUL character is found at the string terminator.
828	*
829	* @param s The string to search (contains <code>count</code> UChars).
830	* @param c The code point to find.
831	* @param count The length of the string, in UChars.
832	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>,
833	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>. The result is not const-qualified even though <code>s</code> is.
834	* @stable ICU 2.0
835	*
836	* @see u_strchr32
837	* @see u_memchr
838	* @see u_strFindFirst
839	*/
840	U_STABLE UChar* U_EXPORT2
841	u_memchr32(const UChar *s, UChar32 c, int32_t count);
842
843	/**
844	* Find the last occurrence of a BMP code point in a string.
845	* A surrogate code point is found only if its match in the text is not
846	* part of a surrogate pair.
847	* A NUL character is found at the string terminator.
848	*
849	* @param s The string to search (contains <code>count</code> UChars).
850	* @param c The BMP code point to find.
851	* @param count The length of the string, in UChars.
852	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>,
853	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>. The result is not const-qualified even though <code>s</code> is.
854	* @stable ICU 2.4
855	*
856	* @see u_strrchr
857	* @see u_memrchr32
858	* @see u_strFindLast
859	*/
860	U_STABLE UChar* U_EXPORT2
861	u_memrchr(const UChar *s, UChar c, int32_t count);
862
863	/**
864	* Find the last occurrence of a code point in a string.
865	* A surrogate code point is found only if its match in the text is not
866	* part of a surrogate pair.
867	* A NUL character is found at the string terminator.
868	*
869	* @param s The string to search (contains <code>count</code> UChars).
870	* @param c The code point to find.
871	* @param count The length of the string, in UChars.
872	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>,
873	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>. The result is not const-qualified even though <code>s</code> is.
874	* @stable ICU 2.4
875	*
876	* @see u_strrchr32
877	* @see u_memrchr
878	* @see u_strFindLast
879	*/
880	U_STABLE UChar* U_EXPORT2
881	u_memrchr32(const UChar *s, UChar32 c, int32_t count);
882
883	/**
884	* Unicode String literals in C.
885	* We need one macro to declare a variable for the string
886	* and to statically preinitialize it if possible,
887	* and a second macro to dynamically initialize such a string variable if necessary.
888	*
889	* The macros are defined for maximum performance.
890	* They work only for strings that contain "invariant characters", i.e.,
891	* only latin letters, digits, and some punctuation.
892	* See utypes.h for details.
893	*
894	* A pair of macros for a single string must be used with the same
895	* parameters.
896	* The string parameter must be a C string literal.
897	* The length of the string, not including the terminating
898	* `NUL`, must be specified as a constant.
899	* The U_STRING_DECL macro should be invoked exactly once for one
900	* such string variable before it is used.
901	*
902	* Usage:
903	*
904	* U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
905	* U_STRING_DECL(ustringVar2, "jumps 5%", 8);
906	* static UBool didInit=FALSE;
907	*
908	* int32_t function() {
909	* if(!didInit) {
910	* U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
911	* U_STRING_INIT(ustringVar2, "jumps 5%", 8);
912	* didInit=TRUE;
913	* }
914	* return u_strcmp(ustringVar1, ustringVar2);
915	* }
916	*
917	* Note that the macros will NOT consistently work if their argument is another #`define`.
918	* The following will not work on all platforms, don't use it.
919	*
920	* #define GLUCK "Mr. Gluck"
921	* U_STRING_DECL(var, GLUCK, 9)
922	* U_STRING_INIT(var, GLUCK, 9)
923	*
924	* Instead, use the string literal "Mr. Gluck" as the argument to both macro
925	* calls.
926	*
927	*
928	* @stable ICU 2.0
929	*/
930	#if defined(U_DECLARE_UTF16)
931	# define U_STRING_DECL(var, cs, length) static const UChar var=(const UChar )U_DECLARE_UTF16(cs)
932	/@stable ICU 2.0 /*
933	# define U_STRING_INIT(var, cs, length)
934	#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY \|\| (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
935	# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
936	/@stable ICU 2.0 /*
937	# define U_STRING_INIT(var, cs, length)
938	#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
939	# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
940	/@stable ICU 2.0 /*
941	# define U_STRING_INIT(var, cs, length)
942	#else
943	# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
944	/@stable ICU 2.0 /*
945	# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
946	#endif
947
948	/**
949	* Unescape a string of characters and write the resulting
950	* Unicode characters to the destination buffer. The following escape
951	* sequences are recognized:
952	*
953	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
954	* \\Uhhhhhhhh 8 hex digits
955	* \\xhh 1-2 hex digits
956	* \\x{h...} 1-8 hex digits
957	* \\ooo 1-3 octal digits; o in [0-7]
958	* \\cX control-X; X is masked with 0x1F
959	*
960	* as well as the standard ANSI C escapes:
961	*
962	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
963	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
964	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
965	*
966	* Anything else following a backslash is generically escaped. For
967	* example, "[a\\-z]" returns "[a-z]".
968	*
969	* If an escape sequence is ill-formed, this function stores an empty
970	* string in dest (if possible). An example of an ill-formed sequence is "\\u" followed by
971	* fewer than 4 hex digits.
972	*
973	* The above characters are recognized in the compiler's codepage,
974	* that is, they are coded as 'u', '\\', etc. Characters that are
975	* not parts of escape sequences are converted using u_charsToUChars().
976	*
977	* This function is similar to UnicodeString::unescape() but not
978	* identical to it. The latter takes a source UnicodeString, so it
979	* does escape recognition but no conversion.
980	*
981	* @param src a zero-terminated string of invariant characters
982	* @param dest pointer to buffer to receive converted and unescaped
983	* text and, if there is room, a zero terminator. May be NULL for
984	* preflighting, in which case no UChars will be written, but the
985	* return value will still be valid. On error, an empty string is
986	* stored here (if possible).
987	* @param destCapacity the number of UChars that may be written at
988	* dest. Ignored if dest == NULL.
989	* @return the length of the unescaped string.
990	* @see u_unescapeAt
991	* @see UnicodeString#unescape()
992	* @see UnicodeString#unescapeAt()
993	* @stable ICU 2.0
994	*/
995	U_STABLE int32_t U_EXPORT2
996	u_unescape(const char *src,
997	UChar *dest, int32_t destCapacity);
998
999	U_CDECL_BEGIN
1000	/**
1001	* Callback function for u_unescapeAt() that returns a character of
1002	* the source text given an offset and a context pointer. The context
1003	* pointer will be whatever is passed into u_unescapeAt().
1004	*
1005	* @param offset the offset of the requested character in the source text
1006	* @param context an opaque pointer passed directly into u_unescapeAt()
1007	* @return the character of the source text at
1008	* offset
1009	* @see u_unescapeAt
1010	* @stable ICU 2.0
1011	*/
1012	typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
1013	U_CDECL_END
1014
1015	/**
1016	* Unescape a single sequence. The character at offset-1 is assumed
1017	* (without checking) to be a backslash. This function takes a callback
1018	* pointer to a function that returns the UChar at a given offset. By
1019	* varying this callback, ICU functions are able to unescape char*
1020	* strings, UnicodeString objects, and UFILE pointers.
1021	*
1022	* If offset is out of range, or if the escape sequence is ill-formed,
1023	* (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape()
1024	* for a list of recognized sequences.
1025	*
1026	* @param charAt callback function (see UNESCAPE_CHAR_AT) that returns a UChar of the source
1027	* text given an offset and a context pointer.
1028	* @param offset pointer to the offset that will be passed to charAt.
1029	* The offset value will be updated upon return to point after the
1030	* last parsed character of the escape sequence. On error the offset
1031	* is unchanged.
1032	* @param length the number of characters in the source text. The
1033	* last character of the source text is considered to be at offset
1034	* length-1.
1035	* @param context an opaque pointer passed directly into charAt.
1036	* @return the character represented by the escape sequence at
1037	* offset, or (UChar32)0xFFFFFFFF on error.
1038	* @see u_unescape()
1039	* @see UnicodeString#unescape()
1040	* @see UnicodeString#unescapeAt()
1041	* @stable ICU 2.0
1042	*/
1043	U_STABLE UChar32 U_EXPORT2
1044	u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1045	int32_t *offset,
1046	int32_t length,
1047	void *context);
1048
1049	/**
1050	* Uppercase the characters in a string.
1051	* Casing is locale-dependent and context-sensitive.
1052	* The result may be longer or shorter than the original.
1053	* The source string and the destination buffer are allowed to overlap.
1054	*
1055	* @param dest A buffer for the result string. The result will be zero-terminated if
1056	* the buffer is large enough.
1057	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
1058	* dest may be NULL and the function will only return the length of the result
1059	* without writing any of the result string.
1060	* @param src The original string.
1061	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1062	* @param locale The locale to consider: "" for the root locale, or NULL for the default locale.
1063	* @param pErrorCode Must be a valid pointer to an error code value,
1064	* which must not indicate a failure before the function call.
1065	* @return The length of the result string. It may be greater than destCapacity; in that case,
1066	* only part of the result was written to the destination buffer.
1067	* @stable ICU 2.0
1068	*/
1069	U_STABLE int32_t U_EXPORT2
1070	u_strToUpper(UChar *dest, int32_t destCapacity,
1071	const UChar *src, int32_t srcLength,
1072	const char *locale,
1073	UErrorCode *pErrorCode);
1074
1075	/**
1076	* Lowercase the characters in a string.
1077	* Casing is locale-dependent and context-sensitive.
1078	* The result may be longer or shorter than the original.
1079	* The source string and the destination buffer are allowed to overlap.
1080	*
1081	* @param dest A buffer for the result string. The result will be zero-terminated if
1082	* the buffer is large enough.
1083	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
1084	* dest may be NULL and the function will only return the length of the result
1085	* without writing any of the result string.
1086	* @param src The original string.
1087	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1088	* @param locale The locale to consider: "" for the root locale, or NULL for the default locale.
1089	* @param pErrorCode Must be a valid pointer to an error code value,
1090	* which must not indicate a failure before the function call.
1091	* @return The length of the result string. It may be greater than destCapacity; in that case,
1092	* only part of the result was written to the destination buffer.
1093	* @stable ICU 2.0
1094	*/
1095	U_STABLE int32_t U_EXPORT2
1096	u_strToLower(UChar *dest, int32_t destCapacity,
1097	const UChar *src, int32_t srcLength,
1098	const char *locale,
1099	UErrorCode *pErrorCode);
1100
1101	#if !UCONFIG_NO_BREAK_ITERATION
1102
1103	/**
1104	* Titlecase a string.
1105	* Casing is locale-dependent and context-sensitive.
1106	* Titlecasing uses a break iterator to find the first characters of words
1107	* that are to be titlecased. It titlecases those characters and lowercases
1108	* all others.
1109	*
1110	* The titlecase break iterator can be provided to customize for arbitrary
1111	* styles, using rules and dictionaries beyond the standard iterators.
1112	* It may be more efficient to always provide an iterator to avoid
1113	* opening and closing one for each string.
1114	* The standard titlecase iterator for the root locale implements the
1115	* algorithm of Unicode TR 21.
1116	*
1117	* This function uses only the setText(), first() and next() methods of the
1118	* provided break iterator.
1119	*
1120	* The result may be longer or shorter than the original.
1121	* The source string and the destination buffer are allowed to overlap.
1122	*
1123	* @param dest A buffer for the result string. The result will be zero-terminated if
1124	* the buffer is large enough.
1125	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
1126	* dest may be NULL and the function will only return the length of the result
1127	* without writing any of the result string.
1128	* @param src The original string.
1129	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1130	* @param titleIter A break iterator to find the first characters of words
1131	* that are to be titlecased.
1132	* If none is provided (NULL), then a standard titlecase
1133	* break iterator is opened.
1134	* @param locale The locale to consider: "" for the root locale, or NULL for the default locale.
1135	* @param pErrorCode Must be a valid pointer to an error code value,
1136	* which must not indicate a failure before the function call.
1137	* @return The length of the result string. It may be greater than destCapacity; in that case,
1138	* only part of the result was written to the destination buffer.
1139	* @stable ICU 2.1
1140	*/
1141	U_STABLE int32_t U_EXPORT2
1142	u_strToTitle(UChar *dest, int32_t destCapacity,
1143	const UChar *src, int32_t srcLength,
1144	UBreakIterator *titleIter,
1145	const char *locale,
1146	UErrorCode *pErrorCode);
1147
1148	#endif /* !UCONFIG_NO_BREAK_ITERATION */
1149
1150	/**
1151	* Case-folds the characters in a string.
1152	*
1153	* Case-folding is locale-independent and not context-sensitive,
1154	* but there is an option for whether to include or exclude mappings for dotted I
1155	* and dotless i that are marked with 'T' in CaseFolding.txt.
1156	*
1157	* The result may be longer or shorter than the original.
1158	* The source string and the destination buffer are allowed to overlap.
1159	*
1160	* @param dest A buffer for the result string. The result will be zero-terminated if
1161	* the buffer is large enough.
1162	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
1163	* dest may be NULL and the function will only return the length of the result
1164	* without writing any of the result string.
1165	* @param src The original string.
1166	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1167	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I.
1168	* @param pErrorCode Must be a valid pointer to an error code value,
1169	* which must not indicate a failure before the function call.
1170	* @return The length of the result string. It may be greater than destCapacity; in that case,
1171	* only part of the result was written to the destination buffer.
1172	* @stable ICU 2.0
1173	*/
1174	U_STABLE int32_t U_EXPORT2
1175	u_strFoldCase(UChar *dest, int32_t destCapacity,
1176	const UChar *src, int32_t srcLength,
1177	uint32_t options,
1178	UErrorCode *pErrorCode);
1179
1180	#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
1181	/**
1182	* Convert a UTF-16 string to a wchar_t string.
1183	* If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
1184	* this function simply calls the fast, dedicated function for that.
1185	* Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
1186	*
1187	* @param dest A buffer for the result string. The result will be zero-terminated if
1188	* the buffer is large enough.
1189	* @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then
1190	* dest may be NULL and the function will only return the length of the
1191	* result without writing any of the result string (pre-flighting).
1192	* @param pDestLength A pointer to receive the number of units written to the destination. If
1193	* pDestLength!=NULL then *pDestLength is always set to the
1194	* number of output units corresponding to the transformation of
1195	* all the input units, even in case of a buffer overflow.
1196	* @param src The original source string (UTF-16).
1197	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1198	* @param pErrorCode Must be a valid pointer to an error code value,
1199	* which must not indicate a failure before the function call.
1200	* @return The pointer to the destination buffer.
1201	* @stable ICU 2.0
1202	*/
1203	U_STABLE wchar_t* U_EXPORT2
1204	u_strToWCS(wchar_t *dest,
1205	int32_t destCapacity,
1206	int32_t *pDestLength,
1207	const UChar *src,
1208	int32_t srcLength,
1209	UErrorCode *pErrorCode);
1210	/**
1211	* Convert a wchar_t string to UTF-16.
1212	* If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
1213	* this function simply calls the fast, dedicated function for that.
1214	* Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
1215	*
1216	* @param dest A buffer for the result string. The result will be zero-terminated if
1217	* the buffer is large enough.
1218	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
1219	* dest may be NULL and the function will only return the length of the
1220	* result without writing any of the result string (pre-flighting).
1221	* @param pDestLength A pointer to receive the number of units written to the destination. If
1222	* pDestLength!=NULL then *pDestLength is always set to the
1223	* number of output units corresponding to the transformation of
1224	* all the input units, even in case of a buffer overflow.
1225	* @param src The original source string (wchar_t).
1226	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1227	* @param pErrorCode Must be a valid pointer to an error code value,
1228	* which must not indicate a failure before the function call.
1229	* @return The pointer to the destination buffer.
1230	* @stable ICU 2.0
1231	*/
1232	U_STABLE UChar* U_EXPORT2
1233	u_strFromWCS(UChar *dest,
1234	int32_t destCapacity,
1235	int32_t *pDestLength,
1236	const wchar_t *src,
1237	int32_t srcLength,
1238	UErrorCode *pErrorCode);
1239	#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
1240
1241	/**
1242	* Convert a UTF-16 string to UTF-8.
1243	* If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
1244	*
1245	* @param dest A buffer for the result string. The result will be zero-terminated if
1246	* the buffer is large enough.
1247	* @param destCapacity The size of the buffer (number of chars). If it is 0, then
1248	* dest may be NULL and the function will only return the length of the
1249	* result without writing any of the result string (pre-flighting).
1250	* @param pDestLength A pointer to receive the number of units written to the destination. If
1251	* pDestLength!=NULL then *pDestLength is always set to the
1252	* number of output units corresponding to the transformation of
1253	* all the input units, even in case of a buffer overflow.
1254	* @param src The original source string (UTF-16).
1255	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1256	* @param pErrorCode Must be a valid pointer to an error code value,
1257	* which must not indicate a failure before the function call.
1258	* @return The pointer to the destination buffer.
1259	* @stable ICU 2.0
1260	* @see u_strToUTF8WithSub
1261	* @see u_strFromUTF8
1262	*/
1263	U_STABLE char* U_EXPORT2
1264	u_strToUTF8(char *dest,
1265	int32_t destCapacity,
1266	int32_t *pDestLength,
1267	const UChar *src,
1268	int32_t srcLength,
1269	UErrorCode *pErrorCode);
1270
1271	/**
1272	* Convert a UTF-8 string to UTF-16.
1273	* If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
1274	*
1275	* @param dest A buffer for the result string. The result will be zero-terminated if
1276	* the buffer is large enough.
1277	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
1278	* dest may be NULL and the function will only return the length of the
1279	* result without writing any of the result string (pre-flighting).
1280	* @param pDestLength A pointer to receive the number of units written to the destination. If
1281	* pDestLength!=NULL then *pDestLength is always set to the
1282	* number of output units corresponding to the transformation of
1283	* all the input units, even in case of a buffer overflow.
1284	* @param src The original source string (UTF-8).
1285	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1286	* @param pErrorCode Must be a valid pointer to an error code value,
1287	* which must not indicate a failure before the function call.
1288	* @return The pointer to the destination buffer.
1289	* @stable ICU 2.0
1290	* @see u_strFromUTF8WithSub
1291	* @see u_strFromUTF8Lenient
1292	*/
1293	U_STABLE UChar* U_EXPORT2
1294	u_strFromUTF8(UChar *dest,
1295	int32_t destCapacity,
1296	int32_t *pDestLength,
1297	const char *src,
1298	int32_t srcLength,
1299	UErrorCode *pErrorCode);
1300
1301	/**
1302	* Convert a UTF-16 string to UTF-8.
1303	*
1304	* Same as u_strToUTF8() except for the additional subchar which is output for
1305	* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
1306	* With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
1307	*
1308	* @param dest A buffer for the result string. The result will be zero-terminated if
1309	* the buffer is large enough.
1310	* @param destCapacity The size of the buffer (number of chars). If it is 0, then
1311	* dest may be NULL and the function will only return the length of the
1312	* result without writing any of the result string (pre-flighting).
1313	* @param pDestLength A pointer to receive the number of units written to the destination. If
1314	* pDestLength!=NULL then *pDestLength is always set to the
1315	* number of output units corresponding to the transformation of
1316	* all the input units, even in case of a buffer overflow.
1317	* @param src The original source string (UTF-16).
1318	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1319	* @param subchar The substitution character to use in place of an illegal input sequence,
1320	* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
1321	* A substitution character can be any valid Unicode code point (up to U+10FFFF)
1322	* except for surrogate code points (U+D800..U+DFFF).
1323	* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
1324	* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
1325	* Set to 0 if no substitutions occur or subchar<0.
1326	* pNumSubstitutions can be NULL.
1327	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
1328	* pass the U_SUCCESS() test, or else the function returns
1329	* immediately. Check for U_FAILURE() on output or use with
1330	* function chaining. (See User Guide for details.)
1331	* @return The pointer to the destination buffer.
1332	* @see u_strToUTF8
1333	* @see u_strFromUTF8WithSub
1334	* @stable ICU 3.6
1335	*/
1336	U_STABLE char* U_EXPORT2
1337	u_strToUTF8WithSub(char *dest,
1338	int32_t destCapacity,
1339	int32_t *pDestLength,
1340	const UChar *src,
1341	int32_t srcLength,
1342	UChar32 subchar, int32_t *pNumSubstitutions,
1343	UErrorCode *pErrorCode);
1344
1345	/**
1346	* Convert a UTF-8 string to UTF-16.
1347	*
1348	* Same as u_strFromUTF8() except for the additional subchar which is output for
1349	* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
1350	* With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
1351	*
1352	* @param dest A buffer for the result string. The result will be zero-terminated if
1353	* the buffer is large enough.
1354	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
1355	* dest may be NULL and the function will only return the length of the
1356	* result without writing any of the result string (pre-flighting).
1357	* @param pDestLength A pointer to receive the number of units written to the destination. If
1358	* pDestLength!=NULL then *pDestLength is always set to the
1359	* number of output units corresponding to the transformation of
1360	* all the input units, even in case of a buffer overflow.
1361	* @param src The original source string (UTF-8).
1362	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1363	* @param subchar The substitution character to use in place of an illegal input sequence,
1364	* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
1365	* A substitution character can be any valid Unicode code point (up to U+10FFFF)
1366	* except for surrogate code points (U+D800..U+DFFF).
1367	* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
1368	* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
1369	* Set to 0 if no substitutions occur or subchar<0.
1370	* pNumSubstitutions can be NULL.
1371	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
1372	* pass the U_SUCCESS() test, or else the function returns
1373	* immediately. Check for U_FAILURE() on output or use with
1374	* function chaining. (See User Guide for details.)
1375	* @return The pointer to the destination buffer.
1376	* @see u_strFromUTF8
1377	* @see u_strFromUTF8Lenient
1378	* @see u_strToUTF8WithSub
1379	* @stable ICU 3.6
1380	*/
1381	U_STABLE UChar* U_EXPORT2
1382	u_strFromUTF8WithSub(UChar *dest,
1383	int32_t destCapacity,
1384	int32_t *pDestLength,
1385	const char *src,
1386	int32_t srcLength,
1387	UChar32 subchar, int32_t *pNumSubstitutions,
1388	UErrorCode *pErrorCode);
1389
1390	/**
1391	* Convert a UTF-8 string to UTF-16.
1392	*
1393	* Same as u_strFromUTF8() except that this function is designed to be very fast,
1394	* which it achieves by being lenient about malformed UTF-8 sequences.
1395	* This function is intended for use in environments where UTF-8 text is
1396	* expected to be well-formed.
1397	*
1398	* Its semantics are:
1399	* - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
1400	* - The function will not read beyond the input string, nor write beyond
1401	* the destCapacity.
1402	* - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
1403	* be well-formed UTF-16.
1404	* The function will resynchronize to valid code point boundaries
1405	* within a small number of code points after an illegal sequence.
1406	* - Non-shortest forms are not detected and will result in "spoofing" output.
1407	*
1408	* For further performance improvement, if srcLength is given (>=0),
1409	* then destCapacity must be >= srcLength.
1410	*
1411	* There is no inverse u_strToUTF8Lenient() function because there is practically
1412	* no performance gain from not checking that a UTF-16 string is well-formed.
1413	*
1414	* @param dest A buffer for the result string. The result will be zero-terminated if
1415	* the buffer is large enough.
1416	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
1417	* dest may be NULL and the function will only return the length of the
1418	* result without writing any of the result string (pre-flighting).
1419	* Unlike for other ICU functions, if srcLength>=0 then
1420	* destCapacity must be >= srcLength.
1421	* @param pDestLength A pointer to receive the number of units written to the destination. If
1422	* pDestLength!=NULL then *pDestLength is always set to the
1423	* number of output units corresponding to the transformation of
1424	* all the input units, even in case of a buffer overflow.
1425	* Unlike for other ICU functions, if srcLength>=0 but
1426	* destCapacity<srcLength, then *pDestLength will be set to srcLength
1427	* (and U_BUFFER_OVERFLOW_ERROR will be set)
1428	* regardless of the actual result length.
1429	* @param src The original source string (UTF-8).
1430	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1431	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
1432	* pass the U_SUCCESS() test, or else the function returns
1433	* immediately. Check for U_FAILURE() on output or use with
1434	* function chaining. (See User Guide for details.)
1435	* @return The pointer to the destination buffer.
1436	* @see u_strFromUTF8
1437	* @see u_strFromUTF8WithSub
1438	* @see u_strToUTF8WithSub
1439	* @stable ICU 3.6
1440	*/
1441	U_STABLE UChar * U_EXPORT2
1442	u_strFromUTF8Lenient(UChar *dest,
1443	int32_t destCapacity,
1444	int32_t *pDestLength,
1445	const char *src,
1446	int32_t srcLength,
1447	UErrorCode *pErrorCode);
1448
1449	/**
1450	* Convert a UTF-16 string to UTF-32.
1451	* If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
1452	*
1453	* @param dest A buffer for the result string. The result will be zero-terminated if
1454	* the buffer is large enough.
1455	* @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
1456	* dest may be NULL and the function will only return the length of the
1457	* result without writing any of the result string (pre-flighting).
1458	* @param pDestLength A pointer to receive the number of units written to the destination. If
1459	* pDestLength!=NULL then *pDestLength is always set to the
1460	* number of output units corresponding to the transformation of
1461	* all the input units, even in case of a buffer overflow.
1462	* @param src The original source string (UTF-16).
1463	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1464	* @param pErrorCode Must be a valid pointer to an error code value,
1465	* which must not indicate a failure before the function call.
1466	* @return The pointer to the destination buffer.
1467	* @see u_strToUTF32WithSub
1468	* @see u_strFromUTF32
1469	* @stable ICU 2.0
1470	*/
1471	U_STABLE UChar32* U_EXPORT2
1472	u_strToUTF32(UChar32 *dest,
1473	int32_t destCapacity,
1474	int32_t *pDestLength,
1475	const UChar *src,
1476	int32_t srcLength,
1477	UErrorCode *pErrorCode);
1478
1479	/**
1480	* Convert a UTF-32 string to UTF-16.
1481	* If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
1482	*
1483	* @param dest A buffer for the result string. The result will be zero-terminated if
1484	* the buffer is large enough.
1485	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
1486	* dest may be NULL and the function will only return the length of the
1487	* result without writing any of the result string (pre-flighting).
1488	* @param pDestLength A pointer to receive the number of units written to the destination. If
1489	* pDestLength!=NULL then *pDestLength is always set to the
1490	* number of output units corresponding to the transformation of
1491	* all the input units, even in case of a buffer overflow.
1492	* @param src The original source string (UTF-32).
1493	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
1494	* @param pErrorCode Must be a valid pointer to an error code value,
1495	* which must not indicate a failure before the function call.
1496	* @return The pointer to the destination buffer.
1497	* @see u_strFromUTF32WithSub
1498	* @see u_strToUTF32
1499	* @stable ICU 2.0
1500	*/
1501	U_STABLE UChar* U_EXPORT2
1502	u_strFromUTF32(UChar *dest,
1503	int32_t destCapacity,
1504	int32_t *pDestLength,
1505	const UChar32 *src,
1506	int32_t srcLength,
1507	UErrorCode *pErrorCode);
1508
1509	/**
1510	* Convert a UTF-16 string (src) to a UTF-32 string (dest), with optional substitution.
1511	*
1512	* Same as u_strToUTF32() except for the additional subchar which is output in place of
1513	* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
1514	* With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().
1515	*
1516	* @param dest A buffer for the UTF-32 result string. The result will be zero-terminated
1517	* if the buffer is large enough.
1518	* @param destCapacity The size of the dest buffer (number of UChar32s). If it is 0, then
1519	* dest may be NULL and the function will only report the length of the
1520	* result without writing any of the result string (pre-flighting).
1521	* @param pDestLength A pointer to receive the length of the result, in output units. If
1522	* pDestLength!=NULL then *pDestLength is always set to the
1523	* number of output units corresponding to the transformation of
1524	* all the input units, even in case of a buffer overflow.
1525	* @param src The UTF-16 source string to convert.
1526	* @param srcLength The length of the source string. If -1, then src must be zero-terminated.
1527	* @param subchar The substitution character to use in place of an illegal input sequence,
1528	* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
1529	* A substitution character can be any valid Unicode code point (up to U+10FFFF)
1530	* except for surrogate code points (U+D800..U+DFFF).
1531	* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
1532	* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
1533	* Set to 0 if no substitutions occur or subchar<0.
1534	* pNumSubstitutions can be NULL.
1535	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
1536	* pass the U_SUCCESS() test, or else the function returns
1537	* immediately. Check for U_FAILURE() on output or use with
1538	* function chaining. (See User Guide for details.)
1539	* @return The pointer to the destination buffer.
1540	* @see u_strToUTF32
1541	* @see u_strFromUTF32WithSub
1542	* @stable ICU 4.2
1543	*/
1544	U_STABLE UChar32* U_EXPORT2
1545	u_strToUTF32WithSub(UChar32 *dest,
1546	int32_t destCapacity,
1547	int32_t *pDestLength,
1548	const UChar *src,
1549	int32_t srcLength,
1550	UChar32 subchar, int32_t *pNumSubstitutions,
1551	UErrorCode *pErrorCode);
1552
1553	/**
1554	* Convert a UTF-32 string (src) to a UTF-16 string (dest), with optional substitution.
1555	*
1556	* Same as u_strFromUTF32() except for the additional subchar which is output in place of
1557	* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
1558	* With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().
1559	*
1560	* @param dest A buffer for the UTF-16 result string. The result will be zero-terminated
1561	* if the buffer is large enough.
1562	* @param destCapacity The size of the dest buffer (number of UChars). If it is 0, then
1563	* dest may be NULL and the function will only report the length of the
1564	* result without writing any of the result string (pre-flighting).
1565	* @param pDestLength A pointer to receive the length of the result, in output units. If
1566	* pDestLength!=NULL then *pDestLength is always set to the
1567	* number of output units corresponding to the transformation of
1568	* all the input units, even in case of a buffer overflow.
1569	* @param src The UTF-32 source string to convert.
1570	* @param srcLength The length of the source string. If -1, then src must be zero-terminated.
1571	* @param subchar The substitution character to use in place of an illegal input sequence,
1572	* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
1573	* A substitution character can be any valid Unicode code point (up to U+10FFFF)
1574	* except for surrogate code points (U+D800..U+DFFF).
1575	* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
1576	* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
1577	* Set to 0 if no substitutions occur or subchar<0.
1578	* pNumSubstitutions can be NULL.
1579	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
1580	* pass the U_SUCCESS() test, or else the function returns
1581	* immediately. Check for U_FAILURE() on output or use with
1582	* function chaining. (See User Guide for details.)
1583	* @return The pointer to the destination buffer.
1584	* @see u_strFromUTF32
1585	* @see u_strToUTF32WithSub
1586	* @stable ICU 4.2
1587	*/
1588	U_STABLE UChar* U_EXPORT2
1589	u_strFromUTF32WithSub(UChar *dest,
1590	int32_t destCapacity,
1591	int32_t *pDestLength,
1592	const UChar32 *src,
1593	int32_t srcLength,
1594	UChar32 subchar, int32_t *pNumSubstitutions,
1595	UErrorCode *pErrorCode);
1596
1597	/**
1598	* Convert a 16-bit Unicode string (src) to Java Modified UTF-8 (dest).
1599	* See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8
1600	*
1601	* This function behaves according to the documentation for Java DataOutput.writeUTF()
1602	* except that it does not encode the output length in the destination buffer
1603	* and does not have an output length restriction.
1604	* See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String)
1605	*
1606	* The input string need not be well-formed UTF-16.
1607	* (Therefore there is no subchar parameter.)
1608	*
1609	* @param dest A buffer for the Java Modified UTF-8 result string. The result will be
1610	* zero-terminated if the buffer is large enough.
1611	* @param destCapacity The size of the dest buffer (number of chars). If it is 0, then
1612	* dest may be NULL and the function will only report the length of the
1613	* result without writing any of the result string (pre-flighting).
1614	* @param pDestLength A pointer to receive the length of the result, in output units. If
1615	* pDestLength!=NULL then *pDestLength is always set to the
1616	* number of output units corresponding to the transformation of
1617	* all the input units, even in case of a buffer overflow.
1618	* @param src The 16-bit Unicode source string to convert (need not be well-formed UTF-16).
1619	* @param srcLength The length of the source string. If -1, then src must be zero-terminated.
1620	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
1621	* pass the U_SUCCESS() test, or else the function returns
1622	* immediately. Check for U_FAILURE() on output or use with
1623	* function chaining. (See User Guide for details.)
1624	* @return The pointer to the destination buffer.
1625	* @see u_strToUTF8WithSub
1626	* @see u_strFromJavaModifiedUTF8WithSub
1627	* @stable ICU 4.4
1628	*/
1629	U_STABLE char* U_EXPORT2
1630	u_strToJavaModifiedUTF8(
1631	char *dest,
1632	int32_t destCapacity,
1633	int32_t *pDestLength,
1634	const UChar *src,
1635	int32_t srcLength,
1636	UErrorCode *pErrorCode);
1637
1638	/**
1639	* Convert a Java Modified UTF-8 string (src) to a 16-bit Unicode string (dest).
1640	* If the input string is not well-formed and no substitution char is specified,
1641	* then the U_INVALID_CHAR_FOUND error code is set.
1642	*
1643	* This function behaves according to the documentation for Java DataInput.readUTF()
1644	* except that it takes a length parameter rather than
1645	* interpreting the first two input bytes as the length.
1646	* See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF()
1647	*
1648	* The output string may not be well-formed UTF-16.
1649	*
1650	* @param dest A buffer for the 16-bit Unicode result string. The result will be
1651	* zero-terminated if the buffer is large enough.
1652	* @param destCapacity The size of the dest buffer (number of UChars). If it is 0, then
1653	* dest may be NULL and the function will only report the length of the
1654	* result without writing any of the result string (pre-flighting).
1655	* @param pDestLength A pointer to receive the length of the result, in output units. If
1656	* pDestLength!=NULL then *pDestLength is always set to the
1657	* number of output units corresponding to the transformation of
1658	* all the input units, even in case of a buffer overflow.
1659	* @param src The Java Modified UTF-8 source string to convert.
1660	* @param srcLength The length of the source string. If -1, then src must be zero-terminated.
1661	* @param subchar The substitution character to use in place of an illegal input sequence,
1662	* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
1663	* A substitution character can be any valid Unicode code point (up to U+10FFFF)
1664	* except for surrogate code points (U+D800..U+DFFF).
1665	* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
1666	* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
1667	* Set to 0 if no substitutions occur or subchar<0.
1668	* pNumSubstitutions can be NULL.
1669	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
1670	* pass the U_SUCCESS() test, or else the function returns
1671	* immediately. Check for U_FAILURE() on output or use with
1672	* function chaining. (See User Guide for details.)
1673	* @return The pointer to the destination buffer.
1674	* @see u_strFromUTF8WithSub
1675	* @see u_strFromUTF8Lenient
1676	* @see u_strToJavaModifiedUTF8
1677	* @stable ICU 4.4
1678	*/
1679	U_STABLE UChar* U_EXPORT2
1680	u_strFromJavaModifiedUTF8WithSub(
1681	UChar *dest,
1682	int32_t destCapacity,
1683	int32_t *pDestLength,
1684	const char *src,
1685	int32_t srcLength,
1686	UChar32 subchar, int32_t *pNumSubstitutions,
1687	UErrorCode *pErrorCode);
1688
1689	#endif
1690

Browse the source code of include/unicode/ustring.h