1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2005-2012, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: ucasemap.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2005may06
16* created by: Markus W. Scherer
17*
18* Case mapping service object and functions using it.
19*/
20
21#ifndef __UCASEMAP_H__
22#define __UCASEMAP_H__
23
24#include "unicode/utypes.h"
25#include "unicode/stringoptions.h"
26#include "unicode/ustring.h"
27
28#if U_SHOW_CPLUSPLUS_API
29#include "unicode/localpointer.h"
30#endif // U_SHOW_CPLUSPLUS_API
31
32/**
33 * \file
34 * \brief C API: Unicode case mapping functions using a UCaseMap service object.
35 *
36 * The service object takes care of memory allocations, data loading, and setup
37 * for the attributes, as usual.
38 *
39 * Currently, the functionality provided here does not overlap with uchar.h
40 * and ustring.h, except for ucasemap_toTitle().
41 *
42 * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
43 */
44
45/**
46 * UCaseMap is an opaque service object for newer ICU case mapping functions.
47 * Open one with ucasemap_open() and close it with ucasemap_close(). Older functions did not use a service object.
48 * @stable ICU 3.4
49 */
50struct UCaseMap;
51typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
52
53/**
54 * Open a UCaseMap service object for a locale and a set of options.
55 * The locale ID and options are preprocessed so that functions using the
56 * service object need not process them in each call.
57 *
58 * @param locale ICU locale ID, used for language-dependent
59 * upper-/lower-/title-casing according to the Unicode standard.
60 * Usual semantics: ""=root, NULL=default locale, etc.
61 * @param options Options bit set, used for case folding and string comparisons.
62 * Same flags as for u_foldCase(), u_strFoldCase(),
63 * u_strCaseCompare(), etc.; the U_TITLECASE_... flags listed below modify titlecasing.
64 * Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
65 * @param pErrorCode Must be a valid pointer to an error code value,
66 * which must not indicate a failure before the function call.
67 * @return Pointer to a UCaseMap service object, if successful; close it with ucasemap_close().
68 *
69 * @see U_FOLD_CASE_DEFAULT
70 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
71 * @see U_TITLECASE_NO_LOWERCASE
72 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
73 * @stable ICU 3.4
74 */
75U_CAPI UCaseMap * U_EXPORT2
76ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
77
78/**
79 * Close a UCaseMap service object.
80 * @param csm UCaseMap service object to be closed. A break iterator adopted via ucasemap_setBreakIterator() is released as well.
81 * @stable ICU 3.4
82 */
83U_CAPI void U_EXPORT2
84ucasemap_close(UCaseMap *csm);
85
86#if U_SHOW_CPLUSPLUS_API
87
88U_NAMESPACE_BEGIN
89
90/**
91 * \class LocalUCaseMapPointer
92 * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
93 * For most methods see the LocalPointerBase base class.
94 *
95 * @see LocalPointerBase
96 * @see LocalPointer
97 * @stable ICU 4.4
98 */
99U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
100
101U_NAMESPACE_END
102
103#endif // U_SHOW_CPLUSPLUS_API
104
105/**
106 * Get the locale ID that is used for language-dependent case mappings.
107 * @param csm UCaseMap service object.
108 * @return The locale ID that csm currently uses for language-dependent case mappings.
109 * @stable ICU 3.4
110 */
111U_CAPI const char * U_EXPORT2
112ucasemap_getLocale(const UCaseMap *csm);
113
114/**
115 * Get the options bit set that is used for case folding and string comparisons.
116 * @param csm UCaseMap service object.
117 * @return The options bit set that csm currently uses, see ucasemap_open().
118 * @stable ICU 3.4
119 */
120U_CAPI uint32_t U_EXPORT2
121ucasemap_getOptions(const UCaseMap *csm);
122
123/**
124 * Set the locale ID that is used for language-dependent case mappings.
125 *
126 * @param csm UCaseMap service object whose locale is to be set.
127 * @param locale Locale ID, with the same semantics as the locale parameter of ucasemap_open().
128 * @param pErrorCode Must be a valid pointer to an error code value,
129 * which must not indicate a failure before the function call.
130 *
131 * @see ucasemap_open
132 * @stable ICU 3.4
133 */
134U_CAPI void U_EXPORT2
135ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
136
137/**
138 * Set the options bit set that is used for case folding and string comparisons.
139 *
140 * @param csm UCaseMap service object whose options are to be set.
141 * @param options Options bit set, with the same semantics as the options parameter of ucasemap_open().
142 * @param pErrorCode Must be a valid pointer to an error code value,
143 * which must not indicate a failure before the function call.
144 *
145 * @see ucasemap_open
146 * @stable ICU 3.4
147 */
148U_CAPI void U_EXPORT2
149ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
150
151#if !UCONFIG_NO_BREAK_ITERATION
152
153/**
154 * Get the break iterator that is used for titlecasing.
155 * The break iterator is returned as a const pointer: do not modify it.
156 * @param csm UCaseMap service object.
157 * @return The titlecasing break iterator of csm.
158 * @stable ICU 3.8
159 */
160U_CAPI const UBreakIterator * U_EXPORT2
161ucasemap_getBreakIterator(const UCaseMap *csm);
162
163/**
164 * Set the break iterator that is used for titlecasing.
165 * The UCaseMap service object releases a previously set break iterator
166 * and "adopts" this new one, taking ownership of it.
167 * It will be released in a subsequent call to ucasemap_setBreakIterator()
168 * or ucasemap_close().
169 *
170 * Break iterator operations are not thread-safe. Therefore, titlecasing
171 * functions use non-const UCaseMap objects. It is not possible to titlecase
172 * strings concurrently using the same UCaseMap.
173 *
174 * @param csm UCaseMap service object.
175 * @param iterToAdopt Break iterator to be adopted for titlecasing; csm takes ownership of it.
176 * @param pErrorCode Must be a valid pointer to an error code value,
177 * which must not indicate a failure before the function call.
178 *
179 * @see ucasemap_toTitle
180 * @see ucasemap_utf8ToTitle
181 * @stable ICU 3.8
182 */
183U_CAPI void U_EXPORT2
184ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
185
186/**
187 * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
188 * except that it takes ucasemap_setOptions() into account and has performance
189 * advantages from being able to use a UCaseMap object for multiple case mapping
190 * operations, saving setup time.
191 *
192 * Casing is locale-dependent and context-sensitive.
193 * Titlecasing uses a break iterator to find the first characters of words
194 * that are to be titlecased. It titlecases those characters and lowercases
195 * all others. (This can be modified with ucasemap_setOptions().)
196 *
197 * Note: This function takes a non-const UCaseMap pointer because it will
198 * open a default break iterator if no break iterator was set yet,
199 * and effectively call ucasemap_setBreakIterator();
200 * also because the break iterator is stateful and will be modified during
201 * the iteration.
202 *
203 * The titlecase break iterator can be provided to customize for arbitrary
204 * styles, using rules and dictionaries beyond the standard iterators.
205 * The standard titlecase iterator for the root locale implements the
206 * algorithm of Unicode TR 21.
207 *
208 * This function uses only the setText(), first() and next() methods of the
209 * provided break iterator.
210 *
211 * The result may be longer or shorter than the original.
212 * The source string and the destination buffer must not overlap.
213 *
214 * @param csm UCaseMap service object. This pointer is non-const!
215 * See the note above for details.
216 * @param dest A buffer for the result string. The result will be NUL-terminated if
217 * the buffer is large enough.
218 * The contents are undefined in case of failure.
219 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
220 * dest may be NULL and the function will only return the length of the result
221 * without writing any of the result string.
222 * @param src The original string.
223 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
224 * @param pErrorCode Must be a valid pointer to an error code value,
225 * which must not indicate a failure before the function call.
226 * @return The length of the result string, both on success and in case of a buffer overflow;
227 * in the overflow case it will be greater than destCapacity.
228 *
229 * @see u_strToTitle
230 * @stable ICU 3.8
231 */
232U_CAPI int32_t U_EXPORT2
233ucasemap_toTitle(UCaseMap *csm,
234 UChar *dest, int32_t destCapacity,
235 const UChar *src, int32_t srcLength,
236 UErrorCode *pErrorCode);
237
238#endif // UCONFIG_NO_BREAK_ITERATION
239
240/**
241 * Lowercase the characters in a UTF-8 string.
242 * Casing is locale-dependent and context-sensitive.
243 * The result may be longer or shorter than the original.
244 * The source string and the destination buffer must not overlap.
245 *
246 * @param csm UCaseMap service object.
247 * @param dest A buffer for the result string. The result will be NUL-terminated if
248 * the buffer is large enough.
249 * The contents are undefined in case of failure.
250 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
251 * dest may be NULL and the function will only return the length of the result
252 * without writing any of the result string.
253 * @param src The original UTF-8 string.
254 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
255 * @param pErrorCode Must be a valid pointer to an error code value,
256 * which must not indicate a failure before the function call.
257 * @return The length of the result string, both on success and in case of a buffer overflow;
258 * in the overflow case it will be greater than destCapacity.
259 *
260 * @see u_strToLower
261 * @stable ICU 3.4
262 */
263U_CAPI int32_t U_EXPORT2
264ucasemap_utf8ToLower(const UCaseMap *csm,
265 char *dest, int32_t destCapacity,
266 const char *src, int32_t srcLength,
267 UErrorCode *pErrorCode);
268
269/**
270 * Uppercase the characters in a UTF-8 string.
271 * Casing is locale-dependent and context-sensitive.
272 * The result may be longer or shorter than the original.
273 * The source string and the destination buffer must not overlap.
274 *
275 * @param csm UCaseMap service object.
276 * @param dest A buffer for the result string. The result will be NUL-terminated if
277 * the buffer is large enough.
278 * The contents are undefined in case of failure.
279 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
280 * dest may be NULL and the function will only return the length of the result
281 * without writing any of the result string.
282 * @param src The original UTF-8 string.
283 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
284 * @param pErrorCode Must be a valid pointer to an error code value,
285 * which must not indicate a failure before the function call.
286 * @return The length of the result string, both on success and in case of a buffer overflow;
287 * in the overflow case it will be greater than destCapacity.
288 *
289 * @see u_strToUpper
290 * @stable ICU 3.4
291 */
292U_CAPI int32_t U_EXPORT2
293ucasemap_utf8ToUpper(const UCaseMap *csm,
294 char *dest, int32_t destCapacity,
295 const char *src, int32_t srcLength,
296 UErrorCode *pErrorCode);
297
298#if !UCONFIG_NO_BREAK_ITERATION
299
300/**
301 * Titlecase a UTF-8 string.
302 * Casing is locale-dependent and context-sensitive.
303 * Titlecasing uses a break iterator to find the first characters of words
304 * that are to be titlecased. It titlecases those characters and lowercases
305 * all others. (This can be modified with ucasemap_setOptions().)
306 *
307 * Note: This function takes a non-const UCaseMap pointer because it will
308 * open a default break iterator if no break iterator was set yet,
309 * and effectively call ucasemap_setBreakIterator();
310 * also because the break iterator is stateful and will be modified during
311 * the iteration.
312 *
313 * The titlecase break iterator can be provided to customize for arbitrary
314 * styles, using rules and dictionaries beyond the standard iterators.
315 * The standard titlecase iterator for the root locale implements the
316 * algorithm of Unicode TR 21.
317 *
318 * This function uses only the setUText(), first(), next() and close() methods of the
319 * provided break iterator.
320 *
321 * The result may be longer or shorter than the original.
322 * The source string and the destination buffer must not overlap.
323 *
324 * @param csm UCaseMap service object. This pointer is non-const!
325 * See the note above for details.
326 * @param dest A buffer for the result string. The result will be NUL-terminated if
327 * the buffer is large enough.
328 * The contents are undefined in case of failure.
329 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
330 * dest may be NULL and the function will only return the length of the result
331 * without writing any of the result string.
332 * @param src The original UTF-8 string.
333 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
334 * @param pErrorCode Must be a valid pointer to an error code value,
335 * which must not indicate a failure before the function call.
336 * @return The length of the result string, both on success and in case of a buffer overflow;
337 * in the overflow case it will be greater than destCapacity.
338 *
339 * @see u_strToTitle
340 * @see U_TITLECASE_NO_LOWERCASE
341 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
342 * @stable ICU 3.8
343 */
344U_CAPI int32_t U_EXPORT2
345ucasemap_utf8ToTitle(UCaseMap *csm,
346 char *dest, int32_t destCapacity,
347 const char *src, int32_t srcLength,
348 UErrorCode *pErrorCode);
349
350#endif // UCONFIG_NO_BREAK_ITERATION
351
352/**
353 * Case-folds the characters in a UTF-8 string.
354 *
355 * Case-folding is locale-independent and not context-sensitive,
356 * but there is an option for whether to include or exclude mappings for dotted I
357 * and dotless i that are marked with 'T' in CaseFolding.txt.
358 *
359 * The result may be longer or shorter than the original.
360 * The source string and the destination buffer must not overlap.
361 *
362 * @param csm UCaseMap service object.
363 * @param dest A buffer for the result string. The result will be NUL-terminated if
364 * the buffer is large enough.
365 * The contents are undefined in case of failure.
366 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
367 * dest may be NULL and the function will only return the length of the result
368 * without writing any of the result string.
369 * @param src The original UTF-8 string.
370 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
371 * @param pErrorCode Must be a valid pointer to an error code value,
372 * which must not indicate a failure before the function call.
373 * @return The length of the result string, both on success and in case of a buffer overflow;
374 * in the overflow case it will be greater than destCapacity.
375 *
376 * @see u_strFoldCase
377 * @see ucasemap_setOptions
378 * @see U_FOLD_CASE_DEFAULT
379 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
380 * @stable ICU 3.8
381 */
382U_CAPI int32_t U_EXPORT2
383ucasemap_utf8FoldCase(const UCaseMap *csm,
384 char *dest, int32_t destCapacity,
385 const char *src, int32_t srcLength,
386 UErrorCode *pErrorCode);
387
388#endif
389