1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | // casemap.h |
5 | // created: 2017jan12 Markus W. Scherer |
6 | |
7 | #ifndef __CASEMAP_H__ |
8 | #define __CASEMAP_H__ |
9 | |
10 | #include "unicode/utypes.h" |
11 | |
12 | #if U_SHOW_CPLUSPLUS_API |
13 | |
14 | #include "unicode/stringpiece.h" |
15 | #include "unicode/uobject.h" |
16 | |
17 | /** |
18 | * \file |
19 | * \brief C++ API: Low-level C++ case mapping functions. |
20 | */ |
21 | |
22 | U_NAMESPACE_BEGIN |
23 | |
24 | class BreakIterator; |
25 | class ByteSink; |
26 | class Edits; |
27 | |
28 | /** |
29 | * Low-level C++ case mapping functions. |
30 | * All functions are static; this class cannot be instantiated or copied. |
31 | * @stable ICU 59 |
32 | */ |
33 | class U_COMMON_API CaseMap U_FINAL : public UMemory { |
34 | public: |
35 | /** |
36 | * Lowercases a UTF-16 string and optionally records edits. |
37 | * Casing is locale-dependent and context-sensitive. |
38 | * The result may be longer or shorter than the original. |
39 | * The source string and the destination buffer must not overlap. |
40 | * |
41 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) |
42 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. |
43 | * @param src The original string. |
44 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. |
45 | * @param dest A buffer for the result string. The result will be NUL-terminated if |
46 | * the buffer is large enough. |
47 | * The contents are undefined in case of failure. |
48 | * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then |
49 | * dest may be NULL and the function will only return the length of the result |
50 | * without writing any of the result string. |
51 | * @param edits Records edits for index mapping, working with styled text, |
52 | * and getting only changes (if any). |
53 | * The Edits contents are undefined if any error occurs. |
54 | * This function calls edits->reset() first unless |
55 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
56 | * @param errorCode Reference to an in/out error code value |
57 | * which must not indicate a failure before the function call. |
58 | * @return The length of the result string, if successful. |
59 | * When the result would be longer than destCapacity, |
60 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. |
61 | * |
62 | * @see u_strToLower |
63 | * @stable ICU 59 |
64 | */ |
65 | static int32_t toLower( |
66 | const char *locale, uint32_t options, |
67 | const char16_t *src, int32_t srcLength, |
68 | char16_t *dest, int32_t destCapacity, Edits *edits, |
69 | UErrorCode &errorCode); |
70 | |
71 | /** |
72 | * Uppercases a UTF-16 string and optionally records edits. |
73 | * Casing is locale-dependent and context-sensitive. |
74 | * The result may be longer or shorter than the original. |
75 | * The source string and the destination buffer must not overlap. |
76 | * |
77 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) |
78 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. |
79 | * @param src The original string. |
80 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. |
81 | * @param dest A buffer for the result string. The result will be NUL-terminated if |
82 | * the buffer is large enough. |
83 | * The contents are undefined in case of failure. |
84 | * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then |
85 | * dest may be NULL and the function will only return the length of the result |
86 | * without writing any of the result string. |
87 | * @param edits Records edits for index mapping, working with styled text, |
88 | * and getting only changes (if any). |
89 | * The Edits contents are undefined if any error occurs. |
90 | * This function calls edits->reset() first unless |
91 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
92 | * @param errorCode Reference to an in/out error code value |
93 | * which must not indicate a failure before the function call. |
94 | * @return The length of the result string, if successful. |
95 | * When the result would be longer than destCapacity, |
96 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. |
97 | * |
98 | * @see u_strToUpper |
99 | * @stable ICU 59 |
100 | */ |
101 | static int32_t toUpper( |
102 | const char *locale, uint32_t options, |
103 | const char16_t *src, int32_t srcLength, |
104 | char16_t *dest, int32_t destCapacity, Edits *edits, |
105 | UErrorCode &errorCode); |
106 | |
107 | #if !UCONFIG_NO_BREAK_ITERATION |
108 | |
109 | /** |
110 | * Titlecases a UTF-16 string and optionally records edits. |
111 | * Casing is locale-dependent and context-sensitive. |
112 | * The result may be longer or shorter than the original. |
113 | * The source string and the destination buffer must not overlap. |
114 | * |
115 | * Titlecasing uses a break iterator to find the first characters of words |
116 | * that are to be titlecased. It titlecases those characters and lowercases |
117 | * all others. (This can be modified with options bits.) |
118 | * |
119 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) |
120 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, |
121 | * U_TITLECASE_NO_LOWERCASE, |
122 | * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, |
123 | * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. |
124 | * @param iter A break iterator to find the first characters of words that are to be titlecased. |
125 | * It is set to the source string (setText()) |
126 | * and used one or more times for iteration (first() and next()). |
127 | * If NULL, then a word break iterator for the locale is used |
128 | * (or something equivalent). |
129 | * @param src The original string. |
130 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. |
131 | * @param dest A buffer for the result string. The result will be NUL-terminated if |
132 | * the buffer is large enough. |
133 | * The contents are undefined in case of failure. |
134 | * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then |
135 | * dest may be NULL and the function will only return the length of the result |
136 | * without writing any of the result string. |
137 | * @param edits Records edits for index mapping, working with styled text, |
138 | * and getting only changes (if any). |
139 | * The Edits contents are undefined if any error occurs. |
140 | * This function calls edits->reset() first unless |
141 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
142 | * @param errorCode Reference to an in/out error code value |
143 | * which must not indicate a failure before the function call. |
144 | * @return The length of the result string, if successful. |
145 | * When the result would be longer than destCapacity, |
146 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. |
147 | * |
148 | * @see u_strToTitle |
149 | * @see ucasemap_toTitle |
150 | * @stable ICU 59 |
151 | */ |
152 | static int32_t toTitle( |
153 | const char *locale, uint32_t options, BreakIterator *iter, |
154 | const char16_t *src, int32_t srcLength, |
155 | char16_t *dest, int32_t destCapacity, Edits *edits, |
156 | UErrorCode &errorCode); |
157 | |
158 | #endif // UCONFIG_NO_BREAK_ITERATION |
159 | |
160 | /** |
161 | * Case-folds a UTF-16 string and optionally records edits. |
162 | * |
163 | * Case folding is locale-independent and not context-sensitive, |
164 | * but there is an option for whether to include or exclude mappings for dotted I |
165 | * and dotless i that are marked with 'T' in CaseFolding.txt. |
166 | * |
167 | * The result may be longer or shorter than the original. |
168 | * The source string and the destination buffer must not overlap. |
169 | * |
170 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, |
171 | * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. |
172 | * @param src The original string. |
173 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. |
174 | * @param dest A buffer for the result string. The result will be NUL-terminated if |
175 | * the buffer is large enough. |
176 | * The contents are undefined in case of failure. |
177 | * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then |
178 | * dest may be NULL and the function will only return the length of the result |
179 | * without writing any of the result string. |
180 | * @param edits Records edits for index mapping, working with styled text, |
181 | * and getting only changes (if any). |
182 | * The Edits contents are undefined if any error occurs. |
183 | * This function calls edits->reset() first unless |
184 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
185 | * @param errorCode Reference to an in/out error code value |
186 | * which must not indicate a failure before the function call. |
187 | * @return The length of the result string, if successful. |
188 | * When the result would be longer than destCapacity, |
189 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. |
190 | * |
191 | * @see u_strFoldCase |
192 | * @stable ICU 59 |
193 | */ |
194 | static int32_t fold( |
195 | uint32_t options, |
196 | const char16_t *src, int32_t srcLength, |
197 | char16_t *dest, int32_t destCapacity, Edits *edits, |
198 | UErrorCode &errorCode); |
199 | |
200 | /** |
201 | * Lowercases a UTF-8 string and optionally records edits. |
202 | * Casing is locale-dependent and context-sensitive. |
203 | * The result may be longer or shorter than the original. |
204 | * |
205 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) |
206 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. |
207 | * @param src The original string. |
208 | * @param sink A ByteSink to which the result string is written. |
209 | * sink.Flush() is called at the end. |
210 | * @param edits Records edits for index mapping, working with styled text, |
211 | * and getting only changes (if any). |
212 | * The Edits contents are undefined if any error occurs. |
213 | * This function calls edits->reset() first unless |
214 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
215 | * @param errorCode Reference to an in/out error code value |
216 | * which must not indicate a failure before the function call. |
217 | * |
218 | * @see ucasemap_utf8ToLower |
219 | * @stable ICU 60 |
220 | */ |
221 | static void utf8ToLower( |
222 | const char *locale, uint32_t options, |
223 | StringPiece src, ByteSink &sink, Edits *edits, |
224 | UErrorCode &errorCode); |
225 | |
226 | /** |
227 | * Uppercases a UTF-8 string and optionally records edits. |
228 | * Casing is locale-dependent and context-sensitive. |
229 | * The result may be longer or shorter than the original. |
230 | * |
231 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) |
232 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. |
233 | * @param src The original string. |
234 | * @param sink A ByteSink to which the result string is written. |
235 | * sink.Flush() is called at the end. |
236 | * @param edits Records edits for index mapping, working with styled text, |
237 | * and getting only changes (if any). |
238 | * The Edits contents are undefined if any error occurs. |
239 | * This function calls edits->reset() first unless |
240 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
241 | * @param errorCode Reference to an in/out error code value |
242 | * which must not indicate a failure before the function call. |
243 | * |
244 | * @see ucasemap_utf8ToUpper |
245 | * @stable ICU 60 |
246 | */ |
247 | static void utf8ToUpper( |
248 | const char *locale, uint32_t options, |
249 | StringPiece src, ByteSink &sink, Edits *edits, |
250 | UErrorCode &errorCode); |
251 | |
252 | #if !UCONFIG_NO_BREAK_ITERATION |
253 | |
254 | /** |
255 | * Titlecases a UTF-8 string and optionally records edits. |
256 | * Casing is locale-dependent and context-sensitive. |
257 | * The result may be longer or shorter than the original. |
258 | * |
259 | * Titlecasing uses a break iterator to find the first characters of words |
260 | * that are to be titlecased. It titlecases those characters and lowercases |
261 | * all others. (This can be modified with options bits.) |
262 | * |
263 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) |
264 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, |
265 | * U_TITLECASE_NO_LOWERCASE, |
266 | * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, |
267 | * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. |
268 | * @param iter A break iterator to find the first characters of words that are to be titlecased. |
269 | * It is set to the source string (setUText()) |
270 | * and used one or more times for iteration (first() and next()). |
271 | * If NULL, then a word break iterator for the locale is used |
272 | * (or something equivalent). |
273 | * @param src The original string. |
274 | * @param sink A ByteSink to which the result string is written. |
275 | * sink.Flush() is called at the end. |
276 | * @param edits Records edits for index mapping, working with styled text, |
277 | * and getting only changes (if any). |
278 | * The Edits contents are undefined if any error occurs. |
279 | * This function calls edits->reset() first unless |
280 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
281 | * @param errorCode Reference to an in/out error code value |
282 | * which must not indicate a failure before the function call. |
283 | * |
284 | * @see ucasemap_utf8ToTitle |
285 | * @stable ICU 60 |
286 | */ |
287 | static void utf8ToTitle( |
288 | const char *locale, uint32_t options, BreakIterator *iter, |
289 | StringPiece src, ByteSink &sink, Edits *edits, |
290 | UErrorCode &errorCode); |
291 | |
292 | #endif // UCONFIG_NO_BREAK_ITERATION |
293 | |
294 | /** |
295 | * Case-folds a UTF-8 string and optionally records edits. |
296 | * |
297 | * Case folding is locale-independent and not context-sensitive, |
298 | * but there is an option for whether to include or exclude mappings for dotted I |
299 | * and dotless i that are marked with 'T' in CaseFolding.txt. |
300 | * |
301 | * The result may be longer or shorter than the original. |
302 | * |
303 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. |
304 | * @param src The original string. |
305 | * @param sink A ByteSink to which the result string is written. |
306 | * sink.Flush() is called at the end. |
307 | * @param edits Records edits for index mapping, working with styled text, |
308 | * and getting only changes (if any). |
309 | * The Edits contents are undefined if any error occurs. |
310 | * This function calls edits->reset() first unless |
311 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
312 | * @param errorCode Reference to an in/out error code value |
313 | * which must not indicate a failure before the function call. |
314 | * |
315 | * @see ucasemap_utf8FoldCase |
316 | * @stable ICU 60 |
317 | */ |
318 | static void utf8Fold( |
319 | uint32_t options, |
320 | StringPiece src, ByteSink &sink, Edits *edits, |
321 | UErrorCode &errorCode); |
322 | |
323 | /** |
324 | * Lowercases a UTF-8 string and optionally records edits. |
325 | * Casing is locale-dependent and context-sensitive. |
326 | * The result may be longer or shorter than the original. |
327 | * The source string and the destination buffer must not overlap. |
328 | * |
329 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) |
330 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. |
331 | * @param src The original string. |
332 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. |
333 | * @param dest A buffer for the result string. The result will be NUL-terminated if |
334 | * the buffer is large enough. |
335 | * The contents are undefined in case of failure. |
336 | * @param destCapacity The size of the buffer (number of bytes). If it is 0, then |
337 | * dest may be NULL and the function will only return the length of the result |
338 | * without writing any of the result string. |
339 | * @param edits Records edits for index mapping, working with styled text, |
340 | * and getting only changes (if any). |
341 | * The Edits contents are undefined if any error occurs. |
342 | * This function calls edits->reset() first unless |
343 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
344 | * @param errorCode Reference to an in/out error code value |
345 | * which must not indicate a failure before the function call. |
346 | * @return The length of the result string, if successful. |
347 | * When the result would be longer than destCapacity, |
348 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. |
349 | * |
350 | * @see ucasemap_utf8ToLower |
351 | * @stable ICU 59 |
352 | */ |
353 | static int32_t utf8ToLower( |
354 | const char *locale, uint32_t options, |
355 | const char *src, int32_t srcLength, |
356 | char *dest, int32_t destCapacity, Edits *edits, |
357 | UErrorCode &errorCode); |
358 | |
359 | /** |
360 | * Uppercases a UTF-8 string and optionally records edits. |
361 | * Casing is locale-dependent and context-sensitive. |
362 | * The result may be longer or shorter than the original. |
363 | * The source string and the destination buffer must not overlap. |
364 | * |
365 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) |
366 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. |
367 | * @param src The original string. |
368 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. |
369 | * @param dest A buffer for the result string. The result will be NUL-terminated if |
370 | * the buffer is large enough. |
371 | * The contents are undefined in case of failure. |
372 | * @param destCapacity The size of the buffer (number of bytes). If it is 0, then |
373 | * dest may be NULL and the function will only return the length of the result |
374 | * without writing any of the result string. |
375 | * @param edits Records edits for index mapping, working with styled text, |
376 | * and getting only changes (if any). |
377 | * The Edits contents are undefined if any error occurs. |
378 | * This function calls edits->reset() first unless |
379 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
380 | * @param errorCode Reference to an in/out error code value |
381 | * which must not indicate a failure before the function call. |
382 | * @return The length of the result string, if successful. |
383 | * When the result would be longer than destCapacity, |
384 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. |
385 | * |
386 | * @see ucasemap_utf8ToUpper |
387 | * @stable ICU 59 |
388 | */ |
389 | static int32_t utf8ToUpper( |
390 | const char *locale, uint32_t options, |
391 | const char *src, int32_t srcLength, |
392 | char *dest, int32_t destCapacity, Edits *edits, |
393 | UErrorCode &errorCode); |
394 | |
395 | #if !UCONFIG_NO_BREAK_ITERATION |
396 | |
397 | /** |
398 | * Titlecases a UTF-8 string and optionally records edits. |
399 | * Casing is locale-dependent and context-sensitive. |
400 | * The result may be longer or shorter than the original. |
401 | * The source string and the destination buffer must not overlap. |
402 | * |
403 | * Titlecasing uses a break iterator to find the first characters of words |
404 | * that are to be titlecased. It titlecases those characters and lowercases |
405 | * all others. (This can be modified with options bits.) |
406 | * |
407 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) |
408 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, |
409 | * U_TITLECASE_NO_LOWERCASE, |
410 | * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, |
411 | * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. |
412 | * @param iter A break iterator to find the first characters of words that are to be titlecased. |
413 | * It is set to the source string (setUText()) |
414 | * and used one or more times for iteration (first() and next()). |
415 | * If NULL, then a word break iterator for the locale is used |
416 | * (or something equivalent). |
417 | * @param src The original string. |
418 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. |
419 | * @param dest A buffer for the result string. The result will be NUL-terminated if |
420 | * the buffer is large enough. |
421 | * The contents are undefined in case of failure. |
422 | * @param destCapacity The size of the buffer (number of bytes). If it is 0, then |
423 | * dest may be NULL and the function will only return the length of the result |
424 | * without writing any of the result string. |
425 | * @param edits Records edits for index mapping, working with styled text, |
426 | * and getting only changes (if any). |
427 | * The Edits contents are undefined if any error occurs. |
428 | * This function calls edits->reset() first unless |
429 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
430 | * @param errorCode Reference to an in/out error code value |
431 | * which must not indicate a failure before the function call. |
432 | * @return The length of the result string, if successful. |
433 | * When the result would be longer than destCapacity, |
434 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. |
435 | * |
436 | * @see ucasemap_utf8ToTitle |
437 | * @stable ICU 59 |
438 | */ |
439 | static int32_t utf8ToTitle( |
440 | const char *locale, uint32_t options, BreakIterator *iter, |
441 | const char *src, int32_t srcLength, |
442 | char *dest, int32_t destCapacity, Edits *edits, |
443 | UErrorCode &errorCode); |
444 | |
445 | #endif // UCONFIG_NO_BREAK_ITERATION |
446 | |
447 | /** |
448 | * Case-folds a UTF-8 string and optionally records edits. |
449 | * |
450 | * Case folding is locale-independent and not context-sensitive, |
451 | * but there is an option for whether to include or exclude mappings for dotted I |
452 | * and dotless i that are marked with 'T' in CaseFolding.txt. |
453 | * |
454 | * The result may be longer or shorter than the original. |
455 | * The source string and the destination buffer must not overlap. |
456 | * |
457 | * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, |
458 | * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. |
459 | * @param src The original string. |
460 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. |
461 | * @param dest A buffer for the result string. The result will be NUL-terminated if |
462 | * the buffer is large enough. |
463 | * The contents are undefined in case of failure. |
464 | * @param destCapacity The size of the buffer (number of bytes). If it is 0, then |
465 | * dest may be NULL and the function will only return the length of the result |
466 | * without writing any of the result string. |
467 | * @param edits Records edits for index mapping, working with styled text, |
468 | * and getting only changes (if any). |
469 | * The Edits contents are undefined if any error occurs. |
470 | * This function calls edits->reset() first unless |
471 | * options includes U_EDITS_NO_RESET. edits can be NULL. |
472 | * @param errorCode Reference to an in/out error code value |
473 | * which must not indicate a failure before the function call. |
474 | * @return The length of the result string, if successful. |
475 | * When the result would be longer than destCapacity, |
476 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. |
477 | * |
478 | * @see ucasemap_utf8FoldCase |
479 | * @stable ICU 59 |
480 | */ |
481 | static int32_t utf8Fold( |
482 | uint32_t options, |
483 | const char *src, int32_t srcLength, |
484 | char *dest, int32_t destCapacity, Edits *edits, |
485 | UErrorCode &errorCode); |
486 | |
487 | private: |
488 | CaseMap() = delete; |
489 | CaseMap(const CaseMap &other) = delete; |
490 | CaseMap &operator=(const CaseMap &other) = delete; |
491 | }; |
492 | |
493 | U_NAMESPACE_END |
494 | |
495 | #endif /* U_SHOW_CPLUSPLUS_API */ |
496 | |
497 | #endif // __CASEMAP_H__ |
498 | |