1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | // stringoptions.h |
5 | // created: 2017jun08 Markus W. Scherer |
6 | |
7 | #ifndef __STRINGOPTIONS_H__ |
8 | #define __STRINGOPTIONS_H__ |
9 | |
10 | #include "unicode/utypes.h" |
11 | |
12 | /** |
13 | * \file |
14 | * \brief C API: Option bit constants (for options bit sets) used by various string and character processing functions. |
15 | */ |
16 | |
17 | /** |
18 | * Option value for case folding (0, no option bit set): Use default mappings defined in CaseFolding.txt. |
19 | * |
20 | * @stable ICU 2.0 |
21 | */ |
22 | #define U_FOLD_CASE_DEFAULT 0 |
23 | |
24 | /** |
25 | * Option value for case folding: |
26 | * |
27 | * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I |
28 | * and dotless i appropriately for Turkic languages (tr, az). |
29 | * |
30 | * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that |
31 | * were to be included for default mappings and |
32 | * excluded for the Turkic-specific mappings. |
33 | * |
34 | * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that |
35 | * are to be excluded for default mappings and included for the Turkic-specific mappings. |
36 | * |
37 | * @see U_FOLD_CASE_DEFAULT |
38 | * @stable ICU 2.0 |
39 | */ |
40 | #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 |
41 | |
42 | /** |
43 | * Titlecase the string as a whole rather than each word. |
44 | * (Titlecase only the character at index 0, possibly adjusted.) |
45 | * Option bits value for titlecasing APIs that take an options bit set. |
46 | * |
47 | * It is an error to specify multiple titlecasing iterator options together, such as |
48 | * this value with U_TITLECASE_SENTENCES, or an options bit with an explicit BreakIterator. |
49 | * |
50 | * @see U_TITLECASE_ADJUST_TO_CASED |
51 | * @stable ICU 60 |
52 | */ |
53 | #define U_TITLECASE_WHOLE_STRING 0x20 |
54 | |
55 | /** |
56 | * Titlecase sentences rather than words. |
57 | * (Titlecase only the first character of each sentence, possibly adjusted.) |
58 | * Option bits value for titlecasing APIs that take an options bit set. |
59 | * |
60 | * It is an error to specify multiple titlecasing iterator options together, such as |
61 | * this value with U_TITLECASE_WHOLE_STRING, or an options bit with an explicit BreakIterator. |
62 | * |
63 | * @see U_TITLECASE_ADJUST_TO_CASED |
64 | * @stable ICU 60 |
65 | */ |
66 | #define U_TITLECASE_SENTENCES 0x40 |
67 | |
68 | /** |
69 | * Do not lowercase non-initial parts of words when titlecasing. |
70 | * Option bit for titlecasing APIs that take an options bit set. |
71 | * |
72 | * By default, titlecasing will titlecase the character at each |
73 | * (possibly adjusted) BreakIterator index and |
74 | * lowercase all other characters up to the next iterator index. |
75 | * With this option, the other characters are left unmodified instead of being lowercased. |
76 | * |
77 | * @see U_TITLECASE_ADJUST_TO_CASED |
78 | * @see UnicodeString::toTitle |
79 | * @see CaseMap::toTitle |
80 | * @see ucasemap_setOptions |
81 | * @see ucasemap_toTitle |
82 | * @see ucasemap_utf8ToTitle |
83 | * @stable ICU 3.8 |
84 | */ |
85 | #define U_TITLECASE_NO_LOWERCASE 0x100 |
86 | |
87 | /** |
88 | * Do not adjust the titlecasing BreakIterator indexes; |
89 | * titlecase exactly the characters at breaks from the iterator. |
90 | * Option bit for titlecasing APIs that take an options bit set. |
91 | * |
92 | * By default, titlecasing will take each break iterator index, |
93 | * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), |
94 | * and titlecase that one. |
95 | * Other characters are lowercased (see U_TITLECASE_NO_LOWERCASE). |
96 | * |
97 | * It is an error to specify multiple titlecasing adjustment options together, |
98 | * for example this bit and U_TITLECASE_ADJUST_TO_CASED. |
99 | * |
100 | * @see U_TITLECASE_ADJUST_TO_CASED |
101 | * @see U_TITLECASE_NO_LOWERCASE |
102 | * @see UnicodeString::toTitle |
103 | * @see CaseMap::toTitle |
104 | * @see ucasemap_setOptions |
105 | * @see ucasemap_toTitle |
106 | * @see ucasemap_utf8ToTitle |
107 | * @stable ICU 3.8 |
108 | */ |
109 | #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 |
110 | |
111 | /** |
112 | * Adjust each titlecasing BreakIterator index to the next cased character. |
113 | * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) |
114 | * Option bit for titlecasing APIs that take an options bit set. |
115 | * |
116 | * Before ICU 60, this was the default index adjustment in ICU. |
117 | * Since ICU 60, the default index adjustment is to the next character that is |
118 | * a letter, number, symbol, or private use code point. (Uncased modifier letters are skipped.) |
119 | * The difference in behavior is small for word titlecasing, |
120 | * but the new adjustment is much better for whole-string and sentence titlecasing: |
121 | * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". |
122 | * |
123 | * It is an error to specify multiple titlecasing adjustment options together, |
124 | * for example this bit and U_TITLECASE_NO_BREAK_ADJUSTMENT. |
125 | * |
126 | * @see U_TITLECASE_NO_BREAK_ADJUSTMENT |
127 | * @stable ICU 60 |
128 | */ |
129 | #define U_TITLECASE_ADJUST_TO_CASED 0x400 |
130 | |
131 | /** |
132 | * Option bit for string transformation functions: Do not first reset the Edits object. |
133 | * Used for example in some case-mapping and normalization functions. |
134 | * |
135 | * @see CaseMap |
136 | * @see Edits |
137 | * @see Normalizer2 |
138 | * @stable ICU 60 |
139 | */ |
140 | #define U_EDITS_NO_RESET 0x2000 |
141 | |
142 | /** |
143 | * Option bit: Omit unchanged text when recording how source substrings |
144 | * relate to changed and unchanged result substrings. |
145 | * Used for example in some case-mapping and normalization functions. |
146 | * |
147 | * @see CaseMap |
148 | * @see Edits |
149 | * @see Normalizer2 |
150 | * @stable ICU 60 |
151 | */ |
152 | #define U_OMIT_UNCHANGED_TEXT 0x4000 |
153 | |
154 | /** |
155 | * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.: |
156 | * When set, compare strings in code point order instead of code unit order. |
157 | * @stable ICU 2.2 |
158 | */ |
159 | #define U_COMPARE_CODE_POINT_ORDER 0x8000 |
160 | |
161 | /** |
162 | * Option bit for unorm_compare: |
163 | * When set, perform a case-insensitive comparison. |
164 | * @stable ICU 2.2 |
165 | */ |
166 | #define U_COMPARE_IGNORE_CASE 0x10000 |
167 | |
168 | /** |
169 | * Option bit for unorm_compare: |
170 | * When set, both input strings are assumed to already fulfill FCD conditions. |
171 | * @stable ICU 2.2 |
172 | */ |
173 | #define UNORM_INPUT_IS_FCD 0x20000 |
174 | |
175 | // Related definitions elsewhere. |
176 | // Options that are not meaningful in the same functions |
177 | // can share the same bits. |
178 | // |
179 | // Public: |
180 | // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 |
181 | // |
182 | // Internal: (may change or be removed) |
183 | // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff |
184 | // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 |
185 | // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 |
186 | // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 |
187 | // ustr_imp.h #define _STRNCMP_STYLE 0x1000 |
188 | // unormcmp.cpp #define _COMPARE_EQUIV 0x80000 |
189 | |
190 | #endif // __STRINGOPTIONS_H__ |
191 | |