| 1 | // © 2017 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | |
| 4 | // stringoptions.h |
| 5 | // created: 2017jun08 Markus W. Scherer |
| 6 | |
| 7 | #ifndef __STRINGOPTIONS_H__ |
| 8 | #define __STRINGOPTIONS_H__ |
| 9 | |
| 10 | #include "unicode/utypes.h" |
| 11 | |
| 12 | /** |
| 13 | * \file |
| 14 | * \brief C API: Option bit constants for the options bit sets taken by various string and character processing functions. |
| 15 | */ |
| 16 | |
| 17 | /** |
| 18 | * Option value for case folding: Use the default mappings defined in CaseFolding.txt (without the Turkic-specific mappings selected by U_FOLD_CASE_EXCLUDE_SPECIAL_I). |
| 19 | * |
| 20 | * @stable ICU 2.0 |
| 21 | */ |
| 22 | #define U_FOLD_CASE_DEFAULT 0 |
| 23 | |
| 24 | /** |
| 25 | * Option value for case folding (alternative to U_FOLD_CASE_DEFAULT): |
| 26 | * |
| 27 | * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I |
| 28 | * and dotless i appropriately for Turkic languages (tr, az). |
| 29 | * |
| 30 | * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that |
| 31 | * were to be included for default mappings and |
| 32 | * excluded for the Turkic-specific mappings. |
| 33 | * |
| 34 | * As of Unicode 3.2, CaseFolding.txt instead contains mappings marked with 'T' that |
| 35 | * are to be excluded for default mappings and |
| 36 | * included for the Turkic-specific mappings. |
| 37 | * |
| 38 | * @stable ICU 2.0 |
| 39 | */ |
| 40 | #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 |
| 41 | |
| 42 | #ifndef U_HIDE_DRAFT_API |
| 43 | |
| 44 | /** |
| 45 | * Titlecase the string as a whole rather than each word. |
| 46 | * (Titlecase only the character at index 0, possibly adjusted.) |
| 47 | * Option bits value for titlecasing APIs that take an options bit set. |
| 48 | * |
| 49 | * It is an error to specify multiple titlecasing iterator options together (such as this value and U_TITLECASE_SENTENCES); |
| 50 | * it is likewise an error to specify both an iterator options bit and an explicit BreakIterator. |
| 51 | * |
| 52 | * @see U_TITLECASE_ADJUST_TO_CASED |
| 53 | * @draft ICU 60 |
| 54 | */ |
| 55 | #define U_TITLECASE_WHOLE_STRING 0x20 |
| 56 | |
| 57 | /** |
| 58 | * Titlecase sentences rather than words. |
| 59 | * (Titlecase only the first character of each sentence, possibly adjusted.) |
| 60 | * Option bits value for titlecasing APIs that take an options bit set. |
| 61 | * |
| 62 | * It is an error to specify multiple titlecasing iterator options together (such as this value and U_TITLECASE_WHOLE_STRING); |
| 63 | * it is likewise an error to specify both an iterator options bit and an explicit BreakIterator. |
| 64 | * |
| 65 | * @see U_TITLECASE_ADJUST_TO_CASED |
| 66 | * @draft ICU 60 |
| 67 | */ |
| 68 | #define U_TITLECASE_SENTENCES 0x40 |
| 69 | |
| 70 | #endif // U_HIDE_DRAFT_API |
| 71 | |
| 72 | /** |
| 73 | * Do not lowercase non-initial parts of words when titlecasing. |
| 74 | * Option bit for titlecasing APIs that take an options bit set. |
| 75 | * |
| 76 | * By default, titlecasing will titlecase the character at each |
| 77 | * (possibly adjusted) BreakIterator index and |
| 78 | * lowercase all other characters up to the next iterator index. |
| 79 | * With this option, those other characters are left unmodified; only the character at each (possibly adjusted) index is titlecased. |
| 80 | * |
| 81 | * @see U_TITLECASE_ADJUST_TO_CASED |
| 82 | * @see UnicodeString::toTitle |
| 83 | * @see CaseMap::toTitle |
| 84 | * @see ucasemap_setOptions |
| 85 | * @see ucasemap_toTitle |
| 86 | * @see ucasemap_utf8ToTitle |
| 87 | * @stable ICU 3.8 |
| 88 | */ |
| 89 | #define U_TITLECASE_NO_LOWERCASE 0x100 |
| 90 | |
| 91 | /** |
| 92 | * Do not adjust the titlecasing BreakIterator indexes; |
| 93 | * titlecase exactly the characters at breaks from the iterator. |
| 94 | * Option bit for titlecasing APIs that take an options bit set. |
| 95 | * |
| 96 | * By default, titlecasing will take each break iterator index, |
| 97 | * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), |
| 98 | * and titlecase that one. |
| 99 | * |
| 100 | * Other characters are lowercased (see U_TITLECASE_NO_LOWERCASE for the option that leaves them unmodified). |
| 101 | * |
| 102 | * It is an error to specify multiple titlecasing adjustment options together (such as this option and U_TITLECASE_ADJUST_TO_CASED). |
| 103 | * |
| 104 | * @see U_TITLECASE_ADJUST_TO_CASED |
| 105 | * @see U_TITLECASE_NO_LOWERCASE |
| 106 | * @see UnicodeString::toTitle |
| 107 | * @see CaseMap::toTitle |
| 108 | * @see ucasemap_setOptions |
| 109 | * @see ucasemap_toTitle |
| 110 | * @see ucasemap_utf8ToTitle |
| 111 | * @stable ICU 3.8 |
| 112 | */ |
| 113 | #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 |
| 114 | |
| 115 | #ifndef U_HIDE_DRAFT_API |
| 116 | |
| 117 | /** |
| 118 | * Adjust each titlecasing BreakIterator index to the next cased character. |
| 119 | * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) |
| 120 | * Option bit for titlecasing APIs that take an options bit set. |
| 121 | * |
| 122 | * This used to be the default index adjustment in ICU. |
| 123 | * Since ICU 60, the default index adjustment is to the next character that is |
| 124 | * a letter, number, symbol, or private use code point. |
| 125 | * (Uncased modifier letters are skipped.) |
| 126 | * The difference in behavior is small for word titlecasing, |
| 127 | * but the new adjustment is much better for whole-string and sentence titlecasing: |
| 128 | * The new default yields "49ers" and "«丰(abc)»", whereas adjusting to the next cased character (this option) yields "49Ers" and "«丰(Abc)»". |
| 129 | * |
| 130 | * It is an error to specify multiple titlecasing adjustment options together (such as this option and U_TITLECASE_NO_BREAK_ADJUSTMENT). |
| 131 | * |
| 132 | * @see U_TITLECASE_NO_BREAK_ADJUSTMENT |
| 133 | * @draft ICU 60 |
| 134 | */ |
| 135 | #define U_TITLECASE_ADJUST_TO_CASED 0x400 |
| 136 | |
| 137 | /** |
| 138 | * Option bit for string transformation functions: do not reset the Edits object first, before the transformation. |
| 139 | * Used for example in some case-mapping and normalization functions. |
| 140 | * |
| 141 | * @see CaseMap |
| 142 | * @see Edits |
| 143 | * @see Normalizer2 |
| 144 | * @draft ICU 60 |
| 145 | */ |
| 146 | #define U_EDITS_NO_RESET 0x2000 |
| 147 | |
| 148 | /** |
| 149 | * Option bit for string transformation functions: omit unchanged text when recording how source substrings |
| 150 | * relate to changed and unchanged result substrings. |
| 151 | * Used for example in some case-mapping and normalization functions. |
| 152 | * |
| 153 | * @see CaseMap |
| 154 | * @see Edits |
| 155 | * @see Normalizer2 |
| 156 | * @draft ICU 60 |
| 157 | */ |
| 158 | #define U_OMIT_UNCHANGED_TEXT 0x4000 |
| 159 | |
| 160 | #endif // U_HIDE_DRAFT_API |
| 161 | |
| 162 | /** |
| 163 | * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: |
| 164 | * Compare strings in code point order. Without this bit, strings are compared in code unit order. |
| 165 | * @stable ICU 2.2 |
| 166 | */ |
| 167 | #define U_COMPARE_CODE_POINT_ORDER 0x8000 |
| 168 | |
| 169 | /** |
| 170 | * Option bit for unorm_compare: |
| 171 | * Perform the comparison case-insensitively. |
| 172 | * @stable ICU 2.2 |
| 173 | */ |
| 174 | #define U_COMPARE_IGNORE_CASE 0x10000 |
| 175 | |
| 176 | /** |
| 177 | * Option bit for unorm_compare: |
| 178 | * Tells unorm_compare to assume that both input strings already fulfill the FCD conditions. |
| 179 | * @stable ICU 2.2 |
| 180 | */ |
| 181 | #define UNORM_INPUT_IS_FCD 0x20000 |
| 182 | |
| 183 | // Related definitions elsewhere. |
| 184 | // Options that are not meaningful in the same functions |
| 185 | // can share the same bits. |
| 186 | // |
| 187 | // Public: |
| 188 | // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 |
| 189 | // |
| 190 | // Internal: (may change or be removed) |
| 191 | // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff |
| 192 | // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 |
| 193 | // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 |
| 194 | // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 |
| 195 | // ustr_imp.h #define _STRNCMP_STYLE 0x1000 |
| 196 | // unormcmp.cpp #define _COMPARE_EQUIV 0x80000 |
| 197 | |
| 198 | #endif // __STRINGOPTIONS_H__ |
| 199 | |