| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ******************************************************************************* | 
|---|
| 5 | * | 
|---|
| 6 | *   Copyright (C) 2004-2012, International Business Machines | 
|---|
| 7 | *   Corporation and others.  All Rights Reserved. | 
|---|
| 8 | * | 
|---|
| 9 | ******************************************************************************* | 
|---|
| 10 | *   file name:  ucase.h | 
|---|
| 11 | *   encoding:   UTF-8 | 
|---|
| 12 | *   tab size:   8 (not used) | 
|---|
| 13 | *   indentation:4 | 
|---|
| 14 | * | 
|---|
| 15 | *   created on: 2004aug30 | 
|---|
| 16 | *   created by: Markus W. Scherer | 
|---|
| 17 | * | 
|---|
| 18 | *   Low-level Unicode character/string case mapping code. | 
|---|
| 19 | */ | 
|---|
| 20 |  | 
|---|
| 21 | #ifndef __UCASE_H__ | 
|---|
| 22 | #define __UCASE_H__ | 
|---|
| 23 |  | 
|---|
| 24 | #include "unicode/utypes.h" | 
|---|
| 25 | #include "unicode/uset.h" | 
|---|
| 26 | #include "putilimp.h" | 
|---|
| 27 | #include "uset_imp.h" | 
|---|
| 28 | #include "udataswp.h" | 
|---|
| 29 | #include "utrie2.h" | 
|---|
| 30 |  | 
|---|
| 31 | #ifdef __cplusplus | 
|---|
| 32 | U_NAMESPACE_BEGIN | 
|---|
| 33 | /* Forward declaration: in this header UnicodeString is used only as a reference parameter of FullCaseFoldingIterator::next(). */ | 
|---|
| 34 | class UnicodeString; | 
|---|
| 35 |  | 
|---|
| 36 | U_NAMESPACE_END | 
|---|
| 37 | #endif | 
|---|
| 38 |  | 
|---|
| 39 | /* library API -------------------------------------------------------------- */ | 
|---|
| 40 | /* NOTE(review): judging by the name and the USetAdder parameter, this presumably adds the start code points of case-property ranges through sa and reports failures through pErrorCode -- confirm against the implementation. */ | 
|---|
| 41 | U_CFUNC void U_EXPORT2 | 
|---|
| 42 | ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); | 
|---|
| 43 |  | 
|---|
| 44 | /** | 
|---|
| 45 | * Determines the casing locale type for a locale ID (see the UCASE_LOC_ constants). | 
|---|
| 46 | * Requires non-NULL locale ID but otherwise does the equivalent of checking for | 
|---|
| 47 | * language codes as if uloc_getLanguage() were called: Accepts both 2- and 3-letter codes and accepts case variants. | 
|---|
| 48 | */ | 
|---|
| 49 | U_CFUNC int32_t | 
|---|
| 50 | ucase_getCaseLocale(const char *locale); | 
|---|
| 51 |  | 
|---|
| 52 | /* Casing locale types for ucase_getCaseLocale; also the values expected as the caseLocale argument of the ucase_toFull...() functions. Implicit values 0..5 in declaration order. */ | 
|---|
| 53 | enum { | 
|---|
| 54 | UCASE_LOC_UNKNOWN, /* 0 */ | 
|---|
| 55 | UCASE_LOC_ROOT, /* 1 */ | 
|---|
| 56 | UCASE_LOC_TURKISH, /* 2 */ | 
|---|
| 57 | UCASE_LOC_LITHUANIAN, /* 3 */ | 
|---|
| 58 | UCASE_LOC_GREEK, /* 4 */ | 
|---|
| 59 | UCASE_LOC_DUTCH /* 5 */ | 
|---|
| 60 | }; | 
|---|
| 61 |  | 
|---|
| 62 | /** | 
|---|
| 63 | * Bit mask for getting just the options from a string compare options word | 
|---|
| 64 | * that are relevant for case-insensitive string comparison. | 
|---|
| 65 | * See stringoptions.h. The mask keeps the low 16 bits and thereby also includes _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER. | 
|---|
| 66 | * @internal | 
|---|
| 67 | */ | 
|---|
| 68 | #define _STRCASECMP_OPTIONS_MASK 0xffff | 
|---|
| 69 |  | 
|---|
| 70 | /** | 
|---|
| 71 | * Bit mask for getting just the options from a string compare options word | 
|---|
| 72 | * that are relevant for case folding (of a single string or code point). | 
|---|
| 73 | * | 
|---|
| 74 | * Currently only bit 0 is used, for U_FOLD_CASE_EXCLUDE_SPECIAL_I; the mask value 7 nevertheless keeps bits 0..2. | 
|---|
| 75 | * It is conceivable that at some point we might use one more bit for using uppercase sharp s. | 
|---|
| 76 | * It is conceivable that at some point we might want the option to use only simple case foldings | 
|---|
| 77 | * when operating on strings. | 
|---|
| 78 | * | 
|---|
| 79 | * See stringoptions.h. | 
|---|
| 80 | * @internal | 
|---|
| 81 | */ | 
|---|
| 82 | #define _FOLD_CASE_OPTIONS_MASK 7 | 
|---|
| 83 |  | 
|---|
| 84 | /* single-code point functions: each takes one code point c and returns a single code point (UChar32) */ | 
|---|
| 85 |  | 
|---|
| 86 | U_CAPI UChar32 U_EXPORT2 | 
|---|
| 87 | ucase_tolower(UChar32 c); | 
|---|
| 88 |  | 
|---|
| 89 | U_CAPI UChar32 U_EXPORT2 | 
|---|
| 90 | ucase_toupper(UChar32 c); | 
|---|
| 91 |  | 
|---|
| 92 | U_CAPI UChar32 U_EXPORT2 | 
|---|
| 93 | ucase_totitle(UChar32 c); | 
|---|
| 94 | /* NOTE(review): options is presumably the set of case folding option bits selected by _FOLD_CASE_OPTIONS_MASK -- confirm. */ | 
|---|
| 95 | U_CAPI UChar32 U_EXPORT2 | 
|---|
| 96 | ucase_fold(UChar32 c, uint32_t options); | 
|---|
| 97 |  | 
|---|
| 98 | /** | 
|---|
| 99 | * Adds all simple case mappings and the full case folding for c to sa, | 
|---|
| 100 | * and also adds special case closure mappings. | 
|---|
| 101 | * c itself is not added. Nothing is returned; all results are delivered through sa. | 
|---|
| 102 | * For example, the mappings | 
|---|
| 103 | * - for s include long s | 
|---|
| 104 | * - for sharp s include ss | 
|---|
| 105 | * - for k include the Kelvin sign | 
|---|
| 106 | */ | 
|---|
| 107 | U_CFUNC void U_EXPORT2 | 
|---|
| 108 | ucase_addCaseClosure(UChar32 c, const USetAdder *sa); | 
|---|
| 109 |  | 
|---|
| 110 | /** | 
|---|
| 111 | * Maps the string to single code points and adds the associated case closure | 
|---|
| 112 | * mappings. | 
|---|
| 113 | * The string is mapped to code points if it is their full case folding string. | 
|---|
| 114 | * In other words, this performs a reverse full case folding and then | 
|---|
| 115 | * adds the case closure items of the resulting code points. | 
|---|
| 116 | * If the string is found and its closure applied, then | 
|---|
| 117 | * the string itself is added as well as part of its code points' closure. | 
|---|
| 118 | * The length argument must be >=0. | 
|---|
| 119 | * | 
|---|
| 120 | * @return TRUE if the string was found (its closure is then applied as described above) | 
|---|
| 121 | */ | 
|---|
| 122 | U_CFUNC UBool U_EXPORT2 | 
|---|
| 123 | ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa); | 
|---|
| 124 |  | 
|---|
| 125 | #ifdef __cplusplus | 
|---|
| 126 | U_NAMESPACE_BEGIN | 
|---|
| 127 |  | 
|---|
| 128 | /** | 
|---|
| 129 | * Iterator over characters with more than one code point in the full default Case_Folding. Instances cannot be copied or assigned. | 
|---|
| 130 | */ | 
|---|
| 131 | class U_COMMON_API FullCaseFoldingIterator { | 
|---|
| 132 | public: | 
|---|
| 133 | /** Constructor. */ | 
|---|
| 134 | FullCaseFoldingIterator(); | 
|---|
| 135 | /** | 
|---|
| 136 | * Returns the next (cp, full) pair where "full" is cp's full default Case_Folding; cp is the return value, full is the output parameter. | 
|---|
| 137 | * Returns a negative cp value at the end of the iteration. | 
|---|
| 138 | */ | 
|---|
| 139 | UChar32 next(UnicodeString &full); | 
|---|
| 140 | private: | 
|---|
| 141 | FullCaseFoldingIterator(const FullCaseFoldingIterator &) = delete;  // no copy: deleted so that misuse fails at compile time | 
|---|
| 142 | FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &) = delete;  // no assignment: deleted so that misuse fails at compile time | 
|---|
| 143 | /* Iteration state. NOTE(review): presumably a pointer into the reverse case folding ("unfold") data, its dimensions, and the current position -- confirm in the implementation. */ | 
|---|
| 144 | const UChar *unfold; | 
|---|
| 145 | int32_t unfoldRows; | 
|---|
| 146 | int32_t unfoldRowWidth; | 
|---|
| 147 | int32_t unfoldStringWidth; | 
|---|
| 148 | int32_t currentRow; | 
|---|
| 149 | int32_t rowCpIndex; | 
|---|
| 150 | }; | 
|---|
| 151 |  | 
|---|
| 152 | /** | 
|---|
| 153 | * Fast case mapping data for ASCII/Latin. | 
|---|
| 154 | * Linear arrays of delta bytes (int8_t, LIMIT entries each): 0=no mapping; EXC=exception. | 
|---|
| 155 | * Deltas must not cross the ASCII boundary, or else they cannot be easily used | 
|---|
| 156 | * in simple UTF-8 code. | 
|---|
| 157 | */ | 
|---|
| 158 | namespace LatinCase { | 
|---|
| 159 |  | 
|---|
| 160 | /** Case mapping/folding data for code points up to U+017F; LIMIT (0x180) is the exclusive limit and the length of each delta array below. */ | 
|---|
| 161 | constexpr UChar LIMIT = 0x180; | 
|---|
| 162 | /** U+017F case-folds and uppercases crossing the ASCII boundary. */ | 
|---|
| 163 | constexpr UChar LONG_S = 0x17f; | 
|---|
| 164 | /** Exception marker (-0x80 == -128): Complex mapping, or too-large delta. */ | 
|---|
| 165 | constexpr int8_t EXC = -0x80; | 
|---|
| 166 |  | 
|---|
| 167 | /** Deltas for lowercasing for most locales, and default case folding. */ | 
|---|
| 168 | extern const int8_t TO_LOWER_NORMAL[LIMIT]; | 
|---|
| 169 | /** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */ | 
|---|
| 170 | extern const int8_t TO_LOWER_TR_LT[LIMIT]; | 
|---|
| 171 |  | 
|---|
| 172 | /** Deltas for uppercasing for most locales. */ | 
|---|
| 173 | extern const int8_t TO_UPPER_NORMAL[LIMIT]; | 
|---|
| 174 | /** Deltas for uppercasing for tr/az. */ | 
|---|
| 175 | extern const int8_t TO_UPPER_TR[LIMIT]; | 
|---|
| 176 |  | 
|---|
| 177 | }  // namespace LatinCase | 
|---|
| 178 |  | 
|---|
| 179 | U_NAMESPACE_END | 
|---|
| 180 | #endif | 
|---|
| 181 |  | 
|---|
| 182 | /** @return the case type of c: one of UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ | 
|---|
| 183 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 184 | ucase_getType(UChar32 c); | 
|---|
| 185 |  | 
|---|
| 186 | /** @return like ucase_getType(), but the result additionally has UCASE_IGNORABLE set if c is case-ignorable */ | 
|---|
| 187 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 188 | ucase_getTypeOrIgnorable(UChar32 c); | 
|---|
| 189 | /** NOTE(review): presumably TRUE if c is a soft-dotted character, cf. UCASE_SOFT_DOTTED -- confirm. */ | 
|---|
| 190 | U_CAPI UBool U_EXPORT2 | 
|---|
| 191 | ucase_isSoftDotted(UChar32 c); | 
|---|
| 192 | /** NOTE(review): presumably TRUE if c is case-sensitive, cf. UCASE_SENSITIVE -- confirm. */ | 
|---|
| 193 | U_CAPI UBool U_EXPORT2 | 
|---|
| 194 | ucase_isCaseSensitive(UChar32 c); | 
|---|
| 195 |  | 
|---|
| 196 | /* string case mapping functions */ | 
|---|
| 197 |  | 
|---|
| 198 | U_CDECL_BEGIN | 
|---|
| 199 |  | 
|---|
| 200 | /** | 
|---|
| 201 | * Iterator function for string case mappings, which need to look at the | 
|---|
| 202 | * context (surrounding text) of a given character for conditional mappings. | 
|---|
| 203 | * | 
|---|
| 204 | * The iterator only needs to go backward or forward away from the | 
|---|
| 205 | * character in question. It does not use any indexes on this interface. | 
|---|
| 206 | * It does not support random access or an arbitrary change of | 
|---|
| 207 | * iteration direction. | 
|---|
| 208 | * | 
|---|
| 209 | * The code point being case-mapped itself is never returned by | 
|---|
| 210 | * this iterator. | 
|---|
| 211 | * Note: this typedef names a function type, not a pointer type; the mapping functions take a pointer to it (UCaseContextIterator *iter). | 
|---|
| 212 | * @param context A pointer to the iterator's working data. | 
|---|
| 213 | * @param dir If <0 then start iterating backward from the character; | 
|---|
| 214 | *            if >0 then start iterating forward from the character; | 
|---|
| 215 | *            if 0 then continue iterating in the current direction. | 
|---|
| 216 | * @return Next code point, or <0 when the iteration is done. | 
|---|
| 217 | */ | 
|---|
| 218 | typedef UChar32 U_CALLCONV | 
|---|
| 219 | UCaseContextIterator(void *context, int8_t dir); | 
|---|
| 220 |  | 
|---|
| 221 | /** | 
|---|
| 222 | * Sample struct which may be used by some implementations of | 
|---|
| 223 | * UCaseContextIterator. This header does not define the meaning of the fields; the notes below are unconfirmed. | 
|---|
| 224 | */ | 
|---|
| 225 | struct UCaseContext { | 
|---|
| 226 | void *p; /* NOTE(review): presumably the text being iterated over -- confirm */ | 
|---|
| 227 | int32_t start, index, limit; /* NOTE(review): presumably text bounds and the current iteration index -- confirm */ | 
|---|
| 228 | int32_t cpStart, cpLimit; /* NOTE(review): presumably the bounds of the code point being mapped -- confirm */ | 
|---|
| 229 | int8_t dir; /* NOTE(review): presumably the current direction, cf. the dir parameter of UCaseContextIterator -- confirm */ | 
|---|
| 230 | int8_t b1, b2, b3; /* NOTE(review): no use visible in this header -- confirm */ | 
|---|
| 231 | }; | 
|---|
| 232 | typedef struct UCaseContext UCaseContext; | 
|---|
| 233 |  | 
|---|
| 234 | U_CDECL_END | 
|---|
| 235 | /* Aggregate initializer that sets all ten UCaseContext members, in declaration order, to NULL/0. */ | 
|---|
| 236 | #define UCASECONTEXT_INITIALIZER { NULL,  0, 0, 0,  0, 0,  0,  0, 0, 0 } | 
|---|
| 237 |  | 
|---|
| 238 | enum { | 
|---|
| 239 | /** | 
|---|
| 240 | * For string case mappings, a single character (a code point) is mapped | 
|---|
| 241 | * either to itself (in which case in-place mapping functions do nothing), | 
|---|
| 242 | * or to another single code point, or to a string. | 
|---|
| 243 | * Aside from the string contents, these are indicated with a single int32_t | 
|---|
| 244 | * value as follows: | 
|---|
| 245 | * | 
|---|
| 246 | * Mapping to self: Negative values (~self, the bitwise complement, instead of -self to support U+0000) | 
|---|
| 247 | * | 
|---|
| 248 | * Mapping to another code point: Positive values >UCASE_MAX_STRING_LENGTH | 
|---|
| 249 | * | 
|---|
| 250 | * Mapping to a string: The string length (0..UCASE_MAX_STRING_LENGTH) is | 
|---|
| 251 | * returned. Note that the string result may indeed have zero length. | 
|---|
| 252 | */ | 
|---|
| 253 | UCASE_MAX_STRING_LENGTH=0x1f /* 31 */ | 
|---|
| 254 | }; | 
|---|
| 255 |  | 
|---|
| 256 | /** | 
|---|
| 257 | * Get the full lowercase mapping for c. | 
|---|
| 258 | * | 
|---|
| 259 | * @param c Character to be mapped. | 
|---|
| 260 | * @param iter Character iterator, used for context-sensitive mappings. | 
|---|
| 261 | *             See UCaseContextIterator for details. | 
|---|
| 262 | *             If iter==NULL then a context-independent result is returned. | 
|---|
| 263 | * @param context Pointer to be passed into iter. | 
|---|
| 264 | * @param pString If the mapping result is a string, then the pointer is | 
|---|
| 265 | *                written to *pString. | 
|---|
| 266 | * @param caseLocale Case locale value from ucase_getCaseLocale(). | 
|---|
| 267 | * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH; | 
|---|
| 268 | *         a negative value (~c) means that c maps to itself. | 
|---|
| 269 | * | 
|---|
| 270 | * @see UCaseContextIterator | 
|---|
| 271 | * @see UCASE_MAX_STRING_LENGTH | 
|---|
| 272 | * @internal | 
|---|
| 273 | */ | 
|---|
| 274 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 275 | ucase_toFullLower(UChar32 c, | 
|---|
| 276 | UCaseContextIterator *iter, void *context, | 
|---|
| 277 | const UChar **pString, | 
|---|
| 278 | int32_t caseLocale); | 
|---|
| 279 | /** Declared with the same signature as ucase_toFullLower(). NOTE(review): presumably the full uppercase counterpart with the same parameter and return conventions -- confirm. */ | 
|---|
| 280 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 281 | ucase_toFullUpper(UChar32 c, | 
|---|
| 282 | UCaseContextIterator *iter, void *context, | 
|---|
| 283 | const UChar **pString, | 
|---|
| 284 | int32_t caseLocale); | 
|---|
| 285 | /** Declared with the same signature as ucase_toFullLower(). NOTE(review): presumably the full titlecase counterpart with the same parameter and return conventions -- confirm. */ | 
|---|
| 286 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 287 | ucase_toFullTitle(UChar32 c, | 
|---|
| 288 | UCaseContextIterator *iter, void *context, | 
|---|
| 289 | const UChar **pString, | 
|---|
| 290 | int32_t caseLocale); | 
|---|
| 291 | /** Takes no context iterator and no case locale. NOTE(review): presumably the full case folding of c, with options as selected by _FOLD_CASE_OPTIONS_MASK and the return convention of UCASE_MAX_STRING_LENGTH -- confirm. */ | 
|---|
| 292 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 293 | ucase_toFullFolding(UChar32 c, | 
|---|
| 294 | const UChar **pString, | 
|---|
| 295 | uint32_t options); | 
|---|
| 296 | /** NOTE(review): declared to return int32_t although the name suggests a boolean (UBool) result -- confirm against the definition before relying on exact return values. */ | 
|---|
| 297 | U_CFUNC int32_t U_EXPORT2 | 
|---|
| 298 | ucase_hasBinaryProperty(UChar32 c, UProperty which); | 
|---|
| 299 |  | 
|---|
| 300 |  | 
|---|
| 301 | U_CDECL_BEGIN | 
|---|
| 302 |  | 
|---|
| 303 | /** | 
|---|
| 304 | * Function type with the same parameter list and return type as ucase_toFullLower(), ucase_toFullUpper() and ucase_toFullTitle(). @internal | 
|---|
| 305 | */ | 
|---|
| 306 | typedef int32_t U_CALLCONV | 
|---|
| 307 | UCaseMapFull(UChar32 c, | 
|---|
| 308 | UCaseContextIterator *iter, void *context, | 
|---|
| 309 | const UChar **pString, | 
|---|
| 310 | int32_t caseLocale); | 
|---|
| 311 |  | 
|---|
| 312 | U_CDECL_END | 
|---|
| 313 |  | 
|---|
| 314 | /* file definitions --------------------------------------------------------- */ | 
|---|
| 315 | /* NOTE(review): presumably the name and type of the binary data item -- confirm. */ | 
|---|
| 316 | #define UCASE_DATA_NAME "ucase" | 
|---|
| 317 | #define UCASE_DATA_TYPE "icu" | 
|---|
| 318 |  | 
|---|
| 319 | /* format "cAsE": the four values below are the ASCII codes of c, A, S, E */ | 
|---|
| 320 | #define UCASE_FMT_0 0x63 | 
|---|
| 321 | #define UCASE_FMT_1 0x41 | 
|---|
| 322 | #define UCASE_FMT_2 0x53 | 
|---|
| 323 | #define UCASE_FMT_3 0x45 | 
|---|
| 324 |  | 
|---|
| 325 | /* indexes into indexes[]: the first five enumerators are implicitly 0..4. NOTE(review): UCASE_IX_TOP (16) is presumably the number of entries in indexes[] -- confirm. */ | 
|---|
| 326 | enum { | 
|---|
| 327 | UCASE_IX_INDEX_TOP, | 
|---|
| 328 | UCASE_IX_LENGTH, | 
|---|
| 329 | UCASE_IX_TRIE_SIZE, | 
|---|
| 330 | UCASE_IX_EXC_LENGTH, | 
|---|
| 331 | UCASE_IX_UNFOLD_LENGTH, | 
|---|
| 332 |  | 
|---|
| 333 | UCASE_IX_MAX_FULL_LENGTH=15, | 
|---|
| 334 | UCASE_IX_TOP=16 | 
|---|
| 335 | }; | 
|---|
| 336 |  | 
|---|
| 337 | /* definitions for 16-bit case properties word ------------------------------ */ | 
|---|
| 338 | /* Declared with (void): in C an empty parameter list would leave the parameters unspecified; in C++ both forms are equivalent. NOTE(review): presumably returns the trie that stores the case properties words -- confirm. */ | 
|---|
| 339 | U_CFUNC const UTrie2 * U_EXPORT2 | 
|---|
| 340 | ucase_getTrie(void); | 
|---|
| 341 |  | 
|---|
| 342 | /* 2-bit constants for types of cased characters (enum values 0..3, selected by UCASE_TYPE_MASK) */ | 
|---|
| 343 | #define UCASE_TYPE_MASK     3 | 
|---|
| 344 | enum { | 
|---|
| 345 | UCASE_NONE, | 
|---|
| 346 | UCASE_LOWER, | 
|---|
| 347 | UCASE_UPPER, | 
|---|
| 348 | UCASE_TITLE | 
|---|
| 349 | }; | 
|---|
| 350 | /* Extract bits 0..1 (the type) or bits 0..2 (the type plus the UCASE_IGNORABLE bit, value 4) of a properties word. */ | 
|---|
| 351 | #define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK) | 
|---|
| 352 | #define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7) | 
|---|
| 353 | /* Non-zero if bit 1 is set, which is the case for the type values UCASE_UPPER (2) and UCASE_TITLE (3). */ | 
|---|
| 354 | #define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2) | 
|---|
| 355 | /* Single-bit flags (bits 2, 3 and 4). */ | 
|---|
| 356 | #define UCASE_IGNORABLE         4 | 
|---|
| 357 | #define UCASE_EXCEPTION         8 | 
|---|
| 358 | #define UCASE_SENSITIVE         0x10 | 
|---|
| 359 | /* Non-zero if props has the UCASE_EXCEPTION bit set. */ | 
|---|
| 360 | #define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) | 
|---|
| 361 | /* Bits 5..6: dot/accent type. The enum values below are already positioned within UCASE_DOT_MASK. */ | 
|---|
| 362 | #define UCASE_DOT_MASK      0x60 | 
|---|
| 363 | enum { | 
|---|
| 364 | UCASE_NO_DOT=0,         /* normal characters with cc=0 */ | 
|---|
| 365 | UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */ | 
|---|
| 366 | UCASE_ABOVE=0x40,       /* "above" accents with cc=230 */ | 
|---|
| 367 | UCASE_OTHER_ACCENT=0x60 /* other accent character (0<cc!=230) */ | 
|---|
| 368 | }; | 
|---|
| 369 |  | 
|---|
| 370 | /* no exception: bits 15..7 are a 9-bit signed case mapping delta */ | 
|---|
| 371 | #define UCASE_DELTA_SHIFT   7 | 
|---|
| 372 | #define UCASE_DELTA_MASK    0xff80 | 
|---|
| 373 | #define UCASE_MAX_DELTA     0xff /* 255 */ | 
|---|
| 374 | #define UCASE_MIN_DELTA     (-UCASE_MAX_DELTA-1) /* -256 */ | 
|---|
| 375 | /* Extracts the signed delta. Where >> on signed values is not arithmetic, the sign is extended manually by OR-ing 0xfe00 into the shifted value when bit 15 is set. */ | 
|---|
| 376 | #if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC | 
|---|
| 377 | #   define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT) | 
|---|
| 378 | #else | 
|---|
| 379 | #   define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT)) | 
|---|
| 380 | #endif | 
|---|
| 381 |  | 
|---|
| 382 | /* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array; UCASE_MAX_EXCEPTIONS evaluates to 0x1000 */ | 
|---|
| 383 | #define UCASE_EXC_SHIFT     4 | 
|---|
| 384 | #define UCASE_EXC_MASK      0xfff0 | 
|---|
| 385 | #define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1) | 
|---|
| 386 |  | 
|---|
| 387 | /* definitions for 16-bit main exceptions word ------------------------------ */ | 
|---|
| 388 |  | 
|---|
| 389 | /* first 8 bits indicate values in optional slots; the enumerators are the slot numbers 0..7, and UCASE_EXC_ALL_SLOTS is 8 */ | 
|---|
| 390 | enum { | 
|---|
| 391 | UCASE_EXC_LOWER, | 
|---|
| 392 | UCASE_EXC_FOLD, | 
|---|
| 393 | UCASE_EXC_UPPER, | 
|---|
| 394 | UCASE_EXC_TITLE, | 
|---|
| 395 | UCASE_EXC_DELTA, | 
|---|
| 396 | UCASE_EXC_5,            /* reserved */ | 
|---|
| 397 | UCASE_EXC_CLOSURE, | 
|---|
| 398 | UCASE_EXC_FULL_MAPPINGS, | 
|---|
| 399 | UCASE_EXC_ALL_SLOTS     /* one past the last slot */ | 
|---|
| 400 | }; | 
|---|
| 401 |  | 
|---|
| 402 | /* flag bit 8: each slot is 2 uint16_t instead of 1 */ | 
|---|
| 403 | #define UCASE_EXC_DOUBLE_SLOTS      0x100 | 
|---|
| 404 | /* further single-bit flags: bits 9, 10 and 11 */ | 
|---|
| 405 | enum { | 
|---|
| 406 | UCASE_EXC_NO_SIMPLE_CASE_FOLDING=0x200, | 
|---|
| 407 | UCASE_EXC_DELTA_IS_NEGATIVE=0x400, | 
|---|
| 408 | UCASE_EXC_SENSITIVE=0x800 | 
|---|
| 409 | }; | 
|---|
| 410 |  | 
|---|
| 411 | /* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT, i.e. 0x60<<7 == 0x3000 */ | 
|---|
| 412 | #define UCASE_EXC_DOT_SHIFT     7 | 
|---|
| 413 |  | 
|---|
| 414 | /* normally stored in the main word, but pushed out for larger exception indexes */ | 
|---|
| 415 | #define UCASE_EXC_DOT_MASK      0x3000 | 
|---|
| 416 | enum { | 
|---|
| 417 | UCASE_EXC_NO_DOT=0, | 
|---|
| 418 | UCASE_EXC_SOFT_DOTTED=0x1000, | 
|---|
| 419 | UCASE_EXC_ABOVE=0x2000,         /* "above" accents with cc=230 */ | 
|---|
| 420 | UCASE_EXC_OTHER_ACCENT=0x3000   /* other character (0<cc!=230) */ | 
|---|
| 421 | }; | 
|---|
| 422 |  | 
|---|
| 423 | /* complex/conditional mappings: flag bits 14 and 15 */ | 
|---|
| 424 | #define UCASE_EXC_CONDITIONAL_SPECIAL   0x4000 | 
|---|
| 425 | #define UCASE_EXC_CONDITIONAL_FOLD      0x8000 | 
|---|
| 426 |  | 
|---|
| 427 | /* definitions for lengths word for full case mappings: one 4-bit field mask per mapping */ | 
|---|
| 428 | #define UCASE_FULL_LOWER    0xf | 
|---|
| 429 | #define UCASE_FULL_FOLDING  0xf0 | 
|---|
| 430 | #define UCASE_FULL_UPPER    0xf00 | 
|---|
| 431 | #define UCASE_FULL_TITLE    0xf000 | 
|---|
| 432 |  | 
|---|
| 433 | /* maximum lengths; UCASE_FULL_MAPPINGS_MAX_LENGTH is 4*0xf == 60 */ | 
|---|
| 434 | #define UCASE_FULL_MAPPINGS_MAX_LENGTH (4*0xf) | 
|---|
| 435 | #define UCASE_CLOSURE_MAX_LENGTH 0xf | 
|---|
| 436 |  | 
|---|
| 437 | /* constants for reverse case folding ("unfold") data; implicit values 0, 1, 2 */ | 
|---|
| 438 | enum { | 
|---|
| 439 | UCASE_UNFOLD_ROWS, | 
|---|
| 440 | UCASE_UNFOLD_ROW_WIDTH, | 
|---|
| 441 | UCASE_UNFOLD_STRING_WIDTH | 
|---|
| 442 | }; | 
|---|
| 443 |  | 
|---|
| 444 | #endif | 
|---|
| 445 |  | 
|---|