| 1 | /************************************************* | 
|---|
| 2 | *      Perl-Compatible Regular Expressions       * | 
|---|
| 3 | *************************************************/ | 
|---|
| 4 |  | 
|---|
| 5 | /* PCRE is a library of functions to support regular expressions whose syntax | 
|---|
| 6 | and semantics are as close as possible to those of the Perl 5 language. | 
|---|
| 7 |  | 
|---|
| 8 | Written by Philip Hazel | 
|---|
| 9 | Original API code Copyright (c) 1997-2012 University of Cambridge | 
|---|
| 10 | New API code Copyright (c) 2016-2022 University of Cambridge | 
|---|
| 11 |  | 
|---|
| 12 | This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY! | 
|---|
| 13 | Instead, modify the maint/GenerateUcpHeader.py script and run it to generate | 
|---|
| 14 | a new version of this code. | 
|---|
| 15 |  | 
|---|
| 16 | ----------------------------------------------------------------------------- | 
|---|
| 17 | Redistribution and use in source and binary forms, with or without | 
|---|
| 18 | modification, are permitted provided that the following conditions are met: | 
|---|
| 19 |  | 
|---|
| 20 | * Redistributions of source code must retain the above copyright notice, | 
|---|
| 21 | this list of conditions and the following disclaimer. | 
|---|
| 22 |  | 
|---|
| 23 | * Redistributions in binary form must reproduce the above copyright | 
|---|
| 24 | notice, this list of conditions and the following disclaimer in the | 
|---|
| 25 | documentation and/or other materials provided with the distribution. | 
|---|
| 26 |  | 
|---|
| 27 | * Neither the name of the University of Cambridge nor the names of its | 
|---|
| 28 | contributors may be used to endorse or promote products derived from | 
|---|
| 29 | this software without specific prior written permission. | 
|---|
| 30 |  | 
|---|
| 31 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
|---|
| 32 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|---|
| 33 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
|---|
| 34 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
|---|
| 35 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
|---|
| 36 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
|---|
| 37 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
|---|
| 38 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
|---|
| 39 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
|---|
| 40 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
|---|
| 41 | POSSIBILITY OF SUCH DAMAGE. | 
|---|
| 42 | ----------------------------------------------------------------------------- | 
|---|
| 43 | */ | 
|---|
| 44 |  | 
|---|
| 45 | #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD | 
|---|
| 46 | #define PCRE2_UCP_H_IDEMPOTENT_GUARD | 
|---|
| 47 |  | 
|---|
| 48 | /* This file contains definitions of the Unicode property values that are | 
|---|
| 49 | returned by the UCD access macros and used throughout PCRE2. | 
|---|
| 50 |  | 
|---|
| 51 | IMPORTANT: The specific values of the first two enums (general and particular | 
|---|
| 52 | character categories) are assumed by the table called catposstab in the file | 
|---|
| 53 | pcre2_auto_possess.c. They are unlikely to change, but should be checked after | 
|---|
| 54 | an update. */ | 
|---|
| 55 |  | 
|---|
/* These are the general character categories. Each one groups the particular
categories whose names start with the same letter. The enumerators take the
implicit values 0..6 in the order listed; those values are assumed by the
catposstab table in pcre2_auto_possess.c, so the order must not be changed
without checking that table. */

enum {
  ucp_C,     /* Other: the Cc, Cf, Cn, Co and Cs categories */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z,     /* Separator */
};
|---|
| 67 |  | 
|---|
/* These are the particular character categories. The enumerators take the
implicit values 0..29 in the order listed; those values are assumed by the
catposstab table in pcre2_auto_possess.c, so the order must not be changed
without checking that table. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs,    /* Space separator */
};
|---|
| 102 |  | 
|---|
/* These are Boolean properties. The enumerators are implicitly numbered from
zero in the order listed. ucp_Bprop_Count is not a property itself: because it
is the final enumerator, its value is the number of properties listed before
it, so any new property must be inserted above it. */

enum {
  ucp_ASCII,
  ucp_ASCII_Hex_Digit,
  ucp_Alphabetic,
  ucp_Bidi_Control,
  ucp_Bidi_Mirrored,
  ucp_Case_Ignorable,
  ucp_Cased,
  ucp_Changes_When_Casefolded,
  ucp_Changes_When_Casemapped,
  ucp_Changes_When_Lowercased,
  ucp_Changes_When_Titlecased,
  ucp_Changes_When_Uppercased,
  ucp_Dash,
  ucp_Default_Ignorable_Code_Point,
  ucp_Deprecated,
  ucp_Diacritic,
  ucp_Emoji,
  ucp_Emoji_Component,
  ucp_Emoji_Modifier,
  ucp_Emoji_Modifier_Base,
  ucp_Emoji_Presentation,
  ucp_Extended_Pictographic,
  ucp_Extender,
  ucp_Grapheme_Base,
  ucp_Grapheme_Extend,
  ucp_Grapheme_Link,
  ucp_Hex_Digit,
  ucp_IDS_Binary_Operator,
  ucp_IDS_Trinary_Operator,
  ucp_ID_Continue,
  ucp_ID_Start,
  ucp_Ideographic,
  ucp_Join_Control,
  ucp_Logical_Order_Exception,
  ucp_Lowercase,
  ucp_Math,
  ucp_Noncharacter_Code_Point,
  ucp_Pattern_Syntax,
  ucp_Pattern_White_Space,
  ucp_Prepended_Concatenation_Mark,
  ucp_Quotation_Mark,
  ucp_Radical,
  ucp_Regional_Indicator,
  ucp_Sentence_Terminal,
  ucp_Soft_Dotted,
  ucp_Terminal_Punctuation,
  ucp_Unified_Ideograph,
  ucp_Uppercase,
  ucp_Variation_Selector,
  ucp_White_Space,
  ucp_XID_Continue,
  ucp_XID_Start,
  /* This must be last */
  ucp_Bprop_Count
};
|---|
| 161 |  | 
|---|
/* Size of entries in ucd_boolprop_sets[]. NOTE(review): presumably a count of
array elements per entry rather than bytes - confirm against the definition of
ucd_boolprop_sets[]. */

#define ucd_boolprop_sets_item_size 2
|---|
| 165 |  | 
|---|
/* These are the bidi class values. The enumerators are implicitly numbered
from zero in the order listed. */

enum {
  ucp_bidiAL,   /* Arabic letter */
  ucp_bidiAN,   /* Arabic number */
  ucp_bidiB,    /* Paragraph separator */
  ucp_bidiBN,   /* Boundary neutral */
  ucp_bidiCS,   /* Common separator */
  ucp_bidiEN,   /* European number */
  ucp_bidiES,   /* European separator */
  ucp_bidiET,   /* European terminator */
  ucp_bidiFSI,  /* First strong isolate */
  ucp_bidiL,    /* Left to right */
  ucp_bidiLRE,  /* Left to right embedding */
  ucp_bidiLRI,  /* Left to right isolate */
  ucp_bidiLRO,  /* Left to right override */
  ucp_bidiNSM,  /* Non-spacing mark */
  ucp_bidiON,   /* Other neutral */
  ucp_bidiPDF,  /* Pop directional format */
  ucp_bidiPDI,  /* Pop directional isolate */
  ucp_bidiR,    /* Right to left */
  ucp_bidiRLE,  /* Right to left embedding */
  ucp_bidiRLI,  /* Right to left isolate */
  ucp_bidiRLO,  /* Right to left override */
  ucp_bidiS,    /* Segment separator */
  ucp_bidiWS,   /* White space */
};
|---|
| 193 |  | 
|---|
/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. The number in each comment is the implicit
value of that enumerator. */

enum {
  ucp_gbCR,                    /*  0 */
  ucp_gbLF,                    /*  1 */
  ucp_gbControl,               /*  2 */
  ucp_gbExtend,                /*  3 */
  ucp_gbPrepend,               /*  4 */
  ucp_gbSpacingMark,           /*  5 */
  ucp_gbL,                     /*  6 Hangul syllable type L */
  ucp_gbV,                     /*  7 Hangul syllable type V */
  ucp_gbT,                     /*  8 Hangul syllable type T */
  ucp_gbLV,                    /*  9 Hangul syllable type LV */
  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
  ucp_gbRegional_Indicator,    /* 11 */
  ucp_gbOther,                 /* 12 */
  ucp_gbZWJ,                   /* 13 */
  ucp_gbExtended_Pictographic, /* 14 */
};
|---|
| 214 |  | 
|---|
/* These are the script identifications. The enumerators are implicitly
numbered from zero in the order listed, in two groups: first the scripts that
have characters in other scripts, then the scripts that have none.
ucp_Script_Count is not a script itself: because it is the final enumerator,
its value is the number of scripts listed before it, so any new script must be
inserted above it. */

enum {
  /* Scripts which have characters in other scripts. */
  ucp_Latin,
  ucp_Greek,
  ucp_Cyrillic,
  ucp_Arabic,
  ucp_Syriac,
  ucp_Thaana,
  ucp_Devanagari,
  ucp_Bengali,
  ucp_Gurmukhi,
  ucp_Gujarati,
  ucp_Oriya,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Kannada,
  ucp_Malayalam,
  ucp_Sinhala,
  ucp_Myanmar,
  ucp_Georgian,
  ucp_Hangul,
  ucp_Mongolian,
  ucp_Hiragana,
  ucp_Katakana,
  ucp_Bopomofo,
  ucp_Han,
  ucp_Yi,
  ucp_Tagalog,
  ucp_Hanunoo,
  ucp_Buhid,
  ucp_Tagbanwa,
  ucp_Limbu,
  ucp_Tai_Le,
  ucp_Linear_B,
  ucp_Cypriot,
  ucp_Buginese,
  ucp_Coptic,
  ucp_Glagolitic,
  ucp_Syloti_Nagri,
  ucp_Phags_Pa,
  ucp_Nko,
  ucp_Kayah_Li,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Mandaic,
  ucp_Chakma,
  ucp_Sharada,
  ucp_Takri,
  ucp_Duployan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Modi,
  ucp_Old_Permic,
  ucp_Psalter_Pahlavi,
  ucp_Khudawadi,
  ucp_Tirhuta,
  ucp_Multani,
  ucp_Adlam,
  ucp_Masaram_Gondi,
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Sogdian,
  ucp_Nandinagari,
  ucp_Yezidi,
  ucp_Cypro_Minoan,
  ucp_Old_Uyghur,

  /* Scripts which have no characters in other scripts. */
  ucp_Unknown,
  ucp_Common,
  ucp_Armenian,
  ucp_Hebrew,
  ucp_Thai,
  ucp_Lao,
  ucp_Tibetan,
  ucp_Ethiopic,
  ucp_Cherokee,
  ucp_Canadian_Aboriginal,
  ucp_Ogham,
  ucp_Runic,
  ucp_Khmer,
  ucp_Old_Italic,
  ucp_Gothic,
  ucp_Deseret,
  ucp_Inherited,
  ucp_Ugaritic,
  ucp_Shavian,
  ucp_Osmanya,
  ucp_Braille,
  ucp_New_Tai_Lue,
  ucp_Tifinagh,
  ucp_Old_Persian,
  ucp_Kharoshthi,
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Phoenician,
  ucp_Sundanese,
  ucp_Lepcha,
  ucp_Ol_Chiki,
  ucp_Vai,
  ucp_Saurashtra,
  ucp_Rejang,
  ucp_Lycian,
  ucp_Carian,
  ucp_Lydian,
  ucp_Cham,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  ucp_Avestan,
  ucp_Egyptian_Hieroglyphs,
  ucp_Samaritan,
  ucp_Lisu,
  ucp_Bamum,
  ucp_Meetei_Mayek,
  ucp_Imperial_Aramaic,
  ucp_Old_South_Arabian,
  ucp_Inscriptional_Parthian,
  ucp_Inscriptional_Pahlavi,
  ucp_Old_Turkic,
  ucp_Batak,
  ucp_Brahmi,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sora_Sompeng,
  ucp_Caucasian_Albanian,
  ucp_Bassa_Vah,
  ucp_Elbasan,
  ucp_Pahawh_Hmong,
  ucp_Mende_Kikakui,
  ucp_Mro,
  ucp_Old_North_Arabian,
  ucp_Nabataean,
  ucp_Palmyrene,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Warang_Citi,
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Elymaic,
  ucp_Nyiakeng_Puachue_Hmong,
  ucp_Wancho,
  ucp_Chorasmian,
  ucp_Dives_Akuru,
  ucp_Khitan_Small_Script,
  ucp_Tangsa,
  ucp_Toto,
  ucp_Vithkuqi,

  /* This must be last */
  ucp_Script_Count
};
|---|
| 387 |  | 
|---|
/* Size of entries in ucd_script_sets[]. NOTE(review): presumably a count of
array elements per entry rather than bytes - confirm against the definition of
ucd_script_sets[]. */

#define ucd_script_sets_item_size 3
|---|
| 391 |  | 
|---|
| 392 | #endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ | 
|---|
| 393 |  | 
|---|
| 394 | /* End of pcre2_ucp.h */ | 
|---|
| 395 |  | 
|---|