/*************************************************
*          Unicode Property Table handler        *
*************************************************/

#ifndef _UCP_H
#define _UCP_H

/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre_compile.c. */

/* These are the general character categories. The enumerators take the
implicit values 0..6 in declaration order. Those values have to stay the same
as the ones generated by the maint/MultiStage2.py script and are also assumed
for the table called catposstab in pcre_compile.c, so entries must never be
reordered or inserted in the middle. */

enum {
  ucp_C,     /* 0 Other */
  ucp_L,     /* 1 Letter */
  ucp_M,     /* 2 Mark */
  ucp_N,     /* 3 Number */
  ucp_P,     /* 4 Punctuation */
  ucp_S,     /* 5 Symbol */
  ucp_Z      /* 6 Separator */
};

/* These are the particular character categories. The enumerators take the
implicit values 0..29 in declaration order. Those values have to stay the same
as the ones generated by the maint/MultiStage2.py script and are also assumed
for the table called catposstab in pcre_compile.c, so entries must never be
reordered or inserted in the middle. */

enum {
  ucp_Cc,    /*  0 Control */
  ucp_Cf,    /*  1 Format */
  ucp_Cn,    /*  2 Unassigned */
  ucp_Co,    /*  3 Private use */
  ucp_Cs,    /*  4 Surrogate */
  ucp_Ll,    /*  5 Lower case letter */
  ucp_Lm,    /*  6 Modifier letter */
  ucp_Lo,    /*  7 Other letter */
  ucp_Lt,    /*  8 Title case letter */
  ucp_Lu,    /*  9 Upper case letter */
  ucp_Mc,    /* 10 Spacing mark */
  ucp_Me,    /* 11 Enclosing mark */
  ucp_Mn,    /* 12 Non-spacing mark */
  ucp_Nd,    /* 13 Decimal number */
  ucp_Nl,    /* 14 Letter number */
  ucp_No,    /* 15 Other number */
  ucp_Pc,    /* 16 Connector punctuation */
  ucp_Pd,    /* 17 Dash punctuation */
  ucp_Pe,    /* 18 Close punctuation */
  ucp_Pf,    /* 19 Final punctuation */
  ucp_Pi,    /* 20 Initial punctuation */
  ucp_Po,    /* 21 Other punctuation */
  ucp_Ps,    /* 22 Open punctuation */
  ucp_Sc,    /* 23 Currency symbol */
  ucp_Sk,    /* 24 Modifier symbol */
  ucp_Sm,    /* 25 Mathematical symbol */
  ucp_So,    /* 26 Other symbol */
  ucp_Zl,    /* 27 Line separator */
  ucp_Zp,    /* 28 Paragraph separator */
  ucp_Zs     /* 29 Space separator */
};

/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. The enumerators
currently take the implicit values 0..12 in declaration order; new entries
belong at the end so that existing values do not change. */

enum {
  ucp_gbCR,                /*  0 */
  ucp_gbLF,                /*  1 */
  ucp_gbControl,           /*  2 */
  ucp_gbExtend,            /*  3 */
  ucp_gbPrepend,           /*  4 */
  ucp_gbSpacingMark,       /*  5 */
  ucp_gbL,                 /*  6 Hangul syllable type L */
  ucp_gbV,                 /*  7 Hangul syllable type V */
  ucp_gbT,                 /*  8 Hangul syllable type T */
  ucp_gbLV,                /*  9 Hangul syllable type LV */
  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator, /* 11 */
  ucp_gbOther              /* 12 */
};

/* These are the script identifications. The enumerators take implicit values
starting at 0 in declaration order. The first group (0..60) is alphabetical;
scripts introduced by later Unicode releases are appended after it, one group
per release, so that existing values never change. The values have to stay the
same as the ones generated by the maint/MultiStage2.py script. */

enum {
  ucp_Arabic,                  /* 0 */
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,                  /* 10 */
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,                /* 20 */
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,              /* 30 */
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,                   /* 40 */
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,                 /* 50 */
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,                      /* 60 */
  /* New for Unicode 5.0 (values 61..65): */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1 (values 66..76): */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2 (values 77..91): */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0 (values 92..94): */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0 (values 95..101): */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0 (values 102..124): */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi
};

#endif

/* End of ucp.h */