1 | /************************************************* |
2 | * Unicode Property Table handler * |
3 | *************************************************/ |
4 | |
5 | #ifndef _UCP_H |
6 | #define _UCP_H |
7 | |
8 | /* This file contains definitions of the property values that are returned by |
9 | the UCD access macros. New values that are added for new releases of Unicode |
10 | should always be at the end of each enum, for backwards compatibility. |
11 | |
12 | IMPORTANT: Note also that the specific numeric values of the enums have to be |
13 | the same as the values that are generated by the maint/MultiStage2.py script, |
14 | where the equivalent property descriptive names are listed in vectors. |
15 | |
16 | ALSO: The specific values of the first two enums are assumed for the table |
17 | called catposstab in pcre_compile.c. */ |
18 | |
19 | /* These are the general character categories. */ |
20 | |
enum {
  /* Each value is written out explicitly. They are the same 0..6 sequence
     that implicit numbering yields, and they have to stay in step with the
     tables generated by maint/MultiStage2.py and with catposstab in
     pcre_compile.c. */
  ucp_C = 0,     /* Other */
  ucp_L = 1,     /* Letter */
  ucp_M = 2,     /* Mark */
  ucp_N = 3,     /* Number */
  ucp_P = 4,     /* Punctuation */
  ucp_S = 5,     /* Symbol */
  ucp_Z = 6      /* Separator */
};
30 | |
31 | /* These are the particular character categories. */ |
32 | |
enum {
  /* Explicit values, identical to what implicit numbering from zero gives.
     Entries are grouped by their leading category letter. The values must
     agree with the tables generated by maint/MultiStage2.py and with
     catposstab in pcre_compile.c. */

  /* C* */
  ucp_Cc = 0,    /* Control */
  ucp_Cf = 1,    /* Format */
  ucp_Cn = 2,    /* Unassigned */
  ucp_Co = 3,    /* Private use */
  ucp_Cs = 4,    /* Surrogate */

  /* L* */
  ucp_Ll = 5,    /* Lower case letter */
  ucp_Lm = 6,    /* Modifier letter */
  ucp_Lo = 7,    /* Other letter */
  ucp_Lt = 8,    /* Title case letter */
  ucp_Lu = 9,    /* Upper case letter */

  /* M* */
  ucp_Mc = 10,   /* Spacing mark */
  ucp_Me = 11,   /* Enclosing mark */
  ucp_Mn = 12,   /* Non-spacing mark */

  /* N* */
  ucp_Nd = 13,   /* Decimal number */
  ucp_Nl = 14,   /* Letter number */
  ucp_No = 15,   /* Other number */

  /* P* */
  ucp_Pc = 16,   /* Connector punctuation */
  ucp_Pd = 17,   /* Dash punctuation */
  ucp_Pe = 18,   /* Close punctuation */
  ucp_Pf = 19,   /* Final punctuation */
  ucp_Pi = 20,   /* Initial punctuation */
  ucp_Po = 21,   /* Other punctuation */
  ucp_Ps = 22,   /* Open punctuation */

  /* S* */
  ucp_Sc = 23,   /* Currency symbol */
  ucp_Sk = 24,   /* Modifier symbol */
  ucp_Sm = 25,   /* Mathematical symbol */
  ucp_So = 26,   /* Other symbol */

  /* Z* */
  ucp_Zl = 27,   /* Line separator */
  ucp_Zp = 28,   /* Paragraph separator */
  ucp_Zs = 29    /* Space separator */
};
65 | |
66 | /* These are grapheme break properties. Note that the code for processing them |
67 | assumes that the values are less than 16. If more values are added that take |
68 | the number to 16 or more, the code will have to be rewritten. */ |
69 | |
enum {
  /* Values are stated as initialisers rather than in comments. They run
     from 0 to 12, which keeps every value below the limit of 16 that the
     code processing these properties assumes. */
  ucp_gbCR                = 0,
  ucp_gbLF                = 1,
  ucp_gbControl           = 2,
  ucp_gbExtend            = 3,
  ucp_gbPrepend           = 4,
  ucp_gbSpacingMark       = 5,
  ucp_gbL                 = 6,    /* Hangul syllable type L */
  ucp_gbV                 = 7,    /* Hangul syllable type V */
  ucp_gbT                 = 8,    /* Hangul syllable type T */
  ucp_gbLV                = 9,    /* Hangul syllable type LV */
  ucp_gbLVT               = 10,   /* Hangul syllable type LVT */
  ucp_gbRegionalIndicator = 11,
  ucp_gbOther             = 12
};
85 | |
86 | /* These are the script identifications. */ |
87 | |
enum {
  /* Every script code is given its explicit value; these are exactly the
     values that implicit numbering from zero produced. The original
     scripts come first, followed by one group per later Unicode release,
     so additions only ever extend the end of the list. The values must
     match those generated by maint/MultiStage2.py. */

  /* Original set: 0..60 */
  ucp_Arabic                 = 0,
  ucp_Armenian               = 1,
  ucp_Bengali                = 2,
  ucp_Bopomofo               = 3,
  ucp_Braille                = 4,
  ucp_Buginese               = 5,
  ucp_Buhid                  = 6,
  ucp_Canadian_Aboriginal    = 7,
  ucp_Cherokee               = 8,
  ucp_Common                 = 9,
  ucp_Coptic                 = 10,
  ucp_Cypriot                = 11,
  ucp_Cyrillic               = 12,
  ucp_Deseret                = 13,
  ucp_Devanagari             = 14,
  ucp_Ethiopic               = 15,
  ucp_Georgian               = 16,
  ucp_Glagolitic             = 17,
  ucp_Gothic                 = 18,
  ucp_Greek                  = 19,
  ucp_Gujarati               = 20,
  ucp_Gurmukhi               = 21,
  ucp_Han                    = 22,
  ucp_Hangul                 = 23,
  ucp_Hanunoo                = 24,
  ucp_Hebrew                 = 25,
  ucp_Hiragana               = 26,
  ucp_Inherited              = 27,
  ucp_Kannada                = 28,
  ucp_Katakana               = 29,
  ucp_Kharoshthi             = 30,
  ucp_Khmer                  = 31,
  ucp_Lao                    = 32,
  ucp_Latin                  = 33,
  ucp_Limbu                  = 34,
  ucp_Linear_B               = 35,
  ucp_Malayalam              = 36,
  ucp_Mongolian              = 37,
  ucp_Myanmar                = 38,
  ucp_New_Tai_Lue            = 39,
  ucp_Ogham                  = 40,
  ucp_Old_Italic             = 41,
  ucp_Old_Persian            = 42,
  ucp_Oriya                  = 43,
  ucp_Osmanya                = 44,
  ucp_Runic                  = 45,
  ucp_Shavian                = 46,
  ucp_Sinhala                = 47,
  ucp_Syloti_Nagri           = 48,
  ucp_Syriac                 = 49,
  ucp_Tagalog                = 50,
  ucp_Tagbanwa               = 51,
  ucp_Tai_Le                 = 52,
  ucp_Tamil                  = 53,
  ucp_Telugu                 = 54,
  ucp_Thaana                 = 55,
  ucp_Thai                   = 56,
  ucp_Tibetan                = 57,
  ucp_Tifinagh               = 58,
  ucp_Ugaritic               = 59,
  ucp_Yi                     = 60,

  /* Added for Unicode 5.0: 61..65 */
  ucp_Balinese               = 61,
  ucp_Cuneiform              = 62,
  ucp_Nko                    = 63,
  ucp_Phags_Pa               = 64,
  ucp_Phoenician             = 65,

  /* Added for Unicode 5.1: 66..76 */
  ucp_Carian                 = 66,
  ucp_Cham                   = 67,
  ucp_Kayah_Li               = 68,
  ucp_Lepcha                 = 69,
  ucp_Lycian                 = 70,
  ucp_Lydian                 = 71,
  ucp_Ol_Chiki               = 72,
  ucp_Rejang                 = 73,
  ucp_Saurashtra             = 74,
  ucp_Sundanese              = 75,
  ucp_Vai                    = 76,

  /* Added for Unicode 5.2: 77..91 */
  ucp_Avestan                = 77,
  ucp_Bamum                  = 78,
  ucp_Egyptian_Hieroglyphs   = 79,
  ucp_Imperial_Aramaic       = 80,
  ucp_Inscriptional_Pahlavi  = 81,
  ucp_Inscriptional_Parthian = 82,
  ucp_Javanese               = 83,
  ucp_Kaithi                 = 84,
  ucp_Lisu                   = 85,
  ucp_Meetei_Mayek           = 86,
  ucp_Old_South_Arabian      = 87,
  ucp_Old_Turkic             = 88,
  ucp_Samaritan              = 89,
  ucp_Tai_Tham               = 90,
  ucp_Tai_Viet               = 91,

  /* Added for Unicode 6.0.0: 92..94 */
  ucp_Batak                  = 92,
  ucp_Brahmi                 = 93,
  ucp_Mandaic                = 94,

  /* Added for Unicode 6.1.0: 95..101 */
  ucp_Chakma                 = 95,
  ucp_Meroitic_Cursive       = 96,
  ucp_Meroitic_Hieroglyphs   = 97,
  ucp_Miao                   = 98,
  ucp_Sharada                = 99,
  ucp_Sora_Sompeng           = 100,
  ucp_Takri                  = 101,

  /* Added for Unicode 7.0.0: 102..124 */
  ucp_Bassa_Vah              = 102,
  ucp_Caucasian_Albanian     = 103,
  ucp_Duployan               = 104,
  ucp_Elbasan                = 105,
  ucp_Grantha                = 106,
  ucp_Khojki                 = 107,
  ucp_Khudawadi              = 108,
  ucp_Linear_A               = 109,
  ucp_Mahajani               = 110,
  ucp_Manichaean             = 111,
  ucp_Mende_Kikakui          = 112,
  ucp_Modi                   = 113,
  ucp_Mro                    = 114,
  ucp_Nabataean              = 115,
  ucp_Old_North_Arabian      = 116,
  ucp_Old_Permic             = 117,
  ucp_Pahawh_Hmong           = 118,
  ucp_Palmyrene              = 119,
  ucp_Psalter_Pahlavi        = 120,
  ucp_Pau_Cin_Hau            = 121,
  ucp_Siddham                = 122,
  ucp_Tirhuta                = 123,
  ucp_Warang_Citi            = 124
};
221 | |
222 | #endif |
223 | |
224 | /* End of ucp.h */ |
225 | |