1 | /************************************************* |
2 | * Perl-Compatible Regular Expressions * |
3 | *************************************************/ |
4 | |
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | and semantics are as close as possible to those of the Perl 5 language. |
7 | |
8 | Written by Philip Hazel |
9 | Original API code Copyright (c) 1997-2012 University of Cambridge |
10 | New API code Copyright (c) 2016-2022 University of Cambridge |
11 | |
12 | This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY! |
13 | Instead, modify the maint/GenerateUcpHeader.py script and run it to generate |
14 | a new version of this code. |
15 | |
16 | ----------------------------------------------------------------------------- |
17 | Redistribution and use in source and binary forms, with or without |
18 | modification, are permitted provided that the following conditions are met: |
19 | |
20 | * Redistributions of source code must retain the above copyright notice, |
21 | this list of conditions and the following disclaimer. |
22 | |
23 | * Redistributions in binary form must reproduce the above copyright |
24 | notice, this list of conditions and the following disclaimer in the |
25 | documentation and/or other materials provided with the distribution. |
26 | |
27 | * Neither the name of the University of Cambridge nor the names of its |
28 | contributors may be used to endorse or promote products derived from |
29 | this software without specific prior written permission. |
30 | |
31 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
32 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
33 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
34 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
35 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
36 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
37 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
38 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
39 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
40 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
41 | POSSIBILITY OF SUCH DAMAGE. |
42 | ----------------------------------------------------------------------------- |
43 | */ |
44 | |
45 | #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD |
46 | #define PCRE2_UCP_H_IDEMPOTENT_GUARD |
47 | |
48 | /* This file contains definitions of the Unicode property values that are |
49 | returned by the UCD access macros and used throughout PCRE2. |
50 | |
51 | IMPORTANT: The specific values of the first two enums (general and particular |
52 | character categories) are assumed by the table called catposstab in the file |
53 | pcre2_auto_possess.c. They are unlikely to change, but should be checked after |
54 | an update. */ |
55 | |
/* These are the general character categories. The numeric values of these
enumerators are assumed by the catposstab table in pcre2_auto_possess.c, so
that table must be re-checked if the order here ever changes. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};
67 | |
/* These are the particular character categories. The numeric values of these
enumerators are assumed by the catposstab table in pcre2_auto_possess.c, so
that table must be re-checked if the order here ever changes. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};
102 | |
/* These are Boolean properties. The final enumerator, ucp_Bprop_Count, is not
a property: because it comes last, its value is the number of properties
listed before it. */

enum {
  ucp_ASCII,
  ucp_ASCII_Hex_Digit,
  ucp_Alphabetic,
  ucp_Bidi_Control,
  ucp_Bidi_Mirrored,
  ucp_Case_Ignorable,
  ucp_Cased,
  ucp_Changes_When_Casefolded,
  ucp_Changes_When_Casemapped,
  ucp_Changes_When_Lowercased,
  ucp_Changes_When_Titlecased,
  ucp_Changes_When_Uppercased,
  ucp_Dash,
  ucp_Default_Ignorable_Code_Point,
  ucp_Deprecated,
  ucp_Diacritic,
  ucp_Emoji,
  ucp_Emoji_Component,
  ucp_Emoji_Modifier,
  ucp_Emoji_Modifier_Base,
  ucp_Emoji_Presentation,
  ucp_Extended_Pictographic,
  ucp_Extender,
  ucp_Grapheme_Base,
  ucp_Grapheme_Extend,
  ucp_Grapheme_Link,
  ucp_Hex_Digit,
  ucp_IDS_Binary_Operator,
  ucp_IDS_Trinary_Operator,
  ucp_ID_Continue,
  ucp_ID_Start,
  ucp_Ideographic,
  ucp_Join_Control,
  ucp_Logical_Order_Exception,
  ucp_Lowercase,
  ucp_Math,
  ucp_Noncharacter_Code_Point,
  ucp_Pattern_Syntax,
  ucp_Pattern_White_Space,
  ucp_Prepended_Concatenation_Mark,
  ucp_Quotation_Mark,
  ucp_Radical,
  ucp_Regional_Indicator,
  ucp_Sentence_Terminal,
  ucp_Soft_Dotted,
  ucp_Terminal_Punctuation,
  ucp_Unified_Ideograph,
  ucp_Uppercase,
  ucp_Variation_Selector,
  ucp_White_Space,
  ucp_XID_Continue,
  ucp_XID_Start,
  /* This must be last */
  ucp_Bprop_Count
};
161 | |
/* Size of entries in ucd_boolprop_sets[].
NOTE(review): the unit is not stated in this file; presumably it is the number
of array elements that make up one entry - confirm against the definition of
ucd_boolprop_sets[]. */

#define ucd_boolprop_sets_item_size 2
165 | |
/* These are the bidi class values. */

enum {
  ucp_bidiAL,   /* Arabic letter */
  ucp_bidiAN,   /* Arabic number */
  ucp_bidiB,    /* Paragraph separator */
  ucp_bidiBN,   /* Boundary neutral */
  ucp_bidiCS,   /* Common separator */
  ucp_bidiEN,   /* European number */
  ucp_bidiES,   /* European separator */
  ucp_bidiET,   /* European terminator */
  ucp_bidiFSI,  /* First strong isolate */
  ucp_bidiL,    /* Left to right */
  ucp_bidiLRE,  /* Left to right embedding */
  ucp_bidiLRI,  /* Left to right isolate */
  ucp_bidiLRO,  /* Left to right override */
  ucp_bidiNSM,  /* Non-spacing mark */
  ucp_bidiON,   /* Other neutral */
  ucp_bidiPDF,  /* Pop directional format */
  ucp_bidiPDI,  /* Pop directional isolate */
  ucp_bidiR,    /* Right to left */
  ucp_bidiRLE,  /* Right to left embedding */
  ucp_bidiRLI,  /* Right to left isolate */
  ucp_bidiRLO,  /* Right to left override */
  ucp_bidiS,    /* Segment separator */
  ucp_bidiWS    /* White space */
};
193 | |
/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. The number in each comment is the value of
that enumerator. */

enum {
  ucp_gbCR,                    /*  0 */
  ucp_gbLF,                    /*  1 */
  ucp_gbControl,               /*  2 */
  ucp_gbExtend,                /*  3 */
  ucp_gbPrepend,               /*  4 */
  ucp_gbSpacingMark,           /*  5 */
  ucp_gbL,                     /*  6 Hangul syllable type L */
  ucp_gbV,                     /*  7 Hangul syllable type V */
  ucp_gbT,                     /*  8 Hangul syllable type T */
  ucp_gbLV,                    /*  9 Hangul syllable type LV */
  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
  ucp_gbRegional_Indicator,    /* 11 */
  ucp_gbOther,                 /* 12 */
  ucp_gbZWJ,                   /* 13 */
  ucp_gbExtended_Pictographic  /* 14 */
};
214 | |
/* These are the script identifications. The list is split into two groups,
and the final enumerator, ucp_Script_Count, is not a script: because it comes
last, its value is the number of scripts listed before it. */

enum {
  /* Scripts which have characters in other scripts. */
  ucp_Latin,
  ucp_Greek,
  ucp_Cyrillic,
  ucp_Arabic,
  ucp_Syriac,
  ucp_Thaana,
  ucp_Devanagari,
  ucp_Bengali,
  ucp_Gurmukhi,
  ucp_Gujarati,
  ucp_Oriya,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Kannada,
  ucp_Malayalam,
  ucp_Sinhala,
  ucp_Myanmar,
  ucp_Georgian,
  ucp_Hangul,
  ucp_Mongolian,
  ucp_Hiragana,
  ucp_Katakana,
  ucp_Bopomofo,
  ucp_Han,
  ucp_Yi,
  ucp_Tagalog,
  ucp_Hanunoo,
  ucp_Buhid,
  ucp_Tagbanwa,
  ucp_Limbu,
  ucp_Tai_Le,
  ucp_Linear_B,
  ucp_Cypriot,
  ucp_Buginese,
  ucp_Coptic,
  ucp_Glagolitic,
  ucp_Syloti_Nagri,
  ucp_Phags_Pa,
  ucp_Nko,
  ucp_Kayah_Li,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Mandaic,
  ucp_Chakma,
  ucp_Sharada,
  ucp_Takri,
  ucp_Duployan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Modi,
  ucp_Old_Permic,
  ucp_Psalter_Pahlavi,
  ucp_Khudawadi,
  ucp_Tirhuta,
  ucp_Multani,
  ucp_Adlam,
  ucp_Masaram_Gondi,
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Sogdian,
  ucp_Nandinagari,
  ucp_Yezidi,
  ucp_Cypro_Minoan,
  ucp_Old_Uyghur,

  /* Scripts which have no characters in other scripts. */
  ucp_Unknown,
  ucp_Common,
  ucp_Armenian,
  ucp_Hebrew,
  ucp_Thai,
  ucp_Lao,
  ucp_Tibetan,
  ucp_Ethiopic,
  ucp_Cherokee,
  ucp_Canadian_Aboriginal,
  ucp_Ogham,
  ucp_Runic,
  ucp_Khmer,
  ucp_Old_Italic,
  ucp_Gothic,
  ucp_Deseret,
  ucp_Inherited,
  ucp_Ugaritic,
  ucp_Shavian,
  ucp_Osmanya,
  ucp_Braille,
  ucp_New_Tai_Lue,
  ucp_Tifinagh,
  ucp_Old_Persian,
  ucp_Kharoshthi,
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Phoenician,
  ucp_Sundanese,
  ucp_Lepcha,
  ucp_Ol_Chiki,
  ucp_Vai,
  ucp_Saurashtra,
  ucp_Rejang,
  ucp_Lycian,
  ucp_Carian,
  ucp_Lydian,
  ucp_Cham,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  ucp_Avestan,
  ucp_Egyptian_Hieroglyphs,
  ucp_Samaritan,
  ucp_Lisu,
  ucp_Bamum,
  ucp_Meetei_Mayek,
  ucp_Imperial_Aramaic,
  ucp_Old_South_Arabian,
  ucp_Inscriptional_Parthian,
  ucp_Inscriptional_Pahlavi,
  ucp_Old_Turkic,
  ucp_Batak,
  ucp_Brahmi,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sora_Sompeng,
  ucp_Caucasian_Albanian,
  ucp_Bassa_Vah,
  ucp_Elbasan,
  ucp_Pahawh_Hmong,
  ucp_Mende_Kikakui,
  ucp_Mro,
  ucp_Old_North_Arabian,
  ucp_Nabataean,
  ucp_Palmyrene,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Warang_Citi,
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Elymaic,
  ucp_Nyiakeng_Puachue_Hmong,
  ucp_Wancho,
  ucp_Chorasmian,
  ucp_Dives_Akuru,
  ucp_Khitan_Small_Script,
  ucp_Tangsa,
  ucp_Toto,
  ucp_Vithkuqi,

  /* This must be last */
  ucp_Script_Count
};
387 | |
/* Size of entries in ucd_script_sets[].
NOTE(review): the unit is not stated in this file; presumably it is the number
of array elements that make up one entry - confirm against the definition of
ucd_script_sets[]. */

#define ucd_script_sets_item_size 3
391 | |
392 | #endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ |
393 | |
394 | /* End of pcre2_ucp.h */ |
395 | |