pcre2_ucp.h source code [Godot/thirdparty/pcre2/src/pcre2_ucp.h]

1	/*************************************************
2	* Perl-Compatible Regular Expressions *
3	*************************************************/
4
5	/* PCRE is a library of functions to support regular expressions whose syntax
6	and semantics are as close as possible to those of the Perl 5 language.
7
8	Written by Philip Hazel
9	Original API code Copyright (c) 1997-2012 University of Cambridge
10	New API code Copyright (c) 2016-2022 University of Cambridge
11
12	This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
13	Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
14	a new version of this code.
15
16	-----------------------------------------------------------------------------
17	Redistribution and use in source and binary forms, with or without
18	modification, are permitted provided that the following conditions are met:
19
20	* Redistributions of source code must retain the above copyright notice,
21	this list of conditions and the following disclaimer.
22
23	* Redistributions in binary form must reproduce the above copyright
24	notice, this list of conditions and the following disclaimer in the
25	documentation and/or other materials provided with the distribution.
26
27	* Neither the name of the University of Cambridge nor the names of its
28	contributors may be used to endorse or promote products derived from
29	this software without specific prior written permission.
30
31	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41	POSSIBILITY OF SUCH DAMAGE.
42	-----------------------------------------------------------------------------
43	*/
44
45	#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
46	#define PCRE2_UCP_H_IDEMPOTENT_GUARD
47
48	/* This file contains definitions of the Unicode property values that are
49	returned by the UCD access macros and used throughout PCRE2.
50
51	IMPORTANT: The specific values of the first two enums (general and particular
52	character categories) are assumed by the table called catposstab in the file
53	pcre2_auto_possess.c. They are unlikely to change, but should be checked after
54	an update. */
55
/* These are the general character categories. The numeric values are assumed
by the table called catposstab in pcre2_auto_possess.c, so the order of the
entries must not be changed without checking that table. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z,     /* Separator */
};
67
/* These are the particular character categories. The numeric values are
assumed by the table called catposstab in pcre2_auto_possess.c, so the order
of the entries must not be changed without checking that table. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs,    /* Space separator */
};
102
/* These are Boolean properties. The final enumerator, ucp_Bprop_Count, is not
a property itself: because it comes last, its value is the number of
properties listed before it. */

enum {
  ucp_ASCII,
  ucp_ASCII_Hex_Digit,
  ucp_Alphabetic,
  ucp_Bidi_Control,
  ucp_Bidi_Mirrored,
  ucp_Case_Ignorable,
  ucp_Cased,
  ucp_Changes_When_Casefolded,
  ucp_Changes_When_Casemapped,
  ucp_Changes_When_Lowercased,
  ucp_Changes_When_Titlecased,
  ucp_Changes_When_Uppercased,
  ucp_Dash,
  ucp_Default_Ignorable_Code_Point,
  ucp_Deprecated,
  ucp_Diacritic,
  ucp_Emoji,
  ucp_Emoji_Component,
  ucp_Emoji_Modifier,
  ucp_Emoji_Modifier_Base,
  ucp_Emoji_Presentation,
  ucp_Extended_Pictographic,
  ucp_Extender,
  ucp_Grapheme_Base,
  ucp_Grapheme_Extend,
  ucp_Grapheme_Link,
  ucp_Hex_Digit,
  ucp_IDS_Binary_Operator,
  ucp_IDS_Trinary_Operator,
  ucp_ID_Continue,
  ucp_ID_Start,
  ucp_Ideographic,
  ucp_Join_Control,
  ucp_Logical_Order_Exception,
  ucp_Lowercase,
  ucp_Math,
  ucp_Noncharacter_Code_Point,
  ucp_Pattern_Syntax,
  ucp_Pattern_White_Space,
  ucp_Prepended_Concatenation_Mark,
  ucp_Quotation_Mark,
  ucp_Radical,
  ucp_Regional_Indicator,
  ucp_Sentence_Terminal,
  ucp_Soft_Dotted,
  ucp_Terminal_Punctuation,
  ucp_Unified_Ideograph,
  ucp_Uppercase,
  ucp_Variation_Selector,
  ucp_White_Space,
  ucp_XID_Continue,
  ucp_XID_Start,
  /* This must be last */
  ucp_Bprop_Count
};
161
/* Size of entries in ucd_boolprop_sets[]. NOTE(review): the unit of this size
is not visible in this header; confirm it at the definition of the table. */

#define ucd_boolprop_sets_item_size 2
165
/* These are the bidi class values. */

enum {
  ucp_bidiAL,   /* Arabic letter */
  ucp_bidiAN,   /* Arabic number */
  ucp_bidiB,    /* Paragraph separator */
  ucp_bidiBN,   /* Boundary neutral */
  ucp_bidiCS,   /* Common separator */
  ucp_bidiEN,   /* European number */
  ucp_bidiES,   /* European separator */
  ucp_bidiET,   /* European terminator */
  ucp_bidiFSI,  /* First strong isolate */
  ucp_bidiL,    /* Left to right */
  ucp_bidiLRE,  /* Left to right embedding */
  ucp_bidiLRI,  /* Left to right isolate */
  ucp_bidiLRO,  /* Left to right override */
  ucp_bidiNSM,  /* Non-spacing mark */
  ucp_bidiON,   /* Other neutral */
  ucp_bidiPDF,  /* Pop directional format */
  ucp_bidiPDI,  /* Pop directional isolate */
  ucp_bidiR,    /* Right to left */
  ucp_bidiRLE,  /* Right to left embedding */
  ucp_bidiRLI,  /* Right to left isolate */
  ucp_bidiRLO,  /* Right to left override */
  ucp_bidiS,    /* Segment separator */
  ucp_bidiWS,   /* White space */
};
193
/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. The number in each comment is the value
of that enumerator. */

enum {
  ucp_gbCR,                    /*  0 */
  ucp_gbLF,                    /*  1 */
  ucp_gbControl,               /*  2 */
  ucp_gbExtend,                /*  3 */
  ucp_gbPrepend,               /*  4 */
  ucp_gbSpacingMark,           /*  5 */
  ucp_gbL,                     /*  6 Hangul syllable type L */
  ucp_gbV,                     /*  7 Hangul syllable type V */
  ucp_gbT,                     /*  8 Hangul syllable type T */
  ucp_gbLV,                    /*  9 Hangul syllable type LV */
  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
  ucp_gbRegional_Indicator,    /* 11 */
  ucp_gbOther,                 /* 12 */
  ucp_gbZWJ,                   /* 13 */
  ucp_gbExtended_Pictographic, /* 14 */
};
214
/* These are the script identifications. The list is split into two groups,
and the final enumerator, ucp_Script_Count, is not a script itself: because it
comes last, its value is the number of scripts listed before it. */

enum {
  /* Scripts which have characters in other scripts. */
  ucp_Latin,
  ucp_Greek,
  ucp_Cyrillic,
  ucp_Arabic,
  ucp_Syriac,
  ucp_Thaana,
  ucp_Devanagari,
  ucp_Bengali,
  ucp_Gurmukhi,
  ucp_Gujarati,
  ucp_Oriya,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Kannada,
  ucp_Malayalam,
  ucp_Sinhala,
  ucp_Myanmar,
  ucp_Georgian,
  ucp_Hangul,
  ucp_Mongolian,
  ucp_Hiragana,
  ucp_Katakana,
  ucp_Bopomofo,
  ucp_Han,
  ucp_Yi,
  ucp_Tagalog,
  ucp_Hanunoo,
  ucp_Buhid,
  ucp_Tagbanwa,
  ucp_Limbu,
  ucp_Tai_Le,
  ucp_Linear_B,
  ucp_Cypriot,
  ucp_Buginese,
  ucp_Coptic,
  ucp_Glagolitic,
  ucp_Syloti_Nagri,
  ucp_Phags_Pa,
  ucp_Nko,
  ucp_Kayah_Li,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Mandaic,
  ucp_Chakma,
  ucp_Sharada,
  ucp_Takri,
  ucp_Duployan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Modi,
  ucp_Old_Permic,
  ucp_Psalter_Pahlavi,
  ucp_Khudawadi,
  ucp_Tirhuta,
  ucp_Multani,
  ucp_Adlam,
  ucp_Masaram_Gondi,
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Sogdian,
  ucp_Nandinagari,
  ucp_Yezidi,
  ucp_Cypro_Minoan,
  ucp_Old_Uyghur,

  /* Scripts which have no characters in other scripts. */
  ucp_Unknown,
  ucp_Common,
  ucp_Armenian,
  ucp_Hebrew,
  ucp_Thai,
  ucp_Lao,
  ucp_Tibetan,
  ucp_Ethiopic,
  ucp_Cherokee,
  ucp_Canadian_Aboriginal,
  ucp_Ogham,
  ucp_Runic,
  ucp_Khmer,
  ucp_Old_Italic,
  ucp_Gothic,
  ucp_Deseret,
  ucp_Inherited,
  ucp_Ugaritic,
  ucp_Shavian,
  ucp_Osmanya,
  ucp_Braille,
  ucp_New_Tai_Lue,
  ucp_Tifinagh,
  ucp_Old_Persian,
  ucp_Kharoshthi,
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Phoenician,
  ucp_Sundanese,
  ucp_Lepcha,
  ucp_Ol_Chiki,
  ucp_Vai,
  ucp_Saurashtra,
  ucp_Rejang,
  ucp_Lycian,
  ucp_Carian,
  ucp_Lydian,
  ucp_Cham,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  ucp_Avestan,
  ucp_Egyptian_Hieroglyphs,
  ucp_Samaritan,
  ucp_Lisu,
  ucp_Bamum,
  ucp_Meetei_Mayek,
  ucp_Imperial_Aramaic,
  ucp_Old_South_Arabian,
  ucp_Inscriptional_Parthian,
  ucp_Inscriptional_Pahlavi,
  ucp_Old_Turkic,
  ucp_Batak,
  ucp_Brahmi,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sora_Sompeng,
  ucp_Caucasian_Albanian,
  ucp_Bassa_Vah,
  ucp_Elbasan,
  ucp_Pahawh_Hmong,
  ucp_Mende_Kikakui,
  ucp_Mro,
  ucp_Old_North_Arabian,
  ucp_Nabataean,
  ucp_Palmyrene,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Warang_Citi,
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Elymaic,
  ucp_Nyiakeng_Puachue_Hmong,
  ucp_Wancho,
  ucp_Chorasmian,
  ucp_Dives_Akuru,
  ucp_Khitan_Small_Script,
  ucp_Tangsa,
  ucp_Toto,
  ucp_Vithkuqi,

  /* This must be last */
  ucp_Script_Count
};
387
/* Size of entries in ucd_script_sets[]. NOTE(review): the unit of this size
is not visible in this header; confirm it at the definition of the table. */

#define ucd_script_sets_item_size 3
391
392	#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
393
394	/* End of pcre2_ucp.h */
395

Browse the source code of Godot/thirdparty/pcre2/src/pcre2_ucp.h