ucp.h source code [ClickHouse/contrib/poco/Foundation/src/ucp.h]

1	/*************************************************
2	* Unicode Property Table handler *
3	*************************************************/
4
5	#ifndef _UCP_H
6	#define _UCP_H
7
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre_compile.c. */
18
/* These are the general character categories. The enumerators take the
implicit values 0..6 in the order listed; new values must only ever be
appended at the end so that the existing numbers do not change. */

enum {
  ucp_C,     /* 0 Other */
  ucp_L,     /* 1 Letter */
  ucp_M,     /* 2 Mark */
  ucp_N,     /* 3 Number */
  ucp_P,     /* 4 Punctuation */
  ucp_S,     /* 5 Symbol */
  ucp_Z      /* 6 Separator */
};
30
/* These are the particular character categories. The enumerators take the
implicit values 0..29 in the order listed; new values must only ever be
appended at the end so that the existing numbers do not change. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};
65
/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. */

enum {
  ucp_gbCR,                /*  0 */
  ucp_gbLF,                /*  1 */
  ucp_gbControl,           /*  2 */
  ucp_gbExtend,            /*  3 */
  ucp_gbPrepend,           /*  4 */
  ucp_gbSpacingMark,       /*  5 */
  ucp_gbL,                 /*  6 Hangul syllable type L */
  ucp_gbV,                 /*  7 Hangul syllable type V */
  ucp_gbT,                 /*  8 Hangul syllable type T */
  ucp_gbLV,                /*  9 Hangul syllable type LV */
  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator, /* 11 */
  ucp_gbOther              /* 12 */
};
85
/* These are the script identifications. The enumerators take implicit
consecutive values starting at 0 in the order listed; scripts introduced by
later Unicode releases are appended in labelled groups at the end so that the
existing numbers do not change. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2: */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0: */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0: */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0: */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi
};
221
222	#endif
223
/* End of ucp.h */
225

Browse the source code of ClickHouse/contrib/poco/Foundation/src/ucp.h