pcre_xclass.c source code [ClickHouse/contrib/poco/Foundation/src/pcre_xclass.c]

1	/*************************************************
2	* Perl-Compatible Regular Expressions *
3	*************************************************/
4
5	/* PCRE is a library of functions to support regular expressions whose syntax
6	and semantics are as close as possible to those of the Perl 5 language.
7
8	Written by Philip Hazel
9	Copyright (c) 1997-2013 University of Cambridge
10
11	-----------------------------------------------------------------------------
12	Redistribution and use in source and binary forms, with or without
13	modification, are permitted provided that the following conditions are met:
14
15	* Redistributions of source code must retain the above copyright notice,
16	this list of conditions and the following disclaimer.
17
18	* Redistributions in binary form must reproduce the above copyright
19	notice, this list of conditions and the following disclaimer in the
20	documentation and/or other materials provided with the distribution.
21
22	* Neither the name of the University of Cambridge nor the names of its
23	contributors may be used to endorse or promote products derived from
24	this software without specific prior written permission.
25
26	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36	POSSIBILITY OF SUCH DAMAGE.
37	-----------------------------------------------------------------------------
38	*/
39
40
41	/* This module contains an internal function that is used to match an extended
42	class. It is used by both pcre_exec() and pcre_dfa_exec(). */
43
44	#include "pcre_config.h"
45	#include "pcre_internal.h"
46
47
48	/*************************************************
49	* Match character against an XCLASS *
50	*************************************************/
51
52	/ This function is called to match a character against an extended class that*
53	might contain values > 255 and/or Unicode properties.
54
55	Arguments:
56	c the character
57	data points to the flag byte of the XCLASS data
58
59	Returns: TRUE if character matches, else FALSE
60	*/
61
62	BOOL
63	PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
64	{
65	pcre_uchar t;
66	BOOL negated = (*data & XCL_NOT) != `0`;
67
68	(void)utf;
69	#ifdef COMPILE_PCRE8
70	/ In 8 bit mode, this must always be TRUE. Help the compiler to know that. /
71	utf = TRUE;
72	#endif
73
74	/ Character values < 256 are matched against a bitmap, if one is present. If*
75	not, we still carry on, because there may be ranges that start below 256 in the
76	additional data. /*
77
78	if (c < `256`)
79	{
80	if ((*data & XCL_HASPROP) == `0`)
81	{
82	if ((data & XCL_MAP) == `0`) return* negated;
83	return (((pcre_uint8 *)(data + `1`))[c/`8`] & (`1` << (c&`7`))) != `0`;
84	}
85	if ((*data & XCL_MAP) != `0` &&
86	(((pcre_uint8 *)(data + `1`))[c/`8`] & (`1` << (c&`7`))) != `0`)
87	return !negated; / char found /
88	}
89
90	/ First skip the bit map if present. Then match against the list of Unicode*
91	properties or large chars or ranges that end with a large char. We won't ever
92	encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. /*
93
94	if ((data++ & XCL_MAP) != `0`) data += `32` / sizeof*(pcre_uchar);
95
96	while ((t = *data++) != XCL_END)
97	{
98	pcre_uint32 x, y;
99	if (t == XCL_SINGLE)
100	{
101	#ifdef SUPPORT_UTF
102	if (utf)
103	{
104	GETCHARINC(x, data); / macro generates multiple statements /
105	}
106	else
107	#endif
108	x = *data++;
109	if (c == x) return !negated;
110	}
111	else if (t == XCL_RANGE)
112	{
113	#ifdef SUPPORT_UTF
114	if (utf)
115	{
116	GETCHARINC(x, data); / macro generates multiple statements /
117	GETCHARINC(y, data); / macro generates multiple statements /
118	}
119	else
120	#endif
121	{
122	x = *data++;
123	y = *data++;
124	}
125	if (c >= x && c <= y) return !negated;
126	}
127
128	#ifdef SUPPORT_UCP
129	else / XCL_PROP & XCL_NOTPROP /
130	{
131	const ucd_record *prop = GET_UCD(c);
132	BOOL isprop = t == XCL_PROP;
133
134	switch(*data)
135	{
136	case PT_ANY:
137	if (isprop) return !negated;
138	break;
139
140	case PT_LAMP:
141	if ((prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
142	prop->chartype == ucp_Lt) == isprop) return !negated;
143	break;
144
145	case PT_GC:
146	if ((data[`1`] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
147	return !negated;
148	break;
149
150	case PT_PC:
151	if ((data[`1`] == prop->chartype) == isprop) return !negated;
152	break;
153
154	case PT_SC:
155	if ((data[`1`] == prop->script) == isprop) return !negated;
156	break;
157
158	case PT_ALNUM:
159	if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
160	PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
161	return !negated;
162	break;
163
164	/ Perl space used to exclude VT, but from Perl 5.18 it is included,*
165	which means that Perl space and POSIX space are now identical. PCRE
166	was changed at release 8.34. /*
167
168	case PT_SPACE: / Perl space /
169	case PT_PXSPACE: / POSIX space /
170	switch(c)
171	{
172	HSPACE_CASES:
173	VSPACE_CASES:
174	if (isprop) return !negated;
175	break;
176
177	default:
178	if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
179	return !negated;
180	break;
181	}
182	break;
183
184	case PT_WORD:
185	if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
186	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\| c == CHAR_UNDERSCORE)
187	== isprop)
188	return !negated;
189	break;
190
191	case PT_UCNC:
192	if (c < `0xa0`)
193	{
194	if ((c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
195	c == CHAR_GRAVE_ACCENT) == isprop)
196	return !negated;
197	}
198	else
199	{
200	if ((c < `0xd800` \|\| c > `0xdfff`) == isprop)
201	return !negated;
202	}
203	break;
204
205	/ The following three properties can occur only in an XCLASS, as there*
206	is no \p or \P coding for them. /*
207
208	/ Graphic character. Implement this as not Z (space or separator) and*
209	not C (other), except for Cf (format) with a few exceptions. This seems
210	to be what Perl does. The exceptional characters are:
211
212	U+061C Arabic Letter Mark
213	U+180E Mongolian Vowel Separator
214	U+2066 - U+2069 Various "isolate"s
215	*/
216
217	case PT_PXGRAPH:
218	if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
219	(PRIV(ucp_gentype)[prop->chartype] != ucp_C \|\|
220	(prop->chartype == ucp_Cf &&
221	c != `0x061c` && c != `0x180e` && (c < `0x2066` \|\| c > `0x2069`))
222	)) == isprop)
223	return !negated;
224	break;
225
226	/ Printable character: same as graphic, with the addition of Zs, i.e.*
227	not Zl and not Zp, and U+180E. /*
228
229	case PT_PXPRINT:
230	if ((prop->chartype != ucp_Zl &&
231	prop->chartype != ucp_Zp &&
232	(PRIV(ucp_gentype)[prop->chartype] != ucp_C \|\|
233	(prop->chartype == ucp_Cf &&
234	c != `0x061c` && (c < `0x2066` \|\| c > `0x2069`))
235	)) == isprop)
236	return !negated;
237	break;
238
239	/ Punctuation: all Unicode punctuation, plus ASCII characters that*
240	Unicode treats as symbols rather than punctuation, for Perl
241	compatibility (these are $+<=>^`\|~). /*
242
243	case PT_PXPUNCT:
244	if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P \|\|
245	(c < `128` && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
246	return !negated;
247	break;
248
249	/ This should never occur, but compilers may mutter if there is no*
250	default. /*
251
252	default:
253	return FALSE;
254	}
255
256	data += `2`;
257	}
258	#endif /* SUPPORT_UCP */
259	}
260
261	return negated; / char did not match /
262	}
263
264	/* End of pcre_xclass.c */
265

Browse the source code of ClickHouse/contrib/poco/Foundation/src/pcre_xclass.c