regex.cpp source code [ClickHouse/contrib/libcxx/src/regex.cpp]

1	//===-------------------------- regex.cpp ---------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "regex"
10	#include "algorithm"
11	#include "iterator"
12
13	_LIBCPP_BEGIN_NAMESPACE_STD
14
15	static
16	const char*
17	make_error_type_string(regex_constants::error_type ecode)
18	{
19	switch (ecode)
20	{
21	case regex_constants::error_collate:
22	return "The expression contained an invalid collating element name.";
23	case regex_constants::error_ctype:
24	return "The expression contained an invalid character class name.";
25	case regex_constants::error_escape:
26	return "The expression contained an invalid escaped character, or a "
27	"trailing escape.";
28	case regex_constants::error_backref:
29	return "The expression contained an invalid back reference.";
30	case regex_constants::error_brack:
31	return "The expression contained mismatched [ and ].";
32	case regex_constants::error_paren:
33	return "The expression contained mismatched ( and ).";
34	case regex_constants::error_brace:
35	return "The expression contained mismatched { and }.";
36	case regex_constants::error_badbrace:
37	return "The expression contained an invalid range in a {} expression.";
38	case regex_constants::error_range:
39	return "The expression contained an invalid character range, "
40	"such as [b-a] in most encodings.";
41	case regex_constants::error_space:
42	return "There was insufficient memory to convert the expression into "
43	"a finite state machine.";
44	case regex_constants::error_badrepeat:
45	return "One of *?+{ was not preceded by a valid regular expression.";
46	case regex_constants::error_complexity:
47	return "The complexity of an attempted match against a regular "
48	"expression exceeded a pre-set level.";
49	case regex_constants::error_stack:
50	return "There was insufficient memory to determine whether the regular "
51	"expression could match the specified character sequence.";
52	case regex_constants::__re_err_grammar:
53	return "An invalid regex grammar has been requested.";
54	case regex_constants::__re_err_empty:
55	return "An empty regex is not allowed in the POSIX grammar.";
56	case regex_constants::__re_err_parse:
57	return "The parser did not consume the entire regular expression.";
58	default:
59	break;
60	}
61	return "Unknown error type";
62	}
63
64	regex_error::regex_error(regex_constants::error_type ecode)
65	: runtime_error (make_error_type_string(ecode)),
66	__code_(ecode)
67	{}
68
69	regex_error::~regex_error() throw() {}
70
71	namespace {
72
73	struct collationnames
74	{
75	const char* elem_;
76	char char_;
77	};
78
79	const collationnames collatenames[] =
80	{
81	{"A", `0x41`},
82	{"B", `0x42`},
83	{"C", `0x43`},
84	{"D", `0x44`},
85	{"E", `0x45`},
86	{"F", `0x46`},
87	{"G", `0x47`},
88	{"H", `0x48`},
89	{"I", `0x49`},
90	{"J", `0x4a`},
91	{"K", `0x4b`},
92	{"L", `0x4c`},
93	{"M", `0x4d`},
94	{"N", `0x4e`},
95	{"NUL", `0x00`},
96	{"O", `0x4f`},
97	{"P", `0x50`},
98	{"Q", `0x51`},
99	{"R", `0x52`},
100	{"S", `0x53`},
101	{"T", `0x54`},
102	{"U", `0x55`},
103	{"V", `0x56`},
104	{"W", `0x57`},
105	{"X", `0x58`},
106	{"Y", `0x59`},
107	{"Z", `0x5a`},
108	{"a", `0x61`},
109	{"alert", `0x07`},
110	{"ampersand", `0x26`},
111	{"apostrophe", `0x27`},
112	{"asterisk", `0x2a`},
113	{"b", `0x62`},
114	{"backslash", `0x5c`},
115	{"backspace", `0x08`},
116	{"c", `0x63`},
117	{"carriage-return", `0x0d`},
118	{"circumflex", `0x5e`},
119	{"circumflex-accent", `0x5e`},
120	{"colon", `0x3a`},
121	{"comma", `0x2c`},
122	{"commercial-at", `0x40`},
123	{"d", `0x64`},
124	{"dollar-sign", `0x24`},
125	{"e", `0x65`},
126	{"eight", `0x38`},
127	{"equals-sign", `0x3d`},
128	{"exclamation-mark", `0x21`},
129	{"f", `0x66`},
130	{"five", `0x35`},
131	{"form-feed", `0x0c`},
132	{"four", `0x34`},
133	{"full-stop", `0x2e`},
134	{"g", `0x67`},
135	{"grave-accent", `0x60`},
136	{"greater-than-sign", `0x3e`},
137	{"h", `0x68`},
138	{"hyphen", `0x2d`},
139	{"hyphen-minus", `0x2d`},
140	{"i", `0x69`},
141	{"j", `0x6a`},
142	{"k", `0x6b`},
143	{"l", `0x6c`},
144	{"left-brace", `0x7b`},
145	{"left-curly-bracket", `0x7b`},
146	{"left-parenthesis", `0x28`},
147	{"left-square-bracket", `0x5b`},
148	{"less-than-sign", `0x3c`},
149	{"low-line", `0x5f`},
150	{"m", `0x6d`},
151	{"n", `0x6e`},
152	{"newline", `0x0a`},
153	{"nine", `0x39`},
154	{"number-sign", `0x23`},
155	{"o", `0x6f`},
156	{"one", `0x31`},
157	{"p", `0x70`},
158	{"percent-sign", `0x25`},
159	{"period", `0x2e`},
160	{"plus-sign", `0x2b`},
161	{"q", `0x71`},
162	{"question-mark", `0x3f`},
163	{"quotation-mark", `0x22`},
164	{"r", `0x72`},
165	{"reverse-solidus", `0x5c`},
166	{"right-brace", `0x7d`},
167	{"right-curly-bracket", `0x7d`},
168	{"right-parenthesis", `0x29`},
169	{"right-square-bracket", `0x5d`},
170	{"s", `0x73`},
171	{"semicolon", `0x3b`},
172	{"seven", `0x37`},
173	{"six", `0x36`},
174	{"slash", `0x2f`},
175	{"solidus", `0x2f`},
176	{"space", `0x20`},
177	{"t", `0x74`},
178	{"tab", `0x09`},
179	{"three", `0x33`},
180	{"tilde", `0x7e`},
181	{"two", `0x32`},
182	{"u", `0x75`},
183	{"underscore", `0x5f`},
184	{"v", `0x76`},
185	{"vertical-line", `0x7c`},
186	{"vertical-tab", `0x0b`},
187	{"w", `0x77`},
188	{"x", `0x78`},
189	{"y", `0x79`},
190	{"z", `0x7a`},
191	{"zero", `0x30`}
192	};
193
194	struct classnames
195	{
196	const char* elem_;
197	regex_traits<char>::char_class_type mask_;
198	};
199
200	const classnames ClassNames[] =
201	{
202	{"alnum", ctype_base::alnum},
203	{"alpha", ctype_base::alpha},
204	{"blank", ctype_base::blank},
205	{"cntrl", ctype_base::cntrl},
206	{"d", ctype_base::digit},
207	{"digit", ctype_base::digit},
208	{"graph", ctype_base::graph},
209	{"lower", ctype_base::lower},
210	{"print", ctype_base::print},
211	{"punct", ctype_base::punct},
212	{"s", ctype_base::space},
213	{"space", ctype_base::space},
214	{"upper", ctype_base::upper},
215	{"w", regex_traits<char>::__regex_word},
216	{"xdigit", ctype_base::xdigit}
217	};
218
219	struct use_strcmp
220	{
221	bool operator()(const collationnames& x, const char* y)
222	{return strcmp(x.elem_, y) < `0`;}
223	bool operator()(const classnames& x, const char* y)
224	{return strcmp(x.elem_, y) < `0`;}
225	};
226
227	}
228
229	string
230	__get_collation_name(const char* s)
231	{
232	const collationnames* i =
233	_VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp ());
234	string r;
235	if (i != end(collatenames) && strcmp(s, i->elem_) == `0`)
236	r = char(i->char_);
237	return r;
238	}
239
240	regex_traits<char>::char_class_type
241	__get_classname(const char* s, bool __icase)
242	{
243	const classnames* i =
244	_VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp ());
245	regex_traits<char>::char_class_type r = `0`;
246	if (i != end(ClassNames) && strcmp(s, i->elem_) == `0`)
247	{
248	r = i->mask_;
249	if (r == regex_traits<char>::__regex_word)
250	r \|= ctype_base::alnum \| ctype_base::upper \| ctype_base::lower;
251	else if (__icase)
252	{
253	if (r & (ctype_base::lower \| ctype_base::upper))
254	r \|= ctype_base::alpha;
255	}
256	}
257	return r;
258	}
259
260	template <>
261	void
262	__match_any_but_newline<char>::__exec(__state& __s) const
263	{
264	if (__s.__current_ != __s.__last_)
265	{
266	switch (*__s.__current_)
267	{
268	case `'\r'`:
269	case `'\n'`:
270	__s.__do_ = __state::__reject;
271	__s.__node_ = nullptr;
272	break;
273	default:
274	__s.__do_ = __state::__accept_and_consume;
275	++__s.__current_;
276	__s.__node_ = this->first();
277	break;
278	}
279	}
280	else
281	{
282	__s.__do_ = __state::__reject;
283	__s.__node_ = nullptr;
284	}
285	}
286
287	template <>
288	void
289	__match_any_but_newline<wchar_t>::__exec(__state& __s) const
290	{
291	if (__s.__current_ != __s.__last_)
292	{
293	switch (*__s.__current_)
294	{
295	case `'\r'`:
296	case `'\n'`:
297	case `0x2028`:
298	case `0x2029`:
299	__s.__do_ = __state::__reject;
300	__s.__node_ = nullptr;
301	break;
302	default:
303	__s.__do_ = __state::__accept_and_consume;
304	++__s.__current_;
305	__s.__node_ = this->first();
306	break;
307	}
308	}
309	else
310	{
311	__s.__do_ = __state::__reject;
312	__s.__node_ = nullptr;
313	}
314	}
315
316	_LIBCPP_END_NAMESPACE_STD
317

Browse the source code of ClickHouse/contrib/libcxx/src/regex.cpp