regex.cpp source code [engine/third_party/libcxx/src/regex.cpp]

1	//===-------------------------- regex.cpp ---------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "regex"
10	#include "algorithm"
11	#include "iterator"
12
13	_LIBCPP_BEGIN_NAMESPACE_STD
14
15	static
16	const char*
17	make_error_type_string(regex_constants::error_type ecode)
18	{
19	switch (ecode)
20	{
21	case regex_constants::error_collate:
22	return "The expression contained an invalid collating element name.";
23	case regex_constants::error_ctype:
24	return "The expression contained an invalid character class name.";
25	case regex_constants::error_escape:
26	return "The expression contained an invalid escaped character, or a "
27	"trailing escape.";
28	case regex_constants::error_backref:
29	return "The expression contained an invalid back reference.";
30	case regex_constants::error_brack:
31	return "The expression contained mismatched [ and ].";
32	case regex_constants::error_paren:
33	return "The expression contained mismatched ( and ).";
34	case regex_constants::error_brace:
35	return "The expression contained mismatched { and }.";
36	case regex_constants::error_badbrace:
37	return "The expression contained an invalid range in a {} expression.";
38	case regex_constants::error_range:
39	return "The expression contained an invalid character range, "
40	"such as [b-a] in most encodings.";
41	case regex_constants::error_space:
42	return "There was insufficient memory to convert the expression into "
43	"a finite state machine.";
44	case regex_constants::error_badrepeat:
45	return "One of *?+{ was not preceded by a valid regular expression.";
46	case regex_constants::error_complexity:
47	return "The complexity of an attempted match against a regular "
48	"expression exceeded a pre-set level.";
49	case regex_constants::error_stack:
50	return "There was insufficient memory to determine whether the regular "
51	"expression could match the specified character sequence.";
52	case regex_constants::__re_err_grammar:
53	return "An invalid regex grammar has been requested.";
54	case regex_constants::__re_err_empty:
55	return "An empty regex is not allowed in the POSIX grammar.";
56	default:
57	break;
58	}
59	return "Unknown error type";
60	}
61
62	regex_error::regex_error(regex_constants::error_type ecode)
63	: runtime_error (make_error_type_string(ecode)),
64	__code_(ecode)
65	{}
66
67	regex_error::~regex_error() throw() {}
68
69	namespace {
70
71	struct collationnames
72	{
73	const char* elem_;
74	char char_;
75	};
76
77	const collationnames collatenames[] =
78	{
79	{"A", `0x41`},
80	{"B", `0x42`},
81	{"C", `0x43`},
82	{"D", `0x44`},
83	{"E", `0x45`},
84	{"F", `0x46`},
85	{"G", `0x47`},
86	{"H", `0x48`},
87	{"I", `0x49`},
88	{"J", `0x4a`},
89	{"K", `0x4b`},
90	{"L", `0x4c`},
91	{"M", `0x4d`},
92	{"N", `0x4e`},
93	{"NUL", `0x00`},
94	{"O", `0x4f`},
95	{"P", `0x50`},
96	{"Q", `0x51`},
97	{"R", `0x52`},
98	{"S", `0x53`},
99	{"T", `0x54`},
100	{"U", `0x55`},
101	{"V", `0x56`},
102	{"W", `0x57`},
103	{"X", `0x58`},
104	{"Y", `0x59`},
105	{"Z", `0x5a`},
106	{"a", `0x61`},
107	{"alert", `0x07`},
108	{"ampersand", `0x26`},
109	{"apostrophe", `0x27`},
110	{"asterisk", `0x2a`},
111	{"b", `0x62`},
112	{"backslash", `0x5c`},
113	{"backspace", `0x08`},
114	{"c", `0x63`},
115	{"carriage-return", `0x0d`},
116	{"circumflex", `0x5e`},
117	{"circumflex-accent", `0x5e`},
118	{"colon", `0x3a`},
119	{"comma", `0x2c`},
120	{"commercial-at", `0x40`},
121	{"d", `0x64`},
122	{"dollar-sign", `0x24`},
123	{"e", `0x65`},
124	{"eight", `0x38`},
125	{"equals-sign", `0x3d`},
126	{"exclamation-mark", `0x21`},
127	{"f", `0x66`},
128	{"five", `0x35`},
129	{"form-feed", `0x0c`},
130	{"four", `0x34`},
131	{"full-stop", `0x2e`},
132	{"g", `0x67`},
133	{"grave-accent", `0x60`},
134	{"greater-than-sign", `0x3e`},
135	{"h", `0x68`},
136	{"hyphen", `0x2d`},
137	{"hyphen-minus", `0x2d`},
138	{"i", `0x69`},
139	{"j", `0x6a`},
140	{"k", `0x6b`},
141	{"l", `0x6c`},
142	{"left-brace", `0x7b`},
143	{"left-curly-bracket", `0x7b`},
144	{"left-parenthesis", `0x28`},
145	{"left-square-bracket", `0x5b`},
146	{"less-than-sign", `0x3c`},
147	{"low-line", `0x5f`},
148	{"m", `0x6d`},
149	{"n", `0x6e`},
150	{"newline", `0x0a`},
151	{"nine", `0x39`},
152	{"number-sign", `0x23`},
153	{"o", `0x6f`},
154	{"one", `0x31`},
155	{"p", `0x70`},
156	{"percent-sign", `0x25`},
157	{"period", `0x2e`},
158	{"plus-sign", `0x2b`},
159	{"q", `0x71`},
160	{"question-mark", `0x3f`},
161	{"quotation-mark", `0x22`},
162	{"r", `0x72`},
163	{"reverse-solidus", `0x5c`},
164	{"right-brace", `0x7d`},
165	{"right-curly-bracket", `0x7d`},
166	{"right-parenthesis", `0x29`},
167	{"right-square-bracket", `0x5d`},
168	{"s", `0x73`},
169	{"semicolon", `0x3b`},
170	{"seven", `0x37`},
171	{"six", `0x36`},
172	{"slash", `0x2f`},
173	{"solidus", `0x2f`},
174	{"space", `0x20`},
175	{"t", `0x74`},
176	{"tab", `0x09`},
177	{"three", `0x33`},
178	{"tilde", `0x7e`},
179	{"two", `0x32`},
180	{"u", `0x75`},
181	{"underscore", `0x5f`},
182	{"v", `0x76`},
183	{"vertical-line", `0x7c`},
184	{"vertical-tab", `0x0b`},
185	{"w", `0x77`},
186	{"x", `0x78`},
187	{"y", `0x79`},
188	{"z", `0x7a`},
189	{"zero", `0x30`}
190	};
191
192	struct classnames
193	{
194	const char* elem_;
195	regex_traits<char>::char_class_type mask_;
196	};
197
198	const classnames ClassNames[] =
199	{
200	{"alnum", ctype_base::alnum},
201	{"alpha", ctype_base::alpha},
202	{"blank", ctype_base::blank},
203	{"cntrl", ctype_base::cntrl},
204	{"d", ctype_base::digit},
205	{"digit", ctype_base::digit},
206	{"graph", ctype_base::graph},
207	{"lower", ctype_base::lower},
208	{"print", ctype_base::print},
209	{"punct", ctype_base::punct},
210	{"s", ctype_base::space},
211	{"space", ctype_base::space},
212	{"upper", ctype_base::upper},
213	{"w", regex_traits<char>::__regex_word},
214	{"xdigit", ctype_base::xdigit}
215	};
216
217	struct use_strcmp
218	{
219	bool operator()(const collationnames& x, const char* y)
220	{return strcmp(x.elem_, y) < `0`;}
221	bool operator()(const classnames& x, const char* y)
222	{return strcmp(x.elem_, y) < `0`;}
223	};
224
225	}
226
227	string
228	__get_collation_name(const char* s)
229	{
230	const collationnames* i =
231	_VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp ());
232	string r;
233	if (i != end(collatenames) && strcmp(s, i->elem_) == `0`)
234	r = char(i->char_);
235	return r;
236	}
237
238	regex_traits<char>::char_class_type
239	__get_classname(const char* s, bool __icase)
240	{
241	const classnames* i =
242	_VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp ());
243	regex_traits<char>::char_class_type r = `0`;
244	if (i != end(ClassNames) && strcmp(s, i->elem_) == `0`)
245	{
246	r = i->mask_;
247	if (r == regex_traits<char>::__regex_word)
248	r \|= ctype_base::alnum \| ctype_base::upper \| ctype_base::lower;
249	else if (__icase)
250	{
251	if (r & (ctype_base::lower \| ctype_base::upper))
252	r \|= ctype_base::alpha;
253	}
254	}
255	return r;
256	}
257
258	template <>
259	void
260	__match_any_but_newline<char>::__exec(__state& __s) const
261	{
262	if (__s.__current_ != __s.__last_)
263	{
264	switch (*__s.__current_)
265	{
266	case `'\r'`:
267	case `'\n'`:
268	__s.__do_ = __state::__reject;
269	__s.__node_ = nullptr;
270	break;
271	default:
272	__s.__do_ = __state::__accept_and_consume;
273	++__s.__current_;
274	__s.__node_ = this->first();
275	break;
276	}
277	}
278	else
279	{
280	__s.__do_ = __state::__reject;
281	__s.__node_ = nullptr;
282	}
283	}
284
285	template <>
286	void
287	__match_any_but_newline<wchar_t>::__exec(__state& __s) const
288	{
289	if (__s.__current_ != __s.__last_)
290	{
291	switch (*__s.__current_)
292	{
293	case `'\r'`:
294	case `'\n'`:
295	case `0x2028`:
296	case `0x2029`:
297	__s.__do_ = __state::__reject;
298	__s.__node_ = nullptr;
299	break;
300	default:
301	__s.__do_ = __state::__accept_and_consume;
302	++__s.__current_;
303	__s.__node_ = this->first();
304	break;
305	}
306	}
307	else
308	{
309	__s.__do_ = __state::__reject;
310	__s.__node_ = nullptr;
311	}
312	}
313
314	_LIBCPP_END_NAMESPACE_STD
315

Browse the source code of engine/third_party/libcxx/src/regex.cpp