1// class template regex -*- C++ -*-
2
3// Copyright (C) 2010-2017 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex_compiler.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33_GLIBCXX_BEGIN_NAMESPACE_VERSION
34_GLIBCXX_BEGIN_NAMESPACE_CXX11
35
36 template<typename>
37 class regex_traits;
38
39_GLIBCXX_END_NAMESPACE_CXX11
40_GLIBCXX_END_NAMESPACE_VERSION
41
42namespace __detail
43{
44_GLIBCXX_BEGIN_NAMESPACE_VERSION
45
46 /**
47 * @addtogroup regex-detail
48 * @{
49 */
50
51 template<typename, bool, bool>
52 struct _BracketMatcher;
53
54 /**
55 * @brief Builds an NFA from an input iterator range.
56 *
57 * The %_TraitsT type should fulfill requirements [28.3].
58 */
59 template<typename _TraitsT>
60 class _Compiler
61 {
62 public:
63 typedef typename _TraitsT::char_type _CharT;
64 typedef const _CharT* _IterT;
65 typedef _NFA<_TraitsT> _RegexT;
66 typedef regex_constants::syntax_option_type _FlagT;
67
68 _Compiler(_IterT __b, _IterT __e,
69 const typename _TraitsT::locale_type& __traits, _FlagT __flags);
70
71 shared_ptr<const _RegexT>
72 _M_get_nfa()
73 { return std::move(_M_nfa); }
74
75 private:
76 typedef _Scanner<_CharT> _ScannerT;
77 typedef typename _TraitsT::string_type _StringT;
78 typedef typename _ScannerT::_TokenT _TokenT;
79 typedef _StateSeq<_TraitsT> _StateSeqT;
80 typedef std::stack<_StateSeqT> _StackT;
81 typedef std::ctype<_CharT> _CtypeT;
82
83 // accepts a specific token or returns false.
84 bool
85 _M_match_token(_TokenT __token);
86
87 void
88 _M_disjunction();
89
90 void
91 _M_alternative();
92
93 bool
94 _M_term();
95
96 bool
97 _M_assertion();
98
99 bool
100 _M_quantifier();
101
102 bool
103 _M_atom();
104
105 bool
106 _M_bracket_expression();
107
108 template<bool __icase, bool __collate>
109 void
110 _M_insert_any_matcher_ecma();
111
112 template<bool __icase, bool __collate>
113 void
114 _M_insert_any_matcher_posix();
115
116 template<bool __icase, bool __collate>
117 void
118 _M_insert_char_matcher();
119
120 template<bool __icase, bool __collate>
121 void
122 _M_insert_character_class_matcher();
123
124 template<bool __icase, bool __collate>
125 void
126 _M_insert_bracket_matcher(bool __neg);
127
128 // Returns true if successfully matched one term and should continue.
129 // Returns false if the compiler should move on.
130 template<bool __icase, bool __collate>
131 bool
132 _M_expression_term(pair<bool, _CharT>& __last_char,
133 _BracketMatcher<_TraitsT, __icase, __collate>&
134 __matcher);
135
136 int
137 _M_cur_int_value(int __radix);
138
139 bool
140 _M_try_char();
141
142 _StateSeqT
143 _M_pop()
144 {
145 auto ret = _M_stack.top();
146 _M_stack.pop();
147 return ret;
148 }
149
150 _FlagT _M_flags;
151 _ScannerT _M_scanner;
152 shared_ptr<_RegexT> _M_nfa;
153 _StringT _M_value;
154 _StackT _M_stack;
155 const _TraitsT& _M_traits;
156 const _CtypeT& _M_ctype;
157 };
158
159 template<typename _Tp>
160 struct __has_contiguous_iter : std::false_type { };
161
162 template<typename _Ch, typename _Tr, typename _Alloc>
163 struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
164 : std::true_type
165 { };
166
167 template<typename _Tp, typename _Alloc>
168 struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
169 : std::true_type
170 { };
171
172 template<typename _Tp>
173 struct __is_contiguous_normal_iter : std::false_type { };
174
175 template<typename _CharT>
176 struct __is_contiguous_normal_iter<_CharT*> : std::true_type { };
177
178 template<typename _Tp, typename _Cont>
179 struct
180 __is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
181 : __has_contiguous_iter<_Cont>::type
182 { };
183
184 template<typename _Iter, typename _TraitsT>
185 using __enable_if_contiguous_normal_iter
186 = typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
187 std::shared_ptr<const _NFA<_TraitsT>> >::type;
188
189 template<typename _Iter, typename _TraitsT>
190 using __disable_if_contiguous_normal_iter
191 = typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
192 std::shared_ptr<const _NFA<_TraitsT>> >::type;
193
194 template<typename _FwdIter, typename _TraitsT>
195 inline __enable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
196 __compile_nfa(_FwdIter __first, _FwdIter __last,
197 const typename _TraitsT::locale_type& __loc,
198 regex_constants::syntax_option_type __flags)
199 {
200 size_t __len = __last - __first;
201 const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr;
202 using _Cmplr = _Compiler<_TraitsT>;
203 return _Cmplr(__cfirst, __cfirst + __len, __loc, __flags)._M_get_nfa();
204 }
205
206 template<typename _FwdIter, typename _TraitsT>
207 inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
208 __compile_nfa(_FwdIter __first, _FwdIter __last,
209 const typename _TraitsT::locale_type& __loc,
210 regex_constants::syntax_option_type __flags)
211 {
212 using char_type = typename _TraitsT::char_type;
213 const basic_string<char_type> __str(__first, __last);
214 return __compile_nfa<const char_type*, _TraitsT>(__str.data(),
215 __str.data() + __str.size(), __loc, __flags);
216 }
217
218 // [28.13.14]
219 template<typename _TraitsT, bool __icase, bool __collate>
220 class _RegexTranslatorBase
221 {
222 public:
223 typedef typename _TraitsT::char_type _CharT;
224 typedef typename _TraitsT::string_type _StringT;
225 typedef _StringT _StrTransT;
226
227 explicit
228 _RegexTranslatorBase(const _TraitsT& __traits)
229 : _M_traits(__traits)
230 { }
231
232 _CharT
233 _M_translate(_CharT __ch) const
234 {
235 if (__icase)
236 return _M_traits.translate_nocase(__ch);
237 else if (__collate)
238 return _M_traits.translate(__ch);
239 else
240 return __ch;
241 }
242
243 _StrTransT
244 _M_transform(_CharT __ch) const
245 {
246 _StrTransT __str(1, __ch);
247 return _M_traits.transform(__str.begin(), __str.end());
248 }
249
250 // See LWG 523. It's not efficiently implementable when _TraitsT is not
251 // std::regex_traits<>, and __collate is true. See specializations for
252 // implementations of other cases.
253 bool
254 _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
255 const _StrTransT& __s) const
256 { return __first <= __s && __s <= __last; }
257
258 protected:
259 bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const
260 {
261 typedef std::ctype<_CharT> __ctype_type;
262 const auto& __fctyp = use_facet<__ctype_type>(this->_M_traits.getloc());
263 auto __lower = __fctyp.tolower(__ch);
264 auto __upper = __fctyp.toupper(__ch);
265 return (__first <= __lower && __lower <= __last)
266 || (__first <= __upper && __upper <= __last);
267 }
268
269 const _TraitsT& _M_traits;
270 };
271
272 template<typename _TraitsT, bool __icase, bool __collate>
273 class _RegexTranslator
274 : public _RegexTranslatorBase<_TraitsT, __icase, __collate>
275 {
276 public:
277 typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base;
278 using _Base::_Base;
279 };
280
281 template<typename _TraitsT, bool __icase>
282 class _RegexTranslator<_TraitsT, __icase, false>
283 : public _RegexTranslatorBase<_TraitsT, __icase, false>
284 {
285 public:
286 typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base;
287 typedef typename _Base::_CharT _CharT;
288 typedef _CharT _StrTransT;
289
290 using _Base::_Base;
291
292 _StrTransT
293 _M_transform(_CharT __ch) const
294 { return __ch; }
295
296 bool
297 _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
298 {
299 if (!__icase)
300 return __first <= __ch && __ch <= __last;
301 return this->_M_in_range_icase(__first, __last, __ch);
302 }
303 };
304
305 template<typename _CharType>
306 class _RegexTranslator<std::regex_traits<_CharType>, true, true>
307 : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
308 {
309 public:
310 typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
311 _Base;
312 typedef typename _Base::_CharT _CharT;
313 typedef typename _Base::_StrTransT _StrTransT;
314
315 using _Base::_Base;
316
317 bool
318 _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
319 const _StrTransT& __str) const
320 {
321 __glibcxx_assert(__first.size() == 1);
322 __glibcxx_assert(__last.size() == 1);
323 __glibcxx_assert(__str.size() == 1);
324 return this->_M_in_range_icase(__first[0], __last[0], __str[0]);
325 }
326 };
327
328 template<typename _TraitsT>
329 class _RegexTranslator<_TraitsT, false, false>
330 {
331 public:
332 typedef typename _TraitsT::char_type _CharT;
333 typedef _CharT _StrTransT;
334
335 explicit
336 _RegexTranslator(const _TraitsT&)
337 { }
338
339 _CharT
340 _M_translate(_CharT __ch) const
341 { return __ch; }
342
343 _StrTransT
344 _M_transform(_CharT __ch) const
345 { return __ch; }
346
347 bool
348 _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
349 { return __first <= __ch && __ch <= __last; }
350 };
351
352 template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
353 struct _AnyMatcher;
354
355 template<typename _TraitsT, bool __icase, bool __collate>
356 struct _AnyMatcher<_TraitsT, false, __icase, __collate>
357 {
358 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
359 typedef typename _TransT::_CharT _CharT;
360
361 explicit
362 _AnyMatcher(const _TraitsT& __traits)
363 : _M_translator(__traits)
364 { }
365
366 bool
367 operator()(_CharT __ch) const
368 {
369 static auto __nul = _M_translator._M_translate('\0');
370 return _M_translator._M_translate(__ch) != __nul;
371 }
372
373 _TransT _M_translator;
374 };
375
376 template<typename _TraitsT, bool __icase, bool __collate>
377 struct _AnyMatcher<_TraitsT, true, __icase, __collate>
378 {
379 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
380 typedef typename _TransT::_CharT _CharT;
381
382 explicit
383 _AnyMatcher(const _TraitsT& __traits)
384 : _M_translator(__traits)
385 { }
386
387 bool
388 operator()(_CharT __ch) const
389 { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
390
391 bool
392 _M_apply(_CharT __ch, true_type) const
393 {
394 auto __c = _M_translator._M_translate(__ch);
395 auto __n = _M_translator._M_translate('\n');
396 auto __r = _M_translator._M_translate('\r');
397 return __c != __n && __c != __r;
398 }
399
400 bool
401 _M_apply(_CharT __ch, false_type) const
402 {
403 auto __c = _M_translator._M_translate(__ch);
404 auto __n = _M_translator._M_translate('\n');
405 auto __r = _M_translator._M_translate('\r');
406 auto __u2028 = _M_translator._M_translate(u'\u2028');
407 auto __u2029 = _M_translator._M_translate(u'\u2029');
408 return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
409 }
410
411 _TransT _M_translator;
412 };
413
414 template<typename _TraitsT, bool __icase, bool __collate>
415 struct _CharMatcher
416 {
417 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
418 typedef typename _TransT::_CharT _CharT;
419
420 _CharMatcher(_CharT __ch, const _TraitsT& __traits)
421 : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
422 { }
423
424 bool
425 operator()(_CharT __ch) const
426 { return _M_ch == _M_translator._M_translate(__ch); }
427
428 _TransT _M_translator;
429 _CharT _M_ch;
430 };
431
432 /// Matches a character range (bracket expression)
433 template<typename _TraitsT, bool __icase, bool __collate>
434 struct _BracketMatcher
435 {
436 public:
437 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
438 typedef typename _TransT::_CharT _CharT;
439 typedef typename _TransT::_StrTransT _StrTransT;
440 typedef typename _TraitsT::string_type _StringT;
441 typedef typename _TraitsT::char_class_type _CharClassT;
442
443 public:
444 _BracketMatcher(bool __is_non_matching,
445 const _TraitsT& __traits)
446 : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
447 _M_is_non_matching(__is_non_matching)
448 { }
449
450 bool
451 operator()(_CharT __ch) const
452 {
453 _GLIBCXX_DEBUG_ASSERT(_M_is_ready);
454 return _M_apply(__ch, _UseCache());
455 }
456
457 void
458 _M_add_char(_CharT __c)
459 {
460 _M_char_set.push_back(_M_translator._M_translate(__c));
461 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
462 }
463
464 _StringT
465 _M_add_collate_element(const _StringT& __s)
466 {
467 auto __st = _M_traits.lookup_collatename(__s.data(),
468 __s.data() + __s.size());
469 if (__st.empty())
470 __throw_regex_error(regex_constants::error_collate,
471 "Invalid collate element.");
472 _M_char_set.push_back(_M_translator._M_translate(__st[0]));
473 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
474 return __st;
475 }
476
477 void
478 _M_add_equivalence_class(const _StringT& __s)
479 {
480 auto __st = _M_traits.lookup_collatename(__s.data(),
481 __s.data() + __s.size());
482 if (__st.empty())
483 __throw_regex_error(regex_constants::error_collate,
484 "Invalid equivalence class.");
485 __st = _M_traits.transform_primary(__st.data(),
486 __st.data() + __st.size());
487 _M_equiv_set.push_back(__st);
488 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
489 }
490
491 // __neg should be true for \D, \S and \W only.
492 void
493 _M_add_character_class(const _StringT& __s, bool __neg)
494 {
495 auto __mask = _M_traits.lookup_classname(__s.data(),
496 __s.data() + __s.size(),
497 __icase);
498 if (__mask == 0)
499 __throw_regex_error(regex_constants::error_collate,
500 "Invalid character class.");
501 if (!__neg)
502 _M_class_set |= __mask;
503 else
504 _M_neg_class_set.push_back(__mask);
505 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
506 }
507
508 void
509 _M_make_range(_CharT __l, _CharT __r)
510 {
511 if (__l > __r)
512 __throw_regex_error(regex_constants::error_range,
513 "Invalid range in bracket expression.");
514 _M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
515 _M_translator._M_transform(__r)));
516 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
517 }
518
519 void
520 _M_ready()
521 {
522 std::sort(_M_char_set.begin(), _M_char_set.end());
523 auto __end = std::unique(_M_char_set.begin(), _M_char_set.end());
524 _M_char_set.erase(__end, _M_char_set.end());
525 _M_make_cache(_UseCache());
526 _GLIBCXX_DEBUG_ONLY(_M_is_ready = true);
527 }
528
529 private:
530 // Currently we only use the cache for char
531 typedef typename std::is_same<_CharT, char>::type _UseCache;
532
533 static constexpr size_t
534 _S_cache_size()
535 {
536 return 1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value));
537 }
538
539 struct _Dummy { };
540 typedef typename std::conditional<_UseCache::value,
541 std::bitset<_S_cache_size()>,
542 _Dummy>::type _CacheT;
543 typedef typename std::make_unsigned<_CharT>::type _UnsignedCharT;
544
545 bool
546 _M_apply(_CharT __ch, false_type) const;
547
548 bool
549 _M_apply(_CharT __ch, true_type) const
550 { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
551
552 void
553 _M_make_cache(true_type)
554 {
555 for (unsigned __i = 0; __i < _M_cache.size(); __i++)
556 _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type());
557 }
558
559 void
560 _M_make_cache(false_type)
561 { }
562
563 private:
564 std::vector<_CharT> _M_char_set;
565 std::vector<_StringT> _M_equiv_set;
566 std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
567 std::vector<_CharClassT> _M_neg_class_set;
568 _CharClassT _M_class_set;
569 _TransT _M_translator;
570 const _TraitsT& _M_traits;
571 bool _M_is_non_matching;
572 _CacheT _M_cache;
573#ifdef _GLIBCXX_DEBUG
574 bool _M_is_ready = false;
575#endif
576 };
577
578 //@} regex-detail
579_GLIBCXX_END_NAMESPACE_VERSION
580} // namespace __detail
581} // namespace std
582
583#include <bits/regex_compiler.tcc>
584