1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2021 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33_GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35namespace __detail
36{
37 /// @cond undocumented
38
39 // Result of merging regex_match and regex_search.
40 //
41 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42 // the other one if possible, for test purpose).
43 //
44 // That __match_mode is true means regex_match, else regex_search.
45 template<typename _BiIter, typename _Alloc,
46 typename _CharT, typename _TraitsT,
47 _RegexExecutorPolicy __policy,
48 bool __match_mode>
49 bool
50 __regex_algo_impl(_BiIter __s,
51 _BiIter __e,
52 match_results<_BiIter, _Alloc>& __m,
53 const basic_regex<_CharT, _TraitsT>& __re,
54 regex_constants::match_flag_type __flags)
55 {
56 if (__re._M_automaton == nullptr)
57 return false;
58
59 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
60 __m._M_begin = __s;
61 __m._M_resize(__re._M_automaton->_M_sub_count());
62
63 bool __ret;
64 if ((__re.flags() & regex_constants::__polynomial)
65 || (__policy == _RegexExecutorPolicy::_S_alternate
66 && !__re._M_automaton->_M_has_backref))
67 {
68 _Executor<_BiIter, _Alloc, _TraitsT, false>
69 __executor(__s, __e, __m, __re, __flags);
70 if (__match_mode)
71 __ret = __executor._M_match();
72 else
73 __ret = __executor._M_search();
74 }
75 else
76 {
77 _Executor<_BiIter, _Alloc, _TraitsT, true>
78 __executor(__s, __e, __m, __re, __flags);
79 if (__match_mode)
80 __ret = __executor._M_match();
81 else
82 __ret = __executor._M_search();
83 }
84 if (__ret)
85 {
86 for (auto& __it : __res)
87 if (!__it.matched)
88 __it.first = __it.second = __e;
89 auto& __pre = __m._M_prefix();
90 auto& __suf = __m._M_suffix();
91 if (__match_mode)
92 {
93 __pre.matched = false;
94 __pre.first = __s;
95 __pre.second = __s;
96 __suf.matched = false;
97 __suf.first = __e;
98 __suf.second = __e;
99 }
100 else
101 {
102 __pre.first = __s;
103 __pre.second = __res[0].first;
104 __pre.matched = (__pre.first != __pre.second);
105 __suf.first = __res[0].second;
106 __suf.second = __e;
107 __suf.matched = (__suf.first != __suf.second);
108 }
109 }
110 else
111 {
112 __m._M_establish_failed_match(__e);
113 }
114 return __ret;
115 }
116 /// @endcond
117} // namespace __detail
118
119 /// @cond
120
121 template<typename _Ch_type>
122 template<typename _Fwd_iter>
123 typename regex_traits<_Ch_type>::string_type
124 regex_traits<_Ch_type>::
125 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
126 {
127 typedef std::ctype<char_type> __ctype_type;
128 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
129
130 static const char* __collatenames[] =
131 {
132 "NUL",
133 "SOH",
134 "STX",
135 "ETX",
136 "EOT",
137 "ENQ",
138 "ACK",
139 "alert",
140 "backspace",
141 "tab",
142 "newline",
143 "vertical-tab",
144 "form-feed",
145 "carriage-return",
146 "SO",
147 "SI",
148 "DLE",
149 "DC1",
150 "DC2",
151 "DC3",
152 "DC4",
153 "NAK",
154 "SYN",
155 "ETB",
156 "CAN",
157 "EM",
158 "SUB",
159 "ESC",
160 "IS4",
161 "IS3",
162 "IS2",
163 "IS1",
164 "space",
165 "exclamation-mark",
166 "quotation-mark",
167 "number-sign",
168 "dollar-sign",
169 "percent-sign",
170 "ampersand",
171 "apostrophe",
172 "left-parenthesis",
173 "right-parenthesis",
174 "asterisk",
175 "plus-sign",
176 "comma",
177 "hyphen",
178 "period",
179 "slash",
180 "zero",
181 "one",
182 "two",
183 "three",
184 "four",
185 "five",
186 "six",
187 "seven",
188 "eight",
189 "nine",
190 "colon",
191 "semicolon",
192 "less-than-sign",
193 "equals-sign",
194 "greater-than-sign",
195 "question-mark",
196 "commercial-at",
197 "A",
198 "B",
199 "C",
200 "D",
201 "E",
202 "F",
203 "G",
204 "H",
205 "I",
206 "J",
207 "K",
208 "L",
209 "M",
210 "N",
211 "O",
212 "P",
213 "Q",
214 "R",
215 "S",
216 "T",
217 "U",
218 "V",
219 "W",
220 "X",
221 "Y",
222 "Z",
223 "left-square-bracket",
224 "backslash",
225 "right-square-bracket",
226 "circumflex",
227 "underscore",
228 "grave-accent",
229 "a",
230 "b",
231 "c",
232 "d",
233 "e",
234 "f",
235 "g",
236 "h",
237 "i",
238 "j",
239 "k",
240 "l",
241 "m",
242 "n",
243 "o",
244 "p",
245 "q",
246 "r",
247 "s",
248 "t",
249 "u",
250 "v",
251 "w",
252 "x",
253 "y",
254 "z",
255 "left-curly-bracket",
256 "vertical-line",
257 "right-curly-bracket",
258 "tilde",
259 "DEL",
260 };
261
262 string __s;
263 for (; __first != __last; ++__first)
264 __s += __fctyp.narrow(*__first, 0);
265
266 for (const auto& __it : __collatenames)
267 if (__s == __it)
268 return string_type(1, __fctyp.widen(
269 static_cast<char>(&__it - __collatenames)));
270
271 // TODO Add digraph support:
272 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
273
274 return string_type();
275 }
276
277 template<typename _Ch_type>
278 template<typename _Fwd_iter>
279 typename regex_traits<_Ch_type>::char_class_type
280 regex_traits<_Ch_type>::
281 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
282 {
283 typedef std::ctype<char_type> __ctype_type;
284 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
285
286 // Mappings from class name to class mask.
287 static const pair<const char*, char_class_type> __classnames[] =
288 {
289 {"d", ctype_base::digit},
290 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
291 {"s", ctype_base::space},
292 {"alnum", ctype_base::alnum},
293 {"alpha", ctype_base::alpha},
294 {"blank", ctype_base::blank},
295 {"cntrl", ctype_base::cntrl},
296 {"digit", ctype_base::digit},
297 {"graph", ctype_base::graph},
298 {"lower", ctype_base::lower},
299 {"print", ctype_base::print},
300 {"punct", ctype_base::punct},
301 {"space", ctype_base::space},
302 {"upper", ctype_base::upper},
303 {"xdigit", ctype_base::xdigit},
304 };
305
306 string __s;
307 for (; __first != __last; ++__first)
308 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
309
310 for (const auto& __it : __classnames)
311 if (__s == __it.first)
312 {
313 if (__icase
314 && ((__it.second
315 & (ctype_base::lower | ctype_base::upper)) != 0))
316 return ctype_base::alpha;
317 return __it.second;
318 }
319 return 0;
320 }
321
322 template<typename _Ch_type>
323 bool
324 regex_traits<_Ch_type>::
325 isctype(_Ch_type __c, char_class_type __f) const
326 {
327 typedef std::ctype<char_type> __ctype_type;
328 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
329
330 return __fctyp.is(__f._M_base, __c)
331 // [[:w:]]
332 || ((__f._M_extended & _RegexMask::_S_under)
333 && __c == __fctyp.widen('_'));
334 }
335
336 template<typename _Ch_type>
337 int
338 regex_traits<_Ch_type>::
339 value(_Ch_type __ch, int __radix) const
340 {
341 std::basic_istringstream<char_type> __is(string_type(1, __ch));
342 long __v;
343 if (__radix == 8)
344 __is >> std::oct;
345 else if (__radix == 16)
346 __is >> std::hex;
347 __is >> __v;
348 return __is.fail() ? -1 : __v;
349 }
350
351 template<typename _Bi_iter, typename _Alloc>
352 template<typename _Out_iter>
353 _Out_iter
354 match_results<_Bi_iter, _Alloc>::
355 format(_Out_iter __out,
356 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
357 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
358 match_flag_type __flags) const
359 {
360 __glibcxx_assert( ready() );
361 regex_traits<char_type> __traits;
362 typedef std::ctype<char_type> __ctype_type;
363 const __ctype_type&
364 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
365
366 auto __output = [&](size_t __idx)
367 {
368 auto& __sub = (*this)[__idx];
369 if (__sub.matched)
370 __out = std::copy(__sub.first, __sub.second, __out);
371 };
372
373 if (__flags & regex_constants::format_sed)
374 {
375 bool __escaping = false;
376 for (; __fmt_first != __fmt_last; __fmt_first++)
377 {
378 if (__escaping)
379 {
380 __escaping = false;
381 if (__fctyp.is(__ctype_type::digit, *__fmt_first))
382 __output(__traits.value(*__fmt_first, 10));
383 else
384 *__out++ = *__fmt_first;
385 continue;
386 }
387 if (*__fmt_first == '\\')
388 {
389 __escaping = true;
390 continue;
391 }
392 if (*__fmt_first == '&')
393 {
394 __output(0);
395 continue;
396 }
397 *__out++ = *__fmt_first;
398 }
399 if (__escaping)
400 *__out++ = '\\';
401 }
402 else
403 {
404 while (1)
405 {
406 auto __next = std::find(__fmt_first, __fmt_last, '$');
407 if (__next == __fmt_last)
408 break;
409
410 __out = std::copy(__fmt_first, __next, __out);
411
412 auto __eat = [&](char __ch) -> bool
413 {
414 if (*__next == __ch)
415 {
416 ++__next;
417 return true;
418 }
419 return false;
420 };
421
422 if (++__next == __fmt_last)
423 *__out++ = '$';
424 else if (__eat('$'))
425 *__out++ = '$';
426 else if (__eat('&'))
427 __output(0);
428 else if (__eat('`'))
429 {
430 auto& __sub = _M_prefix();
431 if (__sub.matched)
432 __out = std::copy(__sub.first, __sub.second, __out);
433 }
434 else if (__eat('\''))
435 {
436 auto& __sub = _M_suffix();
437 if (__sub.matched)
438 __out = std::copy(__sub.first, __sub.second, __out);
439 }
440 else if (__fctyp.is(__ctype_type::digit, *__next))
441 {
442 long __num = __traits.value(*__next, 10);
443 if (++__next != __fmt_last
444 && __fctyp.is(__ctype_type::digit, *__next))
445 {
446 __num *= 10;
447 __num += __traits.value(*__next++, 10);
448 }
449 if (0 <= __num && __num < this->size())
450 __output(__num);
451 }
452 else
453 *__out++ = '$';
454 __fmt_first = __next;
455 }
456 __out = std::copy(__fmt_first, __fmt_last, __out);
457 }
458 return __out;
459 }
460
461 template<typename _Out_iter, typename _Bi_iter,
462 typename _Rx_traits, typename _Ch_type>
463 _Out_iter
464 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
465 const basic_regex<_Ch_type, _Rx_traits>& __e,
466 const _Ch_type* __fmt,
467 regex_constants::match_flag_type __flags)
468 {
469 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
470 _IterT __i(__first, __last, __e, __flags);
471 _IterT __end;
472 if (__i == __end)
473 {
474 if (!(__flags & regex_constants::format_no_copy))
475 __out = std::copy(__first, __last, __out);
476 }
477 else
478 {
479 sub_match<_Bi_iter> __last;
480 auto __len = char_traits<_Ch_type>::length(__fmt);
481 for (; __i != __end; ++__i)
482 {
483 if (!(__flags & regex_constants::format_no_copy))
484 __out = std::copy(__i->prefix().first, __i->prefix().second,
485 __out);
486 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
487 __last = __i->suffix();
488 if (__flags & regex_constants::format_first_only)
489 break;
490 }
491 if (!(__flags & regex_constants::format_no_copy))
492 __out = std::copy(__last.first, __last.second, __out);
493 }
494 return __out;
495 }
496
497 template<typename _Bi_iter,
498 typename _Ch_type,
499 typename _Rx_traits>
500 bool
501 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
502 operator==(const regex_iterator& __rhs) const noexcept
503 {
504 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
505 return true;
506 return _M_pregex == __rhs._M_pregex
507 && _M_begin == __rhs._M_begin
508 && _M_end == __rhs._M_end
509 && _M_flags == __rhs._M_flags
510 && _M_match[0] == __rhs._M_match[0];
511 }
512
513 template<typename _Bi_iter,
514 typename _Ch_type,
515 typename _Rx_traits>
516 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
517 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
518 operator++()
519 {
520 // In all cases in which the call to regex_search returns true,
521 // match.prefix().first shall be equal to the previous value of
522 // match[0].second, and for each index i in the half-open range
523 // [0, match.size()) for which match[i].matched is true,
524 // match[i].position() shall return distance(begin, match[i].first).
525 // [28.12.1.4.5]
526 if (_M_match[0].matched)
527 {
528 auto __start = _M_match[0].second;
529 auto __prefix_first = _M_match[0].second;
530 if (_M_match[0].first == _M_match[0].second)
531 {
532 if (__start == _M_end)
533 {
534 _M_pregex = nullptr;
535 return *this;
536 }
537 else
538 {
539 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
540 _M_flags
541 | regex_constants::match_not_null
542 | regex_constants::match_continuous))
543 {
544 __glibcxx_assert(_M_match[0].matched);
545 auto& __prefix = _M_match._M_prefix();
546 __prefix.first = __prefix_first;
547 __prefix.matched = __prefix.first != __prefix.second;
548 // [28.12.1.4.5]
549 _M_match._M_begin = _M_begin;
550 return *this;
551 }
552 else
553 ++__start;
554 }
555 }
556 _M_flags |= regex_constants::match_prev_avail;
557 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
558 {
559 __glibcxx_assert(_M_match[0].matched);
560 auto& __prefix = _M_match._M_prefix();
561 __prefix.first = __prefix_first;
562 __prefix.matched = __prefix.first != __prefix.second;
563 // [28.12.1.4.5]
564 _M_match._M_begin = _M_begin;
565 }
566 else
567 _M_pregex = nullptr;
568 }
569 return *this;
570 }
571
572 template<typename _Bi_iter,
573 typename _Ch_type,
574 typename _Rx_traits>
575 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
576 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
577 operator=(const regex_token_iterator& __rhs)
578 {
579 _M_position = __rhs._M_position;
580 _M_subs = __rhs._M_subs;
581 _M_n = __rhs._M_n;
582 _M_suffix = __rhs._M_suffix;
583 _M_has_m1 = __rhs._M_has_m1;
584 _M_normalize_result();
585 return *this;
586 }
587
588 template<typename _Bi_iter,
589 typename _Ch_type,
590 typename _Rx_traits>
591 bool
592 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
593 operator==(const regex_token_iterator& __rhs) const
594 {
595 if (_M_end_of_seq() && __rhs._M_end_of_seq())
596 return true;
597 if (_M_suffix.matched && __rhs._M_suffix.matched
598 && _M_suffix == __rhs._M_suffix)
599 return true;
600 if (_M_end_of_seq() || _M_suffix.matched
601 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
602 return false;
603 return _M_position == __rhs._M_position
604 && _M_n == __rhs._M_n
605 && _M_subs == __rhs._M_subs;
606 }
607
608 template<typename _Bi_iter,
609 typename _Ch_type,
610 typename _Rx_traits>
611 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
612 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
613 operator++()
614 {
615 _Position __prev = _M_position;
616 if (_M_suffix.matched)
617 *this = regex_token_iterator();
618 else if (_M_n + 1 < _M_subs.size())
619 {
620 _M_n++;
621 _M_result = &_M_current_match();
622 }
623 else
624 {
625 _M_n = 0;
626 ++_M_position;
627 if (_M_position != _Position())
628 _M_result = &_M_current_match();
629 else if (_M_has_m1 && __prev->suffix().length() != 0)
630 {
631 _M_suffix.matched = true;
632 _M_suffix.first = __prev->suffix().first;
633 _M_suffix.second = __prev->suffix().second;
634 _M_result = &_M_suffix;
635 }
636 else
637 *this = regex_token_iterator();
638 }
639 return *this;
640 }
641
642 template<typename _Bi_iter,
643 typename _Ch_type,
644 typename _Rx_traits>
645 void
646 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
647 _M_init(_Bi_iter __a, _Bi_iter __b)
648 {
649 _M_has_m1 = false;
650 for (auto __it : _M_subs)
651 if (__it == -1)
652 {
653 _M_has_m1 = true;
654 break;
655 }
656 if (_M_position != _Position())
657 _M_result = &_M_current_match();
658 else if (_M_has_m1)
659 {
660 _M_suffix.matched = true;
661 _M_suffix.first = __a;
662 _M_suffix.second = __b;
663 _M_result = &_M_suffix;
664 }
665 else
666 _M_result = nullptr;
667 }
668
669 /// @endcond
670
671_GLIBCXX_END_NAMESPACE_VERSION
672} // namespace
673