1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2017 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33namespace __detail
34{
35_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37 // Result of merging regex_match and regex_search.
38 //
39 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40 // the other one if possible, for test purpose).
41 //
42 // That __match_mode is true means regex_match, else regex_search.
43 template<typename _BiIter, typename _Alloc,
44 typename _CharT, typename _TraitsT,
45 _RegexExecutorPolicy __policy,
46 bool __match_mode>
47 bool
48 __regex_algo_impl(_BiIter __s,
49 _BiIter __e,
50 match_results<_BiIter, _Alloc>& __m,
51 const basic_regex<_CharT, _TraitsT>& __re,
52 regex_constants::match_flag_type __flags)
53 {
54 if (__re._M_automaton == nullptr)
55 return false;
56
57 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58 __m._M_begin = __s;
59 __m._M_resize(__re._M_automaton->_M_sub_count());
60 for (auto& __it : __res)
61 __it.matched = false;
62
63 bool __ret;
64 if ((__re.flags() & regex_constants::__polynomial)
65 || (__policy == _RegexExecutorPolicy::_S_alternate
66 && !__re._M_automaton->_M_has_backref))
67 {
68 _Executor<_BiIter, _Alloc, _TraitsT, false>
69 __executor(__s, __e, __m, __re, __flags);
70 if (__match_mode)
71 __ret = __executor._M_match();
72 else
73 __ret = __executor._M_search();
74 }
75 else
76 {
77 _Executor<_BiIter, _Alloc, _TraitsT, true>
78 __executor(__s, __e, __m, __re, __flags);
79 if (__match_mode)
80 __ret = __executor._M_match();
81 else
82 __ret = __executor._M_search();
83 }
84 if (__ret)
85 {
86 for (auto& __it : __res)
87 if (!__it.matched)
88 __it.first = __it.second = __e;
89 auto& __pre = __m._M_prefix();
90 auto& __suf = __m._M_suffix();
91 if (__match_mode)
92 {
93 __pre.matched = false;
94 __pre.first = __s;
95 __pre.second = __s;
96 __suf.matched = false;
97 __suf.first = __e;
98 __suf.second = __e;
99 }
100 else
101 {
102 __pre.first = __s;
103 __pre.second = __res[0].first;
104 __pre.matched = (__pre.first != __pre.second);
105 __suf.first = __res[0].second;
106 __suf.second = __e;
107 __suf.matched = (__suf.first != __suf.second);
108 }
109 }
110 else
111 {
112 __m._M_resize(0);
113 for (auto& __it : __res)
114 {
115 __it.matched = false;
116 __it.first = __it.second = __e;
117 }
118 }
119 return __ret;
120 }
121
122_GLIBCXX_END_NAMESPACE_VERSION
123}
124
125_GLIBCXX_BEGIN_NAMESPACE_VERSION
126
127 template<typename _Ch_type>
128 template<typename _Fwd_iter>
129 typename regex_traits<_Ch_type>::string_type
130 regex_traits<_Ch_type>::
131 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
132 {
133 typedef std::ctype<char_type> __ctype_type;
134 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
135
136 static const char* __collatenames[] =
137 {
138 "NUL",
139 "SOH",
140 "STX",
141 "ETX",
142 "EOT",
143 "ENQ",
144 "ACK",
145 "alert",
146 "backspace",
147 "tab",
148 "newline",
149 "vertical-tab",
150 "form-feed",
151 "carriage-return",
152 "SO",
153 "SI",
154 "DLE",
155 "DC1",
156 "DC2",
157 "DC3",
158 "DC4",
159 "NAK",
160 "SYN",
161 "ETB",
162 "CAN",
163 "EM",
164 "SUB",
165 "ESC",
166 "IS4",
167 "IS3",
168 "IS2",
169 "IS1",
170 "space",
171 "exclamation-mark",
172 "quotation-mark",
173 "number-sign",
174 "dollar-sign",
175 "percent-sign",
176 "ampersand",
177 "apostrophe",
178 "left-parenthesis",
179 "right-parenthesis",
180 "asterisk",
181 "plus-sign",
182 "comma",
183 "hyphen",
184 "period",
185 "slash",
186 "zero",
187 "one",
188 "two",
189 "three",
190 "four",
191 "five",
192 "six",
193 "seven",
194 "eight",
195 "nine",
196 "colon",
197 "semicolon",
198 "less-than-sign",
199 "equals-sign",
200 "greater-than-sign",
201 "question-mark",
202 "commercial-at",
203 "A",
204 "B",
205 "C",
206 "D",
207 "E",
208 "F",
209 "G",
210 "H",
211 "I",
212 "J",
213 "K",
214 "L",
215 "M",
216 "N",
217 "O",
218 "P",
219 "Q",
220 "R",
221 "S",
222 "T",
223 "U",
224 "V",
225 "W",
226 "X",
227 "Y",
228 "Z",
229 "left-square-bracket",
230 "backslash",
231 "right-square-bracket",
232 "circumflex",
233 "underscore",
234 "grave-accent",
235 "a",
236 "b",
237 "c",
238 "d",
239 "e",
240 "f",
241 "g",
242 "h",
243 "i",
244 "j",
245 "k",
246 "l",
247 "m",
248 "n",
249 "o",
250 "p",
251 "q",
252 "r",
253 "s",
254 "t",
255 "u",
256 "v",
257 "w",
258 "x",
259 "y",
260 "z",
261 "left-curly-bracket",
262 "vertical-line",
263 "right-curly-bracket",
264 "tilde",
265 "DEL",
266 };
267
268 string __s;
269 for (; __first != __last; ++__first)
270 __s += __fctyp.narrow(*__first, 0);
271
272 for (const auto& __it : __collatenames)
273 if (__s == __it)
274 return string_type(1, __fctyp.widen(
275 static_cast<char>(&__it - __collatenames)));
276
277 // TODO Add digraph support:
278 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
279
280 return string_type();
281 }
282
283 template<typename _Ch_type>
284 template<typename _Fwd_iter>
285 typename regex_traits<_Ch_type>::char_class_type
286 regex_traits<_Ch_type>::
287 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
288 {
289 typedef std::ctype<char_type> __ctype_type;
290 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
291
292 // Mappings from class name to class mask.
293 static const pair<const char*, char_class_type> __classnames[] =
294 {
295 {"d", ctype_base::digit},
296 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
297 {"s", ctype_base::space},
298 {"alnum", ctype_base::alnum},
299 {"alpha", ctype_base::alpha},
300 {"blank", ctype_base::blank},
301 {"cntrl", ctype_base::cntrl},
302 {"digit", ctype_base::digit},
303 {"graph", ctype_base::graph},
304 {"lower", ctype_base::lower},
305 {"print", ctype_base::print},
306 {"punct", ctype_base::punct},
307 {"space", ctype_base::space},
308 {"upper", ctype_base::upper},
309 {"xdigit", ctype_base::xdigit},
310 };
311
312 string __s;
313 for (; __first != __last; ++__first)
314 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
315
316 for (const auto& __it : __classnames)
317 if (__s == __it.first)
318 {
319 if (__icase
320 && ((__it.second
321 & (ctype_base::lower | ctype_base::upper)) != 0))
322 return ctype_base::alpha;
323 return __it.second;
324 }
325 return 0;
326 }
327
328 template<typename _Ch_type>
329 bool
330 regex_traits<_Ch_type>::
331 isctype(_Ch_type __c, char_class_type __f) const
332 {
333 typedef std::ctype<char_type> __ctype_type;
334 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
335
336 return __fctyp.is(__f._M_base, __c)
337 // [[:w:]]
338 || ((__f._M_extended & _RegexMask::_S_under)
339 && __c == __fctyp.widen('_'));
340 }
341
342 template<typename _Ch_type>
343 int
344 regex_traits<_Ch_type>::
345 value(_Ch_type __ch, int __radix) const
346 {
347 std::basic_istringstream<char_type> __is(string_type(1, __ch));
348 long __v;
349 if (__radix == 8)
350 __is >> std::oct;
351 else if (__radix == 16)
352 __is >> std::hex;
353 __is >> __v;
354 return __is.fail() ? -1 : __v;
355 }
356
357 template<typename _Bi_iter, typename _Alloc>
358 template<typename _Out_iter>
359 _Out_iter match_results<_Bi_iter, _Alloc>::
360 format(_Out_iter __out,
361 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
362 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
363 match_flag_type __flags) const
364 {
365 __glibcxx_assert( ready() );
366 regex_traits<char_type> __traits;
367 typedef std::ctype<char_type> __ctype_type;
368 const __ctype_type&
369 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
370
371 auto __output = [&](size_t __idx)
372 {
373 auto& __sub = (*this)[__idx];
374 if (__sub.matched)
375 __out = std::copy(__sub.first, __sub.second, __out);
376 };
377
378 if (__flags & regex_constants::format_sed)
379 {
380 for (; __fmt_first != __fmt_last;)
381 if (*__fmt_first == '&')
382 {
383 __output(0);
384 ++__fmt_first;
385 }
386 else if (*__fmt_first == '\\')
387 {
388 if (++__fmt_first != __fmt_last
389 && __fctyp.is(__ctype_type::digit, *__fmt_first))
390 __output(__traits.value(*__fmt_first++, 10));
391 else
392 *__out++ = '\\';
393 }
394 else
395 *__out++ = *__fmt_first++;
396 }
397 else
398 {
399 while (1)
400 {
401 auto __next = std::find(__fmt_first, __fmt_last, '$');
402 if (__next == __fmt_last)
403 break;
404
405 __out = std::copy(__fmt_first, __next, __out);
406
407 auto __eat = [&](char __ch) -> bool
408 {
409 if (*__next == __ch)
410 {
411 ++__next;
412 return true;
413 }
414 return false;
415 };
416
417 if (++__next == __fmt_last)
418 *__out++ = '$';
419 else if (__eat('$'))
420 *__out++ = '$';
421 else if (__eat('&'))
422 __output(0);
423 else if (__eat('`'))
424 {
425 auto& __sub = _M_prefix();
426 if (__sub.matched)
427 __out = std::copy(__sub.first, __sub.second, __out);
428 }
429 else if (__eat('\''))
430 {
431 auto& __sub = _M_suffix();
432 if (__sub.matched)
433 __out = std::copy(__sub.first, __sub.second, __out);
434 }
435 else if (__fctyp.is(__ctype_type::digit, *__next))
436 {
437 long __num = __traits.value(*__next, 10);
438 if (++__next != __fmt_last
439 && __fctyp.is(__ctype_type::digit, *__next))
440 {
441 __num *= 10;
442 __num += __traits.value(*__next++, 10);
443 }
444 if (0 <= __num && __num < this->size())
445 __output(__num);
446 }
447 else
448 *__out++ = '$';
449 __fmt_first = __next;
450 }
451 __out = std::copy(__fmt_first, __fmt_last, __out);
452 }
453 return __out;
454 }
455
456 template<typename _Out_iter, typename _Bi_iter,
457 typename _Rx_traits, typename _Ch_type>
458 _Out_iter
459 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
460 const basic_regex<_Ch_type, _Rx_traits>& __e,
461 const _Ch_type* __fmt,
462 regex_constants::match_flag_type __flags)
463 {
464 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
465 _IterT __i(__first, __last, __e, __flags);
466 _IterT __end;
467 if (__i == __end)
468 {
469 if (!(__flags & regex_constants::format_no_copy))
470 __out = std::copy(__first, __last, __out);
471 }
472 else
473 {
474 sub_match<_Bi_iter> __last;
475 auto __len = char_traits<_Ch_type>::length(__fmt);
476 for (; __i != __end; ++__i)
477 {
478 if (!(__flags & regex_constants::format_no_copy))
479 __out = std::copy(__i->prefix().first, __i->prefix().second,
480 __out);
481 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
482 __last = __i->suffix();
483 if (__flags & regex_constants::format_first_only)
484 break;
485 }
486 if (!(__flags & regex_constants::format_no_copy))
487 __out = std::copy(__last.first, __last.second, __out);
488 }
489 return __out;
490 }
491
492 template<typename _Bi_iter,
493 typename _Ch_type,
494 typename _Rx_traits>
495 bool
496 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
497 operator==(const regex_iterator& __rhs) const
498 {
499 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
500 return true;
501 return _M_pregex == __rhs._M_pregex
502 && _M_begin == __rhs._M_begin
503 && _M_end == __rhs._M_end
504 && _M_flags == __rhs._M_flags
505 && _M_match[0] == __rhs._M_match[0];
506 }
507
  // Advance to the next match in [_M_begin, _M_end).
  //
  // The search resumes at the end of the current match.  When no further
  // match exists _M_pregex is set to nullptr (operator== treats two
  // iterators with a null _M_pregex as equal).  If the current match is
  // not flagged as matched, nothing is changed.
  template<typename _Bi_iter,
           typename _Ch_type,
           typename _Rx_traits>
    regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
    regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
    operator++()
    {
      // In all cases in which the call to regex_search returns true,
      // match.prefix().first shall be equal to the previous value of
      // match[0].second, and for each index i in the half-open range
      // [0, match.size()) for which match[i].matched is true,
      // match[i].position() shall return distance(begin, match[i].first).
      // [28.12.1.4.5]
      if (_M_match[0].matched)
        {
          // __start is where the next search begins; it may be bumped by
          // one below.  __prefix_first remembers the end of the previous
          // match, which must become the start of the new prefix.
          auto __start = _M_match[0].second;
          auto __prefix_first = _M_match[0].second;
          if (_M_match[0].first == _M_match[0].second)
            {
              // The previous match was empty.
              if (__start == _M_end)
                {
                  // Empty match at the very end: no more matches.
                  _M_pregex = nullptr;
                  return *this;
                }
              else
                {
                  // First try for a non-empty match anchored at the same
                  // position.  The extra flags apply to this call only;
                  // _M_flags itself is not modified here.
                  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
                                   _M_flags
                                   | regex_constants::match_not_null
                                   | regex_constants::match_continuous))
                    {
                      __glibcxx_assert(_M_match[0].matched);
                      // regex_search was given __start as the beginning of
                      // the range, so patch the prefix to start at the end
                      // of the previous match instead.
                      auto& __prefix = _M_match._M_prefix();
                      __prefix.first = __prefix_first;
                      __prefix.matched = __prefix.first != __prefix.second;
                      // [28.12.1.4.5]
                      // Restore the original range start in the results.
                      _M_match._M_begin = _M_begin;
                      return *this;
                    }
                  else
                    // Nothing non-empty here: move one position forward
                    // and fall through to the ordinary search.
                    ++__start;
                }
            }
          // From now on match_prev_avail stays set in _M_flags for this
          // and all later searches.
          _M_flags |= regex_constants::match_prev_avail;
          if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
            {
              __glibcxx_assert(_M_match[0].matched);
              // Same fix-ups as above: the prefix starts at the end of the
              // previous match and the stored begin is the original one.
              auto& __prefix = _M_match._M_prefix();
              __prefix.first = __prefix_first;
              __prefix.matched = __prefix.first != __prefix.second;
              // [28.12.1.4.5]
              _M_match._M_begin = _M_begin;
            }
          else
            // No further match.
            _M_pregex = nullptr;
        }
      return *this;
    }
566
567 template<typename _Bi_iter,
568 typename _Ch_type,
569 typename _Rx_traits>
570 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
571 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
572 operator=(const regex_token_iterator& __rhs)
573 {
574 _M_position = __rhs._M_position;
575 _M_subs = __rhs._M_subs;
576 _M_n = __rhs._M_n;
577 _M_suffix = __rhs._M_suffix;
578 _M_has_m1 = __rhs._M_has_m1;
579 _M_normalize_result();
580 return *this;
581 }
582
583 template<typename _Bi_iter,
584 typename _Ch_type,
585 typename _Rx_traits>
586 bool
587 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
588 operator==(const regex_token_iterator& __rhs) const
589 {
590 if (_M_end_of_seq() && __rhs._M_end_of_seq())
591 return true;
592 if (_M_suffix.matched && __rhs._M_suffix.matched
593 && _M_suffix == __rhs._M_suffix)
594 return true;
595 if (_M_end_of_seq() || _M_suffix.matched
596 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
597 return false;
598 return _M_position == __rhs._M_position
599 && _M_n == __rhs._M_n
600 && _M_subs == __rhs._M_subs;
601 }
602
603 template<typename _Bi_iter,
604 typename _Ch_type,
605 typename _Rx_traits>
606 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
607 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
608 operator++()
609 {
610 _Position __prev = _M_position;
611 if (_M_suffix.matched)
612 *this = regex_token_iterator();
613 else if (_M_n + 1 < _M_subs.size())
614 {
615 _M_n++;
616 _M_result = &_M_current_match();
617 }
618 else
619 {
620 _M_n = 0;
621 ++_M_position;
622 if (_M_position != _Position())
623 _M_result = &_M_current_match();
624 else if (_M_has_m1 && __prev->suffix().length() != 0)
625 {
626 _M_suffix.matched = true;
627 _M_suffix.first = __prev->suffix().first;
628 _M_suffix.second = __prev->suffix().second;
629 _M_result = &_M_suffix;
630 }
631 else
632 *this = regex_token_iterator();
633 }
634 return *this;
635 }
636
637 template<typename _Bi_iter,
638 typename _Ch_type,
639 typename _Rx_traits>
640 void
641 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
642 _M_init(_Bi_iter __a, _Bi_iter __b)
643 {
644 _M_has_m1 = false;
645 for (auto __it : _M_subs)
646 if (__it == -1)
647 {
648 _M_has_m1 = true;
649 break;
650 }
651 if (_M_position != _Position())
652 _M_result = &_M_current_match();
653 else if (_M_has_m1)
654 {
655 _M_suffix.matched = true;
656 _M_suffix.first = __a;
657 _M_suffix.second = __b;
658 _M_result = &_M_suffix;
659 }
660 else
661 _M_result = nullptr;
662 }
663
664_GLIBCXX_END_NAMESPACE_VERSION
665} // namespace
666
667