1/*
2 *
3 * Copyright (c) 1998-2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE: cregex.cpp
15 * VERSION: see <boost/version.hpp>
  *   DESCRIPTION: Implements high level class boost::RegEx
17 */
18
19
20#define BOOST_REGEX_SOURCE
21
22#include <boost/regex.hpp>
23#include <boost/cregex.hpp>
24#if !defined(BOOST_NO_STD_STRING)
25#include <map>
26#include <list>
27#include <boost/regex/v4/fileiter.hpp>
28typedef boost::match_flag_type match_flag_type;
29#include <cstdio>
30
31#ifdef BOOST_MSVC
32#pragma warning(disable:4309)
33#endif
34#ifdef BOOST_INTEL
35#pragma warning(disable:981 383)
36#endif
37
38namespace boost{
39
40#ifdef __BORLANDC__
41#if __BORLANDC__ < 0x530
42//
43// we need to instantiate the vector classes we use
44// since declaring a reference to type doesn't seem to
45// do the job...
46std::vector<std::size_t> inst1;
47std::vector<std::string> inst2;
48#endif
49#endif
50
51namespace{
52
//
// Build a std::string from the characters in the iterator range [i, j).
// Generic version: walks the range one element at a time.
//
template <class iterator>
std::string to_string(iterator i, iterator j)
{
   std::string text;
   for(; i != j; ++i)
      text.push_back(*i);
   return text;
}
64
//
// Build a std::string from the characters in [i, j).
// Overload for plain character pointers.
//
inline std::string to_string(const char* i, const char* j)
{
   std::string text;
   text.assign(i, j);
   return text;
}
69
70}
71namespace BOOST_REGEX_DETAIL_NS{
72
73class RegExData
74{
75public:
76 enum type
77 {
78 type_pc,
79 type_pf,
80 type_copy
81 };
82 regex e;
83 cmatch m;
84#ifndef BOOST_REGEX_NO_FILEITER
85 match_results<mapfile::iterator> fm;
86#endif
87 type t;
88 const char* pbase;
89#ifndef BOOST_REGEX_NO_FILEITER
90 mapfile::iterator fbase;
91#endif
92 std::map<int, std::string, std::less<int> > strings;
93 std::map<int, std::ptrdiff_t, std::less<int> > positions;
94 void update();
95 void clean();
96 RegExData() : e(), m(),
97#ifndef BOOST_REGEX_NO_FILEITER
98 fm(),
99#endif
100 t(type_copy), pbase(0),
101#ifndef BOOST_REGEX_NO_FILEITER
102 fbase(),
103#endif
104 strings(), positions() {}
105};
106
107void RegExData::update()
108{
109 strings.erase(strings.begin(), strings.end());
110 positions.erase(positions.begin(), positions.end());
111 if(t == type_pc)
112 {
113 for(unsigned int i = 0; i < m.size(); ++i)
114 {
115 if(m[i].matched) strings[i] = std::string(m[i].first, m[i].second);
116 positions[i] = m[i].matched ? m[i].first - pbase : -1;
117 }
118 }
119#ifndef BOOST_REGEX_NO_FILEITER
120 else
121 {
122 for(unsigned int i = 0; i < fm.size(); ++i)
123 {
124 if(fm[i].matched) strings[i] = to_string(fm[i].first, fm[i].second);
125 positions[i] = fm[i].matched ? fm[i].first - fbase : -1;
126 }
127 }
128#endif
129 t = type_copy;
130}
131
132void RegExData::clean()
133{
134#ifndef BOOST_REGEX_NO_FILEITER
135 fbase = mapfile::iterator();
136 fm = match_results<mapfile::iterator>();
137#endif
138}
139
140} // namespace
141
142RegEx::RegEx()
143{
144 pdata = new BOOST_REGEX_DETAIL_NS::RegExData();
145}
146
147RegEx::RegEx(const RegEx& o)
148{
149 pdata = new BOOST_REGEX_DETAIL_NS::RegExData(*(o.pdata));
150}
151
152RegEx::~RegEx()
153{
154 delete pdata;
155}
156
157RegEx::RegEx(const char* c, bool icase)
158{
159 pdata = new BOOST_REGEX_DETAIL_NS::RegExData();
160 SetExpression(c, icase);
161}
162
163RegEx::RegEx(const std::string& s, bool icase)
164{
165 pdata = new BOOST_REGEX_DETAIL_NS::RegExData();
166 SetExpression(s.c_str(), icase);
167}
168
169RegEx& RegEx::operator=(const RegEx& o)
170{
171 *pdata = *(o.pdata);
172 return *this;
173}
174
175RegEx& RegEx::operator=(const char* p)
176{
177 SetExpression(p, false);
178 return *this;
179}
180
181unsigned int RegEx::SetExpression(const char* p, bool icase)
182{
183 boost::uint_fast32_t f = icase ? regex::normal | regex::icase : regex::normal;
184 return pdata->e.set_expression(p, f);
185}
186
187unsigned int RegEx::error_code()const
188{
189 return pdata->e.error_code();
190}
191
192
193std::string RegEx::Expression()const
194{
195 return pdata->e.expression();
196}
197
198//
199// now matching operators:
200//
201bool RegEx::Match(const char* p, match_flag_type flags)
202{
203 pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
204 pdata->pbase = p;
205 const char* end = p;
206 while(*end)++end;
207
208 if(regex_match(p, end, pdata->m, pdata->e, flags))
209 {
210 pdata->update();
211 return true;
212 }
213 return false;
214}
215
216bool RegEx::Search(const char* p, match_flag_type flags)
217{
218 pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
219 pdata->pbase = p;
220 const char* end = p;
221 while(*end)++end;
222
223 if(regex_search(p, end, pdata->m, pdata->e, flags))
224 {
225 pdata->update();
226 return true;
227 }
228 return false;
229}
230namespace BOOST_REGEX_DETAIL_NS{
231struct pred1
232{
233 GrepCallback cb;
234 RegEx* pe;
235 pred1(GrepCallback c, RegEx* i) : cb(c), pe(i) {}
236 bool operator()(const cmatch& m)
237 {
238 pe->pdata->m = m;
239 return cb(*pe);
240 }
241};
242}
243unsigned int RegEx::Grep(GrepCallback cb, const char* p, match_flag_type flags)
244{
245 pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
246 pdata->pbase = p;
247 const char* end = p;
248 while(*end)++end;
249
250 unsigned int result = regex_grep(BOOST_REGEX_DETAIL_NS::pred1(cb, this), p, end, pdata->e, flags);
251 if(result)
252 pdata->update();
253 return result;
254}
255namespace BOOST_REGEX_DETAIL_NS{
256struct pred2
257{
258 std::vector<std::string>& v;
259 RegEx* pe;
260 pred2(std::vector<std::string>& o, RegEx* e) : v(o), pe(e) {}
261 bool operator()(const cmatch& m)
262 {
263 pe->pdata->m = m;
264 v.push_back(std::string(m[0].first, m[0].second));
265 return true;
266 }
267private:
268 pred2& operator=(const pred2&);
269};
270}
271
272unsigned int RegEx::Grep(std::vector<std::string>& v, const char* p, match_flag_type flags)
273{
274 pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
275 pdata->pbase = p;
276 const char* end = p;
277 while(*end)++end;
278
279 unsigned int result = regex_grep(BOOST_REGEX_DETAIL_NS::pred2(v, this), p, end, pdata->e, flags);
280 if(result)
281 pdata->update();
282 return result;
283}
284namespace BOOST_REGEX_DETAIL_NS{
285struct pred3
286{
287 std::vector<std::size_t>& v;
288 const char* base;
289 RegEx* pe;
290 pred3(std::vector<std::size_t>& o, const char* pb, RegEx* p) : v(o), base(pb), pe(p) {}
291 bool operator()(const cmatch& m)
292 {
293 pe->pdata->m = m;
294 v.push_back(static_cast<std::size_t>(m[0].first - base));
295 return true;
296 }
297private:
298 pred3& operator=(const pred3&);
299};
300}
301unsigned int RegEx::Grep(std::vector<std::size_t>& v, const char* p, match_flag_type flags)
302{
303 pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
304 pdata->pbase = p;
305 const char* end = p;
306 while(*end)++end;
307
308 unsigned int result = regex_grep(BOOST_REGEX_DETAIL_NS::pred3(v, p, this), p, end, pdata->e, flags);
309 if(result)
310 pdata->update();
311 return result;
312}
313#ifndef BOOST_REGEX_NO_FILEITER
314namespace BOOST_REGEX_DETAIL_NS{
315struct pred4
316{
317 GrepFileCallback cb;
318 RegEx* pe;
319 const char* file;
320 bool ok;
321 pred4(GrepFileCallback c, RegEx* i, const char* f) : cb(c), pe(i), file(f), ok(true) {}
322 bool operator()(const match_results<mapfile::iterator>& m)
323 {
324 pe->pdata->t = RegExData::type_pf;
325 pe->pdata->fm = m;
326 pe->pdata->update();
327 ok = cb(file, *pe);
328 return ok;
329 }
330};
331}
332namespace{
333void BuildFileList(std::list<std::string>* pl, const char* files, bool recurse)
334{
335 file_iterator start(files);
336 file_iterator end;
337 if(recurse)
338 {
339 // go through sub directories:
340 char buf[MAX_PATH];
341 BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(buf, MAX_PATH, start.root()));
342 if(*buf == 0)
343 {
344 BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(buf, MAX_PATH, "."));
345 BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, directory_iterator::separator()));
346 BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, "*"));
347 }
348 else
349 {
350 BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, directory_iterator::separator()));
351 BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, "*"));
352 }
353 directory_iterator dstart(buf);
354 directory_iterator dend;
355
356 // now get the file mask bit of "files":
357 const char* ptr = files;
358 while(*ptr) ++ptr;
359 while((ptr != files) && (*ptr != *directory_iterator::separator()) && (*ptr != '/'))--ptr;
360 if(ptr != files) ++ptr;
361
362 while(dstart != dend)
363 {
364 // Verify that sprintf will not overflow:
365 if(std::strlen(dstart.path()) + std::strlen(directory_iterator::separator()) + std::strlen(ptr) >= MAX_PATH)
366 {
367 // Oops overflow, skip this item:
368 ++dstart;
369 continue;
370 }
371#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE)
372 int r = (::sprintf_s)(buf, sizeof(buf), "%s%s%s", dstart.path(), directory_iterator::separator(), ptr);
373#else
374 int r = (std::sprintf)(buf, "%s%s%s", dstart.path(), directory_iterator::separator(), ptr);
375#endif
376 if(r < 0)
377 {
378 // sprintf failed, skip this item:
379 ++dstart;
380 continue;
381 }
382 BuildFileList(pl, buf, recurse);
383 ++dstart;
384 }
385 }
386 while(start != end)
387 {
388 pl->push_back(*start);
389 ++start;
390 }
391}
392}
393
394unsigned int RegEx::GrepFiles(GrepFileCallback cb, const char* files, bool recurse, match_flag_type flags)
395{
396 unsigned int result = 0;
397 std::list<std::string> file_list;
398 BuildFileList(&file_list, files, recurse);
399 std::list<std::string>::iterator start, end;
400 start = file_list.begin();
401 end = file_list.end();
402
403 while(start != end)
404 {
405 mapfile map((*start).c_str());
406 pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pf;
407 pdata->fbase = map.begin();
408 BOOST_REGEX_DETAIL_NS::pred4 pred(cb, this, (*start).c_str());
409 int r = regex_grep(pred, map.begin(), map.end(), pdata->e, flags);
410 result += r;
411 ++start;
412 pdata->clean();
413 if(pred.ok == false)
414 return result;
415 }
416
417 return result;
418}
419
420
421unsigned int RegEx::FindFiles(FindFilesCallback cb, const char* files, bool recurse, match_flag_type flags)
422{
423 unsigned int result = 0;
424 std::list<std::string> file_list;
425 BuildFileList(&file_list, files, recurse);
426 std::list<std::string>::iterator start, end;
427 start = file_list.begin();
428 end = file_list.end();
429
430 while(start != end)
431 {
432 mapfile map((*start).c_str());
433 pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pf;
434 pdata->fbase = map.begin();
435
436 if(regex_search(map.begin(), map.end(), pdata->fm, pdata->e, flags))
437 {
438 ++result;
439 if(false == cb((*start).c_str()))
440 return result;
441 }
442 //pdata->update();
443 ++start;
444 //pdata->clean();
445 }
446
447 return result;
448}
449#endif
450
451#ifdef BOOST_REGEX_V3
452#define regex_replace regex_merge
453#endif
454
455std::string RegEx::Merge(const std::string& in, const std::string& fmt,
456 bool copy, match_flag_type flags)
457{
458 std::string result;
459 BOOST_REGEX_DETAIL_NS::string_out_iterator<std::string> i(result);
460 if(!copy) flags |= format_no_copy;
461 regex_replace(i, in.begin(), in.end(), pdata->e, fmt.c_str(), flags);
462 return result;
463}
464
465std::string RegEx::Merge(const char* in, const char* fmt,
466 bool copy, match_flag_type flags)
467{
468 std::string result;
469 if(!copy) flags |= format_no_copy;
470 BOOST_REGEX_DETAIL_NS::string_out_iterator<std::string> i(result);
471 regex_replace(i, in, in + std::strlen(in), pdata->e, fmt, flags);
472 return result;
473}
474
475std::size_t RegEx::Split(std::vector<std::string>& v,
476 std::string& s,
477 match_flag_type flags,
478 unsigned max_count)
479{
480 return regex_split(std::back_inserter(v), s, pdata->e, flags, max_count);
481}
482
483
484
485//
486// now operators for returning what matched in more detail:
487//
488std::size_t RegEx::Position(int i)const
489{
490 switch(pdata->t)
491 {
492 case BOOST_REGEX_DETAIL_NS::RegExData::type_pc:
493 return pdata->m[i].matched ? pdata->m[i].first - pdata->pbase : RegEx::npos;
494 case BOOST_REGEX_DETAIL_NS::RegExData::type_pf:
495#ifndef BOOST_REGEX_NO_FILEITER
496 return pdata->fm[i].matched ? pdata->fm[i].first - pdata->fbase : RegEx::npos;
497#endif
498 case BOOST_REGEX_DETAIL_NS::RegExData::type_copy:
499 {
500 std::map<int, std::ptrdiff_t, std::less<int> >::iterator pos = pdata->positions.find(i);
501 if(pos == pdata->positions.end())
502 return RegEx::npos;
503 return (*pos).second;
504 }
505 }
506 return RegEx::npos;
507}
508
509std::size_t RegEx::Marks()const
510{
511 return pdata->e.mark_count();
512}
513
514
515std::size_t RegEx::Length(int i)const
516{
517 switch(pdata->t)
518 {
519 case BOOST_REGEX_DETAIL_NS::RegExData::type_pc:
520 return pdata->m[i].matched ? pdata->m[i].second - pdata->m[i].first : RegEx::npos;
521 case BOOST_REGEX_DETAIL_NS::RegExData::type_pf:
522#ifndef BOOST_REGEX_NO_FILEITER
523 return pdata->fm[i].matched ? pdata->fm[i].second - pdata->fm[i].first : RegEx::npos;
524#endif
525 case BOOST_REGEX_DETAIL_NS::RegExData::type_copy:
526 {
527 std::map<int, std::string, std::less<int> >::iterator pos = pdata->strings.find(i);
528 if(pos == pdata->strings.end())
529 return RegEx::npos;
530 return (*pos).second.size();
531 }
532 }
533 return RegEx::npos;
534}
535
536bool RegEx::Matched(int i)const
537{
538 switch(pdata->t)
539 {
540 case BOOST_REGEX_DETAIL_NS::RegExData::type_pc:
541 return pdata->m[i].matched;
542 case BOOST_REGEX_DETAIL_NS::RegExData::type_pf:
543#ifndef BOOST_REGEX_NO_FILEITER
544 return pdata->fm[i].matched;
545#endif
546 case BOOST_REGEX_DETAIL_NS::RegExData::type_copy:
547 {
548 std::map<int, std::string, std::less<int> >::iterator pos = pdata->strings.find(i);
549 if(pos == pdata->strings.end())
550 return false;
551 return true;
552 }
553 }
554 return false;
555}
556
557
558std::string RegEx::What(int i)const
559{
560 std::string result;
561 switch(pdata->t)
562 {
563 case BOOST_REGEX_DETAIL_NS::RegExData::type_pc:
564 if(pdata->m[i].matched)
565 result.assign(pdata->m[i].first, pdata->m[i].second);
566 break;
567 case BOOST_REGEX_DETAIL_NS::RegExData::type_pf:
568 if(pdata->m[i].matched)
569 result.assign(to_string(pdata->m[i].first, pdata->m[i].second));
570 break;
571 case BOOST_REGEX_DETAIL_NS::RegExData::type_copy:
572 {
573 std::map<int, std::string, std::less<int> >::iterator pos = pdata->strings.find(i);
574 if(pos != pdata->strings.end())
575 result = (*pos).second;
576 break;
577 }
578 }
579 return result;
580}
581
582const std::size_t RegEx::npos = ~static_cast<std::size_t>(0);
583
584} // namespace boost
585
586#if defined(__BORLANDC__) && (__BORLANDC__ >= 0x550) && (__BORLANDC__ <= 0x551) && !defined(_RWSTD_COMPILE_INSTANTIATE)
587//
588// this is an ugly hack to work around an ugly problem:
589// by default this file will produce unresolved externals during
590// linking unless _RWSTD_COMPILE_INSTANTIATE is defined (Borland bug).
591// However if _RWSTD_COMPILE_INSTANTIATE is defined then we get separate
592// copies of basic_string's static data in the RTL and this DLL, this messes
593// with basic_string's memory management and results in run-time crashes,
594// Oh sweet joy of Catch 22....
595//
596namespace std{
597template<> template<>
598basic_string<char>& BOOST_REGEX_DECL
599basic_string<char>::replace<const char*>(char* f1, char* f2, const char* i1, const char* i2)
600{
601 unsigned insert_pos = f1 - begin();
602 unsigned remove_len = f2 - f1;
603 unsigned insert_len = i2 - i1;
604 unsigned org_size = size();
605 if(insert_len > remove_len)
606 {
607 append(insert_len-remove_len, ' ');
608 std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end());
609 std::copy(i1, i2, begin() + insert_pos);
610 }
611 else
612 {
613 std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len);
614 std::copy(i1, i2, begin() + insert_pos);
615 erase(size() + insert_len - remove_len);
616 }
617 return *this;
618}
619template<> template<>
620basic_string<wchar_t>& BOOST_REGEX_DECL
621basic_string<wchar_t>::replace<const wchar_t*>(wchar_t* f1, wchar_t* f2, const wchar_t* i1, const wchar_t* i2)
622{
623 unsigned insert_pos = f1 - begin();
624 unsigned remove_len = f2 - f1;
625 unsigned insert_len = i2 - i1;
626 unsigned org_size = size();
627 if(insert_len > remove_len)
628 {
629 append(insert_len-remove_len, ' ');
630 std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end());
631 std::copy(i1, i2, begin() + insert_pos);
632 }
633 else
634 {
635 std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len);
636 std::copy(i1, i2, begin() + insert_pos);
637 erase(size() + insert_len - remove_len);
638 }
639 return *this;
640}
641} // namespace std
642#endif
643
644#endif
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661