1 | #include <Common/parseGlobs.h> |
2 | #include <re2/re2.h> |
3 | #include <re2/stringpiece.h> |
4 | #include <algorithm> |
5 | #include <sstream> |
6 | |
7 | namespace DB |
8 | { |
9 | /* Transforms string from grep-wildcard-syntax ("{N..M}", "{a,b,c}" as in remote table function and "*", "?") to perl-regexp for using re2 library fo matching |
10 | * with such steps: |
11 | * 1) search intervals like {0..9} and enums like {abc,xyz,qwe} in {}, replace them by regexp with pipe (expr1|expr2|expr3), |
12 | * 2) search and replace "*" and "?". |
13 | * Before each search need to escape symbols that we would not search. |
14 | * |
15 | * There are few examples in unit tests. |
16 | */ |
17 | std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs) |
18 | { |
19 | std::ostringstream oss_for_escaping; |
20 | /// Escaping only characters that not used in glob syntax |
21 | for (const auto & letter : initial_str_with_globs) |
22 | { |
23 | if ((letter == '[') || (letter == ']') || (letter == '|') || (letter == '+') || (letter == '-') || (letter == '(') || (letter == ')')) |
24 | oss_for_escaping << '\\'; |
25 | oss_for_escaping << letter; |
26 | } |
27 | std::string escaped_with_globs = oss_for_escaping.str(); |
28 | |
29 | static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})" ); /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without {}*, |
30 | re2::StringPiece input(escaped_with_globs); |
31 | re2::StringPiece matched; |
32 | std::ostringstream oss_for_replacing; |
33 | size_t current_index = 0; |
34 | while (RE2::FindAndConsume(&input, enum_or_range, &matched)) |
35 | { |
36 | std::string buffer = matched.ToString(); |
37 | oss_for_replacing << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '('; |
38 | |
39 | if (buffer.find(',') == std::string::npos) |
40 | { |
41 | size_t range_begin, range_end; |
42 | char point; |
43 | std::istringstream iss_range(buffer); |
44 | iss_range >> range_begin >> point >> point >> range_end; |
45 | oss_for_replacing << range_begin; |
46 | for (size_t i = range_begin + 1; i <= range_end; ++i) |
47 | { |
48 | oss_for_replacing << '|' << i; |
49 | } |
50 | } |
51 | else |
52 | { |
53 | std::replace(buffer.begin(), buffer.end(), ',', '|'); |
54 | oss_for_replacing << buffer; |
55 | } |
56 | oss_for_replacing << ")" ; |
57 | current_index = input.data() - escaped_with_globs.data(); |
58 | } |
59 | oss_for_replacing << escaped_with_globs.substr(current_index); |
60 | std::string almost_res = oss_for_replacing.str(); |
61 | std::ostringstream oss_final_processing; |
62 | for (const auto & letter : almost_res) |
63 | { |
64 | if ((letter == '?') || (letter == '*')) |
65 | { |
66 | oss_final_processing << "[^/]" ; /// '?' is any symbol except '/' |
67 | if (letter == '?') |
68 | continue; |
69 | } |
70 | if ((letter == '.') || (letter == '{') || (letter == '}')) |
71 | oss_final_processing << '\\'; |
72 | oss_final_processing << letter; |
73 | } |
74 | return oss_final_processing.str(); |
75 | } |
76 | } |
77 | |