| 1 | #include <Common/parseGlobs.h> |
| 2 | #include <re2/re2.h> |
| 3 | #include <re2/stringpiece.h> |
| 4 | #include <algorithm> |
| 5 | #include <sstream> |
| 6 | |
| 7 | namespace DB |
| 8 | { |
| 9 | /* Transforms string from grep-wildcard-syntax ("{N..M}", "{a,b,c}" as in remote table function and "*", "?") to perl-regexp for using re2 library fo matching |
| 10 | * with such steps: |
| 11 | * 1) search intervals like {0..9} and enums like {abc,xyz,qwe} in {}, replace them by regexp with pipe (expr1|expr2|expr3), |
| 12 | * 2) search and replace "*" and "?". |
| 13 | * Before each search need to escape symbols that we would not search. |
| 14 | * |
| 15 | * There are few examples in unit tests. |
| 16 | */ |
| 17 | std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs) |
| 18 | { |
| 19 | std::ostringstream oss_for_escaping; |
| 20 | /// Escaping only characters that not used in glob syntax |
| 21 | for (const auto & letter : initial_str_with_globs) |
| 22 | { |
| 23 | if ((letter == '[') || (letter == ']') || (letter == '|') || (letter == '+') || (letter == '-') || (letter == '(') || (letter == ')')) |
| 24 | oss_for_escaping << '\\'; |
| 25 | oss_for_escaping << letter; |
| 26 | } |
| 27 | std::string escaped_with_globs = oss_for_escaping.str(); |
| 28 | |
| 29 | static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})" ); /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without {}*, |
| 30 | re2::StringPiece input(escaped_with_globs); |
| 31 | re2::StringPiece matched; |
| 32 | std::ostringstream oss_for_replacing; |
| 33 | size_t current_index = 0; |
| 34 | while (RE2::FindAndConsume(&input, enum_or_range, &matched)) |
| 35 | { |
| 36 | std::string buffer = matched.ToString(); |
| 37 | oss_for_replacing << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '('; |
| 38 | |
| 39 | if (buffer.find(',') == std::string::npos) |
| 40 | { |
| 41 | size_t range_begin, range_end; |
| 42 | char point; |
| 43 | std::istringstream iss_range(buffer); |
| 44 | iss_range >> range_begin >> point >> point >> range_end; |
| 45 | oss_for_replacing << range_begin; |
| 46 | for (size_t i = range_begin + 1; i <= range_end; ++i) |
| 47 | { |
| 48 | oss_for_replacing << '|' << i; |
| 49 | } |
| 50 | } |
| 51 | else |
| 52 | { |
| 53 | std::replace(buffer.begin(), buffer.end(), ',', '|'); |
| 54 | oss_for_replacing << buffer; |
| 55 | } |
| 56 | oss_for_replacing << ")" ; |
| 57 | current_index = input.data() - escaped_with_globs.data(); |
| 58 | } |
| 59 | oss_for_replacing << escaped_with_globs.substr(current_index); |
| 60 | std::string almost_res = oss_for_replacing.str(); |
| 61 | std::ostringstream oss_final_processing; |
| 62 | for (const auto & letter : almost_res) |
| 63 | { |
| 64 | if ((letter == '?') || (letter == '*')) |
| 65 | { |
| 66 | oss_final_processing << "[^/]" ; /// '?' is any symbol except '/' |
| 67 | if (letter == '?') |
| 68 | continue; |
| 69 | } |
| 70 | if ((letter == '.') || (letter == '{') || (letter == '}')) |
| 71 | oss_final_processing << '\\'; |
| 72 | oss_final_processing << letter; |
| 73 | } |
| 74 | return oss_final_processing.str(); |
| 75 | } |
| 76 | } |
| 77 | |