1#include <Common/parseGlobs.h>
2#include <re2/re2.h>
3#include <re2/stringpiece.h>
4#include <algorithm>
5#include <sstream>
6
7namespace DB
8{
9/* Transforms string from grep-wildcard-syntax ("{N..M}", "{a,b,c}" as in remote table function and "*", "?") to perl-regexp for using re2 library fo matching
10 * with such steps:
11 * 1) search intervals like {0..9} and enums like {abc,xyz,qwe} in {}, replace them by regexp with pipe (expr1|expr2|expr3),
12 * 2) search and replace "*" and "?".
13 * Before each search need to escape symbols that we would not search.
14 *
15 * There are few examples in unit tests.
16 */
17std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs)
18{
19 std::ostringstream oss_for_escaping;
20 /// Escaping only characters that not used in glob syntax
21 for (const auto & letter : initial_str_with_globs)
22 {
23 if ((letter == '[') || (letter == ']') || (letter == '|') || (letter == '+') || (letter == '-') || (letter == '(') || (letter == ')'))
24 oss_for_escaping << '\\';
25 oss_for_escaping << letter;
26 }
27 std::string escaped_with_globs = oss_for_escaping.str();
28
29 static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})"); /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without {}*,
30 re2::StringPiece input(escaped_with_globs);
31 re2::StringPiece matched;
32 std::ostringstream oss_for_replacing;
33 size_t current_index = 0;
34 while (RE2::FindAndConsume(&input, enum_or_range, &matched))
35 {
36 std::string buffer = matched.ToString();
37 oss_for_replacing << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '(';
38
39 if (buffer.find(',') == std::string::npos)
40 {
41 size_t range_begin, range_end;
42 char point;
43 std::istringstream iss_range(buffer);
44 iss_range >> range_begin >> point >> point >> range_end;
45 oss_for_replacing << range_begin;
46 for (size_t i = range_begin + 1; i <= range_end; ++i)
47 {
48 oss_for_replacing << '|' << i;
49 }
50 }
51 else
52 {
53 std::replace(buffer.begin(), buffer.end(), ',', '|');
54 oss_for_replacing << buffer;
55 }
56 oss_for_replacing << ")";
57 current_index = input.data() - escaped_with_globs.data();
58 }
59 oss_for_replacing << escaped_with_globs.substr(current_index);
60 std::string almost_res = oss_for_replacing.str();
61 std::ostringstream oss_final_processing;
62 for (const auto & letter : almost_res)
63 {
64 if ((letter == '?') || (letter == '*'))
65 {
66 oss_final_processing << "[^/]"; /// '?' is any symbol except '/'
67 if (letter == '?')
68 continue;
69 }
70 if ((letter == '.') || (letter == '{') || (letter == '}'))
71 oss_final_processing << '\\';
72 oss_final_processing << letter;
73 }
74 return oss_final_processing.str();
75}
76}
77