| 1 | #include "parseRemoteDescription.h" |
| 2 | #include <Common/Exception.h> |
| 3 | #include <IO/WriteHelpers.h> |
| 4 | |
| 5 | namespace DB |
| 6 | { |
| 7 | |
/// Error codes referenced by this translation unit. They are only declared here
/// (`extern`); the definitions live outside this file.
namespace ErrorCodes
{
/// NOTE(review): not referenced anywhere in the visible code of this file - confirm before removing.
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
/// Thrown for every malformed or oversized address description below.
extern const int BAD_ARGUMENTS;
}
| 13 | |
| 14 | /// The Cartesian product of two sets of rows, the result is written in place of the first argument |
| 15 | static void append(std::vector<String> & to, const std::vector<String> & what, size_t max_addresses) |
| 16 | { |
| 17 | if (what.empty()) |
| 18 | return; |
| 19 | |
| 20 | if (to.empty()) |
| 21 | { |
| 22 | to = what; |
| 23 | return; |
| 24 | } |
| 25 | |
| 26 | if (what.size() * to.size() > max_addresses) |
| 27 | throw Exception("Table function 'remote': first argument generates too many result addresses" , |
| 28 | ErrorCodes::BAD_ARGUMENTS); |
| 29 | std::vector<String> res; |
| 30 | for (size_t i = 0; i < to.size(); ++i) |
| 31 | for (size_t j = 0; j < what.size(); ++j) |
| 32 | res.push_back(to[i] + what[j]); |
| 33 | |
| 34 | to.swap(res); |
| 35 | } |
| 36 | |
| 37 | |
| 38 | /// Parse number from substring |
| 39 | static bool parseNumber(const String & description, size_t l, size_t r, size_t & res) |
| 40 | { |
| 41 | res = 0; |
| 42 | for (size_t pos = l; pos < r; pos ++) |
| 43 | { |
| 44 | if (!isNumericASCII(description[pos])) |
| 45 | return false; |
| 46 | res = res * 10 + description[pos] - '0'; |
| 47 | if (res > 1e15) |
| 48 | return false; |
| 49 | } |
| 50 | return true; |
| 51 | } |
| 52 | |
| 53 | |
| 54 | |
| 55 | /* Parse a string that generates shards and replicas. Separator - one of two characters | or , |
| 56 | * depending on whether shards or replicas are generated. |
| 57 | * For example: |
| 58 | * host1,host2,... - generates set of shards from host1, host2, ... |
| 59 | * host1|host2|... - generates set of replicas from host1, host2, ... |
| 60 | * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def. |
| 61 | * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def. |
| 62 | * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef. |
| 63 | * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef. |
| 64 | * abc{1..9}de{f,g,h} - is a direct product, 27 shards. |
| 65 | * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas. |
| 66 | */ |
| 67 | std::vector<String> parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses) |
| 68 | { |
| 69 | std::vector<String> res; |
| 70 | std::vector<String> cur; |
| 71 | |
| 72 | /// An empty substring means a set of an empty string |
| 73 | if (l >= r) |
| 74 | { |
| 75 | res.push_back("" ); |
| 76 | return res; |
| 77 | } |
| 78 | |
| 79 | for (size_t i = l; i < r; ++i) |
| 80 | { |
| 81 | /// Either the numeric interval (8..10) or equivalent expression in brackets |
| 82 | if (description[i] == '{') |
| 83 | { |
| 84 | int cnt = 1; |
| 85 | int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two |
| 86 | size_t m; |
| 87 | std::vector<String> buffer; |
| 88 | bool have_splitter = false; |
| 89 | |
| 90 | /// Look for the corresponding closing bracket |
| 91 | for (m = i + 1; m < r; ++m) |
| 92 | { |
| 93 | if (description[m] == '{') ++cnt; |
| 94 | if (description[m] == '}') --cnt; |
| 95 | if (description[m] == '.' && description[m-1] == '.') last_dot = m; |
| 96 | if (description[m] == separator) have_splitter = true; |
| 97 | if (cnt == 0) break; |
| 98 | } |
| 99 | if (cnt != 0) |
| 100 | throw Exception("Table function 'remote': incorrect brace sequence in first argument" , |
| 101 | ErrorCodes::BAD_ARGUMENTS); |
| 102 | /// The presence of a dot - numeric interval |
| 103 | if (last_dot != -1) |
| 104 | { |
| 105 | size_t left, right; |
| 106 | if (description[last_dot - 1] != '.') |
| 107 | throw Exception("Table function 'remote': incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1), |
| 108 | ErrorCodes::BAD_ARGUMENTS); |
| 109 | if (!parseNumber(description, i + 1, last_dot - 1, left)) |
| 110 | throw Exception("Table function 'remote': incorrect argument in braces (Incorrect left number): " |
| 111 | + description.substr(i, m - i + 1), |
| 112 | ErrorCodes::BAD_ARGUMENTS); |
| 113 | if (!parseNumber(description, last_dot + 1, m, right)) |
| 114 | throw Exception("Table function 'remote': incorrect argument in braces (Incorrect right number): " |
| 115 | + description.substr(i, m - i + 1), |
| 116 | ErrorCodes::BAD_ARGUMENTS); |
| 117 | if (left > right) |
| 118 | throw Exception("Table function 'remote': incorrect argument in braces (left number is greater then right): " |
| 119 | + description.substr(i, m - i + 1), |
| 120 | ErrorCodes::BAD_ARGUMENTS); |
| 121 | if (right - left + 1 > max_addresses) |
| 122 | throw Exception("Table function 'remote': first argument generates too many result addresses" , |
| 123 | ErrorCodes::BAD_ARGUMENTS); |
| 124 | bool add_leading_zeroes = false; |
| 125 | size_t len = last_dot - 1 - (i + 1); |
| 126 | /// If the left and right borders have equal numbers, then you must add leading zeros. |
| 127 | /// TODO The code is somewhat awful. |
| 128 | if (last_dot - 1 - (i + 1) == m - (last_dot + 1)) |
| 129 | add_leading_zeroes = true; |
| 130 | for (size_t id = left; id <= right; ++id) |
| 131 | { |
| 132 | String id_str = toString<UInt64>(id); |
| 133 | if (add_leading_zeroes) |
| 134 | { |
| 135 | while (id_str.size() < len) |
| 136 | id_str = "0" + id_str; |
| 137 | } |
| 138 | buffer.push_back(id_str); |
| 139 | } |
| 140 | } |
| 141 | else if (have_splitter) /// If there is a current delimiter inside, then generate a set of resulting rows |
| 142 | buffer = parseRemoteDescription(description, i + 1, m, separator, max_addresses); |
| 143 | else /// Otherwise just copy, spawn will occur when you call with the correct delimiter |
| 144 | buffer.push_back(description.substr(i, m - i + 1)); |
| 145 | /// Add all possible received extensions to the current set of lines |
| 146 | append(cur, buffer, max_addresses); |
| 147 | i = m; |
| 148 | } |
| 149 | else if (description[i] == separator) |
| 150 | { |
| 151 | /// If the delimiter, then add found rows |
| 152 | res.insert(res.end(), cur.begin(), cur.end()); |
| 153 | cur.clear(); |
| 154 | } |
| 155 | else |
| 156 | { |
| 157 | /// Otherwise, simply append the character to current lines |
| 158 | std::vector<String> buffer; |
| 159 | buffer.push_back(description.substr(i, 1)); |
| 160 | append(cur, buffer, max_addresses); |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | res.insert(res.end(), cur.begin(), cur.end()); |
| 165 | if (res.size() > max_addresses) |
| 166 | throw Exception("Table function 'remote': first argument generates too many result addresses" , |
| 167 | ErrorCodes::BAD_ARGUMENTS); |
| 168 | |
| 169 | return res; |
| 170 | } |
| 171 | |
| 172 | } |
| 173 | |