1 | #include <Parsers/parseQuery.h> |
2 | #include <Parsers/ParserQuery.h> |
3 | #include <Parsers/ASTInsertQuery.h> |
4 | #include <Parsers/Lexer.h> |
5 | #include <Parsers/TokenIterator.h> |
6 | #include <Common/StringUtils/StringUtils.h> |
7 | #include <Common/typeid_cast.h> |
8 | #include <Common/UTF8Helpers.h> |
9 | #include <common/find_symbols.h> |
10 | #include <IO/WriteHelpers.h> |
11 | #include <IO/WriteBufferFromString.h> |
12 | #include <IO/Operators.h> |
13 | |
14 | |
15 | namespace DB |
16 | { |
17 | |
18 | namespace ErrorCodes |
19 | { |
20 | extern const int SYNTAX_ERROR; |
21 | } |
22 | |
23 | namespace |
24 | { |
25 | |
26 | /** From position in (possible multiline) query, get line number and column number in line. |
27 | * Used in syntax error message. |
28 | */ |
29 | std::pair<size_t, size_t> getLineAndCol(const char * begin, const char * pos) |
30 | { |
31 | size_t line = 0; |
32 | |
33 | const char * nl; |
34 | while ((nl = find_first_symbols<'\n'>(begin, pos)) < pos) |
35 | { |
36 | ++line; |
37 | begin = nl + 1; |
38 | } |
39 | |
40 | /// Lines numbered from 1. |
41 | return { line + 1, pos - begin + 1 }; |
42 | } |
43 | |
44 | |
45 | WriteBuffer & operator<< (WriteBuffer & out, const Expected & expected) |
46 | { |
47 | if (expected.variants.empty()) |
48 | return out; |
49 | |
50 | if (expected.variants.size() == 1) |
51 | return out << *expected.variants.begin(); |
52 | |
53 | out << "one of: " ; |
54 | bool first = true; |
55 | for (const auto & variant : expected.variants) |
56 | { |
57 | if (!first) |
58 | out << ", " ; |
59 | first = false; |
60 | |
61 | out << variant; |
62 | } |
63 | return out; |
64 | } |
65 | |
66 | |
67 | /// Hilite place of syntax error. |
68 | void writeQueryWithHighlightedErrorPositions( |
69 | WriteBuffer & out, |
70 | const char * begin, |
71 | const char * end, |
72 | const Token * positions_to_hilite, /// must go in ascending order |
73 | size_t num_positions_to_hilite) |
74 | { |
75 | const char * pos = begin; |
76 | for (size_t position_to_hilite_idx = 0; position_to_hilite_idx < num_positions_to_hilite; ++position_to_hilite_idx) |
77 | { |
78 | const char * current_position_to_hilite = positions_to_hilite[position_to_hilite_idx].begin; |
79 | out.write(pos, current_position_to_hilite - pos); |
80 | |
81 | if (current_position_to_hilite == end) |
82 | { |
83 | out << "\033[41;1m \033[0m" ; |
84 | return; |
85 | } |
86 | else |
87 | { |
88 | size_t bytes_to_hilite = UTF8::seqLength(*current_position_to_hilite); |
89 | |
90 | /// Bright on red background. |
91 | out << "\033[41;1m" ; |
92 | out.write(current_position_to_hilite, bytes_to_hilite); |
93 | out << "\033[0m" ; |
94 | pos = current_position_to_hilite + bytes_to_hilite; |
95 | } |
96 | } |
97 | out.write(pos, end - pos); |
98 | } |
99 | |
100 | |
101 | void writeQueryAroundTheError( |
102 | WriteBuffer & out, |
103 | const char * begin, |
104 | const char * end, |
105 | bool hilite, |
106 | const Token * positions_to_hilite, |
107 | size_t num_positions_to_hilite) |
108 | { |
109 | if (hilite) |
110 | { |
111 | out << ":\n\n" ; |
112 | writeQueryWithHighlightedErrorPositions(out, begin, end, positions_to_hilite, num_positions_to_hilite); |
113 | out << "\n\n" ; |
114 | } |
115 | else |
116 | { |
117 | if (num_positions_to_hilite) |
118 | out << ": " << std::string(positions_to_hilite[0].begin, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, end - positions_to_hilite[0].begin)) << ". " ; |
119 | } |
120 | } |
121 | |
122 | |
123 | void writeCommonErrorMessage( |
124 | WriteBuffer & out, |
125 | const char * begin, |
126 | const char * end, |
127 | Token last_token, |
128 | const std::string & query_description) |
129 | { |
130 | out << "Syntax error" ; |
131 | |
132 | if (!query_description.empty()) |
133 | out << " (" << query_description << ")" ; |
134 | |
135 | out << ": failed at position " << (last_token.begin - begin + 1); |
136 | |
137 | if (last_token.type == TokenType::EndOfStream || last_token.type == TokenType::Semicolon) |
138 | out << " (end of query)" ; |
139 | |
140 | /// If query is multiline. |
141 | const char * nl = find_first_symbols<'\n'>(begin, end); |
142 | if (nl + 1 < end) |
143 | { |
144 | size_t line = 0; |
145 | size_t col = 0; |
146 | std::tie(line, col) = getLineAndCol(begin, last_token.begin); |
147 | |
148 | out << " (line " << line << ", col " << col << ")" ; |
149 | } |
150 | } |
151 | |
152 | |
153 | std::string getSyntaxErrorMessage( |
154 | const char * begin, |
155 | const char * end, |
156 | Token last_token, |
157 | const Expected & expected, |
158 | bool hilite, |
159 | const std::string & query_description) |
160 | { |
161 | WriteBufferFromOwnString out; |
162 | writeCommonErrorMessage(out, begin, end, last_token, query_description); |
163 | writeQueryAroundTheError(out, begin, end, hilite, &last_token, 1); |
164 | |
165 | if (!expected.variants.empty()) |
166 | out << "Expected " << expected; |
167 | |
168 | return out.str(); |
169 | } |
170 | |
171 | |
172 | std::string getLexicalErrorMessage( |
173 | const char * begin, |
174 | const char * end, |
175 | Token last_token, |
176 | bool hilite, |
177 | const std::string & query_description) |
178 | { |
179 | WriteBufferFromOwnString out; |
180 | writeCommonErrorMessage(out, begin, end, last_token, query_description); |
181 | writeQueryAroundTheError(out, begin, end, hilite, &last_token, 1); |
182 | |
183 | out << getErrorTokenDescription(last_token.type); |
184 | |
185 | return out.str(); |
186 | } |
187 | |
188 | |
189 | std::string getUnmatchedParenthesesErrorMessage( |
190 | const char * begin, |
191 | const char * end, |
192 | const UnmatchedParentheses & unmatched_parens, |
193 | bool hilite, |
194 | const std::string & query_description) |
195 | { |
196 | WriteBufferFromOwnString out; |
197 | writeCommonErrorMessage(out, begin, end, unmatched_parens[0], query_description); |
198 | writeQueryAroundTheError(out, begin, end, hilite, unmatched_parens.data(), unmatched_parens.size()); |
199 | |
200 | out << "Unmatched parentheses: " ; |
201 | for (const Token & paren : unmatched_parens) |
202 | out << *paren.begin; |
203 | |
204 | return out.str(); |
205 | } |
206 | |
207 | } |
208 | |
209 | |
210 | ASTPtr tryParseQuery( |
211 | IParser & parser, |
212 | const char * & pos, |
213 | const char * end, |
214 | std::string & out_error_message, |
215 | bool hilite, |
216 | const std::string & query_description, |
217 | bool allow_multi_statements, |
218 | size_t max_query_size) |
219 | { |
220 | Tokens tokens(pos, end, max_query_size); |
221 | IParser::Pos token_iterator(tokens); |
222 | |
223 | if (token_iterator->isEnd() |
224 | || token_iterator->type == TokenType::Semicolon) |
225 | { |
226 | out_error_message = "Empty query" ; |
227 | return nullptr; |
228 | } |
229 | |
230 | Expected expected; |
231 | |
232 | ASTPtr res; |
233 | bool parse_res = parser.parse(token_iterator, res, expected); |
234 | Token last_token = token_iterator.max(); |
235 | |
236 | /// If parsed query ends at data for insertion. Data for insertion could be in any format and not necessary be lexical correct. |
237 | ASTInsertQuery * insert = nullptr; |
238 | if (parse_res) |
239 | insert = res->as<ASTInsertQuery>(); |
240 | |
241 | if (!(insert && insert->data)) |
242 | { |
243 | /// Lexical error |
244 | if (last_token.isError()) |
245 | { |
246 | out_error_message = getLexicalErrorMessage(pos, end, last_token, hilite, query_description); |
247 | return nullptr; |
248 | } |
249 | |
250 | /// Unmatched parentheses |
251 | UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), &last_token); |
252 | if (!unmatched_parens.empty()) |
253 | { |
254 | out_error_message = getUnmatchedParenthesesErrorMessage(pos, end, unmatched_parens, hilite, query_description); |
255 | return nullptr; |
256 | } |
257 | } |
258 | |
259 | if (!parse_res) |
260 | { |
261 | /// Parse error. |
262 | out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description); |
263 | return nullptr; |
264 | } |
265 | |
266 | /// Excessive input after query. Parsed query must end with end of data or semicolon or data for INSERT. |
267 | if (!token_iterator->isEnd() |
268 | && token_iterator->type != TokenType::Semicolon |
269 | && !(insert && insert->data)) |
270 | { |
271 | expected.add(pos, "end of query" ); |
272 | out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description); |
273 | return nullptr; |
274 | } |
275 | |
276 | while (token_iterator->type == TokenType::Semicolon) |
277 | ++token_iterator; |
278 | |
279 | /// If multi-statements are not allowed, then after semicolon, there must be no non-space characters. |
280 | if (!allow_multi_statements |
281 | && !token_iterator->isEnd() |
282 | && !(insert && insert->data)) |
283 | { |
284 | out_error_message = getSyntaxErrorMessage(pos, end, last_token, {}, hilite, |
285 | (query_description.empty() ? std::string() : std::string(". " )) + "Multi-statements are not allowed" ); |
286 | return nullptr; |
287 | } |
288 | |
289 | pos = token_iterator->begin; |
290 | return res; |
291 | } |
292 | |
293 | |
294 | ASTPtr parseQueryAndMovePosition( |
295 | IParser & parser, |
296 | const char * & pos, |
297 | const char * end, |
298 | const std::string & query_description, |
299 | bool allow_multi_statements, |
300 | size_t max_query_size) |
301 | { |
302 | std::string error_message; |
303 | ASTPtr res = tryParseQuery(parser, pos, end, error_message, false, query_description, allow_multi_statements, max_query_size); |
304 | |
305 | if (res) |
306 | return res; |
307 | |
308 | throw Exception(error_message, ErrorCodes::SYNTAX_ERROR); |
309 | } |
310 | |
311 | |
312 | ASTPtr parseQuery( |
313 | IParser & parser, |
314 | const char * begin, |
315 | const char * end, |
316 | const std::string & query_description, |
317 | size_t max_query_size) |
318 | { |
319 | auto pos = begin; |
320 | return parseQueryAndMovePosition(parser, pos, end, query_description, false, max_query_size); |
321 | } |
322 | |
323 | |
324 | ASTPtr parseQuery( |
325 | IParser & parser, |
326 | const std::string & query, |
327 | const std::string & query_description, |
328 | size_t max_query_size) |
329 | { |
330 | return parseQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size); |
331 | } |
332 | |
333 | |
334 | ASTPtr parseQuery(IParser & parser, const std::string & query, size_t max_query_size) |
335 | { |
336 | return parseQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size); |
337 | } |
338 | |
339 | |
340 | std::pair<const char *, bool> splitMultipartQuery(const std::string & queries, std::vector<std::string> & queries_list) |
341 | { |
342 | ASTPtr ast; |
343 | |
344 | const char * begin = queries.data(); /// begin of current query |
345 | const char * pos = begin; /// parser moves pos from begin to the end of current query |
346 | const char * end = begin + queries.size(); |
347 | |
348 | ParserQuery parser(end); |
349 | |
350 | queries_list.clear(); |
351 | |
352 | while (pos < end) |
353 | { |
354 | begin = pos; |
355 | |
356 | ast = parseQueryAndMovePosition(parser, pos, end, "" , true, 0); |
357 | |
358 | auto * insert = ast->as<ASTInsertQuery>(); |
359 | |
360 | if (insert && insert->data) |
361 | { |
362 | /// Data for INSERT is broken on new line |
363 | pos = insert->data; |
364 | while (*pos && *pos != '\n') |
365 | ++pos; |
366 | insert->end = pos; |
367 | } |
368 | |
369 | queries_list.emplace_back(queries.substr(begin - queries.data(), pos - begin)); |
370 | |
371 | while (isWhitespaceASCII(*pos) || *pos == ';') |
372 | ++pos; |
373 | } |
374 | |
375 | return std::make_pair(begin, pos == end); |
376 | } |
377 | |
378 | |
379 | } |
380 | |