1#include <Parsers/parseQuery.h>
2#include <Parsers/ParserQuery.h>
3#include <Parsers/ASTInsertQuery.h>
4#include <Parsers/Lexer.h>
5#include <Parsers/TokenIterator.h>
6#include <Common/StringUtils/StringUtils.h>
7#include <Common/typeid_cast.h>
8#include <Common/UTF8Helpers.h>
9#include <common/find_symbols.h>
10#include <IO/WriteHelpers.h>
11#include <IO/WriteBufferFromString.h>
12#include <IO/Operators.h>
13
14
15namespace DB
16{
17
18namespace ErrorCodes
19{
20 extern const int SYNTAX_ERROR;
21}
22
23namespace
24{
25
26/** From position in (possible multiline) query, get line number and column number in line.
27 * Used in syntax error message.
28 */
29std::pair<size_t, size_t> getLineAndCol(const char * begin, const char * pos)
30{
31 size_t line = 0;
32
33 const char * nl;
34 while ((nl = find_first_symbols<'\n'>(begin, pos)) < pos)
35 {
36 ++line;
37 begin = nl + 1;
38 }
39
40 /// Lines numbered from 1.
41 return { line + 1, pos - begin + 1 };
42}
43
44
45WriteBuffer & operator<< (WriteBuffer & out, const Expected & expected)
46{
47 if (expected.variants.empty())
48 return out;
49
50 if (expected.variants.size() == 1)
51 return out << *expected.variants.begin();
52
53 out << "one of: ";
54 bool first = true;
55 for (const auto & variant : expected.variants)
56 {
57 if (!first)
58 out << ", ";
59 first = false;
60
61 out << variant;
62 }
63 return out;
64}
65
66
67/// Hilite place of syntax error.
68void writeQueryWithHighlightedErrorPositions(
69 WriteBuffer & out,
70 const char * begin,
71 const char * end,
72 const Token * positions_to_hilite, /// must go in ascending order
73 size_t num_positions_to_hilite)
74{
75 const char * pos = begin;
76 for (size_t position_to_hilite_idx = 0; position_to_hilite_idx < num_positions_to_hilite; ++position_to_hilite_idx)
77 {
78 const char * current_position_to_hilite = positions_to_hilite[position_to_hilite_idx].begin;
79 out.write(pos, current_position_to_hilite - pos);
80
81 if (current_position_to_hilite == end)
82 {
83 out << "\033[41;1m \033[0m";
84 return;
85 }
86 else
87 {
88 size_t bytes_to_hilite = UTF8::seqLength(*current_position_to_hilite);
89
90 /// Bright on red background.
91 out << "\033[41;1m";
92 out.write(current_position_to_hilite, bytes_to_hilite);
93 out << "\033[0m";
94 pos = current_position_to_hilite + bytes_to_hilite;
95 }
96 }
97 out.write(pos, end - pos);
98}
99
100
101void writeQueryAroundTheError(
102 WriteBuffer & out,
103 const char * begin,
104 const char * end,
105 bool hilite,
106 const Token * positions_to_hilite,
107 size_t num_positions_to_hilite)
108{
109 if (hilite)
110 {
111 out << ":\n\n";
112 writeQueryWithHighlightedErrorPositions(out, begin, end, positions_to_hilite, num_positions_to_hilite);
113 out << "\n\n";
114 }
115 else
116 {
117 if (num_positions_to_hilite)
118 out << ": " << std::string(positions_to_hilite[0].begin, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, end - positions_to_hilite[0].begin)) << ". ";
119 }
120}
121
122
123void writeCommonErrorMessage(
124 WriteBuffer & out,
125 const char * begin,
126 const char * end,
127 Token last_token,
128 const std::string & query_description)
129{
130 out << "Syntax error";
131
132 if (!query_description.empty())
133 out << " (" << query_description << ")";
134
135 out << ": failed at position " << (last_token.begin - begin + 1);
136
137 if (last_token.type == TokenType::EndOfStream || last_token.type == TokenType::Semicolon)
138 out << " (end of query)";
139
140 /// If query is multiline.
141 const char * nl = find_first_symbols<'\n'>(begin, end);
142 if (nl + 1 < end)
143 {
144 size_t line = 0;
145 size_t col = 0;
146 std::tie(line, col) = getLineAndCol(begin, last_token.begin);
147
148 out << " (line " << line << ", col " << col << ")";
149 }
150}
151
152
153std::string getSyntaxErrorMessage(
154 const char * begin,
155 const char * end,
156 Token last_token,
157 const Expected & expected,
158 bool hilite,
159 const std::string & query_description)
160{
161 WriteBufferFromOwnString out;
162 writeCommonErrorMessage(out, begin, end, last_token, query_description);
163 writeQueryAroundTheError(out, begin, end, hilite, &last_token, 1);
164
165 if (!expected.variants.empty())
166 out << "Expected " << expected;
167
168 return out.str();
169}
170
171
172std::string getLexicalErrorMessage(
173 const char * begin,
174 const char * end,
175 Token last_token,
176 bool hilite,
177 const std::string & query_description)
178{
179 WriteBufferFromOwnString out;
180 writeCommonErrorMessage(out, begin, end, last_token, query_description);
181 writeQueryAroundTheError(out, begin, end, hilite, &last_token, 1);
182
183 out << getErrorTokenDescription(last_token.type);
184
185 return out.str();
186}
187
188
189std::string getUnmatchedParenthesesErrorMessage(
190 const char * begin,
191 const char * end,
192 const UnmatchedParentheses & unmatched_parens,
193 bool hilite,
194 const std::string & query_description)
195{
196 WriteBufferFromOwnString out;
197 writeCommonErrorMessage(out, begin, end, unmatched_parens[0], query_description);
198 writeQueryAroundTheError(out, begin, end, hilite, unmatched_parens.data(), unmatched_parens.size());
199
200 out << "Unmatched parentheses: ";
201 for (const Token & paren : unmatched_parens)
202 out << *paren.begin;
203
204 return out.str();
205}
206
207}
208
209
210ASTPtr tryParseQuery(
211 IParser & parser,
212 const char * & pos,
213 const char * end,
214 std::string & out_error_message,
215 bool hilite,
216 const std::string & query_description,
217 bool allow_multi_statements,
218 size_t max_query_size)
219{
220 Tokens tokens(pos, end, max_query_size);
221 IParser::Pos token_iterator(tokens);
222
223 if (token_iterator->isEnd()
224 || token_iterator->type == TokenType::Semicolon)
225 {
226 out_error_message = "Empty query";
227 return nullptr;
228 }
229
230 Expected expected;
231
232 ASTPtr res;
233 bool parse_res = parser.parse(token_iterator, res, expected);
234 Token last_token = token_iterator.max();
235
236 /// If parsed query ends at data for insertion. Data for insertion could be in any format and not necessary be lexical correct.
237 ASTInsertQuery * insert = nullptr;
238 if (parse_res)
239 insert = res->as<ASTInsertQuery>();
240
241 if (!(insert && insert->data))
242 {
243 /// Lexical error
244 if (last_token.isError())
245 {
246 out_error_message = getLexicalErrorMessage(pos, end, last_token, hilite, query_description);
247 return nullptr;
248 }
249
250 /// Unmatched parentheses
251 UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), &last_token);
252 if (!unmatched_parens.empty())
253 {
254 out_error_message = getUnmatchedParenthesesErrorMessage(pos, end, unmatched_parens, hilite, query_description);
255 return nullptr;
256 }
257 }
258
259 if (!parse_res)
260 {
261 /// Parse error.
262 out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description);
263 return nullptr;
264 }
265
266 /// Excessive input after query. Parsed query must end with end of data or semicolon or data for INSERT.
267 if (!token_iterator->isEnd()
268 && token_iterator->type != TokenType::Semicolon
269 && !(insert && insert->data))
270 {
271 expected.add(pos, "end of query");
272 out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description);
273 return nullptr;
274 }
275
276 while (token_iterator->type == TokenType::Semicolon)
277 ++token_iterator;
278
279 /// If multi-statements are not allowed, then after semicolon, there must be no non-space characters.
280 if (!allow_multi_statements
281 && !token_iterator->isEnd()
282 && !(insert && insert->data))
283 {
284 out_error_message = getSyntaxErrorMessage(pos, end, last_token, {}, hilite,
285 (query_description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed");
286 return nullptr;
287 }
288
289 pos = token_iterator->begin;
290 return res;
291}
292
293
294ASTPtr parseQueryAndMovePosition(
295 IParser & parser,
296 const char * & pos,
297 const char * end,
298 const std::string & query_description,
299 bool allow_multi_statements,
300 size_t max_query_size)
301{
302 std::string error_message;
303 ASTPtr res = tryParseQuery(parser, pos, end, error_message, false, query_description, allow_multi_statements, max_query_size);
304
305 if (res)
306 return res;
307
308 throw Exception(error_message, ErrorCodes::SYNTAX_ERROR);
309}
310
311
312ASTPtr parseQuery(
313 IParser & parser,
314 const char * begin,
315 const char * end,
316 const std::string & query_description,
317 size_t max_query_size)
318{
319 auto pos = begin;
320 return parseQueryAndMovePosition(parser, pos, end, query_description, false, max_query_size);
321}
322
323
324ASTPtr parseQuery(
325 IParser & parser,
326 const std::string & query,
327 const std::string & query_description,
328 size_t max_query_size)
329{
330 return parseQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size);
331}
332
333
334ASTPtr parseQuery(IParser & parser, const std::string & query, size_t max_query_size)
335{
336 return parseQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size);
337}
338
339
340std::pair<const char *, bool> splitMultipartQuery(const std::string & queries, std::vector<std::string> & queries_list)
341{
342 ASTPtr ast;
343
344 const char * begin = queries.data(); /// begin of current query
345 const char * pos = begin; /// parser moves pos from begin to the end of current query
346 const char * end = begin + queries.size();
347
348 ParserQuery parser(end);
349
350 queries_list.clear();
351
352 while (pos < end)
353 {
354 begin = pos;
355
356 ast = parseQueryAndMovePosition(parser, pos, end, "", true, 0);
357
358 auto * insert = ast->as<ASTInsertQuery>();
359
360 if (insert && insert->data)
361 {
362 /// Data for INSERT is broken on new line
363 pos = insert->data;
364 while (*pos && *pos != '\n')
365 ++pos;
366 insert->end = pos;
367 }
368
369 queries_list.emplace_back(queries.substr(begin - queries.data(), pos - begin));
370
371 while (isWhitespaceASCII(*pos) || *pos == ';')
372 ++pos;
373 }
374
375 return std::make_pair(begin, pos == end);
376}
377
378
379}
380