1#include <errno.h>
2#include <cstdlib>
3
4#include <Poco/String.h>
5
6#include <IO/ReadHelpers.h>
7#include <IO/ReadBufferFromMemory.h>
8#include <Common/typeid_cast.h>
9#include <Parsers/DumpASTNode.h>
10
11#include <Parsers/IAST.h>
12#include <Parsers/ASTExpressionList.h>
13#include <Parsers/ASTFunction.h>
14#include <Parsers/ASTIdentifier.h>
15#include <Parsers/ASTLiteral.h>
16#include <Parsers/ASTAsterisk.h>
17#include <Parsers/ASTQualifiedAsterisk.h>
18#include <Parsers/ASTQueryParameter.h>
19#include <Parsers/ASTTTLElement.h>
20#include <Parsers/ASTOrderByElement.h>
21#include <Parsers/ASTSubquery.h>
22#include <Parsers/ASTFunctionWithKeyValueArguments.h>
23
24#include <Parsers/parseIntervalKind.h>
25#include <Parsers/ExpressionListParsers.h>
26#include <Parsers/ParserSelectWithUnionQuery.h>
27#include <Parsers/ParserCase.h>
28
29#include <Parsers/ExpressionElementParsers.h>
30#include <Parsers/ParserCreateQuery.h>
31
32#include <Parsers/queryToString.h>
33#include <boost/algorithm/string.hpp>
34#include "ASTColumnsMatcher.h"
35
36
37namespace DB
38{
39
/// Error codes used by the parsers in this file; only declared here.
namespace ErrorCodes
{
    /// Thrown by ParserNumber when strtod unexpectedly yields a negative value.
    extern const int LOGICAL_ERROR;
    /// Thrown by ParserFunction for an unquoted date inside toDate(...).
    extern const int SYNTAX_ERROR;
}
45
46
47bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
48{
49 ASTPtr contents_node;
50 ParserExpressionList contents(false);
51
52 if (pos->type != TokenType::OpeningSquareBracket)
53 return false;
54 ++pos;
55
56 if (!contents.parse(pos, contents_node, expected))
57 return false;
58
59 if (pos->type != TokenType::ClosingSquareBracket)
60 return false;
61 ++pos;
62
63 auto function_node = std::make_shared<ASTFunction>();
64 function_node->name = "array";
65 function_node->arguments = contents_node;
66 function_node->children.push_back(contents_node);
67 node = function_node;
68
69 return true;
70}
71
72
73bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
74{
75 ASTPtr contents_node;
76 ParserExpressionList contents(false);
77
78 if (pos->type != TokenType::OpeningRoundBracket)
79 return false;
80 ++pos;
81
82 if (!contents.parse(pos, contents_node, expected))
83 return false;
84
85 bool is_elem = true;
86 if (pos->type == TokenType::Comma)
87 {
88 is_elem = false;
89 ++pos;
90 }
91
92 if (pos->type != TokenType::ClosingRoundBracket)
93 return false;
94 ++pos;
95
96 const auto & expr_list = contents_node->as<ASTExpressionList &>();
97
98 /// empty expression in parentheses is not allowed
99 if (expr_list.children.empty())
100 {
101 expected.add(pos, "non-empty parenthesized list of expressions");
102 return false;
103 }
104
105 if (expr_list.children.size() == 1 && is_elem)
106 {
107 node = expr_list.children.front();
108 }
109 else
110 {
111 auto function_node = std::make_shared<ASTFunction>();
112 function_node->name = "tuple";
113 function_node->arguments = contents_node;
114 function_node->children.push_back(contents_node);
115 node = function_node;
116 }
117
118 return true;
119}
120
121
122bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
123{
124 ASTPtr select_node;
125 ParserSelectWithUnionQuery select;
126
127 if (pos->type != TokenType::OpeningRoundBracket)
128 return false;
129 ++pos;
130
131 if (!select.parse(pos, select_node, expected))
132 return false;
133
134 if (pos->type != TokenType::ClosingRoundBracket)
135 return false;
136 ++pos;
137
138 node = std::make_shared<ASTSubquery>();
139 node->children.push_back(select_node);
140 return true;
141}
142
143
144bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &)
145{
146 /// Identifier in backquotes or in double quotes
147 if (pos->type == TokenType::QuotedIdentifier)
148 {
149 ReadBufferFromMemory buf(pos->begin, pos->size());
150 String s;
151
152 if (*pos->begin == '`')
153 readBackQuotedStringWithSQLStyle(s, buf);
154 else
155 readDoubleQuotedStringWithSQLStyle(s, buf);
156
157 if (s.empty()) /// Identifiers "empty string" are not allowed.
158 return false;
159
160 node = std::make_shared<ASTIdentifier>(s);
161 ++pos;
162 return true;
163 }
164 else if (pos->type == TokenType::BareWord)
165 {
166 node = std::make_shared<ASTIdentifier>(String(pos->begin, pos->end));
167 ++pos;
168 return true;
169 }
170
171 return false;
172}
173
174
175bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
176{
177 ASTPtr id_list;
178 if (!ParserList(std::make_unique<ParserIdentifier>(), std::make_unique<ParserToken>(TokenType::Dot), false)
179 .parse(pos, id_list, expected))
180 return false;
181
182 String name;
183 std::vector<String> parts;
184 const auto & list = id_list->as<ASTExpressionList &>();
185 for (const auto & child : list.children)
186 {
187 if (!name.empty())
188 name += '.';
189 parts.emplace_back(getIdentifierName(child));
190 name += parts.back();
191 }
192
193 if (parts.size() == 1)
194 parts.clear();
195 node = std::make_shared<ASTIdentifier>(name, std::move(parts));
196
197 return true;
198}
199
200
201bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
202{
203 ParserIdentifier id_parser;
204 ParserKeyword distinct("DISTINCT");
205 ParserExpressionList contents(false);
206
207 bool has_distinct_modifier = false;
208
209 ASTPtr identifier;
210 ASTPtr expr_list_args;
211 ASTPtr expr_list_params;
212
213 if (!id_parser.parse(pos, identifier, expected))
214 return false;
215
216 if (pos->type != TokenType::OpeningRoundBracket)
217 return false;
218 ++pos;
219
220 if (distinct.ignore(pos, expected))
221 has_distinct_modifier = true;
222
223 const char * contents_begin = pos->begin;
224 if (!contents.parse(pos, expr_list_args, expected))
225 return false;
226 const char * contents_end = pos->begin;
227
228 if (pos->type != TokenType::ClosingRoundBracket)
229 return false;
230 ++pos;
231
232 /** Check for a common error case - often due to the complexity of quoting command-line arguments,
233 * an expression of the form toDate(2014-01-01) appears in the query instead of toDate('2014-01-01').
234 * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number,
235 * and the query silently returns an unexpected result.
236 */
237 if (getIdentifierName(identifier) == "toDate"
238 && contents_end - contents_begin == strlen("2014-01-01")
239 && contents_begin[0] >= '2' && contents_begin[0] <= '3'
240 && contents_begin[1] >= '0' && contents_begin[1] <= '9'
241 && contents_begin[2] >= '0' && contents_begin[2] <= '9'
242 && contents_begin[3] >= '0' && contents_begin[3] <= '9'
243 && contents_begin[4] == '-'
244 && contents_begin[5] >= '0' && contents_begin[5] <= '9'
245 && contents_begin[6] >= '0' && contents_begin[6] <= '9'
246 && contents_begin[7] == '-'
247 && contents_begin[8] >= '0' && contents_begin[8] <= '9'
248 && contents_begin[9] >= '0' && contents_begin[9] <= '9')
249 {
250 std::string contents_str(contents_begin, contents_end - contents_begin);
251 throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')"
252 , ErrorCodes::SYNTAX_ERROR);
253 }
254
255 /// The parametric aggregate function has two lists (parameters and arguments) in parentheses. Example: quantile(0.9)(x).
256 if (allow_function_parameters && pos->type == TokenType::OpeningRoundBracket)
257 {
258 ++pos;
259
260 /// Parametric aggregate functions cannot have DISTINCT in parameters list.
261 if (has_distinct_modifier)
262 return false;
263
264 expr_list_params = expr_list_args;
265 expr_list_args = nullptr;
266
267 if (distinct.ignore(pos, expected))
268 has_distinct_modifier = true;
269
270 if (!contents.parse(pos, expr_list_args, expected))
271 return false;
272
273 if (pos->type != TokenType::ClosingRoundBracket)
274 return false;
275 ++pos;
276 }
277
278 auto function_node = std::make_shared<ASTFunction>();
279 tryGetIdentifierNameInto(identifier, function_node->name);
280
281 /// func(DISTINCT ...) is equivalent to funcDistinct(...)
282 if (has_distinct_modifier)
283 function_node->name += "Distinct";
284
285 function_node->arguments = expr_list_args;
286 function_node->children.push_back(function_node->arguments);
287
288 if (expr_list_params)
289 {
290 function_node->parameters = expr_list_params;
291 function_node->children.push_back(function_node->parameters);
292 }
293
294 node = function_node;
295 return true;
296}
297
298bool ParserCodecDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
299{
300 return ParserList(std::make_unique<ParserIdentifierWithOptionalParameters>(),
301 std::make_unique<ParserToken>(TokenType::Comma), false).parse(pos, node, expected);
302}
303
304bool ParserCodec::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
305{
306 ParserCodecDeclarationList codecs;
307 ASTPtr expr_list_args;
308
309 if (pos->type != TokenType::OpeningRoundBracket)
310 return false;
311
312 ++pos;
313 if (!codecs.parse(pos, expr_list_args, expected))
314 return false;
315
316 if (pos->type != TokenType::ClosingRoundBracket)
317 return false;
318 ++pos;
319
320 auto function_node = std::make_shared<ASTFunction>();
321 function_node->name = "CODEC";
322 function_node->arguments = expr_list_args;
323 function_node->children.push_back(function_node->arguments);
324
325 node = function_node;
326 return true;
327}
328
329bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
330{
331 /// Either CAST(expr AS type) or CAST(expr, 'type')
332 /// The latter will be parsed normally as a function later.
333
334 ASTPtr expr_node;
335 ASTPtr type_node;
336
337 if (ParserKeyword("CAST").ignore(pos, expected)
338 && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)
339 && ParserExpression().parse(pos, expr_node, expected)
340 && ParserKeyword("AS").ignore(pos, expected)
341 && ParserIdentifierWithOptionalParameters().parse(pos, type_node, expected)
342 && ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
343 {
344 /// Convert to canonical representation in functional form: CAST(expr, 'type')
345
346 auto type_literal = std::make_shared<ASTLiteral>(queryToString(type_node));
347
348 auto expr_list_args = std::make_shared<ASTExpressionList>();
349 expr_list_args->children.push_back(expr_node);
350 expr_list_args->children.push_back(std::move(type_literal));
351
352 auto func_node = std::make_shared<ASTFunction>();
353 func_node->name = "CAST";
354 func_node->arguments = std::move(expr_list_args);
355 func_node->children.push_back(func_node->arguments);
356
357 node = std::move(func_node);
358 return true;
359 }
360
361 return false;
362}
363
364bool ParserSubstringExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
365{
366 /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length)
367 /// The latter will be parsed normally as a function later.
368
369 ASTPtr expr_node;
370 ASTPtr start_node;
371 ASTPtr length_node;
372
373 if (!ParserKeyword("SUBSTRING").ignore(pos, expected))
374 return false;
375
376 if (pos->type != TokenType::OpeningRoundBracket)
377 return false;
378 ++pos;
379
380 if (!ParserExpression().parse(pos, expr_node, expected))
381 return false;
382
383 if (pos->type != TokenType::Comma)
384 {
385 if (!ParserKeyword("FROM").ignore(pos, expected))
386 return false;
387 }
388 else
389 {
390 ++pos;
391 }
392
393 if (!ParserExpression().parse(pos, start_node, expected))
394 return false;
395
396 if (pos->type == TokenType::ClosingRoundBracket)
397 {
398 ++pos;
399 }
400 else
401 {
402 if (pos->type != TokenType::Comma)
403 {
404 if (!ParserKeyword("FOR").ignore(pos, expected))
405 return false;
406 }
407 else
408 {
409 ++pos;
410 }
411
412 if (!ParserExpression().parse(pos, length_node, expected))
413 return false;
414
415 ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected);
416 }
417
418 /// Convert to canonical representation in functional form: SUBSTRING(expr, start, length)
419
420 auto expr_list_args = std::make_shared<ASTExpressionList>();
421 expr_list_args->children = {expr_node, start_node};
422
423 if (length_node)
424 expr_list_args->children.push_back(length_node);
425
426 auto func_node = std::make_shared<ASTFunction>();
427 func_node->name = "substring";
428 func_node->arguments = std::move(expr_list_args);
429 func_node->children.push_back(func_node->arguments);
430
431 node = std::move(func_node);
432 return true;
433}
434
435bool ParserTrimExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
436{
437 /// Handles all possible TRIM/LTRIM/RTRIM call variants
438
439 std::string func_name;
440 bool trim_left = false;
441 bool trim_right = false;
442 bool char_override = false;
443 ASTPtr expr_node;
444 ASTPtr pattern_node;
445 ASTPtr to_remove;
446
447 if (ParserKeyword("LTRIM").ignore(pos, expected))
448 {
449 if (pos->type != TokenType::OpeningRoundBracket)
450 return false;
451 ++pos;
452 trim_left = true;
453 }
454 else if (ParserKeyword("RTRIM").ignore(pos, expected))
455 {
456 if (pos->type != TokenType::OpeningRoundBracket)
457 return false;
458 ++pos;
459 trim_right = true;
460 }
461 else if (ParserKeyword("TRIM").ignore(pos, expected))
462 {
463 if (pos->type != TokenType::OpeningRoundBracket)
464 return false;
465 ++pos;
466
467 if (ParserKeyword("BOTH").ignore(pos, expected))
468 {
469 trim_left = true;
470 trim_right = true;
471 char_override = true;
472 }
473 else if (ParserKeyword("LEADING").ignore(pos, expected))
474 {
475 trim_left = true;
476 char_override = true;
477 }
478 else if (ParserKeyword("TRAILING").ignore(pos, expected))
479 {
480 trim_right = true;
481 char_override = true;
482 }
483 else
484 {
485 trim_left = true;
486 trim_right = true;
487 }
488
489 if (char_override)
490 {
491 if (!ParserExpression().parse(pos, to_remove, expected))
492 return false;
493 if (!ParserKeyword("FROM").ignore(pos, expected))
494 return false;
495
496 auto quote_meta_func_node = std::make_shared<ASTFunction>();
497 auto quote_meta_list_args = std::make_shared<ASTExpressionList>();
498 quote_meta_list_args->children = {to_remove};
499
500 quote_meta_func_node->name = "regexpQuoteMeta";
501 quote_meta_func_node->arguments = std::move(quote_meta_list_args);
502 quote_meta_func_node->children.push_back(quote_meta_func_node->arguments);
503
504 to_remove = std::move(quote_meta_func_node);
505 }
506 }
507
508 if (!(trim_left || trim_right))
509 return false;
510
511 if (!ParserExpression().parse(pos, expr_node, expected))
512 return false;
513
514 if (pos->type != TokenType::ClosingRoundBracket)
515 return false;
516 ++pos;
517
518 /// Convert to regexp replace function call
519
520 if (char_override)
521 {
522 auto pattern_func_node = std::make_shared<ASTFunction>();
523 auto pattern_list_args = std::make_shared<ASTExpressionList>();
524 if (trim_left && trim_right)
525 {
526 pattern_list_args->children = {
527 std::make_shared<ASTLiteral>("^["),
528 to_remove,
529 std::make_shared<ASTLiteral>("]*|["),
530 to_remove,
531 std::make_shared<ASTLiteral>("]*$")
532 };
533 func_name = "replaceRegexpAll";
534 }
535 else
536 {
537 if (trim_left)
538 {
539 pattern_list_args->children = {
540 std::make_shared<ASTLiteral>("^["),
541 to_remove,
542 std::make_shared<ASTLiteral>("]*")
543 };
544 }
545 else
546 {
547 /// trim_right == false not possible
548 pattern_list_args->children = {
549 std::make_shared<ASTLiteral>("["),
550 to_remove,
551 std::make_shared<ASTLiteral>("]*$")
552 };
553 }
554 func_name = "replaceRegexpOne";
555 }
556
557 pattern_func_node->name = "concat";
558 pattern_func_node->arguments = std::move(pattern_list_args);
559 pattern_func_node->children.push_back(pattern_func_node->arguments);
560
561 pattern_node = std::move(pattern_func_node);
562 }
563 else
564 {
565 if (trim_left && trim_right)
566 {
567 func_name = "trimBoth";
568 }
569 else
570 {
571 if (trim_left)
572 {
573 func_name = "trimLeft";
574 }
575 else
576 {
577 /// trim_right == false not possible
578 func_name = "trimRight";
579 }
580 }
581 }
582
583 auto expr_list_args = std::make_shared<ASTExpressionList>();
584 if (char_override)
585 expr_list_args->children = {expr_node, pattern_node, std::make_shared<ASTLiteral>("")};
586 else
587 expr_list_args->children = {expr_node};
588
589 auto func_node = std::make_shared<ASTFunction>();
590 func_node->name = func_name;
591 func_node->arguments = std::move(expr_list_args);
592 func_node->children.push_back(func_node->arguments);
593
594 node = std::move(func_node);
595 return true;
596}
597
598bool ParserLeftExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
599{
600 /// Rewrites left(expr, length) to SUBSTRING(expr, 1, length)
601
602 ASTPtr expr_node;
603 ASTPtr start_node;
604 ASTPtr length_node;
605
606 if (!ParserKeyword("LEFT").ignore(pos, expected))
607 return false;
608
609 if (pos->type != TokenType::OpeningRoundBracket)
610 return false;
611 ++pos;
612
613 if (!ParserExpression().parse(pos, expr_node, expected))
614 return false;
615
616 ParserToken(TokenType::Comma).ignore(pos, expected);
617
618 if (!ParserExpression().parse(pos, length_node, expected))
619 return false;
620
621 if (pos->type != TokenType::ClosingRoundBracket)
622 return false;
623 ++pos;
624
625 auto expr_list_args = std::make_shared<ASTExpressionList>();
626 start_node = std::make_shared<ASTLiteral>(1);
627 expr_list_args->children = {expr_node, start_node, length_node};
628
629 auto func_node = std::make_shared<ASTFunction>();
630 func_node->name = "substring";
631 func_node->arguments = std::move(expr_list_args);
632 func_node->children.push_back(func_node->arguments);
633
634 node = std::move(func_node);
635 return true;
636}
637
638bool ParserRightExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
639{
640 /// Rewrites RIGHT(expr, length) to substring(expr, -length)
641
642 ASTPtr expr_node;
643 ASTPtr length_node;
644
645 if (!ParserKeyword("RIGHT").ignore(pos, expected))
646 return false;
647
648 if (pos->type != TokenType::OpeningRoundBracket)
649 return false;
650 ++pos;
651
652 if (!ParserExpression().parse(pos, expr_node, expected))
653 return false;
654
655 ParserToken(TokenType::Comma).ignore(pos, expected);
656
657 if (!ParserExpression().parse(pos, length_node, expected))
658 return false;
659
660 if (pos->type != TokenType::ClosingRoundBracket)
661 return false;
662 ++pos;
663
664 auto start_expr_list_args = std::make_shared<ASTExpressionList>();
665 start_expr_list_args->children = {length_node};
666
667 auto start_node = std::make_shared<ASTFunction>();
668 start_node->name = "negate";
669 start_node->arguments = std::move(start_expr_list_args);
670 start_node->children.push_back(start_node->arguments);
671
672 auto expr_list_args = std::make_shared<ASTExpressionList>();
673 expr_list_args->children = {expr_node, start_node};
674
675 auto func_node = std::make_shared<ASTFunction>();
676 func_node->name = "substring";
677 func_node->arguments = std::move(expr_list_args);
678 func_node->children.push_back(func_node->arguments);
679
680 node = std::move(func_node);
681 return true;
682}
683
684bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
685{
686 if (!ParserKeyword("EXTRACT").ignore(pos, expected))
687 return false;
688
689 if (pos->type != TokenType::OpeningRoundBracket)
690 return false;
691 ++pos;
692
693 ASTPtr expr;
694
695 IntervalKind interval_kind;
696 if (!parseIntervalKind(pos, expected, interval_kind))
697 return false;
698
699 ParserKeyword s_from("FROM");
700 if (!s_from.ignore(pos, expected))
701 return false;
702
703 ParserExpression elem_parser;
704 if (!elem_parser.parse(pos, expr, expected))
705 return false;
706
707 if (pos->type != TokenType::ClosingRoundBracket)
708 return false;
709 ++pos;
710
711 auto function = std::make_shared<ASTFunction>();
712 auto exp_list = std::make_shared<ASTExpressionList>();
713 function->name = interval_kind.toNameOfFunctionExtractTimePart();
714 function->arguments = exp_list;
715 function->children.push_back(exp_list);
716 exp_list->children.push_back(expr);
717 node = function;
718
719 return true;
720}
721
722bool ParserDateAddExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
723{
724 const char * function_name = nullptr;
725 ASTPtr timestamp_node;
726 ASTPtr offset_node;
727
728 if (ParserKeyword("DATEADD").ignore(pos, expected) || ParserKeyword("DATE_ADD").ignore(pos, expected)
729 || ParserKeyword("TIMESTAMPADD").ignore(pos, expected) || ParserKeyword("TIMESTAMP_ADD").ignore(pos, expected))
730 function_name = "plus";
731 else if (ParserKeyword("DATESUB").ignore(pos, expected) || ParserKeyword("DATE_SUB").ignore(pos, expected)
732 || ParserKeyword("TIMESTAMPSUB").ignore(pos, expected) || ParserKeyword("TIMESTAMP_SUB").ignore(pos, expected))
733 function_name = "minus";
734 else
735 return false;
736
737 if (pos->type != TokenType::OpeningRoundBracket)
738 return false;
739 ++pos;
740
741 IntervalKind interval_kind;
742 if (parseIntervalKind(pos, expected, interval_kind))
743 {
744 /// function(unit, offset, timestamp)
745 if (pos->type != TokenType::Comma)
746 return false;
747 ++pos;
748
749 if (!ParserExpression().parse(pos, offset_node, expected))
750 return false;
751
752 if (pos->type != TokenType::Comma)
753 return false;
754 ++pos;
755
756 if (!ParserExpression().parse(pos, timestamp_node, expected))
757 return false;
758 }
759 else
760 {
761 /// function(timestamp, INTERVAL offset unit)
762 if (!ParserExpression().parse(pos, timestamp_node, expected))
763 return false;
764
765 if (pos->type != TokenType::Comma)
766 return false;
767 ++pos;
768
769 if (!ParserKeyword("INTERVAL").ignore(pos, expected))
770 return false;
771
772 if (!ParserExpression().parse(pos, offset_node, expected))
773 return false;
774
775 if (!parseIntervalKind(pos, expected, interval_kind))
776 return false;
777 }
778 if (pos->type != TokenType::ClosingRoundBracket)
779 return false;
780 ++pos;
781
782 auto interval_expr_list_args = std::make_shared<ASTExpressionList>();
783 interval_expr_list_args->children = {offset_node};
784
785 auto interval_func_node = std::make_shared<ASTFunction>();
786 interval_func_node->name = interval_kind.toNameOfFunctionToIntervalDataType();
787 interval_func_node->arguments = std::move(interval_expr_list_args);
788 interval_func_node->children.push_back(interval_func_node->arguments);
789
790 auto expr_list_args = std::make_shared<ASTExpressionList>();
791 expr_list_args->children = {timestamp_node, interval_func_node};
792
793 auto func_node = std::make_shared<ASTFunction>();
794 func_node->name = function_name;
795 func_node->arguments = std::move(expr_list_args);
796 func_node->children.push_back(func_node->arguments);
797
798 node = std::move(func_node);
799
800 return true;
801}
802
803bool ParserDateDiffExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
804{
805 ASTPtr left_node;
806 ASTPtr right_node;
807
808 if (!(ParserKeyword("DATEDIFF").ignore(pos, expected) || ParserKeyword("DATE_DIFF").ignore(pos, expected)
809 || ParserKeyword("TIMESTAMPDIFF").ignore(pos, expected) || ParserKeyword("TIMESTAMP_DIFF").ignore(pos, expected)))
810 return false;
811
812 if (pos->type != TokenType::OpeningRoundBracket)
813 return false;
814 ++pos;
815
816 IntervalKind interval_kind;
817 if (!parseIntervalKind(pos, expected, interval_kind))
818 return false;
819
820 if (pos->type != TokenType::Comma)
821 return false;
822 ++pos;
823
824 if (!ParserExpression().parse(pos, left_node, expected))
825 return false;
826
827 if (pos->type != TokenType::Comma)
828 return false;
829 ++pos;
830
831 if (!ParserExpression().parse(pos, right_node, expected))
832 return false;
833
834 if (pos->type != TokenType::ClosingRoundBracket)
835 return false;
836 ++pos;
837
838 auto expr_list_args = std::make_shared<ASTExpressionList>();
839 expr_list_args->children = {std::make_shared<ASTLiteral>(interval_kind.toDateDiffUnit()), left_node, right_node};
840
841 auto func_node = std::make_shared<ASTFunction>();
842 func_node->name = "dateDiff";
843 func_node->arguments = std::move(expr_list_args);
844 func_node->children.push_back(func_node->arguments);
845
846 node = std::move(func_node);
847
848 return true;
849}
850
851
852bool ParserNull::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
853{
854 ParserKeyword nested_parser("NULL");
855 if (nested_parser.parse(pos, node, expected))
856 {
857 node = std::make_shared<ASTLiteral>(Null());
858 return true;
859 }
860 else
861 return false;
862}
863
864
865bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
866{
867 Pos literal_begin = pos;
868 bool negative = false;
869
870 if (pos->type == TokenType::Minus)
871 {
872 ++pos;
873 negative = true;
874 }
875 else if (pos->type == TokenType::Plus) /// Leading plus is simply ignored.
876 ++pos;
877
878 Field res;
879
880 if (!pos.isValid())
881 return false;
882
883 /** Maximum length of number. 319 symbols is enough to write maximum double in decimal form.
884 * Copy is needed to use strto* functions, which require 0-terminated string.
885 */
886 static constexpr size_t MAX_LENGTH_OF_NUMBER = 319;
887
888 if (pos->size() > MAX_LENGTH_OF_NUMBER)
889 {
890 expected.add(pos, "number");
891 return false;
892 }
893
894 char buf[MAX_LENGTH_OF_NUMBER + 1];
895
896 memcpy(buf, pos->begin, pos->size());
897 buf[pos->size()] = 0;
898
899 char * pos_double = buf;
900 errno = 0; /// Functions strto* don't clear errno.
901 Float64 float_value = std::strtod(buf, &pos_double);
902 if (pos_double != buf + pos->size() || errno == ERANGE)
903 {
904 expected.add(pos, "number");
905 return false;
906 }
907
908 if (float_value < 0)
909 throw Exception("Logical error: token number cannot begin with minus, but parsed float number is less than zero.", ErrorCodes::LOGICAL_ERROR);
910
911 if (negative)
912 float_value = -float_value;
913
914 res = float_value;
915
916 /// try to use more exact type: UInt64
917
918 char * pos_integer = buf;
919
920 errno = 0;
921 UInt64 uint_value = std::strtoull(buf, &pos_integer, 0);
922 if (pos_integer == pos_double && errno != ERANGE && (!negative || uint_value <= (1ULL << 63)))
923 {
924 if (negative)
925 res = static_cast<Int64>(-uint_value);
926 else
927 res = uint_value;
928 }
929
930 auto literal = std::make_shared<ASTLiteral>(res);
931 literal->begin = literal_begin;
932 literal->end = ++pos;
933 node = literal;
934 return true;
935}
936
937
938bool ParserUnsignedInteger::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
939{
940 Field res;
941
942 if (!pos.isValid())
943 return false;
944
945 UInt64 x = 0;
946 ReadBufferFromMemory in(pos->begin, pos->size());
947 if (!tryReadIntText(x, in) || in.count() != pos->size())
948 {
949 expected.add(pos, "unsigned integer");
950 return false;
951 }
952
953 res = x;
954 auto literal = std::make_shared<ASTLiteral>(res);
955 literal->begin = pos;
956 literal->end = ++pos;
957 node = literal;
958 return true;
959}
960
961
962bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
963{
964 if (pos->type != TokenType::StringLiteral)
965 return false;
966
967 String s;
968 ReadBufferFromMemory in(pos->begin, pos->size());
969
970 try
971 {
972 readQuotedStringWithSQLStyle(s, in);
973 }
974 catch (const Exception &)
975 {
976 expected.add(pos, "string literal");
977 return false;
978 }
979
980 if (in.count() != pos->size())
981 {
982 expected.add(pos, "string literal");
983 return false;
984 }
985
986 auto literal = std::make_shared<ASTLiteral>(s);
987 literal->begin = pos;
988 literal->end = ++pos;
989 node = literal;
990 return true;
991}
992
993
994bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
995{
996 if (pos->type != TokenType::OpeningSquareBracket)
997 return false;
998
999 Pos literal_begin = pos;
1000
1001 Array arr;
1002
1003 ParserLiteral literal_p;
1004
1005 ++pos;
1006
1007 while (pos.isValid())
1008 {
1009 if (!arr.empty())
1010 {
1011 if (pos->type == TokenType::ClosingSquareBracket)
1012 {
1013 auto literal = std::make_shared<ASTLiteral>(arr);
1014 literal->begin = literal_begin;
1015 literal->end = ++pos;
1016 node = literal;
1017 return true;
1018 }
1019 else if (pos->type == TokenType::Comma)
1020 {
1021 ++pos;
1022 }
1023 else
1024 {
1025 expected.add(pos, "comma or closing square bracket");
1026 return false;
1027 }
1028 }
1029
1030 ASTPtr literal_node;
1031 if (!literal_p.parse(pos, literal_node, expected))
1032 return false;
1033
1034 arr.push_back(literal_node->as<ASTLiteral &>().value);
1035 }
1036
1037 expected.add(pos, "closing square bracket");
1038 return false;
1039}
1040
1041
1042bool ParserLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1043{
1044 ParserNull null_p;
1045 ParserNumber num_p;
1046 ParserStringLiteral str_p;
1047
1048 if (null_p.parse(pos, node, expected))
1049 return true;
1050
1051 if (num_p.parse(pos, node, expected))
1052 return true;
1053
1054 if (str_p.parse(pos, node, expected))
1055 return true;
1056
1057 return false;
1058}
1059
1060
1061const char * ParserAlias::restricted_keywords[] =
1062{
1063 "FROM",
1064 "FINAL",
1065 "SAMPLE",
1066 "ARRAY",
1067 "LEFT",
1068 "RIGHT",
1069 "INNER",
1070 "FULL",
1071 "CROSS",
1072 "JOIN",
1073 "GLOBAL",
1074 "ANY",
1075 "ALL",
1076 "ASOF",
1077 "SEMI",
1078 "ANTI",
1079 "ONLY", /// YQL synonym for ANTI
1080 "ON",
1081 "USING",
1082 "PREWHERE",
1083 "WHERE",
1084 "GROUP",
1085 "WITH",
1086 "HAVING",
1087 "ORDER",
1088 "LIMIT",
1089 "SETTINGS",
1090 "FORMAT",
1091 "UNION",
1092 "INTO",
1093 "NOT",
1094 "BETWEEN",
1095 "LIKE",
1096 nullptr
1097};
1098
1099bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1100{
1101 ParserKeyword s_as("AS");
1102 ParserIdentifier id_p;
1103
1104 bool has_as_word = s_as.ignore(pos, expected);
1105 if (!allow_alias_without_as_keyword && !has_as_word)
1106 return false;
1107
1108 if (!id_p.parse(pos, node, expected))
1109 return false;
1110
1111 if (!has_as_word)
1112 {
1113 /** In this case, the alias can not match the keyword -
1114 * so that in the query "SELECT x FROM t", the word FROM was not considered an alias,
1115 * and in the query "SELECT x FRO FROM t", the word FRO was considered an alias.
1116 */
1117
1118 const String name = getIdentifierName(node);
1119
1120 for (const char ** keyword = restricted_keywords; *keyword != nullptr; ++keyword)
1121 if (0 == strcasecmp(name.data(), *keyword))
1122 return false;
1123 }
1124
1125 return true;
1126}
1127
1128
1129bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1130{
1131 ParserKeyword columns("COLUMNS");
1132 ParserStringLiteral regex;
1133
1134 if (!columns.ignore(pos, expected))
1135 return false;
1136
1137 if (pos->type != TokenType::OpeningRoundBracket)
1138 return false;
1139 ++pos;
1140
1141 ASTPtr regex_node;
1142 if (!regex.parse(pos, regex_node, expected))
1143 return false;
1144
1145 if (pos->type != TokenType::ClosingRoundBracket)
1146 return false;
1147 ++pos;
1148
1149 auto res = std::make_shared<ASTColumnsMatcher>();
1150 res->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
1151 res->children.push_back(regex_node);
1152 node = std::move(res);
1153 return true;
1154}
1155
1156
1157bool ParserAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected &)
1158{
1159 if (pos->type == TokenType::Asterisk)
1160 {
1161 ++pos;
1162 node = std::make_shared<ASTAsterisk>();
1163 return true;
1164 }
1165 return false;
1166}
1167
1168
1169bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1170{
1171 if (!ParserCompoundIdentifier().parse(pos, node, expected))
1172 return false;
1173
1174 if (pos->type != TokenType::Dot)
1175 return false;
1176 ++pos;
1177
1178 if (pos->type != TokenType::Asterisk)
1179 return false;
1180 ++pos;
1181
1182 auto res = std::make_shared<ASTQualifiedAsterisk>();
1183 res->children.push_back(node);
1184 node = std::move(res);
1185 return true;
1186}
1187
1188
1189bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1190{
1191 if (pos->type != TokenType::OpeningCurlyBrace)
1192 return false;
1193
1194 ++pos;
1195
1196 if (pos->type != TokenType::BareWord)
1197 {
1198 expected.add(pos, "substitution name (identifier)");
1199 return false;
1200 }
1201
1202 String name(pos->begin, pos->end);
1203 ++pos;
1204
1205 if (pos->type != TokenType::Colon)
1206 {
1207 expected.add(pos, "colon between name and type");
1208 return false;
1209 }
1210
1211 ++pos;
1212
1213 auto old_pos = pos;
1214 ParserIdentifierWithOptionalParameters type_parser;
1215 if (!type_parser.ignore(pos, expected))
1216 {
1217 expected.add(pos, "substitution type");
1218 return false;
1219 }
1220
1221 String type(old_pos->begin, pos->begin);
1222
1223 if (pos->type != TokenType::ClosingCurlyBrace)
1224 {
1225 expected.add(pos, "closing curly brace");
1226 return false;
1227 }
1228
1229 ++pos;
1230 node = std::make_shared<ASTQueryParameter>(name, type);
1231 return true;
1232}
1233
1234
1235bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1236{
1237 return ParserSubquery().parse(pos, node, expected)
1238 || ParserParenthesisExpression().parse(pos, node, expected)
1239 || ParserArrayOfLiterals().parse(pos, node, expected)
1240 || ParserArray().parse(pos, node, expected)
1241 || ParserLiteral().parse(pos, node, expected)
1242 || ParserCastExpression().parse(pos, node, expected)
1243 || ParserExtractExpression().parse(pos, node, expected)
1244 || ParserDateAddExpression().parse(pos, node, expected)
1245 || ParserDateDiffExpression().parse(pos, node, expected)
1246 || ParserSubstringExpression().parse(pos, node, expected)
1247 || ParserTrimExpression().parse(pos, node, expected)
1248 || ParserLeftExpression().parse(pos, node, expected)
1249 || ParserRightExpression().parse(pos, node, expected)
1250 || ParserCase().parse(pos, node, expected)
1251 || ParserColumnsMatcher().parse(pos, node, expected) /// before ParserFunction because it can be also parsed as a function.
1252 || ParserFunction().parse(pos, node, expected)
1253 || ParserQualifiedAsterisk().parse(pos, node, expected)
1254 || ParserAsterisk().parse(pos, node, expected)
1255 || ParserCompoundIdentifier().parse(pos, node, expected)
1256 || ParserSubstitution().parse(pos, node, expected);
1257}
1258
1259
1260bool ParserWithOptionalAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1261{
1262 if (!elem_parser->parse(pos, node, expected))
1263 return false;
1264
1265 /** Little hack.
1266 *
1267 * In the SELECT section, we allow parsing aliases without specifying the AS keyword.
1268 * These aliases can not be the same as the query keywords.
1269 * And the expression itself can be an identifier that matches the keyword.
1270 * For example, a column may be called where. And in the query it can be written `SELECT where AS x FROM table` or even `SELECT where x FROM table`.
1271 * Even can be written `SELECT where AS from FROM table`, but it can not be written `SELECT where from FROM table`.
1272 * See the ParserAlias implementation for details.
1273 *
1274 * But there is a small problem - an inconvenient error message if there is an extra comma in the SELECT section at the end.
1275 * Although this error is very common. Example: `SELECT x, y, z, FROM tbl`
1276 * If you do nothing, it's parsed as a column with the name FROM and alias tbl.
1277 * To avoid this situation, we do not allow the parsing of the alias without the AS keyword for the identifier with the name FROM.
1278 *
1279 * Note: this also filters the case when the identifier is quoted.
1280 * Example: SELECT x, y, z, `FROM` tbl. But such a case could be solved.
1281 *
1282 * In the future it would be easier to disallow unquoted identifiers that match the keywords.
1283 */
1284 bool allow_alias_without_as_keyword_now = allow_alias_without_as_keyword;
1285 if (allow_alias_without_as_keyword)
1286 if (auto opt_id = tryGetIdentifierName(node))
1287 if (0 == strcasecmp(opt_id->data(), "FROM"))
1288 allow_alias_without_as_keyword_now = false;
1289
1290 ASTPtr alias_node;
1291 if (ParserAlias(allow_alias_without_as_keyword_now).parse(pos, alias_node, expected))
1292 {
1293 /// FIXME: try to prettify this cast using `as<>()`
1294 if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(node.get()))
1295 {
1296 tryGetIdentifierNameInto(alias_node, ast_with_alias->alias);
1297 }
1298 else
1299 {
1300 expected.add(pos, "alias cannot be here");
1301 return false;
1302 }
1303 }
1304
1305 return true;
1306}
1307
1308
1309bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1310{
1311 ParserExpressionWithOptionalAlias elem_p(false);
1312 ParserKeyword ascending("ASCENDING");
1313 ParserKeyword descending("DESCENDING");
1314 ParserKeyword asc("ASC");
1315 ParserKeyword desc("DESC");
1316 ParserKeyword nulls("NULLS");
1317 ParserKeyword first("FIRST");
1318 ParserKeyword last("LAST");
1319 ParserKeyword collate("COLLATE");
1320 ParserKeyword with_fill("WITH FILL");
1321 ParserKeyword from("FROM");
1322 ParserKeyword to("TO");
1323 ParserKeyword step("STEP");
1324 ParserStringLiteral collate_locale_parser;
1325 ParserExpressionWithOptionalAlias exp_parser(false);
1326
1327 ASTPtr expr_elem;
1328 if (!elem_p.parse(pos, expr_elem, expected))
1329 return false;
1330
1331 int direction = 1;
1332
1333 if (descending.ignore(pos) || desc.ignore(pos))
1334 direction = -1;
1335 else
1336 ascending.ignore(pos) || asc.ignore(pos);
1337
1338 int nulls_direction = direction;
1339 bool nulls_direction_was_explicitly_specified = false;
1340
1341 if (nulls.ignore(pos))
1342 {
1343 nulls_direction_was_explicitly_specified = true;
1344
1345 if (first.ignore(pos))
1346 nulls_direction = -direction;
1347 else if (last.ignore(pos))
1348 ;
1349 else
1350 return false;
1351 }
1352
1353 ASTPtr locale_node;
1354 if (collate.ignore(pos))
1355 {
1356 if (!collate_locale_parser.parse(pos, locale_node, expected))
1357 return false;
1358 }
1359
1360 /// WITH FILL [FROM x] [TO y] [STEP z]
1361 bool has_with_fill = false;
1362 ASTPtr fill_from;
1363 ASTPtr fill_to;
1364 ASTPtr fill_step;
1365 if (with_fill.ignore(pos))
1366 {
1367 has_with_fill = true;
1368 if (from.ignore(pos) && !exp_parser.parse(pos, fill_from, expected))
1369 return false;
1370
1371 if (to.ignore(pos) && !exp_parser.parse(pos, fill_to, expected))
1372 return false;
1373
1374 if (step.ignore(pos) && !exp_parser.parse(pos, fill_step, expected))
1375 return false;
1376 }
1377
1378 node = std::make_shared<ASTOrderByElement>(
1379 direction, nulls_direction, nulls_direction_was_explicitly_specified, locale_node,
1380 has_with_fill, fill_from, fill_to, fill_step);
1381 node->children.push_back(expr_elem);
1382 if (locale_node)
1383 node->children.push_back(locale_node);
1384
1385 return true;
1386}
1387
1388bool ParserFunctionWithKeyValueArguments::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1389{
1390 ParserIdentifier id_parser;
1391 ParserKeyValuePairsList pairs_list_parser;
1392
1393 ASTPtr identifier;
1394 ASTPtr expr_list_args;
1395 if (!id_parser.parse(pos, identifier, expected))
1396 return false;
1397
1398 if (pos.get().type != TokenType::OpeningRoundBracket)
1399 return false;
1400
1401 ++pos;
1402 if (!pairs_list_parser.parse(pos, expr_list_args, expected))
1403 return false;
1404
1405 if (pos.get().type != TokenType::ClosingRoundBracket)
1406 return false;
1407
1408 ++pos;
1409 auto function = std::make_shared<ASTFunctionWithKeyValueArguments>();
1410 function->name = Poco::toLower(typeid_cast<ASTIdentifier &>(*identifier.get()).name);
1411 function->elements = expr_list_args;
1412 function->children.push_back(function->elements);
1413 node = function;
1414
1415 return true;
1416}
1417
1418bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1419{
1420 ParserKeyword s_to_disk("TO DISK");
1421 ParserKeyword s_to_volume("TO VOLUME");
1422 ParserKeyword s_delete("DELETE");
1423 ParserStringLiteral parser_string_literal;
1424 ParserExpression parser_exp;
1425
1426 ASTPtr expr_elem;
1427 if (!parser_exp.parse(pos, expr_elem, expected))
1428 return false;
1429
1430 PartDestinationType destination_type = PartDestinationType::DELETE;
1431 String destination_name;
1432 if (s_to_disk.ignore(pos))
1433 destination_type = PartDestinationType::DISK;
1434 else if (s_to_volume.ignore(pos))
1435 destination_type = PartDestinationType::VOLUME;
1436 else
1437 s_delete.ignore(pos);
1438
1439 if (destination_type == PartDestinationType::DISK || destination_type == PartDestinationType::VOLUME)
1440 {
1441 ASTPtr ast_space_name;
1442 if (!parser_string_literal.parse(pos, ast_space_name, expected))
1443 return false;
1444
1445 destination_name = ast_space_name->as<ASTLiteral &>().value.get<const String &>();
1446 }
1447
1448 node = std::make_shared<ASTTTLElement>(destination_type, destination_name);
1449 node->children.push_back(expr_elem);
1450
1451 return true;
1452}
1453
1454bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1455{
1456 ParserIdentifier non_parametric;
1457 ParserIdentifierWithParameters parametric;
1458
1459 if (parametric.parse(pos, node, expected))
1460 return true;
1461
1462 ASTPtr ident;
1463 if (non_parametric.parse(pos, ident, expected))
1464 {
1465 auto func = std::make_shared<ASTFunction>();
1466 tryGetIdentifierNameInto(ident, func->name);
1467 node = func;
1468 return true;
1469 }
1470
1471 return false;
1472}
1473
1474}
1475