1#include <Columns/ColumnConst.h>
2#include <DataTypes/DataTypesNumber.h>
3#include <DataTypes/DataTypeNullable.h>
4#include <DataTypes/FieldToDataType.h>
5#include <Processors/Formats/IRowInputFormat.h>
6#include <Functions/FunctionsConversion.h>
7#include <Functions/FunctionFactory.h>
8#include <Interpreters/ExpressionAnalyzer.h>
9#include <Interpreters/ReplaceQueryParameterVisitor.h>
10#include <Interpreters/SyntaxAnalyzer.h>
11#include <IO/ReadHelpers.h>
12#include <Parsers/ASTExpressionList.h>
13#include <Parsers/ASTFunction.h>
14#include <Parsers/ASTIdentifier.h>
15#include <Parsers/ASTLiteral.h>
16#include <Parsers/ASTQueryParameter.h>
17#include <Parsers/CommonParsers.h>
18#include <Processors/Formats/Impl/ConstantExpressionTemplate.h>
19#include <Parsers/ExpressionElementParsers.h>
20#include <Interpreters/convertFieldToType.h>
21#include <boost/functional/hash.hpp>
22
23
24namespace DB
25{
26
/// Error codes used in this file.
/// Every code referenced below is declared here explicitly, so the file does not depend
/// on a declaration that happens to be visible through one of the included headers.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int SYNTAX_ERROR;
}
31
32struct SpecialParserType
33{
34 bool is_array = false;
35 bool is_nullable = false;
36 Field::Types::Which nested_type = Field::Types::Which::String;
37
38 bool useDefaultParser() const { return nested_type == Field::Types::Which::String; }
39};
40
41struct LiteralInfo
42{
43 typedef std::shared_ptr<ASTLiteral> ASTLiteralPtr;
44 LiteralInfo(const ASTLiteralPtr & literal_, const String & column_name_, bool force_nullable_)
45 : literal(literal_), dummy_column_name(column_name_), force_nullable(force_nullable_) { }
46 ASTLiteralPtr literal;
47 String dummy_column_name;
48 /// Make column nullable even if expression type is not.
49 /// (for literals in functions like ifNull and assumeNotNul, which never return NULL even for NULL arguments)
50 bool force_nullable;
51
52 DataTypePtr type;
53 SpecialParserType special_parser;
54};
55
56/// Extracts ASTLiterals from expression, replaces them with ASTIdentifiers where needed
57/// and deduces data types for dummy columns by field type of literal
58class ReplaceLiteralsVisitor
59{
60public:
61 LiteralsInfo replaced_literals;
62 const Context & context;
63
64 explicit ReplaceLiteralsVisitor(const Context & context_) : context(context_) { }
65
66 void visit(ASTPtr & ast, bool force_nullable)
67 {
68 if (visitIfLiteral(ast, force_nullable))
69 return;
70 if (auto function = ast->as<ASTFunction>())
71 visit(*function, force_nullable);
72 else if (ast->as<ASTQueryParameter>())
73 return;
74 else if (ast->as<ASTIdentifier>())
75 throw DB::Exception("Identifier in constant expression", ErrorCodes::SYNTAX_ERROR);
76 else
77 throw DB::Exception("Syntax error in constant expression", ErrorCodes::SYNTAX_ERROR);
78 }
79
80private:
81 void visitChildren(ASTPtr & ast, const ColumnNumbers & dont_visit_children, const std::vector<char> & force_nullable)
82 {
83 for (size_t i = 0; i < ast->children.size(); ++i)
84 if (std::find(dont_visit_children.begin(), dont_visit_children.end(), i) == dont_visit_children.end())
85 visit(ast->children[i], force_nullable[i]);
86 }
87
88 void visit(ASTFunction & function, bool force_nullable)
89 {
90 if (function.name == "lambda")
91 return;
92
93 FunctionOverloadResolverPtr builder = FunctionFactory::instance().get(function.name, context);
94 /// Do not replace literals which must be constant
95 ColumnNumbers dont_visit_children = builder->getArgumentsThatAreAlwaysConstant();
96 /// Allow nullable arguments if function never returns NULL
97 ColumnNumbers can_always_be_nullable = builder->getArgumentsThatDontImplyNullableReturnType(function.arguments->children.size());
98
99 std::vector<char> force_nullable_arguments(function.arguments->children.size(), force_nullable);
100 for (auto & idx : can_always_be_nullable)
101 if (idx < force_nullable_arguments.size())
102 force_nullable_arguments[idx] = true;
103
104 visitChildren(function.arguments, dont_visit_children, force_nullable_arguments);
105 }
106
107 bool visitIfLiteral(ASTPtr & ast, bool force_nullable)
108 {
109 auto literal = std::dynamic_pointer_cast<ASTLiteral>(ast);
110 if (!literal)
111 return false;
112 if (literal->begin && literal->end)
113 {
114 /// Do not replace empty array and array of NULLs
115 if (literal->value.getType() == Field::Types::Array)
116 {
117 const Array & array = literal->value.get<Array>();
118 auto not_null = std::find_if_not(array.begin(), array.end(), [](const auto & elem) { return elem.isNull(); });
119 if (not_null == array.end())
120 return true;
121 }
122 String column_name = "_dummy_" + std::to_string(replaced_literals.size());
123 replaced_literals.emplace_back(literal, column_name, force_nullable);
124 setDataType(replaced_literals.back());
125 ast = std::make_shared<ASTIdentifier>(column_name);
126 }
127 return true;
128 }
129
130 void setDataType(LiteralInfo & info)
131 {
132 /// Type (Field::Types:Which) of literal in AST can be: String, UInt64, Int64, Float64, Null or Array of simple literals (not of Arrays).
133 /// Null and empty Array literals are considered as tokens, because template with Nullable(Nothing) or Array(Nothing) is useless.
134
135 Field::Types::Which field_type = info.literal->value.getType();
136
137 /// We have to use ParserNumber instead of type->deserializeAsTextQuoted() for arithmetic types
138 /// to check actual type of literal and avoid possible overflow and precision issues.
139 info.special_parser = SpecialParserType{false, false, field_type};
140
141 /// Do not use 8, 16 and 32 bit types, so template will match all integers
142 if (field_type == Field::Types::UInt64)
143 info.type = std::make_shared<DataTypeUInt64>();
144 else if (field_type == Field::Types::Int64)
145 info.type = std::make_shared<DataTypeInt64>();
146 else if (field_type == Field::Types::Float64)
147 info.type = std::make_shared<DataTypeFloat64>();
148 else if (field_type == Field::Types::String)
149 info.type = std::make_shared<DataTypeString>();
150 else if (field_type == Field::Types::Array)
151 {
152 info.special_parser.is_array = true;
153 info.type = applyVisitor(FieldToDataType(), info.literal->value);
154 auto nested_type = assert_cast<const DataTypeArray &>(*info.type).getNestedType();
155
156 /// It can be Array(Nullable(nested_type))
157 bool array_of_nullable = false;
158 if (auto nullable = dynamic_cast<const DataTypeNullable *>(nested_type.get()))
159 {
160 nested_type = nullable->getNestedType();
161 array_of_nullable = true;
162 }
163
164 WhichDataType type_info{nested_type};
165 /// Promote integers to 64 bit types
166 if (type_info.isNativeUInt())
167 {
168 nested_type = std::make_shared<DataTypeUInt64>();
169 info.special_parser.nested_type = Field::Types::UInt64;
170 }
171 else if (type_info.isNativeInt())
172 {
173 nested_type = std::make_shared<DataTypeInt64>();
174 info.special_parser.nested_type = Field::Types::Int64;
175 }
176 else if (type_info.isFloat64())
177 {
178 info.special_parser.nested_type = Field::Types::Float64;
179 }
180 else if (type_info.isString())
181 {
182 info.special_parser.nested_type = Field::Types::String;
183 }
184 else
185 throw Exception("Unexpected literal type inside Array: " + nested_type->getName() + ". It's a bug",
186 ErrorCodes::LOGICAL_ERROR);
187
188 if (array_of_nullable)
189 {
190 nested_type = std::make_shared<DataTypeNullable>(nested_type);
191 info.special_parser.is_nullable = true;
192 }
193
194 info.type = std::make_shared<DataTypeArray>(nested_type);
195 }
196 else
197 throw Exception(String("Unexpected literal type ") + info.literal->value.getTypeName() + ". It's a bug",
198 ErrorCodes::LOGICAL_ERROR);
199
200 /// Allow literal to be NULL, if result column has nullable type or if function never returns NULL
201 if (info.force_nullable && info.type->canBeInsideNullable())
202 {
203 info.type = makeNullable(info.type);
204 info.special_parser.is_nullable = true;
205 }
206 }
207};
208
209
210
211/// Expression template is a sequence of tokens and data types of literals.
212/// E.g. template of "position('some string', 'other string') != 0" is
213/// ["position", "(", DataTypeString, ",", DataTypeString, ")", "!=", DataTypeUInt64]
214ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & replaced_literals, TokenIterator expression_begin, TokenIterator expression_end,
215 ASTPtr & expression, const IDataType & result_type, bool null_as_default_, const Context & context)
216{
217 null_as_default = null_as_default_;
218
219 std::sort(replaced_literals.begin(), replaced_literals.end(), [](const LiteralInfo & a, const LiteralInfo & b)
220 {
221 return a.literal->begin.value() < b.literal->begin.value();
222 });
223
224 /// Make sequence of tokens and determine IDataType by Field::Types:Which for each literal.
225 token_after_literal_idx.reserve(replaced_literals.size());
226 special_parser.resize(replaced_literals.size());
227
228 TokenIterator prev_end = expression_begin;
229 for (size_t i = 0; i < replaced_literals.size(); ++i)
230 {
231 const LiteralInfo & info = replaced_literals[i];
232 if (info.literal->begin.value() < prev_end)
233 throw Exception("Cannot replace literals", ErrorCodes::LOGICAL_ERROR);
234
235 while (prev_end < info.literal->begin.value())
236 {
237 tokens.emplace_back(prev_end->begin, prev_end->size());
238 ++prev_end;
239 }
240 token_after_literal_idx.push_back(tokens.size());
241
242 special_parser[i] = info.special_parser;
243
244 literals.insert({nullptr, info.type, info.dummy_column_name});
245
246 prev_end = info.literal->end.value();
247 }
248
249 while (prev_end < expression_end)
250 {
251 tokens.emplace_back(prev_end->begin, prev_end->size());
252 ++prev_end;
253 }
254
255 addNodesToCastResult(result_type, expression, null_as_default);
256
257 auto syntax_result = SyntaxAnalyzer(context).analyze(expression, literals.getNamesAndTypesList());
258 result_column_name = expression->getColumnName();
259 actions_on_literals = ExpressionAnalyzer(expression, syntax_result, context).getActions(false);
260}
261
262size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTPtr & expression,
263 const LiteralsInfo & replaced_literals,
264 const DataTypePtr & result_column_type,
265 bool null_as_default,
266 const String & salt)
267{
268 /// TODO distinguish expressions with the same AST and different tokens (e.g. "CAST(expr, 'Type')" and "CAST(expr AS Type)")
269 SipHash hash_state;
270 hash_state.update(result_column_type->getName());
271
272 expression->updateTreeHash(hash_state);
273
274 for (const auto & info : replaced_literals)
275 hash_state.update(info.type->getName());
276 hash_state.update(null_as_default);
277
278 /// Allows distinguish expression in the last column in Values format
279 hash_state.update(salt);
280
281 IAST::Hash res128;
282 hash_state.get128(res128.first, res128.second);
283 size_t res = 0;
284 boost::hash_combine(res, res128.first);
285 boost::hash_combine(res, res128.second);
286 return res;
287}
288
289
290
291ConstantExpressionTemplate::TemplateStructurePtr
292ConstantExpressionTemplate::Cache::getFromCacheOrConstruct(const DataTypePtr & result_column_type,
293 bool null_as_default,
294 TokenIterator expression_begin,
295 TokenIterator expression_end,
296 const ASTPtr & expression_,
297 const Context & context,
298 bool * found_in_cache,
299 const String & salt)
300{
301 TemplateStructurePtr res;
302 ASTPtr expression = expression_->clone();
303 ReplaceLiteralsVisitor visitor(context);
304 visitor.visit(expression, result_column_type->isNullable() || null_as_default);
305 ReplaceQueryParameterVisitor param_visitor(context.getQueryParameters());
306 param_visitor.visit(expression);
307
308 size_t template_hash = TemplateStructure::getTemplateHash(expression, visitor.replaced_literals, result_column_type, null_as_default, salt);
309 auto iter = cache.find(template_hash);
310 if (iter == cache.end())
311 {
312 if (max_size <= cache.size())
313 cache.clear();
314 res = std::make_shared<TemplateStructure>(visitor.replaced_literals, expression_begin, expression_end,
315 expression, *result_column_type, null_as_default, context);
316 cache.insert({template_hash, res});
317 if (found_in_cache)
318 *found_in_cache = false;
319 }
320 else
321 {
322 /// FIXME process collisions correctly
323 res = iter->second;
324 if (found_in_cache)
325 *found_in_cache = true;
326 }
327
328 return res;
329}
330
331bool ConstantExpressionTemplate::parseExpression(ReadBuffer & istr, const FormatSettings & settings)
332{
333 size_t cur_column = 0;
334 try
335 {
336 if (tryParseExpression(istr, settings, cur_column))
337 {
338 ++rows_count;
339 return true;
340 }
341 }
342 catch (DB::Exception & e)
343 {
344 for (size_t i = 0; i < cur_column; ++i)
345 columns[i]->popBack(1);
346
347 if (!isParseError(e.code()))
348 throw;
349
350 return false;
351 }
352
353 for (size_t i = 0; i < cur_column; ++i)
354 columns[i]->popBack(1);
355 return false;
356}
357
358bool ConstantExpressionTemplate::tryParseExpression(ReadBuffer & istr, const FormatSettings & settings, size_t & cur_column)
359{
360 size_t cur_token = 0;
361 size_t num_columns = structure->literals.columns();
362 while (cur_column < num_columns)
363 {
364 size_t skip_tokens_until = structure->token_after_literal_idx[cur_column];
365 while (cur_token < skip_tokens_until)
366 {
367 /// TODO skip comments
368 skipWhitespaceIfAny(istr);
369 if (!checkString(structure->tokens[cur_token++], istr))
370 return false;
371 }
372 skipWhitespaceIfAny(istr);
373
374 const DataTypePtr & type = structure->literals.getByPosition(cur_column).type;
375 if (settings.values.accurate_types_of_literals && !structure->special_parser[cur_column].useDefaultParser())
376 {
377 if (!parseLiteralAndAssertType(istr, type.get(), cur_column))
378 return false;
379 }
380 else
381 type->deserializeAsTextQuoted(*columns[cur_column], istr, settings);
382
383 ++cur_column;
384 }
385 while (cur_token < structure->tokens.size())
386 {
387 skipWhitespaceIfAny(istr);
388 if (!checkString(structure->tokens[cur_token++], istr))
389 return false;
390 }
391
392 return true;
393}
394
395bool ConstantExpressionTemplate::parseLiteralAndAssertType(ReadBuffer & istr, const IDataType * complex_type, size_t column_idx)
396{
397 using Type = Field::Types::Which;
398
399 /// TODO in case of type mismatch return some hints to deduce new template faster
400 if (istr.eof())
401 return false;
402
403 SpecialParserType type_info = structure->special_parser[column_idx];
404
405 /// If literal does not fit entirely in the buffer, parsing error will happen.
406 /// However, it's possible to deduce new template (or use template from cache) after error like it was template mismatch.
407
408 if (type_info.is_array)
409 {
410 /// TODO faster way to check types without using Parsers
411 ParserArrayOfLiterals parser_array;
412 Tokens tokens_number(istr.position(), istr.buffer().end());
413 IParser::Pos iterator(tokens_number);
414 Expected expected;
415 ASTPtr ast;
416
417 if (!parser_array.parse(iterator, ast, expected))
418 return false;
419 istr.position() = const_cast<char *>(iterator->begin);
420
421 const Field & array = ast->as<ASTLiteral &>().value;
422 auto array_type = applyVisitor(FieldToDataType(), array);
423 auto nested_type = assert_cast<const DataTypeArray &>(*array_type).getNestedType();
424 if (type_info.is_nullable)
425 if (auto nullable = dynamic_cast<const DataTypeNullable *>(nested_type.get()))
426 nested_type = nullable->getNestedType();
427
428 WhichDataType nested_type_info(nested_type);
429 if ((nested_type_info.isNativeUInt() && type_info.nested_type == Type::UInt64) ||
430 (nested_type_info.isNativeInt() && type_info.nested_type == Type::Int64) ||
431 (nested_type_info.isFloat64() && type_info.nested_type == Type::Float64))
432 {
433 Field array_same_types = convertFieldToType(array, *complex_type, nullptr);
434 columns[column_idx]->insert(array_same_types);
435 return true;
436 }
437 return false;
438 }
439 else
440 {
441 Field number;
442 if (type_info.is_nullable && 4 <= istr.available() && 0 == strncasecmp(istr.position(), "NULL", 4))
443 istr.position() += 4;
444 else
445 {
446 /// ParserNumber::parse(...) is about 20x slower than strtod(...)
447 /// because of using ASTPtr, Expected and Tokens, which are not needed here.
448 /// Parse numeric literal in the same way, as ParserNumber does, but use strtod and strtoull directly.
449 bool negative = *istr.position() == '-';
450 if (negative || *istr.position() == '+')
451 ++istr.position();
452
453 static constexpr size_t MAX_LENGTH_OF_NUMBER = 319;
454 char buf[MAX_LENGTH_OF_NUMBER + 1];
455 size_t bytes_to_copy = std::min(istr.available(), MAX_LENGTH_OF_NUMBER);
456 memcpy(buf, istr.position(), bytes_to_copy);
457 buf[bytes_to_copy] = 0;
458
459 char * pos_double = buf;
460 errno = 0;
461 Float64 float_value = std::strtod(buf, &pos_double);
462 if (pos_double == buf || errno == ERANGE || float_value < 0)
463 return false;
464
465 if (negative)
466 float_value = -float_value;
467
468 char * pos_integer = buf;
469 errno = 0;
470 UInt64 uint_value = std::strtoull(buf, &pos_integer, 0);
471 if (pos_integer == pos_double && errno != ERANGE && (!negative || uint_value <= (1ULL << 63)))
472 {
473 istr.position() += pos_integer - buf;
474 if (negative && type_info.nested_type == Type::Int64)
475 number = static_cast<Int64>(-uint_value);
476 else if (!negative && type_info.nested_type == Type::UInt64)
477 number = uint_value;
478 else
479 return false;
480 }
481 else if (type_info.nested_type == Type::Float64)
482 {
483 istr.position() += pos_double - buf;
484 number = float_value;
485 }
486 else
487 return false;
488 }
489
490 columns[column_idx]->insert(number);
491 return true;
492 }
493}
494
495ColumnPtr ConstantExpressionTemplate::evaluateAll(BlockMissingValues & nulls, size_t column_idx, size_t offset)
496{
497 Block evaluated = structure->literals.cloneWithColumns(std::move(columns));
498 columns = structure->literals.cloneEmptyColumns();
499 if (!structure->literals.columns())
500 evaluated.insert({ColumnConst::create(ColumnUInt8::create(1, 0), rows_count), std::make_shared<DataTypeUInt8>(), "_dummy"});
501 structure->actions_on_literals->execute(evaluated);
502
503 if (!evaluated || evaluated.rows() != rows_count)
504 throw Exception("Number of rows mismatch after evaluation of batch of constant expressions: got " +
505 std::to_string(evaluated.rows()) + " rows for " + std::to_string(rows_count) + " expressions",
506 ErrorCodes::LOGICAL_ERROR);
507
508 if (!evaluated.has(structure->result_column_name))
509 throw Exception("Cannot evaluate template " + structure->result_column_name + ", block structure:\n" + evaluated.dumpStructure(),
510 ErrorCodes::LOGICAL_ERROR);
511
512 rows_count = 0;
513 ColumnPtr res = evaluated.getByName(structure->result_column_name).column->convertToFullColumnIfConst();
514 if (!structure->null_as_default)
515 return res;
516
517 /// Extract column with evaluated expression and mask for NULLs
518 auto & tuple = assert_cast<const ColumnTuple &>(*res);
519 if (tuple.tupleSize() != 2)
520 throw Exception("Invalid tuple size, it'a a bug", ErrorCodes::LOGICAL_ERROR);
521 auto & is_null = assert_cast<const ColumnUInt8 &>(tuple.getColumn(1));
522
523 for (size_t i = 0; i < is_null.size(); ++i)
524 if (is_null.getUInt(i))
525 nulls.setBit(column_idx, offset + i);
526
527 return tuple.getColumnPtr(0);
528}
529
530void ConstantExpressionTemplate::TemplateStructure::addNodesToCastResult(const IDataType & result_column_type, ASTPtr & expr, bool null_as_default)
531{
532 /// Replace "expr" with "CAST(expr, 'TypeName')"
533 /// or with "(CAST(assumeNotNull(expr as _expression), 'TypeName'), isNull(_expression))" if null_as_default is true
534 if (null_as_default)
535 {
536 expr->setAlias("_expression");
537 expr = makeASTFunction("assumeNotNull", std::move(expr));
538 }
539
540 expr = makeASTFunction("CAST", std::move(expr), std::make_shared<ASTLiteral>(result_column_type.getName()));
541
542 if (null_as_default)
543 {
544 auto is_null = makeASTFunction("isNull", std::make_shared<ASTIdentifier>("_expression"));
545 expr = makeASTFunction("tuple", std::move(expr), std::move(is_null));
546 }
547}
548
549}
550