1 | #pragma once |
2 | |
3 | #include <Core/Block.h> |
4 | #include <Processors/Formats/IInputFormat.h> |
5 | #include <Processors/Formats/IRowInputFormat.h> |
6 | #include <Formats/FormatSettings.h> |
7 | #include <Processors/Formats/Impl/ConstantExpressionTemplate.h> |
8 | |
9 | #include <IO/PeekableReadBuffer.h> |
10 | #include <Parsers/ExpressionListParsers.h> |
11 | |
12 | namespace DB |
13 | { |
14 | |
15 | class Context; |
16 | class ReadBuffer; |
17 | |
18 | |
19 | /** Stream to read data in VALUES format (as in INSERT query). |
20 | */ |
21 | class ValuesBlockInputFormat final : public IInputFormat |
22 | { |
23 | public: |
24 | /** Data is parsed using fast, streaming parser. |
25 | * If interpret_expressions is true, it will, in addition, try to use SQL parser and interpreter |
26 | * in case when streaming parser could not parse field (this is very slow). |
27 | * If deduce_templates_of_expressions is true, try to deduce template of expression in some row and use it |
28 | * to parse and interpret expressions in other rows (in most cases it's faster |
29 | * than interpreting expressions in each row separately, but it's still slower than streaming parsing) |
30 | */ |
31 | ValuesBlockInputFormat(ReadBuffer & in_, const Block & , const RowInputFormatParams & params_, |
32 | const FormatSettings & format_settings_); |
33 | |
34 | String getName() const override { return "ValuesBlockInputFormat" ; } |
35 | |
36 | void resetParser() override; |
37 | |
38 | /// TODO: remove context somehow. |
39 | void setContext(const Context & context_) { context = std::make_unique<Context>(context_); } |
40 | |
41 | const BlockMissingValues & getMissingValues() const override { return block_missing_values; } |
42 | |
43 | private: |
44 | enum class ParserType |
45 | { |
46 | Streaming, |
47 | BatchTemplate, |
48 | SingleExpressionEvaluation |
49 | }; |
50 | |
51 | typedef std::vector<std::optional<ConstantExpressionTemplate>> ConstantExpressionTemplates; |
52 | |
53 | Chunk generate() override; |
54 | |
55 | void readRow(MutableColumns & columns, size_t row_num); |
56 | |
57 | bool tryParseExpressionUsingTemplate(MutableColumnPtr & column, size_t column_idx); |
58 | ALWAYS_INLINE inline bool tryReadValue(IColumn & column, size_t column_idx); |
59 | bool parseExpression(IColumn & column, size_t column_idx); |
60 | |
61 | ALWAYS_INLINE inline void assertDelimiterAfterValue(size_t column_idx); |
62 | ALWAYS_INLINE inline bool checkDelimiterAfterValue(size_t column_idx); |
63 | |
64 | bool shouldDeduceNewTemplate(size_t column_idx); |
65 | |
66 | void readSuffix(); |
67 | |
68 | bool skipToNextRow(size_t min_chunk_bytes = 0, int balance = 0); |
69 | |
70 | private: |
71 | PeekableReadBuffer buf; |
72 | |
73 | RowInputFormatParams params; |
74 | |
75 | std::unique_ptr<Context> context; /// pimpl |
76 | const FormatSettings format_settings; |
77 | |
78 | size_t num_columns; |
79 | size_t total_rows = 0; |
80 | |
81 | std::vector<ParserType> parser_type_for_column; |
82 | std::vector<size_t> attempts_to_deduce_template; |
83 | std::vector<size_t> attempts_to_deduce_template_cached; |
84 | std::vector<size_t> rows_parsed_using_template; |
85 | |
86 | ParserExpression parser; |
87 | ConstantExpressionTemplates templates; |
88 | ConstantExpressionTemplate::Cache templates_cache; |
89 | |
90 | DataTypes types; |
91 | |
92 | BlockMissingValues block_missing_values; |
93 | }; |
94 | |
95 | } |
96 | |