1#pragma once
2
3#include <shared_mutex>
4#include <Core/Block.h>
5#include <DataStreams/SizeLimits.h>
6#include <DataTypes/IDataType.h>
7#include <Interpreters/SetVariants.h>
8#include <Interpreters/Context.h>
9#include <Parsers/IAST.h>
10#include <Storages/MergeTree/BoolMask.h>
11
12#include <common/logger_useful.h>
13
14
15namespace DB
16{
17
18struct Range;
19class FieldWithInfinity;
20
21class IFunctionBase;
22using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
23
24
25/** Data structure for implementation of IN expression.
26 */
27class Set
28{
29public:
30 /// 'fill_set_elements': in addition to hash table
31 /// (that is useful only for checking that some value is in the set and may not store the original values),
32 /// store all set elements in explicit form.
33 /// This is needed for subsequent use for index.
34 Set(const SizeLimits & limits_, bool fill_set_elements_)
35 : log(&Logger::get("Set")),
36 limits(limits_), fill_set_elements(fill_set_elements_)
37 {
38 }
39
40 bool empty() const { return data.empty(); }
41
42 /** Set can be created either from AST or from a stream of data (subquery result).
43 */
44
45 /** Create a Set from expression (specified literally in the query).
46 * 'types' - types of what are on the left hand side of IN.
47 * 'node' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6).
48 * 'fill_set_elements' - if true, fill vector of elements. For primary key to work.
49 */
50 void createFromAST(const DataTypes & types, ASTPtr node, const Context & context);
51
52 /** Create a Set from stream.
53 * Call setHeader, then call insertFromBlock for each block.
54 */
55 void setHeader(const Block & header);
56
57 /// Returns false, if some limit was exceeded and no need to insert more data.
58 bool insertFromBlock(const Block & block);
59 /// Call after all blocks were inserted. To get the information that set is already created.
60 void finishInsert() { is_created = true; }
61
62 bool isCreated() const { return is_created; }
63
64 /** For columns of 'block', check belonging of corresponding rows to the set.
65 * Return UInt8 column with the result.
66 */
67 ColumnPtr execute(const Block & block, bool negative) const;
68
69 size_t getTotalRowCount() const { return data.getTotalRowCount(); }
70 size_t getTotalByteCount() const { return data.getTotalByteCount(); }
71
72 const DataTypes & getDataTypes() const { return data_types; }
73 const DataTypes & getElementsTypes() const { return set_elements_types; }
74
75 bool hasExplicitSetElements() const { return fill_set_elements; }
76 Columns getSetElements() const { return { set_elements.begin(), set_elements.end() }; }
77
78 void checkColumnsNumber(size_t num_key_columns) const;
79 void checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const;
80
81private:
82 size_t keys_size = 0;
83 Sizes key_sizes;
84
85 SetVariants data;
86
87 /** How IN works with Nullable types.
88 *
89 * For simplicity reasons, all NULL values and any tuples with at least one NULL element are ignored in the Set.
90 * And for left hand side values, that are NULLs or contain any NULLs, we return 0 (means that element is not in Set).
91 *
92 * If we want more standard compliant behaviour, we must return NULL
93 * if lhs is NULL and set is not empty or if lhs is not in set, but set contains at least one NULL.
94 * It is more complicated with tuples.
95 * For example,
96 * (1, NULL, 2) IN ((1, NULL, 3)) must return 0,
97 * but (1, NULL, 2) IN ((1, 1111, 2)) must return NULL.
98 *
99 * We have not implemented such sophisticated behaviour.
100 */
101
102 /** The data types from which the set was created.
103 * When checking for belonging to a set, the types of columns to be checked must match with them.
104 */
105 DataTypes data_types;
106
107 /// Types for set_elements.
108 DataTypes set_elements_types;
109
110 Logger * log;
111
112 /// Limitations on the maximum size of the set
113 SizeLimits limits;
114
115 /// Do we need to additionally store all elements of the set in explicit form for subsequent use for index.
116 bool fill_set_elements;
117
118 /// Check if set contains all the data.
119 bool is_created = false;
120
121 /// If in the left part columns contains the same types as the elements of the set.
122 void executeOrdinary(
123 const ColumnRawPtrs & key_columns,
124 ColumnUInt8::Container & vec_res,
125 bool negative,
126 const PaddedPODArray<UInt8> * null_map) const;
127
128 /// Collected elements of `Set`.
129 /// It is necessary for the index to work on the primary key in the IN statement.
130 std::vector<IColumn::WrappedPtr> set_elements;
131
132 /** Protects work with the set in the functions `insertFromBlock` and `execute`.
133 * These functions can be called simultaneously from different threads only when using StorageSet,
134 * and StorageSet calls only these two functions.
135 * Therefore, the rest of the functions for working with set are not protected.
136 */
137 mutable std::shared_mutex rwlock;
138
139 template <typename Method>
140 void insertFromBlockImpl(
141 Method & method,
142 const ColumnRawPtrs & key_columns,
143 size_t rows,
144 SetVariants & variants,
145 ConstNullMapPtr null_map,
146 ColumnUInt8::Container * out_filter);
147
148 template <typename Method, bool has_null_map, bool build_filter>
149 void insertFromBlockImplCase(
150 Method & method,
151 const ColumnRawPtrs & key_columns,
152 size_t rows,
153 SetVariants & variants,
154 ConstNullMapPtr null_map,
155 ColumnUInt8::Container * out_filter);
156
157 template <typename Method>
158 void executeImpl(
159 Method & method,
160 const ColumnRawPtrs & key_columns,
161 ColumnUInt8::Container & vec_res,
162 bool negative,
163 size_t rows,
164 ConstNullMapPtr null_map) const;
165
166 template <typename Method, bool has_null_map>
167 void executeImplCase(
168 Method & method,
169 const ColumnRawPtrs & key_columns,
170 ColumnUInt8::Container & vec_res,
171 bool negative,
172 size_t rows,
173 ConstNullMapPtr null_map) const;
174};
175
176using SetPtr = std::shared_ptr<Set>;
177using ConstSetPtr = std::shared_ptr<const Set>;
178using Sets = std::vector<SetPtr>;
179
180
181class IFunction;
182using FunctionPtr = std::shared_ptr<IFunction>;
183
184/// Class for mayBeTrueInRange function.
185class MergeTreeSetIndex
186{
187public:
188 /** Mapping for tuple positions from Set::set_elements to
189 * position of pk index and functions chain applied to this column.
190 */
191 struct KeyTuplePositionMapping
192 {
193 size_t tuple_index;
194 size_t key_index;
195 std::vector<FunctionBasePtr> functions;
196 };
197
198 MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_);
199
200 size_t size() const { return ordered_set.at(0)->size(); }
201
202 BoolMask mayBeTrueInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types);
203
204private:
205 Columns ordered_set;
206 std::vector<KeyTuplePositionMapping> indexes_mapping;
207};
208
209}
210