| 1 | #include <optional> | 
|---|
| 2 |  | 
|---|
| 3 | #include <Core/Field.h> | 
|---|
| 4 | #include <Common/FieldVisitors.h> | 
|---|
| 5 | #include <Core/Row.h> | 
|---|
| 6 |  | 
|---|
| 7 | #include <Columns/ColumnsNumber.h> | 
|---|
| 8 | #include <Columns/ColumnTuple.h> | 
|---|
| 9 |  | 
|---|
| 10 | #include <Common/typeid_cast.h> | 
|---|
| 11 |  | 
|---|
| 12 | #include <DataStreams/IBlockInputStream.h> | 
|---|
| 13 |  | 
|---|
| 14 | #include <DataTypes/DataTypeTuple.h> | 
|---|
| 15 | #include <DataTypes/DataTypeNullable.h> | 
|---|
| 16 |  | 
|---|
| 17 | #include <Parsers/ASTExpressionList.h> | 
|---|
| 18 | #include <Parsers/ASTFunction.h> | 
|---|
| 19 | #include <Parsers/ASTLiteral.h> | 
|---|
| 20 |  | 
|---|
| 21 | #include <Interpreters/Set.h> | 
|---|
| 22 | #include <Interpreters/convertFieldToType.h> | 
|---|
| 23 | #include <Interpreters/evaluateConstantExpression.h> | 
|---|
| 24 | #include <Interpreters/NullableUtils.h> | 
|---|
| 25 | #include <Interpreters/sortBlock.h> | 
|---|
| 26 |  | 
|---|
| 27 | #include <Storages/MergeTree/KeyCondition.h> | 
|---|
| 28 |  | 
|---|
| 29 | #include <ext/range.h> | 
|---|
| 30 | #include <DataTypes/DataTypeLowCardinality.h> | 
|---|
| 31 |  | 
|---|
| 32 |  | 
|---|
| 33 | namespace DB | 
|---|
| 34 | { | 
|---|
| 35 |  | 
|---|
| 36 | namespace ErrorCodes | 
|---|
| 37 | { | 
|---|
| 38 | extern const int LOGICAL_ERROR; | 
|---|
| 39 | extern const int SET_SIZE_LIMIT_EXCEEDED; | 
|---|
| 40 | extern const int TYPE_MISMATCH; | 
|---|
| 41 | extern const int INCORRECT_ELEMENT_OF_SET; | 
|---|
| 42 | extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; | 
|---|
| 43 | } | 
|---|
| 44 |  | 
|---|
| 45 |  | 
|---|
| 46 | template <typename Method> | 
|---|
| 47 | void NO_INLINE Set::insertFromBlockImpl( | 
|---|
| 48 | Method & method, | 
|---|
| 49 | const ColumnRawPtrs & key_columns, | 
|---|
| 50 | size_t rows, | 
|---|
| 51 | SetVariants & variants, | 
|---|
| 52 | ConstNullMapPtr null_map, | 
|---|
| 53 | ColumnUInt8::Container * out_filter) | 
|---|
| 54 | { | 
|---|
| 55 | if (null_map) | 
|---|
| 56 | { | 
|---|
| 57 | if (out_filter) | 
|---|
| 58 | insertFromBlockImplCase<Method, true, true>(method, key_columns, rows, variants, null_map, out_filter); | 
|---|
| 59 | else | 
|---|
| 60 | insertFromBlockImplCase<Method, true, false>(method, key_columns, rows, variants, null_map, out_filter); | 
|---|
| 61 | } | 
|---|
| 62 | else | 
|---|
| 63 | { | 
|---|
| 64 | if (out_filter) | 
|---|
| 65 | insertFromBlockImplCase<Method, false, true>(method, key_columns, rows, variants, null_map, out_filter); | 
|---|
| 66 | else | 
|---|
| 67 | insertFromBlockImplCase<Method, false, false>(method, key_columns, rows, variants, null_map, out_filter); | 
|---|
| 68 | } | 
|---|
| 69 | } | 
|---|
| 70 |  | 
|---|
| 71 |  | 
|---|
| 72 | template <typename Method, bool has_null_map, bool build_filter> | 
|---|
| 73 | void NO_INLINE Set::insertFromBlockImplCase( | 
|---|
| 74 | Method & method, | 
|---|
| 75 | const ColumnRawPtrs & key_columns, | 
|---|
| 76 | size_t rows, | 
|---|
| 77 | SetVariants & variants, | 
|---|
| 78 | [[maybe_unused]] ConstNullMapPtr null_map, | 
|---|
| 79 | [[maybe_unused]] ColumnUInt8::Container * out_filter) | 
|---|
| 80 | { | 
|---|
| 81 | typename Method::State state(key_columns, key_sizes, nullptr); | 
|---|
| 82 |  | 
|---|
| 83 | /// For all rows | 
|---|
| 84 | for (size_t i = 0; i < rows; ++i) | 
|---|
| 85 | { | 
|---|
| 86 | if constexpr (has_null_map) | 
|---|
| 87 | { | 
|---|
| 88 | if ((*null_map)[i]) | 
|---|
| 89 | { | 
|---|
| 90 | if constexpr (build_filter) | 
|---|
| 91 | { | 
|---|
| 92 | (*out_filter)[i] = false; | 
|---|
| 93 | } | 
|---|
| 94 | continue; | 
|---|
| 95 | } | 
|---|
| 96 | } | 
|---|
| 97 |  | 
|---|
| 98 | [[maybe_unused]] auto emplace_result = state.emplaceKey(method.data, i, variants.string_pool); | 
|---|
| 99 |  | 
|---|
| 100 | if constexpr (build_filter) | 
|---|
| 101 | (*out_filter)[i] = emplace_result.isInserted(); | 
|---|
| 102 | } | 
|---|
| 103 | } | 
|---|
| 104 |  | 
|---|
| 105 |  | 
|---|
| 106 | void Set::(const Block & block) | 
|---|
| 107 | { | 
|---|
| 108 | std::unique_lock lock(rwlock); | 
|---|
| 109 |  | 
|---|
| 110 | if (!empty()) | 
|---|
| 111 | return; | 
|---|
| 112 |  | 
|---|
| 113 | keys_size = block.columns(); | 
|---|
| 114 | ColumnRawPtrs key_columns; | 
|---|
| 115 | key_columns.reserve(keys_size); | 
|---|
| 116 | data_types.reserve(keys_size); | 
|---|
| 117 | set_elements_types.reserve(keys_size); | 
|---|
| 118 |  | 
|---|
| 119 | /// The constant columns to the right of IN are not supported directly. For this, they first materialize. | 
|---|
| 120 | Columns materialized_columns; | 
|---|
| 121 |  | 
|---|
| 122 | /// Remember the columns we will work with | 
|---|
| 123 | for (size_t i = 0; i < keys_size; ++i) | 
|---|
| 124 | { | 
|---|
| 125 | materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()); | 
|---|
| 126 | key_columns.emplace_back(materialized_columns.back().get()); | 
|---|
| 127 | data_types.emplace_back(block.safeGetByPosition(i).type); | 
|---|
| 128 | set_elements_types.emplace_back(block.safeGetByPosition(i).type); | 
|---|
| 129 |  | 
|---|
| 130 | /// Convert low cardinality column to full. | 
|---|
| 131 | if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(data_types.back().get())) | 
|---|
| 132 | { | 
|---|
| 133 | data_types.back() = low_cardinality_type->getDictionaryType(); | 
|---|
| 134 | materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfLowCardinality()); | 
|---|
| 135 | key_columns.back() = materialized_columns.back().get(); | 
|---|
| 136 | } | 
|---|
| 137 | } | 
|---|
| 138 |  | 
|---|
| 139 | /// We will insert to the Set only keys, where all components are not NULL. | 
|---|
| 140 | ConstNullMapPtr null_map{}; | 
|---|
| 141 | ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); | 
|---|
| 142 |  | 
|---|
| 143 | if (fill_set_elements) | 
|---|
| 144 | { | 
|---|
| 145 | /// Create empty columns with set values in advance. | 
|---|
| 146 | /// It is needed because set may be empty, so method 'insertFromBlock' will be never called. | 
|---|
| 147 | set_elements.reserve(keys_size); | 
|---|
| 148 | for (const auto & type : set_elements_types) | 
|---|
| 149 | set_elements.emplace_back(type->createColumn()); | 
|---|
| 150 | } | 
|---|
| 151 |  | 
|---|
| 152 | /// Choose data structure to use for the set. | 
|---|
| 153 | data.init(data.chooseMethod(key_columns, key_sizes)); | 
|---|
| 154 | } | 
|---|
| 155 |  | 
|---|
| 156 |  | 
|---|
| 157 | bool Set::insertFromBlock(const Block & block) | 
|---|
| 158 | { | 
|---|
| 159 | std::unique_lock lock(rwlock); | 
|---|
| 160 |  | 
|---|
| 161 | if (empty()) | 
|---|
| 162 | throw Exception( "Method Set::setHeader must be called before Set::insertFromBlock", ErrorCodes::LOGICAL_ERROR); | 
|---|
| 163 |  | 
|---|
| 164 | ColumnRawPtrs key_columns; | 
|---|
| 165 | key_columns.reserve(keys_size); | 
|---|
| 166 |  | 
|---|
| 167 | /// The constant columns to the right of IN are not supported directly. For this, they first materialize. | 
|---|
| 168 | Columns materialized_columns; | 
|---|
| 169 |  | 
|---|
| 170 | /// Remember the columns we will work with | 
|---|
| 171 | for (size_t i = 0; i < keys_size; ++i) | 
|---|
| 172 | { | 
|---|
| 173 | materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality()); | 
|---|
| 174 | key_columns.emplace_back(materialized_columns.back().get()); | 
|---|
| 175 | } | 
|---|
| 176 |  | 
|---|
| 177 | size_t rows = block.rows(); | 
|---|
| 178 |  | 
|---|
| 179 | /// We will insert to the Set only keys, where all components are not NULL. | 
|---|
| 180 | ConstNullMapPtr null_map{}; | 
|---|
| 181 | ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); | 
|---|
| 182 |  | 
|---|
| 183 | /// Filter to extract distinct values from the block. | 
|---|
| 184 | ColumnUInt8::MutablePtr filter; | 
|---|
| 185 | if (fill_set_elements) | 
|---|
| 186 | filter = ColumnUInt8::create(block.rows()); | 
|---|
| 187 |  | 
|---|
| 188 | switch (data.type) | 
|---|
| 189 | { | 
|---|
| 190 | case SetVariants::Type::EMPTY: | 
|---|
| 191 | break; | 
|---|
| 192 | #define M(NAME) \ | 
|---|
| 193 | case SetVariants::Type::NAME: \ | 
|---|
| 194 | insertFromBlockImpl(*data.NAME, key_columns, rows, data, null_map, filter ? &filter->getData() : nullptr); \ | 
|---|
| 195 | break; | 
|---|
| 196 | APPLY_FOR_SET_VARIANTS(M) | 
|---|
| 197 | #undef M | 
|---|
| 198 | } | 
|---|
| 199 |  | 
|---|
| 200 | if (fill_set_elements) | 
|---|
| 201 | { | 
|---|
| 202 | for (size_t i = 0; i < keys_size; ++i) | 
|---|
| 203 | { | 
|---|
| 204 | auto filtered_column = block.getByPosition(i).column->filter(filter->getData(), rows); | 
|---|
| 205 | if (set_elements[i]->empty()) | 
|---|
| 206 | set_elements[i] = filtered_column; | 
|---|
| 207 | else | 
|---|
| 208 | set_elements[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size()); | 
|---|
| 209 | } | 
|---|
| 210 | } | 
|---|
| 211 |  | 
|---|
| 212 | return limits.check(getTotalRowCount(), getTotalByteCount(), "IN-set", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); | 
|---|
| 213 | } | 
|---|
| 214 |  | 
|---|
| 215 |  | 
|---|
| 216 | static Field (const ASTPtr & node, const IDataType & type, const Context & context) | 
|---|
| 217 | { | 
|---|
| 218 | if (const auto * lit = node->as<ASTLiteral>()) | 
|---|
| 219 | { | 
|---|
| 220 | return convertFieldToType(lit->value, type); | 
|---|
| 221 | } | 
|---|
| 222 | else if (node->as<ASTFunction>()) | 
|---|
| 223 | { | 
|---|
| 224 | std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(node, context); | 
|---|
| 225 | return convertFieldToType(value_raw.first, type, value_raw.second.get()); | 
|---|
| 226 | } | 
|---|
| 227 | else | 
|---|
| 228 | throw Exception( "Incorrect element of set. Must be literal or constant expression.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); | 
|---|
| 229 | } | 
|---|
| 230 |  | 
|---|
| 231 |  | 
|---|
| 232 | void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & context) | 
|---|
| 233 | { | 
|---|
| 234 | /// Will form a block with values from the set. | 
|---|
| 235 |  | 
|---|
| 236 | Block ; | 
|---|
| 237 | size_t num_columns = types.size(); | 
|---|
| 238 | for (size_t i = 0; i < num_columns; ++i) | 
|---|
| 239 | header.insert(ColumnWithTypeAndName(types[i]->createColumn(), types[i], "_"+ toString(i))); | 
|---|
| 240 | setHeader(header); | 
|---|
| 241 |  | 
|---|
| 242 | MutableColumns columns = header.cloneEmptyColumns(); | 
|---|
| 243 |  | 
|---|
| 244 | DataTypePtr tuple_type; | 
|---|
| 245 | Row tuple_values; | 
|---|
| 246 | const auto & list = node->as<ASTExpressionList &>(); | 
|---|
| 247 | for (auto & elem : list.children) | 
|---|
| 248 | { | 
|---|
| 249 | if (num_columns == 1) | 
|---|
| 250 | { | 
|---|
| 251 | Field value = extractValueFromNode(elem, *types[0], context); | 
|---|
| 252 |  | 
|---|
| 253 | if (!value.isNull()) | 
|---|
| 254 | columns[0]->insert(value); | 
|---|
| 255 | } | 
|---|
| 256 | else if (const auto * func = elem->as<ASTFunction>()) | 
|---|
| 257 | { | 
|---|
| 258 | Field function_result; | 
|---|
| 259 | const Tuple * tuple = nullptr; | 
|---|
| 260 | if (func->name != "tuple") | 
|---|
| 261 | { | 
|---|
| 262 | if (!tuple_type) | 
|---|
| 263 | tuple_type = std::make_shared<DataTypeTuple>(types); | 
|---|
| 264 |  | 
|---|
| 265 | function_result = extractValueFromNode(elem, *tuple_type, context); | 
|---|
| 266 | if (function_result.getType() != Field::Types::Tuple) | 
|---|
| 267 | throw Exception( "Invalid type of set. Expected tuple, got "+ String(function_result.getTypeName()), | 
|---|
| 268 | ErrorCodes::INCORRECT_ELEMENT_OF_SET); | 
|---|
| 269 |  | 
|---|
| 270 | tuple = &function_result.get<Tuple>(); | 
|---|
| 271 | } | 
|---|
| 272 |  | 
|---|
| 273 | size_t tuple_size = tuple ? tuple->size() : func->arguments->children.size(); | 
|---|
| 274 | if (tuple_size != num_columns) | 
|---|
| 275 | throw Exception( "Incorrect size of tuple in set: "+ toString(tuple_size) + " instead of "+ toString(num_columns), | 
|---|
| 276 | ErrorCodes::INCORRECT_ELEMENT_OF_SET); | 
|---|
| 277 |  | 
|---|
| 278 | if (tuple_values.empty()) | 
|---|
| 279 | tuple_values.resize(tuple_size); | 
|---|
| 280 |  | 
|---|
| 281 | size_t i = 0; | 
|---|
| 282 | for (; i < tuple_size; ++i) | 
|---|
| 283 | { | 
|---|
| 284 | Field value = tuple ? (*tuple)[i] | 
|---|
| 285 | : extractValueFromNode(func->arguments->children[i], *types[i], context); | 
|---|
| 286 |  | 
|---|
| 287 | /// If at least one of the elements of the tuple has an impossible (outside the range of the type) value, then the entire tuple too. | 
|---|
| 288 | if (value.isNull()) | 
|---|
| 289 | break; | 
|---|
| 290 |  | 
|---|
| 291 | tuple_values[i] = value; | 
|---|
| 292 | } | 
|---|
| 293 |  | 
|---|
| 294 | if (i == tuple_size) | 
|---|
| 295 | for (i = 0; i < tuple_size; ++i) | 
|---|
| 296 | columns[i]->insert(tuple_values[i]); | 
|---|
| 297 | } | 
|---|
| 298 | else | 
|---|
| 299 | throw Exception( "Incorrect element of set", ErrorCodes::INCORRECT_ELEMENT_OF_SET); | 
|---|
| 300 | } | 
|---|
| 301 |  | 
|---|
| 302 | Block block = header.cloneWithColumns(std::move(columns)); | 
|---|
| 303 | insertFromBlock(block); | 
|---|
| 304 | finishInsert(); | 
|---|
| 305 | } | 
|---|
| 306 |  | 
|---|
| 307 |  | 
|---|
| 308 | ColumnPtr Set::execute(const Block & block, bool negative) const | 
|---|
| 309 | { | 
|---|
| 310 | size_t num_key_columns = block.columns(); | 
|---|
| 311 |  | 
|---|
| 312 | if (0 == num_key_columns) | 
|---|
| 313 | throw Exception( "Logical error: no columns passed to Set::execute method.", ErrorCodes::LOGICAL_ERROR); | 
|---|
| 314 |  | 
|---|
| 315 | auto res = ColumnUInt8::create(); | 
|---|
| 316 | ColumnUInt8::Container & vec_res = res->getData(); | 
|---|
| 317 | vec_res.resize(block.safeGetByPosition(0).column->size()); | 
|---|
| 318 |  | 
|---|
| 319 | if (vec_res.empty()) | 
|---|
| 320 | return res; | 
|---|
| 321 |  | 
|---|
| 322 | std::shared_lock lock(rwlock); | 
|---|
| 323 |  | 
|---|
| 324 | /// If the set is empty. | 
|---|
| 325 | if (data_types.empty()) | 
|---|
| 326 | { | 
|---|
| 327 | if (negative) | 
|---|
| 328 | memset(vec_res.data(), 1, vec_res.size()); | 
|---|
| 329 | else | 
|---|
| 330 | memset(vec_res.data(), 0, vec_res.size()); | 
|---|
| 331 | return res; | 
|---|
| 332 | } | 
|---|
| 333 |  | 
|---|
| 334 | checkColumnsNumber(num_key_columns); | 
|---|
| 335 |  | 
|---|
| 336 | /// Remember the columns we will work with. Also check that the data types are correct. | 
|---|
| 337 | ColumnRawPtrs key_columns; | 
|---|
| 338 | key_columns.reserve(num_key_columns); | 
|---|
| 339 |  | 
|---|
| 340 | /// The constant columns to the left of IN are not supported directly. For this, they first materialize. | 
|---|
| 341 | Columns materialized_columns; | 
|---|
| 342 |  | 
|---|
| 343 | for (size_t i = 0; i < num_key_columns; ++i) | 
|---|
| 344 | { | 
|---|
| 345 | checkTypesEqual(i, block.safeGetByPosition(i).type); | 
|---|
| 346 | materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()); | 
|---|
| 347 | key_columns.emplace_back() = materialized_columns.back().get(); | 
|---|
| 348 | } | 
|---|
| 349 |  | 
|---|
| 350 | /// We will check existence in Set only for keys, where all components are not NULL. | 
|---|
| 351 | ConstNullMapPtr null_map{}; | 
|---|
| 352 | ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); | 
|---|
| 353 |  | 
|---|
| 354 | executeOrdinary(key_columns, vec_res, negative, null_map); | 
|---|
| 355 |  | 
|---|
| 356 | return res; | 
|---|
| 357 | } | 
|---|
| 358 |  | 
|---|
| 359 |  | 
|---|
| 360 | template <typename Method> | 
|---|
| 361 | void NO_INLINE Set::executeImpl( | 
|---|
| 362 | Method & method, | 
|---|
| 363 | const ColumnRawPtrs & key_columns, | 
|---|
| 364 | ColumnUInt8::Container & vec_res, | 
|---|
| 365 | bool negative, | 
|---|
| 366 | size_t rows, | 
|---|
| 367 | ConstNullMapPtr null_map) const | 
|---|
| 368 | { | 
|---|
| 369 | if (null_map) | 
|---|
| 370 | executeImplCase<Method, true>(method, key_columns, vec_res, negative, rows, null_map); | 
|---|
| 371 | else | 
|---|
| 372 | executeImplCase<Method, false>(method, key_columns, vec_res, negative, rows, null_map); | 
|---|
| 373 | } | 
|---|
| 374 |  | 
|---|
| 375 |  | 
|---|
| 376 | template <typename Method, bool has_null_map> | 
|---|
| 377 | void NO_INLINE Set::executeImplCase( | 
|---|
| 378 | Method & method, | 
|---|
| 379 | const ColumnRawPtrs & key_columns, | 
|---|
| 380 | ColumnUInt8::Container & vec_res, | 
|---|
| 381 | bool negative, | 
|---|
| 382 | size_t rows, | 
|---|
| 383 | ConstNullMapPtr null_map) const | 
|---|
| 384 | { | 
|---|
| 385 | Arena pool; | 
|---|
| 386 | typename Method::State state(key_columns, key_sizes, nullptr); | 
|---|
| 387 |  | 
|---|
| 388 | /// NOTE Optimization is not used for consecutive identical strings. | 
|---|
| 389 |  | 
|---|
| 390 | /// For all rows | 
|---|
| 391 | for (size_t i = 0; i < rows; ++i) | 
|---|
| 392 | { | 
|---|
| 393 | if (has_null_map && (*null_map)[i]) | 
|---|
| 394 | vec_res[i] = negative; | 
|---|
| 395 | else | 
|---|
| 396 | { | 
|---|
| 397 | auto find_result = state.findKey(method.data, i, pool); | 
|---|
| 398 | vec_res[i] = negative ^ find_result.isFound(); | 
|---|
| 399 | } | 
|---|
| 400 | } | 
|---|
| 401 | } | 
|---|
| 402 |  | 
|---|
| 403 |  | 
|---|
| 404 | void Set::executeOrdinary( | 
|---|
| 405 | const ColumnRawPtrs & key_columns, | 
|---|
| 406 | ColumnUInt8::Container & vec_res, | 
|---|
| 407 | bool negative, | 
|---|
| 408 | ConstNullMapPtr null_map) const | 
|---|
| 409 | { | 
|---|
| 410 | size_t rows = key_columns[0]->size(); | 
|---|
| 411 |  | 
|---|
| 412 | switch (data.type) | 
|---|
| 413 | { | 
|---|
| 414 | case SetVariants::Type::EMPTY: | 
|---|
| 415 | break; | 
|---|
| 416 | #define M(NAME) \ | 
|---|
| 417 | case SetVariants::Type::NAME: \ | 
|---|
| 418 | executeImpl(*data.NAME, key_columns, vec_res, negative, rows, null_map); \ | 
|---|
| 419 | break; | 
|---|
| 420 | APPLY_FOR_SET_VARIANTS(M) | 
|---|
| 421 | #undef M | 
|---|
| 422 | } | 
|---|
| 423 | } | 
|---|
| 424 |  | 
|---|
| 425 | void Set::checkColumnsNumber(size_t num_key_columns) const | 
|---|
| 426 | { | 
|---|
| 427 | if (data_types.size() != num_key_columns) | 
|---|
| 428 | { | 
|---|
| 429 | std::stringstream message; | 
|---|
| 430 | message << "Number of columns in section IN doesn't match. " | 
|---|
| 431 | << num_key_columns << " at left, "<< data_types.size() << " at right."; | 
|---|
| 432 | throw Exception(message.str(), ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); | 
|---|
| 433 | } | 
|---|
| 434 | } | 
|---|
| 435 |  | 
|---|
| 436 | void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const | 
|---|
| 437 | { | 
|---|
| 438 | if (!removeNullable(recursiveRemoveLowCardinality(data_types[set_type_idx]))->equals(*removeNullable(recursiveRemoveLowCardinality(other_type)))) | 
|---|
| 439 | throw Exception( "Types of column "+ toString(set_type_idx + 1) + " in section IN don't match: " | 
|---|
| 440 | + other_type->getName() + " on the left, " | 
|---|
| 441 | + data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH); | 
|---|
| 442 | } | 
|---|
| 443 |  | 
|---|
| 444 | MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_) | 
|---|
| 445 | : indexes_mapping(std::move(index_mapping_)) | 
|---|
| 446 | { | 
|---|
| 447 | std::sort(indexes_mapping.begin(), indexes_mapping.end(), | 
|---|
| 448 | [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) | 
|---|
| 449 | { | 
|---|
| 450 | return std::forward_as_tuple(l.key_index, l.tuple_index) < std::forward_as_tuple(r.key_index, r.tuple_index); | 
|---|
| 451 | }); | 
|---|
| 452 |  | 
|---|
| 453 | indexes_mapping.erase(std::unique( | 
|---|
| 454 | indexes_mapping.begin(), indexes_mapping.end(), | 
|---|
| 455 | [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) | 
|---|
| 456 | { | 
|---|
| 457 | return l.key_index == r.key_index; | 
|---|
| 458 | }), indexes_mapping.end()); | 
|---|
| 459 |  | 
|---|
| 460 | size_t tuple_size = indexes_mapping.size(); | 
|---|
| 461 | ordered_set.resize(tuple_size); | 
|---|
| 462 | for (size_t i = 0; i < tuple_size; ++i) | 
|---|
| 463 | ordered_set[i] = set_elements[indexes_mapping[i].tuple_index]; | 
|---|
| 464 |  | 
|---|
| 465 | Block block_to_sort; | 
|---|
| 466 | SortDescription sort_description; | 
|---|
| 467 | for (size_t i = 0; i < tuple_size; ++i) | 
|---|
| 468 | { | 
|---|
| 469 | block_to_sort.insert({ ordered_set[i], nullptr, ""}); | 
|---|
| 470 | sort_description.emplace_back(i, 1, 1); | 
|---|
| 471 | } | 
|---|
| 472 |  | 
|---|
| 473 | sortBlock(block_to_sort, sort_description); | 
|---|
| 474 |  | 
|---|
| 475 | for (size_t i = 0; i < tuple_size; ++i) | 
|---|
| 476 | ordered_set[i] = block_to_sort.getByPosition(i).column; | 
|---|
| 477 | } | 
|---|
| 478 |  | 
|---|
| 479 |  | 
|---|
| 480 | /** Return the BoolMask where: | 
|---|
| 481 | * 1: the intersection of the set and the range is non-empty | 
|---|
| 482 | * 2: the range contains elements not in the set | 
|---|
| 483 | */ | 
|---|
| 484 | BoolMask MergeTreeSetIndex::mayBeTrueInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) | 
|---|
| 485 | { | 
|---|
| 486 | size_t tuple_size = indexes_mapping.size(); | 
|---|
| 487 |  | 
|---|
| 488 | using FieldWithInfinityTuple = std::vector<FieldWithInfinity>; | 
|---|
| 489 |  | 
|---|
| 490 | FieldWithInfinityTuple left_point; | 
|---|
| 491 | FieldWithInfinityTuple right_point; | 
|---|
| 492 | left_point.reserve(tuple_size); | 
|---|
| 493 | right_point.reserve(tuple_size); | 
|---|
| 494 |  | 
|---|
| 495 | bool invert_left_infinities = false; | 
|---|
| 496 | bool invert_right_infinities = false; | 
|---|
| 497 |  | 
|---|
| 498 | for (size_t i = 0; i < tuple_size; ++i) | 
|---|
| 499 | { | 
|---|
| 500 | std::optional<Range> new_range = KeyCondition::applyMonotonicFunctionsChainToRange( | 
|---|
| 501 | key_ranges[indexes_mapping[i].key_index], | 
|---|
| 502 | indexes_mapping[i].functions, | 
|---|
| 503 | data_types[indexes_mapping[i].key_index]); | 
|---|
| 504 |  | 
|---|
| 505 | if (!new_range) | 
|---|
| 506 | return {true, true}; | 
|---|
| 507 |  | 
|---|
| 508 | /** A range that ends in (x, y, ..., +inf) exclusive is the same as a range | 
|---|
| 509 | * that ends in (x, y, ..., -inf) inclusive and vice versa for the left bound. | 
|---|
| 510 | */ | 
|---|
| 511 | if (new_range->left_bounded) | 
|---|
| 512 | { | 
|---|
| 513 | if (!new_range->left_included) | 
|---|
| 514 | invert_left_infinities = true; | 
|---|
| 515 |  | 
|---|
| 516 | left_point.push_back(FieldWithInfinity(new_range->left)); | 
|---|
| 517 | } | 
|---|
| 518 | else | 
|---|
| 519 | { | 
|---|
| 520 | if (invert_left_infinities) | 
|---|
| 521 | left_point.push_back(FieldWithInfinity::getPlusinfinity()); | 
|---|
| 522 | else | 
|---|
| 523 | left_point.push_back(FieldWithInfinity::getMinusInfinity()); | 
|---|
| 524 | } | 
|---|
| 525 |  | 
|---|
| 526 | if (new_range->right_bounded) | 
|---|
| 527 | { | 
|---|
| 528 | if (!new_range->right_included) | 
|---|
| 529 | invert_right_infinities = true; | 
|---|
| 530 |  | 
|---|
| 531 | right_point.push_back(FieldWithInfinity(new_range->right)); | 
|---|
| 532 | } | 
|---|
| 533 | else | 
|---|
| 534 | { | 
|---|
| 535 | if (invert_right_infinities) | 
|---|
| 536 | right_point.push_back(FieldWithInfinity::getMinusInfinity()); | 
|---|
| 537 | else | 
|---|
| 538 | right_point.push_back(FieldWithInfinity::getPlusinfinity()); | 
|---|
| 539 | } | 
|---|
| 540 | } | 
|---|
| 541 |  | 
|---|
| 542 | /// This allows to construct tuple in 'ordered_set' at specified index for comparison with range. | 
|---|
| 543 |  | 
|---|
| 544 | auto indices = ext::range(0, ordered_set.at(0)->size()); | 
|---|
| 545 |  | 
|---|
| 546 | auto  = [tuple_size, this](size_t i) | 
|---|
| 547 | { | 
|---|
| 548 | /// Inefficient. | 
|---|
| 549 | FieldWithInfinityTuple res; | 
|---|
| 550 | res.reserve(tuple_size); | 
|---|
| 551 | for (size_t j = 0; j < tuple_size; ++j) | 
|---|
| 552 | res.emplace_back((*ordered_set[j])[i]); | 
|---|
| 553 | return res; | 
|---|
| 554 | }; | 
|---|
| 555 |  | 
|---|
| 556 | auto compare = [&extract_tuple](size_t i, const FieldWithInfinityTuple & rhs) | 
|---|
| 557 | { | 
|---|
| 558 | return extract_tuple(i) < rhs; | 
|---|
| 559 | }; | 
|---|
| 560 |  | 
|---|
| 561 | /** Because each parallelogram maps to a contiguous sequence of elements | 
|---|
| 562 | * layed out in the lexicographically increasing order, the set intersects the range | 
|---|
| 563 | * if and only if either bound coincides with an element or at least one element | 
|---|
| 564 | * is between the lower bounds | 
|---|
| 565 | */ | 
|---|
| 566 | auto left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, compare); | 
|---|
| 567 | auto right_lower = std::lower_bound(indices.begin(), indices.end(), right_point, compare); | 
|---|
| 568 |  | 
|---|
| 569 | return | 
|---|
| 570 | { | 
|---|
| 571 | left_lower != right_lower | 
|---|
| 572 | || (left_lower != indices.end() && extract_tuple(*left_lower) == left_point) | 
|---|
| 573 | || (right_lower != indices.end() && extract_tuple(*right_lower) == right_point), | 
|---|
| 574 | true | 
|---|
| 575 | }; | 
|---|
| 576 | } | 
|---|
| 577 |  | 
|---|
| 578 | } | 
|---|
| 579 |  | 
|---|