1#include <Storages/MergeTree/MergeTreeIndexBloomFilter.h>
2#include <Storages/MergeTree/MergeTreeData.h>
3#include <Interpreters/SyntaxAnalyzer.h>
4#include <Interpreters/ExpressionAnalyzer.h>
5#include <Core/Types.h>
6#include <ext/bit_cast.h>
7#include <Parsers/ASTLiteral.h>
8#include <IO/ReadHelpers.h>
9#include <IO/WriteHelpers.h>
10#include <DataTypes/DataTypeArray.h>
11#include <DataTypes/DataTypeNullable.h>
12#include <Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h>
13#include <Parsers/queryToString.h>
14#include <Columns/ColumnConst.h>
15#include <Columns/ColumnLowCardinality.h>
16#include <Interpreters/BloomFilterHash.h>
17
18
19namespace DB
20{
21
/// Error codes referenced by this translation unit.
/// ILLEGAL_COLUMN is thrown by the index column type check below, so it is declared here
/// explicitly instead of relying on a declaration pulled in by some other header.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int INCORRECT_QUERY;
    extern const int ILLEGAL_COLUMN;
}
27
28MergeTreeIndexBloomFilter::MergeTreeIndexBloomFilter(
29 const String & name_, const ExpressionActionsPtr & expr_, const Names & columns_, const DataTypes & data_types_, const Block & header_,
30 size_t granularity_, size_t bits_per_row_, size_t hash_functions_)
31 : IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_), bits_per_row(bits_per_row_),
32 hash_functions(hash_functions_)
33{
34}
35
36MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const
37{
38 return std::make_shared<MergeTreeIndexGranuleBloomFilter>(bits_per_row, hash_functions, columns.size());
39}
40
41bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const
42{
43 const String & column_name = node->getColumnName();
44
45 for (const auto & cname : columns)
46 if (column_name == cname)
47 return true;
48
49 if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
50 {
51 for (const auto & children : func->arguments->children)
52 if (mayBenefitFromIndexForIn(children))
53 return true;
54 }
55
56 return false;
57}
58
59MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() const
60{
61 return std::make_shared<MergeTreeIndexAggregatorBloomFilter>(bits_per_row, hash_functions, columns);
62}
63
64MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const
65{
66 return std::make_shared<MergeTreeIndexConditionBloomFilter>(query_info, context, header, hash_functions);
67}
68
69static void assertIndexColumnsType(const Block & header)
70{
71 if (!header || !header.columns())
72 throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY);
73
74 const DataTypes & columns_data_types = header.getDataTypes();
75
76 for (auto & type : columns_data_types)
77 {
78 const IDataType * actual_type = BloomFilter::getPrimitiveType(type).get();
79 WhichDataType which(actual_type);
80
81 if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() &&
82 !which.isDateOrDateTime() && !which.isEnum())
83 throw Exception("Unexpected type " + type->getName() + " of bloom filter index.",
84 ErrorCodes::ILLEGAL_COLUMN);
85 }
86}
87
88std::unique_ptr<IMergeTreeIndex> bloomFilterIndexCreatorNew(
89 const NamesAndTypesList & columns, std::shared_ptr<ASTIndexDeclaration> node, const Context & context)
90{
91 if (node->name.empty())
92 throw Exception("Index must have unique name.", ErrorCodes::INCORRECT_QUERY);
93
94 ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone());
95
96 auto syntax = SyntaxAnalyzer(context, {}).analyze(expr_list, columns);
97 auto index_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false);
98 auto index_sample = ExpressionAnalyzer(expr_list, syntax, context).getActions(true)->getSampleBlock();
99
100 assertIndexColumnsType(index_sample);
101
102 double max_conflict_probability = 0.025;
103 if (node->type->arguments && !node->type->arguments->children.empty())
104 max_conflict_probability = typeid_cast<const ASTLiteral &>(*node->type->arguments->children[0]).value.get<Float64>();
105
106 const auto & bits_per_row_and_size_of_hash_functions = BloomFilterHash::calculationBestPractices(max_conflict_probability);
107
108 return std::make_unique<MergeTreeIndexBloomFilter>(
109 node->name, std::move(index_expr), index_sample.getNames(), index_sample.getDataTypes(), index_sample, node->granularity,
110 bits_per_row_and_size_of_hash_functions.first, bits_per_row_and_size_of_hash_functions.second);
111}
112
113}
114