MergeTreeIndexBloomFilter.cpp source code [ClickHouse/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp]

1	#include <Storages/MergeTree/MergeTreeIndexBloomFilter.h>
2	#include <Storages/MergeTree/MergeTreeData.h>
3	#include <Interpreters/SyntaxAnalyzer.h>
4	#include <Interpreters/ExpressionAnalyzer.h>
5	#include <Core/Types.h>
6	#include <ext/bit_cast.h>
7	#include <Parsers/ASTLiteral.h>
8	#include <IO/ReadHelpers.h>
9	#include <IO/WriteHelpers.h>
10	#include <DataTypes/DataTypeArray.h>
11	#include <DataTypes/DataTypeNullable.h>
12	#include <Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h>
13	#include <Parsers/queryToString.h>
14	#include <Columns/ColumnConst.h>
15	#include <Columns/ColumnLowCardinality.h>
16	#include <Interpreters/BloomFilterHash.h>
17
18
19	namespace DB
20	{
21
/// Error codes used in this file. They are only declared here (extern); the definitions live elsewhere.
/// ILLEGAL_COLUMN is thrown by assertIndexColumnsType below, so it is declared explicitly
/// instead of relying on a declaration leaking in through some included header.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int INCORRECT_QUERY;
    extern const int LOGICAL_ERROR;
}
27
28	MergeTreeIndexBloomFilter::MergeTreeIndexBloomFilter(
29	const String & name_, const ExpressionActionsPtr & expr_, const Names & columns_, const DataTypes & data_types_, const Block & header_,
30	size_t granularity_, size_t bits_per_row_, size_t hash_functions_)
31	: IMergeTreeIndex (name_, expr_, columns_, data_types_, header_, granularity_), bits_per_row(bits_per_row_),
32	hash_functions(hash_functions_)
33	{
34	}
35
36	MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const
37	{
38	return std::make_shared<MergeTreeIndexGranuleBloomFilter>(bits_per_row, hash_functions, columns.size());
39	}
40
41	bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const
42	{
43	const String & column_name = node ->getColumnName();
44
45	for (const auto & cname : columns)
46	if (column_name == cname)
47	return true;
48
49	if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
50	{
51	for (const auto & children : func->arguments ->children)
52	if (mayBenefitFromIndexForIn(children))
53	return true;
54	}
55
56	return false;
57	}
58
59	MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() const
60	{
61	return std::make_shared<MergeTreeIndexAggregatorBloomFilter>(bits_per_row, hash_functions, columns);
62	}
63
64	MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const
65	{
66	return std::make_shared<MergeTreeIndexConditionBloomFilter>(query_info, context, header, hash_functions);
67	}
68
69	static void assertIndexColumnsType(const Block & header)
70	{
71	if (!header \|\| !header.columns())
72	throw Exception ("Index must have columns.", ErrorCodes::INCORRECT_QUERY);
73
74	const DataTypes & columns_data_types = header.getDataTypes();
75
76	for (auto & type : columns_data_types)
77	{
78	const IDataType * actual_type = BloomFilter::getPrimitiveType(type).get();
79	WhichDataType which(actual_type);
80
81	if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() &&
82	!which.isDateOrDateTime() && !which.isEnum())
83	throw Exception ("Unexpected type " + type ->getName() + " of bloom filter index.",
84	ErrorCodes::ILLEGAL_COLUMN);
85	}
86	}
87
88	std::unique_ptr<IMergeTreeIndex> bloomFilterIndexCreatorNew(
89	const NamesAndTypesList & columns, std::shared_ptr<ASTIndexDeclaration> node, const Context & context)
90	{
91	if (node ->name.empty())
92	throw Exception ("Index must have unique name.", ErrorCodes::INCORRECT_QUERY);
93
94	ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node ->expr->clone());
95
96	auto syntax = SyntaxAnalyzer (context, {}).analyze(expr_list, columns);
97	auto index_expr = ExpressionAnalyzer (expr_list, syntax, context).getActions(false);
98	auto index_sample = ExpressionAnalyzer (expr_list, syntax, context).getActions(true)->getSampleBlock();
99
100	assertIndexColumnsType(index_sample);
101
102	double max_conflict_probability = `0.025`;
103	if (node ->type->arguments && !node ->type->arguments ->children.empty())
104	max_conflict_probability = typeid_cast<const ASTLiteral &>(*node ->type->arguments ->children [`0`]).value.get<Float64>();
105
106	const auto & bits_per_row_and_size_of_hash_functions = BloomFilterHash::calculationBestPractices(max_conflict_probability);
107
108	return std::make_unique<MergeTreeIndexBloomFilter>(
109	node ->name, std::move(index_expr), index_sample.getNames(), index_sample.getDataTypes(), index_sample, node ->granularity,
110	bits_per_row_and_size_of_hash_functions.first, bits_per_row_and_size_of_hash_functions.second);
111	}
112
113	}
114

Browse the source code of ClickHouse/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp