1#include <Interpreters/BloomFilter.h>
2#include <city.h>
3#include <Columns/ColumnArray.h>
4#include <Columns/ColumnNullable.h>
5#include <Columns/ColumnLowCardinality.h>
6#include <DataTypes/DataTypeArray.h>
7#include <DataTypes/DataTypeNullable.h>
8#include <DataTypes/DataTypeLowCardinality.h>
9
10
11namespace DB
12{
13
14static constexpr UInt64 SEED_GEN_A = 845897321;
15static constexpr UInt64 SEED_GEN_B = 217728422;
16
17BloomFilter::BloomFilter(size_t size_, size_t hashes_, size_t seed_)
18 : size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)), filter(words, 0) {}
19
20BloomFilter::BloomFilter(const BloomFilter & bloom_filter)
21 : size(bloom_filter.size), hashes(bloom_filter.hashes), seed(bloom_filter.seed), words(bloom_filter.words), filter(bloom_filter.filter) {}
22
23bool BloomFilter::find(const char * data, size_t len)
24{
25 size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed);
26 size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B);
27
28 for (size_t i = 0; i < hashes; ++i)
29 {
30 size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
31 if (!(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType))))))
32 return false;
33 }
34 return true;
35}
36
37void BloomFilter::add(const char * data, size_t len)
38{
39 size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed);
40 size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B);
41
42 for (size_t i = 0; i < hashes; ++i)
43 {
44 size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
45 filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
46 }
47}
48
49void BloomFilter::clear()
50{
51 filter.assign(words, 0);
52}
53
54bool BloomFilter::contains(const BloomFilter & bf)
55{
56 for (size_t i = 0; i < words; ++i)
57 {
58 if ((filter[i] & bf.filter[i]) != bf.filter[i])
59 return false;
60 }
61 return true;
62}
63
64UInt64 BloomFilter::isEmpty() const
65{
66 for (size_t i = 0; i < words; ++i)
67 if (filter[i] != 0)
68 return false;
69 return true;
70}
71
72bool operator== (const BloomFilter & a, const BloomFilter & b)
73{
74 for (size_t i = 0; i < a.words; ++i)
75 if (a.filter[i] != b.filter[i])
76 return false;
77 return true;
78}
79
80void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed)
81{
82 size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size);
83 filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
84}
85
86bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed)
87{
88 size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size);
89 return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType)))));
90}
91
92DataTypePtr BloomFilter::getPrimitiveType(const DataTypePtr & data_type)
93{
94 if (const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get()))
95 {
96 if (!typeid_cast<const DataTypeArray *>(array_type->getNestedType().get()))
97 return getPrimitiveType(array_type->getNestedType());
98 else
99 throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
100 }
101
102 if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(data_type.get()))
103 return getPrimitiveType(nullable_type->getNestedType());
104
105 if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(data_type.get()))
106 return getPrimitiveType(low_cardinality_type->getDictionaryType());
107
108 return data_type;
109}
110
111ColumnPtr BloomFilter::getPrimitiveColumn(const ColumnPtr & column)
112{
113 if (const auto * array_col = typeid_cast<const ColumnArray *>(column.get()))
114 return getPrimitiveColumn(array_col->getDataPtr());
115
116 if (const auto * nullable_col = typeid_cast<const ColumnNullable *>(column.get()))
117 return getPrimitiveColumn(nullable_col->getNestedColumnPtr());
118
119 if (const auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(column.get()))
120 return getPrimitiveColumn(low_cardinality_col->convertToFullColumnIfLowCardinality());
121
122 return column;
123}
124
125}
126