1 | #pragma once |
---|---|
2 | |
3 | #include <vector> |
4 | #include <Core/Types.h> |
5 | #include <Core/Field.h> |
6 | #include <Common/PODArray.h> |
7 | #include <Common/Allocator.h> |
8 | #include <Columns/IColumn.h> |
9 | #include <Columns/ColumnVector.h> |
10 | #include <DataTypes/IDataType.h> |
11 | |
12 | namespace DB |
13 | { |
14 | |
15 | class BloomFilter |
16 | { |
17 | |
18 | public: |
19 | using UnderType = UInt64; |
20 | using Container = std::vector<UnderType>; |
21 | |
22 | /// size -- size of filter in bytes. |
23 | /// hashes -- number of used hash functions. |
24 | /// seed -- random seed for hash functions generation. |
25 | BloomFilter(size_t size_, size_t hashes_, size_t seed_); |
26 | BloomFilter(const BloomFilter & bloom_filter); |
27 | |
28 | bool find(const char * data, size_t len); |
29 | void add(const char * data, size_t len); |
30 | void clear(); |
31 | |
32 | void addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed); |
33 | bool findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed); |
34 | |
35 | /// Checks if this contains everything from another bloom filter. |
36 | /// Bloom filters must have equal size and seed. |
37 | bool contains(const BloomFilter & bf); |
38 | |
39 | const Container & getFilter() const { return filter; } |
40 | Container & getFilter() { return filter; } |
41 | |
42 | /// For debug. |
43 | UInt64 isEmpty() const; |
44 | |
45 | friend bool operator== (const BloomFilter & a, const BloomFilter & b); |
46 | private: |
47 | |
48 | size_t size; |
49 | size_t hashes; |
50 | size_t seed; |
51 | size_t words; |
52 | Container filter; |
53 | |
54 | public: |
55 | static ColumnPtr getPrimitiveColumn(const ColumnPtr & column); |
56 | static DataTypePtr getPrimitiveType(const DataTypePtr & data_type); |
57 | }; |
58 | |
59 | using BloomFilterPtr = std::shared_ptr<BloomFilter>; |
60 | |
61 | bool operator== (const BloomFilter & a, const BloomFilter & b); |
62 | |
63 | |
64 | |
65 | } |
66 |