1#pragma once
2
3#include <vector>
4#include <Core/Types.h>
5#include <Core/Field.h>
6#include <Common/PODArray.h>
7#include <Common/Allocator.h>
8#include <Columns/IColumn.h>
9#include <Columns/ColumnVector.h>
10#include <DataTypes/IDataType.h>
11
12namespace DB
13{
14
15class BloomFilter
16{
17
18public:
19 using UnderType = UInt64;
20 using Container = std::vector<UnderType>;
21
22 /// size -- size of filter in bytes.
23 /// hashes -- number of used hash functions.
24 /// seed -- random seed for hash functions generation.
25 BloomFilter(size_t size_, size_t hashes_, size_t seed_);
26 BloomFilter(const BloomFilter & bloom_filter);
27
28 bool find(const char * data, size_t len);
29 void add(const char * data, size_t len);
30 void clear();
31
32 void addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed);
33 bool findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed);
34
35 /// Checks if this contains everything from another bloom filter.
36 /// Bloom filters must have equal size and seed.
37 bool contains(const BloomFilter & bf);
38
39 const Container & getFilter() const { return filter; }
40 Container & getFilter() { return filter; }
41
42 /// For debug.
43 UInt64 isEmpty() const;
44
45 friend bool operator== (const BloomFilter & a, const BloomFilter & b);
46private:
47
48 size_t size;
49 size_t hashes;
50 size_t seed;
51 size_t words;
52 Container filter;
53
54public:
55 static ColumnPtr getPrimitiveColumn(const ColumnPtr & column);
56 static DataTypePtr getPrimitiveType(const DataTypePtr & data_type);
57};
58
59using BloomFilterPtr = std::shared_ptr<BloomFilter>;
60
61bool operator== (const BloomFilter & a, const BloomFilter & b);
62
63
64
65}
66