1/*-------------------------------------------------------------------------
2 *
3 * bloomfilter.c
4 * Space-efficient set membership testing
5 *
6 * A Bloom filter is a probabilistic data structure that is used to test an
7 * element's membership of a set. False positives are possible, but false
8 * negatives are not; a test of membership of the set returns either "possibly
9 * in set" or "definitely not in set". This is typically very space efficient,
10 * which can be a decisive advantage.
11 *
12 * Elements can be added to the set, but not removed. The more elements that
13 * are added, the larger the probability of false positives. Caller must hint
14 * an estimated total size of the set when the Bloom filter is initialized.
15 * This is used to balance the use of memory against the final false positive
16 * rate.
17 *
18 * The implementation is well suited to data synchronization problems between
19 * unordered sets, especially where predictable performance is important and
20 * some false positives are acceptable. It's also well suited to cache
21 * filtering problems where a relatively small and/or low cardinality set is
22 * fingerprinted, especially when many subsequent membership tests end up
23 * indicating that values of interest are not present. That should save the
24 * caller many authoritative lookups, such as expensive probes of a much larger
25 * on-disk structure.
26 *
27 * Copyright (c) 2018-2019, PostgreSQL Global Development Group
28 *
29 * IDENTIFICATION
30 * src/backend/lib/bloomfilter.c
31 *
32 *-------------------------------------------------------------------------
33 */
34#include "postgres.h"
35
36#include <math.h>
37
38#include "lib/bloomfilter.h"
39#include "port/pg_bitutils.h"
40#include "utils/hashutils.h"
41
42#define MAX_HASH_FUNCS 10
43
44struct bloom_filter
45{
46 /* K hash functions are used, seeded by caller's seed */
47 int k_hash_funcs;
48 uint64 seed;
49 /* m is bitset size, in bits. Must be a power of two <= 2^32. */
50 uint64 m;
51 unsigned char bitset[FLEXIBLE_ARRAY_MEMBER];
52};
53
54static int my_bloom_power(uint64 target_bitset_bits);
55static int optimal_k(uint64 bitset_bits, int64 total_elems);
56static void k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem,
57 size_t len);
58static inline uint32 mod_m(uint32 a, uint64 m);
59
60/*
61 * Create Bloom filter in caller's memory context. We aim for a false positive
62 * rate of between 1% and 2% when bitset size is not constrained by memory
63 * availability.
64 *
65 * total_elems is an estimate of the final size of the set. It should be
66 * approximately correct, but the implementation can cope well with it being
67 * off by perhaps a factor of five or more. See "Bloom Filters in
68 * Probabilistic Verification" (Dillinger & Manolios, 2004) for details of why
69 * this is the case.
70 *
71 * bloom_work_mem is sized in KB, in line with the general work_mem convention.
72 * This determines the size of the underlying bitset (trivial bookkeeping space
73 * isn't counted). The bitset is always sized as a power of two number of
74 * bits, and the largest possible bitset is 512MB (2^32 bits). The
75 * implementation allocates only enough memory to target its standard false
76 * positive rate, using a simple formula with caller's total_elems estimate as
77 * an input. The bitset might be as small as 1MB, even when bloom_work_mem is
78 * much higher.
79 *
80 * The Bloom filter is seeded using a value provided by the caller. Using a
81 * distinct seed value on every call makes it unlikely that the same false
82 * positives will reoccur when the same set is fingerprinted a second time.
83 * Callers that don't care about this pass a constant as their seed, typically
84 * 0. Callers can use a pseudo-random seed in the range of 0 - INT_MAX by
85 * calling random().
86 */
87bloom_filter *
88bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed)
89{
90 bloom_filter *filter;
91 int bloom_power;
92 uint64 bitset_bytes;
93 uint64 bitset_bits;
94
95 /*
96 * Aim for two bytes per element; this is sufficient to get a false
97 * positive rate below 1%, independent of the size of the bitset or total
98 * number of elements. Also, if rounding down the size of the bitset to
99 * the next lowest power of two turns out to be a significant drop, the
100 * false positive rate still won't exceed 2% in almost all cases.
101 */
102 bitset_bytes = Min(bloom_work_mem * UINT64CONST(1024), total_elems * 2);
103 bitset_bytes = Max(1024 * 1024, bitset_bytes);
104
105 /*
106 * Size in bits should be the highest power of two <= target. bitset_bits
107 * is uint64 because PG_UINT32_MAX is 2^32 - 1, not 2^32
108 */
109 bloom_power = my_bloom_power(bitset_bytes * BITS_PER_BYTE);
110 bitset_bits = UINT64CONST(1) << bloom_power;
111 bitset_bytes = bitset_bits / BITS_PER_BYTE;
112
113 /* Allocate bloom filter with unset bitset */
114 filter = palloc0(offsetof(bloom_filter, bitset) +
115 sizeof(unsigned char) * bitset_bytes);
116 filter->k_hash_funcs = optimal_k(bitset_bits, total_elems);
117 filter->seed = seed;
118 filter->m = bitset_bits;
119
120 return filter;
121}
122
123/*
124 * Free Bloom filter
125 */
126void
127bloom_free(bloom_filter *filter)
128{
129 pfree(filter);
130}
131
132/*
133 * Add element to Bloom filter
134 */
135void
136bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len)
137{
138 uint32 hashes[MAX_HASH_FUNCS];
139 int i;
140
141 k_hashes(filter, hashes, elem, len);
142
143 /* Map a bit-wise address to a byte-wise address + bit offset */
144 for (i = 0; i < filter->k_hash_funcs; i++)
145 {
146 filter->bitset[hashes[i] >> 3] |= 1 << (hashes[i] & 7);
147 }
148}
149
150/*
151 * Test if Bloom filter definitely lacks element.
152 *
153 * Returns true if the element is definitely not in the set of elements
154 * observed by bloom_add_element(). Otherwise, returns false, indicating that
155 * element is probably present in set.
156 */
157bool
158bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len)
159{
160 uint32 hashes[MAX_HASH_FUNCS];
161 int i;
162
163 k_hashes(filter, hashes, elem, len);
164
165 /* Map a bit-wise address to a byte-wise address + bit offset */
166 for (i = 0; i < filter->k_hash_funcs; i++)
167 {
168 if (!(filter->bitset[hashes[i] >> 3] & (1 << (hashes[i] & 7))))
169 return true;
170 }
171
172 return false;
173}
174
175/*
176 * What proportion of bits are currently set?
177 *
178 * Returns proportion, expressed as a multiplier of filter size. That should
179 * generally be close to 0.5, even when we have more than enough memory to
180 * ensure a false positive rate within target 1% to 2% band, since more hash
181 * functions are used as more memory is available per element.
182 *
183 * This is the only instrumentation that is low overhead enough to appear in
184 * debug traces. When debugging Bloom filter code, it's likely to be far more
185 * interesting to directly test the false positive rate.
186 */
187double
188bloom_prop_bits_set(bloom_filter *filter)
189{
190 int bitset_bytes = filter->m / BITS_PER_BYTE;
191 uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
192
193 return bits_set / (double) filter->m;
194}
195
196/*
197 * Which element in the sequence of powers of two is less than or equal to
198 * target_bitset_bits?
199 *
200 * Value returned here must be generally safe as the basis for actual bitset
201 * size.
202 *
203 * Bitset is never allowed to exceed 2 ^ 32 bits (512MB). This is sufficient
204 * for the needs of all current callers, and allows us to use 32-bit hash
205 * functions. It also makes it easy to stay under the MaxAllocSize restriction
206 * (caller needs to leave room for non-bitset fields that appear before
207 * flexible array member, so a 1GB bitset would use an allocation that just
208 * exceeds MaxAllocSize).
209 */
210static int
211my_bloom_power(uint64 target_bitset_bits)
212{
213 int bloom_power = -1;
214
215 while (target_bitset_bits > 0 && bloom_power < 32)
216 {
217 bloom_power++;
218 target_bitset_bits >>= 1;
219 }
220
221 return bloom_power;
222}
223
224/*
225 * Determine optimal number of hash functions based on size of filter in bits,
226 * and projected total number of elements. The optimal number is the number
227 * that minimizes the false positive rate.
228 */
229static int
230optimal_k(uint64 bitset_bits, int64 total_elems)
231{
232 int k = rint(log(2.0) * bitset_bits / total_elems);
233
234 return Max(1, Min(k, MAX_HASH_FUNCS));
235}
236
237/*
238 * Generate k hash values for element.
239 *
240 * Caller passes array, which is filled-in with k values determined by hashing
241 * caller's element.
242 *
243 * Only 2 real independent hash functions are actually used to support an
244 * interface of up to MAX_HASH_FUNCS hash functions; enhanced double hashing is
245 * used to make this work. The main reason we prefer enhanced double hashing
246 * to classic double hashing is that the latter has an issue with collisions
247 * when using power of two sized bitsets. See Dillinger & Manolios for full
248 * details.
249 */
250static void
251k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len)
252{
253 uint64 hash;
254 uint32 x,
255 y;
256 uint64 m;
257 int i;
258
259 /* Use 64-bit hashing to get two independent 32-bit hashes */
260 hash = DatumGetUInt64(hash_any_extended(elem, len, filter->seed));
261 x = (uint32) hash;
262 y = (uint32) (hash >> 32);
263 m = filter->m;
264
265 x = mod_m(x, m);
266 y = mod_m(y, m);
267
268 /* Accumulate hashes */
269 hashes[0] = x;
270 for (i = 1; i < filter->k_hash_funcs; i++)
271 {
272 x = mod_m(x + y, m);
273 y = mod_m(y + i, m);
274
275 hashes[i] = x;
276 }
277}
278
279/*
280 * Calculate "val MOD m" inexpensively.
281 *
282 * Assumes that m (which is bitset size) is a power of two.
283 *
284 * Using a power of two number of bits for bitset size allows us to use bitwise
285 * AND operations to calculate the modulo of a hash value. It's also a simple
286 * way of avoiding the modulo bias effect.
287 */
288static inline uint32
289mod_m(uint32 val, uint64 m)
290{
291 Assert(m <= PG_UINT32_MAX + UINT64CONST(1));
292 Assert(((m - 1) & m) == 0);
293
294 return val & (m - 1);
295}
296