1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * bloomfilter.c |
4 | * Space-efficient set membership testing |
5 | * |
6 | * A Bloom filter is a probabilistic data structure that is used to test an |
7 | * element's membership of a set. False positives are possible, but false |
8 | * negatives are not; a test of membership of the set returns either "possibly |
9 | * in set" or "definitely not in set". This is typically very space efficient, |
10 | * which can be a decisive advantage. |
11 | * |
12 | * Elements can be added to the set, but not removed. The more elements that |
13 | * are added, the larger the probability of false positives. Caller must hint |
14 | * an estimated total size of the set when the Bloom filter is initialized. |
15 | * This is used to balance the use of memory against the final false positive |
16 | * rate. |
17 | * |
18 | * The implementation is well suited to data synchronization problems between |
19 | * unordered sets, especially where predictable performance is important and |
20 | * some false positives are acceptable. It's also well suited to cache |
21 | * filtering problems where a relatively small and/or low cardinality set is |
22 | * fingerprinted, especially when many subsequent membership tests end up |
23 | * indicating that values of interest are not present. That should save the |
24 | * caller many authoritative lookups, such as expensive probes of a much larger |
25 | * on-disk structure. |
26 | * |
27 | * Copyright (c) 2018-2019, PostgreSQL Global Development Group |
28 | * |
29 | * IDENTIFICATION |
30 | * src/backend/lib/bloomfilter.c |
31 | * |
32 | *------------------------------------------------------------------------- |
33 | */ |
34 | #include "postgres.h" |
35 | |
36 | #include <math.h> |
37 | |
38 | #include "lib/bloomfilter.h" |
39 | #include "port/pg_bitutils.h" |
40 | #include "utils/hashutils.h" |
41 | |
42 | #define MAX_HASH_FUNCS 10 |
43 | |
44 | struct bloom_filter |
45 | { |
46 | /* K hash functions are used, seeded by caller's seed */ |
47 | int k_hash_funcs; |
48 | uint64 seed; |
49 | /* m is bitset size, in bits. Must be a power of two <= 2^32. */ |
50 | uint64 m; |
51 | unsigned char bitset[FLEXIBLE_ARRAY_MEMBER]; |
52 | }; |
53 | |
54 | static int my_bloom_power(uint64 target_bitset_bits); |
55 | static int optimal_k(uint64 bitset_bits, int64 total_elems); |
56 | static void k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, |
57 | size_t len); |
58 | static inline uint32 mod_m(uint32 a, uint64 m); |
59 | |
60 | /* |
61 | * Create Bloom filter in caller's memory context. We aim for a false positive |
62 | * rate of between 1% and 2% when bitset size is not constrained by memory |
63 | * availability. |
64 | * |
65 | * total_elems is an estimate of the final size of the set. It should be |
66 | * approximately correct, but the implementation can cope well with it being |
67 | * off by perhaps a factor of five or more. See "Bloom Filters in |
68 | * Probabilistic Verification" (Dillinger & Manolios, 2004) for details of why |
69 | * this is the case. |
70 | * |
71 | * bloom_work_mem is sized in KB, in line with the general work_mem convention. |
72 | * This determines the size of the underlying bitset (trivial bookkeeping space |
73 | * isn't counted). The bitset is always sized as a power of two number of |
74 | * bits, and the largest possible bitset is 512MB (2^32 bits). The |
75 | * implementation allocates only enough memory to target its standard false |
76 | * positive rate, using a simple formula with caller's total_elems estimate as |
77 | * an input. The bitset might be as small as 1MB, even when bloom_work_mem is |
78 | * much higher. |
79 | * |
80 | * The Bloom filter is seeded using a value provided by the caller. Using a |
81 | * distinct seed value on every call makes it unlikely that the same false |
82 | * positives will reoccur when the same set is fingerprinted a second time. |
83 | * Callers that don't care about this pass a constant as their seed, typically |
84 | * 0. Callers can use a pseudo-random seed in the range of 0 - INT_MAX by |
85 | * calling random(). |
86 | */ |
87 | bloom_filter * |
88 | bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed) |
89 | { |
90 | bloom_filter *filter; |
91 | int bloom_power; |
92 | uint64 bitset_bytes; |
93 | uint64 bitset_bits; |
94 | |
95 | /* |
96 | * Aim for two bytes per element; this is sufficient to get a false |
97 | * positive rate below 1%, independent of the size of the bitset or total |
98 | * number of elements. Also, if rounding down the size of the bitset to |
99 | * the next lowest power of two turns out to be a significant drop, the |
100 | * false positive rate still won't exceed 2% in almost all cases. |
101 | */ |
102 | bitset_bytes = Min(bloom_work_mem * UINT64CONST(1024), total_elems * 2); |
103 | bitset_bytes = Max(1024 * 1024, bitset_bytes); |
104 | |
105 | /* |
106 | * Size in bits should be the highest power of two <= target. bitset_bits |
107 | * is uint64 because PG_UINT32_MAX is 2^32 - 1, not 2^32 |
108 | */ |
109 | bloom_power = my_bloom_power(bitset_bytes * BITS_PER_BYTE); |
110 | bitset_bits = UINT64CONST(1) << bloom_power; |
111 | bitset_bytes = bitset_bits / BITS_PER_BYTE; |
112 | |
113 | /* Allocate bloom filter with unset bitset */ |
114 | filter = palloc0(offsetof(bloom_filter, bitset) + |
115 | sizeof(unsigned char) * bitset_bytes); |
116 | filter->k_hash_funcs = optimal_k(bitset_bits, total_elems); |
117 | filter->seed = seed; |
118 | filter->m = bitset_bits; |
119 | |
120 | return filter; |
121 | } |
122 | |
123 | /* |
124 | * Free Bloom filter |
125 | */ |
126 | void |
127 | bloom_free(bloom_filter *filter) |
128 | { |
129 | pfree(filter); |
130 | } |
131 | |
132 | /* |
133 | * Add element to Bloom filter |
134 | */ |
135 | void |
136 | bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len) |
137 | { |
138 | uint32 hashes[MAX_HASH_FUNCS]; |
139 | int i; |
140 | |
141 | k_hashes(filter, hashes, elem, len); |
142 | |
143 | /* Map a bit-wise address to a byte-wise address + bit offset */ |
144 | for (i = 0; i < filter->k_hash_funcs; i++) |
145 | { |
146 | filter->bitset[hashes[i] >> 3] |= 1 << (hashes[i] & 7); |
147 | } |
148 | } |
149 | |
150 | /* |
151 | * Test if Bloom filter definitely lacks element. |
152 | * |
153 | * Returns true if the element is definitely not in the set of elements |
154 | * observed by bloom_add_element(). Otherwise, returns false, indicating that |
155 | * element is probably present in set. |
156 | */ |
157 | bool |
158 | bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len) |
159 | { |
160 | uint32 hashes[MAX_HASH_FUNCS]; |
161 | int i; |
162 | |
163 | k_hashes(filter, hashes, elem, len); |
164 | |
165 | /* Map a bit-wise address to a byte-wise address + bit offset */ |
166 | for (i = 0; i < filter->k_hash_funcs; i++) |
167 | { |
168 | if (!(filter->bitset[hashes[i] >> 3] & (1 << (hashes[i] & 7)))) |
169 | return true; |
170 | } |
171 | |
172 | return false; |
173 | } |
174 | |
175 | /* |
176 | * What proportion of bits are currently set? |
177 | * |
178 | * Returns proportion, expressed as a multiplier of filter size. That should |
179 | * generally be close to 0.5, even when we have more than enough memory to |
180 | * ensure a false positive rate within target 1% to 2% band, since more hash |
181 | * functions are used as more memory is available per element. |
182 | * |
183 | * This is the only instrumentation that is low overhead enough to appear in |
184 | * debug traces. When debugging Bloom filter code, it's likely to be far more |
185 | * interesting to directly test the false positive rate. |
186 | */ |
187 | double |
188 | bloom_prop_bits_set(bloom_filter *filter) |
189 | { |
190 | int bitset_bytes = filter->m / BITS_PER_BYTE; |
191 | uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes); |
192 | |
193 | return bits_set / (double) filter->m; |
194 | } |
195 | |
196 | /* |
197 | * Which element in the sequence of powers of two is less than or equal to |
198 | * target_bitset_bits? |
199 | * |
200 | * Value returned here must be generally safe as the basis for actual bitset |
201 | * size. |
202 | * |
203 | * Bitset is never allowed to exceed 2 ^ 32 bits (512MB). This is sufficient |
204 | * for the needs of all current callers, and allows us to use 32-bit hash |
205 | * functions. It also makes it easy to stay under the MaxAllocSize restriction |
206 | * (caller needs to leave room for non-bitset fields that appear before |
207 | * flexible array member, so a 1GB bitset would use an allocation that just |
208 | * exceeds MaxAllocSize). |
209 | */ |
210 | static int |
211 | my_bloom_power(uint64 target_bitset_bits) |
212 | { |
213 | int bloom_power = -1; |
214 | |
215 | while (target_bitset_bits > 0 && bloom_power < 32) |
216 | { |
217 | bloom_power++; |
218 | target_bitset_bits >>= 1; |
219 | } |
220 | |
221 | return bloom_power; |
222 | } |
223 | |
224 | /* |
225 | * Determine optimal number of hash functions based on size of filter in bits, |
226 | * and projected total number of elements. The optimal number is the number |
227 | * that minimizes the false positive rate. |
228 | */ |
229 | static int |
230 | optimal_k(uint64 bitset_bits, int64 total_elems) |
231 | { |
232 | int k = rint(log(2.0) * bitset_bits / total_elems); |
233 | |
234 | return Max(1, Min(k, MAX_HASH_FUNCS)); |
235 | } |
236 | |
237 | /* |
238 | * Generate k hash values for element. |
239 | * |
240 | * Caller passes array, which is filled-in with k values determined by hashing |
241 | * caller's element. |
242 | * |
243 | * Only 2 real independent hash functions are actually used to support an |
244 | * interface of up to MAX_HASH_FUNCS hash functions; enhanced double hashing is |
245 | * used to make this work. The main reason we prefer enhanced double hashing |
246 | * to classic double hashing is that the latter has an issue with collisions |
247 | * when using power of two sized bitsets. See Dillinger & Manolios for full |
248 | * details. |
249 | */ |
250 | static void |
251 | k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len) |
252 | { |
253 | uint64 hash; |
254 | uint32 x, |
255 | y; |
256 | uint64 m; |
257 | int i; |
258 | |
259 | /* Use 64-bit hashing to get two independent 32-bit hashes */ |
260 | hash = DatumGetUInt64(hash_any_extended(elem, len, filter->seed)); |
261 | x = (uint32) hash; |
262 | y = (uint32) (hash >> 32); |
263 | m = filter->m; |
264 | |
265 | x = mod_m(x, m); |
266 | y = mod_m(y, m); |
267 | |
268 | /* Accumulate hashes */ |
269 | hashes[0] = x; |
270 | for (i = 1; i < filter->k_hash_funcs; i++) |
271 | { |
272 | x = mod_m(x + y, m); |
273 | y = mod_m(y + i, m); |
274 | |
275 | hashes[i] = x; |
276 | } |
277 | } |
278 | |
279 | /* |
280 | * Calculate "val MOD m" inexpensively. |
281 | * |
282 | * Assumes that m (which is bitset size) is a power of two. |
283 | * |
284 | * Using a power of two number of bits for bitset size allows us to use bitwise |
285 | * AND operations to calculate the modulo of a hash value. It's also a simple |
286 | * way of avoiding the modulo bias effect. |
287 | */ |
288 | static inline uint32 |
289 | mod_m(uint32 val, uint64 m) |
290 | { |
291 | Assert(m <= PG_UINT32_MAX + UINT64CONST(1)); |
292 | Assert(((m - 1) & m) == 0); |
293 | |
294 | return val & (m - 1); |
295 | } |
296 | |