integerset.c source code [PostgreSQL/src/backend/lib/integerset.c]

1	/-------------------------------------------------------------------------*
2	*
3	* integerset.c
4	* Data structure to hold a large set of 64-bit integers efficiently
5	*
6	* IntegerSet provides an in-memory data structure to hold a set of
7	* arbitrary 64-bit integers. Internally, the values are stored in a
8	* B-tree, with a special packed representation at the leaf level using
9	* the Simple-8b algorithm, which can pack clusters of nearby values
10	* very tightly.
11	*
12	* Memory consumption depends on the number of values stored, but also
13	* on how far the values are from each other. In the best case, with
14	* long runs of consecutive integers, memory consumption can be as low as
15	* 0.1 bytes per integer. In the worst case, if integers are more than
16	* 2^32 apart, it uses about 8 bytes per integer. In typical use, the
17	* consumption per integer is somewhere between those extremes, depending
18	* on the range of integers stored, and how "clustered" they are.
19	*
20	*
21	* Interface
22	* ---------
23	*
24	* intset_create - Create a new, empty set
25	* intset_add_member - Add an integer to the set
26	* intset_is_member - Test if an integer is in the set
27	* intset_begin_iterate - Begin iterating through all integers in set
28	* intset_iterate_next - Return next set member, if any
29	*
30	* intset_create() creates the set in the current memory context. Subsequent
31	* operations that add to the data structure will continue to allocate from
32	* that same context, even if it's not current anymore.
33	*
34	* Note that there is no function to free an integer set. If you need to do
35	* that, create a dedicated memory context to hold it, and destroy the memory
36	* context instead.
37	*
38	*
39	* Limitations
40	* -----------
41	*
42	* - Values must be added in order. (Random insertions would require
43	* splitting nodes, which hasn't been implemented.)
44	*
45	* - Values cannot be added while iteration is in progress.
46	*
47	* - No support for removing values.
48	*
49	* None of these limitations are fundamental to the data structure, so they
50	* could be lifted if needed, by writing some new code. But the current
51	* users of this facility don't need them.
52	*
53	*
54	* References
55	* ----------
56	*
57	* Simple-8b encoding is based on:
58	*
59	* Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
60	* Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
61	* (https://doi.org/10.1002/spe.948)
62	*
63	*
64	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
65	* Portions Copyright (c) 1994, Regents of the University of California
66	*
67	* IDENTIFICATION
68	* src/backend/lib/integerset.c
69	*
70	*-------------------------------------------------------------------------
71	*/
72	#include "postgres.h"
73
74	#include "access/htup_details.h"
75	#include "lib/integerset.h"
76	#include "port/pg_bitutils.h"
77	#include "utils/memutils.h"
78
79
80	/*
81	* Maximum number of integers that can be encoded in a single Simple-8b
82	* codeword. (Defined here before anything else, so that we can size arrays
83	* using this.)
84	*/
85	#define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
86
87	/*
88	* Parameters for shape of the in-memory B-tree.
89	*
90	* These set the size of each internal and leaf node. They don't necessarily
91	* need to be the same, because the tree is just an in-memory structure.
92	* With the default 64, each node is about 1 kb.
93	*
94	* If you change these, you must recalculate MAX_TREE_LEVELS, too!
95	*/
96	#define MAX_INTERNAL_ITEMS 64
97	#define MAX_LEAF_ITEMS 64
98
99	/*
100	* Maximum height of the tree.
101	*
102	* MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree. The
103	* theoretical maximum number of items that we can store in a set is 2^64,
104	* so MAX_TREE_LEVELS should be set so that:
105	*
106	* MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
107	*
108	* In practice, we'll need far fewer levels, because you will run out of
109	* memory long before reaching that number, but let's be conservative.
110	*/
111	#define MAX_TREE_LEVELS 11
112
113	/*
114	* Node structures, for the in-memory B-tree.
115	*
116	* An internal node holds a number of downlink pointers to leaf nodes, or
117	* to internal nodes on a lower level. For each downlink, the key value
118	* corresponding to the lower level node is stored in a sorted array. The
119	* stored key values are low keys. In other words, if the downlink has value
120	* X, then all items stored on that child are >= X.
121	*
122	* Each leaf node holds a number of "items", with a varying number of
123	* integers packed into each item. Each item consists of two 64-bit words:
124	* The first word holds the first integer stored in the item, in plain format.
125	* The second word contains between 0 and 240 more integers, packed using
126	* Simple-8b encoding. By storing the first integer in plain, unpacked,
127	* format, we can use binary search to quickly find an item that holds (or
128	* would hold) a particular integer. And by storing the rest in packed form,
129	* we still get pretty good memory density, if there are clusters of integers
130	* with similar values.
131	*
132	* Each leaf node also has a pointer to the next leaf node, so that the leaf
133	* nodes can be easily walked from beginning to end when iterating.
134	*/
135	typedef struct intset_node intset_node;
136	typedef struct intset_leaf_node intset_leaf_node;
137	typedef struct intset_internal_node intset_internal_node;
138
139	/ Common structure of both leaf and internal nodes. /
140	struct intset_node
141	{
142	uint16 level; / tree level of this node /
143	uint16 num_items; / number of items in this node /
144	};
145
146	/ Internal node /
147	struct intset_internal_node
148	{
149	/ common header, must match intset_node /
150	uint16 level; / >= 1 on internal nodes /
151	uint16 num_items;
152
153	/*
154	* 'values' is an array of key values, and 'downlinks' are pointers to
155	* lower-level nodes, corresponding to the key values.
156	*/
157	uint64 values[MAX_INTERNAL_ITEMS];
158	intset_node *downlinks[MAX_INTERNAL_ITEMS];
159	};
160
161	/ Leaf node /
162	typedef struct
163	{
164	uint64 first; / first integer in this item /
165	uint64 codeword; / simple8b encoded differences from 'first' /
166	} leaf_item;
167
168	#define MAX_VALUES_PER_LEAF_ITEM (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
169
170	struct intset_leaf_node
171	{
172	/ common header, must match intset_node /
173	uint16 level; / 0 on leafs /
174	uint16 num_items;
175
176	intset_leaf_node next; /* right sibling, if any /
177
178	leaf_item items[MAX_LEAF_ITEMS];
179	};
180
181	/*
182	* We buffer insertions in a simple array, before packing and inserting them
183	* into the B-tree. MAX_BUFFERED_VALUES sets the size of the buffer. The
184	* encoder assumes that it is large enough that we can always fill a leaf
185	* item with buffered new items. In other words, MAX_BUFFERED_VALUES must be
186	* larger than MAX_VALUES_PER_LEAF_ITEM. For efficiency, make it much larger.
187	*/
188	#define MAX_BUFFERED_VALUES (MAX_VALUES_PER_LEAF_ITEM * 2)
189
190	/*
191	* IntegerSet is the top-level object representing the set.
192	*
193	* The integers are stored in an in-memory B-tree structure, plus an array
194	* for newly-added integers. IntegerSet also tracks information about memory
195	* usage, as well as the current position when iterating the set with
196	* intset_begin_iterate / intset_iterate_next.
197	*/
198	struct IntegerSet
199	{
200	/*
201	* 'context' is the memory context holding this integer set and all its
202	* tree nodes.
203	*
204	* 'mem_used' tracks the amount of memory used. We don't do anything with
205	* it in integerset.c itself, but the callers can ask for it with
206	* intset_memory_usage().
207	*/
208	MemoryContext context;
209	uint64 mem_used;
210
211	uint64 num_entries; / total # of values in the set /
212	uint64 highest_value; / highest value stored in this set /
213
214	/*
215	* B-tree to hold the packed values.
216	*
217	* 'rightmost_nodes' hold pointers to the rightmost node on each level.
218	* rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
219	* parent, and so forth, all the way up to the root. These are needed when
220	* adding new values. (Currently, we require that new values are added at
221	* the end.)
222	*/
223	int num_levels; / height of the tree /
224	intset_node root; /* root node /
225	intset_node *rightmost_nodes[MAX_TREE_LEVELS];
226	intset_leaf_node leftmost_leaf; /* leftmost leaf node /
227
228	/*
229	* Holding area for new items that haven't been inserted to the tree yet.
230	*/
231	uint64 buffered_values[MAX_BUFFERED_VALUES];
232	int num_buffered_values;
233
234	/*
235	* Iterator support.
236	*
237	* 'iter_values' is an array of integers ready to be returned to the
238	* caller; 'iter_num_values' is the length of that array, and
239	* 'iter_valueno' is the next index. 'iter_node' and 'iter_itemno' point
240	* to the leaf node, and item within the leaf node, to get the next batch
241	* of values from.
242	*
243	* Normally, 'iter_values' points to 'iter_values_buf', which holds items
244	* decoded from a leaf item. But after we have scanned the whole B-tree,
245	* we iterate through all the unbuffered values, too, by pointing
246	* iter_values to 'buffered_values'.
247	*/
248	bool iter_active; / is iteration in progress? /
249
250	const uint64 *iter_values;
251	int iter_num_values; / number of elements in 'iter_values' /
252	int iter_valueno; / next index into 'iter_values' /
253
254	intset_leaf_node iter_node; /* current leaf node /
255	int iter_itemno; / next item in 'iter_node' to decode /
256
257	uint64 iter_values_buf[MAX_VALUES_PER_LEAF_ITEM];
258	};
259
260	/*
261	* Prototypes for internal functions.
262	*/
263	static void intset_update_upper(IntegerSet intset, int* level,
264	intset_node *child, uint64 child_key);
265	static void intset_flush_buffered_values(IntegerSet *intset);
266
267	static int intset_binsrch_uint64(uint64 value, uint64 arr, int* arr_elems,
268	bool nextkey);
269	static int intset_binsrch_leaf(uint64 value, leaf_item arr, int* arr_elems,
270	bool nextkey);
271
272	static uint64 simple8b_encode(const uint64 ints, int* *num_encoded, uint64 base);
273	static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
274	static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
275
276
277	/*
278	* Create a new, initially empty, integer set.
279	*
280	* The integer set is created in the current memory context.
281	* We will do all subsequent allocations in the same context, too, regardless
282	* of which memory context is current when new integers are added to the set.
283	*/
284	IntegerSet *
285	intset_create(void)
286	{
287	IntegerSet *intset;
288
289	intset = (IntegerSet ) palloc(sizeof*(IntegerSet));
290	intset->context = CurrentMemoryContext;
291	intset->mem_used = GetMemoryChunkSpace(intset);
292
293	intset->num_entries = `0`;
294	intset->highest_value = `0`;
295
296	intset->num_levels = `0`;
297	intset->root = NULL;
298	memset(intset->rightmost_nodes, `0`, sizeof(intset->rightmost_nodes));
299	intset->leftmost_leaf = NULL;
300
301	intset->num_buffered_values = `0`;
302
303	intset->iter_active = false;
304	intset->iter_node = NULL;
305	intset->iter_itemno = `0`;
306	intset->iter_valueno = `0`;
307	intset->iter_num_values = `0`;
308	intset->iter_values = NULL;
309
310	return intset;
311	}
312
313	/*
314	* Allocate a new node.
315	*/
316	static intset_internal_node *
317	intset_new_internal_node(IntegerSet *intset)
318	{
319	intset_internal_node *n;
320
321	n = (intset_internal_node *) MemoryContextAlloc(intset->context,
322	sizeof(intset_internal_node));
323	intset->mem_used += GetMemoryChunkSpace(n);
324
325	n->level = `0`; / caller must set /
326	n->num_items = `0`;
327
328	return n;
329	}
330
331	static intset_leaf_node *
332	intset_new_leaf_node(IntegerSet *intset)
333	{
334	intset_leaf_node *n;
335
336	n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
337	sizeof(intset_leaf_node));
338	intset->mem_used += GetMemoryChunkSpace(n);
339
340	n->level = `0`;
341	n->num_items = `0`;
342	n->next = NULL;
343
344	return n;
345	}
346
347	/*
348	* Return the number of entries in the integer set.
349	*/
350	uint64
351	intset_num_entries(IntegerSet *intset)
352	{
353	return intset->num_entries;
354	}
355
356	/*
357	* Return the amount of memory used by the integer set.
358	*/
359	uint64
360	intset_memory_usage(IntegerSet *intset)
361	{
362	return intset->mem_used;
363	}
364
365	/*
366	* Add a value to the set.
367	*
368	* Values must be added in order.
369	*/
370	void
371	intset_add_member(IntegerSet *intset, uint64 x)
372	{
373	if (intset->iter_active)
374	elog(ERROR, "cannot add new values to integer set while iteration is in progress");
375
376	if (x <= intset->highest_value && intset->num_entries > `0`)
377	elog(ERROR, "cannot add value to integer set out of order");
378
379	if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
380	{
381	/ Time to flush our buffer /
382	intset_flush_buffered_values(intset);
383	Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
384	}
385
386	/ Add it to the buffer of newly-added values /
387	intset->buffered_values[intset->num_buffered_values] = x;
388	intset->num_buffered_values++;
389	intset->num_entries++;
390	intset->highest_value = x;
391	}
392
393	/*
394	* Take a batch of buffered values, and pack them into the B-tree.
395	*/
396	static void
397	intset_flush_buffered_values(IntegerSet *intset)
398	{
399	uint64 *values = intset->buffered_values;
400	uint64 num_values = intset->num_buffered_values;
401	int num_packed = `0`;
402	intset_leaf_node *leaf;
403
404	leaf = (intset_leaf_node *) intset->rightmost_nodes[`0`];
405
406	/*
407	* If the tree is completely empty, create the first leaf page, which is
408	* also the root.
409	*/
410	if (leaf == NULL)
411	{
412	/*
413	* This is the very first item in the set.
414	*
415	* Allocate root node. It's also a leaf.
416	*/
417	leaf = intset_new_leaf_node(intset);
418
419	intset->root = (intset_node *) leaf;
420	intset->leftmost_leaf = leaf;
421	intset->rightmost_nodes[`0`] = (intset_node *) leaf;
422	intset->num_levels = `1`;
423	}
424
425	/*
426	* If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
427	* stop. In most cases, we cannot encode that many values in a single
428	* value, but this way, the encoder doesn't have to worry about running
429	* out of input.
430	*/
431	while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
432	{
433	leaf_item item;
434	int num_encoded;
435
436	/*
437	* Construct the next leaf item, packing as many buffered values as
438	* possible.
439	*/
440	item.first = values[num_packed];
441	item.codeword = simple8b_encode(&values[num_packed + `1`],
442	&num_encoded,
443	item.first);
444
445	/*
446	* Add the item to the node, allocating a new node if the old one is
447	* full.
448	*/
449	if (leaf->num_items >= MAX_LEAF_ITEMS)
450	{
451	/ Allocate new leaf and link it to the tree /
452	intset_leaf_node *old_leaf = leaf;
453
454	leaf = intset_new_leaf_node(intset);
455	old_leaf->next = leaf;
456	intset->rightmost_nodes[`0`] = (intset_node *) leaf;
457	intset_update_upper(intset, `1`, (intset_node *) leaf, item.first);
458	}
459	leaf->items[leaf->num_items++] = item;
460
461	num_packed += `1` + num_encoded;
462	}
463
464	/*
465	* Move any remaining buffered values to the beginning of the array.
466	*/
467	if (num_packed < intset->num_buffered_values)
468	{
469	memmove(&intset->buffered_values[`0`],
470	&intset->buffered_values[num_packed],
471	(intset->num_buffered_values - num_packed) * sizeof(uint64));
472	}
473	intset->num_buffered_values -= num_packed;
474	}
475
476	/*
477	* Insert a downlink into parent node, after creating a new node.
478	*
479	* Recurses if the parent node is full, too.
480	*/
481	static void
482	intset_update_upper(IntegerSet intset, int* level, intset_node *child,
483	uint64 child_key)
484	{
485	intset_internal_node *parent;
486
487	Assert(level > `0`);
488
489	/*
490	* Create a new root node, if necessary.
491	*/
492	if (level >= intset->num_levels)
493	{
494	intset_node *oldroot = intset->root;
495	uint64 downlink_key;
496
497	/ MAX_TREE_LEVELS should be more than enough, this shouldn't happen /
498	if (intset->num_levels == MAX_TREE_LEVELS)
499	elog(ERROR, "could not expand integer set, maximum number of levels reached");
500	intset->num_levels++;
501
502	/*
503	* Get the first value on the old root page, to be used as the
504	* downlink.
505	*/
506	if (intset->root->level == `0`)
507	downlink_key = ((intset_leaf_node *) oldroot)->items[`0`].first;
508	else
509	downlink_key = ((intset_internal_node *) oldroot)->values[`0`];
510
511	parent = intset_new_internal_node(intset);
512	parent->level = level;
513	parent->values[`0`] = downlink_key;
514	parent->downlinks[`0`] = oldroot;
515	parent->num_items = `1`;
516
517	intset->root = (intset_node *) parent;
518	intset->rightmost_nodes[level] = (intset_node *) parent;
519	}
520
521	/*
522	* Place the downlink on the parent page.
523	*/
524	parent = (intset_internal_node *) intset->rightmost_nodes[level];
525
526	if (parent->num_items < MAX_INTERNAL_ITEMS)
527	{
528	parent->values[parent->num_items] = child_key;
529	parent->downlinks[parent->num_items] = child;
530	parent->num_items++;
531	}
532	else
533	{
534	/*
535	* Doesn't fit. Allocate new parent, with the downlink as the first
536	* item on it, and recursively insert the downlink to the new parent
537	* to the grandparent.
538	*/
539	parent = intset_new_internal_node(intset);
540	parent->level = level;
541	parent->values[`0`] = child_key;
542	parent->downlinks[`0`] = child;
543	parent->num_items = `1`;
544
545	intset->rightmost_nodes[level] = (intset_node *) parent;
546
547	intset_update_upper(intset, level + `1`, (intset_node *) parent, child_key);
548	}
549	}
550
551	/*
552	* Does the set contain the given value?
553	*/
554	bool
555	intset_is_member(IntegerSet *intset, uint64 x)
556	{
557	intset_node *node;
558	intset_leaf_node *leaf;
559	int level;
560	int itemno;
561	leaf_item *item;
562
563	/*
564	* The value might be in the buffer of newly-added values.
565	*/
566	if (intset->num_buffered_values > `0` && x >= intset->buffered_values[`0`])
567	{
568	int itemno;
569
570	itemno = intset_binsrch_uint64(x,
571	intset->buffered_values,
572	intset->num_buffered_values,
573	false);
574	if (itemno >= intset->num_buffered_values)
575	return false;
576	else
577	return (intset->buffered_values[itemno] == x);
578	}
579
580	/*
581	* Start from the root, and walk down the B-tree to find the right leaf
582	* node.
583	*/
584	if (!intset->root)
585	return false;
586	node = intset->root;
587	for (level = intset->num_levels - `1`; level > `0`; level--)
588	{
589	intset_internal_node n = (intset_internal_node ) node;
590
591	Assert(node->level == level);
592
593	itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
594	if (itemno == `0`)
595	return false;
596	node = n->downlinks[itemno - `1`];
597	}
598	Assert(node->level == `0`);
599	leaf = (intset_leaf_node *) node;
600
601	/*
602	* Binary search to find the right item on the leaf page
603	*/
604	itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
605	if (itemno == `0`)
606	return false;
607	item = &leaf->items[itemno - `1`];
608
609	/ Is this a match to the first value on the item? /
610	if (item->first == x)
611	return true;
612	Assert(x > item->first);
613
614	/ Is it in the packed codeword? /
615	if (simple8b_contains(item->codeword, x, item->first))
616	return true;
617
618	return false;
619	}
620
621	/*
622	* Begin in-order scan through all the values.
623	*
624	* While the iteration is in-progress, you cannot add new values to the set.
625	*/
626	void
627	intset_begin_iterate(IntegerSet *intset)
628	{
629	/ Note that we allow an iteration to be abandoned midway /
630	intset->iter_active = true;
631	intset->iter_node = intset->leftmost_leaf;
632	intset->iter_itemno = `0`;
633	intset->iter_valueno = `0`;
634	intset->iter_num_values = `0`;
635	intset->iter_values = intset->iter_values_buf;
636	}
637
638	/*
639	* Returns the next integer, when iterating.
640	*
641	* intset_begin_iterate() must be called first. intset_iterate_next() returns
642	* the next value in the set. Returns true, if there was another value, and
643	* stores the value in *next. Otherwise, returns false.
644	*/
645	bool
646	intset_iterate_next(IntegerSet intset, uint64 next)
647	{
648	Assert(intset->iter_active);
649	for (;;)
650	{
651	/ Return next iter_values[] entry if any /
652	if (intset->iter_valueno < intset->iter_num_values)
653	{
654	*next = intset->iter_values[intset->iter_valueno++];
655	return true;
656	}
657
658	/ Decode next item in current leaf node, if any /
659	if (intset->iter_node &&
660	intset->iter_itemno < intset->iter_node->num_items)
661	{
662	leaf_item *item;
663	int num_decoded;
664
665	item = &intset->iter_node->items[intset->iter_itemno++];
666
667	intset->iter_values_buf[`0`] = item->first;
668	num_decoded = simple8b_decode(item->codeword,
669	&intset->iter_values_buf[`1`],
670	item->first);
671	intset->iter_num_values = num_decoded + `1`;
672	intset->iter_valueno = `0`;
673	continue;
674	}
675
676	/ No more items on this leaf, step to next node /
677	if (intset->iter_node)
678	{
679	intset->iter_node = intset->iter_node->next;
680	intset->iter_itemno = `0`;
681	continue;
682	}
683
684	/*
685	* We have reached the end of the B-tree. But we might still have
686	* some integers in the buffer of newly-added values.
687	*/
688	if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
689	{
690	intset->iter_values = intset->buffered_values;
691	intset->iter_num_values = intset->num_buffered_values;
692	intset->iter_valueno = `0`;
693	continue;
694	}
695
696	break;
697	}
698
699	/ No more results. /
700	intset->iter_active = false;
701	next = `0`; /* prevent uninitialized-variable warnings /
702	return false;
703	}
704
705	/*
706	* intset_binsrch_uint64() -- search a sorted array of uint64s
707	*
708	* Returns the first position with key equal or less than the given key.
709	* The returned position would be the "insert" location for the given key,
710	* that is, the position where the new key should be inserted to.
711	*
712	* 'nextkey' affects the behavior on equal keys. If true, and there is an
713	* equal key in the array, this returns the position immediately after the
714	* equal key. If false, this returns the position of the equal key itself.
715	*/
716	static int
717	intset_binsrch_uint64(uint64 item, uint64 arr, int* arr_elems, bool nextkey)
718	{
719	int low,
720	high,
721	mid;
722
723	low = `0`;
724	high = arr_elems;
725	while (high > low)
726	{
727	mid = low + (high - low) / `2`;
728
729	if (nextkey)
730	{
731	if (item >= arr[mid])
732	low = mid + `1`;
733	else
734	high = mid;
735	}
736	else
737	{
738	if (item > arr[mid])
739	low = mid + `1`;
740	else
741	high = mid;
742	}
743	}
744
745	return low;
746	}
747
748	/ same, but for an array of leaf items /
749	static int
750	intset_binsrch_leaf(uint64 item, leaf_item arr, int* arr_elems, bool nextkey)
751	{
752	int low,
753	high,
754	mid;
755
756	low = `0`;
757	high = arr_elems;
758	while (high > low)
759	{
760	mid = low + (high - low) / `2`;
761
762	if (nextkey)
763	{
764	if (item >= arr[mid].first)
765	low = mid + `1`;
766	else
767	high = mid;
768	}
769	else
770	{
771	if (item > arr[mid].first)
772	low = mid + `1`;
773	else
774	high = mid;
775	}
776	}
777
778	return low;
779	}
780
781	/*
782	* Simple-8b encoding.
783	*
784	* The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
785	* called "codewords". The number of integers packed into a single codeword
786	* depends on the integers being packed; small integers are encoded using
787	* fewer bits than large integers. A single codeword can store a single
788	* 60-bit integer, or two 30-bit integers, for example.
789	*
790	* Since we're storing a unique, sorted, set of integers, we actually encode
791	* the differences between consecutive integers. That way, clusters of
792	* integers that are close to each other are packed efficiently, regardless
793	* of their absolute values.
794	*
795	* In Simple-8b, each codeword consists of a 4-bit selector, which indicates
796	* how many integers are encoded in the codeword, and the encoded integers are
797	* packed into the remaining 60 bits. The selector allows for 16 different
798	* ways of using the remaining 60 bits, called "modes". The number of integers
799	* packed into a single codeword in each mode is listed in the simple8b_modes
800	* table below. For example, consider the following codeword:
801	*
802	* 20-bit integer 20-bit integer 20-bit integer
803	* 1101 00000000000000010010 01111010000100100000 00000000000000010100
804	* ^
805	* selector
806	*
807	* The selector 1101 is 13 in decimal. From the modes table below, we see
808	* that it means that the codeword encodes three 20-bit integers. In decimal,
809	* those integers are 18, 500000 and 20. Because we encode deltas rather than
810	* absolute values, the actual values that they represent are 18, 500018 and
811	* 500038.
812	*
813	* Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
814	* (which means 240 or 120 consecutive integers, since we're encoding the
815	* deltas between integers), without using the rest of the codeword bits
816	* for anything.
817	*
818	* Simple-8b cannot encode integers larger than 60 bits. Values larger than
819	* that are always stored in the 'first' field of a leaf item, never in the
820	* packed codeword. If there is a sequence of integers that are more than
821	* 2^60 apart, the codeword will go unused on those items. To represent that,
822	* we use a magic EMPTY_CODEWORD codeword value.
823	*/
824	static const struct simple8b_mode
825	{
826	uint8 bits_per_int;
827	uint8 num_ints;
828	} simple8b_modes[`17`] =
829
830	{
831	{`0`, `240`}, / mode 0: 240 zeroes /
832	{`0`, `120`}, / mode 1: 120 zeroes /
833	{`1`, `60`}, / mode 2: sixty 1-bit integers /
834	{`2`, `30`}, / mode 3: thirty 2-bit integers /
835	{`3`, `20`}, / mode 4: twenty 3-bit integers /
836	{`4`, `15`}, / mode 5: fifteen 4-bit integers /
837	{`5`, `12`}, / mode 6: twelve 5-bit integers /
838	{`6`, `10`}, / mode 7: ten 6-bit integers /
839	{`7`, `8`}, / mode 8: eight 7-bit integers (four bits*
840	* are wasted) */
841	{`8`, `7`}, / mode 9: seven 8-bit integers (four bits*
842	* are wasted) */
843	{`10`, `6`}, / mode 10: six 10-bit integers /
844	{`12`, `5`}, / mode 11: five 12-bit integers /
845	{`15`, `4`}, / mode 12: four 15-bit integers /
846	{`20`, `3`}, / mode 13: three 20-bit integers /
847	{`30`, `2`}, / mode 14: two 30-bit integers /
848	{`60`, `1`}, / mode 15: one 60-bit integer /
849
850	{`0`, `0`} / sentinel value /
851	};
852
853	/*
854	* EMPTY_CODEWORD is a special value, used to indicate "no values".
855	* It is used if the next value is too large to be encoded with Simple-8b.
856	*
857	* This value looks like a mode-0 codeword, but we can distinguish it
858	* because a regular mode-0 codeword would have zeroes in the unused bits.
859	*/
860	#define EMPTY_CODEWORD UINT64CONST(0x0FFFFFFFFFFFFFFF)
861
862	/*
863	* Encode a number of integers into a Simple-8b codeword.
864	*
865	* (What we actually encode are deltas between successive integers.
866	* "base" is the value before ints[0].)
867	*
868	* The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
869	* elements, ensuring that we can produce a full codeword.
870	*
871	* Returns the encoded codeword, and sets *num_encoded to the number of
872	* input integers that were encoded. That can be zero, if the first delta
873	* is too large to be encoded.
874	*/
875	static uint64
876	simple8b_encode(const uint64 ints, int* *num_encoded, uint64 base)
877	{
878	int selector;
879	int nints;
880	int bits;
881	uint64 diff;
882	uint64 last_val;
883	uint64 codeword;
884	int i;
885
886	Assert(ints[`0`] > base);
887
888	/*
889	* Select the "mode" to use for this codeword.
890	*
891	* In each iteration, check if the next value can be represented in the
892	* current mode we're considering. If it's too large, then step up the
893	* mode to a wider one, and repeat. If it fits, move on to the next
894	* integer. Repeat until the codeword is full, given the current mode.
895	*
896	* Note that we don't have any way to represent unused slots in the
897	* codeword, so we require each codeword to be "full". It is always
898	* possible to produce a full codeword unless the very first delta is too
899	* large to be encoded. For example, if the first delta is small but the
900	* second is too large to be encoded, we'll end up using the last "mode",
901	* which has nints == 1.
902	*/
903	selector = `0`;
904	nints = simple8b_modes[`0`].num_ints;
905	bits = simple8b_modes[`0`].bits_per_int;
906	diff = ints[`0`] - base - `1`;
907	last_val = ints[`0`];
908	i = `0`; / number of deltas we have accepted /
909	for (;;)
910	{
911	if (diff >= (UINT64CONST(`1`) << bits))
912	{
913	/ too large, step up to next mode /
914	selector++;
915	nints = simple8b_modes[selector].num_ints;
916	bits = simple8b_modes[selector].bits_per_int;
917	/ we might already have accepted enough deltas for this mode /
918	if (i >= nints)
919	break;
920	}
921	else
922	{
923	/ accept this delta; then done if codeword is full /
924	i++;
925	if (i >= nints)
926	break;
927	/ examine next delta /
928	Assert(ints[i] > last_val);
929	diff = ints[i] - last_val - `1`;
930	last_val = ints[i];
931	}
932	}
933
934	if (nints == `0`)
935	{
936	/*
937	* The first delta is too large to be encoded with Simple-8b.
938	*
939	* If there is at least one not-too-large integer in the input, we
940	* will encode it using mode 15 (or a more compact mode). Hence, we
941	* can only get here if the first delta is >= 2^60.
942	*/
943	Assert(i == `0`);
944	*num_encoded = `0`;
945	return EMPTY_CODEWORD;
946	}
947
948	/*
949	* Encode the integers using the selected mode. Note that we shift them
950	* into the codeword in reverse order, so that they will come out in the
951	* correct order in the decoder.
952	*/
953	codeword = `0`;
954	if (bits > `0`)
955	{
956	for (i = nints - `1`; i > `0`; i--)
957	{
958	diff = ints[i] - ints[i - `1`] - `1`;
959	codeword \|= diff;
960	codeword <<= bits;
961	}
962	diff = ints[`0`] - base - `1`;
963	codeword \|= diff;
964	}
965
966	/ add selector to the codeword, and return /
967	codeword \|= (uint64) selector << `60`;
968
969	*num_encoded = nints;
970	return codeword;
971	}
972
973	/*
974	* Decode a codeword into an array of integers.
975	* Returns the number of integers decoded.
976	*/
977	static int
978	simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
979	{
980	int selector = (codeword >> `60`);
981	int nints = simple8b_modes[selector].num_ints;
982	int bits = simple8b_modes[selector].bits_per_int;
983	uint64 mask = (UINT64CONST(`1`) << bits) - `1`;
984	uint64 curr_value;
985
986	if (codeword == EMPTY_CODEWORD)
987	return `0`;
988
989	curr_value = base;
990	for (int i = `0`; i < nints; i++)
991	{
992	uint64 diff = codeword & mask;
993
994	curr_value += `1` + diff;
995	decoded[i] = curr_value;
996	codeword >>= bits;
997	}
998	return nints;
999	}
1000
1001	/*
1002	* This is very similar to simple8b_decode(), but instead of decoding all
1003	* the values to an array, it just checks if the given "key" is part of
1004	* the codeword.
1005	*/
1006	static bool
1007	simple8b_contains(uint64 codeword, uint64 key, uint64 base)
1008	{
1009	int selector = (codeword >> `60`);
1010	int nints = simple8b_modes[selector].num_ints;
1011	int bits = simple8b_modes[selector].bits_per_int;
1012
1013	if (codeword == EMPTY_CODEWORD)
1014	return false;
1015
1016	if (bits == `0`)
1017	{
1018	/ Special handling for 0-bit cases. /
1019	return (key - base) <= nints;
1020	}
1021	else
1022	{
1023	uint64 mask = (UINT64CONST(`1`) << bits) - `1`;
1024	uint64 curr_value;
1025
1026	curr_value = base;
1027	for (int i = `0`; i < nints; i++)
1028	{
1029	uint64 diff = codeword & mask;
1030
1031	curr_value += `1` + diff;
1032
1033	if (curr_value >= key)
1034	{
1035	if (curr_value == key)
1036	return true;
1037	else
1038	return false;
1039	}
1040
1041	codeword >>= bits;
1042	}
1043	}
1044	return false;
1045	}
1046

Browse the source code of PostgreSQL/src/backend/lib/integerset.c