simplehash.h source code [PostgreSQL/src/include/lib/simplehash.h]

1	/*
2	* simplehash.h
3	*
4	* Hash table implementation which will be specialized to user-defined
5	* types, by including this file to generate the required code. It's
6	* probably not worthwhile to do so for hash tables that aren't performance
7	* or space sensitive.
8	*
9	* Usage notes:
10	*
11	* To generate a hash-table and associated functions for a use case several
12	* macros have to be #define'ed before this file is included. Including
13	* the file #undef's all those, so a new hash table can be generated
14	* afterwards.
15	* The relevant parameters are:
16	* - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
17	* will result in hash table type 'foo_hash' and functions like
18	* 'foo_insert'/'foo_lookup' and so forth.
19	* - SH_ELEMENT_TYPE - type of the contained elements
20	* - SH_KEY_TYPE - type of the hashtable's key
21	* - SH_DECLARE - if defined function prototypes and type declarations are
22	* generated
23	* - SH_DEFINE - if defined function definitions are generated
24	* - SH_SCOPE - in which scope (e.g. extern, static inline) do function
25	* declarations reside
26	* - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
27	* are defined, so you can supply your own
28	* The following parameters are only relevant when SH_DEFINE is defined:
29	* - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
30	* - SH_EQUAL(table, a, b) - compare two table keys
31	* - SH_HASH_KEY(table, key) - generate hash for the key
32	* - SH_STORE_HASH - if defined the hash is stored in the elements
33	* - SH_GET_HASH(tb, a) - return the field to store the hash in
34	*
35	* For examples of usage look at tidbitmap.c (file local definition) and
36	* execnodes.h/execGrouping.c (exposed declaration, file local
37	* implementation).
38	*
39	* Hash table design:
40	*
41	* The hash table design chosen is a variant of linear open-addressing. The
42	* reason for doing so is that linear addressing is CPU cache & pipeline
43	* friendly. The biggest disadvantage of simple linear addressing schemes
44	* are highly variable lookup times due to clustering, and deletions
45	* leaving a lot of tombstones around. To address these issues a variant
46	* of "robin hood" hashing is employed. Robin hood hashing optimizes
47	* chaining lengths by moving elements close to their optimal bucket
48	* ("rich" elements), out of the way if a to-be-inserted element is further
49	* away from its optimal position (i.e. it's "poor"). While that can make
50	* insertions slower, the average lookup performance is a lot better, and
51	* higher fill factors can be used in a still performant manner. To avoid
52	* tombstones - which normally solve the issue that a deleted node's
53	* presence is relevant to determine whether a lookup needs to continue
54	* looking or is done - buckets following a deleted element are shifted
55	* backwards, unless they're empty or already at their optimal position.
56	*/
57
58	/ helpers /
59	#define SH_MAKE_PREFIX(a) CppConcat(a,_)
60	#define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
61	#define SH_MAKE_NAME_(a,b) CppConcat(a,b)
62
63	/ name macros for: /
64
65	/ type declarations /
66	#define SH_TYPE SH_MAKE_NAME(hash)
67	#define SH_STATUS SH_MAKE_NAME(status)
68	#define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
69	#define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
70	#define SH_ITERATOR SH_MAKE_NAME(iterator)
71
72	/ function declarations /
73	#define SH_CREATE SH_MAKE_NAME(create)
74	#define SH_DESTROY SH_MAKE_NAME(destroy)
75	#define SH_RESET SH_MAKE_NAME(reset)
76	#define SH_INSERT SH_MAKE_NAME(insert)
77	#define SH_DELETE SH_MAKE_NAME(delete)
78	#define SH_LOOKUP SH_MAKE_NAME(lookup)
79	#define SH_GROW SH_MAKE_NAME(grow)
80	#define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
81	#define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
82	#define SH_ITERATE SH_MAKE_NAME(iterate)
83	#define SH_ALLOCATE SH_MAKE_NAME(allocate)
84	#define SH_FREE SH_MAKE_NAME(free)
85	#define SH_STAT SH_MAKE_NAME(stat)
86
87	/ internal helper functions (no externally visible prototypes) /
88	#define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
89	#define SH_NEXT SH_MAKE_NAME(next)
90	#define SH_PREV SH_MAKE_NAME(prev)
91	#define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
92	#define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
93	#define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
94
95	/ generate forward declarations necessary to use the hash table /
96	#ifdef SH_DECLARE
97
98	/ type definitions /
99	typedef struct SH_TYPE
100	{
101	/*
102	* Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
103	* tables. Note that the maximum number of elements is lower
104	* (SH_MAX_FILLFACTOR)
105	*/
106	uint64 size;
107
108	/ how many elements have valid contents /
109	uint32 members;
110
111	/ mask for bucket and size calculations, based on size /
112	uint32 sizemask;
113
114	/ boundary after which to grow hashtable /
115	uint32 grow_threshold;
116
117	/ hash buckets /
118	SH_ELEMENT_TYPE *data;
119
120	/ memory context to use for allocations /
121	MemoryContext ctx;
122
123	/ user defined data, useful for callbacks /
124	void *private_data;
125	} SH_TYPE;
126
127	typedef enum SH_STATUS
128	{
129	SH_STATUS_EMPTY = `0x00`,
130	SH_STATUS_IN_USE = `0x01`
131	} SH_STATUS;
132
133	typedef struct SH_ITERATOR
134	{
135	uint32 cur; / current element /
136	uint32 end;
137	bool done; / iterator exhausted? /
138	} SH_ITERATOR;
139
140	/ externally visible function prototypes /
141	SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
142	void *private_data);
143	SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
144	SH_SCOPE void SH_RESET(SH_TYPE * tb);
145	SH_SCOPE void SH_GROW(SH_TYPE * tb, uint32 newsize);
146	SH_SCOPE SH_ELEMENT_TYPE SH_INSERT(SH_TYPE tb, SH_KEY_TYPE key, bool *found);
147	SH_SCOPE SH_ELEMENT_TYPE SH_LOOKUP(SH_TYPE tb, SH_KEY_TYPE key);
148	SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
149	SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
150	SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
151	SH_SCOPE SH_ELEMENT_TYPE SH_ITERATE(SH_TYPE tb, SH_ITERATOR * iter);
152	SH_SCOPE void SH_STAT(SH_TYPE * tb);
153
154	#endif /* SH_DECLARE */
155
156
157	/ generate implementation of the hash table /
158	#ifdef SH_DEFINE
159
160	#include "utils/memutils.h"
161
162	/ max data array size,we allow up to PG_UINT32_MAX buckets, including 0 /
163	#define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
164
165	/ normal fillfactor, unless already close to maximum /
166	#ifndef SH_FILLFACTOR
167	#define SH_FILLFACTOR (0.9)
168	#endif
169	/ increase fillfactor if we otherwise would error out /
170	#define SH_MAX_FILLFACTOR (0.98)
171	/ grow if actual and optimal location bigger than /
172	#ifndef SH_GROW_MAX_DIB
173	#define SH_GROW_MAX_DIB 25
174	#endif
175	/ grow if more than elements to move when inserting /
176	#ifndef SH_GROW_MAX_MOVE
177	#define SH_GROW_MAX_MOVE 150
178	#endif
179	#ifndef SH_GROW_MIN_FILLFACTOR
180	/ but do not grow due to SH_GROW_MAX_* if below /
181	#define SH_GROW_MIN_FILLFACTOR 0.1
182	#endif
183
184	#ifdef SH_STORE_HASH
185	#define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
186	#else
187	#define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
188	#endif
189
190	/*
191	* Wrap the following definitions in include guards, to avoid multiple
192	* definition errors if this header is included more than once. The rest of
193	* the file deliberately has no include guards, because it can be included
194	* with different parameters to define functions and types with non-colliding
195	* names.
196	*/
197	#ifndef SIMPLEHASH_H
198	#define SIMPLEHASH_H
199
200	/ FIXME: can we move these to a central location? /
201
202	/ calculate ceil(log base 2) of num /
203	static inline uint64
204	sh_log2(uint64 num)
205	{
206	int i;
207	uint64 limit;
208
209	for (i = `0`, limit = `1`; limit < num; i++, limit <<= `1`)
210	;
211	return i;
212	}
213
214	/ calculate first power of 2 >= num /
215	static inline uint64
216	sh_pow2(uint64 num)
217	{
218	return ((uint64) `1`) << sh_log2(num);
219	}
220
221	#endif
222
223	/*
224	* Compute sizing parameters for hashtable. Called when creating and growing
225	* the hashtable.
226	*/
227	static inline void
228	SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint32 newsize)
229	{
230	uint64 size;
231
232	/ supporting zero sized hashes would complicate matters /
233	size = Max(newsize, `2`);
234
235	/ round up size to the next power of 2, that's how bucketing works /
236	size = sh_pow2(size);
237	Assert(size <= SH_MAX_SIZE);
238
239	/*
240	* Verify that allocation of ->data is possible on this platform, without
241	* overflowing Size.
242	*/
243	if ((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= MaxAllocHugeSize)
244	elog(ERROR, "hash table too large");
245
246	/ now set size /
247	tb->size = size;
248
249	if (tb->size == SH_MAX_SIZE)
250	tb->sizemask = `0`;
251	else
252	tb->sizemask = tb->size - `1`;
253
254	/*
255	* Compute the next threshold at which we need to grow the hash table
256	* again.
257	*/
258	if (tb->size == SH_MAX_SIZE)
259	tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
260	else
261	tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
262	}
263
264	/ return the optimal bucket for the hash /
265	static inline uint32
266	SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
267	{
268	return hash & tb->sizemask;
269	}
270
271	/ return next bucket after the current, handling wraparound /
272	static inline uint32
273	SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
274	{
275	curelem = (curelem + `1`) & tb->sizemask;
276
277	Assert(curelem != startelem);
278
279	return curelem;
280	}
281
282	/ return bucket before the current, handling wraparound /
283	static inline uint32
284	SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
285	{
286	curelem = (curelem - `1`) & tb->sizemask;
287
288	Assert(curelem != startelem);
289
290	return curelem;
291	}
292
293	/ return distance between bucket and its optimal position /
294	static inline uint32
295	SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
296	{
297	if (optimal <= bucket)
298	return bucket - optimal;
299	else
300	return (tb->size + bucket) - optimal;
301	}
302
303	static inline uint32
304	SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
305	{
306	#ifdef SH_STORE_HASH
307	return SH_GET_HASH(tb, entry);
308	#else
309	return SH_HASH_KEY(tb, entry->SH_KEY);
310	#endif
311	}
312
313	/ default memory allocator function /
314	static inline void SH_ALLOCATE(SH_TYPE type, Size size);
315	static inline void SH_FREE(SH_TYPE * type, void *pointer);
316
317	#ifndef SH_USE_NONDEFAULT_ALLOCATOR
318
319	/ default memory allocator function /
320	static inline void *
321	SH_ALLOCATE(SH_TYPE * type, Size size)
322	{
323	return MemoryContextAllocExtended(type->ctx, size,
324	MCXT_ALLOC_HUGE \| MCXT_ALLOC_ZERO);
325	}
326
327	/ default memory free function /
328	static inline void
329	SH_FREE(SH_TYPE * type, void *pointer)
330	{
331	pfree(pointer);
332	}
333
334	#endif
335
336	/*
337	* Create a hash table with enough space for `nelements` distinct members.
338	* Memory for the hash table is allocated from the passed-in context. If
339	* desired, the array of elements can be allocated using a passed-in allocator;
340	* this could be useful in order to place the array of elements in a shared
341	* memory, or in a context that will outlive the rest of the hash table.
342	* Memory other than for the array of elements will still be allocated from
343	* the passed-in context.
344	*/
345	SH_SCOPE SH_TYPE *
346	SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
347	{
348	SH_TYPE *tb;
349	uint64 size;
350
351	tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
352	tb->ctx = ctx;
353	tb->private_data = private_data;
354
355	/ increase nelements by fillfactor, want to store nelements elements /
356	size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
357
358	SH_COMPUTE_PARAMETERS(tb, size);
359
360	tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
361
362	return tb;
363	}
364
365	/ destroy a previously created hash table /
366	SH_SCOPE void
367	SH_DESTROY(SH_TYPE * tb)
368	{
369	SH_FREE(tb, tb->data);
370	pfree(tb);
371	}
372
373	/ reset the contents of a previously created hash table /
374	SH_SCOPE void
375	SH_RESET(SH_TYPE * tb)
376	{
377	memset(tb->data, `0`, sizeof(SH_ELEMENT_TYPE) * tb->size);
378	tb->members = `0`;
379	}
380
381	/*
382	* Grow a hash table to at least `newsize` buckets.
383	*
384	* Usually this will automatically be called by insertions/deletions, when
385	* necessary. But resizing to the exact input size can be advantageous
386	* performance-wise, when known at some point.
387	*/
388	SH_SCOPE void
389	SH_GROW(SH_TYPE * tb, uint32 newsize)
390	{
391	uint64 oldsize = tb->size;
392	SH_ELEMENT_TYPE *olddata = tb->data;
393	SH_ELEMENT_TYPE *newdata;
394	uint32 i;
395	uint32 startelem = `0`;
396	uint32 copyelem;
397
398	Assert(oldsize == sh_pow2(oldsize));
399	Assert(oldsize != SH_MAX_SIZE);
400	Assert(oldsize < newsize);
401
402	/ compute parameters for new table /
403	SH_COMPUTE_PARAMETERS(tb, newsize);
404
405	tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
406
407	newdata = tb->data;
408
409	/*
410	* Copy entries from the old data to newdata. We theoretically could use
411	* SH_INSERT here, to avoid code duplication, but that's more general than
412	* we need. We neither want tb->members increased, nor do we need to do
413	* deal with deleted elements, nor do we need to compare keys. So a
414	* special-cased implementation is lot faster. As resizing can be time
415	* consuming and frequent, that's worthwhile to optimize.
416	*
417	* To be able to simply move entries over, we have to start not at the
418	* first bucket (i.e olddata[0]), but find the first bucket that's either
419	* empty, or is occupied by an entry at its optimal position. Such a
420	* bucket has to exist in any table with a load factor under 1, as not all
421	* buckets are occupied, i.e. there always has to be an empty bucket. By
422	* starting at such a bucket we can move the entries to the larger table,
423	* without having to deal with conflicts.
424	*/
425
426	/ search for the first element in the hash that's not wrapped around /
427	for (i = `0`; i < oldsize; i++)
428	{
429	SH_ELEMENT_TYPE *oldentry = &olddata[i];
430	uint32 hash;
431	uint32 optimal;
432
433	if (oldentry->status != SH_STATUS_IN_USE)
434	{
435	startelem = i;
436	break;
437	}
438
439	hash = SH_ENTRY_HASH(tb, oldentry);
440	optimal = SH_INITIAL_BUCKET(tb, hash);
441
442	if (optimal == i)
443	{
444	startelem = i;
445	break;
446	}
447	}
448
449	/ and copy all elements in the old table /
450	copyelem = startelem;
451	for (i = `0`; i < oldsize; i++)
452	{
453	SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
454
455	if (oldentry->status == SH_STATUS_IN_USE)
456	{
457	uint32 hash;
458	uint32 startelem;
459	uint32 curelem;
460	SH_ELEMENT_TYPE *newentry;
461
462	hash = SH_ENTRY_HASH(tb, oldentry);
463	startelem = SH_INITIAL_BUCKET(tb, hash);
464	curelem = startelem;
465
466	/ find empty element to put data into /
467	while (true)
468	{
469	newentry = &newdata[curelem];
470
471	if (newentry->status == SH_STATUS_EMPTY)
472	{
473	break;
474	}
475
476	curelem = SH_NEXT(tb, curelem, startelem);
477	}
478
479	/ copy entry to new slot /
480	memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
481	}
482
483	/ can't use SH_NEXT here, would use new size /
484	copyelem++;
485	if (copyelem >= oldsize)
486	{
487	copyelem = `0`;
488	}
489	}
490
491	SH_FREE(tb, olddata);
492	}
493
494	/*
495	* Insert the key key into the hash-table, set *found to true if the key
496	* already exists, false otherwise. Returns the hash-table entry in either
497	* case.
498	*/
499	SH_SCOPE SH_ELEMENT_TYPE *
500	SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
501	{
502	uint32 hash = SH_HASH_KEY(tb, key);
503	uint32 startelem;
504	uint32 curelem;
505	SH_ELEMENT_TYPE *data;
506	uint32 insertdist;
507
508	restart:
509	insertdist = `0`;
510
511	/*
512	* We do the grow check even if the key is actually present, to avoid
513	* doing the check inside the loop. This also lets us avoid having to
514	* re-find our position in the hashtable after resizing.
515	*
516	* Note that this also reached when resizing the table due to
517	* SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
518	*/
519	if (unlikely(tb->members >= tb->grow_threshold))
520	{
521	if (tb->size == SH_MAX_SIZE)
522	{
523	elog(ERROR, "hash table size exceeded");
524	}
525
526	/*
527	* When optimizing, it can be very useful to print these out.
528	*/
529	/ SH_STAT(tb); /
530	SH_GROW(tb, tb->size * `2`);
531	/ SH_STAT(tb); /
532	}
533
534	/ perform insert, start bucket search at optimal location /
535	data = tb->data;
536	startelem = SH_INITIAL_BUCKET(tb, hash);
537	curelem = startelem;
538	while (true)
539	{
540	uint32 curdist;
541	uint32 curhash;
542	uint32 curoptimal;
543	SH_ELEMENT_TYPE *entry = &data[curelem];
544
545	/ any empty bucket can directly be used /
546	if (entry->status == SH_STATUS_EMPTY)
547	{
548	tb->members++;
549	entry->SH_KEY = key;
550	#ifdef SH_STORE_HASH
551	SH_GET_HASH(tb, entry) = hash;
552	#endif
553	entry->status = SH_STATUS_IN_USE;
554	*found = false;
555	return entry;
556	}
557
558	/*
559	* If the bucket is not empty, we either found a match (in which case
560	* we're done), or we have to decide whether to skip over or move the
561	* colliding entry. When the colliding element's distance to its
562	* optimal position is smaller than the to-be-inserted entry's, we
563	* shift the colliding entry (and its followers) forward by one.
564	*/
565
566	if (SH_COMPARE_KEYS(tb, hash, key, entry))
567	{
568	Assert(entry->status == SH_STATUS_IN_USE);
569	*found = true;
570	return entry;
571	}
572
573	curhash = SH_ENTRY_HASH(tb, entry);
574	curoptimal = SH_INITIAL_BUCKET(tb, curhash);
575	curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
576
577	if (insertdist > curdist)
578	{
579	SH_ELEMENT_TYPE *lastentry = entry;
580	uint32 emptyelem = curelem;
581	uint32 moveelem;
582	int32 emptydist = `0`;
583
584	/ find next empty bucket /
585	while (true)
586	{
587	SH_ELEMENT_TYPE *emptyentry;
588
589	emptyelem = SH_NEXT(tb, emptyelem, startelem);
590	emptyentry = &data[emptyelem];
591
592	if (emptyentry->status == SH_STATUS_EMPTY)
593	{
594	lastentry = emptyentry;
595	break;
596	}
597
598	/*
599	* To avoid negative consequences from overly imbalanced
600	* hashtables, grow the hashtable if collisions would require
601	* us to move a lot of entries. The most likely cause of such
602	* imbalance is filling a (currently) small table, from a
603	* currently big one, in hash-table order. Don't grow if the
604	* hashtable would be too empty, to prevent quick space
605	* explosion for some weird edge cases.
606	*/
607	if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
608	((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
609	{
610	tb->grow_threshold = `0`;
611	goto restart;
612	}
613	}
614
615	/ shift forward, starting at last occupied element /
616
617	/*
618	* TODO: This could be optimized to be one memcpy in may cases,
619	* excepting wrapping around at the end of ->data. Hasn't shown up
620	* in profiles so far though.
621	*/
622	moveelem = emptyelem;
623	while (moveelem != curelem)
624	{
625	SH_ELEMENT_TYPE *moveentry;
626
627	moveelem = SH_PREV(tb, moveelem, startelem);
628	moveentry = &data[moveelem];
629
630	memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
631	lastentry = moveentry;
632	}
633
634	/ and fill the now empty spot /
635	tb->members++;
636
637	entry->SH_KEY = key;
638	#ifdef SH_STORE_HASH
639	SH_GET_HASH(tb, entry) = hash;
640	#endif
641	entry->status = SH_STATUS_IN_USE;
642	*found = false;
643	return entry;
644	}
645
646	curelem = SH_NEXT(tb, curelem, startelem);
647	insertdist++;
648
649	/*
650	* To avoid negative consequences from overly imbalanced hashtables,
651	* grow the hashtable if collisions lead to large runs. The most
652	* likely cause of such imbalance is filling a (currently) small
653	* table, from a currently big one, in hash-table order. Don't grow
654	* if the hashtable would be too empty, to prevent quick space
655	* explosion for some weird edge cases.
656	*/
657	if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
658	((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
659	{
660	tb->grow_threshold = `0`;
661	goto restart;
662	}
663	}
664	}
665
666	/*
667	* Lookup up entry in hash table. Returns NULL if key not present.
668	*/
669	SH_SCOPE SH_ELEMENT_TYPE *
670	SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
671	{
672	uint32 hash = SH_HASH_KEY(tb, key);
673	const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
674	uint32 curelem = startelem;
675
676	while (true)
677	{
678	SH_ELEMENT_TYPE *entry = &tb->data[curelem];
679
680	if (entry->status == SH_STATUS_EMPTY)
681	{
682	return NULL;
683	}
684
685	Assert(entry->status == SH_STATUS_IN_USE);
686
687	if (SH_COMPARE_KEYS(tb, hash, key, entry))
688	return entry;
689
690	/*
691	* TODO: we could stop search based on distance. If the current
692	* buckets's distance-from-optimal is smaller than what we've skipped
693	* already, the entry doesn't exist. Probably only do so if
694	* SH_STORE_HASH is defined, to avoid re-computing hashes?
695	*/
696
697	curelem = SH_NEXT(tb, curelem, startelem);
698	}
699	}
700
701	/*
702	* Delete entry from hash table. Returns whether to-be-deleted key was
703	* present.
704	*/
705	SH_SCOPE bool
706	SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
707	{
708	uint32 hash = SH_HASH_KEY(tb, key);
709	uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
710	uint32 curelem = startelem;
711
712	while (true)
713	{
714	SH_ELEMENT_TYPE *entry = &tb->data[curelem];
715
716	if (entry->status == SH_STATUS_EMPTY)
717	return false;
718
719	if (entry->status == SH_STATUS_IN_USE &&
720	SH_COMPARE_KEYS(tb, hash, key, entry))
721	{
722	SH_ELEMENT_TYPE *lastentry = entry;
723
724	tb->members--;
725
726	/*
727	* Backward shift following elements till either an empty element
728	* or an element at its optimal position is encountered.
729	*
730	* While that sounds expensive, the average chain length is short,
731	* and deletions would otherwise require tombstones.
732	*/
733	while (true)
734	{
735	SH_ELEMENT_TYPE *curentry;
736	uint32 curhash;
737	uint32 curoptimal;
738
739	curelem = SH_NEXT(tb, curelem, startelem);
740	curentry = &tb->data[curelem];
741
742	if (curentry->status != SH_STATUS_IN_USE)
743	{
744	lastentry->status = SH_STATUS_EMPTY;
745	break;
746	}
747
748	curhash = SH_ENTRY_HASH(tb, curentry);
749	curoptimal = SH_INITIAL_BUCKET(tb, curhash);
750
751	/ current is at optimal position, done /
752	if (curoptimal == curelem)
753	{
754	lastentry->status = SH_STATUS_EMPTY;
755	break;
756	}
757
758	/ shift /
759	memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
760
761	lastentry = curentry;
762	}
763
764	return true;
765	}
766
767	/ TODO: return false; if distance too big /
768
769	curelem = SH_NEXT(tb, curelem, startelem);
770	}
771	}
772
773	/*
774	* Initialize iterator.
775	*/
776	SH_SCOPE void
777	SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
778	{
779	int i;
780	uint64 startelem = PG_UINT64_MAX;
781
782	/*
783	* Search for the first empty element. As deletions during iterations are
784	* supported, we want to start/end at an element that cannot be affected
785	* by elements being shifted.
786	*/
787	for (i = `0`; i < tb->size; i++)
788	{
789	SH_ELEMENT_TYPE *entry = &tb->data[i];
790
791	if (entry->status != SH_STATUS_IN_USE)
792	{
793	startelem = i;
794	break;
795	}
796	}
797
798	Assert(startelem < SH_MAX_SIZE);
799
800	/*
801	* Iterate backwards, that allows the current element to be deleted, even
802	* if there are backward shifts
803	*/
804	iter->cur = startelem;
805	iter->end = iter->cur;
806	iter->done = false;
807	}
808
809	/*
810	* Initialize iterator to a specific bucket. That's really only useful for
811	* cases where callers are partially iterating over the hashspace, and that
812	* iteration deletes and inserts elements based on visited entries. Doing that
813	* repeatedly could lead to an unbalanced keyspace when always starting at the
814	* same position.
815	*/
816	SH_SCOPE void
817	SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
818	{
819	/*
820	* Iterate backwards, that allows the current element to be deleted, even
821	* if there are backward shifts.
822	*/
823	iter->cur = at & tb->sizemask; / ensure at is within a valid range /
824	iter->end = iter->cur;
825	iter->done = false;
826	}
827
828	/*
829	* Iterate over all entries in the hash-table. Return the next occupied entry,
830	* or NULL if done.
831	*
832	* During iteration the current entry in the hash table may be deleted,
833	* without leading to elements being skipped or returned twice. Additionally
834	* the rest of the table may be modified (i.e. there can be insertions or
835	* deletions), but if so, there's neither a guarantee that all nodes are
836	* visited at least once, nor a guarantee that a node is visited at most once.
837	*/
838	SH_SCOPE SH_ELEMENT_TYPE *
839	SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
840	{
841	while (!iter->done)
842	{
843	SH_ELEMENT_TYPE *elem;
844
845	elem = &tb->data[iter->cur];
846
847	/ next element in backward direction /
848	iter->cur = (iter->cur - `1`) & tb->sizemask;
849
850	if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
851	iter->done = true;
852	if (elem->status == SH_STATUS_IN_USE)
853	{
854	return elem;
855	}
856	}
857
858	return NULL;
859	}
860
861	/*
862	* Report some statistics about the state of the hashtable. For
863	* debugging/profiling purposes only.
864	*/
865	SH_SCOPE void
866	SH_STAT(SH_TYPE * tb)
867	{
868	uint32 max_chain_length = `0`;
869	uint32 total_chain_length = `0`;
870	double avg_chain_length;
871	double fillfactor;
872	uint32 i;
873
874	uint32 collisions = palloc0(tb->size sizeof(uint32));
875	uint32 total_collisions = `0`;
876	uint32 max_collisions = `0`;
877	double avg_collisions;
878
879	for (i = `0`; i < tb->size; i++)
880	{
881	uint32 hash;
882	uint32 optimal;
883	uint32 dist;
884	SH_ELEMENT_TYPE *elem;
885
886	elem = &tb->data[i];
887
888	if (elem->status != SH_STATUS_IN_USE)
889	continue;
890
891	hash = SH_ENTRY_HASH(tb, elem);
892	optimal = SH_INITIAL_BUCKET(tb, hash);
893	dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
894
895	if (dist > max_chain_length)
896	max_chain_length = dist;
897	total_chain_length += dist;
898
899	collisions[optimal]++;
900	}
901
902	for (i = `0`; i < tb->size; i++)
903	{
904	uint32 curcoll = collisions[i];
905
906	if (curcoll == `0`)
907	continue;
908
909	/ single contained element is not a collision /
910	curcoll--;
911	total_collisions += curcoll;
912	if (curcoll > max_collisions)
913	max_collisions = curcoll;
914	}
915
916	if (tb->members > `0`)
917	{
918	fillfactor = tb->members / ((double) tb->size);
919	avg_chain_length = ((double) total_chain_length) / tb->members;
920	avg_collisions = ((double) total_collisions) / tb->members;
921	}
922	else
923	{
924	fillfactor = `0`;
925	avg_chain_length = `0`;
926	avg_collisions = `0`;
927	}
928
929	elog(LOG, "size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %i, avg_collisions: %f",
930	tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
931	total_collisions, max_collisions, avg_collisions);
932	}
933
934	#endif /* SH_DEFINE */
935
936
937	/ undefine external parameters, so next hash table can be defined /
938	#undef SH_PREFIX
939	#undef SH_KEY_TYPE
940	#undef SH_KEY
941	#undef SH_ELEMENT_TYPE
942	#undef SH_HASH_KEY
943	#undef SH_SCOPE
944	#undef SH_DECLARE
945	#undef SH_DEFINE
946	#undef SH_GET_HASH
947	#undef SH_STORE_HASH
948	#undef SH_USE_NONDEFAULT_ALLOCATOR
949	#undef SH_EQUAL
950
951	/ undefine locally declared macros /
952	#undef SH_MAKE_PREFIX
953	#undef SH_MAKE_NAME
954	#undef SH_MAKE_NAME_
955	#undef SH_FILLFACTOR
956	#undef SH_MAX_FILLFACTOR
957	#undef SH_GROW_MAX_DIB
958	#undef SH_GROW_MAX_MOVE
959	#undef SH_GROW_MIN_FILLFACTOR
960	#undef SH_MAX_SIZE
961
962	/ types /
963	#undef SH_TYPE
964	#undef SH_STATUS
965	#undef SH_STATUS_EMPTY
966	#undef SH_STATUS_IN_USE
967	#undef SH_ITERATOR
968
969	/ external function names /
970	#undef SH_CREATE
971	#undef SH_DESTROY
972	#undef SH_RESET
973	#undef SH_INSERT
974	#undef SH_DELETE
975	#undef SH_LOOKUP
976	#undef SH_GROW
977	#undef SH_START_ITERATE
978	#undef SH_START_ITERATE_AT
979	#undef SH_ITERATE
980	#undef SH_ALLOCATE
981	#undef SH_FREE
982	#undef SH_STAT
983
984	/ internal function names /
985	#undef SH_COMPUTE_PARAMETERS
986	#undef SH_COMPARE_KEYS
987	#undef SH_INITIAL_BUCKET
988	#undef SH_NEXT
989	#undef SH_PREV
990	#undef SH_DISTANCE_FROM_OPTIMAL
991	#undef SH_ENTRY_HASH
992

Browse the source code of PostgreSQL/src/include/lib/simplehash.h