1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9#ifndef _GDK_SEARCH_H_
10#define _GDK_SEARCH_H_
11/*
12 * @+ Hash indexing
13 *
14 * This is a highly efficient implementation of simple bucket-chained
15 * hashing.
16 *
17 * In the past, we used integer modulo for hashing, with bucket chains
18 * of mean size 4. This was shown to be inferior to direct hashing
19 * with integer anding. The new implementation reflects this.
20 */
21gdk_export void HASHdestroy(BAT *b);
22gdk_export BUN HASHprobe(const Hash *h, const void *v);
23gdk_export BUN HASHlist(Hash *h, BUN i);
24
25
26#define HASHnil(H) (H)->nil
27
28/* play around with h->Hash[i] and h->Link[j] */
29#define HASHget2(h,i) ((BUN) ((BUN2type*) (h)->Hash)[i])
30#define HASHput2(h,i,v) (((BUN2type*) (h)->Hash)[i] = (BUN2type) (v))
31#define HASHgetlink2(h,i) ((BUN) ((BUN2type*) (h)->Link)[i])
32#define HASHputlink2(h,i,v) (((BUN2type*) (h)->Link)[i] = (BUN2type) (v))
33#define HASHget4(h,i) ((BUN) ((BUN4type*) (h)->Hash)[i])
34#define HASHput4(h,i,v) (((BUN4type*) (h)->Hash)[i] = (BUN4type) (v))
35#define HASHgetlink4(h,i) ((BUN) ((BUN4type*) (h)->Link)[i])
36#define HASHputlink4(h,i,v) (((BUN4type*) (h)->Link)[i] = (BUN4type) (v))
37#if SIZEOF_BUN == 8
38#define HASHget8(h,i) ((BUN) ((BUN8type*) (h)->Hash)[i])
39#define HASHput8(h,i,v) (((BUN8type*) (h)->Hash)[i] = (BUN8type) (v))
40#define HASHgetlink8(h,i) ((BUN) ((BUN8type*) (h)->Link)[i])
41#define HASHputlink8(h,i,v) (((BUN8type*) (h)->Link)[i] = (BUN8type) (v))
42#endif
43
44#if SIZEOF_BUN <= 4
45#define HASHget(h,i) \
46 ((h)->width == BUN4 ? HASHget4(h,i) : HASHget2(h,i))
47#define HASHput(h,i,v) \
48 do { \
49 if ((h)->width == 2) { \
50 HASHput2(h,i,v); \
51 } else { \
52 HASHput4(h,i,v); \
53 } \
54 } while (0)
55#define HASHgetlink(h,i) \
56 ((h)->width == BUN4 ? HASHgetlink4(h,i) : HASHgetlink2(h,i))
57#define HASHputlink(h,i,v) \
58 do { \
59 if ((h)->width == 2) { \
60 HASHputlink2(h,i,v); \
61 } else { \
62 HASHputlink4(h,i,v); \
63 } \
64 } while (0)
65#define HASHputall(h, i, v) \
66 do { \
67 if ((h)->width == 2) { \
68 HASHputlink2(h, i, HASHget2(h, v)); \
69 HASHput2(h, v, i); \
70 } else { \
71 HASHputlink4(h, i, HASHget4(h, v)); \
72 HASHput4(h, v, i); \
73 } \
74 } while (0)
75#else
76#define HASHget(h,i) \
77 ((h)->width == BUN8 ? HASHget8(h,i) : \
78 (h)->width == BUN4 ? HASHget4(h,i) : \
79 HASHget2(h,i))
80#define HASHput(h,i,v) \
81 do { \
82 switch ((h)->width) { \
83 case 2: \
84 HASHput2(h,i,v); \
85 break; \
86 case 4: \
87 HASHput4(h,i,v); \
88 break; \
89 case 8: \
90 HASHput8(h,i,v); \
91 break; \
92 } \
93 } while (0)
94#define HASHgetlink(h,i) \
95 ((h)->width == BUN8 ? HASHgetlink8(h,i) : \
96 (h)->width == BUN4 ? HASHgetlink4(h,i) : \
97 HASHgetlink2(h,i))
98#define HASHputlink(h,i,v) \
99 do { \
100 switch ((h)->width) { \
101 case 2: \
102 HASHputlink2(h,i,v); \
103 break; \
104 case 4: \
105 HASHputlink4(h,i,v); \
106 break; \
107 case 8: \
108 HASHputlink8(h,i,v); \
109 break; \
110 } \
111 } while (0)
112#define HASHputall(h, i, v) \
113 do { \
114 switch ((h)->width) { \
115 case 2: \
116 HASHputlink2(h, i, HASHget2(h, v)); \
117 HASHput2(h, v, i); \
118 break; \
119 case 4: \
120 HASHputlink4(h, i, HASHget4(h, v)); \
121 HASHput4(h, v, i); \
122 break; \
123 case 8: \
124 HASHputlink8(h, i, HASHget8(h, v)); \
125 HASHput8(h, v, i); \
126 break; \
127 } \
128 } while (0)
129#endif
130
131/* mix_bte(0x80) == 0x80 */
132#define mix_bte(X) ((unsigned int) (unsigned char) (X))
133/* mix_sht(0x8000) == 0x8000 */
134#define mix_sht(X) ((unsigned int) (unsigned short) (X))
135/* mix_int(0x81060038) == 0x80000000 */
136#define mix_int(X) (((unsigned int) (X) >> 7) ^ \
137 ((unsigned int) (X) >> 13) ^ \
138 ((unsigned int) (X) >> 21) ^ \
139 (unsigned int) (X))
140/* mix_lng(0x810600394347424F) == 0x8000000000000000 */
141#define mix_lng(X) (((ulng) (X) >> 7) ^ \
142 ((ulng) (X) >> 13) ^ \
143 ((ulng) (X) >> 21) ^ \
144 ((ulng) (X) >> 31) ^ \
145 ((ulng) (X) >> 38) ^ \
146 ((ulng) (X) >> 46) ^ \
147 ((ulng) (X) >> 56) ^ \
148 (ulng) (X))
149#ifdef HAVE_HGE
150/* mix_hge(0x810600394347424F90AC1429D6BFCC57) ==
151 * 0x80000000000000000000000000000000 */
152#define mix_hge(X) (((uhge) (X) >> 7) ^ \
153 ((uhge) (X) >> 13) ^ \
154 ((uhge) (X) >> 21) ^ \
155 ((uhge) (X) >> 31) ^ \
156 ((uhge) (X) >> 38) ^ \
157 ((uhge) (X) >> 46) ^ \
158 ((uhge) (X) >> 56) ^ \
159 ((uhge) (X) >> 65) ^ \
160 ((uhge) (X) >> 70) ^ \
161 ((uhge) (X) >> 78) ^ \
162 ((uhge) (X) >> 85) ^ \
163 ((uhge) (X) >> 90) ^ \
164 ((uhge) (X) >> 98) ^ \
165 ((uhge) (X) >> 107) ^ \
166 ((uhge) (X) >> 116) ^ \
167 (uhge) (X))
168#endif
169#define hash_loc(H,V) hash_any(H,V)
170#define hash_var(H,V) hash_any(H,V)
171#define hash_any(H,V) (ATOMhash((H)->type, (V)) & (H)->mask)
172#define heap_hash_any(hp,H,V) ((hp) && (hp)->hashash ? ((BUN *) (V))[-1] & (H)->mask : hash_any(H,V))
173#define hash_bte(H,V) (assert(((H)->mask & 0xFF) == 0xFF), (BUN) mix_bte(*(const unsigned char*) (V)))
174#define hash_sht(H,V) (assert(((H)->mask & 0xFFFF) == 0xFFFF), (BUN) mix_sht(*(const unsigned short*) (V)))
175#define hash_int(H,V) ((BUN) mix_int(*(const unsigned int *) (V)) & (H)->mask)
176/* XXX return size_t-sized value for 8-byte oid? */
177#define hash_lng(H,V) ((BUN) mix_lng(*(const ulng *) (V)) & (H)->mask)
178#ifdef HAVE_HGE
179#define hash_hge(H,V) ((BUN) mix_hge(*(const uhge *) (V)) & (H)->mask)
180#endif
181#if SIZEOF_OID == SIZEOF_INT
182#define hash_oid(H,V) hash_int(H,V)
183#else
184#define hash_oid(H,V) hash_lng(H,V)
185#endif
186
187#define hash_flt(H,V) hash_int(H,V)
188#define hash_dbl(H,V) hash_lng(H,V)
189
190#define HASHfnd_str(x,y,z) \
191 do { \
192 BUN _i; \
193 (x) = BUN_NONE; \
194 if (BAThash((y).b) == GDK_SUCCEED) { \
195 HASHloop_str((y), (y).b->thash, _i, (z)) { \
196 (x) = _i; \
197 break; \
198 } \
199 } else \
200 goto hashfnd_failed; \
201 } while (0)
202#define HASHfnd(x,y,z) \
203 do { \
204 BUN _i; \
205 (x) = BUN_NONE; \
206 if (BAThash((y).b) == GDK_SUCCEED) { \
207 HASHloop((y), (y).b->thash, _i, (z)) { \
208 (x) = _i; \
209 break; \
210 } \
211 } else \
212 goto hashfnd_failed; \
213 } while (0)
214#define HASHfnd_TYPE(x,y,z,TYPE) \
215 do { \
216 BUN _i; \
217 (x) = BUN_NONE; \
218 if (BAThash((y).b) == GDK_SUCCEED) { \
219 HASHloop_##TYPE((y), (y).b->thash, _i, (z)) { \
220 (x) = _i; \
221 break; \
222 } \
223 } else \
224 goto hashfnd_failed; \
225 } while (0)
226#define HASHfnd_bte(x,y,z) HASHfnd_TYPE(x,y,z,bte)
227#define HASHfnd_sht(x,y,z) HASHfnd_TYPE(x,y,z,sht)
228#define HASHfnd_int(x,y,z) HASHfnd_TYPE(x,y,z,int)
229#define HASHfnd_lng(x,y,z) HASHfnd_TYPE(x,y,z,lng)
230#ifdef HAVE_HGE
231#define HASHfnd_hge(x,y,z) HASHfnd_TYPE(x,y,z,hge)
232#endif
233#define HASHfnd_flt(x,y,z) HASHfnd_TYPE(x,y,z,flt)
234#define HASHfnd_dbl(x,y,z) HASHfnd_TYPE(x,y,z,dbl)
235
236/*
237 * A new entry is added with HASHins using the BAT, the BUN index, and
238 * a pointer to the value to be stored.
239 *
240 * HASHins receives a BAT* param and is adaptive, killing wrongly
241 * configured hash tables. Also, persistent hashes cannot be
242 * maintained, so must be destroyed before this macro is called. */
243#define HASHins(b,i,v) \
244 do { \
245 if ((b)->thash) { \
246 MT_lock_set(&(b)->batIdxLock); \
247 if ((b)->thash == (Hash *) 1 || \
248 ((b)->thash != NULL && \
249 (((size_t *) (b)->thash->heap.base)[0] & (1 << 24) || \
250 ((size_t *) (b)->thash->heap.base)[2] * 2 < b->batCount)) || \
251 (((i) & 1023) == 1023 && HASHgonebad((b), (v)))) { \
252 MT_lock_unset(&(b)->batIdxLock); \
253 HASHdestroy(b); \
254 } else { \
255 BUN _c = HASHprobe((b)->thash, (v)); \
256 HASHputall((b)->thash, (i), _c); \
257 (b)->thash->heap.dirty = true; \
258 MT_lock_unset(&(b)->batIdxLock); \
259 } \
260 } \
261 } while (0)
262
263#endif /* _GDK_SEARCH_H_ */
264