| 1 | /* | 
|---|
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public | 
|---|
| 3 | * License, v. 2.0.  If a copy of the MPL was not distributed with this | 
|---|
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. | 
|---|
| 5 | * | 
|---|
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. | 
|---|
| 7 | */ | 
|---|
| 8 |  | 
|---|
| 9 | /* | 
|---|
| 10 | * - Hash Table Creation | 
|---|
| 11 | * The hash indexing scheme for BATs reserves a block of memory to | 
|---|
| 12 | * maintain the hash table and a collision list. A one-to-one mapping | 
|---|
| 13 | * exists between the BAT and the collision list using the BUN | 
|---|
| 14 | * index. NOTE: we alloc the link list as a parallel array to the BUN | 
|---|
| 15 | * array; hence the hash link array has the same size as | 
|---|
| 16 | * BATcapacity(b) (not BATcount(b)). This allows us in the BUN insert | 
|---|
| 17 | * and delete to assume that there is hash space iff there is BUN | 
|---|
| 18 | * space. If there is no BUN space, the BATextend now destroys the | 
|---|
| 19 | * hash table. | 
|---|
| 20 | * | 
|---|
| 21 | * The hash mask size is a power of two, so we can do bitwise AND on | 
|---|
| 22 | * the hash (integer) number to quickly find the head of the bucket | 
|---|
| 23 | * chain.  Clearly, the hash mask size is a crucial parameter. If we | 
|---|
| 24 | * know that the column is unique (tkey), we use direct hashing (mask | 
|---|
| 25 | * size ~= BATcount). Otherwise we dynamically determine the mask size | 
|---|
| 26 | * by starting out with mask size = BATcount/64 (just 1.5% of memory | 
|---|
| 27 | * storage overhead). Then we start building the hash table on the | 
|---|
| 28 | * first 25% of the BAT. As we aim for max-collisions list length of | 
|---|
| 29 | * 4, the list on 25% should not exceed length 1. So, if a small | 
|---|
| 30 | * number of collisssions occurs (mask/2) then we abandon the attempt | 
|---|
| 31 | * and restart with a mask that is 4 times larger. This converges | 
|---|
| 32 | * after three cycles to direct hashing. | 
|---|
| 33 | */ | 
|---|
| 34 |  | 
|---|
| 35 | #include "monetdb_config.h" | 
|---|
| 36 | #include "gdk.h" | 
|---|
| 37 | #include "gdk_private.h" | 
|---|
| 38 |  | 
|---|
| 39 | static int | 
|---|
| 40 | HASHwidth(BUN hashsize) | 
|---|
| 41 | { | 
|---|
| 42 | if (hashsize <= (BUN) BUN2_NONE) | 
|---|
| 43 | return BUN2; | 
|---|
| 44 | #if SIZEOF_BUN <= 4 | 
|---|
| 45 | return BUN4; | 
|---|
| 46 | #else | 
|---|
| 47 | if (hashsize <= (BUN) BUN4_NONE) | 
|---|
| 48 | return BUN4; | 
|---|
| 49 | return BUN8; | 
|---|
| 50 | #endif | 
|---|
| 51 | } | 
|---|
| 52 |  | 
|---|
| 53 | BUN | 
|---|
| 54 | HASHmask(BUN cnt) | 
|---|
| 55 | { | 
|---|
| 56 | BUN m = cnt; | 
|---|
| 57 |  | 
|---|
| 58 | /* find largest power of 2 smaller than or equal to cnt */ | 
|---|
| 59 | m |= m >> 1; | 
|---|
| 60 | m |= m >> 2; | 
|---|
| 61 | m |= m >> 4; | 
|---|
| 62 | m |= m >> 8; | 
|---|
| 63 | m |= m >> 16; | 
|---|
| 64 | #if SIZEOF_BUN == 8 | 
|---|
| 65 | m |= m >> 32; | 
|---|
| 66 | #endif | 
|---|
| 67 | m -= m >> 1; | 
|---|
| 68 |  | 
|---|
| 69 | /* if cnt is more than 1/3 into the gap between m and 2*m, | 
|---|
| 70 | double m */ | 
|---|
| 71 | if (m + m - cnt < 2 * (cnt - m)) | 
|---|
| 72 | m += m; | 
|---|
| 73 | if (m < BATTINY) | 
|---|
| 74 | m = BATTINY; | 
|---|
| 75 | return m; | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 | static inline void | 
|---|
| 79 | HASHclear(Hash *h) | 
|---|
| 80 | { | 
|---|
| 81 | /* since BUN2_NONE, BUN4_NONE, BUN8_NONE | 
|---|
| 82 | * are all equal to -1 (~0), i.e., have all bits set, | 
|---|
| 83 | * we can use a simple memset() to clear the Hash, | 
|---|
| 84 | * rather than iteratively assigning individual | 
|---|
| 85 | * BUNi_NONE values in a for-loop | 
|---|
| 86 | */ | 
|---|
| 87 | memset(h->Hash, 0xFF, (h->mask + 1) * h->width); | 
|---|
| 88 | } | 
|---|
| 89 |  | 
|---|
| 90 | #define HASH_VERSION		2 | 
|---|
| 91 | #define 	6 /* nr of size_t fields in header */ | 
|---|
| 92 |  | 
|---|
| 93 | gdk_return | 
|---|
| 94 | HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count) | 
|---|
| 95 | { | 
|---|
| 96 | Heap *hp = &h->heap; | 
|---|
| 97 | int width = HASHwidth(size); | 
|---|
| 98 |  | 
|---|
| 99 | if (HEAPalloc(hp, mask + size + HASH_HEADER_SIZE * SIZEOF_SIZE_T / width, width) != GDK_SUCCEED) | 
|---|
| 100 | return GDK_FAIL; | 
|---|
| 101 | h->heap.free = (mask + size) * width + HASH_HEADER_SIZE * SIZEOF_SIZE_T; | 
|---|
| 102 | h->lim = size; | 
|---|
| 103 | h->mask = mask - 1; | 
|---|
| 104 | h->width = width; | 
|---|
| 105 | switch (width) { | 
|---|
| 106 | case BUN2: | 
|---|
| 107 | h->nil = (BUN) BUN2_NONE; | 
|---|
| 108 | break; | 
|---|
| 109 | case BUN4: | 
|---|
| 110 | h->nil = (BUN) BUN4_NONE; | 
|---|
| 111 | break; | 
|---|
| 112 | #ifdef BUN8 | 
|---|
| 113 | case BUN8: | 
|---|
| 114 | h->nil = (BUN) BUN8_NONE; | 
|---|
| 115 | break; | 
|---|
| 116 | #endif | 
|---|
| 117 | default: | 
|---|
| 118 | assert(0); | 
|---|
| 119 | } | 
|---|
| 120 | h->Link = h->heap.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T; | 
|---|
| 121 | h->Hash = (void *) ((char *) h->Link + h->lim * width); | 
|---|
| 122 | h->type = tpe; | 
|---|
| 123 | HASHclear(h);		/* zero the mask */ | 
|---|
| 124 | ((size_t *) h->heap.base)[0] = HASH_VERSION; | 
|---|
| 125 | ((size_t *) h->heap.base)[1] = size; | 
|---|
| 126 | ((size_t *) h->heap.base)[2] = mask; | 
|---|
| 127 | ((size_t *) h->heap.base)[3] = width; | 
|---|
| 128 | ((size_t *) h->heap.base)[4] = count; | 
|---|
| 129 | ((size_t *) h->heap.base)[5] = 0; /* # filled slots (chain heads) */ | 
|---|
| 130 | ACCELDEBUG fprintf(stderr, "#HASHnew: create hash(size "BUNFMT ", mask "BUNFMT ", width %d, total "BUNFMT " bytes);\n", size, mask, width, (size + mask) * width); | 
|---|
| 131 | return GDK_SUCCEED; | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | /* collect HASH statistics for analysis */ | 
|---|
| 135 | static void | 
|---|
| 136 | HASHcollisions(BAT *b, Hash *h) | 
|---|
| 137 | { | 
|---|
| 138 | lng cnt, entries = 0, max = 0; | 
|---|
| 139 | double total = 0; | 
|---|
| 140 | BUN p, i, j, nil; | 
|---|
| 141 |  | 
|---|
| 142 | if (b == 0 || h == 0) | 
|---|
| 143 | return; | 
|---|
| 144 | nil = HASHnil(h); | 
|---|
| 145 | for (i = 0, j = h->mask; i <= j; i++) | 
|---|
| 146 | if ((p = HASHget(h, i)) != nil) { | 
|---|
| 147 | entries++; | 
|---|
| 148 | cnt = 0; | 
|---|
| 149 | for (; p != nil; p = HASHgetlink(h, p)) | 
|---|
| 150 | cnt++; | 
|---|
| 151 | if (cnt > max) | 
|---|
| 152 | max = cnt; | 
|---|
| 153 | total += cnt; | 
|---|
| 154 | } | 
|---|
| 155 | fprintf(stderr, "#BAThash: statistics ("BUNFMT ", entries "LLFMT ", mask "BUNFMT ", max "LLFMT ", avg %2.6f);\n", BATcount(b), entries, h->mask, max, entries == 0 ? 0 : total / entries); | 
|---|
| 156 | } | 
|---|
| 157 |  | 
|---|
| 158 | /* Return TRUE if we have a hash on the tail, even if we need to read | 
|---|
| 159 | * one from disk. | 
|---|
| 160 | * | 
|---|
| 161 | * Note that the b->thash pointer can be NULL, meaning there is no | 
|---|
| 162 | * hash; (Hash *) 1, meaning there is no hash loaded, but it may exist | 
|---|
| 163 | * on disk; or a valid pointer to a loaded hash.  These values are | 
|---|
| 164 | * maintained here, in the HASHdestroy and HASHfree functions, and in | 
|---|
| 165 | * BBPdiskscan during initialization. */ | 
|---|
| 166 | bool | 
|---|
| 167 | BATcheckhash(BAT *b) | 
|---|
| 168 | { | 
|---|
| 169 | bool ret; | 
|---|
| 170 | lng t = 0; | 
|---|
| 171 |  | 
|---|
| 172 | /* we don't need the lock just to read the value b->thash */ | 
|---|
| 173 | if (b->thash == (Hash *) 1) { | 
|---|
| 174 | /* but when we want to change it, we need the lock */ | 
|---|
| 175 | ACCELDEBUG t = GDKusec(); | 
|---|
| 176 | MT_lock_set(&b->batIdxLock); | 
|---|
| 177 | ACCELDEBUG t = GDKusec() - t; | 
|---|
| 178 | /* if still 1 now that we have the lock, we can update */ | 
|---|
| 179 | if (b->thash == (Hash *) 1) { | 
|---|
| 180 | Hash *h; | 
|---|
| 181 | int fd; | 
|---|
| 182 |  | 
|---|
| 183 | assert(!GDKinmemory()); | 
|---|
| 184 | b->thash = NULL; | 
|---|
| 185 | if ((h = GDKzalloc(sizeof(*h))) != NULL && | 
|---|
| 186 | (h->heap.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) { | 
|---|
| 187 | const char *nme = BBP_physical(b->batCacheid); | 
|---|
| 188 | strconcat_len(h->heap.filename, | 
|---|
| 189 | sizeof(h->heap.filename), | 
|---|
| 190 | nme, ".thash", NULL); | 
|---|
| 191 |  | 
|---|
| 192 | /* check whether a persisted hash can be found */ | 
|---|
| 193 | if ((fd = GDKfdlocate(h->heap.farmid, nme, "rb+", "thash")) >= 0) { | 
|---|
| 194 | size_t hdata[HASH_HEADER_SIZE]; | 
|---|
| 195 | struct stat st; | 
|---|
| 196 |  | 
|---|
| 197 | if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) && | 
|---|
| 198 | hdata[0] == ( | 
|---|
| 199 | #ifdef PERSISTENTHASH | 
|---|
| 200 | ((size_t) 1 << 24) | | 
|---|
| 201 | #endif | 
|---|
| 202 | HASH_VERSION) && | 
|---|
| 203 | hdata[4] == (size_t) BATcount(b) && | 
|---|
| 204 | fstat(fd, &st) == 0 && | 
|---|
| 205 | st.st_size >= (off_t) (h->heap.size = h->heap.free = (hdata[1] + hdata[2]) * hdata[3] + HASH_HEADER_SIZE * SIZEOF_SIZE_T) && | 
|---|
| 206 | HEAPload(&h->heap, nme, "thash", false) == GDK_SUCCEED) { | 
|---|
| 207 | h->lim = (BUN) hdata[1]; | 
|---|
| 208 | h->type = ATOMtype(b->ttype); | 
|---|
| 209 | h->mask = (BUN) (hdata[2] - 1); | 
|---|
| 210 | h->width = (int) hdata[3]; | 
|---|
| 211 | switch (h->width) { | 
|---|
| 212 | case BUN2: | 
|---|
| 213 | h->nil = (BUN) BUN2_NONE; | 
|---|
| 214 | break; | 
|---|
| 215 | case BUN4: | 
|---|
| 216 | h->nil = (BUN) BUN4_NONE; | 
|---|
| 217 | break; | 
|---|
| 218 | #ifdef BUN8 | 
|---|
| 219 | case BUN8: | 
|---|
| 220 | h->nil = (BUN) BUN8_NONE; | 
|---|
| 221 | break; | 
|---|
| 222 | #endif | 
|---|
| 223 | default: | 
|---|
| 224 | assert(0); | 
|---|
| 225 | } | 
|---|
| 226 | h->Link = h->heap.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T; | 
|---|
| 227 | h->Hash = (void *) ((char *) h->Link + h->lim * h->width); | 
|---|
| 228 | close(fd); | 
|---|
| 229 | h->heap.parentid = b->batCacheid; | 
|---|
| 230 | h->heap.dirty = false; | 
|---|
| 231 | BATsetprop_nolock( | 
|---|
| 232 | b, | 
|---|
| 233 | GDK_HASH_MASK, | 
|---|
| 234 | TYPE_oid, | 
|---|
| 235 | &(oid){h->mask + 1}); | 
|---|
| 236 | b->thash = h; | 
|---|
| 237 | ACCELDEBUG fprintf(stderr, "#BATcheckhash: reusing persisted hash %s\n", BATgetId(b)); | 
|---|
| 238 | MT_lock_unset(&b->batIdxLock); | 
|---|
| 239 | return true; | 
|---|
| 240 | } | 
|---|
| 241 | close(fd); | 
|---|
| 242 | /* unlink unusable file */ | 
|---|
| 243 | GDKunlink(h->heap.farmid, BATDIR, nme, "thash"); | 
|---|
| 244 | } | 
|---|
| 245 | } | 
|---|
| 246 | GDKfree(h); | 
|---|
| 247 | GDKclrerr();	/* we're not currently interested in errors */ | 
|---|
| 248 | } | 
|---|
| 249 | MT_lock_unset(&b->batIdxLock); | 
|---|
| 250 | } | 
|---|
| 251 | ret = b->thash != NULL; | 
|---|
| 252 | ACCELDEBUG if (ret) fprintf(stderr, "#BATcheckhash: already has hash %s, waited "LLFMT " usec\n", BATgetId(b), t); | 
|---|
| 253 | return ret; | 
|---|
| 254 | } | 
|---|
| 255 |  | 
|---|
| 256 | #ifdef PERSISTENTHASH | 
|---|
| 257 | static void | 
|---|
| 258 | BAThashsync(void *arg) | 
|---|
| 259 | { | 
|---|
| 260 | BAT *b = arg; | 
|---|
| 261 | int fd; | 
|---|
| 262 | lng t0 = 0; | 
|---|
| 263 | const char *failed = " failed"; | 
|---|
| 264 |  | 
|---|
| 265 | ACCELDEBUG t0 = GDKusec(); | 
|---|
| 266 |  | 
|---|
| 267 | /* we could check whether b->thash == NULL before getting the | 
|---|
| 268 | * lock, and only lock if it isn't; however, it's very | 
|---|
| 269 | * unlikely that that is the case, so we don't */ | 
|---|
| 270 | MT_lock_set(&b->batIdxLock); | 
|---|
| 271 | if (b->thash != NULL) { | 
|---|
| 272 | Heap *hp = &b->thash->heap; | 
|---|
| 273 | /* only persist if parent BAT hasn't changed in the | 
|---|
| 274 | * mean time */ | 
|---|
| 275 | if (!b->theap.dirty && | 
|---|
| 276 | ((size_t *) hp->base)[4] == b->batCount && | 
|---|
| 277 | HEAPsave(hp, hp->filename, NULL) == GDK_SUCCEED) { | 
|---|
| 278 | if (hp->storage == STORE_MEM) { | 
|---|
| 279 | if ((fd = GDKfdlocate(hp->farmid, hp->filename, "rb+", NULL)) >= 0) { | 
|---|
| 280 | ((size_t *) hp->base)[0] |= (size_t) 1 << 24; | 
|---|
| 281 | if (write(fd, hp->base, SIZEOF_SIZE_T) >= 0) { | 
|---|
| 282 | failed = ""; /* not failed */ | 
|---|
| 283 | if (!(GDKdebug & NOSYNCMASK)) { | 
|---|
| 284 | #if defined(NATIVE_WIN32) | 
|---|
| 285 | _commit(fd); | 
|---|
| 286 | #elif defined(HAVE_FDATASYNC) | 
|---|
| 287 | fdatasync(fd); | 
|---|
| 288 | #elif defined(HAVE_FSYNC) | 
|---|
| 289 | fsync(fd); | 
|---|
| 290 | #endif | 
|---|
| 291 | } | 
|---|
| 292 | hp->dirty = false; | 
|---|
| 293 | } else { | 
|---|
| 294 | perror( "write hash"); | 
|---|
| 295 | } | 
|---|
| 296 | close(fd); | 
|---|
| 297 | } | 
|---|
| 298 | } else { | 
|---|
| 299 | ((size_t *) hp->base)[0] |= (size_t) 1 << 24; | 
|---|
| 300 | if (!(GDKdebug & NOSYNCMASK) && | 
|---|
| 301 | MT_msync(hp->base, SIZEOF_SIZE_T) < 0) { | 
|---|
| 302 | ((size_t *) hp->base)[0] &= ~((size_t) 1 << 24); | 
|---|
| 303 | } else { | 
|---|
| 304 | hp->dirty = false; | 
|---|
| 305 | failed = ""; /* not failed */ | 
|---|
| 306 | } | 
|---|
| 307 | } | 
|---|
| 308 | ACCELDEBUG fprintf(stderr, "#BAThash: persisting hash %s ("LLFMT " usec)%s\n", hp->filename, GDKusec() - t0, failed); | 
|---|
| 309 | } | 
|---|
| 310 | } | 
|---|
| 311 | MT_lock_unset(&b->batIdxLock); | 
|---|
| 312 | BBPunfix(b->batCacheid); | 
|---|
| 313 | } | 
|---|
| 314 | #endif | 
|---|
| 315 |  | 
|---|
| 316 | #define starthash(TYPE)							\ | 
|---|
| 317 | do {								\ | 
|---|
| 318 | const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0);	\ | 
|---|
| 319 | for (; p < cnt1; p++) {					\ | 
|---|
| 320 | c = hash_##TYPE(h, v + o - b->hseqbase);	\ | 
|---|
| 321 | hget = HASHget(h, c);				\ | 
|---|
| 322 | if (hget == hnil) {				\ | 
|---|
| 323 | if (nslots == maxslots)			\ | 
|---|
| 324 | break; /* mask too full */	\ | 
|---|
| 325 | nslots++;				\ | 
|---|
| 326 | }						\ | 
|---|
| 327 | HASHputlink(h, p, hget);			\ | 
|---|
| 328 | HASHput(h, c, p);				\ | 
|---|
| 329 | o = canditer_next(&ci);				\ | 
|---|
| 330 | }							\ | 
|---|
| 331 | } while (0) | 
|---|
| 332 | #define finishhash(TYPE)						\ | 
|---|
| 333 | do {								\ | 
|---|
| 334 | const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0);	\ | 
|---|
| 335 | for (; p < cnt; p++) {					\ | 
|---|
| 336 | c = hash_##TYPE(h, v + o - b->hseqbase);	\ | 
|---|
| 337 | c = hash_##TYPE(h, v + o - b->hseqbase);	\ | 
|---|
| 338 | hget = HASHget(h, c);				\ | 
|---|
| 339 | nslots += hget == hnil;				\ | 
|---|
| 340 | HASHputlink(h, p, hget);			\ | 
|---|
| 341 | HASHput(h, c, p);				\ | 
|---|
| 342 | o = canditer_next(&ci);				\ | 
|---|
| 343 | }							\ | 
|---|
| 344 | } while (0) | 
|---|
| 345 |  | 
|---|
| 346 | /* | 
|---|
| 347 | * The prime routine for the BAT layer is to create a new hash index. | 
|---|
| 348 | * Its argument is the element type and the maximum number of BUNs be | 
|---|
| 349 | * stored under the hash function. | 
|---|
| 350 | */ | 
|---|
| 351 | Hash * | 
|---|
| 352 | BAThash_impl(BAT *b, BAT *s, const char *ext) | 
|---|
| 353 | { | 
|---|
| 354 | lng t0 = 0; | 
|---|
| 355 | unsigned int tpe = ATOMbasetype(b->ttype); | 
|---|
| 356 | BUN cnt, cnt1; | 
|---|
| 357 | struct canditer ci; | 
|---|
| 358 | BUN mask, maxmask = 0; | 
|---|
| 359 | BUN p, c; | 
|---|
| 360 | oid o; | 
|---|
| 361 | BUN nslots; | 
|---|
| 362 | BUN hnil, hget; | 
|---|
| 363 | Hash *h = NULL; | 
|---|
| 364 | const char *nme = GDKinmemory() ? ":inmemory": BBP_physical(b->batCacheid); | 
|---|
| 365 | BATiter bi = bat_iterator(b); | 
|---|
| 366 | PROPrec *prop; | 
|---|
| 367 |  | 
|---|
| 368 | ACCELDEBUG t0 = GDKusec(); | 
|---|
| 369 | ACCELDEBUG fprintf(stderr, "#BAThash: create hash("ALGOBATFMT ");\n", | 
|---|
| 370 | ALGOBATPAR(b)); | 
|---|
| 371 | if (b->ttype == TYPE_void) { | 
|---|
| 372 | if (is_oid_nil(b->tseqbase)) { | 
|---|
| 373 | ACCELDEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n"); | 
|---|
| 374 | GDKerror( "BAThash: no hash on void/nil column\n"); | 
|---|
| 375 | return NULL; | 
|---|
| 376 | } | 
|---|
| 377 | ACCELDEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n"); | 
|---|
| 378 |  | 
|---|
| 379 | tpe = TYPE_void; | 
|---|
| 380 | } | 
|---|
| 381 |  | 
|---|
| 382 | cnt = canditer_init(&ci, b, s); | 
|---|
| 383 |  | 
|---|
| 384 | if ((h = GDKzalloc(sizeof(*h))) == NULL || | 
|---|
| 385 | (h->heap.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) < 0) { | 
|---|
| 386 | GDKfree(h); | 
|---|
| 387 | return NULL; | 
|---|
| 388 | } | 
|---|
| 389 | h->heap.dirty = true; | 
|---|
| 390 | strconcat_len(h->heap.filename, sizeof(h->heap.filename), | 
|---|
| 391 | nme, ".", ext, NULL); | 
|---|
| 392 |  | 
|---|
| 393 | /* determine hash mask size */ | 
|---|
| 394 | cnt1 = 0; | 
|---|
| 395 | if (ATOMsize(tpe) == 1) { | 
|---|
| 396 | /* perfect hash for one-byte sized atoms */ | 
|---|
| 397 | mask = (1 << 8); | 
|---|
| 398 | } else if (ATOMsize(tpe) == 2) { | 
|---|
| 399 | /* perfect hash for two-byte sized atoms */ | 
|---|
| 400 | mask = (1 << 16); | 
|---|
| 401 | } else if (b->tkey || cnt <= 4096) { | 
|---|
| 402 | /* if key, or if small, don't bother dynamically | 
|---|
| 403 | * adjusting the hash mask */ | 
|---|
| 404 | mask = HASHmask(cnt); | 
|---|
| 405 | } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_HASH_MASK)) != NULL) { | 
|---|
| 406 | assert(prop->v.vtype == TYPE_oid); | 
|---|
| 407 | mask = prop->v.val.oval; | 
|---|
| 408 | assert((mask & (mask - 1)) == 0); /* power of two */ | 
|---|
| 409 | maxmask = HASHmask(cnt); | 
|---|
| 410 | if (mask > maxmask) | 
|---|
| 411 | mask = maxmask; | 
|---|
| 412 | } else { | 
|---|
| 413 | /* dynamic hash: we start with HASHmask(cnt)/64, or, | 
|---|
| 414 | * if cnt large enough, HASHmask(cnt)/256; if there | 
|---|
| 415 | * are too many collisions we try HASHmask(cnt)/64, | 
|---|
| 416 | * HASHmask(cnt)/16, HASHmask(cnt)/4, and finally | 
|---|
| 417 | * HASHmask(cnt), but we might skip some of these if | 
|---|
| 418 | * there are many distinct values.  */ | 
|---|
| 419 | maxmask = HASHmask(cnt); | 
|---|
| 420 | mask = maxmask >> 6; | 
|---|
| 421 | while (mask > 4096) | 
|---|
| 422 | mask >>= 2; | 
|---|
| 423 | /* try out on first 25% of b */ | 
|---|
| 424 | cnt1 = cnt >> 2; | 
|---|
| 425 | } | 
|---|
| 426 |  | 
|---|
| 427 | o = canditer_next(&ci);	/* always one ahead */ | 
|---|
| 428 | for (;;) { | 
|---|
| 429 | BUN maxslots = (mask >> 3) - 1;	/* 1/8 full is too full */ | 
|---|
| 430 |  | 
|---|
| 431 | nslots = 0; | 
|---|
| 432 | p = 0; | 
|---|
| 433 | HEAPfree(&h->heap, true); | 
|---|
| 434 | /* create the hash structures */ | 
|---|
| 435 | if (HASHnew(h, ATOMtype(b->ttype), s ? cnt : BATcapacity(b), | 
|---|
| 436 | mask, cnt) != GDK_SUCCEED) { | 
|---|
| 437 |  | 
|---|
| 438 | GDKfree(h); | 
|---|
| 439 | return NULL; | 
|---|
| 440 | } | 
|---|
| 441 |  | 
|---|
| 442 | hnil = HASHnil(h); | 
|---|
| 443 |  | 
|---|
| 444 | switch (tpe) { | 
|---|
| 445 | case TYPE_bte: | 
|---|
| 446 | starthash(bte); | 
|---|
| 447 | break; | 
|---|
| 448 | case TYPE_sht: | 
|---|
| 449 | starthash(sht); | 
|---|
| 450 | break; | 
|---|
| 451 | case TYPE_flt: | 
|---|
| 452 | starthash(flt); | 
|---|
| 453 | break; | 
|---|
| 454 | case TYPE_int: | 
|---|
| 455 | starthash(int); | 
|---|
| 456 | break; | 
|---|
| 457 | case TYPE_dbl: | 
|---|
| 458 | starthash(dbl); | 
|---|
| 459 | break; | 
|---|
| 460 | case TYPE_lng: | 
|---|
| 461 | starthash(lng); | 
|---|
| 462 | break; | 
|---|
| 463 | #ifdef HAVE_HGE | 
|---|
| 464 | case TYPE_hge: | 
|---|
| 465 | starthash(hge); | 
|---|
| 466 | break; | 
|---|
| 467 | #endif | 
|---|
| 468 | default: | 
|---|
| 469 | for (; p < cnt1; p++) { | 
|---|
| 470 | const void *restrict v = BUNtail(bi, o - b->hseqbase); | 
|---|
| 471 | c = heap_hash_any(b->tvheap, h, v); | 
|---|
| 472 | hget = HASHget(h, c); | 
|---|
| 473 | if (hget == hnil) { | 
|---|
| 474 | if (nslots == maxslots) | 
|---|
| 475 | break; /* mask too full */ | 
|---|
| 476 | nslots++; | 
|---|
| 477 | } | 
|---|
| 478 | HASHputlink(h, p, hget); | 
|---|
| 479 | HASHput(h, c, p); | 
|---|
| 480 | o = canditer_next(&ci); | 
|---|
| 481 | } | 
|---|
| 482 | break; | 
|---|
| 483 | } | 
|---|
| 484 | ACCELDEBUG if (p < cnt1) | 
|---|
| 485 | fprintf(stderr, "#BAThash(%s): abort starthash with " | 
|---|
| 486 | "mask "BUNFMT " at "BUNFMT "\n", BATgetId(b), | 
|---|
| 487 | mask, p); | 
|---|
| 488 | if (p == cnt1 || mask == maxmask) | 
|---|
| 489 | break; | 
|---|
| 490 | mask <<= 2; | 
|---|
| 491 | /* if we fill up the slots fast (p <= maxslots * 1.2) | 
|---|
| 492 | * increase mask size a bit more quickly */ | 
|---|
| 493 | if (mask < maxmask && p <= maxslots * 1.2) | 
|---|
| 494 | mask <<= 2; | 
|---|
| 495 | canditer_reset(&ci); | 
|---|
| 496 | o = canditer_next(&ci); | 
|---|
| 497 | } | 
|---|
| 498 |  | 
|---|
| 499 | /* finish the hashtable with the current mask */ | 
|---|
| 500 | switch (tpe) { | 
|---|
| 501 | case TYPE_bte: | 
|---|
| 502 | finishhash(bte); | 
|---|
| 503 | break; | 
|---|
| 504 | case TYPE_sht: | 
|---|
| 505 | finishhash(sht); | 
|---|
| 506 | break; | 
|---|
| 507 | case TYPE_int: | 
|---|
| 508 | finishhash(int); | 
|---|
| 509 | break; | 
|---|
| 510 | case TYPE_flt: | 
|---|
| 511 | finishhash(flt); | 
|---|
| 512 | break; | 
|---|
| 513 | case TYPE_dbl: | 
|---|
| 514 | finishhash(dbl); | 
|---|
| 515 | break; | 
|---|
| 516 | case TYPE_lng: | 
|---|
| 517 | finishhash(lng); | 
|---|
| 518 | break; | 
|---|
| 519 | #ifdef HAVE_HGE | 
|---|
| 520 | case TYPE_hge: | 
|---|
| 521 | finishhash(hge); | 
|---|
| 522 | break; | 
|---|
| 523 | #endif | 
|---|
| 524 | default: | 
|---|
| 525 | for (; p < cnt; p++) { | 
|---|
| 526 | const void *restrict v = BUNtail(bi, o - b->hseqbase); | 
|---|
| 527 | c = heap_hash_any(b->tvheap, h, v); | 
|---|
| 528 | hget = HASHget(h, c); | 
|---|
| 529 | nslots += hget == hnil; | 
|---|
| 530 | HASHputlink(h, p, hget); | 
|---|
| 531 | HASHput(h, c, p); | 
|---|
| 532 | o = canditer_next(&ci); | 
|---|
| 533 | } | 
|---|
| 534 | break; | 
|---|
| 535 | } | 
|---|
| 536 | if (s == NULL) | 
|---|
| 537 | BATsetprop_nolock(b, GDK_HASH_MASK, TYPE_oid, &(oid){h->mask + 1}); | 
|---|
| 538 | ((size_t *) h->heap.base)[5] = (size_t) nslots; | 
|---|
| 539 | #ifndef NDEBUG | 
|---|
| 540 | /* clear unused part of Link array */ | 
|---|
| 541 | memset((char *) h->Link + cnt * h->width, 0, (h->lim - cnt) * h->width); | 
|---|
| 542 | #endif | 
|---|
| 543 | h->heap.parentid = b->batCacheid; | 
|---|
| 544 | /* if the number of occupied slots is equal to the bat count, | 
|---|
| 545 | * all values are necessarily distinct */ | 
|---|
| 546 | if (nslots == BATcount(b) && !b->tkey) { | 
|---|
| 547 | b->tkey = true; | 
|---|
| 548 | b->batDirtydesc = true; | 
|---|
| 549 | } | 
|---|
| 550 | ACCELDEBUG { | 
|---|
| 551 | fprintf(stderr, "#BAThash: hash construction "LLFMT " usec\n", GDKusec() - t0); | 
|---|
| 552 | HASHcollisions(b, h); | 
|---|
| 553 | } | 
|---|
| 554 | return h; | 
|---|
| 555 | } | 
|---|
| 556 |  | 
|---|
| 557 | gdk_return | 
|---|
| 558 | BAThash(BAT *b) | 
|---|
| 559 | { | 
|---|
| 560 | assert(b->batCacheid > 0); | 
|---|
| 561 | if (BATcheckhash(b)) { | 
|---|
| 562 | return GDK_SUCCEED; | 
|---|
| 563 | } | 
|---|
| 564 | MT_lock_set(&b->batIdxLock); | 
|---|
| 565 | if (b->thash == NULL) { | 
|---|
| 566 | if ((b->thash = BAThash_impl(b, NULL, "thash")) == NULL) { | 
|---|
| 567 | MT_lock_unset(&b->batIdxLock); | 
|---|
| 568 | return GDK_FAIL; | 
|---|
| 569 | } | 
|---|
| 570 | #ifdef PERSISTENTHASH | 
|---|
| 571 | if (BBP_status(b->batCacheid) & BBPEXISTING && !b->theap.dirty && !GDKinmemory()) { | 
|---|
| 572 | MT_Id tid; | 
|---|
| 573 | BBPfix(b->batCacheid); | 
|---|
| 574 | char name[16]; | 
|---|
| 575 | snprintf(name, sizeof(name), "hashsync%d", b->batCacheid); | 
|---|
| 576 | MT_lock_unset(&b->batIdxLock); | 
|---|
| 577 | if (MT_create_thread(&tid, BAThashsync, b, | 
|---|
| 578 | MT_THR_DETACHED, | 
|---|
| 579 | name) < 0) { | 
|---|
| 580 | /* couldn't start thread: clean up */ | 
|---|
| 581 | BBPunfix(b->batCacheid); | 
|---|
| 582 | } | 
|---|
| 583 | return GDK_SUCCEED; | 
|---|
| 584 | } else | 
|---|
| 585 | ACCELDEBUG fprintf(stderr, "#BAThash: NOT persisting hash %d\n", b->batCacheid); | 
|---|
| 586 | #endif | 
|---|
| 587 | } | 
|---|
| 588 | MT_lock_unset(&b->batIdxLock); | 
|---|
| 589 | return GDK_SUCCEED; | 
|---|
| 590 | } | 
|---|
| 591 |  | 
|---|
| 592 | /* | 
|---|
| 593 | * The entry on which a value hashes can be calculated with the | 
|---|
| 594 | * routine HASHprobe. | 
|---|
| 595 | */ | 
|---|
| 596 | BUN | 
|---|
| 597 | HASHprobe(const Hash *h, const void *v) | 
|---|
| 598 | { | 
|---|
| 599 | switch (ATOMbasetype(h->type)) { | 
|---|
| 600 | case TYPE_bte: | 
|---|
| 601 | return hash_bte(h, v); | 
|---|
| 602 | case TYPE_sht: | 
|---|
| 603 | return hash_sht(h, v); | 
|---|
| 604 | case TYPE_int: | 
|---|
| 605 | case TYPE_flt: | 
|---|
| 606 | return hash_int(h, v); | 
|---|
| 607 | case TYPE_dbl: | 
|---|
| 608 | case TYPE_lng: | 
|---|
| 609 | return hash_lng(h, v); | 
|---|
| 610 | #ifdef HAVE_HGE | 
|---|
| 611 | case TYPE_hge: | 
|---|
| 612 | return hash_hge(h, v); | 
|---|
| 613 | #endif | 
|---|
| 614 | default: | 
|---|
| 615 | return hash_any(h, v); | 
|---|
| 616 | } | 
|---|
| 617 | } | 
|---|
| 618 |  | 
|---|
| 619 | BUN | 
|---|
| 620 | HASHlist(Hash *h, BUN i) | 
|---|
| 621 | { | 
|---|
| 622 | BUN c = 1; | 
|---|
| 623 | BUN j = HASHget(h, i), nil = HASHnil(h); | 
|---|
| 624 |  | 
|---|
| 625 | if (j == nil) | 
|---|
| 626 | return 1; | 
|---|
| 627 | while ((j = HASHgetlink(h, i)) != nil) { | 
|---|
| 628 | c++; | 
|---|
| 629 | i = j; | 
|---|
| 630 | } | 
|---|
| 631 | return c; | 
|---|
| 632 | } | 
|---|
| 633 |  | 
|---|
| 634 | void | 
|---|
| 635 | HASHdestroy(BAT *b) | 
|---|
| 636 | { | 
|---|
| 637 | if (b && b->thash) { | 
|---|
| 638 | Hash *hs; | 
|---|
| 639 | MT_lock_set(&b->batIdxLock); | 
|---|
| 640 | hs = b->thash; | 
|---|
| 641 | b->thash = NULL; | 
|---|
| 642 | MT_lock_unset(&b->batIdxLock); | 
|---|
| 643 | if (hs == (Hash *) 1) { | 
|---|
| 644 | GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap), | 
|---|
| 645 | BATDIR, | 
|---|
| 646 | BBP_physical(b->batCacheid), | 
|---|
| 647 | "thash"); | 
|---|
| 648 | } else if (hs) { | 
|---|
| 649 | bat p = VIEWtparent(b); | 
|---|
| 650 | BAT *hp = NULL; | 
|---|
| 651 |  | 
|---|
| 652 | if (p) | 
|---|
| 653 | hp = BBP_cache(p); | 
|---|
| 654 |  | 
|---|
| 655 | if (!hp || hs != hp->thash) { | 
|---|
| 656 | ACCELDEBUG if (*(size_t *) hs->heap.base & (1 << 24)) | 
|---|
| 657 | fprintf(stderr, "#HASHdestroy: removing persisted hash %d\n", b->batCacheid); | 
|---|
| 658 | HEAPfree(&hs->heap, true); | 
|---|
| 659 | GDKfree(hs); | 
|---|
| 660 | } | 
|---|
| 661 | } | 
|---|
| 662 | } | 
|---|
| 663 | } | 
|---|
| 664 |  | 
|---|
| 665 | void | 
|---|
| 666 | HASHfree(BAT *b) | 
|---|
| 667 | { | 
|---|
| 668 | if (b && b->thash) { | 
|---|
| 669 | MT_lock_set(&b->batIdxLock); | 
|---|
| 670 | if (b->thash && b->thash != (Hash *) 1) { | 
|---|
| 671 | bool rmheap = GDKinmemory() || b->thash->heap.dirty; | 
|---|
| 672 |  | 
|---|
| 673 | HEAPfree(&b->thash->heap, rmheap); | 
|---|
| 674 | GDKfree(b->thash); | 
|---|
| 675 | b->thash = rmheap ? NULL : (Hash *) 1; | 
|---|
| 676 | } | 
|---|
| 677 | MT_lock_unset(&b->batIdxLock); | 
|---|
| 678 | } | 
|---|
| 679 | } | 
|---|
| 680 |  | 
|---|
| 681 | bool | 
|---|
| 682 | HASHgonebad(BAT *b, const void *v) | 
|---|
| 683 | { | 
|---|
| 684 | Hash *h = b->thash; | 
|---|
| 685 | BATiter bi = bat_iterator(b); | 
|---|
| 686 | BUN cnt, hit; | 
|---|
| 687 |  | 
|---|
| 688 | if (h == NULL) | 
|---|
| 689 | return true;	/* no hash is bad hash? */ | 
|---|
| 690 |  | 
|---|
| 691 | if (h->mask * 2 < BATcount(b)) { | 
|---|
| 692 | int (*cmp) (const void *, const void *) = ATOMcompare(b->ttype); | 
|---|
| 693 | BUN i = HASHget(h, (BUN) HASHprobe(h, v)), nil = HASHnil(h); | 
|---|
| 694 | for (cnt = hit = 1; i != nil; i = HASHgetlink(h, i), cnt++) | 
|---|
| 695 | hit += ((*cmp) (v, BUNtail(bi, (BUN) i)) == 0); | 
|---|
| 696 |  | 
|---|
| 697 | if (cnt / hit > 4) | 
|---|
| 698 | return true;	/* linked list too long */ | 
|---|
| 699 |  | 
|---|
| 700 | /* in this case, linked lists are long but contain the | 
|---|
| 701 | * desired values such hash tables may be useful for | 
|---|
| 702 | * locating all duplicates */ | 
|---|
| 703 | } | 
|---|
| 704 | return false;		/* a-ok */ | 
|---|
| 705 | } | 
|---|
| 706 |  | 
|---|