| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /* |
| 10 | * Implementation for the column imprints index. |
| 11 | * See paper: |
| 12 | * Column Imprints: A Secondary Index Structure, |
| 13 | * L.Sidirourgos and M.Kersten. |
| 14 | */ |
| 15 | |
| 16 | #include "monetdb_config.h" |
| 17 | #include "gdk.h" |
| 18 | #include "gdk_private.h" |
| 19 | #include "gdk_imprints.h" |
| 20 | |
/* On-disk format version, stored in bits 8..15 of the first header word. */
#define IMPRINTS_VERSION	2
/* Number of size_t fields in the header at the start of the imprints heap. */
#define IMPRINTS_HEADER_SIZE	4
| 23 | |
/* Dispatch on the imprint width B: expand FUNC(T, width) for the
 * matching width of 8, 16, 32 or 64 bits.  Any other width trips the
 * assertion. */
#define BINSIZE(B, FUNC, T)			\
	do {					\
		switch (B) {			\
		case 8:				\
			FUNC(T,8);		\
			break;			\
		case 16:			\
			FUNC(T,16);		\
			break;			\
		case 32:			\
			FUNC(T,32);		\
			break;			\
		case 64:			\
			FUNC(T,64);		\
			break;			\
		default:			\
			assert(0);		\
			break;			\
		}				\
	} while (0)
| 33 | |
| 34 | |
/* Store in Z the bin number of value X: the count of boundaries
 * bins[1] .. bins[B-1] that are less than or equal to X.  An array
 * named `bins' must be in scope at the point of use. */
#define GETBIN(Z,X,B)					\
	do {						\
		int _i = 1;				\
		Z = 0;					\
		while (_i < B) {			\
			if ((X) >= bins[_i])		\
				Z += 1;			\
			_i++;				\
		}					\
	} while (0)
| 42 | |
| 43 | |
/* Build the imprint vectors and the dictionary for a column of TYPE
 * using B-bit masks.  Expects the locals and parameters of
 * imprints_create (b, inbins, imps, dict, i, bin, dcnt, icnt,
 * min_bins, max_bins, cnt_bins) to be in scope.
 *
 * The column is walked one "page" (IMPS_PAGE bytes worth of values)
 * at a time.  For every page a mask is built with one bit per bin
 * that occurs in the page.  A run of pages with equal masks is stored
 * once with a "repeat" dictionary entry; pages with differing masks
 * are stored individually and counted by a non-repeat entry. */
#define IMPS_CREATE(TYPE,B)						\
	do {								\
		uint##B##_t mask, prev_mask = 0;			\
		uint##B##_t *restrict impvec = (uint##B##_t *) imps;	\
		const TYPE *restrict col = (TYPE *) Tloc(b, 0);		\
		const TYPE *restrict bins = (TYPE *) inbins;		\
		const BUN page_vals = IMPS_PAGE / sizeof(TYPE);		\
									\
		i = 0;							\
		while (i < b->batCount) {				\
			const BUN page_end = MIN(i + page_vals, b->batCount); \
									\
			/* collect the bins hit by this page */		\
			mask = 0;					\
			while (i < page_end) {				\
				const TYPE val = col[i];		\
									\
				GETBIN(bin,val,B);			\
				mask = IMPSsetBit(B,mask,bin);		\
				/* nils are not counted in the stats */	\
				if (!is_##TYPE##_nil(val)) {		\
					if (cnt_bins[bin]++ == 0) {	\
						/* first value in bin */ \
						min_bins[bin] = i;	\
						max_bins[bin] = i;	\
					} else {			\
						if (val < col[min_bins[bin]]) \
							min_bins[bin] = i; \
						if (val > col[max_bins[bin]]) \
							max_bins[bin] = i; \
					}				\
				}					\
				i++;					\
			}						\
									\
			if (prev_mask == mask && dcnt > 0 &&		\
			    dict[dcnt-1].cnt < (IMPS_MAX_CNT-1)) {	\
				/* same mask as the previous page and	\
				 * room left in the counter */		\
				if (!dict[dcnt-1].repeat) {		\
					if (dict[dcnt-1].cnt > 1) {	\
						/* split the previous	\
						 * page off into an	\
						 * entry of its own */	\
						dict[dcnt-1].cnt--;	\
						dict[dcnt].cnt = 1;	\
						dict[dcnt].flags = 0;	\
						dcnt++;			\
					}				\
					/* turn it into a repeat entry */ \
					dict[dcnt-1].repeat = 1;	\
				}					\
				dict[dcnt-1].cnt++;			\
			} else {					\
				/* new mask, or the counter is full:	\
				 * store the mask itself */		\
				prev_mask = mask;			\
				impvec[icnt] = mask;			\
				icnt++;					\
				if (dcnt > 0 && !dict[dcnt-1].repeat &&	\
				    dict[dcnt-1].cnt < (IMPS_MAX_CNT-1)) { \
					/* extend the non-repeat entry */ \
					dict[dcnt-1].cnt++;		\
				} else {				\
					/* open a new non-repeat entry */ \
					dict[dcnt].cnt = 1;		\
					dict[dcnt].repeat = 0;		\
					dict[dcnt].flags = 0;		\
					dcnt++;				\
				}					\
			}						\
		}							\
	} while (0)
| 107 | |
| 108 | static void |
| 109 | imprints_create(BAT *b, void *inbins, BUN *stats, bte bits, |
| 110 | void *imps, BUN *impcnt, cchdc_t *dict, BUN *dictcnt) |
| 111 | { |
| 112 | BUN i; |
| 113 | BUN dcnt, icnt; |
| 114 | BUN *restrict min_bins = stats; |
| 115 | BUN *restrict max_bins = min_bins + 64; |
| 116 | BUN *restrict cnt_bins = max_bins + 64; |
| 117 | int bin = 0; |
| 118 | dcnt = icnt = 0; |
| 119 | #ifndef NDEBUG |
| 120 | memset(min_bins, 0, 64 * SIZEOF_BUN); |
| 121 | memset(max_bins, 0, 64 * SIZEOF_BUN); |
| 122 | #endif |
| 123 | memset(cnt_bins, 0, 64 * SIZEOF_BUN); |
| 124 | |
| 125 | switch (ATOMbasetype(b->ttype)) { |
| 126 | case TYPE_bte: |
| 127 | BINSIZE(bits, IMPS_CREATE, bte); |
| 128 | break; |
| 129 | case TYPE_sht: |
| 130 | BINSIZE(bits, IMPS_CREATE, sht); |
| 131 | break; |
| 132 | case TYPE_int: |
| 133 | BINSIZE(bits, IMPS_CREATE, int); |
| 134 | break; |
| 135 | case TYPE_lng: |
| 136 | BINSIZE(bits, IMPS_CREATE, lng); |
| 137 | break; |
| 138 | #ifdef HAVE_HGE |
| 139 | case TYPE_hge: |
| 140 | BINSIZE(bits, IMPS_CREATE, hge); |
| 141 | break; |
| 142 | #endif |
| 143 | case TYPE_flt: |
| 144 | BINSIZE(bits, IMPS_CREATE, flt); |
| 145 | break; |
| 146 | case TYPE_dbl: |
| 147 | BINSIZE(bits, IMPS_CREATE, dbl); |
| 148 | break; |
| 149 | default: |
| 150 | /* should never reach here */ |
| 151 | assert(0); |
| 152 | } |
| 153 | |
| 154 | *dictcnt = dcnt; |
| 155 | *impcnt = icnt; |
| 156 | } |
| 157 | |
/* In debug builds, zero the bins from index k up to 64.  Uses the
 * locals k and h of FILL_HISTOGRAM. */
#ifndef NDEBUG
#define CLRMEM()	for (; k < 64; k++) h[k] = 0
#else
#define CLRMEM()	((void) 0)
#endif

/* Fill the bin boundaries in imprints->bins from the values of s4.
 * Expects s4, cnt (the number of values in s4) and imprints to be in
 * scope.
 *
 * With fewer than 63 values every value becomes a boundary and the
 * bins up to imprints->bits are padded with the maximum of TYPE.
 * Otherwise 63 boundaries are picked at equal distances from s4 and,
 * if a slot is left, the last value of s4 becomes the final
 * boundary. */
#define FILL_HISTOGRAM(TYPE)						\
	do {								\
		BUN k;							\
		TYPE *restrict src = (TYPE *) Tloc(s4, 0);		\
		TYPE *restrict h = imprints->bins;			\
									\
		if (cnt < 64-1) {					\
			const TYPE max = GDK_##TYPE##_max;		\
									\
			for (k = 0; k < cnt; k++)			\
				h[k] = src[k];				\
			for (; k < (BUN) imprints->bits; k++)		\
				h[k] = max;				\
			CLRMEM();					\
		} else {						\
			const double ystep = (double) cnt / (64 - 1);	\
			double y = 0;					\
									\
			k = 0;						\
			while ((BUN) y < cnt) {				\
				h[k] = src[(BUN) y];			\
				y += ystep;				\
				k++;					\
			}						\
			if (k == 64 - 1) /* there is one left */	\
				h[k] = src[cnt - 1];			\
		}							\
	} while (0)
| 184 | |
| 185 | /* Check whether we have imprints on b (and return true if we do). It |
| 186 | * may be that the imprints were made persistent, but we hadn't seen |
| 187 | * that yet, so check the file system. This also returns true if b is |
| 188 | * a view and there are imprints on b's parent. |
| 189 | * |
| 190 | * Note that the b->timprints pointer can be NULL, meaning there are |
| 191 | * no imprints; (Imprints *) 1, meaning there are no imprints loaded, |
| 192 | * but they may exist on disk; or a valid pointer to loaded imprints. |
| 193 | * These values are maintained here, in the IMPSdestroy and IMPSfree |
| 194 | * functions, and in BBPdiskscan during initialization. */ |
| 195 | bool |
| 196 | BATcheckimprints(BAT *b) |
| 197 | { |
| 198 | bool ret; |
| 199 | |
| 200 | if (VIEWtparent(b)) { |
| 201 | assert(b->timprints == NULL); |
| 202 | b = BBPdescriptor(VIEWtparent(b)); |
| 203 | } |
| 204 | |
| 205 | if (b->timprints == (Imprints *) 1) { |
| 206 | MT_lock_set(&b->batIdxLock); |
| 207 | if (b->timprints == (Imprints *) 1) { |
| 208 | Imprints *imprints; |
| 209 | const char *nme = BBP_physical(b->batCacheid); |
| 210 | |
| 211 | assert(!GDKinmemory()); |
| 212 | b->timprints = NULL; |
| 213 | if ((imprints = GDKzalloc(sizeof(Imprints))) != NULL && |
| 214 | (imprints->imprints.farmid = BBPselectfarm(b->batRole, b->ttype, imprintsheap)) >= 0) { |
| 215 | int fd; |
| 216 | |
| 217 | strconcat_len(imprints->imprints.filename, |
| 218 | sizeof(imprints->imprints.filename), |
| 219 | nme, ".timprints" , NULL); |
| 220 | /* check whether a persisted imprints index |
| 221 | * can be found */ |
| 222 | if ((fd = GDKfdlocate(imprints->imprints.farmid, nme, "rb" , "timprints" )) >= 0) { |
| 223 | size_t hdata[4]; |
| 224 | struct stat st; |
| 225 | size_t pages; |
| 226 | |
| 227 | pages = (((size_t) BATcount(b) * b->twidth) + IMPS_PAGE - 1) / IMPS_PAGE; |
| 228 | if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) && |
| 229 | hdata[0] & ((size_t) 1 << 16) && |
| 230 | ((hdata[0] & 0xFF00) >> 8) == IMPRINTS_VERSION && |
| 231 | hdata[3] == (size_t) BATcount(b) && |
| 232 | fstat(fd, &st) == 0 && |
| 233 | st.st_size >= (off_t) (imprints->imprints.size = |
| 234 | imprints->imprints.free = |
| 235 | 64 * b->twidth + |
| 236 | 64 * 2 * SIZEOF_OID + |
| 237 | 64 * SIZEOF_BUN + |
| 238 | pages * ((bte) hdata[0] / 8) + |
| 239 | hdata[2] * sizeof(cchdc_t) + |
| 240 | sizeof(uint64_t) /* padding for alignment */ |
| 241 | + 4 * SIZEOF_SIZE_T) && |
| 242 | HEAPload(&imprints->imprints, nme, "timprints" , false) == GDK_SUCCEED) { |
| 243 | /* usable */ |
| 244 | imprints->bits = (bte) (hdata[0] & 0xFF); |
| 245 | imprints->impcnt = (BUN) hdata[1]; |
| 246 | imprints->dictcnt = (BUN) hdata[2]; |
| 247 | imprints->bins = imprints->imprints.base + 4 * SIZEOF_SIZE_T; |
| 248 | imprints->stats = (BUN *) ((char *) imprints->bins + 64 * b->twidth); |
| 249 | imprints->imps = (void *) (imprints->stats + 64 * 3); |
| 250 | imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1)); |
| 251 | close(fd); |
| 252 | imprints->imprints.parentid = b->batCacheid; |
| 253 | b->timprints = imprints; |
| 254 | ACCELDEBUG fprintf(stderr, "#BATcheckimprints(" ALGOBATFMT "): reusing persisted imprints\n" , ALGOBATPAR(b)); |
| 255 | MT_lock_unset(&b->batIdxLock); |
| 256 | |
| 257 | return true; |
| 258 | } |
| 259 | close(fd); |
| 260 | /* unlink unusable file */ |
| 261 | GDKunlink(imprints->imprints.farmid, BATDIR, nme, "timprints" ); |
| 262 | } |
| 263 | } |
| 264 | GDKfree(imprints); |
| 265 | GDKclrerr(); /* we're not currently interested in errors */ |
| 266 | } |
| 267 | MT_lock_unset(&b->batIdxLock); |
| 268 | } |
| 269 | ret = b->timprints != NULL; |
| 270 | ACCELDEBUG if (ret) fprintf(stderr, "#BATcheckimprints(" ALGOBATFMT "): already has imprints\n" , ALGOBATPAR(b)); |
| 271 | return ret; |
| 272 | } |
| 273 | |
| 274 | static void |
| 275 | BATimpsync(void *arg) |
| 276 | { |
| 277 | BAT *b = arg; |
| 278 | Imprints *imprints; |
| 279 | int fd; |
| 280 | lng t0 = 0; |
| 281 | const char *failed = " failed" ; |
| 282 | |
| 283 | ACCELDEBUG t0 = GDKusec(); |
| 284 | |
| 285 | MT_lock_set(&b->batIdxLock); |
| 286 | if ((imprints = b->timprints) != NULL) { |
| 287 | Heap *hp = &imprints->imprints; |
| 288 | if (HEAPsave(hp, hp->filename, NULL) == GDK_SUCCEED) { |
| 289 | if (hp->storage == STORE_MEM) { |
| 290 | if ((fd = GDKfdlocate(hp->farmid, hp->filename, "rb+" , NULL)) >= 0) { |
| 291 | /* add version number */ |
| 292 | ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8; |
| 293 | /* sync-on-disk checked bit */ |
| 294 | ((size_t *) hp->base)[0] |= (size_t) 1 << 16; |
| 295 | if (write(fd, hp->base, SIZEOF_SIZE_T) >= 0) { |
| 296 | failed = "" ; /* not failed */ |
| 297 | if (!(GDKdebug & NOSYNCMASK)) { |
| 298 | #if defined(NATIVE_WIN32) |
| 299 | _commit(fd); |
| 300 | #elif defined(HAVE_FDATASYNC) |
| 301 | fdatasync(fd); |
| 302 | #elif defined(HAVE_FSYNC) |
| 303 | fsync(fd); |
| 304 | #endif |
| 305 | } |
| 306 | hp->dirty = false; |
| 307 | } else { |
| 308 | failed = " write failed" ; |
| 309 | perror("write hash" ); |
| 310 | } |
| 311 | close(fd); |
| 312 | } |
| 313 | } else { |
| 314 | /* add version number */ |
| 315 | ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8; |
| 316 | /* sync-on-disk checked bit */ |
| 317 | ((size_t *) hp->base)[0] |= (size_t) 1 << 16; |
| 318 | if (!(GDKdebug & NOSYNCMASK) && |
| 319 | MT_msync(hp->base, SIZEOF_SIZE_T) < 0) { |
| 320 | failed = " sync failed" ; |
| 321 | ((size_t *) hp->base)[0] &= ~((size_t) IMPRINTS_VERSION << 8); |
| 322 | } else { |
| 323 | hp->dirty = false; |
| 324 | failed = "" ; /* not failed */ |
| 325 | } |
| 326 | } |
| 327 | ACCELDEBUG fprintf(stderr, "#BATimpsync(" ALGOBATFMT "): " |
| 328 | "imprints persisted " |
| 329 | "(" LLFMT " usec)%s\n" , ALGOBATPAR(b), |
| 330 | GDKusec() - t0, failed); |
| 331 | } |
| 332 | } |
| 333 | MT_lock_unset(&b->batIdxLock); |
| 334 | BBPunfix(b->batCacheid); |
| 335 | } |
| 336 | |
| 337 | gdk_return |
| 338 | BATimprints(BAT *b) |
| 339 | { |
| 340 | BAT *s1 = NULL, *s2 = NULL, *s3 = NULL, *s4 = NULL; |
| 341 | Imprints *imprints; |
| 342 | lng t0 = 0; |
| 343 | |
| 344 | /* we only create imprints for types that look like types we know */ |
| 345 | switch (ATOMbasetype(b->ttype)) { |
| 346 | case TYPE_bte: |
| 347 | case TYPE_sht: |
| 348 | case TYPE_int: |
| 349 | case TYPE_lng: |
| 350 | #ifdef HAVE_HGE |
| 351 | case TYPE_hge: |
| 352 | #endif |
| 353 | case TYPE_flt: |
| 354 | case TYPE_dbl: |
| 355 | break; |
| 356 | default: /* type not supported */ |
| 357 | /* doesn't look enough like base type: do nothing */ |
| 358 | GDKerror("BATimprints: unsupported type\n" ); |
| 359 | return GDK_FAIL; |
| 360 | } |
| 361 | |
| 362 | BATcheck(b, "BATimprints" , GDK_FAIL); |
| 363 | |
| 364 | if (BATcheckimprints(b)) |
| 365 | return GDK_SUCCEED; |
| 366 | |
| 367 | if (VIEWtparent(b)) { |
| 368 | /* views always keep null pointer and need to obtain |
| 369 | * the latest imprint from the parent at query time */ |
| 370 | s2 = b; /* remember for ACCELDEBUG print */ |
| 371 | b = BBPdescriptor(VIEWtparent(b)); |
| 372 | assert(b); |
| 373 | if (BATcheckimprints(b)) |
| 374 | return GDK_SUCCEED; |
| 375 | } |
| 376 | MT_lock_set(&b->batIdxLock); |
| 377 | ACCELDEBUG t0 = GDKusec(); |
| 378 | if (b->timprints == NULL) { |
| 379 | BUN cnt; |
| 380 | const char *nme = GDKinmemory() ? ":inmemory" : BBP_physical(b->batCacheid); |
| 381 | size_t pages; |
| 382 | |
| 383 | MT_lock_unset(&b->batIdxLock); |
| 384 | |
| 385 | ACCELDEBUG { |
| 386 | if (s2) |
| 387 | fprintf(stderr, "#BATimprints(b=" ALGOBATFMT |
| 388 | "): creating imprints on parent " |
| 389 | ALGOBATFMT "\n" , |
| 390 | ALGOBATPAR(s2), ALGOBATPAR(b)); |
| 391 | else |
| 392 | fprintf(stderr, "#BATimprints(b=" ALGOBATFMT |
| 393 | "): creating imprints\n" , |
| 394 | ALGOBATPAR(b)); |
| 395 | } |
| 396 | s2 = NULL; |
| 397 | |
| 398 | imprints = GDKzalloc(sizeof(Imprints)); |
| 399 | if (imprints == NULL) { |
| 400 | MT_lock_unset(&b->batIdxLock); |
| 401 | return GDK_FAIL; |
| 402 | } |
| 403 | strconcat_len(imprints->imprints.filename, |
| 404 | sizeof(imprints->imprints.filename), |
| 405 | nme, ".timprints" , NULL); |
| 406 | pages = (((size_t) BATcount(b) * b->twidth) + IMPS_PAGE - 1) / IMPS_PAGE; |
| 407 | imprints->imprints.farmid = BBPselectfarm(b->batRole, b->ttype, |
| 408 | imprintsheap); |
| 409 | |
| 410 | #define SMP_SIZE 2048 |
| 411 | s1 = BATsample(b, SMP_SIZE); |
| 412 | if (s1 == NULL) { |
| 413 | MT_lock_unset(&b->batIdxLock); |
| 414 | GDKfree(imprints); |
| 415 | return GDK_FAIL; |
| 416 | } |
| 417 | s2 = BATunique(b, s1); |
| 418 | if (s2 == NULL) { |
| 419 | MT_lock_unset(&b->batIdxLock); |
| 420 | BBPunfix(s1->batCacheid); |
| 421 | GDKfree(imprints); |
| 422 | return GDK_FAIL; |
| 423 | } |
| 424 | s3 = BATproject(s2, b); |
| 425 | if (s3 == NULL) { |
| 426 | MT_lock_unset(&b->batIdxLock); |
| 427 | BBPunfix(s1->batCacheid); |
| 428 | BBPunfix(s2->batCacheid); |
| 429 | GDKfree(imprints); |
| 430 | return GDK_FAIL; |
| 431 | } |
| 432 | s3->tkey = true; /* we know is unique on tail now */ |
| 433 | if (BATsort(&s4, NULL, NULL, s3, NULL, NULL, false, false, false) != GDK_SUCCEED) { |
| 434 | MT_lock_unset(&b->batIdxLock); |
| 435 | BBPunfix(s1->batCacheid); |
| 436 | BBPunfix(s2->batCacheid); |
| 437 | BBPunfix(s3->batCacheid); |
| 438 | GDKfree(imprints); |
| 439 | return GDK_FAIL; |
| 440 | } |
| 441 | /* s4 now is ordered and unique on tail */ |
| 442 | assert(s4->tkey && s4->tsorted); |
| 443 | cnt = BATcount(s4); |
| 444 | imprints->bits = 64; |
| 445 | if (cnt <= 32) |
| 446 | imprints->bits = 32; |
| 447 | if (cnt <= 16) |
| 448 | imprints->bits = 16; |
| 449 | if (cnt <= 8) |
| 450 | imprints->bits = 8; |
| 451 | |
| 452 | /* The heap we create here consists of four parts: |
| 453 | * bins, max 64 entries with bin boundaries, domain of b; |
| 454 | * stats, min/max/count for each bin, min/max are oid, and count BUN; |
| 455 | * imps, max one entry per "page", entry is "bits" wide; |
| 456 | * dict, max two entries per three "pages". |
| 457 | * In addition, we add some housekeeping entries at |
| 458 | * the start so that we can determine whether we can |
| 459 | * trust the imprints when encountered on startup (including |
| 460 | * a version number -- CURRENT VERSION is 2). */ |
| 461 | MT_lock_set(&b->batIdxLock); |
| 462 | if (b->timprints != NULL || |
| 463 | HEAPalloc(&imprints->imprints, |
| 464 | IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T + /* extra info */ |
| 465 | 64 * b->twidth + /* bins */ |
| 466 | 64 * 2 * SIZEOF_OID + /* {min,max}_bins */ |
| 467 | 64 * SIZEOF_BUN + /* cnt_bins */ |
| 468 | pages * (imprints->bits / 8) + /* imps */ |
| 469 | sizeof(uint64_t) + /* padding for alignment */ |
| 470 | pages * sizeof(cchdc_t), /* dict */ |
| 471 | 1) != GDK_SUCCEED) { |
| 472 | MT_lock_unset(&b->batIdxLock); |
| 473 | GDKfree(imprints); |
| 474 | BBPunfix(s1->batCacheid); |
| 475 | BBPunfix(s2->batCacheid); |
| 476 | BBPunfix(s3->batCacheid); |
| 477 | BBPunfix(s4->batCacheid); |
| 478 | if (b->timprints != NULL) |
| 479 | return GDK_SUCCEED; /* we were beaten to it */ |
| 480 | GDKerror("#BATimprints: memory allocation error" ); |
| 481 | return GDK_FAIL; |
| 482 | } |
| 483 | imprints->bins = imprints->imprints.base + IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T; |
| 484 | imprints->stats = (BUN *) ((char *) imprints->bins + 64 * b->twidth); |
| 485 | imprints->imps = (void *) (imprints->stats + 64 * 3); |
| 486 | imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1)); |
| 487 | |
| 488 | switch (ATOMbasetype(b->ttype)) { |
| 489 | case TYPE_bte: |
| 490 | FILL_HISTOGRAM(bte); |
| 491 | break; |
| 492 | case TYPE_sht: |
| 493 | FILL_HISTOGRAM(sht); |
| 494 | break; |
| 495 | case TYPE_int: |
| 496 | FILL_HISTOGRAM(int); |
| 497 | break; |
| 498 | case TYPE_lng: |
| 499 | FILL_HISTOGRAM(lng); |
| 500 | break; |
| 501 | #ifdef HAVE_HGE |
| 502 | case TYPE_hge: |
| 503 | FILL_HISTOGRAM(hge); |
| 504 | break; |
| 505 | #endif |
| 506 | case TYPE_flt: |
| 507 | FILL_HISTOGRAM(flt); |
| 508 | break; |
| 509 | case TYPE_dbl: |
| 510 | FILL_HISTOGRAM(dbl); |
| 511 | break; |
| 512 | default: |
| 513 | /* should never reach here */ |
| 514 | assert(0); |
| 515 | } |
| 516 | |
| 517 | imprints_create(b, |
| 518 | imprints->bins, |
| 519 | imprints->stats, |
| 520 | imprints->bits, |
| 521 | imprints->imps, |
| 522 | &imprints->impcnt, |
| 523 | imprints->dict, |
| 524 | &imprints->dictcnt); |
| 525 | assert(imprints->impcnt <= pages); |
| 526 | assert(imprints->dictcnt <= pages); |
| 527 | #ifndef NDEBUG |
| 528 | memset((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8), 0, (char *) imprints->dict - ((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8))); |
| 529 | #endif |
| 530 | imprints->imprints.free = (size_t) ((char *) ((cchdc_t *) imprints->dict + imprints->dictcnt) - imprints->imprints.base); |
| 531 | /* add info to heap for when they become persistent */ |
| 532 | ((size_t *) imprints->imprints.base)[0] = (size_t) (imprints->bits); |
| 533 | ((size_t *) imprints->imprints.base)[1] = (size_t) imprints->impcnt; |
| 534 | ((size_t *) imprints->imprints.base)[2] = (size_t) imprints->dictcnt; |
| 535 | ((size_t *) imprints->imprints.base)[3] = (size_t) BATcount(b); |
| 536 | imprints->imprints.parentid = b->batCacheid; |
| 537 | b->timprints = imprints; |
| 538 | if (BBP_status(b->batCacheid) & BBPEXISTING && |
| 539 | !b->theap.dirty && |
| 540 | !GDKinmemory()) { |
| 541 | MT_Id tid; |
| 542 | BBPfix(b->batCacheid); |
| 543 | char name[16]; |
| 544 | snprintf(name, sizeof(name), "impssync%d" , b->batCacheid); |
| 545 | if (MT_create_thread(&tid, BATimpsync, b, |
| 546 | MT_THR_DETACHED, name) < 0) |
| 547 | BBPunfix(b->batCacheid); |
| 548 | } |
| 549 | } |
| 550 | |
| 551 | ACCELDEBUG fprintf(stderr, "#BATimprints(%s): imprints construction " LLFMT " usec\n" , BATgetId(b), GDKusec() - t0); |
| 552 | MT_lock_unset(&b->batIdxLock); |
| 553 | |
| 554 | /* BBPUnfix tries to get the imprints lock which might lead to |
| 555 | * a deadlock if those were unfixed earlier */ |
| 556 | if (s1) { |
| 557 | BBPunfix(s1->batCacheid); |
| 558 | BBPunfix(s2->batCacheid); |
| 559 | BBPunfix(s3->batCacheid); |
| 560 | BBPunfix(s4->batCacheid); |
| 561 | } |
| 562 | return GDK_SUCCEED; |
| 563 | } |
| 564 | |
/* Helper for IMPSgetbin: read the value of TYPE that v points to and
 * store its bin number, for B-bit imprints, in ret.  Expects v, ret
 * and bins to be in scope. */
#define getbin(TYPE,B)						\
	do {							\
		const TYPE val = * (const TYPE *) v;		\
								\
		GETBIN(ret,val,B);				\
	} while (0)
| 570 | |
| 571 | int |
| 572 | IMPSgetbin(int tpe, bte bits, const char *restrict inbins, const void *restrict v) |
| 573 | { |
| 574 | int ret = -1; |
| 575 | |
| 576 | switch (tpe) { |
| 577 | case TYPE_bte: |
| 578 | { |
| 579 | const bte *restrict bins = (bte *) inbins; |
| 580 | BINSIZE(bits, getbin, bte); |
| 581 | } |
| 582 | break; |
| 583 | case TYPE_sht: |
| 584 | { |
| 585 | const sht *restrict bins = (sht *) inbins; |
| 586 | BINSIZE(bits, getbin, sht); |
| 587 | } |
| 588 | break; |
| 589 | case TYPE_int: |
| 590 | { |
| 591 | const int *restrict bins = (int *) inbins; |
| 592 | BINSIZE(bits, getbin, int); |
| 593 | } |
| 594 | break; |
| 595 | case TYPE_lng: |
| 596 | { |
| 597 | const lng *restrict bins = (lng *) inbins; |
| 598 | BINSIZE(bits, getbin, lng); |
| 599 | } |
| 600 | break; |
| 601 | #ifdef HAVE_HGE |
| 602 | case TYPE_hge: |
| 603 | { |
| 604 | const hge *restrict bins = (hge *) inbins; |
| 605 | BINSIZE(bits, getbin, hge); |
| 606 | } |
| 607 | break; |
| 608 | #endif |
| 609 | case TYPE_flt: |
| 610 | { |
| 611 | const flt *restrict bins = (flt *) inbins; |
| 612 | BINSIZE(bits, getbin, flt); |
| 613 | } |
| 614 | break; |
| 615 | case TYPE_dbl: |
| 616 | { |
| 617 | const dbl *restrict bins = (dbl *) inbins; |
| 618 | BINSIZE(bits, getbin, dbl); |
| 619 | } |
| 620 | break; |
| 621 | default: |
| 622 | assert(0); |
| 623 | (void) inbins; |
| 624 | break; |
| 625 | } |
| 626 | return ret; |
| 627 | } |
| 628 | |
| 629 | lng |
| 630 | IMPSimprintsize(BAT *b) |
| 631 | { |
| 632 | lng sz = 0; |
| 633 | if (b->timprints && b->timprints != (Imprints *) 1) { |
| 634 | sz = b->timprints->impcnt * b->timprints->bits / 8; |
| 635 | sz += b->timprints->dictcnt * sizeof(cchdc_t); |
| 636 | } |
| 637 | return sz; |
| 638 | } |
| 639 | |
| 640 | static void |
| 641 | IMPSremove(BAT *b) |
| 642 | { |
| 643 | Imprints *imprints; |
| 644 | |
| 645 | assert(b->timprints != NULL); |
| 646 | assert(!VIEWtparent(b)); |
| 647 | |
| 648 | if ((imprints = b->timprints) != NULL) { |
| 649 | b->timprints = NULL; |
| 650 | |
| 651 | if ((GDKdebug & ALGOMASK) && |
| 652 | * (size_t *) imprints->imprints.base & (1 << 16)) |
| 653 | fprintf(stderr, "#IMPSremove: removing persisted imprints\n" ); |
| 654 | if (HEAPdelete(&imprints->imprints, BBP_physical(b->batCacheid), |
| 655 | "timprints" ) != GDK_SUCCEED) |
| 656 | IODEBUG fprintf(stderr, "#IMPSremove(%s): imprints heap\n" , BATgetId(b)); |
| 657 | |
| 658 | GDKfree(imprints); |
| 659 | } |
| 660 | } |
| 661 | |
| 662 | void |
| 663 | IMPSdestroy(BAT *b) |
| 664 | { |
| 665 | if (b && b->timprints) { |
| 666 | MT_lock_set(&b->batIdxLock); |
| 667 | if (b->timprints == (Imprints *) 1) { |
| 668 | b->timprints = NULL; |
| 669 | GDKunlink(BBPselectfarm(b->batRole, b->ttype, imprintsheap), |
| 670 | BATDIR, |
| 671 | BBP_physical(b->batCacheid), |
| 672 | "timprints" ); |
| 673 | } else if (b->timprints != NULL && !VIEWtparent(b)) |
| 674 | IMPSremove(b); |
| 675 | MT_lock_unset(&b->batIdxLock); |
| 676 | } |
| 677 | } |
| 678 | |
| 679 | /* free the memory associated with the imprints, do not remove the |
| 680 | * heap files; indicate that imprints are available on disk by setting |
| 681 | * the imprints pointer to 1 */ |
| 682 | void |
| 683 | IMPSfree(BAT *b) |
| 684 | { |
| 685 | Imprints *imprints; |
| 686 | |
| 687 | if (b && b->timprints) { |
| 688 | assert(b->batCacheid > 0); |
| 689 | MT_lock_set(&b->batIdxLock); |
| 690 | imprints = b->timprints; |
| 691 | if (imprints != NULL && imprints != (Imprints *) 1) { |
| 692 | if (GDKinmemory()) { |
| 693 | b->timprints = NULL; |
| 694 | if (!VIEWtparent(b)) { |
| 695 | HEAPfree(&imprints->imprints, true); |
| 696 | GDKfree(imprints); |
| 697 | } |
| 698 | } else { |
| 699 | b->timprints = (Imprints *) 1; |
| 700 | if (!VIEWtparent(b)) { |
| 701 | HEAPfree(&imprints->imprints, false); |
| 702 | GDKfree(imprints); |
| 703 | } |
| 704 | } |
| 705 | } |
| 706 | MT_lock_unset(&b->batIdxLock); |
| 707 | } |
| 708 | } |
| 709 | |
| 710 | #ifndef NDEBUG |
| 711 | /* never called, useful for debugging */ |
| 712 | |
| 713 | #define IMPSPRNTMASK(T, B) \ |
| 714 | do { \ |
| 715 | uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \ |
| 716 | for (j = 0; j < imprints->bits; j++) \ |
| 717 | s[j] = IMPSisSet(B, im[icnt], j) ? 'x' : '.'; \ |
| 718 | s[j] = '\0'; \ |
| 719 | } while (0) |
| 720 | |
| 721 | void |
| 722 | IMPSprint(BAT *b) |
| 723 | { |
| 724 | Imprints *imprints; |
| 725 | cchdc_t *restrict d; |
| 726 | char s[65]; /* max number of bits + 1 */ |
| 727 | BUN icnt, dcnt, l, pages; |
| 728 | BUN *restrict min_bins, *restrict max_bins; |
| 729 | BUN *restrict cnt_bins; |
| 730 | bte j; |
| 731 | int i; |
| 732 | |
| 733 | if (!BATcheckimprints(b)) { |
| 734 | fprintf(stderr, "no imprint\n" ); |
| 735 | return; |
| 736 | } |
| 737 | imprints = b->timprints; |
| 738 | d = (cchdc_t *) imprints->dict; |
| 739 | min_bins = imprints->stats; |
| 740 | max_bins = min_bins + 64; |
| 741 | cnt_bins = max_bins + 64; |
| 742 | |
| 743 | fprintf(stderr, |
| 744 | "bits = %d, impcnt = " BUNFMT ", dictcnt = " BUNFMT "\n" , |
| 745 | imprints->bits, imprints->impcnt, imprints->dictcnt); |
| 746 | fprintf(stderr,"MIN = " ); |
| 747 | for (i = 0; i < imprints->bits; i++) { |
| 748 | fprintf(stderr, "[ " BUNFMT " ] " , min_bins[i]); |
| 749 | } |
| 750 | fprintf(stderr,"\n" ); |
| 751 | fprintf(stderr,"MAX = " ); |
| 752 | for (i = 0; i < imprints->bits; i++) { |
| 753 | fprintf(stderr, "[ " BUNFMT " ] " , max_bins[i]); |
| 754 | } |
| 755 | fprintf(stderr,"\n" ); |
| 756 | fprintf(stderr,"COUNT = " ); |
| 757 | for (i = 0; i < imprints->bits; i++) { |
| 758 | fprintf(stderr, "[ " BUNFMT " ] " , cnt_bins[i]); |
| 759 | } |
| 760 | fprintf(stderr,"\n" ); |
| 761 | for (dcnt = 0, icnt = 0, pages = 1; dcnt < imprints->dictcnt; dcnt++) { |
| 762 | if (d[dcnt].repeat) { |
| 763 | BINSIZE(imprints->bits, IMPSPRNTMASK, " " ); |
| 764 | pages += d[dcnt].cnt; |
| 765 | fprintf(stderr, "[ " BUNFMT " ]r %s\n" , pages, s); |
| 766 | icnt++; |
| 767 | } else { |
| 768 | l = icnt + d[dcnt].cnt; |
| 769 | for (; icnt < l; icnt++) { |
| 770 | BINSIZE(imprints->bits, IMPSPRNTMASK, " " ); |
| 771 | fprintf(stderr, "[ " BUNFMT " ] %s\n" , |
| 772 | pages++, s); |
| 773 | } |
| 774 | } |
| 775 | } |
| 776 | } |
| 777 | #endif |
| 778 | |