1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * Implementation for the column imprints index.
11 * See paper:
12 * Column Imprints: A Secondary Index Structure,
13 * L.Sidirourgos and M.Kersten.
14 */
15
16#include "monetdb_config.h"
17#include "gdk.h"
18#include "gdk_private.h"
19#include "gdk_imprints.h"
20
/* Version of the persisted imprints layout.  BATimpsync stores it in
 * bits 8-15 of the first header word, and BATcheckimprints refuses to
 * reuse a file whose stored version differs. */
#define IMPRINTS_VERSION 2
#define IMPRINTS_HEADER_SIZE 4 /* nr of size_t fields in header */
23
/* Dispatch on the imprint width B (8, 16, 32 or 64) and expand
 * FUNC(T,<width>) with the width passed as a literal token, so that
 * FUNC can paste it into identifiers such as uint##B##_t.  Any other
 * value of B trips the assert. */
#define BINSIZE(B, FUNC, T) do { \
	switch (B) { \
	case 8: FUNC(T,8); break; \
	case 16: FUNC(T,16); break; \
	case 32: FUNC(T,32); break; \
	case 64: FUNC(T,64); break; \
	default: assert(0); break; \
	} \
} while (0)
33
34
/* Set Z to the number of boundaries bins[1] .. bins[B-1] that are
 * less than or equal to X; this is the index of the bin X falls in.
 * An array called `bins' must be in scope where the macro is expanded.
 * All B-1 boundaries are always compared (there is no early exit). */
#define GETBIN(Z,X,B) \
do { \
	int _i; \
	Z = 0; \
	for (_i = 1; _i < B; _i++) \
		Z += ((X) >= bins[_i]); \
} while (0)
42
43
/* Body of imprints_create for values of type TYPE and imprints that
 * are B bits wide.  The macro uses these names from the expansion
 * site: b, imps, inbins, dict, i, bin, dcnt, icnt, min_bins, max_bins
 * and cnt_bins.
 *
 * The column is walked in "pages" of IMPS_PAGE / sizeof(TYPE) values.
 * For every page a mask is built with one bit set for each bin that
 * holds at least one value of the page.  While doing so, for non-nil
 * values, cnt_bins[bin] is incremented and min_bins[bin] /
 * max_bins[bin] are updated to the position of the smallest / largest
 * value seen so far in that bin.
 *
 * The masks are then run-length compressed through the dictionary:
 * - a page whose mask equals the previous one is not stored again;
 *   instead the last dictionary entry is turned into (or extended as)
 *   a "repeat" entry.  If the last entry was a non-repeat entry that
 *   covers more than one imprint, its last imprint is first split off
 *   into a new entry, which then becomes the repeat entry;
 * - any other page appends its mask to im[] and either extends the
 *   last non-repeat entry or opens a new one.
 * An entry is only extended while its cnt is below IMPS_MAX_CNT-1. */
#define IMPS_CREATE(TYPE,B) \
do { \
	uint##B##_t mask, prvmask; \
	uint##B##_t *restrict im = (uint##B##_t *) imps; \
	const TYPE *restrict col = (TYPE *) Tloc(b, 0); \
	const TYPE *restrict bins = (TYPE *) inbins; \
	const BUN page = IMPS_PAGE / sizeof(TYPE); \
	prvmask = 0; \
	for (i = 0; i < b->batCount; ) { \
		const BUN lim = MIN(i + page, b->batCount); \
		/* new mask */ \
		mask = 0; \
		/* build mask for all BUNs in one PAGE */ \
		for ( ; i < lim; i++) { \
			register const TYPE val = col[i]; \
			GETBIN(bin,val,B); \
			mask = IMPSsetBit(B,mask,bin); \
			if (!is_##TYPE##_nil(val)) { /* do not count nils */ \
				if (!cnt_bins[bin]++) { \
					min_bins[bin] = max_bins[bin] = i; \
				} else { \
					if (val < col[min_bins[bin]]) \
						min_bins[bin] = i; \
					if (val > col[max_bins[bin]]) \
						max_bins[bin] = i; \
				} \
			} \
		} \
		/* same mask as previous and enough count to add */ \
		if ((prvmask == mask) && (dcnt > 0) && \
		    (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) { \
			/* not a repeat header */ \
			if (!dict[dcnt-1].repeat) { \
				/* if compressed */ \
				if (dict[dcnt-1].cnt > 1) { \
					/* uncompress last */ \
					dict[dcnt-1].cnt--; \
					/* new header */ \
					dict[dcnt].cnt = 1; \
					dict[dcnt].flags = 0; \
					dcnt++; \
				} \
				/* set repeat */ \
				dict[dcnt-1].repeat = 1; \
			} \
			/* increase cnt */ \
			dict[dcnt-1].cnt++; \
		} else { /* new mask (or run out of header count) */ \
			prvmask=mask; \
			im[icnt] = mask; \
			icnt++; \
			if ((dcnt > 0) && !(dict[dcnt-1].repeat) && \
			    (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) { \
				dict[dcnt-1].cnt++; \
			} else { \
				dict[dcnt].cnt = 1; \
				dict[dcnt].repeat = 0; \
				dict[dcnt].flags = 0; \
				dcnt++; \
			} \
		} \
	} \
} while (0)
107
108static void
109imprints_create(BAT *b, void *inbins, BUN *stats, bte bits,
110 void *imps, BUN *impcnt, cchdc_t *dict, BUN *dictcnt)
111{
112 BUN i;
113 BUN dcnt, icnt;
114 BUN *restrict min_bins = stats;
115 BUN *restrict max_bins = min_bins + 64;
116 BUN *restrict cnt_bins = max_bins + 64;
117 int bin = 0;
118 dcnt = icnt = 0;
119#ifndef NDEBUG
120 memset(min_bins, 0, 64 * SIZEOF_BUN);
121 memset(max_bins, 0, 64 * SIZEOF_BUN);
122#endif
123 memset(cnt_bins, 0, 64 * SIZEOF_BUN);
124
125 switch (ATOMbasetype(b->ttype)) {
126 case TYPE_bte:
127 BINSIZE(bits, IMPS_CREATE, bte);
128 break;
129 case TYPE_sht:
130 BINSIZE(bits, IMPS_CREATE, sht);
131 break;
132 case TYPE_int:
133 BINSIZE(bits, IMPS_CREATE, int);
134 break;
135 case TYPE_lng:
136 BINSIZE(bits, IMPS_CREATE, lng);
137 break;
138#ifdef HAVE_HGE
139 case TYPE_hge:
140 BINSIZE(bits, IMPS_CREATE, hge);
141 break;
142#endif
143 case TYPE_flt:
144 BINSIZE(bits, IMPS_CREATE, flt);
145 break;
146 case TYPE_dbl:
147 BINSIZE(bits, IMPS_CREATE, dbl);
148 break;
149 default:
150 /* should never reach here */
151 assert(0);
152 }
153
154 *dictcnt = dcnt;
155 *impcnt = icnt;
156}
157
/* Helper for FILL_HISTOGRAM: in debug builds zero the remaining
 * entries of h up to index 63, continuing from the current k; in
 * NDEBUG builds it does nothing.  Uses k and h from the expansion
 * site. */
#ifdef NDEBUG
#define CLRMEM() ((void) 0)
#else
#define CLRMEM() while (k < 64) h[k++] = 0
#endif
163
/* Fill the bin boundaries (imprints->bins) from the values of s4.
 * Uses cnt, s4 and imprints from the expansion site; at the only
 * expansion site in this file s4 is the sorted, duplicate-free sample
 * and cnt is its size.
 *
 * With fewer than 63 values, every value becomes a boundary, the
 * entries up to imprints->bits are padded with the largest value of
 * TYPE, and (debug builds only) the rest up to 64 is zeroed.
 * Otherwise boundaries are picked at a regular step of cnt / 63
 * through the values; if that yields only 63 boundaries, the last
 * one is set to the last value. */
#define FILL_HISTOGRAM(TYPE) \
do { \
	BUN k; \
	TYPE *restrict s = (TYPE *) Tloc(s4, 0); \
	TYPE *restrict h = imprints->bins; \
	if (cnt < 64-1) { \
		TYPE max = GDK_##TYPE##_max; \
		for (k = 0; k < cnt; k++) \
			h[k] = s[k]; \
		while (k < (BUN) imprints->bits) \
			h[k++] = max; \
		CLRMEM(); \
	} else { \
		double y, ystep = (double) cnt / (64 - 1); \
		for (k = 0, y = 0; (BUN) y < cnt; y += ystep, k++) \
			h[k] = s[(BUN) y]; \
		if (k == 64 - 1) /* there is one left */ \
			h[k] = s[cnt - 1]; \
	} \
} while (0)
184
185/* Check whether we have imprints on b (and return true if we do). It
186 * may be that the imprints were made persistent, but we hadn't seen
187 * that yet, so check the file system. This also returns true if b is
188 * a view and there are imprints on b's parent.
189 *
190 * Note that the b->timprints pointer can be NULL, meaning there are
191 * no imprints; (Imprints *) 1, meaning there are no imprints loaded,
192 * but they may exist on disk; or a valid pointer to loaded imprints.
193 * These values are maintained here, in the IMPSdestroy and IMPSfree
194 * functions, and in BBPdiskscan during initialization. */
195bool
196BATcheckimprints(BAT *b)
197{
198 bool ret;
199
200 if (VIEWtparent(b)) {
201 assert(b->timprints == NULL);
202 b = BBPdescriptor(VIEWtparent(b));
203 }
204
205 if (b->timprints == (Imprints *) 1) {
206 MT_lock_set(&b->batIdxLock);
207 if (b->timprints == (Imprints *) 1) {
208 Imprints *imprints;
209 const char *nme = BBP_physical(b->batCacheid);
210
211 assert(!GDKinmemory());
212 b->timprints = NULL;
213 if ((imprints = GDKzalloc(sizeof(Imprints))) != NULL &&
214 (imprints->imprints.farmid = BBPselectfarm(b->batRole, b->ttype, imprintsheap)) >= 0) {
215 int fd;
216
217 strconcat_len(imprints->imprints.filename,
218 sizeof(imprints->imprints.filename),
219 nme, ".timprints", NULL);
220 /* check whether a persisted imprints index
221 * can be found */
222 if ((fd = GDKfdlocate(imprints->imprints.farmid, nme, "rb", "timprints")) >= 0) {
223 size_t hdata[4];
224 struct stat st;
225 size_t pages;
226
227 pages = (((size_t) BATcount(b) * b->twidth) + IMPS_PAGE - 1) / IMPS_PAGE;
228 if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
229 hdata[0] & ((size_t) 1 << 16) &&
230 ((hdata[0] & 0xFF00) >> 8) == IMPRINTS_VERSION &&
231 hdata[3] == (size_t) BATcount(b) &&
232 fstat(fd, &st) == 0 &&
233 st.st_size >= (off_t) (imprints->imprints.size =
234 imprints->imprints.free =
235 64 * b->twidth +
236 64 * 2 * SIZEOF_OID +
237 64 * SIZEOF_BUN +
238 pages * ((bte) hdata[0] / 8) +
239 hdata[2] * sizeof(cchdc_t) +
240 sizeof(uint64_t) /* padding for alignment */
241 + 4 * SIZEOF_SIZE_T) &&
242 HEAPload(&imprints->imprints, nme, "timprints", false) == GDK_SUCCEED) {
243 /* usable */
244 imprints->bits = (bte) (hdata[0] & 0xFF);
245 imprints->impcnt = (BUN) hdata[1];
246 imprints->dictcnt = (BUN) hdata[2];
247 imprints->bins = imprints->imprints.base + 4 * SIZEOF_SIZE_T;
248 imprints->stats = (BUN *) ((char *) imprints->bins + 64 * b->twidth);
249 imprints->imps = (void *) (imprints->stats + 64 * 3);
250 imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
251 close(fd);
252 imprints->imprints.parentid = b->batCacheid;
253 b->timprints = imprints;
254 ACCELDEBUG fprintf(stderr, "#BATcheckimprints(" ALGOBATFMT "): reusing persisted imprints\n", ALGOBATPAR(b));
255 MT_lock_unset(&b->batIdxLock);
256
257 return true;
258 }
259 close(fd);
260 /* unlink unusable file */
261 GDKunlink(imprints->imprints.farmid, BATDIR, nme, "timprints");
262 }
263 }
264 GDKfree(imprints);
265 GDKclrerr(); /* we're not currently interested in errors */
266 }
267 MT_lock_unset(&b->batIdxLock);
268 }
269 ret = b->timprints != NULL;
270 ACCELDEBUG if (ret) fprintf(stderr, "#BATcheckimprints(" ALGOBATFMT "): already has imprints\n", ALGOBATPAR(b));
271 return ret;
272}
273
274static void
275BATimpsync(void *arg)
276{
277 BAT *b = arg;
278 Imprints *imprints;
279 int fd;
280 lng t0 = 0;
281 const char *failed = " failed";
282
283 ACCELDEBUG t0 = GDKusec();
284
285 MT_lock_set(&b->batIdxLock);
286 if ((imprints = b->timprints) != NULL) {
287 Heap *hp = &imprints->imprints;
288 if (HEAPsave(hp, hp->filename, NULL) == GDK_SUCCEED) {
289 if (hp->storage == STORE_MEM) {
290 if ((fd = GDKfdlocate(hp->farmid, hp->filename, "rb+", NULL)) >= 0) {
291 /* add version number */
292 ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8;
293 /* sync-on-disk checked bit */
294 ((size_t *) hp->base)[0] |= (size_t) 1 << 16;
295 if (write(fd, hp->base, SIZEOF_SIZE_T) >= 0) {
296 failed = ""; /* not failed */
297 if (!(GDKdebug & NOSYNCMASK)) {
298#if defined(NATIVE_WIN32)
299 _commit(fd);
300#elif defined(HAVE_FDATASYNC)
301 fdatasync(fd);
302#elif defined(HAVE_FSYNC)
303 fsync(fd);
304#endif
305 }
306 hp->dirty = false;
307 } else {
308 failed = " write failed";
309 perror("write hash");
310 }
311 close(fd);
312 }
313 } else {
314 /* add version number */
315 ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8;
316 /* sync-on-disk checked bit */
317 ((size_t *) hp->base)[0] |= (size_t) 1 << 16;
318 if (!(GDKdebug & NOSYNCMASK) &&
319 MT_msync(hp->base, SIZEOF_SIZE_T) < 0) {
320 failed = " sync failed";
321 ((size_t *) hp->base)[0] &= ~((size_t) IMPRINTS_VERSION << 8);
322 } else {
323 hp->dirty = false;
324 failed = ""; /* not failed */
325 }
326 }
327 ACCELDEBUG fprintf(stderr, "#BATimpsync(" ALGOBATFMT "): "
328 "imprints persisted "
329 "(" LLFMT " usec)%s\n", ALGOBATPAR(b),
330 GDKusec() - t0, failed);
331 }
332 }
333 MT_lock_unset(&b->batIdxLock);
334 BBPunfix(b->batCacheid);
335}
336
337gdk_return
338BATimprints(BAT *b)
339{
340 BAT *s1 = NULL, *s2 = NULL, *s3 = NULL, *s4 = NULL;
341 Imprints *imprints;
342 lng t0 = 0;
343
344 /* we only create imprints for types that look like types we know */
345 switch (ATOMbasetype(b->ttype)) {
346 case TYPE_bte:
347 case TYPE_sht:
348 case TYPE_int:
349 case TYPE_lng:
350#ifdef HAVE_HGE
351 case TYPE_hge:
352#endif
353 case TYPE_flt:
354 case TYPE_dbl:
355 break;
356 default: /* type not supported */
357 /* doesn't look enough like base type: do nothing */
358 GDKerror("BATimprints: unsupported type\n");
359 return GDK_FAIL;
360 }
361
362 BATcheck(b, "BATimprints", GDK_FAIL);
363
364 if (BATcheckimprints(b))
365 return GDK_SUCCEED;
366
367 if (VIEWtparent(b)) {
368 /* views always keep null pointer and need to obtain
369 * the latest imprint from the parent at query time */
370 s2 = b; /* remember for ACCELDEBUG print */
371 b = BBPdescriptor(VIEWtparent(b));
372 assert(b);
373 if (BATcheckimprints(b))
374 return GDK_SUCCEED;
375 }
376 MT_lock_set(&b->batIdxLock);
377 ACCELDEBUG t0 = GDKusec();
378 if (b->timprints == NULL) {
379 BUN cnt;
380 const char *nme = GDKinmemory() ? ":inmemory" : BBP_physical(b->batCacheid);
381 size_t pages;
382
383 MT_lock_unset(&b->batIdxLock);
384
385 ACCELDEBUG {
386 if (s2)
387 fprintf(stderr, "#BATimprints(b=" ALGOBATFMT
388 "): creating imprints on parent "
389 ALGOBATFMT "\n",
390 ALGOBATPAR(s2), ALGOBATPAR(b));
391 else
392 fprintf(stderr, "#BATimprints(b=" ALGOBATFMT
393 "): creating imprints\n",
394 ALGOBATPAR(b));
395 }
396 s2 = NULL;
397
398 imprints = GDKzalloc(sizeof(Imprints));
399 if (imprints == NULL) {
400 MT_lock_unset(&b->batIdxLock);
401 return GDK_FAIL;
402 }
403 strconcat_len(imprints->imprints.filename,
404 sizeof(imprints->imprints.filename),
405 nme, ".timprints", NULL);
406 pages = (((size_t) BATcount(b) * b->twidth) + IMPS_PAGE - 1) / IMPS_PAGE;
407 imprints->imprints.farmid = BBPselectfarm(b->batRole, b->ttype,
408 imprintsheap);
409
410#define SMP_SIZE 2048
411 s1 = BATsample(b, SMP_SIZE);
412 if (s1 == NULL) {
413 MT_lock_unset(&b->batIdxLock);
414 GDKfree(imprints);
415 return GDK_FAIL;
416 }
417 s2 = BATunique(b, s1);
418 if (s2 == NULL) {
419 MT_lock_unset(&b->batIdxLock);
420 BBPunfix(s1->batCacheid);
421 GDKfree(imprints);
422 return GDK_FAIL;
423 }
424 s3 = BATproject(s2, b);
425 if (s3 == NULL) {
426 MT_lock_unset(&b->batIdxLock);
427 BBPunfix(s1->batCacheid);
428 BBPunfix(s2->batCacheid);
429 GDKfree(imprints);
430 return GDK_FAIL;
431 }
432 s3->tkey = true; /* we know is unique on tail now */
433 if (BATsort(&s4, NULL, NULL, s3, NULL, NULL, false, false, false) != GDK_SUCCEED) {
434 MT_lock_unset(&b->batIdxLock);
435 BBPunfix(s1->batCacheid);
436 BBPunfix(s2->batCacheid);
437 BBPunfix(s3->batCacheid);
438 GDKfree(imprints);
439 return GDK_FAIL;
440 }
441 /* s4 now is ordered and unique on tail */
442 assert(s4->tkey && s4->tsorted);
443 cnt = BATcount(s4);
444 imprints->bits = 64;
445 if (cnt <= 32)
446 imprints->bits = 32;
447 if (cnt <= 16)
448 imprints->bits = 16;
449 if (cnt <= 8)
450 imprints->bits = 8;
451
452 /* The heap we create here consists of four parts:
453 * bins, max 64 entries with bin boundaries, domain of b;
454 * stats, min/max/count for each bin, min/max are oid, and count BUN;
455 * imps, max one entry per "page", entry is "bits" wide;
456 * dict, max two entries per three "pages".
457 * In addition, we add some housekeeping entries at
458 * the start so that we can determine whether we can
459 * trust the imprints when encountered on startup (including
460 * a version number -- CURRENT VERSION is 2). */
461 MT_lock_set(&b->batIdxLock);
462 if (b->timprints != NULL ||
463 HEAPalloc(&imprints->imprints,
464 IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T + /* extra info */
465 64 * b->twidth + /* bins */
466 64 * 2 * SIZEOF_OID + /* {min,max}_bins */
467 64 * SIZEOF_BUN + /* cnt_bins */
468 pages * (imprints->bits / 8) + /* imps */
469 sizeof(uint64_t) + /* padding for alignment */
470 pages * sizeof(cchdc_t), /* dict */
471 1) != GDK_SUCCEED) {
472 MT_lock_unset(&b->batIdxLock);
473 GDKfree(imprints);
474 BBPunfix(s1->batCacheid);
475 BBPunfix(s2->batCacheid);
476 BBPunfix(s3->batCacheid);
477 BBPunfix(s4->batCacheid);
478 if (b->timprints != NULL)
479 return GDK_SUCCEED; /* we were beaten to it */
480 GDKerror("#BATimprints: memory allocation error");
481 return GDK_FAIL;
482 }
483 imprints->bins = imprints->imprints.base + IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T;
484 imprints->stats = (BUN *) ((char *) imprints->bins + 64 * b->twidth);
485 imprints->imps = (void *) (imprints->stats + 64 * 3);
486 imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
487
488 switch (ATOMbasetype(b->ttype)) {
489 case TYPE_bte:
490 FILL_HISTOGRAM(bte);
491 break;
492 case TYPE_sht:
493 FILL_HISTOGRAM(sht);
494 break;
495 case TYPE_int:
496 FILL_HISTOGRAM(int);
497 break;
498 case TYPE_lng:
499 FILL_HISTOGRAM(lng);
500 break;
501#ifdef HAVE_HGE
502 case TYPE_hge:
503 FILL_HISTOGRAM(hge);
504 break;
505#endif
506 case TYPE_flt:
507 FILL_HISTOGRAM(flt);
508 break;
509 case TYPE_dbl:
510 FILL_HISTOGRAM(dbl);
511 break;
512 default:
513 /* should never reach here */
514 assert(0);
515 }
516
517 imprints_create(b,
518 imprints->bins,
519 imprints->stats,
520 imprints->bits,
521 imprints->imps,
522 &imprints->impcnt,
523 imprints->dict,
524 &imprints->dictcnt);
525 assert(imprints->impcnt <= pages);
526 assert(imprints->dictcnt <= pages);
527#ifndef NDEBUG
528 memset((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8), 0, (char *) imprints->dict - ((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8)));
529#endif
530 imprints->imprints.free = (size_t) ((char *) ((cchdc_t *) imprints->dict + imprints->dictcnt) - imprints->imprints.base);
531 /* add info to heap for when they become persistent */
532 ((size_t *) imprints->imprints.base)[0] = (size_t) (imprints->bits);
533 ((size_t *) imprints->imprints.base)[1] = (size_t) imprints->impcnt;
534 ((size_t *) imprints->imprints.base)[2] = (size_t) imprints->dictcnt;
535 ((size_t *) imprints->imprints.base)[3] = (size_t) BATcount(b);
536 imprints->imprints.parentid = b->batCacheid;
537 b->timprints = imprints;
538 if (BBP_status(b->batCacheid) & BBPEXISTING &&
539 !b->theap.dirty &&
540 !GDKinmemory()) {
541 MT_Id tid;
542 BBPfix(b->batCacheid);
543 char name[16];
544 snprintf(name, sizeof(name), "impssync%d", b->batCacheid);
545 if (MT_create_thread(&tid, BATimpsync, b,
546 MT_THR_DETACHED, name) < 0)
547 BBPunfix(b->batCacheid);
548 }
549 }
550
551 ACCELDEBUG fprintf(stderr, "#BATimprints(%s): imprints construction " LLFMT " usec\n", BATgetId(b), GDKusec() - t0);
552 MT_lock_unset(&b->batIdxLock);
553
554 /* BBPUnfix tries to get the imprints lock which might lead to
555 * a deadlock if those were unfixed earlier */
556 if (s1) {
557 BBPunfix(s1->batCacheid);
558 BBPunfix(s2->batCacheid);
559 BBPunfix(s3->batCacheid);
560 BBPunfix(s4->batCacheid);
561 }
562 return GDK_SUCCEED;
563}
564
/* Helper for IMPSgetbin: read the value that v points to as a TYPE
 * and store the index of its bin (out of B bins) in ret.  Uses v, ret
 * and, through GETBIN, bins from the expansion site. */
#define getbin(TYPE,B) \
do { \
	register const TYPE val = * (TYPE *) v; \
	GETBIN(ret,val,B); \
} while (0)
570
/* Return the index of the bin that the value *v falls in.
 *
 * tpe    - type of the value; one of the TYPE_* constants handled in
 *          the switch below (note that tpe is used as is, it is not
 *          mapped to a base type here)
 * bits   - number of bins: 8, 16, 32 or 64
 * inbins - the bin boundaries, an array of values of type tpe
 * v      - pointer to the value to classify
 *
 * For an unsupported tpe the assert fires; if asserts are compiled
 * out, -1 is returned. */
int
IMPSgetbin(int tpe, bte bits, const char *restrict inbins, const void *restrict v)
{
	int ret = -1;

	/* each case declares `bins' with the right element type; the
	 * getbin/GETBIN macros pick it up by name */
	switch (tpe) {
	case TYPE_bte:
	{
		const bte *restrict bins = (bte *) inbins;
		BINSIZE(bits, getbin, bte);
	}
		break;
	case TYPE_sht:
	{
		const sht *restrict bins = (sht *) inbins;
		BINSIZE(bits, getbin, sht);
	}
		break;
	case TYPE_int:
	{
		const int *restrict bins = (int *) inbins;
		BINSIZE(bits, getbin, int);
	}
		break;
	case TYPE_lng:
	{
		const lng *restrict bins = (lng *) inbins;
		BINSIZE(bits, getbin, lng);
	}
		break;
#ifdef HAVE_HGE
	case TYPE_hge:
	{
		const hge *restrict bins = (hge *) inbins;
		BINSIZE(bits, getbin, hge);
	}
		break;
#endif
	case TYPE_flt:
	{
		const flt *restrict bins = (flt *) inbins;
		BINSIZE(bits, getbin, flt);
	}
		break;
	case TYPE_dbl:
	{
		const dbl *restrict bins = (dbl *) inbins;
		BINSIZE(bits, getbin, dbl);
	}
		break;
	default:
		assert(0);
		(void) inbins;
		break;
	}
	return ret;
}
628
629lng
630IMPSimprintsize(BAT *b)
631{
632 lng sz = 0;
633 if (b->timprints && b->timprints != (Imprints *) 1) {
634 sz = b->timprints->impcnt * b->timprints->bits / 8;
635 sz += b->timprints->dictcnt * sizeof(cchdc_t);
636 }
637 return sz;
638}
639
640static void
641IMPSremove(BAT *b)
642{
643 Imprints *imprints;
644
645 assert(b->timprints != NULL);
646 assert(!VIEWtparent(b));
647
648 if ((imprints = b->timprints) != NULL) {
649 b->timprints = NULL;
650
651 if ((GDKdebug & ALGOMASK) &&
652 * (size_t *) imprints->imprints.base & (1 << 16))
653 fprintf(stderr, "#IMPSremove: removing persisted imprints\n");
654 if (HEAPdelete(&imprints->imprints, BBP_physical(b->batCacheid),
655 "timprints") != GDK_SUCCEED)
656 IODEBUG fprintf(stderr, "#IMPSremove(%s): imprints heap\n", BATgetId(b));
657
658 GDKfree(imprints);
659 }
660}
661
662void
663IMPSdestroy(BAT *b)
664{
665 if (b && b->timprints) {
666 MT_lock_set(&b->batIdxLock);
667 if (b->timprints == (Imprints *) 1) {
668 b->timprints = NULL;
669 GDKunlink(BBPselectfarm(b->batRole, b->ttype, imprintsheap),
670 BATDIR,
671 BBP_physical(b->batCacheid),
672 "timprints");
673 } else if (b->timprints != NULL && !VIEWtparent(b))
674 IMPSremove(b);
675 MT_lock_unset(&b->batIdxLock);
676 }
677}
678
679/* free the memory associated with the imprints, do not remove the
680 * heap files; indicate that imprints are available on disk by setting
681 * the imprints pointer to 1 */
682void
683IMPSfree(BAT *b)
684{
685 Imprints *imprints;
686
687 if (b && b->timprints) {
688 assert(b->batCacheid > 0);
689 MT_lock_set(&b->batIdxLock);
690 imprints = b->timprints;
691 if (imprints != NULL && imprints != (Imprints *) 1) {
692 if (GDKinmemory()) {
693 b->timprints = NULL;
694 if (!VIEWtparent(b)) {
695 HEAPfree(&imprints->imprints, true);
696 GDKfree(imprints);
697 }
698 } else {
699 b->timprints = (Imprints *) 1;
700 if (!VIEWtparent(b)) {
701 HEAPfree(&imprints->imprints, false);
702 GDKfree(imprints);
703 }
704 }
705 }
706 MT_lock_unset(&b->batIdxLock);
707 }
708}
709
710#ifndef NDEBUG
711/* never called, useful for debugging */
712
/* Helper for IMPSprint: render imprint number icnt as text in s, one
 * character per bit ('x' when the bit is set, '.' when it is not),
 * followed by a terminating NUL.  B is the imprint width; T is not
 * used.  Uses imprints, icnt, j and s from the expansion site. */
#define IMPSPRNTMASK(T, B) \
	do { \
		uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \
		for (j = 0; j < imprints->bits; j++) \
			s[j] = IMPSisSet(B, im[icnt], j) ? 'x' : '.'; \
		s[j] = '\0'; \
	} while (0)
720
721void
722IMPSprint(BAT *b)
723{
724 Imprints *imprints;
725 cchdc_t *restrict d;
726 char s[65]; /* max number of bits + 1 */
727 BUN icnt, dcnt, l, pages;
728 BUN *restrict min_bins, *restrict max_bins;
729 BUN *restrict cnt_bins;
730 bte j;
731 int i;
732
733 if (!BATcheckimprints(b)) {
734 fprintf(stderr, "no imprint\n");
735 return;
736 }
737 imprints = b->timprints;
738 d = (cchdc_t *) imprints->dict;
739 min_bins = imprints->stats;
740 max_bins = min_bins + 64;
741 cnt_bins = max_bins + 64;
742
743 fprintf(stderr,
744 "bits = %d, impcnt = " BUNFMT ", dictcnt = " BUNFMT "\n",
745 imprints->bits, imprints->impcnt, imprints->dictcnt);
746 fprintf(stderr,"MIN = ");
747 for (i = 0; i < imprints->bits; i++) {
748 fprintf(stderr, "[ " BUNFMT " ] ", min_bins[i]);
749 }
750 fprintf(stderr,"\n");
751 fprintf(stderr,"MAX = ");
752 for (i = 0; i < imprints->bits; i++) {
753 fprintf(stderr, "[ " BUNFMT " ] ", max_bins[i]);
754 }
755 fprintf(stderr,"\n");
756 fprintf(stderr,"COUNT = ");
757 for (i = 0; i < imprints->bits; i++) {
758 fprintf(stderr, "[ " BUNFMT " ] ", cnt_bins[i]);
759 }
760 fprintf(stderr,"\n");
761 for (dcnt = 0, icnt = 0, pages = 1; dcnt < imprints->dictcnt; dcnt++) {
762 if (d[dcnt].repeat) {
763 BINSIZE(imprints->bits, IMPSPRNTMASK, " ");
764 pages += d[dcnt].cnt;
765 fprintf(stderr, "[ " BUNFMT " ]r %s\n", pages, s);
766 icnt++;
767 } else {
768 l = icnt + d[dcnt].cnt;
769 for (; icnt < l; icnt++) {
770 BINSIZE(imprints->bits, IMPSPRNTMASK, " ");
771 fprintf(stderr, "[ " BUNFMT " ] %s\n",
772 pages++, s);
773 }
774 }
775 }
776}
777#endif
778