1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * Implementation for the column imprints index. |
11 | * See paper: |
12 | * Column Imprints: A Secondary Index Structure, |
13 | * L.Sidirourgos and M.Kersten. |
14 | */ |
15 | |
16 | #include "monetdb_config.h" |
17 | #include "gdk.h" |
18 | #include "gdk_private.h" |
19 | #include "gdk_imprints.h" |
20 | |
/* Version stamped into bits 8..15 of the first header word of a
 * persisted imprints heap. */
#define IMPRINTS_VERSION	2
/* Number of size_t fields in the header that precedes the bins. */
#define IMPRINTS_HEADER_SIZE	4 /* nr of size_t fields in header */
23 | |
/* Dispatch on the imprint width NBITS: expand APPLY(CTYPE, n) with n
 * being the literal 8, 16, 32 or 64 that equals NBITS.  Any other
 * width trips the assertion. */
#define BINSIZE(NBITS, APPLY, CTYPE)			\
	do {						\
		switch (NBITS) {			\
		case 8:					\
			APPLY(CTYPE,8);			\
			break;				\
		case 16:				\
			APPLY(CTYPE,16);		\
			break;				\
		case 32:				\
			APPLY(CTYPE,32);		\
			break;				\
		case 64:				\
			APPLY(CTYPE,64);		\
			break;				\
		default:				\
			assert(0);			\
			break;				\
		}					\
	} while (0)
33 | |
34 | |
/* Store in Z the bin number of value X for an imprint with B bins.
 * The array `bins' is picked up from the expansion site; the result
 * is the number of entries among bins[1] .. bins[B-1] that are not
 * larger than X, so it lies between 0 and B-1. */
#define GETBIN(Z,X,B)					\
	do {						\
		int _binpos = 1;			\
		Z = 0;					\
		while (_binpos < B) {			\
			Z += ((X) >= bins[_binpos]);	\
			_binpos++;			\
		}					\
	} while (0)
42 | |
43 | |
/* Build the imprints vector and its dictionary for a column holding
 * values of type TYPE, using imprints that are B bits wide.
 *
 * The macro is expanded inside imprints_create() and works directly on
 * that function's variables: b (the column), inbins (bin boundaries),
 * imps (output array of imprints), dict (output array of dictionary
 * entries), the counters i, icnt and dcnt, the scratch variable bin,
 * and the per-bin statistics arrays min_bins, max_bins and cnt_bins.
 *
 * The column is processed in "pages" of IMPS_PAGE / sizeof(TYPE)
 * values.  For each page one mask is computed with a bit for every bin
 * that occurs in the page (via IMPSsetBit; NOTE(review): presumably
 * sets bit number `bin' in the mask -- defined elsewhere).  While doing
 * so, for every non-nil value the count of its bin is incremented and
 * min_bins/max_bins record the position of the smallest/largest value
 * seen so far in that bin.
 *
 * Consecutive pages with the same mask are stored only once: a
 * dictionary entry with `repeat' set says that the imprint is shared
 * by `cnt' pages, an entry without `repeat' says that the next `cnt'
 * imprints each describe one page.  An entry never counts more than
 * IMPS_MAX_CNT-1. */
#define IMPS_CREATE(TYPE,B)						\
do {									\
	uint##B##_t mask, prvmask;					\
	uint##B##_t *restrict im = (uint##B##_t *) imps;		\
	const TYPE *restrict col = (TYPE *) Tloc(b, 0);			\
	const TYPE *restrict bins = (TYPE *) inbins;			\
	/* number of column values covered by one imprint */		\
	const BUN page = IMPS_PAGE / sizeof(TYPE);			\
	prvmask = 0;							\
	/* i is advanced by the inner loop, one page per iteration */	\
	for (i = 0; i < b->batCount; ) {				\
		const BUN lim = MIN(i + page, b->batCount);		\
		/* new mask */						\
		mask = 0;						\
		/* build mask for all BUNs in one PAGE */		\
		for ( ; i < lim; i++) {					\
			register const TYPE val = col[i];		\
			GETBIN(bin,val,B);				\
			mask = IMPSsetBit(B,mask,bin);			\
			if (!is_##TYPE##_nil(val)) { /* do not count nils */ \
				/* first value in this bin is both min and max */ \
				if (!cnt_bins[bin]++) {			\
					min_bins[bin] = max_bins[bin] = i; \
				} else {				\
					if (val < col[min_bins[bin]])	\
						min_bins[bin] = i;	\
					if (val > col[max_bins[bin]])	\
						max_bins[bin] = i;	\
				}					\
			}						\
		}							\
		/* same mask as previous and enough count to add */	\
		if ((prvmask == mask) && (dcnt > 0) &&			\
		    (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) {		\
			/* not a repeat header */			\
			if (!dict[dcnt-1].repeat) {			\
				/* if compressed */			\
				if (dict[dcnt-1].cnt > 1) {		\
					/* uncompress last */		\
					dict[dcnt-1].cnt--;		\
					/* new header; its repeat flag is	\
					 * assigned right below */	\
					dict[dcnt].cnt = 1;		\
					dict[dcnt].flags = 0;		\
					dcnt++;				\
				}					\
				/* set repeat */			\
				dict[dcnt-1].repeat = 1;		\
			}						\
			/* increase cnt */				\
			dict[dcnt-1].cnt++;				\
		} else { /* new mask (or run out of header count) */	\
			prvmask=mask;					\
			im[icnt] = mask;				\
			icnt++;						\
			/* extend the last entry if it is a plain	\
			 * (non-repeat) one with room left, else	\
			 * start a fresh plain entry */			\
			if ((dcnt > 0) && !(dict[dcnt-1].repeat) &&	\
			    (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) {	\
				dict[dcnt-1].cnt++;			\
			} else {					\
				dict[dcnt].cnt = 1;			\
				dict[dcnt].repeat = 0;			\
				dict[dcnt].flags = 0;			\
				dcnt++;					\
			}						\
		}							\
	}								\
} while (0)
107 | |
108 | static void |
109 | imprints_create(BAT *b, void *inbins, BUN *stats, bte bits, |
110 | void *imps, BUN *impcnt, cchdc_t *dict, BUN *dictcnt) |
111 | { |
112 | BUN i; |
113 | BUN dcnt, icnt; |
114 | BUN *restrict min_bins = stats; |
115 | BUN *restrict max_bins = min_bins + 64; |
116 | BUN *restrict cnt_bins = max_bins + 64; |
117 | int bin = 0; |
118 | dcnt = icnt = 0; |
119 | #ifndef NDEBUG |
120 | memset(min_bins, 0, 64 * SIZEOF_BUN); |
121 | memset(max_bins, 0, 64 * SIZEOF_BUN); |
122 | #endif |
123 | memset(cnt_bins, 0, 64 * SIZEOF_BUN); |
124 | |
125 | switch (ATOMbasetype(b->ttype)) { |
126 | case TYPE_bte: |
127 | BINSIZE(bits, IMPS_CREATE, bte); |
128 | break; |
129 | case TYPE_sht: |
130 | BINSIZE(bits, IMPS_CREATE, sht); |
131 | break; |
132 | case TYPE_int: |
133 | BINSIZE(bits, IMPS_CREATE, int); |
134 | break; |
135 | case TYPE_lng: |
136 | BINSIZE(bits, IMPS_CREATE, lng); |
137 | break; |
138 | #ifdef HAVE_HGE |
139 | case TYPE_hge: |
140 | BINSIZE(bits, IMPS_CREATE, hge); |
141 | break; |
142 | #endif |
143 | case TYPE_flt: |
144 | BINSIZE(bits, IMPS_CREATE, flt); |
145 | break; |
146 | case TYPE_dbl: |
147 | BINSIZE(bits, IMPS_CREATE, dbl); |
148 | break; |
149 | default: |
150 | /* should never reach here */ |
151 | assert(0); |
152 | } |
153 | |
154 | *dictcnt = dcnt; |
155 | *impcnt = icnt; |
156 | } |
157 | |
/* Helper for FILL_HISTOGRAM: in builds with assertions enabled, zero
 * the entries h[k] .. h[63] (k and h come from the expansion site);
 * with NDEBUG it expands to nothing. */
#ifndef NDEBUG
#define CLRMEM()	while (k < 64) h[k++] = 0
#else
#define CLRMEM()	((void) 0)
#endif
163 | |
/* Fill the bin boundaries of the imprints for a column of type TYPE.
 *
 * Expanded inside BATimprints() and uses its variables: s4 (the
 * sorted, duplicate-free sample), cnt (number of values in s4) and
 * imprints (whose `bins' array is written and whose `bits' is read).
 *
 * With fewer than 63 sample values, each value becomes a boundary and
 * the remaining entries up to imprints->bits are set to
 * GDK_<TYPE>_max; CLRMEM() then zeroes the rest of the 64 entries in
 * debug builds.  Otherwise boundaries are taken from the sample at
 * evenly spaced positions (step cnt/63), and if exactly 63 were
 * written the last sample value is used for the final entry. */
#define FILL_HISTOGRAM(TYPE)						\
do {									\
	BUN k;								\
	TYPE *restrict s = (TYPE *) Tloc(s4, 0);			\
	TYPE *restrict h = imprints->bins;				\
	if (cnt < 64-1) {						\
		TYPE max = GDK_##TYPE##_max;				\
		/* one boundary per distinct sample value */		\
		for (k = 0; k < cnt; k++)				\
			h[k] = s[k];					\
		/* pad the bins that are in use with the maximum */	\
		while (k < (BUN) imprints->bits)			\
			h[k++] = max;					\
		CLRMEM();						\
	} else {							\
		/* y walks through the sample in fractional steps */	\
		double y, ystep = (double) cnt / (64 - 1);		\
		for (k = 0, y = 0; (BUN) y < cnt; y += ystep, k++)	\
			h[k] = s[(BUN) y];				\
		if (k == 64 - 1) /* there is one left */		\
			h[k] = s[cnt - 1];				\
	}								\
} while (0)
184 | |
/* Check whether we have imprints on b (and return true if we do).  It
 * may be that the imprints were made persistent, but we hadn't seen
 * that yet, so check the file system.  This also returns true if b is
 * a view and there are imprints on b's parent.
 *
 * Note that the b->timprints pointer can be NULL, meaning there are
 * no imprints; (Imprints *) 1, meaning there are no imprints loaded,
 * but they may exist on disk; or a valid pointer to loaded imprints.
 * These values are maintained here, in the IMPSdestroy and IMPSfree
 * functions, and in BBPdiskscan during initialization.
 *
 * When the pointer is (Imprints *) 1, the file is only accepted if its
 * header carries the "synced" bit (bit 16 of the first word), the
 * expected IMPRINTS_VERSION (bits 8..15), the current BAT count (fourth
 * word), and if the file is at least as large as the layout implied by
 * the header.  A file that fails any check is unlinked and the pointer
 * is left NULL. */
bool
BATcheckimprints(BAT *b)
{
	bool ret;

	/* views never own imprints: look at the parent instead */
	if (VIEWtparent(b)) {
		assert(b->timprints == NULL);
		b = BBPdescriptor(VIEWtparent(b));
	}

	/* test once without the lock and once more while holding it,
	 * since another thread may have loaded the imprints meanwhile */
	if (b->timprints == (Imprints *) 1) {
		MT_lock_set(&b->batIdxLock);
		if (b->timprints == (Imprints *) 1) {
			Imprints *imprints;
			const char *nme = BBP_physical(b->batCacheid);

			assert(!GDKinmemory());
			/* assume failure until the file proves usable */
			b->timprints = NULL;
			if ((imprints = GDKzalloc(sizeof(Imprints))) != NULL &&
			    (imprints->imprints.farmid = BBPselectfarm(b->batRole, b->ttype, imprintsheap)) >= 0) {
				int fd;

				strconcat_len(imprints->imprints.filename,
					      sizeof(imprints->imprints.filename),
					      nme, ".timprints" , NULL);
				/* check whether a persisted imprints index
				 * can be found */
				if ((fd = GDKfdlocate(imprints->imprints.farmid, nme, "rb" , "timprints" )) >= 0) {
					/* header words: [0] bits/version/sync
					 * flag, [1] impcnt, [2] dictcnt,
					 * [3] BAT count */
					size_t hdata[4];
					struct stat st;
					size_t pages;

					pages = (((size_t) BATcount(b) * b->twidth) + IMPS_PAGE - 1) / IMPS_PAGE;
					/* note that the expected heap size is
					 * stored into imprints->imprints.size
					 * and .free as part of the test */
					if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
					    hdata[0] & ((size_t) 1 << 16) &&
					    ((hdata[0] & 0xFF00) >> 8) == IMPRINTS_VERSION &&
					    hdata[3] == (size_t) BATcount(b) &&
					    fstat(fd, &st) == 0 &&
					    st.st_size >= (off_t) (imprints->imprints.size =
								   imprints->imprints.free =
								   64 * b->twidth +
								   64 * 2 * SIZEOF_OID +
								   64 * SIZEOF_BUN +
								   pages * ((bte) hdata[0] / 8) +
								   hdata[2] * sizeof(cchdc_t) +
								   sizeof(uint64_t) /* padding for alignment */
								   + 4 * SIZEOF_SIZE_T) &&
					    HEAPload(&imprints->imprints, nme, "timprints" , false) == GDK_SUCCEED) {
						/* usable */
						imprints->bits = (bte) (hdata[0] & 0xFF);
						imprints->impcnt = (BUN) hdata[1];
						imprints->dictcnt = (BUN) hdata[2];
						/* bins follow the four header words;
						 * stats, imps and dict follow in turn,
						 * dict at a uint64_t-aligned address */
						imprints->bins = imprints->imprints.base + 4 * SIZEOF_SIZE_T;
						imprints->stats = (BUN *) ((char *) imprints->bins + 64 * b->twidth);
						imprints->imps = (void *) (imprints->stats + 64 * 3);
						imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
						close(fd);
						imprints->imprints.parentid = b->batCacheid;
						b->timprints = imprints;
						ACCELDEBUG fprintf(stderr, "#BATcheckimprints(" ALGOBATFMT "): reusing persisted imprints\n" , ALGOBATPAR(b));
						MT_lock_unset(&b->batIdxLock);

						return true;
					}
					close(fd);
					/* unlink unusable file */
					GDKunlink(imprints->imprints.farmid, BATDIR, nme, "timprints" );
				}
			}
			GDKfree(imprints);
			GDKclrerr();	/* we're not currently interested in errors */
		}
		MT_lock_unset(&b->batIdxLock);
	}
	ret = b->timprints != NULL;
	ACCELDEBUG if (ret) fprintf(stderr, "#BATcheckimprints(" ALGOBATFMT "): already has imprints\n" , ALGOBATPAR(b));
	return ret;
}
273 | |
274 | static void |
275 | BATimpsync(void *arg) |
276 | { |
277 | BAT *b = arg; |
278 | Imprints *imprints; |
279 | int fd; |
280 | lng t0 = 0; |
281 | const char *failed = " failed" ; |
282 | |
283 | ACCELDEBUG t0 = GDKusec(); |
284 | |
285 | MT_lock_set(&b->batIdxLock); |
286 | if ((imprints = b->timprints) != NULL) { |
287 | Heap *hp = &imprints->imprints; |
288 | if (HEAPsave(hp, hp->filename, NULL) == GDK_SUCCEED) { |
289 | if (hp->storage == STORE_MEM) { |
290 | if ((fd = GDKfdlocate(hp->farmid, hp->filename, "rb+" , NULL)) >= 0) { |
291 | /* add version number */ |
292 | ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8; |
293 | /* sync-on-disk checked bit */ |
294 | ((size_t *) hp->base)[0] |= (size_t) 1 << 16; |
295 | if (write(fd, hp->base, SIZEOF_SIZE_T) >= 0) { |
296 | failed = "" ; /* not failed */ |
297 | if (!(GDKdebug & NOSYNCMASK)) { |
298 | #if defined(NATIVE_WIN32) |
299 | _commit(fd); |
300 | #elif defined(HAVE_FDATASYNC) |
301 | fdatasync(fd); |
302 | #elif defined(HAVE_FSYNC) |
303 | fsync(fd); |
304 | #endif |
305 | } |
306 | hp->dirty = false; |
307 | } else { |
308 | failed = " write failed" ; |
309 | perror("write hash" ); |
310 | } |
311 | close(fd); |
312 | } |
313 | } else { |
314 | /* add version number */ |
315 | ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8; |
316 | /* sync-on-disk checked bit */ |
317 | ((size_t *) hp->base)[0] |= (size_t) 1 << 16; |
318 | if (!(GDKdebug & NOSYNCMASK) && |
319 | MT_msync(hp->base, SIZEOF_SIZE_T) < 0) { |
320 | failed = " sync failed" ; |
321 | ((size_t *) hp->base)[0] &= ~((size_t) IMPRINTS_VERSION << 8); |
322 | } else { |
323 | hp->dirty = false; |
324 | failed = "" ; /* not failed */ |
325 | } |
326 | } |
327 | ACCELDEBUG fprintf(stderr, "#BATimpsync(" ALGOBATFMT "): " |
328 | "imprints persisted " |
329 | "(" LLFMT " usec)%s\n" , ALGOBATPAR(b), |
330 | GDKusec() - t0, failed); |
331 | } |
332 | } |
333 | MT_lock_unset(&b->batIdxLock); |
334 | BBPunfix(b->batCacheid); |
335 | } |
336 | |
337 | gdk_return |
338 | BATimprints(BAT *b) |
339 | { |
340 | BAT *s1 = NULL, *s2 = NULL, *s3 = NULL, *s4 = NULL; |
341 | Imprints *imprints; |
342 | lng t0 = 0; |
343 | |
344 | /* we only create imprints for types that look like types we know */ |
345 | switch (ATOMbasetype(b->ttype)) { |
346 | case TYPE_bte: |
347 | case TYPE_sht: |
348 | case TYPE_int: |
349 | case TYPE_lng: |
350 | #ifdef HAVE_HGE |
351 | case TYPE_hge: |
352 | #endif |
353 | case TYPE_flt: |
354 | case TYPE_dbl: |
355 | break; |
356 | default: /* type not supported */ |
357 | /* doesn't look enough like base type: do nothing */ |
358 | GDKerror("BATimprints: unsupported type\n" ); |
359 | return GDK_FAIL; |
360 | } |
361 | |
362 | BATcheck(b, "BATimprints" , GDK_FAIL); |
363 | |
364 | if (BATcheckimprints(b)) |
365 | return GDK_SUCCEED; |
366 | |
367 | if (VIEWtparent(b)) { |
368 | /* views always keep null pointer and need to obtain |
369 | * the latest imprint from the parent at query time */ |
370 | s2 = b; /* remember for ACCELDEBUG print */ |
371 | b = BBPdescriptor(VIEWtparent(b)); |
372 | assert(b); |
373 | if (BATcheckimprints(b)) |
374 | return GDK_SUCCEED; |
375 | } |
376 | MT_lock_set(&b->batIdxLock); |
377 | ACCELDEBUG t0 = GDKusec(); |
378 | if (b->timprints == NULL) { |
379 | BUN cnt; |
380 | const char *nme = GDKinmemory() ? ":inmemory" : BBP_physical(b->batCacheid); |
381 | size_t pages; |
382 | |
383 | MT_lock_unset(&b->batIdxLock); |
384 | |
385 | ACCELDEBUG { |
386 | if (s2) |
387 | fprintf(stderr, "#BATimprints(b=" ALGOBATFMT |
388 | "): creating imprints on parent " |
389 | ALGOBATFMT "\n" , |
390 | ALGOBATPAR(s2), ALGOBATPAR(b)); |
391 | else |
392 | fprintf(stderr, "#BATimprints(b=" ALGOBATFMT |
393 | "): creating imprints\n" , |
394 | ALGOBATPAR(b)); |
395 | } |
396 | s2 = NULL; |
397 | |
398 | imprints = GDKzalloc(sizeof(Imprints)); |
399 | if (imprints == NULL) { |
400 | MT_lock_unset(&b->batIdxLock); |
401 | return GDK_FAIL; |
402 | } |
403 | strconcat_len(imprints->imprints.filename, |
404 | sizeof(imprints->imprints.filename), |
405 | nme, ".timprints" , NULL); |
406 | pages = (((size_t) BATcount(b) * b->twidth) + IMPS_PAGE - 1) / IMPS_PAGE; |
407 | imprints->imprints.farmid = BBPselectfarm(b->batRole, b->ttype, |
408 | imprintsheap); |
409 | |
410 | #define SMP_SIZE 2048 |
411 | s1 = BATsample(b, SMP_SIZE); |
412 | if (s1 == NULL) { |
413 | MT_lock_unset(&b->batIdxLock); |
414 | GDKfree(imprints); |
415 | return GDK_FAIL; |
416 | } |
417 | s2 = BATunique(b, s1); |
418 | if (s2 == NULL) { |
419 | MT_lock_unset(&b->batIdxLock); |
420 | BBPunfix(s1->batCacheid); |
421 | GDKfree(imprints); |
422 | return GDK_FAIL; |
423 | } |
424 | s3 = BATproject(s2, b); |
425 | if (s3 == NULL) { |
426 | MT_lock_unset(&b->batIdxLock); |
427 | BBPunfix(s1->batCacheid); |
428 | BBPunfix(s2->batCacheid); |
429 | GDKfree(imprints); |
430 | return GDK_FAIL; |
431 | } |
432 | s3->tkey = true; /* we know is unique on tail now */ |
433 | if (BATsort(&s4, NULL, NULL, s3, NULL, NULL, false, false, false) != GDK_SUCCEED) { |
434 | MT_lock_unset(&b->batIdxLock); |
435 | BBPunfix(s1->batCacheid); |
436 | BBPunfix(s2->batCacheid); |
437 | BBPunfix(s3->batCacheid); |
438 | GDKfree(imprints); |
439 | return GDK_FAIL; |
440 | } |
441 | /* s4 now is ordered and unique on tail */ |
442 | assert(s4->tkey && s4->tsorted); |
443 | cnt = BATcount(s4); |
444 | imprints->bits = 64; |
445 | if (cnt <= 32) |
446 | imprints->bits = 32; |
447 | if (cnt <= 16) |
448 | imprints->bits = 16; |
449 | if (cnt <= 8) |
450 | imprints->bits = 8; |
451 | |
452 | /* The heap we create here consists of four parts: |
453 | * bins, max 64 entries with bin boundaries, domain of b; |
454 | * stats, min/max/count for each bin, min/max are oid, and count BUN; |
455 | * imps, max one entry per "page", entry is "bits" wide; |
456 | * dict, max two entries per three "pages". |
457 | * In addition, we add some housekeeping entries at |
458 | * the start so that we can determine whether we can |
459 | * trust the imprints when encountered on startup (including |
460 | * a version number -- CURRENT VERSION is 2). */ |
461 | MT_lock_set(&b->batIdxLock); |
462 | if (b->timprints != NULL || |
463 | HEAPalloc(&imprints->imprints, |
464 | IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T + /* extra info */ |
465 | 64 * b->twidth + /* bins */ |
466 | 64 * 2 * SIZEOF_OID + /* {min,max}_bins */ |
467 | 64 * SIZEOF_BUN + /* cnt_bins */ |
468 | pages * (imprints->bits / 8) + /* imps */ |
469 | sizeof(uint64_t) + /* padding for alignment */ |
470 | pages * sizeof(cchdc_t), /* dict */ |
471 | 1) != GDK_SUCCEED) { |
472 | MT_lock_unset(&b->batIdxLock); |
473 | GDKfree(imprints); |
474 | BBPunfix(s1->batCacheid); |
475 | BBPunfix(s2->batCacheid); |
476 | BBPunfix(s3->batCacheid); |
477 | BBPunfix(s4->batCacheid); |
478 | if (b->timprints != NULL) |
479 | return GDK_SUCCEED; /* we were beaten to it */ |
480 | GDKerror("#BATimprints: memory allocation error" ); |
481 | return GDK_FAIL; |
482 | } |
483 | imprints->bins = imprints->imprints.base + IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T; |
484 | imprints->stats = (BUN *) ((char *) imprints->bins + 64 * b->twidth); |
485 | imprints->imps = (void *) (imprints->stats + 64 * 3); |
486 | imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1)); |
487 | |
488 | switch (ATOMbasetype(b->ttype)) { |
489 | case TYPE_bte: |
490 | FILL_HISTOGRAM(bte); |
491 | break; |
492 | case TYPE_sht: |
493 | FILL_HISTOGRAM(sht); |
494 | break; |
495 | case TYPE_int: |
496 | FILL_HISTOGRAM(int); |
497 | break; |
498 | case TYPE_lng: |
499 | FILL_HISTOGRAM(lng); |
500 | break; |
501 | #ifdef HAVE_HGE |
502 | case TYPE_hge: |
503 | FILL_HISTOGRAM(hge); |
504 | break; |
505 | #endif |
506 | case TYPE_flt: |
507 | FILL_HISTOGRAM(flt); |
508 | break; |
509 | case TYPE_dbl: |
510 | FILL_HISTOGRAM(dbl); |
511 | break; |
512 | default: |
513 | /* should never reach here */ |
514 | assert(0); |
515 | } |
516 | |
517 | imprints_create(b, |
518 | imprints->bins, |
519 | imprints->stats, |
520 | imprints->bits, |
521 | imprints->imps, |
522 | &imprints->impcnt, |
523 | imprints->dict, |
524 | &imprints->dictcnt); |
525 | assert(imprints->impcnt <= pages); |
526 | assert(imprints->dictcnt <= pages); |
527 | #ifndef NDEBUG |
528 | memset((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8), 0, (char *) imprints->dict - ((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8))); |
529 | #endif |
530 | imprints->imprints.free = (size_t) ((char *) ((cchdc_t *) imprints->dict + imprints->dictcnt) - imprints->imprints.base); |
531 | /* add info to heap for when they become persistent */ |
532 | ((size_t *) imprints->imprints.base)[0] = (size_t) (imprints->bits); |
533 | ((size_t *) imprints->imprints.base)[1] = (size_t) imprints->impcnt; |
534 | ((size_t *) imprints->imprints.base)[2] = (size_t) imprints->dictcnt; |
535 | ((size_t *) imprints->imprints.base)[3] = (size_t) BATcount(b); |
536 | imprints->imprints.parentid = b->batCacheid; |
537 | b->timprints = imprints; |
538 | if (BBP_status(b->batCacheid) & BBPEXISTING && |
539 | !b->theap.dirty && |
540 | !GDKinmemory()) { |
541 | MT_Id tid; |
542 | BBPfix(b->batCacheid); |
543 | char name[16]; |
544 | snprintf(name, sizeof(name), "impssync%d" , b->batCacheid); |
545 | if (MT_create_thread(&tid, BATimpsync, b, |
546 | MT_THR_DETACHED, name) < 0) |
547 | BBPunfix(b->batCacheid); |
548 | } |
549 | } |
550 | |
551 | ACCELDEBUG fprintf(stderr, "#BATimprints(%s): imprints construction " LLFMT " usec\n" , BATgetId(b), GDKusec() - t0); |
552 | MT_lock_unset(&b->batIdxLock); |
553 | |
554 | /* BBPUnfix tries to get the imprints lock which might lead to |
555 | * a deadlock if those were unfixed earlier */ |
556 | if (s1) { |
557 | BBPunfix(s1->batCacheid); |
558 | BBPunfix(s2->batCacheid); |
559 | BBPunfix(s3->batCacheid); |
560 | BBPunfix(s4->batCacheid); |
561 | } |
562 | return GDK_SUCCEED; |
563 | } |
564 | |
565 | #define getbin(TYPE,B) \ |
566 | do { \ |
567 | register const TYPE val = * (TYPE *) v; \ |
568 | GETBIN(ret,val,B); \ |
569 | } while (0) |
570 | |
571 | int |
572 | IMPSgetbin(int tpe, bte bits, const char *restrict inbins, const void *restrict v) |
573 | { |
574 | int ret = -1; |
575 | |
576 | switch (tpe) { |
577 | case TYPE_bte: |
578 | { |
579 | const bte *restrict bins = (bte *) inbins; |
580 | BINSIZE(bits, getbin, bte); |
581 | } |
582 | break; |
583 | case TYPE_sht: |
584 | { |
585 | const sht *restrict bins = (sht *) inbins; |
586 | BINSIZE(bits, getbin, sht); |
587 | } |
588 | break; |
589 | case TYPE_int: |
590 | { |
591 | const int *restrict bins = (int *) inbins; |
592 | BINSIZE(bits, getbin, int); |
593 | } |
594 | break; |
595 | case TYPE_lng: |
596 | { |
597 | const lng *restrict bins = (lng *) inbins; |
598 | BINSIZE(bits, getbin, lng); |
599 | } |
600 | break; |
601 | #ifdef HAVE_HGE |
602 | case TYPE_hge: |
603 | { |
604 | const hge *restrict bins = (hge *) inbins; |
605 | BINSIZE(bits, getbin, hge); |
606 | } |
607 | break; |
608 | #endif |
609 | case TYPE_flt: |
610 | { |
611 | const flt *restrict bins = (flt *) inbins; |
612 | BINSIZE(bits, getbin, flt); |
613 | } |
614 | break; |
615 | case TYPE_dbl: |
616 | { |
617 | const dbl *restrict bins = (dbl *) inbins; |
618 | BINSIZE(bits, getbin, dbl); |
619 | } |
620 | break; |
621 | default: |
622 | assert(0); |
623 | (void) inbins; |
624 | break; |
625 | } |
626 | return ret; |
627 | } |
628 | |
629 | lng |
630 | IMPSimprintsize(BAT *b) |
631 | { |
632 | lng sz = 0; |
633 | if (b->timprints && b->timprints != (Imprints *) 1) { |
634 | sz = b->timprints->impcnt * b->timprints->bits / 8; |
635 | sz += b->timprints->dictcnt * sizeof(cchdc_t); |
636 | } |
637 | return sz; |
638 | } |
639 | |
640 | static void |
641 | IMPSremove(BAT *b) |
642 | { |
643 | Imprints *imprints; |
644 | |
645 | assert(b->timprints != NULL); |
646 | assert(!VIEWtparent(b)); |
647 | |
648 | if ((imprints = b->timprints) != NULL) { |
649 | b->timprints = NULL; |
650 | |
651 | if ((GDKdebug & ALGOMASK) && |
652 | * (size_t *) imprints->imprints.base & (1 << 16)) |
653 | fprintf(stderr, "#IMPSremove: removing persisted imprints\n" ); |
654 | if (HEAPdelete(&imprints->imprints, BBP_physical(b->batCacheid), |
655 | "timprints" ) != GDK_SUCCEED) |
656 | IODEBUG fprintf(stderr, "#IMPSremove(%s): imprints heap\n" , BATgetId(b)); |
657 | |
658 | GDKfree(imprints); |
659 | } |
660 | } |
661 | |
662 | void |
663 | IMPSdestroy(BAT *b) |
664 | { |
665 | if (b && b->timprints) { |
666 | MT_lock_set(&b->batIdxLock); |
667 | if (b->timprints == (Imprints *) 1) { |
668 | b->timprints = NULL; |
669 | GDKunlink(BBPselectfarm(b->batRole, b->ttype, imprintsheap), |
670 | BATDIR, |
671 | BBP_physical(b->batCacheid), |
672 | "timprints" ); |
673 | } else if (b->timprints != NULL && !VIEWtparent(b)) |
674 | IMPSremove(b); |
675 | MT_lock_unset(&b->batIdxLock); |
676 | } |
677 | } |
678 | |
679 | /* free the memory associated with the imprints, do not remove the |
680 | * heap files; indicate that imprints are available on disk by setting |
681 | * the imprints pointer to 1 */ |
682 | void |
683 | IMPSfree(BAT *b) |
684 | { |
685 | Imprints *imprints; |
686 | |
687 | if (b && b->timprints) { |
688 | assert(b->batCacheid > 0); |
689 | MT_lock_set(&b->batIdxLock); |
690 | imprints = b->timprints; |
691 | if (imprints != NULL && imprints != (Imprints *) 1) { |
692 | if (GDKinmemory()) { |
693 | b->timprints = NULL; |
694 | if (!VIEWtparent(b)) { |
695 | HEAPfree(&imprints->imprints, true); |
696 | GDKfree(imprints); |
697 | } |
698 | } else { |
699 | b->timprints = (Imprints *) 1; |
700 | if (!VIEWtparent(b)) { |
701 | HEAPfree(&imprints->imprints, false); |
702 | GDKfree(imprints); |
703 | } |
704 | } |
705 | } |
706 | MT_lock_unset(&b->batIdxLock); |
707 | } |
708 | } |
709 | |
710 | #ifndef NDEBUG |
711 | /* never called, useful for debugging */ |
712 | |
713 | #define IMPSPRNTMASK(T, B) \ |
714 | do { \ |
715 | uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \ |
716 | for (j = 0; j < imprints->bits; j++) \ |
717 | s[j] = IMPSisSet(B, im[icnt], j) ? 'x' : '.'; \ |
718 | s[j] = '\0'; \ |
719 | } while (0) |
720 | |
721 | void |
722 | IMPSprint(BAT *b) |
723 | { |
724 | Imprints *imprints; |
725 | cchdc_t *restrict d; |
726 | char s[65]; /* max number of bits + 1 */ |
727 | BUN icnt, dcnt, l, pages; |
728 | BUN *restrict min_bins, *restrict max_bins; |
729 | BUN *restrict cnt_bins; |
730 | bte j; |
731 | int i; |
732 | |
733 | if (!BATcheckimprints(b)) { |
734 | fprintf(stderr, "no imprint\n" ); |
735 | return; |
736 | } |
737 | imprints = b->timprints; |
738 | d = (cchdc_t *) imprints->dict; |
739 | min_bins = imprints->stats; |
740 | max_bins = min_bins + 64; |
741 | cnt_bins = max_bins + 64; |
742 | |
743 | fprintf(stderr, |
744 | "bits = %d, impcnt = " BUNFMT ", dictcnt = " BUNFMT "\n" , |
745 | imprints->bits, imprints->impcnt, imprints->dictcnt); |
746 | fprintf(stderr,"MIN = " ); |
747 | for (i = 0; i < imprints->bits; i++) { |
748 | fprintf(stderr, "[ " BUNFMT " ] " , min_bins[i]); |
749 | } |
750 | fprintf(stderr,"\n" ); |
751 | fprintf(stderr,"MAX = " ); |
752 | for (i = 0; i < imprints->bits; i++) { |
753 | fprintf(stderr, "[ " BUNFMT " ] " , max_bins[i]); |
754 | } |
755 | fprintf(stderr,"\n" ); |
756 | fprintf(stderr,"COUNT = " ); |
757 | for (i = 0; i < imprints->bits; i++) { |
758 | fprintf(stderr, "[ " BUNFMT " ] " , cnt_bins[i]); |
759 | } |
760 | fprintf(stderr,"\n" ); |
761 | for (dcnt = 0, icnt = 0, pages = 1; dcnt < imprints->dictcnt; dcnt++) { |
762 | if (d[dcnt].repeat) { |
763 | BINSIZE(imprints->bits, IMPSPRNTMASK, " " ); |
764 | pages += d[dcnt].cnt; |
765 | fprintf(stderr, "[ " BUNFMT " ]r %s\n" , pages, s); |
766 | icnt++; |
767 | } else { |
768 | l = icnt + d[dcnt].cnt; |
769 | for (; icnt < l; icnt++) { |
770 | BINSIZE(imprints->bits, IMPSPRNTMASK, " " ); |
771 | fprintf(stderr, "[ " BUNFMT " ] %s\n" , |
772 | pages++, s); |
773 | } |
774 | } |
775 | } |
776 | } |
777 | #endif |
778 | |