1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * (c) Peter Boncz, Stefan Manegold, Niels Nes |
11 | * |
12 | * new functionality for the low-resource-consumption. It will |
13 | * first one by one create a hash value out of the multiple attributes. |
14 | * This hash value is computed by xoring and rotating individual hash |
15 | * values together. We create a hash and rotate command to do this. |
16 | */ |
17 | #include "monetdb_config.h" |
18 | #include "mkey.h" |
19 | |
20 | #define MKEYHASH_bte(valp) ((lng) *(const bte*)(valp)) |
21 | #define MKEYHASH_sht(valp) ((lng) *(const sht*)(valp)) |
22 | #define MKEYHASH_int(valp) ((lng) *(const int*)(valp)) |
23 | #define MKEYHASH_lng(valp) ((lng) *(const lng*)(valp)) |
24 | #ifdef HAVE_HGE |
25 | #define MKEYHASH_hge(valp) (((const lng*)(valp))[0] ^ ((const lng*)(valp))[1]) |
26 | #endif |
27 | |
28 | static inline lng |
29 | GDK_ROTATE(lng x, int y, int z) |
30 | { |
31 | return (lng) (((ulng) x << y) | ((ulng) x >> z)); |
32 | } |
33 | |
34 | /* TODO: nil handling. however; we do not want to lose time in bulk_rotate_xor_hash with that */ |
35 | str |
36 | MKEYrotate(lng *res, const lng *val, const int *n) |
37 | { |
38 | *res = GDK_ROTATE(*val, *n, (sizeof(lng)*8) - *n); |
39 | return MAL_SUCCEED; |
40 | } |
41 | |
42 | str |
43 | MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
44 | { |
45 | lng *res; |
46 | ptr val; |
47 | int tpe = getArgType(mb,p,1); |
48 | |
49 | (void) cntxt; |
50 | res= getArgReference_lng(stk,p,0); |
51 | val= getArgReference(stk,p,1); |
52 | switch (ATOMstorage(tpe)) { |
53 | case TYPE_void: |
54 | case TYPE_bat: |
55 | case TYPE_ptr: |
56 | // illegal types, avoid falling into the default case. |
57 | assert(0); |
58 | case TYPE_bte: |
59 | *res = MKEYHASH_bte(val); |
60 | break; |
61 | case TYPE_sht: |
62 | *res = MKEYHASH_sht(val); |
63 | break; |
64 | case TYPE_int: |
65 | case TYPE_flt: |
66 | *res = MKEYHASH_int(val); |
67 | break; |
68 | case TYPE_lng: |
69 | case TYPE_dbl: |
70 | *res = MKEYHASH_lng(val); |
71 | break; |
72 | #ifdef HAVE_HGE |
73 | case TYPE_hge: |
74 | *res = MKEYHASH_hge(val); |
75 | break; |
76 | #endif |
77 | default: |
78 | if (ATOMextern(tpe)) |
79 | *res = ATOMhash(tpe, *(ptr*)val); |
80 | else |
81 | *res = ATOMhash(tpe, val); |
82 | break; |
83 | } |
84 | return MAL_SUCCEED; |
85 | } |
86 | |
87 | str |
88 | MKEYbathash(bat *res, const bat *bid) |
89 | { |
90 | BAT *b, *dst; |
91 | lng *restrict r; |
92 | BUN n; |
93 | |
94 | if ((b = BATdescriptor(*bid)) == NULL) |
95 | throw(SQL, "mkey.bathash" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
96 | |
97 | n = BATcount(b); |
98 | dst = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT); |
99 | if (dst == NULL) { |
100 | BBPunfix(b->batCacheid); |
101 | throw(SQL, "mkey.bathash" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
102 | } |
103 | BATsetcount(dst, n); |
104 | |
105 | r = (lng *) Tloc(dst, 0); |
106 | |
107 | switch (ATOMstorage(b->ttype)) { |
108 | case TYPE_void: { |
109 | oid o = b->tseqbase; |
110 | if (is_oid_nil(o)) |
111 | for (BUN i = 0; i < n; i++) |
112 | r[i] = lng_nil; |
113 | else |
114 | for (BUN i = 0; i < n; i++) |
115 | r[i] = o + i; |
116 | break; |
117 | } |
118 | case TYPE_bte: { |
119 | const bte *restrict v = (const bte *) Tloc(b, 0); |
120 | for (BUN i = 0; i < n; i++) |
121 | r[i] = MKEYHASH_bte(v + i); |
122 | break; |
123 | } |
124 | case TYPE_sht: { |
125 | const sht *restrict v = (const sht *) Tloc(b, 0); |
126 | for (BUN i = 0; i < n; i++) |
127 | r[i] = MKEYHASH_sht(v + i); |
128 | break; |
129 | } |
130 | case TYPE_int: |
131 | case TYPE_flt: { |
132 | const int *restrict v = (const int *) Tloc(b, 0); |
133 | for (BUN i = 0; i < n; i++) |
134 | r[i] = MKEYHASH_int(v + i); |
135 | break; |
136 | } |
137 | case TYPE_lng: |
138 | case TYPE_dbl: { |
139 | const lng *restrict v = (const lng *) Tloc(b, 0); |
140 | for (BUN i = 0; i < n; i++) |
141 | r[i] = MKEYHASH_lng(v + i); |
142 | break; |
143 | } |
144 | #ifdef HAVE_HGE |
145 | case TYPE_hge: { |
146 | const hge *restrict v = (const hge *) Tloc(b, 0); |
147 | for (BUN i = 0; i < n; i++) |
148 | r[i] = MKEYHASH_hge(v + i); |
149 | break; |
150 | } |
151 | #endif |
152 | default: { |
153 | BATiter bi = bat_iterator(b); |
154 | BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash; |
155 | int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype); |
156 | const void *nil = ATOMnilptr(b->ttype); |
157 | |
158 | for (BUN i = 0; i < n; i++) { |
159 | const void *restrict v = BUNtail(bi, i); |
160 | if ((*cmp)(v, nil) == 0) |
161 | r[i] = lng_nil; |
162 | else |
163 | r[i] = (lng) (*hash)(v); |
164 | } |
165 | break; |
166 | } |
167 | } |
168 | |
169 | if (dst->batCount <= 1) { |
170 | BATkey(dst, true); |
171 | dst->tsorted = dst->trevsorted = true; |
172 | } else { |
173 | BATkey(dst, false); |
174 | dst->tsorted = dst->trevsorted = false; |
175 | } |
176 | dst->tnonil = false; |
177 | dst->tnil = false; |
178 | |
179 | BBPkeepref(*res = dst->batCacheid); |
180 | BBPunfix(b->batCacheid); |
181 | return MAL_SUCCEED; |
182 | } |
183 | |
184 | str |
185 | MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
186 | { |
187 | lng *dst = getArgReference_lng(stk, p, 0); |
188 | lng h = *getArgReference_lng(stk, p, 1); |
189 | int lbit = *getArgReference_int(stk, p, 2); |
190 | int rbit = (int) sizeof(lng) * 8 - lbit; |
191 | int tpe = getArgType(mb, p, 3); |
192 | ptr *pval = getArgReference(stk, p, 3); |
193 | lng val; |
194 | |
195 | (void) cntxt; |
196 | switch (ATOMstorage(tpe)) { |
197 | case TYPE_bte: |
198 | val = MKEYHASH_bte(pval); |
199 | break; |
200 | case TYPE_sht: |
201 | val = MKEYHASH_sht(pval); |
202 | break; |
203 | case TYPE_int: |
204 | case TYPE_flt: |
205 | val = MKEYHASH_int(pval); |
206 | break; |
207 | case TYPE_lng: |
208 | case TYPE_dbl: |
209 | val = MKEYHASH_lng(pval); |
210 | break; |
211 | #ifdef HAVE_HGE |
212 | case TYPE_hge: |
213 | val = MKEYHASH_hge(pval); |
214 | break; |
215 | #endif |
216 | default: |
217 | if (ATOMextern(tpe)) |
218 | val = ATOMhash(tpe, *(ptr*)pval); |
219 | else |
220 | val = ATOMhash(tpe, pval); |
221 | break; |
222 | } |
223 | *dst = GDK_ROTATE(h, lbit, rbit) ^ val; |
224 | return MAL_SUCCEED; |
225 | } |
226 | |
227 | str |
228 | MKEYbulk_rotate_xor_hash(bat *res, const bat *hid, const int *nbits, const bat *bid) |
229 | { |
230 | BAT *hb, *b, *bn; |
231 | int lbit = *nbits; |
232 | int rbit = (int) sizeof(lng) * 8 - lbit; |
233 | lng *restrict r; |
234 | const lng *restrict h; |
235 | BUN n; |
236 | |
237 | if ((hb = BATdescriptor(*hid)) == NULL) |
238 | throw(MAL, "mkey.rotate_xor_hash" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
239 | |
240 | if ((b = BATdescriptor(*bid)) == NULL) { |
241 | BBPunfix(hb->batCacheid); |
242 | throw(MAL, "mkey.rotate_xor_hash" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
243 | } |
244 | |
245 | if (!ALIGNsynced(hb, b) && (BATcount(b) || BATcount(hb))) { |
246 | BBPunfix(hb->batCacheid); |
247 | BBPunfix(b->batCacheid); |
248 | throw(MAL, "mkey.rotate_xor_hash" , |
249 | OPERATION_FAILED ": input bats are not aligned" ); |
250 | } |
251 | |
252 | n = BATcount(b); |
253 | |
254 | bn = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT); |
255 | if (bn == NULL) { |
256 | BBPunfix(hb->batCacheid); |
257 | BBPunfix(b->batCacheid); |
258 | throw(MAL, "mkey.rotate_xor_hash" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
259 | } |
260 | BATsetcount(bn, n); |
261 | |
262 | r = (lng *) Tloc(bn, 0); |
263 | h = (const lng *) Tloc(hb, 0); |
264 | |
265 | switch (ATOMstorage(b->ttype)) { |
266 | case TYPE_bte: { |
267 | const bte *restrict v = (const bte *) Tloc(b, 0); |
268 | for (BUN i = 0; i < n; i++) |
269 | r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_bte(v + i); |
270 | break; |
271 | } |
272 | case TYPE_sht: { |
273 | const sht *restrict v = (const sht *) Tloc(b, 0); |
274 | for (BUN i = 0; i < n; i++) |
275 | r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_sht(v + i); |
276 | break; |
277 | } |
278 | case TYPE_int: |
279 | case TYPE_flt: { |
280 | const int *restrict v = (const int *) Tloc(b, 0); |
281 | for (BUN i = 0; i < n; i++) |
282 | r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_int(v + i); |
283 | break; |
284 | } |
285 | case TYPE_lng: |
286 | case TYPE_dbl: { |
287 | const lng *restrict v = (const lng *) Tloc(b, 0); |
288 | for (BUN i = 0; i < n; i++) |
289 | r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_lng(v + i); |
290 | break; |
291 | } |
292 | #ifdef HAVE_HGE |
293 | case TYPE_hge: { |
294 | const hge *restrict v = (const hge *) Tloc(b, 0); |
295 | for (BUN i = 0; i < n; i++) |
296 | r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_hge(v + i); |
297 | break; |
298 | } |
299 | #endif |
300 | case TYPE_str: |
301 | if (b->tvheap->hashash) { |
302 | BATiter bi = bat_iterator(b); |
303 | for (BUN i = 0; i < n; i++) { |
304 | const void *restrict s = BUNtvar(bi, i); |
305 | r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ (lng) ((const BUN *) s)[-1]; |
306 | } |
307 | break; |
308 | } |
309 | /* fall through */ |
310 | default: { |
311 | BATiter bi = bat_iterator(b); |
312 | BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash; |
313 | |
314 | for (BUN i = 0; i < n; i++) |
315 | r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ (lng) (*hash)(BUNtail(bi, i)); |
316 | break; |
317 | } |
318 | } |
319 | if (bn->batCount <= 1) { |
320 | BATkey(bn, true); |
321 | bn->tsorted = bn->trevsorted = true; |
322 | } else { |
323 | BATkey(bn, false); |
324 | bn->tsorted = bn->trevsorted = false; |
325 | } |
326 | bn->tnonil = false; |
327 | bn->tnil = false; |
328 | |
329 | BBPkeepref(*res = bn->batCacheid); |
330 | BBPunfix(b->batCacheid); |
331 | BBPunfix(hb->batCacheid); |
332 | return MAL_SUCCEED; |
333 | } |
334 | |
335 | str |
336 | MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
337 | { |
338 | bat *res = getArgReference_bat(stk, p, 0); |
339 | bat *hid = getArgReference_bat(stk, p, 1); |
340 | int lbit = *getArgReference_int(stk, p, 2); |
341 | int tpe = getArgType(mb, p, 3); |
342 | ptr *pval = getArgReference(stk, p, 3); |
343 | BAT *hb, *bn; |
344 | int rbit = (int) sizeof(lng) * 8 - lbit; |
345 | lng *r; |
346 | const lng *h; |
347 | lng val; |
348 | BUN n; |
349 | |
350 | (void) cntxt; |
351 | |
352 | if ((hb = BATdescriptor(*hid)) == NULL) |
353 | throw(MAL, "mkey.rotate_xor_hash" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
354 | |
355 | n = BATcount(hb); |
356 | |
357 | bn = COLnew(hb->hseqbase, TYPE_lng, n, TRANSIENT); |
358 | if (bn == NULL) { |
359 | BBPunfix(hb->batCacheid); |
360 | throw(MAL, "mkey.rotate_xor_hash" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
361 | } |
362 | BATsetcount(bn, n); |
363 | |
364 | switch (ATOMstorage(tpe)) { |
365 | case TYPE_bte: |
366 | val = MKEYHASH_bte(pval); |
367 | break; |
368 | case TYPE_sht: |
369 | val = MKEYHASH_sht(pval); |
370 | break; |
371 | case TYPE_int: |
372 | case TYPE_flt: |
373 | val = MKEYHASH_int(pval); |
374 | break; |
375 | case TYPE_lng: |
376 | case TYPE_dbl: |
377 | val = MKEYHASH_lng(pval); |
378 | break; |
379 | #ifdef HAVE_HGE |
380 | case TYPE_hge: |
381 | val = MKEYHASH_hge(pval); |
382 | break; |
383 | #endif |
384 | default: |
385 | if (ATOMextern(tpe)) |
386 | val = ATOMhash(tpe, *(ptr*)pval); |
387 | else |
388 | val = ATOMhash(tpe, pval); |
389 | break; |
390 | } |
391 | |
392 | r = (lng *) Tloc(bn, 0); |
393 | h = (const lng *) Tloc(hb, 0); |
394 | |
395 | while (n-- > 0) { |
396 | *r++ = GDK_ROTATE(*h, lbit, rbit) ^ val; |
397 | h++; |
398 | } |
399 | |
400 | if (bn->batCount <= 1) { |
401 | BATkey(bn, true); |
402 | bn->tsorted = bn->trevsorted = true; |
403 | } else { |
404 | BATkey(bn, false); |
405 | bn->tsorted = bn->trevsorted = false; |
406 | } |
407 | bn->tnonil = false; |
408 | bn->tnil = false; |
409 | |
410 | BBPkeepref(*res = bn->batCacheid); |
411 | BBPunfix(hb->batCacheid); |
412 | return MAL_SUCCEED; |
413 | } |
414 | |
415 | str |
416 | MKEYconstbulk_rotate_xor_hash(bat *res, const lng *h, const int *nbits, const bat *bid) |
417 | { |
418 | BAT *b, *bn; |
419 | int lbit = *nbits; |
420 | int rbit = (int) sizeof(lng) * 8 - lbit; |
421 | lng *restrict r; |
422 | BUN n; |
423 | |
424 | if ((b = BATdescriptor(*bid)) == NULL) |
425 | throw(MAL, "mkey.rotate_xor_hash" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
426 | |
427 | n = BATcount(b); |
428 | |
429 | bn = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT); |
430 | if (bn == NULL) { |
431 | BBPunfix(b->batCacheid); |
432 | throw(MAL, "mkey.rotate_xor_hash" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
433 | } |
434 | BATsetcount(bn, n); |
435 | |
436 | r = (lng *) Tloc(bn, 0); |
437 | |
438 | switch (ATOMstorage(b->ttype)) { |
439 | case TYPE_bte: { |
440 | const bte *restrict v = (const bte *) Tloc(b, 0); |
441 | for (BUN i = 0; i < n; i++) |
442 | r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_bte(v + i); |
443 | break; |
444 | } |
445 | case TYPE_sht: { |
446 | const sht *restrict v = (const sht *) Tloc(b, 0); |
447 | for (BUN i = 0; i < n; i++) |
448 | r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_sht(v + i); |
449 | break; |
450 | } |
451 | case TYPE_int: |
452 | case TYPE_flt: { |
453 | const int *restrict v = (const int *) Tloc(b, 0); |
454 | for (BUN i = 0; i < n; i++) |
455 | r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_int(v + i); |
456 | break; |
457 | } |
458 | case TYPE_lng: |
459 | case TYPE_dbl: { |
460 | const lng *restrict v = (const lng *) Tloc(b, 0); |
461 | for (BUN i = 0; i < n; i++) |
462 | r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_lng(v + i); |
463 | break; |
464 | } |
465 | #ifdef HAVE_HGE |
466 | case TYPE_hge: { |
467 | const hge *restrict v = (const hge *) Tloc(b, 0); |
468 | for (BUN i = 0; i < n; i++) |
469 | r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_hge(v + i); |
470 | break; |
471 | } |
472 | #endif |
473 | case TYPE_str: |
474 | if (b->tvheap->hashash) { |
475 | BATiter bi = bat_iterator(b); |
476 | for (BUN i = 0; i < n; i++) { |
477 | const char *restrict s = BUNtvar(bi, i); |
478 | r[i] = GDK_ROTATE(*h, lbit, rbit) ^ (lng) ((const BUN *) s)[-1]; |
479 | } |
480 | break; |
481 | } |
482 | /* fall through */ |
483 | default: { |
484 | BATiter bi = bat_iterator(b); |
485 | BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash; |
486 | |
487 | for (BUN i = 0; i < n; i++) |
488 | r[i] = GDK_ROTATE(*h, lbit, rbit) ^ (lng) (*hash)(BUNtail(bi, i)); |
489 | break; |
490 | } |
491 | } |
492 | if (bn->batCount <= 1) { |
493 | BATkey(bn, true); |
494 | bn->tsorted = bn->trevsorted = true; |
495 | } else { |
496 | BATkey(bn, false); |
497 | bn->tsorted = bn->trevsorted = false; |
498 | } |
499 | bn->tnonil = false; |
500 | bn->tnil = false; |
501 | |
502 | BBPkeepref(*res = bn->batCacheid); |
503 | BBPunfix(b->batCacheid); |
504 | return MAL_SUCCEED; |
505 | } |
506 | |