1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * (c) Peter Boncz, Stefan Manegold, Niels Nes
11 *
12 * new functionality for the low-resource-consumption. It will
13 * first one by one create a hash value out of the multiple attributes.
14 * This hash value is computed by xoring and rotating individual hash
15 * values together. We create a hash and rotate command to do this.
16 */
17#include "monetdb_config.h"
18#include "mkey.h"
19
20#define MKEYHASH_bte(valp) ((lng) *(const bte*)(valp))
21#define MKEYHASH_sht(valp) ((lng) *(const sht*)(valp))
22#define MKEYHASH_int(valp) ((lng) *(const int*)(valp))
23#define MKEYHASH_lng(valp) ((lng) *(const lng*)(valp))
24#ifdef HAVE_HGE
25#define MKEYHASH_hge(valp) (((const lng*)(valp))[0] ^ ((const lng*)(valp))[1])
26#endif
27
28static inline lng
29GDK_ROTATE(lng x, int y, int z)
30{
31 return (lng) (((ulng) x << y) | ((ulng) x >> z));
32}
33
34/* TODO: nil handling. however; we do not want to lose time in bulk_rotate_xor_hash with that */
35str
36MKEYrotate(lng *res, const lng *val, const int *n)
37{
38 *res = GDK_ROTATE(*val, *n, (sizeof(lng)*8) - *n);
39 return MAL_SUCCEED;
40}
41
42str
43MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
44{
45 lng *res;
46 ptr val;
47 int tpe = getArgType(mb,p,1);
48
49 (void) cntxt;
50 res= getArgReference_lng(stk,p,0);
51 val= getArgReference(stk,p,1);
52 switch (ATOMstorage(tpe)) {
53 case TYPE_void:
54 case TYPE_bat:
55 case TYPE_ptr:
56 // illegal types, avoid falling into the default case.
57 assert(0);
58 case TYPE_bte:
59 *res = MKEYHASH_bte(val);
60 break;
61 case TYPE_sht:
62 *res = MKEYHASH_sht(val);
63 break;
64 case TYPE_int:
65 case TYPE_flt:
66 *res = MKEYHASH_int(val);
67 break;
68 case TYPE_lng:
69 case TYPE_dbl:
70 *res = MKEYHASH_lng(val);
71 break;
72#ifdef HAVE_HGE
73 case TYPE_hge:
74 *res = MKEYHASH_hge(val);
75 break;
76#endif
77 default:
78 if (ATOMextern(tpe))
79 *res = ATOMhash(tpe, *(ptr*)val);
80 else
81 *res = ATOMhash(tpe, val);
82 break;
83 }
84 return MAL_SUCCEED;
85}
86
87str
88MKEYbathash(bat *res, const bat *bid)
89{
90 BAT *b, *dst;
91 lng *restrict r;
92 BUN n;
93
94 if ((b = BATdescriptor(*bid)) == NULL)
95 throw(SQL, "mkey.bathash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
96
97 n = BATcount(b);
98 dst = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT);
99 if (dst == NULL) {
100 BBPunfix(b->batCacheid);
101 throw(SQL, "mkey.bathash", SQLSTATE(HY001) MAL_MALLOC_FAIL);
102 }
103 BATsetcount(dst, n);
104
105 r = (lng *) Tloc(dst, 0);
106
107 switch (ATOMstorage(b->ttype)) {
108 case TYPE_void: {
109 oid o = b->tseqbase;
110 if (is_oid_nil(o))
111 for (BUN i = 0; i < n; i++)
112 r[i] = lng_nil;
113 else
114 for (BUN i = 0; i < n; i++)
115 r[i] = o + i;
116 break;
117 }
118 case TYPE_bte: {
119 const bte *restrict v = (const bte *) Tloc(b, 0);
120 for (BUN i = 0; i < n; i++)
121 r[i] = MKEYHASH_bte(v + i);
122 break;
123 }
124 case TYPE_sht: {
125 const sht *restrict v = (const sht *) Tloc(b, 0);
126 for (BUN i = 0; i < n; i++)
127 r[i] = MKEYHASH_sht(v + i);
128 break;
129 }
130 case TYPE_int:
131 case TYPE_flt: {
132 const int *restrict v = (const int *) Tloc(b, 0);
133 for (BUN i = 0; i < n; i++)
134 r[i] = MKEYHASH_int(v + i);
135 break;
136 }
137 case TYPE_lng:
138 case TYPE_dbl: {
139 const lng *restrict v = (const lng *) Tloc(b, 0);
140 for (BUN i = 0; i < n; i++)
141 r[i] = MKEYHASH_lng(v + i);
142 break;
143 }
144#ifdef HAVE_HGE
145 case TYPE_hge: {
146 const hge *restrict v = (const hge *) Tloc(b, 0);
147 for (BUN i = 0; i < n; i++)
148 r[i] = MKEYHASH_hge(v + i);
149 break;
150 }
151#endif
152 default: {
153 BATiter bi = bat_iterator(b);
154 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
155 int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype);
156 const void *nil = ATOMnilptr(b->ttype);
157
158 for (BUN i = 0; i < n; i++) {
159 const void *restrict v = BUNtail(bi, i);
160 if ((*cmp)(v, nil) == 0)
161 r[i] = lng_nil;
162 else
163 r[i] = (lng) (*hash)(v);
164 }
165 break;
166 }
167 }
168
169 if (dst->batCount <= 1) {
170 BATkey(dst, true);
171 dst->tsorted = dst->trevsorted = true;
172 } else {
173 BATkey(dst, false);
174 dst->tsorted = dst->trevsorted = false;
175 }
176 dst->tnonil = false;
177 dst->tnil = false;
178
179 BBPkeepref(*res = dst->batCacheid);
180 BBPunfix(b->batCacheid);
181 return MAL_SUCCEED;
182}
183
184str
185MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
186{
187 lng *dst = getArgReference_lng(stk, p, 0);
188 lng h = *getArgReference_lng(stk, p, 1);
189 int lbit = *getArgReference_int(stk, p, 2);
190 int rbit = (int) sizeof(lng) * 8 - lbit;
191 int tpe = getArgType(mb, p, 3);
192 ptr *pval = getArgReference(stk, p, 3);
193 lng val;
194
195 (void) cntxt;
196 switch (ATOMstorage(tpe)) {
197 case TYPE_bte:
198 val = MKEYHASH_bte(pval);
199 break;
200 case TYPE_sht:
201 val = MKEYHASH_sht(pval);
202 break;
203 case TYPE_int:
204 case TYPE_flt:
205 val = MKEYHASH_int(pval);
206 break;
207 case TYPE_lng:
208 case TYPE_dbl:
209 val = MKEYHASH_lng(pval);
210 break;
211#ifdef HAVE_HGE
212 case TYPE_hge:
213 val = MKEYHASH_hge(pval);
214 break;
215#endif
216 default:
217 if (ATOMextern(tpe))
218 val = ATOMhash(tpe, *(ptr*)pval);
219 else
220 val = ATOMhash(tpe, pval);
221 break;
222 }
223 *dst = GDK_ROTATE(h, lbit, rbit) ^ val;
224 return MAL_SUCCEED;
225}
226
227str
228MKEYbulk_rotate_xor_hash(bat *res, const bat *hid, const int *nbits, const bat *bid)
229{
230 BAT *hb, *b, *bn;
231 int lbit = *nbits;
232 int rbit = (int) sizeof(lng) * 8 - lbit;
233 lng *restrict r;
234 const lng *restrict h;
235 BUN n;
236
237 if ((hb = BATdescriptor(*hid)) == NULL)
238 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
239
240 if ((b = BATdescriptor(*bid)) == NULL) {
241 BBPunfix(hb->batCacheid);
242 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
243 }
244
245 if (!ALIGNsynced(hb, b) && (BATcount(b) || BATcount(hb))) {
246 BBPunfix(hb->batCacheid);
247 BBPunfix(b->batCacheid);
248 throw(MAL, "mkey.rotate_xor_hash",
249 OPERATION_FAILED ": input bats are not aligned");
250 }
251
252 n = BATcount(b);
253
254 bn = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT);
255 if (bn == NULL) {
256 BBPunfix(hb->batCacheid);
257 BBPunfix(b->batCacheid);
258 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY001) MAL_MALLOC_FAIL);
259 }
260 BATsetcount(bn, n);
261
262 r = (lng *) Tloc(bn, 0);
263 h = (const lng *) Tloc(hb, 0);
264
265 switch (ATOMstorage(b->ttype)) {
266 case TYPE_bte: {
267 const bte *restrict v = (const bte *) Tloc(b, 0);
268 for (BUN i = 0; i < n; i++)
269 r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_bte(v + i);
270 break;
271 }
272 case TYPE_sht: {
273 const sht *restrict v = (const sht *) Tloc(b, 0);
274 for (BUN i = 0; i < n; i++)
275 r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_sht(v + i);
276 break;
277 }
278 case TYPE_int:
279 case TYPE_flt: {
280 const int *restrict v = (const int *) Tloc(b, 0);
281 for (BUN i = 0; i < n; i++)
282 r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_int(v + i);
283 break;
284 }
285 case TYPE_lng:
286 case TYPE_dbl: {
287 const lng *restrict v = (const lng *) Tloc(b, 0);
288 for (BUN i = 0; i < n; i++)
289 r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_lng(v + i);
290 break;
291 }
292#ifdef HAVE_HGE
293 case TYPE_hge: {
294 const hge *restrict v = (const hge *) Tloc(b, 0);
295 for (BUN i = 0; i < n; i++)
296 r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_hge(v + i);
297 break;
298 }
299#endif
300 case TYPE_str:
301 if (b->tvheap->hashash) {
302 BATiter bi = bat_iterator(b);
303 for (BUN i = 0; i < n; i++) {
304 const void *restrict s = BUNtvar(bi, i);
305 r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ (lng) ((const BUN *) s)[-1];
306 }
307 break;
308 }
309 /* fall through */
310 default: {
311 BATiter bi = bat_iterator(b);
312 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
313
314 for (BUN i = 0; i < n; i++)
315 r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ (lng) (*hash)(BUNtail(bi, i));
316 break;
317 }
318 }
319 if (bn->batCount <= 1) {
320 BATkey(bn, true);
321 bn->tsorted = bn->trevsorted = true;
322 } else {
323 BATkey(bn, false);
324 bn->tsorted = bn->trevsorted = false;
325 }
326 bn->tnonil = false;
327 bn->tnil = false;
328
329 BBPkeepref(*res = bn->batCacheid);
330 BBPunfix(b->batCacheid);
331 BBPunfix(hb->batCacheid);
332 return MAL_SUCCEED;
333}
334
335str
336MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
337{
338 bat *res = getArgReference_bat(stk, p, 0);
339 bat *hid = getArgReference_bat(stk, p, 1);
340 int lbit = *getArgReference_int(stk, p, 2);
341 int tpe = getArgType(mb, p, 3);
342 ptr *pval = getArgReference(stk, p, 3);
343 BAT *hb, *bn;
344 int rbit = (int) sizeof(lng) * 8 - lbit;
345 lng *r;
346 const lng *h;
347 lng val;
348 BUN n;
349
350 (void) cntxt;
351
352 if ((hb = BATdescriptor(*hid)) == NULL)
353 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
354
355 n = BATcount(hb);
356
357 bn = COLnew(hb->hseqbase, TYPE_lng, n, TRANSIENT);
358 if (bn == NULL) {
359 BBPunfix(hb->batCacheid);
360 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY001) MAL_MALLOC_FAIL);
361 }
362 BATsetcount(bn, n);
363
364 switch (ATOMstorage(tpe)) {
365 case TYPE_bte:
366 val = MKEYHASH_bte(pval);
367 break;
368 case TYPE_sht:
369 val = MKEYHASH_sht(pval);
370 break;
371 case TYPE_int:
372 case TYPE_flt:
373 val = MKEYHASH_int(pval);
374 break;
375 case TYPE_lng:
376 case TYPE_dbl:
377 val = MKEYHASH_lng(pval);
378 break;
379#ifdef HAVE_HGE
380 case TYPE_hge:
381 val = MKEYHASH_hge(pval);
382 break;
383#endif
384 default:
385 if (ATOMextern(tpe))
386 val = ATOMhash(tpe, *(ptr*)pval);
387 else
388 val = ATOMhash(tpe, pval);
389 break;
390 }
391
392 r = (lng *) Tloc(bn, 0);
393 h = (const lng *) Tloc(hb, 0);
394
395 while (n-- > 0) {
396 *r++ = GDK_ROTATE(*h, lbit, rbit) ^ val;
397 h++;
398 }
399
400 if (bn->batCount <= 1) {
401 BATkey(bn, true);
402 bn->tsorted = bn->trevsorted = true;
403 } else {
404 BATkey(bn, false);
405 bn->tsorted = bn->trevsorted = false;
406 }
407 bn->tnonil = false;
408 bn->tnil = false;
409
410 BBPkeepref(*res = bn->batCacheid);
411 BBPunfix(hb->batCacheid);
412 return MAL_SUCCEED;
413}
414
415str
416MKEYconstbulk_rotate_xor_hash(bat *res, const lng *h, const int *nbits, const bat *bid)
417{
418 BAT *b, *bn;
419 int lbit = *nbits;
420 int rbit = (int) sizeof(lng) * 8 - lbit;
421 lng *restrict r;
422 BUN n;
423
424 if ((b = BATdescriptor(*bid)) == NULL)
425 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
426
427 n = BATcount(b);
428
429 bn = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT);
430 if (bn == NULL) {
431 BBPunfix(b->batCacheid);
432 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY001) MAL_MALLOC_FAIL);
433 }
434 BATsetcount(bn, n);
435
436 r = (lng *) Tloc(bn, 0);
437
438 switch (ATOMstorage(b->ttype)) {
439 case TYPE_bte: {
440 const bte *restrict v = (const bte *) Tloc(b, 0);
441 for (BUN i = 0; i < n; i++)
442 r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_bte(v + i);
443 break;
444 }
445 case TYPE_sht: {
446 const sht *restrict v = (const sht *) Tloc(b, 0);
447 for (BUN i = 0; i < n; i++)
448 r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_sht(v + i);
449 break;
450 }
451 case TYPE_int:
452 case TYPE_flt: {
453 const int *restrict v = (const int *) Tloc(b, 0);
454 for (BUN i = 0; i < n; i++)
455 r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_int(v + i);
456 break;
457 }
458 case TYPE_lng:
459 case TYPE_dbl: {
460 const lng *restrict v = (const lng *) Tloc(b, 0);
461 for (BUN i = 0; i < n; i++)
462 r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_lng(v + i);
463 break;
464 }
465#ifdef HAVE_HGE
466 case TYPE_hge: {
467 const hge *restrict v = (const hge *) Tloc(b, 0);
468 for (BUN i = 0; i < n; i++)
469 r[i] = GDK_ROTATE(*h, lbit, rbit) ^ MKEYHASH_hge(v + i);
470 break;
471 }
472#endif
473 case TYPE_str:
474 if (b->tvheap->hashash) {
475 BATiter bi = bat_iterator(b);
476 for (BUN i = 0; i < n; i++) {
477 const char *restrict s = BUNtvar(bi, i);
478 r[i] = GDK_ROTATE(*h, lbit, rbit) ^ (lng) ((const BUN *) s)[-1];
479 }
480 break;
481 }
482 /* fall through */
483 default: {
484 BATiter bi = bat_iterator(b);
485 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
486
487 for (BUN i = 0; i < n; i++)
488 r[i] = GDK_ROTATE(*h, lbit, rbit) ^ (lng) (*hash)(BUNtail(bi, i));
489 break;
490 }
491 }
492 if (bn->batCount <= 1) {
493 BATkey(bn, true);
494 bn->tsorted = bn->trevsorted = true;
495 } else {
496 BATkey(bn, false);
497 bn->tsorted = bn->trevsorted = false;
498 }
499 bn->tnonil = false;
500 bn->tnil = false;
501
502 BBPkeepref(*res = bn->batCacheid);
503 BBPunfix(b->batCacheid);
504 return MAL_SUCCEED;
505}
506