1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | #include "monetdb_config.h" |
10 | #include "gdk.h" |
11 | #include "gdk_private.h" |
12 | #include "gdk_calc_private.h" |
13 | #include <math.h> |
14 | |
15 | /* grouped aggregates |
16 | * |
17 | * The following functions take two to four input BATs and produce a |
18 | * single output BAT. |
19 | * |
20 | * The input BATs are |
21 | * - b, a dense-headed BAT with the values to work on in the tail; |
22 | * - g, a dense-headed BAT, aligned with b, with group ids (OID) in |
23 | * the tail; |
24 | * - e, optional but recommended, a dense-headed BAT with the list of |
25 | * group ids in the head(!) (the tail is completely ignored); |
26 | * - s, optional, a dense-headed bat with a list of candidate ids in |
27 | * the tail. |
28 | * |
29 | * The tail values of s refer to the head of b and g. Only entries at |
30 | * the specified ids are taken into account for the grouped |
31 | * aggregates. All other values are ignored. s is compatible with |
32 | * the result of BATselect(). |
33 | * |
34 | * If e is not specified, we need to do an extra scan over g to find |
35 | * out the range of the group ids that are used. e is defined in such |
36 | * a way that it can be either the extents or the histo result from |
37 | * BATgroups(). |
38 | * |
39 | * All functions calculate grouped aggregates. There are as many |
40 | * groups as there are entries in e. If e is not specified, the |
41 | * number of groups is equal to the difference between the maximum and |
42 | * minimum values in g. |
43 | * |
44 | * If a group is empty, the result for that group is nil. |
45 | * |
46 | * If there is overflow during the calculation of an aggregate, the |
47 | * whole operation fails if abort_on_error is set to non-zero, |
48 | * otherwise the result of the group in which the overflow occurred is |
49 | * nil. |
50 | * |
51 | * If skip_nils is non-zero, a nil value in b is ignored, otherwise a |
52 | * nil in b results in a nil result for the group. |
53 | */ |
54 | |
55 | /* helper function |
56 | * |
57 | * This function finds the minimum and maximum group id (and the |
58 | * number of groups) and initializes the variables for candidates |
59 | * selection. |
60 | */ |
61 | const char * |
62 | BATgroupaggrinit(BAT *b, BAT *g, BAT *e, BAT *s, |
63 | /* outputs: */ |
64 | oid *minp, oid *maxp, BUN *ngrpp, |
65 | struct canditer *ci, BUN *ncand) |
66 | { |
67 | oid min, max; |
68 | BUN i, ngrp; |
69 | const oid *restrict gids; |
70 | |
71 | if (b == NULL) |
72 | return "b must exist" ; |
73 | if (g) { |
74 | if (BATcount(b) != BATcount(g) || |
75 | (BATcount(b) != 0 && b->hseqbase != g->hseqbase)) |
76 | return "b and g must be aligned" ; |
77 | assert(BATttype(g) == TYPE_oid); |
78 | } |
79 | if (g == NULL) { |
80 | min = 0; |
81 | max = 0; |
82 | ngrp = 1; |
83 | } else if (e == NULL) { |
84 | /* we need to find out the min and max of g */ |
85 | PROPrec *prop; |
86 | |
87 | prop = BATgetprop(g, GDK_MAX_VALUE); |
88 | if (prop) { |
89 | assert(prop->v.vtype == TYPE_oid); |
90 | min = 0; /* just assume it starts at 0 */ |
91 | max = prop->v.val.oval; |
92 | } else { |
93 | min = oid_nil; /* note that oid_nil > 0! (unsigned) */ |
94 | max = 0; |
95 | if (BATtdense(g)) { |
96 | min = g->tseqbase; |
97 | max = g->tseqbase + BATcount(g) - 1; |
98 | } else if (g->tsorted) { |
99 | gids = (const oid *) Tloc(g, 0); |
100 | /* find first non-nil */ |
101 | for (i = 0, ngrp = BATcount(g); i < ngrp; i++) { |
102 | if (!is_oid_nil(gids[i])) { |
103 | min = gids[i]; |
104 | break; |
105 | } |
106 | } |
107 | if (!is_oid_nil(min)) { |
108 | /* found a non-nil, max must be last |
109 | * value (and there is one!) */ |
110 | max = gids[BUNlast(g) - 1]; |
111 | } |
112 | } else { |
113 | /* we'll do a complete scan */ |
114 | gids = (const oid *) Tloc(g, 0); |
115 | for (i = 0, ngrp = BATcount(g); i < ngrp; i++) { |
116 | if (!is_oid_nil(gids[i])) { |
117 | if (gids[i] < min) |
118 | min = gids[i]; |
119 | if (gids[i] > max) |
120 | max = gids[i]; |
121 | } |
122 | } |
123 | /* note: max < min is possible if all groups |
124 | * are nil (or BATcount(g)==0) */ |
125 | } |
126 | } |
127 | ngrp = max < min ? 0 : max - min + 1; |
128 | } else { |
129 | ngrp = BATcount(e); |
130 | min = e->hseqbase; |
131 | max = e->hseqbase + ngrp - 1; |
132 | } |
133 | *minp = min; |
134 | *maxp = max; |
135 | *ngrpp = ngrp; |
136 | |
137 | *ncand = canditer_init(ci, b, s); |
138 | |
139 | return NULL; |
140 | } |
141 | |
142 | /* ---------------------------------------------------------------------- */ |
143 | /* sum */ |
144 | |
145 | static inline bool |
146 | samesign(double x, double y) |
147 | { |
148 | return (x >= 0) == (y >= 0); |
149 | } |
150 | |
151 | /* Add two values, producing the sum and the remainder due to limited |
152 | * range of floating point arithmetic. This function depends on the |
153 | * fact that the sum returns INFINITY in *hi of the correct sign |
154 | * (i.e. isinf() returns TRUE) in case of overflow. */ |
155 | static inline void |
156 | twosum(volatile double *hi, volatile double *lo, double x, double y) |
157 | { |
158 | volatile double yr; |
159 | |
160 | assert(fabs(x) >= fabs(y)); |
161 | |
162 | *hi = x + y; |
163 | yr = *hi - x; |
164 | *lo = y - yr; |
165 | } |
166 | |
167 | static inline void |
168 | exchange(double *x, double *y) |
169 | { |
170 | double t = *x; |
171 | *x = *y; |
172 | *y = t; |
173 | } |
174 | |
175 | /* this function was adapted from https://bugs.python.org/file10357/msum4.py */ |
176 | BUN |
177 | dofsum(const void *restrict values, oid seqb, |
178 | struct canditer *restrict ci, BUN ncand, |
179 | void *restrict results, BUN ngrp, int tp1, int tp2, |
180 | const oid *restrict gids, |
181 | oid min, oid max, bool skip_nils, bool abort_on_error, |
182 | bool nil_if_empty) |
183 | { |
184 | struct pergroup { |
185 | int npartials; |
186 | int maxpartials; |
187 | bool valseen; |
188 | #ifdef INFINITES_ALLOWED |
189 | float infs; |
190 | #else |
191 | int infs; |
192 | #endif |
193 | double *partials; |
194 | } *pergroup; |
195 | BUN listi; |
196 | int parti; |
197 | int i; |
198 | BUN grp; |
199 | double x, y; |
200 | volatile double lo, hi; |
201 | double twopow = pow((double) FLT_RADIX, (double) (DBL_MAX_EXP - 1)); |
202 | BUN nils = 0; |
203 | volatile flt f; |
204 | |
205 | /* we only deal with the two floating point types */ |
206 | assert(tp1 == TYPE_flt || tp1 == TYPE_dbl); |
207 | assert(tp2 == TYPE_flt || tp2 == TYPE_dbl); |
208 | /* if input is dbl, then so it output */ |
209 | assert(tp2 == TYPE_flt || tp1 == TYPE_dbl); |
210 | /* if no gids, then we have a single group */ |
211 | assert(ngrp == 1 || gids != NULL); |
212 | if (gids == NULL || ngrp == 1) { |
213 | min = max = 0; |
214 | ngrp = 1; |
215 | gids = NULL; |
216 | } |
217 | pergroup = GDKmalloc(ngrp * sizeof(*pergroup)); |
218 | if (pergroup == NULL) |
219 | return BUN_NONE; |
220 | for (grp = 0; grp < ngrp; grp++) { |
221 | pergroup[grp].npartials = 0; |
222 | pergroup[grp].valseen = false; |
223 | pergroup[grp].maxpartials = 2; |
224 | pergroup[grp].infs = 0; |
225 | pergroup[grp].partials = GDKmalloc(pergroup[grp].maxpartials * sizeof(double)); |
226 | if (pergroup[grp].partials == NULL) { |
227 | while (grp > 0) |
228 | GDKfree(pergroup[--grp].partials); |
229 | GDKfree(pergroup); |
230 | return BUN_NONE; |
231 | } |
232 | } |
233 | while (ncand > 0) { |
234 | ncand--; |
235 | listi = canditer_next(ci) - seqb; |
236 | grp = gids ? gids[listi] : 0; |
237 | if (grp < min || grp > max) |
238 | continue; |
239 | if (pergroup[grp].partials == NULL) |
240 | continue; |
241 | if (tp1 == TYPE_flt && !is_flt_nil(((const flt *) values)[listi])) |
242 | x = ((const flt *) values)[listi]; |
243 | else if (tp1 == TYPE_dbl && !is_dbl_nil(((const dbl *) values)[listi])) |
244 | x = ((const dbl *) values)[listi]; |
245 | else { |
246 | /* it's a nil */ |
247 | if (!skip_nils) { |
248 | if (tp2 == TYPE_flt) |
249 | ((flt *) results)[grp] = flt_nil; |
250 | else |
251 | ((dbl *) results)[grp] = dbl_nil; |
252 | GDKfree(pergroup[grp].partials); |
253 | pergroup[grp].partials = NULL; |
254 | if (++nils == ngrp) |
255 | break; |
256 | } |
257 | continue; |
258 | } |
259 | pergroup[grp].valseen = true; |
260 | #ifdef INFINITES_ALLOWED |
261 | if (isinf(x)) { |
262 | pergroup[grp].infs += x; |
263 | continue; |
264 | } |
265 | #endif |
266 | i = 0; |
267 | for (parti = 0; parti < pergroup[grp].npartials; parti++) { |
268 | y = pergroup[grp].partials[parti]; |
269 | if (fabs(x) < fabs(y)) |
270 | exchange(&x, &y); |
271 | twosum(&hi, &lo, x, y); |
272 | if (isinf(hi)) { |
273 | int sign = hi > 0 ? 1 : -1; |
274 | hi = x - twopow * sign; |
275 | x = hi - twopow * sign; |
276 | pergroup[grp].infs += sign; |
277 | if (fabs(x) < fabs(y)) |
278 | exchange(&x, &y); |
279 | twosum(&hi, &lo, x, y); |
280 | } |
281 | if (lo != 0) |
282 | pergroup[grp].partials[i++] = lo; |
283 | x = hi; |
284 | } |
285 | if (x != 0) { |
286 | if (i == pergroup[grp].maxpartials) { |
287 | double *temp; |
288 | pergroup[grp].maxpartials += pergroup[grp].maxpartials; |
289 | temp = GDKrealloc(pergroup[grp].partials, pergroup[grp].maxpartials * sizeof(double)); |
290 | if (temp == NULL) |
291 | goto bailout; |
292 | pergroup[grp].partials = temp; |
293 | } |
294 | pergroup[grp].partials[i++] = x; |
295 | } |
296 | pergroup[grp].npartials = i; |
297 | } |
298 | |
299 | for (grp = 0; grp < ngrp; grp++) { |
300 | if (pergroup[grp].partials == NULL) |
301 | continue; |
302 | if (!pergroup[grp].valseen) { |
303 | if (tp2 == TYPE_flt) |
304 | ((flt *) results)[grp] = nil_if_empty ? flt_nil : 0; |
305 | else |
306 | ((dbl *) results)[grp] = nil_if_empty ? dbl_nil : 0; |
307 | nils += nil_if_empty; |
308 | GDKfree(pergroup[grp].partials); |
309 | pergroup[grp].partials = NULL; |
310 | continue; |
311 | } |
312 | #ifdef INFINITES_ALLOWED |
313 | if (isinf(pergroup[grp].infs) || isnan(pergroup[grp].infs)) { |
314 | if (abort_on_error) { |
315 | goto overflow; |
316 | } |
317 | if (tp2 == TYPE_flt) |
318 | ((flt *) results)[grp] = flt_nil; |
319 | else |
320 | ((dbl *) results)[grp] = dbl_nil; |
321 | nils++; |
322 | GDKfree(pergroup[grp].partials); |
323 | pergroup[grp].partials = NULL; |
324 | continue; |
325 | } |
326 | #endif |
327 | |
328 | if ((pergroup[grp].infs == 1 || pergroup[grp].infs == -1) && |
329 | pergroup[grp].npartials > 0 && |
330 | !samesign(pergroup[grp].infs, pergroup[grp].partials[pergroup[grp].npartials - 1])) { |
331 | twosum(&hi, &lo, pergroup[grp].infs * twopow, pergroup[grp].partials[pergroup[grp].npartials - 1] / 2); |
332 | if (isinf(2 * hi)) { |
333 | y = 2 * lo; |
334 | x = hi + y; |
335 | x -= hi; |
336 | if (x == y && |
337 | pergroup[grp].npartials > 1 && |
338 | samesign(lo, pergroup[grp].partials[pergroup[grp].npartials - 2])) { |
339 | GDKfree(pergroup[grp].partials); |
340 | pergroup[grp].partials = NULL; |
341 | x = 2 * (hi + y); |
342 | if (tp2 == TYPE_flt) { |
343 | f = (flt) x; |
344 | if (isinf(f) || |
345 | isnan(f) || |
346 | is_flt_nil(f)) { |
347 | if (abort_on_error) |
348 | goto overflow; |
349 | ((flt *) results)[grp] = flt_nil; |
350 | nils++; |
351 | } else |
352 | ((flt *) results)[grp] = f; |
353 | } else if (is_dbl_nil(x)) { |
354 | if (abort_on_error) |
355 | goto overflow; |
356 | ((dbl *) results)[grp] = dbl_nil; |
357 | nils++; |
358 | } else |
359 | ((dbl *) results)[grp] = x; |
360 | continue; |
361 | } |
362 | } else { |
363 | if (lo) { |
364 | if (pergroup[grp].npartials == pergroup[grp].maxpartials) { |
365 | double *temp; |
366 | /* we need space for one more */ |
367 | pergroup[grp].maxpartials++; |
368 | temp = GDKrealloc(pergroup[grp].partials, pergroup[grp].maxpartials * sizeof(double)); |
369 | if (temp == NULL) |
370 | goto bailout; |
371 | pergroup[grp].partials = temp; |
372 | } |
373 | pergroup[grp].partials[pergroup[grp].npartials - 1] = 2 * lo; |
374 | pergroup[grp].partials[pergroup[grp].npartials++] = 2 * hi; |
375 | } else { |
376 | pergroup[grp].partials[pergroup[grp].npartials - 1] = 2 * hi; |
377 | } |
378 | pergroup[grp].infs = 0; |
379 | } |
380 | } |
381 | |
382 | if (pergroup[grp].infs != 0) |
383 | goto overflow; |
384 | |
385 | if (pergroup[grp].npartials == 0) { |
386 | GDKfree(pergroup[grp].partials); |
387 | pergroup[grp].partials = NULL; |
388 | if (tp2 == TYPE_flt) |
389 | ((flt *) results)[grp] = 0; |
390 | else |
391 | ((dbl *) results)[grp] = 0; |
392 | continue; |
393 | } |
394 | |
395 | /* accumulate into hi */ |
396 | hi = pergroup[grp].partials[--pergroup[grp].npartials]; |
397 | while (pergroup[grp].npartials > 0) { |
398 | twosum(&hi, &lo, hi, pergroup[grp].partials[--pergroup[grp].npartials]); |
399 | if (lo) { |
400 | pergroup[grp].partials[pergroup[grp].npartials++] = lo; |
401 | break; |
402 | } |
403 | } |
404 | |
405 | if (pergroup[grp].npartials >= 2 && |
406 | samesign(pergroup[grp].partials[pergroup[grp].npartials - 1], pergroup[grp].partials[pergroup[grp].npartials - 2]) && |
407 | hi + 2 * pergroup[grp].partials[pergroup[grp].npartials - 1] - hi == 2 * pergroup[grp].partials[pergroup[grp].npartials - 1]) { |
408 | hi += 2 * pergroup[grp].partials[pergroup[grp].npartials - 1]; |
409 | pergroup[grp].partials[pergroup[grp].npartials - 1] = -pergroup[grp].partials[pergroup[grp].npartials - 1]; |
410 | } |
411 | |
412 | GDKfree(pergroup[grp].partials); |
413 | pergroup[grp].partials = NULL; |
414 | if (tp2 == TYPE_flt) { |
415 | f = (flt) hi; |
416 | if (isinf(f) || isnan(f) || is_flt_nil(f)) { |
417 | if (abort_on_error) |
418 | goto overflow; |
419 | ((flt *) results)[grp] = flt_nil; |
420 | nils++; |
421 | } else |
422 | ((flt *) results)[grp] = f; |
423 | } else if (is_dbl_nil(hi)) { |
424 | if (abort_on_error) |
425 | goto overflow; |
426 | ((dbl *) results)[grp] = dbl_nil; |
427 | nils++; |
428 | } else |
429 | ((dbl *) results)[grp] = hi; |
430 | } |
431 | GDKfree(pergroup); |
432 | return nils; |
433 | |
434 | overflow: |
435 | GDKerror("22003!overflow in calculation.\n" ); |
436 | bailout: |
437 | for (grp = 0; grp < ngrp; grp++) |
438 | GDKfree(pergroup[grp].partials); |
439 | GDKfree(pergroup); |
440 | return BUN_NONE; |
441 | } |
442 | |
443 | #define AGGR_SUM(TYPE1, TYPE2) \ |
444 | do { \ |
445 | TYPE1 x; \ |
446 | const TYPE1 *restrict vals = (const TYPE1 *) values; \ |
447 | if (ngrp == 1 && ci->tpe == cand_dense) { \ |
448 | /* single group, no candidate list */ \ |
449 | TYPE2 sum; \ |
450 | *algo = "no candidates, no groups"; \ |
451 | sum = 0; \ |
452 | if (nonil) { \ |
453 | *seen = ncand > 0; \ |
454 | for (i = 0; i < ncand && nils == 0; i++) { \ |
455 | x = vals[ci->seq + i - seqb]; \ |
456 | ADD_WITH_CHECK(x, sum, \ |
457 | TYPE2, sum, \ |
458 | GDK_##TYPE2##_max, \ |
459 | goto overflow); \ |
460 | } \ |
461 | } else { \ |
462 | bool seenval = false; \ |
463 | for (i = 0; i < ncand && nils == 0; i++) { \ |
464 | x = vals[ci->seq + i - seqb]; \ |
465 | if (is_##TYPE1##_nil(x)) { \ |
466 | if (!skip_nils) { \ |
467 | sum = TYPE2##_nil; \ |
468 | nils = 1; \ |
469 | } \ |
470 | } else { \ |
471 | ADD_WITH_CHECK(x, sum, \ |
472 | TYPE2, sum, \ |
473 | GDK_##TYPE2##_max, \ |
474 | goto overflow); \ |
475 | seenval = true; \ |
476 | } \ |
477 | } \ |
478 | *seen = seenval; \ |
479 | } \ |
480 | if (*seen) \ |
481 | *sums = sum; \ |
482 | } else if (ngrp == 1) { \ |
483 | /* single group, with candidate list */ \ |
484 | TYPE2 sum; \ |
485 | bool seenval = false; \ |
486 | *algo = "with candidates, no groups"; \ |
487 | sum = 0; \ |
488 | for (i = 0; i < ncand && nils == 0; i++) { \ |
489 | x = vals[canditer_next(ci) - seqb]; \ |
490 | if (is_##TYPE1##_nil(x)) { \ |
491 | if (!skip_nils) { \ |
492 | sum = TYPE2##_nil; \ |
493 | nils = 1; \ |
494 | } \ |
495 | } else { \ |
496 | ADD_WITH_CHECK(x, sum, \ |
497 | TYPE2, sum, \ |
498 | GDK_##TYPE2##_max, \ |
499 | goto overflow); \ |
500 | seenval = true; \ |
501 | } \ |
502 | } \ |
503 | if (seenval) \ |
504 | *sums = sum; \ |
505 | } else if (ci->tpe == cand_dense) { \ |
506 | /* multiple groups, no candidate list */ \ |
507 | *algo = "no candidates, with groups"; \ |
508 | for (i = 0; i < ncand; i++) { \ |
509 | if (gids == NULL || \ |
510 | (gids[i] >= min && gids[i] <= max)) { \ |
511 | gid = gids ? gids[i] - min : (oid) i; \ |
512 | x = vals[ci->seq + i - seqb]; \ |
513 | if (is_##TYPE1##_nil(x)) { \ |
514 | if (!skip_nils) { \ |
515 | sums[gid] = TYPE2##_nil; \ |
516 | nils++; \ |
517 | } \ |
518 | } else { \ |
519 | if (nil_if_empty && \ |
520 | !(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \ |
521 | seen[gid >> 5] |= 1U << (gid & 0x1F); \ |
522 | sums[gid] = 0; \ |
523 | } \ |
524 | if (!is_##TYPE2##_nil(sums[gid])) { \ |
525 | ADD_WITH_CHECK( \ |
526 | x, \ |
527 | sums[gid], \ |
528 | TYPE2, \ |
529 | sums[gid], \ |
530 | GDK_##TYPE2##_max, \ |
531 | goto overflow); \ |
532 | } \ |
533 | } \ |
534 | } \ |
535 | } \ |
536 | } else { \ |
537 | /* multiple groups, with candidate list */ \ |
538 | *algo = "with candidates, with groups"; \ |
539 | while (ncand > 0) { \ |
540 | ncand--; \ |
541 | i = canditer_next(ci) - seqb; \ |
542 | if (gids == NULL || \ |
543 | (gids[i] >= min && gids[i] <= max)) { \ |
544 | gid = gids ? gids[i] - min : (oid) i; \ |
545 | x = vals[i]; \ |
546 | if (is_##TYPE1##_nil(x)) { \ |
547 | if (!skip_nils) { \ |
548 | sums[gid] = TYPE2##_nil; \ |
549 | nils++; \ |
550 | } \ |
551 | } else { \ |
552 | if (nil_if_empty && \ |
553 | !(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \ |
554 | seen[gid >> 5] |= 1U << (gid & 0x1F); \ |
555 | sums[gid] = 0; \ |
556 | } \ |
557 | if (!is_##TYPE2##_nil(sums[gid])) { \ |
558 | ADD_WITH_CHECK( \ |
559 | x, \ |
560 | sums[gid], \ |
561 | TYPE2, \ |
562 | sums[gid], \ |
563 | GDK_##TYPE2##_max, \ |
564 | goto overflow); \ |
565 | } \ |
566 | } \ |
567 | } \ |
568 | } \ |
569 | } \ |
570 | } while (0) |
571 | |
572 | #define AGGR_SUM_NOOVL(TYPE1, TYPE2) \ |
573 | do { \ |
574 | TYPE1 x; \ |
575 | const TYPE1 *restrict vals = (const TYPE1 *) values; \ |
576 | if (ngrp == 1 && ci->tpe == cand_dense) { \ |
577 | /* single group, no candidate list */ \ |
578 | TYPE2 sum; \ |
579 | sum = 0; \ |
580 | if (nonil) { \ |
581 | *algo = "no candidates, no groups, no nils, no overflow"; \ |
582 | *seen = ncand > 0; \ |
583 | for (i = 0; i < ncand && nils == 0; i++) { \ |
584 | sum += vals[ci->seq + i - seqb]; \ |
585 | } \ |
586 | } else { \ |
587 | bool seenval = false; \ |
588 | *algo = "no candidates, no groups, no overflow"; \ |
589 | for (i = 0; i < ncand && nils == 0; i++) { \ |
590 | x = vals[ci->seq + i - seqb]; \ |
591 | if (is_##TYPE1##_nil(x)) { \ |
592 | if (!skip_nils) { \ |
593 | sum = TYPE2##_nil; \ |
594 | nils = 1; \ |
595 | } \ |
596 | } else { \ |
597 | sum += x; \ |
598 | seenval = true; \ |
599 | } \ |
600 | } \ |
601 | *seen = seenval; \ |
602 | } \ |
603 | if (*seen) \ |
604 | *sums = sum; \ |
605 | } else if (ngrp == 1) { \ |
606 | /* single group, with candidate list */ \ |
607 | TYPE2 sum; \ |
608 | bool seenval = false; \ |
609 | *algo = "with candidates, no groups, no overflow"; \ |
610 | sum = 0; \ |
611 | for (i = 0; i < ncand && nils == 0; i++) { \ |
612 | x = vals[canditer_next(ci) - seqb]; \ |
613 | if (is_##TYPE1##_nil(x)) { \ |
614 | if (!skip_nils) { \ |
615 | sum = TYPE2##_nil; \ |
616 | nils = 1; \ |
617 | } \ |
618 | } else { \ |
619 | sum += x; \ |
620 | seenval = true; \ |
621 | } \ |
622 | } \ |
623 | if (seenval) \ |
624 | *sums = sum; \ |
625 | } else if (ci->tpe == cand_dense) { \ |
626 | /* multiple groups, no candidate list */ \ |
627 | if (nonil) { \ |
628 | *algo = "no candidates, with groups, no nils, no overflow"; \ |
629 | for (i = 0; i < ncand; i++) { \ |
630 | if (gids == NULL || \ |
631 | (gids[i] >= min && gids[i] <= max)) { \ |
632 | gid = gids ? gids[i] - min : (oid) i; \ |
633 | x = vals[ci->seq + i - seqb]; \ |
634 | if (nil_if_empty && \ |
635 | !(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \ |
636 | seen[gid >> 5] |= 1U << (gid & 0x1F); \ |
637 | sums[gid] = 0; \ |
638 | } \ |
639 | sums[gid] += x; \ |
640 | } \ |
641 | } \ |
642 | } else { \ |
643 | *algo = "no candidates, with groups, no overflow"; \ |
644 | for (i = 0; i < ncand; i++) { \ |
645 | if (gids == NULL || \ |
646 | (gids[i] >= min && gids[i] <= max)) { \ |
647 | gid = gids ? gids[i] - min : (oid) i; \ |
648 | x = vals[ci->seq + i - seqb]; \ |
649 | if (is_##TYPE1##_nil(x)) { \ |
650 | if (!skip_nils) { \ |
651 | sums[gid] = TYPE2##_nil; \ |
652 | nils++; \ |
653 | } \ |
654 | } else { \ |
655 | if (nil_if_empty && \ |
656 | !(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \ |
657 | seen[gid >> 5] |= 1U << (gid & 0x1F); \ |
658 | sums[gid] = 0; \ |
659 | } \ |
660 | if (!is_##TYPE2##_nil(sums[gid])) { \ |
661 | sums[gid] += x; \ |
662 | } \ |
663 | } \ |
664 | } \ |
665 | } \ |
666 | } \ |
667 | } else { \ |
668 | /* multiple groups, with candidate list */ \ |
669 | *algo = "with candidates, with groups, no overflow"; \ |
670 | while (ncand > 0) { \ |
671 | ncand--; \ |
672 | i = canditer_next(ci) - seqb; \ |
673 | if (gids == NULL || \ |
674 | (gids[i] >= min && gids[i] <= max)) { \ |
675 | gid = gids ? gids[i] - min : (oid) i; \ |
676 | x = vals[i]; \ |
677 | if (is_##TYPE1##_nil(x)) { \ |
678 | if (!skip_nils) { \ |
679 | sums[gid] = TYPE2##_nil; \ |
680 | nils++; \ |
681 | } \ |
682 | } else { \ |
683 | if (nil_if_empty && \ |
684 | !(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \ |
685 | seen[gid >> 5] |= 1U << (gid & 0x1F); \ |
686 | sums[gid] = 0; \ |
687 | } \ |
688 | if (!is_##TYPE2##_nil(sums[gid])) { \ |
689 | sums[gid] += x; \ |
690 | } \ |
691 | } \ |
692 | } \ |
693 | } \ |
694 | } \ |
695 | } while (0) |
696 | |
697 | static BUN |
698 | dosum(const void *restrict values, bool nonil, oid seqb, |
699 | struct canditer *restrict ci, BUN ncand, |
700 | void *restrict results, BUN ngrp, int tp1, int tp2, |
701 | const oid *restrict gids, |
702 | oid min, oid max, bool skip_nils, bool abort_on_error, |
703 | bool nil_if_empty, const char *func, const char **algo) |
704 | { |
705 | BUN nils = 0; |
706 | BUN i; |
707 | oid gid; |
708 | unsigned int *restrict seen = NULL; /* bitmask for groups that we've seen */ |
709 | |
710 | switch (tp2) { |
711 | case TYPE_flt: |
712 | if (tp1 != TYPE_flt) |
713 | goto unsupported; |
714 | /* fall through */ |
715 | case TYPE_dbl: |
716 | if (tp1 != TYPE_flt && tp1 != TYPE_dbl) |
717 | goto unsupported; |
718 | *algo = "floating sum" ; |
719 | return dofsum(values, seqb, ci, ncand, results, ngrp, tp1, tp2, |
720 | gids, min, max, skip_nils, abort_on_error, |
721 | nil_if_empty); |
722 | } |
723 | |
724 | /* allocate bitmap for seen group ids */ |
725 | seen = GDKzalloc(((ngrp + 31) / 32) * sizeof(int)); |
726 | if (seen == NULL) { |
727 | GDKerror("%s: cannot allocate enough memory\n" , func); |
728 | return BUN_NONE; |
729 | } |
730 | |
731 | switch (tp2) { |
732 | case TYPE_bte: { |
733 | bte *restrict sums = (bte *) results; |
734 | switch (tp1) { |
735 | case TYPE_bte: |
736 | AGGR_SUM(bte, bte); |
737 | break; |
738 | default: |
739 | goto unsupported; |
740 | } |
741 | break; |
742 | } |
743 | case TYPE_sht: { |
744 | sht *restrict sums = (sht *) results; |
745 | switch (tp1) { |
746 | case TYPE_bte: |
747 | if (ncand < ((BUN) 1 << ((sizeof(sht) - sizeof(bte)) << 3))) |
748 | AGGR_SUM_NOOVL(bte, sht); |
749 | else |
750 | AGGR_SUM(bte, sht); |
751 | break; |
752 | case TYPE_sht: |
753 | AGGR_SUM(sht, sht); |
754 | break; |
755 | default: |
756 | goto unsupported; |
757 | } |
758 | break; |
759 | } |
760 | case TYPE_int: { |
761 | int *restrict sums = (int *) results; |
762 | switch (tp1) { |
763 | case TYPE_bte: |
764 | if (ncand < ((BUN) 1 << ((sizeof(int) - sizeof(bte)) << 3))) |
765 | AGGR_SUM_NOOVL(bte, int); |
766 | else |
767 | AGGR_SUM(bte, int); |
768 | break; |
769 | case TYPE_sht: |
770 | if (ncand < ((BUN) 1 << ((sizeof(int) - sizeof(sht)) << 3))) |
771 | AGGR_SUM_NOOVL(sht, int); |
772 | else |
773 | AGGR_SUM(sht, int); |
774 | break; |
775 | case TYPE_int: |
776 | AGGR_SUM(int, int); |
777 | break; |
778 | default: |
779 | goto unsupported; |
780 | } |
781 | break; |
782 | } |
783 | case TYPE_lng: { |
784 | lng *restrict sums = (lng *) results; |
785 | switch (tp1) { |
786 | #if SIZEOF_BUN == 4 |
787 | case TYPE_bte: |
788 | AGGR_SUM_NOOVL(bte, lng); |
789 | break; |
790 | case TYPE_sht: |
791 | AGGR_SUM_NOOVL(sht, lng); |
792 | break; |
793 | case TYPE_int: |
794 | AGGR_SUM_NOOVL(int, lng); |
795 | break; |
796 | #else |
797 | case TYPE_bte: |
798 | if (ncand < ((BUN) 1 << ((sizeof(lng) - sizeof(bte)) << 3))) |
799 | AGGR_SUM_NOOVL(bte, lng); |
800 | else |
801 | AGGR_SUM(bte, lng); |
802 | break; |
803 | case TYPE_sht: |
804 | if (ncand < ((BUN) 1 << ((sizeof(lng) - sizeof(sht)) << 3))) |
805 | AGGR_SUM_NOOVL(sht, lng); |
806 | else |
807 | AGGR_SUM(sht, lng); |
808 | break; |
809 | case TYPE_int: |
810 | if (ncand < ((BUN) 1 << ((sizeof(lng) - sizeof(int)) << 3))) |
811 | AGGR_SUM_NOOVL(int, lng); |
812 | else |
813 | AGGR_SUM(int, lng); |
814 | break; |
815 | #endif |
816 | case TYPE_lng: |
817 | AGGR_SUM(lng, lng); |
818 | break; |
819 | default: |
820 | goto unsupported; |
821 | } |
822 | break; |
823 | } |
824 | #ifdef HAVE_HGE |
825 | case TYPE_hge: { |
826 | hge *sums = (hge *) results; |
827 | switch (tp1) { |
828 | case TYPE_bte: |
829 | AGGR_SUM_NOOVL(bte, hge); |
830 | break; |
831 | case TYPE_sht: |
832 | AGGR_SUM_NOOVL(sht, hge); |
833 | break; |
834 | case TYPE_int: |
835 | AGGR_SUM_NOOVL(int, hge); |
836 | break; |
837 | case TYPE_lng: |
838 | AGGR_SUM_NOOVL(lng, hge); |
839 | break; |
840 | case TYPE_hge: |
841 | AGGR_SUM(hge, hge); |
842 | break; |
843 | default: |
844 | goto unsupported; |
845 | } |
846 | break; |
847 | } |
848 | #endif |
849 | default: |
850 | goto unsupported; |
851 | } |
852 | |
853 | if (nils == 0 && nil_if_empty) { |
854 | /* figure out whether there were any empty groups |
855 | * (that result in a nil value) */ |
856 | if (ngrp & 0x1F) { |
857 | /* fill last slot */ |
858 | seen[ngrp >> 5] |= ~0U << (ngrp & 0x1F); |
859 | } |
860 | for (i = 0, ngrp = (ngrp + 31) / 32; i < ngrp; i++) { |
861 | if (seen[i] != ~0U) { |
862 | nils = 1; |
863 | break; |
864 | } |
865 | } |
866 | } |
867 | GDKfree(seen); |
868 | |
869 | return nils; |
870 | |
871 | unsupported: |
872 | GDKfree(seen); |
873 | GDKerror("%s: type combination (sum(%s)->%s) not supported.\n" , |
874 | func, ATOMname(tp1), ATOMname(tp2)); |
875 | return BUN_NONE; |
876 | |
877 | overflow: |
878 | GDKfree(seen); |
879 | GDKerror("22003!overflow in calculation.\n" ); |
880 | return BUN_NONE; |
881 | } |
882 | |
883 | /* calculate group sums with optional candidates list */ |
884 | BAT * |
885 | BATgroupsum(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error) |
886 | { |
887 | const oid *restrict gids; |
888 | oid min, max; |
889 | BUN ngrp; |
890 | BUN nils; |
891 | BAT *bn; |
892 | struct canditer ci; |
893 | BUN ncand; |
894 | const char *err; |
895 | const char *algo = NULL; |
896 | lng t0 = 0; |
897 | |
898 | ALGODEBUG t0 = GDKusec(); |
899 | |
900 | if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
901 | GDKerror("BATgroupsum: %s\n" , err); |
902 | return NULL; |
903 | } |
904 | if (g == NULL) { |
905 | GDKerror("BATgroupsum: b and g must be aligned\n" ); |
906 | return NULL; |
907 | } |
908 | |
909 | if (BATcount(b) == 0 || ngrp == 0) { |
910 | /* trivial: no sums, so return bat aligned with g with |
911 | * nil in the tail */ |
912 | return BATconstant(ngrp == 0 ? 0 : min, tp, ATOMnilptr(tp), ngrp, TRANSIENT); |
913 | } |
914 | |
915 | if ((e == NULL || |
916 | (BATcount(e) == BATcount(b) && e->hseqbase == b->hseqbase)) && |
917 | (BATtdense(g) || (g->tkey && g->tnonil))) { |
918 | /* trivial: singleton groups, so all results are equal |
919 | * to the inputs (but possibly a different type) */ |
920 | return BATconvert(b, s, tp, abort_on_error); |
921 | } |
922 | |
923 | bn = BATconstant(min, tp, ATOMnilptr(tp), ngrp, TRANSIENT); |
924 | if (bn == NULL) { |
925 | return NULL; |
926 | } |
927 | |
928 | if (BATtdense(g)) |
929 | gids = NULL; |
930 | else |
931 | gids = (const oid *) Tloc(g, 0); |
932 | |
933 | nils = dosum(Tloc(b, 0), b->tnonil, b->hseqbase, &ci, ncand, |
934 | Tloc(bn, 0), ngrp, b->ttype, tp, gids, min, max, |
935 | skip_nils, abort_on_error, true, "BATgroupsum" , &algo); |
936 | |
937 | if (nils < BUN_NONE) { |
938 | BATsetcount(bn, ngrp); |
939 | bn->tkey = BATcount(bn) <= 1; |
940 | bn->tsorted = BATcount(bn) <= 1; |
941 | bn->trevsorted = BATcount(bn) <= 1; |
942 | bn->tnil = nils != 0; |
943 | bn->tnonil = nils == 0; |
944 | } else { |
945 | BBPunfix(bn->batCacheid); |
946 | bn = NULL; |
947 | } |
948 | |
949 | ALGODEBUG fprintf(stderr, |
950 | "#%s: %s(b=" ALGOBATFMT",g=" ALGOOPTBATFMT",e=" ALGOOPTBATFMT",s=" ALGOOPTBATFMT")=" ALGOOPTBATFMT": %s; " |
951 | "start " OIDFMT ", count " BUNFMT " (" LLFMT " usec)" |
952 | "\n" , |
953 | MT_thread_getname(), __func__, |
954 | ALGOBATPAR(b), ALGOOPTBATPAR(g), ALGOOPTBATPAR(e), |
955 | ALGOOPTBATPAR(s), ALGOOPTBATPAR(bn), |
956 | algo ? algo : "" , |
957 | ci.seq, ncand, GDKusec() - t0); |
958 | return bn; |
959 | } |
960 | |
961 | gdk_return |
962 | BATsum(void *res, int tp, BAT *b, BAT *s, bool skip_nils, bool abort_on_error, bool nil_if_empty) |
963 | { |
964 | oid min, max; |
965 | BUN ngrp; |
966 | BUN nils; |
967 | struct canditer ci; |
968 | BUN ncand; |
969 | const char *err; |
970 | const char *algo = NULL; |
971 | lng t0 = 0; |
972 | |
973 | ALGODEBUG t0 = GDKusec(); |
974 | |
975 | if ((err = BATgroupaggrinit(b, NULL, NULL, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
976 | GDKerror("BATsum: %s\n" , err); |
977 | return GDK_FAIL; |
978 | } |
979 | switch (tp) { |
980 | case TYPE_bte: |
981 | * (bte *) res = nil_if_empty ? bte_nil : 0; |
982 | break; |
983 | case TYPE_sht: |
984 | * (sht *) res = nil_if_empty ? sht_nil : 0; |
985 | break; |
986 | case TYPE_int: |
987 | * (int *) res = nil_if_empty ? int_nil : 0; |
988 | break; |
989 | case TYPE_lng: |
990 | * (lng *) res = nil_if_empty ? lng_nil : 0; |
991 | break; |
992 | #ifdef HAVE_HGE |
993 | case TYPE_hge: |
994 | * (hge *) res = nil_if_empty ? hge_nil : 0; |
995 | break; |
996 | #endif |
997 | case TYPE_flt: |
998 | case TYPE_dbl: |
999 | switch (b->ttype) { |
1000 | case TYPE_bte: |
1001 | case TYPE_sht: |
1002 | case TYPE_int: |
1003 | case TYPE_lng: |
1004 | #ifdef HAVE_HGE |
1005 | case TYPE_hge: |
1006 | #endif |
1007 | { |
1008 | /* special case for summing integer types into |
1009 | * a floating point: We calculate the average |
1010 | * (which is done exactly), and multiply the |
1011 | * result by the count to get the sum. Note |
1012 | * that if we just summed into a floating |
1013 | * point number, we could loose too much |
1014 | * accuracy, and if we summed into lng first, |
1015 | * we could get unnecessary overflow. */ |
1016 | dbl avg; |
1017 | BUN cnt; |
1018 | |
1019 | if (BATcalcavg(b, s, &avg, &cnt, 0) != GDK_SUCCEED) |
1020 | return GDK_FAIL; |
1021 | if (cnt == 0) { |
1022 | avg = nil_if_empty ? dbl_nil : 0; |
1023 | } |
1024 | if (cnt < BATcount(b) && !skip_nils) { |
1025 | /* there were nils */ |
1026 | avg = dbl_nil; |
1027 | } |
1028 | if (tp == TYPE_flt) { |
1029 | if (is_dbl_nil(avg)) |
1030 | *(flt *) res = flt_nil; |
1031 | else if (cnt > 0 && |
1032 | GDK_flt_max / cnt < fabs(avg)) { |
1033 | if (abort_on_error) { |
1034 | GDKerror("22003!overflow in calculation.\n" ); |
1035 | return GDK_FAIL; |
1036 | } |
1037 | *(flt *) res = flt_nil; |
1038 | } else { |
1039 | *(flt *) res = (flt) avg * cnt; |
1040 | } |
1041 | } else { |
1042 | if (is_dbl_nil(avg)) { |
1043 | *(dbl *) res = dbl_nil; |
1044 | } else if (cnt > 0 && |
1045 | GDK_dbl_max / cnt < fabs(avg)) { |
1046 | if (abort_on_error) { |
1047 | GDKerror("22003!overflow in calculation.\n" ); |
1048 | return GDK_FAIL; |
1049 | } |
1050 | *(dbl *) res = dbl_nil; |
1051 | } else { |
1052 | *(dbl *) res = avg * cnt; |
1053 | } |
1054 | } |
1055 | return GDK_SUCCEED; |
1056 | } |
1057 | default: |
1058 | break; |
1059 | } |
1060 | if (b->ttype == TYPE_dbl) |
1061 | * (dbl *) res = nil_if_empty ? dbl_nil : 0; |
1062 | else |
1063 | * (flt *) res = nil_if_empty ? flt_nil : 0; |
1064 | break; |
1065 | default: |
1066 | GDKerror("BATsum: type combination (sum(%s)->%s) not supported.\n" , |
1067 | ATOMname(b->ttype), ATOMname(tp)); |
1068 | return GDK_FAIL; |
1069 | } |
1070 | if (BATcount(b) == 0) |
1071 | return GDK_SUCCEED; |
1072 | nils = dosum(Tloc(b, 0), b->tnonil, b->hseqbase, &ci, ncand, |
1073 | res, true, b->ttype, tp, &min, min, max, |
1074 | skip_nils, abort_on_error, nil_if_empty, "BATsum" , &algo); |
1075 | ALGODEBUG fprintf(stderr, |
1076 | "#%s: %s(b=" ALGOBATFMT",s=" ALGOOPTBATFMT"): %s; " |
1077 | "start " OIDFMT ", count " BUNFMT " (" LLFMT " usec)" |
1078 | "\n" , |
1079 | MT_thread_getname(), __func__, |
1080 | ALGOBATPAR(b), ALGOOPTBATPAR(s), |
1081 | algo ? algo : "" , |
1082 | ci.seq, ncand, GDKusec() - t0); |
1083 | return nils < BUN_NONE ? GDK_SUCCEED : GDK_FAIL; |
1084 | } |
1085 | |
1086 | /* ---------------------------------------------------------------------- */ |
1087 | /* product */ |
1088 | |
1089 | #define AGGR_PROD(TYPE1, TYPE2, TYPE3) \ |
1090 | do { \ |
1091 | const TYPE1 *restrict vals = (const TYPE1 *) values; \ |
1092 | gid = 0; /* doesn't change if gidincr == false */ \ |
1093 | while (ncand > 0) { \ |
1094 | ncand--; \ |
1095 | i = canditer_next(ci) - seqb; \ |
1096 | if (gids == NULL || !gidincr || \ |
1097 | (gids[i] >= min && gids[i] <= max)) { \ |
1098 | if (gidincr) { \ |
1099 | if (gids) \ |
1100 | gid = gids[i] - min; \ |
1101 | else \ |
1102 | gid = (oid) i; \ |
1103 | } \ |
1104 | if (is_##TYPE1##_nil(vals[i])) { \ |
1105 | if (!skip_nils) { \ |
1106 | prods[gid] = TYPE2##_nil; \ |
1107 | nils++; \ |
1108 | } \ |
1109 | } else { \ |
1110 | if (nil_if_empty && \ |
1111 | !(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \ |
1112 | seen[gid >> 5] |= 1U << (gid & 0x1F); \ |
1113 | prods[gid] = 1; \ |
1114 | } \ |
1115 | if (!is_##TYPE2##_nil(prods[gid])) { \ |
1116 | MUL4_WITH_CHECK( \ |
1117 | vals[i], \ |
1118 | prods[gid], \ |
1119 | TYPE2, prods[gid], \ |
1120 | GDK_##TYPE2##_max, \ |
1121 | TYPE3, \ |
1122 | goto overflow); \ |
1123 | } \ |
1124 | } \ |
1125 | } \ |
1126 | } \ |
1127 | } while (0) |
1128 | |
1129 | #ifdef HAVE_HGE |
1130 | #define AGGR_PROD_HGE(TYPE) \ |
1131 | do { \ |
1132 | const TYPE *vals = (const TYPE *) values; \ |
1133 | gid = 0; /* doesn't change if gidincr == false */ \ |
1134 | while (ncand > 0) { \ |
1135 | ncand--; \ |
1136 | i = canditer_next(ci) - seqb; \ |
1137 | if (gids == NULL || !gidincr || \ |
1138 | (gids[i] >= min && gids[i] <= max)) { \ |
1139 | if (gidincr) { \ |
1140 | if (gids) \ |
1141 | gid = gids[i] - min; \ |
1142 | else \ |
1143 | gid = (oid) i; \ |
1144 | } \ |
1145 | if (nil_if_empty && \ |
1146 | !(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \ |
1147 | seen[gid >> 5] |= 1U << (gid & 0x1F); \ |
1148 | prods[gid] = 1; \ |
1149 | } \ |
1150 | if (is_##TYPE##_nil(vals[i])) { \ |
1151 | if (!skip_nils) { \ |
1152 | prods[gid] = hge_nil; \ |
1153 | nils++; \ |
1154 | } \ |
1155 | } else if (!is_hge_nil(prods[gid])) { \ |
1156 | HGEMUL_CHECK(vals[i], \ |
1157 | prods[gid], \ |
1158 | prods[gid], \ |
1159 | GDK_hge_max, \ |
1160 | goto overflow); \ |
1161 | } \ |
1162 | } \ |
1163 | } \ |
1164 | } while (0) |
1165 | #else |
1166 | #define AGGR_PROD_LNG(TYPE) \ |
1167 | do { \ |
1168 | const TYPE *restrict vals = (const TYPE *) values; \ |
1169 | gid = 0; /* doesn't change if gidincr == false */ \ |
1170 | while (ncand > 0) { \ |
1171 | ncand--; \ |
1172 | i = canditer_next(ci) - seqb; \ |
1173 | if (gids == NULL || !gidincr || \ |
1174 | (gids[i] >= min && gids[i] <= max)) { \ |
1175 | if (gidincr) { \ |
1176 | if (gids) \ |
1177 | gid = gids[i] - min; \ |
1178 | else \ |
1179 | gid = (oid) i; \ |
1180 | } \ |
1181 | if (is_##TYPE##_nil(vals[i])) { \ |
1182 | if (!skip_nils) { \ |
1183 | prods[gid] = lng_nil; \ |
1184 | nils++; \ |
1185 | } \ |
1186 | } else { \ |
1187 | if (nil_if_empty && \ |
1188 | !(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \ |
1189 | seen[gid >> 5] |= 1U << (gid & 0x1F); \ |
1190 | prods[gid] = 1; \ |
1191 | } \ |
1192 | if (!is_lng_nil(prods[gid])) { \ |
1193 | LNGMUL_CHECK( \ |
1194 | vals[i], \ |
1195 | prods[gid], \ |
1196 | prods[gid], \ |
1197 | GDK_lng_max, \ |
1198 | goto overflow); \ |
1199 | } \ |
1200 | } \ |
1201 | } \ |
1202 | } \ |
1203 | } while (0) |
1204 | #endif |
1205 | |
1206 | #define AGGR_PROD_FLOAT(TYPE1, TYPE2) \ |
1207 | do { \ |
1208 | const TYPE1 *restrict vals = (const TYPE1 *) values; \ |
1209 | gid = 0; /* doesn't change if gidincr == false */ \ |
1210 | while (ncand > 0) { \ |
1211 | ncand--; \ |
1212 | i = canditer_next(ci) - seqb; \ |
1213 | if (gids == NULL || !gidincr || \ |
1214 | (gids[i] >= min && gids[i] <= max)) { \ |
1215 | if (gidincr) { \ |
1216 | if (gids) \ |
1217 | gid = gids[i] - min; \ |
1218 | else \ |
1219 | gid = (oid) i; \ |
1220 | } \ |
1221 | if (is_##TYPE1##_nil(vals[i])) { \ |
1222 | if (!skip_nils) { \ |
1223 | prods[gid] = TYPE2##_nil; \ |
1224 | nils++; \ |
1225 | } \ |
1226 | } else { \ |
1227 | if (nil_if_empty && \ |
1228 | !(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \ |
1229 | seen[gid >> 5] |= 1U << (gid & 0x1F); \ |
1230 | prods[gid] = 1; \ |
1231 | } \ |
1232 | if (!is_##TYPE2##_nil(prods[gid])) { \ |
1233 | if (ABSOLUTE(vals[i]) > 1 && \ |
1234 | GDK_##TYPE2##_max / ABSOLUTE(vals[i]) < ABSOLUTE(prods[gid])) { \ |
1235 | if (abort_on_error) \ |
1236 | goto overflow; \ |
1237 | prods[gid] = TYPE2##_nil; \ |
1238 | nils++; \ |
1239 | } else { \ |
1240 | prods[gid] *= vals[i]; \ |
1241 | } \ |
1242 | } \ |
1243 | } \ |
1244 | } \ |
1245 | } \ |
1246 | } while (0) |
1247 | |
1248 | static BUN |
1249 | doprod(const void *restrict values, oid seqb, struct canditer *restrict ci, BUN ncand, |
1250 | void *restrict results, BUN ngrp, int tp1, int tp2, |
1251 | const oid *restrict gids, bool gidincr, oid min, oid max, |
1252 | bool skip_nils, bool abort_on_error, bool nil_if_empty, const char *func) |
1253 | { |
1254 | BUN nils = 0; |
1255 | BUN i; |
1256 | oid gid; |
1257 | unsigned int *restrict seen; /* bitmask for groups that we've seen */ |
1258 | |
1259 | /* allocate bitmap for seen group ids */ |
1260 | seen = GDKzalloc(((ngrp + 31) / 32) * sizeof(int)); |
1261 | if (seen == NULL) { |
1262 | GDKerror("%s: cannot allocate enough memory\n" , func); |
1263 | return BUN_NONE; |
1264 | } |
1265 | |
1266 | switch (tp2) { |
1267 | case TYPE_bte: { |
1268 | bte *restrict prods = (bte *) results; |
1269 | switch (tp1) { |
1270 | case TYPE_bte: |
1271 | AGGR_PROD(bte, bte, sht); |
1272 | break; |
1273 | default: |
1274 | goto unsupported; |
1275 | } |
1276 | break; |
1277 | } |
1278 | case TYPE_sht: { |
1279 | sht *restrict prods = (sht *) results; |
1280 | switch (tp1) { |
1281 | case TYPE_bte: |
1282 | AGGR_PROD(bte, sht, int); |
1283 | break; |
1284 | case TYPE_sht: |
1285 | AGGR_PROD(sht, sht, int); |
1286 | break; |
1287 | default: |
1288 | goto unsupported; |
1289 | } |
1290 | break; |
1291 | } |
1292 | case TYPE_int: { |
1293 | int *restrict prods = (int *) results; |
1294 | switch (tp1) { |
1295 | case TYPE_bte: |
1296 | AGGR_PROD(bte, int, lng); |
1297 | break; |
1298 | case TYPE_sht: |
1299 | AGGR_PROD(sht, int, lng); |
1300 | break; |
1301 | case TYPE_int: |
1302 | AGGR_PROD(int, int, lng); |
1303 | break; |
1304 | default: |
1305 | goto unsupported; |
1306 | } |
1307 | break; |
1308 | } |
1309 | #ifdef HAVE_HGE |
1310 | case TYPE_lng: { |
1311 | lng *prods = (lng *) results; |
1312 | switch (tp1) { |
1313 | case TYPE_bte: |
1314 | AGGR_PROD(bte, lng, hge); |
1315 | break; |
1316 | case TYPE_sht: |
1317 | AGGR_PROD(sht, lng, hge); |
1318 | break; |
1319 | case TYPE_int: |
1320 | AGGR_PROD(int, lng, hge); |
1321 | break; |
1322 | case TYPE_lng: |
1323 | AGGR_PROD(lng, lng, hge); |
1324 | break; |
1325 | default: |
1326 | goto unsupported; |
1327 | } |
1328 | break; |
1329 | } |
1330 | case TYPE_hge: { |
1331 | hge *prods = (hge *) results; |
1332 | switch (tp1) { |
1333 | case TYPE_bte: |
1334 | AGGR_PROD_HGE(bte); |
1335 | break; |
1336 | case TYPE_sht: |
1337 | AGGR_PROD_HGE(sht); |
1338 | break; |
1339 | case TYPE_int: |
1340 | AGGR_PROD_HGE(int); |
1341 | break; |
1342 | case TYPE_lng: |
1343 | AGGR_PROD_HGE(lng); |
1344 | break; |
1345 | case TYPE_hge: |
1346 | AGGR_PROD_HGE(hge); |
1347 | break; |
1348 | default: |
1349 | goto unsupported; |
1350 | } |
1351 | break; |
1352 | } |
1353 | #else |
1354 | case TYPE_lng: { |
1355 | lng *restrict prods = (lng *) results; |
1356 | switch (tp1) { |
1357 | case TYPE_bte: |
1358 | AGGR_PROD_LNG(bte); |
1359 | break; |
1360 | case TYPE_sht: |
1361 | AGGR_PROD_LNG(sht); |
1362 | break; |
1363 | case TYPE_int: |
1364 | AGGR_PROD_LNG(int); |
1365 | break; |
1366 | case TYPE_lng: |
1367 | AGGR_PROD_LNG(lng); |
1368 | break; |
1369 | default: |
1370 | goto unsupported; |
1371 | } |
1372 | break; |
1373 | } |
1374 | #endif |
1375 | case TYPE_flt: { |
1376 | flt *restrict prods = (flt *) results; |
1377 | switch (tp1) { |
1378 | case TYPE_bte: |
1379 | AGGR_PROD_FLOAT(bte, flt); |
1380 | break; |
1381 | case TYPE_sht: |
1382 | AGGR_PROD_FLOAT(sht, flt); |
1383 | break; |
1384 | case TYPE_int: |
1385 | AGGR_PROD_FLOAT(int, flt); |
1386 | break; |
1387 | case TYPE_lng: |
1388 | AGGR_PROD_FLOAT(lng, flt); |
1389 | break; |
1390 | #ifdef HAVE_HGE |
1391 | case TYPE_hge: |
1392 | AGGR_PROD_FLOAT(hge, flt); |
1393 | break; |
1394 | #endif |
1395 | case TYPE_flt: |
1396 | AGGR_PROD_FLOAT(flt, flt); |
1397 | break; |
1398 | default: |
1399 | goto unsupported; |
1400 | } |
1401 | break; |
1402 | } |
1403 | case TYPE_dbl: { |
1404 | dbl *restrict prods = (dbl *) results; |
1405 | switch (tp1) { |
1406 | case TYPE_bte: |
1407 | AGGR_PROD_FLOAT(bte, dbl); |
1408 | break; |
1409 | case TYPE_sht: |
1410 | AGGR_PROD_FLOAT(sht, dbl); |
1411 | break; |
1412 | case TYPE_int: |
1413 | AGGR_PROD_FLOAT(int, dbl); |
1414 | break; |
1415 | case TYPE_lng: |
1416 | AGGR_PROD_FLOAT(lng, dbl); |
1417 | break; |
1418 | #ifdef HAVE_HGE |
1419 | case TYPE_hge: |
1420 | AGGR_PROD_FLOAT(hge, dbl); |
1421 | break; |
1422 | #endif |
1423 | case TYPE_flt: |
1424 | AGGR_PROD_FLOAT(flt, dbl); |
1425 | break; |
1426 | case TYPE_dbl: |
1427 | AGGR_PROD_FLOAT(dbl, dbl); |
1428 | break; |
1429 | default: |
1430 | goto unsupported; |
1431 | } |
1432 | break; |
1433 | } |
1434 | default: |
1435 | goto unsupported; |
1436 | } |
1437 | |
1438 | if (nils == 0 && nil_if_empty) { |
1439 | /* figure out whether there were any empty groups |
1440 | * (that result in a nil value) */ |
1441 | if (ngrp & 0x1F) { |
1442 | /* fill last slot */ |
1443 | seen[ngrp >> 5] |= ~0U << (ngrp & 0x1F); |
1444 | } |
1445 | for (i = 0, ngrp = (ngrp + 31) / 32; i < ngrp; i++) { |
1446 | if (seen[i] != ~0U) { |
1447 | nils = 1; |
1448 | break; |
1449 | } |
1450 | } |
1451 | } |
1452 | GDKfree(seen); |
1453 | |
1454 | return nils; |
1455 | |
1456 | unsupported: |
1457 | GDKfree(seen); |
1458 | GDKerror("%s: type combination (mul(%s)->%s) not supported.\n" , |
1459 | func, ATOMname(tp1), ATOMname(tp2)); |
1460 | return BUN_NONE; |
1461 | |
1462 | overflow: |
1463 | GDKfree(seen); |
1464 | GDKerror("22003!overflow in calculation.\n" ); |
1465 | return BUN_NONE; |
1466 | } |
1467 | |
1468 | /* calculate group products with optional candidates list */ |
1469 | BAT * |
1470 | BATgroupprod(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error) |
1471 | { |
1472 | const oid *restrict gids; |
1473 | oid min, max; |
1474 | BUN ngrp; |
1475 | BUN nils; |
1476 | BAT *bn; |
1477 | struct canditer ci; |
1478 | BUN ncand; |
1479 | const char *err; |
1480 | |
1481 | if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
1482 | GDKerror("BATgroupprod: %s\n" , err); |
1483 | return NULL; |
1484 | } |
1485 | if (g == NULL) { |
1486 | GDKerror("BATgroupprod: b and g must be aligned\n" ); |
1487 | return NULL; |
1488 | } |
1489 | |
1490 | if (BATcount(b) == 0 || ngrp == 0) { |
1491 | /* trivial: no products, so return bat aligned with g |
1492 | * with nil in the tail */ |
1493 | return BATconstant(ngrp == 0 ? 0 : min, tp, ATOMnilptr(tp), ngrp, TRANSIENT); |
1494 | } |
1495 | |
1496 | if ((e == NULL || |
1497 | (BATcount(e) == BATcount(b) && e->hseqbase == b->hseqbase)) && |
1498 | (BATtdense(g) || (g->tkey && g->tnonil))) { |
1499 | /* trivial: singleton groups, so all results are equal |
1500 | * to the inputs (but possibly a different type) */ |
1501 | return BATconvert(b, s, tp, abort_on_error); |
1502 | } |
1503 | |
1504 | bn = BATconstant(min, tp, ATOMnilptr(tp), ngrp, TRANSIENT); |
1505 | if (bn == NULL) { |
1506 | return NULL; |
1507 | } |
1508 | |
1509 | if (BATtdense(g)) |
1510 | gids = NULL; |
1511 | else |
1512 | gids = (const oid *) Tloc(g, 0); |
1513 | |
1514 | nils = doprod(Tloc(b, 0), b->hseqbase, &ci, ncand, Tloc(bn, 0), ngrp, |
1515 | b->ttype, tp, gids, true, min, max, skip_nils, |
1516 | abort_on_error, true, "BATgroupprod" ); |
1517 | |
1518 | if (nils < BUN_NONE) { |
1519 | BATsetcount(bn, ngrp); |
1520 | bn->tkey = BATcount(bn) <= 1; |
1521 | bn->tsorted = BATcount(bn) <= 1; |
1522 | bn->trevsorted = BATcount(bn) <= 1; |
1523 | bn->tnil = nils != 0; |
1524 | bn->tnonil = nils == 0; |
1525 | } else { |
1526 | BBPunfix(bn->batCacheid); |
1527 | bn = NULL; |
1528 | } |
1529 | |
1530 | return bn; |
1531 | } |
1532 | |
1533 | gdk_return |
1534 | BATprod(void *res, int tp, BAT *b, BAT *s, bool skip_nils, bool abort_on_error, bool nil_if_empty) |
1535 | { |
1536 | oid min, max; |
1537 | BUN ngrp; |
1538 | BUN nils; |
1539 | struct canditer ci; |
1540 | BUN ncand; |
1541 | const char *err; |
1542 | |
1543 | if ((err = BATgroupaggrinit(b, NULL, NULL, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
1544 | GDKerror("BATprod: %s\n" , err); |
1545 | return GDK_FAIL; |
1546 | } |
1547 | switch (tp) { |
1548 | case TYPE_bte: |
1549 | * (bte *) res = nil_if_empty ? bte_nil : (bte) 1; |
1550 | break; |
1551 | case TYPE_sht: |
1552 | * (sht *) res = nil_if_empty ? sht_nil : (sht) 1; |
1553 | break; |
1554 | case TYPE_int: |
1555 | * (int *) res = nil_if_empty ? int_nil : (int) 1; |
1556 | break; |
1557 | case TYPE_lng: |
1558 | * (lng *) res = nil_if_empty ? lng_nil : (lng) 1; |
1559 | break; |
1560 | #ifdef HAVE_HGE |
1561 | case TYPE_hge: |
1562 | * (hge *) res = nil_if_empty ? hge_nil : (hge) 1; |
1563 | break; |
1564 | #endif |
1565 | case TYPE_flt: |
1566 | * (flt *) res = nil_if_empty ? flt_nil : (flt) 1; |
1567 | break; |
1568 | case TYPE_dbl: |
1569 | * (dbl *) res = nil_if_empty ? dbl_nil : (dbl) 1; |
1570 | break; |
1571 | default: |
1572 | GDKerror("BATprod: type combination (prod(%s)->%s) not supported.\n" , |
1573 | ATOMname(b->ttype), ATOMname(tp)); |
1574 | return GDK_FAIL; |
1575 | } |
1576 | if (BATcount(b) == 0) |
1577 | return GDK_SUCCEED; |
1578 | nils = doprod(Tloc(b, 0), b->hseqbase, &ci, ncand, res, true, |
1579 | b->ttype, tp, &min, false, min, max, |
1580 | skip_nils, abort_on_error, nil_if_empty, "BATprod" ); |
1581 | return nils < BUN_NONE ? GDK_SUCCEED : GDK_FAIL; |
1582 | } |
1583 | |
1584 | /* ---------------------------------------------------------------------- */ |
1585 | /* average */ |
1586 | |
1587 | #define AGGR_AVG(TYPE) \ |
1588 | do { \ |
1589 | const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ |
1590 | TYPE *restrict avgs = GDKzalloc(ngrp * sizeof(TYPE)); \ |
1591 | if (avgs == NULL) \ |
1592 | goto alloc_fail; \ |
1593 | while (ncand > 0) { \ |
1594 | ncand--; \ |
1595 | i = canditer_next(&ci) - b->hseqbase; \ |
1596 | if (gids == NULL || \ |
1597 | (gids[i] >= min && gids[i] <= max)) { \ |
1598 | if (gids) \ |
1599 | gid = gids[i] - min; \ |
1600 | else \ |
1601 | gid = (oid) i; \ |
1602 | if (is_##TYPE##_nil(vals[i])) { \ |
1603 | if (!skip_nils) \ |
1604 | cnts[gid] = lng_nil; \ |
1605 | } else if (!is_lng_nil(cnts[gid])) { \ |
1606 | AVERAGE_ITER(TYPE, vals[i], \ |
1607 | avgs[gid], \ |
1608 | rems[gid], \ |
1609 | cnts[gid]); \ |
1610 | } \ |
1611 | } \ |
1612 | } \ |
1613 | for (i = 0; i < ngrp; i++) { \ |
1614 | if (cnts[i] == 0 || is_lng_nil(cnts[i])) { \ |
1615 | dbls[i] = dbl_nil; \ |
1616 | cnts[i] = 0; \ |
1617 | nils++; \ |
1618 | } else { \ |
1619 | dbls[i] = avgs[i] + (dbl) rems[i] / cnts[i]; \ |
1620 | } \ |
1621 | } \ |
1622 | GDKfree(avgs); \ |
1623 | } while (0) |
1624 | |
1625 | #define AGGR_AVG_FLOAT(TYPE) \ |
1626 | do { \ |
1627 | const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ |
1628 | for (i = 0; i < ngrp; i++) \ |
1629 | dbls[i] = 0; \ |
1630 | while (ncand > 0) { \ |
1631 | ncand--; \ |
1632 | i = canditer_next(&ci) - b->hseqbase; \ |
1633 | if (gids == NULL || \ |
1634 | (gids[i] >= min && gids[i] <= max)) { \ |
1635 | if (gids) \ |
1636 | gid = gids[i] - min; \ |
1637 | else \ |
1638 | gid = (oid) i; \ |
1639 | if (is_##TYPE##_nil(vals[i])) { \ |
1640 | if (!skip_nils) \ |
1641 | cnts[gid] = lng_nil; \ |
1642 | } else if (!is_lng_nil(cnts[gid])) { \ |
1643 | AVERAGE_ITER_FLOAT(TYPE, vals[i], \ |
1644 | dbls[gid], \ |
1645 | cnts[gid]); \ |
1646 | } \ |
1647 | } \ |
1648 | } \ |
1649 | for (i = 0; i < ngrp; i++) { \ |
1650 | if (cnts[i] == 0 || is_lng_nil(cnts[i])) { \ |
1651 | dbls[i] = dbl_nil; \ |
1652 | cnts[i] = 0; \ |
1653 | nils++; \ |
1654 | } \ |
1655 | } \ |
1656 | } while (0) |
1657 | |
1658 | /* calculate group averages with optional candidates list */ |
1659 | gdk_return |
1660 | BATgroupavg(BAT **bnp, BAT **cntsp, BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error, int scale) |
1661 | { |
1662 | const oid *restrict gids; |
1663 | oid gid; |
1664 | oid min, max; |
1665 | BUN i, ngrp; |
1666 | BUN nils = 0; |
1667 | BUN *restrict rems = NULL; |
1668 | lng *restrict cnts = NULL; |
1669 | dbl *restrict dbls; |
1670 | BAT *bn = NULL, *cn = NULL; |
1671 | struct canditer ci; |
1672 | BUN ncand; |
1673 | const char *err; |
1674 | |
1675 | assert(tp == TYPE_dbl); |
1676 | (void) tp; /* compatibility (with other BATgroup* |
1677 | * functions) argument */ |
1678 | |
1679 | if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
1680 | GDKerror("BATgroupavg: %s\n" , err); |
1681 | return GDK_FAIL; |
1682 | } |
1683 | if (g == NULL) { |
1684 | GDKerror("BATgroupavg: b and g must be aligned\n" ); |
1685 | return GDK_FAIL; |
1686 | } |
1687 | |
1688 | if (BATcount(b) == 0 || ngrp == 0) { |
1689 | /* trivial: no averages, so return bat aligned with g |
1690 | * with nil in the tail */ |
1691 | bn = BATconstant(ngrp == 0 ? 0 : min, TYPE_dbl, &dbl_nil, ngrp, TRANSIENT); |
1692 | if (bn == NULL) { |
1693 | GDKerror("BATgroupavg: failed to create BAT\n" ); |
1694 | return GDK_FAIL; |
1695 | } |
1696 | if (cntsp) { |
1697 | lng zero = 0; |
1698 | if ((cn = BATconstant(ngrp == 0 ? 0 : min, TYPE_lng, &zero, ngrp, TRANSIENT)) == NULL) { |
1699 | GDKerror("BATgroupavg: failed to create BAT\n" ); |
1700 | BBPreclaim(bn); |
1701 | return GDK_FAIL; |
1702 | } |
1703 | *cntsp = cn; |
1704 | } |
1705 | *bnp = bn; |
1706 | return GDK_SUCCEED; |
1707 | } |
1708 | |
1709 | if ((!skip_nils || cntsp == NULL || b->tnonil) && |
1710 | (e == NULL || |
1711 | (BATcount(e) == BATcount(b) && e->hseqbase == b->hseqbase)) && |
1712 | (BATtdense(g) || (g->tkey && g->tnonil))) { |
1713 | /* trivial: singleton groups, so all results are equal |
1714 | * to the inputs (but possibly a different type) */ |
1715 | if ((bn = BATconvert(b, s, TYPE_dbl, abort_on_error)) == NULL) |
1716 | return GDK_FAIL; |
1717 | if (cntsp) { |
1718 | lng one = 1; |
1719 | if ((cn = BATconstant(ngrp == 0 ? 0 : min, TYPE_lng, &one, ngrp, TRANSIENT)) == NULL) { |
1720 | BBPreclaim(bn); |
1721 | return GDK_FAIL; |
1722 | } |
1723 | *cntsp = cn; |
1724 | } |
1725 | *bnp = bn; |
1726 | return GDK_SUCCEED; |
1727 | } |
1728 | |
1729 | /* allocate temporary space to do per group calculations */ |
1730 | switch (b->ttype) { |
1731 | case TYPE_bte: |
1732 | case TYPE_sht: |
1733 | case TYPE_int: |
1734 | case TYPE_lng: |
1735 | #ifdef HAVE_HGE |
1736 | case TYPE_hge: |
1737 | #endif |
1738 | rems = GDKzalloc(ngrp * sizeof(BUN)); |
1739 | if (rems == NULL) |
1740 | goto alloc_fail; |
1741 | break; |
1742 | default: |
1743 | break; |
1744 | } |
1745 | if (cntsp) { |
1746 | if ((cn = COLnew(min, TYPE_lng, ngrp, TRANSIENT)) == NULL) |
1747 | goto alloc_fail; |
1748 | cnts = (lng *) Tloc(cn, 0); |
1749 | memset(cnts, 0, ngrp * sizeof(lng)); |
1750 | } else { |
1751 | cnts = GDKzalloc(ngrp * sizeof(lng)); |
1752 | if (cnts == NULL) |
1753 | goto alloc_fail; |
1754 | } |
1755 | |
1756 | bn = COLnew(min, TYPE_dbl, ngrp, TRANSIENT); |
1757 | if (bn == NULL) |
1758 | goto alloc_fail; |
1759 | dbls = (dbl *) Tloc(bn, 0); |
1760 | |
1761 | if (BATtdense(g)) |
1762 | gids = NULL; |
1763 | else |
1764 | gids = (const oid *) Tloc(g, 0); |
1765 | |
1766 | switch (b->ttype) { |
1767 | case TYPE_bte: |
1768 | AGGR_AVG(bte); |
1769 | break; |
1770 | case TYPE_sht: |
1771 | AGGR_AVG(sht); |
1772 | break; |
1773 | case TYPE_int: |
1774 | AGGR_AVG(int); |
1775 | break; |
1776 | case TYPE_lng: |
1777 | AGGR_AVG(lng); |
1778 | break; |
1779 | #ifdef HAVE_HGE |
1780 | case TYPE_hge: |
1781 | AGGR_AVG(hge); |
1782 | break; |
1783 | #endif |
1784 | case TYPE_flt: |
1785 | AGGR_AVG_FLOAT(flt); |
1786 | break; |
1787 | case TYPE_dbl: |
1788 | AGGR_AVG_FLOAT(dbl); |
1789 | break; |
1790 | default: |
1791 | GDKfree(rems); |
1792 | if (cn) |
1793 | BBPreclaim(cn); |
1794 | else |
1795 | GDKfree(cnts); |
1796 | BBPunfix(bn->batCacheid); |
1797 | GDKerror("BATgroupavg: type (%s) not supported.\n" , |
1798 | ATOMname(b->ttype)); |
1799 | return GDK_FAIL; |
1800 | } |
1801 | GDKfree(rems); |
1802 | if (cn == NULL) |
1803 | GDKfree(cnts); |
1804 | else { |
1805 | BATsetcount(cn, ngrp); |
1806 | cn->tkey = BATcount(cn) <= 1; |
1807 | cn->tsorted = BATcount(cn) <= 1; |
1808 | cn->trevsorted = BATcount(cn) <= 1; |
1809 | cn->tnil = false; |
1810 | cn->tnonil = true; |
1811 | *cntsp = cn; |
1812 | } |
1813 | if (scale != 0) { |
1814 | dbl fac = pow(10.0, (double) scale); |
1815 | for (i = 0; i < ngrp; i++) { |
1816 | if (!is_dbl_nil(dbls[i])) |
1817 | dbls[i] *= fac; |
1818 | } |
1819 | } |
1820 | BATsetcount(bn, ngrp); |
1821 | bn->tkey = BATcount(bn) <= 1; |
1822 | bn->tsorted = BATcount(bn) <= 1; |
1823 | bn->trevsorted = BATcount(bn) <= 1; |
1824 | bn->tnil = nils != 0; |
1825 | bn->tnonil = nils == 0; |
1826 | *bnp = bn; |
1827 | return GDK_SUCCEED; |
1828 | |
1829 | alloc_fail: |
1830 | if (bn) |
1831 | BBPunfix(bn->batCacheid); |
1832 | GDKfree(rems); |
1833 | if (cntsp) { |
1834 | BBPreclaim(*cntsp); |
1835 | *cntsp = NULL; |
1836 | } else if (cnts) { |
1837 | GDKfree(cnts); |
1838 | } |
1839 | GDKerror("BATgroupavg: cannot allocate enough memory.\n" ); |
1840 | return GDK_FAIL; |
1841 | } |
1842 | |
1843 | #define AVERAGE_TYPE_LNG_HGE(TYPE,lng_hge) \ |
1844 | do { \ |
1845 | TYPE x, a; \ |
1846 | \ |
1847 | /* first try to calculate the sum of all values into a */ \ |
1848 | /* lng/hge */ \ |
1849 | while (ncand > 0) { \ |
1850 | ncand--; \ |
1851 | i = canditer_next(&ci) - b->hseqbase; \ |
1852 | x = ((const TYPE *) src)[i]; \ |
1853 | if (is_##TYPE##_nil(x)) \ |
1854 | continue; \ |
1855 | ADD_WITH_CHECK(x, sum, \ |
1856 | lng_hge, sum, \ |
1857 | GDK_##lng_hge##_max, \ |
1858 | goto overflow##TYPE); \ |
1859 | /* don't count value until after overflow check */ \ |
1860 | n++; \ |
1861 | } \ |
1862 | /* the sum fit, so now we can calculate the average */ \ |
1863 | *avg = n > 0 ? (dbl) sum / n : dbl_nil; \ |
1864 | if (0) { \ |
1865 | overflow##TYPE: \ |
1866 | /* we get here if sum(x[0],...,x[i]) doesn't */ \ |
1867 | /* fit in a lng/hge but sum(x[0],...,x[i-1]) did */ \ |
1868 | /* the variable sum contains that sum */ \ |
1869 | /* the rest of the calculation is done */ \ |
1870 | /* according to the loop invariant described */ \ |
1871 | /* in the below loop */ \ |
1872 | /* note that n necessarily is > 0 (else no */ \ |
1873 | /* overflow possible) */ \ |
1874 | assert(n > 0); \ |
1875 | if (sum >= 0) { \ |
1876 | a = (TYPE) (sum / (lng_hge) n); /* this fits */ \ |
1877 | r = (BUN) (sum % (SBUN) n); \ |
1878 | } else { \ |
1879 | sum = -sum; \ |
1880 | a = - (TYPE) (sum / (lng_hge) n); /* this fits */ \ |
1881 | r = (BUN) (sum % (SBUN) n); \ |
1882 | if (r) { \ |
1883 | a--; \ |
1884 | r = n - r; \ |
1885 | } \ |
1886 | } \ |
1887 | while (ncand > 0) { \ |
1888 | /* loop invariant: */ \ |
1889 | /* a + r/n == average(x[0],...,x[n]); */ \ |
1890 | /* 0 <= r < n */ \ |
1891 | ncand--; \ |
1892 | i = canditer_next(&ci) - b->hseqbase; \ |
1893 | x = ((const TYPE *) src)[i]; \ |
1894 | if (is_##TYPE##_nil(x)) \ |
1895 | continue; \ |
1896 | AVERAGE_ITER(TYPE, x, a, r, n); \ |
1897 | } \ |
1898 | *avg = a + (dbl) r / n; \ |
1899 | } \ |
1900 | } while (0) |
1901 | |
1902 | #ifdef HAVE_HGE |
1903 | #define AVERAGE_TYPE(TYPE) AVERAGE_TYPE_LNG_HGE(TYPE,hge) |
1904 | #else |
1905 | #define AVERAGE_TYPE(TYPE) AVERAGE_TYPE_LNG_HGE(TYPE,lng) |
1906 | #endif |
1907 | |
1908 | #define AVERAGE_FLOATTYPE(TYPE) \ |
1909 | do { \ |
1910 | double a = 0; \ |
1911 | TYPE x; \ |
1912 | while (ncand > 0) { \ |
1913 | ncand--; \ |
1914 | i = canditer_next(&ci) - b->hseqbase; \ |
1915 | x = ((const TYPE *) src)[i]; \ |
1916 | if (is_##TYPE##_nil(x)) \ |
1917 | continue; \ |
1918 | AVERAGE_ITER_FLOAT(TYPE, x, a, n); \ |
1919 | } \ |
1920 | *avg = n > 0 ? a : dbl_nil; \ |
1921 | } while (0) |
1922 | |
1923 | gdk_return |
1924 | BATcalcavg(BAT *b, BAT *s, dbl *avg, BUN *vals, int scale) |
1925 | { |
1926 | BUN n = 0, r = 0, i = 0; |
1927 | #ifdef HAVE_HGE |
1928 | hge sum = 0; |
1929 | #else |
1930 | lng sum = 0; |
1931 | #endif |
1932 | struct canditer ci; |
1933 | BUN ncand; |
1934 | const void *restrict src; |
1935 | /* these two needed for ADD_WITH_CHECK macro */ |
1936 | bool abort_on_error = true; |
1937 | BUN nils = 0; |
1938 | |
1939 | ncand = canditer_init(&ci, b, s); |
1940 | |
1941 | src = Tloc(b, 0); |
1942 | |
1943 | switch (b->ttype) { |
1944 | case TYPE_bte: |
1945 | AVERAGE_TYPE(bte); |
1946 | break; |
1947 | case TYPE_sht: |
1948 | AVERAGE_TYPE(sht); |
1949 | break; |
1950 | case TYPE_int: |
1951 | AVERAGE_TYPE(int); |
1952 | break; |
1953 | case TYPE_lng: |
1954 | AVERAGE_TYPE(lng); |
1955 | break; |
1956 | #ifdef HAVE_HGE |
1957 | case TYPE_hge: |
1958 | AVERAGE_TYPE(hge); |
1959 | break; |
1960 | #endif |
1961 | case TYPE_flt: |
1962 | AVERAGE_FLOATTYPE(flt); |
1963 | break; |
1964 | case TYPE_dbl: |
1965 | AVERAGE_FLOATTYPE(dbl); |
1966 | break; |
1967 | default: |
1968 | GDKerror("BATcalcavg: average of type %s unsupported.\n" , |
1969 | ATOMname(b->ttype)); |
1970 | return GDK_FAIL; |
1971 | } |
1972 | if (scale != 0 && !is_dbl_nil(*avg)) |
1973 | *avg *= pow(10.0, (double) scale); |
1974 | if (vals) |
1975 | *vals = n; |
1976 | return GDK_SUCCEED; |
1977 | } |
1978 | |
1979 | /* ---------------------------------------------------------------------- */ |
1980 | /* count */ |
1981 | |
1982 | #define AGGR_COUNT(TYPE) \ |
1983 | do { \ |
1984 | const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ |
1985 | while (ncand > 0) { \ |
1986 | ncand--; \ |
1987 | i = canditer_next(&ci) - b->hseqbase; \ |
1988 | if (gids == NULL || \ |
1989 | (gids[i] >= min && gids[i] <= max)) { \ |
1990 | if (gids) \ |
1991 | gid = gids[i] - min; \ |
1992 | else \ |
1993 | gid = (oid) i; \ |
1994 | if (!is_##TYPE##_nil(vals[i])) { \ |
1995 | cnts[gid]++; \ |
1996 | } \ |
1997 | } \ |
1998 | } \ |
1999 | } while (0) |
2000 | |
2001 | /* calculate group counts with optional candidates list */ |
2002 | BAT * |
2003 | BATgroupcount(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error) |
2004 | { |
2005 | const oid *restrict gids; |
2006 | oid gid; |
2007 | oid min, max; |
2008 | BUN i, ngrp; |
2009 | lng *restrict cnts; |
2010 | BAT *bn = NULL; |
2011 | int t; |
2012 | const void *nil; |
2013 | int (*atomcmp)(const void *, const void *); |
2014 | BATiter bi; |
2015 | struct canditer ci; |
2016 | BUN ncand; |
2017 | const char *err; |
2018 | |
2019 | assert(tp == TYPE_lng); |
2020 | (void) tp; /* compatibility (with other BATgroup* */ |
2021 | (void) abort_on_error; /* functions) argument */ |
2022 | |
2023 | if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
2024 | GDKerror("BATgroupcount: %s\n" , err); |
2025 | return NULL; |
2026 | } |
2027 | if (g == NULL) { |
2028 | GDKerror("BATgroupcount: b and g must be aligned\n" ); |
2029 | return NULL; |
2030 | } |
2031 | |
2032 | if (BATcount(b) == 0 || ngrp == 0) { |
2033 | /* trivial: no counts, so return bat aligned with g |
2034 | * with zero in the tail */ |
2035 | lng zero = 0; |
2036 | return BATconstant(ngrp == 0 ? 0 : min, TYPE_lng, &zero, ngrp, TRANSIENT); |
2037 | } |
2038 | |
2039 | bn = COLnew(min, TYPE_lng, ngrp, TRANSIENT); |
2040 | if (bn == NULL) |
2041 | return NULL; |
2042 | cnts = (lng *) Tloc(bn, 0); |
2043 | memset(cnts, 0, ngrp * sizeof(lng)); |
2044 | |
2045 | if (BATtdense(g)) |
2046 | gids = NULL; |
2047 | else |
2048 | gids = (const oid *) Tloc(g, 0); |
2049 | |
2050 | if (!skip_nils || b->tnonil) { |
2051 | /* if nils are nothing special, or if there are no |
2052 | * nils, we don't need to look at the values at all */ |
2053 | if (gids) { |
2054 | while (ncand > 0) { |
2055 | ncand--; |
2056 | i = canditer_next(&ci) - b->hseqbase; |
2057 | if (gids[i] >= min && gids[i] <= max) |
2058 | cnts[gids[i] - min]++; |
2059 | } |
2060 | } else { |
2061 | while (ncand > 0) { |
2062 | ncand--; |
2063 | i = canditer_next(&ci) - b->hseqbase; |
2064 | cnts[i] = 1; |
2065 | } |
2066 | } |
2067 | } else { |
2068 | t = b->ttype; |
2069 | nil = ATOMnilptr(t); |
2070 | atomcmp = ATOMcompare(t); |
2071 | t = ATOMbasetype(t); |
2072 | |
2073 | switch (t) { |
2074 | case TYPE_bte: |
2075 | AGGR_COUNT(bte); |
2076 | break; |
2077 | case TYPE_sht: |
2078 | AGGR_COUNT(sht); |
2079 | break; |
2080 | case TYPE_int: |
2081 | AGGR_COUNT(int); |
2082 | break; |
2083 | case TYPE_lng: |
2084 | AGGR_COUNT(lng); |
2085 | break; |
2086 | #ifdef HAVE_HGE |
2087 | case TYPE_hge: |
2088 | AGGR_COUNT(hge); |
2089 | break; |
2090 | #endif |
2091 | case TYPE_flt: |
2092 | AGGR_COUNT(flt); |
2093 | break; |
2094 | case TYPE_dbl: |
2095 | AGGR_COUNT(dbl); |
2096 | break; |
2097 | default: |
2098 | bi = bat_iterator(b); |
2099 | |
2100 | while (ncand > 0) { |
2101 | ncand--; |
2102 | i = canditer_next(&ci) - b->hseqbase; |
2103 | if (gids == NULL || |
2104 | (gids[i] >= min && gids[i] <= max)) { |
2105 | if (gids) |
2106 | gid = gids[i] - min; |
2107 | else |
2108 | gid = (oid) i; |
2109 | if ((*atomcmp)(BUNtail(bi, i), nil) != 0) { |
2110 | cnts[gid]++; |
2111 | } |
2112 | } |
2113 | } |
2114 | break; |
2115 | } |
2116 | } |
2117 | BATsetcount(bn, ngrp); |
2118 | bn->tkey = BATcount(bn) <= 1; |
2119 | bn->tsorted = BATcount(bn) <= 1; |
2120 | bn->trevsorted = BATcount(bn) <= 1; |
2121 | bn->tnil = false; |
2122 | bn->tnonil = true; |
2123 | return bn; |
2124 | } |
2125 | |
2126 | /* calculate group sizes (number of TRUE values) with optional |
2127 | * candidates list */ |
2128 | BAT * |
2129 | BATgroupsize(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool abort_on_error) |
2130 | { |
2131 | const oid *restrict gids; |
2132 | oid min, max; |
2133 | BUN i, ngrp; |
2134 | const bit *restrict bits; |
2135 | lng *restrict cnts; |
2136 | BAT *bn = NULL; |
2137 | struct canditer ci; |
2138 | BUN ncand; |
2139 | const char *err; |
2140 | |
2141 | assert(tp == TYPE_lng); |
2142 | assert(b->ttype == TYPE_bit); |
2143 | /* compatibility arguments */ |
2144 | (void) tp; |
2145 | (void) abort_on_error; |
2146 | (void) skip_nils; |
2147 | |
2148 | if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
2149 | GDKerror("BATgroupsize: %s\n" , err); |
2150 | return NULL; |
2151 | } |
2152 | if (g == NULL) { |
2153 | GDKerror("BATgroupsize: b and g must be aligned\n" ); |
2154 | return NULL; |
2155 | } |
2156 | |
2157 | if (BATcount(b) == 0 || ngrp == 0) { |
2158 | /* trivial: no products, so return bat aligned with g |
2159 | * with zero in the tail */ |
2160 | lng zero = 0; |
2161 | return BATconstant(ngrp == 0 ? 0 : min, TYPE_lng, &zero, ngrp, TRANSIENT); |
2162 | } |
2163 | |
2164 | bn = COLnew(min, TYPE_lng, ngrp, TRANSIENT); |
2165 | if (bn == NULL) |
2166 | return NULL; |
2167 | cnts = (lng *) Tloc(bn, 0); |
2168 | memset(cnts, 0, ngrp * sizeof(lng)); |
2169 | |
2170 | if (BATtdense(g)) |
2171 | gids = NULL; |
2172 | else |
2173 | gids = (const oid *) Tloc(g, 0); |
2174 | |
2175 | bits = (const bit *) Tloc(b, 0); |
2176 | |
2177 | while (ncand > 0) { |
2178 | ncand--; |
2179 | i = canditer_next(&ci) - b->hseqbase; |
2180 | if (bits[i] == 1 && |
2181 | (gids == NULL || (gids[i] >= min && gids[i] <= max))) { |
2182 | cnts[gids ? gids[i] - min : (oid) i]++; |
2183 | } |
2184 | } |
2185 | BATsetcount(bn, ngrp); |
2186 | bn->tkey = BATcount(bn) <= 1; |
2187 | bn->tsorted = BATcount(bn) <= 1; |
2188 | bn->trevsorted = BATcount(bn) <= 1; |
2189 | bn->tnil = false; |
2190 | bn->tnonil = true; |
2191 | return bn; |
2192 | } |
2193 | |
2194 | /* ---------------------------------------------------------------------- */ |
2195 | /* min and max */ |
2196 | |
2197 | #define AGGR_CMP(TYPE, OP) \ |
2198 | do { \ |
2199 | const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ |
2200 | if (ngrp == cnt) { \ |
2201 | /* single element groups */ \ |
2202 | while (ncand > 0) { \ |
2203 | ncand--; \ |
2204 | i = canditer_next(ci) - b->hseqbase; \ |
2205 | if (!skip_nils || \ |
2206 | !is_##TYPE##_nil(vals[i])) { \ |
2207 | oids[i] = i + b->hseqbase; \ |
2208 | nils--; \ |
2209 | } \ |
2210 | } \ |
2211 | } else { \ |
2212 | gid = 0; /* in case gids == NULL */ \ |
2213 | while (ncand > 0) { \ |
2214 | ncand--; \ |
2215 | i = canditer_next(ci) - b->hseqbase; \ |
2216 | if (gids == NULL || \ |
2217 | (gids[i] >= min && gids[i] <= max)) { \ |
2218 | if (gids) \ |
2219 | gid = gids[i] - min; \ |
2220 | if (!skip_nils || !is_##TYPE##_nil(vals[i])) { \ |
2221 | if (is_oid_nil(oids[gid])) { \ |
2222 | oids[gid] = i + b->hseqbase; \ |
2223 | nils--; \ |
2224 | } else if (!is_##TYPE##_nil(vals[oids[gid] - b->hseqbase]) && \ |
2225 | (is_##TYPE##_nil(vals[i]) || \ |
2226 | OP(vals[i], vals[oids[gid] - b->hseqbase]))) \ |
2227 | oids[gid] = i + b->hseqbase; \ |
2228 | } \ |
2229 | } \ |
2230 | } \ |
2231 | } \ |
2232 | } while (0) |
2233 | |
2234 | /* calculate group minimums with optional candidates list |
2235 | * |
2236 | * note that this functions returns *positions* of where the minimum |
2237 | * values occur */ |
2238 | static BUN |
2239 | do_groupmin(oid *restrict oids, BAT *b, const oid *restrict gids, BUN ngrp, |
2240 | oid min, oid max, struct canditer *restrict ci, BUN ncand, |
2241 | BUN cnt, bool skip_nils, bool gdense) |
2242 | { |
2243 | oid gid; |
2244 | BUN i, nils; |
2245 | int t; |
2246 | const void *nil; |
2247 | int (*atomcmp)(const void *, const void *); |
2248 | BATiter bi; |
2249 | |
2250 | nils = ngrp; |
2251 | for (i = 0; i < ngrp; i++) |
2252 | oids[i] = oid_nil; |
2253 | if (cnt == 0) |
2254 | return nils; |
2255 | |
2256 | t = b->ttype; |
2257 | nil = ATOMnilptr(t); |
2258 | atomcmp = ATOMcompare(t); |
2259 | t = ATOMbasetype(t); |
2260 | switch (t) { |
2261 | case TYPE_bte: |
2262 | AGGR_CMP(bte, LT); |
2263 | break; |
2264 | case TYPE_sht: |
2265 | AGGR_CMP(sht, LT); |
2266 | break; |
2267 | case TYPE_int: |
2268 | AGGR_CMP(int, LT); |
2269 | break; |
2270 | case TYPE_lng: |
2271 | AGGR_CMP(lng, LT); |
2272 | break; |
2273 | #ifdef HAVE_HGE |
2274 | case TYPE_hge: |
2275 | AGGR_CMP(hge, LT); |
2276 | break; |
2277 | #endif |
2278 | case TYPE_flt: |
2279 | AGGR_CMP(flt, LT); |
2280 | break; |
2281 | case TYPE_dbl: |
2282 | AGGR_CMP(dbl, LT); |
2283 | break; |
2284 | case TYPE_void: |
2285 | if (!gdense && gids == NULL) { |
2286 | oids[0] = canditer_next(ci); |
2287 | nils--; |
2288 | break; |
2289 | } |
2290 | /* fall through */ |
2291 | default: |
2292 | assert(b->ttype != TYPE_oid); |
2293 | bi = bat_iterator(b); |
2294 | |
2295 | if (gdense) { |
2296 | /* single element groups */ |
2297 | while (ncand > 0) { |
2298 | ncand--; |
2299 | i = canditer_next(ci) - b->hseqbase; |
2300 | if (!skip_nils || |
2301 | (*atomcmp)(BUNtail(bi, i), nil) != 0) { |
2302 | oids[i] = i + b->hseqbase; |
2303 | nils--; |
2304 | } |
2305 | } |
2306 | } else { |
2307 | gid = 0; /* in case gids == NULL */ |
2308 | while (ncand > 0) { |
2309 | ncand--; |
2310 | i = canditer_next(ci) - b->hseqbase; |
2311 | if (gids == NULL || |
2312 | (gids[i] >= min && gids[i] <= max)) { |
2313 | const void *v = BUNtail(bi, i); |
2314 | if (gids) |
2315 | gid = gids[i] - min; |
2316 | if (!skip_nils || |
2317 | (*atomcmp)(v, nil) != 0) { |
2318 | if (is_oid_nil(oids[gid])) { |
2319 | oids[gid] = i + b->hseqbase; |
2320 | nils--; |
2321 | } else if (t != TYPE_void) { |
2322 | const void *g = BUNtail(bi, (BUN) (oids[gid] - b->hseqbase)); |
2323 | if ((*atomcmp)(g, nil) != 0 && |
2324 | ((*atomcmp)(v, nil) == 0 || |
2325 | LT((*atomcmp)(v, g), 0))) |
2326 | oids[gid] = i + b->hseqbase; |
2327 | } |
2328 | } |
2329 | } |
2330 | } |
2331 | } |
2332 | break; |
2333 | } |
2334 | |
2335 | return nils; |
2336 | } |
2337 | |
2338 | /* calculate group maximums with optional candidates list |
2339 | * |
2340 | * note that this functions returns *positions* of where the maximum |
2341 | * values occur */ |
2342 | static BUN |
2343 | do_groupmax(oid *restrict oids, BAT *b, const oid *restrict gids, BUN ngrp, |
2344 | oid min, oid max, struct canditer *restrict ci, BUN ncand, |
2345 | BUN cnt, bool skip_nils, bool gdense) |
2346 | { |
2347 | oid gid; |
2348 | BUN i, nils; |
2349 | int t; |
2350 | const void *nil; |
2351 | int (*atomcmp)(const void *, const void *); |
2352 | BATiter bi; |
2353 | |
2354 | nils = ngrp; |
2355 | for (i = 0; i < ngrp; i++) |
2356 | oids[i] = oid_nil; |
2357 | if (cnt == 0) |
2358 | return nils; |
2359 | |
2360 | t = b->ttype; |
2361 | nil = ATOMnilptr(t); |
2362 | atomcmp = ATOMcompare(t); |
2363 | t = ATOMbasetype(t); |
2364 | switch (t) { |
2365 | case TYPE_bte: |
2366 | AGGR_CMP(bte, GT); |
2367 | break; |
2368 | case TYPE_sht: |
2369 | AGGR_CMP(sht, GT); |
2370 | break; |
2371 | case TYPE_int: |
2372 | AGGR_CMP(int, GT); |
2373 | break; |
2374 | case TYPE_lng: |
2375 | AGGR_CMP(lng, GT); |
2376 | break; |
2377 | #ifdef HAVE_HGE |
2378 | case TYPE_hge: |
2379 | AGGR_CMP(hge, GT); |
2380 | break; |
2381 | #endif |
2382 | case TYPE_flt: |
2383 | AGGR_CMP(flt, GT); |
2384 | break; |
2385 | case TYPE_dbl: |
2386 | AGGR_CMP(dbl, GT); |
2387 | break; |
2388 | case TYPE_void: |
2389 | if (!gdense && gids == NULL) { |
2390 | oids[0] = canditer_last(ci); |
2391 | nils--; |
2392 | break; |
2393 | } |
2394 | /* fall through */ |
2395 | default: |
2396 | assert(b->ttype != TYPE_oid); |
2397 | bi = bat_iterator(b); |
2398 | |
2399 | if (gdense) { |
2400 | /* single element groups */ |
2401 | while (ncand > 0) { |
2402 | ncand--; |
2403 | i = canditer_next(ci) - b->hseqbase; |
2404 | if (!skip_nils || |
2405 | (*atomcmp)(BUNtail(bi, i), nil) != 0) { |
2406 | oids[i] = i + b->hseqbase; |
2407 | nils--; |
2408 | } |
2409 | } |
2410 | } else { |
2411 | gid = 0; /* in case gids == NULL */ |
2412 | while (ncand > 0) { |
2413 | ncand--; |
2414 | i = canditer_next(ci) - b->hseqbase; |
2415 | if (gids == NULL || |
2416 | (gids[i] >= min && gids[i] <= max)) { |
2417 | const void *v = BUNtail(bi, i); |
2418 | if (gids) |
2419 | gid = gids[i] - min; |
2420 | if (!skip_nils || |
2421 | (*atomcmp)(v, nil) != 0) { |
2422 | if (is_oid_nil(oids[gid])) { |
2423 | oids[gid] = i + b->hseqbase; |
2424 | nils--; |
2425 | } else { |
2426 | const void *g = BUNtail(bi, (BUN) (oids[gid] - b->hseqbase)); |
2427 | if (t == TYPE_void || |
2428 | ((*atomcmp)(g, nil) != 0 && |
2429 | ((*atomcmp)(v, nil) == 0 || |
2430 | GT((*atomcmp)(v, g), 0)))) |
2431 | oids[gid] = i + b->hseqbase; |
2432 | } |
2433 | } |
2434 | } |
2435 | } |
2436 | } |
2437 | break; |
2438 | } |
2439 | |
2440 | return nils; |
2441 | } |
2442 | |
2443 | static BAT * |
2444 | BATgroupminmax(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, |
2445 | bool abort_on_error, |
2446 | BUN (*minmax)(oid *restrict, BAT *, const oid *restrict, BUN, |
2447 | oid, oid, struct canditer *restrict, BUN, |
2448 | BUN, bool, bool), |
2449 | const char *name) |
2450 | { |
2451 | const oid *restrict gids; |
2452 | oid min, max; |
2453 | BUN ngrp; |
2454 | oid *restrict oids; |
2455 | BAT *bn = NULL; |
2456 | BUN nils; |
2457 | struct canditer ci; |
2458 | BUN ncand; |
2459 | const char *err; |
2460 | |
2461 | assert(tp == TYPE_oid); |
2462 | (void) tp; /* compatibility (with other BATgroup* */ |
2463 | (void) abort_on_error; /* functions) argument */ |
2464 | |
2465 | if (!ATOMlinear(b->ttype)) { |
2466 | GDKerror("%s: cannot determine minimum on " |
2467 | "non-linear type %s\n" , name, ATOMname(b->ttype)); |
2468 | return NULL; |
2469 | } |
2470 | |
2471 | if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
2472 | GDKerror("%s: %s\n" , name, err); |
2473 | return NULL; |
2474 | } |
2475 | |
2476 | if (BATcount(b) == 0 || ngrp == 0) { |
2477 | /* trivial: no minimums, so return bat aligned with g |
2478 | * with nil in the tail */ |
2479 | return BATconstant(ngrp == 0 ? 0 : min, TYPE_oid, &oid_nil, ngrp, TRANSIENT); |
2480 | } |
2481 | |
2482 | bn = COLnew(min, TYPE_oid, ngrp, TRANSIENT); |
2483 | if (bn == NULL) |
2484 | return NULL; |
2485 | oids = (oid *) Tloc(bn, 0); |
2486 | |
2487 | if (g == NULL || BATtdense(g)) |
2488 | gids = NULL; |
2489 | else |
2490 | gids = (const oid *) Tloc(g, 0); |
2491 | |
2492 | nils = (*minmax)(oids, b, gids, ngrp, min, max, &ci, ncand, |
2493 | BATcount(b), skip_nils, g && BATtdense(g)); |
2494 | |
2495 | BATsetcount(bn, ngrp); |
2496 | |
2497 | bn->tkey = BATcount(bn) <= 1; |
2498 | bn->tsorted = BATcount(bn) <= 1; |
2499 | bn->trevsorted = BATcount(bn) <= 1; |
2500 | bn->tnil = nils != 0; |
2501 | bn->tnonil = nils == 0; |
2502 | return bn; |
2503 | } |
2504 | |
2505 | BAT * |
2506 | BATgroupmin(BAT *b, BAT *g, BAT *e, BAT *s, int tp, |
2507 | bool skip_nils, bool abort_on_error) |
2508 | { |
2509 | return BATgroupminmax(b, g, e, s, tp, skip_nils, abort_on_error, |
2510 | do_groupmin, "BATgroupmin" ); |
2511 | } |
2512 | |
2513 | void * |
2514 | BATmin(BAT *b, void *aggr) |
2515 | { |
2516 | return BATmin_skipnil(b, aggr, 1); |
2517 | } |
2518 | |
2519 | /* return pointer to smallest non-nil value in b, or pointer to nil if |
2520 | * there is no such value (no values at all, or only nil) */ |
2521 | void * |
2522 | BATmin_skipnil(BAT *b, void *aggr, bit skipnil) |
2523 | { |
2524 | PROPrec *prop; |
2525 | const void *res; |
2526 | size_t s; |
2527 | BATiter bi; |
2528 | |
2529 | if (!ATOMlinear(b->ttype)) { |
2530 | /* there is no such thing as a smallest value if you |
2531 | * can't compare values */ |
2532 | GDKerror("BATmin: non-linear type" ); |
2533 | return NULL; |
2534 | } |
2535 | if (BATcount(b) == 0) { |
2536 | res = ATOMnilptr(b->ttype); |
2537 | } else if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL) { |
2538 | res = VALptr(&prop->v); |
2539 | } else { |
2540 | oid pos; |
2541 | BAT *pb = NULL; |
2542 | |
2543 | if (BATcheckorderidx(b) || |
2544 | (VIEWtparent(b) && |
2545 | (pb = BBPdescriptor(VIEWtparent(b))) != NULL && |
2546 | pb->theap.base == b->theap.base && |
2547 | BATcount(pb) == BATcount(b) && |
2548 | pb->hseqbase == b->hseqbase && |
2549 | BATcheckorderidx(pb))) { |
2550 | const oid *ords = (const oid *) (pb ? pb->torderidx->base : b->torderidx->base) + ORDERIDXOFF; |
2551 | BUN r; |
2552 | if (!b->tnonil) { |
2553 | r = binsearch(ords, 0, b->ttype, Tloc(b, 0), |
2554 | b->tvheap ? b->tvheap->base : NULL, |
2555 | b->twidth, 0, BATcount(b), |
2556 | ATOMnilptr(b->ttype), 1, 1); |
2557 | if (r == 0) { |
2558 | b->tnonil = true; |
2559 | b->batDirtydesc = true; |
2560 | } |
2561 | } else { |
2562 | r = 0; |
2563 | } |
2564 | if (r == BATcount(b)) { |
2565 | /* no non-nil values */ |
2566 | pos = oid_nil; |
2567 | } else { |
2568 | pos = ords[r]; |
2569 | } |
2570 | } else if ((VIEWtparent(b) == 0 || |
2571 | BATcount(b) == BATcount(BBPdescriptor(VIEWtparent(b)))) && |
2572 | BATcheckimprints(b)) { |
2573 | Imprints *imprints = VIEWtparent(b) ? BBPdescriptor(VIEWtparent(b))->timprints : b->timprints; |
2574 | int i; |
2575 | |
2576 | pos = oid_nil; |
2577 | /* find first non-empty bin */ |
2578 | for (i = 0; i < imprints->bits; i++) { |
2579 | if (imprints->stats[i + 128]) { |
2580 | pos = imprints->stats[i] + b->hseqbase; |
2581 | break; |
2582 | } |
2583 | } |
2584 | } else { |
2585 | struct canditer ci; |
2586 | BUN ncand = canditer_init(&ci, b, NULL); |
2587 | (void) do_groupmin(&pos, b, NULL, 1, 0, 0, &ci, ncand, |
2588 | BATcount(b), skipnil, false); |
2589 | } |
2590 | if (is_oid_nil(pos)) { |
2591 | res = ATOMnilptr(b->ttype); |
2592 | } else { |
2593 | bi = bat_iterator(b); |
2594 | res = BUNtail(bi, pos - b->hseqbase); |
2595 | BATsetprop(b, GDK_MIN_VALUE, b->ttype, res); |
2596 | } |
2597 | } |
2598 | if (aggr == NULL) { |
2599 | s = ATOMlen(b->ttype, res); |
2600 | aggr = GDKmalloc(s); |
2601 | } else { |
2602 | s = ATOMsize(ATOMtype(b->ttype)); |
2603 | } |
2604 | if (aggr != NULL) /* else: malloc error */ |
2605 | memcpy(aggr, res, s); |
2606 | return aggr; |
2607 | } |
2608 | |
2609 | BAT * |
2610 | BATgroupmax(BAT *b, BAT *g, BAT *e, BAT *s, int tp, |
2611 | bool skip_nils, bool abort_on_error) |
2612 | { |
2613 | return BATgroupminmax(b, g, e, s, tp, skip_nils, abort_on_error, |
2614 | do_groupmax, "BATgroupmax" ); |
2615 | } |
2616 | |
2617 | void * |
2618 | BATmax(BAT *b, void *aggr) |
2619 | { |
2620 | return BATmax_skipnil(b, aggr, 1); |
2621 | } |
2622 | |
2623 | void * |
2624 | BATmax_skipnil(BAT *b, void *aggr, bit skipnil) |
2625 | { |
2626 | PROPrec *prop; |
2627 | const void *res; |
2628 | size_t s; |
2629 | BATiter bi; |
2630 | |
2631 | if (!ATOMlinear(b->ttype)) { |
2632 | GDKerror("BATmax: non-linear type" ); |
2633 | return NULL; |
2634 | } |
2635 | if (BATcount(b) == 0) { |
2636 | res = ATOMnilptr(b->ttype); |
2637 | } else if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL) { |
2638 | res = VALptr(&prop->v); |
2639 | } else { |
2640 | oid pos; |
2641 | BAT *pb = NULL; |
2642 | |
2643 | if (BATcheckorderidx(b) || |
2644 | (VIEWtparent(b) && |
2645 | (pb = BBPdescriptor(VIEWtparent(b))) != NULL && |
2646 | pb->theap.base == b->theap.base && |
2647 | BATcount(pb) == BATcount(b) && |
2648 | pb->hseqbase == b->hseqbase && |
2649 | BATcheckorderidx(pb))) { |
2650 | const oid *ords = (const oid *) (pb ? pb->torderidx->base : b->torderidx->base) + ORDERIDXOFF; |
2651 | |
2652 | pos = ords[BATcount(b) - 1]; |
2653 | /* nils are first, ie !skipnil, check for nils */ |
2654 | if (!skipnil) { |
2655 | BUN z = ords[0]; |
2656 | |
2657 | bi = bat_iterator(b); |
2658 | res = BUNtail(bi, z - b->hseqbase); |
2659 | |
2660 | if (ATOMcmp(b->ttype, res, ATOMnilptr(b->ttype)) == 0) |
2661 | pos = z; |
2662 | } |
2663 | } else if ((VIEWtparent(b) == 0 || |
2664 | BATcount(b) == BATcount(BBPdescriptor(VIEWtparent(b)))) && |
2665 | BATcheckimprints(b)) { |
2666 | Imprints *imprints = VIEWtparent(b) ? BBPdescriptor(VIEWtparent(b))->timprints : b->timprints; |
2667 | int i; |
2668 | |
2669 | pos = oid_nil; |
2670 | /* find last non-empty bin */ |
2671 | for (i = imprints->bits - 1; i >= 0; i--) { |
2672 | if (imprints->stats[i + 128]) { |
2673 | pos = imprints->stats[i + 64] + b->hseqbase; |
2674 | break; |
2675 | } |
2676 | } |
2677 | } else { |
2678 | struct canditer ci; |
2679 | BUN ncand = canditer_init(&ci, b, NULL); |
2680 | (void) do_groupmax(&pos, b, NULL, 1, 0, 0, &ci, ncand, |
2681 | BATcount(b), skipnil, false); |
2682 | } |
2683 | if (is_oid_nil(pos)) { |
2684 | res = ATOMnilptr(b->ttype); |
2685 | } else { |
2686 | bi = bat_iterator(b); |
2687 | res = BUNtail(bi, pos - b->hseqbase); |
2688 | if (b->tnonil) |
2689 | BATsetprop(b, GDK_MAX_VALUE, b->ttype, res); |
2690 | } |
2691 | } |
2692 | if (aggr == NULL) { |
2693 | s = ATOMlen(b->ttype, res); |
2694 | aggr = GDKmalloc(s); |
2695 | } else { |
2696 | s = ATOMsize(ATOMtype(b->ttype)); |
2697 | } |
2698 | if (aggr != NULL) /* else: malloc error */ |
2699 | memcpy(aggr, res, s); |
2700 | return aggr; |
2701 | } |
2702 | |
2703 | |
2704 | /* ---------------------------------------------------------------------- */ |
2705 | /* quantiles/median */ |
2706 | |
2707 | #if SIZEOF_OID == SIZEOF_INT |
2708 | #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_int(indir, offset, (const int *) vals, lo, hi, (int) (v), ordering, last) |
2709 | #endif |
2710 | #if SIZEOF_OID == SIZEOF_LNG |
2711 | #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_lng(indir, offset, (const lng *) vals, lo, hi, (lng) (v), ordering, last) |
2712 | #endif |
2713 | |
2714 | static BAT * |
2715 | doBATgroupquantile(BAT *b, BAT *g, BAT *e, BAT *s, int tp, double quantile, |
2716 | bool skip_nils, bool abort_on_error, bool average) |
2717 | { |
2718 | bool freeb = false, freeg = false; |
2719 | oid min, max; |
2720 | BUN ngrp; |
2721 | BUN nils = 0; |
2722 | BAT *bn = NULL; |
2723 | struct canditer ci; |
2724 | BUN ncand; |
2725 | BAT *t1, *t2; |
2726 | BATiter bi; |
2727 | const void *v; |
2728 | const void *nil = ATOMnilptr(tp); |
2729 | const void *dnil = nil; |
2730 | dbl val; /* only used for average */ |
2731 | int (*atomcmp)(const void *, const void *) = ATOMcompare(tp); |
2732 | const char *err; |
2733 | (void) abort_on_error; |
2734 | |
2735 | if (average) { |
2736 | switch (ATOMbasetype(b->ttype)) { |
2737 | case TYPE_bte: |
2738 | case TYPE_sht: |
2739 | case TYPE_int: |
2740 | case TYPE_lng: |
2741 | #ifdef HAVE_HGE |
2742 | case TYPE_hge: |
2743 | #endif |
2744 | case TYPE_flt: |
2745 | case TYPE_dbl: |
2746 | break; |
2747 | default: |
2748 | GDKerror("BATgroupquantile: incompatible type\n" ); |
2749 | return NULL; |
2750 | } |
2751 | dnil = &dbl_nil; |
2752 | } |
2753 | if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
2754 | GDKerror("BATgroupquantile: %s\n" , err); |
2755 | return NULL; |
2756 | } |
2757 | assert(tp == b->ttype); |
2758 | if (!ATOMlinear(tp)) { |
2759 | GDKerror("BATgroupquantile: cannot determine quantile on " |
2760 | "non-linear type %s\n" , ATOMname(tp)); |
2761 | return NULL; |
2762 | } |
2763 | if (quantile < 0 || quantile > 1) { |
2764 | GDKerror("BATgroupquantile: cannot determine quantile for " |
2765 | "p=%f (p has to be in [0,1])\n" , quantile); |
2766 | return NULL; |
2767 | } |
2768 | |
2769 | if (BATcount(b) == 0 || ngrp == 0 || is_dbl_nil(quantile)) { |
2770 | /* trivial: no values, thus also no quantiles, |
2771 | * so return bat aligned with e with nil in the tail |
2772 | * The same happens for a NULL quantile */ |
2773 | return BATconstant(ngrp == 0 ? 0 : min, average ? TYPE_dbl : tp, dnil, ngrp, TRANSIENT); |
2774 | } |
2775 | |
2776 | if (s) { |
2777 | /* there is a candidate list, replace b (and g, if |
2778 | * given) with just the values we're interested in */ |
2779 | b = BATproject(s, b); |
2780 | if (b == NULL) |
2781 | return NULL; |
2782 | freeb = true; |
2783 | if (g) { |
2784 | g = BATproject(s, g); |
2785 | if (g == NULL) |
2786 | goto bunins_failed; |
2787 | freeg = true; |
2788 | } |
2789 | } |
2790 | |
2791 | /* we want to sort b so that we can figure out the quantile, but |
2792 | * if g is given, sort g and subsort b so that we can get the |
2793 | * quantile for each group */ |
2794 | if (g) { |
2795 | const oid *restrict grps; |
2796 | oid prev; |
2797 | BUN p, q, r; |
2798 | |
2799 | if (BATtdense(g)) { |
2800 | /* singleton groups, so calculating quantile is |
2801 | * easy */ |
2802 | if (average) |
2803 | bn = BATconvert(b, NULL, TYPE_dbl, abort_on_error); |
2804 | else |
2805 | bn = COLcopy(b, tp, false, TRANSIENT); |
2806 | BAThseqbase(bn, g->tseqbase); /* deals with NULL */ |
2807 | if (freeb) |
2808 | BBPunfix(b->batCacheid); |
2809 | if (freeg) |
2810 | BBPunfix(g->batCacheid); |
2811 | return bn; |
2812 | } |
2813 | if (BATsort(&t1, &t2, NULL, g, NULL, NULL, false, false, false) != GDK_SUCCEED) |
2814 | goto bunins_failed; |
2815 | if (freeg) |
2816 | BBPunfix(g->batCacheid); |
2817 | g = t1; |
2818 | freeg = true; |
2819 | |
2820 | if (BATsort(&t1, NULL, NULL, b, t2, g, false, false, false) != GDK_SUCCEED) { |
2821 | BBPunfix(t2->batCacheid); |
2822 | goto bunins_failed; |
2823 | } |
2824 | if (freeb) |
2825 | BBPunfix(b->batCacheid); |
2826 | b = t1; |
2827 | freeb = true; |
2828 | BBPunfix(t2->batCacheid); |
2829 | |
2830 | if (average) |
2831 | bn = COLnew(min, TYPE_dbl, ngrp, TRANSIENT); |
2832 | else |
2833 | bn = COLnew(min, tp, ngrp, TRANSIENT); |
2834 | if (bn == NULL) |
2835 | goto bunins_failed; |
2836 | |
2837 | bi = bat_iterator(b); |
2838 | |
2839 | grps = (const oid *) Tloc(g, 0); |
2840 | /* for each group (r and p are the beginning and end |
2841 | * of the current group, respectively) */ |
2842 | for (r = 0, q = BATcount(g); r < q; r = p) { |
2843 | BUN qindex; |
2844 | prev = grps[r]; |
2845 | /* search for end of current group (grps is |
2846 | * sorted so we can use binary search) */ |
2847 | p = binsearch_oid(NULL, 0, grps, r, q - 1, prev, 1, 1); |
2848 | if (skip_nils && !b->tnonil) { |
2849 | /* within group, locate start of non-nils */ |
2850 | r = binsearch(NULL, 0, tp, Tloc(b, 0), |
2851 | b->tvheap ? b->tvheap->base : NULL, |
2852 | b->twidth, r, p, nil, |
2853 | 1, 1); |
2854 | } |
2855 | if (r == p) { |
2856 | /* no non-nils found */ |
2857 | v = dnil; |
2858 | nils++; |
2859 | } else if (average) { |
2860 | double f = (p - r - 1) * quantile; |
2861 | double lo = floor(f); |
2862 | double hi = ceil(f); |
2863 | switch (ATOMbasetype(tp)) { |
2864 | case TYPE_bte: |
2865 | val = (f - lo) * *(bte*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(bte*)BUNtail(bi, r + (BUN) lo); |
2866 | break; |
2867 | case TYPE_sht: |
2868 | val = (f - lo) * *(sht*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(sht*)BUNtail(bi, r + (BUN) lo); |
2869 | break; |
2870 | case TYPE_int: |
2871 | val = (f - lo) * *(int*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(int*)BUNtail(bi, r + (BUN) lo); |
2872 | break; |
2873 | case TYPE_lng: |
2874 | val = (f - lo) * *(lng*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(lng*)BUNtail(bi, r + (BUN) lo); |
2875 | break; |
2876 | #ifdef HAVE_HGE |
2877 | case TYPE_hge: |
2878 | val = (f - lo) * *(hge*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(hge*)BUNtail(bi, r + (BUN) lo); |
2879 | break; |
2880 | #endif |
2881 | case TYPE_flt: |
2882 | val = (f - lo) * *(flt*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(flt*)BUNtail(bi, r + (BUN) lo); |
2883 | break; |
2884 | case TYPE_dbl: |
2885 | val = (f - lo) * *(dbl*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(dbl*)BUNtail(bi, r + (BUN) lo); |
2886 | break; |
2887 | } |
2888 | v = &val; |
2889 | } else { |
2890 | /* round *down* to nearest integer */ |
2891 | double f = (p - r - 1) * quantile; |
2892 | qindex = r + p - (BUN) (p + 0.5 - f); |
2893 | /* be a little paranoid about the index */ |
2894 | assert(qindex >= r && qindex < p); |
2895 | v = BUNtail(bi, qindex); |
2896 | if (!skip_nils && !b->tnonil) |
2897 | nils += (*atomcmp)(v, dnil) == 0; |
2898 | } |
2899 | bunfastapp_nocheck(bn, BUNlast(bn), v, Tsize(bn)); |
2900 | } |
2901 | nils += ngrp - BATcount(bn); |
2902 | while (BATcount(bn) < ngrp) { |
2903 | bunfastapp_nocheck(bn, BUNlast(bn), dnil, Tsize(bn)); |
2904 | } |
2905 | bn->theap.dirty = true; |
2906 | BBPunfix(g->batCacheid); |
2907 | } else { |
2908 | BUN index, r, p = BATcount(b); |
2909 | BAT *pb = NULL; |
2910 | const oid *ords; |
2911 | |
2912 | bn = COLnew(0, average ? TYPE_dbl : tp, 1, TRANSIENT); |
2913 | if (bn == NULL) |
2914 | goto bunins_failed; |
2915 | |
2916 | t1 = NULL; |
2917 | |
2918 | if (BATcheckorderidx(b) || |
2919 | (VIEWtparent(b) && |
2920 | (pb = BBPdescriptor(VIEWtparent(b))) != NULL && |
2921 | pb->theap.base == b->theap.base && |
2922 | BATcount(pb) == BATcount(b) && |
2923 | pb->hseqbase == b->hseqbase && |
2924 | BATcheckorderidx(pb))) { |
2925 | ords = (const oid *) (pb ? pb->torderidx->base : b->torderidx->base) + ORDERIDXOFF; |
2926 | } else { |
2927 | if (BATsort(NULL, &t1, NULL, b, NULL, g, false, false, false) != GDK_SUCCEED) |
2928 | goto bunins_failed; |
2929 | if (BATtdense(t1)) |
2930 | ords = NULL; |
2931 | else |
2932 | ords = (const oid *) Tloc(t1, 0); |
2933 | } |
2934 | |
2935 | if (skip_nils && !b->tnonil) |
2936 | r = binsearch(ords, 0, tp, Tloc(b, 0), |
2937 | b->tvheap ? b->tvheap->base : NULL, |
2938 | b->twidth, 0, p, |
2939 | nil, 1, 1); |
2940 | else |
2941 | r = 0; |
2942 | |
2943 | bi = bat_iterator(b); |
2944 | if (r == p) { |
2945 | /* no non-nil values, so quantile is nil */ |
2946 | v = dnil; |
2947 | nils++; |
2948 | } else if (average) { |
2949 | double f = (p - r - 1) * quantile; |
2950 | double lo = floor(f); |
2951 | double hi = ceil(f); |
2952 | switch (ATOMbasetype(tp)) { |
2953 | case TYPE_bte: |
2954 | val = (f - lo) * *(bte*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(bte*)BUNtail(bi, r + (BUN) lo); |
2955 | break; |
2956 | case TYPE_sht: |
2957 | val = (f - lo) * *(sht*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(sht*)BUNtail(bi, r + (BUN) lo); |
2958 | break; |
2959 | case TYPE_int: |
2960 | val = (f - lo) * *(int*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(int*)BUNtail(bi, r + (BUN) lo); |
2961 | break; |
2962 | case TYPE_lng: |
2963 | val = (f - lo) * *(lng*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(lng*)BUNtail(bi, r + (BUN) lo); |
2964 | break; |
2965 | #ifdef HAVE_HGE |
2966 | case TYPE_hge: |
2967 | val = (f - lo) * *(hge*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(hge*)BUNtail(bi, r + (BUN) lo); |
2968 | break; |
2969 | #endif |
2970 | case TYPE_flt: |
2971 | val = (f - lo) * *(flt*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(flt*)BUNtail(bi, r + (BUN) lo); |
2972 | break; |
2973 | case TYPE_dbl: |
2974 | val = (f - lo) * *(dbl*)BUNtail(bi, r + (BUN) hi) + (lo + 1 - f) * *(dbl*)BUNtail(bi, r + (BUN) lo); |
2975 | break; |
2976 | } |
2977 | v = &val; |
2978 | } else { |
2979 | double f; |
2980 | /* round (p-r-1)*quantile *down* to nearest |
2981 | * integer (i.e., 1.49 and 1.5 are rounded to |
2982 | * 1, 1.51 is rounded to 2) */ |
2983 | f = (p - r - 1) * quantile; |
2984 | index = r + p - (BUN) (p + 0.5 - f); |
2985 | if (ords) |
2986 | index = ords[index] - b->hseqbase; |
2987 | else |
2988 | index = index + t1->tseqbase; |
2989 | v = BUNtail(bi, index); |
2990 | nils += (*atomcmp)(v, dnil) == 0; |
2991 | } |
2992 | if (t1) |
2993 | BBPunfix(t1->batCacheid); |
2994 | if (BUNappend(bn, v, false) != GDK_SUCCEED) |
2995 | goto bunins_failed; |
2996 | } |
2997 | |
2998 | if (freeb) |
2999 | BBPunfix(b->batCacheid); |
3000 | |
3001 | bn->tkey = BATcount(bn) <= 1; |
3002 | bn->tsorted = BATcount(bn) <= 1; |
3003 | bn->trevsorted = BATcount(bn) <= 1; |
3004 | bn->tnil = nils != 0; |
3005 | bn->tnonil = nils == 0; |
3006 | return bn; |
3007 | |
3008 | bunins_failed: |
3009 | if (freeb) |
3010 | BBPunfix(b->batCacheid); |
3011 | if (freeg) |
3012 | BBPunfix(g->batCacheid); |
3013 | if (bn) |
3014 | BBPunfix(bn->batCacheid); |
3015 | return NULL; |
3016 | } |
3017 | |
3018 | BAT * |
3019 | BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, |
3020 | bool skip_nils, bool abort_on_error) |
3021 | { |
3022 | return doBATgroupquantile(b, g, e, s, tp, 0.5, |
3023 | skip_nils, abort_on_error, false); |
3024 | } |
3025 | |
3026 | BAT * |
3027 | BATgroupquantile(BAT *b, BAT *g, BAT *e, BAT *s, int tp, double quantile, |
3028 | bool skip_nils, bool abort_on_error) |
3029 | { |
3030 | return doBATgroupquantile(b, g, e, s, tp, quantile, |
3031 | skip_nils, abort_on_error, false); |
3032 | } |
3033 | |
3034 | BAT * |
3035 | BATgroupmedian_avg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, |
3036 | bool skip_nils, bool abort_on_error) |
3037 | { |
3038 | return doBATgroupquantile(b, g, e, s, tp, 0.5, |
3039 | skip_nils, abort_on_error, true); |
3040 | } |
3041 | |
3042 | BAT * |
3043 | BATgroupquantile_avg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, double quantile, |
3044 | bool skip_nils, bool abort_on_error) |
3045 | { |
3046 | return doBATgroupquantile(b, g, e, s, tp, quantile, |
3047 | skip_nils, abort_on_error, true); |
3048 | } |
3049 | |
3050 | /* ---------------------------------------------------------------------- */ |
3051 | /* standard deviation (both biased and non-biased) */ |
3052 | |
3053 | #define AGGR_STDEV_SINGLE(TYPE) \ |
3054 | do { \ |
3055 | TYPE x; \ |
3056 | for (i = 0; i < cnt; i++) { \ |
3057 | x = ((const TYPE *) values)[i]; \ |
3058 | if (is_##TYPE##_nil(x)) \ |
3059 | continue; \ |
3060 | n++; \ |
3061 | delta = (dbl) x - mean; \ |
3062 | mean += delta / n; \ |
3063 | m2 += delta * ((dbl) x - mean); \ |
3064 | } \ |
3065 | } while (0) |
3066 | |
3067 | static dbl |
3068 | calcvariance(dbl *restrict avgp, const void *restrict values, BUN cnt, int tp, bool issample, const char *func) |
3069 | { |
3070 | BUN n = 0, i; |
3071 | dbl mean = 0; |
3072 | dbl m2 = 0; |
3073 | dbl delta; |
3074 | |
3075 | switch (tp) { |
3076 | case TYPE_bte: |
3077 | AGGR_STDEV_SINGLE(bte); |
3078 | break; |
3079 | case TYPE_sht: |
3080 | AGGR_STDEV_SINGLE(sht); |
3081 | break; |
3082 | case TYPE_int: |
3083 | AGGR_STDEV_SINGLE(int); |
3084 | break; |
3085 | case TYPE_lng: |
3086 | AGGR_STDEV_SINGLE(lng); |
3087 | break; |
3088 | #ifdef HAVE_HGE |
3089 | case TYPE_hge: |
3090 | AGGR_STDEV_SINGLE(hge); |
3091 | break; |
3092 | #endif |
3093 | case TYPE_flt: |
3094 | AGGR_STDEV_SINGLE(flt); |
3095 | break; |
3096 | case TYPE_dbl: |
3097 | AGGR_STDEV_SINGLE(dbl); |
3098 | break; |
3099 | default: |
3100 | GDKerror("%s: type (%s) not supported.\n" , |
3101 | func, ATOMname(tp)); |
3102 | return dbl_nil; |
3103 | } |
3104 | if (n <= (BUN) issample) { |
3105 | if (avgp) |
3106 | *avgp = dbl_nil; |
3107 | return dbl_nil; |
3108 | } |
3109 | if (avgp) |
3110 | *avgp = mean; |
3111 | return m2 / (n - issample); |
3112 | } |
3113 | |
3114 | dbl |
3115 | BATcalcstdev_population(dbl *avgp, BAT *b) |
3116 | { |
3117 | dbl v = calcvariance(avgp, (const void *) Tloc(b, 0), |
3118 | BATcount(b), b->ttype, false, |
3119 | "BATcalcstdev_population" ); |
3120 | return is_dbl_nil(v) ? dbl_nil : sqrt(v); |
3121 | } |
3122 | |
3123 | dbl |
3124 | BATcalcstdev_sample(dbl *avgp, BAT *b) |
3125 | { |
3126 | dbl v = calcvariance(avgp, (const void *) Tloc(b, 0), |
3127 | BATcount(b), b->ttype, true, |
3128 | "BATcalcstdev_sample" ); |
3129 | return is_dbl_nil(v) ? dbl_nil : sqrt(v); |
3130 | } |
3131 | |
3132 | dbl |
3133 | BATcalcvariance_population(dbl *avgp, BAT *b) |
3134 | { |
3135 | return calcvariance(avgp, (const void *) Tloc(b, 0), |
3136 | BATcount(b), b->ttype, false, |
3137 | "BATcalcvariance_population" ); |
3138 | } |
3139 | |
3140 | dbl |
3141 | BATcalcvariance_sample(dbl *avgp, BAT *b) |
3142 | { |
3143 | return calcvariance(avgp, (const void *) Tloc(b, 0), |
3144 | BATcount(b), b->ttype, true, |
3145 | "BATcalcvariance_sample" ); |
3146 | } |
3147 | |
3148 | #define AGGR_STDEV(TYPE) \ |
3149 | do { \ |
3150 | const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ |
3151 | while (ncand > 0) { \ |
3152 | ncand--; \ |
3153 | i = canditer_next(&ci) - b->hseqbase; \ |
3154 | if (gids == NULL || \ |
3155 | (gids[i] >= min && gids[i] <= max)) { \ |
3156 | if (gids) \ |
3157 | gid = gids[i] - min; \ |
3158 | else \ |
3159 | gid = (oid) i; \ |
3160 | if (is_##TYPE##_nil(vals[i])) { \ |
3161 | if (!skip_nils) \ |
3162 | cnts[gid] = BUN_NONE; \ |
3163 | } else if (cnts[gid] != BUN_NONE) { \ |
3164 | cnts[gid]++; \ |
3165 | delta[gid] = (dbl) vals[i] - mean[gid]; \ |
3166 | mean[gid] += delta[gid] / cnts[gid]; \ |
3167 | m2[gid] += delta[gid] * ((dbl) vals[i] - mean[gid]); \ |
3168 | } \ |
3169 | } \ |
3170 | } \ |
3171 | for (i = 0; i < ngrp; i++) { \ |
3172 | if (cnts[i] == 0 || cnts[i] == BUN_NONE) { \ |
3173 | dbls[i] = dbl_nil; \ |
3174 | mean[i] = dbl_nil; \ |
3175 | nils++; \ |
3176 | } else if (cnts[i] == 1) { \ |
3177 | dbls[i] = issample ? dbl_nil : 0; \ |
3178 | nils2++; \ |
3179 | } else if (variance) { \ |
3180 | dbls[i] = m2[i] / (cnts[i] - issample); \ |
3181 | } else { \ |
3182 | dbls[i] = sqrt(m2[i] / (cnts[i] - issample)); \ |
3183 | } \ |
3184 | } \ |
3185 | } while (0) |
3186 | |
3187 | /* Calculate group standard deviation (population (i.e. biased) or |
3188 | * sample (i.e. non-biased)) with optional candidates list. |
3189 | * |
3190 | * Note that this helper function is prepared to return two BATs: one |
3191 | * (as return value) with the standard deviation per group, and one |
3192 | * (as return argument) with the average per group. This isn't |
3193 | * currently used since it doesn't fit into the mold of grouped |
3194 | * aggregates. */ |
3195 | static BAT * |
3196 | dogroupstdev(BAT **avgb, BAT *b, BAT *g, BAT *e, BAT *s, int tp, |
3197 | bool skip_nils, bool issample, bool variance, const char *func) |
3198 | { |
3199 | const oid *restrict gids; |
3200 | oid gid; |
3201 | oid min, max; |
3202 | BUN i, ngrp; |
3203 | BUN nils = 0, nils2 = 0; |
3204 | BUN *restrict cnts = NULL; |
3205 | dbl *restrict dbls, *restrict mean, *restrict delta, *restrict m2; |
3206 | BAT *bn = NULL; |
3207 | struct canditer ci; |
3208 | BUN ncand; |
3209 | const char *err; |
3210 | |
3211 | assert(tp == TYPE_dbl); |
3212 | (void) tp; /* compatibility (with other BATgroup* |
3213 | * functions) argument */ |
3214 | |
3215 | if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &ci, &ncand)) != NULL) { |
3216 | GDKerror("%s: %s\n" , func, err); |
3217 | return NULL; |
3218 | } |
3219 | if (g == NULL) { |
3220 | GDKerror("%s: b and g must be aligned\n" , func); |
3221 | return NULL; |
3222 | } |
3223 | |
3224 | if (BATcount(b) == 0 || ngrp == 0) { |
3225 | /* trivial: no products, so return bat aligned with g |
3226 | * with nil in the tail */ |
3227 | return BATconstant(ngrp == 0 ? 0 : min, TYPE_dbl, &dbl_nil, ngrp, TRANSIENT); |
3228 | } |
3229 | |
3230 | if ((e == NULL || |
3231 | (BATcount(e) == BATcount(b) && e->hseqbase == b->hseqbase)) && |
3232 | (BATtdense(g) || (g->tkey && g->tnonil)) && |
3233 | (issample || b->tnonil)) { |
3234 | /* trivial: singleton groups, so all results are equal |
3235 | * to zero (population) or nil (sample) */ |
3236 | dbl v = issample ? dbl_nil : 0; |
3237 | return BATconstant(ngrp == 0 ? 0 : min, TYPE_dbl, &v, ngrp, TRANSIENT); |
3238 | } |
3239 | |
3240 | delta = GDKmalloc(ngrp * sizeof(dbl)); |
3241 | m2 = GDKmalloc(ngrp * sizeof(dbl)); |
3242 | cnts = GDKzalloc(ngrp * sizeof(BUN)); |
3243 | if (avgb) { |
3244 | if ((*avgb = COLnew(0, TYPE_dbl, ngrp, TRANSIENT)) == NULL) { |
3245 | mean = NULL; |
3246 | goto alloc_fail; |
3247 | } |
3248 | mean = (dbl *) Tloc(*avgb, 0); |
3249 | } else { |
3250 | mean = GDKmalloc(ngrp * sizeof(dbl)); |
3251 | } |
3252 | if (mean == NULL || delta == NULL || m2 == NULL || cnts == NULL) |
3253 | goto alloc_fail; |
3254 | |
3255 | bn = COLnew(min, TYPE_dbl, ngrp, TRANSIENT); |
3256 | if (bn == NULL) |
3257 | goto alloc_fail; |
3258 | dbls = (dbl *) Tloc(bn, 0); |
3259 | |
3260 | for (i = 0; i < ngrp; i++) { |
3261 | mean[i] = 0; |
3262 | delta[i] = 0; |
3263 | m2[i] = 0; |
3264 | } |
3265 | |
3266 | if (BATtdense(g)) |
3267 | gids = NULL; |
3268 | else |
3269 | gids = (const oid *) Tloc(g, 0); |
3270 | |
3271 | switch (b->ttype) { |
3272 | case TYPE_bte: |
3273 | AGGR_STDEV(bte); |
3274 | break; |
3275 | case TYPE_sht: |
3276 | AGGR_STDEV(sht); |
3277 | break; |
3278 | case TYPE_int: |
3279 | AGGR_STDEV(int); |
3280 | break; |
3281 | case TYPE_lng: |
3282 | AGGR_STDEV(lng); |
3283 | break; |
3284 | #ifdef HAVE_HGE |
3285 | case TYPE_hge: |
3286 | AGGR_STDEV(hge); |
3287 | break; |
3288 | #endif |
3289 | case TYPE_flt: |
3290 | AGGR_STDEV(flt); |
3291 | break; |
3292 | case TYPE_dbl: |
3293 | AGGR_STDEV(dbl); |
3294 | break; |
3295 | default: |
3296 | if (avgb) |
3297 | BBPreclaim(*avgb); |
3298 | else |
3299 | GDKfree(mean); |
3300 | GDKfree(delta); |
3301 | GDKfree(m2); |
3302 | GDKfree(cnts); |
3303 | BBPunfix(bn->batCacheid); |
3304 | GDKerror("%s: type (%s) not supported.\n" , |
3305 | func, ATOMname(b->ttype)); |
3306 | return NULL; |
3307 | } |
3308 | if (avgb) { |
3309 | BATsetcount(*avgb, ngrp); |
3310 | (*avgb)->tkey = ngrp <= 1; |
3311 | (*avgb)->tsorted = ngrp <= 1; |
3312 | (*avgb)->trevsorted = ngrp <= 1; |
3313 | (*avgb)->tnil = nils != 0; |
3314 | (*avgb)->tnonil = nils == 0; |
3315 | } else { |
3316 | GDKfree(mean); |
3317 | } |
3318 | if (issample) |
3319 | nils += nils2; |
3320 | GDKfree(delta); |
3321 | GDKfree(m2); |
3322 | GDKfree(cnts); |
3323 | BATsetcount(bn, ngrp); |
3324 | bn->tkey = ngrp <= 1; |
3325 | bn->tsorted = ngrp <= 1; |
3326 | bn->trevsorted = ngrp <= 1; |
3327 | bn->tnil = nils != 0; |
3328 | bn->tnonil = nils == 0; |
3329 | return bn; |
3330 | |
3331 | alloc_fail: |
3332 | if (avgb) |
3333 | BBPreclaim(*avgb); |
3334 | else |
3335 | GDKfree(mean); |
3336 | BBPreclaim(bn); |
3337 | GDKfree(delta); |
3338 | GDKfree(m2); |
3339 | GDKfree(cnts); |
3340 | GDKerror("%s: cannot allocate enough memory.\n" , func); |
3341 | return NULL; |
3342 | } |
3343 | |
3344 | BAT * |
3345 | BATgroupstdev_sample(BAT *b, BAT *g, BAT *e, BAT *s, int tp, |
3346 | bool skip_nils, bool abort_on_error) |
3347 | { |
3348 | (void) abort_on_error; |
3349 | return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, true, false, |
3350 | "BATgroupstdev_sample" ); |
3351 | } |
3352 | |
3353 | BAT * |
3354 | BATgroupstdev_population(BAT *b, BAT *g, BAT *e, BAT *s, int tp, |
3355 | bool skip_nils, bool abort_on_error) |
3356 | { |
3357 | (void) abort_on_error; |
3358 | return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, false, false, |
3359 | "BATgroupstdev_population" ); |
3360 | } |
3361 | |
3362 | BAT * |
3363 | BATgroupvariance_sample(BAT *b, BAT *g, BAT *e, BAT *s, int tp, |
3364 | bool skip_nils, bool abort_on_error) |
3365 | { |
3366 | (void) abort_on_error; |
3367 | return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, true, true, |
3368 | "BATgroupvariance_sample" ); |
3369 | } |
3370 | |
3371 | BAT * |
3372 | BATgroupvariance_population(BAT *b, BAT *g, BAT *e, BAT *s, int tp, |
3373 | bool skip_nils, bool abort_on_error) |
3374 | { |
3375 | (void) abort_on_error; |
3376 | return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, false, true, |
3377 | "BATgroupvariance_population" ); |
3378 | } |
3379 | |