| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /* This file contains shared definitions for gdk_calc.c and gdk_aggr.c */ |
| 10 | |
| 11 | #ifndef LIBGDK |
| 12 | #error this file should not be included outside its source directory |
| 13 | #endif |
| 14 | |
| 15 | /* signed version of BUN */ |
| 16 | #if SIZEOF_BUN == SIZEOF_INT |
| 17 | #define SBUN int |
| 18 | #else |
| 19 | #define SBUN lng |
| 20 | #endif |
| 21 | |
| 22 | #ifdef ABSOLUTE |
| 23 | /* Windows seems to define this somewhere */ |
| 24 | #undef ABSOLUTE |
| 25 | #endif |
| 26 | #define ABSOLUTE(x) ((x) < 0 ? -(x) : (x)) |
| 27 | |
| 28 | #define LT(a, b) ((bit) ((a) < (b))) |
| 29 | |
| 30 | #define GT(a, b) ((bit) ((a) > (b))) |
| 31 | |
| 32 | #include "gdk_cand.h" |
| 33 | |
| 34 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
| 35 | #define OP_WITH_CHECK(lft, rgt, dst, op, nil, max, on_overflow) \ |
| 36 | do { \ |
| 37 | if (__builtin_##op##_overflow(lft, rgt, &(dst)) || \ |
| 38 | (dst) < -(max) || (dst) > (max)) { \ |
| 39 | if (abort_on_error) \ |
| 40 | on_overflow; \ |
| 41 | (dst) = nil; \ |
| 42 | nils++; \ |
| 43 | } \ |
| 44 | } while (0) |
| 45 | #endif /* HAVE___BUILTIN_ADD_OVERFLOW */ |
| 46 | |
| 47 | /* dst = lft + rgt with overflow check */ |
| 48 | |
| 49 | /* generic version */ |
| 50 | #define ADD_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
| 51 | do { \ |
| 52 | if ((rgt) < 1) { \ |
| 53 | if (-(max) - (rgt) > (lft)) { \ |
| 54 | if (abort_on_error) \ |
| 55 | on_overflow; \ |
| 56 | (dst) = TYPE3##_nil; \ |
| 57 | nils++; \ |
| 58 | } else { \ |
| 59 | (dst) = (TYPE3) (lft) + (rgt); \ |
| 60 | } \ |
| 61 | } else { \ |
| 62 | if ((max) - (rgt) < (lft)) { \ |
| 63 | if (abort_on_error) \ |
| 64 | on_overflow; \ |
| 65 | (dst) = TYPE3##_nil; \ |
| 66 | nils++; \ |
| 67 | } else { \ |
| 68 | (dst) = (TYPE3) (lft) + (rgt); \ |
| 69 | } \ |
| 70 | } \ |
| 71 | } while (0) |
| 72 | |
| 73 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
| 74 | /* integer version using Gnu CC builtin function for overflow check */ |
| 75 | #define ADDI_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
| 76 | OP_WITH_CHECK(lft, rgt, dst, add, TYPE3##_nil, max, on_overflow) |
| 77 | #else |
| 78 | /* integer version using generic version */ |
| 79 | #define ADDI_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
| 80 | ADD_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) |
| 81 | #endif /* HAVE___BUILTIN_ADD_OVERFLOW */ |
| 82 | |
| 83 | /* floating point version using generic version */ |
| 84 | #define ADDF_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
| 85 | ADD_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) |
| 86 | |
| 87 | /* dst = lft - rgt with overflow check */ |
| 88 | |
| 89 | /* generic version */ |
| 90 | #define SUB_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
| 91 | do { \ |
| 92 | if ((rgt) < 1) { \ |
| 93 | if ((max) + (rgt) < (lft)) { \ |
| 94 | if (abort_on_error) \ |
| 95 | on_overflow; \ |
| 96 | (dst) = TYPE3##_nil; \ |
| 97 | nils++; \ |
| 98 | } else { \ |
| 99 | (dst) = (TYPE3) (lft) - (rgt); \ |
| 100 | } \ |
| 101 | } else { \ |
| 102 | if (-(max) + (rgt) > (lft)) { \ |
| 103 | if (abort_on_error) \ |
| 104 | on_overflow; \ |
| 105 | (dst) = TYPE3##_nil; \ |
| 106 | nils++; \ |
| 107 | } else { \ |
| 108 | (dst) = (TYPE3) (lft) - (rgt); \ |
| 109 | } \ |
| 110 | } \ |
| 111 | } while (0) |
| 112 | |
| 113 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
| 114 | /* integer version using Gnu CC builtin function for overflow check */ |
| 115 | #define SUBI_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
| 116 | OP_WITH_CHECK(lft, rgt, dst, sub, TYPE3##_nil, max, on_overflow) |
| 117 | #else |
| 118 | /* integer version using generic version */ |
| 119 | #define SUBI_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
| 120 | SUB_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) |
| 121 | #endif /* HAVE___BUILTIN_ADD_OVERFLOW */ |
| 122 | |
| 123 | /* floating point version using generic version */ |
| 124 | #define SUBF_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
| 125 | SUB_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) |
| 126 | |
| 127 | /* dst = lft * rgt with overflow check */ |
| 128 | |
| 129 | /* generic version */ |
| 130 | #define MUL4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) \ |
| 131 | do { \ |
| 132 | TYPE4 c = (TYPE4) (lft) * (rgt); \ |
| 133 | if (c < (TYPE4) -(max) || \ |
| 134 | c > (TYPE4) (max)) { \ |
| 135 | if (abort_on_error) \ |
| 136 | on_overflow; \ |
| 137 | (dst) = TYPE3##_nil; \ |
| 138 | nils++; \ |
| 139 | } else { \ |
| 140 | (dst) = (TYPE3) c; \ |
| 141 | } \ |
| 142 | } while (0) |
| 143 | |
| 144 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
| 145 | /* integer version using Gnu CC builtin function for overflow check */ |
| 146 | #define MULI4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) \ |
| 147 | OP_WITH_CHECK(lft, rgt, dst, mul, TYPE3##_nil, max, on_overflow) |
| 148 | #define LNGMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
| 149 | OP_WITH_CHECK(lft, rgt, dst, mul, lng_nil, max, on_overflow) |
| 150 | #else |
| 151 | /* integer version using generic version */ |
| 152 | #define MULI4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) \ |
| 153 | MUL4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) |
| 154 | #ifdef HAVE_HGE |
| 155 | #define LNGMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
| 156 | MULI4_WITH_CHECK(lft, rgt, lng, dst, max, hge, on_overflow) |
| 157 | #else |
| 158 | #if defined(HAVE__MUL128) |
| 159 | #include <intrin.h> |
| 160 | #pragma intrinsic(_mul128) |
| 161 | #define LNGMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
| 162 | do { \ |
| 163 | lng clo, chi; \ |
| 164 | clo = _mul128((lng) (lft), (lng) (rgt), &chi); \ |
| 165 | if ((chi == 0 && clo >= 0 && clo <= (max)) || \ |
| 166 | (chi == -1 && clo < 0 && clo >= -(max))) { \ |
| 167 | (dst) = clo; \ |
| 168 | } else { \ |
| 169 | if (abort_on_error) \ |
| 170 | on_overflow; \ |
| 171 | (dst) = lng_nil; \ |
| 172 | nils++; \ |
| 173 | } \ |
| 174 | } while (0) |
| 175 | #else |
| 176 | #define LNGMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
| 177 | do { \ |
| 178 | lng a = (lft), b = (rgt); \ |
| 179 | unsigned int a1, a2, b1, b2; \ |
| 180 | ulng c; \ |
| 181 | int sign = 1; \ |
| 182 | \ |
| 183 | if (a < 0) { \ |
| 184 | sign = -sign; \ |
| 185 | a = -a; \ |
| 186 | } \ |
| 187 | if (b < 0) { \ |
| 188 | sign = -sign; \ |
| 189 | b = -b; \ |
| 190 | } \ |
| 191 | a1 = (unsigned int) (a >> 32); \ |
| 192 | a2 = (unsigned int) a; \ |
| 193 | b1 = (unsigned int) (b >> 32); \ |
| 194 | b2 = (unsigned int) b; \ |
| 195 | /* result = (a1*b1<<64) + (a1*b2+a2*b1<<32) + a2*b2 */ \ |
| 196 | if ((a1 == 0 || b1 == 0) && \ |
| 197 | ((c = (ulng) a1 * b2 + (ulng) a2 * b1) & (~(ulng)0 << 31)) == 0 && \ |
| 198 | (((c = (c << 32) + (ulng) a2 * b2) & ((ulng) 1 << 63)) == 0 && \ |
| 199 | (c) <= (ulng) (max))) { \ |
| 200 | (dst) = sign * (lng) c; \ |
| 201 | } else { \ |
| 202 | if (abort_on_error) \ |
| 203 | on_overflow; \ |
| 204 | (dst) = lng_nil; \ |
| 205 | nils++; \ |
| 206 | } \ |
| 207 | } while (0) |
| 208 | #endif |
| 209 | #endif /* HAVE_HGE */ |
| 210 | #endif |
| 211 | #define MULF4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) \ |
| 212 | MUL4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) |
| 213 | |
| 214 | #ifdef HAVE_HGE |
| 215 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
| 216 | #define HGEMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
| 217 | OP_WITH_CHECK(lft, rgt, dst, mul, hge_nil, max, on_overflow) |
| 218 | #else |
| 219 | #define HGEMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
| 220 | do { \ |
| 221 | hge a = (lft), b = (rgt); \ |
| 222 | ulng a1, a2, b1, b2; \ |
| 223 | uhge c; \ |
| 224 | int sign = 1; \ |
| 225 | \ |
| 226 | if (a < 0) { \ |
| 227 | sign = -sign; \ |
| 228 | a = -a; \ |
| 229 | } \ |
| 230 | if (b < 0) { \ |
| 231 | sign = -sign; \ |
| 232 | b = -b; \ |
| 233 | } \ |
| 234 | a1 = (ulng) (a >> 64); \ |
| 235 | a2 = (ulng) a; \ |
| 236 | b1 = (ulng) (b >> 64); \ |
| 237 | b2 = (ulng) b; \ |
| 238 | /* result = (a1*b1<<128) + ((a1*b2+a2*b1)<<64) + a2*b2 */ \ |
| 239 | if ((a1 == 0 || b1 == 0) && \ |
| 240 | ((c = (uhge) a1 * b2 + (uhge) a2 * b1) & (~(uhge)0 << 63)) == 0 && \ |
| 241 | (((c = (c << 64) + (uhge) a2 * b2) & ((uhge) 1 << 127)) == 0) && \ |
| 242 | (c) <= (uhge) (max)) { \ |
| 243 | (dst) = sign * (hge) c; \ |
| 244 | } else { \ |
| 245 | if (abort_on_error) \ |
| 246 | on_overflow; \ |
| 247 | (dst) = hge_nil; \ |
| 248 | nils++; \ |
| 249 | } \ |
| 250 | } while (0) |
| 251 | #endif /* HAVE___BUILTIN_ADD_OVERFLOW */ |
| 252 | #endif /* HAVE_HGE */ |
| 253 | |
| 254 | #define AVERAGE_ITER(TYPE, x, a, r, n) \ |
| 255 | do { \ |
| 256 | TYPE an, xn, z1; \ |
| 257 | BUN z2; \ |
| 258 | (n)++; \ |
| 259 | /* calculate z1 = (x - a) / n, rounded down (towards */ \ |
| 260 | /* negative infinity), and calculate z2 = remainder */ \ |
| 261 | /* of the division (i.e. 0 <= z2 < n); do this */ \ |
| 262 | /* without causing overflow */ \ |
| 263 | an = (TYPE) ((a) / (SBUN) (n)); \ |
| 264 | xn = (TYPE) ((x) / (SBUN) (n)); \ |
| 265 | /* z1 will be (x - a) / n rounded towards -INF */ \ |
| 266 | z1 = xn - an; \ |
| 267 | xn = (x) - (TYPE) (xn * (SBUN) (n)); \ |
| 268 | an = (a) - (TYPE) (an * (SBUN) (n)); \ |
| 269 | /* z2 will be remainder of above division */ \ |
| 270 | if (xn >= an) { \ |
| 271 | z2 = (BUN) (xn - an); \ |
| 272 | /* loop invariant: */ \ |
| 273 | /* (x - a) - z1 * n == z2 */ \ |
| 274 | while (z2 >= (BUN) (n)) { \ |
| 275 | z2 -= (BUN) (n); \ |
| 276 | z1++; \ |
| 277 | } \ |
| 278 | } else { \ |
| 279 | z2 = (BUN) (an - xn); \ |
| 280 | /* loop invariant (until we break): */ \ |
| 281 | /* (x - a) - z1 * n == -z2 */ \ |
| 282 | for (;;) { \ |
| 283 | z1--; \ |
| 284 | if (z2 < (BUN) (n)) { \ |
| 285 | /* proper remainder */ \ |
| 286 | z2 = (BUN) ((n) - z2); \ |
| 287 | break; \ |
| 288 | } \ |
| 289 | z2 -= (BUN) (n); \ |
| 290 | } \ |
| 291 | } \ |
| 292 | (a) += z1; \ |
| 293 | (r) += z2; \ |
| 294 | if ((r) >= (BUN) (n)) { \ |
| 295 | (r) -= (BUN) (n); \ |
| 296 | (a)++; \ |
| 297 | } \ |
| 298 | } while (0) |
| 299 | |
| 300 | #define AVERAGE_ITER_FLOAT(TYPE, x, a, n) \ |
| 301 | do { \ |
| 302 | (n)++; \ |
| 303 | if (((a) > 0) == ((x) > 0)) { \ |
| 304 | /* same sign */ \ |
| 305 | (a) += ((x) - (a)) / (SBUN) (n); \ |
| 306 | } else { \ |
| 307 | /* no overflow at the cost of an */ \ |
| 308 | /* extra division and slight loss of */ \ |
| 309 | /* precision */ \ |
| 310 | (a) = (a) - (a) / (SBUN) (n) + (x) / (SBUN) (n); \ |
| 311 | } \ |
| 312 | } while (0) |
| 313 | |
| 314 | __hidden BUN dofsum(const void *restrict values, oid seqb, |
| 315 | struct canditer *restrict ci, BUN ncand, |
| 316 | void *restrict results, BUN ngrp, int tp1, int tp2, |
| 317 | const oid *restrict gids, |
| 318 | oid min, oid max, bool skip_nils, bool abort_on_error, |
| 319 | bool nil_if_empty) |
| 320 | __attribute__((__visibility__("hidden" ))); |
| 321 | |