1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* This file contains shared definitions for gdk_calc.c and gdk_aggr.c */ |
10 | |
11 | #ifndef LIBGDK |
12 | #error this file should not be included outside its source directory |
13 | #endif |
14 | |
15 | /* signed version of BUN */ |
16 | #if SIZEOF_BUN == SIZEOF_INT |
17 | #define SBUN int |
18 | #else |
19 | #define SBUN lng |
20 | #endif |
21 | |
22 | #ifdef ABSOLUTE |
23 | /* Windows seems to define this somewhere */ |
24 | #undef ABSOLUTE |
25 | #endif |
26 | #define ABSOLUTE(x) ((x) < 0 ? -(x) : (x)) |
27 | |
28 | #define LT(a, b) ((bit) ((a) < (b))) |
29 | |
30 | #define GT(a, b) ((bit) ((a) > (b))) |
31 | |
32 | #include "gdk_cand.h" |
33 | |
34 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
35 | #define OP_WITH_CHECK(lft, rgt, dst, op, nil, max, on_overflow) \ |
36 | do { \ |
37 | if (__builtin_##op##_overflow(lft, rgt, &(dst)) || \ |
38 | (dst) < -(max) || (dst) > (max)) { \ |
39 | if (abort_on_error) \ |
40 | on_overflow; \ |
41 | (dst) = nil; \ |
42 | nils++; \ |
43 | } \ |
44 | } while (0) |
45 | #endif /* HAVE___BUILTIN_ADD_OVERFLOW */ |
46 | |
47 | /* dst = lft + rgt with overflow check */ |
48 | |
49 | /* generic version */ |
50 | #define ADD_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
51 | do { \ |
52 | if ((rgt) < 1) { \ |
53 | if (-(max) - (rgt) > (lft)) { \ |
54 | if (abort_on_error) \ |
55 | on_overflow; \ |
56 | (dst) = TYPE3##_nil; \ |
57 | nils++; \ |
58 | } else { \ |
59 | (dst) = (TYPE3) (lft) + (rgt); \ |
60 | } \ |
61 | } else { \ |
62 | if ((max) - (rgt) < (lft)) { \ |
63 | if (abort_on_error) \ |
64 | on_overflow; \ |
65 | (dst) = TYPE3##_nil; \ |
66 | nils++; \ |
67 | } else { \ |
68 | (dst) = (TYPE3) (lft) + (rgt); \ |
69 | } \ |
70 | } \ |
71 | } while (0) |
72 | |
73 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
74 | /* integer version using Gnu CC builtin function for overflow check */ |
75 | #define ADDI_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
76 | OP_WITH_CHECK(lft, rgt, dst, add, TYPE3##_nil, max, on_overflow) |
77 | #else |
78 | /* integer version using generic version */ |
79 | #define ADDI_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
80 | ADD_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) |
81 | #endif /* HAVE___BUILTIN_ADD_OVERFLOW */ |
82 | |
83 | /* floating point version using generic version */ |
84 | #define ADDF_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
85 | ADD_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) |
86 | |
87 | /* dst = lft - rgt with overflow check */ |
88 | |
89 | /* generic version */ |
90 | #define SUB_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
91 | do { \ |
92 | if ((rgt) < 1) { \ |
93 | if ((max) + (rgt) < (lft)) { \ |
94 | if (abort_on_error) \ |
95 | on_overflow; \ |
96 | (dst) = TYPE3##_nil; \ |
97 | nils++; \ |
98 | } else { \ |
99 | (dst) = (TYPE3) (lft) - (rgt); \ |
100 | } \ |
101 | } else { \ |
102 | if (-(max) + (rgt) > (lft)) { \ |
103 | if (abort_on_error) \ |
104 | on_overflow; \ |
105 | (dst) = TYPE3##_nil; \ |
106 | nils++; \ |
107 | } else { \ |
108 | (dst) = (TYPE3) (lft) - (rgt); \ |
109 | } \ |
110 | } \ |
111 | } while (0) |
112 | |
113 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
114 | /* integer version using Gnu CC builtin function for overflow check */ |
115 | #define SUBI_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
116 | OP_WITH_CHECK(lft, rgt, dst, sub, TYPE3##_nil, max, on_overflow) |
117 | #else |
118 | /* integer version using generic version */ |
119 | #define SUBI_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
120 | SUB_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) |
121 | #endif /* HAVE___BUILTIN_ADD_OVERFLOW */ |
122 | |
123 | /* floating point version using generic version */ |
124 | #define SUBF_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) \ |
125 | SUB_WITH_CHECK(lft, rgt, TYPE3, dst, max, on_overflow) |
126 | |
127 | /* dst = lft * rgt with overflow check */ |
128 | |
129 | /* generic version */ |
130 | #define MUL4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) \ |
131 | do { \ |
132 | TYPE4 c = (TYPE4) (lft) * (rgt); \ |
133 | if (c < (TYPE4) -(max) || \ |
134 | c > (TYPE4) (max)) { \ |
135 | if (abort_on_error) \ |
136 | on_overflow; \ |
137 | (dst) = TYPE3##_nil; \ |
138 | nils++; \ |
139 | } else { \ |
140 | (dst) = (TYPE3) c; \ |
141 | } \ |
142 | } while (0) |
143 | |
144 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
145 | /* integer version using Gnu CC builtin function for overflow check */ |
146 | #define MULI4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) \ |
147 | OP_WITH_CHECK(lft, rgt, dst, mul, TYPE3##_nil, max, on_overflow) |
148 | #define LNGMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
149 | OP_WITH_CHECK(lft, rgt, dst, mul, lng_nil, max, on_overflow) |
150 | #else |
151 | /* integer version using generic version */ |
152 | #define MULI4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) \ |
153 | MUL4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) |
154 | #ifdef HAVE_HGE |
155 | #define LNGMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
156 | MULI4_WITH_CHECK(lft, rgt, lng, dst, max, hge, on_overflow) |
157 | #else |
158 | #if defined(HAVE__MUL128) |
159 | #include <intrin.h> |
160 | #pragma intrinsic(_mul128) |
161 | #define LNGMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
162 | do { \ |
163 | lng clo, chi; \ |
164 | clo = _mul128((lng) (lft), (lng) (rgt), &chi); \ |
165 | if ((chi == 0 && clo >= 0 && clo <= (max)) || \ |
166 | (chi == -1 && clo < 0 && clo >= -(max))) { \ |
167 | (dst) = clo; \ |
168 | } else { \ |
169 | if (abort_on_error) \ |
170 | on_overflow; \ |
171 | (dst) = lng_nil; \ |
172 | nils++; \ |
173 | } \ |
174 | } while (0) |
175 | #else |
176 | #define LNGMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
177 | do { \ |
178 | lng a = (lft), b = (rgt); \ |
179 | unsigned int a1, a2, b1, b2; \ |
180 | ulng c; \ |
181 | int sign = 1; \ |
182 | \ |
183 | if (a < 0) { \ |
184 | sign = -sign; \ |
185 | a = -a; \ |
186 | } \ |
187 | if (b < 0) { \ |
188 | sign = -sign; \ |
189 | b = -b; \ |
190 | } \ |
191 | a1 = (unsigned int) (a >> 32); \ |
192 | a2 = (unsigned int) a; \ |
193 | b1 = (unsigned int) (b >> 32); \ |
194 | b2 = (unsigned int) b; \ |
195 | /* result = (a1*b1<<64) + (a1*b2+a2*b1<<32) + a2*b2 */ \ |
196 | if ((a1 == 0 || b1 == 0) && \ |
197 | ((c = (ulng) a1 * b2 + (ulng) a2 * b1) & (~(ulng)0 << 31)) == 0 && \ |
198 | (((c = (c << 32) + (ulng) a2 * b2) & ((ulng) 1 << 63)) == 0 && \ |
199 | (c) <= (ulng) (max))) { \ |
200 | (dst) = sign * (lng) c; \ |
201 | } else { \ |
202 | if (abort_on_error) \ |
203 | on_overflow; \ |
204 | (dst) = lng_nil; \ |
205 | nils++; \ |
206 | } \ |
207 | } while (0) |
208 | #endif |
209 | #endif /* HAVE_HGE */ |
210 | #endif |
211 | #define MULF4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) \ |
212 | MUL4_WITH_CHECK(lft, rgt, TYPE3, dst, max, TYPE4, on_overflow) |
213 | |
214 | #ifdef HAVE_HGE |
215 | #ifdef HAVE___BUILTIN_ADD_OVERFLOW |
216 | #define HGEMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
217 | OP_WITH_CHECK(lft, rgt, dst, mul, hge_nil, max, on_overflow) |
218 | #else |
219 | #define HGEMUL_CHECK(lft, rgt, dst, max, on_overflow) \ |
220 | do { \ |
221 | hge a = (lft), b = (rgt); \ |
222 | ulng a1, a2, b1, b2; \ |
223 | uhge c; \ |
224 | int sign = 1; \ |
225 | \ |
226 | if (a < 0) { \ |
227 | sign = -sign; \ |
228 | a = -a; \ |
229 | } \ |
230 | if (b < 0) { \ |
231 | sign = -sign; \ |
232 | b = -b; \ |
233 | } \ |
234 | a1 = (ulng) (a >> 64); \ |
235 | a2 = (ulng) a; \ |
236 | b1 = (ulng) (b >> 64); \ |
237 | b2 = (ulng) b; \ |
238 | /* result = (a1*b1<<128) + ((a1*b2+a2*b1)<<64) + a2*b2 */ \ |
239 | if ((a1 == 0 || b1 == 0) && \ |
240 | ((c = (uhge) a1 * b2 + (uhge) a2 * b1) & (~(uhge)0 << 63)) == 0 && \ |
241 | (((c = (c << 64) + (uhge) a2 * b2) & ((uhge) 1 << 127)) == 0) && \ |
242 | (c) <= (uhge) (max)) { \ |
243 | (dst) = sign * (hge) c; \ |
244 | } else { \ |
245 | if (abort_on_error) \ |
246 | on_overflow; \ |
247 | (dst) = hge_nil; \ |
248 | nils++; \ |
249 | } \ |
250 | } while (0) |
251 | #endif /* HAVE___BUILTIN_ADD_OVERFLOW */ |
252 | #endif /* HAVE_HGE */ |
253 | |
254 | #define AVERAGE_ITER(TYPE, x, a, r, n) \ |
255 | do { \ |
256 | TYPE an, xn, z1; \ |
257 | BUN z2; \ |
258 | (n)++; \ |
259 | /* calculate z1 = (x - a) / n, rounded down (towards */ \ |
260 | /* negative infinity), and calculate z2 = remainder */ \ |
261 | /* of the division (i.e. 0 <= z2 < n); do this */ \ |
262 | /* without causing overflow */ \ |
263 | an = (TYPE) ((a) / (SBUN) (n)); \ |
264 | xn = (TYPE) ((x) / (SBUN) (n)); \ |
265 | /* z1 will be (x - a) / n rounded towards -INF */ \ |
266 | z1 = xn - an; \ |
267 | xn = (x) - (TYPE) (xn * (SBUN) (n)); \ |
268 | an = (a) - (TYPE) (an * (SBUN) (n)); \ |
269 | /* z2 will be remainder of above division */ \ |
270 | if (xn >= an) { \ |
271 | z2 = (BUN) (xn - an); \ |
272 | /* loop invariant: */ \ |
273 | /* (x - a) - z1 * n == z2 */ \ |
274 | while (z2 >= (BUN) (n)) { \ |
275 | z2 -= (BUN) (n); \ |
276 | z1++; \ |
277 | } \ |
278 | } else { \ |
279 | z2 = (BUN) (an - xn); \ |
280 | /* loop invariant (until we break): */ \ |
281 | /* (x - a) - z1 * n == -z2 */ \ |
282 | for (;;) { \ |
283 | z1--; \ |
284 | if (z2 < (BUN) (n)) { \ |
285 | /* proper remainder */ \ |
286 | z2 = (BUN) ((n) - z2); \ |
287 | break; \ |
288 | } \ |
289 | z2 -= (BUN) (n); \ |
290 | } \ |
291 | } \ |
292 | (a) += z1; \ |
293 | (r) += z2; \ |
294 | if ((r) >= (BUN) (n)) { \ |
295 | (r) -= (BUN) (n); \ |
296 | (a)++; \ |
297 | } \ |
298 | } while (0) |
299 | |
300 | #define AVERAGE_ITER_FLOAT(TYPE, x, a, n) \ |
301 | do { \ |
302 | (n)++; \ |
303 | if (((a) > 0) == ((x) > 0)) { \ |
304 | /* same sign */ \ |
305 | (a) += ((x) - (a)) / (SBUN) (n); \ |
306 | } else { \ |
307 | /* no overflow at the cost of an */ \ |
308 | /* extra division and slight loss of */ \ |
309 | /* precision */ \ |
310 | (a) = (a) - (a) / (SBUN) (n) + (x) / (SBUN) (n); \ |
311 | } \ |
312 | } while (0) |
313 | |
314 | __hidden BUN dofsum(const void *restrict values, oid seqb, |
315 | struct canditer *restrict ci, BUN ncand, |
316 | void *restrict results, BUN ngrp, int tp1, int tp2, |
317 | const oid *restrict gids, |
318 | oid min, oid max, bool skip_nils, bool abort_on_error, |
319 | bool nil_if_empty) |
320 | __attribute__((__visibility__("hidden" ))); |
321 | |