1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * @a stefan manegold |
11 | * @+ |
12 | * The microbenchmark routines are primarilly used to create a |
13 | * simple database for testing the performance of core routines. |
14 | * It was originally developed in the context of the Radix Cluster |
15 | * activities. |
16 | * @f microbenchmark |
17 | */ |
18 | #include "monetdb_config.h" |
19 | #include "mal.h" |
20 | #include <math.h> |
21 | #include "mal_exception.h" |
22 | #include "microbenchmark.h" |
23 | |
24 | #ifdef STATIC_CODE_ANALYSIS |
25 | #define rand() 0 |
26 | #endif |
27 | |
28 | static gdk_return |
29 | BATrandom(BAT **bn, oid *base, lng *size, int *domain, int seed) |
30 | { |
31 | const BUN n = (BUN) * size; |
32 | BUN i; |
33 | BAT *b = NULL; |
34 | int *restrict val; |
35 | |
36 | if (*size > (lng)BUN_MAX) { |
37 | GDKerror("BATrandom: size must not exceed BUN_MAX" ); |
38 | return GDK_FAIL; |
39 | } |
40 | |
41 | if (*size < 0) { |
42 | GDKerror("BATrandom: size must not be negative" ); |
43 | return GDK_FAIL; |
44 | } |
45 | |
46 | b = COLnew(*base, TYPE_int, n, TRANSIENT); |
47 | if (b == NULL) |
48 | return GDK_FAIL; |
49 | if (n == 0) { |
50 | b->tsorted = true; |
51 | b->trevsorted = false; |
52 | b->tseqbase = oid_nil; |
53 | BATkey(b, true); |
54 | *bn = b; |
55 | return GDK_SUCCEED; |
56 | } |
57 | val = (int *) Tloc(b, 0); |
58 | |
59 | /* create BUNs with random distribution */ |
60 | if (!is_int_nil(seed)) |
61 | srand(seed); |
62 | if (is_int_nil(*domain)) { |
63 | for (i = 0; i < n; i++) { |
64 | val[i] = rand(); |
65 | } |
66 | #if RAND_MAX < 46340 /* 46340*46340 = 2147395600 < INT_MAX */ |
67 | } else if (*domain > RAND_MAX + 1) { |
68 | for (i = 0; i < n; i++) { |
69 | val[i] = (rand() * (RAND_MAX + 1) + rand()) % *domain; |
70 | } |
71 | #endif |
72 | } else { |
73 | for (i = 0; i < n; i++) { |
74 | val[i] = rand() % *domain; |
75 | } |
76 | } |
77 | |
78 | BATsetcount(b, n); |
79 | b->tsorted = false; |
80 | b->trevsorted = false; |
81 | b->tseqbase = oid_nil; |
82 | BATkey(b, false); |
83 | *bn = b; |
84 | return GDK_SUCCEED; |
85 | } |
86 | |
87 | static gdk_return |
88 | BATuniform(BAT **bn, oid *base, lng *size, int *domain) |
89 | { |
90 | const BUN n = (BUN) * size; |
91 | BUN i, r; |
92 | BAT *b = NULL; |
93 | int *restrict val; |
94 | int v; |
95 | |
96 | if (*size > (lng)BUN_MAX) { |
97 | GDKerror("BATuniform: size must not exceed BUN_MAX" ); |
98 | return GDK_FAIL; |
99 | } |
100 | |
101 | if (*size < 0) { |
102 | GDKerror("BATuniform: size must not be negative" ); |
103 | return GDK_FAIL; |
104 | } |
105 | |
106 | b = COLnew(*base, TYPE_int, n, TRANSIENT); |
107 | if (b == NULL) |
108 | return GDK_FAIL; |
109 | if (n == 0) { |
110 | b->tsorted = true; |
111 | b->trevsorted = false; |
112 | b->tseqbase = oid_nil; |
113 | BATkey(b, true); |
114 | *bn = b; |
115 | return GDK_SUCCEED; |
116 | } |
117 | val = (int *) Tloc(b, 0); |
118 | |
119 | /* create BUNs with uniform distribution */ |
120 | for (v = 0, i = 0; i < n; i++) { |
121 | val[i] = v; |
122 | if (++v >= *domain) |
123 | v = 0; |
124 | } |
125 | |
126 | /* mix BUNs randomly */ |
127 | for (r = 0, i = 0; i < n; i++) { |
128 | const BUN j = i + ((r += rand()) % (n - i)); |
129 | const int tmp = val[i]; |
130 | |
131 | val[i] = val[j]; |
132 | val[j] = tmp; |
133 | } |
134 | |
135 | BATsetcount(b, n); |
136 | b->tsorted = false; |
137 | b->trevsorted = false; |
138 | b->tseqbase = oid_nil; |
139 | BATkey(b, *size <= *domain); |
140 | *bn = b; |
141 | return GDK_SUCCEED; |
142 | } |
143 | |
144 | static gdk_return |
145 | BATskewed(BAT **bn, oid *base, lng *size, int *domain, int *skew) |
146 | { |
147 | const BUN n = (BUN) * size; |
148 | BUN i, r; |
149 | BAT *b = NULL; |
150 | int *restrict val; |
151 | const BUN skewedSize = ((*skew) * n) / 100; |
152 | const int skewedDomain = ((100 - (*skew)) * (*domain)) / 100; |
153 | |
154 | if (*size > (lng)BUN_MAX) { |
155 | GDKerror("BATskewed: size must not exceed BUN_MAX = " BUNFMT, BUN_MAX); |
156 | return GDK_FAIL; |
157 | } |
158 | |
159 | if (*size < 0) { |
160 | GDKerror("BATskewed: size must not be negative" ); |
161 | return GDK_FAIL; |
162 | } |
163 | |
164 | if (*skew > 100 || *skew < 0) { |
165 | GDKerror("BATskewed: skew must be between 0 and 100" ); |
166 | return GDK_FAIL; |
167 | } |
168 | |
169 | b = COLnew(*base, TYPE_int, n, TRANSIENT); |
170 | if (b == NULL) |
171 | return GDK_FAIL; |
172 | if (n == 0) { |
173 | b->tsorted = true; |
174 | b->trevsorted = false; |
175 | b->tseqbase = oid_nil; |
176 | BATkey(b, true); |
177 | *bn = b; |
178 | return GDK_SUCCEED; |
179 | } |
180 | val = (int *) Tloc(b, 0); |
181 | |
182 | /* create BUNs with skewed distribution */ |
183 | for (i = 0; i < skewedSize; i++) |
184 | val[i] = rand() % skewedDomain; |
185 | for( ; i < n; i++) |
186 | val[i] = (rand() % (*domain - skewedDomain)) + skewedDomain; |
187 | |
188 | /* mix BUNs randomly */ |
189 | for (r = 0, i = 0; i < n; i++) { |
190 | const BUN j = i + ((r += rand()) % (n - i)); |
191 | const int tmp = val[i]; |
192 | |
193 | val[i] = val[j]; |
194 | val[j] = tmp; |
195 | } |
196 | |
197 | BATsetcount(b, n); |
198 | b->tsorted = false; |
199 | b->trevsorted = false; |
200 | b->tseqbase = oid_nil; |
201 | BATkey(b, *size <= *domain); |
202 | *bn = b; |
203 | return GDK_SUCCEED; |
204 | } |
205 | |
206 | |
207 | /* math.h files do not have M_PI/M_E defined */ |
208 | #ifndef M_PI |
209 | # define M_PI ((dbl) 3.14159265358979323846) /* pi */ |
210 | #endif |
211 | #ifndef M_E |
212 | # define M_E ((dbl) 2.7182818284590452354) /* e */ |
213 | #endif |
214 | |
215 | static gdk_return |
216 | BATnormal(BAT **bn, oid *base, lng *size, int *domain, int *stddev, int *mean) |
217 | { |
218 | const BUN n = (BUN) * size; |
219 | BUN i, r; |
220 | const int d = (*domain < 0 ? 0 : *domain); |
221 | int j; |
222 | BAT *b = NULL; |
223 | int *restrict val; |
224 | const int m = *mean, s = *stddev; |
225 | unsigned int *restrict abs; |
226 | flt *restrict rel; |
227 | dbl tot = 0; |
228 | const dbl s_s_2 = (dbl) s * (dbl) s * 2; |
229 | const dbl s_sqrt_2_pi = ((dbl) s * sqrt(2 * M_PI)); |
230 | |
231 | assert(sizeof(unsigned int) == sizeof(flt)); |
232 | |
233 | #if SIZEOF_BUN > 4 |
234 | if (n >= ((ulng) 1 << 32)) { |
235 | GDKerror("BATnormal: size must be less than 2^32 = " LLFMT, (lng) 1 << 32); |
236 | return GDK_FAIL; |
237 | } |
238 | #endif |
239 | if (*size > (lng)BUN_MAX) { |
240 | GDKerror("BATnormal: size must not exceed BUN_MAX" ); |
241 | return GDK_FAIL; |
242 | } |
243 | |
244 | if (*size < 0) { |
245 | GDKerror("BATnormal: size must not be negative" ); |
246 | return GDK_FAIL; |
247 | } |
248 | |
249 | b = COLnew(*base, TYPE_int, n, TRANSIENT); |
250 | if (b == NULL) { |
251 | return GDK_FAIL; |
252 | } |
253 | if (n == 0) { |
254 | b->tsorted = true; |
255 | b->trevsorted = false; |
256 | b->tseqbase = oid_nil; |
257 | BATkey(b, true); |
258 | *bn = b; |
259 | return GDK_SUCCEED; |
260 | } |
261 | val = (int *) Tloc(b, 0); |
262 | |
263 | abs = (unsigned int *) GDKmalloc(d * sizeof(unsigned int)); |
264 | if (abs == NULL) { |
265 | BBPreclaim(b); |
266 | return GDK_FAIL; |
267 | } |
268 | rel = (flt *) abs; |
269 | |
270 | /* assert(0 <= *mean && *mean < *size); */ |
271 | |
272 | /* created inverted table with rel fraction per value */ |
273 | for (tot = 0, j = 0; j < d; j++) { |
274 | const dbl j_m = (dbl) j - m; |
275 | const dbl tmp = j_m * j_m / s_s_2; |
276 | |
277 | rel[j] = (flt) (pow(M_E, -tmp) / s_sqrt_2_pi); |
278 | tot += rel[j]; |
279 | } |
280 | /* just in case we get tot != 1 due to. e.g., |
281 | * rounding errors, limited precision, or limited domain */ |
282 | tot = 1.0 / tot; |
283 | /* calculate abs count per value from rel fraction */ |
284 | for (r = n, j = 0; j < d; j++) { |
285 | assert(((dbl) n * rel[j] * tot) < (dbl) ((lng) 1 << 32)); |
286 | abs[j] = (unsigned int) ((dbl) n * rel[j] * tot); |
287 | assert(r >= abs[j]); |
288 | r -= abs[j]; |
289 | } |
290 | assert(((ulng) 1 << 32) - r > abs[m]); |
291 | abs[m] += (unsigned int) r; |
292 | |
293 | /* create BUNs with normal distribution */ |
294 | for (j = 0, i = 0; i < n && j < d; i++) { |
295 | while (j < d && abs[j] == 0) |
296 | j++; |
297 | if (j < d) { |
298 | val[i] = j; |
299 | abs[j]--; |
300 | } |
301 | } |
302 | assert(i == n); |
303 | while (j < d && abs[j] == 0) |
304 | j++; |
305 | assert(j == d); |
306 | GDKfree(abs); |
307 | |
308 | |
309 | BATsetcount(b, n); |
310 | b->tsorted = false; |
311 | b->trevsorted = false; |
312 | b->tseqbase = oid_nil; |
313 | BATkey(b, n<2); |
314 | *bn = b; |
315 | return GDK_SUCCEED; |
316 | } |
317 | /* |
318 | * @- |
319 | * The M5 wrapper code |
320 | */ |
321 | |
322 | str |
323 | MBMrandom(bat *ret, oid *base, lng *size, int *domain){ |
324 | return MBMrandom_seed ( ret, base, size, domain, &int_nil ); |
325 | } |
326 | |
327 | str |
328 | MBMrandom_seed(bat *ret, oid *base, lng *size, int *domain, const int *seed){ |
329 | BAT *bn = NULL; |
330 | |
331 | BATrandom(&bn, base, size, domain, *seed); |
332 | if( bn ){ |
333 | BBPkeepref(*ret= bn->batCacheid); |
334 | } else throw(MAL, "microbenchmark.random" , OPERATION_FAILED); |
335 | return MAL_SUCCEED; |
336 | } |
337 | |
338 | |
339 | str |
340 | MBMuniform(bat *ret, oid *base, lng *size, int *domain){ |
341 | BAT *bn = NULL; |
342 | |
343 | BATuniform(&bn, base, size, domain); |
344 | if( bn ){ |
345 | BBPkeepref(*ret= bn->batCacheid); |
346 | } else throw(MAL, "microbenchmark.uniform" , OPERATION_FAILED); |
347 | return MAL_SUCCEED; |
348 | } |
349 | |
350 | str |
351 | MBMnormal(bat *ret, oid *base, lng *size, int *domain, int *stddev, int *mean){ |
352 | BAT *bn = NULL; |
353 | BATnormal(&bn, base, size, domain, stddev, mean); |
354 | if( bn ){ |
355 | BBPkeepref(*ret= bn->batCacheid); |
356 | } else throw(MAL, "microbenchmark.normal" , OPERATION_FAILED); |
357 | return MAL_SUCCEED; |
358 | } |
359 | |
360 | |
361 | str |
362 | MBMmix(bat *bn, bat *batid) |
363 | { |
364 | BUN n, r, i; |
365 | BAT *b; |
366 | |
367 | if ((b = BATdescriptor(*batid)) == NULL) |
368 | throw(MAL, "microbenchmark.mix" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
369 | |
370 | n = BATcount(b); |
371 | /* mix BUNs randomly */ |
372 | for (r = i = 0; i < n; i++) { |
373 | BUN idx = i + ((r += (BUN) rand()) % (n - i)); |
374 | int val; |
375 | |
376 | val = *(int *) Tloc(b, i); |
377 | *(int *) Tloc(b, i) = *(int *) Tloc(b, idx); |
378 | *(int *) Tloc(b, idx) = val; |
379 | } |
380 | |
381 | BBPunfix(b->batCacheid); |
382 | *bn = b->batCacheid; |
383 | |
384 | return MAL_SUCCEED; |
385 | } |
386 | |
387 | str |
388 | MBMskewed(bat *ret, oid *base, lng *size, int *domain, int *skew){ |
389 | BAT *bn = NULL; |
390 | |
391 | BATskewed(&bn, base, size, domain, skew); |
392 | if( bn ){ |
393 | BBPkeepref(*ret= bn->batCacheid); |
394 | } else throw(MAL, "microbenchmark.skewed" , OPERATION_FAILED); |
395 | return MAL_SUCCEED; |
396 | } |
397 | |