1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * @a stefan manegold
11 * @+
12 * The microbenchmark routines are primarilly used to create a
13 * simple database for testing the performance of core routines.
14 * It was originally developed in the context of the Radix Cluster
15 * activities.
16 * @f microbenchmark
17 */
18#include "monetdb_config.h"
19#include "mal.h"
20#include <math.h>
21#include "mal_exception.h"
22#include "microbenchmark.h"
23
24#ifdef STATIC_CODE_ANALYSIS
25#define rand() 0
26#endif
27
28static gdk_return
29BATrandom(BAT **bn, oid *base, lng *size, int *domain, int seed)
30{
31 const BUN n = (BUN) * size;
32 BUN i;
33 BAT *b = NULL;
34 int *restrict val;
35
36 if (*size > (lng)BUN_MAX) {
37 GDKerror("BATrandom: size must not exceed BUN_MAX");
38 return GDK_FAIL;
39 }
40
41 if (*size < 0) {
42 GDKerror("BATrandom: size must not be negative");
43 return GDK_FAIL;
44 }
45
46 b = COLnew(*base, TYPE_int, n, TRANSIENT);
47 if (b == NULL)
48 return GDK_FAIL;
49 if (n == 0) {
50 b->tsorted = true;
51 b->trevsorted = false;
52 b->tseqbase = oid_nil;
53 BATkey(b, true);
54 *bn = b;
55 return GDK_SUCCEED;
56 }
57 val = (int *) Tloc(b, 0);
58
59 /* create BUNs with random distribution */
60 if (!is_int_nil(seed))
61 srand(seed);
62 if (is_int_nil(*domain)) {
63 for (i = 0; i < n; i++) {
64 val[i] = rand();
65 }
66#if RAND_MAX < 46340 /* 46340*46340 = 2147395600 < INT_MAX */
67 } else if (*domain > RAND_MAX + 1) {
68 for (i = 0; i < n; i++) {
69 val[i] = (rand() * (RAND_MAX + 1) + rand()) % *domain;
70 }
71#endif
72 } else {
73 for (i = 0; i < n; i++) {
74 val[i] = rand() % *domain;
75 }
76 }
77
78 BATsetcount(b, n);
79 b->tsorted = false;
80 b->trevsorted = false;
81 b->tseqbase = oid_nil;
82 BATkey(b, false);
83 *bn = b;
84 return GDK_SUCCEED;
85}
86
87static gdk_return
88BATuniform(BAT **bn, oid *base, lng *size, int *domain)
89{
90 const BUN n = (BUN) * size;
91 BUN i, r;
92 BAT *b = NULL;
93 int *restrict val;
94 int v;
95
96 if (*size > (lng)BUN_MAX) {
97 GDKerror("BATuniform: size must not exceed BUN_MAX");
98 return GDK_FAIL;
99 }
100
101 if (*size < 0) {
102 GDKerror("BATuniform: size must not be negative");
103 return GDK_FAIL;
104 }
105
106 b = COLnew(*base, TYPE_int, n, TRANSIENT);
107 if (b == NULL)
108 return GDK_FAIL;
109 if (n == 0) {
110 b->tsorted = true;
111 b->trevsorted = false;
112 b->tseqbase = oid_nil;
113 BATkey(b, true);
114 *bn = b;
115 return GDK_SUCCEED;
116 }
117 val = (int *) Tloc(b, 0);
118
119 /* create BUNs with uniform distribution */
120 for (v = 0, i = 0; i < n; i++) {
121 val[i] = v;
122 if (++v >= *domain)
123 v = 0;
124 }
125
126 /* mix BUNs randomly */
127 for (r = 0, i = 0; i < n; i++) {
128 const BUN j = i + ((r += rand()) % (n - i));
129 const int tmp = val[i];
130
131 val[i] = val[j];
132 val[j] = tmp;
133 }
134
135 BATsetcount(b, n);
136 b->tsorted = false;
137 b->trevsorted = false;
138 b->tseqbase = oid_nil;
139 BATkey(b, *size <= *domain);
140 *bn = b;
141 return GDK_SUCCEED;
142}
143
144static gdk_return
145BATskewed(BAT **bn, oid *base, lng *size, int *domain, int *skew)
146{
147 const BUN n = (BUN) * size;
148 BUN i, r;
149 BAT *b = NULL;
150 int *restrict val;
151 const BUN skewedSize = ((*skew) * n) / 100;
152 const int skewedDomain = ((100 - (*skew)) * (*domain)) / 100;
153
154 if (*size > (lng)BUN_MAX) {
155 GDKerror("BATskewed: size must not exceed BUN_MAX = " BUNFMT, BUN_MAX);
156 return GDK_FAIL;
157 }
158
159 if (*size < 0) {
160 GDKerror("BATskewed: size must not be negative");
161 return GDK_FAIL;
162 }
163
164 if (*skew > 100 || *skew < 0) {
165 GDKerror("BATskewed: skew must be between 0 and 100");
166 return GDK_FAIL;
167 }
168
169 b = COLnew(*base, TYPE_int, n, TRANSIENT);
170 if (b == NULL)
171 return GDK_FAIL;
172 if (n == 0) {
173 b->tsorted = true;
174 b->trevsorted = false;
175 b->tseqbase = oid_nil;
176 BATkey(b, true);
177 *bn = b;
178 return GDK_SUCCEED;
179 }
180 val = (int *) Tloc(b, 0);
181
182 /* create BUNs with skewed distribution */
183 for (i = 0; i < skewedSize; i++)
184 val[i] = rand() % skewedDomain;
185 for( ; i < n; i++)
186 val[i] = (rand() % (*domain - skewedDomain)) + skewedDomain;
187
188 /* mix BUNs randomly */
189 for (r = 0, i = 0; i < n; i++) {
190 const BUN j = i + ((r += rand()) % (n - i));
191 const int tmp = val[i];
192
193 val[i] = val[j];
194 val[j] = tmp;
195 }
196
197 BATsetcount(b, n);
198 b->tsorted = false;
199 b->trevsorted = false;
200 b->tseqbase = oid_nil;
201 BATkey(b, *size <= *domain);
202 *bn = b;
203 return GDK_SUCCEED;
204}
205
206
207/* math.h files do not have M_PI/M_E defined */
208#ifndef M_PI
209# define M_PI ((dbl) 3.14159265358979323846) /* pi */
210#endif
211#ifndef M_E
212# define M_E ((dbl) 2.7182818284590452354) /* e */
213#endif
214
215static gdk_return
216BATnormal(BAT **bn, oid *base, lng *size, int *domain, int *stddev, int *mean)
217{
218 const BUN n = (BUN) * size;
219 BUN i, r;
220 const int d = (*domain < 0 ? 0 : *domain);
221 int j;
222 BAT *b = NULL;
223 int *restrict val;
224 const int m = *mean, s = *stddev;
225 unsigned int *restrict abs;
226 flt *restrict rel;
227 dbl tot = 0;
228 const dbl s_s_2 = (dbl) s * (dbl) s * 2;
229 const dbl s_sqrt_2_pi = ((dbl) s * sqrt(2 * M_PI));
230
231 assert(sizeof(unsigned int) == sizeof(flt));
232
233#if SIZEOF_BUN > 4
234 if (n >= ((ulng) 1 << 32)) {
235 GDKerror("BATnormal: size must be less than 2^32 = "LLFMT, (lng) 1 << 32);
236 return GDK_FAIL;
237 }
238#endif
239 if (*size > (lng)BUN_MAX) {
240 GDKerror("BATnormal: size must not exceed BUN_MAX");
241 return GDK_FAIL;
242 }
243
244 if (*size < 0) {
245 GDKerror("BATnormal: size must not be negative");
246 return GDK_FAIL;
247 }
248
249 b = COLnew(*base, TYPE_int, n, TRANSIENT);
250 if (b == NULL) {
251 return GDK_FAIL;
252 }
253 if (n == 0) {
254 b->tsorted = true;
255 b->trevsorted = false;
256 b->tseqbase = oid_nil;
257 BATkey(b, true);
258 *bn = b;
259 return GDK_SUCCEED;
260 }
261 val = (int *) Tloc(b, 0);
262
263 abs = (unsigned int *) GDKmalloc(d * sizeof(unsigned int));
264 if (abs == NULL) {
265 BBPreclaim(b);
266 return GDK_FAIL;
267 }
268 rel = (flt *) abs;
269
270 /* assert(0 <= *mean && *mean < *size); */
271
272 /* created inverted table with rel fraction per value */
273 for (tot = 0, j = 0; j < d; j++) {
274 const dbl j_m = (dbl) j - m;
275 const dbl tmp = j_m * j_m / s_s_2;
276
277 rel[j] = (flt) (pow(M_E, -tmp) / s_sqrt_2_pi);
278 tot += rel[j];
279 }
280 /* just in case we get tot != 1 due to. e.g.,
281 * rounding errors, limited precision, or limited domain */
282 tot = 1.0 / tot;
283 /* calculate abs count per value from rel fraction */
284 for (r = n, j = 0; j < d; j++) {
285 assert(((dbl) n * rel[j] * tot) < (dbl) ((lng) 1 << 32));
286 abs[j] = (unsigned int) ((dbl) n * rel[j] * tot);
287 assert(r >= abs[j]);
288 r -= abs[j];
289 }
290 assert(((ulng) 1 << 32) - r > abs[m]);
291 abs[m] += (unsigned int) r;
292
293 /* create BUNs with normal distribution */
294 for (j = 0, i = 0; i < n && j < d; i++) {
295 while (j < d && abs[j] == 0)
296 j++;
297 if (j < d) {
298 val[i] = j;
299 abs[j]--;
300 }
301 }
302 assert(i == n);
303 while (j < d && abs[j] == 0)
304 j++;
305 assert(j == d);
306 GDKfree(abs);
307
308
309 BATsetcount(b, n);
310 b->tsorted = false;
311 b->trevsorted = false;
312 b->tseqbase = oid_nil;
313 BATkey(b, n<2);
314 *bn = b;
315 return GDK_SUCCEED;
316}
317/*
318 * @-
319 * The M5 wrapper code
320 */
321
322str
323MBMrandom(bat *ret, oid *base, lng *size, int *domain){
324 return MBMrandom_seed ( ret, base, size, domain, &int_nil );
325}
326
327str
328MBMrandom_seed(bat *ret, oid *base, lng *size, int *domain, const int *seed){
329 BAT *bn = NULL;
330
331 BATrandom(&bn, base, size, domain, *seed);
332 if( bn ){
333 BBPkeepref(*ret= bn->batCacheid);
334 } else throw(MAL, "microbenchmark.random", OPERATION_FAILED);
335 return MAL_SUCCEED;
336}
337
338
339str
340MBMuniform(bat *ret, oid *base, lng *size, int *domain){
341 BAT *bn = NULL;
342
343 BATuniform(&bn, base, size, domain);
344 if( bn ){
345 BBPkeepref(*ret= bn->batCacheid);
346 } else throw(MAL, "microbenchmark.uniform", OPERATION_FAILED);
347 return MAL_SUCCEED;
348}
349
350str
351MBMnormal(bat *ret, oid *base, lng *size, int *domain, int *stddev, int *mean){
352 BAT *bn = NULL;
353 BATnormal(&bn, base, size, domain, stddev, mean);
354 if( bn ){
355 BBPkeepref(*ret= bn->batCacheid);
356 } else throw(MAL, "microbenchmark.normal", OPERATION_FAILED);
357 return MAL_SUCCEED;
358}
359
360
361str
362MBMmix(bat *bn, bat *batid)
363{
364 BUN n, r, i;
365 BAT *b;
366
367 if ((b = BATdescriptor(*batid)) == NULL)
368 throw(MAL, "microbenchmark.mix", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
369
370 n = BATcount(b);
371 /* mix BUNs randomly */
372 for (r = i = 0; i < n; i++) {
373 BUN idx = i + ((r += (BUN) rand()) % (n - i));
374 int val;
375
376 val = *(int *) Tloc(b, i);
377 *(int *) Tloc(b, i) = *(int *) Tloc(b, idx);
378 *(int *) Tloc(b, idx) = val;
379 }
380
381 BBPunfix(b->batCacheid);
382 *bn = b->batCacheid;
383
384 return MAL_SUCCEED;
385}
386
387str
388MBMskewed(bat *ret, oid *base, lng *size, int *domain, int *skew){
389 BAT *bn = NULL;
390
391 BATskewed(&bn, base, size, domain, skew);
392 if( bn ){
393 BBPkeepref(*ret= bn->batCacheid);
394 } else throw(MAL, "microbenchmark.skewed", OPERATION_FAILED);
395 return MAL_SUCCEED;
396}
397