| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /* |
| 10 | * @a stefan manegold |
| 11 | * @+ |
| 12 | * The microbenchmark routines are primarilly used to create a |
| 13 | * simple database for testing the performance of core routines. |
| 14 | * It was originally developed in the context of the Radix Cluster |
| 15 | * activities. |
| 16 | * @f microbenchmark |
| 17 | */ |
| 18 | #include "monetdb_config.h" |
| 19 | #include "mal.h" |
| 20 | #include <math.h> |
| 21 | #include "mal_exception.h" |
| 22 | #include "microbenchmark.h" |
| 23 | |
| 24 | #ifdef STATIC_CODE_ANALYSIS |
| 25 | #define rand() 0 |
| 26 | #endif |
| 27 | |
| 28 | static gdk_return |
| 29 | BATrandom(BAT **bn, oid *base, lng *size, int *domain, int seed) |
| 30 | { |
| 31 | const BUN n = (BUN) * size; |
| 32 | BUN i; |
| 33 | BAT *b = NULL; |
| 34 | int *restrict val; |
| 35 | |
| 36 | if (*size > (lng)BUN_MAX) { |
| 37 | GDKerror("BATrandom: size must not exceed BUN_MAX" ); |
| 38 | return GDK_FAIL; |
| 39 | } |
| 40 | |
| 41 | if (*size < 0) { |
| 42 | GDKerror("BATrandom: size must not be negative" ); |
| 43 | return GDK_FAIL; |
| 44 | } |
| 45 | |
| 46 | b = COLnew(*base, TYPE_int, n, TRANSIENT); |
| 47 | if (b == NULL) |
| 48 | return GDK_FAIL; |
| 49 | if (n == 0) { |
| 50 | b->tsorted = true; |
| 51 | b->trevsorted = false; |
| 52 | b->tseqbase = oid_nil; |
| 53 | BATkey(b, true); |
| 54 | *bn = b; |
| 55 | return GDK_SUCCEED; |
| 56 | } |
| 57 | val = (int *) Tloc(b, 0); |
| 58 | |
| 59 | /* create BUNs with random distribution */ |
| 60 | if (!is_int_nil(seed)) |
| 61 | srand(seed); |
| 62 | if (is_int_nil(*domain)) { |
| 63 | for (i = 0; i < n; i++) { |
| 64 | val[i] = rand(); |
| 65 | } |
| 66 | #if RAND_MAX < 46340 /* 46340*46340 = 2147395600 < INT_MAX */ |
| 67 | } else if (*domain > RAND_MAX + 1) { |
| 68 | for (i = 0; i < n; i++) { |
| 69 | val[i] = (rand() * (RAND_MAX + 1) + rand()) % *domain; |
| 70 | } |
| 71 | #endif |
| 72 | } else { |
| 73 | for (i = 0; i < n; i++) { |
| 74 | val[i] = rand() % *domain; |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | BATsetcount(b, n); |
| 79 | b->tsorted = false; |
| 80 | b->trevsorted = false; |
| 81 | b->tseqbase = oid_nil; |
| 82 | BATkey(b, false); |
| 83 | *bn = b; |
| 84 | return GDK_SUCCEED; |
| 85 | } |
| 86 | |
| 87 | static gdk_return |
| 88 | BATuniform(BAT **bn, oid *base, lng *size, int *domain) |
| 89 | { |
| 90 | const BUN n = (BUN) * size; |
| 91 | BUN i, r; |
| 92 | BAT *b = NULL; |
| 93 | int *restrict val; |
| 94 | int v; |
| 95 | |
| 96 | if (*size > (lng)BUN_MAX) { |
| 97 | GDKerror("BATuniform: size must not exceed BUN_MAX" ); |
| 98 | return GDK_FAIL; |
| 99 | } |
| 100 | |
| 101 | if (*size < 0) { |
| 102 | GDKerror("BATuniform: size must not be negative" ); |
| 103 | return GDK_FAIL; |
| 104 | } |
| 105 | |
| 106 | b = COLnew(*base, TYPE_int, n, TRANSIENT); |
| 107 | if (b == NULL) |
| 108 | return GDK_FAIL; |
| 109 | if (n == 0) { |
| 110 | b->tsorted = true; |
| 111 | b->trevsorted = false; |
| 112 | b->tseqbase = oid_nil; |
| 113 | BATkey(b, true); |
| 114 | *bn = b; |
| 115 | return GDK_SUCCEED; |
| 116 | } |
| 117 | val = (int *) Tloc(b, 0); |
| 118 | |
| 119 | /* create BUNs with uniform distribution */ |
| 120 | for (v = 0, i = 0; i < n; i++) { |
| 121 | val[i] = v; |
| 122 | if (++v >= *domain) |
| 123 | v = 0; |
| 124 | } |
| 125 | |
| 126 | /* mix BUNs randomly */ |
| 127 | for (r = 0, i = 0; i < n; i++) { |
| 128 | const BUN j = i + ((r += rand()) % (n - i)); |
| 129 | const int tmp = val[i]; |
| 130 | |
| 131 | val[i] = val[j]; |
| 132 | val[j] = tmp; |
| 133 | } |
| 134 | |
| 135 | BATsetcount(b, n); |
| 136 | b->tsorted = false; |
| 137 | b->trevsorted = false; |
| 138 | b->tseqbase = oid_nil; |
| 139 | BATkey(b, *size <= *domain); |
| 140 | *bn = b; |
| 141 | return GDK_SUCCEED; |
| 142 | } |
| 143 | |
| 144 | static gdk_return |
| 145 | BATskewed(BAT **bn, oid *base, lng *size, int *domain, int *skew) |
| 146 | { |
| 147 | const BUN n = (BUN) * size; |
| 148 | BUN i, r; |
| 149 | BAT *b = NULL; |
| 150 | int *restrict val; |
| 151 | const BUN skewedSize = ((*skew) * n) / 100; |
| 152 | const int skewedDomain = ((100 - (*skew)) * (*domain)) / 100; |
| 153 | |
| 154 | if (*size > (lng)BUN_MAX) { |
| 155 | GDKerror("BATskewed: size must not exceed BUN_MAX = " BUNFMT, BUN_MAX); |
| 156 | return GDK_FAIL; |
| 157 | } |
| 158 | |
| 159 | if (*size < 0) { |
| 160 | GDKerror("BATskewed: size must not be negative" ); |
| 161 | return GDK_FAIL; |
| 162 | } |
| 163 | |
| 164 | if (*skew > 100 || *skew < 0) { |
| 165 | GDKerror("BATskewed: skew must be between 0 and 100" ); |
| 166 | return GDK_FAIL; |
| 167 | } |
| 168 | |
| 169 | b = COLnew(*base, TYPE_int, n, TRANSIENT); |
| 170 | if (b == NULL) |
| 171 | return GDK_FAIL; |
| 172 | if (n == 0) { |
| 173 | b->tsorted = true; |
| 174 | b->trevsorted = false; |
| 175 | b->tseqbase = oid_nil; |
| 176 | BATkey(b, true); |
| 177 | *bn = b; |
| 178 | return GDK_SUCCEED; |
| 179 | } |
| 180 | val = (int *) Tloc(b, 0); |
| 181 | |
| 182 | /* create BUNs with skewed distribution */ |
| 183 | for (i = 0; i < skewedSize; i++) |
| 184 | val[i] = rand() % skewedDomain; |
| 185 | for( ; i < n; i++) |
| 186 | val[i] = (rand() % (*domain - skewedDomain)) + skewedDomain; |
| 187 | |
| 188 | /* mix BUNs randomly */ |
| 189 | for (r = 0, i = 0; i < n; i++) { |
| 190 | const BUN j = i + ((r += rand()) % (n - i)); |
| 191 | const int tmp = val[i]; |
| 192 | |
| 193 | val[i] = val[j]; |
| 194 | val[j] = tmp; |
| 195 | } |
| 196 | |
| 197 | BATsetcount(b, n); |
| 198 | b->tsorted = false; |
| 199 | b->trevsorted = false; |
| 200 | b->tseqbase = oid_nil; |
| 201 | BATkey(b, *size <= *domain); |
| 202 | *bn = b; |
| 203 | return GDK_SUCCEED; |
| 204 | } |
| 205 | |
| 206 | |
| 207 | /* math.h files do not have M_PI/M_E defined */ |
| 208 | #ifndef M_PI |
| 209 | # define M_PI ((dbl) 3.14159265358979323846) /* pi */ |
| 210 | #endif |
| 211 | #ifndef M_E |
| 212 | # define M_E ((dbl) 2.7182818284590452354) /* e */ |
| 213 | #endif |
| 214 | |
| 215 | static gdk_return |
| 216 | BATnormal(BAT **bn, oid *base, lng *size, int *domain, int *stddev, int *mean) |
| 217 | { |
| 218 | const BUN n = (BUN) * size; |
| 219 | BUN i, r; |
| 220 | const int d = (*domain < 0 ? 0 : *domain); |
| 221 | int j; |
| 222 | BAT *b = NULL; |
| 223 | int *restrict val; |
| 224 | const int m = *mean, s = *stddev; |
| 225 | unsigned int *restrict abs; |
| 226 | flt *restrict rel; |
| 227 | dbl tot = 0; |
| 228 | const dbl s_s_2 = (dbl) s * (dbl) s * 2; |
| 229 | const dbl s_sqrt_2_pi = ((dbl) s * sqrt(2 * M_PI)); |
| 230 | |
| 231 | assert(sizeof(unsigned int) == sizeof(flt)); |
| 232 | |
| 233 | #if SIZEOF_BUN > 4 |
| 234 | if (n >= ((ulng) 1 << 32)) { |
| 235 | GDKerror("BATnormal: size must be less than 2^32 = " LLFMT, (lng) 1 << 32); |
| 236 | return GDK_FAIL; |
| 237 | } |
| 238 | #endif |
| 239 | if (*size > (lng)BUN_MAX) { |
| 240 | GDKerror("BATnormal: size must not exceed BUN_MAX" ); |
| 241 | return GDK_FAIL; |
| 242 | } |
| 243 | |
| 244 | if (*size < 0) { |
| 245 | GDKerror("BATnormal: size must not be negative" ); |
| 246 | return GDK_FAIL; |
| 247 | } |
| 248 | |
| 249 | b = COLnew(*base, TYPE_int, n, TRANSIENT); |
| 250 | if (b == NULL) { |
| 251 | return GDK_FAIL; |
| 252 | } |
| 253 | if (n == 0) { |
| 254 | b->tsorted = true; |
| 255 | b->trevsorted = false; |
| 256 | b->tseqbase = oid_nil; |
| 257 | BATkey(b, true); |
| 258 | *bn = b; |
| 259 | return GDK_SUCCEED; |
| 260 | } |
| 261 | val = (int *) Tloc(b, 0); |
| 262 | |
| 263 | abs = (unsigned int *) GDKmalloc(d * sizeof(unsigned int)); |
| 264 | if (abs == NULL) { |
| 265 | BBPreclaim(b); |
| 266 | return GDK_FAIL; |
| 267 | } |
| 268 | rel = (flt *) abs; |
| 269 | |
| 270 | /* assert(0 <= *mean && *mean < *size); */ |
| 271 | |
| 272 | /* created inverted table with rel fraction per value */ |
| 273 | for (tot = 0, j = 0; j < d; j++) { |
| 274 | const dbl j_m = (dbl) j - m; |
| 275 | const dbl tmp = j_m * j_m / s_s_2; |
| 276 | |
| 277 | rel[j] = (flt) (pow(M_E, -tmp) / s_sqrt_2_pi); |
| 278 | tot += rel[j]; |
| 279 | } |
| 280 | /* just in case we get tot != 1 due to. e.g., |
| 281 | * rounding errors, limited precision, or limited domain */ |
| 282 | tot = 1.0 / tot; |
| 283 | /* calculate abs count per value from rel fraction */ |
| 284 | for (r = n, j = 0; j < d; j++) { |
| 285 | assert(((dbl) n * rel[j] * tot) < (dbl) ((lng) 1 << 32)); |
| 286 | abs[j] = (unsigned int) ((dbl) n * rel[j] * tot); |
| 287 | assert(r >= abs[j]); |
| 288 | r -= abs[j]; |
| 289 | } |
| 290 | assert(((ulng) 1 << 32) - r > abs[m]); |
| 291 | abs[m] += (unsigned int) r; |
| 292 | |
| 293 | /* create BUNs with normal distribution */ |
| 294 | for (j = 0, i = 0; i < n && j < d; i++) { |
| 295 | while (j < d && abs[j] == 0) |
| 296 | j++; |
| 297 | if (j < d) { |
| 298 | val[i] = j; |
| 299 | abs[j]--; |
| 300 | } |
| 301 | } |
| 302 | assert(i == n); |
| 303 | while (j < d && abs[j] == 0) |
| 304 | j++; |
| 305 | assert(j == d); |
| 306 | GDKfree(abs); |
| 307 | |
| 308 | |
| 309 | BATsetcount(b, n); |
| 310 | b->tsorted = false; |
| 311 | b->trevsorted = false; |
| 312 | b->tseqbase = oid_nil; |
| 313 | BATkey(b, n<2); |
| 314 | *bn = b; |
| 315 | return GDK_SUCCEED; |
| 316 | } |
| 317 | /* |
| 318 | * @- |
| 319 | * The M5 wrapper code |
| 320 | */ |
| 321 | |
| 322 | str |
| 323 | MBMrandom(bat *ret, oid *base, lng *size, int *domain){ |
| 324 | return MBMrandom_seed ( ret, base, size, domain, &int_nil ); |
| 325 | } |
| 326 | |
| 327 | str |
| 328 | MBMrandom_seed(bat *ret, oid *base, lng *size, int *domain, const int *seed){ |
| 329 | BAT *bn = NULL; |
| 330 | |
| 331 | BATrandom(&bn, base, size, domain, *seed); |
| 332 | if( bn ){ |
| 333 | BBPkeepref(*ret= bn->batCacheid); |
| 334 | } else throw(MAL, "microbenchmark.random" , OPERATION_FAILED); |
| 335 | return MAL_SUCCEED; |
| 336 | } |
| 337 | |
| 338 | |
| 339 | str |
| 340 | MBMuniform(bat *ret, oid *base, lng *size, int *domain){ |
| 341 | BAT *bn = NULL; |
| 342 | |
| 343 | BATuniform(&bn, base, size, domain); |
| 344 | if( bn ){ |
| 345 | BBPkeepref(*ret= bn->batCacheid); |
| 346 | } else throw(MAL, "microbenchmark.uniform" , OPERATION_FAILED); |
| 347 | return MAL_SUCCEED; |
| 348 | } |
| 349 | |
| 350 | str |
| 351 | MBMnormal(bat *ret, oid *base, lng *size, int *domain, int *stddev, int *mean){ |
| 352 | BAT *bn = NULL; |
| 353 | BATnormal(&bn, base, size, domain, stddev, mean); |
| 354 | if( bn ){ |
| 355 | BBPkeepref(*ret= bn->batCacheid); |
| 356 | } else throw(MAL, "microbenchmark.normal" , OPERATION_FAILED); |
| 357 | return MAL_SUCCEED; |
| 358 | } |
| 359 | |
| 360 | |
| 361 | str |
| 362 | MBMmix(bat *bn, bat *batid) |
| 363 | { |
| 364 | BUN n, r, i; |
| 365 | BAT *b; |
| 366 | |
| 367 | if ((b = BATdescriptor(*batid)) == NULL) |
| 368 | throw(MAL, "microbenchmark.mix" , SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); |
| 369 | |
| 370 | n = BATcount(b); |
| 371 | /* mix BUNs randomly */ |
| 372 | for (r = i = 0; i < n; i++) { |
| 373 | BUN idx = i + ((r += (BUN) rand()) % (n - i)); |
| 374 | int val; |
| 375 | |
| 376 | val = *(int *) Tloc(b, i); |
| 377 | *(int *) Tloc(b, i) = *(int *) Tloc(b, idx); |
| 378 | *(int *) Tloc(b, idx) = val; |
| 379 | } |
| 380 | |
| 381 | BBPunfix(b->batCacheid); |
| 382 | *bn = b->batCacheid; |
| 383 | |
| 384 | return MAL_SUCCEED; |
| 385 | } |
| 386 | |
| 387 | str |
| 388 | MBMskewed(bat *ret, oid *base, lng *size, int *domain, int *skew){ |
| 389 | BAT *bn = NULL; |
| 390 | |
| 391 | BATskewed(&bn, base, size, domain, skew); |
| 392 | if( bn ){ |
| 393 | BBPkeepref(*ret= bn->batCacheid); |
| 394 | } else throw(MAL, "microbenchmark.skewed" , OPERATION_FAILED); |
| 395 | return MAL_SUCCEED; |
| 396 | } |
| 397 | |