| 1 | /* |
| 2 | * $Id: bm_utils.c,v 1.4 2006/04/12 18:00:55 jms Exp $ |
| 3 | * |
| 4 | * Revision History |
| 5 | * =================== |
| 6 | * $Log: bm_utils.c,v $ |
| 7 | * Revision 1.4 2006/04/12 18:00:55 jms |
| 8 | * add missing parameter to call to gen_seed |
| 9 | * |
| 10 | * Revision 1.3 2005/10/14 23:16:54 jms |
| 11 | * fix for answer set compliance |
| 12 | * |
| 13 | * Revision 1.2 2005/01/03 20:08:58 jms |
| 14 | * change line terminations |
| 15 | * |
| 16 | * Revision 1.1.1.1 2004/11/24 23:31:46 jms |
| 17 | * re-establish external server |
| 18 | * |
| 19 | * Revision 1.3 2004/02/18 14:05:53 jms |
| 20 | * porting changes for LINUX and 64 bit RNG |
| 21 | * |
| 22 | * Revision 1.2 2004/01/22 05:49:29 jms |
| 23 | * AIX porting (AIX 5.1) |
| 24 | * |
| 25 | * Revision 1.1.1.1 2003/08/08 21:35:26 jms |
| 26 | * recreation after CVS crash |
| 27 | * |
| 28 | * Revision 1.3 2003/08/08 21:35:26 jms |
| 29 | * first integration of rng64 for o_custkey and l_partkey |
| 30 | * |
| 31 | * Revision 1.2 2003/08/07 17:58:34 jms |
| 32 | * Convery RNG to 64bit space as preparation for new large scale RNG |
| 33 | * |
| 34 | * Revision 1.1.1.1 2003/04/03 18:54:21 jms |
| 35 | * initial checkin |
| 36 | * |
| 37 | * |
| 38 | */ |
| 39 | /* |
| 40 | * |
| 41 | * Various routines that handle distributions, value selections and |
| 42 | * seed value management for the DSS benchmark. Current functions: |
| 43 | * env_config -- set config vars with optional environment override |
| 44 | * yes_no -- ask simple yes/no question and return boolean result |
| 45 | * a_rnd(min, max) -- random alphanumeric within length range |
| 46 | * pick_str(size, set) -- select a string from the set of size |
| 47 | * read_dist(file, name, distribution *) -- read named dist from file |
| 48 | * tbl_open(path, mode) -- std fopen with lifenoise |
| 49 | * julian(date) -- julian date correction |
| 50 | * rowcnt(tbl) -- proper scaling of given table |
| 51 | * e_str(set, min, max) -- build an embedded str |
| 52 | * agg_str() -- build a string from the named set |
| 53 | * dsscasecmp() -- version of strcasecmp() |
| 54 | * dssncasecmp() -- version of strncasecmp() |
| 55 | * getopt() |
| 56 | * set_state() -- initialize the RNG |
| 57 | */ |
| 58 | |
| 59 | #include "config.h" |
| 60 | #include "dss.h" |
| 61 | #include <stdio.h> |
| 62 | #include <time.h> |
| 63 | #include <errno.h> |
| 64 | #include <string.h> |
| 65 | #ifdef HP |
| 66 | #include <strings.h> |
| 67 | #endif /* HP */ |
| 68 | #include <ctype.h> |
| 69 | #include <math.h> |
| 70 | #ifndef _POSIX_SOURCE |
| 71 | #include <malloc.h> |
| 72 | #endif /* POSIX_SOURCE */ |
| 73 | #include <fcntl.h> |
| 74 | #include <sys/types.h> |
| 75 | #include <sys/stat.h> |
| 76 | /* Lines added by Chuck McDevitt for WIN32 support */ |
| 77 | #ifdef WIN32 |
| 78 | #ifndef _POSIX_ |
| 79 | #include <io.h> |
| 80 | #ifndef S_ISREG |
| 81 | #define S_ISREG(m) ( ((m) & _S_IFMT) == _S_IFREG ) |
| 82 | #define S_ISFIFO(m) ( ((m) & _S_IFMT) == _S_IFIFO ) |
| 83 | #endif |
| 84 | #endif |
| 85 | #ifndef stat |
| 86 | #define stat _stat |
| 87 | #endif |
| 88 | #ifndef fdopen |
| 89 | #define fdopen _fdopen |
| 90 | #endif |
| 91 | #ifndef open |
| 92 | #define open _open |
| 93 | #endif |
| 94 | #ifndef O_RDONLY |
| 95 | #define O_RDONLY _O_RDONLY |
| 96 | #endif |
| 97 | #ifndef O_WRONLY |
| 98 | #define O_WRONLY _O_WRONLY |
| 99 | #endif |
| 100 | #ifndef O_CREAT |
| 101 | #define O_CREAT _O_CREAT |
| 102 | #endif |
| 103 | #endif |
| 104 | /* End of lines added by Chuck McDevitt for WIN32 support */ |
| 105 | #include "dsstypes.h" |
| 106 | |
| 107 | |
/*
 * Character table used by a_rnd(): 64 symbols (digits, lower case, a
 * space, upper case, a comma) so that any 6-bit value indexes it
 * directly; the 65th byte is the string terminator.  The table is only
 * ever read, hence const.
 */
static const char alpha_num[65] =
    "0123456789abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ,";
| 110 | |
| 111 | #if defined(__STDC__) || defined(__cplusplus) |
| 112 | #define PROTO(s) s |
| 113 | #else |
| 114 | #define PROTO(s) () |
| 115 | #endif |
| 116 | |
| 117 | #ifndef WIN32 |
| 118 | char *getenv PROTO((const char *name)); |
| 119 | #endif |
| 120 | void usage(); |
| 121 | long *permute_dist(distribution *d, long stream); |
| 122 | extern seed_t Seed[]; |
| 123 | |
/*
 * env_config: return the value of the environment variable `var` when
 * it is set; otherwise hand back the caller-supplied default `dflt`.
 */
char *
env_config(char *var, char *dflt)
{
    char *from_env = getenv(var);

    return ((from_env != NULL) ? from_env : dflt);
}
| 138 | |
/*
 * yes_no: print `prompt` followed by " [Y/N]: " and read one line from
 * stdin.  Returns 1 when the reply starts with 'y' or 'Y' and 0 when it
 * starts with 'n' or 'N'; any other reply prints a hint and asks again.
 *
 * When stdin is at end-of-file (or the read fails) no answer can ever
 * arrive, so the question is treated as answered "no" and 0 is returned
 * instead of looping forever on an unread buffer.
 */
long
yes_no(char *prompt)
{
    char reply[128];

    /* for (;;) rather than while (1): no constant-condition warning */
    for (;;)
    {
        printf("%s [Y/N]: ", prompt);
        if (fgets(reply, sizeof(reply), stdin) == NULL)
            return (0);
        switch (*reply)
        {
        case 'y':
        case 'Y':
            return (1);
        case 'n':
        case 'N':
            return (0);
        default:
            printf("Please answer 'yes' or 'no'.\n");
        }
    }
}
| 172 | |
| 173 | /* |
| 174 | * generate a random string with length randomly selected in [min, max] |
| 175 | * and using the characters in alphanum (currently includes a space |
| 176 | * and comma) |
| 177 | */ |
| 178 | void |
| 179 | a_rnd(int min, int max, int column, char *dest) |
| 180 | { |
| 181 | DSS_HUGE i, |
| 182 | len, |
| 183 | char_int; |
| 184 | |
| 185 | RANDOM(len, min, max, column); |
| 186 | for (i = 0; i < len; i++) |
| 187 | { |
| 188 | if (i % 5 == 0) |
| 189 | RANDOM(char_int, 0, MAX_LONG, column); |
| 190 | *(dest + i) = alpha_num[char_int & 077]; |
| 191 | char_int >>= 6; |
| 192 | } |
| 193 | *(dest + len) = '\0'; |
| 194 | return; |
| 195 | } |
| 196 | |
| 197 | /* |
| 198 | * embed a randomly selected member of distribution d in alpha-numeric |
| 199 | * noise of a length rendomly selected between min and max at a random |
| 200 | * position |
| 201 | */ |
| 202 | void |
| 203 | e_str(distribution *d, int min, int max, int stream, char *dest) |
| 204 | { |
| 205 | char strtmp[MAXAGG_LEN + 1]; |
| 206 | DSS_HUGE loc; |
| 207 | int len; |
| 208 | |
| 209 | a_rnd(min, max, stream, dest); |
| 210 | pick_str(d, stream, strtmp); |
| 211 | len = (int)strlen(strtmp); |
| 212 | RANDOM(loc, 0, ((int)strlen(dest) - 1 - len), stream); |
| 213 | strncpy(dest + loc, strtmp, len); |
| 214 | |
| 215 | return; |
| 216 | } |
| 217 | |
| 218 | |
| 219 | /* |
| 220 | * return the string associate with the LSB of a uniformly selected |
| 221 | * long in [1, max] where max is determined by the distribution |
| 222 | * being queried |
| 223 | */ |
| 224 | int |
| 225 | pick_str(distribution *s, int c, char *target) |
| 226 | { |
| 227 | long i = 0; |
| 228 | DSS_HUGE j; |
| 229 | |
| 230 | RANDOM(j, 1, s->list[s->count - 1].weight, c); |
| 231 | while (s->list[i].weight < j) |
| 232 | i++; |
| 233 | strcpy(target, s->list[i].text); |
| 234 | return(i); |
| 235 | } |
| 236 | |
| 237 | /* |
| 238 | * unjulian (long date) -- return(date - STARTDATE) |
| 239 | */ |
| 240 | long |
| 241 | unjulian(long date) |
| 242 | { |
| 243 | int i; |
| 244 | long res = 0; |
| 245 | |
| 246 | for (i = STARTDATE / 1000; i < date / 1000; i++) |
| 247 | res += 365 + LEAP(i); |
| 248 | res += date % 1000 - 1; |
| 249 | |
| 250 | return(res); |
| 251 | } |
| 252 | |
| 253 | long |
| 254 | julian(long date) |
| 255 | { |
| 256 | long offset; |
| 257 | long result; |
| 258 | long yr; |
| 259 | long yend; |
| 260 | |
| 261 | offset = date - STARTDATE; |
| 262 | result = STARTDATE; |
| 263 | |
| 264 | #ifdef WIN32 |
| 265 | /* Disable warning about conditional expression is constant */ |
| 266 | #pragma warning(disable:4127) |
| 267 | #endif |
| 268 | |
| 269 | while (1) |
| 270 | { |
| 271 | #ifdef WIN32 |
| 272 | #pragma warning(default:4127) |
| 273 | #endif |
| 274 | yr = result / 1000; |
| 275 | yend = yr * 1000 + 365 + LEAP(yr); |
| 276 | if (result + offset > yend) /* overflow into next year */ |
| 277 | { |
| 278 | offset -= yend - result + 1; |
| 279 | result += 1000; |
| 280 | continue; |
| 281 | } |
| 282 | else |
| 283 | break; |
| 284 | } |
| 285 | return (result + offset); |
| 286 | } |
| 287 | |
| 288 | /* |
| 289 | * load a distribution from a flat file into the target structure; |
| 290 | * should be rewritten to allow multiple dists in a file |
| 291 | */ |
| 292 | void |
| 293 | read_dist(char *path, char *name, distribution *target) |
| 294 | { |
| 295 | FILE *fp; |
| 296 | char line[256], |
| 297 | token[256], |
| 298 | *c; |
| 299 | long weight, |
| 300 | count = 0, |
| 301 | name_set = 0; |
| 302 | |
| 303 | if (d_path == NULL) |
| 304 | { |
| 305 | sprintf(line, "%s%c%s" , |
| 306 | env_config(CONFIG_TAG, CONFIG_DFLT), PATH_SEP, path); |
| 307 | fp = fopen(line, "r" ); |
| 308 | OPEN_CHECK(fp, line); |
| 309 | } |
| 310 | else |
| 311 | { |
| 312 | fp = fopen(d_path, "r" ); |
| 313 | OPEN_CHECK(fp, d_path); |
| 314 | } |
| 315 | while (fgets(line, sizeof(line), fp) != NULL) |
| 316 | { |
| 317 | if ((c = strchr(line, '\n')) != NULL) |
| 318 | *c = '\0'; |
| 319 | if ((c = strchr(line, '#')) != NULL) |
| 320 | *c = '\0'; |
| 321 | if (*line == '\0') |
| 322 | continue; |
| 323 | |
| 324 | if (!name_set) |
| 325 | { |
| 326 | if (dsscasecmp(strtok(line, "\n\t " ), "BEGIN" )) |
| 327 | continue; |
| 328 | if (dsscasecmp(strtok(NULL, "\n\t " ), name)) |
| 329 | continue; |
| 330 | name_set = 1; |
| 331 | continue; |
| 332 | } |
| 333 | else |
| 334 | { |
| 335 | if (!dssncasecmp(line, "END" , 3)) |
| 336 | { |
| 337 | fclose(fp); |
| 338 | return; |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | if (sscanf(line, "%[^|]|%ld" , token, &weight) != 2) |
| 343 | continue; |
| 344 | |
| 345 | if (!dsscasecmp(token, "count" )) |
| 346 | { |
| 347 | target->count = weight; |
| 348 | target->list = |
| 349 | (set_member *) |
| 350 | malloc((size_t)(weight * sizeof(set_member))); |
| 351 | MALLOC_CHECK(target->list); |
| 352 | target->max = 0; |
| 353 | continue; |
| 354 | } |
| 355 | target->list[count].text = |
| 356 | (char *) malloc((size_t)((int)strlen(token) + 1)); |
| 357 | MALLOC_CHECK(target->list[count].text); |
| 358 | strcpy(target->list[count].text, token); |
| 359 | target->max += weight; |
| 360 | target->list[count].weight = target->max; |
| 361 | |
| 362 | count += 1; |
| 363 | } /* while fgets() */ |
| 364 | |
| 365 | if (count != target->count) |
| 366 | { |
| 367 | fprintf(stderr, "Read error on dist '%s'\n" , name); |
| 368 | fclose(fp); |
| 369 | exit(1); |
| 370 | } |
| 371 | target->permute = (long *)NULL; |
| 372 | fclose(fp); |
| 373 | return; |
| 374 | } |
| 375 | |
| 376 | /* |
| 377 | * standard file open with life noise |
| 378 | */ |
| 379 | |
| 380 | FILE * |
| 381 | tbl_open(int tbl, char *mode) |
| 382 | { |
| 383 | char prompt[256]; |
| 384 | char fullpath[256]; |
| 385 | FILE *f; |
| 386 | struct stat fstats; |
| 387 | int retcode; |
| 388 | |
| 389 | |
| 390 | if (*tdefs[tbl].name == PATH_SEP) |
| 391 | strcpy(fullpath, tdefs[tbl].name); |
| 392 | else |
| 393 | sprintf(fullpath, "%s%c%s" , |
| 394 | env_config(PATH_TAG, PATH_DFLT), PATH_SEP, tdefs[tbl].name); |
| 395 | |
| 396 | retcode = stat(fullpath, &fstats); |
| 397 | if (retcode && (errno != ENOENT)) |
| 398 | { |
| 399 | fprintf(stderr, "stat(%s) failed.\n" , fullpath); |
| 400 | exit(-1); |
| 401 | } |
| 402 | if (S_ISREG(fstats.st_mode) && !force && *mode != 'r' ) |
| 403 | { |
| 404 | sprintf(prompt, "Do you want to overwrite %s ?" , fullpath); |
| 405 | if (!yes_no(prompt)) |
| 406 | exit(0); |
| 407 | } |
| 408 | |
| 409 | if (S_ISFIFO(fstats.st_mode)) |
| 410 | { |
| 411 | retcode = |
| 412 | open(fullpath, ((*mode == 'r')?O_RDONLY:O_WRONLY)|O_CREAT); |
| 413 | f = fdopen(retcode, mode); |
| 414 | } |
| 415 | else |
| 416 | f = fopen(fullpath, mode); |
| 417 | OPEN_CHECK(f, fullpath); |
| 418 | |
| 419 | return (f); |
| 420 | } |
| 421 | |
| 422 | |
| 423 | /* |
| 424 | * agg_str(set, count) build an aggregated string from count unique |
| 425 | * selections taken from set |
| 426 | */ |
| 427 | void |
| 428 | agg_str(distribution *set, long count, long col, char *dest) |
| 429 | { |
| 430 | distribution *d; |
| 431 | int i; |
| 432 | |
| 433 | d = set; |
| 434 | *dest = '\0'; |
| 435 | |
| 436 | permute_dist(d, col); |
| 437 | for (i=0; i < count; i++) |
| 438 | { |
| 439 | strcat(dest, DIST_MEMBER(set,DIST_PERMUTE(d, i))); |
| 440 | strcat(dest, " " ); |
| 441 | } |
| 442 | *(dest + (int)strlen(dest) - 1) = '\0'; |
| 443 | |
| 444 | return; |
| 445 | } |
| 446 | |
| 447 | |
/*
 * dssncasecmp: case-insensitive comparison of at most `n` characters of
 * s1 and s2.  Returns 0 when they match over that span (or both end
 * first), -1 when s1 sorts before s2 and 1 when it sorts after.
 *
 * Characters go through unsigned char before tolower(): handing it a
 * negative char value is undefined behaviour.
 */
long
dssncasecmp(char *s1, char *s2, int n)
{
    int a,
        b;

    for (; n > 0; ++s1, ++s2, --n)
    {
        a = tolower((unsigned char)*s1);
        b = tolower((unsigned char)*s2);
        if (a != b)
            return ((a < b) ? -1 : 1);
        if (a == '\0')      /* both strings ended together */
            return (0);
    }
    return (0);
}
| 458 | |
/*
 * dsscasecmp: case-insensitive comparison of two NUL-terminated
 * strings.  Returns 0 when equal, -1 when s1 sorts before s2 and 1 when
 * it sorts after.
 *
 * Characters go through unsigned char before tolower(): handing it a
 * negative char value is undefined behaviour.
 */
long
dsscasecmp(char *s1, char *s2)
{
    int a,
        b;

    for (;; ++s1, ++s2)
    {
        a = tolower((unsigned char)*s1);
        b = tolower((unsigned char)*s2);
        if (a != b)
            return ((a < b) ? -1 : 1);
        if (a == '\0')      /* reached the end of both strings */
            return (0);
    }
}
| 467 | |
#ifndef STDLIB_HAS_GETOPT
int optind = 0;
int opterr = 0;
char *optarg = NULL;

/*
 * getopt: minimal option parser for platforms without one.
 *
 * `opt` lists the accepted option letters; a letter followed by ':'
 * takes an argument, given either attached ("-s10") or as the next
 * element of `av` ("-s 10").  Returns the option letter, '?' for an
 * unknown letter or a missing argument, and -1 at the end of `av`, at
 * the first element not starting with '-', at a lone "-", or at "--".
 *
 * Unlike the POSIX routine, optind starts at 0 and is advanced before
 * an element is examined.
 *
 * optarg points into `av` rather than at a private copy.  The old copy
 * had its terminator overwritten by the option letter and was not
 * bounded by the size of its buffer.
 */
int
getopt(int ac, char **av, char *opt)
{
    static char *nextchar = NULL;   /* next unread char of the current element */
    char *cp;
    int found;

    if (!nextchar || *nextchar == '\0')
    {
        optind++;
        if (optind >= ac)           /* >= so repeated calls never index past av */
            return (-1);
        nextchar = av[optind];
        if (*nextchar != '-')
            return (-1);
        nextchar += 1;
        if (*nextchar == '\0')      /* a lone "-" is not an option */
            return (-1);
    }

    if (*nextchar == '-')           /* -- termination */
    {
        optind++;
        return (-1);
    }

    /* found an option; ':' is the argument marker, never a letter */
    found = *nextchar;
    cp = (found == ':') ? NULL : strchr(opt, found);
    nextchar += 1;
    if (cp == NULL)                 /* not defined for this run */
        return ('?');

    if (*(cp + 1) == ':')           /* option takes an argument */
    {
        if (*nextchar)
            optarg = nextchar;      /* attached: rest of this element */
        else
        {
            /* white space separated, use next arg */
            if (++optind >= ac)
                return ('?');
            optarg = av[optind];
        }
        nextchar = NULL;
    }
    return (found);
}
#endif /* STDLIB_HAS_GETOPT */
| 531 | |
| 532 | char ** |
| 533 | mk_ascdate(void) |
| 534 | { |
| 535 | char **m; |
| 536 | dss_time_t t; |
| 537 | DSS_HUGE i; |
| 538 | |
| 539 | m = (char**) malloc((size_t)(TOTDATE * sizeof (char *))); |
| 540 | MALLOC_CHECK(m); |
| 541 | for (i = 0; i < TOTDATE; i++) |
| 542 | { |
| 543 | mk_time(i + 1, &t); |
| 544 | m[i] = strdup(t.alpha); |
| 545 | } |
| 546 | |
| 547 | return(m); |
| 548 | } |
| 549 | |
| 550 | /* |
| 551 | * set_state() -- initialize the RNG so that |
| 552 | * appropriate data sets can be generated. |
| 553 | * For each table that is to be generated, calculate the number of rows/child, and send that to the |
| 554 | * seed generation routine in speed_seed.c. Note: assumes that tables are completely independent. |
| 555 | * Returns the number of rows to be generated by the named step. |
| 556 | */ |
| 557 | DSS_HUGE |
| 558 | set_state(int table, long sf, long procs, long step, DSS_HUGE *) |
| 559 | { |
| 560 | int i; |
| 561 | DSS_HUGE rowcount, remainder, result; |
| 562 | |
| 563 | if (sf == 0 || step == 0) |
| 564 | return(0); |
| 565 | |
| 566 | rowcount = tdefs[table].base; |
| 567 | rowcount *= sf; |
| 568 | *extra_rows = rowcount % procs; |
| 569 | rowcount /= procs; |
| 570 | result = rowcount; |
| 571 | for (i=0; i < step - 1; i++) |
| 572 | { |
| 573 | if (table == LINE) /* special case for shared seeds */ |
| 574 | tdefs[table].gen_seed(1, rowcount); |
| 575 | else |
| 576 | tdefs[table].gen_seed(0, rowcount); |
| 577 | /* need to set seeds of child in case there's a dependency */ |
| 578 | /* NOTE: this assumes that the parent and child have the same base row count */ |
| 579 | if (tdefs[table].child != NONE) |
| 580 | tdefs[tdefs[table].child].gen_seed(0,rowcount); |
| 581 | } |
| 582 | if (step > procs) /* moving to the end to generate updates */ |
| 583 | tdefs[table].gen_seed(0, *extra_rows); |
| 584 | |
| 585 | return(result); |
| 586 | } |
| 587 | |