1 | /* |
2 | * $Id: bm_utils.c,v 1.4 2006/04/12 18:00:55 jms Exp $ |
3 | * |
4 | * Revision History |
5 | * =================== |
6 | * $Log: bm_utils.c,v $ |
7 | * Revision 1.4 2006/04/12 18:00:55 jms |
8 | * add missing parameter to call to gen_seed |
9 | * |
10 | * Revision 1.3 2005/10/14 23:16:54 jms |
11 | * fix for answer set compliance |
12 | * |
13 | * Revision 1.2 2005/01/03 20:08:58 jms |
14 | * change line terminations |
15 | * |
16 | * Revision 1.1.1.1 2004/11/24 23:31:46 jms |
17 | * re-establish external server |
18 | * |
19 | * Revision 1.3 2004/02/18 14:05:53 jms |
20 | * porting changes for LINUX and 64 bit RNG |
21 | * |
22 | * Revision 1.2 2004/01/22 05:49:29 jms |
23 | * AIX porting (AIX 5.1) |
24 | * |
25 | * Revision 1.1.1.1 2003/08/08 21:35:26 jms |
26 | * recreation after CVS crash |
27 | * |
28 | * Revision 1.3 2003/08/08 21:35:26 jms |
29 | * first integration of rng64 for o_custkey and l_partkey |
30 | * |
31 | * Revision 1.2 2003/08/07 17:58:34 jms |
 * Convert RNG to 64bit space as preparation for new large scale RNG
33 | * |
34 | * Revision 1.1.1.1 2003/04/03 18:54:21 jms |
35 | * initial checkin |
36 | * |
37 | * |
38 | */ |
/*
 *
 * Various routines that handle distributions, value selections and
 * seed value management for the DSS benchmark. Current functions:
 * env_config(var, dflt) -- set config vars with optional environment override
 * yes_no(prompt) -- ask simple yes/no question and return boolean result
 * a_rnd(min, max, column, dest) -- random alphanumeric within length range
 * pick_str(dist, stream, target) -- select a string from the distribution
 * read_dist(file, name, distribution *) -- read named dist from file
 * tbl_open(tbl, mode) -- standard fopen with life noise
 * julian(date) -- julian date correction
 * unjulian(date) -- day count for a julian date
 * rowcnt(tbl) -- proper scaling of given table
 * e_str(dist, min, max, stream, dest) -- build an embedded str
 * agg_str(set, count, col, dest) -- build a string from the named set
 * dsscasecmp() -- version of strcasecmp()
 * dssncasecmp() -- version of strncasecmp()
 * getopt() -- fallback when the C library does not supply one
 * mk_ascdate() -- build the table of date strings
 * set_state() -- initialize the RNG
 */
58 | |
59 | #include "config.h" |
60 | #include "dss.h" |
61 | #include <stdio.h> |
62 | #include <time.h> |
63 | #include <errno.h> |
64 | #include <string.h> |
65 | #ifdef HP |
66 | #include <strings.h> |
67 | #endif /* HP */ |
68 | #include <ctype.h> |
69 | #include <math.h> |
70 | #ifndef _POSIX_SOURCE |
71 | #include <malloc.h> |
72 | #endif /* POSIX_SOURCE */ |
73 | #include <fcntl.h> |
74 | #include <sys/types.h> |
75 | #include <sys/stat.h> |
76 | /* Lines added by Chuck McDevitt for WIN32 support */ |
77 | #ifdef WIN32 |
78 | #ifndef _POSIX_ |
79 | #include <io.h> |
80 | #ifndef S_ISREG |
81 | #define S_ISREG(m) ( ((m) & _S_IFMT) == _S_IFREG ) |
82 | #define S_ISFIFO(m) ( ((m) & _S_IFMT) == _S_IFIFO ) |
83 | #endif |
84 | #endif |
85 | #ifndef stat |
86 | #define stat _stat |
87 | #endif |
88 | #ifndef fdopen |
89 | #define fdopen _fdopen |
90 | #endif |
91 | #ifndef open |
92 | #define open _open |
93 | #endif |
94 | #ifndef O_RDONLY |
95 | #define O_RDONLY _O_RDONLY |
96 | #endif |
97 | #ifndef O_WRONLY |
98 | #define O_WRONLY _O_WRONLY |
99 | #endif |
100 | #ifndef O_CREAT |
101 | #define O_CREAT _O_CREAT |
102 | #endif |
103 | #endif |
104 | /* End of lines added by Chuck McDevitt for WIN32 support */ |
105 | #include "dsstypes.h" |
106 | |
107 | |
/*
 * Character table indexed by a_rnd(): 10 digits, 26 lower case letters,
 * a space, 26 upper case letters and a comma -- 64 characters plus the
 * terminating NUL, so every 6-bit value (0..63) is a valid index.
 */
static char alpha_num[65] =
"0123456789abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ," ;

/*
 * PROTO(s) expands to the parenthesised parameter list s when the
 * compiler announces ANSI C or C++, and to an empty () otherwise.
 */
#if defined(__STDC__) || defined(__cplusplus)
#define PROTO(s) s
#else
#define PROTO(s) ()
#endif

/* getenv() is declared by hand everywhere except WIN32 */
#ifndef WIN32
char *getenv PROTO((const char *name));
#endif
/* declarations for routines and data that are not defined in the part of
 * the file shown here -- presumably they live elsewhere in the project */
void usage();
long *permute_dist(distribution *d, long stream);
extern seed_t Seed[];
123 | |
/*
 * env_config: resolve a configuration value, letting the environment
 * take precedence over the supplied default.
 *   var  -- name of the environment variable to consult
 *   dflt -- value handed back when var is not set
 * Returns the string reported by getenv() when the variable exists,
 * otherwise dflt itself; no copy is made in either case.
 */
char *
env_config(char *var, char *dflt)
{
    char *from_env = getenv(var);

    return ((from_env != NULL) ? from_env : dflt);
}
138 | |
/*
 * yes_no: put a yes/no question to the user on stdout and read the
 * answer from stdin.
 *   prompt -- question text; " [Y/N]: " is appended when it is printed
 * Returns 1 when the reply starts with 'y' or 'Y' and 0 when it starts
 * with 'n' or 'N'. Any other reply prints a reminder and asks again.
 * When stdin reaches end-of-file or a read error occurs the answer is
 * taken to be "no" (0): without that check the reply buffer would be
 * examined although nothing was read into it, and the loop could never
 * terminate.
 */
long
yes_no(char *prompt)
{
    char reply[128];

    /* for (;;) avoids the "conditional expression is constant" warning
     * that while (1) triggers on WIN32 compilers */
    for (;;)
    {
        printf("%s [Y/N]: ", prompt);
        if (fgets(reply, (int)sizeof(reply), stdin) == NULL)
            return (0);
        switch (*reply)
        {
        case 'y':
        case 'Y':
            return (1);
        case 'n':
        case 'N':
            return (0);
        default:
            printf("Please answer 'yes' or 'no'.\n");
        }
    }
}
172 | |
173 | /* |
174 | * generate a random string with length randomly selected in [min, max] |
175 | * and using the characters in alphanum (currently includes a space |
176 | * and comma) |
177 | */ |
178 | void |
179 | a_rnd(int min, int max, int column, char *dest) |
180 | { |
181 | DSS_HUGE i, |
182 | len, |
183 | char_int; |
184 | |
185 | RANDOM(len, min, max, column); |
186 | for (i = 0; i < len; i++) |
187 | { |
188 | if (i % 5 == 0) |
189 | RANDOM(char_int, 0, MAX_LONG, column); |
190 | *(dest + i) = alpha_num[char_int & 077]; |
191 | char_int >>= 6; |
192 | } |
193 | *(dest + len) = '\0'; |
194 | return; |
195 | } |
196 | |
197 | /* |
198 | * embed a randomly selected member of distribution d in alpha-numeric |
199 | * noise of a length rendomly selected between min and max at a random |
200 | * position |
201 | */ |
202 | void |
203 | e_str(distribution *d, int min, int max, int stream, char *dest) |
204 | { |
205 | char strtmp[MAXAGG_LEN + 1]; |
206 | DSS_HUGE loc; |
207 | int len; |
208 | |
209 | a_rnd(min, max, stream, dest); |
210 | pick_str(d, stream, strtmp); |
211 | len = (int)strlen(strtmp); |
212 | RANDOM(loc, 0, ((int)strlen(dest) - 1 - len), stream); |
213 | strncpy(dest + loc, strtmp, len); |
214 | |
215 | return; |
216 | } |
217 | |
218 | |
219 | /* |
220 | * return the string associate with the LSB of a uniformly selected |
221 | * long in [1, max] where max is determined by the distribution |
222 | * being queried |
223 | */ |
224 | int |
225 | pick_str(distribution *s, int c, char *target) |
226 | { |
227 | long i = 0; |
228 | DSS_HUGE j; |
229 | |
230 | RANDOM(j, 1, s->list[s->count - 1].weight, c); |
231 | while (s->list[i].weight < j) |
232 | i++; |
233 | strcpy(target, s->list[i].text); |
234 | return(i); |
235 | } |
236 | |
237 | /* |
238 | * unjulian (long date) -- return(date - STARTDATE) |
239 | */ |
240 | long |
241 | unjulian(long date) |
242 | { |
243 | int i; |
244 | long res = 0; |
245 | |
246 | for (i = STARTDATE / 1000; i < date / 1000; i++) |
247 | res += 365 + LEAP(i); |
248 | res += date % 1000 - 1; |
249 | |
250 | return(res); |
251 | } |
252 | |
253 | long |
254 | julian(long date) |
255 | { |
256 | long offset; |
257 | long result; |
258 | long yr; |
259 | long yend; |
260 | |
261 | offset = date - STARTDATE; |
262 | result = STARTDATE; |
263 | |
264 | #ifdef WIN32 |
265 | /* Disable warning about conditional expression is constant */ |
266 | #pragma warning(disable:4127) |
267 | #endif |
268 | |
269 | while (1) |
270 | { |
271 | #ifdef WIN32 |
272 | #pragma warning(default:4127) |
273 | #endif |
274 | yr = result / 1000; |
275 | yend = yr * 1000 + 365 + LEAP(yr); |
276 | if (result + offset > yend) /* overflow into next year */ |
277 | { |
278 | offset -= yend - result + 1; |
279 | result += 1000; |
280 | continue; |
281 | } |
282 | else |
283 | break; |
284 | } |
285 | return (result + offset); |
286 | } |
287 | |
288 | /* |
289 | * load a distribution from a flat file into the target structure; |
290 | * should be rewritten to allow multiple dists in a file |
291 | */ |
292 | void |
293 | read_dist(char *path, char *name, distribution *target) |
294 | { |
295 | FILE *fp; |
296 | char line[256], |
297 | token[256], |
298 | *c; |
299 | long weight, |
300 | count = 0, |
301 | name_set = 0; |
302 | |
303 | if (d_path == NULL) |
304 | { |
305 | sprintf(line, "%s%c%s" , |
306 | env_config(CONFIG_TAG, CONFIG_DFLT), PATH_SEP, path); |
307 | fp = fopen(line, "r" ); |
308 | OPEN_CHECK(fp, line); |
309 | } |
310 | else |
311 | { |
312 | fp = fopen(d_path, "r" ); |
313 | OPEN_CHECK(fp, d_path); |
314 | } |
315 | while (fgets(line, sizeof(line), fp) != NULL) |
316 | { |
317 | if ((c = strchr(line, '\n')) != NULL) |
318 | *c = '\0'; |
319 | if ((c = strchr(line, '#')) != NULL) |
320 | *c = '\0'; |
321 | if (*line == '\0') |
322 | continue; |
323 | |
324 | if (!name_set) |
325 | { |
326 | if (dsscasecmp(strtok(line, "\n\t " ), "BEGIN" )) |
327 | continue; |
328 | if (dsscasecmp(strtok(NULL, "\n\t " ), name)) |
329 | continue; |
330 | name_set = 1; |
331 | continue; |
332 | } |
333 | else |
334 | { |
335 | if (!dssncasecmp(line, "END" , 3)) |
336 | { |
337 | fclose(fp); |
338 | return; |
339 | } |
340 | } |
341 | |
342 | if (sscanf(line, "%[^|]|%ld" , token, &weight) != 2) |
343 | continue; |
344 | |
345 | if (!dsscasecmp(token, "count" )) |
346 | { |
347 | target->count = weight; |
348 | target->list = |
349 | (set_member *) |
350 | malloc((size_t)(weight * sizeof(set_member))); |
351 | MALLOC_CHECK(target->list); |
352 | target->max = 0; |
353 | continue; |
354 | } |
355 | target->list[count].text = |
356 | (char *) malloc((size_t)((int)strlen(token) + 1)); |
357 | MALLOC_CHECK(target->list[count].text); |
358 | strcpy(target->list[count].text, token); |
359 | target->max += weight; |
360 | target->list[count].weight = target->max; |
361 | |
362 | count += 1; |
363 | } /* while fgets() */ |
364 | |
365 | if (count != target->count) |
366 | { |
367 | fprintf(stderr, "Read error on dist '%s'\n" , name); |
368 | fclose(fp); |
369 | exit(1); |
370 | } |
371 | target->permute = (long *)NULL; |
372 | fclose(fp); |
373 | return; |
374 | } |
375 | |
376 | /* |
377 | * standard file open with life noise |
378 | */ |
379 | |
380 | FILE * |
381 | tbl_open(int tbl, char *mode) |
382 | { |
383 | char prompt[256]; |
384 | char fullpath[256]; |
385 | FILE *f; |
386 | struct stat fstats; |
387 | int retcode; |
388 | |
389 | |
390 | if (*tdefs[tbl].name == PATH_SEP) |
391 | strcpy(fullpath, tdefs[tbl].name); |
392 | else |
393 | sprintf(fullpath, "%s%c%s" , |
394 | env_config(PATH_TAG, PATH_DFLT), PATH_SEP, tdefs[tbl].name); |
395 | |
396 | retcode = stat(fullpath, &fstats); |
397 | if (retcode && (errno != ENOENT)) |
398 | { |
399 | fprintf(stderr, "stat(%s) failed.\n" , fullpath); |
400 | exit(-1); |
401 | } |
402 | if (S_ISREG(fstats.st_mode) && !force && *mode != 'r' ) |
403 | { |
404 | sprintf(prompt, "Do you want to overwrite %s ?" , fullpath); |
405 | if (!yes_no(prompt)) |
406 | exit(0); |
407 | } |
408 | |
409 | if (S_ISFIFO(fstats.st_mode)) |
410 | { |
411 | retcode = |
412 | open(fullpath, ((*mode == 'r')?O_RDONLY:O_WRONLY)|O_CREAT); |
413 | f = fdopen(retcode, mode); |
414 | } |
415 | else |
416 | f = fopen(fullpath, mode); |
417 | OPEN_CHECK(f, fullpath); |
418 | |
419 | return (f); |
420 | } |
421 | |
422 | |
423 | /* |
424 | * agg_str(set, count) build an aggregated string from count unique |
425 | * selections taken from set |
426 | */ |
427 | void |
428 | agg_str(distribution *set, long count, long col, char *dest) |
429 | { |
430 | distribution *d; |
431 | int i; |
432 | |
433 | d = set; |
434 | *dest = '\0'; |
435 | |
436 | permute_dist(d, col); |
437 | for (i=0; i < count; i++) |
438 | { |
439 | strcat(dest, DIST_MEMBER(set,DIST_PERMUTE(d, i))); |
440 | strcat(dest, " " ); |
441 | } |
442 | *(dest + (int)strlen(dest) - 1) = '\0'; |
443 | |
444 | return; |
445 | } |
446 | |
447 | |
/*
 * dssncasecmp: case-insensitive comparison of at most n characters.
 *   s1, s2 -- strings to compare; a NULL pointer sorts before any string
 *             and two NULL pointers compare equal
 *   n      -- maximum number of characters examined
 * Returns 0 when the strings agree over the first n characters (or end
 * together earlier), -1 when s1 sorts first and 1 when s2 sorts first.
 * Characters are converted to unsigned char before tolower() is called,
 * because passing a negative char value to tolower() is undefined.
 */
long
dssncasecmp(char *s1, char *s2, int n)
{
    int c1,
        c2;

    if (s1 == NULL || s2 == NULL)
        return ((s1 == s2) ? 0 : ((s1 == NULL) ? -1 : 1));

    for (; n > 0; ++s1, ++s2, --n)
    {
        c1 = tolower((unsigned char)*s1);
        c2 = tolower((unsigned char)*s2);
        if (c1 != c2)
            return ((c1 < c2) ? -1 : 1);
        if (*s1 == '\0')
            return (0);
    }
    return (0);
}
458 | |
/*
 * dsscasecmp: case-insensitive comparison of two strings.
 *   s1, s2 -- strings to compare; a NULL pointer sorts before any string
 *             and two NULL pointers compare equal, so the result of a
 *             failed strtok() can be passed in safely
 * Returns 0 when the strings are equal ignoring case, -1 when s1 sorts
 * first and 1 when s2 sorts first.
 * Characters are converted to unsigned char before tolower() is called,
 * because passing a negative char value to tolower() is undefined.
 */
long
dsscasecmp(char *s1, char *s2)
{
    int c1,
        c2;

    if (s1 == NULL || s2 == NULL)
        return ((s1 == s2) ? 0 : ((s1 == NULL) ? -1 : 1));

    for (;; ++s1, ++s2)
    {
        c1 = tolower((unsigned char)*s1);
        c2 = tolower((unsigned char)*s2);
        if (c1 != c2)
            return ((c1 < c2) ? -1 : 1);
        if (*s1 == '\0')
            return (0);
    }
}
467 | |
#ifndef STDLIB_HAS_GETOPT
int optind = 0;
int opterr = 0;
char *optarg = NULL;

/*
 * getopt: minimal command line option parser for platforms whose C
 * library does not provide one.
 *   ac, av -- argument count and vector as passed to main()
 *   opt    -- accepted option letters; a letter followed by ':' takes
 *             an argument
 * Returns the option letter found, '?' for an unknown letter or a
 * missing argument, and -1 when the next word is not an option (it does
 * not start with '-', or is a lone "-"), when "--" is met or when the
 * arguments are exhausted.
 *
 * optind starts at 0 and is advanced before a word is examined, so
 * after a -1 caused by a non-option word it is the index of that word.
 * For an option with an argument optarg points into av: either at the
 * rest of the same word ("-fname") or at the following word
 * ("-f name"). No copy is made, so the argument length is not limited
 * and the string is always properly terminated.
 */
int
getopt(int ac, char **av, char *opt)
{
    static char *nextchar = NULL;   /* next letter inside the current word */
    char *cp;
    int optch;

    if (!nextchar || *nextchar == '\0')
    {
        /* current word used up: move on to the next one */
        optind++;
        if (optind >= ac)
        {
            nextchar = NULL;
            return(-1);
        }
        nextchar = av[optind];
        if (nextchar == NULL || nextchar[0] != '-' || nextchar[1] == '\0')
        {
            nextchar = NULL;
            return(-1);
        }
        nextchar += 1;
    }

    if (*nextchar == '-') /* -- termination */
    {
        optind++;
        nextchar = NULL;
        return(-1);
    }

    /* found an option */
    optch = *nextchar;
    /* ':' is the argument marker inside opt, never an option letter */
    cp = (optch == ':') ? NULL : strchr(opt, optch);
    nextchar += 1;
    if (cp == NULL) /* not defined for this run */
        return('?');

    if (*(cp + 1) == ':') /* option takes an argument */
    {
        if (*nextchar)
            optarg = nextchar;      /* attached: rest of this word */
        else /* white space separated, use next arg */
        {
            if (++optind >= ac)
            {
                nextchar = NULL;
                return('?');
            }
            optarg = av[optind];
        }
        nextchar = NULL;
    }
    return(optch);
}
#endif /* STDLIB_HAS_GETOPT */
531 | |
532 | char ** |
533 | mk_ascdate(void) |
534 | { |
535 | char **m; |
536 | dss_time_t t; |
537 | DSS_HUGE i; |
538 | |
539 | m = (char**) malloc((size_t)(TOTDATE * sizeof (char *))); |
540 | MALLOC_CHECK(m); |
541 | for (i = 0; i < TOTDATE; i++) |
542 | { |
543 | mk_time(i + 1, &t); |
544 | m[i] = strdup(t.alpha); |
545 | } |
546 | |
547 | return(m); |
548 | } |
549 | |
550 | /* |
551 | * set_state() -- initialize the RNG so that |
552 | * appropriate data sets can be generated. |
553 | * For each table that is to be generated, calculate the number of rows/child, and send that to the |
554 | * seed generation routine in speed_seed.c. Note: assumes that tables are completely independent. |
555 | * Returns the number of rows to be generated by the named step. |
556 | */ |
557 | DSS_HUGE |
558 | set_state(int table, long sf, long procs, long step, DSS_HUGE *) |
559 | { |
560 | int i; |
561 | DSS_HUGE rowcount, remainder, result; |
562 | |
563 | if (sf == 0 || step == 0) |
564 | return(0); |
565 | |
566 | rowcount = tdefs[table].base; |
567 | rowcount *= sf; |
568 | *extra_rows = rowcount % procs; |
569 | rowcount /= procs; |
570 | result = rowcount; |
571 | for (i=0; i < step - 1; i++) |
572 | { |
573 | if (table == LINE) /* special case for shared seeds */ |
574 | tdefs[table].gen_seed(1, rowcount); |
575 | else |
576 | tdefs[table].gen_seed(0, rowcount); |
577 | /* need to set seeds of child in case there's a dependency */ |
578 | /* NOTE: this assumes that the parent and child have the same base row count */ |
579 | if (tdefs[table].child != NONE) |
580 | tdefs[tdefs[table].child].gen_seed(0,rowcount); |
581 | } |
582 | if (step > procs) /* moving to the end to generate updates */ |
583 | tdefs[table].gen_seed(0, *extra_rows); |
584 | |
585 | return(result); |
586 | } |
587 | |