1/*
2* $Id: bm_utils.c,v 1.4 2006/04/12 18:00:55 jms Exp $
3*
4* Revision History
5* ===================
6* $Log: bm_utils.c,v $
7* Revision 1.4 2006/04/12 18:00:55 jms
8* add missing parameter to call to gen_seed
9*
10* Revision 1.3 2005/10/14 23:16:54 jms
11* fix for answer set compliance
12*
13* Revision 1.2 2005/01/03 20:08:58 jms
14* change line terminations
15*
16* Revision 1.1.1.1 2004/11/24 23:31:46 jms
17* re-establish external server
18*
19* Revision 1.3 2004/02/18 14:05:53 jms
20* porting changes for LINUX and 64 bit RNG
21*
22* Revision 1.2 2004/01/22 05:49:29 jms
23* AIX porting (AIX 5.1)
24*
25* Revision 1.1.1.1 2003/08/08 21:35:26 jms
26* recreation after CVS crash
27*
28* Revision 1.3 2003/08/08 21:35:26 jms
29* first integration of rng64 for o_custkey and l_partkey
30*
31* Revision 1.2 2003/08/07 17:58:34 jms
32* Convery RNG to 64bit space as preparation for new large scale RNG
33*
34* Revision 1.1.1.1 2003/04/03 18:54:21 jms
35* initial checkin
36*
37*
38*/
/*
 *
 * Various routines that handle distributions, value selections and
 * seed value management for the DSS benchmark. Current functions:
 * env_config(var, dflt) -- set config vars with optional environment override
 * yes_no(prompt) -- ask simple yes/no question and return boolean result
 * a_rnd(min, max, column, dest) -- random alphanumeric string within length range
 * pick_str(dist, stream, target) -- select a string from the weighted set
 * read_dist(file, name, distribution *) -- read named dist from file
 * tbl_open(tbl, mode) -- std fopen with life noise
 * julian(date) -- julian date correction
 * rowcnt(tbl) -- proper scaling of given table
 * e_str(dist, min, max, stream, dest) -- build an embedded str
 * agg_str(set, count, col, dest) -- build a string from the named set
 * dsscasecmp() -- version of strcasecmp()
 * dssncasecmp() -- version of strncasecmp()
 * getopt() -- option parser, compiled only when STDLIB_HAS_GETOPT is undefined
 * set_state() -- initialize the RNG
 */
58
59#include "config.h"
60#include "dss.h"
61#include <stdio.h>
62#include <time.h>
63#include <errno.h>
64#include <string.h>
65#ifdef HP
66#include <strings.h>
67#endif /* HP */
68#include <ctype.h>
69#include <math.h>
70#ifndef _POSIX_SOURCE
71#include <malloc.h>
72#endif /* POSIX_SOURCE */
73#include <fcntl.h>
74#include <sys/types.h>
75#include <sys/stat.h>
76/* Lines added by Chuck McDevitt for WIN32 support */
77#ifdef WIN32
78#ifndef _POSIX_
79#include <io.h>
80#ifndef S_ISREG
81#define S_ISREG(m) ( ((m) & _S_IFMT) == _S_IFREG )
82#define S_ISFIFO(m) ( ((m) & _S_IFMT) == _S_IFIFO )
83#endif
84#endif
85#ifndef stat
86#define stat _stat
87#endif
88#ifndef fdopen
89#define fdopen _fdopen
90#endif
91#ifndef open
92#define open _open
93#endif
94#ifndef O_RDONLY
95#define O_RDONLY _O_RDONLY
96#endif
97#ifndef O_WRONLY
98#define O_WRONLY _O_WRONLY
99#endif
100#ifndef O_CREAT
101#define O_CREAT _O_CREAT
102#endif
103#endif
104/* End of lines added by Chuck McDevitt for WIN32 support */
105#include "dsstypes.h"
106
107
/*
 * 64-symbol alphabet indexed by a_rnd() with a 6-bit value: the digits,
 * the lower-case letters, a space, the upper-case letters and a comma.
 * The 65th byte is the terminating NUL.  The table is only ever read, so
 * it is const.
 */
static const char alpha_num[65] =
"0123456789abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ,";
110
111#if defined(__STDC__) || defined(__cplusplus)
112#define PROTO(s) s
113#else
114#define PROTO(s) ()
115#endif
116
117#ifndef WIN32
118char *getenv PROTO((const char *name));
119#endif
120void usage();
121long *permute_dist(distribution *d, long stream);
122extern seed_t Seed[];
123
124/*
125 * env_config: look for a environmental variable setting and return its
126 * value; otherwise return the default supplied
127 */
128char *
129env_config(char *var, char *dflt)
130{
131 static char *evar;
132
133 if ((evar = getenv(var)) != NULL)
134 return (evar);
135 else
136 return (dflt);
137}
138
139/*
140 * return the answer to a yes/no question as a boolean
141 */
142long
143yes_no(char *prompt)
144{
145 char reply[128];
146
147#ifdef WIN32
148/* Disable warning about conditional expression is constant */
149#pragma warning(disable:4127)
150#endif
151
152 while (1)
153 {
154#ifdef WIN32
155#pragma warning(default:4127)
156#endif
157 printf("%s [Y/N]: ", prompt);
158 fgets(reply, 128, stdin);
159 switch (*reply)
160 {
161 case 'y':
162 case 'Y':
163 return (1);
164 case 'n':
165 case 'N':
166 return (0);
167 default:
168 printf("Please answer 'yes' or 'no'.\n");
169 }
170 }
171}
172
173/*
174 * generate a random string with length randomly selected in [min, max]
175 * and using the characters in alphanum (currently includes a space
176 * and comma)
177 */
178void
179a_rnd(int min, int max, int column, char *dest)
180{
181 DSS_HUGE i,
182 len,
183 char_int;
184
185 RANDOM(len, min, max, column);
186 for (i = 0; i < len; i++)
187 {
188 if (i % 5 == 0)
189 RANDOM(char_int, 0, MAX_LONG, column);
190 *(dest + i) = alpha_num[char_int & 077];
191 char_int >>= 6;
192 }
193 *(dest + len) = '\0';
194 return;
195}
196
197/*
198 * embed a randomly selected member of distribution d in alpha-numeric
199 * noise of a length rendomly selected between min and max at a random
200 * position
201 */
202void
203e_str(distribution *d, int min, int max, int stream, char *dest)
204{
205 char strtmp[MAXAGG_LEN + 1];
206 DSS_HUGE loc;
207 int len;
208
209 a_rnd(min, max, stream, dest);
210 pick_str(d, stream, strtmp);
211 len = (int)strlen(strtmp);
212 RANDOM(loc, 0, ((int)strlen(dest) - 1 - len), stream);
213 strncpy(dest + loc, strtmp, len);
214
215 return;
216}
217
218
219/*
220 * return the string associate with the LSB of a uniformly selected
221 * long in [1, max] where max is determined by the distribution
222 * being queried
223 */
224int
225pick_str(distribution *s, int c, char *target)
226{
227 long i = 0;
228 DSS_HUGE j;
229
230 RANDOM(j, 1, s->list[s->count - 1].weight, c);
231 while (s->list[i].weight < j)
232 i++;
233 strcpy(target, s->list[i].text);
234 return(i);
235}
236
237/*
238 * unjulian (long date) -- return(date - STARTDATE)
239 */
240long
241unjulian(long date)
242{
243 int i;
244 long res = 0;
245
246 for (i = STARTDATE / 1000; i < date / 1000; i++)
247 res += 365 + LEAP(i);
248 res += date % 1000 - 1;
249
250 return(res);
251}
252
253long
254julian(long date)
255{
256 long offset;
257 long result;
258 long yr;
259 long yend;
260
261 offset = date - STARTDATE;
262 result = STARTDATE;
263
264#ifdef WIN32
265/* Disable warning about conditional expression is constant */
266#pragma warning(disable:4127)
267#endif
268
269 while (1)
270 {
271#ifdef WIN32
272#pragma warning(default:4127)
273#endif
274 yr = result / 1000;
275 yend = yr * 1000 + 365 + LEAP(yr);
276 if (result + offset > yend) /* overflow into next year */
277 {
278 offset -= yend - result + 1;
279 result += 1000;
280 continue;
281 }
282 else
283 break;
284 }
285 return (result + offset);
286}
287
288/*
289* load a distribution from a flat file into the target structure;
290* should be rewritten to allow multiple dists in a file
291*/
292void
293read_dist(char *path, char *name, distribution *target)
294{
295FILE *fp;
296char line[256],
297 token[256],
298 *c;
299long weight,
300 count = 0,
301 name_set = 0;
302
303 if (d_path == NULL)
304 {
305 sprintf(line, "%s%c%s",
306 env_config(CONFIG_TAG, CONFIG_DFLT), PATH_SEP, path);
307 fp = fopen(line, "r");
308 OPEN_CHECK(fp, line);
309 }
310 else
311 {
312 fp = fopen(d_path, "r");
313 OPEN_CHECK(fp, d_path);
314 }
315 while (fgets(line, sizeof(line), fp) != NULL)
316 {
317 if ((c = strchr(line, '\n')) != NULL)
318 *c = '\0';
319 if ((c = strchr(line, '#')) != NULL)
320 *c = '\0';
321 if (*line == '\0')
322 continue;
323
324 if (!name_set)
325 {
326 if (dsscasecmp(strtok(line, "\n\t "), "BEGIN"))
327 continue;
328 if (dsscasecmp(strtok(NULL, "\n\t "), name))
329 continue;
330 name_set = 1;
331 continue;
332 }
333 else
334 {
335 if (!dssncasecmp(line, "END", 3))
336 {
337 fclose(fp);
338 return;
339 }
340 }
341
342 if (sscanf(line, "%[^|]|%ld", token, &weight) != 2)
343 continue;
344
345 if (!dsscasecmp(token, "count"))
346 {
347 target->count = weight;
348 target->list =
349 (set_member *)
350 malloc((size_t)(weight * sizeof(set_member)));
351 MALLOC_CHECK(target->list);
352 target->max = 0;
353 continue;
354 }
355 target->list[count].text =
356 (char *) malloc((size_t)((int)strlen(token) + 1));
357 MALLOC_CHECK(target->list[count].text);
358 strcpy(target->list[count].text, token);
359 target->max += weight;
360 target->list[count].weight = target->max;
361
362 count += 1;
363 } /* while fgets() */
364
365 if (count != target->count)
366 {
367 fprintf(stderr, "Read error on dist '%s'\n", name);
368 fclose(fp);
369 exit(1);
370 }
371 target->permute = (long *)NULL;
372 fclose(fp);
373 return;
374}
375
376/*
377 * standard file open with life noise
378 */
379
380FILE *
381tbl_open(int tbl, char *mode)
382{
383 char prompt[256];
384 char fullpath[256];
385 FILE *f;
386 struct stat fstats;
387 int retcode;
388
389
390 if (*tdefs[tbl].name == PATH_SEP)
391 strcpy(fullpath, tdefs[tbl].name);
392 else
393 sprintf(fullpath, "%s%c%s",
394 env_config(PATH_TAG, PATH_DFLT), PATH_SEP, tdefs[tbl].name);
395
396 retcode = stat(fullpath, &fstats);
397 if (retcode && (errno != ENOENT))
398 {
399 fprintf(stderr, "stat(%s) failed.\n", fullpath);
400 exit(-1);
401 }
402 if (S_ISREG(fstats.st_mode) && !force && *mode != 'r' )
403 {
404 sprintf(prompt, "Do you want to overwrite %s ?", fullpath);
405 if (!yes_no(prompt))
406 exit(0);
407 }
408
409 if (S_ISFIFO(fstats.st_mode))
410 {
411 retcode =
412 open(fullpath, ((*mode == 'r')?O_RDONLY:O_WRONLY)|O_CREAT);
413 f = fdopen(retcode, mode);
414 }
415 else
416 f = fopen(fullpath, mode);
417 OPEN_CHECK(f, fullpath);
418
419 return (f);
420}
421
422
423/*
424 * agg_str(set, count) build an aggregated string from count unique
425 * selections taken from set
426 */
427void
428agg_str(distribution *set, long count, long col, char *dest)
429{
430 distribution *d;
431 int i;
432
433 d = set;
434 *dest = '\0';
435
436 permute_dist(d, col);
437 for (i=0; i < count; i++)
438 {
439 strcat(dest, DIST_MEMBER(set,DIST_PERMUTE(d, i)));
440 strcat(dest, " ");
441 }
442 *(dest + (int)strlen(dest) - 1) = '\0';
443
444 return;
445}
446
447
/*
 * dssncasecmp(s1, s2, n) -- case-insensitive comparison of at most n
 * characters (local version of strncasecmp()).
 *
 * Returns 0 when the first n characters match ignoring case, or when
 * both strings end before that; -1 when s1 sorts before s2 and 1 when it
 * sorts after.  A value of n <= 0 compares nothing and returns 0.
 *
 * Characters are converted to unsigned char before being handed to
 * tolower(): passing a negative plain char to the <ctype.h> functions is
 * undefined.
 */
long
dssncasecmp(char *s1, char *s2, int n)
{
    for (; n > 0; ++s1, ++s2, --n)
    {
        int c1 = tolower((unsigned char)*s1);
        int c2 = tolower((unsigned char)*s2);

        if (c1 != c2)
            return ((c1 < c2) ? -1 : 1);
        if (c1 == '\0')     /* both strings ended together */
            return (0);
    }
    return (0);
}
458
/*
 * dsscasecmp(s1, s2) -- case-insensitive string comparison (local
 * version of strcasecmp()).
 *
 * Returns 0 when the strings are equal ignoring case, -1 when s1 sorts
 * before s2 and 1 when it sorts after.
 *
 * Characters are converted to unsigned char before being handed to
 * tolower(): passing a negative plain char to the <ctype.h> functions is
 * undefined.
 */
long
dsscasecmp(char *s1, char *s2)
{
    for (;; ++s1, ++s2)
    {
        int c1 = tolower((unsigned char)*s1);
        int c2 = tolower((unsigned char)*s2);

        if (c1 != c2)
            return ((c1 < c2) ? -1 : 1);
        if (c1 == '\0')     /* reached the end of both strings */
            return (0);
    }
}
467
#ifndef STDLIB_HAS_GETOPT
int   optind = 0;       /* index of the argv word currently being parsed */
int   opterr = 0;       /* present for compatibility; not used here */
char *optarg = NULL;    /* argument of the most recent option taking one */

/*
 * getopt(ac, av, opt) -- minimal replacement for the library getopt().
 *
 * opt lists the accepted option letters; a letter followed by ':' takes
 * an argument, either attached ("-s10") or as the following word
 * ("-s 10").  Several letters may share one word ("-vq").
 *
 * Returns the option letter; '?' for a letter that is not in opt or for
 * a missing argument; -1 when the words are exhausted, when a word that
 * does not start with '-' (or is exactly "-") is met, or when "--" is
 * seen.  On a -1 caused by a non-option word optind is the index of that
 * word; after "--" it is the index of the word that follows.
 *
 * optarg points directly at the argument text inside av[].  The previous
 * implementation copied it into a BUFSIZ buffer without a length check
 * and, for attached arguments, overwrote the terminating NUL with the
 * option letter.
 */
int
getopt(int ac, char **av, char *opt)
{
    static char *nextchar = NULL;   /* next unparsed letter of the word */
    char        *spec;
    char         optch;

    if (nextchar == NULL || *nextchar == '\0')
    {
        /* current word is used up: move to the next one, if any */
        if (optind >= ac - 1)
        {
            optind = ac;
            nextchar = NULL;
            return (-1);
        }
        optind++;
        nextchar = av[optind];
        if (nextchar == NULL || nextchar[0] != '-' || nextchar[1] == '\0')
        {
            /* not an option word (this includes a bare "-") */
            nextchar = NULL;
            return (-1);
        }
        nextchar += 1;
    }

    if (*nextchar == '-')           /* -- termination */
    {
        optind++;
        nextchar = NULL;
        return (-1);
    }

    /* found an option; *nextchar is known to be non-NUL here */
    optch = *nextchar++;
    spec = (optch == ':') ? NULL : strchr(opt, optch);
    if (spec == NULL)               /* not defined for this run */
        return ('?');

    if (spec[1] == ':')             /* option takes an argument */
    {
        if (*nextchar != '\0')
            optarg = nextchar;      /* attached to the option letter */
        else
        {
            /* white space separated, use next arg */
            optind++;
            if (optind >= ac || av[optind] == NULL)
            {
                nextchar = NULL;
                return ('?');
            }
            optarg = av[optind];
        }
        nextchar = NULL;            /* the rest of the word is consumed */
    }
    return (optch);
}
#endif /* STDLIB_HAS_GETOPT */
531
532char **
533mk_ascdate(void)
534{
535 char **m;
536 dss_time_t t;
537 DSS_HUGE i;
538
539 m = (char**) malloc((size_t)(TOTDATE * sizeof (char *)));
540 MALLOC_CHECK(m);
541 for (i = 0; i < TOTDATE; i++)
542 {
543 mk_time(i + 1, &t);
544 m[i] = strdup(t.alpha);
545 }
546
547 return(m);
548}
549
550/*
551 * set_state() -- initialize the RNG so that
552 * appropriate data sets can be generated.
553 * For each table that is to be generated, calculate the number of rows/child, and send that to the
554 * seed generation routine in speed_seed.c. Note: assumes that tables are completely independent.
555 * Returns the number of rows to be generated by the named step.
556 */
557DSS_HUGE
558set_state(int table, long sf, long procs, long step, DSS_HUGE *extra_rows)
559{
560 int i;
561 DSS_HUGE rowcount, remainder, result;
562
563 if (sf == 0 || step == 0)
564 return(0);
565
566 rowcount = tdefs[table].base;
567 rowcount *= sf;
568 *extra_rows = rowcount % procs;
569 rowcount /= procs;
570 result = rowcount;
571 for (i=0; i < step - 1; i++)
572 {
573 if (table == LINE) /* special case for shared seeds */
574 tdefs[table].gen_seed(1, rowcount);
575 else
576 tdefs[table].gen_seed(0, rowcount);
577 /* need to set seeds of child in case there's a dependency */
578 /* NOTE: this assumes that the parent and child have the same base row count */
579 if (tdefs[table].child != NONE)
580 tdefs[tdefs[table].child].gen_seed(0,rowcount);
581 }
582 if (step > procs) /* moving to the end to generate updates */
583 tdefs[table].gen_seed(0, *extra_rows);
584
585 return(result);
586}
587