1/*
2 *
3 * Various routines that handle distributions, value selections and
4 * seed value management for the DSS benchmark. Current functions:
5 * env_config -- set config vars with optional environment override
6 * yes_no -- ask simple yes/no question and return boolean result
7 * a_rnd(min, max) -- random alphanumeric within length range
8 * pick_str(size, set) -- select a string from the set of size
9 * read_dist(file, name, distribution *) -- read named dist from file
10 * tbl_open(path, mode) -- std fopen with lifenoise
11 * julian(date) -- julian date correction
12 * rowcnt(tbl) -- proper scaling of given table
13 * e_str(set, min, max) -- build an embedded str
14 * agg_str() -- build a string from the named set
15 * dsscasecmp() -- version of strcasecmp()
16 * dssncasecmp() -- version of strncasecmp()
17 * getopt()
18 * set_state() -- initialize the RNG
19 */
20
21#include "config.h"
22#include "dss.h"
23
24#include <errno.h>
25#include <stdio.h>
26#include <string.h>
27#include <time.h>
28#ifdef HP
29#include <strings.h>
30#endif /* HP */
31#include <ctype.h>
32#include <math.h>
33#ifndef _POSIX_SOURCE
34//#include <malloc.h>
35#endif /* POSIX_SOURCE */
36#include <fcntl.h>
37#include <sys/stat.h>
38#include <sys/types.h>
39/* Lines added by Chuck McDevitt for WIN32 support */
40#ifdef WIN32
41#ifndef _POSIX_
42#include <io.h>
43#ifndef S_ISREG
44#define S_ISREG(m) (((m)&_S_IFMT) == _S_IFREG)
45#define S_ISFIFO(m) (((m)&_S_IFMT) == _S_IFIFO)
46#endif
47#endif
48#ifndef stat
49#define stat _stat
50#endif
51#ifndef fdopen
52#define fdopen _fdopen
53#endif
54#ifndef open
55#define open _open
56#endif
57#ifndef O_RDONLY
58#define O_RDONLY _O_RDONLY
59#endif
60#ifndef O_WRONLY
61#define O_WRONLY _O_WRONLY
62#endif
63#ifndef O_CREAT
64#define O_CREAT _O_CREAT
65#endif
66#endif
67/* End of lines added by Chuck McDevitt for WIN32 support */
68#include "dsstypes.h"
69
70static char alpha_num[65] = "0123456789abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ,";
71
72#if defined(__STDC__) || defined(__cplusplus)
73#define PROTO(s) s
74#else
75#define PROTO(s) ()
76#endif
77
78#ifndef WIN32
79char *getenv PROTO((const char *name));
80#endif
81void usage();
82long *permute_dist(distribution *d, long stream);
83extern seed_t Seed[];
84
/*
 * env_config: look up the environment variable `var` and return its value;
 * if the variable is not set, return the caller-supplied default `dflt`.
 *
 * The result is either the pointer handed back by getenv() or `dflt`
 * itself -- nothing is copied, and `dflt` may be NULL.
 */
const char *env_config(const char *var, const char *dflt) {
	/* a plain local: the lookup needs no state that outlives the call */
	const char *value = getenv(var);

	return ((value != NULL) ? value : dflt);
}
97
/*
 * yes_no: ask a yes/no question and return the answer as a boolean.
 *
 * Prints `prompt` followed by " [Y/N]: ", reads one reply line from stdin
 * and looks at its first character only. The question is repeated until
 * that character is y/Y (returns 1) or n/N (returns 0).
 *
 * If stdin reaches end-of-file or a read error occurs the question can never
 * be answered, so 0 ("no") is returned instead of prompting forever.
 */
long yes_no(char *prompt) {
	char reply[128];

	/*
	 * for (;;) instead of while (1): it does not trigger the MSVC
	 * "conditional expression is constant" warning (C4127), so no warning
	 * pragmas are needed around the loop.
	 */
	for (;;) {
		printf("%s [Y/N]: ", prompt);
		if (fgets(reply, sizeof(reply), stdin) == NULL) {
			/* EOF or read error: reply holds nothing usable */
			return (0);
		}
		switch (*reply) {
		case 'y':
		case 'Y':
			return (1);
		case 'n':
		case 'N':
			return (0);
		default:
			printf("Please answer 'yes' or 'no'.\n");
			break;
		}
	}
}
127
128/*
129 * generate a random string with length randomly selected in [min, max]
130 * and using the characters in alphanum (currently includes a space
131 * and comma)
132 */
133void a_rnd(int min, int max, int column, char *dest) {
134 DSS_HUGE i, len, char_int;
135
136 RANDOM(len, min, max, column);
137 for (i = 0; i < len; i++) {
138 if (i % 5 == 0)
139 RANDOM(char_int, 0, MAX_LONG, column);
140 *(dest + i) = alpha_num[char_int & 077];
141 char_int >>= 6;
142 }
143 *(dest + len) = '\0';
144 return;
145}
146
147/*
148 * embed a randomly selected member of distribution d in alpha-numeric
149 * noise of a length rendomly selected between min and max at a random
150 * position
151 */
152void e_str(distribution *d, int min, int max, int stream, char *dest) {
153 char strtmp[MAXAGG_LEN + 1];
154 DSS_HUGE loc;
155 int len;
156
157 a_rnd(min, max, stream, dest);
158 pick_str(d, stream, strtmp);
159 len = (int)strlen(strtmp);
160 RANDOM(loc, 0, ((int)strlen(dest) - 1 - len), stream);
161 memcpy(dest + loc, strtmp, sizeof(char) * len);
162
163 return;
164}
165
166/*
167 * return the string associate with the LSB of a uniformly selected
168 * long in [1, max] where max is determined by the distribution
169 * being queried
170 */
171int pick_str(distribution *s, int c, char *target) {
172 long i = 0;
173 DSS_HUGE j;
174
175 RANDOM(j, 1, s->list[s->count - 1].weight, c);
176 while (s->list[i].weight < j)
177 i++;
178 strcpy(target, s->list[i].text);
179 return (i);
180}
181
182/*
183 * unjulian (long date) -- return(date - STARTDATE)
184 */
185long unjulian(long date) {
186 int i;
187 long res = 0;
188
189 for (i = STARTDATE / 1000; i < date / 1000; i++)
190 res += 365 + LEAP(i);
191 res += date % 1000 - 1;
192
193 return (res);
194}
195
196long julian(long date) {
197 long offset;
198 long result;
199 long yr;
200 long yend;
201
202 offset = date - STARTDATE;
203 result = STARTDATE;
204
205#ifdef WIN32
206/* Disable warning about conditional expression is constant */
207#pragma warning(disable : 4127)
208#endif
209
210 while (1) {
211#ifdef WIN32
212#pragma warning(default : 4127)
213#endif
214 yr = result / 1000;
215 yend = yr * 1000 + 365 + LEAP(yr);
216 if (result + offset > yend) /* overflow into next year */
217 {
218 offset -= yend - result + 1;
219 result += 1000;
220 continue;
221 } else
222 break;
223 }
224 return (result + offset);
225}
226
227#include "dists_dss.h"
228
/*
 * read_line_into_buffer: fgets()-like reader for in-memory text.
 *
 * Copies characters from *src into `buffer` until a '\n' has been copied
 * (the newline is kept), the text ends, or bufsiz - 1 characters have been
 * stored. The buffer is always NUL-terminated and *src is advanced past
 * everything that was copied.
 *
 * Returns the character now at *src: non-zero while more text follows,
 * '\0' once the text is exhausted. This is evaluated after the copy, so a
 * last line that ends exactly at the terminator is stored in `buffer` but
 * still yields '\0'.
 *
 * With bufsiz == 0 there is no room even for the terminator, so nothing is
 * written and nothing is consumed (bufsiz - 1 would otherwise wrap around
 * to a huge limit).
 */
static char read_line_into_buffer(char *buffer, size_t bufsiz, const char **src) {
	const char *cursor = *src;
	size_t used = 0;

	if (bufsiz == 0) {
		return *cursor;
	}

	while (*cursor != '\0' && used + 1 < bufsiz) {
		char ch = *cursor++;

		buffer[used++] = ch;
		if (ch == '\n') {
			break;
		}
	}
	buffer[used] = '\0';

	*src = cursor;
	return *cursor;
}
242
243/*
244 * load a distribution from a flat file into the target structure;
245 * should be rewritten to allow multiple dists in a file
246 */
247void read_dist(const char *path, const char *name, distribution *target) {
248 const char *src = dists_dss;
249 char line[256], token[256], *c;
250 long weight, count = 0, name_set = 0;
251
252 while (read_line_into_buffer(line, sizeof(line), &src)) {
253 if ((c = strchr(line, '\n')) != NULL)
254 *c = '\0';
255 if ((c = strchr(line, '#')) != NULL)
256 *c = '\0';
257 if (*line == '\0')
258 continue;
259
260 if (!name_set) {
261 if (dsscasecmp(strtok(line, "\n\t "), "BEGIN"))
262 continue;
263 if (dsscasecmp(strtok(NULL, "\n\t "), name))
264 continue;
265 name_set = 1;
266 continue;
267 } else {
268 if (!dssncasecmp(line, "END", 3)) {
269 return;
270 }
271 }
272
273 if (sscanf(line, "%[^|]|%ld", token, &weight) != 2)
274 continue;
275
276 if (!dsscasecmp(token, "count")) {
277 target->count = weight;
278 target->list = (set_member *)malloc((size_t)(weight * sizeof(set_member)));
279 MALLOC_CHECK(target->list);
280 target->max = 0;
281 continue;
282 }
283 target->list[count].text = (char *)malloc((size_t)((int)strlen(token) + 1));
284 MALLOC_CHECK(target->list[count].text);
285 strcpy(target->list[count].text, token);
286 target->max += weight;
287 target->list[count].weight = target->max;
288
289 count += 1;
290 } /* while fgets() */
291
292 if (count != target->count) {
293 fprintf(stderr, "Read error on dist '%s'\n", name);
294 exit(1);
295 }
296 target->permute = (long *)NULL;
297 return;
298}
299
300/*
301 * agg_str(set, count) build an aggregated string from count unique
302 * selections taken from set
303 */
304void agg_str(distribution *set, long count, long col, char *dest) {
305 distribution *d;
306 int i;
307
308 d = set;
309 *dest = '\0';
310
311 permute_dist(d, col);
312 for (i = 0; i < count; i++) {
313 strcat(dest, DIST_MEMBER(set, DIST_PERMUTE(d, i)));
314 strcat(dest, " ");
315 }
316 *(dest + (int)strlen(dest) - 1) = '\0';
317
318 return;
319}
320
/*
 * dssncasecmp: version of strncasecmp() -- compare at most `n` characters
 * of s1 and s2, ignoring case.
 *
 * Returns 0 if they are equal over that range (or n <= 0), -1 if s1 sorts
 * first and 1 if s2 sorts first. Characters are compared as unsigned char
 * values after tolower(); passing a negative plain char to tolower() would
 * be undefined. A NULL string sorts before any non-NULL string and equals
 * another NULL.
 */
long dssncasecmp(const char *s1, const char *s2, int n) {
	if (n <= 0)
		return (0);
	if (s1 == NULL || s2 == NULL) {
		if (s1 == s2)
			return (0);
		return ((s1 == NULL) ? -1 : 1);
	}

	for (; n > 0; ++s1, ++s2, --n) {
		int c1 = tolower((unsigned char)*s1);
		int c2 = tolower((unsigned char)*s2);

		if (c1 != c2)
			return ((c1 < c2) ? -1 : 1);
		if (*s1 == '\0') /* both strings ended together */
			return (0);
	}
	return (0);
}
329
/*
 * dsscasecmp: version of strcasecmp() -- compare s1 and s2 ignoring case.
 *
 * Returns 0 if the strings are equal, -1 if s1 sorts first and 1 if s2
 * sorts first. Characters are compared as unsigned char values after
 * tolower(); passing a negative plain char to tolower() would be undefined.
 *
 * A NULL string sorts before any non-NULL string and equals another NULL,
 * so a failed strtok() result can be passed in without crashing.
 */
long dsscasecmp(const char *s1, const char *s2) {
	int c1, c2;

	if (s1 == NULL || s2 == NULL) {
		if (s1 == s2)
			return (0);
		return ((s1 == NULL) ? -1 : 1);
	}

	for (;; ++s1, ++s2) {
		c1 = tolower((unsigned char)*s1);
		c2 = tolower((unsigned char)*s2);
		if (c1 != c2)
			return ((c1 < c2) ? -1 : 1);
		if (*s1 == '\0') /* both strings ended together */
			return (0);
	}
}
336
#ifndef STDLIB_HAS_GETOPT
int optind = 0;
int opterr = 0;
char *optarg = NULL;

/*
 * getopt: minimal command line option parser for platforms without one.
 *
 * ac, av -- argument count and vector; av[0] is skipped because optind
 *           starts at 0 and is incremented before an argument is examined.
 * opt    -- the accepted option letters; a letter followed by ':' takes an
 *           argument, either attached ("-s10") or as the next element of av
 *           ("-s 10").
 *
 * Returns the option letter, '?' for an unknown letter or a missing
 * argument, and -1 when the options are exhausted: at the end of av, at the
 * first argument that does not start with '-', at a lone "-", or at a '-'
 * in option position (the "--" terminator, after which optind is advanced
 * once more). When -1 is returned for a non-option argument, optind is the
 * index of that argument.
 *
 * optarg points to an internal buffer of BUFSIZ bytes that holds the
 * NUL-terminated argument of the most recent option that took one; longer
 * arguments are truncated to fit. The buffer is overwritten by the next such
 * option and must not be freed.
 */
int getopt(int ac, char **av, char *opt) {
	static char *nextchar = NULL; /* next unread option letter, if any */
	static char argbuf[BUFSIZ];   /* storage behind optarg */
	const char *value;
	char *spec;
	char optch;

	if (optarg == NULL) {
		optarg = argbuf;
	}

	if (!nextchar || *nextchar == '\0') {
		/* current argument used up: step to the next one */
		optind++;
		if (optind >= ac) {
			optind = ac; /* stay put if called again */
			nextchar = NULL;
			return (-1);
		}
		nextchar = av[optind];
		/* operands and a lone "-" end option processing */
		if (nextchar == NULL || nextchar[0] != '-' || nextchar[1] == '\0') {
			nextchar = NULL;
			return (-1);
		}
		nextchar += 1;
	}

	if (*nextchar == '-') /* -- termination */
	{
		optind++;
		nextchar = NULL;
		return (-1);
	}

	/* found an option */
	optch = *nextchar++;
	/* ':' only marks arguments in opt, it is never an option itself */
	spec = (optch == ':') ? NULL : strchr(opt, optch);
	if (spec == NULL) /* not defined for this run */
		return ('?');

	if (spec[1] == ':') /* option takes an argument */
	{
		if (*nextchar != '\0') {
			value = nextchar; /* attached: rest of this argument */
		} else /* white space separated, use next arg */
		{
			if (optind + 1 >= ac) {
				optind = ac;
				nextchar = NULL;
				return ('?');
			}
			value = av[++optind];
			if (value == NULL) {
				nextchar = NULL;
				return ('?');
			}
		}
		optarg = argbuf;
		/* bounded copy that always leaves the terminator in place */
		snprintf(argbuf, sizeof(argbuf), "%s", value);
		nextchar = NULL;
	}
	return (optch);
}
#endif /* STDLIB_HAS_GETOPT */
393
394char **mk_ascdate(void) {
395 char **m;
396 dss_time_t t;
397 DSS_HUGE i;
398
399 m = (char **)malloc((size_t)(TOTDATE * sizeof(char *)));
400 MALLOC_CHECK(m);
401 for (i = 0; i < TOTDATE; i++) {
402 mk_time(i + 1, &t);
403 m[i] = strdup(t.alpha);
404 }
405
406 return (m);
407}
408
409/*
410 * set_state() -- initialize the RNG so that
411 * appropriate data sets can be generated.
412 * For each table that is to be generated, calculate the number of rows/child,
413 * and send that to the seed generation routine in speed_seed.c. Note: assumes
414 * that tables are completely independent. Returns the number of rows to be
415 * generated by the named step.
416 */
417DSS_HUGE
418set_state(int table, long sf, long procs, long step, DSS_HUGE *extra_rows) {
419 int i;
420 DSS_HUGE rowcount, result;
421
422 if (sf == 0 || step == 0)
423 return (0);
424
425 rowcount = tdefs[table].base;
426 rowcount *= sf;
427 *extra_rows = rowcount % procs;
428 rowcount /= procs;
429 result = rowcount;
430 for (i = 0; i < step - 1; i++) {
431 if (table == LINE) /* special case for shared seeds */
432 tdefs[table].gen_seed(1, rowcount);
433 else
434 tdefs[table].gen_seed(0, rowcount);
435 /* need to set seeds of child in case there's a dependency */
436 /* NOTE: this assumes that the parent and child have the same base row
437 * count */
438 if (tdefs[table].child != NONE)
439 tdefs[tdefs[table].child].gen_seed(0, rowcount);
440 }
441 if (step > procs) /* moving to the end to generate updates */
442 tdefs[table].gen_seed(0, *extra_rows);
443
444 return (result);
445}
446