1/*
2 * Legal Notice
3 *
4 * This document and associated source code (the "Work") is a part of a
5 * benchmark specification maintained by the TPC.
6 *
7 * The TPC reserves all right, title, and interest to the Work as provided
8 * under U.S. and international laws, including without limitation all patent
9 * and trademark rights therein.
10 *
11 * No Warranty
12 *
13 * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION
14 * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE
15 * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER
16 * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY,
17 * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES,
18 * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR
19 * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF
20 * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE.
21 * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT,
22 * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT
23 * WITH REGARD TO THE WORK.
24 * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO
25 * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE
26 * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS
27 * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT,
28 * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY,
29 * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT
30 * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD
31 * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES.
32 *
33 * Contributors:
34 * Gradient Systems
35 */
36#include "config.h"
37#include "porting.h"
38#include <stdio.h>
39#include <stdlib.h>
40#ifdef WIN32
41#include <search.h>
42#include <limits.h>
43#endif
44#include "config.h"
45#include "porting.h"
46#include "decimal.h"
47#include "date.h"
48#include "genrand.h"
49#include "dist.h"
50#include "r_params.h"
51#include "params.h"
52
53#include "columns.h"
54#include "tables.h"
55#include "streams.h"
56
57static long Mult = 16807; /* the multiplier */
58static long nQ = 127773; /* the quotient MAXINT / Mult */
59static long nR = 2836; /* the remainder MAXINT % Mult */
60void DSNthElement(HUGE_TYPE N, int nStream);
61
62/*
63 * Routine: next_random(int stream)
64 * Purpose:
65 * Algorithm:
66 * Data Structures:
67 *
68 * Params:
69 * Returns:
70 * Called By:
71 * Calls:
72 * Assumptions:
73 * Side Effects:
74 * TODO: None
75 */
76long next_random(int stream) {
77 long s = Streams[stream].nSeed, div_res, mod_res;
78
79 div_res = s / nQ;
80 mod_res = s - nQ * div_res; /* i.e., mod_res = s % nQ */
81 s = Mult * mod_res - div_res * nR;
82 if (s < 0)
83 s += MAXINT;
84 Streams[stream].nSeed = s;
85 Streams[stream].nUsed += 1;
86#ifdef JMS
87 Streams[stream].nTotal += 1;
88#endif
89 return (s);
90}
91
92/*
93 * Routine: next_random_float(int stream)
94 * Purpose: return random in [0..1]
95 * Algorithm:
96 * Data Structures:
97 *
98 * Params:
99 * Returns:
100 * Called By:
101 * Calls:
102 * Assumptions:
103 * Side Effects:
104 * TODO: None
105 */
106double next_random_float(int stream) {
107 long res;
108
109 res = next_random(stream);
110
111 return ((double)res / (double)MAXINT);
112}
113
114/*
115 * Routine: skip_random(int stream, int skip_count)
116 * Purpose:
117 * Algorithm:
118 * Data Structures:
119 *
120 * Params:
121 * Returns:
122 * Called By:
123 * Calls:
124 * Assumptions:
125 * Side Effects:
126 * TODO: None
127 */
128void skip_random(int nStream, ds_key_t N) {
129 ds_key_t Z;
130 ds_key_t M;
131
132#ifdef UNDEF
133 fprintf(stderr, "skipping stream %d to %d\n", nStream, N);
134 Streams[nStream].nTotal = N;
135#endif
136 M = Mult;
137 Z = (ds_key_t)Streams[nStream].nInitialSeed;
138 while (N > 0) {
139 if (N % 2 != 0) /* testing for oddness, this seems portable */
140 Z = (M * Z) % MAXINT;
141 N = N / 2; /* integer division, truncates */
142 M = (M * M) % MAXINT;
143 }
144 Streams[nStream].nSeed = (long)Z;
145
146 return;
147}
148
149/*
150 * Routine: genrand_integer(int dist, int min, int max, int mean)
151 * Purpose: generate a random integer given the distribution and limits
152 * Algorithm:
153 * Data Structures:
154 *
155 * Params:
156 * Returns: int
157 * Called By:
158 * Calls:
159 * Assumptions:
160 * Side Effects:
161 */
162int genrand_integer(int *dest, int dist, int min, int max, int mean, int stream) {
163 int res = 0, i;
164 double fres = 0;
165
166 switch (dist) {
167 case DIST_UNIFORM:
168 res = next_random(stream);
169 res %= max - min + 1;
170 res += min;
171 break;
172 case DIST_EXPONENTIAL:
173 for (i = 0; i < 12; i++)
174 fres += (double)(next_random(stream) / MAXINT) - 0.5;
175 res = min + (int)((max - min + 1) * fres);
176 break;
177 default:
178 INTERNAL("Undefined distribution");
179 break;
180 }
181
182 if (dest == NULL)
183 return (res);
184
185 *dest = res;
186
187 return (0);
188}
189
190/*
191 * Routine: genrand_key(ket_t *dest, int dist, ds_key_t min, ds_key_t max,
192 * ds_key_t mean, int stream) Purpose: generate a random integer given the
193 * distribution and limits Algorithm: Data Structures:
194 *
195 * Params:
196 * Returns: ds_key_t
197 * Called By:
198 * Calls:
199 * Assumptions:
200 * Side Effects:
201 * TODO: Need to rework to rely on RNG routines that will work for 64 bit return
202 * values
203 */
204ds_key_t genrand_key(ds_key_t *dest, int dist, ds_key_t min, ds_key_t max, ds_key_t mean, int stream) {
205 int res = 0, i;
206 double fres = 0;
207
208 switch (dist) {
209 case DIST_UNIFORM:
210 res = next_random(stream);
211 res %= (int)(max - min + 1);
212 res += (int)min;
213 break;
214 case DIST_EXPONENTIAL:
215 for (i = 0; i < 12; i++)
216 fres += (double)(next_random(stream) / MAXINT) - 0.5;
217 res = (int)min + (int)((max - min + 1) * fres);
218 break;
219 default:
220 INTERNAL("Undefined distribution");
221 break;
222 }
223
224 if (dest == NULL)
225 return ((ds_key_t)res);
226
227 *dest = (ds_key_t)res;
228
229 return ((ds_key_t)0);
230}
231
232/*
233 * Routine:
234 * genrand_decimal(int dist, decimal_t *min, decimal_t *max, decimal_t *mean)
235 * Purpose: create a random decimal_t
236 * Algorithm:
237 * Data Structures:
238 *
239 * Params: min/max are char * to allow easy passing of precision
240 * Returns: decimal_t *; NULL on failure
241 * Called By:
242 * Calls:
243 * Assumptions:
244 * Side Effects:
245 * TODO: None
246 */
247int genrand_decimal(decimal_t *dest, int dist, decimal_t *min, decimal_t *max, decimal_t *mean, int stream) {
248 int i;
249 decimal_t res;
250 double fres = 0.0;
251
252 if (min->precision < max->precision)
253 dest->precision = min->precision;
254 else
255 dest->precision = max->precision;
256
257 switch (dist) {
258 case DIST_UNIFORM:
259 res.number = next_random(stream);
260 res.number %= max->number - min->number + 1;
261 res.number += min->number;
262 break;
263 case DIST_EXPONENTIAL:
264 for (i = 0; i < 12; i++) {
265 fres /= 2.0;
266 fres += (double)((double)next_random(stream) / (double)MAXINT) - 0.5;
267 }
268 res.number = mean->number + (int)((max->number - min->number + 1) * fres);
269 break;
270 default:
271 INTERNAL("Undefined distribution");
272 break;
273 }
274
275 dest->number = res.number;
276 i = 0;
277 while (res.number > 10) {
278 res.number /= 10;
279 i += 1;
280 }
281 dest->scale = i;
282
283 return (0);
284}
285
286/* Routine: RNGReset(int tbl)
287 * Purpose:
288 * Algorithm:
289 * Data Structures:
290 *
291 * Params:
292 * Returns:
293 * Called By:
294 * Calls:
295 * Assumptions:
296 * Side Effects:
297 * TODO: None
298 */
299int RNGReset(int tbl) {
300 int i;
301
302 for (i = 0; Streams[i].nColumn != -1; i++)
303 if (Streams[i].nTable == tbl)
304 Streams[i].nSeed = Streams[i].nInitialSeed;
305
306 return (0);
307}
308
309/* WARNING! This routine assumes the existence of 64-bit */
310
311/* integers. The notation used here- "HUGE" is *not* ANSI standard. */
312
313/* Hopefully, you have this extension as well. If not, use whatever */
314
315/* nonstandard trick you need to in order to get 64 bit integers. */
316
317/* The book says that this will work if MAXINT for the type you choose */
318
319/* is at least 2**46 - 1, so 64 bits is more than you *really* need */
320
321static HUGE_TYPE Multiplier = 16807; /* or whatever nonstandard */
322static HUGE_TYPE Modulus = 2147483647; /* trick you use to get 64 bit int */
323
324/* Advances value of Seed after N applications of the random number generator
325 with multiplier Mult and given Modulus.
326 NthElement(Seed[],count);
327
328 Theory: We are using a generator of the form
329 X_n = [Mult * X_(n-1)] mod Modulus. It turns out that
330 X_n = [(Mult ** n) X_0] mod Modulus.
331 This can be computed using a divide-and-conquer technique, see
332 the code below.
333
334 In words, this means that if you want the value of the Seed after n
335 applications of the generator, you multiply the initial value of the
336 Seed by the "super multiplier" which is the basic multiplier raised
337 to the nth power, and then take mod Modulus.
338*/
339
340/* Nth Element of sequence starting with StartSeed */
341void DSNthElementNthElement(HUGE_TYPE N, int nStream) {
342 HUGE_TYPE Z;
343 HUGE_TYPE Mult;
344
345 Mult = Multiplier;
346 Z = (HUGE_TYPE)Streams[nStream].nInitialSeed;
347 while (N > 0) {
348 if (N % 2 != 0) /* testing for oddness, this seems portable */
349 {
350#ifdef JMS
351 Streams[nStream].nTotal += 1;
352#endif
353 Z = (Mult * Z) % Modulus;
354 }
355 N = N / 2; /* integer division, truncates */
356 Mult = (Mult * Mult) % Modulus;
357#ifdef JMS
358 Streams[nStream].nTotal += 2;
359#endif
360 }
361 Streams[nStream].nSeed = (long)Z;
362
363 return;
364}
365
366/*
367 * Routine:
368 * Purpose:
369 * Algorithm:
370 * Data Structures:
371 *
372 * Params:
373 * Returns:
374 * Called By:
375 * Calls:
376 * Assumptions:
377 * Side Effects:
378 * TODO: None
379 */
380int dump_seeds_ds(int tbl) {
381 int i;
382
383 for (i = 0; Streams[i].nColumn != -1; i++)
384 if (Streams[i].nTable == tbl)
385 printf("%04d\t%09d\t%09ld\n", i, Streams[i].nUsed, Streams[i].nSeed);
386 return (0);
387}
388
389/*
390 * Routine: gen_charset(char *set, int min, int max)
391 * Purpose: generate random characters from set for a random length [min..max]
392 * Algorithm:
393 * Data Structures:
394 *
395 * Params:
396 * Returns:
397 * Called By:
398 * Calls:
399 * Assumptions:
400 * Side Effects:
401 * TODO: None
402 */
403int gen_charset(char *dest, char *set, int min, int max, int stream) {
404 int len, i, temp;
405
406 if (set == NULL) {
407 dest = NULL;
408 return (-1);
409 }
410
411 genrand_integer(&len, DIST_UNIFORM, min, max, 0, stream);
412
413 for (i = 0; i < max; i++) {
414 genrand_integer(&temp, DIST_UNIFORM, 0, strlen(set) - 1, 0, stream);
415 if (i < len)
416 dest[i] = *(set + temp);
417 }
418 dest[len] = '\0';
419
420 return (0);
421}
422
423/*
424 * Routine: genrand_date(int dist, date_t *min, date_t *max)
425 * Purpose: generate random date within [min..max]
426 * Algorithm:
427 * Data Structures:
428 *
429 * Params:
430 * Returns:
431 * Called By:
432 * Calls:
433 * Assumptions:
434 * Side Effects:
435 * TODO: None
436 */
437int genrand_date(date_t *dest, int dist, date_t *min, date_t *max, date_t *mean, int stream) {
438 int range, imean = 0, temp, idt, nYear, nTotalWeight = 0, nDayCount;
439
440 idt = dttoj(min);
441 range = dttoj(max);
442 range -= idt;
443 nDayCount = min->day;
444 nYear = min->year;
445
446 switch (dist) {
447 case DIST_SALES:
448 case DIST_RETURNS:
449 /* walk from min to max to "integrate" the distribution */
450 while (range -= 1) {
451 nTotalWeight += dist_weight(NULL, "calendar", nDayCount, dist + is_leap(nYear));
452 if (nDayCount == 365 + is_leap(nYear)) {
453 nYear += 1;
454 nDayCount = 1;
455 } else
456 nDayCount += 1;
457 }
458 /* pick a value in the resulting range */
459 temp = genrand_integer(NULL, DIST_UNIFORM, 1, nTotalWeight, 0, stream);
460 /* and walk it again to translate that back to a date */
461 nDayCount = min->day;
462 idt = min->julian;
463 nYear = min->year;
464 while (temp >= 0) {
465 temp -= dist_weight(NULL, "calendar", nDayCount, dist + is_leap(nYear));
466 nDayCount += 1;
467 idt += 1;
468 if (nDayCount > 365 + is_leap(nYear)) {
469 nYear += 1;
470 nDayCount = 1;
471 }
472 }
473 break;
474 case DIST_EXPONENTIAL:
475 imean = dttoj(mean);
476 imean -= idt;
477 case DIST_UNIFORM:
478 genrand_integer(&temp, dist, 0, range, imean, stream);
479 idt += temp;
480 break;
481 default:
482 break;
483 }
484
485 jtodt(dest, idt);
486
487 return (0);
488}
489
490/**************
491 **************
492 **
493 ** static routines
494 **
495 **************
496 **************/
497
498/*
499 * Routine: init_rand()
500 * Purpose: Initialize the RNG used throughout the code
501 * Algorithm: To allow two columns to use the same stream of numbers (for
502 *joins), pre-sort the streams list by Duplicate and then assign values. Order
503 *by column after initialization Data Structures:
504 *
505 * Params:
506 * Returns:
507 * Called By:
508 * Calls:
509 * Assumptions:
510 * Side Effects:
511 * TODO:
512 */
513// FIXME: allow re-init
514void init_rand(void) {
515 static int bInit = 0;
516 int i, skip, nSeed;
517
518 if (!bInit) {
519 if (is_set("RNGSEED"))
520 nSeed = get_int("RNGSEED");
521 else
522 nSeed = RNG_SEED;
523 skip = MAXINT / MAX_COLUMN;
524 for (i = 0; i < MAX_COLUMN; i++) {
525 Streams[i].nInitialSeed = nSeed + skip * i;
526 Streams[i].nSeed = nSeed + skip * i;
527 Streams[i].nUsed = 0;
528 }
529 bInit = 1;
530 }
531 return;
532}
533
534void resetSeeds(int nTable) {
535 int i;
536
537 for (i = 0; i < MAX_COLUMN; i++)
538 if (Streams[i].nTable == nTable)
539 Streams[i].nSeed = Streams[i].nInitialSeed;
540 return;
541}
542
543/*
544 * Routine:
545 * Purpose:
546 * Algorithm:
547 * Data Structures:
548 *
549 * Params:
550 * Returns:
551 * Called By:
552 * Calls:
553 * Assumptions:
554 * Side Effects:
555 * TODO: None
556 */
557void genrand_email(char *pEmail, char *pFirst, char *pLast, int nColumn) {
558 char *pDomain;
559 char szCompany[50];
560 int nCompanyLength;
561
562 pick_distribution(&pDomain, "top_domains", 1, 1, nColumn);
563 genrand_integer(&nCompanyLength, DIST_UNIFORM, 10, 20, 0, nColumn);
564 gen_charset(&szCompany[0], ALPHANUM, 1, 20, nColumn);
565 szCompany[nCompanyLength] = '\0';
566
567 sprintf(pEmail, "%s.%s@%s.%s", pFirst, pLast, szCompany, pDomain);
568
569 return;
570}
571
572/*
573 * Routine:
574 * Purpose:
575 * Algorithm:
576 * Data Structures:
577 *
578 * Params:
579 * Returns:
580 * Called By:
581 * Calls:
582 * Assumptions:
583 * Side Effects:
584 * TODO: None
585 */
586void genrand_ipaddr(char *pDest, int nColumn) {
587 int arQuads[4], i;
588
589 for (i = 0; i < 4; i++)
590 genrand_integer(&arQuads[i], DIST_UNIFORM, 1, 255, 0, nColumn);
591 sprintf(pDest, "%03d.%03d.%03d.%03d", arQuads[0], arQuads[1], arQuads[2], arQuads[3]);
592
593 return;
594}
595
596/*
597 * Routine:
598 * Purpose:
599 * Algorithm:
600 * Data Structures:
601 *
602 * Params:
603 * Returns:
604 * Called By:
605 * Calls:
606 * Assumptions:
607 * Side Effects:
608 * TODO: None
609 */
610int genrand_url(char *pDest, int nColumn) {
611 strcpy(pDest, "http://www.foo.com");
612
613 return (0);
614}
615
616/*
617 * Routine:
618 * Purpose:
619 * Algorithm:
620 * Data Structures:
621 *
622 * Params:
623 * Returns:
624 * Called By:
625 * Calls:
626 * Assumptions:
627 * Side Effects:
628 * TODO: None
629 */
630int setSeed(int nStream, int nValue) {
631 int nRetValue;
632
633 nRetValue = Streams[nStream].nSeed;
634 Streams[nStream].nSeed = nValue;
635
636 return (nRetValue);
637}
638
639#ifdef TEST
640main() {
641 printf("r_genrand:No test routine has been defined for this module\n");
642
643 exit(0);
644}
645#endif /* TEST */
646