1 | /* |
2 | * Legal Notice |
3 | * |
4 | * This document and associated source code (the "Work") is a part of a |
5 | * benchmark specification maintained by the TPC. |
6 | * |
7 | * The TPC reserves all right, title, and interest to the Work as provided |
8 | * under U.S. and international laws, including without limitation all patent |
9 | * and trademark rights therein. |
10 | * |
11 | * No Warranty |
12 | * |
13 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
14 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
15 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
16 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
17 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
18 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
19 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
20 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
21 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
22 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
23 | * WITH REGARD TO THE WORK. |
24 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
25 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
26 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
27 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
28 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
29 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
30 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
31 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
32 | * |
33 | * Contributors: |
34 | * Gradient Systems |
35 | */ |
36 | #include "config.h" |
37 | #include "porting.h" |
38 | #include <stdio.h> |
39 | #include <stdlib.h> |
40 | #ifdef WIN32 |
41 | #include <search.h> |
42 | #include <limits.h> |
43 | #endif |
44 | #include "config.h" |
45 | #include "porting.h" |
46 | #include "decimal.h" |
47 | #include "date.h" |
48 | #include "genrand.h" |
49 | #include "dist.h" |
50 | #include "r_params.h" |
51 | #include "params.h" |
52 | |
53 | #include "columns.h" |
54 | #include "tables.h" |
55 | #include "streams.h" |
56 | |
57 | static long Mult = 16807; /* the multiplier */ |
58 | static long nQ = 127773; /* the quotient MAXINT / Mult */ |
59 | static long nR = 2836; /* the remainder MAXINT % Mult */ |
60 | void DSNthElement(HUGE_TYPE N, int nStream); |
61 | |
62 | /* |
63 | * Routine: next_random(int stream) |
64 | * Purpose: |
65 | * Algorithm: |
66 | * Data Structures: |
67 | * |
68 | * Params: |
69 | * Returns: |
70 | * Called By: |
71 | * Calls: |
72 | * Assumptions: |
73 | * Side Effects: |
74 | * TODO: None |
75 | */ |
76 | long next_random(int stream) { |
77 | long s = Streams[stream].nSeed, div_res, mod_res; |
78 | |
79 | div_res = s / nQ; |
80 | mod_res = s - nQ * div_res; /* i.e., mod_res = s % nQ */ |
81 | s = Mult * mod_res - div_res * nR; |
82 | if (s < 0) |
83 | s += MAXINT; |
84 | Streams[stream].nSeed = s; |
85 | Streams[stream].nUsed += 1; |
86 | #ifdef JMS |
87 | Streams[stream].nTotal += 1; |
88 | #endif |
89 | return (s); |
90 | } |
91 | |
92 | /* |
93 | * Routine: next_random_float(int stream) |
94 | * Purpose: return random in [0..1] |
95 | * Algorithm: |
96 | * Data Structures: |
97 | * |
98 | * Params: |
99 | * Returns: |
100 | * Called By: |
101 | * Calls: |
102 | * Assumptions: |
103 | * Side Effects: |
104 | * TODO: None |
105 | */ |
106 | double next_random_float(int stream) { |
107 | long res; |
108 | |
109 | res = next_random(stream); |
110 | |
111 | return ((double)res / (double)MAXINT); |
112 | } |
113 | |
114 | /* |
115 | * Routine: skip_random(int stream, int skip_count) |
116 | * Purpose: |
117 | * Algorithm: |
118 | * Data Structures: |
119 | * |
120 | * Params: |
121 | * Returns: |
122 | * Called By: |
123 | * Calls: |
124 | * Assumptions: |
125 | * Side Effects: |
126 | * TODO: None |
127 | */ |
128 | void skip_random(int nStream, ds_key_t N) { |
129 | ds_key_t Z; |
130 | ds_key_t M; |
131 | |
132 | #ifdef UNDEF |
133 | fprintf(stderr, "skipping stream %d to %d\n" , nStream, N); |
134 | Streams[nStream].nTotal = N; |
135 | #endif |
136 | M = Mult; |
137 | Z = (ds_key_t)Streams[nStream].nInitialSeed; |
138 | while (N > 0) { |
139 | if (N % 2 != 0) /* testing for oddness, this seems portable */ |
140 | Z = (M * Z) % MAXINT; |
141 | N = N / 2; /* integer division, truncates */ |
142 | M = (M * M) % MAXINT; |
143 | } |
144 | Streams[nStream].nSeed = (long)Z; |
145 | |
146 | return; |
147 | } |
148 | |
149 | /* |
150 | * Routine: genrand_integer(int dist, int min, int max, int mean) |
151 | * Purpose: generate a random integer given the distribution and limits |
152 | * Algorithm: |
153 | * Data Structures: |
154 | * |
155 | * Params: |
156 | * Returns: int |
157 | * Called By: |
158 | * Calls: |
159 | * Assumptions: |
160 | * Side Effects: |
161 | */ |
162 | int genrand_integer(int *dest, int dist, int min, int max, int mean, int stream) { |
163 | int res = 0, i; |
164 | double fres = 0; |
165 | |
166 | switch (dist) { |
167 | case DIST_UNIFORM: |
168 | res = next_random(stream); |
169 | res %= max - min + 1; |
170 | res += min; |
171 | break; |
172 | case DIST_EXPONENTIAL: |
173 | for (i = 0; i < 12; i++) |
174 | fres += (double)(next_random(stream) / MAXINT) - 0.5; |
175 | res = min + (int)((max - min + 1) * fres); |
176 | break; |
177 | default: |
178 | INTERNAL("Undefined distribution" ); |
179 | break; |
180 | } |
181 | |
182 | if (dest == NULL) |
183 | return (res); |
184 | |
185 | *dest = res; |
186 | |
187 | return (0); |
188 | } |
189 | |
190 | /* |
191 | * Routine: genrand_key(ket_t *dest, int dist, ds_key_t min, ds_key_t max, |
192 | * ds_key_t mean, int stream) Purpose: generate a random integer given the |
193 | * distribution and limits Algorithm: Data Structures: |
194 | * |
195 | * Params: |
196 | * Returns: ds_key_t |
197 | * Called By: |
198 | * Calls: |
199 | * Assumptions: |
200 | * Side Effects: |
201 | * TODO: Need to rework to rely on RNG routines that will work for 64 bit return |
202 | * values |
203 | */ |
204 | ds_key_t genrand_key(ds_key_t *dest, int dist, ds_key_t min, ds_key_t max, ds_key_t mean, int stream) { |
205 | int res = 0, i; |
206 | double fres = 0; |
207 | |
208 | switch (dist) { |
209 | case DIST_UNIFORM: |
210 | res = next_random(stream); |
211 | res %= (int)(max - min + 1); |
212 | res += (int)min; |
213 | break; |
214 | case DIST_EXPONENTIAL: |
215 | for (i = 0; i < 12; i++) |
216 | fres += (double)(next_random(stream) / MAXINT) - 0.5; |
217 | res = (int)min + (int)((max - min + 1) * fres); |
218 | break; |
219 | default: |
220 | INTERNAL("Undefined distribution" ); |
221 | break; |
222 | } |
223 | |
224 | if (dest == NULL) |
225 | return ((ds_key_t)res); |
226 | |
227 | *dest = (ds_key_t)res; |
228 | |
229 | return ((ds_key_t)0); |
230 | } |
231 | |
232 | /* |
233 | * Routine: |
234 | * genrand_decimal(int dist, decimal_t *min, decimal_t *max, decimal_t *mean) |
235 | * Purpose: create a random decimal_t |
236 | * Algorithm: |
237 | * Data Structures: |
238 | * |
239 | * Params: min/max are char * to allow easy passing of precision |
240 | * Returns: decimal_t *; NULL on failure |
241 | * Called By: |
242 | * Calls: |
243 | * Assumptions: |
244 | * Side Effects: |
245 | * TODO: None |
246 | */ |
247 | int genrand_decimal(decimal_t *dest, int dist, decimal_t *min, decimal_t *max, decimal_t *mean, int stream) { |
248 | int i; |
249 | decimal_t res; |
250 | double fres = 0.0; |
251 | |
252 | if (min->precision < max->precision) |
253 | dest->precision = min->precision; |
254 | else |
255 | dest->precision = max->precision; |
256 | |
257 | switch (dist) { |
258 | case DIST_UNIFORM: |
259 | res.number = next_random(stream); |
260 | res.number %= max->number - min->number + 1; |
261 | res.number += min->number; |
262 | break; |
263 | case DIST_EXPONENTIAL: |
264 | for (i = 0; i < 12; i++) { |
265 | fres /= 2.0; |
266 | fres += (double)((double)next_random(stream) / (double)MAXINT) - 0.5; |
267 | } |
268 | res.number = mean->number + (int)((max->number - min->number + 1) * fres); |
269 | break; |
270 | default: |
271 | INTERNAL("Undefined distribution" ); |
272 | break; |
273 | } |
274 | |
275 | dest->number = res.number; |
276 | i = 0; |
277 | while (res.number > 10) { |
278 | res.number /= 10; |
279 | i += 1; |
280 | } |
281 | dest->scale = i; |
282 | |
283 | return (0); |
284 | } |
285 | |
286 | /* Routine: RNGReset(int tbl) |
287 | * Purpose: |
288 | * Algorithm: |
289 | * Data Structures: |
290 | * |
291 | * Params: |
292 | * Returns: |
293 | * Called By: |
294 | * Calls: |
295 | * Assumptions: |
296 | * Side Effects: |
297 | * TODO: None |
298 | */ |
299 | int RNGReset(int tbl) { |
300 | int i; |
301 | |
302 | for (i = 0; Streams[i].nColumn != -1; i++) |
303 | if (Streams[i].nTable == tbl) |
304 | Streams[i].nSeed = Streams[i].nInitialSeed; |
305 | |
306 | return (0); |
307 | } |
308 | |
309 | /* WARNING! This routine assumes the existence of 64-bit */ |
310 | |
311 | /* integers. The notation used here- "HUGE" is *not* ANSI standard. */ |
312 | |
313 | /* Hopefully, you have this extension as well. If not, use whatever */ |
314 | |
315 | /* nonstandard trick you need to in order to get 64 bit integers. */ |
316 | |
317 | /* The book says that this will work if MAXINT for the type you choose */ |
318 | |
319 | /* is at least 2**46 - 1, so 64 bits is more than you *really* need */ |
320 | |
321 | static HUGE_TYPE Multiplier = 16807; /* or whatever nonstandard */ |
322 | static HUGE_TYPE Modulus = 2147483647; /* trick you use to get 64 bit int */ |
323 | |
324 | /* Advances value of Seed after N applications of the random number generator |
325 | with multiplier Mult and given Modulus. |
326 | NthElement(Seed[],count); |
327 | |
328 | Theory: We are using a generator of the form |
329 | X_n = [Mult * X_(n-1)] mod Modulus. It turns out that |
330 | X_n = [(Mult ** n) X_0] mod Modulus. |
331 | This can be computed using a divide-and-conquer technique, see |
332 | the code below. |
333 | |
334 | In words, this means that if you want the value of the Seed after n |
335 | applications of the generator, you multiply the initial value of the |
336 | Seed by the "super multiplier" which is the basic multiplier raised |
337 | to the nth power, and then take mod Modulus. |
338 | */ |
339 | |
340 | /* Nth Element of sequence starting with StartSeed */ |
341 | void DSNthElementNthElement(HUGE_TYPE N, int nStream) { |
342 | HUGE_TYPE Z; |
343 | HUGE_TYPE Mult; |
344 | |
345 | Mult = Multiplier; |
346 | Z = (HUGE_TYPE)Streams[nStream].nInitialSeed; |
347 | while (N > 0) { |
348 | if (N % 2 != 0) /* testing for oddness, this seems portable */ |
349 | { |
350 | #ifdef JMS |
351 | Streams[nStream].nTotal += 1; |
352 | #endif |
353 | Z = (Mult * Z) % Modulus; |
354 | } |
355 | N = N / 2; /* integer division, truncates */ |
356 | Mult = (Mult * Mult) % Modulus; |
357 | #ifdef JMS |
358 | Streams[nStream].nTotal += 2; |
359 | #endif |
360 | } |
361 | Streams[nStream].nSeed = (long)Z; |
362 | |
363 | return; |
364 | } |
365 | |
366 | /* |
367 | * Routine: |
368 | * Purpose: |
369 | * Algorithm: |
370 | * Data Structures: |
371 | * |
372 | * Params: |
373 | * Returns: |
374 | * Called By: |
375 | * Calls: |
376 | * Assumptions: |
377 | * Side Effects: |
378 | * TODO: None |
379 | */ |
380 | int dump_seeds_ds(int tbl) { |
381 | int i; |
382 | |
383 | for (i = 0; Streams[i].nColumn != -1; i++) |
384 | if (Streams[i].nTable == tbl) |
385 | printf("%04d\t%09d\t%09ld\n" , i, Streams[i].nUsed, Streams[i].nSeed); |
386 | return (0); |
387 | } |
388 | |
389 | /* |
390 | * Routine: gen_charset(char *set, int min, int max) |
391 | * Purpose: generate random characters from set for a random length [min..max] |
392 | * Algorithm: |
393 | * Data Structures: |
394 | * |
395 | * Params: |
396 | * Returns: |
397 | * Called By: |
398 | * Calls: |
399 | * Assumptions: |
400 | * Side Effects: |
401 | * TODO: None |
402 | */ |
403 | int gen_charset(char *dest, char *set, int min, int max, int stream) { |
404 | int len, i, temp; |
405 | |
406 | if (set == NULL) { |
407 | dest = NULL; |
408 | return (-1); |
409 | } |
410 | |
411 | genrand_integer(&len, DIST_UNIFORM, min, max, 0, stream); |
412 | |
413 | for (i = 0; i < max; i++) { |
414 | genrand_integer(&temp, DIST_UNIFORM, 0, strlen(set) - 1, 0, stream); |
415 | if (i < len) |
416 | dest[i] = *(set + temp); |
417 | } |
418 | dest[len] = '\0'; |
419 | |
420 | return (0); |
421 | } |
422 | |
423 | /* |
424 | * Routine: genrand_date(int dist, date_t *min, date_t *max) |
425 | * Purpose: generate random date within [min..max] |
426 | * Algorithm: |
427 | * Data Structures: |
428 | * |
429 | * Params: |
430 | * Returns: |
431 | * Called By: |
432 | * Calls: |
433 | * Assumptions: |
434 | * Side Effects: |
435 | * TODO: None |
436 | */ |
437 | int genrand_date(date_t *dest, int dist, date_t *min, date_t *max, date_t *mean, int stream) { |
438 | int range, imean = 0, temp, idt, nYear, nTotalWeight = 0, nDayCount; |
439 | |
440 | idt = dttoj(min); |
441 | range = dttoj(max); |
442 | range -= idt; |
443 | nDayCount = min->day; |
444 | nYear = min->year; |
445 | |
446 | switch (dist) { |
447 | case DIST_SALES: |
448 | case DIST_RETURNS: |
449 | /* walk from min to max to "integrate" the distribution */ |
450 | while (range -= 1) { |
451 | nTotalWeight += dist_weight(NULL, "calendar" , nDayCount, dist + is_leap(nYear)); |
452 | if (nDayCount == 365 + is_leap(nYear)) { |
453 | nYear += 1; |
454 | nDayCount = 1; |
455 | } else |
456 | nDayCount += 1; |
457 | } |
458 | /* pick a value in the resulting range */ |
459 | temp = genrand_integer(NULL, DIST_UNIFORM, 1, nTotalWeight, 0, stream); |
460 | /* and walk it again to translate that back to a date */ |
461 | nDayCount = min->day; |
462 | idt = min->julian; |
463 | nYear = min->year; |
464 | while (temp >= 0) { |
465 | temp -= dist_weight(NULL, "calendar" , nDayCount, dist + is_leap(nYear)); |
466 | nDayCount += 1; |
467 | idt += 1; |
468 | if (nDayCount > 365 + is_leap(nYear)) { |
469 | nYear += 1; |
470 | nDayCount = 1; |
471 | } |
472 | } |
473 | break; |
474 | case DIST_EXPONENTIAL: |
475 | imean = dttoj(mean); |
476 | imean -= idt; |
477 | case DIST_UNIFORM: |
478 | genrand_integer(&temp, dist, 0, range, imean, stream); |
479 | idt += temp; |
480 | break; |
481 | default: |
482 | break; |
483 | } |
484 | |
485 | jtodt(dest, idt); |
486 | |
487 | return (0); |
488 | } |
489 | |
490 | /************** |
491 | ************** |
492 | ** |
493 | ** static routines |
494 | ** |
495 | ************** |
496 | **************/ |
497 | |
498 | /* |
499 | * Routine: init_rand() |
500 | * Purpose: Initialize the RNG used throughout the code |
501 | * Algorithm: To allow two columns to use the same stream of numbers (for |
502 | *joins), pre-sort the streams list by Duplicate and then assign values. Order |
503 | *by column after initialization Data Structures: |
504 | * |
505 | * Params: |
506 | * Returns: |
507 | * Called By: |
508 | * Calls: |
509 | * Assumptions: |
510 | * Side Effects: |
511 | * TODO: |
512 | */ |
513 | // FIXME: allow re-init |
514 | void init_rand(void) { |
515 | static int bInit = 0; |
516 | int i, skip, nSeed; |
517 | |
518 | if (!bInit) { |
519 | if (is_set("RNGSEED" )) |
520 | nSeed = get_int("RNGSEED" ); |
521 | else |
522 | nSeed = RNG_SEED; |
523 | skip = MAXINT / MAX_COLUMN; |
524 | for (i = 0; i < MAX_COLUMN; i++) { |
525 | Streams[i].nInitialSeed = nSeed + skip * i; |
526 | Streams[i].nSeed = nSeed + skip * i; |
527 | Streams[i].nUsed = 0; |
528 | } |
529 | bInit = 1; |
530 | } |
531 | return; |
532 | } |
533 | |
534 | void resetSeeds(int nTable) { |
535 | int i; |
536 | |
537 | for (i = 0; i < MAX_COLUMN; i++) |
538 | if (Streams[i].nTable == nTable) |
539 | Streams[i].nSeed = Streams[i].nInitialSeed; |
540 | return; |
541 | } |
542 | |
543 | /* |
544 | * Routine: |
545 | * Purpose: |
546 | * Algorithm: |
547 | * Data Structures: |
548 | * |
549 | * Params: |
550 | * Returns: |
551 | * Called By: |
552 | * Calls: |
553 | * Assumptions: |
554 | * Side Effects: |
555 | * TODO: None |
556 | */ |
557 | void genrand_email(char *pEmail, char *pFirst, char *pLast, int nColumn) { |
558 | char *pDomain; |
559 | char szCompany[50]; |
560 | int nCompanyLength; |
561 | |
562 | pick_distribution(&pDomain, "top_domains" , 1, 1, nColumn); |
563 | genrand_integer(&nCompanyLength, DIST_UNIFORM, 10, 20, 0, nColumn); |
564 | gen_charset(&szCompany[0], ALPHANUM, 1, 20, nColumn); |
565 | szCompany[nCompanyLength] = '\0'; |
566 | |
567 | sprintf(pEmail, "%s.%s@%s.%s" , pFirst, pLast, szCompany, pDomain); |
568 | |
569 | return; |
570 | } |
571 | |
572 | /* |
573 | * Routine: |
574 | * Purpose: |
575 | * Algorithm: |
576 | * Data Structures: |
577 | * |
578 | * Params: |
579 | * Returns: |
580 | * Called By: |
581 | * Calls: |
582 | * Assumptions: |
583 | * Side Effects: |
584 | * TODO: None |
585 | */ |
586 | void genrand_ipaddr(char *pDest, int nColumn) { |
587 | int arQuads[4], i; |
588 | |
589 | for (i = 0; i < 4; i++) |
590 | genrand_integer(&arQuads[i], DIST_UNIFORM, 1, 255, 0, nColumn); |
591 | sprintf(pDest, "%03d.%03d.%03d.%03d" , arQuads[0], arQuads[1], arQuads[2], arQuads[3]); |
592 | |
593 | return; |
594 | } |
595 | |
596 | /* |
597 | * Routine: |
598 | * Purpose: |
599 | * Algorithm: |
600 | * Data Structures: |
601 | * |
602 | * Params: |
603 | * Returns: |
604 | * Called By: |
605 | * Calls: |
606 | * Assumptions: |
607 | * Side Effects: |
608 | * TODO: None |
609 | */ |
610 | int genrand_url(char *pDest, int nColumn) { |
611 | strcpy(pDest, "http://www.foo.com" ); |
612 | |
613 | return (0); |
614 | } |
615 | |
616 | /* |
617 | * Routine: |
618 | * Purpose: |
619 | * Algorithm: |
620 | * Data Structures: |
621 | * |
622 | * Params: |
623 | * Returns: |
624 | * Called By: |
625 | * Calls: |
626 | * Assumptions: |
627 | * Side Effects: |
628 | * TODO: None |
629 | */ |
630 | int setSeed(int nStream, int nValue) { |
631 | int nRetValue; |
632 | |
633 | nRetValue = Streams[nStream].nSeed; |
634 | Streams[nStream].nSeed = nValue; |
635 | |
636 | return (nRetValue); |
637 | } |
638 | |
639 | #ifdef TEST |
640 | main() { |
641 | printf("r_genrand:No test routine has been defined for this module\n" ); |
642 | |
643 | exit(0); |
644 | } |
645 | #endif /* TEST */ |
646 | |