1 | /* |
2 | * Legal Notice |
3 | * |
4 | * This document and associated source code (the "Work") is a part of a |
5 | * benchmark specification maintained by the TPC. |
6 | * |
7 | * The TPC reserves all right, title, and interest to the Work as provided |
8 | * under U.S. and international laws, including without limitation all patent |
9 | * and trademark rights therein. |
10 | * |
11 | * No Warranty |
12 | * |
13 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
14 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
15 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
16 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
17 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
18 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
19 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
20 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
21 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
22 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
23 | * WITH REGARD TO THE WORK. |
24 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
25 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
26 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
27 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
28 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
29 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
30 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
31 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
32 | * |
33 | * Contributors: |
34 | * Gradient Systems |
35 | */ |
36 | #include "config.h" |
37 | #include "porting.h" |
38 | #include <stdio.h> |
39 | #ifndef WIN32 |
40 | #include <netinet/in.h> |
41 | #endif |
42 | #include <math.h> |
43 | #include "decimal.h" |
44 | #include "constants.h" |
45 | #include "dist.h" |
46 | #include "r_params.h" |
47 | #include "genrand.h" |
48 | #include "tdefs.h" |
49 | #include "tables.h" |
50 | #include "build_support.h" |
51 | #include "genrand.h" |
52 | #include "columns.h" |
53 | #include "StringBuffer.h" |
54 | #include "error_msg.h" |
55 | #include "scaling.h" |
56 | |
57 | /* |
58 | * Routine: hierarchy_item |
59 | * Purpose: |
60 | * select the hierarchy entry for this level |
61 | * Algorithm: Assumes a top-down ordering |
62 | * Data Structures: |
63 | * |
64 | * Params: |
65 | * Returns: |
66 | * Called By: |
67 | * Calls: |
68 | * Assumptions: |
69 | * Side Effects: |
70 | * TODO: |
71 | */ |
72 | void hierarchy_item(int h_level, ds_key_t *id, char **name, ds_key_t kIndex) { |
73 | static int bInit = 0, nLastCategory = -1, nLastClass = -1, nBrandBase; |
74 | int nBrandCount; |
75 | static char *szClassDistName = NULL; |
76 | char sTemp[6]; |
77 | |
78 | if (!bInit) { |
79 | bInit = 1; |
80 | } |
81 | |
82 | switch (h_level) { |
83 | case I_CATEGORY: |
84 | nLastCategory = pick_distribution(name, "categories" , 1, 1, h_level); |
85 | *id = nLastCategory; |
86 | nBrandBase = nLastCategory; |
87 | nLastClass = -1; |
88 | break; |
89 | case I_CLASS: |
90 | if (nLastCategory == -1) |
91 | ReportErrorNoLine(DBGEN_ERROR_HIERACHY_ORDER, "I_CLASS before I_CATEGORY" , 1); |
92 | dist_member(&szClassDistName, "categories" , nLastCategory, 2); |
93 | nLastClass = pick_distribution(name, szClassDistName, 1, 1, h_level); |
94 | nLastCategory = -1; |
95 | *id = nLastClass; |
96 | break; |
97 | case I_BRAND: |
98 | if (nLastClass == -1) |
99 | ReportErrorNoLine(DBGEN_ERROR_HIERACHY_ORDER, "I_BRAND before I_CLASS" , 1); |
100 | dist_member(&nBrandCount, szClassDistName, nLastClass, 2); |
101 | *id = kIndex % nBrandCount + 1; |
102 | mk_word(*name, "brand_syllables" , nBrandBase * 10 + nLastClass, 45, I_BRAND); |
103 | sprintf(sTemp, " #%d" , (int)*id); |
104 | strcat(*name, sTemp); |
105 | *id += (nBrandBase * 1000 + nLastClass) * 1000; |
106 | break; |
107 | default: |
108 | printf("ERROR: Invalid call to hierarchy_item with argument '%d'\n" , h_level); |
109 | exit(1); |
110 | } |
111 | |
112 | return; |
113 | } |
114 | |
115 | /* |
116 | * Routine: mk_companyname() |
117 | * Purpose: |
118 | * yet another member of a set of routines used for address creation |
119 | * Algorithm: |
120 | * create a hash, based on an index value, so that the same result can be |
121 | *derived reliably and then build a word from a syllable set Data Structures: |
122 | * |
123 | * Params: |
124 | * char * dest: target for resulting name |
125 | * int nTable: to allow differing distributions |
126 | * int nCompany: index value |
127 | * Returns: |
128 | * Called By: |
129 | * Calls: |
130 | * Assumptions: |
131 | * Side Effects: |
132 | * TODO: |
133 | * 20010615 JMS return code is meaningless |
134 | * 20030422 JMS should be replaced if there is no per-table variation |
135 | */ |
136 | int mk_companyname(char *dest, int nTable, int nCompany) { |
137 | mk_word(dest, "syllables" , nCompany, 10, CC_COMPANY_NAME); |
138 | |
139 | return (0); |
140 | } |
141 | |
142 | /* |
143 | * Routine: set_locale() |
144 | * Purpose: |
145 | * generate a reasonable lattitude and longitude based on a region and the USGS |
146 | *data on 3500 counties in the US Algorithm: Data Structures: |
147 | * |
148 | * Params: |
149 | * Returns: |
150 | * Called By: |
151 | * Calls: |
152 | * Assumptions: |
153 | * Side Effects: |
154 | * TODO: 20011230 JMS set_locale() is just a placeholder; do we need geographic |
155 | *coords? |
156 | */ |
157 | int set_locale(int nRegion, decimal_t *longitude, decimal_t *latitude) { |
158 | static int init = 0; |
159 | static decimal_t dZero; |
160 | |
161 | if (!init) { |
162 | strtodec(&dZero, "0.00" ); |
163 | init = 1; |
164 | } |
165 | |
166 | memcpy(longitude, &dZero, sizeof(decimal_t)); |
167 | memcpy(latitude, &dZero, sizeof(decimal_t)); |
168 | |
169 | return (0); |
170 | } |
171 | |
172 | /* |
173 | * Routine: |
174 | * Purpose: |
175 | * Algorithm: |
176 | * Data Structures: |
177 | * |
178 | * Params: |
179 | * Returns: |
180 | * Called By: |
181 | * Calls: |
182 | * Assumptions: |
183 | * Side Effects: |
184 | * TODO: None |
185 | */ |
186 | void bitmap_to_dist(void *pDest, char *distname, ds_key_t *modulus, int vset, int stream) { |
187 | int32_t m, s; |
188 | char msg[80]; |
189 | |
190 | if ((s = distsize(distname)) == -1) { |
191 | sprintf(msg, "Invalid distribution name '%s'" , distname); |
192 | INTERNAL(msg); |
193 | } |
194 | m = (int32_t)((*modulus % s) + 1); |
195 | *modulus /= s; |
196 | |
197 | dist_member(pDest, distname, m, vset); |
198 | |
199 | return; |
200 | } |
201 | |
/*
 * Routine: dist_to_bitmap()
 * Purpose: Reverse engineer a composite key based on distributions
 * Algorithm:
 *	scales the running key by the size of the distribution, then adds the
 *	value returned by pick_distribution() as the new lowest component
 * Data Structures:
 *
 * Params:
 *	int *pDest:       in/out running composite key
 *	char *szDistName: distribution to pick from
 *	int nValue:       value set passed to pick_distribution()
 *	int nWeight:      weight set passed to pick_distribution()
 *	int nStream:      stream passed to pick_distribution()
 * Returns: nothing
 * Called By:
 * Calls: distsize(), pick_distribution()
 * Assumptions:
 * Side Effects: modifies *pDest
 * TODO: None
 */
void dist_to_bitmap(int *pDest, char *szDistName, int nValue, int nWeight, int nStream) {
	int nKey = *pDest;

	nKey *= distsize(szDistName);
	nKey += pick_distribution(NULL, szDistName, nValue, nWeight, nStream);
	*pDest = nKey;
}
221 | |
/*
 * Routine: random_to_bitmap()
 * Purpose: Reverse engineer a composite key based on an integer range
 * Algorithm:
 *	scales the running key by nMax, then adds an integer drawn by
 *	genrand_integer() with the given distribution, range, mean and stream
 * Data Structures:
 *
 * Params:
 *	int *pDest:  in/out running composite key
 *	int nDist:   distribution selector passed to genrand_integer()
 *	int nMin:    lower bound passed to genrand_integer()
 *	int nMax:    upper bound passed to genrand_integer(); also the scale factor
 *	int nMean:   mean passed to genrand_integer()
 *	int nStream: stream passed to genrand_integer()
 * Returns: nothing
 * Called By:
 * Calls: genrand_integer()
 * Assumptions:
 * Side Effects: modifies *pDest
 * TODO: None
 */
void random_to_bitmap(int *pDest, int nDist, int nMin, int nMax, int nMean, int nStream) {
	int nScaled = *pDest * nMax;
	int nDraw = genrand_integer(NULL, nDist, nMin, nMax, nMean, nStream);

	*pDest = nScaled + nDraw;
}
241 | |
242 | /* |
243 | * Routine: mk_word() |
244 | * Purpose: |
245 | * generate a gibberish word from a given syllable set |
246 | * Algorithm: |
247 | * Data Structures: |
248 | * |
249 | * Params: |
250 | * Returns: |
251 | * Called By: |
252 | * Calls: |
253 | * Assumptions: |
254 | * Side Effects: |
255 | * TODO: |
256 | */ |
257 | void mk_word(char *dest, char *syl_set, ds_key_t src, int char_cnt, int col) { |
258 | ds_key_t i = src, nSyllableCount; |
259 | char *cp; |
260 | |
261 | *dest = '\0'; |
262 | while (i > 0) { |
263 | nSyllableCount = distsize(syl_set); |
264 | dist_member(&cp, syl_set, (int)(i % nSyllableCount) + 1, 1); |
265 | i /= nSyllableCount; |
266 | if ((int)(strlen(dest) + strlen(cp)) <= char_cnt) |
267 | strcat(dest, cp); |
268 | else |
269 | break; |
270 | } |
271 | |
272 | return; |
273 | } |
274 | |
/*
 * Routine: mk_surrogate() -- implemented below as mk_bkey()
 * Purpose: create a character based surrogate key from a 64-bit value
 * Algorithm: since the RNG routines produce a 32-bit value, and surrogate keys
 *	can reach beyond that, use the RNG output to generate the lower end of a
 *	random string, and build the upper end from a ds_key_t
 * Data Structures:
 *
 * Params:
 * Returns:
 * Called By:
 * Calls: ltoc()
 * Assumptions: output is a 16 character string. Space is not checked
 * Side Effects:
 * TODO:
 *	20020830 jms may need to define a 64-bit form of htonl() for portable
 *	shift operations
 */
/* Lookup table: nibble value 0..15 -> letter 'A'..'P' (exactly 16 bytes, no terminator). */
static char szXlate[16] = "ABCDEFGHIJKLMNOP";

/*
 * ltoc(): write the low 32 bits of nVal to szDest as 8 letters, one per
 * 4-bit group, least significant group first, followed by a terminator.
 * szDest must hold at least 9 bytes.
 */
static void ltoc(char *szDest, unsigned long nVal) {
	char *pOut = szDest;
	char *const pEnd = szDest + 8;

	while (pOut < pEnd) {
		*pOut++ = szXlate[nVal & 0xF];
		nVal >>= 4;
	}
	*pOut = '\0';
}
304 | |
305 | void mk_bkey(char *szDest, ds_key_t kPrimary, int nStream) { |
306 | unsigned long nTemp; |
307 | |
308 | nTemp = (unsigned long)(kPrimary >> 32); |
309 | ltoc(szDest, nTemp); |
310 | |
311 | nTemp = (unsigned long)(kPrimary & 0xFFFFFFFF); |
312 | ltoc(szDest + 8, nTemp); |
313 | |
314 | return; |
315 | } |
316 | |
317 | /* |
318 | * Routine: embed_string(char *szDest, char *szDist, int nValue, int nWeight, |
319 | * int nStream) Purpose: Algorithm: Data Structures: |
320 | * |
321 | * Params: |
322 | * Returns: |
323 | * Called By: |
324 | * Calls: |
325 | * Assumptions: |
326 | * Side Effects: |
327 | * TODO: None |
328 | */ |
329 | int embed_string(char *szDest, char *szDist, int nValue, int nWeight, int nStream) { |
330 | int nPosition; |
331 | char *szWord = NULL; |
332 | |
333 | pick_distribution(&szWord, szDist, nValue, nWeight, nStream); |
334 | nPosition = genrand_integer(NULL, DIST_UNIFORM, 0, strlen(szDest) - strlen(szWord) - 1, 0, nStream); |
335 | memcpy(&szDest[nPosition], szWord, sizeof(char) * strlen(szWord)); |
336 | |
337 | return (0); |
338 | } |
339 | |
/*
 * Routine: SetScaleIndex()
 * Purpose: link SCALE and SCALE_INDEX
 * Algorithm:
 *	parses szValue as an integer, counts its decimal digits (values that
 *	are zero, negative or unparsable count as 1), and stores that digit
 *	count as the "_SCALE_INDEX" parameter
 * Data Structures:
 *
 * Params:
 *	char *szName:  currently unused
 *	char *szValue: textual scale value
 * Returns: the integer parsed from szValue (unchanged, so 0 stays 0)
 * Called By:
 * Calls: set_int()
 * Assumptions:
 * Side Effects: sets "_SCALE_INDEX"
 * TODO: None
 */
int SetScaleIndex(char *szName, char *szValue) {
	int nRequested = atoi(szValue);
	/* The digit count used to come from log10(): a negative value produced
	 * NaN and an undefined float-to-int conversion. Anything below 1 is now
	 * treated as scale 1, as 0 already was. */
	int nScale = (nRequested > 0) ? nRequested : 1;
	int nDigits = 0;
	/* room for any int; the old 2-byte buffer could only express one digit,
	 * so a 10-digit scale became the character ':' */
	char szScale[12];

	(void)szName;

	/* integer digit count == 1 + floor(log10(nScale)), without rounding risk */
	do {
		nDigits++;
		nScale /= 10;
	} while (nScale > 0);

	snprintf(szScale, sizeof(szScale), "%d", nDigits);
	set_int("_SCALE_INDEX", szScale);

	return (nRequested);
}
369 | |
370 | /* |
371 | * Routine: adjust the valid date window for source schema tables, based on |
372 | * based on the update count, update window size, etc. |
373 | * Purpose: |
374 | * Algorithm: |
375 | * Data Structures: |
376 | * |
377 | * Params: |
378 | * Returns: |
379 | * Called By: |
380 | * Calls: |
381 | * Assumptions: |
382 | * Side Effects: |
383 | * TODO: None |
384 | */ |
385 | void setUpdateDateRange(int nTable, date_t *pMinDate, date_t *pMaxDate) { |
386 | static int nUpdateNumber, bInit = 0; |
387 | |
388 | if (!bInit) { |
389 | nUpdateNumber = get_int("UPDATE" ); |
390 | bInit = 1; |
391 | } |
392 | |
393 | switch (nTable) /* no per-table changes at the moment; but could be */ |
394 | { |
395 | default: |
396 | strtodt(pMinDate, WAREHOUSE_LOAD_DATE); |
397 | pMinDate->julian += UPDATE_INTERVAL * (nUpdateNumber - 1); |
398 | jtodt(pMinDate, pMinDate->julian); |
399 | jtodt(pMaxDate, pMinDate->julian + UPDATE_INTERVAL); |
400 | break; |
401 | } |
402 | |
403 | return; |
404 | } |
405 | |