1 | /* |
2 | * Legal Notice |
3 | * |
4 | * This document and associated source code (the "Work") is a part of a |
5 | * benchmark specification maintained by the TPC. |
6 | * |
7 | * The TPC reserves all right, title, and interest to the Work as provided |
8 | * under U.S. and international laws, including without limitation all patent |
9 | * and trademark rights therein. |
10 | * |
11 | * No Warranty |
12 | * |
13 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
14 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
15 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
16 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
17 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
18 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
19 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
20 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
21 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
22 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
23 | * WITH REGARD TO THE WORK. |
24 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
25 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
26 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
27 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
28 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
29 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
30 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
31 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
32 | * |
33 | * Contributors: |
34 | * Gradient Systems |
35 | */ |
36 | #include "config.h" |
37 | #include "porting.h" |
38 | #include <stdio.h> |
39 | #include "tdefs.h" |
40 | #include "scd.h" |
41 | #include "tables.h" |
42 | #include "build_support.h" |
43 | #include "dist.h" |
44 | #include "scaling.h" |
45 | #include "genrand.h" |
46 | #include "constants.h" |
47 | #include "parallel.h" |
48 | #include "params.h" |
49 | #include "tdef_functions.h" |
50 | #include "permute.h" |
51 | |
/*
 * The most recent business key generated for each table, indexed by table id.
 * setSCDKeys() rewrites a table's entry through mk_bkey() and then copies the
 * entry out to its caller.
 * NOTE(review): 17 bytes presumably means a 16-character key plus the
 * terminating NUL -- confirm against mk_bkey().
 */
char arBKeys[MAX_TABLE][17];
54 | |
55 | /* |
56 | * Routine: setSCDKey |
57 | * Purpose: handle the versioning and date stamps for slowly changing dimensions |
58 | * Algorithm: |
59 | * Data Structures: |
60 | * |
61 | * Params: 1 if there is a new id; 0 otherwise |
62 | * Returns: |
63 | * Called By: |
64 | * Calls: |
65 | * Assumptions: Table indexs (surrogate keys) are 1-based. This assures that the |
66 | *arBKeys[] entry for each table is initialized. Otherwise, parallel generation |
67 | *would be more difficult. Side Effects: |
68 | * TODO: None |
69 | */ |
70 | int setSCDKeys(int nColumnID, ds_key_t kIndex, char *szBKey, ds_key_t *pkBeginDateKey, ds_key_t *pkEndDateKey) { |
71 | int bNewBKey = 0, nModulo; |
72 | static int bInit = 0; |
73 | static ds_key_t jMinimumDataDate, jMaximumDataDate, jH1DataDate, jT1DataDate, jT2DataDate; |
74 | date_t dtTemp; |
75 | int nTableID; |
76 | |
77 | if (!bInit) { |
78 | strtodt(&dtTemp, DATA_START_DATE); |
79 | jMinimumDataDate = dtTemp.julian; |
80 | strtodt(&dtTemp, DATA_END_DATE); |
81 | jMaximumDataDate = dtTemp.julian; |
82 | jH1DataDate = jMinimumDataDate + (jMaximumDataDate - jMinimumDataDate) / 2; |
83 | jT2DataDate = (jMaximumDataDate - jMinimumDataDate) / 3; |
84 | jT1DataDate = jMinimumDataDate + jT2DataDate; |
85 | jT2DataDate += jT1DataDate; |
86 | bInit = 1; |
87 | } |
88 | |
89 | nTableID = getTableFromColumn(nColumnID); |
90 | nModulo = (int)(kIndex % 6); |
91 | switch (nModulo) { |
92 | case 1: /* 1 revision */ |
93 | mk_bkey(arBKeys[nTableID], kIndex, nColumnID); |
94 | bNewBKey = 1; |
95 | *pkBeginDateKey = jMinimumDataDate - nTableID * 6; |
96 | *pkEndDateKey = -1; |
97 | break; |
98 | case 2: /* 1 of 2 revisions */ |
99 | mk_bkey(arBKeys[nTableID], kIndex, nColumnID); |
100 | bNewBKey = 1; |
101 | *pkBeginDateKey = jMinimumDataDate - nTableID * 6; |
102 | *pkEndDateKey = jH1DataDate - nTableID * 6; |
103 | break; |
104 | case 3: /* 2 of 2 revisions */ |
105 | mk_bkey(arBKeys[nTableID], kIndex - 1, nColumnID); |
106 | *pkBeginDateKey = jH1DataDate - nTableID * 6 + 1; |
107 | *pkEndDateKey = -1; |
108 | break; |
109 | case 4: /* 1 of 3 revisions */ |
110 | mk_bkey(arBKeys[nTableID], kIndex, nColumnID); |
111 | bNewBKey = 1; |
112 | *pkBeginDateKey = jMinimumDataDate - nTableID * 6; |
113 | *pkEndDateKey = jT1DataDate - nTableID * 6; |
114 | break; |
115 | case 5: /* 2 of 3 revisions */ |
116 | mk_bkey(arBKeys[nTableID], kIndex - 1, nColumnID); |
117 | *pkBeginDateKey = jT1DataDate - nTableID * 6 + 1; |
118 | *pkEndDateKey = jT2DataDate - nTableID * 6; |
119 | break; |
120 | case 0: /* 3 of 3 revisions */ |
121 | mk_bkey(arBKeys[nTableID], kIndex - 2, nColumnID); |
122 | *pkBeginDateKey = jT2DataDate - nTableID * 6 + 1; |
123 | *pkEndDateKey = -1; |
124 | break; |
125 | } |
126 | |
127 | /* can't have a revision in the future, per bug 114 */ |
128 | if (*pkEndDateKey > jMaximumDataDate) |
129 | *pkEndDateKey = -1; |
130 | |
131 | strcpy(szBKey, arBKeys[nTableID]); |
132 | |
133 | return (bNewBKey); |
134 | } |
135 | |
136 | /* |
137 | * Routine: scd_join(int tbl, int col, ds_key_t jDate) |
138 | * Purpose: create joins to slowly changing dimensions |
139 | * Data Structures: |
140 | * |
141 | * Params: |
142 | * Returns: |
143 | * Called By: |
144 | * Calls: |
145 | * Assumptions: |
146 | * Side Effects: |
147 | * TODO: None |
148 | */ |
149 | ds_key_t scd_join(int tbl, int col, ds_key_t jDate) { |
150 | ds_key_t res, kRowcount; |
151 | static int bInit = 0, jMinimumDataDate, jMaximumDataDate, jH1DataDate, jT1DataDate, jT2DataDate; |
152 | date_t dtTemp; |
153 | |
154 | if (!bInit) { |
155 | strtodt(&dtTemp, DATA_START_DATE); |
156 | jMinimumDataDate = dtTemp.julian; |
157 | strtodt(&dtTemp, DATA_END_DATE); |
158 | jMaximumDataDate = dtTemp.julian; |
159 | jH1DataDate = jMinimumDataDate + (jMaximumDataDate - jMinimumDataDate) / 2; |
160 | jT2DataDate = (jMaximumDataDate - jMinimumDataDate) / 3; |
161 | jT1DataDate = jMinimumDataDate + jT2DataDate; |
162 | jT2DataDate += jT1DataDate; |
163 | bInit = 1; |
164 | } |
165 | |
166 | kRowcount = getIDCount(tbl); |
167 | genrand_key(&res, DIST_UNIFORM, 1, kRowcount, 0, col); /* pick the id */ |
168 | res = matchSCDSK(res, jDate, tbl); /* map to the date-sensitive surrogate key */ |
169 | |
170 | /* can't have a revision in the future, per bug 114 */ |
171 | if (jDate > jMaximumDataDate) |
172 | res = -1; |
173 | |
174 | return ((res > get_rowcount(tbl)) ? -1 : res); |
175 | } |
176 | |
177 | /* |
178 | * Routine: |
179 | * Purpose: |
180 | * Algorithm: |
181 | * Data Structures: |
182 | * |
183 | * Params: |
184 | * Returns: |
185 | * Called By: |
186 | * Calls: |
187 | * Assumptions: |
188 | * Side Effects: |
189 | * TODO: None |
190 | */ |
191 | ds_key_t matchSCDSK(ds_key_t kUnique, ds_key_t jDate, int nTable) { |
192 | ds_key_t kReturn = -1; |
193 | static int bInit = 0; |
194 | int jMinimumDataDate, jMaximumDataDate; |
195 | static int jH1DataDate, jT1DataDate, jT2DataDate; |
196 | date_t dtTemp; |
197 | |
198 | if (!bInit) { |
199 | strtodt(&dtTemp, DATA_START_DATE); |
200 | jMinimumDataDate = dtTemp.julian; |
201 | strtodt(&dtTemp, DATA_END_DATE); |
202 | jMaximumDataDate = dtTemp.julian; |
203 | jH1DataDate = jMinimumDataDate + (jMaximumDataDate - jMinimumDataDate) / 2; |
204 | jT2DataDate = (jMaximumDataDate - jMinimumDataDate) / 3; |
205 | jT1DataDate = jMinimumDataDate + jT2DataDate; |
206 | jT2DataDate += jT1DataDate; |
207 | bInit = 1; |
208 | } |
209 | |
210 | switch (kUnique % 3) /* number of revisions for the ID */ |
211 | { |
212 | case 1: /* only one occurrence of this ID */ |
213 | kReturn = (kUnique / 3) * 6; |
214 | kReturn += 1; |
215 | break; |
216 | case 2: /* two revisions of this ID */ |
217 | kReturn = (kUnique / 3) * 6; |
218 | kReturn += 2; |
219 | if (jDate > jH1DataDate) |
220 | kReturn += 1; |
221 | break; |
222 | case 0: /* three revisions of this ID */ |
223 | kReturn = (kUnique / 3) * 6; |
224 | kReturn += -2; |
225 | if (jDate > jT1DataDate) |
226 | kReturn += 1; |
227 | if (jDate > jT2DataDate) |
228 | kReturn += 1; |
229 | break; |
230 | } |
231 | |
232 | if (kReturn > get_rowcount(nTable)) |
233 | kReturn = get_rowcount(nTable); |
234 | |
235 | return (kReturn); |
236 | } |
237 | |
238 | /* |
239 | * Routine: |
240 | * Purpose: map from a unique ID to a random SK |
241 | * Algorithm: |
242 | * Data Structures: |
243 | * |
244 | * Params: |
245 | * Returns: |
246 | * Called By: |
247 | * Calls: |
248 | * Assumptions: |
249 | * Side Effects: |
250 | * TODO: None |
251 | */ |
252 | ds_key_t getSKFromID(ds_key_t kID, int nColumn) { |
253 | ds_key_t kTemp = -1; |
254 | |
255 | switch (kID % 3) { |
256 | case 1: /* single revision */ |
257 | kTemp = kID / 3; |
258 | kTemp *= 6; |
259 | kTemp += 1; |
260 | break; |
261 | case 2: /* two revisions */ |
262 | kTemp = kID / 3; |
263 | kTemp *= 6; |
264 | kTemp += genrand_integer(NULL, DIST_UNIFORM, 2, 3, 0, nColumn); |
265 | break; |
266 | case 0: /* three revisions */ |
267 | kTemp = kID / 3; |
268 | kTemp -= 1; |
269 | kTemp *= 6; |
270 | kTemp += genrand_integer(NULL, DIST_UNIFORM, 4, 6, 0, nColumn); |
271 | break; |
272 | } |
273 | |
274 | return (kTemp); |
275 | } |
276 | |
277 | /* |
278 | * Routine: getFirstSK |
279 | * Purpose: map from id to an SK that can be mapped back to an id by printID() |
280 | * Algorithm: |
281 | * Data Structures: |
282 | * |
283 | * Params: |
284 | * Returns: |
285 | * Called By: |
286 | * Calls: |
287 | * Assumptions: |
288 | * Side Effects: |
289 | * TODO: None |
290 | */ |
291 | ds_key_t getFirstSK(ds_key_t kID) { |
292 | ds_key_t kTemp = -1; |
293 | |
294 | switch (kID % 3) { |
295 | case 1: /* single revision */ |
296 | kTemp = kID / 3; |
297 | kTemp *= 6; |
298 | kTemp += 1; |
299 | break; |
300 | case 2: /* two revisions */ |
301 | kTemp = kID / 3; |
302 | kTemp *= 6; |
303 | kTemp += 2; |
304 | break; |
305 | case 0: /* three revisions */ |
306 | kTemp = kID / 3; |
307 | kTemp -= 1; |
308 | kTemp *= 6; |
309 | kTemp += 4; |
310 | break; |
311 | } |
312 | |
313 | return (kTemp); |
314 | } |
315 | |
316 | /* |
317 | * Routine: |
318 | * Purpose: |
319 | * Algorithm: |
320 | * Data Structures: |
321 | * |
322 | * Params: |
323 | * Returns: |
324 | * Called By: |
325 | * Calls: |
326 | * Assumptions: |
327 | * Side Effects: |
328 | * TODO: None |
329 | */ |
330 | void changeSCD(int nDataType, void *pNewData, void *pOldData, int *nFlags, int bFirst) { |
331 | |
332 | /** |
333 | * if nFlags is odd, then this value will be retained |
334 | */ |
335 | if ((*nFlags != ((*nFlags / 2) * 2)) && (bFirst == 0)) { |
336 | |
337 | /* |
338 | * the method to retain the old value depends on the data type |
339 | */ |
340 | switch (nDataType) { |
341 | case SCD_INT: |
342 | *(int *)pNewData = *(int *)pOldData; |
343 | break; |
344 | case SCD_PTR: |
345 | pNewData = pOldData; |
346 | break; |
347 | case SCD_KEY: |
348 | *(ds_key_t *)pNewData = *(ds_key_t *)pOldData; |
349 | break; |
350 | case SCD_CHAR: |
351 | strcpy((char *)pNewData, (char *)pOldData); |
352 | break; |
353 | case SCD_DEC: |
354 | memcpy(pNewData, pOldData, sizeof(decimal_t)); |
355 | break; |
356 | } |
357 | } else { |
358 | |
359 | /* |
360 | * the method to set the old value depends on the data type |
361 | */ |
362 | switch (nDataType) { |
363 | case SCD_INT: |
364 | *(int *)pOldData = *(int *)pNewData; |
365 | break; |
366 | case SCD_PTR: |
367 | pOldData = pNewData; |
368 | break; |
369 | case SCD_KEY: |
370 | *(ds_key_t *)pOldData = *(ds_key_t *)pNewData; |
371 | break; |
372 | case SCD_CHAR: |
373 | strcpy((char *)pOldData, (char *)pNewData); |
374 | break; |
375 | case SCD_DEC: |
376 | memcpy(pOldData, pNewData, sizeof(decimal_t)); |
377 | break; |
378 | } |
379 | } |
380 | |
381 | *nFlags /= 2; |
382 | |
383 | return; |
384 | } |
385 | |