| 1 | /* |
| 2 | * Legal Notice |
| 3 | * |
| 4 | * This document and associated source code (the "Work") is a part of a |
| 5 | * benchmark specification maintained by the TPC. |
| 6 | * |
| 7 | * The TPC reserves all right, title, and interest to the Work as provided |
| 8 | * under U.S. and international laws, including without limitation all patent |
| 9 | * and trademark rights therein. |
| 10 | * |
| 11 | * No Warranty |
| 12 | * |
| 13 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
| 14 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
| 15 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
| 16 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
| 17 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
| 18 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
| 19 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
| 20 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
| 21 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
| 22 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
| 23 | * WITH REGARD TO THE WORK. |
| 24 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
| 25 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
| 26 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
| 27 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
| 28 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
| 29 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
| 30 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
| 31 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
| 32 | * |
| 33 | * Contributors: |
| 34 | * Gradient Systems |
| 35 | */ |
| 36 | #include "config.h" |
| 37 | #include "porting.h" |
| 38 | #include <stdio.h> |
| 39 | #include "tdefs.h" |
| 40 | #include "scd.h" |
| 41 | #include "tables.h" |
| 42 | #include "build_support.h" |
| 43 | #include "dist.h" |
| 44 | #include "scaling.h" |
| 45 | #include "genrand.h" |
| 46 | #include "constants.h" |
| 47 | #include "parallel.h" |
| 48 | #include "params.h" |
| 49 | #include "tdef_functions.h" |
| 50 | #include "permute.h" |
| 51 | |
/*
 * The most recent business key generated for each table, indexed by table id.
 * setSCDKeys() fills an entry through mk_bkey() and then copies it out to its
 * caller.
 * NOTE(review): the 17-byte width presumably holds a 16-character key plus the
 * terminating NUL -- confirm against mk_bkey().
 */
char arBKeys[MAX_TABLE][17];
| 54 | |
| 55 | /* |
| 56 | * Routine: setSCDKey |
| 57 | * Purpose: handle the versioning and date stamps for slowly changing dimensions |
| 58 | * Algorithm: |
| 59 | * Data Structures: |
| 60 | * |
| 61 | * Params: 1 if there is a new id; 0 otherwise |
| 62 | * Returns: |
| 63 | * Called By: |
| 64 | * Calls: |
| 65 | * Assumptions: Table indexs (surrogate keys) are 1-based. This assures that the |
| 66 | *arBKeys[] entry for each table is initialized. Otherwise, parallel generation |
| 67 | *would be more difficult. Side Effects: |
| 68 | * TODO: None |
| 69 | */ |
| 70 | int setSCDKeys(int nColumnID, ds_key_t kIndex, char *szBKey, ds_key_t *pkBeginDateKey, ds_key_t *pkEndDateKey) { |
| 71 | int bNewBKey = 0, nModulo; |
| 72 | static int bInit = 0; |
| 73 | static ds_key_t jMinimumDataDate, jMaximumDataDate, jH1DataDate, jT1DataDate, jT2DataDate; |
| 74 | date_t dtTemp; |
| 75 | int nTableID; |
| 76 | |
| 77 | if (!bInit) { |
| 78 | strtodt(&dtTemp, DATA_START_DATE); |
| 79 | jMinimumDataDate = dtTemp.julian; |
| 80 | strtodt(&dtTemp, DATA_END_DATE); |
| 81 | jMaximumDataDate = dtTemp.julian; |
| 82 | jH1DataDate = jMinimumDataDate + (jMaximumDataDate - jMinimumDataDate) / 2; |
| 83 | jT2DataDate = (jMaximumDataDate - jMinimumDataDate) / 3; |
| 84 | jT1DataDate = jMinimumDataDate + jT2DataDate; |
| 85 | jT2DataDate += jT1DataDate; |
| 86 | bInit = 1; |
| 87 | } |
| 88 | |
| 89 | nTableID = getTableFromColumn(nColumnID); |
| 90 | nModulo = (int)(kIndex % 6); |
| 91 | switch (nModulo) { |
| 92 | case 1: /* 1 revision */ |
| 93 | mk_bkey(arBKeys[nTableID], kIndex, nColumnID); |
| 94 | bNewBKey = 1; |
| 95 | *pkBeginDateKey = jMinimumDataDate - nTableID * 6; |
| 96 | *pkEndDateKey = -1; |
| 97 | break; |
| 98 | case 2: /* 1 of 2 revisions */ |
| 99 | mk_bkey(arBKeys[nTableID], kIndex, nColumnID); |
| 100 | bNewBKey = 1; |
| 101 | *pkBeginDateKey = jMinimumDataDate - nTableID * 6; |
| 102 | *pkEndDateKey = jH1DataDate - nTableID * 6; |
| 103 | break; |
| 104 | case 3: /* 2 of 2 revisions */ |
| 105 | mk_bkey(arBKeys[nTableID], kIndex - 1, nColumnID); |
| 106 | *pkBeginDateKey = jH1DataDate - nTableID * 6 + 1; |
| 107 | *pkEndDateKey = -1; |
| 108 | break; |
| 109 | case 4: /* 1 of 3 revisions */ |
| 110 | mk_bkey(arBKeys[nTableID], kIndex, nColumnID); |
| 111 | bNewBKey = 1; |
| 112 | *pkBeginDateKey = jMinimumDataDate - nTableID * 6; |
| 113 | *pkEndDateKey = jT1DataDate - nTableID * 6; |
| 114 | break; |
| 115 | case 5: /* 2 of 3 revisions */ |
| 116 | mk_bkey(arBKeys[nTableID], kIndex - 1, nColumnID); |
| 117 | *pkBeginDateKey = jT1DataDate - nTableID * 6 + 1; |
| 118 | *pkEndDateKey = jT2DataDate - nTableID * 6; |
| 119 | break; |
| 120 | case 0: /* 3 of 3 revisions */ |
| 121 | mk_bkey(arBKeys[nTableID], kIndex - 2, nColumnID); |
| 122 | *pkBeginDateKey = jT2DataDate - nTableID * 6 + 1; |
| 123 | *pkEndDateKey = -1; |
| 124 | break; |
| 125 | } |
| 126 | |
| 127 | /* can't have a revision in the future, per bug 114 */ |
| 128 | if (*pkEndDateKey > jMaximumDataDate) |
| 129 | *pkEndDateKey = -1; |
| 130 | |
| 131 | strcpy(szBKey, arBKeys[nTableID]); |
| 132 | |
| 133 | return (bNewBKey); |
| 134 | } |
| 135 | |
| 136 | /* |
| 137 | * Routine: scd_join(int tbl, int col, ds_key_t jDate) |
| 138 | * Purpose: create joins to slowly changing dimensions |
| 139 | * Data Structures: |
| 140 | * |
| 141 | * Params: |
| 142 | * Returns: |
| 143 | * Called By: |
| 144 | * Calls: |
| 145 | * Assumptions: |
| 146 | * Side Effects: |
| 147 | * TODO: None |
| 148 | */ |
| 149 | ds_key_t scd_join(int tbl, int col, ds_key_t jDate) { |
| 150 | ds_key_t res, kRowcount; |
| 151 | static int bInit = 0, jMinimumDataDate, jMaximumDataDate, jH1DataDate, jT1DataDate, jT2DataDate; |
| 152 | date_t dtTemp; |
| 153 | |
| 154 | if (!bInit) { |
| 155 | strtodt(&dtTemp, DATA_START_DATE); |
| 156 | jMinimumDataDate = dtTemp.julian; |
| 157 | strtodt(&dtTemp, DATA_END_DATE); |
| 158 | jMaximumDataDate = dtTemp.julian; |
| 159 | jH1DataDate = jMinimumDataDate + (jMaximumDataDate - jMinimumDataDate) / 2; |
| 160 | jT2DataDate = (jMaximumDataDate - jMinimumDataDate) / 3; |
| 161 | jT1DataDate = jMinimumDataDate + jT2DataDate; |
| 162 | jT2DataDate += jT1DataDate; |
| 163 | bInit = 1; |
| 164 | } |
| 165 | |
| 166 | kRowcount = getIDCount(tbl); |
| 167 | genrand_key(&res, DIST_UNIFORM, 1, kRowcount, 0, col); /* pick the id */ |
| 168 | res = matchSCDSK(res, jDate, tbl); /* map to the date-sensitive surrogate key */ |
| 169 | |
| 170 | /* can't have a revision in the future, per bug 114 */ |
| 171 | if (jDate > jMaximumDataDate) |
| 172 | res = -1; |
| 173 | |
| 174 | return ((res > get_rowcount(tbl)) ? -1 : res); |
| 175 | } |
| 176 | |
| 177 | /* |
| 178 | * Routine: |
| 179 | * Purpose: |
| 180 | * Algorithm: |
| 181 | * Data Structures: |
| 182 | * |
| 183 | * Params: |
| 184 | * Returns: |
| 185 | * Called By: |
| 186 | * Calls: |
| 187 | * Assumptions: |
| 188 | * Side Effects: |
| 189 | * TODO: None |
| 190 | */ |
| 191 | ds_key_t matchSCDSK(ds_key_t kUnique, ds_key_t jDate, int nTable) { |
| 192 | ds_key_t kReturn = -1; |
| 193 | static int bInit = 0; |
| 194 | int jMinimumDataDate, jMaximumDataDate; |
| 195 | static int jH1DataDate, jT1DataDate, jT2DataDate; |
| 196 | date_t dtTemp; |
| 197 | |
| 198 | if (!bInit) { |
| 199 | strtodt(&dtTemp, DATA_START_DATE); |
| 200 | jMinimumDataDate = dtTemp.julian; |
| 201 | strtodt(&dtTemp, DATA_END_DATE); |
| 202 | jMaximumDataDate = dtTemp.julian; |
| 203 | jH1DataDate = jMinimumDataDate + (jMaximumDataDate - jMinimumDataDate) / 2; |
| 204 | jT2DataDate = (jMaximumDataDate - jMinimumDataDate) / 3; |
| 205 | jT1DataDate = jMinimumDataDate + jT2DataDate; |
| 206 | jT2DataDate += jT1DataDate; |
| 207 | bInit = 1; |
| 208 | } |
| 209 | |
| 210 | switch (kUnique % 3) /* number of revisions for the ID */ |
| 211 | { |
| 212 | case 1: /* only one occurrence of this ID */ |
| 213 | kReturn = (kUnique / 3) * 6; |
| 214 | kReturn += 1; |
| 215 | break; |
| 216 | case 2: /* two revisions of this ID */ |
| 217 | kReturn = (kUnique / 3) * 6; |
| 218 | kReturn += 2; |
| 219 | if (jDate > jH1DataDate) |
| 220 | kReturn += 1; |
| 221 | break; |
| 222 | case 0: /* three revisions of this ID */ |
| 223 | kReturn = (kUnique / 3) * 6; |
| 224 | kReturn += -2; |
| 225 | if (jDate > jT1DataDate) |
| 226 | kReturn += 1; |
| 227 | if (jDate > jT2DataDate) |
| 228 | kReturn += 1; |
| 229 | break; |
| 230 | } |
| 231 | |
| 232 | if (kReturn > get_rowcount(nTable)) |
| 233 | kReturn = get_rowcount(nTable); |
| 234 | |
| 235 | return (kReturn); |
| 236 | } |
| 237 | |
| 238 | /* |
| 239 | * Routine: |
| 240 | * Purpose: map from a unique ID to a random SK |
| 241 | * Algorithm: |
| 242 | * Data Structures: |
| 243 | * |
| 244 | * Params: |
| 245 | * Returns: |
| 246 | * Called By: |
| 247 | * Calls: |
| 248 | * Assumptions: |
| 249 | * Side Effects: |
| 250 | * TODO: None |
| 251 | */ |
| 252 | ds_key_t getSKFromID(ds_key_t kID, int nColumn) { |
| 253 | ds_key_t kTemp = -1; |
| 254 | |
| 255 | switch (kID % 3) { |
| 256 | case 1: /* single revision */ |
| 257 | kTemp = kID / 3; |
| 258 | kTemp *= 6; |
| 259 | kTemp += 1; |
| 260 | break; |
| 261 | case 2: /* two revisions */ |
| 262 | kTemp = kID / 3; |
| 263 | kTemp *= 6; |
| 264 | kTemp += genrand_integer(NULL, DIST_UNIFORM, 2, 3, 0, nColumn); |
| 265 | break; |
| 266 | case 0: /* three revisions */ |
| 267 | kTemp = kID / 3; |
| 268 | kTemp -= 1; |
| 269 | kTemp *= 6; |
| 270 | kTemp += genrand_integer(NULL, DIST_UNIFORM, 4, 6, 0, nColumn); |
| 271 | break; |
| 272 | } |
| 273 | |
| 274 | return (kTemp); |
| 275 | } |
| 276 | |
| 277 | /* |
| 278 | * Routine: getFirstSK |
| 279 | * Purpose: map from id to an SK that can be mapped back to an id by printID() |
| 280 | * Algorithm: |
| 281 | * Data Structures: |
| 282 | * |
| 283 | * Params: |
| 284 | * Returns: |
| 285 | * Called By: |
| 286 | * Calls: |
| 287 | * Assumptions: |
| 288 | * Side Effects: |
| 289 | * TODO: None |
| 290 | */ |
| 291 | ds_key_t getFirstSK(ds_key_t kID) { |
| 292 | ds_key_t kTemp = -1; |
| 293 | |
| 294 | switch (kID % 3) { |
| 295 | case 1: /* single revision */ |
| 296 | kTemp = kID / 3; |
| 297 | kTemp *= 6; |
| 298 | kTemp += 1; |
| 299 | break; |
| 300 | case 2: /* two revisions */ |
| 301 | kTemp = kID / 3; |
| 302 | kTemp *= 6; |
| 303 | kTemp += 2; |
| 304 | break; |
| 305 | case 0: /* three revisions */ |
| 306 | kTemp = kID / 3; |
| 307 | kTemp -= 1; |
| 308 | kTemp *= 6; |
| 309 | kTemp += 4; |
| 310 | break; |
| 311 | } |
| 312 | |
| 313 | return (kTemp); |
| 314 | } |
| 315 | |
| 316 | /* |
| 317 | * Routine: |
| 318 | * Purpose: |
| 319 | * Algorithm: |
| 320 | * Data Structures: |
| 321 | * |
| 322 | * Params: |
| 323 | * Returns: |
| 324 | * Called By: |
| 325 | * Calls: |
| 326 | * Assumptions: |
| 327 | * Side Effects: |
| 328 | * TODO: None |
| 329 | */ |
| 330 | void changeSCD(int nDataType, void *pNewData, void *pOldData, int *nFlags, int bFirst) { |
| 331 | |
| 332 | /** |
| 333 | * if nFlags is odd, then this value will be retained |
| 334 | */ |
| 335 | if ((*nFlags != ((*nFlags / 2) * 2)) && (bFirst == 0)) { |
| 336 | |
| 337 | /* |
| 338 | * the method to retain the old value depends on the data type |
| 339 | */ |
| 340 | switch (nDataType) { |
| 341 | case SCD_INT: |
| 342 | *(int *)pNewData = *(int *)pOldData; |
| 343 | break; |
| 344 | case SCD_PTR: |
| 345 | pNewData = pOldData; |
| 346 | break; |
| 347 | case SCD_KEY: |
| 348 | *(ds_key_t *)pNewData = *(ds_key_t *)pOldData; |
| 349 | break; |
| 350 | case SCD_CHAR: |
| 351 | strcpy((char *)pNewData, (char *)pOldData); |
| 352 | break; |
| 353 | case SCD_DEC: |
| 354 | memcpy(pNewData, pOldData, sizeof(decimal_t)); |
| 355 | break; |
| 356 | } |
| 357 | } else { |
| 358 | |
| 359 | /* |
| 360 | * the method to set the old value depends on the data type |
| 361 | */ |
| 362 | switch (nDataType) { |
| 363 | case SCD_INT: |
| 364 | *(int *)pOldData = *(int *)pNewData; |
| 365 | break; |
| 366 | case SCD_PTR: |
| 367 | pOldData = pNewData; |
| 368 | break; |
| 369 | case SCD_KEY: |
| 370 | *(ds_key_t *)pOldData = *(ds_key_t *)pNewData; |
| 371 | break; |
| 372 | case SCD_CHAR: |
| 373 | strcpy((char *)pOldData, (char *)pNewData); |
| 374 | break; |
| 375 | case SCD_DEC: |
| 376 | memcpy(pOldData, pNewData, sizeof(decimal_t)); |
| 377 | break; |
| 378 | } |
| 379 | } |
| 380 | |
| 381 | *nFlags /= 2; |
| 382 | |
| 383 | return; |
| 384 | } |
| 385 | |