| 1 | /* |
| 2 | * Legal Notice |
| 3 | * |
| 4 | * This document and associated source code (the "Work") is a part of a |
| 5 | * benchmark specification maintained by the TPC. |
| 6 | * |
| 7 | * The TPC reserves all right, title, and interest to the Work as provided |
| 8 | * under U.S. and international laws, including without limitation all patent |
| 9 | * and trademark rights therein. |
| 10 | * |
| 11 | * No Warranty |
| 12 | * |
| 13 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
| 14 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
| 15 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
| 16 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
| 17 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
| 18 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
| 19 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
| 20 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
| 21 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
| 22 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
| 23 | * WITH REGARD TO THE WORK. |
| 24 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
| 25 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
| 26 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
| 27 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
| 28 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
| 29 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
| 30 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
| 31 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
| 32 | * |
| 33 | * Contributors: |
| 34 | * Gradient Systems |
| 35 | */ |
| 36 | #include "config.h" |
| 37 | #include "porting.h" |
| 38 | #include <stdio.h> |
| 39 | #ifndef WIN32 |
| 40 | #include <netinet/in.h> |
| 41 | #endif |
| 42 | #include <math.h> |
| 43 | #include "decimal.h" |
| 44 | #include "constants.h" |
| 45 | #include "dist.h" |
| 46 | #include "r_params.h" |
| 47 | #include "genrand.h" |
| 48 | #include "tdefs.h" |
| 49 | #include "tables.h" |
| 50 | #include "build_support.h" |
| 51 | #include "genrand.h" |
| 52 | #include "columns.h" |
| 53 | #include "StringBuffer.h" |
| 54 | #include "error_msg.h" |
| 55 | #include "scaling.h" |
| 56 | |
| 57 | /* |
| 58 | * Routine: hierarchy_item |
| 59 | * Purpose: |
| 60 | * select the hierarchy entry for this level |
| 61 | * Algorithm: Assumes a top-down ordering |
| 62 | * Data Structures: |
| 63 | * |
| 64 | * Params: |
| 65 | * Returns: |
| 66 | * Called By: |
| 67 | * Calls: |
| 68 | * Assumptions: |
| 69 | * Side Effects: |
| 70 | * TODO: |
| 71 | */ |
| 72 | void hierarchy_item(int h_level, ds_key_t *id, char **name, ds_key_t kIndex) { |
| 73 | static int bInit = 0, nLastCategory = -1, nLastClass = -1, nBrandBase; |
| 74 | int nBrandCount; |
| 75 | static char *szClassDistName = NULL; |
| 76 | char sTemp[6]; |
| 77 | |
| 78 | if (!bInit) { |
| 79 | bInit = 1; |
| 80 | } |
| 81 | |
| 82 | switch (h_level) { |
| 83 | case I_CATEGORY: |
| 84 | nLastCategory = pick_distribution(name, "categories" , 1, 1, h_level); |
| 85 | *id = nLastCategory; |
| 86 | nBrandBase = nLastCategory; |
| 87 | nLastClass = -1; |
| 88 | break; |
| 89 | case I_CLASS: |
| 90 | if (nLastCategory == -1) |
| 91 | ReportErrorNoLine(DBGEN_ERROR_HIERACHY_ORDER, "I_CLASS before I_CATEGORY" , 1); |
| 92 | dist_member(&szClassDistName, "categories" , nLastCategory, 2); |
| 93 | nLastClass = pick_distribution(name, szClassDistName, 1, 1, h_level); |
| 94 | nLastCategory = -1; |
| 95 | *id = nLastClass; |
| 96 | break; |
| 97 | case I_BRAND: |
| 98 | if (nLastClass == -1) |
| 99 | ReportErrorNoLine(DBGEN_ERROR_HIERACHY_ORDER, "I_BRAND before I_CLASS" , 1); |
| 100 | dist_member(&nBrandCount, szClassDistName, nLastClass, 2); |
| 101 | *id = kIndex % nBrandCount + 1; |
| 102 | mk_word(*name, "brand_syllables" , nBrandBase * 10 + nLastClass, 45, I_BRAND); |
| 103 | sprintf(sTemp, " #%d" , (int)*id); |
| 104 | strcat(*name, sTemp); |
| 105 | *id += (nBrandBase * 1000 + nLastClass) * 1000; |
| 106 | break; |
| 107 | default: |
| 108 | printf("ERROR: Invalid call to hierarchy_item with argument '%d'\n" , h_level); |
| 109 | exit(1); |
| 110 | } |
| 111 | |
| 112 | return; |
| 113 | } |
| 114 | |
| 115 | /* |
| 116 | * Routine: mk_companyname() |
| 117 | * Purpose: |
| 118 | * yet another member of a set of routines used for address creation |
| 119 | * Algorithm: |
| 120 | * create a hash, based on an index value, so that the same result can be |
| 121 | *derived reliably and then build a word from a syllable set Data Structures: |
| 122 | * |
| 123 | * Params: |
| 124 | * char * dest: target for resulting name |
| 125 | * int nTable: to allow differing distributions |
| 126 | * int nCompany: index value |
| 127 | * Returns: |
| 128 | * Called By: |
| 129 | * Calls: |
| 130 | * Assumptions: |
| 131 | * Side Effects: |
| 132 | * TODO: |
| 133 | * 20010615 JMS return code is meaningless |
| 134 | * 20030422 JMS should be replaced if there is no per-table variation |
| 135 | */ |
| 136 | int mk_companyname(char *dest, int nTable, int nCompany) { |
| 137 | mk_word(dest, "syllables" , nCompany, 10, CC_COMPANY_NAME); |
| 138 | |
| 139 | return (0); |
| 140 | } |
| 141 | |
| 142 | /* |
| 143 | * Routine: set_locale() |
| 144 | * Purpose: |
| 145 | * generate a reasonable lattitude and longitude based on a region and the USGS |
| 146 | *data on 3500 counties in the US Algorithm: Data Structures: |
| 147 | * |
| 148 | * Params: |
| 149 | * Returns: |
| 150 | * Called By: |
| 151 | * Calls: |
| 152 | * Assumptions: |
| 153 | * Side Effects: |
| 154 | * TODO: 20011230 JMS set_locale() is just a placeholder; do we need geographic |
| 155 | *coords? |
| 156 | */ |
| 157 | int set_locale(int nRegion, decimal_t *longitude, decimal_t *latitude) { |
| 158 | static int init = 0; |
| 159 | static decimal_t dZero; |
| 160 | |
| 161 | if (!init) { |
| 162 | strtodec(&dZero, "0.00" ); |
| 163 | init = 1; |
| 164 | } |
| 165 | |
| 166 | memcpy(longitude, &dZero, sizeof(decimal_t)); |
| 167 | memcpy(latitude, &dZero, sizeof(decimal_t)); |
| 168 | |
| 169 | return (0); |
| 170 | } |
| 171 | |
| 172 | /* |
| 173 | * Routine: |
| 174 | * Purpose: |
| 175 | * Algorithm: |
| 176 | * Data Structures: |
| 177 | * |
| 178 | * Params: |
| 179 | * Returns: |
| 180 | * Called By: |
| 181 | * Calls: |
| 182 | * Assumptions: |
| 183 | * Side Effects: |
| 184 | * TODO: None |
| 185 | */ |
| 186 | void bitmap_to_dist(void *pDest, char *distname, ds_key_t *modulus, int vset, int stream) { |
| 187 | int32_t m, s; |
| 188 | char msg[80]; |
| 189 | |
| 190 | if ((s = distsize(distname)) == -1) { |
| 191 | sprintf(msg, "Invalid distribution name '%s'" , distname); |
| 192 | INTERNAL(msg); |
| 193 | } |
| 194 | m = (int32_t)((*modulus % s) + 1); |
| 195 | *modulus /= s; |
| 196 | |
| 197 | dist_member(pDest, distname, m, vset); |
| 198 | |
| 199 | return; |
| 200 | } |
| 201 | |
/*
 * Routine: void dist_to_bitmap(int *pDest, char *szDistName, int nValue,
 *	int nWeight, int nStream)
 * Purpose: Reverse engineer a composite key based on distributions
 * Algorithm:
 *	scale the key accumulated so far by the size of the distribution, then
 *	add the value returned by pick_distribution() for this component
 * Data Structures:
 *
 * Params:
 *	pDest:      in/out composite key being accumulated
 *	szDistName: distribution supplying this component
 *	nValue, nWeight, nStream: forwarded to pick_distribution()
 * Returns:
 * Called By:
 * Calls: distsize(), pick_distribution()
 * Assumptions:
 * Side Effects: *pDest is modified
 * TODO: None
 */
void dist_to_bitmap(int *pDest, char *szDistName, int nValue, int nWeight, int nStream) {
	int nKey = *pDest;

	/* make room for the new component ... */
	nKey *= distsize(szDistName);
	/* ... and append it */
	nKey += pick_distribution(NULL, szDistName, nValue, nWeight, nStream);

	*pDest = nKey;
}
| 221 | |
/*
 * Routine: void random_to_bitmap(int *pDest, int nDist, int nMin, int nMax,
 *	int nMean, int nStream)
 * Purpose: Reverse engineer a composite key based on an integer range
 * Algorithm:
 *	scale the key accumulated so far by nMax, then add an integer drawn
 *	with genrand_integer() for this component
 * Data Structures:
 *
 * Params:
 *	pDest: in/out composite key being accumulated
 *	nDist, nMin, nMax, nMean, nStream: forwarded to genrand_integer();
 *	       nMax is also the scale factor applied to the existing key
 * Returns:
 * Called By:
 * Calls: genrand_integer()
 * Assumptions:
 * Side Effects: *pDest is modified
 * TODO: None
 */
void random_to_bitmap(int *pDest, int nDist, int nMin, int nMax, int nMean, int nStream) {
	const int nShifted = *pDest * nMax;

	*pDest = nShifted + genrand_integer(NULL, nDist, nMin, nMax, nMean, nStream);
}
| 241 | |
| 242 | /* |
| 243 | * Routine: mk_word() |
| 244 | * Purpose: |
| 245 | * generate a gibberish word from a given syllable set |
| 246 | * Algorithm: |
| 247 | * Data Structures: |
| 248 | * |
| 249 | * Params: |
| 250 | * Returns: |
| 251 | * Called By: |
| 252 | * Calls: |
| 253 | * Assumptions: |
| 254 | * Side Effects: |
| 255 | * TODO: |
| 256 | */ |
| 257 | void mk_word(char *dest, char *syl_set, ds_key_t src, int char_cnt, int col) { |
| 258 | ds_key_t i = src, nSyllableCount; |
| 259 | char *cp; |
| 260 | |
| 261 | *dest = '\0'; |
| 262 | while (i > 0) { |
| 263 | nSyllableCount = distsize(syl_set); |
| 264 | dist_member(&cp, syl_set, (int)(i % nSyllableCount) + 1, 1); |
| 265 | i /= nSyllableCount; |
| 266 | if ((int)(strlen(dest) + strlen(cp)) <= char_cnt) |
| 267 | strcat(dest, cp); |
| 268 | else |
| 269 | break; |
| 270 | } |
| 271 | |
| 272 | return; |
| 273 | } |
| 274 | |
/*
 * Routine: mk_surrogate()
 * Purpose: create a character based surrogate key from a 64-bit value
 * Algorithm: since the RNG routines produce a 32bit value, and surrogate keys
 *	can reach beyond that, use the RNG output to generate the lower end of a
 *	random string, and build the upper end from a ds_key_t
 * Data Structures:
 *
 * Params:
 * Returns:
 * Called By:
 * Calls: ltoc()
 * Assumptions: output is a 16 character string. Space is not checked
 * Side Effects:
 * TODO:
 *	20020830 jms may need to define a 64-bit form of htonl() for portable
 *	shift operations
 */
static char szXlate[16] = "ABCDEFGHIJKLMNOP";

/*
 * ltoc(): spell the low 32 bits of nVal as eight letters, one per 4-bit
 * group starting with the least significant group (0 -> 'A' ... 15 -> 'P'),
 * and terminate the result. szDest must have room for nine bytes.
 */
static void ltoc(char *szDest, unsigned long nVal) {
	char *pOut = szDest;
	char *const pEnd = szDest + 8;

	while (pOut < pEnd) {
		*pOut++ = szXlate[nVal & 0xF];
		nVal >>= 4;
	}
	*pOut = '\0';
}
| 304 | |
| 305 | void mk_bkey(char *szDest, ds_key_t kPrimary, int nStream) { |
| 306 | unsigned long nTemp; |
| 307 | |
| 308 | nTemp = (unsigned long)(kPrimary >> 32); |
| 309 | ltoc(szDest, nTemp); |
| 310 | |
| 311 | nTemp = (unsigned long)(kPrimary & 0xFFFFFFFF); |
| 312 | ltoc(szDest + 8, nTemp); |
| 313 | |
| 314 | return; |
| 315 | } |
| 316 | |
| 317 | /* |
| 318 | * Routine: embed_string(char *szDest, char *szDist, int nValue, int nWeight, |
| 319 | * int nStream) Purpose: Algorithm: Data Structures: |
| 320 | * |
| 321 | * Params: |
| 322 | * Returns: |
| 323 | * Called By: |
| 324 | * Calls: |
| 325 | * Assumptions: |
| 326 | * Side Effects: |
| 327 | * TODO: None |
| 328 | */ |
| 329 | int embed_string(char *szDest, char *szDist, int nValue, int nWeight, int nStream) { |
| 330 | int nPosition; |
| 331 | char *szWord = NULL; |
| 332 | |
| 333 | pick_distribution(&szWord, szDist, nValue, nWeight, nStream); |
| 334 | nPosition = genrand_integer(NULL, DIST_UNIFORM, 0, strlen(szDest) - strlen(szWord) - 1, 0, nStream); |
| 335 | memcpy(&szDest[nPosition], szWord, sizeof(char) * strlen(szWord)); |
| 336 | |
| 337 | return (0); |
| 338 | } |
| 339 | |
/*
 * Routine: SetScaleIndex()
 * Purpose: link SCALE and SCALE_INDEX
 * Algorithm:
 *	the index is the number of decimal digits of the scale (1..9 -> 1,
 *	10..99 -> 2, ...); a scale that parses to zero or less is treated as 1.
 *	Digits are counted with integer arithmetic, so the result is exact and
 *	never depends on floating point rounding.
 * Data Structures:
 *
 * Params:
 *	szName:  not consulted
 *	szValue: textual scale value
 * Returns: the scale as parsed by atoi(), without any adjustment
 * Called By:
 * Calls: atoi(), set_int()
 * Assumptions:
 * Side Effects: stores the index under "_SCALE_INDEX" via set_int()
 * TODO: None
 */
int SetScaleIndex(char *szName, char *szValue) {
	const int nScale = atoi(szValue);
	int nRemaining = (nScale > 0) ? nScale : 1;
	int nIndex = 0;
	/* large enough for any int, so a two digit index is written correctly */
	char szIndex[16];

	do {
		nIndex++;
		nRemaining /= 10;
	} while (nRemaining > 0);

	snprintf(szIndex, sizeof(szIndex), "%d", nIndex);
	set_int("_SCALE_INDEX", szIndex);

	return (nScale);
}
| 369 | |
| 370 | /* |
| 371 | * Routine: adjust the valid date window for source schema tables, based on |
| 372 | * based on the update count, update window size, etc. |
| 373 | * Purpose: |
| 374 | * Algorithm: |
| 375 | * Data Structures: |
| 376 | * |
| 377 | * Params: |
| 378 | * Returns: |
| 379 | * Called By: |
| 380 | * Calls: |
| 381 | * Assumptions: |
| 382 | * Side Effects: |
| 383 | * TODO: None |
| 384 | */ |
| 385 | void setUpdateDateRange(int nTable, date_t *pMinDate, date_t *pMaxDate) { |
| 386 | static int nUpdateNumber, bInit = 0; |
| 387 | |
| 388 | if (!bInit) { |
| 389 | nUpdateNumber = get_int("UPDATE" ); |
| 390 | bInit = 1; |
| 391 | } |
| 392 | |
| 393 | switch (nTable) /* no per-table changes at the moment; but could be */ |
| 394 | { |
| 395 | default: |
| 396 | strtodt(pMinDate, WAREHOUSE_LOAD_DATE); |
| 397 | pMinDate->julian += UPDATE_INTERVAL * (nUpdateNumber - 1); |
| 398 | jtodt(pMinDate, pMinDate->julian); |
| 399 | jtodt(pMaxDate, pMinDate->julian + UPDATE_INTERVAL); |
| 400 | break; |
| 401 | } |
| 402 | |
| 403 | return; |
| 404 | } |
| 405 | |