| 1 | /* |
| 2 | * Legal Notice |
| 3 | * |
| 4 | * This document and associated source code (the "Work") is a part of a |
| 5 | * benchmark specification maintained by the TPC. |
| 6 | * |
| 7 | * The TPC reserves all right, title, and interest to the Work as provided |
| 8 | * under U.S. and international laws, including without limitation all patent |
| 9 | * and trademark rights therein. |
| 10 | * |
| 11 | * No Warranty |
| 12 | * |
| 13 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
| 14 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
| 15 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
| 16 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
| 17 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
| 18 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
| 19 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
| 20 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
| 21 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
| 22 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
| 23 | * WITH REGARD TO THE WORK. |
| 24 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
| 25 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
| 26 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
| 27 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
| 28 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
| 29 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
| 30 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
| 31 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
| 32 | * |
| 33 | * Contributors: |
| 34 | * Gradient Systems |
| 35 | */ |
| 36 | #include "config.h" |
| 37 | #include "porting.h" |
| 38 | #include <stdio.h> |
| 39 | #include "address.h" |
| 40 | #include "dist.h" |
| 41 | #include "r_params.h" |
| 42 | #include "genrand.h" |
| 43 | #include "columns.h" |
| 44 | #include "tables.h" |
| 45 | #include "tdefs.h" |
| 46 | #include "permute.h" |
| 47 | #include "scaling.h" |
| 48 | |
/*
 * File-scope counters. In the code visible here they are only ever written,
 * and only by resetCountCount().
 */
static int s_nCountyCount = 0;
static int s_nCityCount = 0;

/*
 * Routine: resetCountCount()
 * Purpose: put both file-scope counters back to zero
 * Params: none
 * Returns: nothing
 * Side Effects: overwrites s_nCountyCount and s_nCityCount
 */
void resetCountCount(void) {
    s_nCountyCount = s_nCityCount = 0;
}
| 58 | |
| 59 | /* |
| 60 | * Routine: |
| 61 | * Purpose: |
| 62 | * Algorithm: |
| 63 | * Data Structures: |
| 64 | * |
| 65 | * Params: |
| 66 | * Returns: |
| 67 | * Called By: |
| 68 | * Calls: |
| 69 | * Assumptions: |
| 70 | * Side Effects: |
| 71 | * TODO: None |
| 72 | */ |
| 73 | int mk_address(ds_addr_t *pAddr, int nColumn) { |
| 74 | int i, nRegion; |
| 75 | char *szZipPrefix, szAddr[100]; |
| 76 | static int nMaxCities, nMaxCounties, bInit = 0; |
| 77 | tdef *pTdef; |
| 78 | |
| 79 | if (!bInit) { |
| 80 | nMaxCities = (int)get_rowcount(ACTIVE_CITIES); |
| 81 | nMaxCounties = (int)get_rowcount(ACTIVE_COUNTIES); |
| 82 | bInit = 1; |
| 83 | } |
| 84 | |
| 85 | /* street_number is [1..1000] */ |
| 86 | genrand_integer(&pAddr->street_num, DIST_UNIFORM, 1, 1000, 0, nColumn); |
| 87 | |
| 88 | /* street names are picked from a distribution */ |
| 89 | pick_distribution(&pAddr->street_name1, "street_names" , 1, 1, nColumn); |
| 90 | pick_distribution(&pAddr->street_name2, "street_names" , 1, 2, nColumn); |
| 91 | |
| 92 | /* street type is picked from a distribution */ |
| 93 | pick_distribution(&pAddr->street_type, "street_type" , 1, 1, nColumn); |
| 94 | |
| 95 | /* suite number is alphabetic 50% of the time */ |
| 96 | genrand_integer(&i, DIST_UNIFORM, 1, 100, 0, nColumn); |
| 97 | if (i & 0x01) { |
| 98 | sprintf(pAddr->suite_num, "Suite %d" , (i >> 1) * 10); |
| 99 | } else { |
| 100 | sprintf(pAddr->suite_num, "Suite %c" , ((i >> 1) % 25) + 'A'); |
| 101 | } |
| 102 | |
| 103 | pTdef = getTdefsByNumber(getTableFromColumn(nColumn)); |
| 104 | |
| 105 | /* city is picked from a distribution which maps to large/medium/small */ |
| 106 | if (pTdef->flags & FL_SMALL) { |
| 107 | i = (int)get_rowcount(getTableFromColumn(nColumn)); |
| 108 | genrand_integer(&i, DIST_UNIFORM, 1, (nMaxCities > i) ? i : nMaxCities, 0, nColumn); |
| 109 | dist_member(&pAddr->city, "cities" , i, 1); |
| 110 | } else |
| 111 | pick_distribution(&pAddr->city, "cities" , 1, 6, nColumn); |
| 112 | |
| 113 | /* county is picked from a distribution, based on population and keys the |
| 114 | * rest */ |
| 115 | if (pTdef->flags & FL_SMALL) { |
| 116 | i = (int)get_rowcount(getTableFromColumn(nColumn)); |
| 117 | genrand_integer(&nRegion, DIST_UNIFORM, 1, (nMaxCounties > i) ? i : nMaxCounties, 0, nColumn); |
| 118 | dist_member(&pAddr->county, "fips_county" , nRegion, 2); |
| 119 | } else |
| 120 | nRegion = pick_distribution(&pAddr->county, "fips_county" , 2, 1, nColumn); |
| 121 | |
| 122 | /* match state with the selected region/county */ |
| 123 | dist_member(&pAddr->state, "fips_county" , nRegion, 3); |
| 124 | |
| 125 | /* match the zip prefix with the selected region/county */ |
| 126 | pAddr->zip = city_hash(0, pAddr->city); |
| 127 | /* 00000 - 00600 are unused. Avoid them */ |
| 128 | dist_member((void *)&szZipPrefix, "fips_county" , nRegion, 5); |
| 129 | if (!(szZipPrefix[0] - '0') && (pAddr->zip < 9400)) |
| 130 | pAddr->zip += 600; |
| 131 | pAddr->zip += (szZipPrefix[0] - '0') * 10000; |
| 132 | |
| 133 | sprintf(szAddr, "%d %s %s %s" , pAddr->street_num, pAddr->street_name1, pAddr->street_name2, pAddr->street_type); |
| 134 | pAddr->plus4 = city_hash(0, szAddr); |
| 135 | dist_member(&pAddr->gmt_offset, "fips_county" , nRegion, 6); |
| 136 | strcpy(pAddr->country, "United States" ); |
| 137 | |
| 138 | return (0); |
| 139 | } |
| 140 | |
| 141 | /* |
| 142 | * Routine: mk_streetnumber |
| 143 | * Purpose: |
| 144 | * one of a set of routines that creates addresses |
| 145 | * Algorithm: |
| 146 | * Data Structures: |
| 147 | * |
| 148 | * Params: |
| 149 | * nTable: target table (and, by extension, address) to allow differing |
| 150 | *distributions dest: destination for the random number Returns: Called By: |
| 151 | * Calls: |
| 152 | * Assumptions: |
| 153 | * Side Effects: |
| 154 | * TODO: 20030422 jms should be replaced if there is no table variation |
| 155 | */ |
| 156 | int mk_streetnumber(int nTable, int *dest) { |
| 157 | genrand_integer(dest, DIST_UNIFORM, 1, 1000, 0, nTable); |
| 158 | |
| 159 | return (0); |
| 160 | } |
| 161 | |
| 162 | /* |
| 163 | * Routine: mk_suitenumber() |
| 164 | * Purpose: |
| 165 | * one of a set of routines that creates addresses |
| 166 | * Algorithm: |
| 167 | * Data Structures: |
| 168 | * |
| 169 | * Params: |
| 170 | * nTable: target table (and, by extension, address) to allow differing |
| 171 | *distributions dest: destination for the random number Returns: Called By: |
| 172 | * Calls: |
| 173 | * Assumptions: |
| 174 | * Side Effects: |
| 175 | * TODO: 20010615 JMS return code is meaningless |
| 176 | */ |
| 177 | int mk_suitenumber(int nTable, char *dest) { |
| 178 | int i; |
| 179 | |
| 180 | genrand_integer(&i, DIST_UNIFORM, 1, 100, 0, nTable); |
| 181 | if (i <= 50) { |
| 182 | genrand_integer(&i, DIST_UNIFORM, 1, 1000, 0, nTable); |
| 183 | sprintf(dest, "Suite %d" , i); |
| 184 | } else { |
| 185 | genrand_integer(&i, DIST_UNIFORM, 0, 25, 0, nTable); |
| 186 | sprintf(dest, "Suite %c" , i + 'A'); |
| 187 | } |
| 188 | |
| 189 | return (0); |
| 190 | } |
| 191 | |
/*
 * Routine: mk_streetname()
 * Purpose:
 *  one of a set of routines that creates addresses; writes a one- or
 *  two-part street name into dest
 * Algorithm:
 *  use a staggered distribution and the 150 most common street names in the
 *  US; two picks are made from "street_names" and joined with a single space,
 *  unless the second pick is the empty string, in which case only the first
 *  is copied
 * Data Structures:
 *
 * Params:
 *  nTable: target table (and, by extension, address) to allow differing
 *          distributions; forwarded to pick_distribution()
 *  dest:   destination for the street name; the caller must supply enough
 *          room, no length is checked here
 * Returns: always 0
 * Called By:
 * Calls: pick_distribution(), sprintf(), strcpy()
 * Assumptions:
 *  both picks leave a non-NULL string pointer behind
 * Side Effects:
 * TODO: 20010615 JMS return code is meaningless
 */
int mk_streetname(int nTable, char *dest) {
    char *szFirst = NULL;
    char *szSecond = NULL;

    pick_distribution((void *)&szFirst, "street_names", 1, 1, nTable);
    pick_distribution((void *)&szSecond, "street_names", 1, 2, nTable);

    /* an empty second part means the name is just the first part */
    if (szSecond[0] == '\0') {
        strcpy(dest, szFirst);
        return 0;
    }

    sprintf(dest, "%s %s", szFirst, szSecond);
    return 0;
}
| 219 | |
/*
 * Routine: mk_city()
 * Purpose:
 *  one of a set of routines that creates addresses; picks a city name
 * Algorithm:
 *  use a staggered distribution of 1000 most common place names in the US;
 *  the integer parameter "_SCALE_INDEX" is passed as the fourth argument of
 *  pick_distribution()
 * Data Structures:
 *
 * Params:
 *  nTable: target table (and, by extension, address) to allow differing
 *          distributions. NOTE(review): currently unused; the last argument
 *          of pick_distribution() is the literal 11 instead
 *  dest:   destination for the city name (receives a string pointer)
 * Returns: always 0
 * Called By:
 * Calls: get_int(), pick_distribution()
 * Assumptions:
 * Side Effects:
 * TODO: 20030423 jms should be replaced if there is no per-table variation
 */
int mk_city(int nTable, char **dest) {
    int nScaleIndex = (int)get_int("_SCALE_INDEX");

    pick_distribution((void *)dest, "cities", 1, nScaleIndex, 11);
    return 0;
}
| 240 | |
/*
 * Routine: city_hash()
 * Purpose: reduce a string to a small integer used as part of a zip code
 * Algorithm:
 *  treat the string as a base-26 number with 'A' as digit zero. Whenever the
 *  running value exceeds 1000000 its low four decimal digits are added to
 *  the result and the running value restarts at zero. After the last
 *  character the remainder modulo 1000 is added and the sum is reduced
 *  modulo 10000.
 * Data Structures:
 *
 * Params:
 *  nTable: unused
 *  name:   NUL-terminated string to hash; must not be NULL
 * Returns:
 *  a value strictly between -10000 and 10000. It is non-negative for purely
 *  upper-case input; characters that sort below 'A' (digits, spaces) can
 *  drive the running value, and therefore the result, negative.
 * Called By:
 * Calls:
 * Assumptions:
 * Side Effects: none
 * TODO: None
 */
int city_hash(int nTable, char *name) {
    const char *cp;
    unsigned int wrapped;
    int hash_value = 0, res = 0;

    (void)nTable;

    for (cp = name; *cp; cp++) {
        /*
         * hash_value = hash_value * 26 - 'A' + *cp
         *
         * A negative running value is never reset by the test below, so for
         * longer strings the multiplication would overflow a signed int,
         * which is undefined behavior. Do the step in unsigned arithmetic,
         * where wraparound is defined, then convert back; this yields the
         * same two's-complement result the signed form produces in practice.
         */
        wrapped = (unsigned int)hash_value * 26u;
        wrapped -= (unsigned int)'A';
        wrapped += (unsigned int)*cp;
        hash_value = (int)wrapped;

        if (hash_value > 1000000) {
            hash_value %= 10000;
            res += hash_value;
            hash_value = 0;
        }
    }
    hash_value %= 1000;
    res += hash_value;
    res %= 10000; /* looking for a 4 digit result */

    return (res);
}
| 275 | |
| 276 | /* |
| 277 | * Routine: |
| 278 | * one of a set of routines that creates addresses |
| 279 | * Algorithm: |
| 280 | * use a compound distribution of the 3500 counties in the US |
| 281 | * Data Structures: |
| 282 | * |
| 283 | * Params: |
| 284 | * nTable: target table (and, by extension, address) to allow differing |
| 285 | *distributions dest: destination for the city name nRegion: the county selected |
| 286 | * city: the city name selected |
| 287 | * Returns: |
| 288 | * Called By: |
| 289 | * Calls: |
| 290 | * Assumptions: |
| 291 | * Side Effects: |
| 292 | * TODO: 20010615 JMS return code is meaningless |
| 293 | */ |
| 294 | int mk_zipcode(int nTable, char *dest, int nRegion, char *city) { |
| 295 | char *szZipPrefix = NULL; |
| 296 | int nCityCode; |
| 297 | int nPlusFour; |
| 298 | |
| 299 | dist_member((void *)&szZipPrefix, "fips_county" , nRegion, 5); |
| 300 | nCityCode = city_hash(nTable, city); |
| 301 | genrand_integer(&nPlusFour, DIST_UNIFORM, 1, 9999, 0, nTable); |
| 302 | sprintf(dest, "%s%04d-%04d" , szZipPrefix, nCityCode, nPlusFour); |
| 303 | |
| 304 | return (0); |
| 305 | } |
| 306 | |