1 | /* |
2 | * Legal Notice |
3 | * |
4 | * This document and associated source code (the "Work") is a part of a |
5 | * benchmark specification maintained by the TPC. |
6 | * |
7 | * The TPC reserves all right, title, and interest to the Work as provided |
8 | * under U.S. and international laws, including without limitation all patent |
9 | * and trademark rights therein. |
10 | * |
11 | * No Warranty |
12 | * |
13 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
14 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
15 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
16 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
17 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
18 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
19 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
20 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
21 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
22 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
23 | * WITH REGARD TO THE WORK. |
24 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
25 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
26 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
27 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
28 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
29 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
30 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
31 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
32 | * |
33 | * Contributors: |
34 | * Gradient Systems |
35 | */ |
36 | #include "config.h" |
37 | #include "porting.h" |
38 | #include <stdio.h> |
39 | #include "address.h" |
40 | #include "dist.h" |
41 | #include "r_params.h" |
42 | #include "genrand.h" |
43 | #include "columns.h" |
44 | #include "tables.h" |
45 | #include "tdefs.h" |
46 | #include "permute.h" |
47 | #include "scaling.h" |
48 | |
/* File-scope county/city counters; resetCountCount() returns both to zero. */
static int s_nCityCount = 0;
static int s_nCountyCount = 0;

/*
 * Routine: resetCountCount()
 * Purpose: clear the file-scope city and county counters
 * Params: none
 * Returns: nothing
 * Side Effects: s_nCityCount and s_nCountyCount are both set to 0
 */
void resetCountCount(void) {
	s_nCityCount = s_nCountyCount = 0;
}
58 | |
59 | /* |
60 | * Routine: |
61 | * Purpose: |
62 | * Algorithm: |
63 | * Data Structures: |
64 | * |
65 | * Params: |
66 | * Returns: |
67 | * Called By: |
68 | * Calls: |
69 | * Assumptions: |
70 | * Side Effects: |
71 | * TODO: None |
72 | */ |
73 | int mk_address(ds_addr_t *pAddr, int nColumn) { |
74 | int i, nRegion; |
75 | char *szZipPrefix, szAddr[100]; |
76 | static int nMaxCities, nMaxCounties, bInit = 0; |
77 | tdef *pTdef; |
78 | |
79 | if (!bInit) { |
80 | nMaxCities = (int)get_rowcount(ACTIVE_CITIES); |
81 | nMaxCounties = (int)get_rowcount(ACTIVE_COUNTIES); |
82 | bInit = 1; |
83 | } |
84 | |
85 | /* street_number is [1..1000] */ |
86 | genrand_integer(&pAddr->street_num, DIST_UNIFORM, 1, 1000, 0, nColumn); |
87 | |
88 | /* street names are picked from a distribution */ |
89 | pick_distribution(&pAddr->street_name1, "street_names" , 1, 1, nColumn); |
90 | pick_distribution(&pAddr->street_name2, "street_names" , 1, 2, nColumn); |
91 | |
92 | /* street type is picked from a distribution */ |
93 | pick_distribution(&pAddr->street_type, "street_type" , 1, 1, nColumn); |
94 | |
95 | /* suite number is alphabetic 50% of the time */ |
96 | genrand_integer(&i, DIST_UNIFORM, 1, 100, 0, nColumn); |
97 | if (i & 0x01) { |
98 | sprintf(pAddr->suite_num, "Suite %d" , (i >> 1) * 10); |
99 | } else { |
100 | sprintf(pAddr->suite_num, "Suite %c" , ((i >> 1) % 25) + 'A'); |
101 | } |
102 | |
103 | pTdef = getTdefsByNumber(getTableFromColumn(nColumn)); |
104 | |
105 | /* city is picked from a distribution which maps to large/medium/small */ |
106 | if (pTdef->flags & FL_SMALL) { |
107 | i = (int)get_rowcount(getTableFromColumn(nColumn)); |
108 | genrand_integer(&i, DIST_UNIFORM, 1, (nMaxCities > i) ? i : nMaxCities, 0, nColumn); |
109 | dist_member(&pAddr->city, "cities" , i, 1); |
110 | } else |
111 | pick_distribution(&pAddr->city, "cities" , 1, 6, nColumn); |
112 | |
113 | /* county is picked from a distribution, based on population and keys the |
114 | * rest */ |
115 | if (pTdef->flags & FL_SMALL) { |
116 | i = (int)get_rowcount(getTableFromColumn(nColumn)); |
117 | genrand_integer(&nRegion, DIST_UNIFORM, 1, (nMaxCounties > i) ? i : nMaxCounties, 0, nColumn); |
118 | dist_member(&pAddr->county, "fips_county" , nRegion, 2); |
119 | } else |
120 | nRegion = pick_distribution(&pAddr->county, "fips_county" , 2, 1, nColumn); |
121 | |
122 | /* match state with the selected region/county */ |
123 | dist_member(&pAddr->state, "fips_county" , nRegion, 3); |
124 | |
125 | /* match the zip prefix with the selected region/county */ |
126 | pAddr->zip = city_hash(0, pAddr->city); |
127 | /* 00000 - 00600 are unused. Avoid them */ |
128 | dist_member((void *)&szZipPrefix, "fips_county" , nRegion, 5); |
129 | if (!(szZipPrefix[0] - '0') && (pAddr->zip < 9400)) |
130 | pAddr->zip += 600; |
131 | pAddr->zip += (szZipPrefix[0] - '0') * 10000; |
132 | |
133 | sprintf(szAddr, "%d %s %s %s" , pAddr->street_num, pAddr->street_name1, pAddr->street_name2, pAddr->street_type); |
134 | pAddr->plus4 = city_hash(0, szAddr); |
135 | dist_member(&pAddr->gmt_offset, "fips_county" , nRegion, 6); |
136 | strcpy(pAddr->country, "United States" ); |
137 | |
138 | return (0); |
139 | } |
140 | |
141 | /* |
142 | * Routine: mk_streetnumber |
143 | * Purpose: |
144 | * one of a set of routines that creates addresses |
145 | * Algorithm: |
146 | * Data Structures: |
147 | * |
148 | * Params: |
149 | * nTable: target table (and, by extension, address) to allow differing |
150 | *distributions dest: destination for the random number Returns: Called By: |
151 | * Calls: |
152 | * Assumptions: |
153 | * Side Effects: |
154 | * TODO: 20030422 jms should be replaced if there is no table variation |
155 | */ |
156 | int mk_streetnumber(int nTable, int *dest) { |
157 | genrand_integer(dest, DIST_UNIFORM, 1, 1000, 0, nTable); |
158 | |
159 | return (0); |
160 | } |
161 | |
162 | /* |
163 | * Routine: mk_suitenumber() |
164 | * Purpose: |
165 | * one of a set of routines that creates addresses |
166 | * Algorithm: |
167 | * Data Structures: |
168 | * |
169 | * Params: |
170 | * nTable: target table (and, by extension, address) to allow differing |
171 | *distributions dest: destination for the random number Returns: Called By: |
172 | * Calls: |
173 | * Assumptions: |
174 | * Side Effects: |
175 | * TODO: 20010615 JMS return code is meaningless |
176 | */ |
177 | int mk_suitenumber(int nTable, char *dest) { |
178 | int i; |
179 | |
180 | genrand_integer(&i, DIST_UNIFORM, 1, 100, 0, nTable); |
181 | if (i <= 50) { |
182 | genrand_integer(&i, DIST_UNIFORM, 1, 1000, 0, nTable); |
183 | sprintf(dest, "Suite %d" , i); |
184 | } else { |
185 | genrand_integer(&i, DIST_UNIFORM, 0, 25, 0, nTable); |
186 | sprintf(dest, "Suite %c" , i + 'A'); |
187 | } |
188 | |
189 | return (0); |
190 | } |
191 | |
/*
 * Routine: mk_streetname()
 * Purpose:
 *	one of a set of routines that creates addresses
 * Algorithm:
 *	use a staggered distribution and the 150 most common street names in
 *	the US; two picks are made from "street_names" and joined with a single
 *	space unless the second pick is the empty string
 * Data Structures:
 *
 * Params:
 *	nTable: target table (and, by extension, address) to allow differing
 *		distributions
 *	dest: destination for the street name; the caller must supply enough
 *		room, no length is checked here
 * Returns: always 0
 * Called By:
 * Calls: pick_distribution()
 * Assumptions:
 * Side Effects:
 * TODO: 20010615 JMS return code is meaningless
 */
int mk_streetname(int nTable, char *dest) {
	char *szFirst = NULL;
	char *szSecond = NULL;

	pick_distribution((void *)&szFirst, "street_names", 1, 1, nTable);
	pick_distribution((void *)&szSecond, "street_names", 1, 2, nTable);

	if (szSecond[0] == '\0') {
		/* nothing to append: the first pick is the whole name */
		strcpy(dest, szFirst);
	} else {
		sprintf(dest, "%s %s", szFirst, szSecond);
	}

	return 0;
}
219 | |
/*
 * Routine: mk_city()
 * Purpose:
 *	one of a set of routines that creates addresses
 * Algorithm:
 *	use a staggered distribution of 1000 most common place names in the US;
 *	the "_SCALE_INDEX" setting is passed as the fourth argument of the
 *	"cities" pick
 * Data Structures:
 *
 * Params:
 *	nTable: target table (and, by extension, address) to allow differing
 *		distributions
 *	dest: destination for the city name
 * Returns: always 0
 * Called By:
 * Calls: get_int(), pick_distribution()
 * Assumptions:
 * Side Effects:
 * TODO: 20030423 jms should be replaced if there is no per-table variation
 * NOTE(review): nTable is not used; the last pick_distribution() argument is
 *	the literal 11 -- confirm this is intentional before changing it, as it
 *	would alter generated data
 */
int mk_city(int nTable, char **dest) {
	int nScaleIndex = (int)get_int("_SCALE_INDEX");

	pick_distribution((void *)dest, "cities", 1, nScaleIndex, 11);
	return 0;
}
240 | |
/*
 * Map an unsigned accumulator back onto int with wrap-around semantics.
 * Written out longhand so that no out-of-range conversion is ever performed.
 */
static int city_hash_to_int(unsigned int uValue) {
	const unsigned int uIntMax = ~0u >> 1; /* largest value an int can hold */

	if (uValue <= uIntMax) {
		return (int)uValue;
	}
	/* upper half of the unsigned range: equals uValue - (UINT_MAX + 1) */
	return -(int)(~0u - uValue) - 1;
}

/*
 * Routine: city_hash()
 * Purpose: reduce a string to a small integer (at most 4 digits in magnitude)
 * Algorithm:
 *	base-26 accumulation of (character - 'A'); whenever the accumulator
 *	exceeds 1000000 its low four decimal digits are added to the result and
 *	the accumulator restarts at 0; the final accumulator contributes its
 *	remainder modulo 1000 and the total is reduced modulo 10000
 * Data Structures:
 *
 * Params:
 *	nTable: not used
 *	name: NUL-terminated string to hash
 * Returns:
 *	the hash; it can be negative when the string contains characters that
 *	sort below 'A' (digits, spaces), because C's % keeps the sign of the
 *	dividend
 * Called By:
 * Calls:
 * Assumptions:
 * Side Effects:
 * TODO: None
 *
 * The accumulation is carried out in unsigned arithmetic. A leading
 * character below 'A' drives the accumulator negative, where it never reaches
 * the 1000000 reset threshold; multiplying a signed int by 26 on every step
 * then overflows after a handful of characters, which is undefined behaviour.
 * Unsigned arithmetic wraps by definition and yields the same values a
 * wrapping two's-complement signed multiply would.
 */
int city_hash(int nTable, char *name) {
	char *cp;
	int hash_value = 0, res = 0;

	(void)nTable;

	for (cp = name; *cp; cp++) {
		unsigned int uAcc = (unsigned int)hash_value;

		uAcc = uAcc * 26u - (unsigned int)'A' + (unsigned int)*cp;
		hash_value = city_hash_to_int(uAcc);

		if (hash_value > 1000000) {
			hash_value %= 10000;
			res += hash_value;
			hash_value = 0;
		}
	}
	hash_value %= 1000;
	res += hash_value;
	res %= 10000; /* looking for a 4 digit result */

	return (res);
}
275 | |
276 | /* |
277 | * Routine: |
278 | * one of a set of routines that creates addresses |
279 | * Algorithm: |
280 | * use a compound distribution of the 3500 counties in the US |
281 | * Data Structures: |
282 | * |
283 | * Params: |
284 | * nTable: target table (and, by extension, address) to allow differing |
285 | *distributions dest: destination for the city name nRegion: the county selected |
286 | * city: the city name selected |
287 | * Returns: |
288 | * Called By: |
289 | * Calls: |
290 | * Assumptions: |
291 | * Side Effects: |
292 | * TODO: 20010615 JMS return code is meaningless |
293 | */ |
294 | int mk_zipcode(int nTable, char *dest, int nRegion, char *city) { |
295 | char *szZipPrefix = NULL; |
296 | int nCityCode; |
297 | int nPlusFour; |
298 | |
299 | dist_member((void *)&szZipPrefix, "fips_county" , nRegion, 5); |
300 | nCityCode = city_hash(nTable, city); |
301 | genrand_integer(&nPlusFour, DIST_UNIFORM, 1, 9999, 0, nTable); |
302 | sprintf(dest, "%s%04d-%04d" , szZipPrefix, nCityCode, nPlusFour); |
303 | |
304 | return (0); |
305 | } |
306 | |