| 1 | /* |
| 2 | * Legal Notice |
| 3 | * |
| 4 | * This document and associated source code (the "Work") is a part of a |
| 5 | * benchmark specification maintained by the TPC. |
| 6 | * |
| 7 | * The TPC reserves all right, title, and interest to the Work as provided |
| 8 | * under U.S. and international laws, including without limitation all patent |
| 9 | * and trademark rights therein. |
| 10 | * |
| 11 | * No Warranty |
| 12 | * |
| 13 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
| 14 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
| 15 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
| 16 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
| 17 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
| 18 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
| 19 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
| 20 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
| 21 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
| 22 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
| 23 | * WITH REGARD TO THE WORK. |
| 24 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
| 25 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
| 26 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
| 27 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
| 28 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
| 29 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
| 30 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
| 31 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
| 32 | * |
| 33 | * Contributors |
| 34 | * - Sergey Vasilevskiy |
| 35 | * - Doug Johnson |
| 36 | */ |
| 37 | |
| 38 | #include "main/EGenTables_stdafx.h" |
| 39 | |
| 40 | using namespace TPCE; |
| 41 | |
| 42 | const UINT iUSACtryCode = 1; // must be the same as the code in country tax rates file |
| 43 | const UINT iCanadaCtryCode = 2; // must be the same as the code in country tax rates file |
| 44 | |
| 45 | // Minimum and maximum to use when generating address street numbers. |
| 46 | const int iStreetNumberMin = 100; |
| 47 | const int iStreetNumberMax = 25000; |
| 48 | |
| 49 | // Some customers have an AD_LINE_2, some are NULL. |
| 50 | const int iPctCustomersWithNullAD_LINE_2 = 60; |
| 51 | |
| 52 | // Of the customers that have an AD_LINE_2, some are |
| 53 | // an apartment, others are a suite. |
| 54 | const int iPctCustomersWithAptAD_LINE_2 = 75; |
| 55 | |
| 56 | // Minimum and maximum to use when generating apartment numbers. |
| 57 | const int iApartmentNumberMin = 1; |
| 58 | const int iApartmentNumberMax = 1000; |
| 59 | |
| 60 | // Minimum and maximum to use when generating suite numbers. |
| 61 | const int iSuiteNumberMin = 1; |
| 62 | const int iSuiteNumberMax = 500; |
| 63 | |
| 64 | // Number of RNG calls to skip for one row in order |
| 65 | // to not use any of the random values from the previous row. |
| 66 | const int iRNGSkipOneRowAddress = 10; // real number in 3.5: 7 |
| 67 | |
| 68 | /* |
| 69 | * Constructor for the ADDRESS table class. |
| 70 | * |
| 71 | * PARAMETERS: |
| 72 | * IN inputFiles - input flat files loaded in memory |
| 73 | * IN iCustomerCount - number of customers to generate |
| 74 | * IN iStartFromCustomer - ordinal position of the first customer in |
| 75 | * the sequence (Note: 1-based) for whom to generate the addresses. Used if |
| 76 | * generating customer addresses only. IN bCustomerAddressesOnly - if true, |
| 77 | * generate only customer addresses if false, generate exchange, company, and |
| 78 | * customer addresses (always start from the first customer in this case) |
| 79 | */ |
| 80 | CAddressTable::CAddressTable(const DataFileManager &dfm, TIdent iCustomerCount, TIdent iStartFromCustomer, |
| 81 | bool bCustomerAddressesOnly, bool bCacheEnabled) |
| 82 | : TableTemplate<ADDRESS_ROW>(), m_companies(dfm.CompanyFile()), m_Street(dfm.StreetNameDataFile()), |
| 83 | m_StreetSuffix(dfm.StreetSuffixDataFile()), m_ZipCode(dfm.ZipCodeDataFile()), |
| 84 | m_iStartFromCustomer(iStartFromCustomer), m_iCustomerCount(iCustomerCount), |
| 85 | m_bCustomerAddressesOnly(bCustomerAddressesOnly), m_bCustomerAddress(bCustomerAddressesOnly), |
| 86 | m_bCacheEnabled(bCacheEnabled), INVALID_CACHE_ENTRY(-1) { |
| 87 | m_iExchangeCount = dfm.ExchangeDataFile().size(); // number of rows in Exchange |
| 88 | m_iCompanyCount = m_companies.GetConfiguredCompanyCount(); // number of configured companies |
| 89 | |
| 90 | // Generate customer addresses only (used for CUSTOMER_TAXRATE) |
| 91 | if (bCustomerAddressesOnly) { |
| 92 | // skip exchanges and companies |
| 93 | m_iLastRowNumber = m_iExchangeCount + m_iCompanyCount + iStartFromCustomer - 1; |
| 94 | |
| 95 | // This is not really a count, but the last address row to generate. |
| 96 | // |
| 97 | m_iTotalAddressCount = m_iLastRowNumber + m_iCustomerCount; |
| 98 | } else { // Generating not only customer, but also exchange and company |
| 99 | // addresses |
| 100 | m_iLastRowNumber = iStartFromCustomer - 1; |
| 101 | |
| 102 | // This is not really a count, but the last address row to generate. |
| 103 | // |
| 104 | m_iTotalAddressCount = m_iLastRowNumber + m_iCustomerCount + m_iExchangeCount + m_iCompanyCount; |
| 105 | } |
| 106 | |
| 107 | m_row.AD_ID = m_iLastRowNumber + iTIdentShift; // extend to 64 bits for address id |
| 108 | |
| 109 | if (m_bCacheEnabled) { |
| 110 | m_iCacheSize = (int)iDefaultLoadUnitSize; |
| 111 | m_iCacheOffset = iTIdentShift + m_iExchangeCount + m_iCompanyCount + m_iStartFromCustomer; |
| 112 | m_CacheZipCode = new int[m_iCacheSize]; |
| 113 | for (int i = 0; i < m_iCacheSize; i++) { |
| 114 | m_CacheZipCode[i] = INVALID_CACHE_ENTRY; |
| 115 | } |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | CAddressTable::~CAddressTable() { |
| 120 | if (m_bCacheEnabled) { |
| 121 | delete[] m_CacheZipCode; |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | /* |
| 126 | * Reset the state for the next load unit. |
| 127 | * |
| 128 | * PARAMETERS: |
| 129 | * none. |
| 130 | * |
| 131 | * RETURNS: |
| 132 | * none. |
| 133 | */ |
| 134 | void CAddressTable::InitNextLoadUnit() { |
| 135 | m_rnd.SetSeed(m_rnd.RndNthElement(RNGSeedTableDefault, (RNGSEED)m_iLastRowNumber * iRNGSkipOneRowAddress)); |
| 136 | |
| 137 | ClearRecord(); // this is needed for EGenTest to work |
| 138 | |
| 139 | if (m_bCacheEnabled) { |
| 140 | m_iCacheOffset += iDefaultLoadUnitSize; |
| 141 | for (int i = 0; i < m_iCacheSize; i++) { |
| 142 | m_CacheZipCode[i] = INVALID_CACHE_ENTRY; |
| 143 | } |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | /* |
| 148 | * Generates the next A_ID value. |
| 149 | * It is stored in the internal record structure and also returned. |
| 150 | * The number of rows generated is incremented. This is why |
| 151 | * this function cannot be called more than once for a record. |
| 152 | * |
| 153 | * PARAMETERS: |
| 154 | * none. |
| 155 | * |
| 156 | * RETURNS: |
| 157 | * next address id. |
| 158 | */ |
| 159 | TIdent CAddressTable::GenerateNextAD_ID() { |
| 160 | // Reset RNG at Load Unit boundary, so that all data is repeatable. |
| 161 | // |
| 162 | if (m_iLastRowNumber > (m_iExchangeCount + m_iCompanyCount) && |
| 163 | ((m_iLastRowNumber - (m_iExchangeCount + m_iCompanyCount)) % iDefaultLoadUnitSize == 0)) { |
| 164 | InitNextLoadUnit(); |
| 165 | } |
| 166 | |
| 167 | ++m_iLastRowNumber; |
| 168 | // Find out whether this next row is for a customer (so as to generate |
| 169 | // AD_LINE_2). Exchange and Company addresses are before Customer ones. |
| 170 | // |
| 171 | m_bCustomerAddress = m_iLastRowNumber >= m_iExchangeCount + m_iCompanyCount; |
| 172 | |
| 173 | // update state info |
| 174 | m_bMoreRecords = m_iLastRowNumber < m_iTotalAddressCount; |
| 175 | |
| 176 | m_row.AD_ID = m_iLastRowNumber + iTIdentShift; |
| 177 | |
| 178 | return m_row.AD_ID; |
| 179 | } |
| 180 | |
| 181 | /* |
| 182 | * Returns the address id of the customer specified by the customer id. |
| 183 | * |
| 184 | * PARAMETERS: |
| 185 | * IN C_ID - customer id (1-based) |
| 186 | * |
| 187 | * RETURNS: |
| 188 | * address id. |
| 189 | */ |
| 190 | TIdent CAddressTable::GetAD_IDForCustomer(TIdent C_ID) { |
| 191 | return m_iExchangeCount + m_iCompanyCount + C_ID; |
| 192 | } |
| 193 | |
| 194 | /* |
| 195 | * Generate AD_LINE_1 and store it in the record structure. |
| 196 | * Does not increment the number of rows generated. |
| 197 | * |
| 198 | * PARAMETERS: |
| 199 | * none. |
| 200 | * |
| 201 | * RETURNS: |
| 202 | * none. |
| 203 | */ |
| 204 | void CAddressTable::GenerateAD_LINE_1() { |
| 205 | int iStreetNum = m_rnd.RndIntRange(iStreetNumberMin, iStreetNumberMax); |
| 206 | // int iStreetThreshold = m_rnd.RndIntRange(0, |
| 207 | // m_Street->GetGreatestKey()-2); |
| 208 | int iStreetThreshold = m_rnd.RndIntRange(0, m_Street.size() - 2); |
| 209 | int iStreetSuffixThreshold = m_rnd.RndIntRange(0, m_StreetSuffix.size() - 1); |
| 210 | |
| 211 | snprintf(m_row.AD_LINE1, sizeof(m_row.AD_LINE1), "%d %s %s" , iStreetNum, m_Street[iStreetThreshold].STREET_CSTR(), |
| 212 | m_StreetSuffix[iStreetSuffixThreshold].SUFFIX_CSTR()); |
| 213 | } |
| 214 | |
| 215 | /* |
| 216 | * Generate AD_LINE_2 and store it in the record structure. |
| 217 | * Does not increment the number of rows generated. |
| 218 | * |
| 219 | * PARAMETERS: |
| 220 | * none. |
| 221 | * |
| 222 | * RETURNS: |
| 223 | * none. |
| 224 | */ |
| 225 | void CAddressTable::GenerateAD_LINE_2() { |
| 226 | if (!m_bCustomerAddress || m_rnd.RndPercent(iPctCustomersWithNullAD_LINE_2)) { // Generate second address line |
| 227 | // only for customers (not |
| 228 | // companies) |
| 229 | m_row.AD_LINE2[0] = '\0'; |
| 230 | } else { |
| 231 | if (m_rnd.RndPercent(iPctCustomersWithAptAD_LINE_2)) { |
| 232 | snprintf(m_row.AD_LINE2, sizeof(m_row.AD_LINE2), "Apt. %d" , |
| 233 | m_rnd.RndIntRange(iApartmentNumberMin, iApartmentNumberMax)); |
| 234 | } else { |
| 235 | snprintf(m_row.AD_LINE2, sizeof(m_row.AD_LINE2), "Suite %d" , |
| 236 | m_rnd.RndIntRange(iSuiteNumberMin, iSuiteNumberMax)); |
| 237 | } |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | /* |
| 242 | * For a given address id returns the same Threshold used to |
| 243 | * select the town, division, zip, and country. |
| 244 | * Needed to return a specific division/country for a given address id (for |
| 245 | * customer tax rates). |
| 246 | * |
| 247 | * PARAMETERS: |
| 248 | * IN ADID - address id |
| 249 | * |
| 250 | * RETURNS: |
| 251 | * none. |
| 252 | */ |
| 253 | int CAddressTable::GetTownDivisionZipCodeThreshold(TIdent ADID) { |
| 254 | RNGSEED OldSeed; |
| 255 | int iThreshold; |
| 256 | |
| 257 | OldSeed = m_rnd.GetSeed(); |
| 258 | m_rnd.SetSeed(m_rnd.RndNthElement(RNGSeedBaseTownDivZip, (RNGSEED)ADID)); |
| 259 | iThreshold = m_rnd.RndIntRange(0, m_ZipCode.size() - 1); |
| 260 | m_rnd.SetSeed(OldSeed); |
| 261 | return (iThreshold); |
| 262 | } |
| 263 | |
| 264 | /* |
| 265 | * Return the country code code for a given zip code. |
| 266 | * |
| 267 | * PARAMETERS: |
| 268 | * IN szZipCode - string with a US or Canada zip code |
| 269 | * |
| 270 | * RETURNS: |
| 271 | * country code. |
| 272 | */ |
| 273 | UINT CAddressTable::GetCountryCode(const char *szZipCode) { |
| 274 | if (('0' <= szZipCode[0]) && (szZipCode[0] <= '9')) { |
| 275 | // If the zip code starts with a number, then it's a USA code. |
| 276 | return (iUSACtryCode); |
| 277 | } else { |
| 278 | // If the zip code does NOT start with a number, than it's a Canadian |
| 279 | // code. |
| 280 | return (iCanadaCtryCode); |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | /* |
| 285 | * Return a certain division/country code (from the input file) for a given |
| 286 | * address id. Used in the loader to properly calculate tax on a trade. |
| 287 | * |
| 288 | * PARAMETERS: |
| 289 | * IN AD_ID - address id |
| 290 | * OUT iDivCode - division (state/province) code |
| 291 | * OUT iCtryCode - country (USA/CANADA) code |
| 292 | * |
| 293 | * RETURNS: |
| 294 | * none. |
| 295 | */ |
| 296 | void CAddressTable::GetDivisionAndCountryCodesForAddress(TIdent AD_ID, UINT &iDivCode, UINT &iCtryCode) { |
| 297 | // const TZipCodeInputRow* pZipCodeInputRow = NULL; |
| 298 | |
| 299 | // We will sometimes get AD_ID values that are outside the current |
| 300 | // load unit (cached range). We need to check for this case |
| 301 | // and avoid the lookup (as we will segfault or get bogus data.) |
| 302 | TIdent index = AD_ID - m_iCacheOffset; |
| 303 | bool bCheckCache = (index >= 0 && index <= m_iCacheSize); |
| 304 | |
| 305 | if (m_bCacheEnabled && bCheckCache && (INVALID_CACHE_ENTRY != m_CacheZipCode[index])) { |
| 306 | // Make use of the cache to get the data. |
| 307 | iDivCode = m_ZipCode[m_CacheZipCode[index]].DivisionTaxKey(); |
| 308 | iCtryCode = GetCountryCode(m_ZipCode[m_CacheZipCode[index]].ZC_CODE_CSTR()); |
| 309 | |
| 310 | // We're done, so bail out. |
| 311 | return; |
| 312 | } |
| 313 | |
| 314 | // The cache wasn't used so get the necessary value. |
| 315 | int iThreshold = GetTownDivisionZipCodeThreshold(AD_ID); |
| 316 | |
| 317 | // If possible, cache the result in case we need it again. |
| 318 | if (m_bCacheEnabled && bCheckCache) { |
| 319 | m_CacheZipCode[index] = iThreshold; |
| 320 | } |
| 321 | |
| 322 | // Get the data. |
| 323 | iDivCode = m_ZipCode[iThreshold].DivisionTaxKey(); |
| 324 | iCtryCode = GetCountryCode(m_ZipCode[iThreshold].ZC_CODE_CSTR()); |
| 325 | } |
| 326 | |
| 327 | /* |
| 328 | * Generate zip code and country for the current address id |
| 329 | * and store them in the record structure. |
| 330 | * Does not increment the number of rows generated. |
| 331 | * |
| 332 | * PARAMETERS: |
| 333 | * none. |
| 334 | * |
| 335 | * RETURNS: |
| 336 | * none. |
| 337 | */ |
| 338 | void CAddressTable::GenerateAD_ZC_CODE_CTRY() { |
| 339 | int iThreshold; |
| 340 | |
| 341 | iThreshold = GetTownDivisionZipCodeThreshold(m_row.AD_ID); |
| 342 | const ZipCodeDataFileRecord &dfr = m_ZipCode[iThreshold]; |
| 343 | |
| 344 | strncpy(m_row.AD_ZC_CODE, dfr.ZC_CODE_CSTR(), sizeof(m_row.AD_ZC_CODE)); |
| 345 | |
| 346 | if (iUSACtryCode == GetCountryCode(dfr.ZC_CODE_CSTR())) { // US state |
| 347 | strncpy(m_row.AD_CTRY, "USA" , sizeof(m_row.AD_CTRY)); |
| 348 | } else { // Canadian province |
| 349 | strncpy(m_row.AD_CTRY, "CANADA" , sizeof(m_row.AD_CTRY)); |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | /* |
| 354 | * Generate all column values for the next row |
| 355 | * and store them in the record structure. |
| 356 | * Increment the number of rows generated. |
| 357 | * |
| 358 | * PARAMETERS: |
| 359 | * none. |
| 360 | * |
| 361 | * RETURNS: |
| 362 | * TRUE, if there are more records in the ADDRESS table; FALSE |
| 363 | * othewise. |
| 364 | */ |
| 365 | bool CAddressTable::GenerateNextRecord() { |
| 366 | GenerateNextAD_ID(); |
| 367 | GenerateAD_LINE_1(); |
| 368 | GenerateAD_LINE_2(); |
| 369 | GenerateAD_ZC_CODE_CTRY(); |
| 370 | |
| 371 | // Return false if all the rows have been generated |
| 372 | return (MoreRecords()); |
| 373 | } |
| 374 | |