| 1 | /* |
| 2 | * Legal Notice |
| 3 | * |
| 4 | * This document and associated source code (the "Work") is a part of a |
| 5 | * benchmark specification maintained by the TPC. |
| 6 | * |
| 7 | * The TPC reserves all right, title, and interest to the Work as provided |
| 8 | * under U.S. and international laws, including without limitation all patent |
| 9 | * and trademark rights therein. |
| 10 | * |
| 11 | * No Warranty |
| 12 | * |
| 13 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
| 14 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
| 15 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
| 16 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
| 17 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
| 18 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
| 19 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
| 20 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
| 21 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
| 22 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
| 23 | * WITH REGARD TO THE WORK. |
| 24 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
| 25 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
| 26 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
| 27 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
| 28 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
| 29 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
| 30 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
| 31 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
| 32 | * |
| 33 | * Contributors |
| 34 | * - Sergey Vasilevskiy |
| 35 | * - Doug Johnson |
| 36 | */ |
| 37 | |
#include "input/SecurityFile.h"

#include <cassert>
#include <cstring>
#include <stdexcept>
#include <string>

#include "utilities/MiscConsts.h"
| 43 | |
| 44 | using namespace std; |
| 45 | using namespace TPCE; |
| 46 | |
| 47 | namespace TPCE { |
| 48 | // We use a small set of values for 26 raised to a power, so store them in |
| 49 | // a constant array to save doing calls to pow( 26.0, ? ) |
| 50 | static const UINT Power26[] = {1, 26, 676, 17576, 456976, 11881376, 308915776}; |
| 51 | |
| 52 | // For index i > 0, this array holds the sum of 26^0 ... 26^(i-1) |
| 53 | static const UINT64 Power26Sum[] = {0, 1, 27, 703, 18279, 475255, 12356631, 321272407, UINT64_CONST(8353082583)}; |
| 54 | |
| 55 | } // namespace TPCE |
| 56 | |
| 57 | void CSecurityFile::CreateSuffix(TIdent Multiplier, char *pBuf, size_t BufSize) const { |
| 58 | size_t CharCount(0); |
| 59 | INT64 Offset(0); |
| 60 | INT64 LCLIndex(0); // LowerCaseLetter array index |
| 61 | |
| 62 | while ((UINT64)Multiplier >= Power26Sum[CharCount + 1]) { |
| 63 | CharCount++; |
| 64 | } |
| 65 | |
| 66 | if (CharCount + 2 <= BufSize) // 1 extra for separator and 1 extra for terminating NULL |
| 67 | { |
| 68 | *pBuf = m_SUFFIX_SEPARATOR; |
| 69 | pBuf++; |
| 70 | // CharCount is the number of letters needed in the suffix |
| 71 | // The base string is a string of 'a's of length CharCount |
| 72 | // Find the offset from the base value represented by the string |
| 73 | // of 'a's to the desired number, and modify the base string |
| 74 | // accordingly. |
| 75 | Offset = Multiplier - Power26Sum[CharCount]; |
| 76 | |
| 77 | while (CharCount > 0) { |
| 78 | LCLIndex = Offset / Power26[CharCount - 1]; |
| 79 | *pBuf = LowerCaseLetters[LCLIndex]; |
| 80 | pBuf++; |
| 81 | Offset -= (LCLIndex * Power26[CharCount - 1]); |
| 82 | CharCount--; |
| 83 | } |
| 84 | *pBuf = '\0'; |
| 85 | } else { |
| 86 | // Not enough room in the buffer |
| 87 | CharCount = BufSize - 1; |
| 88 | while (CharCount > 0) { |
| 89 | *pBuf = m_SUFFIX_SEPARATOR; |
| 90 | pBuf++; |
| 91 | CharCount--; |
| 92 | } |
| 93 | *pBuf = '\0'; |
| 94 | } |
| 95 | } |
| 96 | |
| 97 | INT64 CSecurityFile::ParseSuffix(const char *pSymbol) const { |
| 98 | int CharCount(0); |
| 99 | INT64 Multiplier(0); |
| 100 | |
| 101 | CharCount = (int)strlen(pSymbol); |
| 102 | |
| 103 | Multiplier = Power26Sum[CharCount]; |
| 104 | |
| 105 | while (CharCount > 0) { |
| 106 | Multiplier += (INT64)Power26[CharCount - 1] * m_LowerCaseLetterToIntMap[*pSymbol]; |
| 107 | CharCount--; |
| 108 | pSymbol++; |
| 109 | } |
| 110 | return (Multiplier); |
| 111 | } |
| 112 | |
| 113 | CSecurityFile::CSecurityFile(const SecurityDataFile_t &dataFile, TIdent iConfiguredCustomerCount, |
| 114 | TIdent iActiveCustomerCount, UINT baseCompanyCount) |
| 115 | : m_dataFile(&dataFile), m_iConfiguredSecurityCount(CalculateSecurityCount(iConfiguredCustomerCount)), |
| 116 | m_iActiveSecurityCount(CalculateSecurityCount(iActiveCustomerCount)), m_iBaseCompanyCount(baseCompanyCount), |
| 117 | m_SymbolToIdMapIsLoaded(false), m_SUFFIX_SEPARATOR('-') { |
| 118 | } |
| 119 | |
| 120 | // Calculate total security count for the specified number of customers. |
| 121 | // Sort of a static method. Used in parallel generation of securities related |
| 122 | // tables. |
| 123 | // |
| 124 | TIdent CSecurityFile::CalculateSecurityCount(TIdent iCustomerCount) const { |
| 125 | return iCustomerCount / iDefaultLoadUnitSize * iOneLoadUnitSecurityCount; |
| 126 | } |
| 127 | |
| 128 | // Calculate the first security id (0-based) for the specified customer id |
| 129 | // |
| 130 | TIdent CSecurityFile::CalculateStartFromSecurity(TIdent iStartFromCustomer) const { |
| 131 | return iStartFromCustomer / iDefaultLoadUnitSize * iOneLoadUnitSecurityCount; |
| 132 | } |
| 133 | |
| 134 | // Create security symbol with mod/div magic. |
| 135 | // |
| 136 | // This function is needed to scale unique security |
| 137 | // symbols with the database size. |
| 138 | // |
| 139 | void CSecurityFile::CreateSymbol(TIdent iIndex, // row number |
| 140 | char *szOutput, // output buffer |
| 141 | size_t iOutputLen) // size of the output buffer (including null) |
| 142 | const { |
| 143 | TIdent iFileIndex = iIndex % m_dataFile->size(); |
| 144 | TIdent iAdd = iIndex / m_dataFile->size(); |
| 145 | size_t iNewLen; |
| 146 | |
| 147 | // Load the base symbol |
| 148 | strncpy(szOutput, GetRecord(iFileIndex).S_SYMB_CSTR(), iOutputLen); |
| 149 | |
| 150 | szOutput[iOutputLen - 1] = '\0'; // Ensure NULL termination |
| 151 | |
| 152 | // Add a suffix if needed |
| 153 | if (iAdd > 0) { |
| 154 | iNewLen = strlen(szOutput); |
| 155 | CreateSuffix(iAdd, &szOutput[iNewLen], iOutputLen - iNewLen); |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | // Return company id for the specified row of the SECURITY table. |
| 160 | // Index can exceed the size of the Security flat file. |
| 161 | // |
| 162 | TIdent CSecurityFile::GetCompanyId(TIdent iIndex) const { |
| 163 | // Index wraps around every 6850 securities (5000 companies). |
| 164 | // |
| 165 | return (*m_dataFile)[(int)(iIndex % m_dataFile->size())].S_CO_ID() + iTIdentShift + |
| 166 | iIndex / m_dataFile->size() * m_iBaseCompanyCount; |
| 167 | } |
| 168 | |
| 169 | TIdent CSecurityFile::GetCompanyIndex(TIdent Index) const { |
| 170 | // Indices and Id's are offset by 1 |
| 171 | return (GetCompanyId(Index) - 1 - iTIdentShift); |
| 172 | } |
| 173 | |
| 174 | // Return the number of securities in the database for |
| 175 | // a certain number of customers. |
| 176 | // |
| 177 | TIdent CSecurityFile::GetSize() const { |
| 178 | return m_iConfiguredSecurityCount; |
| 179 | } |
| 180 | |
| 181 | // Return the number of securities in the database for |
| 182 | // the configured number of customers. |
| 183 | // |
| 184 | TIdent CSecurityFile::GetConfiguredSecurityCount() const { |
| 185 | return m_iConfiguredSecurityCount; |
| 186 | } |
| 187 | |
| 188 | // Return the number of securities in the database for |
| 189 | // the active number of customers. |
| 190 | // |
| 191 | TIdent CSecurityFile::GetActiveSecurityCount() const { |
| 192 | return m_iActiveSecurityCount; |
| 193 | } |
| 194 | |
| 195 | // Overload GetRecord to wrap around indices that |
| 196 | // are larger than the flat file |
| 197 | // |
| 198 | const SecurityDataFileRecord &CSecurityFile::GetRecord(TIdent index) const { |
| 199 | return (*m_dataFile)[(int)(index % m_dataFile->size())]; |
| 200 | } |
| 201 | |
| 202 | // Load the symbol-to-id map |
| 203 | // Logical const-ness - the maps and the is-loaded flag may change but the |
| 204 | // "real" Security File data is unchanged. |
| 205 | bool CSecurityFile::LoadSymbolToIdMap(void) const { |
| 206 | if (!m_SymbolToIdMapIsLoaded) { |
| 207 | int ii; |
| 208 | int limit = m_dataFile->size(); |
| 209 | |
| 210 | for (ii = 0; ii < limit; ii++) { |
| 211 | string sSymbol((*m_dataFile)[ii].S_SYMB()); |
| 212 | m_SymbolToIdMap[sSymbol] = (*m_dataFile)[ii].S_ID(); |
| 213 | } |
| 214 | m_SymbolToIdMapIsLoaded = true; |
| 215 | |
| 216 | for (ii = 0; ii < MaxLowerCaseLetters; ii++) { |
| 217 | m_LowerCaseLetterToIntMap[LowerCaseLetters[ii]] = ii; |
| 218 | } |
| 219 | } |
| 220 | return (m_SymbolToIdMapIsLoaded); |
| 221 | } |
| 222 | |
| 223 | TIdent CSecurityFile::GetId(char *pSymbol) const { |
| 224 | char *pSeparator(NULL); |
| 225 | |
| 226 | if (!m_SymbolToIdMapIsLoaded) { |
| 227 | LoadSymbolToIdMap(); |
| 228 | } |
| 229 | if (NULL == (pSeparator = strchr(pSymbol, m_SUFFIX_SEPARATOR))) { |
| 230 | // we're dealing with a base symbol |
| 231 | string sSymbol(pSymbol); |
| 232 | return (m_SymbolToIdMap[sSymbol]); |
| 233 | } else { |
| 234 | // we're dealing with an extended symbol |
| 235 | char *pSuffix(NULL); |
| 236 | TIdent BaseId(0); |
| 237 | TIdent Multiplier(0); |
| 238 | |
| 239 | string sSymbol(pSymbol, static_cast<size_t>(pSeparator - pSymbol)); |
| 240 | BaseId = m_SymbolToIdMap[sSymbol]; |
| 241 | |
| 242 | pSuffix = pSeparator + 1; // The suffix starts right after the separator character |
| 243 | Multiplier = (int)ParseSuffix(pSuffix); // For now, suffix values fit in an int, cast ParseSuffix |
| 244 | // to avoid compiler warning |
| 245 | |
| 246 | return ((Multiplier * m_dataFile->size()) + BaseId); |
| 247 | } |
| 248 | } |
| 249 | |
| 250 | TIdent CSecurityFile::GetIndex(char *pSymbol) const { |
| 251 | // Indices and Id's are offset by 1 |
| 252 | return (GetId(pSymbol) - 1); |
| 253 | } |
| 254 | |
| 255 | eExchangeID CSecurityFile::GetExchangeIndex(TIdent index) const { |
| 256 | // The mod converts a scaled security index into a base security index |
| 257 | const char *pExchange = (*m_dataFile)[(int)(index % m_dataFile->size())].S_EX_ID_CSTR(); |
| 258 | eExchangeID eExchangeIndex; |
| 259 | |
| 260 | if (!strcmp(pExchange, "NYSE" )) { |
| 261 | eExchangeIndex = eNYSE; |
| 262 | } else if (!strcmp(pExchange, "NASDAQ" )) { |
| 263 | eExchangeIndex = eNASDAQ; |
| 264 | } else if (!strcmp(pExchange, "AMEX" )) { |
| 265 | eExchangeIndex = eAMEX; |
| 266 | } else if (!strcmp(pExchange, "PCX" )) { |
| 267 | eExchangeIndex = ePCX; |
| 268 | } else { |
| 269 | assert(false); |
| 270 | } |
| 271 | |
| 272 | return eExchangeIndex; |
| 273 | } |
| 274 | |