SecurityFile.cpp source code [DuckDB/third_party/tpce-tool/input/SecurityFile.cpp]

1	/*
2	* Legal Notice
3	*
4	* This document and associated source code (the "Work") is a part of a
5	* benchmark specification maintained by the TPC.
6	*
7	* The TPC reserves all right, title, and interest to the Work as provided
8	* under U.S. and international laws, including without limitation all patent
9	* and trademark rights therein.
10	*
11	* No Warranty
12	*
13	* 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION
14	* CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE
15	* AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER
16	* WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY,
17	* INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES,
18	* DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR
19	* PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF
20	* WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE.
21	* ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT,
22	* QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT
23	* WITH REGARD TO THE WORK.
24	* 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO
25	* ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE
26	* COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS
27	* OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT,
28	* INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY,
29	* OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT
30	* RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD
31	* ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES.
32	*
33	* Contributors
34	* - Sergey Vasilevskiy
35	* - Doug Johnson
36	*/
37
38	#include "input/SecurityFile.h"
39
40	#include <cstring>
41
42	#include "utilities/MiscConsts.h"
43
44	using namespace std;
45	using namespace TPCE;
46
47	namespace TPCE {
48	// We use a small set of values for 26 raised to a power, so store them in
49	// a constant array to save doing calls to pow( 26.0, ? )
50	static const UINT Power26[] = {`1`, `26`, `676`, `17576`, `456976`, `11881376`, `308915776`};
51
52	// For index i > 0, this array holds the sum of 26^0 ... 26^(i-1)
53	static const UINT64 Power26Sum[] = {`0`, `1`, `27`, `703`, `18279`, `475255`, `12356631`, `321272407`, UINT64_CONST(`8353082583`)};
54
55	} // namespace TPCE
56
57	void CSecurityFile::CreateSuffix(TIdent Multiplier, char pBuf, size_t BufSize) const* {
58	size_t CharCount(`0`);
59	INT64 Offset(`0`);
60	INT64 LCLIndex(`0`); // LowerCaseLetter array index
61
62	while ((UINT64)Multiplier >= Power26Sum[CharCount + `1`]) {
63	CharCount++;
64	}
65
66	if (CharCount + `2` <= BufSize) // 1 extra for separator and 1 extra for terminating NULL
67	{
68	*pBuf = m_SUFFIX_SEPARATOR;
69	pBuf++;
70	// CharCount is the number of letters needed in the suffix
71	// The base string is a string of 'a's of length CharCount
72	// Find the offset from the base value represented by the string
73	// of 'a's to the desired number, and modify the base string
74	// accordingly.
75	Offset = Multiplier - Power26Sum[CharCount];
76
77	while (CharCount > `0`) {
78	LCLIndex = Offset / Power26[CharCount - `1`];
79	*pBuf = LowerCaseLetters[LCLIndex];
80	pBuf++;
81	Offset -= (LCLIndex * Power26[CharCount - `1`]);
82	CharCount--;
83	}
84	*pBuf = `'\0'`;
85	} else {
86	// Not enough room in the buffer
87	CharCount = BufSize - `1`;
88	while (CharCount > `0`) {
89	*pBuf = m_SUFFIX_SEPARATOR;
90	pBuf++;
91	CharCount--;
92	}
93	*pBuf = `'\0'`;
94	}
95	}
96
97	INT64 CSecurityFile::ParseSuffix(const char pSymbol) const* {
98	int CharCount(`0`);
99	INT64 Multiplier(`0`);
100
101	CharCount = (int)strlen(pSymbol);
102
103	Multiplier = Power26Sum[CharCount];
104
105	while (CharCount > `0`) {
106	Multiplier += (INT64)Power26[CharCount - `1`] * m_LowerCaseLetterToIntMap [*pSymbol];
107	CharCount--;
108	pSymbol++;
109	}
110	return (Multiplier);
111	}
112
113	CSecurityFile::CSecurityFile(const SecurityDataFile_t &dataFile, TIdent iConfiguredCustomerCount,
114	TIdent iActiveCustomerCount, UINT baseCompanyCount)
115	: m_dataFile(&dataFile), m_iConfiguredSecurityCount(CalculateSecurityCount(iConfiguredCustomerCount)),
116	m_iActiveSecurityCount(CalculateSecurityCount(iActiveCustomerCount)), m_iBaseCompanyCount(baseCompanyCount),
117	m_SymbolToIdMapIsLoaded(false), m_SUFFIX_SEPARATOR(`'-'`) {
118	}
119
120	// Calculate total security count for the specified number of customers.
121	// Sort of a static method. Used in parallel generation of securities related
122	// tables.
123	//
124	TIdent CSecurityFile::CalculateSecurityCount(TIdent iCustomerCount) const {
125	return iCustomerCount / iDefaultLoadUnitSize * iOneLoadUnitSecurityCount;
126	}
127
128	// Calculate the first security id (0-based) for the specified customer id
129	//
130	TIdent CSecurityFile::CalculateStartFromSecurity(TIdent iStartFromCustomer) const {
131	return iStartFromCustomer / iDefaultLoadUnitSize * iOneLoadUnitSecurityCount;
132	}
133
134	// Create security symbol with mod/div magic.
135	//
136	// This function is needed to scale unique security
137	// symbols with the database size.
138	//
139	void CSecurityFile::CreateSymbol(TIdent iIndex, // row number
140	char szOutput, // output buffer*
141	size_t iOutputLen) // size of the output buffer (including null)
142	const {
143	TIdent iFileIndex = iIndex % m_dataFile->size();
144	TIdent iAdd = iIndex / m_dataFile->size();
145	size_t iNewLen;
146
147	// Load the base symbol
148	strncpy(szOutput, GetRecord(iFileIndex).S_SYMB_CSTR(), iOutputLen);
149
150	szOutput[iOutputLen - `1`] = `'\0'`; // Ensure NULL termination
151
152	// Add a suffix if needed
153	if (iAdd > `0`) {
154	iNewLen = strlen(szOutput);
155	CreateSuffix(iAdd, &szOutput[iNewLen], iOutputLen - iNewLen);
156	}
157	}
158
159	// Return company id for the specified row of the SECURITY table.
160	// Index can exceed the size of the Security flat file.
161	//
162	TIdent CSecurityFile::GetCompanyId(TIdent iIndex) const {
163	// Index wraps around every 6850 securities (5000 companies).
164	//
165	return (m_dataFile)[(int*)(iIndex % m_dataFile->size())].S_CO_ID() + iTIdentShift +
166	iIndex / m_dataFile->size() * m_iBaseCompanyCount;
167	}
168
169	TIdent CSecurityFile::GetCompanyIndex(TIdent Index) const {
170	// Indices and Id's are offset by 1
171	return (GetCompanyId(Index) - `1` - iTIdentShift);
172	}
173
174	// Return the number of securities in the database for
175	// a certain number of customers.
176	//
177	TIdent CSecurityFile::GetSize() const {
178	return m_iConfiguredSecurityCount;
179	}
180
181	// Return the number of securities in the database for
182	// the configured number of customers.
183	//
184	TIdent CSecurityFile::GetConfiguredSecurityCount() const {
185	return m_iConfiguredSecurityCount;
186	}
187
188	// Return the number of securities in the database for
189	// the active number of customers.
190	//
191	TIdent CSecurityFile::GetActiveSecurityCount() const {
192	return m_iActiveSecurityCount;
193	}
194
195	// Overload GetRecord to wrap around indices that
196	// are larger than the flat file
197	//
198	const SecurityDataFileRecord &CSecurityFile::GetRecord(TIdent index) const {
199	return (m_dataFile)[(int*)(index % m_dataFile->size())];
200	}
201
202	// Load the symbol-to-id map
203	// Logical const-ness - the maps and the is-loaded flag may change but the
204	// "real" Security File data is unchanged.
205	bool CSecurityFile::LoadSymbolToIdMap(void) const {
206	if (!m_SymbolToIdMapIsLoaded) {
207	int ii;
208	int limit = m_dataFile->size();
209
210	for (ii = `0`; ii < limit; ii++) {
211	string sSymbol((*m_dataFile)[ii].S_SYMB());
212	m_SymbolToIdMap [sSymbol] = (*m_dataFile)[ii].S_ID();
213	}
214	m_SymbolToIdMapIsLoaded = true;
215
216	for (ii = `0`; ii < MaxLowerCaseLetters; ii++) {
217	m_LowerCaseLetterToIntMap [LowerCaseLetters[ii]] = ii;
218	}
219	}
220	return (m_SymbolToIdMapIsLoaded);
221	}
222
223	TIdent CSecurityFile::GetId(char pSymbol) const* {
224	char *pSeparator(NULL);
225
226	if (!m_SymbolToIdMapIsLoaded) {
227	LoadSymbolToIdMap();
228	}
229	if (NULL == (pSeparator = strchr(pSymbol, m_SUFFIX_SEPARATOR))) {
230	// we're dealing with a base symbol
231	string sSymbol(pSymbol);
232	return (m_SymbolToIdMap [sSymbol]);
233	} else {
234	// we're dealing with an extended symbol
235	char *pSuffix(NULL);
236	TIdent BaseId(`0`);
237	TIdent Multiplier(`0`);
238
239	string sSymbol(pSymbol, static_cast<size_t>(pSeparator - pSymbol));
240	BaseId = m_SymbolToIdMap [sSymbol];
241
242	pSuffix = pSeparator + `1`; // The suffix starts right after the separator character
243	Multiplier = (int)ParseSuffix(pSuffix); // For now, suffix values fit in an int, cast ParseSuffix
244	// to avoid compiler warning
245
246	return ((Multiplier * m_dataFile->size()) + BaseId);
247	}
248	}
249
250	TIdent CSecurityFile::GetIndex(char pSymbol) const* {
251	// Indices and Id's are offset by 1
252	return (GetId(pSymbol) - `1`);
253	}
254
255	eExchangeID CSecurityFile::GetExchangeIndex(TIdent index) const {
256	// The mod converts a scaled security index into a base security index
257	const char pExchange = (m_dataFile)[(int)(index % m_dataFile->size())].S_EX_ID_CSTR();
258	eExchangeID eExchangeIndex;
259
260	if (!strcmp(pExchange, "NYSE")) {
261	eExchangeIndex = eNYSE;
262	} else if (!strcmp(pExchange, "NASDAQ")) {
263	eExchangeIndex = eNASDAQ;
264	} else if (!strcmp(pExchange, "AMEX")) {
265	eExchangeIndex = eAMEX;
266	} else if (!strcmp(pExchange, "PCX")) {
267	eExchangeIndex = ePCX;
268	} else {
269	assert(false);
270	}
271
272	return eExchangeIndex;
273	}
274

Browse the source code of DuckDB/third_party/tpce-tool/input/SecurityFile.cpp