build_support.c source code [DuckDB/third_party/dsdgen/dsdgen-c/build_support.c]

1	/*
2	* Legal Notice
3	*
4	* This document and associated source code (the "Work") is a part of a
5	* benchmark specification maintained by the TPC.
6	*
7	* The TPC reserves all right, title, and interest to the Work as provided
8	* under U.S. and international laws, including without limitation all patent
9	* and trademark rights therein.
10	*
11	* No Warranty
12	*
13	* 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION
14	* CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE
15	* AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER
16	* WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY,
17	* INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES,
18	* DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR
19	* PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF
20	* WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE.
21	* ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT,
22	* QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT
23	* WITH REGARD TO THE WORK.
24	* 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO
25	* ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE
26	* COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS
27	* OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT,
28	* INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY,
29	* OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT
30	* RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD
31	* ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES.
32	*
33	* Contributors:
34	* Gradient Systems
35	*/
36	#include "config.h"
37	#include "porting.h"
38	#include <stdio.h>
39	#ifndef WIN32
40	#include <netinet/in.h>
41	#endif
42	#include <math.h>
43	#include "decimal.h"
44	#include "constants.h"
45	#include "dist.h"
46	#include "r_params.h"
47	#include "genrand.h"
48	#include "tdefs.h"
49	#include "tables.h"
50	#include "build_support.h"
51	#include "genrand.h"
52	#include "columns.h"
53	#include "StringBuffer.h"
54	#include "error_msg.h"
55	#include "scaling.h"
56
57	/*
58	* Routine: hierarchy_item
59	* Purpose:
60	* select the hierarchy entry for this level
61	* Algorithm: Assumes a top-down ordering
62	* Data Structures:
63	*
64	* Params:
65	* Returns:
66	* Called By:
67	* Calls:
68	* Assumptions:
69	* Side Effects:
70	* TODO:
71	*/
72	void hierarchy_item(int h_level, ds_key_t id, char* **name, ds_key_t kIndex) {
73	static int bInit = `0`, nLastCategory = -`1`, nLastClass = -`1`, nBrandBase;
74	int nBrandCount;
75	static char *szClassDistName = NULL;
76	char sTemp[`6`];
77
78	if (!bInit) {
79	bInit = `1`;
80	}
81
82	switch (h_level) {
83	case I_CATEGORY:
84	nLastCategory = pick_distribution(name, "categories", `1`, `1`, h_level);
85	*id = nLastCategory;
86	nBrandBase = nLastCategory;
87	nLastClass = -`1`;
88	break;
89	case I_CLASS:
90	if (nLastCategory == -`1`)
91	ReportErrorNoLine(DBGEN_ERROR_HIERACHY_ORDER, "I_CLASS before I_CATEGORY", `1`);
92	dist_member(&szClassDistName, "categories", nLastCategory, `2`);
93	nLastClass = pick_distribution(name, szClassDistName, `1`, `1`, h_level);
94	nLastCategory = -`1`;
95	*id = nLastClass;
96	break;
97	case I_BRAND:
98	if (nLastClass == -`1`)
99	ReportErrorNoLine(DBGEN_ERROR_HIERACHY_ORDER, "I_BRAND before I_CLASS", `1`);
100	dist_member(&nBrandCount, szClassDistName, nLastClass, `2`);
101	*id = kIndex % nBrandCount + `1`;
102	mk_word(name, "brand_syllables", nBrandBase `10` + nLastClass, `45`, I_BRAND);
103	sprintf(sTemp, " #%d", (int)*id);
104	strcat(*name, sTemp);
105	id += (nBrandBase `1000` + nLastClass) * `1000`;
106	break;
107	default:
108	printf("ERROR: Invalid call to hierarchy_item with argument '%d'\n", h_level);
109	exit(`1`);
110	}
111
112	return;
113	}
114
115	/*
116	* Routine: mk_companyname()
117	* Purpose:
118	* yet another member of a set of routines used for address creation
119	* Algorithm:
120	* create a hash, based on an index value, so that the same result can be
121	*derived reliably and then build a word from a syllable set Data Structures:
122	*
123	* Params:
124	* char * dest: target for resulting name
125	* int nTable: to allow differing distributions
126	* int nCompany: index value
127	* Returns:
128	* Called By:
129	* Calls:
130	* Assumptions:
131	* Side Effects:
132	* TODO:
133	* 20010615 JMS return code is meaningless
134	* 20030422 JMS should be replaced if there is no per-table variation
135	*/
136	int mk_companyname(char dest, int* nTable, int nCompany) {
137	mk_word(dest, "syllables", nCompany, `10`, CC_COMPANY_NAME);
138
139	return (`0`);
140	}
141
142	/*
143	* Routine: set_locale()
144	* Purpose:
145	* generate a reasonable lattitude and longitude based on a region and the USGS
146	*data on 3500 counties in the US Algorithm: Data Structures:
147	*
148	* Params:
149	* Returns:
150	* Called By:
151	* Calls:
152	* Assumptions:
153	* Side Effects:
154	* TODO: 20011230 JMS set_locale() is just a placeholder; do we need geographic
155	*coords?
156	*/
157	int set_locale(int nRegion, decimal_t longitude, decimal_t latitude) {
158	static int init = `0`;
159	static decimal_t dZero;
160
161	if (!init) {
162	strtodec(&dZero, "0.00");
163	init = `1`;
164	}
165
166	memcpy(longitude, &dZero, sizeof(decimal_t));
167	memcpy(latitude, &dZero, sizeof(decimal_t));
168
169	return (`0`);
170	}
171
172	/*
173	* Routine:
174	* Purpose:
175	* Algorithm:
176	* Data Structures:
177	*
178	* Params:
179	* Returns:
180	* Called By:
181	* Calls:
182	* Assumptions:
183	* Side Effects:
184	* TODO: None
185	*/
186	void bitmap_to_dist(void pDest, char* distname, ds_key_t modulus, int vset, int stream) {
187	int32_t m, s;
188	char msg[`80`];
189
190	if ((s = distsize(distname)) == -`1`) {
191	sprintf(msg, "Invalid distribution name '%s'", distname);
192	INTERNAL(msg);
193	}
194	m = (int32_t)((*modulus % s) + `1`);
195	*modulus /= s;
196
197	dist_member(pDest, distname, m, vset);
198
199	return;
200	}
201
/*
 * Routine: void dist_to_bitmap(int *pDest, char *szDistName, int nValue, int
 *	nWeight, int nStream)
 * Purpose: Reverse engineer a composite key based on distributions
 * Algorithm:
 *	scale the running key by the size of the distribution, then add the
 *	value returned by pick_distribution()
 * Data Structures:
 *
 * Params:
 *	int *pDest: running composite key, updated in place
 *	char *szDistName: name of the distribution
 *	int nValue, int nWeight, int nStream: forwarded to pick_distribution()
 * Returns: nothing
 * Called By:
 * Calls: distsize(), pick_distribution()
 * Assumptions:
 * Side Effects:
 * TODO: None
 */
void dist_to_bitmap(int *pDest, char *szDistName, int nValue, int nWeight, int nStream) {
	*pDest *= distsize(szDistName);
	*pDest += pick_distribution(NULL, szDistName, nValue, nWeight, nStream);

	return;
}
221
/*
 * Routine: void random_to_bitmap(int *pDest, int nDist, int nMin, int nMax, int
 *	nMean, int nStream)
 * Purpose: Reverse engineer a composite key based on an integer range
 * Algorithm:
 *	scale the running key by nMax, then add the value returned by
 *	genrand_integer()
 * Data Structures:
 *
 * Params:
 *	int *pDest: running composite key, updated in place
 *	int nDist, int nMin, int nMax, int nMean, int nStream: forwarded to
 *		genrand_integer(); nMax is also the scale factor
 * Returns: nothing
 * Called By:
 * Calls: genrand_integer()
 * Assumptions:
 * Side Effects:
 * TODO: None
 */
void random_to_bitmap(int *pDest, int nDist, int nMin, int nMax, int nMean, int nStream) {
	*pDest *= nMax;
	*pDest += genrand_integer(NULL, nDist, nMin, nMax, nMean, nStream);

	return;
}
241
242	/*
243	* Routine: mk_word()
244	* Purpose:
245	* generate a gibberish word from a given syllable set
246	* Algorithm:
247	* Data Structures:
248	*
249	* Params:
250	* Returns:
251	* Called By:
252	* Calls:
253	* Assumptions:
254	* Side Effects:
255	* TODO:
256	*/
257	void mk_word(char dest, char* syl_set, ds_key_t src, int* char_cnt, int col) {
258	ds_key_t i = src, nSyllableCount;
259	char *cp;
260
261	*dest = `'\0'`;
262	while (i > `0`) {
263	nSyllableCount = distsize(syl_set);
264	dist_member(&cp, syl_set, (int)(i % nSyllableCount) + `1`, `1`);
265	i /= nSyllableCount;
266	if ((int)(strlen(dest) + strlen(cp)) <= char_cnt)
267	strcat(dest, cp);
268	else
269	break;
270	}
271
272	return;
273	}
274
/*
 * Routine: mk_surrogate()
 * Purpose: create a character based surrogate key from a 64-bit value
 * Algorithm: since the RNG routines produce a 32-bit value, and surrogate keys
 *	can reach beyond that, use the RNG output to generate the lower end of a
 *	random string, and build the upper end from a ds_key_t
 * Data Structures:
 *
 * Params:
 * Returns:
 * Called By:
 * Calls: ltoc()
 * Assumptions: output is a 16 character string. Space is not checked
 * Side Effects:
 * TODO:
 *	20020830 jms may need to define a 64-bit form of htonl() for portable
 *	shift operations
 */
/* One letter per 4-bit value: 0 -> 'A' ... 15 -> 'P'. Exactly 16 bytes, so the
 * array deliberately holds no terminator and is only ever indexed. */
static const char szXlate[16] = "ABCDEFGHIJKLMNOP";

/*
 * Encode the low 32 bits of nVal as 8 letters, least significant nibble
 * first, followed by a terminator. szDest must have room for 9 bytes.
 */
static void ltoc(char *szDest, unsigned long nVal) {
	int i;

	for (i = 0; i < 8; i++) {
		*szDest++ = szXlate[nVal & 0xFUL];
		nVal >>= 4;
	}
	*szDest = '\0';
}
304
305	void mk_bkey(char szDest, ds_key_t kPrimary, int* nStream) {
306	unsigned long nTemp;
307
308	nTemp = (unsigned long)(kPrimary >> `32`);
309	ltoc(szDest, nTemp);
310
311	nTemp = (unsigned long)(kPrimary & `0xFFFFFFFF`);
312	ltoc(szDest + `8`, nTemp);
313
314	return;
315	}
316
317	/*
318	* Routine: embed_string(char szDest, char szDist, int nValue, int nWeight,
319	* int nStream) Purpose: Algorithm: Data Structures:
320	*
321	* Params:
322	* Returns:
323	* Called By:
324	* Calls:
325	* Assumptions:
326	* Side Effects:
327	* TODO: None
328	*/
329	int embed_string(char szDest, char* szDist, int* nValue, int nWeight, int nStream) {
330	int nPosition;
331	char *szWord = NULL;
332
333	pick_distribution(&szWord, szDist, nValue, nWeight, nStream);
334	nPosition = genrand_integer(NULL, DIST_UNIFORM, `0`, strlen(szDest) - strlen(szWord) - `1`, `0`, nStream);
335	memcpy(&szDest[nPosition], szWord, sizeof(char) * strlen(szWord));
336
337	return (`0`);
338	}
339
/*
 * Routine: SetScaleIndex()
 * Purpose: link SCALE and SCALE_INDEX
 * Algorithm:
 *	index = 1 + (int)log10(scale), with a scale that parses to 0 or less
 *	treated as 1; the index is stored as a one character string in the
 *	"_SCALE_INDEX" parameter
 * Data Structures:
 *
 * Params:
 *	char *szName: currently unused
 *	char *szValue: scale as a decimal string
 * Returns: atoi(szValue), i.e. the scale as parsed, without the adjustment
 *	applied for the index computation
 * Called By:
 * Calls: set_int()
 * Assumptions:
 * Side Effects: updates the "_SCALE_INDEX" parameter
 * TODO: None
 */
int SetScaleIndex(char *szName, char *szValue) {
	int nParsed, nScale;
	char szScale[2];

	nParsed = atoi(szValue);
	/* log10() is only meaningful for positive arguments */
	nScale = (nParsed <= 0) ? 1 : nParsed;

	nScale = 1 + (int)log10((double)nScale);
	szScale[0] = (char)('0' + nScale);
	szScale[1] = '\0';

	set_int("_SCALE_INDEX", szScale);

	return (nParsed);
}
369
370	/*
371	* Routine: adjust the valid date window for source schema tables, based on
372	* based on the update count, update window size, etc.
373	* Purpose:
374	* Algorithm:
375	* Data Structures:
376	*
377	* Params:
378	* Returns:
379	* Called By:
380	* Calls:
381	* Assumptions:
382	* Side Effects:
383	* TODO: None
384	*/
385	void setUpdateDateRange(int nTable, date_t pMinDate, date_t pMaxDate) {
386	static int nUpdateNumber, bInit = `0`;
387
388	if (!bInit) {
389	nUpdateNumber = get_int("UPDATE");
390	bInit = `1`;
391	}
392
393	switch (nTable) / no per-table changes at the moment; but could be /
394	{
395	default:
396	strtodt(pMinDate, WAREHOUSE_LOAD_DATE);
397	pMinDate->julian += UPDATE_INTERVAL * (nUpdateNumber - `1`);
398	jtodt(pMinDate, pMinDate->julian);
399	jtodt(pMaxDate, pMinDate->julian + UPDATE_INTERVAL);
400	break;
401	}
402
403	return;
404	}
405

Browse the source code of DuckDB/third_party/dsdgen/dsdgen-c/build_support.c