1/*
2 * Legal Notice
3 *
4 * This document and associated source code (the "Work") is a part of a
5 * benchmark specification maintained by the TPC.
6 *
7 * The TPC reserves all right, title, and interest to the Work as provided
8 * under U.S. and international laws, including without limitation all patent
9 * and trademark rights therein.
10 *
11 * No Warranty
12 *
13 * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION
14 * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE
15 * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER
16 * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY,
17 * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES,
18 * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR
19 * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF
20 * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE.
21 * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT,
22 * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT
23 * WITH REGARD TO THE WORK.
24 * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO
25 * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE
26 * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS
27 * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT,
28 * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY,
29 * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT
30 * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD
31 * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES.
32 *
33 * Contributors:
34 * Gradient Systems
35 */
36#include "config.h"
37#include "porting.h"
38#include <stdio.h>
39#include "date.h"
40#include "decimal.h"
41#include "dist.h"
42#include "constants.h"
43#include "columns.h"
44#include "genrand.h"
45#include "tdefs.h"
46#include "tables.h"
47#include "build_support.h"
48#include "tpcds.idx.h"
49#include "scaling.h"
50#include "w_web_sales.h"
51#include "error_msg.h"
52#include "tdefs.h"
53#include "scd.h"
54#include "r_params.h"
55#include "sparse.h"
56
/*
 * Forward declaration: date_join() hands WEB_SITE and WEB_PAGE requests to
 * web_join(), which is defined further down in this file.
 */
static ds_key_t web_join(int col, ds_key_t join_key);
58
59/*
60 * Routine: date_join(int from_tbl, int join_count)
61 * Purpose: account for the different date-adjusted patterns in the data set
62 * Data Structures:
63 *
64 * Params:
65 * Returns:
66 * Called By:
67 * Calls:
68 * Assumptions:
69 * Side Effects:
70 * TODO: Relies on existing RNG code, which isn't really 64bit; will probably
71 * requre a rework of the genrand_xx routines
72 */
73static ds_key_t date_join(int from_tbl, int from_col, ds_key_t join_count, int nYear) {
74 int nDay, nTemp, nMin = -1, nMax = -1, nResult;
75 static int bInit = 0, jToday;
76 date_t TempDate;
77
78 if (bInit == 0) {
79 strtodt(&TempDate, TODAYS_DATE);
80 jToday = dttoj(&TempDate);
81 bInit = 1;
82 }
83
84 switch (from_tbl) {
85 case STORE_SALES:
86 case CATALOG_SALES:
87 case WEB_SALES:
88 pick_distribution(&nDay, "calendar", 1, calendar_sales + is_leap(nYear), from_col);
89 break;
90
91 /*
92 * returns are keyed to the sale date, with the lag between sale and return
93 * selected within a known range, based on sales channel
94 */
95 case STORE_RETURNS:
96 nMin = SS_MIN_SHIP_DELAY;
97 nMax = SS_MAX_SHIP_DELAY;
98 case CATALOG_RETURNS:
99 if (nMin == -1) {
100 nMin = CS_MIN_SHIP_DELAY;
101 nMax = CS_MAX_SHIP_DELAY;
102 }
103 case WEB_RETURNS:
104 if (nMin == -1) {
105 nMin = WS_MIN_SHIP_DELAY;
106 nMax = WS_MAX_SHIP_DELAY;
107 }
108 genrand_integer(&nTemp, DIST_UNIFORM, nMin * 2, nMax * 2, 0, from_col);
109 return (join_count + nTemp);
110 break;
111 case WEB_SITE:
112 case WEB_PAGE:
113 return (web_join(from_col, join_count));
114 default:
115 pick_distribution(&nDay, "calendar", 1, 1 + is_leap(nYear), from_col);
116 break;
117 }
118
119 TempDate.year = nYear;
120 TempDate.month = 1;
121 TempDate.day = 1;
122
123 nResult = dttoj(&TempDate) + nDay;
124
125 return ((ds_key_t)(nResult > jToday) ? -1 : nResult);
126}
127
128/*
129 * Routine: time_join(int from_tbl, int join_count)
130 * Purpose: create joins that are time-skewed
131 * Data Structures:
132 *
133 * Params:
134 * Returns:
135 * Called By:
136 * Calls:
137 * Assumptions:
138 * Side Effects:
139 * TODO: Relies on existing RNG code, which isn't really 64bit; will probably
140 * requre a rework of the genrand_xx routines
141 */
142static ds_key_t time_join(int to_tbl, int to_col, ds_key_t join_count) {
143 int hour, secs;
144
145 switch (to_tbl) {
146 case STORE_SALES:
147 case STORE_RETURNS:
148 pick_distribution(&hour, "hours", 1, 2, to_col);
149 break;
150 case CATALOG_SALES:
151 case WEB_SALES:
152 case CATALOG_RETURNS:
153 case WEB_RETURNS:
154 pick_distribution(&hour, "hours", 1, 3, to_col);
155 break;
156 default:
157 pick_distribution(&hour, "hours", 1, 1, to_col);
158 break;
159 }
160 genrand_integer(&secs, DIST_UNIFORM, 0, 3599, 0, to_col);
161
162 return ((ds_key_t)(hour * 3600 + secs));
163}
164
165/*
166 * Routine: cp_join(int from_tbl, int join_count)
167 * Purpose: create joins to catalog_page
168 * Data Structures:
169 *
170 * Params:
171 * Returns:
172 * Called By:
173 * Calls:
174 * Assumptions:
175 * Side Effects:
176 * TODO: None
177 */
178static ds_key_t cp_join(int tbl, int col, ds_key_t jDate) {
179 ds_key_t res;
180 static int init = 0, nPagePerCatalog;
181 int nType, nCount, nOffset, nPage;
182 static date_t dTemp;
183 char *szTemp;
184
185 if (!init) {
186 nPagePerCatalog = ((int)get_rowcount(CATALOG_PAGE) / CP_CATALOGS_PER_YEAR) / (YEAR_MAXIMUM - YEAR_MINIMUM + 2);
187 strtodt(&dTemp, DATA_START_DATE);
188 init = 1;
189 }
190
191 nType = pick_distribution(&szTemp, "catalog_page_type", 1, 2, col);
192 genrand_integer(&nPage, DIST_UNIFORM, 1, nPagePerCatalog, 0, col);
193 nOffset = (int)jDate - dTemp.julian - 1;
194 nCount = (nOffset / 365) * CP_CATALOGS_PER_YEAR;
195 nOffset %= 365;
196
197 switch (nType) {
198 case 1: /* bi-annual */
199 if (nOffset > 183)
200 nCount += 1;
201 break;
202 case 2: /* quarterly */
203 nCount += (nOffset / 91);
204 break;
205 case 3: /* monthly */
206 nCount += (nOffset / 31);
207 break;
208 }
209
210 res = CP_SK(nCount, nPagePerCatalog, nPage);
211
212 return (res);
213}
214/*
215 * Routine:
216 * Purpose:
217 * Algorithm:
218 * Data Structures:
219 *
220 * Params:
221 * Returns:
222 * Called By:
223 * Calls:
224 * Assumptions:
225 * Side Effects:
226 * TODO: None
227 */
228ds_key_t getCatalogNumberFromPage(ds_key_t kPageNumber) {
229 static int bInit = 0;
230 static int nPagePerCatalog;
231
232 if (!bInit) {
233 nPagePerCatalog = ((int)get_rowcount(CATALOG_PAGE) / CP_CATALOGS_PER_YEAR) / (YEAR_MAXIMUM - YEAR_MINIMUM + 2);
234 bInit = 1;
235 }
236
237 return (kPageNumber / nPagePerCatalog);
238}
239
240/*
241 * Routine: web_join(int col, ds_key_t join_key)
242 * Purpose: create joins to web_site/web_page. These need to be handled
243 *together, since the date of transaction must fit within the lifetime of a
244 *particular page, which must fit within the lifetime of a particular site Data
245 *Structures:
246 *
247 * Params:
248 * join_key is one of two things:
249 * 1. the xxx_sk for a particular row in the dimension for which we need
250 *appropriate dates
251 * 2. a julian date for which we need to pick a valid xxx_sk value
252 * Returns:
253 * Called By:
254 * Calls:
255 * Assumptions:
256 * Side Effects:
257 * TODO: None
258 */
259static ds_key_t web_join(int col, ds_key_t join_key) {
260 ds_key_t res = -1, kSite;
261 static int init = 0, nConcurrentSites, nSiteDuration, nOffset;
262 static date_t dSiteOpen, /* open/close dates for current web site */
263 dSiteClose;
264 int nTemp;
265 tdef *pWS = getSimpleTdefsByNumber(WEB_SITE);
266 tdef *pWP = getSimpleTdefsByNumber(WEB_PAGE);
267
268 if (!init) {
269 strtodt(&dSiteClose, WEB_END_DATE);
270 nSiteDuration = dSiteClose.julian;
271 nConcurrentSites = (int)get_rowcount(CONCURRENT_WEB_SITES);
272 strtodt(&dSiteOpen, WEB_START_DATE);
273 nSiteDuration -= dSiteOpen.julian;
274 nSiteDuration *= nConcurrentSites;
275 nOffset = (dSiteClose.julian - dSiteOpen.julian) / (2 * nSiteDuration);
276 init = 1;
277 }
278
279 switch (col) {
280 /**************
281 * join_key is the xxx_sk value for a dimension
282 */
283 case WEB_OPEN_DATE:
284 strtodt(&dSiteOpen, DATE_MINIMUM);
285 res = dSiteOpen.julian - ((join_key * WEB_DATE_STAGGER) % nSiteDuration / 2);
286 if (WEB_IS_REPLACED(join_key)) /* this site is completely replaced */
287 {
288 if (WEB_IS_REPLACEMENT(join_key)) /* this is the second site */
289 {
290 /* the open date of the second site needs to align on a revision
291 * boundary */
292 res += nOffset * nSiteDuration;
293 }
294 }
295 break;
296 case WEB_CLOSE_DATE:
297 strtodt(&dSiteOpen, DATE_MINIMUM);
298 res = dSiteOpen.julian - ((join_key * WEB_DATE_STAGGER) % nSiteDuration / 2);
299 res += pWS->nParam * nSiteDuration;
300 if (WEB_IS_REPLACED(join_key)) /* this site is completely replaced */
301 {
302 if (!WEB_IS_REPLACEMENT(join_key)) /* this is the first site */
303 {
304 /* the close date of the first site needs to align on a revision
305 * boundary */
306 res -= pWS->nParam * nSiteDuration / 2;
307 }
308 }
309 break;
310 case WEB_REC_START_DATE_ID:
311 strtodt(&dSiteOpen, DATE_MINIMUM);
312 res = dSiteOpen.julian - (((join_key - 1) * WEB_DATE_STAGGER) % nSiteDuration / 2);
313 res += (join_key % pWS->nParam) * nSiteDuration;
314 break;
315 case WEB_REC_END_DATE_ID:
316 strtodt(&dSiteOpen, DATE_MINIMUM);
317 res = dSiteOpen.julian - ((join_key * WEB_DATE_STAGGER) % nSiteDuration / 2);
318 res += ((join_key + 1) % pWS->nParam) * nSiteDuration * 5 - 1;
319 break;
320 case WP_REC_START_DATE_ID:
321 strtodt(&dSiteOpen, DATE_MINIMUM);
322 res = dSiteOpen.julian - (((join_key - 1) * WEB_DATE_STAGGER) % nSiteDuration / 2);
323 res += (join_key % pWP->nParam) * nSiteDuration * 5;
324 break;
325 case WP_REC_END_DATE_ID:
326 strtodt(&dSiteOpen, DATE_MINIMUM);
327 res = dSiteOpen.julian - ((join_key * WEB_DATE_STAGGER) % nSiteDuration / 2);
328 res += ((join_key + 1) % pWP->nParam) * nSiteDuration - 1;
329 break;
330 case WP_CREATION_DATE_SK:
331 /* page creation has to happen outside of the page window, to assure a
332 * constant number of pages, so it occurs in the gap between site
333 * creation and the site's actual activity. For sites that are replaced
334 * in the time span of the data set, this will depend on whether they
335 * are the first version or the second
336 */
337 strtodt(&dSiteOpen, DATE_MINIMUM);
338 kSite = join_key / WEB_PAGES_PER_SITE + 1;
339 res = dSiteOpen.julian - (((int)kSite * WEB_DATE_STAGGER) % nSiteDuration / 2);
340 if (((int)kSite % pWP->nParam) == 0) /* this is a site that gets replaced */
341 {
342 genrand_integer(&nTemp, DIST_UNIFORM, (int)res, dSiteOpen.julian, 0, col);
343 res = nTemp;
344 }
345 break;
346 /*****************
347 * join key from here on is a date for which a valid site/page must be
348 * found the sk for a web page is a compound value: <site id><page id>
349 * and each component is a combination of the unique site or page and
350 * the active revision to it
351 */
352 case WR_WEB_PAGE_SK:
353 case WS_WEB_PAGE_SK:
354 res = genrand_integer(NULL, DIST_UNIFORM, 1, WEB_PAGES_PER_SITE, 0, col);
355 break;
356 }
357
358 return (res);
359}
360
361/*
362 * Routine: mk_join(int from_tbl, int to_tbl, int join_count)
363 * Purpose: return a primary key for to_tbl, creating a join between from_tbl
364 *and to_tbl Algorithm: all joins are currently uniformly distributed. The
365 *calling convention allows for each join in the schema to be distributed
366 *differently Data Structures:
367 *
368 * Params:
369 * Returns:
370 * Called By:
371 * Calls:
372 * Assumptions:
373 * Side Effects:
374 * TODO: Relies on existing RNG code, which isn't really 64bit; will probably
375 *requre a rework of the genrand_xx routines
376 */
377ds_key_t mk_join(int from_col, int to_tbl, ds_key_t join_count) {
378 ds_key_t res;
379 int nYear, nFromTable = 0, nTableIndex = to_tbl;
380 tdef *pTdef;
381
382 nFromTable = getTableFromColumn(from_col);
383
384 /*
385 * if the table being joined to employs sparse keys, the join gets handled
386 * in sparse.c
387 */
388 pTdef = getSimpleTdefsByNumber(to_tbl);
389 if (pTdef->flags & FL_SPARSE) {
390 if (pTdef->arSparseKeys == NULL)
391 initSparseKeys(to_tbl);
392 }
393
394 switch (to_tbl) {
395 /* some tables require special handling */
396 case CATALOG_PAGE:
397 return (cp_join(nFromTable, from_col, join_count));
398 case DATET:
399 genrand_integer(&nYear, DIST_UNIFORM, YEAR_MINIMUM, YEAR_MAXIMUM, 0, from_col);
400 return (date_join(nFromTable, from_col, join_count, nYear));
401 case TIME:
402 return (time_join(nFromTable, from_col, join_count));
403 /* the rest of the tables use standard, uniform joins */
404 default:
405 /*
406 * all TYPE2 tables (i.e., history keeping dimensions) need a special
407 * join algorithm
408 */
409 if (pTdef->flags & FL_TYPE_2)
410 return (scd_join(nTableIndex, from_col, join_count));
411
412 if (pTdef->flags & FL_SPARSE)
413 return (randomSparseKey(nTableIndex, from_col));
414
415 genrand_key(&res, DIST_UNIFORM, (ds_key_t)1, get_rowcount(nTableIndex), (ds_key_t)0, from_col);
416 break;
417 }
418
419 return ((ds_key_t)res);
420}
421