1#include "dbgen.hpp"
2
3#include "duckdb/common/exception.hpp"
4#include "duckdb/common/types/date.hpp"
5#include "dbgen_gunk.hpp"
6#include "duckdb/parser/column_definition.hpp"
7#include "tpch_constants.hpp"
8#include "duckdb/main/appender.hpp"
9
10#define DECLARER /* EXTERN references get defined here */
11
12#include "dss.h"
13#include "dsstypes.h"
14
15using namespace duckdb;
16using namespace std;
17
18extern seed_t Seed[];
19seed_t seed_backup[MAX_STREAM + 1];
20static bool first_invocation = true;
21
22tdef tdefs[] = {
23 {"part.tbl", "part table", 200000, NULL, NULL, PSUPP, 0},
24 {"partsupp.tbl", "partsupplier table", 200000, NULL, NULL, NONE, 0},
25 {"supplier.tbl", "suppliers table", 10000, NULL, NULL, NONE, 0},
26 {"customer.tbl", "customers table", 150000, NULL, NULL, NONE, 0},
27 {"orders.tbl", "order table", 150000, NULL, NULL, LINE, 0},
28 {"lineitem.tbl", "lineitem table", 150000, NULL, NULL, NONE, 0},
29 {"orders.tbl", "orders/lineitem tables", 150000, NULL, NULL, LINE, 0},
30 {"part.tbl", "part/partsupplier tables", 200000, NULL, NULL, PSUPP, 0},
31 {"nation.tbl", "nation table", NATIONS_MAX, NULL, NULL, NONE, 0},
32 {"region.tbl", "region table", NATIONS_MAX, NULL, NULL, NONE, 0},
33};
34
35namespace tpch {
36
37struct tpch_append_information {
38 unique_ptr<Appender> appender;
39};
40
41void append_value(tpch_append_information &info, int32_t value) {
42 info.appender->Append<int32_t>(value);
43}
44
45void append_string(tpch_append_information &info, const char *value) {
46 info.appender->Append<Value>(Value(value));
47}
48
49void append_decimal(tpch_append_information &info, int64_t value) {
50 info.appender->Append<double>(value / 100.0);
51}
52
53void append_date(tpch_append_information &info, string value) {
54 info.appender->Append<int32_t>(Date::FromString(value));
55}
56
57void append_char(tpch_append_information &info, char value) {
58 char val[2];
59 val[0] = value;
60 val[1] = '\0';
61 append_string(info, val);
62}
63
64static void append_order(order_t *o, tpch_append_information *info) {
65 auto &append_info = info[ORDER];
66
67 // fill the current row with the order information
68 append_info.appender->BeginRow();
69 // o_orderkey
70 append_value(append_info, o->okey);
71 // o_custkey
72 append_value(append_info, o->custkey);
73 // o_orderstatus
74 append_char(append_info, o->orderstatus);
75 // o_totalprice
76 append_decimal(append_info, o->totalprice);
77 // o_orderdate
78 append_date(append_info, o->odate);
79 // o_orderpriority
80 append_string(append_info, o->opriority);
81 // o_clerk
82 append_string(append_info, o->clerk);
83 // o_shippriority
84 append_value(append_info, o->spriority);
85 // o_comment
86 append_string(append_info, o->comment);
87 append_info.appender->EndRow();
88}
89
90static void append_line(order_t *o, tpch_append_information *info) {
91 auto &append_info = info[LINE];
92
93 // fill the current row with the order information
94 for (DSS_HUGE i = 0; i < o->lines; i++) {
95 append_info.appender->BeginRow();
96 // l_orderkey
97 append_value(append_info, o->l[i].okey);
98 // l_partkey
99 append_value(append_info, o->l[i].partkey);
100 // l_suppkey
101 append_value(append_info, o->l[i].suppkey);
102 // l_linenumber
103 append_value(append_info, o->l[i].lcnt);
104 // l_quantity
105 append_value(append_info, o->l[i].quantity);
106 // l_extendedprice
107 append_decimal(append_info, o->l[i].eprice);
108 // l_discount
109 append_decimal(append_info, o->l[i].discount);
110 // l_tax
111 append_decimal(append_info, o->l[i].tax);
112 // l_returnflag
113 append_char(append_info, o->l[i].rflag[0]);
114 // l_linestatus
115 append_char(append_info, o->l[i].lstatus[0]);
116 // l_shipdate
117 append_date(append_info, o->l[i].sdate);
118 // l_commitdate
119 append_date(append_info, o->l[i].cdate);
120 // l_receiptdate
121 append_date(append_info, o->l[i].rdate);
122 // l_shipinstruct
123 append_string(append_info, o->l[i].shipinstruct);
124 // l_shipmode
125 append_string(append_info, o->l[i].shipmode);
126 // l_comment
127 append_string(append_info, o->l[i].comment);
128 append_info.appender->EndRow();
129 }
130}
131
132static void append_order_line(order_t *o, tpch_append_information *info) {
133 append_order(o, info);
134 append_line(o, info);
135}
136
137static void append_supp(supplier_t *supp, tpch_append_information *info) {
138 auto &append_info = info[SUPP];
139
140 append_info.appender->BeginRow();
141 // s_suppkey
142 append_value(append_info, supp->suppkey);
143 // s_name
144 append_string(append_info, supp->name);
145 // s_address
146 append_string(append_info, supp->address);
147 // s_nationkey
148 append_value(append_info, supp->nation_code);
149 // s_phone
150 append_string(append_info, supp->phone);
151 // s_acctbal
152 append_decimal(append_info, supp->acctbal);
153 // s_comment
154 append_string(append_info, supp->comment);
155 append_info.appender->EndRow();
156}
157
158static void append_cust(customer_t *c, tpch_append_information *info) {
159 auto &append_info = info[CUST];
160
161 append_info.appender->BeginRow();
162 // c_custkey
163 append_value(append_info, c->custkey);
164 // c_name
165 append_string(append_info, c->name);
166 // c_address
167 append_string(append_info, c->address);
168 // c_nationkey
169 append_value(append_info, c->nation_code);
170 // c_phone
171 append_string(append_info, c->phone);
172 // c_acctbal
173 append_decimal(append_info, c->acctbal);
174 // c_mktsegment
175 append_string(append_info, c->mktsegment);
176 // c_comment
177 append_string(append_info, c->comment);
178 append_info.appender->EndRow();
179}
180
181static void append_part(part_t *part, tpch_append_information *info) {
182 auto &append_info = info[PART];
183
184 append_info.appender->BeginRow();
185 // p_partkey
186 append_value(append_info, part->partkey);
187 // p_name
188 append_string(append_info, part->name);
189 // p_mfgr
190 append_string(append_info, part->mfgr);
191 // p_brand
192 append_string(append_info, part->brand);
193 // p_type
194 append_string(append_info, part->type);
195 // p_size
196 append_value(append_info, part->size);
197 // p_container
198 append_string(append_info, part->container);
199 // p_retailprice
200 append_decimal(append_info, part->retailprice);
201 // p_comment
202 append_string(append_info, part->comment);
203 append_info.appender->EndRow();
204}
205
206static void append_psupp(part_t *part, tpch_append_information *info) {
207 auto &append_info = info[PSUPP];
208 for (size_t i = 0; i < SUPP_PER_PART; i++) {
209 append_info.appender->BeginRow();
210 // ps_partkey
211 append_value(append_info, part->s[i].partkey);
212 // ps_suppkey
213 append_value(append_info, part->s[i].suppkey);
214 // ps_availqty
215 append_value(append_info, part->s[i].qty);
216 // ps_supplycost
217 append_decimal(append_info, part->s[i].scost);
218 // ps_comment
219 append_string(append_info, part->s[i].comment);
220 append_info.appender->EndRow();
221 }
222}
223
224static void append_part_psupp(part_t *part, tpch_append_information *info) {
225 append_part(part, info);
226 append_psupp(part, info);
227}
228
229static void append_nation(code_t *c, tpch_append_information *info) {
230 auto &append_info = info[NATION];
231
232 append_info.appender->BeginRow();
233 // n_nationkey
234 append_value(append_info, c->code);
235 // n_name
236 append_string(append_info, c->text);
237 // n_regionkey
238 append_value(append_info, c->join);
239 // n_comment
240 append_string(append_info, c->comment);
241 append_info.appender->EndRow();
242}
243
244static void append_region(code_t *c, tpch_append_information *info) {
245 auto &append_info = info[REGION];
246
247 append_info.appender->BeginRow();
248 // r_regionkey
249 append_value(append_info, c->code);
250 // r_name
251 append_string(append_info, c->text);
252 // r_comment
253 append_string(append_info, c->comment);
254 append_info.appender->EndRow();
255}
256
257static void gen_tbl(int tnum, DSS_HUGE count, tpch_append_information *info) {
258 order_t o;
259 supplier_t supp;
260 customer_t cust;
261 part_t part;
262 code_t code;
263
264 for (DSS_HUGE i = 1; count; count--, i++) {
265 row_start(tnum);
266 switch (tnum) {
267 case LINE:
268 case ORDER:
269 case ORDER_LINE:
270 mk_order(i, &o, 0);
271 append_order_line(&o, info);
272 break;
273 case SUPP:
274 mk_supp(i, &supp);
275 append_supp(&supp, info);
276 break;
277 case CUST:
278 mk_cust(i, &cust);
279 append_cust(&cust, info);
280 break;
281 case PSUPP:
282 case PART:
283 case PART_PSUPP:
284 mk_part(i, &part);
285 append_part_psupp(&part, info);
286 break;
287 case NATION:
288 mk_nation(i, &code);
289 append_nation(&code, info);
290 break;
291 case REGION:
292 mk_region(i, &code);
293 append_region(&code, info);
294 break;
295 }
296 row_stop_h(tnum);
297 }
298}
299
300string get_table_name(int num) {
301 switch (num) {
302 case PART:
303 return "part";
304 case PSUPP:
305 return "partsupp";
306 case SUPP:
307 return "supplier";
308 case CUST:
309 return "customer";
310 case ORDER:
311 return "orders";
312 case LINE:
313 return "lineitem";
314 case NATION:
315 return "nation";
316 case REGION:
317 return "region";
318 default:
319 return "";
320 }
321}
322
323static string RegionSchema(string schema, string suffix) {
324 return "CREATE TABLE " + schema + ".region" + suffix +
325 " ("
326 "r_regionkey INT NOT NULL,"
327 "r_name VARCHAR(25) NOT NULL,"
328 "r_comment VARCHAR(152) NOT NULL);";
329}
330
331static string NationSchema(string schema, string suffix) {
332 return "CREATE TABLE " + schema + ".nation" + suffix +
333 " ("
334 "n_nationkey INT NOT NULL,"
335 "n_name VARCHAR(25) NOT NULL,"
336 "n_regionkey INT NOT NULL,"
337 "n_comment VARCHAR(152) NOT NULL);";
338}
339
340static string SupplierSchema(string schema, string suffix) {
341 return "CREATE TABLE " + schema + ".supplier" + suffix +
342 " ("
343 "s_suppkey INT NOT NULL,"
344 "s_name VARCHAR(25) NOT NULL,"
345 "s_address VARCHAR(40) NOT NULL,"
346 "s_nationkey INT NOT NULL,"
347 "s_phone VARCHAR(15) NOT NULL,"
348 "s_acctbal DECIMAL(15,2) NOT NULL,"
349 "s_comment VARCHAR(101) NOT NULL);";
350}
351
352static string CustomerSchema(string schema, string suffix) {
353 return "CREATE TABLE " + schema + ".customer" + suffix +
354 " ("
355 "c_custkey INT NOT NULL,"
356 "c_name VARCHAR(25) NOT NULL,"
357 "c_address VARCHAR(40) NOT NULL,"
358 "c_nationkey INT NOT NULL,"
359 "c_phone VARCHAR(15) NOT NULL,"
360 "c_acctbal DECIMAL(15,2) NOT NULL,"
361 "c_mktsegment VARCHAR(10) NOT NULL,"
362 "c_comment VARCHAR(117) NOT NULL);";
363}
364
365static string PartSchema(string schema, string suffix) {
366 return "CREATE TABLE " + schema + ".part" + suffix +
367 " ("
368 "p_partkey INT NOT NULL,"
369 "p_name VARCHAR(55) NOT NULL,"
370 "p_mfgr VARCHAR(25) NOT NULL,"
371 "p_brand VARCHAR(10) NOT NULL,"
372 "p_type VARCHAR(25) NOT NULL,"
373 "p_size INT NOT NULL,"
374 "p_container VARCHAR(10) NOT NULL,"
375 "p_retailprice DECIMAL(15,2) NOT NULL,"
376 "p_comment VARCHAR(23) NOT NULL);";
377}
378
379static string PartSuppSchema(string schema, string suffix) {
380 return "CREATE TABLE " + schema + ".partsupp" + suffix +
381 " ("
382 "ps_partkey INT NOT NULL,"
383 "ps_suppkey INT NOT NULL,"
384 "ps_availqty INT NOT NULL,"
385 "ps_supplycost DECIMAL(15,2) NOT NULL,"
386 "ps_comment VARCHAR(199) NOT NULL);";
387}
388
389static string OrdersSchema(string schema, string suffix) {
390 return "CREATE TABLE " + schema + ".orders" + suffix +
391 " ("
392 "o_orderkey INT NOT NULL,"
393 "o_custkey INT NOT NULL,"
394 "o_orderstatus VARCHAR(1) NOT NULL,"
395 "o_totalprice DECIMAL(15,2) NOT NULL,"
396 "o_orderdate DATE NOT NULL,"
397 "o_orderpriority VARCHAR(15) NOT NULL,"
398 "o_clerk VARCHAR(15) NOT NULL,"
399 "o_shippriority INT NOT NULL,"
400 "o_comment VARCHAR(79) NOT NULL);";
401}
402
403static string LineitemSchema(string schema, string suffix) {
404 return "CREATE TABLE " + schema + ".lineitem" + suffix +
405 " ("
406 "l_orderkey INT NOT NULL,"
407 "l_partkey INT NOT NULL,"
408 "l_suppkey INT NOT NULL,"
409 "l_linenumber INT NOT NULL,"
410 "l_quantity INTEGER NOT NULL,"
411 "l_extendedprice DECIMAL(15,2) NOT NULL,"
412 "l_discount DECIMAL(15,2) NOT NULL,"
413 "l_tax DECIMAL(15,2) NOT NULL,"
414 "l_returnflag VARCHAR(1) NOT NULL,"
415 "l_linestatus VARCHAR(1) NOT NULL,"
416 "l_shipdate DATE NOT NULL,"
417 "l_commitdate DATE NOT NULL,"
418 "l_receiptdate DATE NOT NULL,"
419 "l_shipinstruct VARCHAR(25) NOT NULL,"
420 "l_shipmode VARCHAR(10) NOT NULL,"
421 "l_comment VARCHAR(44) NOT NULL)";
422}
423
424void dbgen(double flt_scale, DuckDB &db, string schema, string suffix) {
425 unique_ptr<QueryResult> result;
426 Connection con(db);
427 con.Query("BEGIN TRANSACTION");
428
429 con.Query(RegionSchema(schema, suffix));
430 con.Query(NationSchema(schema, suffix));
431 con.Query(SupplierSchema(schema, suffix));
432 con.Query(CustomerSchema(schema, suffix));
433 con.Query(PartSchema(schema, suffix));
434 con.Query(PartSuppSchema(schema, suffix));
435 con.Query(OrdersSchema(schema, suffix));
436 con.Query(LineitemSchema(schema, suffix));
437
438 if (flt_scale == 0) {
439 // schema only
440 con.Query("COMMIT");
441 return;
442 }
443
444 // generate the actual data
445 DSS_HUGE rowcnt = 0;
446 DSS_HUGE i;
447 // all tables
448 table = (1 << CUST) | (1 << SUPP) | (1 << NATION) | (1 << REGION) | (1 << PART_PSUPP) | (1 << ORDER_LINE);
449 force = 0;
450 insert_segments = 0;
451 delete_segments = 0;
452 insert_orders_segment = 0;
453 insert_lineitem_segment = 0;
454 delete_segment = 0;
455 verbose = 0;
456 set_seeds = 0;
457 scale = 1;
458 updates = 0;
459
460 // check if it is the first invocation
461 if (first_invocation) {
462 // store the initial random seed
463 memcpy(seed_backup, Seed, sizeof(seed_t) * MAX_STREAM + 1);
464 first_invocation = false;
465 } else {
466 // restore random seeds from backup
467 memcpy(Seed, seed_backup, sizeof(seed_t) * MAX_STREAM + 1);
468 }
469 tdefs[PART].base = 200000;
470 tdefs[PSUPP].base = 200000;
471 tdefs[SUPP].base = 10000;
472 tdefs[CUST].base = 150000;
473 tdefs[ORDER].base = 150000 * ORDERS_PER_CUST;
474 tdefs[LINE].base = 150000 * ORDERS_PER_CUST;
475 tdefs[ORDER_LINE].base = 150000 * ORDERS_PER_CUST;
476 tdefs[PART_PSUPP].base = 200000;
477 tdefs[NATION].base = NATIONS_MAX;
478 tdefs[REGION].base = NATIONS_MAX;
479
480 children = 1;
481 d_path = NULL;
482
483 if (flt_scale < MIN_SCALE) {
484 int i;
485 int int_scale;
486
487 scale = 1;
488 int_scale = (int)(1000 * flt_scale);
489 for (i = PART; i < REGION; i++) {
490 tdefs[i].base = (DSS_HUGE)(int_scale * tdefs[i].base) / 1000;
491 if (tdefs[i].base < 1)
492 tdefs[i].base = 1;
493 }
494 } else {
495 scale = (long)flt_scale;
496 }
497
498 load_dists();
499
500 /* have to do this after init */
501 tdefs[NATION].base = nations.count;
502 tdefs[REGION].base = regions.count;
503
504 auto append_info = unique_ptr<tpch_append_information[]>(new tpch_append_information[REGION + 1]);
505 memset(append_info.get(), 0, sizeof(tpch_append_information) * REGION + 1);
506 for (size_t i = PART; i <= REGION; i++) {
507 auto tname = get_table_name(i);
508 if (!tname.empty()) {
509 append_info[i].appender = make_unique<Appender>(con, schema, string(tname) + string(suffix));
510 }
511 }
512
513 for (i = PART; i <= REGION; i++) {
514 if (table & (1 << i)) {
515 if (i < NATION) {
516 rowcnt = tdefs[i].base * scale;
517 } else {
518 rowcnt = tdefs[i].base;
519 }
520 // actually doing something
521 gen_tbl((int)i, rowcnt, append_info.get());
522 }
523 }
524 // flush any incomplete chunks
525 for (size_t i = PART; i <= REGION; i++) {
526 if (append_info[i].appender) {
527 append_info[i].appender->Flush();
528 append_info[i].appender.reset();
529 }
530 }
531
532 cleanup_dists();
533 con.Query("COMMIT");
534}
535
536string get_query(int query) {
537 if (query <= 0 || query > TPCH_QUERIES_COUNT) {
538 throw SyntaxException("Out of range TPC-H query number %d", query);
539 }
540 return TPCH_QUERIES[query - 1];
541}
542
543string get_answer(double sf, int query) {
544 if (query <= 0 || query > TPCH_QUERIES_COUNT) {
545 throw SyntaxException("Out of range TPC-H query number %d", query);
546 }
547 const char *answer;
548 if (sf == 0.01) {
549 answer = TPCH_ANSWERS_SF0_01[query - 1];
550 } else if (sf == 0.1) {
551 answer = TPCH_ANSWERS_SF0_1[query - 1];
552 } else if (sf == 1) {
553 answer = TPCH_ANSWERS_SF1[query - 1];
554 } else {
555 throw NotImplementedException("Don't have TPC-H answers for SF %llf!", sf);
556 }
557 return answer;
558}
559
560} // namespace tpch
561