1 | #include "dbgen.hpp" |
2 | |
3 | #include "duckdb/common/exception.hpp" |
4 | #include "duckdb/common/types/date.hpp" |
5 | #include "dbgen_gunk.hpp" |
6 | #include "duckdb/parser/column_definition.hpp" |
7 | #include "tpch_constants.hpp" |
8 | #include "duckdb/main/appender.hpp" |
9 | |
10 | #define DECLARER /* EXTERN references get defined here */ |
11 | |
12 | #include "dss.h" |
13 | #include "dsstypes.h" |
14 | |
15 | using namespace duckdb; |
16 | using namespace std; |
17 | |
18 | extern seed_t Seed[]; |
19 | seed_t seed_backup[MAX_STREAM + 1]; |
20 | static bool first_invocation = true; |
21 | |
22 | tdef tdefs[] = { |
23 | {"part.tbl" , "part table" , 200000, NULL, NULL, PSUPP, 0}, |
24 | {"partsupp.tbl" , "partsupplier table" , 200000, NULL, NULL, NONE, 0}, |
25 | {"supplier.tbl" , "suppliers table" , 10000, NULL, NULL, NONE, 0}, |
26 | {"customer.tbl" , "customers table" , 150000, NULL, NULL, NONE, 0}, |
27 | {"orders.tbl" , "order table" , 150000, NULL, NULL, LINE, 0}, |
28 | {"lineitem.tbl" , "lineitem table" , 150000, NULL, NULL, NONE, 0}, |
29 | {"orders.tbl" , "orders/lineitem tables" , 150000, NULL, NULL, LINE, 0}, |
30 | {"part.tbl" , "part/partsupplier tables" , 200000, NULL, NULL, PSUPP, 0}, |
31 | {"nation.tbl" , "nation table" , NATIONS_MAX, NULL, NULL, NONE, 0}, |
32 | {"region.tbl" , "region table" , NATIONS_MAX, NULL, NULL, NONE, 0}, |
33 | }; |
34 | |
35 | namespace tpch { |
36 | |
37 | struct tpch_append_information { |
38 | unique_ptr<Appender> appender; |
39 | }; |
40 | |
41 | void append_value(tpch_append_information &info, int32_t value) { |
42 | info.appender->Append<int32_t>(value); |
43 | } |
44 | |
45 | void append_string(tpch_append_information &info, const char *value) { |
46 | info.appender->Append<Value>(Value(value)); |
47 | } |
48 | |
49 | void append_decimal(tpch_append_information &info, int64_t value) { |
50 | info.appender->Append<double>(value / 100.0); |
51 | } |
52 | |
53 | void append_date(tpch_append_information &info, string value) { |
54 | info.appender->Append<int32_t>(Date::FromString(value)); |
55 | } |
56 | |
57 | void append_char(tpch_append_information &info, char value) { |
58 | char val[2]; |
59 | val[0] = value; |
60 | val[1] = '\0'; |
61 | append_string(info, val); |
62 | } |
63 | |
64 | static void append_order(order_t *o, tpch_append_information *info) { |
65 | auto &append_info = info[ORDER]; |
66 | |
67 | // fill the current row with the order information |
68 | append_info.appender->BeginRow(); |
69 | // o_orderkey |
70 | append_value(append_info, o->okey); |
71 | // o_custkey |
72 | append_value(append_info, o->custkey); |
73 | // o_orderstatus |
74 | append_char(append_info, o->orderstatus); |
75 | // o_totalprice |
76 | append_decimal(append_info, o->totalprice); |
77 | // o_orderdate |
78 | append_date(append_info, o->odate); |
79 | // o_orderpriority |
80 | append_string(append_info, o->opriority); |
81 | // o_clerk |
82 | append_string(append_info, o->clerk); |
83 | // o_shippriority |
84 | append_value(append_info, o->spriority); |
85 | // o_comment |
86 | append_string(append_info, o->comment); |
87 | append_info.appender->EndRow(); |
88 | } |
89 | |
90 | static void append_line(order_t *o, tpch_append_information *info) { |
91 | auto &append_info = info[LINE]; |
92 | |
93 | // fill the current row with the order information |
94 | for (DSS_HUGE i = 0; i < o->lines; i++) { |
95 | append_info.appender->BeginRow(); |
96 | // l_orderkey |
97 | append_value(append_info, o->l[i].okey); |
98 | // l_partkey |
99 | append_value(append_info, o->l[i].partkey); |
100 | // l_suppkey |
101 | append_value(append_info, o->l[i].suppkey); |
102 | // l_linenumber |
103 | append_value(append_info, o->l[i].lcnt); |
104 | // l_quantity |
105 | append_value(append_info, o->l[i].quantity); |
106 | // l_extendedprice |
107 | append_decimal(append_info, o->l[i].eprice); |
108 | // l_discount |
109 | append_decimal(append_info, o->l[i].discount); |
110 | // l_tax |
111 | append_decimal(append_info, o->l[i].tax); |
112 | // l_returnflag |
113 | append_char(append_info, o->l[i].rflag[0]); |
114 | // l_linestatus |
115 | append_char(append_info, o->l[i].lstatus[0]); |
116 | // l_shipdate |
117 | append_date(append_info, o->l[i].sdate); |
118 | // l_commitdate |
119 | append_date(append_info, o->l[i].cdate); |
120 | // l_receiptdate |
121 | append_date(append_info, o->l[i].rdate); |
122 | // l_shipinstruct |
123 | append_string(append_info, o->l[i].shipinstruct); |
124 | // l_shipmode |
125 | append_string(append_info, o->l[i].shipmode); |
126 | // l_comment |
127 | append_string(append_info, o->l[i].comment); |
128 | append_info.appender->EndRow(); |
129 | } |
130 | } |
131 | |
132 | static void append_order_line(order_t *o, tpch_append_information *info) { |
133 | append_order(o, info); |
134 | append_line(o, info); |
135 | } |
136 | |
137 | static void append_supp(supplier_t *supp, tpch_append_information *info) { |
138 | auto &append_info = info[SUPP]; |
139 | |
140 | append_info.appender->BeginRow(); |
141 | // s_suppkey |
142 | append_value(append_info, supp->suppkey); |
143 | // s_name |
144 | append_string(append_info, supp->name); |
145 | // s_address |
146 | append_string(append_info, supp->address); |
147 | // s_nationkey |
148 | append_value(append_info, supp->nation_code); |
149 | // s_phone |
150 | append_string(append_info, supp->phone); |
151 | // s_acctbal |
152 | append_decimal(append_info, supp->acctbal); |
153 | // s_comment |
154 | append_string(append_info, supp->comment); |
155 | append_info.appender->EndRow(); |
156 | } |
157 | |
158 | static void append_cust(customer_t *c, tpch_append_information *info) { |
159 | auto &append_info = info[CUST]; |
160 | |
161 | append_info.appender->BeginRow(); |
162 | // c_custkey |
163 | append_value(append_info, c->custkey); |
164 | // c_name |
165 | append_string(append_info, c->name); |
166 | // c_address |
167 | append_string(append_info, c->address); |
168 | // c_nationkey |
169 | append_value(append_info, c->nation_code); |
170 | // c_phone |
171 | append_string(append_info, c->phone); |
172 | // c_acctbal |
173 | append_decimal(append_info, c->acctbal); |
174 | // c_mktsegment |
175 | append_string(append_info, c->mktsegment); |
176 | // c_comment |
177 | append_string(append_info, c->comment); |
178 | append_info.appender->EndRow(); |
179 | } |
180 | |
181 | static void append_part(part_t *part, tpch_append_information *info) { |
182 | auto &append_info = info[PART]; |
183 | |
184 | append_info.appender->BeginRow(); |
185 | // p_partkey |
186 | append_value(append_info, part->partkey); |
187 | // p_name |
188 | append_string(append_info, part->name); |
189 | // p_mfgr |
190 | append_string(append_info, part->mfgr); |
191 | // p_brand |
192 | append_string(append_info, part->brand); |
193 | // p_type |
194 | append_string(append_info, part->type); |
195 | // p_size |
196 | append_value(append_info, part->size); |
197 | // p_container |
198 | append_string(append_info, part->container); |
199 | // p_retailprice |
200 | append_decimal(append_info, part->retailprice); |
201 | // p_comment |
202 | append_string(append_info, part->comment); |
203 | append_info.appender->EndRow(); |
204 | } |
205 | |
206 | static void append_psupp(part_t *part, tpch_append_information *info) { |
207 | auto &append_info = info[PSUPP]; |
208 | for (size_t i = 0; i < SUPP_PER_PART; i++) { |
209 | append_info.appender->BeginRow(); |
210 | // ps_partkey |
211 | append_value(append_info, part->s[i].partkey); |
212 | // ps_suppkey |
213 | append_value(append_info, part->s[i].suppkey); |
214 | // ps_availqty |
215 | append_value(append_info, part->s[i].qty); |
216 | // ps_supplycost |
217 | append_decimal(append_info, part->s[i].scost); |
218 | // ps_comment |
219 | append_string(append_info, part->s[i].comment); |
220 | append_info.appender->EndRow(); |
221 | } |
222 | } |
223 | |
224 | static void append_part_psupp(part_t *part, tpch_append_information *info) { |
225 | append_part(part, info); |
226 | append_psupp(part, info); |
227 | } |
228 | |
229 | static void append_nation(code_t *c, tpch_append_information *info) { |
230 | auto &append_info = info[NATION]; |
231 | |
232 | append_info.appender->BeginRow(); |
233 | // n_nationkey |
234 | append_value(append_info, c->code); |
235 | // n_name |
236 | append_string(append_info, c->text); |
237 | // n_regionkey |
238 | append_value(append_info, c->join); |
239 | // n_comment |
240 | append_string(append_info, c->comment); |
241 | append_info.appender->EndRow(); |
242 | } |
243 | |
244 | static void append_region(code_t *c, tpch_append_information *info) { |
245 | auto &append_info = info[REGION]; |
246 | |
247 | append_info.appender->BeginRow(); |
248 | // r_regionkey |
249 | append_value(append_info, c->code); |
250 | // r_name |
251 | append_string(append_info, c->text); |
252 | // r_comment |
253 | append_string(append_info, c->comment); |
254 | append_info.appender->EndRow(); |
255 | } |
256 | |
257 | static void gen_tbl(int tnum, DSS_HUGE count, tpch_append_information *info) { |
258 | order_t o; |
259 | supplier_t supp; |
260 | customer_t cust; |
261 | part_t part; |
262 | code_t code; |
263 | |
264 | for (DSS_HUGE i = 1; count; count--, i++) { |
265 | row_start(tnum); |
266 | switch (tnum) { |
267 | case LINE: |
268 | case ORDER: |
269 | case ORDER_LINE: |
270 | mk_order(i, &o, 0); |
271 | append_order_line(&o, info); |
272 | break; |
273 | case SUPP: |
274 | mk_supp(i, &supp); |
275 | append_supp(&supp, info); |
276 | break; |
277 | case CUST: |
278 | mk_cust(i, &cust); |
279 | append_cust(&cust, info); |
280 | break; |
281 | case PSUPP: |
282 | case PART: |
283 | case PART_PSUPP: |
284 | mk_part(i, &part); |
285 | append_part_psupp(&part, info); |
286 | break; |
287 | case NATION: |
288 | mk_nation(i, &code); |
289 | append_nation(&code, info); |
290 | break; |
291 | case REGION: |
292 | mk_region(i, &code); |
293 | append_region(&code, info); |
294 | break; |
295 | } |
296 | row_stop_h(tnum); |
297 | } |
298 | } |
299 | |
300 | string get_table_name(int num) { |
301 | switch (num) { |
302 | case PART: |
303 | return "part" ; |
304 | case PSUPP: |
305 | return "partsupp" ; |
306 | case SUPP: |
307 | return "supplier" ; |
308 | case CUST: |
309 | return "customer" ; |
310 | case ORDER: |
311 | return "orders" ; |
312 | case LINE: |
313 | return "lineitem" ; |
314 | case NATION: |
315 | return "nation" ; |
316 | case REGION: |
317 | return "region" ; |
318 | default: |
319 | return "" ; |
320 | } |
321 | } |
322 | |
323 | static string RegionSchema(string schema, string suffix) { |
324 | return "CREATE TABLE " + schema + ".region" + suffix + |
325 | " (" |
326 | "r_regionkey INT NOT NULL," |
327 | "r_name VARCHAR(25) NOT NULL," |
328 | "r_comment VARCHAR(152) NOT NULL);" ; |
329 | } |
330 | |
331 | static string NationSchema(string schema, string suffix) { |
332 | return "CREATE TABLE " + schema + ".nation" + suffix + |
333 | " (" |
334 | "n_nationkey INT NOT NULL," |
335 | "n_name VARCHAR(25) NOT NULL," |
336 | "n_regionkey INT NOT NULL," |
337 | "n_comment VARCHAR(152) NOT NULL);" ; |
338 | } |
339 | |
340 | static string SupplierSchema(string schema, string suffix) { |
341 | return "CREATE TABLE " + schema + ".supplier" + suffix + |
342 | " (" |
343 | "s_suppkey INT NOT NULL," |
344 | "s_name VARCHAR(25) NOT NULL," |
345 | "s_address VARCHAR(40) NOT NULL," |
346 | "s_nationkey INT NOT NULL," |
347 | "s_phone VARCHAR(15) NOT NULL," |
348 | "s_acctbal DECIMAL(15,2) NOT NULL," |
349 | "s_comment VARCHAR(101) NOT NULL);" ; |
350 | } |
351 | |
352 | static string CustomerSchema(string schema, string suffix) { |
353 | return "CREATE TABLE " + schema + ".customer" + suffix + |
354 | " (" |
355 | "c_custkey INT NOT NULL," |
356 | "c_name VARCHAR(25) NOT NULL," |
357 | "c_address VARCHAR(40) NOT NULL," |
358 | "c_nationkey INT NOT NULL," |
359 | "c_phone VARCHAR(15) NOT NULL," |
360 | "c_acctbal DECIMAL(15,2) NOT NULL," |
361 | "c_mktsegment VARCHAR(10) NOT NULL," |
362 | "c_comment VARCHAR(117) NOT NULL);" ; |
363 | } |
364 | |
365 | static string PartSchema(string schema, string suffix) { |
366 | return "CREATE TABLE " + schema + ".part" + suffix + |
367 | " (" |
368 | "p_partkey INT NOT NULL," |
369 | "p_name VARCHAR(55) NOT NULL," |
370 | "p_mfgr VARCHAR(25) NOT NULL," |
371 | "p_brand VARCHAR(10) NOT NULL," |
372 | "p_type VARCHAR(25) NOT NULL," |
373 | "p_size INT NOT NULL," |
374 | "p_container VARCHAR(10) NOT NULL," |
375 | "p_retailprice DECIMAL(15,2) NOT NULL," |
376 | "p_comment VARCHAR(23) NOT NULL);" ; |
377 | } |
378 | |
379 | static string PartSuppSchema(string schema, string suffix) { |
380 | return "CREATE TABLE " + schema + ".partsupp" + suffix + |
381 | " (" |
382 | "ps_partkey INT NOT NULL," |
383 | "ps_suppkey INT NOT NULL," |
384 | "ps_availqty INT NOT NULL," |
385 | "ps_supplycost DECIMAL(15,2) NOT NULL," |
386 | "ps_comment VARCHAR(199) NOT NULL);" ; |
387 | } |
388 | |
389 | static string (string schema, string suffix) { |
390 | return "CREATE TABLE " + schema + ".orders" + suffix + |
391 | " (" |
392 | "o_orderkey INT NOT NULL," |
393 | "o_custkey INT NOT NULL," |
394 | "o_orderstatus VARCHAR(1) NOT NULL," |
395 | "o_totalprice DECIMAL(15,2) NOT NULL," |
396 | "o_orderdate DATE NOT NULL," |
397 | "o_orderpriority VARCHAR(15) NOT NULL," |
398 | "o_clerk VARCHAR(15) NOT NULL," |
399 | "o_shippriority INT NOT NULL," |
400 | "o_comment VARCHAR(79) NOT NULL);" ; |
401 | } |
402 | |
403 | static string LineitemSchema(string schema, string suffix) { |
404 | return "CREATE TABLE " + schema + ".lineitem" + suffix + |
405 | " (" |
406 | "l_orderkey INT NOT NULL," |
407 | "l_partkey INT NOT NULL," |
408 | "l_suppkey INT NOT NULL," |
409 | "l_linenumber INT NOT NULL," |
410 | "l_quantity INTEGER NOT NULL," |
411 | "l_extendedprice DECIMAL(15,2) NOT NULL," |
412 | "l_discount DECIMAL(15,2) NOT NULL," |
413 | "l_tax DECIMAL(15,2) NOT NULL," |
414 | "l_returnflag VARCHAR(1) NOT NULL," |
415 | "l_linestatus VARCHAR(1) NOT NULL," |
416 | "l_shipdate DATE NOT NULL," |
417 | "l_commitdate DATE NOT NULL," |
418 | "l_receiptdate DATE NOT NULL," |
419 | "l_shipinstruct VARCHAR(25) NOT NULL," |
420 | "l_shipmode VARCHAR(10) NOT NULL," |
421 | "l_comment VARCHAR(44) NOT NULL)" ; |
422 | } |
423 | |
424 | void dbgen(double flt_scale, DuckDB &db, string schema, string suffix) { |
425 | unique_ptr<QueryResult> result; |
426 | Connection con(db); |
427 | con.Query("BEGIN TRANSACTION" ); |
428 | |
429 | con.Query(RegionSchema(schema, suffix)); |
430 | con.Query(NationSchema(schema, suffix)); |
431 | con.Query(SupplierSchema(schema, suffix)); |
432 | con.Query(CustomerSchema(schema, suffix)); |
433 | con.Query(PartSchema(schema, suffix)); |
434 | con.Query(PartSuppSchema(schema, suffix)); |
435 | con.Query(OrdersSchema(schema, suffix)); |
436 | con.Query(LineitemSchema(schema, suffix)); |
437 | |
438 | if (flt_scale == 0) { |
439 | // schema only |
440 | con.Query("COMMIT" ); |
441 | return; |
442 | } |
443 | |
444 | // generate the actual data |
445 | DSS_HUGE rowcnt = 0; |
446 | DSS_HUGE i; |
447 | // all tables |
448 | table = (1 << CUST) | (1 << SUPP) | (1 << NATION) | (1 << REGION) | (1 << PART_PSUPP) | (1 << ORDER_LINE); |
449 | force = 0; |
450 | insert_segments = 0; |
451 | delete_segments = 0; |
452 | insert_orders_segment = 0; |
453 | insert_lineitem_segment = 0; |
454 | delete_segment = 0; |
455 | verbose = 0; |
456 | set_seeds = 0; |
457 | scale = 1; |
458 | updates = 0; |
459 | |
460 | // check if it is the first invocation |
461 | if (first_invocation) { |
462 | // store the initial random seed |
463 | memcpy(seed_backup, Seed, sizeof(seed_t) * MAX_STREAM + 1); |
464 | first_invocation = false; |
465 | } else { |
466 | // restore random seeds from backup |
467 | memcpy(Seed, seed_backup, sizeof(seed_t) * MAX_STREAM + 1); |
468 | } |
469 | tdefs[PART].base = 200000; |
470 | tdefs[PSUPP].base = 200000; |
471 | tdefs[SUPP].base = 10000; |
472 | tdefs[CUST].base = 150000; |
473 | tdefs[ORDER].base = 150000 * ORDERS_PER_CUST; |
474 | tdefs[LINE].base = 150000 * ORDERS_PER_CUST; |
475 | tdefs[ORDER_LINE].base = 150000 * ORDERS_PER_CUST; |
476 | tdefs[PART_PSUPP].base = 200000; |
477 | tdefs[NATION].base = NATIONS_MAX; |
478 | tdefs[REGION].base = NATIONS_MAX; |
479 | |
480 | children = 1; |
481 | d_path = NULL; |
482 | |
483 | if (flt_scale < MIN_SCALE) { |
484 | int i; |
485 | int int_scale; |
486 | |
487 | scale = 1; |
488 | int_scale = (int)(1000 * flt_scale); |
489 | for (i = PART; i < REGION; i++) { |
490 | tdefs[i].base = (DSS_HUGE)(int_scale * tdefs[i].base) / 1000; |
491 | if (tdefs[i].base < 1) |
492 | tdefs[i].base = 1; |
493 | } |
494 | } else { |
495 | scale = (long)flt_scale; |
496 | } |
497 | |
498 | load_dists(); |
499 | |
500 | /* have to do this after init */ |
501 | tdefs[NATION].base = nations.count; |
502 | tdefs[REGION].base = regions.count; |
503 | |
504 | auto append_info = unique_ptr<tpch_append_information[]>(new tpch_append_information[REGION + 1]); |
505 | memset(append_info.get(), 0, sizeof(tpch_append_information) * REGION + 1); |
506 | for (size_t i = PART; i <= REGION; i++) { |
507 | auto tname = get_table_name(i); |
508 | if (!tname.empty()) { |
509 | append_info[i].appender = make_unique<Appender>(con, schema, string(tname) + string(suffix)); |
510 | } |
511 | } |
512 | |
513 | for (i = PART; i <= REGION; i++) { |
514 | if (table & (1 << i)) { |
515 | if (i < NATION) { |
516 | rowcnt = tdefs[i].base * scale; |
517 | } else { |
518 | rowcnt = tdefs[i].base; |
519 | } |
520 | // actually doing something |
521 | gen_tbl((int)i, rowcnt, append_info.get()); |
522 | } |
523 | } |
524 | // flush any incomplete chunks |
525 | for (size_t i = PART; i <= REGION; i++) { |
526 | if (append_info[i].appender) { |
527 | append_info[i].appender->Flush(); |
528 | append_info[i].appender.reset(); |
529 | } |
530 | } |
531 | |
532 | cleanup_dists(); |
533 | con.Query("COMMIT" ); |
534 | } |
535 | |
536 | string get_query(int query) { |
537 | if (query <= 0 || query > TPCH_QUERIES_COUNT) { |
538 | throw SyntaxException("Out of range TPC-H query number %d" , query); |
539 | } |
540 | return TPCH_QUERIES[query - 1]; |
541 | } |
542 | |
543 | string get_answer(double sf, int query) { |
544 | if (query <= 0 || query > TPCH_QUERIES_COUNT) { |
545 | throw SyntaxException("Out of range TPC-H query number %d" , query); |
546 | } |
547 | const char *answer; |
548 | if (sf == 0.01) { |
549 | answer = TPCH_ANSWERS_SF0_01[query - 1]; |
550 | } else if (sf == 0.1) { |
551 | answer = TPCH_ANSWERS_SF0_1[query - 1]; |
552 | } else if (sf == 1) { |
553 | answer = TPCH_ANSWERS_SF1[query - 1]; |
554 | } else { |
555 | throw NotImplementedException("Don't have TPC-H answers for SF %llf!" , sf); |
556 | } |
557 | return answer; |
558 | } |
559 | |
560 | } // namespace tpch |
561 | |