1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | #include "monetdb_config.h" |
10 | #include "rel_planner.h" |
11 | #include "rel_rel.h" |
12 | #include "rel_exp.h" |
13 | #include "rel_prop.h" |
14 | #include "rel_optimizer.h" |
15 | |
16 | typedef struct memoitem { |
17 | const char *name; |
18 | list *rels; |
19 | list *exps; |
20 | list *joins; |
21 | int done; |
22 | int level; |
23 | lng count; |
24 | lng width; |
25 | dbl cost; |
26 | void *data; |
27 | } memoitem; |
28 | |
29 | #define p_pkey 1 |
30 | #define p_fkey 2 |
31 | #define p_ukey 3 |
32 | |
33 | typedef struct memojoin { |
34 | memoitem *l, *r; |
35 | int rules; /* handled rules */ |
36 | int prop; /* pkey, fkey, ukey */ |
37 | dbl cost; |
38 | dbl sel; |
39 | sql_exp *e; |
40 | } memojoin; |
41 | |
42 | static int |
43 | memoitem_key( memoitem *mi ) |
44 | { |
45 | return hash_key(mi->name); |
46 | } |
47 | |
48 | static memoitem* |
49 | memo_find(list *memo, const char *name) |
50 | { |
51 | int key = hash_key(name); |
52 | sql_hash_e *he; |
53 | |
54 | MT_lock_set(&memo->ht_lock); |
55 | he = memo->ht->buckets[key&(memo->ht->size-1)]; |
56 | for (; he; he = he->chain) { |
57 | memoitem *mi = he->value; |
58 | |
59 | if (mi->name && strcmp(mi->name, name) == 0) { |
60 | MT_lock_unset(&memo->ht_lock); |
61 | return mi; |
62 | } |
63 | } |
64 | MT_lock_unset(&memo->ht_lock); |
65 | return NULL; |
66 | } |
67 | |
68 | static char * |
69 | merge_names( sql_allocator *sa, const char *lname, const char *rname) |
70 | { |
71 | size_t l = strlen(lname) + strlen(rname) + 2; |
72 | char *n = SA_NEW_ARRAY(sa, char, l); |
73 | const char *p = lname; |
74 | const char *c; |
75 | |
76 | while ((c = strchr(p, ',')) != NULL) { |
77 | if (strncmp(p, rname, c - p) > 0) { |
78 | if (p > lname) |
79 | snprintf(n, l, "%.*s,%s,%s" , (int) (c - lname), |
80 | lname, rname, c + 1); |
81 | else |
82 | snprintf(n, l, "%s,%s" , rname, lname); |
83 | return n; |
84 | } |
85 | p = c + 1; |
86 | } |
87 | snprintf(n, l, "%s,%s" , lname, rname); |
88 | return n; |
89 | } |
90 | |
91 | static memoitem * |
92 | memoitem_create( list *memo, sql_allocator *sa, const char *lname, const char *rname, int level) |
93 | { |
94 | const char *name = lname; |
95 | memoitem *mi; |
96 | |
97 | if (level > 1) |
98 | name = merge_names(sa, lname, rname); |
99 | if (memo_find(memo, name)) |
100 | return NULL; |
101 | mi = SA_NEW(sa, memoitem); |
102 | mi->name = sa_strdup(sa, name); |
103 | mi->joins = (rname)?sa_list(sa):NULL; |
104 | mi->done = (rname)?0:1; |
105 | mi->level = level; |
106 | mi->count = 1; |
107 | mi->cost = 0; |
108 | mi->width = 8; |
109 | mi->data = NULL; |
110 | mi->rels = sa_list(sa); |
111 | mi->exps = sa_list(sa); |
112 | list_append(memo, mi); |
113 | return mi; |
114 | } |
115 | |
116 | static lng |
117 | rel_getcount(mvc *sql, sql_rel *rel) |
118 | { |
119 | if (!sql->session->tr) |
120 | return 0; |
121 | |
122 | switch(rel->op) { |
123 | case op_basetable: { |
124 | sql_table *t = rel->l; |
125 | |
126 | if (t && isTable(t)) |
127 | return (lng)store_funcs.count_col(sql->session->tr, t->columns.set->h->data, 1); |
128 | if (!t && rel->r) /* dict */ |
129 | return (lng)sql_trans_dist_count(sql->session->tr, rel->r); |
130 | return 0; |
131 | } |
132 | case op_select: |
133 | case op_project: |
134 | if (rel->l) |
135 | return rel_getcount(sql, rel->l); |
136 | return 1; |
137 | default: |
138 | return 0; |
139 | } |
140 | } |
141 | |
142 | static lng |
143 | rel_getwidth(mvc *sql, sql_rel *rel) |
144 | { |
145 | if (!sql->session->tr) |
146 | return 0; |
147 | |
148 | switch(rel->op) { |
149 | case op_basetable: { |
150 | sql_table *t = rel->l; |
151 | |
152 | if (t && isTable(t)) |
153 | return 4*list_length(rel->exps); |
154 | return 0; |
155 | } |
156 | case op_select: |
157 | if (rel->l) |
158 | return rel_getwidth(sql, rel->l); |
159 | return 1; |
160 | case op_project: |
161 | if (rel->l) |
162 | return 4*list_length(rel->exps); |
163 | return 1; |
164 | default: |
165 | return 0; |
166 | } |
167 | } |
168 | |
169 | static lng |
170 | exp_getdcount( mvc *sql, sql_rel *r , sql_exp *e, lng count) |
171 | { |
172 | switch(e->type) { |
173 | case e_column: { |
174 | /* find col */ |
175 | sql_rel *bt = NULL; |
176 | sql_column *c = name_find_column(r, e->l, e->r, -1, &bt); |
177 | if (c) { |
178 | lng dcount = (lng)sql_trans_dist_count(sql->session->tr, c); |
179 | if (dcount != 0 && dcount < count) |
180 | return dcount; |
181 | } |
182 | return count; |
183 | } |
184 | case e_cmp: |
185 | assert(0); |
186 | |
187 | |
188 | case e_convert: |
189 | if (e->l) |
190 | return exp_getdcount(sql, r, e->l, count); |
191 | /* fall through */ |
192 | case e_func: |
193 | case e_aggr: |
194 | case e_atom: |
195 | case e_psm: |
196 | return count; |
197 | } |
198 | return count; |
199 | } |
200 | |
201 | static int |
202 | exp_getranges( mvc *sql, sql_rel *r , sql_exp *e, char **min, char **max) |
203 | { |
204 | switch(e->type) { |
205 | case e_column: { |
206 | /* find col */ |
207 | sql_rel *bt = NULL; |
208 | sql_column *c = name_find_column(r, e->l, e->r, -1, &bt); |
209 | if (c) |
210 | return sql_trans_ranges(sql->session->tr, c, min, max); |
211 | return 0; |
212 | } |
213 | case e_cmp: |
214 | assert(0); |
215 | |
216 | case e_convert: |
217 | if (e->l) |
218 | return exp_getranges(sql, r, e->l, min, max); |
219 | /* fall through */ |
220 | case e_func: |
221 | case e_aggr: |
222 | case e_atom: |
223 | case e_psm: |
224 | return 0; |
225 | } |
226 | return 0; |
227 | } |
228 | |
229 | static atom * |
230 | exp_getatom( mvc *sql, sql_exp *e, atom *m) |
231 | { |
232 | if (is_atom(e->type)) |
233 | return exp_value(sql, e, sql->args, sql->argc); |
234 | else if (e->type == e_convert) |
235 | return exp_getatom(sql, e->l, m); |
236 | else if (e->type == e_func) { |
237 | sql_subfunc *f = e->f; |
238 | list *l = e->l; |
239 | /* handle date + x months */ |
240 | /* TODO add scalar -> value, ie exp->stmt-tree->exec-tree+exec */ |
241 | if (strcmp(f->func->base.name, "sql_add" ) == 0 && list_length(l) == 2) { |
242 | atom *l1 = exp_getatom(sql, l->h->data, m); |
243 | atom *l2 = exp_getatom(sql, l->h->next->data, m); |
244 | /* data + months */ |
245 | (void)l2; |
246 | (void)l1; |
247 | return NULL; |
248 | } |
249 | } |
250 | return m; |
251 | } |
252 | |
253 | static dbl |
254 | exp_getrange_sel( mvc *sql, sql_rel *r, sql_exp *e, char *min, char *max) |
255 | { |
256 | atom *amin, *amax, *emin, *emax; |
257 | dbl sel = 1.0; |
258 | sql_subtype *t = exp_subtype(e->l); |
259 | |
260 | (void)r; |
261 | emin = amin = atom_general(sql->sa, t, min); |
262 | emax = amax = atom_general(sql->sa, t, max); |
263 | |
264 | if (e->f || e->flag == cmp_gt || e->flag == cmp_gte) |
265 | emin = exp_getatom(sql, e->r, amin); |
266 | if (e->f || e->flag == cmp_lt || e->flag == cmp_lte) |
267 | emax = (e->f)?exp_getatom(sql, e->f, amax): |
268 | exp_getatom(sql, e->r, amax); |
269 | |
270 | if (!amin || !amax) |
271 | return 0.1; |
272 | |
273 | if (!emin || !emax) |
274 | sel = 0.125; |
275 | /* 4 case, dbl and lng, date, timestamp */ |
276 | else if (t->type->eclass == EC_DATE) { |
277 | sel = (emax->data.val.ival-emin->data.val.ival)/(dbl)(amax->data.val.ival-amin->data.val.ival); |
278 | } else if (t->type->eclass == EC_TIMESTAMP) { |
279 | sel = (emax->data.val.lval-emin->data.val.lval)/(dbl)(amax->data.val.lval-amin->data.val.lval); |
280 | } else if (t->type->eclass == EC_FLT) { |
281 | sel = (emax->data.val.dval-emin->data.val.dval)/(amax->data.val.dval-amin->data.val.dval); |
282 | } else { /* lng */ |
283 | sel = (emax->data.val.lval-emin->data.val.lval)/(dbl)(amax->data.val.lval-amin->data.val.lval); |
284 | } |
285 | return sel; |
286 | } |
287 | |
288 | static dbl |
289 | rel_exp_selectivity(mvc *sql, sql_rel *r, sql_exp *e, lng count) |
290 | { |
291 | dbl sel = 1.0; |
292 | |
293 | if (!e) |
294 | return 1.0; |
295 | switch(e->type) { |
296 | case e_cmp: { |
297 | lng dcount = exp_getdcount( sql, r, e->l, count); |
298 | |
299 | switch (get_cmp(e)) { |
300 | case cmp_equal: { |
301 | sel = 1.0/dcount; |
302 | break; |
303 | } |
304 | case cmp_notequal: |
305 | sel = (dcount-1.0)/dcount; |
306 | break; |
307 | case cmp_gt: |
308 | case cmp_gte: |
309 | case cmp_lt: |
310 | case cmp_lte: { |
311 | char *min, *max; |
312 | if (exp_getranges( sql, r, e->l, &min, &max )) { |
313 | sel = (dbl)exp_getrange_sel( sql, r, e, min, max); |
314 | } else { |
315 | sel = 0.5; |
316 | if (e->f) /* range */ |
317 | sel = 0.25; |
318 | } |
319 | } break; |
320 | case cmp_filter: |
321 | sel = 0.01; |
322 | break; |
323 | case cmp_in: |
324 | case cmp_notin: { |
325 | list *l = e->r; |
326 | sel = (dbl) list_length(l) / dcount; |
327 | break; |
328 | } |
329 | case cmp_or: |
330 | sel = 0.5; |
331 | break; |
332 | default: |
333 | return 1.0; |
334 | } |
335 | break; |
336 | } |
337 | default: |
338 | break; |
339 | } |
340 | return sel; |
341 | } |
342 | |
343 | static dbl |
344 | rel_join_exp_selectivity(mvc *sql, sql_rel *l, sql_rel *r, sql_exp *e, lng lcount, lng rcount) |
345 | { |
346 | dbl sel = 1.0; |
347 | lng ldcount, rdcount; |
348 | |
349 | if (!e) |
350 | return 1.0; |
351 | assert(lcount); |
352 | assert(rcount); |
353 | ldcount = exp_getdcount(sql, l, e->l, lcount); |
354 | rdcount = exp_getdcount(sql, r, e->r, rcount); |
355 | switch(e->type) { |
356 | case e_cmp: |
357 | switch (get_cmp(e)) { |
358 | case cmp_equal: |
359 | sel = (lcount/(dbl)ldcount)*(rcount/(dbl)rdcount); |
360 | break; |
361 | case cmp_notequal: { |
362 | dbl cnt = (lcount/(dbl)ldcount)*(rcount/(dbl)rdcount); |
363 | sel = (cnt-1)/cnt; |
364 | } break; |
365 | case cmp_gt: |
366 | case cmp_gte: |
367 | case cmp_lt: |
368 | case cmp_lte: |
369 | /* ugh */ |
370 | sel = 0.5; |
371 | if (e->f) /* range */ |
372 | sel = 0.2; |
373 | break; |
374 | case cmp_filter: |
375 | sel = 0.1; |
376 | break; |
377 | case cmp_in: |
378 | case cmp_notin: { |
379 | lng cnt = lcount*rcount; |
380 | list *l = e->r; |
381 | sel = (dbl) list_length(l) / cnt; |
382 | break; |
383 | } |
384 | case cmp_or: |
385 | sel = 0.5; |
386 | break; |
387 | default: |
388 | return 1.0; |
389 | } |
390 | break; |
391 | default: |
392 | break; |
393 | } |
394 | assert(sel >= 0.000001); |
395 | return sel; |
396 | } |
397 | |
398 | |
399 | static dbl |
400 | rel_exps_selectivity(mvc *sql, sql_rel *rel, list *exps, lng count) |
401 | { |
402 | node *n; |
403 | dbl sel = 1.0; |
404 | if (!exps->h) |
405 | return 1.0; |
406 | for(n=exps->h; n; n = n->next) { |
407 | dbl nsel = rel_exp_selectivity(sql, rel, n->data, count); |
408 | |
409 | sel *= nsel; |
410 | } |
411 | return sel; |
412 | } |
413 | |
414 | /* need real values, ie |
415 | * point select on pkey -> 1 value -> selectivity count |
416 | */ |
417 | |
418 | static dbl |
419 | rel_getsel(mvc *sql, sql_rel *rel, lng count) |
420 | { |
421 | if (!sql->session->tr) |
422 | return 1.0; |
423 | |
424 | switch(rel->op) { |
425 | case op_select: |
426 | return rel_exps_selectivity(sql, rel, rel->exps, count); |
427 | case op_project: |
428 | if (rel->l) |
429 | return rel_getsel(sql, rel->l, count); |
430 | /* fall through */ |
431 | default: |
432 | return 1.0; |
433 | } |
434 | } |
435 | |
436 | static list* |
437 | memo_create(mvc *sql, list *rels ) |
438 | { |
439 | int len = list_length(rels); |
440 | list *memo = sa_list(sql->sa); |
441 | node *n; |
442 | |
443 | MT_lock_set(&memo->ht_lock); |
444 | memo->ht = hash_new(sql->sa, len*len, (fkeyvalue)&memoitem_key); |
445 | MT_lock_unset(&memo->ht_lock); |
446 | for(n = rels->h; n; n = n->next) { |
447 | sql_rel *r = n->data; |
448 | memoitem *mi = memoitem_create(memo, sql->sa, rel_name(r), NULL, 1); |
449 | dbl sel = 1; |
450 | |
451 | mi->count = rel_getcount(sql, r); |
452 | sel = rel_getsel(sql, r, mi->count); |
453 | mi->count = MAX( (lng) (mi->count*sel), 1); |
454 | assert(mi->count); |
455 | mi->width = rel_getwidth(sql, r); |
456 | mi->cost = (dbl)(mi->count*mi->width); |
457 | mi->data = r; |
458 | append(mi->rels, r); |
459 | } |
460 | return memo; |
461 | } |
462 | |
463 | static void |
464 | memo_add_exps(list *memo, mvc *sql, list *rels, list *jes) |
465 | { |
466 | node *n; |
467 | memoitem *mi; |
468 | |
469 | for(n = jes->h; n; n = n->next) { |
470 | sql_exp *e = n->data; |
471 | if (e->type != e_cmp || !is_complex_exp(e->flag)){ |
472 | sql_rel *l = find_one_rel(rels, e->l); |
473 | sql_rel *r = find_one_rel(rels, e->r); |
474 | memojoin *mj = SA_ZNEW(sql->sa, memojoin); |
475 | |
476 | mj->l = memo_find( memo, rel_name(l)); |
477 | mj->r = memo_find( memo, rel_name(r)); |
478 | mj->rules = 0; |
479 | mj->cost = 0; |
480 | mj->e = e; |
481 | mj->sel = rel_join_exp_selectivity(sql, l, r, e, mj->l->count, mj->r->count); |
482 | |
483 | mi = memoitem_create(memo, sql->sa, mj->l->name, mj->r->name, 2); |
484 | mi->width = (rel_getwidth(sql, l) + rel_getwidth(sql, r))/2; |
485 | mi->data = e; |
486 | mi->count = (lng)(mj->sel * MIN(mj->l->count, mj->r->count)); |
487 | append(mi->rels, l); |
488 | append(mi->rels, r); |
489 | append(mi->exps, e); |
490 | list_append(mi->joins, mj); |
491 | } |
492 | } |
493 | } |
494 | |
495 | static int |
496 | memoitem_has( memoitem *mi, const char *name) |
497 | { |
498 | if (mi->level > 1) { |
499 | memojoin *mj = mi->joins->h->data; |
500 | |
501 | return (memoitem_has(mj->l, name) || |
502 | memoitem_has(mj->r, name)); |
503 | } else { |
504 | return strcmp(mi->name, name) == 0; |
505 | } |
506 | } |
507 | |
508 | static void |
509 | memoitem_add_attr(list *memo, mvc *sql, memoitem *mi, list *rels, list *jes, int level) |
510 | { |
511 | node *n; |
512 | |
513 | for( n = jes->h; n; n = n->next) { |
514 | sql_exp *e = n->data; |
515 | |
516 | if (e->type != e_cmp || !is_complex_exp(e->flag)){ |
517 | int hasl = 0, hasr = 0; |
518 | sql_rel *l = find_one_rel(rels, e->l); |
519 | sql_rel *r = find_one_rel(rels, e->r); |
520 | |
521 | /* check if exactly one rel is in mi */ |
522 | hasl = memoitem_has(mi, rel_name(l)); |
523 | hasr = memoitem_has(mi, rel_name(r)); |
524 | if (hasl != hasr) { |
525 | memoitem *nmi; |
526 | sql_rel *rr = r; |
527 | |
528 | if (!hasl) |
529 | rr = l; |
530 | nmi = memoitem_create(memo, sql->sa, mi->name, rel_name(rr), level); |
531 | if (nmi) { |
532 | memojoin *mj = SA_ZNEW(sql->sa, memojoin); |
533 | lng mincnt = 0; |
534 | |
535 | list_merge(nmi->rels, mi->rels, (fdup)NULL); |
536 | append(nmi->rels, rr); |
537 | append(nmi->exps, e); |
538 | |
539 | mj->l = mi; |
540 | mj->r = memo_find( memo, rel_name(rr)); |
541 | mincnt = MIN(mj->l->count, mj->r->count); |
542 | nmi->width = mi->width + mj->r->width; |
543 | mj->rules = 0; |
544 | mj->cost = 0; |
545 | mj->sel = rel_join_exp_selectivity(sql, l, r, e, mj->l->count, mj->r->count); |
546 | list_append(nmi->joins, mj); |
547 | |
548 | if (!nmi->count) |
549 | nmi->count = (lng)(mincnt*mj->sel); |
550 | nmi->count = MIN((lng) (mincnt*mj->sel), nmi->count); |
551 | assert(nmi->count >= 0); |
552 | } |
553 | } |
554 | } |
555 | } |
556 | } |
557 | |
558 | static void |
559 | memo_add_attr(list *memo, mvc *sql, list *rels, list *jes) |
560 | { |
561 | node *n; |
562 | int l, len = list_length(rels); |
563 | |
564 | for(l=2; l<len; l++) { |
565 | for (n = memo->h; n; n = n->next) { |
566 | memoitem *mi = n->data; |
567 | |
568 | if (mi->level == l) |
569 | memoitem_add_attr( memo, sql, mi, rels, jes, l+1); |
570 | } |
571 | } |
572 | } |
573 | |
574 | /* Rule 1: Commutativity A join B -> B join A */ |
575 | static int |
576 | memoitem_apply_r1(memoitem *mi, sql_allocator *sa) |
577 | { |
578 | int changes = 0; |
579 | node *n; |
580 | |
581 | if (!mi->joins) |
582 | return 0; |
583 | for ( n = mi->joins->h; n; n = n->next) { |
584 | memojoin *mj = n->data; |
585 | |
586 | if (mj->rules == 0 || mj->rules == 2) { |
587 | memojoin *mjn = SA_ZNEW(sa, memojoin); |
588 | |
589 | mjn->l = mj->r; |
590 | mjn->r = mj->l; |
591 | |
592 | if (mj->rules) |
593 | mj->rules = 4; |
594 | else |
595 | mj->rules = 1; |
596 | mjn->rules = 4; |
597 | mjn->cost = 0; |
598 | mjn->sel = mj->sel; |
599 | list_append(mi->joins, mjn); |
600 | changes ++; |
601 | } |
602 | } |
603 | return changes; |
604 | } |
605 | |
606 | /* Rule 2: Right Associativity (A join B) join C -> A join (B join C) */ |
607 | static int |
608 | memoitem_apply_r2(memoitem *mi, sql_allocator *sa, list *memo) |
609 | { |
610 | int changes = 0; |
611 | node *n; |
612 | |
613 | if (!mi->joins || mi->level <= 2) |
614 | return 0; |
615 | for ( n = mi->joins->h; n; n = n->next) { |
616 | memojoin *mj = n->data; |
617 | |
618 | if (mj->rules <= 1 && mj->l->level >= 2) { |
619 | node *m; |
620 | |
621 | for( m = mj->l->joins->h; m; m = m->next) { |
622 | memoitem *r = NULL; |
623 | memojoin *mjl = m->data; |
624 | /* combine mjl->r and mj->r */ |
625 | char *name = merge_names(sa, mjl->r->name, mj->r->name); |
626 | |
627 | if ((r = memo_find(memo, name))) { |
628 | memojoin *mjn = SA_ZNEW(sa, memojoin); |
629 | |
630 | mjn->l = mjl->l; |
631 | mjn->r = r; |
632 | mjn->rules = 2; |
633 | mjn->cost = 0; |
634 | mjn->sel = 1; |
635 | list_append(mi->joins, mjn); |
636 | changes ++; |
637 | } |
638 | } |
639 | if (mj->rules) |
640 | mj->rules = 4; |
641 | else |
642 | mj->rules = 2; |
643 | } |
644 | } |
645 | return changes; |
646 | } |
647 | |
648 | /* Rule 4: Exchange (A join B) join (C join D) -> (A join C) join (B join D) |
649 | static int |
650 | memoitem_apply_r4(memoitem *mi, sql_allocator *sa, list *memo) |
651 | { |
652 | int changes = 0; |
653 | node *n; |
654 | |
655 | if (!mi->joins || mi->level <= 2) |
656 | return 0; |
657 | for ( n = mi->joins->h; n; n = n->next) { |
658 | memojoin *mj = n->data; |
659 | |
660 | if (mj->rules <= 1 && mj->l->level >= 2) { |
661 | node *m; |
662 | |
663 | for( m = mj->l->joins->h; m; m = m->next) { |
664 | memoitem *r = NULL; |
665 | memojoin *mjl = m->data; |
666 | char *name = merge_names(sa, mjl->r->name, mj->r->name); |
667 | |
668 | if ((r = memo_find(memo, name))) { |
669 | memojoin *mjn = SA_ZNEW(sa, memojoin); |
670 | |
671 | mjn->l = mjl->l; |
672 | mjn->r = r; |
673 | mjn->rules = 2; |
674 | mjn->cost = 0; |
675 | list_append(mi->joins, mjn); |
676 | changes ++; |
677 | } |
678 | } |
679 | if (mj->rules) |
680 | mj->rules = 4; |
681 | else |
682 | mj->rules = 2; |
683 | } |
684 | } |
685 | return changes; |
686 | } |
687 | * */ |
688 | |
689 | static void |
690 | memo_apply_rules(list *memo, sql_allocator *sa, int len) |
691 | { |
692 | int level; |
693 | node *n; |
694 | |
695 | for (level = 2; level<=len; level++) { |
696 | int gchanges = 1; |
697 | |
698 | while(gchanges) { |
699 | gchanges = 0; |
700 | for ( n = memo->h; n; n = n->next) { |
701 | int changes = 0; |
702 | memoitem *mi = n->data; |
703 | |
704 | if (!mi->done && mi->level == level) { |
705 | changes += memoitem_apply_r1(mi, sa); |
706 | changes += memoitem_apply_r2(mi, sa, memo); |
707 | //changes += memoitem_apply_r4(mi, sa, memo); |
708 | |
709 | if (!changes) |
710 | mi->done = 1; |
711 | } |
712 | gchanges |= changes; |
713 | } |
714 | } |
715 | } |
716 | } |
717 | |
718 | static void |
719 | memo_locate_exps( list *memo ) |
720 | { |
721 | node *n, *m, *o; |
722 | |
723 | for(n = memo->h; n; n = n->next) { |
724 | memoitem *mi = n->data; |
725 | int prop = 0; |
726 | |
727 | if (mi->level == 2) { |
728 | sql_exp *e = mi->data; |
729 | |
730 | if (find_prop(e->p, PROP_HASHIDX)) |
731 | prop = p_pkey; |
732 | if (find_prop(e->p, PROP_JOINIDX)) |
733 | prop = p_fkey; |
734 | |
735 | if (prop) { |
736 | for (m = mi->joins->h; m; m = m->next) { |
737 | memojoin *mj = m->data; |
738 | sql_exp *e = mj->e; |
739 | |
740 | mj->prop = prop; |
741 | if (prop == p_fkey) { |
742 | sql_rel *l = mj->l->data, *f = NULL; |
743 | if (!l) |
744 | continue; |
745 | if (e) |
746 | f = find_one_rel(mi->rels, e->l); |
747 | if (f != l) /* we dislike swapped pkey/fkey joins */ |
748 | mj->prop = 0; |
749 | } |
750 | } |
751 | } |
752 | } else if (mi->level > 2) { |
753 | /* find exp which isn't in the mj->l/r->exps lists */ |
754 | for( o = mi->exps->h; o; o = o->next) { |
755 | sql_exp *e = o->data; |
756 | |
757 | for (m = mi->joins->h; m; m = m->next) { |
758 | memojoin *mj = m->data; |
759 | |
760 | if (list_find(mj->l->exps, e, NULL) == NULL && |
761 | list_find(mj->r->exps, e, NULL) == NULL) { |
762 | if (find_prop(e->p, PROP_HASHIDX)) |
763 | prop = p_pkey; |
764 | if (find_prop(e->p, PROP_JOINIDX)) |
765 | prop = p_fkey; |
766 | mj->prop = prop; |
767 | if (prop == p_fkey) { |
768 | sql_rel *l = find_one_rel(mi->rels, e->l); |
769 | sql_rel *f = find_one_rel(mj->l->rels, e->l); |
770 | if (!l) |
771 | continue; |
772 | if (f != l) /* we dislike swapped pkey/fkey joins */ |
773 | mj->prop = 0; |
774 | } |
775 | } |
776 | } |
777 | } |
778 | } |
779 | } |
780 | } |
781 | |
782 | static void |
783 | memo_compute_cost(list *memo) |
784 | { |
785 | node *n, *m; |
786 | |
787 | for ( n = memo->h; n; n = n->next) { |
788 | memoitem *mi = n->data; |
789 | |
790 | if (mi->joins) { |
791 | lng cnt = 0, width = 1; |
792 | dbl cost = 0; |
793 | |
794 | /* cost minimum of join costs */ |
795 | for ( m = mi->joins->h; m; m = m->next ) { |
796 | memojoin *mj = m->data; |
797 | |
798 | lng mincnt = MIN(mj->l->count, mj->r->count); |
799 | dbl nsel = mj->sel; |
800 | lng ocnt = MAX((lng) (mincnt*nsel), 1); |
801 | dbl ncost = 0; |
802 | |
803 | /* mincnt*mincnt_size_width*hash_const_cost + mincnt * output_width(for now just sum of width) * memaccess const */ |
804 | /* current consts are 1 and 1 */ |
805 | //ncost += ocnt * MIN(mj->l->width, mj->r->width); |
806 | width = (mj->l->count < mj->r->count)?mj->l->width:mj->r->width; |
807 | ncost += (mincnt * width * 1 ) + ocnt * (mj->l->width + mj->r->width) * 1; |
808 | assert(mj->l->cost > 0 && mj->r->cost > 0); |
809 | ncost += mj->l->cost; /* add cost of left */ |
810 | ncost += mj->r->cost; /* add cost of right */ |
811 | |
812 | width = mj->l->width + mj->r->width; |
813 | mj->cost = ncost; |
814 | |
815 | if (cnt == 0) |
816 | cnt = ocnt; |
817 | cnt = MIN(cnt,ocnt); |
818 | |
819 | if (cost == 0) |
820 | cost = ncost; |
821 | cost = MIN(cost,ncost); |
822 | } |
823 | assert(cnt > 0); |
824 | mi->count = cnt; |
825 | mi->cost = cost; |
826 | mi->width = width; |
827 | } |
828 | } |
829 | } |
830 | |
831 | #ifndef HAVE_EMBEDDED |
832 | static void |
833 | memojoin_print( memojoin *mj ) |
834 | { |
835 | printf("%s join-%s%d(cost=%f) %s" , mj->l->name, mj->prop==p_pkey?"pkey" :mj->prop==p_fkey?"fkey" :"" , mj->rules, mj->cost, mj->r->name); |
836 | } |
837 | |
838 | static void |
839 | memojoins_print( list *joins ) |
840 | { |
841 | node *n; |
842 | |
843 | if (!joins) |
844 | return; |
845 | for(n=joins->h; n; n = n->next) { |
846 | memojoin *mj = n->data; |
847 | |
848 | memojoin_print( mj ); |
849 | if (n->next) |
850 | printf(" | " ); |
851 | } |
852 | } |
853 | |
854 | static void |
855 | memoitem_print( memoitem *mi ) |
856 | { |
857 | printf("# %s(count=" LLFMT",width=" LLFMT",cost=%f): " , mi->name, mi->count, mi->width, mi->cost); |
858 | memojoins_print(mi->joins); |
859 | } |
860 | |
861 | static void |
862 | memo_print( list *memo ) |
863 | { |
864 | node *n; |
865 | int level = 0; |
866 | |
867 | for(n=memo->h; n; n = n->next) { |
868 | memoitem *mi = n->data; |
869 | |
870 | if (mi->level > level){ |
871 | level = mi->level; |
872 | printf("\n" ); |
873 | } |
874 | memoitem_print( mi ); |
875 | printf("\n" ); |
876 | } |
877 | } |
878 | #endif |
879 | |
880 | static memojoin * |
881 | find_cheapest( list *joins ) |
882 | { |
883 | node *n; |
884 | memojoin *cur = NULL; |
885 | |
886 | if (!joins) |
887 | return NULL; |
888 | cur = joins->h->data; |
889 | for ( n = joins->h; n; n = n->next) { |
890 | memojoin *mj = n->data; |
891 | |
892 | if (cur->cost > mj->cost) |
893 | cur = mj; |
894 | } |
895 | return cur; |
896 | } |
897 | |
898 | static sql_rel * |
899 | memo_select_plan( mvc *sql, list *memo, memoitem *mi, list *sdje, list *exps) |
900 | { |
901 | if (mi->level >= 2) { |
902 | memojoin *mj = find_cheapest(mi->joins); |
903 | sql_rel *top; |
904 | |
905 | top = rel_crossproduct(sql->sa, |
906 | memo_select_plan(sql, memo, mj->l, sdje, exps), |
907 | memo_select_plan(sql, memo, mj->r, sdje, exps), |
908 | op_join); |
909 | if (mi->level == 2) { |
910 | rel_join_add_exp(sql->sa, top, mi->data); |
911 | list_remove_data(sdje, mi->data); |
912 | } else { |
913 | node *djn; |
914 | |
915 | /* all other join expressions on these 2 relations */ |
916 | while((djn = list_find(sdje, mi->rels, (fcmp)&exp_joins_rels)) != NULL) { |
917 | sql_exp *e = djn->data; |
918 | |
919 | rel_join_add_exp(sql->sa, top, e); |
920 | list_remove_data(sdje, e); |
921 | } |
922 | |
923 | /* all other join expressions on these 2 relations */ |
924 | while((djn = list_find(exps, mi->rels, (fcmp)&exp_joins_rels)) != NULL) { |
925 | sql_exp *e = djn->data; |
926 | |
927 | rel_join_add_exp(sql->sa, top, e); |
928 | list_remove_data(exps, e); |
929 | } |
930 | } |
931 | return top; |
932 | } else { |
933 | return mi->data; |
934 | } |
935 | } |
936 | |
937 | sql_rel * |
938 | rel_planner(mvc *sql, list *rels, list *sdje, list *exps) |
939 | { |
940 | list *memo = memo_create(sql, rels); |
941 | memoitem *mi; |
942 | sql_rel *top; |
943 | |
944 | /* extend one attribute at a time */ |
945 | memo_add_exps(memo, sql, rels, sdje); |
946 | memo_add_attr(memo, sql, rels, sdje); |
947 | |
948 | memo_apply_rules(memo, sql->sa, list_length(rels)); |
949 | memo_locate_exps(memo); |
950 | memo_compute_cost(memo); |
951 | #ifndef HAVE_EMBEDDED |
952 | //if (0) |
953 | memo_print(memo); |
954 | #endif |
955 | mi = memo->t->data; |
956 | top = memo_select_plan(sql, memo, mi, sdje, exps); |
957 | if (list_length(sdje) != 0) |
958 | list_merge (top->exps, sdje, (fdup)NULL); |
959 | return top; |
960 | } |
961 | |
962 | |
963 | |