1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*#define DEBUG*/
10
11#include "monetdb_config.h"
12#include "rel_optimizer.h"
13#include "rel_rel.h"
14#include "rel_exp.h"
15#include "rel_prop.h"
16#include "rel_dump.h"
17#include "rel_planner.h"
18#include "rel_propagate.h"
19#include "sql_mvc.h"
20#ifdef HAVE_HGE
21#include "mal.h" /* for have_hge */
22#endif
23#include "mtime.h"
24
25#define new_func_list(sa) sa_list(sa)
26#define new_col_list(sa) sa_list(sa)
27
/* Counters filled in by rel_properties(): cnt[op] is incremented once for
 * every visited relation whose operator is 'op'. */
typedef struct global_props {
	int cnt[ddl_maxops];
} global_props;
31
32typedef sql_rel *(*rewrite_fptr)(int *changes, mvc *sql, sql_rel *rel);
33typedef sql_rel *(*rewrite_rel_fptr)(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes);
34typedef int (*find_prop_fptr)(mvc *sql, sql_rel *rel);
35
36static sql_rel * rewrite_topdown(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes);
37static sql_rel * rewrite(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes) ;
38static list * rewrite_exps(mvc *sql, list *l, rewrite_rel_fptr rewrite_rel, rewrite_fptr rewriter, int *has_changes);
39
40static sql_rel * rel_remove_empty_select(int *changes, mvc *sql, sql_rel *rel);
41
42static sql_subfunc *find_func( mvc *sql, char *name, list *exps );
43
/* An important task of the relational optimizer is to optimize the
   join order.

   The current implementation chooses the join order based on
   select counts, i.e. if one of the join sides has been reduced using
   a select, this join is chosen over one without such selections.
 */
51
/*
 * Resolve the column reference (rname, name) against the relation tree
 * 'rel' and return the column object it maps to.
 *
 * Currently only simple column expressions are found: whenever the name
 * binds to an expression that is not an e_column, the lookup fails.
 *
 * rel    relation (sub)tree to search
 * rname  optional relation name qualifier, may be NULL
 * name   column name; on base tables it is also compared, with its first
 *        character skipped, against the table's index names
 * pnr    when < 0 any matching base table column/index is accepted;
 *        otherwise a match is only accepted if its table is found at
 *        position pnr in c->t->p->members.set.  Set operators
 *        (union/inter/except) are only searched when pnr >= 0 or
 *        pnr == -2.
 * bt     out: set to the base table relation in which the name matched
 *        (set on a name match even if the pnr test then rejects it)
 *
 * Returns the sql_column found (for an index: the column of its first
 * key column), rel->r for a base table relation without a table, or NULL.
 */
void *
name_find_column( sql_rel *rel, const char *rname, const char *name, int pnr, sql_rel **bt )
{
	sql_exp *alias = NULL;
	sql_column *c = NULL;

	switch (rel->op) {
	case op_basetable: {
		node *cn;
		sql_table *t = rel->l;

		/* the base table carries a projection list: translate the
		 * requested name through it to the underlying names first */
		if (rel->exps) {
			sql_exp *e;

			if (rname)
				e = exps_bind_column2(rel->exps, rname, name);
			else
				e = exps_bind_column(rel->exps, name, NULL);
			if (!e || e->type != e_column)
				return NULL;
			if (e->l)
				rname = e->l;
			name = e->r;
		}
		/* NOTE(review): if both name and t are NULL the code below
		 * dereferences t -- confirm callers never pass that */
		if (name && !t)
			return rel->r;
		if (rname && strcmp(t->base.name, rname) != 0)
			return NULL;
		for (cn = t->columns.set->h; cn; cn = cn->next) {
			/* note: shadows the function-level 'c' */
			sql_column *c = cn->data;
			if (strcmp(c->base.name, name) == 0) {
				*bt = rel;
				if (pnr < 0 || (c->t->p &&
				    list_position(c->t->p->members.set, c->t) == pnr))
					return c;
			}
		}
		if (t->idxs.set)
			for (cn = t->idxs.set->h; cn; cn = cn->next) {
				sql_idx *i = cn->data;
				/* NOTE(review): the first character is skipped
				 * without checking that it is '%' or that name
				 * is non-empty -- confirm this is intended */
				if (strcmp(i->base.name, name+1 /* skip % */) == 0) {
					*bt = rel;
					if (pnr < 0 || (i->t->p &&
					    list_position(i->t->p->members.set, i->t) == pnr)) {
						/* an index resolves to the column of
						 * its first key column */
						sql_kc *c = i->columns->h->data;
						return c->c;
					}
				}
			}
		break;
	}
	case op_table:
		/* table func */
		return NULL;
	case op_ddl:
		if (is_updateble(rel))
			return name_find_column( rel->l, rname, name, pnr, bt);
		return NULL;
	case op_join:
	case op_left:
	case op_right:
	case op_full:
		/* first right (possible subquery) */
		c = name_find_column( rel->r, rname, name, pnr, bt);
		/* fall through */
	case op_semi:
	case op_anti:
		/* semi/anti joins arrive here with c == NULL, so they only
		 * search their left input */
		if (!c)
			c = name_find_column( rel->l, rname, name, pnr, bt);
		return c;
	case op_select:
	case op_topn:
	case op_sample:
		return name_find_column( rel->l, rname, name, pnr, bt);
	case op_union:
	case op_inter:
	case op_except:

		if (pnr >= 0 || pnr == -2) {
			/* first right (possible subquery) */
			c = name_find_column( rel->r, rname, name, pnr, bt);
			if (!c)
				c = name_find_column( rel->l, rname, name, pnr, bt);
			return c;
		}
		return NULL;

	case op_project:
	case op_groupby:
		if (!rel->exps)
			break;
		if (rname)
			alias = exps_bind_column2(rel->exps, rname, name);
		else
			alias = exps_bind_column(rel->exps, name, NULL);
		/* for a group by, a column alias is resolved once more through
		 * the list stored in rel->r */
		if (is_groupby(rel->op) && alias && alias->type == e_column && rel->r) {
			if (alias->l)
				alias = exps_bind_column2(rel->r, alias->l, alias->r);
			else
				alias = exps_bind_column(rel->r, alias->r, NULL);
		}
		if (is_groupby(rel->op) && !alias && rel->l) {
			/* Group by column not found as alias in projection
			 * list, fall back to check plain input columns */
			return name_find_column( rel->l, rname, name, pnr, bt);
		}
		break;
	case op_insert:
	case op_update:
	case op_delete:
	case op_truncate:
		break;
	}
	if (alias) { /* we found an expression with the correct name, but
			we need sql_columns */
		if (rel->l && alias->type == e_column) /* real alias */
			return name_find_column(rel->l, alias->l, alias->r, pnr, bt);
	}
	return NULL;
}
173
174static sql_column *
175exp_find_column( sql_rel *rel, sql_exp *exp, int pnr )
176{
177 if (exp->type == e_column) {
178 sql_rel *bt = NULL;
179 return name_find_column(rel, exp->l, exp->r, pnr, &bt);
180 }
181 return NULL;
182}
183
184static sql_column *
185exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt )
186{
187 if (exp->type == e_column)
188 return name_find_column(rel, exp->l, exp->r, pnr, bt);
189 return NULL;
190}
191
192/* find column for the select/join expression */
193static sql_column *
194sjexp_col(sql_exp *e, sql_rel *r)
195{
196 sql_column *res = NULL;
197
198 if (e->type == e_cmp && !is_complex_exp(e->flag)) {
199 res = exp_find_column(r, e->l, -2);
200 if (!res)
201 res = exp_find_column(r, e->r, -2);
202 }
203 return res;
204}
205
206static sql_exp *
207list_find_exp( list *exps, sql_exp *e)
208{
209 sql_exp *ne = NULL;
210
211 if (e->type != e_column)
212 return NULL;
213 if (( e->l && (ne=exps_bind_column2(exps, e->l, e->r)) != NULL) ||
214 ((!e->l && (ne=exps_bind_column(exps, e->r, NULL)) != NULL)))
215 return ne;
216 return NULL;
217}
218
219static int
220kc_column_cmp(sql_kc *kc, sql_column *c)
221{
222 /* return on equality */
223 return !(c == kc->c);
224}
225
226static void psm_exps_properties(mvc *sql, global_props *gp, list *exps);
227static void rel_properties(mvc *sql, global_props *gp, sql_rel *rel);
228
229static void
230psm_exp_properties(mvc *sql, global_props *gp, sql_exp *e)
231{
232 /* only functions need fix up */
233 if (e->type == e_psm) {
234 if (e->flag & PSM_SET) {
235 psm_exp_properties(sql, gp, e->l);
236 } else if (e->flag & PSM_RETURN) {
237 psm_exp_properties(sql, gp, e->l);
238 } else if (e->flag & PSM_WHILE) {
239 psm_exp_properties(sql, gp, e->l);
240 psm_exps_properties(sql, gp, e->r);
241 } else if (e->flag & PSM_IF) {
242 psm_exp_properties(sql, gp, e->l);
243 psm_exps_properties(sql, gp, e->r);
244 if (e->f)
245 psm_exps_properties(sql, gp, e->f);
246 } else if (e->flag & PSM_REL) {
247 rel_properties(sql, gp, e->l);
248 } else if (e->flag & PSM_EXCEPTION) {
249 psm_exp_properties(sql, gp, e->l);
250 }
251 }
252}
253
254static void
255psm_exps_properties(mvc *sql, global_props *gp, list *exps)
256{
257 node *n;
258
259 if (!exps)
260 return;
261 for (n = exps->h; n; n = n->next)
262 psm_exp_properties(sql, gp, n->data);
263}
264
265static void
266rel_properties(mvc *sql, global_props *gp, sql_rel *rel)
267{
268 if(!rel)
269 return;
270
271 gp->cnt[(int)rel->op]++;
272 switch (rel->op) {
273 case op_basetable:
274 case op_table:
275 if (rel->op == op_table && rel->l && rel->flag != 2)
276 rel_properties(sql, gp, rel->l);
277 break;
278 case op_join:
279 case op_left:
280 case op_right:
281 case op_full:
282
283 case op_semi:
284 case op_anti:
285
286 case op_union:
287 case op_inter:
288 case op_except:
289 rel_properties(sql, gp, rel->l);
290 rel_properties(sql, gp, rel->r);
291 break;
292 case op_project:
293 case op_select:
294 case op_groupby:
295 case op_topn:
296 case op_sample:
297 case op_ddl:
298 if (rel->op == op_ddl && rel->flag == ddl_psm && rel->exps)
299 psm_exps_properties(sql, gp, rel->exps);
300 if (rel->l)
301 rel_properties(sql, gp, rel->l);
302 break;
303 case op_insert:
304 case op_update:
305 case op_delete:
306 case op_truncate:
307 if (rel->r)
308 rel_properties(sql, gp, rel->r);
309 break;
310 }
311
312 switch (rel->op) {
313 case op_basetable:
314 case op_table:
315 if (!find_prop(rel->p, PROP_COUNT))
316 rel->p = prop_create(sql->sa, PROP_COUNT, rel->p);
317 break;
318 case op_join:
319 case op_left:
320 case op_right:
321 case op_full:
322
323 case op_semi:
324 case op_anti:
325
326 case op_union:
327 case op_inter:
328 case op_except:
329 break;
330
331 case op_project:
332 case op_groupby:
333 case op_topn:
334 case op_sample:
335 case op_select:
336 break;
337
338 case op_insert:
339 case op_update:
340 case op_delete:
341 case op_truncate:
342 case op_ddl:
343 break;
344 }
345}
346
347static sql_rel * rel_join_order(mvc *sql, sql_rel *rel) ;
348
349static void
350get_relations(mvc *sql, sql_rel *rel, list *rels)
351{
352 if (!rel_is_ref(rel) && rel->op == op_join && rel->exps == NULL) {
353 sql_rel *l = rel->l;
354 sql_rel *r = rel->r;
355
356 get_relations(sql, l, rels);
357 get_relations(sql, r, rels);
358 rel->l = NULL;
359 rel->r = NULL;
360 rel_destroy(rel);
361 } else {
362 rel = rel_join_order(sql, rel);
363 append(rels, rel);
364 }
365}
366
367static void
368get_inner_relations(mvc *sql, sql_rel *rel, list *rels)
369{
370 if (!rel_is_ref(rel) && is_join(rel->op)) {
371 sql_rel *l = rel->l;
372 sql_rel *r = rel->r;
373
374 get_inner_relations(sql, l, rels);
375 get_inner_relations(sql, r, rels);
376 } else {
377 append(rels, rel);
378 }
379}
380
381static int
382exp_count(int *cnt, sql_exp *e)
383{
384 if (!e)
385 return 0;
386 if (find_prop(e->p, PROP_JOINIDX))
387 *cnt += 100;
388 if (find_prop(e->p, PROP_HASHCOL))
389 *cnt += 100;
390 if (find_prop(e->p, PROP_HASHIDX))
391 *cnt += 100;
392 switch(e->type) {
393 case e_cmp:
394 if (!is_complex_exp(e->flag)) {
395 exp_count(cnt, e->l);
396 exp_count(cnt, e->r);
397 if (e->f)
398 exp_count(cnt, e->f);
399 }
400 switch (get_cmp(e)) {
401 case cmp_equal:
402 *cnt += 90;
403 return 90;
404 case cmp_notequal:
405 *cnt += 7;
406 return 7;
407 case cmp_gt:
408 case cmp_gte:
409 case cmp_lt:
410 case cmp_lte:
411 *cnt += 6;
412 if (e->f){ /* range */
413 *cnt += 6;
414 return 12;
415 }
416 return 6;
417 case cmp_filter:
418 if (exps_card(e->r) > CARD_AGGR) {
419 /* filters for joins are special */
420 *cnt += 1000;
421 return 1000;
422 }
423 *cnt += 2;
424 return 2;
425 case cmp_in:
426 case cmp_notin: {
427 list *l = e->r;
428 int c = 9 - 10*list_length(l);
429 *cnt += c;
430 return c;
431 }
432 case cmp_or: /* prefer or over functions */
433 *cnt += 3;
434 return 3;
435 case mark_in:
436 case mark_notin:
437 case mark_exists:
438 case mark_notexists:
439 *cnt += 0;
440 return 0;
441 default:
442 return 0;
443 }
444 case e_column:
445 *cnt += 20;
446 return 20;
447 case e_atom:
448 *cnt += 10;
449 return 10;
450 case e_func:
451 /* functions are more expensive, depending on the number of columns involved. */
452 if (e->card == CARD_ATOM)
453 return 0;
454 *cnt -= 5*list_length(e->l);
455 return 5*list_length(e->l);
456 case e_convert:
457 /* functions are more expensive, depending on the number of columns involved. */
458 if (e->card == CARD_ATOM)
459 return 0;
460 /* fall through */
461 default:
462 *cnt -= 5;
463 return -5;
464 }
465}
466
467static int
468exp_keyvalue(sql_exp *e)
469{
470 int cnt = 0;
471 exp_count(&cnt, e);
472 return cnt;
473}
474
475static sql_exp *
476joinexp_col(sql_exp *e, sql_rel *r)
477{
478 if (e->type == e_cmp) {
479 if (rel_has_exp(r, e->l) >= 0)
480 return e->l;
481 return e->r;
482 }
483 assert(0);
484 return NULL;
485}
486
487static sql_column *
488table_colexp(sql_exp *e, sql_rel *r)
489{
490 sql_table *t = r->l;
491
492 if (e->type == e_column) {
493 const char *name = exp_name(e);
494 node *cn;
495
496 if (r->exps) { /* use alias */
497 for (cn = r->exps->h; cn; cn = cn->next) {
498 sql_exp *ce = cn->data;
499 if (strcmp(exp_name(ce), name) == 0) {
500 name = ce->r;
501 break;
502 }
503 }
504 }
505 for (cn = t->columns.set->h; cn; cn = cn->next) {
506 sql_column *c = cn->data;
507 if (strcmp(c->base.name, name) == 0)
508 return c;
509 }
510 }
511 return NULL;
512}
513
514int
515exp_joins_rels(sql_exp *e, list *rels)
516{
517 sql_rel *l = NULL, *r = NULL;
518
519 assert (e->type == e_cmp);
520
521 if (get_cmp(e) == cmp_or) {
522 l = NULL;
523 } else if (get_cmp(e) == cmp_filter) {
524 list *ll = e->l;
525 list *lr = e->r;
526
527 l = find_rel(rels, ll->h->data);
528 r = find_rel(rels, lr->h->data);
529 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
530 list *lr = e->r;
531
532 l = find_rel(rels, e->l);
533 if (lr && lr->h)
534 r = find_rel(rels, lr->h->data);
535 } else {
536 l = find_rel(rels, e->l);
537 r = find_rel(rels, e->r);
538 }
539
540 if (l && r)
541 return 0;
542 return -1;
543}
544
545static list *
546matching_joins(sql_allocator *sa, list *rels, list *exps, sql_exp *je)
547{
548 sql_rel *l, *r;
549
550 assert (je->type == e_cmp);
551
552 l = find_rel(rels, je->l);
553 r = find_rel(rels, je->r);
554 if (l && r) {
555 list *res;
556 list *n_rels = new_rel_list(sa);
557
558 append(n_rels, l);
559 append(n_rels, r);
560 res = list_select(exps, n_rels, (fcmp) &exp_joins_rels, (fdup)NULL);
561 return res;
562 }
563 return new_rel_list(sa);
564}
565
566static int
567sql_column_kc_cmp(sql_column *c, sql_kc *kc)
568{
569 /* return on equality */
570 return (c->colnr - kc->c->colnr);
571}
572
573static sql_idx *
574find_fk_index(sql_table *l, list *lcols, sql_table *r, list *rcols)
575{
576 if (l->idxs.set) {
577 node *in;
578 for (in = l->idxs.set->h; in; in = in->next){
579 sql_idx *li = in->data;
580 if (li->type == join_idx) {
581 sql_key *rk = &((sql_fkey*)li->key)->rkey->k;
582 fcmp cmp = (fcmp)&sql_column_kc_cmp;
583
584 if (rk->t == r &&
585 list_match(lcols, li->columns, cmp) == 0 &&
586 list_match(rcols, rk->columns, cmp) == 0) {
587 return li;
588 }
589 }
590 }
591 }
592 return NULL;
593}
594
595static sql_rel *
596find_basetable( sql_rel *r)
597{
598 if (!r)
599 return NULL;
600 switch(r->op) {
601 case op_basetable:
602 if (!r->l)
603 return NULL;
604 return r;
605 case op_project:
606 case op_select:
607 return find_basetable(r->l);
608 default:
609 return NULL;
610 }
611}
612
613static int
614exps_count(list *exps)
615{
616 node *n;
617 int cnt = 0;
618
619 if (!exps)
620 return 0;
621 for (n = exps->h; n; n=n->next)
622 exp_count(&cnt, n->data);
623 return cnt;
624}
625
626static list *
627order_join_expressions(mvc *sql, list *dje, list *rels)
628{
629 list *res;
630 node *n = NULL;
631 int i, *keys, cnt = list_length(dje);
632 void **data;
633 int debug = mvc_debug_on(sql, 16);
634
635 keys = malloc(cnt*sizeof(int));
636 data = malloc(cnt*sizeof(void *));
637 if (keys == NULL || data == NULL) {
638 if (keys)
639 free(keys);
640 if (data)
641 free(data);
642 return NULL;
643 }
644 res = sa_list(sql->sa);
645 if (res == NULL) {
646 free(keys);
647 free(data);
648 return NULL;
649 }
650 for (n = dje->h, i = 0; n; n = n->next, i++) {
651 sql_exp *e = n->data;
652
653 keys[i] = exp_keyvalue(e);
654 /* add some weight for the selections */
655 if (e->type == e_cmp && !is_complex_exp(e->flag)) {
656 sql_rel *l = find_rel(rels, e->l);
657 sql_rel *r = find_rel(rels, e->r);
658
659 if (l && is_select(l->op) && l->exps)
660 keys[i] += list_length(l->exps)*10 + exps_count(l->exps)*debug;
661 if (r && is_select(r->op) && r->exps)
662 keys[i] += list_length(r->exps)*10 + exps_count(r->exps)*debug;
663 }
664 data[i] = n->data;
665 }
666 /* sort descending */
667 GDKqsort(keys, data, NULL, cnt, sizeof(int), sizeof(void *), TYPE_int, true, true);
668 for(i=0; i<cnt; i++) {
669 list_append(res, data[i]);
670 }
671 free(keys);
672 free(data);
673 return res;
674}
675
676static int
677find_join_rels(list **L, list **R, list *exps, list *rels)
678{
679 node *n;
680
681 *L = sa_list(exps->sa);
682 *R = sa_list(exps->sa);
683 if (!exps || list_length(exps) <= 1)
684 return -1;
685 for(n = exps->h; n; n = n->next) {
686 sql_exp *e = n->data;
687 sql_rel *l = NULL, *r = NULL;
688
689 if (!is_complex_exp(e->flag)){
690 l = find_rel(rels, e->l);
691 r = find_rel(rels, e->r);
692 }
693 if (l<r) {
694 list_append(*L, l);
695 list_append(*R, r);
696 } else {
697 list_append(*L, r);
698 list_append(*R, l);
699 }
700 }
701 return 0;
702}
703
704static list *
705distinct_join_exps(list *aje, list *lrels, list *rrels)
706{
707 node *n, *m, *o, *p;
708 int len = 0, i, j;
709 char *used = SA_NEW_ARRAY(aje->sa, char, len = list_length(aje));
710 list *res = sa_list(aje->sa);
711
712 memset(used, 0, len);
713 assert(len == list_length(lrels));
714 for(n = lrels->h, m = rrels->h, j = 0; n && m;
715 n = n->next, m = m->next, j++) {
716 if (n->data && m->data)
717 for(o = n->next, p = m->next, i = j+1; o && p;
718 o = o->next, p = p->next, i++) {
719 if (o->data == n->data && p->data == m->data)
720 used[i] = 1;
721 }
722 }
723 for (i = 0, n = aje->h; i < len; n = n->next, i++) {
724 if (!used[i])
725 list_append(res, n->data);
726 }
727 return res;
728}
729
/*
 * Look for foreign key (join index) matches among the join expressions
 * in 'exps' over the relations 'rels', annotate or replace the matching
 * expressions, and return the distinct join expressions ordered by
 * order_join_expressions().
 *
 * For a join expression without a PROP_JOINIDX property, all equi-join
 * expressions on the same two relations are collected and mapped onto
 * base table columns; when these columns match a join index, the
 * expression gets a PROP_JOINIDX property pointing at the index.
 * Depending on the number of columns and the debug flag, the equi-join
 * expressions are replaced in 'exps' by a single index = TID comparison.
 *
 * Note that 'exps' is modified in place.
 */
static list *
find_fk( mvc *sql, list *rels, list *exps)
{
	node *djn;
	list *sdje, *aje, *dje;
	list *lrels, *rrels;

	/* first find the distinct join expressions */
	aje = list_select(exps, rels, (fcmp) &exp_is_join, (fdup)NULL);
	/* add left/right relation */
	if (find_join_rels(&lrels, &rrels, aje, rels) < 0)
		dje = aje;
	else
		dje = distinct_join_exps(aje, lrels, rrels);
	for(djn=dje->h; djn; djn = djn->next) {
		/* equal join expressions */
		sql_idx *idx = NULL;
		sql_exp *je = djn->data, *le = je->l, *re = je->r;

		/* NOTE(review): the first complex expression ends the whole
		 * scan (break, not continue) -- confirm this is intended */
		if (is_complex_exp(je->flag))
			break;
		if (!find_prop(je->p, PROP_JOINIDX)) {
			int swapped = 0;
			/* all join expressions on the same two relations as je,
			 * and the equi-joins among them */
			list *aaje = matching_joins(sql->sa, rels, aje, je);
			list *eje = list_select(aaje, (void*)1, (fcmp) &exp_is_eqjoin, (fdup)NULL);
			/* olr/orr keep the relations as found in rels; lr/rr
			 * are narrowed to the base tables below */
			sql_rel *lr = find_rel(rels, le), *olr = lr;
			sql_rel *rr = find_rel(rels, re), *orr = rr;
			sql_rel *bt = NULL;
			char *iname;

			sql_table *l, *r;
			/* per equi-join: the operand on the left resp. right relation */
			list *lexps = list_map(eje, lr, (fmap) &joinexp_col);
			list *rexps = list_map(eje, rr, (fmap) &joinexp_col);
			list *lcols, *rcols;

			lr = find_basetable(lr);
			rr = find_basetable(rr);
			if (!lr || !rr)
				continue;
			l = lr->l;
			r = rr->l;
			lcols = list_map(lexps, lr, (fmap) &table_colexp);
			rcols = list_map(rexps, rr, (fmap) &table_colexp);
			lcols->destroy = NULL;
			rcols->destroy = NULL;
			if (list_length(lcols) != list_length(rcols))
				continue;

			/* try the foreign key in both directions; 'swapped' is
			 * only meaningful when idx ends up non-NULL */
			idx = find_fk_index(l, lcols, r, rcols);
			if (!idx) {
				idx = find_fk_index(r, rcols, l, lcols);
				swapped = 1;
			}

			/* drop the index again when its "%<index name>" column
			 * cannot be resolved in the relation holding the
			 * foreign key side.
			 * NOTE(review): if sa_strconcat() returns NULL, idx is
			 * kept and iname is NULL below -- confirm */
			if (idx && (iname = sa_strconcat( sql->sa, "%", idx->base.name)) != NULL &&
			   ((!swapped && name_find_column(olr, NULL, iname, -2, &bt) == NULL) ||
			    ( swapped && name_find_column(orr, NULL, iname, -2, &bt) == NULL)))
				idx = NULL;

			if (idx) {
				prop *p;
				node *n;
				sql_exp *t = NULL, *i = NULL;

				if (list_length(lcols) > 1 || !mvc_debug_on(sql, 512)) {

					/* Add join between idx and TID */
					if (swapped) {
						/* note: this 'l' shadows the sql_table 'l' above */
						sql_exp *s = je->l, *l = je->r;

						t = rel_find_column(sql->sa, olr, s->l, TID);
						i = rel_find_column(sql->sa, orr, l->l, iname);
						if (!t || !i)
							continue;
						je = exp_compare(sql->sa, i, t, cmp_equal);
					} else {
						sql_exp *s = je->r, *l = je->l;

						t = rel_find_column(sql->sa, orr, s->l, TID);
						i = rel_find_column(sql->sa, olr, l->l, iname);
						if (!t || !i)
							continue;
						je = exp_compare(sql->sa, i, t, cmp_equal);
					}

					/* Remove all join expressions */
					for (n = eje->h; n; n = n->next)
						list_remove_data(exps, n->data);
					append(exps, je);
					djn->data = je;
				} else if (swapped) { /* else keep je for single column expressions */
					/* rebuild je with its operands exchanged */
					je = exp_compare(sql->sa, je->r, je->l, cmp_equal);
					/* Remove all join expressions */
					for (n = eje->h; n; n = n->next)
						list_remove_data(exps, n->data);
					append(exps, je);
					djn->data = je;
				}
				/* remember the index on the (possibly new) expression */
				je->p = p = prop_create(sql->sa, PROP_JOINIDX, je->p);
				p->value = idx;
			}
		}
	}

	/* sort expressions on weighted number of reducing operators */
	sdje = order_join_expressions(sql, dje, rels);
	return sdje;
}
838
/*
 * Build a join tree over the relations in 'rels' using the join
 * expressions in 'exps', and return its top relation.
 *
 * The expressions are first ordered by find_fk().  The first ordered
 * expression picks the initial pair of relations; subsequent expressions
 * are used to attach one further relation at a time to the tree built so
 * far.  Relations that remain are added as cross products, and
 * expressions that remain are added either to the join below or to a
 * select placed on top.
 *
 * Both 'rels' and 'exps' are consumed (entries are removed) while the
 * tree is built.
 */
static sql_rel *
order_joins(mvc *sql, list *rels, list *exps)
{
	sql_rel *top = NULL, *l = NULL, *r = NULL;
	sql_exp *cje;
	node *djn;
	/* n_rels: relations already part of the tree under 'top' */
	list *sdje, *n_rels = new_rel_list(sql->sa);
	int fnd = 0;

	/* find foreign keys and reorder the expressions on reducing quality */
	sdje = find_fk(sql, rels, exps);

	/* with more than 2 relations and debug flag 256 set, the ordering
	 * is delegated to rel_planner() */
	if (list_length(rels) > 2 && mvc_debug_on(sql, 256)) {
		for(djn = sdje->h; djn; djn = djn->next ) {
			sql_exp *e = djn->data;
			list_remove_data(exps, e);
		}
		top = rel_planner(sql, rels, sdje, exps);
		return top;
	}

	/* open problem, some expressions use more than 2 relations */
	/* For example a.x = b.y * c.z; */
	if (list_length(rels) >= 2 && sdje->h) {
		/* get the first expression */
		cje = sdje->h->data;

		/* find the involved relations */

		/* complex expressions may touch multiple base tables
		 * Should be pushed up to extra selection.
		 * */
		if (cje->type != e_cmp || !is_complex_exp(cje->flag) || !find_prop(cje->p, PROP_HASHCOL) /*||
		   (cje->type == e_cmp && cje->f == NULL)*/) {
			l = find_one_rel(rels, cje->l);
			r = find_one_rel(rels, cje->r);
		}

		if (l && r && l != r) {
			list_remove_data(sdje, cje);
			list_remove_data(exps, cje);
		}
	}
	if (l && r && l != r) {
		list_remove_data(rels, l);
		list_remove_data(rels, r);
		list_append(n_rels, l);
		list_append(n_rels, r);

		/* Create a relation between l and r. Since the calling
		   functions rewrote the join tree, into a list of expressions
		   and a list of (simple) relations, there are no outer joins
		   involved, we can simply do a crossproduct here.
		 */
		top = rel_crossproduct(sql->sa, l, r, op_join);
		rel_join_add_exp(sql->sa, top, cje);

		/* all other join expressions on these 2 relations */
		while((djn = list_find(exps, n_rels, (fcmp)&exp_joins_rels)) != NULL) {
			sql_exp *e = djn->data;

			rel_join_add_exp(sql->sa, top, e);
			list_remove_data(exps, e);
		}
		/* Remove other joins on the current 'n_rels' set in the distinct list too */
		while((djn = list_find(sdje, n_rels, (fcmp)&exp_joins_rels)) != NULL)
			list_remove_data(sdje, djn->data);
		fnd = 1;
	}
	/* build join tree using the ordered list */
	while(list_length(exps) && fnd) {
		fnd = 0;
		/* find the first expression which could be added */
		/* the scan stops as soon as one expression was used (fnd);
		 * the outer loop then restarts from the head of sdje */
		for(djn = sdje->h; djn && !fnd && rels->h; djn = (!fnd)?djn->next:NULL) {
			node *ln, *rn, *en;

			cje = djn->data;
			/* which side(s) of cje are already in the tree? */
			ln = list_find(n_rels, cje->l, (fcmp)&rel_has_exp);
			rn = list_find(n_rels, cje->r, (fcmp)&rel_has_exp);

			if (ln || rn) {
				/* remove the expression from the lists */
				list_remove_data(sdje, cje);
				list_remove_data(exps, cje);
			}
			if (ln && rn) {
				/* not expected: expressions on relations already
				 * in n_rels were removed from sdje earlier */
				assert(0);
				/* create a selection on the current */
				l = ln->data;
				r = rn->data;
				rel_join_add_exp(sql->sa, top, cje);
				fnd = 1;
			} else if (ln || rn) {
				/* one side is in the tree: fetch the relation of
				 * the other side from the remaining relations.
				 * NOTE(review): r is not checked for NULL before
				 * being used below -- confirm find_rel cannot
				 * fail here */
				if (ln) {
					l = ln->data;
					r = find_rel(rels, cje->r);
				} else {
					l = rn->data;
					r = find_rel(rels, cje->l);
				}
				list_remove_data(rels, r);
				append(n_rels, r);

				/* create a join using the current expression */
				top = rel_crossproduct(sql->sa, top, r, op_join);
				rel_join_add_exp(sql->sa, top, cje);

				/* all join expressions on these tables */
				while((en = list_find(exps, n_rels, (fcmp)&exp_joins_rels)) != NULL) {
					sql_exp *e = en->data;
					rel_join_add_exp(sql->sa, top, e);
					list_remove_data(exps, e);
				}
				/* Remove other joins on the current 'n_rels'
				   set in the distinct list too */
				while((en = list_find(sdje, n_rels, (fcmp)&exp_joins_rels)) != NULL)
					list_remove_data(sdje, en->data);
				fnd = 1;
			}
		}
	}
	if (list_length(rels)) { /* more relations */
		/* relations no expression connected: add as cross products */
		node *n;
		for(n=rels->h; n; n = n->next) {
			if (top)
				top = rel_crossproduct(sql->sa, top, n->data, op_join);
			else
				top = n->data;
		}
	}
	if (list_length(exps)) { /* more expressions (add selects) */
		node *n;
		//set_processed(top);
		top = rel_select(sql->sa, top, NULL);
		for(n=exps->h; n; n = n->next) {
			sql_exp *e = n->data;

			/* find the involved relations */

			/* complex expressions may touch multiple base tables
			 * Should be push up to extra selection. */
			/*
			l = find_one_rel(rels, e->l);
			r = find_one_rel(rels, e->r);

			if (l && r)
			*/
			/* join expressions go to the relation below the new
			 * select (top->l), everything else to the select */
			if (exp_is_join_exp(e) == 0) {
				sql_rel *nr = NULL;
				if (e->flag == cmp_equal)
					nr = rel_push_join(sql, top->l, e->l, e->r, NULL, e);
				if (!nr)
					rel_join_add_exp(sql->sa, top->l, e);
			} else
				rel_select_add_exp(sql->sa, top, e);
		}
	}
	return top;
}
998
999static int
1000rel_neg_in_size(sql_rel *r)
1001{
1002 if (is_union(r->op) && r->nrcols == 0)
1003 return -1 + rel_neg_in_size(r->l);
1004 if (is_project(r->op) && r->nrcols == 0)
1005 return -1;
1006 return 0;
1007}
1008
1009static list *
1010push_in_join_down(mvc *sql, list *rels, list *exps)
1011{
1012 node *n;
1013 int restart = 1;
1014 list *nrels;
1015
1016 /* we should sort these first, ie small in's before large one's */
1017 nrels = list_sort(rels, (fkeyvalue)&rel_neg_in_size, (fdup)&rel_dup);
1018
1019 /* we need to cleanup, the new refs ! */
1020 rels->destroy = (fdestroy)rel_destroy;
1021 list_destroy(rels);
1022 rels = nrels;
1023
1024 /* one of the rels should be a op_union with nrcols == 0 */
1025 while (restart) {
1026 for (n = rels->h; n; n = n->next) {
1027 sql_rel *r = n->data;
1028
1029 restart = 0;
1030 if ((is_union(r->op) || is_project(r->op)) && r->nrcols == 0) {
1031 /* next step find expression on this relation */
1032 node *m;
1033 sql_rel *l = NULL;
1034 sql_exp *je = NULL;
1035
1036 for(m = exps->h; !je && m; m = m->next) {
1037 sql_exp *e = m->data;
1038
1039 if (e->type == e_cmp && e->flag == cmp_equal) {
1040 /* in values are on
1041 the right of the join */
1042 if (rel_has_exp(r, e->r) >= 0)
1043 je = e;
1044 }
1045 }
1046 /* with this expression find other relation */
1047 if (je && (l = find_rel(rels, je->l)) != NULL) {
1048 sql_rel *nr = rel_crossproduct(sql->sa, l, r, op_join);
1049
1050 rel_join_add_exp(sql->sa, nr, je);
1051 list_append(rels, nr);
1052 list_remove_data(rels, l);
1053 list_remove_data(rels, r);
1054 list_remove_data(exps, je);
1055 restart = 1;
1056 break;
1057 }
1058
1059 }
1060 }
1061 }
1062 return rels;
1063}
1064
1065static list *
1066push_up_join_exps( mvc *sql, sql_rel *rel)
1067{
1068 if (rel_is_ref(rel))
1069 return NULL;
1070
1071 switch(rel->op) {
1072 case op_join: {
1073 sql_rel *rl = rel->l;
1074 sql_rel *rr = rel->r;
1075 list *l, *r;
1076
1077 if (rel_is_ref(rl) && rel_is_ref(rr)) {
1078 l = rel->exps;
1079 rel->exps = NULL;
1080 return l;
1081 }
1082 l = push_up_join_exps(sql, rl);
1083 r = push_up_join_exps(sql, rr);
1084 if (l && r) {
1085 l = list_merge(l, r, (fdup)NULL);
1086 r = NULL;
1087 }
1088 if (rel->exps) {
1089 if (l && !r)
1090 r = l;
1091 l = list_merge(rel->exps, r, (fdup)NULL);
1092 }
1093 rel->exps = NULL;
1094 return l;
1095 }
1096 default:
1097 return NULL;
1098 }
1099}
1100
1101static sql_rel *
1102reorder_join(mvc *sql, sql_rel *rel)
1103{
1104 list *exps;
1105 list *rels;
1106
1107 if (rel->op == op_join && !rel_is_ref(rel))
1108 rel->exps = push_up_join_exps(sql, rel);
1109
1110 exps = rel->exps;
1111 if (!exps) /* crosstable, ie order not important */
1112 return rel;
1113 rel->exps = NULL; /* should be all crosstables by now */
1114 rels = new_rel_list(sql->sa);
1115 if (is_outerjoin(rel->op)) {
1116 sql_rel *l, *r;
1117 int cnt = 0;
1118 /* try to use an join index also for outer joins */
1119 get_inner_relations(sql, rel, rels);
1120 cnt = list_length(exps);
1121 rel->exps = find_fk(sql, rels, exps);
1122 if (list_length(rel->exps) != cnt)
1123 rel->exps = order_join_expressions(sql, exps, rels);
1124 l = rel->l;
1125 r = rel->r;
1126 if (is_join(l->op))
1127 rel->l = reorder_join(sql, rel->l);
1128 if (is_join(r->op))
1129 rel->r = reorder_join(sql, rel->r);
1130 } else {
1131 get_relations(sql, rel, rels);
1132 if (list_length(rels) > 1) {
1133 rels = push_in_join_down(sql, rels, exps);
1134 rel = order_joins(sql, rels, exps);
1135 } else {
1136 rel->exps = exps;
1137 exps = NULL;
1138 }
1139 }
1140 return rel;
1141}
1142
1143static sql_rel *
1144rel_join_order(mvc *sql, sql_rel *rel)
1145{
1146 int e_changes = 0;
1147
1148 if (!rel)
1149 return rel;
1150
1151 switch (rel->op) {
1152 case op_basetable:
1153 case op_table:
1154 break;
1155 case op_join:
1156 case op_left:
1157 case op_right:
1158 case op_full:
1159 break;
1160
1161 case op_semi:
1162 case op_anti:
1163
1164 case op_union:
1165 case op_inter:
1166 case op_except:
1167 rel->l = rel_join_order(sql, rel->l);
1168 rel->r = rel_join_order(sql, rel->r);
1169 break;
1170 case op_project:
1171 case op_select:
1172 case op_groupby:
1173 case op_topn:
1174 case op_sample:
1175 rel->l = rel_join_order(sql, rel->l);
1176 break;
1177 case op_ddl:
1178 rel->l = rel_join_order(sql, rel->l);
1179 if (rel->r)
1180 rel->r = rel_join_order(sql, rel->r);
1181 break;
1182 case op_insert:
1183 case op_update:
1184 case op_delete:
1185 case op_truncate:
1186 rel->l = rel_join_order(sql, rel->l);
1187 rel->r = rel_join_order(sql, rel->r);
1188 break;
1189 }
1190 if (is_join(rel->op) && rel->exps && !rel_is_ref(rel)) {
1191 rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes);
1192 if (!rel_is_ref(rel))
1193 rel = reorder_join(sql, rel);
1194 } else if (is_join(rel->op)) {
1195 rel->l = rel_join_order(sql, rel->l);
1196 rel->r = rel_join_order(sql, rel->r);
1197 }
1198 (void)e_changes;
1199 return rel;
1200}
1201
1202/* exp_rename */
1203static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
1204
1205static list *
1206exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t)
1207{
1208 node *n;
1209 list *nl = new_exp_list(sql->sa);
1210
1211 for(n=l->h; n; n=n->next) {
1212 sql_exp *arg = n->data;
1213
1214 arg = exp_rename(sql, arg, f, t);
1215 if (!arg)
1216 return NULL;
1217 append(nl, arg);
1218 }
1219 return nl;
1220}
1221
/*
 * exp_rename: rewrite expression e, whose column references are resolved
 * against the expression list of relation f, into an expression that
 * references columns of relation t.
 *
 * Returns:
 *  - e itself for atoms, psm expressions, functions/aggregates without
 *    arguments and columns that cannot be found in f->exps;
 *  - a newly built expression (with the properties of e propagated) for
 *    comparisons, conversions, functions and aggregates;
 *  - NULL when a sub-expression could not be rewritten.
 */
static sql_exp *
exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
{
	sql_exp *ne = NULL, *l, *r, *r2;

	switch(e->type) {
	case e_column:
		/* find the expression of f that this column reference points at */
		if (e->l) {
			ne = exps_bind_column2(f->exps, e->l, e->r);
			/* if relation name matches expressions relation name, find column based on column name alone */
		} else {
			ne = exps_bind_column(f->exps, e->r, NULL);
		}
		if (!ne)
			return e;
		/* now bind what ne itself refers to (ne->l, ne->r) in t:
		 * first fully qualified, then on the column name alone */
		e = NULL;
		if (exp_name(ne) && ne->r && ne->l)
			e = rel_bind_column2(sql, t, ne->l, ne->r, 0);
		if (!e && ne->r)
			e = rel_bind_column(sql, t, ne->r, 0);
		/* reset the session status and error string; presumably a failed
		 * bind attempt above may have set them -- TODO confirm */
		sql->session->status = 0;
		sql->errstr[0] = 0;
		if (!e && exp_is_atom(ne))
			return ne;
		/* NOTE(review): when both binds fail and ne is not an atom, e is
		 * still NULL here -- confirm exp_ref() copes with that */
		return exp_ref(sql->sa ,e);
	case e_cmp:
		if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) {
			/* both operands are expression lists */
			list *l = exps_rename(sql, e->l, f, t);
			list *r = exps_rename(sql, e->r, f, t);
			if (l && r) {
				if (get_cmp(e) == cmp_filter)
					ne = exp_filter(sql->sa, l, r, e->f, is_anti(e));
				else
					ne = exp_or(sql->sa, l, r, is_anti(e));
			}
		} else if (e->flag == cmp_in || e->flag == cmp_notin) {
			/* single left expression, list of values on the right */
			sql_exp *l = exp_rename(sql, e->l, f, t);
			list *r = exps_rename(sql, e->r, f, t);
			if (l && r)
				ne = exp_in(sql->sa, l, r, e->flag);
		} else {
			l = exp_rename(sql, e->l, f, t);
			r = exp_rename(sql, e->r, f, t);
			if (e->f) {
				/* comparison with a third operand kept in e->f */
				r2 = exp_rename(sql, e->f, f, t);
				if (l && r && r2)
					ne = exp_compare2(sql->sa, l, r, r2, e->flag);
			} else if (l && r) {
				ne = exp_compare(sql->sa, l, r, e->flag);
			}
		}
		break;
	case e_convert:
		l = exp_rename(sql, e->l, f, t);
		if (l)
			ne = exp_convert(sql->sa, l, exp_fromtype(e), exp_totype(e));
		break;
	case e_aggr:
	case e_func: {
		list *l = e->l, *nl = NULL;

		/* no arguments: nothing to rename */
		if (!l) {
			return e;
		} else {
			nl = exps_rename(sql, l, f, t);
			if (!nl)
				return NULL;
		}
		if (e->type == e_func)
			ne = exp_op(sql->sa, nl, e->f);
		else
			ne = exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
		break;
	}
	case e_atom:
	case e_psm:
		return e;
	}
	if (!ne)
		return NULL;
	/* carry the properties of the original expression over to the copy */
	return exp_propagate(sql->sa, ne, e);
}
1305
/* push the expression down, i.e. translate column references
   from relation f into expressions of relation t
*/
1309
1310static sql_exp * _exp_push_down(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
1311
1312static list *
1313exps_push_down(mvc *sql, list *exps, sql_rel *f, sql_rel *t)
1314{
1315 node *n;
1316 list *nl = new_exp_list(sql->sa);
1317
1318 for(n = exps->h; n; n = n->next) {
1319 sql_exp *arg = n->data, *narg = NULL;
1320
1321 narg = _exp_push_down(sql, arg, f, t);
1322 if (!narg)
1323 return NULL;
1324 narg = exp_propagate(sql->sa, narg, arg);
1325 append(nl, narg);
1326 }
1327 return nl;
1328}
1329
/*
 * _exp_push_down: translate expression e, expressed over relation f, into
 * an equivalent expression over relation t.
 *
 * Returns the translated expression, e itself for atoms, psm expressions
 * and argument-less functions/aggregates, or NULL when (part of) the
 * expression cannot be resolved in f or t.
 */
static sql_exp *
_exp_push_down(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
{
	/* remember the flag of the incoming expression; it is re-applied to a
	 * re-bound column below */
	int flag = e->flag;
	sql_exp *ne = NULL, *l, *r, *r2;

	switch(e->type) {
	case e_column:
		if (is_union(f->op)) {
			/* for unions the mapping is positional: take the expression
			 * of t at the position e has in f->exps */
			int p = list_position(f->exps, rel_find_exp(f, e));

			return list_fetch(t->exps, p);
		}
		/* resolve e in f: qualified lookup when a relation name is set,
		 * otherwise by column name only */
		if (e->l) {
			ne = rel_bind_column2(sql, f, e->l, e->r, 0);
			/* if relation name matches expressions relation name, find column based on column name alone */
		}
		if (!ne && !e->l)
			ne = rel_bind_column(sql, f, e->r, 0);
		if (!ne || ne->type != e_column)
			return NULL;
		e = NULL;
		/*
		if (exp_name(ne) && exp_relname(ne))
			e = rel_bind_column2(sql, t, exp_relname(ne), exp_name(ne), 0);
		if (!e && exp_name(ne) && !exp_relname(ne))
			e = rel_bind_column(sql, t, exp_name(ne), 0);
		if (!e && exp_name(ne) && ne->r && ne->l)
			e = rel_bind_column2(sql, t, ne->l, ne->r, 0);
		if (!e && ne->r && !ne->l)
			e = rel_bind_column(sql, t, ne->r, 0);
		*/
		/* bind what ne refers to (ne->l, ne->r) in the target relation */
		if (ne->l && ne->r)
			e = rel_bind_column2(sql, t, ne->l, ne->r, 0);
		if (!e && ne->r && !ne->l)
			e = rel_bind_column(sql, t, ne->r, 0);
		/* reset the session status and error string; presumably a failed
		 * bind attempt above may have set them -- TODO confirm */
		sql->session->status = 0;
		sql->errstr[0] = 0;
		if (e && flag)
			e->flag = flag;
		/* if the upper exp was an alias, keep this */
		if (e && exp_relname(ne))
			exp_setname(sql->sa, e, exp_relname(ne), exp_name(ne));
		return e;
	case e_cmp:
		if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) {
			/* both operands are expression lists */
			list *l, *r;

			l = exps_push_down(sql, e->l, f, t);
			if (!l)
				return NULL;
			r = exps_push_down(sql, e->r, f, t);
			if (!r)
				return NULL;
			if (get_cmp(e) == cmp_filter)
				return exp_filter(sql->sa, l, r, e->f, is_anti(e));
			return exp_or(sql->sa, l, r, is_anti(e));
		} else if (e->flag == cmp_in || e->flag == cmp_notin) {
			/* single left expression, list of values on the right */
			list *r;

			l = _exp_push_down(sql, e->l, f, t);
			if (!l)
				return NULL;
			r = exps_push_down(sql, e->r, f, t);
			if (!r)
				return NULL;
			return exp_in(sql->sa, l, r, e->flag);
		} else {
			l = _exp_push_down(sql, e->l, f, t);
			if (!l)
				return NULL;
			r = _exp_push_down(sql, e->r, f, t);
			if (!r)
				return NULL;
			if (e->f) {
				/* comparison with a third operand kept in e->f */
				r2 = _exp_push_down(sql, e->f, f, t);
				if (l && r && r2)
					ne = exp_compare2(sql->sa, l, r, r2, e->flag);
			} else if (l && r) {
				/* keep the operand with the higher cardinality on the
				 * left, mirroring the comparison operator when swapping */
				if (l->card < r->card)
					ne = exp_compare(sql->sa, r, l, swap_compare((comp_type)e->flag));
				else
					ne = exp_compare(sql->sa, l, r, e->flag);
			}
		}
		if (!ne)
			return NULL;
		return exp_propagate(sql->sa, ne, e);
	case e_convert:
		l = _exp_push_down(sql, e->l, f, t);
		if (l)
			return exp_convert(sql->sa, l, exp_fromtype(e), exp_totype(e));
		return NULL;
	case e_aggr:
	case e_func: {
		list *l = e->l, *nl = NULL;

		/* no arguments: nothing to translate */
		if (!l) {
			return e;
		} else {
			nl = exps_push_down(sql, l, f, t);
			if (!nl)
				return NULL;
		}
		if (e->type == e_func)
			return exp_op(sql->sa, nl, e->f);
		else
			return exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
	}
	case e_atom:
	case e_psm:
		return e;
	}
	return NULL;
}
1445
1446static sql_exp *
1447exp_push_down(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
1448{
1449 return _exp_push_down(sql, e, f, t);
1450}
1451
1452/* some projections results are order dependend (row_number etc) */
1453static int
1454project_unsafe(sql_rel *rel, int allow_identity)
1455{
1456 sql_rel *sub = rel->l;
1457 node *n;
1458
1459 if (need_distinct(rel) || rel->r /* order by */)
1460 return 1;
1461 if (!rel->exps)
1462 return 0;
1463 /* projects without sub and projects around ddl's cannot be changed */
1464 if (!sub || (sub && sub->op == op_ddl))
1465 return 1;
1466 for(n = rel->exps->h; n; n = n->next) {
1467 sql_exp *e = n->data;
1468
1469 /* aggr func in project ! */
1470 if (exp_unsafe(e, allow_identity))
1471 return 1;
1472 }
1473 return 0;
1474}
1475
1476static int
1477math_unsafe(sql_subfunc *f)
1478{
1479 if (!f->func->s) {
1480 if (strcmp(f->func->base.name, "sql_div") == 0 ||
1481 strcmp(f->func->base.name, "sqrt") == 0 ||
1482 strcmp(f->func->base.name, "atan") == 0 )
1483 return 1;
1484 }
1485 return 0;
1486}
1487
/*
 * can_push_func: decide whether expression e can be evaluated below the
 * current operator, on top of relation rel (rel may be NULL, in which case
 * column references are not checked against a relation).
 *
 * Returns non-zero when e can be pushed. *must is set when the expression
 * involves at least one column reference, i.e. when pushing it down
 * actually moves work onto rel.
 */
static int
can_push_func(sql_exp *e, sql_rel *rel, int *must)
{
	if (!e)
		return 0;
	switch(e->type) {
	case e_cmp: {
		int mustl = 0, mustr = 0, mustf = 0;
		sql_exp *l = e->l, *r = e->r, *f = e->f;

		/* or, in, not in and filter comparisons are never pushed */
		if (get_cmp(e) == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter)
			return 0;
		/* The assignments to *must are part of the conditions: each
		 * alternative only succeeds when the operand(s) it looks at are
		 * pushable AND require a push. The alternatives are tried in
		 * order: left operand; right operand (no third operand); right
		 * and third operand together. */
		return ((l->type == e_column || can_push_func(l, rel, &mustl)) && (*must = mustl)) ||
			(!f && (r->type == e_column || can_push_func(r, rel, &mustr)) && (*must = mustr)) ||
			(f &&
				(r->type == e_column || can_push_func(r, rel, &mustr)) &&
				(f->type == e_column || can_push_func(f, rel, &mustf)) && (*must = (mustr || mustf)));
	}
	case e_convert:
		return can_push_func(e->l, rel, must);
	case e_func: {
		list *l = e->l;
		node *n;
		int res = 1, lmust = 0;

		if (e->f){
			sql_subfunc *f = e->f;
			/* functions flagged by math_unsafe() and anything that is
			 * not a plain function (F_FUNC) stay where they are */
			if (math_unsafe(f) || f->func->type != F_FUNC)
				return 0;
		}
		/* every argument has to be pushable; stop at the first failure */
		if (l) for (n = l->h; n && res; n = n->next)
			res &= can_push_func(n->data, rel, &lmust);
		/* all arguments pushable and none of them needs a column */
		if (res && !lmust)
			return 1;
		(*must) |= lmust;
		return res;
	}
	case e_column:
		/* a column is only pushable when rel provides it */
		if (rel && !rel_find_exp(rel, e))
			return 0;
		(*must) = 1;
		/* fall through */
	case e_atom:
	default:
		return 1;
	}
}
1535
1536static int
1537exps_can_push_func(list *exps, sql_rel *rel)
1538{
1539 node *n;
1540
1541 for(n = exps->h; n; n = n->next) {
1542 sql_exp *e = n->data;
1543 int must = 0, mustl = 0, mustr = 0;
1544
1545 if (is_joinop(rel->op) && ((can_push_func(e, rel->l, &mustl) && mustl) || (can_push_func(e, rel->r, &mustr) && mustr)))
1546 return 1;
1547 else if (is_select(rel->op) && can_push_func(e, NULL, &must) && must)
1548 return 1;
1549 }
1550 return 0;
1551}
1552
1553static int
1554exp_needs_push_down(sql_exp *e)
1555{
1556 if (!e)
1557 return 0;
1558 switch(e->type) {
1559 case e_cmp:
1560 if (get_cmp(e) == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter)
1561 return 0;
1562 return exp_needs_push_down(e->l) || exp_needs_push_down(e->r) || (e->f && exp_needs_push_down(e->f));
1563 case e_convert:
1564 return exp_needs_push_down(e->l);
1565 case e_aggr:
1566 case e_func:
1567 return 1;
1568 case e_column:
1569 case e_atom:
1570 default:
1571 return 0;
1572 }
1573}
1574
1575static int
1576exps_need_push_down( list *exps )
1577{
1578 node *n;
1579 for(n = exps->h; n; n = n->next)
1580 if (exp_needs_push_down(n->data))
1581 return 1;
1582 return 0;
1583}
1584
1585static sql_rel *
1586rel_push_func_down(int *changes, mvc *sql, sql_rel *rel)
1587{
1588 if ((is_select(rel->op) || is_joinop(rel->op)) && rel->l && rel->exps && !(rel_is_ref(rel))) {
1589 list *exps = rel->exps;
1590
1591 if (is_select(rel->op) && list_length(rel->exps) <= 1) /* only push down when thats useful */
1592 return rel;
1593 if (exps_can_push_func(exps, rel) && exps_need_push_down(exps)) {
1594 sql_rel *nrel;
1595 sql_rel *l = rel->l, *ol = l;
1596 sql_rel *r = rel->r, *or = r;
1597 node *n;
1598
1599 /* we need a full projection, group by's and unions cannot be extended
1600 * with more expressions */
1601 if (rel_is_ref(l))
1602 return rel;
1603 if (l->op != op_project) {
1604 if (is_subquery(l))
1605 return rel;
1606 rel->l = l = rel_project(sql->sa, l,
1607 rel_projections(sql, l, NULL, 1, 1));
1608 }
1609 if (is_joinop(rel->op) && rel_is_ref(r))
1610 return rel;
1611 if (is_joinop(rel->op) && r->op != op_project) {
1612 if (is_subquery(r))
1613 return rel;
1614 rel->r = r = rel_project(sql->sa, r,
1615 rel_projections(sql, r, NULL, 1, 1));
1616 }
1617 nrel = rel_project(sql->sa, rel, rel_projections(sql, rel, NULL, 1, 1));
1618 for(n = exps->h; n; n = n->next) {
1619 sql_exp *e = n->data, *ne = NULL;
1620 int must = 0, mustl = 0, mustr = 0;
1621
1622 if (e->type == e_column)
1623 continue;
1624 if ((is_joinop(rel->op) && ((can_push_func(e, l, &mustl) && mustl) || (can_push_func(e, r, &mustr) && mustr))) ||
1625 (is_select(rel->op) && can_push_func(e, NULL, &must) && must)) {
1626 must = 0; mustl = 0; mustr = 0;
1627 if (e->type != e_cmp) { /* predicate */
1628 if ((is_joinop(rel->op) && ((can_push_func(e, l, &mustl) && mustl) || (can_push_func(e, r, &mustr) && mustr))) ||
1629 (is_select(rel->op) && can_push_func(e, NULL, &must) && must)) {
1630 exp_label(sql->sa, e, ++sql->label);
1631 if (mustr)
1632 append(r->exps, e);
1633 else
1634 append(l->exps, e);
1635 e = exp_ref(sql->sa, e);
1636 n->data = e;
1637 (*changes)++;
1638 }
1639 } else {
1640 ne = e->l;
1641 if ((is_joinop(rel->op) && ((can_push_func(ne, l, &mustl) && mustl) || (can_push_func(ne, r, &mustr) && mustr))) ||
1642 (is_select(rel->op) && can_push_func(ne, NULL, &must) && must)) {
1643 exp_label(sql->sa, ne, ++sql->label);
1644 if (mustr)
1645 append(r->exps, ne);
1646 else
1647 append(l->exps, ne);
1648 ne = exp_ref(sql->sa, ne);
1649 (*changes)++;
1650 }
1651 e->l = ne;
1652
1653 must = 0; mustl = 0; mustr = 0;
1654 ne = e->r;
1655 if ((is_joinop(rel->op) && ((can_push_func(ne, l, &mustl) && mustl) || (can_push_func(ne, r, &mustr) && mustr))) ||
1656 (is_select(rel->op) && can_push_func(ne, NULL, &must) && must)) {
1657 exp_label(sql->sa, ne, ++sql->label);
1658 if (mustr)
1659 append(r->exps, ne);
1660 else
1661 append(l->exps, ne);
1662 ne = exp_ref(sql->sa, ne);
1663 (*changes)++;
1664 }
1665 e->r = ne;
1666
1667 if (e->f) {
1668 must = 0; mustl = 0; mustr = 0;
1669 ne = e->f;
1670 if ((is_joinop(rel->op) && ((can_push_func(ne, l, &mustl) && mustl) || (can_push_func(ne, r, &mustr) && mustr))) ||
1671 (is_select(rel->op) && can_push_func(ne, NULL, &must) && must)) {
1672 exp_label(sql->sa, ne, ++sql->label);
1673 if (mustr)
1674 append(r->exps, ne);
1675 else
1676 append(l->exps, ne);
1677 ne = exp_ref(sql->sa, ne);
1678 (*changes)++;
1679 }
1680 e->f = ne;
1681 }
1682 }
1683 }
1684 }
1685 if (*changes) {
1686 rel = nrel;
1687 } else {
1688 if (l != ol)
1689 rel->l = ol;
1690 if (is_joinop(rel->op) && r != or)
1691 rel->r = or;
1692 }
1693 }
1694 }
1695 if (rel->op == op_project && rel->l && rel->exps) {
1696 sql_rel *pl = rel->l;
1697
1698 if (is_joinop(pl->op) && exps_can_push_func(rel->exps, rel)) {
1699 node *n;
1700 sql_rel *l = pl->l, *r = pl->r;
1701 list *nexps;
1702
1703 if (l->op != op_project) {
1704 if (is_subquery(l))
1705 return rel;
1706 pl->l = l = rel_project(sql->sa, l,
1707 rel_projections(sql, l, NULL, 1, 1));
1708 }
1709 if (is_joinop(rel->op) && r->op != op_project) {
1710 if (is_subquery(r))
1711 return rel;
1712 pl->r = r = rel_project(sql->sa, r,
1713 rel_projections(sql, r, NULL, 1, 1));
1714 }
1715 nexps = new_exp_list(sql->sa);
1716 for ( n = rel->exps->h; n; n = n->next) {
1717 sql_exp *e = n->data;
1718 int mustl = 0, mustr = 0;
1719
1720 if ((can_push_func(e, l, &mustl) && mustl) ||
1721 (can_push_func(e, r, &mustr) && mustr)) {
1722 if (mustl)
1723 append(l->exps, e);
1724 else
1725 append(r->exps, e);
1726 } else
1727 append(nexps, e);
1728 }
1729 rel->exps = nexps;
1730 (*changes)++;
1731 }
1732 }
1733 return rel;
1734}
1735
1736
1737/*
1738 * Push Count inside crossjoin down, and multiply the results
1739 *
1740 * project ( project(
1741 * group by ( crossproduct (
1742 * crossproduct( project (
1743 * L, => group by (
1744 * R L
1745 * ) [ ] [ count NOT NULL ] ) [ ] [ count NOT NULL ]
1746 * ) ),
1747 * ) [ NOT NULL ] project (
1748 * group by (
1749 * R
1750 * ) [ ] [ count NOT NULL ]
1751 * )
1752 * ) [ sql_mul(.., .. NOT NULL) ]
1753 * )
1754 */
1755static sql_rel *
1756rel_push_count_down(int *changes, mvc *sql, sql_rel *rel)
1757{
1758 sql_rel *r;
1759
1760 if (!is_groupby(rel->op))
1761 return rel;
1762
1763 r = rel->l;
1764
1765 if (is_groupby(rel->op) && !rel_is_ref(rel) &&
1766 r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
1767 /* currently only single count aggregation is handled, no other projects or aggregation */
1768 list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
1769 sql_exp *nce, *oce;
1770 sql_rel *gbl, *gbr; /* Group By */
1771 sql_rel *cp; /* Cross Product */
1772 sql_subfunc *mult;
1773 list *args;
1774 const char *rname = NULL, *name = NULL;
1775 sql_rel *srel;
1776
1777 oce = rel->exps->h->data;
1778 if (oce->l) /* we only handle COUNT(*) */
1779 return rel;
1780 rname = exp_relname(oce);
1781 name = exp_name(oce);
1782
1783 args = new_exp_list(sql->sa);
1784 srel = r->l;
1785 {
1786 sql_subaggr *cf = sql_bind_aggr(sql->sa, sql->session->schema, "count", NULL);
1787 sql_exp *cnt, *e = exp_aggr(sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
1788
1789 exp_label(sql->sa, e, ++sql->label);
1790 cnt = exp_ref(sql->sa, e);
1791 gbl = rel_groupby(sql, rel_dup(srel), NULL);
1792 rel_groupby_add_aggr(sql, gbl, e);
1793 append(args, cnt);
1794 }
1795
1796 srel = r->r;
1797 {
1798 sql_subaggr *cf = sql_bind_aggr(sql->sa, sql->session->schema, "count", NULL);
1799 sql_exp *cnt, *e = exp_aggr(sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0);
1800
1801 exp_label(sql->sa, e, ++sql->label);
1802 cnt = exp_ref(sql->sa, e);
1803 gbr = rel_groupby(sql, rel_dup(srel), NULL);
1804 rel_groupby_add_aggr(sql, gbr, e);
1805 append(args, cnt);
1806 }
1807
1808 mult = find_func(sql, "sql_mul", args);
1809 cp = rel_crossproduct(sql->sa, gbl, gbr, op_join);
1810
1811 nce = exp_op(sql->sa, args, mult);
1812 exp_setname(sql->sa, nce, rname, name );
1813
1814 rel_destroy(rel);
1815 rel = rel_project(sql->sa, cp, append(new_exp_list(sql->sa), nce));
1816
1817 (*changes)++;
1818 }
1819
1820 return rel;
1821}
1822
1823
1824static sql_rel *
1825rel_simplify_project_fk_join(int *changes, mvc *sql, sql_rel *r, list *pexps)
1826{
1827 sql_rel *rl = r->l;
1828 sql_rel *rr = r->r;
1829 sql_exp *je;
1830 node *n;
1831 int fk_left = 1;
1832
1833 /* check for foreign key join */
1834 if (!r->exps || list_length(r->exps) != 1)
1835 return r;
1836 je = r->exps->h->data;
1837 if (je && !find_prop(je->p, PROP_JOINIDX))
1838 return r;
1839 /* je->l == foreign expression, je->r == primary expression */
1840 if (rel_find_exp(r->l, je->l)) {
1841 fk_left = 1;
1842 } else if (rel_find_exp(r->r, je->l)) {
1843 fk_left = 0;
1844 } else { /* not found */
1845 return r;
1846 }
1847
1848 (void)sql;
1849#if 0
1850 if (fk_left && is_join(rl->op) && !rel_is_ref(rl)) {
1851 rl = rel_simplify_project_fk_join(changes, sql, rl, pexps);
1852 r->l = rl;
1853 }
1854 if (!fk_left && is_join(rr->op) && !rel_is_ref(rr)) {
1855 rr = rel_simplify_project_fk_join(changes, sql, rr, pexps);
1856 r->r = rr;
1857 }
1858#endif
1859 /* primary side must be a full table */
1860 if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
1861 (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
1862 return r;
1863
1864 /* projection columns from the foreign side */
1865 for (n = pexps->h; n; n = n->next) {
1866 sql_exp *pe = n->data;
1867
1868 if (pe && is_atom(pe->type))
1869 continue;
1870 if (pe && !is_alias(pe->type))
1871 return r;
1872 /* check for columns from the pk side, then keep the join with the pk */
1873 if ((fk_left && rel_find_exp(r->r, pe)) ||
1874 (!fk_left && rel_find_exp(r->l, pe)))
1875 return r;
1876 }
1877
1878 (*changes)++;
1879 /* rewrite, ie remove pkey side */
1880 if (fk_left)
1881 return r->l;
1882 return r->r;
1883}
1884
1885static sql_rel *
1886rel_simplify_count_fk_join(int *changes, mvc *sql, sql_rel *r, list *gexps)
1887{
1888 sql_rel *rl = r->l;
1889 sql_rel *rr = r->r;
1890 sql_exp *oce, *je;
1891 int fk_left = 1;
1892
1893 /* check for foreign key join */
1894 if (!r->exps || list_length(r->exps) != 1)
1895 return r;
1896 je = r->exps->h->data;
1897 if (je && !find_prop(je->p, PROP_JOINIDX))
1898 return r;
1899 /* je->l == foreign expression, je->r == primary expression */
1900 if (rel_find_exp(r->l, je->l)) {
1901 fk_left = 1;
1902 } else if (rel_find_exp(r->r, je->l)) {
1903 fk_left = 0;
1904 } else { /* not found */
1905 return r;
1906 }
1907
1908 oce = gexps->h->data;
1909 if (oce->l) /* we only handle COUNT(*) */
1910 return r;
1911
1912 if (fk_left && is_join(rl->op) && !rel_is_ref(rl)) {
1913 rl = rel_simplify_count_fk_join(changes, sql, rl, gexps);
1914 r->l = rl;
1915 }
1916 if (!fk_left && is_join(rr->op) && !rel_is_ref(rr)) {
1917 rr = rel_simplify_count_fk_join(changes, sql, rr, gexps);
1918 r->r = rr;
1919 }
1920 /* primary side must be a full table */
1921 if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) ||
1922 (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op)))
1923 return r;
1924
1925 (*changes)++;
1926 /* rewrite, ie remove pkey side */
1927 if (fk_left)
1928 return r->l;
1929 return r->r;
1930}
1931
/*
 * Handle (left/right/outer/natural) join fk-pk rewrites
 *   1 group by ( fk-pk-join () ) [ count(*) ] -> group by ( fk )
 *   2 project ( fk-pk-join () ) [ fk-column ] -> project (fk table)[ fk-column ]
 *   3 project ( fk1-pk1-join( fk2-pk2-join()) [ fk-column, pk1 column ] -> project (fk1-pk1-join)[ fk-column, pk1 column ]
 */
1938static sql_rel *
1939rel_simplify_fk_joins(int *changes, mvc *sql, sql_rel *rel)
1940{
1941 sql_rel *r = NULL;
1942
1943 if (rel->op == op_project)
1944 r = rel->l;
1945
1946 while (rel->op == op_project && r && r->exps && list_length(r->exps) == 1 && is_join(r->op) && !(rel_is_ref(r))) {
1947 sql_rel *or = r;
1948
1949 r = rel_simplify_project_fk_join(changes, sql, r, rel->exps);
1950 if (r == or)
1951 return rel;
1952 rel->l = r;
1953 }
1954
1955 (void)sql;
1956 if (!is_groupby(rel->op))
1957 return rel;
1958
1959 r = rel->l;
1960 while(r && r->op == op_project)
1961 r = r->l;
1962
1963 while (is_groupby(rel->op) && !rel_is_ref(rel) &&
1964 r && r->exps && is_join(r->op) && list_length(r->exps) == 1 && !(rel_is_ref(r)) &&
1965 /* currently only single count aggregation is handled, no other projects or aggregation */
1966 list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) {
1967 sql_rel *or = r;
1968
1969 r = rel_simplify_count_fk_join(changes, sql, r, rel->exps);
1970 if (r == or)
1971 return rel;
1972 rel->l = r;
1973 }
1974 return rel;
1975}
1976
1977/*
1978 * Push TopN (only LIMIT, no ORDER BY) down through projections underneath crossproduct, i.e.,
1979 *
1980 * topn( topn(
1981 * project( project(
1982 * crossproduct( crossproduct(
1983 * L, => topn( L )[ n ],
1984 * R topn( R )[ n ]
1985 * ) )
1986 * )[ Cs ]* )[ Cs ]*
1987 * )[ n ] )[ n ]
1988 *
1989 * (TODO: in case of n==1 we can omit the original top-level TopN)
1990 *
1991 * also push topn under (non reordering) projections.
1992 */
1993
1994static list *
1995sum_limit_offset(mvc *sql, list *exps )
1996{
1997 list *nexps = new_exp_list(sql->sa);
1998 sql_subtype *lng = sql_bind_localtype("lng");
1999 sql_subfunc *add;
2000
2001 /* if the expression list only consists of a limit expression,
2002 * we copy it */
2003 if (list_length(exps) == 1 && exps->h->data)
2004 return append(nexps, exps->h->data);
2005 add = sql_bind_func_result(sql->sa, sql->session->schema, "sql_add", lng, lng, lng);
2006 return append(nexps, exp_op(sql->sa, exps, add));
2007}
2008
2009static int
2010topn_save_exps( list *exps )
2011{
2012 node *n;
2013
2014 /* Limit only expression lists are always save */
2015 if (list_length(exps) == 1)
2016 return 1;
2017 for (n = exps->h; n; n = n->next ) {
2018 sql_exp *e = n->data;
2019
2020 if (!e || e->type != e_atom)
2021 return 0;
2022 }
2023 return 1;
2024}
2025
2026static void
2027rel_no_rename_exps( list *exps )
2028{
2029 node *n;
2030
2031 for (n = exps->h; n; n = n->next) {
2032 sql_exp *e = n->data;
2033
2034 exp_setalias(e, e->l, e->r);
2035 }
2036}
2037
2038static void
2039rel_rename_exps( mvc *sql, list *exps1, list *exps2)
2040{
2041 int pos = 0;
2042 node *n, *m;
2043
2044 (void)sql;
2045 /* check if a column uses an alias earlier in the list */
2046 for (n = exps1->h, m = exps2->h; n && m; n = n->next, m = m->next, pos++) {
2047 sql_exp *e2 = m->data;
2048
2049 if (e2->type == e_column) {
2050 sql_exp *ne = NULL;
2051
2052 if (e2->l)
2053 ne = exps_bind_column2(exps2, e2->l, e2->r);
2054 if (!ne && !e2->l)
2055 ne = exps_bind_column(exps2, e2->r, NULL);
2056 if (ne) {
2057 int p = list_position(exps2, ne);
2058
2059 if (p < pos) {
2060 ne = list_fetch(exps1, p);
2061 if (e2->l)
2062 e2->l = (void *) exp_relname(ne);
2063 e2->r = (void *) exp_name(ne);
2064 }
2065 }
2066 }
2067 }
2068
2069 assert(list_length(exps1) <= list_length(exps2));
2070 for (n = exps1->h, m = exps2->h; n && m; n = n->next, m = m->next) {
2071 sql_exp *e1 = n->data;
2072 sql_exp *e2 = m->data;
2073 const char *rname = exp_relname(e1);
2074
2075 if (!rname && e1->type == e_column && e1->l && exp_relname(e2) &&
2076 strcmp(e1->l, exp_relname(e2)) == 0)
2077 rname = exp_relname(e2);
2078 exp_setalias(e2, rname, exp_name(e1));
2079 }
2080 MT_lock_set(&exps2->ht_lock);
2081 exps2->ht = NULL;
2082 MT_lock_unset(&exps2->ht_lock);
2083}
2084
/*
 * rel_push_topn_down: duplicate a topn (limit/offset) below the operators
 * underneath it, so that less data flows upwards. The original topn stays
 * in place. Handled cases, in order:
 *  - topn directly on a union: a topn is added on both union inputs;
 *  - topn on an ordered projection on a union: the projection (including
 *    its order by) and a topn are copied onto both union inputs;
 *  - topn on (a chain of projections on) a crossproduct: a topn is added
 *    on both crossproduct inputs.
 *
 * Only applies when topn_save_exps() accepts the topn expressions.
 * *changes is incremented when a rewrite is applied; rel is returned.
 */
static sql_rel *
rel_push_topn_down(int *changes, mvc *sql, sql_rel *rel)
{
	sql_rel *rl, *r = rel->l;

	if (rel->op == op_topn && topn_save_exps(rel->exps)) {
		sql_rel *rp = NULL;

		/* never push a topn through a distinct projection */
		if (r && r->op == op_project && need_distinct(r))
			return rel;
		/* duplicate topn direct under union */

		if (r && r->exps && r->op == op_union && !(rel_is_ref(r)) && r->l) {
			sql_rel *u = r, *x;
			sql_rel *ul = u->l;
			sql_rel *ur = u->r;

			/* only push topn once */
			x = ul;
			while(x->op == op_project && x->l)
				x = x->l;
			if (x && x->op == op_topn)
				return rel;
			x = ur;
			while(x->op == op_project && x->l)
				x = x->l;
			if (x && x->op == op_topn)
				return rel;

			ul = rel_topn(sql->sa, ul, sum_limit_offset(sql, rel->exps));
			ur = rel_topn(sql->sa, ur, sum_limit_offset(sql, rel->exps));
			u->l = ul;
			u->r = ur;
			(*changes)++;
			return rel;
		}
		/* duplicate topn + [ project-order ] under union */
		if (r)
			rp = r->l;
		if (r && r->exps && r->op == op_project && !(rel_is_ref(r)) && r->r && r->l &&
		    rp->op == op_union) {
			sql_rel *u = rp, *ou = u, *x;
			sql_rel *ul = u->l;
			sql_rel *ur = u->r;
			int add_r = 0;

			/* only push topn once */
			x = ul;
			while(x->op == op_project && x->l)
				x = x->l;
			if (x && x->op == op_topn)
				return rel;
			x = ur;
			while(x->op == op_project && x->l)
				x = x->l;
			if (x && x->op == op_topn)
				return rel;

			/* the union input has more expressions than the projection:
			 * the order by expressions get added to the copies below */
			if (list_length(ul->exps) > list_length(r->exps))
				add_r = 1;
			ul = rel_dup(ul);
			ur = rel_dup(ur);
			if (!is_project(ul->op))
				ul = rel_project(sql->sa, ul,
					rel_projections(sql, ul, NULL, 1, 1));
			if (!is_project(ur->op))
				ur = rel_project(sql->sa, ur,
					rel_projections(sql, ur, NULL, 1, 1));
			/* make both inputs use the names of the union's expressions */
			rel_rename_exps(sql, u->exps, ul->exps);
			rel_rename_exps(sql, u->exps, ur->exps);

			/* introduce projects under the set */
			ul = rel_project(sql->sa, ul, NULL);
			ul->exps = exps_copy(sql, r->exps);
			/* possibly add order by column */
			if (add_r)
				ul->exps = list_merge(ul->exps, exps_copy(sql, r->r), NULL);
			ul->r = exps_copy(sql, r->r);
			ul = rel_topn(sql->sa, ul, sum_limit_offset(sql, rel->exps));
			ur = rel_project(sql->sa, ur, NULL);
			ur->exps = exps_copy(sql, r->exps);
			/* possibly add order by column */
			if (add_r)
				ur->exps = list_merge(ur->exps, exps_copy(sql, r->r), NULL);
			ur->r = exps_copy(sql, r->r);
			ur = rel_topn(sql->sa, ur, sum_limit_offset(sql, rel->exps));
			/* new union over the two limited, ordered inputs */
			u = rel_setop(sql->sa, ul, ur, op_union);
			u->exps = exps_alias(sql->sa, r->exps);
			set_processed(u);
			/* possibly add order by column */
			if (add_r)
				u->exps = list_merge(u->exps, exps_copy(sql, r->r), NULL);
			/* NOTE(review): a distinct projection r already caused an
			 * early return at the top, so this test looks unreachable
			 * -- confirm */
			if (need_distinct(r)) {
				set_distinct(ul);
				set_distinct(ur);
			}

			/* zap names */
			rel_no_rename_exps(u->exps);
			rel_destroy(ou);

			/* replacement for projection r on top of the new union; it
			 * takes over the order by list of r */
			ur = rel_project(sql->sa, u, exps_alias(sql->sa, r->exps));
			ur->r = r->r;
			r->l = NULL;

			if (need_distinct(r))
				set_distinct(ur);

			rel_destroy(r);
			rel->l = ur;
			(*changes)++;
			return rel;
		}

		/* pass through projections */
		while (r && is_project(r->op) && !need_distinct(r) &&
			!(rel_is_ref(r)) &&
			!r->r && (rl = r->l) != NULL && is_project(rl->op)) {
			/* ensure there is no order by */
			if (!r->r) {
				r = r->l;
			} else {
				r = NULL;
			}
		}
		/* NOTE(review): the topn built here is only stored in the local
		 * r; it is not linked into the tree under rel -- confirm that is
		 * intended */
		if (r && r != rel && r->op == op_project && !(rel_is_ref(r)) && !r->r && r->l) {
			r = rel_topn(sql->sa, r, sum_limit_offset(sql, rel->exps));
		}

		/* push topn under crossproduct */
		if (r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
		    ((sql_rel *)r->l)->op != op_topn && ((sql_rel *)r->r)->op != op_topn) {
			r->l = rel_topn(sql->sa, r->l, sum_limit_offset(sql, rel->exps));
			r->r = rel_topn(sql->sa, r->r, sum_limit_offset(sql, rel->exps));
			(*changes)++;
			return rel;
		}
/* TODO */
#if 0
		/* duplicate topn + [ project-order ] under join on independent always matching joins */
		if (r)
			rp = r->l;
		if (r && r->exps && r->op == op_project && !(rel_is_ref(r)) && r->r && r->l &&
		    rp->op == op_join && rp->exps && rp->exps->h && ((prop*)((sql_exp*)rp->exps->h->data)->p)->kind == PROP_FETCH &&
		    ((sql_rel *)rp->l)->op != op_topn && ((sql_rel *)rp->r)->op != op_topn) {
			/* TODO check if order by columns are independent of join conditions */
			r->l = rel_topn(sql->sa, r->l, sum_limit_offset(sql, rel->exps));
			r->r = rel_topn(sql->sa, r->r, sum_limit_offset(sql, rel->exps));
			(*changes)++;
			return rel;
		}
#endif
	}
	return rel;
}
2240
2241/* merge projection */
2242
/* push an expression through a projection.
 * The result should again be used in a projection.
 */
2246static sql_exp *
2247exp_push_down_prj(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t);
2248
2249static list *
2250exps_push_down_prj(mvc *sql, list *exps, sql_rel *f, sql_rel *t)
2251{
2252 node *n;
2253 list *nl = new_exp_list(sql->sa);
2254
2255 for(n = exps->h; n; n = n->next) {
2256 sql_exp *arg = n->data, *narg = NULL;
2257
2258 narg = exp_push_down_prj(sql, arg, f, t);
2259 if (!narg)
2260 return NULL;
2261 narg = exp_propagate(sql->sa, narg, arg);
2262 append(nl, narg);
2263 }
2264 return nl;
2265}
2266
/*
 * exp_push_down_prj: rewrite expression e, which refers to the output of
 * projection f (a project or group by, see the assert), in terms of what
 * the expressions of f themselves refer to.
 *
 * t is only handed on to the recursive calls.
 *
 * Returns the rewritten expression, e itself for psm expressions, plain
 * atoms and argument-less functions/aggregates, or NULL when e cannot be
 * pushed through f (column maps to something other than a column or atom,
 * unsafe function, value list, unresolvable sub-expression).
 */
static sql_exp *
exp_push_down_prj(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
{
	sql_exp *ne = NULL, *l, *r, *r2;

	assert(is_project(f->op));

	switch(e->type) {
	case e_column:
		/* find the expression of f that e refers to */
		if (e->l)
			ne = exps_bind_column2(f->exps, e->l, e->r);
		if (!ne && !e->l)
			ne = exps_bind_column(f->exps, e->r, NULL);
		/* only simple columns and atoms can be substituted */
		if (!ne || (ne->type != e_column && ne->type != e_atom))
			return NULL;
		/* labelled columns of a projection may refer to an earlier
		 * expression of the same projection: follow such references,
		 * but only backwards in the list */
		while (ne && has_label(ne) && f->op == op_project && ne->type == e_column) {
			sql_exp *oe = e, *one = ne;

			e = ne;
			ne = NULL;
			if (e->l)
				ne = exps_bind_column2(f->exps, e->l, e->r);
			if (!ne && !e->l)
				ne = exps_bind_column(f->exps, e->r, NULL);
			if (ne && ne != one && list_position(f->exps, ne) >= list_position(f->exps, one))
				ne = NULL;
			if (!ne || ne == one) {
				/* no earlier definition: keep the previous match */
				ne = one;
				e = oe;
				break;
			}
			if (ne->type != e_column && ne->type != e_atom)
				return NULL;
		}
		/* possibly a groupby/project column is renamed */
		if (is_groupby(f->op) && f->r) {
			sql_exp *gbe = NULL;
			/* NOTE(review): the qualified lookup tests ne->l while the
			 * fallback tests e->l -- confirm this asymmetry is intended */
			if (ne->l)
				gbe = exps_bind_column2(f->r, ne->l, ne->r);
			if (!gbe && !e->l)
				gbe = exps_bind_column(f->r, ne->r, NULL);
			ne = gbe;
			if (!ne || (ne->type != e_column && ne->type != e_atom))
				return NULL;
		}
		/* atoms are copied; columns become an alias that keeps the name
		 * of e but refers to what ne refers to */
		if (ne->type == e_atom)
			e = exp_copy(sql, ne);
		else
			e = exp_alias(sql->sa, exp_relname(e), exp_name(e), ne->l, ne->r, exp_subtype(e), e->card, has_nil(e), is_intern(e));
		return exp_propagate(sql->sa, e, ne);
	case e_cmp:
		if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) {
			/* both operands are expression lists */
			list *l = exps_push_down_prj(sql, e->l, f, t);
			list *r = exps_push_down_prj(sql, e->r, f, t);

			if (!l || !r)
				return NULL;
			if (get_cmp(e) == cmp_filter)
				return exp_filter(sql->sa, l, r, e->f, is_anti(e));
			return exp_or(sql->sa, l, r, is_anti(e));
		} else if (e->flag == cmp_in || e->flag == cmp_notin) {
			/* single left expression, list of values on the right */
			sql_exp *l = exp_push_down_prj(sql, e->l, f, t);
			list *r = exps_push_down_prj(sql, e->r, f, t);

			if (!l || !r)
				return NULL;
			return exp_in(sql->sa, l, r, e->flag);
		} else {
			l = exp_push_down_prj(sql, e->l, f, t);
			r = exp_push_down_prj(sql, e->r, f, t);
			if (e->f) {
				/* comparison with a third operand kept in e->f */
				r2 = exp_push_down_prj(sql, e->f, f, t);
				if (l && r && r2)
					ne = exp_compare2(sql->sa, l, r, r2, e->flag);
			} else if (l && r) {
				ne = exp_compare(sql->sa, l, r, e->flag);
			}
		}
		if (!ne)
			return NULL;
		return exp_propagate(sql->sa, ne, e);
	case e_convert:
		l = exp_push_down_prj(sql, e->l, f, t);
		if (l)
			return exp_convert(sql->sa, l, exp_fromtype(e), exp_totype(e));
		return NULL;
	case e_aggr:
	case e_func: {
		list *l = e->l, *nl = NULL;
		sql_exp *ne = NULL;

		/* functions reported unsafe by exp_unsafe() stay where they are */
		if (e->type == e_func && exp_unsafe(e,0))
			return NULL;
		/* no arguments: nothing to rewrite */
		if (!l) {
			return e;
		} else {
			nl = exps_push_down_prj(sql, l, f, t);
			if (!nl)
				return NULL;
		}
		if (e->type == e_func)
			ne = exp_op(sql->sa, nl, e->f);
		else
			ne = exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
		return exp_propagate(sql->sa, ne, e);
	}
	case e_atom:
	case e_psm:
		if (e->type == e_atom && e->f) /* value list */
			return NULL;
		return e;
	}
	return NULL;
}
2381
2382static int
2383rel_is_unique( sql_rel *rel, sql_ukey *k)
2384{
2385 switch(rel->op) {
2386 case op_left:
2387 case op_right:
2388 case op_full:
2389 case op_join:
2390 return 0;
2391 case op_semi:
2392 case op_anti:
2393 return rel_is_unique(rel->l, k);
2394 case op_table:
2395 case op_basetable:
2396 return 1;
2397 default:
2398 return 0;
2399 }
2400}
2401
2402int
2403exps_unique(mvc *sql, sql_rel *rel, list *exps)
2404{
2405 node *n;
2406 char *matched = NULL;
2407 int nr = 0;
2408 sql_ukey *k = NULL;
2409
2410 if (list_empty(exps))
2411 return 0;
2412 for(n = exps->h; n && !k; n = n->next) {
2413 sql_exp *e = n->data;
2414 prop *p;
2415
2416 if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL)
2417 k = p->value;
2418 }
2419 if (!k || list_length(k->k.columns) > list_length(exps))
2420 return 0;
2421 if (rel) {
2422 matched = (char*)sa_alloc(sql->sa, list_length(k->k.columns));
2423 memset(matched, 0, list_length(k->k.columns));
2424 for(n = exps->h; n; n = n->next) {
2425 sql_exp *e = n->data;
2426 fcmp cmp = (fcmp)&kc_column_cmp;
2427 sql_column *c = exp_find_column(rel, e, -2);
2428 node *m;
2429
2430 if (c && (m=list_find(k->k.columns, c, cmp)) != NULL) {
2431 int pos = list_position(k->k.columns, m->data);
2432 if (!matched[pos])
2433 nr++;
2434 matched[pos] = 1;
2435 }
2436 }
2437 if (nr == list_length(k->k.columns)) {
2438 return rel_is_unique(rel, k);
2439 }
2440 }
2441 /*
2442 if ((n = exps->h) != NULL) {
2443 sql_exp *e = n->data;
2444 prop *p;
2445
2446 if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL) {
2447 sql_ukey *k = p->value;
2448 if (k && list_length(k->k.columns) <= 1)
2449 return 1;
2450 }
2451 }
2452 */
2453 return 0;
2454}
2455
2456static int
2457rel_is_join_on_pkey( sql_rel *rel )
2458{
2459 node *n;
2460
2461 if (!rel || !rel->exps)
2462 return 0;
2463 for (n = rel->exps->h; n; n = n->next){
2464 sql_exp *je = n->data;
2465
2466 if (je->type == e_cmp && je->flag == cmp_equal &&
2467 find_prop(((sql_exp*)je->l)->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */
2468 fcmp cmp = (fcmp)&kc_column_cmp;
2469 sql_exp *e = je->l;
2470 sql_column *c = exp_find_column(rel, e, -2);
2471
2472 if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL)
2473 return 1;
2474 }
2475 }
2476 return 0;
2477}
2478
2479/* if all arguments to a distinct aggregate are unique, remove 'distinct' property */
2480static sql_rel *
2481rel_distinct_aggregate_on_unique_values(int *changes, mvc *sql, sql_rel *rel)
2482{
2483 sql_rel *l = (sql_rel*) rel->l;
2484
2485 (void) sql;
2486 if (rel->op == op_groupby && (!l || is_base(l->op))) {
2487 for (node *n = rel->exps->h; n; n = n->next) {
2488 sql_exp *exp = (sql_exp*) n->data;
2489
2490 if (exp->type == e_aggr && need_distinct(exp)) {
2491 bool all_unique = true;
2492
2493 for (node *m = ((list*)exp->l)->h; m && all_unique; m = m->next) {
2494 sql_exp *arg = (sql_exp*) m->data;
2495
2496 if (arg->card == CARD_ATOM) /* constants are always unique */
2497 continue;
2498 else if (arg->type == e_column) {
2499 fcmp cmp = (fcmp)&kc_column_cmp;
2500 sql_column *c = exp_find_column(rel, arg, -2);
2501
2502 if (c) {
2503 /* column is the only primary key column of its table */
2504 if (find_prop(arg->p, PROP_HASHCOL) && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL && list_length(c->t->pkey->k.columns) == 1)
2505 continue;
2506 else if (c->unique == 1) /* column has unique constraint */
2507 continue;
2508 else
2509 all_unique = false;
2510 } else
2511 all_unique = false;
2512 } else
2513 all_unique = false;
2514 }
2515 if (all_unique) {
2516 set_nodistinct(exp);
2517 *changes = 1;
2518 }
2519 }
2520 }
2521 }
2522 return rel;
2523}
2524
2525static sql_rel *
2526rel_distinct_project2groupby(int *changes, mvc *sql, sql_rel *rel)
2527{
2528 sql_rel *l = rel->l;
2529
2530 /* rewrite distinct project (table) [ constant ] -> project [ constant ] */
2531 if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
2532 exps_card(rel->exps) <= CARD_ATOM) {
2533 set_nodistinct(rel);
2534 rel->l = rel_topn(sql->sa, rel->l, append(sa_list(sql->sa), exp_atom_lng(sql->sa, 1)));
2535 }
2536
2537 /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
2538 * into project [ pk ] ( select/semijoin ( table ) */
2539 if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
2540 (l->op == op_select || l->op == op_semi) && exps_unique(sql, rel, rel->exps))
2541 set_nodistinct(rel);
2542
2543 /* rewrite distinct project ( join(p,f) [ p.pk = f.fk] ) [ p.pk ] ->
2544 * project(p)[p.pk]
2545 */
2546 if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
2547 l && l->op == op_join && rel_is_join_on_pkey(l) /* [ pk == fk ] */) {
2548 sql_rel *j = l;
2549 sql_rel *p = j->l;
2550 sql_exp *je = l->exps->h->data, *le = je->l;
2551
2552 if (exps_find_exp(rel->exps, le)) { /* rel must have the same primary key on the projection list */
2553 int pside = (rel_find_exp(p, le) != NULL)?1:0;
2554
2555 p = (pside)?j->l:j->r;
2556 rel->l = rel_dup(p);
2557 rel_destroy(j);
2558 *changes = 1;
2559 set_nodistinct(rel);
2560 return rel;
2561 }
2562 }
2563 /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
2564 * into project [ gbe ] ( select ( group etc ) */
2565 if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
2566 need_distinct(rel) && l->op == op_select){
2567 sql_rel *g = l->l;
2568 if (is_groupby(g->op)) {
2569 list *used = sa_list(sql->sa);
2570 list *gbe = g->r;
2571 node *n;
2572 int fnd = 1;
2573
2574 for (n = rel->exps->h; n && fnd; n = n->next) {
2575 sql_exp *e = n->data;
2576
2577 if (e->card > CARD_ATOM) {
2578 /* find e in gbe */
2579 sql_exp *ne = list_find_exp(g->exps, e);
2580
2581 if (ne)
2582 ne = list_find_exp( gbe, ne);
2583 if (ne && !list_find_exp(used, ne)) {
2584 fnd++;
2585 list_append(used, ne);
2586 }
2587 if (!ne)
2588 fnd = 0;
2589 }
2590 }
2591 if (fnd == (list_length(gbe)+1))
2592 set_nodistinct(rel);
2593 }
2594 }
2595 if (rel->op == op_project && rel->l &&
2596 need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
2597 node *n;
2598 list *exps = new_exp_list(sql->sa), *gbe = new_exp_list(sql->sa);
2599 list *obe = rel->r; /* we need to read the ordering later */
2600
2601 if (obe) {
2602 int fnd = 0;
2603
2604 for(n = obe->h; n && !fnd; n = n->next) {
2605 sql_exp *e = n->data;
2606
2607 if (e->type != e_column)
2608 fnd = 1;
2609 else if (exps_bind_column2(rel->exps, e->l, e->r) == 0)
2610 fnd = 1;
2611 }
2612 if (fnd)
2613 return rel;
2614 }
2615 rel->l = rel_project(sql->sa, rel->l, rel->exps);
2616
2617 for (n = rel->exps->h; n; n = n->next) {
2618 sql_exp *e = n->data, *ne;
2619
2620 if (!exp_name(e))
2621 exp_label(sql->sa, e, ++sql->label);
2622 ne = exp_ref(sql->sa, e);
2623 if (e->card > CARD_ATOM) { /* no need to group by on constants */
2624 append(gbe, ne);
2625 }
2626 append(exps, ne);
2627 }
2628 rel->op = op_groupby;
2629 rel->exps = exps;
2630 rel->r = gbe;
2631 set_nodistinct(rel);
2632 if (obe) {
2633 /* add order again */
2634 rel = rel_project(sql->sa, rel, rel_projections(sql, rel, NULL, 1, 1));
2635 rel->r = obe;
2636 }
2637 *changes = 1;
2638 }
2639 return rel;
2640}
2641
2642static int
2643exp_shares_exps( sql_exp *e, list *shared, lng *uses)
2644{
2645 switch(e->type) {
2646 case e_cmp: /* not in projection list */
2647 case e_psm:
2648 assert(0);
2649 case e_atom:
2650 return 0;
2651 case e_column:
2652 {
2653 sql_exp *ne = NULL;
2654 if (e->l)
2655 ne = exps_bind_column2(shared, e->l, e->r);
2656 if (!ne && !e->l)
2657 ne = exps_bind_column(shared, e->r, NULL);
2658 if (!ne)
2659 return 0;
2660 if (ne && ne->type != e_column) {
2661 lng used = (lng) 1 << list_position(shared, ne);
2662 if (used & *uses)
2663 return 1;
2664 *uses &= used;
2665 return 0;
2666 }
2667 if (ne && ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne)))
2668 /* maybe ne refers to a local complex exp */
2669 return exp_shares_exps( ne, shared, uses);
2670 return 0;
2671 }
2672 case e_convert:
2673 return exp_shares_exps(e->l, shared, uses);
2674
2675 case e_aggr:
2676 case e_func:
2677 {
2678 list *l = e->l;
2679 node *n;
2680
2681 if (!l)
2682 return 0;
2683 for (n = l->h; n; n = n->next) {
2684 sql_exp *e = n->data;
2685
2686 if (exp_shares_exps( e, shared, uses))
2687 return 1;
2688 }
2689 }
2690 }
2691 return 0;
2692}
2693
2694static int
2695exps_share_expensive_exp( list *exps, list *shared )
2696{
2697 node *n;
2698 lng uses = 0;
2699
2700 if (!exps || !shared)
2701 return 0;
2702 for (n = exps->h; n; n = n->next){
2703 sql_exp *e = n->data;
2704
2705 if (exp_shares_exps( e, shared, &uses))
2706 return 1;
2707 }
2708 return 0;
2709}
2710
2711static int ambigious_ref( list *exps, sql_exp *e);
2712static int
2713ambigious_refs( list *exps, list *refs)
2714{
2715 node *n;
2716
2717 if (!refs)
2718 return 0;
2719 for(n=refs->h; n; n = n->next) {
2720 if (ambigious_ref(exps, n->data))
2721 return 1;
2722 }
2723 return 0;
2724}
2725
2726static int
2727ambigious_ref( list *exps, sql_exp *e)
2728{
2729 sql_exp *ne = NULL;
2730
2731 if (e->type == e_column) {
2732 if (e->l)
2733 ne = exps_bind_column2(exps, e->l, e->r);
2734 if (!ne && !e->l)
2735 ne = exps_bind_column(exps, e->r, NULL);
2736 if (ne && e != ne)
2737 return 1;
2738 }
2739 if (e->type == e_func)
2740 return ambigious_refs(exps, e->l);
2741 return 0;
2742}
2743
2744/* merge 2 projects into the lower one */
2745static sql_rel *
2746rel_merge_projects(int *changes, mvc *sql, sql_rel *rel)
2747{
2748 list *exps = rel->exps;
2749 sql_rel *prj = rel->l;
2750 node *n;
2751
2752 if (rel->op == op_project &&
2753 prj && prj->op == op_project && !(rel_is_ref(prj)) && !prj->r) {
2754 int all = 1;
2755
2756 if (project_unsafe(rel,0) || project_unsafe(prj,0) || exps_share_expensive_exp(rel->exps, prj->exps))
2757 return rel;
2758
2759 /* here we need to fix aliases */
2760 rel->exps = new_exp_list(sql->sa);
2761
2762 /* for each exp check if we can rename it */
2763 for (n = exps->h; n && all; n = n->next) {
2764 sql_exp *e = n->data, *ne = NULL;
2765
2766 /* We do not handle expressions pointing back in the list */
2767 if (ambigious_ref(exps, e)) {
2768 all = 0;
2769 break;
2770 }
2771 ne = exp_push_down_prj(sql, e, prj, prj->l);
2772 /* check if the refered alias name isn't used twice */
2773 if (ne && ambigious_ref(rel->exps, ne)) {
2774 all = 0;
2775 break;
2776 }
2777 /*
2778 if (ne && ne->type == e_column) {
2779 sql_exp *nne = NULL;
2780
2781 if (ne->l)
2782 nne = exps_bind_column2(rel->exps, ne->l, ne->r);
2783 if (!nne && !ne->l)
2784 nne = exps_bind_column(rel->exps, ne->r, NULL);
2785 if (nne && ne != nne && nne != e) {
2786 all = 0;
2787 break;
2788 }
2789 }
2790 */
2791 if (ne) {
2792 exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
2793 list_append(rel->exps, ne);
2794 } else {
2795 all = 0;
2796 }
2797 }
2798 if (all) {
2799 /* we can now remove the intermediate project */
2800 /* push order by expressions */
2801 if (rel->r) {
2802 list *nr = new_exp_list(sql->sa), *res = rel->r;
2803 for (n = res->h; n; n = n->next) {
2804 sql_exp *e = n->data, *ne = NULL;
2805
2806 ne = exp_push_down_prj(sql, e, prj, prj->l);
2807 if (ne) {
2808 exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
2809 list_append(nr, ne);
2810 } else {
2811 all = 0;
2812 }
2813 }
2814 if (all) {
2815 rel->r = nr;
2816 } else {
2817 /* leave as is */
2818 rel->exps = exps;
2819 return rel;
2820 }
2821 }
2822 rel->l = prj->l;
2823 prj->l = NULL;
2824 rel_destroy(prj);
2825 (*changes)++;
2826 return rel_merge_projects(changes, sql, rel);
2827 } else {
2828 /* leave as is */
2829 rel->exps = exps;
2830 }
2831 return rel;
2832 }
2833 return rel;
2834}
2835
2836static sql_subfunc *
2837find_func( mvc *sql, char *name, list *exps )
2838{
2839 list * l = new_func_list(sql->sa);
2840 node *n;
2841
2842 for(n = exps->h; n; n = n->next)
2843 append(l, exp_subtype(n->data));
2844 return sql_bind_func_(sql->sa, sql->session->schema, name, l, F_FUNC);
2845}
2846
2847static sql_exp * exp_case_fixup( mvc *sql, sql_rel *rel, sql_exp *e, sql_exp *cc );
2848
2849static list *
2850exps_case_fixup( mvc *sql, list *exps, sql_exp *cond )
2851{
2852 node *n;
2853
2854 if (exps) {
2855 list *nexps = new_exp_list(sql->sa);
2856 for( n = exps->h; n; n = n->next) {
2857 sql_exp *e = n->data;
2858
2859 e = exp_case_fixup(sql, NULL, e, cond);
2860 append(nexps, e);
2861 }
2862 return nexps;
2863 }
2864 return exps;
2865}
2866
/*
 * Rewrite expression 'e' so that the condition under which it is evaluated
 * is passed on to the functions inside it.
 *
 * sql: context (allocator, label counter)
 * rel: when not NULL, the condition of an "ifthenelse" is labelled,
 *      appended to rel->exps and replaced by a column reference to it
 * e:   expression to rewrite
 * cc:  condition collected so far from enclosing "ifthenelse" calls,
 *      or NULL when there is none
 *
 * Handling per expression type:
 *  - e_psm: RETURN, WHILE and IF statements get their parts rewritten in
 *    place; SET and VAR are still todo; REL and EXCEPTION are untouched.
 *  - e_func with arguments that is not analytic: for anything but the
 *    schema-less "ifthenelse" the arguments are rewritten and, when 'cc'
 *    is set and math_unsafe(f) holds, 'cc' is appended as extra argument.
 *    For "ifthenelse" the 'then' side is rewritten under the condition
 *    and the 'else' side under its negation (each and-ed with 'cc').
 *  - e_convert / e_aggr: rebuilt around the rewritten operand(s).
 *  - everything else is returned as is.
 *
 * Returns the rewritten expression (for e_psm the same, modified, 'e').
 */
static sql_exp *
exp_case_fixup( mvc *sql, sql_rel *rel, sql_exp *e, sql_exp *cc )
{
	/* only functions need fix up */
	if (e->type == e_psm) {
		if (e->flag & PSM_SET) {
			/* todo */
		} else if (e->flag & PSM_VAR) {
			/* todo */
		} else if (e->flag & PSM_RETURN) {
			e->l = exp_case_fixup(sql, rel, e->l, cc);
		} else if (e->flag & PSM_WHILE) {
			e->l = exp_case_fixup(sql, rel, e->l, cc);
			e->r = exps_case_fixup(sql, e->r, cc);
		} else if (e->flag & PSM_IF) {
			e->l = exp_case_fixup(sql, rel, e->l, cc);
			e->r = exps_case_fixup(sql, e->r, cc);
			if (e->f)
				e->f = exps_case_fixup(sql, e->f, cc);
		} else if (e->flag & PSM_REL || e->flag & PSM_EXCEPTION) {
		}
		return e;
	}
	if (e->type == e_func && e->l && !is_analytic(e) ) {
		list *l = new_exp_list(sql->sa), *args = e->l;
		node *n;
		sql_exp *ne = e;
		sql_subfunc *f = e->f;

		/* first fixup arguments */
		/* (taken for every function that has a schema or is not named "ifthenelse") */
		if (f->func->s || strcmp(f->func->base.name, "ifthenelse")) {
			for (n=args->h; n; n=n->next) {
				sql_exp *a = exp_case_fixup(sql, rel, n->data, cc);
				list_append(l, a);
			}
			ne = exp_op(sql->sa, l, f);
			exp_prop_alias(sql->sa, ne, e);
			if (cc && math_unsafe(f)) {
				/* only add one condition */
				assert(f->func->varres || f->func->vararg || list_length(ne->l) == list_length(f->func->ops));
				append(ne->l, cc);
			}
		} else {
			/* ifthenelse with one of the sides an 'sql_div' */
			/* the three arguments are read as: condition, 'then' value, 'else' value */
			sql_exp *cond = args->h->data, *nne, *ncond = NULL;
			sql_exp *a1 = args->h->next->data;
			sql_exp *a2 = args->h->next->next->data;

			cond = exp_case_fixup(sql, rel, cond, cc);
			if (rel) {
				/* compute the condition once in 'rel' and refer to it by its new label */
				exp_label(sql->sa, cond, ++sql->label);
				append(rel->exps, cond);
				cond = exp_column(sql->sa, exp_find_rel_name(cond), exp_name(cond), exp_subtype(cond), cond->card, has_nil(cond), is_intern(cond));
			}
			/* rewrite right hands of div */
			ncond = cond;
			if (cc) {
				/* 'then' side condition: cc and cond */
				sql_subtype *t = exp_subtype(cc);
				sql_subfunc *f = sql_bind_func(sql->sa, NULL, "and", t, t, F_FUNC);

				ncond = exp_binop(sql->sa, cc, ncond, f);
			}
			a1 = exp_case_fixup(sql, rel, a1, ncond);
			if (1){
				/* 'else' side condition: not(cond), and-ed with cc when present */
				sql_subtype *t = exp_subtype(cond);
				sql_subfunc *f = sql_bind_func(sql->sa, NULL, "not", t, NULL, F_FUNC);
				sql_exp *nc;

				assert(f);
				nc = exp_unop(sql->sa, cond, f);
				if (cc) {
					sql_subtype *t = exp_subtype(cc);
					sql_subfunc *f = sql_bind_func(sql->sa, NULL, "and", t, t, F_FUNC);

					nc = exp_binop(sql->sa, cc, nc, f);
				}
				a2 = exp_case_fixup(sql, rel, a2, nc);
			}
			assert(cond && a1 && a2);
			/* ne still equals e here, so ne->f is the original ifthenelse function */
			nne = exp_op3(sql->sa, cond, a1, a2, ne->f);
			exp_prop_alias(sql->sa, nne, ne);
			ne = nne;
		}
		return ne;
	}
	if (e->type == e_convert) {
		sql_exp *e1 = exp_case_fixup(sql, rel, e->l, cc);
		sql_exp *ne = exp_convert(sql->sa, e1, exp_fromtype(e), exp_totype(e));

		exp_prop_alias(sql->sa, ne, e);
		return ne;
	}
	if (e->type == e_aggr) {
		list *l = NULL, *args = e->l;
		node *n;
		sql_exp *ne;
		sql_subaggr *f = e->f;

		/* first fixup arguments */
		if (args) {
			l = new_exp_list(sql->sa);
			for (n=args->h; n; n=n->next) {
				sql_exp *a = exp_case_fixup(sql, rel, n->data, cc);
				list_append(l, a);
			}
		}
		ne = exp_aggr(sql->sa, l, f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
		exp_prop_alias(sql->sa, ne, e);
		return ne;
	}
	return e;
}
2979
/* forward declarations: both functions are defined further down but are
 * already needed by the helpers in between */
static sql_rel * rel_case_fixup(int *changes, mvc *sql, sql_rel *rel, int top);
static sql_exp * rewrite_case_exp(mvc *sql, sql_exp *e, int *has_changes);

/*
 * Run rel_case_fixup on 'rel' with the 'top' flag set to 1.
 * The signature is that of a rewrite_fptr; the result of rel_case_fixup
 * is returned unchanged.
 */
static sql_rel *
rel_case_fixup_top(int *changes, mvc *sql, sql_rel *rel)
{
	return rel_case_fixup(changes, sql, rel, 1);
}
2988
2989static list *
2990rewrite_case_exps(mvc *sql, list *l, int *has_changes)
2991{
2992 node *n;
2993
2994 if (!l)
2995 return l;
2996 for(n = l->h; n; n = n->next)
2997 n->data = rewrite_case_exp(sql, n->data, has_changes);
2998 return l;
2999}
3000
3001
3002static sql_exp *
3003rewrite_case_exp(mvc *sql, sql_exp *e, int *has_changes)
3004{
3005 if (e->type != e_psm)
3006 return e;
3007 if (e->flag & PSM_VAR)
3008 return e;
3009 if (e->flag & PSM_SET || e->flag & PSM_RETURN) {
3010 e->l = rewrite_case_exp(sql, e->l, has_changes);
3011 }
3012 if (e->flag & PSM_WHILE || e->flag & PSM_IF) {
3013 e->l = rewrite_case_exp(sql, e->l, has_changes);
3014 e->r = rewrite_case_exps(sql, e->r, has_changes);
3015 if (e->f)
3016 e->f = rewrite_case_exps(sql, e->f, has_changes);
3017 return e;
3018 }
3019 if ((e->flag & PSM_REL) && e->l)
3020 e->l = rel_case_fixup_top(has_changes, sql, e->l);
3021 if (e->flag & PSM_EXCEPTION)
3022 e->l = rewrite_case_exp(sql, e->l, has_changes);
3023 return e;
3024}
3025
3026static sql_rel *
3027rel_case_fixup(int *changes, mvc *sql, sql_rel *rel, int top)
3028{
3029 (void)changes; /* only go through it once, ie don't mark for changes */
3030
3031 if (!top && rel_is_ref(rel))
3032 return rel;
3033 if ((is_project(rel->op) || (rel->op == op_ddl && rel->flag == ddl_psm)) && rel->exps) {
3034 list *exps = rel->exps;
3035 node *n;
3036 int needed = 0;
3037 sql_rel *res = rel;
3038 int push_down = 0;
3039
3040 for (n = exps->h; n && !needed; n = n->next) {
3041 sql_exp *e = n->data;
3042
3043 if (e->type == e_func || e->type == e_convert ||
3044 e->type == e_aggr || e->type == e_psm)
3045 needed = 1;
3046 }
3047 if (!needed) {
3048 if (rel->l)
3049 rel->l = rel_case_fixup(changes, sql, rel->l, is_topn(rel->op)?top:0);
3050 return rel;
3051 }
3052
3053 /* get proper output first, then rewrite lower project (such that it can split expressions) */
3054 push_down = is_simple_project(rel->op) && !rel->r && !rel_is_ref(rel);
3055 if (push_down) {
3056 if (top)
3057 res = rel_safe_project(sql, rel);
3058 else
3059 res = rel_project(sql->sa, rel, rel_projections(sql, rel, NULL, 1, 2));
3060 if (need_distinct(rel))
3061 set_distinct(res);
3062 }
3063
3064 rel->exps = new_exp_list(sql->sa);
3065 for (n = exps->h; n; n = n->next) {
3066 sql_exp *e = exp_case_fixup( sql, push_down?rel:NULL, n->data, NULL );
3067
3068 if (!e)
3069 return NULL;
3070 list_append(rel->exps, e);
3071 }
3072 if (is_ddl(rel->op) && rel->flag == ddl_psm)
3073 rel->exps = rewrite_case_exps(sql, rel->exps, changes);
3074 if (rel->l)
3075 rel->l = rel_case_fixup(changes, sql, rel->l, is_topn(rel->op)?top:0);
3076 if (is_ddl(rel->op) && rel->r)
3077 rel->r = rel_case_fixup(changes, sql, rel->r, is_ddl(rel->op)?top:0);
3078 return res;
3079 }
3080 if (is_basetable(rel->op))
3081 return rel;
3082 if (rel->l)
3083 rel->l = rel_case_fixup(changes, sql, rel->l,
3084 (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
3085 if ((is_join(rel->op) || is_ddl(rel->op) || is_modify(rel->op) || is_set(rel->op)) && rel->r)
3086 rel->r = rel_case_fixup(changes, sql, rel->r,
3087 (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
3088 return rel;
3089}
3090
/*
 * Simplify arithmetic in expression 'e'.
 *
 * Functions and aggregates are only looked at when they have at least one
 * argument and the subtype of the first argument is known and either
 * passes EC_COMPUTE or is EC_DEC.  For schema-less functions with two
 * arguments the following rewrites are tried:
 *  - a NULL constant argument decides the result (or is ignored for
 *    "_no_nil" implementations);
 *  - sql_mul: multiplication by a constant zero, folding of two constants,
 *    moving constants to the right, a*a -> power(a,2),
 *    power(a,n)*a -> power(a,n+1), (x*c1)*c2 -> x*(c1*c2);
 *  - sql_add: adding zero, folding of two constants, moving constants to
 *    the right, re-association of (x+c1)+y and (x+c1)+c2;
 *  - sql_sub: folding of two constants, a-a -> 0, (x+a)-a -> x,
 *    re-association of (x+c1)-y and (x+c1)-c2.
 * When no rewrite returns early the arguments are simplified recursively;
 * for a conversion its operand is simplified.
 *
 * Every rewrite increments *changes.  Several rewrites modify 'e' or its
 * argument list in place.  Returns the simplified expression, which may be
 * 'e' itself, one of its arguments, or a new expression.
 */
static sql_exp *
exp_simplify_math( mvc *sql, sql_exp *e, int *changes)
{
	if (e->type == e_func || e->type == e_aggr) {
		list *l = e->l;
		sql_subfunc *f = e->f;
		node *n;
		sql_exp *le;

		if (list_length(l) < 1)
			return e;

		/* only continue for an accepted class of first argument type */
		le = l->h->data;
		if (!exp_subtype(le) || (!EC_COMPUTE(exp_subtype(le)->type->eclass) && exp_subtype(le)->type->eclass != EC_DEC))
			return e;

		if (!f->func->s && list_length(l) == 2) {
			sql_exp *le = l->h->data;
			sql_exp *re = l->h->next->data;
			sql_subtype *et = exp_subtype(e);

			/* if one argument is NULL, return it, EXCEPT
			 * if "_no_nil" is in the name of the
			 * implementation function (currently either
			 * min_no_nil or max_no_nil), in which case we
			 * ignore the NULL and return the other
			 * value */
			/* the returned argument takes over the name of 'e' and is
			 * converted when its type differs from the type of 'e' */
			if (exp_is_atom(le) && exp_is_null(sql, le)) {
				(*changes)++;
				if (f && f->func && f->func->imp && strstr(f->func->imp, "_no_nil") != NULL) {
					exp_setname(sql->sa, re, exp_relname(e), exp_name(e));
					if (subtype_cmp(et, exp_subtype(re)) != 0)
						re = exp_convert(sql->sa, re, exp_subtype(re), et);
					return re;
				}
				exp_setname(sql->sa, le, exp_relname(e), exp_name(e));
				if (subtype_cmp(et, exp_subtype(le)) != 0)
					le = exp_convert(sql->sa, le, exp_subtype(le), et);
				return le;
			}
			if (exp_is_atom(re) && exp_is_null(sql, re)) {
				(*changes)++;
				if (f && f->func && f->func->imp && strstr(f->func->imp, "_no_nil") != NULL) {
					exp_setname(sql->sa, le, exp_relname(e), exp_name(e));
					if (subtype_cmp(et, exp_subtype(le)) != 0)
						le = exp_convert(sql->sa, le, exp_subtype(le), et);
					return le;
				}
				exp_setname(sql->sa, re, exp_relname(e), exp_name(e));
				if (subtype_cmp(et, exp_subtype(re)) != 0)
					re = exp_convert(sql->sa, re, exp_subtype(re), et);
				return re;
			}
		}
		if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && list_length(l) == 2) {
			sql_exp *le = l->h->data;
			sql_exp *re = l->h->next->data;
			sql_subtype *et = exp_subtype(e);

			/* 0*a = 0 */
			/* (only applied when 'a' is a constant that is known not to be NULL) */
			if (exp_is_atom(le) && exp_is_zero(sql, le) && exp_is_atom(re) && exp_is_not_null(sql, re)) {
				(*changes)++;
				exp_setname(sql->sa, le, exp_relname(e), exp_name(e));
				if (subtype_cmp(et, exp_subtype(le)) != 0)
					le = exp_convert(sql->sa, le, exp_subtype(le), et);
				return le;
			}
			/* a*0 = 0 */
			if (exp_is_atom(re) && exp_is_zero(sql, re) && exp_is_atom(le) && exp_is_not_null(sql, le)) {
				(*changes)++;
				exp_setname(sql->sa, re, exp_relname(e), exp_name(e));
				if (subtype_cmp(et, exp_subtype(re)) != 0)
					re = exp_convert(sql->sa, re, exp_subtype(re), et);
				return re;
			}
			/* 1*a = a
			if (exp_is_atom(le) && exp_is_one(sql, le)) {
				(*changes)++;
				exp_setname(sql->sa, re, exp_relname(e), exp_name(e));
				return re;
			}
			*/
			/* a*1 = a
			if (exp_is_atom(re) && exp_is_one(sql, re)) {
				(*changes)++;
				exp_setname(sql->sa, le, exp_relname(e), exp_name(e));
				return le;
			}
			*/
			/* c1*c2: fold two constants of identical type into one */
			if (exp_is_atom(le) && exp_is_atom(re)) {
				atom *la = exp_flatten(sql, le);
				atom *ra = exp_flatten(sql, re);

				/* TODO check if output type is larger than input */
				if (la && ra && subtype_cmp(atom_type(la), atom_type(ra)) == 0 && subtype_cmp(atom_type(la), exp_subtype(e)) == 0) {
					atom *a = atom_mul(la, ra);

					if (a && atom_cast(sql->sa, a, exp_subtype(e))) {
						sql_exp *ne = exp_atom(sql->sa, a);
						(*changes)++;
						exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
						return ne;
					}
				}
			}
			/* move constants to the right, ie c*A = A*c */
			else if (exp_is_atom(le)) {
				l->h->data = re;
				l->h->next->data = le;
				/* the argument types swapped places, so bind the function again */
				e->f = sql_bind_func(sql->sa, NULL, "sql_mul", exp_subtype(re), exp_subtype(le), F_FUNC);
				exp_sum_scales(e->f, re, le);
				(*changes)++;
				return e;
			}
			/* change a*a into pow(a,2), later change pow(a,2) back into a*a */
			if (exp_equal(le, re)==0 && exp_subtype(le)->type->eclass == EC_FLT) {
				/* pow */
				list *l;
				sql_exp *ne;
				sql_subfunc *pow = sql_bind_func(sql->sa, sql->session->schema, "power", exp_subtype(le), exp_subtype(re), F_FUNC);
				assert(pow);
				if (exp_subtype(le)->type->localtype == TYPE_flt)
					re = exp_atom_flt(sql->sa, 2);
				else
					re = exp_atom_dbl(sql->sa, 2);
				l = sa_list(sql->sa);
				append(l, le);
				append(l, re);
				(*changes)++;
				ne = exp_op(sql->sa, l, pow);
				exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
				return ne;
			}
			/* change a*pow(a,n) or pow(a,n)*a into pow(a,n+1) */
			/* (only the form with the function on the left is matched here) */
			if (is_func(le->type)) {
				list *l = le->l;
				sql_subfunc *f = le->f;

				if (!f->func->s && !strcmp(f->func->base.name, "power") && list_length(l) == 2) {
					sql_exp *lle = l->h->data;
					sql_exp *lre = l->h->next->data;
					if (exp_equal(re, lle)==0) {
						/* the exponent value is passed to atom_inc; on success the power call itself is returned */
						if (atom_inc(exp_value(sql, lre, sql->args, sql->argc))) {
							(*changes)++;
							exp_setname(sql->sa, le, exp_relname(e), exp_name(e));
							return le;
						}
					}
				}
				if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && list_length(l) == 2) {
					sql_exp *lle = l->h->data;
					sql_exp *lre = l->h->next->data;
					if (!exp_is_atom(lle) && exp_is_atom(lre) && exp_is_atom(re)) {
						/* remember the result type, the re-bound function may yield another one */
						sql_subtype et = *exp_subtype(e);
						/* (x*c1)*c2 -> x * (c1*c2) */
						list *l = sa_list(sql->sa);
						append(l, lre);
						append(l, re);
						le->l = l;
						le->f = sql_bind_func(sql->sa, NULL, "sql_mul", exp_subtype(lre), exp_subtype(re), F_FUNC);
						exp_sum_scales(le->f, lre, re);
						l = e->l;
						l->h->data = lle;
						l->h->next->data = le;
						e->f = sql_bind_func(sql->sa, NULL, "sql_mul", exp_subtype(lle), exp_subtype(le), F_FUNC);
						exp_sum_scales(e->f, lle, le);
						if (subtype_cmp(&et, exp_subtype(e)) != 0)
							e = exp_convert(sql->sa, e, exp_subtype(e), &et);
						(*changes)++;
						return e;
					}
				}
			}
		}
		if (!f->func->s && !strcmp(f->func->base.name, "sql_add") && list_length(l) == 2) {
			sql_exp *le = l->h->data;
			sql_exp *re = l->h->next->data;
			/* 0+a = a */
			if (exp_is_atom(le) && exp_is_zero(sql, le)) {
				(*changes)++;
				exp_setname(sql->sa, re, exp_relname(e), exp_name(e));
				return re;
			}
			/* a+0 = a */
			if (exp_is_atom(re) && exp_is_zero(sql, re)) {
				(*changes)++;
				exp_setname(sql->sa, le, exp_relname(e), exp_name(e));
				return le;
			}
			/* c1+c2: fold two constants into one */
			if (exp_is_atom(le) && exp_is_atom(re)) {
				atom *la = exp_flatten(sql, le);
				atom *ra = exp_flatten(sql, re);

				if (la && ra) {
					atom *a = atom_add(la, ra);

					if (a) {
						sql_exp *ne = exp_atom(sql->sa, a);
						(*changes)++;
						exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
						return ne;
					}
				}
			}
			/* move constants to the right, ie c+A = A+c */
			else if (exp_is_atom(le)) {
				l->h->data = re;
				l->h->next->data = le;
				(*changes)++;
				return e;
			} else if (is_func(le->type)) {
				list *ll = le->l;
				sql_subfunc *f = le->f;
				if (!f->func->s && !strcmp(f->func->base.name, "sql_add") && list_length(ll) == 2) {
					sql_exp *lle = ll->h->data;
					sql_exp *lre = ll->h->next->data;

					/* (c1+c2)+x is left alone here */
					if (exp_is_atom(lle) && exp_is_atom(lre))
						return e;
					if (!exp_is_atom(re) && exp_is_atom(lre)) {
						/* (x+c1)+y -> (x+y) + c1 */
						ll->h->next->data = re;
						l->h->next->data = lre;
						l->h->data = exp_simplify_math(sql, le, changes);
						(*changes)++;
						return e;
					}
					if (exp_is_atom(re) && exp_is_atom(lre)) {
						/* (x+c1)+c2 -> (c2+c1) + x */
						ll->h->data = re;
						l->h->next->data = lle;
						l->h->data = exp_simplify_math(sql, le, changes);
						(*changes)++;
						return e;
					}
				}
			}
			/*
			if (is_func(re->type)) {
				list *ll = re->l;
				sql_subfunc *f = re->f;
				if (!f->func->s && !strcmp(f->func->base.name, "sql_add") && list_length(ll) == 2) {
					if (exp_is_atom(le)) {
						* c1+(x+y) -> (x+y) + c1 *
						l->h->data = re;
						l->h->next->data = le;
						(*changes)++;
						return e;
					}
				}
			}
			*/
		}
		if (!f->func->s && !strcmp(f->func->base.name, "sql_sub") && list_length(l) == 2) {
			sql_exp *le = l->h->data;
			sql_exp *re = l->h->next->data;

			/* c1-c2: fold two constants into one */
			if (exp_is_atom(le) && exp_is_atom(re)) {
				atom *la = exp_flatten(sql, le);
				atom *ra = exp_flatten(sql, re);

				if (la && ra) {
					atom *a = atom_sub(la, ra);

					if (a) {
						sql_exp *ne = exp_atom(sql->sa, a);
						(*changes)++;
						exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
						return ne;
					}
				}
			}
			if (exp_equal(le,re) == 0) { /* a - a = 0 */
				atom *a;
				sql_exp *ne;

				/* only for EC_NUM and EC_FLT operands, others are left as is */
				if (exp_subtype(le)->type->eclass == EC_NUM) {
					a = atom_int(sql->sa, exp_subtype(le), 0);
				} else if (exp_subtype(le)->type->eclass == EC_FLT) {
					a = atom_float(sql->sa, exp_subtype(le), 0);
				} else {
					return e;
				}
				ne = exp_atom(sql->sa, a);
				(*changes)++;
				exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
				return ne;
			}
			if (is_func(le->type)) {
				list *ll = le->l;
				sql_subfunc *f = le->f;
				if (!f->func->s && !strcmp(f->func->base.name, "sql_add") && list_length(ll) == 2) {
					sql_exp *lle = ll->h->data;
					sql_exp *lre = ll->h->next->data;
					if (exp_equal(re, lre) == 0) {
						/* (x+a)-a = x*/
						exp_setname(sql->sa, lle, exp_relname(e), exp_name(e));
						(*changes)++;
						return lle;
					}
					if (exp_is_atom(lle) && exp_is_atom(lre))
						return e;
					if (!exp_is_atom(re) && exp_is_atom(lre)) {
						/* (x+c1)-y -> (x-y) + c1 */
						ll->h->next->data = re;
						l->h->next->data = lre;
						/* swap the functions: the inner call becomes the sub, the outer the add */
						le->f = e->f;
						e->f = f;
						l->h->data = exp_simplify_math(sql, le, changes);
						(*changes)++;
						return e;
					}
					if (exp_is_atom(re) && exp_is_atom(lre)) {
						/* (x+c1)-c2 -> (c1-c2) + x */
						ll->h->data = lre;
						ll->h->next->data = re;
						l->h->next->data = lle;
						/* swap the functions: the inner call becomes the sub, the outer the add */
						le->f = e->f;
						e->f = f;
						l->h->data = exp_simplify_math(sql, le, changes);
						(*changes)++;
						return e;
					}
				}
			}
		}
		/* no rewrite returned above: simplify the arguments themselves */
		if (l)
			for (n = l->h; n; n = n->next)
				n->data = exp_simplify_math(sql, n->data, changes);
	}
	if (e->type == e_convert)
		e->l = exp_simplify_math(sql, e->l, changes);
	return e;
}
3423
3424static sql_rel *
3425rel_simplify_math(int *changes, mvc *sql, sql_rel *rel)
3426{
3427
3428 if ((is_project(rel->op) || (rel->op == op_ddl && rel->flag == ddl_psm)) && rel->exps) {
3429 list *exps = rel->exps;
3430 node *n;
3431 int needed = 0;
3432
3433 for (n = exps->h; n && !needed; n = n->next) {
3434 sql_exp *e = n->data;
3435
3436 if (e->type == e_func || e->type == e_convert ||
3437 e->type == e_aggr || e->type == e_psm)
3438 needed = 1;
3439 }
3440 if (!needed)
3441 return rel;
3442
3443 rel->exps = new_exp_list(sql->sa);
3444 for (n = exps->h; n; n = n->next) {
3445 sql_exp *e = exp_simplify_math( sql, n->data, changes);
3446
3447 if (!e)
3448 return NULL;
3449 list_append(rel->exps, e);
3450 }
3451 }
3452 if (*changes) /* if rewritten don't cache this query */
3453 sql->caching = 0;
3454 return rel;
3455}
3456
3457static sql_rel *
3458rel_find_ref( sql_rel *r)
3459{
3460 while (!rel_is_ref(r) && r->l &&
3461 (is_project(r->op) || is_select(r->op) /*|| is_join(r->op)*/))
3462 r = r->l;
3463 if (rel_is_ref(r))
3464 return r;
3465 return NULL;
3466}
3467
3468static sql_rel *
3469rel_find_select( sql_rel *r)
3470{
3471 while (!is_select(r->op) && r->l && is_project(r->op))
3472 r = r->l;
3473 if (is_select(r->op))
3474 return r;
3475 return NULL;
3476}
3477
3478static int
3479rel_match_projections(sql_rel *l, sql_rel *r)
3480{
3481 node *n, *m;
3482 list *le = l->exps;
3483 list *re = r->exps;
3484
3485 if (!le || !re)
3486 return 0;
3487 if (list_length(le) != list_length(re))
3488 return 0;
3489
3490 for (n = le->h, m = re->h; n && m; n = n->next, m = m->next)
3491 if (!exp_match(n->data, m->data))
3492 return 0;
3493 return 1;
3494}
3495
3496static int
3497exps_has_predicate( list *l )
3498{
3499 node *n;
3500
3501 for( n = l->h; n; n = n->next){
3502 sql_exp *e = n->data;
3503
3504 if (e->card <= CARD_ATOM)
3505 return 1;
3506 }
3507 return 0;
3508}
3509
3510static sql_rel *
3511rel_merge_union(int *changes, mvc *sql, sql_rel *rel)
3512{
3513 sql_rel *l = rel->l;
3514 sql_rel *r = rel->r;
3515 sql_rel *ref = NULL;
3516
3517 if (is_union(rel->op) &&
3518 l && is_project(l->op) && !project_unsafe(l,0) &&
3519 r && is_project(r->op) && !project_unsafe(r,0) &&
3520 (ref = rel_find_ref(l)) != NULL && ref == rel_find_ref(r)) {
3521 /* Find selects and try to merge */
3522 sql_rel *ls = rel_find_select(l);
3523 sql_rel *rs = rel_find_select(r);
3524
3525 /* can we merge ? */
3526 if (!ls || !rs)
3527 return rel;
3528
3529 /* merge any extra projects */
3530 if (l->l != ls)
3531 rel->l = l = rel_merge_projects(changes, sql, l);
3532 if (r->l != rs)
3533 rel->r = r = rel_merge_projects(changes, sql, r);
3534
3535 if (!rel_match_projections(l,r))
3536 return rel;
3537
3538 /* for now only union(project*(select(R),project*(select(R))) */
3539 if (ls != l->l || rs != r->l ||
3540 ls->l != rs->l || !rel_is_ref(ls->l))
3541 return rel;
3542
3543 if (!ls->exps || !rs->exps ||
3544 exps_has_predicate(ls->exps) ||
3545 exps_has_predicate(rs->exps))
3546 return rel;
3547
3548 /* merge, ie. add 'or exp' */
3549 (*changes)++;
3550 ls->exps = append(new_exp_list(sql->sa), exp_or(sql->sa, ls->exps, rs->exps, 0));
3551 rs->exps = NULL;
3552 rel = rel_inplace_project(sql->sa, rel, rel_dup(rel->l), rel->exps);
3553 set_processed(rel);
3554 return rel;
3555 }
3556 return rel;
3557}
3558
3559static int
3560exps_cse( mvc *sql, list *oexps, list *l, list *r )
3561{
3562 list *nexps;
3563 node *n, *m;
3564 char *lu, *ru;
3565 int lc = 0, rc = 0, match = 0, res = 0;
3566
3567 /* first recusive exps_cse */
3568 nexps = new_exp_list(sql->sa);
3569 for (n = l->h; n; n = n->next) {
3570 sql_exp *e = n->data;
3571
3572 if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
3573 res = exps_cse(sql, nexps, e->l, e->r);
3574 } else {
3575 append(nexps, e);
3576 }
3577 }
3578 l = nexps;
3579
3580 nexps = new_exp_list(sql->sa);
3581 for (n = r->h; n; n = n->next) {
3582 sql_exp *e = n->data;
3583
3584 if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
3585 res = exps_cse(sql, nexps, e->l, e->r);
3586 } else {
3587 append(nexps, e);
3588 }
3589 }
3590 r = nexps;
3591
3592 /* simplify true or .. and .. or true */
3593 if (list_length(l) == list_length(r) && list_length(l) == 1) {
3594 sql_exp *le = l->h->data, *re = r->h->data;
3595
3596 if (exp_is_true(sql, le)) {
3597 append(oexps, le);
3598 return 1;
3599 }
3600 if (exp_is_true(sql, re)) {
3601 append(oexps, re);
3602 return 1;
3603 }
3604 }
3605
3606 lu = calloc(list_length(l), sizeof(char));
3607 ru = calloc(list_length(r), sizeof(char));
3608 for (n = l->h, lc = 0; n; n = n->next, lc++) {
3609 sql_exp *le = n->data;
3610
3611 for ( m = r->h, rc = 0; m; m = m->next, rc++) {
3612 sql_exp *re = m->data;
3613
3614 if (!ru[rc] && exp_match_exp(le,re)) {
3615 lu[lc] = 1;
3616 ru[rc] = 1;
3617 match = 1;
3618 }
3619 }
3620 }
3621 if (match) {
3622 list *nl = new_exp_list(sql->sa);
3623 list *nr = new_exp_list(sql->sa);
3624
3625 for (n = l->h, lc = 0; n; n = n->next, lc++)
3626 if (!lu[lc])
3627 append(nl, n->data);
3628 for (n = r->h, rc = 0; n; n = n->next, rc++)
3629 if (!ru[rc])
3630 append(nr, n->data);
3631
3632 if (list_length(nl) && list_length(nr))
3633 append(oexps, exp_or(sql->sa, nl, nr, 0));
3634
3635 for (n = l->h, lc = 0; n; n = n->next, lc++) {
3636 if (lu[lc])
3637 append(oexps, n->data);
3638 }
3639 res = 1;
3640 } else {
3641 append(oexps, exp_or(sql->sa, list_dup(l, (fdup)NULL),
3642 list_dup(r, (fdup)NULL), 0));
3643 }
3644 free(lu);
3645 free(ru);
3646 return res;
3647}
3648
3649static int
3650are_equality_exps( list *exps, sql_exp **L)
3651{
3652 sql_exp *l = *L;
3653
3654 if (list_length(exps) == 1) {
3655 sql_exp *e = exps->h->data, *le = e->l, *re = e->r;
3656
3657 if (e->type == e_cmp && e->flag == cmp_equal && le->card != CARD_ATOM && re->card == CARD_ATOM) {
3658 if (!l) {
3659 *L = l = le;
3660 if (!is_column(le->type))
3661 return 0;
3662 }
3663 return (exp_match(l, le));
3664 }
3665 if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
3666 return (are_equality_exps(e->l, L) &&
3667 are_equality_exps(e->r, L));
3668 }
3669 return 0;
3670}
3671
3672static void
3673get_exps( list *n, list *l )
3674{
3675 sql_exp *e = l->h->data, *re = e->r;
3676
3677 if (e->type == e_cmp && e->flag == cmp_equal && re->card == CARD_ATOM)
3678 list_append(n, re);
3679 if (e->type == e_cmp && e->flag == cmp_or) {
3680 get_exps(n, e->l);
3681 get_exps(n, e->r);
3682 }
3683}
3684
3685static sql_exp *
3686equality_exps_2_in( mvc *sql, sql_exp *ce, list *l, list *r)
3687{
3688 list *nl = new_exp_list(sql->sa);
3689
3690 get_exps(nl, l);
3691 get_exps(nl, r);
3692
3693 return exp_in( sql->sa, ce, nl, cmp_in);
3694}
3695
3696static sql_rel *
3697rel_select_cse(int *changes, mvc *sql, sql_rel *rel)
3698{
3699 if (is_select(rel->op) && rel->exps) {
3700 node *n;
3701 list *nexps;
3702 int needed = 0;
3703
3704 for (n=rel->exps->h; n && !needed; n = n->next) {
3705 sql_exp *e = n->data;
3706
3707 if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
3708 needed = 1;
3709 }
3710 if (!needed)
3711 return rel;
3712
3713 nexps = new_exp_list(sql->sa);
3714 for (n=rel->exps->h; n; n = n->next) {
3715 sql_exp *e = n->data, *l = NULL;
3716
3717 if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && are_equality_exps(e->l, &l) && are_equality_exps(e->r, &l) && l) {
3718 (*changes)++;
3719 append(nexps, equality_exps_2_in(sql, l, e->l, e->r));
3720 } else {
3721 append(nexps, e);
3722 }
3723 }
3724 rel->exps = nexps;
3725 }
3726 if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) {
3727 node *n;
3728 list *nexps;
3729 int needed = 0;
3730
3731 for (n=rel->exps->h; n && !needed; n = n->next) {
3732 sql_exp *e = n->data;
3733
3734 if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e))
3735 needed = 1;
3736 }
3737 if (!needed)
3738 return rel;
3739 nexps = new_exp_list(sql->sa);
3740 for (n=rel->exps->h; n; n = n->next) {
3741 sql_exp *e = n->data;
3742
3743 if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
3744 /* split the common expressions */
3745 *changes += exps_cse(sql, nexps, e->l, e->r);
3746 } else {
3747 append(nexps, e);
3748 }
3749 }
3750 rel->exps = nexps;
3751 }
3752 return rel;
3753}
3754
3755static sql_rel *
3756rel_project_cse(int *changes, mvc *sql, sql_rel *rel)
3757{
3758 (void)changes;
3759 if (is_project(rel->op) && rel->exps) {
3760 node *n, *m;
3761 list *nexps;
3762 int needed = 0;
3763
3764 for (n=rel->exps->h; n && !needed; n = n->next) {
3765 sql_exp *e1 = n->data;
3766
3767 if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
3768 for (m=n->next; m; m = m->next){
3769 sql_exp *e2 = m->data;
3770
3771 if (exp_name(e2) && exp_match_exp(e1, e2))
3772 needed = 1;
3773 }
3774 }
3775 }
3776
3777 if (!needed)
3778 return rel;
3779
3780 nexps = new_exp_list(sql->sa);
3781 for (n=rel->exps->h; n; n = n->next) {
3782 sql_exp *e1 = n->data;
3783
3784 if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) {
3785 for (m=nexps->h; m; m = m->next){
3786 sql_exp *e2 = m->data;
3787
3788 if (exp_name(e2) && exp_match_exp(e1, e2)) {
3789 sql_exp *ne = exp_alias(sql->sa, exp_relname(e1), exp_name(e1), exp_relname(e2), exp_name(e2), exp_subtype(e2), e2->card, has_nil(e2), is_intern(e1));
3790
3791 ne = exp_propagate(sql->sa, ne, e1);
3792 e1 = ne;
3793 break;
3794 }
3795 }
3796 }
3797 append(nexps, e1);
3798 }
3799 rel->exps = nexps;
3800 }
3801 return rel;
3802}
3803
/*
 * Derive additional predicates that are implied by the disjunction
 * (l OR r).
 *
 * Both sides are flattened first: a nested non-anti OR is replaced by the
 * predicates derived from it (recursive call), every other expression is
 * kept.  Then, for each plain comparison on the left for which a partner is
 * found on the right (exps_match_col_exps; presumably "compares the same
 * column" -- confirm against its definition), a combined predicate is built:
 *   - equal/equal, equal/in, in/equal, in/in  -> one IN over all values
 *   - two range comparisons with the same flag -> one range using
 *     sql_min of the "r" operands and sql_max of the "f" operands
 * Returns the (possibly empty) list of derived predicates; the inputs are
 * not modified.
 */
static list *
exps_merge_select_rse( mvc *sql, list *l, list *r )
{
	node *n, *m, *o;
	list *nexps = NULL, *lexps, *rexps;

	/* flatten the left side of the or */
	lexps = new_exp_list(sql->sa);
	for (n = l->h; n; n = n->next) {
		sql_exp *e = n->data;

		if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
			/* note: this local nexps shadows the function level one */
			list *nexps = exps_merge_select_rse(sql, e->l, e->r);
			for (o = nexps->h; o; o = o->next)
				append(lexps, o->data);
		} else {
			append(lexps, e);
		}
	}
	/* flatten the right side of the or */
	rexps = new_exp_list(sql->sa);
	for (n = r->h; n; n = n->next) {
		sql_exp *e = n->data;

		if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
			list *nexps = exps_merge_select_rse(sql, e->l, e->r);
			for (o = nexps->h; o; o = o->next)
				append(rexps, o->data);
		} else {
			append(rexps, e);
		}
	}

	nexps = new_exp_list(sql->sa);

	/* merge merged lists first ? */
	for (n = lexps->h; n; n = n->next) {
		sql_exp *le = n->data, *re, *fnd = NULL;

		/* only plain, non-anti comparisons are candidates */
		if (le->type != e_cmp || le->flag == cmp_or || is_anti(le))
			continue;
		/* take the first right hand expression that matches le */
		for (m = rexps->h; !fnd && m; m = m->next) {
			re = m->data;
			if (exps_match_col_exps(le, re))
				fnd = re;
		}
		if (fnd && is_anti(fnd))
			continue;
		/* cases
		 * 1) 2 values (cmp_equal)
		 * 2) 1 value (cmp_equal), and cmp_in
		 * 	(also cmp_in, cmp_equal)
		 * 3) 2 cmp_in
		 * 4) ranges
		 */
		if (fnd) {
			/* from here on fnd holds the newly built predicate (if any) */
			re = fnd;
			fnd = NULL;
			if (le->anti || re->anti)
				continue;
			if (le->flag == cmp_equal && re->flag == cmp_equal) {
				list *exps = new_exp_list(sql->sa);

				append(exps, le->r);
				append(exps, re->r);
				fnd = exp_in(sql->sa, le->l, exps, cmp_in);
			} else if (le->flag == cmp_equal && re->flag == cmp_in){
				list *exps = new_exp_list(sql->sa);

				append(exps, le->r);
				list_merge(exps, re->r, NULL);
				fnd = exp_in(sql->sa, le->l, exps, cmp_in);
			} else if (le->flag == cmp_in && re->flag == cmp_equal){
				list *exps = new_exp_list(sql->sa);

				append(exps, re->r);
				list_merge(exps, le->r, NULL);
				fnd = exp_in(sql->sa, le->l, exps, cmp_in);
			} else if (le->flag == cmp_in && re->flag == cmp_in){
				list *exps = new_exp_list(sql->sa);

				list_merge(exps, le->r, NULL);
				list_merge(exps, re->r, NULL);
				fnd = exp_in(sql->sa, le->l, exps, cmp_in);
			} else if (le->f && re->f && /* merge ranges */
				   le->flag == re->flag && le->flag <= cmp_lt) {
				/* both have a third operand (f) and the same flag:
				 * widen to [min of the r operands, max of the f operands] */
				sql_subfunc *min = sql_bind_func(sql->sa, sql->session->schema, "sql_min", exp_subtype(le->r), exp_subtype(re->r), F_FUNC);
				sql_subfunc *max = sql_bind_func(sql->sa, sql->session->schema, "sql_max", exp_subtype(le->f), exp_subtype(re->f), F_FUNC);
				sql_exp *mine, *maxe;

				/* no suitable min/max function: nothing to derive */
				if (!min || !max)
					continue;
				mine = exp_binop(sql->sa, le->r, re->r, min);
				maxe = exp_binop(sql->sa, le->f, re->f, max);
				fnd = exp_compare2(sql->sa, le->l, mine, maxe, le->flag);
			}
			if (fnd)
				append(nexps, fnd);
		}
	}
	return nexps;
}
3904
/*
 * Projection variant of the related sub-expression merge.
 *
 * Returns a new list with the expressions of exps, where a function
 * expression and(x >[=] lo, x <[=] hi) on the same first argument x is
 * replaced by one call of sys.between(x, lo, hi, ...) carrying the name of
 * the original expression.  The replacement only happens when the between
 * function can be bound.  For every other function expression the argument
 * list is processed recursively (and stored back into the expression).
 */
static list *
exps_merge_project_rse( mvc *sql, list *exps)
{
	node *n;
	list *nexps = NULL;

	nexps = new_exp_list(sql->sa);
	for (n = exps->h; n; n = n->next) {
		sql_exp *e = n->data;

		if (is_func(e->type) && e->l) {
			list *fexps = e->l;
			sql_subfunc *f = e->f;

			/* is and function */
			if (strcmp(f->func->base.name, "and") == 0 && list_length(fexps) == 2) {
				sql_exp *l = list_fetch(fexps, 0);
				sql_exp *r = list_fetch(fexps, 1);

				/* check merge into single between */
				if (is_func(l->type) && is_func(r->type)) {
					list *lfexps = l->l;
					list *rfexps = r->l;
					sql_subfunc *lf = l->f;
					sql_subfunc *rf = r->f;

					/* left is a lower bound test, right an upper bound test,
					 * both on the same first argument (exp_equal returns 0
					 * for equal expressions here) */
					if (((strcmp(lf->func->base.name, ">=") == 0 || strcmp(lf->func->base.name, ">") == 0) && list_length(lfexps) == 2) &&
					    ((strcmp(rf->func->base.name, "<=") == 0 || strcmp(rf->func->base.name, "<") == 0) && list_length(rfexps) == 2)
					    && exp_equal(list_fetch(lfexps,0), list_fetch(rfexps,0)) == 0) {
						sql_exp *ce = list_fetch(lfexps, 0);
						list *types, *ops = sa_list(sql->sa);
						sql_subfunc *between;

						append(ops, ce);
						append(ops, list_fetch(lfexps, 1));
						append(ops, list_fetch(rfexps, 1));
						append(ops, exp_atom_bool(sql->sa, 0)); /* non symmetrical */
						append(ops, exp_atom_bool(sql->sa, lf->func->base.name[1] == '=')); /* left inclusive (true for ">=") */
						append(ops, exp_atom_bool(sql->sa, rf->func->base.name[1] == '=')); /* right inclusive (true for "<=") */
						append(ops, exp_atom_bool(sql->sa, 0)); /* nils_false */
						append(ops, exp_atom_bool(sql->sa, 0)); /* anti */

						types = exp_types(sql->sa, ops);
						/* convert into between */
						between = sql_bind_func_(sql->sa, mvc_bind_schema(sql, "sys"), "between", types, F_FUNC);
						if (between) {
							sql_exp *ne = exp_op(sql->sa, ops, between);

							/* the replacement keeps the name of the and expression */
							exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
							e = ne;
						}
					}
				}
			} else {
				/* any other function: look for and's among its arguments */
				e->l = exps_merge_project_rse(sql, fexps);
			}
		}
		append(nexps, e);
	}
	return nexps;
}
3966
3967/* merge related sub expressions
3968 *
3969 * ie (x = a and y > 1 and y < 5) or
3970 * (x = c and y > 1 and y < 10) or
3971 * (x = e and y > 1 and y < 20)
3972 * ->
3973 * ((x = a and y > 1 and y < 5) or
3974 * (x = c and y > 1 and y < 10) or
3975 * (x = e and y > 1 and y < 20)) and
3976 * x in (a,c,e) and
3977 * y > 1 and y < 20
3978 * */
3979static sql_rel *
3980rel_merge_rse(int *changes, mvc *sql, sql_rel *rel)
3981{
3982 /* only execute once per select */
3983 (void)*changes;
3984
3985 if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) {
3986 node *n, *o;
3987 list *nexps = new_exp_list(sql->sa);
3988
3989 for (n=rel->exps->h; n; n = n->next) {
3990 sql_exp *e = n->data;
3991
3992 if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
3993 /* possibly merge related expressions */
3994 list *ps = exps_merge_select_rse(sql, e->l, e->r);
3995 for (o = ps->h; o; o = o->next)
3996 append(nexps, o->data);
3997 }
3998 }
3999 if (list_length(nexps))
4000 for (o = nexps->h; o; o = o->next)
4001 append(rel->exps, o->data);
4002 }
4003 /* the project case of rse */
4004 if (is_project(rel->op) && rel->exps)
4005 rel->exps = exps_merge_project_rse(sql, rel->exps);
4006 return rel;
4007}
4008
4009/* find in the list of expression an expression which uses e */
4010static sql_exp *
4011exp_uses_exp( list *exps, sql_exp *e)
4012{
4013 node *n;
4014 const char *rname = exp_relname(e);
4015 const char *name = exp_name(e);
4016
4017 if (!exps)
4018 return NULL;
4019
4020 for ( n = exps->h; n; n = n->next) {
4021 sql_exp *u = n->data;
4022
4023 if (u->l && rname && strcmp(u->l, rname) == 0 &&
4024 u->r && name && strcmp(u->r, name) == 0)
4025 return u;
4026 if (!u->l && !rname &&
4027 u->r && name && strcmp(u->r, name) == 0)
4028 return u;
4029 }
4030 return NULL;
4031}
4032
4033/*
4034 * Rewrite aggregations over union all.
4035 * groupby ([ union all (a, b) ], [gbe], [ count, sum ] )
4036 *
4037 * into
4038 * groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] )
4039 */
4040static sql_rel *
4041rel_push_aggr_down(int *changes, mvc *sql, sql_rel *rel)
4042{
4043 if (rel->op == op_groupby && rel->l) {
4044 sql_rel *u = rel->l, *ou = u;
4045 sql_rel *g = rel;
4046 sql_rel *ul = u->l;
4047 sql_rel *ur = u->r;
4048 node *n, *m;
4049 list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL;
4050
4051 if (u->op == op_project)
4052 u = u->l;
4053
4054 if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || rel_is_ref(u))
4055 return rel;
4056
4057 ul = u->l;
4058 ur = u->r;
4059
4060 /* make sure we don't create group by on group by's */
4061 if (ul->op == op_groupby || ur->op == op_groupby)
4062 return rel;
4063
4064 rel->subquery = 0;
4065 /* distinct should be done over the full result */
4066 for (n = g->exps->h; n; n = n->next) {
4067 sql_exp *e = n->data;
4068 sql_subaggr *af = e->f;
4069
4070 if (e->type == e_atom ||
4071 e->type == e_func ||
4072 (e->type == e_aggr &&
4073 ((strcmp(af->aggr->base.name, "sum") &&
4074 strcmp(af->aggr->base.name, "count") &&
4075 strcmp(af->aggr->base.name, "min") &&
4076 strcmp(af->aggr->base.name, "max")) ||
4077 need_distinct(e))))
4078 return rel;
4079 }
4080
4081 ul = rel_dup(ul);
4082 ur = rel_dup(ur);
4083 if (!is_project(ul->op))
4084 ul = rel_project(sql->sa, ul,
4085 rel_projections(sql, ul, NULL, 1, 1));
4086 if (!is_project(ur->op))
4087 ur = rel_project(sql->sa, ur,
4088 rel_projections(sql, ur, NULL, 1, 1));
4089 rel_rename_exps(sql, u->exps, ul->exps);
4090 rel_rename_exps(sql, u->exps, ur->exps);
4091 if (u != ou) {
4092 ul = rel_project(sql->sa, ul, NULL);
4093 ul->exps = exps_copy(sql, ou->exps);
4094 rel_rename_exps(sql, ou->exps, ul->exps);
4095 ur = rel_project(sql->sa, ur, NULL);
4096 ur->exps = exps_copy(sql, ou->exps);
4097 rel_rename_exps(sql, ou->exps, ur->exps);
4098 }
4099
4100 if (g->r && list_length(g->r) > 0) {
4101 list *gbe = g->r;
4102
4103 lgbe = exps_copy(sql, gbe);
4104 rgbe = exps_copy(sql, gbe);
4105 }
4106 ul = rel_groupby(sql, ul, NULL);
4107 ul->r = lgbe;
4108 ul->nrcols = g->nrcols;
4109 ul->card = g->card;
4110 ul->exps = list_merge(exps_copy(sql, g->exps), exps_copy(sql, ul->r), (fdup)NULL);
4111
4112 ur = rel_groupby(sql, ur, NULL);
4113 ur->r = rgbe;
4114 ur->nrcols = g->nrcols;
4115 ur->card = g->card;
4116 ur->exps = list_merge(exps_copy(sql, g->exps), exps_copy(sql, ur->r), (fdup)NULL);
4117
4118 /* group by on primary keys which define the partioning scheme
4119 * don't need a finalizing group by */
4120 /* how to check if a partion is based on some primary key ?
4121 * */
4122 if (rel->r && list_length(rel->r)) {
4123 node *n;
4124
4125 for (n = ((list*)rel->r)->h; n; n = n->next) {
4126 sql_exp *gbe = n->data;
4127
4128 if (find_prop(gbe->p, PROP_HASHCOL)) {
4129 fcmp cmp = (fcmp)&kc_column_cmp;
4130 sql_column *c = exp_find_column(rel->l, gbe, -2);
4131
4132 /* check if key is partition key */
4133 if (c && c->t->p && list_find(c->t->pkey->k.columns, c, cmp) != NULL) {
4134 (*changes)++;
4135 return rel_inplace_setop(rel, ul, ur, op_union,
4136 rel_projections(sql, rel, NULL, 1, 1));
4137 }
4138 }
4139 }
4140 }
4141
4142 u = rel_setop(sql->sa, ul, ur, op_union);
4143 u->exps = rel_projections(sql, ul, NULL, 1, 1);
4144 set_processed(u);
4145
4146 if (rel->r) {
4147 list *ogbe = rel->r;
4148
4149 gbe = new_exp_list(sql->sa);
4150 for (n = ogbe->h; n; n = n->next) {
4151 sql_exp *e = n->data, *ne;
4152
4153 ne = exp_uses_exp( rel->exps, e);
4154 //assert(ne);
4155 if (!ne)
4156 ne = e;
4157 ne = list_find_exp( u->exps, ne);
4158 assert(ne);
4159 ne = exp_column(sql->sa, exp_find_rel_name(ne), exp_name(ne), exp_subtype(ne), ne->card, has_nil(ne), is_intern(ne));
4160 append(gbe, ne);
4161 }
4162 }
4163 exps = new_exp_list(sql->sa);
4164 for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) {
4165 sql_exp *ne, *e = n->data, *oa = m->data;
4166
4167 if (oa->type == e_aggr) {
4168 sql_subaggr *f = oa->f;
4169 int cnt = exp_aggr_is_count(oa);
4170 sql_subaggr *a = sql_bind_aggr(sql->sa, sql->session->schema, (cnt)?"sum":f->aggr->base.name, exp_subtype(e));
4171
4172 assert(a);
4173 /* union of aggr result may have nils
4174 * because sum/count of empty set */
4175 set_has_nil(e);
4176 e = exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e));
4177 ne = exp_aggr1(sql->sa, e, a, need_distinct(e), 1, e->card, 1);
4178 if (/* DISABLES CODE */ (0) && cnt)
4179 ne->p = prop_create(sql->sa, PROP_COUNT, ne->p);
4180 } else {
4181 ne = exp_copy(sql, oa);
4182 }
4183 exp_setname(sql->sa, ne, exp_find_rel_name(oa), exp_name(oa));
4184 append(exps, ne);
4185 }
4186 (*changes)++;
4187 return rel_inplace_groupby( rel, u, gbe, exps);
4188 }
4189 return rel;
4190}
4191
/*
 * More general
 * 	groupby(
 * 	 	[ outer ] join(
 * 		    project(
 * 		      table(A) [ c1, c2, .. ]
 * 		    ) [ c1, c2, identity(c2) as I, .. ],
 * 		    table(B) [ c1, c2, .. ]
 * 		) [ A.c1 = B.c1 ]
 * 	) [ I ] [ a1, a2, .. ]
 *
 * ->
 *
 * 	[ outer ] join(
 * 	 	project(
 * 	 	    table(A) [ c1, c2, .. ]
 * 	 	) [ c1, c2, .. ],
 * 		groupby (
 * 		    table(B) [ c1, c2, .. ]
 * 		) [ B.c1 ] [ a1, a2, .. ]
 * 	) [ A.c1 = B.c1 ]
 *
 * Only applied for a group by with exactly one group-by expression on top
 * of an op_join.  On success the group by node is destroyed and the join
 * (or, when nothing is left to join on, the remaining side) is returned.
 * *changes is not modified by this function.
 * NOTE(review): rel->l is dereferenced unconditionally (j->op); the callers
 * visible here only pass relations with a non-NULL left child.
 */
static sql_rel *
gen_push_groupby_down(int *changes, mvc *sql, sql_rel *rel)
{
	sql_rel *j = rel->l;
	list *gbe = rel->r;

	(void)changes;
	if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){ //&& is_join(j->op)) {
		sql_rel *jl = j->l, *jr = j->r, *cr, *cl;
		sql_exp *gb = gbe->h->data, *e;
		node *n;
		int left = 1;
		/* note: this gbe shadows the function level one */
		list *aggrs, *aliases, *gbe;

		if (!is_identity(gb, jl) && !is_identity(gb, jr))
			return rel;
		/* cl: the projecting side that delivers the group-by column (must
		 * carry PROP_HASHCOL); cr: the other side, which receives the new
		 * group by.  left tells on which side of the join cr sits. */
		if (jl->op == op_project &&
		    (e = list_find_exp( jl->exps, gb)) != NULL &&
		     find_prop(e->p, PROP_HASHCOL) != NULL) {
			left = 0;
			cr = jr;
			cl = jl;
		} else if (jr->op == op_project &&
		    (e = list_find_exp( jr->exps, gb)) != NULL &&
		     find_prop(e->p, PROP_HASHCOL) != NULL) {
			left = 1;
			cr = jl;
			cl = jr;
		} else {
			return rel;
		}

		/* don't group directly on a base table or select, nor when
		 * rel_is_join_on_pkey reports a join on a primary key */
		if ((left && is_base(jl->op)) || (!left && is_base(jr->op))||
		    (left && is_select(jl->op)) || (!left && is_select(jr->op))
		    || rel_is_join_on_pkey(j))
			return rel;

		/* only add aggr (based on left/right), and repeat the group by column */
		aggrs = sa_list(sql->sa);
		aliases = sa_list(sql->sa);
		if (rel->exps) for (n = rel->exps->h; n; n = n->next) {
			sql_exp *ce = n->data;

			if (exp_is_atom(ce))
				list_append(aliases, ce);
			else if (ce->type == e_column) {
				if (rel_has_exp(cl, ce) == 0) /* collect aliases outside groupby */
					list_append(aliases, ce);
				else
					list_append(aggrs, ce);
			} else if (ce->type == e_aggr) {
				list *args = ce->l;

				/* check args are part of left/right; give up when
				 * rel_has_exps(cl, args) returns 0 */
				if (!list_empty(args) && rel_has_exps(cl, args) == 0)
					return rel;
				/* rel is a group by at this point, so the first test
				 * always holds */
				if (rel->op != op_join && exp_aggr_is_count(ce))
					ce->p = prop_create(sql->sa, PROP_COUNT, ce->p);
				list_append(aggrs, ce);
			}
		}
		/* TODO move any column expressions (aliases) into the project list */

		/* find gb in left or right and should be unique */
		gbe = sa_list(sql->sa);
		/* push groupby to right, group on join exps */
		if (j->exps) for (n = j->exps->h; n; n = n->next) {
			sql_exp *ce = n->data, *e;

			/* get left/right hand of e_cmp */
			assert(ce->type == e_cmp);
			/* only equi-join conditions can be used as group-by columns */
			if (ce->flag != cmp_equal)
				return rel;
			e = rel_find_exp(cr, ce->l);
			if (!e)
				e = rel_find_exp(cr, ce->r);
			if (!e)
				return rel;
			e = exp_ref(sql->sa, e);
			list_append(gbe, e);
		}
		/* put the new group by on top of cr, on its side of the join */
		if (!left)
			cr = j->r = rel_groupby(sql, cr, gbe);
		else
			cr = j->l = rel_groupby(sql, cr, gbe);
		cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL);
		if (!is_project(cl->op))
			cl = rel_project(sql->sa, cl,
				rel_projections(sql, cl, NULL, 1, 1));
		cl->exps = list_merge(cl->exps, aliases, (fdup)NULL);
		if (!left)
			j->l = cl;
		else
			j->r = cl;
		/* detach the join before the old group by is destroyed */
		rel -> l = NULL;
		rel_destroy(rel);

		if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */
			sql_rel *r = cl;
			if (!left)
				j->l = NULL;
			else
				j->r = NULL;
			rel_destroy(j);
			j = r;
		}
		return j;
	}
	return rel;
}
4324
/*
 * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ]
 * into
 * 	project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n]
 *
 * A group by directly on a join, and every group by over a projection that
 * does not have the dictionary shape (op_join with exactly one join
 * expression, right side a base table with exactly two expressions), is
 * handed to gen_push_groupby_down instead.
 * *changes is incremented for every rewrite done here.
 *
 * NOTE(review): after a rewrite the loop over the group-by expressions
 * keeps running while rel already points to the new projection (whose r is
 * not a group-by list) -- a second matching group-by expression looks
 * unsafe; confirm whether that can occur.
 */
static sql_rel *
rel_push_groupby_down(int *changes, mvc *sql, sql_rel *rel)
{
	sql_rel *p = rel->l;
	list *gbe = rel->r;

	if (rel->op == op_groupby && gbe && p && is_join(p->op))
		return gen_push_groupby_down(changes, sql, rel);
	if (rel->op == op_groupby && gbe && p && p->op == op_project) {
		sql_rel *j = p->l;
		sql_rel *jl, *jr;
		node *n;

		if (!j || j->op != op_join || list_length(j->exps) != 1)
			return gen_push_groupby_down(changes, sql, rel);
		jl = j->l;
		jr = j->r;

		/* check if jr is a dict with index and var still used */
		if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2)
			return gen_push_groupby_down(changes, sql, rel);

		/* check if group by is done on dict column */
		for(n = gbe->h; n; n = n->next) {
			sql_exp *ge = n->data, *pe = NULL, *e = NULL;

			/* find group by exp in project, then in dict */
			pe = rel_find_exp(p, ge);
			if (pe) /* find project exp in right hand of join, ie dict */
				e = rel_find_exp(jr, pe);
			if (pe && e) {  /* Rewrite: join with dict after the group by */
				/* output of the group by as it is now; becomes the
				 * projection on top of the new join */
				list *pexps = rel_projections(sql, rel, NULL, 1, 1), *npexps;
				node *m;
				sql_exp *ne = j->exps->h->data; /* join exp */
				p->l = jl; 	/* Project now only on the left side of the join */

				ne = ne->l; 	/* The left side of the compare is the index of the left */

				/* find ge reference in new projection list */
				npexps = sa_list(sql->sa);
				for (m = pexps->h; m; m = m->next) {
					sql_exp *a = m->data;

					if (exp_refers(ge, a)) {
						/* replace by a reference to the last
						 * expression of the dict table, under
						 * the original output name */
						sql_exp *sc = jr->exps->t->data;
						sql_exp *e = exp_ref(sql->sa, sc);
						exp_setname(sql->sa, e, exp_relname(a), exp_name(a));
						a = e;
					}
					append(npexps, a);
				}

				/* find ge in aggr list */
				for (m = rel->exps->h; m; m = m->next) {
					sql_exp *a = m->data;

					if (exp_match_exp(a, ge) || exp_refers(ge, a)) {
						/* the group by now outputs the index column */
						a = exp_ref(sql->sa, ne);
						exp_setname(sql->sa, a, exp_relname(ne), exp_name(ne));
						m->data = a;
					}
				}

				/* change alias pe, ie project out the index  */
				pe->l = (void*)exp_relname(ne);
				pe->r = (void*)exp_name(ne);
				exp_setname(sql->sa, pe, exp_relname(ne), exp_name(ne));

				/* change alias ge */
				ge->l = (void*)exp_relname(pe);
				ge->r = (void*)exp_name(pe);
				exp_setname(sql->sa, ge, exp_relname(pe), exp_name(pe));

				/* zap both project and groupby name hash tables (as we changed names above) */
				rel->exps->ht = NULL;
				((list*)rel->r)->ht = NULL;
				p->exps->ht = NULL;

				/* add join: the group by becomes the left input of the
				 * join, a new projection goes on top */
				j->l = rel;
				rel = rel_project(sql->sa, j, npexps);
				(*changes)++;
			}
		}
		(void)sql;
	}
	return rel;
}
4419
/*
 * Push select down, pushes the selects through (simple) projections. Also
 * it cleans up the projections which become useless.
 *
 * Cases handled, in this order:
 *  - rel is referenced elsewhere: a select's expressions are moved into a
 *    new select below it (the referenced node itself keeps no expressions)
 *  - select on select: both expression lists are merged
 *  - select on semi/anti join: the select moves to the join's left input
 *  - project on project: delegated to rel_merge_projects
 *  - select on join: expressions are pushed to the left and/or right input
 *    where exp_push_down can rewrite them; for op_join what remains is
 *    then offered to the join itself
 *  - select on project: comparisons are renamed and moved below the
 *    projection where possible
 * *changes is increased for every expression / relation that moved.
 */

/* TODO push select expressions in outer joins down */
static sql_rel *
rel_push_select_down(int *changes, mvc *sql, sql_rel *rel)
{
	list *exps = NULL;
	sql_rel *r = NULL;
	node *n;

	if (rel_is_ref(rel)) {
		if (is_select(rel->op) && rel->exps) {
			/* add inplace empty select */
			sql_rel *l = rel_select(sql->sa, rel->l, NULL);

			if (!l->exps)
				l->exps = sa_list(sql->sa);
			/* hand all expressions to the new, unreferenced select */
			(void)list_merge(l->exps, rel->exps, (fdup)NULL);
			rel->exps = NULL;
			rel->l = l;
			(*changes)++;
		}
		return rel;
	}

	/* don't make changes for empty selects */
	if (is_select(rel->op) && (!rel->exps || list_length(rel->exps) == 0))
		return rel;

	/* merge 2 selects */
	r = rel->l;
	if (is_select(rel->op) && r && r->exps && is_select(r->op) && !(rel_is_ref(r))) {
		(void)list_merge(r->exps, rel->exps, (fdup)NULL);
		/* detach the child before the outer select is destroyed */
		rel->l = NULL;
		rel_destroy(rel);
		(*changes)++;
		return rel_push_select_down(changes, sql, r);
	}
	/*
	 * Push select through semi/anti join
	 * 	select (semi(A,B)) == semi(select(A), B)
	 */
	if (is_select(rel->op) && r && is_semi(r->op) && !(rel_is_ref(r))) {
		/* swap the two nodes: the select now sits on A, below the join */
		rel->l = r->l;
		r->l = rel;
		(*changes)++;
		/*
		 * if A has 2 references (ie used on both sides of
		 * the semi join), we also push the select into A.
		 */
		if (rel_is_ref(rel->l) && rel->l == rel_find_ref(r->r)){
			sql_rel *lx = rel->l;
			sql_rel *rx = r->r;
			if (lx->ref.refcnt == 2 && !rel_is_ref(rx)) {
				/* walk down the right side to the node just above
				 * the shared relation */
				while (rx->l && !rel_is_ref(rx->l) &&
				       (is_project(rx->op) ||
					is_select(rx->op) ||
					is_join(rx->op)))
					rx = rx->l;
				/* probably we need to introduce a project */
				rel_destroy(rel->l);
				lx = rel_project(sql->sa, rel, rel_projections(sql, rel, NULL, 1, 1));
				r->l = lx;
				rx->l = rel_dup(lx);
			}
		}
		return r;
	}
	exps = rel->exps;

	if (rel->op == op_project &&
	    r && r->op == op_project && !(rel_is_ref(r)))
		return rel_merge_projects(changes, sql, rel);

	/* push select through join */
	if (is_select(rel->op) && r && is_join(r->op) && !(rel_is_ref(r))) {
		sql_rel *jl = r->l;
		sql_rel *jr = r->r;
		/* which inputs may receive selections for this join type */
		int left = r->op == op_join || r->op == op_left;
		int right = r->op == op_join || r->op == op_right;

		if (r->op == op_full)
			return rel;

		/* introduce selects under the join (if needed) */
		set_processed(jl);
		set_processed(jr);
		if (!is_select(jl->op))
			r->l = jl = rel_select(sql->sa, jl, NULL);
		if (!is_select(jr->op))
			r->r = jr = rel_select(sql->sa, jr, NULL);

		/* rel keeps only the expressions that could not be pushed */
		rel->exps = new_exp_list(sql->sa);
		for (n = exps->h; n; n = n->next) {
			sql_exp *e = n->data, *ne = NULL;
			int done = 0;

			if (left)
				ne = exp_push_down(sql, e, jl, jl);
			if (ne && ne != e) {
				done = 1;
				rel_select_add_exp(sql->sa, jl, ne);
			} else if (right) {
				ne = exp_push_down(sql, e, jr, jr);
				if (ne && ne != e) {
					done = 1;
					rel_select_add_exp(sql->sa, jr, ne);
				}
			}
			if (!done)
				append(rel->exps, e);
			*changes += done;
		}
	}

	/* merge select and cross product ? */
	if (is_select(rel->op) && r && r->op == op_join && !(rel_is_ref(r))) {
		/* the expressions still left on the select after the step above */
		list *exps = rel->exps;

		if (!r->exps)
			r->exps = new_exp_list(sql->sa);
		rel->exps = new_exp_list(sql->sa);
		for (n = exps->h; n; n = n->next) {
			sql_exp *e = n->data;

			//if (exp_is_join_exp(e) == 0) {
			/* expressions for which exp_is_join returns 0 move into
			 * the join, the others stay on the select */
			if (exp_is_join(e, NULL) == 0) {
				append(r->exps, e);
				(*changes)++;
			} else {
				append(rel->exps, e);
			}
		}
		return rel;
	}

	if (is_select(rel->op) && r && r->op == op_project && !(rel_is_ref(r))){
		list *exps = rel->exps;
		sql_rel *pl;
		/* we cannot push through rank (row_number etc) functions or
		   projects with distinct */
		if (!r->l || project_unsafe(r,1))
			return rel;

		/* here we need to fix aliases */
		rel->exps = new_exp_list(sql->sa);
		pl = r->l;
		/* introduce selects under the project (if needed) */
		set_processed(pl);
		if (!is_select(pl->op) || rel_is_ref(pl))
			r->l = pl = rel_select(sql->sa, pl, NULL);

		/* for each exp check if we can rename it */
		for (n = exps->h; n; n = n->next) {
			sql_exp *e = n->data, *ne = NULL;

			if (e->type == e_cmp) {
				ne = exp_push_down_prj(sql, e, r, pl);

				/* can we move it down */
				if (ne && ne != e && pl->exps) {
					rel_select_add_exp(sql->sa, pl, ne);
					(*changes)++;
				} else {
					append(rel->exps, (ne)?ne:e);
				}
			} else {
				/* non comparisons stay where they are */
				list_append(rel->exps, e);
			}
		}
		return rel;
	}
	return rel;
}
4597
/*
 * For a select directly on top of an unreferenced op_join, try to push each
 * simple comparison (e_cmp without a third operand and without a complex
 * flag) into the join tree, using rel_push_join when its right operand has
 * cardinality CARD_AGGR or higher and rel_push_select otherwise.
 *
 * Expressions that could not be pushed, and all other expressions, are kept
 * on the select.  *changes counts the expressions that were pushed.
 */
static sql_rel *
rel_push_select_down_join(int *changes, mvc *sql, sql_rel *rel)
{
	list *exps = NULL;
	sql_rel *r = NULL;
	node *n;

	exps = rel->exps;
	r = rel->l;

	/* push select through join */
	if (is_select(rel->op) && exps && r && r->op == op_join && !(rel_is_ref(r))) {
		/* the select's list is rebuilt from what stays behind */
		rel->exps = new_exp_list(sql->sa);
		for (n = exps->h; n; n = n->next) {
			sql_exp *e = n->data;
			if (e->type == e_cmp && !e->f && !is_complex_exp(e->flag)) {
				sql_rel *nr = NULL;
				sql_exp *re = e->r, *ne = rel_find_exp(r, re);

				/* take over the cardinality found in the join for
				 * the right operand */
				if (ne && ne->card >= CARD_AGGR)
					re->card = ne->card;

				if (re->card >= CARD_AGGR) {
					nr = rel_push_join(sql, r, e->l, re, NULL, e);
				} else {
					nr = rel_push_select(sql, r, e->l, e);
				}
				if (nr)
					rel->l = nr;
				/* only pushed down selects are counted */
				if (r == rel->l) {
					(*changes)++;
				} else { /* Do not introduce an extra select */
					/* a new top relation came back: unlink and
					 * destroy it, keep the expression here
					 * (this r shadows the outer one) */
					sql_rel *r = rel->l;

					rel->l = r->l;
					r->l = NULL;
					list_append(rel->exps, e);
					rel_destroy(r);
				}
				/* either way the join must be the direct child again */
				assert(r == rel->l);
			} else {
				list_append(rel->exps, e);
			}
		}
		return rel;
	}
	return rel;
}
4647
4648static sql_rel *
4649rel_remove_empty_select(int *changes, mvc *sql, sql_rel *rel)
4650{
4651 (void)sql;
4652
4653 if ((is_join(rel->op) || is_semi(rel->op) || is_select(rel->op) || is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op)) && rel->l) {
4654 sql_rel *l = rel->l;
4655 if (is_select(l->op) && !(rel_is_ref(l)) && list_empty(l->exps)) {
4656 rel->l = l->l;
4657 l->l = NULL;
4658 rel_destroy(l);
4659 (*changes)++;
4660 }
4661 }
4662 if ((is_join(rel->op) || is_semi(rel->op) || is_set(rel->op)) && rel->r) {
4663 sql_rel *r = rel->r;
4664 if (is_select(r->op) && !(rel_is_ref(r)) && list_empty(r->exps)) {
4665 rel->r = r->l;
4666 r->l = NULL;
4667 rel_destroy(r);
4668 (*changes)++;
4669 }
4670 }
4671 if (is_join(rel->op) && list_empty(rel->exps))
4672 rel->exps = NULL; /* crossproduct */
4673 return rel;
4674}
4675
4676/*
4677 * Push {semi}joins down, pushes the joins through group by expressions.
4678 * When the join is on the group by columns, we can push the joins left
4679 * under the group by. This should only be done, iff the new semijoin would
4680 * reduce the input table to the groupby. So there should be a reduction
4681 * (selection) on the table A and this should be propagated to the groupby via
4682 * for example a primary key.
4683 *
4684 * {semi}join( A, groupby( B ) [gbe][aggrs] ) [ gbe == A.x ]
4685 * ->
4686 * {semi}join( A, groupby( semijoin(B,A) [gbe == A.x] ) [gbe][aggrs] ) [ gbe == A.x ]
4687 */
4688
4689static sql_rel *
4690rel_push_join_down(int *changes, mvc *sql, sql_rel *rel)
4691{
4692 list *exps = NULL;
4693
4694 (void)*changes;
4695 if (!rel_is_ref(rel) && ((is_join(rel->op) || is_semi(rel->op)) && rel->l && rel->exps)) {
4696 sql_rel *gb = rel->r, *ogb = gb, *l = NULL, *rell = rel->l;
4697
4698 if (gb->op == op_project)
4699 gb = gb->l;
4700
4701 if (is_basetable(rell->op) || rel_is_ref(rell))
4702 return rel;
4703
4704 exps = rel->exps;
4705 if (gb && gb->op == op_groupby && gb->r && list_length(gb->r)) {
4706 list *jes = new_exp_list(sql->sa);
4707 node *n, *m;
4708 list *gbes = gb->r;
4709 /* find out if all group by expressions are used in the join */
4710 for(n = gbes->h; n; n = n->next) {
4711 sql_exp *gbe = n->data;
4712 int fnd = 0;
4713 const char *rname = NULL, *name = NULL;
4714
4715 /* project in between, ie find alias */
4716 /* first find expression in expression list */
4717 gbe = exp_uses_exp( gb->exps, gbe);
4718 if (!gbe)
4719 continue;
4720 if (ogb != gb)
4721 gbe = exp_uses_exp( ogb->exps, gbe);
4722 if (gbe) {
4723 rname = exp_find_rel_name(gbe);
4724 name = exp_name(gbe);
4725 }
4726
4727 if (!name)
4728 return rel;
4729
4730 for (m = exps->h; m && !fnd; m = m->next) {
4731 sql_exp *je = m->data;
4732
4733 if (je->card >= CARD_ATOM && je->type == e_cmp &&
4734 !is_complex_exp(je->flag)) {
4735 /* expect right expression to match */
4736 sql_exp *r = je->r;
4737
4738 if (r == 0 || r->type != e_column)
4739 continue;
4740 if (r->l && rname && strcmp(r->l, rname) == 0 && strcmp(r->r, name)==0) {
4741 fnd = 1;
4742 } else if (!r->l && !rname && strcmp(r->r, name)==0) {
4743 fnd = 1;
4744 }
4745 if (fnd) {
4746 sql_exp *le = je->l;
4747 sql_exp *re = exp_push_down_prj(sql, r, gb, gb->l);
4748 if (!re || (list_length(jes) == 0 && !find_prop(le->p, PROP_HASHCOL))) {
4749 fnd = 0;
4750 } else {
4751 int anti = is_anti(je);
4752
4753 je = exp_compare(sql->sa, le, re, je->flag);
4754 if (anti) set_anti(je);
4755 list_append(jes, je);
4756 }
4757 }
4758 }
4759 }
4760 if (!fnd)
4761 return rel;
4762 }
4763 l = rel_dup(rel->l);
4764
4765 /* push join's left side (as semijoin) down group by */
4766 l = gb->l = rel_crossproduct(sql->sa, gb->l, l, op_semi);
4767 l->exps = jes;
4768 return rel;
4769 }
4770 }
4771 return rel;
4772}
4773
4774/*
4775 * Push semijoins down, pushes the semijoin through a join.
4776 *
4777 * semijoin( join(A, B) [ A.x == B.y ], C ) [ A.z == C.c ]
4778 * ->
4779 * join( semijoin(A, C) [ A.z == C.c ], B ) [ A.x == B.y ]
4780 *
4781 * also push simple expressions of a semijoin down if they only
4782 * involve the left sided of the semijoin.
4783 *
4784 * in some cases the other way is usefull, ie push join down
4785 * semijoin. When the join reduces (ie when there are selects on it).
4786 */
4787static sql_rel *
4788rel_push_semijoin_down_or_up(int *changes, mvc *sql, sql_rel *rel)
4789{
4790 (void)*changes;
4791
4792 if (rel->op == op_join && rel->exps && rel->l) {
4793 sql_rel *l = rel->l, *r = rel->r;
4794
4795 if (is_semi(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
4796 rel->l = l->l;
4797 l->l = rel;
4798 return l;
4799 }
4800 }
4801 /* also case with 2 joins */
4802 /* join ( join ( semijoin(), table), select (table)); */
4803 if (rel->op == op_join && rel->exps && rel->l) {
4804 sql_rel *l = rel->l, *r = rel->r;
4805 sql_rel *ll;
4806
4807 if (is_join(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
4808 ll = l->l;
4809 if (is_semi(ll->op) && !rel_is_ref(ll)) {
4810 l->l = ll->l;
4811 ll->l = rel;
4812 return ll;
4813 }
4814 }
4815 }
4816 /* first push down the expressions involving only A */
4817 if (rel->op == op_semi && rel->exps && rel->l) {
4818 list *exps = rel->exps, *nexps = sa_list(sql->sa);
4819 node *n;
4820
4821 if (nexps == NULL)
4822 return NULL;
4823 for(n = exps->h; n; n = n->next) {
4824 sql_exp *sje = n->data;
4825
4826 if (n != exps->h && sje->type == e_cmp &&
4827 !is_complex_exp(sje->flag) &&
4828 rel_has_exp(rel->l, sje->l) >= 0 &&
4829 rel_has_exp(rel->l, sje->r) >= 0) {
4830 rel->l = rel_select(sql->sa, rel->l, NULL);
4831 rel_select_add_exp(sql->sa, rel->l, sje);
4832 } else {
4833 append(nexps, sje);
4834 }
4835 }
4836 rel->exps = nexps;
4837 }
4838 if (rel->op == op_semi && rel->exps && rel->l) {
4839 operator_type op = rel->op, lop;
4840 node *n;
4841 sql_rel *l = rel->l, *ll = NULL, *lr = NULL;
4842 sql_rel *r = rel->r;
4843 list *exps = rel->exps, *nsexps, *njexps;
4844 int left = 1, right = 1;
4845
4846 /* handle project
4847 if (l->op == op_project && !need_distinct(l))
4848 l = l->l;
4849 */
4850
4851 if (!is_join(l->op) || rel_is_ref(l))
4852 return rel;
4853
4854 lop = l->op;
4855 ll = l->l;
4856 lr = l->r;
4857 /* semijoin shouldn't be based on right relation of join */
4858 for(n = exps->h; n; n = n->next) {
4859 sql_exp *sje = n->data;
4860
4861 if (sje->type != e_cmp)
4862 return rel;
4863 if (right &&
4864 (is_complex_exp(sje->flag) ||
4865 rel_has_exp(lr, sje->l) >= 0 ||
4866 rel_has_exp(lr, sje->r) >= 0)) {
4867 right = 0;
4868 }
4869 if (right)
4870 left = 0;
4871 if (!right && left &&
4872 (is_complex_exp(sje->flag) ||
4873 rel_has_exp(ll, sje->l) >= 0 ||
4874 rel_has_exp(ll, sje->r) >= 0)) {
4875 left = 0;
4876 }
4877 if (!right && !left)
4878 return rel;
4879 }
4880 nsexps = exps_copy(sql, rel->exps);
4881 njexps = exps_copy(sql, l->exps);
4882 if (right)
4883 l = rel_crossproduct(sql->sa, rel_dup(ll), rel_dup(r), op);
4884 else
4885 l = rel_crossproduct(sql->sa, rel_dup(lr), rel_dup(r), op);
4886 l->exps = nsexps;
4887 if (right)
4888 l = rel_crossproduct(sql->sa, l, rel_dup(lr), lop);
4889 else
4890 l = rel_crossproduct(sql->sa, l, rel_dup(ll), lop);
4891 l->exps = njexps;
4892 rel_destroy(rel);
4893 rel = l;
4894 }
4895 return rel;
4896}
4897
4898static int
4899rel_part_nr( sql_rel *rel, sql_exp *e )
4900{
4901 sql_column *c;
4902 sql_table *pp;
4903 assert(e->type == e_cmp);
4904
4905 c = exp_find_column(rel, e->l, -1);
4906 if (!c)
4907 c = exp_find_column(rel, e->r, -1);
4908 if (!c)
4909 return -1;
4910 pp = c->t;
4911 if (pp->p)
4912 return list_position(pp->p->members.set, pp);
4913 return -1;
4914}
4915
4916static int
4917rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr )
4918{
4919 sql_column *c;
4920 assert(e->type == e_cmp);
4921
4922 /*
4923 * following case fails.
4924 *
4925 * semijoin( A1, union [A1, A2] )
4926 * The union will never return proper column (from A2).
4927 * ie need different solution (probaly pass pnr).
4928 */
4929 c = exp_find_column(rel, e->l, pnr);
4930 if (!c)
4931 c = exp_find_column(rel, e->r, pnr);
4932 if (c) {
4933 sql_table *pp = c->t;
4934 if (pp->p && list_position(pp->p->members.set, pp) == pnr)
4935 return 1;
4936 }
4937 /* for projects we may need to do a rename! */
4938 if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op))
4939 return rel_uses_part_nr( rel->l, e, pnr);
4940
4941 if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) {
4942 if (rel_uses_part_nr( rel->l, e, pnr))
4943 return 1;
4944 if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr))
4945 return 1;
4946 }
4947 return 0;
4948}
4949
4950static int
4951rel_has_cmp_exp(sql_rel *rel, sql_exp *e)
4952{
4953 if (e->type == e_cmp) {
4954 if (get_cmp(e) == cmp_or) {
4955 return rel_has_exp(rel, e->l) == 0 &&
4956 rel_has_all_exps(rel, e->r);
4957 } else if (e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter) {
4958 return rel_has_all_exps(rel, e->l) &&
4959 rel_has_all_exps(rel, e->r);
4960 } else {
4961 return rel_has_exp(rel, e->l) == 0 &&
4962 rel_has_exp(rel, e->r) == 0 &&
4963 (!e->f || rel_has_exp(rel, e->f) == 0);
4964 }
4965 }
4966 return 0;
4967}
4968
4969static sql_rel *
4970rel_join_push_exps_down(int *changes, mvc *sql, sql_rel *rel)
4971{
4972 if ((is_join(rel->op) && !is_outerjoin(rel->op)) || is_semi(rel->op)) {
4973 sql_rel *l = rel->l, *r = rel->r;
4974 list *jexps = NULL, *lexps = NULL, *rexps = NULL;
4975 node *n;
4976
4977 if (list_empty(rel->exps))
4978 return rel;
4979
4980 for(n=rel->exps->h; n; n=n->next) {
4981 sql_exp *e = n->data;
4982 int le = rel_has_cmp_exp(l, e);
4983 int re = rel_has_cmp_exp(r, e);
4984
4985 /* select expressions on left */
4986 if (le && !re) {
4987 if (!lexps)
4988 lexps=sa_list(sql->sa);
4989 append(lexps, e);
4990 /* select expressions on right */
4991 } else if (!le && re && (rel->op != op_anti || (e->flag != mark_notin && e->flag != mark_in))) {
4992 if (!rexps)
4993 rexps=sa_list(sql->sa);
4994 append(rexps, e);
4995 } else {
4996 if (!jexps)
4997 jexps=sa_list(sql->sa);
4998 append(jexps, e);
4999 }
5000 }
5001 if (lexps || rexps)
5002 rel->exps = jexps;
5003 if (lexps) {
5004 l = rel->l = rel_select(sql->sa, rel->l, NULL);
5005 l->exps = lexps;
5006 (*changes) = 1;
5007 }
5008 if (rexps) {
5009 r = rel->r = rel_select(sql->sa, rel->r, NULL);
5010 r->exps = rexps;
5011 (*changes) = 1;
5012 }
5013 }
5014 return rel;
5015}
5016
5017/*
5018 * Push (semi)joins down unions, this is basically for merge tables, where
5019 * we know that the fk-indices are split over two clustered merge tables.
5020 */
5021static sql_rel *
5022rel_push_join_down_union(int *changes, mvc *sql, sql_rel *rel)
5023{
5024 if ((is_join(rel->op) && !is_outerjoin(rel->op)) || is_semi(rel->op)) {
5025 sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
5026 list *exps = rel->exps;
5027 sql_exp *je = !list_empty(exps)?exps->h->data:NULL;
5028
5029 if (!l || !r || need_distinct(l) || need_distinct(r))
5030 return rel;
5031 if (l->op == op_project)
5032 l = l->l;
5033 if (r->op == op_project)
5034 r = r->l;
5035
5036 /* both sides only if we have a join index */
5037 if (!l || !r ||(is_union(l->op) && is_union(r->op) &&
5038 je && !find_prop(je->p, PROP_JOINIDX) && /* FKEY JOIN */
5039 !rel_is_join_on_pkey(rel))) /* aligned PKEY JOIN */
5040 return rel;
5041 if (is_semi(rel->op) && is_union(l->op) && je && !find_prop(je->p, PROP_JOINIDX))
5042 return rel;
5043
5044 ol->subquery = or->subquery = 0;
5045 if ((is_union(l->op) && !need_distinct(l)) && !is_union(r->op)){
5046 sql_rel *nl, *nr;
5047 sql_rel *ll = rel_dup(l->l), *lr = rel_dup(l->r);
5048
5049 /* join(union(a,b), c) -> union(join(a,c), join(b,c)) */
5050 if (!is_project(ll->op))
5051 ll = rel_project(sql->sa, ll,
5052 rel_projections(sql, ll, NULL, 1, 1));
5053 if (!is_project(lr->op))
5054 lr = rel_project(sql->sa, lr,
5055 rel_projections(sql, lr, NULL, 1, 1));
5056 rel_rename_exps(sql, l->exps, ll->exps);
5057 rel_rename_exps(sql, l->exps, lr->exps);
5058 if (l != ol) {
5059 ll = rel_project(sql->sa, ll, NULL);
5060 ll->exps = exps_copy(sql, ol->exps);
5061 lr = rel_project(sql->sa, lr, NULL);
5062 lr->exps = exps_copy(sql, ol->exps);
5063 }
5064 nl = rel_crossproduct(sql->sa, ll, rel_dup(or), rel->op);
5065 nr = rel_crossproduct(sql->sa, lr, rel_dup(or), rel->op);
5066 if (need_no_nil(rel)) {
5067 set_no_nil(nl);
5068 set_no_nil(nr);
5069 }
5070 nl->exps = exps_copy(sql, exps);
5071 nr->exps = exps_copy(sql, exps);
5072 nl = rel_project(sql->sa, nl, rel_projections(sql, nl, NULL, 1, 1));
5073 nr = rel_project(sql->sa, nr, rel_projections(sql, nr, NULL, 1, 1));
5074 (*changes)++;
5075 return rel_inplace_setop(rel, nl, nr, op_union, rel_projections(sql, rel, NULL, 1, 1));
5076 } else if (is_union(l->op) && !need_distinct(l) &&
5077 is_union(r->op) && !need_distinct(r)) {
5078 sql_rel *nl, *nr;
5079 sql_rel *ll = rel_dup(l->l), *lr = rel_dup(l->r);
5080 sql_rel *rl = rel_dup(r->l), *rr = rel_dup(r->r);
5081
5082 /* join(union(a,b), union(c,d)) -> union(join(a,c), join(b,d)) */
5083 if (!is_project(ll->op))
5084 ll = rel_project(sql->sa, ll,
5085 rel_projections(sql, ll, NULL, 1, 1));
5086 if (!is_project(lr->op))
5087 lr = rel_project(sql->sa, lr,
5088 rel_projections(sql, lr, NULL, 1, 1));
5089 rel_rename_exps(sql, l->exps, ll->exps);
5090 rel_rename_exps(sql, l->exps, lr->exps);
5091 if (l != ol) {
5092 ll = rel_project(sql->sa, ll, NULL);
5093 ll->exps = exps_copy(sql, ol->exps);
5094 lr = rel_project(sql->sa, lr, NULL);
5095 lr->exps = exps_copy(sql, ol->exps);
5096 }
5097 if (!is_project(rl->op))
5098 rl = rel_project(sql->sa, rl,
5099 rel_projections(sql, rl, NULL, 1, 1));
5100 if (!is_project(rr->op))
5101 rr = rel_project(sql->sa, rr,
5102 rel_projections(sql, rr, NULL, 1, 1));
5103 rel_rename_exps(sql, r->exps, rl->exps);
5104 rel_rename_exps(sql, r->exps, rr->exps);
5105 if (r != or) {
5106 rl = rel_project(sql->sa, rl, NULL);
5107 rl->exps = exps_copy(sql, or->exps);
5108 rr = rel_project(sql->sa, rr, NULL);
5109 rr->exps = exps_copy(sql, or->exps);
5110 }
5111 nl = rel_crossproduct(sql->sa, ll, rl, rel->op);
5112 nr = rel_crossproduct(sql->sa, lr, rr, rel->op);
5113 if (need_no_nil(rel)) {
5114 set_no_nil(nl);
5115 set_no_nil(nr);
5116 }
5117 nl->exps = exps_copy(sql, exps);
5118 nr->exps = exps_copy(sql, exps);
5119 nl = rel_project(sql->sa, nl, rel_projections(sql, nl, NULL, 1, 1));
5120 nr = rel_project(sql->sa, nr, rel_projections(sql, nr, NULL, 1, 1));
5121 (*changes)++;
5122 return rel_inplace_setop(rel, nl, nr, op_union, rel_projections(sql, rel, NULL, 1, 1));
5123 } else if (!is_union(l->op) &&
5124 is_union(r->op) && !need_distinct(r) &&
5125 !is_semi(rel->op)) {
5126 sql_rel *nl, *nr;
5127 sql_rel *rl = rel_dup(r->l), *rr = rel_dup(r->r);
5128
5129 /* join(a, union(b,c)) -> union(join(a,b), join(a,c)) */
5130 if (!is_project(rl->op))
5131 rl = rel_project(sql->sa, rl,
5132 rel_projections(sql, rl, NULL, 1, 1));
5133 if (!is_project(rr->op))
5134 rr = rel_project(sql->sa, rr,
5135 rel_projections(sql, rr, NULL, 1, 1));
5136 rel_rename_exps(sql, r->exps, rl->exps);
5137 rel_rename_exps(sql, r->exps, rr->exps);
5138 if (r != or) {
5139 rl = rel_project(sql->sa, rl, NULL);
5140 rl->exps = exps_copy(sql, or->exps);
5141 rr = rel_project(sql->sa, rr, NULL);
5142 rr->exps = exps_copy(sql, or->exps);
5143 }
5144 nl = rel_crossproduct(sql->sa, rel_dup(ol), rl, rel->op);
5145 nr = rel_crossproduct(sql->sa, rel_dup(ol), rr, rel->op);
5146 if (need_no_nil(rel)) {
5147 set_no_nil(nl);
5148 set_no_nil(nr);
5149 }
5150 nl->exps = exps_copy(sql, exps);
5151 nr->exps = exps_copy(sql, exps);
5152 nl = rel_project(sql->sa, nl, rel_projections(sql, nl, NULL, 1, 1));
5153 nr = rel_project(sql->sa, nr, rel_projections(sql, nr, NULL, 1, 1));
5154 (*changes)++;
5155 return rel_inplace_setop(rel, nl, nr, op_union, rel_projections(sql, rel, NULL, 1, 1));
5156 /* {semi}join ( A1, union (A2, B)) [A1.partkey = A2.partkey] ->
5157 * {semi}join ( A1, A2 )
5158 * and
5159 * {semi}join ( A1, union (B, A2)) [A1.partkey = A2.partkey] ->
5160 * {semi}join ( A1, A2 )
5161 * (ie a single part on the left)
5162 *
5163 * Howto detect that a relation isn't matching.
5164 *
5165 * partitioning is currently done only on pkey/fkey's
5166 * ie only matching per part if join is on pkey/fkey (parts)
5167 *
5168 * and part numbers should match.
5169 *
5170 * */
5171 } else if (!is_union(l->op) &&
5172 is_union(r->op) && !need_distinct(r) &&
5173 is_semi(rel->op) && rel_is_join_on_pkey(rel)) {
5174 /* use first join expression, to find part nr */
5175 sql_exp *je = rel->exps->h->data;
5176 int lpnr = rel_part_nr(l, je);
5177 sql_rel *rl = r->l;
5178 sql_rel *rr = r->r;
5179
5180 if (lpnr < 0)
5181 return rel;
5182 /* case 1: uses left not right */
5183 if (rel_uses_part_nr(rl, je, lpnr) &&
5184 !rel_uses_part_nr(rr, je, lpnr)) {
5185 sql_rel *nl;
5186
5187 rl = rel_dup(rl);
5188 if (!is_project(rl->op))
5189 rl = rel_project(sql->sa, rl,
5190 rel_projections(sql, rl, NULL, 1, 1));
5191 rel_rename_exps(sql, r->exps, rl->exps);
5192 if (r != or) {
5193 rl = rel_project(sql->sa, rl, NULL);
5194 rl->exps = exps_copy(sql, or->exps);
5195 }
5196 nl = rel_crossproduct(sql->sa, rel_dup(ol), rl, rel->op);
5197 if (need_no_nil(rel))
5198 set_no_nil(nl);
5199 nl->exps = exps_copy(sql, exps);
5200 (*changes)++;
5201 return rel_inplace_project(sql->sa, rel, nl, rel_projections(sql, rel, NULL, 1, 1));
5202 /* case 2: uses right not left */
5203 } else if (!rel_uses_part_nr(rl, je, lpnr) &&
5204 rel_uses_part_nr(rr, je, lpnr)) {
5205 sql_rel *nl;
5206
5207 rr = rel_dup(rr);
5208 if (!is_project(rr->op))
5209 rr = rel_project(sql->sa, rr,
5210 rel_projections(sql, rr, NULL, 1, 1));
5211 rel_rename_exps(sql, r->exps, rr->exps);
5212 if (r != or) {
5213 rr = rel_project(sql->sa, rr, NULL);
5214 rr->exps = exps_copy(sql, or->exps);
5215 }
5216 nl = rel_crossproduct(sql->sa, rel_dup(ol), rr, rel->op);
5217 if (need_no_nil(rel))
5218 set_no_nil(nl);
5219 nl->exps = exps_copy(sql, exps);
5220 (*changes)++;
5221 return rel_inplace_project(sql->sa, rel, nl, rel_projections(sql, rel, NULL, 1, 1));
5222 }
5223 }
5224 }
5225 return rel;
5226}
5227
5228static int
5229rel_is_empty( sql_rel *rel )
5230{
5231 if ((is_join(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) {
5232 sql_rel *l = rel->l, *r = rel->r;
5233
5234 if (rel_is_empty(l) || ((is_join(rel->op) || is_semi(rel->op)) && rel_is_empty(r)))
5235 return 1;
5236 /* check */
5237 if (rel_is_join_on_pkey(rel)) {
5238 sql_exp *je = rel->exps->h->data;
5239 int lpnr = rel_part_nr(l, je);
5240
5241 if (lpnr >= 0 && !rel_uses_part_nr(r, je, lpnr))
5242 return 1;
5243 }
5244 }
5245 if (!is_union(rel->op)) {
5246 if (is_simple_project(rel->op) || is_topn(rel->op) || is_select(rel->op) || is_sample(rel->op)) {
5247 if (rel->l)
5248 return rel_is_empty(rel->l);
5249 } else if (is_join(rel->op) || is_semi(rel->op) || is_set(rel->op)) {
5250 int empty = 1;
5251 if (rel->l)
5252 empty &= rel_is_empty(rel->l);
5253 if (empty && rel->r)
5254 empty &= rel_is_empty(rel->r);
5255 return empty;
5256 }
5257 }
5258 return 0;
5259}
5260
5261/* non overlapping partitions should be removed */
5262static sql_rel *
5263rel_remove_empty_join(mvc *sql, sql_rel *rel, int *changes)
5264{
5265 /* recurse check rel_is_empty
5266 * For half empty unions replace by projects
5267 * */
5268 if (is_union(rel->op)) {
5269 sql_rel *l = rel->l, *r = rel->r;
5270
5271 rel->l = l = rel_remove_empty_join(sql, l, changes);
5272 rel->r = r = rel_remove_empty_join(sql, r, changes);
5273 if (rel_is_empty(l)) {
5274 (*changes)++;
5275 return rel_inplace_project(sql->sa, rel, rel_dup(r), rel->exps);
5276 } else if (rel_is_empty(r)) {
5277 (*changes)++;
5278 return rel_inplace_project(sql->sa, rel, rel_dup(l), rel->exps);
5279 }
5280 } else if ((is_simple_project(rel->op) || is_groupby(rel->op) || is_topn(rel->op) ||
5281 is_select(rel->op) || is_sample(rel->op))) {
5282 if (rel->l)
5283 rel->l = rel_remove_empty_join(sql, rel->l, changes);
5284 } else if (is_join(rel->op) || is_semi(rel->op) || is_set(rel->op)) {
5285 if (rel->l)
5286 rel->l = rel_remove_empty_join(sql, rel->l, changes);
5287 if (rel->r)
5288 rel->r = rel_remove_empty_join(sql, rel->r, changes);
5289 }
5290 return rel;
5291}
5292
5293typedef struct {
5294 sql_rel *p; /* the found join's parent */
5295 sql_rel *j; /* the found join relation itself */
5296} found_join;
5297
5298static void
5299rel_find_joins(mvc *sql, sql_rel *parent, sql_rel *rel, list *l, int depth)
5300{
5301 if (!rel || depth == 5) /* limit to 5 relations bellow in the tree */
5302 return;
5303
5304 switch (rel->op) {
5305 case op_basetable:
5306 case op_table:
5307 case op_ddl:
5308 break;
5309 case op_join:
5310 case op_left:
5311 case op_right:
5312 case op_full:
5313 case op_semi:
5314 case op_anti: {
5315 found_join *fl = SA_NEW(sql->sa, found_join);
5316 fl->p = parent;
5317 fl->j = rel;
5318 list_append(l, fl);
5319
5320 if (rel->l)
5321 rel_find_joins(sql, rel, rel->l, l, depth + 1);
5322 if (rel->r)
5323 rel_find_joins(sql, rel, rel->r, l, depth + 1);
5324 } break;
5325 case op_union:
5326 case op_inter:
5327 case op_except: {
5328 if (rel->l)
5329 rel_find_joins(sql, rel, rel->l, l, depth + 1);
5330 if (rel->r)
5331 rel_find_joins(sql, rel, rel->r, l, depth + 1);
5332 } break;
5333 case op_groupby:
5334 case op_project:
5335 case op_select:
5336 case op_topn:
5337 case op_sample: {
5338 if (rel->l)
5339 rel_find_joins(sql, rel, rel->l, l, depth + 1);
5340 } break;
5341 case op_insert:
5342 case op_update:
5343 case op_delete:
5344 case op_truncate: {
5345 if (rel->r)
5346 rel_find_joins(sql, rel, rel->r, l, depth + 1);
5347 } break;
5348 }
5349}
5350
5351/* find identical joins in diferent branches of the relational plan and merge them together */
5352static sql_rel *
5353rel_merge_identical_joins(int *changes, mvc *sql, sql_rel *rel)
5354{
5355 if (is_joinop(rel->op) && rel->l && rel->r) {
5356 list *l1 = sa_list(sql->sa), *l2 = sa_list(sql->sa);
5357
5358 rel_find_joins(sql, rel, rel->l, l1, 0);
5359 rel_find_joins(sql, rel, rel->r, l2, 0);
5360
5361 if (list_length(l1) && list_length(l2)) { /* found joins on both */
5362 for (node *n1 = l1->h ; n1; n1 = n1->next) {
5363 found_join *f1 = (found_join*) n1->data;
5364 for (node *n2 = l2->h ; n2; n2 = n2->next) {
5365 found_join *f2 = (found_join*) n2->data;
5366 sql_rel *j1 = f1->j, *j2 = f2->j, *j1_l = j1->l, *j1_r = j1->r, *j2_l = j2->l, *j2_r = j2->r;
5367 bool sides_equal = false;
5368
5369 if (j1 != j2) {
5370 const char *j1_ln = rel_name(j1_l), *j1_rn = rel_name(j1_r), *j2_ln = rel_name(j2_l), *j2_rn = rel_name(j2_r);
5371
5372 /* So far it looks on identical relations and common basetable relations */
5373 if ((j1_l == j2_l || (is_basetable(j1_l->op) && is_basetable(j2_l->op) && strcmp(j1_ln, j2_ln) == 0 && j1_l->l == j2_l->l)) &&
5374 (j1_r == j2_r || (is_basetable(j1_r->op) && is_basetable(j2_r->op) && strcmp(j1_rn, j2_rn) == 0 && j1_r->l == j2_r->l)))
5375 sides_equal = true;
5376 else if ((j1_l == j2_r || (is_basetable(j1_l->op) && is_basetable(j2_r->op) && strcmp(j1_ln, j2_rn) == 0 && j1_l->l == j2_r->l)) &&
5377 (j1_r == j2_l || (is_basetable(j1_r->op) && is_basetable(j2_l->op) && strcmp(j1_rn, j2_ln) == 0 && j1_r->l == j2_l->l)))
5378 sides_equal = true;
5379
5380 /* the left and right sides are equal */
5381 if (sides_equal && exp_match_list(j1->exps, j2->exps)) {
5382 sql_rel *p2 = f2->p;
5383
5384 if (p2->l == j2) {/* replace j2's parent join with j1 */
5385 rel_destroy(p2->l);
5386 p2->l = rel_dup(j1);
5387 } else {
5388 rel_destroy(p2->r);
5389 p2->r = rel_dup(j1);
5390 }
5391 (*changes)++;
5392 return rel;
5393 }
5394 }
5395 }
5396 }
5397 }
5398 }
5399 return rel;
5400}
5401
5402static sql_rel *
5403rel_push_select_down_union(int *changes, mvc *sql, sql_rel *rel)
5404{
5405 if (is_select(rel->op) && rel->l && rel->exps) {
5406 sql_rel *u = rel->l, *ou = u;
5407 sql_rel *s = rel;
5408 sql_rel *ul = u->l;
5409 sql_rel *ur = u->r;
5410
5411 if (u->op == op_project)
5412 u = u->l;
5413
5414 if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || rel_is_ref(u))
5415 return rel;
5416
5417 ul = u->l;
5418 ur = u->r;
5419
5420 rel->subquery = 0;
5421 u->subquery = 0;
5422 ul->subquery = 0;
5423 ur->subquery = 0;
5424 ul = rel_dup(ul);
5425 ur = rel_dup(ur);
5426 if (!is_project(ul->op))
5427 ul = rel_project(sql->sa, ul,
5428 rel_projections(sql, ul, NULL, 1, 1));
5429 if (!is_project(ur->op))
5430 ur = rel_project(sql->sa, ur,
5431 rel_projections(sql, ur, NULL, 1, 1));
5432 rel_rename_exps(sql, u->exps, ul->exps);
5433 rel_rename_exps(sql, u->exps, ur->exps);
5434
5435 if (u != ou) {
5436 ul = rel_project(sql->sa, ul, NULL);
5437 ul->exps = exps_copy(sql, ou->exps);
5438 rel_rename_exps(sql, ou->exps, ul->exps);
5439 ur = rel_project(sql->sa, ur, NULL);
5440 ur->exps = exps_copy(sql, ou->exps);
5441 rel_rename_exps(sql, ou->exps, ur->exps);
5442 }
5443
5444 /* introduce selects under the set (if needed) */
5445 set_processed(ul);
5446 set_processed(ur);
5447 ul = rel_select(sql->sa, ul, NULL);
5448 ur = rel_select(sql->sa, ur, NULL);
5449
5450 ul->exps = exps_copy(sql, s->exps);
5451 ur->exps = exps_copy(sql, s->exps);
5452
5453 rel = rel_inplace_setop(rel, ul, ur, op_union, rel_projections(sql, rel, NULL, 1, 1));
5454 (*changes)++;
5455 return rel;
5456 }
5457 return rel;
5458}
5459
5460static sql_rel *
5461rel_push_project_down_union(int *changes, mvc *sql, sql_rel *rel)
5462{
5463 /* first remove distinct if already unique */
5464 if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(sql, rel, rel->exps))
5465 set_nodistinct(rel);
5466
5467 if (rel->op == op_project && rel->l && rel->exps && !rel->r) {
5468 int need_distinct = need_distinct(rel);
5469 sql_rel *u = rel->l;
5470 sql_rel *p = rel;
5471 sql_rel *ul = u->l;
5472 sql_rel *ur = u->r;
5473
5474 if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel,0))
5475 return rel;
5476 /* don't push project down union of single values */
5477 if ((is_project(ul->op) && !ul->l) || (is_project(ur->op) && !ur->l))
5478 return rel;
5479
5480 rel->subquery = 0;
5481 u->subquery = 0;
5482 ul = rel_dup(ul);
5483 ur = rel_dup(ur);
5484
5485 if (!is_project(ul->op))
5486 ul = rel_project(sql->sa, ul,
5487 rel_projections(sql, ul, NULL, 1, 1));
5488 if (!is_project(ur->op))
5489 ur = rel_project(sql->sa, ur,
5490 rel_projections(sql, ur, NULL, 1, 1));
5491 need_distinct = (need_distinct &&
5492 (!exps_unique(sql, ul, ul->exps) ||
5493 !exps_unique(sql, ur, ur->exps)));
5494 rel_rename_exps(sql, u->exps, ul->exps);
5495 rel_rename_exps(sql, u->exps, ur->exps);
5496
5497 /* introduce projects under the set */
5498 ul = rel_project(sql->sa, ul, NULL);
5499 if (need_distinct)
5500 set_distinct(ul);
5501 ur = rel_project(sql->sa, ur, NULL);
5502 if (need_distinct)
5503 set_distinct(ur);
5504
5505 ul->exps = exps_copy(sql, p->exps);
5506 ur->exps = exps_copy(sql, p->exps);
5507
5508 rel = rel_inplace_setop(rel, ul, ur, op_union,
5509 rel_projections(sql, rel, NULL, 1, 1));
5510 if (need_distinct)
5511 set_distinct(rel);
5512 (*changes)++;
5513 rel->l = rel_merge_projects(changes, sql, rel->l);
5514 rel->r = rel_merge_projects(changes, sql, rel->r);
5515 return rel;
5516 }
5517 return rel;
5518}
5519
5520/* Compute the efficiency of using this expression early in a group by list */
5521static int
5522score_gbe( mvc *sql, sql_rel *rel, sql_exp *e)
5523{
5524 int res = 10;
5525 sql_subtype *t = exp_subtype(e);
5526 sql_column *c = NULL;
5527
5528 /* can we find out if the underlying table is sorted */
5529 if ( (c = exp_find_column(rel, e, -2)) != NULL) {
5530 if (mvc_is_sorted (sql, c))
5531 res += 500;
5532 }
5533
5534 /* is the column selective */
5535
5536 /* prefer the shorter var types over the longer onces */
5537 if (!EC_FIXED(t->type->eclass) && t->digits)
5538 res -= t->digits;
5539 /* smallest type first */
5540 if (EC_FIXED(t->type->eclass))
5541 res -= t->type->eclass;
5542 return res;
5543}
5544
5545/* reorder group by expressions */
5546static sql_rel *
5547rel_groupby_order(int *changes, mvc *sql, sql_rel *rel)
5548{
5549 list *gbe = rel->r;
5550
5551 (void)*changes;
5552 if (is_groupby(rel->op) && list_length(gbe) > 1 && list_length(gbe)<9) {
5553 node *n;
5554 int i, *scores = calloc(list_length(gbe), sizeof(int));
5555
5556 for (i = 0, n = gbe->h; n; i++, n = n->next)
5557 scores[i] = score_gbe(sql, rel, n->data);
5558 rel->r = list_keysort(gbe, scores, (fdup)NULL);
5559 free(scores);
5560 }
5561 return rel;
5562}
5563
5564
5565/* reduce group by expressions based on pkey info
5566 *
5567 * The reduced group by and (derived) aggr expressions are restored via
5568 * extra (new) aggregate columns.
5569 */
5570static sql_rel *
5571rel_reduce_groupby_exps(int *changes, mvc *sql, sql_rel *rel)
5572{
5573 list *gbe = rel->r;
5574
5575 if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
5576 node *n, *m;
5577 int8_t *scores = malloc(list_length(gbe));
5578 int k, j, i;
5579 sql_column *c;
5580 sql_table **tbls;
5581 sql_rel **bts, *bt = NULL;
5582
5583 gbe = rel->r;
5584 tbls = (sql_table**)malloc(sizeof(sql_table*)*list_length(gbe));
5585 bts = (sql_rel**)malloc(sizeof(sql_rel*)*list_length(gbe));
5586 if (scores == NULL || tbls == NULL || bts == NULL) {
5587 if (scores)
5588 free(scores);
5589 if (tbls)
5590 free(tbls);
5591 if (bts)
5592 free(bts);
5593 return NULL;
5594 }
5595 for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) {
5596 sql_exp *e = n->data;
5597
5598 c = exp_find_column_(rel, e, -2, &bt);
5599 if (c) {
5600 for(j = 0; j < i; j++)
5601 if (c->t == tbls[j] && bts[j] == bt)
5602 break;
5603 tbls[j] = c->t;
5604 bts[j] = bt;
5605 i += (j == i);
5606 }
5607 }
5608 if (i) { /* forall tables find pkey and
5609 remove useless other columns */
5610 /* TODO also remove group by columns which are related to
5611 * the other columns using a foreign-key join (n->1), ie 1
5612 * on the to be removed side.
5613 */
5614 for(j = 0; j < i; j++) {
5615 int l, nr = 0, cnr = 0;
5616
5617 k = list_length(gbe);
5618 memset(scores, 0, list_length(gbe));
5619 if (tbls[j]->pkey) {
5620 for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
5621 fcmp cmp = (fcmp)&kc_column_cmp;
5622 sql_exp *e = n->data;
5623
5624 c = exp_find_column_(rel, e, -2, &bt);
5625 if (c && c->t == tbls[j] && bts[j] == bt &&
5626 list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) {
5627 scores[l] = 1;
5628 nr ++;
5629 } else if (c && c->t == tbls[j] && bts[j] == bt) {
5630 /* Okay we can cleanup a group by column */
5631 scores[l] = -1;
5632 cnr ++;
5633 }
5634 }
5635 }
5636 if (nr) {
5637 int all = (list_length(tbls[j]->pkey->k.columns) == nr);
5638 sql_kc *kc = tbls[j]->pkey->k.columns->h->data;
5639
5640 c = kc->c;
5641 for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
5642 sql_exp *e = n->data;
5643
5644 /* pkey based group by */
5645 if (scores[l] == 1 && ((all ||
5646 /* first of key */
5647 (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL)))
5648 e->p = prop_create(sql->sa, PROP_HASHCOL, e->p);
5649 }
5650 for (m = rel->exps->h; m; m = m->next ){
5651 sql_exp *e = m->data;
5652
5653 for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
5654 sql_exp *gb = n->data;
5655
5656 /* pkey based group by */
5657 if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) {
5658 e->p = prop_create(sql->sa, PROP_HASHCOL, e->p);
5659 break;
5660 }
5661
5662 }
5663 }
5664 }
5665 if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) {
5666 list *ngbe = new_exp_list(sql->sa);
5667 list *exps = rel->exps, *nexps = new_exp_list(sql->sa);
5668
5669 for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) {
5670 sql_exp *e = n->data;
5671
5672 /* keep the group by columns which form a primary key
5673 * of this table. And those unrelated to this table. */
5674 if (scores[l] != -1)
5675 append(ngbe, e);
5676 }
5677 rel->r = ngbe;
5678 /* rewrite gbe and aggr, in the aggr list */
5679 for (m = exps->h; m; m = m->next ){
5680 sql_exp *e = m->data;
5681 int fnd = 0;
5682
5683 for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) {
5684 sql_exp *gb = n->data;
5685
5686 if (scores[l] == -1 && exp_refers(gb, e)) {
5687 sql_exp *rs = exp_column(sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_intern(gb));
5688 exp_setname(sql->sa, rs, exp_find_rel_name(e), exp_name(e));
5689 e = rs;
5690 fnd = 1;
5691 }
5692 }
5693 append(nexps, e);
5694 }
5695 /* new reduced aggr expression list */
5696 assert(list_length(nexps)>0);
5697 rel->exps = nexps;
5698 /* only one reduction at a time */
5699 *changes = 1;
5700 free(bts);
5701 free(tbls);
5702 free(scores);
5703 return rel;
5704 }
5705 gbe = rel->r;
5706 }
5707 }
5708 free(bts);
5709 free(tbls);
5710 free(scores);
5711 }
5712 /* remove constants from group by list */
5713 if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
5714 int i;
5715 node *n;
5716
5717 for (i = 0, n = gbe->h; n; n = n->next) {
5718 sql_exp *e = n->data;
5719
5720 if (exp_is_atom(e))
5721 i++;
5722 }
5723 if (i) {
5724 list *ngbe = new_exp_list(sql->sa);
5725 list *dgbe = new_exp_list(sql->sa);
5726
5727 for (n = gbe->h; n; n = n->next) {
5728 sql_exp *e = n->data;
5729
5730 if (!exp_is_atom(e))
5731 append(ngbe, e);
5732 /* we need at least one gbe */
5733 else if (!n->next && list_empty(ngbe))
5734 append(ngbe, e);
5735 else
5736 append(dgbe, e);
5737 }
5738 rel->r = ngbe;
5739 if (!list_empty(dgbe)) {
5740 /* use atom's directly in the aggr expr list */
5741 list *nexps = new_exp_list(sql->sa);
5742
5743 for (n = rel->exps->h; n; n = n->next) {
5744 sql_exp *e = n->data, *ne = NULL;
5745
5746 if (e->type == e_column) {
5747 if (e->l)
5748 ne = exps_bind_column2(dgbe, e->l, e->r);
5749 else
5750 ne = exps_bind_column(dgbe, e->r, NULL);
5751 if (ne) {
5752 ne = exp_copy(sql, ne);
5753 exp_prop_alias(sql->sa, ne, e);
5754 e = ne;
5755 }
5756 }
5757 append(nexps, e);
5758 }
5759 rel->exps = nexps;
5760 (*changes)++;
5761 }
5762 }
5763 }
5764 return rel;
5765}
5766
5767/* Rewrite group by expressions with distinct
5768 *
5769 * ie select a, count(distinct b) from c where ... groupby a;
5770 * No other aggregations should be present
5771 *
5772 * Rewrite the more general case, good for parallel execution
5773 *
5774 * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d]
5775 *
5776 * into
5777 *
5778 * groupby(
5779 * groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d]
5780 * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d)
5781 */
5782
5783#if 0
/* Two-phase rewrite of a group by that mixes distinct and non-distinct
 * aggregates (currently compiled out by the surrounding #if 0).
 *
 * An inner group by is created on the original group by columns plus the
 * arguments of the distinct aggregates; the outer group by (rel itself) then
 * re-aggregates the inner results on the original group by columns.
 *
 * changes: incremented when the rewrite is applied.
 * Returns rel, either untouched or rewritten in place.
 */
static sql_rel *
rel_groupby_distinct2(int *changes, mvc *sql, sql_rel *rel)
{
	list *ngbes = sa_list(sql->sa), *gbes, *naggrs = sa_list(sql->sa), *aggrs = sa_list(sql->sa);
	sql_rel *l;
	node *n;

	gbes = rel->r;
	/* nothing to do without group by columns */
	if (!gbes)
		return rel;

	/* check if each aggr is rewritable (max,min,sum,count).
	 * NOTE(review): the original comment also promises "only has one
	 * argument", but the argument count is not checked here. */
	for (n = rel->exps->h; n; n = n->next) {
		sql_exp *e = n->data;
		sql_subaggr *af = e->f;

		if (e->type == e_aggr &&
		   (strcmp(af->aggr->base.name, "sum") &&
		    strcmp(af->aggr->base.name, "count") &&
		    strcmp(af->aggr->base.name, "min") &&
		    strcmp(af->aggr->base.name, "max")))
			return rel;
	}

	/* the outer group by groups on references to the original group by columns */
	for (n = gbes->h; n; n = n->next) {
		sql_exp *e = n->data;

		e = exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e));
		append(ngbes, e);
	}

	/* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
	 * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
	 * 3 for each group by col, add also to the naggrs list
	 * */
	for (n = rel->exps->h; n; n = n->next) {
		sql_exp *e = n->data;

		if (e->type == e_aggr && need_distinct(e)) { /* 1 */
			/* need column expression */
			list *args = e->l;
			sql_exp *v = args->h->data;
			/* the distinct argument becomes an extra inner group by column */
			append(gbes, v);
			if (!exp_name(v))
				exp_label(sql->sa, v, ++sql->label);
			v = exp_column(sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_intern(v));
			append(aggrs, v);
			/* the outer aggregate works on the inner column, under the original name */
			v = exp_aggr1(sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
			exp_setname(sql->sa, v, exp_find_rel_name(e), exp_name(e));
			append(naggrs, v);
		} else if (e->type == e_aggr && !need_distinct(e)) { /* 2 */
			sql_exp *v;
			sql_subaggr *f = e->f;
			int cnt = exp_aggr_is_count(e);
			/* phase 2 of a count is a sum over the partial counts */
			sql_subaggr *a = sql_bind_aggr(sql->sa, sql->session->schema, (cnt)?"sum":f->aggr->base.name, exp_subtype(e));

			/* the original aggregate moves to the inner group by */
			append(aggrs, e);
			if (!exp_name(e))
				exp_label(sql->sa, e, ++sql->label);
			set_has_nil(e);
			v = exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e));
			v = exp_aggr1(sql->sa, v, a, 0, 1, e->card, 1);
			if (cnt)
				set_zero_if_empty(v);
			exp_setname(sql->sa, v, exp_find_rel_name(e), exp_name(e));
			append(naggrs, v);
		} else { /* group by col */ /* 3 */
			if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
				append(aggrs, e);

				e = exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e));
			}
			append(naggrs, e);
		}
	}

	/* insert the inner group by below rel and install the new lists */
	l = rel->l = rel_groupby(sql, rel->l, gbes);
	l->exps = aggrs;
	rel->r = ngbes;
	rel->exps = naggrs;
	(*changes)++;
	return rel;
}
5868#endif
5869
5870static sql_rel *
5871rel_groupby_distinct(int *changes, mvc *sql, sql_rel *rel)
5872{
5873 if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_empty(rel->r)) {
5874 node *n;
5875
5876 for (n = rel->exps->h; n; n = n->next) {
5877 sql_exp *e = n->data;
5878
5879 if (exp_aggr_is_count(e) && need_distinct(e)) {
5880 /* if count over unique values (ukey/pkey) */
5881 if (e->l && exps_unique(sql, rel, e->l))
5882 set_nodistinct(e);
5883 }
5884 }
5885 }
5886
5887 if (is_groupby(rel->op)) {
5888 sql_rel *l = rel->l;
5889 if (!l || is_groupby(l->op))
5890 return rel;
5891 }
5892 if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
5893 node *n;
5894 int nr = 0;
5895 list *gbe, *ngbe, *arg, *exps, *nexps;
5896 sql_exp *distinct = NULL, *darg;
5897 sql_rel *l = NULL;
5898
5899 for (n=rel->exps->h; n && nr <= 2; n = n->next) {
5900 sql_exp *e = n->data;
5901 if (need_distinct(e)) {
5902 distinct = n->data;
5903 nr++;
5904 }
5905 }
5906 if (nr < 1 || distinct->type != e_aggr)
5907 return rel;
5908 if ((nr > 1 || list_length(rel->r) + nr != list_length(rel->exps)))
5909 return rel;//rel_groupby_distinct2(changes, sql, rel);
5910 arg = distinct->l;
5911 if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps))
5912 return rel;
5913
5914 gbe = rel->r;
5915 ngbe = sa_list(sql->sa);
5916 exps = sa_list(sql->sa);
5917 nexps = sa_list(sql->sa);
5918 for (n=rel->exps->h; n; n = n->next) {
5919 sql_exp *e = n->data;
5920 if (e != distinct) {
5921 e = exp_ref(sql->sa, e);
5922 append(ngbe, e);
5923 append(exps, e);
5924 e = exp_ref(sql->sa, e);
5925 append(nexps, e);
5926 }
5927 }
5928
5929 darg = arg->h->data;
5930 list_append(gbe, darg = exp_copy(sql, darg));
5931 exp_label(sql->sa, darg, ++sql->label);
5932
5933 darg = exp_ref(sql->sa, darg);
5934 list_append(exps, darg);
5935 darg = exp_ref(sql->sa, darg);
5936 arg->h->data = darg;
5937 l = rel->l = rel_groupby(sql, rel->l, gbe);
5938 l->exps = exps;
5939 rel->r = ngbe;
5940 rel->exps = nexps;
5941 set_nodistinct(distinct);
5942 append(nexps, distinct);
5943 (*changes)++;
5944 }
5945 return rel;
5946}
5947
5948static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
5949
5950static void
5951list_split_aggr_and_project(mvc *sql, list *aexps, list *exps)
5952{
5953 node *n;
5954
5955 if (!exps)
5956 return ;
5957 for(n = exps->h; n; n = n->next)
5958 n->data = split_aggr_and_project(sql, aexps, n->data);
5959}
5960
5961static sql_exp *
5962split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e)
5963{
5964 switch(e->type) {
5965 case e_aggr:
5966 /* add to the aggrs */
5967 if (!exp_name(e))
5968 exp_label(sql->sa, e, ++sql->label);
5969 list_append(aexps, e);
5970 return exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e));
5971 case e_cmp:
5972 /* e_cmp's shouldn't exist in an aggr expression list */
5973 assert(0);
5974 case e_convert:
5975 e->l = split_aggr_and_project(sql, aexps, e->l);
5976 return e;
5977 case e_func:
5978 list_split_aggr_and_project(sql, aexps, e->l);
5979 return e;
5980 case e_column: /* constants and columns shouldn't be rewriten */
5981 case e_atom:
5982 case e_psm:
5983 return e;
5984 }
5985 return NULL;
5986}
5987
5988static sql_exp *
5989exp_use_consts(mvc *sql, sql_exp *e, list *consts);
5990
5991static list *
5992exps_use_consts(mvc *sql, list *exps, list *consts)
5993{
5994 node *n;
5995 list *nl = new_exp_list(sql->sa);
5996
5997 if (!exps)
5998 return sa_list(sql->sa);
5999 for(n = exps->h; n; n = n->next) {
6000 sql_exp *arg = n->data, *narg = NULL;
6001
6002 narg = exp_use_consts(sql, arg, consts);
6003 if (!narg)
6004 return NULL;
6005 narg = exp_propagate(sql->sa, narg, arg);
6006 append(nl, narg);
6007 }
6008 return nl;
6009}
6010
6011static sql_exp *
6012exp_use_consts(mvc *sql, sql_exp *e, list *consts)
6013{
6014 sql_exp *ne = NULL, *l, *r, *r2;
6015
6016 switch(e->type) {
6017 case e_column:
6018 if (e->l)
6019 ne = exps_bind_column2(consts, e->l, e->r);
6020 if (!ne && !e->l)
6021 ne = exps_bind_column(consts, e->r, NULL);
6022 if (!ne)
6023 return e;
6024 return ne;
6025 case e_cmp:
6026 if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) {
6027 list *l = exps_use_consts(sql, e->l, consts);
6028 list *r = exps_use_consts(sql, e->r, consts);
6029
6030 if (!l || !r)
6031 return NULL;
6032 if (get_cmp(e) == cmp_filter)
6033 return exp_filter(sql->sa, l, r, e->f, is_anti(e));
6034 return exp_or(sql->sa, l, r, is_anti(e));
6035 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
6036 sql_exp *l = exp_use_consts(sql, e->l, consts);
6037 list *r = exps_use_consts(sql, e->r, consts);
6038
6039 if (!l || !r)
6040 return NULL;
6041 return exp_in(sql->sa, l, r, e->flag);
6042 } else {
6043 l = exp_use_consts(sql, e->l, consts);
6044 r = exp_use_consts(sql, e->r, consts);
6045 if (e->f) {
6046 r2 = exp_use_consts(sql, e->f, consts);
6047 if (l && r && r2)
6048 ne = exp_compare2(sql->sa, l, r, r2, e->flag);
6049 } else if (l && r) {
6050 ne = exp_compare(sql->sa, l, r, e->flag);
6051 }
6052 }
6053 if (!ne)
6054 return NULL;
6055 return exp_propagate(sql->sa, ne, e);
6056 case e_convert:
6057 l = exp_use_consts(sql, e->l, consts);
6058 if (l)
6059 return exp_convert(sql->sa, l, exp_fromtype(e), exp_totype(e));
6060 return NULL;
6061 case e_aggr:
6062 case e_func: {
6063 list *l = e->l, *nl = NULL;
6064
6065 if (!l) {
6066 return e;
6067 } else {
6068 nl = exps_use_consts(sql, l, consts);
6069 if (!nl)
6070 return NULL;
6071 }
6072 if (e->type == e_func)
6073 return exp_op(sql->sa, nl, e->f);
6074 else
6075 return exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
6076 }
6077 case e_atom:
6078 case e_psm:
6079 return e;
6080 }
6081 return NULL;
6082}
6083
6084static list *
6085exps_remove_dictexps(mvc *sql, list *exps, sql_rel *r)
6086{
6087 node *n;
6088 list *nl = new_exp_list(sql->sa);
6089
6090 if (!exps)
6091 return nl;
6092 for(n = exps->h; n; n = n->next) {
6093 sql_exp *arg = n->data;
6094
6095 if (!list_find_exp(r->exps, arg->l) && !list_find_exp(r->exps, arg->r))
6096 append(nl, arg);
6097 }
6098 return nl;
6099}
6100
6101static sql_rel *
6102rel_remove_join(int *changes, mvc *sql, sql_rel *rel)
6103{
6104 if (is_join(rel->op) && !is_outerjoin(rel->op)) {
6105 sql_rel *l = rel->l;
6106 sql_rel *r = rel->r;
6107 int lconst = 0, rconst = 0;
6108
6109 if (!l || rel_is_ref(l) || !r || rel_is_ref(r) ||
6110 (l->op != op_project && r->op != op_project))
6111 return rel;
6112 if (l->op == op_project && exps_are_atoms(l->exps))
6113 lconst = 1;
6114 if (r->op == op_project && exps_are_atoms(r->exps))
6115 rconst = 1;
6116 if (lconst || rconst) {
6117 (*changes)++;
6118 /* use constant (instead of alias) in expressions */
6119 if (lconst) {
6120 sql_rel *s = l;
6121 l = r;
6122 r = s;
6123 }
6124 rel->exps = exps_use_consts(sql, rel->exps, r->exps);
6125 /* change into select */
6126 rel->op = op_select;
6127 rel->l = l;
6128 rel->r = NULL;
6129 /* wrap in a project including, the constant columns */
6130 l->subquery = 0;
6131 rel = rel_project(sql->sa, rel, rel_projections(sql, l, NULL, 1, 1));
6132 list_merge(rel->exps, r->exps, (fdup)NULL);
6133 }
6134 }
6135 if (is_join(rel->op)) {
6136 sql_rel *l = rel->l;
6137 sql_rel *r = rel->r;
6138 int ldict = 0, rdict = 0;
6139
6140 if (!l || rel_is_ref(l) || !r || rel_is_ref(r) ||
6141 (l->op != op_basetable && r->op != op_basetable))
6142 return rel;
6143 /* check if dict (last column) isn't used, one column only */
6144 if (l->op == op_basetable && !l->l && list_length(l->exps) <= 1)
6145 ldict = 1;
6146 if (r->op == op_basetable && !r->l && list_length(r->exps) <= 1)
6147 rdict = 1;
6148 if (!ldict && !rdict)
6149 return rel;
6150 (*changes)++;
6151
6152 assert(0);
6153 if (ldict) {
6154 sql_rel *s = l;
6155 l = r;
6156 r = s;
6157 }
6158 rel->exps = exps_remove_dictexps(sql, rel->exps, r);
6159 /* change into select */
6160 rel->op = op_select;
6161 rel->l = l;
6162 rel->r = NULL;
6163 /* wrap in a project including, the dict/index columns */
6164 l->subquery = 0;
6165 rel = rel_project(sql->sa, rel, rel_projections(sql, l, NULL, 1, 1));
6166 list_merge(rel->exps, r->exps, (fdup)NULL);
6167 }
6168 /* project (join (A,B)[ A.x = B.y ] ) [project_cols] -> project (A) [project_cols]
6169 * where non of the project_cols are from B and x=y is a foreign key join (B is the unique side)
6170 * and there are no filters on B
6171 */
6172 if (is_project(rel->op)) {
6173 sql_rel *j = rel->l;
6174
6175 if (is_join(j->op)) {
6176 node *n;
6177 sql_rel *l = j->l;
6178 sql_rel *r = j->r;
6179
6180 if (!l || rel_is_ref(l) || !r || rel_is_ref(r) || r->op != op_basetable || r->l)
6181 return rel;
6182
6183 /* check if all projection cols can be found in l */
6184 for(n = rel->exps->h; n; n = n->next) {
6185 sql_exp *e = n->data;
6186
6187 if (!rel_find_exp(l, e))
6188 return rel;
6189
6190 }
6191 assert(0);
6192 (*changes)++;
6193 rel->l = l;
6194 rel->r = NULL;
6195 l->subquery = 0;
6196 }
6197 }
6198 return rel;
6199}
6200
/* Pushing projects up the tree. Done very early in the optimizer.
 * Makes later steps easier.
 *
 * Two cases are handled:
 *  - join/select on top of project(s): the projects are removed from below
 *    the operator, the operator's expressions are renamed to the inputs of
 *    the removed projects and a single project is put on top of the
 *    operator (in place).
 *  - group by with more than one expression, of which at least one is not
 *    an aggregate, column or atom: the aggregates stay in the group by and
 *    the remaining computations move into a new project on top (in place).
 *
 * changes is incremented when a rewrite is applied. Returns rel when
 * nothing was done, otherwise the result of rel_inplace_project().
 */
static sql_rel *
rel_push_project_up(int *changes, mvc *sql, sql_rel *rel)
{
	/* project/project cleanup is done later */
	if (is_join(rel->op) || is_select(rel->op)) {
		node *n;
		list *exps = NULL, *l_exps, *r_exps;
		sql_rel *l = rel->l;
		sql_rel *r = rel->r;
		sql_rel *t;

		/* Don't rewrite refs, non projections or constant or
		   order by projections  */
		if (!l || rel_is_ref(l) ||
		   (is_join(rel->op) && (!r || rel_is_ref(r))) ||
		   (is_select(rel->op) && l->op != op_project) ||
		   (is_join(rel->op) && l->op != op_project && r->op != op_project) ||
		  ((l->op == op_project && (!l->l || l->r || project_unsafe(l,is_select(rel->op)))) ||
		   (is_join(rel->op) && (is_subquery(r) ||
		    (r->op == op_project && (!r->l || r->r || project_unsafe(r,0)))))))
			return rel;

		if (l->op == op_project && l->l) {
			/* Go through the list of project expressions.
			   Check if they can be pushed up, ie are they not
			   changing or introducing any columns used
			   by the upper operator. */

			exps = new_exp_list(sql->sa);
			for (n = l->exps->h; n; n = n->next) {
				sql_exp *e = n->data;

				/* we cannot rewrite projection with atomic values from outer joins */
				if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) {
					list_append(exps, e);
				} else if (e->type == e_column) {
					/* labeled columns block the rewrite */
					if (has_label(e))
						return rel;
					list_append(exps, e);
				} else {
					/* any other expression type blocks the rewrite */
					return rel;
				}
			}
		} else {
			/* left side is no project, project all its columns */
			exps = rel_projections(sql, l, NULL, 1, 1);
		}
		/* also handle right hand of join */
		if (is_join(rel->op) && r->op == op_project && r->l) {
			/* Here we also check all expressions of r like above
			   but also we need to check for ambiguous names. */

			for (n = r->exps->h; n; n = n->next) {
				sql_exp *e = n->data;

				/* we cannot rewrite projection with atomic values from outer joins */
				if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) {
					list_append(exps, e);
				} else if (e->type == e_column) {
					if (has_label(e))
						return rel;
					list_append(exps, e);
				} else {
					return rel;
				}
			}
		} else if (is_join(rel->op)) {
			/* right side is no project, add all its columns
			 * (this r_exps shadows the outer declaration) */
			list *r_exps = rel_projections(sql, r, NULL, 1, 2);

			list_merge(exps, r_exps, (fdup)NULL);
		}
		/* Here we should check for ambiguous names ? */
		if (is_join(rel->op) && r) {
			/* t is the relation below the left project (or l itself) */
			t = (l->op == op_project && l->l)?l->l:l;
			l_exps = rel_projections(sql, t, NULL, 1, 1);
			/* conflict with old right expressions */
			r_exps = rel_projections(sql, r, NULL, 1, 1);
			for(n = l_exps->h; n; n = n->next) {
				sql_exp *e = n->data;
				const char *rname = exp_relname(e);
				const char *name = exp_name(e);

				if (exp_is_atom(e))
					continue;
				if ((rname && exps_bind_column2(r_exps, rname, name) != NULL) ||
				    (!rname && exps_bind_column(r_exps, name, NULL) != NULL))
					return rel;
			}
			/* t is the relation below the right project (or r itself) */
			t = (r->op == op_project && r->l)?r->l:r;
			r_exps = rel_projections(sql, t, NULL, 1, 1);
			/* conflict with new right expressions */
			for(n = l_exps->h; n; n = n->next) {
				sql_exp *e = n->data;

				if (exp_is_atom(e))
					continue;
				if ((e->l && exps_bind_column2(r_exps, e->l, e->r) != NULL) ||
				   (exps_bind_column(r_exps, e->r, NULL) != NULL && (!e->l || !e->r)))
					return rel;
			}
			/* conflict with new left expressions */
			for(n = r_exps->h; n; n = n->next) {
				sql_exp *e = n->data;

				if (exp_is_atom(e))
					continue;
				if ((e->l && exps_bind_column2(l_exps, e->l, e->r) != NULL) ||
				   (exps_bind_column(l_exps, e->r, NULL) != NULL && (!e->l || !e->r)))
					return rel;
			}
		}

		/* rename operator expressions */
		if (l->op == op_project) {
			/* rewrite rel from rel->l into rel->l->l */
			if (rel->exps) {
				list *nexps = new_exp_list(sql->sa);

				for (n = rel->exps->h; n; n = n->next) {
					sql_exp *e = n->data;

					e = exp_rename(sql, e, l, l->l);
					assert(e);
					list_append(nexps, e);
				}
				rel->exps = nexps;
			}
			/* unlink the project, detaching its input before it is destroyed */
			rel->l = l->l;
			l->l = NULL;
			rel_destroy(l);
		}
		if (is_join(rel->op) && r->op == op_project) {
			/* rewrite rel from rel->r into rel->r->l */
			if (rel->exps) {
				list *nexps = new_exp_list(sql->sa);

				for (n = rel->exps->h; n; n = n->next) {
					sql_exp *e = n->data;

					e = exp_rename(sql, e, r, r->l);
					assert(e);
					list_append(nexps, e);
				}
				rel->exps = nexps;
			}
			/* unlink the project, detaching its input before it is destroyed */
			rel->r = r->l;
			r->l = NULL;
			rel_destroy(r);
		}
		/* Done, ie introduce new project */
		exps_fix_card(exps, rel->card);
		(*changes)++;
		return rel_inplace_project(sql->sa, rel, NULL, exps);
	}
	if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) {
		node *n;
		int fnd = 0;
		list *aexps, *pexps;

		/* check if some expressions aren't e_aggr (or plain columns/atoms) */
		for (n = rel->exps->h; n && !fnd; n = n->next) {
			sql_exp *e = n->data;

			if (e->type != e_aggr && e->type != e_column && e->type != e_atom) {
				fnd = 1;
			}
		}
		/* only aggr, no rewrite needed */
		if (!fnd)
			return rel;

		/* aexps: expressions kept in the group by,
		 * pexps: expressions of the new project on top */
		aexps = sa_list(sql->sa);
		pexps = sa_list(sql->sa);
		for (n = rel->exps->h; n; n = n->next) {
			sql_exp *e = n->data, *ne = NULL;

			switch (e->type) {
			case e_atom: /* move over to the projection */
				list_append(pexps, e);
				break;
			case e_func:
				/* function goes to the projection, aggregates in its arguments stay */
				list_append(pexps, e);
				list_split_aggr_and_project(sql, aexps, e->l);
				break;
			case e_convert:
				/* conversion goes to the projection, an aggregate below it stays */
				list_append(pexps, e);
				e->l = split_aggr_and_project(sql, aexps, e->l);
				break;
			default: /* simple alias */
				list_append(aexps, e);
				ne = exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e));
				list_append(pexps, ne);
				break;
			}
		}
		(*changes)++;
		rel->exps = aexps;
		return rel_inplace_project( sql->sa, rel, NULL, pexps);
	}
	return rel;
}
6404
6405static int exp_mark_used(sql_rel *subrel, sql_exp *e);
6406
6407static int
6408exps_mark_used(sql_rel *subrel, list *l)
6409{
6410 int nr = 0;
6411 if (list_empty(l))
6412 return nr;
6413
6414 for (node *n = l->h; n != NULL; n = n->next)
6415 nr += exp_mark_used(subrel, n->data);
6416 return nr;
6417}
6418
6419static int
6420exp_mark_used(sql_rel *subrel, sql_exp *e)
6421{
6422 int nr = 0;
6423 sql_exp *ne = NULL;
6424
6425 switch(e->type) {
6426 case e_column:
6427 ne = rel_find_exp(subrel, e);
6428 break;
6429 case e_convert:
6430 return exp_mark_used(subrel, e->l);
6431 case e_aggr:
6432 case e_func: {
6433 if (e->l)
6434 nr += exps_mark_used(subrel, e->l);
6435 break;
6436 }
6437 case e_cmp:
6438 if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) {
6439 nr += exps_mark_used(subrel, e->l);
6440 nr += exps_mark_used(subrel, e->r);
6441 } else if (e->flag == cmp_in || e->flag == cmp_notin) {
6442 nr += exp_mark_used(subrel, e->l);
6443 nr += exps_mark_used(subrel, e->r);
6444 } else {
6445 nr += exp_mark_used(subrel, e->l);
6446 nr += exp_mark_used(subrel, e->r);
6447 if (e->f)
6448 nr += exp_mark_used(subrel, e->f);
6449 }
6450 break;
6451 case e_atom:
6452 /* atoms are used in e_cmp */
6453 e->used = 1;
6454 /* return 0 as constants may require a full column ! */
6455 if (e->f)
6456 nr += exps_mark_used(subrel, e->f);
6457 return nr;
6458 case e_psm:
6459 e->used = 1;
6460 break;
6461 }
6462 if (ne) {
6463 ne->used = 1;
6464 return ne->used;
6465 }
6466 return nr;
6467}
6468
6469static void
6470positional_exps_mark_used( sql_rel *rel, sql_rel *subrel )
6471{
6472 assert(rel->exps);
6473
6474 if ((is_topn(subrel->op) || is_sample(subrel->op)) && subrel->l)
6475 subrel = subrel->l;
6476 /* everything is used within the set operation */
6477 if (rel->exps && subrel->exps) {
6478 node *m;
6479 for (m=subrel->exps->h; m; m = m->next) {
6480 sql_exp *se = m->data;
6481
6482 se->used = 1;
6483 }
6484 }
6485}
6486
6487static void
6488rel_exps_mark_used(sql_allocator *sa, sql_rel *rel, sql_rel *subrel)
6489{
6490 int nr = 0;
6491
6492 if (rel->r && (rel->op == op_project || rel->op == op_groupby)) {
6493 list *l = rel->r;
6494 node *n;
6495
6496 for (n=l->h; n; n = n->next) {
6497 sql_exp *e = n->data;
6498
6499 exp_mark_used(rel, e);
6500 }
6501 }
6502
6503 if (rel->exps) {
6504 node *n;
6505 int len = list_length(rel->exps), i;
6506 sql_exp **exps = SA_NEW_ARRAY(sa, sql_exp*, len);
6507
6508 for (n=rel->exps->h, i = 0; n; n = n->next, i++) {
6509 sql_exp *e = exps[i] = n->data;
6510
6511 nr += e->used;
6512 }
6513
6514 if (!nr && is_project(rel->op)) /* project atleast one column */
6515 exps[0]->used = 1;
6516
6517 for (i = len-1; i >= 0; i--) {
6518 sql_exp *e = exps[i];
6519
6520 if (!is_project(rel->op) || e->used) {
6521 if (is_project(rel->op))
6522 nr += exp_mark_used(rel, e);
6523 nr += exp_mark_used(subrel, e);
6524 }
6525 }
6526 }
6527 /* for count/rank we need atleast one column */
6528 if (subrel && !nr && (is_project(subrel->op) || is_base(subrel->op)) && subrel->exps->h) {
6529 sql_exp *e = subrel->exps->h->data;
6530 e->used = 1;
6531 }
6532 if (rel->r && (rel->op == op_project || rel->op == op_groupby)) {
6533 list *l = rel->r;
6534 node *n;
6535
6536 for (n=l->h; n; n = n->next) {
6537 sql_exp *e = n->data;
6538
6539 // exp_mark_used(rel, e);
6540 /* possibly project/groupby uses columns from the inner */
6541 exp_mark_used(subrel, e);
6542 }
6543 }
6544}
6545
6546static void exps_used(list *l);
6547
6548static void
6549exp_used(sql_exp *e)
6550{
6551 if (e) {
6552 e->used = 1;
6553 if ((e->type == e_func || e->type == e_aggr) && e->l)
6554 exps_used(e->l);
6555 }
6556}
6557
6558static void
6559exps_used(list *l)
6560{
6561 if (l) {
6562 node *n;
6563
6564 for (n = l->h; n; n = n->next)
6565 exp_used(n->data);
6566 }
6567}
6568
6569static void
6570rel_used(sql_rel *rel)
6571{
6572 if (!rel)
6573 return;
6574 if (is_join(rel->op) || is_set(rel->op) || is_semi(rel->op)) {
6575 if (rel->l)
6576 rel_used(rel->l);
6577 if (rel->r)
6578 rel_used(rel->r);
6579 } else if (is_topn(rel->op) || is_select(rel->op) || is_sample(rel->op)) {
6580 rel_used(rel->l);
6581 rel = rel->l;
6582 } else if (rel->op == op_table && rel->r) {
6583 exp_used(rel->r);
6584 }
6585 if (rel && rel->exps) {
6586 exps_used(rel->exps);
6587 if (rel->r && (rel->op == op_project || rel->op == op_groupby))
6588 exps_used(rel->r);
6589 }
6590}
6591
/* Walk the relation tree from rel downwards and mark the expressions which
 * are needed as used.
 *
 * proj: when set, rel's own expression lists are taken into account:
 *       project/groupby/update/delete and set operations only mark when
 *       proj is set, and a relation flagged distinct gets everything
 *       marked through rel_used(). Inputs of project like operators,
 *       selects, joins and set operations are visited with proj == 0;
 *       topn/sample and table functions pass proj on unchanged.
 */
static void
rel_mark_used(mvc *sql, sql_rel *rel, int proj)
{
	(void)sql;

	/* distinct needs all columns */
	if (proj && (need_distinct(rel)))
		rel_used(rel);

	switch(rel->op) {
	case op_basetable:
	case op_table:

		/* only a table function with an input relation (and flag != 2)
		 * has something to mark below it */
		if (rel->op == op_table && rel->l && rel->flag != 2) {
			rel_used(rel);
			if (rel->r)
				exp_mark_used(rel->l, rel->r);
			rel_mark_used(sql, rel->l, proj);
		}
		break;

	case op_topn:
	case op_sample:
		if (proj) {
			/* look through the topn/sample */
			rel = rel ->l;
			rel_mark_used(sql, rel, proj);
			break;
		}
		/* fall through */
	case op_project:
	case op_groupby:
		if (proj && rel->l) {
			rel_exps_mark_used(sql->sa, rel, rel->l);
			rel_mark_used(sql, rel->l, 0);
		} else if (proj) {
			/* no input relation, only mark within rel itself */
			rel_exps_mark_used(sql->sa, rel, NULL);
		}
		break;
	case op_update:
	case op_delete:
		if (proj && rel->r) {
			sql_rel *r = rel->r;
			if (r->exps && r->exps->h) { /* TID is used */
				sql_exp *e = r->exps->h->data;
				e->used = 1;
			}
			rel_exps_mark_used(sql->sa, rel, rel->r);
			rel_mark_used(sql, rel->r, 0);
		}
		break;

	case op_insert:
	case op_truncate:
	case op_ddl:
		break;

	case op_select:
		if (rel->l) {
			rel_exps_mark_used(sql->sa, rel, rel->l);
			rel_mark_used(sql, rel->l, 0);
		}
		break;

	case op_union:
	case op_inter:
	case op_except:
		/* For now we mark all union expression as used */

		/* Later we should (in case of union all) remove unused
		 * columns from the projection.
		 *
 		 * Project part of union is based on column position.
		 */
		if (proj && (need_distinct(rel) || !rel->exps)) {
			rel_used(rel);
			if (!rel->exps) {
				rel_used(rel->l);
				rel_used(rel->r);
			}
			rel_mark_used(sql, rel->l, 0);
			rel_mark_used(sql, rel->r, 0);
		} else if (proj && !need_distinct(rel)) {
			sql_rel *l = rel->l;

			/* left input */
			positional_exps_mark_used(rel, l);
			rel_exps_mark_used(sql->sa, rel, l);
			rel_mark_used(sql, rel->l, 0);
			/* based on child check set expression list */
			if (is_project(l->op) && need_distinct(l))
				positional_exps_mark_used(l, rel);
			/* right input */
			positional_exps_mark_used(rel, rel->r);
			rel_exps_mark_used(sql->sa, rel, rel->r);
			rel_mark_used(sql, rel->r, 0);
		}
		break;

	case op_join:
	case op_left:
	case op_right:
	case op_full:
	case op_semi:
	case op_anti:
		/* join expressions can refer to both inputs */
		rel_exps_mark_used(sql->sa, rel, rel->l);
		rel_exps_mark_used(sql->sa, rel, rel->r);
		rel_mark_used(sql, rel->l, 0);
		rel_mark_used(sql, rel->r, 0);
		break;
	}
}
6700
6701static sql_rel * rel_dce_sub(mvc *sql, sql_rel *rel);
6702
6703static sql_rel *
6704rel_remove_unused(mvc *sql, sql_rel *rel)
6705{
6706 int needed = 0;
6707
6708 if (!rel)
6709 return rel;
6710
6711 switch(rel->op) {
6712 case op_basetable: {
6713 sql_table *t = rel->l;
6714
6715 if (t && isReplicaTable(t)) /* TODO fix rewriting in rel_distribute.c */
6716 return rel;
6717 }
6718 /* fall through */
6719 case op_table:
6720 if (rel->exps) {
6721 node *n;
6722 list *exps;
6723
6724 for(n=rel->exps->h; n && !needed; n = n->next) {
6725 sql_exp *e = n->data;
6726
6727 if (!e->used)
6728 needed = 1;
6729 }
6730
6731 if (!needed)
6732 return rel;
6733
6734 exps = new_exp_list(sql->sa);
6735 for(n=rel->exps->h; n; n = n->next) {
6736 sql_exp *e = n->data;
6737
6738 if (e->used)
6739 append(exps, e);
6740 }
6741 /* atleast one (needed for crossproducts, count(*), rank() and single value projections) !, handled by rel_exps_mark_used */
6742 if (list_length(exps) == 0)
6743 append(exps, rel->exps->h->data);
6744 rel->exps = exps;
6745 }
6746 return rel;
6747
6748 case op_topn:
6749 case op_sample:
6750
6751 if (rel->l)
6752 rel->l = rel_remove_unused(sql, rel->l);
6753 return rel;
6754
6755 case op_project:
6756 case op_groupby:
6757
6758 if (/*rel->l &&*/ rel->exps) {
6759 node *n;
6760 list *exps;
6761
6762 for(n=rel->exps->h; n && !needed; n = n->next) {
6763 sql_exp *e = n->data;
6764
6765 if (!e->used)
6766 needed = 1;
6767 }
6768 if (!needed)
6769 return rel;
6770
6771 exps = new_exp_list(sql->sa);
6772 for(n=rel->exps->h; n; n = n->next) {
6773 sql_exp *e = n->data;
6774
6775 if (e->used)
6776 append(exps, e);
6777 }
6778 /* atleast one (needed for crossproducts, count(*), rank() and single value projections) */
6779 if (list_length(exps) <= 0)
6780 append(exps, rel->exps->h->data);
6781 rel->exps = exps;
6782 }
6783 return rel;
6784
6785 case op_union:
6786 case op_inter:
6787 case op_except:
6788
6789 case op_insert:
6790 case op_update:
6791 case op_delete:
6792 case op_truncate:
6793
6794 case op_select:
6795
6796 case op_join:
6797 case op_left:
6798 case op_right:
6799 case op_full:
6800 case op_semi:
6801 case op_anti:
6802 case op_ddl:
6803 return rel;
6804 }
6805 return rel;
6806}
6807
6808static void
6809rel_dep_graph( char *deps, list *refs, sql_rel *parent, sql_rel *rel)
6810{
6811 if (!parent)
6812 return ;
6813
6814 if (rel_is_ref(rel) && parent != rel) {
6815 int n = list_length(refs);
6816 int pnr = list_position(refs, parent);
6817 int cnr = list_position(refs, rel);
6818
6819 deps[pnr*n + cnr] = 1;
6820 parent = rel;
6821 }
6822
6823 switch(rel->op) {
6824 case op_table:
6825 case op_topn:
6826 case op_sample:
6827 case op_project:
6828 case op_groupby:
6829 case op_select:
6830
6831 if (rel->l && (rel->op != op_table || rel->flag != 2))
6832 rel_dep_graph(deps, refs, parent, rel->l);
6833
6834 case op_basetable:
6835 case op_insert:
6836 case op_ddl:
6837 break;
6838
6839 case op_update:
6840 case op_delete:
6841 case op_truncate:
6842
6843 if (rel->r)
6844 rel_dep_graph(deps, refs, parent, rel->r);
6845 break;
6846
6847
6848 case op_union:
6849 case op_inter:
6850 case op_except:
6851 case op_join:
6852 case op_left:
6853 case op_right:
6854 case op_full:
6855 case op_semi:
6856 case op_anti:
6857
6858 if (rel->l)
6859 rel_dep_graph(deps, refs, parent, rel->l);
6860 if (rel->r)
6861 rel_dep_graph(deps, refs, parent, rel->r);
6862 break;
6863 }
6864}
6865
6866/*
6867extern void _rel_print(mvc *sql, sql_rel *rel);
6868
6869static void
6870print_deps(mvc *sql, char *deps, list *refs)
6871{
6872 int i, j;
6873 int n = list_length(refs);
6874
6875 for (i=0; i<n; i++) {
6876 sql_rel *r = list_fetch(refs, i);
6877 printf("dep %d\n", i);
6878 _rel_print(sql,r);
6879 }
6880 for (i=0; i<n; i++) {
6881 for (j=0; j<n; j++) {
6882 printf("%c ", i==j?'x' : deps[i*n + j]?'1':'0');
6883 }
6884 printf("\n");
6885 }
6886
6887}
6888*/
6889
/*
 * Find the next dependency of node 'nr' in the n*n matrix 'deps'.
 *
 * Row 'nr' is scanned from column 'dnr' upwards.  The diagonal entry
 * (column 'nr') is skipped, since it is used by the callers as a
 * "done" marker rather than as a dependency.
 *
 * Returns the first column >= dnr (other than nr) holding a non-zero
 * value, or -1 when there is none.  The scan never reads outside row
 * 'nr', also when the diagonal is the last column of the row.
 */
static int
depends_on(int nr, char *deps, int n, int dnr)
{
	for (; dnr < n; dnr++) {
		/* skip the diagonal; the loop condition re-checks the bound */
		if (dnr == nr)
			continue;
		if (deps[nr*n + dnr])
			return dnr;
	}
	return -1;
}
6901
6902static void
6903flatten_dep(list *nrefs, list *refs, int nr, char *deps, int n)
6904{
6905 int dnr = 0;
6906
6907 if (deps[nr*n + nr])
6908 return;
6909 for (;(dnr = depends_on(nr, deps, n, dnr)) >= 0 && dnr < n; dnr++)
6910 flatten_dep(nrefs, refs, dnr, deps, n);
6911 if (!deps[nr*n + nr]) {
6912 list_prepend(nrefs, list_fetch(refs,nr));
6913 deps[nr*n+nr] = 1; /* mark done */
6914 }
6915}
6916
6917static list *
6918flatten_dep_graph(mvc *sql, char *deps, list *refs)
6919{
6920 list *nrefs = sa_list(sql->sa);
6921 int n = list_length(refs), nr = 0;
6922
6923 for (nr = 0; nr < n; nr++) {
6924 if (deps[nr*n + nr])
6925 continue;
6926 flatten_dep(nrefs, refs, nr, deps, n);
6927 }
6928 return nrefs;
6929}
6930
6931static list *
6932rel_opt_dependencies(mvc *sql, list *refs)
6933{
6934 int n = list_length(refs);
6935
6936 if (n > 1) {
6937 char *deps = SA_NEW_ARRAY(sql->sa, char, n*n);
6938 node *m;
6939
6940 memset(deps, 0, n*n);
6941 for (m = refs->h; m; m = m->next) {
6942 rel_dep_graph(deps, refs, m->data, m->data);
6943 }
6944 refs = flatten_dep_graph(sql, deps, refs);
6945 //print_deps(sql, deps, refs);
6946 }
6947 return refs;
6948}
6949
6950static void
6951rel_dce_refs(mvc *sql, sql_rel *rel, list *refs)
6952{
6953 if (!rel || (rel_is_ref(rel) && list_find(refs, rel, NULL)))
6954 return ;
6955
6956 switch(rel->op) {
6957 case op_table:
6958 case op_topn:
6959 case op_sample:
6960 case op_project:
6961 case op_groupby:
6962 case op_select:
6963
6964 if (rel->l && (rel->op != op_table || rel->flag != 2))
6965 rel_dce_refs(sql, rel->l, refs);
6966 break;
6967
6968 case op_basetable:
6969 case op_insert:
6970 case op_ddl:
6971 break;
6972
6973 case op_update:
6974 case op_delete:
6975 case op_truncate:
6976
6977 if (rel->r)
6978 rel_dce_refs(sql, rel->r, refs);
6979 break;
6980
6981
6982 case op_union:
6983 case op_inter:
6984 case op_except:
6985 case op_join:
6986 case op_left:
6987 case op_right:
6988 case op_full:
6989 case op_semi:
6990 case op_anti:
6991
6992 if (rel->l)
6993 rel_dce_refs(sql, rel->l, refs);
6994 if (rel->r)
6995 rel_dce_refs(sql, rel->r, refs);
6996 break;
6997 }
6998
6999 if (rel_is_ref(rel) && !list_find(refs, rel, NULL))
7000 list_prepend(refs, rel);
7001}
7002
7003static sql_rel *
7004rel_dce_down(mvc *sql, sql_rel *rel, int skip_proj)
7005{
7006 if (!rel)
7007 return rel;
7008
7009 if (!skip_proj && rel_is_ref(rel))
7010 return rel;
7011
7012 switch(rel->op) {
7013 case op_basetable:
7014 case op_table:
7015
7016 if (skip_proj && rel->l && rel->op == op_table && rel->flag != 2)
7017 rel->l = rel_dce_down(sql, rel->l, 0);
7018 if (!skip_proj)
7019 rel_dce_sub(sql, rel);
7020 /* fall through */
7021
7022 case op_truncate:
7023 case op_ddl:
7024
7025 return rel;
7026
7027 case op_insert:
7028 rel_used(rel->r);
7029 rel_dce_sub(sql, rel->r);
7030 return rel;
7031
7032 case op_update:
7033 case op_delete:
7034
7035 if (skip_proj && rel->r)
7036 rel->r = rel_dce_down(sql, rel->r, 0);
7037 if (!skip_proj)
7038 rel_dce_sub(sql, rel);
7039 return rel;
7040
7041 case op_topn:
7042 case op_sample:
7043 case op_project:
7044 case op_groupby:
7045
7046 if (skip_proj && rel->l)
7047 rel->l = rel_dce_down(sql, rel->l, is_topn(rel->op) || is_sample(rel->op));
7048 if (!skip_proj)
7049 rel_dce_sub(sql, rel);
7050 return rel;
7051
7052 case op_union:
7053 case op_inter:
7054 case op_except:
7055 if (skip_proj) {
7056 if (rel->l)
7057 rel->l = rel_dce_down(sql, rel->l, 0);
7058 if (rel->r)
7059 rel->r = rel_dce_down(sql, rel->r, 0);
7060 }
7061 if (!skip_proj)
7062 rel_dce_sub(sql, rel);
7063 return rel;
7064
7065 case op_select:
7066 if (rel->l)
7067 rel->l = rel_dce_down(sql, rel->l, 0);
7068 return rel;
7069
7070 case op_join:
7071 case op_left:
7072 case op_right:
7073 case op_full:
7074 case op_semi:
7075 case op_anti:
7076 if (rel->l)
7077 rel->l = rel_dce_down(sql, rel->l, 0);
7078 if (rel->r)
7079 rel->r = rel_dce_down(sql, rel->r, 0);
7080 return rel;
7081 }
7082 return rel;
7083}
7084
7085/* DCE
7086 *
7087 * Based on top relation expressions mark sub expressions as used.
7088 * Then recurse down until the projections. Clean them up and repeat.
7089 */
7090
7091static sql_rel *
7092rel_dce_sub(mvc *sql, sql_rel *rel)
7093{
7094 if (!rel)
7095 return rel;
7096
7097 /*
7098 * Mark used up until the next project
7099 * For setops we need to first mark, then remove
7100 * because of positional dependency
7101 */
7102 rel_mark_used(sql, rel, 1);
7103 rel = rel_remove_unused(sql, rel);
7104 rel_dce_down(sql, rel, 1);
7105 return rel;
7106}
7107
7108/* add projects under set ops */
7109static sql_rel *
7110rel_add_projects(mvc *sql, sql_rel *rel)
7111{
7112 if (!rel)
7113 return rel;
7114
7115 switch(rel->op) {
7116 case op_basetable:
7117 case op_table:
7118
7119 case op_insert:
7120 case op_update:
7121 case op_delete:
7122 case op_truncate:
7123 case op_ddl:
7124
7125 return rel;
7126
7127 case op_union:
7128 case op_inter:
7129 case op_except:
7130
7131 /* We can only reduce the list of expressions of an set op
7132 * if the projection under it can also be reduced.
7133 */
7134 if (rel->l) {
7135 sql_rel *l = rel->l;
7136
7137 l->subquery = 0;
7138 if (!is_project(l->op) && !need_distinct(rel))
7139 l = rel_project(sql->sa, l, rel_projections(sql, l, NULL, 1, 1));
7140 rel->l = rel_add_projects(sql, l);
7141 }
7142 if (rel->r) {
7143 sql_rel *r = rel->r;
7144
7145 r->subquery = 0;
7146 if (!is_project(r->op) && !need_distinct(rel))
7147 r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1));
7148 rel->r = rel_add_projects(sql, r);
7149 }
7150 return rel;
7151
7152 case op_topn:
7153 case op_sample:
7154 case op_project:
7155 case op_groupby:
7156 case op_select:
7157 if (rel->l)
7158 rel->l = rel_add_projects(sql, rel->l);
7159 return rel;
7160
7161 case op_join:
7162 case op_left:
7163 case op_right:
7164 case op_full:
7165 case op_semi:
7166 case op_anti:
7167 if (rel->l)
7168 rel->l = rel_add_projects(sql, rel->l);
7169 if (rel->r)
7170 rel->r = rel_add_projects(sql, rel->r);
7171 return rel;
7172 }
7173 return rel;
7174}
7175
7176sql_rel *
7177rel_dce(mvc *sql, sql_rel *rel)
7178{
7179 list *refs = sa_list(sql->sa);
7180
7181 rel_dce_refs(sql, rel, refs);
7182 if (refs) {
7183 node *n;
7184
7185 for(n = refs->h; n; n = n->next) {
7186 sql_rel *i = n->data;
7187
7188 while (!rel_is_ref(i) && i->l && !is_base(i->op))
7189 i = i->l;
7190 if (i)
7191 rel_used(i);
7192 }
7193 }
7194 rel = rel_add_projects(sql, rel);
7195 rel_used(rel);
7196 rel_dce_sub(sql, rel);
7197 return rel;
7198}
7199
7200static int
7201index_exp(sql_exp *e, sql_idx *i)
7202{
7203 if (e->type == e_cmp && !is_complex_exp(e->flag)) {
7204 switch(i->type) {
7205 case hash_idx:
7206 case oph_idx:
7207 if (e->flag == cmp_equal)
7208 return 0;
7209 /* fall through */
7210 case join_idx:
7211 default:
7212 return -1;
7213 }
7214 }
7215 return -1;
7216}
7217
7218static sql_idx *
7219find_index(sql_allocator *sa, sql_rel *rel, sql_rel *sub, list **EXPS)
7220{
7221 node *n;
7222
7223 /* any (partial) match of the expressions with the index columns */
7224 /* Depending on the index type we may need full matches and only
7225 limited number of cmp types (hash only equality etc) */
7226 /* Depending on the index type we should (in the rel_bin) generate
7227 more code, ie for spatial index add post filter etc, for hash
7228 compute hash value and use index */
7229
7230 if (sub->exps && rel->exps)
7231 for(n = sub->exps->h; n; n = n->next) {
7232 prop *p;
7233 sql_exp *e = n->data;
7234
7235 if ((p = find_prop(e->p, PROP_HASHIDX)) != NULL) {
7236 list *exps, *cols;
7237 sql_idx *i = p->value;
7238 fcmp cmp = (fcmp)&sql_column_kc_cmp;
7239
7240 /* join indices are only interesting for joins */
7241 if (i->type == join_idx || list_length(i->columns) <= 1)
7242 continue;
7243 /* based on the index type, find qualifying exps */
7244 exps = list_select(rel->exps, i, (fcmp) &index_exp, (fdup)NULL);
7245 if (!exps || !list_length(exps))
7246 continue;
7247 /* now we obtain the columns, move into sql_column_kc_cmp! */
7248 cols = list_map(exps, sub, (fmap) &sjexp_col);
7249
7250 /* TODO check that at most 2 relations are involved */
7251
7252 /* Match the index columns with the expression columns.
7253 TODO, Allow partial matches ! */
7254 if (list_match(cols, i->columns, cmp) == 0) {
7255 /* re-order exps in index order */
7256 node *n, *m;
7257 list *es = sa_list(sa);
7258
7259 for(n = i->columns->h; n; n = n->next) {
7260 int i = 0;
7261 for(m = cols->h; m; m = m->next, i++) {
7262 if (cmp(m->data, n->data) == 0){
7263 sql_exp *e = list_fetch(exps, i);
7264 list_append(es, e);
7265 break;
7266 }
7267 }
7268 }
7269 /* fix the destroy function */
7270 cols->destroy = NULL;
7271 *EXPS = es;
7272 e->used = 1;
7273 return i;
7274 }
7275 cols->destroy = NULL;
7276 }
7277 }
7278 return NULL;
7279}
7280
7281static sql_rel *
7282rel_use_index(int *changes, mvc *sql, sql_rel *rel)
7283{
7284 (void)changes;
7285 if (rel->l && (is_select(rel->op) || is_join(rel->op))) {
7286 list *exps = NULL;
7287 sql_idx *i = find_index(sql->sa, rel, rel->l, &exps);
7288 int left = 1;
7289
7290 if (!i && is_join(rel->op))
7291 i = find_index(sql->sa, rel, rel->l, &exps);
7292 if (!i && is_join(rel->op)) {
7293 left = 0;
7294 i = find_index(sql->sa, rel, rel->r, &exps);
7295 }
7296
7297 if (i) {
7298 prop *p;
7299 node *n;
7300 int single_table = 1;
7301 sql_exp *re = NULL;
7302
7303 for( n = exps->h; n && single_table; n = n->next) {
7304 sql_exp *e = n->data;
7305 sql_exp *nre = e->r;
7306
7307 if (is_join(rel->op) &&
7308 ((left && !rel_find_exp(rel->l, e->l)) ||
7309 (!left && !rel_find_exp(rel->r, e->l))))
7310 nre = e->l;
7311 single_table = (!re || (exp_relname(nre) && exp_relname(re) && strcmp(exp_relname(nre), exp_relname(re)) == 0));
7312 re = nre;
7313 }
7314 if (single_table) { /* add PROP_HASHCOL to all column exps */
7315 for( n = exps->h; n; n = n->next) {
7316 sql_exp *e = n->data;
7317 int anti = is_anti(e);
7318
7319 /* swapped ? */
7320 if (is_join(rel->op) &&
7321 ((left && !rel_find_exp(rel->l, e->l)) ||
7322 (!left && !rel_find_exp(rel->r, e->l))))
7323 n->data = e = exp_compare(sql->sa, e->r, e->l, cmp_equal);
7324 if (anti) set_anti(e);
7325 p = find_prop(e->p, PROP_HASHCOL);
7326 if (!p)
7327 e->p = p = prop_create(sql->sa, PROP_HASHCOL, e->p);
7328 p->value = i;
7329 }
7330 }
7331 /* add the remaining exps to the new exp list */
7332 if (list_length(rel->exps) > list_length(exps)) {
7333 for( n = rel->exps->h; n; n = n->next) {
7334 sql_exp *e = n->data;
7335 if (!list_find(exps, e, (fcmp)&exp_cmp))
7336 list_append(exps, e);
7337 }
7338 }
7339 rel->exps = exps;
7340 }
7341 }
7342 return rel;
7343}
7344
7345static int
7346score_se( mvc *sql, sql_rel *rel, sql_exp *e)
7347{
7348 int score = 0;
7349 if (e->type == e_cmp && !is_complex_exp(e->flag)) {
7350 score += score_gbe(sql, rel, e->l);
7351 }
7352 score += exp_keyvalue(e);
7353 return score;
7354}
7355
7356static sql_rel *
7357rel_select_order(int *changes, mvc *sql, sql_rel *rel)
7358{
7359 (void)changes;
7360 if (is_select(rel->op) && rel->exps && list_length(rel->exps)>1) {
7361 int i, *scores = calloc(list_length(rel->exps), sizeof(int));
7362 node *n;
7363
7364 for (i = 0, n = rel->exps->h; n; i++, n = n->next)
7365 scores[i] = score_se(sql, rel, n->data);
7366 rel->exps = list_keysort(rel->exps, scores, (fdup)NULL);
7367 free(scores);
7368 }
7369 return rel;
7370}
7371
7372static sql_rel *
7373rel_simplify_like_select(int *changes, mvc *sql, sql_rel *rel)
7374{
7375 if (is_select(rel->op) && rel->exps) {
7376 node *n;
7377 list *exps;
7378 int needed = 0;
7379
7380 for (n = rel->exps->h; n && !needed; n = n->next) {
7381 sql_exp *e = n->data;
7382 list *l = e->l;
7383 list *r = e->r;
7384
7385 if (e->type == e_cmp && get_cmp(e) == cmp_filter && strcmp(((sql_subfunc*)e->f)->func->base.name, "like") == 0 && list_length(l) == 1 && list_length(r) <= 2 && !is_anti(e))
7386 needed = 1;
7387 }
7388
7389 if (!needed)
7390 return rel;
7391
7392 exps = sa_list(sql->sa);
7393 if (exps == NULL)
7394 return NULL;
7395 for (n = rel->exps->h; n; n = n->next) {
7396 sql_exp *e = n->data;
7397 list *l = e->l;
7398 list *r = e->r;
7399
7400 if (e->type == e_cmp && get_cmp(e) == cmp_filter && strcmp(((sql_subfunc*)e->f)->func->base.name, "like") == 0 && list_length(l) == 1 && list_length(r) <= 2 && !is_anti(e)) {
7401 list *r = e->r;
7402 sql_exp *fmt = r->h->data;
7403 sql_exp *esc = (r->h->next)?r->h->next->data:NULL;
7404 int rewrite = 0;
7405
7406 if (fmt->type == e_convert)
7407 fmt = fmt->l;
7408 /* check for simple like expression */
7409 if (is_atom(fmt->type)) {
7410 atom *fa = NULL;
7411
7412 if (fmt->l) {
7413 fa = fmt->l;
7414 /* simple numbered argument */
7415 } else if (!fmt->r && !fmt->f) {
7416 fa = sql->args[fmt->flag];
7417
7418 }
7419 if (fa && fa->data.vtype == TYPE_str &&
7420 !strchr(fa->data.val.sval, '%') &&
7421 !strchr(fa->data.val.sval, '_'))
7422 rewrite = 1;
7423 }
7424 if (rewrite && esc && is_atom(esc->type)) {
7425 atom *ea = NULL;
7426
7427 if (esc->l) {
7428 ea = esc->l;
7429 /* simple numbered argument */
7430 } else if (!esc->r && !esc->f) {
7431 ea = sql->args[esc->flag];
7432
7433 }
7434 if (ea && (ea->data.vtype != TYPE_str ||
7435 strlen(ea->data.val.sval) != 0))
7436 rewrite = 0;
7437 }
7438 if (rewrite) { /* rewrite to cmp_equal ! */
7439 list *l = e->l;
7440 list *r = e->r;
7441 sql_exp *ne = exp_compare(sql->sa, l->h->data, r->h->data, cmp_equal);
7442
7443 if (is_anti(e)) set_anti(ne);
7444 /* if rewritten don't cache this query */
7445 list_append(exps, ne);
7446 sql->caching = 0;
7447 (*changes)++;
7448 } else {
7449 list_append(exps, e);
7450 }
7451 } else {
7452 list_append(exps, e);
7453 }
7454 }
7455 rel->exps = exps;
7456 }
7457 return rel;
7458}
7459
/*
 * Rewriter: simplify the predicates of a select, join or semi join
 * whose cardinality is above CARD_ATOM.  A new expression list is built
 * in which
 *  - atom expressions (literal or numbered argument) with a non-zero
 *    bval are dropped,
 *  - isnull(x) = <literal> (selects only) becomes a compare of x with a
 *    NULL atom, made anti when the literal's bval is zero,
 *  - not(not(x)) = <literal> becomes x = <literal>,
 *  - not(a = b) / not(a <> b) compared with a literal becomes a direct
 *    a = b or a <> b compare,
 *  - not(x) = <bit literal> becomes x = <negated literal>.
 * Only the last rewrite increments *changes.  Returns 'rel'.
 */
static sql_rel *
rel_simplify_predicates(int *changes, mvc *sql, sql_rel *rel)
{
	if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps && rel->card > CARD_ATOM) {
		node *n;
		list *exps = sa_list(sql->sa);

		for (n = rel->exps->h; n; n = n->next) {
			sql_exp *e = n->data;

			if (is_atom(e->type) && e->l) { /* direct literal */
				atom *a = e->l;
				/* NOTE(review): bval is read without checking the atom's type or nil-ness -- confirm a NULL literal cannot reach this point */
				int flag = a->data.val.bval;

				/* remove simple select true expressions */
				if (flag)
					continue;
			}
			if (is_atom(e->type) && !e->l && !e->r) { /* numbered variable */
				atom *a = sql->args[e->flag];
				int flag = a->data.val.bval;

				/* remove simple select true expressions */
				if (flag) {
					/* the decision depends on the argument value, so do not cache the plan */
					sql->caching = 0;
					continue;
				}
			}
			if (e->type == e_cmp && get_cmp(e) == cmp_equal) {
				sql_exp *l = e->l;
				sql_exp *r = e->r;

				if (l->type == e_func) {
					sql_subfunc *f = l->f;

					/* rewrite isnull(x) = TRUE/FALSE => x =/<> NULL */
					if (is_select(rel->op) && !f->func->s && !strcmp(f->func->base.name, "isnull") &&
					     is_atom(r->type) && r->l) { /* direct literal */
						atom *a = r->l;
						int flag = a->data.val.bval;
						list *args = l->l;

						assert(list_length(args) == 1);
						l = args->h->data;
						/* without a known type for x the expression is kept as is */
						if (exp_subtype(l)) {
							r = exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(l), NULL));
							/* NOTE(review): the literal 3 is the compare flag passed to exp_compare2; its meaning is not visible here */
							e = exp_compare2(sql->sa, l, r, r, 3);
							/* NOTE(review): a NULL result from exp_compare2 would be appended to the list below -- confirm it cannot fail */
							if (e && !flag)
								set_anti(e);
						}
					} else if (!f->func->s && !strcmp(f->func->base.name, "not")) {
						if (is_atom(r->type) && r->l) { /* direct literal */
							atom *a = r->l;
							list *args = l->l;
							sql_exp *inner = args->h->data;
							/* only meaningful when inner is a function; checked before use below */
							sql_subfunc *inf = inner->f;

							assert(list_length(args) == 1);

							/* not(not(x)) = TRUE/FALSE => x = TRUE/FALSE */
							if (inner->type == e_func &&
							    !inf->func->s &&
							    !strcmp(inf->func->base.name, "not")) {
								int anti = is_anti(e);

								args = inner->l;
								assert(list_length(args) == 1);
								l = args->h->data;
								e = exp_compare(sql->sa, l, r, e->flag);
								if (anti) set_anti(e);
							/* rewrite not(=/<>(a,b)) = TRUE/FALSE => a=b or a<>b */
							} else if (inner->type == e_func &&
							    !inf->func->s &&
							    (!strcmp(inf->func->base.name, "=") ||
							     !strcmp(inf->func->base.name, "<>"))) {
								int flag = a->data.val.bval;
								args = inner->l;

								/* a "<>" inside the not flips the outcome once more */
								if (!strcmp(inf->func->base.name, "<>"))
									flag = !flag;
								assert(list_length(args) == 2);
								l = args->h->data;
								r = args->h->next->data;
								e = exp_compare(sql->sa, l, r, (!flag)?cmp_equal:cmp_notequal);
							} else if (a && a->data.vtype == TYPE_bit) {
								int anti = is_anti(e);

								/* change atom's value on right */
								/* NOTE(review): the atom is modified in place; any other expression sharing it would see the negated value */
								l = args->h->data;
								a->data.val.bval = !a->data.val.bval;
								e = exp_compare(sql->sa, l, r, e->flag);
								if (anti) set_anti(e);
								(*changes)++;
							}
						}
					}
				}
				list_append(exps, e);
			} else {
				list_append(exps, e);
			}
		}
		rel->exps = exps;
	}
	return rel;
}
7566
/* forward declaration: split_exp and split_exps call each other */
static void split_exps(mvc *sql, list *exps, sql_rel *rel);
7568
7569static int
7570exp_match_exp_cmp( sql_exp *e1, sql_exp *e2)
7571{
7572 if (exp_match_exp(e1,e2))
7573 return 0;
7574 return -1;
7575}
7576
7577static int
7578exp_refers_cmp( sql_exp *e1, sql_exp *e2)
7579{
7580 if (exp_refers(e1,e2))
7581 return 0;
7582 return -1;
7583}
7584
7585static sql_exp *
7586add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel)
7587{
7588 node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp);
7589
7590 /* if not matching we may refer to an older expression */
7591 if (!n)
7592 n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp);
7593 if (!n) {
7594 exp_label(sql->sa, e, ++sql->label);
7595 append(rel->exps, e);
7596 } else {
7597 e = n->data;
7598 }
7599 e = exp_ref(sql->sa, e);
7600 return e;
7601}
7602
7603static void
7604add_exps_too_project(mvc *sql, list *exps, sql_rel *rel)
7605{
7606 node *n;
7607
7608 if (!exps)
7609 return;
7610 for(n=exps->h; n; n = n->next) {
7611 sql_exp *e = n->data;
7612
7613 if (e->type != e_column && !exp_is_atom(e))
7614 n->data = add_exp_too_project(sql, e, rel);
7615 }
7616}
7617
7618static sql_exp *
7619split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
7620{
7621 if (exp_is_atom(e))
7622 return e;
7623 switch(e->type) {
7624 case e_column:
7625 return e;
7626 case e_convert:
7627 e->l = split_exp(sql, e->l, rel);
7628 return e;
7629 case e_aggr:
7630 case e_func:
7631 if (!is_analytic(e) && !exp_has_sideeffect(e)) {
7632 sql_subfunc *f = e->f;
7633 if (e->type == e_func && !f->func->s && !strcmp(f->func->base.name, "ifthenelse")) {
7634 return e;
7635 } else {
7636 split_exps(sql, e->l, rel);
7637 add_exps_too_project(sql, e->l, rel);
7638 }
7639 }
7640 return e;
7641 case e_cmp:
7642 if (get_cmp(e) == cmp_or) {
7643 split_exps(sql, e->l, rel);
7644 split_exps(sql, e->r, rel);
7645 } else if (e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter) {
7646 e->l = split_exp(sql, e->l, rel);
7647 split_exps(sql, e->r, rel);
7648 } else {
7649 e->l = split_exp(sql, e->l, rel);
7650 e->r = split_exp(sql, e->r, rel);
7651 if (e->f) {
7652 e->f = split_exp(sql, e->f, rel);
7653 }
7654 }
7655 return e;
7656 case e_psm:
7657 case e_atom:
7658 return e;
7659 }
7660 return e;
7661}
7662
7663static void
7664split_exps(mvc *sql, list *exps, sql_rel *rel)
7665{
7666 node *n;
7667
7668 if (!exps)
7669 return;
7670 for(n=exps->h; n; n = n->next){
7671 sql_exp *e = n->data;
7672
7673 e = split_exp(sql, e, rel);
7674 n->data = e;
7675 }
7676}
7677
/*
 * Split projections that contain function calls, so that the arguments
 * of those calls are computed by a separate projection below.
 *
 * sql      query context (allocator, label counter)
 * rel      relation (sub)tree to process
 * top      non-zero while 'rel' still counts as the top of the plan; it
 *          is only passed on through topn, ddl and modify operators
 * changes  only handed on to the recursive calls
 *
 * Returns 'rel'.
 */
static sql_rel *
rel_split_project(int *changes, mvc *sql, sql_rel *rel, int top)
{
	if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel)) {
		list *exps = rel->exps;
		node *n;
		int funcs = 0;
		sql_rel *nrel;

		/* are there functions */
		for (n=exps->h; n && !funcs; n = n->next) {
			sql_exp *e = n->data;

			funcs = exp_has_func(e);
		}
		/* introduce extra project */
		if (funcs && rel->op != op_project) {
			/* not a plain project: put a new project over all projections of the input below rel */
			nrel = rel_project(sql->sa, rel->l,
				rel_projections(sql, rel->l, NULL, 1, 1));
			rel->l = nrel;
			/* recursively split all functions and add those to the projection list */
			split_exps(sql, rel->exps, nrel);
			if (nrel->l)
				nrel->l = rel_split_project(changes, sql, nrel->l, is_topn(rel->op)?top:0);
			return rel;
		} else if (funcs && !top && !rel->r) {
			/* projects can have columns point back into the expression list, ie
			 * create a new list including the split expressions */
			node *n;
			list *exps = rel->exps;

			/* rel is both the source and the target: split expressions land in the new rel->exps */
			rel->exps = sa_list(sql->sa);
			for (n=exps->h; n; n = n->next)
				append(rel->exps, split_exp(sql, n->data, rel));
		} else if (funcs && top && rel_is_ref(rel) && !rel->r) {
			/* inplace */
			/* the expressions move to a new project below; rel keeps only the projections collected before the move */
			list *exps = rel_projections(sql, rel, NULL, 1, 1);
			sql_rel *l = rel_project(sql->sa, rel->l, NULL);
			rel->l = l;
			l->exps = rel->exps;
			rel->exps = exps;
		}
	}
	/* set operators and base tables are not descended into */
	if (is_set(rel->op) || is_basetable(rel->op))
		return rel;
	if (rel->l)
		rel->l = rel_split_project(changes, sql, rel->l,
			(is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
	if ((is_join(rel->op) || is_semi(rel->op)) && rel->r)
		rel->r = rel_split_project(changes, sql, rel->r,
			(is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
	return rel;
}
7731
/* forward declaration: select_split_exp and select_split_exps call each other */
static void select_split_exps(mvc *sql, list *exps, sql_rel *rel);
7733
7734static sql_exp *
7735select_split_exp(mvc *sql, sql_exp *e, sql_rel *rel)
7736{
7737 switch(e->type) {
7738 case e_column:
7739 return e;
7740 case e_convert:
7741 e->l = select_split_exp(sql, e->l, rel);
7742 return e;
7743 case e_aggr:
7744 case e_func:
7745 if (!is_analytic(e) && !exp_has_sideeffect(e)) {
7746 sql_subfunc *f = e->f;
7747 if (e->type == e_func && !f->func->s && !strcmp(f->func->base.name, "ifthenelse"))
7748 return add_exp_too_project(sql, e, rel);
7749 }
7750 return e;
7751 case e_cmp:
7752 if (get_cmp(e) == cmp_or) {
7753 select_split_exps(sql, e->l, rel);
7754 select_split_exps(sql, e->r, rel);
7755 } else if (e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter) {
7756 e->l = select_split_exp(sql, e->l, rel);
7757 select_split_exps(sql, e->r, rel);
7758 } else {
7759 e->l = select_split_exp(sql, e->l, rel);
7760 e->r = select_split_exp(sql, e->r, rel);
7761 if (e->f) {
7762 e->f = select_split_exp(sql, e->f, rel);
7763 }
7764 }
7765 return e;
7766 case e_psm:
7767 case e_atom:
7768 return e;
7769 }
7770 return e;
7771}
7772
7773static void
7774select_split_exps(mvc *sql, list *exps, sql_rel *rel)
7775{
7776 node *n;
7777
7778 if (!exps)
7779 return;
7780 for(n=exps->h; n; n = n->next){
7781 sql_exp *e = n->data;
7782
7783 e = select_split_exp(sql, e, rel);
7784 n->data = e;
7785 }
7786}
7787
7788static sql_rel *
7789rel_split_select(int *changes, mvc *sql, sql_rel *rel, int top)
7790{
7791 if (is_select(rel->op) && list_length(rel->exps) && rel->l) {
7792 list *exps = rel->exps;
7793 node *n;
7794 int funcs = 0;
7795 sql_rel *nrel;
7796
7797 /* are there functions */
7798 for (n=exps->h; n && !funcs; n = n->next) {
7799 sql_exp *e = n->data;
7800
7801 funcs = exp_has_func(e);
7802 }
7803 /* introduce extra project */
7804 if (funcs && rel->op != op_project) {
7805 nrel = rel_project(sql->sa, rel->l,
7806 rel_projections(sql, rel->l, NULL, 1, 1));
7807 rel->l = nrel;
7808 /* recursively split all functions and add those to the projection list */
7809 select_split_exps(sql, rel->exps, nrel);
7810 if (nrel->l)
7811 nrel->l = rel_split_project(changes, sql, nrel->l, is_topn(rel->op)?top:0);
7812 return rel;
7813 } else if (funcs && !top && !rel->r) {
7814 /* projects can have columns point back into the expression list, ie
7815 * create a new list including the split expressions */
7816 node *n;
7817 list *exps = rel->exps;
7818
7819 rel->exps = sa_list(sql->sa);
7820 for (n=exps->h; n; n = n->next)
7821 append(rel->exps, select_split_exp(sql, n->data, rel));
7822 } else if (funcs && top && rel_is_ref(rel) && !rel->r) {
7823 /* inplace */
7824 list *exps = rel_projections(sql, rel, NULL, 1, 1);
7825 sql_rel *l = rel_project(sql->sa, rel->l, NULL);
7826 rel->l = l;
7827 l->exps = rel->exps;
7828 rel->exps = exps;
7829 }
7830 }
7831 if (is_set(rel->op) || is_basetable(rel->op))
7832 return rel;
7833 if (rel->l)
7834 rel->l = rel_split_select(changes, sql, rel->l,
7835 (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
7836 if ((is_join(rel->op) || is_semi(rel->op)) && rel->r)
7837 rel->r = rel_split_select(changes, sql, rel->r,
7838 (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0);
7839 return rel;
7840}
7841
7842static list *
7843exp_merge_range(sql_allocator *sa, list *exps)
7844{
7845 node *n, *m;
7846 for (n=exps->h; n; n = n->next) {
7847 sql_exp *e = n->data;
7848 sql_exp *le = e->l;
7849 sql_exp *re = e->r;
7850
7851 /* handle the and's in the or lists */
7852 if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
7853 e->l = exp_merge_range(sa, e->l);
7854 e->r = exp_merge_range(sa, e->r);
7855 /* only look for gt, gte, lte, lt */
7856 } else if (n->next &&
7857 e->type == e_cmp && e->flag < cmp_equal && !e->f &&
7858 re->card == CARD_ATOM && !is_anti(e)) {
7859 for (m=n->next; m; m = m->next) {
7860 sql_exp *f = m->data;
7861 sql_exp *lf = f->l;
7862 sql_exp *rf = f->r;
7863
7864 if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
7865 rf->card == CARD_ATOM && !is_anti(f) &&
7866 exp_match_exp(le, lf)) {
7867 sql_exp *ne;
7868 int swap = 0, lt = 0, gt = 0;
7869 /* for now only c1 <[=] x <[=] c2 */
7870
7871 swap = lt = (e->flag == cmp_lt || e->flag == cmp_lte);
7872 gt = !lt;
7873
7874 if (gt &&
7875 (f->flag == cmp_gt ||
7876 f->flag == cmp_gte))
7877 continue;
7878 if (lt &&
7879 (f->flag == cmp_lt ||
7880 f->flag == cmp_lte))
7881 continue;
7882 if (!swap)
7883 ne = exp_compare2(sa, le, re, rf, compare2range(e->flag, f->flag));
7884 else
7885 ne = exp_compare2(sa, le, rf, re, compare2range(f->flag, e->flag));
7886
7887 list_remove_data(exps, e);
7888 list_remove_data(exps, f);
7889 list_append(exps, ne);
7890 return exp_merge_range(sa, exps);
7891 }
7892 }
7893 } else if (n->next &&
7894 e->type == e_cmp && e->flag < cmp_equal && !e->f &&
7895 re->card > CARD_ATOM && !is_anti(e)) {
7896 for (m=n->next; m; m = m->next) {
7897 sql_exp *f = m->data;
7898 sql_exp *lf = f->l;
7899 sql_exp *rf = f->r;
7900
7901 if (f->type == e_cmp && f->flag < cmp_equal && !f->f &&
7902 rf->card > CARD_ATOM && !is_anti(f)) {
7903 sql_exp *ne, *t;
7904 int swap = 0, lt = 0, gt = 0;
7905 comp_type ef = (comp_type) e->flag, ff = (comp_type) f->flag;
7906
7907 /* both swapped ? */
7908 if (exp_match_exp(re, rf)) {
7909 t = re;
7910 re = le;
7911 le = t;
7912 ef = swap_compare(ef);
7913 t = rf;
7914 rf = lf;
7915 lf = t;
7916 ff = swap_compare(ff);
7917 }
7918
7919 /* is left swapped ? */
7920 if (exp_match_exp(re, lf)) {
7921 t = re;
7922 re = le;
7923 le = t;
7924 ef = swap_compare(ef);
7925 }
7926
7927 /* is right swapped ? */
7928 if (exp_match_exp(le, rf)) {
7929 t = rf;
7930 rf = lf;
7931 lf = t;
7932 ff = swap_compare(ff);
7933 }
7934
7935 if (!exp_match_exp(le, lf))
7936 continue;
7937
7938 /* for now only c1 <[=] x <[=] c2 */
7939 swap = lt = (ef == cmp_lt || ef == cmp_lte);
7940 gt = !lt;
7941
7942 if (gt && (ff == cmp_gt || ff == cmp_gte))
7943 continue;
7944 if (lt && (ff == cmp_lt || ff == cmp_lte))
7945 continue;
7946 if (!swap)
7947 ne = exp_compare2(sa, le, re, rf, compare2range(ef, ff));
7948 else
7949 ne = exp_compare2(sa, le, rf, re, compare2range(ff, ef));
7950
7951 list_remove_data(exps, e);
7952 list_remove_data(exps, f);
7953 list_append(exps, ne);
7954 return exp_merge_range(sa, exps);
7955 }
7956 }
7957 }
7958 }
7959 return exps;
7960}
7961
7962static sql_rel *
7963rel_find_range(int *changes, mvc *sql, sql_rel *rel)
7964{
7965 (void)changes;
7966 if ((is_join(rel->op) || is_semi(rel->op) || is_select(rel->op)) && rel->exps && !list_empty(rel->exps))
7967 rel->exps = exp_merge_range(sql->sa, rel->exps);
7968 return rel;
7969}
7970
/*
 * Casting decimal values on both sides of a compare expression is expensive,
 * both in performance (cpu cost) and memory requirements (need for large
 * types).
 */
7976
7977static int
7978reduce_scale(atom *a)
7979{
7980#ifdef HAVE_HGE
7981 if (a->data.vtype == TYPE_hge) {
7982 hge v = a->data.val.hval;
7983 int i = 0;
7984
7985 if (v != 0)
7986 while( (v/10)*10 == v ) {
7987 i++;
7988 v /= 10;
7989 }
7990 a->data.val.hval = v;
7991 return i;
7992 }
7993#endif
7994 if (a->data.vtype == TYPE_lng) {
7995 lng v = a->data.val.lval;
7996 int i = 0;
7997
7998 if (v != 0)
7999 while( (v/10)*10 == v ) {
8000 i++;
8001 v /= 10;
8002 }
8003 a->data.val.lval = v;
8004 return i;
8005 }
8006 if (a->data.vtype == TYPE_int) {
8007 int v = a->data.val.ival;
8008 int i = 0;
8009
8010 if (v != 0)
8011 while( (v/10)*10 == v ) {
8012 i++;
8013 v /= 10;
8014 }
8015 a->data.val.ival = v;
8016 return i;
8017 }
8018 if (a->data.vtype == TYPE_sht) {
8019 sht v = a->data.val.shval;
8020 int i = 0;
8021
8022 if (v != 0)
8023 while( (v/10)*10 == v ) {
8024 i++;
8025 v /= 10;
8026 }
8027 a->data.val.shval = v;
8028 return i;
8029 }
8030 return 0;
8031}
8032
8033static sql_rel *
8034rel_project_reduce_casts(int *changes, mvc *sql, sql_rel *rel)
8035{
8036 if (is_project(rel->op) && list_length(rel->exps)) {
8037 list *exps = rel->exps;
8038 node *n;
8039
8040 for (n=exps->h; n; n = n->next) {
8041 sql_exp *e = n->data;
8042
8043 if (e && e->type == e_func) {
8044 sql_subfunc *f = e->f;
8045 sql_subtype *res = f->res->h->data;
8046
8047 if (!f->func->s && !strcmp(f->func->base.name, "sql_mul") && res->scale > 0) {
8048 list *args = e->l;
8049 sql_exp *h = args->h->data;
8050 sql_exp *t = args->t->data;
8051 atom *a;
8052
8053 if ((is_atom(h->type) && (a = exp_value(sql, h, sql->args, sql->argc)) != NULL) ||
8054 (is_atom(t->type) && (a = exp_value(sql, t, sql->args, sql->argc)) != NULL)) {
8055 int rs = reduce_scale(a);
8056
8057 res->scale -= rs;
8058 if (rs)
8059 (*changes)+= rs;
8060 }
8061 }
8062 }
8063 }
8064 }
8065 return rel;
8066}
8067
/*
 * Rewriter: remove casts from the theta compare expressions (without a
 * third operand) of joins, semi joins and selects.  Two patterns are
 * rewritten:
 *  - convert(l) <cmp> <simple numeric atom>, for conversions between
 *    EC_NUM types where the target localtype is not smaller than the
 *    source: replaced by a compare of l with the maximum atom of the
 *    source type,
 *  - convert(l) </> column, where the column is a "sql_mul" computed by
 *    the projection on the right: replaced by a compare of l with a
 *    "scale_down" of that column.
 * The anti flag of the original expression is carried over.
 * 'changes' is not updated.  Returns 'rel'.
 */
static sql_rel *
rel_reduce_casts(int *changes, mvc *sql, sql_rel *rel)
{
	(void)sql;
	(void)changes;
	if ((is_join(rel->op) || is_semi(rel->op) || is_select(rel->op)) &&
			rel->exps && list_length(rel->exps)) {
		list *exps = rel->exps;
		node *n;

		for (n=exps->h; n; n = n->next) {
			sql_exp *e = n->data;
			sql_exp *le = e->l;
			sql_exp *re = e->r;
			int anti = is_anti(e);

			/* handle the and's in the or lists */
			/* only theta compares without a third operand are considered */
			if (e->type != e_cmp || !is_theta_exp(e->flag) || e->f)
				continue;
			/* rewrite e if left or right is a cast */
			if (le->type == e_convert || re->type == e_convert) {
				sql_rel *r = rel->r;
				/* NOTE(review): st is dereferenced below without a NULL check -- confirm exp_subtype cannot return NULL here */
				sql_subtype *st = exp_subtype(re);

				/* e_convert(le) ==, <(=), >(=), != e_atom(re), conversion between integers only */
				if (le->type == e_convert && is_simple_atom(re) && st->type->eclass == EC_NUM) {
					sql_subtype *tt = exp_totype(le);
					sql_subtype *ft = exp_fromtype(le);

					if (tt->type->eclass != EC_NUM || ft->type->eclass != EC_NUM || tt->type->localtype < ft->type->localtype)
						continue;

					/* tt->type larger than ft->type, ie empty result, ie change into > max */
					/* NOTE(review): the atom's value is not compared with the maximum of ft in this block -- confirm is_simple_atom (or the caller) guarantees it is out of range */
					re = exp_atom_max( sql->sa, ft);
					if (!re)
						continue;
					/* the ==, > and >= change to l > max, the !=, < and <= change to l < max */
					if (e->flag == cmp_equal || e->flag == cmp_gt || e->flag == cmp_gte)
						e = exp_compare(sql->sa, le->l, re, cmp_gt);
					else
						e = exp_compare(sql->sa, le->l, re, cmp_lt);
					sql->caching = 0;
				} else
				/* if convert on left then find
				 * mul or div on right which increased
				 * scale!
				 */
				if (le->type == e_convert && re->type == e_column && (e->flag == cmp_lt || e->flag == cmp_gt) && r && is_project(r->op)) {
					sql_exp *nre = rel_find_exp(r, re);
					sql_subtype *tt = exp_totype(le);
					sql_subtype *ft = exp_fromtype(le);

					if (nre && nre->type == e_func) {
						sql_subfunc *f = nre->f;

						if (!f->func->s && !strcmp(f->func->base.name, "sql_mul")) {
							list *args = nre->l;
							sql_exp *ce = args->t->data;
							sql_subtype *fst = exp_subtype(args->h->data);
							atom *a;

							if (fst->scale == ft->scale &&
							   (a = exp_value(sql, ce, sql->args, sql->argc)) != NULL) {
#ifdef HAVE_HGE
								hge v = 1;
#else
								lng v = 1;
#endif
								/* multiply with smallest value, then scale and (round) */
								int scale = tt->scale - ft->scale;
								/* reduce_scale also rewrites the atom a in place */
								int rs = reduce_scale(a);

								scale -= rs;

								args = new_exp_list(sql->sa);
								/* v = 10^scale (stays 1 when scale <= 0) */
								while(scale > 0) {
									scale--;
									v *= 10;
								}
								append(args, re);
#ifdef HAVE_HGE
								append(args, have_hge ? exp_atom_hge(sql->sa, v) : exp_atom_lng(sql->sa, (lng) v));
#else
								append(args, exp_atom_lng(sql->sa, v));
#endif
								/* NOTE(review): the result of find_func is used without a NULL check */
								f = find_func(sql, "scale_down", args);
								nre = exp_op(sql->sa, args, f);
								e = exp_compare(sql->sa, le->l, nre, e->flag);
							}
						}
					}
				}
			}
			if (anti) set_anti(e);
			n->data = e;
		}
	}
	return rel;
}
8167
8168static int
8169is_identity_of(sql_exp *e, sql_rel *l)
8170{
8171 if (e->type != e_cmp)
8172 return 0;
8173 if (!is_identity(e->l, l) || !is_identity(e->r, l))
8174 return 0;
8175 return 1;
8176}
8177
8178
8179static sql_rel *
8180rel_rewrite_semijoin(int *changes, mvc *sql, sql_rel *rel)
8181{
8182 (void)sql;
8183 if (is_semi(rel->op)) {
8184 sql_rel *l = rel->l;
8185 sql_rel *r = rel->r;
8186 sql_rel *rl = (r->l)?r->l:NULL;
8187 int on_identity = 1;
8188
8189 if (!rel->exps || list_length(rel->exps) != 1 || !is_identity_of(rel->exps->h->data, l))
8190 on_identity = 0;
8191
8192 /* rewrite {semi,anti}join (A, join(A,B)) into {semi,anti}join (A,B)
8193 * and {semi,anti}join (A, join(B,A)) into {semi,anti}join (A,B)
8194 * Where the semi/anti join is done using the identity */
8195 if (on_identity && l->ref.refcnt == 2 && ((is_join(r->op) && (l == r->l || l == r->r)) ||
8196 (is_project(r->op) && rl && is_join(rl->op) && (l == rl->l || l == rl->r)))){
8197 sql_rel *or = r;
8198
8199 if (is_project(r->op))
8200 r = rl;
8201
8202 if (l == r->r)
8203 rel->r = rel_dup(r->l);
8204 else
8205 rel->r = rel_dup(r->r);
8206
8207 rel->exps = r->exps;
8208 r->exps = NULL;
8209 rel_destroy(or);
8210 (*changes)++;
8211 }
8212 }
8213 if (is_semi(rel->op)) {
8214 sql_rel *l = rel->l, *rl = NULL;
8215 sql_rel *r = rel->r, *or = r;
8216
8217 if (r)
8218 rl = r->l;
8219 if (r && is_project(r->op)) {
8220 r = rl;
8221 if (r)
8222 rl = r->l;
8223 }
8224
8225 /* More general case is (join reduction)
8226 {semi,anti}join (A, join(A,B) [A.c1 == B.c1]) [ A.c1 == B.c1 ]
8227 into {semi,anti}join (A,B) [ A.c1 == B.c1 ]
8228
8229 for semijoin also A.c1 == B.k1 ] [ A.c1 == B.k2 ] could be rewriten
8230 */
8231 if (l && r && rl &&
8232 is_basetable(l->op) && is_basetable(rl->op) &&
8233 is_join(r->op) && l->l == rl->l)
8234 {
8235 node *n, *m;
8236 list *exps;
8237
8238 if (!rel->exps || !r->exps ||
8239 list_length(rel->exps) != list_length(r->exps))
8240 return rel;
8241 exps = new_exp_list(sql->sa);
8242
8243 /* are the join conditions equal */
8244 for (n = rel->exps->h, m = r->exps->h;
8245 n && m; n = n->next, m = m->next)
8246 {
8247 sql_exp *le = NULL, *oe = n->data;
8248 sql_exp *re = NULL, *ne = m->data;
8249 sql_column *cl;
8250 int equal = 0;
8251
8252 if (oe->type != e_cmp || ne->type != e_cmp ||
8253 oe->flag != cmp_equal ||
8254 ne->flag != cmp_equal || is_anti(oe) || is_anti(ne))
8255 return rel;
8256
8257 if ((cl = exp_find_column(rel->l, oe->l, -2)) != NULL) {
8258 le = oe->l;
8259 re = oe->r;
8260 } else if ((cl = exp_find_column(rel->l, oe->r, -2)) != NULL) {
8261 le = oe->r;
8262 re = oe->l;
8263 } else
8264 return rel;
8265
8266 if (exp_find_column(rl, ne->l, -2) == cl) {
8267 sql_exp *e = (or != r)?rel_find_exp(or, re):re;
8268
8269 equal = exp_match_exp(ne->r, e);
8270 if (!equal)
8271 return rel;
8272 re = ne->r;
8273 } else if (exp_find_column(rl, ne->r, -2) == cl) {
8274 sql_exp *e = (or != r)?rel_find_exp(or, re):re;
8275
8276 equal = exp_match_exp(ne->l, e);
8277 if (!equal)
8278 return rel;
8279 re = ne->l;
8280 } else
8281 return rel;
8282
8283 ne = exp_compare(sql->sa, le, re, cmp_equal);
8284 append(exps, ne);
8285 }
8286
8287 rel->r = rel_dup(r->r);
8288 rel->exps = exps;
8289 rel_destroy(or);
8290 (*changes)++;
8291 }
8292 }
8293 return rel;
8294}
8295
8296/* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
8297static sql_rel *
8298rel_rewrite_antijoin(int *changes, mvc *sql, sql_rel *rel)
8299{
8300 if (rel->op == op_anti) {
8301 sql_rel *l = rel->l;
8302 sql_rel *r = rel->r;
8303
8304 if (l && !rel_is_ref(l) &&
8305 r && !rel_is_ref(r) && is_union(r->op)) {
8306 sql_rel *rl = rel_dup(r->l), *nl;
8307 sql_rel *rr = rel_dup(r->r);
8308
8309 if (!is_project(rl->op))
8310 rl = rel_project(sql->sa, rl,
8311 rel_projections(sql, rl, NULL, 1, 1));
8312 if (!is_project(rr->op))
8313 rr = rel_project(sql->sa, rr,
8314 rel_projections(sql, rr, NULL, 1, 1));
8315 rel_rename_exps(sql, r->exps, rl->exps);
8316 rel_rename_exps(sql, r->exps, rr->exps);
8317
8318 nl = rel_crossproduct(sql->sa, rel->l, rl, op_anti);
8319 if (need_no_nil(rel))
8320 set_no_nil(nl);
8321 nl->exps = exps_copy(sql, rel->exps);
8322 rel->l = nl;
8323 rel->r = rr;
8324 rel_destroy(r);
8325 (*changes)++;
8326 return rel;
8327 }
8328 }
8329 return rel;
8330}
8331
8332static sql_rel *
8333rel_semijoin_use_fk(int *changes, mvc *sql, sql_rel *rel)
8334{
8335 (void)changes;
8336 if (is_semi(rel->op) && rel->exps) {
8337 list *exps = rel->exps;
8338 list *rels = new_rel_list(sql->sa);
8339
8340 rel->exps = NULL;
8341 append(rels, rel->l);
8342 append(rels, rel->r);
8343 (void) find_fk( sql, rels, exps);
8344
8345 rel->exps = exps;
8346 }
8347 return rel;
8348}
8349
/* leftouterjoin(a,b)[ a.C op b.D or a.E op2 b.F ] ->
 * union(
 *	join(a,b)[ a.C op b.D or a.E op2 b.F ],
 *	project(
 *		antijoin(a,b)[ a.C op b.D or a.E op2 b.F ])
 *	 [ a.*, NULL for each column of b ]
 * )
 */
8358static int
8359exps_nr_of_or(list *exps)
8360{
8361 int ors = 0;
8362 node *n;
8363
8364 if (!exps)
8365 return ors;
8366 for(n=exps->h; n; n = n->next) {
8367 sql_exp *e = n->data;
8368
8369 if (e->type == e_cmp && e->flag == cmp_or)
8370 ors++;
8371 }
8372 return ors;
8373}
8374
8375static void
8376add_nulls(mvc *sql, sql_rel *rel, sql_rel *r)
8377{
8378 list *exps;
8379 node *n;
8380
8381 exps = rel_projections(sql, r, NULL, 1, 1);
8382 for(n = exps->h; n; n = n->next) {
8383 sql_exp *e = n->data, *ne;
8384
8385 ne = exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(e), NULL));
8386 exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
8387 append(rel->exps, ne);
8388 }
8389}
8390
8391static sql_rel *
8392rel_split_outerjoin(int *changes, mvc *sql, sql_rel *rel)
8393{
8394 if ((rel->op == op_left || rel->op == op_right || rel->op == op_full) &&
8395 list_length(rel->exps) == 1 && exps_nr_of_or(rel->exps) == list_length(rel->exps)) {
8396 sql_rel *l = rel->l, *nl, *nll, *nlr;
8397 sql_rel *r = rel->r, *nr;
8398 sql_exp *e;
8399 list *exps;
8400
8401 nll = rel_crossproduct(sql->sa, rel_dup(l), rel_dup(r), op_join);
8402 nlr = rel_crossproduct(sql->sa, rel_dup(l), rel_dup(r), op_join);
8403
8404 /* TODO find or exp, ie handle rest with extra joins */
8405 /* expect only a single or expr for now */
8406 assert(list_length(rel->exps) == 1);
8407 e = rel->exps->h->data;
8408 nll->exps = exps_copy(sql, e->l);
8409 nlr->exps = exps_copy(sql, e->r);
8410 nl = rel_or( sql, NULL, nll, nlr, NULL, NULL, NULL);
8411
8412 if (rel->op == op_left || rel->op == op_full) {
8413 /* split in 2 anti joins */
8414 nr = rel_crossproduct(sql->sa, rel_dup(l), rel_dup(r), op_anti);
8415 nr->exps = exps_copy(sql, e->l);
8416 nr = rel_crossproduct(sql->sa, nr, rel_dup(r), op_anti);
8417 nr->exps = exps_copy(sql, e->r);
8418
8419 /* project left */
8420 nr = rel_project(sql->sa, nr,
8421 rel_projections(sql, l, NULL, 1, 1));
8422 /* add null's for right */
8423 add_nulls( sql, nr, r);
8424 exps = rel_projections(sql, nl, NULL, 1, 1);
8425 nl = rel_setop(sql->sa, nl, nr, op_union);
8426 nl->exps = exps;
8427 set_processed(nl);
8428 }
8429 if (rel->op == op_right || rel->op == op_full) {
8430 /* split in 2 anti joins */
8431 nr = rel_crossproduct(sql->sa, rel_dup(r), rel_dup(l), op_anti);
8432 nr->exps = exps_copy(sql, e->l);
8433 nr = rel_crossproduct(sql->sa, nr, rel_dup(l), op_anti);
8434 nr->exps = exps_copy(sql, e->r);
8435
8436 nr = rel_project(sql->sa, nr, sa_list(sql->sa));
8437 /* add null's for left */
8438 add_nulls( sql, nr, l);
8439 /* project right */
8440 nr->exps = list_merge(nr->exps,
8441 rel_projections(sql, r, NULL, 1, 1),
8442 (fdup)NULL);
8443 exps = rel_projections(sql, nl, NULL, 1, 1);
8444 nl = rel_setop(sql->sa, nl, nr, op_union);
8445 nl->exps = exps;
8446 set_processed(nl);
8447 }
8448
8449 rel_destroy(rel);
8450 *changes = 1;
8451 rel = nl;
8452 }
8453 return rel;
8454}
8455
8456/* rewrite sqltype into backend types */
8457static sql_rel *
8458rel_rewrite_types(int *changes, mvc *sql, sql_rel *rel)
8459{
8460 (void)sql;
8461 (void)changes;
8462 return rel;
8463}
8464
8465static sql_exp *
8466exp_indexcol(mvc *sql, sql_exp *e, const char *tname, const char *cname, int de, bit unique)
8467{
8468 sql_subtype *rt = sql_bind_localtype(de==1?"bte":de==2?"sht":"int");
8469 sql_exp *u = exp_atom_bool(sql->sa, unique);
8470 sql_subfunc *f = sql_bind_func_result(sql->sa, mvc_bind_schema(sql,"sys"), "index", exp_subtype(e), exp_subtype(u), rt);
8471
8472 e = exp_binop(sql->sa, e, u, f);
8473 exp_setname(sql->sa, e, tname, cname);
8474 return e;
8475}
8476
8477static sql_exp *
8478exp_stringscol(mvc *sql, sql_exp *e, const char *tname, const char *cname)
8479{
8480 sql_subfunc *f = sql_bind_func(sql->sa, mvc_bind_schema(sql,"sys"), "strings", exp_subtype(e), NULL, F_FUNC);
8481
8482 e = exp_unop(sql->sa, e, f);
8483 exp_setname(sql->sa, e, tname, cname);
8484 return e;
8485}
8486
8487static sql_rel *
8488rel_dicttable(mvc *sql, sql_column *c, const char *tname, int de)
8489{
8490 sql_rel *rel = rel_create(sql->sa);
8491 sql_exp *e, *ie;
8492 int nr = 0;
8493 char name[16], *nme;
8494 if(!rel)
8495 return NULL;
8496
8497 e = exp_column(sql->sa, tname, c->base.name, &c->type, CARD_MULTI, c->null, 0);
8498 rel->l = NULL;
8499 rel->r = c;
8500 rel->op = op_basetable;
8501 rel->exps = new_exp_list(sql->sa);
8502
8503 ie = exp_indexcol(sql, e, tname, c->base.name, de, 1);
8504 nr = ++sql->label;
8505 nme = sa_strdup(sql->sa, number2name(name, sizeof(name), nr));
8506 exp_setname(sql->sa, ie, nme, nme);
8507 append(rel->exps, ie);
8508
8509 ie = exp_stringscol(sql, e, tname, c->base.name);
8510 nr = ++sql->label;
8511 nme = sa_strdup(sql->sa, number2name(name, sizeof(name), nr));
8512 exp_setname(sql->sa, ie, nme, nme);
8513 append(rel->exps, ie);
8514 e->p = prop_create(sql->sa, PROP_HASHCOL, e->p);
8515
8516 rel->card = CARD_MULTI;
8517 rel->nrcols = 2;
8518 return rel;
8519}
8520
8521/* rewrite merge tables into union of base tables and call optimizer again */
8522static sql_rel *
8523rel_add_dicts(int *changes, mvc *sql, sql_rel *rel)
8524{
8525 if (is_basetable(rel->op) && rel->l) {
8526 node *n;
8527 sql_table *t = rel->l;
8528 list *l = sa_list(sql->sa), *vcols = NULL, *pexps = sa_list(sql->sa);
8529
8530 for (n = rel->exps->h; n; n = n->next) {
8531 sql_exp *e = n->data, *ne = NULL;
8532 const char *rname = exp_relname(e)?exp_relname(e):e->l;
8533 const char *oname = e->r;
8534 int de;
8535
8536 if (!is_func(e->type) && oname[0] != '%') {
8537 sql_column *c = find_sql_column(t, oname);
8538
8539 if (EC_VARCHAR(c->type.type->eclass) && (de = store_funcs.double_elim_col(sql->session->tr, c)) != 0) {
8540 int nr = ++sql->label;
8541 char name[16], *nme;
8542 sql_rel *vt = rel_dicttable(sql, c, rname, de);
8543
8544 nme = sa_strdup(sql->sa, number2name(name, sizeof(name), nr));
8545 if (!vcols)
8546 vcols = sa_list(sql->sa);
8547 append(vcols, vt);
8548 e = exp_indexcol(sql, e, nme, nme, de, 0);
8549 ne = exp_ref(sql->sa, e);
8550 append(vcols, ne);
8551 append(vcols, n->data);
8552 (*changes)++;
8553 }
8554 }
8555 list_append(l, e);
8556 if (!ne)
8557 list_append(pexps, e);
8558 }
8559 rel->exps = l;
8560
8561 /* add joins for double_eliminated (large) columns */
8562 if (vcols) {
8563 node *n;
8564
8565 for(n = vcols->h; n; n = n->next->next->next) {
8566 sql_rel *vt = n->data;
8567 sql_exp *ic = n->next->data, *vti = NULL, *vtv;
8568 sql_exp *c = n->next->next->data, *cmp;
8569 const char *rname = exp_relname(c)?exp_relname(c):c->l;
8570 const char *oname = c->r;
8571
8572 rel = rel_crossproduct(sql->sa, rel, vt, op_join);
8573 vti = vt->exps->h->data;
8574 vtv = vt->exps->h->next->data;
8575 vti = exp_ref(sql->sa, vti);
8576 cmp = exp_compare(sql->sa, ic, vti, cmp_equal);
8577 cmp->p = prop_create(sql->sa, PROP_FETCH, cmp->p);
8578 rel_join_add_exp( sql->sa, rel, cmp);
8579
8580 vtv = exp_ref(sql->sa, vtv);
8581 exp_setname(sql->sa, vtv, rname, oname);
8582 append(pexps, vtv);
8583 }
8584 rel = rel_project(sql->sa, rel, pexps);
8585 }
8586 }
8587 return rel;
8588}
8589
8590static int
8591find_col_exp( list *exps, sql_exp *e)
8592{
8593 node *n;
8594 int nr = 0;
8595
8596 for (n=exps->h; n; n=n->next, nr++){
8597 if (n->data == e)
8598 return nr;
8599 }
8600 return -1;
8601}
8602
8603static int
8604exp_range_overlap( mvc *sql, sql_exp *e, char *min, char *max, atom *emin, atom *emax)
8605{
8606 sql_subtype *t = exp_subtype(e);
8607
8608 if (!min || !max || !emin || !emax)
8609 return 0;
8610
8611 if (GDK_STRNIL(min))
8612 return 0;
8613 if (GDK_STRNIL(max))
8614 return 0;
8615
8616 if (t->type->localtype == TYPE_dbl) {
8617 atom *cmin = atom_general(sql->sa, t, min);
8618 atom *cmax = atom_general(sql->sa, t, max);
8619
8620 if (emax->d < cmin->data.val.dval || emin->d > cmax->data.val.dval)
8621 return 0;
8622 }
8623 if (t->type->localtype == TYPE_bte) {
8624 atom *cmin = atom_general(sql->sa, t, min);
8625 atom *cmax = atom_general(sql->sa, t, max);
8626
8627 if (emax->data.val.btval < cmin->data.val.btval || emin->data.val.btval > cmax->data.val.btval)
8628 return 0;
8629 }
8630 if (t->type->localtype == TYPE_sht) {
8631 atom *cmin = atom_general(sql->sa, t, min);
8632 atom *cmax = atom_general(sql->sa, t, max);
8633
8634 if (emax->data.val.shval < cmin->data.val.shval || emin->data.val.shval > cmax->data.val.shval)
8635 return 0;
8636 }
8637 if (t->type->localtype == TYPE_int || t->type->localtype == TYPE_date) {
8638 atom *cmin = atom_general(sql->sa, t, min);
8639 atom *cmax = atom_general(sql->sa, t, max);
8640
8641 if (emax->data.val.ival < cmin->data.val.ival || emin->data.val.ival > cmax->data.val.ival)
8642 return 0;
8643 }
8644 if (t->type->localtype == TYPE_lng || t->type->localtype == TYPE_timestamp) {
8645 atom *cmin = atom_general(sql->sa, t, min);
8646 atom *cmax = atom_general(sql->sa, t, max);
8647
8648 if (emax->data.val.lval < cmin->data.val.lval || emin->data.val.lval > cmax->data.val.lval)
8649 return 0;
8650 }
8651 return 1;
8652}
8653
8654static sql_rel *
8655rel_rename_part(mvc *sql, sql_rel *p, char *tname, sql_table *mt)
8656{
8657 node *n, *m;
8658
8659 assert(list_length(p->exps) >= list_length(mt->columns.set));
8660 for( n = p->exps->h, m = mt->columns.set->h; n && m; n = n->next, m = m->next) {
8661 sql_exp *ne = n->data;
8662 sql_column *c = m->data;
8663
8664 exp_setname(sql->sa, ne, tname, c->base.name);
8665 }
8666 if (n) /* skip TID */
8667 n = n->next;
8668 if (mt->idxs.set) {
8669 /* also possible index name mismatches */
8670 for( m = mt->idxs.set->h; n && m; m = m->next) {
8671 sql_exp *ne = n->data;
8672 sql_idx *i = m->data;
8673 char *iname = NULL;
8674
8675 if (hash_index(i->type) && list_length(i->columns) <= 1)
8676 continue;
8677
8678 iname = sa_strconcat( sql->sa, "%", i->base.name);
8679 exp_setname(sql->sa, ne, tname, iname);
8680 n = n->next;
8681 }
8682 }
8683 return p;
8684}
8685
8686/* rewrite merge tables into union of base tables and call optimizer again */
8687static sql_rel *
8688rel_merge_table_rewrite(int *changes, mvc *sql, sql_rel *rel)
8689{
8690 sql_rel *sel = NULL;
8691
8692 if(is_modify(rel->op)) {
8693 sql_query *query = query_create(sql);
8694 return rel_propagate(query, rel, changes);
8695 } else {
8696 if (is_select(rel->op) && rel->l) {
8697 sel = rel;
8698 rel = rel->l;
8699 }
8700 if (is_basetable(rel->op) && rel->l) {
8701 sql_table *t = rel->l;
8702
8703 if (isMergeTable(t)) {
8704 /* instantiate merge table */
8705 sql_rel *nrel = NULL;
8706 char *tname = t->base.name;
8707 list *cols = NULL, *low = NULL, *high = NULL;
8708
8709 if (list_empty(t->members.set))
8710 return rel;
8711 if (sel) {
8712 node *n;
8713
8714 /* no need to reduce the tables list */
8715 if (list_length(t->members.set) <= 1)
8716 return sel;
8717
8718 cols = sa_list(sql->sa);
8719 low = sa_list(sql->sa);
8720 high = sa_list(sql->sa);
8721 for(n = sel->exps->h; n; n = n->next) {
8722 sql_exp *e = n->data;
8723 atom *lval = NULL, *hval = NULL;
8724
8725 if (e->type == e_cmp && (e->flag == cmp_equal || e->f )) {
8726 sql_exp *l = e->r;
8727 sql_exp *h = e->f;
8728 sql_exp *c = e->l;
8729
8730 c = rel_find_exp(rel, c);
8731 lval = exp_flatten(sql, l);
8732 if (!h)
8733 hval = lval;
8734 else if (h)
8735 hval = exp_flatten(sql, h);
8736 if (c && lval && hval) {
8737 append(cols, c);
8738 append(low, lval);
8739 append(high, hval);
8740 }
8741 }
8742 /* handle in lists */
8743 if (e->type == e_cmp && e->flag == cmp_in) {
8744 list *vals = e->r;
8745 sql_exp *c = e->l;
8746 node *n;
8747 list *vlist = sa_list(sql->sa);
8748
8749 c = rel_find_exp(rel, c);
8750 if (c) {
8751 for ( n = vals->h; n; n = n->next) {
8752 sql_exp *l = n->data;
8753 atom *lval = exp_flatten(sql, l);
8754
8755 if (!lval)
8756 break;
8757 append(vlist, lval);
8758 }
8759 if (!n) {
8760 append(cols, c);
8761 append(low, NULL); /* mark high as value list */
8762 append(high, vlist);
8763 }
8764 }
8765 }
8766 }
8767 }
8768 (*changes)++;
8769 if (t->members.set) {
8770 list *tables = sa_list(sql->sa);
8771 node *nt;
8772 int *pos = NULL, nr = list_length(rel->exps), first = 1;
8773
8774 /* rename (mostly the idxs) */
8775 pos = SA_NEW_ARRAY(sql->sa, int, nr);
8776 memset(pos, 0, sizeof(int)*nr);
8777 for (nt = t->members.set->h; nt; nt = nt->next) {
8778 sql_part *pd = nt->data;
8779 sql_table *pt = find_sql_table(t->s, pd->base.name);
8780 sql_rel *prel = rel_basetable(sql, pt, tname);
8781 node *n;
8782 int skip = 0, j;
8783 list *exps = NULL;
8784
8785 /* do not include empty partitions */
8786 if ((nrel || nt->next) &&
8787 pt && isTable(pt) && pt->access == TABLE_READONLY && !store_funcs.count_col(sql->session->tr, pt->columns.set->h->data, 1)){
8788 continue;
8789 }
8790
8791 prel = rel_rename_part(sql, prel, tname, t);
8792
8793 MT_lock_set(&prel->exps->ht_lock);
8794 prel->exps->ht = NULL;
8795 MT_lock_unset(&prel->exps->ht_lock);
8796 exps = sa_list(sql->sa);
8797 for (n = rel->exps->h, j=0; n && (!skip || first); n = n->next, j++) {
8798 sql_exp *e = n->data, *ne = NULL;
8799 int i;
8800
8801 if (e)
8802 ne = exps_bind_column2(prel->exps, e->l, e->r);
8803 if (!e || !ne) {
8804 (*changes)--;
8805 assert(0);
8806 return rel;
8807 }
8808 if (pt && isTable(pt) && pt->access == TABLE_READONLY && sel && (nrel || nt->next) &&
8809 ((first && (i=find_col_exp(cols, e)) != -1) ||
8810 (!first && pos[j] > 0))) {
8811 /* check if the part falls within the bounds of the select expression else skip this (keep at least on part-table) */
8812 char *min, *max;
8813 sql_column *col = NULL;
8814 sql_rel *bt = NULL;
8815
8816 if (first)
8817 pos[j] = i + 1;
8818 i = pos[j] - 1;
8819 col = name_find_column(prel, e->l, e->r, -2, &bt);
8820 assert(col);
8821 if (sql_trans_ranges(sql->session->tr, col, &min, &max)) {
8822 atom *lval = list_fetch(low,i);
8823 atom *hval = list_fetch(high,i);
8824
8825 if (lval && !exp_range_overlap(sql, e, min, max, lval, hval))
8826 skip = 1;
8827 else if (!lval) {
8828 node *n;
8829 list *l = list_fetch(high,i);
8830
8831 skip = 1;
8832 for (n = l->h; n && skip; n = n->next) {
8833 hval = lval = n->data;
8834
8835 if (exp_range_overlap(sql, e, min, max, lval, hval))
8836 skip = 0;
8837 }
8838 }
8839 }
8840 }
8841 assert(e->type == e_column);
8842 exp_setname(sql->sa, ne, e->l, e->r);
8843 append(exps, ne);
8844 }
8845 prel->exps = exps;
8846 first = 0;
8847 if (!skip) {
8848 append(tables, prel);
8849 nrel = prel;
8850 } else {
8851 sql->caching = 0;
8852 }
8853 }
8854 while (list_length(tables) > 1) {
8855 list *ntables = sa_list(sql->sa);
8856 node *n;
8857
8858 for(n=tables->h; n && n->next; n = n->next->next) {
8859 sql_rel *l = n->data;
8860 sql_rel *r = n->next->data;
8861 nrel = rel_setop(sql->sa, l, r, op_union);
8862 nrel->exps = rel_projections(sql, rel, NULL, 1, 1);
8863 set_processed(nrel);
8864 append(ntables, nrel);
8865 }
8866 if (n)
8867 append(ntables, n->data);
8868 tables = ntables;
8869 }
8870 }
8871 if (nrel && list_length(t->members.set) == 1) {
8872 nrel = rel_project(sql->sa, nrel, rel->exps);
8873 } else if (nrel)
8874 nrel->exps = rel->exps;
8875 rel_destroy(rel);
8876 if (sel) {
8877 int changes = 0;
8878 sel->l = nrel;
8879 sel = rewrite_topdown(sql, sel, &rel_push_select_down_union, &changes);
8880 if (changes)
8881 sel = rewrite(sql, sel, &rel_push_project_up, &changes);
8882 return sel;
8883 }
8884 return nrel;
8885 }
8886 }
8887 }
8888 if (sel)
8889 return sel;
8890 return rel;
8891}
8892
8893static sql_rel*
8894exp_skip_output_parts(sql_rel *rel)
8895{
8896 while ((is_topn(rel->op) || is_project(rel->op) || is_sample(rel->op)) && rel->l) {
8897 if (rel->op == op_groupby && list_empty(rel->r))
8898 return rel; /* a group-by with no columns is a plain aggregate and hence always returns one row */
8899 rel = rel->l;
8900 }
8901 return rel;
8902}
8903
8904/* return true if the given expression is guaranteed to have no rows */
8905static int
8906exp_is_zero_rows(mvc *sql, sql_rel *rel, sql_rel *sel)
8907{
8908 sql_table *t;
8909 node *n;
8910
8911 if (!rel)
8912 return 0;
8913 rel = exp_skip_output_parts(rel);
8914 if (is_select(rel->op) && rel->l) {
8915 sel = rel;
8916 rel = exp_skip_output_parts(rel->l);
8917 }
8918 if (!sel)
8919 return 0;
8920 if (rel->op == op_join)
8921 return exp_is_zero_rows(sql, rel->l, sel) || exp_is_zero_rows(sql, rel->r, sel);
8922 if (rel->op == op_left || is_semi(rel->op))
8923 return exp_is_zero_rows(sql, rel->l, sel);
8924 if (rel->op == op_right)
8925 return exp_is_zero_rows(sql, rel->r, sel);
8926 if (!is_basetable(rel->op) || !rel->l)
8927 return 0;
8928 t = rel->l;
8929 if (!isTable(t) || t->access != TABLE_READONLY)
8930 return 0;
8931
8932 if (sel->exps) for (n = sel->exps->h; n; n = n->next) {
8933 sql_exp *e = n->data;
8934 atom *lval = NULL, *hval = NULL;
8935
8936 if (e->type == e_cmp && (e->flag == cmp_equal || e->f)) { /* half-ranges are theoretically optimizable here, but not implemented */
8937 sql_exp *c = e->l;
8938 if (c->type == e_column) {
8939 sql_exp *l = e->r;
8940 sql_exp *h = e->f;
8941
8942 lval = exp_flatten(sql, l);
8943 hval = h ? exp_flatten(sql, h) : lval;
8944 if (lval && hval) {
8945 sql_rel *bt;
8946 sql_column *col = name_find_column(sel, exp_relname(c), exp_name(c), -2, &bt);
8947 char *min, *max;
8948 if (col
8949 && col->t == t
8950 && sql_trans_ranges(sql->session->tr, col, &min, &max)
8951 && !exp_range_overlap(sql, c, min, max, lval, hval)) {
8952 return 1;
8953 }
8954 }
8955 }
8956 }
8957 }
8958 return 0;
8959}
8960
8961/* discard sides of UNION or UNION ALL which cannot produce any rows, as per
8962statistics, similarly to the merge table optimizer, e.g.
8963 select * from a where x between 1 and 2 union all select * from b where x between 1 and 2
8964-> select * from b where x between 1 and 2 [assuming a has no rows with 1<=x<=2]
8965*/
8966static sql_rel *
8967rel_remove_union_partitions(int *changes, mvc *sql, sql_rel *rel)
8968{
8969 if (!is_union(rel->op))
8970 return rel;
8971 if (exp_is_zero_rows(sql, rel->l, NULL)) {
8972 sql_rel *r = rel->r;
8973 rel_rename_exps(sql, rel->exps, r->exps);
8974 rel->r = NULL;
8975 rel_destroy(rel);
8976 (*changes)++;
8977 sql->caching = 0;
8978 return r;
8979 }
8980 if (exp_is_zero_rows(sql, rel->r, NULL)) {
8981 sql_rel *l = rel->l;
8982 rel_rename_exps(sql, rel->exps, l->exps);
8983 rel->l = NULL;
8984 rel_destroy(rel);
8985 (*changes)++;
8986 sql->caching = 0;
8987 return l;
8988 }
8989 return rel;
8990}
8991
8992static sql_exp *
8993rewrite_exp(mvc *sql, sql_exp *e, rewrite_rel_fptr rewrite_rel, rewrite_fptr rewriter, int *has_changes)
8994{
8995 if (e->type != e_psm)
8996 return e;
8997 if (e->flag & PSM_VAR)
8998 return e;
8999 if (e->flag & PSM_SET || e->flag & PSM_RETURN) {
9000 e->l = rewrite_exp(sql, e->l, rewrite_rel, rewriter, has_changes);
9001 }
9002 if (e->flag & PSM_WHILE || e->flag & PSM_IF) {
9003 e->l = rewrite_exp(sql, e->l, rewrite_rel, rewriter, has_changes);
9004 e->r = rewrite_exps(sql, e->r, rewrite_rel, rewriter, has_changes);
9005 if (e->f)
9006 e->f = rewrite_exps(sql, e->f, rewrite_rel, rewriter, has_changes);
9007 return e;
9008 }
9009 if (e->flag & PSM_REL)
9010 e->l = rewrite_rel(sql, e->l, rewriter, has_changes);
9011 if (e->flag & PSM_EXCEPTION)
9012 e->l = rewrite_exp(sql, e->l, rewrite_rel, rewriter, has_changes);
9013 return e;
9014}
9015
9016static list *
9017rewrite_exps(mvc *sql, list *l, rewrite_rel_fptr rewrite_rel, rewrite_fptr rewriter, int *has_changes)
9018{
9019 node *n;
9020
9021 if (!l)
9022 return l;
9023 for(n = l->h; n; n = n->next)
9024 n->data = rewrite_exp(sql, n->data, rewrite_rel, rewriter, has_changes);
9025 return l;
9026}
9027
9028
9029static sql_rel *
9030rewrite(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes)
9031{
9032 int changes = 0;
9033
9034 if (!rel)
9035 return rel;
9036
9037 switch (rel->op) {
9038 case op_basetable:
9039 case op_table:
9040 break;
9041 case op_join:
9042 case op_left:
9043 case op_right:
9044 case op_full:
9045
9046 case op_semi:
9047 case op_anti:
9048
9049 case op_union:
9050 case op_inter:
9051 case op_except:
9052 rel->l = rewrite(sql, rel->l, rewriter, has_changes);
9053 rel->r = rewrite(sql, rel->r, rewriter, has_changes);
9054 break;
9055 case op_project:
9056 case op_select:
9057 case op_groupby:
9058 case op_topn:
9059 case op_sample:
9060 rel->l = rewrite(sql, rel->l, rewriter, has_changes);
9061 break;
9062 case op_ddl:
9063 if (rel->flag == ddl_psm && rel->exps)
9064 rel->exps = rewrite_exps(sql, rel->exps, &rewrite, rewriter, has_changes);
9065 rel->l = rewrite(sql, rel->l, rewriter, has_changes);
9066 if (rel->r)
9067 rel->r = rewrite(sql, rel->r, rewriter, has_changes);
9068 break;
9069 case op_insert:
9070 case op_update:
9071 case op_delete:
9072 case op_truncate:
9073 rel->l = rewrite(sql, rel->l, rewriter, has_changes);
9074 rel->r = rewrite(sql, rel->r, rewriter, has_changes);
9075 break;
9076 }
9077 rel = rewriter(&changes, sql, rel);
9078 if (changes) {
9079 (*has_changes)++;
9080 return rewrite(sql, rel, rewriter, has_changes);
9081 }
9082 return rel;
9083}
9084
9085static sql_rel *
9086rewrite_topdown(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes)
9087{
9088 if (!rel)
9089 return rel;
9090
9091 rel = rewriter(has_changes, sql, rel);
9092 if (!rel)
9093 return rel;
9094
9095 switch (rel->op) {
9096 case op_basetable:
9097 case op_table:
9098 if (rel->op == op_table && rel->l && rel->flag != 2)
9099 rel->l = rewrite(sql, rel->l, rewriter, has_changes);
9100 if (rel->op == op_table && rel->l && rel->flag != 2)
9101 rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes);
9102 break;
9103 case op_join:
9104 case op_left:
9105 case op_right:
9106 case op_full:
9107
9108 case op_semi:
9109 case op_anti:
9110
9111 case op_union:
9112 case op_inter:
9113 case op_except:
9114 rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes);
9115 rel->r = rewrite_topdown(sql, rel->r, rewriter, has_changes);
9116 break;
9117 case op_project:
9118 case op_select:
9119 case op_groupby:
9120 case op_topn:
9121 case op_sample:
9122 rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes);
9123 break;
9124 case op_ddl:
9125 if (rel->flag == ddl_psm && rel->exps)
9126 rewrite_exps(sql, rel->exps, &rewrite_topdown, rewriter, has_changes);
9127 rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes);
9128 if (rel->r)
9129 rel->r = rewrite_topdown(sql, rel->r, rewriter, has_changes);
9130 break;
9131 case op_insert:
9132 case op_update:
9133 case op_delete:
9134 case op_truncate:
9135 rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes);
9136 rel->r = rewrite_topdown(sql, rel->r, rewriter, has_changes);
9137 break;
9138 }
9139 return rel;
9140}
9141
9142static sql_rel *
9143optimize_rel(mvc *sql, sql_rel *rel, int *g_changes, int level, int value_based_opt)
9144{
9145 int changes = 0, e_changes = 0;
9146 global_props gp;
9147
9148 gp = (global_props) {.cnt = {0},};
9149 rel_properties(sql, &gp, rel);
9150
9151#ifdef DEBUG
9152{
9153 int i;
9154 for (i = 0; i < ddl_maxops; i++) {
9155 if (gp.cnt[i]> 0)
9156 printf("%s %d\n", op2string((operator_type)i), gp.cnt[i]);
9157 }
9158}
9159#endif
9160 if (level <= 0 && gp.cnt[op_select])
9161 rel = rel_split_select(&changes, sql, rel, 1);
9162
9163 /* simple merging of projects */
9164 if (gp.cnt[op_project] || gp.cnt[op_groupby] || gp.cnt[op_ddl]) {
9165 rel = rewrite(sql, rel, &rel_merge_projects, &changes);
9166
9167 /* push (simple renaming) projections up */
9168 if (gp.cnt[op_project])
9169 rel = rewrite(sql, rel, &rel_push_project_up, &changes);
9170 if (level <= 0 && (gp.cnt[op_project] || gp.cnt[op_groupby]))
9171 rel = rel_split_project(&changes, sql, rel, 1);
9172
9173 if (level <= 0) {
9174 rel = rel_case_fixup(&changes, sql, rel, 1);
9175 if (value_based_opt)
9176 rel = rewrite(sql, rel, &rel_simplify_math, &changes);
9177 rel = rewrite(sql, rel, &rel_distinct_aggregate_on_unique_values, &changes);
9178 rel = rewrite(sql, rel, &rel_distinct_project2groupby, &changes);
9179 }
9180 }
9181
9182 if ((gp.cnt[op_select] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] ||
9183 gp.cnt[op_join] || gp.cnt[op_semi] || gp.cnt[op_anti]) && level <= 0)
9184 if (value_based_opt)
9185 rel = rewrite(sql, rel, &rel_simplify_predicates, &changes);
9186
9187 /* join's/crossproducts between a relation and a constant (row).
9188 * could be rewritten
9189 *
9190 * also joins between a relation and a DICT (which isn't used)
9191 * could be removed.
9192 * */
9193 if (gp.cnt[op_join] && gp.cnt[op_project] && /* DISABLES CODE */ (0))
9194 rel = rewrite(sql, rel, &rel_remove_join, &changes);
9195
9196 if (gp.cnt[op_join] ||
9197 gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] ||
9198 gp.cnt[op_semi] || gp.cnt[op_anti] ||
9199 gp.cnt[op_select]) {
9200 rel = rewrite(sql, rel, &rel_find_range, &changes);
9201 if (value_based_opt) {
9202 rel = rel_project_reduce_casts(&changes, sql, rel);
9203 rel = rewrite(sql, rel, &rel_reduce_casts, &changes);
9204 }
9205 }
9206
9207 if (gp.cnt[op_union])
9208 rel = rewrite(sql, rel, &rel_merge_union, &changes);
9209
9210 if (gp.cnt[op_select] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] ||
9211 gp.cnt[op_anti] || gp.cnt[op_join] || gp.cnt[op_semi])
9212 rel = rewrite(sql, rel, &rel_select_cse, &changes);
9213
9214 if (gp.cnt[op_project])
9215 rel = rewrite(sql, rel, &rel_project_cse, &changes);
9216
9217 rel = rewrite(sql, rel, &rel_rewrite_types, &changes);
9218
9219 if ((gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full]) && /* DISABLES CODE */ (0))
9220 rel = rewrite_topdown(sql, rel, &rel_split_outerjoin, &changes);
9221
9222 if (gp.cnt[op_select] || gp.cnt[op_project])
9223 if (level == 1) /* only once */
9224 rel = rewrite(sql, rel, &rel_merge_rse, &changes);
9225
9226 if (gp.cnt[op_select] && gp.cnt[op_join] && /* DISABLES CODE */ (0))
9227 rel = rewrite_topdown(sql, rel, &rel_push_select_down_join, &changes);
9228
9229 if (gp.cnt[op_select])
9230 rel = rewrite_topdown(sql, rel, &rel_push_select_down_union, &changes);
9231
9232 if (gp.cnt[op_union] && gp.cnt[op_select])
9233 rel = rewrite(sql, rel, &rel_remove_union_partitions, &changes);
9234
9235 if (gp.cnt[op_select])
9236 rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes);
9237
9238 if (gp.cnt[op_groupby]) {
9239 rel = rewrite_topdown(sql, rel, &rel_push_aggr_down, &changes);
9240 rel = rewrite_topdown(sql, rel, &rel_push_groupby_down, &changes);
9241 rel = rewrite(sql, rel, &rel_groupby_order, &changes);
9242 rel = rewrite(sql, rel, &rel_reduce_groupby_exps, &changes);
9243 rel = rewrite(sql, rel, &rel_groupby_distinct, &changes);
9244 }
9245
9246 if (gp.cnt[op_join] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] || gp.cnt[op_semi] || gp.cnt[op_anti]) {
9247 rel = rel_remove_empty_join(sql, rel, &changes);
9248 if (!gp.cnt[op_update])
9249 rel = rel_join_order(sql, rel);
9250 rel = rewrite(sql, rel, &rel_push_join_down_union, &changes);
9251 /* rel_join_order may introduce empty selects */
9252 rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes);
9253
9254 if (level <= 0)
9255 rel = rewrite(sql, rel, &rel_join_push_exps_down, &changes);
9256
9257 rel = rewrite(sql, rel, &rel_merge_identical_joins, &e_changes);
9258 }
9259
9260 /* Important -> Re-write semijoins after rel_join_order */
9261 if ((gp.cnt[op_join] || gp.cnt[op_semi] || gp.cnt[op_anti]) && gp.cnt[op_groupby]) {
9262 rel = rewrite_topdown(sql, rel, &rel_push_count_down, &changes);
9263 if (level <= 0)
9264 rel = rewrite_topdown(sql, rel, &rel_push_join_down, &changes);
9265
9266 /* push_join_down introduces semijoins */
9267 /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */
9268 rel = rewrite(sql, rel, &rel_rewrite_semijoin, &changes);
9269 }
9270
9271 if (gp.cnt[op_anti] || gp.cnt[op_semi]) {
9272 /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */
9273 rel = rewrite(sql, rel, &rel_rewrite_semijoin, &changes);
9274 /* push semijoin through join */
9275 rel = rewrite(sql, rel, &rel_push_semijoin_down_or_up, &changes);
9276 /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */
9277 rel = rewrite(sql, rel, &rel_rewrite_antijoin, &changes);
9278 if (level <= 0)
9279 rel = rewrite_topdown(sql, rel, &rel_semijoin_use_fk, &changes);
9280 }
9281
9282 /* Important -> Make sure rel_push_select_down gets called after rel_join_order,
9283 because pushing down select expressions makes rel_join_order more difficult */
9284 if (gp.cnt[op_select] || gp.cnt[op_semi]) {
9285 rel = rewrite_topdown(sql, rel, &rel_push_select_down, &changes);
9286 rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes);
9287 }
9288
9289 if (gp.cnt[op_join] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] || gp.cnt[op_semi] || gp.cnt[op_anti]) {
9290 rel = rewrite_topdown(sql, rel, &rel_simplify_fk_joins, &changes);
9291 }
9292
9293 if (gp.cnt[op_select] && sql->emode != m_prepare)
9294 rel = rewrite(sql, rel, &rel_simplify_like_select, &changes);
9295
9296 if (gp.cnt[op_select])
9297 rel = rewrite(sql, rel, &rel_select_order, &changes);
9298
9299 if (gp.cnt[op_select] || gp.cnt[op_join])
9300 rel = rewrite(sql, rel, &rel_use_index, &changes);
9301
9302 if (gp.cnt[op_project])
9303 rel = rewrite_topdown(sql, rel, &rel_push_project_down_union, &changes);
9304
9305 /* Remove unused expressions */
9306 if (level <= 0)
9307 rel = rel_dce(sql, rel);
9308
9309 if (gp.cnt[op_join] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] ||
9310 gp.cnt[op_semi] || gp.cnt[op_anti] || gp.cnt[op_select]) {
9311 rel = rewrite(sql, rel, &rel_push_func_down, &changes);
9312 rel = rewrite_topdown(sql, rel, &rel_push_select_down, &changes);
9313 rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes);
9314 }
9315
9316 if (!changes && gp.cnt[op_topn]) {
9317 rel = rewrite_topdown(sql, rel, &rel_push_topn_down, &changes);
9318 changes = 0;
9319 }
9320
9321 if (value_based_opt)
9322 rel = rewrite_topdown(sql, rel, &rel_merge_table_rewrite, &changes);
9323 if (level <= 0 && mvc_debug_on(sql,8))
9324 rel = rewrite_topdown(sql, rel, &rel_add_dicts, &changes);
9325 *g_changes = changes;
9326 return rel;
9327}
9328
9329static sql_rel *
9330optimize(mvc *sql, sql_rel *rel, int value_based_opt)
9331{
9332 list *refs = sa_list(sql->sa);
9333 node *n;
9334 int level = 0, changes = 1;
9335
9336
9337 for( ;rel && level < 20 && changes; level++)
9338 rel = optimize_rel(sql, rel, &changes, level, value_based_opt);
9339
9340 rel_dce_refs(sql, rel, refs);
9341 if (refs) {
9342 refs = rel_opt_dependencies(sql, refs);
9343 for (n = refs->h; n; n = n->next)
9344 n->data = optimize_rel(sql, n->data, &changes, 0, value_based_opt);
9345 }
9346 rel = rel_dce(sql, rel);
9347 return rel;
9348}
9349
9350sql_rel *
9351rel_optimizer(mvc *sql, sql_rel *rel, int value_based_opt)
9352{
9353 lng Tbegin = GDKusec();
9354 rel = optimize(sql, rel, value_based_opt);
9355 sql->Topt += GDKusec() - Tbegin;
9356 return rel;
9357}
9358