| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /*#define DEBUG*/ |
| 10 | |
| 11 | #include "monetdb_config.h" |
| 12 | #include "rel_optimizer.h" |
| 13 | #include "rel_rel.h" |
| 14 | #include "rel_exp.h" |
| 15 | #include "rel_prop.h" |
| 16 | #include "rel_dump.h" |
| 17 | #include "rel_planner.h" |
| 18 | #include "rel_propagate.h" |
| 19 | #include "sql_mvc.h" |
| 20 | #ifdef HAVE_HGE |
| 21 | #include "mal.h" /* for have_hge */ |
| 22 | #endif |
| 23 | #include "mtime.h" |
| 24 | |
/* Both list constructors are plain aliases for sa_list() on the given
 * allocator; the distinct names only document what the list will hold. */
#define new_func_list(sa) sa_list(sa)
#define new_col_list(sa) sa_list(sa)

/* Per-operator counters collected by rel_properties(): cnt[op] is
 * incremented once for every relation node with operator 'op' that is
 * visited while walking a relational tree. */
typedef struct global_props {
	int cnt[ddl_maxops];
} global_props;

/* Signature of a single rewrite step: receives a change counter, the mvc
 * context and a relation, and returns a relation. */
typedef sql_rel *(*rewrite_fptr)(int *changes, mvc *sql, sql_rel *rel);
/* Signature of a tree driver that applies a rewrite_fptr to a relation
 * (the shape shared by rewrite() and rewrite_topdown() declared below). */
typedef sql_rel *(*rewrite_rel_fptr)(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes);
/* Signature of a predicate/property lookup on a relation returning an int. */
typedef int (*find_prop_fptr)(mvc *sql, sql_rel *rel);

/* Forward declarations of the generic rewrite drivers. */
static sql_rel * rewrite_topdown(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes);
static sql_rel * rewrite(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes) ;
static list * rewrite_exps(mvc *sql, list *l, rewrite_rel_fptr rewrite_rel, rewrite_fptr rewriter, int *has_changes);

/* Forward declaration of a rewrite step (matches rewrite_fptr). */
static sql_rel * rel_remove_empty_select(int *changes, mvc *sql, sql_rel *rel);

/* Forward declaration of the function lookup helper. */
static sql_subfunc *find_func( mvc *sql, char *name, list *exps );
| 43 | |
/* The important task of the relational optimizer is to optimize the
   join order.

   The current implementation chooses the join order based on
   select counts, i.e. if one of the join sides has been reduced using
   a select, this join is chosen over one without such selections.
 */
| 51 | |
| 52 | /* currently we only find simple column expressions */ |
| 53 | void * |
| 54 | name_find_column( sql_rel *rel, const char *rname, const char *name, int pnr, sql_rel **bt ) |
| 55 | { |
| 56 | sql_exp *alias = NULL; |
| 57 | sql_column *c = NULL; |
| 58 | |
| 59 | switch (rel->op) { |
| 60 | case op_basetable: { |
| 61 | node *cn; |
| 62 | sql_table *t = rel->l; |
| 63 | |
| 64 | if (rel->exps) { |
| 65 | sql_exp *e; |
| 66 | |
| 67 | if (rname) |
| 68 | e = exps_bind_column2(rel->exps, rname, name); |
| 69 | else |
| 70 | e = exps_bind_column(rel->exps, name, NULL); |
| 71 | if (!e || e->type != e_column) |
| 72 | return NULL; |
| 73 | if (e->l) |
| 74 | rname = e->l; |
| 75 | name = e->r; |
| 76 | } |
| 77 | if (name && !t) |
| 78 | return rel->r; |
| 79 | if (rname && strcmp(t->base.name, rname) != 0) |
| 80 | return NULL; |
| 81 | for (cn = t->columns.set->h; cn; cn = cn->next) { |
| 82 | sql_column *c = cn->data; |
| 83 | if (strcmp(c->base.name, name) == 0) { |
| 84 | *bt = rel; |
| 85 | if (pnr < 0 || (c->t->p && |
| 86 | list_position(c->t->p->members.set, c->t) == pnr)) |
| 87 | return c; |
| 88 | } |
| 89 | } |
| 90 | if (t->idxs.set) |
| 91 | for (cn = t->idxs.set->h; cn; cn = cn->next) { |
| 92 | sql_idx *i = cn->data; |
| 93 | if (strcmp(i->base.name, name+1 /* skip % */) == 0) { |
| 94 | *bt = rel; |
| 95 | if (pnr < 0 || (i->t->p && |
| 96 | list_position(i->t->p->members.set, i->t) == pnr)) { |
| 97 | sql_kc *c = i->columns->h->data; |
| 98 | return c->c; |
| 99 | } |
| 100 | } |
| 101 | } |
| 102 | break; |
| 103 | } |
| 104 | case op_table: |
| 105 | /* table func */ |
| 106 | return NULL; |
| 107 | case op_ddl: |
| 108 | if (is_updateble(rel)) |
| 109 | return name_find_column( rel->l, rname, name, pnr, bt); |
| 110 | return NULL; |
| 111 | case op_join: |
| 112 | case op_left: |
| 113 | case op_right: |
| 114 | case op_full: |
| 115 | /* first right (possible subquery) */ |
| 116 | c = name_find_column( rel->r, rname, name, pnr, bt); |
| 117 | /* fall through */ |
| 118 | case op_semi: |
| 119 | case op_anti: |
| 120 | if (!c) |
| 121 | c = name_find_column( rel->l, rname, name, pnr, bt); |
| 122 | return c; |
| 123 | case op_select: |
| 124 | case op_topn: |
| 125 | case op_sample: |
| 126 | return name_find_column( rel->l, rname, name, pnr, bt); |
| 127 | case op_union: |
| 128 | case op_inter: |
| 129 | case op_except: |
| 130 | |
| 131 | if (pnr >= 0 || pnr == -2) { |
| 132 | /* first right (possible subquery) */ |
| 133 | c = name_find_column( rel->r, rname, name, pnr, bt); |
| 134 | if (!c) |
| 135 | c = name_find_column( rel->l, rname, name, pnr, bt); |
| 136 | return c; |
| 137 | } |
| 138 | return NULL; |
| 139 | |
| 140 | case op_project: |
| 141 | case op_groupby: |
| 142 | if (!rel->exps) |
| 143 | break; |
| 144 | if (rname) |
| 145 | alias = exps_bind_column2(rel->exps, rname, name); |
| 146 | else |
| 147 | alias = exps_bind_column(rel->exps, name, NULL); |
| 148 | if (is_groupby(rel->op) && alias && alias->type == e_column && rel->r) { |
| 149 | if (alias->l) |
| 150 | alias = exps_bind_column2(rel->r, alias->l, alias->r); |
| 151 | else |
| 152 | alias = exps_bind_column(rel->r, alias->r, NULL); |
| 153 | } |
| 154 | if (is_groupby(rel->op) && !alias && rel->l) { |
| 155 | /* Group by column not found as alias in projection |
| 156 | * list, fall back to check plain input columns */ |
| 157 | return name_find_column( rel->l, rname, name, pnr, bt); |
| 158 | } |
| 159 | break; |
| 160 | case op_insert: |
| 161 | case op_update: |
| 162 | case op_delete: |
| 163 | case op_truncate: |
| 164 | break; |
| 165 | } |
| 166 | if (alias) { /* we found an expression with the correct name, but |
| 167 | we need sql_columns */ |
| 168 | if (rel->l && alias->type == e_column) /* real alias */ |
| 169 | return name_find_column(rel->l, alias->l, alias->r, pnr, bt); |
| 170 | } |
| 171 | return NULL; |
| 172 | } |
| 173 | |
| 174 | static sql_column * |
| 175 | exp_find_column( sql_rel *rel, sql_exp *exp, int pnr ) |
| 176 | { |
| 177 | if (exp->type == e_column) { |
| 178 | sql_rel *bt = NULL; |
| 179 | return name_find_column(rel, exp->l, exp->r, pnr, &bt); |
| 180 | } |
| 181 | return NULL; |
| 182 | } |
| 183 | |
| 184 | static sql_column * |
| 185 | exp_find_column_( sql_rel *rel, sql_exp *exp, int pnr, sql_rel **bt ) |
| 186 | { |
| 187 | if (exp->type == e_column) |
| 188 | return name_find_column(rel, exp->l, exp->r, pnr, bt); |
| 189 | return NULL; |
| 190 | } |
| 191 | |
| 192 | /* find column for the select/join expression */ |
| 193 | static sql_column * |
| 194 | sjexp_col(sql_exp *e, sql_rel *r) |
| 195 | { |
| 196 | sql_column *res = NULL; |
| 197 | |
| 198 | if (e->type == e_cmp && !is_complex_exp(e->flag)) { |
| 199 | res = exp_find_column(r, e->l, -2); |
| 200 | if (!res) |
| 201 | res = exp_find_column(r, e->r, -2); |
| 202 | } |
| 203 | return res; |
| 204 | } |
| 205 | |
| 206 | static sql_exp * |
| 207 | list_find_exp( list *exps, sql_exp *e) |
| 208 | { |
| 209 | sql_exp *ne = NULL; |
| 210 | |
| 211 | if (e->type != e_column) |
| 212 | return NULL; |
| 213 | if (( e->l && (ne=exps_bind_column2(exps, e->l, e->r)) != NULL) || |
| 214 | ((!e->l && (ne=exps_bind_column(exps, e->r, NULL)) != NULL))) |
| 215 | return ne; |
| 216 | return NULL; |
| 217 | } |
| 218 | |
| 219 | static int |
| 220 | kc_column_cmp(sql_kc *kc, sql_column *c) |
| 221 | { |
| 222 | /* return on equality */ |
| 223 | return !(c == kc->c); |
| 224 | } |
| 225 | |
| 226 | static void psm_exps_properties(mvc *sql, global_props *gp, list *exps); |
| 227 | static void rel_properties(mvc *sql, global_props *gp, sql_rel *rel); |
| 228 | |
| 229 | static void |
| 230 | psm_exp_properties(mvc *sql, global_props *gp, sql_exp *e) |
| 231 | { |
| 232 | /* only functions need fix up */ |
| 233 | if (e->type == e_psm) { |
| 234 | if (e->flag & PSM_SET) { |
| 235 | psm_exp_properties(sql, gp, e->l); |
| 236 | } else if (e->flag & PSM_RETURN) { |
| 237 | psm_exp_properties(sql, gp, e->l); |
| 238 | } else if (e->flag & PSM_WHILE) { |
| 239 | psm_exp_properties(sql, gp, e->l); |
| 240 | psm_exps_properties(sql, gp, e->r); |
| 241 | } else if (e->flag & PSM_IF) { |
| 242 | psm_exp_properties(sql, gp, e->l); |
| 243 | psm_exps_properties(sql, gp, e->r); |
| 244 | if (e->f) |
| 245 | psm_exps_properties(sql, gp, e->f); |
| 246 | } else if (e->flag & PSM_REL) { |
| 247 | rel_properties(sql, gp, e->l); |
| 248 | } else if (e->flag & PSM_EXCEPTION) { |
| 249 | psm_exp_properties(sql, gp, e->l); |
| 250 | } |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | static void |
| 255 | psm_exps_properties(mvc *sql, global_props *gp, list *exps) |
| 256 | { |
| 257 | node *n; |
| 258 | |
| 259 | if (!exps) |
| 260 | return; |
| 261 | for (n = exps->h; n; n = n->next) |
| 262 | psm_exp_properties(sql, gp, n->data); |
| 263 | } |
| 264 | |
| 265 | static void |
| 266 | rel_properties(mvc *sql, global_props *gp, sql_rel *rel) |
| 267 | { |
| 268 | if(!rel) |
| 269 | return; |
| 270 | |
| 271 | gp->cnt[(int)rel->op]++; |
| 272 | switch (rel->op) { |
| 273 | case op_basetable: |
| 274 | case op_table: |
| 275 | if (rel->op == op_table && rel->l && rel->flag != 2) |
| 276 | rel_properties(sql, gp, rel->l); |
| 277 | break; |
| 278 | case op_join: |
| 279 | case op_left: |
| 280 | case op_right: |
| 281 | case op_full: |
| 282 | |
| 283 | case op_semi: |
| 284 | case op_anti: |
| 285 | |
| 286 | case op_union: |
| 287 | case op_inter: |
| 288 | case op_except: |
| 289 | rel_properties(sql, gp, rel->l); |
| 290 | rel_properties(sql, gp, rel->r); |
| 291 | break; |
| 292 | case op_project: |
| 293 | case op_select: |
| 294 | case op_groupby: |
| 295 | case op_topn: |
| 296 | case op_sample: |
| 297 | case op_ddl: |
| 298 | if (rel->op == op_ddl && rel->flag == ddl_psm && rel->exps) |
| 299 | psm_exps_properties(sql, gp, rel->exps); |
| 300 | if (rel->l) |
| 301 | rel_properties(sql, gp, rel->l); |
| 302 | break; |
| 303 | case op_insert: |
| 304 | case op_update: |
| 305 | case op_delete: |
| 306 | case op_truncate: |
| 307 | if (rel->r) |
| 308 | rel_properties(sql, gp, rel->r); |
| 309 | break; |
| 310 | } |
| 311 | |
| 312 | switch (rel->op) { |
| 313 | case op_basetable: |
| 314 | case op_table: |
| 315 | if (!find_prop(rel->p, PROP_COUNT)) |
| 316 | rel->p = prop_create(sql->sa, PROP_COUNT, rel->p); |
| 317 | break; |
| 318 | case op_join: |
| 319 | case op_left: |
| 320 | case op_right: |
| 321 | case op_full: |
| 322 | |
| 323 | case op_semi: |
| 324 | case op_anti: |
| 325 | |
| 326 | case op_union: |
| 327 | case op_inter: |
| 328 | case op_except: |
| 329 | break; |
| 330 | |
| 331 | case op_project: |
| 332 | case op_groupby: |
| 333 | case op_topn: |
| 334 | case op_sample: |
| 335 | case op_select: |
| 336 | break; |
| 337 | |
| 338 | case op_insert: |
| 339 | case op_update: |
| 340 | case op_delete: |
| 341 | case op_truncate: |
| 342 | case op_ddl: |
| 343 | break; |
| 344 | } |
| 345 | } |
| 346 | |
| 347 | static sql_rel * rel_join_order(mvc *sql, sql_rel *rel) ; |
| 348 | |
| 349 | static void |
| 350 | get_relations(mvc *sql, sql_rel *rel, list *rels) |
| 351 | { |
| 352 | if (!rel_is_ref(rel) && rel->op == op_join && rel->exps == NULL) { |
| 353 | sql_rel *l = rel->l; |
| 354 | sql_rel *r = rel->r; |
| 355 | |
| 356 | get_relations(sql, l, rels); |
| 357 | get_relations(sql, r, rels); |
| 358 | rel->l = NULL; |
| 359 | rel->r = NULL; |
| 360 | rel_destroy(rel); |
| 361 | } else { |
| 362 | rel = rel_join_order(sql, rel); |
| 363 | append(rels, rel); |
| 364 | } |
| 365 | } |
| 366 | |
| 367 | static void |
| 368 | get_inner_relations(mvc *sql, sql_rel *rel, list *rels) |
| 369 | { |
| 370 | if (!rel_is_ref(rel) && is_join(rel->op)) { |
| 371 | sql_rel *l = rel->l; |
| 372 | sql_rel *r = rel->r; |
| 373 | |
| 374 | get_inner_relations(sql, l, rels); |
| 375 | get_inner_relations(sql, r, rels); |
| 376 | } else { |
| 377 | append(rels, rel); |
| 378 | } |
| 379 | } |
| 380 | |
| 381 | static int |
| 382 | exp_count(int *cnt, sql_exp *e) |
| 383 | { |
| 384 | if (!e) |
| 385 | return 0; |
| 386 | if (find_prop(e->p, PROP_JOINIDX)) |
| 387 | *cnt += 100; |
| 388 | if (find_prop(e->p, PROP_HASHCOL)) |
| 389 | *cnt += 100; |
| 390 | if (find_prop(e->p, PROP_HASHIDX)) |
| 391 | *cnt += 100; |
| 392 | switch(e->type) { |
| 393 | case e_cmp: |
| 394 | if (!is_complex_exp(e->flag)) { |
| 395 | exp_count(cnt, e->l); |
| 396 | exp_count(cnt, e->r); |
| 397 | if (e->f) |
| 398 | exp_count(cnt, e->f); |
| 399 | } |
| 400 | switch (get_cmp(e)) { |
| 401 | case cmp_equal: |
| 402 | *cnt += 90; |
| 403 | return 90; |
| 404 | case cmp_notequal: |
| 405 | *cnt += 7; |
| 406 | return 7; |
| 407 | case cmp_gt: |
| 408 | case cmp_gte: |
| 409 | case cmp_lt: |
| 410 | case cmp_lte: |
| 411 | *cnt += 6; |
| 412 | if (e->f){ /* range */ |
| 413 | *cnt += 6; |
| 414 | return 12; |
| 415 | } |
| 416 | return 6; |
| 417 | case cmp_filter: |
| 418 | if (exps_card(e->r) > CARD_AGGR) { |
| 419 | /* filters for joins are special */ |
| 420 | *cnt += 1000; |
| 421 | return 1000; |
| 422 | } |
| 423 | *cnt += 2; |
| 424 | return 2; |
| 425 | case cmp_in: |
| 426 | case cmp_notin: { |
| 427 | list *l = e->r; |
| 428 | int c = 9 - 10*list_length(l); |
| 429 | *cnt += c; |
| 430 | return c; |
| 431 | } |
| 432 | case cmp_or: /* prefer or over functions */ |
| 433 | *cnt += 3; |
| 434 | return 3; |
| 435 | case mark_in: |
| 436 | case mark_notin: |
| 437 | case mark_exists: |
| 438 | case mark_notexists: |
| 439 | *cnt += 0; |
| 440 | return 0; |
| 441 | default: |
| 442 | return 0; |
| 443 | } |
| 444 | case e_column: |
| 445 | *cnt += 20; |
| 446 | return 20; |
| 447 | case e_atom: |
| 448 | *cnt += 10; |
| 449 | return 10; |
| 450 | case e_func: |
| 451 | /* functions are more expensive, depending on the number of columns involved. */ |
| 452 | if (e->card == CARD_ATOM) |
| 453 | return 0; |
| 454 | *cnt -= 5*list_length(e->l); |
| 455 | return 5*list_length(e->l); |
| 456 | case e_convert: |
| 457 | /* functions are more expensive, depending on the number of columns involved. */ |
| 458 | if (e->card == CARD_ATOM) |
| 459 | return 0; |
| 460 | /* fall through */ |
| 461 | default: |
| 462 | *cnt -= 5; |
| 463 | return -5; |
| 464 | } |
| 465 | } |
| 466 | |
| 467 | static int |
| 468 | exp_keyvalue(sql_exp *e) |
| 469 | { |
| 470 | int cnt = 0; |
| 471 | exp_count(&cnt, e); |
| 472 | return cnt; |
| 473 | } |
| 474 | |
| 475 | static sql_exp * |
| 476 | joinexp_col(sql_exp *e, sql_rel *r) |
| 477 | { |
| 478 | if (e->type == e_cmp) { |
| 479 | if (rel_has_exp(r, e->l) >= 0) |
| 480 | return e->l; |
| 481 | return e->r; |
| 482 | } |
| 483 | assert(0); |
| 484 | return NULL; |
| 485 | } |
| 486 | |
| 487 | static sql_column * |
| 488 | table_colexp(sql_exp *e, sql_rel *r) |
| 489 | { |
| 490 | sql_table *t = r->l; |
| 491 | |
| 492 | if (e->type == e_column) { |
| 493 | const char *name = exp_name(e); |
| 494 | node *cn; |
| 495 | |
| 496 | if (r->exps) { /* use alias */ |
| 497 | for (cn = r->exps->h; cn; cn = cn->next) { |
| 498 | sql_exp *ce = cn->data; |
| 499 | if (strcmp(exp_name(ce), name) == 0) { |
| 500 | name = ce->r; |
| 501 | break; |
| 502 | } |
| 503 | } |
| 504 | } |
| 505 | for (cn = t->columns.set->h; cn; cn = cn->next) { |
| 506 | sql_column *c = cn->data; |
| 507 | if (strcmp(c->base.name, name) == 0) |
| 508 | return c; |
| 509 | } |
| 510 | } |
| 511 | return NULL; |
| 512 | } |
| 513 | |
| 514 | int |
| 515 | exp_joins_rels(sql_exp *e, list *rels) |
| 516 | { |
| 517 | sql_rel *l = NULL, *r = NULL; |
| 518 | |
| 519 | assert (e->type == e_cmp); |
| 520 | |
| 521 | if (get_cmp(e) == cmp_or) { |
| 522 | l = NULL; |
| 523 | } else if (get_cmp(e) == cmp_filter) { |
| 524 | list *ll = e->l; |
| 525 | list *lr = e->r; |
| 526 | |
| 527 | l = find_rel(rels, ll->h->data); |
| 528 | r = find_rel(rels, lr->h->data); |
| 529 | } else if (e->flag == cmp_in || e->flag == cmp_notin) { |
| 530 | list *lr = e->r; |
| 531 | |
| 532 | l = find_rel(rels, e->l); |
| 533 | if (lr && lr->h) |
| 534 | r = find_rel(rels, lr->h->data); |
| 535 | } else { |
| 536 | l = find_rel(rels, e->l); |
| 537 | r = find_rel(rels, e->r); |
| 538 | } |
| 539 | |
| 540 | if (l && r) |
| 541 | return 0; |
| 542 | return -1; |
| 543 | } |
| 544 | |
| 545 | static list * |
| 546 | matching_joins(sql_allocator *sa, list *rels, list *exps, sql_exp *je) |
| 547 | { |
| 548 | sql_rel *l, *r; |
| 549 | |
| 550 | assert (je->type == e_cmp); |
| 551 | |
| 552 | l = find_rel(rels, je->l); |
| 553 | r = find_rel(rels, je->r); |
| 554 | if (l && r) { |
| 555 | list *res; |
| 556 | list *n_rels = new_rel_list(sa); |
| 557 | |
| 558 | append(n_rels, l); |
| 559 | append(n_rels, r); |
| 560 | res = list_select(exps, n_rels, (fcmp) &exp_joins_rels, (fdup)NULL); |
| 561 | return res; |
| 562 | } |
| 563 | return new_rel_list(sa); |
| 564 | } |
| 565 | |
| 566 | static int |
| 567 | sql_column_kc_cmp(sql_column *c, sql_kc *kc) |
| 568 | { |
| 569 | /* return on equality */ |
| 570 | return (c->colnr - kc->c->colnr); |
| 571 | } |
| 572 | |
| 573 | static sql_idx * |
| 574 | find_fk_index(sql_table *l, list *lcols, sql_table *r, list *rcols) |
| 575 | { |
| 576 | if (l->idxs.set) { |
| 577 | node *in; |
| 578 | for (in = l->idxs.set->h; in; in = in->next){ |
| 579 | sql_idx *li = in->data; |
| 580 | if (li->type == join_idx) { |
| 581 | sql_key *rk = &((sql_fkey*)li->key)->rkey->k; |
| 582 | fcmp cmp = (fcmp)&sql_column_kc_cmp; |
| 583 | |
| 584 | if (rk->t == r && |
| 585 | list_match(lcols, li->columns, cmp) == 0 && |
| 586 | list_match(rcols, rk->columns, cmp) == 0) { |
| 587 | return li; |
| 588 | } |
| 589 | } |
| 590 | } |
| 591 | } |
| 592 | return NULL; |
| 593 | } |
| 594 | |
| 595 | static sql_rel * |
| 596 | find_basetable( sql_rel *r) |
| 597 | { |
| 598 | if (!r) |
| 599 | return NULL; |
| 600 | switch(r->op) { |
| 601 | case op_basetable: |
| 602 | if (!r->l) |
| 603 | return NULL; |
| 604 | return r; |
| 605 | case op_project: |
| 606 | case op_select: |
| 607 | return find_basetable(r->l); |
| 608 | default: |
| 609 | return NULL; |
| 610 | } |
| 611 | } |
| 612 | |
| 613 | static int |
| 614 | exps_count(list *exps) |
| 615 | { |
| 616 | node *n; |
| 617 | int cnt = 0; |
| 618 | |
| 619 | if (!exps) |
| 620 | return 0; |
| 621 | for (n = exps->h; n; n=n->next) |
| 622 | exp_count(&cnt, n->data); |
| 623 | return cnt; |
| 624 | } |
| 625 | |
| 626 | static list * |
| 627 | order_join_expressions(mvc *sql, list *dje, list *rels) |
| 628 | { |
| 629 | list *res; |
| 630 | node *n = NULL; |
| 631 | int i, *keys, cnt = list_length(dje); |
| 632 | void **data; |
| 633 | int debug = mvc_debug_on(sql, 16); |
| 634 | |
| 635 | keys = malloc(cnt*sizeof(int)); |
| 636 | data = malloc(cnt*sizeof(void *)); |
| 637 | if (keys == NULL || data == NULL) { |
| 638 | if (keys) |
| 639 | free(keys); |
| 640 | if (data) |
| 641 | free(data); |
| 642 | return NULL; |
| 643 | } |
| 644 | res = sa_list(sql->sa); |
| 645 | if (res == NULL) { |
| 646 | free(keys); |
| 647 | free(data); |
| 648 | return NULL; |
| 649 | } |
| 650 | for (n = dje->h, i = 0; n; n = n->next, i++) { |
| 651 | sql_exp *e = n->data; |
| 652 | |
| 653 | keys[i] = exp_keyvalue(e); |
| 654 | /* add some weight for the selections */ |
| 655 | if (e->type == e_cmp && !is_complex_exp(e->flag)) { |
| 656 | sql_rel *l = find_rel(rels, e->l); |
| 657 | sql_rel *r = find_rel(rels, e->r); |
| 658 | |
| 659 | if (l && is_select(l->op) && l->exps) |
| 660 | keys[i] += list_length(l->exps)*10 + exps_count(l->exps)*debug; |
| 661 | if (r && is_select(r->op) && r->exps) |
| 662 | keys[i] += list_length(r->exps)*10 + exps_count(r->exps)*debug; |
| 663 | } |
| 664 | data[i] = n->data; |
| 665 | } |
| 666 | /* sort descending */ |
| 667 | GDKqsort(keys, data, NULL, cnt, sizeof(int), sizeof(void *), TYPE_int, true, true); |
| 668 | for(i=0; i<cnt; i++) { |
| 669 | list_append(res, data[i]); |
| 670 | } |
| 671 | free(keys); |
| 672 | free(data); |
| 673 | return res; |
| 674 | } |
| 675 | |
| 676 | static int |
| 677 | find_join_rels(list **L, list **R, list *exps, list *rels) |
| 678 | { |
| 679 | node *n; |
| 680 | |
| 681 | *L = sa_list(exps->sa); |
| 682 | *R = sa_list(exps->sa); |
| 683 | if (!exps || list_length(exps) <= 1) |
| 684 | return -1; |
| 685 | for(n = exps->h; n; n = n->next) { |
| 686 | sql_exp *e = n->data; |
| 687 | sql_rel *l = NULL, *r = NULL; |
| 688 | |
| 689 | if (!is_complex_exp(e->flag)){ |
| 690 | l = find_rel(rels, e->l); |
| 691 | r = find_rel(rels, e->r); |
| 692 | } |
| 693 | if (l<r) { |
| 694 | list_append(*L, l); |
| 695 | list_append(*R, r); |
| 696 | } else { |
| 697 | list_append(*L, r); |
| 698 | list_append(*R, l); |
| 699 | } |
| 700 | } |
| 701 | return 0; |
| 702 | } |
| 703 | |
| 704 | static list * |
| 705 | distinct_join_exps(list *aje, list *lrels, list *rrels) |
| 706 | { |
| 707 | node *n, *m, *o, *p; |
| 708 | int len = 0, i, j; |
| 709 | char *used = SA_NEW_ARRAY(aje->sa, char, len = list_length(aje)); |
| 710 | list *res = sa_list(aje->sa); |
| 711 | |
| 712 | memset(used, 0, len); |
| 713 | assert(len == list_length(lrels)); |
| 714 | for(n = lrels->h, m = rrels->h, j = 0; n && m; |
| 715 | n = n->next, m = m->next, j++) { |
| 716 | if (n->data && m->data) |
| 717 | for(o = n->next, p = m->next, i = j+1; o && p; |
| 718 | o = o->next, p = p->next, i++) { |
| 719 | if (o->data == n->data && p->data == m->data) |
| 720 | used[i] = 1; |
| 721 | } |
| 722 | } |
| 723 | for (i = 0, n = aje->h; i < len; n = n->next, i++) { |
| 724 | if (!used[i]) |
| 725 | list_append(res, n->data); |
| 726 | } |
| 727 | return res; |
| 728 | } |
| 729 | |
/*
 * Detect joins that follow a foreign key and order the join expressions.
 *
 * The join expressions are selected from 'exps' and reduced to one
 * expression per pair of relations.  For each of those (until the first
 * complex expression) that does not yet carry a PROP_JOINIDX property,
 * the equi-join expressions on the same relation pair are collected and
 * mapped to table columns; when a join index (see find_fk_index) covers
 * them in either direction and the index column "%<index name>" is
 * reachable from the relation, the expression is annotated with
 * PROP_JOINIDX (value: the index).  Depending on the number of columns and
 * debug flag 512 the equi-join expressions are replaced in 'exps' by a
 * single index = TID comparison.
 *
 *  sql   context (allocator, debug flags)
 *  rels  relations taking part in the join
 *  exps  all expressions; modified in place as described above
 *
 * Returns the distinct join expressions as ordered by
 * order_join_expressions().
 */
static list *
find_fk( mvc *sql, list *rels, list *exps)
{
	node *djn;
	list *sdje, *aje, *dje;
	list *lrels, *rrels;

	/* first find the distinct join expressions */
	aje = list_select(exps, rels, (fcmp) &exp_is_join, (fdup)NULL);
	/* add left/right relation */
	if (find_join_rels(&lrels, &rrels, aje, rels) < 0)
		dje = aje;
	else
		dje = distinct_join_exps(aje, lrels, rrels);
	for(djn=dje->h; djn; djn = djn->next) {
		/* equal join expressions */
		sql_idx *idx = NULL;
		sql_exp *je = djn->data, *le = je->l, *re = je->r;

		/* NOTE(review): a complex expression ends the whole scan
		 * (break), it is not merely skipped */
		if (is_complex_exp(je->flag))
			break;
		if (!find_prop(je->p, PROP_JOINIDX)) {
			/* swapped: the index was found on the right-hand table */
			int swapped = 0;
			/* all join expressions, and from those the equi-joins,
			 * on the same two relations as je */
			list *aaje = matching_joins(sql->sa, rels, aje, je);
			list *eje = list_select(aaje, (void*)1, (fcmp) &exp_is_eqjoin, (fdup)NULL);
			/* olr/orr keep the relations as found in rels; lr/rr
			 * are narrowed to the base tables below */
			sql_rel *lr = find_rel(rels, le), *olr = lr;
			sql_rel *rr = find_rel(rels, re), *orr = rr;
			sql_rel *bt = NULL;
			char *iname;

			sql_table *l, *r;
			/* the operands of the equi-joins belonging to each side */
			list *lexps = list_map(eje, lr, (fmap) &joinexp_col);
			list *rexps = list_map(eje, rr, (fmap) &joinexp_col);
			list *lcols, *rcols;

			lr = find_basetable(lr);
			rr = find_basetable(rr);
			if (!lr || !rr)
				continue;
			l = lr->l;
			r = rr->l;
			/* translate the operands into table columns */
			lcols = list_map(lexps, lr, (fmap) &table_colexp);
			rcols = list_map(rexps, rr, (fmap) &table_colexp);
			lcols->destroy = NULL;
			rcols->destroy = NULL;
			if (list_length(lcols) != list_length(rcols))
				continue;

			/* try the foreign key in both directions */
			idx = find_fk_index(l, lcols, r, rcols);
			if (!idx) {
				idx = find_fk_index(r, rcols, l, lcols);
				swapped = 1;
			}

			/* drop the index again when its "%name" column cannot be
			 * resolved from the relation that owns it */
			if (idx && (iname = sa_strconcat( sql->sa, "%" , idx->base.name)) != NULL &&
			   ((!swapped && name_find_column(olr, NULL, iname, -2, &bt) == NULL) ||
			    ( swapped && name_find_column(orr, NULL, iname, -2, &bt) == NULL)))
				idx = NULL;

			if (idx) {
				prop *p;
				node *n;
				sql_exp *t = NULL, *i = NULL;

				if (list_length(lcols) > 1 || !mvc_debug_on(sql, 512)) {

					/* Add join between idx and TID */
					if (swapped) {
						sql_exp *s = je->l, *l = je->r;

						t = rel_find_column(sql->sa, olr, s->l, TID);
						i = rel_find_column(sql->sa, orr, l->l, iname);
						if (!t || !i)
							continue;
						je = exp_compare(sql->sa, i, t, cmp_equal);
					} else {
						sql_exp *s = je->r, *l = je->l;

						t = rel_find_column(sql->sa, orr, s->l, TID);
						i = rel_find_column(sql->sa, olr, l->l, iname);
						if (!t || !i)
							continue;
						je = exp_compare(sql->sa, i, t, cmp_equal);
					}

					/* Remove all join expressions */
					for (n = eje->h; n; n = n->next)
						list_remove_data(exps, n->data);
					append(exps, je);
					djn->data = je;
				} else if (swapped) { /* else keep je for single column expressions */
					/* rebuild je with its operands exchanged */
					je = exp_compare(sql->sa, je->r, je->l, cmp_equal);
					/* Remove all join expressions */
					for (n = eje->h; n; n = n->next)
						list_remove_data(exps, n->data);
					append(exps, je);
					djn->data = je;
				}
				/* tag the (possibly new) expression with the index */
				je->p = p = prop_create(sql->sa, PROP_JOINIDX, je->p);
				p->value = idx;
			}
		}
	}

	/* sort expressions on weighted number of reducing operators */
	sdje = order_join_expressions(sql, dje, rels);
	return sdje;
}
| 838 | |
/*
 * Build a join tree for the relations in 'rels' using the expressions in
 * 'exps'.
 *
 * The join expressions are first annotated and ordered by find_fk().
 * With more than two relations and debug flag 256 set, the work is handed
 * to rel_planner().  Otherwise the tree is grown greedily: the first
 * ordered expression picks the initial pair of relations, after which
 * each round adds the first expression that connects one more relation to
 * the relations joined so far (n_rels).  Relations that are never
 * connected are added as cross products, and expressions that are left
 * over end up in a selection on top (or are pushed into the join below
 * it when they are join expressions).
 *
 * Both 'rels' and 'exps' are consumed (entries are removed) in the
 * process.  Returns the top of the resulting tree.
 */
static sql_rel *
order_joins(mvc *sql, list *rels, list *exps)
{
	sql_rel *top = NULL, *l = NULL, *r = NULL;
	sql_exp *cje;
	node *djn;
	/* n_rels: the relations already part of the tree under 'top' */
	list *sdje, *n_rels = new_rel_list(sql->sa);
	/* fnd: set when the last round managed to extend the tree */
	int fnd = 0;

	/* find foreign keys and reorder the expressions on reducing quality */
	sdje = find_fk(sql, rels, exps);

	if (list_length(rels) > 2 && mvc_debug_on(sql, 256)) {
		/* the planner receives the join expressions separately from
		 * the remaining expressions */
		for(djn = sdje->h; djn; djn = djn->next ) {
			sql_exp *e = djn->data;
			list_remove_data(exps, e);
		}
		top = rel_planner(sql, rels, sdje, exps);
		return top;
	}

	/* open problem, some expressions use more than 2 relations */
	/* For example a.x = b.y * c.z; */
	if (list_length(rels) >= 2 && sdje->h) {
		/* get the first expression */
		cje = sdje->h->data;

		/* find the involved relations */

		/* complex expressions may touch multiple base tables
		 * Should be pushed up to extra selection.
		 * */
		if (cje->type != e_cmp || !is_complex_exp(cje->flag) || !find_prop(cje->p, PROP_HASHCOL) /*||
		   (cje->type == e_cmp && cje->f == NULL)*/) {
			l = find_one_rel(rels, cje->l);
			r = find_one_rel(rels, cje->r);
		}

		if (l && r && l != r) {
			list_remove_data(sdje, cje);
			list_remove_data(exps, cje);
		}
	}
	if (l && r && l != r) {
		/* move the first pair from the pending to the joined set */
		list_remove_data(rels, l);
		list_remove_data(rels, r);
		list_append(n_rels, l);
		list_append(n_rels, r);

		/* Create a relation between l and r. Since the calling
		   functions rewrote the join tree, into a list of expressions
		   and a list of (simple) relations, there are no outer joins
		   involved, we can simply do a crossproduct here.
		 */
		top = rel_crossproduct(sql->sa, l, r, op_join);
		rel_join_add_exp(sql->sa, top, cje);

		/* all other join expressions on these 2 relations */
		while((djn = list_find(exps, n_rels, (fcmp)&exp_joins_rels)) != NULL) {
			sql_exp *e = djn->data;

			rel_join_add_exp(sql->sa, top, e);
			list_remove_data(exps, e);
		}
		/* Remove other joins on the current 'n_rels' set in the distinct list too */
		while((djn = list_find(sdje, n_rels, (fcmp)&exp_joins_rels)) != NULL)
			list_remove_data(sdje, djn->data);
		fnd = 1;
	}
	/* build join tree using the ordered list */
	while(list_length(exps) && fnd) {
		fnd = 0;
		/* find the first expression which could be added */
		/* the scan stops (djn = NULL) as soon as one expression was
		 * used, because the lists were modified underneath it */
		for(djn = sdje->h; djn && !fnd && rels->h; djn = (!fnd)?djn->next:NULL) {
			node *ln, *rn, *en;

			cje = djn->data;
			/* which side(s) are already part of the tree? */
			ln = list_find(n_rels, cje->l, (fcmp)&rel_has_exp);
			rn = list_find(n_rels, cje->r, (fcmp)&rel_has_exp);

			if (ln || rn) {
				/* remove the expression from the lists */
				list_remove_data(sdje, cje);
				list_remove_data(exps, cje);
			}
			if (ln && rn) {
				/* not expected: such expressions were already
				 * removed when their relations were joined */
				assert(0);
				/* create a selection on the current */
				l = ln->data;
				r = rn->data;
				rel_join_add_exp(sql->sa, top, cje);
				fnd = 1;
			} else if (ln || rn) {
				/* NOTE(review): the result of find_rel() is used
				 * without a NULL check */
				if (ln) {
					l = ln->data;
					r = find_rel(rels, cje->r);
				} else {
					l = rn->data;
					r = find_rel(rels, cje->l);
				}
				list_remove_data(rels, r);
				append(n_rels, r);

				/* create a join using the current expression */
				top = rel_crossproduct(sql->sa, top, r, op_join);
				rel_join_add_exp(sql->sa, top, cje);

				/* all join expressions on these tables */
				while((en = list_find(exps, n_rels, (fcmp)&exp_joins_rels)) != NULL) {
					sql_exp *e = en->data;
					rel_join_add_exp(sql->sa, top, e);
					list_remove_data(exps, e);
				}
				/* Remove other joins on the current 'n_rels'
				   set in the distinct list too */
				while((en = list_find(sdje, n_rels, (fcmp)&exp_joins_rels)) != NULL)
					list_remove_data(sdje, en->data);
				fnd = 1;
			}
		}
	}
	if (list_length(rels)) { /* more relations */
		node *n;
		/* relations without a usable join expression become cross
		 * products */
		for(n=rels->h; n; n = n->next) {
			if (top)
				top = rel_crossproduct(sql->sa, top, n->data, op_join);
			else
				top = n->data;
		}
	}
	if (list_length(exps)) { /* more expressions (add selects) */
		node *n;
		//set_processed(top);
		top = rel_select(sql->sa, top, NULL);
		for(n=exps->h; n; n = n->next) {
			sql_exp *e = n->data;

			/* find the involved relations */

			/* complex expressions may touch multiple base tables
			 * Should be push up to extra selection. */
			/*
			l = find_one_rel(rels, e->l);
			r = find_one_rel(rels, e->r);

			if (l && r)
			*/
			if (exp_is_join_exp(e) == 0) {
				/* a join expression goes into the tree below the
				 * new selection (top->l) */
				sql_rel *nr = NULL;
				if (e->flag == cmp_equal)
					nr = rel_push_join(sql, top->l, e->l, e->r, NULL, e);
				if (!nr)
					rel_join_add_exp(sql->sa, top->l, e);
			} else
				rel_select_add_exp(sql->sa, top, e);
		}
	}
	return top;
}
| 998 | |
| 999 | static int |
| 1000 | rel_neg_in_size(sql_rel *r) |
| 1001 | { |
| 1002 | if (is_union(r->op) && r->nrcols == 0) |
| 1003 | return -1 + rel_neg_in_size(r->l); |
| 1004 | if (is_project(r->op) && r->nrcols == 0) |
| 1005 | return -1; |
| 1006 | return 0; |
| 1007 | } |
| 1008 | |
| 1009 | static list * |
| 1010 | push_in_join_down(mvc *sql, list *rels, list *exps) |
| 1011 | { |
| 1012 | node *n; |
| 1013 | int restart = 1; |
| 1014 | list *nrels; |
| 1015 | |
| 1016 | /* we should sort these first, ie small in's before large one's */ |
| 1017 | nrels = list_sort(rels, (fkeyvalue)&rel_neg_in_size, (fdup)&rel_dup); |
| 1018 | |
| 1019 | /* we need to cleanup, the new refs ! */ |
| 1020 | rels->destroy = (fdestroy)rel_destroy; |
| 1021 | list_destroy(rels); |
| 1022 | rels = nrels; |
| 1023 | |
| 1024 | /* one of the rels should be a op_union with nrcols == 0 */ |
| 1025 | while (restart) { |
| 1026 | for (n = rels->h; n; n = n->next) { |
| 1027 | sql_rel *r = n->data; |
| 1028 | |
| 1029 | restart = 0; |
| 1030 | if ((is_union(r->op) || is_project(r->op)) && r->nrcols == 0) { |
| 1031 | /* next step find expression on this relation */ |
| 1032 | node *m; |
| 1033 | sql_rel *l = NULL; |
| 1034 | sql_exp *je = NULL; |
| 1035 | |
| 1036 | for(m = exps->h; !je && m; m = m->next) { |
| 1037 | sql_exp *e = m->data; |
| 1038 | |
| 1039 | if (e->type == e_cmp && e->flag == cmp_equal) { |
| 1040 | /* in values are on |
| 1041 | the right of the join */ |
| 1042 | if (rel_has_exp(r, e->r) >= 0) |
| 1043 | je = e; |
| 1044 | } |
| 1045 | } |
| 1046 | /* with this expression find other relation */ |
| 1047 | if (je && (l = find_rel(rels, je->l)) != NULL) { |
| 1048 | sql_rel *nr = rel_crossproduct(sql->sa, l, r, op_join); |
| 1049 | |
| 1050 | rel_join_add_exp(sql->sa, nr, je); |
| 1051 | list_append(rels, nr); |
| 1052 | list_remove_data(rels, l); |
| 1053 | list_remove_data(rels, r); |
| 1054 | list_remove_data(exps, je); |
| 1055 | restart = 1; |
| 1056 | break; |
| 1057 | } |
| 1058 | |
| 1059 | } |
| 1060 | } |
| 1061 | } |
| 1062 | return rels; |
| 1063 | } |
| 1064 | |
| 1065 | static list * |
| 1066 | push_up_join_exps( mvc *sql, sql_rel *rel) |
| 1067 | { |
| 1068 | if (rel_is_ref(rel)) |
| 1069 | return NULL; |
| 1070 | |
| 1071 | switch(rel->op) { |
| 1072 | case op_join: { |
| 1073 | sql_rel *rl = rel->l; |
| 1074 | sql_rel *rr = rel->r; |
| 1075 | list *l, *r; |
| 1076 | |
| 1077 | if (rel_is_ref(rl) && rel_is_ref(rr)) { |
| 1078 | l = rel->exps; |
| 1079 | rel->exps = NULL; |
| 1080 | return l; |
| 1081 | } |
| 1082 | l = push_up_join_exps(sql, rl); |
| 1083 | r = push_up_join_exps(sql, rr); |
| 1084 | if (l && r) { |
| 1085 | l = list_merge(l, r, (fdup)NULL); |
| 1086 | r = NULL; |
| 1087 | } |
| 1088 | if (rel->exps) { |
| 1089 | if (l && !r) |
| 1090 | r = l; |
| 1091 | l = list_merge(rel->exps, r, (fdup)NULL); |
| 1092 | } |
| 1093 | rel->exps = NULL; |
| 1094 | return l; |
| 1095 | } |
| 1096 | default: |
| 1097 | return NULL; |
| 1098 | } |
| 1099 | } |
| 1100 | |
| 1101 | static sql_rel * |
| 1102 | reorder_join(mvc *sql, sql_rel *rel) |
| 1103 | { |
| 1104 | list *exps; |
| 1105 | list *rels; |
| 1106 | |
| 1107 | if (rel->op == op_join && !rel_is_ref(rel)) |
| 1108 | rel->exps = push_up_join_exps(sql, rel); |
| 1109 | |
| 1110 | exps = rel->exps; |
| 1111 | if (!exps) /* crosstable, ie order not important */ |
| 1112 | return rel; |
| 1113 | rel->exps = NULL; /* should be all crosstables by now */ |
| 1114 | rels = new_rel_list(sql->sa); |
| 1115 | if (is_outerjoin(rel->op)) { |
| 1116 | sql_rel *l, *r; |
| 1117 | int cnt = 0; |
| 1118 | /* try to use an join index also for outer joins */ |
| 1119 | get_inner_relations(sql, rel, rels); |
| 1120 | cnt = list_length(exps); |
| 1121 | rel->exps = find_fk(sql, rels, exps); |
| 1122 | if (list_length(rel->exps) != cnt) |
| 1123 | rel->exps = order_join_expressions(sql, exps, rels); |
| 1124 | l = rel->l; |
| 1125 | r = rel->r; |
| 1126 | if (is_join(l->op)) |
| 1127 | rel->l = reorder_join(sql, rel->l); |
| 1128 | if (is_join(r->op)) |
| 1129 | rel->r = reorder_join(sql, rel->r); |
| 1130 | } else { |
| 1131 | get_relations(sql, rel, rels); |
| 1132 | if (list_length(rels) > 1) { |
| 1133 | rels = push_in_join_down(sql, rels, exps); |
| 1134 | rel = order_joins(sql, rels, exps); |
| 1135 | } else { |
| 1136 | rel->exps = exps; |
| 1137 | exps = NULL; |
| 1138 | } |
| 1139 | } |
| 1140 | return rel; |
| 1141 | } |
| 1142 | |
| 1143 | static sql_rel * |
| 1144 | rel_join_order(mvc *sql, sql_rel *rel) |
| 1145 | { |
| 1146 | int e_changes = 0; |
| 1147 | |
| 1148 | if (!rel) |
| 1149 | return rel; |
| 1150 | |
| 1151 | switch (rel->op) { |
| 1152 | case op_basetable: |
| 1153 | case op_table: |
| 1154 | break; |
| 1155 | case op_join: |
| 1156 | case op_left: |
| 1157 | case op_right: |
| 1158 | case op_full: |
| 1159 | break; |
| 1160 | |
| 1161 | case op_semi: |
| 1162 | case op_anti: |
| 1163 | |
| 1164 | case op_union: |
| 1165 | case op_inter: |
| 1166 | case op_except: |
| 1167 | rel->l = rel_join_order(sql, rel->l); |
| 1168 | rel->r = rel_join_order(sql, rel->r); |
| 1169 | break; |
| 1170 | case op_project: |
| 1171 | case op_select: |
| 1172 | case op_groupby: |
| 1173 | case op_topn: |
| 1174 | case op_sample: |
| 1175 | rel->l = rel_join_order(sql, rel->l); |
| 1176 | break; |
| 1177 | case op_ddl: |
| 1178 | rel->l = rel_join_order(sql, rel->l); |
| 1179 | if (rel->r) |
| 1180 | rel->r = rel_join_order(sql, rel->r); |
| 1181 | break; |
| 1182 | case op_insert: |
| 1183 | case op_update: |
| 1184 | case op_delete: |
| 1185 | case op_truncate: |
| 1186 | rel->l = rel_join_order(sql, rel->l); |
| 1187 | rel->r = rel_join_order(sql, rel->r); |
| 1188 | break; |
| 1189 | } |
| 1190 | if (is_join(rel->op) && rel->exps && !rel_is_ref(rel)) { |
| 1191 | rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes); |
| 1192 | if (!rel_is_ref(rel)) |
| 1193 | rel = reorder_join(sql, rel); |
| 1194 | } else if (is_join(rel->op)) { |
| 1195 | rel->l = rel_join_order(sql, rel->l); |
| 1196 | rel->r = rel_join_order(sql, rel->r); |
| 1197 | } |
| 1198 | (void)e_changes; |
| 1199 | return rel; |
| 1200 | } |
| 1201 | |
| 1202 | /* exp_rename */ |
| 1203 | static sql_exp * exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t); |
| 1204 | |
| 1205 | static list * |
| 1206 | exps_rename(mvc *sql, list *l, sql_rel *f, sql_rel *t) |
| 1207 | { |
| 1208 | node *n; |
| 1209 | list *nl = new_exp_list(sql->sa); |
| 1210 | |
| 1211 | for(n=l->h; n; n=n->next) { |
| 1212 | sql_exp *arg = n->data; |
| 1213 | |
| 1214 | arg = exp_rename(sql, arg, f, t); |
| 1215 | if (!arg) |
| 1216 | return NULL; |
| 1217 | append(nl, arg); |
| 1218 | } |
| 1219 | return nl; |
| 1220 | } |
| 1221 | |
| 1222 | /* exp_rename */ |
| 1223 | static sql_exp * |
| 1224 | exp_rename(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t) |
| 1225 | { |
| 1226 | sql_exp *ne = NULL, *l, *r, *r2; |
| 1227 | |
| 1228 | switch(e->type) { |
| 1229 | case e_column: |
| 1230 | if (e->l) { |
| 1231 | ne = exps_bind_column2(f->exps, e->l, e->r); |
| 1232 | /* if relation name matches expressions relation name, find column based on column name alone */ |
| 1233 | } else { |
| 1234 | ne = exps_bind_column(f->exps, e->r, NULL); |
| 1235 | } |
| 1236 | if (!ne) |
| 1237 | return e; |
| 1238 | e = NULL; |
| 1239 | if (exp_name(ne) && ne->r && ne->l) |
| 1240 | e = rel_bind_column2(sql, t, ne->l, ne->r, 0); |
| 1241 | if (!e && ne->r) |
| 1242 | e = rel_bind_column(sql, t, ne->r, 0); |
| 1243 | sql->session->status = 0; |
| 1244 | sql->errstr[0] = 0; |
| 1245 | if (!e && exp_is_atom(ne)) |
| 1246 | return ne; |
| 1247 | return exp_ref(sql->sa ,e); |
| 1248 | case e_cmp: |
| 1249 | if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) { |
| 1250 | list *l = exps_rename(sql, e->l, f, t); |
| 1251 | list *r = exps_rename(sql, e->r, f, t); |
| 1252 | if (l && r) { |
| 1253 | if (get_cmp(e) == cmp_filter) |
| 1254 | ne = exp_filter(sql->sa, l, r, e->f, is_anti(e)); |
| 1255 | else |
| 1256 | ne = exp_or(sql->sa, l, r, is_anti(e)); |
| 1257 | } |
| 1258 | } else if (e->flag == cmp_in || e->flag == cmp_notin) { |
| 1259 | sql_exp *l = exp_rename(sql, e->l, f, t); |
| 1260 | list *r = exps_rename(sql, e->r, f, t); |
| 1261 | if (l && r) |
| 1262 | ne = exp_in(sql->sa, l, r, e->flag); |
| 1263 | } else { |
| 1264 | l = exp_rename(sql, e->l, f, t); |
| 1265 | r = exp_rename(sql, e->r, f, t); |
| 1266 | if (e->f) { |
| 1267 | r2 = exp_rename(sql, e->f, f, t); |
| 1268 | if (l && r && r2) |
| 1269 | ne = exp_compare2(sql->sa, l, r, r2, e->flag); |
| 1270 | } else if (l && r) { |
| 1271 | ne = exp_compare(sql->sa, l, r, e->flag); |
| 1272 | } |
| 1273 | } |
| 1274 | break; |
| 1275 | case e_convert: |
| 1276 | l = exp_rename(sql, e->l, f, t); |
| 1277 | if (l) |
| 1278 | ne = exp_convert(sql->sa, l, exp_fromtype(e), exp_totype(e)); |
| 1279 | break; |
| 1280 | case e_aggr: |
| 1281 | case e_func: { |
| 1282 | list *l = e->l, *nl = NULL; |
| 1283 | |
| 1284 | if (!l) { |
| 1285 | return e; |
| 1286 | } else { |
| 1287 | nl = exps_rename(sql, l, f, t); |
| 1288 | if (!nl) |
| 1289 | return NULL; |
| 1290 | } |
| 1291 | if (e->type == e_func) |
| 1292 | ne = exp_op(sql->sa, nl, e->f); |
| 1293 | else |
| 1294 | ne = exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e)); |
| 1295 | break; |
| 1296 | } |
| 1297 | case e_atom: |
| 1298 | case e_psm: |
| 1299 | return e; |
| 1300 | } |
| 1301 | if (!ne) |
| 1302 | return NULL; |
| 1303 | return exp_propagate(sql->sa, ne, e); |
| 1304 | } |
| 1305 | |
/* push the expression down, ie translate column references
from relation f into expressions of relation t
*/
| 1309 | |
| 1310 | static sql_exp * _exp_push_down(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t); |
| 1311 | |
| 1312 | static list * |
| 1313 | exps_push_down(mvc *sql, list *exps, sql_rel *f, sql_rel *t) |
| 1314 | { |
| 1315 | node *n; |
| 1316 | list *nl = new_exp_list(sql->sa); |
| 1317 | |
| 1318 | for(n = exps->h; n; n = n->next) { |
| 1319 | sql_exp *arg = n->data, *narg = NULL; |
| 1320 | |
| 1321 | narg = _exp_push_down(sql, arg, f, t); |
| 1322 | if (!narg) |
| 1323 | return NULL; |
| 1324 | narg = exp_propagate(sql->sa, narg, arg); |
| 1325 | append(nl, narg); |
| 1326 | } |
| 1327 | return nl; |
| 1328 | } |
| 1329 | |
| 1330 | static sql_exp * |
| 1331 | _exp_push_down(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t) |
| 1332 | { |
| 1333 | int flag = e->flag; |
| 1334 | sql_exp *ne = NULL, *l, *r, *r2; |
| 1335 | |
| 1336 | switch(e->type) { |
| 1337 | case e_column: |
| 1338 | if (is_union(f->op)) { |
| 1339 | int p = list_position(f->exps, rel_find_exp(f, e)); |
| 1340 | |
| 1341 | return list_fetch(t->exps, p); |
| 1342 | } |
| 1343 | if (e->l) { |
| 1344 | ne = rel_bind_column2(sql, f, e->l, e->r, 0); |
| 1345 | /* if relation name matches expressions relation name, find column based on column name alone */ |
| 1346 | } |
| 1347 | if (!ne && !e->l) |
| 1348 | ne = rel_bind_column(sql, f, e->r, 0); |
| 1349 | if (!ne || ne->type != e_column) |
| 1350 | return NULL; |
| 1351 | e = NULL; |
| 1352 | /* |
| 1353 | if (exp_name(ne) && exp_relname(ne)) |
| 1354 | e = rel_bind_column2(sql, t, exp_relname(ne), exp_name(ne), 0); |
| 1355 | if (!e && exp_name(ne) && !exp_relname(ne)) |
| 1356 | e = rel_bind_column(sql, t, exp_name(ne), 0); |
| 1357 | if (!e && exp_name(ne) && ne->r && ne->l) |
| 1358 | e = rel_bind_column2(sql, t, ne->l, ne->r, 0); |
| 1359 | if (!e && ne->r && !ne->l) |
| 1360 | e = rel_bind_column(sql, t, ne->r, 0); |
| 1361 | */ |
| 1362 | if (ne->l && ne->r) |
| 1363 | e = rel_bind_column2(sql, t, ne->l, ne->r, 0); |
| 1364 | if (!e && ne->r && !ne->l) |
| 1365 | e = rel_bind_column(sql, t, ne->r, 0); |
| 1366 | sql->session->status = 0; |
| 1367 | sql->errstr[0] = 0; |
| 1368 | if (e && flag) |
| 1369 | e->flag = flag; |
| 1370 | /* if the upper exp was an alias, keep this */ |
| 1371 | if (e && exp_relname(ne)) |
| 1372 | exp_setname(sql->sa, e, exp_relname(ne), exp_name(ne)); |
| 1373 | return e; |
| 1374 | case e_cmp: |
| 1375 | if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) { |
| 1376 | list *l, *r; |
| 1377 | |
| 1378 | l = exps_push_down(sql, e->l, f, t); |
| 1379 | if (!l) |
| 1380 | return NULL; |
| 1381 | r = exps_push_down(sql, e->r, f, t); |
| 1382 | if (!r) |
| 1383 | return NULL; |
| 1384 | if (get_cmp(e) == cmp_filter) |
| 1385 | return exp_filter(sql->sa, l, r, e->f, is_anti(e)); |
| 1386 | return exp_or(sql->sa, l, r, is_anti(e)); |
| 1387 | } else if (e->flag == cmp_in || e->flag == cmp_notin) { |
| 1388 | list *r; |
| 1389 | |
| 1390 | l = _exp_push_down(sql, e->l, f, t); |
| 1391 | if (!l) |
| 1392 | return NULL; |
| 1393 | r = exps_push_down(sql, e->r, f, t); |
| 1394 | if (!r) |
| 1395 | return NULL; |
| 1396 | return exp_in(sql->sa, l, r, e->flag); |
| 1397 | } else { |
| 1398 | l = _exp_push_down(sql, e->l, f, t); |
| 1399 | if (!l) |
| 1400 | return NULL; |
| 1401 | r = _exp_push_down(sql, e->r, f, t); |
| 1402 | if (!r) |
| 1403 | return NULL; |
| 1404 | if (e->f) { |
| 1405 | r2 = _exp_push_down(sql, e->f, f, t); |
| 1406 | if (l && r && r2) |
| 1407 | ne = exp_compare2(sql->sa, l, r, r2, e->flag); |
| 1408 | } else if (l && r) { |
| 1409 | if (l->card < r->card) |
| 1410 | ne = exp_compare(sql->sa, r, l, swap_compare((comp_type)e->flag)); |
| 1411 | else |
| 1412 | ne = exp_compare(sql->sa, l, r, e->flag); |
| 1413 | } |
| 1414 | } |
| 1415 | if (!ne) |
| 1416 | return NULL; |
| 1417 | return exp_propagate(sql->sa, ne, e); |
| 1418 | case e_convert: |
| 1419 | l = _exp_push_down(sql, e->l, f, t); |
| 1420 | if (l) |
| 1421 | return exp_convert(sql->sa, l, exp_fromtype(e), exp_totype(e)); |
| 1422 | return NULL; |
| 1423 | case e_aggr: |
| 1424 | case e_func: { |
| 1425 | list *l = e->l, *nl = NULL; |
| 1426 | |
| 1427 | if (!l) { |
| 1428 | return e; |
| 1429 | } else { |
| 1430 | nl = exps_push_down(sql, l, f, t); |
| 1431 | if (!nl) |
| 1432 | return NULL; |
| 1433 | } |
| 1434 | if (e->type == e_func) |
| 1435 | return exp_op(sql->sa, nl, e->f); |
| 1436 | else |
| 1437 | return exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e)); |
| 1438 | } |
| 1439 | case e_atom: |
| 1440 | case e_psm: |
| 1441 | return e; |
| 1442 | } |
| 1443 | return NULL; |
| 1444 | } |
| 1445 | |
| 1446 | static sql_exp * |
| 1447 | exp_push_down(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t) |
| 1448 | { |
| 1449 | return _exp_push_down(sql, e, f, t); |
| 1450 | } |
| 1451 | |
| 1452 | /* some projections results are order dependend (row_number etc) */ |
| 1453 | static int |
| 1454 | project_unsafe(sql_rel *rel, int allow_identity) |
| 1455 | { |
| 1456 | sql_rel *sub = rel->l; |
| 1457 | node *n; |
| 1458 | |
| 1459 | if (need_distinct(rel) || rel->r /* order by */) |
| 1460 | return 1; |
| 1461 | if (!rel->exps) |
| 1462 | return 0; |
| 1463 | /* projects without sub and projects around ddl's cannot be changed */ |
| 1464 | if (!sub || (sub && sub->op == op_ddl)) |
| 1465 | return 1; |
| 1466 | for(n = rel->exps->h; n; n = n->next) { |
| 1467 | sql_exp *e = n->data; |
| 1468 | |
| 1469 | /* aggr func in project ! */ |
| 1470 | if (exp_unsafe(e, allow_identity)) |
| 1471 | return 1; |
| 1472 | } |
| 1473 | return 0; |
| 1474 | } |
| 1475 | |
| 1476 | static int |
| 1477 | math_unsafe(sql_subfunc *f) |
| 1478 | { |
| 1479 | if (!f->func->s) { |
| 1480 | if (strcmp(f->func->base.name, "sql_div" ) == 0 || |
| 1481 | strcmp(f->func->base.name, "sqrt" ) == 0 || |
| 1482 | strcmp(f->func->base.name, "atan" ) == 0 ) |
| 1483 | return 1; |
| 1484 | } |
| 1485 | return 0; |
| 1486 | } |
| 1487 | |
| 1488 | static int |
| 1489 | can_push_func(sql_exp *e, sql_rel *rel, int *must) |
| 1490 | { |
| 1491 | if (!e) |
| 1492 | return 0; |
| 1493 | switch(e->type) { |
| 1494 | case e_cmp: { |
| 1495 | int mustl = 0, mustr = 0, mustf = 0; |
| 1496 | sql_exp *l = e->l, *r = e->r, *f = e->f; |
| 1497 | |
| 1498 | if (get_cmp(e) == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter) |
| 1499 | return 0; |
| 1500 | return ((l->type == e_column || can_push_func(l, rel, &mustl)) && (*must = mustl)) || |
| 1501 | (!f && (r->type == e_column || can_push_func(r, rel, &mustr)) && (*must = mustr)) || |
| 1502 | (f && |
| 1503 | (r->type == e_column || can_push_func(r, rel, &mustr)) && |
| 1504 | (f->type == e_column || can_push_func(f, rel, &mustf)) && (*must = (mustr || mustf))); |
| 1505 | } |
| 1506 | case e_convert: |
| 1507 | return can_push_func(e->l, rel, must); |
| 1508 | case e_func: { |
| 1509 | list *l = e->l; |
| 1510 | node *n; |
| 1511 | int res = 1, lmust = 0; |
| 1512 | |
| 1513 | if (e->f){ |
| 1514 | sql_subfunc *f = e->f; |
| 1515 | if (math_unsafe(f) || f->func->type != F_FUNC) |
| 1516 | return 0; |
| 1517 | } |
| 1518 | if (l) for (n = l->h; n && res; n = n->next) |
| 1519 | res &= can_push_func(n->data, rel, &lmust); |
| 1520 | if (res && !lmust) |
| 1521 | return 1; |
| 1522 | (*must) |= lmust; |
| 1523 | return res; |
| 1524 | } |
| 1525 | case e_column: |
| 1526 | if (rel && !rel_find_exp(rel, e)) |
| 1527 | return 0; |
| 1528 | (*must) = 1; |
| 1529 | /* fall through */ |
| 1530 | case e_atom: |
| 1531 | default: |
| 1532 | return 1; |
| 1533 | } |
| 1534 | } |
| 1535 | |
| 1536 | static int |
| 1537 | exps_can_push_func(list *exps, sql_rel *rel) |
| 1538 | { |
| 1539 | node *n; |
| 1540 | |
| 1541 | for(n = exps->h; n; n = n->next) { |
| 1542 | sql_exp *e = n->data; |
| 1543 | int must = 0, mustl = 0, mustr = 0; |
| 1544 | |
| 1545 | if (is_joinop(rel->op) && ((can_push_func(e, rel->l, &mustl) && mustl) || (can_push_func(e, rel->r, &mustr) && mustr))) |
| 1546 | return 1; |
| 1547 | else if (is_select(rel->op) && can_push_func(e, NULL, &must) && must) |
| 1548 | return 1; |
| 1549 | } |
| 1550 | return 0; |
| 1551 | } |
| 1552 | |
| 1553 | static int |
| 1554 | exp_needs_push_down(sql_exp *e) |
| 1555 | { |
| 1556 | if (!e) |
| 1557 | return 0; |
| 1558 | switch(e->type) { |
| 1559 | case e_cmp: |
| 1560 | if (get_cmp(e) == cmp_or || e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter) |
| 1561 | return 0; |
| 1562 | return exp_needs_push_down(e->l) || exp_needs_push_down(e->r) || (e->f && exp_needs_push_down(e->f)); |
| 1563 | case e_convert: |
| 1564 | return exp_needs_push_down(e->l); |
| 1565 | case e_aggr: |
| 1566 | case e_func: |
| 1567 | return 1; |
| 1568 | case e_column: |
| 1569 | case e_atom: |
| 1570 | default: |
| 1571 | return 0; |
| 1572 | } |
| 1573 | } |
| 1574 | |
| 1575 | static int |
| 1576 | exps_need_push_down( list *exps ) |
| 1577 | { |
| 1578 | node *n; |
| 1579 | for(n = exps->h; n; n = n->next) |
| 1580 | if (exp_needs_push_down(n->data)) |
| 1581 | return 1; |
| 1582 | return 0; |
| 1583 | } |
| 1584 | |
| 1585 | static sql_rel * |
| 1586 | rel_push_func_down(int *changes, mvc *sql, sql_rel *rel) |
| 1587 | { |
| 1588 | if ((is_select(rel->op) || is_joinop(rel->op)) && rel->l && rel->exps && !(rel_is_ref(rel))) { |
| 1589 | list *exps = rel->exps; |
| 1590 | |
| 1591 | if (is_select(rel->op) && list_length(rel->exps) <= 1) /* only push down when thats useful */ |
| 1592 | return rel; |
| 1593 | if (exps_can_push_func(exps, rel) && exps_need_push_down(exps)) { |
| 1594 | sql_rel *nrel; |
| 1595 | sql_rel *l = rel->l, *ol = l; |
| 1596 | sql_rel *r = rel->r, *or = r; |
| 1597 | node *n; |
| 1598 | |
| 1599 | /* we need a full projection, group by's and unions cannot be extended |
| 1600 | * with more expressions */ |
| 1601 | if (rel_is_ref(l)) |
| 1602 | return rel; |
| 1603 | if (l->op != op_project) { |
| 1604 | if (is_subquery(l)) |
| 1605 | return rel; |
| 1606 | rel->l = l = rel_project(sql->sa, l, |
| 1607 | rel_projections(sql, l, NULL, 1, 1)); |
| 1608 | } |
| 1609 | if (is_joinop(rel->op) && rel_is_ref(r)) |
| 1610 | return rel; |
| 1611 | if (is_joinop(rel->op) && r->op != op_project) { |
| 1612 | if (is_subquery(r)) |
| 1613 | return rel; |
| 1614 | rel->r = r = rel_project(sql->sa, r, |
| 1615 | rel_projections(sql, r, NULL, 1, 1)); |
| 1616 | } |
| 1617 | nrel = rel_project(sql->sa, rel, rel_projections(sql, rel, NULL, 1, 1)); |
| 1618 | for(n = exps->h; n; n = n->next) { |
| 1619 | sql_exp *e = n->data, *ne = NULL; |
| 1620 | int must = 0, mustl = 0, mustr = 0; |
| 1621 | |
| 1622 | if (e->type == e_column) |
| 1623 | continue; |
| 1624 | if ((is_joinop(rel->op) && ((can_push_func(e, l, &mustl) && mustl) || (can_push_func(e, r, &mustr) && mustr))) || |
| 1625 | (is_select(rel->op) && can_push_func(e, NULL, &must) && must)) { |
| 1626 | must = 0; mustl = 0; mustr = 0; |
| 1627 | if (e->type != e_cmp) { /* predicate */ |
| 1628 | if ((is_joinop(rel->op) && ((can_push_func(e, l, &mustl) && mustl) || (can_push_func(e, r, &mustr) && mustr))) || |
| 1629 | (is_select(rel->op) && can_push_func(e, NULL, &must) && must)) { |
| 1630 | exp_label(sql->sa, e, ++sql->label); |
| 1631 | if (mustr) |
| 1632 | append(r->exps, e); |
| 1633 | else |
| 1634 | append(l->exps, e); |
| 1635 | e = exp_ref(sql->sa, e); |
| 1636 | n->data = e; |
| 1637 | (*changes)++; |
| 1638 | } |
| 1639 | } else { |
| 1640 | ne = e->l; |
| 1641 | if ((is_joinop(rel->op) && ((can_push_func(ne, l, &mustl) && mustl) || (can_push_func(ne, r, &mustr) && mustr))) || |
| 1642 | (is_select(rel->op) && can_push_func(ne, NULL, &must) && must)) { |
| 1643 | exp_label(sql->sa, ne, ++sql->label); |
| 1644 | if (mustr) |
| 1645 | append(r->exps, ne); |
| 1646 | else |
| 1647 | append(l->exps, ne); |
| 1648 | ne = exp_ref(sql->sa, ne); |
| 1649 | (*changes)++; |
| 1650 | } |
| 1651 | e->l = ne; |
| 1652 | |
| 1653 | must = 0; mustl = 0; mustr = 0; |
| 1654 | ne = e->r; |
| 1655 | if ((is_joinop(rel->op) && ((can_push_func(ne, l, &mustl) && mustl) || (can_push_func(ne, r, &mustr) && mustr))) || |
| 1656 | (is_select(rel->op) && can_push_func(ne, NULL, &must) && must)) { |
| 1657 | exp_label(sql->sa, ne, ++sql->label); |
| 1658 | if (mustr) |
| 1659 | append(r->exps, ne); |
| 1660 | else |
| 1661 | append(l->exps, ne); |
| 1662 | ne = exp_ref(sql->sa, ne); |
| 1663 | (*changes)++; |
| 1664 | } |
| 1665 | e->r = ne; |
| 1666 | |
| 1667 | if (e->f) { |
| 1668 | must = 0; mustl = 0; mustr = 0; |
| 1669 | ne = e->f; |
| 1670 | if ((is_joinop(rel->op) && ((can_push_func(ne, l, &mustl) && mustl) || (can_push_func(ne, r, &mustr) && mustr))) || |
| 1671 | (is_select(rel->op) && can_push_func(ne, NULL, &must) && must)) { |
| 1672 | exp_label(sql->sa, ne, ++sql->label); |
| 1673 | if (mustr) |
| 1674 | append(r->exps, ne); |
| 1675 | else |
| 1676 | append(l->exps, ne); |
| 1677 | ne = exp_ref(sql->sa, ne); |
| 1678 | (*changes)++; |
| 1679 | } |
| 1680 | e->f = ne; |
| 1681 | } |
| 1682 | } |
| 1683 | } |
| 1684 | } |
| 1685 | if (*changes) { |
| 1686 | rel = nrel; |
| 1687 | } else { |
| 1688 | if (l != ol) |
| 1689 | rel->l = ol; |
| 1690 | if (is_joinop(rel->op) && r != or) |
| 1691 | rel->r = or; |
| 1692 | } |
| 1693 | } |
| 1694 | } |
| 1695 | if (rel->op == op_project && rel->l && rel->exps) { |
| 1696 | sql_rel *pl = rel->l; |
| 1697 | |
| 1698 | if (is_joinop(pl->op) && exps_can_push_func(rel->exps, rel)) { |
| 1699 | node *n; |
| 1700 | sql_rel *l = pl->l, *r = pl->r; |
| 1701 | list *nexps; |
| 1702 | |
| 1703 | if (l->op != op_project) { |
| 1704 | if (is_subquery(l)) |
| 1705 | return rel; |
| 1706 | pl->l = l = rel_project(sql->sa, l, |
| 1707 | rel_projections(sql, l, NULL, 1, 1)); |
| 1708 | } |
| 1709 | if (is_joinop(rel->op) && r->op != op_project) { |
| 1710 | if (is_subquery(r)) |
| 1711 | return rel; |
| 1712 | pl->r = r = rel_project(sql->sa, r, |
| 1713 | rel_projections(sql, r, NULL, 1, 1)); |
| 1714 | } |
| 1715 | nexps = new_exp_list(sql->sa); |
| 1716 | for ( n = rel->exps->h; n; n = n->next) { |
| 1717 | sql_exp *e = n->data; |
| 1718 | int mustl = 0, mustr = 0; |
| 1719 | |
| 1720 | if ((can_push_func(e, l, &mustl) && mustl) || |
| 1721 | (can_push_func(e, r, &mustr) && mustr)) { |
| 1722 | if (mustl) |
| 1723 | append(l->exps, e); |
| 1724 | else |
| 1725 | append(r->exps, e); |
| 1726 | } else |
| 1727 | append(nexps, e); |
| 1728 | } |
| 1729 | rel->exps = nexps; |
| 1730 | (*changes)++; |
| 1731 | } |
| 1732 | } |
| 1733 | return rel; |
| 1734 | } |
| 1735 | |
| 1736 | |
| 1737 | /* |
| 1738 | * Push Count inside crossjoin down, and multiply the results |
| 1739 | * |
| 1740 | * project ( project( |
| 1741 | * group by ( crossproduct ( |
| 1742 | * crossproduct( project ( |
| 1743 | * L, => group by ( |
| 1744 | * R L |
| 1745 | * ) [ ] [ count NOT NULL ] ) [ ] [ count NOT NULL ] |
| 1746 | * ) ), |
| 1747 | * ) [ NOT NULL ] project ( |
| 1748 | * group by ( |
| 1749 | * R |
| 1750 | * ) [ ] [ count NOT NULL ] |
| 1751 | * ) |
| 1752 | * ) [ sql_mul(.., .. NOT NULL) ] |
| 1753 | * ) |
| 1754 | */ |
| 1755 | static sql_rel * |
| 1756 | rel_push_count_down(int *changes, mvc *sql, sql_rel *rel) |
| 1757 | { |
| 1758 | sql_rel *r; |
| 1759 | |
| 1760 | if (!is_groupby(rel->op)) |
| 1761 | return rel; |
| 1762 | |
| 1763 | r = rel->l; |
| 1764 | |
| 1765 | if (is_groupby(rel->op) && !rel_is_ref(rel) && |
| 1766 | r && !r->exps && r->op == op_join && !(rel_is_ref(r)) && |
| 1767 | /* currently only single count aggregation is handled, no other projects or aggregation */ |
| 1768 | list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) { |
| 1769 | sql_exp *nce, *oce; |
| 1770 | sql_rel *gbl, *gbr; /* Group By */ |
| 1771 | sql_rel *cp; /* Cross Product */ |
| 1772 | sql_subfunc *mult; |
| 1773 | list *args; |
| 1774 | const char *rname = NULL, *name = NULL; |
| 1775 | sql_rel *srel; |
| 1776 | |
| 1777 | oce = rel->exps->h->data; |
| 1778 | if (oce->l) /* we only handle COUNT(*) */ |
| 1779 | return rel; |
| 1780 | rname = exp_relname(oce); |
| 1781 | name = exp_name(oce); |
| 1782 | |
| 1783 | args = new_exp_list(sql->sa); |
| 1784 | srel = r->l; |
| 1785 | { |
| 1786 | sql_subaggr *cf = sql_bind_aggr(sql->sa, sql->session->schema, "count" , NULL); |
| 1787 | sql_exp *cnt, *e = exp_aggr(sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0); |
| 1788 | |
| 1789 | exp_label(sql->sa, e, ++sql->label); |
| 1790 | cnt = exp_ref(sql->sa, e); |
| 1791 | gbl = rel_groupby(sql, rel_dup(srel), NULL); |
| 1792 | rel_groupby_add_aggr(sql, gbl, e); |
| 1793 | append(args, cnt); |
| 1794 | } |
| 1795 | |
| 1796 | srel = r->r; |
| 1797 | { |
| 1798 | sql_subaggr *cf = sql_bind_aggr(sql->sa, sql->session->schema, "count" , NULL); |
| 1799 | sql_exp *cnt, *e = exp_aggr(sql->sa, NULL, cf, need_distinct(oce), need_no_nil(oce), oce->card, 0); |
| 1800 | |
| 1801 | exp_label(sql->sa, e, ++sql->label); |
| 1802 | cnt = exp_ref(sql->sa, e); |
| 1803 | gbr = rel_groupby(sql, rel_dup(srel), NULL); |
| 1804 | rel_groupby_add_aggr(sql, gbr, e); |
| 1805 | append(args, cnt); |
| 1806 | } |
| 1807 | |
| 1808 | mult = find_func(sql, "sql_mul" , args); |
| 1809 | cp = rel_crossproduct(sql->sa, gbl, gbr, op_join); |
| 1810 | |
| 1811 | nce = exp_op(sql->sa, args, mult); |
| 1812 | exp_setname(sql->sa, nce, rname, name ); |
| 1813 | |
| 1814 | rel_destroy(rel); |
| 1815 | rel = rel_project(sql->sa, cp, append(new_exp_list(sql->sa), nce)); |
| 1816 | |
| 1817 | (*changes)++; |
| 1818 | } |
| 1819 | |
| 1820 | return rel; |
| 1821 | } |
| 1822 | |
| 1823 | |
| 1824 | static sql_rel * |
| 1825 | rel_simplify_project_fk_join(int *changes, mvc *sql, sql_rel *r, list *pexps) |
| 1826 | { |
| 1827 | sql_rel *rl = r->l; |
| 1828 | sql_rel *rr = r->r; |
| 1829 | sql_exp *je; |
| 1830 | node *n; |
| 1831 | int fk_left = 1; |
| 1832 | |
| 1833 | /* check for foreign key join */ |
| 1834 | if (!r->exps || list_length(r->exps) != 1) |
| 1835 | return r; |
| 1836 | je = r->exps->h->data; |
| 1837 | if (je && !find_prop(je->p, PROP_JOINIDX)) |
| 1838 | return r; |
| 1839 | /* je->l == foreign expression, je->r == primary expression */ |
| 1840 | if (rel_find_exp(r->l, je->l)) { |
| 1841 | fk_left = 1; |
| 1842 | } else if (rel_find_exp(r->r, je->l)) { |
| 1843 | fk_left = 0; |
| 1844 | } else { /* not found */ |
| 1845 | return r; |
| 1846 | } |
| 1847 | |
| 1848 | (void)sql; |
| 1849 | #if 0 |
| 1850 | if (fk_left && is_join(rl->op) && !rel_is_ref(rl)) { |
| 1851 | rl = rel_simplify_project_fk_join(changes, sql, rl, pexps); |
| 1852 | r->l = rl; |
| 1853 | } |
| 1854 | if (!fk_left && is_join(rr->op) && !rel_is_ref(rr)) { |
| 1855 | rr = rel_simplify_project_fk_join(changes, sql, rr, pexps); |
| 1856 | r->r = rr; |
| 1857 | } |
| 1858 | #endif |
| 1859 | /* primary side must be a full table */ |
| 1860 | if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) || |
| 1861 | (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op))) |
| 1862 | return r; |
| 1863 | |
| 1864 | /* projection columns from the foreign side */ |
| 1865 | for (n = pexps->h; n; n = n->next) { |
| 1866 | sql_exp *pe = n->data; |
| 1867 | |
| 1868 | if (pe && is_atom(pe->type)) |
| 1869 | continue; |
| 1870 | if (pe && !is_alias(pe->type)) |
| 1871 | return r; |
| 1872 | /* check for columns from the pk side, then keep the join with the pk */ |
| 1873 | if ((fk_left && rel_find_exp(r->r, pe)) || |
| 1874 | (!fk_left && rel_find_exp(r->l, pe))) |
| 1875 | return r; |
| 1876 | } |
| 1877 | |
| 1878 | (*changes)++; |
| 1879 | /* rewrite, ie remove pkey side */ |
| 1880 | if (fk_left) |
| 1881 | return r->l; |
| 1882 | return r->r; |
| 1883 | } |
| 1884 | |
| 1885 | static sql_rel * |
| 1886 | rel_simplify_count_fk_join(int *changes, mvc *sql, sql_rel *r, list *gexps) |
| 1887 | { |
| 1888 | sql_rel *rl = r->l; |
| 1889 | sql_rel *rr = r->r; |
| 1890 | sql_exp *oce, *je; |
| 1891 | int fk_left = 1; |
| 1892 | |
| 1893 | /* check for foreign key join */ |
| 1894 | if (!r->exps || list_length(r->exps) != 1) |
| 1895 | return r; |
| 1896 | je = r->exps->h->data; |
| 1897 | if (je && !find_prop(je->p, PROP_JOINIDX)) |
| 1898 | return r; |
| 1899 | /* je->l == foreign expression, je->r == primary expression */ |
| 1900 | if (rel_find_exp(r->l, je->l)) { |
| 1901 | fk_left = 1; |
| 1902 | } else if (rel_find_exp(r->r, je->l)) { |
| 1903 | fk_left = 0; |
| 1904 | } else { /* not found */ |
| 1905 | return r; |
| 1906 | } |
| 1907 | |
| 1908 | oce = gexps->h->data; |
| 1909 | if (oce->l) /* we only handle COUNT(*) */ |
| 1910 | return r; |
| 1911 | |
| 1912 | if (fk_left && is_join(rl->op) && !rel_is_ref(rl)) { |
| 1913 | rl = rel_simplify_count_fk_join(changes, sql, rl, gexps); |
| 1914 | r->l = rl; |
| 1915 | } |
| 1916 | if (!fk_left && is_join(rr->op) && !rel_is_ref(rr)) { |
| 1917 | rr = rel_simplify_count_fk_join(changes, sql, rr, gexps); |
| 1918 | r->r = rr; |
| 1919 | } |
| 1920 | /* primary side must be a full table */ |
| 1921 | if ((fk_left && (!is_left(r->op) && !is_full(r->op)) && !is_basetable(rr->op)) || |
| 1922 | (!fk_left && (!is_right(r->op) && !is_full(r->op)) && !is_basetable(rl->op))) |
| 1923 | return r; |
| 1924 | |
| 1925 | (*changes)++; |
| 1926 | /* rewrite, ie remove pkey side */ |
| 1927 | if (fk_left) |
| 1928 | return r->l; |
| 1929 | return r->r; |
| 1930 | } |
| 1931 | |
| 1932 | /* |
| 1933 | * Handle (left/right/outer/natural) join fk-pk rewrites |
| 1934 | * 1 group by ( fk-pk-join () ) [ count(*) ] -> groub py ( fk ) |
| 1935 | * 2 project ( fk-pk-join () ) [ fk-column ] -> project (fk table)[ fk-column ] |
| 1936 | * 3 project ( fk1-pk1-join( fk2-pk2-join()) [ fk-column, pk1 column ] -> project (fk1-pk1-join)[ fk-column, pk1 column ] |
| 1937 | */ |
| 1938 | static sql_rel * |
| 1939 | rel_simplify_fk_joins(int *changes, mvc *sql, sql_rel *rel) |
| 1940 | { |
| 1941 | sql_rel *r = NULL; |
| 1942 | |
| 1943 | if (rel->op == op_project) |
| 1944 | r = rel->l; |
| 1945 | |
| 1946 | while (rel->op == op_project && r && r->exps && list_length(r->exps) == 1 && is_join(r->op) && !(rel_is_ref(r))) { |
| 1947 | sql_rel *or = r; |
| 1948 | |
| 1949 | r = rel_simplify_project_fk_join(changes, sql, r, rel->exps); |
| 1950 | if (r == or) |
| 1951 | return rel; |
| 1952 | rel->l = r; |
| 1953 | } |
| 1954 | |
| 1955 | (void)sql; |
| 1956 | if (!is_groupby(rel->op)) |
| 1957 | return rel; |
| 1958 | |
| 1959 | r = rel->l; |
| 1960 | while(r && r->op == op_project) |
| 1961 | r = r->l; |
| 1962 | |
| 1963 | while (is_groupby(rel->op) && !rel_is_ref(rel) && |
| 1964 | r && r->exps && is_join(r->op) && list_length(r->exps) == 1 && !(rel_is_ref(r)) && |
| 1965 | /* currently only single count aggregation is handled, no other projects or aggregation */ |
| 1966 | list_length(rel->exps) == 1 && exp_aggr_is_count(rel->exps->h->data)) { |
| 1967 | sql_rel *or = r; |
| 1968 | |
| 1969 | r = rel_simplify_count_fk_join(changes, sql, r, rel->exps); |
| 1970 | if (r == or) |
| 1971 | return rel; |
| 1972 | rel->l = r; |
| 1973 | } |
| 1974 | return rel; |
| 1975 | } |
| 1976 | |
| 1977 | /* |
| 1978 | * Push TopN (only LIMIT, no ORDER BY) down through projections underneath crossproduct, i.e., |
| 1979 | * |
| 1980 | * topn( topn( |
| 1981 | * project( project( |
| 1982 | * crossproduct( crossproduct( |
| 1983 | * L, => topn( L )[ n ], |
| 1984 | * R topn( R )[ n ] |
| 1985 | * ) ) |
| 1986 | * )[ Cs ]* )[ Cs ]* |
| 1987 | * )[ n ] )[ n ] |
| 1988 | * |
| 1989 | * (TODO: in case of n==1 we can omit the original top-level TopN) |
| 1990 | * |
| 1991 | * also push topn under (non reordering) projections. |
| 1992 | */ |
| 1993 | |
| 1994 | static list * |
| 1995 | sum_limit_offset(mvc *sql, list *exps ) |
| 1996 | { |
| 1997 | list *nexps = new_exp_list(sql->sa); |
| 1998 | sql_subtype *lng = sql_bind_localtype("lng" ); |
| 1999 | sql_subfunc *add; |
| 2000 | |
| 2001 | /* if the expression list only consists of a limit expression, |
| 2002 | * we copy it */ |
| 2003 | if (list_length(exps) == 1 && exps->h->data) |
| 2004 | return append(nexps, exps->h->data); |
| 2005 | add = sql_bind_func_result(sql->sa, sql->session->schema, "sql_add" , lng, lng, lng); |
| 2006 | return append(nexps, exp_op(sql->sa, exps, add)); |
| 2007 | } |
| 2008 | |
| 2009 | static int |
| 2010 | topn_save_exps( list *exps ) |
| 2011 | { |
| 2012 | node *n; |
| 2013 | |
| 2014 | /* Limit only expression lists are always save */ |
| 2015 | if (list_length(exps) == 1) |
| 2016 | return 1; |
| 2017 | for (n = exps->h; n; n = n->next ) { |
| 2018 | sql_exp *e = n->data; |
| 2019 | |
| 2020 | if (!e || e->type != e_atom) |
| 2021 | return 0; |
| 2022 | } |
| 2023 | return 1; |
| 2024 | } |
| 2025 | |
| 2026 | static void |
| 2027 | rel_no_rename_exps( list *exps ) |
| 2028 | { |
| 2029 | node *n; |
| 2030 | |
| 2031 | for (n = exps->h; n; n = n->next) { |
| 2032 | sql_exp *e = n->data; |
| 2033 | |
| 2034 | exp_setalias(e, e->l, e->r); |
| 2035 | } |
| 2036 | } |
| 2037 | |
| 2038 | static void |
| 2039 | rel_rename_exps( mvc *sql, list *exps1, list *exps2) |
| 2040 | { |
| 2041 | int pos = 0; |
| 2042 | node *n, *m; |
| 2043 | |
| 2044 | (void)sql; |
| 2045 | /* check if a column uses an alias earlier in the list */ |
| 2046 | for (n = exps1->h, m = exps2->h; n && m; n = n->next, m = m->next, pos++) { |
| 2047 | sql_exp *e2 = m->data; |
| 2048 | |
| 2049 | if (e2->type == e_column) { |
| 2050 | sql_exp *ne = NULL; |
| 2051 | |
| 2052 | if (e2->l) |
| 2053 | ne = exps_bind_column2(exps2, e2->l, e2->r); |
| 2054 | if (!ne && !e2->l) |
| 2055 | ne = exps_bind_column(exps2, e2->r, NULL); |
| 2056 | if (ne) { |
| 2057 | int p = list_position(exps2, ne); |
| 2058 | |
| 2059 | if (p < pos) { |
| 2060 | ne = list_fetch(exps1, p); |
| 2061 | if (e2->l) |
| 2062 | e2->l = (void *) exp_relname(ne); |
| 2063 | e2->r = (void *) exp_name(ne); |
| 2064 | } |
| 2065 | } |
| 2066 | } |
| 2067 | } |
| 2068 | |
| 2069 | assert(list_length(exps1) <= list_length(exps2)); |
| 2070 | for (n = exps1->h, m = exps2->h; n && m; n = n->next, m = m->next) { |
| 2071 | sql_exp *e1 = n->data; |
| 2072 | sql_exp *e2 = m->data; |
| 2073 | const char *rname = exp_relname(e1); |
| 2074 | |
| 2075 | if (!rname && e1->type == e_column && e1->l && exp_relname(e2) && |
| 2076 | strcmp(e1->l, exp_relname(e2)) == 0) |
| 2077 | rname = exp_relname(e2); |
| 2078 | exp_setalias(e2, rname, exp_name(e1)); |
| 2079 | } |
| 2080 | MT_lock_set(&exps2->ht_lock); |
| 2081 | exps2->ht = NULL; |
| 2082 | MT_lock_unset(&exps2->ht_lock); |
| 2083 | } |
| 2084 | |
/*
 * Push a topn operator (rel) towards the leaves of the plan.
 *
 * Nothing is done unless rel is an op_topn whose expression list is accepted
 * by topn_save_exps().  The following rewrites are tried in order; each one
 * that fires increments *changes and returns:
 *   - topn directly over an unshared union: both union inputs are wrapped in
 *     a topn built with sum_limit_offset();
 *   - topn over an unshared ordering project over a union: the union is
 *     rebuilt with a copy of the project (including its order by list) plus
 *     a topn on each input, and the original project is replaced by a new
 *     project on top of the new union;
 *   - topn over a cross product (op_join without expressions): both join
 *     inputs are wrapped in a topn.
 *
 * Always returns rel; only its subtree is modified.
 */
static sql_rel *
rel_push_topn_down(int *changes, mvc *sql, sql_rel *rel)
{
	sql_rel *rl, *r = rel->l;

	if (rel->op == op_topn && topn_save_exps(rel->exps)) {
		sql_rel *rp = NULL;

		/* never push a topn below a distinct projection */
		if (r && r->op == op_project && need_distinct(r))
			return rel;
		/* duplicate topn direct under union */

		if (r && r->exps && r->op == op_union && !(rel_is_ref(r)) && r->l) {
			sql_rel *u = r, *x;
			sql_rel *ul = u->l;
			sql_rel *ur = u->r;

			/* only push topn once */
			/* skip the projections on the left input; a topn below them means we were here before */
			x = ul;
			while(x->op == op_project && x->l)
				x = x->l;
			if (x && x->op == op_topn)
				return rel;
			/* same check for the right input */
			x = ur;
			while(x->op == op_project && x->l)
				x = x->l;
			if (x && x->op == op_topn)
				return rel;

			/* each side gets its own topn; the union node itself is kept */
			ul = rel_topn(sql->sa, ul, sum_limit_offset(sql, rel->exps));
			ur = rel_topn(sql->sa, ur, sum_limit_offset(sql, rel->exps));
			u->l = ul;
			u->r = ur;
			(*changes)++;
			return rel;
		}
		/* duplicate topn + [ project-order ] under union */
		if (r)
			rp = r->l;
		/* r->r is the order by list of the project; rp is only dereferenced after r->l was checked */
		if (r && r->exps && r->op == op_project && !(rel_is_ref(r)) && r->r && r->l &&
		    rp->op == op_union) {
			sql_rel *u = rp, *ou = u, *x;
			sql_rel *ul = u->l;
			sql_rel *ur = u->r;
			int add_r = 0;

			/* only push topn once */
			x = ul;
			while(x->op == op_project && x->l)
				x = x->l;
			if (x && x->op == op_topn)
				return rel;
			x = ur;
			while(x->op == op_project && x->l)
				x = x->l;
			if (x && x->op == op_topn)
				return rel;

			/* the union input produces more expressions than the project:
			 * the order by expressions are appended to the new projections below */
			if (list_length(ul->exps) > list_length(r->exps))
				add_r = 1;
			ul = rel_dup(ul);
			ur = rel_dup(ur);
			/* make sure both inputs are projections so they can be renamed */
			if (!is_project(ul->op))
				ul = rel_project(sql->sa, ul,
					rel_projections(sql, ul, NULL, 1, 1));
			if (!is_project(ur->op))
				ur = rel_project(sql->sa, ur,
					rel_projections(sql, ur, NULL, 1, 1));
			/* give the inputs the names used by the union's expressions */
			rel_rename_exps(sql, u->exps, ul->exps);
			rel_rename_exps(sql, u->exps, ur->exps);

			/* introduce projects under the set */
			ul = rel_project(sql->sa, ul, NULL);
			ul->exps = exps_copy(sql, r->exps);
			/* possibly add order by column */
			if (add_r)
				ul->exps = list_merge(ul->exps, exps_copy(sql, r->r), NULL);
			ul->r = exps_copy(sql, r->r);
			ul = rel_topn(sql->sa, ul, sum_limit_offset(sql, rel->exps));
			ur = rel_project(sql->sa, ur, NULL);
			ur->exps = exps_copy(sql, r->exps);
			/* possibly add order by column */
			if (add_r)
				ur->exps = list_merge(ur->exps, exps_copy(sql, r->r), NULL);
			ur->r = exps_copy(sql, r->r);
			ur = rel_topn(sql->sa, ur, sum_limit_offset(sql, rel->exps));
			/* new union over the two limited, ordered inputs */
			u = rel_setop(sql->sa, ul, ur, op_union);
			u->exps = exps_alias(sql->sa, r->exps);
			set_processed(u);
			/* possibly add order by column */
			if (add_r)
				u->exps = list_merge(u->exps, exps_copy(sql, r->r), NULL);
			/* NOTE(review): ul/ur point at the topn nodes here, so the distinct
			 * flag is set on those rather than on the projections below - confirm */
			if (need_distinct(r)) {
				set_distinct(ul);
				set_distinct(ur);
			}

			/* zap names */
			rel_no_rename_exps(u->exps);
			/* release the old union (the inputs were rel_dup'ed above) */
			rel_destroy(ou);

			/* replacement for the original ordering project; ur is reused as a plain temporary */
			ur = rel_project(sql->sa, u, exps_alias(sql->sa, r->exps));
			ur->r = r->r;
			/* detach the child before destroying the old project */
			r->l = NULL;

			if (need_distinct(r))
				set_distinct(ur);

			rel_destroy(r);
			rel->l = ur;
			(*changes)++;
			return rel;
		}

		/* pass through projections */
		while (r && is_project(r->op) && !need_distinct(r) &&
			!(rel_is_ref(r)) &&
			!r->r && (rl = r->l) != NULL && is_project(rl->op)) {
			/* ensure there is no order by */
			/* the loop condition already requires !r->r, so the else branch cannot be taken */
			if (!r->r) {
				r = r->l;
			} else {
				r = NULL;
			}
		}
		/* NOTE(review): the topn created here is only stored in the local r; it is
		 * not linked into rel's subtree and *changes is not incremented - confirm
		 * whether this is intended */
		if (r && r != rel && r->op == op_project && !(rel_is_ref(r)) && !r->r && r->l) {
			r = rel_topn(sql->sa, r, sum_limit_offset(sql, rel->exps));
		}

		/* push topn under crossproduct */
		/* a join without expressions is a cross product; skip when a side already is a topn */
		if (r && !r->exps && r->op == op_join && !(rel_is_ref(r)) &&
		    ((sql_rel *)r->l)->op != op_topn && ((sql_rel *)r->r)->op != op_topn) {
			r->l = rel_topn(sql->sa, r->l, sum_limit_offset(sql, rel->exps));
			r->r = rel_topn(sql->sa, r->r, sum_limit_offset(sql, rel->exps));
			(*changes)++;
			return rel;
		}
		/* TODO */
#if 0
		/* duplicate topn + [ project-order ] under join on independend always matching joins */
		if (r)
			rp = r->l;
		if (r && r->exps && r->op == op_project && !(rel_is_ref(r)) && r->r && r->l &&
		    rp->op == op_join && rp->exps && rp->exps->h && ((prop*)((sql_exp*)rp->exps->h->data)->p)->kind == PROP_FETCH &&
		    ((sql_rel *)rp->l)->op != op_topn && ((sql_rel *)rp->r)->op != op_topn) {
			/* TODO check if order by columns are independend of join conditions */
			r->l = rel_topn(sql->sa, r->l, sum_limit_offset(sql, rel->exps));
			r->r = rel_topn(sql->sa, r->r, sum_limit_offset(sql, rel->exps));
			(*changes)++;
			return rel;
		}
#endif
	}
	return rel;
}
| 2240 | |
| 2241 | /* merge projection */ |
| 2242 | |
| 2243 | /* push an expression through a projection. |
 * The result should again be used in a projection.
| 2245 | */ |
| 2246 | static sql_exp * |
| 2247 | exp_push_down_prj(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t); |
| 2248 | |
| 2249 | static list * |
| 2250 | exps_push_down_prj(mvc *sql, list *exps, sql_rel *f, sql_rel *t) |
| 2251 | { |
| 2252 | node *n; |
| 2253 | list *nl = new_exp_list(sql->sa); |
| 2254 | |
| 2255 | for(n = exps->h; n; n = n->next) { |
| 2256 | sql_exp *arg = n->data, *narg = NULL; |
| 2257 | |
| 2258 | narg = exp_push_down_prj(sql, arg, f, t); |
| 2259 | if (!narg) |
| 2260 | return NULL; |
| 2261 | narg = exp_propagate(sql->sa, narg, arg); |
| 2262 | append(nl, narg); |
| 2263 | } |
| 2264 | return nl; |
| 2265 | } |
| 2266 | |
/*
 * Rewrite expression e, which refers to the output of the projecting
 * relation f, so that it no longer depends on the names introduced by f.
 *
 * e: expression to rewrite (recursed into for compound expressions)
 * f: relation whose expression list is used to resolve column references;
 *    must satisfy is_project() (asserted)
 * t: only forwarded to the recursive calls in this function
 *
 * Returns the rewritten expression (for some atom / psm / argument-less
 * function cases e itself), or NULL when e cannot be pushed down.
 */
static sql_exp *
exp_push_down_prj(mvc *sql, sql_exp *e, sql_rel *f, sql_rel *t)
{
	sql_exp *ne = NULL, *l, *r, *r2;

	assert(is_project(f->op));

	switch(e->type) {
	case e_column:
		/* resolve the reference in f's expression list, by (relation, name) or by name only */
		if (e->l)
			ne = exps_bind_column2(f->exps, e->l, e->r);
		if (!ne && !e->l)
			ne = exps_bind_column(f->exps, e->r, NULL);
		/* only plain columns and atoms can be substituted */
		if (!ne || (ne->type != e_column && ne->type != e_atom))
			return NULL;
		/* a labelled column of a project may itself refer to another entry of
		 * the same expression list: follow such references */
		while (ne && has_label(ne) && f->op == op_project && ne->type == e_column) {
			sql_exp *oe = e, *one = ne;

			e = ne;
			ne = NULL;
			if (e->l)
				ne = exps_bind_column2(f->exps, e->l, e->r);
			if (!ne && !e->l)
				ne = exps_bind_column(f->exps, e->r, NULL);
			/* only accept a match located before the current one in the list */
			if (ne && ne != one && list_position(f->exps, ne) >= list_position(f->exps, one))
				ne = NULL;
			/* no further indirection: restore the previous pair and stop */
			if (!ne || ne == one) {
				ne = one;
				e = oe;
				break;
			}
			if (ne->type != e_column && ne->type != e_atom)
				return NULL;
		}
		/* possibly a groupby/project column is renamed */
		if (is_groupby(f->op) && f->r) {
			/* f->r holds the group by expressions: rebind against those */
			sql_exp *gbe = NULL;
			if (ne->l)
				gbe = exps_bind_column2(f->r, ne->l, ne->r);
			/* NOTE(review): this tests e->l while the lookup above tests ne->l -
			 * confirm the asymmetry is intended */
			if (!gbe && !e->l)
				gbe = exps_bind_column(f->r, ne->r, NULL);
			ne = gbe;
			if (!ne || (ne->type != e_column && ne->type != e_atom))
				return NULL;
		}
		/* atoms are copied; columns become an alias that keeps e's name and
		 * type but points at the column ne refers to */
		if (ne->type == e_atom)
			e = exp_copy(sql, ne);
		else
			e = exp_alias(sql->sa, exp_relname(e), exp_name(e), ne->l, ne->r, exp_subtype(e), e->card, has_nil(e), is_intern(e));
		return exp_propagate(sql->sa, e, ne);
	case e_cmp:
		if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) {
			/* both sides are expression lists */
			list *l = exps_push_down_prj(sql, e->l, f, t);
			list *r = exps_push_down_prj(sql, e->r, f, t);

			if (!l || !r)
				return NULL;
			if (get_cmp(e) == cmp_filter)
				return exp_filter(sql->sa, l, r, e->f, is_anti(e));
			return exp_or(sql->sa, l, r, is_anti(e));
		} else if (e->flag == cmp_in || e->flag == cmp_notin) {
			/* single left expression against a list of values */
			sql_exp *l = exp_push_down_prj(sql, e->l, f, t);
			list *r = exps_push_down_prj(sql, e->r, f, t);

			if (!l || !r)
				return NULL;
			return exp_in(sql->sa, l, r, e->flag);
		} else {
			/* plain comparison; e->f, when set, is a third operand */
			l = exp_push_down_prj(sql, e->l, f, t);
			r = exp_push_down_prj(sql, e->r, f, t);
			if (e->f) {
				r2 = exp_push_down_prj(sql, e->f, f, t);
				if (l && r && r2)
					ne = exp_compare2(sql->sa, l, r, r2, e->flag);
			} else if (l && r) {
				ne = exp_compare(sql->sa, l, r, e->flag);
			}
		}
		/* ne is still NULL when one of the operands could not be pushed down */
		if (!ne)
			return NULL;
		return exp_propagate(sql->sa, ne, e);
	case e_convert:
		l = exp_push_down_prj(sql, e->l, f, t);
		if (l)
			return exp_convert(sql->sa, l, exp_fromtype(e), exp_totype(e));
		return NULL;
	case e_aggr:
	case e_func: {
		list *l = e->l, *nl = NULL;
		sql_exp *ne = NULL;

		/* functions flagged by exp_unsafe() are never pushed down */
		if (e->type == e_func && exp_unsafe(e,0))
			return NULL;
		if (!l) {
			/* no arguments: nothing refers to f */
			return e;
		} else {
			nl = exps_push_down_prj(sql, l, f, t);
			if (!nl)
				return NULL;
		}
		/* rebuild the call on top of the rewritten arguments */
		if (e->type == e_func)
			ne = exp_op(sql->sa, nl, e->f);
		else
			ne = exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e));
		return exp_propagate(sql->sa, ne, e);
	}
	case e_atom:
	case e_psm:
		if (e->type == e_atom && e->f) /* value list */
			return NULL;
		return e;
	}
	return NULL;
}
| 2381 | |
| 2382 | static int |
| 2383 | rel_is_unique( sql_rel *rel, sql_ukey *k) |
| 2384 | { |
| 2385 | switch(rel->op) { |
| 2386 | case op_left: |
| 2387 | case op_right: |
| 2388 | case op_full: |
| 2389 | case op_join: |
| 2390 | return 0; |
| 2391 | case op_semi: |
| 2392 | case op_anti: |
| 2393 | return rel_is_unique(rel->l, k); |
| 2394 | case op_table: |
| 2395 | case op_basetable: |
| 2396 | return 1; |
| 2397 | default: |
| 2398 | return 0; |
| 2399 | } |
| 2400 | } |
| 2401 | |
| 2402 | int |
| 2403 | exps_unique(mvc *sql, sql_rel *rel, list *exps) |
| 2404 | { |
| 2405 | node *n; |
| 2406 | char *matched = NULL; |
| 2407 | int nr = 0; |
| 2408 | sql_ukey *k = NULL; |
| 2409 | |
| 2410 | if (list_empty(exps)) |
| 2411 | return 0; |
| 2412 | for(n = exps->h; n && !k; n = n->next) { |
| 2413 | sql_exp *e = n->data; |
| 2414 | prop *p; |
| 2415 | |
| 2416 | if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL) |
| 2417 | k = p->value; |
| 2418 | } |
| 2419 | if (!k || list_length(k->k.columns) > list_length(exps)) |
| 2420 | return 0; |
| 2421 | if (rel) { |
| 2422 | matched = (char*)sa_alloc(sql->sa, list_length(k->k.columns)); |
| 2423 | memset(matched, 0, list_length(k->k.columns)); |
| 2424 | for(n = exps->h; n; n = n->next) { |
| 2425 | sql_exp *e = n->data; |
| 2426 | fcmp cmp = (fcmp)&kc_column_cmp; |
| 2427 | sql_column *c = exp_find_column(rel, e, -2); |
| 2428 | node *m; |
| 2429 | |
| 2430 | if (c && (m=list_find(k->k.columns, c, cmp)) != NULL) { |
| 2431 | int pos = list_position(k->k.columns, m->data); |
| 2432 | if (!matched[pos]) |
| 2433 | nr++; |
| 2434 | matched[pos] = 1; |
| 2435 | } |
| 2436 | } |
| 2437 | if (nr == list_length(k->k.columns)) { |
| 2438 | return rel_is_unique(rel, k); |
| 2439 | } |
| 2440 | } |
| 2441 | /* |
| 2442 | if ((n = exps->h) != NULL) { |
| 2443 | sql_exp *e = n->data; |
| 2444 | prop *p; |
| 2445 | |
| 2446 | if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL) { |
| 2447 | sql_ukey *k = p->value; |
| 2448 | if (k && list_length(k->k.columns) <= 1) |
| 2449 | return 1; |
| 2450 | } |
| 2451 | } |
| 2452 | */ |
| 2453 | return 0; |
| 2454 | } |
| 2455 | |
| 2456 | static int |
| 2457 | rel_is_join_on_pkey( sql_rel *rel ) |
| 2458 | { |
| 2459 | node *n; |
| 2460 | |
| 2461 | if (!rel || !rel->exps) |
| 2462 | return 0; |
| 2463 | for (n = rel->exps->h; n; n = n->next){ |
| 2464 | sql_exp *je = n->data; |
| 2465 | |
| 2466 | if (je->type == e_cmp && je->flag == cmp_equal && |
| 2467 | find_prop(((sql_exp*)je->l)->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */ |
| 2468 | fcmp cmp = (fcmp)&kc_column_cmp; |
| 2469 | sql_exp *e = je->l; |
| 2470 | sql_column *c = exp_find_column(rel, e, -2); |
| 2471 | |
| 2472 | if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL) |
| 2473 | return 1; |
| 2474 | } |
| 2475 | } |
| 2476 | return 0; |
| 2477 | } |
| 2478 | |
| 2479 | /* if all arguments to a distinct aggregate are unique, remove 'distinct' property */ |
| 2480 | static sql_rel * |
| 2481 | rel_distinct_aggregate_on_unique_values(int *changes, mvc *sql, sql_rel *rel) |
| 2482 | { |
| 2483 | sql_rel *l = (sql_rel*) rel->l; |
| 2484 | |
| 2485 | (void) sql; |
| 2486 | if (rel->op == op_groupby && (!l || is_base(l->op))) { |
| 2487 | for (node *n = rel->exps->h; n; n = n->next) { |
| 2488 | sql_exp *exp = (sql_exp*) n->data; |
| 2489 | |
| 2490 | if (exp->type == e_aggr && need_distinct(exp)) { |
| 2491 | bool all_unique = true; |
| 2492 | |
| 2493 | for (node *m = ((list*)exp->l)->h; m && all_unique; m = m->next) { |
| 2494 | sql_exp *arg = (sql_exp*) m->data; |
| 2495 | |
| 2496 | if (arg->card == CARD_ATOM) /* constants are always unique */ |
| 2497 | continue; |
| 2498 | else if (arg->type == e_column) { |
| 2499 | fcmp cmp = (fcmp)&kc_column_cmp; |
| 2500 | sql_column *c = exp_find_column(rel, arg, -2); |
| 2501 | |
| 2502 | if (c) { |
| 2503 | /* column is the only primary key column of its table */ |
| 2504 | if (find_prop(arg->p, PROP_HASHCOL) && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL && list_length(c->t->pkey->k.columns) == 1) |
| 2505 | continue; |
| 2506 | else if (c->unique == 1) /* column has unique constraint */ |
| 2507 | continue; |
| 2508 | else |
| 2509 | all_unique = false; |
| 2510 | } else |
| 2511 | all_unique = false; |
| 2512 | } else |
| 2513 | all_unique = false; |
| 2514 | } |
| 2515 | if (all_unique) { |
| 2516 | set_nodistinct(exp); |
| 2517 | *changes = 1; |
| 2518 | } |
| 2519 | } |
| 2520 | } |
| 2521 | } |
| 2522 | return rel; |
| 2523 | } |
| 2524 | |
/*
 * Simplify or rewrite a distinct projection (rel).
 *
 * The rewrites below are tried in sequence; all of them require rel to be
 * an op_project with an input and the distinct flag set:
 *   - distinct over constants only: drop distinct, limit the input to 1 row;
 *   - distinct over a unique key on top of a select/semijoin: drop distinct;
 *   - distinct over a join on a primary key that is also projected: replace
 *     the join by one of its inputs and drop distinct;
 *   - distinct over a select over a group by, projecting all group by
 *     expressions: drop distinct;
 *   - otherwise (non-constant expressions): turn the distinct projection
 *     into a group by on its expressions.
 *
 * Returns rel, or a new projection on top of it when an order by list had
 * to be re-added after the group by conversion.
 */
static sql_rel *
rel_distinct_project2groupby(int *changes, mvc *sql, sql_rel *rel)
{
	/* note: l keeps pointing at the original input, also after rel->l is replaced below */
	sql_rel *l = rel->l;

	/* rewrite distinct project (table) [ constant ] -> project [ constant ] */
	/* NOTE(review): this rewrite does not update *changes */
	if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
	    exps_card(rel->exps) <= CARD_ATOM) {
		set_nodistinct(rel);
		rel->l = rel_topn(sql->sa, rel->l, append(sa_list(sql->sa), exp_atom_lng(sql->sa, 1)));
	}

	/* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
	 * into project [ pk ] ( select/semijoin ( table ) */
	if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
	    (l->op == op_select || l->op == op_semi) && exps_unique(sql, rel, rel->exps))
		set_nodistinct(rel);

	/* rewrite distinct project ( join(p,f) [ p.pk = f.fk] ) [ p.pk ] ->
	 * 	project(p)[p.pk]
	 */
	if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) &&
	    l && l->op == op_join && rel_is_join_on_pkey(l) /* [ pk == fk ] */) {
		sql_rel *j = l;
		sql_rel *p = j->l;
		/* only the first join expression and its left operand are inspected */
		sql_exp *je = l->exps->h->data, *le = je->l;

		if (exps_find_exp(rel->exps, le)) { /* rel must have the same primary key on the projection list */
			/* pick the join input in which le is found */
			int pside = (rel_find_exp(p, le) != NULL)?1:0;

			p = (pside)?j->l:j->r;
			/* keep a reference on the surviving input before the join is destroyed */
			rel->l = rel_dup(p);
			rel_destroy(j);
			*changes = 1;
			set_nodistinct(rel);
			return rel;
		}
	}
	/* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ])
	 * into project [ gbe ] ( select ( group etc ) */
	if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
	    need_distinct(rel) && l->op == op_select){
		sql_rel *g = l->l;
		if (is_groupby(g->op)) {
			list *used = sa_list(sql->sa);
			list *gbe = g->r;
			node *n;
			/* fnd starts at 1 and is incremented once per distinct group by
			 * expression found; it is reset to 0 on the first miss */
			int fnd = 1;

			for (n = rel->exps->h; n && fnd; n = n->next) {
				sql_exp *e = n->data;

				/* constants do not need to be matched */
				if (e->card > CARD_ATOM) {
					/* find e in gbe */
					sql_exp *ne = list_find_exp(g->exps, e);

					if (ne)
						ne = list_find_exp( gbe, ne);
					if (ne && !list_find_exp(used, ne)) {
						fnd++;
						list_append(used, ne);
					}
					if (!ne)
						fnd = 0;
				}
			}
			/* every group by expression is projected: the rows are already distinct */
			if (fnd == (list_length(gbe)+1))
				set_nodistinct(rel);
		}
	}
	/* remaining case: convert the distinct projection into a group by */
	if (rel->op == op_project && rel->l &&
	    need_distinct(rel) && exps_card(rel->exps) > CARD_ATOM) {
		node *n;
		list *exps = new_exp_list(sql->sa), *gbe = new_exp_list(sql->sa);
		list *obe = rel->r; /* we need to read the ordering later */

		if (obe) {
			int fnd = 0;

			/* give up when an order by expression is not a column of the projection list */
			for(n = obe->h; n && !fnd; n = n->next) {
				sql_exp *e = n->data;

				if (e->type != e_column)
					fnd = 1;
				else if (exps_bind_column2(rel->exps, e->l, e->r) == 0)
					fnd = 1;
			}
			if (fnd)
				return rel;
		}
		/* the original expressions move into a new projection below rel */
		rel->l = rel_project(sql->sa, rel->l, rel->exps);

		for (n = rel->exps->h; n; n = n->next) {
			sql_exp *e = n->data, *ne;

			/* unnamed expressions get a label so they can be referenced */
			if (!exp_name(e))
				exp_label(sql->sa, e, ++sql->label);
			ne = exp_ref(sql->sa, e);
			if (e->card > CARD_ATOM) { /* no need to group by on constants */
				append(gbe, ne);
			}
			append(exps, ne);
		}
		/* rel itself becomes the group by over references to the new projection */
		rel->op = op_groupby;
		rel->exps = exps;
		rel->r = gbe;
		set_nodistinct(rel);
		if (obe) {
			/* add order again */
			rel = rel_project(sql->sa, rel, rel_projections(sql, rel, NULL, 1, 1));
			rel->r = obe;
		}
		*changes = 1;
	}
	return rel;
}
| 2641 | |
| 2642 | static int |
| 2643 | exp_shares_exps( sql_exp *e, list *shared, lng *uses) |
| 2644 | { |
| 2645 | switch(e->type) { |
| 2646 | case e_cmp: /* not in projection list */ |
| 2647 | case e_psm: |
| 2648 | assert(0); |
| 2649 | case e_atom: |
| 2650 | return 0; |
| 2651 | case e_column: |
| 2652 | { |
| 2653 | sql_exp *ne = NULL; |
| 2654 | if (e->l) |
| 2655 | ne = exps_bind_column2(shared, e->l, e->r); |
| 2656 | if (!ne && !e->l) |
| 2657 | ne = exps_bind_column(shared, e->r, NULL); |
| 2658 | if (!ne) |
| 2659 | return 0; |
| 2660 | if (ne && ne->type != e_column) { |
| 2661 | lng used = (lng) 1 << list_position(shared, ne); |
| 2662 | if (used & *uses) |
| 2663 | return 1; |
| 2664 | *uses &= used; |
| 2665 | return 0; |
| 2666 | } |
| 2667 | if (ne && ne != e && (list_position(shared, e) < 0 || list_position(shared, e) > list_position(shared, ne))) |
| 2668 | /* maybe ne refers to a local complex exp */ |
| 2669 | return exp_shares_exps( ne, shared, uses); |
| 2670 | return 0; |
| 2671 | } |
| 2672 | case e_convert: |
| 2673 | return exp_shares_exps(e->l, shared, uses); |
| 2674 | |
| 2675 | case e_aggr: |
| 2676 | case e_func: |
| 2677 | { |
| 2678 | list *l = e->l; |
| 2679 | node *n; |
| 2680 | |
| 2681 | if (!l) |
| 2682 | return 0; |
| 2683 | for (n = l->h; n; n = n->next) { |
| 2684 | sql_exp *e = n->data; |
| 2685 | |
| 2686 | if (exp_shares_exps( e, shared, uses)) |
| 2687 | return 1; |
| 2688 | } |
| 2689 | } |
| 2690 | } |
| 2691 | return 0; |
| 2692 | } |
| 2693 | |
| 2694 | static int |
| 2695 | exps_share_expensive_exp( list *exps, list *shared ) |
| 2696 | { |
| 2697 | node *n; |
| 2698 | lng uses = 0; |
| 2699 | |
| 2700 | if (!exps || !shared) |
| 2701 | return 0; |
| 2702 | for (n = exps->h; n; n = n->next){ |
| 2703 | sql_exp *e = n->data; |
| 2704 | |
| 2705 | if (exp_shares_exps( e, shared, &uses)) |
| 2706 | return 1; |
| 2707 | } |
| 2708 | return 0; |
| 2709 | } |
| 2710 | |
| 2711 | static int ambigious_ref( list *exps, sql_exp *e); |
| 2712 | static int |
| 2713 | ambigious_refs( list *exps, list *refs) |
| 2714 | { |
| 2715 | node *n; |
| 2716 | |
| 2717 | if (!refs) |
| 2718 | return 0; |
| 2719 | for(n=refs->h; n; n = n->next) { |
| 2720 | if (ambigious_ref(exps, n->data)) |
| 2721 | return 1; |
| 2722 | } |
| 2723 | return 0; |
| 2724 | } |
| 2725 | |
| 2726 | static int |
| 2727 | ambigious_ref( list *exps, sql_exp *e) |
| 2728 | { |
| 2729 | sql_exp *ne = NULL; |
| 2730 | |
| 2731 | if (e->type == e_column) { |
| 2732 | if (e->l) |
| 2733 | ne = exps_bind_column2(exps, e->l, e->r); |
| 2734 | if (!ne && !e->l) |
| 2735 | ne = exps_bind_column(exps, e->r, NULL); |
| 2736 | if (ne && e != ne) |
| 2737 | return 1; |
| 2738 | } |
| 2739 | if (e->type == e_func) |
| 2740 | return ambigious_refs(exps, e->l); |
| 2741 | return 0; |
| 2742 | } |
| 2743 | |
/*
 * Merge 2 projects into the lower one.
 *
 * When rel is a project directly on top of another, unshared project (prj)
 * without an order by list, every expression of rel (and of rel's order by
 * list, if any) is rewritten with exp_push_down_prj() so that it refers to
 * the input of prj; prj is then removed from the plan.
 *
 * The merge is abandoned, leaving rel untouched, when either project is
 * flagged by project_unsafe(), when exps_share_expensive_exp() reports a
 * shared expression, when an expression is ambiguous, or when one cannot
 * be pushed down.  On success *changes is incremented and the function
 * calls itself again to merge further projections.
 */
static sql_rel *
rel_merge_projects(int *changes, mvc *sql, sql_rel *rel)
{
	/* original expression list, kept so it can be restored on failure */
	list *exps = rel->exps;
	sql_rel *prj = rel->l;
	node *n;

	if (rel->op == op_project &&
	    prj && prj->op == op_project && !(rel_is_ref(prj)) && !prj->r) {
		/* all stays 1 as long as every expression could be rewritten */
		int all = 1;

		if (project_unsafe(rel,0) || project_unsafe(prj,0) || exps_share_expensive_exp(rel->exps, prj->exps))
			return rel;

		/* here we need to fix aliases */
		/* the rewritten expressions are collected in a fresh list */
		rel->exps = new_exp_list(sql->sa);

		/* for each exp check if we can rename it */
		for (n = exps->h; n && all; n = n->next) {
			sql_exp *e = n->data, *ne = NULL;

			/* We do not handle expressions pointing back in the list */
			if (ambigious_ref(exps, e)) {
				all = 0;
				break;
			}
			ne = exp_push_down_prj(sql, e, prj, prj->l);
			/* check if the referred alias name isn't used twice */
			if (ne && ambigious_ref(rel->exps, ne)) {
				all = 0;
				break;
			}
			/*
			if (ne && ne->type == e_column) {
				sql_exp *nne = NULL;

				if (ne->l)
					nne = exps_bind_column2(rel->exps, ne->l, ne->r);
				if (!nne && !ne->l)
					nne = exps_bind_column(rel->exps, ne->r, NULL);
				if (nne && ne != nne && nne != e) {
					all = 0;
					break;
				}
			}
			*/
			if (ne) {
				/* the rewritten expression keeps the name it had in rel */
				exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
				list_append(rel->exps, ne);
			} else {
				all = 0;
			}
		}
		if (all) {
			/* we can now remove the intermediate project */
			/* push order by expressions */
			if (rel->r) {
				list *nr = new_exp_list(sql->sa), *res = rel->r;
				for (n = res->h; n; n = n->next) {
					sql_exp *e = n->data, *ne = NULL;

					ne = exp_push_down_prj(sql, e, prj, prj->l);
					if (ne) {
						exp_setname(sql->sa, ne, exp_relname(e), exp_name(e));
						list_append(nr, ne);
					} else {
						all = 0;
					}
				}
				if (all) {
					rel->r = nr;
				} else {
					/* leave as is */
					rel->exps = exps;
					return rel;
				}
			}
			/* unlink prj: rel takes over its input, then prj is destroyed without it */
			rel->l = prj->l;
			prj->l = NULL;
			rel_destroy(prj);
			(*changes)++;
			/* the new input may be a projection as well: try again */
			return rel_merge_projects(changes, sql, rel);
		} else {
			/* leave as is */
			rel->exps = exps;
		}
		return rel;
	}
	return rel;
}
| 2835 | |
| 2836 | static sql_subfunc * |
| 2837 | find_func( mvc *sql, char *name, list *exps ) |
| 2838 | { |
| 2839 | list * l = new_func_list(sql->sa); |
| 2840 | node *n; |
| 2841 | |
| 2842 | for(n = exps->h; n; n = n->next) |
| 2843 | append(l, exp_subtype(n->data)); |
| 2844 | return sql_bind_func_(sql->sa, sql->session->schema, name, l, F_FUNC); |
| 2845 | } |
| 2846 | |
| 2847 | static sql_exp * exp_case_fixup( mvc *sql, sql_rel *rel, sql_exp *e, sql_exp *cc ); |
| 2848 | |
| 2849 | static list * |
| 2850 | exps_case_fixup( mvc *sql, list *exps, sql_exp *cond ) |
| 2851 | { |
| 2852 | node *n; |
| 2853 | |
| 2854 | if (exps) { |
| 2855 | list *nexps = new_exp_list(sql->sa); |
| 2856 | for( n = exps->h; n; n = n->next) { |
| 2857 | sql_exp *e = n->data; |
| 2858 | |
| 2859 | e = exp_case_fixup(sql, NULL, e, cond); |
| 2860 | append(nexps, e); |
| 2861 | } |
| 2862 | return nexps; |
| 2863 | } |
| 2864 | return exps; |
| 2865 | } |
| 2866 | |
| 2867 | static sql_exp * |
| 2868 | exp_case_fixup( mvc *sql, sql_rel *rel, sql_exp *e, sql_exp *cc ) |
| 2869 | { |
| 2870 | /* only functions need fix up */ |
| 2871 | if (e->type == e_psm) { |
| 2872 | if (e->flag & PSM_SET) { |
| 2873 | /* todo */ |
| 2874 | } else if (e->flag & PSM_VAR) { |
| 2875 | /* todo */ |
| 2876 | } else if (e->flag & PSM_RETURN) { |
| 2877 | e->l = exp_case_fixup(sql, rel, e->l, cc); |
| 2878 | } else if (e->flag & PSM_WHILE) { |
| 2879 | e->l = exp_case_fixup(sql, rel, e->l, cc); |
| 2880 | e->r = exps_case_fixup(sql, e->r, cc); |
| 2881 | } else if (e->flag & PSM_IF) { |
| 2882 | e->l = exp_case_fixup(sql, rel, e->l, cc); |
| 2883 | e->r = exps_case_fixup(sql, e->r, cc); |
| 2884 | if (e->f) |
| 2885 | e->f = exps_case_fixup(sql, e->f, cc); |
| 2886 | } else if (e->flag & PSM_REL || e->flag & PSM_EXCEPTION) { |
| 2887 | } |
| 2888 | return e; |
| 2889 | } |
| 2890 | if (e->type == e_func && e->l && !is_analytic(e) ) { |
| 2891 | list *l = new_exp_list(sql->sa), *args = e->l; |
| 2892 | node *n; |
| 2893 | sql_exp *ne = e; |
| 2894 | sql_subfunc *f = e->f; |
| 2895 | |
| 2896 | /* first fixup arguments */ |
| 2897 | if (f->func->s || strcmp(f->func->base.name, "ifthenelse" )) { |
| 2898 | for (n=args->h; n; n=n->next) { |
| 2899 | sql_exp *a = exp_case_fixup(sql, rel, n->data, cc); |
| 2900 | list_append(l, a); |
| 2901 | } |
| 2902 | ne = exp_op(sql->sa, l, f); |
| 2903 | exp_prop_alias(sql->sa, ne, e); |
| 2904 | if (cc && math_unsafe(f)) { |
| 2905 | /* only add one condition */ |
| 2906 | assert(f->func->varres || f->func->vararg || list_length(ne->l) == list_length(f->func->ops)); |
| 2907 | append(ne->l, cc); |
| 2908 | } |
| 2909 | } else { |
| 2910 | /* ifthenelse with one of the sides an 'sql_div' */ |
| 2911 | sql_exp *cond = args->h->data, *nne, *ncond = NULL; |
| 2912 | sql_exp *a1 = args->h->next->data; |
| 2913 | sql_exp *a2 = args->h->next->next->data; |
| 2914 | |
| 2915 | cond = exp_case_fixup(sql, rel, cond, cc); |
| 2916 | if (rel) { |
| 2917 | exp_label(sql->sa, cond, ++sql->label); |
| 2918 | append(rel->exps, cond); |
| 2919 | cond = exp_column(sql->sa, exp_find_rel_name(cond), exp_name(cond), exp_subtype(cond), cond->card, has_nil(cond), is_intern(cond)); |
| 2920 | } |
| 2921 | /* rewrite right hands of div */ |
| 2922 | ncond = cond; |
| 2923 | if (cc) { |
| 2924 | sql_subtype *t = exp_subtype(cc); |
| 2925 | sql_subfunc *f = sql_bind_func(sql->sa, NULL, "and" , t, t, F_FUNC); |
| 2926 | |
| 2927 | ncond = exp_binop(sql->sa, cc, ncond, f); |
| 2928 | } |
| 2929 | a1 = exp_case_fixup(sql, rel, a1, ncond); |
| 2930 | if (1){ |
| 2931 | sql_subtype *t = exp_subtype(cond); |
| 2932 | sql_subfunc *f = sql_bind_func(sql->sa, NULL, "not" , t, NULL, F_FUNC); |
| 2933 | sql_exp *nc; |
| 2934 | |
| 2935 | assert(f); |
| 2936 | nc = exp_unop(sql->sa, cond, f); |
| 2937 | if (cc) { |
| 2938 | sql_subtype *t = exp_subtype(cc); |
| 2939 | sql_subfunc *f = sql_bind_func(sql->sa, NULL, "and" , t, t, F_FUNC); |
| 2940 | |
| 2941 | nc = exp_binop(sql->sa, cc, nc, f); |
| 2942 | } |
| 2943 | a2 = exp_case_fixup(sql, rel, a2, nc); |
| 2944 | } |
| 2945 | assert(cond && a1 && a2); |
| 2946 | nne = exp_op3(sql->sa, cond, a1, a2, ne->f); |
| 2947 | exp_prop_alias(sql->sa, nne, ne); |
| 2948 | ne = nne; |
| 2949 | } |
| 2950 | return ne; |
| 2951 | } |
| 2952 | if (e->type == e_convert) { |
| 2953 | sql_exp *e1 = exp_case_fixup(sql, rel, e->l, cc); |
| 2954 | sql_exp *ne = exp_convert(sql->sa, e1, exp_fromtype(e), exp_totype(e)); |
| 2955 | |
| 2956 | exp_prop_alias(sql->sa, ne, e); |
| 2957 | return ne; |
| 2958 | } |
| 2959 | if (e->type == e_aggr) { |
| 2960 | list *l = NULL, *args = e->l; |
| 2961 | node *n; |
| 2962 | sql_exp *ne; |
| 2963 | sql_subaggr *f = e->f; |
| 2964 | |
| 2965 | /* first fixup arguments */ |
| 2966 | if (args) { |
| 2967 | l = new_exp_list(sql->sa); |
| 2968 | for (n=args->h; n; n=n->next) { |
| 2969 | sql_exp *a = exp_case_fixup(sql, rel, n->data, cc); |
| 2970 | list_append(l, a); |
| 2971 | } |
| 2972 | } |
| 2973 | ne = exp_aggr(sql->sa, l, f, need_distinct(e), need_no_nil(e), e->card, has_nil(e)); |
| 2974 | exp_prop_alias(sql->sa, ne, e); |
| 2975 | return ne; |
| 2976 | } |
| 2977 | return e; |
| 2978 | } |
| 2979 | |
/* Forward declarations: both functions are called below before their
 * definitions (rel_case_fixup from rel_case_fixup_top, rewrite_case_exp
 * from rewrite_case_exps). */
static sql_rel * rel_case_fixup(int *changes, mvc *sql, sql_rel *rel, int top);
static sql_exp * rewrite_case_exp(mvc *sql, sql_exp *e, int *has_changes);
| 2982 | |
| 2983 | static sql_rel * |
| 2984 | rel_case_fixup_top(int *changes, mvc *sql, sql_rel *rel) |
| 2985 | { |
| 2986 | return rel_case_fixup(changes, sql, rel, 1); |
| 2987 | } |
| 2988 | |
| 2989 | static list * |
| 2990 | rewrite_case_exps(mvc *sql, list *l, int *has_changes) |
| 2991 | { |
| 2992 | node *n; |
| 2993 | |
| 2994 | if (!l) |
| 2995 | return l; |
| 2996 | for(n = l->h; n; n = n->next) |
| 2997 | n->data = rewrite_case_exp(sql, n->data, has_changes); |
| 2998 | return l; |
| 2999 | } |
| 3000 | |
| 3001 | |
| 3002 | static sql_exp * |
| 3003 | rewrite_case_exp(mvc *sql, sql_exp *e, int *has_changes) |
| 3004 | { |
| 3005 | if (e->type != e_psm) |
| 3006 | return e; |
| 3007 | if (e->flag & PSM_VAR) |
| 3008 | return e; |
| 3009 | if (e->flag & PSM_SET || e->flag & PSM_RETURN) { |
| 3010 | e->l = rewrite_case_exp(sql, e->l, has_changes); |
| 3011 | } |
| 3012 | if (e->flag & PSM_WHILE || e->flag & PSM_IF) { |
| 3013 | e->l = rewrite_case_exp(sql, e->l, has_changes); |
| 3014 | e->r = rewrite_case_exps(sql, e->r, has_changes); |
| 3015 | if (e->f) |
| 3016 | e->f = rewrite_case_exps(sql, e->f, has_changes); |
| 3017 | return e; |
| 3018 | } |
| 3019 | if ((e->flag & PSM_REL) && e->l) |
| 3020 | e->l = rel_case_fixup_top(has_changes, sql, e->l); |
| 3021 | if (e->flag & PSM_EXCEPTION) |
| 3022 | e->l = rewrite_case_exp(sql, e->l, has_changes); |
| 3023 | return e; |
| 3024 | } |
| 3025 | |
| 3026 | static sql_rel * |
| 3027 | rel_case_fixup(int *changes, mvc *sql, sql_rel *rel, int top) |
| 3028 | { |
| 3029 | (void)changes; /* only go through it once, ie don't mark for changes */ |
| 3030 | |
| 3031 | if (!top && rel_is_ref(rel)) |
| 3032 | return rel; |
| 3033 | if ((is_project(rel->op) || (rel->op == op_ddl && rel->flag == ddl_psm)) && rel->exps) { |
| 3034 | list *exps = rel->exps; |
| 3035 | node *n; |
| 3036 | int needed = 0; |
| 3037 | sql_rel *res = rel; |
| 3038 | int push_down = 0; |
| 3039 | |
| 3040 | for (n = exps->h; n && !needed; n = n->next) { |
| 3041 | sql_exp *e = n->data; |
| 3042 | |
| 3043 | if (e->type == e_func || e->type == e_convert || |
| 3044 | e->type == e_aggr || e->type == e_psm) |
| 3045 | needed = 1; |
| 3046 | } |
| 3047 | if (!needed) { |
| 3048 | if (rel->l) |
| 3049 | rel->l = rel_case_fixup(changes, sql, rel->l, is_topn(rel->op)?top:0); |
| 3050 | return rel; |
| 3051 | } |
| 3052 | |
| 3053 | /* get proper output first, then rewrite lower project (such that it can split expressions) */ |
| 3054 | push_down = is_simple_project(rel->op) && !rel->r && !rel_is_ref(rel); |
| 3055 | if (push_down) { |
| 3056 | if (top) |
| 3057 | res = rel_safe_project(sql, rel); |
| 3058 | else |
| 3059 | res = rel_project(sql->sa, rel, rel_projections(sql, rel, NULL, 1, 2)); |
| 3060 | if (need_distinct(rel)) |
| 3061 | set_distinct(res); |
| 3062 | } |
| 3063 | |
| 3064 | rel->exps = new_exp_list(sql->sa); |
| 3065 | for (n = exps->h; n; n = n->next) { |
| 3066 | sql_exp *e = exp_case_fixup( sql, push_down?rel:NULL, n->data, NULL ); |
| 3067 | |
| 3068 | if (!e) |
| 3069 | return NULL; |
| 3070 | list_append(rel->exps, e); |
| 3071 | } |
| 3072 | if (is_ddl(rel->op) && rel->flag == ddl_psm) |
| 3073 | rel->exps = rewrite_case_exps(sql, rel->exps, changes); |
| 3074 | if (rel->l) |
| 3075 | rel->l = rel_case_fixup(changes, sql, rel->l, is_topn(rel->op)?top:0); |
| 3076 | if (is_ddl(rel->op) && rel->r) |
| 3077 | rel->r = rel_case_fixup(changes, sql, rel->r, is_ddl(rel->op)?top:0); |
| 3078 | return res; |
| 3079 | } |
| 3080 | if (is_basetable(rel->op)) |
| 3081 | return rel; |
| 3082 | if (rel->l) |
| 3083 | rel->l = rel_case_fixup(changes, sql, rel->l, |
| 3084 | (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0); |
| 3085 | if ((is_join(rel->op) || is_ddl(rel->op) || is_modify(rel->op) || is_set(rel->op)) && rel->r) |
| 3086 | rel->r = rel_case_fixup(changes, sql, rel->r, |
| 3087 | (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0); |
| 3088 | return rel; |
| 3089 | } |
| 3090 | |
| 3091 | static sql_exp * |
| 3092 | exp_simplify_math( mvc *sql, sql_exp *e, int *changes) |
| 3093 | { |
| 3094 | if (e->type == e_func || e->type == e_aggr) { |
| 3095 | list *l = e->l; |
| 3096 | sql_subfunc *f = e->f; |
| 3097 | node *n; |
| 3098 | sql_exp *le; |
| 3099 | |
| 3100 | if (list_length(l) < 1) |
| 3101 | return e; |
| 3102 | |
| 3103 | le = l->h->data; |
| 3104 | if (!exp_subtype(le) || (!EC_COMPUTE(exp_subtype(le)->type->eclass) && exp_subtype(le)->type->eclass != EC_DEC)) |
| 3105 | return e; |
| 3106 | |
| 3107 | if (!f->func->s && list_length(l) == 2) { |
| 3108 | sql_exp *le = l->h->data; |
| 3109 | sql_exp *re = l->h->next->data; |
| 3110 | sql_subtype *et = exp_subtype(e); |
| 3111 | |
| 3112 | /* if one argument is NULL, return it, EXCEPT |
| 3113 | * if "_no_nil" is in the name of the |
| 3114 | * implementation function (currently either |
| 3115 | * min_no_nil or max_no_nil), in which case we |
| 3116 | * ignore the NULL and return the other |
| 3117 | * value */ |
| 3118 | if (exp_is_atom(le) && exp_is_null(sql, le)) { |
| 3119 | (*changes)++; |
| 3120 | if (f && f->func && f->func->imp && strstr(f->func->imp, "_no_nil" ) != NULL) { |
| 3121 | exp_setname(sql->sa, re, exp_relname(e), exp_name(e)); |
| 3122 | if (subtype_cmp(et, exp_subtype(re)) != 0) |
| 3123 | re = exp_convert(sql->sa, re, exp_subtype(re), et); |
| 3124 | return re; |
| 3125 | } |
| 3126 | exp_setname(sql->sa, le, exp_relname(e), exp_name(e)); |
| 3127 | if (subtype_cmp(et, exp_subtype(le)) != 0) |
| 3128 | le = exp_convert(sql->sa, le, exp_subtype(le), et); |
| 3129 | return le; |
| 3130 | } |
| 3131 | if (exp_is_atom(re) && exp_is_null(sql, re)) { |
| 3132 | (*changes)++; |
| 3133 | if (f && f->func && f->func->imp && strstr(f->func->imp, "_no_nil" ) != NULL) { |
| 3134 | exp_setname(sql->sa, le, exp_relname(e), exp_name(e)); |
| 3135 | if (subtype_cmp(et, exp_subtype(le)) != 0) |
| 3136 | le = exp_convert(sql->sa, le, exp_subtype(le), et); |
| 3137 | return le; |
| 3138 | } |
| 3139 | exp_setname(sql->sa, re, exp_relname(e), exp_name(e)); |
| 3140 | if (subtype_cmp(et, exp_subtype(re)) != 0) |
| 3141 | re = exp_convert(sql->sa, re, exp_subtype(re), et); |
| 3142 | return re; |
| 3143 | } |
| 3144 | } |
| 3145 | if (!f->func->s && !strcmp(f->func->base.name, "sql_mul" ) && list_length(l) == 2) { |
| 3146 | sql_exp *le = l->h->data; |
| 3147 | sql_exp *re = l->h->next->data; |
| 3148 | sql_subtype *et = exp_subtype(e); |
| 3149 | |
| 3150 | /* 0*a = 0 */ |
| 3151 | if (exp_is_atom(le) && exp_is_zero(sql, le) && exp_is_atom(re) && exp_is_not_null(sql, re)) { |
| 3152 | (*changes)++; |
| 3153 | exp_setname(sql->sa, le, exp_relname(e), exp_name(e)); |
| 3154 | if (subtype_cmp(et, exp_subtype(le)) != 0) |
| 3155 | le = exp_convert(sql->sa, le, exp_subtype(le), et); |
| 3156 | return le; |
| 3157 | } |
| 3158 | /* a*0 = 0 */ |
| 3159 | if (exp_is_atom(re) && exp_is_zero(sql, re) && exp_is_atom(le) && exp_is_not_null(sql, le)) { |
| 3160 | (*changes)++; |
| 3161 | exp_setname(sql->sa, re, exp_relname(e), exp_name(e)); |
| 3162 | if (subtype_cmp(et, exp_subtype(re)) != 0) |
| 3163 | re = exp_convert(sql->sa, re, exp_subtype(re), et); |
| 3164 | return re; |
| 3165 | } |
| 3166 | /* 1*a = a |
| 3167 | if (exp_is_atom(le) && exp_is_one(sql, le)) { |
| 3168 | (*changes)++; |
| 3169 | exp_setname(sql->sa, re, exp_relname(e), exp_name(e)); |
| 3170 | return re; |
| 3171 | } |
| 3172 | */ |
| 3173 | /* a*1 = a |
| 3174 | if (exp_is_atom(re) && exp_is_one(sql, re)) { |
| 3175 | (*changes)++; |
| 3176 | exp_setname(sql->sa, le, exp_relname(e), exp_name(e)); |
| 3177 | return le; |
| 3178 | } |
| 3179 | */ |
| 3180 | if (exp_is_atom(le) && exp_is_atom(re)) { |
| 3181 | atom *la = exp_flatten(sql, le); |
| 3182 | atom *ra = exp_flatten(sql, re); |
| 3183 | |
| 3184 | /* TODO check if output type is larger then input */ |
| 3185 | if (la && ra && subtype_cmp(atom_type(la), atom_type(ra)) == 0 && subtype_cmp(atom_type(la), exp_subtype(e)) == 0) { |
| 3186 | atom *a = atom_mul(la, ra); |
| 3187 | |
| 3188 | if (a && atom_cast(sql->sa, a, exp_subtype(e))) { |
| 3189 | sql_exp *ne = exp_atom(sql->sa, a); |
| 3190 | (*changes)++; |
| 3191 | exp_setname(sql->sa, ne, exp_relname(e), exp_name(e)); |
| 3192 | return ne; |
| 3193 | } |
| 3194 | } |
| 3195 | } |
| 3196 | /* move constants to the right, ie c*A = A*c */ |
| 3197 | else if (exp_is_atom(le)) { |
| 3198 | l->h->data = re; |
| 3199 | l->h->next->data = le; |
| 3200 | e->f = sql_bind_func(sql->sa, NULL, "sql_mul" , exp_subtype(re), exp_subtype(le), F_FUNC); |
| 3201 | exp_sum_scales(e->f, re, le); |
| 3202 | (*changes)++; |
| 3203 | return e; |
| 3204 | } |
| 3205 | /* change a*a into pow(a,2), later change pow(a,2) back into a*a */ |
| 3206 | if (exp_equal(le, re)==0 && exp_subtype(le)->type->eclass == EC_FLT) { |
| 3207 | /* pow */ |
| 3208 | list *l; |
| 3209 | sql_exp *ne; |
| 3210 | sql_subfunc *pow = sql_bind_func(sql->sa, sql->session->schema, "power" , exp_subtype(le), exp_subtype(re), F_FUNC); |
| 3211 | assert(pow); |
| 3212 | if (exp_subtype(le)->type->localtype == TYPE_flt) |
| 3213 | re = exp_atom_flt(sql->sa, 2); |
| 3214 | else |
| 3215 | re = exp_atom_dbl(sql->sa, 2); |
| 3216 | l = sa_list(sql->sa); |
| 3217 | append(l, le); |
| 3218 | append(l, re); |
| 3219 | (*changes)++; |
| 3220 | ne = exp_op(sql->sa, l, pow); |
| 3221 | exp_setname(sql->sa, ne, exp_relname(e), exp_name(e)); |
| 3222 | return ne; |
| 3223 | } |
| 3224 | /* change a*pow(a,n) or pow(a,n)*a into pow(a,n+1) */ |
| 3225 | if (is_func(le->type)) { |
| 3226 | list *l = le->l; |
| 3227 | sql_subfunc *f = le->f; |
| 3228 | |
| 3229 | if (!f->func->s && !strcmp(f->func->base.name, "power" ) && list_length(l) == 2) { |
| 3230 | sql_exp *lle = l->h->data; |
| 3231 | sql_exp *lre = l->h->next->data; |
| 3232 | if (exp_equal(re, lle)==0) { |
| 3233 | if (atom_inc(exp_value(sql, lre, sql->args, sql->argc))) { |
| 3234 | (*changes)++; |
| 3235 | exp_setname(sql->sa, le, exp_relname(e), exp_name(e)); |
| 3236 | return le; |
| 3237 | } |
| 3238 | } |
| 3239 | } |
| 3240 | if (!f->func->s && !strcmp(f->func->base.name, "sql_mul" ) && list_length(l) == 2) { |
| 3241 | sql_exp *lle = l->h->data; |
| 3242 | sql_exp *lre = l->h->next->data; |
| 3243 | if (!exp_is_atom(lle) && exp_is_atom(lre) && exp_is_atom(re)) { |
| 3244 | sql_subtype et = *exp_subtype(e); |
| 3245 | /* (x*c1)*c2 -> x * (c1*c2) */ |
| 3246 | list *l = sa_list(sql->sa); |
| 3247 | append(l, lre); |
| 3248 | append(l, re); |
| 3249 | le->l = l; |
| 3250 | le->f = sql_bind_func(sql->sa, NULL, "sql_mul" , exp_subtype(lre), exp_subtype(re), F_FUNC); |
| 3251 | exp_sum_scales(le->f, lre, re); |
| 3252 | l = e->l; |
| 3253 | l->h->data = lle; |
| 3254 | l->h->next->data = le; |
| 3255 | e->f = sql_bind_func(sql->sa, NULL, "sql_mul" , exp_subtype(lle), exp_subtype(le), F_FUNC); |
| 3256 | exp_sum_scales(e->f, lle, le); |
| 3257 | if (subtype_cmp(&et, exp_subtype(e)) != 0) |
| 3258 | e = exp_convert(sql->sa, e, exp_subtype(e), &et); |
| 3259 | (*changes)++; |
| 3260 | return e; |
| 3261 | } |
| 3262 | } |
| 3263 | } |
| 3264 | } |
| 3265 | if (!f->func->s && !strcmp(f->func->base.name, "sql_add" ) && list_length(l) == 2) { |
| 3266 | sql_exp *le = l->h->data; |
| 3267 | sql_exp *re = l->h->next->data; |
| 3268 | if (exp_is_atom(le) && exp_is_zero(sql, le)) { |
| 3269 | (*changes)++; |
| 3270 | exp_setname(sql->sa, re, exp_relname(e), exp_name(e)); |
| 3271 | return re; |
| 3272 | } |
| 3273 | if (exp_is_atom(re) && exp_is_zero(sql, re)) { |
| 3274 | (*changes)++; |
| 3275 | exp_setname(sql->sa, le, exp_relname(e), exp_name(e)); |
| 3276 | return le; |
| 3277 | } |
| 3278 | if (exp_is_atom(le) && exp_is_atom(re)) { |
| 3279 | atom *la = exp_flatten(sql, le); |
| 3280 | atom *ra = exp_flatten(sql, re); |
| 3281 | |
| 3282 | if (la && ra) { |
| 3283 | atom *a = atom_add(la, ra); |
| 3284 | |
| 3285 | if (a) { |
| 3286 | sql_exp *ne = exp_atom(sql->sa, a); |
| 3287 | (*changes)++; |
| 3288 | exp_setname(sql->sa, ne, exp_relname(e), exp_name(e)); |
| 3289 | return ne; |
| 3290 | } |
| 3291 | } |
| 3292 | } |
| 3293 | /* move constants to the right, ie c+A = A+c */ |
| 3294 | else if (exp_is_atom(le)) { |
| 3295 | l->h->data = re; |
| 3296 | l->h->next->data = le; |
| 3297 | (*changes)++; |
| 3298 | return e; |
| 3299 | } else if (is_func(le->type)) { |
| 3300 | list *ll = le->l; |
| 3301 | sql_subfunc *f = le->f; |
| 3302 | if (!f->func->s && !strcmp(f->func->base.name, "sql_add" ) && list_length(ll) == 2) { |
| 3303 | sql_exp *lle = ll->h->data; |
| 3304 | sql_exp *lre = ll->h->next->data; |
| 3305 | |
| 3306 | if (exp_is_atom(lle) && exp_is_atom(lre)) |
| 3307 | return e; |
| 3308 | if (!exp_is_atom(re) && exp_is_atom(lre)) { |
| 3309 | /* (x+c1)+y -> (x+y) + c1 */ |
| 3310 | ll->h->next->data = re; |
| 3311 | l->h->next->data = lre; |
| 3312 | l->h->data = exp_simplify_math(sql, le, changes); |
| 3313 | (*changes)++; |
| 3314 | return e; |
| 3315 | } |
| 3316 | if (exp_is_atom(re) && exp_is_atom(lre)) { |
| 3317 | /* (x+c1)+c2 -> (c2+c1) + x */ |
| 3318 | ll->h->data = re; |
| 3319 | l->h->next->data = lle; |
| 3320 | l->h->data = exp_simplify_math(sql, le, changes); |
| 3321 | (*changes)++; |
| 3322 | return e; |
| 3323 | } |
| 3324 | } |
| 3325 | } |
| 3326 | /* |
| 3327 | if (is_func(re->type)) { |
| 3328 | list *ll = re->l; |
| 3329 | sql_subfunc *f = re->f; |
| 3330 | if (!f->func->s && !strcmp(f->func->base.name, "sql_add") && list_length(ll) == 2) { |
| 3331 | if (exp_is_atom(le)) { |
| 3332 | * c1+(x+y) -> (x+y) + c1 * |
| 3333 | l->h->data = re; |
| 3334 | l->h->next->data = le; |
| 3335 | (*changes)++; |
| 3336 | return e; |
| 3337 | } |
| 3338 | } |
| 3339 | } |
| 3340 | */ |
| 3341 | } |
| 3342 | if (!f->func->s && !strcmp(f->func->base.name, "sql_sub" ) && list_length(l) == 2) { |
| 3343 | sql_exp *le = l->h->data; |
| 3344 | sql_exp *re = l->h->next->data; |
| 3345 | |
| 3346 | if (exp_is_atom(le) && exp_is_atom(re)) { |
| 3347 | atom *la = exp_flatten(sql, le); |
| 3348 | atom *ra = exp_flatten(sql, re); |
| 3349 | |
| 3350 | if (la && ra) { |
| 3351 | atom *a = atom_sub(la, ra); |
| 3352 | |
| 3353 | if (a) { |
| 3354 | sql_exp *ne = exp_atom(sql->sa, a); |
| 3355 | (*changes)++; |
| 3356 | exp_setname(sql->sa, ne, exp_relname(e), exp_name(e)); |
| 3357 | return ne; |
| 3358 | } |
| 3359 | } |
| 3360 | } |
| 3361 | if (exp_equal(le,re) == 0) { /* a - a = 0 */ |
| 3362 | atom *a; |
| 3363 | sql_exp *ne; |
| 3364 | |
| 3365 | if (exp_subtype(le)->type->eclass == EC_NUM) { |
| 3366 | a = atom_int(sql->sa, exp_subtype(le), 0); |
| 3367 | } else if (exp_subtype(le)->type->eclass == EC_FLT) { |
| 3368 | a = atom_float(sql->sa, exp_subtype(le), 0); |
| 3369 | } else { |
| 3370 | return e; |
| 3371 | } |
| 3372 | ne = exp_atom(sql->sa, a); |
| 3373 | (*changes)++; |
| 3374 | exp_setname(sql->sa, ne, exp_relname(e), exp_name(e)); |
| 3375 | return ne; |
| 3376 | } |
| 3377 | if (is_func(le->type)) { |
| 3378 | list *ll = le->l; |
| 3379 | sql_subfunc *f = le->f; |
| 3380 | if (!f->func->s && !strcmp(f->func->base.name, "sql_add" ) && list_length(ll) == 2) { |
| 3381 | sql_exp *lle = ll->h->data; |
| 3382 | sql_exp *lre = ll->h->next->data; |
| 3383 | if (exp_equal(re, lre) == 0) { |
| 3384 | /* (x+a)-a = x*/ |
| 3385 | exp_setname(sql->sa, lle, exp_relname(e), exp_name(e)); |
| 3386 | (*changes)++; |
| 3387 | return lle; |
| 3388 | } |
| 3389 | if (exp_is_atom(lle) && exp_is_atom(lre)) |
| 3390 | return e; |
| 3391 | if (!exp_is_atom(re) && exp_is_atom(lre)) { |
| 3392 | /* (x+c1)-y -> (x-y) + c1 */ |
| 3393 | ll->h->next->data = re; |
| 3394 | l->h->next->data = lre; |
| 3395 | le->f = e->f; |
| 3396 | e->f = f; |
| 3397 | l->h->data = exp_simplify_math(sql, le, changes); |
| 3398 | (*changes)++; |
| 3399 | return e; |
| 3400 | } |
| 3401 | if (exp_is_atom(re) && exp_is_atom(lre)) { |
| 3402 | /* (x+c1)-c2 -> (c1-c2) + x */ |
| 3403 | ll->h->data = lre; |
| 3404 | ll->h->next->data = re; |
| 3405 | l->h->next->data = lle; |
| 3406 | le->f = e->f; |
| 3407 | e->f = f; |
| 3408 | l->h->data = exp_simplify_math(sql, le, changes); |
| 3409 | (*changes)++; |
| 3410 | return e; |
| 3411 | } |
| 3412 | } |
| 3413 | } |
| 3414 | } |
| 3415 | if (l) |
| 3416 | for (n = l->h; n; n = n->next) |
| 3417 | n->data = exp_simplify_math(sql, n->data, changes); |
| 3418 | } |
| 3419 | if (e->type == e_convert) |
| 3420 | e->l = exp_simplify_math(sql, e->l, changes); |
| 3421 | return e; |
| 3422 | } |
| 3423 | |
| 3424 | static sql_rel * |
| 3425 | rel_simplify_math(int *changes, mvc *sql, sql_rel *rel) |
| 3426 | { |
| 3427 | |
| 3428 | if ((is_project(rel->op) || (rel->op == op_ddl && rel->flag == ddl_psm)) && rel->exps) { |
| 3429 | list *exps = rel->exps; |
| 3430 | node *n; |
| 3431 | int needed = 0; |
| 3432 | |
| 3433 | for (n = exps->h; n && !needed; n = n->next) { |
| 3434 | sql_exp *e = n->data; |
| 3435 | |
| 3436 | if (e->type == e_func || e->type == e_convert || |
| 3437 | e->type == e_aggr || e->type == e_psm) |
| 3438 | needed = 1; |
| 3439 | } |
| 3440 | if (!needed) |
| 3441 | return rel; |
| 3442 | |
| 3443 | rel->exps = new_exp_list(sql->sa); |
| 3444 | for (n = exps->h; n; n = n->next) { |
| 3445 | sql_exp *e = exp_simplify_math( sql, n->data, changes); |
| 3446 | |
| 3447 | if (!e) |
| 3448 | return NULL; |
| 3449 | list_append(rel->exps, e); |
| 3450 | } |
| 3451 | } |
| 3452 | if (*changes) /* if rewritten don't cache this query */ |
| 3453 | sql->caching = 0; |
| 3454 | return rel; |
| 3455 | } |
| 3456 | |
| 3457 | static sql_rel * |
| 3458 | rel_find_ref( sql_rel *r) |
| 3459 | { |
| 3460 | while (!rel_is_ref(r) && r->l && |
| 3461 | (is_project(r->op) || is_select(r->op) /*|| is_join(r->op)*/)) |
| 3462 | r = r->l; |
| 3463 | if (rel_is_ref(r)) |
| 3464 | return r; |
| 3465 | return NULL; |
| 3466 | } |
| 3467 | |
| 3468 | static sql_rel * |
| 3469 | rel_find_select( sql_rel *r) |
| 3470 | { |
| 3471 | while (!is_select(r->op) && r->l && is_project(r->op)) |
| 3472 | r = r->l; |
| 3473 | if (is_select(r->op)) |
| 3474 | return r; |
| 3475 | return NULL; |
| 3476 | } |
| 3477 | |
| 3478 | static int |
| 3479 | rel_match_projections(sql_rel *l, sql_rel *r) |
| 3480 | { |
| 3481 | node *n, *m; |
| 3482 | list *le = l->exps; |
| 3483 | list *re = r->exps; |
| 3484 | |
| 3485 | if (!le || !re) |
| 3486 | return 0; |
| 3487 | if (list_length(le) != list_length(re)) |
| 3488 | return 0; |
| 3489 | |
| 3490 | for (n = le->h, m = re->h; n && m; n = n->next, m = m->next) |
| 3491 | if (!exp_match(n->data, m->data)) |
| 3492 | return 0; |
| 3493 | return 1; |
| 3494 | } |
| 3495 | |
| 3496 | static int |
| 3497 | exps_has_predicate( list *l ) |
| 3498 | { |
| 3499 | node *n; |
| 3500 | |
| 3501 | for( n = l->h; n; n = n->next){ |
| 3502 | sql_exp *e = n->data; |
| 3503 | |
| 3504 | if (e->card <= CARD_ATOM) |
| 3505 | return 1; |
| 3506 | } |
| 3507 | return 0; |
| 3508 | } |
| 3509 | |
| 3510 | static sql_rel * |
| 3511 | rel_merge_union(int *changes, mvc *sql, sql_rel *rel) |
| 3512 | { |
| 3513 | sql_rel *l = rel->l; |
| 3514 | sql_rel *r = rel->r; |
| 3515 | sql_rel *ref = NULL; |
| 3516 | |
| 3517 | if (is_union(rel->op) && |
| 3518 | l && is_project(l->op) && !project_unsafe(l,0) && |
| 3519 | r && is_project(r->op) && !project_unsafe(r,0) && |
| 3520 | (ref = rel_find_ref(l)) != NULL && ref == rel_find_ref(r)) { |
| 3521 | /* Find selects and try to merge */ |
| 3522 | sql_rel *ls = rel_find_select(l); |
| 3523 | sql_rel *rs = rel_find_select(r); |
| 3524 | |
| 3525 | /* can we merge ? */ |
| 3526 | if (!ls || !rs) |
| 3527 | return rel; |
| 3528 | |
| 3529 | /* merge any extra projects */ |
| 3530 | if (l->l != ls) |
| 3531 | rel->l = l = rel_merge_projects(changes, sql, l); |
| 3532 | if (r->l != rs) |
| 3533 | rel->r = r = rel_merge_projects(changes, sql, r); |
| 3534 | |
| 3535 | if (!rel_match_projections(l,r)) |
| 3536 | return rel; |
| 3537 | |
| 3538 | /* for now only union(project*(select(R),project*(select(R))) */ |
| 3539 | if (ls != l->l || rs != r->l || |
| 3540 | ls->l != rs->l || !rel_is_ref(ls->l)) |
| 3541 | return rel; |
| 3542 | |
| 3543 | if (!ls->exps || !rs->exps || |
| 3544 | exps_has_predicate(ls->exps) || |
| 3545 | exps_has_predicate(rs->exps)) |
| 3546 | return rel; |
| 3547 | |
| 3548 | /* merge, ie. add 'or exp' */ |
| 3549 | (*changes)++; |
| 3550 | ls->exps = append(new_exp_list(sql->sa), exp_or(sql->sa, ls->exps, rs->exps, 0)); |
| 3551 | rs->exps = NULL; |
| 3552 | rel = rel_inplace_project(sql->sa, rel, rel_dup(rel->l), rel->exps); |
| 3553 | set_processed(rel); |
| 3554 | return rel; |
| 3555 | } |
| 3556 | return rel; |
| 3557 | } |
| 3558 | |
| 3559 | static int |
| 3560 | exps_cse( mvc *sql, list *oexps, list *l, list *r ) |
| 3561 | { |
| 3562 | list *nexps; |
| 3563 | node *n, *m; |
| 3564 | char *lu, *ru; |
| 3565 | int lc = 0, rc = 0, match = 0, res = 0; |
| 3566 | |
| 3567 | /* first recusive exps_cse */ |
| 3568 | nexps = new_exp_list(sql->sa); |
| 3569 | for (n = l->h; n; n = n->next) { |
| 3570 | sql_exp *e = n->data; |
| 3571 | |
| 3572 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) { |
| 3573 | res = exps_cse(sql, nexps, e->l, e->r); |
| 3574 | } else { |
| 3575 | append(nexps, e); |
| 3576 | } |
| 3577 | } |
| 3578 | l = nexps; |
| 3579 | |
| 3580 | nexps = new_exp_list(sql->sa); |
| 3581 | for (n = r->h; n; n = n->next) { |
| 3582 | sql_exp *e = n->data; |
| 3583 | |
| 3584 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) { |
| 3585 | res = exps_cse(sql, nexps, e->l, e->r); |
| 3586 | } else { |
| 3587 | append(nexps, e); |
| 3588 | } |
| 3589 | } |
| 3590 | r = nexps; |
| 3591 | |
| 3592 | /* simplify true or .. and .. or true */ |
| 3593 | if (list_length(l) == list_length(r) && list_length(l) == 1) { |
| 3594 | sql_exp *le = l->h->data, *re = r->h->data; |
| 3595 | |
| 3596 | if (exp_is_true(sql, le)) { |
| 3597 | append(oexps, le); |
| 3598 | return 1; |
| 3599 | } |
| 3600 | if (exp_is_true(sql, re)) { |
| 3601 | append(oexps, re); |
| 3602 | return 1; |
| 3603 | } |
| 3604 | } |
| 3605 | |
| 3606 | lu = calloc(list_length(l), sizeof(char)); |
| 3607 | ru = calloc(list_length(r), sizeof(char)); |
| 3608 | for (n = l->h, lc = 0; n; n = n->next, lc++) { |
| 3609 | sql_exp *le = n->data; |
| 3610 | |
| 3611 | for ( m = r->h, rc = 0; m; m = m->next, rc++) { |
| 3612 | sql_exp *re = m->data; |
| 3613 | |
| 3614 | if (!ru[rc] && exp_match_exp(le,re)) { |
| 3615 | lu[lc] = 1; |
| 3616 | ru[rc] = 1; |
| 3617 | match = 1; |
| 3618 | } |
| 3619 | } |
| 3620 | } |
| 3621 | if (match) { |
| 3622 | list *nl = new_exp_list(sql->sa); |
| 3623 | list *nr = new_exp_list(sql->sa); |
| 3624 | |
| 3625 | for (n = l->h, lc = 0; n; n = n->next, lc++) |
| 3626 | if (!lu[lc]) |
| 3627 | append(nl, n->data); |
| 3628 | for (n = r->h, rc = 0; n; n = n->next, rc++) |
| 3629 | if (!ru[rc]) |
| 3630 | append(nr, n->data); |
| 3631 | |
| 3632 | if (list_length(nl) && list_length(nr)) |
| 3633 | append(oexps, exp_or(sql->sa, nl, nr, 0)); |
| 3634 | |
| 3635 | for (n = l->h, lc = 0; n; n = n->next, lc++) { |
| 3636 | if (lu[lc]) |
| 3637 | append(oexps, n->data); |
| 3638 | } |
| 3639 | res = 1; |
| 3640 | } else { |
| 3641 | append(oexps, exp_or(sql->sa, list_dup(l, (fdup)NULL), |
| 3642 | list_dup(r, (fdup)NULL), 0)); |
| 3643 | } |
| 3644 | free(lu); |
| 3645 | free(ru); |
| 3646 | return res; |
| 3647 | } |
| 3648 | |
| 3649 | static int |
| 3650 | are_equality_exps( list *exps, sql_exp **L) |
| 3651 | { |
| 3652 | sql_exp *l = *L; |
| 3653 | |
| 3654 | if (list_length(exps) == 1) { |
| 3655 | sql_exp *e = exps->h->data, *le = e->l, *re = e->r; |
| 3656 | |
| 3657 | if (e->type == e_cmp && e->flag == cmp_equal && le->card != CARD_ATOM && re->card == CARD_ATOM) { |
| 3658 | if (!l) { |
| 3659 | *L = l = le; |
| 3660 | if (!is_column(le->type)) |
| 3661 | return 0; |
| 3662 | } |
| 3663 | return (exp_match(l, le)); |
| 3664 | } |
| 3665 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) |
| 3666 | return (are_equality_exps(e->l, L) && |
| 3667 | are_equality_exps(e->r, L)); |
| 3668 | } |
| 3669 | return 0; |
| 3670 | } |
| 3671 | |
| 3672 | static void |
| 3673 | get_exps( list *n, list *l ) |
| 3674 | { |
| 3675 | sql_exp *e = l->h->data, *re = e->r; |
| 3676 | |
| 3677 | if (e->type == e_cmp && e->flag == cmp_equal && re->card == CARD_ATOM) |
| 3678 | list_append(n, re); |
| 3679 | if (e->type == e_cmp && e->flag == cmp_or) { |
| 3680 | get_exps(n, e->l); |
| 3681 | get_exps(n, e->r); |
| 3682 | } |
| 3683 | } |
| 3684 | |
| 3685 | static sql_exp * |
| 3686 | equality_exps_2_in( mvc *sql, sql_exp *ce, list *l, list *r) |
| 3687 | { |
| 3688 | list *nl = new_exp_list(sql->sa); |
| 3689 | |
| 3690 | get_exps(nl, l); |
| 3691 | get_exps(nl, r); |
| 3692 | |
| 3693 | return exp_in( sql->sa, ce, nl, cmp_in); |
| 3694 | } |
| 3695 | |
| 3696 | static sql_rel * |
| 3697 | rel_select_cse(int *changes, mvc *sql, sql_rel *rel) |
| 3698 | { |
| 3699 | if (is_select(rel->op) && rel->exps) { |
| 3700 | node *n; |
| 3701 | list *nexps; |
| 3702 | int needed = 0; |
| 3703 | |
| 3704 | for (n=rel->exps->h; n && !needed; n = n->next) { |
| 3705 | sql_exp *e = n->data; |
| 3706 | |
| 3707 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) |
| 3708 | needed = 1; |
| 3709 | } |
| 3710 | if (!needed) |
| 3711 | return rel; |
| 3712 | |
| 3713 | nexps = new_exp_list(sql->sa); |
| 3714 | for (n=rel->exps->h; n; n = n->next) { |
| 3715 | sql_exp *e = n->data, *l = NULL; |
| 3716 | |
| 3717 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && are_equality_exps(e->l, &l) && are_equality_exps(e->r, &l) && l) { |
| 3718 | (*changes)++; |
| 3719 | append(nexps, equality_exps_2_in(sql, l, e->l, e->r)); |
| 3720 | } else { |
| 3721 | append(nexps, e); |
| 3722 | } |
| 3723 | } |
| 3724 | rel->exps = nexps; |
| 3725 | } |
| 3726 | if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) { |
| 3727 | node *n; |
| 3728 | list *nexps; |
| 3729 | int needed = 0; |
| 3730 | |
| 3731 | for (n=rel->exps->h; n && !needed; n = n->next) { |
| 3732 | sql_exp *e = n->data; |
| 3733 | |
| 3734 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) |
| 3735 | needed = 1; |
| 3736 | } |
| 3737 | if (!needed) |
| 3738 | return rel; |
| 3739 | nexps = new_exp_list(sql->sa); |
| 3740 | for (n=rel->exps->h; n; n = n->next) { |
| 3741 | sql_exp *e = n->data; |
| 3742 | |
| 3743 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) { |
| 3744 | /* split the common expressions */ |
| 3745 | *changes += exps_cse(sql, nexps, e->l, e->r); |
| 3746 | } else { |
| 3747 | append(nexps, e); |
| 3748 | } |
| 3749 | } |
| 3750 | rel->exps = nexps; |
| 3751 | } |
| 3752 | return rel; |
| 3753 | } |
| 3754 | |
| 3755 | static sql_rel * |
| 3756 | rel_project_cse(int *changes, mvc *sql, sql_rel *rel) |
| 3757 | { |
| 3758 | (void)changes; |
| 3759 | if (is_project(rel->op) && rel->exps) { |
| 3760 | node *n, *m; |
| 3761 | list *nexps; |
| 3762 | int needed = 0; |
| 3763 | |
| 3764 | for (n=rel->exps->h; n && !needed; n = n->next) { |
| 3765 | sql_exp *e1 = n->data; |
| 3766 | |
| 3767 | if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) { |
| 3768 | for (m=n->next; m; m = m->next){ |
| 3769 | sql_exp *e2 = m->data; |
| 3770 | |
| 3771 | if (exp_name(e2) && exp_match_exp(e1, e2)) |
| 3772 | needed = 1; |
| 3773 | } |
| 3774 | } |
| 3775 | } |
| 3776 | |
| 3777 | if (!needed) |
| 3778 | return rel; |
| 3779 | |
| 3780 | nexps = new_exp_list(sql->sa); |
| 3781 | for (n=rel->exps->h; n; n = n->next) { |
| 3782 | sql_exp *e1 = n->data; |
| 3783 | |
| 3784 | if (e1->type != e_column && !exp_is_atom(e1) && exp_name(e1)) { |
| 3785 | for (m=nexps->h; m; m = m->next){ |
| 3786 | sql_exp *e2 = m->data; |
| 3787 | |
| 3788 | if (exp_name(e2) && exp_match_exp(e1, e2)) { |
| 3789 | sql_exp *ne = exp_alias(sql->sa, exp_relname(e1), exp_name(e1), exp_relname(e2), exp_name(e2), exp_subtype(e2), e2->card, has_nil(e2), is_intern(e1)); |
| 3790 | |
| 3791 | ne = exp_propagate(sql->sa, ne, e1); |
| 3792 | e1 = ne; |
| 3793 | break; |
| 3794 | } |
| 3795 | } |
| 3796 | } |
| 3797 | append(nexps, e1); |
| 3798 | } |
| 3799 | rel->exps = nexps; |
| 3800 | } |
| 3801 | return rel; |
| 3802 | } |
| 3803 | |
| 3804 | static list * |
| 3805 | exps_merge_select_rse( mvc *sql, list *l, list *r ) |
| 3806 | { |
| 3807 | node *n, *m, *o; |
| 3808 | list *nexps = NULL, *lexps, *rexps; |
| 3809 | |
| 3810 | lexps = new_exp_list(sql->sa); |
| 3811 | for (n = l->h; n; n = n->next) { |
| 3812 | sql_exp *e = n->data; |
| 3813 | |
| 3814 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) { |
| 3815 | list *nexps = exps_merge_select_rse(sql, e->l, e->r); |
| 3816 | for (o = nexps->h; o; o = o->next) |
| 3817 | append(lexps, o->data); |
| 3818 | } else { |
| 3819 | append(lexps, e); |
| 3820 | } |
| 3821 | } |
| 3822 | rexps = new_exp_list(sql->sa); |
| 3823 | for (n = r->h; n; n = n->next) { |
| 3824 | sql_exp *e = n->data; |
| 3825 | |
| 3826 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) { |
| 3827 | list *nexps = exps_merge_select_rse(sql, e->l, e->r); |
| 3828 | for (o = nexps->h; o; o = o->next) |
| 3829 | append(rexps, o->data); |
| 3830 | } else { |
| 3831 | append(rexps, e); |
| 3832 | } |
| 3833 | } |
| 3834 | |
| 3835 | nexps = new_exp_list(sql->sa); |
| 3836 | |
| 3837 | /* merge merged lists first ? */ |
| 3838 | for (n = lexps->h; n; n = n->next) { |
| 3839 | sql_exp *le = n->data, *re, *fnd = NULL; |
| 3840 | |
| 3841 | if (le->type != e_cmp || le->flag == cmp_or || is_anti(le)) |
| 3842 | continue; |
| 3843 | for (m = rexps->h; !fnd && m; m = m->next) { |
| 3844 | re = m->data; |
| 3845 | if (exps_match_col_exps(le, re)) |
| 3846 | fnd = re; |
| 3847 | } |
| 3848 | if (fnd && is_anti(fnd)) |
| 3849 | continue; |
| 3850 | /* cases |
| 3851 | * 1) 2 values (cmp_equal) |
| 3852 | * 2) 1 value (cmp_equal), and cmp_in |
| 3853 | * (also cmp_in, cmp_equal) |
| 3854 | * 3) 2 cmp_in |
| 3855 | * 4) ranges |
| 3856 | */ |
| 3857 | if (fnd) { |
| 3858 | re = fnd; |
| 3859 | fnd = NULL; |
| 3860 | if (le->anti || re->anti) |
| 3861 | continue; |
| 3862 | if (le->flag == cmp_equal && re->flag == cmp_equal) { |
| 3863 | list *exps = new_exp_list(sql->sa); |
| 3864 | |
| 3865 | append(exps, le->r); |
| 3866 | append(exps, re->r); |
| 3867 | fnd = exp_in(sql->sa, le->l, exps, cmp_in); |
| 3868 | } else if (le->flag == cmp_equal && re->flag == cmp_in){ |
| 3869 | list *exps = new_exp_list(sql->sa); |
| 3870 | |
| 3871 | append(exps, le->r); |
| 3872 | list_merge(exps, re->r, NULL); |
| 3873 | fnd = exp_in(sql->sa, le->l, exps, cmp_in); |
| 3874 | } else if (le->flag == cmp_in && re->flag == cmp_equal){ |
| 3875 | list *exps = new_exp_list(sql->sa); |
| 3876 | |
| 3877 | append(exps, re->r); |
| 3878 | list_merge(exps, le->r, NULL); |
| 3879 | fnd = exp_in(sql->sa, le->l, exps, cmp_in); |
| 3880 | } else if (le->flag == cmp_in && re->flag == cmp_in){ |
| 3881 | list *exps = new_exp_list(sql->sa); |
| 3882 | |
| 3883 | list_merge(exps, le->r, NULL); |
| 3884 | list_merge(exps, re->r, NULL); |
| 3885 | fnd = exp_in(sql->sa, le->l, exps, cmp_in); |
| 3886 | } else if (le->f && re->f && /* merge ranges */ |
| 3887 | le->flag == re->flag && le->flag <= cmp_lt) { |
| 3888 | sql_subfunc *min = sql_bind_func(sql->sa, sql->session->schema, "sql_min" , exp_subtype(le->r), exp_subtype(re->r), F_FUNC); |
| 3889 | sql_subfunc *max = sql_bind_func(sql->sa, sql->session->schema, "sql_max" , exp_subtype(le->f), exp_subtype(re->f), F_FUNC); |
| 3890 | sql_exp *mine, *maxe; |
| 3891 | |
| 3892 | if (!min || !max) |
| 3893 | continue; |
| 3894 | mine = exp_binop(sql->sa, le->r, re->r, min); |
| 3895 | maxe = exp_binop(sql->sa, le->f, re->f, max); |
| 3896 | fnd = exp_compare2(sql->sa, le->l, mine, maxe, le->flag); |
| 3897 | } |
| 3898 | if (fnd) |
| 3899 | append(nexps, fnd); |
| 3900 | } |
| 3901 | } |
| 3902 | return nexps; |
| 3903 | } |
| 3904 | |
| 3905 | static list * |
| 3906 | exps_merge_project_rse( mvc *sql, list *exps) |
| 3907 | { |
| 3908 | node *n; |
| 3909 | list *nexps = NULL; |
| 3910 | |
| 3911 | nexps = new_exp_list(sql->sa); |
| 3912 | for (n = exps->h; n; n = n->next) { |
| 3913 | sql_exp *e = n->data; |
| 3914 | |
| 3915 | if (is_func(e->type) && e->l) { |
| 3916 | list *fexps = e->l; |
| 3917 | sql_subfunc *f = e->f; |
| 3918 | |
| 3919 | /* is and function */ |
| 3920 | if (strcmp(f->func->base.name, "and" ) == 0 && list_length(fexps) == 2) { |
| 3921 | sql_exp *l = list_fetch(fexps, 0); |
| 3922 | sql_exp *r = list_fetch(fexps, 1); |
| 3923 | |
| 3924 | /* check merge into single between */ |
| 3925 | if (is_func(l->type) && is_func(r->type)) { |
| 3926 | list *lfexps = l->l; |
| 3927 | list *rfexps = r->l; |
| 3928 | sql_subfunc *lf = l->f; |
| 3929 | sql_subfunc *rf = r->f; |
| 3930 | |
| 3931 | if (((strcmp(lf->func->base.name, ">=" ) == 0 || strcmp(lf->func->base.name, ">" ) == 0) && list_length(lfexps) == 2) && |
| 3932 | ((strcmp(rf->func->base.name, "<=" ) == 0 || strcmp(rf->func->base.name, "<" ) == 0) && list_length(rfexps) == 2) |
| 3933 | && exp_equal(list_fetch(lfexps,0), list_fetch(rfexps,0)) == 0) { |
| 3934 | sql_exp *ce = list_fetch(lfexps, 0); |
| 3935 | list *types, *ops = sa_list(sql->sa); |
| 3936 | sql_subfunc *between; |
| 3937 | |
| 3938 | append(ops, ce); |
| 3939 | append(ops, list_fetch(lfexps, 1)); |
| 3940 | append(ops, list_fetch(rfexps, 1)); |
| 3941 | append(ops, exp_atom_bool(sql->sa, 0)); /* non symetrical */ |
| 3942 | append(ops, exp_atom_bool(sql->sa, lf->func->base.name[1] == '=')); /* left inclusive */ |
| 3943 | append(ops, exp_atom_bool(sql->sa, rf->func->base.name[1] == '=')); /* right exclusive */ |
| 3944 | append(ops, exp_atom_bool(sql->sa, 0)); /* nils_false */ |
| 3945 | append(ops, exp_atom_bool(sql->sa, 0)); /* anti */ |
| 3946 | |
| 3947 | types = exp_types(sql->sa, ops); |
| 3948 | /* convert into between */ |
| 3949 | between = sql_bind_func_(sql->sa, mvc_bind_schema(sql, "sys" ), "between" , types, F_FUNC); |
| 3950 | if (between) { |
| 3951 | sql_exp *ne = exp_op(sql->sa, ops, between); |
| 3952 | |
| 3953 | exp_setname(sql->sa, ne, exp_relname(e), exp_name(e)); |
| 3954 | e = ne; |
| 3955 | } |
| 3956 | } |
| 3957 | } |
| 3958 | } else { |
| 3959 | e->l = exps_merge_project_rse(sql, fexps); |
| 3960 | } |
| 3961 | } |
| 3962 | append(nexps, e); |
| 3963 | } |
| 3964 | return nexps; |
| 3965 | } |
| 3966 | |
| 3967 | /* merge related sub expressions |
| 3968 | * |
| 3969 | * ie (x = a and y > 1 and y < 5) or |
| 3970 | * (x = c and y > 1 and y < 10) or |
| 3971 | * (x = e and y > 1 and y < 20) |
| 3972 | * -> |
| 3973 | * ((x = a and y > 1 and y < 5) or |
| 3974 | * (x = c and y > 1 and y < 10) or |
| 3975 | * (x = e and y > 1 and y < 20)) and |
| 3976 | * x in (a,c,e) and |
| 3977 | * y > 1 and y < 20 |
| 3978 | * */ |
| 3979 | static sql_rel * |
| 3980 | rel_merge_rse(int *changes, mvc *sql, sql_rel *rel) |
| 3981 | { |
| 3982 | /* only execute once per select */ |
| 3983 | (void)*changes; |
| 3984 | |
| 3985 | if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps) { |
| 3986 | node *n, *o; |
| 3987 | list *nexps = new_exp_list(sql->sa); |
| 3988 | |
| 3989 | for (n=rel->exps->h; n; n = n->next) { |
| 3990 | sql_exp *e = n->data; |
| 3991 | |
| 3992 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) { |
| 3993 | /* possibly merge related expressions */ |
| 3994 | list *ps = exps_merge_select_rse(sql, e->l, e->r); |
| 3995 | for (o = ps->h; o; o = o->next) |
| 3996 | append(nexps, o->data); |
| 3997 | } |
| 3998 | } |
| 3999 | if (list_length(nexps)) |
| 4000 | for (o = nexps->h; o; o = o->next) |
| 4001 | append(rel->exps, o->data); |
| 4002 | } |
| 4003 | /* the project case of rse */ |
| 4004 | if (is_project(rel->op) && rel->exps) |
| 4005 | rel->exps = exps_merge_project_rse(sql, rel->exps); |
| 4006 | return rel; |
| 4007 | } |
| 4008 | |
| 4009 | /* find in the list of expression an expression which uses e */ |
| 4010 | static sql_exp * |
| 4011 | exp_uses_exp( list *exps, sql_exp *e) |
| 4012 | { |
| 4013 | node *n; |
| 4014 | const char *rname = exp_relname(e); |
| 4015 | const char *name = exp_name(e); |
| 4016 | |
| 4017 | if (!exps) |
| 4018 | return NULL; |
| 4019 | |
| 4020 | for ( n = exps->h; n; n = n->next) { |
| 4021 | sql_exp *u = n->data; |
| 4022 | |
| 4023 | if (u->l && rname && strcmp(u->l, rname) == 0 && |
| 4024 | u->r && name && strcmp(u->r, name) == 0) |
| 4025 | return u; |
| 4026 | if (!u->l && !rname && |
| 4027 | u->r && name && strcmp(u->r, name) == 0) |
| 4028 | return u; |
| 4029 | } |
| 4030 | return NULL; |
| 4031 | } |
| 4032 | |
| 4033 | /* |
| 4034 | * Rewrite aggregations over union all. |
| 4035 | * groupby ([ union all (a, b) ], [gbe], [ count, sum ] ) |
| 4036 | * |
| 4037 | * into |
| 4038 | * groupby ( [ union all( groupby( a, [gbe], [ count, sum] ), [ groupby( b, [gbe], [ count, sum] )) , [gbe], [sum, sum] ) |
| 4039 | */ |
| 4040 | static sql_rel * |
| 4041 | rel_push_aggr_down(int *changes, mvc *sql, sql_rel *rel) |
| 4042 | { |
| 4043 | if (rel->op == op_groupby && rel->l) { |
| 4044 | sql_rel *u = rel->l, *ou = u; |
| 4045 | sql_rel *g = rel; |
| 4046 | sql_rel *ul = u->l; |
| 4047 | sql_rel *ur = u->r; |
| 4048 | node *n, *m; |
| 4049 | list *lgbe = NULL, *rgbe = NULL, *gbe = NULL, *exps = NULL; |
| 4050 | |
| 4051 | if (u->op == op_project) |
| 4052 | u = u->l; |
| 4053 | |
| 4054 | if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || rel_is_ref(u)) |
| 4055 | return rel; |
| 4056 | |
| 4057 | ul = u->l; |
| 4058 | ur = u->r; |
| 4059 | |
| 4060 | /* make sure we don't create group by on group by's */ |
| 4061 | if (ul->op == op_groupby || ur->op == op_groupby) |
| 4062 | return rel; |
| 4063 | |
| 4064 | rel->subquery = 0; |
| 4065 | /* distinct should be done over the full result */ |
| 4066 | for (n = g->exps->h; n; n = n->next) { |
| 4067 | sql_exp *e = n->data; |
| 4068 | sql_subaggr *af = e->f; |
| 4069 | |
| 4070 | if (e->type == e_atom || |
| 4071 | e->type == e_func || |
| 4072 | (e->type == e_aggr && |
| 4073 | ((strcmp(af->aggr->base.name, "sum" ) && |
| 4074 | strcmp(af->aggr->base.name, "count" ) && |
| 4075 | strcmp(af->aggr->base.name, "min" ) && |
| 4076 | strcmp(af->aggr->base.name, "max" )) || |
| 4077 | need_distinct(e)))) |
| 4078 | return rel; |
| 4079 | } |
| 4080 | |
| 4081 | ul = rel_dup(ul); |
| 4082 | ur = rel_dup(ur); |
| 4083 | if (!is_project(ul->op)) |
| 4084 | ul = rel_project(sql->sa, ul, |
| 4085 | rel_projections(sql, ul, NULL, 1, 1)); |
| 4086 | if (!is_project(ur->op)) |
| 4087 | ur = rel_project(sql->sa, ur, |
| 4088 | rel_projections(sql, ur, NULL, 1, 1)); |
| 4089 | rel_rename_exps(sql, u->exps, ul->exps); |
| 4090 | rel_rename_exps(sql, u->exps, ur->exps); |
| 4091 | if (u != ou) { |
| 4092 | ul = rel_project(sql->sa, ul, NULL); |
| 4093 | ul->exps = exps_copy(sql, ou->exps); |
| 4094 | rel_rename_exps(sql, ou->exps, ul->exps); |
| 4095 | ur = rel_project(sql->sa, ur, NULL); |
| 4096 | ur->exps = exps_copy(sql, ou->exps); |
| 4097 | rel_rename_exps(sql, ou->exps, ur->exps); |
| 4098 | } |
| 4099 | |
| 4100 | if (g->r && list_length(g->r) > 0) { |
| 4101 | list *gbe = g->r; |
| 4102 | |
| 4103 | lgbe = exps_copy(sql, gbe); |
| 4104 | rgbe = exps_copy(sql, gbe); |
| 4105 | } |
| 4106 | ul = rel_groupby(sql, ul, NULL); |
| 4107 | ul->r = lgbe; |
| 4108 | ul->nrcols = g->nrcols; |
| 4109 | ul->card = g->card; |
| 4110 | ul->exps = list_merge(exps_copy(sql, g->exps), exps_copy(sql, ul->r), (fdup)NULL); |
| 4111 | |
| 4112 | ur = rel_groupby(sql, ur, NULL); |
| 4113 | ur->r = rgbe; |
| 4114 | ur->nrcols = g->nrcols; |
| 4115 | ur->card = g->card; |
| 4116 | ur->exps = list_merge(exps_copy(sql, g->exps), exps_copy(sql, ur->r), (fdup)NULL); |
| 4117 | |
| 4118 | /* group by on primary keys which define the partioning scheme |
| 4119 | * don't need a finalizing group by */ |
| 4120 | /* how to check if a partion is based on some primary key ? |
| 4121 | * */ |
| 4122 | if (rel->r && list_length(rel->r)) { |
| 4123 | node *n; |
| 4124 | |
| 4125 | for (n = ((list*)rel->r)->h; n; n = n->next) { |
| 4126 | sql_exp *gbe = n->data; |
| 4127 | |
| 4128 | if (find_prop(gbe->p, PROP_HASHCOL)) { |
| 4129 | fcmp cmp = (fcmp)&kc_column_cmp; |
| 4130 | sql_column *c = exp_find_column(rel->l, gbe, -2); |
| 4131 | |
| 4132 | /* check if key is partition key */ |
| 4133 | if (c && c->t->p && list_find(c->t->pkey->k.columns, c, cmp) != NULL) { |
| 4134 | (*changes)++; |
| 4135 | return rel_inplace_setop(rel, ul, ur, op_union, |
| 4136 | rel_projections(sql, rel, NULL, 1, 1)); |
| 4137 | } |
| 4138 | } |
| 4139 | } |
| 4140 | } |
| 4141 | |
| 4142 | u = rel_setop(sql->sa, ul, ur, op_union); |
| 4143 | u->exps = rel_projections(sql, ul, NULL, 1, 1); |
| 4144 | set_processed(u); |
| 4145 | |
| 4146 | if (rel->r) { |
| 4147 | list *ogbe = rel->r; |
| 4148 | |
| 4149 | gbe = new_exp_list(sql->sa); |
| 4150 | for (n = ogbe->h; n; n = n->next) { |
| 4151 | sql_exp *e = n->data, *ne; |
| 4152 | |
| 4153 | ne = exp_uses_exp( rel->exps, e); |
| 4154 | //assert(ne); |
| 4155 | if (!ne) |
| 4156 | ne = e; |
| 4157 | ne = list_find_exp( u->exps, ne); |
| 4158 | assert(ne); |
| 4159 | ne = exp_column(sql->sa, exp_find_rel_name(ne), exp_name(ne), exp_subtype(ne), ne->card, has_nil(ne), is_intern(ne)); |
| 4160 | append(gbe, ne); |
| 4161 | } |
| 4162 | } |
| 4163 | exps = new_exp_list(sql->sa); |
| 4164 | for (n = u->exps->h, m = rel->exps->h; n && m; n = n->next, m = m->next) { |
| 4165 | sql_exp *ne, *e = n->data, *oa = m->data; |
| 4166 | |
| 4167 | if (oa->type == e_aggr) { |
| 4168 | sql_subaggr *f = oa->f; |
| 4169 | int cnt = exp_aggr_is_count(oa); |
| 4170 | sql_subaggr *a = sql_bind_aggr(sql->sa, sql->session->schema, (cnt)?"sum" :f->aggr->base.name, exp_subtype(e)); |
| 4171 | |
| 4172 | assert(a); |
| 4173 | /* union of aggr result may have nils |
| 4174 | * because sum/count of empty set */ |
| 4175 | set_has_nil(e); |
| 4176 | e = exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e)); |
| 4177 | ne = exp_aggr1(sql->sa, e, a, need_distinct(e), 1, e->card, 1); |
| 4178 | if (/* DISABLES CODE */ (0) && cnt) |
| 4179 | ne->p = prop_create(sql->sa, PROP_COUNT, ne->p); |
| 4180 | } else { |
| 4181 | ne = exp_copy(sql, oa); |
| 4182 | } |
| 4183 | exp_setname(sql->sa, ne, exp_find_rel_name(oa), exp_name(oa)); |
| 4184 | append(exps, ne); |
| 4185 | } |
| 4186 | (*changes)++; |
| 4187 | return rel_inplace_groupby( rel, u, gbe, exps); |
| 4188 | } |
| 4189 | return rel; |
| 4190 | } |
| 4191 | |
| 4192 | /* |
| 4193 | * More general |
| 4194 | * groupby( |
| 4195 | * [ outer ] join( |
| 4196 | * project( |
| 4197 | * table(A) [ c1, c2, .. ] |
| 4198 | * ) [ c1, c2, identity(c2) as I, .. ], |
| 4199 | * table(B) [ c1, c2, .. ] |
| 4200 | * ) [ A.c1 = B.c1 ] |
| 4201 | * ) [ I ] [ a1, a2, .. ] |
| 4202 | * |
| 4203 | * -> |
| 4204 | * |
| 4205 | * [ outer ] join( |
| 4206 | * project( |
| 4207 | * table(A) [ c1, c2, .. ] |
| 4208 | * ) [ c1, c2, .. ], |
| 4209 | * groupby ( |
| 4210 | * table(B) [ c1, c2, .. ] |
| 4211 | * ) [ B.c1 ] [ a1, a2, .. ] |
| 4212 | * ) [ A.c1 = B.c1 ] |
| 4213 | */ |
| 4214 | static sql_rel * |
| 4215 | gen_push_groupby_down(int *changes, mvc *sql, sql_rel *rel) |
| 4216 | { |
| 4217 | sql_rel *j = rel->l; |
| 4218 | list *gbe = rel->r; |
| 4219 | |
| 4220 | (void)changes; |
| 4221 | if (rel->op == op_groupby && list_length(gbe) == 1 && j->op == op_join){ //&& is_join(j->op)) { |
| 4222 | sql_rel *jl = j->l, *jr = j->r, *cr, *cl; |
| 4223 | sql_exp *gb = gbe->h->data, *e; |
| 4224 | node *n; |
| 4225 | int left = 1; |
| 4226 | list *aggrs, *aliases, *gbe; |
| 4227 | |
| 4228 | if (!is_identity(gb, jl) && !is_identity(gb, jr)) |
| 4229 | return rel; |
| 4230 | if (jl->op == op_project && |
| 4231 | (e = list_find_exp( jl->exps, gb)) != NULL && |
| 4232 | find_prop(e->p, PROP_HASHCOL) != NULL) { |
| 4233 | left = 0; |
| 4234 | cr = jr; |
| 4235 | cl = jl; |
| 4236 | } else if (jr->op == op_project && |
| 4237 | (e = list_find_exp( jr->exps, gb)) != NULL && |
| 4238 | find_prop(e->p, PROP_HASHCOL) != NULL) { |
| 4239 | left = 1; |
| 4240 | cr = jl; |
| 4241 | cl = jr; |
| 4242 | } else { |
| 4243 | return rel; |
| 4244 | } |
| 4245 | |
| 4246 | if ((left && is_base(jl->op)) || (!left && is_base(jr->op))|| |
| 4247 | (left && is_select(jl->op)) || (!left && is_select(jr->op)) |
| 4248 | || rel_is_join_on_pkey(j)) |
| 4249 | return rel; |
| 4250 | |
| 4251 | /* only add aggr (based on left/right), and repeat the group by column */ |
| 4252 | aggrs = sa_list(sql->sa); |
| 4253 | aliases = sa_list(sql->sa); |
| 4254 | if (rel->exps) for (n = rel->exps->h; n; n = n->next) { |
| 4255 | sql_exp *ce = n->data; |
| 4256 | |
| 4257 | if (exp_is_atom(ce)) |
| 4258 | list_append(aliases, ce); |
| 4259 | else if (ce->type == e_column) { |
| 4260 | if (rel_has_exp(cl, ce) == 0) /* collect aliases outside groupby */ |
| 4261 | list_append(aliases, ce); |
| 4262 | else |
| 4263 | list_append(aggrs, ce); |
| 4264 | } else if (ce->type == e_aggr) { |
| 4265 | list *args = ce->l; |
| 4266 | |
| 4267 | /* check args are part of left/right */ |
| 4268 | if (!list_empty(args) && rel_has_exps(cl, args) == 0) |
| 4269 | return rel; |
| 4270 | if (rel->op != op_join && exp_aggr_is_count(ce)) |
| 4271 | ce->p = prop_create(sql->sa, PROP_COUNT, ce->p); |
| 4272 | list_append(aggrs, ce); |
| 4273 | } |
| 4274 | } |
| 4275 | /* TODO move any column expressions (aliases) into the project list */ |
| 4276 | |
| 4277 | /* find gb in left or right and should be unique */ |
| 4278 | gbe = sa_list(sql->sa); |
| 4279 | /* push groupby to right, group on join exps */ |
| 4280 | if (j->exps) for (n = j->exps->h; n; n = n->next) { |
| 4281 | sql_exp *ce = n->data, *e; |
| 4282 | |
| 4283 | /* get left/right hand of e_cmp */ |
| 4284 | assert(ce->type == e_cmp); |
| 4285 | if (ce->flag != cmp_equal) |
| 4286 | return rel; |
| 4287 | e = rel_find_exp(cr, ce->l); |
| 4288 | if (!e) |
| 4289 | e = rel_find_exp(cr, ce->r); |
| 4290 | if (!e) |
| 4291 | return rel; |
| 4292 | e = exp_ref(sql->sa, e); |
| 4293 | list_append(gbe, e); |
| 4294 | } |
| 4295 | if (!left) |
| 4296 | cr = j->r = rel_groupby(sql, cr, gbe); |
| 4297 | else |
| 4298 | cr = j->l = rel_groupby(sql, cr, gbe); |
| 4299 | cr->exps = list_merge(cr->exps, aggrs, (fdup)NULL); |
| 4300 | if (!is_project(cl->op)) |
| 4301 | cl = rel_project(sql->sa, cl, |
| 4302 | rel_projections(sql, cl, NULL, 1, 1)); |
| 4303 | cl->exps = list_merge(cl->exps, aliases, (fdup)NULL); |
| 4304 | if (!left) |
| 4305 | j->l = cl; |
| 4306 | else |
| 4307 | j->r = cl; |
| 4308 | rel -> l = NULL; |
| 4309 | rel_destroy(rel); |
| 4310 | |
| 4311 | if (list_empty(cr->exps) && list_empty(j->exps)) { /* remove crossproduct */ |
| 4312 | sql_rel *r = cl; |
| 4313 | if (!left) |
| 4314 | j->l = NULL; |
| 4315 | else |
| 4316 | j->r = NULL; |
| 4317 | rel_destroy(j); |
| 4318 | j = r; |
| 4319 | } |
| 4320 | return j; |
| 4321 | } |
| 4322 | return rel; |
| 4323 | } |
| 4324 | |
| 4325 | /* |
| 4326 | * Rewrite group(project(join(A,Dict)[a.i==dict.i])[...dict.n])[dict.n][ ... dict.n ] |
| 4327 | * into |
| 4328 | * project(join(groupby (A)[a.i],[a.i]), Dict)[a.i==dict.i])[dict.n] |
| 4329 | * |
| 4330 | */ |
| 4331 | static sql_rel * |
| 4332 | rel_push_groupby_down(int *changes, mvc *sql, sql_rel *rel) |
| 4333 | { |
| 4334 | sql_rel *p = rel->l; |
| 4335 | list *gbe = rel->r; |
| 4336 | |
| 4337 | if (rel->op == op_groupby && gbe && p && is_join(p->op)) |
| 4338 | return gen_push_groupby_down(changes, sql, rel); |
| 4339 | if (rel->op == op_groupby && gbe && p && p->op == op_project) { |
| 4340 | sql_rel *j = p->l; |
| 4341 | sql_rel *jl, *jr; |
| 4342 | node *n; |
| 4343 | |
| 4344 | if (!j || j->op != op_join || list_length(j->exps) != 1) |
| 4345 | return gen_push_groupby_down(changes, sql, rel); |
| 4346 | jl = j->l; |
| 4347 | jr = j->r; |
| 4348 | |
| 4349 | /* check if jr is a dict with index and var still used */ |
| 4350 | if (jr->op != op_basetable || jr->l || !jr->r || list_length(jr->exps) != 2) |
| 4351 | return gen_push_groupby_down(changes, sql, rel); |
| 4352 | |
| 4353 | /* check if group by is done on dict column */ |
| 4354 | for(n = gbe->h; n; n = n->next) { |
| 4355 | sql_exp *ge = n->data, *pe = NULL, *e = NULL; |
| 4356 | |
| 4357 | /* find group by exp in project, then in dict */ |
| 4358 | pe = rel_find_exp(p, ge); |
| 4359 | if (pe) /* find project exp in right hand of join, ie dict */ |
| 4360 | e = rel_find_exp(jr, pe); |
| 4361 | if (pe && e) { /* Rewrite: join with dict after the group by */ |
| 4362 | list *pexps = rel_projections(sql, rel, NULL, 1, 1), *npexps; |
| 4363 | node *m; |
| 4364 | sql_exp *ne = j->exps->h->data; /* join exp */ |
| 4365 | p->l = jl; /* Project now only on the left side of the join */ |
| 4366 | |
| 4367 | ne = ne->l; /* The left side of the compare is the index of the left */ |
| 4368 | |
| 4369 | /* find ge reference in new projection list */ |
| 4370 | npexps = sa_list(sql->sa); |
| 4371 | for (m = pexps->h; m; m = m->next) { |
| 4372 | sql_exp *a = m->data; |
| 4373 | |
| 4374 | if (exp_refers(ge, a)) { |
| 4375 | sql_exp *sc = jr->exps->t->data; |
| 4376 | sql_exp *e = exp_ref(sql->sa, sc); |
| 4377 | exp_setname(sql->sa, e, exp_relname(a), exp_name(a)); |
| 4378 | a = e; |
| 4379 | } |
| 4380 | append(npexps, a); |
| 4381 | } |
| 4382 | |
| 4383 | /* find ge in aggr list */ |
| 4384 | for (m = rel->exps->h; m; m = m->next) { |
| 4385 | sql_exp *a = m->data; |
| 4386 | |
| 4387 | if (exp_match_exp(a, ge) || exp_refers(ge, a)) { |
| 4388 | a = exp_ref(sql->sa, ne); |
| 4389 | exp_setname(sql->sa, a, exp_relname(ne), exp_name(ne)); |
| 4390 | m->data = a; |
| 4391 | } |
| 4392 | } |
| 4393 | |
| 4394 | /* change alias pe, ie project out the index */ |
| 4395 | pe->l = (void*)exp_relname(ne); |
| 4396 | pe->r = (void*)exp_name(ne); |
| 4397 | exp_setname(sql->sa, pe, exp_relname(ne), exp_name(ne)); |
| 4398 | |
| 4399 | /* change alias ge */ |
| 4400 | ge->l = (void*)exp_relname(pe); |
| 4401 | ge->r = (void*)exp_name(pe); |
| 4402 | exp_setname(sql->sa, ge, exp_relname(pe), exp_name(pe)); |
| 4403 | |
| 4404 | /* zap both project and groupby name hash tables (as we changed names above) */ |
| 4405 | rel->exps->ht = NULL; |
| 4406 | ((list*)rel->r)->ht = NULL; |
| 4407 | p->exps->ht = NULL; |
| 4408 | |
| 4409 | /* add join */ |
| 4410 | j->l = rel; |
| 4411 | rel = rel_project(sql->sa, j, npexps); |
| 4412 | (*changes)++; |
| 4413 | } |
| 4414 | } |
| 4415 | (void)sql; |
| 4416 | } |
| 4417 | return rel; |
| 4418 | } |
| 4419 | |
/*
 * Push select down, pushes the selects through (simple) projections. Also
 * it cleans up the projections which become useless.
 *
 * Cases handled, in this order:
 *  - a referenced (shared) select: its predicates are moved into a new
 *    select placed below it, and the shared node keeps no predicates;
 *  - select on select: the predicate lists are merged into the lower select;
 *  - select on semi/anti join: the select is moved to the join's left input;
 *  - project on project: delegated to rel_merge_projects();
 *  - select on join: predicates are pushed into selects on the join inputs
 *    where exp_push_down() succeeds, and for an inner join the remaining
 *    predicates are then split between the join and the select;
 *  - select on project: comparisons are rewritten with exp_push_down_prj()
 *    and moved below the project when possible.
 * *changes is incremented for every predicate or relation that is moved.
 */

/* TODO push select expressions in outer joins down */
static sql_rel *
rel_push_select_down(int *changes, mvc *sql, sql_rel *rel)
{
	list *exps = NULL;
	sql_rel *r = NULL;
	node *n;

	/* a shared relation must keep its shape; only relocate its predicates */
	if (rel_is_ref(rel)) {
		if (is_select(rel->op) && rel->exps) {
			/* add inplace empty select */
			sql_rel *l = rel_select(sql->sa, rel->l, NULL);

			if (!l->exps)
				l->exps = sa_list(sql->sa);
			(void)list_merge(l->exps, rel->exps, (fdup)NULL);
			rel->exps = NULL;
			rel->l = l;
			(*changes)++;
		}
		return rel;
	}

	/* don't make changes for empty selects */
	if (is_select(rel->op) && (!rel->exps || list_length(rel->exps) == 0))
		return rel;

	/* merge 2 selects */
	r = rel->l;
	if (is_select(rel->op) && r && r->exps && is_select(r->op) && !(rel_is_ref(r))) {
		(void)list_merge(r->exps, rel->exps, (fdup)NULL);
		/* detach r before destroying the upper select */
		rel->l = NULL;
		rel_destroy(rel);
		(*changes)++;
		/* continue pushing from the merged select */
		return rel_push_select_down(changes, sql, r);
	}
	/*
	 * Push select through semi/anti join
	 * 	select (semi(A,B)) == semi(select(A), B)
	 */
	if (is_select(rel->op) && r && is_semi(r->op) && !(rel_is_ref(r))) {
		/* swap the two nodes: the semijoin r becomes the parent of rel */
		rel->l = r->l;
		r->l = rel;
		(*changes)++;
		/*
		 * if A has 2 references (ie used on both sides of
		 * the semi join), we also push the select into A.
		 */
		if (rel_is_ref(rel->l) && rel->l == rel_find_ref(r->r)){
			sql_rel *lx = rel->l;
			sql_rel *rx = r->r;
			if (lx->ref.refcnt == 2 && !rel_is_ref(rx)) {
				/* walk down the right side's left spine to the
				 * node whose left child is a shared relation */
				while (rx->l && !rel_is_ref(rx->l) &&
				      (is_project(rx->op) ||
				       is_select(rx->op) ||
				       is_join(rx->op)))
					rx = rx->l;
				/* probably we need to introduce a project */
				rel_destroy(rel->l);
				lx = rel_project(sql->sa, rel, rel_projections(sql, rel, NULL, 1, 1));
				r->l = lx;
				rx->l = rel_dup(lx);
			}
		}
		return r;
	}
	exps = rel->exps;

	if (rel->op == op_project &&
	    r && r->op == op_project && !(rel_is_ref(r)))
		return rel_merge_projects(changes, sql, rel);

	/* push select through join */
	if (is_select(rel->op) && r && is_join(r->op) && !(rel_is_ref(r))) {
		sql_rel *jl = r->l;
		sql_rel *jr = r->r;
		/* which join inputs may receive predicates */
		int left = r->op == op_join || r->op == op_left;
		int right = r->op == op_join || r->op == op_right;

		if (r->op == op_full)
			return rel;

		/* introduce selects under the join (if needed) */
		set_processed(jl);
		set_processed(jr);
		if (!is_select(jl->op))
			r->l = jl = rel_select(sql->sa, jl, NULL);
		if (!is_select(jr->op))
			r->r = jr = rel_select(sql->sa, jr, NULL);

		/* rebuild rel->exps with the predicates that could not be pushed */
		rel->exps = new_exp_list(sql->sa);
		for (n = exps->h; n; n = n->next) {
			sql_exp *e = n->data, *ne = NULL;
			int done = 0;

			if (left)
				ne = exp_push_down(sql, e, jl, jl);
			if (ne && ne != e) {
				done = 1;
				rel_select_add_exp(sql->sa, jl, ne);
			} else if (right) {
				ne = exp_push_down(sql, e, jr, jr);
				if (ne && ne != e) {
					done = 1;
					rel_select_add_exp(sql->sa, jr, ne);
				}
			}
			if (!done)
				append(rel->exps, e);
			*changes += done;
		}
		/* no return here: an inner join continues with the next case */
	}

	/* merge select and cross product ? */
	if (is_select(rel->op) && r && r->op == op_join && !(rel_is_ref(r))) {
		list *exps = rel->exps;

		if (!r->exps)
			r->exps = new_exp_list(sql->sa);
		rel->exps = new_exp_list(sql->sa);
		for (n = exps->h; n; n = n->next) {
			sql_exp *e = n->data;

			/* predicates for which exp_is_join() returns 0 move
			 * into the join, the others stay on the select */
			//if (exp_is_join_exp(e) == 0) {
			if (exp_is_join(e, NULL) == 0) {
				append(r->exps, e);
				(*changes)++;
			} else {
				append(rel->exps, e);
			}
		}
		return rel;
	}

	if (is_select(rel->op) && r && r->op == op_project && !(rel_is_ref(r))){
		list *exps = rel->exps;
		sql_rel *pl;
		/* we cannot push through rank (row_number etc) functions or
		   projects with distinct */
		if (!r->l || project_unsafe(r,1))
			return rel;

		/* here we need to fix aliases */
		rel->exps = new_exp_list(sql->sa);
		pl = r->l;
		/* introduce selects under the project (if needed) */
		set_processed(pl);
		if (!is_select(pl->op) || rel_is_ref(pl))
			r->l = pl = rel_select(sql->sa, pl, NULL);

		/* for each exp check if we can rename it */
		for (n = exps->h; n; n = n->next) {
			sql_exp *e = n->data, *ne = NULL;

			if (e->type == e_cmp) {
				ne = exp_push_down_prj(sql, e, r, pl);

				/* can we move it down */
				if (ne && ne != e && pl->exps) {
					rel_select_add_exp(sql->sa, pl, ne);
					(*changes)++;
				} else {
					/* keep it above the project */
					append(rel->exps, (ne)?ne:e);
				}
			} else {
				list_append(rel->exps, e);
			}
		}
		return rel;
	}
	return rel;
}
| 4597 | |
| 4598 | static sql_rel * |
| 4599 | rel_push_select_down_join(int *changes, mvc *sql, sql_rel *rel) |
| 4600 | { |
| 4601 | list *exps = NULL; |
| 4602 | sql_rel *r = NULL; |
| 4603 | node *n; |
| 4604 | |
| 4605 | exps = rel->exps; |
| 4606 | r = rel->l; |
| 4607 | |
| 4608 | /* push select through join */ |
| 4609 | if (is_select(rel->op) && exps && r && r->op == op_join && !(rel_is_ref(r))) { |
| 4610 | rel->exps = new_exp_list(sql->sa); |
| 4611 | for (n = exps->h; n; n = n->next) { |
| 4612 | sql_exp *e = n->data; |
| 4613 | if (e->type == e_cmp && !e->f && !is_complex_exp(e->flag)) { |
| 4614 | sql_rel *nr = NULL; |
| 4615 | sql_exp *re = e->r, *ne = rel_find_exp(r, re); |
| 4616 | |
| 4617 | if (ne && ne->card >= CARD_AGGR) |
| 4618 | re->card = ne->card; |
| 4619 | |
| 4620 | if (re->card >= CARD_AGGR) { |
| 4621 | nr = rel_push_join(sql, r, e->l, re, NULL, e); |
| 4622 | } else { |
| 4623 | nr = rel_push_select(sql, r, e->l, e); |
| 4624 | } |
| 4625 | if (nr) |
| 4626 | rel->l = nr; |
| 4627 | /* only pushed down selects are counted */ |
| 4628 | if (r == rel->l) { |
| 4629 | (*changes)++; |
| 4630 | } else { /* Do not introduce an extra select */ |
| 4631 | sql_rel *r = rel->l; |
| 4632 | |
| 4633 | rel->l = r->l; |
| 4634 | r->l = NULL; |
| 4635 | list_append(rel->exps, e); |
| 4636 | rel_destroy(r); |
| 4637 | } |
| 4638 | assert(r == rel->l); |
| 4639 | } else { |
| 4640 | list_append(rel->exps, e); |
| 4641 | } |
| 4642 | } |
| 4643 | return rel; |
| 4644 | } |
| 4645 | return rel; |
| 4646 | } |
| 4647 | |
| 4648 | static sql_rel * |
| 4649 | rel_remove_empty_select(int *changes, mvc *sql, sql_rel *rel) |
| 4650 | { |
| 4651 | (void)sql; |
| 4652 | |
| 4653 | if ((is_join(rel->op) || is_semi(rel->op) || is_select(rel->op) || is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op)) && rel->l) { |
| 4654 | sql_rel *l = rel->l; |
| 4655 | if (is_select(l->op) && !(rel_is_ref(l)) && list_empty(l->exps)) { |
| 4656 | rel->l = l->l; |
| 4657 | l->l = NULL; |
| 4658 | rel_destroy(l); |
| 4659 | (*changes)++; |
| 4660 | } |
| 4661 | } |
| 4662 | if ((is_join(rel->op) || is_semi(rel->op) || is_set(rel->op)) && rel->r) { |
| 4663 | sql_rel *r = rel->r; |
| 4664 | if (is_select(r->op) && !(rel_is_ref(r)) && list_empty(r->exps)) { |
| 4665 | rel->r = r->l; |
| 4666 | r->l = NULL; |
| 4667 | rel_destroy(r); |
| 4668 | (*changes)++; |
| 4669 | } |
| 4670 | } |
| 4671 | if (is_join(rel->op) && list_empty(rel->exps)) |
| 4672 | rel->exps = NULL; /* crossproduct */ |
| 4673 | return rel; |
| 4674 | } |
| 4675 | |
| 4676 | /* |
| 4677 | * Push {semi}joins down, pushes the joins through group by expressions. |
| 4678 | * When the join is on the group by columns, we can push the joins left |
| 4679 | * under the group by. This should only be done, iff the new semijoin would |
| 4680 | * reduce the input table to the groupby. So there should be a reduction |
| 4681 | * (selection) on the table A and this should be propagated to the groupby via |
| 4682 | * for example a primary key. |
| 4683 | * |
| 4684 | * {semi}join( A, groupby( B ) [gbe][aggrs] ) [ gbe == A.x ] |
| 4685 | * -> |
| 4686 | * {semi}join( A, groupby( semijoin(B,A) [gbe == A.x] ) [gbe][aggrs] ) [ gbe == A.x ] |
| 4687 | */ |
| 4688 | |
/*
 * Rewriter: when the right input of a (semi)join is a group by (possibly
 * under one project) whose group by columns are matched by the join
 * expressions, add a semijoin with the join's left input underneath that
 * group by.  The tree is modified in place, 'rel' itself is always returned
 * and 'changes' is never updated.
 */
static sql_rel *
rel_push_join_down(int *changes, mvc *sql, sql_rel *rel)
{
	list *exps = NULL;

	(void)*changes;
	if (!rel_is_ref(rel) && ((is_join(rel->op) || is_semi(rel->op)) && rel->l && rel->exps)) {
		/* ogb keeps the original right input, gb may step below a project */
		sql_rel *gb = rel->r, *ogb = gb, *l = NULL, *rell = rel->l;

		if (gb->op == op_project)
			gb = gb->l;

		/* nothing is pushed when the left input is a base table or rel_is_ref() holds for it */
		if (is_basetable(rell->op) || rel_is_ref(rell))
			return rel;

		exps = rel->exps;
		if (gb && gb->op == op_groupby && gb->r && list_length(gb->r)) {
			/* jes collects the expressions of the new semijoin */
			list *jes = new_exp_list(sql->sa);
			node *n, *m;
			list *gbes = gb->r;
			/* find out if all group by expressions are used in the join */
			for(n = gbes->h; n; n = n->next) {
				sql_exp *gbe = n->data;
				int fnd = 0;
				const char *rname = NULL, *name = NULL;

				/* project in between, ie find alias */
				/* first find expression in expression list */
				gbe = exp_uses_exp( gb->exps, gbe);
				/* a group by expression not found in gb->exps is skipped, not rejected */
				if (!gbe)
					continue;
				if (ogb != gb)
					gbe = exp_uses_exp( ogb->exps, gbe);
				if (gbe) {
					rname = exp_find_rel_name(gbe);
					name = exp_name(gbe);
				}

				/* without a name the column cannot be matched: give up */
				if (!name)
					return rel;

				/* scan the join expressions until one matches this group by column */
				for (m = exps->h; m && !fnd; m = m->next) {
					sql_exp *je = m->data;

					if (je->card >= CARD_ATOM && je->type == e_cmp &&
					    !is_complex_exp(je->flag)) {
						/* expect right expression to match */
						sql_exp *r = je->r;

						if (r == 0 || r->type != e_column)
							continue;
						/* compare relation name (when both have one) and column name */
						if (r->l && rname && strcmp(r->l, rname) == 0 && strcmp(r->r, name)==0) {
							fnd = 1;
						} else if (!r->l && !rname && strcmp(r->r, name)==0) {
							fnd = 1;
						}
						if (fnd) {
							sql_exp *le = je->l;
							/* rewrite the right operand in terms of the group by's input */
							sql_exp *re = exp_push_down_prj(sql, r, gb, gb->l);
							/* the first collected expression must have PROP_HASHCOL on its left operand;
							 * on failure fnd is reset, so the scan continues with the next join expression */
							if (!re || (list_length(jes) == 0 && !find_prop(le->p, PROP_HASHCOL))) {
								fnd = 0;
							} else {
								int anti = is_anti(je);

								/* build a fresh compare and carry over the anti flag */
								je = exp_compare(sql->sa, le, re, je->flag);
								if (anti) set_anti(je);
								list_append(jes, je);
							}
						}
					}
				}
				/* no usable join expression for this group by column: leave the plan untouched */
				if (!fnd)
					return rel;
			}
			l = rel_dup(rel->l);

			/* push join's left side (as semijoin) down group by */
			l = gb->l = rel_crossproduct(sql->sa, gb->l, l, op_semi);
			l->exps = jes;
			return rel;
		}
	}
	return rel;
}
| 4773 | |
/*
 * Push semijoins down, ie push the semijoin through a join.
 *
 * semijoin( join(A, B) [ A.x == B.y ], C ) [ A.z == C.c ]
 * ->
 * join( semijoin(A, C) [ A.z == C.c ], B ) [ A.x == B.y ]
 *
 * Also push simple expressions of a semijoin down if they only
 * involve the left side of the semijoin.
 *
 * In some cases the other direction is useful, ie push the join down
 * the semijoin. This is done when the join reduces (ie when there are
 * selects on it).
 */
static sql_rel *
rel_push_semijoin_down_or_up(int *changes, mvc *sql, sql_rel *rel)
{
	/* this rewriter never updates 'changes' */
	(void)*changes;

	/* pull up: join(semijoin(X, ..), select(..)) is relinked so that the
	 * semijoin becomes the root and the join (now on X) its left input */
	if (rel->op == op_join && rel->exps && rel->l) {
		sql_rel *l = rel->l, *r = rel->r;

		if (is_semi(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
			rel->l = l->l;
			l->l = rel;
			return l;
		}
	}
	/* also case with 2 joins */
	/* join ( join ( semijoin(), table), select (table)); */
	if (rel->op == op_join && rel->exps && rel->l) {
		sql_rel *l = rel->l, *r = rel->r;
		sql_rel *ll;

		if (is_join(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) {
			ll = l->l;
			/* the semijoin two levels down becomes the new root */
			if (is_semi(ll->op) && !rel_is_ref(ll)) {
				l->l = ll->l;
				ll->l = rel;
				return ll;
			}
		}
	}
	/* first push down the expressions involving only A */
	if (rel->op == op_semi && rel->exps && rel->l) {
		list *exps = rel->exps, *nexps = sa_list(sql->sa);
		node *n;

		/* allocation failure is reported by returning NULL */
		if (nexps == NULL)
			return NULL;
		for(n = exps->h; n; n = n->next) {
			sql_exp *sje = n->data;

			/* the first expression always stays on the semijoin; every other
			 * simple compare with both operands reported by rel_has_exp() for
			 * the left input is moved into a new select on top of that input */
			if (n != exps->h && sje->type == e_cmp &&
			    !is_complex_exp(sje->flag) &&
			    rel_has_exp(rel->l, sje->l) >= 0 &&
			    rel_has_exp(rel->l, sje->r) >= 0) {
				rel->l = rel_select(sql->sa, rel->l, NULL);
				rel_select_add_exp(sql->sa, rel->l, sje);
			} else {
				append(nexps, sje);
			}
		}
		rel->exps = nexps;
	}
	/* push down: semijoin(join(ll, lr), r) is rebuilt as
	 * join(semijoin(ll or lr, r), other side) */
	if (rel->op == op_semi && rel->exps && rel->l) {
		operator_type op = rel->op, lop;
		node *n;
		sql_rel *l = rel->l, *ll = NULL, *lr = NULL;
		sql_rel *r = rel->r;
		list *exps = rel->exps, *nsexps, *njexps;
		/* right: no semijoin expression touches lr so far (semijoin goes onto ll)
		 * left: same for ll, only considered once 'right' has been cleared */
		int left = 1, right = 1;

		/* handle project
		if (l->op == op_project && !need_distinct(l))
			l = l->l;
		*/

		if (!is_join(l->op) || rel_is_ref(l))
			return rel;

		lop = l->op;
		ll = l->l;
		lr = l->r;
		/* semijoin shouldn't be based on right relation of join */
		for(n = exps->h; n; n = n->next) {
			sql_exp *sje = n->data;

			/* only compare expressions are handled */
			if (sje->type != e_cmp)
				return rel;
			if (right &&
			    (is_complex_exp(sje->flag) ||
			     rel_has_exp(lr, sje->l) >= 0 ||
			     rel_has_exp(lr, sje->r) >= 0)) {
				right = 0;
			}
			if (right)
				left = 0;
			if (!right && left &&
			    (is_complex_exp(sje->flag) ||
			     rel_has_exp(ll, sje->l) >= 0 ||
			     rel_has_exp(ll, sje->r) >= 0)) {
				left = 0;
			}
			/* neither side can take the semijoin: keep the plan */
			if (!right && !left)
				return rel;
		}
		/* copy both expression lists before the old tree is destroyed */
		nsexps = exps_copy(sql, rel->exps);
		njexps = exps_copy(sql, l->exps);
		/* new semijoin on the chosen join input ... */
		if (right)
			l = rel_crossproduct(sql->sa, rel_dup(ll), rel_dup(r), op);
		else
			l = rel_crossproduct(sql->sa, rel_dup(lr), rel_dup(r), op);
		l->exps = nsexps;
		/* ... then the original join on top of it with the remaining input */
		if (right)
			l = rel_crossproduct(sql->sa, l, rel_dup(lr), lop);
		else
			l = rel_crossproduct(sql->sa, l, rel_dup(ll), lop);
		l->exps = njexps;
		rel_destroy(rel);
		rel = l;
	}
	return rel;
}
| 4897 | |
| 4898 | static int |
| 4899 | rel_part_nr( sql_rel *rel, sql_exp *e ) |
| 4900 | { |
| 4901 | sql_column *c; |
| 4902 | sql_table *pp; |
| 4903 | assert(e->type == e_cmp); |
| 4904 | |
| 4905 | c = exp_find_column(rel, e->l, -1); |
| 4906 | if (!c) |
| 4907 | c = exp_find_column(rel, e->r, -1); |
| 4908 | if (!c) |
| 4909 | return -1; |
| 4910 | pp = c->t; |
| 4911 | if (pp->p) |
| 4912 | return list_position(pp->p->members.set, pp); |
| 4913 | return -1; |
| 4914 | } |
| 4915 | |
| 4916 | static int |
| 4917 | rel_uses_part_nr( sql_rel *rel, sql_exp *e, int pnr ) |
| 4918 | { |
| 4919 | sql_column *c; |
| 4920 | assert(e->type == e_cmp); |
| 4921 | |
| 4922 | /* |
| 4923 | * following case fails. |
| 4924 | * |
| 4925 | * semijoin( A1, union [A1, A2] ) |
| 4926 | * The union will never return proper column (from A2). |
| 4927 | * ie need different solution (probaly pass pnr). |
| 4928 | */ |
| 4929 | c = exp_find_column(rel, e->l, pnr); |
| 4930 | if (!c) |
| 4931 | c = exp_find_column(rel, e->r, pnr); |
| 4932 | if (c) { |
| 4933 | sql_table *pp = c->t; |
| 4934 | if (pp->p && list_position(pp->p->members.set, pp) == pnr) |
| 4935 | return 1; |
| 4936 | } |
| 4937 | /* for projects we may need to do a rename! */ |
| 4938 | if (is_project(rel->op) || is_topn(rel->op) || is_sample(rel->op)) |
| 4939 | return rel_uses_part_nr( rel->l, e, pnr); |
| 4940 | |
| 4941 | if (is_union(rel->op) || is_join(rel->op) || is_semi(rel->op)) { |
| 4942 | if (rel_uses_part_nr( rel->l, e, pnr)) |
| 4943 | return 1; |
| 4944 | if (!is_semi(rel->op) && rel_uses_part_nr( rel->r, e, pnr)) |
| 4945 | return 1; |
| 4946 | } |
| 4947 | return 0; |
| 4948 | } |
| 4949 | |
| 4950 | static int |
| 4951 | rel_has_cmp_exp(sql_rel *rel, sql_exp *e) |
| 4952 | { |
| 4953 | if (e->type == e_cmp) { |
| 4954 | if (get_cmp(e) == cmp_or) { |
| 4955 | return rel_has_exp(rel, e->l) == 0 && |
| 4956 | rel_has_all_exps(rel, e->r); |
| 4957 | } else if (e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter) { |
| 4958 | return rel_has_all_exps(rel, e->l) && |
| 4959 | rel_has_all_exps(rel, e->r); |
| 4960 | } else { |
| 4961 | return rel_has_exp(rel, e->l) == 0 && |
| 4962 | rel_has_exp(rel, e->r) == 0 && |
| 4963 | (!e->f || rel_has_exp(rel, e->f) == 0); |
| 4964 | } |
| 4965 | } |
| 4966 | return 0; |
| 4967 | } |
| 4968 | |
| 4969 | static sql_rel * |
| 4970 | rel_join_push_exps_down(int *changes, mvc *sql, sql_rel *rel) |
| 4971 | { |
| 4972 | if ((is_join(rel->op) && !is_outerjoin(rel->op)) || is_semi(rel->op)) { |
| 4973 | sql_rel *l = rel->l, *r = rel->r; |
| 4974 | list *jexps = NULL, *lexps = NULL, *rexps = NULL; |
| 4975 | node *n; |
| 4976 | |
| 4977 | if (list_empty(rel->exps)) |
| 4978 | return rel; |
| 4979 | |
| 4980 | for(n=rel->exps->h; n; n=n->next) { |
| 4981 | sql_exp *e = n->data; |
| 4982 | int le = rel_has_cmp_exp(l, e); |
| 4983 | int re = rel_has_cmp_exp(r, e); |
| 4984 | |
| 4985 | /* select expressions on left */ |
| 4986 | if (le && !re) { |
| 4987 | if (!lexps) |
| 4988 | lexps=sa_list(sql->sa); |
| 4989 | append(lexps, e); |
| 4990 | /* select expressions on right */ |
| 4991 | } else if (!le && re && (rel->op != op_anti || (e->flag != mark_notin && e->flag != mark_in))) { |
| 4992 | if (!rexps) |
| 4993 | rexps=sa_list(sql->sa); |
| 4994 | append(rexps, e); |
| 4995 | } else { |
| 4996 | if (!jexps) |
| 4997 | jexps=sa_list(sql->sa); |
| 4998 | append(jexps, e); |
| 4999 | } |
| 5000 | } |
| 5001 | if (lexps || rexps) |
| 5002 | rel->exps = jexps; |
| 5003 | if (lexps) { |
| 5004 | l = rel->l = rel_select(sql->sa, rel->l, NULL); |
| 5005 | l->exps = lexps; |
| 5006 | (*changes) = 1; |
| 5007 | } |
| 5008 | if (rexps) { |
| 5009 | r = rel->r = rel_select(sql->sa, rel->r, NULL); |
| 5010 | r->exps = rexps; |
| 5011 | (*changes) = 1; |
| 5012 | } |
| 5013 | } |
| 5014 | return rel; |
| 5015 | } |
| 5016 | |
/*
 * Push (semi)joins down unions, this is basically for merge tables, where
 * we know that the fk-indices are split over two clustered merge tables.
 *
 * Handles non-outer joins and semijoins whose inputs (looked at through at
 * most one project each) are non-distinct unions:
 *   join(union(a,b), c)          -> union(join(a,c), join(b,c))
 *   join(union(a,b), union(c,d)) -> union(join(a,c), join(b,d))
 *   join(a, union(b,c))          -> union(join(a,b), join(a,c))  (not for semijoins)
 *   semijoin(a, union(b,c)) on a pkey -> semijoin(a, b) or semijoin(a, c),
 *                                        picked by matching part numbers
 * 'rel' is rewritten in place and *changes is incremented on a rewrite.
 */
static sql_rel *
rel_push_join_down_union(int *changes, mvc *sql, sql_rel *rel)
{
	if ((is_join(rel->op) && !is_outerjoin(rel->op)) || is_semi(rel->op)) {
		/* ol/or keep the original inputs, l/r may step below a project */
		sql_rel *l = rel->l, *r = rel->r, *ol = l, *or = r;
		list *exps = rel->exps;
		/* first join expression (if any), used for the join index checks */
		sql_exp *je = !list_empty(exps)?exps->h->data:NULL;

		if (!l || !r || need_distinct(l) || need_distinct(r))
			return rel;
		if (l->op == op_project)
			l = l->l;
		if (r->op == op_project)
			r = r->l;

		/* both sides only if we have a join index */
		if (!l || !r ||(is_union(l->op) && is_union(r->op) &&
			je && !find_prop(je->p, PROP_JOINIDX) && /* FKEY JOIN */
			!rel_is_join_on_pkey(rel))) /* aligned PKEY JOIN */
			return rel;
		/* a semijoin with a union on the left also needs a join index */
		if (is_semi(rel->op) && is_union(l->op) && je && !find_prop(je->p, PROP_JOINIDX))
			return rel;

		/* note: cleared before a branch is chosen, so also when nothing is rewritten */
		ol->subquery = or->subquery = 0;
		if ((is_union(l->op) && !need_distinct(l)) && !is_union(r->op)){
			sql_rel *nl, *nr;
			sql_rel *ll = rel_dup(l->l), *lr = rel_dup(l->r);

			/* join(union(a,b), c) -> union(join(a,c), join(b,c)) */
			/* make sure both union inputs are projects, so they can be renamed */
			if (!is_project(ll->op))
				ll = rel_project(sql->sa, ll,
					rel_projections(sql, ll, NULL, 1, 1));
			if (!is_project(lr->op))
				lr = rel_project(sql->sa, lr,
					rel_projections(sql, lr, NULL, 1, 1));
			rel_rename_exps(sql, l->exps, ll->exps);
			rel_rename_exps(sql, l->exps, lr->exps);
			/* re-apply the project that sat on top of the union to each input */
			if (l != ol) {
				ll = rel_project(sql->sa, ll, NULL);
				ll->exps = exps_copy(sql, ol->exps);
				lr = rel_project(sql->sa, lr, NULL);
				lr->exps = exps_copy(sql, ol->exps);
			}
			nl = rel_crossproduct(sql->sa, ll, rel_dup(or), rel->op);
			nr = rel_crossproduct(sql->sa, lr, rel_dup(or), rel->op);
			if (need_no_nil(rel)) {
				set_no_nil(nl);
				set_no_nil(nr);
			}
			/* each new join gets its own copy of the join expressions */
			nl->exps = exps_copy(sql, exps);
			nr->exps = exps_copy(sql, exps);
			nl = rel_project(sql->sa, nl, rel_projections(sql, nl, NULL, 1, 1));
			nr = rel_project(sql->sa, nr, rel_projections(sql, nr, NULL, 1, 1));
			(*changes)++;
			return rel_inplace_setop(rel, nl, nr, op_union, rel_projections(sql, rel, NULL, 1, 1));
		} else if (is_union(l->op) && !need_distinct(l) &&
			   is_union(r->op) && !need_distinct(r)) {
			sql_rel *nl, *nr;
			sql_rel *ll = rel_dup(l->l), *lr = rel_dup(l->r);
			sql_rel *rl = rel_dup(r->l), *rr = rel_dup(r->r);

			/* join(union(a,b), union(c,d)) -> union(join(a,c), join(b,d)) */
			if (!is_project(ll->op))
				ll = rel_project(sql->sa, ll,
					rel_projections(sql, ll, NULL, 1, 1));
			if (!is_project(lr->op))
				lr = rel_project(sql->sa, lr,
					rel_projections(sql, lr, NULL, 1, 1));
			rel_rename_exps(sql, l->exps, ll->exps);
			rel_rename_exps(sql, l->exps, lr->exps);
			if (l != ol) {
				ll = rel_project(sql->sa, ll, NULL);
				ll->exps = exps_copy(sql, ol->exps);
				lr = rel_project(sql->sa, lr, NULL);
				lr->exps = exps_copy(sql, ol->exps);
			}
			/* same preparation for the inputs of the right union */
			if (!is_project(rl->op))
				rl = rel_project(sql->sa, rl,
					rel_projections(sql, rl, NULL, 1, 1));
			if (!is_project(rr->op))
				rr = rel_project(sql->sa, rr,
					rel_projections(sql, rr, NULL, 1, 1));
			rel_rename_exps(sql, r->exps, rl->exps);
			rel_rename_exps(sql, r->exps, rr->exps);
			if (r != or) {
				rl = rel_project(sql->sa, rl, NULL);
				rl->exps = exps_copy(sql, or->exps);
				rr = rel_project(sql->sa, rr, NULL);
				rr->exps = exps_copy(sql, or->exps);
			}
			/* pair the inputs position-wise: left with left, right with right */
			nl = rel_crossproduct(sql->sa, ll, rl, rel->op);
			nr = rel_crossproduct(sql->sa, lr, rr, rel->op);
			if (need_no_nil(rel)) {
				set_no_nil(nl);
				set_no_nil(nr);
			}
			nl->exps = exps_copy(sql, exps);
			nr->exps = exps_copy(sql, exps);
			nl = rel_project(sql->sa, nl, rel_projections(sql, nl, NULL, 1, 1));
			nr = rel_project(sql->sa, nr, rel_projections(sql, nr, NULL, 1, 1));
			(*changes)++;
			return rel_inplace_setop(rel, nl, nr, op_union, rel_projections(sql, rel, NULL, 1, 1));
		} else if (!is_union(l->op) &&
			   is_union(r->op) && !need_distinct(r) &&
			   !is_semi(rel->op)) {
			sql_rel *nl, *nr;
			sql_rel *rl = rel_dup(r->l), *rr = rel_dup(r->r);

			/* join(a, union(b,c)) -> union(join(a,b), join(a,c)) */
			if (!is_project(rl->op))
				rl = rel_project(sql->sa, rl,
					rel_projections(sql, rl, NULL, 1, 1));
			if (!is_project(rr->op))
				rr = rel_project(sql->sa, rr,
					rel_projections(sql, rr, NULL, 1, 1));
			rel_rename_exps(sql, r->exps, rl->exps);
			rel_rename_exps(sql, r->exps, rr->exps);
			if (r != or) {
				rl = rel_project(sql->sa, rl, NULL);
				rl->exps = exps_copy(sql, or->exps);
				rr = rel_project(sql->sa, rr, NULL);
				rr->exps = exps_copy(sql, or->exps);
			}
			/* the original left input is used by both new joins */
			nl = rel_crossproduct(sql->sa, rel_dup(ol), rl, rel->op);
			nr = rel_crossproduct(sql->sa, rel_dup(ol), rr, rel->op);
			if (need_no_nil(rel)) {
				set_no_nil(nl);
				set_no_nil(nr);
			}
			nl->exps = exps_copy(sql, exps);
			nr->exps = exps_copy(sql, exps);
			nl = rel_project(sql->sa, nl, rel_projections(sql, nl, NULL, 1, 1));
			nr = rel_project(sql->sa, nr, rel_projections(sql, nr, NULL, 1, 1));
			(*changes)++;
			return rel_inplace_setop(rel, nl, nr, op_union, rel_projections(sql, rel, NULL, 1, 1));
		/* {semi}join ( A1, union (A2, B)) [A1.partkey = A2.partkey] ->
		 * {semi}join ( A1, A2 )
		 * and
		 * {semi}join ( A1, union (B, A2)) [A1.partkey = A2.partkey] ->
		 * {semi}join ( A1, A2 )
		 * (ie a single part on the left)
		 *
		 * How to detect that a relation isn't matching:
		 *
		 * partitioning is currently done only on pkey/fkey's
		 * ie only matching per part if join is on pkey/fkey (parts)
		 *
		 * and part numbers should match.
		 *
		 * */
		} else if (!is_union(l->op) &&
			   is_union(r->op) && !need_distinct(r) &&
			   is_semi(rel->op) && rel_is_join_on_pkey(rel)) {
			/* use first join expression, to find part nr */
			sql_exp *je = rel->exps->h->data;
			int lpnr = rel_part_nr(l, je);
			sql_rel *rl = r->l;
			sql_rel *rr = r->r;

			/* no part number for the left input: nothing to decide on */
			if (lpnr < 0)
				return rel;
			/* case 1: uses left not right */
			if (rel_uses_part_nr(rl, je, lpnr) &&
			    !rel_uses_part_nr(rr, je, lpnr)) {
				sql_rel *nl;

				rl = rel_dup(rl);
				if (!is_project(rl->op))
					rl = rel_project(sql->sa, rl,
						rel_projections(sql, rl, NULL, 1, 1));
				rel_rename_exps(sql, r->exps, rl->exps);
				if (r != or) {
					rl = rel_project(sql->sa, rl, NULL);
					rl->exps = exps_copy(sql, or->exps);
				}
				nl = rel_crossproduct(sql->sa, rel_dup(ol), rl, rel->op);
				if (need_no_nil(rel))
					set_no_nil(nl);
				nl->exps = exps_copy(sql, exps);
				(*changes)++;
				/* only one union input remains, so rel becomes a project instead of a union */
				return rel_inplace_project(sql->sa, rel, nl, rel_projections(sql, rel, NULL, 1, 1));
			/* case 2: uses right not left */
			} else if (!rel_uses_part_nr(rl, je, lpnr) &&
				   rel_uses_part_nr(rr, je, lpnr)) {
				sql_rel *nl;

				rr = rel_dup(rr);
				if (!is_project(rr->op))
					rr = rel_project(sql->sa, rr,
						rel_projections(sql, rr, NULL, 1, 1));
				rel_rename_exps(sql, r->exps, rr->exps);
				if (r != or) {
					rr = rel_project(sql->sa, rr, NULL);
					rr->exps = exps_copy(sql, or->exps);
				}
				nl = rel_crossproduct(sql->sa, rel_dup(ol), rr, rel->op);
				if (need_no_nil(rel))
					set_no_nil(nl);
				nl->exps = exps_copy(sql, exps);
				(*changes)++;
				return rel_inplace_project(sql->sa, rel, nl, rel_projections(sql, rel, NULL, 1, 1));
			}
		}
	}
	return rel;
}
| 5227 | |
| 5228 | static int |
| 5229 | rel_is_empty( sql_rel *rel ) |
| 5230 | { |
| 5231 | if ((is_join(rel->op) || is_semi(rel->op)) && !list_empty(rel->exps)) { |
| 5232 | sql_rel *l = rel->l, *r = rel->r; |
| 5233 | |
| 5234 | if (rel_is_empty(l) || ((is_join(rel->op) || is_semi(rel->op)) && rel_is_empty(r))) |
| 5235 | return 1; |
| 5236 | /* check */ |
| 5237 | if (rel_is_join_on_pkey(rel)) { |
| 5238 | sql_exp *je = rel->exps->h->data; |
| 5239 | int lpnr = rel_part_nr(l, je); |
| 5240 | |
| 5241 | if (lpnr >= 0 && !rel_uses_part_nr(r, je, lpnr)) |
| 5242 | return 1; |
| 5243 | } |
| 5244 | } |
| 5245 | if (!is_union(rel->op)) { |
| 5246 | if (is_simple_project(rel->op) || is_topn(rel->op) || is_select(rel->op) || is_sample(rel->op)) { |
| 5247 | if (rel->l) |
| 5248 | return rel_is_empty(rel->l); |
| 5249 | } else if (is_join(rel->op) || is_semi(rel->op) || is_set(rel->op)) { |
| 5250 | int empty = 1; |
| 5251 | if (rel->l) |
| 5252 | empty &= rel_is_empty(rel->l); |
| 5253 | if (empty && rel->r) |
| 5254 | empty &= rel_is_empty(rel->r); |
| 5255 | return empty; |
| 5256 | } |
| 5257 | } |
| 5258 | return 0; |
| 5259 | } |
| 5260 | |
| 5261 | /* non overlapping partitions should be removed */ |
| 5262 | static sql_rel * |
| 5263 | rel_remove_empty_join(mvc *sql, sql_rel *rel, int *changes) |
| 5264 | { |
| 5265 | /* recurse check rel_is_empty |
| 5266 | * For half empty unions replace by projects |
| 5267 | * */ |
| 5268 | if (is_union(rel->op)) { |
| 5269 | sql_rel *l = rel->l, *r = rel->r; |
| 5270 | |
| 5271 | rel->l = l = rel_remove_empty_join(sql, l, changes); |
| 5272 | rel->r = r = rel_remove_empty_join(sql, r, changes); |
| 5273 | if (rel_is_empty(l)) { |
| 5274 | (*changes)++; |
| 5275 | return rel_inplace_project(sql->sa, rel, rel_dup(r), rel->exps); |
| 5276 | } else if (rel_is_empty(r)) { |
| 5277 | (*changes)++; |
| 5278 | return rel_inplace_project(sql->sa, rel, rel_dup(l), rel->exps); |
| 5279 | } |
| 5280 | } else if ((is_simple_project(rel->op) || is_groupby(rel->op) || is_topn(rel->op) || |
| 5281 | is_select(rel->op) || is_sample(rel->op))) { |
| 5282 | if (rel->l) |
| 5283 | rel->l = rel_remove_empty_join(sql, rel->l, changes); |
| 5284 | } else if (is_join(rel->op) || is_semi(rel->op) || is_set(rel->op)) { |
| 5285 | if (rel->l) |
| 5286 | rel->l = rel_remove_empty_join(sql, rel->l, changes); |
| 5287 | if (rel->r) |
| 5288 | rel->r = rel_remove_empty_join(sql, rel->r, changes); |
| 5289 | } |
| 5290 | return rel; |
| 5291 | } |
| 5292 | |
/* One entry of the list filled by rel_find_joins: a join relation plus the
 * relation it was reached from. */
typedef struct {
	sql_rel *p; /* the found join's parent */
	sql_rel *j; /* the found join relation itself */
} found_join;
| 5297 | |
| 5298 | static void |
| 5299 | rel_find_joins(mvc *sql, sql_rel *parent, sql_rel *rel, list *l, int depth) |
| 5300 | { |
| 5301 | if (!rel || depth == 5) /* limit to 5 relations bellow in the tree */ |
| 5302 | return; |
| 5303 | |
| 5304 | switch (rel->op) { |
| 5305 | case op_basetable: |
| 5306 | case op_table: |
| 5307 | case op_ddl: |
| 5308 | break; |
| 5309 | case op_join: |
| 5310 | case op_left: |
| 5311 | case op_right: |
| 5312 | case op_full: |
| 5313 | case op_semi: |
| 5314 | case op_anti: { |
| 5315 | found_join *fl = SA_NEW(sql->sa, found_join); |
| 5316 | fl->p = parent; |
| 5317 | fl->j = rel; |
| 5318 | list_append(l, fl); |
| 5319 | |
| 5320 | if (rel->l) |
| 5321 | rel_find_joins(sql, rel, rel->l, l, depth + 1); |
| 5322 | if (rel->r) |
| 5323 | rel_find_joins(sql, rel, rel->r, l, depth + 1); |
| 5324 | } break; |
| 5325 | case op_union: |
| 5326 | case op_inter: |
| 5327 | case op_except: { |
| 5328 | if (rel->l) |
| 5329 | rel_find_joins(sql, rel, rel->l, l, depth + 1); |
| 5330 | if (rel->r) |
| 5331 | rel_find_joins(sql, rel, rel->r, l, depth + 1); |
| 5332 | } break; |
| 5333 | case op_groupby: |
| 5334 | case op_project: |
| 5335 | case op_select: |
| 5336 | case op_topn: |
| 5337 | case op_sample: { |
| 5338 | if (rel->l) |
| 5339 | rel_find_joins(sql, rel, rel->l, l, depth + 1); |
| 5340 | } break; |
| 5341 | case op_insert: |
| 5342 | case op_update: |
| 5343 | case op_delete: |
| 5344 | case op_truncate: { |
| 5345 | if (rel->r) |
| 5346 | rel_find_joins(sql, rel, rel->r, l, depth + 1); |
| 5347 | } break; |
| 5348 | } |
| 5349 | } |
| 5350 | |
| 5351 | /* find identical joins in diferent branches of the relational plan and merge them together */ |
| 5352 | static sql_rel * |
| 5353 | rel_merge_identical_joins(int *changes, mvc *sql, sql_rel *rel) |
| 5354 | { |
| 5355 | if (is_joinop(rel->op) && rel->l && rel->r) { |
| 5356 | list *l1 = sa_list(sql->sa), *l2 = sa_list(sql->sa); |
| 5357 | |
| 5358 | rel_find_joins(sql, rel, rel->l, l1, 0); |
| 5359 | rel_find_joins(sql, rel, rel->r, l2, 0); |
| 5360 | |
| 5361 | if (list_length(l1) && list_length(l2)) { /* found joins on both */ |
| 5362 | for (node *n1 = l1->h ; n1; n1 = n1->next) { |
| 5363 | found_join *f1 = (found_join*) n1->data; |
| 5364 | for (node *n2 = l2->h ; n2; n2 = n2->next) { |
| 5365 | found_join *f2 = (found_join*) n2->data; |
| 5366 | sql_rel *j1 = f1->j, *j2 = f2->j, *j1_l = j1->l, *j1_r = j1->r, *j2_l = j2->l, *j2_r = j2->r; |
| 5367 | bool sides_equal = false; |
| 5368 | |
| 5369 | if (j1 != j2) { |
| 5370 | const char *j1_ln = rel_name(j1_l), *j1_rn = rel_name(j1_r), *j2_ln = rel_name(j2_l), *j2_rn = rel_name(j2_r); |
| 5371 | |
| 5372 | /* So far it looks on identical relations and common basetable relations */ |
| 5373 | if ((j1_l == j2_l || (is_basetable(j1_l->op) && is_basetable(j2_l->op) && strcmp(j1_ln, j2_ln) == 0 && j1_l->l == j2_l->l)) && |
| 5374 | (j1_r == j2_r || (is_basetable(j1_r->op) && is_basetable(j2_r->op) && strcmp(j1_rn, j2_rn) == 0 && j1_r->l == j2_r->l))) |
| 5375 | sides_equal = true; |
| 5376 | else if ((j1_l == j2_r || (is_basetable(j1_l->op) && is_basetable(j2_r->op) && strcmp(j1_ln, j2_rn) == 0 && j1_l->l == j2_r->l)) && |
| 5377 | (j1_r == j2_l || (is_basetable(j1_r->op) && is_basetable(j2_l->op) && strcmp(j1_rn, j2_ln) == 0 && j1_r->l == j2_l->l))) |
| 5378 | sides_equal = true; |
| 5379 | |
| 5380 | /* the left and right sides are equal */ |
| 5381 | if (sides_equal && exp_match_list(j1->exps, j2->exps)) { |
| 5382 | sql_rel *p2 = f2->p; |
| 5383 | |
| 5384 | if (p2->l == j2) {/* replace j2's parent join with j1 */ |
| 5385 | rel_destroy(p2->l); |
| 5386 | p2->l = rel_dup(j1); |
| 5387 | } else { |
| 5388 | rel_destroy(p2->r); |
| 5389 | p2->r = rel_dup(j1); |
| 5390 | } |
| 5391 | (*changes)++; |
| 5392 | return rel; |
| 5393 | } |
| 5394 | } |
| 5395 | } |
| 5396 | } |
| 5397 | } |
| 5398 | } |
| 5399 | return rel; |
| 5400 | } |
| 5401 | |
| 5402 | static sql_rel * |
| 5403 | rel_push_select_down_union(int *changes, mvc *sql, sql_rel *rel) |
| 5404 | { |
| 5405 | if (is_select(rel->op) && rel->l && rel->exps) { |
| 5406 | sql_rel *u = rel->l, *ou = u; |
| 5407 | sql_rel *s = rel; |
| 5408 | sql_rel *ul = u->l; |
| 5409 | sql_rel *ur = u->r; |
| 5410 | |
| 5411 | if (u->op == op_project) |
| 5412 | u = u->l; |
| 5413 | |
| 5414 | if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || rel_is_ref(u)) |
| 5415 | return rel; |
| 5416 | |
| 5417 | ul = u->l; |
| 5418 | ur = u->r; |
| 5419 | |
| 5420 | rel->subquery = 0; |
| 5421 | u->subquery = 0; |
| 5422 | ul->subquery = 0; |
| 5423 | ur->subquery = 0; |
| 5424 | ul = rel_dup(ul); |
| 5425 | ur = rel_dup(ur); |
| 5426 | if (!is_project(ul->op)) |
| 5427 | ul = rel_project(sql->sa, ul, |
| 5428 | rel_projections(sql, ul, NULL, 1, 1)); |
| 5429 | if (!is_project(ur->op)) |
| 5430 | ur = rel_project(sql->sa, ur, |
| 5431 | rel_projections(sql, ur, NULL, 1, 1)); |
| 5432 | rel_rename_exps(sql, u->exps, ul->exps); |
| 5433 | rel_rename_exps(sql, u->exps, ur->exps); |
| 5434 | |
| 5435 | if (u != ou) { |
| 5436 | ul = rel_project(sql->sa, ul, NULL); |
| 5437 | ul->exps = exps_copy(sql, ou->exps); |
| 5438 | rel_rename_exps(sql, ou->exps, ul->exps); |
| 5439 | ur = rel_project(sql->sa, ur, NULL); |
| 5440 | ur->exps = exps_copy(sql, ou->exps); |
| 5441 | rel_rename_exps(sql, ou->exps, ur->exps); |
| 5442 | } |
| 5443 | |
| 5444 | /* introduce selects under the set (if needed) */ |
| 5445 | set_processed(ul); |
| 5446 | set_processed(ur); |
| 5447 | ul = rel_select(sql->sa, ul, NULL); |
| 5448 | ur = rel_select(sql->sa, ur, NULL); |
| 5449 | |
| 5450 | ul->exps = exps_copy(sql, s->exps); |
| 5451 | ur->exps = exps_copy(sql, s->exps); |
| 5452 | |
| 5453 | rel = rel_inplace_setop(rel, ul, ur, op_union, rel_projections(sql, rel, NULL, 1, 1)); |
| 5454 | (*changes)++; |
| 5455 | return rel; |
| 5456 | } |
| 5457 | return rel; |
| 5458 | } |
| 5459 | |
| 5460 | static sql_rel * |
| 5461 | rel_push_project_down_union(int *changes, mvc *sql, sql_rel *rel) |
| 5462 | { |
| 5463 | /* first remove distinct if already unique */ |
| 5464 | if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(sql, rel, rel->exps)) |
| 5465 | set_nodistinct(rel); |
| 5466 | |
| 5467 | if (rel->op == op_project && rel->l && rel->exps && !rel->r) { |
| 5468 | int need_distinct = need_distinct(rel); |
| 5469 | sql_rel *u = rel->l; |
| 5470 | sql_rel *p = rel; |
| 5471 | sql_rel *ul = u->l; |
| 5472 | sql_rel *ur = u->r; |
| 5473 | |
| 5474 | if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel,0)) |
| 5475 | return rel; |
| 5476 | /* don't push project down union of single values */ |
| 5477 | if ((is_project(ul->op) && !ul->l) || (is_project(ur->op) && !ur->l)) |
| 5478 | return rel; |
| 5479 | |
| 5480 | rel->subquery = 0; |
| 5481 | u->subquery = 0; |
| 5482 | ul = rel_dup(ul); |
| 5483 | ur = rel_dup(ur); |
| 5484 | |
| 5485 | if (!is_project(ul->op)) |
| 5486 | ul = rel_project(sql->sa, ul, |
| 5487 | rel_projections(sql, ul, NULL, 1, 1)); |
| 5488 | if (!is_project(ur->op)) |
| 5489 | ur = rel_project(sql->sa, ur, |
| 5490 | rel_projections(sql, ur, NULL, 1, 1)); |
| 5491 | need_distinct = (need_distinct && |
| 5492 | (!exps_unique(sql, ul, ul->exps) || |
| 5493 | !exps_unique(sql, ur, ur->exps))); |
| 5494 | rel_rename_exps(sql, u->exps, ul->exps); |
| 5495 | rel_rename_exps(sql, u->exps, ur->exps); |
| 5496 | |
| 5497 | /* introduce projects under the set */ |
| 5498 | ul = rel_project(sql->sa, ul, NULL); |
| 5499 | if (need_distinct) |
| 5500 | set_distinct(ul); |
| 5501 | ur = rel_project(sql->sa, ur, NULL); |
| 5502 | if (need_distinct) |
| 5503 | set_distinct(ur); |
| 5504 | |
| 5505 | ul->exps = exps_copy(sql, p->exps); |
| 5506 | ur->exps = exps_copy(sql, p->exps); |
| 5507 | |
| 5508 | rel = rel_inplace_setop(rel, ul, ur, op_union, |
| 5509 | rel_projections(sql, rel, NULL, 1, 1)); |
| 5510 | if (need_distinct) |
| 5511 | set_distinct(rel); |
| 5512 | (*changes)++; |
| 5513 | rel->l = rel_merge_projects(changes, sql, rel->l); |
| 5514 | rel->r = rel_merge_projects(changes, sql, rel->r); |
| 5515 | return rel; |
| 5516 | } |
| 5517 | return rel; |
| 5518 | } |
| 5519 | |
| 5520 | /* Compute the efficiency of using this expression early in a group by list */ |
| 5521 | static int |
| 5522 | score_gbe( mvc *sql, sql_rel *rel, sql_exp *e) |
| 5523 | { |
| 5524 | int res = 10; |
| 5525 | sql_subtype *t = exp_subtype(e); |
| 5526 | sql_column *c = NULL; |
| 5527 | |
| 5528 | /* can we find out if the underlying table is sorted */ |
| 5529 | if ( (c = exp_find_column(rel, e, -2)) != NULL) { |
| 5530 | if (mvc_is_sorted (sql, c)) |
| 5531 | res += 500; |
| 5532 | } |
| 5533 | |
| 5534 | /* is the column selective */ |
| 5535 | |
| 5536 | /* prefer the shorter var types over the longer onces */ |
| 5537 | if (!EC_FIXED(t->type->eclass) && t->digits) |
| 5538 | res -= t->digits; |
| 5539 | /* smallest type first */ |
| 5540 | if (EC_FIXED(t->type->eclass)) |
| 5541 | res -= t->type->eclass; |
| 5542 | return res; |
| 5543 | } |
| 5544 | |
| 5545 | /* reorder group by expressions */ |
| 5546 | static sql_rel * |
| 5547 | rel_groupby_order(int *changes, mvc *sql, sql_rel *rel) |
| 5548 | { |
| 5549 | list *gbe = rel->r; |
| 5550 | |
| 5551 | (void)*changes; |
| 5552 | if (is_groupby(rel->op) && list_length(gbe) > 1 && list_length(gbe)<9) { |
| 5553 | node *n; |
| 5554 | int i, *scores = calloc(list_length(gbe), sizeof(int)); |
| 5555 | |
| 5556 | for (i = 0, n = gbe->h; n; i++, n = n->next) |
| 5557 | scores[i] = score_gbe(sql, rel, n->data); |
| 5558 | rel->r = list_keysort(gbe, scores, (fdup)NULL); |
| 5559 | free(scores); |
| 5560 | } |
| 5561 | return rel; |
| 5562 | } |
| 5563 | |
| 5564 | |
| 5565 | /* reduce group by expressions based on pkey info |
| 5566 | * |
| 5567 | * The reduced group by and (derived) aggr expressions are restored via |
| 5568 | * extra (new) aggregate columns. |
| 5569 | */ |
| 5570 | static sql_rel * |
| 5571 | rel_reduce_groupby_exps(int *changes, mvc *sql, sql_rel *rel) |
| 5572 | { |
| 5573 | list *gbe = rel->r; |
| 5574 | |
| 5575 | if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) { |
| 5576 | node *n, *m; |
| 5577 | int8_t *scores = malloc(list_length(gbe)); |
| 5578 | int k, j, i; |
| 5579 | sql_column *c; |
| 5580 | sql_table **tbls; |
| 5581 | sql_rel **bts, *bt = NULL; |
| 5582 | |
| 5583 | gbe = rel->r; |
| 5584 | tbls = (sql_table**)malloc(sizeof(sql_table*)*list_length(gbe)); |
| 5585 | bts = (sql_rel**)malloc(sizeof(sql_rel*)*list_length(gbe)); |
| 5586 | if (scores == NULL || tbls == NULL || bts == NULL) { |
| 5587 | if (scores) |
| 5588 | free(scores); |
| 5589 | if (tbls) |
| 5590 | free(tbls); |
| 5591 | if (bts) |
| 5592 | free(bts); |
| 5593 | return NULL; |
| 5594 | } |
| 5595 | for (k = 0, i = 0, n = gbe->h; n; n = n->next, k++) { |
| 5596 | sql_exp *e = n->data; |
| 5597 | |
| 5598 | c = exp_find_column_(rel, e, -2, &bt); |
| 5599 | if (c) { |
| 5600 | for(j = 0; j < i; j++) |
| 5601 | if (c->t == tbls[j] && bts[j] == bt) |
| 5602 | break; |
| 5603 | tbls[j] = c->t; |
| 5604 | bts[j] = bt; |
| 5605 | i += (j == i); |
| 5606 | } |
| 5607 | } |
| 5608 | if (i) { /* forall tables find pkey and |
| 5609 | remove useless other columns */ |
| 5610 | /* TODO also remove group by columns which are related to |
| 5611 | * the other columns using a foreign-key join (n->1), ie 1 |
| 5612 | * on the to be removed side. |
| 5613 | */ |
| 5614 | for(j = 0; j < i; j++) { |
| 5615 | int l, nr = 0, cnr = 0; |
| 5616 | |
| 5617 | k = list_length(gbe); |
| 5618 | memset(scores, 0, list_length(gbe)); |
| 5619 | if (tbls[j]->pkey) { |
| 5620 | for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) { |
| 5621 | fcmp cmp = (fcmp)&kc_column_cmp; |
| 5622 | sql_exp *e = n->data; |
| 5623 | |
| 5624 | c = exp_find_column_(rel, e, -2, &bt); |
| 5625 | if (c && c->t == tbls[j] && bts[j] == bt && |
| 5626 | list_find(tbls[j]->pkey->k.columns, c, cmp) != NULL) { |
| 5627 | scores[l] = 1; |
| 5628 | nr ++; |
| 5629 | } else if (c && c->t == tbls[j] && bts[j] == bt) { |
| 5630 | /* Okay we can cleanup a group by column */ |
| 5631 | scores[l] = -1; |
| 5632 | cnr ++; |
| 5633 | } |
| 5634 | } |
| 5635 | } |
| 5636 | if (nr) { |
| 5637 | int all = (list_length(tbls[j]->pkey->k.columns) == nr); |
| 5638 | sql_kc *kc = tbls[j]->pkey->k.columns->h->data; |
| 5639 | |
| 5640 | c = kc->c; |
| 5641 | for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) { |
| 5642 | sql_exp *e = n->data; |
| 5643 | |
| 5644 | /* pkey based group by */ |
| 5645 | if (scores[l] == 1 && ((all || |
| 5646 | /* first of key */ |
| 5647 | (c == exp_find_column(rel, e, -2))) && !find_prop(e->p, PROP_HASHCOL))) |
| 5648 | e->p = prop_create(sql->sa, PROP_HASHCOL, e->p); |
| 5649 | } |
| 5650 | for (m = rel->exps->h; m; m = m->next ){ |
| 5651 | sql_exp *e = m->data; |
| 5652 | |
| 5653 | for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) { |
| 5654 | sql_exp *gb = n->data; |
| 5655 | |
| 5656 | /* pkey based group by */ |
| 5657 | if (scores[l] == 1 && exp_match_exp(e,gb) && find_prop(gb->p, PROP_HASHCOL) && !find_prop(e->p, PROP_HASHCOL)) { |
| 5658 | e->p = prop_create(sql->sa, PROP_HASHCOL, e->p); |
| 5659 | break; |
| 5660 | } |
| 5661 | |
| 5662 | } |
| 5663 | } |
| 5664 | } |
| 5665 | if (cnr && nr && list_length(tbls[j]->pkey->k.columns) == nr) { |
| 5666 | list *ngbe = new_exp_list(sql->sa); |
| 5667 | list *exps = rel->exps, *nexps = new_exp_list(sql->sa); |
| 5668 | |
| 5669 | for (l = 0, n = gbe->h; l < k && n; l++, n = n->next) { |
| 5670 | sql_exp *e = n->data; |
| 5671 | |
| 5672 | /* keep the group by columns which form a primary key |
| 5673 | * of this table. And those unrelated to this table. */ |
| 5674 | if (scores[l] != -1) |
| 5675 | append(ngbe, e); |
| 5676 | } |
| 5677 | rel->r = ngbe; |
| 5678 | /* rewrite gbe and aggr, in the aggr list */ |
| 5679 | for (m = exps->h; m; m = m->next ){ |
| 5680 | sql_exp *e = m->data; |
| 5681 | int fnd = 0; |
| 5682 | |
| 5683 | for (l = 0, n = gbe->h; l < k && n && !fnd; l++, n = n->next) { |
| 5684 | sql_exp *gb = n->data; |
| 5685 | |
| 5686 | if (scores[l] == -1 && exp_refers(gb, e)) { |
| 5687 | sql_exp *rs = exp_column(sql->sa, gb->l?gb->l:exp_relname(gb), gb->r?gb->r:exp_name(gb), exp_subtype(gb), rel->card, has_nil(gb), is_intern(gb)); |
| 5688 | exp_setname(sql->sa, rs, exp_find_rel_name(e), exp_name(e)); |
| 5689 | e = rs; |
| 5690 | fnd = 1; |
| 5691 | } |
| 5692 | } |
| 5693 | append(nexps, e); |
| 5694 | } |
| 5695 | /* new reduced aggr expression list */ |
| 5696 | assert(list_length(nexps)>0); |
| 5697 | rel->exps = nexps; |
| 5698 | /* only one reduction at a time */ |
| 5699 | *changes = 1; |
| 5700 | free(bts); |
| 5701 | free(tbls); |
| 5702 | free(scores); |
| 5703 | return rel; |
| 5704 | } |
| 5705 | gbe = rel->r; |
| 5706 | } |
| 5707 | } |
| 5708 | free(bts); |
| 5709 | free(tbls); |
| 5710 | free(scores); |
| 5711 | } |
| 5712 | /* remove constants from group by list */ |
| 5713 | if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) { |
| 5714 | int i; |
| 5715 | node *n; |
| 5716 | |
| 5717 | for (i = 0, n = gbe->h; n; n = n->next) { |
| 5718 | sql_exp *e = n->data; |
| 5719 | |
| 5720 | if (exp_is_atom(e)) |
| 5721 | i++; |
| 5722 | } |
| 5723 | if (i) { |
| 5724 | list *ngbe = new_exp_list(sql->sa); |
| 5725 | list *dgbe = new_exp_list(sql->sa); |
| 5726 | |
| 5727 | for (n = gbe->h; n; n = n->next) { |
| 5728 | sql_exp *e = n->data; |
| 5729 | |
| 5730 | if (!exp_is_atom(e)) |
| 5731 | append(ngbe, e); |
| 5732 | /* we need at least one gbe */ |
| 5733 | else if (!n->next && list_empty(ngbe)) |
| 5734 | append(ngbe, e); |
| 5735 | else |
| 5736 | append(dgbe, e); |
| 5737 | } |
| 5738 | rel->r = ngbe; |
| 5739 | if (!list_empty(dgbe)) { |
| 5740 | /* use atom's directly in the aggr expr list */ |
| 5741 | list *nexps = new_exp_list(sql->sa); |
| 5742 | |
| 5743 | for (n = rel->exps->h; n; n = n->next) { |
| 5744 | sql_exp *e = n->data, *ne = NULL; |
| 5745 | |
| 5746 | if (e->type == e_column) { |
| 5747 | if (e->l) |
| 5748 | ne = exps_bind_column2(dgbe, e->l, e->r); |
| 5749 | else |
| 5750 | ne = exps_bind_column(dgbe, e->r, NULL); |
| 5751 | if (ne) { |
| 5752 | ne = exp_copy(sql, ne); |
| 5753 | exp_prop_alias(sql->sa, ne, e); |
| 5754 | e = ne; |
| 5755 | } |
| 5756 | } |
| 5757 | append(nexps, e); |
| 5758 | } |
| 5759 | rel->exps = nexps; |
| 5760 | (*changes)++; |
| 5761 | } |
| 5762 | } |
| 5763 | } |
| 5764 | return rel; |
| 5765 | } |
| 5766 | |
| 5767 | /* Rewrite group by expressions with distinct |
| 5768 | * |
| 5769 | * ie select a, count(distinct b) from c where ... groupby a; |
| 5770 | * No other aggregations should be present |
| 5771 | * |
| 5772 | * Rewrite the more general case, good for parallel execution |
| 5773 | * |
| 5774 | * groupby(R) [e,f] [ aggr1 a distinct, aggr2 b distinct, aggr3 c, aggr4 d] |
| 5775 | * |
| 5776 | * into |
| 5777 | * |
| 5778 | * groupby( |
| 5779 | * groupby(R) [e,f,a,b] [ a, b, aggr3 c, aggr4 d] |
| 5780 | * ) [e,f]( aggr1 a distinct, aggr2 b distinct, aggr3_phase2 c, aggr4_phase2 d) |
| 5781 | */ |
| 5782 | |
#if 0
/* Compiled out: two phase variant of the distinct rewrite.
 *
 * An inner group by is created on the old group by columns extended
 * with the arguments of the distinct aggregates; the outer group by
 * redoes the aggregation on the result of the inner one.
 */
static sql_rel *
rel_groupby_distinct2(int *changes, mvc *sql, sql_rel *rel)
{
	list *ngbes = sa_list(sql->sa), *gbes, *naggrs = sa_list(sql->sa), *aggrs = sa_list(sql->sa);
	sql_rel *inner;
	node *cur;

	gbes = rel->r;
	if (!gbes)
		return rel;

	/* every aggregate must be one of sum, count, min or max */
	for (cur = rel->exps->h; cur; cur = cur->next) {
		sql_exp *e = cur->data;
		sql_subaggr *af = e->f;
		const char *aname;

		if (e->type != e_aggr)
			continue;
		aname = af->aggr->base.name;
		if (strcmp(aname, "sum") != 0 &&
		    strcmp(aname, "count") != 0 &&
		    strcmp(aname, "min") != 0 &&
		    strcmp(aname, "max") != 0)
			return rel;
	}

	/* the outer group by refers to the old group by columns */
	for (cur = gbes->h; cur; cur = cur->next) {
		sql_exp *gb = cur->data;

		append(ngbes, exp_column(sql->sa, exp_find_rel_name(gb), exp_name(gb), exp_subtype(gb), gb->card, has_nil(gb), is_intern(gb)));
	}

	/* 1 for each aggr(distinct v) add the attribute expression v to gbes and aggrs list
	 * 2 for each aggr(z) add aggr_phase2('z') to the naggrs list
	 * 3 for each group by col, add also to the naggrs list
	 * */
	for (cur = rel->exps->h; cur; cur = cur->next) {
		sql_exp *e = cur->data;

		if (e->type == e_aggr) {
			sql_exp *v;

			if (need_distinct(e)) { /* 1 */
				/* need column expression */
				list *args = e->l;

				v = args->h->data;
				append(gbes, v);
				if (!exp_name(v))
					exp_label(sql->sa, v, ++sql->label);
				v = exp_column(sql->sa, exp_find_rel_name(v), exp_name(v), exp_subtype(v), v->card, has_nil(v), is_intern(v));
				append(aggrs, v);
				v = exp_aggr1(sql->sa, v, e->f, need_distinct(e), 1, e->card, 1);
				exp_setname(sql->sa, v, exp_find_rel_name(e), exp_name(e));
				append(naggrs, v);
			} else { /* 2 */
				sql_subaggr *f = e->f;
				int cnt = exp_aggr_is_count(e);
				/* counts are summed in the second phase */
				sql_subaggr *a = sql_bind_aggr(sql->sa, sql->session->schema, (cnt)?"sum" :f->aggr->base.name, exp_subtype(e));

				append(aggrs, e);
				if (!exp_name(e))
					exp_label(sql->sa, e, ++sql->label);
				set_has_nil(e);
				v = exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e));
				v = exp_aggr1(sql->sa, v, a, 0, 1, e->card, 1);
				if (cnt)
					set_zero_if_empty(v);
				exp_setname(sql->sa, v, exp_find_rel_name(e), exp_name(e));
				append(naggrs, v);
			}
			continue;
		}
		/* 3: group by col */
		if (list_find_exp(gbes, e) || !list_find_exp(naggrs, e)) {
			append(aggrs, e);

			e = exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e));
		}
		append(naggrs, e);
	}

	inner = rel->l = rel_groupby(sql, rel->l, gbes);
	inner->exps = aggrs;
	rel->r = ngbes;
	rel->exps = naggrs;
	(*changes)++;
	return rel;
}
#endif
| 5869 | |
| 5870 | static sql_rel * |
| 5871 | rel_groupby_distinct(int *changes, mvc *sql, sql_rel *rel) |
| 5872 | { |
| 5873 | if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_empty(rel->r)) { |
| 5874 | node *n; |
| 5875 | |
| 5876 | for (n = rel->exps->h; n; n = n->next) { |
| 5877 | sql_exp *e = n->data; |
| 5878 | |
| 5879 | if (exp_aggr_is_count(e) && need_distinct(e)) { |
| 5880 | /* if count over unique values (ukey/pkey) */ |
| 5881 | if (e->l && exps_unique(sql, rel, e->l)) |
| 5882 | set_nodistinct(e); |
| 5883 | } |
| 5884 | } |
| 5885 | } |
| 5886 | |
| 5887 | if (is_groupby(rel->op)) { |
| 5888 | sql_rel *l = rel->l; |
| 5889 | if (!l || is_groupby(l->op)) |
| 5890 | return rel; |
| 5891 | } |
| 5892 | if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) { |
| 5893 | node *n; |
| 5894 | int nr = 0; |
| 5895 | list *gbe, *ngbe, *arg, *exps, *nexps; |
| 5896 | sql_exp *distinct = NULL, *darg; |
| 5897 | sql_rel *l = NULL; |
| 5898 | |
| 5899 | for (n=rel->exps->h; n && nr <= 2; n = n->next) { |
| 5900 | sql_exp *e = n->data; |
| 5901 | if (need_distinct(e)) { |
| 5902 | distinct = n->data; |
| 5903 | nr++; |
| 5904 | } |
| 5905 | } |
| 5906 | if (nr < 1 || distinct->type != e_aggr) |
| 5907 | return rel; |
| 5908 | if ((nr > 1 || list_length(rel->r) + nr != list_length(rel->exps))) |
| 5909 | return rel;//rel_groupby_distinct2(changes, sql, rel); |
| 5910 | arg = distinct->l; |
| 5911 | if (list_length(arg) != 1 || list_length(rel->r) + nr != list_length(rel->exps)) |
| 5912 | return rel; |
| 5913 | |
| 5914 | gbe = rel->r; |
| 5915 | ngbe = sa_list(sql->sa); |
| 5916 | exps = sa_list(sql->sa); |
| 5917 | nexps = sa_list(sql->sa); |
| 5918 | for (n=rel->exps->h; n; n = n->next) { |
| 5919 | sql_exp *e = n->data; |
| 5920 | if (e != distinct) { |
| 5921 | e = exp_ref(sql->sa, e); |
| 5922 | append(ngbe, e); |
| 5923 | append(exps, e); |
| 5924 | e = exp_ref(sql->sa, e); |
| 5925 | append(nexps, e); |
| 5926 | } |
| 5927 | } |
| 5928 | |
| 5929 | darg = arg->h->data; |
| 5930 | list_append(gbe, darg = exp_copy(sql, darg)); |
| 5931 | exp_label(sql->sa, darg, ++sql->label); |
| 5932 | |
| 5933 | darg = exp_ref(sql->sa, darg); |
| 5934 | list_append(exps, darg); |
| 5935 | darg = exp_ref(sql->sa, darg); |
| 5936 | arg->h->data = darg; |
| 5937 | l = rel->l = rel_groupby(sql, rel->l, gbe); |
| 5938 | l->exps = exps; |
| 5939 | rel->r = ngbe; |
| 5940 | rel->exps = nexps; |
| 5941 | set_nodistinct(distinct); |
| 5942 | append(nexps, distinct); |
| 5943 | (*changes)++; |
| 5944 | } |
| 5945 | return rel; |
| 5946 | } |
| 5947 | |
| 5948 | static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e); |
| 5949 | |
| 5950 | static void |
| 5951 | list_split_aggr_and_project(mvc *sql, list *aexps, list *exps) |
| 5952 | { |
| 5953 | node *n; |
| 5954 | |
| 5955 | if (!exps) |
| 5956 | return ; |
| 5957 | for(n = exps->h; n; n = n->next) |
| 5958 | n->data = split_aggr_and_project(sql, aexps, n->data); |
| 5959 | } |
| 5960 | |
| 5961 | static sql_exp * |
| 5962 | split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e) |
| 5963 | { |
| 5964 | switch(e->type) { |
| 5965 | case e_aggr: |
| 5966 | /* add to the aggrs */ |
| 5967 | if (!exp_name(e)) |
| 5968 | exp_label(sql->sa, e, ++sql->label); |
| 5969 | list_append(aexps, e); |
| 5970 | return exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e)); |
| 5971 | case e_cmp: |
| 5972 | /* e_cmp's shouldn't exist in an aggr expression list */ |
| 5973 | assert(0); |
| 5974 | case e_convert: |
| 5975 | e->l = split_aggr_and_project(sql, aexps, e->l); |
| 5976 | return e; |
| 5977 | case e_func: |
| 5978 | list_split_aggr_and_project(sql, aexps, e->l); |
| 5979 | return e; |
| 5980 | case e_column: /* constants and columns shouldn't be rewriten */ |
| 5981 | case e_atom: |
| 5982 | case e_psm: |
| 5983 | return e; |
| 5984 | } |
| 5985 | return NULL; |
| 5986 | } |
| 5987 | |
| 5988 | static sql_exp * |
| 5989 | exp_use_consts(mvc *sql, sql_exp *e, list *consts); |
| 5990 | |
| 5991 | static list * |
| 5992 | exps_use_consts(mvc *sql, list *exps, list *consts) |
| 5993 | { |
| 5994 | node *n; |
| 5995 | list *nl = new_exp_list(sql->sa); |
| 5996 | |
| 5997 | if (!exps) |
| 5998 | return sa_list(sql->sa); |
| 5999 | for(n = exps->h; n; n = n->next) { |
| 6000 | sql_exp *arg = n->data, *narg = NULL; |
| 6001 | |
| 6002 | narg = exp_use_consts(sql, arg, consts); |
| 6003 | if (!narg) |
| 6004 | return NULL; |
| 6005 | narg = exp_propagate(sql->sa, narg, arg); |
| 6006 | append(nl, narg); |
| 6007 | } |
| 6008 | return nl; |
| 6009 | } |
| 6010 | |
| 6011 | static sql_exp * |
| 6012 | exp_use_consts(mvc *sql, sql_exp *e, list *consts) |
| 6013 | { |
| 6014 | sql_exp *ne = NULL, *l, *r, *r2; |
| 6015 | |
| 6016 | switch(e->type) { |
| 6017 | case e_column: |
| 6018 | if (e->l) |
| 6019 | ne = exps_bind_column2(consts, e->l, e->r); |
| 6020 | if (!ne && !e->l) |
| 6021 | ne = exps_bind_column(consts, e->r, NULL); |
| 6022 | if (!ne) |
| 6023 | return e; |
| 6024 | return ne; |
| 6025 | case e_cmp: |
| 6026 | if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) { |
| 6027 | list *l = exps_use_consts(sql, e->l, consts); |
| 6028 | list *r = exps_use_consts(sql, e->r, consts); |
| 6029 | |
| 6030 | if (!l || !r) |
| 6031 | return NULL; |
| 6032 | if (get_cmp(e) == cmp_filter) |
| 6033 | return exp_filter(sql->sa, l, r, e->f, is_anti(e)); |
| 6034 | return exp_or(sql->sa, l, r, is_anti(e)); |
| 6035 | } else if (e->flag == cmp_in || e->flag == cmp_notin) { |
| 6036 | sql_exp *l = exp_use_consts(sql, e->l, consts); |
| 6037 | list *r = exps_use_consts(sql, e->r, consts); |
| 6038 | |
| 6039 | if (!l || !r) |
| 6040 | return NULL; |
| 6041 | return exp_in(sql->sa, l, r, e->flag); |
| 6042 | } else { |
| 6043 | l = exp_use_consts(sql, e->l, consts); |
| 6044 | r = exp_use_consts(sql, e->r, consts); |
| 6045 | if (e->f) { |
| 6046 | r2 = exp_use_consts(sql, e->f, consts); |
| 6047 | if (l && r && r2) |
| 6048 | ne = exp_compare2(sql->sa, l, r, r2, e->flag); |
| 6049 | } else if (l && r) { |
| 6050 | ne = exp_compare(sql->sa, l, r, e->flag); |
| 6051 | } |
| 6052 | } |
| 6053 | if (!ne) |
| 6054 | return NULL; |
| 6055 | return exp_propagate(sql->sa, ne, e); |
| 6056 | case e_convert: |
| 6057 | l = exp_use_consts(sql, e->l, consts); |
| 6058 | if (l) |
| 6059 | return exp_convert(sql->sa, l, exp_fromtype(e), exp_totype(e)); |
| 6060 | return NULL; |
| 6061 | case e_aggr: |
| 6062 | case e_func: { |
| 6063 | list *l = e->l, *nl = NULL; |
| 6064 | |
| 6065 | if (!l) { |
| 6066 | return e; |
| 6067 | } else { |
| 6068 | nl = exps_use_consts(sql, l, consts); |
| 6069 | if (!nl) |
| 6070 | return NULL; |
| 6071 | } |
| 6072 | if (e->type == e_func) |
| 6073 | return exp_op(sql->sa, nl, e->f); |
| 6074 | else |
| 6075 | return exp_aggr(sql->sa, nl, e->f, need_distinct(e), need_no_nil(e), e->card, has_nil(e)); |
| 6076 | } |
| 6077 | case e_atom: |
| 6078 | case e_psm: |
| 6079 | return e; |
| 6080 | } |
| 6081 | return NULL; |
| 6082 | } |
| 6083 | |
| 6084 | static list * |
| 6085 | exps_remove_dictexps(mvc *sql, list *exps, sql_rel *r) |
| 6086 | { |
| 6087 | node *n; |
| 6088 | list *nl = new_exp_list(sql->sa); |
| 6089 | |
| 6090 | if (!exps) |
| 6091 | return nl; |
| 6092 | for(n = exps->h; n; n = n->next) { |
| 6093 | sql_exp *arg = n->data; |
| 6094 | |
| 6095 | if (!list_find_exp(r->exps, arg->l) && !list_find_exp(r->exps, arg->r)) |
| 6096 | append(nl, arg); |
| 6097 | } |
| 6098 | return nl; |
| 6099 | } |
| 6100 | |
| 6101 | static sql_rel * |
| 6102 | rel_remove_join(int *changes, mvc *sql, sql_rel *rel) |
| 6103 | { |
| 6104 | if (is_join(rel->op) && !is_outerjoin(rel->op)) { |
| 6105 | sql_rel *l = rel->l; |
| 6106 | sql_rel *r = rel->r; |
| 6107 | int lconst = 0, rconst = 0; |
| 6108 | |
| 6109 | if (!l || rel_is_ref(l) || !r || rel_is_ref(r) || |
| 6110 | (l->op != op_project && r->op != op_project)) |
| 6111 | return rel; |
| 6112 | if (l->op == op_project && exps_are_atoms(l->exps)) |
| 6113 | lconst = 1; |
| 6114 | if (r->op == op_project && exps_are_atoms(r->exps)) |
| 6115 | rconst = 1; |
| 6116 | if (lconst || rconst) { |
| 6117 | (*changes)++; |
| 6118 | /* use constant (instead of alias) in expressions */ |
| 6119 | if (lconst) { |
| 6120 | sql_rel *s = l; |
| 6121 | l = r; |
| 6122 | r = s; |
| 6123 | } |
| 6124 | rel->exps = exps_use_consts(sql, rel->exps, r->exps); |
| 6125 | /* change into select */ |
| 6126 | rel->op = op_select; |
| 6127 | rel->l = l; |
| 6128 | rel->r = NULL; |
| 6129 | /* wrap in a project including, the constant columns */ |
| 6130 | l->subquery = 0; |
| 6131 | rel = rel_project(sql->sa, rel, rel_projections(sql, l, NULL, 1, 1)); |
| 6132 | list_merge(rel->exps, r->exps, (fdup)NULL); |
| 6133 | } |
| 6134 | } |
| 6135 | if (is_join(rel->op)) { |
| 6136 | sql_rel *l = rel->l; |
| 6137 | sql_rel *r = rel->r; |
| 6138 | int ldict = 0, rdict = 0; |
| 6139 | |
| 6140 | if (!l || rel_is_ref(l) || !r || rel_is_ref(r) || |
| 6141 | (l->op != op_basetable && r->op != op_basetable)) |
| 6142 | return rel; |
| 6143 | /* check if dict (last column) isn't used, one column only */ |
| 6144 | if (l->op == op_basetable && !l->l && list_length(l->exps) <= 1) |
| 6145 | ldict = 1; |
| 6146 | if (r->op == op_basetable && !r->l && list_length(r->exps) <= 1) |
| 6147 | rdict = 1; |
| 6148 | if (!ldict && !rdict) |
| 6149 | return rel; |
| 6150 | (*changes)++; |
| 6151 | |
| 6152 | assert(0); |
| 6153 | if (ldict) { |
| 6154 | sql_rel *s = l; |
| 6155 | l = r; |
| 6156 | r = s; |
| 6157 | } |
| 6158 | rel->exps = exps_remove_dictexps(sql, rel->exps, r); |
| 6159 | /* change into select */ |
| 6160 | rel->op = op_select; |
| 6161 | rel->l = l; |
| 6162 | rel->r = NULL; |
| 6163 | /* wrap in a project including, the dict/index columns */ |
| 6164 | l->subquery = 0; |
| 6165 | rel = rel_project(sql->sa, rel, rel_projections(sql, l, NULL, 1, 1)); |
| 6166 | list_merge(rel->exps, r->exps, (fdup)NULL); |
| 6167 | } |
| 6168 | /* project (join (A,B)[ A.x = B.y ] ) [project_cols] -> project (A) [project_cols] |
| 6169 | * where non of the project_cols are from B and x=y is a foreign key join (B is the unique side) |
| 6170 | * and there are no filters on B |
| 6171 | */ |
| 6172 | if (is_project(rel->op)) { |
| 6173 | sql_rel *j = rel->l; |
| 6174 | |
| 6175 | if (is_join(j->op)) { |
| 6176 | node *n; |
| 6177 | sql_rel *l = j->l; |
| 6178 | sql_rel *r = j->r; |
| 6179 | |
| 6180 | if (!l || rel_is_ref(l) || !r || rel_is_ref(r) || r->op != op_basetable || r->l) |
| 6181 | return rel; |
| 6182 | |
| 6183 | /* check if all projection cols can be found in l */ |
| 6184 | for(n = rel->exps->h; n; n = n->next) { |
| 6185 | sql_exp *e = n->data; |
| 6186 | |
| 6187 | if (!rel_find_exp(l, e)) |
| 6188 | return rel; |
| 6189 | |
| 6190 | } |
| 6191 | assert(0); |
| 6192 | (*changes)++; |
| 6193 | rel->l = l; |
| 6194 | rel->r = NULL; |
| 6195 | l->subquery = 0; |
| 6196 | } |
| 6197 | } |
| 6198 | return rel; |
| 6199 | } |
| 6200 | |
| 6201 | /* Pushing projects up the tree. Done very early in the optimizer. |
| 6202 | * Makes later steps easier. |
| 6203 | */ |
| 6204 | static sql_rel * |
| 6205 | rel_push_project_up(int *changes, mvc *sql, sql_rel *rel) |
| 6206 | { |
| 6207 | /* project/project cleanup is done later */ |
| 6208 | if (is_join(rel->op) || is_select(rel->op)) { |
| 6209 | node *n; |
| 6210 | list *exps = NULL, *l_exps, *r_exps; |
| 6211 | sql_rel *l = rel->l; |
| 6212 | sql_rel *r = rel->r; |
| 6213 | sql_rel *t; |
| 6214 | |
| 6215 | /* Don't rewrite refs, non projections or constant or |
| 6216 | order by projections */ |
| 6217 | if (!l || rel_is_ref(l) || |
| 6218 | (is_join(rel->op) && (!r || rel_is_ref(r))) || |
| 6219 | (is_select(rel->op) && l->op != op_project) || |
| 6220 | (is_join(rel->op) && l->op != op_project && r->op != op_project) || |
| 6221 | ((l->op == op_project && (!l->l || l->r || project_unsafe(l,is_select(rel->op)))) || |
| 6222 | (is_join(rel->op) && (is_subquery(r) || |
| 6223 | (r->op == op_project && (!r->l || r->r || project_unsafe(r,0))))))) |
| 6224 | return rel; |
| 6225 | |
| 6226 | if (l->op == op_project && l->l) { |
| 6227 | /* Go through the list of project expressions. |
| 6228 | Check if they can be pushed up, ie are they not |
| 6229 | changing or introducing any columns used |
| 6230 | by the upper operator. */ |
| 6231 | |
| 6232 | exps = new_exp_list(sql->sa); |
| 6233 | for (n = l->exps->h; n; n = n->next) { |
| 6234 | sql_exp *e = n->data; |
| 6235 | |
| 6236 | /* we cannot rewrite projection with atomic values from outer joins */ |
| 6237 | if (is_column(e->type) && exp_is_atom(e) && !(is_right(rel->op) || is_full(rel->op))) { |
| 6238 | list_append(exps, e); |
| 6239 | } else if (e->type == e_column) { |
| 6240 | if (has_label(e)) |
| 6241 | return rel; |
| 6242 | list_append(exps, e); |
| 6243 | } else { |
| 6244 | return rel; |
| 6245 | } |
| 6246 | } |
| 6247 | } else { |
| 6248 | exps = rel_projections(sql, l, NULL, 1, 1); |
| 6249 | } |
| 6250 | /* also handle right hand of join */ |
| 6251 | if (is_join(rel->op) && r->op == op_project && r->l) { |
| 6252 | /* Here we also check all expressions of r like above |
| 6253 | but also we need to check for ambigious names. */ |
| 6254 | |
| 6255 | for (n = r->exps->h; n; n = n->next) { |
| 6256 | sql_exp *e = n->data; |
| 6257 | |
| 6258 | /* we cannot rewrite projection with atomic values from outer joins */ |
| 6259 | if (is_column(e->type) && exp_is_atom(e) && !(is_left(rel->op) || is_full(rel->op))) { |
| 6260 | list_append(exps, e); |
| 6261 | } else if (e->type == e_column) { |
| 6262 | if (has_label(e)) |
| 6263 | return rel; |
| 6264 | list_append(exps, e); |
| 6265 | } else { |
| 6266 | return rel; |
| 6267 | } |
| 6268 | } |
| 6269 | } else if (is_join(rel->op)) { |
| 6270 | list *r_exps = rel_projections(sql, r, NULL, 1, 2); |
| 6271 | |
| 6272 | list_merge(exps, r_exps, (fdup)NULL); |
| 6273 | } |
| 6274 | /* Here we should check for ambigious names ? */ |
| 6275 | if (is_join(rel->op) && r) { |
| 6276 | t = (l->op == op_project && l->l)?l->l:l; |
| 6277 | l_exps = rel_projections(sql, t, NULL, 1, 1); |
| 6278 | /* conflict with old right expressions */ |
| 6279 | r_exps = rel_projections(sql, r, NULL, 1, 1); |
| 6280 | for(n = l_exps->h; n; n = n->next) { |
| 6281 | sql_exp *e = n->data; |
| 6282 | const char *rname = exp_relname(e); |
| 6283 | const char *name = exp_name(e); |
| 6284 | |
| 6285 | if (exp_is_atom(e)) |
| 6286 | continue; |
| 6287 | if ((rname && exps_bind_column2(r_exps, rname, name) != NULL) || |
| 6288 | (!rname && exps_bind_column(r_exps, name, NULL) != NULL)) |
| 6289 | return rel; |
| 6290 | } |
| 6291 | t = (r->op == op_project && r->l)?r->l:r; |
| 6292 | r_exps = rel_projections(sql, t, NULL, 1, 1); |
| 6293 | /* conflict with new right expressions */ |
| 6294 | for(n = l_exps->h; n; n = n->next) { |
| 6295 | sql_exp *e = n->data; |
| 6296 | |
| 6297 | if (exp_is_atom(e)) |
| 6298 | continue; |
| 6299 | if ((e->l && exps_bind_column2(r_exps, e->l, e->r) != NULL) || |
| 6300 | (exps_bind_column(r_exps, e->r, NULL) != NULL && (!e->l || !e->r))) |
| 6301 | return rel; |
| 6302 | } |
| 6303 | /* conflict with new left expressions */ |
| 6304 | for(n = r_exps->h; n; n = n->next) { |
| 6305 | sql_exp *e = n->data; |
| 6306 | |
| 6307 | if (exp_is_atom(e)) |
| 6308 | continue; |
| 6309 | if ((e->l && exps_bind_column2(l_exps, e->l, e->r) != NULL) || |
| 6310 | (exps_bind_column(l_exps, e->r, NULL) != NULL && (!e->l || !e->r))) |
| 6311 | return rel; |
| 6312 | } |
| 6313 | } |
| 6314 | |
| 6315 | /* rename operator expressions */ |
| 6316 | if (l->op == op_project) { |
| 6317 | /* rewrite rel from rel->l into rel->l->l */ |
| 6318 | if (rel->exps) { |
| 6319 | list *nexps = new_exp_list(sql->sa); |
| 6320 | |
| 6321 | for (n = rel->exps->h; n; n = n->next) { |
| 6322 | sql_exp *e = n->data; |
| 6323 | |
| 6324 | e = exp_rename(sql, e, l, l->l); |
| 6325 | assert(e); |
| 6326 | list_append(nexps, e); |
| 6327 | } |
| 6328 | rel->exps = nexps; |
| 6329 | } |
| 6330 | rel->l = l->l; |
| 6331 | l->l = NULL; |
| 6332 | rel_destroy(l); |
| 6333 | } |
| 6334 | if (is_join(rel->op) && r->op == op_project) { |
| 6335 | /* rewrite rel from rel->r into rel->r->l */ |
| 6336 | if (rel->exps) { |
| 6337 | list *nexps = new_exp_list(sql->sa); |
| 6338 | |
| 6339 | for (n = rel->exps->h; n; n = n->next) { |
| 6340 | sql_exp *e = n->data; |
| 6341 | |
| 6342 | e = exp_rename(sql, e, r, r->l); |
| 6343 | assert(e); |
| 6344 | list_append(nexps, e); |
| 6345 | } |
| 6346 | rel->exps = nexps; |
| 6347 | } |
| 6348 | rel->r = r->l; |
| 6349 | r->l = NULL; |
| 6350 | rel_destroy(r); |
| 6351 | } |
| 6352 | /* Done, ie introduce new project */ |
| 6353 | exps_fix_card(exps, rel->card); |
| 6354 | (*changes)++; |
| 6355 | return rel_inplace_project(sql->sa, rel, NULL, exps); |
| 6356 | } |
| 6357 | if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && list_length(rel->exps) > 1) { |
| 6358 | node *n; |
| 6359 | int fnd = 0; |
| 6360 | list *aexps, *pexps; |
| 6361 | |
| 6362 | /* check if some are expressions aren't e_aggr */ |
| 6363 | for (n = rel->exps->h; n && !fnd; n = n->next) { |
| 6364 | sql_exp *e = n->data; |
| 6365 | |
| 6366 | if (e->type != e_aggr && e->type != e_column && e->type != e_atom) { |
| 6367 | fnd = 1; |
| 6368 | } |
| 6369 | } |
| 6370 | /* only aggr, no rewrite needed */ |
| 6371 | if (!fnd) |
| 6372 | return rel; |
| 6373 | |
| 6374 | aexps = sa_list(sql->sa); |
| 6375 | pexps = sa_list(sql->sa); |
| 6376 | for (n = rel->exps->h; n; n = n->next) { |
| 6377 | sql_exp *e = n->data, *ne = NULL; |
| 6378 | |
| 6379 | switch (e->type) { |
| 6380 | case e_atom: /* move over to the projection */ |
| 6381 | list_append(pexps, e); |
| 6382 | break; |
| 6383 | case e_func: |
| 6384 | list_append(pexps, e); |
| 6385 | list_split_aggr_and_project(sql, aexps, e->l); |
| 6386 | break; |
| 6387 | case e_convert: |
| 6388 | list_append(pexps, e); |
| 6389 | e->l = split_aggr_and_project(sql, aexps, e->l); |
| 6390 | break; |
| 6391 | default: /* simple alias */ |
| 6392 | list_append(aexps, e); |
| 6393 | ne = exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_intern(e)); |
| 6394 | list_append(pexps, ne); |
| 6395 | break; |
| 6396 | } |
| 6397 | } |
| 6398 | (*changes)++; |
| 6399 | rel->exps = aexps; |
| 6400 | return rel_inplace_project( sql->sa, rel, NULL, pexps); |
| 6401 | } |
| 6402 | return rel; |
| 6403 | } |
| 6404 | |
| 6405 | static int exp_mark_used(sql_rel *subrel, sql_exp *e); |
| 6406 | |
| 6407 | static int |
| 6408 | exps_mark_used(sql_rel *subrel, list *l) |
| 6409 | { |
| 6410 | int nr = 0; |
| 6411 | if (list_empty(l)) |
| 6412 | return nr; |
| 6413 | |
| 6414 | for (node *n = l->h; n != NULL; n = n->next) |
| 6415 | nr += exp_mark_used(subrel, n->data); |
| 6416 | return nr; |
| 6417 | } |
| 6418 | |
| 6419 | static int |
| 6420 | exp_mark_used(sql_rel *subrel, sql_exp *e) |
| 6421 | { |
| 6422 | int nr = 0; |
| 6423 | sql_exp *ne = NULL; |
| 6424 | |
| 6425 | switch(e->type) { |
| 6426 | case e_column: |
| 6427 | ne = rel_find_exp(subrel, e); |
| 6428 | break; |
| 6429 | case e_convert: |
| 6430 | return exp_mark_used(subrel, e->l); |
| 6431 | case e_aggr: |
| 6432 | case e_func: { |
| 6433 | if (e->l) |
| 6434 | nr += exps_mark_used(subrel, e->l); |
| 6435 | break; |
| 6436 | } |
| 6437 | case e_cmp: |
| 6438 | if (get_cmp(e) == cmp_or || get_cmp(e) == cmp_filter) { |
| 6439 | nr += exps_mark_used(subrel, e->l); |
| 6440 | nr += exps_mark_used(subrel, e->r); |
| 6441 | } else if (e->flag == cmp_in || e->flag == cmp_notin) { |
| 6442 | nr += exp_mark_used(subrel, e->l); |
| 6443 | nr += exps_mark_used(subrel, e->r); |
| 6444 | } else { |
| 6445 | nr += exp_mark_used(subrel, e->l); |
| 6446 | nr += exp_mark_used(subrel, e->r); |
| 6447 | if (e->f) |
| 6448 | nr += exp_mark_used(subrel, e->f); |
| 6449 | } |
| 6450 | break; |
| 6451 | case e_atom: |
| 6452 | /* atoms are used in e_cmp */ |
| 6453 | e->used = 1; |
| 6454 | /* return 0 as constants may require a full column ! */ |
| 6455 | if (e->f) |
| 6456 | nr += exps_mark_used(subrel, e->f); |
| 6457 | return nr; |
| 6458 | case e_psm: |
| 6459 | e->used = 1; |
| 6460 | break; |
| 6461 | } |
| 6462 | if (ne) { |
| 6463 | ne->used = 1; |
| 6464 | return ne->used; |
| 6465 | } |
| 6466 | return nr; |
| 6467 | } |
| 6468 | |
| 6469 | static void |
| 6470 | positional_exps_mark_used( sql_rel *rel, sql_rel *subrel ) |
| 6471 | { |
| 6472 | assert(rel->exps); |
| 6473 | |
| 6474 | if ((is_topn(subrel->op) || is_sample(subrel->op)) && subrel->l) |
| 6475 | subrel = subrel->l; |
| 6476 | /* everything is used within the set operation */ |
| 6477 | if (rel->exps && subrel->exps) { |
| 6478 | node *m; |
| 6479 | for (m=subrel->exps->h; m; m = m->next) { |
| 6480 | sql_exp *se = m->data; |
| 6481 | |
| 6482 | se->used = 1; |
| 6483 | } |
| 6484 | } |
| 6485 | } |
| 6486 | |
| 6487 | static void |
| 6488 | rel_exps_mark_used(sql_allocator *sa, sql_rel *rel, sql_rel *subrel) |
| 6489 | { |
| 6490 | int nr = 0; |
| 6491 | |
| 6492 | if (rel->r && (rel->op == op_project || rel->op == op_groupby)) { |
| 6493 | list *l = rel->r; |
| 6494 | node *n; |
| 6495 | |
| 6496 | for (n=l->h; n; n = n->next) { |
| 6497 | sql_exp *e = n->data; |
| 6498 | |
| 6499 | exp_mark_used(rel, e); |
| 6500 | } |
| 6501 | } |
| 6502 | |
| 6503 | if (rel->exps) { |
| 6504 | node *n; |
| 6505 | int len = list_length(rel->exps), i; |
| 6506 | sql_exp **exps = SA_NEW_ARRAY(sa, sql_exp*, len); |
| 6507 | |
| 6508 | for (n=rel->exps->h, i = 0; n; n = n->next, i++) { |
| 6509 | sql_exp *e = exps[i] = n->data; |
| 6510 | |
| 6511 | nr += e->used; |
| 6512 | } |
| 6513 | |
| 6514 | if (!nr && is_project(rel->op)) /* project atleast one column */ |
| 6515 | exps[0]->used = 1; |
| 6516 | |
| 6517 | for (i = len-1; i >= 0; i--) { |
| 6518 | sql_exp *e = exps[i]; |
| 6519 | |
| 6520 | if (!is_project(rel->op) || e->used) { |
| 6521 | if (is_project(rel->op)) |
| 6522 | nr += exp_mark_used(rel, e); |
| 6523 | nr += exp_mark_used(subrel, e); |
| 6524 | } |
| 6525 | } |
| 6526 | } |
| 6527 | /* for count/rank we need atleast one column */ |
| 6528 | if (subrel && !nr && (is_project(subrel->op) || is_base(subrel->op)) && subrel->exps->h) { |
| 6529 | sql_exp *e = subrel->exps->h->data; |
| 6530 | e->used = 1; |
| 6531 | } |
| 6532 | if (rel->r && (rel->op == op_project || rel->op == op_groupby)) { |
| 6533 | list *l = rel->r; |
| 6534 | node *n; |
| 6535 | |
| 6536 | for (n=l->h; n; n = n->next) { |
| 6537 | sql_exp *e = n->data; |
| 6538 | |
| 6539 | // exp_mark_used(rel, e); |
| 6540 | /* possibly project/groupby uses columns from the inner */ |
| 6541 | exp_mark_used(subrel, e); |
| 6542 | } |
| 6543 | } |
| 6544 | } |
| 6545 | |
| 6546 | static void exps_used(list *l); |
| 6547 | |
| 6548 | static void |
| 6549 | exp_used(sql_exp *e) |
| 6550 | { |
| 6551 | if (e) { |
| 6552 | e->used = 1; |
| 6553 | if ((e->type == e_func || e->type == e_aggr) && e->l) |
| 6554 | exps_used(e->l); |
| 6555 | } |
| 6556 | } |
| 6557 | |
| 6558 | static void |
| 6559 | exps_used(list *l) |
| 6560 | { |
| 6561 | if (l) { |
| 6562 | node *n; |
| 6563 | |
| 6564 | for (n = l->h; n; n = n->next) |
| 6565 | exp_used(n->data); |
| 6566 | } |
| 6567 | } |
| 6568 | |
| 6569 | static void |
| 6570 | rel_used(sql_rel *rel) |
| 6571 | { |
| 6572 | if (!rel) |
| 6573 | return; |
| 6574 | if (is_join(rel->op) || is_set(rel->op) || is_semi(rel->op)) { |
| 6575 | if (rel->l) |
| 6576 | rel_used(rel->l); |
| 6577 | if (rel->r) |
| 6578 | rel_used(rel->r); |
| 6579 | } else if (is_topn(rel->op) || is_select(rel->op) || is_sample(rel->op)) { |
| 6580 | rel_used(rel->l); |
| 6581 | rel = rel->l; |
| 6582 | } else if (rel->op == op_table && rel->r) { |
| 6583 | exp_used(rel->r); |
| 6584 | } |
| 6585 | if (rel && rel->exps) { |
| 6586 | exps_used(rel->exps); |
| 6587 | if (rel->r && (rel->op == op_project || rel->op == op_groupby)) |
| 6588 | exps_used(rel->r); |
| 6589 | } |
| 6590 | } |
| 6591 | |
| 6592 | static void |
| 6593 | rel_mark_used(mvc *sql, sql_rel *rel, int proj) |
| 6594 | { |
| 6595 | (void)sql; |
| 6596 | |
| 6597 | if (proj && (need_distinct(rel))) |
| 6598 | rel_used(rel); |
| 6599 | |
| 6600 | switch(rel->op) { |
| 6601 | case op_basetable: |
| 6602 | case op_table: |
| 6603 | |
| 6604 | if (rel->op == op_table && rel->l && rel->flag != 2) { |
| 6605 | rel_used(rel); |
| 6606 | if (rel->r) |
| 6607 | exp_mark_used(rel->l, rel->r); |
| 6608 | rel_mark_used(sql, rel->l, proj); |
| 6609 | } |
| 6610 | break; |
| 6611 | |
| 6612 | case op_topn: |
| 6613 | case op_sample: |
| 6614 | if (proj) { |
| 6615 | rel = rel ->l; |
| 6616 | rel_mark_used(sql, rel, proj); |
| 6617 | break; |
| 6618 | } |
| 6619 | /* fall through */ |
| 6620 | case op_project: |
| 6621 | case op_groupby: |
| 6622 | if (proj && rel->l) { |
| 6623 | rel_exps_mark_used(sql->sa, rel, rel->l); |
| 6624 | rel_mark_used(sql, rel->l, 0); |
| 6625 | } else if (proj) { |
| 6626 | rel_exps_mark_used(sql->sa, rel, NULL); |
| 6627 | } |
| 6628 | break; |
| 6629 | case op_update: |
| 6630 | case op_delete: |
| 6631 | if (proj && rel->r) { |
| 6632 | sql_rel *r = rel->r; |
| 6633 | if (r->exps && r->exps->h) { /* TID is used */ |
| 6634 | sql_exp *e = r->exps->h->data; |
| 6635 | e->used = 1; |
| 6636 | } |
| 6637 | rel_exps_mark_used(sql->sa, rel, rel->r); |
| 6638 | rel_mark_used(sql, rel->r, 0); |
| 6639 | } |
| 6640 | break; |
| 6641 | |
| 6642 | case op_insert: |
| 6643 | case op_truncate: |
| 6644 | case op_ddl: |
| 6645 | break; |
| 6646 | |
| 6647 | case op_select: |
| 6648 | if (rel->l) { |
| 6649 | rel_exps_mark_used(sql->sa, rel, rel->l); |
| 6650 | rel_mark_used(sql, rel->l, 0); |
| 6651 | } |
| 6652 | break; |
| 6653 | |
| 6654 | case op_union: |
| 6655 | case op_inter: |
| 6656 | case op_except: |
| 6657 | /* For now we mark all union expression as used */ |
| 6658 | |
| 6659 | /* Later we should (in case of union all) remove unused |
| 6660 | * columns from the projection. |
| 6661 | * |
| 6662 | * Project part of union is based on column position. |
| 6663 | */ |
| 6664 | if (proj && (need_distinct(rel) || !rel->exps)) { |
| 6665 | rel_used(rel); |
| 6666 | if (!rel->exps) { |
| 6667 | rel_used(rel->l); |
| 6668 | rel_used(rel->r); |
| 6669 | } |
| 6670 | rel_mark_used(sql, rel->l, 0); |
| 6671 | rel_mark_used(sql, rel->r, 0); |
| 6672 | } else if (proj && !need_distinct(rel)) { |
| 6673 | sql_rel *l = rel->l; |
| 6674 | |
| 6675 | positional_exps_mark_used(rel, l); |
| 6676 | rel_exps_mark_used(sql->sa, rel, l); |
| 6677 | rel_mark_used(sql, rel->l, 0); |
| 6678 | /* based on child check set expression list */ |
| 6679 | if (is_project(l->op) && need_distinct(l)) |
| 6680 | positional_exps_mark_used(l, rel); |
| 6681 | positional_exps_mark_used(rel, rel->r); |
| 6682 | rel_exps_mark_used(sql->sa, rel, rel->r); |
| 6683 | rel_mark_used(sql, rel->r, 0); |
| 6684 | } |
| 6685 | break; |
| 6686 | |
| 6687 | case op_join: |
| 6688 | case op_left: |
| 6689 | case op_right: |
| 6690 | case op_full: |
| 6691 | case op_semi: |
| 6692 | case op_anti: |
| 6693 | rel_exps_mark_used(sql->sa, rel, rel->l); |
| 6694 | rel_exps_mark_used(sql->sa, rel, rel->r); |
| 6695 | rel_mark_used(sql, rel->l, 0); |
| 6696 | rel_mark_used(sql, rel->r, 0); |
| 6697 | break; |
| 6698 | } |
| 6699 | } |
| 6700 | |
| 6701 | static sql_rel * rel_dce_sub(mvc *sql, sql_rel *rel); |
| 6702 | |
| 6703 | static sql_rel * |
| 6704 | rel_remove_unused(mvc *sql, sql_rel *rel) |
| 6705 | { |
| 6706 | int needed = 0; |
| 6707 | |
| 6708 | if (!rel) |
| 6709 | return rel; |
| 6710 | |
| 6711 | switch(rel->op) { |
| 6712 | case op_basetable: { |
| 6713 | sql_table *t = rel->l; |
| 6714 | |
| 6715 | if (t && isReplicaTable(t)) /* TODO fix rewriting in rel_distribute.c */ |
| 6716 | return rel; |
| 6717 | } |
| 6718 | /* fall through */ |
| 6719 | case op_table: |
| 6720 | if (rel->exps) { |
| 6721 | node *n; |
| 6722 | list *exps; |
| 6723 | |
| 6724 | for(n=rel->exps->h; n && !needed; n = n->next) { |
| 6725 | sql_exp *e = n->data; |
| 6726 | |
| 6727 | if (!e->used) |
| 6728 | needed = 1; |
| 6729 | } |
| 6730 | |
| 6731 | if (!needed) |
| 6732 | return rel; |
| 6733 | |
| 6734 | exps = new_exp_list(sql->sa); |
| 6735 | for(n=rel->exps->h; n; n = n->next) { |
| 6736 | sql_exp *e = n->data; |
| 6737 | |
| 6738 | if (e->used) |
| 6739 | append(exps, e); |
| 6740 | } |
| 6741 | /* atleast one (needed for crossproducts, count(*), rank() and single value projections) !, handled by rel_exps_mark_used */ |
| 6742 | if (list_length(exps) == 0) |
| 6743 | append(exps, rel->exps->h->data); |
| 6744 | rel->exps = exps; |
| 6745 | } |
| 6746 | return rel; |
| 6747 | |
| 6748 | case op_topn: |
| 6749 | case op_sample: |
| 6750 | |
| 6751 | if (rel->l) |
| 6752 | rel->l = rel_remove_unused(sql, rel->l); |
| 6753 | return rel; |
| 6754 | |
| 6755 | case op_project: |
| 6756 | case op_groupby: |
| 6757 | |
| 6758 | if (/*rel->l &&*/ rel->exps) { |
| 6759 | node *n; |
| 6760 | list *exps; |
| 6761 | |
| 6762 | for(n=rel->exps->h; n && !needed; n = n->next) { |
| 6763 | sql_exp *e = n->data; |
| 6764 | |
| 6765 | if (!e->used) |
| 6766 | needed = 1; |
| 6767 | } |
| 6768 | if (!needed) |
| 6769 | return rel; |
| 6770 | |
| 6771 | exps = new_exp_list(sql->sa); |
| 6772 | for(n=rel->exps->h; n; n = n->next) { |
| 6773 | sql_exp *e = n->data; |
| 6774 | |
| 6775 | if (e->used) |
| 6776 | append(exps, e); |
| 6777 | } |
| 6778 | /* atleast one (needed for crossproducts, count(*), rank() and single value projections) */ |
| 6779 | if (list_length(exps) <= 0) |
| 6780 | append(exps, rel->exps->h->data); |
| 6781 | rel->exps = exps; |
| 6782 | } |
| 6783 | return rel; |
| 6784 | |
| 6785 | case op_union: |
| 6786 | case op_inter: |
| 6787 | case op_except: |
| 6788 | |
| 6789 | case op_insert: |
| 6790 | case op_update: |
| 6791 | case op_delete: |
| 6792 | case op_truncate: |
| 6793 | |
| 6794 | case op_select: |
| 6795 | |
| 6796 | case op_join: |
| 6797 | case op_left: |
| 6798 | case op_right: |
| 6799 | case op_full: |
| 6800 | case op_semi: |
| 6801 | case op_anti: |
| 6802 | case op_ddl: |
| 6803 | return rel; |
| 6804 | } |
| 6805 | return rel; |
| 6806 | } |
| 6807 | |
| 6808 | static void |
| 6809 | rel_dep_graph( char *deps, list *refs, sql_rel *parent, sql_rel *rel) |
| 6810 | { |
| 6811 | if (!parent) |
| 6812 | return ; |
| 6813 | |
| 6814 | if (rel_is_ref(rel) && parent != rel) { |
| 6815 | int n = list_length(refs); |
| 6816 | int pnr = list_position(refs, parent); |
| 6817 | int cnr = list_position(refs, rel); |
| 6818 | |
| 6819 | deps[pnr*n + cnr] = 1; |
| 6820 | parent = rel; |
| 6821 | } |
| 6822 | |
| 6823 | switch(rel->op) { |
| 6824 | case op_table: |
| 6825 | case op_topn: |
| 6826 | case op_sample: |
| 6827 | case op_project: |
| 6828 | case op_groupby: |
| 6829 | case op_select: |
| 6830 | |
| 6831 | if (rel->l && (rel->op != op_table || rel->flag != 2)) |
| 6832 | rel_dep_graph(deps, refs, parent, rel->l); |
| 6833 | |
| 6834 | case op_basetable: |
| 6835 | case op_insert: |
| 6836 | case op_ddl: |
| 6837 | break; |
| 6838 | |
| 6839 | case op_update: |
| 6840 | case op_delete: |
| 6841 | case op_truncate: |
| 6842 | |
| 6843 | if (rel->r) |
| 6844 | rel_dep_graph(deps, refs, parent, rel->r); |
| 6845 | break; |
| 6846 | |
| 6847 | |
| 6848 | case op_union: |
| 6849 | case op_inter: |
| 6850 | case op_except: |
| 6851 | case op_join: |
| 6852 | case op_left: |
| 6853 | case op_right: |
| 6854 | case op_full: |
| 6855 | case op_semi: |
| 6856 | case op_anti: |
| 6857 | |
| 6858 | if (rel->l) |
| 6859 | rel_dep_graph(deps, refs, parent, rel->l); |
| 6860 | if (rel->r) |
| 6861 | rel_dep_graph(deps, refs, parent, rel->r); |
| 6862 | break; |
| 6863 | } |
| 6864 | } |
| 6865 | |
| 6866 | /* |
| 6867 | extern void _rel_print(mvc *sql, sql_rel *rel); |
| 6868 | |
| 6869 | static void |
| 6870 | print_deps(mvc *sql, char *deps, list *refs) |
| 6871 | { |
| 6872 | int i, j; |
| 6873 | int n = list_length(refs); |
| 6874 | |
| 6875 | for (i=0; i<n; i++) { |
| 6876 | sql_rel *r = list_fetch(refs, i); |
| 6877 | printf("dep %d\n", i); |
| 6878 | _rel_print(sql,r); |
| 6879 | } |
| 6880 | for (i=0; i<n; i++) { |
| 6881 | for (j=0; j<n; j++) { |
| 6882 | printf("%c ", i==j?'x' : deps[i*n + j]?'1':'0'); |
| 6883 | } |
| 6884 | printf("\n"); |
| 6885 | } |
| 6886 | |
| 6887 | } |
| 6888 | */ |
| 6889 | |
/*
 * Find the next dependency of entry nr.
 *
 * deps is an n x n row-major matrix; a non-zero deps[nr*n + d] means that
 * entry nr depends on entry d. Columns dnr .. n-1 are scanned and the
 * diagonal (d == nr) is skipped, because flatten_dep uses it as a
 * "done" marker.
 *
 * Returns the first dependency column found, or -1 when there is none.
 * The scan never reads outside row nr: the diagonal is skipped with
 * 'continue', so the loop bound is re-checked before the next access.
 */
static int
depends_on(int nr, char *deps, int n, int dnr)
{
	for (; dnr < n; dnr++) {
		if (dnr == nr)
			continue;
		if (deps[nr*n + dnr])
			return dnr;
	}
	return -1;
}
| 6901 | |
| 6902 | static void |
| 6903 | flatten_dep(list *nrefs, list *refs, int nr, char *deps, int n) |
| 6904 | { |
| 6905 | int dnr = 0; |
| 6906 | |
| 6907 | if (deps[nr*n + nr]) |
| 6908 | return; |
| 6909 | for (;(dnr = depends_on(nr, deps, n, dnr)) >= 0 && dnr < n; dnr++) |
| 6910 | flatten_dep(nrefs, refs, dnr, deps, n); |
| 6911 | if (!deps[nr*n + nr]) { |
| 6912 | list_prepend(nrefs, list_fetch(refs,nr)); |
| 6913 | deps[nr*n+nr] = 1; /* mark done */ |
| 6914 | } |
| 6915 | } |
| 6916 | |
| 6917 | static list * |
| 6918 | flatten_dep_graph(mvc *sql, char *deps, list *refs) |
| 6919 | { |
| 6920 | list *nrefs = sa_list(sql->sa); |
| 6921 | int n = list_length(refs), nr = 0; |
| 6922 | |
| 6923 | for (nr = 0; nr < n; nr++) { |
| 6924 | if (deps[nr*n + nr]) |
| 6925 | continue; |
| 6926 | flatten_dep(nrefs, refs, nr, deps, n); |
| 6927 | } |
| 6928 | return nrefs; |
| 6929 | } |
| 6930 | |
| 6931 | static list * |
| 6932 | rel_opt_dependencies(mvc *sql, list *refs) |
| 6933 | { |
| 6934 | int n = list_length(refs); |
| 6935 | |
| 6936 | if (n > 1) { |
| 6937 | char *deps = SA_NEW_ARRAY(sql->sa, char, n*n); |
| 6938 | node *m; |
| 6939 | |
| 6940 | memset(deps, 0, n*n); |
| 6941 | for (m = refs->h; m; m = m->next) { |
| 6942 | rel_dep_graph(deps, refs, m->data, m->data); |
| 6943 | } |
| 6944 | refs = flatten_dep_graph(sql, deps, refs); |
| 6945 | //print_deps(sql, deps, refs); |
| 6946 | } |
| 6947 | return refs; |
| 6948 | } |
| 6949 | |
| 6950 | static void |
| 6951 | rel_dce_refs(mvc *sql, sql_rel *rel, list *refs) |
| 6952 | { |
| 6953 | if (!rel || (rel_is_ref(rel) && list_find(refs, rel, NULL))) |
| 6954 | return ; |
| 6955 | |
| 6956 | switch(rel->op) { |
| 6957 | case op_table: |
| 6958 | case op_topn: |
| 6959 | case op_sample: |
| 6960 | case op_project: |
| 6961 | case op_groupby: |
| 6962 | case op_select: |
| 6963 | |
| 6964 | if (rel->l && (rel->op != op_table || rel->flag != 2)) |
| 6965 | rel_dce_refs(sql, rel->l, refs); |
| 6966 | break; |
| 6967 | |
| 6968 | case op_basetable: |
| 6969 | case op_insert: |
| 6970 | case op_ddl: |
| 6971 | break; |
| 6972 | |
| 6973 | case op_update: |
| 6974 | case op_delete: |
| 6975 | case op_truncate: |
| 6976 | |
| 6977 | if (rel->r) |
| 6978 | rel_dce_refs(sql, rel->r, refs); |
| 6979 | break; |
| 6980 | |
| 6981 | |
| 6982 | case op_union: |
| 6983 | case op_inter: |
| 6984 | case op_except: |
| 6985 | case op_join: |
| 6986 | case op_left: |
| 6987 | case op_right: |
| 6988 | case op_full: |
| 6989 | case op_semi: |
| 6990 | case op_anti: |
| 6991 | |
| 6992 | if (rel->l) |
| 6993 | rel_dce_refs(sql, rel->l, refs); |
| 6994 | if (rel->r) |
| 6995 | rel_dce_refs(sql, rel->r, refs); |
| 6996 | break; |
| 6997 | } |
| 6998 | |
| 6999 | if (rel_is_ref(rel) && !list_find(refs, rel, NULL)) |
| 7000 | list_prepend(refs, rel); |
| 7001 | } |
| 7002 | |
| 7003 | static sql_rel * |
| 7004 | rel_dce_down(mvc *sql, sql_rel *rel, int skip_proj) |
| 7005 | { |
| 7006 | if (!rel) |
| 7007 | return rel; |
| 7008 | |
| 7009 | if (!skip_proj && rel_is_ref(rel)) |
| 7010 | return rel; |
| 7011 | |
| 7012 | switch(rel->op) { |
| 7013 | case op_basetable: |
| 7014 | case op_table: |
| 7015 | |
| 7016 | if (skip_proj && rel->l && rel->op == op_table && rel->flag != 2) |
| 7017 | rel->l = rel_dce_down(sql, rel->l, 0); |
| 7018 | if (!skip_proj) |
| 7019 | rel_dce_sub(sql, rel); |
| 7020 | /* fall through */ |
| 7021 | |
| 7022 | case op_truncate: |
| 7023 | case op_ddl: |
| 7024 | |
| 7025 | return rel; |
| 7026 | |
| 7027 | case op_insert: |
| 7028 | rel_used(rel->r); |
| 7029 | rel_dce_sub(sql, rel->r); |
| 7030 | return rel; |
| 7031 | |
| 7032 | case op_update: |
| 7033 | case op_delete: |
| 7034 | |
| 7035 | if (skip_proj && rel->r) |
| 7036 | rel->r = rel_dce_down(sql, rel->r, 0); |
| 7037 | if (!skip_proj) |
| 7038 | rel_dce_sub(sql, rel); |
| 7039 | return rel; |
| 7040 | |
| 7041 | case op_topn: |
| 7042 | case op_sample: |
| 7043 | case op_project: |
| 7044 | case op_groupby: |
| 7045 | |
| 7046 | if (skip_proj && rel->l) |
| 7047 | rel->l = rel_dce_down(sql, rel->l, is_topn(rel->op) || is_sample(rel->op)); |
| 7048 | if (!skip_proj) |
| 7049 | rel_dce_sub(sql, rel); |
| 7050 | return rel; |
| 7051 | |
| 7052 | case op_union: |
| 7053 | case op_inter: |
| 7054 | case op_except: |
| 7055 | if (skip_proj) { |
| 7056 | if (rel->l) |
| 7057 | rel->l = rel_dce_down(sql, rel->l, 0); |
| 7058 | if (rel->r) |
| 7059 | rel->r = rel_dce_down(sql, rel->r, 0); |
| 7060 | } |
| 7061 | if (!skip_proj) |
| 7062 | rel_dce_sub(sql, rel); |
| 7063 | return rel; |
| 7064 | |
| 7065 | case op_select: |
| 7066 | if (rel->l) |
| 7067 | rel->l = rel_dce_down(sql, rel->l, 0); |
| 7068 | return rel; |
| 7069 | |
| 7070 | case op_join: |
| 7071 | case op_left: |
| 7072 | case op_right: |
| 7073 | case op_full: |
| 7074 | case op_semi: |
| 7075 | case op_anti: |
| 7076 | if (rel->l) |
| 7077 | rel->l = rel_dce_down(sql, rel->l, 0); |
| 7078 | if (rel->r) |
| 7079 | rel->r = rel_dce_down(sql, rel->r, 0); |
| 7080 | return rel; |
| 7081 | } |
| 7082 | return rel; |
| 7083 | } |
| 7084 | |
| 7085 | /* DCE |
| 7086 | * |
| 7087 | * Based on top relation expressions mark sub expressions as used. |
| 7088 | * Then recurse down until the projections. Clean them up and repeat. |
| 7089 | */ |
| 7090 | |
| 7091 | static sql_rel * |
| 7092 | rel_dce_sub(mvc *sql, sql_rel *rel) |
| 7093 | { |
| 7094 | if (!rel) |
| 7095 | return rel; |
| 7096 | |
| 7097 | /* |
| 7098 | * Mark used up until the next project |
| 7099 | * For setops we need to first mark, then remove |
| 7100 | * because of positional dependency |
| 7101 | */ |
| 7102 | rel_mark_used(sql, rel, 1); |
| 7103 | rel = rel_remove_unused(sql, rel); |
| 7104 | rel_dce_down(sql, rel, 1); |
| 7105 | return rel; |
| 7106 | } |
| 7107 | |
| 7108 | /* add projects under set ops */ |
| 7109 | static sql_rel * |
| 7110 | rel_add_projects(mvc *sql, sql_rel *rel) |
| 7111 | { |
| 7112 | if (!rel) |
| 7113 | return rel; |
| 7114 | |
| 7115 | switch(rel->op) { |
| 7116 | case op_basetable: |
| 7117 | case op_table: |
| 7118 | |
| 7119 | case op_insert: |
| 7120 | case op_update: |
| 7121 | case op_delete: |
| 7122 | case op_truncate: |
| 7123 | case op_ddl: |
| 7124 | |
| 7125 | return rel; |
| 7126 | |
| 7127 | case op_union: |
| 7128 | case op_inter: |
| 7129 | case op_except: |
| 7130 | |
| 7131 | /* We can only reduce the list of expressions of an set op |
| 7132 | * if the projection under it can also be reduced. |
| 7133 | */ |
| 7134 | if (rel->l) { |
| 7135 | sql_rel *l = rel->l; |
| 7136 | |
| 7137 | l->subquery = 0; |
| 7138 | if (!is_project(l->op) && !need_distinct(rel)) |
| 7139 | l = rel_project(sql->sa, l, rel_projections(sql, l, NULL, 1, 1)); |
| 7140 | rel->l = rel_add_projects(sql, l); |
| 7141 | } |
| 7142 | if (rel->r) { |
| 7143 | sql_rel *r = rel->r; |
| 7144 | |
| 7145 | r->subquery = 0; |
| 7146 | if (!is_project(r->op) && !need_distinct(rel)) |
| 7147 | r = rel_project(sql->sa, r, rel_projections(sql, r, NULL, 1, 1)); |
| 7148 | rel->r = rel_add_projects(sql, r); |
| 7149 | } |
| 7150 | return rel; |
| 7151 | |
| 7152 | case op_topn: |
| 7153 | case op_sample: |
| 7154 | case op_project: |
| 7155 | case op_groupby: |
| 7156 | case op_select: |
| 7157 | if (rel->l) |
| 7158 | rel->l = rel_add_projects(sql, rel->l); |
| 7159 | return rel; |
| 7160 | |
| 7161 | case op_join: |
| 7162 | case op_left: |
| 7163 | case op_right: |
| 7164 | case op_full: |
| 7165 | case op_semi: |
| 7166 | case op_anti: |
| 7167 | if (rel->l) |
| 7168 | rel->l = rel_add_projects(sql, rel->l); |
| 7169 | if (rel->r) |
| 7170 | rel->r = rel_add_projects(sql, rel->r); |
| 7171 | return rel; |
| 7172 | } |
| 7173 | return rel; |
| 7174 | } |
| 7175 | |
| 7176 | sql_rel * |
| 7177 | rel_dce(mvc *sql, sql_rel *rel) |
| 7178 | { |
| 7179 | list *refs = sa_list(sql->sa); |
| 7180 | |
| 7181 | rel_dce_refs(sql, rel, refs); |
| 7182 | if (refs) { |
| 7183 | node *n; |
| 7184 | |
| 7185 | for(n = refs->h; n; n = n->next) { |
| 7186 | sql_rel *i = n->data; |
| 7187 | |
| 7188 | while (!rel_is_ref(i) && i->l && !is_base(i->op)) |
| 7189 | i = i->l; |
| 7190 | if (i) |
| 7191 | rel_used(i); |
| 7192 | } |
| 7193 | } |
| 7194 | rel = rel_add_projects(sql, rel); |
| 7195 | rel_used(rel); |
| 7196 | rel_dce_sub(sql, rel); |
| 7197 | return rel; |
| 7198 | } |
| 7199 | |
| 7200 | static int |
| 7201 | index_exp(sql_exp *e, sql_idx *i) |
| 7202 | { |
| 7203 | if (e->type == e_cmp && !is_complex_exp(e->flag)) { |
| 7204 | switch(i->type) { |
| 7205 | case hash_idx: |
| 7206 | case oph_idx: |
| 7207 | if (e->flag == cmp_equal) |
| 7208 | return 0; |
| 7209 | /* fall through */ |
| 7210 | case join_idx: |
| 7211 | default: |
| 7212 | return -1; |
| 7213 | } |
| 7214 | } |
| 7215 | return -1; |
| 7216 | } |
| 7217 | |
| 7218 | static sql_idx * |
| 7219 | find_index(sql_allocator *sa, sql_rel *rel, sql_rel *sub, list **EXPS) |
| 7220 | { |
| 7221 | node *n; |
| 7222 | |
| 7223 | /* any (partial) match of the expressions with the index columns */ |
| 7224 | /* Depending on the index type we may need full matches and only |
| 7225 | limited number of cmp types (hash only equality etc) */ |
| 7226 | /* Depending on the index type we should (in the rel_bin) generate |
| 7227 | more code, ie for spatial index add post filter etc, for hash |
| 7228 | compute hash value and use index */ |
| 7229 | |
| 7230 | if (sub->exps && rel->exps) |
| 7231 | for(n = sub->exps->h; n; n = n->next) { |
| 7232 | prop *p; |
| 7233 | sql_exp *e = n->data; |
| 7234 | |
| 7235 | if ((p = find_prop(e->p, PROP_HASHIDX)) != NULL) { |
| 7236 | list *exps, *cols; |
| 7237 | sql_idx *i = p->value; |
| 7238 | fcmp cmp = (fcmp)&sql_column_kc_cmp; |
| 7239 | |
| 7240 | /* join indices are only interesting for joins */ |
| 7241 | if (i->type == join_idx || list_length(i->columns) <= 1) |
| 7242 | continue; |
| 7243 | /* based on the index type, find qualifying exps */ |
| 7244 | exps = list_select(rel->exps, i, (fcmp) &index_exp, (fdup)NULL); |
| 7245 | if (!exps || !list_length(exps)) |
| 7246 | continue; |
| 7247 | /* now we obtain the columns, move into sql_column_kc_cmp! */ |
| 7248 | cols = list_map(exps, sub, (fmap) &sjexp_col); |
| 7249 | |
| 7250 | /* TODO check that at most 2 relations are involved */ |
| 7251 | |
| 7252 | /* Match the index columns with the expression columns. |
| 7253 | TODO, Allow partial matches ! */ |
| 7254 | if (list_match(cols, i->columns, cmp) == 0) { |
| 7255 | /* re-order exps in index order */ |
| 7256 | node *n, *m; |
| 7257 | list *es = sa_list(sa); |
| 7258 | |
| 7259 | for(n = i->columns->h; n; n = n->next) { |
| 7260 | int i = 0; |
| 7261 | for(m = cols->h; m; m = m->next, i++) { |
| 7262 | if (cmp(m->data, n->data) == 0){ |
| 7263 | sql_exp *e = list_fetch(exps, i); |
| 7264 | list_append(es, e); |
| 7265 | break; |
| 7266 | } |
| 7267 | } |
| 7268 | } |
| 7269 | /* fix the destroy function */ |
| 7270 | cols->destroy = NULL; |
| 7271 | *EXPS = es; |
| 7272 | e->used = 1; |
| 7273 | return i; |
| 7274 | } |
| 7275 | cols->destroy = NULL; |
| 7276 | } |
| 7277 | } |
| 7278 | return NULL; |
| 7279 | } |
| 7280 | |
| 7281 | static sql_rel * |
| 7282 | rel_use_index(int *changes, mvc *sql, sql_rel *rel) |
| 7283 | { |
| 7284 | (void)changes; |
| 7285 | if (rel->l && (is_select(rel->op) || is_join(rel->op))) { |
| 7286 | list *exps = NULL; |
| 7287 | sql_idx *i = find_index(sql->sa, rel, rel->l, &exps); |
| 7288 | int left = 1; |
| 7289 | |
| 7290 | if (!i && is_join(rel->op)) |
| 7291 | i = find_index(sql->sa, rel, rel->l, &exps); |
| 7292 | if (!i && is_join(rel->op)) { |
| 7293 | left = 0; |
| 7294 | i = find_index(sql->sa, rel, rel->r, &exps); |
| 7295 | } |
| 7296 | |
| 7297 | if (i) { |
| 7298 | prop *p; |
| 7299 | node *n; |
| 7300 | int single_table = 1; |
| 7301 | sql_exp *re = NULL; |
| 7302 | |
| 7303 | for( n = exps->h; n && single_table; n = n->next) { |
| 7304 | sql_exp *e = n->data; |
| 7305 | sql_exp *nre = e->r; |
| 7306 | |
| 7307 | if (is_join(rel->op) && |
| 7308 | ((left && !rel_find_exp(rel->l, e->l)) || |
| 7309 | (!left && !rel_find_exp(rel->r, e->l)))) |
| 7310 | nre = e->l; |
| 7311 | single_table = (!re || (exp_relname(nre) && exp_relname(re) && strcmp(exp_relname(nre), exp_relname(re)) == 0)); |
| 7312 | re = nre; |
| 7313 | } |
| 7314 | if (single_table) { /* add PROP_HASHCOL to all column exps */ |
| 7315 | for( n = exps->h; n; n = n->next) { |
| 7316 | sql_exp *e = n->data; |
| 7317 | int anti = is_anti(e); |
| 7318 | |
| 7319 | /* swapped ? */ |
| 7320 | if (is_join(rel->op) && |
| 7321 | ((left && !rel_find_exp(rel->l, e->l)) || |
| 7322 | (!left && !rel_find_exp(rel->r, e->l)))) |
| 7323 | n->data = e = exp_compare(sql->sa, e->r, e->l, cmp_equal); |
| 7324 | if (anti) set_anti(e); |
| 7325 | p = find_prop(e->p, PROP_HASHCOL); |
| 7326 | if (!p) |
| 7327 | e->p = p = prop_create(sql->sa, PROP_HASHCOL, e->p); |
| 7328 | p->value = i; |
| 7329 | } |
| 7330 | } |
| 7331 | /* add the remaining exps to the new exp list */ |
| 7332 | if (list_length(rel->exps) > list_length(exps)) { |
| 7333 | for( n = rel->exps->h; n; n = n->next) { |
| 7334 | sql_exp *e = n->data; |
| 7335 | if (!list_find(exps, e, (fcmp)&exp_cmp)) |
| 7336 | list_append(exps, e); |
| 7337 | } |
| 7338 | } |
| 7339 | rel->exps = exps; |
| 7340 | } |
| 7341 | } |
| 7342 | return rel; |
| 7343 | } |
| 7344 | |
| 7345 | static int |
| 7346 | score_se( mvc *sql, sql_rel *rel, sql_exp *e) |
| 7347 | { |
| 7348 | int score = 0; |
| 7349 | if (e->type == e_cmp && !is_complex_exp(e->flag)) { |
| 7350 | score += score_gbe(sql, rel, e->l); |
| 7351 | } |
| 7352 | score += exp_keyvalue(e); |
| 7353 | return score; |
| 7354 | } |
| 7355 | |
| 7356 | static sql_rel * |
| 7357 | rel_select_order(int *changes, mvc *sql, sql_rel *rel) |
| 7358 | { |
| 7359 | (void)changes; |
| 7360 | if (is_select(rel->op) && rel->exps && list_length(rel->exps)>1) { |
| 7361 | int i, *scores = calloc(list_length(rel->exps), sizeof(int)); |
| 7362 | node *n; |
| 7363 | |
| 7364 | for (i = 0, n = rel->exps->h; n; i++, n = n->next) |
| 7365 | scores[i] = score_se(sql, rel, n->data); |
| 7366 | rel->exps = list_keysort(rel->exps, scores, (fdup)NULL); |
| 7367 | free(scores); |
| 7368 | } |
| 7369 | return rel; |
| 7370 | } |
| 7371 | |
| 7372 | static sql_rel * |
| 7373 | rel_simplify_like_select(int *changes, mvc *sql, sql_rel *rel) |
| 7374 | { |
| 7375 | if (is_select(rel->op) && rel->exps) { |
| 7376 | node *n; |
| 7377 | list *exps; |
| 7378 | int needed = 0; |
| 7379 | |
| 7380 | for (n = rel->exps->h; n && !needed; n = n->next) { |
| 7381 | sql_exp *e = n->data; |
| 7382 | list *l = e->l; |
| 7383 | list *r = e->r; |
| 7384 | |
| 7385 | if (e->type == e_cmp && get_cmp(e) == cmp_filter && strcmp(((sql_subfunc*)e->f)->func->base.name, "like" ) == 0 && list_length(l) == 1 && list_length(r) <= 2 && !is_anti(e)) |
| 7386 | needed = 1; |
| 7387 | } |
| 7388 | |
| 7389 | if (!needed) |
| 7390 | return rel; |
| 7391 | |
| 7392 | exps = sa_list(sql->sa); |
| 7393 | if (exps == NULL) |
| 7394 | return NULL; |
| 7395 | for (n = rel->exps->h; n; n = n->next) { |
| 7396 | sql_exp *e = n->data; |
| 7397 | list *l = e->l; |
| 7398 | list *r = e->r; |
| 7399 | |
| 7400 | if (e->type == e_cmp && get_cmp(e) == cmp_filter && strcmp(((sql_subfunc*)e->f)->func->base.name, "like" ) == 0 && list_length(l) == 1 && list_length(r) <= 2 && !is_anti(e)) { |
| 7401 | list *r = e->r; |
| 7402 | sql_exp *fmt = r->h->data; |
| 7403 | sql_exp *esc = (r->h->next)?r->h->next->data:NULL; |
| 7404 | int rewrite = 0; |
| 7405 | |
| 7406 | if (fmt->type == e_convert) |
| 7407 | fmt = fmt->l; |
| 7408 | /* check for simple like expression */ |
| 7409 | if (is_atom(fmt->type)) { |
| 7410 | atom *fa = NULL; |
| 7411 | |
| 7412 | if (fmt->l) { |
| 7413 | fa = fmt->l; |
| 7414 | /* simple numbered argument */ |
| 7415 | } else if (!fmt->r && !fmt->f) { |
| 7416 | fa = sql->args[fmt->flag]; |
| 7417 | |
| 7418 | } |
| 7419 | if (fa && fa->data.vtype == TYPE_str && |
| 7420 | !strchr(fa->data.val.sval, '%') && |
| 7421 | !strchr(fa->data.val.sval, '_')) |
| 7422 | rewrite = 1; |
| 7423 | } |
| 7424 | if (rewrite && esc && is_atom(esc->type)) { |
| 7425 | atom *ea = NULL; |
| 7426 | |
| 7427 | if (esc->l) { |
| 7428 | ea = esc->l; |
| 7429 | /* simple numbered argument */ |
| 7430 | } else if (!esc->r && !esc->f) { |
| 7431 | ea = sql->args[esc->flag]; |
| 7432 | |
| 7433 | } |
| 7434 | if (ea && (ea->data.vtype != TYPE_str || |
| 7435 | strlen(ea->data.val.sval) != 0)) |
| 7436 | rewrite = 0; |
| 7437 | } |
| 7438 | if (rewrite) { /* rewrite to cmp_equal ! */ |
| 7439 | list *l = e->l; |
| 7440 | list *r = e->r; |
| 7441 | sql_exp *ne = exp_compare(sql->sa, l->h->data, r->h->data, cmp_equal); |
| 7442 | |
| 7443 | if (is_anti(e)) set_anti(ne); |
| 7444 | /* if rewritten don't cache this query */ |
| 7445 | list_append(exps, ne); |
| 7446 | sql->caching = 0; |
| 7447 | (*changes)++; |
| 7448 | } else { |
| 7449 | list_append(exps, e); |
| 7450 | } |
| 7451 | } else { |
| 7452 | list_append(exps, e); |
| 7453 | } |
| 7454 | } |
| 7455 | rel->exps = exps; |
| 7456 | } |
| 7457 | return rel; |
| 7458 | } |
| 7459 | |
| 7460 | static sql_rel * |
| 7461 | rel_simplify_predicates(int *changes, mvc *sql, sql_rel *rel) |
| 7462 | { |
| 7463 | if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && rel->exps && rel->card > CARD_ATOM) { |
| 7464 | node *n; |
| 7465 | list *exps = sa_list(sql->sa); |
| 7466 | |
| 7467 | for (n = rel->exps->h; n; n = n->next) { |
| 7468 | sql_exp *e = n->data; |
| 7469 | |
| 7470 | if (is_atom(e->type) && e->l) { /* direct literal */ |
| 7471 | atom *a = e->l; |
| 7472 | int flag = a->data.val.bval; |
| 7473 | |
| 7474 | /* remove simple select true expressions */ |
| 7475 | if (flag) |
| 7476 | continue; |
| 7477 | } |
| 7478 | if (is_atom(e->type) && !e->l && !e->r) { /* numbered variable */ |
| 7479 | atom *a = sql->args[e->flag]; |
| 7480 | int flag = a->data.val.bval; |
| 7481 | |
| 7482 | /* remove simple select true expressions */ |
| 7483 | if (flag) { |
| 7484 | sql->caching = 0; |
| 7485 | continue; |
| 7486 | } |
| 7487 | } |
| 7488 | if (e->type == e_cmp && get_cmp(e) == cmp_equal) { |
| 7489 | sql_exp *l = e->l; |
| 7490 | sql_exp *r = e->r; |
| 7491 | |
| 7492 | if (l->type == e_func) { |
| 7493 | sql_subfunc *f = l->f; |
| 7494 | |
| 7495 | /* rewrite isnull(x) = TRUE/FALSE => x =/<> NULL */ |
| 7496 | if (is_select(rel->op) && !f->func->s && !strcmp(f->func->base.name, "isnull" ) && |
| 7497 | is_atom(r->type) && r->l) { /* direct literal */ |
| 7498 | atom *a = r->l; |
| 7499 | int flag = a->data.val.bval; |
| 7500 | list *args = l->l; |
| 7501 | |
| 7502 | assert(list_length(args) == 1); |
| 7503 | l = args->h->data; |
| 7504 | if (exp_subtype(l)) { |
| 7505 | r = exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(l), NULL)); |
| 7506 | e = exp_compare2(sql->sa, l, r, r, 3); |
| 7507 | if (e && !flag) |
| 7508 | set_anti(e); |
| 7509 | } |
| 7510 | } else if (!f->func->s && !strcmp(f->func->base.name, "not" )) { |
| 7511 | if (is_atom(r->type) && r->l) { /* direct literal */ |
| 7512 | atom *a = r->l; |
| 7513 | list *args = l->l; |
| 7514 | sql_exp *inner = args->h->data; |
| 7515 | sql_subfunc *inf = inner->f; |
| 7516 | |
| 7517 | assert(list_length(args) == 1); |
| 7518 | |
| 7519 | /* not(not(x)) = TRUE/FALSE => x = TRUE/FALSE */ |
| 7520 | if (inner->type == e_func && |
| 7521 | !inf->func->s && |
| 7522 | !strcmp(inf->func->base.name, "not" )) { |
| 7523 | int anti = is_anti(e); |
| 7524 | |
| 7525 | args = inner->l; |
| 7526 | assert(list_length(args) == 1); |
| 7527 | l = args->h->data; |
| 7528 | e = exp_compare(sql->sa, l, r, e->flag); |
| 7529 | if (anti) set_anti(e); |
| 7530 | /* rewrite not(=/<>(a,b)) = TRUE/FALSE => a=b of a<>b */ |
| 7531 | } else if (inner->type == e_func && |
| 7532 | !inf->func->s && |
| 7533 | (!strcmp(inf->func->base.name, "=" ) || |
| 7534 | !strcmp(inf->func->base.name, "<>" ))) { |
| 7535 | int flag = a->data.val.bval; |
| 7536 | args = inner->l; |
| 7537 | |
| 7538 | if (!strcmp(inf->func->base.name, "<>" )) |
| 7539 | flag = !flag; |
| 7540 | assert(list_length(args) == 2); |
| 7541 | l = args->h->data; |
| 7542 | r = args->h->next->data; |
| 7543 | e = exp_compare(sql->sa, l, r, (!flag)?cmp_equal:cmp_notequal); |
| 7544 | } else if (a && a->data.vtype == TYPE_bit) { |
| 7545 | int anti = is_anti(e); |
| 7546 | |
| 7547 | /* change atom's value on right */ |
| 7548 | l = args->h->data; |
| 7549 | a->data.val.bval = !a->data.val.bval; |
| 7550 | e = exp_compare(sql->sa, l, r, e->flag); |
| 7551 | if (anti) set_anti(e); |
| 7552 | (*changes)++; |
| 7553 | } |
| 7554 | } |
| 7555 | } |
| 7556 | } |
| 7557 | list_append(exps, e); |
| 7558 | } else { |
| 7559 | list_append(exps, e); |
| 7560 | } |
| 7561 | } |
| 7562 | rel->exps = exps; |
| 7563 | } |
| 7564 | return rel; |
| 7565 | } |
| 7566 | |
| 7567 | static void split_exps(mvc *sql, list *exps, sql_rel *rel); |
| 7568 | |
| 7569 | static int |
| 7570 | exp_match_exp_cmp( sql_exp *e1, sql_exp *e2) |
| 7571 | { |
| 7572 | if (exp_match_exp(e1,e2)) |
| 7573 | return 0; |
| 7574 | return -1; |
| 7575 | } |
| 7576 | |
| 7577 | static int |
| 7578 | exp_refers_cmp( sql_exp *e1, sql_exp *e2) |
| 7579 | { |
| 7580 | if (exp_refers(e1,e2)) |
| 7581 | return 0; |
| 7582 | return -1; |
| 7583 | } |
| 7584 | |
| 7585 | static sql_exp * |
| 7586 | add_exp_too_project(mvc *sql, sql_exp *e, sql_rel *rel) |
| 7587 | { |
| 7588 | node *n = list_find(rel->exps, e, (fcmp)&exp_match_exp_cmp); |
| 7589 | |
| 7590 | /* if not matching we may refer to an older expression */ |
| 7591 | if (!n) |
| 7592 | n = list_find(rel->exps, e, (fcmp)&exp_refers_cmp); |
| 7593 | if (!n) { |
| 7594 | exp_label(sql->sa, e, ++sql->label); |
| 7595 | append(rel->exps, e); |
| 7596 | } else { |
| 7597 | e = n->data; |
| 7598 | } |
| 7599 | e = exp_ref(sql->sa, e); |
| 7600 | return e; |
| 7601 | } |
| 7602 | |
| 7603 | static void |
| 7604 | add_exps_too_project(mvc *sql, list *exps, sql_rel *rel) |
| 7605 | { |
| 7606 | node *n; |
| 7607 | |
| 7608 | if (!exps) |
| 7609 | return; |
| 7610 | for(n=exps->h; n; n = n->next) { |
| 7611 | sql_exp *e = n->data; |
| 7612 | |
| 7613 | if (e->type != e_column && !exp_is_atom(e)) |
| 7614 | n->data = add_exp_too_project(sql, e, rel); |
| 7615 | } |
| 7616 | } |
| 7617 | |
| 7618 | static sql_exp * |
| 7619 | split_exp(mvc *sql, sql_exp *e, sql_rel *rel) |
| 7620 | { |
| 7621 | if (exp_is_atom(e)) |
| 7622 | return e; |
| 7623 | switch(e->type) { |
| 7624 | case e_column: |
| 7625 | return e; |
| 7626 | case e_convert: |
| 7627 | e->l = split_exp(sql, e->l, rel); |
| 7628 | return e; |
| 7629 | case e_aggr: |
| 7630 | case e_func: |
| 7631 | if (!is_analytic(e) && !exp_has_sideeffect(e)) { |
| 7632 | sql_subfunc *f = e->f; |
| 7633 | if (e->type == e_func && !f->func->s && !strcmp(f->func->base.name, "ifthenelse" )) { |
| 7634 | return e; |
| 7635 | } else { |
| 7636 | split_exps(sql, e->l, rel); |
| 7637 | add_exps_too_project(sql, e->l, rel); |
| 7638 | } |
| 7639 | } |
| 7640 | return e; |
| 7641 | case e_cmp: |
| 7642 | if (get_cmp(e) == cmp_or) { |
| 7643 | split_exps(sql, e->l, rel); |
| 7644 | split_exps(sql, e->r, rel); |
| 7645 | } else if (e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter) { |
| 7646 | e->l = split_exp(sql, e->l, rel); |
| 7647 | split_exps(sql, e->r, rel); |
| 7648 | } else { |
| 7649 | e->l = split_exp(sql, e->l, rel); |
| 7650 | e->r = split_exp(sql, e->r, rel); |
| 7651 | if (e->f) { |
| 7652 | e->f = split_exp(sql, e->f, rel); |
| 7653 | } |
| 7654 | } |
| 7655 | return e; |
| 7656 | case e_psm: |
| 7657 | case e_atom: |
| 7658 | return e; |
| 7659 | } |
| 7660 | return e; |
| 7661 | } |
| 7662 | |
| 7663 | static void |
| 7664 | split_exps(mvc *sql, list *exps, sql_rel *rel) |
| 7665 | { |
| 7666 | node *n; |
| 7667 | |
| 7668 | if (!exps) |
| 7669 | return; |
| 7670 | for(n=exps->h; n; n = n->next){ |
| 7671 | sql_exp *e = n->data; |
| 7672 | |
| 7673 | e = split_exp(sql, e, rel); |
| 7674 | n->data = e; |
| 7675 | } |
| 7676 | } |
| 7677 | |
| 7678 | static sql_rel * |
| 7679 | rel_split_project(int *changes, mvc *sql, sql_rel *rel, int top) |
| 7680 | { |
| 7681 | if (is_project(rel->op) && list_length(rel->exps) && (is_groupby(rel->op) || rel->l) && !need_distinct(rel)) { |
| 7682 | list *exps = rel->exps; |
| 7683 | node *n; |
| 7684 | int funcs = 0; |
| 7685 | sql_rel *nrel; |
| 7686 | |
| 7687 | /* are there functions */ |
| 7688 | for (n=exps->h; n && !funcs; n = n->next) { |
| 7689 | sql_exp *e = n->data; |
| 7690 | |
| 7691 | funcs = exp_has_func(e); |
| 7692 | } |
| 7693 | /* introduce extra project */ |
| 7694 | if (funcs && rel->op != op_project) { |
| 7695 | nrel = rel_project(sql->sa, rel->l, |
| 7696 | rel_projections(sql, rel->l, NULL, 1, 1)); |
| 7697 | rel->l = nrel; |
| 7698 | /* recursively split all functions and add those to the projection list */ |
| 7699 | split_exps(sql, rel->exps, nrel); |
| 7700 | if (nrel->l) |
| 7701 | nrel->l = rel_split_project(changes, sql, nrel->l, is_topn(rel->op)?top:0); |
| 7702 | return rel; |
| 7703 | } else if (funcs && !top && !rel->r) { |
| 7704 | /* projects can have columns point back into the expression list, ie |
| 7705 | * create a new list including the split expressions */ |
| 7706 | node *n; |
| 7707 | list *exps = rel->exps; |
| 7708 | |
| 7709 | rel->exps = sa_list(sql->sa); |
| 7710 | for (n=exps->h; n; n = n->next) |
| 7711 | append(rel->exps, split_exp(sql, n->data, rel)); |
| 7712 | } else if (funcs && top && rel_is_ref(rel) && !rel->r) { |
| 7713 | /* inplace */ |
| 7714 | list *exps = rel_projections(sql, rel, NULL, 1, 1); |
| 7715 | sql_rel *l = rel_project(sql->sa, rel->l, NULL); |
| 7716 | rel->l = l; |
| 7717 | l->exps = rel->exps; |
| 7718 | rel->exps = exps; |
| 7719 | } |
| 7720 | } |
| 7721 | if (is_set(rel->op) || is_basetable(rel->op)) |
| 7722 | return rel; |
| 7723 | if (rel->l) |
| 7724 | rel->l = rel_split_project(changes, sql, rel->l, |
| 7725 | (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0); |
| 7726 | if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) |
| 7727 | rel->r = rel_split_project(changes, sql, rel->r, |
| 7728 | (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0); |
| 7729 | return rel; |
| 7730 | } |
| 7731 | |
| 7732 | static void select_split_exps(mvc *sql, list *exps, sql_rel *rel); |
| 7733 | |
| 7734 | static sql_exp * |
| 7735 | select_split_exp(mvc *sql, sql_exp *e, sql_rel *rel) |
| 7736 | { |
| 7737 | switch(e->type) { |
| 7738 | case e_column: |
| 7739 | return e; |
| 7740 | case e_convert: |
| 7741 | e->l = select_split_exp(sql, e->l, rel); |
| 7742 | return e; |
| 7743 | case e_aggr: |
| 7744 | case e_func: |
| 7745 | if (!is_analytic(e) && !exp_has_sideeffect(e)) { |
| 7746 | sql_subfunc *f = e->f; |
| 7747 | if (e->type == e_func && !f->func->s && !strcmp(f->func->base.name, "ifthenelse" )) |
| 7748 | return add_exp_too_project(sql, e, rel); |
| 7749 | } |
| 7750 | return e; |
| 7751 | case e_cmp: |
| 7752 | if (get_cmp(e) == cmp_or) { |
| 7753 | select_split_exps(sql, e->l, rel); |
| 7754 | select_split_exps(sql, e->r, rel); |
| 7755 | } else if (e->flag == cmp_in || e->flag == cmp_notin || get_cmp(e) == cmp_filter) { |
| 7756 | e->l = select_split_exp(sql, e->l, rel); |
| 7757 | select_split_exps(sql, e->r, rel); |
| 7758 | } else { |
| 7759 | e->l = select_split_exp(sql, e->l, rel); |
| 7760 | e->r = select_split_exp(sql, e->r, rel); |
| 7761 | if (e->f) { |
| 7762 | e->f = select_split_exp(sql, e->f, rel); |
| 7763 | } |
| 7764 | } |
| 7765 | return e; |
| 7766 | case e_psm: |
| 7767 | case e_atom: |
| 7768 | return e; |
| 7769 | } |
| 7770 | return e; |
| 7771 | } |
| 7772 | |
| 7773 | static void |
| 7774 | select_split_exps(mvc *sql, list *exps, sql_rel *rel) |
| 7775 | { |
| 7776 | node *n; |
| 7777 | |
| 7778 | if (!exps) |
| 7779 | return; |
| 7780 | for(n=exps->h; n; n = n->next){ |
| 7781 | sql_exp *e = n->data; |
| 7782 | |
| 7783 | e = select_split_exp(sql, e, rel); |
| 7784 | n->data = e; |
| 7785 | } |
| 7786 | } |
| 7787 | |
| 7788 | static sql_rel * |
| 7789 | rel_split_select(int *changes, mvc *sql, sql_rel *rel, int top) |
| 7790 | { |
| 7791 | if (is_select(rel->op) && list_length(rel->exps) && rel->l) { |
| 7792 | list *exps = rel->exps; |
| 7793 | node *n; |
| 7794 | int funcs = 0; |
| 7795 | sql_rel *nrel; |
| 7796 | |
| 7797 | /* are there functions */ |
| 7798 | for (n=exps->h; n && !funcs; n = n->next) { |
| 7799 | sql_exp *e = n->data; |
| 7800 | |
| 7801 | funcs = exp_has_func(e); |
| 7802 | } |
| 7803 | /* introduce extra project */ |
| 7804 | if (funcs && rel->op != op_project) { |
| 7805 | nrel = rel_project(sql->sa, rel->l, |
| 7806 | rel_projections(sql, rel->l, NULL, 1, 1)); |
| 7807 | rel->l = nrel; |
| 7808 | /* recursively split all functions and add those to the projection list */ |
| 7809 | select_split_exps(sql, rel->exps, nrel); |
| 7810 | if (nrel->l) |
| 7811 | nrel->l = rel_split_project(changes, sql, nrel->l, is_topn(rel->op)?top:0); |
| 7812 | return rel; |
| 7813 | } else if (funcs && !top && !rel->r) { |
| 7814 | /* projects can have columns point back into the expression list, ie |
| 7815 | * create a new list including the split expressions */ |
| 7816 | node *n; |
| 7817 | list *exps = rel->exps; |
| 7818 | |
| 7819 | rel->exps = sa_list(sql->sa); |
| 7820 | for (n=exps->h; n; n = n->next) |
| 7821 | append(rel->exps, select_split_exp(sql, n->data, rel)); |
| 7822 | } else if (funcs && top && rel_is_ref(rel) && !rel->r) { |
| 7823 | /* inplace */ |
| 7824 | list *exps = rel_projections(sql, rel, NULL, 1, 1); |
| 7825 | sql_rel *l = rel_project(sql->sa, rel->l, NULL); |
| 7826 | rel->l = l; |
| 7827 | l->exps = rel->exps; |
| 7828 | rel->exps = exps; |
| 7829 | } |
| 7830 | } |
| 7831 | if (is_set(rel->op) || is_basetable(rel->op)) |
| 7832 | return rel; |
| 7833 | if (rel->l) |
| 7834 | rel->l = rel_split_select(changes, sql, rel->l, |
| 7835 | (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0); |
| 7836 | if ((is_join(rel->op) || is_semi(rel->op)) && rel->r) |
| 7837 | rel->r = rel_split_select(changes, sql, rel->r, |
| 7838 | (is_topn(rel->op)||is_ddl(rel->op)||is_modify(rel->op))?top:0); |
| 7839 | return rel; |
| 7840 | } |
| 7841 | |
| 7842 | static list * |
| 7843 | exp_merge_range(sql_allocator *sa, list *exps) |
| 7844 | { |
| 7845 | node *n, *m; |
| 7846 | for (n=exps->h; n; n = n->next) { |
| 7847 | sql_exp *e = n->data; |
| 7848 | sql_exp *le = e->l; |
| 7849 | sql_exp *re = e->r; |
| 7850 | |
| 7851 | /* handle the and's in the or lists */ |
| 7852 | if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) { |
| 7853 | e->l = exp_merge_range(sa, e->l); |
| 7854 | e->r = exp_merge_range(sa, e->r); |
| 7855 | /* only look for gt, gte, lte, lt */ |
| 7856 | } else if (n->next && |
| 7857 | e->type == e_cmp && e->flag < cmp_equal && !e->f && |
| 7858 | re->card == CARD_ATOM && !is_anti(e)) { |
| 7859 | for (m=n->next; m; m = m->next) { |
| 7860 | sql_exp *f = m->data; |
| 7861 | sql_exp *lf = f->l; |
| 7862 | sql_exp *rf = f->r; |
| 7863 | |
| 7864 | if (f->type == e_cmp && f->flag < cmp_equal && !f->f && |
| 7865 | rf->card == CARD_ATOM && !is_anti(f) && |
| 7866 | exp_match_exp(le, lf)) { |
| 7867 | sql_exp *ne; |
| 7868 | int swap = 0, lt = 0, gt = 0; |
| 7869 | /* for now only c1 <[=] x <[=] c2 */ |
| 7870 | |
| 7871 | swap = lt = (e->flag == cmp_lt || e->flag == cmp_lte); |
| 7872 | gt = !lt; |
| 7873 | |
| 7874 | if (gt && |
| 7875 | (f->flag == cmp_gt || |
| 7876 | f->flag == cmp_gte)) |
| 7877 | continue; |
| 7878 | if (lt && |
| 7879 | (f->flag == cmp_lt || |
| 7880 | f->flag == cmp_lte)) |
| 7881 | continue; |
| 7882 | if (!swap) |
| 7883 | ne = exp_compare2(sa, le, re, rf, compare2range(e->flag, f->flag)); |
| 7884 | else |
| 7885 | ne = exp_compare2(sa, le, rf, re, compare2range(f->flag, e->flag)); |
| 7886 | |
| 7887 | list_remove_data(exps, e); |
| 7888 | list_remove_data(exps, f); |
| 7889 | list_append(exps, ne); |
| 7890 | return exp_merge_range(sa, exps); |
| 7891 | } |
| 7892 | } |
| 7893 | } else if (n->next && |
| 7894 | e->type == e_cmp && e->flag < cmp_equal && !e->f && |
| 7895 | re->card > CARD_ATOM && !is_anti(e)) { |
| 7896 | for (m=n->next; m; m = m->next) { |
| 7897 | sql_exp *f = m->data; |
| 7898 | sql_exp *lf = f->l; |
| 7899 | sql_exp *rf = f->r; |
| 7900 | |
| 7901 | if (f->type == e_cmp && f->flag < cmp_equal && !f->f && |
| 7902 | rf->card > CARD_ATOM && !is_anti(f)) { |
| 7903 | sql_exp *ne, *t; |
| 7904 | int swap = 0, lt = 0, gt = 0; |
| 7905 | comp_type ef = (comp_type) e->flag, ff = (comp_type) f->flag; |
| 7906 | |
| 7907 | /* both swapped ? */ |
| 7908 | if (exp_match_exp(re, rf)) { |
| 7909 | t = re; |
| 7910 | re = le; |
| 7911 | le = t; |
| 7912 | ef = swap_compare(ef); |
| 7913 | t = rf; |
| 7914 | rf = lf; |
| 7915 | lf = t; |
| 7916 | ff = swap_compare(ff); |
| 7917 | } |
| 7918 | |
| 7919 | /* is left swapped ? */ |
| 7920 | if (exp_match_exp(re, lf)) { |
| 7921 | t = re; |
| 7922 | re = le; |
| 7923 | le = t; |
| 7924 | ef = swap_compare(ef); |
| 7925 | } |
| 7926 | |
| 7927 | /* is right swapped ? */ |
| 7928 | if (exp_match_exp(le, rf)) { |
| 7929 | t = rf; |
| 7930 | rf = lf; |
| 7931 | lf = t; |
| 7932 | ff = swap_compare(ff); |
| 7933 | } |
| 7934 | |
| 7935 | if (!exp_match_exp(le, lf)) |
| 7936 | continue; |
| 7937 | |
| 7938 | /* for now only c1 <[=] x <[=] c2 */ |
| 7939 | swap = lt = (ef == cmp_lt || ef == cmp_lte); |
| 7940 | gt = !lt; |
| 7941 | |
| 7942 | if (gt && (ff == cmp_gt || ff == cmp_gte)) |
| 7943 | continue; |
| 7944 | if (lt && (ff == cmp_lt || ff == cmp_lte)) |
| 7945 | continue; |
| 7946 | if (!swap) |
| 7947 | ne = exp_compare2(sa, le, re, rf, compare2range(ef, ff)); |
| 7948 | else |
| 7949 | ne = exp_compare2(sa, le, rf, re, compare2range(ff, ef)); |
| 7950 | |
| 7951 | list_remove_data(exps, e); |
| 7952 | list_remove_data(exps, f); |
| 7953 | list_append(exps, ne); |
| 7954 | return exp_merge_range(sa, exps); |
| 7955 | } |
| 7956 | } |
| 7957 | } |
| 7958 | } |
| 7959 | return exps; |
| 7960 | } |
| 7961 | |
| 7962 | static sql_rel * |
| 7963 | rel_find_range(int *changes, mvc *sql, sql_rel *rel) |
| 7964 | { |
| 7965 | (void)changes; |
| 7966 | if ((is_join(rel->op) || is_semi(rel->op) || is_select(rel->op)) && rel->exps && !list_empty(rel->exps)) |
| 7967 | rel->exps = exp_merge_range(sql->sa, rel->exps); |
| 7968 | return rel; |
| 7969 | } |
| 7970 | |
/*
 * Casting decimal values on both sides of a compare expression is expensive,
 * both in performance (CPU cost) and memory requirements (need for large
 * types).
 */
| 7976 | |
| 7977 | static int |
| 7978 | reduce_scale(atom *a) |
| 7979 | { |
| 7980 | #ifdef HAVE_HGE |
| 7981 | if (a->data.vtype == TYPE_hge) { |
| 7982 | hge v = a->data.val.hval; |
| 7983 | int i = 0; |
| 7984 | |
| 7985 | if (v != 0) |
| 7986 | while( (v/10)*10 == v ) { |
| 7987 | i++; |
| 7988 | v /= 10; |
| 7989 | } |
| 7990 | a->data.val.hval = v; |
| 7991 | return i; |
| 7992 | } |
| 7993 | #endif |
| 7994 | if (a->data.vtype == TYPE_lng) { |
| 7995 | lng v = a->data.val.lval; |
| 7996 | int i = 0; |
| 7997 | |
| 7998 | if (v != 0) |
| 7999 | while( (v/10)*10 == v ) { |
| 8000 | i++; |
| 8001 | v /= 10; |
| 8002 | } |
| 8003 | a->data.val.lval = v; |
| 8004 | return i; |
| 8005 | } |
| 8006 | if (a->data.vtype == TYPE_int) { |
| 8007 | int v = a->data.val.ival; |
| 8008 | int i = 0; |
| 8009 | |
| 8010 | if (v != 0) |
| 8011 | while( (v/10)*10 == v ) { |
| 8012 | i++; |
| 8013 | v /= 10; |
| 8014 | } |
| 8015 | a->data.val.ival = v; |
| 8016 | return i; |
| 8017 | } |
| 8018 | if (a->data.vtype == TYPE_sht) { |
| 8019 | sht v = a->data.val.shval; |
| 8020 | int i = 0; |
| 8021 | |
| 8022 | if (v != 0) |
| 8023 | while( (v/10)*10 == v ) { |
| 8024 | i++; |
| 8025 | v /= 10; |
| 8026 | } |
| 8027 | a->data.val.shval = v; |
| 8028 | return i; |
| 8029 | } |
| 8030 | return 0; |
| 8031 | } |
| 8032 | |
| 8033 | static sql_rel * |
| 8034 | rel_project_reduce_casts(int *changes, mvc *sql, sql_rel *rel) |
| 8035 | { |
| 8036 | if (is_project(rel->op) && list_length(rel->exps)) { |
| 8037 | list *exps = rel->exps; |
| 8038 | node *n; |
| 8039 | |
| 8040 | for (n=exps->h; n; n = n->next) { |
| 8041 | sql_exp *e = n->data; |
| 8042 | |
| 8043 | if (e && e->type == e_func) { |
| 8044 | sql_subfunc *f = e->f; |
| 8045 | sql_subtype *res = f->res->h->data; |
| 8046 | |
| 8047 | if (!f->func->s && !strcmp(f->func->base.name, "sql_mul" ) && res->scale > 0) { |
| 8048 | list *args = e->l; |
| 8049 | sql_exp *h = args->h->data; |
| 8050 | sql_exp *t = args->t->data; |
| 8051 | atom *a; |
| 8052 | |
| 8053 | if ((is_atom(h->type) && (a = exp_value(sql, h, sql->args, sql->argc)) != NULL) || |
| 8054 | (is_atom(t->type) && (a = exp_value(sql, t, sql->args, sql->argc)) != NULL)) { |
| 8055 | int rs = reduce_scale(a); |
| 8056 | |
| 8057 | res->scale -= rs; |
| 8058 | if (rs) |
| 8059 | (*changes)+= rs; |
| 8060 | } |
| 8061 | } |
| 8062 | } |
| 8063 | } |
| 8064 | } |
| 8065 | return rel; |
| 8066 | } |
| 8067 | |
| 8068 | static sql_rel * |
| 8069 | rel_reduce_casts(int *changes, mvc *sql, sql_rel *rel) |
| 8070 | { |
| 8071 | (void)sql; |
| 8072 | (void)changes; |
| 8073 | if ((is_join(rel->op) || is_semi(rel->op) || is_select(rel->op)) && |
| 8074 | rel->exps && list_length(rel->exps)) { |
| 8075 | list *exps = rel->exps; |
| 8076 | node *n; |
| 8077 | |
| 8078 | for (n=exps->h; n; n = n->next) { |
| 8079 | sql_exp *e = n->data; |
| 8080 | sql_exp *le = e->l; |
| 8081 | sql_exp *re = e->r; |
| 8082 | int anti = is_anti(e); |
| 8083 | |
| 8084 | /* handle the and's in the or lists */ |
| 8085 | if (e->type != e_cmp || !is_theta_exp(e->flag) || e->f) |
| 8086 | continue; |
| 8087 | /* rewrite e if left or right is a cast */ |
| 8088 | if (le->type == e_convert || re->type == e_convert) { |
| 8089 | sql_rel *r = rel->r; |
| 8090 | sql_subtype *st = exp_subtype(re); |
| 8091 | |
| 8092 | /* e_convert(le) ==, <(=), >(=), != e_atom(re), conversion between integers only */ |
| 8093 | if (le->type == e_convert && is_simple_atom(re) && st->type->eclass == EC_NUM) { |
| 8094 | sql_subtype *tt = exp_totype(le); |
| 8095 | sql_subtype *ft = exp_fromtype(le); |
| 8096 | |
| 8097 | if (tt->type->eclass != EC_NUM || ft->type->eclass != EC_NUM || tt->type->localtype < ft->type->localtype) |
| 8098 | continue; |
| 8099 | |
| 8100 | /* tt->type larger then tt->type, ie empty result, ie change into > max */ |
| 8101 | re = exp_atom_max( sql->sa, ft); |
| 8102 | if (!re) |
| 8103 | continue; |
| 8104 | /* the ==, > and >= change to l > max, the !=, < and <= change to l < max */ |
| 8105 | if (e->flag == cmp_equal || e->flag == cmp_gt || e->flag == cmp_gte) |
| 8106 | e = exp_compare(sql->sa, le->l, re, cmp_gt); |
| 8107 | else |
| 8108 | e = exp_compare(sql->sa, le->l, re, cmp_lt); |
| 8109 | sql->caching = 0; |
| 8110 | } else |
| 8111 | /* if convert on left then find |
| 8112 | * mul or div on right which increased |
| 8113 | * scale! |
| 8114 | */ |
| 8115 | if (le->type == e_convert && re->type == e_column && (e->flag == cmp_lt || e->flag == cmp_gt) && r && is_project(r->op)) { |
| 8116 | sql_exp *nre = rel_find_exp(r, re); |
| 8117 | sql_subtype *tt = exp_totype(le); |
| 8118 | sql_subtype *ft = exp_fromtype(le); |
| 8119 | |
| 8120 | if (nre && nre->type == e_func) { |
| 8121 | sql_subfunc *f = nre->f; |
| 8122 | |
| 8123 | if (!f->func->s && !strcmp(f->func->base.name, "sql_mul" )) { |
| 8124 | list *args = nre->l; |
| 8125 | sql_exp *ce = args->t->data; |
| 8126 | sql_subtype *fst = exp_subtype(args->h->data); |
| 8127 | atom *a; |
| 8128 | |
| 8129 | if (fst->scale == ft->scale && |
| 8130 | (a = exp_value(sql, ce, sql->args, sql->argc)) != NULL) { |
| 8131 | #ifdef HAVE_HGE |
| 8132 | hge v = 1; |
| 8133 | #else |
| 8134 | lng v = 1; |
| 8135 | #endif |
| 8136 | /* multiply with smallest value, then scale and (round) */ |
| 8137 | int scale = tt->scale - ft->scale; |
| 8138 | int rs = reduce_scale(a); |
| 8139 | |
| 8140 | scale -= rs; |
| 8141 | |
| 8142 | args = new_exp_list(sql->sa); |
| 8143 | while(scale > 0) { |
| 8144 | scale--; |
| 8145 | v *= 10; |
| 8146 | } |
| 8147 | append(args, re); |
| 8148 | #ifdef HAVE_HGE |
| 8149 | append(args, have_hge ? exp_atom_hge(sql->sa, v) : exp_atom_lng(sql->sa, (lng) v)); |
| 8150 | #else |
| 8151 | append(args, exp_atom_lng(sql->sa, v)); |
| 8152 | #endif |
| 8153 | f = find_func(sql, "scale_down" , args); |
| 8154 | nre = exp_op(sql->sa, args, f); |
| 8155 | e = exp_compare(sql->sa, le->l, nre, e->flag); |
| 8156 | } |
| 8157 | } |
| 8158 | } |
| 8159 | } |
| 8160 | } |
| 8161 | if (anti) set_anti(e); |
| 8162 | n->data = e; |
| 8163 | } |
| 8164 | } |
| 8165 | return rel; |
| 8166 | } |
| 8167 | |
| 8168 | static int |
| 8169 | is_identity_of(sql_exp *e, sql_rel *l) |
| 8170 | { |
| 8171 | if (e->type != e_cmp) |
| 8172 | return 0; |
| 8173 | if (!is_identity(e->l, l) || !is_identity(e->r, l)) |
| 8174 | return 0; |
| 8175 | return 1; |
| 8176 | } |
| 8177 | |
| 8178 | |
| 8179 | static sql_rel * |
| 8180 | rel_rewrite_semijoin(int *changes, mvc *sql, sql_rel *rel) |
| 8181 | { |
| 8182 | (void)sql; |
| 8183 | if (is_semi(rel->op)) { |
| 8184 | sql_rel *l = rel->l; |
| 8185 | sql_rel *r = rel->r; |
| 8186 | sql_rel *rl = (r->l)?r->l:NULL; |
| 8187 | int on_identity = 1; |
| 8188 | |
| 8189 | if (!rel->exps || list_length(rel->exps) != 1 || !is_identity_of(rel->exps->h->data, l)) |
| 8190 | on_identity = 0; |
| 8191 | |
| 8192 | /* rewrite {semi,anti}join (A, join(A,B)) into {semi,anti}join (A,B) |
| 8193 | * and {semi,anti}join (A, join(B,A)) into {semi,anti}join (A,B) |
| 8194 | * Where the semi/anti join is done using the identity */ |
| 8195 | if (on_identity && l->ref.refcnt == 2 && ((is_join(r->op) && (l == r->l || l == r->r)) || |
| 8196 | (is_project(r->op) && rl && is_join(rl->op) && (l == rl->l || l == rl->r)))){ |
| 8197 | sql_rel *or = r; |
| 8198 | |
| 8199 | if (is_project(r->op)) |
| 8200 | r = rl; |
| 8201 | |
| 8202 | if (l == r->r) |
| 8203 | rel->r = rel_dup(r->l); |
| 8204 | else |
| 8205 | rel->r = rel_dup(r->r); |
| 8206 | |
| 8207 | rel->exps = r->exps; |
| 8208 | r->exps = NULL; |
| 8209 | rel_destroy(or); |
| 8210 | (*changes)++; |
| 8211 | } |
| 8212 | } |
| 8213 | if (is_semi(rel->op)) { |
| 8214 | sql_rel *l = rel->l, *rl = NULL; |
| 8215 | sql_rel *r = rel->r, *or = r; |
| 8216 | |
| 8217 | if (r) |
| 8218 | rl = r->l; |
| 8219 | if (r && is_project(r->op)) { |
| 8220 | r = rl; |
| 8221 | if (r) |
| 8222 | rl = r->l; |
| 8223 | } |
| 8224 | |
| 8225 | /* More general case is (join reduction) |
| 8226 | {semi,anti}join (A, join(A,B) [A.c1 == B.c1]) [ A.c1 == B.c1 ] |
| 8227 | into {semi,anti}join (A,B) [ A.c1 == B.c1 ] |
| 8228 | |
| 8229 | for semijoin also A.c1 == B.k1 ] [ A.c1 == B.k2 ] could be rewriten |
| 8230 | */ |
| 8231 | if (l && r && rl && |
| 8232 | is_basetable(l->op) && is_basetable(rl->op) && |
| 8233 | is_join(r->op) && l->l == rl->l) |
| 8234 | { |
| 8235 | node *n, *m; |
| 8236 | list *exps; |
| 8237 | |
| 8238 | if (!rel->exps || !r->exps || |
| 8239 | list_length(rel->exps) != list_length(r->exps)) |
| 8240 | return rel; |
| 8241 | exps = new_exp_list(sql->sa); |
| 8242 | |
| 8243 | /* are the join conditions equal */ |
| 8244 | for (n = rel->exps->h, m = r->exps->h; |
| 8245 | n && m; n = n->next, m = m->next) |
| 8246 | { |
| 8247 | sql_exp *le = NULL, *oe = n->data; |
| 8248 | sql_exp *re = NULL, *ne = m->data; |
| 8249 | sql_column *cl; |
| 8250 | int equal = 0; |
| 8251 | |
| 8252 | if (oe->type != e_cmp || ne->type != e_cmp || |
| 8253 | oe->flag != cmp_equal || |
| 8254 | ne->flag != cmp_equal || is_anti(oe) || is_anti(ne)) |
| 8255 | return rel; |
| 8256 | |
| 8257 | if ((cl = exp_find_column(rel->l, oe->l, -2)) != NULL) { |
| 8258 | le = oe->l; |
| 8259 | re = oe->r; |
| 8260 | } else if ((cl = exp_find_column(rel->l, oe->r, -2)) != NULL) { |
| 8261 | le = oe->r; |
| 8262 | re = oe->l; |
| 8263 | } else |
| 8264 | return rel; |
| 8265 | |
| 8266 | if (exp_find_column(rl, ne->l, -2) == cl) { |
| 8267 | sql_exp *e = (or != r)?rel_find_exp(or, re):re; |
| 8268 | |
| 8269 | equal = exp_match_exp(ne->r, e); |
| 8270 | if (!equal) |
| 8271 | return rel; |
| 8272 | re = ne->r; |
| 8273 | } else if (exp_find_column(rl, ne->r, -2) == cl) { |
| 8274 | sql_exp *e = (or != r)?rel_find_exp(or, re):re; |
| 8275 | |
| 8276 | equal = exp_match_exp(ne->l, e); |
| 8277 | if (!equal) |
| 8278 | return rel; |
| 8279 | re = ne->l; |
| 8280 | } else |
| 8281 | return rel; |
| 8282 | |
| 8283 | ne = exp_compare(sql->sa, le, re, cmp_equal); |
| 8284 | append(exps, ne); |
| 8285 | } |
| 8286 | |
| 8287 | rel->r = rel_dup(r->r); |
| 8288 | rel->exps = exps; |
| 8289 | rel_destroy(or); |
| 8290 | (*changes)++; |
| 8291 | } |
| 8292 | } |
| 8293 | return rel; |
| 8294 | } |
| 8295 | |
| 8296 | /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */ |
| 8297 | static sql_rel * |
| 8298 | rel_rewrite_antijoin(int *changes, mvc *sql, sql_rel *rel) |
| 8299 | { |
| 8300 | if (rel->op == op_anti) { |
| 8301 | sql_rel *l = rel->l; |
| 8302 | sql_rel *r = rel->r; |
| 8303 | |
| 8304 | if (l && !rel_is_ref(l) && |
| 8305 | r && !rel_is_ref(r) && is_union(r->op)) { |
| 8306 | sql_rel *rl = rel_dup(r->l), *nl; |
| 8307 | sql_rel *rr = rel_dup(r->r); |
| 8308 | |
| 8309 | if (!is_project(rl->op)) |
| 8310 | rl = rel_project(sql->sa, rl, |
| 8311 | rel_projections(sql, rl, NULL, 1, 1)); |
| 8312 | if (!is_project(rr->op)) |
| 8313 | rr = rel_project(sql->sa, rr, |
| 8314 | rel_projections(sql, rr, NULL, 1, 1)); |
| 8315 | rel_rename_exps(sql, r->exps, rl->exps); |
| 8316 | rel_rename_exps(sql, r->exps, rr->exps); |
| 8317 | |
| 8318 | nl = rel_crossproduct(sql->sa, rel->l, rl, op_anti); |
| 8319 | if (need_no_nil(rel)) |
| 8320 | set_no_nil(nl); |
| 8321 | nl->exps = exps_copy(sql, rel->exps); |
| 8322 | rel->l = nl; |
| 8323 | rel->r = rr; |
| 8324 | rel_destroy(r); |
| 8325 | (*changes)++; |
| 8326 | return rel; |
| 8327 | } |
| 8328 | } |
| 8329 | return rel; |
| 8330 | } |
| 8331 | |
| 8332 | static sql_rel * |
| 8333 | rel_semijoin_use_fk(int *changes, mvc *sql, sql_rel *rel) |
| 8334 | { |
| 8335 | (void)changes; |
| 8336 | if (is_semi(rel->op) && rel->exps) { |
| 8337 | list *exps = rel->exps; |
| 8338 | list *rels = new_rel_list(sql->sa); |
| 8339 | |
| 8340 | rel->exps = NULL; |
| 8341 | append(rels, rel->l); |
| 8342 | append(rels, rel->r); |
| 8343 | (void) find_fk( sql, rels, exps); |
| 8344 | |
| 8345 | rel->exps = exps; |
| 8346 | } |
| 8347 | return rel; |
| 8348 | } |
| 8349 | |
/* leftouterjoin(a,b)[ a.C op b.D or a.E op2 b.F ] ->
 * union(
 *	join(a,b)[ a.C op b.D or a.E op2 b.F ],
 *	project(
 *		antijoin(a,b) [ a.C op b.D or a.E op2 b.F ])
 *		[ a.*, NULL * foreach column of b]
 * )
 */
| 8358 | static int |
| 8359 | exps_nr_of_or(list *exps) |
| 8360 | { |
| 8361 | int ors = 0; |
| 8362 | node *n; |
| 8363 | |
| 8364 | if (!exps) |
| 8365 | return ors; |
| 8366 | for(n=exps->h; n; n = n->next) { |
| 8367 | sql_exp *e = n->data; |
| 8368 | |
| 8369 | if (e->type == e_cmp && e->flag == cmp_or) |
| 8370 | ors++; |
| 8371 | } |
| 8372 | return ors; |
| 8373 | } |
| 8374 | |
| 8375 | static void |
| 8376 | add_nulls(mvc *sql, sql_rel *rel, sql_rel *r) |
| 8377 | { |
| 8378 | list *exps; |
| 8379 | node *n; |
| 8380 | |
| 8381 | exps = rel_projections(sql, r, NULL, 1, 1); |
| 8382 | for(n = exps->h; n; n = n->next) { |
| 8383 | sql_exp *e = n->data, *ne; |
| 8384 | |
| 8385 | ne = exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(e), NULL)); |
| 8386 | exp_setname(sql->sa, ne, exp_relname(e), exp_name(e)); |
| 8387 | append(rel->exps, ne); |
| 8388 | } |
| 8389 | } |
| 8390 | |
| 8391 | static sql_rel * |
| 8392 | rel_split_outerjoin(int *changes, mvc *sql, sql_rel *rel) |
| 8393 | { |
| 8394 | if ((rel->op == op_left || rel->op == op_right || rel->op == op_full) && |
| 8395 | list_length(rel->exps) == 1 && exps_nr_of_or(rel->exps) == list_length(rel->exps)) { |
| 8396 | sql_rel *l = rel->l, *nl, *nll, *nlr; |
| 8397 | sql_rel *r = rel->r, *nr; |
| 8398 | sql_exp *e; |
| 8399 | list *exps; |
| 8400 | |
| 8401 | nll = rel_crossproduct(sql->sa, rel_dup(l), rel_dup(r), op_join); |
| 8402 | nlr = rel_crossproduct(sql->sa, rel_dup(l), rel_dup(r), op_join); |
| 8403 | |
| 8404 | /* TODO find or exp, ie handle rest with extra joins */ |
| 8405 | /* expect only a single or expr for now */ |
| 8406 | assert(list_length(rel->exps) == 1); |
| 8407 | e = rel->exps->h->data; |
| 8408 | nll->exps = exps_copy(sql, e->l); |
| 8409 | nlr->exps = exps_copy(sql, e->r); |
| 8410 | nl = rel_or( sql, NULL, nll, nlr, NULL, NULL, NULL); |
| 8411 | |
| 8412 | if (rel->op == op_left || rel->op == op_full) { |
| 8413 | /* split in 2 anti joins */ |
| 8414 | nr = rel_crossproduct(sql->sa, rel_dup(l), rel_dup(r), op_anti); |
| 8415 | nr->exps = exps_copy(sql, e->l); |
| 8416 | nr = rel_crossproduct(sql->sa, nr, rel_dup(r), op_anti); |
| 8417 | nr->exps = exps_copy(sql, e->r); |
| 8418 | |
| 8419 | /* project left */ |
| 8420 | nr = rel_project(sql->sa, nr, |
| 8421 | rel_projections(sql, l, NULL, 1, 1)); |
| 8422 | /* add null's for right */ |
| 8423 | add_nulls( sql, nr, r); |
| 8424 | exps = rel_projections(sql, nl, NULL, 1, 1); |
| 8425 | nl = rel_setop(sql->sa, nl, nr, op_union); |
| 8426 | nl->exps = exps; |
| 8427 | set_processed(nl); |
| 8428 | } |
| 8429 | if (rel->op == op_right || rel->op == op_full) { |
| 8430 | /* split in 2 anti joins */ |
| 8431 | nr = rel_crossproduct(sql->sa, rel_dup(r), rel_dup(l), op_anti); |
| 8432 | nr->exps = exps_copy(sql, e->l); |
| 8433 | nr = rel_crossproduct(sql->sa, nr, rel_dup(l), op_anti); |
| 8434 | nr->exps = exps_copy(sql, e->r); |
| 8435 | |
| 8436 | nr = rel_project(sql->sa, nr, sa_list(sql->sa)); |
| 8437 | /* add null's for left */ |
| 8438 | add_nulls( sql, nr, l); |
| 8439 | /* project right */ |
| 8440 | nr->exps = list_merge(nr->exps, |
| 8441 | rel_projections(sql, r, NULL, 1, 1), |
| 8442 | (fdup)NULL); |
| 8443 | exps = rel_projections(sql, nl, NULL, 1, 1); |
| 8444 | nl = rel_setop(sql->sa, nl, nr, op_union); |
| 8445 | nl->exps = exps; |
| 8446 | set_processed(nl); |
| 8447 | } |
| 8448 | |
| 8449 | rel_destroy(rel); |
| 8450 | *changes = 1; |
| 8451 | rel = nl; |
| 8452 | } |
| 8453 | return rel; |
| 8454 | } |
| 8455 | |
| 8456 | /* rewrite sqltype into backend types */ |
| 8457 | static sql_rel * |
| 8458 | rel_rewrite_types(int *changes, mvc *sql, sql_rel *rel) |
| 8459 | { |
| 8460 | (void)sql; |
| 8461 | (void)changes; |
| 8462 | return rel; |
| 8463 | } |
| 8464 | |
| 8465 | static sql_exp * |
| 8466 | exp_indexcol(mvc *sql, sql_exp *e, const char *tname, const char *cname, int de, bit unique) |
| 8467 | { |
| 8468 | sql_subtype *rt = sql_bind_localtype(de==1?"bte" :de==2?"sht" :"int" ); |
| 8469 | sql_exp *u = exp_atom_bool(sql->sa, unique); |
| 8470 | sql_subfunc *f = sql_bind_func_result(sql->sa, mvc_bind_schema(sql,"sys" ), "index" , exp_subtype(e), exp_subtype(u), rt); |
| 8471 | |
| 8472 | e = exp_binop(sql->sa, e, u, f); |
| 8473 | exp_setname(sql->sa, e, tname, cname); |
| 8474 | return e; |
| 8475 | } |
| 8476 | |
| 8477 | static sql_exp * |
| 8478 | exp_stringscol(mvc *sql, sql_exp *e, const char *tname, const char *cname) |
| 8479 | { |
| 8480 | sql_subfunc *f = sql_bind_func(sql->sa, mvc_bind_schema(sql,"sys" ), "strings" , exp_subtype(e), NULL, F_FUNC); |
| 8481 | |
| 8482 | e = exp_unop(sql->sa, e, f); |
| 8483 | exp_setname(sql->sa, e, tname, cname); |
| 8484 | return e; |
| 8485 | } |
| 8486 | |
| 8487 | static sql_rel * |
| 8488 | rel_dicttable(mvc *sql, sql_column *c, const char *tname, int de) |
| 8489 | { |
| 8490 | sql_rel *rel = rel_create(sql->sa); |
| 8491 | sql_exp *e, *ie; |
| 8492 | int nr = 0; |
| 8493 | char name[16], *nme; |
| 8494 | if(!rel) |
| 8495 | return NULL; |
| 8496 | |
| 8497 | e = exp_column(sql->sa, tname, c->base.name, &c->type, CARD_MULTI, c->null, 0); |
| 8498 | rel->l = NULL; |
| 8499 | rel->r = c; |
| 8500 | rel->op = op_basetable; |
| 8501 | rel->exps = new_exp_list(sql->sa); |
| 8502 | |
| 8503 | ie = exp_indexcol(sql, e, tname, c->base.name, de, 1); |
| 8504 | nr = ++sql->label; |
| 8505 | nme = sa_strdup(sql->sa, number2name(name, sizeof(name), nr)); |
| 8506 | exp_setname(sql->sa, ie, nme, nme); |
| 8507 | append(rel->exps, ie); |
| 8508 | |
| 8509 | ie = exp_stringscol(sql, e, tname, c->base.name); |
| 8510 | nr = ++sql->label; |
| 8511 | nme = sa_strdup(sql->sa, number2name(name, sizeof(name), nr)); |
| 8512 | exp_setname(sql->sa, ie, nme, nme); |
| 8513 | append(rel->exps, ie); |
| 8514 | e->p = prop_create(sql->sa, PROP_HASHCOL, e->p); |
| 8515 | |
| 8516 | rel->card = CARD_MULTI; |
| 8517 | rel->nrcols = 2; |
| 8518 | return rel; |
| 8519 | } |
| 8520 | |
| 8521 | /* rewrite merge tables into union of base tables and call optimizer again */ |
| 8522 | static sql_rel * |
| 8523 | rel_add_dicts(int *changes, mvc *sql, sql_rel *rel) |
| 8524 | { |
| 8525 | if (is_basetable(rel->op) && rel->l) { |
| 8526 | node *n; |
| 8527 | sql_table *t = rel->l; |
| 8528 | list *l = sa_list(sql->sa), *vcols = NULL, *pexps = sa_list(sql->sa); |
| 8529 | |
| 8530 | for (n = rel->exps->h; n; n = n->next) { |
| 8531 | sql_exp *e = n->data, *ne = NULL; |
| 8532 | const char *rname = exp_relname(e)?exp_relname(e):e->l; |
| 8533 | const char *oname = e->r; |
| 8534 | int de; |
| 8535 | |
| 8536 | if (!is_func(e->type) && oname[0] != '%') { |
| 8537 | sql_column *c = find_sql_column(t, oname); |
| 8538 | |
| 8539 | if (EC_VARCHAR(c->type.type->eclass) && (de = store_funcs.double_elim_col(sql->session->tr, c)) != 0) { |
| 8540 | int nr = ++sql->label; |
| 8541 | char name[16], *nme; |
| 8542 | sql_rel *vt = rel_dicttable(sql, c, rname, de); |
| 8543 | |
| 8544 | nme = sa_strdup(sql->sa, number2name(name, sizeof(name), nr)); |
| 8545 | if (!vcols) |
| 8546 | vcols = sa_list(sql->sa); |
| 8547 | append(vcols, vt); |
| 8548 | e = exp_indexcol(sql, e, nme, nme, de, 0); |
| 8549 | ne = exp_ref(sql->sa, e); |
| 8550 | append(vcols, ne); |
| 8551 | append(vcols, n->data); |
| 8552 | (*changes)++; |
| 8553 | } |
| 8554 | } |
| 8555 | list_append(l, e); |
| 8556 | if (!ne) |
| 8557 | list_append(pexps, e); |
| 8558 | } |
| 8559 | rel->exps = l; |
| 8560 | |
| 8561 | /* add joins for double_eliminated (large) columns */ |
| 8562 | if (vcols) { |
| 8563 | node *n; |
| 8564 | |
| 8565 | for(n = vcols->h; n; n = n->next->next->next) { |
| 8566 | sql_rel *vt = n->data; |
| 8567 | sql_exp *ic = n->next->data, *vti = NULL, *vtv; |
| 8568 | sql_exp *c = n->next->next->data, *cmp; |
| 8569 | const char *rname = exp_relname(c)?exp_relname(c):c->l; |
| 8570 | const char *oname = c->r; |
| 8571 | |
| 8572 | rel = rel_crossproduct(sql->sa, rel, vt, op_join); |
| 8573 | vti = vt->exps->h->data; |
| 8574 | vtv = vt->exps->h->next->data; |
| 8575 | vti = exp_ref(sql->sa, vti); |
| 8576 | cmp = exp_compare(sql->sa, ic, vti, cmp_equal); |
| 8577 | cmp->p = prop_create(sql->sa, PROP_FETCH, cmp->p); |
| 8578 | rel_join_add_exp( sql->sa, rel, cmp); |
| 8579 | |
| 8580 | vtv = exp_ref(sql->sa, vtv); |
| 8581 | exp_setname(sql->sa, vtv, rname, oname); |
| 8582 | append(pexps, vtv); |
| 8583 | } |
| 8584 | rel = rel_project(sql->sa, rel, pexps); |
| 8585 | } |
| 8586 | } |
| 8587 | return rel; |
| 8588 | } |
| 8589 | |
| 8590 | static int |
| 8591 | find_col_exp( list *exps, sql_exp *e) |
| 8592 | { |
| 8593 | node *n; |
| 8594 | int nr = 0; |
| 8595 | |
| 8596 | for (n=exps->h; n; n=n->next, nr++){ |
| 8597 | if (n->data == e) |
| 8598 | return nr; |
| 8599 | } |
| 8600 | return -1; |
| 8601 | } |
| 8602 | |
| 8603 | static int |
| 8604 | exp_range_overlap( mvc *sql, sql_exp *e, char *min, char *max, atom *emin, atom *emax) |
| 8605 | { |
| 8606 | sql_subtype *t = exp_subtype(e); |
| 8607 | |
| 8608 | if (!min || !max || !emin || !emax) |
| 8609 | return 0; |
| 8610 | |
| 8611 | if (GDK_STRNIL(min)) |
| 8612 | return 0; |
| 8613 | if (GDK_STRNIL(max)) |
| 8614 | return 0; |
| 8615 | |
| 8616 | if (t->type->localtype == TYPE_dbl) { |
| 8617 | atom *cmin = atom_general(sql->sa, t, min); |
| 8618 | atom *cmax = atom_general(sql->sa, t, max); |
| 8619 | |
| 8620 | if (emax->d < cmin->data.val.dval || emin->d > cmax->data.val.dval) |
| 8621 | return 0; |
| 8622 | } |
| 8623 | if (t->type->localtype == TYPE_bte) { |
| 8624 | atom *cmin = atom_general(sql->sa, t, min); |
| 8625 | atom *cmax = atom_general(sql->sa, t, max); |
| 8626 | |
| 8627 | if (emax->data.val.btval < cmin->data.val.btval || emin->data.val.btval > cmax->data.val.btval) |
| 8628 | return 0; |
| 8629 | } |
| 8630 | if (t->type->localtype == TYPE_sht) { |
| 8631 | atom *cmin = atom_general(sql->sa, t, min); |
| 8632 | atom *cmax = atom_general(sql->sa, t, max); |
| 8633 | |
| 8634 | if (emax->data.val.shval < cmin->data.val.shval || emin->data.val.shval > cmax->data.val.shval) |
| 8635 | return 0; |
| 8636 | } |
| 8637 | if (t->type->localtype == TYPE_int || t->type->localtype == TYPE_date) { |
| 8638 | atom *cmin = atom_general(sql->sa, t, min); |
| 8639 | atom *cmax = atom_general(sql->sa, t, max); |
| 8640 | |
| 8641 | if (emax->data.val.ival < cmin->data.val.ival || emin->data.val.ival > cmax->data.val.ival) |
| 8642 | return 0; |
| 8643 | } |
| 8644 | if (t->type->localtype == TYPE_lng || t->type->localtype == TYPE_timestamp) { |
| 8645 | atom *cmin = atom_general(sql->sa, t, min); |
| 8646 | atom *cmax = atom_general(sql->sa, t, max); |
| 8647 | |
| 8648 | if (emax->data.val.lval < cmin->data.val.lval || emin->data.val.lval > cmax->data.val.lval) |
| 8649 | return 0; |
| 8650 | } |
| 8651 | return 1; |
| 8652 | } |
| 8653 | |
| 8654 | static sql_rel * |
| 8655 | rel_rename_part(mvc *sql, sql_rel *p, char *tname, sql_table *mt) |
| 8656 | { |
| 8657 | node *n, *m; |
| 8658 | |
| 8659 | assert(list_length(p->exps) >= list_length(mt->columns.set)); |
| 8660 | for( n = p->exps->h, m = mt->columns.set->h; n && m; n = n->next, m = m->next) { |
| 8661 | sql_exp *ne = n->data; |
| 8662 | sql_column *c = m->data; |
| 8663 | |
| 8664 | exp_setname(sql->sa, ne, tname, c->base.name); |
| 8665 | } |
| 8666 | if (n) /* skip TID */ |
| 8667 | n = n->next; |
| 8668 | if (mt->idxs.set) { |
| 8669 | /* also possible index name mismatches */ |
| 8670 | for( m = mt->idxs.set->h; n && m; m = m->next) { |
| 8671 | sql_exp *ne = n->data; |
| 8672 | sql_idx *i = m->data; |
| 8673 | char *iname = NULL; |
| 8674 | |
| 8675 | if (hash_index(i->type) && list_length(i->columns) <= 1) |
| 8676 | continue; |
| 8677 | |
| 8678 | iname = sa_strconcat( sql->sa, "%" , i->base.name); |
| 8679 | exp_setname(sql->sa, ne, tname, iname); |
| 8680 | n = n->next; |
| 8681 | } |
| 8682 | } |
| 8683 | return p; |
| 8684 | } |
| 8685 | |
| 8686 | /* rewrite merge tables into union of base tables and call optimizer again */ |
| 8687 | static sql_rel * |
| 8688 | rel_merge_table_rewrite(int *changes, mvc *sql, sql_rel *rel) |
| 8689 | { |
| 8690 | sql_rel *sel = NULL; |
| 8691 | |
| 8692 | if(is_modify(rel->op)) { |
| 8693 | sql_query *query = query_create(sql); |
| 8694 | return rel_propagate(query, rel, changes); |
| 8695 | } else { |
| 8696 | if (is_select(rel->op) && rel->l) { |
| 8697 | sel = rel; |
| 8698 | rel = rel->l; |
| 8699 | } |
| 8700 | if (is_basetable(rel->op) && rel->l) { |
| 8701 | sql_table *t = rel->l; |
| 8702 | |
| 8703 | if (isMergeTable(t)) { |
| 8704 | /* instantiate merge table */ |
| 8705 | sql_rel *nrel = NULL; |
| 8706 | char *tname = t->base.name; |
| 8707 | list *cols = NULL, *low = NULL, *high = NULL; |
| 8708 | |
| 8709 | if (list_empty(t->members.set)) |
| 8710 | return rel; |
| 8711 | if (sel) { |
| 8712 | node *n; |
| 8713 | |
| 8714 | /* no need to reduce the tables list */ |
| 8715 | if (list_length(t->members.set) <= 1) |
| 8716 | return sel; |
| 8717 | |
| 8718 | cols = sa_list(sql->sa); |
| 8719 | low = sa_list(sql->sa); |
| 8720 | high = sa_list(sql->sa); |
| 8721 | for(n = sel->exps->h; n; n = n->next) { |
| 8722 | sql_exp *e = n->data; |
| 8723 | atom *lval = NULL, *hval = NULL; |
| 8724 | |
| 8725 | if (e->type == e_cmp && (e->flag == cmp_equal || e->f )) { |
| 8726 | sql_exp *l = e->r; |
| 8727 | sql_exp *h = e->f; |
| 8728 | sql_exp *c = e->l; |
| 8729 | |
| 8730 | c = rel_find_exp(rel, c); |
| 8731 | lval = exp_flatten(sql, l); |
| 8732 | if (!h) |
| 8733 | hval = lval; |
| 8734 | else if (h) |
| 8735 | hval = exp_flatten(sql, h); |
| 8736 | if (c && lval && hval) { |
| 8737 | append(cols, c); |
| 8738 | append(low, lval); |
| 8739 | append(high, hval); |
| 8740 | } |
| 8741 | } |
| 8742 | /* handle in lists */ |
| 8743 | if (e->type == e_cmp && e->flag == cmp_in) { |
| 8744 | list *vals = e->r; |
| 8745 | sql_exp *c = e->l; |
| 8746 | node *n; |
| 8747 | list *vlist = sa_list(sql->sa); |
| 8748 | |
| 8749 | c = rel_find_exp(rel, c); |
| 8750 | if (c) { |
| 8751 | for ( n = vals->h; n; n = n->next) { |
| 8752 | sql_exp *l = n->data; |
| 8753 | atom *lval = exp_flatten(sql, l); |
| 8754 | |
| 8755 | if (!lval) |
| 8756 | break; |
| 8757 | append(vlist, lval); |
| 8758 | } |
| 8759 | if (!n) { |
| 8760 | append(cols, c); |
| 8761 | append(low, NULL); /* mark high as value list */ |
| 8762 | append(high, vlist); |
| 8763 | } |
| 8764 | } |
| 8765 | } |
| 8766 | } |
| 8767 | } |
| 8768 | (*changes)++; |
| 8769 | if (t->members.set) { |
| 8770 | list *tables = sa_list(sql->sa); |
| 8771 | node *nt; |
| 8772 | int *pos = NULL, nr = list_length(rel->exps), first = 1; |
| 8773 | |
| 8774 | /* rename (mostly the idxs) */ |
| 8775 | pos = SA_NEW_ARRAY(sql->sa, int, nr); |
| 8776 | memset(pos, 0, sizeof(int)*nr); |
| 8777 | for (nt = t->members.set->h; nt; nt = nt->next) { |
| 8778 | sql_part *pd = nt->data; |
| 8779 | sql_table *pt = find_sql_table(t->s, pd->base.name); |
| 8780 | sql_rel *prel = rel_basetable(sql, pt, tname); |
| 8781 | node *n; |
| 8782 | int skip = 0, j; |
| 8783 | list *exps = NULL; |
| 8784 | |
| 8785 | /* do not include empty partitions */ |
| 8786 | if ((nrel || nt->next) && |
| 8787 | pt && isTable(pt) && pt->access == TABLE_READONLY && !store_funcs.count_col(sql->session->tr, pt->columns.set->h->data, 1)){ |
| 8788 | continue; |
| 8789 | } |
| 8790 | |
| 8791 | prel = rel_rename_part(sql, prel, tname, t); |
| 8792 | |
| 8793 | MT_lock_set(&prel->exps->ht_lock); |
| 8794 | prel->exps->ht = NULL; |
| 8795 | MT_lock_unset(&prel->exps->ht_lock); |
| 8796 | exps = sa_list(sql->sa); |
| 8797 | for (n = rel->exps->h, j=0; n && (!skip || first); n = n->next, j++) { |
| 8798 | sql_exp *e = n->data, *ne = NULL; |
| 8799 | int i; |
| 8800 | |
| 8801 | if (e) |
| 8802 | ne = exps_bind_column2(prel->exps, e->l, e->r); |
| 8803 | if (!e || !ne) { |
| 8804 | (*changes)--; |
| 8805 | assert(0); |
| 8806 | return rel; |
| 8807 | } |
| 8808 | if (pt && isTable(pt) && pt->access == TABLE_READONLY && sel && (nrel || nt->next) && |
| 8809 | ((first && (i=find_col_exp(cols, e)) != -1) || |
| 8810 | (!first && pos[j] > 0))) { |
| 8811 | /* check if the part falls within the bounds of the select expression else skip this (keep at least on part-table) */ |
| 8812 | char *min, *max; |
| 8813 | sql_column *col = NULL; |
| 8814 | sql_rel *bt = NULL; |
| 8815 | |
| 8816 | if (first) |
| 8817 | pos[j] = i + 1; |
| 8818 | i = pos[j] - 1; |
| 8819 | col = name_find_column(prel, e->l, e->r, -2, &bt); |
| 8820 | assert(col); |
| 8821 | if (sql_trans_ranges(sql->session->tr, col, &min, &max)) { |
| 8822 | atom *lval = list_fetch(low,i); |
| 8823 | atom *hval = list_fetch(high,i); |
| 8824 | |
| 8825 | if (lval && !exp_range_overlap(sql, e, min, max, lval, hval)) |
| 8826 | skip = 1; |
| 8827 | else if (!lval) { |
| 8828 | node *n; |
| 8829 | list *l = list_fetch(high,i); |
| 8830 | |
| 8831 | skip = 1; |
| 8832 | for (n = l->h; n && skip; n = n->next) { |
| 8833 | hval = lval = n->data; |
| 8834 | |
| 8835 | if (exp_range_overlap(sql, e, min, max, lval, hval)) |
| 8836 | skip = 0; |
| 8837 | } |
| 8838 | } |
| 8839 | } |
| 8840 | } |
| 8841 | assert(e->type == e_column); |
| 8842 | exp_setname(sql->sa, ne, e->l, e->r); |
| 8843 | append(exps, ne); |
| 8844 | } |
| 8845 | prel->exps = exps; |
| 8846 | first = 0; |
| 8847 | if (!skip) { |
| 8848 | append(tables, prel); |
| 8849 | nrel = prel; |
| 8850 | } else { |
| 8851 | sql->caching = 0; |
| 8852 | } |
| 8853 | } |
| 8854 | while (list_length(tables) > 1) { |
| 8855 | list *ntables = sa_list(sql->sa); |
| 8856 | node *n; |
| 8857 | |
| 8858 | for(n=tables->h; n && n->next; n = n->next->next) { |
| 8859 | sql_rel *l = n->data; |
| 8860 | sql_rel *r = n->next->data; |
| 8861 | nrel = rel_setop(sql->sa, l, r, op_union); |
| 8862 | nrel->exps = rel_projections(sql, rel, NULL, 1, 1); |
| 8863 | set_processed(nrel); |
| 8864 | append(ntables, nrel); |
| 8865 | } |
| 8866 | if (n) |
| 8867 | append(ntables, n->data); |
| 8868 | tables = ntables; |
| 8869 | } |
| 8870 | } |
| 8871 | if (nrel && list_length(t->members.set) == 1) { |
| 8872 | nrel = rel_project(sql->sa, nrel, rel->exps); |
| 8873 | } else if (nrel) |
| 8874 | nrel->exps = rel->exps; |
| 8875 | rel_destroy(rel); |
| 8876 | if (sel) { |
| 8877 | int changes = 0; |
| 8878 | sel->l = nrel; |
| 8879 | sel = rewrite_topdown(sql, sel, &rel_push_select_down_union, &changes); |
| 8880 | if (changes) |
| 8881 | sel = rewrite(sql, sel, &rel_push_project_up, &changes); |
| 8882 | return sel; |
| 8883 | } |
| 8884 | return nrel; |
| 8885 | } |
| 8886 | } |
| 8887 | } |
| 8888 | if (sel) |
| 8889 | return sel; |
| 8890 | return rel; |
| 8891 | } |
| 8892 | |
| 8893 | static sql_rel* |
| 8894 | exp_skip_output_parts(sql_rel *rel) |
| 8895 | { |
| 8896 | while ((is_topn(rel->op) || is_project(rel->op) || is_sample(rel->op)) && rel->l) { |
| 8897 | if (rel->op == op_groupby && list_empty(rel->r)) |
| 8898 | return rel; /* a group-by with no columns is a plain aggregate and hence always returns one row */ |
| 8899 | rel = rel->l; |
| 8900 | } |
| 8901 | return rel; |
| 8902 | } |
| 8903 | |
| 8904 | /* return true if the given expression is guaranteed to have no rows */ |
| 8905 | static int |
| 8906 | exp_is_zero_rows(mvc *sql, sql_rel *rel, sql_rel *sel) |
| 8907 | { |
| 8908 | sql_table *t; |
| 8909 | node *n; |
| 8910 | |
| 8911 | if (!rel) |
| 8912 | return 0; |
| 8913 | rel = exp_skip_output_parts(rel); |
| 8914 | if (is_select(rel->op) && rel->l) { |
| 8915 | sel = rel; |
| 8916 | rel = exp_skip_output_parts(rel->l); |
| 8917 | } |
| 8918 | if (!sel) |
| 8919 | return 0; |
| 8920 | if (rel->op == op_join) |
| 8921 | return exp_is_zero_rows(sql, rel->l, sel) || exp_is_zero_rows(sql, rel->r, sel); |
| 8922 | if (rel->op == op_left || is_semi(rel->op)) |
| 8923 | return exp_is_zero_rows(sql, rel->l, sel); |
| 8924 | if (rel->op == op_right) |
| 8925 | return exp_is_zero_rows(sql, rel->r, sel); |
| 8926 | if (!is_basetable(rel->op) || !rel->l) |
| 8927 | return 0; |
| 8928 | t = rel->l; |
| 8929 | if (!isTable(t) || t->access != TABLE_READONLY) |
| 8930 | return 0; |
| 8931 | |
| 8932 | if (sel->exps) for (n = sel->exps->h; n; n = n->next) { |
| 8933 | sql_exp *e = n->data; |
| 8934 | atom *lval = NULL, *hval = NULL; |
| 8935 | |
| 8936 | if (e->type == e_cmp && (e->flag == cmp_equal || e->f)) { /* half-ranges are theoretically optimizable here, but not implemented */ |
| 8937 | sql_exp *c = e->l; |
| 8938 | if (c->type == e_column) { |
| 8939 | sql_exp *l = e->r; |
| 8940 | sql_exp *h = e->f; |
| 8941 | |
| 8942 | lval = exp_flatten(sql, l); |
| 8943 | hval = h ? exp_flatten(sql, h) : lval; |
| 8944 | if (lval && hval) { |
| 8945 | sql_rel *bt; |
| 8946 | sql_column *col = name_find_column(sel, exp_relname(c), exp_name(c), -2, &bt); |
| 8947 | char *min, *max; |
| 8948 | if (col |
| 8949 | && col->t == t |
| 8950 | && sql_trans_ranges(sql->session->tr, col, &min, &max) |
| 8951 | && !exp_range_overlap(sql, c, min, max, lval, hval)) { |
| 8952 | return 1; |
| 8953 | } |
| 8954 | } |
| 8955 | } |
| 8956 | } |
| 8957 | } |
| 8958 | return 0; |
| 8959 | } |
| 8960 | |
| 8961 | /* discard sides of UNION or UNION ALL which cannot produce any rows, as per |
| 8962 | statistics, similarly to the merge table optimizer, e.g. |
| 8963 | select * from a where x between 1 and 2 union all select * from b where x between 1 and 2 |
| 8964 | -> select * from b where x between 1 and 2 [assuming a has no rows with 1<=x<=2] |
| 8965 | */ |
| 8966 | static sql_rel * |
| 8967 | rel_remove_union_partitions(int *changes, mvc *sql, sql_rel *rel) |
| 8968 | { |
| 8969 | if (!is_union(rel->op)) |
| 8970 | return rel; |
| 8971 | if (exp_is_zero_rows(sql, rel->l, NULL)) { |
| 8972 | sql_rel *r = rel->r; |
| 8973 | rel_rename_exps(sql, rel->exps, r->exps); |
| 8974 | rel->r = NULL; |
| 8975 | rel_destroy(rel); |
| 8976 | (*changes)++; |
| 8977 | sql->caching = 0; |
| 8978 | return r; |
| 8979 | } |
| 8980 | if (exp_is_zero_rows(sql, rel->r, NULL)) { |
| 8981 | sql_rel *l = rel->l; |
| 8982 | rel_rename_exps(sql, rel->exps, l->exps); |
| 8983 | rel->l = NULL; |
| 8984 | rel_destroy(rel); |
| 8985 | (*changes)++; |
| 8986 | sql->caching = 0; |
| 8987 | return l; |
| 8988 | } |
| 8989 | return rel; |
| 8990 | } |
| 8991 | |
| 8992 | static sql_exp * |
| 8993 | rewrite_exp(mvc *sql, sql_exp *e, rewrite_rel_fptr rewrite_rel, rewrite_fptr rewriter, int *has_changes) |
| 8994 | { |
| 8995 | if (e->type != e_psm) |
| 8996 | return e; |
| 8997 | if (e->flag & PSM_VAR) |
| 8998 | return e; |
| 8999 | if (e->flag & PSM_SET || e->flag & PSM_RETURN) { |
| 9000 | e->l = rewrite_exp(sql, e->l, rewrite_rel, rewriter, has_changes); |
| 9001 | } |
| 9002 | if (e->flag & PSM_WHILE || e->flag & PSM_IF) { |
| 9003 | e->l = rewrite_exp(sql, e->l, rewrite_rel, rewriter, has_changes); |
| 9004 | e->r = rewrite_exps(sql, e->r, rewrite_rel, rewriter, has_changes); |
| 9005 | if (e->f) |
| 9006 | e->f = rewrite_exps(sql, e->f, rewrite_rel, rewriter, has_changes); |
| 9007 | return e; |
| 9008 | } |
| 9009 | if (e->flag & PSM_REL) |
| 9010 | e->l = rewrite_rel(sql, e->l, rewriter, has_changes); |
| 9011 | if (e->flag & PSM_EXCEPTION) |
| 9012 | e->l = rewrite_exp(sql, e->l, rewrite_rel, rewriter, has_changes); |
| 9013 | return e; |
| 9014 | } |
| 9015 | |
| 9016 | static list * |
| 9017 | rewrite_exps(mvc *sql, list *l, rewrite_rel_fptr rewrite_rel, rewrite_fptr rewriter, int *has_changes) |
| 9018 | { |
| 9019 | node *n; |
| 9020 | |
| 9021 | if (!l) |
| 9022 | return l; |
| 9023 | for(n = l->h; n; n = n->next) |
| 9024 | n->data = rewrite_exp(sql, n->data, rewrite_rel, rewriter, has_changes); |
| 9025 | return l; |
| 9026 | } |
| 9027 | |
| 9028 | |
| 9029 | static sql_rel * |
| 9030 | rewrite(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes) |
| 9031 | { |
| 9032 | int changes = 0; |
| 9033 | |
| 9034 | if (!rel) |
| 9035 | return rel; |
| 9036 | |
| 9037 | switch (rel->op) { |
| 9038 | case op_basetable: |
| 9039 | case op_table: |
| 9040 | break; |
| 9041 | case op_join: |
| 9042 | case op_left: |
| 9043 | case op_right: |
| 9044 | case op_full: |
| 9045 | |
| 9046 | case op_semi: |
| 9047 | case op_anti: |
| 9048 | |
| 9049 | case op_union: |
| 9050 | case op_inter: |
| 9051 | case op_except: |
| 9052 | rel->l = rewrite(sql, rel->l, rewriter, has_changes); |
| 9053 | rel->r = rewrite(sql, rel->r, rewriter, has_changes); |
| 9054 | break; |
| 9055 | case op_project: |
| 9056 | case op_select: |
| 9057 | case op_groupby: |
| 9058 | case op_topn: |
| 9059 | case op_sample: |
| 9060 | rel->l = rewrite(sql, rel->l, rewriter, has_changes); |
| 9061 | break; |
| 9062 | case op_ddl: |
| 9063 | if (rel->flag == ddl_psm && rel->exps) |
| 9064 | rel->exps = rewrite_exps(sql, rel->exps, &rewrite, rewriter, has_changes); |
| 9065 | rel->l = rewrite(sql, rel->l, rewriter, has_changes); |
| 9066 | if (rel->r) |
| 9067 | rel->r = rewrite(sql, rel->r, rewriter, has_changes); |
| 9068 | break; |
| 9069 | case op_insert: |
| 9070 | case op_update: |
| 9071 | case op_delete: |
| 9072 | case op_truncate: |
| 9073 | rel->l = rewrite(sql, rel->l, rewriter, has_changes); |
| 9074 | rel->r = rewrite(sql, rel->r, rewriter, has_changes); |
| 9075 | break; |
| 9076 | } |
| 9077 | rel = rewriter(&changes, sql, rel); |
| 9078 | if (changes) { |
| 9079 | (*has_changes)++; |
| 9080 | return rewrite(sql, rel, rewriter, has_changes); |
| 9081 | } |
| 9082 | return rel; |
| 9083 | } |
| 9084 | |
| 9085 | static sql_rel * |
| 9086 | rewrite_topdown(mvc *sql, sql_rel *rel, rewrite_fptr rewriter, int *has_changes) |
| 9087 | { |
| 9088 | if (!rel) |
| 9089 | return rel; |
| 9090 | |
| 9091 | rel = rewriter(has_changes, sql, rel); |
| 9092 | if (!rel) |
| 9093 | return rel; |
| 9094 | |
| 9095 | switch (rel->op) { |
| 9096 | case op_basetable: |
| 9097 | case op_table: |
| 9098 | if (rel->op == op_table && rel->l && rel->flag != 2) |
| 9099 | rel->l = rewrite(sql, rel->l, rewriter, has_changes); |
| 9100 | if (rel->op == op_table && rel->l && rel->flag != 2) |
| 9101 | rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes); |
| 9102 | break; |
| 9103 | case op_join: |
| 9104 | case op_left: |
| 9105 | case op_right: |
| 9106 | case op_full: |
| 9107 | |
| 9108 | case op_semi: |
| 9109 | case op_anti: |
| 9110 | |
| 9111 | case op_union: |
| 9112 | case op_inter: |
| 9113 | case op_except: |
| 9114 | rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes); |
| 9115 | rel->r = rewrite_topdown(sql, rel->r, rewriter, has_changes); |
| 9116 | break; |
| 9117 | case op_project: |
| 9118 | case op_select: |
| 9119 | case op_groupby: |
| 9120 | case op_topn: |
| 9121 | case op_sample: |
| 9122 | rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes); |
| 9123 | break; |
| 9124 | case op_ddl: |
| 9125 | if (rel->flag == ddl_psm && rel->exps) |
| 9126 | rewrite_exps(sql, rel->exps, &rewrite_topdown, rewriter, has_changes); |
| 9127 | rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes); |
| 9128 | if (rel->r) |
| 9129 | rel->r = rewrite_topdown(sql, rel->r, rewriter, has_changes); |
| 9130 | break; |
| 9131 | case op_insert: |
| 9132 | case op_update: |
| 9133 | case op_delete: |
| 9134 | case op_truncate: |
| 9135 | rel->l = rewrite_topdown(sql, rel->l, rewriter, has_changes); |
| 9136 | rel->r = rewrite_topdown(sql, rel->r, rewriter, has_changes); |
| 9137 | break; |
| 9138 | } |
| 9139 | return rel; |
| 9140 | } |
| 9141 | |
| 9142 | static sql_rel * |
| 9143 | optimize_rel(mvc *sql, sql_rel *rel, int *g_changes, int level, int value_based_opt) |
| 9144 | { |
| 9145 | int changes = 0, e_changes = 0; |
| 9146 | global_props gp; |
| 9147 | |
| 9148 | gp = (global_props) {.cnt = {0},}; |
| 9149 | rel_properties(sql, &gp, rel); |
| 9150 | |
| 9151 | #ifdef DEBUG |
| 9152 | { |
| 9153 | int i; |
| 9154 | for (i = 0; i < ddl_maxops; i++) { |
| 9155 | if (gp.cnt[i]> 0) |
| 9156 | printf("%s %d\n" , op2string((operator_type)i), gp.cnt[i]); |
| 9157 | } |
| 9158 | } |
| 9159 | #endif |
| 9160 | if (level <= 0 && gp.cnt[op_select]) |
| 9161 | rel = rel_split_select(&changes, sql, rel, 1); |
| 9162 | |
| 9163 | /* simple merging of projects */ |
| 9164 | if (gp.cnt[op_project] || gp.cnt[op_groupby] || gp.cnt[op_ddl]) { |
| 9165 | rel = rewrite(sql, rel, &rel_merge_projects, &changes); |
| 9166 | |
| 9167 | /* push (simple renaming) projections up */ |
| 9168 | if (gp.cnt[op_project]) |
| 9169 | rel = rewrite(sql, rel, &rel_push_project_up, &changes); |
| 9170 | if (level <= 0 && (gp.cnt[op_project] || gp.cnt[op_groupby])) |
| 9171 | rel = rel_split_project(&changes, sql, rel, 1); |
| 9172 | |
| 9173 | if (level <= 0) { |
| 9174 | rel = rel_case_fixup(&changes, sql, rel, 1); |
| 9175 | if (value_based_opt) |
| 9176 | rel = rewrite(sql, rel, &rel_simplify_math, &changes); |
| 9177 | rel = rewrite(sql, rel, &rel_distinct_aggregate_on_unique_values, &changes); |
| 9178 | rel = rewrite(sql, rel, &rel_distinct_project2groupby, &changes); |
| 9179 | } |
| 9180 | } |
| 9181 | |
| 9182 | if ((gp.cnt[op_select] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] || |
| 9183 | gp.cnt[op_join] || gp.cnt[op_semi] || gp.cnt[op_anti]) && level <= 0) |
| 9184 | if (value_based_opt) |
| 9185 | rel = rewrite(sql, rel, &rel_simplify_predicates, &changes); |
| 9186 | |
| 9187 | /* join's/crossproducts between a relation and a constant (row). |
| 9188 | * could be rewritten |
| 9189 | * |
| 9190 | * also joins between a relation and a DICT (which isn't used) |
| 9191 | * could be removed. |
| 9192 | * */ |
| 9193 | if (gp.cnt[op_join] && gp.cnt[op_project] && /* DISABLES CODE */ (0)) |
| 9194 | rel = rewrite(sql, rel, &rel_remove_join, &changes); |
| 9195 | |
| 9196 | if (gp.cnt[op_join] || |
| 9197 | gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] || |
| 9198 | gp.cnt[op_semi] || gp.cnt[op_anti] || |
| 9199 | gp.cnt[op_select]) { |
| 9200 | rel = rewrite(sql, rel, &rel_find_range, &changes); |
| 9201 | if (value_based_opt) { |
| 9202 | rel = rel_project_reduce_casts(&changes, sql, rel); |
| 9203 | rel = rewrite(sql, rel, &rel_reduce_casts, &changes); |
| 9204 | } |
| 9205 | } |
| 9206 | |
| 9207 | if (gp.cnt[op_union]) |
| 9208 | rel = rewrite(sql, rel, &rel_merge_union, &changes); |
| 9209 | |
| 9210 | if (gp.cnt[op_select] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] || |
| 9211 | gp.cnt[op_anti] || gp.cnt[op_join] || gp.cnt[op_semi]) |
| 9212 | rel = rewrite(sql, rel, &rel_select_cse, &changes); |
| 9213 | |
| 9214 | if (gp.cnt[op_project]) |
| 9215 | rel = rewrite(sql, rel, &rel_project_cse, &changes); |
| 9216 | |
| 9217 | rel = rewrite(sql, rel, &rel_rewrite_types, &changes); |
| 9218 | |
| 9219 | if ((gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full]) && /* DISABLES CODE */ (0)) |
| 9220 | rel = rewrite_topdown(sql, rel, &rel_split_outerjoin, &changes); |
| 9221 | |
| 9222 | if (gp.cnt[op_select] || gp.cnt[op_project]) |
| 9223 | if (level == 1) /* only once */ |
| 9224 | rel = rewrite(sql, rel, &rel_merge_rse, &changes); |
| 9225 | |
| 9226 | if (gp.cnt[op_select] && gp.cnt[op_join] && /* DISABLES CODE */ (0)) |
| 9227 | rel = rewrite_topdown(sql, rel, &rel_push_select_down_join, &changes); |
| 9228 | |
| 9229 | if (gp.cnt[op_select]) |
| 9230 | rel = rewrite_topdown(sql, rel, &rel_push_select_down_union, &changes); |
| 9231 | |
| 9232 | if (gp.cnt[op_union] && gp.cnt[op_select]) |
| 9233 | rel = rewrite(sql, rel, &rel_remove_union_partitions, &changes); |
| 9234 | |
| 9235 | if (gp.cnt[op_select]) |
| 9236 | rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes); |
| 9237 | |
| 9238 | if (gp.cnt[op_groupby]) { |
| 9239 | rel = rewrite_topdown(sql, rel, &rel_push_aggr_down, &changes); |
| 9240 | rel = rewrite_topdown(sql, rel, &rel_push_groupby_down, &changes); |
| 9241 | rel = rewrite(sql, rel, &rel_groupby_order, &changes); |
| 9242 | rel = rewrite(sql, rel, &rel_reduce_groupby_exps, &changes); |
| 9243 | rel = rewrite(sql, rel, &rel_groupby_distinct, &changes); |
| 9244 | } |
| 9245 | |
| 9246 | if (gp.cnt[op_join] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] || gp.cnt[op_semi] || gp.cnt[op_anti]) { |
| 9247 | rel = rel_remove_empty_join(sql, rel, &changes); |
| 9248 | if (!gp.cnt[op_update]) |
| 9249 | rel = rel_join_order(sql, rel); |
| 9250 | rel = rewrite(sql, rel, &rel_push_join_down_union, &changes); |
| 9251 | /* rel_join_order may introduce empty selects */ |
| 9252 | rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes); |
| 9253 | |
| 9254 | if (level <= 0) |
| 9255 | rel = rewrite(sql, rel, &rel_join_push_exps_down, &changes); |
| 9256 | |
| 9257 | rel = rewrite(sql, rel, &rel_merge_identical_joins, &e_changes); |
| 9258 | } |
| 9259 | |
| 9260 | /* Important -> Re-write semijoins after rel_join_order */ |
| 9261 | if ((gp.cnt[op_join] || gp.cnt[op_semi] || gp.cnt[op_anti]) && gp.cnt[op_groupby]) { |
| 9262 | rel = rewrite_topdown(sql, rel, &rel_push_count_down, &changes); |
| 9263 | if (level <= 0) |
| 9264 | rel = rewrite_topdown(sql, rel, &rel_push_join_down, &changes); |
| 9265 | |
| 9266 | /* push_join_down introduces semijoins */ |
| 9267 | /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */ |
| 9268 | rel = rewrite(sql, rel, &rel_rewrite_semijoin, &changes); |
| 9269 | } |
| 9270 | |
| 9271 | if (gp.cnt[op_anti] || gp.cnt[op_semi]) { |
| 9272 | /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */ |
| 9273 | rel = rewrite(sql, rel, &rel_rewrite_semijoin, &changes); |
| 9274 | /* push semijoin through join */ |
| 9275 | rel = rewrite(sql, rel, &rel_push_semijoin_down_or_up, &changes); |
| 9276 | /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */ |
| 9277 | rel = rewrite(sql, rel, &rel_rewrite_antijoin, &changes); |
| 9278 | if (level <= 0) |
| 9279 | rel = rewrite_topdown(sql, rel, &rel_semijoin_use_fk, &changes); |
| 9280 | } |
| 9281 | |
| 9282 | /* Important -> Make sure rel_push_select_down gets called after rel_join_order, |
| 9283 | because pushing down select expressions makes rel_join_order more difficult */ |
| 9284 | if (gp.cnt[op_select] || gp.cnt[op_semi]) { |
| 9285 | rel = rewrite_topdown(sql, rel, &rel_push_select_down, &changes); |
| 9286 | rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes); |
| 9287 | } |
| 9288 | |
| 9289 | if (gp.cnt[op_join] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] || gp.cnt[op_semi] || gp.cnt[op_anti]) { |
| 9290 | rel = rewrite_topdown(sql, rel, &rel_simplify_fk_joins, &changes); |
| 9291 | } |
| 9292 | |
| 9293 | if (gp.cnt[op_select] && sql->emode != m_prepare) |
| 9294 | rel = rewrite(sql, rel, &rel_simplify_like_select, &changes); |
| 9295 | |
| 9296 | if (gp.cnt[op_select]) |
| 9297 | rel = rewrite(sql, rel, &rel_select_order, &changes); |
| 9298 | |
| 9299 | if (gp.cnt[op_select] || gp.cnt[op_join]) |
| 9300 | rel = rewrite(sql, rel, &rel_use_index, &changes); |
| 9301 | |
| 9302 | if (gp.cnt[op_project]) |
| 9303 | rel = rewrite_topdown(sql, rel, &rel_push_project_down_union, &changes); |
| 9304 | |
| 9305 | /* Remove unused expressions */ |
| 9306 | if (level <= 0) |
| 9307 | rel = rel_dce(sql, rel); |
| 9308 | |
| 9309 | if (gp.cnt[op_join] || gp.cnt[op_left] || gp.cnt[op_right] || gp.cnt[op_full] || |
| 9310 | gp.cnt[op_semi] || gp.cnt[op_anti] || gp.cnt[op_select]) { |
| 9311 | rel = rewrite(sql, rel, &rel_push_func_down, &changes); |
| 9312 | rel = rewrite_topdown(sql, rel, &rel_push_select_down, &changes); |
| 9313 | rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes); |
| 9314 | } |
| 9315 | |
| 9316 | if (!changes && gp.cnt[op_topn]) { |
| 9317 | rel = rewrite_topdown(sql, rel, &rel_push_topn_down, &changes); |
| 9318 | changes = 0; |
| 9319 | } |
| 9320 | |
| 9321 | if (value_based_opt) |
| 9322 | rel = rewrite_topdown(sql, rel, &rel_merge_table_rewrite, &changes); |
| 9323 | if (level <= 0 && mvc_debug_on(sql,8)) |
| 9324 | rel = rewrite_topdown(sql, rel, &rel_add_dicts, &changes); |
| 9325 | *g_changes = changes; |
| 9326 | return rel; |
| 9327 | } |
| 9328 | |
| 9329 | static sql_rel * |
| 9330 | optimize(mvc *sql, sql_rel *rel, int value_based_opt) |
| 9331 | { |
| 9332 | list *refs = sa_list(sql->sa); |
| 9333 | node *n; |
| 9334 | int level = 0, changes = 1; |
| 9335 | |
| 9336 | |
| 9337 | for( ;rel && level < 20 && changes; level++) |
| 9338 | rel = optimize_rel(sql, rel, &changes, level, value_based_opt); |
| 9339 | |
| 9340 | rel_dce_refs(sql, rel, refs); |
| 9341 | if (refs) { |
| 9342 | refs = rel_opt_dependencies(sql, refs); |
| 9343 | for (n = refs->h; n; n = n->next) |
| 9344 | n->data = optimize_rel(sql, n->data, &changes, 0, value_based_opt); |
| 9345 | } |
| 9346 | rel = rel_dce(sql, rel); |
| 9347 | return rel; |
| 9348 | } |
| 9349 | |
| 9350 | sql_rel * |
| 9351 | rel_optimizer(mvc *sql, sql_rel *rel, int value_based_opt) |
| 9352 | { |
| 9353 | lng Tbegin = GDKusec(); |
| 9354 | rel = optimize(sql, rel, value_based_opt); |
| 9355 | sql->Topt += GDKusec() - Tbegin; |
| 9356 | return rel; |
| 9357 | } |
| 9358 | |