1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9#include "monetdb_config.h"
10#include "opt_mergetable.h"
11
12typedef enum mat_type_t {
13 mat_none = 0, /* Simple mat aligned operations (ie batcalc etc) */
14 mat_grp = 1, /* result of phase one of a mat - group.new/derive */
15 mat_ext = 2, /* mat_grp extend */
16 mat_cnt = 3, /* mat_grp count */
17 mat_tpn = 4, /* Phase one of topn on a mat */
18 mat_slc = 5, /* Last phase of topn (or just slice) on a mat */
19 mat_rdr = 6 /* Phase one of sorting, ie sorted the parts sofar */
20} mat_type_t;
21
22typedef struct mat {
23 InstrPtr mi; /* mat instruction */
24 InstrPtr org; /* orignal instruction */
25 int mv; /* mat variable */
26 int im; /* input mat, for attribute of sub relations */
27 int pm; /* parent mat, for sub relations */
28 mat_type_t type; /* type of operation */
29 int packed;
30 int pushed; /* set if instruction pushed and shouldn't be freed */
31} mat_t;
32
33typedef struct matlist {
34 mat_t *v;
35 int *vars; /* result variable is a mat */
36 int top;
37 int size;
38
39 int *horigin;
40 int *torigin;
41 int vsize;
42} matlist_t;
43
44static mat_type_t
45mat_type( mat_t *mat, int n)
46{
47 mat_type_t type = mat_none;
48 (void)mat;
49 (void)n;
50 return type;
51}
52
53static int
54is_a_mat(int idx, matlist_t *ml)
55{
56 if (ml->vars[idx] >= 0 && !ml->v[ml->vars[idx]].packed)
57 return ml->vars[idx];
58 return -1;
59}
60
61static int
62was_a_mat(int idx, matlist_t *ml){
63 int i;
64 for(i =0; i<ml->top; i++)
65 if (ml->v[i].mv == idx)
66 return i;
67 return -1;
68}
69
70static int
71nr_of_mats(InstrPtr p, matlist_t *ml)
72{
73 int j,cnt=0;
74 for(j=p->retc; j<p->argc; j++)
75 if (is_a_mat(getArg(p,j), ml) >= 0)
76 cnt++;
77 return cnt;
78}
79
80static int
81nr_of_bats(MalBlkPtr mb, InstrPtr p)
82{
83 int j,cnt=0;
84 for(j=p->retc; j<p->argc; j++)
85 if (isaBatType(getArgType(mb,p,j)))
86 cnt++;
87 return cnt;
88}
89
90/* some mat's have intermediates (with intermediate result variables), therefor
91 * we pass the old output mat variable */
92inline static int
93mat_add_var(matlist_t *ml, InstrPtr q, InstrPtr p, int var, mat_type_t type, int inputmat, int parentmat, int pushed)
94{
95 mat_t *dst = &ml->v[ml->top];
96 if (ml->top == ml->size) {
97 int s = ml->size * 2;
98 mat_t *v = (mat_t*)GDKzalloc(s * sizeof(mat_t));
99 if (!v)
100 return -1;
101 memcpy(v, ml->v, ml->top * sizeof(mat_t));
102 GDKfree(ml->v);
103 ml->size = s;
104 ml->v = v;
105 dst = &ml->v[ml->top];
106 }
107 dst->mi = q;
108 dst->org = p;
109 dst->mv = var;
110 dst->type = type;
111 dst->im = inputmat;
112 dst->pm = parentmat;
113 dst->packed = 0;
114 dst->pushed = pushed;
115 if (ml->vars[var] < 0 || dst->type != mat_ext) {
116 if (ml->vars[var] >= 0) {
117 ml->v[ml->vars[var]].packed = 1;
118 }
119 ml->vars[var] = ml->top;
120 }
121 ++ml->top;
122 return 0;
123}
124
125inline static int
126mat_add(matlist_t *ml, InstrPtr q, mat_type_t type, const char *func)
127{
128 (void)func;
129 //printf (" ml.top %d %s\n", ml.top, func);
130 return mat_add_var(ml, q, NULL, getArg(q,0), type, -1, -1, 0);
131}
132
133static void
134matlist_pack(matlist_t *ml, int m)
135{
136 int i, idx = ml->v[m].mv;
137
138 assert(ml->v[m].packed == 0);
139 ml->v[m].packed = 1;
140 ml->vars[idx] = -1;
141
142 for(i =0; i<ml->top; i++)
143 if (!ml->v[i].packed && ml->v[i].mv == idx) {
144 ml->vars[idx] = i;
145 break;
146 }
147}
148
149static void
150mat_pack(MalBlkPtr mb, matlist_t *ml, int m)
151{
152 InstrPtr r;
153
154 if (ml->v[m].packed)
155 return ;
156
157 if((ml->v[m].mi->argc-ml->v[m].mi->retc) == 1){
158 /* simple assignment is sufficient */
159 r = newInstruction(mb, NULL, NULL);
160 getArg(r,0) = getArg(ml->v[m].mi,0);
161 getArg(r,1) = getArg(ml->v[m].mi,1);
162 r->retc = 1;
163 r->argc = 2;
164 } else {
165 int l;
166
167 r = newInstruction(mb, matRef, packRef);
168 getArg(r,0) = getArg(ml->v[m].mi, 0);
169 for(l=ml->v[m].mi->retc; l< ml->v[m].mi->argc; l++)
170 r= pushArgument(mb,r, getArg(ml->v[m].mi,l));
171 }
172 matlist_pack(ml, m);
173 pushInstruction(mb, r);
174}
175
176static int
177checksize(matlist_t *ml, int v)
178{
179 if (v >= ml->vsize) {
180 int sz = ml->vsize, i, nvsize, *nhorigin, *ntorigin, *nvars;
181
182 nvsize = ml->vsize * 2;
183 nhorigin = (int*) GDKrealloc(ml->horigin, sizeof(int)* nvsize);
184 ntorigin = (int*) GDKrealloc(ml->torigin, sizeof(int)* nvsize);
185 nvars = (int*) GDKrealloc(ml->vars, sizeof(int)* nvsize);
186 if(!nhorigin || !ntorigin || !nvars) {
187 if(nhorigin)
188 GDKfree(nhorigin);
189 if(ntorigin)
190 GDKfree(ntorigin);
191 if(nvars)
192 GDKfree(nvars);
193 return -1;
194 }
195 ml->vsize = nvsize;
196 ml->horigin = nhorigin;
197 ml->torigin = ntorigin;
198 ml->vars = nvars;
199
200 for (i = sz; i < ml->vsize; i++) {
201 ml->horigin[i] = ml->torigin[i] = -1;
202 ml->vars[i] = -1;
203 }
204 }
205 return 0;
206}
207
208static int
209setPartnr(matlist_t *ml, int ivar, int ovar, int pnr)
210{
211 int tpnr = -1;
212
213 if(checksize(ml, ivar) || checksize(ml, ovar))
214 return -1;
215 if (ivar >= 0)
216 tpnr = ml->torigin[ivar];
217 if (tpnr >= 0)
218 ml->torigin[ovar] = tpnr;
219 ml->horigin[ovar] = pnr;
220 //printf("%d %d ", pnr, tpnr);
221 return 0;
222}
223
224static int
225propagatePartnr(matlist_t *ml, int ivar, int ovar, int pnr)
226{
227 /* prop head ids to tail */
228 int tpnr = -1;
229
230 if(checksize(ml, ivar) || checksize(ml, ovar))
231 return -1;
232 if (ivar >= 0)
233 tpnr = ml->horigin[ivar];
234 if (tpnr >= 0)
235 ml->torigin[ovar] = tpnr;
236 ml->horigin[ovar] = pnr;
237 //printf("%d %d ", pnr, tpnr);
238 return 0;
239}
240
241static int
242propagateMirror(matlist_t *ml, int ivar, int ovar)
243{
244 /* prop head ids to head and tail */
245 int tpnr;
246
247 if(checksize(ml, ivar) || checksize(ml, ovar))
248 return -1;
249 tpnr = ml->horigin[ivar];
250 if (tpnr >= 0) {
251 ml->horigin[ovar] = tpnr;
252 ml->torigin[ovar] = tpnr;
253 }
254 return 0;
255}
256
257static int
258overlap(matlist_t *ml, int lv, int rv, int lnr, int rnr, int ontails)
259{
260 int lpnr, rpnr;
261
262 checksize(ml, lv);
263 checksize(ml, rv);
264 lpnr = ml->torigin[lv];
265 rpnr = (ontails)?ml->torigin[rv]:ml->horigin[rv];
266
267 if (lpnr < 0 && rpnr < 0)
268 return lnr == rnr;
269 if (rpnr < 0)
270 return lpnr == rnr;
271 if (lpnr < 0)
272 return rpnr == lnr;
273 return lpnr == rpnr;
274}
275
276static int
277mat_set_prop(matlist_t *ml, MalBlkPtr mb, InstrPtr p)
278{
279 int k, tpe = getArgType(mb, p, 0);
280
281 tpe = getBatType(tpe);
282 for(k=1; k < p->argc; k++) {
283 if(setPartnr(ml, -1, getArg(p,k), k))
284 return -1;
285 if (tpe == TYPE_oid && propagateMirror(ml, getArg(p,k), getArg(p,k)))
286 return -1;
287 }
288 return 0;
289}
290
291static InstrPtr
292mat_delta(matlist_t *ml, MalBlkPtr mb, InstrPtr p, mat_t *mat, int m, int n, int o, int e, int mvar, int nvar, int ovar, int evar)
293{
294 int tpe, k, j, is_subdelta = (getFunctionId(p) == subdeltaRef), is_projectdelta = (getFunctionId(p) == projectdeltaRef);
295 InstrPtr r = NULL;
296 int pushed = 0;
297
298 //printf("# %s.%s(%d,%d,%d,%d)", getModuleId(p), getFunctionId(p), m, n, o, e);
299
300 if((r = newInstruction(mb, matRef,packRef)) == NULL)
301 return NULL;
302 getArg(r, 0) = getArg(p,0);
303 tpe = getArgType(mb,p,0);
304
305 /* Handle like mat_projection, ie overlapping partitions */
306 if (evar == 1 && mat[e].mi->argc != mat[m].mi->argc) {
307 int nr = 1;
308 for(k=1; k < mat[e].mi->argc; k++) {
309 for(j=1; j < mat[m].mi->argc; j++) {
310 if (overlap(ml, getArg(mat[e].mi, k), getArg(mat[m].mi, j), k, j, 0)){
311 InstrPtr q = copyInstruction(p);
312 if(!q)
313 return NULL;
314
315 /* remove last argument (inserts only on last part) */
316 if (k < mat[m].mi->argc-1)
317 q->argc--;
318 /* make sure to resolve again */
319 q->token = ASSIGNsymbol;
320 q->typechk = TYPE_UNKNOWN;
321 q->fcn = NULL;
322 q->blk = NULL;
323
324 getArg(q, 0) = newTmpVariable(mb, tpe);
325 getArg(q, mvar) = getArg(mat[m].mi, j);
326 getArg(q, nvar) = getArg(mat[n].mi, j);
327 getArg(q, ovar) = getArg(mat[o].mi, j);
328 getArg(q, evar) = getArg(mat[e].mi, k);
329 pushInstruction(mb, q);
330 if(setPartnr(ml, getArg(mat[m].mi, j), getArg(q,0), nr)) {
331 freeInstruction(r);
332 return NULL;
333 }
334 r = pushArgument(mb, r, getArg(q, 0));
335
336 nr++;
337 break;
338 }
339 }
340 }
341 } else {
342 for(k=1; k < mat[m].mi->argc; k++) {
343 InstrPtr q = copyInstruction(p);
344 if(!q)
345 return NULL;
346
347 /* remove last argument (inserts only on last part) */
348 if (k < mat[m].mi->argc-1)
349 q->argc--;
350 /* make sure to resolve again */
351 q->token = ASSIGNsymbol;
352 q->typechk = TYPE_UNKNOWN;
353 q->fcn = NULL;
354 q->blk = NULL;
355
356 getArg(q, 0) = newTmpVariable(mb, tpe);
357 getArg(q, mvar) = getArg(mat[m].mi, k);
358 getArg(q, nvar) = getArg(mat[n].mi, k);
359 getArg(q, ovar) = getArg(mat[o].mi, k);
360 if (e >= 0)
361 getArg(q, evar) = getArg(mat[e].mi, k);
362 pushInstruction(mb, q);
363 if(setPartnr(ml, is_subdelta?getArg(mat[m].mi, k):-1, getArg(q,0), k)) {
364 freeInstruction(r);
365 return NULL;
366 }
367 r = pushArgument(mb, r, getArg(q, 0));
368 }
369 if (evar == 1 && e >= 0 && mat[e].type == mat_slc && is_projectdelta) {
370 InstrPtr q = newInstruction(mb, algebraRef, projectionRef);
371 getArg(q, 0) = getArg(r, 0);
372 q = pushArgument(mb, q, getArg(mat[e].mi, 0));
373 getArg(r, 0) = newTmpVariable(mb, tpe);
374 q = pushArgument(mb, q, getArg(r, 0));
375 pushInstruction(mb, r);
376 pushInstruction(mb, q);
377 pushed = 1;
378 r = q;
379 }
380 }
381 if(mat_add_var(ml, r, NULL, getArg(r, 0), mat_type(mat, m), -1, -1, pushed))
382 return NULL;
383 if (pushed)
384 matlist_pack(ml, ml->top-1);
385 return r;
386}
387
388
389static InstrPtr
390mat_apply1(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int var)
391{
392 int tpe, k, is_select = isSelect(p), is_mirror = (getFunctionId(p) == mirrorRef);
393 int is_identity = (getFunctionId(p) == identityRef && getModuleId(p) == batcalcRef);
394 int ident_var = 0, is_assign = (getFunctionId(p) == NULL), n = 0;
395 InstrPtr r = NULL, q;
396 mat_t *mat = ml->v;
397
398 assert (p->retc == 1);
399
400 /* Find the mat we overwrite */
401 if (is_assign) {
402 n = is_a_mat(getArg(p, 0), ml);
403 is_assign = (n >= 0);
404 }
405
406 if((r = newInstruction(mb, matRef, packRef)) == NULL)
407 return NULL;
408 getArg(r, 0) = getArg(p,0);
409 tpe = getArgType(mb,p,0);
410
411 if (is_identity) {
412 if((q = newInstruction(mb, NULL,NULL)) == NULL) {
413 freeInstruction(r);
414 return NULL;
415 }
416 getArg(q, 0) = newTmpVariable(mb, TYPE_oid);
417 q->retc = 1;
418 q->argc = 1;
419 q = pushOid(mb, q, 0);
420 ident_var = getArg(q, 0);
421 pushInstruction(mb, q);
422 }
423 for(k=1; k < mat[m].mi->argc; k++) {
424 int res = 0;
425 if((q = copyInstruction(p)) == NULL) {
426 freeInstruction(r);
427 return NULL;
428 }
429
430 if (is_assign)
431 getArg(q, 0) = getArg(mat[n].mi, k);
432 else
433 getArg(q, 0) = newTmpVariable(mb, tpe);
434 if (is_identity)
435 getArg(q, 1) = newTmpVariable(mb, TYPE_oid);
436 getArg(q, var+is_identity) = getArg(mat[m].mi, k);
437 if (is_identity) {
438 getArg(q, 3) = ident_var;
439 q->retc = 2;
440 q->argc = 4;
441 /* make sure to resolve again */
442 q->token = ASSIGNsymbol;
443 q->typechk = TYPE_UNKNOWN;
444 q->fcn = NULL;
445 q->blk = NULL;
446 }
447 ident_var = getArg(q, 1);
448 pushInstruction(mb, q);
449 if (is_mirror || is_identity) {
450 res = propagateMirror(ml, getArg(mat[m].mi, k), getArg(q,0));
451 } else if (is_select)
452 res = propagatePartnr(ml, getArg(mat[m].mi, k), getArg(q,0), k);
453 else
454 res = setPartnr(ml, -1, getArg(q,0), k);
455 if(res) {
456 freeInstruction(r);
457 return NULL;
458 }
459 r = pushArgument(mb, r, getArg(q, 0));
460 }
461 return r;
462}
463
464static int
465mat_apply2(matlist_t *ml, MalBlkPtr mb, InstrPtr p, mat_t *mat, int m, int n, int mvar, int nvar)
466{
467 int k, l, is_select = isSelect(p);
468 InstrPtr *r = NULL;
469 r = (InstrPtr*) GDKmalloc(sizeof(InstrPtr)* p->retc);
470 if(!r)
471 return -1;
472 for(k=0; k < p->retc; k++) {
473 if((r[k] = newInstruction(mb, matRef, packRef)) == NULL) {
474 for(l=0; l < k; l++)
475 freeInstruction(r[l]);
476 GDKfree(r);
477 return -1;
478 }
479 getArg(r[k],0) = getArg(p,k);
480 }
481
482 for(k=1; k < mat[m].mi->argc; k++) {
483 int tpe;
484 InstrPtr q = copyInstruction(p);
485 if(!q) {
486 GDKfree(r);
487 return -1;
488 }
489
490 for(l=0; l < p->retc; l++) {
491 tpe = getArgType(mb,p,l);
492 getArg(q, l) = newTmpVariable(mb, tpe);
493 }
494 getArg(q, mvar) = getArg(mat[m].mi, k);
495 getArg(q, nvar) = getArg(mat[n].mi, k);
496 pushInstruction(mb, q);
497 for(l=0; l < p->retc; l++) {
498 int res = 0;
499 if (is_select)
500 res = propagatePartnr(ml, getArg(q,p->retc+1), getArg(q,l), k);
501 else
502 res = propagatePartnr(ml, -1, getArg(q,l), k);
503 if(res) {
504 for(l=0; l < k; l++)
505 freeInstruction(r[l]);
506 GDKfree(r);
507 return -1;
508 }
509 r[l] = pushArgument(mb, r[l], getArg(q, l));
510 }
511 }
512
513 for(k=0; k < p->retc; k++) {
514 if(mat_add_var(ml, r[k], NULL, getArg(r[k], 0), mat_type(ml->v, m), -1, -1, 0)) {
515 for(l=0; l < k; l++)
516 freeInstruction(r[l]);
517 GDKfree(r);
518 return -1;
519 }
520 }
521 GDKfree(r);
522 return 0;
523}
524
525static int
526mat_apply3(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int n, int o, int mvar, int nvar, int ovar)
527{
528 int k, l;
529 InstrPtr *r = NULL;
530 r = (InstrPtr*) GDKmalloc(sizeof(InstrPtr)* p->retc);
531 if(!r)
532 return -1;
533 for(k=0; k < p->retc; k++) {
534 if((r[k] = newInstruction(mb, matRef, packRef)) == NULL) {
535 for(l=0; l < k; l++)
536 freeInstruction(r[l]);
537 GDKfree(r);
538 return -1;
539 }
540 getArg(r[k],0) = getArg(p,k);
541 }
542
543 for(k = 1; k < ml->v[m].mi->argc; k++) {
544 int tpe;
545 InstrPtr q = copyInstruction(p);
546 if(!q) {
547 GDKfree(r);
548 return -1;
549 }
550
551 for(l=0; l < p->retc; l++) {
552 tpe = getArgType(mb,p,l);
553 getArg(q, l) = newTmpVariable(mb, tpe);
554 }
555 getArg(q, mvar) = getArg(ml->v[m].mi, k);
556 getArg(q, nvar) = getArg(ml->v[n].mi, k);
557 getArg(q, ovar) = getArg(ml->v[o].mi, k);
558 pushInstruction(mb, q);
559 for(l=0; l < p->retc; l++) {
560 if(setPartnr(ml, -1, getArg(q,l), k)) {
561 for(l=0; l < k; l++)
562 freeInstruction(r[l]);
563 GDKfree(r);
564 return -1;
565 }
566 r[l] = pushArgument(mb, r[l], getArg(q, l));
567 }
568 }
569 for(k=0; k < p->retc; k++) {
570 if(mat_add_var(ml, r[k], NULL, getArg(r[k], 0), mat_type(ml->v, m), -1, -1, 1)) {
571 for(l=0; l < k; l++)
572 freeInstruction(r[l]);
573 GDKfree(r);
574 return -1;
575 }
576 pushInstruction(mb, r[k]);
577 }
578 GDKfree(r);
579 return 0;
580}
581
582static int
583mat_setop(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int n)
584{
585 int tpe = getArgType(mb,p, 0), k, j;
586 InstrPtr r = newInstruction(mb, matRef, packRef);
587 mat_t *mat = ml->v;
588
589 if(!r)
590 return -1;
591
592 getArg(r,0) = getArg(p,0);
593
594 //printf("# %s.%s(%d,%d)", getModuleId(p), getFunctionId(p), m, n);
595 assert(m>=0 || n>=0);
596 if (m >= 0 && n >= 0) {
597 int nr = 1;
598 for(k=1; k<mat[m].mi->argc; k++) {
599 InstrPtr q = copyInstruction(p);
600 InstrPtr s = newInstruction(mb, matRef, packRef);
601 int ttpe = 0;
602
603 if(!q || !s) {
604 if(q)
605 freeInstruction(q);
606 if(s)
607 freeInstruction(s);
608 freeInstruction(r);
609 return -1;
610 }
611
612 getArg(s,0) = newTmpVariable(mb, getArgType(mb, mat[n].mi, k));
613
614 ttpe = getArgType(mb, mat[n].mi, 0);
615 for (j=1; j<mat[n].mi->argc; j++) {
616 if (getBatType(ttpe) != TYPE_oid || overlap(ml, getArg(mat[m].mi, k), getArg(mat[n].mi, j), k, j, 1)){
617 s = pushArgument(mb,s,getArg(mat[n].mi,j));
618 }
619 }
620 if (s->retc == 1 && s->argc == 2){ /* only one input, change into an assignment */
621 getFunctionId(s) = NULL;
622 getModuleId(s) = NULL;
623 s->token = ASSIGNsymbol;
624 s->typechk = TYPE_UNKNOWN;
625 s->fcn = NULL;
626 s->blk = NULL;
627 }
628 pushInstruction(mb,s);
629
630 getArg(q,0) = newTmpVariable(mb, tpe);
631 getArg(q,1) = getArg(mat[m].mi,k);
632 getArg(q,2) = getArg(s,0);
633 if(setPartnr(ml, getArg(mat[m].mi,k), getArg(q,0), nr)) {
634 freeInstruction(q);
635 freeInstruction(r);
636 return -1;
637 }
638 pushInstruction(mb,q);
639
640 r = pushArgument(mb,r,getArg(q,0));
641 nr++;
642 }
643 } else {
644 assert(m >= 0);
645 for(k=1; k<mat[m].mi->argc; k++) {
646 InstrPtr q = copyInstruction(p);
647 if(!q) {
648 freeInstruction(r);
649 return -1;
650 }
651
652 getArg(q,0) = newTmpVariable(mb, tpe);
653 getArg(q,1) = getArg(mat[m].mi, k);
654 pushInstruction(mb,q);
655
656 if(setPartnr(ml, getArg(q, 2), getArg(q,0), k)) {
657 freeInstruction(r);
658 return -1;
659 }
660 r = pushArgument(mb, r, getArg(q,0));
661 }
662 }
663
664 return mat_add(ml, r, mat_none, getFunctionId(p));
665}
666
667static int
668mat_projection(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int n)
669{
670 int tpe = getArgType(mb,p, 0), k, j;
671 InstrPtr r = newInstruction(mb, matRef, packRef);
672 mat_t *mat = ml->v;
673
674 if(!r)
675 return -1;
676
677 getArg(r,0) = getArg(p,0);
678
679 //printf("# %s.%s(%d,%d)", getModuleId(p), getFunctionId(p), m, n);
680 assert(m>=0 || n>=0);
681 if (m >= 0 && n >= 0) {
682 int nr = 1;
683 for(k=1; k<mat[m].mi->argc; k++) {
684 for (j=1; j<mat[n].mi->argc; j++) {
685 if (overlap(ml, getArg(mat[m].mi, k), getArg(mat[n].mi, j), k, j, 0)){
686 InstrPtr q = copyInstruction(p);
687
688 if(!q) {
689 if(q)
690 freeInstruction(q);
691 freeInstruction(r);
692 return -1;
693 }
694
695 getArg(q,0) = newTmpVariable(mb, tpe);
696 getArg(q,1) = getArg(mat[m].mi,k);
697 getArg(q,2) = getArg(mat[n].mi,j);
698 pushInstruction(mb,q);
699
700 if(setPartnr(ml, getArg(mat[n].mi, j), getArg(q,0), nr)) {
701 freeInstruction(r);
702 return -1;
703 }
704 r = pushArgument(mb,r,getArg(q,0));
705
706 nr++;
707 break;
708 }
709 }
710 }
711 } else {
712 assert(m >= 0);
713 for(k=1; k<mat[m].mi->argc; k++) {
714 InstrPtr q = copyInstruction(p);
715
716 if(!q) {
717 if(q)
718 freeInstruction(q);
719 freeInstruction(r);
720 return -1;
721 }
722
723 getArg(q,0) = newTmpVariable(mb, tpe);
724 getArg(q,1) = getArg(mat[m].mi, k);
725 pushInstruction(mb,q);
726
727 if(setPartnr(ml, getArg(q, 2), getArg(q,0), k)) {
728 freeInstruction(r);
729 return -1;
730 }
731 r = pushArgument(mb, r, getArg(q,0));
732 }
733 }
734
735 return mat_add(ml, r, mat_none, getFunctionId(p));
736}
737
738static int
739mat_join2(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int n)
740{
741 int tpe = getArgType(mb,p, 0), j,k, nr = 1;
742 InstrPtr l = newInstruction(mb, matRef, packRef);
743 InstrPtr r = newInstruction(mb, matRef, packRef);
744 mat_t *mat = ml->v;
745
746 if(!l || !r) {
747 if(l)
748 freeInstruction(l);
749 if(r)
750 freeInstruction(r);
751 return -1;
752 }
753
754 getArg(l,0) = getArg(p,0);
755 getArg(r,0) = getArg(p,1);
756
757 //printf("# %s.%s(%d,%d)", getModuleId(p), getFunctionId(p), m, n);
758
759 assert(m>=0 || n>=0);
760 if (m >= 0 && n >= 0) {
761 for(k=1; k<mat[m].mi->argc; k++) {
762 for (j=1; j<mat[n].mi->argc; j++) {
763 InstrPtr q = copyInstruction(p);
764
765 if(!q) {
766 freeInstruction(l);
767 freeInstruction(r);
768 return -1;
769 }
770
771 getArg(q,0) = newTmpVariable(mb, tpe);
772 getArg(q,1) = newTmpVariable(mb, tpe);
773 getArg(q,2) = getArg(mat[m].mi,k);
774 getArg(q,3) = getArg(mat[n].mi,j);
775 pushInstruction(mb,q);
776
777 if(propagatePartnr(ml, getArg(mat[m].mi, k), getArg(q,0), nr) ||
778 propagatePartnr(ml, getArg(mat[n].mi, j), getArg(q,1), nr)) {
779 freeInstruction(r);
780 return -1;
781 }
782
783 /* add result to mat */
784 l = pushArgument(mb,l,getArg(q,0));
785 r = pushArgument(mb,r,getArg(q,1));
786 nr++;
787 }
788 }
789 } else {
790 int mv = (m>=0)?m:n;
791 int av = (m<0);
792 int bv = (m>=0);
793
794 for(k=1; k<mat[mv].mi->argc; k++) {
795 InstrPtr q = copyInstruction(p);
796
797 if(!q) {
798 freeInstruction(l);
799 freeInstruction(r);
800 return -1;
801 }
802
803 getArg(q,0) = newTmpVariable(mb, tpe);
804 getArg(q,1) = newTmpVariable(mb, tpe);
805 getArg(q,p->retc+av) = getArg(mat[mv].mi, k);
806 pushInstruction(mb,q);
807
808 if(propagatePartnr(ml, getArg(mat[mv].mi, k), getArg(q,av), k) ||
809 propagatePartnr(ml, getArg(p, p->retc+bv), getArg(q,bv), k)) {
810 freeInstruction(l);
811 freeInstruction(r);
812 return -1;
813 }
814
815 /* add result to mat */
816 l = pushArgument(mb, l, getArg(q,0));
817 r = pushArgument(mb, r, getArg(q,1));
818 }
819 }
820 return mat_add(ml, l, mat_none, getFunctionId(p)) || mat_add(ml, r, mat_none, getFunctionId(p));
821}
822
823static int
824join_split(Client cntxt, InstrPtr p, int args)
825{
826 char *name = NULL;
827 size_t len;
828 int i, res = 0;
829 Symbol sym;
830 MalBlkPtr mb;
831 InstrPtr q;
832
833 if (args <= 3) /* we asume there are no 2x1 joins! */
834 return 1;
835
836 len = strlen( getFunctionId(p) );
837 name = GDKmalloc(len+3);
838 if (!name)
839 return -1;
840 strncpy(name, getFunctionId(p), len-7);
841 strcpy(name+len-7, "join");
842
843 sym = findSymbol(cntxt->usermodule, getModuleId(p), name);
844 assert(sym);
845 mb = sym->def;
846
847 q = mb->stmt[0];
848 for(i = q->retc; i<q->argc; i++ ) {
849 if (isaBatType(getArgType(mb,q,i)))
850 res++;
851 else
852 break;
853 }
854 GDKfree(name);
855 return res-1;
856}
857
858/* 1 or 2 mat lists:
859 * in case of one take the second half of the code
860 * in case of two we need to detect the list lengths.
861 *
862 * input is one list of arguments (just total length of mats)
863 */
864static int
865mat_joinNxM(Client cntxt, MalBlkPtr mb, InstrPtr p, matlist_t *ml, int args)
866{
867 int tpe = getArgType(mb,p, 0), j,k, nr = 1;
868 InstrPtr l = newInstruction(mb, matRef, packRef);
869 InstrPtr r = newInstruction(mb, matRef, packRef);
870 mat_t *mat = ml->v;
871 int *mats = (int*)GDKzalloc(sizeof(int) * args);
872 int nr_mats = 0, first = 0, res = 0;
873
874 if (!mats) {
875 return -1;
876 }
877
878 for(j=0;j<args;j++) {
879 mats[j] = is_a_mat(getArg(p,p->retc+j), ml);
880 if (mats[j] != -1) {
881 nr_mats++;
882 if (!first)
883 first = j;
884 }
885 }
886 getArg(l,0) = getArg(p,0);
887 getArg(r,0) = getArg(p,1);
888
889 //printf("# %s.%s(%d,%d)", getModuleId(p), getFunctionId(p), m, n);
890
891 if (args == nr_mats) {
892 int mv1 = mats[0], i;
893 int mv2 = mats[args-1];
894 int split = join_split(cntxt, p, args);
895 int nr_mv1 = split;
896
897 if (split < 0) {
898 freeInstruction(r);
899 freeInstruction(l);
900 GDKfree(mats);
901 mb->errors= createException(MAL,"mergetable.join", SQLSTATE(42000) " incorrect split level");
902 return 0;
903 }
904 /* now detect split point */
905 for(k=1; k<mat[mv1].mi->argc; k++) {
906 for (j=1; j<mat[mv2].mi->argc; j++) {
907 InstrPtr q = copyInstruction(p);
908 if(!q) {
909 freeInstruction(r);
910 freeInstruction(l);
911 GDKfree(mats);
912 return -1;
913 }
914
915 getArg(q,0) = newTmpVariable(mb, tpe);
916 getArg(q,1) = newTmpVariable(mb, tpe);
917 for (i = 0; i < nr_mv1; i++ )
918 getArg(q,q->retc+i) = getArg(mat[mats[i]].mi,k);
919 for (; i < nr_mats; i++ )
920 getArg(q,q->retc+i) = getArg(mat[mats[i]].mi,j);
921 pushInstruction(mb,q);
922
923 if(propagatePartnr(ml, getArg(mat[mv1].mi, k), getArg(q,0), nr) ||
924 propagatePartnr(ml, getArg(mat[mv2].mi, j), getArg(q,1), nr)) {
925 freeInstruction(r);
926 freeInstruction(l);
927 GDKfree(mats);
928 return -1;
929 }
930
931 /* add result to mat */
932 l = pushArgument(mb,l,getArg(q,0));
933 r = pushArgument(mb,r,getArg(q,1));
934 nr++;
935 }
936 }
937 } else {
938 /* only one side
939 * mats from first..first+nr_mats
940 */
941 int mv = mats[first];
942
943 for(k=1; k<mat[mv].mi->argc; k++) {
944 InstrPtr q = copyInstruction(p);
945 if(!q) {
946 freeInstruction(r);
947 freeInstruction(l);
948 GDKfree(mats);
949 return -1;
950 }
951
952 getArg(q,0) = newTmpVariable(mb, tpe);
953 getArg(q,1) = newTmpVariable(mb, tpe);
954 for (j=0;j<nr_mats;j++) {
955 assert(mat[mats[first]].mi->argc == mat[mats[first+j]].mi->argc);
956 getArg(q,p->retc+first+j) = getArg(mat[mats[first+j]].mi, k);
957 }
958 if(propagatePartnr(ml, getArg(mat[mv].mi, k), getArg(q,(first!=0)), k) ||
959 propagatePartnr(ml, getArg(p, p->retc+(first)?nr_mats:0), getArg(q,(first==0)), k)) {
960 freeInstruction(q);
961 freeInstruction(r);
962 freeInstruction(l);
963 GDKfree(mats);
964 return -1;
965 }
966 pushInstruction(mb,q);
967
968 /* add result to mat */
969 l = pushArgument(mb, l, getArg(q,0));
970 r = pushArgument(mb, r, getArg(q,1));
971 }
972 }
973 res = mat_add(ml, l, mat_none, getFunctionId(p)) || mat_add(ml, r, mat_none, getFunctionId(p));
974 GDKfree(mats);
975 return res;
976}
977
978
979static char *
980aggr_phase2(char *aggr)
981{
982 if (aggr == countRef || aggr == count_no_nilRef || aggr == avgRef)
983 return sumRef;
984 if (aggr == subcountRef || aggr == subavgRef)
985 return subsumRef;
986 /* min/max/sum/prod and unique are fine */
987 return aggr;
988}
989
990static void
991mat_aggr(MalBlkPtr mb, InstrPtr p, mat_t *mat, int m)
992{
993 int tp = getArgType(mb,p,0), k, tp2 = TYPE_lng, i;
994 int battp = (getModuleId(p)==aggrRef)?newBatType(tp):tp, battp2 = 0;
995 int isAvg = (getFunctionId(p) == avgRef);
996 InstrPtr r = NULL, s = NULL, q = NULL, u = NULL;
997
998 /* we pack the partitial result */
999 r = newInstruction(mb, matRef, packRef);
1000 getArg(r,0) = newTmpVariable(mb, battp);
1001
1002 if (isAvg) { /* counts */
1003 battp2 = newBatType( tp2);
1004 u = newInstruction(mb, matRef, packRef);
1005 getArg(u,0) = newTmpVariable(mb, battp2);
1006 }
1007 for(k=1; k< mat[m].mi->argc; k++) {
1008 q = newInstruction(mb, NULL, NULL);
1009 setModuleId(q,getModuleId(p));
1010 if (isAvg)
1011 setModuleId(q,batcalcRef);
1012 setFunctionId(q,getFunctionId(p));
1013 getArg(q,0) = newTmpVariable(mb, tp);
1014 if (isAvg)
1015 q = pushReturn(mb, q, newTmpVariable(mb, tp2));
1016 q = pushArgument(mb,q,getArg(mat[m].mi,k));
1017 for (i = q->argc; i<p->argc; i++)
1018 q = pushArgument(mb,q,getArg(p,i));
1019 pushInstruction(mb,q);
1020
1021 r = pushArgument(mb,r,getArg(q,0));
1022 if (isAvg)
1023 u = pushArgument(mb,u,getArg(q,1));
1024 }
1025 pushInstruction(mb,r);
1026 if (isAvg)
1027 pushInstruction(mb, u);
1028
1029 /* Filter empty partitions */
1030 if (getModuleId(p) == aggrRef && !isAvg) {
1031 s = newInstruction(mb, algebraRef, selectNotNilRef);
1032 getArg(s,0) = newTmpVariable(mb, battp);
1033 s = pushArgument(mb, s, getArg(r,0));
1034 pushInstruction(mb, s);
1035 r = s;
1036 }
1037
1038 /* for avg we do sum (avg*(count/sumcount) ) */
1039 if (isAvg) {
1040 InstrPtr v,w,x,y,cond;
1041
1042 /* lng w = sum counts */
1043 w = newInstruction(mb, aggrRef, sumRef);
1044 getArg(w,0) = newTmpVariable(mb, tp2);
1045 w = pushArgument(mb, w, getArg(u, 0));
1046 pushInstruction(mb, w);
1047
1048 /* y=count = ifthenelse(w=count==0,NULL,w=count) */
1049 cond = newInstruction(mb, calcRef, eqRef);
1050 getArg(cond,0) = newTmpVariable(mb, TYPE_bit);
1051 cond = pushArgument(mb, cond, getArg(w, 0));
1052 cond = pushLng(mb, cond, 0);
1053 pushInstruction(mb,cond);
1054
1055 y = newInstruction(mb, calcRef, ifthenelseRef);
1056 getArg(y,0) = newTmpVariable(mb, tp2);
1057 y = pushArgument(mb, y, getArg(cond, 0));
1058 y = pushNil(mb, y, tp2);
1059 y = pushArgument(mb, y, getArg(w, 0));
1060 pushInstruction(mb,y);
1061
1062 /* dbl v = double(count) */
1063 v = newInstruction(mb, batcalcRef, dblRef);
1064 getArg(v,0) = newTmpVariable(mb, newBatType(TYPE_dbl));
1065 v = pushArgument(mb, v, getArg(u, 0));
1066 pushInstruction(mb, v);
1067
1068 /* dbl x = v / y */
1069 x = newInstruction(mb, batcalcRef, divRef);
1070 getArg(x,0) = newTmpVariable(mb, newBatType(TYPE_dbl));
1071 x = pushArgument(mb, x, getArg(v, 0));
1072 x = pushArgument(mb, x, getArg(y, 0));
1073 pushInstruction(mb, x);
1074
1075 /* dbl w = avg * x */
1076 w = newInstruction(mb, batcalcRef, mulRef);
1077 getArg(w,0) = newTmpVariable(mb, battp);
1078 w = pushArgument(mb, w, getArg(r, 0));
1079 w = pushArgument(mb, w, getArg(x, 0));
1080 pushInstruction(mb, w);
1081
1082 r = w;
1083
1084 /* filter nils */
1085 s = newInstruction(mb, algebraRef, selectNotNilRef);
1086 getArg(s,0) = newTmpVariable(mb, battp);
1087 s = pushArgument(mb, s, getArg(r,0));
1088 pushInstruction(mb, s);
1089 r = s;
1090 }
1091
1092 s = newInstruction(mb, getModuleId(p), aggr_phase2(getFunctionId(p)));
1093 getArg(s,0) = getArg(p,0);
1094 s = pushArgument(mb, s, getArg(r,0));
1095 pushInstruction(mb, s);
1096}
1097
1098static int
1099chain_by_length(mat_t *mat, int g)
1100{
1101 int cnt = 0;
1102 while(g >= 0) {
1103 g = mat[g].pm;
1104 cnt++;
1105 }
1106 return cnt;
1107}
1108
1109static int
1110walk_n_back(mat_t *mat, int g, int cnt)
1111{
1112 while(cnt > 0){
1113 g = mat[g].pm;
1114 cnt--;
1115 }
1116 return g;
1117}
1118
1119static int
1120group_by_ext(matlist_t *ml, int g)
1121{
1122 int i;
1123
1124 for(i=g; i< ml->top; i++){
1125 if (ml->v[i].pm == g)
1126 return i;
1127 }
1128 return 0;
1129}
1130
1131/* In some cases we have non groupby attribute columns, these require
1132 * gext.projection(mat.pack(per partition ext.projections(x)))
1133 */
1134
1135static int
1136mat_group_project(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int e, int a)
1137{
1138 int tp = getArgType(mb,p,0), k;
1139 InstrPtr ai1 = newInstruction(mb, matRef, packRef), r;
1140 mat_t *mat = ml->v;
1141
1142 if(!ai1)
1143 return -1;
1144
1145 getArg(ai1,0) = newTmpVariable(mb, tp);
1146
1147 assert(mat[e].mi->argc == mat[a].mi->argc);
1148 for(k=1; k<mat[a].mi->argc; k++) {
1149 InstrPtr q = copyInstruction(p);
1150 if(!q) {
1151 freeInstruction(ai1);
1152 return -1;
1153 }
1154
1155 getArg(q,0) = newTmpVariable(mb, tp);
1156 getArg(q,1) = getArg(mat[e].mi,k);
1157 getArg(q,2) = getArg(mat[a].mi,k);
1158 pushInstruction(mb,q);
1159 if(setPartnr(ml, getArg(mat[a].mi,k), getArg(q,0), k))
1160 return -1;
1161
1162 /* pack the result into a mat */
1163 ai1 = pushArgument(mb,ai1,getArg(q,0));
1164 }
1165 pushInstruction(mb, ai1);
1166
1167 if((r = copyInstruction(p)) == NULL)
1168 return -1;
1169 getArg(r,1) = mat[e].mv;
1170 getArg(r,2) = getArg(ai1,0);
1171 pushInstruction(mb,r);
1172 return 0;
1173}
1174
1175/* Per partition aggregates are merged and aggregated together. For
1176 * most (handled) aggregates thats relatively simple. AVG is somewhat
1177 * more complex. */
1178static int
1179mat_group_aggr(MalBlkPtr mb, InstrPtr p, mat_t *mat, int b, int g, int e)
1180{
1181 int tp = getArgType(mb,p,0), k, tp2 = 0;
1182 char *aggr2 = aggr_phase2(getFunctionId(p));
1183 int isAvg = (getFunctionId(p) == subavgRef);
1184 InstrPtr ai1 = newInstruction(mb, matRef, packRef), ai10 = NULL, ai2;
1185
1186 if(!ai1)
1187 return -1;
1188
1189 getArg(ai1,0) = newTmpVariable(mb, tp);
1190
1191 if (isAvg) { /* counts */
1192 tp2 = newBatType(TYPE_lng);
1193 ai10 = newInstruction(mb, matRef, packRef);
1194 if(!ai10) {
1195 freeInstruction(ai1);
1196 return -1;
1197 }
1198 getArg(ai10,0) = newTmpVariable(mb, tp2);
1199 }
1200
1201 for(k=1; k<mat[b].mi->argc; k++) {
1202 InstrPtr q = copyInstruction(p);
1203 if(!q) {
1204 freeInstruction(ai1);
1205 return -1;
1206 }
1207
1208 getArg(q,0) = newTmpVariable(mb, tp);
1209 if (isAvg) {
1210 getArg(q,1) = newTmpVariable(mb, tp2);
1211 q = pushArgument(mb, q, getArg(q,1)); /* push at end, create space */
1212 q->retc = 2;
1213 getArg(q,q->argc-1) = getArg(q,q->argc-2);
1214 getArg(q,q->argc-2) = getArg(q,q->argc-3);
1215 }
1216 getArg(q,1+isAvg) = getArg(mat[b].mi,k);
1217 getArg(q,2+isAvg) = getArg(mat[g].mi,k);
1218 getArg(q,3+isAvg) = getArg(mat[e].mi,k);
1219 pushInstruction(mb,q);
1220
1221 /* pack the result into a mat */
1222 ai1 = pushArgument(mb,ai1,getArg(q,0));
1223 if (isAvg)
1224 ai10 = pushArgument(mb,ai10,getArg(q,1));
1225 }
1226 pushInstruction(mb, ai1);
1227 if (isAvg)
1228 pushInstruction(mb, ai10);
1229
1230 /* for avg we do sum (avg*(count/sumcount) ) */
1231 if (isAvg) {
1232 InstrPtr r,s,v,w, cond;
1233
1234 /* lng s = sum counts */
1235 s = newInstruction(mb, aggrRef, subsumRef);
1236 getArg(s,0) = newTmpVariable(mb, tp2);
1237 s = pushArgument(mb, s, getArg(ai10, 0));
1238 s = pushArgument(mb, s, mat[g].mv);
1239 s = pushArgument(mb, s, mat[e].mv);
1240 s = pushBit(mb, s, 1); /* skip nils */
1241 s = pushBit(mb, s, 1);
1242 pushInstruction(mb,s);
1243
1244 /* w=count = ifthenelse(s=count==0,NULL,s=count) */
1245 cond = newInstruction(mb, batcalcRef, eqRef);
1246 getArg(cond,0) = newTmpVariable(mb, newBatType(TYPE_bit));
1247 cond = pushArgument(mb, cond, getArg(s, 0));
1248 cond = pushLng(mb, cond, 0);
1249 pushInstruction(mb,cond);
1250
1251 w = newInstruction(mb, batcalcRef, ifthenelseRef);
1252 getArg(w,0) = newTmpVariable(mb, tp2);
1253 w = pushArgument(mb, w, getArg(cond, 0));
1254 w = pushNil(mb, w, TYPE_lng);
1255 w = pushArgument(mb, w, getArg(s, 0));
1256 pushInstruction(mb,w);
1257
1258 /* fetchjoin with groups */
1259 r = newInstruction(mb, algebraRef, projectionRef);
1260 getArg(r, 0) = newTmpVariable(mb, tp2);
1261 r = pushArgument(mb, r, mat[g].mv);
1262 r = pushArgument(mb, r, getArg(w,0));
1263 pushInstruction(mb,r);
1264 s = r;
1265
1266 /* dbl v = double(count) */
1267 v = newInstruction(mb, batcalcRef, dblRef);
1268 getArg(v,0) = newTmpVariable(mb, newBatType(TYPE_dbl));
1269 v = pushArgument(mb, v, getArg(ai10, 0));
1270 pushInstruction(mb, v);
1271
1272 /* dbl r = v / s */
1273 r = newInstruction(mb, batcalcRef, divRef);
1274 getArg(r,0) = newTmpVariable(mb, newBatType(TYPE_dbl));
1275 r = pushArgument(mb, r, getArg(v, 0));
1276 r = pushArgument(mb, r, getArg(s, 0));
1277 pushInstruction(mb,r);
1278
1279 /* dbl s = avg * r */
1280 s = newInstruction(mb, batcalcRef, mulRef);
1281 getArg(s,0) = newTmpVariable(mb, tp);
1282 s = pushArgument(mb, s, getArg(ai1, 0));
1283 s = pushArgument(mb, s, getArg(r, 0));
1284 pushInstruction(mb,s);
1285
1286 ai1 = s;
1287 }
1288 ai2 = newInstruction(mb, aggrRef, aggr2);
1289 getArg(ai2,0) = getArg(p,0);
1290 ai2 = pushArgument(mb, ai2, getArg(ai1, 0));
1291 ai2 = pushArgument(mb, ai2, mat[g].mv);
1292 ai2 = pushArgument(mb, ai2, mat[e].mv);
1293 ai2 = pushBit(mb, ai2, 1); /* skip nils */
1294 if (getFunctionId(p) != subminRef && getFunctionId(p) != submaxRef)
1295 ai2 = pushBit(mb, ai2, 1);
1296 pushInstruction(mb, ai2);
1297 return 0;
1298}
1299
1300/* The mat_group_{new,derive} keep an ext,attr1..attrn table.
1301 * This is the input for the final second phase group by.
1302 */
1303static void
1304mat_pack_group(MalBlkPtr mb, matlist_t *ml, int g)
1305{
1306 mat_t *mat = ml->v;
1307 int cnt = chain_by_length(mat, g), i;
1308 InstrPtr cur = NULL;
1309
1310 for(i=cnt-1; i>=0; i--) {
1311 /* if cur is non-NULL, it's a subgroup; if i is zero, it's "done" */
1312 InstrPtr grp = newInstruction(mb, groupRef,cur?i?subgroupRef:subgroupdoneRef:i?groupRef:groupdoneRef);
1313 int ogrp = walk_n_back(mat, g, i);
1314 int oext = group_by_ext(ml, ogrp);
1315 int attr = mat[oext].im;
1316
1317 getArg(grp,0) = mat[ogrp].mv;
1318 grp = pushReturn(mb, grp, mat[oext].mv);
1319 grp = pushReturn(mb, grp, newTmpVariable(mb, newBatType(TYPE_lng)));
1320 grp = pushArgument(mb, grp, getArg(mat[attr].mi, 0));
1321 if (cur)
1322 grp = pushArgument(mb, grp, getArg(cur, 0));
1323 pushInstruction(mb, grp);
1324 cur = grp;
1325 }
1326 mat[g].im = -1; /* only pack once */
1327}
1328
1329/*
1330 * foreach parent subgroup, do the
1331 * e2.projection(grp.projection((ext.projection(b)))
1332 * and one for the current group
1333 */
1334static int
1335mat_group_attr(MalBlkPtr mb, matlist_t *ml, int g, InstrPtr cext, int push )
1336{
1337 int cnt = chain_by_length(ml->v, g), i; /* number of attributes */
1338 int ogrp = g; /* previous group */
1339
1340 for(i = 0; i < cnt; i++) {
1341 int agrp = walk_n_back(ml->v, ogrp, i);
1342 int b = ml->v[agrp].im;
1343 int aext = group_by_ext(ml, agrp);
1344 int a = ml->v[aext].im;
1345 int atp = getArgType(mb,ml->v[a].mi,0), k;
1346 InstrPtr attr = newInstruction(mb, matRef, packRef);
1347
1348 //getArg(attr,0) = newTmpVariable(mb, atp);
1349 getArg(attr,0) = getArg(ml->v[b].mi,0);
1350
1351 for (k = 1; k<ml->v[a].mi->argc; k++ ) {
1352 InstrPtr r = newInstruction(mb, algebraRef, projectionRef);
1353 InstrPtr q = newInstruction(mb, algebraRef, projectionRef);
1354
1355 getArg(r, 0) = newTmpVariable(mb, newBatType(TYPE_oid));
1356 r = pushArgument(mb, r, getArg(cext,k));
1357 r = pushArgument(mb, r, getArg(ml->v[ogrp].mi,k));
1358 pushInstruction(mb,r);
1359
1360 getArg(q, 0) = newTmpVariable(mb, atp);
1361 q = pushArgument(mb, q, getArg(r,0));
1362 q = pushArgument(mb, q, getArg(ml->v[a].mi,k));
1363 pushInstruction(mb,q);
1364
1365 attr = pushArgument(mb, attr, getArg(q, 0));
1366 }
1367 if (push)
1368 pushInstruction(mb,attr);
1369 if(mat_add_var(ml, attr, NULL, getArg(attr, 0), mat_ext, -1, -1, push))
1370 return -1;
1371 /* keep new attribute with the group extend */
1372 ml->v[aext].im = ml->top-1;
1373 }
1374 return 0;
1375}
1376
1377static int
1378mat_group_new(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int b)
1379{
1380 int tp0 = getArgType(mb,p,0);
1381 int tp1 = getArgType(mb,p,1);
1382 int tp2 = getArgType(mb,p,2);
1383 int atp = getArgType(mb,p,3), i, a, g, push = 0;
1384 InstrPtr r0, r1, r2, attr;
1385
1386 if (getFunctionId(p) == subgroupdoneRef || getFunctionId(p) == groupdoneRef)
1387 push = 1;
1388
1389 r0 = newInstruction(mb, matRef, packRef);
1390 getArg(r0,0) = newTmpVariable(mb, tp0);
1391
1392 r1 = newInstruction(mb, matRef, packRef);
1393 getArg(r1,0) = newTmpVariable(mb, tp1);
1394
1395 r2 = newInstruction(mb, matRef, packRef);
1396 getArg(r2,0) = newTmpVariable(mb, tp2);
1397
1398 /* we keep an extend, attr table result, which will later be used
1399 * when we pack the group result */
1400 attr = newInstruction(mb, matRef, packRef);
1401 getArg(attr,0) = getArg(ml->v[b].mi,0);
1402
1403 for(i=1; i<ml->v[b].mi->argc; i++) {
1404 InstrPtr q = copyInstruction(p), r;
1405 if(!q) {
1406 freeInstruction(r0);
1407 freeInstruction(r1);
1408 freeInstruction(r2);
1409 freeInstruction(attr);
1410 return -1;
1411 }
1412
1413 getArg(q, 0) = newTmpVariable(mb, tp0);
1414 getArg(q, 1) = newTmpVariable(mb, tp1);
1415 getArg(q, 2) = newTmpVariable(mb, tp2);
1416 getArg(q, 3) = getArg(ml->v[b].mi, i);
1417 pushInstruction(mb, q);
1418 if(setPartnr(ml, getArg(ml->v[b].mi,i), getArg(q,0), i) ||
1419 setPartnr(ml, getArg(ml->v[b].mi,i), getArg(q,1), i) ||
1420 setPartnr(ml, getArg(ml->v[b].mi,i), getArg(q,2), i))
1421 return -1;
1422
1423 /* add result to mats */
1424 r0 = pushArgument(mb,r0,getArg(q,0));
1425 r1 = pushArgument(mb,r1,getArg(q,1));
1426 r2 = pushArgument(mb,r2,getArg(q,2));
1427
1428 r = newInstruction(mb, algebraRef, projectionRef);
1429 getArg(r, 0) = newTmpVariable(mb, atp);
1430 r = pushArgument(mb, r, getArg(q,1));
1431 r = pushArgument(mb, r, getArg(ml->v[b].mi,i));
1432 if(setPartnr(ml, getArg(ml->v[b].mi,i), getArg(r,0), i))
1433 return -1;
1434 pushInstruction(mb,r);
1435
1436 attr = pushArgument(mb, attr, getArg(r, 0));
1437 }
1438 pushInstruction(mb,r0);
1439 pushInstruction(mb,r1);
1440 pushInstruction(mb,r2);
1441 if (push)
1442 pushInstruction(mb,attr);
1443
1444 /* create mat's for the intermediates */
1445 a = ml->top;
1446 if(mat_add_var(ml, attr, NULL, getArg(attr, 0), mat_ext, -1, -1, push))
1447 return -1;
1448 g = ml->top;
1449 if(mat_add_var(ml, r0, p, getArg(p, 0), mat_grp, b, -1, 1) ||
1450 mat_add_var(ml, r1, p, getArg(p, 1), mat_ext, a, ml->top-1, 1) || /* point back at group */
1451 mat_add_var(ml, r2, p, getArg(p, 2), mat_cnt, -1, ml->top-1, 1)) /* point back at ext */
1452 return -1;
1453 if (push)
1454 mat_pack_group(mb, ml, g);
1455 return 0;
1456}
1457
1458static int
1459mat_group_derive(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int b, int g)
1460{
1461 int tp0 = getArgType(mb,p,0);
1462 int tp1 = getArgType(mb,p,1);
1463 int tp2 = getArgType(mb,p,2);
1464 int atp = getArgType(mb,p,3), i, a, push = 0;
1465 InstrPtr r0, r1, r2, attr;
1466
1467 if (getFunctionId(p) == subgroupdoneRef || getFunctionId(p) == groupdoneRef)
1468 push = 1;
1469
1470 if (ml->v[g].im == -1){ /* already packed */
1471 InstrPtr q = copyInstruction(p);
1472 if(!q)
1473 return -1;
1474 pushInstruction(mb, q);
1475 return 0;
1476 }
1477
1478 r0 = newInstruction(mb, matRef, packRef);
1479 getArg(r0,0) = newTmpVariable(mb, tp0);
1480
1481 r1 = newInstruction(mb, matRef, packRef);
1482 getArg(r1,0) = newTmpVariable(mb, tp1);
1483
1484 r2 = newInstruction(mb, matRef, packRef);
1485 getArg(r2,0) = newTmpVariable(mb, tp2);
1486
1487 /* we keep an extend, attr table result, which will later be used
1488 * when we pack the group result */
1489 attr = newInstruction(mb, matRef, packRef);
1490 getArg(attr,0) = getArg(ml->v[b].mi,0);
1491
1492 /* we need overlapping ranges */
1493 for(i=1; i<ml->v[b].mi->argc; i++) {
1494 InstrPtr q = copyInstruction(p), r;
1495 if(!q) {
1496 freeInstruction(r0);
1497 freeInstruction(r1);
1498 freeInstruction(r2);
1499 freeInstruction(attr);
1500 return -1;
1501 }
1502
1503 getArg(q,0) = newTmpVariable(mb, tp0);
1504 getArg(q,1) = newTmpVariable(mb, tp1);
1505 getArg(q,2) = newTmpVariable(mb, tp2);
1506 getArg(q,3) = getArg(ml->v[b].mi,i);
1507 getArg(q,4) = getArg(ml->v[g].mi,i);
1508 pushInstruction(mb,q);
1509 if(setPartnr(ml, getArg(ml->v[b].mi,i), getArg(q,0), i) ||
1510 setPartnr(ml, getArg(ml->v[b].mi,i), getArg(q,1), i) ||
1511 setPartnr(ml, getArg(ml->v[b].mi,i), getArg(q,2), i))
1512 return -1;
1513
1514 /* add result to mats */
1515 r0 = pushArgument(mb,r0,getArg(q,0));
1516 r1 = pushArgument(mb,r1,getArg(q,1));
1517 r2 = pushArgument(mb,r2,getArg(q,2));
1518
1519 r = newInstruction(mb, algebraRef, projectionRef);
1520 getArg(r, 0) = newTmpVariable(mb, atp);
1521 r = pushArgument(mb, r, getArg(q,1));
1522 r = pushArgument(mb, r, getArg(ml->v[b].mi,i));
1523 if(setPartnr(ml, getArg(ml->v[b].mi,i), getArg(r,0), i))
1524 return -1;
1525 pushInstruction(mb,r);
1526
1527 attr = pushArgument(mb, attr, getArg(r, 0));
1528 }
1529 pushInstruction(mb,r0);
1530 pushInstruction(mb,r1);
1531 pushInstruction(mb,r2);
1532 if (push)
1533 pushInstruction(mb,attr);
1534
1535 if(mat_group_attr(mb, ml, g, r1, push))
1536 return -1;
1537
1538 /* create mat's for the intermediates */
1539 a = ml->top;
1540 if(mat_add_var(ml, attr, NULL, getArg(attr, 0), mat_ext, -1, -1, push) ||
1541 mat_add_var(ml, r0, p, getArg(p, 0), mat_grp, b, g, 1))
1542 return -1;
1543 g = ml->top-1;
1544 if(mat_add_var(ml, r1, p, getArg(p, 1), mat_ext, a, ml->top-1, 1) || /* point back at group */
1545 mat_add_var(ml, r2, p, getArg(p, 2), mat_cnt, -1, ml->top-1, 1)) /* point back at ext */
1546 return -1;
1547 if (push)
1548 mat_pack_group(mb, ml, g);
1549 return 0;
1550}
1551
1552static int
1553mat_topn_project(MalBlkPtr mb, InstrPtr p, mat_t *mat, int m, int n)
1554{
1555 int tpe = getArgType(mb, p, 0), k;
1556 InstrPtr pck, q;
1557
1558 pck = newInstruction(mb, matRef, packRef);
1559 getArg(pck,0) = newTmpVariable(mb, tpe);
1560
1561 for(k=1; k<mat[m].mi->argc; k++) {
1562 InstrPtr q = copyInstruction(p);
1563 if(!q) {
1564 freeInstruction(pck);
1565 return -1;
1566 }
1567
1568 getArg(q,0) = newTmpVariable(mb, tpe);
1569 getArg(q,1) = getArg(mat[m].mi, k);
1570 getArg(q,2) = getArg(mat[n].mi, k);
1571 pushInstruction(mb, q);
1572
1573 pck = pushArgument(mb, pck, getArg(q, 0));
1574 }
1575 pushInstruction(mb, pck);
1576
1577 if((q = copyInstruction(p)) == NULL)
1578 return -1;
1579 getArg(q,2) = getArg(pck,0);
1580 pushInstruction(mb, q);
1581 return 0;
1582}
1583
1584static int
1585mat_pack_topn(MalBlkPtr mb, InstrPtr slc, mat_t *mat, int m)
1586{
1587 /* find chain of topn's */
1588 int cnt = chain_by_length(mat, m), i;
1589 InstrPtr cur = NULL;
1590
1591 for(i=cnt-1; i>=0; i--) {
1592 int otpn = walk_n_back(mat, m, i), var = 1, k;
1593 int attr = mat[otpn].im;
1594 int tpe = getVarType(mb, getArg(mat[attr].mi,0));
1595 InstrPtr pck, tpn, otopn = mat[otpn].org, a;
1596
1597 pck = newInstruction(mb, matRef, packRef);
1598 getArg(pck,0) = newTmpVariable(mb, tpe);
1599
1600 /* m.projection(attr); */
1601 for(k=1; k < mat[attr].mi->argc; k++) {
1602 InstrPtr q = newInstruction(mb, algebraRef, projectionRef);
1603 getArg(q, 0) = newTmpVariable(mb, tpe);
1604
1605 q = pushArgument(mb, q, getArg(slc, k));
1606 q = pushArgument(mb, q, getArg(mat[attr].mi, k));
1607 pushInstruction(mb, q);
1608
1609 pck = pushArgument(mb, pck, getArg(q,0));
1610 }
1611 pushInstruction(mb, pck);
1612
1613 a = pck;
1614
1615 if((tpn = copyInstruction(otopn)) == NULL)
1616 return -1;
1617 var = 1;
1618 if (cur) {
1619 getArg(tpn, tpn->retc+var) = getArg(cur, 0);
1620 var++;
1621 if (cur->retc == 2) {
1622 getArg(tpn, tpn->retc+var) = getArg(cur, 1);
1623 var++;
1624 }
1625 }
1626 getArg(tpn, tpn->retc) = getArg(a,0);
1627 pushInstruction(mb, tpn);
1628 cur = tpn;
1629 }
1630 return 0;
1631}
1632
1633static int
1634mat_topn(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int n, int o)
1635{
1636 int tpe = getArgType(mb,p,0), k, is_slice = isSlice(p), zero = -1;
1637 InstrPtr pck, gpck = NULL, q, r;
1638 int with_groups = (p->retc == 2), piv = 0, topn2 = (n >= 0);
1639
1640 assert( topn2 || o < 0);
1641 /* dummy mat instruction (needed to share result of p) */
1642 pck = newInstruction(mb, matRef, packRef);
1643 getArg(pck,0) = getArg(p,0);
1644
1645 if (with_groups) {
1646 gpck = newInstruction(mb, matRef, packRef);
1647 getArg(gpck,0) = getArg(p,1);
1648 }
1649
1650 if (is_slice) {
1651 ValRecord cst;
1652 cst.vtype= getArgType(mb,p,2);
1653 cst.val.lval= 0;
1654 cst.len = 0;
1655 zero = defConstant(mb, cst.vtype, &cst);
1656 }
1657 assert( (n<0 && o<0) ||
1658 (ml->v[m].mi->argc == ml->v[n].mi->argc &&
1659 ml->v[m].mi->argc == ml->v[o].mi->argc));
1660
1661 for(k=1; k< ml->v[m].mi->argc; k++) {
1662 if((q = copyInstruction(p)) == NULL) {
1663 if(gpck)
1664 freeInstruction(gpck);
1665 freeInstruction(pck);
1666 }
1667 getArg(q,0) = newTmpVariable(mb, tpe);
1668 if (with_groups)
1669 getArg(q,1) = newTmpVariable(mb, tpe);
1670 getArg(q,q->retc) = getArg(ml->v[m].mi,k);
1671 if (is_slice) /* lower bound should always be 0 on partial slices */
1672 getArg(q,q->retc+1) = zero;
1673 else if (topn2) {
1674 getArg(q,q->retc+1) = getArg(ml->v[n].mi,k);
1675 getArg(q,q->retc+2) = getArg(ml->v[o].mi,k);
1676 }
1677 pushInstruction(mb,q);
1678
1679 pck = pushArgument(mb, pck, getArg(q,0));
1680 if (with_groups)
1681 gpck = pushArgument(mb, gpck, getArg(q,1));
1682 }
1683
1684 piv = ml->top;
1685 if(mat_add_var(ml, pck, p, getArg(p,0), is_slice?mat_slc:mat_tpn, m, n, 0))
1686 return -1;
1687 if (with_groups && mat_add_var(ml, gpck, p, getArg(p,1), is_slice?mat_slc:mat_tpn, m, piv, 0))
1688 return -1;
1689
1690 if (is_slice || p->retc ==1 /* single result, ie last of the topn's */) {
1691 if (ml->v[m].type == mat_tpn || !is_slice) {
1692 if(mat_pack_topn(mb, pck, ml->v, (!is_slice)?piv:m))
1693 return -1;
1694 }
1695
1696 /* topn/slice over merged parts */
1697 if (is_slice) {
1698 /* real instruction */
1699 r = newInstruction(mb, matRef, packRef);
1700 getArg(r,0) = newTmpVariable(mb, tpe);
1701
1702 for(k=1; k< pck->argc; k++)
1703 r = pushArgument(mb, r, getArg(pck,k));
1704 pushInstruction(mb,r);
1705
1706 if((q = copyInstruction(p)) == NULL)
1707 return -1;
1708 setFunctionId(q, subsliceRef);
1709 if (ml->v[m].type != mat_tpn || is_slice)
1710 getArg(q,1) = getArg(r,0);
1711 pushInstruction(mb,q);
1712 }
1713
1714 ml->v[piv].type = mat_slc;
1715 }
1716 return 0;
1717}
1718
1719static int
1720mat_sample(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m)
1721{
1722 /* transform
1723 * a := sample.subuniform(b,n);
1724 * into
1725 * t1 := sample.subuniform(b1,n);
1726 * t2 := sample.subuniform(b2,n);
1727 * ...
1728 * t0 := mat.pack(t1,t2,...);
1729 * tn := sample.subuniform(t0,n);
1730 * a := algebra.projection(tn,t0);
1731 *
1732 * Note that this does *not* give a uniform sample of the original
1733 * bat b!
1734 */
1735
1736 int tpe = getArgType(mb,p,0), k, piv;
1737 InstrPtr pck, q, r;
1738
1739 pck = newInstruction(mb,matRef,packRef);
1740 getArg(pck,0) = newTmpVariable(mb, tpe);
1741
1742 for(k=1; k< ml->v[m].mi->argc; k++) {
1743 if((q = copyInstruction(p)) == NULL) {
1744 freeInstruction(pck);
1745 return -1;
1746 }
1747 getArg(q,0) = newTmpVariable(mb, tpe);
1748 getArg(q,q->retc) = getArg(ml->v[m].mi,k);
1749 pushInstruction(mb,q);
1750 pck = pushArgument(mb, pck, getArg(q,0));
1751 }
1752
1753 piv = ml->top;
1754 if(mat_add_var(ml, pck, p, getArg(p,0), mat_slc, m, -1, 1)) {
1755 freeInstruction(pck);
1756 return -1;
1757 }
1758 pushInstruction(mb,pck);
1759
1760 if((q = copyInstruction(p)) == NULL)
1761 return -1;
1762 getArg(q,0) = newTmpVariable(mb, tpe);
1763 getArg(q,q->retc) = getArg(pck,0);
1764 pushInstruction(mb,q);
1765
1766 r = newInstruction(mb, algebraRef, projectionRef);
1767 getArg(r,0) = getArg(p,0);
1768 pushArgument(mb, r, getArg(q, 0));
1769 pushArgument(mb, r, getArg(pck, 0));
1770 pushInstruction(mb, r);
1771
1772 matlist_pack(ml, piv);
1773 ml->v[piv].type = mat_slc;
1774 return 0;
1775}
1776
1777str
1778OPTmergetableImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
1779{
1780 InstrPtr *old;
1781 matlist_t ml;
1782 int oldtop, fm, fn, fo, fe, i, k, m, n, o, e, slimit, bailout = 0;
1783 int size=0, match, actions=0, distinct_topn = 0, /*topn_res = 0,*/ groupdone = 0, *vars;
1784 char buf[256], *group_input;
1785 lng usec = GDKusec();
1786 str msg = MAL_SUCCEED;
1787
1788 //if( optimizerIsApplied(mb, "mergetable") || !optimizerIsApplied(mb,"mitosis"))
1789 //return 0;
1790 old = mb->stmt;
1791 oldtop= mb->stop;
1792
1793 vars = (int*) GDKmalloc(sizeof(int)* mb->vtop);
1794 group_input = (char*) GDKzalloc(sizeof(char)* mb->vtop);
1795 if (vars == NULL || group_input == NULL){
1796 if (vars)
1797 GDKfree(vars);
1798 throw(MAL, "optimizer.mergetable", SQLSTATE(HY001) MAL_MALLOC_FAIL);
1799 }
1800 /* check for bailout conditions */
1801 for (i = 1; i < oldtop && !bailout; i++) {
1802 int j;
1803
1804 p = old[i];
1805
1806 for (j = 0; j<p->retc; j++) {
1807 int res = getArg(p, j);
1808 vars[res] = i;
1809 }
1810
1811 /* pack if there is a group statement following a groupdone (ie aggr(distinct)) */
1812 if (getModuleId(p) == groupRef && p->argc == 5 &&
1813 (getFunctionId(p) == subgroupRef ||
1814 getFunctionId(p) == subgroupdoneRef ||
1815 getFunctionId(p) == groupRef ||
1816 getFunctionId(p) == groupdoneRef)) {
1817 InstrPtr q = old[vars[getArg(p, p->argc-1)]]; /* group result from a previous group(done) */
1818
1819 if (getFunctionId(q) == subgroupdoneRef || getFunctionId(q) == groupdoneRef)
1820 groupdone = 1;
1821 }
1822 /* bail out if there is a input for a group, which has been used for a group already (solves problems with qube like groupings) */
1823 if (getModuleId(p) == groupRef &&
1824 (getFunctionId(p) == subgroupRef ||
1825 getFunctionId(p) == subgroupdoneRef ||
1826 getFunctionId(p) == groupRef ||
1827 getFunctionId(p) == groupdoneRef)) {
1828 int input = getArg(p, p->retc); /* argument one is first input */
1829
1830 if (group_input[input]) {
1831 if( OPTdebug & OPTmergetable){
1832 fprintf(stderr,"WARNING::: mergetable bailout on group input reuse in group statement \n");
1833 }
1834 bailout = 1;
1835 }
1836
1837 group_input[input] = 1;
1838 }
1839 if (getModuleId(p) == algebraRef &&
1840 getFunctionId(p) == selectNotNilRef ) {
1841 if( OPTdebug & OPTmergetable){
1842 fprintf(stderr,"WARNING::: mergetable bailout not nil ref \n");
1843 }
1844 bailout = 1;
1845 }
1846 /*
1847 if (isTopn(p))
1848 topn_res = getArg(p, 0);
1849 */
1850 /* not idea how to detect this yet */
1851 //distinct_topn = 1;
1852 }
1853 GDKfree(vars);
1854 GDKfree(group_input);
1855
1856 ml.horigin = 0;
1857 ml.torigin = 0;
1858 ml.v = 0;
1859 ml.vars = 0;
1860 if (bailout)
1861 goto cleanup;
1862
1863 /* the number of MATs is limited to the variable stack*/
1864 ml.size = mb->vtop;
1865 ml.top = 0;
1866 ml.v = (mat_t*) GDKzalloc(ml.size * sizeof(mat_t));
1867 ml.vsize = mb->vsize;
1868 ml.horigin = (int*) GDKmalloc(sizeof(int)* ml.vsize);
1869 ml.torigin = (int*) GDKmalloc(sizeof(int)* ml.vsize);
1870 ml.vars = (int*) GDKzalloc(sizeof(int)* ml.vsize);
1871 if ( ml.v == NULL || ml.horigin == NULL || ml.torigin == NULL || ml.vars == NULL) {
1872 goto cleanup;
1873 }
1874 for (i=0; i<ml.vsize; i++) {
1875 ml.horigin[i] = ml.torigin[i] = -1;
1876 ml.vars[i] = -1;
1877 }
1878
1879 slimit = mb->ssize;
1880 size = (mb->stop * 1.2 < mb->ssize)? mb->ssize:(int)(mb->stop * 1.2);
1881 mb->stmt = (InstrPtr *) GDKzalloc(size * sizeof(InstrPtr));
1882 if ( mb->stmt == NULL) {
1883 mb->stmt = old;
1884 msg = createException(MAL,"optimizer.mergetable", SQLSTATE(HY001) MAL_MALLOC_FAIL);
1885 goto cleanup;
1886 }
1887 mb->ssize = size;
1888 mb->stop = 0;
1889
1890 for( i=0; i<oldtop; i++){
1891 int bats = 0;
1892 InstrPtr r, cp;
1893
1894 p = old[i];
1895 if (getModuleId(p) == matRef &&
1896 (getFunctionId(p) == newRef || getFunctionId(p) == packRef)){
1897 if(mat_set_prop(&ml, mb, p) || mat_add_var(&ml, p, NULL, getArg(p,0), mat_none, -1, -1, 1)) {
1898 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
1899 goto cleanup;
1900 }
1901 continue;
1902 }
1903
1904 /*
1905 * If the instruction does not contain MAT references it can simply be added.
1906 * Otherwise we have to decide on either packing them or replacement.
1907 */
1908 if ((match = nr_of_mats(p, &ml)) == 0) {
1909 cp = copyInstruction(p);
1910 if(!cp) {
1911 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
1912 goto cleanup;
1913 }
1914 pushInstruction(mb, cp);
1915 continue;
1916 }
1917 bats = nr_of_bats(mb, p);
1918
1919 /* (l,r) Join (L, R, ..)
1920 * 2 -> (l,r) equi/theta joins (l,r)
1921 * 3 -> (l,r) range-joins (l,r1,r2)
1922 * NxM -> (l,r) filter-joins (l1,..,ln,r1,..,rm)
1923 */
1924 if (match > 0 && isMatJoinOp(p) &&
1925 p->argc >= 3 && p->retc == 2 && bats >= 2) {
1926 if (bats == 2) {
1927 m = is_a_mat(getArg(p,p->retc), &ml);
1928 n = is_a_mat(getArg(p,p->retc+1), &ml);
1929 if(mat_join2(mb, p, &ml, m, n)) {
1930 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
1931 goto cleanup;
1932 }
1933 } else {
1934 if ( mat_joinNxM(cntxt, mb, p, &ml, bats)) {
1935 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
1936 goto cleanup;
1937 }
1938 }
1939 actions++;
1940 continue;
1941 }
1942 if (match > 0 && isMatLeftJoinOp(p) && p->argc >= 3 && p->retc == 2 &&
1943 match == 1 && bats == 2) {
1944 m = is_a_mat(getArg(p,p->retc), &ml);
1945 n = -1;
1946
1947 if (m >= 0) {
1948 if(mat_join2(mb, p, &ml, m, n)) {
1949 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
1950 goto cleanup;
1951 }
1952 actions++;
1953 continue;
1954 }
1955 }
1956 /*
1957 * Aggregate handling is a prime target for optimization.
1958 * The simple cases are dealt with first.
1959 * Handle the rewrite v:=aggr.count(b) and sum()
1960 * And the min/max is as easy
1961 */
1962 if (match == 1 && p->argc >= 2 &&
1963 ((getModuleId(p)==aggrRef &&
1964 (getFunctionId(p)== countRef ||
1965 getFunctionId(p)== count_no_nilRef ||
1966 getFunctionId(p)== minRef ||
1967 getFunctionId(p)== maxRef ||
1968 getFunctionId(p)== avgRef ||
1969 getFunctionId(p)== sumRef ||
1970 getFunctionId(p) == prodRef))) &&
1971 (m=is_a_mat(getArg(p,1), &ml)) >= 0) {
1972 mat_aggr(mb, p, ml.v, m);
1973 actions++;
1974 continue;
1975 }
1976
1977 if (match == 1 && bats == 1 && p->argc == 4 && isSlice(p) && ((m=is_a_mat(getArg(p,p->retc), &ml)) >= 0)) {
1978 if(mat_topn(mb, p, &ml, m, -1, -1)) {
1979 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
1980 goto cleanup;
1981 }
1982 actions++;
1983 continue;
1984 }
1985
1986 if (match == 1 && bats == 1 && p->argc == 3 && isSample(p) && ((m=is_a_mat(getArg(p,p->retc), &ml)) >= 0)) {
1987 if(mat_sample(mb, p, &ml, m)) {
1988 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
1989 goto cleanup;
1990 }
1991 actions++;
1992 continue;
1993 }
1994
1995 if (!distinct_topn && match == 1 && bats == 1 && (p->argc-p->retc) == 4 && isTopn(p) && ((m=is_a_mat(getArg(p,p->retc), &ml)) >= 0)) {
1996 if(mat_topn(mb, p, &ml, m, -1, -1)) {
1997 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
1998 goto cleanup;
1999 }
2000 actions++;
2001 continue;
2002 }
2003 if (!distinct_topn && match == 3 && bats == 3 && (p->argc-p->retc) == 6 && isTopn(p) &&
2004 ((m=is_a_mat(getArg(p,p->retc), &ml)) >= 0) &&
2005 ((n=is_a_mat(getArg(p,p->retc+1), &ml)) >= 0) &&
2006 ((o=is_a_mat(getArg(p,p->retc+2), &ml)) >= 0)) {
2007 if(mat_topn(mb, p, &ml, m, n, o)) {
2008 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2009 goto cleanup;
2010 }
2011 actions++;
2012 continue;
2013 }
2014
2015 /* Now we handle subgroup and aggregation statements. */
2016 if (!groupdone && match == 1 && bats == 1 && p->argc == 4 && getModuleId(p) == groupRef &&
2017 (getFunctionId(p) == subgroupRef || getFunctionId(p) == subgroupdoneRef || getFunctionId(p) == groupRef || getFunctionId(p) == groupdoneRef) &&
2018 ((m=is_a_mat(getArg(p,p->retc), &ml)) >= 0)) {
2019 if(mat_group_new(mb, p, &ml, m)) {
2020 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2021 goto cleanup;
2022 }
2023 actions++;
2024 continue;
2025 }
2026 if (!groupdone && match == 2 && bats == 2 && p->argc == 5 && getModuleId(p) == groupRef &&
2027 (getFunctionId(p) == subgroupRef || getFunctionId(p) == subgroupdoneRef || getFunctionId(p) == groupRef || getFunctionId(p) == groupdoneRef) &&
2028 ((m=is_a_mat(getArg(p,p->retc), &ml)) >= 0) &&
2029 ((n=is_a_mat(getArg(p,p->retc+1), &ml)) >= 0) &&
2030 ml.v[n].im >= 0 /* not packed */) {
2031 if(mat_group_derive(mb, p, &ml, m, n)) {
2032 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2033 goto cleanup;
2034 }
2035 actions++;
2036 continue;
2037 }
2038 /* TODO sub'aggr' with cand list */
2039 if (match == 3 && bats == 3 && getModuleId(p) == aggrRef && p->argc >= 4 &&
2040 (getFunctionId(p) == subcountRef ||
2041 getFunctionId(p) == subminRef ||
2042 getFunctionId(p) == submaxRef ||
2043 getFunctionId(p) == subavgRef ||
2044 getFunctionId(p) == subsumRef ||
2045 getFunctionId(p) == subprodRef) &&
2046 ((m=is_a_mat(getArg(p,1), &ml)) >= 0) &&
2047 ((n=is_a_mat(getArg(p,2), &ml)) >= 0) &&
2048 ((o=is_a_mat(getArg(p,3), &ml)) >= 0)) {
2049 if(mat_group_aggr(mb, p, ml.v, m, n, o)) {
2050 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2051 goto cleanup;
2052 }
2053 actions++;
2054 continue;
2055 }
2056 /* Handle cases of ext.projection and .projection(grp) */
2057 if (match == 2 && getModuleId(p) == algebraRef &&
2058 getFunctionId(p) == projectionRef &&
2059 (m=is_a_mat(getArg(p,1), &ml)) >= 0 &&
2060 (n=is_a_mat(getArg(p,2), &ml)) >= 0 &&
2061 (ml.v[m].type == mat_ext || ml.v[n].type == mat_grp)) {
2062 assert(ml.v[m].pushed);
2063 if (!ml.v[n].pushed) {
2064 if(mat_group_project(mb, p, &ml, m, n)) {
2065 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2066 goto cleanup;
2067 }
2068 } else {
2069 cp = copyInstruction(p);
2070 if(!cp) {
2071 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2072 goto cleanup;
2073 }
2074 pushInstruction(mb, cp);
2075 }
2076 continue;
2077 }
2078
2079 /* Handle cases of slice.projection */
2080 if (match == 2 && getModuleId(p) == algebraRef &&
2081 getFunctionId(p) == projectionRef &&
2082 (m=is_a_mat(getArg(p,1), &ml)) >= 0 &&
2083 (n=is_a_mat(getArg(p,2), &ml)) >= 0 &&
2084 (ml.v[m].type == mat_slc)) {
2085 if(mat_topn_project(mb, p, ml.v, m, n)) {
2086 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2087 goto cleanup;
2088 }
2089 actions++;
2090 continue;
2091 }
2092
2093 /* Handle projection */
2094 if (match > 0 && getModuleId(p) == algebraRef &&
2095 getFunctionId(p) == projectionRef &&
2096 (m=is_a_mat(getArg(p,1), &ml)) >= 0) {
2097 n=is_a_mat(getArg(p,2), &ml);
2098 if(mat_projection(mb, p, &ml, m, n)) {
2099 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2100 goto cleanup;
2101 }
2102 actions++;
2103 continue;
2104 }
2105 /* Handle setops */
2106 if (match > 0 && getModuleId(p) == algebraRef &&
2107 (getFunctionId(p) == differenceRef ||
2108 getFunctionId(p) == intersectRef) &&
2109 (m=is_a_mat(getArg(p,1), &ml)) >= 0) {
2110 n=is_a_mat(getArg(p,2), &ml);
2111 if(mat_setop(mb, p, &ml, m, n)) {
2112 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2113 goto cleanup;
2114 }
2115 actions++;
2116 continue;
2117 }
2118
2119 m = n = o = e = -1;
2120 for( fm= p->argc-1; fm>=p->retc ; fm--)
2121 if ((m=is_a_mat(getArg(p,fm), &ml)) >= 0)
2122 break;
2123
2124 for( fn= fm-1; fn>=p->retc ; fn--)
2125 if ((n=is_a_mat(getArg(p,fn), &ml)) >= 0)
2126 break;
2127
2128 for( fo= fn-1; fo>=p->retc ; fo--)
2129 if ((o=is_a_mat(getArg(p,fo), &ml)) >= 0)
2130 break;
2131
2132 for( fe= fo-1; fe>=p->retc ; fe--)
2133 if ((e=is_a_mat(getArg(p,fe), &ml)) >= 0)
2134 break;
2135
2136 /* delta* operator have a ins bat as last argument, we move the inserts into the last delta statement, ie
2137 * all but last need to remove one argument */
2138 if (match == 3 && bats == 4 && isDelta(p) &&
2139 (m=is_a_mat(getArg(p,fm), &ml)) >= 0 &&
2140 (n=is_a_mat(getArg(p,fn), &ml)) >= 0 &&
2141 (o=is_a_mat(getArg(p,fo), &ml)) >= 0){
2142 if ((r = mat_delta(&ml, mb, p, ml.v, m, n, o, -1, fm, fn, fo, 0)) != NULL) {
2143 actions++;
2144 } else {
2145 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2146 goto cleanup;
2147 }
2148
2149 continue;
2150 }
2151 if (match == 4 && bats == 5 && isDelta(p) &&
2152 (m=is_a_mat(getArg(p,fm), &ml)) >= 0 &&
2153 (n=is_a_mat(getArg(p,fn), &ml)) >= 0 &&
2154 (o=is_a_mat(getArg(p,fo), &ml)) >= 0 &&
2155 (e=is_a_mat(getArg(p,fe), &ml)) >= 0){
2156 if ((r = mat_delta(&ml, mb, p, ml.v, m, n, o, e, fm, fn, fo, fe)) != NULL) {
2157 actions++;
2158 } else {
2159 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2160 goto cleanup;
2161 }
2162 continue;
2163 }
2164
2165 /* select on insert, should use last tid only */
2166 if (match == 1 && fm == 2 && isSelect(p) && p->retc == 1 &&
2167 (m=is_a_mat(getArg(p,fm), &ml)) >= 0 &&
2168 !ml.v[m].packed && /* not packed yet */
2169 was_a_mat(getArg(p,fm-1), &ml) < 0){ /* not previously packed */
2170 if((r = copyInstruction(p)) == NULL) {
2171 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2172 goto cleanup;
2173 }
2174 getArg(r, fm) = getArg(ml.v[m].mi, ml.v[m].mi->argc-1);
2175 pushInstruction(mb, r);
2176 actions++;
2177 continue;
2178 }
2179
2180 /* select on update, with nil bat */
2181 if (match == 1 && fm == 1 && isSelect(p) && p->retc == 1 &&
2182 (m=is_a_mat(getArg(p,fm), &ml)) >= 0 && bats == 2 &&
2183 isaBatType(getArgType(mb,p,2)) && isVarConstant(mb,getArg(p,2)) &&
2184 is_bat_nil(getVarConstant(mb,getArg(p,2)).val.bval)) {
2185 if ((r = mat_apply1(mb, p, &ml, m, fm)) != NULL) {
2186 if(mat_add(&ml, r, mat_type(ml.v, m), getFunctionId(p))) {
2187 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2188 goto cleanup;
2189 }
2190 } else {
2191 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2192 goto cleanup;
2193 }
2194 actions++;
2195 continue;
2196 }
2197
2198
2199 if (match == 3 && bats == 3 && (isFragmentGroup(p) || isFragmentGroup2(p) || isMapOp(p)) && p->retc != 2 &&
2200 (m=is_a_mat(getArg(p,fm), &ml)) >= 0 &&
2201 (n=is_a_mat(getArg(p,fn), &ml)) >= 0 &&
2202 (o=is_a_mat(getArg(p,fo), &ml)) >= 0){
2203 assert(ml.v[m].mi->argc == ml.v[n].mi->argc);
2204 if(mat_apply3(mb, p, &ml, m, n, o, fm, fn, fo)) {
2205 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2206 goto cleanup;
2207 }
2208 actions++;
2209 continue;
2210 }
2211 if (match == 2 && bats == 2 && (isFragmentGroup(p) || isFragmentGroup2(p) || isMapOp(p)) && p->retc != 2 &&
2212 (m=is_a_mat(getArg(p,fm), &ml)) >= 0 &&
2213 (n=is_a_mat(getArg(p,fn), &ml)) >= 0){
2214 assert(ml.v[m].mi->argc == ml.v[n].mi->argc);
2215 if(mat_apply2(&ml, mb, p, ml.v, m, n, fm, fn)) {
2216 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2217 goto cleanup;
2218 }
2219 actions++;
2220 continue;
2221 }
2222
2223 if (match == 1 && bats == 1 && (isFragmentGroup(p) || isMapOp(p) ||
2224 (!getModuleId(p) && !getFunctionId(p) && p->barrier == 0 /* simple assignment */)) && p->retc != 2 &&
2225 (m=is_a_mat(getArg(p,fm), &ml)) >= 0){
2226 if ((r = mat_apply1(mb, p, &ml, m, fm)) != NULL) {
2227 if(mat_add(&ml, r, mat_type(ml.v, m), getFunctionId(p))) {
2228 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2229 goto cleanup;
2230 }
2231 } else {
2232 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2233 goto cleanup;
2234 }
2235 actions++;
2236 continue;
2237 }
2238
2239 /*
2240 * All other instructions should be checked for remaining MAT dependencies.
2241 * It requires MAT materialization.
2242 */
2243 if( OPTdebug & OPTmergetable){
2244 fprintf(stderr, "# %s.%s %d\n", getModuleId(p), getFunctionId(p), match);
2245 }
2246
2247 for (k = p->retc; k<p->argc; k++) {
2248 if((m=is_a_mat(getArg(p,k), &ml)) >= 0){
2249 mat_pack(mb, &ml, m);
2250 }
2251 }
2252
2253 cp = copyInstruction(p);
2254 if(!cp) {
2255 msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY001) MAL_MALLOC_FAIL);
2256 goto cleanup;
2257 }
2258 pushInstruction(mb, cp);
2259 }
2260 (void) stk;
2261 chkTypes(cntxt->usermodule,mb, TRUE);
2262 if( mb->errors != MAL_SUCCEED)
2263 goto cleanup;
2264
2265 if( OPTdebug & OPTmergetable)
2266 {
2267 fprintf(stderr,"#Result of multi table optimizer\n");
2268 chkTypes(cntxt->usermodule, mb, FALSE);
2269 chkFlow(mb);
2270 chkDeclarations(mb);
2271 fprintFunction(stderr, mb, 0, LIST_MAL_ALL);
2272 }
2273
2274 if ( mb->errors == MAL_SUCCEED) {
2275 for(i=0; i<slimit; i++)
2276 if (old[i])
2277 freeInstruction(old[i]);
2278 GDKfree(old);
2279 }
2280 for (i=0; i<ml.top; i++) {
2281 if (ml.v[i].mi && !ml.v[i].pushed)
2282 freeInstruction(ml.v[i].mi);
2283 }
2284cleanup:
2285 if (ml.v) GDKfree(ml.v);
2286 if (ml.horigin) GDKfree(ml.horigin);
2287 if (ml.torigin) GDKfree(ml.torigin);
2288 if (ml.vars) GDKfree(ml.vars);
2289 /* Defense line against incorrect plans */
2290 if( actions > 0 && msg == MAL_SUCCEED){
2291 chkTypes(cntxt->usermodule, mb, FALSE);
2292 chkFlow(mb);
2293 chkDeclarations(mb);
2294 }
2295 /* keep all actions taken as a post block comment */
2296 usec = GDKusec()- usec;
2297 snprintf(buf,256,"%-20s actions=%2d time=" LLFMT " usec","mergetable",actions, usec);
2298 newComment(mb,buf);
2299 if( actions >= 0)
2300 addtoMalBlkHistory(mb);
2301
2302 if( OPTdebug & OPTmergetable){
2303 fprintf(stderr, "#MERGETABLE optimizer exit\n");
2304 fprintFunction(stderr, mb, 0, LIST_MAL_ALL);
2305 }
2306 return msg;
2307}
2308