1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9 /* (c) M. Kersten
10 */
11#include "monetdb_config.h"
12#include "opt_prelude.h"
13#include "opt_support.h"
14#include "mal_interpreter.h"
15#include "mal_listing.h"
16#include "mal_debugger.h"
17#include "opt_multiplex.h"
18#include "optimizer_private.h"
19#include "manifold.h"
20
21/*
22 * Optimizer catalog with runtime statistics;
23 */
24struct OPTcatalog {
25 char *name;
26 int enabled;
27 int calls;
28 int actions;
29} optcatalog[]= {
30{"aliases", 0, 0, 0},
31{"coercions", 0, 0, 0},
32{"commonTerms", 0, 0, 0},
33{"constants", 0, 0, 0},
34{"costModel", 0, 0, 0},
35{"dataflow", 0, 0, 0},
36{"deadcode", 0, 0, 0},
37{"emptybind", 0, 0, 0},
38{"evaluate", 0, 0, 0},
39{"garbage", 0, 0, 0},
40{"generator", 0, 0, 0},
41{"history", 0, 0, 0},
42{"inline", 0, 0, 0},
43{"projectionpath", 0, 0, 0},
44{"jit", 0, 0, 0},
45{"json", 0, 0, 0},
46{"macro", 0, 0, 0},
47{"matpack", 0, 0, 0},
48{"mergetable", 0, 0, 0},
49{"mitosis", 0, 0, 0},
50{"multiplex", 0, 0, 0},
51{"oltp", 0, 0, 0},
52{"postfix", 0, 0, 0},
53{"reduce", 0, 0, 0},
54{"remap", 0, 0, 0},
55{"remote", 0, 0, 0},
56{"reorder", 0, 0, 0},
57{"wlcr", 0, 0, 0},
58{"pushselect", 0, 0, 0},
59{ 0, 0, 0, 0}
60};
61
62/* some optimizers can only be applied once.
63 * The optimizer trace at the end of the MAL block
64 * can be used to check for this.
65 */
66int
67optimizerIsApplied(MalBlkPtr mb, str optname)
68{
69 InstrPtr p;
70 int i;
71 for( i = mb->stop; i < mb->ssize; i++){
72 p = getInstrPtr(mb,i);
73 if (p && getModuleId(p) == optimizerRef && p->token == REMsymbol && strcmp(getFunctionId(p),optname) == 0)
74 return 1;
75 }
76 return 0;
77}
78
79/*
80 * Limit the loop count in the optimizer to guard against indefinite
81 * recursion, provided the optimizer does not itself generate
82 * a growing list.
83 */
84str
85optimizeMALBlock(Client cntxt, MalBlkPtr mb)
86{
87 InstrPtr p;
88 int pc;
89 int qot = 0;
90 str msg = MAL_SUCCEED;
91 int cnt = 0;
92 int actions = 0;
93 lng clk = GDKusec();
94 char buf[256];
95
96 /* assume the type and flow have been checked already */
97 /* SQL functions intended to be inlined should not be optimized */
98 if ( mb->inlineProp)
99 return 0;
100
101 mb->optimize = 0;
102 if (mb->errors)
103 throw(MAL, "optimizer.MALoptimizer", SQLSTATE(42000) "Start with inconsistent MAL plan");
104
105 // strong defense line, assure that MAL plan is initially correct
106 if( mb->errors == 0 && mb->stop > 1){
107 resetMalBlk(mb, mb->stop);
108 chkTypes(cntxt->usermodule, mb, FALSE);
109 chkFlow(mb);
110 chkDeclarations(mb);
111 if( msg)
112 return msg;
113 if( mb->errors != MAL_SUCCEED){
114 msg = mb->errors;
115 mb->errors = MAL_SUCCEED;
116 return msg;
117 }
118 }
119
120 /* Optimizers may massage the plan in such a way that a new pass is needed.
121 * When no optimzer call is found, then terminate. */
122 do {
123 qot = 0;
124 for (pc = 0; pc < mb->stop; pc++) {
125 p = getInstrPtr(mb, pc);
126 if (getModuleId(p) == optimizerRef && p->fcn && p->token != REMsymbol) {
127 /* all optimizers should behave like patterns */
128 /* However, we don't have a stack now */
129 qot++;
130 actions++;
131 msg = (str) (*p->fcn) (cntxt, mb, 0, p);
132 if (msg) {
133 str place = getExceptionPlace(msg);
134 str nmsg = NULL;
135 if (place){
136 nmsg = createException(getExceptionType(msg), place, "%s", getExceptionMessageAndState(msg));
137 GDKfree(place);
138 }
139 if (nmsg ) {
140 freeException(msg);
141 msg = nmsg;
142 }
143 goto wrapup;
144 }
145 if (cntxt->mode == FINISHCLIENT){
146 mb->optimize = GDKusec() - clk;
147 throw(MAL, "optimizeMALBlock", SQLSTATE(42000) "prematurely stopped client");
148 }
149 pc= -1;
150 }
151 }
152 } while (qot && cnt++ < mb->stop);
153
154wrapup:
155 /* Keep the total time spent on optimizing the plan for inspection */
156 if(actions > 0 && msg == MAL_SUCCEED){
157 mb->optimize = GDKusec() - clk;
158 snprintf(buf, 256, "%-20s actions=%2d time=" LLFMT " usec", "total", actions, mb->optimize);
159 newComment(mb, buf);
160 }
161 if (cnt >= mb->stop)
162 throw(MAL, "optimizer.MALoptimizer", SQLSTATE(42000) OPTIMIZER_CYCLE);
163 return msg;
164}
165
166/*
167 * The default MAL optimizer includes a final call to
168 * the multiplex expander.
169 * We should take care of functions marked as 'inline',
170 * because they should be kept in raw form.
171 * Their optimization takes place after inlining.
172 */
173str
174MALoptimizer(Client c)
175{
176 str msg;
177
178 if ( c->curprg->def->inlineProp)
179 return MAL_SUCCEED;
180 // only a signature statement can be skipped
181 if (c ->curprg->def->stop == 1)
182 return MAL_SUCCEED;
183 msg= optimizeMALBlock(c, c->curprg->def);
184 if( msg == MAL_SUCCEED)
185 msg = OPTmultiplexSimple(c, c->curprg->def);
186 return msg;
187}
188
189/* Only used by opt_commonTerms! */
190int hasSameSignature(MalBlkPtr mb, InstrPtr p, InstrPtr q){
191 int i;
192
193 if ( getFunctionId(q) == getFunctionId(p) &&
194 getModuleId(q) == getModuleId(p) &&
195 getFunctionId(q) != 0 &&
196 getModuleId(q) != 0) {
197 if( q->retc != p->retc || q->argc != p->argc)
198 return FALSE;
199 for( i=0; i < p->argc; i++)
200 if (getArgType(mb,p,i) != getArgType(mb,q,i))
201 return FALSE;
202 return TRUE;
203 }
204 return FALSE;
205}
206
207/* Only used by opt_commonTerms! */
208int hasSameArguments(MalBlkPtr mb, InstrPtr p, InstrPtr q)
209{ int k;
210 int (*cmp)(const void *, const void *);
211 VarPtr w,u;
212
213 (void) mb;
214 if( p->retc != q->retc || p->argc != q->argc)
215 return FALSE;
216 /* heuristic, because instructions are linked using last constant argument */
217 for(k=p->argc-1; k >= p->retc; k--)
218 if( q->argv[k]!= p->argv[k]){
219 if( isVarConstant(mb,getArg(p,k)) &&
220 isVarConstant(mb,getArg(q,k)) ) {
221 w= getVar(mb,getArg(p,k));
222 u= getVar(mb,getArg(q,k));
223 cmp = ATOMcompare(w->value.vtype);
224 if ( w->value.vtype == u->value.vtype &&
225 (*cmp)(VALptr(&w->value), VALptr(&u->value)) == 0)
226 continue;
227 }
228 return FALSE;
229 }
230 return TRUE;
231}
232
233/*
234 * If two instructions have elements in common in their target list,
235 * it means a variable is re-initialized and should not be considered
236 * an alias.
237 */
238int
239hasCommonResults(InstrPtr p, InstrPtr q)
240{
241 int k, l;
242
243 for (k = 0; k < p->retc; k++)
244 for (l = 0; l < q->retc; l++)
245 if (p->argv[k] == q->argv[l])
246 return TRUE;
247 return FALSE;
248}
249/*
250 * Dependency between target variables and arguments can be
251 * checked with isDependent().
252 */
253static int
254isDependent(InstrPtr p, InstrPtr q){
255 int i,j;
256 for(i= 0; i<q->retc; i++)
257 for(j= p->retc; j<p->argc; j++)
258 if( getArg(q,i)== getArg(p,j)) return TRUE;
259 return FALSE;
260}
261/*
262 * The safety property should be relatively easy to determine for
263 * each MAL function. This calls for accessing the function MAL block
264 * and to inspect the arguments of the signature.
265 */
266int
267isUnsafeFunction(InstrPtr q)
268{
269 InstrPtr p;
270
271 if (q->fcn == 0 || getFunctionId(q) == 0 || q->blk == NULL)
272 return FALSE;
273 p= getInstrPtr(q->blk,0);
274 if( p->retc== 0)
275 return TRUE;
276 return q->blk->unsafeProp;
277}
278
279int
280isSealedFunction(InstrPtr q)
281{
282 InstrPtr p;
283
284 if (q->fcn == 0 || getFunctionId(q) == 0 || q->blk == NULL)
285 return FALSE;
286 p= getInstrPtr(q->blk,0);
287 if( p->retc== 0)
288 return TRUE;
289 return q->blk->sealedProp;
290}
291
292/*
293 * Instructions are unsafe if one of the arguments is also mentioned
294 * in the result list. Alternatively, the 'unsafe' property is set
295 * for the function call itself.
296 */
297int
298isUnsafeInstruction(InstrPtr q)
299{
300 int j, k;
301
302 for (j = 0; j < q->retc; j++)
303 for (k = q->retc; k < q->argc; k++)
304 if (q->argv[k] == q->argv[j])
305 return TRUE;
306 return FALSE;
307}
308
309/*
310 * Any instruction may block identification of a common
311 * subexpression. It suffices to stumble upon an unsafe function
312 * whose parameter lists has a non-empty intersection with the
313 * targeted instruction.
314 * To illustrate, consider the sequence
315 * @example
316 * L1 := f(A,B,C);
317 * ...
318 * G1 := g(D,E,F);
319 * ...
320 * l2:= f(A,B,C);
321 * ...
322 * L2:= h()
323 * @end example
324 *
325 * The instruction G1:=g(D,E,F) is blocking if G1 is an alias
326 * for @verb{ { }A,B,C@verb{ } }.
327 * Alternatively, function g() may be unsafe and @verb{ { }D,E,F@verb{ } }
328 * has a non-empty intersection with @verb{ { }A,B,C@verb{ } }.
329 * An alias can only be used later on for readonly (and not be used for a function with side effects).
330 */
331int
332safetyBarrier(InstrPtr p, InstrPtr q)
333{
334 int i,j;
335 if( isDependent(q,p))
336 return TRUE;
337 if (isUnsafeFunction(q)) {
338 for (i = p->retc; i < p->argc; i++)
339 for (j = q->retc; j < q->argc; j++)
340 if (p->argv[i] == q->argv[j]) {
341 /* TODO check safety property of the argument */
342 return TRUE;
343 }
344 }
345 return FALSE;
346}
347
348
349int
350isUpdateInstruction(InstrPtr p){
351 if ( getModuleId(p) == sqlRef &&
352 ( getFunctionId(p) == inplaceRef ||
353 getFunctionId(p) == appendRef ||
354 getFunctionId(p) == updateRef ||
355 getFunctionId(p) == replaceRef ||
356 getFunctionId(p) == clear_tableRef))
357 return TRUE;
358 if ( getModuleId(p) == batRef &&
359 ( getFunctionId(p) == inplaceRef ||
360 getFunctionId(p) == appendRef ||
361 getFunctionId(p) == updateRef ||
362 getFunctionId(p) == replaceRef ||
363 getFunctionId(p) == clear_tableRef))
364 return TRUE;
365 return FALSE;
366}
367int
368hasSideEffects(MalBlkPtr mb, InstrPtr p, int strict)
369{
370 if( getFunctionId(p) == NULL) return FALSE;
371
372/*
373 * Void-returning operations have side-effects and
374 * should be considered as such
375 */
376 if (p->retc == 0 || (p->retc == 1 && getArgType(mb,p,0) == TYPE_void))
377 return TRUE;
378
379/*
380 * Any function marked as unsafe can not be moved around without
381 * affecting its behavior on the program. For example, because they
382 * check for volatile resource levels.
383 */
384 if ( isUnsafeFunction(p))
385 return TRUE;
386
387 /* update instructions have side effects, they can be marked as unsafe */
388 if (isUpdateInstruction(p))
389 return TRUE;
390
391 if ( (getModuleId(p) == batRef || getModuleId(p)==sqlRef) &&
392 (getFunctionId(p) == setAccessRef ||
393 getFunctionId(p) == setWriteModeRef ))
394 return TRUE;
395
396 if (getModuleId(p) == malRef && getFunctionId(p) == multiplexRef)
397 return FALSE;
398
399 if( getModuleId(p) == ioRef ||
400 getModuleId(p) == streamsRef ||
401 getModuleId(p) == bstreamRef ||
402 getModuleId(p) == mdbRef ||
403 getModuleId(p) == malRef ||
404 getModuleId(p) == remapRef ||
405 getModuleId(p) == optimizerRef ||
406 getModuleId(p) == lockRef ||
407 getModuleId(p) == semaRef ||
408 getModuleId(p) == alarmRef)
409 return TRUE;
410
411 if( getModuleId(p) == pyapiRef ||
412 getModuleId(p) == pyapimapRef ||
413 getModuleId(p) == pyapi3Ref ||
414 getModuleId(p) == pyapi3mapRef ||
415 getModuleId(p) == rapiRef ||
416 getModuleId(p) == capiRef)
417 return TRUE;
418
419 if (getModuleId(p) == sqlcatalogRef)
420 return TRUE;
421 if (getModuleId(p) == sqlRef){
422 if (getFunctionId(p) == tidRef) return FALSE;
423 if (getFunctionId(p) == deltaRef) return FALSE;
424 if (getFunctionId(p) == subdeltaRef) return FALSE;
425 if (getFunctionId(p) == projectdeltaRef) return FALSE;
426 if (getFunctionId(p) == bindRef) return FALSE;
427 if (getFunctionId(p) == bindidxRef) return FALSE;
428 if (getFunctionId(p) == binddbatRef) return FALSE;
429 if (getFunctionId(p) == columnBindRef) return FALSE;
430 if (getFunctionId(p) == copy_fromRef) return FALSE;
431 /* assertions are the end-point of a flow path */
432 if (getFunctionId(p) == not_uniqueRef) return FALSE;
433 if (getFunctionId(p) == zero_or_oneRef) return FALSE;
434 if (getFunctionId(p) == mvcRef) return FALSE;
435 if (getFunctionId(p) == singleRef) return FALSE;
436 return TRUE;
437 }
438 if( getModuleId(p) == mapiRef){
439 if( getFunctionId(p) == rpcRef)
440 return TRUE;
441 if( getFunctionId(p) == reconnectRef)
442 return TRUE;
443 if( getFunctionId(p) == disconnectRef)
444 return TRUE;
445 }
446 if (strict && getFunctionId(p) == newRef &&
447 getModuleId(p) != groupRef )
448 return TRUE;
449
450 if ( getModuleId(p) == sqlcatalogRef)
451 return TRUE;
452 if ( getModuleId(p) == oltpRef)
453 return TRUE;
454 if ( getModuleId(p) == wlrRef)
455 return TRUE;
456 if ( getModuleId(p) == wlcRef)
457 return TRUE;
458 if ( getModuleId(p) == remoteRef)
459 return TRUE;
460 return FALSE;
461}
462
463/* Void returning functions always have side-effects.
464 */
465int
466mayhaveSideEffects(Client cntxt, MalBlkPtr mb, InstrPtr p, int strict)
467{
468 int tpe;
469 tpe= getVarType(mb,getArg(p,0));
470 if( tpe == TYPE_void)
471 return TRUE;
472 if (getModuleId(p) != malRef || getFunctionId(p) != multiplexRef)
473 return hasSideEffects(mb, p, strict);
474 // a manifold instruction can also have side effects.
475 // for this to check we need the function signature, not its function address.
476 // The easy way out now is to consider all manifold instructions as potentially having side effects.
477 if ( getModuleId(p) == malRef && getFunctionId(p) == manifoldRef)
478 return TRUE;
479 if (MANIFOLDtypecheck(cntxt,mb,p,1) == NULL)
480 return TRUE;
481 return FALSE;
482}
483
484/*
485 * Side-effect free functions are crucial for several operators.
486 */
487int
488isSideEffectFree(MalBlkPtr mb){
489 int i;
490 for(i=1; i< mb->stop && getInstrPtr(mb,i)->token != ENDsymbol; i++){
491 if( hasSideEffects(mb,getInstrPtr(mb,i), TRUE))
492 return FALSE;
493 }
494 return TRUE;
495}
496
497/*
498 * Breaking up a MAL program into pieces for distributed processing requires
499 * identification of (partial) blocking instructions. A conservative
500 * definition can be used.
501 */
502int
503isBlocking(InstrPtr p)
504{
505 if (blockStart(p) || blockExit(p) || blockCntrl(p))
506 return TRUE;
507
508 if ( getFunctionId(p) == sortRef )
509 return TRUE;
510
511 if( getModuleId(p) == aggrRef ||
512 getModuleId(p) == groupRef ||
513 getModuleId(p) == sqlcatalogRef )
514 return TRUE;
515 return FALSE;
516}
517
518/*
519 * Used in the merge table optimizer. It is built incrementally
520 * and should be conservative.
521 */
522
523static int
524isOrderDepenent(InstrPtr p)
525{
526 if( getModuleId(p) != batsqlRef)
527 return 0;
528 if ( getFunctionId(p) == differenceRef ||
529 getFunctionId(p) == window_boundRef ||
530 getFunctionId(p) == row_numberRef ||
531 getFunctionId(p) == rankRef ||
532 getFunctionId(p) == dense_rankRef ||
533 getFunctionId(p) == percent_rankRef ||
534 getFunctionId(p) == cume_distRef ||
535 getFunctionId(p) == ntileRef ||
536 getFunctionId(p) == first_valueRef ||
537 getFunctionId(p) == last_valueRef ||
538 getFunctionId(p) == nth_valueRef ||
539 getFunctionId(p) == lagRef ||
540 getFunctionId(p) == leadRef)
541 return 1;
542 return 0;
543}
544
545int isMapOp(InstrPtr p){
546 if (isUnsafeFunction(p) || isSealedFunction(p))
547 return 0;
548 return getModuleId(p) &&
549 ((getModuleId(p) == malRef && getFunctionId(p) == multiplexRef) ||
550 (getModuleId(p) == malRef && getFunctionId(p) == manifoldRef) ||
551 (getModuleId(p) == batcalcRef) ||
552 (getModuleId(p) != batcalcRef && getModuleId(p) != batRef && strncmp(getModuleId(p), "bat", 3) == 0) ||
553 (getModuleId(p) == mkeyRef)) && !isOrderDepenent(p) &&
554 getModuleId(p) != batrapiRef &&
555 getModuleId(p) != batpyapiRef &&
556 getModuleId(p) != batpyapi3Ref &&
557 getModuleId(p) != batcapiRef;
558}
559
560int isLikeOp(InstrPtr p){
561 return (getModuleId(p) == batalgebraRef &&
562 (getFunctionId(p) == likeRef ||
563 getFunctionId(p) == not_likeRef ||
564 getFunctionId(p) == ilikeRef ||
565 getFunctionId(p) == not_ilikeRef));
566}
567
568int
569isTopn(InstrPtr p)
570{
571 return ((getModuleId(p) == algebraRef && getFunctionId(p) == firstnRef) ||
572 isSlice(p));
573}
574
575int
576isSlice(InstrPtr p)
577{
578 return (getModuleId(p) == algebraRef &&
579 (getFunctionId(p) == subsliceRef || getFunctionId(p) == sliceRef));
580}
581
582int
583isSample(InstrPtr p)
584{
585 return (getModuleId(p) == sampleRef && getFunctionId(p) == subuniformRef);
586}
587
588int isOrderby(InstrPtr p){
589 return getModuleId(p) == algebraRef &&
590 getFunctionId(p) == sortRef;
591}
592
593int
594isMatJoinOp(InstrPtr p)
595{
596 return (isSubJoin(p) || (getModuleId(p) == algebraRef &&
597 (getFunctionId(p) == crossRef ||
598 getFunctionId(p) == joinRef ||
599 getFunctionId(p) == antijoinRef || /* is not mat save */
600 getFunctionId(p) == thetajoinRef ||
601 getFunctionId(p) == bandjoinRef ||
602 getFunctionId(p) == rangejoinRef)
603 ));
604}
605
606int
607isMatLeftJoinOp(InstrPtr p)
608{
609 return (getModuleId(p) == algebraRef &&
610 getFunctionId(p) == leftjoinRef);
611}
612
613int isDelta(InstrPtr p){
614 return
615 (getModuleId(p)== sqlRef && (
616 getFunctionId(p)== deltaRef ||
617 getFunctionId(p)== projectdeltaRef ||
618 getFunctionId(p)== subdeltaRef
619 )
620 );
621}
622
623int isFragmentGroup2(InstrPtr p){
624 return
625 (getModuleId(p)== algebraRef && (
626 getFunctionId(p)== projectionRef
627 )) ||
628 (getModuleId(p)== batRef && (
629 getFunctionId(p)== mergecandRef ||
630 getFunctionId(p)== intersectcandRef ||
631 getFunctionId(p)== diffcandRef
632 )
633 );
634}
635
636int isSelect(InstrPtr p)
637{
638 char *func = getFunctionId(p);
639 size_t l = func?strlen(func):0;
640
641 return (l >= 6 && strcmp(func+l-6,"select") == 0);
642}
643
644int isSubJoin(InstrPtr p)
645{
646 char *func = getFunctionId(p);
647 size_t l = func?strlen(func):0;
648
649 return (l >= 7 && strcmp(func+l-7,"join") == 0);
650}
651
652int isMultiplex(InstrPtr p)
653{
654 return (malRef && (getModuleId(p) == malRef || getModuleId(p) == batmalRef) &&
655 getFunctionId(p) == multiplexRef);
656}
657
658int isFragmentGroup(InstrPtr p){
659 return
660 (getModuleId(p)== algebraRef && (
661 getFunctionId(p)== projectRef ||
662 getFunctionId(p)== selectNotNilRef
663 )) ||
664 isSelect(p) ||
665 (getModuleId(p)== batRef && (
666 getFunctionId(p)== mirrorRef
667 ));
668}
669
670/*
671 * Some optimizers are interdependent (e.g. mitosis ), which
672 * requires inspection of the pipeline attached to a MAL block.
673 */
674int
675isOptimizerEnabled(MalBlkPtr mb, str opt)
676{
677 int i;
678 InstrPtr q;
679
680 for (i= mb->stop-1; i > 0; i--){
681 q= getInstrPtr(mb,i);
682 if ( q->token == ENDsymbol)
683 break;
684 if ( getModuleId(q) == optimizerRef &&
685 getFunctionId(q) == opt)
686 return 1;
687 }
688 return 0;
689}
690
691