1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * author M.L. Kersten
11 * The default SQL optimizer pipeline can be set per server. See the
12 * optpipe setting in monetdb(1) when using merovingian. During SQL
13 * initialization, the optimizer pipeline is checked against the
14 * dependency information maintained in the optimizer library to ensure
15 * there are no conflicts and at least the pre-requisite optimizers are
16 * used. The setting of sql_optimizer can be either the list of
17 * optimizers to run, or one or more variables containing the optimizer
18 * pipeline to run. The latter is provided for readability purposes
19 * only.
20 */
21#include "monetdb_config.h"
22#include "opt_pipes.h"
23#include "mal_client.h"
24#include "mal_instruction.h"
25#include "mal_function.h"
26#include "mal_listing.h"
27#include "mal_linker.h"
28
29#define MAXOPTPIPES 64
30
31static struct PIPELINES {
32 char *name;
33 char *def;
34 char *status;
35 char *prerequisite;
36 MalBlkPtr mb;
37 char builtin;
38} pipes[MAXOPTPIPES] = {
39/* The minimal pipeline necessary by the server to operate correctly
40 *
41 * NOTE:
42 * If you change the minimal pipe, please also update the man page
43 * (see tools/mserver/mserver5.1) accordingly!
44 */
45 {"minimal_pipe",
46 "optimizer.inline();"
47 "optimizer.remap();"
48 "optimizer.deadcode();"
49 "optimizer.multiplex();"
50 "optimizer.generator();"
51 "optimizer.profiler();"
52 "optimizer.candidates();"
53 "optimizer.garbageCollector();",
54 "stable", NULL, NULL, 1},
55/* The default pipe line contains as of Feb2010
56 * mitosis-mergetable-reorder, aimed at large tables and improved
57 * access locality.
58 *
59 * NOTE:
60 * If you change the default pipe, please also update the no_mitosis pipe
61 * and sequential pipe (see below, as well as the man page (see
62 * tools/mserver/mserver5.1) accordingly!
63 */
64 {"default_pipe",
65 "optimizer.inline();"
66 "optimizer.remap();"
67 "optimizer.costModel();"
68 "optimizer.coercions();"
69 "optimizer.aliases();"
70 "optimizer.evaluate();"
71 "optimizer.emptybind();"
72 "optimizer.pushselect();"
73 "optimizer.aliases();"
74 "optimizer.mitosis();"
75 "optimizer.mergetable();"
76 "optimizer.deadcode();"
77 "optimizer.aliases();"
78 "optimizer.constants();"
79 "optimizer.commonTerms();"
80 "optimizer.projectionpath();"
81 "optimizer.deadcode();"
82 "optimizer.reorder();"
83 "optimizer.matpack();"
84 "optimizer.dataflow();"
85 "optimizer.querylog();"
86 "optimizer.multiplex();"
87 "optimizer.generator();"
88 "optimizer.profiler();"
89 "optimizer.candidates();"
90 "optimizer.deadcode();"
91 "optimizer.postfix();"
92// "optimizer.jit();" awaiting the new batcalc api
93 "optimizer.wlc();"
94 "optimizer.garbageCollector();",
95 "stable", NULL, NULL, 1},
96/*
97 * Optimistic concurreny control in general leads to more transaction failures
98 * in an OLTP setting. The partial solution provided is to give out
99 * advisory locks and delay updates until they are released or timeout.
100 */
101 {"oltp_pipe",
102 "optimizer.inline();"
103 "optimizer.remap();"
104 "optimizer.costModel();"
105 "optimizer.coercions();"
106 "optimizer.evaluate();"
107 "optimizer.emptybind();"
108 "optimizer.pushselect();"
109 "optimizer.aliases();"
110 "optimizer.mitosis();"
111 "optimizer.mergetable();"
112 "optimizer.deadcode();"
113 "optimizer.aliases();"
114 "optimizer.constants();"
115 "optimizer.commonTerms();"
116 "optimizer.projectionpath();"
117 "optimizer.deadcode();"
118 "optimizer.reorder();"
119 "optimizer.matpack();"
120 "optimizer.dataflow();"
121 "optimizer.querylog();"
122 "optimizer.multiplex();"
123 "optimizer.generator();"
124 "optimizer.profiler();"
125 "optimizer.candidates();"
126 "optimizer.deadcode();"
127 "optimizer.postfix();"
128// "optimizer.jit();" awaiting the new batcalc api
129 "optimizer.oltp();"
130 "optimizer.wlc();"
131 "optimizer.garbageCollector();",
132 "stable", NULL, NULL, 1},
133/*
134 * Volcano style execution produces a sequence of blocks from the source relation
135 */
136 {"volcano_pipe",
137 "optimizer.inline();"
138 "optimizer.remap();"
139 "optimizer.costModel();"
140 "optimizer.coercions();"
141 "optimizer.aliases();"
142 "optimizer.evaluate();"
143 "optimizer.emptybind();"
144 "optimizer.pushselect();"
145 "optimizer.aliases();"
146 "optimizer.mitosis();"
147 "optimizer.mergetable();"
148 "optimizer.deadcode();"
149 "optimizer.aliases();"
150 "optimizer.constants();"
151 "optimizer.commonTerms();"
152 "optimizer.projectionpath();"
153 "optimizer.deadcode();"
154 "optimizer.reorder();"
155 "optimizer.matpack();"
156 "optimizer.dataflow();"
157 "optimizer.querylog();"
158 "optimizer.multiplex();"
159 "optimizer.generator();"
160 "optimizer.volcano();"
161 "optimizer.profiler();"
162 "optimizer.candidates();"
163 "optimizer.deadcode();"
164 "optimizer.postfix();"
165// "optimizer.jit();" awaiting the new batcalc api
166 "optimizer.wlc();"
167 "optimizer.garbageCollector();",
168 "stable", NULL, NULL, 1},
169/* The no_mitosis pipe line is (and should be kept!) identical to the
170 * default pipeline, except that optimizer mitosis is omitted. It is
171 * used mainly to make some tests work deterministically, and to check
172 * / debug whether "unexpected" problems are related to mitosis
173 * (and/or mergetable).
174 *
175 * NOTE:
176 * If you change the no_mitosis pipe, please also update the man page
177 * (see tools/mserver/mserver5.1) accordingly!
178 */
179 {"no_mitosis_pipe",
180 "optimizer.inline();"
181 "optimizer.remap();"
182 "optimizer.costModel();"
183 "optimizer.coercions();"
184 "optimizer.aliases();"
185 "optimizer.evaluate();"
186 "optimizer.emptybind();"
187 "optimizer.pushselect();"
188 "optimizer.aliases();"
189 "optimizer.mergetable();"
190 "optimizer.deadcode();"
191 "optimizer.aliases();"
192 "optimizer.constants();"
193 "optimizer.commonTerms();"
194 "optimizer.projectionpath();"
195 "optimizer.deadcode();"
196 "optimizer.reorder();"
197 "optimizer.matpack();"
198 "optimizer.dataflow();"
199 "optimizer.querylog();"
200 "optimizer.multiplex();"
201 "optimizer.generator();"
202 "optimizer.profiler();"
203 "optimizer.candidates();"
204 "optimizer.deadcode();"
205 "optimizer.postfix();"
206// "optimizer.jit();" awaiting the new batcalc api
207 "optimizer.wlc();"
208 "optimizer.garbageCollector();",
209 "stable", NULL, NULL, 1},
210/* The sequential pipe line is (and should be kept!) identical to the
211 * default pipeline, except that optimizers mitosis & dataflow are
212 * omitted. It is use mainly to make some tests work
213 * deterministically, i.e., avoid ambigious output, by avoiding
214 * parallelism.
215 *
216 * NOTE:
217 * If you change the sequential pipe, please also update the man page
218 * (see tools/mserver/mserver5.1) accordingly!
219 */
220 {"sequential_pipe",
221 "optimizer.inline();"
222 "optimizer.remap();"
223 "optimizer.costModel();"
224 "optimizer.coercions();"
225 "optimizer.aliases();"
226 "optimizer.evaluate();"
227 "optimizer.emptybind();"
228 "optimizer.pushselect();"
229 "optimizer.aliases();"
230 "optimizer.mergetable();"
231 "optimizer.deadcode();"
232 "optimizer.aliases();"
233 "optimizer.constants();"
234 "optimizer.commonTerms();"
235 "optimizer.projectionpath();"
236 "optimizer.deadcode();"
237 "optimizer.reorder();"
238 "optimizer.matpack();"
239 "optimizer.querylog();"
240 "optimizer.multiplex();"
241 "optimizer.generator();"
242 "optimizer.profiler();"
243 "optimizer.candidates();"
244 "optimizer.deadcode();"
245 "optimizer.postfix();"
246// "optimizer.jit();" awaiting the new batcalc api
247 "optimizer.wlc();"
248 "optimizer.garbageCollector();",
249 "stable", NULL, NULL, 1},
250/* Experimental pipelines stressing various components under
251 * development. Do not use any of these pipelines in production
252 * settings!
253 */
254/* sentinel */
255 {NULL, NULL, NULL, NULL, NULL, 0}
256};
257
258/*
259 * Debugging the optimizer pipeline",
260 * The best way is to use mdb and inspect the information gathered",
261 * during the optimization phase. Several optimizers produce more",
262 * intermediate information, which may shed light on the details. The",
263 * opt_debug bitvector controls their output. It can be set to a",
264 * pipeline or a comma separated list of optimizers you would like to",
265 * trace. It is a server wide property and can not be set dynamically,",
266 * as it is intended for internal use.",
267 */
268#include "opt_pipes.h"
269#include "optimizer_private.h"
270
271static MT_Lock pipeLock = MT_LOCK_INITIALIZER("pipeLock");
272
273/* the session_pipe is the one defined by the user */
274str
275addPipeDefinition(Client cntxt, const char *name, const char *pipe)
276{
277 int i;
278 str msg;
279 struct PIPELINES oldpipe;
280
281 MT_lock_set(&pipeLock);
282 for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
283 if (strcmp(name, pipes[i].name) == 0)
284 break;
285
286 if (i == MAXOPTPIPES) {
287 MT_lock_unset(&pipeLock);
288 throw(MAL, "optimizer.addPipeDefinition", SQLSTATE(HY001) "Out of slots");
289 }
290 if (pipes[i].name && pipes[i].builtin) {
291 MT_lock_unset(&pipeLock);
292 throw(MAL, "optimizer.addPipeDefinition", SQLSTATE(42000) "No overwrite of built in allowed");
293 }
294
295 /* save old value */
296 oldpipe = pipes[i];
297 pipes[i].name = GDKstrdup(name);
298 pipes[i].def = GDKstrdup(pipe);
299 pipes[i].status = GDKstrdup("experimental");
300 if(pipes[i].name == NULL || pipes[i].def == NULL || pipes[i].status == NULL) {
301 GDKfree(pipes[i].name);
302 GDKfree(pipes[i].def);
303 GDKfree(pipes[i].status);
304 pipes[i].name = oldpipe.name;
305 pipes[i].def = oldpipe.def;
306 pipes[i].status = oldpipe.status;
307 MT_lock_unset(&pipeLock);
308 throw(MAL, "optimizer.addPipeDefinition", SQLSTATE(HY001) MAL_MALLOC_FAIL);
309 }
310 pipes[i].mb = NULL;
311 MT_lock_unset(&pipeLock);
312 msg = compileOptimizer(cntxt, name);
313 if (msg) {
314 /* failed: restore old value */
315 MT_lock_set(&pipeLock);
316 GDKfree(pipes[i].name);
317 GDKfree(pipes[i].def);
318 GDKfree(pipes[i].status);
319 pipes[i] = oldpipe;
320 MT_lock_unset(&pipeLock);
321 } else {
322 /* succeeded: destroy old value */
323 if (oldpipe.name)
324 GDKfree(oldpipe.name);
325 if (oldpipe.def)
326 GDKfree(oldpipe.def);
327 if (oldpipe.mb)
328 freeMalBlk(oldpipe.mb);
329 if (oldpipe.status)
330 GDKfree(oldpipe.status);
331 }
332 return msg;
333}
334
335int
336isOptimizerPipe(const char *name)
337{
338 int i;
339
340 for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
341 if (strcmp(name, pipes[i].name) == 0)
342 return TRUE;
343 return FALSE;
344}
345
346str
347getPipeDefinition(str name)
348{
349 int i;
350
351 for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
352 if (strcmp(name, pipes[i].name) == 0)
353 return GDKstrdup(pipes[i].def);
354 return NULL;
355}
356
357str
358getPipeCatalog(bat *nme, bat *def, bat *stat)
359{
360 BAT *b, *bn, *bs;
361 int i;
362
363 b = COLnew(0, TYPE_str, 20, TRANSIENT);
364 bn = COLnew(0, TYPE_str, 20, TRANSIENT);
365 bs = COLnew(0, TYPE_str, 20, TRANSIENT);
366 if (b == NULL || bn == NULL || bs == NULL) {
367 BBPreclaim(b);
368 BBPreclaim(bn);
369 BBPreclaim(bs);
370 throw(MAL, "optimizer.getpipeDefinition", SQLSTATE(HY001) MAL_MALLOC_FAIL);
371 }
372
373 for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++) {
374 if (pipes[i].prerequisite && getAddress(pipes[i].prerequisite) == NULL){
375 BBPreclaim(b);
376 BBPreclaim(bn);
377 BBPreclaim(bs);
378 throw(MAL,"getPipeCatalog", SQLSTATE(HY002) "#MAL.getAddress address of '%s' not found",pipes[i].name);
379 }
380 if (BUNappend(b, pipes[i].name, false) != GDK_SUCCEED ||
381 BUNappend(bn, pipes[i].def, false) != GDK_SUCCEED ||
382 BUNappend(bs, pipes[i].status, false) != GDK_SUCCEED) {
383 BBPreclaim(b);
384 BBPreclaim(bn);
385 BBPreclaim(bs);
386 throw(MAL, "optimizer.getpipeDefinition", SQLSTATE(HY001) MAL_MALLOC_FAIL);
387 }
388 }
389
390 BBPkeepref(*nme = b->batCacheid);
391 BBPkeepref(*def = bn->batCacheid);
392 BBPkeepref(*stat = bs->batCacheid);
393 return MAL_SUCCEED;
394}
395
396static str
397validatePipe(MalBlkPtr mb)
398{
399 int mitosis = FALSE, deadcode = FALSE, mergetable = FALSE, multiplex = FALSE, garbage = FALSE, generator = FALSE, remap = FALSE;
400 int i;
401 InstrPtr p;
402
403 if (mb == NULL )
404 throw(MAL, "optimizer.validate", SQLSTATE(42000) "missing optimizer mal block\n");
405 p = getInstrPtr(mb,1);
406 if (getFunctionId(p) == NULL || idcmp(getFunctionId(p), "inline"))
407 throw(MAL, "optimizer.validate", SQLSTATE(42000) "'inline' should be the first\n");
408
409 for (i = 1; i < mb->stop - 1; i++){
410 p = getInstrPtr(mb,i);
411 if (getFunctionId(getInstrPtr(mb, i)) != NULL) {
412 if (strcmp(getFunctionId(p), "deadcode") == 0)
413 deadcode = TRUE;
414 else if (strcmp(getFunctionId(p), "remap") == 0)
415 remap = TRUE;
416 else if (strcmp(getFunctionId(p), "mitosis") == 0)
417 mitosis = TRUE;
418 else if (strcmp(getFunctionId(p), "mergetable") == 0)
419 mergetable = TRUE;
420 else if (strcmp(getFunctionId(p), "multiplex") == 0)
421 multiplex = TRUE;
422 else if (strcmp(getFunctionId(p), "generator") == 0)
423 generator = TRUE;
424 else if (strcmp(getFunctionId(p), "garbageCollector") == 0)
425 garbage = TRUE;
426 } else
427 throw(MAL, "optimizer.validate", SQLSTATE(42000) "Missing optimizer call\n");
428 }
429
430 if (mitosis == TRUE && mergetable == FALSE)
431 throw(MAL, "optimizer.validate", SQLSTATE(42000) "'mitosis' needs 'mergetable'\n");
432
433 /* several optimizer should be used */
434 if (multiplex == 0)
435 throw(MAL, "optimizer.validate", SQLSTATE(42000) "'multiplex' should be used\n");
436 if (deadcode == FALSE)
437 throw(MAL, "optimizer.validate", SQLSTATE(42000) "'deadcode' should be used at least once\n");
438 if (garbage == FALSE)
439 throw(MAL, "optimizer.validate", SQLSTATE(42000) "'garbageCollector' should be used as the last one\n");
440 if (remap == FALSE)
441 throw(MAL, "optimizer.validate", SQLSTATE(42000) "'remap' should be used\n");
442 if (generator == FALSE)
443 throw(MAL, "optimizer.validate", SQLSTATE(42000) "'generator' should be used\n");
444
445 return MAL_SUCCEED;
446}
447
448static str
449validateOptimizerPipes(void)
450{
451 int i;
452 str msg = MAL_SUCCEED;
453
454 MT_lock_set(&mal_contextLock);
455 for (i = 0; i < MAXOPTPIPES && pipes[i].def; i++)
456 if (pipes[i].mb) {
457 msg = validatePipe(pipes[i].mb);
458 if (msg != MAL_SUCCEED)
459 break;
460 }
461 MT_lock_unset(&mal_contextLock);
462 return msg;
463}
464
465/*
466 * Compile (the first time) an optimizer pipe string
467 * then copy the statements to the end of the MAL plan
468*/
469str
470compileOptimizer(Client cntxt, const char *name)
471{
472 int i, j;
473 char buf[2048];
474 str msg = MAL_SUCCEED;
475 Symbol fcn, compiled;
476
477 MT_lock_set(&pipeLock);
478 for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
479 if (strcmp(pipes[i].name, name) == 0 && pipes[i].mb == 0) {
480 /* precompile a pipeline as MAL string */
481 for (j = 0; j < MAXOPTPIPES && pipes[j].def; j++) {
482 if (pipes[j].mb == NULL) {
483 if (pipes[j].prerequisite && getAddress(pipes[j].prerequisite) == NULL)
484 continue;
485 snprintf(buf,2048,"function optimizer.%s(); %s;end %s;", pipes[j].name,pipes[j].def,pipes[j].name);
486 msg = compileString(&fcn,cntxt, buf);
487 if( msg == MAL_SUCCEED){
488 compiled = findSymbol(cntxt->usermodule,getName("optimizer"), getName(pipes[j].name));
489 if( compiled){
490 pipes[j].mb = compiled->def;
491 //fprintFunction(stderr, pipes[j].mb, 0, LIST_MAL_ALL);
492 }
493 }
494 }
495 }
496 if (msg != MAL_SUCCEED ||
497 (msg = validateOptimizerPipes()) != MAL_SUCCEED)
498 break;
499 }
500 MT_lock_unset(&pipeLock);
501 return msg;
502}
503
504str
505compileAllOptimizers(Client cntxt)
506{
507 int i;
508 str msg = MAL_SUCCEED;
509
510 for(i=0;pipes[i].def && msg == MAL_SUCCEED; i++){
511 msg =compileOptimizer(cntxt,pipes[i].name);
512 }
513 return msg;
514}
515
516/*
517 * Add a new components of the optimizer pipe to the plan
518 */
519str
520addOptimizerPipe(Client cntxt, MalBlkPtr mb, const char *name)
521{
522 int i, j, k;
523 InstrPtr p,q;
524 str msg = MAL_SUCCEED;
525
526 for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++)
527 if (strcmp(pipes[i].name, name) == 0)
528 break;
529
530 if (i == MAXOPTPIPES)
531 throw(MAL, "optimizer.addOptimizerPipe", SQLSTATE(HY001) "Out of slots");
532
533 if (pipes[i].mb == NULL)
534 msg = compileOptimizer(cntxt, name);
535
536 if (pipes[i].mb && pipes[i].mb->stop) {
537 for (j = 1; j < pipes[i].mb->stop - 1; j++) {
538 q= getInstrPtr(pipes[i].mb,j);
539 if( getModuleId(q) != optimizerRef)
540 continue;
541 p = copyInstruction(q);
542 if (!p) {
543 throw(MAL, "optimizer.addOptimizerPipe", SQLSTATE(HY001) MAL_MALLOC_FAIL);
544 }
545 for (k = 0; k < p->argc; k++)
546 getArg(p, k) = cloneVariable(mb, pipes[i].mb, getArg(p, k));
547 typeChecker(cntxt->usermodule, mb, p, FALSE);
548 pushInstruction(mb, p);
549 }
550 }
551 return msg;
552}
553
554void
555opt_pipes_reset(void)
556{
557 int i;
558
559 for (i = 0; i < MAXOPTPIPES; i++)
560 pipes[i].mb = NULL;
561}
562