1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * author M.L. Kersten |
11 | * The default SQL optimizer pipeline can be set per server. See the |
12 | * optpipe setting in monetdb(1) when using merovingian. During SQL |
13 | * initialization, the optimizer pipeline is checked against the |
14 | * dependency information maintained in the optimizer library to ensure |
15 | * there are no conflicts and at least the pre-requisite optimizers are |
16 | * used. The setting of sql_optimizer can be either the list of |
17 | * optimizers to run, or one or more variables containing the optimizer |
18 | * pipeline to run. The latter is provided for readability purposes |
19 | * only. |
20 | */ |
21 | #include "monetdb_config.h" |
22 | #include "opt_pipes.h" |
23 | #include "mal_client.h" |
24 | #include "mal_instruction.h" |
25 | #include "mal_function.h" |
26 | #include "mal_listing.h" |
27 | #include "mal_linker.h" |
28 | |
29 | #define MAXOPTPIPES 64 |
30 | |
31 | static struct PIPELINES { |
32 | char *name; |
33 | char *def; |
34 | char *status; |
35 | char *prerequisite; |
36 | MalBlkPtr mb; |
37 | char builtin; |
38 | } pipes[MAXOPTPIPES] = { |
39 | /* The minimal pipeline necessary by the server to operate correctly |
40 | * |
41 | * NOTE: |
42 | * If you change the minimal pipe, please also update the man page |
43 | * (see tools/mserver/mserver5.1) accordingly! |
44 | */ |
45 | {"minimal_pipe" , |
46 | "optimizer.inline();" |
47 | "optimizer.remap();" |
48 | "optimizer.deadcode();" |
49 | "optimizer.multiplex();" |
50 | "optimizer.generator();" |
51 | "optimizer.profiler();" |
52 | "optimizer.candidates();" |
53 | "optimizer.garbageCollector();" , |
54 | "stable" , NULL, NULL, 1}, |
55 | /* The default pipe line contains as of Feb2010 |
56 | * mitosis-mergetable-reorder, aimed at large tables and improved |
57 | * access locality. |
58 | * |
59 | * NOTE: |
60 | * If you change the default pipe, please also update the no_mitosis pipe |
61 | * and sequential pipe (see below, as well as the man page (see |
62 | * tools/mserver/mserver5.1) accordingly! |
63 | */ |
64 | {"default_pipe" , |
65 | "optimizer.inline();" |
66 | "optimizer.remap();" |
67 | "optimizer.costModel();" |
68 | "optimizer.coercions();" |
69 | "optimizer.aliases();" |
70 | "optimizer.evaluate();" |
71 | "optimizer.emptybind();" |
72 | "optimizer.pushselect();" |
73 | "optimizer.aliases();" |
74 | "optimizer.mitosis();" |
75 | "optimizer.mergetable();" |
76 | "optimizer.deadcode();" |
77 | "optimizer.aliases();" |
78 | "optimizer.constants();" |
79 | "optimizer.commonTerms();" |
80 | "optimizer.projectionpath();" |
81 | "optimizer.deadcode();" |
82 | "optimizer.reorder();" |
83 | "optimizer.matpack();" |
84 | "optimizer.dataflow();" |
85 | "optimizer.querylog();" |
86 | "optimizer.multiplex();" |
87 | "optimizer.generator();" |
88 | "optimizer.profiler();" |
89 | "optimizer.candidates();" |
90 | "optimizer.deadcode();" |
91 | "optimizer.postfix();" |
92 | // "optimizer.jit();" awaiting the new batcalc api |
93 | "optimizer.wlc();" |
94 | "optimizer.garbageCollector();" , |
95 | "stable" , NULL, NULL, 1}, |
96 | /* |
97 | * Optimistic concurreny control in general leads to more transaction failures |
98 | * in an OLTP setting. The partial solution provided is to give out |
99 | * advisory locks and delay updates until they are released or timeout. |
100 | */ |
101 | {"oltp_pipe" , |
102 | "optimizer.inline();" |
103 | "optimizer.remap();" |
104 | "optimizer.costModel();" |
105 | "optimizer.coercions();" |
106 | "optimizer.evaluate();" |
107 | "optimizer.emptybind();" |
108 | "optimizer.pushselect();" |
109 | "optimizer.aliases();" |
110 | "optimizer.mitosis();" |
111 | "optimizer.mergetable();" |
112 | "optimizer.deadcode();" |
113 | "optimizer.aliases();" |
114 | "optimizer.constants();" |
115 | "optimizer.commonTerms();" |
116 | "optimizer.projectionpath();" |
117 | "optimizer.deadcode();" |
118 | "optimizer.reorder();" |
119 | "optimizer.matpack();" |
120 | "optimizer.dataflow();" |
121 | "optimizer.querylog();" |
122 | "optimizer.multiplex();" |
123 | "optimizer.generator();" |
124 | "optimizer.profiler();" |
125 | "optimizer.candidates();" |
126 | "optimizer.deadcode();" |
127 | "optimizer.postfix();" |
128 | // "optimizer.jit();" awaiting the new batcalc api |
129 | "optimizer.oltp();" |
130 | "optimizer.wlc();" |
131 | "optimizer.garbageCollector();" , |
132 | "stable" , NULL, NULL, 1}, |
133 | /* |
134 | * Volcano style execution produces a sequence of blocks from the source relation |
135 | */ |
136 | {"volcano_pipe" , |
137 | "optimizer.inline();" |
138 | "optimizer.remap();" |
139 | "optimizer.costModel();" |
140 | "optimizer.coercions();" |
141 | "optimizer.aliases();" |
142 | "optimizer.evaluate();" |
143 | "optimizer.emptybind();" |
144 | "optimizer.pushselect();" |
145 | "optimizer.aliases();" |
146 | "optimizer.mitosis();" |
147 | "optimizer.mergetable();" |
148 | "optimizer.deadcode();" |
149 | "optimizer.aliases();" |
150 | "optimizer.constants();" |
151 | "optimizer.commonTerms();" |
152 | "optimizer.projectionpath();" |
153 | "optimizer.deadcode();" |
154 | "optimizer.reorder();" |
155 | "optimizer.matpack();" |
156 | "optimizer.dataflow();" |
157 | "optimizer.querylog();" |
158 | "optimizer.multiplex();" |
159 | "optimizer.generator();" |
160 | "optimizer.volcano();" |
161 | "optimizer.profiler();" |
162 | "optimizer.candidates();" |
163 | "optimizer.deadcode();" |
164 | "optimizer.postfix();" |
165 | // "optimizer.jit();" awaiting the new batcalc api |
166 | "optimizer.wlc();" |
167 | "optimizer.garbageCollector();" , |
168 | "stable" , NULL, NULL, 1}, |
169 | /* The no_mitosis pipe line is (and should be kept!) identical to the |
170 | * default pipeline, except that optimizer mitosis is omitted. It is |
171 | * used mainly to make some tests work deterministically, and to check |
172 | * / debug whether "unexpected" problems are related to mitosis |
173 | * (and/or mergetable). |
174 | * |
175 | * NOTE: |
176 | * If you change the no_mitosis pipe, please also update the man page |
177 | * (see tools/mserver/mserver5.1) accordingly! |
178 | */ |
179 | {"no_mitosis_pipe" , |
180 | "optimizer.inline();" |
181 | "optimizer.remap();" |
182 | "optimizer.costModel();" |
183 | "optimizer.coercions();" |
184 | "optimizer.aliases();" |
185 | "optimizer.evaluate();" |
186 | "optimizer.emptybind();" |
187 | "optimizer.pushselect();" |
188 | "optimizer.aliases();" |
189 | "optimizer.mergetable();" |
190 | "optimizer.deadcode();" |
191 | "optimizer.aliases();" |
192 | "optimizer.constants();" |
193 | "optimizer.commonTerms();" |
194 | "optimizer.projectionpath();" |
195 | "optimizer.deadcode();" |
196 | "optimizer.reorder();" |
197 | "optimizer.matpack();" |
198 | "optimizer.dataflow();" |
199 | "optimizer.querylog();" |
200 | "optimizer.multiplex();" |
201 | "optimizer.generator();" |
202 | "optimizer.profiler();" |
203 | "optimizer.candidates();" |
204 | "optimizer.deadcode();" |
205 | "optimizer.postfix();" |
206 | // "optimizer.jit();" awaiting the new batcalc api |
207 | "optimizer.wlc();" |
208 | "optimizer.garbageCollector();" , |
209 | "stable" , NULL, NULL, 1}, |
210 | /* The sequential pipe line is (and should be kept!) identical to the |
211 | * default pipeline, except that optimizers mitosis & dataflow are |
212 | * omitted. It is use mainly to make some tests work |
213 | * deterministically, i.e., avoid ambigious output, by avoiding |
214 | * parallelism. |
215 | * |
216 | * NOTE: |
217 | * If you change the sequential pipe, please also update the man page |
218 | * (see tools/mserver/mserver5.1) accordingly! |
219 | */ |
220 | {"sequential_pipe" , |
221 | "optimizer.inline();" |
222 | "optimizer.remap();" |
223 | "optimizer.costModel();" |
224 | "optimizer.coercions();" |
225 | "optimizer.aliases();" |
226 | "optimizer.evaluate();" |
227 | "optimizer.emptybind();" |
228 | "optimizer.pushselect();" |
229 | "optimizer.aliases();" |
230 | "optimizer.mergetable();" |
231 | "optimizer.deadcode();" |
232 | "optimizer.aliases();" |
233 | "optimizer.constants();" |
234 | "optimizer.commonTerms();" |
235 | "optimizer.projectionpath();" |
236 | "optimizer.deadcode();" |
237 | "optimizer.reorder();" |
238 | "optimizer.matpack();" |
239 | "optimizer.querylog();" |
240 | "optimizer.multiplex();" |
241 | "optimizer.generator();" |
242 | "optimizer.profiler();" |
243 | "optimizer.candidates();" |
244 | "optimizer.deadcode();" |
245 | "optimizer.postfix();" |
246 | // "optimizer.jit();" awaiting the new batcalc api |
247 | "optimizer.wlc();" |
248 | "optimizer.garbageCollector();" , |
249 | "stable" , NULL, NULL, 1}, |
250 | /* Experimental pipelines stressing various components under |
251 | * development. Do not use any of these pipelines in production |
252 | * settings! |
253 | */ |
254 | /* sentinel */ |
255 | {NULL, NULL, NULL, NULL, NULL, 0} |
256 | }; |
257 | |
258 | /* |
259 | * Debugging the optimizer pipeline", |
260 | * The best way is to use mdb and inspect the information gathered", |
261 | * during the optimization phase. Several optimizers produce more", |
262 | * intermediate information, which may shed light on the details. The", |
263 | * opt_debug bitvector controls their output. It can be set to a", |
264 | * pipeline or a comma separated list of optimizers you would like to", |
265 | * trace. It is a server wide property and can not be set dynamically,", |
266 | * as it is intended for internal use.", |
267 | */ |
268 | #include "opt_pipes.h" |
269 | #include "optimizer_private.h" |
270 | |
271 | static MT_Lock pipeLock = MT_LOCK_INITIALIZER("pipeLock" ); |
272 | |
273 | /* the session_pipe is the one defined by the user */ |
274 | str |
275 | addPipeDefinition(Client cntxt, const char *name, const char *pipe) |
276 | { |
277 | int i; |
278 | str msg; |
279 | struct PIPELINES oldpipe; |
280 | |
281 | MT_lock_set(&pipeLock); |
282 | for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++) |
283 | if (strcmp(name, pipes[i].name) == 0) |
284 | break; |
285 | |
286 | if (i == MAXOPTPIPES) { |
287 | MT_lock_unset(&pipeLock); |
288 | throw(MAL, "optimizer.addPipeDefinition" , SQLSTATE(HY001) "Out of slots" ); |
289 | } |
290 | if (pipes[i].name && pipes[i].builtin) { |
291 | MT_lock_unset(&pipeLock); |
292 | throw(MAL, "optimizer.addPipeDefinition" , SQLSTATE(42000) "No overwrite of built in allowed" ); |
293 | } |
294 | |
295 | /* save old value */ |
296 | oldpipe = pipes[i]; |
297 | pipes[i].name = GDKstrdup(name); |
298 | pipes[i].def = GDKstrdup(pipe); |
299 | pipes[i].status = GDKstrdup("experimental" ); |
300 | if(pipes[i].name == NULL || pipes[i].def == NULL || pipes[i].status == NULL) { |
301 | GDKfree(pipes[i].name); |
302 | GDKfree(pipes[i].def); |
303 | GDKfree(pipes[i].status); |
304 | pipes[i].name = oldpipe.name; |
305 | pipes[i].def = oldpipe.def; |
306 | pipes[i].status = oldpipe.status; |
307 | MT_lock_unset(&pipeLock); |
308 | throw(MAL, "optimizer.addPipeDefinition" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
309 | } |
310 | pipes[i].mb = NULL; |
311 | MT_lock_unset(&pipeLock); |
312 | msg = compileOptimizer(cntxt, name); |
313 | if (msg) { |
314 | /* failed: restore old value */ |
315 | MT_lock_set(&pipeLock); |
316 | GDKfree(pipes[i].name); |
317 | GDKfree(pipes[i].def); |
318 | GDKfree(pipes[i].status); |
319 | pipes[i] = oldpipe; |
320 | MT_lock_unset(&pipeLock); |
321 | } else { |
322 | /* succeeded: destroy old value */ |
323 | if (oldpipe.name) |
324 | GDKfree(oldpipe.name); |
325 | if (oldpipe.def) |
326 | GDKfree(oldpipe.def); |
327 | if (oldpipe.mb) |
328 | freeMalBlk(oldpipe.mb); |
329 | if (oldpipe.status) |
330 | GDKfree(oldpipe.status); |
331 | } |
332 | return msg; |
333 | } |
334 | |
335 | int |
336 | isOptimizerPipe(const char *name) |
337 | { |
338 | int i; |
339 | |
340 | for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++) |
341 | if (strcmp(name, pipes[i].name) == 0) |
342 | return TRUE; |
343 | return FALSE; |
344 | } |
345 | |
346 | str |
347 | getPipeDefinition(str name) |
348 | { |
349 | int i; |
350 | |
351 | for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++) |
352 | if (strcmp(name, pipes[i].name) == 0) |
353 | return GDKstrdup(pipes[i].def); |
354 | return NULL; |
355 | } |
356 | |
357 | str |
358 | getPipeCatalog(bat *nme, bat *def, bat *stat) |
359 | { |
360 | BAT *b, *bn, *bs; |
361 | int i; |
362 | |
363 | b = COLnew(0, TYPE_str, 20, TRANSIENT); |
364 | bn = COLnew(0, TYPE_str, 20, TRANSIENT); |
365 | bs = COLnew(0, TYPE_str, 20, TRANSIENT); |
366 | if (b == NULL || bn == NULL || bs == NULL) { |
367 | BBPreclaim(b); |
368 | BBPreclaim(bn); |
369 | BBPreclaim(bs); |
370 | throw(MAL, "optimizer.getpipeDefinition" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
371 | } |
372 | |
373 | for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++) { |
374 | if (pipes[i].prerequisite && getAddress(pipes[i].prerequisite) == NULL){ |
375 | BBPreclaim(b); |
376 | BBPreclaim(bn); |
377 | BBPreclaim(bs); |
378 | throw(MAL,"getPipeCatalog" , SQLSTATE(HY002) "#MAL.getAddress address of '%s' not found" ,pipes[i].name); |
379 | } |
380 | if (BUNappend(b, pipes[i].name, false) != GDK_SUCCEED || |
381 | BUNappend(bn, pipes[i].def, false) != GDK_SUCCEED || |
382 | BUNappend(bs, pipes[i].status, false) != GDK_SUCCEED) { |
383 | BBPreclaim(b); |
384 | BBPreclaim(bn); |
385 | BBPreclaim(bs); |
386 | throw(MAL, "optimizer.getpipeDefinition" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
387 | } |
388 | } |
389 | |
390 | BBPkeepref(*nme = b->batCacheid); |
391 | BBPkeepref(*def = bn->batCacheid); |
392 | BBPkeepref(*stat = bs->batCacheid); |
393 | return MAL_SUCCEED; |
394 | } |
395 | |
396 | static str |
397 | validatePipe(MalBlkPtr mb) |
398 | { |
399 | int mitosis = FALSE, deadcode = FALSE, mergetable = FALSE, multiplex = FALSE, garbage = FALSE, generator = FALSE, remap = FALSE; |
400 | int i; |
401 | InstrPtr p; |
402 | |
403 | if (mb == NULL ) |
404 | throw(MAL, "optimizer.validate" , SQLSTATE(42000) "missing optimizer mal block\n" ); |
405 | p = getInstrPtr(mb,1); |
406 | if (getFunctionId(p) == NULL || idcmp(getFunctionId(p), "inline" )) |
407 | throw(MAL, "optimizer.validate" , SQLSTATE(42000) "'inline' should be the first\n" ); |
408 | |
409 | for (i = 1; i < mb->stop - 1; i++){ |
410 | p = getInstrPtr(mb,i); |
411 | if (getFunctionId(getInstrPtr(mb, i)) != NULL) { |
412 | if (strcmp(getFunctionId(p), "deadcode" ) == 0) |
413 | deadcode = TRUE; |
414 | else if (strcmp(getFunctionId(p), "remap" ) == 0) |
415 | remap = TRUE; |
416 | else if (strcmp(getFunctionId(p), "mitosis" ) == 0) |
417 | mitosis = TRUE; |
418 | else if (strcmp(getFunctionId(p), "mergetable" ) == 0) |
419 | mergetable = TRUE; |
420 | else if (strcmp(getFunctionId(p), "multiplex" ) == 0) |
421 | multiplex = TRUE; |
422 | else if (strcmp(getFunctionId(p), "generator" ) == 0) |
423 | generator = TRUE; |
424 | else if (strcmp(getFunctionId(p), "garbageCollector" ) == 0) |
425 | garbage = TRUE; |
426 | } else |
427 | throw(MAL, "optimizer.validate" , SQLSTATE(42000) "Missing optimizer call\n" ); |
428 | } |
429 | |
430 | if (mitosis == TRUE && mergetable == FALSE) |
431 | throw(MAL, "optimizer.validate" , SQLSTATE(42000) "'mitosis' needs 'mergetable'\n" ); |
432 | |
433 | /* several optimizer should be used */ |
434 | if (multiplex == 0) |
435 | throw(MAL, "optimizer.validate" , SQLSTATE(42000) "'multiplex' should be used\n" ); |
436 | if (deadcode == FALSE) |
437 | throw(MAL, "optimizer.validate" , SQLSTATE(42000) "'deadcode' should be used at least once\n" ); |
438 | if (garbage == FALSE) |
439 | throw(MAL, "optimizer.validate" , SQLSTATE(42000) "'garbageCollector' should be used as the last one\n" ); |
440 | if (remap == FALSE) |
441 | throw(MAL, "optimizer.validate" , SQLSTATE(42000) "'remap' should be used\n" ); |
442 | if (generator == FALSE) |
443 | throw(MAL, "optimizer.validate" , SQLSTATE(42000) "'generator' should be used\n" ); |
444 | |
445 | return MAL_SUCCEED; |
446 | } |
447 | |
448 | static str |
449 | validateOptimizerPipes(void) |
450 | { |
451 | int i; |
452 | str msg = MAL_SUCCEED; |
453 | |
454 | MT_lock_set(&mal_contextLock); |
455 | for (i = 0; i < MAXOPTPIPES && pipes[i].def; i++) |
456 | if (pipes[i].mb) { |
457 | msg = validatePipe(pipes[i].mb); |
458 | if (msg != MAL_SUCCEED) |
459 | break; |
460 | } |
461 | MT_lock_unset(&mal_contextLock); |
462 | return msg; |
463 | } |
464 | |
465 | /* |
466 | * Compile (the first time) an optimizer pipe string |
467 | * then copy the statements to the end of the MAL plan |
468 | */ |
469 | str |
470 | compileOptimizer(Client cntxt, const char *name) |
471 | { |
472 | int i, j; |
473 | char buf[2048]; |
474 | str msg = MAL_SUCCEED; |
475 | Symbol fcn, compiled; |
476 | |
477 | MT_lock_set(&pipeLock); |
478 | for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++) |
479 | if (strcmp(pipes[i].name, name) == 0 && pipes[i].mb == 0) { |
480 | /* precompile a pipeline as MAL string */ |
481 | for (j = 0; j < MAXOPTPIPES && pipes[j].def; j++) { |
482 | if (pipes[j].mb == NULL) { |
483 | if (pipes[j].prerequisite && getAddress(pipes[j].prerequisite) == NULL) |
484 | continue; |
485 | snprintf(buf,2048,"function optimizer.%s(); %s;end %s;" , pipes[j].name,pipes[j].def,pipes[j].name); |
486 | msg = compileString(&fcn,cntxt, buf); |
487 | if( msg == MAL_SUCCEED){ |
488 | compiled = findSymbol(cntxt->usermodule,getName("optimizer" ), getName(pipes[j].name)); |
489 | if( compiled){ |
490 | pipes[j].mb = compiled->def; |
491 | //fprintFunction(stderr, pipes[j].mb, 0, LIST_MAL_ALL); |
492 | } |
493 | } |
494 | } |
495 | } |
496 | if (msg != MAL_SUCCEED || |
497 | (msg = validateOptimizerPipes()) != MAL_SUCCEED) |
498 | break; |
499 | } |
500 | MT_lock_unset(&pipeLock); |
501 | return msg; |
502 | } |
503 | |
504 | str |
505 | compileAllOptimizers(Client cntxt) |
506 | { |
507 | int i; |
508 | str msg = MAL_SUCCEED; |
509 | |
510 | for(i=0;pipes[i].def && msg == MAL_SUCCEED; i++){ |
511 | msg =compileOptimizer(cntxt,pipes[i].name); |
512 | } |
513 | return msg; |
514 | } |
515 | |
516 | /* |
517 | * Add a new components of the optimizer pipe to the plan |
518 | */ |
519 | str |
520 | addOptimizerPipe(Client cntxt, MalBlkPtr mb, const char *name) |
521 | { |
522 | int i, j, k; |
523 | InstrPtr p,q; |
524 | str msg = MAL_SUCCEED; |
525 | |
526 | for (i = 0; i < MAXOPTPIPES && pipes[i].name; i++) |
527 | if (strcmp(pipes[i].name, name) == 0) |
528 | break; |
529 | |
530 | if (i == MAXOPTPIPES) |
531 | throw(MAL, "optimizer.addOptimizerPipe" , SQLSTATE(HY001) "Out of slots" ); |
532 | |
533 | if (pipes[i].mb == NULL) |
534 | msg = compileOptimizer(cntxt, name); |
535 | |
536 | if (pipes[i].mb && pipes[i].mb->stop) { |
537 | for (j = 1; j < pipes[i].mb->stop - 1; j++) { |
538 | q= getInstrPtr(pipes[i].mb,j); |
539 | if( getModuleId(q) != optimizerRef) |
540 | continue; |
541 | p = copyInstruction(q); |
542 | if (!p) { |
543 | throw(MAL, "optimizer.addOptimizerPipe" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
544 | } |
545 | for (k = 0; k < p->argc; k++) |
546 | getArg(p, k) = cloneVariable(mb, pipes[i].mb, getArg(p, k)); |
547 | typeChecker(cntxt->usermodule, mb, p, FALSE); |
548 | pushInstruction(mb, p); |
549 | } |
550 | } |
551 | return msg; |
552 | } |
553 | |
554 | void |
555 | opt_pipes_reset(void) |
556 | { |
557 | int i; |
558 | |
559 | for (i = 0; i < MAXOPTPIPES; i++) |
560 | pipes[i].mb = NULL; |
561 | } |
562 | |