1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * @a S. Manegold |
11 | * @- SQL append show-case optimizer |
12 | * |
13 | * This optimizer is mainly for demonstrating how to implement a MAL |
14 | * optimizer in principle. |
15 | * |
16 | * Purely for that purpose, the optimizer's task is to recognize MAL code |
17 | * patterns like |
18 | * |
19 | * ... |
20 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
21 | * ... |
22 | * v5 := ... v0 ...; |
23 | * ... |
24 | * |
25 | * i.e., an sql.append() statement that is eventually followed by some other |
26 | * statement later on in the MAL program that uses the same v0 BAT as |
27 | * argument as the sql.append() statement does, |
28 | * Do you assume a single re-use of the variable v0? |
29 | * Do you assume a non-nested MAL block ? |
30 | * |
31 | * and transform them into |
32 | * |
33 | * ... |
34 | * v1 := aggr.count( v0 ); |
35 | * v2 := algebra.slice( v0, 0, v1 ); |
36 | * v3 := sql.append( ..., ..., ..., ..., v2 ); |
37 | * ... |
38 | * v5 := ... v0 ...; |
39 | * ... |
40 | * |
41 | * i.e., handing a BAT view v2 of BAT v0 as argument to the sql.append() |
42 | * statement, rather than the original BAT v0. |
43 | * My advice, always use new variable names, it may capture some easy |
44 | * to make errors. |
45 | * |
46 | * As a refinement, patterns like |
47 | * |
48 | * ... |
49 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
50 | * v4 := aggr.count( v0 ); |
51 | * ... |
52 | * v5 := ... v0 ...; |
53 | * ... |
54 | * |
55 | * are transformed into |
56 | * |
57 | * ... |
58 | * v4 := aggr.count( v0 ); |
59 | * v2 := algebra.slice( v0, 0, v4 ); |
60 | * v3 := sql.append( ..., ..., ..., ..., v2 ); |
61 | * ... |
62 | * v5 := ... v0 ...; |
63 | * ... |
64 | * |
65 | */ |
66 | |
67 | /* |
68 | * It is mandatory to make optimizers part of the 'optimizer' module. |
69 | * This allows the optimizer implementation to find them and react on them. |
70 | */ |
71 | #include "monetdb_config.h" |
72 | #include "opt_sql_append.h" |
73 | #include "mal_interpreter.h" |
74 | |
75 | /* focus initially on persistent tables. */ |
76 | |
77 | static int |
78 | OPTsql_appendImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
79 | { |
80 | InstrPtr *old = NULL; |
81 | int i, limit, slimit, actions = 0; |
82 | |
83 | (void) cntxt; |
84 | (void) pci; /* Tell compilers that we know that we do not */ |
85 | (void) stk; /* use these function parameters, here. */ |
86 | |
87 | /* In general, a MAL optimizer transforms a given MAL program into a |
88 | * modified one by sequentially walking through the given program |
89 | * and concurrently creating a new one from scratch by |
90 | * (1) copying statements as is, modified, or in a different order, |
91 | * or (2) omitting statements or (3) introducing new statements. |
92 | */ |
93 | |
94 | /* check for logical error: mb must never be NULL */ |
95 | assert (mb != NULL); |
96 | |
97 | /* save the old stage of the MAL block */ |
98 | old = mb->stmt; |
99 | limit= mb->stop; |
100 | slimit = mb->ssize; |
101 | |
102 | /* initialize the statement list. Notice, the symbol table remains intact */ |
103 | if (newMalBlkStmt(mb, mb->ssize) < 0) |
104 | return 0; |
105 | |
106 | /* the plan signature can be copied safely */ |
107 | pushInstruction(mb, old[0]); |
108 | |
109 | /* iterate over the instructions of the input MAL program */ |
110 | for (i = 1; i < limit; i++) { |
111 | InstrPtr p = old[i]; |
112 | |
113 | /* check for |
114 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
115 | */ |
116 | if (getModuleId(p) == sqlRef && |
117 | getFunctionId(p) == appendRef && |
118 | p->argc > 5 && |
119 | p->retc == 1 && |
120 | isaBatType(getArgType(mb, p, 5))) { |
121 | /* found |
122 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
123 | */ |
124 | int j = 0, k = 0; |
125 | InstrPtr q1 = NULL, q2 = NULL; |
126 | bit found = FALSE; |
127 | |
128 | /* check whether next is |
129 | * v4 := aggr.count(v0); |
130 | */ |
131 | if (i+1 < limit) { |
132 | InstrPtr q = old[i+1]; |
133 | if (getModuleId(q) == aggrRef && |
134 | getFunctionId(q) == countRef && |
135 | q->argc == 2 && |
136 | q->retc == 1 && |
137 | getArg(q, 1) == getArg(p, 5)) { |
138 | /* found |
139 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
140 | * v4 := aggr.count(v0); |
141 | */ |
142 | /* issue/execute |
143 | * v4 := aggr.count(v0); |
144 | * before |
145 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
146 | */ |
147 | pushInstruction(mb, q); |
148 | q1 = q; |
149 | i++; |
150 | actions++; /* to keep track if anything has been done */ |
151 | } |
152 | } |
153 | |
154 | /* look for |
155 | * v5 := ... v0 ...; |
156 | */ |
157 | /* an expensive loop, better would be to remember that v0 |
158 | * has a different role. A typical method is to keep a |
159 | * map from variable -> instruction where it was |
160 | * detected. Then you can check each assignment for use of |
161 | * v0 |
162 | */ |
163 | for (j = i+1; !found && j < limit; j++) |
164 | for (k = old[j]->retc; !found && k < old[j]->argc; k++) |
165 | found = (getArg(old[j], k) == getArg(p, 5)); |
166 | if (found) { |
167 | /* replace |
168 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
169 | * with |
170 | * v1 := aggr.count( v0 ); |
171 | * v2 := algebra.slice( v0, 0, v1 ); |
172 | * v3 := sql.append( ..., ..., ..., ..., v2 ); |
173 | */ |
174 | |
175 | /* push new v1 := aggr.count( v0 ); unless already available */ |
176 | if (q1 == NULL) { |
177 | /* use mal_builder.h primitives |
178 | * q1 = newStmt(mb, aggrRef,countRef); |
179 | * setArgType(mb,q1,TYPE_lng) */ |
180 | /* it will be added to the block and even my |
181 | * re-use MAL instructions */ |
182 | q1 = newInstruction(mb,aggrRef,countRef); |
183 | getArg(q1,0) = newTmpVariable(mb, TYPE_lng); |
184 | q1 = pushArgument(mb, q1, getArg(p, 5)); |
185 | pushInstruction(mb, q1); |
186 | } |
187 | |
188 | /* push new v2 := algebra.slice( v0, 0, v1 ); */ |
189 | /* use mal_builder.h primitives |
190 | * q1 = newStmt(mb, algebraRef,sliceRef); */ |
191 | q2 = newInstruction(mb,algebraRef, sliceRef); |
192 | getArg(q2,0) = newTmpVariable(mb, TYPE_any); |
193 | q2 = pushArgument(mb, q2, getArg(p, 5)); |
194 | q2 = pushLng(mb, q2, 0); |
195 | q2 = pushArgument(mb, q2, getArg(q1, 0)); |
196 | pushInstruction(mb, q2); |
197 | |
198 | /* push modified v3 := sql.append( ..., ..., ..., ..., v2 ); */ |
199 | getArg(p, 5) = getArg(q2, 0); |
200 | pushInstruction(mb, p); |
201 | |
202 | actions++; |
203 | continue; |
204 | } |
205 | } |
206 | |
207 | pushInstruction(mb, p); |
208 | if (p->token == ENDsymbol) break; |
209 | } |
210 | |
211 | /* We would like to retain everything from the ENDsymbol |
212 | * up to the end of the plan, because after the ENDsymbol |
213 | * the remaining optimizer steps are stored. |
214 | */ |
215 | for(i++; i<limit; i++) |
216 | if (old[i]) |
217 | pushInstruction(mb, old[i]); |
218 | /* any remaining MAL instruction records are removed */ |
219 | for(; i<slimit; i++) |
220 | if (old[i]) |
221 | freeInstruction(old[i]); |
222 | |
223 | GDKfree(old); |
224 | |
225 | /* for statistics we return if/how many patches have been made */ |
226 | #ifdef DEBUG_OPT_OPTIMIZERS |
227 | mnstr_printf(cntxt->fdout,"#opt_sql_append: %d statements added\n" , actions); |
228 | #endif |
229 | return actions; |
230 | } |
231 | |
232 | /* optimizers have to be registered in the optcatalog in opt_support.c. |
233 | * you have to path the file accordingly. |
234 | */ |
235 | |
236 | /* Optimizer code wrapper |
237 | * The optimizer wrapper code is the interface to the MAL optimizer calls. |
238 | * It prepares the environment for the optimizers to do their work and removes |
239 | * the call itself to avoid endless recursions. |
240 | * |
241 | * Before an optimizer is finished, it should leave a clean state behind. |
242 | * Moreover, the information of the optimization step is saved for |
243 | * debugging and analysis. |
244 | * |
245 | * The wrapper expects the optimizers to return the number of |
246 | * actions taken, i.e. number of successful changes to the code. |
247 | */ |
248 | |
249 | str OPTsql_append(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p){ |
250 | str modnme; |
251 | str fcnnme; |
252 | str msg= MAL_SUCCEED; |
253 | Symbol s= NULL; |
254 | char buf[256]; |
255 | lng clk= GDKusec(); |
256 | int actions = 0; |
257 | |
258 | (void) cntxt; |
259 | if( p ) |
260 | removeInstruction(mb, p); |
261 | #ifdef DEBUG_OPT_OPTIMIZERS |
262 | mnstr_printf(cntxt->fdout,"=APPLY OPTIMIZER sql_append\n" ); |
263 | #endif |
264 | if( p && p->argc > 1 ){ |
265 | if( getArgType(mb,p,1) != TYPE_str || |
266 | getArgType(mb,p,2) != TYPE_str || |
267 | !isVarConstant(mb,getArg(p,1)) || |
268 | !isVarConstant(mb,getArg(p,2)) |
269 | ) { |
270 | throw(MAL, "optimizer.sql_append" , ILLARG_CONSTANTS); |
271 | } |
272 | if( stk != 0){ |
273 | modnme= *getArgReference_str(stk,p,1); |
274 | fcnnme= *getArgReference_str(stk,p,2); |
275 | } else { |
276 | modnme= getArgDefault(mb,p,1); |
277 | fcnnme= getArgDefault(mb,p,2); |
278 | } |
279 | s= findSymbol(cntxt->usermodule, putName(modnme),putName(fcnnme)); |
280 | |
281 | if( s == NULL) { |
282 | char buf[1024]; |
283 | snprintf(buf,1024, "%s.%s" ,modnme,fcnnme); |
284 | throw(MAL, "optimizer.sql_append" , RUNTIME_OBJECT_UNDEFINED ":%s" , buf); |
285 | } |
286 | mb = s->def; |
287 | stk= 0; |
288 | } |
289 | if( mb->errors ){ |
290 | /* when we have errors, we still want to see them */ |
291 | addtoMalBlkHistory(mb); |
292 | return MAL_SUCCEED; |
293 | } |
294 | actions= OPTsql_appendImplementation(cntxt, mb,stk,p); |
295 | |
296 | /* Defense line against incorrect plans */ |
297 | chkTypes(cntxt->usermodule, mb, FALSE); |
298 | chkFlow(mb); |
299 | chkDeclarations(mb); |
300 | #ifdef DEBUG_OPT_OPTIMIZERS |
301 | mnstr_printf(cntxt->fdout,"=FINISHED sql_append %d\n" ,actions); |
302 | printFunction(cntxt->fdout,mb,0,LIST_MAL_ALL ); |
303 | mnstr_printf(cntxt->fdout,"#opt_reduce: " LLFMT " ms\n" ,t); |
304 | #endif |
305 | clk = GDKusec()- clk; |
306 | snprintf(buf,256,"%-20s actions=%2d time=" LLFMT " usec" ,"optimizer.sql_append" ,actions, clk); |
307 | newComment(mb,buf); |
308 | addtoMalBlkHistory(mb); |
309 | return msg; |
310 | } |
311 | |