1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* author: M Kersten |
10 | * Post-optimization of projection lists. |
11 | */ |
12 | #include "monetdb_config.h" |
13 | #include "opt_deadcode.h" |
14 | #include "opt_projectionpath.h" |
15 | |
16 | |
17 | // Common prefix reduction was not effective it is retained for |
18 | // future experiments. |
19 | //#define ELIMCOMMONPREFIX |
20 | |
21 | #define LOOKAHEAD 500 /* limit the lookahead for candidates */ |
22 | |
23 | /* locate common prefixes in projection lists |
24 | * The algorithm is quadratic in the number of paths considered. */ |
25 | |
26 | #ifdef ELIMCOMMONPREFIX |
27 | static int |
28 | OPTprojectionPrefix(Client cntxt, MalBlkPtr mb, int prefixlength) |
29 | { |
30 | int i, j, k, match, actions=0; |
31 | InstrPtr p,q,r,*old; |
32 | int limit, slimit; |
33 | str msg = MAL_SUCCEED; |
34 | |
35 | old = mb->stmt; |
36 | limit = mb->stop; |
37 | slimit= mb->ssize; |
38 | if (newMalBlkStmt(mb,mb->ssize) < 0) |
39 | return 0; |
40 | |
41 | if( OPTdebug & OPTprojectionpath){ |
42 | fprintf(stderr,"#projectionpath find common prefix prefixlength %d\n" , prefixlength); |
43 | } |
44 | |
45 | for( i = 0; i < limit; i++){ |
46 | p = old[i]; |
47 | assert(p); |
48 | if ( getFunctionId(p) != projectionpathRef || p->argc < prefixlength) { |
49 | pushInstruction(mb,p); |
50 | continue; |
51 | } |
52 | |
53 | if( OPTdebug & OPTprojectionpath){ |
54 | fprintf(stderr,"#projectionpath candidate prefix pc %d \n" , i); |
55 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
56 | } |
57 | /* we fixed a projection path of the target prefixlength |
58 | * Search now the remainder for at least one case where it |
59 | * has a common prefix of prefixlength |
60 | */ |
61 | for(match = 0, j= i+1; j < limit && j < i + LOOKAHEAD; j++) { |
62 | q= old[j]; |
63 | if ( getFunctionId(q) != projectionpathRef || q->argc < prefixlength) |
64 | continue; |
65 | for( match =0, k = q->retc; k < prefixlength; k++) |
66 | match += getArg(q,k) == getArg(p,k); |
67 | if ( match == prefixlength - q->retc ) |
68 | break; |
69 | match = 0; |
70 | } |
71 | if ( match && match == prefixlength - q->retc ){ |
72 | /* at least one instruction has been found. |
73 | * Inject the prefex projection path and replace all use cases |
74 | */ |
75 | |
76 | if( OPTdebug & OPTprojectionpath){ |
77 | fprintf(stderr,"#projectionpath found common prefix pc %d \n" , j); |
78 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
79 | } |
80 | |
81 | /* create the factored out prefix projection */ |
82 | r = copyInstruction(p); |
83 | if( r == NULL){ |
84 | return -1; |
85 | } |
86 | r->argc = prefixlength; |
87 | getArg(r,0) = newTmpVariable(mb, newBatType(getBatType(getArgType(mb,r,r->argc-1)))); |
88 | setVarUDFtype(mb, getArg(r,0)); |
89 | if( r->argc == 3) |
90 | setFunctionId(r,projectionRef); |
91 | r->typechk = TYPE_UNKNOWN; |
92 | pushInstruction(mb,r); |
93 | |
94 | if( OPTdebug & OPTprojectionpath){ |
95 | fprintf(stderr,"#projectionpath prefix instruction\n" ); |
96 | fprintInstruction(stderr,mb, 0, r, LIST_MAL_ALL); |
97 | } |
98 | |
99 | |
100 | /* patch all instructions with same prefix. */ |
101 | for( ; j < limit; j++) { |
102 | q= old[j]; |
103 | if ( getFunctionId(q) != projectionpathRef || q->argc < prefixlength) |
104 | continue; |
105 | for( match =0, k = r->retc; k < r->argc; k++) |
106 | match += getArg(q,k) == getArg(r,k); |
107 | if (match && match == prefixlength - r->retc ){ |
108 | actions++; |
109 | if( OPTdebug & OPTprojectionpath){ |
110 | fprintf(stderr,"#projectionpath before:" ); |
111 | fprintInstruction(stderr,mb, 0, q, LIST_MAL_ALL); |
112 | } |
113 | if( q->argc == r->argc ){ |
114 | clrFunction(q); |
115 | getArg(q,q->retc) = getArg(r,0); |
116 | q->argc = q->retc + 1; |
117 | } else { |
118 | getArg(q,q->retc) = getArg(r,0); |
119 | for( k= q->retc +1 ; k < prefixlength; k++) |
120 | delArgument(q, q->retc + 1); |
121 | if( q->argc == 3) |
122 | setFunctionId(q,projectionRef); |
123 | } |
124 | if( OPTdebug & OPTprojectionpath){ |
125 | fprintf(stderr,"#projectionpath after :" ); |
126 | fprintInstruction(stderr,mb, 0, q, LIST_MAL_ALL); |
127 | } |
128 | } |
129 | } |
130 | /* patch instruction p by deletion of common prefix */ |
131 | if( r->argc == p->argc ){ |
132 | clrFunction(p); |
133 | getArg(p,p->retc) = getArg(r,0); |
134 | p->argc = p->retc + 1; |
135 | } else { |
136 | getArg(p,p->retc) = getArg(r,0); |
137 | for( k= p->retc + 1; k < prefixlength; k++) |
138 | delArgument(p, p->retc + 1); |
139 | if( p->argc == 3) |
140 | setFunctionId(p,projectionRef); |
141 | } |
142 | |
143 | if( OPTdebug & OPTprojectionpath){ |
144 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
145 | } |
146 | } |
147 | pushInstruction(mb,p); |
148 | } |
149 | |
150 | if( OPTdebug & OPTprojectionpath){ |
151 | if( actions > 0){ |
152 | chkTypes(cntxt->usermodule, mb, FALSE); |
153 | chkFlow(mb); |
154 | chkDeclarations(mb); |
155 | } |
156 | mnstr_printf(cntxt->fdout,"#projectionpath prefix actions %d\n" ,actions); |
157 | if(actions) printFunction(cntxt->fdout,mb, 0, LIST_MAL_ALL); |
158 | } |
159 | for(; i<slimit; i++) |
160 | if(old[i]) |
161 | freeInstruction(old[i]); |
162 | GDKfree(old); |
163 | if( actions) |
164 | actions += OPTdeadcodeImplementation(cntxt, mb, 0, 0); |
165 | return actions; |
166 | } |
167 | #endif |
168 | |
169 | str |
170 | OPTprojectionpathImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
171 | { |
172 | int i,j,k, actions=0, maxprefixlength=0; |
173 | int *pc =0; |
174 | InstrPtr q,r; |
175 | InstrPtr *old=0; |
176 | int *varcnt= 0; /* use count */ |
177 | int limit,slimit; |
178 | char buf[256]; |
179 | lng usec = GDKusec(); |
180 | str msg = MAL_SUCCEED; |
181 | |
182 | (void) cntxt; |
183 | (void) stk; |
184 | if ( mb->inlineProp) |
185 | return MAL_SUCCEED; |
186 | //if ( optimizerIsApplied(mb,"projectionpath") ) |
187 | //return 0; |
188 | |
189 | |
190 | if( OPTdebug & OPTprojectionpath){ |
191 | fprintf(stderr,"#projectionpath optimizer start \n" ); |
192 | fprintFunction(stderr,mb, 0, LIST_MAL_ALL); |
193 | } |
194 | |
195 | old= mb->stmt; |
196 | limit= mb->stop; |
197 | slimit= mb->ssize; |
198 | if ( newMalBlkStmt(mb, 2 * mb->stop) < 0) |
199 | throw(MAL,"optimizer.projectionpath" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
200 | |
201 | /* beware, new variables and instructions are introduced */ |
202 | pc= (int*) GDKzalloc(sizeof(int)* mb->vtop * 2); /* to find last assignment */ |
203 | varcnt= (int*) GDKzalloc(sizeof(int)* mb->vtop * 2); |
204 | if (pc == NULL || varcnt == NULL ){ |
205 | msg = createException(MAL,"optimizer.projectionpath" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
206 | goto wrapupall; |
207 | } |
208 | |
209 | /* |
210 | * Count the variable re-use used as arguments first. |
211 | * A pass operation is not a real re-use |
212 | */ |
213 | for (i = 0; i<limit; i++){ |
214 | p= old[i]; |
215 | for(j=p->retc; j<p->argc; j++) |
216 | if( ! (getModuleId(p) == languageRef && getFunctionId(p)== passRef)) |
217 | varcnt[getArg(p,j)]++; |
218 | } |
219 | |
220 | /* assume a single pass over the plan, and only consider projection sequences |
221 | * beware, we are only able to deal with projections without candidate lists. (argc=3) |
222 | * We also should not change the type of the outcome, i.e. leaving the last argument untouched. |
223 | */ |
224 | for (i = 0; i<limit; i++){ |
225 | p= old[i]; |
226 | if( getModuleId(p)== algebraRef && getFunctionId(p) == projectionRef && p->argc == 3){ |
227 | /* |
228 | * Try to expand its argument list with what we have found so far. |
229 | */ |
230 | if((q = copyInstruction(p)) == NULL) { |
231 | msg = createException(MAL,"optimizer.projectionpath" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
232 | goto wrapupall; |
233 | } |
234 | if( OPTdebug & OPTprojectionpath){ |
235 | fprintf(stderr,"#before " ); |
236 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
237 | } |
238 | |
239 | q->argc=p->retc; |
240 | for(j=p->retc; j<p->argc; j++){ |
241 | if (pc[getArg(p,j)] ) |
242 | r = getInstrPtr(mb,pc[getArg(p,j)]); |
243 | else |
244 | r = 0; |
245 | if (r && varcnt[getArg(p,j)] > 1 ) |
246 | r = 0; |
247 | |
248 | /* inject the complete sub-path */ |
249 | |
250 | if( OPTdebug & OPTprojectionpath){ |
251 | if (r) { |
252 | fprintf(stderr,"#inject " ); |
253 | fprintInstruction(stderr,mb, 0, r, LIST_MAL_ALL); |
254 | } |
255 | } |
256 | if ( getFunctionId(p) == projectionRef){ |
257 | if( r && getModuleId(r)== algebraRef && ( getFunctionId(r)== projectionRef || getFunctionId(r)== projectionpathRef) ){ |
258 | for(k= r->retc; k<r->argc; k++) |
259 | q = pushArgument(mb,q,getArg(r,k)); |
260 | } else |
261 | q = pushArgument(mb,q,getArg(p,j)); |
262 | } |
263 | } |
264 | if(q->argc<= p->argc){ |
265 | /* no change */ |
266 | freeInstruction(q); |
267 | goto wrapup; |
268 | } |
269 | /* |
270 | * Final type check and hardwire the result type, because that can not be inferred directly from the signature |
271 | * We already know that all heads are void. Only the last element may have a non-oid type. |
272 | */ |
273 | for(j=1; j<q->argc-1; j++) |
274 | if( getBatType(getArgType(mb,q,j)) != TYPE_oid && getBatType(getArgType(mb,q,j)) != TYPE_void ){ |
275 | /* don't use the candidate list */ |
276 | freeInstruction(q); |
277 | goto wrapup; |
278 | } |
279 | |
280 | /* fix the type */ |
281 | setVarUDFtype(mb, getArg(q,0)); |
282 | setVarType(mb, getArg(q,0), newBatType(getBatType(getArgType(mb,q,q->argc-1)))); |
283 | if ( getFunctionId(q) == projectionRef ) |
284 | setFunctionId(q,projectionpathRef); |
285 | q->typechk = TYPE_UNKNOWN; |
286 | |
287 | if( OPTdebug & OPTprojectionpath){ |
288 | fprintf(stderr,"#after " ); |
289 | fprintInstruction(stderr,mb, 0, q, LIST_MAL_ALL); |
290 | } |
291 | freeInstruction(p); |
292 | p = q; |
293 | /* keep track of the longest projection path */ |
294 | if ( p->argc > maxprefixlength) |
295 | maxprefixlength = p->argc; |
296 | actions++; |
297 | } |
298 | wrapup: |
299 | pushInstruction(mb,p); |
300 | for(j=0; j< p->retc; j++) |
301 | if( getModuleId(p)== algebraRef && ( getFunctionId(p)== projectionRef || getFunctionId(p)== projectionpathRef) ){ |
302 | pc[getArg(p,j)]= mb->stop-1; |
303 | if( OPTdebug & OPTprojectionpath){ |
304 | fprintf(stderr,"#keep " ); |
305 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
306 | } |
307 | } |
308 | } |
309 | if( OPTdebug & OPTprojectionpath){ |
310 | fprintf(stderr,"#projection path prefixlength %d\n" ,maxprefixlength); |
311 | } |
312 | |
313 | for(; i<slimit; i++) |
314 | if(old[i]) |
315 | freeInstruction(old[i]); |
316 | |
317 | /* All complete projection paths have been constructed. |
318 | * There may be cases where there is a common prefix used multiple times. |
319 | * They are located and removed in a few scans over the plan |
320 | * |
321 | * The prefix path mostly consist of smaller columns, |
322 | * which make the benefit not large. In SF100 roughly 100 out of |
323 | * 4500 projection operations were removed. |
324 | * On medium scale databases it may save cpu cycles. |
325 | * Turning this feature into a compile time option. |
326 | if( maxprefixlength > 3){ |
327 | //Before searching the prefix, we should remove all non-used instructions. |
328 | actions += OPTdeadcodeImplementation(cntxt, mb, 0, 0); |
329 | for( ; maxprefixlength > 2; maxprefixlength--) |
330 | actions += OPTprojectionPrefix(cntxt, mb, maxprefixlength); |
331 | } |
332 | */ |
333 | |
334 | /* Defense line against incorrect plans */ |
335 | if( actions > 0){ |
336 | chkTypes(cntxt->usermodule, mb, FALSE); |
337 | chkFlow(mb); |
338 | chkDeclarations(mb); |
339 | } |
340 | /* keep all actions taken as a post block comment */ |
341 | wrapupall: |
342 | usec = GDKusec()- usec; |
343 | snprintf(buf,256,"%-20s actions=%2d time=" LLFMT " usec" ,"projectionpath" ,actions, usec); |
344 | newComment(mb,buf); |
345 | if( actions >= 0) |
346 | addtoMalBlkHistory(mb); |
347 | if (pc ) GDKfree(pc); |
348 | if (varcnt ) GDKfree(varcnt); |
349 | if(old) GDKfree(old); |
350 | |
351 | if( OPTdebug & OPTprojectionpath){ |
352 | fprintf(stderr, "#PROJECTIONPATH optimizer exit\n" ); |
353 | fprintFunction(stderr, mb, 0, LIST_MAL_ALL); |
354 | } |
355 | return msg; |
356 | } |
357 | |