1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/* author: M Kersten
10 * Post-optimization of projection lists.
11 */
12#include "monetdb_config.h"
13#include "opt_deadcode.h"
14#include "opt_projectionpath.h"
15
16
17// Common prefix reduction was not effective it is retained for
18// future experiments.
19//#define ELIMCOMMONPREFIX
20
21#define LOOKAHEAD 500 /* limit the lookahead for candidates */
22
23/* locate common prefixes in projection lists
24 * The algorithm is quadratic in the number of paths considered. */
25
26#ifdef ELIMCOMMONPREFIX
27static int
28OPTprojectionPrefix(Client cntxt, MalBlkPtr mb, int prefixlength)
29{
30 int i, j, k, match, actions=0;
31 InstrPtr p,q,r,*old;
32 int limit, slimit;
33 str msg = MAL_SUCCEED;
34
35 old = mb->stmt;
36 limit = mb->stop;
37 slimit= mb->ssize;
38 if (newMalBlkStmt(mb,mb->ssize) < 0)
39 return 0;
40
41 if( OPTdebug & OPTprojectionpath){
42 fprintf(stderr,"#projectionpath find common prefix prefixlength %d\n", prefixlength);
43 }
44
45 for( i = 0; i < limit; i++){
46 p = old[i];
47 assert(p);
48 if ( getFunctionId(p) != projectionpathRef || p->argc < prefixlength) {
49 pushInstruction(mb,p);
50 continue;
51 }
52
53 if( OPTdebug & OPTprojectionpath){
54 fprintf(stderr,"#projectionpath candidate prefix pc %d \n", i);
55 fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL);
56 }
57 /* we fixed a projection path of the target prefixlength
58 * Search now the remainder for at least one case where it
59 * has a common prefix of prefixlength
60 */
61 for(match = 0, j= i+1; j < limit && j < i + LOOKAHEAD; j++) {
62 q= old[j];
63 if ( getFunctionId(q) != projectionpathRef || q->argc < prefixlength)
64 continue;
65 for( match =0, k = q->retc; k < prefixlength; k++)
66 match += getArg(q,k) == getArg(p,k);
67 if ( match == prefixlength - q->retc )
68 break;
69 match = 0;
70 }
71 if ( match && match == prefixlength - q->retc ){
72 /* at least one instruction has been found.
73 * Inject the prefex projection path and replace all use cases
74 */
75
76 if( OPTdebug & OPTprojectionpath){
77 fprintf(stderr,"#projectionpath found common prefix pc %d \n", j);
78 fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL);
79 }
80
81 /* create the factored out prefix projection */
82 r = copyInstruction(p);
83 if( r == NULL){
84 return -1;
85 }
86 r->argc = prefixlength;
87 getArg(r,0) = newTmpVariable(mb, newBatType(getBatType(getArgType(mb,r,r->argc-1))));
88 setVarUDFtype(mb, getArg(r,0));
89 if( r->argc == 3)
90 setFunctionId(r,projectionRef);
91 r->typechk = TYPE_UNKNOWN;
92 pushInstruction(mb,r);
93
94 if( OPTdebug & OPTprojectionpath){
95 fprintf(stderr,"#projectionpath prefix instruction\n");
96 fprintInstruction(stderr,mb, 0, r, LIST_MAL_ALL);
97 }
98
99
100 /* patch all instructions with same prefix. */
101 for( ; j < limit; j++) {
102 q= old[j];
103 if ( getFunctionId(q) != projectionpathRef || q->argc < prefixlength)
104 continue;
105 for( match =0, k = r->retc; k < r->argc; k++)
106 match += getArg(q,k) == getArg(r,k);
107 if (match && match == prefixlength - r->retc ){
108 actions++;
109 if( OPTdebug & OPTprojectionpath){
110 fprintf(stderr,"#projectionpath before:");
111 fprintInstruction(stderr,mb, 0, q, LIST_MAL_ALL);
112 }
113 if( q->argc == r->argc ){
114 clrFunction(q);
115 getArg(q,q->retc) = getArg(r,0);
116 q->argc = q->retc + 1;
117 } else {
118 getArg(q,q->retc) = getArg(r,0);
119 for( k= q->retc +1 ; k < prefixlength; k++)
120 delArgument(q, q->retc + 1);
121 if( q->argc == 3)
122 setFunctionId(q,projectionRef);
123 }
124 if( OPTdebug & OPTprojectionpath){
125 fprintf(stderr,"#projectionpath after :");
126 fprintInstruction(stderr,mb, 0, q, LIST_MAL_ALL);
127 }
128 }
129 }
130 /* patch instruction p by deletion of common prefix */
131 if( r->argc == p->argc ){
132 clrFunction(p);
133 getArg(p,p->retc) = getArg(r,0);
134 p->argc = p->retc + 1;
135 } else {
136 getArg(p,p->retc) = getArg(r,0);
137 for( k= p->retc + 1; k < prefixlength; k++)
138 delArgument(p, p->retc + 1);
139 if( p->argc == 3)
140 setFunctionId(p,projectionRef);
141 }
142
143 if( OPTdebug & OPTprojectionpath){
144 fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL);
145 }
146 }
147 pushInstruction(mb,p);
148 }
149
150 if( OPTdebug & OPTprojectionpath){
151 if( actions > 0){
152 chkTypes(cntxt->usermodule, mb, FALSE);
153 chkFlow(mb);
154 chkDeclarations(mb);
155 }
156 mnstr_printf(cntxt->fdout,"#projectionpath prefix actions %d\n",actions);
157 if(actions) printFunction(cntxt->fdout,mb, 0, LIST_MAL_ALL);
158 }
159 for(; i<slimit; i++)
160 if(old[i])
161 freeInstruction(old[i]);
162 GDKfree(old);
163 if( actions)
164 actions += OPTdeadcodeImplementation(cntxt, mb, 0, 0);
165 return actions;
166}
167#endif
168
169str
170OPTprojectionpathImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
171{
172 int i,j,k, actions=0, maxprefixlength=0;
173 int *pc =0;
174 InstrPtr q,r;
175 InstrPtr *old=0;
176 int *varcnt= 0; /* use count */
177 int limit,slimit;
178 char buf[256];
179 lng usec = GDKusec();
180 str msg = MAL_SUCCEED;
181
182 (void) cntxt;
183 (void) stk;
184 if ( mb->inlineProp)
185 return MAL_SUCCEED;
186 //if ( optimizerIsApplied(mb,"projectionpath") )
187 //return 0;
188
189
190 if( OPTdebug & OPTprojectionpath){
191 fprintf(stderr,"#projectionpath optimizer start \n");
192 fprintFunction(stderr,mb, 0, LIST_MAL_ALL);
193 }
194
195 old= mb->stmt;
196 limit= mb->stop;
197 slimit= mb->ssize;
198 if ( newMalBlkStmt(mb, 2 * mb->stop) < 0)
199 throw(MAL,"optimizer.projectionpath", SQLSTATE(HY001) MAL_MALLOC_FAIL);
200
201 /* beware, new variables and instructions are introduced */
202 pc= (int*) GDKzalloc(sizeof(int)* mb->vtop * 2); /* to find last assignment */
203 varcnt= (int*) GDKzalloc(sizeof(int)* mb->vtop * 2);
204 if (pc == NULL || varcnt == NULL ){
205 msg = createException(MAL,"optimizer.projectionpath", SQLSTATE(HY001) MAL_MALLOC_FAIL);
206 goto wrapupall;
207 }
208
209 /*
210 * Count the variable re-use used as arguments first.
211 * A pass operation is not a real re-use
212 */
213 for (i = 0; i<limit; i++){
214 p= old[i];
215 for(j=p->retc; j<p->argc; j++)
216 if( ! (getModuleId(p) == languageRef && getFunctionId(p)== passRef))
217 varcnt[getArg(p,j)]++;
218 }
219
220 /* assume a single pass over the plan, and only consider projection sequences
221 * beware, we are only able to deal with projections without candidate lists. (argc=3)
222 * We also should not change the type of the outcome, i.e. leaving the last argument untouched.
223 */
224 for (i = 0; i<limit; i++){
225 p= old[i];
226 if( getModuleId(p)== algebraRef && getFunctionId(p) == projectionRef && p->argc == 3){
227 /*
228 * Try to expand its argument list with what we have found so far.
229 */
230 if((q = copyInstruction(p)) == NULL) {
231 msg = createException(MAL,"optimizer.projectionpath", SQLSTATE(HY001) MAL_MALLOC_FAIL);
232 goto wrapupall;
233 }
234 if( OPTdebug & OPTprojectionpath){
235 fprintf(stderr,"#before ");
236 fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL);
237 }
238
239 q->argc=p->retc;
240 for(j=p->retc; j<p->argc; j++){
241 if (pc[getArg(p,j)] )
242 r = getInstrPtr(mb,pc[getArg(p,j)]);
243 else
244 r = 0;
245 if (r && varcnt[getArg(p,j)] > 1 )
246 r = 0;
247
248 /* inject the complete sub-path */
249
250 if( OPTdebug & OPTprojectionpath){
251 if (r) {
252 fprintf(stderr,"#inject ");
253 fprintInstruction(stderr,mb, 0, r, LIST_MAL_ALL);
254 }
255 }
256 if ( getFunctionId(p) == projectionRef){
257 if( r && getModuleId(r)== algebraRef && ( getFunctionId(r)== projectionRef || getFunctionId(r)== projectionpathRef) ){
258 for(k= r->retc; k<r->argc; k++)
259 q = pushArgument(mb,q,getArg(r,k));
260 } else
261 q = pushArgument(mb,q,getArg(p,j));
262 }
263 }
264 if(q->argc<= p->argc){
265 /* no change */
266 freeInstruction(q);
267 goto wrapup;
268 }
269 /*
270 * Final type check and hardwire the result type, because that can not be inferred directly from the signature
271 * We already know that all heads are void. Only the last element may have a non-oid type.
272 */
273 for(j=1; j<q->argc-1; j++)
274 if( getBatType(getArgType(mb,q,j)) != TYPE_oid && getBatType(getArgType(mb,q,j)) != TYPE_void ){
275 /* don't use the candidate list */
276 freeInstruction(q);
277 goto wrapup;
278 }
279
280 /* fix the type */
281 setVarUDFtype(mb, getArg(q,0));
282 setVarType(mb, getArg(q,0), newBatType(getBatType(getArgType(mb,q,q->argc-1))));
283 if ( getFunctionId(q) == projectionRef )
284 setFunctionId(q,projectionpathRef);
285 q->typechk = TYPE_UNKNOWN;
286
287 if( OPTdebug & OPTprojectionpath){
288 fprintf(stderr,"#after ");
289 fprintInstruction(stderr,mb, 0, q, LIST_MAL_ALL);
290 }
291 freeInstruction(p);
292 p = q;
293 /* keep track of the longest projection path */
294 if ( p->argc > maxprefixlength)
295 maxprefixlength = p->argc;
296 actions++;
297 }
298 wrapup:
299 pushInstruction(mb,p);
300 for(j=0; j< p->retc; j++)
301 if( getModuleId(p)== algebraRef && ( getFunctionId(p)== projectionRef || getFunctionId(p)== projectionpathRef) ){
302 pc[getArg(p,j)]= mb->stop-1;
303 if( OPTdebug & OPTprojectionpath){
304 fprintf(stderr,"#keep ");
305 fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL);
306 }
307 }
308 }
309 if( OPTdebug & OPTprojectionpath){
310 fprintf(stderr,"#projection path prefixlength %d\n",maxprefixlength);
311 }
312
313 for(; i<slimit; i++)
314 if(old[i])
315 freeInstruction(old[i]);
316
317 /* All complete projection paths have been constructed.
318 * There may be cases where there is a common prefix used multiple times.
319 * They are located and removed in a few scans over the plan
320 *
321 * The prefix path mostly consist of smaller columns,
322 * which make the benefit not large. In SF100 roughly 100 out of
323 * 4500 projection operations were removed.
324 * On medium scale databases it may save cpu cycles.
325 * Turning this feature into a compile time option.
326 if( maxprefixlength > 3){
327 //Before searching the prefix, we should remove all non-used instructions.
328 actions += OPTdeadcodeImplementation(cntxt, mb, 0, 0);
329 for( ; maxprefixlength > 2; maxprefixlength--)
330 actions += OPTprojectionPrefix(cntxt, mb, maxprefixlength);
331 }
332 */
333
334 /* Defense line against incorrect plans */
335 if( actions > 0){
336 chkTypes(cntxt->usermodule, mb, FALSE);
337 chkFlow(mb);
338 chkDeclarations(mb);
339 }
340 /* keep all actions taken as a post block comment */
341wrapupall:
342 usec = GDKusec()- usec;
343 snprintf(buf,256,"%-20s actions=%2d time=" LLFMT " usec","projectionpath",actions, usec);
344 newComment(mb,buf);
345 if( actions >= 0)
346 addtoMalBlkHistory(mb);
347 if (pc ) GDKfree(pc);
348 if (varcnt ) GDKfree(varcnt);
349 if(old) GDKfree(old);
350
351 if( OPTdebug & OPTprojectionpath){
352 fprintf(stderr, "#PROJECTIONPATH optimizer exit\n");
353 fprintFunction(stderr, mb, 0, LIST_MAL_ALL);
354 }
355 return msg;
356}
357