| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /* author: M Kersten |
| 10 | * Post-optimization of projection lists. |
| 11 | */ |
| 12 | #include "monetdb_config.h" |
| 13 | #include "opt_deadcode.h" |
| 14 | #include "opt_projectionpath.h" |
| 15 | |
| 16 | |
| 17 | // Common prefix reduction was not effective it is retained for |
| 18 | // future experiments. |
| 19 | //#define ELIMCOMMONPREFIX |
| 20 | |
| 21 | #define LOOKAHEAD 500 /* limit the lookahead for candidates */ |
| 22 | |
| 23 | /* locate common prefixes in projection lists |
| 24 | * The algorithm is quadratic in the number of paths considered. */ |
| 25 | |
| 26 | #ifdef ELIMCOMMONPREFIX |
| 27 | static int |
| 28 | OPTprojectionPrefix(Client cntxt, MalBlkPtr mb, int prefixlength) |
| 29 | { |
| 30 | int i, j, k, match, actions=0; |
| 31 | InstrPtr p,q,r,*old; |
| 32 | int limit, slimit; |
| 33 | str msg = MAL_SUCCEED; |
| 34 | |
| 35 | old = mb->stmt; |
| 36 | limit = mb->stop; |
| 37 | slimit= mb->ssize; |
| 38 | if (newMalBlkStmt(mb,mb->ssize) < 0) |
| 39 | return 0; |
| 40 | |
| 41 | if( OPTdebug & OPTprojectionpath){ |
| 42 | fprintf(stderr,"#projectionpath find common prefix prefixlength %d\n" , prefixlength); |
| 43 | } |
| 44 | |
| 45 | for( i = 0; i < limit; i++){ |
| 46 | p = old[i]; |
| 47 | assert(p); |
| 48 | if ( getFunctionId(p) != projectionpathRef || p->argc < prefixlength) { |
| 49 | pushInstruction(mb,p); |
| 50 | continue; |
| 51 | } |
| 52 | |
| 53 | if( OPTdebug & OPTprojectionpath){ |
| 54 | fprintf(stderr,"#projectionpath candidate prefix pc %d \n" , i); |
| 55 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
| 56 | } |
| 57 | /* we fixed a projection path of the target prefixlength |
| 58 | * Search now the remainder for at least one case where it |
| 59 | * has a common prefix of prefixlength |
| 60 | */ |
| 61 | for(match = 0, j= i+1; j < limit && j < i + LOOKAHEAD; j++) { |
| 62 | q= old[j]; |
| 63 | if ( getFunctionId(q) != projectionpathRef || q->argc < prefixlength) |
| 64 | continue; |
| 65 | for( match =0, k = q->retc; k < prefixlength; k++) |
| 66 | match += getArg(q,k) == getArg(p,k); |
| 67 | if ( match == prefixlength - q->retc ) |
| 68 | break; |
| 69 | match = 0; |
| 70 | } |
| 71 | if ( match && match == prefixlength - q->retc ){ |
| 72 | /* at least one instruction has been found. |
| 73 | * Inject the prefex projection path and replace all use cases |
| 74 | */ |
| 75 | |
| 76 | if( OPTdebug & OPTprojectionpath){ |
| 77 | fprintf(stderr,"#projectionpath found common prefix pc %d \n" , j); |
| 78 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
| 79 | } |
| 80 | |
| 81 | /* create the factored out prefix projection */ |
| 82 | r = copyInstruction(p); |
| 83 | if( r == NULL){ |
| 84 | return -1; |
| 85 | } |
| 86 | r->argc = prefixlength; |
| 87 | getArg(r,0) = newTmpVariable(mb, newBatType(getBatType(getArgType(mb,r,r->argc-1)))); |
| 88 | setVarUDFtype(mb, getArg(r,0)); |
| 89 | if( r->argc == 3) |
| 90 | setFunctionId(r,projectionRef); |
| 91 | r->typechk = TYPE_UNKNOWN; |
| 92 | pushInstruction(mb,r); |
| 93 | |
| 94 | if( OPTdebug & OPTprojectionpath){ |
| 95 | fprintf(stderr,"#projectionpath prefix instruction\n" ); |
| 96 | fprintInstruction(stderr,mb, 0, r, LIST_MAL_ALL); |
| 97 | } |
| 98 | |
| 99 | |
| 100 | /* patch all instructions with same prefix. */ |
| 101 | for( ; j < limit; j++) { |
| 102 | q= old[j]; |
| 103 | if ( getFunctionId(q) != projectionpathRef || q->argc < prefixlength) |
| 104 | continue; |
| 105 | for( match =0, k = r->retc; k < r->argc; k++) |
| 106 | match += getArg(q,k) == getArg(r,k); |
| 107 | if (match && match == prefixlength - r->retc ){ |
| 108 | actions++; |
| 109 | if( OPTdebug & OPTprojectionpath){ |
| 110 | fprintf(stderr,"#projectionpath before:" ); |
| 111 | fprintInstruction(stderr,mb, 0, q, LIST_MAL_ALL); |
| 112 | } |
| 113 | if( q->argc == r->argc ){ |
| 114 | clrFunction(q); |
| 115 | getArg(q,q->retc) = getArg(r,0); |
| 116 | q->argc = q->retc + 1; |
| 117 | } else { |
| 118 | getArg(q,q->retc) = getArg(r,0); |
| 119 | for( k= q->retc +1 ; k < prefixlength; k++) |
| 120 | delArgument(q, q->retc + 1); |
| 121 | if( q->argc == 3) |
| 122 | setFunctionId(q,projectionRef); |
| 123 | } |
| 124 | if( OPTdebug & OPTprojectionpath){ |
| 125 | fprintf(stderr,"#projectionpath after :" ); |
| 126 | fprintInstruction(stderr,mb, 0, q, LIST_MAL_ALL); |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | /* patch instruction p by deletion of common prefix */ |
| 131 | if( r->argc == p->argc ){ |
| 132 | clrFunction(p); |
| 133 | getArg(p,p->retc) = getArg(r,0); |
| 134 | p->argc = p->retc + 1; |
| 135 | } else { |
| 136 | getArg(p,p->retc) = getArg(r,0); |
| 137 | for( k= p->retc + 1; k < prefixlength; k++) |
| 138 | delArgument(p, p->retc + 1); |
| 139 | if( p->argc == 3) |
| 140 | setFunctionId(p,projectionRef); |
| 141 | } |
| 142 | |
| 143 | if( OPTdebug & OPTprojectionpath){ |
| 144 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
| 145 | } |
| 146 | } |
| 147 | pushInstruction(mb,p); |
| 148 | } |
| 149 | |
| 150 | if( OPTdebug & OPTprojectionpath){ |
| 151 | if( actions > 0){ |
| 152 | chkTypes(cntxt->usermodule, mb, FALSE); |
| 153 | chkFlow(mb); |
| 154 | chkDeclarations(mb); |
| 155 | } |
| 156 | mnstr_printf(cntxt->fdout,"#projectionpath prefix actions %d\n" ,actions); |
| 157 | if(actions) printFunction(cntxt->fdout,mb, 0, LIST_MAL_ALL); |
| 158 | } |
| 159 | for(; i<slimit; i++) |
| 160 | if(old[i]) |
| 161 | freeInstruction(old[i]); |
| 162 | GDKfree(old); |
| 163 | if( actions) |
| 164 | actions += OPTdeadcodeImplementation(cntxt, mb, 0, 0); |
| 165 | return actions; |
| 166 | } |
| 167 | #endif |
| 168 | |
| 169 | str |
| 170 | OPTprojectionpathImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
| 171 | { |
| 172 | int i,j,k, actions=0, maxprefixlength=0; |
| 173 | int *pc =0; |
| 174 | InstrPtr q,r; |
| 175 | InstrPtr *old=0; |
| 176 | int *varcnt= 0; /* use count */ |
| 177 | int limit,slimit; |
| 178 | char buf[256]; |
| 179 | lng usec = GDKusec(); |
| 180 | str msg = MAL_SUCCEED; |
| 181 | |
| 182 | (void) cntxt; |
| 183 | (void) stk; |
| 184 | if ( mb->inlineProp) |
| 185 | return MAL_SUCCEED; |
| 186 | //if ( optimizerIsApplied(mb,"projectionpath") ) |
| 187 | //return 0; |
| 188 | |
| 189 | |
| 190 | if( OPTdebug & OPTprojectionpath){ |
| 191 | fprintf(stderr,"#projectionpath optimizer start \n" ); |
| 192 | fprintFunction(stderr,mb, 0, LIST_MAL_ALL); |
| 193 | } |
| 194 | |
| 195 | old= mb->stmt; |
| 196 | limit= mb->stop; |
| 197 | slimit= mb->ssize; |
| 198 | if ( newMalBlkStmt(mb, 2 * mb->stop) < 0) |
| 199 | throw(MAL,"optimizer.projectionpath" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
| 200 | |
| 201 | /* beware, new variables and instructions are introduced */ |
| 202 | pc= (int*) GDKzalloc(sizeof(int)* mb->vtop * 2); /* to find last assignment */ |
| 203 | varcnt= (int*) GDKzalloc(sizeof(int)* mb->vtop * 2); |
| 204 | if (pc == NULL || varcnt == NULL ){ |
| 205 | msg = createException(MAL,"optimizer.projectionpath" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
| 206 | goto wrapupall; |
| 207 | } |
| 208 | |
| 209 | /* |
| 210 | * Count the variable re-use used as arguments first. |
| 211 | * A pass operation is not a real re-use |
| 212 | */ |
| 213 | for (i = 0; i<limit; i++){ |
| 214 | p= old[i]; |
| 215 | for(j=p->retc; j<p->argc; j++) |
| 216 | if( ! (getModuleId(p) == languageRef && getFunctionId(p)== passRef)) |
| 217 | varcnt[getArg(p,j)]++; |
| 218 | } |
| 219 | |
| 220 | /* assume a single pass over the plan, and only consider projection sequences |
| 221 | * beware, we are only able to deal with projections without candidate lists. (argc=3) |
| 222 | * We also should not change the type of the outcome, i.e. leaving the last argument untouched. |
| 223 | */ |
| 224 | for (i = 0; i<limit; i++){ |
| 225 | p= old[i]; |
| 226 | if( getModuleId(p)== algebraRef && getFunctionId(p) == projectionRef && p->argc == 3){ |
| 227 | /* |
| 228 | * Try to expand its argument list with what we have found so far. |
| 229 | */ |
| 230 | if((q = copyInstruction(p)) == NULL) { |
| 231 | msg = createException(MAL,"optimizer.projectionpath" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
| 232 | goto wrapupall; |
| 233 | } |
| 234 | if( OPTdebug & OPTprojectionpath){ |
| 235 | fprintf(stderr,"#before " ); |
| 236 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
| 237 | } |
| 238 | |
| 239 | q->argc=p->retc; |
| 240 | for(j=p->retc; j<p->argc; j++){ |
| 241 | if (pc[getArg(p,j)] ) |
| 242 | r = getInstrPtr(mb,pc[getArg(p,j)]); |
| 243 | else |
| 244 | r = 0; |
| 245 | if (r && varcnt[getArg(p,j)] > 1 ) |
| 246 | r = 0; |
| 247 | |
| 248 | /* inject the complete sub-path */ |
| 249 | |
| 250 | if( OPTdebug & OPTprojectionpath){ |
| 251 | if (r) { |
| 252 | fprintf(stderr,"#inject " ); |
| 253 | fprintInstruction(stderr,mb, 0, r, LIST_MAL_ALL); |
| 254 | } |
| 255 | } |
| 256 | if ( getFunctionId(p) == projectionRef){ |
| 257 | if( r && getModuleId(r)== algebraRef && ( getFunctionId(r)== projectionRef || getFunctionId(r)== projectionpathRef) ){ |
| 258 | for(k= r->retc; k<r->argc; k++) |
| 259 | q = pushArgument(mb,q,getArg(r,k)); |
| 260 | } else |
| 261 | q = pushArgument(mb,q,getArg(p,j)); |
| 262 | } |
| 263 | } |
| 264 | if(q->argc<= p->argc){ |
| 265 | /* no change */ |
| 266 | freeInstruction(q); |
| 267 | goto wrapup; |
| 268 | } |
| 269 | /* |
| 270 | * Final type check and hardwire the result type, because that can not be inferred directly from the signature |
| 271 | * We already know that all heads are void. Only the last element may have a non-oid type. |
| 272 | */ |
| 273 | for(j=1; j<q->argc-1; j++) |
| 274 | if( getBatType(getArgType(mb,q,j)) != TYPE_oid && getBatType(getArgType(mb,q,j)) != TYPE_void ){ |
| 275 | /* don't use the candidate list */ |
| 276 | freeInstruction(q); |
| 277 | goto wrapup; |
| 278 | } |
| 279 | |
| 280 | /* fix the type */ |
| 281 | setVarUDFtype(mb, getArg(q,0)); |
| 282 | setVarType(mb, getArg(q,0), newBatType(getBatType(getArgType(mb,q,q->argc-1)))); |
| 283 | if ( getFunctionId(q) == projectionRef ) |
| 284 | setFunctionId(q,projectionpathRef); |
| 285 | q->typechk = TYPE_UNKNOWN; |
| 286 | |
| 287 | if( OPTdebug & OPTprojectionpath){ |
| 288 | fprintf(stderr,"#after " ); |
| 289 | fprintInstruction(stderr,mb, 0, q, LIST_MAL_ALL); |
| 290 | } |
| 291 | freeInstruction(p); |
| 292 | p = q; |
| 293 | /* keep track of the longest projection path */ |
| 294 | if ( p->argc > maxprefixlength) |
| 295 | maxprefixlength = p->argc; |
| 296 | actions++; |
| 297 | } |
| 298 | wrapup: |
| 299 | pushInstruction(mb,p); |
| 300 | for(j=0; j< p->retc; j++) |
| 301 | if( getModuleId(p)== algebraRef && ( getFunctionId(p)== projectionRef || getFunctionId(p)== projectionpathRef) ){ |
| 302 | pc[getArg(p,j)]= mb->stop-1; |
| 303 | if( OPTdebug & OPTprojectionpath){ |
| 304 | fprintf(stderr,"#keep " ); |
| 305 | fprintInstruction(stderr,mb, 0, p, LIST_MAL_ALL); |
| 306 | } |
| 307 | } |
| 308 | } |
| 309 | if( OPTdebug & OPTprojectionpath){ |
| 310 | fprintf(stderr,"#projection path prefixlength %d\n" ,maxprefixlength); |
| 311 | } |
| 312 | |
| 313 | for(; i<slimit; i++) |
| 314 | if(old[i]) |
| 315 | freeInstruction(old[i]); |
| 316 | |
| 317 | /* All complete projection paths have been constructed. |
| 318 | * There may be cases where there is a common prefix used multiple times. |
| 319 | * They are located and removed in a few scans over the plan |
| 320 | * |
| 321 | * The prefix path mostly consist of smaller columns, |
| 322 | * which make the benefit not large. In SF100 roughly 100 out of |
| 323 | * 4500 projection operations were removed. |
| 324 | * On medium scale databases it may save cpu cycles. |
| 325 | * Turning this feature into a compile time option. |
| 326 | if( maxprefixlength > 3){ |
| 327 | //Before searching the prefix, we should remove all non-used instructions. |
| 328 | actions += OPTdeadcodeImplementation(cntxt, mb, 0, 0); |
| 329 | for( ; maxprefixlength > 2; maxprefixlength--) |
| 330 | actions += OPTprojectionPrefix(cntxt, mb, maxprefixlength); |
| 331 | } |
| 332 | */ |
| 333 | |
| 334 | /* Defense line against incorrect plans */ |
| 335 | if( actions > 0){ |
| 336 | chkTypes(cntxt->usermodule, mb, FALSE); |
| 337 | chkFlow(mb); |
| 338 | chkDeclarations(mb); |
| 339 | } |
| 340 | /* keep all actions taken as a post block comment */ |
| 341 | wrapupall: |
| 342 | usec = GDKusec()- usec; |
| 343 | snprintf(buf,256,"%-20s actions=%2d time=" LLFMT " usec" ,"projectionpath" ,actions, usec); |
| 344 | newComment(mb,buf); |
| 345 | if( actions >= 0) |
| 346 | addtoMalBlkHistory(mb); |
| 347 | if (pc ) GDKfree(pc); |
| 348 | if (varcnt ) GDKfree(varcnt); |
| 349 | if(old) GDKfree(old); |
| 350 | |
| 351 | if( OPTdebug & OPTprojectionpath){ |
| 352 | fprintf(stderr, "#PROJECTIONPATH optimizer exit\n" ); |
| 353 | fprintFunction(stderr, mb, 0, LIST_MAL_ALL); |
| 354 | } |
| 355 | return msg; |
| 356 | } |
| 357 | |