| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /* |
| 10 | * @a S. Manegold |
| 11 | * @- SQL append show-case optimizer |
| 12 | * |
| 13 | * This optimizer is mainly for demonstrating how to implement a MAL |
| 14 | * optimizer in principle. |
| 15 | * |
| 16 | * Purely for that purpose, the optimizer's task is to recognize MAL code |
| 17 | * patterns like |
| 18 | * |
| 19 | * ... |
| 20 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
| 21 | * ... |
| 22 | * v5 := ... v0 ...; |
| 23 | * ... |
| 24 | * |
| 25 | * i.e., an sql.append() statement that is eventually followed by some other |
| 26 | * statement later on in the MAL program that uses the same v0 BAT as |
| 27 | * argument as the sql.append() statement does, |
| 28 | * Do you assume a single re-use of the variable v0? |
| 29 | * Do you assume a non-nested MAL block ? |
| 30 | * |
| 31 | * and transform them into |
| 32 | * |
| 33 | * ... |
| 34 | * v1 := aggr.count( v0 ); |
| 35 | * v2 := algebra.slice( v0, 0, v1 ); |
| 36 | * v3 := sql.append( ..., ..., ..., ..., v2 ); |
| 37 | * ... |
| 38 | * v5 := ... v0 ...; |
| 39 | * ... |
| 40 | * |
| 41 | * i.e., handing a BAT view v2 of BAT v0 as argument to the sql.append() |
| 42 | * statement, rather than the original BAT v0. |
| 43 | * My advice, always use new variable names, it may capture some easy |
| 44 | * to make errors. |
| 45 | * |
| 46 | * As a refinement, patterns like |
| 47 | * |
| 48 | * ... |
| 49 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
| 50 | * v4 := aggr.count( v0 ); |
| 51 | * ... |
| 52 | * v5 := ... v0 ...; |
| 53 | * ... |
| 54 | * |
| 55 | * are transformed into |
| 56 | * |
| 57 | * ... |
| 58 | * v4 := aggr.count( v0 ); |
| 59 | * v2 := algebra.slice( v0, 0, v4 ); |
| 60 | * v3 := sql.append( ..., ..., ..., ..., v2 ); |
| 61 | * ... |
| 62 | * v5 := ... v0 ...; |
| 63 | * ... |
| 64 | * |
| 65 | */ |
| 66 | |
| 67 | /* |
| 68 | * It is mandatory to make optimizers part of the 'optimizer' module. |
| 69 | * This allows the optimizer implementation to find them and react on them. |
| 70 | */ |
| 71 | #include "monetdb_config.h" |
| 72 | #include "opt_sql_append.h" |
| 73 | #include "mal_interpreter.h" |
| 74 | |
| 75 | /* focus initially on persistent tables. */ |
| 76 | |
| 77 | static int |
| 78 | OPTsql_appendImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
| 79 | { |
| 80 | InstrPtr *old = NULL; |
| 81 | int i, limit, slimit, actions = 0; |
| 82 | |
| 83 | (void) cntxt; |
| 84 | (void) pci; /* Tell compilers that we know that we do not */ |
| 85 | (void) stk; /* use these function parameters, here. */ |
| 86 | |
| 87 | /* In general, a MAL optimizer transforms a given MAL program into a |
| 88 | * modified one by sequentially walking through the given program |
| 89 | * and concurrently creating a new one from scratch by |
| 90 | * (1) copying statements as is, modified, or in a different order, |
| 91 | * or (2) omitting statements or (3) introducing new statements. |
| 92 | */ |
| 93 | |
| 94 | /* check for logical error: mb must never be NULL */ |
| 95 | assert (mb != NULL); |
| 96 | |
| 97 | /* save the old stage of the MAL block */ |
| 98 | old = mb->stmt; |
| 99 | limit= mb->stop; |
| 100 | slimit = mb->ssize; |
| 101 | |
| 102 | /* initialize the statement list. Notice, the symbol table remains intact */ |
| 103 | if (newMalBlkStmt(mb, mb->ssize) < 0) |
| 104 | return 0; |
| 105 | |
| 106 | /* the plan signature can be copied safely */ |
| 107 | pushInstruction(mb, old[0]); |
| 108 | |
| 109 | /* iterate over the instructions of the input MAL program */ |
| 110 | for (i = 1; i < limit; i++) { |
| 111 | InstrPtr p = old[i]; |
| 112 | |
| 113 | /* check for |
| 114 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
| 115 | */ |
| 116 | if (getModuleId(p) == sqlRef && |
| 117 | getFunctionId(p) == appendRef && |
| 118 | p->argc > 5 && |
| 119 | p->retc == 1 && |
| 120 | isaBatType(getArgType(mb, p, 5))) { |
| 121 | /* found |
| 122 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
| 123 | */ |
| 124 | int j = 0, k = 0; |
| 125 | InstrPtr q1 = NULL, q2 = NULL; |
| 126 | bit found = FALSE; |
| 127 | |
| 128 | /* check whether next is |
| 129 | * v4 := aggr.count(v0); |
| 130 | */ |
| 131 | if (i+1 < limit) { |
| 132 | InstrPtr q = old[i+1]; |
| 133 | if (getModuleId(q) == aggrRef && |
| 134 | getFunctionId(q) == countRef && |
| 135 | q->argc == 2 && |
| 136 | q->retc == 1 && |
| 137 | getArg(q, 1) == getArg(p, 5)) { |
| 138 | /* found |
| 139 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
| 140 | * v4 := aggr.count(v0); |
| 141 | */ |
| 142 | /* issue/execute |
| 143 | * v4 := aggr.count(v0); |
| 144 | * before |
| 145 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
| 146 | */ |
| 147 | pushInstruction(mb, q); |
| 148 | q1 = q; |
| 149 | i++; |
| 150 | actions++; /* to keep track if anything has been done */ |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | /* look for |
| 155 | * v5 := ... v0 ...; |
| 156 | */ |
| 157 | /* an expensive loop, better would be to remember that v0 |
| 158 | * has a different role. A typical method is to keep a |
| 159 | * map from variable -> instruction where it was |
| 160 | * detected. Then you can check each assignment for use of |
| 161 | * v0 |
| 162 | */ |
| 163 | for (j = i+1; !found && j < limit; j++) |
| 164 | for (k = old[j]->retc; !found && k < old[j]->argc; k++) |
| 165 | found = (getArg(old[j], k) == getArg(p, 5)); |
| 166 | if (found) { |
| 167 | /* replace |
| 168 | * v3 := sql.append( ..., ..., ..., ..., v0 ); |
| 169 | * with |
| 170 | * v1 := aggr.count( v0 ); |
| 171 | * v2 := algebra.slice( v0, 0, v1 ); |
| 172 | * v3 := sql.append( ..., ..., ..., ..., v2 ); |
| 173 | */ |
| 174 | |
| 175 | /* push new v1 := aggr.count( v0 ); unless already available */ |
| 176 | if (q1 == NULL) { |
| 177 | /* use mal_builder.h primitives |
| 178 | * q1 = newStmt(mb, aggrRef,countRef); |
| 179 | * setArgType(mb,q1,TYPE_lng) */ |
| 180 | /* it will be added to the block and even my |
| 181 | * re-use MAL instructions */ |
| 182 | q1 = newInstruction(mb,aggrRef,countRef); |
| 183 | getArg(q1,0) = newTmpVariable(mb, TYPE_lng); |
| 184 | q1 = pushArgument(mb, q1, getArg(p, 5)); |
| 185 | pushInstruction(mb, q1); |
| 186 | } |
| 187 | |
| 188 | /* push new v2 := algebra.slice( v0, 0, v1 ); */ |
| 189 | /* use mal_builder.h primitives |
| 190 | * q1 = newStmt(mb, algebraRef,sliceRef); */ |
| 191 | q2 = newInstruction(mb,algebraRef, sliceRef); |
| 192 | getArg(q2,0) = newTmpVariable(mb, TYPE_any); |
| 193 | q2 = pushArgument(mb, q2, getArg(p, 5)); |
| 194 | q2 = pushLng(mb, q2, 0); |
| 195 | q2 = pushArgument(mb, q2, getArg(q1, 0)); |
| 196 | pushInstruction(mb, q2); |
| 197 | |
| 198 | /* push modified v3 := sql.append( ..., ..., ..., ..., v2 ); */ |
| 199 | getArg(p, 5) = getArg(q2, 0); |
| 200 | pushInstruction(mb, p); |
| 201 | |
| 202 | actions++; |
| 203 | continue; |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | pushInstruction(mb, p); |
| 208 | if (p->token == ENDsymbol) break; |
| 209 | } |
| 210 | |
| 211 | /* We would like to retain everything from the ENDsymbol |
| 212 | * up to the end of the plan, because after the ENDsymbol |
| 213 | * the remaining optimizer steps are stored. |
| 214 | */ |
| 215 | for(i++; i<limit; i++) |
| 216 | if (old[i]) |
| 217 | pushInstruction(mb, old[i]); |
| 218 | /* any remaining MAL instruction records are removed */ |
| 219 | for(; i<slimit; i++) |
| 220 | if (old[i]) |
| 221 | freeInstruction(old[i]); |
| 222 | |
| 223 | GDKfree(old); |
| 224 | |
| 225 | /* for statistics we return if/how many patches have been made */ |
| 226 | #ifdef DEBUG_OPT_OPTIMIZERS |
| 227 | mnstr_printf(cntxt->fdout,"#opt_sql_append: %d statements added\n" , actions); |
| 228 | #endif |
| 229 | return actions; |
| 230 | } |
| 231 | |
| 232 | /* optimizers have to be registered in the optcatalog in opt_support.c. |
| 233 | * you have to path the file accordingly. |
| 234 | */ |
| 235 | |
| 236 | /* Optimizer code wrapper |
| 237 | * The optimizer wrapper code is the interface to the MAL optimizer calls. |
| 238 | * It prepares the environment for the optimizers to do their work and removes |
| 239 | * the call itself to avoid endless recursions. |
| 240 | * |
| 241 | * Before an optimizer is finished, it should leave a clean state behind. |
| 242 | * Moreover, the information of the optimization step is saved for |
| 243 | * debugging and analysis. |
| 244 | * |
| 245 | * The wrapper expects the optimizers to return the number of |
| 246 | * actions taken, i.e. number of successful changes to the code. |
| 247 | */ |
| 248 | |
| 249 | str OPTsql_append(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p){ |
| 250 | str modnme; |
| 251 | str fcnnme; |
| 252 | str msg= MAL_SUCCEED; |
| 253 | Symbol s= NULL; |
| 254 | char buf[256]; |
| 255 | lng clk= GDKusec(); |
| 256 | int actions = 0; |
| 257 | |
| 258 | (void) cntxt; |
| 259 | if( p ) |
| 260 | removeInstruction(mb, p); |
| 261 | #ifdef DEBUG_OPT_OPTIMIZERS |
| 262 | mnstr_printf(cntxt->fdout,"=APPLY OPTIMIZER sql_append\n" ); |
| 263 | #endif |
| 264 | if( p && p->argc > 1 ){ |
| 265 | if( getArgType(mb,p,1) != TYPE_str || |
| 266 | getArgType(mb,p,2) != TYPE_str || |
| 267 | !isVarConstant(mb,getArg(p,1)) || |
| 268 | !isVarConstant(mb,getArg(p,2)) |
| 269 | ) { |
| 270 | throw(MAL, "optimizer.sql_append" , ILLARG_CONSTANTS); |
| 271 | } |
| 272 | if( stk != 0){ |
| 273 | modnme= *getArgReference_str(stk,p,1); |
| 274 | fcnnme= *getArgReference_str(stk,p,2); |
| 275 | } else { |
| 276 | modnme= getArgDefault(mb,p,1); |
| 277 | fcnnme= getArgDefault(mb,p,2); |
| 278 | } |
| 279 | s= findSymbol(cntxt->usermodule, putName(modnme),putName(fcnnme)); |
| 280 | |
| 281 | if( s == NULL) { |
| 282 | char buf[1024]; |
| 283 | snprintf(buf,1024, "%s.%s" ,modnme,fcnnme); |
| 284 | throw(MAL, "optimizer.sql_append" , RUNTIME_OBJECT_UNDEFINED ":%s" , buf); |
| 285 | } |
| 286 | mb = s->def; |
| 287 | stk= 0; |
| 288 | } |
| 289 | if( mb->errors ){ |
| 290 | /* when we have errors, we still want to see them */ |
| 291 | addtoMalBlkHistory(mb); |
| 292 | return MAL_SUCCEED; |
| 293 | } |
| 294 | actions= OPTsql_appendImplementation(cntxt, mb,stk,p); |
| 295 | |
| 296 | /* Defense line against incorrect plans */ |
| 297 | chkTypes(cntxt->usermodule, mb, FALSE); |
| 298 | chkFlow(mb); |
| 299 | chkDeclarations(mb); |
| 300 | #ifdef DEBUG_OPT_OPTIMIZERS |
| 301 | mnstr_printf(cntxt->fdout,"=FINISHED sql_append %d\n" ,actions); |
| 302 | printFunction(cntxt->fdout,mb,0,LIST_MAL_ALL ); |
| 303 | mnstr_printf(cntxt->fdout,"#opt_reduce: " LLFMT " ms\n" ,t); |
| 304 | #endif |
| 305 | clk = GDKusec()- clk; |
| 306 | snprintf(buf,256,"%-20s actions=%2d time=" LLFMT " usec" ,"optimizer.sql_append" ,actions, clk); |
| 307 | newComment(mb,buf); |
| 308 | addtoMalBlkHistory(mb); |
| 309 | return msg; |
| 310 | } |
| 311 | |