| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | #include "monetdb_config.h" |
| 10 | #include "opt_costModel.h" |
| 11 | |
| 12 | /* |
| 13 | * The cost formula are repetative |
| 14 | */ |
| 15 | #define newRows(W,X,Y,Z) {\ |
| 16 | c1 = getRowCnt(mb, getArg(p,W));\ |
| 17 | c2 = getRowCnt(mb, getArg(p,X));\ |
| 18 | /* just to ensure that rowcnt was/is never set to -1 */\ |
| 19 | assert(c1 != (BUN) -1);\ |
| 20 | assert(c2 != (BUN) -1);\ |
| 21 | if (c1 == BUN_NONE || c2 == BUN_NONE) \ |
| 22 | continue;\ |
| 23 | setRowCnt(mb, getArg(p,Z), (Y));\ |
| 24 | } |
| 25 | /* |
| 26 | * The cost will be used in many places to make decisions. |
| 27 | * Access should be fast. |
| 28 | * The SQL front-end also makes the BAT index available as the |
| 29 | * property bid. This can be used to access the BAT and involve |
| 30 | * more properties into the decision procedure. |
| 31 | * [to be done] |
| 32 | * Also make sure you don't re-use variables, because then the |
| 33 | * row count becomes non-deterministic. |
| 34 | */ |
| 35 | str |
| 36 | OPTcostModelImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
| 37 | { |
| 38 | int i; |
| 39 | BUN c1, c2; |
| 40 | InstrPtr p; |
| 41 | char buf[256]; |
| 42 | lng usec = GDKusec(); |
| 43 | |
| 44 | (void) cntxt; |
| 45 | (void) stk; |
| 46 | (void) pci; |
| 47 | |
| 48 | if( OPTdebug & OPTcostmodel){ |
| 49 | fprintf(stderr, "#COSTMODEL optimizer exit\n" ); |
| 50 | fprintFunction(stderr, mb, 0, LIST_MAL_ALL); |
| 51 | } |
| 52 | if ( mb->inlineProp ) |
| 53 | return MAL_SUCCEED; |
| 54 | |
| 55 | for (i = 0; i < mb->stop; i++) { |
| 56 | p = getInstrPtr(mb, i); |
| 57 | if (getModuleId(p)==algebraRef) { |
| 58 | if (getFunctionId(p) == selectRef || |
| 59 | getFunctionId(p) == thetaselectRef) { |
| 60 | newRows(1,2, (c1 > 2 ? c2 / 2 +1: c1/2+1),0); |
| 61 | } else if ( |
| 62 | getFunctionId(p) == selectNotNilRef || |
| 63 | getFunctionId(p) == sortRef || |
| 64 | getFunctionId(p) == projectRef ){ |
| 65 | newRows(1,1,c1,0); |
| 66 | } else if (getFunctionId(p) == joinRef || |
| 67 | getFunctionId(p) == projectionRef || |
| 68 | getFunctionId(p) == bandjoinRef || |
| 69 | getFunctionId(p) == projectionpathRef ) { |
| 70 | /* assume 1-1 joins */ |
| 71 | newRows(1,2,(c1 < c2 ? c1 : c2),0); |
| 72 | } else |
| 73 | if (getFunctionId(p) == crossRef) { |
| 74 | newRows(1,2,((log((double) c1) + log((double) c2) > log(INT_MAX) ? INT_MAX : c1 * c2 +1)),0); |
| 75 | /* log sets errno if it cannot compute the log. This will then screw with code that checks errno */ |
| 76 | if (errno == ERANGE || errno == EDOM) { |
| 77 | errno = 0; |
| 78 | } |
| 79 | } |
| 80 | } else if (getModuleId(p) == batcalcRef) { |
| 81 | if( getFunctionId(p) == ifthenelseRef) { |
| 82 | if( isaBatType(getArgType(mb,p,2) ) ) { |
| 83 | newRows(2,2, c1,0); |
| 84 | } else { |
| 85 | newRows(3,3, c1,0); |
| 86 | } |
| 87 | } else if( isaBatType(getArgType(mb,p,1)) ){ |
| 88 | newRows(1,1, c1,0); |
| 89 | } else { |
| 90 | newRows(2, 2, c2,0); |
| 91 | } |
| 92 | } else if (getModuleId(p) == batstrRef) { |
| 93 | newRows(1,1, c1,0); |
| 94 | } else if (getModuleId(p) == batRef) { |
| 95 | if (getFunctionId(p) == appendRef ){ |
| 96 | /* |
| 97 | * Updates are a little more complicated, because you have to |
| 98 | * propagate changes in the expected size up the expression tree. |
| 99 | * For example, the SQL snippet: |
| 100 | * _49:bat[:oid,:oid]{rows=0,bid=622} := sql.bind_dbat("sys","example",3); |
| 101 | * _54 := bat.setWriteMode(_49); |
| 102 | * bat.append(_54,_47,true); |
| 103 | * shows what is produced when it encounters a deletion. If a non-empty |
| 104 | * append is not properly passed back to _49, the emptySet |
| 105 | * optimizer might remove the complete deletion code. |
| 106 | * The same holds for replacement operations, which add information to |
| 107 | * an initially empty insertion BAT. |
| 108 | */ |
| 109 | if( isaBatType(getArgType(mb,p,2)) ){ |
| 110 | /* insert BAT */ |
| 111 | newRows(1,2, (c1 + c2+1),1); |
| 112 | } else { |
| 113 | /* insert scalars */ |
| 114 | newRows(1,1, (c1 +1),1); |
| 115 | } |
| 116 | } else if (getFunctionId(p) == deleteRef){ |
| 117 | if( isaBatType(getArgType(mb,p,2)) ){ |
| 118 | /* delete BAT */ |
| 119 | newRows(1, 2, (c2 >= c1 ? 1 : c1 - c2), 1); |
| 120 | } else { |
| 121 | /* insert scalars */ |
| 122 | newRows(1, 1, (c1 <= 1 ? 1 : c1 - 1), 1); |
| 123 | } |
| 124 | } |
| 125 | } else if (getModuleId(p)==groupRef) { |
| 126 | if (getFunctionId(p) ==subgroupRef || getFunctionId(p) ==groupRef ) { |
| 127 | newRows(1,1,( c1 / 10+1),0); |
| 128 | } else { |
| 129 | newRows(1,1, c1,0); |
| 130 | } |
| 131 | } else if (getModuleId(p)== aggrRef) { |
| 132 | if (getFunctionId(p) == sumRef || |
| 133 | getFunctionId(p) == minRef || |
| 134 | getFunctionId(p) == maxRef || |
| 135 | getFunctionId(p) == avgRef) { |
| 136 | newRows(1, 1, (c1 != 0 ? c1 : 1), 0); |
| 137 | } else if (getFunctionId(p) == countRef){ |
| 138 | newRows(1,1, 1,0); |
| 139 | } |
| 140 | } else if( p->token == ASSIGNsymbol && p->argc== 2){ |
| 141 | /* copy the rows property */ |
| 142 | c1 = getRowCnt(mb, getArg(p,1)); |
| 143 | /* just to ensure that rowcnt was/is never set to -1 */ |
| 144 | assert(c1 != (BUN) -1); |
| 145 | if (c1 != BUN_NONE) |
| 146 | setRowCnt(mb, getArg(p,0), c1); |
| 147 | } |
| 148 | } |
| 149 | /* Defense line against incorrect plans */ |
| 150 | /* plan remains unaffected */ |
| 151 | //chkTypes(cntxt->usermodule, mb, FALSE); |
| 152 | //chkFlow(mb); |
| 153 | //chkDeclarations(mb); |
| 154 | /* keep all actions taken as a post block comment */ |
| 155 | usec = GDKusec()- usec; |
| 156 | snprintf(buf,256,"%-20s actions= 1 time=" LLFMT " usec" ,"costmodel" ,usec); |
| 157 | newComment(mb,buf); |
| 158 | addtoMalBlkHistory(mb); |
| 159 | |
| 160 | if( OPTdebug & OPTcostmodel){ |
| 161 | fprintf(stderr, "#COSTMODEL optimizer exit\n" ); |
| 162 | fprintFunction(stderr, mb, 0, LIST_MAL_ALL); |
| 163 | } |
| 164 | return MAL_SUCCEED; |
| 165 | } |
| 166 | |