| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /* |
| 10 | * N. Nes, M.L. Kersten |
| 11 | * The queries are stored in the user cache after they have been |
| 12 | * type checked and optimized. |
| 13 | */ |
| 14 | #include "monetdb_config.h" |
| 15 | #include "mal_builder.h" |
| 16 | #include "mal_debugger.h" |
| 17 | #include "mal_runtime.h" |
| 18 | #include "opt_prelude.h" |
| 19 | #include "sql_mvc.h" |
| 20 | #include "sql_optimizer.h" |
| 21 | #include "sql_scenario.h" |
| 22 | #include "sql_gencode.h" |
| 23 | #include "opt_pipes.h" |
| 24 | |
| 25 | /* calculate the footprint for optimizer pipe line choices |
| 26 | * and identify empty columns upfront for just in time optimizers. |
| 27 | */ |
| 28 | static lng |
| 29 | SQLgetColumnSize(sql_trans *tr, sql_column *c, int access) |
| 30 | { |
| 31 | lng size = 0; |
| 32 | BAT *b; |
| 33 | switch(access){ |
| 34 | case 0: |
| 35 | b= store_funcs.bind_col(tr, c, RDONLY); |
| 36 | if (b) { |
| 37 | size += getBatSpace(b); |
| 38 | BBPunfix(b->batCacheid); |
| 39 | } |
| 40 | break; |
| 41 | case 1: |
| 42 | b = store_funcs.bind_col(tr, c, RD_INS); |
| 43 | if (b) { |
| 44 | size+= getBatSpace(b); |
| 45 | BBPunfix(b->batCacheid); |
| 46 | } |
| 47 | break; |
| 48 | case 2: |
| 49 | b = store_funcs.bind_col(tr, c, RD_UPD_VAL); |
| 50 | if (b) { |
| 51 | size += getBatSpace(b); |
| 52 | BBPunfix(b->batCacheid); |
| 53 | } |
| 54 | b = store_funcs.bind_col(tr, c, RD_UPD_ID); |
| 55 | if (b) { |
| 56 | size+= getBatSpace(b); |
| 57 | BBPunfix(b->batCacheid); |
| 58 | } |
| 59 | } |
| 60 | return size; |
| 61 | } |
| 62 | |
| 63 | /* |
| 64 | * The maximal space occupied by a query is calculated |
| 65 | * under the assumption that the complete database should fit in memory. |
| 66 | * The assumption is that the plan does not contain duplicate bind operations. |
| 67 | * Calculation of the precise footprint is much more complex |
| 68 | * and can not deal with intermediate structures, or fast |
| 69 | * access using sorted probing. |
| 70 | * |
| 71 | * A run where we only take the size of a table only once, |
| 72 | * caused major degration on SF100 Q3 with SSD(>6x) |
| 73 | */ |
| 74 | |
| 75 | static lng |
| 76 | SQLgetSpace(mvc *m, MalBlkPtr mb, int prepare) |
| 77 | { |
| 78 | sql_trans *tr = m->session->tr; |
| 79 | lng size,space = 0, i; |
| 80 | str lasttable = 0; |
| 81 | |
| 82 | for (i = 0; i < mb->stop; i++) { |
| 83 | InstrPtr p = mb->stmt[i]; |
| 84 | |
| 85 | /* now deal with the update binds, it is only necessary to identify that there are updats |
| 86 | * The actual size is not that important */ |
| 87 | if (getModuleId(p) == sqlRef && getFunctionId(p) == bindRef && p->retc <= 2){ |
| 88 | char *sname = getVarConstant(mb, getArg(p, 1 + p->retc)).val.sval; |
| 89 | char *tname = getVarConstant(mb, getArg(p, 2 + p->retc)).val.sval; |
| 90 | char *cname = getVarConstant(mb, getArg(p, 3 + p->retc)).val.sval; |
| 91 | int access = getVarConstant(mb, getArg(p, 4 + p->retc)).val.ival; |
| 92 | sql_schema *s = mvc_bind_schema(m, sname); |
| 93 | sql_table *t = 0; |
| 94 | sql_column *c = 0; |
| 95 | |
| 96 | if (!s || strcmp(s->base.name, dt_schema) == 0) |
| 97 | continue; |
| 98 | t = mvc_bind_table(m, s, tname); |
| 99 | if (!t) |
| 100 | continue; |
| 101 | c = mvc_bind_column(m, t, cname); |
| 102 | if (!s) |
| 103 | continue; |
| 104 | |
| 105 | /* we have to sum the cost of all three components of a BAT */ |
| 106 | if (c && (!isRemote(c->t) && !isMergeTable(c->t)) && (lasttable == 0 || strcmp(lasttable,tname)==0)) { |
| 107 | size = SQLgetColumnSize(tr, c, access); |
| 108 | space += size; // accumulate once per table |
| 109 | //lasttable = tname; invalidate this attempt |
| 110 | if( !prepare && size == 0 && ! t->system){ |
| 111 | //fprintf(stderr,"found empty column %s.%s.%s prepare %d size "LLFMT"\n",sname,tname,cname,prepare,size); |
| 112 | setFunctionId(p, emptybindRef); |
| 113 | } |
| 114 | } |
| 115 | } |
| 116 | if (getModuleId(p) == sqlRef && (getFunctionId(p) == bindidxRef)) { |
| 117 | char *sname = getVarConstant(mb, getArg(p, 1 + p->retc)).val.sval; |
| 118 | //char *tname = getVarConstant(mb, getArg(p, 2 + p->retc)).val.sval; |
| 119 | char *idxname = getVarConstant(mb, getArg(p, 3 + p->retc)).val.sval; |
| 120 | int access = getVarConstant(mb, getArg(p, 4 + p->retc)).val.ival; |
| 121 | sql_schema *s = mvc_bind_schema(m, sname); |
| 122 | BAT *b; |
| 123 | |
| 124 | if (getFunctionId(p) == bindidxRef) { |
| 125 | sql_idx *i = mvc_bind_idx(m, s, idxname); |
| 126 | |
| 127 | if (i && (!isRemote(i->t) && !isMergeTable(i->t))) { |
| 128 | b = store_funcs.bind_idx(tr, i, RDONLY); |
| 129 | if (b) { |
| 130 | space += (size =getBatSpace(b)); |
| 131 | if (!size) { |
| 132 | sql_column *c = i->t->columns.set->h->data; |
| 133 | size = SQLgetColumnSize(tr, c, access); |
| 134 | } |
| 135 | |
| 136 | if( !prepare && size == 0 && ! i->t->system){ |
| 137 | setFunctionId(p, emptybindidxRef); |
| 138 | //fprintf(stderr,"found empty column %s.%s.%s prepare %d size "LLFMT"\n",sname,tname,idxname,prepare,size); |
| 139 | } |
| 140 | BBPunfix(b->batCacheid); |
| 141 | } |
| 142 | } |
| 143 | } |
| 144 | } |
| 145 | } |
| 146 | return space; |
| 147 | } |
| 148 | |
| 149 | /* gather the optimizer pipeline defined in the current session */ |
| 150 | str |
| 151 | getSQLoptimizer(mvc *m) |
| 152 | { |
| 153 | char *opt = stack_get_string(m, "optimizer" ); |
| 154 | char *pipe = "default_pipe" ; |
| 155 | |
| 156 | if (opt) |
| 157 | pipe = opt; |
| 158 | return pipe; |
| 159 | } |
| 160 | |
| 161 | static str |
| 162 | addOptimizers(Client c, MalBlkPtr mb, char *pipe, int prepare) |
| 163 | { |
| 164 | int i; |
| 165 | InstrPtr q; |
| 166 | backend *be; |
| 167 | str msg= MAL_SUCCEED; |
| 168 | |
| 169 | be = (backend *) c->sqlcontext; |
| 170 | assert(be && be->mvc); /* SQL clients should always have their state set */ |
| 171 | |
| 172 | (void) SQLgetSpace(be->mvc, mb, prepare); // detect empty bats. |
| 173 | /* The volcano optimizer seems relevant for traditional HDD settings. |
| 174 | * It produced about 8 % improvement onf TPCH SF 100 on a 16G machine. |
| 175 | * In a SSD setting it was counter productive, leading to worse parallel behavior. |
| 176 | * The automatic switch to volcano is now disabled assuming more use of SSD. |
| 177 | * The volcano optimizer pipeline can be used instead |
| 178 | if(space && (pipe == NULL || strcmp(pipe,"default_pipe")== 0)){ |
| 179 | if( space > (lng)(0.8 * MT_npages() * MT_pagesize()) && GDKnr_threads > 1){ |
| 180 | pipe = "volcano_pipe"; |
| 181 | //fprintf(stderr, "#use volcano optimizer pipeline? %zu\n", space); |
| 182 | }else |
| 183 | pipe = "default_pipe"; |
| 184 | } else |
| 185 | */ |
| 186 | pipe = pipe? pipe: "default_pipe" ; |
| 187 | msg = addOptimizerPipe(c, mb, pipe); |
| 188 | if (msg){ |
| 189 | return msg; |
| 190 | } |
| 191 | mb->keephistory |= be->mvc->emod & mod_debug; |
| 192 | if (be->mvc->no_mitosis) { |
| 193 | for (i = mb->stop - 1; i > 0; i--) { |
| 194 | q = getInstrPtr(mb, i); |
| 195 | if (q->token == ENDsymbol) |
| 196 | break; |
| 197 | if (getFunctionId(q) == mitosisRef || getFunctionId(q) == dataflowRef) |
| 198 | q->token = REMsymbol; /* they are ignored */ |
| 199 | } |
| 200 | } |
| 201 | addtoMalBlkHistory(mb); |
| 202 | return msg; |
| 203 | } |
| 204 | |
| 205 | /* Queries that should rely on the latest consolidated state |
| 206 | * are not allowed to remove sql.binds operations. |
| 207 | */ |
| 208 | |
| 209 | str |
| 210 | SQLoptimizeFunction(Client c, MalBlkPtr mb) |
| 211 | { |
| 212 | str msg; |
| 213 | str pipe; |
| 214 | backend *be = (backend *) c->sqlcontext; |
| 215 | assert(be && be->mvc); /* SQL clients should always have their state set */ |
| 216 | |
| 217 | pipe = getSQLoptimizer(be->mvc); |
| 218 | msg = addOptimizers(c, mb, pipe, TRUE); |
| 219 | if (msg) |
| 220 | return msg; |
| 221 | mb->keephistory |= be->mvc->emod & mod_debug; |
| 222 | msg = optimizeMALBlock(c, mb); |
| 223 | mb->keephistory = FALSE; |
| 224 | return msg; |
| 225 | } |
| 226 | |
| 227 | str |
| 228 | SQLoptimizeQuery(Client c, MalBlkPtr mb) |
| 229 | { |
| 230 | backend *be; |
| 231 | str msg = 0, pipe = 0; |
| 232 | bool free_pipe = false; |
| 233 | |
| 234 | if (mb->stop > 0 && |
| 235 | mb->stmt[mb->stop-1]->token == REMsymbol && |
| 236 | mb->stmt[mb->stop-1]->argc > 0 && |
| 237 | mb->var[mb->stmt[mb->stop-1]->argv[0]].value.vtype == TYPE_str && |
| 238 | mb->var[mb->stmt[mb->stop-1]->argv[0]].value.val.sval && |
| 239 | strncmp(mb->var[mb->stmt[mb->stop-1]->argv[0]].value.val.sval, "total" , 5) == 0) |
| 240 | return MAL_SUCCEED; /* already optimized */ |
| 241 | |
| 242 | be = (backend *) c->sqlcontext; |
| 243 | assert(be && be->mvc); /* SQL clients should always have their state set */ |
| 244 | |
| 245 | c->blkmode = 0; |
| 246 | chkProgram(c->usermodule, mb); |
| 247 | |
| 248 | /* |
| 249 | * An error in the compilation should be reported to the user. |
| 250 | * And if the debugging option is set, the debugger is called |
| 251 | * to allow inspection. |
| 252 | */ |
| 253 | if (mb->errors) { |
| 254 | if (c->listing) |
| 255 | printFunction(c->fdout, mb, 0, c->listing); |
| 256 | if (be->mvc->debug) { |
| 257 | msg = runMALDebugger(c, c->curprg->def); |
| 258 | if (msg != MAL_SUCCEED) |
| 259 | freeException(msg); /* ignore error */ |
| 260 | } |
| 261 | return createException(MAL, "optimizer.optimizeQuery" , "%s" , mb->errors); |
| 262 | } |
| 263 | |
| 264 | pipe = getSQLoptimizer(be->mvc); |
| 265 | if( strcmp(pipe, "default_pipe" ) == 0 && strcmp(c->optimizer, "default_pipe" ) != 0) { |
| 266 | if (!(pipe = GDKstrdup(c->optimizer))) |
| 267 | throw(MAL, "sql.optimizeQuery" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
| 268 | free_pipe = true; |
| 269 | } |
| 270 | |
| 271 | msg = addOptimizers(c, mb, pipe, FALSE); |
| 272 | if (free_pipe) |
| 273 | GDKfree(pipe); |
| 274 | if (msg) |
| 275 | return msg; |
| 276 | mb->keephistory |= be->mvc->emod & mod_debug; |
| 277 | msg = optimizeMALBlock(c, mb); |
| 278 | return msg; |
| 279 | } |
| 280 | |
| 281 | /* queries are added to the MAL catalog under the client session namespace */ |
| 282 | void |
| 283 | SQLaddQueryToCache(Client c) |
| 284 | { |
| 285 | insertSymbol(c->usermodule, c->curprg); |
| 286 | } |
| 287 | |