| 1 | /* | 
|---|
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public | 
|---|
| 3 | * License, v. 2.0.  If a copy of the MPL was not distributed with this | 
|---|
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. | 
|---|
| 5 | * | 
|---|
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. | 
|---|
| 7 | */ | 
|---|
| 8 |  | 
|---|
| 9 | /* | 
|---|
| 10 | * N. Nes, M.L. Kersten | 
|---|
| 11 | * The queries are stored in the user cache after they have been | 
|---|
| 12 | * type checked and optimized. | 
|---|
| 13 | */ | 
|---|
| 14 | #include "monetdb_config.h" | 
|---|
| 15 | #include "mal_builder.h" | 
|---|
| 16 | #include "mal_debugger.h" | 
|---|
| 17 | #include "mal_runtime.h" | 
|---|
| 18 | #include "opt_prelude.h" | 
|---|
| 19 | #include "sql_mvc.h" | 
|---|
| 20 | #include "sql_optimizer.h" | 
|---|
| 21 | #include "sql_scenario.h" | 
|---|
| 22 | #include "sql_gencode.h" | 
|---|
| 23 | #include "opt_pipes.h" | 
|---|
| 24 |  | 
|---|
| 25 | /* calculate the footprint for optimizer pipe line choices | 
|---|
| 26 | * and identify empty columns upfront for just in time optimizers. | 
|---|
| 27 | */ | 
|---|
| 28 | static lng | 
|---|
| 29 | SQLgetColumnSize(sql_trans *tr, sql_column *c, int access) | 
|---|
| 30 | { | 
|---|
| 31 | lng size = 0; | 
|---|
| 32 | BAT *b; | 
|---|
| 33 | switch(access){ | 
|---|
| 34 | case 0: | 
|---|
| 35 | b= store_funcs.bind_col(tr, c, RDONLY); | 
|---|
| 36 | if (b) { | 
|---|
| 37 | size += getBatSpace(b); | 
|---|
| 38 | BBPunfix(b->batCacheid); | 
|---|
| 39 | } | 
|---|
| 40 | break; | 
|---|
| 41 | case 1: | 
|---|
| 42 | b = store_funcs.bind_col(tr, c, RD_INS); | 
|---|
| 43 | if (b) { | 
|---|
| 44 | size+= getBatSpace(b); | 
|---|
| 45 | BBPunfix(b->batCacheid); | 
|---|
| 46 | } | 
|---|
| 47 | break; | 
|---|
| 48 | case 2: | 
|---|
| 49 | b = store_funcs.bind_col(tr, c, RD_UPD_VAL); | 
|---|
| 50 | if (b) { | 
|---|
| 51 | size += getBatSpace(b); | 
|---|
| 52 | BBPunfix(b->batCacheid); | 
|---|
| 53 | } | 
|---|
| 54 | b = store_funcs.bind_col(tr, c, RD_UPD_ID); | 
|---|
| 55 | if (b) { | 
|---|
| 56 | size+= getBatSpace(b); | 
|---|
| 57 | BBPunfix(b->batCacheid); | 
|---|
| 58 | } | 
|---|
| 59 | } | 
|---|
| 60 | return size; | 
|---|
| 61 | } | 
|---|
| 62 |  | 
|---|
| 63 | /* | 
|---|
| 64 | * The maximal space occupied by a query is calculated | 
|---|
| 65 | * under the assumption that the complete database should fit in memory. | 
|---|
| 66 | * The assumption is that the plan does not contain duplicate bind operations. | 
|---|
| 67 | * Calculation of the precise footprint is much more complex | 
|---|
| 68 | * and can not deal with intermediate structures, or fast | 
|---|
| 69 | * access using sorted probing. | 
|---|
| 70 | * | 
|---|
| 71 | * A run where we only take the size of a table only once, | 
|---|
| 72 | * caused major degration on SF100 Q3 with SSD(>6x) | 
|---|
| 73 | */ | 
|---|
| 74 |  | 
|---|
| 75 | static lng | 
|---|
| 76 | SQLgetSpace(mvc *m, MalBlkPtr mb, int prepare) | 
|---|
| 77 | { | 
|---|
| 78 | sql_trans *tr = m->session->tr; | 
|---|
| 79 | lng size,space = 0, i; | 
|---|
| 80 | str lasttable = 0; | 
|---|
| 81 |  | 
|---|
| 82 | for (i = 0; i < mb->stop; i++) { | 
|---|
| 83 | InstrPtr p = mb->stmt[i]; | 
|---|
| 84 |  | 
|---|
| 85 | /* now deal with the update binds, it is only necessary to identify that there are updats | 
|---|
| 86 | * The actual size is not that important */ | 
|---|
| 87 | if (getModuleId(p) == sqlRef && getFunctionId(p) == bindRef  && p->retc <= 2){ | 
|---|
| 88 | char *sname = getVarConstant(mb, getArg(p, 1 + p->retc)).val.sval; | 
|---|
| 89 | char *tname = getVarConstant(mb, getArg(p, 2 + p->retc)).val.sval; | 
|---|
| 90 | char *cname = getVarConstant(mb, getArg(p, 3 + p->retc)).val.sval; | 
|---|
| 91 | int access = getVarConstant(mb, getArg(p, 4 + p->retc)).val.ival; | 
|---|
| 92 | sql_schema *s = mvc_bind_schema(m, sname); | 
|---|
| 93 | sql_table *t = 0; | 
|---|
| 94 | sql_column *c = 0; | 
|---|
| 95 |  | 
|---|
| 96 | if (!s || strcmp(s->base.name, dt_schema) == 0) | 
|---|
| 97 | continue; | 
|---|
| 98 | t = mvc_bind_table(m, s, tname); | 
|---|
| 99 | if (!t) | 
|---|
| 100 | continue; | 
|---|
| 101 | c = mvc_bind_column(m, t, cname); | 
|---|
| 102 | if (!s) | 
|---|
| 103 | continue; | 
|---|
| 104 |  | 
|---|
| 105 | /* we have to sum the cost of all three components of a BAT */ | 
|---|
| 106 | if (c && (!isRemote(c->t) && !isMergeTable(c->t)) && (lasttable == 0 || strcmp(lasttable,tname)==0)) { | 
|---|
| 107 | size = SQLgetColumnSize(tr, c, access); | 
|---|
| 108 | space += size;	// accumulate once per table | 
|---|
| 109 | //lasttable = tname;	 invalidate this attempt | 
|---|
| 110 | if( !prepare && size == 0  && ! t->system){ | 
|---|
| 111 | //fprintf(stderr,"found empty column %s.%s.%s prepare %d size "LLFMT"\n",sname,tname,cname,prepare,size); | 
|---|
| 112 | setFunctionId(p, emptybindRef); | 
|---|
| 113 | } | 
|---|
| 114 | } | 
|---|
| 115 | } | 
|---|
| 116 | if (getModuleId(p) == sqlRef && (getFunctionId(p) == bindidxRef)) { | 
|---|
| 117 | char *sname = getVarConstant(mb, getArg(p, 1 + p->retc)).val.sval; | 
|---|
| 118 | //char *tname = getVarConstant(mb, getArg(p, 2 + p->retc)).val.sval; | 
|---|
| 119 | char *idxname = getVarConstant(mb, getArg(p, 3 + p->retc)).val.sval; | 
|---|
| 120 | int access = getVarConstant(mb, getArg(p, 4 + p->retc)).val.ival; | 
|---|
| 121 | sql_schema *s = mvc_bind_schema(m, sname); | 
|---|
| 122 | BAT *b; | 
|---|
| 123 |  | 
|---|
| 124 | if (getFunctionId(p) == bindidxRef) { | 
|---|
| 125 | sql_idx *i = mvc_bind_idx(m, s, idxname); | 
|---|
| 126 |  | 
|---|
| 127 | if (i && (!isRemote(i->t) && !isMergeTable(i->t))) { | 
|---|
| 128 | b = store_funcs.bind_idx(tr, i, RDONLY); | 
|---|
| 129 | if (b) { | 
|---|
| 130 | space += (size =getBatSpace(b)); | 
|---|
| 131 | if (!size) { | 
|---|
| 132 | sql_column *c = i->t->columns.set->h->data; | 
|---|
| 133 | size = SQLgetColumnSize(tr, c, access); | 
|---|
| 134 | } | 
|---|
| 135 |  | 
|---|
| 136 | if( !prepare && size == 0 && ! i->t->system){ | 
|---|
| 137 | setFunctionId(p, emptybindidxRef); | 
|---|
| 138 | //fprintf(stderr,"found empty column %s.%s.%s prepare %d size "LLFMT"\n",sname,tname,idxname,prepare,size); | 
|---|
| 139 | } | 
|---|
| 140 | BBPunfix(b->batCacheid); | 
|---|
| 141 | } | 
|---|
| 142 | } | 
|---|
| 143 | } | 
|---|
| 144 | } | 
|---|
| 145 | } | 
|---|
| 146 | return space; | 
|---|
| 147 | } | 
|---|
| 148 |  | 
|---|
| 149 | /* gather the optimizer pipeline defined in the current session */ | 
|---|
| 150 | str | 
|---|
| 151 | getSQLoptimizer(mvc *m) | 
|---|
| 152 | { | 
|---|
| 153 | char *opt = stack_get_string(m, "optimizer"); | 
|---|
| 154 | char *pipe = "default_pipe"; | 
|---|
| 155 |  | 
|---|
| 156 | if (opt) | 
|---|
| 157 | pipe = opt; | 
|---|
| 158 | return pipe; | 
|---|
| 159 | } | 
|---|
| 160 |  | 
|---|
| 161 | static str | 
|---|
| 162 | addOptimizers(Client c, MalBlkPtr mb, char *pipe, int prepare) | 
|---|
| 163 | { | 
|---|
| 164 | int i; | 
|---|
| 165 | InstrPtr q; | 
|---|
| 166 | backend *be; | 
|---|
| 167 | str msg= MAL_SUCCEED; | 
|---|
| 168 |  | 
|---|
| 169 | be = (backend *) c->sqlcontext; | 
|---|
| 170 | assert(be && be->mvc);	/* SQL clients should always have their state set */ | 
|---|
| 171 |  | 
|---|
| 172 | (void) SQLgetSpace(be->mvc, mb, prepare); // detect empty bats. | 
|---|
| 173 | /* The volcano optimizer seems relevant for traditional HDD settings. | 
|---|
| 174 | * It produced about 8 % improvement onf TPCH SF 100 on a 16G machine. | 
|---|
| 175 | * In a SSD setting it was counter productive, leading to worse parallel behavior. | 
|---|
| 176 | * The automatic switch to volcano is now disabled assuming more use of SSD. | 
|---|
| 177 | * The volcano optimizer pipeline can be used instead | 
|---|
| 178 | if(space && (pipe == NULL || strcmp(pipe,"default_pipe")== 0)){ | 
|---|
| 179 | if( space > (lng)(0.8 * MT_npages() * MT_pagesize())  && GDKnr_threads > 1){ | 
|---|
| 180 | pipe = "volcano_pipe"; | 
|---|
| 181 | //fprintf(stderr, "#use volcano optimizer pipeline? %zu\n", space); | 
|---|
| 182 | }else | 
|---|
| 183 | pipe = "default_pipe"; | 
|---|
| 184 | } else | 
|---|
| 185 | */ | 
|---|
| 186 | pipe = pipe? pipe: "default_pipe"; | 
|---|
| 187 | msg = addOptimizerPipe(c, mb, pipe); | 
|---|
| 188 | if (msg){ | 
|---|
| 189 | return msg; | 
|---|
| 190 | } | 
|---|
| 191 | mb->keephistory |= be->mvc->emod & mod_debug; | 
|---|
| 192 | if (be->mvc->no_mitosis) { | 
|---|
| 193 | for (i = mb->stop - 1; i > 0; i--) { | 
|---|
| 194 | q = getInstrPtr(mb, i); | 
|---|
| 195 | if (q->token == ENDsymbol) | 
|---|
| 196 | break; | 
|---|
| 197 | if (getFunctionId(q) == mitosisRef || getFunctionId(q) == dataflowRef) | 
|---|
| 198 | q->token = REMsymbol;	/* they are ignored */ | 
|---|
| 199 | } | 
|---|
| 200 | } | 
|---|
| 201 | addtoMalBlkHistory(mb); | 
|---|
| 202 | return msg; | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | /* Queries that should rely on the latest consolidated state | 
|---|
| 206 | * are not allowed to remove sql.binds operations. | 
|---|
| 207 | */ | 
|---|
| 208 |  | 
|---|
| 209 | str | 
|---|
| 210 | SQLoptimizeFunction(Client c, MalBlkPtr mb) | 
|---|
| 211 | { | 
|---|
| 212 | str msg; | 
|---|
| 213 | str pipe; | 
|---|
| 214 | backend *be = (backend *) c->sqlcontext; | 
|---|
| 215 | assert(be && be->mvc);	/* SQL clients should always have their state set */ | 
|---|
| 216 |  | 
|---|
| 217 | pipe = getSQLoptimizer(be->mvc); | 
|---|
| 218 | msg = addOptimizers(c, mb, pipe, TRUE); | 
|---|
| 219 | if (msg) | 
|---|
| 220 | return msg; | 
|---|
| 221 | mb->keephistory |= be->mvc->emod & mod_debug; | 
|---|
| 222 | msg = optimizeMALBlock(c, mb); | 
|---|
| 223 | mb->keephistory = FALSE; | 
|---|
| 224 | return msg; | 
|---|
| 225 | } | 
|---|
| 226 |  | 
|---|
| 227 | str | 
|---|
| 228 | SQLoptimizeQuery(Client c, MalBlkPtr mb) | 
|---|
| 229 | { | 
|---|
| 230 | backend *be; | 
|---|
| 231 | str msg = 0, pipe = 0; | 
|---|
| 232 | bool free_pipe = false; | 
|---|
| 233 |  | 
|---|
| 234 | if (mb->stop > 0 && | 
|---|
| 235 | mb->stmt[mb->stop-1]->token == REMsymbol && | 
|---|
| 236 | mb->stmt[mb->stop-1]->argc > 0 && | 
|---|
| 237 | mb->var[mb->stmt[mb->stop-1]->argv[0]].value.vtype == TYPE_str && | 
|---|
| 238 | mb->var[mb->stmt[mb->stop-1]->argv[0]].value.val.sval && | 
|---|
| 239 | strncmp(mb->var[mb->stmt[mb->stop-1]->argv[0]].value.val.sval, "total", 5) == 0) | 
|---|
| 240 | return MAL_SUCCEED; /* already optimized */ | 
|---|
| 241 |  | 
|---|
| 242 | be = (backend *) c->sqlcontext; | 
|---|
| 243 | assert(be && be->mvc);	/* SQL clients should always have their state set */ | 
|---|
| 244 |  | 
|---|
| 245 | c->blkmode = 0; | 
|---|
| 246 | chkProgram(c->usermodule, mb); | 
|---|
| 247 |  | 
|---|
| 248 | /* | 
|---|
| 249 | * An error in the compilation should be reported to the user. | 
|---|
| 250 | * And if the debugging option is set, the debugger is called | 
|---|
| 251 | * to allow inspection. | 
|---|
| 252 | */ | 
|---|
| 253 | if (mb->errors) { | 
|---|
| 254 | if (c->listing) | 
|---|
| 255 | printFunction(c->fdout, mb, 0, c->listing); | 
|---|
| 256 | if (be->mvc->debug) { | 
|---|
| 257 | msg = runMALDebugger(c, c->curprg->def); | 
|---|
| 258 | if (msg != MAL_SUCCEED) | 
|---|
| 259 | freeException(msg); /* ignore error */ | 
|---|
| 260 | } | 
|---|
| 261 | return createException(MAL, "optimizer.optimizeQuery", "%s", mb->errors); | 
|---|
| 262 | } | 
|---|
| 263 |  | 
|---|
| 264 | pipe = getSQLoptimizer(be->mvc); | 
|---|
| 265 | if( strcmp(pipe, "default_pipe") == 0 && strcmp(c->optimizer, "default_pipe") != 0) { | 
|---|
| 266 | if (!(pipe = GDKstrdup(c->optimizer))) | 
|---|
| 267 | throw(MAL, "sql.optimizeQuery", SQLSTATE(HY001) MAL_MALLOC_FAIL); | 
|---|
| 268 | free_pipe = true; | 
|---|
| 269 | } | 
|---|
| 270 |  | 
|---|
| 271 | msg = addOptimizers(c, mb, pipe, FALSE); | 
|---|
| 272 | if (free_pipe) | 
|---|
| 273 | GDKfree(pipe); | 
|---|
| 274 | if (msg) | 
|---|
| 275 | return msg; | 
|---|
| 276 | mb->keephistory |= be->mvc->emod & mod_debug; | 
|---|
| 277 | msg = optimizeMALBlock(c, mb); | 
|---|
| 278 | return msg; | 
|---|
| 279 | } | 
|---|
| 280 |  | 
|---|
| 281 | /* queries are added to the MAL catalog  under the client session namespace */ | 
|---|
| 282 | void | 
|---|
| 283 | SQLaddQueryToCache(Client c) | 
|---|
| 284 | { | 
|---|
| 285 | insertSymbol(c->usermodule, c->curprg); | 
|---|
| 286 | } | 
|---|
| 287 |  | 
|---|