1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * N. Nes, M.L. Kersten |
11 | * The queries are stored in the user cache after they have been |
12 | * type checked and optimized. |
13 | */ |
14 | #include "monetdb_config.h" |
15 | #include "mal_builder.h" |
16 | #include "mal_debugger.h" |
17 | #include "mal_runtime.h" |
18 | #include "opt_prelude.h" |
19 | #include "sql_mvc.h" |
20 | #include "sql_optimizer.h" |
21 | #include "sql_scenario.h" |
22 | #include "sql_gencode.h" |
23 | #include "opt_pipes.h" |
24 | |
25 | /* calculate the footprint for optimizer pipe line choices |
26 | * and identify empty columns upfront for just in time optimizers. |
27 | */ |
28 | static lng |
29 | SQLgetColumnSize(sql_trans *tr, sql_column *c, int access) |
30 | { |
31 | lng size = 0; |
32 | BAT *b; |
33 | switch(access){ |
34 | case 0: |
35 | b= store_funcs.bind_col(tr, c, RDONLY); |
36 | if (b) { |
37 | size += getBatSpace(b); |
38 | BBPunfix(b->batCacheid); |
39 | } |
40 | break; |
41 | case 1: |
42 | b = store_funcs.bind_col(tr, c, RD_INS); |
43 | if (b) { |
44 | size+= getBatSpace(b); |
45 | BBPunfix(b->batCacheid); |
46 | } |
47 | break; |
48 | case 2: |
49 | b = store_funcs.bind_col(tr, c, RD_UPD_VAL); |
50 | if (b) { |
51 | size += getBatSpace(b); |
52 | BBPunfix(b->batCacheid); |
53 | } |
54 | b = store_funcs.bind_col(tr, c, RD_UPD_ID); |
55 | if (b) { |
56 | size+= getBatSpace(b); |
57 | BBPunfix(b->batCacheid); |
58 | } |
59 | } |
60 | return size; |
61 | } |
62 | |
63 | /* |
64 | * The maximal space occupied by a query is calculated |
65 | * under the assumption that the complete database should fit in memory. |
66 | * The assumption is that the plan does not contain duplicate bind operations. |
67 | * Calculation of the precise footprint is much more complex |
68 | * and can not deal with intermediate structures, or fast |
69 | * access using sorted probing. |
70 | * |
71 | * A run where we only take the size of a table only once, |
72 | * caused major degration on SF100 Q3 with SSD(>6x) |
73 | */ |
74 | |
75 | static lng |
76 | SQLgetSpace(mvc *m, MalBlkPtr mb, int prepare) |
77 | { |
78 | sql_trans *tr = m->session->tr; |
79 | lng size,space = 0, i; |
80 | str lasttable = 0; |
81 | |
82 | for (i = 0; i < mb->stop; i++) { |
83 | InstrPtr p = mb->stmt[i]; |
84 | |
85 | /* now deal with the update binds, it is only necessary to identify that there are updats |
86 | * The actual size is not that important */ |
87 | if (getModuleId(p) == sqlRef && getFunctionId(p) == bindRef && p->retc <= 2){ |
88 | char *sname = getVarConstant(mb, getArg(p, 1 + p->retc)).val.sval; |
89 | char *tname = getVarConstant(mb, getArg(p, 2 + p->retc)).val.sval; |
90 | char *cname = getVarConstant(mb, getArg(p, 3 + p->retc)).val.sval; |
91 | int access = getVarConstant(mb, getArg(p, 4 + p->retc)).val.ival; |
92 | sql_schema *s = mvc_bind_schema(m, sname); |
93 | sql_table *t = 0; |
94 | sql_column *c = 0; |
95 | |
96 | if (!s || strcmp(s->base.name, dt_schema) == 0) |
97 | continue; |
98 | t = mvc_bind_table(m, s, tname); |
99 | if (!t) |
100 | continue; |
101 | c = mvc_bind_column(m, t, cname); |
102 | if (!s) |
103 | continue; |
104 | |
105 | /* we have to sum the cost of all three components of a BAT */ |
106 | if (c && (!isRemote(c->t) && !isMergeTable(c->t)) && (lasttable == 0 || strcmp(lasttable,tname)==0)) { |
107 | size = SQLgetColumnSize(tr, c, access); |
108 | space += size; // accumulate once per table |
109 | //lasttable = tname; invalidate this attempt |
110 | if( !prepare && size == 0 && ! t->system){ |
111 | //fprintf(stderr,"found empty column %s.%s.%s prepare %d size "LLFMT"\n",sname,tname,cname,prepare,size); |
112 | setFunctionId(p, emptybindRef); |
113 | } |
114 | } |
115 | } |
116 | if (getModuleId(p) == sqlRef && (getFunctionId(p) == bindidxRef)) { |
117 | char *sname = getVarConstant(mb, getArg(p, 1 + p->retc)).val.sval; |
118 | //char *tname = getVarConstant(mb, getArg(p, 2 + p->retc)).val.sval; |
119 | char *idxname = getVarConstant(mb, getArg(p, 3 + p->retc)).val.sval; |
120 | int access = getVarConstant(mb, getArg(p, 4 + p->retc)).val.ival; |
121 | sql_schema *s = mvc_bind_schema(m, sname); |
122 | BAT *b; |
123 | |
124 | if (getFunctionId(p) == bindidxRef) { |
125 | sql_idx *i = mvc_bind_idx(m, s, idxname); |
126 | |
127 | if (i && (!isRemote(i->t) && !isMergeTable(i->t))) { |
128 | b = store_funcs.bind_idx(tr, i, RDONLY); |
129 | if (b) { |
130 | space += (size =getBatSpace(b)); |
131 | if (!size) { |
132 | sql_column *c = i->t->columns.set->h->data; |
133 | size = SQLgetColumnSize(tr, c, access); |
134 | } |
135 | |
136 | if( !prepare && size == 0 && ! i->t->system){ |
137 | setFunctionId(p, emptybindidxRef); |
138 | //fprintf(stderr,"found empty column %s.%s.%s prepare %d size "LLFMT"\n",sname,tname,idxname,prepare,size); |
139 | } |
140 | BBPunfix(b->batCacheid); |
141 | } |
142 | } |
143 | } |
144 | } |
145 | } |
146 | return space; |
147 | } |
148 | |
149 | /* gather the optimizer pipeline defined in the current session */ |
150 | str |
151 | getSQLoptimizer(mvc *m) |
152 | { |
153 | char *opt = stack_get_string(m, "optimizer" ); |
154 | char *pipe = "default_pipe" ; |
155 | |
156 | if (opt) |
157 | pipe = opt; |
158 | return pipe; |
159 | } |
160 | |
161 | static str |
162 | addOptimizers(Client c, MalBlkPtr mb, char *pipe, int prepare) |
163 | { |
164 | int i; |
165 | InstrPtr q; |
166 | backend *be; |
167 | str msg= MAL_SUCCEED; |
168 | |
169 | be = (backend *) c->sqlcontext; |
170 | assert(be && be->mvc); /* SQL clients should always have their state set */ |
171 | |
172 | (void) SQLgetSpace(be->mvc, mb, prepare); // detect empty bats. |
173 | /* The volcano optimizer seems relevant for traditional HDD settings. |
174 | * It produced about 8 % improvement onf TPCH SF 100 on a 16G machine. |
175 | * In a SSD setting it was counter productive, leading to worse parallel behavior. |
176 | * The automatic switch to volcano is now disabled assuming more use of SSD. |
177 | * The volcano optimizer pipeline can be used instead |
178 | if(space && (pipe == NULL || strcmp(pipe,"default_pipe")== 0)){ |
179 | if( space > (lng)(0.8 * MT_npages() * MT_pagesize()) && GDKnr_threads > 1){ |
180 | pipe = "volcano_pipe"; |
181 | //fprintf(stderr, "#use volcano optimizer pipeline? %zu\n", space); |
182 | }else |
183 | pipe = "default_pipe"; |
184 | } else |
185 | */ |
186 | pipe = pipe? pipe: "default_pipe" ; |
187 | msg = addOptimizerPipe(c, mb, pipe); |
188 | if (msg){ |
189 | return msg; |
190 | } |
191 | mb->keephistory |= be->mvc->emod & mod_debug; |
192 | if (be->mvc->no_mitosis) { |
193 | for (i = mb->stop - 1; i > 0; i--) { |
194 | q = getInstrPtr(mb, i); |
195 | if (q->token == ENDsymbol) |
196 | break; |
197 | if (getFunctionId(q) == mitosisRef || getFunctionId(q) == dataflowRef) |
198 | q->token = REMsymbol; /* they are ignored */ |
199 | } |
200 | } |
201 | addtoMalBlkHistory(mb); |
202 | return msg; |
203 | } |
204 | |
205 | /* Queries that should rely on the latest consolidated state |
206 | * are not allowed to remove sql.binds operations. |
207 | */ |
208 | |
209 | str |
210 | SQLoptimizeFunction(Client c, MalBlkPtr mb) |
211 | { |
212 | str msg; |
213 | str pipe; |
214 | backend *be = (backend *) c->sqlcontext; |
215 | assert(be && be->mvc); /* SQL clients should always have their state set */ |
216 | |
217 | pipe = getSQLoptimizer(be->mvc); |
218 | msg = addOptimizers(c, mb, pipe, TRUE); |
219 | if (msg) |
220 | return msg; |
221 | mb->keephistory |= be->mvc->emod & mod_debug; |
222 | msg = optimizeMALBlock(c, mb); |
223 | mb->keephistory = FALSE; |
224 | return msg; |
225 | } |
226 | |
227 | str |
228 | SQLoptimizeQuery(Client c, MalBlkPtr mb) |
229 | { |
230 | backend *be; |
231 | str msg = 0, pipe = 0; |
232 | bool free_pipe = false; |
233 | |
234 | if (mb->stop > 0 && |
235 | mb->stmt[mb->stop-1]->token == REMsymbol && |
236 | mb->stmt[mb->stop-1]->argc > 0 && |
237 | mb->var[mb->stmt[mb->stop-1]->argv[0]].value.vtype == TYPE_str && |
238 | mb->var[mb->stmt[mb->stop-1]->argv[0]].value.val.sval && |
239 | strncmp(mb->var[mb->stmt[mb->stop-1]->argv[0]].value.val.sval, "total" , 5) == 0) |
240 | return MAL_SUCCEED; /* already optimized */ |
241 | |
242 | be = (backend *) c->sqlcontext; |
243 | assert(be && be->mvc); /* SQL clients should always have their state set */ |
244 | |
245 | c->blkmode = 0; |
246 | chkProgram(c->usermodule, mb); |
247 | |
248 | /* |
249 | * An error in the compilation should be reported to the user. |
250 | * And if the debugging option is set, the debugger is called |
251 | * to allow inspection. |
252 | */ |
253 | if (mb->errors) { |
254 | if (c->listing) |
255 | printFunction(c->fdout, mb, 0, c->listing); |
256 | if (be->mvc->debug) { |
257 | msg = runMALDebugger(c, c->curprg->def); |
258 | if (msg != MAL_SUCCEED) |
259 | freeException(msg); /* ignore error */ |
260 | } |
261 | return createException(MAL, "optimizer.optimizeQuery" , "%s" , mb->errors); |
262 | } |
263 | |
264 | pipe = getSQLoptimizer(be->mvc); |
265 | if( strcmp(pipe, "default_pipe" ) == 0 && strcmp(c->optimizer, "default_pipe" ) != 0) { |
266 | if (!(pipe = GDKstrdup(c->optimizer))) |
267 | throw(MAL, "sql.optimizeQuery" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
268 | free_pipe = true; |
269 | } |
270 | |
271 | msg = addOptimizers(c, mb, pipe, FALSE); |
272 | if (free_pipe) |
273 | GDKfree(pipe); |
274 | if (msg) |
275 | return msg; |
276 | mb->keephistory |= be->mvc->emod & mod_debug; |
277 | msg = optimizeMALBlock(c, mb); |
278 | return msg; |
279 | } |
280 | |
281 | /* queries are added to the MAL catalog under the client session namespace */ |
282 | void |
283 | SQLaddQueryToCache(Client c) |
284 | { |
285 | insertSymbol(c->usermodule, c->curprg); |
286 | } |
287 | |