1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * @f run_memo |
11 | * @a M. Kersten |
12 | * @+ Memo-based Query Execution |
13 | * Modern cost-based query optimizers use a memo structure |
14 | * to organize the search space for an efficient query execution plan. |
15 | * For example, consider an oid join path 'A.B.C.D'. |
16 | * We can start the evaluation at any point in this path. |
17 | * |
18 | * Its memo structure can be represented by a (large) MAL program. |
19 | * The memo levels are encapsulated with a @code{choice} operator. |
20 | * The arguments of the second dictate which instructions to consider |
21 | * for cost evaluation. |
22 | * @example |
23 | * ... |
24 | * scheduler.choice("getVolume"); |
25 | * T1:= algebra.join(A,B); |
26 | * T2:= algebra.join(B,C); |
27 | * T3:= algebra.join(C,D); |
28 | * scheduler.choice("getVolume",T1,T2,T3); |
29 | * T4:= algebra.join(T1,C); |
30 | * T5:= algebra.join(A,T2); |
31 | * T6:= algebra.join(T2,D); |
32 | * T7:= algebra.join(B,T3); |
33 | * T8:= algebra.join(C,D); |
34 | * scheduler.choice("getVolume",T4,T5,T6,T7,T8); |
35 | * T9:= algebra.join(T4,D); |
36 | * T10:= algebra.join(T5,D); |
37 | * T11:= algebra.join(A,T6); |
38 | * T12:= algebra.join(A,T7); |
39 | * T13:= algebra.join(T1,T8); |
40 | * scheduler.choice("getVolume",T9,T10,T11,T12,T13); |
41 | * answer:= scheduler.pick(T9, T10, T11, T12, T13); |
42 | * @end example |
43 | * |
44 | * The @code{scheduler.choice()} operator calls a builtin @code{getVolume} |
45 | * for each target variable and expects an integer-valued cost. |
46 | * In this case it returns the total number of bytes uses as arguments. |
47 | * |
48 | * The target variable with the lowest |
49 | * cost is chosen for execution and remaining variables are turned into |
50 | * a temporary NOOP operation.(You may want to re-use the memo) |
51 | * They are skipped by the interpreter, but also in subsequent |
52 | * calls to the scheduler. It reduces the alternatives as we proceed |
53 | * in the plan. |
54 | * |
55 | * A built-in naive cost function is used. |
56 | * It would be nice if the user could provide a |
57 | * private cost function defined as a @code{pattern} |
58 | * with a polymorphic argument for the target and a @code{:lng} result. |
59 | * Its implementation can use the complete context information to |
60 | * make a decision. For example, it can trace the potential use |
61 | * of the target variable in subsequent statements to determine |
62 | * a total cost when this step is taken towards the final result. |
63 | * |
64 | * A complete plan likely includes other expressions to |
65 | * prepare or use the target variables before reaching the next |
66 | * choice point. It is the task of the choice operator |
67 | * to avoid any superfluous operation. |
68 | * |
69 | * The MAL block should be privately owned by the caller, |
70 | * which can be assured with @code{scheduler.isolation()}. |
71 | * |
72 | * A refinement of the scheme is to make cost analysis |
73 | * part of the plan as well. Then you don't have to |
74 | * include a hardwired cost function. |
75 | * @example |
76 | * Acost:= aggr.count(A); |
77 | * Bcost:= aggr.count(B); |
78 | * Ccost:= aggr.count(C); |
79 | * T1cost:= Acost+Bcost; |
80 | * T2cost:= Bcost+Ccost; |
81 | * T3cost:= Ccost+Dcost; |
82 | * scheduler.choice(T1cost,T1, T2cost,T2, T3cost,T3); |
83 | * T1:= algebra.join(A,B); |
84 | * T2:= algebra.join(B,C); |
85 | * T3:= algebra.join(C,D); |
86 | * ... |
87 | * @end example |
88 | * The current implementation assumes a regular plan |
89 | * and unique use of variables. |
90 | */ |
91 | /* |
92 | * @+ Memorun implementation |
93 | * The code below is a mixture of generic routines and |
94 | * sample implementations to run the tests. |
95 | */ |
96 | #include "monetdb_config.h" |
97 | #include "run_memo.h" |
98 | #include "mal_runtime.h" |
99 | |
100 | static void |
101 | propagateNonTarget(MalBlkPtr mb, int pc) |
102 | { |
103 | int i; |
104 | InstrPtr p; |
105 | str scheduler = putName("scheduler" ); |
106 | |
107 | for (; pc < mb->stop; pc++) { |
108 | p = getInstrPtr(mb, pc); |
109 | if (getModuleId(p) == scheduler) |
110 | continue; |
111 | for (i = 0; i < p->argc; i++) |
112 | if (isVarDisabled(mb, getArg(p, i)) && p->token >= 0) |
113 | p->token = -p->token; /* temporary NOOP */ |
114 | if (p->token < 0) |
115 | for (i = 0; i < p->retc; i++) |
116 | setVarDisabled(mb, getArg(p, i)); |
117 | } |
118 | } |
119 | /* |
120 | * THe choice operator first searches the next one to identify |
121 | * the fragment to be optimized and to gain access to the variables |
122 | * without the need to declare them upfront. |
123 | */ |
124 | str |
125 | RUNchoice(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
126 | { |
127 | int target; |
128 | lng cost, mincost; |
129 | int i, j, pc; |
130 | char *nme; |
131 | InstrPtr q; |
132 | |
133 | pc = getPC(mb, p); |
134 | for (i = pc + 1; i < mb->stop; i++) { |
135 | q = getInstrPtr(mb, i); |
136 | if (getModuleId(p) == getModuleId(q) && |
137 | getFunctionId(p) == getFunctionId(q)) { |
138 | p = q; |
139 | break; |
140 | } |
141 | } |
142 | if (i == mb->stop) |
143 | return MAL_SUCCEED; |
144 | target = getArg(p, 2); |
145 | if (getArgType(mb, p, 1) == TYPE_int && p->argc >= 3 && (p->argc - 1) % 2 == 0) { |
146 | /* choice pairs */ |
147 | mincost = *getArgReference_int(stk, p, 1); |
148 | for (i = 3; i < p->argc; i += 2) { |
149 | cost = *getArgReference_int(stk, p, i); |
150 | if (cost < mincost && !isVarDisabled(mb, getArg(p, i + 1))) { |
151 | mincost = cost; |
152 | target = getArg(p, i + 1); |
153 | } |
154 | } |
155 | } else if (getArgType(mb, p, 1) == TYPE_str) { |
156 | nme = *getArgReference_str(stk, p, 1); |
157 | /* should be generalized to allow an arbitrary user defined function */ |
158 | if (strcmp(nme, "getVolume" ) != 0) |
159 | throw(MAL, "scheduler.choice" , ILLEGAL_ARGUMENT "Illegal cost function" ); |
160 | |
161 | mincost = -1; |
162 | for (j = 2; j < p->argc; j++) { |
163 | if (!isVarDisabled(mb, getArg(p, j))) |
164 | for (i = pc + 1; i < mb->stop; i++) { |
165 | InstrPtr q = getInstrPtr(mb, i); |
166 | if (p->token >= 0 && getArg(q, 0) == getArg(p, j)) { |
167 | cost = getVolume(stk, q, 1); |
168 | if (cost > 0 && (cost < mincost || mincost == -1)) { |
169 | mincost = cost; |
170 | target = getArg(p, j); |
171 | } |
172 | break; |
173 | } |
174 | } |
175 | |
176 | } |
177 | } |
178 | #ifdef DEBUG_RUN_MEMORUN |
179 | fprintf(stderr, "#function target %s cost %d\n" , getVarName(mb, target), mincost); |
180 | #else |
181 | (void) cntxt; |
182 | #endif |
183 | /* remove non-qualifying variables */ |
184 | for (i = 2; i < p->argc; i += 2) |
185 | if (getArg(p, i) != target) { |
186 | setVarDisabled(mb, getArg(p, i - 1)); |
187 | setVarDisabled(mb, getArg(p, i)); |
188 | } |
189 | |
190 | propagateNonTarget(mb, pc + 1); |
191 | #ifdef DEBUG_RUN_MEMORUN |
192 | fprintf(stderr, "#cost choice selected %s %d\n" , |
193 | getVarName(mb, target), mincost); |
194 | fprintFunction(stderr, mb, 1); |
195 | #endif |
196 | return MAL_SUCCEED; |
197 | } |
198 | /* |
199 | * At the end of the query plan we save the result in |
200 | * a separate variable. |
201 | */ |
202 | str |
203 | RUNpickResult(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
204 | { |
205 | ValPtr lhs, rhs; |
206 | int i; |
207 | |
208 | (void) cntxt; |
209 | lhs = &stk->stk[getArg(p, 0)]; |
210 | for (i = p->retc; i < p->argc; i++) |
211 | if (!isVarDisabled(mb, getArg(p, i))) { |
212 | rhs = &stk->stk[getArg(p, i)]; |
213 | if ((rhs)->vtype < TYPE_str) |
214 | *lhs = *rhs; |
215 | else if (VALcopy(lhs, rhs) == NULL) |
216 | throw(MAL, "scheduler.pick" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
217 | if (lhs->vtype == TYPE_bat) |
218 | BBPretain(lhs->val.bval); |
219 | return MAL_SUCCEED; |
220 | } |
221 | |
222 | throw(MAL, "scheduler.pick" , OPERATION_FAILED "No result available" ); |
223 | } |
224 | /* |
225 | * The routine below calculates a cost based on the BAT volume in bytes. |
226 | * The MAL compiler ensures that all arguments have been |
227 | * assigned a value. |
228 | */ |
229 | str |
230 | RUNvolumeCost(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
231 | { |
232 | lng *cost = getArgReference_lng(stk, p, 0); |
233 | (void) mb; |
234 | |
235 | (void) cntxt; |
236 | *cost = getVolume(stk, p, 0); /* calculate total input size */ |
237 | return MAL_SUCCEED; |
238 | } |
239 | /* |
240 | * The second example shows how you can look into the remaining |
241 | * instructions to assess the total cost if you follow the path |
242 | * starting at the argument given. |
243 | */ |
244 | str |
245 | RUNcostPrediction(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
246 | { |
247 | lng *cost = getArgReference_lng(stk, p, 0); |
248 | (void) mb; |
249 | (void) cntxt; |
250 | |
251 | *cost = 0; |
252 | return MAL_SUCCEED; |
253 | } |
254 | |
255 | |