1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * @f run_memo
11 * @a M. Kersten
12 * @+ Memo-based Query Execution
13 * Modern cost-based query optimizers use a memo structure
14 * to organize the search space for an efficient query execution plan.
15 * For example, consider an oid join path 'A.B.C.D'.
16 * We can start the evaluation at any point in this path.
17 *
18 * Its memo structure can be represented by a (large) MAL program.
19 * The memo levels are encapsulated with a @code{choice} operator.
20 * The arguments of the second dictate which instructions to consider
21 * for cost evaluation.
22 * @example
23 * ...
24 * scheduler.choice("getVolume");
25 * T1:= algebra.join(A,B);
26 * T2:= algebra.join(B,C);
27 * T3:= algebra.join(C,D);
28 * scheduler.choice("getVolume",T1,T2,T3);
29 * T4:= algebra.join(T1,C);
30 * T5:= algebra.join(A,T2);
31 * T6:= algebra.join(T2,D);
32 * T7:= algebra.join(B,T3);
33 * T8:= algebra.join(C,D);
34 * scheduler.choice("getVolume",T4,T5,T6,T7,T8);
35 * T9:= algebra.join(T4,D);
36 * T10:= algebra.join(T5,D);
37 * T11:= algebra.join(A,T6);
38 * T12:= algebra.join(A,T7);
39 * T13:= algebra.join(T1,T8);
40 * scheduler.choice("getVolume",T9,T10,T11,T12,T13);
41 * answer:= scheduler.pick(T9, T10, T11, T12, T13);
42 * @end example
43 *
44 * The @code{scheduler.choice()} operator calls a builtin @code{getVolume}
45 * for each target variable and expects an integer-valued cost.
46 * In this case it returns the total number of bytes uses as arguments.
47 *
48 * The target variable with the lowest
49 * cost is chosen for execution and remaining variables are turned into
50 * a temporary NOOP operation.(You may want to re-use the memo)
51 * They are skipped by the interpreter, but also in subsequent
52 * calls to the scheduler. It reduces the alternatives as we proceed
53 * in the plan.
54 *
55 * A built-in naive cost function is used.
56 * It would be nice if the user could provide a
57 * private cost function defined as a @code{pattern}
58 * with a polymorphic argument for the target and a @code{:lng} result.
59 * Its implementation can use the complete context information to
60 * make a decision. For example, it can trace the potential use
61 * of the target variable in subsequent statements to determine
62 * a total cost when this step is taken towards the final result.
63 *
64 * A complete plan likely includes other expressions to
65 * prepare or use the target variables before reaching the next
66 * choice point. It is the task of the choice operator
67 * to avoid any superfluous operation.
68 *
69 * The MAL block should be privately owned by the caller,
70 * which can be assured with @code{scheduler.isolation()}.
71 *
72 * A refinement of the scheme is to make cost analysis
73 * part of the plan as well. Then you don't have to
74 * include a hardwired cost function.
75 * @example
76 * Acost:= aggr.count(A);
77 * Bcost:= aggr.count(B);
78 * Ccost:= aggr.count(C);
79 * T1cost:= Acost+Bcost;
80 * T2cost:= Bcost+Ccost;
81 * T3cost:= Ccost+Dcost;
82 * scheduler.choice(T1cost,T1, T2cost,T2, T3cost,T3);
83 * T1:= algebra.join(A,B);
84 * T2:= algebra.join(B,C);
85 * T3:= algebra.join(C,D);
86 * ...
87 * @end example
88 * The current implementation assumes a regular plan
89 * and unique use of variables.
90 */
91/*
92 * @+ Memorun implementation
93 * The code below is a mixture of generic routines and
94 * sample implementations to run the tests.
95 */
96#include "monetdb_config.h"
97#include "run_memo.h"
98#include "mal_runtime.h"
99
100static void
101propagateNonTarget(MalBlkPtr mb, int pc)
102{
103 int i;
104 InstrPtr p;
105 str scheduler = putName("scheduler");
106
107 for (; pc < mb->stop; pc++) {
108 p = getInstrPtr(mb, pc);
109 if (getModuleId(p) == scheduler)
110 continue;
111 for (i = 0; i < p->argc; i++)
112 if (isVarDisabled(mb, getArg(p, i)) && p->token >= 0)
113 p->token = -p->token; /* temporary NOOP */
114 if (p->token < 0)
115 for (i = 0; i < p->retc; i++)
116 setVarDisabled(mb, getArg(p, i));
117 }
118}
119/*
120 * THe choice operator first searches the next one to identify
121 * the fragment to be optimized and to gain access to the variables
122 * without the need to declare them upfront.
123 */
124str
125RUNchoice(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
126{
127 int target;
128 lng cost, mincost;
129 int i, j, pc;
130 char *nme;
131 InstrPtr q;
132
133 pc = getPC(mb, p);
134 for (i = pc + 1; i < mb->stop; i++) {
135 q = getInstrPtr(mb, i);
136 if (getModuleId(p) == getModuleId(q) &&
137 getFunctionId(p) == getFunctionId(q)) {
138 p = q;
139 break;
140 }
141 }
142 if (i == mb->stop)
143 return MAL_SUCCEED;
144 target = getArg(p, 2);
145 if (getArgType(mb, p, 1) == TYPE_int && p->argc >= 3 && (p->argc - 1) % 2 == 0) {
146 /* choice pairs */
147 mincost = *getArgReference_int(stk, p, 1);
148 for (i = 3; i < p->argc; i += 2) {
149 cost = *getArgReference_int(stk, p, i);
150 if (cost < mincost && !isVarDisabled(mb, getArg(p, i + 1))) {
151 mincost = cost;
152 target = getArg(p, i + 1);
153 }
154 }
155 } else if (getArgType(mb, p, 1) == TYPE_str) {
156 nme = *getArgReference_str(stk, p, 1);
157 /* should be generalized to allow an arbitrary user defined function */
158 if (strcmp(nme, "getVolume") != 0)
159 throw(MAL, "scheduler.choice", ILLEGAL_ARGUMENT "Illegal cost function");
160
161 mincost = -1;
162 for (j = 2; j < p->argc; j++) {
163 if (!isVarDisabled(mb, getArg(p, j)))
164 for (i = pc + 1; i < mb->stop; i++) {
165 InstrPtr q = getInstrPtr(mb, i);
166 if (p->token >= 0 && getArg(q, 0) == getArg(p, j)) {
167 cost = getVolume(stk, q, 1);
168 if (cost > 0 && (cost < mincost || mincost == -1)) {
169 mincost = cost;
170 target = getArg(p, j);
171 }
172 break;
173 }
174 }
175
176 }
177 }
178#ifdef DEBUG_RUN_MEMORUN
179 fprintf(stderr, "#function target %s cost %d\n", getVarName(mb, target), mincost);
180#else
181 (void) cntxt;
182#endif
183 /* remove non-qualifying variables */
184 for (i = 2; i < p->argc; i += 2)
185 if (getArg(p, i) != target) {
186 setVarDisabled(mb, getArg(p, i - 1));
187 setVarDisabled(mb, getArg(p, i));
188 }
189
190 propagateNonTarget(mb, pc + 1);
191#ifdef DEBUG_RUN_MEMORUN
192 fprintf(stderr, "#cost choice selected %s %d\n",
193 getVarName(mb, target), mincost);
194 fprintFunction(stderr, mb, 1);
195#endif
196 return MAL_SUCCEED;
197}
198/*
199 * At the end of the query plan we save the result in
200 * a separate variable.
201 */
202str
203RUNpickResult(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
204{
205 ValPtr lhs, rhs;
206 int i;
207
208 (void) cntxt;
209 lhs = &stk->stk[getArg(p, 0)];
210 for (i = p->retc; i < p->argc; i++)
211 if (!isVarDisabled(mb, getArg(p, i))) {
212 rhs = &stk->stk[getArg(p, i)];
213 if ((rhs)->vtype < TYPE_str)
214 *lhs = *rhs;
215 else if (VALcopy(lhs, rhs) == NULL)
216 throw(MAL, "scheduler.pick", SQLSTATE(HY001) MAL_MALLOC_FAIL);
217 if (lhs->vtype == TYPE_bat)
218 BBPretain(lhs->val.bval);
219 return MAL_SUCCEED;
220 }
221
222 throw(MAL, "scheduler.pick", OPERATION_FAILED "No result available");
223}
224/*
225 * The routine below calculates a cost based on the BAT volume in bytes.
226 * The MAL compiler ensures that all arguments have been
227 * assigned a value.
228 */
229str
230RUNvolumeCost(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
231{
232 lng *cost = getArgReference_lng(stk, p, 0);
233 (void) mb;
234
235 (void) cntxt;
236 *cost = getVolume(stk, p, 0); /* calculate total input size */
237 return MAL_SUCCEED;
238}
239/*
240 * The second example shows how you can look into the remaining
241 * instructions to assess the total cost if you follow the path
242 * starting at the argument given.
243 */
244str
245RUNcostPrediction(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
246{
247 lng *cost = getArgReference_lng(stk, p, 0);
248 (void) mb;
249 (void) cntxt;
250
251 *cost = 0;
252 return MAL_SUCCEED;
253}
254
255