| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /* |
| 10 | * @f run_memo |
| 11 | * @a M. Kersten |
| 12 | * @+ Memo-based Query Execution |
| 13 | * Modern cost-based query optimizers use a memo structure |
| 14 | * to organize the search space for an efficient query execution plan. |
| 15 | * For example, consider an oid join path 'A.B.C.D'. |
| 16 | * We can start the evaluation at any point in this path. |
| 17 | * |
| 18 | * Its memo structure can be represented by a (large) MAL program. |
| 19 | * The memo levels are encapsulated with a @code{choice} operator. |
| 20 | * The arguments of the second dictate which instructions to consider |
| 21 | * for cost evaluation. |
| 22 | * @example |
| 23 | * ... |
| 24 | * scheduler.choice("getVolume"); |
| 25 | * T1:= algebra.join(A,B); |
| 26 | * T2:= algebra.join(B,C); |
| 27 | * T3:= algebra.join(C,D); |
| 28 | * scheduler.choice("getVolume",T1,T2,T3); |
| 29 | * T4:= algebra.join(T1,C); |
| 30 | * T5:= algebra.join(A,T2); |
| 31 | * T6:= algebra.join(T2,D); |
| 32 | * T7:= algebra.join(B,T3); |
| 33 | * T8:= algebra.join(C,D); |
| 34 | * scheduler.choice("getVolume",T4,T5,T6,T7,T8); |
| 35 | * T9:= algebra.join(T4,D); |
| 36 | * T10:= algebra.join(T5,D); |
| 37 | * T11:= algebra.join(A,T6); |
| 38 | * T12:= algebra.join(A,T7); |
| 39 | * T13:= algebra.join(T1,T8); |
| 40 | * scheduler.choice("getVolume",T9,T10,T11,T12,T13); |
| 41 | * answer:= scheduler.pick(T9, T10, T11, T12, T13); |
| 42 | * @end example |
| 43 | * |
| 44 | * The @code{scheduler.choice()} operator calls a builtin @code{getVolume} |
| 45 | * for each target variable and expects an integer-valued cost. |
| 46 | * In this case it returns the total number of bytes uses as arguments. |
| 47 | * |
| 48 | * The target variable with the lowest |
| 49 | * cost is chosen for execution and remaining variables are turned into |
| 50 | * a temporary NOOP operation.(You may want to re-use the memo) |
| 51 | * They are skipped by the interpreter, but also in subsequent |
| 52 | * calls to the scheduler. It reduces the alternatives as we proceed |
| 53 | * in the plan. |
| 54 | * |
| 55 | * A built-in naive cost function is used. |
| 56 | * It would be nice if the user could provide a |
| 57 | * private cost function defined as a @code{pattern} |
| 58 | * with a polymorphic argument for the target and a @code{:lng} result. |
| 59 | * Its implementation can use the complete context information to |
| 60 | * make a decision. For example, it can trace the potential use |
| 61 | * of the target variable in subsequent statements to determine |
| 62 | * a total cost when this step is taken towards the final result. |
| 63 | * |
| 64 | * A complete plan likely includes other expressions to |
| 65 | * prepare or use the target variables before reaching the next |
| 66 | * choice point. It is the task of the choice operator |
| 67 | * to avoid any superfluous operation. |
| 68 | * |
| 69 | * The MAL block should be privately owned by the caller, |
| 70 | * which can be assured with @code{scheduler.isolation()}. |
| 71 | * |
| 72 | * A refinement of the scheme is to make cost analysis |
| 73 | * part of the plan as well. Then you don't have to |
| 74 | * include a hardwired cost function. |
| 75 | * @example |
| 76 | * Acost:= aggr.count(A); |
| 77 | * Bcost:= aggr.count(B); |
| 78 | * Ccost:= aggr.count(C); |
| 79 | * T1cost:= Acost+Bcost; |
| 80 | * T2cost:= Bcost+Ccost; |
| 81 | * T3cost:= Ccost+Dcost; |
| 82 | * scheduler.choice(T1cost,T1, T2cost,T2, T3cost,T3); |
| 83 | * T1:= algebra.join(A,B); |
| 84 | * T2:= algebra.join(B,C); |
| 85 | * T3:= algebra.join(C,D); |
| 86 | * ... |
| 87 | * @end example |
| 88 | * The current implementation assumes a regular plan |
| 89 | * and unique use of variables. |
| 90 | */ |
| 91 | /* |
| 92 | * @+ Memorun implementation |
| 93 | * The code below is a mixture of generic routines and |
| 94 | * sample implementations to run the tests. |
| 95 | */ |
| 96 | #include "monetdb_config.h" |
| 97 | #include "run_memo.h" |
| 98 | #include "mal_runtime.h" |
| 99 | |
| 100 | static void |
| 101 | propagateNonTarget(MalBlkPtr mb, int pc) |
| 102 | { |
| 103 | int i; |
| 104 | InstrPtr p; |
| 105 | str scheduler = putName("scheduler" ); |
| 106 | |
| 107 | for (; pc < mb->stop; pc++) { |
| 108 | p = getInstrPtr(mb, pc); |
| 109 | if (getModuleId(p) == scheduler) |
| 110 | continue; |
| 111 | for (i = 0; i < p->argc; i++) |
| 112 | if (isVarDisabled(mb, getArg(p, i)) && p->token >= 0) |
| 113 | p->token = -p->token; /* temporary NOOP */ |
| 114 | if (p->token < 0) |
| 115 | for (i = 0; i < p->retc; i++) |
| 116 | setVarDisabled(mb, getArg(p, i)); |
| 117 | } |
| 118 | } |
| 119 | /* |
| 120 | * THe choice operator first searches the next one to identify |
| 121 | * the fragment to be optimized and to gain access to the variables |
| 122 | * without the need to declare them upfront. |
| 123 | */ |
| 124 | str |
| 125 | RUNchoice(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
| 126 | { |
| 127 | int target; |
| 128 | lng cost, mincost; |
| 129 | int i, j, pc; |
| 130 | char *nme; |
| 131 | InstrPtr q; |
| 132 | |
| 133 | pc = getPC(mb, p); |
| 134 | for (i = pc + 1; i < mb->stop; i++) { |
| 135 | q = getInstrPtr(mb, i); |
| 136 | if (getModuleId(p) == getModuleId(q) && |
| 137 | getFunctionId(p) == getFunctionId(q)) { |
| 138 | p = q; |
| 139 | break; |
| 140 | } |
| 141 | } |
| 142 | if (i == mb->stop) |
| 143 | return MAL_SUCCEED; |
| 144 | target = getArg(p, 2); |
| 145 | if (getArgType(mb, p, 1) == TYPE_int && p->argc >= 3 && (p->argc - 1) % 2 == 0) { |
| 146 | /* choice pairs */ |
| 147 | mincost = *getArgReference_int(stk, p, 1); |
| 148 | for (i = 3; i < p->argc; i += 2) { |
| 149 | cost = *getArgReference_int(stk, p, i); |
| 150 | if (cost < mincost && !isVarDisabled(mb, getArg(p, i + 1))) { |
| 151 | mincost = cost; |
| 152 | target = getArg(p, i + 1); |
| 153 | } |
| 154 | } |
| 155 | } else if (getArgType(mb, p, 1) == TYPE_str) { |
| 156 | nme = *getArgReference_str(stk, p, 1); |
| 157 | /* should be generalized to allow an arbitrary user defined function */ |
| 158 | if (strcmp(nme, "getVolume" ) != 0) |
| 159 | throw(MAL, "scheduler.choice" , ILLEGAL_ARGUMENT "Illegal cost function" ); |
| 160 | |
| 161 | mincost = -1; |
| 162 | for (j = 2; j < p->argc; j++) { |
| 163 | if (!isVarDisabled(mb, getArg(p, j))) |
| 164 | for (i = pc + 1; i < mb->stop; i++) { |
| 165 | InstrPtr q = getInstrPtr(mb, i); |
| 166 | if (p->token >= 0 && getArg(q, 0) == getArg(p, j)) { |
| 167 | cost = getVolume(stk, q, 1); |
| 168 | if (cost > 0 && (cost < mincost || mincost == -1)) { |
| 169 | mincost = cost; |
| 170 | target = getArg(p, j); |
| 171 | } |
| 172 | break; |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | } |
| 177 | } |
| 178 | #ifdef DEBUG_RUN_MEMORUN |
| 179 | fprintf(stderr, "#function target %s cost %d\n" , getVarName(mb, target), mincost); |
| 180 | #else |
| 181 | (void) cntxt; |
| 182 | #endif |
| 183 | /* remove non-qualifying variables */ |
| 184 | for (i = 2; i < p->argc; i += 2) |
| 185 | if (getArg(p, i) != target) { |
| 186 | setVarDisabled(mb, getArg(p, i - 1)); |
| 187 | setVarDisabled(mb, getArg(p, i)); |
| 188 | } |
| 189 | |
| 190 | propagateNonTarget(mb, pc + 1); |
| 191 | #ifdef DEBUG_RUN_MEMORUN |
| 192 | fprintf(stderr, "#cost choice selected %s %d\n" , |
| 193 | getVarName(mb, target), mincost); |
| 194 | fprintFunction(stderr, mb, 1); |
| 195 | #endif |
| 196 | return MAL_SUCCEED; |
| 197 | } |
| 198 | /* |
| 199 | * At the end of the query plan we save the result in |
| 200 | * a separate variable. |
| 201 | */ |
| 202 | str |
| 203 | RUNpickResult(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
| 204 | { |
| 205 | ValPtr lhs, rhs; |
| 206 | int i; |
| 207 | |
| 208 | (void) cntxt; |
| 209 | lhs = &stk->stk[getArg(p, 0)]; |
| 210 | for (i = p->retc; i < p->argc; i++) |
| 211 | if (!isVarDisabled(mb, getArg(p, i))) { |
| 212 | rhs = &stk->stk[getArg(p, i)]; |
| 213 | if ((rhs)->vtype < TYPE_str) |
| 214 | *lhs = *rhs; |
| 215 | else if (VALcopy(lhs, rhs) == NULL) |
| 216 | throw(MAL, "scheduler.pick" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
| 217 | if (lhs->vtype == TYPE_bat) |
| 218 | BBPretain(lhs->val.bval); |
| 219 | return MAL_SUCCEED; |
| 220 | } |
| 221 | |
| 222 | throw(MAL, "scheduler.pick" , OPERATION_FAILED "No result available" ); |
| 223 | } |
| 224 | /* |
| 225 | * The routine below calculates a cost based on the BAT volume in bytes. |
| 226 | * The MAL compiler ensures that all arguments have been |
| 227 | * assigned a value. |
| 228 | */ |
| 229 | str |
| 230 | RUNvolumeCost(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
| 231 | { |
| 232 | lng *cost = getArgReference_lng(stk, p, 0); |
| 233 | (void) mb; |
| 234 | |
| 235 | (void) cntxt; |
| 236 | *cost = getVolume(stk, p, 0); /* calculate total input size */ |
| 237 | return MAL_SUCCEED; |
| 238 | } |
| 239 | /* |
| 240 | * The second example shows how you can look into the remaining |
| 241 | * instructions to assess the total cost if you follow the path |
| 242 | * starting at the argument given. |
| 243 | */ |
| 244 | str |
| 245 | RUNcostPrediction(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
| 246 | { |
| 247 | lng *cost = getArgReference_lng(stk, p, 0); |
| 248 | (void) mb; |
| 249 | (void) cntxt; |
| 250 | |
| 251 | *cost = 0; |
| 252 | return MAL_SUCCEED; |
| 253 | } |
| 254 | |
| 255 | |