1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * @a M. Kersten |
11 | * @* Scheduler framework |
12 | * A key design decision for this MonetDB version is to create a clean |
13 | * processing pipeline: compile-, optimize-, schedule- and execute. |
14 | * The MAL scheduler controls the actual execution of a program through a number of |
15 | * techniques described here. It accepts a program with the intend to execute it |
16 | * as soon as possible with the best possible behavior. |
17 | * |
18 | * Note that the scheduler should only work on temporary plans. Keeping an |
19 | * execution schedule in the symbol table calls for a sound administration |
20 | * to invalidate it. This assumed that such decisions are taken by a cooperative |
21 | * front-end compiler, e.g. SQL. For the remainder we assume a volatile MAL |
22 | * program is available. |
23 | * |
24 | * Recall that the MAL optimizer already had an opportunity to massage the program |
25 | * into something that has a better execution behavior. These optimizations |
26 | * necessarily rely on static (or stale) information, such as histograms, indices, |
27 | * and rewrite heuristics. It is the task of the scheduler to achieve this |
28 | * performance objective (or better), while at the same time guaranteeing |
29 | * great throughput. |
30 | * |
31 | * Unlike the optimizer, the scheduler has access to the calling parameters, |
32 | * runtime stack for the about-to-be-executed plan, and system resources, |
33 | * such as globally cached results. Moreover, a scheduler can be called at any time |
34 | * during a MAL interpreter loop to re-consider the actions to be taken. |
35 | * This gives many more opportunities to improve performance. |
36 | * This does not hold for e.g. compiled code. |
37 | * |
38 | * Much like the optimizer framework, there are many aspects you can deal with |
39 | * at scheduling time. The easiest is to simply do nothing and rely on |
40 | * the MAL interpreter, which is the default action when no scheduler directive |
41 | * is found. |
42 | * |
43 | * In most cases, however, you would like a scheduler to take a specific action. |
44 | * It typically rewrites, extends, or reshuffles the query plan as it is being |
45 | * prepared for execution or even during its execution. |
46 | * In this process it can use the optimizer infrastructure, provided the |
47 | * changes are isolated to a private instance. This should be prepared |
48 | * before the interpreter is started and requires care during execution to |
49 | * ensure proper behaviour. The upfront cost attached is to make a complete |
50 | * copy of the program code and to assure that the definitions in the |
51 | * symbol table remain intact. |
52 | * |
53 | * To make its life easy, it is assumed that all scheduler decisions to be |
54 | * taken before the back-end is called reside in the module 'scheduler'. |
55 | * Any action to be taken during execution is kept in module 'rescheduler'. |
56 | * |
57 | * What scheduler operators are provided? |
58 | * |
59 | * The generic scheduler presented here shows a series of techniques that |
60 | * might be useful in construction of your own scheduler. |
61 | * |
62 | * @emph{Run isolation (RUNisolated)} |
63 | * Goal: to isolate changes to the query plan from others |
64 | * Rationale: A scheduler may change the order, and actual function calls. |
65 | * These changes should be confined to a single execution. The next time around |
66 | * there may be a different situation to take care off. This is achieved by |
67 | * replacing the current program with a private copy. |
68 | * Note that this process may involve a deep analysis to identify the pieces |
69 | * needed for isolation, e.g. for SQL we would extract a copy from the SQL query |
70 | * cache using code expansion. |
71 | * Just massaging the call to the cached plan is not effective. |
72 | * Impact: cost of program copying may become an issue. |
73 | * |
74 | * @emph{Run trace (RUNtracing)} |
75 | * Goal: to collect performance data for either direct monitoring or post-analysis |
76 | * Rationale:The performance profiling option in the interpreter is overly expensive, |
77 | * because it is also called for simple statements. One way out of this, is to |
78 | * inject specific performance metric calls at places where it counts. |
79 | * For example, you could wrap calls calls to a specific module or operation. |
80 | * |
81 | * @emph{Run notification (RUNnotification)} |
82 | * Goal: to inject notification calls to awaiting servers |
83 | * Rationale: Operations on the database may require side effects to take place, |
84 | * such as activating a trigger (implemented as a factory) |
85 | * |
86 | * @emph{Update notification (RUNupdateNotification)} |
87 | * Goal: Updates to the Boxes may have to forwarded to stand-by replicas. |
88 | * Rationale: actually a refinement of the notification scheme, but geared at |
89 | * maintaining a replicated system. |
90 | * |
91 | * @emph{Materialized Execution Scheduler (MEscheduler)} |
92 | * Goal:to transfor the program into fragment-based processing |
93 | * Rationale: to reduce the impact of resource limitations, such |
94 | * as main memory, in the face of materialized intermediate. |
95 | * |
96 | * @emph{Run parallelization (RUNparallel)} |
97 | * Goal: to improve throughput/response time by exploiting parallel hardware |
98 | * |
99 | * @emph{Run stepping RUNstepping} |
100 | * goal: to return to the scheduler at specific points in the program |
101 | * execution. For example to re-consider the scheduling actions. |
102 | * Note that this requires a scheme to 'backtrack' in the scenario |
103 | * |
104 | * @emph{Cache management (RUNbatCaching} |
105 | * Goal: to capture expensive BAT operations and to keep them around for |
106 | * re-use. |
107 | * |
108 | * @emph{JIT compilation} |
109 | * Use the Mcc just in time compilation feature to derive and link |
110 | * a small C-program. |
111 | * |
112 | * What the scheduler could do more? |
113 | * A separete thread of control can be used to administer the |
114 | * information needed by concurrent scheduler calls. |
115 | * |
116 | * One of the areas to be decided upon is whether the scheduler is |
117 | * also the place to manage the stack spaces. It certainly could |
118 | * reconcile parallel executions by inspection of the stack. |
119 | * |
120 | * Upon starting the scheduler decisions, we should (regularly) |
121 | * refresh our notion on the availability of resources. |
122 | * This is kept around in a global structure and is the basis for |
123 | * micro scheduling decisions. |
124 | */ |
125 | |
126 | #include "monetdb_config.h" |
127 | #include "run_pipeline.h" |
128 | #include "mal_interpreter.h" /* for showErrors() */ |
129 | #include "opt_prelude.h" |
130 | #include "opt_macro.h" |
131 | |
132 | /* |
133 | * The implementation approach of the scheduler aligns with that of the |
134 | * optimizer. We look for specific scheduler module calls and act accordingly. |
135 | * The result should be a MAL block that can be executed by the corresponding |
136 | * engine. |
137 | */ |
138 | /* |
139 | * The second example is derived from the SQL environment, which |
140 | * produces two MAL functions: one (already) stored in the query cache |
141 | * and a program to be executed in the client record. |
142 | * The latter lives only for the duration of the call and need |
143 | * not be safeguarded. Thus, we immediately call the scheduler |
144 | * to expand the code from the query cache, propagating the |
145 | * actual parameters. |
146 | * The SQL specific scheduler is called to bind the table |
147 | * columns and to make statistics known as properties. |
148 | * This creates a situation where a cost-based optimizer could |
149 | * step in and re-arrange the plan, which is ignored for now. |
150 | * @example |
151 | * function sql_cache.qry01(int A1, bit A2); |
152 | * t1:= sql.bind("schema","tables",0,0); |
153 | * ... |
154 | * end qry01; |
155 | * |
156 | * function main(); |
157 | * scheduler.inline("sql_cache","qry01"); |
158 | * scheduler.sqlbind(); |
159 | * sql_cache.qry01(12,false); |
160 | * @end example |
161 | * |
162 | * These scheduler interventions are called for by the SQL compiler. |
163 | * It may have flagged the cached version as a high potential for |
164 | * dynamic optimization, something that is not known at the MAL level. |
165 | * [The current optimizer also generates a factory, that should be |
166 | * done here and upon explicit request] |
167 | */ |
168 | #if 0 |
169 | str |
170 | RUNinline(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
171 | { |
172 | Symbol qc; |
173 | str modnme = getVarConstant(mb, getArg(p, 1)).val.sval; |
174 | str fcnnme = getVarConstant(mb, getArg(p, 2)).val.sval; |
175 | |
176 | (void) stk; |
177 | (void) p; |
178 | qc = findSymbol(cntxt ->nspace, getName(modnme), |
179 | putName(fcnnme)); |
180 | |
181 | if (qc) |
182 | MACROprocessor(cntxt, mb, qc); |
183 | |
184 | return MAL_SUCCEED; |
185 | } |
186 | #endif |
187 | |
188 | /* |
189 | * @- |
190 | * The SQL scheduler is presented merely as an example. The real one |
191 | * is located in the SQL libraries for it needs specific information. |
192 | */ |
193 | #if 0 |
194 | str |
195 | RUNsqlbind(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) |
196 | { |
197 | int i; |
198 | str msg = MAL_SUCCEED; |
199 | Symbol sqlbind = findSymbol(cntxt ->nspace, getName("sql" ), getName("bind" )); |
200 | MALfcn f = NULL; |
201 | |
202 | if (sqlbind ) |
203 | f = getSignature(sqlbind)->fcn; |
204 | |
205 | if ( f ) |
206 | for (i = 0; i < mb->stop; i++) { |
207 | p = getInstrPtr(mb, i); |
208 | if (p->fcn == f) { |
209 | if ((msg = reenterMAL(cntxt, mb, i, i + 1, stk))) |
210 | break; |
211 | /* fetch the BAT properties and turn off this instruction */ |
212 | p->token = NOOPsymbol; |
213 | } |
214 | } |
215 | #ifdef DEBUG_MAL_SCHEDULER |
216 | fprintf(stderr, "scheduler.sqlbind results\n" ); |
217 | fprintFunction(stderr, mb, stk, LIST_MAL_ALL); |
218 | #endif |
219 | return msg; |
220 | } |
221 | #endif |
222 | |
223 | |