1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | #include "monetdb_config.h" |
10 | #include "opt_costModel.h" |
11 | |
12 | /* |
13 | * The cost formula are repetative |
14 | */ |
15 | #define newRows(W,X,Y,Z) {\ |
16 | c1 = getRowCnt(mb, getArg(p,W));\ |
17 | c2 = getRowCnt(mb, getArg(p,X));\ |
18 | /* just to ensure that rowcnt was/is never set to -1 */\ |
19 | assert(c1 != (BUN) -1);\ |
20 | assert(c2 != (BUN) -1);\ |
21 | if (c1 == BUN_NONE || c2 == BUN_NONE) \ |
22 | continue;\ |
23 | setRowCnt(mb, getArg(p,Z), (Y));\ |
24 | } |
25 | /* |
26 | * The cost will be used in many places to make decisions. |
27 | * Access should be fast. |
28 | * The SQL front-end also makes the BAT index available as the |
29 | * property bid. This can be used to access the BAT and involve |
30 | * more properties into the decision procedure. |
31 | * [to be done] |
32 | * Also make sure you don't re-use variables, because then the |
33 | * row count becomes non-deterministic. |
34 | */ |
35 | str |
36 | OPTcostModelImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
37 | { |
38 | int i; |
39 | BUN c1, c2; |
40 | InstrPtr p; |
41 | char buf[256]; |
42 | lng usec = GDKusec(); |
43 | |
44 | (void) cntxt; |
45 | (void) stk; |
46 | (void) pci; |
47 | |
48 | if( OPTdebug & OPTcostmodel){ |
49 | fprintf(stderr, "#COSTMODEL optimizer exit\n" ); |
50 | fprintFunction(stderr, mb, 0, LIST_MAL_ALL); |
51 | } |
52 | if ( mb->inlineProp ) |
53 | return MAL_SUCCEED; |
54 | |
55 | for (i = 0; i < mb->stop; i++) { |
56 | p = getInstrPtr(mb, i); |
57 | if (getModuleId(p)==algebraRef) { |
58 | if (getFunctionId(p) == selectRef || |
59 | getFunctionId(p) == thetaselectRef) { |
60 | newRows(1,2, (c1 > 2 ? c2 / 2 +1: c1/2+1),0); |
61 | } else if ( |
62 | getFunctionId(p) == selectNotNilRef || |
63 | getFunctionId(p) == sortRef || |
64 | getFunctionId(p) == projectRef ){ |
65 | newRows(1,1,c1,0); |
66 | } else if (getFunctionId(p) == joinRef || |
67 | getFunctionId(p) == projectionRef || |
68 | getFunctionId(p) == bandjoinRef || |
69 | getFunctionId(p) == projectionpathRef ) { |
70 | /* assume 1-1 joins */ |
71 | newRows(1,2,(c1 < c2 ? c1 : c2),0); |
72 | } else |
73 | if (getFunctionId(p) == crossRef) { |
74 | newRows(1,2,((log((double) c1) + log((double) c2) > log(INT_MAX) ? INT_MAX : c1 * c2 +1)),0); |
75 | /* log sets errno if it cannot compute the log. This will then screw with code that checks errno */ |
76 | if (errno == ERANGE || errno == EDOM) { |
77 | errno = 0; |
78 | } |
79 | } |
80 | } else if (getModuleId(p) == batcalcRef) { |
81 | if( getFunctionId(p) == ifthenelseRef) { |
82 | if( isaBatType(getArgType(mb,p,2) ) ) { |
83 | newRows(2,2, c1,0); |
84 | } else { |
85 | newRows(3,3, c1,0); |
86 | } |
87 | } else if( isaBatType(getArgType(mb,p,1)) ){ |
88 | newRows(1,1, c1,0); |
89 | } else { |
90 | newRows(2, 2, c2,0); |
91 | } |
92 | } else if (getModuleId(p) == batstrRef) { |
93 | newRows(1,1, c1,0); |
94 | } else if (getModuleId(p) == batRef) { |
95 | if (getFunctionId(p) == appendRef ){ |
96 | /* |
97 | * Updates are a little more complicated, because you have to |
98 | * propagate changes in the expected size up the expression tree. |
99 | * For example, the SQL snippet: |
100 | * _49:bat[:oid,:oid]{rows=0,bid=622} := sql.bind_dbat("sys","example",3); |
101 | * _54 := bat.setWriteMode(_49); |
102 | * bat.append(_54,_47,true); |
103 | * shows what is produced when it encounters a deletion. If a non-empty |
104 | * append is not properly passed back to _49, the emptySet |
105 | * optimizer might remove the complete deletion code. |
106 | * The same holds for replacement operations, which add information to |
107 | * an initially empty insertion BAT. |
108 | */ |
109 | if( isaBatType(getArgType(mb,p,2)) ){ |
110 | /* insert BAT */ |
111 | newRows(1,2, (c1 + c2+1),1); |
112 | } else { |
113 | /* insert scalars */ |
114 | newRows(1,1, (c1 +1),1); |
115 | } |
116 | } else if (getFunctionId(p) == deleteRef){ |
117 | if( isaBatType(getArgType(mb,p,2)) ){ |
118 | /* delete BAT */ |
119 | newRows(1, 2, (c2 >= c1 ? 1 : c1 - c2), 1); |
120 | } else { |
121 | /* insert scalars */ |
122 | newRows(1, 1, (c1 <= 1 ? 1 : c1 - 1), 1); |
123 | } |
124 | } |
125 | } else if (getModuleId(p)==groupRef) { |
126 | if (getFunctionId(p) ==subgroupRef || getFunctionId(p) ==groupRef ) { |
127 | newRows(1,1,( c1 / 10+1),0); |
128 | } else { |
129 | newRows(1,1, c1,0); |
130 | } |
131 | } else if (getModuleId(p)== aggrRef) { |
132 | if (getFunctionId(p) == sumRef || |
133 | getFunctionId(p) == minRef || |
134 | getFunctionId(p) == maxRef || |
135 | getFunctionId(p) == avgRef) { |
136 | newRows(1, 1, (c1 != 0 ? c1 : 1), 0); |
137 | } else if (getFunctionId(p) == countRef){ |
138 | newRows(1,1, 1,0); |
139 | } |
140 | } else if( p->token == ASSIGNsymbol && p->argc== 2){ |
141 | /* copy the rows property */ |
142 | c1 = getRowCnt(mb, getArg(p,1)); |
143 | /* just to ensure that rowcnt was/is never set to -1 */ |
144 | assert(c1 != (BUN) -1); |
145 | if (c1 != BUN_NONE) |
146 | setRowCnt(mb, getArg(p,0), c1); |
147 | } |
148 | } |
149 | /* Defense line against incorrect plans */ |
150 | /* plan remains unaffected */ |
151 | //chkTypes(cntxt->usermodule, mb, FALSE); |
152 | //chkFlow(mb); |
153 | //chkDeclarations(mb); |
154 | /* keep all actions taken as a post block comment */ |
155 | usec = GDKusec()- usec; |
156 | snprintf(buf,256,"%-20s actions= 1 time=" LLFMT " usec" ,"costmodel" ,usec); |
157 | newComment(mb,buf); |
158 | addtoMalBlkHistory(mb); |
159 | |
160 | if( OPTdebug & OPTcostmodel){ |
161 | fprintf(stderr, "#COSTMODEL optimizer exit\n" ); |
162 | fprintFunction(stderr, mb, 0, LIST_MAL_ALL); |
163 | } |
164 | return MAL_SUCCEED; |
165 | } |
166 | |