1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/* (c) M.L. Kersten
10Most optimizers need easy access to key information
11for proper plan generation. Amongst others, this
12information consists of the tuple count, size,
13min- and max-value, and the null-density.
14They are kept around as persistent tables, modeled
15directly as a collection of BATs.
16
17We made need an directly accessible structure to speedup
18analysis by optimizers.
19*/
20#include "monetdb_config.h"
21#include "sql_statistics.h"
22#include "sql_execute.h"
23
24str
25sql_drop_statistics(mvc *m, sql_table *t)
26{
27 node *ncol;
28 sql_trans *tr;
29 sql_schema *sys;
30 sql_table *sysstats;
31 sql_column *statsid;
32 oid rid;
33
34 tr = m->session->tr;
35 sys = mvc_bind_schema(m, "sys");
36 if (sys == NULL)
37 throw(SQL, "sql_drop_statistics", SQLSTATE(3F000) "Internal error");
38 sysstats = mvc_bind_table(m, sys, "statistics");
39 if (sysstats == NULL)
40 throw(SQL, "sql_drop_statistics", SQLSTATE(3F000) "No table sys.statistics");
41 statsid = mvc_bind_column(m, sysstats, "column_id");
42 if (statsid == NULL)
43 throw(SQL, "sql_drop_statistics", SQLSTATE(3F000) "No table sys.statistics");
44
45 if (isTable(t) && t->columns.set) {
46 for (ncol = (t)->columns.set->h; ncol; ncol = ncol->next) {
47 sql_column *c = ncol->data;
48
49 rid = table_funcs.column_find_row(tr, statsid, &c->base.id, NULL);
50 if (!is_oid_nil(rid) &&
51 table_funcs.table_delete(tr, sysstats, rid) != LOG_OK)
52 throw(SQL, "analyze", "delete failed");
53 }
54 }
55 return MAL_SUCCEED;
56}
57
58str
59sql_analyze(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
60{
61 mvc *m = NULL;
62 str msg = getSQLContext(cntxt, mb, &m, NULL);
63 sql_trans *tr = m->session->tr;
64 node *nsch, *ntab, *ncol;
65 char *maxval = NULL, *minval = NULL;
66 size_t minlen = 0, maxlen = 0;
67 str sch = 0, tbl = 0, col = 0;
68 bit sorted, revsorted; /* not bool since address is taken */
69 lng nils = 0;
70 lng uniq = 0;
71 lng samplesize = *getArgReference_lng(stk, pci, 2);
72 int argc = pci->argc;
73 int width = 0;
74 int minmax = *getArgReference_int(stk, pci, 1);
75 int sfnd = 0, tfnd = 0, cfnd = 0;
76 sql_schema *sys;
77 sql_table *sysstats;
78 sql_column *statsid;
79 oid rid;
80 timestamp ts;
81
82 if (msg != MAL_SUCCEED || (msg = checkSQLContext(cntxt)) != NULL)
83 return msg;
84
85 sys = mvc_bind_schema(m, "sys");
86 if (sys == NULL)
87 throw(SQL, "sql.analyze", SQLSTATE(3F000) "Internal error");
88 sysstats = mvc_bind_table(m, sys, "statistics");
89 if (sysstats == NULL)
90 throw(SQL, "sql.analyze", SQLSTATE(3F000) "No table sys.statistics");
91 statsid = mvc_bind_column(m, sysstats, "column_id");
92 if (statsid == NULL)
93 throw(SQL, "sql.analyze", SQLSTATE(3F000) "No table sys.statistics");
94
95 switch (argc) {
96 case 6:
97 col = *getArgReference_str(stk, pci, 5);
98 /* fall through */
99 case 5:
100 tbl = *getArgReference_str(stk, pci, 4);
101 /* fall through */
102 case 4:
103 sch = *getArgReference_str(stk, pci, 3);
104 }
105#ifdef DEBUG_SQL_STATISTICS
106 fprintf(stderr, "analyze %s.%s.%s sample " LLFMT "%s\n", (sch ? sch : ""), (tbl ? tbl : " "), (col ? col : " "), samplesize, (minmax)?"MinMax":"");
107#endif
108 for (nsch = tr->schemas.set->h; nsch; nsch = nsch->next) {
109 sql_base *b = nsch->data;
110 sql_schema *s = (sql_schema *) nsch->data;
111 if (!isalpha((unsigned char) b->name[0]))
112 continue;
113
114 if (sch && strcmp(sch, b->name))
115 continue;
116 sfnd = 1;
117 if (s->tables.set)
118 for (ntab = (s)->tables.set->h; ntab; ntab = ntab->next) {
119 sql_base *bt = ntab->data;
120 sql_table *t = (sql_table *) bt;
121
122 if (tbl && strcmp(bt->name, tbl))
123 continue;
124 if (t->persistence != SQL_PERSIST) {
125 GDKfree(maxval);
126 GDKfree(minval);
127 throw(SQL, "analyze", SQLSTATE(42S02) "Table '%s' is not persistent", bt->name);
128 }
129 tfnd = 1;
130 if (isTable(t) && t->columns.set)
131 for (ncol = (t)->columns.set->h; ncol; ncol = ncol->next) {
132 sql_base *bc = ncol->data;
133 sql_column *c = (sql_column *) ncol->data;
134 BAT *bn, *br;
135 BAT *bsample;
136 lng sz;
137 ssize_t (*tostr)(str*,size_t*,const void*,bool);
138 void *val=0;
139
140 if (col && strcmp(bc->name, col))
141 continue;
142
143 /* remove cached value */
144 if (c->min)
145 c->min = NULL;
146 if (c->max)
147 c->max = NULL;
148
149 if ((bn = store_funcs.bind_col(tr, c, RDONLY)) == NULL) {
150 /* XXX throw error instead? */
151 continue;
152 }
153 sz = BATcount(bn);
154 tostr = BATatoms[bn->ttype].atomToStr;
155
156 rid = table_funcs.column_find_row(tr, statsid, &c->base.id, NULL);
157 cfnd = 1;
158 if (samplesize > 0) {
159 bsample = BATsample(bn, (BUN) samplesize);
160 } else
161 bsample = NULL;
162 br = BATselect(bn, bsample, ATOMnilptr(bn->ttype), NULL, true, false, false);
163 if (br == NULL) {
164 BBPunfix(bn->batCacheid);
165 /* XXX throw error instead? */
166 continue;
167 }
168 nils = BATcount(br);
169 BBPunfix(br->batCacheid);
170 if (bn->tkey)
171 uniq = sz;
172 else if (!minmax) {
173 BAT *en;
174 if (bsample)
175 br = BATproject(bsample, bn);
176 else
177 br = bn;
178 if (br && (en = BATunique(br, NULL)) != NULL) {
179 uniq = BATcount(en);
180 BBPunfix(en->batCacheid);
181 } else
182 uniq = 0;
183 if (bsample && br)
184 BBPunfix(br->batCacheid);
185 }
186 if (bsample)
187 BBPunfix(bsample->batCacheid);
188 /* use BATordered(_rev)
189 * and not
190 * BATt(rev)ordered
191 * because we want to
192 * know for sure */
193 sorted = BATordered(bn);
194 revsorted = BATordered_rev(bn);
195
196 // Gather the min/max value for builtin types
197 width = bn->twidth;
198
199 if (maxlen < 4) {
200 GDKfree(maxval);
201 maxval = GDKmalloc(4);
202 if (maxval == NULL) {
203 GDKfree(minval);
204 throw(SQL, "analyze", SQLSTATE(HY001) MAL_MALLOC_FAIL);
205 }
206 maxlen = 4;
207 }
208 if (minlen < 4) {
209 GDKfree(minval);
210 minval = GDKmalloc(4);
211 if (minval == NULL){
212 GDKfree(maxval);
213 throw(SQL, "analyze", SQLSTATE(HY001) MAL_MALLOC_FAIL);
214 }
215 minlen = 4;
216 }
217 if (tostr) {
218 if ((val = BATmax(bn, NULL)) == NULL)
219 strcpy(maxval, str_nil);
220 else {
221 if (tostr(&maxval, &maxlen, val, false) < 0) {
222 GDKfree(val);
223 GDKfree(minval);
224 GDKfree(maxval);
225 throw(SQL, "analyze", GDK_EXCEPTION);
226 }
227 GDKfree(val);
228 }
229 if ((val = BATmin(bn, NULL)) == NULL)
230 strcpy(minval, str_nil);
231 else {
232 if (tostr(&minval, &minlen, val, false) < 0) {
233 GDKfree(val);
234 GDKfree(minval);
235 GDKfree(maxval);
236 throw(SQL, "analyze", GDK_EXCEPTION);
237 }
238 GDKfree(val);
239 }
240 } else {
241 strcpy(maxval, str_nil);
242 strcpy(minval, str_nil);
243 }
244 BBPunfix(bn->batCacheid);
245 ts = timestamp_current();
246 if (!is_oid_nil(rid) && table_funcs.table_delete(tr, sysstats, rid) != LOG_OK) {
247 GDKfree(maxval);
248 GDKfree(minval);
249 throw(SQL, "analyze", "delete failed");
250 }
251 if (table_funcs.table_insert(tr, sysstats, &c->base.id, c->type.type->sqlname, &width, &ts, samplesize ? &samplesize : &sz, &sz, &uniq, &nils, minval, maxval, &sorted, &revsorted) != LOG_OK) {
252 GDKfree(maxval);
253 GDKfree(minval);
254 throw(SQL, "analyze", "insert failed");
255 }
256 }
257 }
258 }
259 GDKfree(maxval);
260 GDKfree(minval);
261 if (sch && !sfnd)
262 throw(SQL, "analyze", SQLSTATE(3F000) "Schema '%s' does not exist", sch);
263 if (tbl && !tfnd)
264 throw(SQL, "analyze", SQLSTATE(42S02) "Table '%s' does not exist", tbl);
265 if (col && !cfnd)
266 throw(SQL, "analyze", SQLSTATE(38000) "Column '%s' does not exist", col);
267 return MAL_SUCCEED;
268}
269