1/*-------------------------------------------------------------------------
2 *
3 * statscmds.c
4 * Commands for creating and altering extended statistics objects
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/commands/statscmds.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/relation.h"
18#include "access/relscan.h"
19#include "access/table.h"
20#include "catalog/catalog.h"
21#include "catalog/dependency.h"
22#include "catalog/indexing.h"
23#include "catalog/namespace.h"
24#include "catalog/pg_namespace.h"
25#include "catalog/pg_statistic_ext.h"
26#include "catalog/pg_statistic_ext_data.h"
27#include "commands/comment.h"
28#include "commands/defrem.h"
29#include "miscadmin.h"
30#include "statistics/statistics.h"
31#include "utils/builtins.h"
32#include "utils/inval.h"
33#include "utils/memutils.h"
34#include "utils/rel.h"
35#include "utils/syscache.h"
36#include "utils/typcache.h"
37
38
39static char *ChooseExtendedStatisticName(const char *name1, const char *name2,
40 const char *label, Oid namespaceid);
41static char *ChooseExtendedStatisticNameAddition(List *exprs);
42
43
44/* qsort comparator for the attnums in CreateStatistics */
45static int
46compare_int16(const void *a, const void *b)
47{
48 int av = *(const int16 *) a;
49 int bv = *(const int16 *) b;
50
51 /* this can't overflow if int is wider than int16 */
52 return (av - bv);
53}
54
55/*
56 * CREATE STATISTICS
57 */
58ObjectAddress
59CreateStatistics(CreateStatsStmt *stmt)
60{
61 int16 attnums[STATS_MAX_DIMENSIONS];
62 int numcols = 0;
63 char *namestr;
64 NameData stxname;
65 Oid statoid;
66 Oid namespaceId;
67 Oid stxowner = GetUserId();
68 HeapTuple htup;
69 Datum values[Natts_pg_statistic_ext];
70 bool nulls[Natts_pg_statistic_ext];
71 Datum datavalues[Natts_pg_statistic_ext_data];
72 bool datanulls[Natts_pg_statistic_ext_data];
73 int2vector *stxkeys;
74 Relation statrel;
75 Relation datarel;
76 Relation rel = NULL;
77 Oid relid;
78 ObjectAddress parentobject,
79 myself;
80 Datum types[3]; /* one for each possible type of statistic */
81 int ntypes;
82 ArrayType *stxkind;
83 bool build_ndistinct;
84 bool build_dependencies;
85 bool build_mcv;
86 bool requested_type = false;
87 int i;
88 ListCell *cell;
89
90 Assert(IsA(stmt, CreateStatsStmt));
91
92 /*
93 * Examine the FROM clause. Currently, we only allow it to be a single
94 * simple table, but later we'll probably allow multiple tables and JOIN
95 * syntax. The grammar is already prepared for that, so we have to check
96 * here that what we got is what we can support.
97 */
98 if (list_length(stmt->relations) != 1)
99 ereport(ERROR,
100 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
101 errmsg("only a single relation is allowed in CREATE STATISTICS")));
102
103 foreach(cell, stmt->relations)
104 {
105 Node *rln = (Node *) lfirst(cell);
106
107 if (!IsA(rln, RangeVar))
108 ereport(ERROR,
109 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
110 errmsg("only a single relation is allowed in CREATE STATISTICS")));
111
112 /*
113 * CREATE STATISTICS will influence future execution plans but does
114 * not interfere with currently executing plans. So it should be
115 * enough to take only ShareUpdateExclusiveLock on relation,
116 * conflicting with ANALYZE and other DDL that sets statistical
117 * information, but not with normal queries.
118 */
119 rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock);
120
121 /* Restrict to allowed relation types */
122 if (rel->rd_rel->relkind != RELKIND_RELATION &&
123 rel->rd_rel->relkind != RELKIND_MATVIEW &&
124 rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
125 rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
126 ereport(ERROR,
127 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
128 errmsg("relation \"%s\" is not a table, foreign table, or materialized view",
129 RelationGetRelationName(rel))));
130
131 /* You must own the relation to create stats on it */
132 if (!pg_class_ownercheck(RelationGetRelid(rel), stxowner))
133 aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
134 RelationGetRelationName(rel));
135 }
136
137 Assert(rel);
138 relid = RelationGetRelid(rel);
139
140 /*
141 * If the node has a name, split it up and determine creation namespace.
142 * If not (a possibility not considered by the grammar, but one which can
143 * occur via the "CREATE TABLE ... (LIKE)" command), then we put the
144 * object in the same namespace as the relation, and cons up a name for
145 * it.
146 */
147 if (stmt->defnames)
148 namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames,
149 &namestr);
150 else
151 {
152 namespaceId = RelationGetNamespace(rel);
153 namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
154 ChooseExtendedStatisticNameAddition(stmt->exprs),
155 "stat",
156 namespaceId);
157 }
158 namestrcpy(&stxname, namestr);
159
160 /*
161 * Deal with the possibility that the statistics object already exists.
162 */
163 if (SearchSysCacheExists2(STATEXTNAMENSP,
164 CStringGetDatum(namestr),
165 ObjectIdGetDatum(namespaceId)))
166 {
167 if (stmt->if_not_exists)
168 {
169 ereport(NOTICE,
170 (errcode(ERRCODE_DUPLICATE_OBJECT),
171 errmsg("statistics object \"%s\" already exists, skipping",
172 namestr)));
173 relation_close(rel, NoLock);
174 return InvalidObjectAddress;
175 }
176
177 ereport(ERROR,
178 (errcode(ERRCODE_DUPLICATE_OBJECT),
179 errmsg("statistics object \"%s\" already exists", namestr)));
180 }
181
182 /*
183 * Currently, we only allow simple column references in the expression
184 * list. That will change someday, and again the grammar already supports
185 * it so we have to enforce restrictions here. For now, we can convert
186 * the expression list to a simple array of attnums. While at it, enforce
187 * some constraints.
188 */
189 foreach(cell, stmt->exprs)
190 {
191 Node *expr = (Node *) lfirst(cell);
192 ColumnRef *cref;
193 char *attname;
194 HeapTuple atttuple;
195 Form_pg_attribute attForm;
196 TypeCacheEntry *type;
197
198 if (!IsA(expr, ColumnRef))
199 ereport(ERROR,
200 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
201 errmsg("only simple column references are allowed in CREATE STATISTICS")));
202 cref = (ColumnRef *) expr;
203
204 if (list_length(cref->fields) != 1)
205 ereport(ERROR,
206 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
207 errmsg("only simple column references are allowed in CREATE STATISTICS")));
208 attname = strVal((Value *) linitial(cref->fields));
209
210 atttuple = SearchSysCacheAttName(relid, attname);
211 if (!HeapTupleIsValid(atttuple))
212 ereport(ERROR,
213 (errcode(ERRCODE_UNDEFINED_COLUMN),
214 errmsg("column \"%s\" does not exist",
215 attname)));
216 attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
217
218 /* Disallow use of system attributes in extended stats */
219 if (attForm->attnum <= 0)
220 ereport(ERROR,
221 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
222 errmsg("statistics creation on system columns is not supported")));
223
224 /* Disallow data types without a less-than operator */
225 type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
226 if (type->lt_opr == InvalidOid)
227 ereport(ERROR,
228 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
229 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
230 attname, format_type_be(attForm->atttypid))));
231
232 /* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
233 if (numcols >= STATS_MAX_DIMENSIONS)
234 ereport(ERROR,
235 (errcode(ERRCODE_TOO_MANY_COLUMNS),
236 errmsg("cannot have more than %d columns in statistics",
237 STATS_MAX_DIMENSIONS)));
238
239 attnums[numcols] = attForm->attnum;
240 numcols++;
241 ReleaseSysCache(atttuple);
242 }
243
244 /*
245 * Check that at least two columns were specified in the statement. The
246 * upper bound was already checked in the loop above.
247 */
248 if (numcols < 2)
249 ereport(ERROR,
250 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
251 errmsg("extended statistics require at least 2 columns")));
252
253 /*
254 * Sort the attnums, which makes detecting duplicates somewhat easier, and
255 * it does not hurt (it does not affect the efficiency, unlike for
256 * indexes, for example).
257 */
258 qsort(attnums, numcols, sizeof(int16), compare_int16);
259
260 /*
261 * Check for duplicates in the list of columns. The attnums are sorted so
262 * just check consecutive elements.
263 */
264 for (i = 1; i < numcols; i++)
265 {
266 if (attnums[i] == attnums[i - 1])
267 ereport(ERROR,
268 (errcode(ERRCODE_DUPLICATE_COLUMN),
269 errmsg("duplicate column name in statistics definition")));
270 }
271
272 /* Form an int2vector representation of the sorted column list */
273 stxkeys = buildint2vector(attnums, numcols);
274
275 /*
276 * Parse the statistics kinds.
277 */
278 build_ndistinct = false;
279 build_dependencies = false;
280 build_mcv = false;
281 foreach(cell, stmt->stat_types)
282 {
283 char *type = strVal((Value *) lfirst(cell));
284
285 if (strcmp(type, "ndistinct") == 0)
286 {
287 build_ndistinct = true;
288 requested_type = true;
289 }
290 else if (strcmp(type, "dependencies") == 0)
291 {
292 build_dependencies = true;
293 requested_type = true;
294 }
295 else if (strcmp(type, "mcv") == 0)
296 {
297 build_mcv = true;
298 requested_type = true;
299 }
300 else
301 ereport(ERROR,
302 (errcode(ERRCODE_SYNTAX_ERROR),
303 errmsg("unrecognized statistics kind \"%s\"",
304 type)));
305 }
306 /* If no statistic type was specified, build them all. */
307 if (!requested_type)
308 {
309 build_ndistinct = true;
310 build_dependencies = true;
311 build_mcv = true;
312 }
313
314 /* construct the char array of enabled statistic types */
315 ntypes = 0;
316 if (build_ndistinct)
317 types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
318 if (build_dependencies)
319 types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
320 if (build_mcv)
321 types[ntypes++] = CharGetDatum(STATS_EXT_MCV);
322 Assert(ntypes > 0 && ntypes <= lengthof(types));
323 stxkind = construct_array(types, ntypes, CHAROID, 1, true, 'c');
324
325 statrel = table_open(StatisticExtRelationId, RowExclusiveLock);
326
327 /*
328 * Everything seems fine, so let's build the pg_statistic_ext tuple.
329 */
330 memset(values, 0, sizeof(values));
331 memset(nulls, false, sizeof(nulls));
332
333 statoid = GetNewOidWithIndex(statrel, StatisticExtOidIndexId,
334 Anum_pg_statistic_ext_oid);
335 values[Anum_pg_statistic_ext_oid - 1] = ObjectIdGetDatum(statoid);
336 values[Anum_pg_statistic_ext_stxrelid - 1] = ObjectIdGetDatum(relid);
337 values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname);
338 values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId);
339 values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner);
340 values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
341 values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind);
342
343 /* insert it into pg_statistic_ext */
344 htup = heap_form_tuple(statrel->rd_att, values, nulls);
345 CatalogTupleInsert(statrel, htup);
346 heap_freetuple(htup);
347
348 relation_close(statrel, RowExclusiveLock);
349
350 /*
351 * Also build the pg_statistic_ext_data tuple, to hold the actual
352 * statistics data.
353 */
354 datarel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
355
356 memset(datavalues, 0, sizeof(datavalues));
357 memset(datanulls, false, sizeof(datanulls));
358
359 datavalues[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statoid);
360
361 /* no statistics built yet */
362 datanulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
363 datanulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
364 datanulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
365
366 /* insert it into pg_statistic_ext_data */
367 htup = heap_form_tuple(datarel->rd_att, datavalues, datanulls);
368 CatalogTupleInsert(datarel, htup);
369 heap_freetuple(htup);
370
371 relation_close(datarel, RowExclusiveLock);
372
373 /*
374 * Invalidate relcache so that others see the new statistics object.
375 */
376 CacheInvalidateRelcache(rel);
377
378 relation_close(rel, NoLock);
379
380 /*
381 * Add an AUTO dependency on each column used in the stats, so that the
382 * stats object goes away if any or all of them get dropped.
383 */
384 ObjectAddressSet(myself, StatisticExtRelationId, statoid);
385
386 for (i = 0; i < numcols; i++)
387 {
388 ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]);
389 recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
390 }
391
392 /*
393 * Also add dependencies on namespace and owner. These are required
394 * because the stats object might have a different namespace and/or owner
395 * than the underlying table(s).
396 */
397 ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
398 recordDependencyOn(&myself, &parentobject, DEPENDENCY_NORMAL);
399
400 recordDependencyOnOwner(StatisticExtRelationId, statoid, stxowner);
401
402 /*
403 * XXX probably there should be a recordDependencyOnCurrentExtension call
404 * here too, but we'd have to add support for ALTER EXTENSION ADD/DROP
405 * STATISTICS, which is more work than it seems worth.
406 */
407
408 /* Add any requested comment */
409 if (stmt->stxcomment != NULL)
410 CreateComments(statoid, StatisticExtRelationId, 0,
411 stmt->stxcomment);
412
413 /* Return stats object's address */
414 return myself;
415}
416
417/*
418 * Guts of statistics object deletion.
419 */
420void
421RemoveStatisticsById(Oid statsOid)
422{
423 Relation relation;
424 HeapTuple tup;
425 Form_pg_statistic_ext statext;
426 Oid relid;
427
428 /*
429 * First delete the pg_statistic_ext_data tuple holding the actual
430 * statistical data.
431 */
432 relation = table_open(StatisticExtDataRelationId, RowExclusiveLock);
433
434 tup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
435
436 if (!HeapTupleIsValid(tup)) /* should not happen */
437 elog(ERROR, "cache lookup failed for statistics data %u", statsOid);
438
439 CatalogTupleDelete(relation, &tup->t_self);
440
441 ReleaseSysCache(tup);
442
443 table_close(relation, RowExclusiveLock);
444
445 /*
446 * Delete the pg_statistic_ext tuple. Also send out a cache inval on the
447 * associated table, so that dependent plans will be rebuilt.
448 */
449 relation = table_open(StatisticExtRelationId, RowExclusiveLock);
450
451 tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
452
453 if (!HeapTupleIsValid(tup)) /* should not happen */
454 elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
455
456 statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
457 relid = statext->stxrelid;
458
459 CacheInvalidateRelcacheByRelid(relid);
460
461 CatalogTupleDelete(relation, &tup->t_self);
462
463 ReleaseSysCache(tup);
464
465 table_close(relation, RowExclusiveLock);
466}
467
468/*
469 * Update a statistics object for ALTER COLUMN TYPE on a source column.
470 *
471 * This could throw an error if the type change can't be supported.
472 * If it can be supported, but the stats must be recomputed, a likely choice
473 * would be to set the relevant column(s) of the pg_statistic_ext_data tuple
474 * to null until the next ANALYZE. (Note that the type change hasn't actually
475 * happened yet, so one option that's *not* on the table is to recompute
476 * immediately.)
477 *
478 * For both ndistinct and functional-dependencies stats, the on-disk
479 * representation is independent of the source column data types, and it is
480 * plausible to assume that the old statistic values will still be good for
481 * the new column contents. (Obviously, if the ALTER COLUMN TYPE has a USING
482 * expression that substantially alters the semantic meaning of the column
483 * values, this assumption could fail. But that seems like a corner case
484 * that doesn't justify zapping the stats in common cases.)
485 *
486 * For MCV lists that's not the case, as those statistics store the datums
487 * internally. In this case we simply reset the statistics value to NULL.
488 *
489 * Note that "type change" includes collation change, which means we can rely
490 * on the MCV list being consistent with the collation info in pg_attribute
491 * during estimation.
492 */
493void
494UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
495 Oid oldColumnType, Oid newColumnType)
496{
497 HeapTuple stup,
498 oldtup;
499
500 Relation rel;
501
502 Datum values[Natts_pg_statistic_ext_data];
503 bool nulls[Natts_pg_statistic_ext_data];
504 bool replaces[Natts_pg_statistic_ext_data];
505
506 oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
507 if (!HeapTupleIsValid(oldtup))
508 elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
509
510 /*
511 * When none of the defined statistics types contain datum values from the
512 * table's columns then there's no need to reset the stats. Functional
513 * dependencies and ndistinct stats should still hold true.
514 */
515 if (!statext_is_kind_built(oldtup, STATS_EXT_MCV))
516 {
517 ReleaseSysCache(oldtup);
518 return;
519 }
520
521 /*
522 * OK, we need to reset some statistics. So let's build the new tuple,
523 * replacing the affected statistics types with NULL.
524 */
525 memset(nulls, 0, Natts_pg_statistic_ext_data * sizeof(bool));
526 memset(replaces, 0, Natts_pg_statistic_ext_data * sizeof(bool));
527 memset(values, 0, Natts_pg_statistic_ext_data * sizeof(Datum));
528
529 replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
530 nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
531
532 rel = heap_open(StatisticExtDataRelationId, RowExclusiveLock);
533
534 /* replace the old tuple */
535 stup = heap_modify_tuple(oldtup,
536 RelationGetDescr(rel),
537 values,
538 nulls,
539 replaces);
540
541 ReleaseSysCache(oldtup);
542 CatalogTupleUpdate(rel, &stup->t_self, stup);
543
544 heap_freetuple(stup);
545
546 heap_close(rel, RowExclusiveLock);
547}
548
549/*
550 * Select a nonconflicting name for a new statistics.
551 *
552 * name1, name2, and label are used the same way as for makeObjectName(),
553 * except that the label can't be NULL; digits will be appended to the label
554 * if needed to create a name that is unique within the specified namespace.
555 *
556 * Returns a palloc'd string.
557 *
558 * Note: it is theoretically possible to get a collision anyway, if someone
559 * else chooses the same name concurrently. This is fairly unlikely to be
560 * a problem in practice, especially if one is holding a share update
561 * exclusive lock on the relation identified by name1. However, if choosing
562 * multiple names within a single command, you'd better create the new object
563 * and do CommandCounterIncrement before choosing the next one!
564 */
565static char *
566ChooseExtendedStatisticName(const char *name1, const char *name2,
567 const char *label, Oid namespaceid)
568{
569 int pass = 0;
570 char *stxname = NULL;
571 char modlabel[NAMEDATALEN];
572
573 /* try the unmodified label first */
574 StrNCpy(modlabel, label, sizeof(modlabel));
575
576 for (;;)
577 {
578 Oid existingstats;
579
580 stxname = makeObjectName(name1, name2, modlabel);
581
582 existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid,
583 PointerGetDatum(stxname),
584 ObjectIdGetDatum(namespaceid));
585 if (!OidIsValid(existingstats))
586 break;
587
588 /* found a conflict, so try a new name component */
589 pfree(stxname);
590 snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
591 }
592
593 return stxname;
594}
595
596/*
597 * Generate "name2" for a new statistics given the list of column names for it
598 * This will be passed to ChooseExtendedStatisticName along with the parent
599 * table name and a suitable label.
600 *
601 * We know that less than NAMEDATALEN characters will actually be used,
602 * so we can truncate the result once we've generated that many.
603 *
604 * XXX see also ChooseForeignKeyConstraintNameAddition and
605 * ChooseIndexNameAddition.
606 */
607static char *
608ChooseExtendedStatisticNameAddition(List *exprs)
609{
610 char buf[NAMEDATALEN * 2];
611 int buflen = 0;
612 ListCell *lc;
613
614 buf[0] = '\0';
615 foreach(lc, exprs)
616 {
617 ColumnRef *cref = (ColumnRef *) lfirst(lc);
618 const char *name;
619
620 /* It should be one of these, but just skip if it happens not to be */
621 if (!IsA(cref, ColumnRef))
622 continue;
623
624 name = strVal((Value *) linitial(cref->fields));
625
626 if (buflen > 0)
627 buf[buflen++] = '_'; /* insert _ between names */
628
629 /*
630 * At this point we have buflen <= NAMEDATALEN. name should be less
631 * than NAMEDATALEN already, but use strlcpy for paranoia.
632 */
633 strlcpy(buf + buflen, name, NAMEDATALEN);
634 buflen += strlen(buf + buflen);
635 if (buflen >= NAMEDATALEN)
636 break;
637 }
638 return pstrdup(buf);
639}
640