statscmds.c source code [PostgreSQL/src/backend/commands/statscmds.c]

/*-------------------------------------------------------------------------
 *
 * statscmds.c
 *	  Commands for creating and altering extended statistics objects
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/statscmds.c
 *
 *-------------------------------------------------------------------------
 */
15	#include "postgres.h"
16
17	#include "access/relation.h"
18	#include "access/relscan.h"
19	#include "access/table.h"
20	#include "catalog/catalog.h"
21	#include "catalog/dependency.h"
22	#include "catalog/indexing.h"
23	#include "catalog/namespace.h"
24	#include "catalog/pg_namespace.h"
25	#include "catalog/pg_statistic_ext.h"
26	#include "catalog/pg_statistic_ext_data.h"
27	#include "commands/comment.h"
28	#include "commands/defrem.h"
29	#include "miscadmin.h"
30	#include "statistics/statistics.h"
31	#include "utils/builtins.h"
32	#include "utils/inval.h"
33	#include "utils/memutils.h"
34	#include "utils/rel.h"
35	#include "utils/syscache.h"
36	#include "utils/typcache.h"
37
38
39	static char ChooseExtendedStatisticName(const* char name1, const* char *name2,
40	const char *label, Oid namespaceid);
41	static char ChooseExtendedStatisticNameAddition(List exprs);
42
43
44	/ qsort comparator for the attnums in CreateStatistics /
45	static int
46	compare_int16(const void a, const* void *b)
47	{
48	int av = (const* int16 *) a;
49	int bv = (const* int16 *) b;
50
51	/ this can't overflow if int is wider than int16 /
52	return (av - bv);
53	}
54
55	/*
56	* CREATE STATISTICS
57	*/
58	ObjectAddress
59	CreateStatistics(CreateStatsStmt *stmt)
60	{
61	int16 attnums[STATS_MAX_DIMENSIONS];
62	int numcols = `0`;
63	char *namestr;
64	NameData stxname;
65	Oid statoid;
66	Oid namespaceId;
67	Oid stxowner = GetUserId();
68	HeapTuple htup;
69	Datum values[Natts_pg_statistic_ext];
70	bool nulls[Natts_pg_statistic_ext];
71	Datum datavalues[Natts_pg_statistic_ext_data];
72	bool datanulls[Natts_pg_statistic_ext_data];
73	int2vector *stxkeys;
74	Relation statrel;
75	Relation datarel;
76	Relation rel = NULL;
77	Oid relid;
78	ObjectAddress parentobject,
79	myself;
80	Datum types[`3`]; / one for each possible type of statistic /
81	int ntypes;
82	ArrayType *stxkind;
83	bool build_ndistinct;
84	bool build_dependencies;
85	bool build_mcv;
86	bool requested_type = false;
87	int i;
88	ListCell *cell;
89
90	Assert(IsA(stmt, CreateStatsStmt));
91
92	/*
93	* Examine the FROM clause. Currently, we only allow it to be a single
94	* simple table, but later we'll probably allow multiple tables and JOIN
95	* syntax. The grammar is already prepared for that, so we have to check
96	* here that what we got is what we can support.
97	*/
98	if (list_length(stmt->relations) != `1`)
99	ereport(ERROR,
100	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
101	errmsg("only a single relation is allowed in CREATE STATISTICS")));
102
103	foreach(cell, stmt->relations)
104	{
105	Node rln = (Node ) lfirst(cell);
106
107	if (!IsA(rln, RangeVar))
108	ereport(ERROR,
109	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
110	errmsg("only a single relation is allowed in CREATE STATISTICS")));
111
112	/*
113	* CREATE STATISTICS will influence future execution plans but does
114	* not interfere with currently executing plans. So it should be
115	* enough to take only ShareUpdateExclusiveLock on relation,
116	* conflicting with ANALYZE and other DDL that sets statistical
117	* information, but not with normal queries.
118	*/
119	rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock);
120
121	/ Restrict to allowed relation types /
122	if (rel->rd_rel->relkind != RELKIND_RELATION &&
123	rel->rd_rel->relkind != RELKIND_MATVIEW &&
124	rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
125	rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
126	ereport(ERROR,
127	(errcode(ERRCODE_WRONG_OBJECT_TYPE),
128	errmsg("relation \"%s\" is not a table, foreign table, or materialized view",
129	RelationGetRelationName(rel))));
130
131	/ You must own the relation to create stats on it /
132	if (!pg_class_ownercheck(RelationGetRelid(rel), stxowner))
133	aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
134	RelationGetRelationName(rel));
135	}
136
137	Assert(rel);
138	relid = RelationGetRelid(rel);
139
140	/*
141	* If the node has a name, split it up and determine creation namespace.
142	* If not (a possibility not considered by the grammar, but one which can
143	* occur via the "CREATE TABLE ... (LIKE)" command), then we put the
144	* object in the same namespace as the relation, and cons up a name for
145	* it.
146	*/
147	if (stmt->defnames)
148	namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames,
149	&namestr);
150	else
151	{
152	namespaceId = RelationGetNamespace(rel);
153	namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
154	ChooseExtendedStatisticNameAddition(stmt->exprs),
155	"stat",
156	namespaceId);
157	}
158	namestrcpy(&stxname, namestr);
159
160	/*
161	* Deal with the possibility that the statistics object already exists.
162	*/
163	if (SearchSysCacheExists2(STATEXTNAMENSP,
164	CStringGetDatum(namestr),
165	ObjectIdGetDatum(namespaceId)))
166	{
167	if (stmt->if_not_exists)
168	{
169	ereport(NOTICE,
170	(errcode(ERRCODE_DUPLICATE_OBJECT),
171	errmsg("statistics object \"%s\" already exists, skipping",
172	namestr)));
173	relation_close(rel, NoLock);
174	return InvalidObjectAddress;
175	}
176
177	ereport(ERROR,
178	(errcode(ERRCODE_DUPLICATE_OBJECT),
179	errmsg("statistics object \"%s\" already exists", namestr)));
180	}
181
182	/*
183	* Currently, we only allow simple column references in the expression
184	* list. That will change someday, and again the grammar already supports
185	* it so we have to enforce restrictions here. For now, we can convert
186	* the expression list to a simple array of attnums. While at it, enforce
187	* some constraints.
188	*/
189	foreach(cell, stmt->exprs)
190	{
191	Node expr = (Node ) lfirst(cell);
192	ColumnRef *cref;
193	char *attname;
194	HeapTuple atttuple;
195	Form_pg_attribute attForm;
196	TypeCacheEntry *type;
197
198	if (!IsA(expr, ColumnRef))
199	ereport(ERROR,
200	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
201	errmsg("only simple column references are allowed in CREATE STATISTICS")));
202	cref = (ColumnRef *) expr;
203
204	if (list_length(cref->fields) != `1`)
205	ereport(ERROR,
206	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
207	errmsg("only simple column references are allowed in CREATE STATISTICS")));
208	attname = strVal((Value *) linitial(cref->fields));
209
210	atttuple = SearchSysCacheAttName(relid, attname);
211	if (!HeapTupleIsValid(atttuple))
212	ereport(ERROR,
213	(errcode(ERRCODE_UNDEFINED_COLUMN),
214	errmsg("column \"%s\" does not exist",
215	attname)));
216	attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
217
218	/ Disallow use of system attributes in extended stats /
219	if (attForm->attnum <= `0`)
220	ereport(ERROR,
221	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
222	errmsg("statistics creation on system columns is not supported")));
223
224	/ Disallow data types without a less-than operator /
225	type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
226	if (type->lt_opr == InvalidOid)
227	ereport(ERROR,
228	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
229	errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
230	attname, format_type_be(attForm->atttypid))));
231
232	/ Make sure no more than STATS_MAX_DIMENSIONS columns are used /
233	if (numcols >= STATS_MAX_DIMENSIONS)
234	ereport(ERROR,
235	(errcode(ERRCODE_TOO_MANY_COLUMNS),
236	errmsg("cannot have more than %d columns in statistics",
237	STATS_MAX_DIMENSIONS)));
238
239	attnums[numcols] = attForm->attnum;
240	numcols++;
241	ReleaseSysCache(atttuple);
242	}
243
244	/*
245	* Check that at least two columns were specified in the statement. The
246	* upper bound was already checked in the loop above.
247	*/
248	if (numcols < `2`)
249	ereport(ERROR,
250	(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
251	errmsg("extended statistics require at least 2 columns")));
252
253	/*
254	* Sort the attnums, which makes detecting duplicates somewhat easier, and
255	* it does not hurt (it does not affect the efficiency, unlike for
256	* indexes, for example).
257	*/
258	qsort(attnums, numcols, sizeof(int16), compare_int16);
259
260	/*
261	* Check for duplicates in the list of columns. The attnums are sorted so
262	* just check consecutive elements.
263	*/
264	for (i = `1`; i < numcols; i++)
265	{
266	if (attnums[i] == attnums[i - `1`])
267	ereport(ERROR,
268	(errcode(ERRCODE_DUPLICATE_COLUMN),
269	errmsg("duplicate column name in statistics definition")));
270	}
271
272	/ Form an int2vector representation of the sorted column list /
273	stxkeys = buildint2vector(attnums, numcols);
274
275	/*
276	* Parse the statistics kinds.
277	*/
278	build_ndistinct = false;
279	build_dependencies = false;
280	build_mcv = false;
281	foreach(cell, stmt->stat_types)
282	{
283	char type = strVal((Value ) lfirst(cell));
284
285	if (strcmp(type, "ndistinct") == `0`)
286	{
287	build_ndistinct = true;
288	requested_type = true;
289	}
290	else if (strcmp(type, "dependencies") == `0`)
291	{
292	build_dependencies = true;
293	requested_type = true;
294	}
295	else if (strcmp(type, "mcv") == `0`)
296	{
297	build_mcv = true;
298	requested_type = true;
299	}
300	else
301	ereport(ERROR,
302	(errcode(ERRCODE_SYNTAX_ERROR),
303	errmsg("unrecognized statistics kind \"%s\"",
304	type)));
305	}
306	/ If no statistic type was specified, build them all. /
307	if (!requested_type)
308	{
309	build_ndistinct = true;
310	build_dependencies = true;
311	build_mcv = true;
312	}
313
314	/ construct the char array of enabled statistic types /
315	ntypes = `0`;
316	if (build_ndistinct)
317	types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
318	if (build_dependencies)
319	types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
320	if (build_mcv)
321	types[ntypes++] = CharGetDatum(STATS_EXT_MCV);
322	Assert(ntypes > `0` && ntypes <= lengthof(types));
323	stxkind = construct_array(types, ntypes, CHAROID, `1`, true, `'c'`);
324
325	statrel = table_open(StatisticExtRelationId, RowExclusiveLock);
326
327	/*
328	* Everything seems fine, so let's build the pg_statistic_ext tuple.
329	*/
330	memset(values, `0`, sizeof(values));
331	memset(nulls, false, sizeof(nulls));
332
333	statoid = GetNewOidWithIndex(statrel, StatisticExtOidIndexId,
334	Anum_pg_statistic_ext_oid);
335	values[Anum_pg_statistic_ext_oid - `1`] = ObjectIdGetDatum(statoid);
336	values[Anum_pg_statistic_ext_stxrelid - `1`] = ObjectIdGetDatum(relid);
337	values[Anum_pg_statistic_ext_stxname - `1`] = NameGetDatum(&stxname);
338	values[Anum_pg_statistic_ext_stxnamespace - `1`] = ObjectIdGetDatum(namespaceId);
339	values[Anum_pg_statistic_ext_stxowner - `1`] = ObjectIdGetDatum(stxowner);
340	values[Anum_pg_statistic_ext_stxkeys - `1`] = PointerGetDatum(stxkeys);
341	values[Anum_pg_statistic_ext_stxkind - `1`] = PointerGetDatum(stxkind);
342
343	/ insert it into pg_statistic_ext /
344	htup = heap_form_tuple(statrel->rd_att, values, nulls);
345	CatalogTupleInsert(statrel, htup);
346	heap_freetuple(htup);
347
348	relation_close(statrel, RowExclusiveLock);
349
350	/*
351	* Also build the pg_statistic_ext_data tuple, to hold the actual
352	* statistics data.
353	*/
354	datarel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
355
356	memset(datavalues, `0`, sizeof(datavalues));
357	memset(datanulls, false, sizeof(datanulls));
358
359	datavalues[Anum_pg_statistic_ext_data_stxoid - `1`] = ObjectIdGetDatum(statoid);
360
361	/ no statistics built yet /
362	datanulls[Anum_pg_statistic_ext_data_stxdndistinct - `1`] = true;
363	datanulls[Anum_pg_statistic_ext_data_stxddependencies - `1`] = true;
364	datanulls[Anum_pg_statistic_ext_data_stxdmcv - `1`] = true;
365
366	/ insert it into pg_statistic_ext_data /
367	htup = heap_form_tuple(datarel->rd_att, datavalues, datanulls);
368	CatalogTupleInsert(datarel, htup);
369	heap_freetuple(htup);
370
371	relation_close(datarel, RowExclusiveLock);
372
373	/*
374	* Invalidate relcache so that others see the new statistics object.
375	*/
376	CacheInvalidateRelcache(rel);
377
378	relation_close(rel, NoLock);
379
380	/*
381	* Add an AUTO dependency on each column used in the stats, so that the
382	* stats object goes away if any or all of them get dropped.
383	*/
384	ObjectAddressSet(myself, StatisticExtRelationId, statoid);
385
386	for (i = `0`; i < numcols; i++)
387	{
388	ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]);
389	recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
390	}
391
392	/*
393	* Also add dependencies on namespace and owner. These are required
394	* because the stats object might have a different namespace and/or owner
395	* than the underlying table(s).
396	*/
397	ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
398	recordDependencyOn(&myself, &parentobject, DEPENDENCY_NORMAL);
399
400	recordDependencyOnOwner(StatisticExtRelationId, statoid, stxowner);
401
402	/*
403	* XXX probably there should be a recordDependencyOnCurrentExtension call
404	* here too, but we'd have to add support for ALTER EXTENSION ADD/DROP
405	* STATISTICS, which is more work than it seems worth.
406	*/
407
408	/ Add any requested comment /
409	if (stmt->stxcomment != NULL)
410	CreateComments(statoid, StatisticExtRelationId, `0`,
411	stmt->stxcomment);
412
413	/ Return stats object's address /
414	return myself;
415	}
416
417	/*
418	* Guts of statistics object deletion.
419	*/
420	void
421	RemoveStatisticsById(Oid statsOid)
422	{
423	Relation relation;
424	HeapTuple tup;
425	Form_pg_statistic_ext statext;
426	Oid relid;
427
428	/*
429	* First delete the pg_statistic_ext_data tuple holding the actual
430	* statistical data.
431	*/
432	relation = table_open(StatisticExtDataRelationId, RowExclusiveLock);
433
434	tup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
435
436	if (!HeapTupleIsValid(tup)) / should not happen /
437	elog(ERROR, "cache lookup failed for statistics data %u", statsOid);
438
439	CatalogTupleDelete(relation, &tup->t_self);
440
441	ReleaseSysCache(tup);
442
443	table_close(relation, RowExclusiveLock);
444
445	/*
446	* Delete the pg_statistic_ext tuple. Also send out a cache inval on the
447	* associated table, so that dependent plans will be rebuilt.
448	*/
449	relation = table_open(StatisticExtRelationId, RowExclusiveLock);
450
451	tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
452
453	if (!HeapTupleIsValid(tup)) / should not happen /
454	elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
455
456	statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
457	relid = statext->stxrelid;
458
459	CacheInvalidateRelcacheByRelid(relid);
460
461	CatalogTupleDelete(relation, &tup->t_self);
462
463	ReleaseSysCache(tup);
464
465	table_close(relation, RowExclusiveLock);
466	}
467
468	/*
469	* Update a statistics object for ALTER COLUMN TYPE on a source column.
470	*
471	* This could throw an error if the type change can't be supported.
472	* If it can be supported, but the stats must be recomputed, a likely choice
473	* would be to set the relevant column(s) of the pg_statistic_ext_data tuple
474	* to null until the next ANALYZE. (Note that the type change hasn't actually
475	* happened yet, so one option that's not on the table is to recompute
476	* immediately.)
477	*
478	* For both ndistinct and functional-dependencies stats, the on-disk
479	* representation is independent of the source column data types, and it is
480	* plausible to assume that the old statistic values will still be good for
481	* the new column contents. (Obviously, if the ALTER COLUMN TYPE has a USING
482	* expression that substantially alters the semantic meaning of the column
483	* values, this assumption could fail. But that seems like a corner case
484	* that doesn't justify zapping the stats in common cases.)
485	*
486	* For MCV lists that's not the case, as those statistics store the datums
487	* internally. In this case we simply reset the statistics value to NULL.
488	*
489	* Note that "type change" includes collation change, which means we can rely
490	* on the MCV list being consistent with the collation info in pg_attribute
491	* during estimation.
492	*/
493	void
494	UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
495	Oid oldColumnType, Oid newColumnType)
496	{
497	HeapTuple stup,
498	oldtup;
499
500	Relation rel;
501
502	Datum values[Natts_pg_statistic_ext_data];
503	bool nulls[Natts_pg_statistic_ext_data];
504	bool replaces[Natts_pg_statistic_ext_data];
505
506	oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
507	if (!HeapTupleIsValid(oldtup))
508	elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
509
510	/*
511	* When none of the defined statistics types contain datum values from the
512	* table's columns then there's no need to reset the stats. Functional
513	* dependencies and ndistinct stats should still hold true.
514	*/
515	if (!statext_is_kind_built(oldtup, STATS_EXT_MCV))
516	{
517	ReleaseSysCache(oldtup);
518	return;
519	}
520
521	/*
522	* OK, we need to reset some statistics. So let's build the new tuple,
523	* replacing the affected statistics types with NULL.
524	*/
525	memset(nulls, `0`, Natts_pg_statistic_ext_data * sizeof(bool));
526	memset(replaces, `0`, Natts_pg_statistic_ext_data * sizeof(bool));
527	memset(values, `0`, Natts_pg_statistic_ext_data * sizeof(Datum));
528
529	replaces[Anum_pg_statistic_ext_data_stxdmcv - `1`] = true;
530	nulls[Anum_pg_statistic_ext_data_stxdmcv - `1`] = true;
531
532	rel = heap_open(StatisticExtDataRelationId, RowExclusiveLock);
533
534	/ replace the old tuple /
535	stup = heap_modify_tuple(oldtup,
536	RelationGetDescr(rel),
537	values,
538	nulls,
539	replaces);
540
541	ReleaseSysCache(oldtup);
542	CatalogTupleUpdate(rel, &stup->t_self, stup);
543
544	heap_freetuple(stup);
545
546	heap_close(rel, RowExclusiveLock);
547	}
548
549	/*
550	* Select a nonconflicting name for a new statistics.
551	*
552	* name1, name2, and label are used the same way as for makeObjectName(),
553	* except that the label can't be NULL; digits will be appended to the label
554	* if needed to create a name that is unique within the specified namespace.
555	*
556	* Returns a palloc'd string.
557	*
558	* Note: it is theoretically possible to get a collision anyway, if someone
559	* else chooses the same name concurrently. This is fairly unlikely to be
560	* a problem in practice, especially if one is holding a share update
561	* exclusive lock on the relation identified by name1. However, if choosing
562	* multiple names within a single command, you'd better create the new object
563	* and do CommandCounterIncrement before choosing the next one!
564	*/
565	static char *
566	ChooseExtendedStatisticName(const char name1, const* char *name2,
567	const char *label, Oid namespaceid)
568	{
569	int pass = `0`;
570	char *stxname = NULL;
571	char modlabel[NAMEDATALEN];
572
573	/ try the unmodified label first /
574	StrNCpy(modlabel, label, sizeof(modlabel));
575
576	for (;;)
577	{
578	Oid existingstats;
579
580	stxname = makeObjectName(name1, name2, modlabel);
581
582	existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid,
583	PointerGetDatum(stxname),
584	ObjectIdGetDatum(namespaceid));
585	if (!OidIsValid(existingstats))
586	break;
587
588	/ found a conflict, so try a new name component /
589	pfree(stxname);
590	snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
591	}
592
593	return stxname;
594	}
595
596	/*
597	* Generate "name2" for a new statistics given the list of column names for it
598	* This will be passed to ChooseExtendedStatisticName along with the parent
599	* table name and a suitable label.
600	*
601	* We know that less than NAMEDATALEN characters will actually be used,
602	* so we can truncate the result once we've generated that many.
603	*
604	* XXX see also ChooseForeignKeyConstraintNameAddition and
605	* ChooseIndexNameAddition.
606	*/
607	static char *
608	ChooseExtendedStatisticNameAddition(List *exprs)
609	{
610	char buf[NAMEDATALEN * `2`];
611	int buflen = `0`;
612	ListCell *lc;
613
614	buf[`0`] = `'\0'`;
615	foreach(lc, exprs)
616	{
617	ColumnRef cref = (ColumnRef ) lfirst(lc);
618	const char *name;
619
620	/ It should be one of these, but just skip if it happens not to be /
621	if (!IsA(cref, ColumnRef))
622	continue;
623
624	name = strVal((Value *) linitial(cref->fields));
625
626	if (buflen > `0`)
627	buf[buflen++] = `'_'`; / insert _ between names /
628
629	/*
630	* At this point we have buflen <= NAMEDATALEN. name should be less
631	* than NAMEDATALEN already, but use strlcpy for paranoia.
632	*/
633	strlcpy(buf + buflen, name, NAMEDATALEN);
634	buflen += strlen(buf + buflen);
635	if (buflen >= NAMEDATALEN)
636	break;
637	}
638	return pstrdup(buf);
639	}
640

Browse the source code of PostgreSQL/src/backend/commands/statscmds.c