indexcmds.c source code [PostgreSQL/src/backend/commands/indexcmds.c]

/*-------------------------------------------------------------------------
 *
 * indexcmds.c
 *	  POSTGRES define and remove index code.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/indexcmds.c
 *
 *-------------------------------------------------------------------------
 */
15
16	#include "postgres.h"
17
18	#include "access/amapi.h"
19	#include "access/heapam.h"
20	#include "access/htup_details.h"
21	#include "access/reloptions.h"
22	#include "access/sysattr.h"
23	#include "access/tableam.h"
24	#include "access/xact.h"
25	#include "catalog/catalog.h"
26	#include "catalog/index.h"
27	#include "catalog/indexing.h"
28	#include "catalog/pg_am.h"
29	#include "catalog/pg_constraint.h"
30	#include "catalog/pg_inherits.h"
31	#include "catalog/pg_opclass.h"
32	#include "catalog/pg_opfamily.h"
33	#include "catalog/pg_tablespace.h"
34	#include "catalog/pg_type.h"
35	#include "commands/comment.h"
36	#include "commands/dbcommands.h"
37	#include "commands/defrem.h"
38	#include "commands/event_trigger.h"
39	#include "commands/progress.h"
40	#include "commands/tablecmds.h"
41	#include "commands/tablespace.h"
42	#include "mb/pg_wchar.h"
43	#include "miscadmin.h"
44	#include "nodes/makefuncs.h"
45	#include "nodes/nodeFuncs.h"
46	#include "optimizer/optimizer.h"
47	#include "parser/parse_coerce.h"
48	#include "parser/parse_func.h"
49	#include "parser/parse_oper.h"
50	#include "partitioning/partdesc.h"
51	#include "pgstat.h"
52	#include "rewrite/rewriteManip.h"
53	#include "storage/lmgr.h"
54	#include "storage/proc.h"
55	#include "storage/procarray.h"
56	#include "storage/sinvaladt.h"
57	#include "utils/acl.h"
58	#include "utils/builtins.h"
59	#include "utils/fmgroids.h"
60	#include "utils/inval.h"
61	#include "utils/lsyscache.h"
62	#include "utils/memutils.h"
63	#include "utils/partcache.h"
64	#include "utils/pg_rusage.h"
65	#include "utils/regproc.h"
66	#include "utils/snapmgr.h"
67	#include "utils/syscache.h"
68
69
70	/ non-export function prototypes /
71	static void CheckPredicate(Expr *predicate);
72	static void ComputeIndexAttrs(IndexInfo *indexInfo,
73	Oid *typeOidP,
74	Oid *collationOidP,
75	Oid *classOidP,
76	int16 *colOptionP,
77	List *attList,
78	List *exclusionOpNames,
79	Oid relId,
80	const char *accessMethodName, Oid accessMethodId,
81	bool amcanorder,
82	bool isconstraint);
83	static char ChooseIndexName(const* char *tabname, Oid namespaceId,
84	List colnames, List exclusionOpNames,
85	bool primary, bool isconstraint);
86	static char ChooseIndexNameAddition(List colnames);
87	static List ChooseIndexColumnNames(List indexElems);
88	static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
89	Oid relId, Oid oldRelId, void *arg);
90	static bool ReindexRelationConcurrently(Oid relationOid, int options);
91	static void ReindexPartitionedIndex(Relation parentIdx);
92	static void update_relispartition(Oid relationId, bool newval);
93
94	/*
95	* callback argument type for RangeVarCallbackForReindexIndex()
96	*/
97	struct ReindexIndexCallbackState
98	{
99	bool concurrent; / flag from statement /
100	Oid locked_table_oid; / tracks previously locked table /
101	};
102
103	/*
104	* CheckIndexCompatible
105	* Determine whether an existing index definition is compatible with a
106	* prospective index definition, such that the existing index storage
107	* could become the storage of the new index, avoiding a rebuild.
108	*
109	* 'heapRelation': the relation the index would apply to.
110	* 'accessMethodName': name of the AM to use.
111	* 'attributeList': a list of IndexElem specifying columns and expressions
112	* to index on.
113	* 'exclusionOpNames': list of names of exclusion-constraint operators,
114	* or NIL if not an exclusion constraint.
115	*
116	* This is tailored to the needs of ALTER TABLE ALTER TYPE, which recreates
117	* any indexes that depended on a changing column from their pg_get_indexdef
118	* or pg_get_constraintdef definitions. We omit some of the sanity checks of
119	* DefineIndex. We assume that the old and new indexes have the same number
120	* of columns and that if one has an expression column or predicate, both do.
121	* Errors arising from the attribute list still apply.
122	*
123	* Most column type changes that can skip a table rewrite do not invalidate
124	* indexes. We acknowledge this when all operator classes, collations and
125	* exclusion operators match. Though we could further permit intra-opfamily
126	* changes for btree and hash indexes, that adds subtle complexity with no
127	* concrete benefit for core types. Note, that INCLUDE columns aren't
128	* checked by this function, for them it's enough that table rewrite is
129	* skipped.
130	*
131	* When a comparison or exclusion operator has a polymorphic input type, the
132	* actual input types must also match. This defends against the possibility
133	* that operators could vary behavior in response to get_fn_expr_argtype().
134	* At present, this hazard is theoretical: check_exclusion_constraint() and
135	* all core index access methods decline to set fn_expr for such calls.
136	*
137	* We do not yet implement a test to verify compatibility of expression
138	* columns or predicates, so assume any such index is incompatible.
139	*/
140	bool
141	CheckIndexCompatible(Oid oldId,
142	const char *accessMethodName,
143	List *attributeList,
144	List *exclusionOpNames)
145	{
146	bool isconstraint;
147	Oid *typeObjectId;
148	Oid *collationObjectId;
149	Oid *classObjectId;
150	Oid accessMethodId;
151	Oid relationId;
152	HeapTuple tuple;
153	Form_pg_index indexForm;
154	Form_pg_am accessMethodForm;
155	IndexAmRoutine *amRoutine;
156	bool amcanorder;
157	int16 *coloptions;
158	IndexInfo *indexInfo;
159	int numberOfAttributes;
160	int old_natts;
161	bool isnull;
162	bool ret = true;
163	oidvector *old_indclass;
164	oidvector *old_indcollation;
165	Relation irel;
166	int i;
167	Datum d;
168
169	/ Caller should already have the relation locked in some way. /
170	relationId = IndexGetRelation(oldId, false);
171
172	/*
173	* We can pretend isconstraint = false unconditionally. It only serves to
174	* decide the text of an error message that should never happen for us.
175	*/
176	isconstraint = false;
177
178	numberOfAttributes = list_length(attributeList);
179	Assert(numberOfAttributes > `0`);
180	Assert(numberOfAttributes <= INDEX_MAX_KEYS);
181
182	/ look up the access method /
183	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
184	if (!HeapTupleIsValid(tuple))
185	ereport(ERROR,
186	(errcode(ERRCODE_UNDEFINED_OBJECT),
187	errmsg("access method \"%s\" does not exist",
188	accessMethodName)));
189	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
190	accessMethodId = accessMethodForm->oid;
191	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
192	ReleaseSysCache(tuple);
193
194	amcanorder = amRoutine->amcanorder;
195
196	/*
197	* Compute the operator classes, collations, and exclusion operators for
198	* the new index, so we can test whether it's compatible with the existing
199	* one. Note that ComputeIndexAttrs might fail here, but that's OK:
200	* DefineIndex would have called this function with the same arguments
201	* later on, and it would have failed then anyway. Our attributeList
202	* contains only key attributes, thus we're filling ii_NumIndexAttrs and
203	* ii_NumIndexKeyAttrs with same value.
204	*/
205	indexInfo = makeIndexInfo(numberOfAttributes, numberOfAttributes,
206	accessMethodId, NIL, NIL, false, false, false);
207	typeObjectId = (Oid ) palloc(numberOfAttributes sizeof(Oid));
208	collationObjectId = (Oid ) palloc(numberOfAttributes sizeof(Oid));
209	classObjectId = (Oid ) palloc(numberOfAttributes sizeof(Oid));
210	coloptions = (int16 ) palloc(numberOfAttributes sizeof(int16));
211	ComputeIndexAttrs(indexInfo,
212	typeObjectId, collationObjectId, classObjectId,
213	coloptions, attributeList,
214	exclusionOpNames, relationId,
215	accessMethodName, accessMethodId,
216	amcanorder, isconstraint);
217
218
219	/ Get the soon-obsolete pg_index tuple. /
220	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldId));
221	if (!HeapTupleIsValid(tuple))
222	elog(ERROR, "cache lookup failed for index %u", oldId);
223	indexForm = (Form_pg_index) GETSTRUCT(tuple);
224
225	/*
226	* We don't assess expressions or predicates; assume incompatibility.
227	* Also, if the index is invalid for any reason, treat it as incompatible.
228	*/
229	if (!(heap_attisnull(tuple, Anum_pg_index_indpred, NULL) &&
230	heap_attisnull(tuple, Anum_pg_index_indexprs, NULL) &&
231	indexForm->indisvalid))
232	{
233	ReleaseSysCache(tuple);
234	return false;
235	}
236
237	/ Any change in operator class or collation breaks compatibility. /
238	old_natts = indexForm->indnkeyatts;
239	Assert(old_natts == numberOfAttributes);
240
241	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indcollation, &isnull);
242	Assert(!isnull);
243	old_indcollation = (oidvector *) DatumGetPointer(d);
244
245	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indclass, &isnull);
246	Assert(!isnull);
247	old_indclass = (oidvector *) DatumGetPointer(d);
248
249	ret = (memcmp(old_indclass->values, classObjectId,
250	old_natts * sizeof(Oid)) == `0` &&
251	memcmp(old_indcollation->values, collationObjectId,
252	old_natts * sizeof(Oid)) == `0`);
253
254	ReleaseSysCache(tuple);
255
256	if (!ret)
257	return false;
258
259	/ For polymorphic opcintype, column type changes break compatibility. /
260	irel = index_open(oldId, AccessShareLock); / caller probably has a lock /
261	for (i = `0`; i < old_natts; i++)
262	{
263	if (IsPolymorphicType(get_opclass_input_type(classObjectId[i])) &&
264	TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
265	{
266	ret = false;
267	break;
268	}
269	}
270
271	/ Any change in exclusion operator selections breaks compatibility. /
272	if (ret && indexInfo->ii_ExclusionOps != NULL)
273	{
274	Oid *old_operators,
275	*old_procs;
276	uint16 *old_strats;
277
278	RelationGetExclusionInfo(irel, &old_operators, &old_procs, &old_strats);
279	ret = memcmp(old_operators, indexInfo->ii_ExclusionOps,
280	old_natts * sizeof(Oid)) == `0`;
281
282	/ Require an exact input type match for polymorphic operators. /
283	if (ret)
284	{
285	for (i = `0`; i < old_natts && ret; i++)
286	{
287	Oid left,
288	right;
289
290	op_input_types(indexInfo->ii_ExclusionOps[i], &left, &right);
291	if ((IsPolymorphicType(left) \|\| IsPolymorphicType(right)) &&
292	TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
293	{
294	ret = false;
295	break;
296	}
297	}
298	}
299	}
300
301	index_close(irel, NoLock);
302	return ret;
303	}
304
305
306	/*
307	* WaitForOlderSnapshots
308	*
309	* Wait for transactions that might have an older snapshot than the given xmin
310	* limit, because it might not contain tuples deleted just before it has
311	* been taken. Obtain a list of VXIDs of such transactions, and wait for them
312	* individually. This is used when building an index concurrently.
313	*
314	* We can exclude any running transactions that have xmin > the xmin given;
315	* their oldest snapshot must be newer than our xmin limit.
316	* We can also exclude any transactions that have xmin = zero, since they
317	* evidently have no live snapshot at all (and any one they might be in
318	* process of taking is certainly newer than ours). Transactions in other
319	* DBs can be ignored too, since they'll never even be able to see the
320	* index being worked on.
321	*
322	* We can also exclude autovacuum processes and processes running manual
323	* lazy VACUUMs, because they won't be fazed by missing index entries
324	* either. (Manual ANALYZEs, however, can't be excluded because they
325	* might be within transactions that are going to do arbitrary operations
326	* later.)
327	*
328	* Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
329	* check for that.
330	*
331	* If a process goes idle-in-transaction with xmin zero, we do not need to
332	* wait for it anymore, per the above argument. We do not have the
333	* infrastructure right now to stop waiting if that happens, but we can at
334	* least avoid the folly of waiting when it is idle at the time we would
335	* begin to wait. We do this by repeatedly rechecking the output of
336	* GetCurrentVirtualXIDs. If, during any iteration, a particular vxid
337	* doesn't show up in the output, we know we can forget about it.
338	*/
339	static void
340	WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
341	{
342	int n_old_snapshots;
343	int i;
344	VirtualTransactionId *old_snapshots;
345
346	old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
347	PROC_IS_AUTOVACUUM \| PROC_IN_VACUUM,
348	&n_old_snapshots);
349	if (progress)
350	pgstat_progress_update_param(PROGRESS_WAITFOR_TOTAL, n_old_snapshots);
351
352	for (i = `0`; i < n_old_snapshots; i++)
353	{
354	if (!VirtualTransactionIdIsValid(old_snapshots[i]))
355	continue; / found uninteresting in previous cycle /
356
357	if (i > `0`)
358	{
359	/ see if anything's changed ... /
360	VirtualTransactionId *newer_snapshots;
361	int n_newer_snapshots;
362	int j;
363	int k;
364
365	newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
366	true, false,
367	PROC_IS_AUTOVACUUM \| PROC_IN_VACUUM,
368	&n_newer_snapshots);
369	for (j = i; j < n_old_snapshots; j++)
370	{
371	if (!VirtualTransactionIdIsValid(old_snapshots[j]))
372	continue; / found uninteresting in previous cycle /
373	for (k = `0`; k < n_newer_snapshots; k++)
374	{
375	if (VirtualTransactionIdEquals(old_snapshots[j],
376	newer_snapshots[k]))
377	break;
378	}
379	if (k >= n_newer_snapshots) / not there anymore /
380	SetInvalidVirtualTransactionId(old_snapshots[j]);
381	}
382	pfree(newer_snapshots);
383	}
384
385	if (VirtualTransactionIdIsValid(old_snapshots[i]))
386	{
387	if (progress)
388	{
389	PGPROC *holder = BackendIdGetProc(old_snapshots[i].backendId);
390
391	pgstat_progress_update_param(PROGRESS_WAITFOR_CURRENT_PID,
392	holder->pid);
393	}
394	VirtualXactLock(old_snapshots[i], true);
395	}
396
397	if (progress)
398	pgstat_progress_update_param(PROGRESS_WAITFOR_DONE, i + `1`);
399	}
400	}
401
402
403	/*
404	* DefineIndex
405	* Creates a new index.
406	*
407	* 'relationId': the OID of the heap relation on which the index is to be
408	* created
409	* 'stmt': IndexStmt describing the properties of the new index.
410	* 'indexRelationId': normally InvalidOid, but during bootstrap can be
411	* nonzero to specify a preselected OID for the index.
412	* 'parentIndexId': the OID of the parent index; InvalidOid if not the child
413	* of a partitioned index.
414	* 'parentConstraintId': the OID of the parent constraint; InvalidOid if not
415	* the child of a constraint (only used when recursing)
416	* 'is_alter_table': this is due to an ALTER rather than a CREATE operation.
417	* 'check_rights': check for CREATE rights in namespace and tablespace. (This
418	* should be true except when ALTER is deleting/recreating an index.)
419	* 'check_not_in_use': check for table not already in use in current session.
420	* This should be true unless caller is holding the table open, in which
421	* case the caller had better have checked it earlier.
422	* 'skip_build': make the catalog entries but don't create the index files
423	* 'quiet': suppress the NOTICE chatter ordinarily provided for constraints.
424	*
425	* Returns the object address of the created index.
426	*/
427	ObjectAddress
428	DefineIndex(Oid relationId,
429	IndexStmt *stmt,
430	Oid indexRelationId,
431	Oid parentIndexId,
432	Oid parentConstraintId,
433	bool is_alter_table,
434	bool check_rights,
435	bool check_not_in_use,
436	bool skip_build,
437	bool quiet)
438	{
439	char *indexRelationName;
440	char *accessMethodName;
441	Oid *typeObjectId;
442	Oid *collationObjectId;
443	Oid *classObjectId;
444	Oid accessMethodId;
445	Oid namespaceId;
446	Oid tablespaceId;
447	Oid createdConstraintId = InvalidOid;
448	List *indexColNames;
449	List *allIndexParams;
450	Relation rel;
451	HeapTuple tuple;
452	Form_pg_am accessMethodForm;
453	IndexAmRoutine *amRoutine;
454	bool amcanorder;
455	amoptions_function amoptions;
456	bool partitioned;
457	Datum reloptions;
458	int16 *coloptions;
459	IndexInfo *indexInfo;
460	bits16 flags;
461	bits16 constr_flags;
462	int numberOfAttributes;
463	int numberOfKeyAttributes;
464	TransactionId limitXmin;
465	ObjectAddress address;
466	LockRelId heaprelid;
467	LOCKTAG heaplocktag;
468	LOCKMODE lockmode;
469	Snapshot snapshot;
470	int save_nestlevel = -`1`;
471	int i;
472
473	/*
474	* Some callers need us to run with an empty default_tablespace; this is a
475	* necessary hack to be able to reproduce catalog state accurately when
476	* recreating indexes after table-rewriting ALTER TABLE.
477	*/
478	if (stmt->reset_default_tblspc)
479	{
480	save_nestlevel = NewGUCNestLevel();
481	(void) set_config_option("default_tablespace", "",
482	PGC_USERSET, PGC_S_SESSION,
483	GUC_ACTION_SAVE, true, `0`, false);
484	}
485
486	/*
487	* Start progress report. If we're building a partition, this was already
488	* done.
489	*/
490	if (!OidIsValid(parentIndexId))
491	{
492	pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
493	relationId);
494	pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
495	stmt->concurrent ?
496	PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY :
497	PROGRESS_CREATEIDX_COMMAND_CREATE);
498	}
499
500	/*
501	* No index OID to report yet
502	*/
503	pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
504	InvalidOid);
505
506	/*
507	* count key attributes in index
508	*/
509	numberOfKeyAttributes = list_length(stmt->indexParams);
510
511	/*
512	* Calculate the new list of index columns including both key columns and
513	* INCLUDE columns. Later we can determine which of these are key
514	* columns, and which are just part of the INCLUDE list by checking the
515	* list position. A list item in a position less than ii_NumIndexKeyAttrs
516	* is part of the key columns, and anything equal to and over is part of
517	* the INCLUDE columns.
518	*/
519	allIndexParams = list_concat(list_copy(stmt->indexParams),
520	list_copy(stmt->indexIncludingParams));
521	numberOfAttributes = list_length(allIndexParams);
522
523	if (numberOfAttributes <= `0`)
524	ereport(ERROR,
525	(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
526	errmsg("must specify at least one column")));
527	if (numberOfAttributes > INDEX_MAX_KEYS)
528	ereport(ERROR,
529	(errcode(ERRCODE_TOO_MANY_COLUMNS),
530	errmsg("cannot use more than %d columns in an index",
531	INDEX_MAX_KEYS)));
532
533	/*
534	* Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
535	* index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
536	* (but not VACUUM).
537	*
538	* NB: Caller is responsible for making sure that relationId refers to the
539	* relation on which the index should be built; except in bootstrap mode,
540	* this will typically require the caller to have already locked the
541	* relation. To avoid lock upgrade hazards, that lock should be at least
542	* as strong as the one we take here.
543	*
544	* NB: If the lock strength here ever changes, code that is run by
545	* parallel workers under the control of certain particular ambuild
546	* functions will need to be updated, too.
547	*/
548	lockmode = stmt->concurrent ? ShareUpdateExclusiveLock : ShareLock;
549	rel = table_open(relationId, lockmode);
550
551	namespaceId = RelationGetNamespace(rel);
552
553	/ Ensure that it makes sense to index this kind of relation /
554	switch (rel->rd_rel->relkind)
555	{
556	case RELKIND_RELATION:
557	case RELKIND_MATVIEW:
558	case RELKIND_PARTITIONED_TABLE:
559	/ OK /
560	break;
561	case RELKIND_FOREIGN_TABLE:
562
563	/*
564	* Custom error message for FOREIGN TABLE since the term is close
565	* to a regular table and can confuse the user.
566	*/
567	ereport(ERROR,
568	(errcode(ERRCODE_WRONG_OBJECT_TYPE),
569	errmsg("cannot create index on foreign table \"%s\"",
570	RelationGetRelationName(rel))));
571	break;
572	default:
573	ereport(ERROR,
574	(errcode(ERRCODE_WRONG_OBJECT_TYPE),
575	errmsg("\"%s\" is not a table or materialized view",
576	RelationGetRelationName(rel))));
577	break;
578	}
579
580	/*
581	* Establish behavior for partitioned tables, and verify sanity of
582	* parameters.
583	*
584	* We do not build an actual index in this case; we only create a few
585	* catalog entries. The actual indexes are built by recursing for each
586	* partition.
587	*/
588	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
589	if (partitioned)
590	{
591	if (stmt->concurrent)
592	ereport(ERROR,
593	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
594	errmsg("cannot create index on partitioned table \"%s\" concurrently",
595	RelationGetRelationName(rel))));
596	if (stmt->excludeOpNames)
597	ereport(ERROR,
598	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
599	errmsg("cannot create exclusion constraints on partitioned table \"%s\"",
600	RelationGetRelationName(rel))));
601	}
602
603	/*
604	* Don't try to CREATE INDEX on temp tables of other backends.
605	*/
606	if (RELATION_IS_OTHER_TEMP(rel))
607	ereport(ERROR,
608	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
609	errmsg("cannot create indexes on temporary tables of other sessions")));
610
611	/*
612	* Unless our caller vouches for having checked this already, insist that
613	* the table not be in use by our own session, either. Otherwise we might
614	* fail to make entries in the new index (for instance, if an INSERT or
615	* UPDATE is in progress and has already made its list of target indexes).
616	*/
617	if (check_not_in_use)
618	CheckTableNotInUse(rel, "CREATE INDEX");
619
620	/*
621	* Verify we (still) have CREATE rights in the rel's namespace.
622	* (Presumably we did when the rel was created, but maybe not anymore.)
623	* Skip check if caller doesn't want it. Also skip check if
624	* bootstrapping, since permissions machinery may not be working yet.
625	*/
626	if (check_rights && !IsBootstrapProcessingMode())
627	{
628	AclResult aclresult;
629
630	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
631	ACL_CREATE);
632	if (aclresult != ACLCHECK_OK)
633	aclcheck_error(aclresult, OBJECT_SCHEMA,
634	get_namespace_name(namespaceId));
635	}
636
637	/*
638	* Select tablespace to use. If not specified, use default tablespace
639	* (which may in turn default to database's default).
640	*/
641	if (stmt->tableSpace)
642	{
643	tablespaceId = get_tablespace_oid(stmt->tableSpace, false);
644	if (partitioned && tablespaceId == MyDatabaseTableSpace)
645	ereport(ERROR,
646	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
647	errmsg("cannot specify default tablespace for partitioned relations")));
648	}
649	else
650	{
651	tablespaceId = GetDefaultTablespace(rel->rd_rel->relpersistence,
652	partitioned);
653	/ note InvalidOid is OK in this case /
654	}
655
656	/ Check tablespace permissions /
657	if (check_rights &&
658	OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
659	{
660	AclResult aclresult;
661
662	aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
663	ACL_CREATE);
664	if (aclresult != ACLCHECK_OK)
665	aclcheck_error(aclresult, OBJECT_TABLESPACE,
666	get_tablespace_name(tablespaceId));
667	}
668
669	/*
670	* Force shared indexes into the pg_global tablespace. This is a bit of a
671	* hack but seems simpler than marking them in the BKI commands. On the
672	* other hand, if it's not shared, don't allow it to be placed there.
673	*/
674	if (rel->rd_rel->relisshared)
675	tablespaceId = GLOBALTABLESPACE_OID;
676	else if (tablespaceId == GLOBALTABLESPACE_OID)
677	ereport(ERROR,
678	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
679	errmsg("only shared relations can be placed in pg_global tablespace")));
680
681	/*
682	* Choose the index column names.
683	*/
684	indexColNames = ChooseIndexColumnNames(allIndexParams);
685
686	/*
687	* Select name for index if caller didn't specify
688	*/
689	indexRelationName = stmt->idxname;
690	if (indexRelationName == NULL)
691	indexRelationName = ChooseIndexName(RelationGetRelationName(rel),
692	namespaceId,
693	indexColNames,
694	stmt->excludeOpNames,
695	stmt->primary,
696	stmt->isconstraint);
697
698	/*
699	* look up the access method, verify it can handle the requested features
700	*/
701	accessMethodName = stmt->accessMethod;
702	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
703	if (!HeapTupleIsValid(tuple))
704	{
705	/*
706	* Hack to provide more-or-less-transparent updating of old RTREE
707	* indexes to GiST: if RTREE is requested and not found, use GIST.
708	*/
709	if (strcmp(accessMethodName, "rtree") == `0`)
710	{
711	ereport(NOTICE,
712	(errmsg("substituting access method \"gist\" for obsolete method \"rtree\"")));
713	accessMethodName = "gist";
714	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
715	}
716
717	if (!HeapTupleIsValid(tuple))
718	ereport(ERROR,
719	(errcode(ERRCODE_UNDEFINED_OBJECT),
720	errmsg("access method \"%s\" does not exist",
721	accessMethodName)));
722	}
723	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
724	accessMethodId = accessMethodForm->oid;
725	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
726
727	pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
728	accessMethodId);
729
730	if (stmt->unique && !amRoutine->amcanunique)
731	ereport(ERROR,
732	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
733	errmsg("access method \"%s\" does not support unique indexes",
734	accessMethodName)));
735	if (stmt->indexIncludingParams != NIL && !amRoutine->amcaninclude)
736	ereport(ERROR,
737	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
738	errmsg("access method \"%s\" does not support included columns",
739	accessMethodName)));
740	if (numberOfAttributes > `1` && !amRoutine->amcanmulticol)
741	ereport(ERROR,
742	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
743	errmsg("access method \"%s\" does not support multicolumn indexes",
744	accessMethodName)));
745	if (stmt->excludeOpNames && amRoutine->amgettuple == NULL)
746	ereport(ERROR,
747	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
748	errmsg("access method \"%s\" does not support exclusion constraints",
749	accessMethodName)));
750
751	amcanorder = amRoutine->amcanorder;
752	amoptions = amRoutine->amoptions;
753
754	pfree(amRoutine);
755	ReleaseSysCache(tuple);
756
757	/*
758	* Validate predicate, if given
759	*/
760	if (stmt->whereClause)
761	CheckPredicate((Expr *) stmt->whereClause);
762
763	/*
764	* Parse AM-specific options, convert to text array form, validate.
765	*/
766	reloptions = transformRelOptions((Datum) `0`, stmt->options,
767	NULL, NULL, false, false);
768
769	(void) index_reloptions(amoptions, reloptions, true);
770
771	/*
772	* Prepare arguments for index_create, primarily an IndexInfo structure.
773	* Note that predicates must be in implicit-AND format. In a concurrent
774	* build, mark it not-ready-for-inserts.
775	*/
776	indexInfo = makeIndexInfo(numberOfAttributes,
777	numberOfKeyAttributes,
778	accessMethodId,
779	NIL, / expressions, NIL for now /
780	make_ands_implicit((Expr *) stmt->whereClause),
781	stmt->unique,
782	!stmt->concurrent,
783	stmt->concurrent);
784
785	typeObjectId = (Oid ) palloc(numberOfAttributes sizeof(Oid));
786	collationObjectId = (Oid ) palloc(numberOfAttributes sizeof(Oid));
787	classObjectId = (Oid ) palloc(numberOfAttributes sizeof(Oid));
788	coloptions = (int16 ) palloc(numberOfAttributes sizeof(int16));
789	ComputeIndexAttrs(indexInfo,
790	typeObjectId, collationObjectId, classObjectId,
791	coloptions, allIndexParams,
792	stmt->excludeOpNames, relationId,
793	accessMethodName, accessMethodId,
794	amcanorder, stmt->isconstraint);
795
796	/*
797	* Extra checks when creating a PRIMARY KEY index.
798	*/
799	if (stmt->primary)
800	index_check_primary_key(rel, indexInfo, is_alter_table, stmt);
801
802	/*
803	* If this table is partitioned and we're creating a unique index or a
804	* primary key, make sure that the indexed columns are part of the
805	* partition key. Otherwise it would be possible to violate uniqueness by
806	* putting values that ought to be unique in different partitions.
807	*
808	* We could lift this limitation if we had global indexes, but those have
809	* their own problems, so this is a useful feature combination.
810	*/
811	if (partitioned && (stmt->unique \|\| stmt->primary))
812	{
813	PartitionKey key = rel->rd_partkey;
814	int i;
815
816	/*
817	* A partitioned table can have unique indexes, as long as all the
818	* columns in the partition key appear in the unique key. A
819	* partition-local index can enforce global uniqueness iff the PK
820	* value completely determines the partition that a row is in.
821	*
822	* Thus, verify that all the columns in the partition key appear in
823	* the unique key definition.
824	*/
825	for (i = `0`; i < key->partnatts; i++)
826	{
827	bool found = false;
828	int j;
829	const char *constraint_type;
830
831	if (stmt->primary)
832	constraint_type = "PRIMARY KEY";
833	else if (stmt->unique)
834	constraint_type = "UNIQUE";
835	else if (stmt->excludeOpNames != NIL)
836	constraint_type = "EXCLUDE";
837	else
838	{
839	elog(ERROR, "unknown constraint type");
840	constraint_type = NULL; / keep compiler quiet /
841	}
842
843	/*
844	* It may be possible to support UNIQUE constraints when partition
845	* keys are expressions, but is it worth it? Give up for now.
846	*/
847	if (key->partattrs[i] == `0`)
848	ereport(ERROR,
849	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
850	errmsg("unsupported %s constraint with partition key definition",
851	constraint_type),
852	errdetail("%s constraints cannot be used when partition keys include expressions.",
853	constraint_type)));
854
855	for (j = `0`; j < indexInfo->ii_NumIndexKeyAttrs; j++)
856	{
857	if (key->partattrs[i] == indexInfo->ii_IndexAttrNumbers[j])
858	{
859	found = true;
860	break;
861	}
862	}
863	if (!found)
864	{
865	Form_pg_attribute att;
866
867	att = TupleDescAttr(RelationGetDescr(rel), key->partattrs[i] - `1`);
868	ereport(ERROR,
869	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
870	errmsg("insufficient columns in %s constraint definition",
871	constraint_type),
872	errdetail("%s constraint on table \"%s\" lacks column \"%s\" which is part of the partition key.",
873	constraint_type, RelationGetRelationName(rel),
874	NameStr(att->attname))));
875	}
876	}
877	}
878
879
880	/*
881	* We disallow indexes on system columns. They would not necessarily get
882	* updated correctly, and they don't seem useful anyway.
883	*/
884	for (i = `0`; i < indexInfo->ii_NumIndexAttrs; i++)
885	{
886	AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i];
887
888	if (attno < `0`)
889	ereport(ERROR,
890	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
891	errmsg("index creation on system columns is not supported")));
892	}
893
894	/*
895	* Also check for system columns used in expressions or predicates.
896	*/
897	if (indexInfo->ii_Expressions \|\| indexInfo->ii_Predicate)
898	{
899	Bitmapset *indexattrs = NULL;
900
901	pull_varattnos((Node *) indexInfo->ii_Expressions, `1`, &indexattrs);
902	pull_varattnos((Node *) indexInfo->ii_Predicate, `1`, &indexattrs);
903
904	for (i = FirstLowInvalidHeapAttributeNumber + `1`; i < `0`; i++)
905	{
906	if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
907	indexattrs))
908	ereport(ERROR,
909	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
910	errmsg("index creation on system columns is not supported")));
911	}
912	}
913
914	/*
915	* Report index creation if appropriate (delay this till after most of the
916	* error checks)
917	*/
918	if (stmt->isconstraint && !quiet)
919	{
920	const char *constraint_type;
921
922	if (stmt->primary)
923	constraint_type = "PRIMARY KEY";
924	else if (stmt->unique)
925	constraint_type = "UNIQUE";
926	else if (stmt->excludeOpNames != NIL)
927	constraint_type = "EXCLUDE";
928	else
929	{
930	elog(ERROR, "unknown constraint type");
931	constraint_type = NULL; / keep compiler quiet /
932	}
933
934	ereport(DEBUG1,
935	(errmsg("%s %s will create implicit index \"%s\" for table \"%s\"",
936	is_alter_table ? "ALTER TABLE / ADD" : "CREATE TABLE /",
937	constraint_type,
938	indexRelationName, RelationGetRelationName(rel))));
939	}
940
941	/*
942	* A valid stmt->oldNode implies that we already have a built form of the
943	* index. The caller should also decline any index build.
944	*/
945	Assert(!OidIsValid(stmt->oldNode) \|\| (skip_build && !stmt->concurrent));
946
947	/*
948	* Make the catalog entries for the index, including constraints. This
949	* step also actually builds the index, except if caller requested not to
950	* or in concurrent mode, in which case it'll be done later, or doing a
951	* partitioned index (because those don't have storage).
952	*/
953	flags = constr_flags = `0`;
954	if (stmt->isconstraint)
955	flags \|= INDEX_CREATE_ADD_CONSTRAINT;
956	if (skip_build \|\| stmt->concurrent \|\| partitioned)
957	flags \|= INDEX_CREATE_SKIP_BUILD;
958	if (stmt->if_not_exists)
959	flags \|= INDEX_CREATE_IF_NOT_EXISTS;
960	if (stmt->concurrent)
961	flags \|= INDEX_CREATE_CONCURRENT;
962	if (partitioned)
963	flags \|= INDEX_CREATE_PARTITIONED;
964	if (stmt->primary)
965	flags \|= INDEX_CREATE_IS_PRIMARY;
966
967	/*
968	* If the table is partitioned, and recursion was declined but partitions
969	* exist, mark the index as invalid.
970	*/
971	if (partitioned && stmt->relation && !stmt->relation->inh)
972	{
973	PartitionDesc pd = RelationGetPartitionDesc(rel);
974
975	if (pd->nparts != `0`)
976	flags \|= INDEX_CREATE_INVALID;
977	}
978
979	if (stmt->deferrable)
980	constr_flags \|= INDEX_CONSTR_CREATE_DEFERRABLE;
981	if (stmt->initdeferred)
982	constr_flags \|= INDEX_CONSTR_CREATE_INIT_DEFERRED;
983
984	indexRelationId =
985	index_create(rel, indexRelationName, indexRelationId, parentIndexId,
986	parentConstraintId,
987	stmt->oldNode, indexInfo, indexColNames,
988	accessMethodId, tablespaceId,
989	collationObjectId, classObjectId,
990	coloptions, reloptions,
991	flags, constr_flags,
992	allowSystemTableMods, !check_rights,
993	&createdConstraintId);
994
995	ObjectAddressSet(address, RelationRelationId, indexRelationId);
996
997	/*
998	* Revert to original default_tablespace. Must do this before any return
999	* from this function, but after index_create, so this is a good time.
1000	*/
1001	if (save_nestlevel >= `0`)
1002	AtEOXact_GUC(true, save_nestlevel);
1003
1004	if (!OidIsValid(indexRelationId))
1005	{
1006	table_close(rel, NoLock);
1007
1008	/ If this is the top-level index, we're done /
1009	if (!OidIsValid(parentIndexId))
1010	pgstat_progress_end_command();
1011
1012	return address;
1013	}
1014
1015	/ Add any requested comment /
1016	if (stmt->idxcomment != NULL)
1017	CreateComments(indexRelationId, RelationRelationId, `0`,
1018	stmt->idxcomment);
1019
1020	if (partitioned)
1021	{
1022	/*
1023	* Unless caller specified to skip this step (via ONLY), process each
1024	* partition to make sure they all contain a corresponding index.
1025	*
1026	* If we're called internally (no stmt->relation), recurse always.
1027	*/
1028	if (!stmt->relation \|\| stmt->relation->inh)
1029	{
1030	PartitionDesc partdesc = RelationGetPartitionDesc(rel);
1031	int nparts = partdesc->nparts;
1032	Oid part_oids = palloc(sizeof(Oid) nparts);
1033	bool invalidate_parent = false;
1034	TupleDesc parentDesc;
1035	Oid *opfamOids;
1036
1037	pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
1038	nparts);
1039
1040	memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
1041
1042	parentDesc = RelationGetDescr(rel);
1043	opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
1044	for (i = `0`; i < numberOfKeyAttributes; i++)
1045	opfamOids[i] = get_opclass_family(classObjectId[i]);
1046
1047	/*
1048	* For each partition, scan all existing indexes; if one matches
1049	* our index definition and is not already attached to some other
1050	* parent index, attach it to the one we just created.
1051	*
1052	* If none matches, build a new index by calling ourselves
1053	* recursively with the same options (except for the index name).
1054	*/
1055	for (i = `0`; i < nparts; i++)
1056	{
1057	Oid childRelid = part_oids[i];
1058	Relation childrel;
1059	List *childidxs;
1060	ListCell *cell;
1061	AttrNumber *attmap;
1062	bool found = false;
1063	int maplen;
1064
1065	childrel = table_open(childRelid, lockmode);
1066
1067	/*
1068	* Don't try to create indexes on foreign tables, though. Skip
1069	* those if a regular index, or fail if trying to create a
1070	* constraint index.
1071	*/
1072	if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1073	{
1074	if (stmt->unique \|\| stmt->primary)
1075	ereport(ERROR,
1076	(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1077	errmsg("cannot create unique index on partitioned table \"%s\"",
1078	RelationGetRelationName(rel)),
1079	errdetail("Table \"%s\" contains partitions that are foreign tables.",
1080	RelationGetRelationName(rel))));
1081
1082	table_close(childrel, lockmode);
1083	continue;
1084	}
1085
1086	childidxs = RelationGetIndexList(childrel);
1087	attmap =
1088	convert_tuples_by_name_map(RelationGetDescr(childrel),
1089	parentDesc,
1090	gettext_noop("could not convert row type"));
1091	maplen = parentDesc->natts;
1092
1093	foreach(cell, childidxs)
1094	{
1095	Oid cldidxid = lfirst_oid(cell);
1096	Relation cldidx;
1097	IndexInfo *cldIdxInfo;
1098
1099	/ this index is already partition of another one /
1100	if (has_superclass(cldidxid))
1101	continue;
1102
1103	cldidx = index_open(cldidxid, lockmode);
1104	cldIdxInfo = BuildIndexInfo(cldidx);
1105	if (CompareIndexInfo(cldIdxInfo, indexInfo,
1106	cldidx->rd_indcollation,
1107	collationObjectId,
1108	cldidx->rd_opfamily,
1109	opfamOids,
1110	attmap, maplen))
1111	{
1112	Oid cldConstrOid = InvalidOid;
1113
1114	/*
1115	* Found a match.
1116	*
1117	* If this index is being created in the parent
1118	* because of a constraint, then the child needs to
1119	* have a constraint also, so look for one. If there
1120	* is no such constraint, this index is no good, so
1121	* keep looking.
1122	*/
1123	if (createdConstraintId != InvalidOid)
1124	{
1125	cldConstrOid =
1126	get_relation_idx_constraint_oid(childRelid,
1127	cldidxid);
1128	if (cldConstrOid == InvalidOid)
1129	{
1130	index_close(cldidx, lockmode);
1131	continue;
1132	}
1133	}
1134
1135	/ Attach index to parent and we're done. /
1136	IndexSetParentIndex(cldidx, indexRelationId);
1137	if (createdConstraintId != InvalidOid)
1138	ConstraintSetParentConstraint(cldConstrOid,
1139	createdConstraintId,
1140	childRelid);
1141
1142	if (!cldidx->rd_index->indisvalid)
1143	invalidate_parent = true;
1144
1145	found = true;
1146	/ keep lock till commit /
1147	index_close(cldidx, NoLock);
1148	break;
1149	}
1150
1151	index_close(cldidx, lockmode);
1152	}
1153
1154	list_free(childidxs);
1155	table_close(childrel, NoLock);
1156
1157	/*
1158	* If no matching index was found, create our own.
1159	*/
1160	if (!found)
1161	{
1162	IndexStmt *childStmt = copyObject(stmt);
1163	bool found_whole_row;
1164	ListCell *lc;
1165
1166	/*
1167	* We can't use the same index name for the child index,
1168	* so clear idxname to let the recursive invocation choose
1169	* a new name. Likewise, the existing target relation
1170	* field is wrong, and if indexOid or oldNode are set,
1171	* they mustn't be applied to the child either.
1172	*/
1173	childStmt->idxname = NULL;
1174	childStmt->relation = NULL;
1175	childStmt->indexOid = InvalidOid;
1176	childStmt->oldNode = InvalidOid;
1177
1178	/*
1179	* Adjust any Vars (both in expressions and in the index's
1180	* WHERE clause) to match the partition's column numbering
1181	* in case it's different from the parent's.
1182	*/
1183	foreach(lc, childStmt->indexParams)
1184	{
1185	IndexElem *ielem = lfirst(lc);
1186
1187	/*
1188	* If the index parameter is an expression, we must
1189	* translate it to contain child Vars.
1190	*/
1191	if (ielem->expr)
1192	{
1193	ielem->expr =
1194	map_variable_attnos((Node *) ielem->expr,
1195	`1`, `0`, attmap, maplen,
1196	InvalidOid,
1197	&found_whole_row);
1198	if (found_whole_row)
1199	elog(ERROR, "cannot convert whole-row table reference");
1200	}
1201	}
1202	childStmt->whereClause =
1203	map_variable_attnos(stmt->whereClause, `1`, `0`,
1204	attmap, maplen,
1205	InvalidOid, &found_whole_row);
1206	if (found_whole_row)
1207	elog(ERROR, "cannot convert whole-row table reference");
1208
1209	DefineIndex(childRelid, childStmt,
1210	InvalidOid, / no predefined OID /
1211	indexRelationId, / this is our child /
1212	createdConstraintId,
1213	is_alter_table, check_rights, check_not_in_use,
1214	skip_build, quiet);
1215	}
1216
1217	pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
1218	i + `1`);
1219	pfree(attmap);
1220	}
1221
1222	/*
1223	* The pg_index row we inserted for this index was marked
1224	* indisvalid=true. But if we attached an existing index that is
1225	* invalid, this is incorrect, so update our row to invalid too.
1226	*/
1227	if (invalidate_parent)
1228	{
1229	Relation pg_index = table_open(IndexRelationId, RowExclusiveLock);
1230	HeapTuple tup,
1231	newtup;
1232
1233	tup = SearchSysCache1(INDEXRELID,
1234	ObjectIdGetDatum(indexRelationId));
1235	if (!HeapTupleIsValid(tup))
1236	elog(ERROR, "cache lookup failed for index %u",
1237	indexRelationId);
1238	newtup = heap_copytuple(tup);
1239	((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
1240	CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
1241	ReleaseSysCache(tup);
1242	table_close(pg_index, RowExclusiveLock);
1243	heap_freetuple(newtup);
1244	}
1245	}
1246
1247	/*
1248	* Indexes on partitioned tables are not themselves built, so we're
1249	* done here.
1250	*/
1251	table_close(rel, NoLock);
1252	if (!OidIsValid(parentIndexId))
1253	pgstat_progress_end_command();
1254	return address;
1255	}
1256
1257	if (!stmt->concurrent)
1258	{
1259	/ Close the heap and we're done, in the non-concurrent case /
1260	table_close(rel, NoLock);
1261
1262	/ If this is the top-level index, we're done. /
1263	if (!OidIsValid(parentIndexId))
1264	pgstat_progress_end_command();
1265
1266	return address;
1267	}
1268
1269	/ save lockrelid and locktag for below, then close rel /
1270	heaprelid = rel->rd_lockInfo.lockRelId;
1271	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
1272	table_close(rel, NoLock);
1273
1274	/*
1275	* For a concurrent build, it's important to make the catalog entries
1276	* visible to other transactions before we start to build the index. That
1277	* will prevent them from making incompatible HOT updates. The new index
1278	* will be marked not indisready and not indisvalid, so that no one else
1279	* tries to either insert into it or use it for queries.
1280	*
1281	* We must commit our current transaction so that the index becomes
1282	* visible; then start another. Note that all the data structures we just
1283	* built are lost in the commit. The only data we keep past here are the
1284	* relation IDs.
1285	*
1286	* Before committing, get a session-level lock on the table, to ensure
1287	* that neither it nor the index can be dropped before we finish. This
1288	* cannot block, even if someone else is waiting for access, because we
1289	* already have the same lock within our transaction.
1290	*
1291	* Note: we don't currently bother with a session lock on the index,
1292	* because there are no operations that could change its state while we
1293	* hold lock on the parent table. This might need to change later.
1294	*/
1295	LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1296
1297	PopActiveSnapshot();
1298	CommitTransactionCommand();
1299	StartTransactionCommand();
1300
1301	/*
1302	* The index is now visible, so we can report the OID.
1303	*/
1304	pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
1305	indexRelationId);
1306
1307	/*
1308	* Phase 2 of concurrent index build (see comments for validate_index()
1309	* for an overview of how this works)
1310	*
1311	* Now we must wait until no running transaction could have the table open
1312	* with the old list of indexes. Use ShareLock to consider running
1313	* transactions that hold locks that permit writing to the table. Note we
1314	* do not need to worry about xacts that open the table for writing after
1315	* this point; they will see the new index when they open it.
1316	*
1317	* Note: the reason we use actual lock acquisition here, rather than just
1318	* checking the ProcArray and sleeping, is that deadlock is possible if
1319	* one of the transactions in question is blocked trying to acquire an
1320	* exclusive lock on our table. The lock code will detect deadlock and
1321	* error out properly.
1322	*/
1323	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1324	PROGRESS_CREATEIDX_PHASE_WAIT_1);
1325	WaitForLockers(heaplocktag, ShareLock, true);
1326
1327	/*
1328	* At this moment we are sure that there are no transactions with the
1329	* table open for write that don't have this new index in their list of
1330	* indexes. We have waited out all the existing transactions and any new
1331	* transaction will have the new index in its list, but the index is still
1332	* marked as "not-ready-for-inserts". The index is consulted while
1333	* deciding HOT-safety though. This arrangement ensures that no new HOT
1334	* chains can be created where the new tuple and the old tuple in the
1335	* chain have different index keys.
1336	*
1337	* We now take a new snapshot, and build the index using all tuples that
1338	* are visible in this snapshot. We can be sure that any HOT updates to
1339	* these tuples will be compatible with the index, since any updates made
1340	* by transactions that didn't know about the index are now committed or
1341	* rolled back. Thus, each visible tuple is either the end of its
1342	* HOT-chain or the extension of the chain is HOT-safe for this index.
1343	*/
1344
1345	/ Set ActiveSnapshot since functions in the indexes may need it /
1346	PushActiveSnapshot(GetTransactionSnapshot());
1347
1348	/ Perform concurrent build of index /
1349	index_concurrently_build(relationId, indexRelationId);
1350
1351	/ we can do away with our snapshot /
1352	PopActiveSnapshot();
1353
1354	/*
1355	* Commit this transaction to make the indisready update visible.
1356	*/
1357	CommitTransactionCommand();
1358	StartTransactionCommand();
1359
1360	/*
1361	* Phase 3 of concurrent index build
1362	*
1363	* We once again wait until no transaction can have the table open with
1364	* the index marked as read-only for updates.
1365	*/
1366	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1367	PROGRESS_CREATEIDX_PHASE_WAIT_2);
1368	WaitForLockers(heaplocktag, ShareLock, true);
1369
1370	/*
1371	* Now take the "reference snapshot" that will be used by validate_index()
1372	* to filter candidate tuples. Beware! There might still be snapshots in
1373	* use that treat some transaction as in-progress that our reference
1374	* snapshot treats as committed. If such a recently-committed transaction
1375	* deleted tuples in the table, we will not include them in the index; yet
1376	* those transactions which see the deleting one as still-in-progress will
1377	* expect such tuples to be there once we mark the index as valid.
1378	*
1379	* We solve this by waiting for all endangered transactions to exit before
1380	* we mark the index as valid.
1381	*
1382	* We also set ActiveSnapshot to this snap, since functions in indexes may
1383	* need a snapshot.
1384	*/
1385	snapshot = RegisterSnapshot(GetTransactionSnapshot());
1386	PushActiveSnapshot(snapshot);
1387
1388	/*
1389	* Scan the index and the heap, insert any missing index entries.
1390	*/
1391	validate_index(relationId, indexRelationId, snapshot);
1392
1393	/*
1394	* Drop the reference snapshot. We must do this before waiting out other
1395	* snapshot holders, else we will deadlock against other processes also
1396	* doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one
1397	* they must wait for. But first, save the snapshot's xmin to use as
1398	* limitXmin for GetCurrentVirtualXIDs().
1399	*/
1400	limitXmin = snapshot->xmin;
1401
1402	PopActiveSnapshot();
1403	UnregisterSnapshot(snapshot);
1404
1405	/*
1406	* The snapshot subsystem could still contain registered snapshots that
1407	* are holding back our process's advertised xmin; in particular, if
1408	* default_transaction_isolation = serializable, there is a transaction
1409	* snapshot that is still active. The CatalogSnapshot is likewise a
1410	* hazard. To ensure no deadlocks, we must commit and start yet another
1411	* transaction, and do our wait before any snapshot has been taken in it.
1412	*/
1413	CommitTransactionCommand();
1414	StartTransactionCommand();
1415
1416	/ We should now definitely not be advertising any xmin. /
1417	Assert(MyPgXact->xmin == InvalidTransactionId);
1418
1419	/*
1420	* The index is now valid in the sense that it contains all currently
1421	* interesting tuples. But since it might not contain tuples deleted just
1422	* before the reference snap was taken, we have to wait out any
1423	* transactions that might have older snapshots.
1424	*/
1425	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1426	PROGRESS_CREATEIDX_PHASE_WAIT_3);
1427	WaitForOlderSnapshots(limitXmin, true);
1428
1429	/*
1430	* Index can now be marked valid -- update its pg_index entry
1431	*/
1432	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
1433
1434	/*
1435	* The pg_index update will cause backends (including this one) to update
1436	* relcache entries for the index itself, but we should also send a
1437	* relcache inval on the parent table to force replanning of cached plans.
1438	* Otherwise existing sessions might fail to use the new index where it
1439	* would be useful. (Note that our earlier commits did not create reasons
1440	* to replan; so relcache flush on the index itself was sufficient.)
1441	*/
1442	CacheInvalidateRelcacheByRelid(heaprelid.relId);
1443
1444	/*
1445	* Last thing to do is release the session-level lock on the parent table.
1446	*/
1447	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1448
1449	pgstat_progress_end_command();
1450
1451	return address;
1452	}
1453
1454
1455	/*
1456	* CheckMutability
1457	* Test whether given expression is mutable
1458	*/
1459	static bool
1460	CheckMutability(Expr *expr)
1461	{
1462	/*
1463	* First run the expression through the planner. This has a couple of
1464	* important consequences. First, function default arguments will get
1465	* inserted, which may affect volatility (consider "default now()").
1466	* Second, inline-able functions will get inlined, which may allow us to
1467	* conclude that the function is really less volatile than it's marked. As
1468	* an example, polymorphic functions must be marked with the most volatile
1469	* behavior that they have for any input type, but once we inline the
1470	* function we may be able to conclude that it's not so volatile for the
1471	* particular input type we're dealing with.
1472	*
1473	* We assume here that expression_planner() won't scribble on its input.
1474	*/
1475	expr = expression_planner(expr);
1476
1477	/ Now we can search for non-immutable functions /
1478	return contain_mutable_functions((Node *) expr);
1479	}
1480
1481
1482	/*
1483	* CheckPredicate
1484	* Checks that the given partial-index predicate is valid.
1485	*
1486	* This used to also constrain the form of the predicate to forms that
1487	* indxpath.c could do something with. However, that seems overly
1488	* restrictive. One useful application of partial indexes is to apply
1489	* a UNIQUE constraint across a subset of a table, and in that scenario
1490	* any evaluable predicate will work. So accept any predicate here
1491	* (except ones requiring a plan), and let indxpath.c fend for itself.
1492	*/
1493	static void
1494	CheckPredicate(Expr *predicate)
1495	{
1496	/*
1497	* transformExpr() should have already rejected subqueries, aggregates,
1498	* and window functions, based on the EXPR_KIND_ for a predicate.
1499	*/
1500
1501	/*
1502	* A predicate using mutable functions is probably wrong, for the same
1503	* reasons that we don't allow an index expression to use one.
1504	*/
1505	if (CheckMutability(predicate))
1506	ereport(ERROR,
1507	(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1508	errmsg("functions in index predicate must be marked IMMUTABLE")));
1509	}
1510
1511	/*
1512	* Compute per-index-column information, including indexed column numbers
1513	* or index expressions, opclasses, and indoptions. Note, all output vectors
1514	* should be allocated for all columns, including "including" ones.
1515	*/
1516	static void
1517	ComputeIndexAttrs(IndexInfo *indexInfo,
1518	Oid *typeOidP,
1519	Oid *collationOidP,
1520	Oid *classOidP,
1521	int16 *colOptionP,
1522	List attList, /* list of IndexElem's /
1523	List *exclusionOpNames,
1524	Oid relId,
1525	const char *accessMethodName,
1526	Oid accessMethodId,
1527	bool amcanorder,
1528	bool isconstraint)
1529	{
1530	ListCell *nextExclOp;
1531	ListCell *lc;
1532	int attn;
1533	int nkeycols = indexInfo->ii_NumIndexKeyAttrs;
1534
1535	/ Allocate space for exclusion operator info, if needed /
1536	if (exclusionOpNames)
1537	{
1538	Assert(list_length(exclusionOpNames) == nkeycols);
1539	indexInfo->ii_ExclusionOps = (Oid ) palloc(sizeof(Oid) nkeycols);
1540	indexInfo->ii_ExclusionProcs = (Oid ) palloc(sizeof(Oid) nkeycols);
1541	indexInfo->ii_ExclusionStrats = (uint16 ) palloc(sizeof(uint16) nkeycols);
1542	nextExclOp = list_head(exclusionOpNames);
1543	}
1544	else
1545	nextExclOp = NULL;
1546
1547	/*
1548	* process attributeList
1549	*/
1550	attn = `0`;
1551	foreach(lc, attList)
1552	{
1553	IndexElem attribute = (IndexElem ) lfirst(lc);
1554	Oid atttype;
1555	Oid attcollation;
1556
1557	/*
1558	* Process the column-or-expression to be indexed.
1559	*/
1560	if (attribute->name != NULL)
1561	{
1562	/ Simple index attribute /
1563	HeapTuple atttuple;
1564	Form_pg_attribute attform;
1565
1566	Assert(attribute->expr == NULL);
1567	atttuple = SearchSysCacheAttName(relId, attribute->name);
1568	if (!HeapTupleIsValid(atttuple))
1569	{
1570	/ difference in error message spellings is historical /
1571	if (isconstraint)
1572	ereport(ERROR,
1573	(errcode(ERRCODE_UNDEFINED_COLUMN),
1574	errmsg("column \"%s\" named in key does not exist",
1575	attribute->name)));
1576	else
1577	ereport(ERROR,
1578	(errcode(ERRCODE_UNDEFINED_COLUMN),
1579	errmsg("column \"%s\" does not exist",
1580	attribute->name)));
1581	}
1582	attform = (Form_pg_attribute) GETSTRUCT(atttuple);
1583	indexInfo->ii_IndexAttrNumbers[attn] = attform->attnum;
1584	atttype = attform->atttypid;
1585	attcollation = attform->attcollation;
1586	ReleaseSysCache(atttuple);
1587	}
1588	else
1589	{
1590	/ Index expression /
1591	Node *expr = attribute->expr;
1592
1593	Assert(expr != NULL);
1594
1595	if (attn >= nkeycols)
1596	ereport(ERROR,
1597	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1598	errmsg("expressions are not supported in included columns")));
1599	atttype = exprType(expr);
1600	attcollation = exprCollation(expr);
1601
1602	/*
1603	* Strip any top-level COLLATE clause. This ensures that we treat
1604	* "x COLLATE y" and "(x COLLATE y)" alike.
1605	*/
1606	while (IsA(expr, CollateExpr))
1607	expr = (Node ) ((CollateExpr ) expr)->arg;
1608
1609	if (IsA(expr, Var) &&
1610	((Var *) expr)->varattno != InvalidAttrNumber)
1611	{
1612	/*
1613	* User wrote "(column)" or "(column COLLATE something)".
1614	* Treat it like simple attribute anyway.
1615	*/
1616	indexInfo->ii_IndexAttrNumbers[attn] = ((Var *) expr)->varattno;
1617	}
1618	else
1619	{
1620	indexInfo->ii_IndexAttrNumbers[attn] = `0`; / marks expression /
1621	indexInfo->ii_Expressions = lappend(indexInfo->ii_Expressions,
1622	expr);
1623
1624	/*
1625	* transformExpr() should have already rejected subqueries,
1626	* aggregates, and window functions, based on the EXPR_KIND_
1627	* for an index expression.
1628	*/
1629
1630	/*
1631	* An expression using mutable functions is probably wrong,
1632	* since if you aren't going to get the same result for the
1633	* same data every time, it's not clear what the index entries
1634	* mean at all.
1635	*/
1636	if (CheckMutability((Expr *) expr))
1637	ereport(ERROR,
1638	(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1639	errmsg("functions in index expression must be marked IMMUTABLE")));
1640	}
1641	}
1642
1643	typeOidP[attn] = atttype;
1644
1645	/*
1646	* Included columns have no collation, no opclass and no ordering
1647	* options.
1648	*/
1649	if (attn >= nkeycols)
1650	{
1651	if (attribute->collation)
1652	ereport(ERROR,
1653	(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1654	errmsg("including column does not support a collation")));
1655	if (attribute->opclass)
1656	ereport(ERROR,
1657	(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1658	errmsg("including column does not support an operator class")));
1659	if (attribute->ordering != SORTBY_DEFAULT)
1660	ereport(ERROR,
1661	(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1662	errmsg("including column does not support ASC/DESC options")));
1663	if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1664	ereport(ERROR,
1665	(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1666	errmsg("including column does not support NULLS FIRST/LAST options")));
1667
1668	classOidP[attn] = InvalidOid;
1669	colOptionP[attn] = `0`;
1670	collationOidP[attn] = InvalidOid;
1671	attn++;
1672
1673	continue;
1674	}
1675
1676	/*
1677	* Apply collation override if any
1678	*/
1679	if (attribute->collation)
1680	attcollation = get_collation_oid(attribute->collation, false);
1681
1682	/*
1683	* Check we have a collation iff it's a collatable type. The only
1684	* expected failures here are (1) COLLATE applied to a noncollatable
1685	* type, or (2) index expression had an unresolved collation. But we
1686	* might as well code this to be a complete consistency check.
1687	*/
1688	if (type_is_collatable(atttype))
1689	{
1690	if (!OidIsValid(attcollation))
1691	ereport(ERROR,
1692	(errcode(ERRCODE_INDETERMINATE_COLLATION),
1693	errmsg("could not determine which collation to use for index expression"),
1694	errhint("Use the COLLATE clause to set the collation explicitly.")));
1695	}
1696	else
1697	{
1698	if (OidIsValid(attcollation))
1699	ereport(ERROR,
1700	(errcode(ERRCODE_DATATYPE_MISMATCH),
1701	errmsg("collations are not supported by type %s",
1702	format_type_be(atttype))));
1703	}
1704
1705	collationOidP[attn] = attcollation;
1706
1707	/*
1708	* Identify the opclass to use.
1709	*/
1710	classOidP[attn] = ResolveOpClass(attribute->opclass,
1711	atttype,
1712	accessMethodName,
1713	accessMethodId);
1714
1715	/*
1716	* Identify the exclusion operator, if any.
1717	*/
1718	if (nextExclOp)
1719	{
1720	List opname = (List ) lfirst(nextExclOp);
1721	Oid opid;
1722	Oid opfamily;
1723	int strat;
1724
1725	/*
1726	* Find the operator --- it must accept the column datatype
1727	* without runtime coercion (but binary compatibility is OK)
1728	*/
1729	opid = compatible_oper_opid(opname, atttype, atttype, false);
1730
1731	/*
1732	* Only allow commutative operators to be used in exclusion
1733	* constraints. If X conflicts with Y, but Y does not conflict
1734	* with X, bad things will happen.
1735	*/
1736	if (get_commutator(opid) != opid)
1737	ereport(ERROR,
1738	(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1739	errmsg("operator %s is not commutative",
1740	format_operator(opid)),
1741	errdetail("Only commutative operators can be used in exclusion constraints.")));
1742
1743	/*
1744	* Operator must be a member of the right opfamily, too
1745	*/
1746	opfamily = get_opclass_family(classOidP[attn]);
1747	strat = get_op_opfamily_strategy(opid, opfamily);
1748	if (strat == `0`)
1749	{
1750	HeapTuple opftuple;
1751	Form_pg_opfamily opfform;
1752
1753	/*
1754	* attribute->opclass might not explicitly name the opfamily,
1755	* so fetch the name of the selected opfamily for use in the
1756	* error message.
1757	*/
1758	opftuple = SearchSysCache1(OPFAMILYOID,
1759	ObjectIdGetDatum(opfamily));
1760	if (!HeapTupleIsValid(opftuple))
1761	elog(ERROR, "cache lookup failed for opfamily %u",
1762	opfamily);
1763	opfform = (Form_pg_opfamily) GETSTRUCT(opftuple);
1764
1765	ereport(ERROR,
1766	(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1767	errmsg("operator %s is not a member of operator family \"%s\"",
1768	format_operator(opid),
1769	NameStr(opfform->opfname)),
1770	errdetail("The exclusion operator must be related to the index operator class for the constraint.")));
1771	}
1772
1773	indexInfo->ii_ExclusionOps[attn] = opid;
1774	indexInfo->ii_ExclusionProcs[attn] = get_opcode(opid);
1775	indexInfo->ii_ExclusionStrats[attn] = strat;
1776	nextExclOp = lnext(nextExclOp);
1777	}
1778
1779	/*
1780	* Set up the per-column options (indoption field). For now, this is
1781	* zero for any un-ordered index, while ordered indexes have DESC and
1782	* NULLS FIRST/LAST options.
1783	*/
1784	colOptionP[attn] = `0`;
1785	if (amcanorder)
1786	{
1787	/ default ordering is ASC /
1788	if (attribute->ordering == SORTBY_DESC)
1789	colOptionP[attn] \|= INDOPTION_DESC;
1790	/ default null ordering is LAST for ASC, FIRST for DESC /
1791	if (attribute->nulls_ordering == SORTBY_NULLS_DEFAULT)
1792	{
1793	if (attribute->ordering == SORTBY_DESC)
1794	colOptionP[attn] \|= INDOPTION_NULLS_FIRST;
1795	}
1796	else if (attribute->nulls_ordering == SORTBY_NULLS_FIRST)
1797	colOptionP[attn] \|= INDOPTION_NULLS_FIRST;
1798	}
1799	else
1800	{
1801	/ index AM does not support ordering /
1802	if (attribute->ordering != SORTBY_DEFAULT)
1803	ereport(ERROR,
1804	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1805	errmsg("access method \"%s\" does not support ASC/DESC options",
1806	accessMethodName)));
1807	if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1808	ereport(ERROR,
1809	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1810	errmsg("access method \"%s\" does not support NULLS FIRST/LAST options",
1811	accessMethodName)));
1812	}
1813
1814	attn++;
1815	}
1816	}
1817
1818	/*
1819	* Resolve possibly-defaulted operator class specification
1820	*
1821	* Note: This is used to resolve operator class specification in index and
1822	* partition key definitions.
1823	*/
1824	Oid
1825	ResolveOpClass(List *opclass, Oid attrType,
1826	const char *accessMethodName, Oid accessMethodId)
1827	{
1828	char *schemaname;
1829	char *opcname;
1830	HeapTuple tuple;
1831	Form_pg_opclass opform;
1832	Oid opClassId,
1833	opInputType;
1834
1835	/*
1836	* Release 7.0 removed network_ops, timespan_ops, and datetime_ops, so we
1837	* ignore those opclass names so the default *_ops is used. This can be
1838	* removed in some later release. bjm 2000/02/07
1839	*
1840	* Release 7.1 removes lztext_ops, so suppress that too for a while. tgl
1841	* 2000/07/30
1842	*
1843	* Release 7.2 renames timestamp_ops to timestamptz_ops, so suppress that
1844	* too for awhile. I'm starting to think we need a better approach. tgl
1845	* 2000/10/01
1846	*
1847	* Release 8.0 removes bigbox_ops (which was dead code for a long while
1848	* anyway). tgl 2003/11/11
1849	*/
1850	if (list_length(opclass) == `1`)
1851	{
1852	char *claname = strVal(linitial(opclass));
1853
1854	if (strcmp(claname, "network_ops") == `0` \|\|
1855	strcmp(claname, "timespan_ops") == `0` \|\|
1856	strcmp(claname, "datetime_ops") == `0` \|\|
1857	strcmp(claname, "lztext_ops") == `0` \|\|
1858	strcmp(claname, "timestamp_ops") == `0` \|\|
1859	strcmp(claname, "bigbox_ops") == `0`)
1860	opclass = NIL;
1861	}
1862
1863	if (opclass == NIL)
1864	{
1865	/ no operator class specified, so find the default /
1866	opClassId = GetDefaultOpClass(attrType, accessMethodId);
1867	if (!OidIsValid(opClassId))
1868	ereport(ERROR,
1869	(errcode(ERRCODE_UNDEFINED_OBJECT),
1870	errmsg("data type %s has no default operator class for access method \"%s\"",
1871	format_type_be(attrType), accessMethodName),
1872	errhint("You must specify an operator class for the index or define a default operator class for the data type.")));
1873	return opClassId;
1874	}
1875
1876	/*
1877	* Specific opclass name given, so look up the opclass.
1878	*/
1879
1880	/ deconstruct the name list /
1881	DeconstructQualifiedName(opclass, &schemaname, &opcname);
1882
1883	if (schemaname)
1884	{
1885	/ Look in specific schema only /
1886	Oid namespaceId;
1887
1888	namespaceId = LookupExplicitNamespace(schemaname, false);
1889	tuple = SearchSysCache3(CLAAMNAMENSP,
1890	ObjectIdGetDatum(accessMethodId),
1891	PointerGetDatum(opcname),
1892	ObjectIdGetDatum(namespaceId));
1893	}
1894	else
1895	{
1896	/ Unqualified opclass name, so search the search path /
1897	opClassId = OpclassnameGetOpcid(accessMethodId, opcname);
1898	if (!OidIsValid(opClassId))
1899	ereport(ERROR,
1900	(errcode(ERRCODE_UNDEFINED_OBJECT),
1901	errmsg("operator class \"%s\" does not exist for access method \"%s\"",
1902	opcname, accessMethodName)));
1903	tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opClassId));
1904	}
1905
1906	if (!HeapTupleIsValid(tuple))
1907	ereport(ERROR,
1908	(errcode(ERRCODE_UNDEFINED_OBJECT),
1909	errmsg("operator class \"%s\" does not exist for access method \"%s\"",
1910	NameListToString(opclass), accessMethodName)));
1911
1912	/*
1913	* Verify that the index operator class accepts this datatype. Note we
1914	* will accept binary compatibility.
1915	*/
1916	opform = (Form_pg_opclass) GETSTRUCT(tuple);
1917	opClassId = opform->oid;
1918	opInputType = opform->opcintype;
1919
1920	if (!IsBinaryCoercible(attrType, opInputType))
1921	ereport(ERROR,
1922	(errcode(ERRCODE_DATATYPE_MISMATCH),
1923	errmsg("operator class \"%s\" does not accept data type %s",
1924	NameListToString(opclass), format_type_be(attrType))));
1925
1926	ReleaseSysCache(tuple);
1927
1928	return opClassId;
1929	}
1930
1931	/*
1932	* GetDefaultOpClass
1933	*
1934	* Given the OIDs of a datatype and an access method, find the default
1935	* operator class, if any. Returns InvalidOid if there is none.
1936	*/
1937	Oid
1938	GetDefaultOpClass(Oid type_id, Oid am_id)
1939	{
1940	Oid result = InvalidOid;
1941	int nexact = `0`;
1942	int ncompatible = `0`;
1943	int ncompatiblepreferred = `0`;
1944	Relation rel;
1945	ScanKeyData skey[`1`];
1946	SysScanDesc scan;
1947	HeapTuple tup;
1948	TYPCATEGORY tcategory;
1949
1950	/ If it's a domain, look at the base type instead /
1951	type_id = getBaseType(type_id);
1952
1953	tcategory = TypeCategory(type_id);
1954
1955	/*
1956	* We scan through all the opclasses available for the access method,
1957	* looking for one that is marked default and matches the target type
1958	* (either exactly or binary-compatibly, but prefer an exact match).
1959	*
1960	* We could find more than one binary-compatible match. If just one is
1961	* for a preferred type, use that one; otherwise we fail, forcing the user
1962	* to specify which one he wants. (The preferred-type special case is a
1963	* kluge for varchar: it's binary-compatible to both text and bpchar, so
1964	* we need a tiebreaker.) If we find more than one exact match, then
1965	* someone put bogus entries in pg_opclass.
1966	*/
1967	rel = table_open(OperatorClassRelationId, AccessShareLock);
1968
1969	ScanKeyInit(&skey[`0`],
1970	Anum_pg_opclass_opcmethod,
1971	BTEqualStrategyNumber, F_OIDEQ,
1972	ObjectIdGetDatum(am_id));
1973
1974	scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true,
1975	NULL, `1`, skey);
1976
1977	while (HeapTupleIsValid(tup = systable_getnext(scan)))
1978	{
1979	Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup);
1980
1981	/ ignore altogether if not a default opclass /
1982	if (!opclass->opcdefault)
1983	continue;
1984	if (opclass->opcintype == type_id)
1985	{
1986	nexact++;
1987	result = opclass->oid;
1988	}
1989	else if (nexact == `0` &&
1990	IsBinaryCoercible(type_id, opclass->opcintype))
1991	{
1992	if (IsPreferredType(tcategory, opclass->opcintype))
1993	{
1994	ncompatiblepreferred++;
1995	result = opclass->oid;
1996	}
1997	else if (ncompatiblepreferred == `0`)
1998	{
1999	ncompatible++;
2000	result = opclass->oid;
2001	}
2002	}
2003	}
2004
2005	systable_endscan(scan);
2006
2007	table_close(rel, AccessShareLock);
2008
2009	/ raise error if pg_opclass contains inconsistent data /
2010	if (nexact > `1`)
2011	ereport(ERROR,
2012	(errcode(ERRCODE_DUPLICATE_OBJECT),
2013	errmsg("there are multiple default operator classes for data type %s",
2014	format_type_be(type_id))));
2015
2016	if (nexact == `1` \|\|
2017	ncompatiblepreferred == `1` \|\|
2018	(ncompatiblepreferred == `0` && ncompatible == `1`))
2019	return result;
2020
2021	return InvalidOid;
2022	}
2023
2024	/*
2025	* makeObjectName()
2026	*
2027	* Create a name for an implicitly created index, sequence, constraint,
2028	* extended statistics, etc.
2029	*
2030	* The parameters are typically: the original table name, the original field
2031	* name, and a "type" string (such as "seq" or "pkey"). The field name
2032	* and/or type can be NULL if not relevant.
2033	*
2034	* The result is a palloc'd string.
2035	*
2036	* The basic result we want is "name1_name2_label", omitting "_name2" or
2037	* "_label" when those parameters are NULL. However, we must generate
2038	* a name with less than NAMEDATALEN characters! So, we truncate one or
2039	* both names if necessary to make a short-enough string. The label part
2040	* is never truncated (so it had better be reasonably short).
2041	*
2042	* The caller is responsible for checking uniqueness of the generated
2043	* name and retrying as needed; retrying will be done by altering the
2044	* "label" string (which is why we never truncate that part).
2045	*/
2046	char *
2047	makeObjectName(const char name1, const* char name2, const* char *label)
2048	{
2049	char *name;
2050	int overhead = `0`; / chars needed for label and underscores /
2051	int availchars; / chars available for name(s) /
2052	int name1chars; / chars allocated to name1 /
2053	int name2chars; / chars allocated to name2 /
2054	int ndx;
2055
2056	name1chars = strlen(name1);
2057	if (name2)
2058	{
2059	name2chars = strlen(name2);
2060	overhead++; / allow for separating underscore /
2061	}
2062	else
2063	name2chars = `0`;
2064	if (label)
2065	overhead += strlen(label) + `1`;
2066
2067	availchars = NAMEDATALEN - `1` - overhead;
2068	Assert(availchars > `0`); / else caller chose a bad label /
2069
2070	/*
2071	* If we must truncate, preferentially truncate the longer name. This
2072	* logic could be expressed without a loop, but it's simple and obvious as
2073	* a loop.
2074	*/
2075	while (name1chars + name2chars > availchars)
2076	{
2077	if (name1chars > name2chars)
2078	name1chars--;
2079	else
2080	name2chars--;
2081	}
2082
2083	name1chars = pg_mbcliplen(name1, name1chars, name1chars);
2084	if (name2)
2085	name2chars = pg_mbcliplen(name2, name2chars, name2chars);
2086
2087	/ Now construct the string using the chosen lengths /
2088	name = palloc(name1chars + name2chars + overhead + `1`);
2089	memcpy(name, name1, name1chars);
2090	ndx = name1chars;
2091	if (name2)
2092	{
2093	name[ndx++] = `'_'`;
2094	memcpy(name + ndx, name2, name2chars);
2095	ndx += name2chars;
2096	}
2097	if (label)
2098	{
2099	name[ndx++] = `'_'`;
2100	strcpy(name + ndx, label);
2101	}
2102	else
2103	name[ndx] = `'\0'`;
2104
2105	return name;
2106	}
2107
2108	/*
2109	* Select a nonconflicting name for a new relation. This is ordinarily
2110	* used to choose index names (which is why it's here) but it can also
2111	* be used for sequences, or any autogenerated relation kind.
2112	*
2113	* name1, name2, and label are used the same way as for makeObjectName(),
2114	* except that the label can't be NULL; digits will be appended to the label
2115	* if needed to create a name that is unique within the specified namespace.
2116	*
2117	* If isconstraint is true, we also avoid choosing a name matching any
2118	* existing constraint in the same namespace. (This is stricter than what
2119	* Postgres itself requires, but the SQL standard says that constraint names
2120	* should be unique within schemas, so we follow that for autogenerated
2121	* constraint names.)
2122	*
2123	* Note: it is theoretically possible to get a collision anyway, if someone
2124	* else chooses the same name concurrently. This is fairly unlikely to be
2125	* a problem in practice, especially if one is holding an exclusive lock on
2126	* the relation identified by name1. However, if choosing multiple names
2127	* within a single command, you'd better create the new object and do
2128	* CommandCounterIncrement before choosing the next one!
2129	*
2130	* Returns a palloc'd string.
2131	*/
2132	char *
2133	ChooseRelationName(const char name1, const* char *name2,
2134	const char *label, Oid namespaceid,
2135	bool isconstraint)
2136	{
2137	int pass = `0`;
2138	char *relname = NULL;
2139	char modlabel[NAMEDATALEN];
2140
2141	/ try the unmodified label first /
2142	StrNCpy(modlabel, label, sizeof(modlabel));
2143
2144	for (;;)
2145	{
2146	relname = makeObjectName(name1, name2, modlabel);
2147
2148	if (!OidIsValid(get_relname_relid(relname, namespaceid)))
2149	{
2150	if (!isconstraint \|\|
2151	!ConstraintNameExists(relname, namespaceid))
2152	break;
2153	}
2154
2155	/ found a conflict, so try a new name component /
2156	pfree(relname);
2157	snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
2158	}
2159
2160	return relname;
2161	}
2162
2163	/*
2164	* Select the name to be used for an index.
2165	*
2166	* The argument list is pretty ad-hoc :-(
2167	*/
2168	static char *
2169	ChooseIndexName(const char *tabname, Oid namespaceId,
2170	List colnames, List exclusionOpNames,
2171	bool primary, bool isconstraint)
2172	{
2173	char *indexname;
2174
2175	if (primary)
2176	{
2177	/ the primary key's name does not depend on the specific column(s) /
2178	indexname = ChooseRelationName(tabname,
2179	NULL,
2180	"pkey",
2181	namespaceId,
2182	true);
2183	}
2184	else if (exclusionOpNames != NIL)
2185	{
2186	indexname = ChooseRelationName(tabname,
2187	ChooseIndexNameAddition(colnames),
2188	"excl",
2189	namespaceId,
2190	true);
2191	}
2192	else if (isconstraint)
2193	{
2194	indexname = ChooseRelationName(tabname,
2195	ChooseIndexNameAddition(colnames),
2196	"key",
2197	namespaceId,
2198	true);
2199	}
2200	else
2201	{
2202	indexname = ChooseRelationName(tabname,
2203	ChooseIndexNameAddition(colnames),
2204	"idx",
2205	namespaceId,
2206	false);
2207	}
2208
2209	return indexname;
2210	}
2211
2212	/*
2213	* Generate "name2" for a new index given the list of column names for it
2214	* (as produced by ChooseIndexColumnNames). This will be passed to
2215	* ChooseRelationName along with the parent table name and a suitable label.
2216	*
2217	* We know that less than NAMEDATALEN characters will actually be used,
2218	* so we can truncate the result once we've generated that many.
2219	*
2220	* XXX See also ChooseForeignKeyConstraintNameAddition and
2221	* ChooseExtendedStatisticNameAddition.
2222	*/
2223	static char *
2224	ChooseIndexNameAddition(List *colnames)
2225	{
2226	char buf[NAMEDATALEN * `2`];
2227	int buflen = `0`;
2228	ListCell *lc;
2229
2230	buf[`0`] = `'\0'`;
2231	foreach(lc, colnames)
2232	{
2233	const char name = (const* char *) lfirst(lc);
2234
2235	if (buflen > `0`)
2236	buf[buflen++] = `'_'`; / insert _ between names /
2237
2238	/*
2239	* At this point we have buflen <= NAMEDATALEN. name should be less
2240	* than NAMEDATALEN already, but use strlcpy for paranoia.
2241	*/
2242	strlcpy(buf + buflen, name, NAMEDATALEN);
2243	buflen += strlen(buf + buflen);
2244	if (buflen >= NAMEDATALEN)
2245	break;
2246	}
2247	return pstrdup(buf);
2248	}
2249
2250	/*
2251	* Select the actual names to be used for the columns of an index, given the
2252	* list of IndexElems for the columns. This is mostly about ensuring the
2253	* names are unique so we don't get a conflicting-attribute-names error.
2254	*
2255	* Returns a List of plain strings (char *, not String nodes).
2256	*/
2257	static List *
2258	ChooseIndexColumnNames(List *indexElems)
2259	{
2260	List *result = NIL;
2261	ListCell *lc;
2262
2263	foreach(lc, indexElems)
2264	{
2265	IndexElem ielem = (IndexElem ) lfirst(lc);
2266	const char *origname;
2267	const char *curname;
2268	int i;
2269	char buf[NAMEDATALEN];
2270
2271	/ Get the preliminary name from the IndexElem /
2272	if (ielem->indexcolname)
2273	origname = ielem->indexcolname; / caller-specified name /
2274	else if (ielem->name)
2275	origname = ielem->name; / simple column reference /
2276	else
2277	origname = "expr"; / default name for expression /
2278
2279	/ If it conflicts with any previous column, tweak it /
2280	curname = origname;
2281	for (i = `1`;; i++)
2282	{
2283	ListCell *lc2;
2284	char nbuf[`32`];
2285	int nlen;
2286
2287	foreach(lc2, result)
2288	{
2289	if (strcmp(curname, (char *) lfirst(lc2)) == `0`)
2290	break;
2291	}
2292	if (lc2 == NULL)
2293	break; / found nonconflicting name /
2294
2295	sprintf(nbuf, "%d", i);
2296
2297	/ Ensure generated names are shorter than NAMEDATALEN /
2298	nlen = pg_mbcliplen(origname, strlen(origname),
2299	NAMEDATALEN - `1` - strlen(nbuf));
2300	memcpy(buf, origname, nlen);
2301	strcpy(buf + nlen, nbuf);
2302	curname = buf;
2303	}
2304
2305	/ And attach to the result list /
2306	result = lappend(result, pstrdup(curname));
2307	}
2308	return result;
2309	}
2310
2311	/*
2312	* ReindexIndex
2313	* Recreate a specific index.
2314	*/
2315	void
2316	ReindexIndex(RangeVar indexRelation, int* options, bool concurrent)
2317	{
2318	struct ReindexIndexCallbackState state;
2319	Oid indOid;
2320	Relation irel;
2321	char persistence;
2322
2323	/*
2324	* Find and lock index, and check permissions on table; use callback to
2325	* obtain lock on table first, to avoid deadlock hazard. The lock level
2326	* used here must match the index lock obtained in reindex_index().
2327	*/
2328	state.concurrent = concurrent;
2329	state.locked_table_oid = InvalidOid;
2330	indOid = RangeVarGetRelidExtended(indexRelation,
2331	concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock,
2332	`0`,
2333	RangeVarCallbackForReindexIndex,
2334	&state);
2335
2336	/*
2337	* Obtain the current persistence of the existing index. We already hold
2338	* lock on the index.
2339	*/
2340	irel = index_open(indOid, NoLock);
2341
2342	if (irel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2343	{
2344	ReindexPartitionedIndex(irel);
2345	return;
2346	}
2347
2348	persistence = irel->rd_rel->relpersistence;
2349	index_close(irel, NoLock);
2350
2351	if (concurrent)
2352	ReindexRelationConcurrently(indOid, options);
2353	else
2354	reindex_index(indOid, false, persistence,
2355	options \| REINDEXOPT_REPORT_PROGRESS);
2356	}
2357
2358	/*
2359	* Check permissions on table before acquiring relation lock; also lock
2360	* the heap before the RangeVarGetRelidExtended takes the index lock, to avoid
2361	* deadlocks.
2362	*/
2363	static void
2364	RangeVarCallbackForReindexIndex(const RangeVar *relation,
2365	Oid relId, Oid oldRelId, void *arg)
2366	{
2367	char relkind;
2368	struct ReindexIndexCallbackState *state = arg;
2369	LOCKMODE table_lockmode;
2370
2371	/*
2372	* Lock level here should match table lock in reindex_index() for
2373	* non-concurrent case and table locks used by index_concurrently_*() for
2374	* concurrent case.
2375	*/
2376	table_lockmode = state->concurrent ? ShareUpdateExclusiveLock : ShareLock;
2377
2378	/*
2379	* If we previously locked some other index's heap, and the name we're
2380	* looking up no longer refers to that relation, release the now-useless
2381	* lock.
2382	*/
2383	if (relId != oldRelId && OidIsValid(oldRelId))
2384	{
2385	UnlockRelationOid(state->locked_table_oid, table_lockmode);
2386	state->locked_table_oid = InvalidOid;
2387	}
2388
2389	/ If the relation does not exist, there's nothing more to do. /
2390	if (!OidIsValid(relId))
2391	return;
2392
2393	/*
2394	* If the relation does exist, check whether it's an index. But note that
2395	* the relation might have been dropped between the time we did the name
2396	* lookup and now. In that case, there's nothing to do.
2397	*/
2398	relkind = get_rel_relkind(relId);
2399	if (!relkind)
2400	return;
2401	if (relkind != RELKIND_INDEX &&
2402	relkind != RELKIND_PARTITIONED_INDEX)
2403	ereport(ERROR,
2404	(errcode(ERRCODE_WRONG_OBJECT_TYPE),
2405	errmsg("\"%s\" is not an index", relation->relname)));
2406
2407	/ Check permissions /
2408	if (!pg_class_ownercheck(relId, GetUserId()))
2409	aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX, relation->relname);
2410
2411	/ Lock heap before index to avoid deadlock. /
2412	if (relId != oldRelId)
2413	{
2414	Oid table_oid = IndexGetRelation(relId, true);
2415
2416	/*
2417	* If the OID isn't valid, it means the index was concurrently
2418	* dropped, which is not a problem for us; just return normally.
2419	*/
2420	if (OidIsValid(table_oid))
2421	{
2422	LockRelationOid(table_oid, table_lockmode);
2423	state->locked_table_oid = table_oid;
2424	}
2425	}
2426	}
2427
2428	/*
2429	* ReindexTable
2430	* Recreate all indexes of a table (and of its toast table, if any)
2431	*/
2432	Oid
2433	ReindexTable(RangeVar relation, int* options, bool concurrent)
2434	{
2435	Oid heapOid;
2436	bool result;
2437
2438	/ The lock level used here should match reindex_relation(). /
2439	heapOid = RangeVarGetRelidExtended(relation,
2440	concurrent ? ShareUpdateExclusiveLock : ShareLock,
2441	`0`,
2442	RangeVarCallbackOwnsTable, NULL);
2443
2444	if (concurrent)
2445	{
2446	result = ReindexRelationConcurrently(heapOid, options);
2447
2448	if (!result)
2449	ereport(NOTICE,
2450	(errmsg("table \"%s\" has no indexes that can be reindexed concurrently",
2451	relation->relname)));
2452	}
2453	else
2454	{
2455	result = reindex_relation(heapOid,
2456	REINDEX_REL_PROCESS_TOAST \|
2457	REINDEX_REL_CHECK_CONSTRAINTS,
2458	options \| REINDEXOPT_REPORT_PROGRESS);
2459	if (!result)
2460	ereport(NOTICE,
2461	(errmsg("table \"%s\" has no indexes to reindex",
2462	relation->relname)));
2463	}
2464
2465	return heapOid;
2466	}
2467
2468	/*
2469	* ReindexMultipleTables
2470	* Recreate indexes of tables selected by objectName/objectKind.
2471	*
2472	* To reduce the probability of deadlocks, each table is reindexed in a
2473	* separate transaction, so we can release the lock on it right away.
2474	* That means this must not be called within a user transaction block!
2475	*/
2476	void
2477	ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
2478	int options, bool concurrent)
2479	{
2480	Oid objectOid;
2481	Relation relationRelation;
2482	TableScanDesc scan;
2483	ScanKeyData scan_keys[`1`];
2484	HeapTuple tuple;
2485	MemoryContext private_context;
2486	MemoryContext old;
2487	List *relids = NIL;
2488	ListCell *l;
2489	int num_keys;
2490	bool concurrent_warning = false;
2491
2492	AssertArg(objectName);
2493	Assert(objectKind == REINDEX_OBJECT_SCHEMA \|\|
2494	objectKind == REINDEX_OBJECT_SYSTEM \|\|
2495	objectKind == REINDEX_OBJECT_DATABASE);
2496
2497	if (objectKind == REINDEX_OBJECT_SYSTEM && concurrent)
2498	ereport(ERROR,
2499	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2500	errmsg("cannot reindex system catalogs concurrently")));
2501
2502	/*
2503	* Get OID of object to reindex, being the database currently being used
2504	* by session for a database or for system catalogs, or the schema defined
2505	* by caller. At the same time do permission checks that need different
2506	* processing depending on the object type.
2507	*/
2508	if (objectKind == REINDEX_OBJECT_SCHEMA)
2509	{
2510	objectOid = get_namespace_oid(objectName, false);
2511
2512	if (!pg_namespace_ownercheck(objectOid, GetUserId()))
2513	aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
2514	objectName);
2515	}
2516	else
2517	{
2518	objectOid = MyDatabaseId;
2519
2520	if (strcmp(objectName, get_database_name(objectOid)) != `0`)
2521	ereport(ERROR,
2522	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2523	errmsg("can only reindex the currently open database")));
2524	if (!pg_database_ownercheck(objectOid, GetUserId()))
2525	aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
2526	objectName);
2527	}
2528
2529	/*
2530	* Create a memory context that will survive forced transaction commits we
2531	* do below. Since it is a child of PortalContext, it will go away
2532	* eventually even if we suffer an error; there's no need for special
2533	* abort cleanup logic.
2534	*/
2535	private_context = AllocSetContextCreate(PortalContext,
2536	"ReindexMultipleTables",
2537	ALLOCSET_SMALL_SIZES);
2538
2539	/*
2540	* Define the search keys to find the objects to reindex. For a schema, we
2541	* select target relations using relnamespace, something not necessary for
2542	* a database-wide operation.
2543	*/
2544	if (objectKind == REINDEX_OBJECT_SCHEMA)
2545	{
2546	num_keys = `1`;
2547	ScanKeyInit(&scan_keys[`0`],
2548	Anum_pg_class_relnamespace,
2549	BTEqualStrategyNumber, F_OIDEQ,
2550	ObjectIdGetDatum(objectOid));
2551	}
2552	else
2553	num_keys = `0`;
2554
2555	/*
2556	* Scan pg_class to build a list of the relations we need to reindex.
2557	*
2558	* We only consider plain relations and materialized views here (toast
2559	* rels will be processed indirectly by reindex_relation).
2560	*/
2561	relationRelation = table_open(RelationRelationId, AccessShareLock);
2562	scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys);
2563	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2564	{
2565	Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
2566	Oid relid = classtuple->oid;
2567
2568	/*
2569	* Only regular tables and matviews can have indexes, so ignore any
2570	* other kind of relation.
2571	*
2572	* It is tempting to also consider partitioned tables here, but that
2573	* has the problem that if the children are in the same schema, they
2574	* would be processed twice. Maybe we could have a separate list of
2575	* partitioned tables, and expand that afterwards into relids,
2576	* ignoring any duplicates.
2577	*/
2578	if (classtuple->relkind != RELKIND_RELATION &&
2579	classtuple->relkind != RELKIND_MATVIEW)
2580	continue;
2581
2582	/ Skip temp tables of other backends; we can't reindex them at all /
2583	if (classtuple->relpersistence == RELPERSISTENCE_TEMP &&
2584	!isTempNamespace(classtuple->relnamespace))
2585	continue;
2586
2587	/ Check user/system classification, and optionally skip /
2588	if (objectKind == REINDEX_OBJECT_SYSTEM &&
2589	!IsSystemClass(relid, classtuple))
2590	continue;
2591
2592	/*
2593	* The table can be reindexed if the user is superuser, the table
2594	* owner, or the database/schema owner (but in the latter case, only
2595	* if it's not a shared relation). pg_class_ownercheck includes the
2596	* superuser case, and depending on objectKind we already know that
2597	* the user has permission to run REINDEX on this database or schema
2598	* per the permission checks at the beginning of this routine.
2599	*/
2600	if (classtuple->relisshared &&
2601	!pg_class_ownercheck(relid, GetUserId()))
2602	continue;
2603
2604	/*
2605	* Skip system tables, since index_create() would reject indexing them
2606	* concurrently (and it would likely fail if we tried).
2607	*/
2608	if (concurrent &&
2609	IsCatalogRelationOid(relid))
2610	{
2611	if (!concurrent_warning)
2612	ereport(WARNING,
2613	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2614	errmsg("cannot reindex system catalogs concurrently, skipping all")));
2615	concurrent_warning = true;
2616	continue;
2617	}
2618
2619	/ Save the list of relation OIDs in private context /
2620	old = MemoryContextSwitchTo(private_context);
2621
2622	/*
2623	* We always want to reindex pg_class first if it's selected to be
2624	* reindexed. This ensures that if there is any corruption in
2625	* pg_class' indexes, they will be fixed before we process any other
2626	* tables. This is critical because reindexing itself will try to
2627	* update pg_class.
2628	*/
2629	if (relid == RelationRelationId)
2630	relids = lcons_oid(relid, relids);
2631	else
2632	relids = lappend_oid(relids, relid);
2633
2634	MemoryContextSwitchTo(old);
2635	}
2636	table_endscan(scan);
2637	table_close(relationRelation, AccessShareLock);
2638
2639	/ Now reindex each rel in a separate transaction /
2640	PopActiveSnapshot();
2641	CommitTransactionCommand();
2642	foreach(l, relids)
2643	{
2644	Oid relid = lfirst_oid(l);
2645
2646	StartTransactionCommand();
2647	/ functions in indexes may want a snapshot set /
2648	PushActiveSnapshot(GetTransactionSnapshot());
2649
2650	if (concurrent)
2651	{
2652	(void) ReindexRelationConcurrently(relid, options);
2653	/ ReindexRelationConcurrently() does the verbose output /
2654	}
2655	else
2656	{
2657	bool result;
2658
2659	result = reindex_relation(relid,
2660	REINDEX_REL_PROCESS_TOAST \|
2661	REINDEX_REL_CHECK_CONSTRAINTS,
2662	options \| REINDEXOPT_REPORT_PROGRESS);
2663
2664	if (result && (options & REINDEXOPT_VERBOSE))
2665	ereport(INFO,
2666	(errmsg("table \"%s.%s\" was reindexed",
2667	get_namespace_name(get_rel_namespace(relid)),
2668	get_rel_name(relid))));
2669
2670	PopActiveSnapshot();
2671	}
2672
2673	CommitTransactionCommand();
2674	}
2675	StartTransactionCommand();
2676
2677	MemoryContextDelete(private_context);
2678	}
2679
2680
2681	/*
2682	* ReindexRelationConcurrently - process REINDEX CONCURRENTLY for given
2683	* relation OID
2684	*
2685	* 'relationOid' can either belong to an index, a table or a materialized
2686	* view. For tables and materialized views, all its indexes will be rebuilt,
2687	* excluding invalid indexes and any indexes used in exclusion constraints,
2688	* but including its associated toast table indexes. For indexes, the index
2689	* itself will be rebuilt. If 'relationOid' belongs to a partitioned table
2690	* then we issue a warning to mention these are not yet supported.
2691	*
2692	* The locks taken on parent tables and involved indexes are kept until the
2693	* transaction is committed, at which point a session lock is taken on each
2694	* relation. Both of these protect against concurrent schema changes.
2695	*
2696	* Returns true if any indexes have been rebuilt (including toast table's
2697	* indexes, when relevant), otherwise returns false.
2698	*/
2699	static bool
2700	ReindexRelationConcurrently(Oid relationOid, int options)
2701	{
2702	List *heapRelationIds = NIL;
2703	List *indexIds = NIL;
2704	List *newIndexIds = NIL;
2705	List *relationLocks = NIL;
2706	List *lockTags = NIL;
2707	ListCell *lc,
2708	*lc2;
2709	MemoryContext private_context;
2710	MemoryContext oldcontext;
2711	char relkind;
2712	char *relationName = NULL;
2713	char *relationNamespace = NULL;
2714	PGRUsage ru0;
2715
2716	/*
2717	* Create a memory context that will survive forced transaction commits we
2718	* do below. Since it is a child of PortalContext, it will go away
2719	* eventually even if we suffer an error; there's no need for special
2720	* abort cleanup logic.
2721	*/
2722	private_context = AllocSetContextCreate(PortalContext,
2723	"ReindexConcurrent",
2724	ALLOCSET_SMALL_SIZES);
2725
2726	if (options & REINDEXOPT_VERBOSE)
2727	{
2728	/ Save data needed by REINDEX VERBOSE in private context /
2729	oldcontext = MemoryContextSwitchTo(private_context);
2730
2731	relationName = get_rel_name(relationOid);
2732	relationNamespace = get_namespace_name(get_rel_namespace(relationOid));
2733
2734	pg_rusage_init(&ru0);
2735
2736	MemoryContextSwitchTo(oldcontext);
2737	}
2738
2739	relkind = get_rel_relkind(relationOid);
2740
2741	/*
2742	* Extract the list of indexes that are going to be rebuilt based on the
2743	* list of relation Oids given by caller.
2744	*/
2745	switch (relkind)
2746	{
2747	case RELKIND_RELATION:
2748	case RELKIND_MATVIEW:
2749	case RELKIND_TOASTVALUE:
2750	{
2751	/*
2752	* In the case of a relation, find all its indexes including
2753	* toast indexes.
2754	*/
2755	Relation heapRelation;
2756
2757	/ Save the list of relation OIDs in private context /
2758	oldcontext = MemoryContextSwitchTo(private_context);
2759
2760	/ Track this relation for session locks /
2761	heapRelationIds = lappend_oid(heapRelationIds, relationOid);
2762
2763	MemoryContextSwitchTo(oldcontext);
2764
2765	if (IsCatalogRelationOid(relationOid))
2766	ereport(ERROR,
2767	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2768	errmsg("cannot reindex system catalogs concurrently")));
2769
2770	/ Open relation to get its indexes /
2771	heapRelation = table_open(relationOid, ShareUpdateExclusiveLock);
2772
2773	/ Add all the valid indexes of relation to list /
2774	foreach(lc, RelationGetIndexList(heapRelation))
2775	{
2776	Oid cellOid = lfirst_oid(lc);
2777	Relation indexRelation = index_open(cellOid,
2778	ShareUpdateExclusiveLock);
2779
2780	if (!indexRelation->rd_index->indisvalid)
2781	ereport(WARNING,
2782	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2783	errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
2784	get_namespace_name(get_rel_namespace(cellOid)),
2785	get_rel_name(cellOid))));
2786	else if (indexRelation->rd_index->indisexclusion)
2787	ereport(WARNING,
2788	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2789	errmsg("cannot reindex exclusion constraint index \"%s.%s\" concurrently, skipping",
2790	get_namespace_name(get_rel_namespace(cellOid)),
2791	get_rel_name(cellOid))));
2792	else
2793	{
2794	/ Save the list of relation OIDs in private context /
2795	oldcontext = MemoryContextSwitchTo(private_context);
2796
2797	indexIds = lappend_oid(indexIds, cellOid);
2798
2799	MemoryContextSwitchTo(oldcontext);
2800	}
2801
2802	index_close(indexRelation, NoLock);
2803	}
2804
2805	/ Also add the toast indexes /
2806	if (OidIsValid(heapRelation->rd_rel->reltoastrelid))
2807	{
2808	Oid toastOid = heapRelation->rd_rel->reltoastrelid;
2809	Relation toastRelation = table_open(toastOid,
2810	ShareUpdateExclusiveLock);
2811
2812	/ Save the list of relation OIDs in private context /
2813	oldcontext = MemoryContextSwitchTo(private_context);
2814
2815	/ Track this relation for session locks /
2816	heapRelationIds = lappend_oid(heapRelationIds, toastOid);
2817
2818	MemoryContextSwitchTo(oldcontext);
2819
2820	foreach(lc2, RelationGetIndexList(toastRelation))
2821	{
2822	Oid cellOid = lfirst_oid(lc2);
2823	Relation indexRelation = index_open(cellOid,
2824	ShareUpdateExclusiveLock);
2825
2826	if (!indexRelation->rd_index->indisvalid)
2827	ereport(WARNING,
2828	(errcode(ERRCODE_INDEX_CORRUPTED),
2829	errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
2830	get_namespace_name(get_rel_namespace(cellOid)),
2831	get_rel_name(cellOid))));
2832	else
2833	{
2834	/*
2835	* Save the list of relation OIDs in private
2836	* context
2837	*/
2838	oldcontext = MemoryContextSwitchTo(private_context);
2839
2840	indexIds = lappend_oid(indexIds, cellOid);
2841
2842	MemoryContextSwitchTo(oldcontext);
2843	}
2844
2845	index_close(indexRelation, NoLock);
2846	}
2847
2848	table_close(toastRelation, NoLock);
2849	}
2850
2851	table_close(heapRelation, NoLock);
2852	break;
2853	}
2854	case RELKIND_INDEX:
2855	{
2856	Oid heapId = IndexGetRelation(relationOid, false);
2857
2858	if (IsCatalogRelationOid(heapId))
2859	ereport(ERROR,
2860	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2861	errmsg("cannot reindex system catalogs concurrently")));
2862
2863	/ Save the list of relation OIDs in private context /
2864	oldcontext = MemoryContextSwitchTo(private_context);
2865
2866	/ Track the heap relation of this index for session locks /
2867	heapRelationIds = list_make1_oid(heapId);
2868
2869	/*
2870	* Save the list of relation OIDs in private context. Note
2871	* that invalid indexes are allowed here.
2872	*/
2873	indexIds = lappend_oid(indexIds, relationOid);
2874
2875	MemoryContextSwitchTo(oldcontext);
2876	break;
2877	}
2878	case RELKIND_PARTITIONED_TABLE:
2879	/ see reindex_relation() /
2880	ereport(WARNING,
2881	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2882	errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
2883	get_rel_name(relationOid))));
2884	return false;
2885	default:
2886	/ Return error if type of relation is not supported /
2887	ereport(ERROR,
2888	(errcode(ERRCODE_WRONG_OBJECT_TYPE),
2889	errmsg("cannot reindex this type of relation concurrently")));
2890	break;
2891	}
2892
2893	/ Definitely no indexes, so leave /
2894	if (indexIds == NIL)
2895	{
2896	PopActiveSnapshot();
2897	return false;
2898	}
2899
2900	Assert(heapRelationIds != NIL);
2901
2902	/-----*
2903	* Now we have all the indexes we want to process in indexIds.
2904	*
2905	* The phases now are:
2906	*
2907	* 1. create new indexes in the catalog
2908	* 2. build new indexes
2909	* 3. let new indexes catch up with tuples inserted in the meantime
2910	* 4. swap index names
2911	* 5. mark old indexes as dead
2912	* 6. drop old indexes
2913	*
2914	* We process each phase for all indexes before moving to the next phase,
2915	* for efficiency.
2916	*/
2917
2918	/*
2919	* Phase 1 of REINDEX CONCURRENTLY
2920	*
2921	* Create a new index with the same properties as the old one, but it is
2922	* only registered in catalogs and will be built later. Then get session
2923	* locks on all involved tables. See analogous code in DefineIndex() for
2924	* more detailed comments.
2925	*/
2926
2927	foreach(lc, indexIds)
2928	{
2929	char *concurrentName;
2930	Oid indexId = lfirst_oid(lc);
2931	Oid newIndexId;
2932	Relation indexRel;
2933	Relation heapRel;
2934	Relation newIndexRel;
2935	LockRelId *lockrelid;
2936
2937	indexRel = index_open(indexId, ShareUpdateExclusiveLock);
2938	heapRel = table_open(indexRel->rd_index->indrelid,
2939	ShareUpdateExclusiveLock);
2940
2941	pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
2942	RelationGetRelid(heapRel));
2943	pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
2944	PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY);
2945	pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
2946	indexId);
2947	pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
2948	indexRel->rd_rel->relam);
2949
2950	/ Choose a temporary relation name for the new index /
2951	concurrentName = ChooseRelationName(get_rel_name(indexId),
2952	NULL,
2953	"ccnew",
2954	get_rel_namespace(indexRel->rd_index->indrelid),
2955	false);
2956
2957	/ Create new index definition based on given index /
2958	newIndexId = index_concurrently_create_copy(heapRel,
2959	indexId,
2960	concurrentName);
2961
2962	/ Now open the relation of the new index, a lock is also needed on it /
2963	newIndexRel = index_open(indexId, ShareUpdateExclusiveLock);
2964
2965	/*
2966	* Save the list of OIDs and locks in private context
2967	*/
2968	oldcontext = MemoryContextSwitchTo(private_context);
2969
2970	newIndexIds = lappend_oid(newIndexIds, newIndexId);
2971
2972	/*
2973	* Save lockrelid to protect each relation from drop then close
2974	* relations. The lockrelid on parent relation is not taken here to
2975	* avoid multiple locks taken on the same relation, instead we rely on
2976	* parentRelationIds built earlier.
2977	*/
2978	lockrelid = palloc(sizeof(*lockrelid));
2979	*lockrelid = indexRel->rd_lockInfo.lockRelId;
2980	relationLocks = lappend(relationLocks, lockrelid);
2981	lockrelid = palloc(sizeof(*lockrelid));
2982	*lockrelid = newIndexRel->rd_lockInfo.lockRelId;
2983	relationLocks = lappend(relationLocks, lockrelid);
2984
2985	MemoryContextSwitchTo(oldcontext);
2986
2987	index_close(indexRel, NoLock);
2988	index_close(newIndexRel, NoLock);
2989	table_close(heapRel, NoLock);
2990	}
2991
2992	/*
2993	* Save the heap lock for following visibility checks with other backends
2994	* might conflict with this session.
2995	*/
2996	foreach(lc, heapRelationIds)
2997	{
2998	Relation heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock);
2999	LockRelId *lockrelid;
3000	LOCKTAG *heaplocktag;
3001
3002	/ Save the list of locks in private context /
3003	oldcontext = MemoryContextSwitchTo(private_context);
3004
3005	/ Add lockrelid of heap relation to the list of locked relations /
3006	lockrelid = palloc(sizeof(*lockrelid));
3007	*lockrelid = heapRelation->rd_lockInfo.lockRelId;
3008	relationLocks = lappend(relationLocks, lockrelid);
3009
3010	heaplocktag = (LOCKTAG ) palloc(sizeof*(LOCKTAG));
3011
3012	/ Save the LOCKTAG for this parent relation for the wait phase /
3013	SET_LOCKTAG_RELATION(*heaplocktag, lockrelid->dbId, lockrelid->relId);
3014	lockTags = lappend(lockTags, heaplocktag);
3015
3016	MemoryContextSwitchTo(oldcontext);
3017
3018	/ Close heap relation /
3019	table_close(heapRelation, NoLock);
3020	}
3021
3022	/ Get a session-level lock on each table. /
3023	foreach(lc, relationLocks)
3024	{
3025	LockRelId lockrelid = (LockRelId ) lfirst(lc);
3026
3027	LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
3028	}
3029
3030	PopActiveSnapshot();
3031	CommitTransactionCommand();
3032	StartTransactionCommand();
3033
3034	/*
3035	* Phase 2 of REINDEX CONCURRENTLY
3036	*
3037	* Build the new indexes in a separate transaction for each index to avoid
3038	* having open transactions for an unnecessary long time. But before
3039	* doing that, wait until no running transactions could have the table of
3040	* the index open with the old list of indexes. See "phase 2" in
3041	* DefineIndex() for more details.
3042	*/
3043
3044	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3045	PROGRESS_CREATEIDX_PHASE_WAIT_1);
3046	WaitForLockersMultiple(lockTags, ShareLock, true);
3047	CommitTransactionCommand();
3048
3049	forboth(lc, indexIds, lc2, newIndexIds)
3050	{
3051	Relation indexRel;
3052	Oid oldIndexId = lfirst_oid(lc);
3053	Oid newIndexId = lfirst_oid(lc2);
3054	Oid heapId;
3055
3056	CHECK_FOR_INTERRUPTS();
3057
3058	/ Start new transaction for this index's concurrent build /
3059	StartTransactionCommand();
3060
3061	/ Set ActiveSnapshot since functions in the indexes may need it /
3062	PushActiveSnapshot(GetTransactionSnapshot());
3063
3064	/*
3065	* Index relation has been closed by previous commit, so reopen it to
3066	* get its information.
3067	*/
3068	indexRel = index_open(oldIndexId, ShareUpdateExclusiveLock);
3069	heapId = indexRel->rd_index->indrelid;
3070	index_close(indexRel, NoLock);
3071
3072	/ Perform concurrent build of new index /
3073	index_concurrently_build(heapId, newIndexId);
3074
3075	PopActiveSnapshot();
3076	CommitTransactionCommand();
3077	}
3078	StartTransactionCommand();
3079
3080	/*
3081	* Phase 3 of REINDEX CONCURRENTLY
3082	*
3083	* During this phase the old indexes catch up with any new tuples that
3084	* were created during the previous phase. See "phase 3" in DefineIndex()
3085	* for more details.
3086	*/
3087
3088	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3089	PROGRESS_CREATEIDX_PHASE_WAIT_2);
3090	WaitForLockersMultiple(lockTags, ShareLock, true);
3091	CommitTransactionCommand();
3092
3093	foreach(lc, newIndexIds)
3094	{
3095	Oid newIndexId = lfirst_oid(lc);
3096	Oid heapId;
3097	TransactionId limitXmin;
3098	Snapshot snapshot;
3099
3100	CHECK_FOR_INTERRUPTS();
3101
3102	StartTransactionCommand();
3103
3104	heapId = IndexGetRelation(newIndexId, false);
3105
3106	/*
3107	* Take the "reference snapshot" that will be used by validate_index()
3108	* to filter candidate tuples.
3109	*/
3110	snapshot = RegisterSnapshot(GetTransactionSnapshot());
3111	PushActiveSnapshot(snapshot);
3112
3113	validate_index(heapId, newIndexId, snapshot);
3114
3115	/*
3116	* We can now do away with our active snapshot, we still need to save
3117	* the xmin limit to wait for older snapshots.
3118	*/
3119	limitXmin = snapshot->xmin;
3120
3121	PopActiveSnapshot();
3122	UnregisterSnapshot(snapshot);
3123
3124	/*
3125	* To ensure no deadlocks, we must commit and start yet another
3126	* transaction, and do our wait before any snapshot has been taken in
3127	* it.
3128	*/
3129	CommitTransactionCommand();
3130	StartTransactionCommand();
3131
3132	/*
3133	* The index is now valid in the sense that it contains all currently
3134	* interesting tuples. But since it might not contain tuples deleted
3135	* just before the reference snap was taken, we have to wait out any
3136	* transactions that might have older snapshots.
3137	*/
3138	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3139	PROGRESS_CREATEIDX_PHASE_WAIT_3);
3140	WaitForOlderSnapshots(limitXmin, true);
3141
3142	CommitTransactionCommand();
3143	}
3144
3145	/*
3146	* Phase 4 of REINDEX CONCURRENTLY
3147	*
3148	* Now that the new indexes have been validated, swap each new index with
3149	* its corresponding old index.
3150	*
3151	* We mark the new indexes as valid and the old indexes as not valid at
3152	* the same time to make sure we only get constraint violations from the
3153	* indexes with the correct names.
3154	*/
3155
3156	StartTransactionCommand();
3157
3158	forboth(lc, indexIds, lc2, newIndexIds)
3159	{
3160	char *oldName;
3161	Oid oldIndexId = lfirst_oid(lc);
3162	Oid newIndexId = lfirst_oid(lc2);
3163	Oid heapId;
3164
3165	CHECK_FOR_INTERRUPTS();
3166
3167	heapId = IndexGetRelation(oldIndexId, false);
3168
3169	/ Choose a relation name for old index /
3170	oldName = ChooseRelationName(get_rel_name(oldIndexId),
3171	NULL,
3172	"ccold",
3173	get_rel_namespace(heapId),
3174	false);
3175
3176	/*
3177	* Swap old index with the new one. This also marks the new one as
3178	* valid and the old one as not valid.
3179	*/
3180	index_concurrently_swap(newIndexId, oldIndexId, oldName);
3181
3182	/*
3183	* Invalidate the relcache for the table, so that after this commit
3184	* all sessions will refresh any cached plans that might reference the
3185	* index.
3186	*/
3187	CacheInvalidateRelcacheByRelid(heapId);
3188
3189	/*
3190	* CCI here so that subsequent iterations see the oldName in the
3191	* catalog and can choose a nonconflicting name for their oldName.
3192	* Otherwise, this could lead to conflicts if a table has two indexes
3193	* whose names are equal for the first NAMEDATALEN-minus-a-few
3194	* characters.
3195	*/
3196	CommandCounterIncrement();
3197	}
3198
3199	/ Commit this transaction and make index swaps visible /
3200	CommitTransactionCommand();
3201	StartTransactionCommand();
3202
3203	/*
3204	* Phase 5 of REINDEX CONCURRENTLY
3205	*
3206	* Mark the old indexes as dead. First we must wait until no running
3207	* transaction could be using the index for a query. See also
3208	* index_drop() for more details.
3209	*/
3210
3211	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3212	PROGRESS_CREATEIDX_PHASE_WAIT_4);
3213	WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
3214
3215	foreach(lc, indexIds)
3216	{
3217	Oid oldIndexId = lfirst_oid(lc);
3218	Oid heapId;
3219
3220	CHECK_FOR_INTERRUPTS();
3221	heapId = IndexGetRelation(oldIndexId, false);
3222	index_concurrently_set_dead(heapId, oldIndexId);
3223	}
3224
3225	/ Commit this transaction to make the updates visible. /
3226	CommitTransactionCommand();
3227	StartTransactionCommand();
3228
3229	/*
3230	* Phase 6 of REINDEX CONCURRENTLY
3231	*
3232	* Drop the old indexes.
3233	*/
3234
3235	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3236	PROGRESS_CREATEIDX_PHASE_WAIT_4);
3237	WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
3238
3239	PushActiveSnapshot(GetTransactionSnapshot());
3240
3241	{
3242	ObjectAddresses *objects = new_object_addresses();
3243
3244	foreach(lc, indexIds)
3245	{
3246	Oid oldIndexId = lfirst_oid(lc);
3247	ObjectAddress object;
3248
3249	object.classId = RelationRelationId;
3250	object.objectId = oldIndexId;
3251	object.objectSubId = `0`;
3252
3253	add_exact_object_address(&object, objects);
3254	}
3255
3256	/*
3257	* Use PERFORM_DELETION_CONCURRENT_LOCK so that index_drop() uses the
3258	* right lock level.
3259	*/
3260	performMultipleDeletions(objects, DROP_RESTRICT,
3261	PERFORM_DELETION_CONCURRENT_LOCK \| PERFORM_DELETION_INTERNAL);
3262	}
3263
3264	PopActiveSnapshot();
3265	CommitTransactionCommand();
3266
3267	/*
3268	* Finally, release the session-level lock on the table.
3269	*/
3270	foreach(lc, relationLocks)
3271	{
3272	LockRelId lockrelid = (LockRelId ) lfirst(lc);
3273
3274	UnlockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
3275	}
3276
3277	/ Start a new transaction to finish process properly /
3278	StartTransactionCommand();
3279
3280	/ Log what we did /
3281	if (options & REINDEXOPT_VERBOSE)
3282	{
3283	if (relkind == RELKIND_INDEX)
3284	ereport(INFO,
3285	(errmsg("index \"%s.%s\" was reindexed",
3286	relationNamespace, relationName),
3287	errdetail("%s.",
3288	pg_rusage_show(&ru0))));
3289	else
3290	{
3291	foreach(lc, newIndexIds)
3292	{
3293	Oid indOid = lfirst_oid(lc);
3294
3295	ereport(INFO,
3296	(errmsg("index \"%s.%s\" was reindexed",
3297	get_namespace_name(get_rel_namespace(indOid)),
3298	get_rel_name(indOid))));
3299	/ Don't show rusage here, since it's not per index. /
3300	}
3301
3302	ereport(INFO,
3303	(errmsg("table \"%s.%s\" was reindexed",
3304	relationNamespace, relationName),
3305	errdetail("%s.",
3306	pg_rusage_show(&ru0))));
3307	}
3308	}
3309
3310	MemoryContextDelete(private_context);
3311
3312	pgstat_progress_end_command();
3313
3314	return true;
3315	}
3316
3317	/*
3318	* ReindexPartitionedIndex
3319	* Reindex each child of the given partitioned index.
3320	*
3321	* Not yet implemented.
3322	*/
3323	static void
3324	ReindexPartitionedIndex(Relation parentIdx)
3325	{
3326	ereport(ERROR,
3327	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3328	errmsg("REINDEX is not yet implemented for partitioned indexes")));
3329	}
3330
3331	/*
3332	* Insert or delete an appropriate pg_inherits tuple to make the given index
3333	* be a partition of the indicated parent index.
3334	*
3335	* This also corrects the pg_depend information for the affected index.
3336	*/
3337	void
3338	IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
3339	{
3340	Relation pg_inherits;
3341	ScanKeyData key[`2`];
3342	SysScanDesc scan;
3343	Oid partRelid = RelationGetRelid(partitionIdx);
3344	HeapTuple tuple;
3345	bool fix_dependencies;
3346
3347	/ Make sure this is an index /
3348	Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX \|\|
3349	partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);
3350
3351	/*
3352	* Scan pg_inherits for rows linking our index to some parent.
3353	*/
3354	pg_inherits = relation_open(InheritsRelationId, RowExclusiveLock);
3355	ScanKeyInit(&key[`0`],
3356	Anum_pg_inherits_inhrelid,
3357	BTEqualStrategyNumber, F_OIDEQ,
3358	ObjectIdGetDatum(partRelid));
3359	ScanKeyInit(&key[`1`],
3360	Anum_pg_inherits_inhseqno,
3361	BTEqualStrategyNumber, F_INT4EQ,
3362	Int32GetDatum(`1`));
3363	scan = systable_beginscan(pg_inherits, InheritsRelidSeqnoIndexId, true,
3364	NULL, `2`, key);
3365	tuple = systable_getnext(scan);
3366
3367	if (!HeapTupleIsValid(tuple))
3368	{
3369	if (parentOid == InvalidOid)
3370	{
3371	/*
3372	* No pg_inherits row, and no parent wanted: nothing to do in this
3373	* case.
3374	*/
3375	fix_dependencies = false;
3376	}
3377	else
3378	{
3379	Datum values[Natts_pg_inherits];
3380	bool isnull[Natts_pg_inherits];
3381
3382	/*
3383	* No pg_inherits row exists, and we want a parent for this index,
3384	* so insert it.
3385	*/
3386	values[Anum_pg_inherits_inhrelid - `1`] = ObjectIdGetDatum(partRelid);
3387	values[Anum_pg_inherits_inhparent - `1`] =
3388	ObjectIdGetDatum(parentOid);
3389	values[Anum_pg_inherits_inhseqno - `1`] = Int32GetDatum(`1`);
3390	memset(isnull, false, sizeof(isnull));
3391
3392	tuple = heap_form_tuple(RelationGetDescr(pg_inherits),
3393	values, isnull);
3394	CatalogTupleInsert(pg_inherits, tuple);
3395
3396	fix_dependencies = true;
3397	}
3398	}
3399	else
3400	{
3401	Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple);
3402
3403	if (parentOid == InvalidOid)
3404	{
3405	/*
3406	* There exists a pg_inherits row, which we want to clear; do so.
3407	*/
3408	CatalogTupleDelete(pg_inherits, &tuple->t_self);
3409	fix_dependencies = true;
3410	}
3411	else
3412	{
3413	/*
3414	* A pg_inherits row exists. If it's the same we want, then we're
3415	* good; if it differs, that amounts to a corrupt catalog and
3416	* should not happen.
3417	*/
3418	if (inhForm->inhparent != parentOid)
3419	{
3420	/ unexpected: we should not get called in this case /
3421	elog(ERROR, "bogus pg_inherit row: inhrelid %u inhparent %u",
3422	inhForm->inhrelid, inhForm->inhparent);
3423	}
3424
3425	/ already in the right state /
3426	fix_dependencies = false;
3427	}
3428	}
3429
3430	/ done with pg_inherits /
3431	systable_endscan(scan);
3432	relation_close(pg_inherits, RowExclusiveLock);
3433
3434	/ set relhassubclass if an index partition has been added to the parent /
3435	if (OidIsValid(parentOid))
3436	SetRelationHasSubclass(parentOid, true);
3437
3438	/ set relispartition correctly on the partition /
3439	update_relispartition(partRelid, OidIsValid(parentOid));
3440
3441	if (fix_dependencies)
3442	{
3443	/*
3444	* Insert/delete pg_depend rows. If setting a parent, add PARTITION
3445	* dependencies on the parent index and the table; if removing a
3446	* parent, delete PARTITION dependencies.
3447	*/
3448	if (OidIsValid(parentOid))
3449	{
3450	ObjectAddress partIdx;
3451	ObjectAddress parentIdx;
3452	ObjectAddress partitionTbl;
3453
3454	ObjectAddressSet(partIdx, RelationRelationId, partRelid);
3455	ObjectAddressSet(parentIdx, RelationRelationId, parentOid);
3456	ObjectAddressSet(partitionTbl, RelationRelationId,
3457	partitionIdx->rd_index->indrelid);
3458	recordDependencyOn(&partIdx, &parentIdx,
3459	DEPENDENCY_PARTITION_PRI);
3460	recordDependencyOn(&partIdx, &partitionTbl,
3461	DEPENDENCY_PARTITION_SEC);
3462	}
3463	else
3464	{
3465	deleteDependencyRecordsForClass(RelationRelationId, partRelid,
3466	RelationRelationId,
3467	DEPENDENCY_PARTITION_PRI);
3468	deleteDependencyRecordsForClass(RelationRelationId, partRelid,
3469	RelationRelationId,
3470	DEPENDENCY_PARTITION_SEC);
3471	}
3472
3473	/ make our updates visible /
3474	CommandCounterIncrement();
3475	}
3476	}
3477
3478	/*
3479	* Subroutine of IndexSetParentIndex to update the relispartition flag of the
3480	* given index to the given value.
3481	*/
3482	static void
3483	update_relispartition(Oid relationId, bool newval)
3484	{
3485	HeapTuple tup;
3486	Relation classRel;
3487
3488	classRel = table_open(RelationRelationId, RowExclusiveLock);
3489	tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId));
3490	if (!HeapTupleIsValid(tup))
3491	elog(ERROR, "cache lookup failed for relation %u", relationId);
3492	Assert(((Form_pg_class) GETSTRUCT(tup))->relispartition != newval);
3493	((Form_pg_class) GETSTRUCT(tup))->relispartition = newval;
3494	CatalogTupleUpdate(classRel, &tup->t_self, tup);
3495	heap_freetuple(tup);
3496	table_close(classRel, RowExclusiveLock);
3497	}
3498

Browse the source code of PostgreSQL/src/backend/commands/indexcmds.c