1/*-------------------------------------------------------------------------
2 *
3 * indexcmds.c
4 * POSTGRES define and remove index code.
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/commands/indexcmds.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16#include "postgres.h"
17
18#include "access/amapi.h"
19#include "access/heapam.h"
20#include "access/htup_details.h"
21#include "access/reloptions.h"
22#include "access/sysattr.h"
23#include "access/tableam.h"
24#include "access/xact.h"
25#include "catalog/catalog.h"
26#include "catalog/index.h"
27#include "catalog/indexing.h"
28#include "catalog/pg_am.h"
29#include "catalog/pg_constraint.h"
30#include "catalog/pg_inherits.h"
31#include "catalog/pg_opclass.h"
32#include "catalog/pg_opfamily.h"
33#include "catalog/pg_tablespace.h"
34#include "catalog/pg_type.h"
35#include "commands/comment.h"
36#include "commands/dbcommands.h"
37#include "commands/defrem.h"
38#include "commands/event_trigger.h"
39#include "commands/progress.h"
40#include "commands/tablecmds.h"
41#include "commands/tablespace.h"
42#include "mb/pg_wchar.h"
43#include "miscadmin.h"
44#include "nodes/makefuncs.h"
45#include "nodes/nodeFuncs.h"
46#include "optimizer/optimizer.h"
47#include "parser/parse_coerce.h"
48#include "parser/parse_func.h"
49#include "parser/parse_oper.h"
50#include "partitioning/partdesc.h"
51#include "pgstat.h"
52#include "rewrite/rewriteManip.h"
53#include "storage/lmgr.h"
54#include "storage/proc.h"
55#include "storage/procarray.h"
56#include "storage/sinvaladt.h"
57#include "utils/acl.h"
58#include "utils/builtins.h"
59#include "utils/fmgroids.h"
60#include "utils/inval.h"
61#include "utils/lsyscache.h"
62#include "utils/memutils.h"
63#include "utils/partcache.h"
64#include "utils/pg_rusage.h"
65#include "utils/regproc.h"
66#include "utils/snapmgr.h"
67#include "utils/syscache.h"
68
69
/*
 * Non-exported function prototypes.
 *
 * All of these are file-local (static) helpers.  In the portion of the file
 * that follows, DefineIndex calls CheckPredicate, ComputeIndexAttrs,
 * ChooseIndexName and ChooseIndexColumnNames, and CheckIndexCompatible calls
 * ComputeIndexAttrs with the same argument layout.
 */
static void CheckPredicate(Expr *predicate);
static void ComputeIndexAttrs(IndexInfo *indexInfo,
							  Oid *typeOidP,
							  Oid *collationOidP,
							  Oid *classOidP,
							  int16 *colOptionP,
							  List *attList,
							  List *exclusionOpNames,
							  Oid relId,
							  const char *accessMethodName, Oid accessMethodId,
							  bool amcanorder,
							  bool isconstraint);
static char *ChooseIndexName(const char *tabname, Oid namespaceId,
							 List *colnames, List *exclusionOpNames,
							 bool primary, bool isconstraint);
static char *ChooseIndexNameAddition(List *colnames);
static List *ChooseIndexColumnNames(List *indexElems);
static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
											Oid relId, Oid oldRelId, void *arg);
static bool ReindexRelationConcurrently(Oid relationOid, int options);
static void ReindexPartitionedIndex(Relation parentIdx);
static void update_relispartition(Oid relationId, bool newval);
93
/*
 * Callback argument type for RangeVarCallbackForReindexIndex().
 *
 * NOTE(review): presumably handed to the callback through its "void *arg"
 * parameter; the callback body is not visible in this part of the file, so
 * confirm against its definition.
 */
struct ReindexIndexCallbackState
{
	bool		concurrent;		/* flag from statement */
	Oid			locked_table_oid;	/* tracks previously locked table */
};
102
103/*
104 * CheckIndexCompatible
105 * Determine whether an existing index definition is compatible with a
106 * prospective index definition, such that the existing index storage
107 * could become the storage of the new index, avoiding a rebuild.
108 *
109 * 'heapRelation': the relation the index would apply to.
110 * 'accessMethodName': name of the AM to use.
111 * 'attributeList': a list of IndexElem specifying columns and expressions
112 * to index on.
113 * 'exclusionOpNames': list of names of exclusion-constraint operators,
114 * or NIL if not an exclusion constraint.
115 *
116 * This is tailored to the needs of ALTER TABLE ALTER TYPE, which recreates
117 * any indexes that depended on a changing column from their pg_get_indexdef
118 * or pg_get_constraintdef definitions. We omit some of the sanity checks of
119 * DefineIndex. We assume that the old and new indexes have the same number
120 * of columns and that if one has an expression column or predicate, both do.
121 * Errors arising from the attribute list still apply.
122 *
123 * Most column type changes that can skip a table rewrite do not invalidate
124 * indexes. We acknowledge this when all operator classes, collations and
125 * exclusion operators match. Though we could further permit intra-opfamily
126 * changes for btree and hash indexes, that adds subtle complexity with no
127 * concrete benefit for core types. Note, that INCLUDE columns aren't
128 * checked by this function, for them it's enough that table rewrite is
129 * skipped.
130 *
131 * When a comparison or exclusion operator has a polymorphic input type, the
132 * actual input types must also match. This defends against the possibility
133 * that operators could vary behavior in response to get_fn_expr_argtype().
134 * At present, this hazard is theoretical: check_exclusion_constraint() and
135 * all core index access methods decline to set fn_expr for such calls.
136 *
137 * We do not yet implement a test to verify compatibility of expression
138 * columns or predicates, so assume any such index is incompatible.
139 */
140bool
141CheckIndexCompatible(Oid oldId,
142 const char *accessMethodName,
143 List *attributeList,
144 List *exclusionOpNames)
145{
146 bool isconstraint;
147 Oid *typeObjectId;
148 Oid *collationObjectId;
149 Oid *classObjectId;
150 Oid accessMethodId;
151 Oid relationId;
152 HeapTuple tuple;
153 Form_pg_index indexForm;
154 Form_pg_am accessMethodForm;
155 IndexAmRoutine *amRoutine;
156 bool amcanorder;
157 int16 *coloptions;
158 IndexInfo *indexInfo;
159 int numberOfAttributes;
160 int old_natts;
161 bool isnull;
162 bool ret = true;
163 oidvector *old_indclass;
164 oidvector *old_indcollation;
165 Relation irel;
166 int i;
167 Datum d;
168
169 /* Caller should already have the relation locked in some way. */
170 relationId = IndexGetRelation(oldId, false);
171
172 /*
173 * We can pretend isconstraint = false unconditionally. It only serves to
174 * decide the text of an error message that should never happen for us.
175 */
176 isconstraint = false;
177
178 numberOfAttributes = list_length(attributeList);
179 Assert(numberOfAttributes > 0);
180 Assert(numberOfAttributes <= INDEX_MAX_KEYS);
181
182 /* look up the access method */
183 tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
184 if (!HeapTupleIsValid(tuple))
185 ereport(ERROR,
186 (errcode(ERRCODE_UNDEFINED_OBJECT),
187 errmsg("access method \"%s\" does not exist",
188 accessMethodName)));
189 accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
190 accessMethodId = accessMethodForm->oid;
191 amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
192 ReleaseSysCache(tuple);
193
194 amcanorder = amRoutine->amcanorder;
195
196 /*
197 * Compute the operator classes, collations, and exclusion operators for
198 * the new index, so we can test whether it's compatible with the existing
199 * one. Note that ComputeIndexAttrs might fail here, but that's OK:
200 * DefineIndex would have called this function with the same arguments
201 * later on, and it would have failed then anyway. Our attributeList
202 * contains only key attributes, thus we're filling ii_NumIndexAttrs and
203 * ii_NumIndexKeyAttrs with same value.
204 */
205 indexInfo = makeIndexInfo(numberOfAttributes, numberOfAttributes,
206 accessMethodId, NIL, NIL, false, false, false);
207 typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
208 collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
209 classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
210 coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
211 ComputeIndexAttrs(indexInfo,
212 typeObjectId, collationObjectId, classObjectId,
213 coloptions, attributeList,
214 exclusionOpNames, relationId,
215 accessMethodName, accessMethodId,
216 amcanorder, isconstraint);
217
218
219 /* Get the soon-obsolete pg_index tuple. */
220 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldId));
221 if (!HeapTupleIsValid(tuple))
222 elog(ERROR, "cache lookup failed for index %u", oldId);
223 indexForm = (Form_pg_index) GETSTRUCT(tuple);
224
225 /*
226 * We don't assess expressions or predicates; assume incompatibility.
227 * Also, if the index is invalid for any reason, treat it as incompatible.
228 */
229 if (!(heap_attisnull(tuple, Anum_pg_index_indpred, NULL) &&
230 heap_attisnull(tuple, Anum_pg_index_indexprs, NULL) &&
231 indexForm->indisvalid))
232 {
233 ReleaseSysCache(tuple);
234 return false;
235 }
236
237 /* Any change in operator class or collation breaks compatibility. */
238 old_natts = indexForm->indnkeyatts;
239 Assert(old_natts == numberOfAttributes);
240
241 d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indcollation, &isnull);
242 Assert(!isnull);
243 old_indcollation = (oidvector *) DatumGetPointer(d);
244
245 d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indclass, &isnull);
246 Assert(!isnull);
247 old_indclass = (oidvector *) DatumGetPointer(d);
248
249 ret = (memcmp(old_indclass->values, classObjectId,
250 old_natts * sizeof(Oid)) == 0 &&
251 memcmp(old_indcollation->values, collationObjectId,
252 old_natts * sizeof(Oid)) == 0);
253
254 ReleaseSysCache(tuple);
255
256 if (!ret)
257 return false;
258
259 /* For polymorphic opcintype, column type changes break compatibility. */
260 irel = index_open(oldId, AccessShareLock); /* caller probably has a lock */
261 for (i = 0; i < old_natts; i++)
262 {
263 if (IsPolymorphicType(get_opclass_input_type(classObjectId[i])) &&
264 TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
265 {
266 ret = false;
267 break;
268 }
269 }
270
271 /* Any change in exclusion operator selections breaks compatibility. */
272 if (ret && indexInfo->ii_ExclusionOps != NULL)
273 {
274 Oid *old_operators,
275 *old_procs;
276 uint16 *old_strats;
277
278 RelationGetExclusionInfo(irel, &old_operators, &old_procs, &old_strats);
279 ret = memcmp(old_operators, indexInfo->ii_ExclusionOps,
280 old_natts * sizeof(Oid)) == 0;
281
282 /* Require an exact input type match for polymorphic operators. */
283 if (ret)
284 {
285 for (i = 0; i < old_natts && ret; i++)
286 {
287 Oid left,
288 right;
289
290 op_input_types(indexInfo->ii_ExclusionOps[i], &left, &right);
291 if ((IsPolymorphicType(left) || IsPolymorphicType(right)) &&
292 TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
293 {
294 ret = false;
295 break;
296 }
297 }
298 }
299 }
300
301 index_close(irel, NoLock);
302 return ret;
303}
304
305
306/*
307 * WaitForOlderSnapshots
308 *
309 * Wait for transactions that might have an older snapshot than the given xmin
310 * limit, because it might not contain tuples deleted just before it has
311 * been taken. Obtain a list of VXIDs of such transactions, and wait for them
312 * individually. This is used when building an index concurrently.
313 *
314 * We can exclude any running transactions that have xmin > the xmin given;
315 * their oldest snapshot must be newer than our xmin limit.
316 * We can also exclude any transactions that have xmin = zero, since they
317 * evidently have no live snapshot at all (and any one they might be in
318 * process of taking is certainly newer than ours). Transactions in other
319 * DBs can be ignored too, since they'll never even be able to see the
320 * index being worked on.
321 *
322 * We can also exclude autovacuum processes and processes running manual
323 * lazy VACUUMs, because they won't be fazed by missing index entries
324 * either. (Manual ANALYZEs, however, can't be excluded because they
325 * might be within transactions that are going to do arbitrary operations
326 * later.)
327 *
328 * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
329 * check for that.
330 *
331 * If a process goes idle-in-transaction with xmin zero, we do not need to
332 * wait for it anymore, per the above argument. We do not have the
333 * infrastructure right now to stop waiting if that happens, but we can at
334 * least avoid the folly of waiting when it is idle at the time we would
335 * begin to wait. We do this by repeatedly rechecking the output of
336 * GetCurrentVirtualXIDs. If, during any iteration, a particular vxid
337 * doesn't show up in the output, we know we can forget about it.
338 */
339static void
340WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
341{
342 int n_old_snapshots;
343 int i;
344 VirtualTransactionId *old_snapshots;
345
346 old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
347 PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
348 &n_old_snapshots);
349 if (progress)
350 pgstat_progress_update_param(PROGRESS_WAITFOR_TOTAL, n_old_snapshots);
351
352 for (i = 0; i < n_old_snapshots; i++)
353 {
354 if (!VirtualTransactionIdIsValid(old_snapshots[i]))
355 continue; /* found uninteresting in previous cycle */
356
357 if (i > 0)
358 {
359 /* see if anything's changed ... */
360 VirtualTransactionId *newer_snapshots;
361 int n_newer_snapshots;
362 int j;
363 int k;
364
365 newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
366 true, false,
367 PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
368 &n_newer_snapshots);
369 for (j = i; j < n_old_snapshots; j++)
370 {
371 if (!VirtualTransactionIdIsValid(old_snapshots[j]))
372 continue; /* found uninteresting in previous cycle */
373 for (k = 0; k < n_newer_snapshots; k++)
374 {
375 if (VirtualTransactionIdEquals(old_snapshots[j],
376 newer_snapshots[k]))
377 break;
378 }
379 if (k >= n_newer_snapshots) /* not there anymore */
380 SetInvalidVirtualTransactionId(old_snapshots[j]);
381 }
382 pfree(newer_snapshots);
383 }
384
385 if (VirtualTransactionIdIsValid(old_snapshots[i]))
386 {
387 if (progress)
388 {
389 PGPROC *holder = BackendIdGetProc(old_snapshots[i].backendId);
390
391 pgstat_progress_update_param(PROGRESS_WAITFOR_CURRENT_PID,
392 holder->pid);
393 }
394 VirtualXactLock(old_snapshots[i], true);
395 }
396
397 if (progress)
398 pgstat_progress_update_param(PROGRESS_WAITFOR_DONE, i + 1);
399 }
400}
401
402
403/*
404 * DefineIndex
405 * Creates a new index.
406 *
407 * 'relationId': the OID of the heap relation on which the index is to be
408 * created
409 * 'stmt': IndexStmt describing the properties of the new index.
410 * 'indexRelationId': normally InvalidOid, but during bootstrap can be
411 * nonzero to specify a preselected OID for the index.
412 * 'parentIndexId': the OID of the parent index; InvalidOid if not the child
413 * of a partitioned index.
414 * 'parentConstraintId': the OID of the parent constraint; InvalidOid if not
415 * the child of a constraint (only used when recursing)
416 * 'is_alter_table': this is due to an ALTER rather than a CREATE operation.
417 * 'check_rights': check for CREATE rights in namespace and tablespace. (This
418 * should be true except when ALTER is deleting/recreating an index.)
419 * 'check_not_in_use': check for table not already in use in current session.
420 * This should be true unless caller is holding the table open, in which
421 * case the caller had better have checked it earlier.
422 * 'skip_build': make the catalog entries but don't create the index files
423 * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints.
424 *
425 * Returns the object address of the created index.
426 */
427ObjectAddress
428DefineIndex(Oid relationId,
429 IndexStmt *stmt,
430 Oid indexRelationId,
431 Oid parentIndexId,
432 Oid parentConstraintId,
433 bool is_alter_table,
434 bool check_rights,
435 bool check_not_in_use,
436 bool skip_build,
437 bool quiet)
438{
439 char *indexRelationName;
440 char *accessMethodName;
441 Oid *typeObjectId;
442 Oid *collationObjectId;
443 Oid *classObjectId;
444 Oid accessMethodId;
445 Oid namespaceId;
446 Oid tablespaceId;
447 Oid createdConstraintId = InvalidOid;
448 List *indexColNames;
449 List *allIndexParams;
450 Relation rel;
451 HeapTuple tuple;
452 Form_pg_am accessMethodForm;
453 IndexAmRoutine *amRoutine;
454 bool amcanorder;
455 amoptions_function amoptions;
456 bool partitioned;
457 Datum reloptions;
458 int16 *coloptions;
459 IndexInfo *indexInfo;
460 bits16 flags;
461 bits16 constr_flags;
462 int numberOfAttributes;
463 int numberOfKeyAttributes;
464 TransactionId limitXmin;
465 ObjectAddress address;
466 LockRelId heaprelid;
467 LOCKTAG heaplocktag;
468 LOCKMODE lockmode;
469 Snapshot snapshot;
470 int save_nestlevel = -1;
471 int i;
472
473 /*
474 * Some callers need us to run with an empty default_tablespace; this is a
475 * necessary hack to be able to reproduce catalog state accurately when
476 * recreating indexes after table-rewriting ALTER TABLE.
477 */
478 if (stmt->reset_default_tblspc)
479 {
480 save_nestlevel = NewGUCNestLevel();
481 (void) set_config_option("default_tablespace", "",
482 PGC_USERSET, PGC_S_SESSION,
483 GUC_ACTION_SAVE, true, 0, false);
484 }
485
486 /*
487 * Start progress report. If we're building a partition, this was already
488 * done.
489 */
490 if (!OidIsValid(parentIndexId))
491 {
492 pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
493 relationId);
494 pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
495 stmt->concurrent ?
496 PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY :
497 PROGRESS_CREATEIDX_COMMAND_CREATE);
498 }
499
500 /*
501 * No index OID to report yet
502 */
503 pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
504 InvalidOid);
505
506 /*
507 * count key attributes in index
508 */
509 numberOfKeyAttributes = list_length(stmt->indexParams);
510
511 /*
512 * Calculate the new list of index columns including both key columns and
513 * INCLUDE columns. Later we can determine which of these are key
514 * columns, and which are just part of the INCLUDE list by checking the
515 * list position. A list item in a position less than ii_NumIndexKeyAttrs
516 * is part of the key columns, and anything equal to and over is part of
517 * the INCLUDE columns.
518 */
519 allIndexParams = list_concat(list_copy(stmt->indexParams),
520 list_copy(stmt->indexIncludingParams));
521 numberOfAttributes = list_length(allIndexParams);
522
523 if (numberOfAttributes <= 0)
524 ereport(ERROR,
525 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
526 errmsg("must specify at least one column")));
527 if (numberOfAttributes > INDEX_MAX_KEYS)
528 ereport(ERROR,
529 (errcode(ERRCODE_TOO_MANY_COLUMNS),
530 errmsg("cannot use more than %d columns in an index",
531 INDEX_MAX_KEYS)));
532
533 /*
534 * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
535 * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
536 * (but not VACUUM).
537 *
538 * NB: Caller is responsible for making sure that relationId refers to the
539 * relation on which the index should be built; except in bootstrap mode,
540 * this will typically require the caller to have already locked the
541 * relation. To avoid lock upgrade hazards, that lock should be at least
542 * as strong as the one we take here.
543 *
544 * NB: If the lock strength here ever changes, code that is run by
545 * parallel workers under the control of certain particular ambuild
546 * functions will need to be updated, too.
547 */
548 lockmode = stmt->concurrent ? ShareUpdateExclusiveLock : ShareLock;
549 rel = table_open(relationId, lockmode);
550
551 namespaceId = RelationGetNamespace(rel);
552
553 /* Ensure that it makes sense to index this kind of relation */
554 switch (rel->rd_rel->relkind)
555 {
556 case RELKIND_RELATION:
557 case RELKIND_MATVIEW:
558 case RELKIND_PARTITIONED_TABLE:
559 /* OK */
560 break;
561 case RELKIND_FOREIGN_TABLE:
562
563 /*
564 * Custom error message for FOREIGN TABLE since the term is close
565 * to a regular table and can confuse the user.
566 */
567 ereport(ERROR,
568 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
569 errmsg("cannot create index on foreign table \"%s\"",
570 RelationGetRelationName(rel))));
571 break;
572 default:
573 ereport(ERROR,
574 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
575 errmsg("\"%s\" is not a table or materialized view",
576 RelationGetRelationName(rel))));
577 break;
578 }
579
580 /*
581 * Establish behavior for partitioned tables, and verify sanity of
582 * parameters.
583 *
584 * We do not build an actual index in this case; we only create a few
585 * catalog entries. The actual indexes are built by recursing for each
586 * partition.
587 */
588 partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
589 if (partitioned)
590 {
591 if (stmt->concurrent)
592 ereport(ERROR,
593 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
594 errmsg("cannot create index on partitioned table \"%s\" concurrently",
595 RelationGetRelationName(rel))));
596 if (stmt->excludeOpNames)
597 ereport(ERROR,
598 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
599 errmsg("cannot create exclusion constraints on partitioned table \"%s\"",
600 RelationGetRelationName(rel))));
601 }
602
603 /*
604 * Don't try to CREATE INDEX on temp tables of other backends.
605 */
606 if (RELATION_IS_OTHER_TEMP(rel))
607 ereport(ERROR,
608 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
609 errmsg("cannot create indexes on temporary tables of other sessions")));
610
611 /*
612 * Unless our caller vouches for having checked this already, insist that
613 * the table not be in use by our own session, either. Otherwise we might
614 * fail to make entries in the new index (for instance, if an INSERT or
615 * UPDATE is in progress and has already made its list of target indexes).
616 */
617 if (check_not_in_use)
618 CheckTableNotInUse(rel, "CREATE INDEX");
619
620 /*
621 * Verify we (still) have CREATE rights in the rel's namespace.
622 * (Presumably we did when the rel was created, but maybe not anymore.)
623 * Skip check if caller doesn't want it. Also skip check if
624 * bootstrapping, since permissions machinery may not be working yet.
625 */
626 if (check_rights && !IsBootstrapProcessingMode())
627 {
628 AclResult aclresult;
629
630 aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
631 ACL_CREATE);
632 if (aclresult != ACLCHECK_OK)
633 aclcheck_error(aclresult, OBJECT_SCHEMA,
634 get_namespace_name(namespaceId));
635 }
636
637 /*
638 * Select tablespace to use. If not specified, use default tablespace
639 * (which may in turn default to database's default).
640 */
641 if (stmt->tableSpace)
642 {
643 tablespaceId = get_tablespace_oid(stmt->tableSpace, false);
644 if (partitioned && tablespaceId == MyDatabaseTableSpace)
645 ereport(ERROR,
646 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
647 errmsg("cannot specify default tablespace for partitioned relations")));
648 }
649 else
650 {
651 tablespaceId = GetDefaultTablespace(rel->rd_rel->relpersistence,
652 partitioned);
653 /* note InvalidOid is OK in this case */
654 }
655
656 /* Check tablespace permissions */
657 if (check_rights &&
658 OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
659 {
660 AclResult aclresult;
661
662 aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
663 ACL_CREATE);
664 if (aclresult != ACLCHECK_OK)
665 aclcheck_error(aclresult, OBJECT_TABLESPACE,
666 get_tablespace_name(tablespaceId));
667 }
668
669 /*
670 * Force shared indexes into the pg_global tablespace. This is a bit of a
671 * hack but seems simpler than marking them in the BKI commands. On the
672 * other hand, if it's not shared, don't allow it to be placed there.
673 */
674 if (rel->rd_rel->relisshared)
675 tablespaceId = GLOBALTABLESPACE_OID;
676 else if (tablespaceId == GLOBALTABLESPACE_OID)
677 ereport(ERROR,
678 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
679 errmsg("only shared relations can be placed in pg_global tablespace")));
680
681 /*
682 * Choose the index column names.
683 */
684 indexColNames = ChooseIndexColumnNames(allIndexParams);
685
686 /*
687 * Select name for index if caller didn't specify
688 */
689 indexRelationName = stmt->idxname;
690 if (indexRelationName == NULL)
691 indexRelationName = ChooseIndexName(RelationGetRelationName(rel),
692 namespaceId,
693 indexColNames,
694 stmt->excludeOpNames,
695 stmt->primary,
696 stmt->isconstraint);
697
698 /*
699 * look up the access method, verify it can handle the requested features
700 */
701 accessMethodName = stmt->accessMethod;
702 tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
703 if (!HeapTupleIsValid(tuple))
704 {
705 /*
706 * Hack to provide more-or-less-transparent updating of old RTREE
707 * indexes to GiST: if RTREE is requested and not found, use GIST.
708 */
709 if (strcmp(accessMethodName, "rtree") == 0)
710 {
711 ereport(NOTICE,
712 (errmsg("substituting access method \"gist\" for obsolete method \"rtree\"")));
713 accessMethodName = "gist";
714 tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
715 }
716
717 if (!HeapTupleIsValid(tuple))
718 ereport(ERROR,
719 (errcode(ERRCODE_UNDEFINED_OBJECT),
720 errmsg("access method \"%s\" does not exist",
721 accessMethodName)));
722 }
723 accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
724 accessMethodId = accessMethodForm->oid;
725 amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
726
727 pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
728 accessMethodId);
729
730 if (stmt->unique && !amRoutine->amcanunique)
731 ereport(ERROR,
732 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
733 errmsg("access method \"%s\" does not support unique indexes",
734 accessMethodName)));
735 if (stmt->indexIncludingParams != NIL && !amRoutine->amcaninclude)
736 ereport(ERROR,
737 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
738 errmsg("access method \"%s\" does not support included columns",
739 accessMethodName)));
740 if (numberOfAttributes > 1 && !amRoutine->amcanmulticol)
741 ereport(ERROR,
742 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
743 errmsg("access method \"%s\" does not support multicolumn indexes",
744 accessMethodName)));
745 if (stmt->excludeOpNames && amRoutine->amgettuple == NULL)
746 ereport(ERROR,
747 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
748 errmsg("access method \"%s\" does not support exclusion constraints",
749 accessMethodName)));
750
751 amcanorder = amRoutine->amcanorder;
752 amoptions = amRoutine->amoptions;
753
754 pfree(amRoutine);
755 ReleaseSysCache(tuple);
756
757 /*
758 * Validate predicate, if given
759 */
760 if (stmt->whereClause)
761 CheckPredicate((Expr *) stmt->whereClause);
762
763 /*
764 * Parse AM-specific options, convert to text array form, validate.
765 */
766 reloptions = transformRelOptions((Datum) 0, stmt->options,
767 NULL, NULL, false, false);
768
769 (void) index_reloptions(amoptions, reloptions, true);
770
771 /*
772 * Prepare arguments for index_create, primarily an IndexInfo structure.
773 * Note that predicates must be in implicit-AND format. In a concurrent
774 * build, mark it not-ready-for-inserts.
775 */
776 indexInfo = makeIndexInfo(numberOfAttributes,
777 numberOfKeyAttributes,
778 accessMethodId,
779 NIL, /* expressions, NIL for now */
780 make_ands_implicit((Expr *) stmt->whereClause),
781 stmt->unique,
782 !stmt->concurrent,
783 stmt->concurrent);
784
785 typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
786 collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
787 classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
788 coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
789 ComputeIndexAttrs(indexInfo,
790 typeObjectId, collationObjectId, classObjectId,
791 coloptions, allIndexParams,
792 stmt->excludeOpNames, relationId,
793 accessMethodName, accessMethodId,
794 amcanorder, stmt->isconstraint);
795
796 /*
797 * Extra checks when creating a PRIMARY KEY index.
798 */
799 if (stmt->primary)
800 index_check_primary_key(rel, indexInfo, is_alter_table, stmt);
801
802 /*
803 * If this table is partitioned and we're creating a unique index or a
804 * primary key, make sure that the indexed columns are part of the
805 * partition key. Otherwise it would be possible to violate uniqueness by
806 * putting values that ought to be unique in different partitions.
807 *
808 * We could lift this limitation if we had global indexes, but those have
809 * their own problems, so this is a useful feature combination.
810 */
811 if (partitioned && (stmt->unique || stmt->primary))
812 {
813 PartitionKey key = rel->rd_partkey;
814 int i;
815
816 /*
817 * A partitioned table can have unique indexes, as long as all the
818 * columns in the partition key appear in the unique key. A
819 * partition-local index can enforce global uniqueness iff the PK
820 * value completely determines the partition that a row is in.
821 *
822 * Thus, verify that all the columns in the partition key appear in
823 * the unique key definition.
824 */
825 for (i = 0; i < key->partnatts; i++)
826 {
827 bool found = false;
828 int j;
829 const char *constraint_type;
830
831 if (stmt->primary)
832 constraint_type = "PRIMARY KEY";
833 else if (stmt->unique)
834 constraint_type = "UNIQUE";
835 else if (stmt->excludeOpNames != NIL)
836 constraint_type = "EXCLUDE";
837 else
838 {
839 elog(ERROR, "unknown constraint type");
840 constraint_type = NULL; /* keep compiler quiet */
841 }
842
843 /*
844 * It may be possible to support UNIQUE constraints when partition
845 * keys are expressions, but is it worth it? Give up for now.
846 */
847 if (key->partattrs[i] == 0)
848 ereport(ERROR,
849 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
850 errmsg("unsupported %s constraint with partition key definition",
851 constraint_type),
852 errdetail("%s constraints cannot be used when partition keys include expressions.",
853 constraint_type)));
854
855 for (j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++)
856 {
857 if (key->partattrs[i] == indexInfo->ii_IndexAttrNumbers[j])
858 {
859 found = true;
860 break;
861 }
862 }
863 if (!found)
864 {
865 Form_pg_attribute att;
866
867 att = TupleDescAttr(RelationGetDescr(rel), key->partattrs[i] - 1);
868 ereport(ERROR,
869 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
870 errmsg("insufficient columns in %s constraint definition",
871 constraint_type),
872 errdetail("%s constraint on table \"%s\" lacks column \"%s\" which is part of the partition key.",
873 constraint_type, RelationGetRelationName(rel),
874 NameStr(att->attname))));
875 }
876 }
877 }
878
879
880 /*
881 * We disallow indexes on system columns. They would not necessarily get
882 * updated correctly, and they don't seem useful anyway.
883 */
884 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
885 {
886 AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i];
887
888 if (attno < 0)
889 ereport(ERROR,
890 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
891 errmsg("index creation on system columns is not supported")));
892 }
893
894 /*
895 * Also check for system columns used in expressions or predicates.
896 */
897 if (indexInfo->ii_Expressions || indexInfo->ii_Predicate)
898 {
899 Bitmapset *indexattrs = NULL;
900
901 pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
902 pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
903
904 for (i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++)
905 {
906 if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
907 indexattrs))
908 ereport(ERROR,
909 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
910 errmsg("index creation on system columns is not supported")));
911 }
912 }
913
914 /*
915 * Report index creation if appropriate (delay this till after most of the
916 * error checks)
917 */
918 if (stmt->isconstraint && !quiet)
919 {
920 const char *constraint_type;
921
922 if (stmt->primary)
923 constraint_type = "PRIMARY KEY";
924 else if (stmt->unique)
925 constraint_type = "UNIQUE";
926 else if (stmt->excludeOpNames != NIL)
927 constraint_type = "EXCLUDE";
928 else
929 {
930 elog(ERROR, "unknown constraint type");
931 constraint_type = NULL; /* keep compiler quiet */
932 }
933
934 ereport(DEBUG1,
935 (errmsg("%s %s will create implicit index \"%s\" for table \"%s\"",
936 is_alter_table ? "ALTER TABLE / ADD" : "CREATE TABLE /",
937 constraint_type,
938 indexRelationName, RelationGetRelationName(rel))));
939 }
940
941 /*
942 * A valid stmt->oldNode implies that we already have a built form of the
943 * index. The caller should also decline any index build.
944 */
945 Assert(!OidIsValid(stmt->oldNode) || (skip_build && !stmt->concurrent));
946
947 /*
948 * Make the catalog entries for the index, including constraints. This
949 * step also actually builds the index, except if caller requested not to
950 * or in concurrent mode, in which case it'll be done later, or doing a
951 * partitioned index (because those don't have storage).
952 */
953 flags = constr_flags = 0;
954 if (stmt->isconstraint)
955 flags |= INDEX_CREATE_ADD_CONSTRAINT;
956 if (skip_build || stmt->concurrent || partitioned)
957 flags |= INDEX_CREATE_SKIP_BUILD;
958 if (stmt->if_not_exists)
959 flags |= INDEX_CREATE_IF_NOT_EXISTS;
960 if (stmt->concurrent)
961 flags |= INDEX_CREATE_CONCURRENT;
962 if (partitioned)
963 flags |= INDEX_CREATE_PARTITIONED;
964 if (stmt->primary)
965 flags |= INDEX_CREATE_IS_PRIMARY;
966
967 /*
968 * If the table is partitioned, and recursion was declined but partitions
969 * exist, mark the index as invalid.
970 */
971 if (partitioned && stmt->relation && !stmt->relation->inh)
972 {
973 PartitionDesc pd = RelationGetPartitionDesc(rel);
974
975 if (pd->nparts != 0)
976 flags |= INDEX_CREATE_INVALID;
977 }
978
979 if (stmt->deferrable)
980 constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
981 if (stmt->initdeferred)
982 constr_flags |= INDEX_CONSTR_CREATE_INIT_DEFERRED;
983
984 indexRelationId =
985 index_create(rel, indexRelationName, indexRelationId, parentIndexId,
986 parentConstraintId,
987 stmt->oldNode, indexInfo, indexColNames,
988 accessMethodId, tablespaceId,
989 collationObjectId, classObjectId,
990 coloptions, reloptions,
991 flags, constr_flags,
992 allowSystemTableMods, !check_rights,
993 &createdConstraintId);
994
995 ObjectAddressSet(address, RelationRelationId, indexRelationId);
996
997 /*
998 * Revert to original default_tablespace. Must do this before any return
999 * from this function, but after index_create, so this is a good time.
1000 */
1001 if (save_nestlevel >= 0)
1002 AtEOXact_GUC(true, save_nestlevel);
1003
1004 if (!OidIsValid(indexRelationId))
1005 {
1006 table_close(rel, NoLock);
1007
1008 /* If this is the top-level index, we're done */
1009 if (!OidIsValid(parentIndexId))
1010 pgstat_progress_end_command();
1011
1012 return address;
1013 }
1014
1015 /* Add any requested comment */
1016 if (stmt->idxcomment != NULL)
1017 CreateComments(indexRelationId, RelationRelationId, 0,
1018 stmt->idxcomment);
1019
1020 if (partitioned)
1021 {
1022 /*
1023 * Unless caller specified to skip this step (via ONLY), process each
1024 * partition to make sure they all contain a corresponding index.
1025 *
1026 * If we're called internally (no stmt->relation), recurse always.
1027 */
1028 if (!stmt->relation || stmt->relation->inh)
1029 {
1030 PartitionDesc partdesc = RelationGetPartitionDesc(rel);
1031 int nparts = partdesc->nparts;
1032 Oid *part_oids = palloc(sizeof(Oid) * nparts);
1033 bool invalidate_parent = false;
1034 TupleDesc parentDesc;
1035 Oid *opfamOids;
1036
1037 pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
1038 nparts);
1039
1040 memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
1041
1042 parentDesc = RelationGetDescr(rel);
1043 opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
1044 for (i = 0; i < numberOfKeyAttributes; i++)
1045 opfamOids[i] = get_opclass_family(classObjectId[i]);
1046
1047 /*
1048 * For each partition, scan all existing indexes; if one matches
1049 * our index definition and is not already attached to some other
1050 * parent index, attach it to the one we just created.
1051 *
1052 * If none matches, build a new index by calling ourselves
1053 * recursively with the same options (except for the index name).
1054 */
1055 for (i = 0; i < nparts; i++)
1056 {
1057 Oid childRelid = part_oids[i];
1058 Relation childrel;
1059 List *childidxs;
1060 ListCell *cell;
1061 AttrNumber *attmap;
1062 bool found = false;
1063 int maplen;
1064
1065 childrel = table_open(childRelid, lockmode);
1066
1067 /*
1068 * Don't try to create indexes on foreign tables, though. Skip
1069 * those if a regular index, or fail if trying to create a
1070 * constraint index.
1071 */
1072 if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1073 {
1074 if (stmt->unique || stmt->primary)
1075 ereport(ERROR,
1076 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1077 errmsg("cannot create unique index on partitioned table \"%s\"",
1078 RelationGetRelationName(rel)),
1079 errdetail("Table \"%s\" contains partitions that are foreign tables.",
1080 RelationGetRelationName(rel))));
1081
1082 table_close(childrel, lockmode);
1083 continue;
1084 }
1085
1086 childidxs = RelationGetIndexList(childrel);
1087 attmap =
1088 convert_tuples_by_name_map(RelationGetDescr(childrel),
1089 parentDesc,
1090 gettext_noop("could not convert row type"));
1091 maplen = parentDesc->natts;
1092
1093 foreach(cell, childidxs)
1094 {
1095 Oid cldidxid = lfirst_oid(cell);
1096 Relation cldidx;
1097 IndexInfo *cldIdxInfo;
1098
1099 /* this index is already partition of another one */
1100 if (has_superclass(cldidxid))
1101 continue;
1102
1103 cldidx = index_open(cldidxid, lockmode);
1104 cldIdxInfo = BuildIndexInfo(cldidx);
1105 if (CompareIndexInfo(cldIdxInfo, indexInfo,
1106 cldidx->rd_indcollation,
1107 collationObjectId,
1108 cldidx->rd_opfamily,
1109 opfamOids,
1110 attmap, maplen))
1111 {
1112 Oid cldConstrOid = InvalidOid;
1113
1114 /*
1115 * Found a match.
1116 *
1117 * If this index is being created in the parent
1118 * because of a constraint, then the child needs to
1119 * have a constraint also, so look for one. If there
1120 * is no such constraint, this index is no good, so
1121 * keep looking.
1122 */
1123 if (createdConstraintId != InvalidOid)
1124 {
1125 cldConstrOid =
1126 get_relation_idx_constraint_oid(childRelid,
1127 cldidxid);
1128 if (cldConstrOid == InvalidOid)
1129 {
1130 index_close(cldidx, lockmode);
1131 continue;
1132 }
1133 }
1134
1135 /* Attach index to parent and we're done. */
1136 IndexSetParentIndex(cldidx, indexRelationId);
1137 if (createdConstraintId != InvalidOid)
1138 ConstraintSetParentConstraint(cldConstrOid,
1139 createdConstraintId,
1140 childRelid);
1141
1142 if (!cldidx->rd_index->indisvalid)
1143 invalidate_parent = true;
1144
1145 found = true;
1146 /* keep lock till commit */
1147 index_close(cldidx, NoLock);
1148 break;
1149 }
1150
1151 index_close(cldidx, lockmode);
1152 }
1153
1154 list_free(childidxs);
1155 table_close(childrel, NoLock);
1156
1157 /*
1158 * If no matching index was found, create our own.
1159 */
1160 if (!found)
1161 {
1162 IndexStmt *childStmt = copyObject(stmt);
1163 bool found_whole_row;
1164 ListCell *lc;
1165
1166 /*
1167 * We can't use the same index name for the child index,
1168 * so clear idxname to let the recursive invocation choose
1169 * a new name. Likewise, the existing target relation
1170 * field is wrong, and if indexOid or oldNode are set,
1171 * they mustn't be applied to the child either.
1172 */
1173 childStmt->idxname = NULL;
1174 childStmt->relation = NULL;
1175 childStmt->indexOid = InvalidOid;
1176 childStmt->oldNode = InvalidOid;
1177
1178 /*
1179 * Adjust any Vars (both in expressions and in the index's
1180 * WHERE clause) to match the partition's column numbering
1181 * in case it's different from the parent's.
1182 */
1183 foreach(lc, childStmt->indexParams)
1184 {
1185 IndexElem *ielem = lfirst(lc);
1186
1187 /*
1188 * If the index parameter is an expression, we must
1189 * translate it to contain child Vars.
1190 */
1191 if (ielem->expr)
1192 {
1193 ielem->expr =
1194 map_variable_attnos((Node *) ielem->expr,
1195 1, 0, attmap, maplen,
1196 InvalidOid,
1197 &found_whole_row);
1198 if (found_whole_row)
1199 elog(ERROR, "cannot convert whole-row table reference");
1200 }
1201 }
1202 childStmt->whereClause =
1203 map_variable_attnos(stmt->whereClause, 1, 0,
1204 attmap, maplen,
1205 InvalidOid, &found_whole_row);
1206 if (found_whole_row)
1207 elog(ERROR, "cannot convert whole-row table reference");
1208
1209 DefineIndex(childRelid, childStmt,
1210 InvalidOid, /* no predefined OID */
1211 indexRelationId, /* this is our child */
1212 createdConstraintId,
1213 is_alter_table, check_rights, check_not_in_use,
1214 skip_build, quiet);
1215 }
1216
1217 pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
1218 i + 1);
1219 pfree(attmap);
1220 }
1221
1222 /*
1223 * The pg_index row we inserted for this index was marked
1224 * indisvalid=true. But if we attached an existing index that is
1225 * invalid, this is incorrect, so update our row to invalid too.
1226 */
1227 if (invalidate_parent)
1228 {
1229 Relation pg_index = table_open(IndexRelationId, RowExclusiveLock);
1230 HeapTuple tup,
1231 newtup;
1232
1233 tup = SearchSysCache1(INDEXRELID,
1234 ObjectIdGetDatum(indexRelationId));
1235 if (!HeapTupleIsValid(tup))
1236 elog(ERROR, "cache lookup failed for index %u",
1237 indexRelationId);
1238 newtup = heap_copytuple(tup);
1239 ((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
1240 CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
1241 ReleaseSysCache(tup);
1242 table_close(pg_index, RowExclusiveLock);
1243 heap_freetuple(newtup);
1244 }
1245 }
1246
1247 /*
1248 * Indexes on partitioned tables are not themselves built, so we're
1249 * done here.
1250 */
1251 table_close(rel, NoLock);
1252 if (!OidIsValid(parentIndexId))
1253 pgstat_progress_end_command();
1254 return address;
1255 }
1256
1257 if (!stmt->concurrent)
1258 {
1259 /* Close the heap and we're done, in the non-concurrent case */
1260 table_close(rel, NoLock);
1261
1262 /* If this is the top-level index, we're done. */
1263 if (!OidIsValid(parentIndexId))
1264 pgstat_progress_end_command();
1265
1266 return address;
1267 }
1268
1269 /* save lockrelid and locktag for below, then close rel */
1270 heaprelid = rel->rd_lockInfo.lockRelId;
1271 SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
1272 table_close(rel, NoLock);
1273
1274 /*
1275 * For a concurrent build, it's important to make the catalog entries
1276 * visible to other transactions before we start to build the index. That
1277 * will prevent them from making incompatible HOT updates. The new index
1278 * will be marked not indisready and not indisvalid, so that no one else
1279 * tries to either insert into it or use it for queries.
1280 *
1281 * We must commit our current transaction so that the index becomes
1282 * visible; then start another. Note that all the data structures we just
1283 * built are lost in the commit. The only data we keep past here are the
1284 * relation IDs.
1285 *
1286 * Before committing, get a session-level lock on the table, to ensure
1287 * that neither it nor the index can be dropped before we finish. This
1288 * cannot block, even if someone else is waiting for access, because we
1289 * already have the same lock within our transaction.
1290 *
1291 * Note: we don't currently bother with a session lock on the index,
1292 * because there are no operations that could change its state while we
1293 * hold lock on the parent table. This might need to change later.
1294 */
1295 LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1296
1297 PopActiveSnapshot();
1298 CommitTransactionCommand();
1299 StartTransactionCommand();
1300
1301 /*
1302 * The index is now visible, so we can report the OID.
1303 */
1304 pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
1305 indexRelationId);
1306
1307 /*
1308 * Phase 2 of concurrent index build (see comments for validate_index()
1309 * for an overview of how this works)
1310 *
1311 * Now we must wait until no running transaction could have the table open
1312 * with the old list of indexes. Use ShareLock to consider running
1313 * transactions that hold locks that permit writing to the table. Note we
1314 * do not need to worry about xacts that open the table for writing after
1315 * this point; they will see the new index when they open it.
1316 *
1317 * Note: the reason we use actual lock acquisition here, rather than just
1318 * checking the ProcArray and sleeping, is that deadlock is possible if
1319 * one of the transactions in question is blocked trying to acquire an
1320 * exclusive lock on our table. The lock code will detect deadlock and
1321 * error out properly.
1322 */
1323 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1324 PROGRESS_CREATEIDX_PHASE_WAIT_1);
1325 WaitForLockers(heaplocktag, ShareLock, true);
1326
1327 /*
1328 * At this moment we are sure that there are no transactions with the
1329 * table open for write that don't have this new index in their list of
1330 * indexes. We have waited out all the existing transactions and any new
1331 * transaction will have the new index in its list, but the index is still
1332 * marked as "not-ready-for-inserts". The index is consulted while
1333 * deciding HOT-safety though. This arrangement ensures that no new HOT
1334 * chains can be created where the new tuple and the old tuple in the
1335 * chain have different index keys.
1336 *
1337 * We now take a new snapshot, and build the index using all tuples that
1338 * are visible in this snapshot. We can be sure that any HOT updates to
1339 * these tuples will be compatible with the index, since any updates made
1340 * by transactions that didn't know about the index are now committed or
1341 * rolled back. Thus, each visible tuple is either the end of its
1342 * HOT-chain or the extension of the chain is HOT-safe for this index.
1343 */
1344
1345 /* Set ActiveSnapshot since functions in the indexes may need it */
1346 PushActiveSnapshot(GetTransactionSnapshot());
1347
1348 /* Perform concurrent build of index */
1349 index_concurrently_build(relationId, indexRelationId);
1350
1351 /* we can do away with our snapshot */
1352 PopActiveSnapshot();
1353
1354 /*
1355 * Commit this transaction to make the indisready update visible.
1356 */
1357 CommitTransactionCommand();
1358 StartTransactionCommand();
1359
1360 /*
1361 * Phase 3 of concurrent index build
1362 *
1363 * We once again wait until no transaction can have the table open with
1364 * the index marked as read-only for updates.
1365 */
1366 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1367 PROGRESS_CREATEIDX_PHASE_WAIT_2);
1368 WaitForLockers(heaplocktag, ShareLock, true);
1369
1370 /*
1371 * Now take the "reference snapshot" that will be used by validate_index()
1372 * to filter candidate tuples. Beware! There might still be snapshots in
1373 * use that treat some transaction as in-progress that our reference
1374 * snapshot treats as committed. If such a recently-committed transaction
1375 * deleted tuples in the table, we will not include them in the index; yet
1376 * those transactions which see the deleting one as still-in-progress will
1377 * expect such tuples to be there once we mark the index as valid.
1378 *
1379 * We solve this by waiting for all endangered transactions to exit before
1380 * we mark the index as valid.
1381 *
1382 * We also set ActiveSnapshot to this snap, since functions in indexes may
1383 * need a snapshot.
1384 */
1385 snapshot = RegisterSnapshot(GetTransactionSnapshot());
1386 PushActiveSnapshot(snapshot);
1387
1388 /*
1389 * Scan the index and the heap, insert any missing index entries.
1390 */
1391 validate_index(relationId, indexRelationId, snapshot);
1392
1393 /*
1394 * Drop the reference snapshot. We must do this before waiting out other
1395 * snapshot holders, else we will deadlock against other processes also
1396 * doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one
1397 * they must wait for. But first, save the snapshot's xmin to use as
1398 * limitXmin for GetCurrentVirtualXIDs().
1399 */
1400 limitXmin = snapshot->xmin;
1401
1402 PopActiveSnapshot();
1403 UnregisterSnapshot(snapshot);
1404
1405 /*
1406 * The snapshot subsystem could still contain registered snapshots that
1407 * are holding back our process's advertised xmin; in particular, if
1408 * default_transaction_isolation = serializable, there is a transaction
1409 * snapshot that is still active. The CatalogSnapshot is likewise a
1410 * hazard. To ensure no deadlocks, we must commit and start yet another
1411 * transaction, and do our wait before any snapshot has been taken in it.
1412 */
1413 CommitTransactionCommand();
1414 StartTransactionCommand();
1415
1416 /* We should now definitely not be advertising any xmin. */
1417 Assert(MyPgXact->xmin == InvalidTransactionId);
1418
1419 /*
1420 * The index is now valid in the sense that it contains all currently
1421 * interesting tuples. But since it might not contain tuples deleted just
1422 * before the reference snap was taken, we have to wait out any
1423 * transactions that might have older snapshots.
1424 */
1425 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1426 PROGRESS_CREATEIDX_PHASE_WAIT_3);
1427 WaitForOlderSnapshots(limitXmin, true);
1428
1429 /*
1430 * Index can now be marked valid -- update its pg_index entry
1431 */
1432 index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
1433
1434 /*
1435 * The pg_index update will cause backends (including this one) to update
1436 * relcache entries for the index itself, but we should also send a
1437 * relcache inval on the parent table to force replanning of cached plans.
1438 * Otherwise existing sessions might fail to use the new index where it
1439 * would be useful. (Note that our earlier commits did not create reasons
1440 * to replan; so relcache flush on the index itself was sufficient.)
1441 */
1442 CacheInvalidateRelcacheByRelid(heaprelid.relId);
1443
1444 /*
1445 * Last thing to do is release the session-level lock on the parent table.
1446 */
1447 UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1448
1449 pgstat_progress_end_command();
1450
1451 return address;
1452}
1453
1454
1455/*
1456 * CheckMutability
1457 * Test whether given expression is mutable
1458 */
1459static bool
1460CheckMutability(Expr *expr)
1461{
1462 /*
1463 * First run the expression through the planner. This has a couple of
1464 * important consequences. First, function default arguments will get
1465 * inserted, which may affect volatility (consider "default now()").
1466 * Second, inline-able functions will get inlined, which may allow us to
1467 * conclude that the function is really less volatile than it's marked. As
1468 * an example, polymorphic functions must be marked with the most volatile
1469 * behavior that they have for any input type, but once we inline the
1470 * function we may be able to conclude that it's not so volatile for the
1471 * particular input type we're dealing with.
1472 *
1473 * We assume here that expression_planner() won't scribble on its input.
1474 */
1475 expr = expression_planner(expr);
1476
1477 /* Now we can search for non-immutable functions */
1478 return contain_mutable_functions((Node *) expr);
1479}
1480
1481
1482/*
1483 * CheckPredicate
1484 * Checks that the given partial-index predicate is valid.
1485 *
1486 * This used to also constrain the form of the predicate to forms that
1487 * indxpath.c could do something with. However, that seems overly
1488 * restrictive. One useful application of partial indexes is to apply
1489 * a UNIQUE constraint across a subset of a table, and in that scenario
1490 * any evaluable predicate will work. So accept any predicate here
1491 * (except ones requiring a plan), and let indxpath.c fend for itself.
1492 */
1493static void
1494CheckPredicate(Expr *predicate)
1495{
1496 /*
1497 * transformExpr() should have already rejected subqueries, aggregates,
1498 * and window functions, based on the EXPR_KIND_ for a predicate.
1499 */
1500
1501 /*
1502 * A predicate using mutable functions is probably wrong, for the same
1503 * reasons that we don't allow an index expression to use one.
1504 */
1505 if (CheckMutability(predicate))
1506 ereport(ERROR,
1507 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1508 errmsg("functions in index predicate must be marked IMMUTABLE")));
1509}
1510
1511/*
1512 * Compute per-index-column information, including indexed column numbers
1513 * or index expressions, opclasses, and indoptions. Note, all output vectors
1514 * should be allocated for all columns, including "including" ones.
1515 */
1516static void
1517ComputeIndexAttrs(IndexInfo *indexInfo,
1518 Oid *typeOidP,
1519 Oid *collationOidP,
1520 Oid *classOidP,
1521 int16 *colOptionP,
1522 List *attList, /* list of IndexElem's */
1523 List *exclusionOpNames,
1524 Oid relId,
1525 const char *accessMethodName,
1526 Oid accessMethodId,
1527 bool amcanorder,
1528 bool isconstraint)
1529{
1530 ListCell *nextExclOp;
1531 ListCell *lc;
1532 int attn;
1533 int nkeycols = indexInfo->ii_NumIndexKeyAttrs;
1534
1535 /* Allocate space for exclusion operator info, if needed */
1536 if (exclusionOpNames)
1537 {
1538 Assert(list_length(exclusionOpNames) == nkeycols);
1539 indexInfo->ii_ExclusionOps = (Oid *) palloc(sizeof(Oid) * nkeycols);
1540 indexInfo->ii_ExclusionProcs = (Oid *) palloc(sizeof(Oid) * nkeycols);
1541 indexInfo->ii_ExclusionStrats = (uint16 *) palloc(sizeof(uint16) * nkeycols);
1542 nextExclOp = list_head(exclusionOpNames);
1543 }
1544 else
1545 nextExclOp = NULL;
1546
1547 /*
1548 * process attributeList
1549 */
1550 attn = 0;
1551 foreach(lc, attList)
1552 {
1553 IndexElem *attribute = (IndexElem *) lfirst(lc);
1554 Oid atttype;
1555 Oid attcollation;
1556
1557 /*
1558 * Process the column-or-expression to be indexed.
1559 */
1560 if (attribute->name != NULL)
1561 {
1562 /* Simple index attribute */
1563 HeapTuple atttuple;
1564 Form_pg_attribute attform;
1565
1566 Assert(attribute->expr == NULL);
1567 atttuple = SearchSysCacheAttName(relId, attribute->name);
1568 if (!HeapTupleIsValid(atttuple))
1569 {
1570 /* difference in error message spellings is historical */
1571 if (isconstraint)
1572 ereport(ERROR,
1573 (errcode(ERRCODE_UNDEFINED_COLUMN),
1574 errmsg("column \"%s\" named in key does not exist",
1575 attribute->name)));
1576 else
1577 ereport(ERROR,
1578 (errcode(ERRCODE_UNDEFINED_COLUMN),
1579 errmsg("column \"%s\" does not exist",
1580 attribute->name)));
1581 }
1582 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
1583 indexInfo->ii_IndexAttrNumbers[attn] = attform->attnum;
1584 atttype = attform->atttypid;
1585 attcollation = attform->attcollation;
1586 ReleaseSysCache(atttuple);
1587 }
1588 else
1589 {
1590 /* Index expression */
1591 Node *expr = attribute->expr;
1592
1593 Assert(expr != NULL);
1594
1595 if (attn >= nkeycols)
1596 ereport(ERROR,
1597 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1598 errmsg("expressions are not supported in included columns")));
1599 atttype = exprType(expr);
1600 attcollation = exprCollation(expr);
1601
1602 /*
1603 * Strip any top-level COLLATE clause. This ensures that we treat
1604 * "x COLLATE y" and "(x COLLATE y)" alike.
1605 */
1606 while (IsA(expr, CollateExpr))
1607 expr = (Node *) ((CollateExpr *) expr)->arg;
1608
1609 if (IsA(expr, Var) &&
1610 ((Var *) expr)->varattno != InvalidAttrNumber)
1611 {
1612 /*
1613 * User wrote "(column)" or "(column COLLATE something)".
1614 * Treat it like simple attribute anyway.
1615 */
1616 indexInfo->ii_IndexAttrNumbers[attn] = ((Var *) expr)->varattno;
1617 }
1618 else
1619 {
1620 indexInfo->ii_IndexAttrNumbers[attn] = 0; /* marks expression */
1621 indexInfo->ii_Expressions = lappend(indexInfo->ii_Expressions,
1622 expr);
1623
1624 /*
1625 * transformExpr() should have already rejected subqueries,
1626 * aggregates, and window functions, based on the EXPR_KIND_
1627 * for an index expression.
1628 */
1629
1630 /*
1631 * An expression using mutable functions is probably wrong,
1632 * since if you aren't going to get the same result for the
1633 * same data every time, it's not clear what the index entries
1634 * mean at all.
1635 */
1636 if (CheckMutability((Expr *) expr))
1637 ereport(ERROR,
1638 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1639 errmsg("functions in index expression must be marked IMMUTABLE")));
1640 }
1641 }
1642
1643 typeOidP[attn] = atttype;
1644
1645 /*
1646 * Included columns have no collation, no opclass and no ordering
1647 * options.
1648 */
1649 if (attn >= nkeycols)
1650 {
1651 if (attribute->collation)
1652 ereport(ERROR,
1653 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1654 errmsg("including column does not support a collation")));
1655 if (attribute->opclass)
1656 ereport(ERROR,
1657 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1658 errmsg("including column does not support an operator class")));
1659 if (attribute->ordering != SORTBY_DEFAULT)
1660 ereport(ERROR,
1661 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1662 errmsg("including column does not support ASC/DESC options")));
1663 if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1664 ereport(ERROR,
1665 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1666 errmsg("including column does not support NULLS FIRST/LAST options")));
1667
1668 classOidP[attn] = InvalidOid;
1669 colOptionP[attn] = 0;
1670 collationOidP[attn] = InvalidOid;
1671 attn++;
1672
1673 continue;
1674 }
1675
1676 /*
1677 * Apply collation override if any
1678 */
1679 if (attribute->collation)
1680 attcollation = get_collation_oid(attribute->collation, false);
1681
1682 /*
1683 * Check we have a collation iff it's a collatable type. The only
1684 * expected failures here are (1) COLLATE applied to a noncollatable
1685 * type, or (2) index expression had an unresolved collation. But we
1686 * might as well code this to be a complete consistency check.
1687 */
1688 if (type_is_collatable(atttype))
1689 {
1690 if (!OidIsValid(attcollation))
1691 ereport(ERROR,
1692 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1693 errmsg("could not determine which collation to use for index expression"),
1694 errhint("Use the COLLATE clause to set the collation explicitly.")));
1695 }
1696 else
1697 {
1698 if (OidIsValid(attcollation))
1699 ereport(ERROR,
1700 (errcode(ERRCODE_DATATYPE_MISMATCH),
1701 errmsg("collations are not supported by type %s",
1702 format_type_be(atttype))));
1703 }
1704
1705 collationOidP[attn] = attcollation;
1706
1707 /*
1708 * Identify the opclass to use.
1709 */
1710 classOidP[attn] = ResolveOpClass(attribute->opclass,
1711 atttype,
1712 accessMethodName,
1713 accessMethodId);
1714
1715 /*
1716 * Identify the exclusion operator, if any.
1717 */
1718 if (nextExclOp)
1719 {
1720 List *opname = (List *) lfirst(nextExclOp);
1721 Oid opid;
1722 Oid opfamily;
1723 int strat;
1724
1725 /*
1726 * Find the operator --- it must accept the column datatype
1727 * without runtime coercion (but binary compatibility is OK)
1728 */
1729 opid = compatible_oper_opid(opname, atttype, atttype, false);
1730
1731 /*
1732 * Only allow commutative operators to be used in exclusion
1733 * constraints. If X conflicts with Y, but Y does not conflict
1734 * with X, bad things will happen.
1735 */
1736 if (get_commutator(opid) != opid)
1737 ereport(ERROR,
1738 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1739 errmsg("operator %s is not commutative",
1740 format_operator(opid)),
1741 errdetail("Only commutative operators can be used in exclusion constraints.")));
1742
1743 /*
1744 * Operator must be a member of the right opfamily, too
1745 */
1746 opfamily = get_opclass_family(classOidP[attn]);
1747 strat = get_op_opfamily_strategy(opid, opfamily);
1748 if (strat == 0)
1749 {
1750 HeapTuple opftuple;
1751 Form_pg_opfamily opfform;
1752
1753 /*
1754 * attribute->opclass might not explicitly name the opfamily,
1755 * so fetch the name of the selected opfamily for use in the
1756 * error message.
1757 */
1758 opftuple = SearchSysCache1(OPFAMILYOID,
1759 ObjectIdGetDatum(opfamily));
1760 if (!HeapTupleIsValid(opftuple))
1761 elog(ERROR, "cache lookup failed for opfamily %u",
1762 opfamily);
1763 opfform = (Form_pg_opfamily) GETSTRUCT(opftuple);
1764
1765 ereport(ERROR,
1766 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1767 errmsg("operator %s is not a member of operator family \"%s\"",
1768 format_operator(opid),
1769 NameStr(opfform->opfname)),
1770 errdetail("The exclusion operator must be related to the index operator class for the constraint.")));
1771 }
1772
1773 indexInfo->ii_ExclusionOps[attn] = opid;
1774 indexInfo->ii_ExclusionProcs[attn] = get_opcode(opid);
1775 indexInfo->ii_ExclusionStrats[attn] = strat;
1776 nextExclOp = lnext(nextExclOp);
1777 }
1778
1779 /*
1780 * Set up the per-column options (indoption field). For now, this is
1781 * zero for any un-ordered index, while ordered indexes have DESC and
1782 * NULLS FIRST/LAST options.
1783 */
1784 colOptionP[attn] = 0;
1785 if (amcanorder)
1786 {
1787 /* default ordering is ASC */
1788 if (attribute->ordering == SORTBY_DESC)
1789 colOptionP[attn] |= INDOPTION_DESC;
1790 /* default null ordering is LAST for ASC, FIRST for DESC */
1791 if (attribute->nulls_ordering == SORTBY_NULLS_DEFAULT)
1792 {
1793 if (attribute->ordering == SORTBY_DESC)
1794 colOptionP[attn] |= INDOPTION_NULLS_FIRST;
1795 }
1796 else if (attribute->nulls_ordering == SORTBY_NULLS_FIRST)
1797 colOptionP[attn] |= INDOPTION_NULLS_FIRST;
1798 }
1799 else
1800 {
1801 /* index AM does not support ordering */
1802 if (attribute->ordering != SORTBY_DEFAULT)
1803 ereport(ERROR,
1804 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1805 errmsg("access method \"%s\" does not support ASC/DESC options",
1806 accessMethodName)));
1807 if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1808 ereport(ERROR,
1809 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1810 errmsg("access method \"%s\" does not support NULLS FIRST/LAST options",
1811 accessMethodName)));
1812 }
1813
1814 attn++;
1815 }
1816}
1817
1818/*
1819 * Resolve possibly-defaulted operator class specification
1820 *
1821 * Note: This is used to resolve operator class specification in index and
1822 * partition key definitions.
1823 */
1824Oid
1825ResolveOpClass(List *opclass, Oid attrType,
1826 const char *accessMethodName, Oid accessMethodId)
1827{
1828 char *schemaname;
1829 char *opcname;
1830 HeapTuple tuple;
1831 Form_pg_opclass opform;
1832 Oid opClassId,
1833 opInputType;
1834
1835 /*
1836 * Release 7.0 removed network_ops, timespan_ops, and datetime_ops, so we
1837 * ignore those opclass names so the default *_ops is used. This can be
1838 * removed in some later release. bjm 2000/02/07
1839 *
1840 * Release 7.1 removes lztext_ops, so suppress that too for a while. tgl
1841 * 2000/07/30
1842 *
1843 * Release 7.2 renames timestamp_ops to timestamptz_ops, so suppress that
1844 * too for awhile. I'm starting to think we need a better approach. tgl
1845 * 2000/10/01
1846 *
1847 * Release 8.0 removes bigbox_ops (which was dead code for a long while
1848 * anyway). tgl 2003/11/11
1849 */
1850 if (list_length(opclass) == 1)
1851 {
1852 char *claname = strVal(linitial(opclass));
1853
1854 if (strcmp(claname, "network_ops") == 0 ||
1855 strcmp(claname, "timespan_ops") == 0 ||
1856 strcmp(claname, "datetime_ops") == 0 ||
1857 strcmp(claname, "lztext_ops") == 0 ||
1858 strcmp(claname, "timestamp_ops") == 0 ||
1859 strcmp(claname, "bigbox_ops") == 0)
1860 opclass = NIL;
1861 }
1862
1863 if (opclass == NIL)
1864 {
1865 /* no operator class specified, so find the default */
1866 opClassId = GetDefaultOpClass(attrType, accessMethodId);
1867 if (!OidIsValid(opClassId))
1868 ereport(ERROR,
1869 (errcode(ERRCODE_UNDEFINED_OBJECT),
1870 errmsg("data type %s has no default operator class for access method \"%s\"",
1871 format_type_be(attrType), accessMethodName),
1872 errhint("You must specify an operator class for the index or define a default operator class for the data type.")));
1873 return opClassId;
1874 }
1875
1876 /*
1877 * Specific opclass name given, so look up the opclass.
1878 */
1879
1880 /* deconstruct the name list */
1881 DeconstructQualifiedName(opclass, &schemaname, &opcname);
1882
1883 if (schemaname)
1884 {
1885 /* Look in specific schema only */
1886 Oid namespaceId;
1887
1888 namespaceId = LookupExplicitNamespace(schemaname, false);
1889 tuple = SearchSysCache3(CLAAMNAMENSP,
1890 ObjectIdGetDatum(accessMethodId),
1891 PointerGetDatum(opcname),
1892 ObjectIdGetDatum(namespaceId));
1893 }
1894 else
1895 {
1896 /* Unqualified opclass name, so search the search path */
1897 opClassId = OpclassnameGetOpcid(accessMethodId, opcname);
1898 if (!OidIsValid(opClassId))
1899 ereport(ERROR,
1900 (errcode(ERRCODE_UNDEFINED_OBJECT),
1901 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
1902 opcname, accessMethodName)));
1903 tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opClassId));
1904 }
1905
1906 if (!HeapTupleIsValid(tuple))
1907 ereport(ERROR,
1908 (errcode(ERRCODE_UNDEFINED_OBJECT),
1909 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
1910 NameListToString(opclass), accessMethodName)));
1911
1912 /*
1913 * Verify that the index operator class accepts this datatype. Note we
1914 * will accept binary compatibility.
1915 */
1916 opform = (Form_pg_opclass) GETSTRUCT(tuple);
1917 opClassId = opform->oid;
1918 opInputType = opform->opcintype;
1919
1920 if (!IsBinaryCoercible(attrType, opInputType))
1921 ereport(ERROR,
1922 (errcode(ERRCODE_DATATYPE_MISMATCH),
1923 errmsg("operator class \"%s\" does not accept data type %s",
1924 NameListToString(opclass), format_type_be(attrType))));
1925
1926 ReleaseSysCache(tuple);
1927
1928 return opClassId;
1929}
1930
1931/*
1932 * GetDefaultOpClass
1933 *
1934 * Given the OIDs of a datatype and an access method, find the default
1935 * operator class, if any. Returns InvalidOid if there is none.
1936 */
1937Oid
1938GetDefaultOpClass(Oid type_id, Oid am_id)
1939{
1940 Oid result = InvalidOid;
1941 int nexact = 0;
1942 int ncompatible = 0;
1943 int ncompatiblepreferred = 0;
1944 Relation rel;
1945 ScanKeyData skey[1];
1946 SysScanDesc scan;
1947 HeapTuple tup;
1948 TYPCATEGORY tcategory;
1949
1950 /* If it's a domain, look at the base type instead */
1951 type_id = getBaseType(type_id);
1952
1953 tcategory = TypeCategory(type_id);
1954
1955 /*
1956 * We scan through all the opclasses available for the access method,
1957 * looking for one that is marked default and matches the target type
1958 * (either exactly or binary-compatibly, but prefer an exact match).
1959 *
1960 * We could find more than one binary-compatible match. If just one is
1961 * for a preferred type, use that one; otherwise we fail, forcing the user
1962 * to specify which one he wants. (The preferred-type special case is a
1963 * kluge for varchar: it's binary-compatible to both text and bpchar, so
1964 * we need a tiebreaker.) If we find more than one exact match, then
1965 * someone put bogus entries in pg_opclass.
1966 */
1967 rel = table_open(OperatorClassRelationId, AccessShareLock);
1968
1969 ScanKeyInit(&skey[0],
1970 Anum_pg_opclass_opcmethod,
1971 BTEqualStrategyNumber, F_OIDEQ,
1972 ObjectIdGetDatum(am_id));
1973
1974 scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true,
1975 NULL, 1, skey);
1976
1977 while (HeapTupleIsValid(tup = systable_getnext(scan)))
1978 {
1979 Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup);
1980
1981 /* ignore altogether if not a default opclass */
1982 if (!opclass->opcdefault)
1983 continue;
1984 if (opclass->opcintype == type_id)
1985 {
1986 nexact++;
1987 result = opclass->oid;
1988 }
1989 else if (nexact == 0 &&
1990 IsBinaryCoercible(type_id, opclass->opcintype))
1991 {
1992 if (IsPreferredType(tcategory, opclass->opcintype))
1993 {
1994 ncompatiblepreferred++;
1995 result = opclass->oid;
1996 }
1997 else if (ncompatiblepreferred == 0)
1998 {
1999 ncompatible++;
2000 result = opclass->oid;
2001 }
2002 }
2003 }
2004
2005 systable_endscan(scan);
2006
2007 table_close(rel, AccessShareLock);
2008
2009 /* raise error if pg_opclass contains inconsistent data */
2010 if (nexact > 1)
2011 ereport(ERROR,
2012 (errcode(ERRCODE_DUPLICATE_OBJECT),
2013 errmsg("there are multiple default operator classes for data type %s",
2014 format_type_be(type_id))));
2015
2016 if (nexact == 1 ||
2017 ncompatiblepreferred == 1 ||
2018 (ncompatiblepreferred == 0 && ncompatible == 1))
2019 return result;
2020
2021 return InvalidOid;
2022}
2023
2024/*
2025 * makeObjectName()
2026 *
2027 * Create a name for an implicitly created index, sequence, constraint,
2028 * extended statistics, etc.
2029 *
2030 * The parameters are typically: the original table name, the original field
2031 * name, and a "type" string (such as "seq" or "pkey"). The field name
2032 * and/or type can be NULL if not relevant.
2033 *
2034 * The result is a palloc'd string.
2035 *
2036 * The basic result we want is "name1_name2_label", omitting "_name2" or
2037 * "_label" when those parameters are NULL. However, we must generate
2038 * a name with less than NAMEDATALEN characters! So, we truncate one or
2039 * both names if necessary to make a short-enough string. The label part
2040 * is never truncated (so it had better be reasonably short).
2041 *
2042 * The caller is responsible for checking uniqueness of the generated
2043 * name and retrying as needed; retrying will be done by altering the
2044 * "label" string (which is why we never truncate that part).
2045 */
2046char *
2047makeObjectName(const char *name1, const char *name2, const char *label)
2048{
2049 char *name;
2050 int overhead = 0; /* chars needed for label and underscores */
2051 int availchars; /* chars available for name(s) */
2052 int name1chars; /* chars allocated to name1 */
2053 int name2chars; /* chars allocated to name2 */
2054 int ndx;
2055
2056 name1chars = strlen(name1);
2057 if (name2)
2058 {
2059 name2chars = strlen(name2);
2060 overhead++; /* allow for separating underscore */
2061 }
2062 else
2063 name2chars = 0;
2064 if (label)
2065 overhead += strlen(label) + 1;
2066
2067 availchars = NAMEDATALEN - 1 - overhead;
2068 Assert(availchars > 0); /* else caller chose a bad label */
2069
2070 /*
2071 * If we must truncate, preferentially truncate the longer name. This
2072 * logic could be expressed without a loop, but it's simple and obvious as
2073 * a loop.
2074 */
2075 while (name1chars + name2chars > availchars)
2076 {
2077 if (name1chars > name2chars)
2078 name1chars--;
2079 else
2080 name2chars--;
2081 }
2082
2083 name1chars = pg_mbcliplen(name1, name1chars, name1chars);
2084 if (name2)
2085 name2chars = pg_mbcliplen(name2, name2chars, name2chars);
2086
2087 /* Now construct the string using the chosen lengths */
2088 name = palloc(name1chars + name2chars + overhead + 1);
2089 memcpy(name, name1, name1chars);
2090 ndx = name1chars;
2091 if (name2)
2092 {
2093 name[ndx++] = '_';
2094 memcpy(name + ndx, name2, name2chars);
2095 ndx += name2chars;
2096 }
2097 if (label)
2098 {
2099 name[ndx++] = '_';
2100 strcpy(name + ndx, label);
2101 }
2102 else
2103 name[ndx] = '\0';
2104
2105 return name;
2106}
2107
2108/*
2109 * Select a nonconflicting name for a new relation. This is ordinarily
2110 * used to choose index names (which is why it's here) but it can also
2111 * be used for sequences, or any autogenerated relation kind.
2112 *
2113 * name1, name2, and label are used the same way as for makeObjectName(),
2114 * except that the label can't be NULL; digits will be appended to the label
2115 * if needed to create a name that is unique within the specified namespace.
2116 *
2117 * If isconstraint is true, we also avoid choosing a name matching any
2118 * existing constraint in the same namespace. (This is stricter than what
2119 * Postgres itself requires, but the SQL standard says that constraint names
2120 * should be unique within schemas, so we follow that for autogenerated
2121 * constraint names.)
2122 *
2123 * Note: it is theoretically possible to get a collision anyway, if someone
2124 * else chooses the same name concurrently. This is fairly unlikely to be
2125 * a problem in practice, especially if one is holding an exclusive lock on
2126 * the relation identified by name1. However, if choosing multiple names
2127 * within a single command, you'd better create the new object and do
2128 * CommandCounterIncrement before choosing the next one!
2129 *
2130 * Returns a palloc'd string.
2131 */
2132char *
2133ChooseRelationName(const char *name1, const char *name2,
2134 const char *label, Oid namespaceid,
2135 bool isconstraint)
2136{
2137 int pass = 0;
2138 char *relname = NULL;
2139 char modlabel[NAMEDATALEN];
2140
2141 /* try the unmodified label first */
2142 StrNCpy(modlabel, label, sizeof(modlabel));
2143
2144 for (;;)
2145 {
2146 relname = makeObjectName(name1, name2, modlabel);
2147
2148 if (!OidIsValid(get_relname_relid(relname, namespaceid)))
2149 {
2150 if (!isconstraint ||
2151 !ConstraintNameExists(relname, namespaceid))
2152 break;
2153 }
2154
2155 /* found a conflict, so try a new name component */
2156 pfree(relname);
2157 snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
2158 }
2159
2160 return relname;
2161}
2162
2163/*
2164 * Select the name to be used for an index.
2165 *
2166 * The argument list is pretty ad-hoc :-(
2167 */
2168static char *
2169ChooseIndexName(const char *tabname, Oid namespaceId,
2170 List *colnames, List *exclusionOpNames,
2171 bool primary, bool isconstraint)
2172{
2173 char *indexname;
2174
2175 if (primary)
2176 {
2177 /* the primary key's name does not depend on the specific column(s) */
2178 indexname = ChooseRelationName(tabname,
2179 NULL,
2180 "pkey",
2181 namespaceId,
2182 true);
2183 }
2184 else if (exclusionOpNames != NIL)
2185 {
2186 indexname = ChooseRelationName(tabname,
2187 ChooseIndexNameAddition(colnames),
2188 "excl",
2189 namespaceId,
2190 true);
2191 }
2192 else if (isconstraint)
2193 {
2194 indexname = ChooseRelationName(tabname,
2195 ChooseIndexNameAddition(colnames),
2196 "key",
2197 namespaceId,
2198 true);
2199 }
2200 else
2201 {
2202 indexname = ChooseRelationName(tabname,
2203 ChooseIndexNameAddition(colnames),
2204 "idx",
2205 namespaceId,
2206 false);
2207 }
2208
2209 return indexname;
2210}
2211
2212/*
2213 * Generate "name2" for a new index given the list of column names for it
2214 * (as produced by ChooseIndexColumnNames). This will be passed to
2215 * ChooseRelationName along with the parent table name and a suitable label.
2216 *
2217 * We know that less than NAMEDATALEN characters will actually be used,
2218 * so we can truncate the result once we've generated that many.
2219 *
2220 * XXX See also ChooseForeignKeyConstraintNameAddition and
2221 * ChooseExtendedStatisticNameAddition.
2222 */
2223static char *
2224ChooseIndexNameAddition(List *colnames)
2225{
2226 char buf[NAMEDATALEN * 2];
2227 int buflen = 0;
2228 ListCell *lc;
2229
2230 buf[0] = '\0';
2231 foreach(lc, colnames)
2232 {
2233 const char *name = (const char *) lfirst(lc);
2234
2235 if (buflen > 0)
2236 buf[buflen++] = '_'; /* insert _ between names */
2237
2238 /*
2239 * At this point we have buflen <= NAMEDATALEN. name should be less
2240 * than NAMEDATALEN already, but use strlcpy for paranoia.
2241 */
2242 strlcpy(buf + buflen, name, NAMEDATALEN);
2243 buflen += strlen(buf + buflen);
2244 if (buflen >= NAMEDATALEN)
2245 break;
2246 }
2247 return pstrdup(buf);
2248}
2249
2250/*
2251 * Select the actual names to be used for the columns of an index, given the
2252 * list of IndexElems for the columns. This is mostly about ensuring the
2253 * names are unique so we don't get a conflicting-attribute-names error.
2254 *
2255 * Returns a List of plain strings (char *, not String nodes).
2256 */
2257static List *
2258ChooseIndexColumnNames(List *indexElems)
2259{
2260 List *result = NIL;
2261 ListCell *lc;
2262
2263 foreach(lc, indexElems)
2264 {
2265 IndexElem *ielem = (IndexElem *) lfirst(lc);
2266 const char *origname;
2267 const char *curname;
2268 int i;
2269 char buf[NAMEDATALEN];
2270
2271 /* Get the preliminary name from the IndexElem */
2272 if (ielem->indexcolname)
2273 origname = ielem->indexcolname; /* caller-specified name */
2274 else if (ielem->name)
2275 origname = ielem->name; /* simple column reference */
2276 else
2277 origname = "expr"; /* default name for expression */
2278
2279 /* If it conflicts with any previous column, tweak it */
2280 curname = origname;
2281 for (i = 1;; i++)
2282 {
2283 ListCell *lc2;
2284 char nbuf[32];
2285 int nlen;
2286
2287 foreach(lc2, result)
2288 {
2289 if (strcmp(curname, (char *) lfirst(lc2)) == 0)
2290 break;
2291 }
2292 if (lc2 == NULL)
2293 break; /* found nonconflicting name */
2294
2295 sprintf(nbuf, "%d", i);
2296
2297 /* Ensure generated names are shorter than NAMEDATALEN */
2298 nlen = pg_mbcliplen(origname, strlen(origname),
2299 NAMEDATALEN - 1 - strlen(nbuf));
2300 memcpy(buf, origname, nlen);
2301 strcpy(buf + nlen, nbuf);
2302 curname = buf;
2303 }
2304
2305 /* And attach to the result list */
2306 result = lappend(result, pstrdup(curname));
2307 }
2308 return result;
2309}
2310
2311/*
2312 * ReindexIndex
2313 * Recreate a specific index.
2314 */
2315void
2316ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
2317{
2318 struct ReindexIndexCallbackState state;
2319 Oid indOid;
2320 Relation irel;
2321 char persistence;
2322
2323 /*
2324 * Find and lock index, and check permissions on table; use callback to
2325 * obtain lock on table first, to avoid deadlock hazard. The lock level
2326 * used here must match the index lock obtained in reindex_index().
2327 */
2328 state.concurrent = concurrent;
2329 state.locked_table_oid = InvalidOid;
2330 indOid = RangeVarGetRelidExtended(indexRelation,
2331 concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock,
2332 0,
2333 RangeVarCallbackForReindexIndex,
2334 &state);
2335
2336 /*
2337 * Obtain the current persistence of the existing index. We already hold
2338 * lock on the index.
2339 */
2340 irel = index_open(indOid, NoLock);
2341
2342 if (irel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2343 {
2344 ReindexPartitionedIndex(irel);
2345 return;
2346 }
2347
2348 persistence = irel->rd_rel->relpersistence;
2349 index_close(irel, NoLock);
2350
2351 if (concurrent)
2352 ReindexRelationConcurrently(indOid, options);
2353 else
2354 reindex_index(indOid, false, persistence,
2355 options | REINDEXOPT_REPORT_PROGRESS);
2356}
2357
2358/*
2359 * Check permissions on table before acquiring relation lock; also lock
2360 * the heap before the RangeVarGetRelidExtended takes the index lock, to avoid
2361 * deadlocks.
2362 */
2363static void
2364RangeVarCallbackForReindexIndex(const RangeVar *relation,
2365 Oid relId, Oid oldRelId, void *arg)
2366{
2367 char relkind;
2368 struct ReindexIndexCallbackState *state = arg;
2369 LOCKMODE table_lockmode;
2370
2371 /*
2372 * Lock level here should match table lock in reindex_index() for
2373 * non-concurrent case and table locks used by index_concurrently_*() for
2374 * concurrent case.
2375 */
2376 table_lockmode = state->concurrent ? ShareUpdateExclusiveLock : ShareLock;
2377
2378 /*
2379 * If we previously locked some other index's heap, and the name we're
2380 * looking up no longer refers to that relation, release the now-useless
2381 * lock.
2382 */
2383 if (relId != oldRelId && OidIsValid(oldRelId))
2384 {
2385 UnlockRelationOid(state->locked_table_oid, table_lockmode);
2386 state->locked_table_oid = InvalidOid;
2387 }
2388
2389 /* If the relation does not exist, there's nothing more to do. */
2390 if (!OidIsValid(relId))
2391 return;
2392
2393 /*
2394 * If the relation does exist, check whether it's an index. But note that
2395 * the relation might have been dropped between the time we did the name
2396 * lookup and now. In that case, there's nothing to do.
2397 */
2398 relkind = get_rel_relkind(relId);
2399 if (!relkind)
2400 return;
2401 if (relkind != RELKIND_INDEX &&
2402 relkind != RELKIND_PARTITIONED_INDEX)
2403 ereport(ERROR,
2404 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2405 errmsg("\"%s\" is not an index", relation->relname)));
2406
2407 /* Check permissions */
2408 if (!pg_class_ownercheck(relId, GetUserId()))
2409 aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX, relation->relname);
2410
2411 /* Lock heap before index to avoid deadlock. */
2412 if (relId != oldRelId)
2413 {
2414 Oid table_oid = IndexGetRelation(relId, true);
2415
2416 /*
2417 * If the OID isn't valid, it means the index was concurrently
2418 * dropped, which is not a problem for us; just return normally.
2419 */
2420 if (OidIsValid(table_oid))
2421 {
2422 LockRelationOid(table_oid, table_lockmode);
2423 state->locked_table_oid = table_oid;
2424 }
2425 }
2426}
2427
2428/*
2429 * ReindexTable
2430 * Recreate all indexes of a table (and of its toast table, if any)
2431 */
2432Oid
2433ReindexTable(RangeVar *relation, int options, bool concurrent)
2434{
2435 Oid heapOid;
2436 bool result;
2437
2438 /* The lock level used here should match reindex_relation(). */
2439 heapOid = RangeVarGetRelidExtended(relation,
2440 concurrent ? ShareUpdateExclusiveLock : ShareLock,
2441 0,
2442 RangeVarCallbackOwnsTable, NULL);
2443
2444 if (concurrent)
2445 {
2446 result = ReindexRelationConcurrently(heapOid, options);
2447
2448 if (!result)
2449 ereport(NOTICE,
2450 (errmsg("table \"%s\" has no indexes that can be reindexed concurrently",
2451 relation->relname)));
2452 }
2453 else
2454 {
2455 result = reindex_relation(heapOid,
2456 REINDEX_REL_PROCESS_TOAST |
2457 REINDEX_REL_CHECK_CONSTRAINTS,
2458 options | REINDEXOPT_REPORT_PROGRESS);
2459 if (!result)
2460 ereport(NOTICE,
2461 (errmsg("table \"%s\" has no indexes to reindex",
2462 relation->relname)));
2463 }
2464
2465 return heapOid;
2466}
2467
2468/*
2469 * ReindexMultipleTables
2470 * Recreate indexes of tables selected by objectName/objectKind.
2471 *
2472 * To reduce the probability of deadlocks, each table is reindexed in a
2473 * separate transaction, so we can release the lock on it right away.
2474 * That means this must not be called within a user transaction block!
2475 */
2476void
2477ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
2478 int options, bool concurrent)
2479{
2480 Oid objectOid;
2481 Relation relationRelation;
2482 TableScanDesc scan;
2483 ScanKeyData scan_keys[1];
2484 HeapTuple tuple;
2485 MemoryContext private_context;
2486 MemoryContext old;
2487 List *relids = NIL;
2488 ListCell *l;
2489 int num_keys;
2490 bool concurrent_warning = false;
2491
2492 AssertArg(objectName);
2493 Assert(objectKind == REINDEX_OBJECT_SCHEMA ||
2494 objectKind == REINDEX_OBJECT_SYSTEM ||
2495 objectKind == REINDEX_OBJECT_DATABASE);
2496
2497 if (objectKind == REINDEX_OBJECT_SYSTEM && concurrent)
2498 ereport(ERROR,
2499 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2500 errmsg("cannot reindex system catalogs concurrently")));
2501
2502 /*
2503 * Get OID of object to reindex, being the database currently being used
2504 * by session for a database or for system catalogs, or the schema defined
2505 * by caller. At the same time do permission checks that need different
2506 * processing depending on the object type.
2507 */
2508 if (objectKind == REINDEX_OBJECT_SCHEMA)
2509 {
2510 objectOid = get_namespace_oid(objectName, false);
2511
2512 if (!pg_namespace_ownercheck(objectOid, GetUserId()))
2513 aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
2514 objectName);
2515 }
2516 else
2517 {
2518 objectOid = MyDatabaseId;
2519
2520 if (strcmp(objectName, get_database_name(objectOid)) != 0)
2521 ereport(ERROR,
2522 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2523 errmsg("can only reindex the currently open database")));
2524 if (!pg_database_ownercheck(objectOid, GetUserId()))
2525 aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
2526 objectName);
2527 }
2528
2529 /*
2530 * Create a memory context that will survive forced transaction commits we
2531 * do below. Since it is a child of PortalContext, it will go away
2532 * eventually even if we suffer an error; there's no need for special
2533 * abort cleanup logic.
2534 */
2535 private_context = AllocSetContextCreate(PortalContext,
2536 "ReindexMultipleTables",
2537 ALLOCSET_SMALL_SIZES);
2538
2539 /*
2540 * Define the search keys to find the objects to reindex. For a schema, we
2541 * select target relations using relnamespace, something not necessary for
2542 * a database-wide operation.
2543 */
2544 if (objectKind == REINDEX_OBJECT_SCHEMA)
2545 {
2546 num_keys = 1;
2547 ScanKeyInit(&scan_keys[0],
2548 Anum_pg_class_relnamespace,
2549 BTEqualStrategyNumber, F_OIDEQ,
2550 ObjectIdGetDatum(objectOid));
2551 }
2552 else
2553 num_keys = 0;
2554
2555 /*
2556 * Scan pg_class to build a list of the relations we need to reindex.
2557 *
2558 * We only consider plain relations and materialized views here (toast
2559 * rels will be processed indirectly by reindex_relation).
2560 */
2561 relationRelation = table_open(RelationRelationId, AccessShareLock);
2562 scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys);
2563 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2564 {
2565 Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
2566 Oid relid = classtuple->oid;
2567
2568 /*
2569 * Only regular tables and matviews can have indexes, so ignore any
2570 * other kind of relation.
2571 *
2572 * It is tempting to also consider partitioned tables here, but that
2573 * has the problem that if the children are in the same schema, they
2574 * would be processed twice. Maybe we could have a separate list of
2575 * partitioned tables, and expand that afterwards into relids,
2576 * ignoring any duplicates.
2577 */
2578 if (classtuple->relkind != RELKIND_RELATION &&
2579 classtuple->relkind != RELKIND_MATVIEW)
2580 continue;
2581
2582 /* Skip temp tables of other backends; we can't reindex them at all */
2583 if (classtuple->relpersistence == RELPERSISTENCE_TEMP &&
2584 !isTempNamespace(classtuple->relnamespace))
2585 continue;
2586
2587 /* Check user/system classification, and optionally skip */
2588 if (objectKind == REINDEX_OBJECT_SYSTEM &&
2589 !IsSystemClass(relid, classtuple))
2590 continue;
2591
2592 /*
2593 * The table can be reindexed if the user is superuser, the table
2594 * owner, or the database/schema owner (but in the latter case, only
2595 * if it's not a shared relation). pg_class_ownercheck includes the
2596 * superuser case, and depending on objectKind we already know that
2597 * the user has permission to run REINDEX on this database or schema
2598 * per the permission checks at the beginning of this routine.
2599 */
2600 if (classtuple->relisshared &&
2601 !pg_class_ownercheck(relid, GetUserId()))
2602 continue;
2603
2604 /*
2605 * Skip system tables, since index_create() would reject indexing them
2606 * concurrently (and it would likely fail if we tried).
2607 */
2608 if (concurrent &&
2609 IsCatalogRelationOid(relid))
2610 {
2611 if (!concurrent_warning)
2612 ereport(WARNING,
2613 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2614 errmsg("cannot reindex system catalogs concurrently, skipping all")));
2615 concurrent_warning = true;
2616 continue;
2617 }
2618
2619 /* Save the list of relation OIDs in private context */
2620 old = MemoryContextSwitchTo(private_context);
2621
2622 /*
2623 * We always want to reindex pg_class first if it's selected to be
2624 * reindexed. This ensures that if there is any corruption in
2625 * pg_class' indexes, they will be fixed before we process any other
2626 * tables. This is critical because reindexing itself will try to
2627 * update pg_class.
2628 */
2629 if (relid == RelationRelationId)
2630 relids = lcons_oid(relid, relids);
2631 else
2632 relids = lappend_oid(relids, relid);
2633
2634 MemoryContextSwitchTo(old);
2635 }
2636 table_endscan(scan);
2637 table_close(relationRelation, AccessShareLock);
2638
2639 /* Now reindex each rel in a separate transaction */
2640 PopActiveSnapshot();
2641 CommitTransactionCommand();
2642 foreach(l, relids)
2643 {
2644 Oid relid = lfirst_oid(l);
2645
2646 StartTransactionCommand();
2647 /* functions in indexes may want a snapshot set */
2648 PushActiveSnapshot(GetTransactionSnapshot());
2649
2650 if (concurrent)
2651 {
2652 (void) ReindexRelationConcurrently(relid, options);
2653 /* ReindexRelationConcurrently() does the verbose output */
2654 }
2655 else
2656 {
2657 bool result;
2658
2659 result = reindex_relation(relid,
2660 REINDEX_REL_PROCESS_TOAST |
2661 REINDEX_REL_CHECK_CONSTRAINTS,
2662 options | REINDEXOPT_REPORT_PROGRESS);
2663
2664 if (result && (options & REINDEXOPT_VERBOSE))
2665 ereport(INFO,
2666 (errmsg("table \"%s.%s\" was reindexed",
2667 get_namespace_name(get_rel_namespace(relid)),
2668 get_rel_name(relid))));
2669
2670 PopActiveSnapshot();
2671 }
2672
2673 CommitTransactionCommand();
2674 }
2675 StartTransactionCommand();
2676
2677 MemoryContextDelete(private_context);
2678}
2679
2680
2681/*
2682 * ReindexRelationConcurrently - process REINDEX CONCURRENTLY for given
2683 * relation OID
2684 *
2685 * 'relationOid' can either belong to an index, a table or a materialized
2686 * view. For tables and materialized views, all its indexes will be rebuilt,
2687 * excluding invalid indexes and any indexes used in exclusion constraints,
2688 * but including its associated toast table indexes. For indexes, the index
2689 * itself will be rebuilt. If 'relationOid' belongs to a partitioned table
2690 * then we issue a warning to mention these are not yet supported.
2691 *
2692 * The locks taken on parent tables and involved indexes are kept until the
2693 * transaction is committed, at which point a session lock is taken on each
2694 * relation. Both of these protect against concurrent schema changes.
2695 *
2696 * Returns true if any indexes have been rebuilt (including toast table's
2697 * indexes, when relevant), otherwise returns false.
2698 */
2699static bool
2700ReindexRelationConcurrently(Oid relationOid, int options)
2701{
2702 List *heapRelationIds = NIL;
2703 List *indexIds = NIL;
2704 List *newIndexIds = NIL;
2705 List *relationLocks = NIL;
2706 List *lockTags = NIL;
2707 ListCell *lc,
2708 *lc2;
2709 MemoryContext private_context;
2710 MemoryContext oldcontext;
2711 char relkind;
2712 char *relationName = NULL;
2713 char *relationNamespace = NULL;
2714 PGRUsage ru0;
2715
2716 /*
2717 * Create a memory context that will survive forced transaction commits we
2718 * do below. Since it is a child of PortalContext, it will go away
2719 * eventually even if we suffer an error; there's no need for special
2720 * abort cleanup logic.
2721 */
2722 private_context = AllocSetContextCreate(PortalContext,
2723 "ReindexConcurrent",
2724 ALLOCSET_SMALL_SIZES);
2725
2726 if (options & REINDEXOPT_VERBOSE)
2727 {
2728 /* Save data needed by REINDEX VERBOSE in private context */
2729 oldcontext = MemoryContextSwitchTo(private_context);
2730
2731 relationName = get_rel_name(relationOid);
2732 relationNamespace = get_namespace_name(get_rel_namespace(relationOid));
2733
2734 pg_rusage_init(&ru0);
2735
2736 MemoryContextSwitchTo(oldcontext);
2737 }
2738
2739 relkind = get_rel_relkind(relationOid);
2740
2741 /*
2742 * Extract the list of indexes that are going to be rebuilt based on the
2743 * list of relation Oids given by caller.
2744 */
2745 switch (relkind)
2746 {
2747 case RELKIND_RELATION:
2748 case RELKIND_MATVIEW:
2749 case RELKIND_TOASTVALUE:
2750 {
2751 /*
2752 * In the case of a relation, find all its indexes including
2753 * toast indexes.
2754 */
2755 Relation heapRelation;
2756
2757 /* Save the list of relation OIDs in private context */
2758 oldcontext = MemoryContextSwitchTo(private_context);
2759
2760 /* Track this relation for session locks */
2761 heapRelationIds = lappend_oid(heapRelationIds, relationOid);
2762
2763 MemoryContextSwitchTo(oldcontext);
2764
2765 if (IsCatalogRelationOid(relationOid))
2766 ereport(ERROR,
2767 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2768 errmsg("cannot reindex system catalogs concurrently")));
2769
2770 /* Open relation to get its indexes */
2771 heapRelation = table_open(relationOid, ShareUpdateExclusiveLock);
2772
2773 /* Add all the valid indexes of relation to list */
2774 foreach(lc, RelationGetIndexList(heapRelation))
2775 {
2776 Oid cellOid = lfirst_oid(lc);
2777 Relation indexRelation = index_open(cellOid,
2778 ShareUpdateExclusiveLock);
2779
2780 if (!indexRelation->rd_index->indisvalid)
2781 ereport(WARNING,
2782 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2783 errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
2784 get_namespace_name(get_rel_namespace(cellOid)),
2785 get_rel_name(cellOid))));
2786 else if (indexRelation->rd_index->indisexclusion)
2787 ereport(WARNING,
2788 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2789 errmsg("cannot reindex exclusion constraint index \"%s.%s\" concurrently, skipping",
2790 get_namespace_name(get_rel_namespace(cellOid)),
2791 get_rel_name(cellOid))));
2792 else
2793 {
2794 /* Save the list of relation OIDs in private context */
2795 oldcontext = MemoryContextSwitchTo(private_context);
2796
2797 indexIds = lappend_oid(indexIds, cellOid);
2798
2799 MemoryContextSwitchTo(oldcontext);
2800 }
2801
2802 index_close(indexRelation, NoLock);
2803 }
2804
2805 /* Also add the toast indexes */
2806 if (OidIsValid(heapRelation->rd_rel->reltoastrelid))
2807 {
2808 Oid toastOid = heapRelation->rd_rel->reltoastrelid;
2809 Relation toastRelation = table_open(toastOid,
2810 ShareUpdateExclusiveLock);
2811
2812 /* Save the list of relation OIDs in private context */
2813 oldcontext = MemoryContextSwitchTo(private_context);
2814
2815 /* Track this relation for session locks */
2816 heapRelationIds = lappend_oid(heapRelationIds, toastOid);
2817
2818 MemoryContextSwitchTo(oldcontext);
2819
2820 foreach(lc2, RelationGetIndexList(toastRelation))
2821 {
2822 Oid cellOid = lfirst_oid(lc2);
2823 Relation indexRelation = index_open(cellOid,
2824 ShareUpdateExclusiveLock);
2825
2826 if (!indexRelation->rd_index->indisvalid)
2827 ereport(WARNING,
2828 (errcode(ERRCODE_INDEX_CORRUPTED),
2829 errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
2830 get_namespace_name(get_rel_namespace(cellOid)),
2831 get_rel_name(cellOid))));
2832 else
2833 {
2834 /*
2835 * Save the list of relation OIDs in private
2836 * context
2837 */
2838 oldcontext = MemoryContextSwitchTo(private_context);
2839
2840 indexIds = lappend_oid(indexIds, cellOid);
2841
2842 MemoryContextSwitchTo(oldcontext);
2843 }
2844
2845 index_close(indexRelation, NoLock);
2846 }
2847
2848 table_close(toastRelation, NoLock);
2849 }
2850
2851 table_close(heapRelation, NoLock);
2852 break;
2853 }
2854 case RELKIND_INDEX:
2855 {
2856 Oid heapId = IndexGetRelation(relationOid, false);
2857
2858 if (IsCatalogRelationOid(heapId))
2859 ereport(ERROR,
2860 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2861 errmsg("cannot reindex system catalogs concurrently")));
2862
2863 /* Save the list of relation OIDs in private context */
2864 oldcontext = MemoryContextSwitchTo(private_context);
2865
2866 /* Track the heap relation of this index for session locks */
2867 heapRelationIds = list_make1_oid(heapId);
2868
2869 /*
2870 * Save the list of relation OIDs in private context. Note
2871 * that invalid indexes are allowed here.
2872 */
2873 indexIds = lappend_oid(indexIds, relationOid);
2874
2875 MemoryContextSwitchTo(oldcontext);
2876 break;
2877 }
2878 case RELKIND_PARTITIONED_TABLE:
2879 /* see reindex_relation() */
2880 ereport(WARNING,
2881 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2882 errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
2883 get_rel_name(relationOid))));
2884 return false;
2885 default:
2886 /* Return error if type of relation is not supported */
2887 ereport(ERROR,
2888 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2889 errmsg("cannot reindex this type of relation concurrently")));
2890 break;
2891 }
2892
2893 /* Definitely no indexes, so leave */
2894 if (indexIds == NIL)
2895 {
2896 PopActiveSnapshot();
2897 return false;
2898 }
2899
2900 Assert(heapRelationIds != NIL);
2901
2902 /*-----
2903 * Now we have all the indexes we want to process in indexIds.
2904 *
2905 * The phases now are:
2906 *
2907 * 1. create new indexes in the catalog
2908 * 2. build new indexes
2909 * 3. let new indexes catch up with tuples inserted in the meantime
2910 * 4. swap index names
2911 * 5. mark old indexes as dead
2912 * 6. drop old indexes
2913 *
2914 * We process each phase for all indexes before moving to the next phase,
2915 * for efficiency.
2916 */
2917
2918 /*
2919 * Phase 1 of REINDEX CONCURRENTLY
2920 *
2921 * Create a new index with the same properties as the old one, but it is
2922 * only registered in catalogs and will be built later. Then get session
2923 * locks on all involved tables. See analogous code in DefineIndex() for
2924 * more detailed comments.
2925 */
2926
2927 foreach(lc, indexIds)
2928 {
2929 char *concurrentName;
2930 Oid indexId = lfirst_oid(lc);
2931 Oid newIndexId;
2932 Relation indexRel;
2933 Relation heapRel;
2934 Relation newIndexRel;
2935 LockRelId *lockrelid;
2936
2937 indexRel = index_open(indexId, ShareUpdateExclusiveLock);
2938 heapRel = table_open(indexRel->rd_index->indrelid,
2939 ShareUpdateExclusiveLock);
2940
2941 pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
2942 RelationGetRelid(heapRel));
2943 pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
2944 PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY);
2945 pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
2946 indexId);
2947 pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
2948 indexRel->rd_rel->relam);
2949
2950 /* Choose a temporary relation name for the new index */
2951 concurrentName = ChooseRelationName(get_rel_name(indexId),
2952 NULL,
2953 "ccnew",
2954 get_rel_namespace(indexRel->rd_index->indrelid),
2955 false);
2956
2957 /* Create new index definition based on given index */
2958 newIndexId = index_concurrently_create_copy(heapRel,
2959 indexId,
2960 concurrentName);
2961
2962 /* Now open the relation of the new index, a lock is also needed on it */
2963 newIndexRel = index_open(indexId, ShareUpdateExclusiveLock);
2964
2965 /*
2966 * Save the list of OIDs and locks in private context
2967 */
2968 oldcontext = MemoryContextSwitchTo(private_context);
2969
2970 newIndexIds = lappend_oid(newIndexIds, newIndexId);
2971
		/*
		 * Save lockrelid to protect each relation from drop then close
		 * relations. The lockrelid on the heap relation is not taken here to
		 * avoid multiple locks taken on the same relation, instead we rely on
		 * heapRelationIds built earlier.
		 */
2978 lockrelid = palloc(sizeof(*lockrelid));
2979 *lockrelid = indexRel->rd_lockInfo.lockRelId;
2980 relationLocks = lappend(relationLocks, lockrelid);
2981 lockrelid = palloc(sizeof(*lockrelid));
2982 *lockrelid = newIndexRel->rd_lockInfo.lockRelId;
2983 relationLocks = lappend(relationLocks, lockrelid);
2984
2985 MemoryContextSwitchTo(oldcontext);
2986
2987 index_close(indexRel, NoLock);
2988 index_close(newIndexRel, NoLock);
2989 table_close(heapRel, NoLock);
2990 }
2991
	/*
	 * Save the heap lock for the following visibility checks, where locks
	 * held by other backends might conflict with this session.
	 */
2996 foreach(lc, heapRelationIds)
2997 {
2998 Relation heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock);
2999 LockRelId *lockrelid;
3000 LOCKTAG *heaplocktag;
3001
3002 /* Save the list of locks in private context */
3003 oldcontext = MemoryContextSwitchTo(private_context);
3004
3005 /* Add lockrelid of heap relation to the list of locked relations */
3006 lockrelid = palloc(sizeof(*lockrelid));
3007 *lockrelid = heapRelation->rd_lockInfo.lockRelId;
3008 relationLocks = lappend(relationLocks, lockrelid);
3009
3010 heaplocktag = (LOCKTAG *) palloc(sizeof(LOCKTAG));
3011
3012 /* Save the LOCKTAG for this parent relation for the wait phase */
3013 SET_LOCKTAG_RELATION(*heaplocktag, lockrelid->dbId, lockrelid->relId);
3014 lockTags = lappend(lockTags, heaplocktag);
3015
3016 MemoryContextSwitchTo(oldcontext);
3017
3018 /* Close heap relation */
3019 table_close(heapRelation, NoLock);
3020 }
3021
3022 /* Get a session-level lock on each table. */
3023 foreach(lc, relationLocks)
3024 {
3025 LockRelId *lockrelid = (LockRelId *) lfirst(lc);
3026
3027 LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
3028 }
3029
3030 PopActiveSnapshot();
3031 CommitTransactionCommand();
3032 StartTransactionCommand();
3033
3034 /*
3035 * Phase 2 of REINDEX CONCURRENTLY
3036 *
3037 * Build the new indexes in a separate transaction for each index to avoid
3038 * having open transactions for an unnecessary long time. But before
3039 * doing that, wait until no running transactions could have the table of
3040 * the index open with the old list of indexes. See "phase 2" in
3041 * DefineIndex() for more details.
3042 */
3043
3044 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3045 PROGRESS_CREATEIDX_PHASE_WAIT_1);
3046 WaitForLockersMultiple(lockTags, ShareLock, true);
3047 CommitTransactionCommand();
3048
3049 forboth(lc, indexIds, lc2, newIndexIds)
3050 {
3051 Relation indexRel;
3052 Oid oldIndexId = lfirst_oid(lc);
3053 Oid newIndexId = lfirst_oid(lc2);
3054 Oid heapId;
3055
3056 CHECK_FOR_INTERRUPTS();
3057
3058 /* Start new transaction for this index's concurrent build */
3059 StartTransactionCommand();
3060
3061 /* Set ActiveSnapshot since functions in the indexes may need it */
3062 PushActiveSnapshot(GetTransactionSnapshot());
3063
3064 /*
3065 * Index relation has been closed by previous commit, so reopen it to
3066 * get its information.
3067 */
3068 indexRel = index_open(oldIndexId, ShareUpdateExclusiveLock);
3069 heapId = indexRel->rd_index->indrelid;
3070 index_close(indexRel, NoLock);
3071
3072 /* Perform concurrent build of new index */
3073 index_concurrently_build(heapId, newIndexId);
3074
3075 PopActiveSnapshot();
3076 CommitTransactionCommand();
3077 }
3078 StartTransactionCommand();
3079
	/*
	 * Phase 3 of REINDEX CONCURRENTLY
	 *
	 * During this phase the new indexes catch up with any new tuples that
	 * were created during the previous phase.  See "phase 3" in DefineIndex()
	 * for more details.
	 */
3087
3088 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3089 PROGRESS_CREATEIDX_PHASE_WAIT_2);
3090 WaitForLockersMultiple(lockTags, ShareLock, true);
3091 CommitTransactionCommand();
3092
3093 foreach(lc, newIndexIds)
3094 {
3095 Oid newIndexId = lfirst_oid(lc);
3096 Oid heapId;
3097 TransactionId limitXmin;
3098 Snapshot snapshot;
3099
3100 CHECK_FOR_INTERRUPTS();
3101
3102 StartTransactionCommand();
3103
3104 heapId = IndexGetRelation(newIndexId, false);
3105
3106 /*
3107 * Take the "reference snapshot" that will be used by validate_index()
3108 * to filter candidate tuples.
3109 */
3110 snapshot = RegisterSnapshot(GetTransactionSnapshot());
3111 PushActiveSnapshot(snapshot);
3112
3113 validate_index(heapId, newIndexId, snapshot);
3114
3115 /*
3116 * We can now do away with our active snapshot, we still need to save
3117 * the xmin limit to wait for older snapshots.
3118 */
3119 limitXmin = snapshot->xmin;
3120
3121 PopActiveSnapshot();
3122 UnregisterSnapshot(snapshot);
3123
3124 /*
3125 * To ensure no deadlocks, we must commit and start yet another
3126 * transaction, and do our wait before any snapshot has been taken in
3127 * it.
3128 */
3129 CommitTransactionCommand();
3130 StartTransactionCommand();
3131
3132 /*
3133 * The index is now valid in the sense that it contains all currently
3134 * interesting tuples. But since it might not contain tuples deleted
3135 * just before the reference snap was taken, we have to wait out any
3136 * transactions that might have older snapshots.
3137 */
3138 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3139 PROGRESS_CREATEIDX_PHASE_WAIT_3);
3140 WaitForOlderSnapshots(limitXmin, true);
3141
3142 CommitTransactionCommand();
3143 }
3144
3145 /*
3146 * Phase 4 of REINDEX CONCURRENTLY
3147 *
3148 * Now that the new indexes have been validated, swap each new index with
3149 * its corresponding old index.
3150 *
3151 * We mark the new indexes as valid and the old indexes as not valid at
3152 * the same time to make sure we only get constraint violations from the
3153 * indexes with the correct names.
3154 */
3155
3156 StartTransactionCommand();
3157
3158 forboth(lc, indexIds, lc2, newIndexIds)
3159 {
3160 char *oldName;
3161 Oid oldIndexId = lfirst_oid(lc);
3162 Oid newIndexId = lfirst_oid(lc2);
3163 Oid heapId;
3164
3165 CHECK_FOR_INTERRUPTS();
3166
3167 heapId = IndexGetRelation(oldIndexId, false);
3168
3169 /* Choose a relation name for old index */
3170 oldName = ChooseRelationName(get_rel_name(oldIndexId),
3171 NULL,
3172 "ccold",
3173 get_rel_namespace(heapId),
3174 false);
3175
3176 /*
3177 * Swap old index with the new one. This also marks the new one as
3178 * valid and the old one as not valid.
3179 */
3180 index_concurrently_swap(newIndexId, oldIndexId, oldName);
3181
3182 /*
3183 * Invalidate the relcache for the table, so that after this commit
3184 * all sessions will refresh any cached plans that might reference the
3185 * index.
3186 */
3187 CacheInvalidateRelcacheByRelid(heapId);
3188
3189 /*
3190 * CCI here so that subsequent iterations see the oldName in the
3191 * catalog and can choose a nonconflicting name for their oldName.
3192 * Otherwise, this could lead to conflicts if a table has two indexes
3193 * whose names are equal for the first NAMEDATALEN-minus-a-few
3194 * characters.
3195 */
3196 CommandCounterIncrement();
3197 }
3198
3199 /* Commit this transaction and make index swaps visible */
3200 CommitTransactionCommand();
3201 StartTransactionCommand();
3202
3203 /*
3204 * Phase 5 of REINDEX CONCURRENTLY
3205 *
3206 * Mark the old indexes as dead. First we must wait until no running
3207 * transaction could be using the index for a query. See also
3208 * index_drop() for more details.
3209 */
3210
3211 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3212 PROGRESS_CREATEIDX_PHASE_WAIT_4);
3213 WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
3214
3215 foreach(lc, indexIds)
3216 {
3217 Oid oldIndexId = lfirst_oid(lc);
3218 Oid heapId;
3219
3220 CHECK_FOR_INTERRUPTS();
3221 heapId = IndexGetRelation(oldIndexId, false);
3222 index_concurrently_set_dead(heapId, oldIndexId);
3223 }
3224
3225 /* Commit this transaction to make the updates visible. */
3226 CommitTransactionCommand();
3227 StartTransactionCommand();
3228
3229 /*
3230 * Phase 6 of REINDEX CONCURRENTLY
3231 *
3232 * Drop the old indexes.
3233 */
3234
3235 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3236 PROGRESS_CREATEIDX_PHASE_WAIT_4);
3237 WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
3238
3239 PushActiveSnapshot(GetTransactionSnapshot());
3240
3241 {
3242 ObjectAddresses *objects = new_object_addresses();
3243
3244 foreach(lc, indexIds)
3245 {
3246 Oid oldIndexId = lfirst_oid(lc);
3247 ObjectAddress object;
3248
3249 object.classId = RelationRelationId;
3250 object.objectId = oldIndexId;
3251 object.objectSubId = 0;
3252
3253 add_exact_object_address(&object, objects);
3254 }
3255
3256 /*
3257 * Use PERFORM_DELETION_CONCURRENT_LOCK so that index_drop() uses the
3258 * right lock level.
3259 */
3260 performMultipleDeletions(objects, DROP_RESTRICT,
3261 PERFORM_DELETION_CONCURRENT_LOCK | PERFORM_DELETION_INTERNAL);
3262 }
3263
3264 PopActiveSnapshot();
3265 CommitTransactionCommand();
3266
3267 /*
3268 * Finally, release the session-level lock on the table.
3269 */
3270 foreach(lc, relationLocks)
3271 {
3272 LockRelId *lockrelid = (LockRelId *) lfirst(lc);
3273
3274 UnlockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
3275 }
3276
3277 /* Start a new transaction to finish process properly */
3278 StartTransactionCommand();
3279
3280 /* Log what we did */
3281 if (options & REINDEXOPT_VERBOSE)
3282 {
3283 if (relkind == RELKIND_INDEX)
3284 ereport(INFO,
3285 (errmsg("index \"%s.%s\" was reindexed",
3286 relationNamespace, relationName),
3287 errdetail("%s.",
3288 pg_rusage_show(&ru0))));
3289 else
3290 {
3291 foreach(lc, newIndexIds)
3292 {
3293 Oid indOid = lfirst_oid(lc);
3294
3295 ereport(INFO,
3296 (errmsg("index \"%s.%s\" was reindexed",
3297 get_namespace_name(get_rel_namespace(indOid)),
3298 get_rel_name(indOid))));
3299 /* Don't show rusage here, since it's not per index. */
3300 }
3301
3302 ereport(INFO,
3303 (errmsg("table \"%s.%s\" was reindexed",
3304 relationNamespace, relationName),
3305 errdetail("%s.",
3306 pg_rusage_show(&ru0))));
3307 }
3308 }
3309
3310 MemoryContextDelete(private_context);
3311
3312 pgstat_progress_end_command();
3313
3314 return true;
3315}
3316
3317/*
3318 * ReindexPartitionedIndex
3319 * Reindex each child of the given partitioned index.
3320 *
3321 * Not yet implemented.
3322 */
3323static void
3324ReindexPartitionedIndex(Relation parentIdx)
3325{
3326 ereport(ERROR,
3327 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3328 errmsg("REINDEX is not yet implemented for partitioned indexes")));
3329}
3330
3331/*
3332 * Insert or delete an appropriate pg_inherits tuple to make the given index
3333 * be a partition of the indicated parent index.
3334 *
3335 * This also corrects the pg_depend information for the affected index.
3336 */
3337void
3338IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
3339{
3340 Relation pg_inherits;
3341 ScanKeyData key[2];
3342 SysScanDesc scan;
3343 Oid partRelid = RelationGetRelid(partitionIdx);
3344 HeapTuple tuple;
3345 bool fix_dependencies;
3346
3347 /* Make sure this is an index */
3348 Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX ||
3349 partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);
3350
3351 /*
3352 * Scan pg_inherits for rows linking our index to some parent.
3353 */
3354 pg_inherits = relation_open(InheritsRelationId, RowExclusiveLock);
3355 ScanKeyInit(&key[0],
3356 Anum_pg_inherits_inhrelid,
3357 BTEqualStrategyNumber, F_OIDEQ,
3358 ObjectIdGetDatum(partRelid));
3359 ScanKeyInit(&key[1],
3360 Anum_pg_inherits_inhseqno,
3361 BTEqualStrategyNumber, F_INT4EQ,
3362 Int32GetDatum(1));
3363 scan = systable_beginscan(pg_inherits, InheritsRelidSeqnoIndexId, true,
3364 NULL, 2, key);
3365 tuple = systable_getnext(scan);
3366
3367 if (!HeapTupleIsValid(tuple))
3368 {
3369 if (parentOid == InvalidOid)
3370 {
3371 /*
3372 * No pg_inherits row, and no parent wanted: nothing to do in this
3373 * case.
3374 */
3375 fix_dependencies = false;
3376 }
3377 else
3378 {
3379 Datum values[Natts_pg_inherits];
3380 bool isnull[Natts_pg_inherits];
3381
3382 /*
3383 * No pg_inherits row exists, and we want a parent for this index,
3384 * so insert it.
3385 */
3386 values[Anum_pg_inherits_inhrelid - 1] = ObjectIdGetDatum(partRelid);
3387 values[Anum_pg_inherits_inhparent - 1] =
3388 ObjectIdGetDatum(parentOid);
3389 values[Anum_pg_inherits_inhseqno - 1] = Int32GetDatum(1);
3390 memset(isnull, false, sizeof(isnull));
3391
3392 tuple = heap_form_tuple(RelationGetDescr(pg_inherits),
3393 values, isnull);
3394 CatalogTupleInsert(pg_inherits, tuple);
3395
3396 fix_dependencies = true;
3397 }
3398 }
3399 else
3400 {
3401 Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple);
3402
3403 if (parentOid == InvalidOid)
3404 {
3405 /*
3406 * There exists a pg_inherits row, which we want to clear; do so.
3407 */
3408 CatalogTupleDelete(pg_inherits, &tuple->t_self);
3409 fix_dependencies = true;
3410 }
3411 else
3412 {
3413 /*
3414 * A pg_inherits row exists. If it's the same we want, then we're
3415 * good; if it differs, that amounts to a corrupt catalog and
3416 * should not happen.
3417 */
3418 if (inhForm->inhparent != parentOid)
3419 {
3420 /* unexpected: we should not get called in this case */
3421 elog(ERROR, "bogus pg_inherit row: inhrelid %u inhparent %u",
3422 inhForm->inhrelid, inhForm->inhparent);
3423 }
3424
3425 /* already in the right state */
3426 fix_dependencies = false;
3427 }
3428 }
3429
3430 /* done with pg_inherits */
3431 systable_endscan(scan);
3432 relation_close(pg_inherits, RowExclusiveLock);
3433
3434 /* set relhassubclass if an index partition has been added to the parent */
3435 if (OidIsValid(parentOid))
3436 SetRelationHasSubclass(parentOid, true);
3437
3438 /* set relispartition correctly on the partition */
3439 update_relispartition(partRelid, OidIsValid(parentOid));
3440
3441 if (fix_dependencies)
3442 {
3443 /*
3444 * Insert/delete pg_depend rows. If setting a parent, add PARTITION
3445 * dependencies on the parent index and the table; if removing a
3446 * parent, delete PARTITION dependencies.
3447 */
3448 if (OidIsValid(parentOid))
3449 {
3450 ObjectAddress partIdx;
3451 ObjectAddress parentIdx;
3452 ObjectAddress partitionTbl;
3453
3454 ObjectAddressSet(partIdx, RelationRelationId, partRelid);
3455 ObjectAddressSet(parentIdx, RelationRelationId, parentOid);
3456 ObjectAddressSet(partitionTbl, RelationRelationId,
3457 partitionIdx->rd_index->indrelid);
3458 recordDependencyOn(&partIdx, &parentIdx,
3459 DEPENDENCY_PARTITION_PRI);
3460 recordDependencyOn(&partIdx, &partitionTbl,
3461 DEPENDENCY_PARTITION_SEC);
3462 }
3463 else
3464 {
3465 deleteDependencyRecordsForClass(RelationRelationId, partRelid,
3466 RelationRelationId,
3467 DEPENDENCY_PARTITION_PRI);
3468 deleteDependencyRecordsForClass(RelationRelationId, partRelid,
3469 RelationRelationId,
3470 DEPENDENCY_PARTITION_SEC);
3471 }
3472
3473 /* make our updates visible */
3474 CommandCounterIncrement();
3475 }
3476}
3477
3478/*
3479 * Subroutine of IndexSetParentIndex to update the relispartition flag of the
3480 * given index to the given value.
3481 */
3482static void
3483update_relispartition(Oid relationId, bool newval)
3484{
3485 HeapTuple tup;
3486 Relation classRel;
3487
3488 classRel = table_open(RelationRelationId, RowExclusiveLock);
3489 tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId));
3490 if (!HeapTupleIsValid(tup))
3491 elog(ERROR, "cache lookup failed for relation %u", relationId);
3492 Assert(((Form_pg_class) GETSTRUCT(tup))->relispartition != newval);
3493 ((Form_pg_class) GETSTRUCT(tup))->relispartition = newval;
3494 CatalogTupleUpdate(classRel, &tup->t_self, tup);
3495 heap_freetuple(tup);
3496 table_close(classRel, RowExclusiveLock);
3497}
3498