1/*-------------------------------------------------------------------------
2 *
3 * relcache.c
4 * POSTGRES relation descriptor cache code
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/utils/cache/relcache.c
12 *
13 *-------------------------------------------------------------------------
14 */
/*
 * INTERFACE ROUTINES
 *      RelationCacheInitialize         - initialize relcache (to empty)
 *      RelationCacheInitializePhase2   - initialize shared-catalog entries
 *      RelationCacheInitializePhase3   - finish initializing relcache
 *      RelationIdGetRelation           - get a reldesc by relation id
 *      RelationClose                   - close an open relation
 *
 * NOTES
 *      The following code contains many undocumented hacks.  Please be
 *      careful when changing it.
 */
27#include "postgres.h"
28
29#include <sys/file.h>
30#include <fcntl.h>
31#include <unistd.h>
32
33#include "access/htup_details.h"
34#include "access/multixact.h"
35#include "access/nbtree.h"
36#include "access/reloptions.h"
37#include "access/sysattr.h"
38#include "access/table.h"
39#include "access/tableam.h"
40#include "access/tupdesc_details.h"
41#include "access/xact.h"
42#include "access/xlog.h"
43#include "catalog/catalog.h"
44#include "catalog/indexing.h"
45#include "catalog/namespace.h"
46#include "catalog/partition.h"
47#include "catalog/pg_am.h"
48#include "catalog/pg_amproc.h"
49#include "catalog/pg_attrdef.h"
50#include "catalog/pg_authid.h"
51#include "catalog/pg_auth_members.h"
52#include "catalog/pg_constraint.h"
53#include "catalog/pg_database.h"
54#include "catalog/pg_namespace.h"
55#include "catalog/pg_opclass.h"
56#include "catalog/pg_partitioned_table.h"
57#include "catalog/pg_proc.h"
58#include "catalog/pg_publication.h"
59#include "catalog/pg_rewrite.h"
60#include "catalog/pg_shseclabel.h"
61#include "catalog/pg_statistic_ext.h"
62#include "catalog/pg_subscription.h"
63#include "catalog/pg_tablespace.h"
64#include "catalog/pg_trigger.h"
65#include "catalog/pg_type.h"
66#include "catalog/schemapg.h"
67#include "catalog/storage.h"
68#include "commands/policy.h"
69#include "commands/trigger.h"
70#include "miscadmin.h"
71#include "nodes/makefuncs.h"
72#include "nodes/nodeFuncs.h"
73#include "optimizer/optimizer.h"
74#include "partitioning/partbounds.h"
75#include "partitioning/partdesc.h"
76#include "rewrite/rewriteDefine.h"
77#include "rewrite/rowsecurity.h"
78#include "storage/lmgr.h"
79#include "storage/smgr.h"
80#include "utils/array.h"
81#include "utils/builtins.h"
82#include "utils/datum.h"
83#include "utils/fmgroids.h"
84#include "utils/inval.h"
85#include "utils/lsyscache.h"
86#include "utils/memutils.h"
87#include "utils/partcache.h"
88#include "utils/relmapper.h"
89#include "utils/resowner_private.h"
90#include "utils/snapmgr.h"
91#include "utils/syscache.h"
92
93
/* version ID value */
#define RELCACHE_INIT_FILEMAGIC     0x573266

/*
 * RECOVER_RELATION_BUILD_MEMORY policy.
 *
 * Unless the builder supplied a value on the compiler command line
 * (-DRECOVER_RELATION_BUILD_MEMORY=1 or =0), the setting defaults to 1 in
 * clobber-cache builds and to 0 in every other build.
 */
#if !defined(RECOVER_RELATION_BUILD_MEMORY)
#if defined(CLOBBER_CACHE_ALWAYS) || defined(CLOBBER_CACHE_RECURSIVELY)
#define RECOVER_RELATION_BUILD_MEMORY 1
#else
#define RECOVER_RELATION_BUILD_MEMORY 0
#endif
#endif                          /* !defined(RECOVER_RELATION_BUILD_MEMORY) */
108
109/*
110 * hardcoded tuple descriptors, contents generated by genbki.pl
111 */
112static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
113static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
114static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
115static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
116static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
117static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
118static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
119static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
120static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
121static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
122
123/*
124 * Hash tables that index the relation cache
125 *
126 * We used to index the cache by both name and OID, but now there
127 * is only an index by OID.
128 */
129typedef struct relidcacheent
130{
131 Oid reloid;
132 Relation reldesc;
133} RelIdCacheEnt;
134
135static HTAB *RelationIdCache;
136
/*
 * Stays false until the critical relcache entries needed to do indexscans
 * on the tables read by relcache building have been prepared.
 */
bool        criticalRelcachesBuilt = false;

/*
 * Stays false until the critical relcache entries for shared catalogs
 * (the tables needed for login) have been prepared.
 */
bool        criticalSharedRelcachesBuilt = false;

/*
 * Number of relcache inval events received since backend startup, counting
 * only rels that are actually in cache.  At present its sole use is to
 * detect whether data about to be written by write_relcache_init_file()
 * might already be obsolete.
 */
static long relcacheInvalsReceived = 0L;
156
157/*
158 * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
159 * cleanup work. This list intentionally has limited size; if it overflows,
160 * we fall back to scanning the whole hashtable. There is no value in a very
161 * large list because (1) at some point, a hash_seq_search scan is faster than
162 * retail lookups, and (2) the value of this is to reduce EOXact work for
163 * short transactions, which can't have dirtied all that many tables anyway.
164 * EOXactListAdd() does not bother to prevent duplicate list entries, so the
165 * cleanup processing must be idempotent.
166 */
167#define MAX_EOXACT_LIST 32
168static Oid eoxact_list[MAX_EOXACT_LIST];
169static int eoxact_list_len = 0;
170static bool eoxact_list_overflowed = false;
171
172#define EOXactListAdd(rel) \
173 do { \
174 if (eoxact_list_len < MAX_EOXACT_LIST) \
175 eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
176 else \
177 eoxact_list_overflowed = true; \
178 } while (0)
179
180/*
181 * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
182 * cleanup work. The array expands as needed; there is no hashtable because
183 * we don't need to access individual items except at EOXact.
184 */
185static TupleDesc *EOXactTupleDescArray;
186static int NextEOXactTupleDescNum = 0;
187static int EOXactTupleDescArrayLen = 0;
188
/*
 * Macros to manipulate the lookup hashtable
 */

/*
 * RelationCacheInsert
 *      Store RELATION in RelationIdCache under its rd_id.
 *
 * If an entry with the same OID already exists (asserted to be permitted
 * via replace_allowed), the new descriptor takes its place.  The displaced
 * descriptor is destroyed when its reference count is zero; otherwise,
 * outside bootstrap mode, a WARNING about the leak is emitted.
 * See comments in RelationBuildDesc and RelationBuildLocalRelation.
 */
#define RelationCacheInsert(RELATION, replace_allowed) \
do { \
    bool        _was_present; \
    RelIdCacheEnt *_slot; \
    Relation    _old_rel; \
    _slot = (RelIdCacheEnt *) hash_search(RelationIdCache, \
                                          (void *) &((RELATION)->rd_id), \
                                          HASH_ENTER, &_was_present); \
    _old_rel = _was_present ? _slot->reldesc : NULL; \
    Assert(!_was_present || (replace_allowed)); \
    _slot->reldesc = (RELATION); \
    if (_was_present) \
    { \
        if (RelationHasReferenceCountZero(_old_rel)) \
            RelationDestroyRelation(_old_rel, false); \
        else if (!IsBootstrapProcessingMode()) \
            elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
                 RelationGetRelationName(_old_rel)); \
    } \
} while(0)
213
/*
 * RelationIdCacheLookup
 *      Set RELATION to the descriptor stored in RelationIdCache for ID,
 *      or to NULL when the hashtable has no such entry.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
    RelIdCacheEnt *_ent = (RelIdCacheEnt *) hash_search(RelationIdCache, \
                                                        (void *) &(ID), \
                                                        HASH_FIND, NULL); \
    RELATION = (_ent != NULL) ? _ent->reldesc : NULL; \
} while(0)
225
/*
 * RelationCacheDelete
 *      Remove RELATION's entry (keyed by rd_id) from RelationIdCache;
 *      emit a WARNING if no such entry was found.
 */
#define RelationCacheDelete(RELATION) \
do { \
    if (hash_search(RelationIdCache, \
                    (void *) &((RELATION)->rd_id), \
                    HASH_REMOVE, NULL) == NULL) \
        elog(WARNING, "failed to delete relcache entry for OID %u", \
             (RELATION)->rd_id); \
} while(0)
236
237
238/*
239 * Special cache for opclass-related information
240 *
241 * Note: only default support procs get cached, ie, those with
242 * lefttype = righttype = opcintype.
243 */
244typedef struct opclasscacheent
245{
246 Oid opclassoid; /* lookup key: OID of opclass */
247 bool valid; /* set true after successful fill-in */
248 StrategyNumber numSupport; /* max # of support procs (from pg_am) */
249 Oid opcfamily; /* OID of opclass's family */
250 Oid opcintype; /* OID of opclass's declared input type */
251 RegProcedure *supportProcs; /* OIDs of support procedures */
252} OpClassCacheEnt;
253
254static HTAB *OpClassCache = NULL;
255
256
257/* non-export function prototypes */
258
259static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
260static void RelationClearRelation(Relation relation, bool rebuild);
261
262static void RelationReloadIndexInfo(Relation relation);
263static void RelationReloadNailed(Relation relation);
264static void RelationFlushRelation(Relation relation);
265static void RememberToFreeTupleDescAtEOX(TupleDesc td);
266static void AtEOXact_cleanup(Relation relation, bool isCommit);
267static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
268 SubTransactionId mySubid, SubTransactionId parentSubid);
269static bool load_relcache_init_file(bool shared);
270static void write_relcache_init_file(bool shared);
271static void write_item(const void *data, Size len, FILE *fp);
272
273static void formrdesc(const char *relationName, Oid relationReltype,
274 bool isshared, int natts, const FormData_pg_attribute *attrs);
275
276static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
277static Relation AllocateRelationDesc(Form_pg_class relp);
278static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
279static void RelationBuildTupleDesc(Relation relation);
280static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
281static void RelationInitPhysicalAddr(Relation relation);
282static void load_critical_index(Oid indexoid, Oid heapoid);
283static TupleDesc GetPgClassDescriptor(void);
284static TupleDesc GetPgIndexDescriptor(void);
285static void AttrDefaultFetch(Relation relation);
286static void CheckConstraintFetch(Relation relation);
287static int CheckConstraintCmp(const void *a, const void *b);
288static List *insert_ordered_oid(List *list, Oid datum);
289static void InitIndexAmRoutine(Relation relation);
290static void IndexSupportInitialize(oidvector *indclass,
291 RegProcedure *indexSupport,
292 Oid *opFamily,
293 Oid *opcInType,
294 StrategyNumber maxSupportNumber,
295 AttrNumber maxAttributeNumber);
296static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
297 StrategyNumber numSupport);
298static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
299static void unlink_initfile(const char *initfilename, int elevel);
300
301
302/*
303 * ScanPgRelation
304 *
305 * This is used by RelationBuildDesc to find a pg_class
306 * tuple matching targetRelId. The caller must hold at least
307 * AccessShareLock on the target relid to prevent concurrent-update
308 * scenarios; it isn't guaranteed that all scans used to build the
309 * relcache entry will use the same snapshot. If, for example,
310 * an attribute were to be added after scanning pg_class and before
311 * scanning pg_attribute, relnatts wouldn't match.
312 *
313 * NB: the returned tuple has been copied into palloc'd storage
314 * and must eventually be freed with heap_freetuple.
315 */
316static HeapTuple
317ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
318{
319 HeapTuple pg_class_tuple;
320 Relation pg_class_desc;
321 SysScanDesc pg_class_scan;
322 ScanKeyData key[1];
323 Snapshot snapshot;
324
325 /*
326 * If something goes wrong during backend startup, we might find ourselves
327 * trying to read pg_class before we've selected a database. That ain't
328 * gonna work, so bail out with a useful error message. If this happens,
329 * it probably means a relcache entry that needs to be nailed isn't.
330 */
331 if (!OidIsValid(MyDatabaseId))
332 elog(FATAL, "cannot read pg_class without having selected a database");
333
334 /*
335 * form a scan key
336 */
337 ScanKeyInit(&key[0],
338 Anum_pg_class_oid,
339 BTEqualStrategyNumber, F_OIDEQ,
340 ObjectIdGetDatum(targetRelId));
341
342 /*
343 * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
344 * built the critical relcache entries (this includes initdb and startup
345 * without a pg_internal.init file). The caller can also force a heap
346 * scan by setting indexOK == false.
347 */
348 pg_class_desc = table_open(RelationRelationId, AccessShareLock);
349
350 /*
351 * The caller might need a tuple that's newer than the one the historic
352 * snapshot; currently the only case requiring to do so is looking up the
353 * relfilenode of non mapped system relations during decoding.
354 */
355 if (force_non_historic)
356 snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
357 else
358 snapshot = GetCatalogSnapshot(RelationRelationId);
359
360 pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
361 indexOK && criticalRelcachesBuilt,
362 snapshot,
363 1, key);
364
365 pg_class_tuple = systable_getnext(pg_class_scan);
366
367 /*
368 * Must copy tuple before releasing buffer.
369 */
370 if (HeapTupleIsValid(pg_class_tuple))
371 pg_class_tuple = heap_copytuple(pg_class_tuple);
372
373 /* all done */
374 systable_endscan(pg_class_scan);
375 table_close(pg_class_desc, AccessShareLock);
376
377 return pg_class_tuple;
378}
379
380/*
381 * AllocateRelationDesc
382 *
383 * This is used to allocate memory for a new relation descriptor
384 * and initialize the rd_rel field from the given pg_class tuple.
385 */
386static Relation
387AllocateRelationDesc(Form_pg_class relp)
388{
389 Relation relation;
390 MemoryContext oldcxt;
391 Form_pg_class relationForm;
392
393 /* Relcache entries must live in CacheMemoryContext */
394 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
395
396 /*
397 * allocate and zero space for new relation descriptor
398 */
399 relation = (Relation) palloc0(sizeof(RelationData));
400
401 /* make sure relation is marked as having no open file yet */
402 relation->rd_smgr = NULL;
403
404 /*
405 * Copy the relation tuple form
406 *
407 * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
408 * variable-length fields (relacl, reloptions) are NOT stored in the
409 * relcache --- there'd be little point in it, since we don't copy the
410 * tuple's nulls bitmap and hence wouldn't know if the values are valid.
411 * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
412 * it from the syscache if you need it. The same goes for the original
413 * form of reloptions (however, we do store the parsed form of reloptions
414 * in rd_options).
415 */
416 relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
417
418 memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
419
420 /* initialize relation tuple form */
421 relation->rd_rel = relationForm;
422
423 /* and allocate attribute tuple form storage */
424 relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts);
425 /* which we mark as a reference-counted tupdesc */
426 relation->rd_att->tdrefcount = 1;
427
428 MemoryContextSwitchTo(oldcxt);
429
430 return relation;
431}
432
433/*
434 * RelationParseRelOptions
435 * Convert pg_class.reloptions into pre-parsed rd_options
436 *
437 * tuple is the real pg_class tuple (not rd_rel!) for relation
438 *
439 * Note: rd_rel and (if an index) rd_indam must be valid already
440 */
441static void
442RelationParseRelOptions(Relation relation, HeapTuple tuple)
443{
444 bytea *options;
445 amoptions_function amoptsfn;
446
447 relation->rd_options = NULL;
448
449 /*
450 * Look up any AM-specific parse function; fall out if relkind should not
451 * have options.
452 */
453 switch (relation->rd_rel->relkind)
454 {
455 case RELKIND_RELATION:
456 case RELKIND_TOASTVALUE:
457 case RELKIND_VIEW:
458 case RELKIND_MATVIEW:
459 case RELKIND_PARTITIONED_TABLE:
460 amoptsfn = NULL;
461 break;
462 case RELKIND_INDEX:
463 case RELKIND_PARTITIONED_INDEX:
464 amoptsfn = relation->rd_indam->amoptions;
465 break;
466 default:
467 return;
468 }
469
470 /*
471 * Fetch reloptions from tuple; have to use a hardwired descriptor because
472 * we might not have any other for pg_class yet (consider executing this
473 * code for pg_class itself)
474 */
475 options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
476
477 /*
478 * Copy parsed data into CacheMemoryContext. To guard against the
479 * possibility of leaks in the reloptions code, we want to do the actual
480 * parsing in the caller's memory context and copy the results into
481 * CacheMemoryContext after the fact.
482 */
483 if (options)
484 {
485 relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
486 VARSIZE(options));
487 memcpy(relation->rd_options, options, VARSIZE(options));
488 pfree(options);
489 }
490}
491
492/*
493 * RelationBuildTupleDesc
494 *
495 * Form the relation's tuple descriptor from information in
496 * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
497 */
498static void
499RelationBuildTupleDesc(Relation relation)
500{
501 HeapTuple pg_attribute_tuple;
502 Relation pg_attribute_desc;
503 SysScanDesc pg_attribute_scan;
504 ScanKeyData skey[2];
505 int need;
506 TupleConstr *constr;
507 AttrDefault *attrdef = NULL;
508 AttrMissing *attrmiss = NULL;
509 int ndef = 0;
510
511 /* copy some fields from pg_class row to rd_att */
512 relation->rd_att->tdtypeid = relation->rd_rel->reltype;
513 relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
514
515 constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
516 sizeof(TupleConstr));
517 constr->has_not_null = false;
518 constr->has_generated_stored = false;
519
520 /*
521 * Form a scan key that selects only user attributes (attnum > 0).
522 * (Eliminating system attribute rows at the index level is lots faster
523 * than fetching them.)
524 */
525 ScanKeyInit(&skey[0],
526 Anum_pg_attribute_attrelid,
527 BTEqualStrategyNumber, F_OIDEQ,
528 ObjectIdGetDatum(RelationGetRelid(relation)));
529 ScanKeyInit(&skey[1],
530 Anum_pg_attribute_attnum,
531 BTGreaterStrategyNumber, F_INT2GT,
532 Int16GetDatum(0));
533
534 /*
535 * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
536 * built the critical relcache entries (this includes initdb and startup
537 * without a pg_internal.init file).
538 */
539 pg_attribute_desc = table_open(AttributeRelationId, AccessShareLock);
540 pg_attribute_scan = systable_beginscan(pg_attribute_desc,
541 AttributeRelidNumIndexId,
542 criticalRelcachesBuilt,
543 NULL,
544 2, skey);
545
546 /*
547 * add attribute data to relation->rd_att
548 */
549 need = RelationGetNumberOfAttributes(relation);
550
551 while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
552 {
553 Form_pg_attribute attp;
554 int attnum;
555
556 attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
557
558 attnum = attp->attnum;
559 if (attnum <= 0 || attnum > RelationGetNumberOfAttributes(relation))
560 elog(ERROR, "invalid attribute number %d for %s",
561 attp->attnum, RelationGetRelationName(relation));
562
563
564 memcpy(TupleDescAttr(relation->rd_att, attnum - 1),
565 attp,
566 ATTRIBUTE_FIXED_PART_SIZE);
567
568 /* Update constraint/default info */
569 if (attp->attnotnull)
570 constr->has_not_null = true;
571 if (attp->attgenerated == ATTRIBUTE_GENERATED_STORED)
572 constr->has_generated_stored = true;
573
574 /* If the column has a default, fill it into the attrdef array */
575 if (attp->atthasdef)
576 {
577 if (attrdef == NULL)
578 attrdef = (AttrDefault *)
579 MemoryContextAllocZero(CacheMemoryContext,
580 RelationGetNumberOfAttributes(relation) *
581 sizeof(AttrDefault));
582 attrdef[ndef].adnum = attnum;
583 attrdef[ndef].adbin = NULL;
584
585 ndef++;
586 }
587
588 /* Likewise for a missing value */
589 if (attp->atthasmissing)
590 {
591 Datum missingval;
592 bool missingNull;
593
594 /* Do we have a missing value? */
595 missingval = heap_getattr(pg_attribute_tuple,
596 Anum_pg_attribute_attmissingval,
597 pg_attribute_desc->rd_att,
598 &missingNull);
599 if (!missingNull)
600 {
601 /* Yes, fetch from the array */
602 MemoryContext oldcxt;
603 bool is_null;
604 int one = 1;
605 Datum missval;
606
607 if (attrmiss == NULL)
608 attrmiss = (AttrMissing *)
609 MemoryContextAllocZero(CacheMemoryContext,
610 relation->rd_rel->relnatts *
611 sizeof(AttrMissing));
612
613 missval = array_get_element(missingval,
614 1,
615 &one,
616 -1,
617 attp->attlen,
618 attp->attbyval,
619 attp->attalign,
620 &is_null);
621 Assert(!is_null);
622 if (attp->attbyval)
623 {
624 /* for copy by val just copy the datum direct */
625 attrmiss[attnum - 1].am_value = missval;
626 }
627 else
628 {
629 /* otherwise copy in the correct context */
630 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
631 attrmiss[attnum - 1].am_value = datumCopy(missval,
632 attp->attbyval,
633 attp->attlen);
634 MemoryContextSwitchTo(oldcxt);
635 }
636 attrmiss[attnum - 1].am_present = true;
637 }
638 }
639 need--;
640 if (need == 0)
641 break;
642 }
643
644 /*
645 * end the scan and close the attribute relation
646 */
647 systable_endscan(pg_attribute_scan);
648 table_close(pg_attribute_desc, AccessShareLock);
649
650 if (need != 0)
651 elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
652 need, RelationGetRelid(relation));
653
654 /*
655 * The attcacheoff values we read from pg_attribute should all be -1
656 * ("unknown"). Verify this if assert checking is on. They will be
657 * computed when and if needed during tuple access.
658 */
659#ifdef USE_ASSERT_CHECKING
660 {
661 int i;
662
663 for (i = 0; i < RelationGetNumberOfAttributes(relation); i++)
664 Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
665 }
666#endif
667
668 /*
669 * However, we can easily set the attcacheoff value for the first
670 * attribute: it must be zero. This eliminates the need for special cases
671 * for attnum=1 that used to exist in fastgetattr() and index_getattr().
672 */
673 if (RelationGetNumberOfAttributes(relation) > 0)
674 TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
675
676 /*
677 * Set up constraint/default info
678 */
679 if (constr->has_not_null || ndef > 0 ||
680 attrmiss || relation->rd_rel->relchecks)
681 {
682 relation->rd_att->constr = constr;
683
684 if (ndef > 0) /* DEFAULTs */
685 {
686 if (ndef < RelationGetNumberOfAttributes(relation))
687 constr->defval = (AttrDefault *)
688 repalloc(attrdef, ndef * sizeof(AttrDefault));
689 else
690 constr->defval = attrdef;
691 constr->num_defval = ndef;
692 AttrDefaultFetch(relation);
693 }
694 else
695 constr->num_defval = 0;
696
697 constr->missing = attrmiss;
698
699 if (relation->rd_rel->relchecks > 0) /* CHECKs */
700 {
701 constr->num_check = relation->rd_rel->relchecks;
702 constr->check = (ConstrCheck *)
703 MemoryContextAllocZero(CacheMemoryContext,
704 constr->num_check * sizeof(ConstrCheck));
705 CheckConstraintFetch(relation);
706 }
707 else
708 constr->num_check = 0;
709 }
710 else
711 {
712 pfree(constr);
713 relation->rd_att->constr = NULL;
714 }
715}
716
717/*
718 * RelationBuildRuleLock
719 *
720 * Form the relation's rewrite rules from information in
721 * the pg_rewrite system catalog.
722 *
723 * Note: The rule parsetrees are potentially very complex node structures.
724 * To allow these trees to be freed when the relcache entry is flushed,
725 * we make a private memory context to hold the RuleLock information for
726 * each relcache entry that has associated rules. The context is used
727 * just for rule info, not for any other subsidiary data of the relcache
728 * entry, because that keeps the update logic in RelationClearRelation()
729 * manageable. The other subsidiary data structures are simple enough
730 * to be easy to free explicitly, anyway.
731 */
732static void
733RelationBuildRuleLock(Relation relation)
734{
735 MemoryContext rulescxt;
736 MemoryContext oldcxt;
737 HeapTuple rewrite_tuple;
738 Relation rewrite_desc;
739 TupleDesc rewrite_tupdesc;
740 SysScanDesc rewrite_scan;
741 ScanKeyData key;
742 RuleLock *rulelock;
743 int numlocks;
744 RewriteRule **rules;
745 int maxlocks;
746
747 /*
748 * Make the private context. Assume it'll not contain much data.
749 */
750 rulescxt = AllocSetContextCreate(CacheMemoryContext,
751 "relation rules",
752 ALLOCSET_SMALL_SIZES);
753 relation->rd_rulescxt = rulescxt;
754 MemoryContextCopyAndSetIdentifier(rulescxt,
755 RelationGetRelationName(relation));
756
757 /*
758 * allocate an array to hold the rewrite rules (the array is extended if
759 * necessary)
760 */
761 maxlocks = 4;
762 rules = (RewriteRule **)
763 MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
764 numlocks = 0;
765
766 /*
767 * form a scan key
768 */
769 ScanKeyInit(&key,
770 Anum_pg_rewrite_ev_class,
771 BTEqualStrategyNumber, F_OIDEQ,
772 ObjectIdGetDatum(RelationGetRelid(relation)));
773
774 /*
775 * open pg_rewrite and begin a scan
776 *
777 * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
778 * be reading the rules in name order, except possibly during
779 * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
780 * ensures that rules will be fired in name order.
781 */
782 rewrite_desc = table_open(RewriteRelationId, AccessShareLock);
783 rewrite_tupdesc = RelationGetDescr(rewrite_desc);
784 rewrite_scan = systable_beginscan(rewrite_desc,
785 RewriteRelRulenameIndexId,
786 true, NULL,
787 1, &key);
788
789 while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
790 {
791 Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
792 bool isnull;
793 Datum rule_datum;
794 char *rule_str;
795 RewriteRule *rule;
796
797 rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
798 sizeof(RewriteRule));
799
800 rule->ruleId = rewrite_form->oid;
801
802 rule->event = rewrite_form->ev_type - '0';
803 rule->enabled = rewrite_form->ev_enabled;
804 rule->isInstead = rewrite_form->is_instead;
805
806 /*
807 * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
808 * rule strings are often large enough to be toasted. To avoid
809 * leaking memory in the caller's context, do the detoasting here so
810 * we can free the detoasted version.
811 */
812 rule_datum = heap_getattr(rewrite_tuple,
813 Anum_pg_rewrite_ev_action,
814 rewrite_tupdesc,
815 &isnull);
816 Assert(!isnull);
817 rule_str = TextDatumGetCString(rule_datum);
818 oldcxt = MemoryContextSwitchTo(rulescxt);
819 rule->actions = (List *) stringToNode(rule_str);
820 MemoryContextSwitchTo(oldcxt);
821 pfree(rule_str);
822
823 rule_datum = heap_getattr(rewrite_tuple,
824 Anum_pg_rewrite_ev_qual,
825 rewrite_tupdesc,
826 &isnull);
827 Assert(!isnull);
828 rule_str = TextDatumGetCString(rule_datum);
829 oldcxt = MemoryContextSwitchTo(rulescxt);
830 rule->qual = (Node *) stringToNode(rule_str);
831 MemoryContextSwitchTo(oldcxt);
832 pfree(rule_str);
833
834 /*
835 * We want the rule's table references to be checked as though by the
836 * table owner, not the user referencing the rule. Therefore, scan
837 * through the rule's actions and set the checkAsUser field on all
838 * rtable entries. We have to look at the qual as well, in case it
839 * contains sublinks.
840 *
841 * The reason for doing this when the rule is loaded, rather than when
842 * it is stored, is that otherwise ALTER TABLE OWNER would have to
843 * grovel through stored rules to update checkAsUser fields. Scanning
844 * the rule tree during load is relatively cheap (compared to
845 * constructing it in the first place), so we do it here.
846 */
847 setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
848 setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
849
850 if (numlocks >= maxlocks)
851 {
852 maxlocks *= 2;
853 rules = (RewriteRule **)
854 repalloc(rules, sizeof(RewriteRule *) * maxlocks);
855 }
856 rules[numlocks++] = rule;
857 }
858
859 /*
860 * end the scan and close the attribute relation
861 */
862 systable_endscan(rewrite_scan);
863 table_close(rewrite_desc, AccessShareLock);
864
865 /*
866 * there might not be any rules (if relhasrules is out-of-date)
867 */
868 if (numlocks == 0)
869 {
870 relation->rd_rules = NULL;
871 relation->rd_rulescxt = NULL;
872 MemoryContextDelete(rulescxt);
873 return;
874 }
875
876 /*
877 * form a RuleLock and insert into relation
878 */
879 rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
880 rulelock->numLocks = numlocks;
881 rulelock->rules = rules;
882
883 relation->rd_rules = rulelock;
884}
885
886/*
887 * equalRuleLocks
888 *
889 * Determine whether two RuleLocks are equivalent
890 *
891 * Probably this should be in the rules code someplace...
892 */
893static bool
894equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
895{
896 int i;
897
898 /*
899 * As of 7.3 we assume the rule ordering is repeatable, because
900 * RelationBuildRuleLock should read 'em in a consistent order. So just
901 * compare corresponding slots.
902 */
903 if (rlock1 != NULL)
904 {
905 if (rlock2 == NULL)
906 return false;
907 if (rlock1->numLocks != rlock2->numLocks)
908 return false;
909 for (i = 0; i < rlock1->numLocks; i++)
910 {
911 RewriteRule *rule1 = rlock1->rules[i];
912 RewriteRule *rule2 = rlock2->rules[i];
913
914 if (rule1->ruleId != rule2->ruleId)
915 return false;
916 if (rule1->event != rule2->event)
917 return false;
918 if (rule1->enabled != rule2->enabled)
919 return false;
920 if (rule1->isInstead != rule2->isInstead)
921 return false;
922 if (!equal(rule1->qual, rule2->qual))
923 return false;
924 if (!equal(rule1->actions, rule2->actions))
925 return false;
926 }
927 }
928 else if (rlock2 != NULL)
929 return false;
930 return true;
931}
932
933/*
934 * equalPolicy
935 *
936 * Determine whether two policies are equivalent
937 */
938static bool
939equalPolicy(RowSecurityPolicy *policy1, RowSecurityPolicy *policy2)
940{
941 int i;
942 Oid *r1,
943 *r2;
944
945 if (policy1 != NULL)
946 {
947 if (policy2 == NULL)
948 return false;
949
950 if (policy1->polcmd != policy2->polcmd)
951 return false;
952 if (policy1->hassublinks != policy2->hassublinks)
953 return false;
954 if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
955 return false;
956 if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
957 return false;
958
959 r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
960 r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
961
962 for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
963 {
964 if (r1[i] != r2[i])
965 return false;
966 }
967
968 if (!equal(policy1->qual, policy2->qual))
969 return false;
970 if (!equal(policy1->with_check_qual, policy2->with_check_qual))
971 return false;
972 }
973 else if (policy2 != NULL)
974 return false;
975
976 return true;
977}
978
979/*
980 * equalRSDesc
981 *
982 * Determine whether two RowSecurityDesc's are equivalent
983 */
984static bool
985equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
986{
987 ListCell *lc,
988 *rc;
989
990 if (rsdesc1 == NULL && rsdesc2 == NULL)
991 return true;
992
993 if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
994 (rsdesc1 == NULL && rsdesc2 != NULL))
995 return false;
996
997 if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
998 return false;
999
1000 /* RelationBuildRowSecurity should build policies in order */
1001 forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1002 {
1003 RowSecurityPolicy *l = (RowSecurityPolicy *) lfirst(lc);
1004 RowSecurityPolicy *r = (RowSecurityPolicy *) lfirst(rc);
1005
1006 if (!equalPolicy(l, r))
1007 return false;
1008 }
1009
1010 return true;
1011}
1012
1013/*
1014 * RelationBuildDesc
1015 *
1016 * Build a relation descriptor. The caller must hold at least
1017 * AccessShareLock on the target relid.
1018 *
1019 * The new descriptor is inserted into the hash table if insertIt is true.
1020 *
1021 * Returns NULL if no pg_class row could be found for the given relid
1022 * (suggesting we are trying to access a just-deleted relation).
1023 * Any other error is reported via elog.
1024 */
1025static Relation
1026RelationBuildDesc(Oid targetRelId, bool insertIt)
1027{
1028 Relation relation;
1029 Oid relid;
1030 HeapTuple pg_class_tuple;
1031 Form_pg_class relp;
1032
1033 /*
1034 * This function and its subroutines can allocate a good deal of transient
1035 * data in CurrentMemoryContext. Traditionally we've just leaked that
1036 * data, reasoning that the caller's context is at worst of transaction
1037 * scope, and relcache loads shouldn't happen so often that it's essential
1038 * to recover transient data before end of statement/transaction. However
1039 * that's definitely not true in clobber-cache test builds, and perhaps
1040 * it's not true in other cases. If RECOVER_RELATION_BUILD_MEMORY is not
1041 * zero, arrange to allocate the junk in a temporary context that we'll
1042 * free before returning. Make it a child of caller's context so that it
1043 * will get cleaned up appropriately if we error out partway through.
1044 */
1045#if RECOVER_RELATION_BUILD_MEMORY
1046 MemoryContext tmpcxt;
1047 MemoryContext oldcxt;
1048
1049 tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
1050 "RelationBuildDesc workspace",
1051 ALLOCSET_DEFAULT_SIZES);
1052 oldcxt = MemoryContextSwitchTo(tmpcxt);
1053#endif
1054
1055 /*
1056 * find the tuple in pg_class corresponding to the given relation id
1057 */
1058 pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1059
1060 /*
1061 * if no such tuple exists, return NULL
1062 */
1063 if (!HeapTupleIsValid(pg_class_tuple))
1064 {
1065#if RECOVER_RELATION_BUILD_MEMORY
1066 /* Return to caller's context, and blow away the temporary context */
1067 MemoryContextSwitchTo(oldcxt);
1068 MemoryContextDelete(tmpcxt);
1069#endif
1070 return NULL;
1071 }
1072
1073 /*
1074 * get information from the pg_class_tuple
1075 */
1076 relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1077 relid = relp->oid;
1078 Assert(relid == targetRelId);
1079
1080 /*
1081 * allocate storage for the relation descriptor, and copy pg_class_tuple
1082 * to relation->rd_rel.
1083 */
1084 relation = AllocateRelationDesc(relp);
1085
1086 /*
1087 * initialize the relation's relation id (relation->rd_id)
1088 */
1089 RelationGetRelid(relation) = relid;
1090
1091 /*
1092 * normal relations are not nailed into the cache; nor can a pre-existing
1093 * relation be new. It could be temp though. (Actually, it could be new
1094 * too, but it's okay to forget that fact if forced to flush the entry.)
1095 */
1096 relation->rd_refcnt = 0;
1097 relation->rd_isnailed = false;
1098 relation->rd_createSubid = InvalidSubTransactionId;
1099 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1100 switch (relation->rd_rel->relpersistence)
1101 {
1102 case RELPERSISTENCE_UNLOGGED:
1103 case RELPERSISTENCE_PERMANENT:
1104 relation->rd_backend = InvalidBackendId;
1105 relation->rd_islocaltemp = false;
1106 break;
1107 case RELPERSISTENCE_TEMP:
1108 if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1109 {
1110 relation->rd_backend = BackendIdForTempRelations();
1111 relation->rd_islocaltemp = true;
1112 }
1113 else
1114 {
1115 /*
1116 * If it's a temp table, but not one of ours, we have to use
1117 * the slow, grotty method to figure out the owning backend.
1118 *
1119 * Note: it's possible that rd_backend gets set to MyBackendId
1120 * here, in case we are looking at a pg_class entry left over
1121 * from a crashed backend that coincidentally had the same
1122 * BackendId we're using. We should *not* consider such a
1123 * table to be "ours"; this is why we need the separate
1124 * rd_islocaltemp flag. The pg_class entry will get flushed
1125 * if/when we clean out the corresponding temp table namespace
1126 * in preparation for using it.
1127 */
1128 relation->rd_backend =
1129 GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1130 Assert(relation->rd_backend != InvalidBackendId);
1131 relation->rd_islocaltemp = false;
1132 }
1133 break;
1134 default:
1135 elog(ERROR, "invalid relpersistence: %c",
1136 relation->rd_rel->relpersistence);
1137 break;
1138 }
1139
1140 /*
1141 * initialize the tuple descriptor (relation->rd_att).
1142 */
1143 RelationBuildTupleDesc(relation);
1144
1145 /*
1146 * Fetch rules and triggers that affect this relation
1147 */
1148 if (relation->rd_rel->relhasrules)
1149 RelationBuildRuleLock(relation);
1150 else
1151 {
1152 relation->rd_rules = NULL;
1153 relation->rd_rulescxt = NULL;
1154 }
1155
1156 if (relation->rd_rel->relhastriggers)
1157 RelationBuildTriggers(relation);
1158 else
1159 relation->trigdesc = NULL;
1160
1161 if (relation->rd_rel->relrowsecurity)
1162 RelationBuildRowSecurity(relation);
1163 else
1164 relation->rd_rsdesc = NULL;
1165
1166 /* foreign key data is not loaded till asked for */
1167 relation->rd_fkeylist = NIL;
1168 relation->rd_fkeyvalid = false;
1169
1170 /* if a partitioned table, initialize key and partition descriptor info */
1171 if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1172 {
1173 RelationBuildPartitionKey(relation);
1174 RelationBuildPartitionDesc(relation);
1175 }
1176 else
1177 {
1178 relation->rd_partkey = NULL;
1179 relation->rd_partkeycxt = NULL;
1180 relation->rd_partdesc = NULL;
1181 relation->rd_pdcxt = NULL;
1182 }
1183 /* ... but partcheck is not loaded till asked for */
1184 relation->rd_partcheck = NIL;
1185 relation->rd_partcheckvalid = false;
1186 relation->rd_partcheckcxt = NULL;
1187
1188 /*
1189 * initialize access method information
1190 */
1191 switch (relation->rd_rel->relkind)
1192 {
1193 case RELKIND_INDEX:
1194 case RELKIND_PARTITIONED_INDEX:
1195 Assert(relation->rd_rel->relam != InvalidOid);
1196 RelationInitIndexAccessInfo(relation);
1197 break;
1198 case RELKIND_RELATION:
1199 case RELKIND_TOASTVALUE:
1200 case RELKIND_MATVIEW:
1201 Assert(relation->rd_rel->relam != InvalidOid);
1202 RelationInitTableAccessMethod(relation);
1203 break;
1204 case RELKIND_SEQUENCE:
1205 Assert(relation->rd_rel->relam == InvalidOid);
1206 RelationInitTableAccessMethod(relation);
1207 break;
1208 case RELKIND_VIEW:
1209 case RELKIND_COMPOSITE_TYPE:
1210 case RELKIND_FOREIGN_TABLE:
1211 case RELKIND_PARTITIONED_TABLE:
1212 Assert(relation->rd_rel->relam == InvalidOid);
1213 break;
1214 }
1215
1216 /* extract reloptions if any */
1217 RelationParseRelOptions(relation, pg_class_tuple);
1218
1219 /*
1220 * initialize the relation lock manager information
1221 */
1222 RelationInitLockInfo(relation); /* see lmgr.c */
1223
1224 /*
1225 * initialize physical addressing information for the relation
1226 */
1227 RelationInitPhysicalAddr(relation);
1228
1229 /* make sure relation is marked as having no open file yet */
1230 relation->rd_smgr = NULL;
1231
1232 /*
1233 * now we can free the memory allocated for pg_class_tuple
1234 */
1235 heap_freetuple(pg_class_tuple);
1236
1237 /*
1238 * Insert newly created relation into relcache hash table, if requested.
1239 *
1240 * There is one scenario in which we might find a hashtable entry already
1241 * present, even though our caller failed to find it: if the relation is a
1242 * system catalog or index that's used during relcache load, we might have
1243 * recursively created the same relcache entry during the preceding steps.
1244 * So allow RelationCacheInsert to delete any already-present relcache
1245 * entry for the same OID. The already-present entry should have refcount
1246 * zero (else somebody forgot to close it); in the event that it doesn't,
1247 * we'll elog a WARNING and leak the already-present entry.
1248 */
1249 if (insertIt)
1250 RelationCacheInsert(relation, true);
1251
1252 /* It's fully valid */
1253 relation->rd_isvalid = true;
1254
1255#if RECOVER_RELATION_BUILD_MEMORY
1256 /* Return to caller's context, and blow away the temporary context */
1257 MemoryContextSwitchTo(oldcxt);
1258 MemoryContextDelete(tmpcxt);
1259#endif
1260
1261 return relation;
1262}
1263
1264/*
1265 * Initialize the physical addressing info (RelFileNode) for a relcache entry
1266 *
1267 * Note: at the physical level, relations in the pg_global tablespace must
1268 * be treated as shared, even if relisshared isn't set. Hence we do not
1269 * look at relisshared here.
1270 */
1271static void
1272RelationInitPhysicalAddr(Relation relation)
1273{
1274 /* these relations kinds never have storage */
1275 if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
1276 return;
1277
1278 if (relation->rd_rel->reltablespace)
1279 relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1280 else
1281 relation->rd_node.spcNode = MyDatabaseTableSpace;
1282 if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1283 relation->rd_node.dbNode = InvalidOid;
1284 else
1285 relation->rd_node.dbNode = MyDatabaseId;
1286
1287 if (relation->rd_rel->relfilenode)
1288 {
1289 /*
1290 * Even if we are using a decoding snapshot that doesn't represent the
1291 * current state of the catalog we need to make sure the filenode
1292 * points to the current file since the older file will be gone (or
1293 * truncated). The new file will still contain older rows so lookups
1294 * in them will work correctly. This wouldn't work correctly if
1295 * rewrites were allowed to change the schema in an incompatible way,
1296 * but those are prevented both on catalog tables and on user tables
1297 * declared as additional catalog tables.
1298 */
1299 if (HistoricSnapshotActive()
1300 && RelationIsAccessibleInLogicalDecoding(relation)
1301 && IsTransactionState())
1302 {
1303 HeapTuple phys_tuple;
1304 Form_pg_class physrel;
1305
1306 phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1307 RelationGetRelid(relation) != ClassOidIndexId,
1308 true);
1309 if (!HeapTupleIsValid(phys_tuple))
1310 elog(ERROR, "could not find pg_class entry for %u",
1311 RelationGetRelid(relation));
1312 physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1313
1314 relation->rd_rel->reltablespace = physrel->reltablespace;
1315 relation->rd_rel->relfilenode = physrel->relfilenode;
1316 heap_freetuple(phys_tuple);
1317 }
1318
1319 relation->rd_node.relNode = relation->rd_rel->relfilenode;
1320 }
1321 else
1322 {
1323 /* Consult the relation mapper */
1324 relation->rd_node.relNode =
1325 RelationMapOidToFilenode(relation->rd_id,
1326 relation->rd_rel->relisshared);
1327 if (!OidIsValid(relation->rd_node.relNode))
1328 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1329 RelationGetRelationName(relation), relation->rd_id);
1330 }
1331}
1332
1333/*
1334 * Fill in the IndexAmRoutine for an index relation.
1335 *
1336 * relation's rd_amhandler and rd_indexcxt must be valid already.
1337 */
1338static void
1339InitIndexAmRoutine(Relation relation)
1340{
1341 IndexAmRoutine *cached,
1342 *tmp;
1343
1344 /*
1345 * Call the amhandler in current, short-lived memory context, just in case
1346 * it leaks anything (it probably won't, but let's be paranoid).
1347 */
1348 tmp = GetIndexAmRoutine(relation->rd_amhandler);
1349
1350 /* OK, now transfer the data into relation's rd_indexcxt. */
1351 cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1352 sizeof(IndexAmRoutine));
1353 memcpy(cached, tmp, sizeof(IndexAmRoutine));
1354 relation->rd_indam = cached;
1355
1356 pfree(tmp);
1357}
1358
1359/*
1360 * Initialize index-access-method support data for an index relation
1361 */
1362void
1363RelationInitIndexAccessInfo(Relation relation)
1364{
1365 HeapTuple tuple;
1366 Form_pg_am aform;
1367 Datum indcollDatum;
1368 Datum indclassDatum;
1369 Datum indoptionDatum;
1370 bool isnull;
1371 oidvector *indcoll;
1372 oidvector *indclass;
1373 int2vector *indoption;
1374 MemoryContext indexcxt;
1375 MemoryContext oldcontext;
1376 int indnatts;
1377 int indnkeyatts;
1378 uint16 amsupport;
1379
1380 /*
1381 * Make a copy of the pg_index entry for the index. Since pg_index
1382 * contains variable-length and possibly-null fields, we have to do this
1383 * honestly rather than just treating it as a Form_pg_index struct.
1384 */
1385 tuple = SearchSysCache1(INDEXRELID,
1386 ObjectIdGetDatum(RelationGetRelid(relation)));
1387 if (!HeapTupleIsValid(tuple))
1388 elog(ERROR, "cache lookup failed for index %u",
1389 RelationGetRelid(relation));
1390 oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
1391 relation->rd_indextuple = heap_copytuple(tuple);
1392 relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1393 MemoryContextSwitchTo(oldcontext);
1394 ReleaseSysCache(tuple);
1395
1396 /*
1397 * Look up the index's access method, save the OID of its handler function
1398 */
1399 tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1400 if (!HeapTupleIsValid(tuple))
1401 elog(ERROR, "cache lookup failed for access method %u",
1402 relation->rd_rel->relam);
1403 aform = (Form_pg_am) GETSTRUCT(tuple);
1404 relation->rd_amhandler = aform->amhandler;
1405 ReleaseSysCache(tuple);
1406
1407 indnatts = RelationGetNumberOfAttributes(relation);
1408 if (indnatts != IndexRelationGetNumberOfAttributes(relation))
1409 elog(ERROR, "relnatts disagrees with indnatts for index %u",
1410 RelationGetRelid(relation));
1411 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation);
1412
1413 /*
1414 * Make the private context to hold index access info. The reason we need
1415 * a context, and not just a couple of pallocs, is so that we won't leak
1416 * any subsidiary info attached to fmgr lookup records.
1417 */
1418 indexcxt = AllocSetContextCreate(CacheMemoryContext,
1419 "index info",
1420 ALLOCSET_SMALL_SIZES);
1421 relation->rd_indexcxt = indexcxt;
1422 MemoryContextCopyAndSetIdentifier(indexcxt,
1423 RelationGetRelationName(relation));
1424
1425 /*
1426 * Now we can fetch the index AM's API struct
1427 */
1428 InitIndexAmRoutine(relation);
1429
1430 /*
1431 * Allocate arrays to hold data. Opclasses are not used for included
1432 * columns, so allocate them for indnkeyatts only.
1433 */
1434 relation->rd_opfamily = (Oid *)
1435 MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1436 relation->rd_opcintype = (Oid *)
1437 MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1438
1439 amsupport = relation->rd_indam->amsupport;
1440 if (amsupport > 0)
1441 {
1442 int nsupport = indnatts * amsupport;
1443
1444 relation->rd_support = (RegProcedure *)
1445 MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1446 relation->rd_supportinfo = (FmgrInfo *)
1447 MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1448 }
1449 else
1450 {
1451 relation->rd_support = NULL;
1452 relation->rd_supportinfo = NULL;
1453 }
1454
1455 relation->rd_indcollation = (Oid *)
1456 MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1457
1458 relation->rd_indoption = (int16 *)
1459 MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(int16));
1460
1461 /*
1462 * indcollation cannot be referenced directly through the C struct,
1463 * because it comes after the variable-width indkey field. Must extract
1464 * the datum the hard way...
1465 */
1466 indcollDatum = fastgetattr(relation->rd_indextuple,
1467 Anum_pg_index_indcollation,
1468 GetPgIndexDescriptor(),
1469 &isnull);
1470 Assert(!isnull);
1471 indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1472 memcpy(relation->rd_indcollation, indcoll->values, indnkeyatts * sizeof(Oid));
1473
1474 /*
1475 * indclass cannot be referenced directly through the C struct, because it
1476 * comes after the variable-width indkey field. Must extract the datum
1477 * the hard way...
1478 */
1479 indclassDatum = fastgetattr(relation->rd_indextuple,
1480 Anum_pg_index_indclass,
1481 GetPgIndexDescriptor(),
1482 &isnull);
1483 Assert(!isnull);
1484 indclass = (oidvector *) DatumGetPointer(indclassDatum);
1485
1486 /*
1487 * Fill the support procedure OID array, as well as the info about
1488 * opfamilies and opclass input types. (aminfo and supportinfo are left
1489 * as zeroes, and are filled on-the-fly when used)
1490 */
1491 IndexSupportInitialize(indclass, relation->rd_support,
1492 relation->rd_opfamily, relation->rd_opcintype,
1493 amsupport, indnkeyatts);
1494
1495 /*
1496 * Similarly extract indoption and copy it to the cache entry
1497 */
1498 indoptionDatum = fastgetattr(relation->rd_indextuple,
1499 Anum_pg_index_indoption,
1500 GetPgIndexDescriptor(),
1501 &isnull);
1502 Assert(!isnull);
1503 indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1504 memcpy(relation->rd_indoption, indoption->values, indnkeyatts * sizeof(int16));
1505
1506 /*
1507 * expressions, predicate, exclusion caches will be filled later
1508 */
1509 relation->rd_indexprs = NIL;
1510 relation->rd_indpred = NIL;
1511 relation->rd_exclops = NULL;
1512 relation->rd_exclprocs = NULL;
1513 relation->rd_exclstrats = NULL;
1514 relation->rd_amcache = NULL;
1515}
1516
1517/*
1518 * IndexSupportInitialize
1519 * Initializes an index's cached opclass information,
1520 * given the index's pg_index.indclass entry.
1521 *
1522 * Data is returned into *indexSupport, *opFamily, and *opcInType,
1523 * which are arrays allocated by the caller.
1524 *
1525 * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1526 * indicate the size of the arrays it has allocated --- but in practice these
1527 * numbers must always match those obtainable from the system catalog entries
1528 * for the index and access method.
1529 */
1530static void
1531IndexSupportInitialize(oidvector *indclass,
1532 RegProcedure *indexSupport,
1533 Oid *opFamily,
1534 Oid *opcInType,
1535 StrategyNumber maxSupportNumber,
1536 AttrNumber maxAttributeNumber)
1537{
1538 int attIndex;
1539
1540 for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1541 {
1542 OpClassCacheEnt *opcentry;
1543
1544 if (!OidIsValid(indclass->values[attIndex]))
1545 elog(ERROR, "bogus pg_index tuple");
1546
1547 /* look up the info for this opclass, using a cache */
1548 opcentry = LookupOpclassInfo(indclass->values[attIndex],
1549 maxSupportNumber);
1550
1551 /* copy cached data into relcache entry */
1552 opFamily[attIndex] = opcentry->opcfamily;
1553 opcInType[attIndex] = opcentry->opcintype;
1554 if (maxSupportNumber > 0)
1555 memcpy(&indexSupport[attIndex * maxSupportNumber],
1556 opcentry->supportProcs,
1557 maxSupportNumber * sizeof(RegProcedure));
1558 }
1559}
1560
1561/*
1562 * LookupOpclassInfo
1563 *
1564 * This routine maintains a per-opclass cache of the information needed
1565 * by IndexSupportInitialize(). This is more efficient than relying on
1566 * the catalog cache, because we can load all the info about a particular
1567 * opclass in a single indexscan of pg_amproc.
1568 *
1569 * The information from pg_am about expected range of support function
1570 * numbers is passed in, rather than being looked up, mainly because the
1571 * caller will have it already.
1572 *
1573 * Note there is no provision for flushing the cache. This is OK at the
1574 * moment because there is no way to ALTER any interesting properties of an
1575 * existing opclass --- all you can do is drop it, which will result in
1576 * a useless but harmless dead entry in the cache. To support altering
1577 * opclass membership (not the same as opfamily membership!), we'd need to
1578 * be able to flush this cache as well as the contents of relcache entries
1579 * for indexes.
1580 */
1581static OpClassCacheEnt *
1582LookupOpclassInfo(Oid operatorClassOid,
1583 StrategyNumber numSupport)
1584{
1585 OpClassCacheEnt *opcentry;
1586 bool found;
1587 Relation rel;
1588 SysScanDesc scan;
1589 ScanKeyData skey[3];
1590 HeapTuple htup;
1591 bool indexOK;
1592
1593 if (OpClassCache == NULL)
1594 {
1595 /* First time through: initialize the opclass cache */
1596 HASHCTL ctl;
1597
1598 MemSet(&ctl, 0, sizeof(ctl));
1599 ctl.keysize = sizeof(Oid);
1600 ctl.entrysize = sizeof(OpClassCacheEnt);
1601 OpClassCache = hash_create("Operator class cache", 64,
1602 &ctl, HASH_ELEM | HASH_BLOBS);
1603
1604 /* Also make sure CacheMemoryContext exists */
1605 if (!CacheMemoryContext)
1606 CreateCacheMemoryContext();
1607 }
1608
1609 opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1610 (void *) &operatorClassOid,
1611 HASH_ENTER, &found);
1612
1613 if (!found)
1614 {
1615 /* Need to allocate memory for new entry */
1616 opcentry->valid = false; /* until known OK */
1617 opcentry->numSupport = numSupport;
1618
1619 if (numSupport > 0)
1620 opcentry->supportProcs = (RegProcedure *)
1621 MemoryContextAllocZero(CacheMemoryContext,
1622 numSupport * sizeof(RegProcedure));
1623 else
1624 opcentry->supportProcs = NULL;
1625 }
1626 else
1627 {
1628 Assert(numSupport == opcentry->numSupport);
1629 }
1630
1631 /*
1632 * When testing for cache-flush hazards, we intentionally disable the
1633 * operator class cache and force reloading of the info on each call. This
1634 * is helpful because we want to test the case where a cache flush occurs
1635 * while we are loading the info, and it's very hard to provoke that if
1636 * this happens only once per opclass per backend.
1637 */
1638#if defined(CLOBBER_CACHE_ALWAYS)
1639 opcentry->valid = false;
1640#endif
1641
1642 if (opcentry->valid)
1643 return opcentry;
1644
1645 /*
1646 * Need to fill in new entry.
1647 *
1648 * To avoid infinite recursion during startup, force heap scans if we're
1649 * looking up info for the opclasses used by the indexes we would like to
1650 * reference here.
1651 */
1652 indexOK = criticalRelcachesBuilt ||
1653 (operatorClassOid != OID_BTREE_OPS_OID &&
1654 operatorClassOid != INT2_BTREE_OPS_OID);
1655
1656 /*
1657 * We have to fetch the pg_opclass row to determine its opfamily and
1658 * opcintype, which are needed to look up related operators and functions.
1659 * It'd be convenient to use the syscache here, but that probably doesn't
1660 * work while bootstrapping.
1661 */
1662 ScanKeyInit(&skey[0],
1663 Anum_pg_opclass_oid,
1664 BTEqualStrategyNumber, F_OIDEQ,
1665 ObjectIdGetDatum(operatorClassOid));
1666 rel = table_open(OperatorClassRelationId, AccessShareLock);
1667 scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1668 NULL, 1, skey);
1669
1670 if (HeapTupleIsValid(htup = systable_getnext(scan)))
1671 {
1672 Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1673
1674 opcentry->opcfamily = opclassform->opcfamily;
1675 opcentry->opcintype = opclassform->opcintype;
1676 }
1677 else
1678 elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1679
1680 systable_endscan(scan);
1681 table_close(rel, AccessShareLock);
1682
1683 /*
1684 * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1685 * the default ones (those with lefttype = righttype = opcintype).
1686 */
1687 if (numSupport > 0)
1688 {
1689 ScanKeyInit(&skey[0],
1690 Anum_pg_amproc_amprocfamily,
1691 BTEqualStrategyNumber, F_OIDEQ,
1692 ObjectIdGetDatum(opcentry->opcfamily));
1693 ScanKeyInit(&skey[1],
1694 Anum_pg_amproc_amproclefttype,
1695 BTEqualStrategyNumber, F_OIDEQ,
1696 ObjectIdGetDatum(opcentry->opcintype));
1697 ScanKeyInit(&skey[2],
1698 Anum_pg_amproc_amprocrighttype,
1699 BTEqualStrategyNumber, F_OIDEQ,
1700 ObjectIdGetDatum(opcentry->opcintype));
1701 rel = table_open(AccessMethodProcedureRelationId, AccessShareLock);
1702 scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1703 NULL, 3, skey);
1704
1705 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1706 {
1707 Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1708
1709 if (amprocform->amprocnum <= 0 ||
1710 (StrategyNumber) amprocform->amprocnum > numSupport)
1711 elog(ERROR, "invalid amproc number %d for opclass %u",
1712 amprocform->amprocnum, operatorClassOid);
1713
1714 opcentry->supportProcs[amprocform->amprocnum - 1] =
1715 amprocform->amproc;
1716 }
1717
1718 systable_endscan(scan);
1719 table_close(rel, AccessShareLock);
1720 }
1721
1722 opcentry->valid = true;
1723 return opcentry;
1724}
1725
1726/*
1727 * Fill in the TableAmRoutine for a relation
1728 *
1729 * relation's rd_amhandler must be valid already.
1730 */
1731static void
1732InitTableAmRoutine(Relation relation)
1733{
1734 relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler);
1735}
1736
1737/*
1738 * Initialize table access method support for a table like relation
1739 */
1740void
1741RelationInitTableAccessMethod(Relation relation)
1742{
1743 HeapTuple tuple;
1744 Form_pg_am aform;
1745
1746 if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
1747 {
1748 /*
1749 * Sequences are currently accessed like heap tables, but it doesn't
1750 * seem prudent to show that in the catalog. So just overwrite it
1751 * here.
1752 */
1753 relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1754 }
1755 else if (IsCatalogRelation(relation))
1756 {
1757 /*
1758 * Avoid doing a syscache lookup for catalog tables.
1759 */
1760 Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID);
1761 relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1762 }
1763 else
1764 {
1765 /*
1766 * Look up the table access method, save the OID of its handler
1767 * function.
1768 */
1769 Assert(relation->rd_rel->relam != InvalidOid);
1770 tuple = SearchSysCache1(AMOID,
1771 ObjectIdGetDatum(relation->rd_rel->relam));
1772 if (!HeapTupleIsValid(tuple))
1773 elog(ERROR, "cache lookup failed for access method %u",
1774 relation->rd_rel->relam);
1775 aform = (Form_pg_am) GETSTRUCT(tuple);
1776 relation->rd_amhandler = aform->amhandler;
1777 ReleaseSysCache(tuple);
1778 }
1779
1780 /*
1781 * Now we can fetch the table AM's API struct
1782 */
1783 InitTableAmRoutine(relation);
1784}
1785
1786/*
1787 * formrdesc
1788 *
1789 * This is a special cut-down version of RelationBuildDesc(),
1790 * used while initializing the relcache.
1791 * The relation descriptor is built just from the supplied parameters,
1792 * without actually looking at any system table entries. We cheat
1793 * quite a lot since we only need to work for a few basic system
1794 * catalogs.
1795 *
1796 * The catalogs this is used for can't have constraints (except attnotnull),
1797 * default values, rules, or triggers, since we don't cope with any of that.
1798 * (Well, actually, this only matters for properties that need to be valid
1799 * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1800 * these properties matter then...)
1801 *
1802 * NOTE: we assume we are already switched into CacheMemoryContext.
1803 */
1804static void
1805formrdesc(const char *relationName, Oid relationReltype,
1806 bool isshared,
1807 int natts, const FormData_pg_attribute *attrs)
1808{
1809 Relation relation;
1810 int i;
1811 bool has_not_null;
1812
1813 /*
1814 * allocate new relation desc, clear all fields of reldesc
1815 */
1816 relation = (Relation) palloc0(sizeof(RelationData));
1817
1818 /* make sure relation is marked as having no open file yet */
1819 relation->rd_smgr = NULL;
1820
1821 /*
1822 * initialize reference count: 1 because it is nailed in cache
1823 */
1824 relation->rd_refcnt = 1;
1825
1826 /*
1827 * all entries built with this routine are nailed-in-cache; none are for
1828 * new or temp relations.
1829 */
1830 relation->rd_isnailed = true;
1831 relation->rd_createSubid = InvalidSubTransactionId;
1832 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1833 relation->rd_backend = InvalidBackendId;
1834 relation->rd_islocaltemp = false;
1835
1836 /*
1837 * initialize relation tuple form
1838 *
1839 * The data we insert here is pretty incomplete/bogus, but it'll serve to
1840 * get us launched. RelationCacheInitializePhase3() will read the real
1841 * data from pg_class and replace what we've done here. Note in
1842 * particular that relowner is left as zero; this cues
1843 * RelationCacheInitializePhase3 that the real data isn't there yet.
1844 */
1845 relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1846
1847 namestrcpy(&relation->rd_rel->relname, relationName);
1848 relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1849 relation->rd_rel->reltype = relationReltype;
1850
1851 /*
1852 * It's important to distinguish between shared and non-shared relations,
1853 * even at bootstrap time, to make sure we know where they are stored.
1854 */
1855 relation->rd_rel->relisshared = isshared;
1856 if (isshared)
1857 relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1858
1859 /* formrdesc is used only for permanent relations */
1860 relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1861
1862 /* ... and they're always populated, too */
1863 relation->rd_rel->relispopulated = true;
1864
1865 relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1866 relation->rd_rel->relpages = 0;
1867 relation->rd_rel->reltuples = 0;
1868 relation->rd_rel->relallvisible = 0;
1869 relation->rd_rel->relkind = RELKIND_RELATION;
1870 relation->rd_rel->relnatts = (int16) natts;
1871 relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1872
1873 /*
1874 * initialize attribute tuple form
1875 *
1876 * Unlike the case with the relation tuple, this data had better be right
1877 * because it will never be replaced. The data comes from
1878 * src/include/catalog/ headers via genbki.pl.
1879 */
1880 relation->rd_att = CreateTemplateTupleDesc(natts);
1881 relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1882
1883 relation->rd_att->tdtypeid = relationReltype;
1884 relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
1885
1886 /*
1887 * initialize tuple desc info
1888 */
1889 has_not_null = false;
1890 for (i = 0; i < natts; i++)
1891 {
1892 memcpy(TupleDescAttr(relation->rd_att, i),
1893 &attrs[i],
1894 ATTRIBUTE_FIXED_PART_SIZE);
1895 has_not_null |= attrs[i].attnotnull;
1896 /* make sure attcacheoff is valid */
1897 TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1898 }
1899
1900 /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1901 TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1902
1903 /* mark not-null status */
1904 if (has_not_null)
1905 {
1906 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1907
1908 constr->has_not_null = true;
1909 relation->rd_att->constr = constr;
1910 }
1911
1912 /*
1913 * initialize relation id from info in att array (my, this is ugly)
1914 */
1915 RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
1916
1917 /*
1918 * All relations made with formrdesc are mapped. This is necessarily so
1919 * because there is no other way to know what filenode they currently
1920 * have. In bootstrap mode, add them to the initial relation mapper data,
1921 * specifying that the initial filenode is the same as the OID.
1922 */
1923 relation->rd_rel->relfilenode = InvalidOid;
1924 if (IsBootstrapProcessingMode())
1925 RelationMapUpdateMap(RelationGetRelid(relation),
1926 RelationGetRelid(relation),
1927 isshared, true);
1928
1929 /*
1930 * initialize the relation lock manager information
1931 */
1932 RelationInitLockInfo(relation); /* see lmgr.c */
1933
1934 /*
1935 * initialize physical addressing information for the relation
1936 */
1937 RelationInitPhysicalAddr(relation);
1938
1939 /*
1940 * initialize the table am handler
1941 */
1942 relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1943 relation->rd_tableam = GetHeapamTableAmRoutine();
1944
1945 /*
1946 * initialize the rel-has-index flag, using hardwired knowledge
1947 */
1948 if (IsBootstrapProcessingMode())
1949 {
1950 /* In bootstrap mode, we have no indexes */
1951 relation->rd_rel->relhasindex = false;
1952 }
1953 else
1954 {
1955 /* Otherwise, all the rels formrdesc is used for have indexes */
1956 relation->rd_rel->relhasindex = true;
1957 }
1958
1959 /*
1960 * add new reldesc to relcache
1961 */
1962 RelationCacheInsert(relation, false);
1963
1964 /* It's fully valid */
1965 relation->rd_isvalid = true;
1966}
1967
1968
1969/* ----------------------------------------------------------------
1970 * Relation Descriptor Lookup Interface
1971 * ----------------------------------------------------------------
1972 */
1973
1974/*
1975 * RelationIdGetRelation
1976 *
1977 * Lookup a reldesc by OID; make one if not already in cache.
1978 *
1979 * Returns NULL if no pg_class row could be found for the given relid
1980 * (suggesting we are trying to access a just-deleted relation).
1981 * Any other error is reported via elog.
1982 *
1983 * NB: caller should already have at least AccessShareLock on the
1984 * relation ID, else there are nasty race conditions.
1985 *
1986 * NB: relation ref count is incremented, or set to 1 if new entry.
1987 * Caller should eventually decrement count. (Usually,
1988 * that happens by calling RelationClose().)
1989 */
1990Relation
1991RelationIdGetRelation(Oid relationId)
1992{
1993 Relation rd;
1994
1995 /* Make sure we're in an xact, even if this ends up being a cache hit */
1996 Assert(IsTransactionState());
1997
1998 /*
1999 * first try to find reldesc in the cache
2000 */
2001 RelationIdCacheLookup(relationId, rd);
2002
2003 if (RelationIsValid(rd))
2004 {
2005 RelationIncrementReferenceCount(rd);
2006 /* revalidate cache entry if necessary */
2007 if (!rd->rd_isvalid)
2008 {
2009 /*
2010 * Indexes only have a limited number of possible schema changes,
2011 * and we don't want to use the full-blown procedure because it's
2012 * a headache for indexes that reload itself depends on.
2013 */
2014 if (rd->rd_rel->relkind == RELKIND_INDEX ||
2015 rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2016 RelationReloadIndexInfo(rd);
2017 else
2018 RelationClearRelation(rd, true);
2019
2020 /*
2021 * Normally entries need to be valid here, but before the relcache
2022 * has been initialized, not enough infrastructure exists to
2023 * perform pg_class lookups. The structure of such entries doesn't
2024 * change, but we still want to update the rd_rel entry. So
2025 * rd_isvalid = false is left in place for a later lookup.
2026 */
2027 Assert(rd->rd_isvalid ||
2028 (rd->rd_isnailed && !criticalRelcachesBuilt));
2029 }
2030 return rd;
2031 }
2032
2033 /*
2034 * no reldesc in the cache, so have RelationBuildDesc() build one and add
2035 * it.
2036 */
2037 rd = RelationBuildDesc(relationId, true);
2038 if (RelationIsValid(rd))
2039 RelationIncrementReferenceCount(rd);
2040 return rd;
2041}
2042
2043/* ----------------------------------------------------------------
2044 * cache invalidation support routines
2045 * ----------------------------------------------------------------
2046 */
2047
2048/*
2049 * RelationIncrementReferenceCount
2050 * Increments relation reference count.
2051 *
2052 * Note: bootstrap mode has its own weird ideas about relation refcount
2053 * behavior; we ought to fix it someday, but for now, just disable
2054 * reference count ownership tracking in bootstrap mode.
2055 */
2056void
2057RelationIncrementReferenceCount(Relation rel)
2058{
2059 ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
2060 rel->rd_refcnt += 1;
2061 if (!IsBootstrapProcessingMode())
2062 ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
2063}
2064
2065/*
2066 * RelationDecrementReferenceCount
2067 * Decrements relation reference count.
2068 */
2069void
2070RelationDecrementReferenceCount(Relation rel)
2071{
2072 Assert(rel->rd_refcnt > 0);
2073 rel->rd_refcnt -= 1;
2074 if (!IsBootstrapProcessingMode())
2075 ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
2076}
2077
2078/*
2079 * RelationClose - close an open relation
2080 *
2081 * Actually, we just decrement the refcount.
2082 *
2083 * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2084 * will be freed as soon as their refcount goes to zero. In combination
2085 * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2086 * to catch references to already-released relcache entries. It slows
2087 * things down quite a bit, however.
2088 */
2089void
2090RelationClose(Relation relation)
2091{
2092 /* Note: no locking manipulations needed */
2093 RelationDecrementReferenceCount(relation);
2094
2095#ifdef RELCACHE_FORCE_RELEASE
2096 if (RelationHasReferenceCountZero(relation) &&
2097 relation->rd_createSubid == InvalidSubTransactionId &&
2098 relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
2099 RelationClearRelation(relation, false);
2100#endif
2101}
2102
2103/*
2104 * RelationReloadIndexInfo - reload minimal information for an open index
2105 *
2106 * This function is used only for indexes. A relcache inval on an index
2107 * can mean that its pg_class or pg_index row changed. There are only
2108 * very limited changes that are allowed to an existing index's schema,
2109 * so we can update the relcache entry without a complete rebuild; which
2110 * is fortunate because we can't rebuild an index entry that is "nailed"
2111 * and/or in active use. We support full replacement of the pg_class row,
2112 * as well as updates of a few simple fields of the pg_index row.
2113 *
2114 * We can't necessarily reread the catalog rows right away; we might be
2115 * in a failed transaction when we receive the SI notification. If so,
2116 * RelationClearRelation just marks the entry as invalid by setting
2117 * rd_isvalid to false. This routine is called to fix the entry when it
2118 * is next needed.
2119 *
2120 * We assume that at the time we are called, we have at least AccessShareLock
2121 * on the target index. (Note: in the calls from RelationClearRelation,
2122 * this is legitimate because we know the rel has positive refcount.)
2123 *
2124 * If the target index is an index on pg_class or pg_index, we'd better have
2125 * previously gotten at least AccessShareLock on its underlying catalog,
2126 * else we are at risk of deadlock against someone trying to exclusive-lock
2127 * the heap and index in that order. This is ensured in current usage by
2128 * only applying this to indexes being opened or having positive refcount.
2129 */
2130static void
2131RelationReloadIndexInfo(Relation relation)
2132{
2133 bool indexOK;
2134 HeapTuple pg_class_tuple;
2135 Form_pg_class relp;
2136
2137 /* Should be called only for invalidated indexes */
2138 Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
2139 relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2140 !relation->rd_isvalid);
2141
2142 /* Ensure it's closed at smgr level */
2143 RelationCloseSmgr(relation);
2144
2145 /* Must free any AM cached data upon relcache flush */
2146 if (relation->rd_amcache)
2147 pfree(relation->rd_amcache);
2148 relation->rd_amcache = NULL;
2149
2150 /*
2151 * If it's a shared index, we might be called before backend startup has
2152 * finished selecting a database, in which case we have no way to read
2153 * pg_class yet. However, a shared index can never have any significant
2154 * schema updates, so it's okay to ignore the invalidation signal. Just
2155 * mark it valid and return without doing anything more.
2156 */
2157 if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2158 {
2159 relation->rd_isvalid = true;
2160 return;
2161 }
2162
2163 /*
2164 * Read the pg_class row
2165 *
2166 * Don't try to use an indexscan of pg_class_oid_index to reload the info
2167 * for pg_class_oid_index ...
2168 */
2169 indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2170 pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2171 if (!HeapTupleIsValid(pg_class_tuple))
2172 elog(ERROR, "could not find pg_class tuple for index %u",
2173 RelationGetRelid(relation));
2174 relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2175 memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2176 /* Reload reloptions in case they changed */
2177 if (relation->rd_options)
2178 pfree(relation->rd_options);
2179 RelationParseRelOptions(relation, pg_class_tuple);
2180 /* done with pg_class tuple */
2181 heap_freetuple(pg_class_tuple);
2182 /* We must recalculate physical address in case it changed */
2183 RelationInitPhysicalAddr(relation);
2184
2185 /*
2186 * For a non-system index, there are fields of the pg_index row that are
2187 * allowed to change, so re-read that row and update the relcache entry.
2188 * Most of the info derived from pg_index (such as support function lookup
2189 * info) cannot change, and indeed the whole point of this routine is to
2190 * update the relcache entry without clobbering that data; so wholesale
2191 * replacement is not appropriate.
2192 */
2193 if (!IsSystemRelation(relation))
2194 {
2195 HeapTuple tuple;
2196 Form_pg_index index;
2197
2198 tuple = SearchSysCache1(INDEXRELID,
2199 ObjectIdGetDatum(RelationGetRelid(relation)));
2200 if (!HeapTupleIsValid(tuple))
2201 elog(ERROR, "cache lookup failed for index %u",
2202 RelationGetRelid(relation));
2203 index = (Form_pg_index) GETSTRUCT(tuple);
2204
2205 /*
2206 * Basically, let's just copy all the bool fields. There are one or
2207 * two of these that can't actually change in the current code, but
2208 * it's not worth it to track exactly which ones they are. None of
2209 * the array fields are allowed to change, though.
2210 */
2211 relation->rd_index->indisunique = index->indisunique;
2212 relation->rd_index->indisprimary = index->indisprimary;
2213 relation->rd_index->indisexclusion = index->indisexclusion;
2214 relation->rd_index->indimmediate = index->indimmediate;
2215 relation->rd_index->indisclustered = index->indisclustered;
2216 relation->rd_index->indisvalid = index->indisvalid;
2217 relation->rd_index->indcheckxmin = index->indcheckxmin;
2218 relation->rd_index->indisready = index->indisready;
2219 relation->rd_index->indislive = index->indislive;
2220
2221 /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2222 HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
2223 HeapTupleHeaderGetXmin(tuple->t_data));
2224
2225 ReleaseSysCache(tuple);
2226 }
2227
2228 /* Okay, now it's valid again */
2229 relation->rd_isvalid = true;
2230}
2231
2232/*
2233 * RelationReloadNailed - reload minimal information for nailed relations.
2234 *
2235 * The structure of a nailed relation can never change (which is good, because
2236 * we rely on knowing their structure to be able to read catalog content). But
2237 * some parts, e.g. pg_class.relfrozenxid, are still important to have
2238 * accurate content for. Therefore those need to be reloaded after the arrival
2239 * of invalidations.
2240 */
2241static void
2242RelationReloadNailed(Relation relation)
2243{
2244 Assert(relation->rd_isnailed);
2245
2246 /*
2247 * Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2248 * mapping changed.
2249 */
2250 RelationInitPhysicalAddr(relation);
2251
2252 /* flag as needing to be revalidated */
2253 relation->rd_isvalid = false;
2254
2255 /*
2256 * Can only reread catalog contents if in a transaction. If the relation
2257 * is currently open (not counting the nailed refcount), do so
2258 * immediately. Otherwise we've already marked the entry as possibly
2259 * invalid, and it'll be fixed when next opened.
2260 */
2261 if (!IsTransactionState() || relation->rd_refcnt <= 1)
2262 return;
2263
2264 if (relation->rd_rel->relkind == RELKIND_INDEX)
2265 {
2266 /*
2267 * If it's a nailed-but-not-mapped index, then we need to re-read the
2268 * pg_class row to see if its relfilenode changed.
2269 */
2270 RelationReloadIndexInfo(relation);
2271 }
2272 else
2273 {
2274 /*
2275 * Reload a non-index entry. We can't easily do so if relcaches
2276 * aren't yet built, but that's fine because at that stage the
2277 * attributes that need to be current (like relfrozenxid) aren't yet
2278 * accessed. To ensure the entry will later be revalidated, we leave
2279 * it in invalid state, but allow use (cf. RelationIdGetRelation()).
2280 */
2281 if (criticalRelcachesBuilt)
2282 {
2283 HeapTuple pg_class_tuple;
2284 Form_pg_class relp;
2285
2286 /*
2287 * NB: Mark the entry as valid before starting to scan, to avoid
2288 * self-recursion when re-building pg_class.
2289 */
2290 relation->rd_isvalid = true;
2291
2292 pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
2293 true, false);
2294 relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2295 memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2296 heap_freetuple(pg_class_tuple);
2297
2298 /*
2299 * Again mark as valid, to protect against concurrently arriving
2300 * invalidations.
2301 */
2302 relation->rd_isvalid = true;
2303 }
2304 }
2305}
2306
2307/*
2308 * RelationDestroyRelation
2309 *
2310 * Physically delete a relation cache entry and all subsidiary data.
2311 * Caller must already have unhooked the entry from the hash table.
2312 */
2313static void
2314RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2315{
2316 Assert(RelationHasReferenceCountZero(relation));
2317
2318 /*
2319 * Make sure smgr and lower levels close the relation's files, if they
2320 * weren't closed already. (This was probably done by caller, but let's
2321 * just be real sure.)
2322 */
2323 RelationCloseSmgr(relation);
2324
2325 /*
2326 * Free all the subsidiary data structures of the relcache entry, then the
2327 * entry itself.
2328 */
2329 if (relation->rd_rel)
2330 pfree(relation->rd_rel);
2331 /* can't use DecrTupleDescRefCount here */
2332 Assert(relation->rd_att->tdrefcount > 0);
2333 if (--relation->rd_att->tdrefcount == 0)
2334 {
2335 /*
2336 * If we Rebuilt a relcache entry during a transaction then its
2337 * possible we did that because the TupDesc changed as the result of
2338 * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2339 * possible someone copied that TupDesc, in which case the copy would
2340 * point to free'd memory. So if we rebuild an entry we keep the
2341 * TupDesc around until end of transaction, to be safe.
2342 */
2343 if (remember_tupdesc)
2344 RememberToFreeTupleDescAtEOX(relation->rd_att);
2345 else
2346 FreeTupleDesc(relation->rd_att);
2347 }
2348 FreeTriggerDesc(relation->trigdesc);
2349 list_free_deep(relation->rd_fkeylist);
2350 list_free(relation->rd_indexlist);
2351 bms_free(relation->rd_indexattr);
2352 bms_free(relation->rd_keyattr);
2353 bms_free(relation->rd_pkattr);
2354 bms_free(relation->rd_idattr);
2355 if (relation->rd_pubactions)
2356 pfree(relation->rd_pubactions);
2357 if (relation->rd_options)
2358 pfree(relation->rd_options);
2359 if (relation->rd_indextuple)
2360 pfree(relation->rd_indextuple);
2361 if (relation->rd_amcache)
2362 pfree(relation->rd_amcache);
2363 if (relation->rd_fdwroutine)
2364 pfree(relation->rd_fdwroutine);
2365 if (relation->rd_indexcxt)
2366 MemoryContextDelete(relation->rd_indexcxt);
2367 if (relation->rd_rulescxt)
2368 MemoryContextDelete(relation->rd_rulescxt);
2369 if (relation->rd_rsdesc)
2370 MemoryContextDelete(relation->rd_rsdesc->rscxt);
2371 if (relation->rd_partkeycxt)
2372 MemoryContextDelete(relation->rd_partkeycxt);
2373 if (relation->rd_pdcxt)
2374 MemoryContextDelete(relation->rd_pdcxt);
2375 if (relation->rd_partcheckcxt)
2376 MemoryContextDelete(relation->rd_partcheckcxt);
2377 pfree(relation);
2378}
2379
/*
 * RelationClearRelation
 *
 *	 Physically blow away a relation cache entry, or reset it and rebuild
 *	 it from scratch (that is, from catalog entries).  The latter path is
 *	 used when we are notified of a change to an open relation (one with
 *	 refcount > 0).
 *
 *	 NB: when rebuilding, we'd better hold some lock on the relation,
 *	 else the catalog data we need to read could be changing under us.
 *	 Also, a rel to be rebuilt had better have refcnt > 0.  This is because
 *	 a sinval reset could happen while we're accessing the catalogs, and
 *	 the rel would get blown away underneath us by RelationCacheInvalidate
 *	 if it has zero refcnt.
 *
 *	 The "rebuild" parameter is redundant in current usage because it has
 *	 to match the relation's refcnt status, but we keep it as a crosscheck
 *	 that we're doing what the caller expects.
 *
 *	 Summary of what happens, in the order the cases are tested:
 *	 - nailed entry: handed to RelationReloadNailed; never destroyed.
 *	 - index with refcount > 0 and a non-NULL rd_indexcxt: marked invalid
 *	   and, if inside a transaction, refreshed by RelationReloadIndexInfo.
 *	 - rebuild == false: removed from the hash table and destroyed.
 *	 - rebuild == true but not in a transaction: left with
 *	   rd_isvalid = false, to be rebuilt when next opened.
 *	 - otherwise: a fresh entry is built, its contents are swapped into
 *	   the existing RelationData, and the temporary entry is destroyed.
 */
static void
RelationClearRelation(Relation relation, bool rebuild)
{
	/*
	 * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
	 * course it would be an equally bad idea to blow away one with nonzero
	 * refcnt, since that would leave someone somewhere with a dangling
	 * pointer.  All callers are expected to have verified that this holds.
	 */
	Assert(rebuild ?
		   !RelationHasReferenceCountZero(relation) :
		   RelationHasReferenceCountZero(relation));

	/*
	 * Make sure smgr and lower levels close the relation's files, if they
	 * weren't closed already.  If the relation is not getting deleted, the
	 * next smgr access should reopen the files automatically.  This ensures
	 * that the low-level file access state is updated after, say, a vacuum
	 * truncation.
	 */
	RelationCloseSmgr(relation);

	/* Free AM cached data, if any */
	if (relation->rd_amcache)
		pfree(relation->rd_amcache);
	relation->rd_amcache = NULL;

	/*
	 * Treat nailed-in system relations separately, they always need to be
	 * accessible, so we can't blow them away.
	 */
	if (relation->rd_isnailed)
	{
		RelationReloadNailed(relation);
		return;
	}

	/*
	 * Even non-system indexes should not be blown away if they are open and
	 * have valid index support information.  This avoids problems with active
	 * use of the index support information.  As with nailed indexes, we
	 * re-read the pg_class row to handle possible physical relocation of the
	 * index, and we check for pg_index updates too.
	 */
	if ((relation->rd_rel->relkind == RELKIND_INDEX ||
		 relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
		relation->rd_refcnt > 0 &&
		relation->rd_indexcxt != NULL)
	{
		relation->rd_isvalid = false;	/* needs to be revalidated */
		/* outside a transaction the entry simply stays marked invalid */
		if (IsTransactionState())
			RelationReloadIndexInfo(relation);
		return;
	}

	/* Mark it invalid until we've finished rebuild */
	relation->rd_isvalid = false;

	/*
	 * If we're really done with the relcache entry, blow it away. But if
	 * someone is still using it, reconstruct the whole deal without moving
	 * the physical RelationData record (so that the someone's pointer is
	 * still valid).
	 */
	if (!rebuild)
	{
		/* Remove it from the hash table */
		RelationCacheDelete(relation);

		/* And release storage */
		RelationDestroyRelation(relation, false);
	}
	else if (!IsTransactionState())
	{
		/*
		 * If we're not inside a valid transaction, we can't do any catalog
		 * access so it's not possible to rebuild yet.  Just exit, leaving
		 * rd_isvalid = false so that the rebuild will occur when the entry is
		 * next opened.
		 *
		 * Note: it's possible that we come here during subtransaction abort,
		 * and the reason for wanting to rebuild is that the rel is open in
		 * the outer transaction.  In that case it might seem unsafe to not
		 * rebuild immediately, since whatever code has the rel already open
		 * will keep on using the relcache entry as-is.  However, in such a
		 * case the outer transaction should be holding a lock that's
		 * sufficient to prevent any significant change in the rel's schema,
		 * so the existing entry contents should be good enough for its
		 * purposes; at worst we might be behind on statistics updates or the
		 * like.  (See also CheckTableNotInUse() and its callers.)	These same
		 * remarks also apply to the cases above where we exit without having
		 * done RelationReloadIndexInfo() yet.
		 */
		return;
	}
	else
	{
		/*
		 * Our strategy for rebuilding an open relcache entry is to build a
		 * new entry from scratch, swap its contents with the old entry, and
		 * finally delete the new entry (along with any infrastructure swapped
		 * over from the old entry).  This is to avoid trouble in case an
		 * error causes us to lose control partway through.  The old entry
		 * will still be marked !rd_isvalid, so we'll try to rebuild it again
		 * on next access.  Meanwhile it's not any less valid than it was
		 * before, so any code that might expect to continue accessing it
		 * isn't hurt by the rebuild failure.  (Consider for example a
		 * subtransaction that ALTERs a table and then gets canceled partway
		 * through the cache entry rebuild.  The outer transaction should
		 * still see the not-modified cache entry as valid.)  The worst
		 * consequence of an error is leaking the necessarily-unreferenced new
		 * entry, and this shouldn't happen often enough for that to be a big
		 * problem.
		 *
		 * When rebuilding an open relcache entry, we must preserve ref count,
		 * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state.  Also
		 * attempt to preserve the pg_class entry (rd_rel), tupledesc,
		 * rewrite-rule, partition key, and partition descriptor substructures
		 * in place, because various places assume that these structures won't
		 * move while they are working with an open relcache entry.  (Note:
		 * the refcount mechanism for tupledescs might someday allow us to
		 * remove this hack for the tupledesc.)
		 *
		 * Note that this process does not touch CurrentResourceOwner; which
		 * is good because whatever ref counts the entry may have do not
		 * necessarily belong to that resource owner.
		 */
		Relation	newrel;
		Oid			save_relid = RelationGetRelid(relation);
		bool		keep_tupdesc;
		bool		keep_rules;
		bool		keep_policies;
		bool		keep_partkey;
		bool		keep_partdesc;

		/* Build temporary entry, but don't link it into hashtable */
		newrel = RelationBuildDesc(save_relid, false);
		if (newrel == NULL)
		{
			/*
			 * We can validly get here, if we're using a historic snapshot in
			 * which a relation, accessed from outside logical decoding, is
			 * still invisible. In that case it's fine to just mark the
			 * relation as invalid and return - it'll fully get reloaded by
			 * the cache reset at the end of logical decoding (or at the next
			 * access).  During normal processing we don't want to ignore this
			 * case as it shouldn't happen there, as explained below.
			 */
			if (HistoricSnapshotActive())
				return;

			/*
			 * This shouldn't happen as dropping a relation is intended to be
			 * impossible if still referenced (cf. CheckTableNotInUse()). But
			 * if we get here anyway, we can't just delete the relcache entry,
			 * as it possibly could get accessed later (as e.g. the error
			 * might get trapped and handled via a subtransaction rollback).
			 */
			elog(ERROR, "relation %u deleted while still in use", save_relid);
		}

		/*
		 * Decide, before anything is swapped, which substructures of the old
		 * entry are logically unchanged and can therefore stay in place.
		 */
		keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
		keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
		keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
		/* partkey is immutable once set up, so we can always keep it */
		keep_partkey = (relation->rd_partkey != NULL);
		keep_partdesc = equalPartitionDescs(relation->rd_partkey,
											relation->rd_partdesc,
											newrel->rd_partdesc);

		/*
		 * Perform swapping of the relcache entry contents.  Within this
		 * process the old entry is momentarily invalid, so there *must* be no
		 * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
		 * all-in-line code for safety.
		 *
		 * Since the vast majority of fields should be swapped, our method is
		 * to swap the whole structures and then re-swap those few fields we
		 * didn't want swapped.
		 */
#define SWAPFIELD(fldtype, fldname) \
		do { \
			fldtype _tmp = newrel->fldname; \
			newrel->fldname = relation->fldname; \
			relation->fldname = _tmp; \
		} while (0)

		/* swap all Relation struct fields */
		{
			RelationData tmpstruct;

			memcpy(&tmpstruct, newrel, sizeof(RelationData));
			memcpy(newrel, relation, sizeof(RelationData));
			memcpy(relation, &tmpstruct, sizeof(RelationData));
		}

		/*
		 * From here on "relation" holds the freshly built contents and
		 * "newrel" holds the old ones; each SWAPFIELD below puts one old
		 * field back into "relation".
		 */

		/* rd_smgr must not be swapped, due to back-links from smgr level */
		SWAPFIELD(SMgrRelation, rd_smgr);
		/* rd_refcnt must be preserved */
		SWAPFIELD(int, rd_refcnt);
		/* isnailed shouldn't change */
		Assert(newrel->rd_isnailed == relation->rd_isnailed);
		/* creation sub-XIDs must be preserved */
		SWAPFIELD(SubTransactionId, rd_createSubid);
		SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
		/* un-swap rd_rel pointers, swap contents instead */
		SWAPFIELD(Form_pg_class, rd_rel);
		/* ... but actually, we don't have to update newrel->rd_rel */
		memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
		/* preserve old tupledesc, rules, policies if no logical change */
		if (keep_tupdesc)
			SWAPFIELD(TupleDesc, rd_att);
		if (keep_rules)
		{
			SWAPFIELD(RuleLock *, rd_rules);
			SWAPFIELD(MemoryContext, rd_rulescxt);
		}
		if (keep_policies)
			SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
		/* toast OID override must be preserved */
		SWAPFIELD(Oid, rd_toastoid);
		/* pgstat_info must be preserved */
		SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
		/* preserve old partitioning info if no logical change */
		if (keep_partkey)
		{
			SWAPFIELD(PartitionKey, rd_partkey);
			SWAPFIELD(MemoryContext, rd_partkeycxt);
		}
		if (keep_partdesc)
		{
			SWAPFIELD(PartitionDesc, rd_partdesc);
			SWAPFIELD(MemoryContext, rd_pdcxt);
		}
		else if (rebuild && newrel->rd_pdcxt != NULL)
		{
			/*
			 * We are rebuilding a partitioned relation with a non-zero
			 * reference count, so keep the old partition descriptor around,
			 * in case there's a PartitionDirectory with a pointer to it.
			 * Attach it to the new rd_pdcxt so that it gets cleaned up
			 * eventually.  In the case where the reference count is 0, this
			 * code is not reached, which should be OK because in that case
			 * there should be no PartitionDirectory with a pointer to the old
			 * entry.
			 *
			 * Note that newrel and relation have already been swapped, so the
			 * "old" partition descriptor is actually the one hanging off of
			 * newrel.
			 *
			 * (The "rebuild" test is always true on this code path, since
			 * the !rebuild case was dealt with further up.)
			 */
			MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
			/* clear newrel's pointers so destroying newrel leaves it alone */
			newrel->rd_partdesc = NULL;
			newrel->rd_pdcxt = NULL;
		}

#undef SWAPFIELD

		/*
		 * And now we can throw away the temporary entry.  If the tupledesc
		 * was not kept, newrel is now carrying the old one, so ask for it to
		 * be remembered until end of transaction rather than freed at once.
		 */
		RelationDestroyRelation(newrel, !keep_tupdesc);
	}
}
2660
2661/*
2662 * RelationFlushRelation
2663 *
2664 * Rebuild the relation if it is open (refcount > 0), else blow it away.
2665 * This is used when we receive a cache invalidation event for the rel.
2666 */
2667static void
2668RelationFlushRelation(Relation relation)
2669{
2670 if (relation->rd_createSubid != InvalidSubTransactionId ||
2671 relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2672 {
2673 /*
2674 * New relcache entries are always rebuilt, not flushed; else we'd
2675 * forget the "new" status of the relation, which is a useful
2676 * optimization to have. Ditto for the new-relfilenode status.
2677 *
2678 * The rel could have zero refcnt here, so temporarily increment the
2679 * refcnt to ensure it's safe to rebuild it. We can assume that the
2680 * current transaction has some lock on the rel already.
2681 */
2682 RelationIncrementReferenceCount(relation);
2683 RelationClearRelation(relation, true);
2684 RelationDecrementReferenceCount(relation);
2685 }
2686 else
2687 {
2688 /*
2689 * Pre-existing rels can be dropped from the relcache if not open.
2690 */
2691 bool rebuild = !RelationHasReferenceCountZero(relation);
2692
2693 RelationClearRelation(relation, rebuild);
2694 }
2695}
2696
2697/*
2698 * RelationForgetRelation - unconditionally remove a relcache entry
2699 *
2700 * External interface for destroying a relcache entry when we
2701 * drop the relation.
2702 */
2703void
2704RelationForgetRelation(Oid rid)
2705{
2706 Relation relation;
2707
2708 RelationIdCacheLookup(rid, relation);
2709
2710 if (!PointerIsValid(relation))
2711 return; /* not in cache, nothing to do */
2712
2713 if (!RelationHasReferenceCountZero(relation))
2714 elog(ERROR, "relation %u is still open", rid);
2715
2716 /* Unconditionally destroy the relcache entry */
2717 RelationClearRelation(relation, false);
2718}
2719
2720/*
2721 * RelationCacheInvalidateEntry
2722 *
2723 * This routine is invoked for SI cache flush messages.
2724 *
2725 * Any relcache entry matching the relid must be flushed. (Note: caller has
2726 * already determined that the relid belongs to our database or is a shared
2727 * relation.)
2728 *
2729 * We used to skip local relations, on the grounds that they could
2730 * not be targets of cross-backend SI update messages; but it seems
2731 * safer to process them, so that our *own* SI update messages will
2732 * have the same effects during CommandCounterIncrement for both
2733 * local and nonlocal relations.
2734 */
2735void
2736RelationCacheInvalidateEntry(Oid relationId)
2737{
2738 Relation relation;
2739
2740 RelationIdCacheLookup(relationId, relation);
2741
2742 if (PointerIsValid(relation))
2743 {
2744 relcacheInvalsReceived++;
2745 RelationFlushRelation(relation);
2746 }
2747}
2748
2749/*
2750 * RelationCacheInvalidate
2751 * Blow away cached relation descriptors that have zero reference counts,
2752 * and rebuild those with positive reference counts. Also reset the smgr
2753 * relation cache and re-read relation mapping data.
2754 *
2755 * This is currently used only to recover from SI message buffer overflow,
2756 * so we do not touch new-in-transaction relations; they cannot be targets
2757 * of cross-backend SI updates (and our own updates now go through a
2758 * separate linked list that isn't limited by the SI message buffer size).
2759 * Likewise, we need not discard new-relfilenode-in-transaction hints,
2760 * since any invalidation of those would be a local event.
2761 *
2762 * We do this in two phases: the first pass deletes deletable items, and
2763 * the second one rebuilds the rebuildable items. This is essential for
2764 * safety, because hash_seq_search only copes with concurrent deletion of
2765 * the element it is currently visiting. If a second SI overflow were to
2766 * occur while we are walking the table, resulting in recursive entry to
2767 * this routine, we could crash because the inner invocation blows away
2768 * the entry next to be visited by the outer scan. But this way is OK,
2769 * because (a) during the first pass we won't process any more SI messages,
2770 * so hash_seq_search will complete safely; (b) during the second pass we
2771 * only hold onto pointers to nondeletable entries.
2772 *
2773 * The two-phase approach also makes it easy to update relfilenodes for
2774 * mapped relations before we do anything else, and to ensure that the
2775 * second pass processes nailed-in-cache items before other nondeletable
2776 * items. This should ensure that system catalogs are up to date before
2777 * we attempt to use them to reload information about other open relations.
2778 */
void
RelationCacheInvalidate(void)
{
	HASH_SEQ_STATUS status;
	RelIdCacheEnt *idhentry;
	Relation	relation;
	List	   *rebuildFirstList = NIL;	/* pg_class, then pg_class_oid_index */
	List	   *rebuildList = NIL;	/* other nailed rels, then everything else */
	ListCell   *l;

	/*
	 * Reload relation mapping data before starting to reconstruct cache.
	 */
	RelationMapInvalidateAll();

	/*
	 * Phase 1: scan every entry in the relcache hash table.  Unreferenced
	 * entries are deleted right away; referenced ones are only queued on one
	 * of the two rebuild lists, to be processed after the scan has finished.
	 */
	hash_seq_init(&status, RelationIdCache);

	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
	{
		relation = idhentry->reldesc;

		/* Must close all smgr references to avoid leaving dangling ptrs */
		RelationCloseSmgr(relation);

		/*
		 * Ignore new relations; no other backend will manipulate them before
		 * we commit.  Likewise, before replacing a relation's relfilenode, we
		 * shall have acquired AccessExclusiveLock and drained any applicable
		 * pending invalidations.
		 */
		if (relation->rd_createSubid != InvalidSubTransactionId ||
			relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
			continue;

		/* Entries skipped above are not counted; only flushed/queued ones */
		relcacheInvalsReceived++;

		if (RelationHasReferenceCountZero(relation))
		{
			/*
			 * Delete this entry immediately.  A nailed entry is expected to
			 * always carry a reference, so it must never show up here.
			 */
			Assert(!relation->rd_isnailed);
			RelationClearRelation(relation, false);
		}
		else
		{
			/*
			 * If it's a mapped relation, immediately update its rd_node in
			 * case its relfilenode changed.  We must do this during phase 1
			 * in case the relation is consulted during rebuild of other
			 * relcache entries in phase 2.  It's safe since consulting the
			 * map doesn't involve any access to relcache entries.
			 */
			if (RelationIsMapped(relation))
				RelationInitPhysicalAddr(relation);

			/*
			 * Add this entry to list of stuff to rebuild in second pass.
			 * pg_class goes to the front of rebuildFirstList while
			 * pg_class_oid_index goes to the back of rebuildFirstList, so
			 * they are done first and second respectively.  Other nailed
			 * relations go to the front of rebuildList, so they'll be done
			 * next in no particular order; and everything else goes to the
			 * back of rebuildList.
			 */
			if (RelationGetRelid(relation) == RelationRelationId)
				rebuildFirstList = lcons(relation, rebuildFirstList);
			else if (RelationGetRelid(relation) == ClassOidIndexId)
				rebuildFirstList = lappend(rebuildFirstList, relation);
			else if (relation->rd_isnailed)
				rebuildList = lcons(relation, rebuildList);
			else
				rebuildList = lappend(rebuildList, relation);
		}
	}

	/*
	 * Now zap any remaining smgr cache entries.  This must happen before we
	 * start to rebuild entries, since that may involve catalog fetches which
	 * will re-open catalog files.
	 */
	smgrcloseall();

	/*
	 * Phase 2: rebuild the items found to need rebuild in phase 1.  The
	 * "first" list (pg_class, then its OID index) is drained completely
	 * before the general list, whose nailed entries precede the others.
	 */
	foreach(l, rebuildFirstList)
	{
		relation = (Relation) lfirst(l);
		RelationClearRelation(relation, true);
	}
	/* Only the list cells are released; the relcache entries stay in place */
	list_free(rebuildFirstList);
	foreach(l, rebuildList)
	{
		relation = (Relation) lfirst(l);
		RelationClearRelation(relation, true);
	}
	list_free(rebuildList);
}
2875
2876/*
2877 * RelationCloseSmgrByOid - close a relcache entry's smgr link
2878 *
2879 * Needed in some cases where we are changing a relation's physical mapping.
2880 * The link will be automatically reopened on next use.
2881 */
2882void
2883RelationCloseSmgrByOid(Oid relationId)
2884{
2885 Relation relation;
2886
2887 RelationIdCacheLookup(relationId, relation);
2888
2889 if (!PointerIsValid(relation))
2890 return; /* not in cache, nothing to do */
2891
2892 RelationCloseSmgr(relation);
2893}
2894
2895static void
2896RememberToFreeTupleDescAtEOX(TupleDesc td)
2897{
2898 if (EOXactTupleDescArray == NULL)
2899 {
2900 MemoryContext oldcxt;
2901
2902 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2903
2904 EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
2905 EOXactTupleDescArrayLen = 16;
2906 NextEOXactTupleDescNum = 0;
2907 MemoryContextSwitchTo(oldcxt);
2908 }
2909 else if (NextEOXactTupleDescNum >= EOXactTupleDescArrayLen)
2910 {
2911 int32 newlen = EOXactTupleDescArrayLen * 2;
2912
2913 Assert(EOXactTupleDescArrayLen > 0);
2914
2915 EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2916 newlen * sizeof(TupleDesc));
2917 EOXactTupleDescArrayLen = newlen;
2918 }
2919
2920 EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2921}
2922
2923/*
2924 * AtEOXact_RelationCache
2925 *
2926 * Clean up the relcache at main-transaction commit or abort.
2927 *
2928 * Note: this must be called *before* processing invalidation messages.
2929 * In the case of abort, we don't want to try to rebuild any invalidated
2930 * cache entries (since we can't safely do database accesses). Therefore
2931 * we must reset refcnts before handling pending invalidations.
2932 *
2933 * As of PostgreSQL 8.1, relcache refcnts should get released by the
2934 * ResourceOwner mechanism. This routine just does a debugging
2935 * cross-check that no pins remain. However, we also need to do special
2936 * cleanup when the current transaction created any relations or made use
2937 * of forced index lists.
2938 */
2939void
2940AtEOXact_RelationCache(bool isCommit)
2941{
2942 HASH_SEQ_STATUS status;
2943 RelIdCacheEnt *idhentry;
2944 int i;
2945
2946 /*
2947 * Unless the eoxact_list[] overflowed, we only need to examine the rels
2948 * listed in it. Otherwise fall back on a hash_seq_search scan.
2949 *
2950 * For simplicity, eoxact_list[] entries are not deleted till end of
2951 * top-level transaction, even though we could remove them at
2952 * subtransaction end in some cases, or remove relations from the list if
2953 * they are cleared for other reasons. Therefore we should expect the
2954 * case that list entries are not found in the hashtable; if not, there's
2955 * nothing to do for them.
2956 */
2957 if (eoxact_list_overflowed)
2958 {
2959 hash_seq_init(&status, RelationIdCache);
2960 while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2961 {
2962 AtEOXact_cleanup(idhentry->reldesc, isCommit);
2963 }
2964 }
2965 else
2966 {
2967 for (i = 0; i < eoxact_list_len; i++)
2968 {
2969 idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
2970 (void *) &eoxact_list[i],
2971 HASH_FIND,
2972 NULL);
2973 if (idhentry != NULL)
2974 AtEOXact_cleanup(idhentry->reldesc, isCommit);
2975 }
2976 }
2977
2978 if (EOXactTupleDescArrayLen > 0)
2979 {
2980 Assert(EOXactTupleDescArray != NULL);
2981 for (i = 0; i < NextEOXactTupleDescNum; i++)
2982 FreeTupleDesc(EOXactTupleDescArray[i]);
2983 pfree(EOXactTupleDescArray);
2984 EOXactTupleDescArray = NULL;
2985 }
2986
2987 /* Now we're out of the transaction and can clear the lists */
2988 eoxact_list_len = 0;
2989 eoxact_list_overflowed = false;
2990 NextEOXactTupleDescNum = 0;
2991 EOXactTupleDescArrayLen = 0;
2992}
2993
2994/*
2995 * AtEOXact_cleanup
2996 *
2997 * Clean up a single rel at main-transaction commit or abort
2998 *
2999 * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3000 * bother to prevent duplicate entries in eoxact_list[].
3001 */
3002static void
3003AtEOXact_cleanup(Relation relation, bool isCommit)
3004{
3005 /*
3006 * The relcache entry's ref count should be back to its normal
3007 * not-in-a-transaction state: 0 unless it's nailed in cache.
3008 *
3009 * In bootstrap mode, this is NOT true, so don't check it --- the
3010 * bootstrap code expects relations to stay open across start/commit
3011 * transaction calls. (That seems bogus, but it's not worth fixing.)
3012 *
3013 * Note: ideally this check would be applied to every relcache entry, not
3014 * just those that have eoxact work to do. But it's not worth forcing a
3015 * scan of the whole relcache just for this. (Moreover, doing so would
3016 * mean that assert-enabled testing never tests the hash_search code path
3017 * above, which seems a bad idea.)
3018 */
3019#ifdef USE_ASSERT_CHECKING
3020 if (!IsBootstrapProcessingMode())
3021 {
3022 int expected_refcnt;
3023
3024 expected_refcnt = relation->rd_isnailed ? 1 : 0;
3025 Assert(relation->rd_refcnt == expected_refcnt);
3026 }
3027#endif
3028
3029 /*
3030 * Is it a relation created in the current transaction?
3031 *
3032 * During commit, reset the flag to zero, since we are now out of the
3033 * creating transaction. During abort, simply delete the relcache entry
3034 * --- it isn't interesting any longer. (NOTE: if we have forgotten the
3035 * new-ness of a new relation due to a forced cache flush, the entry will
3036 * get deleted anyway by shared-cache-inval processing of the aborted
3037 * pg_class insertion.)
3038 */
3039 if (relation->rd_createSubid != InvalidSubTransactionId)
3040 {
3041 if (isCommit)
3042 relation->rd_createSubid = InvalidSubTransactionId;
3043 else if (RelationHasReferenceCountZero(relation))
3044 {
3045 RelationClearRelation(relation, false);
3046 return;
3047 }
3048 else
3049 {
3050 /*
3051 * Hmm, somewhere there's a (leaked?) reference to the relation.
3052 * We daren't remove the entry for fear of dereferencing a
3053 * dangling pointer later. Bleat, and mark it as not belonging to
3054 * the current transaction. Hopefully it'll get cleaned up
3055 * eventually. This must be just a WARNING to avoid
3056 * error-during-error-recovery loops.
3057 */
3058 relation->rd_createSubid = InvalidSubTransactionId;
3059 elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3060 RelationGetRelationName(relation));
3061 }
3062 }
3063
3064 /*
3065 * Likewise, reset the hint about the relfilenode being new.
3066 */
3067 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3068}
3069
3070/*
3071 * AtEOSubXact_RelationCache
3072 *
3073 * Clean up the relcache at sub-transaction commit or abort.
3074 *
3075 * Note: this must be called *before* processing invalidation messages.
3076 */
3077void
3078AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
3079 SubTransactionId parentSubid)
3080{
3081 HASH_SEQ_STATUS status;
3082 RelIdCacheEnt *idhentry;
3083 int i;
3084
3085 /*
3086 * Unless the eoxact_list[] overflowed, we only need to examine the rels
3087 * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3088 * logic as in AtEOXact_RelationCache.
3089 */
3090 if (eoxact_list_overflowed)
3091 {
3092 hash_seq_init(&status, RelationIdCache);
3093 while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3094 {
3095 AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3096 mySubid, parentSubid);
3097 }
3098 }
3099 else
3100 {
3101 for (i = 0; i < eoxact_list_len; i++)
3102 {
3103 idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3104 (void *) &eoxact_list[i],
3105 HASH_FIND,
3106 NULL);
3107 if (idhentry != NULL)
3108 AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3109 mySubid, parentSubid);
3110 }
3111 }
3112
3113 /* Don't reset the list; we still need more cleanup later */
3114}
3115
3116/*
3117 * AtEOSubXact_cleanup
3118 *
3119 * Clean up a single rel at subtransaction commit or abort
3120 *
3121 * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3122 * bother to prevent duplicate entries in eoxact_list[].
3123 */
3124static void
3125AtEOSubXact_cleanup(Relation relation, bool isCommit,
3126 SubTransactionId mySubid, SubTransactionId parentSubid)
3127{
3128 /*
3129 * Is it a relation created in the current subtransaction?
3130 *
3131 * During subcommit, mark it as belonging to the parent, instead. During
3132 * subabort, simply delete the relcache entry.
3133 */
3134 if (relation->rd_createSubid == mySubid)
3135 {
3136 if (isCommit)
3137 relation->rd_createSubid = parentSubid;
3138 else if (RelationHasReferenceCountZero(relation))
3139 {
3140 RelationClearRelation(relation, false);
3141 return;
3142 }
3143 else
3144 {
3145 /*
3146 * Hmm, somewhere there's a (leaked?) reference to the relation.
3147 * We daren't remove the entry for fear of dereferencing a
3148 * dangling pointer later. Bleat, and transfer it to the parent
3149 * subtransaction so we can try again later. This must be just a
3150 * WARNING to avoid error-during-error-recovery loops.
3151 */
3152 relation->rd_createSubid = parentSubid;
3153 elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3154 RelationGetRelationName(relation));
3155 }
3156 }
3157
3158 /*
3159 * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
3160 */
3161 if (relation->rd_newRelfilenodeSubid == mySubid)
3162 {
3163 if (isCommit)
3164 relation->rd_newRelfilenodeSubid = parentSubid;
3165 else
3166 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3167 }
3168}
3169
3170
/*
 *		RelationBuildLocalRelation
 *			Build a relcache entry for an about-to-be-created relation,
 *			and enter it into the relcache.
 *
 * Arguments:
 *	relname			name copied into the entry's pg_class form
 *	relnamespace	namespace OID stored in the pg_class form
 *	tupDesc			source tuple descriptor; the entry gets its own copy,
 *					carrying over only the attidentity, attgenerated and
 *					attnotnull settings of each attribute
 *	relid			OID of the new relation, also stamped into every
 *					attribute's attrelid
 *	accessmtd		stored as relam
 *	relfilenode		stored in the pg_class form, or handed to the relation
 *					mapper when mapped_relation is true
 *	reltablespace	stored as reltablespace
 *	shared_relation	must agree with IsSharedRelation(relid), else ERROR
 *	mapped_relation	whether the filenode is tracked by the relation mapper
 *	relpersistence	RELPERSISTENCE_PERMANENT, _UNLOGGED or _TEMP; anything
 *					else raises ERROR
 *	relkind			stored as relkind; also decides relispopulated,
 *					relreplident and whether the table access method is
 *					initialized
 *
 * Returns the new entry, marked valid, tagged as created in the current
 * subtransaction, and with its reference count incremented on behalf of
 * the caller.
 */
Relation
RelationBuildLocalRelation(const char *relname,
						   Oid relnamespace,
						   TupleDesc tupDesc,
						   Oid relid,
						   Oid accessmtd,
						   Oid relfilenode,
						   Oid reltablespace,
						   bool shared_relation,
						   bool mapped_relation,
						   char relpersistence,
						   char relkind)
{
	Relation	rel;
	MemoryContext oldcxt;
	int			natts = tupDesc->natts;
	int			i;
	bool		has_not_null;	/* does any attribute have attnotnull set? */
	bool		nailit;			/* must the entry be nailed in cache? */

	AssertArg(natts >= 0);

	/*
	 * check for creation of a rel that must be nailed in cache.
	 *
	 * XXX this list had better match the relations specially handled in
	 * RelationCacheInitializePhase2/3.
	 */
	switch (relid)
	{
		case DatabaseRelationId:
		case AuthIdRelationId:
		case AuthMemRelationId:
		case RelationRelationId:
		case AttributeRelationId:
		case ProcedureRelationId:
		case TypeRelationId:
			nailit = true;
			break;
		default:
			nailit = false;
			break;
	}

	/*
	 * check that hardwired list of shared rels matches what's in the
	 * bootstrap .bki file.  If you get a failure here during initdb, you
	 * probably need to fix IsSharedRelation() to match whatever you've done
	 * to the set of shared relations.
	 */
	if (shared_relation != IsSharedRelation(relid))
		elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
			 relname, relid);

	/* Shared relations had better be mapped, too */
	Assert(mapped_relation || !shared_relation);

	/*
	 * switch to the cache context to create the relcache entry.
	 */
	if (!CacheMemoryContext)
		CreateCacheMemoryContext();

	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);

	/*
	 * allocate a new relation descriptor and fill in basic state fields.
	 */
	rel = (Relation) palloc0(sizeof(RelationData));

	/* make sure relation is marked as having no open file yet */
	rel->rd_smgr = NULL;

	/* mark it nailed if appropriate */
	rel->rd_isnailed = nailit;

	/* a nailed entry starts out with one reference, any other with none */
	rel->rd_refcnt = nailit ? 1 : 0;

	/* it's being created in this transaction */
	rel->rd_createSubid = GetCurrentSubTransactionId();
	rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;

	/*
	 * create a new tuple descriptor from the one passed in.  We do this
	 * partly to copy it into the cache context, and partly because the new
	 * relation can't have any defaults or constraints yet; they have to be
	 * added in later steps, because they require additions to multiple system
	 * catalogs.  We can copy attnotnull constraints here, however.
	 */
	rel->rd_att = CreateTupleDescCopy(tupDesc);
	rel->rd_att->tdrefcount = 1;	/* mark as refcounted */
	has_not_null = false;
	for (i = 0; i < natts; i++)
	{
		Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
		Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);

		/* explicitly carry these per-attribute flags over from the source */
		datt->attidentity = satt->attidentity;
		datt->attgenerated = satt->attgenerated;
		datt->attnotnull = satt->attnotnull;
		has_not_null |= satt->attnotnull;
	}

	/* a constraint struct is attached only if some column is NOT NULL */
	if (has_not_null)
	{
		TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));

		constr->has_not_null = true;
		rel->rd_att->constr = constr;
	}

	/*
	 * initialize relation tuple form (caller may add/override data later)
	 */
	rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);

	namestrcpy(&rel->rd_rel->relname, relname);
	rel->rd_rel->relnamespace = relnamespace;

	rel->rd_rel->relkind = relkind;
	rel->rd_rel->relnatts = natts;
	rel->rd_rel->reltype = InvalidOid;
	/* needed when bootstrapping: */
	rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;

	/* set up persistence and relcache fields dependent on it */
	rel->rd_rel->relpersistence = relpersistence;
	switch (relpersistence)
	{
		case RELPERSISTENCE_UNLOGGED:
		case RELPERSISTENCE_PERMANENT:
			rel->rd_backend = InvalidBackendId;
			rel->rd_islocaltemp = false;
			break;
		case RELPERSISTENCE_TEMP:
			Assert(isTempOrTempToastNamespace(relnamespace));
			rel->rd_backend = BackendIdForTempRelations();
			rel->rd_islocaltemp = true;
			break;
		default:

			/*
			 * NOTE(review): this ERROR is raised after rel, its tuple
			 * descriptor and rd_rel were allocated with CacheMemoryContext
			 * current; presumably those allocations are just leaked on this
			 * should-not-happen path --- confirm whether validating
			 * relpersistence before allocating would be preferable.
			 */
			elog(ERROR, "invalid relpersistence: %c", relpersistence);
			break;
	}

	/* if it's a materialized view, it's not populated initially */
	if (relkind == RELKIND_MATVIEW)
		rel->rd_rel->relispopulated = false;
	else
		rel->rd_rel->relispopulated = true;

	/* set replica identity -- system catalogs and non-tables don't have one */
	if (!IsCatalogNamespace(relnamespace) &&
		(relkind == RELKIND_RELATION ||
		 relkind == RELKIND_MATVIEW ||
		 relkind == RELKIND_PARTITIONED_TABLE))
		rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
	else
		rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;

	/*
	 * Insert relation physical and logical identifiers (OIDs) into the right
	 * places.  For a mapped relation, we set relfilenode to zero and rely on
	 * RelationInitPhysicalAddr to consult the map.
	 */
	rel->rd_rel->relisshared = shared_relation;

	RelationGetRelid(rel) = relid;

	for (i = 0; i < natts; i++)
		TupleDescAttr(rel->rd_att, i)->attrelid = relid;

	rel->rd_rel->reltablespace = reltablespace;

	if (mapped_relation)
	{
		rel->rd_rel->relfilenode = InvalidOid;
		/* Add it to the active mapping information */
		RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
	}
	else
		rel->rd_rel->relfilenode = relfilenode;

	RelationInitLockInfo(rel);	/* see lmgr.c */

	RelationInitPhysicalAddr(rel);

	rel->rd_rel->relam = accessmtd;

	/* only these relkinds get a table access method set up here */
	if (relkind == RELKIND_RELATION ||
		relkind == RELKIND_SEQUENCE ||
		relkind == RELKIND_TOASTVALUE ||
		relkind == RELKIND_MATVIEW)
		RelationInitTableAccessMethod(rel);

	/*
	 * Okay to insert into the relcache hash table.
	 *
	 * Ordinarily, there should certainly not be an existing hash entry for
	 * the same OID; but during bootstrap, when we create a "real" relcache
	 * entry for one of the bootstrap relations, we'll be overwriting the
	 * phony one created with formrdesc.  So allow that to happen for nailed
	 * rels.
	 */
	RelationCacheInsert(rel, nailit);

	/*
	 * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
	 * can't do this before storing relid in it.
	 */
	EOXactListAdd(rel);

	/*
	 * done building relcache entry.
	 */
	MemoryContextSwitchTo(oldcxt);

	/* It's fully valid */
	rel->rd_isvalid = true;

	/*
	 * Caller expects us to pin the returned entry.
	 */
	RelationIncrementReferenceCount(rel);

	return rel;
}
3402
3403
/*
 * RelationSetNewRelfilenode
 *
 * Assign a new relfilenode (physical file name), and possibly a new
 * persistence setting, to the relation.
 *
 * This allows a full rewrite of the relation to be done with transactional
 * safety (since the filenode assignment can be rolled back).  Note however
 * that there is no simple way to access the relation's old data for the
 * remainder of the current transaction.  This limits the usefulness to cases
 * such as TRUNCATE or rebuilding an index from scratch.
 *
 * Arguments:
 *	relation	the relation to receive new storage; its relkind must be
 *				index, sequence, table, TOAST table or materialized view,
 *				otherwise ERROR is raised
 *	persistence	persistence setting used when choosing the new filenode
 *				and creating its storage, and written to the relation's
 *				pg_class row (for a mapped relation it must already match
 *				that row)
 *
 * Also raises ERROR if the relation's pg_class tuple cannot be found.
 *
 * Caller must already hold exclusive lock on the relation.
 */
void
RelationSetNewRelfilenode(Relation relation, char persistence)
{
	Oid			newrelfilenode;
	Relation	pg_class;
	HeapTuple	tuple;
	Form_pg_class classform;

	/*
	 * These stay invalid unless the table AM call below overwrites them, so
	 * that is what indexes and sequences end up with.
	 */
	MultiXactId minmulti = InvalidMultiXactId;
	TransactionId freezeXid = InvalidTransactionId;
	RelFileNode newrnode;

	/* Allocate a new relfilenode */
	newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
									   persistence);

	/*
	 * Get a writable copy of the pg_class tuple for the given relation.
	 */
	pg_class = table_open(RelationRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopy1(RELOID,
								ObjectIdGetDatum(RelationGetRelid(relation)));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for relation %u",
			 RelationGetRelid(relation));
	classform = (Form_pg_class) GETSTRUCT(tuple);

	/*
	 * Schedule unlinking of the old storage at transaction commit.
	 */
	RelationDropStorage(relation);

	/*
	 * Create storage for the main fork of the new relfilenode.  If it's a
	 * table-like object, call into the table AM to do so, which'll also
	 * create the table's init fork if needed.
	 *
	 * NOTE: If relevant for the AM, any conflict in relfilenode value will be
	 * caught here, if GetNewRelFileNode messes up for any reason.
	 */

	/* same physical identifier as the current one, except for relNode */
	newrnode = relation->rd_node;
	newrnode.relNode = newrelfilenode;

	switch (relation->rd_rel->relkind)
	{
		case RELKIND_INDEX:
		case RELKIND_SEQUENCE:
			{
				/* handle these directly, at least for now */
				SMgrRelation srel;

				srel = RelationCreateStorage(newrnode, persistence);
				smgrclose(srel);
			}
			break;

		case RELKIND_RELATION:
		case RELKIND_TOASTVALUE:
		case RELKIND_MATVIEW:
			/* the table AM reports back freezeXid and minmulti */
			table_relation_set_new_filenode(relation, &newrnode,
											persistence,
											&freezeXid, &minmulti);
			break;

		default:
			/* we shouldn't be called for anything else */
			elog(ERROR, "relation \"%s\" does not have storage",
				 RelationGetRelationName(relation));
			break;
	}

	/*
	 * If we're dealing with a mapped index, pg_class.relfilenode doesn't
	 * change; instead we have to send the update to the relation mapper.
	 *
	 * For mapped indexes, we don't actually change the pg_class entry at all;
	 * this is essential when reindexing pg_class itself.  That leaves us with
	 * possibly-inaccurate values of relpages etc, but those will be fixed up
	 * later.
	 */
	if (RelationIsMapped(relation))
	{
		/* This case is only supported for indexes */
		Assert(relation->rd_rel->relkind == RELKIND_INDEX);

		/* Since we're not updating pg_class, these had better not change */
		Assert(classform->relfrozenxid == freezeXid);
		Assert(classform->relminmxid == minmulti);
		Assert(classform->relpersistence == persistence);

		/*
		 * In some code paths it's possible that the tuple update we'd
		 * otherwise do here is the only thing that would assign an XID for
		 * the current transaction.  However, we must have an XID to delete
		 * files, so make sure one is assigned.
		 */
		(void) GetCurrentTransactionId();

		/* Do the deed */
		RelationMapUpdateMap(RelationGetRelid(relation),
							 newrelfilenode,
							 relation->rd_rel->relisshared,
							 false);

		/* Since we're not updating pg_class, must trigger inval manually */
		CacheInvalidateRelcache(relation);
	}
	else
	{
		/* Normal case, update the pg_class entry */
		classform->relfilenode = newrelfilenode;

		/* relpages etc. never change for sequences */
		if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
		{
			classform->relpages = 0;	/* it's empty until further notice */
			classform->reltuples = 0;
			classform->relallvisible = 0;
		}
		classform->relfrozenxid = freezeXid;
		classform->relminmxid = minmulti;
		classform->relpersistence = persistence;

		CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
	}

	/* the tuple was our own copy, so release it in either case */
	heap_freetuple(tuple);

	table_close(pg_class, RowExclusiveLock);

	/*
	 * Make the pg_class row change or relation map change visible.  This will
	 * cause the relcache entry to get updated, too.
	 */
	CommandCounterIncrement();

	/*
	 * Mark the rel as having been given a new relfilenode in the current
	 * (sub) transaction.  This is a hint that can be used to optimize later
	 * operations on the rel in the same transaction.
	 */
	relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();

	/* Flag relation as needing eoxact cleanup (to remove the hint) */
	EOXactListAdd(relation);
}
3564
3565
3566/*
3567 * RelationCacheInitialize
3568 *
3569 * This initializes the relation descriptor cache. At the time
3570 * that this is invoked, we can't do database access yet (mainly
3571 * because the transaction subsystem is not up); all we are doing
3572 * is making an empty cache hashtable. This must be done before
3573 * starting the initialization transaction, because otherwise
3574 * AtEOXact_RelationCache would crash if that transaction aborts
3575 * before we can get the relcache set up.
3576 */
3577
3578#define INITRELCACHESIZE 400
3579
3580void
3581RelationCacheInitialize(void)
3582{
3583 HASHCTL ctl;
3584
3585 /*
3586 * make sure cache memory context exists
3587 */
3588 if (!CacheMemoryContext)
3589 CreateCacheMemoryContext();
3590
3591 /*
3592 * create hashtable that indexes the relcache
3593 */
3594 MemSet(&ctl, 0, sizeof(ctl));
3595 ctl.keysize = sizeof(Oid);
3596 ctl.entrysize = sizeof(RelIdCacheEnt);
3597 RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3598 &ctl, HASH_ELEM | HASH_BLOBS);
3599
3600 /*
3601 * relation mapper needs to be initialized too
3602 */
3603 RelationMapInitialize();
3604}
3605
3606/*
3607 * RelationCacheInitializePhase2
3608 *
3609 * This is called to prepare for access to shared catalogs during startup.
3610 * We must at least set up nailed reldescs for pg_database, pg_authid,
3611 * pg_auth_members, and pg_shseclabel. Ideally we'd like to have reldescs
3612 * for their indexes, too. We attempt to load this information from the
3613 * shared relcache init file. If that's missing or broken, just make
3614 * phony entries for the catalogs themselves.
3615 * RelationCacheInitializePhase3 will clean up as needed.
3616 */
3617void
3618RelationCacheInitializePhase2(void)
3619{
3620 MemoryContext oldcxt;
3621
3622 /*
3623 * relation mapper needs initialized too
3624 */
3625 RelationMapInitializePhase2();
3626
3627 /*
3628 * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3629 * nothing.
3630 */
3631 if (IsBootstrapProcessingMode())
3632 return;
3633
3634 /*
3635 * switch to cache memory context
3636 */
3637 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3638
3639 /*
3640 * Try to load the shared relcache cache file. If unsuccessful, bootstrap
3641 * the cache with pre-made descriptors for the critical shared catalogs.
3642 */
3643 if (!load_relcache_init_file(true))
3644 {
3645 formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3646 Natts_pg_database, Desc_pg_database);
3647 formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3648 Natts_pg_authid, Desc_pg_authid);
3649 formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3650 Natts_pg_auth_members, Desc_pg_auth_members);
3651 formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3652 Natts_pg_shseclabel, Desc_pg_shseclabel);
3653 formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3654 Natts_pg_subscription, Desc_pg_subscription);
3655
3656#define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3657 }
3658
3659 MemoryContextSwitchTo(oldcxt);
3660}
3661
3662/*
3663 * RelationCacheInitializePhase3
3664 *
3665 * This is called as soon as the catcache and transaction system
3666 * are functional and we have determined MyDatabaseId. At this point
3667 * we can actually read data from the database's system catalogs.
3668 * We first try to read pre-computed relcache entries from the local
3669 * relcache init file. If that's missing or broken, make phony entries
3670 * for the minimum set of nailed-in-cache relations. Then (unless
3671 * bootstrapping) make sure we have entries for the critical system
3672 * indexes. Once we've done all this, we have enough infrastructure to
3673 * open any system catalog or use any catcache. The last step is to
3674 * rewrite the cache files if needed.
3675 */
3676void
3677RelationCacheInitializePhase3(void)
3678{
3679 HASH_SEQ_STATUS status;
3680 RelIdCacheEnt *idhentry;
3681 MemoryContext oldcxt;
3682 bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3683
3684 /*
3685 * relation mapper needs initialized too
3686 */
3687 RelationMapInitializePhase3();
3688
3689 /*
3690 * switch to cache memory context
3691 */
3692 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3693
3694 /*
3695 * Try to load the local relcache cache file. If unsuccessful, bootstrap
3696 * the cache with pre-made descriptors for the critical "nailed-in" system
3697 * catalogs.
3698 */
3699 if (IsBootstrapProcessingMode() ||
3700 !load_relcache_init_file(false))
3701 {
3702 needNewCacheFile = true;
3703
3704 formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3705 Natts_pg_class, Desc_pg_class);
3706 formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3707 Natts_pg_attribute, Desc_pg_attribute);
3708 formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3709 Natts_pg_proc, Desc_pg_proc);
3710 formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3711 Natts_pg_type, Desc_pg_type);
3712
3713#define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3714 }
3715
3716 MemoryContextSwitchTo(oldcxt);
3717
3718 /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
3719 if (IsBootstrapProcessingMode())
3720 return;
3721
3722 /*
3723 * If we didn't get the critical system indexes loaded into relcache, do
3724 * so now. These are critical because the catcache and/or opclass cache
3725 * depend on them for fetches done during relcache load. Thus, we have an
3726 * infinite-recursion problem. We can break the recursion by doing
3727 * heapscans instead of indexscans at certain key spots. To avoid hobbling
3728 * performance, we only want to do that until we have the critical indexes
3729 * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3730 * decide whether to do heapscan or indexscan at the key spots, and we set
3731 * it true after we've loaded the critical indexes.
3732 *
3733 * The critical indexes are marked as "nailed in cache", partly to make it
3734 * easy for load_relcache_init_file to count them, but mainly because we
3735 * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3736 * true. (NOTE: perhaps it would be possible to reload them by
3737 * temporarily setting criticalRelcachesBuilt to false again. For now,
3738 * though, we just nail 'em in.)
3739 *
3740 * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3741 * in the same way as the others, because the critical catalogs don't
3742 * (currently) have any rules or triggers, and so these indexes can be
3743 * rebuilt without inducing recursion. However they are used during
3744 * relcache load when a rel does have rules or triggers, so we choose to
3745 * nail them for performance reasons.
3746 */
3747 if (!criticalRelcachesBuilt)
3748 {
3749 load_critical_index(ClassOidIndexId,
3750 RelationRelationId);
3751 load_critical_index(AttributeRelidNumIndexId,
3752 AttributeRelationId);
3753 load_critical_index(IndexRelidIndexId,
3754 IndexRelationId);
3755 load_critical_index(OpclassOidIndexId,
3756 OperatorClassRelationId);
3757 load_critical_index(AccessMethodProcedureIndexId,
3758 AccessMethodProcedureRelationId);
3759 load_critical_index(RewriteRelRulenameIndexId,
3760 RewriteRelationId);
3761 load_critical_index(TriggerRelidNameIndexId,
3762 TriggerRelationId);
3763
3764#define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3765
3766 criticalRelcachesBuilt = true;
3767 }
3768
3769 /*
3770 * Process critical shared indexes too.
3771 *
3772 * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3773 * initial lookup of MyDatabaseId, without which we'll never find any
3774 * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3775 * database OID, so it instead depends on DatabaseOidIndexId. We also
3776 * need to nail up some indexes on pg_authid and pg_auth_members for use
3777 * during client authentication. SharedSecLabelObjectIndexId isn't
3778 * critical for the core system, but authentication hooks might be
3779 * interested in it.
3780 */
3781 if (!criticalSharedRelcachesBuilt)
3782 {
3783 load_critical_index(DatabaseNameIndexId,
3784 DatabaseRelationId);
3785 load_critical_index(DatabaseOidIndexId,
3786 DatabaseRelationId);
3787 load_critical_index(AuthIdRolnameIndexId,
3788 AuthIdRelationId);
3789 load_critical_index(AuthIdOidIndexId,
3790 AuthIdRelationId);
3791 load_critical_index(AuthMemMemRoleIndexId,
3792 AuthMemRelationId);
3793 load_critical_index(SharedSecLabelObjectIndexId,
3794 SharedSecLabelRelationId);
3795
3796#define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3797
3798 criticalSharedRelcachesBuilt = true;
3799 }
3800
3801 /*
3802 * Now, scan all the relcache entries and update anything that might be
3803 * wrong in the results from formrdesc or the relcache cache file. If we
3804 * faked up relcache entries using formrdesc, then read the real pg_class
3805 * rows and replace the fake entries with them. Also, if any of the
3806 * relcache entries have rules, triggers, or security policies, load that
3807 * info the hard way since it isn't recorded in the cache file.
3808 *
3809 * Whenever we access the catalogs to read data, there is a possibility of
3810 * a shared-inval cache flush causing relcache entries to be removed.
3811 * Since hash_seq_search only guarantees to still work after the *current*
3812 * entry is removed, it's unsafe to continue the hashtable scan afterward.
3813 * We handle this by restarting the scan from scratch after each access.
3814 * This is theoretically O(N^2), but the number of entries that actually
3815 * need to be fixed is small enough that it doesn't matter.
3816 */
3817 hash_seq_init(&status, RelationIdCache);
3818
3819 while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3820 {
3821 Relation relation = idhentry->reldesc;
3822 bool restart = false;
3823
3824 /*
3825 * Make sure *this* entry doesn't get flushed while we work with it.
3826 */
3827 RelationIncrementReferenceCount(relation);
3828
3829 /*
3830 * If it's a faked-up entry, read the real pg_class tuple.
3831 */
3832 if (relation->rd_rel->relowner == InvalidOid)
3833 {
3834 HeapTuple htup;
3835 Form_pg_class relp;
3836
3837 htup = SearchSysCache1(RELOID,
3838 ObjectIdGetDatum(RelationGetRelid(relation)));
3839 if (!HeapTupleIsValid(htup))
3840 elog(FATAL, "cache lookup failed for relation %u",
3841 RelationGetRelid(relation));
3842 relp = (Form_pg_class) GETSTRUCT(htup);
3843
3844 /*
3845 * Copy tuple to relation->rd_rel. (See notes in
3846 * AllocateRelationDesc())
3847 */
3848 memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
3849
3850 /* Update rd_options while we have the tuple */
3851 if (relation->rd_options)
3852 pfree(relation->rd_options);
3853 RelationParseRelOptions(relation, htup);
3854
3855 /*
3856 * Check the values in rd_att were set up correctly. (We cannot
3857 * just copy them over now: formrdesc must have set up the rd_att
3858 * data correctly to start with, because it may already have been
3859 * copied into one or more catcache entries.)
3860 */
3861 Assert(relation->rd_att->tdtypeid == relp->reltype);
3862 Assert(relation->rd_att->tdtypmod == -1);
3863
3864 ReleaseSysCache(htup);
3865
3866 /* relowner had better be OK now, else we'll loop forever */
3867 if (relation->rd_rel->relowner == InvalidOid)
3868 elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3869 RelationGetRelationName(relation));
3870
3871 restart = true;
3872 }
3873
3874 /*
3875 * Fix data that isn't saved in relcache cache file.
3876 *
3877 * relhasrules or relhastriggers could possibly be wrong or out of
3878 * date. If we don't actually find any rules or triggers, clear the
3879 * local copy of the flag so that we don't get into an infinite loop
3880 * here. We don't make any attempt to fix the pg_class entry, though.
3881 */
3882 if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3883 {
3884 RelationBuildRuleLock(relation);
3885 if (relation->rd_rules == NULL)
3886 relation->rd_rel->relhasrules = false;
3887 restart = true;
3888 }
3889 if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3890 {
3891 RelationBuildTriggers(relation);
3892 if (relation->trigdesc == NULL)
3893 relation->rd_rel->relhastriggers = false;
3894 restart = true;
3895 }
3896
3897 /*
3898 * Re-load the row security policies if the relation has them, since
3899 * they are not preserved in the cache. Note that we can never NOT
3900 * have a policy while relrowsecurity is true,
3901 * RelationBuildRowSecurity will create a single default-deny policy
3902 * if there is no policy defined in pg_policy.
3903 */
3904 if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
3905 {
3906 RelationBuildRowSecurity(relation);
3907
3908 Assert(relation->rd_rsdesc != NULL);
3909 restart = true;
3910 }
3911
3912 /*
3913 * Reload the partition key and descriptor for a partitioned table.
3914 */
3915 if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3916 relation->rd_partkey == NULL)
3917 {
3918 RelationBuildPartitionKey(relation);
3919 Assert(relation->rd_partkey != NULL);
3920
3921 restart = true;
3922 }
3923
3924 if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3925 relation->rd_partdesc == NULL)
3926 {
3927 RelationBuildPartitionDesc(relation);
3928 Assert(relation->rd_partdesc != NULL);
3929
3930 restart = true;
3931 }
3932
3933 if (relation->rd_tableam == NULL &&
3934 (relation->rd_rel->relkind == RELKIND_RELATION ||
3935 relation->rd_rel->relkind == RELKIND_SEQUENCE ||
3936 relation->rd_rel->relkind == RELKIND_TOASTVALUE ||
3937 relation->rd_rel->relkind == RELKIND_MATVIEW))
3938 {
3939 RelationInitTableAccessMethod(relation);
3940 Assert(relation->rd_tableam != NULL);
3941
3942 restart = true;
3943 }
3944
3945 /* Release hold on the relation */
3946 RelationDecrementReferenceCount(relation);
3947
3948 /* Now, restart the hashtable scan if needed */
3949 if (restart)
3950 {
3951 hash_seq_term(&status);
3952 hash_seq_init(&status, RelationIdCache);
3953 }
3954 }
3955
3956 /*
3957 * Lastly, write out new relcache cache files if needed. We don't bother
3958 * to distinguish cases where only one of the two needs an update.
3959 */
3960 if (needNewCacheFile)
3961 {
3962 /*
3963 * Force all the catcaches to finish initializing and thereby open the
3964 * catalogs and indexes they use. This will preload the relcache with
3965 * entries for all the most important system catalogs and indexes, so
3966 * that the init files will be most useful for future backends.
3967 */
3968 InitCatalogCachePhase2();
3969
3970 /* now write the files */
3971 write_relcache_init_file(true);
3972 write_relcache_init_file(false);
3973 }
3974}
3975
3976/*
3977 * Load one critical system index into the relcache
3978 *
3979 * indexoid is the OID of the target index, heapoid is the OID of the catalog
3980 * it belongs to.
3981 */
3982static void
3983load_critical_index(Oid indexoid, Oid heapoid)
3984{
3985 Relation ird;
3986
3987 /*
3988 * We must lock the underlying catalog before locking the index to avoid
3989 * deadlock, since RelationBuildDesc might well need to read the catalog,
3990 * and if anyone else is exclusive-locking this catalog and index they'll
3991 * be doing it in that order.
3992 */
3993 LockRelationOid(heapoid, AccessShareLock);
3994 LockRelationOid(indexoid, AccessShareLock);
3995 ird = RelationBuildDesc(indexoid, true);
3996 if (ird == NULL)
3997 elog(PANIC, "could not open critical system index %u", indexoid);
3998 ird->rd_isnailed = true;
3999 ird->rd_refcnt = 1;
4000 UnlockRelationOid(indexoid, AccessShareLock);
4001 UnlockRelationOid(heapoid, AccessShareLock);
4002}
4003
4004/*
4005 * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
4006 * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
4007 *
4008 * We need this kluge because we have to be able to access non-fixed-width
4009 * fields of pg_class and pg_index before we have the standard catalog caches
4010 * available. We use predefined data that's set up in just the same way as
4011 * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
4012 * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
4013 * does it have a TupleConstr field. But it's good enough for the purpose of
4014 * extracting fields.
4015 */
4016static TupleDesc
4017BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs)
4018{
4019 TupleDesc result;
4020 MemoryContext oldcxt;
4021 int i;
4022
4023 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4024
4025 result = CreateTemplateTupleDesc(natts);
4026 result->tdtypeid = RECORDOID; /* not right, but we don't care */
4027 result->tdtypmod = -1;
4028
4029 for (i = 0; i < natts; i++)
4030 {
4031 memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
4032 /* make sure attcacheoff is valid */
4033 TupleDescAttr(result, i)->attcacheoff = -1;
4034 }
4035
4036 /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
4037 TupleDescAttr(result, 0)->attcacheoff = 0;
4038
4039 /* Note: we don't bother to set up a TupleConstr entry */
4040
4041 MemoryContextSwitchTo(oldcxt);
4042
4043 return result;
4044}
4045
4046static TupleDesc
4047GetPgClassDescriptor(void)
4048{
4049 static TupleDesc pgclassdesc = NULL;
4050
4051 /* Already done? */
4052 if (pgclassdesc == NULL)
4053 pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
4054 Desc_pg_class);
4055
4056 return pgclassdesc;
4057}
4058
4059static TupleDesc
4060GetPgIndexDescriptor(void)
4061{
4062 static TupleDesc pgindexdesc = NULL;
4063
4064 /* Already done? */
4065 if (pgindexdesc == NULL)
4066 pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
4067 Desc_pg_index);
4068
4069 return pgindexdesc;
4070}
4071
4072/*
4073 * Load any default attribute value definitions for the relation.
4074 */
4075static void
4076AttrDefaultFetch(Relation relation)
4077{
4078 AttrDefault *attrdef = relation->rd_att->constr->defval;
4079 int ndef = relation->rd_att->constr->num_defval;
4080 Relation adrel;
4081 SysScanDesc adscan;
4082 ScanKeyData skey;
4083 HeapTuple htup;
4084 Datum val;
4085 bool isnull;
4086 int found;
4087 int i;
4088
4089 ScanKeyInit(&skey,
4090 Anum_pg_attrdef_adrelid,
4091 BTEqualStrategyNumber, F_OIDEQ,
4092 ObjectIdGetDatum(RelationGetRelid(relation)));
4093
4094 adrel = table_open(AttrDefaultRelationId, AccessShareLock);
4095 adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4096 NULL, 1, &skey);
4097 found = 0;
4098
4099 while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4100 {
4101 Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4102 Form_pg_attribute attr = TupleDescAttr(relation->rd_att, adform->adnum - 1);
4103
4104 for (i = 0; i < ndef; i++)
4105 {
4106 if (adform->adnum != attrdef[i].adnum)
4107 continue;
4108 if (attrdef[i].adbin != NULL)
4109 elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4110 NameStr(attr->attname),
4111 RelationGetRelationName(relation));
4112 else
4113 found++;
4114
4115 val = fastgetattr(htup,
4116 Anum_pg_attrdef_adbin,
4117 adrel->rd_att, &isnull);
4118 if (isnull)
4119 elog(WARNING, "null adbin for attr %s of rel %s",
4120 NameStr(attr->attname),
4121 RelationGetRelationName(relation));
4122 else
4123 {
4124 /* detoast and convert to cstring in caller's context */
4125 char *s = TextDatumGetCString(val);
4126
4127 attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext, s);
4128 pfree(s);
4129 }
4130 break;
4131 }
4132
4133 if (i >= ndef)
4134 elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4135 adform->adnum, RelationGetRelationName(relation));
4136 }
4137
4138 systable_endscan(adscan);
4139 table_close(adrel, AccessShareLock);
4140}
4141
4142/*
4143 * Load any check constraints for the relation.
4144 */
4145static void
4146CheckConstraintFetch(Relation relation)
4147{
4148 ConstrCheck *check = relation->rd_att->constr->check;
4149 int ncheck = relation->rd_att->constr->num_check;
4150 Relation conrel;
4151 SysScanDesc conscan;
4152 ScanKeyData skey[1];
4153 HeapTuple htup;
4154 int found = 0;
4155
4156 ScanKeyInit(&skey[0],
4157 Anum_pg_constraint_conrelid,
4158 BTEqualStrategyNumber, F_OIDEQ,
4159 ObjectIdGetDatum(RelationGetRelid(relation)));
4160
4161 conrel = table_open(ConstraintRelationId, AccessShareLock);
4162 conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4163 NULL, 1, skey);
4164
4165 while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4166 {
4167 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
4168 Datum val;
4169 bool isnull;
4170 char *s;
4171
4172 /* We want check constraints only */
4173 if (conform->contype != CONSTRAINT_CHECK)
4174 continue;
4175
4176 if (found >= ncheck)
4177 elog(ERROR, "unexpected constraint record found for rel %s",
4178 RelationGetRelationName(relation));
4179
4180 check[found].ccvalid = conform->convalidated;
4181 check[found].ccnoinherit = conform->connoinherit;
4182 check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
4183 NameStr(conform->conname));
4184
4185 /* Grab and test conbin is actually set */
4186 val = fastgetattr(htup,
4187 Anum_pg_constraint_conbin,
4188 conrel->rd_att, &isnull);
4189 if (isnull)
4190 elog(ERROR, "null conbin for rel %s",
4191 RelationGetRelationName(relation));
4192
4193 /* detoast and convert to cstring in caller's context */
4194 s = TextDatumGetCString(val);
4195 check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4196 pfree(s);
4197
4198 found++;
4199 }
4200
4201 systable_endscan(conscan);
4202 table_close(conrel, AccessShareLock);
4203
4204 if (found != ncheck)
4205 elog(ERROR, "%d constraint record(s) missing for rel %s",
4206 ncheck - found, RelationGetRelationName(relation));
4207
4208 /* Sort the records so that CHECKs are applied in a deterministic order */
4209 if (ncheck > 1)
4210 qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4211}
4212
4213/*
4214 * qsort comparator to sort ConstrCheck entries by name
4215 */
4216static int
4217CheckConstraintCmp(const void *a, const void *b)
4218{
4219 const ConstrCheck *ca = (const ConstrCheck *) a;
4220 const ConstrCheck *cb = (const ConstrCheck *) b;
4221
4222 return strcmp(ca->ccname, cb->ccname);
4223}
4224
4225/*
4226 * RelationGetFKeyList -- get a list of foreign key info for the relation
4227 *
4228 * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4229 * the given relation. This data is a direct copy of relevant fields from
4230 * pg_constraint. The list items are in no particular order.
4231 *
4232 * CAUTION: the returned list is part of the relcache's data, and could
4233 * vanish in a relcache entry reset. Callers must inspect or copy it
4234 * before doing anything that might trigger a cache flush, such as
4235 * system catalog accesses. copyObject() can be used if desired.
4236 * (We define it this way because current callers want to filter and
4237 * modify the list entries anyway, so copying would be a waste of time.)
4238 */
4239List *
4240RelationGetFKeyList(Relation relation)
4241{
4242 List *result;
4243 Relation conrel;
4244 SysScanDesc conscan;
4245 ScanKeyData skey;
4246 HeapTuple htup;
4247 List *oldlist;
4248 MemoryContext oldcxt;
4249
4250 /* Quick exit if we already computed the list. */
4251 if (relation->rd_fkeyvalid)
4252 return relation->rd_fkeylist;
4253
4254 /* Fast path: non-partitioned tables without triggers can't have FKs */
4255 if (!relation->rd_rel->relhastriggers &&
4256 relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
4257 return NIL;
4258
4259 /*
4260 * We build the list we intend to return (in the caller's context) while
4261 * doing the scan. After successfully completing the scan, we copy that
4262 * list into the relcache entry. This avoids cache-context memory leakage
4263 * if we get some sort of error partway through.
4264 */
4265 result = NIL;
4266
4267 /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4268 ScanKeyInit(&skey,
4269 Anum_pg_constraint_conrelid,
4270 BTEqualStrategyNumber, F_OIDEQ,
4271 ObjectIdGetDatum(RelationGetRelid(relation)));
4272
4273 conrel = table_open(ConstraintRelationId, AccessShareLock);
4274 conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4275 NULL, 1, &skey);
4276
4277 while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4278 {
4279 Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4280 ForeignKeyCacheInfo *info;
4281
4282 /* consider only foreign keys */
4283 if (constraint->contype != CONSTRAINT_FOREIGN)
4284 continue;
4285
4286 info = makeNode(ForeignKeyCacheInfo);
4287 info->conoid = constraint->oid;
4288 info->conrelid = constraint->conrelid;
4289 info->confrelid = constraint->confrelid;
4290
4291 DeconstructFkConstraintRow(htup, &info->nkeys,
4292 info->conkey,
4293 info->confkey,
4294 info->conpfeqop,
4295 NULL, NULL);
4296
4297 /* Add FK's node to the result list */
4298 result = lappend(result, info);
4299 }
4300
4301 systable_endscan(conscan);
4302 table_close(conrel, AccessShareLock);
4303
4304 /* Now save a copy of the completed list in the relcache entry. */
4305 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4306 oldlist = relation->rd_fkeylist;
4307 relation->rd_fkeylist = copyObject(result);
4308 relation->rd_fkeyvalid = true;
4309 MemoryContextSwitchTo(oldcxt);
4310
4311 /* Don't leak the old list, if there is one */
4312 list_free_deep(oldlist);
4313
4314 return result;
4315}
4316
4317/*
4318 * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4319 *
4320 * The index list is created only if someone requests it. We scan pg_index
4321 * to find relevant indexes, and add the list to the relcache entry so that
4322 * we won't have to compute it again. Note that shared cache inval of a
4323 * relcache entry will delete the old list and set rd_indexvalid to false,
4324 * so that we must recompute the index list on next request. This handles
4325 * creation or deletion of an index.
4326 *
4327 * Indexes that are marked not indislive are omitted from the returned list.
4328 * Such indexes are expected to be dropped momentarily, and should not be
4329 * touched at all by any caller of this function.
4330 *
4331 * The returned list is guaranteed to be sorted in order by OID. This is
4332 * needed by the executor, since for index types that we obtain exclusive
4333 * locks on when updating the index, all backends must lock the indexes in
4334 * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4335 * consistent ordering would do, but ordering by OID is easy.
4336 *
4337 * Since shared cache inval causes the relcache's copy of the list to go away,
4338 * we return a copy of the list palloc'd in the caller's context. The caller
4339 * may list_free() the returned list after scanning it. This is necessary
4340 * since the caller will typically be doing syscache lookups on the relevant
4341 * indexes, and syscache lookup could cause SI messages to be processed!
4342 *
4343 * In exactly the same way, we update rd_pkindex, which is the OID of the
4344 * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4345 * which is the pg_class OID of an index to be used as the relation's
4346 * replication identity index, or InvalidOid if there is no such index.
4347 */
4348List *
4349RelationGetIndexList(Relation relation)
4350{
4351 Relation indrel;
4352 SysScanDesc indscan;
4353 ScanKeyData skey;
4354 HeapTuple htup;
4355 List *result;
4356 List *oldlist;
4357 char replident = relation->rd_rel->relreplident;
4358 Oid pkeyIndex = InvalidOid;
4359 Oid candidateIndex = InvalidOid;
4360 MemoryContext oldcxt;
4361
4362 /* Quick exit if we already computed the list. */
4363 if (relation->rd_indexvalid)
4364 return list_copy(relation->rd_indexlist);
4365
4366 /*
4367 * We build the list we intend to return (in the caller's context) while
4368 * doing the scan. After successfully completing the scan, we copy that
4369 * list into the relcache entry. This avoids cache-context memory leakage
4370 * if we get some sort of error partway through.
4371 */
4372 result = NIL;
4373
4374 /* Prepare to scan pg_index for entries having indrelid = this rel. */
4375 ScanKeyInit(&skey,
4376 Anum_pg_index_indrelid,
4377 BTEqualStrategyNumber, F_OIDEQ,
4378 ObjectIdGetDatum(RelationGetRelid(relation)));
4379
4380 indrel = table_open(IndexRelationId, AccessShareLock);
4381 indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4382 NULL, 1, &skey);
4383
4384 while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4385 {
4386 Form_pg_index index = (Form_pg_index) GETSTRUCT(htup);
4387
4388 /*
4389 * Ignore any indexes that are currently being dropped. This will
4390 * prevent them from being searched, inserted into, or considered in
4391 * HOT-safety decisions. It's unsafe to touch such an index at all
4392 * since its catalog entries could disappear at any instant.
4393 */
4394 if (!index->indislive)
4395 continue;
4396
4397 /* Add index's OID to result list in the proper order */
4398 result = insert_ordered_oid(result, index->indexrelid);
4399
4400 /*
4401 * Invalid, non-unique, non-immediate or predicate indexes aren't
4402 * interesting for either oid indexes or replication identity indexes,
4403 * so don't check them.
4404 */
4405 if (!index->indisvalid || !index->indisunique ||
4406 !index->indimmediate ||
4407 !heap_attisnull(htup, Anum_pg_index_indpred, NULL))
4408 continue;
4409
4410 /* remember primary key index if any */
4411 if (index->indisprimary)
4412 pkeyIndex = index->indexrelid;
4413
4414 /* remember explicitly chosen replica index */
4415 if (index->indisreplident)
4416 candidateIndex = index->indexrelid;
4417 }
4418
4419 systable_endscan(indscan);
4420
4421 table_close(indrel, AccessShareLock);
4422
4423 /* Now save a copy of the completed list in the relcache entry. */
4424 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4425 oldlist = relation->rd_indexlist;
4426 relation->rd_indexlist = list_copy(result);
4427 relation->rd_pkindex = pkeyIndex;
4428 if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4429 relation->rd_replidindex = pkeyIndex;
4430 else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4431 relation->rd_replidindex = candidateIndex;
4432 else
4433 relation->rd_replidindex = InvalidOid;
4434 relation->rd_indexvalid = true;
4435 MemoryContextSwitchTo(oldcxt);
4436
4437 /* Don't leak the old list, if there is one */
4438 list_free(oldlist);
4439
4440 return result;
4441}
4442
4443/*
4444 * RelationGetStatExtList
4445 * get a list of OIDs of statistics objects on this relation
4446 *
4447 * The statistics list is created only if someone requests it, in a way
4448 * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4449 * relevant statistics, and add the list to the relcache entry so that we
4450 * won't have to compute it again. Note that shared cache inval of a
4451 * relcache entry will delete the old list and set rd_statvalid to 0,
4452 * so that we must recompute the statistics list on next request. This
4453 * handles creation or deletion of a statistics object.
4454 *
4455 * The returned list is guaranteed to be sorted in order by OID, although
4456 * this is not currently needed.
4457 *
4458 * Since shared cache inval causes the relcache's copy of the list to go away,
4459 * we return a copy of the list palloc'd in the caller's context. The caller
4460 * may list_free() the returned list after scanning it. This is necessary
4461 * since the caller will typically be doing syscache lookups on the relevant
4462 * statistics, and syscache lookup could cause SI messages to be processed!
4463 */
4464List *
4465RelationGetStatExtList(Relation relation)
4466{
4467 Relation indrel;
4468 SysScanDesc indscan;
4469 ScanKeyData skey;
4470 HeapTuple htup;
4471 List *result;
4472 List *oldlist;
4473 MemoryContext oldcxt;
4474
4475 /* Quick exit if we already computed the list. */
4476 if (relation->rd_statvalid != 0)
4477 return list_copy(relation->rd_statlist);
4478
4479 /*
4480 * We build the list we intend to return (in the caller's context) while
4481 * doing the scan. After successfully completing the scan, we copy that
4482 * list into the relcache entry. This avoids cache-context memory leakage
4483 * if we get some sort of error partway through.
4484 */
4485 result = NIL;
4486
4487 /*
4488 * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4489 * rel.
4490 */
4491 ScanKeyInit(&skey,
4492 Anum_pg_statistic_ext_stxrelid,
4493 BTEqualStrategyNumber, F_OIDEQ,
4494 ObjectIdGetDatum(RelationGetRelid(relation)));
4495
4496 indrel = table_open(StatisticExtRelationId, AccessShareLock);
4497 indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4498 NULL, 1, &skey);
4499
4500 while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4501 {
4502 Oid oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid;
4503
4504 result = insert_ordered_oid(result, oid);
4505 }
4506
4507 systable_endscan(indscan);
4508
4509 table_close(indrel, AccessShareLock);
4510
4511 /* Now save a copy of the completed list in the relcache entry. */
4512 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4513 oldlist = relation->rd_statlist;
4514 relation->rd_statlist = list_copy(result);
4515
4516 relation->rd_statvalid = true;
4517 MemoryContextSwitchTo(oldcxt);
4518
4519 /* Don't leak the old list, if there is one */
4520 list_free(oldlist);
4521
4522 return result;
4523}
4524
4525/*
4526 * insert_ordered_oid
4527 * Insert a new Oid into a sorted list of Oids, preserving ordering
4528 *
4529 * Building the ordered list this way is O(N^2), but with a pretty small
4530 * constant, so for the number of entries we expect it will probably be
4531 * faster than trying to apply qsort(). Most tables don't have very many
4532 * indexes...
4533 */
4534static List *
4535insert_ordered_oid(List *list, Oid datum)
4536{
4537 ListCell *prev;
4538
4539 /* Does the datum belong at the front? */
4540 if (list == NIL || datum < linitial_oid(list))
4541 return lcons_oid(datum, list);
4542 /* No, so find the entry it belongs after */
4543 prev = list_head(list);
4544 for (;;)
4545 {
4546 ListCell *curr = lnext(prev);
4547
4548 if (curr == NULL || datum < lfirst_oid(curr))
4549 break; /* it belongs after 'prev', before 'curr' */
4550
4551 prev = curr;
4552 }
4553 /* Insert datum into list after 'prev' */
4554 lappend_cell_oid(list, prev, datum);
4555 return list;
4556}
4557
4558/*
4559 * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4560 *
4561 * Returns InvalidOid if there is no such index.
4562 */
4563Oid
4564RelationGetPrimaryKeyIndex(Relation relation)
4565{
4566 List *ilist;
4567
4568 if (!relation->rd_indexvalid)
4569 {
4570 /* RelationGetIndexList does the heavy lifting. */
4571 ilist = RelationGetIndexList(relation);
4572 list_free(ilist);
4573 Assert(relation->rd_indexvalid);
4574 }
4575
4576 return relation->rd_pkindex;
4577}
4578
4579/*
4580 * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4581 *
4582 * Returns InvalidOid if there is no such index.
4583 */
4584Oid
4585RelationGetReplicaIndex(Relation relation)
4586{
4587 List *ilist;
4588
4589 if (!relation->rd_indexvalid)
4590 {
4591 /* RelationGetIndexList does the heavy lifting. */
4592 ilist = RelationGetIndexList(relation);
4593 list_free(ilist);
4594 Assert(relation->rd_indexvalid);
4595 }
4596
4597 return relation->rd_replidindex;
4598}
4599
4600/*
4601 * RelationGetIndexExpressions -- get the index expressions for an index
4602 *
4603 * We cache the result of transforming pg_index.indexprs into a node tree.
4604 * If the rel is not an index or has no expressional columns, we return NIL.
4605 * Otherwise, the returned tree is copied into the caller's memory context.
4606 * (We don't want to return a pointer to the relcache copy, since it could
4607 * disappear due to relcache invalidation.)
4608 */
4609List *
4610RelationGetIndexExpressions(Relation relation)
4611{
4612 List *result;
4613 Datum exprsDatum;
4614 bool isnull;
4615 char *exprsString;
4616 MemoryContext oldcxt;
4617
4618 /* Quick exit if we already computed the result. */
4619 if (relation->rd_indexprs)
4620 return copyObject(relation->rd_indexprs);
4621
4622 /* Quick exit if there is nothing to do. */
4623 if (relation->rd_indextuple == NULL ||
4624 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4625 return NIL;
4626
4627 /*
4628 * We build the tree we intend to return in the caller's context. After
4629 * successfully completing the work, we copy it into the relcache entry.
4630 * This avoids problems if we get some sort of error partway through.
4631 */
4632 exprsDatum = heap_getattr(relation->rd_indextuple,
4633 Anum_pg_index_indexprs,
4634 GetPgIndexDescriptor(),
4635 &isnull);
4636 Assert(!isnull);
4637 exprsString = TextDatumGetCString(exprsDatum);
4638 result = (List *) stringToNode(exprsString);
4639 pfree(exprsString);
4640
4641 /*
4642 * Run the expressions through eval_const_expressions. This is not just an
4643 * optimization, but is necessary, because the planner will be comparing
4644 * them to similarly-processed qual clauses, and may fail to detect valid
4645 * matches without this. We must not use canonicalize_qual, however,
4646 * since these aren't qual expressions.
4647 */
4648 result = (List *) eval_const_expressions(NULL, (Node *) result);
4649
4650 /* May as well fix opfuncids too */
4651 fix_opfuncids((Node *) result);
4652
4653 /* Now save a copy of the completed tree in the relcache entry. */
4654 oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4655 relation->rd_indexprs = copyObject(result);
4656 MemoryContextSwitchTo(oldcxt);
4657
4658 return result;
4659}
4660
4661/*
4662 * RelationGetIndexPredicate -- get the index predicate for an index
4663 *
4664 * We cache the result of transforming pg_index.indpred into an implicit-AND
4665 * node tree (suitable for use in planning).
4666 * If the rel is not an index or has no predicate, we return NIL.
4667 * Otherwise, the returned tree is copied into the caller's memory context.
4668 * (We don't want to return a pointer to the relcache copy, since it could
4669 * disappear due to relcache invalidation.)
4670 */
4671List *
4672RelationGetIndexPredicate(Relation relation)
4673{
4674 List *result;
4675 Datum predDatum;
4676 bool isnull;
4677 char *predString;
4678 MemoryContext oldcxt;
4679
4680 /* Quick exit if we already computed the result. */
4681 if (relation->rd_indpred)
4682 return copyObject(relation->rd_indpred);
4683
4684 /* Quick exit if there is nothing to do. */
4685 if (relation->rd_indextuple == NULL ||
4686 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred, NULL))
4687 return NIL;
4688
4689 /*
4690 * We build the tree we intend to return in the caller's context. After
4691 * successfully completing the work, we copy it into the relcache entry.
4692 * This avoids problems if we get some sort of error partway through.
4693 */
4694 predDatum = heap_getattr(relation->rd_indextuple,
4695 Anum_pg_index_indpred,
4696 GetPgIndexDescriptor(),
4697 &isnull);
4698 Assert(!isnull);
4699 predString = TextDatumGetCString(predDatum);
4700 result = (List *) stringToNode(predString);
4701 pfree(predString);
4702
4703 /*
4704 * Run the expression through const-simplification and canonicalization.
4705 * This is not just an optimization, but is necessary, because the planner
4706 * will be comparing it to similarly-processed qual clauses, and may fail
4707 * to detect valid matches without this. This must match the processing
4708 * done to qual clauses in preprocess_expression()! (We can skip the
4709 * stuff involving subqueries, however, since we don't allow any in index
4710 * predicates.)
4711 */
4712 result = (List *) eval_const_expressions(NULL, (Node *) result);
4713
4714 result = (List *) canonicalize_qual((Expr *) result, false);
4715
4716 /* Also convert to implicit-AND format */
4717 result = make_ands_implicit((Expr *) result);
4718
4719 /* May as well fix opfuncids too */
4720 fix_opfuncids((Node *) result);
4721
4722 /* Now save a copy of the completed tree in the relcache entry. */
4723 oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4724 relation->rd_indpred = copyObject(result);
4725 MemoryContextSwitchTo(oldcxt);
4726
4727 return result;
4728}
4729
4730/*
4731 * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4732 *
4733 * The result has a bit set for each attribute used anywhere in the index
4734 * definitions of all the indexes on this relation. (This includes not only
4735 * simple index keys, but attributes used in expressions and partial-index
4736 * predicates.)
4737 *
4738 * Depending on attrKind, a bitmap covering the attnums for all index columns,
4739 * for all potential foreign key columns, or for all columns in the configured
4740 * replica identity index is returned.
4741 *
4742 * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4743 * we can include system attributes (e.g., OID) in the bitmap representation.
4744 *
4745 * Caller had better hold at least RowExclusiveLock on the target relation
4746 * to ensure it is safe (deadlock-free) for us to take locks on the relation's
4747 * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4748 * that lock level doesn't guarantee a stable set of indexes, so we have to
4749 * be prepared to retry here in case of a change in the set of indexes.
4750 *
4751 * The returned result is palloc'd in the caller's memory context and should
4752 * be bms_free'd when not needed anymore.
4753 */
4754Bitmapset *
4755RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
4756{
4757 Bitmapset *indexattrs; /* indexed columns */
4758 Bitmapset *uindexattrs; /* columns in unique indexes */
4759 Bitmapset *pkindexattrs; /* columns in the primary index */
4760 Bitmapset *idindexattrs; /* columns in the replica identity */
4761 List *indexoidlist;
4762 List *newindexoidlist;
4763 Oid relpkindex;
4764 Oid relreplindex;
4765 ListCell *l;
4766 MemoryContext oldcxt;
4767
4768 /* Quick exit if we already computed the result. */
4769 if (relation->rd_indexattr != NULL)
4770 {
4771 switch (attrKind)
4772 {
4773 case INDEX_ATTR_BITMAP_ALL:
4774 return bms_copy(relation->rd_indexattr);
4775 case INDEX_ATTR_BITMAP_KEY:
4776 return bms_copy(relation->rd_keyattr);
4777 case INDEX_ATTR_BITMAP_PRIMARY_KEY:
4778 return bms_copy(relation->rd_pkattr);
4779 case INDEX_ATTR_BITMAP_IDENTITY_KEY:
4780 return bms_copy(relation->rd_idattr);
4781 default:
4782 elog(ERROR, "unknown attrKind %u", attrKind);
4783 }
4784 }
4785
4786 /* Fast path if definitely no indexes */
4787 if (!RelationGetForm(relation)->relhasindex)
4788 return NULL;
4789
4790 /*
4791 * Get cached list of index OIDs. If we have to start over, we do so here.
4792 */
4793restart:
4794 indexoidlist = RelationGetIndexList(relation);
4795
4796 /* Fall out if no indexes (but relhasindex was set) */
4797 if (indexoidlist == NIL)
4798 return NULL;
4799
4800 /*
4801 * Copy the rd_pkindex and rd_replidindex values computed by
4802 * RelationGetIndexList before proceeding. This is needed because a
4803 * relcache flush could occur inside index_open below, resetting the
4804 * fields managed by RelationGetIndexList. We need to do the work with
4805 * stable values of these fields.
4806 */
4807 relpkindex = relation->rd_pkindex;
4808 relreplindex = relation->rd_replidindex;
4809
4810 /*
4811 * For each index, add referenced attributes to indexattrs.
4812 *
4813 * Note: we consider all indexes returned by RelationGetIndexList, even if
4814 * they are not indisready or indisvalid. This is important because an
4815 * index for which CREATE INDEX CONCURRENTLY has just started must be
4816 * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
4817 * CONCURRENTLY is far enough along that we should ignore the index, it
4818 * won't be returned at all by RelationGetIndexList.
4819 */
4820 indexattrs = NULL;
4821 uindexattrs = NULL;
4822 pkindexattrs = NULL;
4823 idindexattrs = NULL;
4824 foreach(l, indexoidlist)
4825 {
4826 Oid indexOid = lfirst_oid(l);
4827 Relation indexDesc;
4828 Datum datum;
4829 bool isnull;
4830 Node *indexExpressions;
4831 Node *indexPredicate;
4832 int i;
4833 bool isKey; /* candidate key */
4834 bool isPK; /* primary key */
4835 bool isIDKey; /* replica identity index */
4836
4837 indexDesc = index_open(indexOid, AccessShareLock);
4838
4839 /*
4840 * Extract index expressions and index predicate. Note: Don't use
4841 * RelationGetIndexExpressions()/RelationGetIndexPredicate(), because
4842 * those might run constant expressions evaluation, which needs a
4843 * snapshot, which we might not have here. (Also, it's probably more
4844 * sound to collect the bitmaps before any transformations that might
4845 * eliminate columns, but the practical impact of this is limited.)
4846 */
4847
4848 datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indexprs,
4849 GetPgIndexDescriptor(), &isnull);
4850 if (!isnull)
4851 indexExpressions = stringToNode(TextDatumGetCString(datum));
4852 else
4853 indexExpressions = NULL;
4854
4855 datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indpred,
4856 GetPgIndexDescriptor(), &isnull);
4857 if (!isnull)
4858 indexPredicate = stringToNode(TextDatumGetCString(datum));
4859 else
4860 indexPredicate = NULL;
4861
4862 /* Can this index be referenced by a foreign key? */
4863 isKey = indexDesc->rd_index->indisunique &&
4864 indexExpressions == NULL &&
4865 indexPredicate == NULL;
4866
4867 /* Is this a primary key? */
4868 isPK = (indexOid == relpkindex);
4869
4870 /* Is this index the configured (or default) replica identity? */
4871 isIDKey = (indexOid == relreplindex);
4872
4873 /* Collect simple attribute references */
4874 for (i = 0; i < indexDesc->rd_index->indnatts; i++)
4875 {
4876 int attrnum = indexDesc->rd_index->indkey.values[i];
4877
4878 /*
4879 * Since we have covering indexes with non-key columns, we must
4880 * handle them accurately here. non-key columns must be added into
4881 * indexattrs, since they are in index, and HOT-update shouldn't
4882 * miss them. Obviously, non-key columns couldn't be referenced by
4883 * foreign key or identity key. Hence we do not include them into
4884 * uindexattrs, pkindexattrs and idindexattrs bitmaps.
4885 */
4886 if (attrnum != 0)
4887 {
4888 indexattrs = bms_add_member(indexattrs,
4889 attrnum - FirstLowInvalidHeapAttributeNumber);
4890
4891 if (isKey && i < indexDesc->rd_index->indnkeyatts)
4892 uindexattrs = bms_add_member(uindexattrs,
4893 attrnum - FirstLowInvalidHeapAttributeNumber);
4894
4895 if (isPK && i < indexDesc->rd_index->indnkeyatts)
4896 pkindexattrs = bms_add_member(pkindexattrs,
4897 attrnum - FirstLowInvalidHeapAttributeNumber);
4898
4899 if (isIDKey && i < indexDesc->rd_index->indnkeyatts)
4900 idindexattrs = bms_add_member(idindexattrs,
4901 attrnum - FirstLowInvalidHeapAttributeNumber);
4902 }
4903 }
4904
4905 /* Collect all attributes used in expressions, too */
4906 pull_varattnos(indexExpressions, 1, &indexattrs);
4907
4908 /* Collect all attributes in the index predicate, too */
4909 pull_varattnos(indexPredicate, 1, &indexattrs);
4910
4911 index_close(indexDesc, AccessShareLock);
4912 }
4913
4914 /*
4915 * During one of the index_opens in the above loop, we might have received
4916 * a relcache flush event on this relcache entry, which might have been
4917 * signaling a change in the rel's index list. If so, we'd better start
4918 * over to ensure we deliver up-to-date attribute bitmaps.
4919 */
4920 newindexoidlist = RelationGetIndexList(relation);
4921 if (equal(indexoidlist, newindexoidlist) &&
4922 relpkindex == relation->rd_pkindex &&
4923 relreplindex == relation->rd_replidindex)
4924 {
4925 /* Still the same index set, so proceed */
4926 list_free(newindexoidlist);
4927 list_free(indexoidlist);
4928 }
4929 else
4930 {
4931 /* Gotta do it over ... might as well not leak memory */
4932 list_free(newindexoidlist);
4933 list_free(indexoidlist);
4934 bms_free(uindexattrs);
4935 bms_free(pkindexattrs);
4936 bms_free(idindexattrs);
4937 bms_free(indexattrs);
4938
4939 goto restart;
4940 }
4941
4942 /* Don't leak the old values of these bitmaps, if any */
4943 bms_free(relation->rd_indexattr);
4944 relation->rd_indexattr = NULL;
4945 bms_free(relation->rd_keyattr);
4946 relation->rd_keyattr = NULL;
4947 bms_free(relation->rd_pkattr);
4948 relation->rd_pkattr = NULL;
4949 bms_free(relation->rd_idattr);
4950 relation->rd_idattr = NULL;
4951
4952 /*
4953 * Now save copies of the bitmaps in the relcache entry. We intentionally
4954 * set rd_indexattr last, because that's the one that signals validity of
4955 * the values; if we run out of memory before making that copy, we won't
4956 * leave the relcache entry looking like the other ones are valid but
4957 * empty.
4958 */
4959 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4960 relation->rd_keyattr = bms_copy(uindexattrs);
4961 relation->rd_pkattr = bms_copy(pkindexattrs);
4962 relation->rd_idattr = bms_copy(idindexattrs);
4963 relation->rd_indexattr = bms_copy(indexattrs);
4964 MemoryContextSwitchTo(oldcxt);
4965
4966 /* We return our original working copy for caller to play with */
4967 switch (attrKind)
4968 {
4969 case INDEX_ATTR_BITMAP_ALL:
4970 return indexattrs;
4971 case INDEX_ATTR_BITMAP_KEY:
4972 return uindexattrs;
4973 case INDEX_ATTR_BITMAP_PRIMARY_KEY:
4974 return pkindexattrs;
4975 case INDEX_ATTR_BITMAP_IDENTITY_KEY:
4976 return idindexattrs;
4977 default:
4978 elog(ERROR, "unknown attrKind %u", attrKind);
4979 return NULL;
4980 }
4981}
4982
4983/*
4984 * RelationGetExclusionInfo -- get info about index's exclusion constraint
4985 *
4986 * This should be called only for an index that is known to have an
4987 * associated exclusion constraint. It returns arrays (palloc'd in caller's
4988 * context) of the exclusion operator OIDs, their underlying functions'
4989 * OIDs, and their strategy numbers in the index's opclasses. We cache
4990 * all this information since it requires a fair amount of work to get.
4991 */
4992void
4993RelationGetExclusionInfo(Relation indexRelation,
4994 Oid **operators,
4995 Oid **procs,
4996 uint16 **strategies)
4997{
4998 int indnkeyatts;
4999 Oid *ops;
5000 Oid *funcs;
5001 uint16 *strats;
5002 Relation conrel;
5003 SysScanDesc conscan;
5004 ScanKeyData skey[1];
5005 HeapTuple htup;
5006 bool found;
5007 MemoryContext oldcxt;
5008 int i;
5009
5010 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
5011
5012 /* Allocate result space in caller context */
5013 *operators = ops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5014 *procs = funcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5015 *strategies = strats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5016
5017 /* Quick exit if we have the data cached already */
5018 if (indexRelation->rd_exclstrats != NULL)
5019 {
5020 memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts);
5021 memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts);
5022 memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts);
5023 return;
5024 }
5025
5026 /*
5027 * Search pg_constraint for the constraint associated with the index. To
5028 * make this not too painfully slow, we use the index on conrelid; that
5029 * will hold the parent relation's OID not the index's own OID.
5030 *
5031 * Note: if we wanted to rely on the constraint name matching the index's
5032 * name, we could just do a direct lookup using pg_constraint's unique
5033 * index. For the moment it doesn't seem worth requiring that.
5034 */
5035 ScanKeyInit(&skey[0],
5036 Anum_pg_constraint_conrelid,
5037 BTEqualStrategyNumber, F_OIDEQ,
5038 ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5039
5040 conrel = table_open(ConstraintRelationId, AccessShareLock);
5041 conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
5042 NULL, 1, skey);
5043 found = false;
5044
5045 while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5046 {
5047 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
5048 Datum val;
5049 bool isnull;
5050 ArrayType *arr;
5051 int nelem;
5052
5053 /* We want the exclusion constraint owning the index */
5054 if (conform->contype != CONSTRAINT_EXCLUSION ||
5055 conform->conindid != RelationGetRelid(indexRelation))
5056 continue;
5057
5058 /* There should be only one */
5059 if (found)
5060 elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5061 RelationGetRelationName(indexRelation));
5062 found = true;
5063
5064 /* Extract the operator OIDS from conexclop */
5065 val = fastgetattr(htup,
5066 Anum_pg_constraint_conexclop,
5067 conrel->rd_att, &isnull);
5068 if (isnull)
5069 elog(ERROR, "null conexclop for rel %s",
5070 RelationGetRelationName(indexRelation));
5071
5072 arr = DatumGetArrayTypeP(val); /* ensure not toasted */
5073 nelem = ARR_DIMS(arr)[0];
5074 if (ARR_NDIM(arr) != 1 ||
5075 nelem != indnkeyatts ||
5076 ARR_HASNULL(arr) ||
5077 ARR_ELEMTYPE(arr) != OIDOID)
5078 elog(ERROR, "conexclop is not a 1-D Oid array");
5079
5080 memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts);
5081 }
5082
5083 systable_endscan(conscan);
5084 table_close(conrel, AccessShareLock);
5085
5086 if (!found)
5087 elog(ERROR, "exclusion constraint record missing for rel %s",
5088 RelationGetRelationName(indexRelation));
5089
5090 /* We need the func OIDs and strategy numbers too */
5091 for (i = 0; i < indnkeyatts; i++)
5092 {
5093 funcs[i] = get_opcode(ops[i]);
5094 strats[i] = get_op_opfamily_strategy(ops[i],
5095 indexRelation->rd_opfamily[i]);
5096 /* shouldn't fail, since it was checked at index creation */
5097 if (strats[i] == InvalidStrategy)
5098 elog(ERROR, "could not find strategy for operator %u in family %u",
5099 ops[i], indexRelation->rd_opfamily[i]);
5100 }
5101
5102 /* Save a copy of the results in the relcache entry. */
5103 oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5104 indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5105 indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5106 indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5107 memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts);
5108 memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts);
5109 memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts);
5110 MemoryContextSwitchTo(oldcxt);
5111}
5112
5113/*
5114 * Get publication actions for the given relation.
5115 */
5116struct PublicationActions *
5117GetRelationPublicationActions(Relation relation)
5118{
5119 List *puboids;
5120 ListCell *lc;
5121 MemoryContext oldcxt;
5122 PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5123
5124 /*
5125 * If not publishable, it publishes no actions. (pgoutput_change() will
5126 * ignore it.)
5127 */
5128 if (!is_publishable_relation(relation))
5129 return pubactions;
5130
5131 if (relation->rd_pubactions)
5132 return memcpy(pubactions, relation->rd_pubactions,
5133 sizeof(PublicationActions));
5134
5135 /* Fetch the publication membership info. */
5136 puboids = GetRelationPublications(RelationGetRelid(relation));
5137 puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5138
5139 foreach(lc, puboids)
5140 {
5141 Oid pubid = lfirst_oid(lc);
5142 HeapTuple tup;
5143 Form_pg_publication pubform;
5144
5145 tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
5146
5147 if (!HeapTupleIsValid(tup))
5148 elog(ERROR, "cache lookup failed for publication %u", pubid);
5149
5150 pubform = (Form_pg_publication) GETSTRUCT(tup);
5151
5152 pubactions->pubinsert |= pubform->pubinsert;
5153 pubactions->pubupdate |= pubform->pubupdate;
5154 pubactions->pubdelete |= pubform->pubdelete;
5155 pubactions->pubtruncate |= pubform->pubtruncate;
5156
5157 ReleaseSysCache(tup);
5158
5159 /*
5160 * If we know everything is replicated, there is no point to check for
5161 * other publications.
5162 */
5163 if (pubactions->pubinsert && pubactions->pubupdate &&
5164 pubactions->pubdelete && pubactions->pubtruncate)
5165 break;
5166 }
5167
5168 if (relation->rd_pubactions)
5169 {
5170 pfree(relation->rd_pubactions);
5171 relation->rd_pubactions = NULL;
5172 }
5173
5174 /* Now save copy of the actions in the relcache entry. */
5175 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
5176 relation->rd_pubactions = palloc(sizeof(PublicationActions));
5177 memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5178 MemoryContextSwitchTo(oldcxt);
5179
5180 return pubactions;
5181}
5182
5183/*
5184 * Routines to support ereport() reports of relation-related errors
5185 *
5186 * These could have been put into elog.c, but it seems like a module layering
5187 * violation to have elog.c calling relcache or syscache stuff --- and we
5188 * definitely don't want elog.h including rel.h. So we put them here.
5189 */
5190
5191/*
5192 * errtable --- stores schema_name and table_name of a table
5193 * within the current errordata.
5194 */
5195int
5196errtable(Relation rel)
5197{
5198 err_generic_string(PG_DIAG_SCHEMA_NAME,
5199 get_namespace_name(RelationGetNamespace(rel)));
5200 err_generic_string(PG_DIAG_TABLE_NAME, RelationGetRelationName(rel));
5201
5202 return 0; /* return value does not matter */
5203}
5204
5205/*
5206 * errtablecol --- stores schema_name, table_name and column_name
5207 * of a table column within the current errordata.
5208 *
5209 * The column is specified by attribute number --- for most callers, this is
5210 * easier and less error-prone than getting the column name for themselves.
5211 */
5212int
5213errtablecol(Relation rel, int attnum)
5214{
5215 TupleDesc reldesc = RelationGetDescr(rel);
5216 const char *colname;
5217
5218 /* Use reldesc if it's a user attribute, else consult the catalogs */
5219 if (attnum > 0 && attnum <= reldesc->natts)
5220 colname = NameStr(TupleDescAttr(reldesc, attnum - 1)->attname);
5221 else
5222 colname = get_attname(RelationGetRelid(rel), attnum, false);
5223
5224 return errtablecolname(rel, colname);
5225}
5226
5227/*
5228 * errtablecolname --- stores schema_name, table_name and column_name
5229 * of a table column within the current errordata, where the column name is
5230 * given directly rather than extracted from the relation's catalog data.
5231 *
5232 * Don't use this directly unless errtablecol() is inconvenient for some
5233 * reason. This might possibly be needed during intermediate states in ALTER
5234 * TABLE, for instance.
5235 */
5236int
5237errtablecolname(Relation rel, const char *colname)
5238{
5239 errtable(rel);
5240 err_generic_string(PG_DIAG_COLUMN_NAME, colname);
5241
5242 return 0; /* return value does not matter */
5243}
5244
5245/*
5246 * errtableconstraint --- stores schema_name, table_name and constraint_name
5247 * of a table-related constraint within the current errordata.
5248 */
5249int
5250errtableconstraint(Relation rel, const char *conname)
5251{
5252 errtable(rel);
5253 err_generic_string(PG_DIAG_CONSTRAINT_NAME, conname);
5254
5255 return 0; /* return value does not matter */
5256}
5257
5258
5259/*
5260 * load_relcache_init_file, write_relcache_init_file
5261 *
5262 * In late 1992, we started regularly having databases with more than
5263 * a thousand classes in them. With this number of classes, it became
5264 * critical to do indexed lookups on the system catalogs.
5265 *
5266 * Bootstrapping these lookups is very hard. We want to be able to
5267 * use an index on pg_attribute, for example, but in order to do so,
5268 * we must have read pg_attribute for the attributes in the index,
5269 * which implies that we need to use the index.
5270 *
5271 * In order to get around the problem, we do the following:
5272 *
5273 * + When the database system is initialized (at initdb time), we
5274 * don't use indexes. We do sequential scans.
5275 *
5276 * + When the backend is started up in normal mode, we load an image
5277 * of the appropriate relation descriptors, in internal format,
5278 * from an initialization file in the data/base/... directory.
5279 *
5280 * + If the initialization file isn't there, then we create the
5281 * relation descriptors using sequential scans and write 'em to
5282 * the initialization file for use by subsequent backends.
5283 *
5284 * As of Postgres 9.0, there is one local initialization file in each
5285 * database, plus one shared initialization file for shared catalogs.
5286 *
5287 * We could dispense with the initialization files and just build the
5288 * critical reldescs the hard way on every backend startup, but that
5289 * slows down backend startup noticeably.
5290 *
5291 * We can in fact go further, and save more relcache entries than
5292 * just the ones that are absolutely critical; this allows us to speed
5293 * up backend startup by not having to build such entries the hard way.
5294 * Presently, all the catalog and index entries that are referred to
5295 * by catcaches are stored in the initialization files.
5296 *
5297 * The same mechanism that detects when catcache and relcache entries
5298 * need to be invalidated (due to catalog updates) also arranges to
5299 * unlink the initialization files when the contents may be out of date.
5300 * The files will then be rebuilt during the next backend startup.
5301 */
5302
5303/*
5304 * load_relcache_init_file -- attempt to load cache from the shared
5305 * or local cache init file
5306 *
5307 * If successful, return true and set criticalRelcachesBuilt or
5308 * criticalSharedRelcachesBuilt to true.
5309 * If not successful, return false.
5310 *
5311 * NOTE: we assume we are already switched into CacheMemoryContext.
5312 */
5313static bool
5314load_relcache_init_file(bool shared)
5315{
5316 FILE *fp;
5317 char initfilename[MAXPGPATH];
5318 Relation *rels;
5319 int relno,
5320 num_rels,
5321 max_rels,
5322 nailed_rels,
5323 nailed_indexes,
5324 magic;
5325 int i;
5326
5327 if (shared)
5328 snprintf(initfilename, sizeof(initfilename), "global/%s",
5329 RELCACHE_INIT_FILENAME);
5330 else
5331 snprintf(initfilename, sizeof(initfilename), "%s/%s",
5332 DatabasePath, RELCACHE_INIT_FILENAME);
5333
5334 fp = AllocateFile(initfilename, PG_BINARY_R);
5335 if (fp == NULL)
5336 return false;
5337
5338 /*
5339 * Read the index relcache entries from the file. Note we will not enter
5340 * any of them into the cache if the read fails partway through; this
5341 * helps to guard against broken init files.
5342 */
5343 max_rels = 100;
5344 rels = (Relation *) palloc(max_rels * sizeof(Relation));
5345 num_rels = 0;
5346 nailed_rels = nailed_indexes = 0;
5347
5348 /* check for correct magic number (compatible version) */
5349 if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5350 goto read_failed;
5351 if (magic != RELCACHE_INIT_FILEMAGIC)
5352 goto read_failed;
5353
5354 for (relno = 0;; relno++)
5355 {
5356 Size len;
5357 size_t nread;
5358 Relation rel;
5359 Form_pg_class relform;
5360 bool has_not_null;
5361
5362 /* first read the relation descriptor length */
5363 nread = fread(&len, 1, sizeof(len), fp);
5364 if (nread != sizeof(len))
5365 {
5366 if (nread == 0)
5367 break; /* end of file */
5368 goto read_failed;
5369 }
5370
5371 /* safety check for incompatible relcache layout */
5372 if (len != sizeof(RelationData))
5373 goto read_failed;
5374
5375 /* allocate another relcache header */
5376 if (num_rels >= max_rels)
5377 {
5378 max_rels *= 2;
5379 rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
5380 }
5381
5382 rel = rels[num_rels++] = (Relation) palloc(len);
5383
5384 /* then, read the Relation structure */
5385 if (fread(rel, 1, len, fp) != len)
5386 goto read_failed;
5387
5388 /* next read the relation tuple form */
5389 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5390 goto read_failed;
5391
5392 relform = (Form_pg_class) palloc(len);
5393 if (fread(relform, 1, len, fp) != len)
5394 goto read_failed;
5395
5396 rel->rd_rel = relform;
5397
5398 /* initialize attribute tuple forms */
5399 rel->rd_att = CreateTemplateTupleDesc(relform->relnatts);
5400 rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5401
5402 rel->rd_att->tdtypeid = relform->reltype;
5403 rel->rd_att->tdtypmod = -1; /* unnecessary, but... */
5404
5405 /* next read all the attribute tuple form data entries */
5406 has_not_null = false;
5407 for (i = 0; i < relform->relnatts; i++)
5408 {
5409 Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i);
5410
5411 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5412 goto read_failed;
5413 if (len != ATTRIBUTE_FIXED_PART_SIZE)
5414 goto read_failed;
5415 if (fread(attr, 1, len, fp) != len)
5416 goto read_failed;
5417
5418 has_not_null |= attr->attnotnull;
5419 }
5420
5421 /* next read the access method specific field */
5422 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5423 goto read_failed;
5424 if (len > 0)
5425 {
5426 rel->rd_options = palloc(len);
5427 if (fread(rel->rd_options, 1, len, fp) != len)
5428 goto read_failed;
5429 if (len != VARSIZE(rel->rd_options))
5430 goto read_failed; /* sanity check */
5431 }
5432 else
5433 {
5434 rel->rd_options = NULL;
5435 }
5436
5437 /* mark not-null status */
5438 if (has_not_null)
5439 {
5440 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5441
5442 constr->has_not_null = true;
5443 rel->rd_att->constr = constr;
5444 }
5445
5446 /*
5447 * If it's an index, there's more to do. Note we explicitly ignore
5448 * partitioned indexes here.
5449 */
5450 if (rel->rd_rel->relkind == RELKIND_INDEX)
5451 {
5452 MemoryContext indexcxt;
5453 Oid *opfamily;
5454 Oid *opcintype;
5455 RegProcedure *support;
5456 int nsupport;
5457 int16 *indoption;
5458 Oid *indcollation;
5459
5460 /* Count nailed indexes to ensure we have 'em all */
5461 if (rel->rd_isnailed)
5462 nailed_indexes++;
5463
5464 /* next, read the pg_index tuple */
5465 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5466 goto read_failed;
5467
5468 rel->rd_indextuple = (HeapTuple) palloc(len);
5469 if (fread(rel->rd_indextuple, 1, len, fp) != len)
5470 goto read_failed;
5471
5472 /* Fix up internal pointers in the tuple -- see heap_copytuple */
5473 rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5474 rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);
5475
5476 /*
5477 * prepare index info context --- parameters should match
5478 * RelationInitIndexAccessInfo
5479 */
5480 indexcxt = AllocSetContextCreate(CacheMemoryContext,
5481 "index info",
5482 ALLOCSET_SMALL_SIZES);
5483 rel->rd_indexcxt = indexcxt;
5484 MemoryContextCopyAndSetIdentifier(indexcxt,
5485 RelationGetRelationName(rel));
5486
5487 /*
5488 * Now we can fetch the index AM's API struct. (We can't store
5489 * that in the init file, since it contains function pointers that
5490 * might vary across server executions. Fortunately, it should be
5491 * safe to call the amhandler even while bootstrapping indexes.)
5492 */
5493 InitIndexAmRoutine(rel);
5494
5495 /* next, read the vector of opfamily OIDs */
5496 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5497 goto read_failed;
5498
5499 opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5500 if (fread(opfamily, 1, len, fp) != len)
5501 goto read_failed;
5502
5503 rel->rd_opfamily = opfamily;
5504
5505 /* next, read the vector of opcintype OIDs */
5506 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5507 goto read_failed;
5508
5509 opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5510 if (fread(opcintype, 1, len, fp) != len)
5511 goto read_failed;
5512
5513 rel->rd_opcintype = opcintype;
5514
5515 /* next, read the vector of support procedure OIDs */
5516 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5517 goto read_failed;
5518 support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5519 if (fread(support, 1, len, fp) != len)
5520 goto read_failed;
5521
5522 rel->rd_support = support;
5523
5524 /* next, read the vector of collation OIDs */
5525 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5526 goto read_failed;
5527
5528 indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5529 if (fread(indcollation, 1, len, fp) != len)
5530 goto read_failed;
5531
5532 rel->rd_indcollation = indcollation;
5533
5534 /* finally, read the vector of indoption values */
5535 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5536 goto read_failed;
5537
5538 indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5539 if (fread(indoption, 1, len, fp) != len)
5540 goto read_failed;
5541
5542 rel->rd_indoption = indoption;
5543
5544 /* set up zeroed fmgr-info vector */
5545 nsupport = relform->relnatts * rel->rd_indam->amsupport;
5546 rel->rd_supportinfo = (FmgrInfo *)
5547 MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
5548 }
5549 else
5550 {
5551 /* Count nailed rels to ensure we have 'em all */
5552 if (rel->rd_isnailed)
5553 nailed_rels++;
5554
5555 /* Load table AM data */
5556 if (rel->rd_rel->relkind == RELKIND_RELATION ||
5557 rel->rd_rel->relkind == RELKIND_SEQUENCE ||
5558 rel->rd_rel->relkind == RELKIND_TOASTVALUE ||
5559 rel->rd_rel->relkind == RELKIND_MATVIEW)
5560 RelationInitTableAccessMethod(rel);
5561
5562 Assert(rel->rd_index == NULL);
5563 Assert(rel->rd_indextuple == NULL);
5564 Assert(rel->rd_indexcxt == NULL);
5565 Assert(rel->rd_indam == NULL);
5566 Assert(rel->rd_opfamily == NULL);
5567 Assert(rel->rd_opcintype == NULL);
5568 Assert(rel->rd_support == NULL);
5569 Assert(rel->rd_supportinfo == NULL);
5570 Assert(rel->rd_indoption == NULL);
5571 Assert(rel->rd_indcollation == NULL);
5572 }
5573
5574 /*
5575 * Rules and triggers are not saved (mainly because the internal
5576 * format is complex and subject to change). They must be rebuilt if
5577 * needed by RelationCacheInitializePhase3. This is not expected to
5578 * be a big performance hit since few system catalogs have such. Ditto
5579 * for RLS policy data, partition info, index expressions, predicates,
5580 * exclusion info, and FDW info.
5581 */
5582 rel->rd_rules = NULL;
5583 rel->rd_rulescxt = NULL;
5584 rel->trigdesc = NULL;
5585 rel->rd_rsdesc = NULL;
5586 rel->rd_partkey = NULL;
5587 rel->rd_partkeycxt = NULL;
5588 rel->rd_partdesc = NULL;
5589 rel->rd_pdcxt = NULL;
5590 rel->rd_partcheck = NIL;
5591 rel->rd_partcheckvalid = false;
5592 rel->rd_partcheckcxt = NULL;
5593 rel->rd_indexprs = NIL;
5594 rel->rd_indpred = NIL;
5595 rel->rd_exclops = NULL;
5596 rel->rd_exclprocs = NULL;
5597 rel->rd_exclstrats = NULL;
5598 rel->rd_fdwroutine = NULL;
5599
5600 /*
5601 * Reset transient-state fields in the relcache entry
5602 */
5603 rel->rd_smgr = NULL;
5604 if (rel->rd_isnailed)
5605 rel->rd_refcnt = 1;
5606 else
5607 rel->rd_refcnt = 0;
5608 rel->rd_indexvalid = false;
5609 rel->rd_indexlist = NIL;
5610 rel->rd_pkindex = InvalidOid;
5611 rel->rd_replidindex = InvalidOid;
5612 rel->rd_indexattr = NULL;
5613 rel->rd_keyattr = NULL;
5614 rel->rd_pkattr = NULL;
5615 rel->rd_idattr = NULL;
5616 rel->rd_pubactions = NULL;
5617 rel->rd_statvalid = false;
5618 rel->rd_statlist = NIL;
5619 rel->rd_fkeyvalid = false;
5620 rel->rd_fkeylist = NIL;
5621 rel->rd_createSubid = InvalidSubTransactionId;
5622 rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
5623 rel->rd_amcache = NULL;
5624 MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
5625
5626 /*
5627 * Recompute lock and physical addressing info. This is needed in
5628 * case the pg_internal.init file was copied from some other database
5629 * by CREATE DATABASE.
5630 */
5631 RelationInitLockInfo(rel);
5632 RelationInitPhysicalAddr(rel);
5633 }
5634
5635 /*
5636 * We reached the end of the init file without apparent problem. Did we
5637 * get the right number of nailed items? This is a useful crosscheck in
5638 * case the set of critical rels or indexes changes. However, that should
5639 * not happen in a normally-running system, so let's bleat if it does.
5640 *
5641 * For the shared init file, we're called before client authentication is
5642 * done, which means that elog(WARNING) will go only to the postmaster
5643 * log, where it's easily missed. To ensure that developers notice bad
5644 * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put
5645 * an Assert(false) there.
5646 */
5647 if (shared)
5648 {
5649 if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
5650 nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
5651 {
5652 elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively",
5653 nailed_rels, nailed_indexes,
5654 NUM_CRITICAL_SHARED_RELS, NUM_CRITICAL_SHARED_INDEXES);
5655 /* Make sure we get developers' attention about this */
5656 Assert(false);
5657 /* In production builds, recover by bootstrapping the relcache */
5658 goto read_failed;
5659 }
5660 }
5661 else
5662 {
5663 if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
5664 nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
5665 {
5666 elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively",
5667 nailed_rels, nailed_indexes,
5668 NUM_CRITICAL_LOCAL_RELS, NUM_CRITICAL_LOCAL_INDEXES);
5669 /* We don't need an Assert() in this case */
5670 goto read_failed;
5671 }
5672 }
5673
5674 /*
5675 * OK, all appears well.
5676 *
5677 * Now insert all the new relcache entries into the cache.
5678 */
5679 for (relno = 0; relno < num_rels; relno++)
5680 {
5681 RelationCacheInsert(rels[relno], false);
5682 }
5683
5684 pfree(rels);
5685 FreeFile(fp);
5686
5687 if (shared)
5688 criticalSharedRelcachesBuilt = true;
5689 else
5690 criticalRelcachesBuilt = true;
5691 return true;
5692
5693 /*
5694 * init file is broken, so do it the hard way. We don't bother trying to
5695 * free the clutter we just allocated; it's not in the relcache so it
5696 * won't hurt.
5697 */
5698read_failed:
5699 pfree(rels);
5700 FreeFile(fp);
5701
5702 return false;
5703}
5704
5705/*
5706 * Write out a new initialization file with the current contents
5707 * of the relcache (either shared rels or local rels, as indicated).
5708 */
5709static void
5710write_relcache_init_file(bool shared)
5711{
5712 FILE *fp;
5713 char tempfilename[MAXPGPATH];
5714 char finalfilename[MAXPGPATH];
5715 int magic;
5716 HASH_SEQ_STATUS status;
5717 RelIdCacheEnt *idhentry;
5718 int i;
5719
5720 /*
5721 * If we have already received any relcache inval events, there's no
5722 * chance of succeeding so we may as well skip the whole thing.
5723 */
5724 if (relcacheInvalsReceived != 0L)
5725 return;
5726
5727 /*
5728 * We must write a temporary file and rename it into place. Otherwise,
5729 * another backend starting at about the same time might crash trying to
5730 * read the partially-complete file.
5731 */
5732 if (shared)
5733 {
5734 snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
5735 RELCACHE_INIT_FILENAME, MyProcPid);
5736 snprintf(finalfilename, sizeof(finalfilename), "global/%s",
5737 RELCACHE_INIT_FILENAME);
5738 }
5739 else
5740 {
5741 snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
5742 DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
5743 snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
5744 DatabasePath, RELCACHE_INIT_FILENAME);
5745 }
5746
5747 unlink(tempfilename); /* in case it exists w/wrong permissions */
5748
5749 fp = AllocateFile(tempfilename, PG_BINARY_W);
5750 if (fp == NULL)
5751 {
5752 /*
5753 * We used to consider this a fatal error, but we might as well
5754 * continue with backend startup ...
5755 */
5756 ereport(WARNING,
5757 (errcode_for_file_access(),
5758 errmsg("could not create relation-cache initialization file \"%s\": %m",
5759 tempfilename),
5760 errdetail("Continuing anyway, but there's something wrong.")));
5761 return;
5762 }
5763
5764 /*
5765 * Write a magic number to serve as a file version identifier. We can
5766 * change the magic number whenever the relcache layout changes.
5767 */
5768 magic = RELCACHE_INIT_FILEMAGIC;
5769 if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5770 elog(FATAL, "could not write init file");
5771
5772 /*
5773 * Write all the appropriate reldescs (in no particular order).
5774 */
5775 hash_seq_init(&status, RelationIdCache);
5776
5777 while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
5778 {
5779 Relation rel = idhentry->reldesc;
5780 Form_pg_class relform = rel->rd_rel;
5781
5782 /* ignore if not correct group */
5783 if (relform->relisshared != shared)
5784 continue;
5785
5786 /*
5787 * Ignore if not supposed to be in init file. We can allow any shared
5788 * relation that's been loaded so far to be in the shared init file,
5789 * but unshared relations must be ones that should be in the local
5790 * file per RelationIdIsInInitFile. (Note: if you want to change the
5791 * criterion for rels to be kept in the init file, see also inval.c.
5792 * The reason for filtering here is to be sure that we don't put
5793 * anything into the local init file for which a relcache inval would
5794 * not cause invalidation of that init file.)
5795 */
5796 if (!shared && !RelationIdIsInInitFile(RelationGetRelid(rel)))
5797 {
5798 /* Nailed rels had better get stored. */
5799 Assert(!rel->rd_isnailed);
5800 continue;
5801 }
5802
5803 /* first write the relcache entry proper */
5804 write_item(rel, sizeof(RelationData), fp);
5805
5806 /* next write the relation tuple form */
5807 write_item(relform, CLASS_TUPLE_SIZE, fp);
5808
5809 /* next, do all the attribute tuple form data entries */
5810 for (i = 0; i < relform->relnatts; i++)
5811 {
5812 write_item(TupleDescAttr(rel->rd_att, i),
5813 ATTRIBUTE_FIXED_PART_SIZE, fp);
5814 }
5815
5816 /* next, do the access method specific field */
5817 write_item(rel->rd_options,
5818 (rel->rd_options ? VARSIZE(rel->rd_options) : 0),
5819 fp);
5820
5821 /*
5822 * If it's an index, there's more to do. Note we explicitly ignore
5823 * partitioned indexes here.
5824 */
5825 if (rel->rd_rel->relkind == RELKIND_INDEX)
5826 {
5827 /* write the pg_index tuple */
5828 /* we assume this was created by heap_copytuple! */
5829 write_item(rel->rd_indextuple,
5830 HEAPTUPLESIZE + rel->rd_indextuple->t_len,
5831 fp);
5832
5833 /* next, write the vector of opfamily OIDs */
5834 write_item(rel->rd_opfamily,
5835 relform->relnatts * sizeof(Oid),
5836 fp);
5837
5838 /* next, write the vector of opcintype OIDs */
5839 write_item(rel->rd_opcintype,
5840 relform->relnatts * sizeof(Oid),
5841 fp);
5842
5843 /* next, write the vector of support procedure OIDs */
5844 write_item(rel->rd_support,
5845 relform->relnatts * (rel->rd_indam->amsupport * sizeof(RegProcedure)),
5846 fp);
5847
5848 /* next, write the vector of collation OIDs */
5849 write_item(rel->rd_indcollation,
5850 relform->relnatts * sizeof(Oid),
5851 fp);
5852
5853 /* finally, write the vector of indoption values */
5854 write_item(rel->rd_indoption,
5855 relform->relnatts * sizeof(int16),
5856 fp);
5857 }
5858 }
5859
5860 if (FreeFile(fp))
5861 elog(FATAL, "could not write init file");
5862
5863 /*
5864 * Now we have to check whether the data we've so painstakingly
5865 * accumulated is already obsolete due to someone else's just-committed
5866 * catalog changes. If so, we just delete the temp file and leave it to
5867 * the next backend to try again. (Our own relcache entries will be
5868 * updated by SI message processing, but we can't be sure whether what we
5869 * wrote out was up-to-date.)
5870 *
5871 * This mustn't run concurrently with the code that unlinks an init file
5872 * and sends SI messages, so grab a serialization lock for the duration.
5873 */
5874 LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
5875
5876 /* Make sure we have seen all incoming SI messages */
5877 AcceptInvalidationMessages();
5878
5879 /*
5880 * If we have received any SI relcache invals since backend start, assume
5881 * we may have written out-of-date data.
5882 */
5883 if (relcacheInvalsReceived == 0L)
5884 {
5885 /*
5886 * OK, rename the temp file to its final name, deleting any
5887 * previously-existing init file.
5888 *
5889 * Note: a failure here is possible under Cygwin, if some other
5890 * backend is holding open an unlinked-but-not-yet-gone init file. So
5891 * treat this as a noncritical failure; just remove the useless temp
5892 * file on failure.
5893 */
5894 if (rename(tempfilename, finalfilename) < 0)
5895 unlink(tempfilename);
5896 }
5897 else
5898 {
5899 /* Delete the already-obsolete temp file */
5900 unlink(tempfilename);
5901 }
5902
5903 LWLockRelease(RelCacheInitLock);
5904}
5905
5906/* write a chunk of data preceded by its length */
5907static void
5908write_item(const void *data, Size len, FILE *fp)
5909{
5910 if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
5911 elog(FATAL, "could not write init file");
5912 if (fwrite(data, 1, len, fp) != len)
5913 elog(FATAL, "could not write init file");
5914}
5915
5916/*
5917 * Determine whether a given relation (identified by OID) is one of the ones
5918 * we should store in a relcache init file.
5919 *
5920 * We must cache all nailed rels, and for efficiency we should cache every rel
5921 * that supports a syscache. The former set is almost but not quite a subset
5922 * of the latter. The special cases are relations where
5923 * RelationCacheInitializePhase2/3 chooses to nail for efficiency reasons, but
5924 * which do not support any syscache.
5925 */
5926bool
5927RelationIdIsInInitFile(Oid relationId)
5928{
5929 if (relationId == SharedSecLabelRelationId ||
5930 relationId == TriggerRelidNameIndexId ||
5931 relationId == DatabaseNameIndexId ||
5932 relationId == SharedSecLabelObjectIndexId)
5933 {
5934 /*
5935 * If this Assert fails, we don't need the applicable special case
5936 * anymore.
5937 */
5938 Assert(!RelationSupportsSysCache(relationId));
5939 return true;
5940 }
5941 return RelationSupportsSysCache(relationId);
5942}
5943
5944/*
5945 * Invalidate (remove) the init file during commit of a transaction that
5946 * changed one or more of the relation cache entries that are kept in the
5947 * local init file.
5948 *
5949 * To be safe against concurrent inspection or rewriting of the init file,
5950 * we must take RelCacheInitLock, then remove the old init file, then send
5951 * the SI messages that include relcache inval for such relations, and then
5952 * release RelCacheInitLock. This serializes the whole affair against
5953 * write_relcache_init_file, so that we can be sure that any other process
5954 * that's concurrently trying to create a new init file won't move an
5955 * already-stale version into place after we unlink. Also, because we unlink
5956 * before sending the SI messages, a backend that's currently starting cannot
5957 * read the now-obsolete init file and then miss the SI messages that will
5958 * force it to update its relcache entries. (This works because the backend
5959 * startup sequence gets into the sinval array before trying to load the init
5960 * file.)
5961 *
5962 * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
5963 * then release the lock in RelationCacheInitFilePostInvalidate. Caller must
5964 * send any pending SI messages between those calls.
5965 */
5966void
5967RelationCacheInitFilePreInvalidate(void)
5968{
5969 char localinitfname[MAXPGPATH];
5970 char sharedinitfname[MAXPGPATH];
5971
5972 if (DatabasePath)
5973 snprintf(localinitfname, sizeof(localinitfname), "%s/%s",
5974 DatabasePath, RELCACHE_INIT_FILENAME);
5975 snprintf(sharedinitfname, sizeof(sharedinitfname), "global/%s",
5976 RELCACHE_INIT_FILENAME);
5977
5978 LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
5979
5980 /*
5981 * The files might not be there if no backend has been started since the
5982 * last removal. But complain about failures other than ENOENT with
5983 * ERROR. Fortunately, it's not too late to abort the transaction if we
5984 * can't get rid of the would-be-obsolete init file.
5985 */
5986 if (DatabasePath)
5987 unlink_initfile(localinitfname, ERROR);
5988 unlink_initfile(sharedinitfname, ERROR);
5989}
5990
5991void
5992RelationCacheInitFilePostInvalidate(void)
5993{
5994 LWLockRelease(RelCacheInitLock);
5995}
5996
5997/*
5998 * Remove the init files during postmaster startup.
5999 *
6000 * We used to keep the init files across restarts, but that is unsafe in PITR
6001 * scenarios, and even in simple crash-recovery cases there are windows for
6002 * the init files to become out-of-sync with the database. So now we just
6003 * remove them during startup and expect the first backend launch to rebuild
6004 * them. Of course, this has to happen in each database of the cluster.
6005 */
6006void
6007RelationCacheInitFileRemove(void)
6008{
6009 const char *tblspcdir = "pg_tblspc";
6010 DIR *dir;
6011 struct dirent *de;
6012 char path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
6013
6014 snprintf(path, sizeof(path), "global/%s",
6015 RELCACHE_INIT_FILENAME);
6016 unlink_initfile(path, LOG);
6017
6018 /* Scan everything in the default tablespace */
6019 RelationCacheInitFileRemoveInDir("base");
6020
6021 /* Scan the tablespace link directory to find non-default tablespaces */
6022 dir = AllocateDir(tblspcdir);
6023
6024 while ((de = ReadDirExtended(dir, tblspcdir, LOG)) != NULL)
6025 {
6026 if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
6027 {
6028 /* Scan the tablespace dir for per-database dirs */
6029 snprintf(path, sizeof(path), "%s/%s/%s",
6030 tblspcdir, de->d_name, TABLESPACE_VERSION_DIRECTORY);
6031 RelationCacheInitFileRemoveInDir(path);
6032 }
6033 }
6034
6035 FreeDir(dir);
6036}
6037
6038/* Process one per-tablespace directory for RelationCacheInitFileRemove */
6039static void
6040RelationCacheInitFileRemoveInDir(const char *tblspcpath)
6041{
6042 DIR *dir;
6043 struct dirent *de;
6044 char initfilename[MAXPGPATH * 2];
6045
6046 /* Scan the tablespace directory to find per-database directories */
6047 dir = AllocateDir(tblspcpath);
6048
6049 while ((de = ReadDirExtended(dir, tblspcpath, LOG)) != NULL)
6050 {
6051 if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
6052 {
6053 /* Try to remove the init file in each database */
6054 snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
6055 tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
6056 unlink_initfile(initfilename, LOG);
6057 }
6058 }
6059
6060 FreeDir(dir);
6061}
6062
/*
 * unlink_initfile
 *		Remove the named init file, tolerating its absence.
 *
 * A failed unlink() is reported at level "elevel", except when errno is
 * ENOENT: the file not existing is an expected situation and is silently
 * accepted.
 */
static void
unlink_initfile(const char *initfilename, int elevel)
{
	if (unlink(initfilename) < 0 && errno != ENOENT)
		ereport(elevel,
				(errcode_for_file_access(),
				 errmsg("could not remove cache file \"%s\": %m",
						initfilename)));
}
6076