relcache.c source code [PostgreSQL/src/backend/utils/cache/relcache.c]

/*-------------------------------------------------------------------------
 *
 * relcache.c
 *	  POSTGRES relation descriptor cache code
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/cache/relcache.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *		RelationCacheInitialize			- initialize relcache (to empty)
 *		RelationCacheInitializePhase2	- initialize shared-catalog entries
 *		RelationCacheInitializePhase3	- finish initializing relcache
 *		RelationIdGetRelation			- get a reldesc by relation id
 *		RelationClose					- close an open relation
 *
 * NOTES
 *		The following code contains many undocumented hacks.  Please be
 *		careful....
 */
27	#include "postgres.h"
28
29	#include <sys/file.h>
30	#include <fcntl.h>
31	#include <unistd.h>
32
33	#include "access/htup_details.h"
34	#include "access/multixact.h"
35	#include "access/nbtree.h"
36	#include "access/reloptions.h"
37	#include "access/sysattr.h"
38	#include "access/table.h"
39	#include "access/tableam.h"
40	#include "access/tupdesc_details.h"
41	#include "access/xact.h"
42	#include "access/xlog.h"
43	#include "catalog/catalog.h"
44	#include "catalog/indexing.h"
45	#include "catalog/namespace.h"
46	#include "catalog/partition.h"
47	#include "catalog/pg_am.h"
48	#include "catalog/pg_amproc.h"
49	#include "catalog/pg_attrdef.h"
50	#include "catalog/pg_authid.h"
51	#include "catalog/pg_auth_members.h"
52	#include "catalog/pg_constraint.h"
53	#include "catalog/pg_database.h"
54	#include "catalog/pg_namespace.h"
55	#include "catalog/pg_opclass.h"
56	#include "catalog/pg_partitioned_table.h"
57	#include "catalog/pg_proc.h"
58	#include "catalog/pg_publication.h"
59	#include "catalog/pg_rewrite.h"
60	#include "catalog/pg_shseclabel.h"
61	#include "catalog/pg_statistic_ext.h"
62	#include "catalog/pg_subscription.h"
63	#include "catalog/pg_tablespace.h"
64	#include "catalog/pg_trigger.h"
65	#include "catalog/pg_type.h"
66	#include "catalog/schemapg.h"
67	#include "catalog/storage.h"
68	#include "commands/policy.h"
69	#include "commands/trigger.h"
70	#include "miscadmin.h"
71	#include "nodes/makefuncs.h"
72	#include "nodes/nodeFuncs.h"
73	#include "optimizer/optimizer.h"
74	#include "partitioning/partbounds.h"
75	#include "partitioning/partdesc.h"
76	#include "rewrite/rewriteDefine.h"
77	#include "rewrite/rowsecurity.h"
78	#include "storage/lmgr.h"
79	#include "storage/smgr.h"
80	#include "utils/array.h"
81	#include "utils/builtins.h"
82	#include "utils/datum.h"
83	#include "utils/fmgroids.h"
84	#include "utils/inval.h"
85	#include "utils/lsyscache.h"
86	#include "utils/memutils.h"
87	#include "utils/partcache.h"
88	#include "utils/relmapper.h"
89	#include "utils/resowner_private.h"
90	#include "utils/snapmgr.h"
91	#include "utils/syscache.h"
92
93
/* Magic number written at the head of the relcache init file (version ID). */
#define RELCACHE_INIT_FILEMAGIC		0x573266	/* version ID value */

/*
 * Default policy for whether to apply RECOVER_RELATION_BUILD_MEMORY:
 * do so in clobber-cache builds but not otherwise.  This choice can be
 * overridden at compile time with -DRECOVER_RELATION_BUILD_MEMORY=1 or =0.
 */
#ifndef RECOVER_RELATION_BUILD_MEMORY
#if defined(CLOBBER_CACHE_ALWAYS) || defined(CLOBBER_CACHE_RECURSIVELY)
#define RECOVER_RELATION_BUILD_MEMORY 1
#else
#define RECOVER_RELATION_BUILD_MEMORY 0
#endif
#endif
108
109	/*
110	* hardcoded tuple descriptors, contents generated by genbki.pl
111	*/
112	static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
113	static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
114	static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
115	static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
116	static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
117	static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
118	static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
119	static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
120	static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
121	static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
122
123	/*
124	* Hash tables that index the relation cache
125	*
126	* We used to index the cache by both name and OID, but now there
127	* is only an index by OID.
128	*/
129	typedef struct relidcacheent
130	{
131	Oid reloid;
132	Relation reldesc;
133	} RelIdCacheEnt;
134
135	static HTAB *RelationIdCache;
136
/*
 * This flag is false until we have prepared the critical relcache entries
 * that are needed to do indexscans on the tables read by relcache building.
 */
bool		criticalRelcachesBuilt = false;

/*
 * This flag is false until we have prepared the critical relcache entries
 * for shared catalogs (which are the tables needed for login).
 */
bool		criticalSharedRelcachesBuilt = false;
148
/*
 * This counter counts relcache inval events received since backend startup
 * (but only for rels that are actually in cache).  Presently, we use it only
 * to detect whether data about to be written by write_relcache_init_file()
 * might already be obsolete.
 */
static long relcacheInvalsReceived = 0L;
156
157	/*
158	* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
159	* cleanup work. This list intentionally has limited size; if it overflows,
160	* we fall back to scanning the whole hashtable. There is no value in a very
161	* large list because (1) at some point, a hash_seq_search scan is faster than
162	* retail lookups, and (2) the value of this is to reduce EOXact work for
163	* short transactions, which can't have dirtied all that many tables anyway.
164	* EOXactListAdd() does not bother to prevent duplicate list entries, so the
165	* cleanup processing must be idempotent.
166	*/
167	#define MAX_EOXACT_LIST 32
168	static Oid eoxact_list[MAX_EOXACT_LIST];
169	static int eoxact_list_len = `0`;
170	static bool eoxact_list_overflowed = false;
171
172	#define EOXactListAdd(rel) \
173	do { \
174	if (eoxact_list_len < MAX_EOXACT_LIST) \
175	eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
176	else \
177	eoxact_list_overflowed = true; \
178	} while (0)
179
180	/*
181	* EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
182	* cleanup work. The array expands as needed; there is no hashtable because
183	* we don't need to access individual items except at EOXact.
184	*/
185	static TupleDesc *EOXactTupleDescArray;
186	static int NextEOXactTupleDescNum = `0`;
187	static int EOXactTupleDescArrayLen = `0`;
188
/*
 *		macros to manipulate the lookup hashtable
 */

/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache under its rd_id.
 *
 * If an entry for that OID already exists, replace_allowed is asserted and
 * the entry is repointed at RELATION.  The displaced descriptor is destroyed
 * when its reference count is zero; otherwise a WARNING about the leak is
 * emitted (suppressed in bootstrap mode).
 */
#define RelationCacheInsert(RELATION, replace_allowed)	\
do { \
	RelIdCacheEnt *hentry; bool found; \
	hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
										   (void *) &((RELATION)->rd_id), \
										   HASH_ENTER, &found); \
	if (found) \
	{ \
		/* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
		Relation _old_rel = hentry->reldesc; \
		Assert(replace_allowed); \
		hentry->reldesc = (RELATION); \
		if (RelationHasReferenceCountZero(_old_rel)) \
			RelationDestroyRelation(_old_rel, false); \
		else if (!IsBootstrapProcessingMode()) \
			elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
				 RelationGetRelationName(_old_rel)); \
	} \
	else \
		hentry->reldesc = (RELATION); \
} while(0)
213
/*
 * RelationIdCacheLookup
 *		Look up ID in RelationIdCache; set RELATION to the cached descriptor,
 *		or to NULL when there is no entry.  ID must be an lvalue because its
 *		address is passed to hash_search.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *hentry; \
	hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
										   (void *) &(ID), \
										   HASH_FIND, NULL); \
	if (hentry) \
		RELATION = hentry->reldesc; \
	else \
		RELATION = NULL; \
} while(0)
225
/*
 * RelationCacheDelete
 *		Remove RELATION's entry (keyed by rd_id) from RelationIdCache.
 *		A missing entry is reported with a WARNING rather than an error.
 */
#define RelationCacheDelete(RELATION) \
do { \
	RelIdCacheEnt *hentry; \
	hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
										   (void *) &((RELATION)->rd_id), \
										   HASH_REMOVE, NULL); \
	if (hentry == NULL) \
		elog(WARNING, "failed to delete relcache entry for OID %u", \
			 (RELATION)->rd_id); \
} while(0)
236
237
238	/*
239	* Special cache for opclass-related information
240	*
241	* Note: only default support procs get cached, ie, those with
242	* lefttype = righttype = opcintype.
243	*/
244	typedef struct opclasscacheent
245	{
246	Oid opclassoid; / lookup key: OID of opclass /
247	bool valid; / set true after successful fill-in /
248	StrategyNumber numSupport; / max # of support procs (from pg_am) /
249	Oid opcfamily; / OID of opclass's family /
250	Oid opcintype; / OID of opclass's declared input type /
251	RegProcedure supportProcs; /* OIDs of support procedures /
252	} OpClassCacheEnt;
253
254	static HTAB *OpClassCache = NULL;
255
256
257	/ non-export function prototypes /
258
259	static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
260	static void RelationClearRelation(Relation relation, bool rebuild);
261
262	static void RelationReloadIndexInfo(Relation relation);
263	static void RelationReloadNailed(Relation relation);
264	static void RelationFlushRelation(Relation relation);
265	static void RememberToFreeTupleDescAtEOX(TupleDesc td);
266	static void AtEOXact_cleanup(Relation relation, bool isCommit);
267	static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
268	SubTransactionId mySubid, SubTransactionId parentSubid);
269	static bool load_relcache_init_file(bool shared);
270	static void write_relcache_init_file(bool shared);
271	static void write_item(const void data, Size len, FILE fp);
272
273	static void formrdesc(const char *relationName, Oid relationReltype,
274	bool isshared, int natts, const FormData_pg_attribute *attrs);
275
276	static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
277	static Relation AllocateRelationDesc(Form_pg_class relp);
278	static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
279	static void RelationBuildTupleDesc(Relation relation);
280	static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
281	static void RelationInitPhysicalAddr(Relation relation);
282	static void load_critical_index(Oid indexoid, Oid heapoid);
283	static TupleDesc GetPgClassDescriptor(void);
284	static TupleDesc GetPgIndexDescriptor(void);
285	static void AttrDefaultFetch(Relation relation);
286	static void CheckConstraintFetch(Relation relation);
287	static int CheckConstraintCmp(const void a, const* void *b);
288	static List insert_ordered_oid(List list, Oid datum);
289	static void InitIndexAmRoutine(Relation relation);
290	static void IndexSupportInitialize(oidvector *indclass,
291	RegProcedure *indexSupport,
292	Oid *opFamily,
293	Oid *opcInType,
294	StrategyNumber maxSupportNumber,
295	AttrNumber maxAttributeNumber);
296	static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
297	StrategyNumber numSupport);
298	static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
299	static void unlink_initfile(const char initfilename, int* elevel);
300
301
302	/*
303	* ScanPgRelation
304	*
305	* This is used by RelationBuildDesc to find a pg_class
306	* tuple matching targetRelId. The caller must hold at least
307	* AccessShareLock on the target relid to prevent concurrent-update
308	* scenarios; it isn't guaranteed that all scans used to build the
309	* relcache entry will use the same snapshot. If, for example,
310	* an attribute were to be added after scanning pg_class and before
311	* scanning pg_attribute, relnatts wouldn't match.
312	*
313	* NB: the returned tuple has been copied into palloc'd storage
314	* and must eventually be freed with heap_freetuple.
315	*/
316	static HeapTuple
317	ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
318	{
319	HeapTuple pg_class_tuple;
320	Relation pg_class_desc;
321	SysScanDesc pg_class_scan;
322	ScanKeyData key[`1`];
323	Snapshot snapshot;
324
325	/*
326	* If something goes wrong during backend startup, we might find ourselves
327	* trying to read pg_class before we've selected a database. That ain't
328	* gonna work, so bail out with a useful error message. If this happens,
329	* it probably means a relcache entry that needs to be nailed isn't.
330	*/
331	if (!OidIsValid(MyDatabaseId))
332	elog(FATAL, "cannot read pg_class without having selected a database");
333
334	/*
335	* form a scan key
336	*/
337	ScanKeyInit(&key[`0`],
338	Anum_pg_class_oid,
339	BTEqualStrategyNumber, F_OIDEQ,
340	ObjectIdGetDatum(targetRelId));
341
342	/*
343	* Open pg_class and fetch a tuple. Force heap scan if we haven't yet
344	* built the critical relcache entries (this includes initdb and startup
345	* without a pg_internal.init file). The caller can also force a heap
346	* scan by setting indexOK == false.
347	*/
348	pg_class_desc = table_open(RelationRelationId, AccessShareLock);
349
350	/*
351	* The caller might need a tuple that's newer than the one the historic
352	* snapshot; currently the only case requiring to do so is looking up the
353	* relfilenode of non mapped system relations during decoding.
354	*/
355	if (force_non_historic)
356	snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
357	else
358	snapshot = GetCatalogSnapshot(RelationRelationId);
359
360	pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
361	indexOK && criticalRelcachesBuilt,
362	snapshot,
363	`1`, key);
364
365	pg_class_tuple = systable_getnext(pg_class_scan);
366
367	/*
368	* Must copy tuple before releasing buffer.
369	*/
370	if (HeapTupleIsValid(pg_class_tuple))
371	pg_class_tuple = heap_copytuple(pg_class_tuple);
372
373	/ all done /
374	systable_endscan(pg_class_scan);
375	table_close(pg_class_desc, AccessShareLock);
376
377	return pg_class_tuple;
378	}
379
380	/*
381	* AllocateRelationDesc
382	*
383	* This is used to allocate memory for a new relation descriptor
384	* and initialize the rd_rel field from the given pg_class tuple.
385	*/
386	static Relation
387	AllocateRelationDesc(Form_pg_class relp)
388	{
389	Relation relation;
390	MemoryContext oldcxt;
391	Form_pg_class relationForm;
392
393	/ Relcache entries must live in CacheMemoryContext /
394	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
395
396	/*
397	* allocate and zero space for new relation descriptor
398	*/
399	relation = (Relation) palloc0(sizeof(RelationData));
400
401	/ make sure relation is marked as having no open file yet /
402	relation->rd_smgr = NULL;
403
404	/*
405	* Copy the relation tuple form
406	*
407	* We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
408	* variable-length fields (relacl, reloptions) are NOT stored in the
409	* relcache --- there'd be little point in it, since we don't copy the
410	* tuple's nulls bitmap and hence wouldn't know if the values are valid.
411	* Bottom line is that relacl cannot be retrieved from the relcache. Get
412	* it from the syscache if you need it. The same goes for the original
413	* form of reloptions (however, we do store the parsed form of reloptions
414	* in rd_options).
415	*/
416	relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
417
418	memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
419
420	/ initialize relation tuple form /
421	relation->rd_rel = relationForm;
422
423	/ and allocate attribute tuple form storage /
424	relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts);
425	/ which we mark as a reference-counted tupdesc /
426	relation->rd_att->tdrefcount = `1`;
427
428	MemoryContextSwitchTo(oldcxt);
429
430	return relation;
431	}
432
433	/*
434	* RelationParseRelOptions
435	* Convert pg_class.reloptions into pre-parsed rd_options
436	*
437	* tuple is the real pg_class tuple (not rd_rel!) for relation
438	*
439	* Note: rd_rel and (if an index) rd_indam must be valid already
440	*/
441	static void
442	RelationParseRelOptions(Relation relation, HeapTuple tuple)
443	{
444	bytea *options;
445	amoptions_function amoptsfn;
446
447	relation->rd_options = NULL;
448
449	/*
450	* Look up any AM-specific parse function; fall out if relkind should not
451	* have options.
452	*/
453	switch (relation->rd_rel->relkind)
454	{
455	case RELKIND_RELATION:
456	case RELKIND_TOASTVALUE:
457	case RELKIND_VIEW:
458	case RELKIND_MATVIEW:
459	case RELKIND_PARTITIONED_TABLE:
460	amoptsfn = NULL;
461	break;
462	case RELKIND_INDEX:
463	case RELKIND_PARTITIONED_INDEX:
464	amoptsfn = relation->rd_indam->amoptions;
465	break;
466	default:
467	return;
468	}
469
470	/*
471	* Fetch reloptions from tuple; have to use a hardwired descriptor because
472	* we might not have any other for pg_class yet (consider executing this
473	* code for pg_class itself)
474	*/
475	options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
476
477	/*
478	* Copy parsed data into CacheMemoryContext. To guard against the
479	* possibility of leaks in the reloptions code, we want to do the actual
480	* parsing in the caller's memory context and copy the results into
481	* CacheMemoryContext after the fact.
482	*/
483	if (options)
484	{
485	relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
486	VARSIZE(options));
487	memcpy(relation->rd_options, options, VARSIZE(options));
488	pfree(options);
489	}
490	}
491
492	/*
493	* RelationBuildTupleDesc
494	*
495	* Form the relation's tuple descriptor from information in
496	* the pg_attribute, pg_attrdef & pg_constraint system catalogs.
497	*/
498	static void
499	RelationBuildTupleDesc(Relation relation)
500	{
501	HeapTuple pg_attribute_tuple;
502	Relation pg_attribute_desc;
503	SysScanDesc pg_attribute_scan;
504	ScanKeyData skey[`2`];
505	int need;
506	TupleConstr *constr;
507	AttrDefault *attrdef = NULL;
508	AttrMissing *attrmiss = NULL;
509	int ndef = `0`;
510
511	/ copy some fields from pg_class row to rd_att /
512	relation->rd_att->tdtypeid = relation->rd_rel->reltype;
513	relation->rd_att->tdtypmod = -`1`; / unnecessary, but... /
514
515	constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
516	sizeof(TupleConstr));
517	constr->has_not_null = false;
518	constr->has_generated_stored = false;
519
520	/*
521	* Form a scan key that selects only user attributes (attnum > 0).
522	* (Eliminating system attribute rows at the index level is lots faster
523	* than fetching them.)
524	*/
525	ScanKeyInit(&skey[`0`],
526	Anum_pg_attribute_attrelid,
527	BTEqualStrategyNumber, F_OIDEQ,
528	ObjectIdGetDatum(RelationGetRelid(relation)));
529	ScanKeyInit(&skey[`1`],
530	Anum_pg_attribute_attnum,
531	BTGreaterStrategyNumber, F_INT2GT,
532	Int16GetDatum(`0`));
533
534	/*
535	* Open pg_attribute and begin a scan. Force heap scan if we haven't yet
536	* built the critical relcache entries (this includes initdb and startup
537	* without a pg_internal.init file).
538	*/
539	pg_attribute_desc = table_open(AttributeRelationId, AccessShareLock);
540	pg_attribute_scan = systable_beginscan(pg_attribute_desc,
541	AttributeRelidNumIndexId,
542	criticalRelcachesBuilt,
543	NULL,
544	`2`, skey);
545
546	/*
547	* add attribute data to relation->rd_att
548	*/
549	need = RelationGetNumberOfAttributes(relation);
550
551	while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
552	{
553	Form_pg_attribute attp;
554	int attnum;
555
556	attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
557
558	attnum = attp->attnum;
559	if (attnum <= `0` \|\| attnum > RelationGetNumberOfAttributes(relation))
560	elog(ERROR, "invalid attribute number %d for %s",
561	attp->attnum, RelationGetRelationName(relation));
562
563
564	memcpy(TupleDescAttr(relation->rd_att, attnum - `1`),
565	attp,
566	ATTRIBUTE_FIXED_PART_SIZE);
567
568	/ Update constraint/default info /
569	if (attp->attnotnull)
570	constr->has_not_null = true;
571	if (attp->attgenerated == ATTRIBUTE_GENERATED_STORED)
572	constr->has_generated_stored = true;
573
574	/ If the column has a default, fill it into the attrdef array /
575	if (attp->atthasdef)
576	{
577	if (attrdef == NULL)
578	attrdef = (AttrDefault *)
579	MemoryContextAllocZero(CacheMemoryContext,
580	RelationGetNumberOfAttributes(relation) *
581	sizeof(AttrDefault));
582	attrdef[ndef].adnum = attnum;
583	attrdef[ndef].adbin = NULL;
584
585	ndef++;
586	}
587
588	/ Likewise for a missing value /
589	if (attp->atthasmissing)
590	{
591	Datum missingval;
592	bool missingNull;
593
594	/ Do we have a missing value? /
595	missingval = heap_getattr(pg_attribute_tuple,
596	Anum_pg_attribute_attmissingval,
597	pg_attribute_desc->rd_att,
598	&missingNull);
599	if (!missingNull)
600	{
601	/ Yes, fetch from the array /
602	MemoryContext oldcxt;
603	bool is_null;
604	int one = `1`;
605	Datum missval;
606
607	if (attrmiss == NULL)
608	attrmiss = (AttrMissing *)
609	MemoryContextAllocZero(CacheMemoryContext,
610	relation->rd_rel->relnatts *
611	sizeof(AttrMissing));
612
613	missval = array_get_element(missingval,
614	`1`,
615	&one,
616	-`1`,
617	attp->attlen,
618	attp->attbyval,
619	attp->attalign,
620	&is_null);
621	Assert(!is_null);
622	if (attp->attbyval)
623	{
624	/ for copy by val just copy the datum direct /
625	attrmiss[attnum - `1`].am_value = missval;
626	}
627	else
628	{
629	/ otherwise copy in the correct context /
630	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
631	attrmiss[attnum - `1`].am_value = datumCopy(missval,
632	attp->attbyval,
633	attp->attlen);
634	MemoryContextSwitchTo(oldcxt);
635	}
636	attrmiss[attnum - `1`].am_present = true;
637	}
638	}
639	need--;
640	if (need == `0`)
641	break;
642	}
643
644	/*
645	* end the scan and close the attribute relation
646	*/
647	systable_endscan(pg_attribute_scan);
648	table_close(pg_attribute_desc, AccessShareLock);
649
650	if (need != `0`)
651	elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
652	need, RelationGetRelid(relation));
653
654	/*
655	* The attcacheoff values we read from pg_attribute should all be -1
656	* ("unknown"). Verify this if assert checking is on. They will be
657	* computed when and if needed during tuple access.
658	*/
659	#ifdef USE_ASSERT_CHECKING
660	{
661	int i;
662
663	for (i = `0`; i < RelationGetNumberOfAttributes(relation); i++)
664	Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -`1`);
665	}
666	#endif
667
668	/*
669	* However, we can easily set the attcacheoff value for the first
670	* attribute: it must be zero. This eliminates the need for special cases
671	* for attnum=1 that used to exist in fastgetattr() and index_getattr().
672	*/
673	if (RelationGetNumberOfAttributes(relation) > `0`)
674	TupleDescAttr(relation->rd_att, `0`)->attcacheoff = `0`;
675
676	/*
677	* Set up constraint/default info
678	*/
679	if (constr->has_not_null \|\| ndef > `0` \|\|
680	attrmiss \|\| relation->rd_rel->relchecks)
681	{
682	relation->rd_att->constr = constr;
683
684	if (ndef > `0`) / DEFAULTs /
685	{
686	if (ndef < RelationGetNumberOfAttributes(relation))
687	constr->defval = (AttrDefault *)
688	repalloc(attrdef, ndef * sizeof(AttrDefault));
689	else
690	constr->defval = attrdef;
691	constr->num_defval = ndef;
692	AttrDefaultFetch(relation);
693	}
694	else
695	constr->num_defval = `0`;
696
697	constr->missing = attrmiss;
698
699	if (relation->rd_rel->relchecks > `0`) / CHECKs /
700	{
701	constr->num_check = relation->rd_rel->relchecks;
702	constr->check = (ConstrCheck *)
703	MemoryContextAllocZero(CacheMemoryContext,
704	constr->num_check * sizeof(ConstrCheck));
705	CheckConstraintFetch(relation);
706	}
707	else
708	constr->num_check = `0`;
709	}
710	else
711	{
712	pfree(constr);
713	relation->rd_att->constr = NULL;
714	}
715	}
716
717	/*
718	* RelationBuildRuleLock
719	*
720	* Form the relation's rewrite rules from information in
721	* the pg_rewrite system catalog.
722	*
723	* Note: The rule parsetrees are potentially very complex node structures.
724	* To allow these trees to be freed when the relcache entry is flushed,
725	* we make a private memory context to hold the RuleLock information for
726	* each relcache entry that has associated rules. The context is used
727	* just for rule info, not for any other subsidiary data of the relcache
728	* entry, because that keeps the update logic in RelationClearRelation()
729	* manageable. The other subsidiary data structures are simple enough
730	* to be easy to free explicitly, anyway.
731	*/
732	static void
733	RelationBuildRuleLock(Relation relation)
734	{
735	MemoryContext rulescxt;
736	MemoryContext oldcxt;
737	HeapTuple rewrite_tuple;
738	Relation rewrite_desc;
739	TupleDesc rewrite_tupdesc;
740	SysScanDesc rewrite_scan;
741	ScanKeyData key;
742	RuleLock *rulelock;
743	int numlocks;
744	RewriteRule **rules;
745	int maxlocks;
746
747	/*
748	* Make the private context. Assume it'll not contain much data.
749	*/
750	rulescxt = AllocSetContextCreate(CacheMemoryContext,
751	"relation rules",
752	ALLOCSET_SMALL_SIZES);
753	relation->rd_rulescxt = rulescxt;
754	MemoryContextCopyAndSetIdentifier(rulescxt,
755	RelationGetRelationName(relation));
756
757	/*
758	* allocate an array to hold the rewrite rules (the array is extended if
759	* necessary)
760	*/
761	maxlocks = `4`;
762	rules = (RewriteRule **)
763	MemoryContextAlloc(rulescxt, sizeof(RewriteRule ) maxlocks);
764	numlocks = `0`;
765
766	/*
767	* form a scan key
768	*/
769	ScanKeyInit(&key,
770	Anum_pg_rewrite_ev_class,
771	BTEqualStrategyNumber, F_OIDEQ,
772	ObjectIdGetDatum(RelationGetRelid(relation)));
773
774	/*
775	* open pg_rewrite and begin a scan
776	*
777	* Note: since we scan the rules using RewriteRelRulenameIndexId, we will
778	* be reading the rules in name order, except possibly during
779	* emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
780	* ensures that rules will be fired in name order.
781	*/
782	rewrite_desc = table_open(RewriteRelationId, AccessShareLock);
783	rewrite_tupdesc = RelationGetDescr(rewrite_desc);
784	rewrite_scan = systable_beginscan(rewrite_desc,
785	RewriteRelRulenameIndexId,
786	true, NULL,
787	`1`, &key);
788
789	while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
790	{
791	Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
792	bool isnull;
793	Datum rule_datum;
794	char *rule_str;
795	RewriteRule *rule;
796
797	rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
798	sizeof(RewriteRule));
799
800	rule->ruleId = rewrite_form->oid;
801
802	rule->event = rewrite_form->ev_type - `'0'`;
803	rule->enabled = rewrite_form->ev_enabled;
804	rule->isInstead = rewrite_form->is_instead;
805
806	/*
807	* Must use heap_getattr to fetch ev_action and ev_qual. Also, the
808	* rule strings are often large enough to be toasted. To avoid
809	* leaking memory in the caller's context, do the detoasting here so
810	* we can free the detoasted version.
811	*/
812	rule_datum = heap_getattr(rewrite_tuple,
813	Anum_pg_rewrite_ev_action,
814	rewrite_tupdesc,
815	&isnull);
816	Assert(!isnull);
817	rule_str = TextDatumGetCString(rule_datum);
818	oldcxt = MemoryContextSwitchTo(rulescxt);
819	rule->actions = (List *) stringToNode(rule_str);
820	MemoryContextSwitchTo(oldcxt);
821	pfree(rule_str);
822
823	rule_datum = heap_getattr(rewrite_tuple,
824	Anum_pg_rewrite_ev_qual,
825	rewrite_tupdesc,
826	&isnull);
827	Assert(!isnull);
828	rule_str = TextDatumGetCString(rule_datum);
829	oldcxt = MemoryContextSwitchTo(rulescxt);
830	rule->qual = (Node *) stringToNode(rule_str);
831	MemoryContextSwitchTo(oldcxt);
832	pfree(rule_str);
833
834	/*
835	* We want the rule's table references to be checked as though by the
836	* table owner, not the user referencing the rule. Therefore, scan
837	* through the rule's actions and set the checkAsUser field on all
838	* rtable entries. We have to look at the qual as well, in case it
839	* contains sublinks.
840	*
841	* The reason for doing this when the rule is loaded, rather than when
842	* it is stored, is that otherwise ALTER TABLE OWNER would have to
843	* grovel through stored rules to update checkAsUser fields. Scanning
844	* the rule tree during load is relatively cheap (compared to
845	* constructing it in the first place), so we do it here.
846	*/
847	setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
848	setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
849
850	if (numlocks >= maxlocks)
851	{
852	maxlocks *= `2`;
853	rules = (RewriteRule **)
854	repalloc(rules, sizeof(RewriteRule ) maxlocks);
855	}
856	rules[numlocks++] = rule;
857	}
858
859	/*
860	* end the scan and close the attribute relation
861	*/
862	systable_endscan(rewrite_scan);
863	table_close(rewrite_desc, AccessShareLock);
864
865	/*
866	* there might not be any rules (if relhasrules is out-of-date)
867	*/
868	if (numlocks == `0`)
869	{
870	relation->rd_rules = NULL;
871	relation->rd_rulescxt = NULL;
872	MemoryContextDelete(rulescxt);
873	return;
874	}
875
876	/*
877	* form a RuleLock and insert into relation
878	*/
879	rulelock = (RuleLock ) MemoryContextAlloc(rulescxt, sizeof*(RuleLock));
880	rulelock->numLocks = numlocks;
881	rulelock->rules = rules;
882
883	relation->rd_rules = rulelock;
884	}
885
886	/*
887	* equalRuleLocks
888	*
889	* Determine whether two RuleLocks are equivalent
890	*
891	* Probably this should be in the rules code someplace...
892	*/
893	static bool
894	equalRuleLocks(RuleLock rlock1, RuleLock rlock2)
895	{
896	int i;
897
898	/*
899	* As of 7.3 we assume the rule ordering is repeatable, because
900	* RelationBuildRuleLock should read 'em in a consistent order. So just
901	* compare corresponding slots.
902	*/
903	if (rlock1 != NULL)
904	{
905	if (rlock2 == NULL)
906	return false;
907	if (rlock1->numLocks != rlock2->numLocks)
908	return false;
909	for (i = `0`; i < rlock1->numLocks; i++)
910	{
911	RewriteRule *rule1 = rlock1->rules[i];
912	RewriteRule *rule2 = rlock2->rules[i];
913
914	if (rule1->ruleId != rule2->ruleId)
915	return false;
916	if (rule1->event != rule2->event)
917	return false;
918	if (rule1->enabled != rule2->enabled)
919	return false;
920	if (rule1->isInstead != rule2->isInstead)
921	return false;
922	if (!equal(rule1->qual, rule2->qual))
923	return false;
924	if (!equal(rule1->actions, rule2->actions))
925	return false;
926	}
927	}
928	else if (rlock2 != NULL)
929	return false;
930	return true;
931	}
932
933	/*
934	* equalPolicy
935	*
936	* Determine whether two policies are equivalent
937	*/
938	static bool
939	equalPolicy(RowSecurityPolicy policy1, RowSecurityPolicy policy2)
940	{
941	int i;
942	Oid *r1,
943	*r2;
944
945	if (policy1 != NULL)
946	{
947	if (policy2 == NULL)
948	return false;
949
950	if (policy1->polcmd != policy2->polcmd)
951	return false;
952	if (policy1->hassublinks != policy2->hassublinks)
953	return false;
954	if (strcmp(policy1->policy_name, policy2->policy_name) != `0`)
955	return false;
956	if (ARR_DIMS(policy1->roles)[`0`] != ARR_DIMS(policy2->roles)[`0`])
957	return false;
958
959	r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
960	r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
961
962	for (i = `0`; i < ARR_DIMS(policy1->roles)[`0`]; i++)
963	{
964	if (r1[i] != r2[i])
965	return false;
966	}
967
968	if (!equal(policy1->qual, policy2->qual))
969	return false;
970	if (!equal(policy1->with_check_qual, policy2->with_check_qual))
971	return false;
972	}
973	else if (policy2 != NULL)
974	return false;
975
976	return true;
977	}
978
979	/*
980	* equalRSDesc
981	*
982	* Determine whether two RowSecurityDesc's are equivalent
983	*/
984	static bool
985	equalRSDesc(RowSecurityDesc rsdesc1, RowSecurityDesc rsdesc2)
986	{
987	ListCell *lc,
988	*rc;
989
990	if (rsdesc1 == NULL && rsdesc2 == NULL)
991	return true;
992
993	if ((rsdesc1 != NULL && rsdesc2 == NULL) \|\|
994	(rsdesc1 == NULL && rsdesc2 != NULL))
995	return false;
996
997	if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
998	return false;
999
1000	/ RelationBuildRowSecurity should build policies in order /
1001	forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1002	{
1003	RowSecurityPolicy l = (RowSecurityPolicy ) lfirst(lc);
1004	RowSecurityPolicy r = (RowSecurityPolicy ) lfirst(rc);
1005
1006	if (!equalPolicy(l, r))
1007	return false;
1008	}
1009
1010	return true;
1011	}
1012
1013	/*
1014	* RelationBuildDesc
1015	*
1016	* Build a relation descriptor. The caller must hold at least
1017	* AccessShareLock on the target relid.
1018	*
1019	* The new descriptor is inserted into the hash table if insertIt is true.
1020	*
1021	* Returns NULL if no pg_class row could be found for the given relid
1022	* (suggesting we are trying to access a just-deleted relation).
1023	* Any other error is reported via elog.
1024	*/
1025	static Relation
1026	RelationBuildDesc(Oid targetRelId, bool insertIt)
1027	{
1028	Relation relation;
1029	Oid relid;
1030	HeapTuple pg_class_tuple;
1031	Form_pg_class relp;
1032
1033	/*
1034	* This function and its subroutines can allocate a good deal of transient
1035	* data in CurrentMemoryContext. Traditionally we've just leaked that
1036	* data, reasoning that the caller's context is at worst of transaction
1037	* scope, and relcache loads shouldn't happen so often that it's essential
1038	* to recover transient data before end of statement/transaction. However
1039	* that's definitely not true in clobber-cache test builds, and perhaps
1040	* it's not true in other cases. If RECOVER_RELATION_BUILD_MEMORY is not
1041	* zero, arrange to allocate the junk in a temporary context that we'll
1042	* free before returning. Make it a child of caller's context so that it
1043	* will get cleaned up appropriately if we error out partway through.
1044	*/
1045	#if RECOVER_RELATION_BUILD_MEMORY
1046	MemoryContext tmpcxt;
1047	MemoryContext oldcxt;
1048
1049	tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
1050	"RelationBuildDesc workspace",
1051	ALLOCSET_DEFAULT_SIZES);
1052	oldcxt = MemoryContextSwitchTo(tmpcxt);
1053	#endif
1054
1055	/*
1056	* find the tuple in pg_class corresponding to the given relation id
1057	*/
1058	pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1059
1060	/*
1061	* if no such tuple exists, return NULL
1062	*/
1063	if (!HeapTupleIsValid(pg_class_tuple))
1064	{
1065	#if RECOVER_RELATION_BUILD_MEMORY
1066	/ Return to caller's context, and blow away the temporary context /
1067	MemoryContextSwitchTo(oldcxt);
1068	MemoryContextDelete(tmpcxt);
1069	#endif
1070	return NULL;
1071	}
1072
1073	/*
1074	* get information from the pg_class_tuple
1075	*/
1076	relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1077	relid = relp->oid;
1078	Assert(relid == targetRelId);
1079
1080	/*
1081	* allocate storage for the relation descriptor, and copy pg_class_tuple
1082	* to relation->rd_rel.
1083	*/
1084	relation = AllocateRelationDesc(relp);
1085
1086	/*
1087	* initialize the relation's relation id (relation->rd_id)
1088	*/
1089	RelationGetRelid(relation) = relid;
1090
1091	/*
1092	* normal relations are not nailed into the cache; nor can a pre-existing
1093	* relation be new. It could be temp though. (Actually, it could be new
1094	* too, but it's okay to forget that fact if forced to flush the entry.)
1095	*/
1096	relation->rd_refcnt = `0`;
1097	relation->rd_isnailed = false;
1098	relation->rd_createSubid = InvalidSubTransactionId;
1099	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1100	switch (relation->rd_rel->relpersistence)
1101	{
1102	case RELPERSISTENCE_UNLOGGED:
1103	case RELPERSISTENCE_PERMANENT:
1104	relation->rd_backend = InvalidBackendId;
1105	relation->rd_islocaltemp = false;
1106	break;
1107	case RELPERSISTENCE_TEMP:
1108	if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1109	{
1110	relation->rd_backend = BackendIdForTempRelations();
1111	relation->rd_islocaltemp = true;
1112	}
1113	else
1114	{
1115	/*
1116	* If it's a temp table, but not one of ours, we have to use
1117	* the slow, grotty method to figure out the owning backend.
1118	*
1119	* Note: it's possible that rd_backend gets set to MyBackendId
1120	* here, in case we are looking at a pg_class entry left over
1121	* from a crashed backend that coincidentally had the same
1122	* BackendId we're using. We should not consider such a
1123	* table to be "ours"; this is why we need the separate
1124	* rd_islocaltemp flag. The pg_class entry will get flushed
1125	* if/when we clean out the corresponding temp table namespace
1126	* in preparation for using it.
1127	*/
1128	relation->rd_backend =
1129	GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1130	Assert(relation->rd_backend != InvalidBackendId);
1131	relation->rd_islocaltemp = false;
1132	}
1133	break;
1134	default:
1135	elog(ERROR, "invalid relpersistence: %c",
1136	relation->rd_rel->relpersistence);
1137	break;
1138	}
1139
1140	/*
1141	* initialize the tuple descriptor (relation->rd_att).
1142	*/
1143	RelationBuildTupleDesc(relation);
1144
1145	/*
1146	* Fetch rules and triggers that affect this relation
1147	*/
1148	if (relation->rd_rel->relhasrules)
1149	RelationBuildRuleLock(relation);
1150	else
1151	{
1152	relation->rd_rules = NULL;
1153	relation->rd_rulescxt = NULL;
1154	}
1155
1156	if (relation->rd_rel->relhastriggers)
1157	RelationBuildTriggers(relation);
1158	else
1159	relation->trigdesc = NULL;
1160
1161	if (relation->rd_rel->relrowsecurity)
1162	RelationBuildRowSecurity(relation);
1163	else
1164	relation->rd_rsdesc = NULL;
1165
1166	/ foreign key data is not loaded till asked for /
1167	relation->rd_fkeylist = NIL;
1168	relation->rd_fkeyvalid = false;
1169
1170	/ if a partitioned table, initialize key and partition descriptor info /
1171	if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1172	{
1173	RelationBuildPartitionKey(relation);
1174	RelationBuildPartitionDesc(relation);
1175	}
1176	else
1177	{
1178	relation->rd_partkey = NULL;
1179	relation->rd_partkeycxt = NULL;
1180	relation->rd_partdesc = NULL;
1181	relation->rd_pdcxt = NULL;
1182	}
1183	/ ... but partcheck is not loaded till asked for /
1184	relation->rd_partcheck = NIL;
1185	relation->rd_partcheckvalid = false;
1186	relation->rd_partcheckcxt = NULL;
1187
1188	/*
1189	* initialize access method information
1190	*/
1191	switch (relation->rd_rel->relkind)
1192	{
1193	case RELKIND_INDEX:
1194	case RELKIND_PARTITIONED_INDEX:
1195	Assert(relation->rd_rel->relam != InvalidOid);
1196	RelationInitIndexAccessInfo(relation);
1197	break;
1198	case RELKIND_RELATION:
1199	case RELKIND_TOASTVALUE:
1200	case RELKIND_MATVIEW:
1201	Assert(relation->rd_rel->relam != InvalidOid);
1202	RelationInitTableAccessMethod(relation);
1203	break;
1204	case RELKIND_SEQUENCE:
1205	Assert(relation->rd_rel->relam == InvalidOid);
1206	RelationInitTableAccessMethod(relation);
1207	break;
1208	case RELKIND_VIEW:
1209	case RELKIND_COMPOSITE_TYPE:
1210	case RELKIND_FOREIGN_TABLE:
1211	case RELKIND_PARTITIONED_TABLE:
1212	Assert(relation->rd_rel->relam == InvalidOid);
1213	break;
1214	}
1215
1216	/ extract reloptions if any /
1217	RelationParseRelOptions(relation, pg_class_tuple);
1218
1219	/*
1220	* initialize the relation lock manager information
1221	*/
1222	RelationInitLockInfo(relation); / see lmgr.c /
1223
1224	/*
1225	* initialize physical addressing information for the relation
1226	*/
1227	RelationInitPhysicalAddr(relation);
1228
1229	/ make sure relation is marked as having no open file yet /
1230	relation->rd_smgr = NULL;
1231
1232	/*
1233	* now we can free the memory allocated for pg_class_tuple
1234	*/
1235	heap_freetuple(pg_class_tuple);
1236
1237	/*
1238	* Insert newly created relation into relcache hash table, if requested.
1239	*
1240	* There is one scenario in which we might find a hashtable entry already
1241	* present, even though our caller failed to find it: if the relation is a
1242	* system catalog or index that's used during relcache load, we might have
1243	* recursively created the same relcache entry during the preceding steps.
1244	* So allow RelationCacheInsert to delete any already-present relcache
1245	* entry for the same OID. The already-present entry should have refcount
1246	* zero (else somebody forgot to close it); in the event that it doesn't,
1247	* we'll elog a WARNING and leak the already-present entry.
1248	*/
1249	if (insertIt)
1250	RelationCacheInsert(relation, true);
1251
1252	/ It's fully valid /
1253	relation->rd_isvalid = true;
1254
1255	#if RECOVER_RELATION_BUILD_MEMORY
1256	/ Return to caller's context, and blow away the temporary context /
1257	MemoryContextSwitchTo(oldcxt);
1258	MemoryContextDelete(tmpcxt);
1259	#endif
1260
1261	return relation;
1262	}
1263
1264	/*
1265	* Initialize the physical addressing info (RelFileNode) for a relcache entry
1266	*
1267	* Note: at the physical level, relations in the pg_global tablespace must
1268	* be treated as shared, even if relisshared isn't set. Hence we do not
1269	* look at relisshared here.
1270	*/
1271	static void
1272	RelationInitPhysicalAddr(Relation relation)
1273	{
1274	/ these relations kinds never have storage /
1275	if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
1276	return;
1277
1278	if (relation->rd_rel->reltablespace)
1279	relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1280	else
1281	relation->rd_node.spcNode = MyDatabaseTableSpace;
1282	if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1283	relation->rd_node.dbNode = InvalidOid;
1284	else
1285	relation->rd_node.dbNode = MyDatabaseId;
1286
1287	if (relation->rd_rel->relfilenode)
1288	{
1289	/*
1290	* Even if we are using a decoding snapshot that doesn't represent the
1291	* current state of the catalog we need to make sure the filenode
1292	* points to the current file since the older file will be gone (or
1293	* truncated). The new file will still contain older rows so lookups
1294	* in them will work correctly. This wouldn't work correctly if
1295	* rewrites were allowed to change the schema in an incompatible way,
1296	* but those are prevented both on catalog tables and on user tables
1297	* declared as additional catalog tables.
1298	*/
1299	if (HistoricSnapshotActive()
1300	&& RelationIsAccessibleInLogicalDecoding(relation)
1301	&& IsTransactionState())
1302	{
1303	HeapTuple phys_tuple;
1304	Form_pg_class physrel;
1305
1306	phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1307	RelationGetRelid(relation) != ClassOidIndexId,
1308	true);
1309	if (!HeapTupleIsValid(phys_tuple))
1310	elog(ERROR, "could not find pg_class entry for %u",
1311	RelationGetRelid(relation));
1312	physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1313
1314	relation->rd_rel->reltablespace = physrel->reltablespace;
1315	relation->rd_rel->relfilenode = physrel->relfilenode;
1316	heap_freetuple(phys_tuple);
1317	}
1318
1319	relation->rd_node.relNode = relation->rd_rel->relfilenode;
1320	}
1321	else
1322	{
1323	/ Consult the relation mapper /
1324	relation->rd_node.relNode =
1325	RelationMapOidToFilenode(relation->rd_id,
1326	relation->rd_rel->relisshared);
1327	if (!OidIsValid(relation->rd_node.relNode))
1328	elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1329	RelationGetRelationName(relation), relation->rd_id);
1330	}
1331	}
1332
1333	/*
1334	* Fill in the IndexAmRoutine for an index relation.
1335	*
1336	* relation's rd_amhandler and rd_indexcxt must be valid already.
1337	*/
1338	static void
1339	InitIndexAmRoutine(Relation relation)
1340	{
1341	IndexAmRoutine *cached,
1342	*tmp;
1343
1344	/*
1345	* Call the amhandler in current, short-lived memory context, just in case
1346	* it leaks anything (it probably won't, but let's be paranoid).
1347	*/
1348	tmp = GetIndexAmRoutine(relation->rd_amhandler);
1349
1350	/ OK, now transfer the data into relation's rd_indexcxt. /
1351	cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1352	sizeof(IndexAmRoutine));
1353	memcpy(cached, tmp, sizeof(IndexAmRoutine));
1354	relation->rd_indam = cached;
1355
1356	pfree(tmp);
1357	}
1358
1359	/*
1360	* Initialize index-access-method support data for an index relation
1361	*/
1362	void
1363	RelationInitIndexAccessInfo(Relation relation)
1364	{
1365	HeapTuple tuple;
1366	Form_pg_am aform;
1367	Datum indcollDatum;
1368	Datum indclassDatum;
1369	Datum indoptionDatum;
1370	bool isnull;
1371	oidvector *indcoll;
1372	oidvector *indclass;
1373	int2vector *indoption;
1374	MemoryContext indexcxt;
1375	MemoryContext oldcontext;
1376	int indnatts;
1377	int indnkeyatts;
1378	uint16 amsupport;
1379
1380	/*
1381	* Make a copy of the pg_index entry for the index. Since pg_index
1382	* contains variable-length and possibly-null fields, we have to do this
1383	* honestly rather than just treating it as a Form_pg_index struct.
1384	*/
1385	tuple = SearchSysCache1(INDEXRELID,
1386	ObjectIdGetDatum(RelationGetRelid(relation)));
1387	if (!HeapTupleIsValid(tuple))
1388	elog(ERROR, "cache lookup failed for index %u",
1389	RelationGetRelid(relation));
1390	oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
1391	relation->rd_indextuple = heap_copytuple(tuple);
1392	relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1393	MemoryContextSwitchTo(oldcontext);
1394	ReleaseSysCache(tuple);
1395
1396	/*
1397	* Look up the index's access method, save the OID of its handler function
1398	*/
1399	tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1400	if (!HeapTupleIsValid(tuple))
1401	elog(ERROR, "cache lookup failed for access method %u",
1402	relation->rd_rel->relam);
1403	aform = (Form_pg_am) GETSTRUCT(tuple);
1404	relation->rd_amhandler = aform->amhandler;
1405	ReleaseSysCache(tuple);
1406
1407	indnatts = RelationGetNumberOfAttributes(relation);
1408	if (indnatts != IndexRelationGetNumberOfAttributes(relation))
1409	elog(ERROR, "relnatts disagrees with indnatts for index %u",
1410	RelationGetRelid(relation));
1411	indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation);
1412
1413	/*
1414	* Make the private context to hold index access info. The reason we need
1415	* a context, and not just a couple of pallocs, is so that we won't leak
1416	* any subsidiary info attached to fmgr lookup records.
1417	*/
1418	indexcxt = AllocSetContextCreate(CacheMemoryContext,
1419	"index info",
1420	ALLOCSET_SMALL_SIZES);
1421	relation->rd_indexcxt = indexcxt;
1422	MemoryContextCopyAndSetIdentifier(indexcxt,
1423	RelationGetRelationName(relation));
1424
1425	/*
1426	* Now we can fetch the index AM's API struct
1427	*/
1428	InitIndexAmRoutine(relation);
1429
1430	/*
1431	* Allocate arrays to hold data. Opclasses are not used for included
1432	* columns, so allocate them for indnkeyatts only.
1433	*/
1434	relation->rd_opfamily = (Oid *)
1435	MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1436	relation->rd_opcintype = (Oid *)
1437	MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1438
1439	amsupport = relation->rd_indam->amsupport;
1440	if (amsupport > `0`)
1441	{
1442	int nsupport = indnatts * amsupport;
1443
1444	relation->rd_support = (RegProcedure *)
1445	MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1446	relation->rd_supportinfo = (FmgrInfo *)
1447	MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1448	}
1449	else
1450	{
1451	relation->rd_support = NULL;
1452	relation->rd_supportinfo = NULL;
1453	}
1454
1455	relation->rd_indcollation = (Oid *)
1456	MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1457
1458	relation->rd_indoption = (int16 *)
1459	MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(int16));
1460
1461	/*
1462	* indcollation cannot be referenced directly through the C struct,
1463	* because it comes after the variable-width indkey field. Must extract
1464	* the datum the hard way...
1465	*/
1466	indcollDatum = fastgetattr(relation->rd_indextuple,
1467	Anum_pg_index_indcollation,
1468	GetPgIndexDescriptor(),
1469	&isnull);
1470	Assert(!isnull);
1471	indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1472	memcpy(relation->rd_indcollation, indcoll->values, indnkeyatts * sizeof(Oid));
1473
1474	/*
1475	* indclass cannot be referenced directly through the C struct, because it
1476	* comes after the variable-width indkey field. Must extract the datum
1477	* the hard way...
1478	*/
1479	indclassDatum = fastgetattr(relation->rd_indextuple,
1480	Anum_pg_index_indclass,
1481	GetPgIndexDescriptor(),
1482	&isnull);
1483	Assert(!isnull);
1484	indclass = (oidvector *) DatumGetPointer(indclassDatum);
1485
1486	/*
1487	* Fill the support procedure OID array, as well as the info about
1488	* opfamilies and opclass input types. (aminfo and supportinfo are left
1489	* as zeroes, and are filled on-the-fly when used)
1490	*/
1491	IndexSupportInitialize(indclass, relation->rd_support,
1492	relation->rd_opfamily, relation->rd_opcintype,
1493	amsupport, indnkeyatts);
1494
1495	/*
1496	* Similarly extract indoption and copy it to the cache entry
1497	*/
1498	indoptionDatum = fastgetattr(relation->rd_indextuple,
1499	Anum_pg_index_indoption,
1500	GetPgIndexDescriptor(),
1501	&isnull);
1502	Assert(!isnull);
1503	indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1504	memcpy(relation->rd_indoption, indoption->values, indnkeyatts * sizeof(int16));
1505
1506	/*
1507	* expressions, predicate, exclusion caches will be filled later
1508	*/
1509	relation->rd_indexprs = NIL;
1510	relation->rd_indpred = NIL;
1511	relation->rd_exclops = NULL;
1512	relation->rd_exclprocs = NULL;
1513	relation->rd_exclstrats = NULL;
1514	relation->rd_amcache = NULL;
1515	}
1516
1517	/*
1518	* IndexSupportInitialize
1519	* Initializes an index's cached opclass information,
1520	* given the index's pg_index.indclass entry.
1521	*
1522	* Data is returned into indexSupport, opFamily, and *opcInType,
1523	* which are arrays allocated by the caller.
1524	*
1525	* The caller also passes maxSupportNumber and maxAttributeNumber, since these
1526	* indicate the size of the arrays it has allocated --- but in practice these
1527	* numbers must always match those obtainable from the system catalog entries
1528	* for the index and access method.
1529	*/
1530	static void
1531	IndexSupportInitialize(oidvector *indclass,
1532	RegProcedure *indexSupport,
1533	Oid *opFamily,
1534	Oid *opcInType,
1535	StrategyNumber maxSupportNumber,
1536	AttrNumber maxAttributeNumber)
1537	{
1538	int attIndex;
1539
1540	for (attIndex = `0`; attIndex < maxAttributeNumber; attIndex++)
1541	{
1542	OpClassCacheEnt *opcentry;
1543
1544	if (!OidIsValid(indclass->values[attIndex]))
1545	elog(ERROR, "bogus pg_index tuple");
1546
1547	/ look up the info for this opclass, using a cache /
1548	opcentry = LookupOpclassInfo(indclass->values[attIndex],
1549	maxSupportNumber);
1550
1551	/ copy cached data into relcache entry /
1552	opFamily[attIndex] = opcentry->opcfamily;
1553	opcInType[attIndex] = opcentry->opcintype;
1554	if (maxSupportNumber > `0`)
1555	memcpy(&indexSupport[attIndex * maxSupportNumber],
1556	opcentry->supportProcs,
1557	maxSupportNumber * sizeof(RegProcedure));
1558	}
1559	}
1560
1561	/*
1562	* LookupOpclassInfo
1563	*
1564	* This routine maintains a per-opclass cache of the information needed
1565	* by IndexSupportInitialize(). This is more efficient than relying on
1566	* the catalog cache, because we can load all the info about a particular
1567	* opclass in a single indexscan of pg_amproc.
1568	*
1569	* The information from pg_am about expected range of support function
1570	* numbers is passed in, rather than being looked up, mainly because the
1571	* caller will have it already.
1572	*
1573	* Note there is no provision for flushing the cache. This is OK at the
1574	* moment because there is no way to ALTER any interesting properties of an
1575	* existing opclass --- all you can do is drop it, which will result in
1576	* a useless but harmless dead entry in the cache. To support altering
1577	* opclass membership (not the same as opfamily membership!), we'd need to
1578	* be able to flush this cache as well as the contents of relcache entries
1579	* for indexes.
1580	*/
1581	static OpClassCacheEnt *
1582	LookupOpclassInfo(Oid operatorClassOid,
1583	StrategyNumber numSupport)
1584	{
1585	OpClassCacheEnt *opcentry;
1586	bool found;
1587	Relation rel;
1588	SysScanDesc scan;
1589	ScanKeyData skey[`3`];
1590	HeapTuple htup;
1591	bool indexOK;
1592
1593	if (OpClassCache == NULL)
1594	{
1595	/ First time through: initialize the opclass cache /
1596	HASHCTL ctl;
1597
1598	MemSet(&ctl, `0`, sizeof(ctl));
1599	ctl.keysize = sizeof(Oid);
1600	ctl.entrysize = sizeof(OpClassCacheEnt);
1601	OpClassCache = hash_create("Operator class cache", `64`,
1602	&ctl, HASH_ELEM \| HASH_BLOBS);
1603
1604	/ Also make sure CacheMemoryContext exists /
1605	if (!CacheMemoryContext)
1606	CreateCacheMemoryContext();
1607	}
1608
1609	opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1610	(void *) &operatorClassOid,
1611	HASH_ENTER, &found);
1612
1613	if (!found)
1614	{
1615	/ Need to allocate memory for new entry /
1616	opcentry->valid = false; / until known OK /
1617	opcentry->numSupport = numSupport;
1618
1619	if (numSupport > `0`)
1620	opcentry->supportProcs = (RegProcedure *)
1621	MemoryContextAllocZero(CacheMemoryContext,
1622	numSupport * sizeof(RegProcedure));
1623	else
1624	opcentry->supportProcs = NULL;
1625	}
1626	else
1627	{
1628	Assert(numSupport == opcentry->numSupport);
1629	}
1630
1631	/*
1632	* When testing for cache-flush hazards, we intentionally disable the
1633	* operator class cache and force reloading of the info on each call. This
1634	* is helpful because we want to test the case where a cache flush occurs
1635	* while we are loading the info, and it's very hard to provoke that if
1636	* this happens only once per opclass per backend.
1637	*/
1638	#if defined(CLOBBER_CACHE_ALWAYS)
1639	opcentry->valid = false;
1640	#endif
1641
1642	if (opcentry->valid)
1643	return opcentry;
1644
1645	/*
1646	* Need to fill in new entry.
1647	*
1648	* To avoid infinite recursion during startup, force heap scans if we're
1649	* looking up info for the opclasses used by the indexes we would like to
1650	* reference here.
1651	*/
1652	indexOK = criticalRelcachesBuilt \|\|
1653	(operatorClassOid != OID_BTREE_OPS_OID &&
1654	operatorClassOid != INT2_BTREE_OPS_OID);
1655
1656	/*
1657	* We have to fetch the pg_opclass row to determine its opfamily and
1658	* opcintype, which are needed to look up related operators and functions.
1659	* It'd be convenient to use the syscache here, but that probably doesn't
1660	* work while bootstrapping.
1661	*/
1662	ScanKeyInit(&skey[`0`],
1663	Anum_pg_opclass_oid,
1664	BTEqualStrategyNumber, F_OIDEQ,
1665	ObjectIdGetDatum(operatorClassOid));
1666	rel = table_open(OperatorClassRelationId, AccessShareLock);
1667	scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1668	NULL, `1`, skey);
1669
1670	if (HeapTupleIsValid(htup = systable_getnext(scan)))
1671	{
1672	Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1673
1674	opcentry->opcfamily = opclassform->opcfamily;
1675	opcentry->opcintype = opclassform->opcintype;
1676	}
1677	else
1678	elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1679
1680	systable_endscan(scan);
1681	table_close(rel, AccessShareLock);
1682
1683	/*
1684	* Scan pg_amproc to obtain support procs for the opclass. We only fetch
1685	* the default ones (those with lefttype = righttype = opcintype).
1686	*/
1687	if (numSupport > `0`)
1688	{
1689	ScanKeyInit(&skey[`0`],
1690	Anum_pg_amproc_amprocfamily,
1691	BTEqualStrategyNumber, F_OIDEQ,
1692	ObjectIdGetDatum(opcentry->opcfamily));
1693	ScanKeyInit(&skey[`1`],
1694	Anum_pg_amproc_amproclefttype,
1695	BTEqualStrategyNumber, F_OIDEQ,
1696	ObjectIdGetDatum(opcentry->opcintype));
1697	ScanKeyInit(&skey[`2`],
1698	Anum_pg_amproc_amprocrighttype,
1699	BTEqualStrategyNumber, F_OIDEQ,
1700	ObjectIdGetDatum(opcentry->opcintype));
1701	rel = table_open(AccessMethodProcedureRelationId, AccessShareLock);
1702	scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1703	NULL, `3`, skey);
1704
1705	while (HeapTupleIsValid(htup = systable_getnext(scan)))
1706	{
1707	Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1708
1709	if (amprocform->amprocnum <= `0` \|\|
1710	(StrategyNumber) amprocform->amprocnum > numSupport)
1711	elog(ERROR, "invalid amproc number %d for opclass %u",
1712	amprocform->amprocnum, operatorClassOid);
1713
1714	opcentry->supportProcs[amprocform->amprocnum - `1`] =
1715	amprocform->amproc;
1716	}
1717
1718	systable_endscan(scan);
1719	table_close(rel, AccessShareLock);
1720	}
1721
1722	opcentry->valid = true;
1723	return opcentry;
1724	}
1725
1726	/*
1727	* Fill in the TableAmRoutine for a relation
1728	*
1729	* relation's rd_amhandler must be valid already.
1730	*/
1731	static void
1732	InitTableAmRoutine(Relation relation)
1733	{
1734	relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler);
1735	}
1736
1737	/*
1738	* Initialize table access method support for a table like relation
1739	*/
1740	void
1741	RelationInitTableAccessMethod(Relation relation)
1742	{
1743	HeapTuple tuple;
1744	Form_pg_am aform;
1745
1746	if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
1747	{
1748	/*
1749	* Sequences are currently accessed like heap tables, but it doesn't
1750	* seem prudent to show that in the catalog. So just overwrite it
1751	* here.
1752	*/
1753	relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1754	}
1755	else if (IsCatalogRelation(relation))
1756	{
1757	/*
1758	* Avoid doing a syscache lookup for catalog tables.
1759	*/
1760	Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID);
1761	relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1762	}
1763	else
1764	{
1765	/*
1766	* Look up the table access method, save the OID of its handler
1767	* function.
1768	*/
1769	Assert(relation->rd_rel->relam != InvalidOid);
1770	tuple = SearchSysCache1(AMOID,
1771	ObjectIdGetDatum(relation->rd_rel->relam));
1772	if (!HeapTupleIsValid(tuple))
1773	elog(ERROR, "cache lookup failed for access method %u",
1774	relation->rd_rel->relam);
1775	aform = (Form_pg_am) GETSTRUCT(tuple);
1776	relation->rd_amhandler = aform->amhandler;
1777	ReleaseSysCache(tuple);
1778	}
1779
1780	/*
1781	* Now we can fetch the table AM's API struct
1782	*/
1783	InitTableAmRoutine(relation);
1784	}
1785
1786	/*
1787	* formrdesc
1788	*
1789	* This is a special cut-down version of RelationBuildDesc(),
1790	* used while initializing the relcache.
1791	* The relation descriptor is built just from the supplied parameters,
1792	* without actually looking at any system table entries. We cheat
1793	* quite a lot since we only need to work for a few basic system
1794	* catalogs.
1795	*
1796	* The catalogs this is used for can't have constraints (except attnotnull),
1797	* default values, rules, or triggers, since we don't cope with any of that.
1798	* (Well, actually, this only matters for properties that need to be valid
1799	* during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1800	* these properties matter then...)
1801	*
1802	* NOTE: we assume we are already switched into CacheMemoryContext.
1803	*/
1804	static void
1805	formrdesc(const char *relationName, Oid relationReltype,
1806	bool isshared,
1807	int natts, const FormData_pg_attribute *attrs)
1808	{
1809	Relation relation;
1810	int i;
1811	bool has_not_null;
1812
1813	/*
1814	* allocate new relation desc, clear all fields of reldesc
1815	*/
1816	relation = (Relation) palloc0(sizeof(RelationData));
1817
1818	/ make sure relation is marked as having no open file yet /
1819	relation->rd_smgr = NULL;
1820
1821	/*
1822	* initialize reference count: 1 because it is nailed in cache
1823	*/
1824	relation->rd_refcnt = `1`;
1825
1826	/*
1827	* all entries built with this routine are nailed-in-cache; none are for
1828	* new or temp relations.
1829	*/
1830	relation->rd_isnailed = true;
1831	relation->rd_createSubid = InvalidSubTransactionId;
1832	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1833	relation->rd_backend = InvalidBackendId;
1834	relation->rd_islocaltemp = false;
1835
1836	/*
1837	* initialize relation tuple form
1838	*
1839	* The data we insert here is pretty incomplete/bogus, but it'll serve to
1840	* get us launched. RelationCacheInitializePhase3() will read the real
1841	* data from pg_class and replace what we've done here. Note in
1842	* particular that relowner is left as zero; this cues
1843	* RelationCacheInitializePhase3 that the real data isn't there yet.
1844	*/
1845	relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1846
1847	namestrcpy(&relation->rd_rel->relname, relationName);
1848	relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1849	relation->rd_rel->reltype = relationReltype;
1850
1851	/*
1852	* It's important to distinguish between shared and non-shared relations,
1853	* even at bootstrap time, to make sure we know where they are stored.
1854	*/
1855	relation->rd_rel->relisshared = isshared;
1856	if (isshared)
1857	relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1858
1859	/ formrdesc is used only for permanent relations /
1860	relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1861
1862	/ ... and they're always populated, too /
1863	relation->rd_rel->relispopulated = true;
1864
1865	relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1866	relation->rd_rel->relpages = `0`;
1867	relation->rd_rel->reltuples = `0`;
1868	relation->rd_rel->relallvisible = `0`;
1869	relation->rd_rel->relkind = RELKIND_RELATION;
1870	relation->rd_rel->relnatts = (int16) natts;
1871	relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1872
1873	/*
1874	* initialize attribute tuple form
1875	*
1876	* Unlike the case with the relation tuple, this data had better be right
1877	* because it will never be replaced. The data comes from
1878	* src/include/catalog/ headers via genbki.pl.
1879	*/
1880	relation->rd_att = CreateTemplateTupleDesc(natts);
1881	relation->rd_att->tdrefcount = `1`; / mark as refcounted /
1882
1883	relation->rd_att->tdtypeid = relationReltype;
1884	relation->rd_att->tdtypmod = -`1`; / unnecessary, but... /
1885
1886	/*
1887	* initialize tuple desc info
1888	*/
1889	has_not_null = false;
1890	for (i = `0`; i < natts; i++)
1891	{
1892	memcpy(TupleDescAttr(relation->rd_att, i),
1893	&attrs[i],
1894	ATTRIBUTE_FIXED_PART_SIZE);
1895	has_not_null \|= attrs[i].attnotnull;
1896	/ make sure attcacheoff is valid /
1897	TupleDescAttr(relation->rd_att, i)->attcacheoff = -`1`;
1898	}
1899
1900	/ initialize first attribute's attcacheoff, cf RelationBuildTupleDesc /
1901	TupleDescAttr(relation->rd_att, `0`)->attcacheoff = `0`;
1902
1903	/ mark not-null status /
1904	if (has_not_null)
1905	{
1906	TupleConstr constr = (TupleConstr ) palloc0(sizeof(TupleConstr));
1907
1908	constr->has_not_null = true;
1909	relation->rd_att->constr = constr;
1910	}
1911
1912	/*
1913	* initialize relation id from info in att array (my, this is ugly)
1914	*/
1915	RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, `0`)->attrelid;
1916
1917	/*
1918	* All relations made with formrdesc are mapped. This is necessarily so
1919	* because there is no other way to know what filenode they currently
1920	* have. In bootstrap mode, add them to the initial relation mapper data,
1921	* specifying that the initial filenode is the same as the OID.
1922	*/
1923	relation->rd_rel->relfilenode = InvalidOid;
1924	if (IsBootstrapProcessingMode())
1925	RelationMapUpdateMap(RelationGetRelid(relation),
1926	RelationGetRelid(relation),
1927	isshared, true);
1928
1929	/*
1930	* initialize the relation lock manager information
1931	*/
1932	RelationInitLockInfo(relation); / see lmgr.c /
1933
1934	/*
1935	* initialize physical addressing information for the relation
1936	*/
1937	RelationInitPhysicalAddr(relation);
1938
1939	/*
1940	* initialize the table am handler
1941	*/
1942	relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1943	relation->rd_tableam = GetHeapamTableAmRoutine();
1944
1945	/*
1946	* initialize the rel-has-index flag, using hardwired knowledge
1947	*/
1948	if (IsBootstrapProcessingMode())
1949	{
1950	/ In bootstrap mode, we have no indexes /
1951	relation->rd_rel->relhasindex = false;
1952	}
1953	else
1954	{
1955	/ Otherwise, all the rels formrdesc is used for have indexes /
1956	relation->rd_rel->relhasindex = true;
1957	}
1958
1959	/*
1960	* add new reldesc to relcache
1961	*/
1962	RelationCacheInsert(relation, false);
1963
1964	/ It's fully valid /
1965	relation->rd_isvalid = true;
1966	}
1967
1968
/* ----------------------------------------------------------------
 *				 Relation Descriptor Lookup Interface
 * ----------------------------------------------------------------
 */
1973
1974	/*
1975	* RelationIdGetRelation
1976	*
1977	* Lookup a reldesc by OID; make one if not already in cache.
1978	*
1979	* Returns NULL if no pg_class row could be found for the given relid
1980	* (suggesting we are trying to access a just-deleted relation).
1981	* Any other error is reported via elog.
1982	*
1983	* NB: caller should already have at least AccessShareLock on the
1984	* relation ID, else there are nasty race conditions.
1985	*
1986	* NB: relation ref count is incremented, or set to 1 if new entry.
1987	* Caller should eventually decrement count. (Usually,
1988	* that happens by calling RelationClose().)
1989	*/
1990	Relation
1991	RelationIdGetRelation(Oid relationId)
1992	{
1993	Relation rd;
1994
1995	/ Make sure we're in an xact, even if this ends up being a cache hit /
1996	Assert(IsTransactionState());
1997
1998	/*
1999	* first try to find reldesc in the cache
2000	*/
2001	RelationIdCacheLookup(relationId, rd);
2002
2003	if (RelationIsValid(rd))
2004	{
2005	RelationIncrementReferenceCount(rd);
2006	/ revalidate cache entry if necessary /
2007	if (!rd->rd_isvalid)
2008	{
2009	/*
2010	* Indexes only have a limited number of possible schema changes,
2011	* and we don't want to use the full-blown procedure because it's
2012	* a headache for indexes that reload itself depends on.
2013	*/
2014	if (rd->rd_rel->relkind == RELKIND_INDEX \|\|
2015	rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2016	RelationReloadIndexInfo(rd);
2017	else
2018	RelationClearRelation(rd, true);
2019
2020	/*
2021	* Normally entries need to be valid here, but before the relcache
2022	* has been initialized, not enough infrastructure exists to
2023	* perform pg_class lookups. The structure of such entries doesn't
2024	* change, but we still want to update the rd_rel entry. So
2025	* rd_isvalid = false is left in place for a later lookup.
2026	*/
2027	Assert(rd->rd_isvalid \|\|
2028	(rd->rd_isnailed && !criticalRelcachesBuilt));
2029	}
2030	return rd;
2031	}
2032
2033	/*
2034	* no reldesc in the cache, so have RelationBuildDesc() build one and add
2035	* it.
2036	*/
2037	rd = RelationBuildDesc(relationId, true);
2038	if (RelationIsValid(rd))
2039	RelationIncrementReferenceCount(rd);
2040	return rd;
2041	}
2042
/* ----------------------------------------------------------------
 *				cache invalidation support routines
 * ----------------------------------------------------------------
 */
2047
2048	/*
2049	* RelationIncrementReferenceCount
2050	* Increments relation reference count.
2051	*
2052	* Note: bootstrap mode has its own weird ideas about relation refcount
2053	* behavior; we ought to fix it someday, but for now, just disable
2054	* reference count ownership tracking in bootstrap mode.
2055	*/
2056	void
2057	RelationIncrementReferenceCount(Relation rel)
2058	{
2059	ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
2060	rel->rd_refcnt += `1`;
2061	if (!IsBootstrapProcessingMode())
2062	ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
2063	}
2064
2065	/*
2066	* RelationDecrementReferenceCount
2067	* Decrements relation reference count.
2068	*/
2069	void
2070	RelationDecrementReferenceCount(Relation rel)
2071	{
2072	Assert(rel->rd_refcnt > `0`);
2073	rel->rd_refcnt -= `1`;
2074	if (!IsBootstrapProcessingMode())
2075	ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
2076	}
2077
2078	/*
2079	* RelationClose - close an open relation
2080	*
2081	* Actually, we just decrement the refcount.
2082	*
2083	* NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2084	* will be freed as soon as their refcount goes to zero. In combination
2085	* with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2086	* to catch references to already-released relcache entries. It slows
2087	* things down quite a bit, however.
2088	*/
2089	void
2090	RelationClose(Relation relation)
2091	{
2092	/ Note: no locking manipulations needed /
2093	RelationDecrementReferenceCount(relation);
2094
2095	#ifdef RELCACHE_FORCE_RELEASE
2096	if (RelationHasReferenceCountZero(relation) &&
2097	relation->rd_createSubid == InvalidSubTransactionId &&
2098	relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
2099	RelationClearRelation(relation, false);
2100	#endif
2101	}
2102
2103	/*
2104	* RelationReloadIndexInfo - reload minimal information for an open index
2105	*
2106	* This function is used only for indexes. A relcache inval on an index
2107	* can mean that its pg_class or pg_index row changed. There are only
2108	* very limited changes that are allowed to an existing index's schema,
2109	* so we can update the relcache entry without a complete rebuild; which
2110	* is fortunate because we can't rebuild an index entry that is "nailed"
2111	* and/or in active use. We support full replacement of the pg_class row,
2112	* as well as updates of a few simple fields of the pg_index row.
2113	*
2114	* We can't necessarily reread the catalog rows right away; we might be
2115	* in a failed transaction when we receive the SI notification. If so,
2116	* RelationClearRelation just marks the entry as invalid by setting
2117	* rd_isvalid to false. This routine is called to fix the entry when it
2118	* is next needed.
2119	*
2120	* We assume that at the time we are called, we have at least AccessShareLock
2121	* on the target index. (Note: in the calls from RelationClearRelation,
2122	* this is legitimate because we know the rel has positive refcount.)
2123	*
2124	* If the target index is an index on pg_class or pg_index, we'd better have
2125	* previously gotten at least AccessShareLock on its underlying catalog,
2126	* else we are at risk of deadlock against someone trying to exclusive-lock
2127	* the heap and index in that order. This is ensured in current usage by
2128	* only applying this to indexes being opened or having positive refcount.
2129	*/
2130	static void
2131	RelationReloadIndexInfo(Relation relation)
2132	{
2133	bool indexOK;
2134	HeapTuple pg_class_tuple;
2135	Form_pg_class relp;
2136
2137	/ Should be called only for invalidated indexes /
2138	Assert((relation->rd_rel->relkind == RELKIND_INDEX \|\|
2139	relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2140	!relation->rd_isvalid);
2141
2142	/ Ensure it's closed at smgr level /
2143	RelationCloseSmgr(relation);
2144
2145	/ Must free any AM cached data upon relcache flush /
2146	if (relation->rd_amcache)
2147	pfree(relation->rd_amcache);
2148	relation->rd_amcache = NULL;
2149
2150	/*
2151	* If it's a shared index, we might be called before backend startup has
2152	* finished selecting a database, in which case we have no way to read
2153	* pg_class yet. However, a shared index can never have any significant
2154	* schema updates, so it's okay to ignore the invalidation signal. Just
2155	* mark it valid and return without doing anything more.
2156	*/
2157	if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2158	{
2159	relation->rd_isvalid = true;
2160	return;
2161	}
2162
2163	/*
2164	* Read the pg_class row
2165	*
2166	* Don't try to use an indexscan of pg_class_oid_index to reload the info
2167	* for pg_class_oid_index ...
2168	*/
2169	indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2170	pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2171	if (!HeapTupleIsValid(pg_class_tuple))
2172	elog(ERROR, "could not find pg_class tuple for index %u",
2173	RelationGetRelid(relation));
2174	relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2175	memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2176	/ Reload reloptions in case they changed /
2177	if (relation->rd_options)
2178	pfree(relation->rd_options);
2179	RelationParseRelOptions(relation, pg_class_tuple);
2180	/ done with pg_class tuple /
2181	heap_freetuple(pg_class_tuple);
2182	/ We must recalculate physical address in case it changed /
2183	RelationInitPhysicalAddr(relation);
2184
2185	/*
2186	* For a non-system index, there are fields of the pg_index row that are
2187	* allowed to change, so re-read that row and update the relcache entry.
2188	* Most of the info derived from pg_index (such as support function lookup
2189	* info) cannot change, and indeed the whole point of this routine is to
2190	* update the relcache entry without clobbering that data; so wholesale
2191	* replacement is not appropriate.
2192	*/
2193	if (!IsSystemRelation(relation))
2194	{
2195	HeapTuple tuple;
2196	Form_pg_index index;
2197
2198	tuple = SearchSysCache1(INDEXRELID,
2199	ObjectIdGetDatum(RelationGetRelid(relation)));
2200	if (!HeapTupleIsValid(tuple))
2201	elog(ERROR, "cache lookup failed for index %u",
2202	RelationGetRelid(relation));
2203	index = (Form_pg_index) GETSTRUCT(tuple);
2204
2205	/*
2206	* Basically, let's just copy all the bool fields. There are one or
2207	* two of these that can't actually change in the current code, but
2208	* it's not worth it to track exactly which ones they are. None of
2209	* the array fields are allowed to change, though.
2210	*/
2211	relation->rd_index->indisunique = index->indisunique;
2212	relation->rd_index->indisprimary = index->indisprimary;
2213	relation->rd_index->indisexclusion = index->indisexclusion;
2214	relation->rd_index->indimmediate = index->indimmediate;
2215	relation->rd_index->indisclustered = index->indisclustered;
2216	relation->rd_index->indisvalid = index->indisvalid;
2217	relation->rd_index->indcheckxmin = index->indcheckxmin;
2218	relation->rd_index->indisready = index->indisready;
2219	relation->rd_index->indislive = index->indislive;
2220
2221	/ Copy xmin too, as that is needed to make sense of indcheckxmin /
2222	HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
2223	HeapTupleHeaderGetXmin(tuple->t_data));
2224
2225	ReleaseSysCache(tuple);
2226	}
2227
2228	/ Okay, now it's valid again /
2229	relation->rd_isvalid = true;
2230	}
2231
2232	/*
2233	* RelationReloadNailed - reload minimal information for nailed relations.
2234	*
2235	* The structure of a nailed relation can never change (which is good, because
2236	* we rely on knowing their structure to be able to read catalog content). But
2237	* some parts, e.g. pg_class.relfrozenxid, are still important to have
2238	* accurate content for. Therefore those need to be reloaded after the arrival
2239	* of invalidations.
2240	*/
2241	static void
2242	RelationReloadNailed(Relation relation)
2243	{
2244	Assert(relation->rd_isnailed);
2245
2246	/*
2247	* Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2248	* mapping changed.
2249	*/
2250	RelationInitPhysicalAddr(relation);
2251
2252	/ flag as needing to be revalidated /
2253	relation->rd_isvalid = false;
2254
2255	/*
2256	* Can only reread catalog contents if in a transaction. If the relation
2257	* is currently open (not counting the nailed refcount), do so
2258	* immediately. Otherwise we've already marked the entry as possibly
2259	* invalid, and it'll be fixed when next opened.
2260	*/
2261	if (!IsTransactionState() \|\| relation->rd_refcnt <= `1`)
2262	return;
2263
2264	if (relation->rd_rel->relkind == RELKIND_INDEX)
2265	{
2266	/*
2267	* If it's a nailed-but-not-mapped index, then we need to re-read the
2268	* pg_class row to see if its relfilenode changed.
2269	*/
2270	RelationReloadIndexInfo(relation);
2271	}
2272	else
2273	{
2274	/*
2275	* Reload a non-index entry. We can't easily do so if relcaches
2276	* aren't yet built, but that's fine because at that stage the
2277	* attributes that need to be current (like relfrozenxid) aren't yet
2278	* accessed. To ensure the entry will later be revalidated, we leave
2279	* it in invalid state, but allow use (cf. RelationIdGetRelation()).
2280	*/
2281	if (criticalRelcachesBuilt)
2282	{
2283	HeapTuple pg_class_tuple;
2284	Form_pg_class relp;
2285
2286	/*
2287	* NB: Mark the entry as valid before starting to scan, to avoid
2288	* self-recursion when re-building pg_class.
2289	*/
2290	relation->rd_isvalid = true;
2291
2292	pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
2293	true, false);
2294	relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2295	memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2296	heap_freetuple(pg_class_tuple);
2297
2298	/*
2299	* Again mark as valid, to protect against concurrently arriving
2300	* invalidations.
2301	*/
2302	relation->rd_isvalid = true;
2303	}
2304	}
2305	}
2306
2307	/*
2308	* RelationDestroyRelation
2309	*
2310	* Physically delete a relation cache entry and all subsidiary data.
2311	* Caller must already have unhooked the entry from the hash table.
2312	*/
2313	static void
2314	RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2315	{
2316	Assert(RelationHasReferenceCountZero(relation));
2317
2318	/*
2319	* Make sure smgr and lower levels close the relation's files, if they
2320	* weren't closed already. (This was probably done by caller, but let's
2321	* just be real sure.)
2322	*/
2323	RelationCloseSmgr(relation);
2324
2325	/*
2326	* Free all the subsidiary data structures of the relcache entry, then the
2327	* entry itself.
2328	*/
2329	if (relation->rd_rel)
2330	pfree(relation->rd_rel);
2331	/ can't use DecrTupleDescRefCount here /
2332	Assert(relation->rd_att->tdrefcount > `0`);
2333	if (--relation->rd_att->tdrefcount == `0`)
2334	{
2335	/*
2336	* If we Rebuilt a relcache entry during a transaction then its
2337	* possible we did that because the TupDesc changed as the result of
2338	* an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2339	* possible someone copied that TupDesc, in which case the copy would
2340	* point to free'd memory. So if we rebuild an entry we keep the
2341	* TupDesc around until end of transaction, to be safe.
2342	*/
2343	if (remember_tupdesc)
2344	RememberToFreeTupleDescAtEOX(relation->rd_att);
2345	else
2346	FreeTupleDesc(relation->rd_att);
2347	}
2348	FreeTriggerDesc(relation->trigdesc);
2349	list_free_deep(relation->rd_fkeylist);
2350	list_free(relation->rd_indexlist);
2351	bms_free(relation->rd_indexattr);
2352	bms_free(relation->rd_keyattr);
2353	bms_free(relation->rd_pkattr);
2354	bms_free(relation->rd_idattr);
2355	if (relation->rd_pubactions)
2356	pfree(relation->rd_pubactions);
2357	if (relation->rd_options)
2358	pfree(relation->rd_options);
2359	if (relation->rd_indextuple)
2360	pfree(relation->rd_indextuple);
2361	if (relation->rd_amcache)
2362	pfree(relation->rd_amcache);
2363	if (relation->rd_fdwroutine)
2364	pfree(relation->rd_fdwroutine);
2365	if (relation->rd_indexcxt)
2366	MemoryContextDelete(relation->rd_indexcxt);
2367	if (relation->rd_rulescxt)
2368	MemoryContextDelete(relation->rd_rulescxt);
2369	if (relation->rd_rsdesc)
2370	MemoryContextDelete(relation->rd_rsdesc->rscxt);
2371	if (relation->rd_partkeycxt)
2372	MemoryContextDelete(relation->rd_partkeycxt);
2373	if (relation->rd_pdcxt)
2374	MemoryContextDelete(relation->rd_pdcxt);
2375	if (relation->rd_partcheckcxt)
2376	MemoryContextDelete(relation->rd_partcheckcxt);
2377	pfree(relation);
2378	}
2379
2380	/*
2381	* RelationClearRelation
2382	*
2383	* Physically blow away a relation cache entry, or reset it and rebuild
2384	* it from scratch (that is, from catalog entries). The latter path is
2385	* used when we are notified of a change to an open relation (one with
2386	* refcount > 0).
2387	*
2388	* NB: when rebuilding, we'd better hold some lock on the relation,
2389	* else the catalog data we need to read could be changing under us.
2390	* Also, a rel to be rebuilt had better have refcnt > 0. This is because
2391	* a sinval reset could happen while we're accessing the catalogs, and
2392	* the rel would get blown away underneath us by RelationCacheInvalidate
2393	* if it has zero refcnt.
2394	*
2395	* The "rebuild" parameter is redundant in current usage because it has
2396	* to match the relation's refcnt status, but we keep it as a crosscheck
2397	* that we're doing what the caller expects.
2398	*/
2399	static void
2400	RelationClearRelation(Relation relation, bool rebuild)
2401	{
2402	/*
2403	* As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2404	* course it would be an equally bad idea to blow away one with nonzero
2405	* refcnt, since that would leave someone somewhere with a dangling
2406	* pointer. All callers are expected to have verified that this holds.
2407	*/
2408	Assert(rebuild ?
2409	!RelationHasReferenceCountZero(relation) :
2410	RelationHasReferenceCountZero(relation));
2411
2412	/*
2413	* Make sure smgr and lower levels close the relation's files, if they
2414	* weren't closed already. If the relation is not getting deleted, the
2415	* next smgr access should reopen the files automatically. This ensures
2416	* that the low-level file access state is updated after, say, a vacuum
2417	* truncation.
2418	*/
2419	RelationCloseSmgr(relation);
2420
2421	/ Free AM cached data, if any /
2422	if (relation->rd_amcache)
2423	pfree(relation->rd_amcache);
2424	relation->rd_amcache = NULL;
2425
2426	/*
2427	* Treat nailed-in system relations separately, they always need to be
2428	* accessible, so we can't blow them away.
2429	*/
2430	if (relation->rd_isnailed)
2431	{
2432	RelationReloadNailed(relation);
2433	return;
2434	}
2435
2436	/*
2437	* Even non-system indexes should not be blown away if they are open and
2438	* have valid index support information. This avoids problems with active
2439	* use of the index support information. As with nailed indexes, we
2440	* re-read the pg_class row to handle possible physical relocation of the
2441	* index, and we check for pg_index updates too.
2442	*/
2443	if ((relation->rd_rel->relkind == RELKIND_INDEX \|\|
2444	relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2445	relation->rd_refcnt > `0` &&
2446	relation->rd_indexcxt != NULL)
2447	{
2448	relation->rd_isvalid = false; / needs to be revalidated /
2449	if (IsTransactionState())
2450	RelationReloadIndexInfo(relation);
2451	return;
2452	}
2453
2454	/ Mark it invalid until we've finished rebuild /
2455	relation->rd_isvalid = false;
2456
2457	/*
2458	* If we're really done with the relcache entry, blow it away. But if
2459	* someone is still using it, reconstruct the whole deal without moving
2460	* the physical RelationData record (so that the someone's pointer is
2461	* still valid).
2462	*/
2463	if (!rebuild)
2464	{
2465	/ Remove it from the hash table /
2466	RelationCacheDelete(relation);
2467
2468	/ And release storage /
2469	RelationDestroyRelation(relation, false);
2470	}
2471	else if (!IsTransactionState())
2472	{
2473	/*
2474	* If we're not inside a valid transaction, we can't do any catalog
2475	* access so it's not possible to rebuild yet. Just exit, leaving
2476	* rd_isvalid = false so that the rebuild will occur when the entry is
2477	* next opened.
2478	*
2479	* Note: it's possible that we come here during subtransaction abort,
2480	* and the reason for wanting to rebuild is that the rel is open in
2481	* the outer transaction. In that case it might seem unsafe to not
2482	* rebuild immediately, since whatever code has the rel already open
2483	* will keep on using the relcache entry as-is. However, in such a
2484	* case the outer transaction should be holding a lock that's
2485	* sufficient to prevent any significant change in the rel's schema,
2486	* so the existing entry contents should be good enough for its
2487	* purposes; at worst we might be behind on statistics updates or the
2488	* like. (See also CheckTableNotInUse() and its callers.) These same
2489	* remarks also apply to the cases above where we exit without having
2490	* done RelationReloadIndexInfo() yet.
2491	*/
2492	return;
2493	}
2494	else
2495	{
2496	/*
2497	* Our strategy for rebuilding an open relcache entry is to build a
2498	* new entry from scratch, swap its contents with the old entry, and
2499	* finally delete the new entry (along with any infrastructure swapped
2500	* over from the old entry). This is to avoid trouble in case an
2501	* error causes us to lose control partway through. The old entry
2502	* will still be marked !rd_isvalid, so we'll try to rebuild it again
2503	* on next access. Meanwhile it's not any less valid than it was
2504	* before, so any code that might expect to continue accessing it
2505	* isn't hurt by the rebuild failure. (Consider for example a
2506	* subtransaction that ALTERs a table and then gets canceled partway
2507	* through the cache entry rebuild. The outer transaction should
2508	* still see the not-modified cache entry as valid.) The worst
2509	* consequence of an error is leaking the necessarily-unreferenced new
2510	* entry, and this shouldn't happen often enough for that to be a big
2511	* problem.
2512	*
2513	* When rebuilding an open relcache entry, we must preserve ref count,
2514	* rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
2515	* attempt to preserve the pg_class entry (rd_rel), tupledesc,
2516	* rewrite-rule, partition key, and partition descriptor substructures
2517	* in place, because various places assume that these structures won't
2518	* move while they are working with an open relcache entry. (Note:
2519	* the refcount mechanism for tupledescs might someday allow us to
2520	* remove this hack for the tupledesc.)
2521	*
2522	* Note that this process does not touch CurrentResourceOwner; which
2523	* is good because whatever ref counts the entry may have do not
2524	* necessarily belong to that resource owner.
2525	*/
2526	Relation newrel;
2527	Oid save_relid = RelationGetRelid(relation);
2528	bool keep_tupdesc;
2529	bool keep_rules;
2530	bool keep_policies;
2531	bool keep_partkey;
2532	bool keep_partdesc;
2533
2534	/ Build temporary entry, but don't link it into hashtable /
2535	newrel = RelationBuildDesc(save_relid, false);
2536	if (newrel == NULL)
2537	{
2538	/*
2539	* We can validly get here, if we're using a historic snapshot in
2540	* which a relation, accessed from outside logical decoding, is
2541	* still invisible. In that case it's fine to just mark the
2542	* relation as invalid and return - it'll fully get reloaded by
2543	* the cache reset at the end of logical decoding (or at the next
2544	* access). During normal processing we don't want to ignore this
2545	* case as it shouldn't happen there, as explained below.
2546	*/
2547	if (HistoricSnapshotActive())
2548	return;
2549
2550	/*
2551	* This shouldn't happen as dropping a relation is intended to be
2552	* impossible if still referenced (cf. CheckTableNotInUse()). But
2553	* if we get here anyway, we can't just delete the relcache entry,
2554	* as it possibly could get accessed later (as e.g. the error
2555	* might get trapped and handled via a subtransaction rollback).
2556	*/
2557	elog(ERROR, "relation %u deleted while still in use", save_relid);
2558	}
2559
2560	keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2561	keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2562	keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2563	/ partkey is immutable once set up, so we can always keep it /
2564	keep_partkey = (relation->rd_partkey != NULL);
2565	keep_partdesc = equalPartitionDescs(relation->rd_partkey,
2566	relation->rd_partdesc,
2567	newrel->rd_partdesc);
2568
2569	/*
2570	* Perform swapping of the relcache entry contents. Within this
2571	* process the old entry is momentarily invalid, so there must be no
2572	* possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2573	* all-in-line code for safety.
2574	*
2575	* Since the vast majority of fields should be swapped, our method is
2576	* to swap the whole structures and then re-swap those few fields we
2577	* didn't want swapped.
2578	*/
2579	#define SWAPFIELD(fldtype, fldname) \
2580	do { \
2581	fldtype _tmp = newrel->fldname; \
2582	newrel->fldname = relation->fldname; \
2583	relation->fldname = _tmp; \
2584	} while (0)
2585
2586	/ swap all Relation struct fields /
2587	{
2588	RelationData tmpstruct;
2589
2590	memcpy(&tmpstruct, newrel, sizeof(RelationData));
2591	memcpy(newrel, relation, sizeof(RelationData));
2592	memcpy(relation, &tmpstruct, sizeof(RelationData));
2593	}
2594
2595	/ rd_smgr must not be swapped, due to back-links from smgr level /
2596	SWAPFIELD(SMgrRelation, rd_smgr);
2597	/ rd_refcnt must be preserved /
2598	SWAPFIELD(int, rd_refcnt);
2599	/ isnailed shouldn't change /
2600	Assert(newrel->rd_isnailed == relation->rd_isnailed);
2601	/ creation sub-XIDs must be preserved /
2602	SWAPFIELD(SubTransactionId, rd_createSubid);
2603	SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2604	/ un-swap rd_rel pointers, swap contents instead /
2605	SWAPFIELD(Form_pg_class, rd_rel);
2606	/ ... but actually, we don't have to update newrel->rd_rel /
2607	memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2608	/ preserve old tupledesc, rules, policies if no logical change /
2609	if (keep_tupdesc)
2610	SWAPFIELD(TupleDesc, rd_att);
2611	if (keep_rules)
2612	{
2613	SWAPFIELD(RuleLock *, rd_rules);
2614	SWAPFIELD(MemoryContext, rd_rulescxt);
2615	}
2616	if (keep_policies)
2617	SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2618	/ toast OID override must be preserved /
2619	SWAPFIELD(Oid, rd_toastoid);
2620	/ pgstat_info must be preserved /
2621	SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2622	/ preserve old partitioning info if no logical change /
2623	if (keep_partkey)
2624	{
2625	SWAPFIELD(PartitionKey, rd_partkey);
2626	SWAPFIELD(MemoryContext, rd_partkeycxt);
2627	}
2628	if (keep_partdesc)
2629	{
2630	SWAPFIELD(PartitionDesc, rd_partdesc);
2631	SWAPFIELD(MemoryContext, rd_pdcxt);
2632	}
2633	else if (rebuild && newrel->rd_pdcxt != NULL)
2634	{
2635	/*
2636	* We are rebuilding a partitioned relation with a non-zero
2637	* reference count, so keep the old partition descriptor around,
2638	* in case there's a PartitionDirectory with a pointer to it.
2639	* Attach it to the new rd_pdcxt so that it gets cleaned up
2640	* eventually. In the case where the reference count is 0, this
2641	* code is not reached, which should be OK because in that case
2642	* there should be no PartitionDirectory with a pointer to the old
2643	* entry.
2644	*
2645	* Note that newrel and relation have already been swapped, so the
2646	* "old" partition descriptor is actually the one hanging off of
2647	* newrel.
2648	*/
2649	MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
2650	newrel->rd_partdesc = NULL;
2651	newrel->rd_pdcxt = NULL;
2652	}
2653
2654	#undef SWAPFIELD
2655
2656	/ And now we can throw away the temporary entry /
2657	RelationDestroyRelation(newrel, !keep_tupdesc);
2658	}
2659	}
2660
2661	/*
2662	* RelationFlushRelation
2663	*
2664	* Rebuild the relation if it is open (refcount > 0), else blow it away.
2665	* This is used when we receive a cache invalidation event for the rel.
2666	*/
2667	static void
2668	RelationFlushRelation(Relation relation)
2669	{
2670	if (relation->rd_createSubid != InvalidSubTransactionId \|\|
2671	relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2672	{
2673	/*
2674	* New relcache entries are always rebuilt, not flushed; else we'd
2675	* forget the "new" status of the relation, which is a useful
2676	* optimization to have. Ditto for the new-relfilenode status.
2677	*
2678	* The rel could have zero refcnt here, so temporarily increment the
2679	* refcnt to ensure it's safe to rebuild it. We can assume that the
2680	* current transaction has some lock on the rel already.
2681	*/
2682	RelationIncrementReferenceCount(relation);
2683	RelationClearRelation(relation, true);
2684	RelationDecrementReferenceCount(relation);
2685	}
2686	else
2687	{
2688	/*
2689	* Pre-existing rels can be dropped from the relcache if not open.
2690	*/
2691	bool rebuild = !RelationHasReferenceCountZero(relation);
2692
2693	RelationClearRelation(relation, rebuild);
2694	}
2695	}
2696
2697	/*
2698	* RelationForgetRelation - unconditionally remove a relcache entry
2699	*
2700	* External interface for destroying a relcache entry when we
2701	* drop the relation.
2702	*/
2703	void
2704	RelationForgetRelation(Oid rid)
2705	{
2706	Relation relation;
2707
2708	RelationIdCacheLookup(rid, relation);
2709
2710	if (!PointerIsValid(relation))
2711	return; / not in cache, nothing to do /
2712
2713	if (!RelationHasReferenceCountZero(relation))
2714	elog(ERROR, "relation %u is still open", rid);
2715
2716	/ Unconditionally destroy the relcache entry /
2717	RelationClearRelation(relation, false);
2718	}
2719
2720	/*
2721	* RelationCacheInvalidateEntry
2722	*
2723	* This routine is invoked for SI cache flush messages.
2724	*
2725	* Any relcache entry matching the relid must be flushed. (Note: caller has
2726	* already determined that the relid belongs to our database or is a shared
2727	* relation.)
2728	*
2729	* We used to skip local relations, on the grounds that they could
2730	* not be targets of cross-backend SI update messages; but it seems
2731	* safer to process them, so that our own SI update messages will
2732	* have the same effects during CommandCounterIncrement for both
2733	* local and nonlocal relations.
2734	*/
2735	void
2736	RelationCacheInvalidateEntry(Oid relationId)
2737	{
2738	Relation relation;
2739
2740	RelationIdCacheLookup(relationId, relation);
2741
2742	if (PointerIsValid(relation))
2743	{
2744	relcacheInvalsReceived++;
2745	RelationFlushRelation(relation);
2746	}
2747	}
2748
2749	/*
2750	* RelationCacheInvalidate
2751	* Blow away cached relation descriptors that have zero reference counts,
2752	* and rebuild those with positive reference counts. Also reset the smgr
2753	* relation cache and re-read relation mapping data.
2754	*
2755	* This is currently used only to recover from SI message buffer overflow,
2756	* so we do not touch new-in-transaction relations; they cannot be targets
2757	* of cross-backend SI updates (and our own updates now go through a
2758	* separate linked list that isn't limited by the SI message buffer size).
2759	* Likewise, we need not discard new-relfilenode-in-transaction hints,
2760	* since any invalidation of those would be a local event.
2761	*
2762	* We do this in two phases: the first pass deletes deletable items, and
2763	* the second one rebuilds the rebuildable items. This is essential for
2764	* safety, because hash_seq_search only copes with concurrent deletion of
2765	* the element it is currently visiting. If a second SI overflow were to
2766	* occur while we are walking the table, resulting in recursive entry to
2767	* this routine, we could crash because the inner invocation blows away
2768	* the entry next to be visited by the outer scan. But this way is OK,
2769	* because (a) during the first pass we won't process any more SI messages,
2770	* so hash_seq_search will complete safely; (b) during the second pass we
2771	* only hold onto pointers to nondeletable entries.
2772	*
2773	* The two-phase approach also makes it easy to update relfilenodes for
2774	* mapped relations before we do anything else, and to ensure that the
2775	* second pass processes nailed-in-cache items before other nondeletable
2776	* items. This should ensure that system catalogs are up to date before
2777	* we attempt to use them to reload information about other open relations.
2778	*/
2779	void
2780	RelationCacheInvalidate(void)
2781	{
2782	HASH_SEQ_STATUS status;
2783	RelIdCacheEnt *idhentry;
2784	Relation relation;
2785	List *rebuildFirstList = NIL;
2786	List *rebuildList = NIL;
2787	ListCell *l;
2788
2789	/*
2790	* Reload relation mapping data before starting to reconstruct cache.
2791	*/
2792	RelationMapInvalidateAll();
2793
2794	/ Phase 1 /
2795	hash_seq_init(&status, RelationIdCache);
2796
2797	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2798	{
2799	relation = idhentry->reldesc;
2800
2801	/ Must close all smgr references to avoid leaving dangling ptrs /
2802	RelationCloseSmgr(relation);
2803
2804	/*
2805	* Ignore new relations; no other backend will manipulate them before
2806	* we commit. Likewise, before replacing a relation's relfilenode, we
2807	* shall have acquired AccessExclusiveLock and drained any applicable
2808	* pending invalidations.
2809	*/
2810	if (relation->rd_createSubid != InvalidSubTransactionId \|\|
2811	relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2812	continue;
2813
2814	relcacheInvalsReceived++;
2815
2816	if (RelationHasReferenceCountZero(relation))
2817	{
2818	/ Delete this entry immediately /
2819	Assert(!relation->rd_isnailed);
2820	RelationClearRelation(relation, false);
2821	}
2822	else
2823	{
2824	/*
2825	* If it's a mapped relation, immediately update its rd_node in
2826	* case its relfilenode changed. We must do this during phase 1
2827	* in case the relation is consulted during rebuild of other
2828	* relcache entries in phase 2. It's safe since consulting the
2829	* map doesn't involve any access to relcache entries.
2830	*/
2831	if (RelationIsMapped(relation))
2832	RelationInitPhysicalAddr(relation);
2833
2834	/*
2835	* Add this entry to list of stuff to rebuild in second pass.
2836	* pg_class goes to the front of rebuildFirstList while
2837	* pg_class_oid_index goes to the back of rebuildFirstList, so
2838	* they are done first and second respectively. Other nailed
2839	* relations go to the front of rebuildList, so they'll be done
2840	* next in no particular order; and everything else goes to the
2841	* back of rebuildList.
2842	*/
2843	if (RelationGetRelid(relation) == RelationRelationId)
2844	rebuildFirstList = lcons(relation, rebuildFirstList);
2845	else if (RelationGetRelid(relation) == ClassOidIndexId)
2846	rebuildFirstList = lappend(rebuildFirstList, relation);
2847	else if (relation->rd_isnailed)
2848	rebuildList = lcons(relation, rebuildList);
2849	else
2850	rebuildList = lappend(rebuildList, relation);
2851	}
2852	}
2853
2854	/*
2855	* Now zap any remaining smgr cache entries. This must happen before we
2856	* start to rebuild entries, since that may involve catalog fetches which
2857	* will re-open catalog files.
2858	*/
2859	smgrcloseall();
2860
2861	/ Phase 2: rebuild the items found to need rebuild in phase 1 /
2862	foreach(l, rebuildFirstList)
2863	{
2864	relation = (Relation) lfirst(l);
2865	RelationClearRelation(relation, true);
2866	}
2867	list_free(rebuildFirstList);
2868	foreach(l, rebuildList)
2869	{
2870	relation = (Relation) lfirst(l);
2871	RelationClearRelation(relation, true);
2872	}
2873	list_free(rebuildList);
2874	}
2875
2876	/*
2877	* RelationCloseSmgrByOid - close a relcache entry's smgr link
2878	*
2879	* Needed in some cases where we are changing a relation's physical mapping.
2880	* The link will be automatically reopened on next use.
2881	*/
2882	void
2883	RelationCloseSmgrByOid(Oid relationId)
2884	{
2885	Relation relation;
2886
2887	RelationIdCacheLookup(relationId, relation);
2888
2889	if (!PointerIsValid(relation))
2890	return; / not in cache, nothing to do /
2891
2892	RelationCloseSmgr(relation);
2893	}
2894
2895	static void
2896	RememberToFreeTupleDescAtEOX(TupleDesc td)
2897	{
2898	if (EOXactTupleDescArray == NULL)
2899	{
2900	MemoryContext oldcxt;
2901
2902	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2903
2904	EOXactTupleDescArray = (TupleDesc ) palloc(`16` sizeof(TupleDesc));
2905	EOXactTupleDescArrayLen = `16`;
2906	NextEOXactTupleDescNum = `0`;
2907	MemoryContextSwitchTo(oldcxt);
2908	}
2909	else if (NextEOXactTupleDescNum >= EOXactTupleDescArrayLen)
2910	{
2911	int32 newlen = EOXactTupleDescArrayLen * `2`;
2912
2913	Assert(EOXactTupleDescArrayLen > `0`);
2914
2915	EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2916	newlen * sizeof(TupleDesc));
2917	EOXactTupleDescArrayLen = newlen;
2918	}
2919
2920	EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2921	}
2922
2923	/*
2924	* AtEOXact_RelationCache
2925	*
2926	* Clean up the relcache at main-transaction commit or abort.
2927	*
2928	* Note: this must be called before processing invalidation messages.
2929	* In the case of abort, we don't want to try to rebuild any invalidated
2930	* cache entries (since we can't safely do database accesses). Therefore
2931	* we must reset refcnts before handling pending invalidations.
2932	*
2933	* As of PostgreSQL 8.1, relcache refcnts should get released by the
2934	* ResourceOwner mechanism. This routine just does a debugging
2935	* cross-check that no pins remain. However, we also need to do special
2936	* cleanup when the current transaction created any relations or made use
2937	* of forced index lists.
2938	*/
2939	void
2940	AtEOXact_RelationCache(bool isCommit)
2941	{
2942	HASH_SEQ_STATUS status;
2943	RelIdCacheEnt *idhentry;
2944	int i;
2945
2946	/*
2947	* Unless the eoxact_list[] overflowed, we only need to examine the rels
2948	* listed in it. Otherwise fall back on a hash_seq_search scan.
2949	*
2950	* For simplicity, eoxact_list[] entries are not deleted till end of
2951	* top-level transaction, even though we could remove them at
2952	* subtransaction end in some cases, or remove relations from the list if
2953	* they are cleared for other reasons. Therefore we should expect the
2954	* case that list entries are not found in the hashtable; if not, there's
2955	* nothing to do for them.
2956	*/
2957	if (eoxact_list_overflowed)
2958	{
2959	hash_seq_init(&status, RelationIdCache);
2960	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2961	{
2962	AtEOXact_cleanup(idhentry->reldesc, isCommit);
2963	}
2964	}
2965	else
2966	{
2967	for (i = `0`; i < eoxact_list_len; i++)
2968	{
2969	idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
2970	(void *) &eoxact_list[i],
2971	HASH_FIND,
2972	NULL);
2973	if (idhentry != NULL)
2974	AtEOXact_cleanup(idhentry->reldesc, isCommit);
2975	}
2976	}
2977
2978	if (EOXactTupleDescArrayLen > `0`)
2979	{
2980	Assert(EOXactTupleDescArray != NULL);
2981	for (i = `0`; i < NextEOXactTupleDescNum; i++)
2982	FreeTupleDesc(EOXactTupleDescArray[i]);
2983	pfree(EOXactTupleDescArray);
2984	EOXactTupleDescArray = NULL;
2985	}
2986
2987	/ Now we're out of the transaction and can clear the lists /
2988	eoxact_list_len = `0`;
2989	eoxact_list_overflowed = false;
2990	NextEOXactTupleDescNum = `0`;
2991	EOXactTupleDescArrayLen = `0`;
2992	}
2993
2994	/*
2995	* AtEOXact_cleanup
2996	*
2997	* Clean up a single rel at main-transaction commit or abort
2998	*
2999	* NB: this processing must be idempotent, because EOXactListAdd() doesn't
3000	* bother to prevent duplicate entries in eoxact_list[].
3001	*/
3002	static void
3003	AtEOXact_cleanup(Relation relation, bool isCommit)
3004	{
3005	/*
3006	* The relcache entry's ref count should be back to its normal
3007	* not-in-a-transaction state: 0 unless it's nailed in cache.
3008	*
3009	* In bootstrap mode, this is NOT true, so don't check it --- the
3010	* bootstrap code expects relations to stay open across start/commit
3011	* transaction calls. (That seems bogus, but it's not worth fixing.)
3012	*
3013	* Note: ideally this check would be applied to every relcache entry, not
3014	* just those that have eoxact work to do. But it's not worth forcing a
3015	* scan of the whole relcache just for this. (Moreover, doing so would
3016	* mean that assert-enabled testing never tests the hash_search code path
3017	* above, which seems a bad idea.)
3018	*/
3019	#ifdef USE_ASSERT_CHECKING
3020	if (!IsBootstrapProcessingMode())
3021	{
3022	int expected_refcnt;
3023
3024	expected_refcnt = relation->rd_isnailed ? `1` : `0`;
3025	Assert(relation->rd_refcnt == expected_refcnt);
3026	}
3027	#endif
3028
3029	/*
3030	* Is it a relation created in the current transaction?
3031	*
3032	* During commit, reset the flag to zero, since we are now out of the
3033	* creating transaction. During abort, simply delete the relcache entry
3034	* --- it isn't interesting any longer. (NOTE: if we have forgotten the
3035	* new-ness of a new relation due to a forced cache flush, the entry will
3036	* get deleted anyway by shared-cache-inval processing of the aborted
3037	* pg_class insertion.)
3038	*/
3039	if (relation->rd_createSubid != InvalidSubTransactionId)
3040	{
3041	if (isCommit)
3042	relation->rd_createSubid = InvalidSubTransactionId;
3043	else if (RelationHasReferenceCountZero(relation))
3044	{
3045	RelationClearRelation(relation, false);
3046	return;
3047	}
3048	else
3049	{
3050	/*
3051	* Hmm, somewhere there's a (leaked?) reference to the relation.
3052	* We daren't remove the entry for fear of dereferencing a
3053	* dangling pointer later. Bleat, and mark it as not belonging to
3054	* the current transaction. Hopefully it'll get cleaned up
3055	* eventually. This must be just a WARNING to avoid
3056	* error-during-error-recovery loops.
3057	*/
3058	relation->rd_createSubid = InvalidSubTransactionId;
3059	elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3060	RelationGetRelationName(relation));
3061	}
3062	}
3063
3064	/*
3065	* Likewise, reset the hint about the relfilenode being new.
3066	*/
3067	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3068	}
3069
3070	/*
3071	* AtEOSubXact_RelationCache
3072	*
3073	* Clean up the relcache at sub-transaction commit or abort.
3074	*
3075	* Note: this must be called before processing invalidation messages.
3076	*/
3077	void
3078	AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
3079	SubTransactionId parentSubid)
3080	{
3081	HASH_SEQ_STATUS status;
3082	RelIdCacheEnt *idhentry;
3083	int i;
3084
3085	/*
3086	* Unless the eoxact_list[] overflowed, we only need to examine the rels
3087	* listed in it. Otherwise fall back on a hash_seq_search scan. Same
3088	* logic as in AtEOXact_RelationCache.
3089	*/
3090	if (eoxact_list_overflowed)
3091	{
3092	hash_seq_init(&status, RelationIdCache);
3093	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3094	{
3095	AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3096	mySubid, parentSubid);
3097	}
3098	}
3099	else
3100	{
3101	for (i = `0`; i < eoxact_list_len; i++)
3102	{
3103	idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3104	(void *) &eoxact_list[i],
3105	HASH_FIND,
3106	NULL);
3107	if (idhentry != NULL)
3108	AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3109	mySubid, parentSubid);
3110	}
3111	}
3112
3113	/ Don't reset the list; we still need more cleanup later /
3114	}
3115
3116	/*
3117	* AtEOSubXact_cleanup
3118	*
3119	* Clean up a single rel at subtransaction commit or abort
3120	*
3121	* NB: this processing must be idempotent, because EOXactListAdd() doesn't
3122	* bother to prevent duplicate entries in eoxact_list[].
3123	*/
3124	static void
3125	AtEOSubXact_cleanup(Relation relation, bool isCommit,
3126	SubTransactionId mySubid, SubTransactionId parentSubid)
3127	{
3128	/*
3129	* Is it a relation created in the current subtransaction?
3130	*
3131	* During subcommit, mark it as belonging to the parent, instead. During
3132	* subabort, simply delete the relcache entry.
3133	*/
3134	if (relation->rd_createSubid == mySubid)
3135	{
3136	if (isCommit)
3137	relation->rd_createSubid = parentSubid;
3138	else if (RelationHasReferenceCountZero(relation))
3139	{
3140	RelationClearRelation(relation, false);
3141	return;
3142	}
3143	else
3144	{
3145	/*
3146	* Hmm, somewhere there's a (leaked?) reference to the relation.
3147	* We daren't remove the entry for fear of dereferencing a
3148	* dangling pointer later. Bleat, and transfer it to the parent
3149	* subtransaction so we can try again later. This must be just a
3150	* WARNING to avoid error-during-error-recovery loops.
3151	*/
3152	relation->rd_createSubid = parentSubid;
3153	elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3154	RelationGetRelationName(relation));
3155	}
3156	}
3157
3158	/*
3159	* Likewise, update or drop any new-relfilenode-in-subtransaction hint.
3160	*/
3161	if (relation->rd_newRelfilenodeSubid == mySubid)
3162	{
3163	if (isCommit)
3164	relation->rd_newRelfilenodeSubid = parentSubid;
3165	else
3166	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3167	}
3168	}
3169
3170
3171	/*
3172	* RelationBuildLocalRelation
3173	* Build a relcache entry for an about-to-be-created relation,
3174	* and enter it into the relcache.
3175	*/
3176	Relation
3177	RelationBuildLocalRelation(const char *relname,
3178	Oid relnamespace,
3179	TupleDesc tupDesc,
3180	Oid relid,
3181	Oid accessmtd,
3182	Oid relfilenode,
3183	Oid reltablespace,
3184	bool shared_relation,
3185	bool mapped_relation,
3186	char relpersistence,
3187	char relkind)
3188	{
3189	Relation rel;
3190	MemoryContext oldcxt;
3191	int natts = tupDesc->natts;
3192	int i;
3193	bool has_not_null;
3194	bool nailit;
3195
3196	AssertArg(natts >= `0`);
3197
3198	/*
3199	* check for creation of a rel that must be nailed in cache.
3200	*
3201	* XXX this list had better match the relations specially handled in
3202	* RelationCacheInitializePhase2/3.
3203	*/
3204	switch (relid)
3205	{
3206	case DatabaseRelationId:
3207	case AuthIdRelationId:
3208	case AuthMemRelationId:
3209	case RelationRelationId:
3210	case AttributeRelationId:
3211	case ProcedureRelationId:
3212	case TypeRelationId:
3213	nailit = true;
3214	break;
3215	default:
3216	nailit = false;
3217	break;
3218	}
3219
3220	/*
3221	* check that hardwired list of shared rels matches what's in the
3222	* bootstrap .bki file. If you get a failure here during initdb, you
3223	* probably need to fix IsSharedRelation() to match whatever you've done
3224	* to the set of shared relations.
3225	*/
3226	if (shared_relation != IsSharedRelation(relid))
3227	elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3228	relname, relid);
3229
3230	/ Shared relations had better be mapped, too /
3231	Assert(mapped_relation \|\| !shared_relation);
3232
3233	/*
3234	* switch to the cache context to create the relcache entry.
3235	*/
3236	if (!CacheMemoryContext)
3237	CreateCacheMemoryContext();
3238
3239	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3240
3241	/*
3242	* allocate a new relation descriptor and fill in basic state fields.
3243	*/
3244	rel = (Relation) palloc0(sizeof(RelationData));
3245
3246	/ make sure relation is marked as having no open file yet /
3247	rel->rd_smgr = NULL;
3248
3249	/ mark it nailed if appropriate /
3250	rel->rd_isnailed = nailit;
3251
3252	rel->rd_refcnt = nailit ? `1` : `0`;
3253
3254	/ it's being created in this transaction /
3255	rel->rd_createSubid = GetCurrentSubTransactionId();
3256	rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3257
3258	/*
3259	* create a new tuple descriptor from the one passed in. We do this
3260	* partly to copy it into the cache context, and partly because the new
3261	* relation can't have any defaults or constraints yet; they have to be
3262	* added in later steps, because they require additions to multiple system
3263	* catalogs. We can copy attnotnull constraints here, however.
3264	*/
3265	rel->rd_att = CreateTupleDescCopy(tupDesc);
3266	rel->rd_att->tdrefcount = `1`; / mark as refcounted /
3267	has_not_null = false;
3268	for (i = `0`; i < natts; i++)
3269	{
3270	Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
3271	Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);
3272
3273	datt->attidentity = satt->attidentity;
3274	datt->attgenerated = satt->attgenerated;
3275	datt->attnotnull = satt->attnotnull;
3276	has_not_null \|= satt->attnotnull;
3277	}
3278
3279	if (has_not_null)
3280	{
3281	TupleConstr constr = (TupleConstr ) palloc0(sizeof(TupleConstr));
3282
3283	constr->has_not_null = true;
3284	rel->rd_att->constr = constr;
3285	}
3286
3287	/*
3288	* initialize relation tuple form (caller may add/override data later)
3289	*/
3290	rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
3291
3292	namestrcpy(&rel->rd_rel->relname, relname);
3293	rel->rd_rel->relnamespace = relnamespace;
3294
3295	rel->rd_rel->relkind = relkind;
3296	rel->rd_rel->relnatts = natts;
3297	rel->rd_rel->reltype = InvalidOid;
3298	/ needed when bootstrapping: /
3299	rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3300
3301	/ set up persistence and relcache fields dependent on it /
3302	rel->rd_rel->relpersistence = relpersistence;
3303	switch (relpersistence)
3304	{
3305	case RELPERSISTENCE_UNLOGGED:
3306	case RELPERSISTENCE_PERMANENT:
3307	rel->rd_backend = InvalidBackendId;
3308	rel->rd_islocaltemp = false;
3309	break;
3310	case RELPERSISTENCE_TEMP:
3311	Assert(isTempOrTempToastNamespace(relnamespace));
3312	rel->rd_backend = BackendIdForTempRelations();
3313	rel->rd_islocaltemp = true;
3314	break;
3315	default:
3316	elog(ERROR, "invalid relpersistence: %c", relpersistence);
3317	break;
3318	}
3319
3320	/ if it's a materialized view, it's not populated initially /
3321	if (relkind == RELKIND_MATVIEW)
3322	rel->rd_rel->relispopulated = false;
3323	else
3324	rel->rd_rel->relispopulated = true;
3325
3326	/ set replica identity -- system catalogs and non-tables don't have one /
3327	if (!IsCatalogNamespace(relnamespace) &&
3328	(relkind == RELKIND_RELATION \|\|
3329	relkind == RELKIND_MATVIEW \|\|
3330	relkind == RELKIND_PARTITIONED_TABLE))
3331	rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3332	else
3333	rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3334
3335	/*
3336	* Insert relation physical and logical identifiers (OIDs) into the right
3337	* places. For a mapped relation, we set relfilenode to zero and rely on
3338	* RelationInitPhysicalAddr to consult the map.
3339	*/
3340	rel->rd_rel->relisshared = shared_relation;
3341
3342	RelationGetRelid(rel) = relid;
3343
3344	for (i = `0`; i < natts; i++)
3345	TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3346
3347	rel->rd_rel->reltablespace = reltablespace;
3348
3349	if (mapped_relation)
3350	{
3351	rel->rd_rel->relfilenode = InvalidOid;
3352	/ Add it to the active mapping information /
3353	RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3354	}
3355	else
3356	rel->rd_rel->relfilenode = relfilenode;
3357
3358	RelationInitLockInfo(rel); / see lmgr.c /
3359
3360	RelationInitPhysicalAddr(rel);
3361
3362	rel->rd_rel->relam = accessmtd;
3363
3364	if (relkind == RELKIND_RELATION \|\|
3365	relkind == RELKIND_SEQUENCE \|\|
3366	relkind == RELKIND_TOASTVALUE \|\|
3367	relkind == RELKIND_MATVIEW)
3368	RelationInitTableAccessMethod(rel);
3369
3370	/*
3371	* Okay to insert into the relcache hash table.
3372	*
3373	* Ordinarily, there should certainly not be an existing hash entry for
3374	* the same OID; but during bootstrap, when we create a "real" relcache
3375	* entry for one of the bootstrap relations, we'll be overwriting the
3376	* phony one created with formrdesc. So allow that to happen for nailed
3377	* rels.
3378	*/
3379	RelationCacheInsert(rel, nailit);
3380
3381	/*
3382	* Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3383	* can't do this before storing relid in it.
3384	*/
3385	EOXactListAdd(rel);
3386
3387	/*
3388	* done building relcache entry.
3389	*/
3390	MemoryContextSwitchTo(oldcxt);
3391
3392	/ It's fully valid /
3393	rel->rd_isvalid = true;
3394
3395	/*
3396	* Caller expects us to pin the returned entry.
3397	*/
3398	RelationIncrementReferenceCount(rel);
3399
3400	return rel;
3401	}
3402
3403
3404	/*
3405	* RelationSetNewRelfilenode
3406	*
3407	* Assign a new relfilenode (physical file name), and possibly a new
3408	* persistence setting, to the relation.
3409	*
3410	* This allows a full rewrite of the relation to be done with transactional
3411	* safety (since the filenode assignment can be rolled back). Note however
3412	* that there is no simple way to access the relation's old data for the
3413	* remainder of the current transaction. This limits the usefulness to cases
3414	* such as TRUNCATE or rebuilding an index from scratch.
3415	*
3416	* Caller must already hold exclusive lock on the relation.
3417	*/
3418	void
3419	RelationSetNewRelfilenode(Relation relation, char persistence)
3420	{
3421	Oid newrelfilenode;
3422	Relation pg_class;
3423	HeapTuple tuple;
3424	Form_pg_class classform;
3425	MultiXactId minmulti = InvalidMultiXactId;
3426	TransactionId freezeXid = InvalidTransactionId;
3427	RelFileNode newrnode;
3428
3429	/ Allocate a new relfilenode /
3430	newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3431	persistence);
3432
3433	/*
3434	* Get a writable copy of the pg_class tuple for the given relation.
3435	*/
3436	pg_class = table_open(RelationRelationId, RowExclusiveLock);
3437
3438	tuple = SearchSysCacheCopy1(RELOID,
3439	ObjectIdGetDatum(RelationGetRelid(relation)));
3440	if (!HeapTupleIsValid(tuple))
3441	elog(ERROR, "could not find tuple for relation %u",
3442	RelationGetRelid(relation));
3443	classform = (Form_pg_class) GETSTRUCT(tuple);
3444
3445	/*
3446	* Schedule unlinking of the old storage at transaction commit.
3447	*/
3448	RelationDropStorage(relation);
3449
3450	/*
3451	* Create storage for the main fork of the new relfilenode. If it's a
3452	* table-like object, call into the table AM to do so, which'll also
3453	* create the table's init fork if needed.
3454	*
3455	* NOTE: If relevant for the AM, any conflict in relfilenode value will be
3456	* caught here, if GetNewRelFileNode messes up for any reason.
3457	*/
3458	newrnode = relation->rd_node;
3459	newrnode.relNode = newrelfilenode;
3460
3461	switch (relation->rd_rel->relkind)
3462	{
3463	case RELKIND_INDEX:
3464	case RELKIND_SEQUENCE:
3465	{
3466	/ handle these directly, at least for now /
3467	SMgrRelation srel;
3468
3469	srel = RelationCreateStorage(newrnode, persistence);
3470	smgrclose(srel);
3471	}
3472	break;
3473
3474	case RELKIND_RELATION:
3475	case RELKIND_TOASTVALUE:
3476	case RELKIND_MATVIEW:
3477	table_relation_set_new_filenode(relation, &newrnode,
3478	persistence,
3479	&freezeXid, &minmulti);
3480	break;
3481
3482	default:
3483	/ we shouldn't be called for anything else /
3484	elog(ERROR, "relation \"%s\" does not have storage",
3485	RelationGetRelationName(relation));
3486	break;
3487	}
3488
3489	/*
3490	* If we're dealing with a mapped index, pg_class.relfilenode doesn't
3491	* change; instead we have to send the update to the relation mapper.
3492	*
3493	* For mapped indexes, we don't actually change the pg_class entry at all;
3494	* this is essential when reindexing pg_class itself. That leaves us with
3495	* possibly-inaccurate values of relpages etc, but those will be fixed up
3496	* later.
3497	*/
3498	if (RelationIsMapped(relation))
3499	{
3500	/ This case is only supported for indexes /
3501	Assert(relation->rd_rel->relkind == RELKIND_INDEX);
3502
3503	/ Since we're not updating pg_class, these had better not change /
3504	Assert(classform->relfrozenxid == freezeXid);
3505	Assert(classform->relminmxid == minmulti);
3506	Assert(classform->relpersistence == persistence);
3507
3508	/*
3509	* In some code paths it's possible that the tuple update we'd
3510	* otherwise do here is the only thing that would assign an XID for
3511	* the current transaction. However, we must have an XID to delete
3512	* files, so make sure one is assigned.
3513	*/
3514	(void) GetCurrentTransactionId();
3515
3516	/ Do the deed /
3517	RelationMapUpdateMap(RelationGetRelid(relation),
3518	newrelfilenode,
3519	relation->rd_rel->relisshared,
3520	false);
3521
3522	/ Since we're not updating pg_class, must trigger inval manually /
3523	CacheInvalidateRelcache(relation);
3524	}
3525	else
3526	{
3527	/ Normal case, update the pg_class entry /
3528	classform->relfilenode = newrelfilenode;
3529
3530	/ relpages etc. never change for sequences /
3531	if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3532	{
3533	classform->relpages = `0`; / it's empty until further notice /
3534	classform->reltuples = `0`;
3535	classform->relallvisible = `0`;
3536	}
3537	classform->relfrozenxid = freezeXid;
3538	classform->relminmxid = minmulti;
3539	classform->relpersistence = persistence;
3540
3541	CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3542	}
3543
3544	heap_freetuple(tuple);
3545
3546	table_close(pg_class, RowExclusiveLock);
3547
3548	/*
3549	* Make the pg_class row change or relation map change visible. This will
3550	* cause the relcache entry to get updated, too.
3551	*/
3552	CommandCounterIncrement();
3553
3554	/*
3555	* Mark the rel as having been given a new relfilenode in the current
3556	* (sub) transaction. This is a hint that can be used to optimize later
3557	* operations on the rel in the same transaction.
3558	*/
3559	relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
3560
3561	/ Flag relation as needing eoxact cleanup (to remove the hint) /
3562	EOXactListAdd(relation);
3563	}
3564
3565
3566	/*
3567	* RelationCacheInitialize
3568	*
3569	* This initializes the relation descriptor cache. At the time
3570	* that this is invoked, we can't do database access yet (mainly
3571	* because the transaction subsystem is not up); all we are doing
3572	* is making an empty cache hashtable. This must be done before
3573	* starting the initialization transaction, because otherwise
3574	* AtEOXact_RelationCache would crash if that transaction aborts
3575	* before we can get the relcache set up.
3576	*/
3577
3578	#define INITRELCACHESIZE 400
3579
3580	void
3581	RelationCacheInitialize(void)
3582	{
3583	HASHCTL ctl;
3584
3585	/*
3586	* make sure cache memory context exists
3587	*/
3588	if (!CacheMemoryContext)
3589	CreateCacheMemoryContext();
3590
3591	/*
3592	* create hashtable that indexes the relcache
3593	*/
3594	MemSet(&ctl, `0`, sizeof(ctl));
3595	ctl.keysize = sizeof(Oid);
3596	ctl.entrysize = sizeof(RelIdCacheEnt);
3597	RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3598	&ctl, HASH_ELEM \| HASH_BLOBS);
3599
3600	/*
3601	* relation mapper needs to be initialized too
3602	*/
3603	RelationMapInitialize();
3604	}
3605
3606	/*
3607	* RelationCacheInitializePhase2
3608	*
3609	* This is called to prepare for access to shared catalogs during startup.
3610	* We must at least set up nailed reldescs for pg_database, pg_authid,
3611	* pg_auth_members, and pg_shseclabel. Ideally we'd like to have reldescs
3612	* for their indexes, too. We attempt to load this information from the
3613	* shared relcache init file. If that's missing or broken, just make
3614	* phony entries for the catalogs themselves.
3615	* RelationCacheInitializePhase3 will clean up as needed.
3616	*/
3617	void
3618	RelationCacheInitializePhase2(void)
3619	{
3620	MemoryContext oldcxt;
3621
3622	/*
3623	* relation mapper needs initialized too
3624	*/
3625	RelationMapInitializePhase2();
3626
3627	/*
3628	* In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3629	* nothing.
3630	*/
3631	if (IsBootstrapProcessingMode())
3632	return;
3633
3634	/*
3635	* switch to cache memory context
3636	*/
3637	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3638
3639	/*
3640	* Try to load the shared relcache cache file. If unsuccessful, bootstrap
3641	* the cache with pre-made descriptors for the critical shared catalogs.
3642	*/
3643	if (!load_relcache_init_file(true))
3644	{
3645	formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3646	Natts_pg_database, Desc_pg_database);
3647	formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3648	Natts_pg_authid, Desc_pg_authid);
3649	formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3650	Natts_pg_auth_members, Desc_pg_auth_members);
3651	formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3652	Natts_pg_shseclabel, Desc_pg_shseclabel);
3653	formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3654	Natts_pg_subscription, Desc_pg_subscription);
3655
3656	#define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3657	}
3658
3659	MemoryContextSwitchTo(oldcxt);
3660	}
3661
3662	/*
3663	* RelationCacheInitializePhase3
3664	*
3665	* This is called as soon as the catcache and transaction system
3666	* are functional and we have determined MyDatabaseId. At this point
3667	* we can actually read data from the database's system catalogs.
3668	* We first try to read pre-computed relcache entries from the local
3669	* relcache init file. If that's missing or broken, make phony entries
3670	* for the minimum set of nailed-in-cache relations. Then (unless
3671	* bootstrapping) make sure we have entries for the critical system
3672	* indexes. Once we've done all this, we have enough infrastructure to
3673	* open any system catalog or use any catcache. The last step is to
3674	* rewrite the cache files if needed.
3675	*/
3676	void
3677	RelationCacheInitializePhase3(void)
3678	{
3679	HASH_SEQ_STATUS status;
3680	RelIdCacheEnt *idhentry;
3681	MemoryContext oldcxt;
3682	bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3683
3684	/*
3685	* relation mapper needs initialized too
3686	*/
3687	RelationMapInitializePhase3();
3688
3689	/*
3690	* switch to cache memory context
3691	*/
3692	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3693
3694	/*
3695	* Try to load the local relcache cache file. If unsuccessful, bootstrap
3696	* the cache with pre-made descriptors for the critical "nailed-in" system
3697	* catalogs.
3698	*/
3699	if (IsBootstrapProcessingMode() \|\|
3700	!load_relcache_init_file(false))
3701	{
3702	needNewCacheFile = true;
3703
3704	formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3705	Natts_pg_class, Desc_pg_class);
3706	formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3707	Natts_pg_attribute, Desc_pg_attribute);
3708	formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3709	Natts_pg_proc, Desc_pg_proc);
3710	formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3711	Natts_pg_type, Desc_pg_type);
3712
3713	#define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3714	}
3715
3716	MemoryContextSwitchTo(oldcxt);
3717
3718	/ In bootstrap mode, the faked-up formrdesc info is all we'll have /
3719	if (IsBootstrapProcessingMode())
3720	return;
3721
3722	/*
3723	* If we didn't get the critical system indexes loaded into relcache, do
3724	* so now. These are critical because the catcache and/or opclass cache
3725	* depend on them for fetches done during relcache load. Thus, we have an
3726	* infinite-recursion problem. We can break the recursion by doing
3727	* heapscans instead of indexscans at certain key spots. To avoid hobbling
3728	* performance, we only want to do that until we have the critical indexes
3729	* loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3730	* decide whether to do heapscan or indexscan at the key spots, and we set
3731	* it true after we've loaded the critical indexes.
3732	*
3733	* The critical indexes are marked as "nailed in cache", partly to make it
3734	* easy for load_relcache_init_file to count them, but mainly because we
3735	* cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3736	* true. (NOTE: perhaps it would be possible to reload them by
3737	* temporarily setting criticalRelcachesBuilt to false again. For now,
3738	* though, we just nail 'em in.)
3739	*
3740	* RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3741	* in the same way as the others, because the critical catalogs don't
3742	* (currently) have any rules or triggers, and so these indexes can be
3743	* rebuilt without inducing recursion. However they are used during
3744	* relcache load when a rel does have rules or triggers, so we choose to
3745	* nail them for performance reasons.
3746	*/
3747	if (!criticalRelcachesBuilt)
3748	{
3749	load_critical_index(ClassOidIndexId,
3750	RelationRelationId);
3751	load_critical_index(AttributeRelidNumIndexId,
3752	AttributeRelationId);
3753	load_critical_index(IndexRelidIndexId,
3754	IndexRelationId);
3755	load_critical_index(OpclassOidIndexId,
3756	OperatorClassRelationId);
3757	load_critical_index(AccessMethodProcedureIndexId,
3758	AccessMethodProcedureRelationId);
3759	load_critical_index(RewriteRelRulenameIndexId,
3760	RewriteRelationId);
3761	load_critical_index(TriggerRelidNameIndexId,
3762	TriggerRelationId);
3763
3764	#define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3765
3766	criticalRelcachesBuilt = true;
3767	}
3768
3769	/*
3770	* Process critical shared indexes too.
3771	*
3772	* DatabaseNameIndexId isn't critical for relcache loading, but rather for
3773	* initial lookup of MyDatabaseId, without which we'll never find any
3774	* non-shared catalogs at all. Autovacuum calls InitPostgres with a
3775	* database OID, so it instead depends on DatabaseOidIndexId. We also
3776	* need to nail up some indexes on pg_authid and pg_auth_members for use
3777	* during client authentication. SharedSecLabelObjectIndexId isn't
3778	* critical for the core system, but authentication hooks might be
3779	* interested in it.
3780	*/
3781	if (!criticalSharedRelcachesBuilt)
3782	{
3783	load_critical_index(DatabaseNameIndexId,
3784	DatabaseRelationId);
3785	load_critical_index(DatabaseOidIndexId,
3786	DatabaseRelationId);
3787	load_critical_index(AuthIdRolnameIndexId,
3788	AuthIdRelationId);
3789	load_critical_index(AuthIdOidIndexId,
3790	AuthIdRelationId);
3791	load_critical_index(AuthMemMemRoleIndexId,
3792	AuthMemRelationId);
3793	load_critical_index(SharedSecLabelObjectIndexId,
3794	SharedSecLabelRelationId);
3795
3796	#define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3797
3798	criticalSharedRelcachesBuilt = true;
3799	}
3800
3801	/*
3802	* Now, scan all the relcache entries and update anything that might be
3803	* wrong in the results from formrdesc or the relcache cache file. If we
3804	* faked up relcache entries using formrdesc, then read the real pg_class
3805	* rows and replace the fake entries with them. Also, if any of the
3806	* relcache entries have rules, triggers, or security policies, load that
3807	* info the hard way since it isn't recorded in the cache file.
3808	*
3809	* Whenever we access the catalogs to read data, there is a possibility of
3810	* a shared-inval cache flush causing relcache entries to be removed.
3811	* Since hash_seq_search only guarantees to still work after the current
3812	* entry is removed, it's unsafe to continue the hashtable scan afterward.
3813	* We handle this by restarting the scan from scratch after each access.
3814	* This is theoretically O(N^2), but the number of entries that actually
3815	* need to be fixed is small enough that it doesn't matter.
3816	*/
3817	hash_seq_init(&status, RelationIdCache);
3818
3819	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3820	{
3821	Relation relation = idhentry->reldesc;
3822	bool restart = false;
3823
3824	/*
3825	* Make sure this entry doesn't get flushed while we work with it.
3826	*/
3827	RelationIncrementReferenceCount(relation);
3828
3829	/*
3830	* If it's a faked-up entry, read the real pg_class tuple.
3831	*/
3832	if (relation->rd_rel->relowner == InvalidOid)
3833	{
3834	HeapTuple htup;
3835	Form_pg_class relp;
3836
3837	htup = SearchSysCache1(RELOID,
3838	ObjectIdGetDatum(RelationGetRelid(relation)));
3839	if (!HeapTupleIsValid(htup))
3840	elog(FATAL, "cache lookup failed for relation %u",
3841	RelationGetRelid(relation));
3842	relp = (Form_pg_class) GETSTRUCT(htup);
3843
3844	/*
3845	* Copy tuple to relation->rd_rel. (See notes in
3846	* AllocateRelationDesc())
3847	*/
3848	memcpy((char ) relation->rd_rel, (char* *) relp, CLASS_TUPLE_SIZE);
3849
3850	/ Update rd_options while we have the tuple /
3851	if (relation->rd_options)
3852	pfree(relation->rd_options);
3853	RelationParseRelOptions(relation, htup);
3854
3855	/*
3856	* Check the values in rd_att were set up correctly. (We cannot
3857	* just copy them over now: formrdesc must have set up the rd_att
3858	* data correctly to start with, because it may already have been
3859	* copied into one or more catcache entries.)
3860	*/
3861	Assert(relation->rd_att->tdtypeid == relp->reltype);
3862	Assert(relation->rd_att->tdtypmod == -`1`);
3863
3864	ReleaseSysCache(htup);
3865
3866	/ relowner had better be OK now, else we'll loop forever /
3867	if (relation->rd_rel->relowner == InvalidOid)
3868	elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3869	RelationGetRelationName(relation));
3870
3871	restart = true;
3872	}
3873
3874	/*
3875	* Fix data that isn't saved in relcache cache file.
3876	*
3877	* relhasrules or relhastriggers could possibly be wrong or out of
3878	* date. If we don't actually find any rules or triggers, clear the
3879	* local copy of the flag so that we don't get into an infinite loop
3880	* here. We don't make any attempt to fix the pg_class entry, though.
3881	*/
3882	if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3883	{
3884	RelationBuildRuleLock(relation);
3885	if (relation->rd_rules == NULL)
3886	relation->rd_rel->relhasrules = false;
3887	restart = true;
3888	}
3889	if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3890	{
3891	RelationBuildTriggers(relation);
3892	if (relation->trigdesc == NULL)
3893	relation->rd_rel->relhastriggers = false;
3894	restart = true;
3895	}
3896
3897	/*
3898	* Re-load the row security policies if the relation has them, since
3899	* they are not preserved in the cache. Note that we can never NOT
3900	* have a policy while relrowsecurity is true,
3901	* RelationBuildRowSecurity will create a single default-deny policy
3902	* if there is no policy defined in pg_policy.
3903	*/
3904	if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
3905	{
3906	RelationBuildRowSecurity(relation);
3907
3908	Assert(relation->rd_rsdesc != NULL);
3909	restart = true;
3910	}
3911
3912	/*
3913	* Reload the partition key and descriptor for a partitioned table.
3914	*/
3915	if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3916	relation->rd_partkey == NULL)
3917	{
3918	RelationBuildPartitionKey(relation);
3919	Assert(relation->rd_partkey != NULL);
3920
3921	restart = true;
3922	}
3923
3924	if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3925	relation->rd_partdesc == NULL)
3926	{
3927	RelationBuildPartitionDesc(relation);
3928	Assert(relation->rd_partdesc != NULL);
3929
3930	restart = true;
3931	}
3932
3933	if (relation->rd_tableam == NULL &&
3934	(relation->rd_rel->relkind == RELKIND_RELATION \|\|
3935	relation->rd_rel->relkind == RELKIND_SEQUENCE \|\|
3936	relation->rd_rel->relkind == RELKIND_TOASTVALUE \|\|
3937	relation->rd_rel->relkind == RELKIND_MATVIEW))
3938	{
3939	RelationInitTableAccessMethod(relation);
3940	Assert(relation->rd_tableam != NULL);
3941
3942	restart = true;
3943	}
3944
3945	/ Release hold on the relation /
3946	RelationDecrementReferenceCount(relation);
3947
3948	/ Now, restart the hashtable scan if needed /
3949	if (restart)
3950	{
3951	hash_seq_term(&status);
3952	hash_seq_init(&status, RelationIdCache);
3953	}
3954	}
3955
3956	/*
3957	* Lastly, write out new relcache cache files if needed. We don't bother
3958	* to distinguish cases where only one of the two needs an update.
3959	*/
3960	if (needNewCacheFile)
3961	{
3962	/*
3963	* Force all the catcaches to finish initializing and thereby open the
3964	* catalogs and indexes they use. This will preload the relcache with
3965	* entries for all the most important system catalogs and indexes, so
3966	* that the init files will be most useful for future backends.
3967	*/
3968	InitCatalogCachePhase2();
3969
3970	/ now write the files /
3971	write_relcache_init_file(true);
3972	write_relcache_init_file(false);
3973	}
3974	}
3975
3976	/*
3977	* Load one critical system index into the relcache
3978	*
3979	* indexoid is the OID of the target index, heapoid is the OID of the catalog
3980	* it belongs to.
3981	*/
3982	static void
3983	load_critical_index(Oid indexoid, Oid heapoid)
3984	{
3985	Relation ird;
3986
3987	/*
3988	* We must lock the underlying catalog before locking the index to avoid
3989	* deadlock, since RelationBuildDesc might well need to read the catalog,
3990	* and if anyone else is exclusive-locking this catalog and index they'll
3991	* be doing it in that order.
3992	*/
3993	LockRelationOid(heapoid, AccessShareLock);
3994	LockRelationOid(indexoid, AccessShareLock);
3995	ird = RelationBuildDesc(indexoid, true);
3996	if (ird == NULL)
3997	elog(PANIC, "could not open critical system index %u", indexoid);
3998	ird->rd_isnailed = true;
3999	ird->rd_refcnt = `1`;
4000	UnlockRelationOid(indexoid, AccessShareLock);
4001	UnlockRelationOid(heapoid, AccessShareLock);
4002	}
4003
4004	/*
4005	* GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
4006	* GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
4007	*
4008	* We need this kluge because we have to be able to access non-fixed-width
4009	* fields of pg_class and pg_index before we have the standard catalog caches
4010	* available. We use predefined data that's set up in just the same way as
4011	* the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
4012	* not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
4013	* does it have a TupleConstr field. But it's good enough for the purpose of
4014	* extracting fields.
4015	*/
4016	static TupleDesc
4017	BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs)
4018	{
4019	TupleDesc result;
4020	MemoryContext oldcxt;
4021	int i;
4022
4023	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4024
4025	result = CreateTemplateTupleDesc(natts);
4026	result->tdtypeid = RECORDOID; / not right, but we don't care /
4027	result->tdtypmod = -`1`;
4028
4029	for (i = `0`; i < natts; i++)
4030	{
4031	memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
4032	/ make sure attcacheoff is valid /
4033	TupleDescAttr(result, i)->attcacheoff = -`1`;
4034	}
4035
4036	/ initialize first attribute's attcacheoff, cf RelationBuildTupleDesc /
4037	TupleDescAttr(result, `0`)->attcacheoff = `0`;
4038
4039	/ Note: we don't bother to set up a TupleConstr entry /
4040
4041	MemoryContextSwitchTo(oldcxt);
4042
4043	return result;
4044	}
4045
4046	static TupleDesc
4047	GetPgClassDescriptor(void)
4048	{
4049	static TupleDesc pgclassdesc = NULL;
4050
4051	/ Already done? /
4052	if (pgclassdesc == NULL)
4053	pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
4054	Desc_pg_class);
4055
4056	return pgclassdesc;
4057	}
4058
4059	static TupleDesc
4060	GetPgIndexDescriptor(void)
4061	{
4062	static TupleDesc pgindexdesc = NULL;
4063
4064	/ Already done? /
4065	if (pgindexdesc == NULL)
4066	pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
4067	Desc_pg_index);
4068
4069	return pgindexdesc;
4070	}
4071
4072	/*
4073	* Load any default attribute value definitions for the relation.
4074	*/
4075	static void
4076	AttrDefaultFetch(Relation relation)
4077	{
4078	AttrDefault *attrdef = relation->rd_att->constr->defval;
4079	int ndef = relation->rd_att->constr->num_defval;
4080	Relation adrel;
4081	SysScanDesc adscan;
4082	ScanKeyData skey;
4083	HeapTuple htup;
4084	Datum val;
4085	bool isnull;
4086	int found;
4087	int i;
4088
4089	ScanKeyInit(&skey,
4090	Anum_pg_attrdef_adrelid,
4091	BTEqualStrategyNumber, F_OIDEQ,
4092	ObjectIdGetDatum(RelationGetRelid(relation)));
4093
4094	adrel = table_open(AttrDefaultRelationId, AccessShareLock);
4095	adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4096	NULL, `1`, &skey);
4097	found = `0`;
4098
4099	while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4100	{
4101	Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4102	Form_pg_attribute attr = TupleDescAttr(relation->rd_att, adform->adnum - `1`);
4103
4104	for (i = `0`; i < ndef; i++)
4105	{
4106	if (adform->adnum != attrdef[i].adnum)
4107	continue;
4108	if (attrdef[i].adbin != NULL)
4109	elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4110	NameStr(attr->attname),
4111	RelationGetRelationName(relation));
4112	else
4113	found++;
4114
4115	val = fastgetattr(htup,
4116	Anum_pg_attrdef_adbin,
4117	adrel->rd_att, &isnull);
4118	if (isnull)
4119	elog(WARNING, "null adbin for attr %s of rel %s",
4120	NameStr(attr->attname),
4121	RelationGetRelationName(relation));
4122	else
4123	{
4124	/ detoast and convert to cstring in caller's context /
4125	char *s = TextDatumGetCString(val);
4126
4127	attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext, s);
4128	pfree(s);
4129	}
4130	break;
4131	}
4132
4133	if (i >= ndef)
4134	elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4135	adform->adnum, RelationGetRelationName(relation));
4136	}
4137
4138	systable_endscan(adscan);
4139	table_close(adrel, AccessShareLock);
4140	}
4141
4142	/*
4143	* Load any check constraints for the relation.
4144	*/
4145	static void
4146	CheckConstraintFetch(Relation relation)
4147	{
4148	ConstrCheck *check = relation->rd_att->constr->check;
4149	int ncheck = relation->rd_att->constr->num_check;
4150	Relation conrel;
4151	SysScanDesc conscan;
4152	ScanKeyData skey[`1`];
4153	HeapTuple htup;
4154	int found = `0`;
4155
4156	ScanKeyInit(&skey[`0`],
4157	Anum_pg_constraint_conrelid,
4158	BTEqualStrategyNumber, F_OIDEQ,
4159	ObjectIdGetDatum(RelationGetRelid(relation)));
4160
4161	conrel = table_open(ConstraintRelationId, AccessShareLock);
4162	conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4163	NULL, `1`, skey);
4164
4165	while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4166	{
4167	Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
4168	Datum val;
4169	bool isnull;
4170	char *s;
4171
4172	/ We want check constraints only /
4173	if (conform->contype != CONSTRAINT_CHECK)
4174	continue;
4175
4176	if (found >= ncheck)
4177	elog(ERROR, "unexpected constraint record found for rel %s",
4178	RelationGetRelationName(relation));
4179
4180	check[found].ccvalid = conform->convalidated;
4181	check[found].ccnoinherit = conform->connoinherit;
4182	check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
4183	NameStr(conform->conname));
4184
4185	/ Grab and test conbin is actually set /
4186	val = fastgetattr(htup,
4187	Anum_pg_constraint_conbin,
4188	conrel->rd_att, &isnull);
4189	if (isnull)
4190	elog(ERROR, "null conbin for rel %s",
4191	RelationGetRelationName(relation));
4192
4193	/ detoast and convert to cstring in caller's context /
4194	s = TextDatumGetCString(val);
4195	check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4196	pfree(s);
4197
4198	found++;
4199	}
4200
4201	systable_endscan(conscan);
4202	table_close(conrel, AccessShareLock);
4203
4204	if (found != ncheck)
4205	elog(ERROR, "%d constraint record(s) missing for rel %s",
4206	ncheck - found, RelationGetRelationName(relation));
4207
4208	/ Sort the records so that CHECKs are applied in a deterministic order /
4209	if (ncheck > `1`)
4210	qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4211	}
4212
4213	/*
4214	* qsort comparator to sort ConstrCheck entries by name
4215	*/
4216	static int
4217	CheckConstraintCmp(const void a, const* void *b)
4218	{
4219	const ConstrCheck ca = (const* ConstrCheck *) a;
4220	const ConstrCheck cb = (const* ConstrCheck *) b;
4221
4222	return strcmp(ca->ccname, cb->ccname);
4223	}
4224
4225	/*
4226	* RelationGetFKeyList -- get a list of foreign key info for the relation
4227	*
4228	* Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4229	* the given relation. This data is a direct copy of relevant fields from
4230	* pg_constraint. The list items are in no particular order.
4231	*
4232	* CAUTION: the returned list is part of the relcache's data, and could
4233	* vanish in a relcache entry reset. Callers must inspect or copy it
4234	* before doing anything that might trigger a cache flush, such as
4235	* system catalog accesses. copyObject() can be used if desired.
4236	* (We define it this way because current callers want to filter and
4237	* modify the list entries anyway, so copying would be a waste of time.)
4238	*/
4239	List *
4240	RelationGetFKeyList(Relation relation)
4241	{
4242	List *result;
4243	Relation conrel;
4244	SysScanDesc conscan;
4245	ScanKeyData skey;
4246	HeapTuple htup;
4247	List *oldlist;
4248	MemoryContext oldcxt;
4249
4250	/ Quick exit if we already computed the list. /
4251	if (relation->rd_fkeyvalid)
4252	return relation->rd_fkeylist;
4253
4254	/ Fast path: non-partitioned tables without triggers can't have FKs /
4255	if (!relation->rd_rel->relhastriggers &&
4256	relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
4257	return NIL;
4258
4259	/*
4260	* We build the list we intend to return (in the caller's context) while
4261	* doing the scan. After successfully completing the scan, we copy that
4262	* list into the relcache entry. This avoids cache-context memory leakage
4263	* if we get some sort of error partway through.
4264	*/
4265	result = NIL;
4266
4267	/ Prepare to scan pg_constraint for entries having conrelid = this rel. /
4268	ScanKeyInit(&skey,
4269	Anum_pg_constraint_conrelid,
4270	BTEqualStrategyNumber, F_OIDEQ,
4271	ObjectIdGetDatum(RelationGetRelid(relation)));
4272
4273	conrel = table_open(ConstraintRelationId, AccessShareLock);
4274	conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4275	NULL, `1`, &skey);
4276
4277	while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4278	{
4279	Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4280	ForeignKeyCacheInfo *info;
4281
4282	/ consider only foreign keys /
4283	if (constraint->contype != CONSTRAINT_FOREIGN)
4284	continue;
4285
4286	info = makeNode(ForeignKeyCacheInfo);
4287	info->conoid = constraint->oid;
4288	info->conrelid = constraint->conrelid;
4289	info->confrelid = constraint->confrelid;
4290
4291	DeconstructFkConstraintRow(htup, &info->nkeys,
4292	info->conkey,
4293	info->confkey,
4294	info->conpfeqop,
4295	NULL, NULL);
4296
4297	/ Add FK's node to the result list /
4298	result = lappend(result, info);
4299	}
4300
4301	systable_endscan(conscan);
4302	table_close(conrel, AccessShareLock);
4303
4304	/ Now save a copy of the completed list in the relcache entry. /
4305	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4306	oldlist = relation->rd_fkeylist;
4307	relation->rd_fkeylist = copyObject(result);
4308	relation->rd_fkeyvalid = true;
4309	MemoryContextSwitchTo(oldcxt);
4310
4311	/ Don't leak the old list, if there is one /
4312	list_free_deep(oldlist);
4313
4314	return result;
4315	}
4316
4317	/*
4318	* RelationGetIndexList -- get a list of OIDs of indexes on this relation
4319	*
4320	* The index list is created only if someone requests it. We scan pg_index
4321	* to find relevant indexes, and add the list to the relcache entry so that
4322	* we won't have to compute it again. Note that shared cache inval of a
4323	* relcache entry will delete the old list and set rd_indexvalid to false,
4324	* so that we must recompute the index list on next request. This handles
4325	* creation or deletion of an index.
4326	*
4327	* Indexes that are marked not indislive are omitted from the returned list.
4328	* Such indexes are expected to be dropped momentarily, and should not be
4329	* touched at all by any caller of this function.
4330	*
4331	* The returned list is guaranteed to be sorted in order by OID. This is
4332	* needed by the executor, since for index types that we obtain exclusive
4333	* locks on when updating the index, all backends must lock the indexes in
4334	* the same order or we will get deadlocks (see ExecOpenIndices()). Any
4335	* consistent ordering would do, but ordering by OID is easy.
4336	*
4337	* Since shared cache inval causes the relcache's copy of the list to go away,
4338	* we return a copy of the list palloc'd in the caller's context. The caller
4339	* may list_free() the returned list after scanning it. This is necessary
4340	* since the caller will typically be doing syscache lookups on the relevant
4341	* indexes, and syscache lookup could cause SI messages to be processed!
4342	*
4343	* In exactly the same way, we update rd_pkindex, which is the OID of the
4344	* relation's primary key index if any, else InvalidOid; and rd_replidindex,
4345	* which is the pg_class OID of an index to be used as the relation's
4346	* replication identity index, or InvalidOid if there is no such index.
4347	*/
4348	List *
4349	RelationGetIndexList(Relation relation)
4350	{
4351	Relation indrel;
4352	SysScanDesc indscan;
4353	ScanKeyData skey;
4354	HeapTuple htup;
4355	List *result;
4356	List *oldlist;
4357	char replident = relation->rd_rel->relreplident;
4358	Oid pkeyIndex = InvalidOid;
4359	Oid candidateIndex = InvalidOid;
4360	MemoryContext oldcxt;
4361
4362	/ Quick exit if we already computed the list. /
4363	if (relation->rd_indexvalid)
4364	return list_copy(relation->rd_indexlist);
4365
4366	/*
4367	* We build the list we intend to return (in the caller's context) while
4368	* doing the scan. After successfully completing the scan, we copy that
4369	* list into the relcache entry. This avoids cache-context memory leakage
4370	* if we get some sort of error partway through.
4371	*/
4372	result = NIL;
4373
4374	/ Prepare to scan pg_index for entries having indrelid = this rel. /
4375	ScanKeyInit(&skey,
4376	Anum_pg_index_indrelid,
4377	BTEqualStrategyNumber, F_OIDEQ,
4378	ObjectIdGetDatum(RelationGetRelid(relation)));
4379
4380	indrel = table_open(IndexRelationId, AccessShareLock);
4381	indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4382	NULL, `1`, &skey);
4383
4384	while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4385	{
4386	Form_pg_index index = (Form_pg_index) GETSTRUCT(htup);
4387
4388	/*
4389	* Ignore any indexes that are currently being dropped. This will
4390	* prevent them from being searched, inserted into, or considered in
4391	* HOT-safety decisions. It's unsafe to touch such an index at all
4392	* since its catalog entries could disappear at any instant.
4393	*/
4394	if (!index->indislive)
4395	continue;
4396
4397	/ Add index's OID to result list in the proper order /
4398	result = insert_ordered_oid(result, index->indexrelid);
4399
4400	/*
4401	* Invalid, non-unique, non-immediate or predicate indexes aren't
4402	* interesting for either oid indexes or replication identity indexes,
4403	* so don't check them.
4404	*/
4405	if (!index->indisvalid \|\| !index->indisunique \|\|
4406	!index->indimmediate \|\|
4407	!heap_attisnull(htup, Anum_pg_index_indpred, NULL))
4408	continue;
4409
4410	/ remember primary key index if any /
4411	if (index->indisprimary)
4412	pkeyIndex = index->indexrelid;
4413
4414	/ remember explicitly chosen replica index /
4415	if (index->indisreplident)
4416	candidateIndex = index->indexrelid;
4417	}
4418
4419	systable_endscan(indscan);
4420
4421	table_close(indrel, AccessShareLock);
4422
4423	/ Now save a copy of the completed list in the relcache entry. /
4424	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4425	oldlist = relation->rd_indexlist;
4426	relation->rd_indexlist = list_copy(result);
4427	relation->rd_pkindex = pkeyIndex;
4428	if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4429	relation->rd_replidindex = pkeyIndex;
4430	else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4431	relation->rd_replidindex = candidateIndex;
4432	else
4433	relation->rd_replidindex = InvalidOid;
4434	relation->rd_indexvalid = true;
4435	MemoryContextSwitchTo(oldcxt);
4436
4437	/ Don't leak the old list, if there is one /
4438	list_free(oldlist);
4439
4440	return result;
4441	}
4442
4443	/*
4444	* RelationGetStatExtList
4445	* get a list of OIDs of statistics objects on this relation
4446	*
4447	* The statistics list is created only if someone requests it, in a way
4448	* similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4449	* relevant statistics, and add the list to the relcache entry so that we
4450	* won't have to compute it again. Note that shared cache inval of a
4451	* relcache entry will delete the old list and set rd_statvalid to 0,
4452	* so that we must recompute the statistics list on next request. This
4453	* handles creation or deletion of a statistics object.
4454	*
4455	* The returned list is guaranteed to be sorted in order by OID, although
4456	* this is not currently needed.
4457	*
4458	* Since shared cache inval causes the relcache's copy of the list to go away,
4459	* we return a copy of the list palloc'd in the caller's context. The caller
4460	* may list_free() the returned list after scanning it. This is necessary
4461	* since the caller will typically be doing syscache lookups on the relevant
4462	* statistics, and syscache lookup could cause SI messages to be processed!
4463	*/
4464	List *
4465	RelationGetStatExtList(Relation relation)
4466	{
4467	Relation indrel;
4468	SysScanDesc indscan;
4469	ScanKeyData skey;
4470	HeapTuple htup;
4471	List *result;
4472	List *oldlist;
4473	MemoryContext oldcxt;
4474
4475	/ Quick exit if we already computed the list. /
4476	if (relation->rd_statvalid != `0`)
4477	return list_copy(relation->rd_statlist);
4478
4479	/*
4480	* We build the list we intend to return (in the caller's context) while
4481	* doing the scan. After successfully completing the scan, we copy that
4482	* list into the relcache entry. This avoids cache-context memory leakage
4483	* if we get some sort of error partway through.
4484	*/
4485	result = NIL;
4486
4487	/*
4488	* Prepare to scan pg_statistic_ext for entries having stxrelid = this
4489	* rel.
4490	*/
4491	ScanKeyInit(&skey,
4492	Anum_pg_statistic_ext_stxrelid,
4493	BTEqualStrategyNumber, F_OIDEQ,
4494	ObjectIdGetDatum(RelationGetRelid(relation)));
4495
4496	indrel = table_open(StatisticExtRelationId, AccessShareLock);
4497	indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4498	NULL, `1`, &skey);
4499
4500	while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4501	{
4502	Oid oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid;
4503
4504	result = insert_ordered_oid(result, oid);
4505	}
4506
4507	systable_endscan(indscan);
4508
4509	table_close(indrel, AccessShareLock);
4510
4511	/ Now save a copy of the completed list in the relcache entry. /
4512	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4513	oldlist = relation->rd_statlist;
4514	relation->rd_statlist = list_copy(result);
4515
4516	relation->rd_statvalid = true;
4517	MemoryContextSwitchTo(oldcxt);
4518
4519	/ Don't leak the old list, if there is one /
4520	list_free(oldlist);
4521
4522	return result;
4523	}
4524
4525	/*
4526	* insert_ordered_oid
4527	* Insert a new Oid into a sorted list of Oids, preserving ordering
4528	*
4529	* Building the ordered list this way is O(N^2), but with a pretty small
4530	* constant, so for the number of entries we expect it will probably be
4531	* faster than trying to apply qsort(). Most tables don't have very many
4532	* indexes...
4533	*/
4534	static List *
4535	insert_ordered_oid(List *list, Oid datum)
4536	{
4537	ListCell *prev;
4538
4539	/ Does the datum belong at the front? /
4540	if (list == NIL \|\| datum < linitial_oid(list))
4541	return lcons_oid(datum, list);
4542	/ No, so find the entry it belongs after /
4543	prev = list_head(list);
4544	for (;;)
4545	{
4546	ListCell *curr = lnext(prev);
4547
4548	if (curr == NULL \|\| datum < lfirst_oid(curr))
4549	break; / it belongs after 'prev', before 'curr' /
4550
4551	prev = curr;
4552	}
4553	/ Insert datum into list after 'prev' /
4554	lappend_cell_oid(list, prev, datum);
4555	return list;
4556	}
4557
4558	/*
4559	* RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4560	*
4561	* Returns InvalidOid if there is no such index.
4562	*/
4563	Oid
4564	RelationGetPrimaryKeyIndex(Relation relation)
4565	{
4566	List *ilist;
4567
4568	if (!relation->rd_indexvalid)
4569	{
4570	/ RelationGetIndexList does the heavy lifting. /
4571	ilist = RelationGetIndexList(relation);
4572	list_free(ilist);
4573	Assert(relation->rd_indexvalid);
4574	}
4575
4576	return relation->rd_pkindex;
4577	}
4578
4579	/*
4580	* RelationGetReplicaIndex -- get OID of the relation's replica identity index
4581	*
4582	* Returns InvalidOid if there is no such index.
4583	*/
4584	Oid
4585	RelationGetReplicaIndex(Relation relation)
4586	{
4587	List *ilist;
4588
4589	if (!relation->rd_indexvalid)
4590	{
4591	/ RelationGetIndexList does the heavy lifting. /
4592	ilist = RelationGetIndexList(relation);
4593	list_free(ilist);
4594	Assert(relation->rd_indexvalid);
4595	}
4596
4597	return relation->rd_replidindex;
4598	}
4599
4600	/*
4601	* RelationGetIndexExpressions -- get the index expressions for an index
4602	*
4603	* We cache the result of transforming pg_index.indexprs into a node tree.
4604	* If the rel is not an index or has no expressional columns, we return NIL.
4605	* Otherwise, the returned tree is copied into the caller's memory context.
4606	* (We don't want to return a pointer to the relcache copy, since it could
4607	* disappear due to relcache invalidation.)
4608	*/
4609	List *
4610	RelationGetIndexExpressions(Relation relation)
4611	{
4612	List *result;
4613	Datum exprsDatum;
4614	bool isnull;
4615	char *exprsString;
4616	MemoryContext oldcxt;
4617
4618	/ Quick exit if we already computed the result. /
4619	if (relation->rd_indexprs)
4620	return copyObject(relation->rd_indexprs);
4621
4622	/ Quick exit if there is nothing to do. /
4623	if (relation->rd_indextuple == NULL \|\|
4624	heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4625	return NIL;
4626
4627	/*
4628	* We build the tree we intend to return in the caller's context. After
4629	* successfully completing the work, we copy it into the relcache entry.
4630	* This avoids problems if we get some sort of error partway through.
4631	*/
4632	exprsDatum = heap_getattr(relation->rd_indextuple,
4633	Anum_pg_index_indexprs,
4634	GetPgIndexDescriptor(),
4635	&isnull);
4636	Assert(!isnull);
4637	exprsString = TextDatumGetCString(exprsDatum);
4638	result = (List *) stringToNode(exprsString);
4639	pfree(exprsString);
4640
4641	/*
4642	* Run the expressions through eval_const_expressions. This is not just an
4643	* optimization, but is necessary, because the planner will be comparing
4644	* them to similarly-processed qual clauses, and may fail to detect valid
4645	* matches without this. We must not use canonicalize_qual, however,
4646	* since these aren't qual expressions.
4647	*/
4648	result = (List ) eval_const_expressions(NULL, (Node ) result);
4649
4650	/ May as well fix opfuncids too /
4651	fix_opfuncids((Node *) result);
4652
4653	/ Now save a copy of the completed tree in the relcache entry. /
4654	oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4655	relation->rd_indexprs = copyObject(result);
4656	MemoryContextSwitchTo(oldcxt);
4657
4658	return result;
4659	}
4660
4661	/*
4662	* RelationGetIndexPredicate -- get the index predicate for an index
4663	*
4664	* We cache the result of transforming pg_index.indpred into an implicit-AND
4665	* node tree (suitable for use in planning).
4666	* If the rel is not an index or has no predicate, we return NIL.
4667	* Otherwise, the returned tree is copied into the caller's memory context.
4668	* (We don't want to return a pointer to the relcache copy, since it could
4669	* disappear due to relcache invalidation.)
4670	*/
4671	List *
4672	RelationGetIndexPredicate(Relation relation)
4673	{
4674	List *result;
4675	Datum predDatum;
4676	bool isnull;
4677	char *predString;
4678	MemoryContext oldcxt;
4679
4680	/ Quick exit if we already computed the result. /
4681	if (relation->rd_indpred)
4682	return copyObject(relation->rd_indpred);
4683
4684	/ Quick exit if there is nothing to do. /
4685	if (relation->rd_indextuple == NULL \|\|
4686	heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred, NULL))
4687	return NIL;
4688
4689	/*
4690	* We build the tree we intend to return in the caller's context. After
4691	* successfully completing the work, we copy it into the relcache entry.
4692	* This avoids problems if we get some sort of error partway through.
4693	*/
4694	predDatum = heap_getattr(relation->rd_indextuple,
4695	Anum_pg_index_indpred,
4696	GetPgIndexDescriptor(),
4697	&isnull);
4698	Assert(!isnull);
4699	predString = TextDatumGetCString(predDatum);
4700	result = (List *) stringToNode(predString);
4701	pfree(predString);
4702
4703	/*
4704	* Run the expression through const-simplification and canonicalization.
4705	* This is not just an optimization, but is necessary, because the planner
4706	* will be comparing it to similarly-processed qual clauses, and may fail
4707	* to detect valid matches without this. This must match the processing
4708	* done to qual clauses in preprocess_expression()! (We can skip the
4709	* stuff involving subqueries, however, since we don't allow any in index
4710	* predicates.)
4711	*/
4712	result = (List ) eval_const_expressions(NULL, (Node ) result);
4713
4714	result = (List ) canonicalize_qual((Expr ) result, false);
4715
4716	/ Also convert to implicit-AND format /
4717	result = make_ands_implicit((Expr *) result);
4718
4719	/ May as well fix opfuncids too /
4720	fix_opfuncids((Node *) result);
4721
4722	/ Now save a copy of the completed tree in the relcache entry. /
4723	oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4724	relation->rd_indpred = copyObject(result);
4725	MemoryContextSwitchTo(oldcxt);
4726
4727	return result;
4728	}
4729
4730	/*
4731	* RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4732	*
4733	* The result has a bit set for each attribute used anywhere in the index
4734	* definitions of all the indexes on this relation. (This includes not only
4735	* simple index keys, but attributes used in expressions and partial-index
4736	* predicates.)
4737	*
4738	* Depending on attrKind, a bitmap covering the attnums for all index columns,
4739	* for all potential foreign key columns, or for all columns in the configured
4740	* replica identity index is returned.
4741	*
4742	* Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4743	* we can include system attributes (e.g., OID) in the bitmap representation.
4744	*
4745	* Caller had better hold at least RowExclusiveLock on the target relation
4746	* to ensure it is safe (deadlock-free) for us to take locks on the relation's
4747	* indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4748	* that lock level doesn't guarantee a stable set of indexes, so we have to
4749	* be prepared to retry here in case of a change in the set of indexes.
4750	*
4751	* The returned result is palloc'd in the caller's memory context and should
4752	* be bms_free'd when not needed anymore.
4753	*/
4754	Bitmapset *
4755	RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
4756	{
4757	Bitmapset indexattrs; /* indexed columns /
4758	Bitmapset uindexattrs; /* columns in unique indexes /
4759	Bitmapset pkindexattrs; /* columns in the primary index /
4760	Bitmapset idindexattrs; /* columns in the replica identity /
4761	List *indexoidlist;
4762	List *newindexoidlist;
4763	Oid relpkindex;
4764	Oid relreplindex;
4765	ListCell *l;
4766	MemoryContext oldcxt;
4767
4768	/ Quick exit if we already computed the result. /
4769	if (relation->rd_indexattr != NULL)
4770	{
4771	switch (attrKind)
4772	{
4773	case INDEX_ATTR_BITMAP_ALL:
4774	return bms_copy(relation->rd_indexattr);
4775	case INDEX_ATTR_BITMAP_KEY:
4776	return bms_copy(relation->rd_keyattr);
4777	case INDEX_ATTR_BITMAP_PRIMARY_KEY:
4778	return bms_copy(relation->rd_pkattr);
4779	case INDEX_ATTR_BITMAP_IDENTITY_KEY:
4780	return bms_copy(relation->rd_idattr);
4781	default:
4782	elog(ERROR, "unknown attrKind %u", attrKind);
4783	}
4784	}
4785
4786	/ Fast path if definitely no indexes /
4787	if (!RelationGetForm(relation)->relhasindex)
4788	return NULL;
4789
4790	/*
4791	* Get cached list of index OIDs. If we have to start over, we do so here.
4792	*/
4793	restart:
4794	indexoidlist = RelationGetIndexList(relation);
4795
4796	/ Fall out if no indexes (but relhasindex was set) /
4797	if (indexoidlist == NIL)
4798	return NULL;
4799
4800	/*
4801	* Copy the rd_pkindex and rd_replidindex values computed by
4802	* RelationGetIndexList before proceeding. This is needed because a
4803	* relcache flush could occur inside index_open below, resetting the
4804	* fields managed by RelationGetIndexList. We need to do the work with
4805	* stable values of these fields.
4806	*/
4807	relpkindex = relation->rd_pkindex;
4808	relreplindex = relation->rd_replidindex;
4809
4810	/*
4811	* For each index, add referenced attributes to indexattrs.
4812	*
4813	* Note: we consider all indexes returned by RelationGetIndexList, even if
4814	* they are not indisready or indisvalid. This is important because an
4815	* index for which CREATE INDEX CONCURRENTLY has just started must be
4816	* included in HOT-safety decisions (see README.HOT). If a DROP INDEX
4817	* CONCURRENTLY is far enough along that we should ignore the index, it
4818	* won't be returned at all by RelationGetIndexList.
4819	*/
4820	indexattrs = NULL;
4821	uindexattrs = NULL;
4822	pkindexattrs = NULL;
4823	idindexattrs = NULL;
4824	foreach(l, indexoidlist)
4825	{
4826	Oid indexOid = lfirst_oid(l);
4827	Relation indexDesc;
4828	Datum datum;
4829	bool isnull;
4830	Node *indexExpressions;
4831	Node *indexPredicate;
4832	int i;
4833	bool isKey; / candidate key /
4834	bool isPK; / primary key /
4835	bool isIDKey; / replica identity index /
4836
4837	indexDesc = index_open(indexOid, AccessShareLock);
4838
4839	/*
4840	* Extract index expressions and index predicate. Note: Don't use
4841	* RelationGetIndexExpressions()/RelationGetIndexPredicate(), because
4842	* those might run constant expressions evaluation, which needs a
4843	* snapshot, which we might not have here. (Also, it's probably more
4844	* sound to collect the bitmaps before any transformations that might
4845	* eliminate columns, but the practical impact of this is limited.)
4846	*/
4847
4848	datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indexprs,
4849	GetPgIndexDescriptor(), &isnull);
4850	if (!isnull)
4851	indexExpressions = stringToNode(TextDatumGetCString(datum));
4852	else
4853	indexExpressions = NULL;
4854
4855	datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indpred,
4856	GetPgIndexDescriptor(), &isnull);
4857	if (!isnull)
4858	indexPredicate = stringToNode(TextDatumGetCString(datum));
4859	else
4860	indexPredicate = NULL;
4861
4862	/ Can this index be referenced by a foreign key? /
4863	isKey = indexDesc->rd_index->indisunique &&
4864	indexExpressions == NULL &&
4865	indexPredicate == NULL;
4866
4867	/ Is this a primary key? /
4868	isPK = (indexOid == relpkindex);
4869
4870	/ Is this index the configured (or default) replica identity? /
4871	isIDKey = (indexOid == relreplindex);
4872
4873	/ Collect simple attribute references /
4874	for (i = `0`; i < indexDesc->rd_index->indnatts; i++)
4875	{
4876	int attrnum = indexDesc->rd_index->indkey.values[i];
4877
4878	/*
4879	* Since we have covering indexes with non-key columns, we must
4880	* handle them accurately here. non-key columns must be added into
4881	* indexattrs, since they are in index, and HOT-update shouldn't
4882	* miss them. Obviously, non-key columns couldn't be referenced by
4883	* foreign key or identity key. Hence we do not include them into
4884	* uindexattrs, pkindexattrs and idindexattrs bitmaps.
4885	*/
4886	if (attrnum != `0`)
4887	{
4888	indexattrs = bms_add_member(indexattrs,
4889	attrnum - FirstLowInvalidHeapAttributeNumber);
4890
4891	if (isKey && i < indexDesc->rd_index->indnkeyatts)
4892	uindexattrs = bms_add_member(uindexattrs,
4893	attrnum - FirstLowInvalidHeapAttributeNumber);
4894
4895	if (isPK && i < indexDesc->rd_index->indnkeyatts)
4896	pkindexattrs = bms_add_member(pkindexattrs,
4897	attrnum - FirstLowInvalidHeapAttributeNumber);
4898
4899	if (isIDKey && i < indexDesc->rd_index->indnkeyatts)
4900	idindexattrs = bms_add_member(idindexattrs,
4901	attrnum - FirstLowInvalidHeapAttributeNumber);
4902	}
4903	}
4904
4905	/ Collect all attributes used in expressions, too /
4906	pull_varattnos(indexExpressions, `1`, &indexattrs);
4907
4908	/ Collect all attributes in the index predicate, too /
4909	pull_varattnos(indexPredicate, `1`, &indexattrs);
4910
4911	index_close(indexDesc, AccessShareLock);
4912	}
4913
4914	/*
4915	* During one of the index_opens in the above loop, we might have received
4916	* a relcache flush event on this relcache entry, which might have been
4917	* signaling a change in the rel's index list. If so, we'd better start
4918	* over to ensure we deliver up-to-date attribute bitmaps.
4919	*/
4920	newindexoidlist = RelationGetIndexList(relation);
4921	if (equal(indexoidlist, newindexoidlist) &&
4922	relpkindex == relation->rd_pkindex &&
4923	relreplindex == relation->rd_replidindex)
4924	{
4925	/ Still the same index set, so proceed /
4926	list_free(newindexoidlist);
4927	list_free(indexoidlist);
4928	}
4929	else
4930	{
4931	/ Gotta do it over ... might as well not leak memory /
4932	list_free(newindexoidlist);
4933	list_free(indexoidlist);
4934	bms_free(uindexattrs);
4935	bms_free(pkindexattrs);
4936	bms_free(idindexattrs);
4937	bms_free(indexattrs);
4938
4939	goto restart;
4940	}
4941
4942	/ Don't leak the old values of these bitmaps, if any /
4943	bms_free(relation->rd_indexattr);
4944	relation->rd_indexattr = NULL;
4945	bms_free(relation->rd_keyattr);
4946	relation->rd_keyattr = NULL;
4947	bms_free(relation->rd_pkattr);
4948	relation->rd_pkattr = NULL;
4949	bms_free(relation->rd_idattr);
4950	relation->rd_idattr = NULL;
4951
4952	/*
4953	* Now save copies of the bitmaps in the relcache entry. We intentionally
4954	* set rd_indexattr last, because that's the one that signals validity of
4955	* the values; if we run out of memory before making that copy, we won't
4956	* leave the relcache entry looking like the other ones are valid but
4957	* empty.
4958	*/
4959	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4960	relation->rd_keyattr = bms_copy(uindexattrs);
4961	relation->rd_pkattr = bms_copy(pkindexattrs);
4962	relation->rd_idattr = bms_copy(idindexattrs);
4963	relation->rd_indexattr = bms_copy(indexattrs);
4964	MemoryContextSwitchTo(oldcxt);
4965
4966	/ We return our original working copy for caller to play with /
4967	switch (attrKind)
4968	{
4969	case INDEX_ATTR_BITMAP_ALL:
4970	return indexattrs;
4971	case INDEX_ATTR_BITMAP_KEY:
4972	return uindexattrs;
4973	case INDEX_ATTR_BITMAP_PRIMARY_KEY:
4974	return pkindexattrs;
4975	case INDEX_ATTR_BITMAP_IDENTITY_KEY:
4976	return idindexattrs;
4977	default:
4978	elog(ERROR, "unknown attrKind %u", attrKind);
4979	return NULL;
4980	}
4981	}
4982
4983	/*
4984	* RelationGetExclusionInfo -- get info about index's exclusion constraint
4985	*
4986	* This should be called only for an index that is known to have an
4987	* associated exclusion constraint. It returns arrays (palloc'd in caller's
4988	* context) of the exclusion operator OIDs, their underlying functions'
4989	* OIDs, and their strategy numbers in the index's opclasses. We cache
4990	* all this information since it requires a fair amount of work to get.
4991	*/
4992	void
4993	RelationGetExclusionInfo(Relation indexRelation,
4994	Oid **operators,
4995	Oid **procs,
4996	uint16 **strategies)
4997	{
4998	int indnkeyatts;
4999	Oid *ops;
5000	Oid *funcs;
5001	uint16 *strats;
5002	Relation conrel;
5003	SysScanDesc conscan;
5004	ScanKeyData skey[`1`];
5005	HeapTuple htup;
5006	bool found;
5007	MemoryContext oldcxt;
5008	int i;
5009
5010	indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
5011
5012	/ Allocate result space in caller context /
5013	operators = ops = (Oid ) palloc(sizeof(Oid) * indnkeyatts);
5014	procs = funcs = (Oid ) palloc(sizeof(Oid) * indnkeyatts);
5015	strategies = strats = (uint16 ) palloc(sizeof(uint16) * indnkeyatts);
5016
5017	/ Quick exit if we have the data cached already /
5018	if (indexRelation->rd_exclstrats != NULL)
5019	{
5020	memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts);
5021	memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts);
5022	memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts);
5023	return;
5024	}
5025
5026	/*
5027	* Search pg_constraint for the constraint associated with the index. To
5028	* make this not too painfully slow, we use the index on conrelid; that
5029	* will hold the parent relation's OID not the index's own OID.
5030	*
5031	* Note: if we wanted to rely on the constraint name matching the index's
5032	* name, we could just do a direct lookup using pg_constraint's unique
5033	* index. For the moment it doesn't seem worth requiring that.
5034	*/
5035	ScanKeyInit(&skey[`0`],
5036	Anum_pg_constraint_conrelid,
5037	BTEqualStrategyNumber, F_OIDEQ,
5038	ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5039
5040	conrel = table_open(ConstraintRelationId, AccessShareLock);
5041	conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
5042	NULL, `1`, skey);
5043	found = false;
5044
5045	while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5046	{
5047	Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
5048	Datum val;
5049	bool isnull;
5050	ArrayType *arr;
5051	int nelem;
5052
5053	/ We want the exclusion constraint owning the index /
5054	if (conform->contype != CONSTRAINT_EXCLUSION \|\|
5055	conform->conindid != RelationGetRelid(indexRelation))
5056	continue;
5057
5058	/ There should be only one /
5059	if (found)
5060	elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5061	RelationGetRelationName(indexRelation));
5062	found = true;
5063
5064	/ Extract the operator OIDS from conexclop /
5065	val = fastgetattr(htup,
5066	Anum_pg_constraint_conexclop,
5067	conrel->rd_att, &isnull);
5068	if (isnull)
5069	elog(ERROR, "null conexclop for rel %s",
5070	RelationGetRelationName(indexRelation));
5071
5072	arr = DatumGetArrayTypeP(val); / ensure not toasted /
5073	nelem = ARR_DIMS(arr)[`0`];
5074	if (ARR_NDIM(arr) != `1` \|\|
5075	nelem != indnkeyatts \|\|
5076	ARR_HASNULL(arr) \|\|
5077	ARR_ELEMTYPE(arr) != OIDOID)
5078	elog(ERROR, "conexclop is not a 1-D Oid array");
5079
5080	memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts);
5081	}
5082
5083	systable_endscan(conscan);
5084	table_close(conrel, AccessShareLock);
5085
5086	if (!found)
5087	elog(ERROR, "exclusion constraint record missing for rel %s",
5088	RelationGetRelationName(indexRelation));
5089
5090	/ We need the func OIDs and strategy numbers too /
5091	for (i = `0`; i < indnkeyatts; i++)
5092	{
5093	funcs[i] = get_opcode(ops[i]);
5094	strats[i] = get_op_opfamily_strategy(ops[i],
5095	indexRelation->rd_opfamily[i]);
5096	/ shouldn't fail, since it was checked at index creation /
5097	if (strats[i] == InvalidStrategy)
5098	elog(ERROR, "could not find strategy for operator %u in family %u",
5099	ops[i], indexRelation->rd_opfamily[i]);
5100	}
5101
5102	/ Save a copy of the results in the relcache entry. /
5103	oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5104	indexRelation->rd_exclops = (Oid ) palloc(sizeof(Oid) indnkeyatts);
5105	indexRelation->rd_exclprocs = (Oid ) palloc(sizeof(Oid) indnkeyatts);
5106	indexRelation->rd_exclstrats = (uint16 ) palloc(sizeof(uint16) indnkeyatts);
5107	memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts);
5108	memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts);
5109	memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts);
5110	MemoryContextSwitchTo(oldcxt);
5111	}
5112
5113	/*
5114	* Get publication actions for the given relation.
5115	*/
5116	struct PublicationActions *
5117	GetRelationPublicationActions(Relation relation)
5118	{
5119	List *puboids;
5120	ListCell *lc;
5121	MemoryContext oldcxt;
5122	PublicationActions pubactions = palloc0(sizeof*(PublicationActions));
5123
5124	/*
5125	* If not publishable, it publishes no actions. (pgoutput_change() will
5126	* ignore it.)
5127	*/
5128	if (!is_publishable_relation(relation))
5129	return pubactions;
5130
5131	if (relation->rd_pubactions)
5132	return memcpy(pubactions, relation->rd_pubactions,
5133	sizeof(PublicationActions));
5134
5135	/ Fetch the publication membership info. /
5136	puboids = GetRelationPublications(RelationGetRelid(relation));
5137	puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5138
5139	foreach(lc, puboids)
5140	{
5141	Oid pubid = lfirst_oid(lc);
5142	HeapTuple tup;
5143	Form_pg_publication pubform;
5144
5145	tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
5146
5147	if (!HeapTupleIsValid(tup))
5148	elog(ERROR, "cache lookup failed for publication %u", pubid);
5149
5150	pubform = (Form_pg_publication) GETSTRUCT(tup);
5151
5152	pubactions->pubinsert \|= pubform->pubinsert;
5153	pubactions->pubupdate \|= pubform->pubupdate;
5154	pubactions->pubdelete \|= pubform->pubdelete;
5155	pubactions->pubtruncate \|= pubform->pubtruncate;
5156
5157	ReleaseSysCache(tup);
5158
5159	/*
5160	* If we know everything is replicated, there is no point to check for
5161	* other publications.
5162	*/
5163	if (pubactions->pubinsert && pubactions->pubupdate &&
5164	pubactions->pubdelete && pubactions->pubtruncate)
5165	break;
5166	}
5167
5168	if (relation->rd_pubactions)
5169	{
5170	pfree(relation->rd_pubactions);
5171	relation->rd_pubactions = NULL;
5172	}
5173
5174	/ Now save copy of the actions in the relcache entry. /
5175	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
5176	relation->rd_pubactions = palloc(sizeof(PublicationActions));
5177	memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5178	MemoryContextSwitchTo(oldcxt);
5179
5180	return pubactions;
5181	}
5182
5183	/*
5184	* Routines to support ereport() reports of relation-related errors
5185	*
5186	* These could have been put into elog.c, but it seems like a module layering
5187	* violation to have elog.c calling relcache or syscache stuff --- and we
5188	* definitely don't want elog.h including rel.h. So we put them here.
5189	*/
5190
5191	/*
5192	* errtable --- stores schema_name and table_name of a table
5193	* within the current errordata.
5194	*/
5195	int
5196	errtable(Relation rel)
5197	{
5198	err_generic_string(PG_DIAG_SCHEMA_NAME,
5199	get_namespace_name(RelationGetNamespace(rel)));
5200	err_generic_string(PG_DIAG_TABLE_NAME, RelationGetRelationName(rel));
5201
5202	return `0`; / return value does not matter /
5203	}
5204
5205	/*
5206	* errtablecol --- stores schema_name, table_name and column_name
5207	* of a table column within the current errordata.
5208	*
5209	* The column is specified by attribute number --- for most callers, this is
5210	* easier and less error-prone than getting the column name for themselves.
5211	*/
5212	int
5213	errtablecol(Relation rel, int attnum)
5214	{
5215	TupleDesc reldesc = RelationGetDescr(rel);
5216	const char *colname;
5217
5218	/ Use reldesc if it's a user attribute, else consult the catalogs /
5219	if (attnum > `0` && attnum <= reldesc->natts)
5220	colname = NameStr(TupleDescAttr(reldesc, attnum - `1`)->attname);
5221	else
5222	colname = get_attname(RelationGetRelid(rel), attnum, false);
5223
5224	return errtablecolname(rel, colname);
5225	}
5226
5227	/*
5228	* errtablecolname --- stores schema_name, table_name and column_name
5229	* of a table column within the current errordata, where the column name is
5230	* given directly rather than extracted from the relation's catalog data.
5231	*
5232	* Don't use this directly unless errtablecol() is inconvenient for some
5233	* reason. This might possibly be needed during intermediate states in ALTER
5234	* TABLE, for instance.
5235	*/
5236	int
5237	errtablecolname(Relation rel, const char *colname)
5238	{
5239	errtable(rel);
5240	err_generic_string(PG_DIAG_COLUMN_NAME, colname);
5241
5242	return `0`; / return value does not matter /
5243	}
5244
5245	/*
5246	* errtableconstraint --- stores schema_name, table_name and constraint_name
5247	* of a table-related constraint within the current errordata.
5248	*/
5249	int
5250	errtableconstraint(Relation rel, const char *conname)
5251	{
5252	errtable(rel);
5253	err_generic_string(PG_DIAG_CONSTRAINT_NAME, conname);
5254
5255	return `0`; / return value does not matter /
5256	}
5257
5258
5259	/*
5260	* load_relcache_init_file, write_relcache_init_file
5261	*
5262	* In late 1992, we started regularly having databases with more than
5263	* a thousand classes in them. With this number of classes, it became
5264	* critical to do indexed lookups on the system catalogs.
5265	*
5266	* Bootstrapping these lookups is very hard. We want to be able to
5267	* use an index on pg_attribute, for example, but in order to do so,
5268	* we must have read pg_attribute for the attributes in the index,
5269	* which implies that we need to use the index.
5270	*
5271	* In order to get around the problem, we do the following:
5272	*
5273	* + When the database system is initialized (at initdb time), we
5274	* don't use indexes. We do sequential scans.
5275	*
5276	* + When the backend is started up in normal mode, we load an image
5277	* of the appropriate relation descriptors, in internal format,
5278	* from an initialization file in the data/base/... directory.
5279	*
5280	* + If the initialization file isn't there, then we create the
5281	* relation descriptors using sequential scans and write 'em to
5282	* the initialization file for use by subsequent backends.
5283	*
5284	* As of Postgres 9.0, there is one local initialization file in each
5285	* database, plus one shared initialization file for shared catalogs.
5286	*
5287	* We could dispense with the initialization files and just build the
5288	* critical reldescs the hard way on every backend startup, but that
5289	* slows down backend startup noticeably.
5290	*
5291	* We can in fact go further, and save more relcache entries than
5292	* just the ones that are absolutely critical; this allows us to speed
5293	* up backend startup by not having to build such entries the hard way.
5294	* Presently, all the catalog and index entries that are referred to
5295	* by catcaches are stored in the initialization files.
5296	*
5297	* The same mechanism that detects when catcache and relcache entries
5298	* need to be invalidated (due to catalog updates) also arranges to
5299	* unlink the initialization files when the contents may be out of date.
5300	* The files will then be rebuilt during the next backend startup.
5301	*/
5302
5303	/*
5304	* load_relcache_init_file -- attempt to load cache from the shared
5305	* or local cache init file
5306	*
5307	* If successful, return true and set criticalRelcachesBuilt or
5308	* criticalSharedRelcachesBuilt to true.
5309	* If not successful, return false.
5310	*
5311	* NOTE: we assume we are already switched into CacheMemoryContext.
5312	*/
5313	static bool
5314	load_relcache_init_file(bool shared)
5315	{
5316	FILE *fp;
5317	char initfilename[MAXPGPATH];
5318	Relation *rels;
5319	int relno,
5320	num_rels,
5321	max_rels,
5322	nailed_rels,
5323	nailed_indexes,
5324	magic;
5325	int i;
5326
5327	if (shared)
5328	snprintf(initfilename, sizeof(initfilename), "global/%s",
5329	RELCACHE_INIT_FILENAME);
5330	else
5331	snprintf(initfilename, sizeof(initfilename), "%s/%s",
5332	DatabasePath, RELCACHE_INIT_FILENAME);
5333
5334	fp = AllocateFile(initfilename, PG_BINARY_R);
5335	if (fp == NULL)
5336	return false;
5337
5338	/*
5339	* Read the index relcache entries from the file. Note we will not enter
5340	* any of them into the cache if the read fails partway through; this
5341	* helps to guard against broken init files.
5342	*/
5343	max_rels = `100`;
5344	rels = (Relation ) palloc(max_rels sizeof(Relation));
5345	num_rels = `0`;
5346	nailed_rels = nailed_indexes = `0`;
5347
5348	/ check for correct magic number (compatible version) /
5349	if (fread(&magic, `1`, sizeof(magic), fp) != sizeof(magic))
5350	goto read_failed;
5351	if (magic != RELCACHE_INIT_FILEMAGIC)
5352	goto read_failed;
5353
5354	for (relno = `0`;; relno++)
5355	{
5356	Size len;
5357	size_t nread;
5358	Relation rel;
5359	Form_pg_class relform;
5360	bool has_not_null;
5361
5362	/ first read the relation descriptor length /
5363	nread = fread(&len, `1`, sizeof(len), fp);
5364	if (nread != sizeof(len))
5365	{
5366	if (nread == `0`)
5367	break; / end of file /
5368	goto read_failed;
5369	}
5370
5371	/ safety check for incompatible relcache layout /
5372	if (len != sizeof(RelationData))
5373	goto read_failed;
5374
5375	/ allocate another relcache header /
5376	if (num_rels >= max_rels)
5377	{
5378	max_rels *= `2`;
5379	rels = (Relation ) repalloc(rels, max_rels sizeof(Relation));
5380	}
5381
5382	rel = rels[num_rels++] = (Relation) palloc(len);
5383
5384	/ then, read the Relation structure /
5385	if (fread(rel, `1`, len, fp) != len)
5386	goto read_failed;
5387
5388	/ next read the relation tuple form /
5389	if (fread(&len, `1`, sizeof(len), fp) != sizeof(len))
5390	goto read_failed;
5391
5392	relform = (Form_pg_class) palloc(len);
5393	if (fread(relform, `1`, len, fp) != len)
5394	goto read_failed;
5395
5396	rel->rd_rel = relform;
5397
5398	/ initialize attribute tuple forms /
5399	rel->rd_att = CreateTemplateTupleDesc(relform->relnatts);
5400	rel->rd_att->tdrefcount = `1`; / mark as refcounted /
5401
5402	rel->rd_att->tdtypeid = relform->reltype;
5403	rel->rd_att->tdtypmod = -`1`; / unnecessary, but... /
5404
5405	/ next read all the attribute tuple form data entries /
5406	has_not_null = false;
5407	for (i = `0`; i < relform->relnatts; i++)
5408	{
5409	Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i);
5410
5411	if (fread(&len, `1`, sizeof(len), fp) != sizeof(len))
5412	goto read_failed;
5413	if (len != ATTRIBUTE_FIXED_PART_SIZE)
5414	goto read_failed;
5415	if (fread(attr, `1`, len, fp) != len)
5416	goto read_failed;
5417
5418	has_not_null \|= attr->attnotnull;
5419	}
5420
5421	/ next read the access method specific field /
5422	if (fread(&len, `1`, sizeof(len), fp) != sizeof(len))
5423	goto read_failed;
5424	if (len > `0`)
5425	{
5426	rel->rd_options = palloc(len);
5427	if (fread(rel->rd_options, `1`, len, fp) != len)
5428	goto read_failed;
5429	if (len != VARSIZE(rel->rd_options))
5430	goto read_failed; / sanity check /
5431	}
5432	else
5433	{
5434	rel->rd_options = NULL;
5435	}
5436
5437	/ mark not-null status /
5438	if (has_not_null)
5439	{
5440	TupleConstr constr = (TupleConstr ) palloc0(sizeof(TupleConstr));
5441
5442	constr->has_not_null = true;
5443	rel->rd_att->constr = constr;
5444	}
5445
5446	/*
5447	* If it's an index, there's more to do. Note we explicitly ignore
5448	* partitioned indexes here.
5449	*/
5450	if (rel->rd_rel->relkind == RELKIND_INDEX)
5451	{
5452	MemoryContext indexcxt;
5453	Oid *opfamily;
5454	Oid *opcintype;
5455	RegProcedure *support;
5456	int nsupport;
5457	int16 *indoption;
5458	Oid *indcollation;
5459
5460	/ Count nailed indexes to ensure we have 'em all /
5461	if (rel->rd_isnailed)
5462	nailed_indexes++;
5463
5464	/ next, read the pg_index tuple /
5465	if (fread(&len, `1`, sizeof(len), fp) != sizeof(len))
5466	goto read_failed;
5467
5468	rel->rd_indextuple = (HeapTuple) palloc(len);
5469	if (fread(rel->rd_indextuple, `1`, len, fp) != len)
5470	goto read_failed;
5471
5472	/ Fix up internal pointers in the tuple -- see heap_copytuple /
5473	rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5474	rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);
5475
5476	/*
5477	* prepare index info context --- parameters should match
5478	* RelationInitIndexAccessInfo
5479	*/
5480	indexcxt = AllocSetContextCreate(CacheMemoryContext,
5481	"index info",
5482	ALLOCSET_SMALL_SIZES);
5483	rel->rd_indexcxt = indexcxt;
5484	MemoryContextCopyAndSetIdentifier(indexcxt,
5485	RelationGetRelationName(rel));
5486
5487	/*
5488	* Now we can fetch the index AM's API struct. (We can't store
5489	* that in the init file, since it contains function pointers that
5490	* might vary across server executions. Fortunately, it should be
5491	* safe to call the amhandler even while bootstrapping indexes.)
5492	*/
5493	InitIndexAmRoutine(rel);
5494
5495	/ next, read the vector of opfamily OIDs /
5496	if (fread(&len, `1`, sizeof(len), fp) != sizeof(len))
5497	goto read_failed;
5498
5499	opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5500	if (fread(opfamily, `1`, len, fp) != len)
5501	goto read_failed;
5502
5503	rel->rd_opfamily = opfamily;
5504
5505	/ next, read the vector of opcintype OIDs /
5506	if (fread(&len, `1`, sizeof(len), fp) != sizeof(len))
5507	goto read_failed;
5508
5509	opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5510	if (fread(opcintype, `1`, len, fp) != len)
5511	goto read_failed;
5512
5513	rel->rd_opcintype = opcintype;
5514
5515	/ next, read the vector of support procedure OIDs /
5516	if (fread(&len, `1`, sizeof(len), fp) != sizeof(len))
5517	goto read_failed;
5518	support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5519	if (fread(support, `1`, len, fp) != len)
5520	goto read_failed;
5521
5522	rel->rd_support = support;
5523
5524	/ next, read the vector of collation OIDs /
5525	if (fread(&len, `1`, sizeof(len), fp) != sizeof(len))
5526	goto read_failed;
5527
5528	indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5529	if (fread(indcollation, `1`, len, fp) != len)
5530	goto read_failed;
5531
5532	rel->rd_indcollation = indcollation;
5533
5534	/ finally, read the vector of indoption values /
5535	if (fread(&len, `1`, sizeof(len), fp) != sizeof(len))
5536	goto read_failed;
5537
5538	indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5539	if (fread(indoption, `1`, len, fp) != len)
5540	goto read_failed;
5541
5542	rel->rd_indoption = indoption;
5543
5544	/ set up zeroed fmgr-info vector /
5545	nsupport = relform->relnatts * rel->rd_indam->amsupport;
5546	rel->rd_supportinfo = (FmgrInfo *)
5547	MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
5548	}
5549	else
5550	{
5551	/ Count nailed rels to ensure we have 'em all /
5552	if (rel->rd_isnailed)
5553	nailed_rels++;
5554
5555	/ Load table AM data /
5556	if (rel->rd_rel->relkind == RELKIND_RELATION \|\|
5557	rel->rd_rel->relkind == RELKIND_SEQUENCE \|\|
5558	rel->rd_rel->relkind == RELKIND_TOASTVALUE \|\|
5559	rel->rd_rel->relkind == RELKIND_MATVIEW)
5560	RelationInitTableAccessMethod(rel);
5561
5562	Assert(rel->rd_index == NULL);
5563	Assert(rel->rd_indextuple == NULL);
5564	Assert(rel->rd_indexcxt == NULL);
5565	Assert(rel->rd_indam == NULL);
5566	Assert(rel->rd_opfamily == NULL);
5567	Assert(rel->rd_opcintype == NULL);
5568	Assert(rel->rd_support == NULL);
5569	Assert(rel->rd_supportinfo == NULL);
5570	Assert(rel->rd_indoption == NULL);
5571	Assert(rel->rd_indcollation == NULL);
5572	}
5573
5574	/*
5575	* Rules and triggers are not saved (mainly because the internal
5576	* format is complex and subject to change). They must be rebuilt if
5577	* needed by RelationCacheInitializePhase3. This is not expected to
5578	* be a big performance hit since few system catalogs have such. Ditto
5579	* for RLS policy data, partition info, index expressions, predicates,
5580	* exclusion info, and FDW info.
5581	*/
5582	rel->rd_rules = NULL;
5583	rel->rd_rulescxt = NULL;
5584	rel->trigdesc = NULL;
5585	rel->rd_rsdesc = NULL;
5586	rel->rd_partkey = NULL;
5587	rel->rd_partkeycxt = NULL;
5588	rel->rd_partdesc = NULL;
5589	rel->rd_pdcxt = NULL;
5590	rel->rd_partcheck = NIL;
5591	rel->rd_partcheckvalid = false;
5592	rel->rd_partcheckcxt = NULL;
5593	rel->rd_indexprs = NIL;
5594	rel->rd_indpred = NIL;
5595	rel->rd_exclops = NULL;
5596	rel->rd_exclprocs = NULL;
5597	rel->rd_exclstrats = NULL;
5598	rel->rd_fdwroutine = NULL;
5599
5600	/*
5601	* Reset transient-state fields in the relcache entry
5602	*/
5603	rel->rd_smgr = NULL;
5604	if (rel->rd_isnailed)
5605	rel->rd_refcnt = `1`;
5606	else
5607	rel->rd_refcnt = `0`;
5608	rel->rd_indexvalid = false;
5609	rel->rd_indexlist = NIL;
5610	rel->rd_pkindex = InvalidOid;
5611	rel->rd_replidindex = InvalidOid;
5612	rel->rd_indexattr = NULL;
5613	rel->rd_keyattr = NULL;
5614	rel->rd_pkattr = NULL;
5615	rel->rd_idattr = NULL;
5616	rel->rd_pubactions = NULL;
5617	rel->rd_statvalid = false;
5618	rel->rd_statlist = NIL;
5619	rel->rd_fkeyvalid = false;
5620	rel->rd_fkeylist = NIL;
5621	rel->rd_createSubid = InvalidSubTransactionId;
5622	rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
5623	rel->rd_amcache = NULL;
5624	MemSet(&rel->pgstat_info, `0`, sizeof(rel->pgstat_info));
5625
5626	/*
5627	* Recompute lock and physical addressing info. This is needed in
5628	* case the pg_internal.init file was copied from some other database
5629	* by CREATE DATABASE.
5630	*/
5631	RelationInitLockInfo(rel);
5632	RelationInitPhysicalAddr(rel);
5633	}
5634
5635	/*
5636	* We reached the end of the init file without apparent problem. Did we
5637	* get the right number of nailed items? This is a useful crosscheck in
5638	* case the set of critical rels or indexes changes. However, that should
5639	* not happen in a normally-running system, so let's bleat if it does.
5640	*
5641	* For the shared init file, we're called before client authentication is
5642	* done, which means that elog(WARNING) will go only to the postmaster
5643	* log, where it's easily missed. To ensure that developers notice bad
5644	* values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put
5645	* an Assert(false) there.
5646	*/
5647	if (shared)
5648	{
5649	if (nailed_rels != NUM_CRITICAL_SHARED_RELS \|\|
5650	nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
5651	{
5652	elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively",
5653	nailed_rels, nailed_indexes,
5654	NUM_CRITICAL_SHARED_RELS, NUM_CRITICAL_SHARED_INDEXES);
5655	/ Make sure we get developers' attention about this /
5656	Assert(false);
5657	/ In production builds, recover by bootstrapping the relcache /
5658	goto read_failed;
5659	}
5660	}
5661	else
5662	{
5663	if (nailed_rels != NUM_CRITICAL_LOCAL_RELS \|\|
5664	nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
5665	{
5666	elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively",
5667	nailed_rels, nailed_indexes,
5668	NUM_CRITICAL_LOCAL_RELS, NUM_CRITICAL_LOCAL_INDEXES);
5669	/ We don't need an Assert() in this case /
5670	goto read_failed;
5671	}
5672	}
5673
5674	/*
5675	* OK, all appears well.
5676	*
5677	* Now insert all the new relcache entries into the cache.
5678	*/
5679	for (relno = `0`; relno < num_rels; relno++)
5680	{
5681	RelationCacheInsert(rels[relno], false);
5682	}
5683
5684	pfree(rels);
5685	FreeFile(fp);
5686
5687	if (shared)
5688	criticalSharedRelcachesBuilt = true;
5689	else
5690	criticalRelcachesBuilt = true;
5691	return true;
5692
5693	/*
5694	* init file is broken, so do it the hard way. We don't bother trying to
5695	* free the clutter we just allocated; it's not in the relcache so it
5696	* won't hurt.
5697	*/
5698	read_failed:
5699	pfree(rels);
5700	FreeFile(fp);
5701
5702	return false;
5703	}
5704
5705	/*
5706	* Write out a new initialization file with the current contents
5707	* of the relcache (either shared rels or local rels, as indicated).
5708	*/
5709	static void
5710	write_relcache_init_file(bool shared)
5711	{
5712	FILE *fp;
5713	char tempfilename[MAXPGPATH];
5714	char finalfilename[MAXPGPATH];
5715	int magic;
5716	HASH_SEQ_STATUS status;
5717	RelIdCacheEnt *idhentry;
5718	int i;
5719
5720	/*
5721	* If we have already received any relcache inval events, there's no
5722	* chance of succeeding so we may as well skip the whole thing.
5723	*/
5724	if (relcacheInvalsReceived != `0L`)
5725	return;
5726
5727	/*
5728	* We must write a temporary file and rename it into place. Otherwise,
5729	* another backend starting at about the same time might crash trying to
5730	* read the partially-complete file.
5731	*/
5732	if (shared)
5733	{
5734	snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
5735	RELCACHE_INIT_FILENAME, MyProcPid);
5736	snprintf(finalfilename, sizeof(finalfilename), "global/%s",
5737	RELCACHE_INIT_FILENAME);
5738	}
5739	else
5740	{
5741	snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
5742	DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
5743	snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
5744	DatabasePath, RELCACHE_INIT_FILENAME);
5745	}
5746
5747	unlink(tempfilename); / in case it exists w/wrong permissions /
5748
5749	fp = AllocateFile(tempfilename, PG_BINARY_W);
5750	if (fp == NULL)
5751	{
5752	/*
5753	* We used to consider this a fatal error, but we might as well
5754	* continue with backend startup ...
5755	*/
5756	ereport(WARNING,
5757	(errcode_for_file_access(),
5758	errmsg("could not create relation-cache initialization file \"%s\": %m",
5759	tempfilename),
5760	errdetail("Continuing anyway, but there's something wrong.")));
5761	return;
5762	}
5763
5764	/*
5765	* Write a magic number to serve as a file version identifier. We can
5766	* change the magic number whenever the relcache layout changes.
5767	*/
5768	magic = RELCACHE_INIT_FILEMAGIC;
5769	if (fwrite(&magic, `1`, sizeof(magic), fp) != sizeof(magic))
5770	elog(FATAL, "could not write init file");
5771
5772	/*
5773	* Write all the appropriate reldescs (in no particular order).
5774	*/
5775	hash_seq_init(&status, RelationIdCache);
5776
5777	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
5778	{
5779	Relation rel = idhentry->reldesc;
5780	Form_pg_class relform = rel->rd_rel;
5781
5782	/ ignore if not correct group /
5783	if (relform->relisshared != shared)
5784	continue;
5785
5786	/*
5787	* Ignore if not supposed to be in init file. We can allow any shared
5788	* relation that's been loaded so far to be in the shared init file,
5789	* but unshared relations must be ones that should be in the local
5790	* file per RelationIdIsInInitFile. (Note: if you want to change the
5791	* criterion for rels to be kept in the init file, see also inval.c.
5792	* The reason for filtering here is to be sure that we don't put
5793	* anything into the local init file for which a relcache inval would
5794	* not cause invalidation of that init file.)
5795	*/
5796	if (!shared && !RelationIdIsInInitFile(RelationGetRelid(rel)))
5797	{
5798	/ Nailed rels had better get stored. /
5799	Assert(!rel->rd_isnailed);
5800	continue;
5801	}
5802
5803	/ first write the relcache entry proper /
5804	write_item(rel, sizeof(RelationData), fp);
5805
5806	/ next write the relation tuple form /
5807	write_item(relform, CLASS_TUPLE_SIZE, fp);
5808
5809	/ next, do all the attribute tuple form data entries /
5810	for (i = `0`; i < relform->relnatts; i++)
5811	{
5812	write_item(TupleDescAttr(rel->rd_att, i),
5813	ATTRIBUTE_FIXED_PART_SIZE, fp);
5814	}
5815
5816	/ next, do the access method specific field /
5817	write_item(rel->rd_options,
5818	(rel->rd_options ? VARSIZE(rel->rd_options) : `0`),
5819	fp);
5820
5821	/*
5822	* If it's an index, there's more to do. Note we explicitly ignore
5823	* partitioned indexes here.
5824	*/
5825	if (rel->rd_rel->relkind == RELKIND_INDEX)
5826	{
5827	/ write the pg_index tuple /
5828	/ we assume this was created by heap_copytuple! /
5829	write_item(rel->rd_indextuple,
5830	HEAPTUPLESIZE + rel->rd_indextuple->t_len,
5831	fp);
5832
5833	/ next, write the vector of opfamily OIDs /
5834	write_item(rel->rd_opfamily,
5835	relform->relnatts * sizeof(Oid),
5836	fp);
5837
5838	/ next, write the vector of opcintype OIDs /
5839	write_item(rel->rd_opcintype,
5840	relform->relnatts * sizeof(Oid),
5841	fp);
5842
5843	/ next, write the vector of support procedure OIDs /
5844	write_item(rel->rd_support,
5845	relform->relnatts * (rel->rd_indam->amsupport * sizeof(RegProcedure)),
5846	fp);
5847
5848	/ next, write the vector of collation OIDs /
5849	write_item(rel->rd_indcollation,
5850	relform->relnatts * sizeof(Oid),
5851	fp);
5852
5853	/ finally, write the vector of indoption values /
5854	write_item(rel->rd_indoption,
5855	relform->relnatts * sizeof(int16),
5856	fp);
5857	}
5858	}
5859
5860	if (FreeFile(fp))
5861	elog(FATAL, "could not write init file");
5862
5863	/*
5864	* Now we have to check whether the data we've so painstakingly
5865	* accumulated is already obsolete due to someone else's just-committed
5866	* catalog changes. If so, we just delete the temp file and leave it to
5867	* the next backend to try again. (Our own relcache entries will be
5868	* updated by SI message processing, but we can't be sure whether what we
5869	* wrote out was up-to-date.)
5870	*
5871	* This mustn't run concurrently with the code that unlinks an init file
5872	* and sends SI messages, so grab a serialization lock for the duration.
5873	*/
5874	LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
5875
5876	/ Make sure we have seen all incoming SI messages /
5877	AcceptInvalidationMessages();
5878
5879	/*
5880	* If we have received any SI relcache invals since backend start, assume
5881	* we may have written out-of-date data.
5882	*/
5883	if (relcacheInvalsReceived == `0L`)
5884	{
5885	/*
5886	* OK, rename the temp file to its final name, deleting any
5887	* previously-existing init file.
5888	*
5889	* Note: a failure here is possible under Cygwin, if some other
5890	* backend is holding open an unlinked-but-not-yet-gone init file. So
5891	* treat this as a noncritical failure; just remove the useless temp
5892	* file on failure.
5893	*/
5894	if (rename(tempfilename, finalfilename) < `0`)
5895	unlink(tempfilename);
5896	}
5897	else
5898	{
5899	/ Delete the already-obsolete temp file /
5900	unlink(tempfilename);
5901	}
5902
5903	LWLockRelease(RelCacheInitLock);
5904	}
5905
5906	/ write a chunk of data preceded by its length /
5907	static void
5908	write_item(const void data, Size len, FILE fp)
5909	{
5910	if (fwrite(&len, `1`, sizeof(len), fp) != sizeof(len))
5911	elog(FATAL, "could not write init file");
5912	if (fwrite(data, `1`, len, fp) != len)
5913	elog(FATAL, "could not write init file");
5914	}
5915
5916	/*
5917	* Determine whether a given relation (identified by OID) is one of the ones
5918	* we should store in a relcache init file.
5919	*
5920	* We must cache all nailed rels, and for efficiency we should cache every rel
5921	* that supports a syscache. The former set is almost but not quite a subset
5922	* of the latter. The special cases are relations where
5923	* RelationCacheInitializePhase2/3 chooses to nail for efficiency reasons, but
5924	* which do not support any syscache.
5925	*/
5926	bool
5927	RelationIdIsInInitFile(Oid relationId)
5928	{
5929	if (relationId == SharedSecLabelRelationId \|\|
5930	relationId == TriggerRelidNameIndexId \|\|
5931	relationId == DatabaseNameIndexId \|\|
5932	relationId == SharedSecLabelObjectIndexId)
5933	{
5934	/*
5935	* If this Assert fails, we don't need the applicable special case
5936	* anymore.
5937	*/
5938	Assert(!RelationSupportsSysCache(relationId));
5939	return true;
5940	}
5941	return RelationSupportsSysCache(relationId);
5942	}
5943
5944	/*
5945	* Invalidate (remove) the init file during commit of a transaction that
5946	* changed one or more of the relation cache entries that are kept in the
5947	* local init file.
5948	*
5949	* To be safe against concurrent inspection or rewriting of the init file,
5950	* we must take RelCacheInitLock, then remove the old init file, then send
5951	* the SI messages that include relcache inval for such relations, and then
5952	* release RelCacheInitLock. This serializes the whole affair against
5953	* write_relcache_init_file, so that we can be sure that any other process
5954	* that's concurrently trying to create a new init file won't move an
5955	* already-stale version into place after we unlink. Also, because we unlink
5956	* before sending the SI messages, a backend that's currently starting cannot
5957	* read the now-obsolete init file and then miss the SI messages that will
5958	* force it to update its relcache entries. (This works because the backend
5959	* startup sequence gets into the sinval array before trying to load the init
5960	* file.)
5961	*
5962	* We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
5963	* then release the lock in RelationCacheInitFilePostInvalidate. Caller must
5964	* send any pending SI messages between those calls.
5965	*/
5966	void
5967	RelationCacheInitFilePreInvalidate(void)
5968	{
5969	char localinitfname[MAXPGPATH];
5970	char sharedinitfname[MAXPGPATH];
5971
5972	if (DatabasePath)
5973	snprintf(localinitfname, sizeof(localinitfname), "%s/%s",
5974	DatabasePath, RELCACHE_INIT_FILENAME);
5975	snprintf(sharedinitfname, sizeof(sharedinitfname), "global/%s",
5976	RELCACHE_INIT_FILENAME);
5977
5978	LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
5979
5980	/*
5981	* The files might not be there if no backend has been started since the
5982	* last removal. But complain about failures other than ENOENT with
5983	* ERROR. Fortunately, it's not too late to abort the transaction if we
5984	* can't get rid of the would-be-obsolete init file.
5985	*/
5986	if (DatabasePath)
5987	unlink_initfile(localinitfname, ERROR);
5988	unlink_initfile(sharedinitfname, ERROR);
5989	}
5990
5991	void
5992	RelationCacheInitFilePostInvalidate(void)
5993	{
5994	LWLockRelease(RelCacheInitLock);
5995	}
5996
5997	/*
5998	* Remove the init files during postmaster startup.
5999	*
6000	* We used to keep the init files across restarts, but that is unsafe in PITR
6001	* scenarios, and even in simple crash-recovery cases there are windows for
6002	* the init files to become out-of-sync with the database. So now we just
6003	* remove them during startup and expect the first backend launch to rebuild
6004	* them. Of course, this has to happen in each database of the cluster.
6005	*/
6006	void
6007	RelationCacheInitFileRemove(void)
6008	{
6009	const char *tblspcdir = "pg_tblspc";
6010	DIR *dir;
6011	struct dirent *de;
6012	char path[MAXPGPATH + `10` + sizeof(TABLESPACE_VERSION_DIRECTORY)];
6013
6014	snprintf(path, sizeof(path), "global/%s",
6015	RELCACHE_INIT_FILENAME);
6016	unlink_initfile(path, LOG);
6017
6018	/ Scan everything in the default tablespace /
6019	RelationCacheInitFileRemoveInDir("base");
6020
6021	/ Scan the tablespace link directory to find non-default tablespaces /
6022	dir = AllocateDir(tblspcdir);
6023
6024	while ((de = ReadDirExtended(dir, tblspcdir, LOG)) != NULL)
6025	{
6026	if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
6027	{
6028	/ Scan the tablespace dir for per-database dirs /
6029	snprintf(path, sizeof(path), "%s/%s/%s",
6030	tblspcdir, de->d_name, TABLESPACE_VERSION_DIRECTORY);
6031	RelationCacheInitFileRemoveInDir(path);
6032	}
6033	}
6034
6035	FreeDir(dir);
6036	}
6037
6038	/ Process one per-tablespace directory for RelationCacheInitFileRemove /
6039	static void
6040	RelationCacheInitFileRemoveInDir(const char *tblspcpath)
6041	{
6042	DIR *dir;
6043	struct dirent *de;
6044	char initfilename[MAXPGPATH * `2`];
6045
6046	/ Scan the tablespace directory to find per-database directories /
6047	dir = AllocateDir(tblspcpath);
6048
6049	while ((de = ReadDirExtended(dir, tblspcpath, LOG)) != NULL)
6050	{
6051	if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
6052	{
6053	/ Try to remove the init file in each database /
6054	snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
6055	tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
6056	unlink_initfile(initfilename, LOG);
6057	}
6058	}
6059
6060	FreeDir(dir);
6061	}
6062
/*
 * Remove one init file, tolerating its absence.
 *
 * initfilename: path of the file to unlink
 * elevel: error level passed to ereport() when unlink() fails for any
 * reason other than ENOENT
 */
static void
unlink_initfile(const char *initfilename, int elevel)
{
	if (unlink(initfilename) < 0)
	{
		/* It might not be there, but log any error other than ENOENT */
		if (errno != ENOENT)
			ereport(elevel,
					(errcode_for_file_access(),
					 errmsg("could not remove cache file \"%s\": %m",
							initfilename)));
	}
}
6076

Browse the source code of PostgreSQL/src/backend/utils/cache/relcache.c