predicate_internals.h source code [PostgreSQL/src/include/storage/predicate_internals.h]

1	/-------------------------------------------------------------------------*
2	*
3	* predicate_internals.h
4	* POSTGRES internal predicate locking definitions.
5	*
6	*
7	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8	* Portions Copyright (c) 1994, Regents of the University of California
9	*
10	* src/include/storage/predicate_internals.h
11	*
12	*-------------------------------------------------------------------------
13	*/
14	#ifndef PREDICATE_INTERNALS_H
15	#define PREDICATE_INTERNALS_H
16
17	#include "storage/lock.h"
18	#include "storage/lwlock.h"
19
20	/*
21	* Commit number.
22	*/
23	typedef uint64 SerCommitSeqNo;
24
25	/*
26	* Reserved commit sequence numbers:
27	* - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
28	* used as a SerCommitSeqNo, even an invalid one
29	* - InvalidSerCommitSeqNo is used to indicate a transaction that
30	* hasn't committed yet, so use a number greater than all valid
31	* ones to make comparison do the expected thing
32	* - RecoverySerCommitSeqNo is used to refer to transactions that
33	* happened before a crash/recovery, since we restart the sequence
34	* at that point. It's earlier than all normal sequence numbers,
35	* and is only used by recovered prepared transactions
36	*/
37	#define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
38	#define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
39	#define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
40
41	/*
42	* The SERIALIZABLEXACT struct contains information needed for each
43	* serializable database transaction to support SSI techniques.
44	*
45	* A home-grown list is maintained in shared memory to manage these.
46	* An entry is used when the serializable transaction acquires a snapshot.
47	* Unless the transaction is rolled back, this entry must generally remain
48	* until all concurrent transactions have completed. (There are special
49	* optimizations for READ ONLY transactions which often allow them to be
50	* cleaned up earlier.) A transaction which is rolled back is cleaned up
51	* as soon as possible.
52	*
53	* Eligibility for cleanup of committed transactions is generally determined
54	* by comparing the transaction's finishedBefore field to
55	* SerializableGlobalXmin.
56	*/
57	typedef struct SERIALIZABLEXACT
58	{
59	VirtualTransactionId vxid; / The executing process always has one of*
60	* these. */
61
62	/*
63	* We use two numbers to track the order that transactions commit. Before
64	* commit, a transaction is marked as prepared, and prepareSeqNo is set.
65	* Shortly after commit, it's marked as committed, and commitSeqNo is set.
66	* This doesn't give a strict commit order, but these two values together
67	* are good enough for us, as we can always err on the safe side and
68	* assume that there's a conflict, if we can't be sure of the exact
69	* ordering of two commits.
70	*
71	* Note that a transaction is marked as prepared for a short period during
72	* commit processing, even if two-phase commit is not used. But with
73	* two-phase commit, a transaction can stay in prepared state for some
74	* time.
75	*/
76	SerCommitSeqNo prepareSeqNo;
77	SerCommitSeqNo commitSeqNo;
78
79	/ these values are not both interesting at the same time /
80	union
81	{
82	SerCommitSeqNo earliestOutConflictCommit; / when committed with*
83	* conflict out */
84	SerCommitSeqNo lastCommitBeforeSnapshot; / when not committed or*
85	* no conflict out */
86	} SeqNo;
87	SHM_QUEUE outConflicts; / list of write transactions whose data we*
88	* couldn't read. */
89	SHM_QUEUE inConflicts; / list of read transactions which couldn't*
90	* see our write. */
91	SHM_QUEUE predicateLocks; / list of associated PREDICATELOCK objects /
92	SHM_QUEUE finishedLink; / list link in*
93	* FinishedSerializableTransactions */
94
95	LWLock predicateLockListLock; / protects predicateLocks in parallel*
96	* mode */
97
98	/*
99	* for r/o transactions: list of concurrent r/w transactions that we could
100	* potentially have conflicts with, and vice versa for r/w transactions
101	*/
102	SHM_QUEUE possibleUnsafeConflicts;
103
104	TransactionId topXid; / top level xid for the transaction, if one*
105	* exists; else invalid */
106	TransactionId finishedBefore; / invalid means still running; else the*
107	* struct expires when no serializable
108	* xids are before this. */
109	TransactionId xmin; / the transaction's snapshot xmin /
110	uint32 flags; / OR'd combination of values defined below /
111	int pid; / pid of associated process /
112	} SERIALIZABLEXACT;
113
114	#define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
115	#define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
116	#define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
117	#define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
118	/*
119	* The following flag actually means that the flagged transaction has a
120	* conflict out to a transaction which committed ahead of it. It's hard
121	* to get that into a name of a reasonable length.
122	*/
123	#define SXACT_FLAG_CONFLICT_OUT 0x00000010
124	#define SXACT_FLAG_READ_ONLY 0x00000020
125	#define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
126	#define SXACT_FLAG_RO_SAFE 0x00000080
127	#define SXACT_FLAG_RO_UNSAFE 0x00000100
128	#define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
129	#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
130	/*
131	* The following flag means the transaction has been partially released
132	* already, but is being preserved because parallel workers might have a
133	* reference to it. It'll be recycled by the leader at end-of-transaction.
134	*/
135	#define SXACT_FLAG_PARTIALLY_RELEASED 0x00000800
136
137	/*
138	* The following types are used to provide an ad hoc list for holding
139	* SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
140	* access these by key -- there are direct pointers to these objects where
141	* needed. If a shared memory list is created, these types can probably be
142	* eliminated in favor of using the general solution.
143	*/
144	typedef struct PredXactListElementData
145	{
146	SHM_QUEUE link;
147	SERIALIZABLEXACT sxact;
148	} PredXactListElementData;
149
150	typedef struct PredXactListElementData *PredXactListElement;
151
152	#define PredXactListElementDataSize \
153	((Size)MAXALIGN(sizeof(PredXactListElementData)))
154
155	typedef struct PredXactListData
156	{
157	SHM_QUEUE availableList;
158	SHM_QUEUE activeList;
159
160	/*
161	* These global variables are maintained when registering and cleaning up
162	* serializable transactions. They must be global across all backends,
163	* but are not needed outside the predicate.c source file. Protected by
164	* SerializableXactHashLock.
165	*/
166	TransactionId SxactGlobalXmin; / global xmin for active serializable*
167	* transactions */
168	int SxactGlobalXminCount; / how many active serializable*
169	* transactions have this xmin */
170	int WritableSxactCount; / how many non-read-only serializable*
171	* transactions are active */
172	SerCommitSeqNo LastSxactCommitSeqNo; / a strictly monotonically*
173	* increasing number for commits
174	* of serializable transactions */
175	/ Protected by SerializableXactHashLock. /
176	SerCommitSeqNo CanPartialClearThrough; / can clear predicate locks and*
177	* inConflicts for committed
178	* transactions through this seq
179	* no */
180	/ Protected by SerializableFinishedListLock. /
181	SerCommitSeqNo HavePartialClearedThrough; / have cleared through this*
182	* seq no */
183	SERIALIZABLEXACT OldCommittedSxact; /* shared copy of dummy sxact /
184
185	PredXactListElement element;
186	} PredXactListData;
187
188	typedef struct PredXactListData *PredXactList;
189
190	#define PredXactListDataSize \
191	((Size)MAXALIGN(sizeof(PredXactListData)))
192
193
194	/*
195	* The following types are used to provide lists of rw-conflicts between
196	* pairs of transactions. Since exactly the same information is needed,
197	* they are also used to record possible unsafe transaction relationships
198	* for purposes of identifying safe snapshots for read-only transactions.
199	*
200	* When a RWConflictData is not in use to record either type of relationship
201	* between a pair of transactions, it is kept on an "available" list. The
202	* outLink field is used for maintaining that list.
203	*/
204	typedef struct RWConflictData
205	{
206	SHM_QUEUE outLink; / link for list of conflicts out from a sxact /
207	SHM_QUEUE inLink; / link for list of conflicts in to a sxact /
208	SERIALIZABLEXACT *sxactOut;
209	SERIALIZABLEXACT *sxactIn;
210	} RWConflictData;
211
212	typedef struct RWConflictData *RWConflict;
213
214	#define RWConflictDataSize \
215	((Size)MAXALIGN(sizeof(RWConflictData)))
216
217	typedef struct RWConflictPoolHeaderData
218	{
219	SHM_QUEUE availableList;
220	RWConflict element;
221	} RWConflictPoolHeaderData;
222
223	typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;
224
225	#define RWConflictPoolHeaderDataSize \
226	((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
227
228
229	/*
230	* The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
231	* transaction or any of its subtransactions.
232	*/
233	typedef struct SERIALIZABLEXIDTAG
234	{
235	TransactionId xid;
236	} SERIALIZABLEXIDTAG;
237
238	/*
239	* The SERIALIZABLEXID struct provides a link from a TransactionId for a
240	* serializable transaction to the related SERIALIZABLEXACT record, even if
241	* the transaction has completed and its connection has been closed.
242	*
243	* These are created as new top level transaction IDs are first assigned to
244	* transactions which are participating in predicate locking. This may
245	* never happen for a particular transaction if it doesn't write anything.
246	* They are removed with their related serializable transaction objects.
247	*
248	* The SubTransGetTopmostTransaction method is used where necessary to get
249	* from an XID which might be from a subtransaction to the top level XID.
250	*/
251	typedef struct SERIALIZABLEXID
252	{
253	/ hash key /
254	SERIALIZABLEXIDTAG tag;
255
256	/ data /
257	SERIALIZABLEXACT myXact; /* pointer to the top level transaction data /
258	} SERIALIZABLEXID;
259
260
261	/*
262	* The PREDICATELOCKTARGETTAG struct identifies a database object which can
263	* be the target of predicate locks.
264	*
265	* Note that the hash function being used doesn't properly respect tag
266	* length -- if the length of the structure isn't a multiple of four bytes it
267	* will go to a four byte boundary past the end of the tag. If you change
268	* this struct, make sure any slack space is initialized, so that any random
269	* bytes in the middle or at the end are not included in the hash.
270	*
271	* TODO SSI: If we always use the same fields for the same type of value, we
272	* should rename these. Holding off until it's clear there are no exceptions.
273	* Since indexes are relations with blocks and tuples, it's looking likely that
274	* the rename will be possible. If not, we may need to divide the last field
275	* and use part of it for a target type, so that we know how to interpret the
276	* data..
277	*/
278	typedef struct PREDICATELOCKTARGETTAG
279	{
280	uint32 locktag_field1; / a 32-bit ID field /
281	uint32 locktag_field2; / a 32-bit ID field /
282	uint32 locktag_field3; / a 32-bit ID field /
283	uint32 locktag_field4; / a 32-bit ID field /
284	} PREDICATELOCKTARGETTAG;
285
286	/*
287	* The PREDICATELOCKTARGET struct represents a database object on which there
288	* are predicate locks.
289	*
290	* A hash list of these objects is maintained in shared memory. An entry is
291	* added when a predicate lock is requested on an object which doesn't
292	* already have one. An entry is removed when the last lock is removed from
293	* its list.
294	*/
295	typedef struct PREDICATELOCKTARGET
296	{
297	/ hash key /
298	PREDICATELOCKTARGETTAG tag; / unique identifier of lockable object /
299
300	/ data /
301	SHM_QUEUE predicateLocks; / list of PREDICATELOCK objects assoc. with*
302	* predicate lock target */
303	} PREDICATELOCKTARGET;
304
305
306	/*
307	* The PREDICATELOCKTAG struct identifies an individual predicate lock.
308	*
309	* It is the combination of predicate lock target (which is a lockable
310	* object) and a serializable transaction which has acquired a lock on that
311	* target.
312	*/
313	typedef struct PREDICATELOCKTAG
314	{
315	PREDICATELOCKTARGET *myTarget;
316	SERIALIZABLEXACT *myXact;
317	} PREDICATELOCKTAG;
318
319	/*
320	* The PREDICATELOCK struct represents an individual lock.
321	*
322	* An entry can be created here when the related database object is read, or
323	* by promotion of multiple finer-grained targets. All entries related to a
324	* serializable transaction are removed when that serializable transaction is
325	* cleaned up. Entries can also be removed when they are combined into a
326	* single coarser-grained lock entry.
327	*/
328	typedef struct PREDICATELOCK
329	{
330	/ hash key /
331	PREDICATELOCKTAG tag; / unique identifier of lock /
332
333	/ data /
334	SHM_QUEUE targetLink; / list link in PREDICATELOCKTARGET's list of*
335	* predicate locks */
336	SHM_QUEUE xactLink; / list link in SERIALIZABLEXACT's list of*
337	* predicate locks */
338	SerCommitSeqNo commitSeqNo; / only used for summarized predicate locks /
339	} PREDICATELOCK;
340
341
342	/*
343	* The LOCALPREDICATELOCK struct represents a local copy of data which is
344	* also present in the PREDICATELOCK table, organized for fast access without
345	* needing to acquire a LWLock. It is strictly for optimization.
346	*
347	* Each serializable transaction creates its own local hash table to hold a
348	* collection of these. This information is used to determine when a number
349	* of fine-grained locks should be promoted to a single coarser-grained lock.
350	* The information is maintained more-or-less in parallel to the
351	* PREDICATELOCK data, but because this data is not protected by locks and is
352	* only used in an optimization heuristic, it is allowed to drift in a few
353	* corner cases where maintaining exact data would be expensive.
354	*
355	* The hash table is created when the serializable transaction acquires its
356	* snapshot, and its memory is released upon completion of the transaction.
357	*/
358	typedef struct LOCALPREDICATELOCK
359	{
360	/ hash key /
361	PREDICATELOCKTARGETTAG tag; / unique identifier of lockable object /
362
363	/ data /
364	bool held; / is lock held, or just its children? /
365	int childLocks; / number of child locks currently held /
366	} LOCALPREDICATELOCK;
367
368
369	/*
370	* The types of predicate locks which can be acquired.
371	*/
372	typedef enum PredicateLockTargetType
373	{
374	PREDLOCKTAG_RELATION,
375	PREDLOCKTAG_PAGE,
376	PREDLOCKTAG_TUPLE
377	/ TODO SSI: Other types may be needed for index locking /
378	} PredicateLockTargetType;
379
380
381	/*
382	* This structure is used to quickly capture a copy of all predicate
383	* locks. This is currently used only by the pg_lock_status function,
384	* which in turn is used by the pg_locks view.
385	*/
386	typedef struct PredicateLockData
387	{
388	int nelements;
389	PREDICATELOCKTARGETTAG *locktags;
390	SERIALIZABLEXACT *xacts;
391	} PredicateLockData;
392
393
394	/*
395	* These macros define how we map logical IDs of lockable objects into the
396	* physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
397	* rather than accessing the fields directly. Note multiple eval of target!
398	*/
399	#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
400	((locktag).locktag_field1 = (dboid), \
401	(locktag).locktag_field2 = (reloid), \
402	(locktag).locktag_field3 = InvalidBlockNumber, \
403	(locktag).locktag_field4 = InvalidOffsetNumber)
404
405	#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
406	((locktag).locktag_field1 = (dboid), \
407	(locktag).locktag_field2 = (reloid), \
408	(locktag).locktag_field3 = (blocknum), \
409	(locktag).locktag_field4 = InvalidOffsetNumber)
410
411	#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
412	((locktag).locktag_field1 = (dboid), \
413	(locktag).locktag_field2 = (reloid), \
414	(locktag).locktag_field3 = (blocknum), \
415	(locktag).locktag_field4 = (offnum))
416
417	#define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
418	((Oid) (locktag).locktag_field1)
419	#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
420	((Oid) (locktag).locktag_field2)
421	#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
422	((BlockNumber) (locktag).locktag_field3)
423	#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
424	((OffsetNumber) (locktag).locktag_field4)
425	#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
426	(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
427	(((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
428	PREDLOCKTAG_RELATION))
429
430	/*
431	* Two-phase commit statefile records. There are two types: for each
432	* transaction, we generate one per-transaction record and a variable
433	* number of per-predicate-lock records.
434	*/
435	typedef enum TwoPhasePredicateRecordType
436	{
437	TWOPHASEPREDICATERECORD_XACT,
438	TWOPHASEPREDICATERECORD_LOCK
439	} TwoPhasePredicateRecordType;
440
441	/*
442	* Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
443	* much is needed because most of it not meaningful for a recovered
444	* prepared transaction.
445	*
446	* In particular, we do not record the in and out conflict lists for a
447	* prepared transaction because the associated SERIALIZABLEXACTs will
448	* not be available after recovery. Instead, we simply record the
449	* existence of each type of conflict by setting the transaction's
450	* summary conflict in/out flag.
451	*/
452	typedef struct TwoPhasePredicateXactRecord
453	{
454	TransactionId xmin;
455	uint32 flags;
456	} TwoPhasePredicateXactRecord;
457
458	/ Per-lock state /
459	typedef struct TwoPhasePredicateLockRecord
460	{
461	PREDICATELOCKTARGETTAG target;
462	uint32 filler; / to avoid length change in back-patched fix /
463	} TwoPhasePredicateLockRecord;
464
465	typedef struct TwoPhasePredicateRecord
466	{
467	TwoPhasePredicateRecordType type;
468	union
469	{
470	TwoPhasePredicateXactRecord xactRecord;
471	TwoPhasePredicateLockRecord lockRecord;
472	} data;
473	} TwoPhasePredicateRecord;
474
475	/*
476	* Define a macro to use for an "empty" SERIALIZABLEXACT reference.
477	*/
478	#define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
479
480
481	/*
482	* Function definitions for functions needing awareness of predicate
483	* locking internals.
484	*/
485	extern PredicateLockData GetPredicateLockStatusData(void*);
486	extern int GetSafeSnapshotBlockingPids(int blocked_pid,
487	int output, int* output_size);
488
489	#endif /* PREDICATE_INTERNALS_H */
490

Browse the source code of PostgreSQL/src/include/storage/predicate_internals.h