1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * snapmgr.c |
4 | * PostgreSQL snapshot manager |
5 | * |
6 | * We keep track of snapshots in two ways: those "registered" by resowner.c, |
7 | * and the "active snapshot" stack. All snapshots in either of them live in |
8 | * persistent memory. When a snapshot is no longer in any of these lists |
9 | * (tracked by separate refcounts on each snapshot), its memory can be freed. |
10 | * |
11 | * The FirstXactSnapshot, if any, is treated a bit specially: we increment its |
12 | * regd_count and list it in RegisteredSnapshots, but this reference is not |
13 | * tracked by a resource owner. We used to use the TopTransactionResourceOwner |
14 | * to track this snapshot reference, but that introduces logical circularity |
15 | * and thus makes it impossible to clean up in a sane fashion. It's better to |
16 | * handle this reference as an internally-tracked registration, so that this |
17 | * module is entirely lower-level than ResourceOwners. |
18 | * |
19 | * Likewise, any snapshots that have been exported by pg_export_snapshot |
20 | * have regd_count = 1 and are listed in RegisteredSnapshots, but are not |
21 | * tracked by any resource owner. |
22 | * |
23 | * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it |
24 | * is valid, but is not tracked by any resource owner. |
25 | * |
 * The same is true for historic snapshots used during logical decoding;
 * their lifetime is managed separately (as they live longer than one xact.c
 * transaction).
29 | * |
 * These arrangements let us reset MyPgXact->xmin when there are no snapshots
 * referenced by this transaction, and advance it when the one with oldest
 * Xmin is no longer referenced.  For simplicity, however, only registered
 * snapshots, not active snapshots, participate in tracking which one is
 * oldest; we don't try to change MyPgXact->xmin except when the
 * active-snapshot stack is empty.
36 | * |
37 | * |
38 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
39 | * Portions Copyright (c) 1994, Regents of the University of California |
40 | * |
41 | * IDENTIFICATION |
42 | * src/backend/utils/time/snapmgr.c |
43 | * |
44 | *------------------------------------------------------------------------- |
45 | */ |
46 | #include "postgres.h" |
47 | |
48 | #include <sys/stat.h> |
49 | #include <unistd.h> |
50 | |
51 | #include "access/subtrans.h" |
52 | #include "access/transam.h" |
53 | #include "access/xact.h" |
54 | #include "access/xlog.h" |
55 | #include "catalog/catalog.h" |
56 | #include "lib/pairingheap.h" |
57 | #include "miscadmin.h" |
58 | #include "storage/predicate.h" |
59 | #include "storage/proc.h" |
60 | #include "storage/procarray.h" |
61 | #include "storage/sinval.h" |
62 | #include "storage/sinvaladt.h" |
63 | #include "storage/spin.h" |
64 | #include "utils/builtins.h" |
65 | #include "utils/memutils.h" |
66 | #include "utils/rel.h" |
67 | #include "utils/resowner_private.h" |
68 | #include "utils/snapmgr.h" |
69 | #include "utils/syscache.h" |
70 | |
71 | |
/*
 * GUC parameters
 *
 * old_snapshot_threshold is also consulted by SnapMgrShmemSize(), which
 * reserves room for the per-minute xid map only when the value is positive.
 */
int			old_snapshot_threshold; /* number of minutes, -1 disables */
76 | |
/*
 * Structure for dealing with old_snapshot_threshold implementation.
 *
 * A single instance is created (or attached to) by SnapMgrInit() through
 * ShmemInitStruct(), using the size reported by SnapMgrShmemSize().  Each of
 * the three spinlocks is declared just ahead of the fields it protects, as
 * noted in the per-field comments.
 */
typedef struct OldSnapshotControlData
{
	/*
	 * Variables for old snapshot handling are shared among processes and are
	 * only allowed to move forward.
	 */
	slock_t		mutex_current;	/* protect current_timestamp */
	TimestampTz current_timestamp;	/* latest snapshot timestamp */
	slock_t		mutex_latest_xmin;	/* protect latest_xmin and next_map_update */
	TransactionId latest_xmin;	/* latest snapshot xmin */
	TimestampTz next_map_update;	/* latest snapshot valid up to */
	slock_t		mutex_threshold;	/* protect threshold fields */
	TimestampTz threshold_timestamp;	/* earlier snapshot is old */
	TransactionId threshold_xid;	/* earlier xid may be gone */

	/*
	 * Keep one xid per minute for old snapshot error handling.
	 *
	 * Use a circular buffer with a head offset, a count of entries currently
	 * used, and a timestamp corresponding to the xid at the head offset.  A
	 * count_used value of zero means that there are no times stored; a
	 * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
	 * is full and the head must be advanced to add new entries.  Use
	 * timestamps aligned to minute boundaries, since that seems less
	 * surprising than aligning based on the first usage timestamp.  The
	 * latest bucket is effectively stored within latest_xmin.  The circular
	 * buffer is updated when we get a new xmin value that doesn't fall into
	 * the same interval.
	 *
	 * It is OK if the xid for a given time slot is from earlier than
	 * calculated by adding the number of minutes corresponding to the
	 * (possibly wrapped) distance from the head offset to the time of the
	 * head entry, since that just results in the vacuuming of old tuples
	 * being slightly less aggressive.  It would not be OK for it to be off in
	 * the other direction, since it might result in vacuuming tuples that are
	 * still expected to be there.
	 *
	 * Use of an SLRU was considered but not chosen because it is more
	 * heavyweight than is needed for this, and would probably not be any less
	 * code to implement.
	 *
	 * Persistence is not needed.
	 *
	 * The array below is a flexible array member: SnapMgrShmemSize() adds
	 * space for OLD_SNAPSHOT_TIME_MAP_ENTRIES elements when
	 * old_snapshot_threshold is positive, and no space for it otherwise.
	 */
	int			head_offset;	/* subscript of oldest tracked time */
	TimestampTz head_timestamp; /* time corresponding to head xid */
	int			count_used;		/* how many slots are in use */
	TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
} OldSnapshotControlData;

/* Pointer to the shared control struct; assigned in SnapMgrInit(). */
static volatile OldSnapshotControlData *oldSnapshotControl;
130 | |
131 | |
/*
 * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
 * mode, and to the latest one taken in a read-committed transaction.
 * SecondarySnapshot is a snapshot that's always up-to-date as of the current
 * instant, even in transaction-snapshot mode.  It should only be used for
 * special-purpose code (say, RI checking.)  CatalogSnapshot points to an
 * MVCC snapshot intended to be used for catalog scans; we must invalidate it
 * whenever a system catalog change occurs.
 *
 * These SnapshotData structs are static to simplify memory allocation
 * (see the hack in GetSnapshotData to avoid repeated malloc/free).
 *
 * Note that only CurrentSnapshotData and SecondarySnapshotData are declared
 * "static"; the remaining three structs have external linkage.
 */
static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC};
static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC};
SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC};
SnapshotData SnapshotSelfData = {SNAPSHOT_SELF};
SnapshotData SnapshotAnyData = {SNAPSHOT_ANY};

/*
 * Pointers to valid snapshots.
 *
 * All start out NULL.  CatalogSnapshot is set by
 * GetNonHistoricCatalogSnapshot() and reset to NULL by
 * InvalidateCatalogSnapshot(); HistoricSnapshot is what
 * GetTransactionSnapshot() and GetCatalogSnapshot() return while
 * HistoricSnapshotActive() is true.
 */
static Snapshot CurrentSnapshot = NULL;
static Snapshot SecondarySnapshot = NULL;
static Snapshot CatalogSnapshot = NULL;
static Snapshot HistoricSnapshot = NULL;
155 | |
/*
 * These are updated by GetSnapshotData.  We initialize them this way
 * for the convenience of TransactionIdIsInProgress: even in bootstrap
 * mode, we don't want it to say that BootstrapTransactionId is in progress.
 *
 * RecentGlobalXmin and RecentGlobalDataXmin are initialized to
 * InvalidTransactionId, to ensure that no one tries to use a stale
 * value.  Readers should ensure that it has been set to something else
 * before using it.
 *
 * All four variables have external linkage, so they can be read from other
 * files.
 */
TransactionId TransactionXmin = FirstNormalTransactionId;
TransactionId RecentXmin = FirstNormalTransactionId;
TransactionId RecentGlobalXmin = InvalidTransactionId;
TransactionId RecentGlobalDataXmin = InvalidTransactionId;

/* (table, ctid) => (cmin, cmax) mapping during timetravel; NULL when unset */
static HTAB *tuplecid_data = NULL;
173 | |
/*
 * Elements of the active snapshot stack.
 *
 * Each element here accounts for exactly one active_count on SnapshotData.
 *
 * NB: the code assumes that elements in this list are in non-increasing
 * order of as_level; also, the list must be NULL-terminated.
 */
typedef struct ActiveSnapshotElt
{
	Snapshot	as_snap;		/* snapshot held by this stack entry */
	int			as_level;		/* GetCurrentTransactionNestLevel() value
								 * recorded by PushActiveSnapshot */
	struct ActiveSnapshotElt *as_next;	/* entry pushed just before this one,
										 * or NULL at the bottom */
} ActiveSnapshotElt;

/* Top of the stack of active snapshots (NULL when the stack is empty) */
static ActiveSnapshotElt *ActiveSnapshot = NULL;

/*
 * Bottom of the stack of active snapshots.  Set by PushActiveSnapshot when
 * the first entry is pushed, and cleared by PopActiveSnapshot when the
 * stack becomes empty.
 */
static ActiveSnapshotElt *OldestActiveSnapshot = NULL;
194 | |
/*
 * Currently registered Snapshots.  Ordered in a heap by xmin, so that we can
 * quickly find the one with lowest xmin, to advance our MyPgXact->xmin.
 *
 * Besides snapshots registered through resource owners, the heap also
 * receives FirstXactSnapshot (added in GetTransactionSnapshot and
 * SetTransactionSnapshot) and the CatalogSnapshot (added in
 * GetNonHistoricCatalogSnapshot, removed in InvalidateCatalogSnapshot).
 */
static int	xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
					 void *arg);

static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};

/*
 * Has the first snapshot of the current transaction been set up yet?  Set to
 * true by GetTransactionSnapshot() on its first call and by
 * SetTransactionSnapshot().
 */
bool		FirstSnapshotSet = false;

/*
 * Remember the serializable transaction snapshot, if any.  We cannot trust
 * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
 * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
 */
static Snapshot FirstXactSnapshot = NULL;
213 | |
/* Define pathname of exported-snapshot files */
#define SNAPSHOT_EXPORT_DIR "pg_snapshots"

/* Structure holding info about exported snapshot. */
typedef struct ExportedSnapshot
{
	char	   *snapfile;		/* name of the exported-snapshot file;
								 * presumably under SNAPSHOT_EXPORT_DIR --
								 * TODO confirm against the export code */
	Snapshot	snapshot;		/* the snapshot that was exported */
} ExportedSnapshot;

/* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
static List *exportedSnapshots = NIL;

/*
 * Prototypes for local functions.
 *
 * CopySnapshot and FreeSnapshot are defined further down in this file and
 * are used by the active-snapshot stack routines; SnapshotResetXmin is
 * called whenever a snapshot reference goes away (see PopActiveSnapshot and
 * InvalidateCatalogSnapshot).
 */
static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts);
static Snapshot CopySnapshot(Snapshot snapshot);
static void FreeSnapshot(Snapshot snapshot);
static void SnapshotResetXmin(void);
232 | |
/*
 * Snapshot fields to be serialized.
 *
 * Only these fields need to be sent to the cooperating backend; the
 * remaining ones can (and must) be set by the receiver upon restore.
 *
 * The struct carries only the counts of the XID arrays (xcnt, subxcnt); it
 * has no members for the array contents themselves.
 */
typedef struct SerializedSnapshotData
{
	TransactionId xmin;
	TransactionId xmax;
	uint32		xcnt;
	int32		subxcnt;
	bool		suboverflowed;
	bool		takenDuringRecovery;
	CommandId	curcid;
	TimestampTz whenTaken;
	XLogRecPtr	lsn;
} SerializedSnapshotData;
251 | |
252 | Size |
253 | SnapMgrShmemSize(void) |
254 | { |
255 | Size size; |
256 | |
257 | size = offsetof(OldSnapshotControlData, xid_by_minute); |
258 | if (old_snapshot_threshold > 0) |
259 | size = add_size(size, mul_size(sizeof(TransactionId), |
260 | OLD_SNAPSHOT_TIME_MAP_ENTRIES)); |
261 | |
262 | return size; |
263 | } |
264 | |
265 | /* |
266 | * Initialize for managing old snapshot detection. |
267 | */ |
268 | void |
269 | SnapMgrInit(void) |
270 | { |
271 | bool found; |
272 | |
273 | /* |
274 | * Create or attach to the OldSnapshotControlData structure. |
275 | */ |
276 | oldSnapshotControl = (volatile OldSnapshotControlData *) |
277 | ShmemInitStruct("OldSnapshotControlData" , |
278 | SnapMgrShmemSize(), &found); |
279 | |
280 | if (!found) |
281 | { |
282 | SpinLockInit(&oldSnapshotControl->mutex_current); |
283 | oldSnapshotControl->current_timestamp = 0; |
284 | SpinLockInit(&oldSnapshotControl->mutex_latest_xmin); |
285 | oldSnapshotControl->latest_xmin = InvalidTransactionId; |
286 | oldSnapshotControl->next_map_update = 0; |
287 | SpinLockInit(&oldSnapshotControl->mutex_threshold); |
288 | oldSnapshotControl->threshold_timestamp = 0; |
289 | oldSnapshotControl->threshold_xid = InvalidTransactionId; |
290 | oldSnapshotControl->head_offset = 0; |
291 | oldSnapshotControl->head_timestamp = 0; |
292 | oldSnapshotControl->count_used = 0; |
293 | } |
294 | } |
295 | |
296 | /* |
297 | * GetTransactionSnapshot |
298 | * Get the appropriate snapshot for a new query in a transaction. |
299 | * |
300 | * Note that the return value may point at static storage that will be modified |
301 | * by future calls and by CommandCounterIncrement(). Callers should call |
302 | * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be |
303 | * used very long. |
304 | */ |
305 | Snapshot |
306 | GetTransactionSnapshot(void) |
307 | { |
308 | /* |
309 | * Return historic snapshot if doing logical decoding. We'll never need a |
310 | * non-historic transaction snapshot in this (sub-)transaction, so there's |
311 | * no need to be careful to set one up for later calls to |
312 | * GetTransactionSnapshot(). |
313 | */ |
314 | if (HistoricSnapshotActive()) |
315 | { |
316 | Assert(!FirstSnapshotSet); |
317 | return HistoricSnapshot; |
318 | } |
319 | |
320 | /* First call in transaction? */ |
321 | if (!FirstSnapshotSet) |
322 | { |
323 | /* |
324 | * Don't allow catalog snapshot to be older than xact snapshot. Must |
325 | * do this first to allow the empty-heap Assert to succeed. |
326 | */ |
327 | InvalidateCatalogSnapshot(); |
328 | |
329 | Assert(pairingheap_is_empty(&RegisteredSnapshots)); |
330 | Assert(FirstXactSnapshot == NULL); |
331 | |
332 | if (IsInParallelMode()) |
333 | elog(ERROR, |
334 | "cannot take query snapshot during a parallel operation" ); |
335 | |
336 | /* |
337 | * In transaction-snapshot mode, the first snapshot must live until |
338 | * end of xact regardless of what the caller does with it, so we must |
339 | * make a copy of it rather than returning CurrentSnapshotData |
340 | * directly. Furthermore, if we're running in serializable mode, |
341 | * predicate.c needs to wrap the snapshot fetch in its own processing. |
342 | */ |
343 | if (IsolationUsesXactSnapshot()) |
344 | { |
345 | /* First, create the snapshot in CurrentSnapshotData */ |
346 | if (IsolationIsSerializable()) |
347 | CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData); |
348 | else |
349 | CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); |
350 | /* Make a saved copy */ |
351 | CurrentSnapshot = CopySnapshot(CurrentSnapshot); |
352 | FirstXactSnapshot = CurrentSnapshot; |
353 | /* Mark it as "registered" in FirstXactSnapshot */ |
354 | FirstXactSnapshot->regd_count++; |
355 | pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); |
356 | } |
357 | else |
358 | CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); |
359 | |
360 | FirstSnapshotSet = true; |
361 | return CurrentSnapshot; |
362 | } |
363 | |
364 | if (IsolationUsesXactSnapshot()) |
365 | return CurrentSnapshot; |
366 | |
367 | /* Don't allow catalog snapshot to be older than xact snapshot. */ |
368 | InvalidateCatalogSnapshot(); |
369 | |
370 | CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); |
371 | |
372 | return CurrentSnapshot; |
373 | } |
374 | |
375 | /* |
376 | * GetLatestSnapshot |
377 | * Get a snapshot that is up-to-date as of the current instant, |
378 | * even if we are executing in transaction-snapshot mode. |
379 | */ |
380 | Snapshot |
381 | GetLatestSnapshot(void) |
382 | { |
383 | /* |
384 | * We might be able to relax this, but nothing that could otherwise work |
385 | * needs it. |
386 | */ |
387 | if (IsInParallelMode()) |
388 | elog(ERROR, |
389 | "cannot update SecondarySnapshot during a parallel operation" ); |
390 | |
391 | /* |
392 | * So far there are no cases requiring support for GetLatestSnapshot() |
393 | * during logical decoding, but it wouldn't be hard to add if required. |
394 | */ |
395 | Assert(!HistoricSnapshotActive()); |
396 | |
397 | /* If first call in transaction, go ahead and set the xact snapshot */ |
398 | if (!FirstSnapshotSet) |
399 | return GetTransactionSnapshot(); |
400 | |
401 | SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData); |
402 | |
403 | return SecondarySnapshot; |
404 | } |
405 | |
406 | /* |
407 | * GetOldestSnapshot |
408 | * |
409 | * Get the transaction's oldest known snapshot, as judged by the LSN. |
410 | * Will return NULL if there are no active or registered snapshots. |
411 | */ |
412 | Snapshot |
413 | GetOldestSnapshot(void) |
414 | { |
415 | Snapshot OldestRegisteredSnapshot = NULL; |
416 | XLogRecPtr RegisteredLSN = InvalidXLogRecPtr; |
417 | |
418 | if (!pairingheap_is_empty(&RegisteredSnapshots)) |
419 | { |
420 | OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node, |
421 | pairingheap_first(&RegisteredSnapshots)); |
422 | RegisteredLSN = OldestRegisteredSnapshot->lsn; |
423 | } |
424 | |
425 | if (OldestActiveSnapshot != NULL) |
426 | { |
427 | XLogRecPtr ActiveLSN = OldestActiveSnapshot->as_snap->lsn; |
428 | |
429 | if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN) |
430 | return OldestActiveSnapshot->as_snap; |
431 | } |
432 | |
433 | return OldestRegisteredSnapshot; |
434 | } |
435 | |
436 | /* |
437 | * GetCatalogSnapshot |
438 | * Get a snapshot that is sufficiently up-to-date for scan of the |
439 | * system catalog with the specified OID. |
440 | */ |
441 | Snapshot |
442 | GetCatalogSnapshot(Oid relid) |
443 | { |
444 | /* |
445 | * Return historic snapshot while we're doing logical decoding, so we can |
446 | * see the appropriate state of the catalog. |
447 | * |
448 | * This is the primary reason for needing to reset the system caches after |
449 | * finishing decoding. |
450 | */ |
451 | if (HistoricSnapshotActive()) |
452 | return HistoricSnapshot; |
453 | |
454 | return GetNonHistoricCatalogSnapshot(relid); |
455 | } |
456 | |
457 | /* |
458 | * GetNonHistoricCatalogSnapshot |
459 | * Get a snapshot that is sufficiently up-to-date for scan of the system |
460 | * catalog with the specified OID, even while historic snapshots are set |
461 | * up. |
462 | */ |
463 | Snapshot |
464 | GetNonHistoricCatalogSnapshot(Oid relid) |
465 | { |
466 | /* |
467 | * If the caller is trying to scan a relation that has no syscache, no |
468 | * catcache invalidations will be sent when it is updated. For a few key |
469 | * relations, snapshot invalidations are sent instead. If we're trying to |
470 | * scan a relation for which neither catcache nor snapshot invalidations |
471 | * are sent, we must refresh the snapshot every time. |
472 | */ |
473 | if (CatalogSnapshot && |
474 | !RelationInvalidatesSnapshotsOnly(relid) && |
475 | !RelationHasSysCache(relid)) |
476 | InvalidateCatalogSnapshot(); |
477 | |
478 | if (CatalogSnapshot == NULL) |
479 | { |
480 | /* Get new snapshot. */ |
481 | CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData); |
482 | |
483 | /* |
484 | * Make sure the catalog snapshot will be accounted for in decisions |
485 | * about advancing PGXACT->xmin. We could apply RegisterSnapshot, but |
486 | * that would result in making a physical copy, which is overkill; and |
487 | * it would also create a dependency on some resource owner, which we |
488 | * do not want for reasons explained at the head of this file. Instead |
489 | * just shove the CatalogSnapshot into the pairing heap manually. This |
490 | * has to be reversed in InvalidateCatalogSnapshot, of course. |
491 | * |
492 | * NB: it had better be impossible for this to throw error, since the |
493 | * CatalogSnapshot pointer is already valid. |
494 | */ |
495 | pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node); |
496 | } |
497 | |
498 | return CatalogSnapshot; |
499 | } |
500 | |
501 | /* |
502 | * InvalidateCatalogSnapshot |
503 | * Mark the current catalog snapshot, if any, as invalid |
504 | * |
505 | * We could change this API to allow the caller to provide more fine-grained |
506 | * invalidation details, so that a change to relation A wouldn't prevent us |
507 | * from using our cached snapshot to scan relation B, but so far there's no |
508 | * evidence that the CPU cycles we spent tracking such fine details would be |
509 | * well-spent. |
510 | */ |
511 | void |
512 | InvalidateCatalogSnapshot(void) |
513 | { |
514 | if (CatalogSnapshot) |
515 | { |
516 | pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node); |
517 | CatalogSnapshot = NULL; |
518 | SnapshotResetXmin(); |
519 | } |
520 | } |
521 | |
522 | /* |
523 | * InvalidateCatalogSnapshotConditionally |
524 | * Drop catalog snapshot if it's the only one we have |
525 | * |
526 | * This is called when we are about to wait for client input, so we don't |
527 | * want to continue holding the catalog snapshot if it might mean that the |
528 | * global xmin horizon can't advance. However, if there are other snapshots |
529 | * still active or registered, the catalog snapshot isn't likely to be the |
530 | * oldest one, so we might as well keep it. |
531 | */ |
532 | void |
533 | InvalidateCatalogSnapshotConditionally(void) |
534 | { |
535 | if (CatalogSnapshot && |
536 | ActiveSnapshot == NULL && |
537 | pairingheap_is_singular(&RegisteredSnapshots)) |
538 | InvalidateCatalogSnapshot(); |
539 | } |
540 | |
541 | /* |
542 | * SnapshotSetCommandId |
543 | * Propagate CommandCounterIncrement into the static snapshots, if set |
544 | */ |
545 | void |
546 | SnapshotSetCommandId(CommandId curcid) |
547 | { |
548 | if (!FirstSnapshotSet) |
549 | return; |
550 | |
551 | if (CurrentSnapshot) |
552 | CurrentSnapshot->curcid = curcid; |
553 | if (SecondarySnapshot) |
554 | SecondarySnapshot->curcid = curcid; |
555 | /* Should we do the same with CatalogSnapshot? */ |
556 | } |
557 | |
558 | /* |
559 | * SetTransactionSnapshot |
560 | * Set the transaction's snapshot from an imported MVCC snapshot. |
561 | * |
562 | * Note that this is very closely tied to GetTransactionSnapshot --- it |
563 | * must take care of all the same considerations as the first-snapshot case |
564 | * in GetTransactionSnapshot. |
565 | */ |
566 | static void |
567 | SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid, |
568 | int sourcepid, PGPROC *sourceproc) |
569 | { |
570 | /* Caller should have checked this already */ |
571 | Assert(!FirstSnapshotSet); |
572 | |
573 | /* Better do this to ensure following Assert succeeds. */ |
574 | InvalidateCatalogSnapshot(); |
575 | |
576 | Assert(pairingheap_is_empty(&RegisteredSnapshots)); |
577 | Assert(FirstXactSnapshot == NULL); |
578 | Assert(!HistoricSnapshotActive()); |
579 | |
580 | /* |
581 | * Even though we are not going to use the snapshot it computes, we must |
582 | * call GetSnapshotData, for two reasons: (1) to be sure that |
583 | * CurrentSnapshotData's XID arrays have been allocated, and (2) to update |
584 | * RecentXmin and RecentGlobalXmin. (We could alternatively include those |
585 | * two variables in exported snapshot files, but it seems better to have |
586 | * snapshot importers compute reasonably up-to-date values for them.) |
587 | */ |
588 | CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); |
589 | |
590 | /* |
591 | * Now copy appropriate fields from the source snapshot. |
592 | */ |
593 | CurrentSnapshot->xmin = sourcesnap->xmin; |
594 | CurrentSnapshot->xmax = sourcesnap->xmax; |
595 | CurrentSnapshot->xcnt = sourcesnap->xcnt; |
596 | Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount()); |
597 | memcpy(CurrentSnapshot->xip, sourcesnap->xip, |
598 | sourcesnap->xcnt * sizeof(TransactionId)); |
599 | CurrentSnapshot->subxcnt = sourcesnap->subxcnt; |
600 | Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount()); |
601 | memcpy(CurrentSnapshot->subxip, sourcesnap->subxip, |
602 | sourcesnap->subxcnt * sizeof(TransactionId)); |
603 | CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed; |
604 | CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery; |
605 | /* NB: curcid should NOT be copied, it's a local matter */ |
606 | |
607 | /* |
608 | * Now we have to fix what GetSnapshotData did with MyPgXact->xmin and |
609 | * TransactionXmin. There is a race condition: to make sure we are not |
610 | * causing the global xmin to go backwards, we have to test that the |
611 | * source transaction is still running, and that has to be done |
612 | * atomically. So let procarray.c do it. |
613 | * |
614 | * Note: in serializable mode, predicate.c will do this a second time. It |
615 | * doesn't seem worth contorting the logic here to avoid two calls, |
616 | * especially since it's not clear that predicate.c *must* do this. |
617 | */ |
618 | if (sourceproc != NULL) |
619 | { |
620 | if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc)) |
621 | ereport(ERROR, |
622 | (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
623 | errmsg("could not import the requested snapshot" ), |
624 | errdetail("The source transaction is not running anymore." ))); |
625 | } |
626 | else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid)) |
627 | ereport(ERROR, |
628 | (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
629 | errmsg("could not import the requested snapshot" ), |
630 | errdetail("The source process with PID %d is not running anymore." , |
631 | sourcepid))); |
632 | |
633 | /* |
634 | * In transaction-snapshot mode, the first snapshot must live until end of |
635 | * xact, so we must make a copy of it. Furthermore, if we're running in |
636 | * serializable mode, predicate.c needs to do its own processing. |
637 | */ |
638 | if (IsolationUsesXactSnapshot()) |
639 | { |
640 | if (IsolationIsSerializable()) |
641 | SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid, |
642 | sourcepid); |
643 | /* Make a saved copy */ |
644 | CurrentSnapshot = CopySnapshot(CurrentSnapshot); |
645 | FirstXactSnapshot = CurrentSnapshot; |
646 | /* Mark it as "registered" in FirstXactSnapshot */ |
647 | FirstXactSnapshot->regd_count++; |
648 | pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); |
649 | } |
650 | |
651 | FirstSnapshotSet = true; |
652 | } |
653 | |
654 | /* |
655 | * CopySnapshot |
656 | * Copy the given snapshot. |
657 | * |
658 | * The copy is palloc'd in TopTransactionContext and has initial refcounts set |
659 | * to 0. The returned snapshot has the copied flag set. |
660 | */ |
661 | static Snapshot |
662 | CopySnapshot(Snapshot snapshot) |
663 | { |
664 | Snapshot newsnap; |
665 | Size subxipoff; |
666 | Size size; |
667 | |
668 | Assert(snapshot != InvalidSnapshot); |
669 | |
670 | /* We allocate any XID arrays needed in the same palloc block. */ |
671 | size = subxipoff = sizeof(SnapshotData) + |
672 | snapshot->xcnt * sizeof(TransactionId); |
673 | if (snapshot->subxcnt > 0) |
674 | size += snapshot->subxcnt * sizeof(TransactionId); |
675 | |
676 | newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size); |
677 | memcpy(newsnap, snapshot, sizeof(SnapshotData)); |
678 | |
679 | newsnap->regd_count = 0; |
680 | newsnap->active_count = 0; |
681 | newsnap->copied = true; |
682 | |
683 | /* setup XID array */ |
684 | if (snapshot->xcnt > 0) |
685 | { |
686 | newsnap->xip = (TransactionId *) (newsnap + 1); |
687 | memcpy(newsnap->xip, snapshot->xip, |
688 | snapshot->xcnt * sizeof(TransactionId)); |
689 | } |
690 | else |
691 | newsnap->xip = NULL; |
692 | |
693 | /* |
694 | * Setup subXID array. Don't bother to copy it if it had overflowed, |
695 | * though, because it's not used anywhere in that case. Except if it's a |
696 | * snapshot taken during recovery; all the top-level XIDs are in subxip as |
697 | * well in that case, so we mustn't lose them. |
698 | */ |
699 | if (snapshot->subxcnt > 0 && |
700 | (!snapshot->suboverflowed || snapshot->takenDuringRecovery)) |
701 | { |
702 | newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff); |
703 | memcpy(newsnap->subxip, snapshot->subxip, |
704 | snapshot->subxcnt * sizeof(TransactionId)); |
705 | } |
706 | else |
707 | newsnap->subxip = NULL; |
708 | |
709 | return newsnap; |
710 | } |
711 | |
712 | /* |
713 | * FreeSnapshot |
714 | * Free the memory associated with a snapshot. |
715 | */ |
716 | static void |
717 | FreeSnapshot(Snapshot snapshot) |
718 | { |
719 | Assert(snapshot->regd_count == 0); |
720 | Assert(snapshot->active_count == 0); |
721 | Assert(snapshot->copied); |
722 | |
723 | pfree(snapshot); |
724 | } |
725 | |
726 | /* |
727 | * PushActiveSnapshot |
728 | * Set the given snapshot as the current active snapshot |
729 | * |
730 | * If the passed snapshot is a statically-allocated one, or it is possibly |
731 | * subject to a future command counter update, create a new long-lived copy |
732 | * with active refcount=1. Otherwise, only increment the refcount. |
733 | */ |
734 | void |
735 | PushActiveSnapshot(Snapshot snap) |
736 | { |
737 | ActiveSnapshotElt *newactive; |
738 | |
739 | Assert(snap != InvalidSnapshot); |
740 | |
741 | newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt)); |
742 | |
743 | /* |
744 | * Checking SecondarySnapshot is probably useless here, but it seems |
745 | * better to be sure. |
746 | */ |
747 | if (snap == CurrentSnapshot || snap == SecondarySnapshot || !snap->copied) |
748 | newactive->as_snap = CopySnapshot(snap); |
749 | else |
750 | newactive->as_snap = snap; |
751 | |
752 | newactive->as_next = ActiveSnapshot; |
753 | newactive->as_level = GetCurrentTransactionNestLevel(); |
754 | |
755 | newactive->as_snap->active_count++; |
756 | |
757 | ActiveSnapshot = newactive; |
758 | if (OldestActiveSnapshot == NULL) |
759 | OldestActiveSnapshot = ActiveSnapshot; |
760 | } |
761 | |
762 | /* |
763 | * PushCopiedSnapshot |
764 | * As above, except forcibly copy the presented snapshot. |
765 | * |
766 | * This should be used when the ActiveSnapshot has to be modifiable, for |
767 | * example if the caller intends to call UpdateActiveSnapshotCommandId. |
768 | * The new snapshot will be released when popped from the stack. |
769 | */ |
770 | void |
771 | PushCopiedSnapshot(Snapshot snapshot) |
772 | { |
773 | PushActiveSnapshot(CopySnapshot(snapshot)); |
774 | } |
775 | |
776 | /* |
777 | * UpdateActiveSnapshotCommandId |
778 | * |
779 | * Update the current CID of the active snapshot. This can only be applied |
780 | * to a snapshot that is not referenced elsewhere. |
781 | */ |
782 | void |
783 | UpdateActiveSnapshotCommandId(void) |
784 | { |
785 | CommandId save_curcid, |
786 | curcid; |
787 | |
788 | Assert(ActiveSnapshot != NULL); |
789 | Assert(ActiveSnapshot->as_snap->active_count == 1); |
790 | Assert(ActiveSnapshot->as_snap->regd_count == 0); |
791 | |
792 | /* |
793 | * Don't allow modification of the active snapshot during parallel |
794 | * operation. We share the snapshot to worker backends at the beginning |
795 | * of parallel operation, so any change to the snapshot can lead to |
796 | * inconsistencies. We have other defenses against |
797 | * CommandCounterIncrement, but there are a few places that call this |
798 | * directly, so we put an additional guard here. |
799 | */ |
800 | save_curcid = ActiveSnapshot->as_snap->curcid; |
801 | curcid = GetCurrentCommandId(false); |
802 | if (IsInParallelMode() && save_curcid != curcid) |
803 | elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation" ); |
804 | ActiveSnapshot->as_snap->curcid = curcid; |
805 | } |
806 | |
807 | /* |
808 | * PopActiveSnapshot |
809 | * |
810 | * Remove the topmost snapshot from the active snapshot stack, decrementing the |
811 | * reference count, and free it if this was the last reference. |
812 | */ |
813 | void |
814 | PopActiveSnapshot(void) |
815 | { |
816 | ActiveSnapshotElt *newstack; |
817 | |
818 | newstack = ActiveSnapshot->as_next; |
819 | |
820 | Assert(ActiveSnapshot->as_snap->active_count > 0); |
821 | |
822 | ActiveSnapshot->as_snap->active_count--; |
823 | |
824 | if (ActiveSnapshot->as_snap->active_count == 0 && |
825 | ActiveSnapshot->as_snap->regd_count == 0) |
826 | FreeSnapshot(ActiveSnapshot->as_snap); |
827 | |
828 | pfree(ActiveSnapshot); |
829 | ActiveSnapshot = newstack; |
830 | if (ActiveSnapshot == NULL) |
831 | OldestActiveSnapshot = NULL; |
832 | |
833 | SnapshotResetXmin(); |
834 | } |
835 | |
836 | /* |
837 | * GetActiveSnapshot |
838 | * Return the topmost snapshot in the Active stack. |
839 | */ |
840 | Snapshot |
841 | GetActiveSnapshot(void) |
842 | { |
843 | Assert(ActiveSnapshot != NULL); |
844 | |
845 | return ActiveSnapshot->as_snap; |
846 | } |
847 | |
848 | /* |
849 | * ActiveSnapshotSet |
850 | * Return whether there is at least one snapshot in the Active stack |
851 | */ |
852 | bool |
853 | ActiveSnapshotSet(void) |
854 | { |
855 | return ActiveSnapshot != NULL; |
856 | } |
857 | |
858 | /* |
859 | * RegisterSnapshot |
860 | * Register a snapshot as being in use by the current resource owner |
861 | * |
862 | * If InvalidSnapshot is passed, it is not registered. |
863 | */ |
864 | Snapshot |
865 | RegisterSnapshot(Snapshot snapshot) |
866 | { |
867 | if (snapshot == InvalidSnapshot) |
868 | return InvalidSnapshot; |
869 | |
870 | return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner); |
871 | } |
872 | |
873 | /* |
874 | * RegisterSnapshotOnOwner |
875 | * As above, but use the specified resource owner |
876 | */ |
877 | Snapshot |
878 | RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner) |
879 | { |
880 | Snapshot snap; |
881 | |
882 | if (snapshot == InvalidSnapshot) |
883 | return InvalidSnapshot; |
884 | |
885 | /* Static snapshot? Create a persistent copy */ |
886 | snap = snapshot->copied ? snapshot : CopySnapshot(snapshot); |
887 | |
888 | /* and tell resowner.c about it */ |
889 | ResourceOwnerEnlargeSnapshots(owner); |
890 | snap->regd_count++; |
891 | ResourceOwnerRememberSnapshot(owner, snap); |
892 | |
893 | if (snap->regd_count == 1) |
894 | pairingheap_add(&RegisteredSnapshots, &snap->ph_node); |
895 | |
896 | return snap; |
897 | } |
898 | |
899 | /* |
900 | * UnregisterSnapshot |
901 | * |
902 | * Decrement the reference count of a snapshot, remove the corresponding |
903 | * reference from CurrentResourceOwner, and free the snapshot if no more |
904 | * references remain. |
905 | */ |
906 | void |
907 | UnregisterSnapshot(Snapshot snapshot) |
908 | { |
909 | if (snapshot == NULL) |
910 | return; |
911 | |
912 | UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner); |
913 | } |
914 | |
915 | /* |
916 | * UnregisterSnapshotFromOwner |
917 | * As above, but use the specified resource owner |
918 | */ |
919 | void |
920 | UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner) |
921 | { |
922 | if (snapshot == NULL) |
923 | return; |
924 | |
925 | Assert(snapshot->regd_count > 0); |
926 | Assert(!pairingheap_is_empty(&RegisteredSnapshots)); |
927 | |
928 | ResourceOwnerForgetSnapshot(owner, snapshot); |
929 | |
930 | snapshot->regd_count--; |
931 | if (snapshot->regd_count == 0) |
932 | pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node); |
933 | |
934 | if (snapshot->regd_count == 0 && snapshot->active_count == 0) |
935 | { |
936 | FreeSnapshot(snapshot); |
937 | SnapshotResetXmin(); |
938 | } |
939 | } |
940 | |
941 | /* |
942 | * Comparison function for RegisteredSnapshots heap. Snapshots are ordered |
943 | * by xmin, so that the snapshot with smallest xmin is at the top. |
944 | */ |
945 | static int |
946 | xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg) |
947 | { |
948 | const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a); |
949 | const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b); |
950 | |
951 | if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin)) |
952 | return 1; |
953 | else if (TransactionIdFollows(asnap->xmin, bsnap->xmin)) |
954 | return -1; |
955 | else |
956 | return 0; |
957 | } |
958 | |
959 | /* |
960 | * Get current RecentGlobalXmin value, as a FullTransactionId. |
961 | */ |
962 | FullTransactionId |
963 | GetFullRecentGlobalXmin(void) |
964 | { |
965 | FullTransactionId nextxid_full; |
966 | uint32 nextxid_epoch; |
967 | TransactionId nextxid_xid; |
968 | uint32 epoch; |
969 | |
970 | Assert(TransactionIdIsNormal(RecentGlobalXmin)); |
971 | |
972 | /* |
973 | * Compute the epoch from the next XID's epoch. This relies on the fact |
974 | * that RecentGlobalXmin must be within the 2 billion XID horizon from the |
975 | * next XID. |
976 | */ |
977 | nextxid_full = ReadNextFullTransactionId(); |
978 | nextxid_epoch = EpochFromFullTransactionId(nextxid_full); |
979 | nextxid_xid = XidFromFullTransactionId(nextxid_full); |
980 | |
981 | if (RecentGlobalXmin > nextxid_xid) |
982 | epoch = nextxid_epoch - 1; |
983 | else |
984 | epoch = nextxid_epoch; |
985 | |
986 | return FullTransactionIdFromEpochAndXid(epoch, RecentGlobalXmin); |
987 | } |
988 | |
989 | /* |
990 | * SnapshotResetXmin |
991 | * |
992 | * If there are no more snapshots, we can reset our PGXACT->xmin to InvalidXid. |
993 | * Note we can do this without locking because we assume that storing an Xid |
994 | * is atomic. |
995 | * |
996 | * Even if there are some remaining snapshots, we may be able to advance our |
997 | * PGXACT->xmin to some degree. This typically happens when a portal is |
998 | * dropped. For efficiency, we only consider recomputing PGXACT->xmin when |
999 | * the active snapshot stack is empty; this allows us not to need to track |
1000 | * which active snapshot is oldest. |
1001 | * |
1002 | * Note: it's tempting to use GetOldestSnapshot() here so that we can include |
1003 | * active snapshots in the calculation. However, that compares by LSN not |
1004 | * xmin so it's not entirely clear that it's the same thing. Also, we'd be |
1005 | * critically dependent on the assumption that the bottommost active snapshot |
1006 | * stack entry has the oldest xmin. (Current uses of GetOldestSnapshot() are |
1007 | * not actually critical, but this would be.) |
1008 | */ |
1009 | static void |
1010 | SnapshotResetXmin(void) |
1011 | { |
1012 | Snapshot minSnapshot; |
1013 | |
1014 | if (ActiveSnapshot != NULL) |
1015 | return; |
1016 | |
1017 | if (pairingheap_is_empty(&RegisteredSnapshots)) |
1018 | { |
1019 | MyPgXact->xmin = InvalidTransactionId; |
1020 | return; |
1021 | } |
1022 | |
1023 | minSnapshot = pairingheap_container(SnapshotData, ph_node, |
1024 | pairingheap_first(&RegisteredSnapshots)); |
1025 | |
1026 | if (TransactionIdPrecedes(MyPgXact->xmin, minSnapshot->xmin)) |
1027 | MyPgXact->xmin = minSnapshot->xmin; |
1028 | } |
1029 | |
1030 | /* |
1031 | * AtSubCommit_Snapshot |
1032 | */ |
1033 | void |
1034 | AtSubCommit_Snapshot(int level) |
1035 | { |
1036 | ActiveSnapshotElt *active; |
1037 | |
1038 | /* |
1039 | * Relabel the active snapshots set in this subtransaction as though they |
1040 | * are owned by the parent subxact. |
1041 | */ |
1042 | for (active = ActiveSnapshot; active != NULL; active = active->as_next) |
1043 | { |
1044 | if (active->as_level < level) |
1045 | break; |
1046 | active->as_level = level - 1; |
1047 | } |
1048 | } |
1049 | |
1050 | /* |
1051 | * AtSubAbort_Snapshot |
1052 | * Clean up snapshots after a subtransaction abort |
1053 | */ |
1054 | void |
1055 | AtSubAbort_Snapshot(int level) |
1056 | { |
1057 | /* Forget the active snapshots set by this subtransaction */ |
1058 | while (ActiveSnapshot && ActiveSnapshot->as_level >= level) |
1059 | { |
1060 | ActiveSnapshotElt *next; |
1061 | |
1062 | next = ActiveSnapshot->as_next; |
1063 | |
1064 | /* |
1065 | * Decrement the snapshot's active count. If it's still registered or |
1066 | * marked as active by an outer subtransaction, we can't free it yet. |
1067 | */ |
1068 | Assert(ActiveSnapshot->as_snap->active_count >= 1); |
1069 | ActiveSnapshot->as_snap->active_count -= 1; |
1070 | |
1071 | if (ActiveSnapshot->as_snap->active_count == 0 && |
1072 | ActiveSnapshot->as_snap->regd_count == 0) |
1073 | FreeSnapshot(ActiveSnapshot->as_snap); |
1074 | |
1075 | /* and free the stack element */ |
1076 | pfree(ActiveSnapshot); |
1077 | |
1078 | ActiveSnapshot = next; |
1079 | if (ActiveSnapshot == NULL) |
1080 | OldestActiveSnapshot = NULL; |
1081 | } |
1082 | |
1083 | SnapshotResetXmin(); |
1084 | } |
1085 | |
1086 | /* |
1087 | * AtEOXact_Snapshot |
1088 | * Snapshot manager's cleanup function for end of transaction |
1089 | */ |
1090 | void |
1091 | AtEOXact_Snapshot(bool isCommit, bool resetXmin) |
1092 | { |
1093 | /* |
1094 | * In transaction-snapshot mode we must release our privately-managed |
1095 | * reference to the transaction snapshot. We must remove it from |
1096 | * RegisteredSnapshots to keep the check below happy. But we don't bother |
1097 | * to do FreeSnapshot, for two reasons: the memory will go away with |
1098 | * TopTransactionContext anyway, and if someone has left the snapshot |
1099 | * stacked as active, we don't want the code below to be chasing through a |
1100 | * dangling pointer. |
1101 | */ |
1102 | if (FirstXactSnapshot != NULL) |
1103 | { |
1104 | Assert(FirstXactSnapshot->regd_count > 0); |
1105 | Assert(!pairingheap_is_empty(&RegisteredSnapshots)); |
1106 | pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); |
1107 | } |
1108 | FirstXactSnapshot = NULL; |
1109 | |
1110 | /* |
1111 | * If we exported any snapshots, clean them up. |
1112 | */ |
1113 | if (exportedSnapshots != NIL) |
1114 | { |
1115 | ListCell *lc; |
1116 | |
1117 | /* |
1118 | * Get rid of the files. Unlink failure is only a WARNING because (1) |
1119 | * it's too late to abort the transaction, and (2) leaving a leaked |
1120 | * file around has little real consequence anyway. |
1121 | * |
1122 | * We also need to remove the snapshots from RegisteredSnapshots to |
1123 | * prevent a warning below. |
1124 | * |
1125 | * As with the FirstXactSnapshot, we don't need to free resources of |
1126 | * the snapshot iself as it will go away with the memory context. |
1127 | */ |
1128 | foreach(lc, exportedSnapshots) |
1129 | { |
1130 | ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc); |
1131 | |
1132 | if (unlink(esnap->snapfile)) |
1133 | elog(WARNING, "could not unlink file \"%s\": %m" , |
1134 | esnap->snapfile); |
1135 | |
1136 | pairingheap_remove(&RegisteredSnapshots, |
1137 | &esnap->snapshot->ph_node); |
1138 | } |
1139 | |
1140 | exportedSnapshots = NIL; |
1141 | } |
1142 | |
1143 | /* Drop catalog snapshot if any */ |
1144 | InvalidateCatalogSnapshot(); |
1145 | |
1146 | /* On commit, complain about leftover snapshots */ |
1147 | if (isCommit) |
1148 | { |
1149 | ActiveSnapshotElt *active; |
1150 | |
1151 | if (!pairingheap_is_empty(&RegisteredSnapshots)) |
1152 | elog(WARNING, "registered snapshots seem to remain after cleanup" ); |
1153 | |
1154 | /* complain about unpopped active snapshots */ |
1155 | for (active = ActiveSnapshot; active != NULL; active = active->as_next) |
1156 | elog(WARNING, "snapshot %p still active" , active); |
1157 | } |
1158 | |
1159 | /* |
1160 | * And reset our state. We don't need to free the memory explicitly -- |
1161 | * it'll go away with TopTransactionContext. |
1162 | */ |
1163 | ActiveSnapshot = NULL; |
1164 | OldestActiveSnapshot = NULL; |
1165 | pairingheap_reset(&RegisteredSnapshots); |
1166 | |
1167 | CurrentSnapshot = NULL; |
1168 | SecondarySnapshot = NULL; |
1169 | |
1170 | FirstSnapshotSet = false; |
1171 | |
1172 | /* |
1173 | * During normal commit processing, we call ProcArrayEndTransaction() to |
1174 | * reset the PgXact->xmin. That call happens prior to the call to |
1175 | * AtEOXact_Snapshot(), so we need not touch xmin here at all. |
1176 | */ |
1177 | if (resetXmin) |
1178 | SnapshotResetXmin(); |
1179 | |
1180 | Assert(resetXmin || MyPgXact->xmin == 0); |
1181 | } |
1182 | |
1183 | |
1184 | /* |
1185 | * ExportSnapshot |
1186 | * Export the snapshot to a file so that other backends can import it. |
1187 | * Returns the token (the file name) that can be used to import this |
1188 | * snapshot. |
1189 | */ |
1190 | char * |
1191 | ExportSnapshot(Snapshot snapshot) |
1192 | { |
1193 | TransactionId topXid; |
1194 | TransactionId *children; |
1195 | ExportedSnapshot *esnap; |
1196 | int nchildren; |
1197 | int addTopXid; |
1198 | StringInfoData buf; |
1199 | FILE *f; |
1200 | int i; |
1201 | MemoryContext oldcxt; |
1202 | char path[MAXPGPATH]; |
1203 | char pathtmp[MAXPGPATH]; |
1204 | |
1205 | /* |
1206 | * It's tempting to call RequireTransactionBlock here, since it's not very |
1207 | * useful to export a snapshot that will disappear immediately afterwards. |
1208 | * However, we haven't got enough information to do that, since we don't |
1209 | * know if we're at top level or not. For example, we could be inside a |
1210 | * plpgsql function that is going to fire off other transactions via |
1211 | * dblink. Rather than disallow perfectly legitimate usages, don't make a |
1212 | * check. |
1213 | * |
1214 | * Also note that we don't make any restriction on the transaction's |
1215 | * isolation level; however, importers must check the level if they are |
1216 | * serializable. |
1217 | */ |
1218 | |
1219 | /* |
1220 | * Get our transaction ID if there is one, to include in the snapshot. |
1221 | */ |
1222 | topXid = GetTopTransactionIdIfAny(); |
1223 | |
1224 | /* |
1225 | * We cannot export a snapshot from a subtransaction because there's no |
1226 | * easy way for importers to verify that the same subtransaction is still |
1227 | * running. |
1228 | */ |
1229 | if (IsSubTransaction()) |
1230 | ereport(ERROR, |
1231 | (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), |
1232 | errmsg("cannot export a snapshot from a subtransaction" ))); |
1233 | |
1234 | /* |
1235 | * We do however allow previous committed subtransactions to exist. |
1236 | * Importers of the snapshot must see them as still running, so get their |
1237 | * XIDs to add them to the snapshot. |
1238 | */ |
1239 | nchildren = xactGetCommittedChildren(&children); |
1240 | |
1241 | /* |
1242 | * Generate file path for the snapshot. We start numbering of snapshots |
1243 | * inside the transaction from 1. |
1244 | */ |
1245 | snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d" , |
1246 | MyProc->backendId, MyProc->lxid, list_length(exportedSnapshots) + 1); |
1247 | |
1248 | /* |
1249 | * Copy the snapshot into TopTransactionContext, add it to the |
1250 | * exportedSnapshots list, and mark it pseudo-registered. We do this to |
1251 | * ensure that the snapshot's xmin is honored for the rest of the |
1252 | * transaction. |
1253 | */ |
1254 | snapshot = CopySnapshot(snapshot); |
1255 | |
1256 | oldcxt = MemoryContextSwitchTo(TopTransactionContext); |
1257 | esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot)); |
1258 | esnap->snapfile = pstrdup(path); |
1259 | esnap->snapshot = snapshot; |
1260 | exportedSnapshots = lappend(exportedSnapshots, esnap); |
1261 | MemoryContextSwitchTo(oldcxt); |
1262 | |
1263 | snapshot->regd_count++; |
1264 | pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node); |
1265 | |
1266 | /* |
1267 | * Fill buf with a text serialization of the snapshot, plus identification |
1268 | * data about this transaction. The format expected by ImportSnapshot is |
1269 | * pretty rigid: each line must be fieldname:value. |
1270 | */ |
1271 | initStringInfo(&buf); |
1272 | |
1273 | appendStringInfo(&buf, "vxid:%d/%u\n" , MyProc->backendId, MyProc->lxid); |
1274 | appendStringInfo(&buf, "pid:%d\n" , MyProcPid); |
1275 | appendStringInfo(&buf, "dbid:%u\n" , MyDatabaseId); |
1276 | appendStringInfo(&buf, "iso:%d\n" , XactIsoLevel); |
1277 | appendStringInfo(&buf, "ro:%d\n" , XactReadOnly); |
1278 | |
1279 | appendStringInfo(&buf, "xmin:%u\n" , snapshot->xmin); |
1280 | appendStringInfo(&buf, "xmax:%u\n" , snapshot->xmax); |
1281 | |
1282 | /* |
1283 | * We must include our own top transaction ID in the top-xid data, since |
1284 | * by definition we will still be running when the importing transaction |
1285 | * adopts the snapshot, but GetSnapshotData never includes our own XID in |
1286 | * the snapshot. (There must, therefore, be enough room to add it.) |
1287 | * |
1288 | * However, it could be that our topXid is after the xmax, in which case |
1289 | * we shouldn't include it because xip[] members are expected to be before |
1290 | * xmax. (We need not make the same check for subxip[] members, see |
1291 | * snapshot.h.) |
1292 | */ |
1293 | addTopXid = (TransactionIdIsValid(topXid) && |
1294 | TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0; |
1295 | appendStringInfo(&buf, "xcnt:%d\n" , snapshot->xcnt + addTopXid); |
1296 | for (i = 0; i < snapshot->xcnt; i++) |
1297 | appendStringInfo(&buf, "xip:%u\n" , snapshot->xip[i]); |
1298 | if (addTopXid) |
1299 | appendStringInfo(&buf, "xip:%u\n" , topXid); |
1300 | |
1301 | /* |
1302 | * Similarly, we add our subcommitted child XIDs to the subxid data. Here, |
1303 | * we have to cope with possible overflow. |
1304 | */ |
1305 | if (snapshot->suboverflowed || |
1306 | snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount()) |
1307 | appendStringInfoString(&buf, "sof:1\n" ); |
1308 | else |
1309 | { |
1310 | appendStringInfoString(&buf, "sof:0\n" ); |
1311 | appendStringInfo(&buf, "sxcnt:%d\n" , snapshot->subxcnt + nchildren); |
1312 | for (i = 0; i < snapshot->subxcnt; i++) |
1313 | appendStringInfo(&buf, "sxp:%u\n" , snapshot->subxip[i]); |
1314 | for (i = 0; i < nchildren; i++) |
1315 | appendStringInfo(&buf, "sxp:%u\n" , children[i]); |
1316 | } |
1317 | appendStringInfo(&buf, "rec:%u\n" , snapshot->takenDuringRecovery); |
1318 | |
1319 | /* |
1320 | * Now write the text representation into a file. We first write to a |
1321 | * ".tmp" filename, and rename to final filename if no error. This |
1322 | * ensures that no other backend can read an incomplete file |
1323 | * (ImportSnapshot won't allow it because of its valid-characters check). |
1324 | */ |
1325 | snprintf(pathtmp, sizeof(pathtmp), "%s.tmp" , path); |
1326 | if (!(f = AllocateFile(pathtmp, PG_BINARY_W))) |
1327 | ereport(ERROR, |
1328 | (errcode_for_file_access(), |
1329 | errmsg("could not create file \"%s\": %m" , pathtmp))); |
1330 | |
1331 | if (fwrite(buf.data, buf.len, 1, f) != 1) |
1332 | ereport(ERROR, |
1333 | (errcode_for_file_access(), |
1334 | errmsg("could not write to file \"%s\": %m" , pathtmp))); |
1335 | |
1336 | /* no fsync() since file need not survive a system crash */ |
1337 | |
1338 | if (FreeFile(f)) |
1339 | ereport(ERROR, |
1340 | (errcode_for_file_access(), |
1341 | errmsg("could not write to file \"%s\": %m" , pathtmp))); |
1342 | |
1343 | /* |
1344 | * Now that we have written everything into a .tmp file, rename the file |
1345 | * to remove the .tmp suffix. |
1346 | */ |
1347 | if (rename(pathtmp, path) < 0) |
1348 | ereport(ERROR, |
1349 | (errcode_for_file_access(), |
1350 | errmsg("could not rename file \"%s\" to \"%s\": %m" , |
1351 | pathtmp, path))); |
1352 | |
1353 | /* |
1354 | * The basename of the file is what we return from pg_export_snapshot(). |
1355 | * It's already in path in a textual format and we know that the path |
1356 | * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash |
1357 | * and pstrdup it so as not to return the address of a local variable. |
1358 | */ |
1359 | return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1); |
1360 | } |
1361 | |
1362 | /* |
1363 | * pg_export_snapshot |
1364 | * SQL-callable wrapper for ExportSnapshot. |
1365 | */ |
1366 | Datum |
1367 | pg_export_snapshot(PG_FUNCTION_ARGS) |
1368 | { |
1369 | char *snapshotName; |
1370 | |
1371 | snapshotName = ExportSnapshot(GetActiveSnapshot()); |
1372 | PG_RETURN_TEXT_P(cstring_to_text(snapshotName)); |
1373 | } |
1374 | |
1375 | |
1376 | /* |
1377 | * Parsing subroutines for ImportSnapshot: parse a line with the given |
1378 | * prefix followed by a value, and advance *s to the next line. The |
1379 | * filename is provided for use in error messages. |
1380 | */ |
1381 | static int |
1382 | parseIntFromText(const char *prefix, char **s, const char *filename) |
1383 | { |
1384 | char *ptr = *s; |
1385 | int prefixlen = strlen(prefix); |
1386 | int val; |
1387 | |
1388 | if (strncmp(ptr, prefix, prefixlen) != 0) |
1389 | ereport(ERROR, |
1390 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1391 | errmsg("invalid snapshot data in file \"%s\"" , filename))); |
1392 | ptr += prefixlen; |
1393 | if (sscanf(ptr, "%d" , &val) != 1) |
1394 | ereport(ERROR, |
1395 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1396 | errmsg("invalid snapshot data in file \"%s\"" , filename))); |
1397 | ptr = strchr(ptr, '\n'); |
1398 | if (!ptr) |
1399 | ereport(ERROR, |
1400 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1401 | errmsg("invalid snapshot data in file \"%s\"" , filename))); |
1402 | *s = ptr + 1; |
1403 | return val; |
1404 | } |
1405 | |
1406 | static TransactionId |
1407 | parseXidFromText(const char *prefix, char **s, const char *filename) |
1408 | { |
1409 | char *ptr = *s; |
1410 | int prefixlen = strlen(prefix); |
1411 | TransactionId val; |
1412 | |
1413 | if (strncmp(ptr, prefix, prefixlen) != 0) |
1414 | ereport(ERROR, |
1415 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1416 | errmsg("invalid snapshot data in file \"%s\"" , filename))); |
1417 | ptr += prefixlen; |
1418 | if (sscanf(ptr, "%u" , &val) != 1) |
1419 | ereport(ERROR, |
1420 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1421 | errmsg("invalid snapshot data in file \"%s\"" , filename))); |
1422 | ptr = strchr(ptr, '\n'); |
1423 | if (!ptr) |
1424 | ereport(ERROR, |
1425 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1426 | errmsg("invalid snapshot data in file \"%s\"" , filename))); |
1427 | *s = ptr + 1; |
1428 | return val; |
1429 | } |
1430 | |
1431 | static void |
1432 | parseVxidFromText(const char *prefix, char **s, const char *filename, |
1433 | VirtualTransactionId *vxid) |
1434 | { |
1435 | char *ptr = *s; |
1436 | int prefixlen = strlen(prefix); |
1437 | |
1438 | if (strncmp(ptr, prefix, prefixlen) != 0) |
1439 | ereport(ERROR, |
1440 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1441 | errmsg("invalid snapshot data in file \"%s\"" , filename))); |
1442 | ptr += prefixlen; |
1443 | if (sscanf(ptr, "%d/%u" , &vxid->backendId, &vxid->localTransactionId) != 2) |
1444 | ereport(ERROR, |
1445 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1446 | errmsg("invalid snapshot data in file \"%s\"" , filename))); |
1447 | ptr = strchr(ptr, '\n'); |
1448 | if (!ptr) |
1449 | ereport(ERROR, |
1450 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1451 | errmsg("invalid snapshot data in file \"%s\"" , filename))); |
1452 | *s = ptr + 1; |
1453 | } |
1454 | |
1455 | /* |
1456 | * ImportSnapshot |
1457 | * Import a previously exported snapshot. The argument should be a |
1458 | * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file. |
1459 | * This is called by "SET TRANSACTION SNAPSHOT 'foo'". |
1460 | */ |
1461 | void |
1462 | ImportSnapshot(const char *idstr) |
1463 | { |
1464 | char path[MAXPGPATH]; |
1465 | FILE *f; |
1466 | struct stat stat_buf; |
1467 | char *filebuf; |
1468 | int xcnt; |
1469 | int i; |
1470 | VirtualTransactionId src_vxid; |
1471 | int src_pid; |
1472 | Oid src_dbid; |
1473 | int src_isolevel; |
1474 | bool src_readonly; |
1475 | SnapshotData snapshot; |
1476 | |
1477 | /* |
1478 | * Must be at top level of a fresh transaction. Note in particular that |
1479 | * we check we haven't acquired an XID --- if we have, it's conceivable |
1480 | * that the snapshot would show it as not running, making for very screwy |
1481 | * behavior. |
1482 | */ |
1483 | if (FirstSnapshotSet || |
1484 | GetTopTransactionIdIfAny() != InvalidTransactionId || |
1485 | IsSubTransaction()) |
1486 | ereport(ERROR, |
1487 | (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), |
1488 | errmsg("SET TRANSACTION SNAPSHOT must be called before any query" ))); |
1489 | |
1490 | /* |
1491 | * If we are in read committed mode then the next query would execute with |
1492 | * a new snapshot thus making this function call quite useless. |
1493 | */ |
1494 | if (!IsolationUsesXactSnapshot()) |
1495 | ereport(ERROR, |
1496 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
1497 | errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ" ))); |
1498 | |
1499 | /* |
1500 | * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do |
1501 | * this mainly to prevent reading arbitrary files. |
1502 | */ |
1503 | if (strspn(idstr, "0123456789ABCDEF-" ) != strlen(idstr)) |
1504 | ereport(ERROR, |
1505 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
1506 | errmsg("invalid snapshot identifier: \"%s\"" , idstr))); |
1507 | |
1508 | /* OK, read the file */ |
1509 | snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s" , idstr); |
1510 | |
1511 | f = AllocateFile(path, PG_BINARY_R); |
1512 | if (!f) |
1513 | ereport(ERROR, |
1514 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
1515 | errmsg("invalid snapshot identifier: \"%s\"" , idstr))); |
1516 | |
1517 | /* get the size of the file so that we know how much memory we need */ |
1518 | if (fstat(fileno(f), &stat_buf)) |
1519 | elog(ERROR, "could not stat file \"%s\": %m" , path); |
1520 | |
1521 | /* and read the file into a palloc'd string */ |
1522 | filebuf = (char *) palloc(stat_buf.st_size + 1); |
1523 | if (fread(filebuf, stat_buf.st_size, 1, f) != 1) |
1524 | elog(ERROR, "could not read file \"%s\": %m" , path); |
1525 | |
1526 | filebuf[stat_buf.st_size] = '\0'; |
1527 | |
1528 | FreeFile(f); |
1529 | |
1530 | /* |
1531 | * Construct a snapshot struct by parsing the file content. |
1532 | */ |
1533 | memset(&snapshot, 0, sizeof(snapshot)); |
1534 | |
1535 | parseVxidFromText("vxid:" , &filebuf, path, &src_vxid); |
1536 | src_pid = parseIntFromText("pid:" , &filebuf, path); |
1537 | /* we abuse parseXidFromText a bit here ... */ |
1538 | src_dbid = parseXidFromText("dbid:" , &filebuf, path); |
1539 | src_isolevel = parseIntFromText("iso:" , &filebuf, path); |
1540 | src_readonly = parseIntFromText("ro:" , &filebuf, path); |
1541 | |
1542 | snapshot.snapshot_type = SNAPSHOT_MVCC; |
1543 | |
1544 | snapshot.xmin = parseXidFromText("xmin:" , &filebuf, path); |
1545 | snapshot.xmax = parseXidFromText("xmax:" , &filebuf, path); |
1546 | |
1547 | snapshot.xcnt = xcnt = parseIntFromText("xcnt:" , &filebuf, path); |
1548 | |
1549 | /* sanity-check the xid count before palloc */ |
1550 | if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount()) |
1551 | ereport(ERROR, |
1552 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1553 | errmsg("invalid snapshot data in file \"%s\"" , path))); |
1554 | |
1555 | snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId)); |
1556 | for (i = 0; i < xcnt; i++) |
1557 | snapshot.xip[i] = parseXidFromText("xip:" , &filebuf, path); |
1558 | |
1559 | snapshot.suboverflowed = parseIntFromText("sof:" , &filebuf, path); |
1560 | |
1561 | if (!snapshot.suboverflowed) |
1562 | { |
1563 | snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:" , &filebuf, path); |
1564 | |
1565 | /* sanity-check the xid count before palloc */ |
1566 | if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount()) |
1567 | ereport(ERROR, |
1568 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1569 | errmsg("invalid snapshot data in file \"%s\"" , path))); |
1570 | |
1571 | snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId)); |
1572 | for (i = 0; i < xcnt; i++) |
1573 | snapshot.subxip[i] = parseXidFromText("sxp:" , &filebuf, path); |
1574 | } |
1575 | else |
1576 | { |
1577 | snapshot.subxcnt = 0; |
1578 | snapshot.subxip = NULL; |
1579 | } |
1580 | |
1581 | snapshot.takenDuringRecovery = parseIntFromText("rec:" , &filebuf, path); |
1582 | |
1583 | /* |
1584 | * Do some additional sanity checking, just to protect ourselves. We |
1585 | * don't trouble to check the array elements, just the most critical |
1586 | * fields. |
1587 | */ |
1588 | if (!VirtualTransactionIdIsValid(src_vxid) || |
1589 | !OidIsValid(src_dbid) || |
1590 | !TransactionIdIsNormal(snapshot.xmin) || |
1591 | !TransactionIdIsNormal(snapshot.xmax)) |
1592 | ereport(ERROR, |
1593 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1594 | errmsg("invalid snapshot data in file \"%s\"" , path))); |
1595 | |
1596 | /* |
1597 | * If we're serializable, the source transaction must be too, otherwise |
1598 | * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a |
1599 | * non-read-only transaction can't adopt a snapshot from a read-only |
1600 | * transaction, as predicate.c handles the cases very differently. |
1601 | */ |
1602 | if (IsolationIsSerializable()) |
1603 | { |
1604 | if (src_isolevel != XACT_SERIALIZABLE) |
1605 | ereport(ERROR, |
1606 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
1607 | errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction" ))); |
1608 | if (src_readonly && !XactReadOnly) |
1609 | ereport(ERROR, |
1610 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
1611 | errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction" ))); |
1612 | } |
1613 | |
1614 | /* |
1615 | * We cannot import a snapshot that was taken in a different database, |
1616 | * because vacuum calculates OldestXmin on a per-database basis; so the |
1617 | * source transaction's xmin doesn't protect us from data loss. This |
1618 | * restriction could be removed if the source transaction were to mark its |
1619 | * xmin as being globally applicable. But that would require some |
1620 | * additional syntax, since that has to be known when the snapshot is |
1621 | * initially taken. (See pgsql-hackers discussion of 2011-10-21.) |
1622 | */ |
1623 | if (src_dbid != MyDatabaseId) |
1624 | ereport(ERROR, |
1625 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
1626 | errmsg("cannot import a snapshot from a different database" ))); |
1627 | |
1628 | /* OK, install the snapshot */ |
1629 | SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL); |
1630 | } |
1631 | |
1632 | /* |
1633 | * XactHasExportedSnapshots |
1634 | * Test whether current transaction has exported any snapshots. |
1635 | */ |
1636 | bool |
1637 | XactHasExportedSnapshots(void) |
1638 | { |
1639 | return (exportedSnapshots != NIL); |
1640 | } |
1641 | |
1642 | /* |
1643 | * DeleteAllExportedSnapshotFiles |
1644 | * Clean up any files that have been left behind by a crashed backend |
1645 | * that had exported snapshots before it died. |
1646 | * |
1647 | * This should be called during database startup or crash recovery. |
1648 | */ |
1649 | void |
1650 | DeleteAllExportedSnapshotFiles(void) |
1651 | { |
1652 | char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)]; |
1653 | DIR *s_dir; |
1654 | struct dirent *s_de; |
1655 | |
1656 | /* |
1657 | * Problems in reading the directory, or unlinking files, are reported at |
1658 | * LOG level. Since we're running in the startup process, ERROR level |
1659 | * would prevent database start, and it's not important enough for that. |
1660 | */ |
1661 | s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR); |
1662 | |
1663 | while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL) |
1664 | { |
1665 | if (strcmp(s_de->d_name, "." ) == 0 || |
1666 | strcmp(s_de->d_name, ".." ) == 0) |
1667 | continue; |
1668 | |
1669 | snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s" , s_de->d_name); |
1670 | |
1671 | if (unlink(buf) != 0) |
1672 | ereport(LOG, |
1673 | (errcode_for_file_access(), |
1674 | errmsg("could not remove file \"%s\": %m" , buf))); |
1675 | } |
1676 | |
1677 | FreeDir(s_dir); |
1678 | } |
1679 | |
1680 | /* |
1681 | * ThereAreNoPriorRegisteredSnapshots |
1682 | * Is the registered snapshot count less than or equal to one? |
1683 | * |
1684 | * Don't use this to settle important decisions. While zero registrations and |
1685 | * no ActiveSnapshot would confirm a certain idleness, the system makes no |
1686 | * guarantees about the significance of one registered snapshot. |
1687 | */ |
1688 | bool |
1689 | ThereAreNoPriorRegisteredSnapshots(void) |
1690 | { |
1691 | if (pairingheap_is_empty(&RegisteredSnapshots) || |
1692 | pairingheap_is_singular(&RegisteredSnapshots)) |
1693 | return true; |
1694 | |
1695 | return false; |
1696 | } |
1697 | |
1698 | |
1699 | /* |
1700 | * Return a timestamp that is exactly on a minute boundary. |
1701 | * |
1702 | * If the argument is already aligned, return that value, otherwise move to |
1703 | * the next minute boundary following the given time. |
1704 | */ |
1705 | static TimestampTz |
1706 | AlignTimestampToMinuteBoundary(TimestampTz ts) |
1707 | { |
1708 | TimestampTz retval = ts + (USECS_PER_MINUTE - 1); |
1709 | |
1710 | return retval - (retval % USECS_PER_MINUTE); |
1711 | } |
1712 | |
1713 | /* |
1714 | * Get current timestamp for snapshots |
1715 | * |
1716 | * This is basically GetCurrentTimestamp(), but with a guarantee that |
1717 | * the result never moves backward. |
1718 | */ |
1719 | TimestampTz |
1720 | GetSnapshotCurrentTimestamp(void) |
1721 | { |
1722 | TimestampTz now = GetCurrentTimestamp(); |
1723 | |
1724 | /* |
1725 | * Don't let time move backward; if it hasn't advanced, use the old value. |
1726 | */ |
1727 | SpinLockAcquire(&oldSnapshotControl->mutex_current); |
1728 | if (now <= oldSnapshotControl->current_timestamp) |
1729 | now = oldSnapshotControl->current_timestamp; |
1730 | else |
1731 | oldSnapshotControl->current_timestamp = now; |
1732 | SpinLockRelease(&oldSnapshotControl->mutex_current); |
1733 | |
1734 | return now; |
1735 | } |
1736 | |
1737 | /* |
1738 | * Get timestamp through which vacuum may have processed based on last stored |
1739 | * value for threshold_timestamp. |
1740 | * |
1741 | * XXX: So far, we never trust that a 64-bit value can be read atomically; if |
1742 | * that ever changes, we could get rid of the spinlock here. |
1743 | */ |
1744 | TimestampTz |
1745 | GetOldSnapshotThresholdTimestamp(void) |
1746 | { |
1747 | TimestampTz threshold_timestamp; |
1748 | |
1749 | SpinLockAcquire(&oldSnapshotControl->mutex_threshold); |
1750 | threshold_timestamp = oldSnapshotControl->threshold_timestamp; |
1751 | SpinLockRelease(&oldSnapshotControl->mutex_threshold); |
1752 | |
1753 | return threshold_timestamp; |
1754 | } |
1755 | |
/*
 * SetOldSnapshotThresholdTimestamp
 *		Store a new threshold timestamp together with its xid limit.
 *
 * ts is written to threshold_timestamp and xlimit to threshold_xid.  Both
 * stores happen while holding mutex_threshold, so a reader that takes the
 * same spinlock sees the two fields as a matching pair.
 */
static void
SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
{
	SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
	oldSnapshotControl->threshold_timestamp = ts;
	oldSnapshotControl->threshold_xid = xlimit;
	SpinLockRelease(&oldSnapshotControl->mutex_threshold);
}
1764 | |
1765 | /* |
1766 | * TransactionIdLimitedForOldSnapshots |
1767 | * |
1768 | * Apply old snapshot limit, if any. This is intended to be called for page |
1769 | * pruning and table vacuuming, to allow old_snapshot_threshold to override |
1770 | * the normal global xmin value. Actual testing for snapshot too old will be |
1771 | * based on whether a snapshot timestamp is prior to the threshold timestamp |
1772 | * set in this function. |
1773 | */ |
1774 | TransactionId |
1775 | TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, |
1776 | Relation relation) |
1777 | { |
1778 | if (TransactionIdIsNormal(recentXmin) |
1779 | && old_snapshot_threshold >= 0 |
1780 | && RelationAllowsEarlyPruning(relation)) |
1781 | { |
1782 | TimestampTz ts = GetSnapshotCurrentTimestamp(); |
1783 | TransactionId xlimit = recentXmin; |
1784 | TransactionId latest_xmin; |
1785 | TimestampTz update_ts; |
1786 | bool same_ts_as_threshold = false; |
1787 | |
1788 | SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin); |
1789 | latest_xmin = oldSnapshotControl->latest_xmin; |
1790 | update_ts = oldSnapshotControl->next_map_update; |
1791 | SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin); |
1792 | |
1793 | /* |
1794 | * Zero threshold always overrides to latest xmin, if valid. Without |
1795 | * some heuristic it will find its own snapshot too old on, for |
1796 | * example, a simple UPDATE -- which would make it useless for most |
1797 | * testing, but there is no principled way to ensure that it doesn't |
1798 | * fail in this way. Use a five-second delay to try to get useful |
1799 | * testing behavior, but this may need adjustment. |
1800 | */ |
1801 | if (old_snapshot_threshold == 0) |
1802 | { |
1803 | if (TransactionIdPrecedes(latest_xmin, MyPgXact->xmin) |
1804 | && TransactionIdFollows(latest_xmin, xlimit)) |
1805 | xlimit = latest_xmin; |
1806 | |
1807 | ts -= 5 * USECS_PER_SEC; |
1808 | SetOldSnapshotThresholdTimestamp(ts, xlimit); |
1809 | |
1810 | return xlimit; |
1811 | } |
1812 | |
1813 | ts = AlignTimestampToMinuteBoundary(ts) |
1814 | - (old_snapshot_threshold * USECS_PER_MINUTE); |
1815 | |
1816 | /* Check for fast exit without LW locking. */ |
1817 | SpinLockAcquire(&oldSnapshotControl->mutex_threshold); |
1818 | if (ts == oldSnapshotControl->threshold_timestamp) |
1819 | { |
1820 | xlimit = oldSnapshotControl->threshold_xid; |
1821 | same_ts_as_threshold = true; |
1822 | } |
1823 | SpinLockRelease(&oldSnapshotControl->mutex_threshold); |
1824 | |
1825 | if (!same_ts_as_threshold) |
1826 | { |
1827 | if (ts == update_ts) |
1828 | { |
1829 | xlimit = latest_xmin; |
1830 | if (NormalTransactionIdFollows(xlimit, recentXmin)) |
1831 | SetOldSnapshotThresholdTimestamp(ts, xlimit); |
1832 | } |
1833 | else |
1834 | { |
1835 | LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED); |
1836 | |
1837 | if (oldSnapshotControl->count_used > 0 |
1838 | && ts >= oldSnapshotControl->head_timestamp) |
1839 | { |
1840 | int offset; |
1841 | |
1842 | offset = ((ts - oldSnapshotControl->head_timestamp) |
1843 | / USECS_PER_MINUTE); |
1844 | if (offset > oldSnapshotControl->count_used - 1) |
1845 | offset = oldSnapshotControl->count_used - 1; |
1846 | offset = (oldSnapshotControl->head_offset + offset) |
1847 | % OLD_SNAPSHOT_TIME_MAP_ENTRIES; |
1848 | xlimit = oldSnapshotControl->xid_by_minute[offset]; |
1849 | |
1850 | if (NormalTransactionIdFollows(xlimit, recentXmin)) |
1851 | SetOldSnapshotThresholdTimestamp(ts, xlimit); |
1852 | } |
1853 | |
1854 | LWLockRelease(OldSnapshotTimeMapLock); |
1855 | } |
1856 | } |
1857 | |
1858 | /* |
1859 | * Failsafe protection against vacuuming work of active transaction. |
1860 | * |
1861 | * This is not an assertion because we avoid the spinlock for |
1862 | * performance, leaving open the possibility that xlimit could advance |
1863 | * and be more current; but it seems prudent to apply this limit. It |
1864 | * might make pruning a tiny bit less aggressive than it could be, but |
1865 | * protects against data loss bugs. |
1866 | */ |
1867 | if (TransactionIdIsNormal(latest_xmin) |
1868 | && TransactionIdPrecedes(latest_xmin, xlimit)) |
1869 | xlimit = latest_xmin; |
1870 | |
1871 | if (NormalTransactionIdFollows(xlimit, recentXmin)) |
1872 | return xlimit; |
1873 | } |
1874 | |
1875 | return recentXmin; |
1876 | } |
1877 | |
1878 | /* |
1879 | * Take care of the circular buffer that maps time to xid. |
1880 | */ |
1881 | void |
1882 | MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin) |
1883 | { |
1884 | TimestampTz ts; |
1885 | TransactionId latest_xmin; |
1886 | TimestampTz update_ts; |
1887 | bool map_update_required = false; |
1888 | |
1889 | /* Never call this function when old snapshot checking is disabled. */ |
1890 | Assert(old_snapshot_threshold >= 0); |
1891 | |
1892 | ts = AlignTimestampToMinuteBoundary(whenTaken); |
1893 | |
1894 | /* |
1895 | * Keep track of the latest xmin seen by any process. Update mapping with |
1896 | * a new value when we have crossed a bucket boundary. |
1897 | */ |
1898 | SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin); |
1899 | latest_xmin = oldSnapshotControl->latest_xmin; |
1900 | update_ts = oldSnapshotControl->next_map_update; |
1901 | if (ts > update_ts) |
1902 | { |
1903 | oldSnapshotControl->next_map_update = ts; |
1904 | map_update_required = true; |
1905 | } |
1906 | if (TransactionIdFollows(xmin, latest_xmin)) |
1907 | oldSnapshotControl->latest_xmin = xmin; |
1908 | SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin); |
1909 | |
1910 | /* We only needed to update the most recent xmin value. */ |
1911 | if (!map_update_required) |
1912 | return; |
1913 | |
1914 | /* No further tracking needed for 0 (used for testing). */ |
1915 | if (old_snapshot_threshold == 0) |
1916 | return; |
1917 | |
1918 | /* |
1919 | * We don't want to do something stupid with unusual values, but we don't |
1920 | * want to litter the log with warnings or break otherwise normal |
1921 | * processing for this feature; so if something seems unreasonable, just |
1922 | * log at DEBUG level and return without doing anything. |
1923 | */ |
1924 | if (whenTaken < 0) |
1925 | { |
1926 | elog(DEBUG1, |
1927 | "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld" , |
1928 | (long) whenTaken); |
1929 | return; |
1930 | } |
1931 | if (!TransactionIdIsNormal(xmin)) |
1932 | { |
1933 | elog(DEBUG1, |
1934 | "MaintainOldSnapshotTimeMapping called with xmin = %lu" , |
1935 | (unsigned long) xmin); |
1936 | return; |
1937 | } |
1938 | |
1939 | LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE); |
1940 | |
1941 | Assert(oldSnapshotControl->head_offset >= 0); |
1942 | Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES); |
1943 | Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0); |
1944 | Assert(oldSnapshotControl->count_used >= 0); |
1945 | Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES); |
1946 | |
1947 | if (oldSnapshotControl->count_used == 0) |
1948 | { |
1949 | /* set up first entry for empty mapping */ |
1950 | oldSnapshotControl->head_offset = 0; |
1951 | oldSnapshotControl->head_timestamp = ts; |
1952 | oldSnapshotControl->count_used = 1; |
1953 | oldSnapshotControl->xid_by_minute[0] = xmin; |
1954 | } |
1955 | else if (ts < oldSnapshotControl->head_timestamp) |
1956 | { |
1957 | /* old ts; log it at DEBUG */ |
1958 | LWLockRelease(OldSnapshotTimeMapLock); |
1959 | elog(DEBUG1, |
1960 | "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld" , |
1961 | (long) whenTaken); |
1962 | return; |
1963 | } |
1964 | else if (ts <= (oldSnapshotControl->head_timestamp + |
1965 | ((oldSnapshotControl->count_used - 1) |
1966 | * USECS_PER_MINUTE))) |
1967 | { |
1968 | /* existing mapping; advance xid if possible */ |
1969 | int bucket = (oldSnapshotControl->head_offset |
1970 | + ((ts - oldSnapshotControl->head_timestamp) |
1971 | / USECS_PER_MINUTE)) |
1972 | % OLD_SNAPSHOT_TIME_MAP_ENTRIES; |
1973 | |
1974 | if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin)) |
1975 | oldSnapshotControl->xid_by_minute[bucket] = xmin; |
1976 | } |
1977 | else |
1978 | { |
1979 | /* We need a new bucket, but it might not be the very next one. */ |
1980 | int advance = ((ts - oldSnapshotControl->head_timestamp) |
1981 | / USECS_PER_MINUTE); |
1982 | |
1983 | oldSnapshotControl->head_timestamp = ts; |
1984 | |
1985 | if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES) |
1986 | { |
1987 | /* Advance is so far that all old data is junk; start over. */ |
1988 | oldSnapshotControl->head_offset = 0; |
1989 | oldSnapshotControl->count_used = 1; |
1990 | oldSnapshotControl->xid_by_minute[0] = xmin; |
1991 | } |
1992 | else |
1993 | { |
1994 | /* Store the new value in one or more buckets. */ |
1995 | int i; |
1996 | |
1997 | for (i = 0; i < advance; i++) |
1998 | { |
1999 | if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES) |
2000 | { |
2001 | /* Map full and new value replaces old head. */ |
2002 | int old_head = oldSnapshotControl->head_offset; |
2003 | |
2004 | if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1)) |
2005 | oldSnapshotControl->head_offset = 0; |
2006 | else |
2007 | oldSnapshotControl->head_offset = old_head + 1; |
2008 | oldSnapshotControl->xid_by_minute[old_head] = xmin; |
2009 | } |
2010 | else |
2011 | { |
2012 | /* Extend map to unused entry. */ |
2013 | int new_tail = (oldSnapshotControl->head_offset |
2014 | + oldSnapshotControl->count_used) |
2015 | % OLD_SNAPSHOT_TIME_MAP_ENTRIES; |
2016 | |
2017 | oldSnapshotControl->count_used++; |
2018 | oldSnapshotControl->xid_by_minute[new_tail] = xmin; |
2019 | } |
2020 | } |
2021 | } |
2022 | } |
2023 | |
2024 | LWLockRelease(OldSnapshotTimeMapLock); |
2025 | } |
2026 | |
2027 | |
2028 | /* |
2029 | * Setup a snapshot that replaces normal catalog snapshots that allows catalog |
2030 | * access to behave just like it did at a certain point in the past. |
2031 | * |
2032 | * Needed for logical decoding. |
2033 | */ |
2034 | void |
2035 | SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids) |
2036 | { |
2037 | Assert(historic_snapshot != NULL); |
2038 | |
2039 | /* setup the timetravel snapshot */ |
2040 | HistoricSnapshot = historic_snapshot; |
2041 | |
2042 | /* setup (cmin, cmax) lookup hash */ |
2043 | tuplecid_data = tuplecids; |
2044 | } |
2045 | |
2046 | |
2047 | /* |
2048 | * Make catalog snapshots behave normally again. |
2049 | */ |
2050 | void |
2051 | TeardownHistoricSnapshot(bool is_error) |
2052 | { |
2053 | HistoricSnapshot = NULL; |
2054 | tuplecid_data = NULL; |
2055 | } |
2056 | |
2057 | bool |
2058 | HistoricSnapshotActive(void) |
2059 | { |
2060 | return HistoricSnapshot != NULL; |
2061 | } |
2062 | |
/*
 * HistoricSnapshotGetTupleCids
 *		Return the hash table pointer currently stored in tuplecid_data.
 *
 * Asserts that a historic snapshot is active; callers are expected to use
 * this only while one is set up.
 */
HTAB *
HistoricSnapshotGetTupleCids(void)
{
	Assert(HistoricSnapshotActive());
	return tuplecid_data;
}
2069 | |
2070 | /* |
2071 | * EstimateSnapshotSpace |
2072 | * Returns the size needed to store the given snapshot. |
2073 | * |
2074 | * We are exporting only required fields from the Snapshot, stored in |
2075 | * SerializedSnapshotData. |
2076 | */ |
2077 | Size |
2078 | EstimateSnapshotSpace(Snapshot snap) |
2079 | { |
2080 | Size size; |
2081 | |
2082 | Assert(snap != InvalidSnapshot); |
2083 | Assert(snap->snapshot_type == SNAPSHOT_MVCC); |
2084 | |
2085 | /* We allocate any XID arrays needed in the same palloc block. */ |
2086 | size = add_size(sizeof(SerializedSnapshotData), |
2087 | mul_size(snap->xcnt, sizeof(TransactionId))); |
2088 | if (snap->subxcnt > 0 && |
2089 | (!snap->suboverflowed || snap->takenDuringRecovery)) |
2090 | size = add_size(size, |
2091 | mul_size(snap->subxcnt, sizeof(TransactionId))); |
2092 | |
2093 | return size; |
2094 | } |
2095 | |
2096 | /* |
2097 | * SerializeSnapshot |
2098 | * Dumps the serialized snapshot (extracted from given snapshot) onto the |
2099 | * memory location at start_address. |
2100 | */ |
2101 | void |
2102 | SerializeSnapshot(Snapshot snapshot, char *start_address) |
2103 | { |
2104 | SerializedSnapshotData serialized_snapshot; |
2105 | |
2106 | Assert(snapshot->subxcnt >= 0); |
2107 | |
2108 | /* Copy all required fields */ |
2109 | serialized_snapshot.xmin = snapshot->xmin; |
2110 | serialized_snapshot.xmax = snapshot->xmax; |
2111 | serialized_snapshot.xcnt = snapshot->xcnt; |
2112 | serialized_snapshot.subxcnt = snapshot->subxcnt; |
2113 | serialized_snapshot.suboverflowed = snapshot->suboverflowed; |
2114 | serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery; |
2115 | serialized_snapshot.curcid = snapshot->curcid; |
2116 | serialized_snapshot.whenTaken = snapshot->whenTaken; |
2117 | serialized_snapshot.lsn = snapshot->lsn; |
2118 | |
2119 | /* |
2120 | * Ignore the SubXID array if it has overflowed, unless the snapshot was |
2121 | * taken during recovery - in that case, top-level XIDs are in subxip as |
2122 | * well, and we mustn't lose them. |
2123 | */ |
2124 | if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery) |
2125 | serialized_snapshot.subxcnt = 0; |
2126 | |
2127 | /* Copy struct to possibly-unaligned buffer */ |
2128 | memcpy(start_address, |
2129 | &serialized_snapshot, sizeof(SerializedSnapshotData)); |
2130 | |
2131 | /* Copy XID array */ |
2132 | if (snapshot->xcnt > 0) |
2133 | memcpy((TransactionId *) (start_address + |
2134 | sizeof(SerializedSnapshotData)), |
2135 | snapshot->xip, snapshot->xcnt * sizeof(TransactionId)); |
2136 | |
2137 | /* |
2138 | * Copy SubXID array. Don't bother to copy it if it had overflowed, |
2139 | * though, because it's not used anywhere in that case. Except if it's a |
2140 | * snapshot taken during recovery; all the top-level XIDs are in subxip as |
2141 | * well in that case, so we mustn't lose them. |
2142 | */ |
2143 | if (serialized_snapshot.subxcnt > 0) |
2144 | { |
2145 | Size subxipoff = sizeof(SerializedSnapshotData) + |
2146 | snapshot->xcnt * sizeof(TransactionId); |
2147 | |
2148 | memcpy((TransactionId *) (start_address + subxipoff), |
2149 | snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId)); |
2150 | } |
2151 | } |
2152 | |
2153 | /* |
2154 | * RestoreSnapshot |
2155 | * Restore a serialized snapshot from the specified address. |
2156 | * |
2157 | * The copy is palloc'd in TopTransactionContext and has initial refcounts set |
2158 | * to 0. The returned snapshot has the copied flag set. |
2159 | */ |
2160 | Snapshot |
2161 | RestoreSnapshot(char *start_address) |
2162 | { |
2163 | SerializedSnapshotData serialized_snapshot; |
2164 | Size size; |
2165 | Snapshot snapshot; |
2166 | TransactionId *serialized_xids; |
2167 | |
2168 | memcpy(&serialized_snapshot, start_address, |
2169 | sizeof(SerializedSnapshotData)); |
2170 | serialized_xids = (TransactionId *) |
2171 | (start_address + sizeof(SerializedSnapshotData)); |
2172 | |
2173 | /* We allocate any XID arrays needed in the same palloc block. */ |
2174 | size = sizeof(SnapshotData) |
2175 | + serialized_snapshot.xcnt * sizeof(TransactionId) |
2176 | + serialized_snapshot.subxcnt * sizeof(TransactionId); |
2177 | |
2178 | /* Copy all required fields */ |
2179 | snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size); |
2180 | snapshot->snapshot_type = SNAPSHOT_MVCC; |
2181 | snapshot->xmin = serialized_snapshot.xmin; |
2182 | snapshot->xmax = serialized_snapshot.xmax; |
2183 | snapshot->xip = NULL; |
2184 | snapshot->xcnt = serialized_snapshot.xcnt; |
2185 | snapshot->subxip = NULL; |
2186 | snapshot->subxcnt = serialized_snapshot.subxcnt; |
2187 | snapshot->suboverflowed = serialized_snapshot.suboverflowed; |
2188 | snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery; |
2189 | snapshot->curcid = serialized_snapshot.curcid; |
2190 | snapshot->whenTaken = serialized_snapshot.whenTaken; |
2191 | snapshot->lsn = serialized_snapshot.lsn; |
2192 | |
2193 | /* Copy XIDs, if present. */ |
2194 | if (serialized_snapshot.xcnt > 0) |
2195 | { |
2196 | snapshot->xip = (TransactionId *) (snapshot + 1); |
2197 | memcpy(snapshot->xip, serialized_xids, |
2198 | serialized_snapshot.xcnt * sizeof(TransactionId)); |
2199 | } |
2200 | |
2201 | /* Copy SubXIDs, if present. */ |
2202 | if (serialized_snapshot.subxcnt > 0) |
2203 | { |
2204 | snapshot->subxip = ((TransactionId *) (snapshot + 1)) + |
2205 | serialized_snapshot.xcnt; |
2206 | memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt, |
2207 | serialized_snapshot.subxcnt * sizeof(TransactionId)); |
2208 | } |
2209 | |
2210 | /* Set the copied flag so that the caller will set refcounts correctly. */ |
2211 | snapshot->regd_count = 0; |
2212 | snapshot->active_count = 0; |
2213 | snapshot->copied = true; |
2214 | |
2215 | return snapshot; |
2216 | } |
2217 | |
/*
 * RestoreTransactionSnapshot
 *		Install a restored snapshot as the transaction snapshot.
 *
 * This simply forwards to SetTransactionSnapshot(), passing NULL and
 * InvalidPid for its second and third arguments and master_pgproc as the
 * fourth.
 *
 * The second argument is of type void * so that snapmgr.h need not include
 * the declaration for PGPROC.
 */
void
RestoreTransactionSnapshot(Snapshot snapshot, void *master_pgproc)
{
	SetTransactionSnapshot(snapshot, NULL, InvalidPid, master_pgproc);
}
2229 | |
2230 | /* |
2231 | * XidInMVCCSnapshot |
2232 | * Is the given XID still-in-progress according to the snapshot? |
2233 | * |
2234 | * Note: GetSnapshotData never stores either top xid or subxids of our own |
2235 | * backend into a snapshot, so these xids will not be reported as "running" |
2236 | * by this function. This is OK for current uses, because we always check |
2237 | * TransactionIdIsCurrentTransactionId first, except when it's known the |
2238 | * XID could not be ours anyway. |
2239 | */ |
2240 | bool |
2241 | XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) |
2242 | { |
2243 | uint32 i; |
2244 | |
2245 | /* |
2246 | * Make a quick range check to eliminate most XIDs without looking at the |
2247 | * xip arrays. Note that this is OK even if we convert a subxact XID to |
2248 | * its parent below, because a subxact with XID < xmin has surely also got |
2249 | * a parent with XID < xmin, while one with XID >= xmax must belong to a |
2250 | * parent that was not yet committed at the time of this snapshot. |
2251 | */ |
2252 | |
2253 | /* Any xid < xmin is not in-progress */ |
2254 | if (TransactionIdPrecedes(xid, snapshot->xmin)) |
2255 | return false; |
2256 | /* Any xid >= xmax is in-progress */ |
2257 | if (TransactionIdFollowsOrEquals(xid, snapshot->xmax)) |
2258 | return true; |
2259 | |
2260 | /* |
2261 | * Snapshot information is stored slightly differently in snapshots taken |
2262 | * during recovery. |
2263 | */ |
2264 | if (!snapshot->takenDuringRecovery) |
2265 | { |
2266 | /* |
2267 | * If the snapshot contains full subxact data, the fastest way to |
2268 | * check things is just to compare the given XID against both subxact |
2269 | * XIDs and top-level XIDs. If the snapshot overflowed, we have to |
2270 | * use pg_subtrans to convert a subxact XID to its parent XID, but |
2271 | * then we need only look at top-level XIDs not subxacts. |
2272 | */ |
2273 | if (!snapshot->suboverflowed) |
2274 | { |
2275 | /* we have full data, so search subxip */ |
2276 | int32 j; |
2277 | |
2278 | for (j = 0; j < snapshot->subxcnt; j++) |
2279 | { |
2280 | if (TransactionIdEquals(xid, snapshot->subxip[j])) |
2281 | return true; |
2282 | } |
2283 | |
2284 | /* not there, fall through to search xip[] */ |
2285 | } |
2286 | else |
2287 | { |
2288 | /* |
2289 | * Snapshot overflowed, so convert xid to top-level. This is safe |
2290 | * because we eliminated too-old XIDs above. |
2291 | */ |
2292 | xid = SubTransGetTopmostTransaction(xid); |
2293 | |
2294 | /* |
2295 | * If xid was indeed a subxact, we might now have an xid < xmin, |
2296 | * so recheck to avoid an array scan. No point in rechecking |
2297 | * xmax. |
2298 | */ |
2299 | if (TransactionIdPrecedes(xid, snapshot->xmin)) |
2300 | return false; |
2301 | } |
2302 | |
2303 | for (i = 0; i < snapshot->xcnt; i++) |
2304 | { |
2305 | if (TransactionIdEquals(xid, snapshot->xip[i])) |
2306 | return true; |
2307 | } |
2308 | } |
2309 | else |
2310 | { |
2311 | int32 j; |
2312 | |
2313 | /* |
2314 | * In recovery we store all xids in the subxact array because it is by |
2315 | * far the bigger array, and we mostly don't know which xids are |
2316 | * top-level and which are subxacts. The xip array is empty. |
2317 | * |
2318 | * We start by searching subtrans, if we overflowed. |
2319 | */ |
2320 | if (snapshot->suboverflowed) |
2321 | { |
2322 | /* |
2323 | * Snapshot overflowed, so convert xid to top-level. This is safe |
2324 | * because we eliminated too-old XIDs above. |
2325 | */ |
2326 | xid = SubTransGetTopmostTransaction(xid); |
2327 | |
2328 | /* |
2329 | * If xid was indeed a subxact, we might now have an xid < xmin, |
2330 | * so recheck to avoid an array scan. No point in rechecking |
2331 | * xmax. |
2332 | */ |
2333 | if (TransactionIdPrecedes(xid, snapshot->xmin)) |
2334 | return false; |
2335 | } |
2336 | |
2337 | /* |
2338 | * We now have either a top-level xid higher than xmin or an |
2339 | * indeterminate xid. We don't know whether it's top level or subxact |
2340 | * but it doesn't matter. If it's present, the xid is visible. |
2341 | */ |
2342 | for (j = 0; j < snapshot->subxcnt; j++) |
2343 | { |
2344 | if (TransactionIdEquals(xid, snapshot->subxip[j])) |
2345 | return true; |
2346 | } |
2347 | } |
2348 | |
2349 | return false; |
2350 | } |
2351 | |