1 | /* |
2 | * reorderbuffer.h |
3 | * PostgreSQL logical replay/reorder buffer management. |
4 | * |
5 | * Copyright (c) 2012-2019, PostgreSQL Global Development Group |
6 | * |
7 | * src/include/replication/reorderbuffer.h |
8 | */ |
9 | #ifndef REORDERBUFFER_H |
10 | #define REORDERBUFFER_H |
11 | |
12 | #include "access/htup_details.h" |
13 | #include "lib/ilist.h" |
14 | #include "storage/sinval.h" |
15 | #include "utils/hsearch.h" |
16 | #include "utils/relcache.h" |
17 | #include "utils/snapshot.h" |
18 | #include "utils/timestamp.h" |
19 | |
20 | /* an individual tuple, stored in one chunk of memory */ |
21 | typedef struct ReorderBufferTupleBuf |
22 | { |
23 | /* position in preallocated list */ |
24 | slist_node node; |
25 | |
26 | /* tuple header, the interesting bit for users of logical decoding */ |
27 | HeapTupleData tuple; |
28 | |
29 | /* pre-allocated size of tuple buffer, different from tuple size */ |
30 | Size alloc_tuple_size; |
31 | |
32 | /* actual tuple data follows */ |
33 | } ReorderBufferTupleBuf; |
34 | |
35 | /* pointer to the data stored in a TupleBuf */ |
36 | #define ReorderBufferTupleBufData(p) \ |
37 | ((HeapTupleHeader) MAXALIGN(((char *) p) + sizeof(ReorderBufferTupleBuf))) |
38 | |
39 | /* |
40 | * Types of the change passed to a 'change' callback. |
41 | * |
42 | * For efficiency and simplicity reasons we want to keep Snapshots, CommandIds |
43 | * and ComboCids in the same list with the user visible INSERT/UPDATE/DELETE |
44 | * changes. Users of the decoding facilities will never see changes with |
45 | * *_INTERNAL_* actions. |
46 | * |
47 | * The INTERNAL_SPEC_INSERT and INTERNAL_SPEC_CONFIRM changes concern |
48 | * "speculative insertions", and their confirmation respectively. They're |
49 | * used by INSERT .. ON CONFLICT .. UPDATE. Users of logical decoding don't |
50 | * have to care about these. |
51 | */ |
52 | enum ReorderBufferChangeType |
53 | { |
54 | REORDER_BUFFER_CHANGE_INSERT, |
55 | REORDER_BUFFER_CHANGE_UPDATE, |
56 | REORDER_BUFFER_CHANGE_DELETE, |
57 | REORDER_BUFFER_CHANGE_MESSAGE, |
58 | REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, |
59 | REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, |
60 | REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, |
61 | REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, |
62 | REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, |
63 | REORDER_BUFFER_CHANGE_TRUNCATE |
64 | }; |
65 | |
66 | /* |
67 | * a single 'change', can be an insert (with one tuple), an update (old, new), |
68 | * or a delete (old). |
69 | * |
70 | * The same struct is also used internally for other purposes but that should |
71 | * never be visible outside reorderbuffer.c. |
72 | */ |
73 | typedef struct ReorderBufferChange |
74 | { |
75 | XLogRecPtr lsn; |
76 | |
77 | /* The type of change. */ |
78 | enum ReorderBufferChangeType action; |
79 | |
80 | RepOriginId origin_id; |
81 | |
82 | /* |
83 | * Context data for the change. Which part of the union is valid depends |
84 | * on action. |
85 | */ |
86 | union |
87 | { |
88 | /* Old, new tuples when action == *_INSERT|UPDATE|DELETE */ |
89 | struct |
90 | { |
91 | /* relation that has been changed */ |
92 | RelFileNode relnode; |
93 | |
94 | /* no previously reassembled toast chunks are necessary anymore */ |
95 | bool clear_toast_afterwards; |
96 | |
97 | /* valid for DELETE || UPDATE */ |
98 | ReorderBufferTupleBuf *oldtuple; |
99 | /* valid for INSERT || UPDATE */ |
100 | ReorderBufferTupleBuf *newtuple; |
101 | } tp; |
102 | |
103 | /* |
104 | * Truncate data for REORDER_BUFFER_CHANGE_TRUNCATE representing one |
105 | * set of relations to be truncated. |
106 | */ |
107 | struct |
108 | { |
109 | Size nrelids; |
110 | bool cascade; |
111 | bool restart_seqs; |
112 | Oid *relids; |
113 | } truncate; |
114 | |
115 | /* Message with arbitrary data. */ |
116 | struct |
117 | { |
118 | char *prefix; |
119 | Size message_size; |
120 | char *message; |
121 | } msg; |
122 | |
123 | /* New snapshot, set when action == *_INTERNAL_SNAPSHOT */ |
124 | Snapshot snapshot; |
125 | |
126 | /* |
127 | * New command id for existing snapshot in a catalog changing tx. Set |
128 | * when action == *_INTERNAL_COMMAND_ID. |
129 | */ |
130 | CommandId command_id; |
131 | |
132 | /* |
133 | * New cid mapping for catalog changing transaction, set when action |
134 | * == *_INTERNAL_TUPLECID. |
135 | */ |
136 | struct |
137 | { |
138 | RelFileNode node; |
139 | ItemPointerData tid; |
140 | CommandId cmin; |
141 | CommandId cmax; |
142 | CommandId combocid; |
143 | } tuplecid; |
144 | } data; |
145 | |
146 | /* |
147 | * While in use this is how a change is linked into a transactions, |
148 | * otherwise it's the preallocated list. |
149 | */ |
150 | dlist_node node; |
151 | } ReorderBufferChange; |
152 | |
153 | typedef struct ReorderBufferTXN |
154 | { |
155 | /* |
156 | * The transactions transaction id, can be a toplevel or sub xid. |
157 | */ |
158 | TransactionId xid; |
159 | |
160 | /* did the TX have catalog changes */ |
161 | bool has_catalog_changes; |
162 | |
163 | /* Do we know this is a subxact? Xid of top-level txn if so */ |
164 | bool is_known_as_subxact; |
165 | TransactionId toplevel_xid; |
166 | |
167 | /* |
168 | * LSN of the first data carrying, WAL record with knowledge about this |
169 | * xid. This is allowed to *not* be first record adorned with this xid, if |
170 | * the previous records aren't relevant for logical decoding. |
171 | */ |
172 | XLogRecPtr first_lsn; |
173 | |
174 | /* ---- |
175 | * LSN of the record that lead to this xact to be committed or |
176 | * aborted. This can be a |
177 | * * plain commit record |
178 | * * plain commit record, of a parent transaction |
179 | * * prepared transaction commit |
180 | * * plain abort record |
181 | * * prepared transaction abort |
182 | * * error during decoding |
183 | * * for a crashed transaction, the LSN of the last change, regardless of |
184 | * what it was. |
185 | * ---- |
186 | */ |
187 | XLogRecPtr final_lsn; |
188 | |
189 | /* |
190 | * LSN pointing to the end of the commit record + 1. |
191 | */ |
192 | XLogRecPtr end_lsn; |
193 | |
194 | /* |
195 | * LSN of the last lsn at which snapshot information reside, so we can |
196 | * restart decoding from there and fully recover this transaction from |
197 | * WAL. |
198 | */ |
199 | XLogRecPtr restart_decoding_lsn; |
200 | |
201 | /* origin of the change that caused this transaction */ |
202 | RepOriginId origin_id; |
203 | XLogRecPtr origin_lsn; |
204 | |
205 | /* |
206 | * Commit time, only known when we read the actual commit record. |
207 | */ |
208 | TimestampTz commit_time; |
209 | |
210 | /* |
211 | * The base snapshot is used to decode all changes until either this |
212 | * transaction modifies the catalog, or another catalog-modifying |
213 | * transaction commits. |
214 | */ |
215 | Snapshot base_snapshot; |
216 | XLogRecPtr base_snapshot_lsn; |
217 | dlist_node base_snapshot_node; /* link in txns_by_base_snapshot_lsn */ |
218 | |
219 | /* |
220 | * How many ReorderBufferChange's do we have in this txn. |
221 | * |
222 | * Changes in subtransactions are *not* included but tracked separately. |
223 | */ |
224 | uint64 nentries; |
225 | |
226 | /* |
227 | * How many of the above entries are stored in memory in contrast to being |
228 | * spilled to disk. |
229 | */ |
230 | uint64 nentries_mem; |
231 | |
232 | /* |
233 | * Has this transaction been spilled to disk? It's not always possible to |
234 | * deduce that fact by comparing nentries with nentries_mem, because e.g. |
235 | * subtransactions of a large transaction might get serialized together |
236 | * with the parent - if they're restored to memory they'd have |
237 | * nentries_mem == nentries. |
238 | */ |
239 | bool serialized; |
240 | |
241 | /* |
242 | * List of ReorderBufferChange structs, including new Snapshots and new |
243 | * CommandIds |
244 | */ |
245 | dlist_head changes; |
246 | |
247 | /* |
248 | * List of (relation, ctid) => (cmin, cmax) mappings for catalog tuples. |
249 | * Those are always assigned to the toplevel transaction. (Keep track of |
250 | * #entries to create a hash of the right size) |
251 | */ |
252 | dlist_head tuplecids; |
253 | uint64 ntuplecids; |
254 | |
255 | /* |
256 | * On-demand built hash for looking up the above values. |
257 | */ |
258 | HTAB *tuplecid_hash; |
259 | |
260 | /* |
261 | * Hash containing (potentially partial) toast entries. NULL if no toast |
262 | * tuples have been found for the current change. |
263 | */ |
264 | HTAB *toast_hash; |
265 | |
266 | /* |
267 | * non-hierarchical list of subtransactions that are *not* aborted. Only |
268 | * used in toplevel transactions. |
269 | */ |
270 | dlist_head subtxns; |
271 | uint32 nsubtxns; |
272 | |
273 | /* |
274 | * Stored cache invalidations. This is not a linked list because we get |
275 | * all the invalidations at once. |
276 | */ |
277 | uint32 ninvalidations; |
278 | SharedInvalidationMessage *invalidations; |
279 | |
280 | /* --- |
281 | * Position in one of three lists: |
282 | * * list of subtransactions if we are *known* to be subxact |
283 | * * list of toplevel xacts (can be an as-yet unknown subxact) |
284 | * * list of preallocated ReorderBufferTXNs (if unused) |
285 | * --- |
286 | */ |
287 | dlist_node node; |
288 | |
289 | } ReorderBufferTXN; |
290 | |
291 | /* so we can define the callbacks used inside struct ReorderBuffer itself */ |
292 | typedef struct ReorderBuffer ReorderBuffer; |
293 | |
294 | /* change callback signature */ |
295 | typedef void (*ReorderBufferApplyChangeCB) (ReorderBuffer *rb, |
296 | ReorderBufferTXN *txn, |
297 | Relation relation, |
298 | ReorderBufferChange *change); |
299 | |
300 | /* truncate callback signature */ |
301 | typedef void (*ReorderBufferApplyTruncateCB) (ReorderBuffer *rb, |
302 | ReorderBufferTXN *txn, |
303 | int nrelations, |
304 | Relation relations[], |
305 | ReorderBufferChange *change); |
306 | |
307 | /* begin callback signature */ |
308 | typedef void (*ReorderBufferBeginCB) (ReorderBuffer *rb, |
309 | ReorderBufferTXN *txn); |
310 | |
311 | /* commit callback signature */ |
312 | typedef void (*ReorderBufferCommitCB) (ReorderBuffer *rb, |
313 | ReorderBufferTXN *txn, |
314 | XLogRecPtr commit_lsn); |
315 | |
316 | /* message callback signature */ |
317 | typedef void (*ReorderBufferMessageCB) (ReorderBuffer *rb, |
318 | ReorderBufferTXN *txn, |
319 | XLogRecPtr message_lsn, |
320 | bool transactional, |
321 | const char *prefix, Size sz, |
322 | const char *message); |
323 | |
324 | struct ReorderBuffer |
325 | { |
326 | /* |
327 | * xid => ReorderBufferTXN lookup table |
328 | */ |
329 | HTAB *by_txn; |
330 | |
331 | /* |
332 | * Transactions that could be a toplevel xact, ordered by LSN of the first |
333 | * record bearing that xid. |
334 | */ |
335 | dlist_head toplevel_by_lsn; |
336 | |
337 | /* |
338 | * Transactions and subtransactions that have a base snapshot, ordered by |
339 | * LSN of the record which caused us to first obtain the base snapshot. |
340 | * This is not the same as toplevel_by_lsn, because we only set the base |
341 | * snapshot on the first logical-decoding-relevant record (eg. heap |
342 | * writes), whereas the initial LSN could be set by other operations. |
343 | */ |
344 | dlist_head txns_by_base_snapshot_lsn; |
345 | |
346 | /* |
347 | * one-entry sized cache for by_txn. Very frequently the same txn gets |
348 | * looked up over and over again. |
349 | */ |
350 | TransactionId by_txn_last_xid; |
351 | ReorderBufferTXN *by_txn_last_txn; |
352 | |
353 | /* |
354 | * Callbacks to be called when a transactions commits. |
355 | */ |
356 | ReorderBufferBeginCB begin; |
357 | ReorderBufferApplyChangeCB apply_change; |
358 | ReorderBufferApplyTruncateCB apply_truncate; |
359 | ReorderBufferCommitCB commit; |
360 | ReorderBufferMessageCB message; |
361 | |
362 | /* |
363 | * Pointer that will be passed untouched to the callbacks. |
364 | */ |
365 | void *private_data; |
366 | |
367 | /* |
368 | * Saved output plugin option |
369 | */ |
370 | bool output_rewrites; |
371 | |
372 | /* |
373 | * Private memory context. |
374 | */ |
375 | MemoryContext context; |
376 | |
377 | /* |
378 | * Memory contexts for specific types objects |
379 | */ |
380 | MemoryContext change_context; |
381 | MemoryContext txn_context; |
382 | MemoryContext tup_context; |
383 | |
384 | XLogRecPtr current_restart_decoding_lsn; |
385 | |
386 | /* buffer for disk<->memory conversions */ |
387 | char *outbuf; |
388 | Size outbufsize; |
389 | }; |
390 | |
391 | |
392 | ReorderBuffer *ReorderBufferAllocate(void); |
393 | void ReorderBufferFree(ReorderBuffer *); |
394 | |
395 | ReorderBufferTupleBuf *ReorderBufferGetTupleBuf(ReorderBuffer *, Size tuple_len); |
396 | void ReorderBufferReturnTupleBuf(ReorderBuffer *, ReorderBufferTupleBuf *tuple); |
397 | ReorderBufferChange *ReorderBufferGetChange(ReorderBuffer *); |
398 | void ReorderBufferReturnChange(ReorderBuffer *, ReorderBufferChange *); |
399 | |
400 | Oid *ReorderBufferGetRelids(ReorderBuffer *, int nrelids); |
401 | void ReorderBufferReturnRelids(ReorderBuffer *, Oid *relids); |
402 | |
403 | void ReorderBufferQueueChange(ReorderBuffer *, TransactionId, XLogRecPtr lsn, ReorderBufferChange *); |
404 | void ReorderBufferQueueMessage(ReorderBuffer *, TransactionId, Snapshot snapshot, XLogRecPtr lsn, |
405 | bool transactional, const char *prefix, |
406 | Size message_size, const char *message); |
407 | void ReorderBufferCommit(ReorderBuffer *, TransactionId, |
408 | XLogRecPtr commit_lsn, XLogRecPtr end_lsn, |
409 | TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn); |
410 | void ReorderBufferAssignChild(ReorderBuffer *, TransactionId, TransactionId, XLogRecPtr commit_lsn); |
411 | void ReorderBufferCommitChild(ReorderBuffer *, TransactionId, TransactionId, |
412 | XLogRecPtr commit_lsn, XLogRecPtr end_lsn); |
413 | void ReorderBufferAbort(ReorderBuffer *, TransactionId, XLogRecPtr lsn); |
414 | void ReorderBufferAbortOld(ReorderBuffer *, TransactionId xid); |
415 | void ReorderBufferForget(ReorderBuffer *, TransactionId, XLogRecPtr lsn); |
416 | |
417 | void ReorderBufferSetBaseSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap); |
418 | void ReorderBufferAddSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap); |
419 | void ReorderBufferAddNewCommandId(ReorderBuffer *, TransactionId, XLogRecPtr lsn, |
420 | CommandId cid); |
421 | void ReorderBufferAddNewTupleCids(ReorderBuffer *, TransactionId, XLogRecPtr lsn, |
422 | RelFileNode node, ItemPointerData pt, |
423 | CommandId cmin, CommandId cmax, CommandId combocid); |
424 | void ReorderBufferAddInvalidations(ReorderBuffer *, TransactionId, XLogRecPtr lsn, |
425 | Size nmsgs, SharedInvalidationMessage *msgs); |
426 | void ReorderBufferImmediateInvalidation(ReorderBuffer *, uint32 ninvalidations, |
427 | SharedInvalidationMessage *invalidations); |
428 | void ReorderBufferProcessXid(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn); |
429 | void ReorderBufferXidSetCatalogChanges(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn); |
430 | bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *, TransactionId xid); |
431 | bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *, TransactionId xid); |
432 | |
433 | ReorderBufferTXN *ReorderBufferGetOldestTXN(ReorderBuffer *); |
434 | TransactionId ReorderBufferGetOldestXmin(ReorderBuffer *rb); |
435 | |
436 | void ReorderBufferSetRestartPoint(ReorderBuffer *, XLogRecPtr ptr); |
437 | |
438 | void StartupReorderBuffer(void); |
439 | |
440 | #endif |
441 | |