nodeTidscan.c source code [PostgreSQL/src/backend/executor/nodeTidscan.c]

/*-------------------------------------------------------------------------
2	*
3	* nodeTidscan.c
4	* Routines to support direct tid scans of relations
5	*
6	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7	* Portions Copyright (c) 1994, Regents of the University of California
8	*
9	*
10	* IDENTIFICATION
11	* src/backend/executor/nodeTidscan.c
12	*
13	*-------------------------------------------------------------------------
14	*/
15	/*
16	* INTERFACE ROUTINES
17	*
18	* ExecTidScan scans a relation using tids
19	* ExecInitTidScan creates and initializes state info.
20	* ExecReScanTidScan rescans the tid relation.
21	* ExecEndTidScan releases all storage.
22	*/
23	#include "postgres.h"
24
25	#include "access/sysattr.h"
26	#include "access/tableam.h"
27	#include "catalog/pg_type.h"
28	#include "executor/execdebug.h"
29	#include "executor/nodeTidscan.h"
30	#include "miscadmin.h"
31	#include "nodes/nodeFuncs.h"
32	#include "storage/bufmgr.h"
33	#include "utils/array.h"
34	#include "utils/rel.h"
35
36
/*
 * IsCTIDVar(node)
 *
 * Evaluates to true when "node" is non-NULL, is tagged as a Var (per IsA),
 * has varattno equal to SelfItemPointerAttributeNumber, and has a
 * varlevelsup of 0.  The tests short-circuit in that order, so the casts to
 * Var are only reached once the node is known to be a Var.
 *
 * Note: the argument is expanded several times; pass only expressions
 * that are free of side effects.
 */
#define IsCTIDVar(node) \
((node) != NULL && \
IsA((node), Var) && \
((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
((Var *) (node))->varlevelsup == 0)
42
43	/ one element in tss_tidexprs /
44	typedef struct TidExpr
45	{
46	ExprState exprstate; /* ExprState for a TID-yielding subexpr /
47	bool isarray; / if true, it yields tid[] not just tid /
48	CurrentOfExpr cexpr; /* alternatively, we can have CURRENT OF /
49	} TidExpr;
50
51	static void TidExprListCreate(TidScanState *tidstate);
52	static void TidListEval(TidScanState *tidstate);
53	static int itemptr_comparator(const void a, const* void *b);
54	static TupleTableSlot TidNext(TidScanState node);
55
56
57	/*
58	* Extract the qual subexpressions that yield TIDs to search for,
59	* and compile them into ExprStates if they're ordinary expressions.
60	*
61	* CURRENT OF is a special case that we can't compile usefully;
62	* just drop it into the TidExpr list as-is.
63	*/
64	static void
65	TidExprListCreate(TidScanState *tidstate)
66	{
67	TidScan node = (TidScan ) tidstate->ss.ps.plan;
68	ListCell *l;
69
70	tidstate->tss_tidexprs = NIL;
71	tidstate->tss_isCurrentOf = false;
72
73	foreach(l, node->tidquals)
74	{
75	Expr expr = (Expr ) lfirst(l);
76	TidExpr tidexpr = (TidExpr ) palloc0(sizeof(TidExpr));
77
78	if (is_opclause(expr))
79	{
80	Node *arg1;
81	Node *arg2;
82
83	arg1 = get_leftop(expr);
84	arg2 = get_rightop(expr);
85	if (IsCTIDVar(arg1))
86	tidexpr->exprstate = ExecInitExpr((Expr *) arg2,
87	&tidstate->ss.ps);
88	else if (IsCTIDVar(arg2))
89	tidexpr->exprstate = ExecInitExpr((Expr *) arg1,
90	&tidstate->ss.ps);
91	else
92	elog(ERROR, "could not identify CTID variable");
93	tidexpr->isarray = false;
94	}
95	else if (expr && IsA(expr, ScalarArrayOpExpr))
96	{
97	ScalarArrayOpExpr saex = (ScalarArrayOpExpr ) expr;
98
99	Assert(IsCTIDVar(linitial(saex->args)));
100	tidexpr->exprstate = ExecInitExpr(lsecond(saex->args),
101	&tidstate->ss.ps);
102	tidexpr->isarray = true;
103	}
104	else if (expr && IsA(expr, CurrentOfExpr))
105	{
106	CurrentOfExpr cexpr = (CurrentOfExpr ) expr;
107
108	tidexpr->cexpr = cexpr;
109	tidstate->tss_isCurrentOf = true;
110	}
111	else
112	elog(ERROR, "could not identify CTID expression");
113
114	tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr);
115	}
116
117	/ CurrentOfExpr could never appear OR'd with something else /
118	Assert(list_length(tidstate->tss_tidexprs) == `1` \|\|
119	!tidstate->tss_isCurrentOf);
120	}
121
122	/*
123	* Compute the list of TIDs to be visited, by evaluating the expressions
124	* for them.
125	*
126	* (The result is actually an array, not a list.)
127	*/
128	static void
129	TidListEval(TidScanState *tidstate)
130	{
131	ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
132	TableScanDesc scan;
133	ItemPointerData *tidList;
134	int numAllocTids;
135	int numTids;
136	ListCell *l;
137
138	/*
139	* Start scan on-demand - initializing a scan isn't free (e.g. heap stats
140	* the size of the table), so it makes sense to delay that until needed -
141	* the node might never get executed.
142	*/
143	if (tidstate->ss.ss_currentScanDesc == NULL)
144	tidstate->ss.ss_currentScanDesc =
145	table_beginscan(tidstate->ss.ss_currentRelation,
146	tidstate->ss.ps.state->es_snapshot,
147	`0`, NULL);
148	scan = tidstate->ss.ss_currentScanDesc;
149
150	/*
151	* We initialize the array with enough slots for the case that all quals
152	* are simple OpExprs or CurrentOfExprs. If there are any
153	* ScalarArrayOpExprs, we may have to enlarge the array.
154	*/
155	numAllocTids = list_length(tidstate->tss_tidexprs);
156	tidList = (ItemPointerData *)
157	palloc(numAllocTids * sizeof(ItemPointerData));
158	numTids = `0`;
159
160	foreach(l, tidstate->tss_tidexprs)
161	{
162	TidExpr tidexpr = (TidExpr ) lfirst(l);
163	ItemPointer itemptr;
164	bool isNull;
165
166	if (tidexpr->exprstate && !tidexpr->isarray)
167	{
168	itemptr = (ItemPointer)
169	DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
170	econtext,
171	&isNull));
172	if (isNull)
173	continue;
174
175	/*
176	* We silently discard any TIDs that the AM considers invalid
177	* (E.g. for heap, they could be out of range at the time of scan
178	* start. Since we hold at least AccessShareLock on the table, it
179	* won't be possible for someone to truncate away the blocks we
180	* intend to visit.).
181	*/
182	if (!table_tuple_tid_valid(scan, itemptr))
183	continue;
184
185	if (numTids >= numAllocTids)
186	{
187	numAllocTids *= `2`;
188	tidList = (ItemPointerData *)
189	repalloc(tidList,
190	numAllocTids * sizeof(ItemPointerData));
191	}
192	tidList[numTids++] = *itemptr;
193	}
194	else if (tidexpr->exprstate && tidexpr->isarray)
195	{
196	Datum arraydatum;
197	ArrayType *itemarray;
198	Datum *ipdatums;
199	bool *ipnulls;
200	int ndatums;
201	int i;
202
203	arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate,
204	econtext,
205	&isNull);
206	if (isNull)
207	continue;
208	itemarray = DatumGetArrayTypeP(arraydatum);
209	deconstruct_array(itemarray,
210	TIDOID, sizeof(ItemPointerData), false, `'s'`,
211	&ipdatums, &ipnulls, &ndatums);
212	if (numTids + ndatums > numAllocTids)
213	{
214	numAllocTids = numTids + ndatums;
215	tidList = (ItemPointerData *)
216	repalloc(tidList,
217	numAllocTids * sizeof(ItemPointerData));
218	}
219	for (i = `0`; i < ndatums; i++)
220	{
221	if (ipnulls[i])
222	continue;
223
224	itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
225
226	if (!table_tuple_tid_valid(scan, itemptr))
227	continue;
228
229	tidList[numTids++] = *itemptr;
230	}
231	pfree(ipdatums);
232	pfree(ipnulls);
233	}
234	else
235	{
236	ItemPointerData cursor_tid;
237
238	Assert(tidexpr->cexpr);
239	if (execCurrentOf(tidexpr->cexpr, econtext,
240	RelationGetRelid(tidstate->ss.ss_currentRelation),
241	&cursor_tid))
242	{
243	if (numTids >= numAllocTids)
244	{
245	numAllocTids *= `2`;
246	tidList = (ItemPointerData *)
247	repalloc(tidList,
248	numAllocTids * sizeof(ItemPointerData));
249	}
250	tidList[numTids++] = cursor_tid;
251	}
252	}
253	}
254
255	/*
256	* Sort the array of TIDs into order, and eliminate duplicates.
257	* Eliminating duplicates is necessary since we want OR semantics across
258	* the list. Sorting makes it easier to detect duplicates, and as a bonus
259	* ensures that we will visit the heap in the most efficient way.
260	*/
261	if (numTids > `1`)
262	{
263	int lastTid;
264	int i;
265
266	/ CurrentOfExpr could never appear OR'd with something else /
267	Assert(!tidstate->tss_isCurrentOf);
268
269	qsort((void ) tidList, numTids, sizeof*(ItemPointerData),
270	itemptr_comparator);
271	lastTid = `0`;
272	for (i = `1`; i < numTids; i++)
273	{
274	if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
275	tidList[++lastTid] = tidList[i];
276	}
277	numTids = lastTid + `1`;
278	}
279
280	tidstate->tss_TidList = tidList;
281	tidstate->tss_NumTids = numTids;
282	tidstate->tss_TidPtr = -`1`;
283	}
284
285	/*
286	* qsort comparator for ItemPointerData items
287	*/
288	static int
289	itemptr_comparator(const void a, const* void *b)
290	{
291	const ItemPointerData ipa = (const* ItemPointerData *) a;
292	const ItemPointerData ipb = (const* ItemPointerData *) b;
293	BlockNumber ba = ItemPointerGetBlockNumber(ipa);
294	BlockNumber bb = ItemPointerGetBlockNumber(ipb);
295	OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
296	OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
297
298	if (ba < bb)
299	return -`1`;
300	if (ba > bb)
301	return `1`;
302	if (oa < ob)
303	return -`1`;
304	if (oa > ob)
305	return `1`;
306	return `0`;
307	}
308
309	/ ----------------------------------------------------------------*
310	* TidNext
311	*
312	* Retrieve a tuple from the TidScan node's currentRelation
313	* using the tids in the TidScanState information.
314	*
315	* ----------------------------------------------------------------
316	*/
317	static TupleTableSlot *
318	TidNext(TidScanState *node)
319	{
320	EState *estate;
321	ScanDirection direction;
322	Snapshot snapshot;
323	TableScanDesc scan;
324	Relation heapRelation;
325	TupleTableSlot *slot;
326	ItemPointerData *tidList;
327	int numTids;
328	bool bBackward;
329
330	/*
331	* extract necessary information from tid scan node
332	*/
333	estate = node->ss.ps.state;
334	direction = estate->es_direction;
335	snapshot = estate->es_snapshot;
336	heapRelation = node->ss.ss_currentRelation;
337	slot = node->ss.ss_ScanTupleSlot;
338
339	/*
340	* First time through, compute the list of TIDs to be visited
341	*/
342	if (node->tss_TidList == NULL)
343	TidListEval(node);
344
345	scan = node->ss.ss_currentScanDesc;
346	tidList = node->tss_TidList;
347	numTids = node->tss_NumTids;
348
349	/*
350	* Initialize or advance scan position, depending on direction.
351	*/
352	bBackward = ScanDirectionIsBackward(direction);
353	if (bBackward)
354	{
355	if (node->tss_TidPtr < `0`)
356	{
357	/ initialize for backward scan /
358	node->tss_TidPtr = numTids - `1`;
359	}
360	else
361	node->tss_TidPtr--;
362	}
363	else
364	{
365	if (node->tss_TidPtr < `0`)
366	{
367	/ initialize for forward scan /
368	node->tss_TidPtr = `0`;
369	}
370	else
371	node->tss_TidPtr++;
372	}
373
374	while (node->tss_TidPtr >= `0` && node->tss_TidPtr < numTids)
375	{
376	ItemPointerData tid = tidList[node->tss_TidPtr];
377
378	/*
379	* For WHERE CURRENT OF, the tuple retrieved from the cursor might
380	* since have been updated; if so, we should fetch the version that is
381	* current according to our snapshot.
382	*/
383	if (node->tss_isCurrentOf)
384	table_tuple_get_latest_tid(scan, &tid);
385
386	if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
387	return slot;
388
389	/ Bad TID or failed snapshot qual; try next /
390	if (bBackward)
391	node->tss_TidPtr--;
392	else
393	node->tss_TidPtr++;
394
395	CHECK_FOR_INTERRUPTS();
396	}
397
398	/*
399	* if we get here it means the tid scan failed so we are at the end of the
400	* scan..
401	*/
402	return ExecClearTuple(slot);
403	}
404
405	/*
406	* TidRecheck -- access method routine to recheck a tuple in EvalPlanQual
407	*/
408	static bool
409	TidRecheck(TidScanState node, TupleTableSlot slot)
410	{
411	/*
412	* XXX shouldn't we check here to make sure tuple matches TID list? In
413	* runtime-key case this is not certain, is it? However, in the WHERE
414	* CURRENT OF case it might not match anyway ...
415	*/
416	return true;
417	}
418
419
420	/ ----------------------------------------------------------------*
421	* ExecTidScan(node)
422	*
423	* Scans the relation using tids and returns
424	* the next qualifying tuple in the direction specified.
425	* We call the ExecScan() routine and pass it the appropriate
426	* access method functions.
427	*
428	* Conditions:
429	* -- the "cursor" maintained by the AMI is positioned at the tuple
430	* returned previously.
431	*
432	* Initial States:
433	* -- the relation indicated is opened for scanning so that the
434	* "cursor" is positioned before the first qualifying tuple.
435	* -- tidPtr is -1.
436	* ----------------------------------------------------------------
437	*/
438	static TupleTableSlot *
439	ExecTidScan(PlanState *pstate)
440	{
441	TidScanState *node = castNode(TidScanState, pstate);
442
443	return ExecScan(&node->ss,
444	(ExecScanAccessMtd) TidNext,
445	(ExecScanRecheckMtd) TidRecheck);
446	}
447
448	/ ----------------------------------------------------------------*
449	* ExecReScanTidScan(node)
450	* ----------------------------------------------------------------
451	*/
452	void
453	ExecReScanTidScan(TidScanState *node)
454	{
455	if (node->tss_TidList)
456	pfree(node->tss_TidList);
457	node->tss_TidList = NULL;
458	node->tss_NumTids = `0`;
459	node->tss_TidPtr = -`1`;
460
461	/ not really necessary, but seems good form /
462	if (node->ss.ss_currentScanDesc)
463	table_rescan(node->ss.ss_currentScanDesc, NULL);
464
465	ExecScanReScan(&node->ss);
466	}
467
468	/ ----------------------------------------------------------------*
469	* ExecEndTidScan
470	*
471	* Releases any storage allocated through C routines.
472	* Returns nothing.
473	* ----------------------------------------------------------------
474	*/
475	void
476	ExecEndTidScan(TidScanState *node)
477	{
478	if (node->ss.ss_currentScanDesc)
479	table_endscan(node->ss.ss_currentScanDesc);
480
481	/*
482	* Free the exprcontext
483	*/
484	ExecFreeExprContext(&node->ss.ps);
485
486	/*
487	* clear out tuple table slots
488	*/
489	if (node->ss.ps.ps_ResultTupleSlot)
490	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
491	ExecClearTuple(node->ss.ss_ScanTupleSlot);
492	}
493
494	/ ----------------------------------------------------------------*
495	* ExecInitTidScan
496	*
497	* Initializes the tid scan's state information, creates
498	* scan keys, and opens the base and tid relations.
499	*
500	* Parameters:
501	* node: TidNode node produced by the planner.
502	* estate: the execution state initialized in InitPlan.
503	* ----------------------------------------------------------------
504	*/
505	TidScanState *
506	ExecInitTidScan(TidScan node, EState estate, int eflags)
507	{
508	TidScanState *tidstate;
509	Relation currentRelation;
510
511	/*
512	* create state structure
513	*/
514	tidstate = makeNode(TidScanState);
515	tidstate->ss.ps.plan = (Plan *) node;
516	tidstate->ss.ps.state = estate;
517	tidstate->ss.ps.ExecProcNode = ExecTidScan;
518
519	/*
520	* Miscellaneous initialization
521	*
522	* create expression context for node
523	*/
524	ExecAssignExprContext(estate, &tidstate->ss.ps);
525
526	/*
527	* mark tid list as not computed yet
528	*/
529	tidstate->tss_TidList = NULL;
530	tidstate->tss_NumTids = `0`;
531	tidstate->tss_TidPtr = -`1`;
532
533	/*
534	* open the scan relation
535	*/
536	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
537
538	tidstate->ss.ss_currentRelation = currentRelation;
539	tidstate->ss.ss_currentScanDesc = NULL; / no heap scan here /
540
541	/*
542	* get the scan type from the relation descriptor.
543	*/
544	ExecInitScanTupleSlot(estate, &tidstate->ss,
545	RelationGetDescr(currentRelation),
546	table_slot_callbacks(currentRelation));
547
548	/*
549	* Initialize result type and projection.
550	*/
551	ExecInitResultTypeTL(&tidstate->ss.ps);
552	ExecAssignScanProjectionInfo(&tidstate->ss);
553
554	/*
555	* initialize child expressions
556	*/
557	tidstate->ss.ps.qual =
558	ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate);
559
560	TidExprListCreate(tidstate);
561
562	/*
563	* all done.
564	*/
565	return tidstate;
566	}
567

Browse the source code of PostgreSQL/src/backend/executor/nodeTidscan.c