1/*-------------------------------------------------------------------------
2 *
3 * nodeSamplescan.c
4 * Support routines for sample scans of relations (table sampling).
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeSamplescan.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/relscan.h"
18#include "access/tableam.h"
19#include "access/tsmapi.h"
20#include "executor/executor.h"
21#include "executor/nodeSamplescan.h"
22#include "miscadmin.h"
23#include "pgstat.h"
24#include "storage/bufmgr.h"
25#include "storage/predicate.h"
26#include "utils/builtins.h"
27#include "utils/rel.h"
28
29static TupleTableSlot *SampleNext(SampleScanState *node);
30static void tablesample_init(SampleScanState *scanstate);
31static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
32
33/* ----------------------------------------------------------------
34 * Scan Support
35 * ----------------------------------------------------------------
36 */
37
38/* ----------------------------------------------------------------
39 * SampleNext
40 *
41 * This is a workhorse for ExecSampleScan
42 * ----------------------------------------------------------------
43 */
44static TupleTableSlot *
45SampleNext(SampleScanState *node)
46{
47 /*
48 * if this is first call within a scan, initialize
49 */
50 if (!node->begun)
51 tablesample_init(node);
52
53 /*
54 * get the next tuple, and store it in our result slot
55 */
56 return tablesample_getnext(node);
57}
58
59/*
60 * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
61 */
62static bool
63SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
64{
65 /*
66 * No need to recheck for SampleScan, since like SeqScan we don't pass any
67 * checkable keys to heap_beginscan.
68 */
69 return true;
70}
71
72/* ----------------------------------------------------------------
73 * ExecSampleScan(node)
74 *
75 * Scans the relation using the sampling method and returns
76 * the next qualifying tuple.
77 * We call the ExecScan() routine and pass it the appropriate
78 * access method functions.
79 * ----------------------------------------------------------------
80 */
81static TupleTableSlot *
82ExecSampleScan(PlanState *pstate)
83{
84 SampleScanState *node = castNode(SampleScanState, pstate);
85
86 return ExecScan(&node->ss,
87 (ExecScanAccessMtd) SampleNext,
88 (ExecScanRecheckMtd) SampleRecheck);
89}
90
91/* ----------------------------------------------------------------
92 * ExecInitSampleScan
93 * ----------------------------------------------------------------
94 */
95SampleScanState *
96ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
97{
98 SampleScanState *scanstate;
99 TableSampleClause *tsc = node->tablesample;
100 TsmRoutine *tsm;
101
102 Assert(outerPlan(node) == NULL);
103 Assert(innerPlan(node) == NULL);
104
105 /*
106 * create state structure
107 */
108 scanstate = makeNode(SampleScanState);
109 scanstate->ss.ps.plan = (Plan *) node;
110 scanstate->ss.ps.state = estate;
111 scanstate->ss.ps.ExecProcNode = ExecSampleScan;
112
113 /*
114 * Miscellaneous initialization
115 *
116 * create expression context for node
117 */
118 ExecAssignExprContext(estate, &scanstate->ss.ps);
119
120 /*
121 * open the scan relation
122 */
123 scanstate->ss.ss_currentRelation =
124 ExecOpenScanRelation(estate,
125 node->scan.scanrelid,
126 eflags);
127
128 /* we won't set up the HeapScanDesc till later */
129 scanstate->ss.ss_currentScanDesc = NULL;
130
131 /* and create slot with appropriate rowtype */
132 ExecInitScanTupleSlot(estate, &scanstate->ss,
133 RelationGetDescr(scanstate->ss.ss_currentRelation),
134 table_slot_callbacks(scanstate->ss.ss_currentRelation));
135
136 /*
137 * Initialize result type and projection.
138 */
139 ExecInitResultTypeTL(&scanstate->ss.ps);
140 ExecAssignScanProjectionInfo(&scanstate->ss);
141
142 /*
143 * initialize child expressions
144 */
145 scanstate->ss.ps.qual =
146 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
147
148 scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
149 scanstate->repeatable =
150 ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
151
152 /*
153 * If we don't have a REPEATABLE clause, select a random seed. We want to
154 * do this just once, since the seed shouldn't change over rescans.
155 */
156 if (tsc->repeatable == NULL)
157 scanstate->seed = random();
158
159 /*
160 * Finally, initialize the TABLESAMPLE method handler.
161 */
162 tsm = GetTsmRoutine(tsc->tsmhandler);
163 scanstate->tsmroutine = tsm;
164 scanstate->tsm_state = NULL;
165
166 if (tsm->InitSampleScan)
167 tsm->InitSampleScan(scanstate, eflags);
168
169 /* We'll do BeginSampleScan later; we can't evaluate params yet */
170 scanstate->begun = false;
171
172 return scanstate;
173}
174
175/* ----------------------------------------------------------------
176 * ExecEndSampleScan
177 *
178 * frees any storage allocated through C routines.
179 * ----------------------------------------------------------------
180 */
181void
182ExecEndSampleScan(SampleScanState *node)
183{
184 /*
185 * Tell sampling function that we finished the scan.
186 */
187 if (node->tsmroutine->EndSampleScan)
188 node->tsmroutine->EndSampleScan(node);
189
190 /*
191 * Free the exprcontext
192 */
193 ExecFreeExprContext(&node->ss.ps);
194
195 /*
196 * clean out the tuple table
197 */
198 if (node->ss.ps.ps_ResultTupleSlot)
199 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
200 ExecClearTuple(node->ss.ss_ScanTupleSlot);
201
202 /*
203 * close heap scan
204 */
205 if (node->ss.ss_currentScanDesc)
206 table_endscan(node->ss.ss_currentScanDesc);
207}
208
209/* ----------------------------------------------------------------
210 * ExecReScanSampleScan
211 *
212 * Rescans the relation.
213 *
214 * ----------------------------------------------------------------
215 */
216void
217ExecReScanSampleScan(SampleScanState *node)
218{
219 /* Remember we need to do BeginSampleScan again (if we did it at all) */
220 node->begun = false;
221 node->done = false;
222 node->haveblock = false;
223 node->donetuples = 0;
224
225 ExecScanReScan(&node->ss);
226}
227
228
229/*
230 * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
231 */
232static void
233tablesample_init(SampleScanState *scanstate)
234{
235 TsmRoutine *tsm = scanstate->tsmroutine;
236 ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
237 Datum *params;
238 Datum datum;
239 bool isnull;
240 uint32 seed;
241 bool allow_sync;
242 int i;
243 ListCell *arg;
244
245 scanstate->donetuples = 0;
246 params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
247
248 i = 0;
249 foreach(arg, scanstate->args)
250 {
251 ExprState *argstate = (ExprState *) lfirst(arg);
252
253 params[i] = ExecEvalExprSwitchContext(argstate,
254 econtext,
255 &isnull);
256 if (isnull)
257 ereport(ERROR,
258 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
259 errmsg("TABLESAMPLE parameter cannot be null")));
260 i++;
261 }
262
263 if (scanstate->repeatable)
264 {
265 datum = ExecEvalExprSwitchContext(scanstate->repeatable,
266 econtext,
267 &isnull);
268 if (isnull)
269 ereport(ERROR,
270 (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
271 errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
272
273 /*
274 * The REPEATABLE parameter has been coerced to float8 by the parser.
275 * The reason for using float8 at the SQL level is that it will
276 * produce unsurprising results both for users used to databases that
277 * accept only integers in the REPEATABLE clause and for those who
278 * might expect that REPEATABLE works like setseed() (a float in the
279 * range from -1 to 1).
280 *
281 * We use hashfloat8() to convert the supplied value into a suitable
282 * seed. For regression-testing purposes, that has the convenient
283 * property that REPEATABLE(0) gives a machine-independent result.
284 */
285 seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
286 }
287 else
288 {
289 /* Use the seed selected by ExecInitSampleScan */
290 seed = scanstate->seed;
291 }
292
293 /* Set default values for params that BeginSampleScan can adjust */
294 scanstate->use_bulkread = true;
295 scanstate->use_pagemode = true;
296
297 /* Let tablesample method do its thing */
298 tsm->BeginSampleScan(scanstate,
299 params,
300 list_length(scanstate->args),
301 seed);
302
303 /* We'll use syncscan if there's no NextSampleBlock function */
304 allow_sync = (tsm->NextSampleBlock == NULL);
305
306 /* Now we can create or reset the HeapScanDesc */
307 if (scanstate->ss.ss_currentScanDesc == NULL)
308 {
309 scanstate->ss.ss_currentScanDesc =
310 table_beginscan_sampling(scanstate->ss.ss_currentRelation,
311 scanstate->ss.ps.state->es_snapshot,
312 0, NULL,
313 scanstate->use_bulkread,
314 allow_sync,
315 scanstate->use_pagemode);
316 }
317 else
318 {
319 table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
320 scanstate->use_bulkread,
321 allow_sync,
322 scanstate->use_pagemode);
323 }
324
325 pfree(params);
326
327 /* And we're initialized. */
328 scanstate->begun = true;
329}
330
331/*
332 * Get next tuple from TABLESAMPLE method.
333 */
334static TupleTableSlot *
335tablesample_getnext(SampleScanState *scanstate)
336{
337 TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
338 TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
339
340 ExecClearTuple(slot);
341
342 if (scanstate->done)
343 return NULL;
344
345 for (;;)
346 {
347 if (!scanstate->haveblock)
348 {
349 if (!table_scan_sample_next_block(scan, scanstate))
350 {
351 scanstate->haveblock = false;
352 scanstate->done = true;
353
354 /* exhausted relation */
355 return NULL;
356 }
357
358 scanstate->haveblock = true;
359 }
360
361 if (!table_scan_sample_next_tuple(scan, scanstate, slot))
362 {
363 /*
364 * If we get here, it means we've exhausted the items on this page
365 * and it's time to move to the next.
366 */
367 scanstate->haveblock = false;
368 continue;
369 }
370
371 /* Found visible tuple, return it. */
372 break;
373 }
374
375 scanstate->donetuples++;
376
377 return slot;
378}
379