system.c source code [PostgreSQL/src/backend/access/tablesample/system.c]

1	/-------------------------------------------------------------------------*
2	*
3	* system.c
4	* support routines for SYSTEM tablesample method
5	*
6	* To ensure repeatability of samples, it is necessary that selection of a
7	* given tuple be history-independent; otherwise syncscanning would break
8	* repeatability, to say nothing of logically-irrelevant maintenance such
9	* as physical extension or shortening of the relation.
10	*
11	* To achieve that, we proceed by hashing each candidate block number together
12	* with the active seed, and then selecting it if the hash is less than the
13	* cutoff value computed from the selection probability by BeginSampleScan.
14	*
15	*
16	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
17	* Portions Copyright (c) 1994, Regents of the University of California
18	*
19	* IDENTIFICATION
20	* src/backend/access/tablesample/system.c
21	*
22	*-------------------------------------------------------------------------
23	*/
24
25	#include "postgres.h"
26
27	#include <math.h>
28
29	#include "access/relscan.h"
30	#include "access/tsmapi.h"
31	#include "catalog/pg_type.h"
32	#include "optimizer/optimizer.h"
33	#include "utils/builtins.h"
34	#include "utils/hashutils.h"
35
36
37	/ Private state /
38	typedef struct
39	{
40	uint64 cutoff; / select blocks with hash less than this /
41	uint32 seed; / random seed /
42	BlockNumber nextblock; / next block to consider sampling /
43	OffsetNumber lt; / last tuple returned from current block /
44	} SystemSamplerData;
45
46
47	static void system_samplescangetsamplesize(PlannerInfo *root,
48	RelOptInfo *baserel,
49	List *paramexprs,
50	BlockNumber *pages,
51	double *tuples);
52	static void system_initsamplescan(SampleScanState *node,
53	int eflags);
54	static void system_beginsamplescan(SampleScanState *node,
55	Datum *params,
56	int nparams,
57	uint32 seed);
58	static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
59	static OffsetNumber system_nextsampletuple(SampleScanState *node,
60	BlockNumber blockno,
61	OffsetNumber maxoffset);
62
63
64	/*
65	* Create a TsmRoutine descriptor for the SYSTEM method.
66	*/
67	Datum
68	tsm_system_handler(PG_FUNCTION_ARGS)
69	{
70	TsmRoutine *tsm = makeNode(TsmRoutine);
71
72	tsm->parameterTypes = list_make1_oid(FLOAT4OID);
73	tsm->repeatable_across_queries = true;
74	tsm->repeatable_across_scans = true;
75	tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
76	tsm->InitSampleScan = system_initsamplescan;
77	tsm->BeginSampleScan = system_beginsamplescan;
78	tsm->NextSampleBlock = system_nextsampleblock;
79	tsm->NextSampleTuple = system_nextsampletuple;
80	tsm->EndSampleScan = NULL;
81
82	PG_RETURN_POINTER(tsm);
83	}
84
85	/*
86	* Sample size estimation.
87	*/
88	static void
89	system_samplescangetsamplesize(PlannerInfo *root,
90	RelOptInfo *baserel,
91	List *paramexprs,
92	BlockNumber *pages,
93	double *tuples)
94	{
95	Node *pctnode;
96	float4 samplefract;
97
98	/ Try to extract an estimate for the sample percentage /
99	pctnode = (Node *) linitial(paramexprs);
100	pctnode = estimate_expression_value(root, pctnode);
101
102	if (IsA(pctnode, Const) &&
103	!((Const *) pctnode)->constisnull)
104	{
105	samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
106	if (samplefract >= `0` && samplefract <= `100` && !isnan(samplefract))
107	samplefract /= `100.0f`;
108	else
109	{
110	/ Default samplefract if the value is bogus /
111	samplefract = `0.1f`;
112	}
113	}
114	else
115	{
116	/ Default samplefract if we didn't obtain a non-null Const /
117	samplefract = `0.1f`;
118	}
119
120	/ We'll visit a sample of the pages ... /
121	pages = clamp_row_est(baserel->pages samplefract);
122
123	/ ... and hopefully get a representative number of tuples from them /
124	tuples = clamp_row_est(baserel->tuples samplefract);
125	}
126
127	/*
128	* Initialize during executor setup.
129	*/
130	static void
131	system_initsamplescan(SampleScanState node, int* eflags)
132	{
133	node->tsm_state = palloc0(sizeof(SystemSamplerData));
134	}
135
136	/*
137	* Examine parameters and prepare for a sample scan.
138	*/
139	static void
140	system_beginsamplescan(SampleScanState *node,
141	Datum *params,
142	int nparams,
143	uint32 seed)
144	{
145	SystemSamplerData sampler = (SystemSamplerData ) node->tsm_state;
146	double percent = DatumGetFloat4(params[`0`]);
147	double dcutoff;
148
149	if (percent < `0` \|\| percent > `100` \|\| isnan(percent))
150	ereport(ERROR,
151	(errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
152	errmsg("sample percentage must be between 0 and 100")));
153
154	/*
155	* The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
156	* store that as a uint64, of course. Note that this gives strictly
157	* correct behavior at the limits of zero or one probability.
158	*/
159	dcutoff = rint(((double) PG_UINT32_MAX + `1`) * percent / `100`);
160	sampler->cutoff = (uint64) dcutoff;
161	sampler->seed = seed;
162	sampler->nextblock = `0`;
163	sampler->lt = InvalidOffsetNumber;
164
165	/*
166	* Bulkread buffer access strategy probably makes sense unless we're
167	* scanning a very small fraction of the table. The 1% cutoff here is a
168	* guess. We should use pagemode visibility checking, since we scan all
169	* tuples on each selected page.
170	*/
171	node->use_bulkread = (percent >= `1`);
172	node->use_pagemode = true;
173	}
174
175	/*
176	* Select next block to sample.
177	*/
178	static BlockNumber
179	system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
180	{
181	SystemSamplerData sampler = (SystemSamplerData ) node->tsm_state;
182	BlockNumber nextblock = sampler->nextblock;
183	uint32 hashinput[`2`];
184
185	/*
186	* We compute the hash by applying hash_any to an array of 2 uint32's
187	* containing the block number and seed. This is efficient to set up, and
188	* with the current implementation of hash_any, it gives
189	* machine-independent results, which is a nice property for regression
190	* testing.
191	*
192	* These words in the hash input are the same throughout the block:
193	*/
194	hashinput[`1`] = sampler->seed;
195
196	/*
197	* Loop over block numbers until finding suitable block or reaching end of
198	* relation.
199	*/
200	for (; nextblock < nblocks; nextblock++)
201	{
202	uint32 hash;
203
204	hashinput[`0`] = nextblock;
205
206	hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
207	(int) sizeof(hashinput)));
208	if (hash < sampler->cutoff)
209	break;
210	}
211
212	if (nextblock < nblocks)
213	{
214	/ Found a suitable block; remember where we should start next time /
215	sampler->nextblock = nextblock + `1`;
216	return nextblock;
217	}
218
219	/ Done, but let's reset nextblock to 0 for safety. /
220	sampler->nextblock = `0`;
221	return InvalidBlockNumber;
222	}
223
224	/*
225	* Select next sampled tuple in current block.
226	*
227	* In block sampling, we just want to sample all the tuples in each selected
228	* block.
229	*
230	* It is OK here to return an offset without knowing if the tuple is visible
231	* (or even exists); nodeSamplescan.c will deal with that.
232	*
233	* When we reach end of the block, return InvalidOffsetNumber which tells
234	* SampleScan to go to next block.
235	*/
236	static OffsetNumber
237	system_nextsampletuple(SampleScanState *node,
238	BlockNumber blockno,
239	OffsetNumber maxoffset)
240	{
241	SystemSamplerData sampler = (SystemSamplerData ) node->tsm_state;
242	OffsetNumber tupoffset = sampler->lt;
243
244	/ Advance to next possible offset on page /
245	if (tupoffset == InvalidOffsetNumber)
246	tupoffset = FirstOffsetNumber;
247	else
248	tupoffset++;
249
250	/ Done? /
251	if (tupoffset > maxoffset)
252	tupoffset = InvalidOffsetNumber;
253
254	sampler->lt = tupoffset;
255
256	return tupoffset;
257	}
258

Browse the source code of PostgreSQL/src/backend/access/tablesample/system.c