| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * analyze.c |
| 4 | * the Postgres statistics generator |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/commands/analyze.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include <math.h> |
| 18 | |
| 19 | #include "access/genam.h" |
| 20 | #include "access/multixact.h" |
| 21 | #include "access/relation.h" |
| 22 | #include "access/sysattr.h" |
| 23 | #include "access/table.h" |
| 24 | #include "access/tableam.h" |
| 25 | #include "access/transam.h" |
| 26 | #include "access/tupconvert.h" |
| 27 | #include "access/tuptoaster.h" |
| 28 | #include "access/visibilitymap.h" |
| 29 | #include "access/xact.h" |
| 30 | #include "catalog/catalog.h" |
| 31 | #include "catalog/index.h" |
| 32 | #include "catalog/indexing.h" |
| 33 | #include "catalog/pg_collation.h" |
| 34 | #include "catalog/pg_inherits.h" |
| 35 | #include "catalog/pg_namespace.h" |
| 36 | #include "catalog/pg_statistic_ext.h" |
| 37 | #include "commands/dbcommands.h" |
| 38 | #include "commands/tablecmds.h" |
| 39 | #include "commands/vacuum.h" |
| 40 | #include "executor/executor.h" |
| 41 | #include "foreign/fdwapi.h" |
| 42 | #include "miscadmin.h" |
| 43 | #include "nodes/nodeFuncs.h" |
| 44 | #include "parser/parse_oper.h" |
| 45 | #include "parser/parse_relation.h" |
| 46 | #include "pgstat.h" |
| 47 | #include "postmaster/autovacuum.h" |
| 48 | #include "statistics/extended_stats_internal.h" |
| 49 | #include "statistics/statistics.h" |
| 50 | #include "storage/bufmgr.h" |
| 51 | #include "storage/lmgr.h" |
| 52 | #include "storage/proc.h" |
| 53 | #include "storage/procarray.h" |
| 54 | #include "utils/acl.h" |
| 55 | #include "utils/attoptcache.h" |
| 56 | #include "utils/builtins.h" |
| 57 | #include "utils/datum.h" |
| 58 | #include "utils/fmgroids.h" |
| 59 | #include "utils/guc.h" |
| 60 | #include "utils/lsyscache.h" |
| 61 | #include "utils/memutils.h" |
| 62 | #include "utils/pg_rusage.h" |
| 63 | #include "utils/sampling.h" |
| 64 | #include "utils/sortsupport.h" |
| 65 | #include "utils/syscache.h" |
| 66 | #include "utils/timestamp.h" |
| 67 | |
| 68 | |
/*
 * Per-index data for ANALYZE.  One of these is built for each index of the
 * target relation; it carries the info needed to compute statistics for the
 * index's expression columns and to prorate row counts for a partial index.
 */
typedef struct AnlIndexData
{
	IndexInfo  *indexInfo;		/* BuildIndexInfo result */
	double		tupleFract;		/* fraction of rows for partial index */
	VacAttrStats **vacattrstats;	/* index attrs to analyze */
	int			attr_cnt;		/* number of entries in vacattrstats */
} AnlIndexData;
| 77 | |
| 78 | |
/* Default statistics target (GUC parameter) */
int			default_statistics_target = 100;

/*
 * A few variables that don't seem worth passing around as parameters.
 * anl_context is the per-ANALYZE working memory context (created/destroyed
 * in do_analyze_rel); vac_strategy is the caller-supplied buffer access
 * strategy, installed at the top of analyze_rel.
 */
static MemoryContext anl_context = NULL;
static BufferAccessStrategy vac_strategy;


/* Guts of ANALYZE for a single relation (or its inheritance tree) */
static void do_analyze_rel(Relation onerel,
						   VacuumParams *params, List *va_cols,
						   AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
						   bool inh, bool in_outer_xact, int elevel);
/* Statistics computation for index expression columns */
static void compute_index_stats(Relation onerel, double totalrows,
								AnlIndexData *indexdata, int nindexes,
								HeapTuple *rows, int numrows,
								MemoryContext col_context);
/* Decide whether a column is analyzable; build its VacAttrStats if so */
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
									   Node *index_expr);
/* Row-sampling routines (single rel, qsort comparator, inheritance tree) */
static int	acquire_sample_rows(Relation onerel, int elevel,
								HeapTuple *rows, int targrows,
								double *totalrows, double *totaldeadrows);
static int	compare_rows(const void *a, const void *b);
static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
										  HeapTuple *rows, int targrows,
										  double *totalrows, double *totaldeadrows);
/* Write completed per-column stats into pg_statistic */
static void update_attstats(Oid relid, bool inh,
							int natts, VacAttrStats **vacattrstats);
/* Datum-fetch callbacks handed to compute_stats (table rows vs. index exprs) */
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
| 108 | |
| 109 | |
/*
 *	analyze_rel() -- analyze one relation
 *
 * relid identifies the relation to analyze.  If relation is supplied, use
 * the name therein for reporting any failure to open/lock the rel; do not
 * use it once we've successfully opened the rel, since it might be stale.
 *
 * params carries the ANALYZE options (verbosity, autovacuum log threshold);
 * va_cols is an optional list of column names restricting the analysis;
 * in_outer_xact is passed through to pg_class stats updates; bstrategy is
 * the buffer access strategy used when reading sample pages.
 *
 * Silently returns without doing anything if the rel can't be opened,
 * isn't owned appropriately, or isn't of an analyzable kind.
 */
void
analyze_rel(Oid relid, RangeVar *relation,
			VacuumParams *params, List *va_cols, bool in_outer_xact,
			BufferAccessStrategy bstrategy)
{
	Relation	onerel;
	int			elevel;
	AcquireSampleRowsFunc acquirefunc = NULL;
	BlockNumber relpages = 0;

	/* Select logging level */
	if (params->options & VACOPT_VERBOSE)
		elevel = INFO;
	else
		elevel = DEBUG2;

	/* Set up static variables (consumed later by the sampling routines) */
	vac_strategy = bstrategy;

	/*
	 * Check for user-requested abort.
	 */
	CHECK_FOR_INTERRUPTS();

	/*
	 * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
	 * ANALYZEs don't run on it concurrently.  (This also locks out a
	 * concurrent VACUUM, which doesn't matter much at the moment but might
	 * matter if we ever try to accumulate stats on dead tuples.)  If the rel
	 * has been dropped since we last saw it, we don't need to process it.
	 *
	 * Make sure to generate only logs for ANALYZE in this case.
	 */
	onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM),
								  params->log_min_duration >= 0,
								  ShareUpdateExclusiveLock);

	/* leave if relation could not be opened or locked */
	if (!onerel)
		return;

	/*
	 * Check if relation needs to be skipped based on ownership.  This check
	 * happens also when building the relation list to analyze for a manual
	 * operation, and needs to be done additionally here as ANALYZE could
	 * happen across multiple transactions where relation ownership could have
	 * changed in-between.  Make sure to generate only logs for ANALYZE in
	 * this case.
	 */
	if (!vacuum_is_relation_owner(RelationGetRelid(onerel),
								  onerel->rd_rel,
								  params->options & VACOPT_ANALYZE))
	{
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * Silently ignore tables that are temp tables of other backends ---
	 * trying to analyze these is rather pointless, since their contents are
	 * probably not up-to-date on disk.  (We don't throw a warning here; it
	 * would just lead to chatter during a database-wide ANALYZE.)
	 */
	if (RELATION_IS_OTHER_TEMP(onerel))
	{
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * We can ANALYZE any table except pg_statistic.  See update_attstats
	 */
	if (RelationGetRelid(onerel) == StatisticRelationId)
	{
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * Check that it's of an analyzable relkind, and set up appropriately.
	 */
	if (onerel->rd_rel->relkind == RELKIND_RELATION ||
		onerel->rd_rel->relkind == RELKIND_MATVIEW)
	{
		/* Regular table, so we'll use the regular row acquisition function */
		acquirefunc = acquire_sample_rows;
		/* Also get regular table's size */
		relpages = RelationGetNumberOfBlocks(onerel);
	}
	else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
	{
		/*
		 * For a foreign table, call the FDW's hook function to see whether it
		 * supports analysis.  The FDW supplies both the acquirefunc and its
		 * own notion of relpages.
		 */
		FdwRoutine *fdwroutine;
		bool		ok = false;

		fdwroutine = GetFdwRoutineForRelation(onerel, false);

		if (fdwroutine->AnalyzeForeignTable != NULL)
			ok = fdwroutine->AnalyzeForeignTable(onerel,
												 &acquirefunc,
												 &relpages);

		if (!ok)
		{
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- cannot analyze this foreign table",
							RelationGetRelationName(onerel))));
			relation_close(onerel, ShareUpdateExclusiveLock);
			return;
		}
	}
	else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		/*
		 * For partitioned tables, we want to do the recursive ANALYZE below.
		 * (acquirefunc stays NULL; the non-recursive pass is skipped.)
		 */
	}
	else
	{
		/* No need for a WARNING if we already complained during VACUUM */
		if (!(params->options & VACOPT_VACUUM))
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
							RelationGetRelationName(onerel))));
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * OK, let's do it.  First let other backends know I'm in ANALYZE.
	 */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
	MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
	LWLockRelease(ProcArrayLock);

	/*
	 * Do the normal non-recursive ANALYZE.  We can skip this for partitioned
	 * tables, which don't contain any rows.
	 */
	if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		do_analyze_rel(onerel, params, va_cols, acquirefunc,
					   relpages, false, in_outer_xact, elevel);

	/*
	 * If there are child tables, do recursive ANALYZE.
	 */
	if (onerel->rd_rel->relhassubclass)
		do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages,
					   true, in_outer_xact, elevel);

	/*
	 * Close source relation now, but keep lock so that no one deletes it
	 * before we commit.  (If someone did, they'd fail to clean up the entries
	 * we made in pg_statistic.  Also, releasing the lock before commit would
	 * expose us to concurrent-update failures in update_attstats.)
	 */
	relation_close(onerel, NoLock);

	/*
	 * Reset my PGXACT flag.  Note: we need this here, and not in vacuum_rel,
	 * because the vacuum flag is cleared by the end-of-xact code.
	 */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
	MyPgXact->vacuumFlags &= ~PROC_IN_ANALYZE;
	LWLockRelease(ProcArrayLock);
}
| 286 | |
/*
 *	do_analyze_rel() -- analyze one relation, recursively or not
 *
 * Note that "acquirefunc" is only relevant for the non-inherited case.
 * For the inherited case, acquire_inherited_sample_rows() determines the
 * appropriate acquirefunc for each child table.
 *
 * This does the real work: pick target columns, sample rows, run each
 * column's compute_stats routine, then persist results to pg_statistic
 * and pg_class.  All transient allocations go into anl_context, which is
 * created here and destroyed on exit.  The work is done under the table
 * owner's userid with security-restricted operation in force.
 */
static void
do_analyze_rel(Relation onerel, VacuumParams *params,
			   List *va_cols, AcquireSampleRowsFunc acquirefunc,
			   BlockNumber relpages, bool inh, bool in_outer_xact,
			   int elevel)
{
	int			attr_cnt,
				tcnt,
				i,
				ind;
	Relation   *Irel;
	int			nindexes;
	bool		hasindex;
	VacAttrStats **vacattrstats;
	AnlIndexData *indexdata;
	int			targrows,
				numrows;
	double		totalrows,
				totaldeadrows;
	HeapTuple  *rows;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	MemoryContext caller_context;
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	if (inh)
		ereport(elevel,
				(errmsg("analyzing \"%s.%s\" inheritance tree",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
	else
		ereport(elevel,
				(errmsg("analyzing \"%s.%s\"",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));

	/*
	 * Set up a working context so that we can easily free whatever junk gets
	 * created.
	 */
	anl_context = AllocSetContextCreate(CurrentMemoryContext,
										"Analyze",
										ALLOCSET_DEFAULT_SIZES);
	caller_context = MemoryContextSwitchTo(anl_context);

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(onerel->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
	{
		pg_rusage_init(&ru0);
		if (params->log_min_duration > 0)
			starttime = GetCurrentTimestamp();
	}

	/*
	 * Determine which columns to analyze
	 *
	 * Note that system attributes are never analyzed, so we just reject them
	 * at the lookup stage.  We also reject duplicate column mentions.  (We
	 * could alternatively ignore duplicates, but analyzing a column twice
	 * won't work; we'd end up making a conflicting update in pg_statistic.)
	 */
	if (va_cols != NIL)
	{
		Bitmapset  *unique_cols = NULL;
		ListCell   *le;

		vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
												sizeof(VacAttrStats *));
		tcnt = 0;
		foreach(le, va_cols)
		{
			char	   *col = strVal(lfirst(le));

			i = attnameAttNum(onerel, col, false);
			if (i == InvalidAttrNumber)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_COLUMN),
						 errmsg("column \"%s\" of relation \"%s\" does not exist",
								col, RelationGetRelationName(onerel))));
			if (bms_is_member(i, unique_cols))
				ereport(ERROR,
						(errcode(ERRCODE_DUPLICATE_COLUMN),
						 errmsg("column \"%s\" of relation \"%s\" appears more than once",
								col, RelationGetRelationName(onerel))));
			unique_cols = bms_add_member(unique_cols, i);

			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
			if (vacattrstats[tcnt] != NULL)
				tcnt++;
		}
		attr_cnt = tcnt;
	}
	else
	{
		/* No explicit column list: consider every attribute of the rel */
		attr_cnt = onerel->rd_att->natts;
		vacattrstats = (VacAttrStats **)
			palloc(attr_cnt * sizeof(VacAttrStats *));
		tcnt = 0;
		for (i = 1; i <= attr_cnt; i++)
		{
			/* examine_attribute returns NULL for non-analyzable columns */
			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
			if (vacattrstats[tcnt] != NULL)
				tcnt++;
		}
		attr_cnt = tcnt;
	}

	/*
	 * Open all indexes of the relation, and see if there are any analyzable
	 * columns in the indexes.  We do not analyze index columns if there was
	 * an explicit column list in the ANALYZE command, however.  If we are
	 * doing a recursive scan, we don't want to touch the parent's indexes at
	 * all.
	 */
	if (!inh)
		vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
	else
	{
		Irel = NULL;
		nindexes = 0;
	}
	hasindex = (nindexes > 0);
	indexdata = NULL;
	if (hasindex)
	{
		indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
		for (ind = 0; ind < nindexes; ind++)
		{
			AnlIndexData *thisdata = &indexdata[ind];
			IndexInfo  *indexInfo;

			thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
			thisdata->tupleFract = 1.0; /* fix later if partial */
			if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
			{
				ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);

				thisdata->vacattrstats = (VacAttrStats **)
					palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
				tcnt = 0;
				for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
				{
					int			keycol = indexInfo->ii_IndexAttrNumbers[i];

					if (keycol == 0)
					{
						/* Found an index expression */
						Node	   *indexkey;

						if (indexpr_item == NULL)	/* shouldn't happen */
							elog(ERROR, "too few entries in indexprs list");
						indexkey = (Node *) lfirst(indexpr_item);
						indexpr_item = lnext(indexpr_item);
						thisdata->vacattrstats[tcnt] =
							examine_attribute(Irel[ind], i + 1, indexkey);
						if (thisdata->vacattrstats[tcnt] != NULL)
							tcnt++;
					}
				}
				thisdata->attr_cnt = tcnt;
			}
		}
	}

	/*
	 * Determine how many rows we need to sample, using the worst case from
	 * all analyzable columns.  We use a lower bound of 100 rows to avoid
	 * possible overflow in Vitter's algorithm.  (Note: that will also be the
	 * target in the corner case where there are no analyzable columns.)
	 */
	targrows = 100;
	for (i = 0; i < attr_cnt; i++)
	{
		if (targrows < vacattrstats[i]->minrows)
			targrows = vacattrstats[i]->minrows;
	}
	for (ind = 0; ind < nindexes; ind++)
	{
		AnlIndexData *thisdata = &indexdata[ind];

		for (i = 0; i < thisdata->attr_cnt; i++)
		{
			if (targrows < thisdata->vacattrstats[i]->minrows)
				targrows = thisdata->vacattrstats[i]->minrows;
		}
	}

	/*
	 * Acquire the sample rows.  The acquire function also reports its
	 * estimates of the table's total live and dead row counts.
	 */
	rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
	if (inh)
		numrows = acquire_inherited_sample_rows(onerel, elevel,
												rows, targrows,
												&totalrows, &totaldeadrows);
	else
		numrows = (*acquirefunc) (onerel, elevel,
								  rows, targrows,
								  &totalrows, &totaldeadrows);

	/*
	 * Compute the statistics.  Temporary results during the calculations for
	 * each column are stored in a child context.  The calc routines are
	 * responsible to make sure that whatever they store into the VacAttrStats
	 * structure is allocated in anl_context.
	 */
	if (numrows > 0)
	{
		MemoryContext col_context,
					old_context;

		col_context = AllocSetContextCreate(anl_context,
											"Analyze Column",
											ALLOCSET_DEFAULT_SIZES);
		old_context = MemoryContextSwitchTo(col_context);

		for (i = 0; i < attr_cnt; i++)
		{
			VacAttrStats *stats = vacattrstats[i];
			AttributeOpts *aopt;

			stats->rows = rows;
			stats->tupDesc = onerel->rd_att;
			stats->compute_stats(stats,
								 std_fetch_func,
								 numrows,
								 totalrows);

			/*
			 * If the appropriate flavor of the n_distinct option is
			 * specified, override with the corresponding value.
			 */
			aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
			if (aopt != NULL)
			{
				float8		n_distinct;

				n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
				if (n_distinct != 0.0)
					stats->stadistinct = n_distinct;
			}

			/* reclaim this column's temporary allocations before the next */
			MemoryContextResetAndDeleteChildren(col_context);
		}

		if (hasindex)
			compute_index_stats(onerel, totalrows,
								indexdata, nindexes,
								rows, numrows,
								col_context);

		MemoryContextSwitchTo(old_context);
		MemoryContextDelete(col_context);

		/*
		 * Emit the completed stats rows into pg_statistic, replacing any
		 * previous statistics for the target columns.  (If there are stats in
		 * pg_statistic for columns we didn't process, we leave them alone.)
		 */
		update_attstats(RelationGetRelid(onerel), inh,
						attr_cnt, vacattrstats);

		for (ind = 0; ind < nindexes; ind++)
		{
			AnlIndexData *thisdata = &indexdata[ind];

			update_attstats(RelationGetRelid(Irel[ind]), false,
							thisdata->attr_cnt, thisdata->vacattrstats);
		}

		/*
		 * Build extended statistics (if there are any).
		 *
		 * For now we only build extended statistics on individual relations,
		 * not for relations representing inheritance trees.
		 */
		if (!inh)
			BuildRelationExtStatistics(onerel, totalrows, numrows, rows,
									   attr_cnt, vacattrstats);
	}

	/*
	 * Update pages/tuples stats in pg_class ... but not if we're doing
	 * inherited stats.
	 */
	if (!inh)
	{
		BlockNumber relallvisible;

		visibilitymap_count(onerel, &relallvisible, NULL);

		vac_update_relstats(onerel,
							relpages,
							totalrows,
							relallvisible,
							hasindex,
							InvalidTransactionId,
							InvalidMultiXactId,
							in_outer_xact);
	}

	/*
	 * Same for indexes.  Vacuum always scans all indexes, so if we're part of
	 * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
	 * VACUUM.
	 */
	if (!inh && !(params->options & VACOPT_VACUUM))
	{
		for (ind = 0; ind < nindexes; ind++)
		{
			AnlIndexData *thisdata = &indexdata[ind];
			double		totalindexrows;

			/* prorate by the fraction of sample rows the index covers */
			totalindexrows = ceil(thisdata->tupleFract * totalrows);
			vac_update_relstats(Irel[ind],
								RelationGetNumberOfBlocks(Irel[ind]),
								totalindexrows,
								0,
								false,
								InvalidTransactionId,
								InvalidMultiXactId,
								in_outer_xact);
		}
	}

	/*
	 * Report ANALYZE to the stats collector, too.  However, if doing
	 * inherited stats we shouldn't report, because the stats collector only
	 * tracks per-table stats.  Reset the changes_since_analyze counter only
	 * if we analyzed all columns; otherwise, there is still work for
	 * auto-analyze to do.
	 */
	if (!inh)
		pgstat_report_analyze(onerel, totalrows, totaldeadrows,
							  (va_cols == NIL));

	/* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
	if (!(params->options & VACOPT_VACUUM))
	{
		for (ind = 0; ind < nindexes; ind++)
		{
			IndexBulkDeleteResult *stats;
			IndexVacuumInfo ivinfo;

			ivinfo.index = Irel[ind];
			ivinfo.analyze_only = true;
			ivinfo.estimated_count = true;
			ivinfo.message_level = elevel;
			ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
			ivinfo.strategy = vac_strategy;

			stats = index_vacuum_cleanup(&ivinfo, NULL);

			if (stats)
				pfree(stats);
		}
	}

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/* Log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
	{
		if (params->log_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   params->log_min_duration))
			ereport(LOG,
					(errmsg("automatic analyze of table \"%s.%s.%s\" system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							pg_rusage_show(&ru0))));
	}

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);

	/* Restore current context and release memory */
	MemoryContextSwitchTo(caller_context);
	MemoryContextDelete(anl_context);
	anl_context = NULL;
}
| 692 | |
/*
 * Compute statistics about indexes of a relation.
 *
 * For each index in indexdata[0..nindexes-1], evaluate its expression
 * columns (and partial-index predicate, if any) against the already-acquired
 * sample rows, then run the regular compute_stats machinery over the
 * resulting expression values.  Also fills in each index's tupleFract
 * (fraction of sampled rows satisfying a partial index's predicate).
 *
 * col_context is the caller's per-column working context; it is reset after
 * each column's stats are computed.  Per-index transient state lives in a
 * local "Analyze Index" context that is reset between indexes.
 */
static void
compute_index_stats(Relation onerel, double totalrows,
					AnlIndexData *indexdata, int nindexes,
					HeapTuple *rows, int numrows,
					MemoryContext col_context)
{
	MemoryContext ind_context,
				old_context;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	int			ind,
				i;

	ind_context = AllocSetContextCreate(anl_context,
										"Analyze Index",
										ALLOCSET_DEFAULT_SIZES);
	old_context = MemoryContextSwitchTo(ind_context);

	for (ind = 0; ind < nindexes; ind++)
	{
		AnlIndexData *thisdata = &indexdata[ind];
		IndexInfo  *indexInfo = thisdata->indexInfo;
		int			attr_cnt = thisdata->attr_cnt;
		TupleTableSlot *slot;
		EState	   *estate;
		ExprContext *econtext;
		ExprState  *predicate;
		Datum	   *exprvals;
		bool	   *exprnulls;
		int			numindexrows,
					tcnt,
					rowno;
		double		totalindexrows;

		/* Ignore index if no columns to analyze and not partial */
		if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
			continue;

		/*
		 * Need an EState for evaluation of index expressions and
		 * partial-index predicates.  Create it in the per-index context to be
		 * sure it gets cleaned up at the bottom of the loop.
		 */
		estate = CreateExecutorState();
		econtext = GetPerTupleExprContext(estate);
		/* Need a slot to hold the current heap tuple, too */
		slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
										&TTSOpsHeapTuple);

		/* Arrange for econtext's scan tuple to be the tuple under test */
		econtext->ecxt_scantuple = slot;

		/* Set up execution state for predicate. */
		predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

		/* Compute and save index expression values */
		exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
		exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
		numindexrows = 0;
		tcnt = 0;
		for (rowno = 0; rowno < numrows; rowno++)
		{
			HeapTuple	heapTuple = rows[rowno];

			/* honor cost-based vacuum delay and interrupts */
			vacuum_delay_point();

			/*
			 * Reset the per-tuple context each time, to reclaim any cruft
			 * left behind by evaluating the predicate or index expressions.
			 */
			ResetExprContext(econtext);

			/* Set up for predicate or expression evaluation */
			ExecStoreHeapTuple(heapTuple, slot, false);

			/* If index is partial, check predicate */
			if (predicate != NULL)
			{
				if (!ExecQual(predicate, econtext))
					continue;
			}
			numindexrows++;

			if (attr_cnt > 0)
			{
				/*
				 * Evaluate the index row to compute expression values.  We
				 * could do this by hand, but FormIndexDatum is convenient.
				 */
				FormIndexDatum(indexInfo,
							   slot,
							   estate,
							   values,
							   isnull);

				/*
				 * Save just the columns we care about.  We copy the values
				 * into ind_context from the estate's per-tuple context.
				 */
				for (i = 0; i < attr_cnt; i++)
				{
					VacAttrStats *stats = thisdata->vacattrstats[i];
					int			attnum = stats->attr->attnum;

					if (isnull[attnum - 1])
					{
						exprvals[tcnt] = (Datum) 0;
						exprnulls[tcnt] = true;
					}
					else
					{
						exprvals[tcnt] = datumCopy(values[attnum - 1],
												   stats->attrtype->typbyval,
												   stats->attrtype->typlen);
						exprnulls[tcnt] = false;
					}
					tcnt++;
				}
			}
		}

		/*
		 * Having counted the number of rows that pass the predicate in the
		 * sample, we can estimate the total number of rows in the index.
		 */
		thisdata->tupleFract = (double) numindexrows / (double) numrows;
		totalindexrows = ceil(thisdata->tupleFract * totalrows);

		/*
		 * Now we can compute the statistics for the expression columns.
		 */
		if (numindexrows > 0)
		{
			MemoryContextSwitchTo(col_context);
			for (i = 0; i < attr_cnt; i++)
			{
				VacAttrStats *stats = thisdata->vacattrstats[i];
				AttributeOpts *aopt =
				get_attribute_options(stats->attr->attrelid,
									  stats->attr->attnum);

				/* expression values are stored row-major; stride = attr_cnt */
				stats->exprvals = exprvals + i;
				stats->exprnulls = exprnulls + i;
				stats->rowstride = attr_cnt;
				stats->compute_stats(stats,
									 ind_fetch_func,
									 numindexrows,
									 totalindexrows);

				/*
				 * If the n_distinct option is specified, it overrides the
				 * above computation.  For indices, we always use just
				 * n_distinct, not n_distinct_inherited.
				 */
				if (aopt != NULL && aopt->n_distinct != 0.0)
					stats->stadistinct = aopt->n_distinct;

				MemoryContextResetAndDeleteChildren(col_context);
			}
		}

		/* And clean up */
		MemoryContextSwitchTo(ind_context);

		ExecDropSingleTupleTableSlot(slot);
		FreeExecutorState(estate);
		MemoryContextResetAndDeleteChildren(ind_context);
	}

	MemoryContextSwitchTo(old_context);
	MemoryContextDelete(ind_context);
}
| 868 | |
| 869 | /* |
| 870 | * examine_attribute -- pre-analysis of a single column |
| 871 | * |
| 872 | * Determine whether the column is analyzable; if so, create and initialize |
| 873 | * a VacAttrStats struct for it. If not, return NULL. |
| 874 | * |
| 875 | * If index_expr isn't NULL, then we're trying to analyze an expression index, |
| 876 | * and index_expr is the expression tree representing the column's data. |
| 877 | */ |
| 878 | static VacAttrStats * |
| 879 | examine_attribute(Relation onerel, int attnum, Node *index_expr) |
| 880 | { |
| 881 | Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1); |
| 882 | HeapTuple typtuple; |
| 883 | VacAttrStats *stats; |
| 884 | int i; |
| 885 | bool ok; |
| 886 | |
| 887 | /* Never analyze dropped columns */ |
| 888 | if (attr->attisdropped) |
| 889 | return NULL; |
| 890 | |
| 891 | /* Don't analyze column if user has specified not to */ |
| 892 | if (attr->attstattarget == 0) |
| 893 | return NULL; |
| 894 | |
| 895 | /* |
| 896 | * Create the VacAttrStats struct. Note that we only have a copy of the |
| 897 | * fixed fields of the pg_attribute tuple. |
| 898 | */ |
| 899 | stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); |
| 900 | stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); |
| 901 | memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE); |
| 902 | |
| 903 | /* |
| 904 | * When analyzing an expression index, believe the expression tree's type |
| 905 | * not the column datatype --- the latter might be the opckeytype storage |
| 906 | * type of the opclass, which is not interesting for our purposes. (Note: |
| 907 | * if we did anything with non-expression index columns, we'd need to |
| 908 | * figure out where to get the correct type info from, but for now that's |
| 909 | * not a problem.) It's not clear whether anyone will care about the |
| 910 | * typmod, but we store that too just in case. |
| 911 | */ |
| 912 | if (index_expr) |
| 913 | { |
| 914 | stats->attrtypid = exprType(index_expr); |
| 915 | stats->attrtypmod = exprTypmod(index_expr); |
| 916 | |
| 917 | /* |
| 918 | * If a collation has been specified for the index column, use that in |
| 919 | * preference to anything else; but if not, fall back to whatever we |
| 920 | * can get from the expression. |
| 921 | */ |
| 922 | if (OidIsValid(onerel->rd_indcollation[attnum - 1])) |
| 923 | stats->attrcollid = onerel->rd_indcollation[attnum - 1]; |
| 924 | else |
| 925 | stats->attrcollid = exprCollation(index_expr); |
| 926 | } |
| 927 | else |
| 928 | { |
| 929 | stats->attrtypid = attr->atttypid; |
| 930 | stats->attrtypmod = attr->atttypmod; |
| 931 | stats->attrcollid = attr->attcollation; |
| 932 | } |
| 933 | |
| 934 | typtuple = SearchSysCacheCopy1(TYPEOID, |
| 935 | ObjectIdGetDatum(stats->attrtypid)); |
| 936 | if (!HeapTupleIsValid(typtuple)) |
| 937 | elog(ERROR, "cache lookup failed for type %u" , stats->attrtypid); |
| 938 | stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple); |
| 939 | stats->anl_context = anl_context; |
| 940 | stats->tupattnum = attnum; |
| 941 | |
| 942 | /* |
| 943 | * The fields describing the stats->stavalues[n] element types default to |
| 944 | * the type of the data being analyzed, but the type-specific typanalyze |
| 945 | * function can change them if it wants to store something else. |
| 946 | */ |
| 947 | for (i = 0; i < STATISTIC_NUM_SLOTS; i++) |
| 948 | { |
| 949 | stats->statypid[i] = stats->attrtypid; |
| 950 | stats->statyplen[i] = stats->attrtype->typlen; |
| 951 | stats->statypbyval[i] = stats->attrtype->typbyval; |
| 952 | stats->statypalign[i] = stats->attrtype->typalign; |
| 953 | } |
| 954 | |
| 955 | /* |
| 956 | * Call the type-specific typanalyze function. If none is specified, use |
| 957 | * std_typanalyze(). |
| 958 | */ |
| 959 | if (OidIsValid(stats->attrtype->typanalyze)) |
| 960 | ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze, |
| 961 | PointerGetDatum(stats))); |
| 962 | else |
| 963 | ok = std_typanalyze(stats); |
| 964 | |
| 965 | if (!ok || stats->compute_stats == NULL || stats->minrows <= 0) |
| 966 | { |
| 967 | heap_freetuple(typtuple); |
| 968 | pfree(stats->attr); |
| 969 | pfree(stats); |
| 970 | return NULL; |
| 971 | } |
| 972 | |
| 973 | return stats; |
| 974 | } |
| 975 | |
| 976 | /* |
| 977 | * acquire_sample_rows -- acquire a random sample of rows from the table |
| 978 | * |
| 979 | * Selected rows are returned in the caller-allocated array rows[], which |
| 980 | * must have at least targrows entries. |
| 981 | * The actual number of rows selected is returned as the function result. |
| 982 | * We also estimate the total numbers of live and dead rows in the table, |
| 983 | * and return them into *totalrows and *totaldeadrows, respectively. |
| 984 | * |
| 985 | * The returned list of tuples is in order by physical position in the table. |
| 986 | * (We will rely on this later to derive correlation estimates.) |
| 987 | * |
| 988 | * As of May 2004 we use a new two-stage method: Stage one selects up |
| 989 | * to targrows random blocks (or all blocks, if there aren't so many). |
| 990 | * Stage two scans these blocks and uses the Vitter algorithm to create |
| 991 | * a random sample of targrows rows (or less, if there are less in the |
| 992 | * sample of blocks). The two stages are executed simultaneously: each |
| 993 | * block is processed as soon as stage one returns its number and while |
| 994 | * the rows are read stage two controls which ones are to be inserted |
| 995 | * into the sample. |
| 996 | * |
| 997 | * Although every row has an equal chance of ending up in the final |
| 998 | * sample, this sampling method is not perfect: not every possible |
| 999 | * sample has an equal chance of being selected. For large relations |
| 1000 | * the number of different blocks represented by the sample tends to be |
| 1001 | * too small. We can live with that for now. Improvements are welcome. |
| 1002 | * |
| 1003 | * An important property of this sampling method is that because we do |
| 1004 | * look at a statistically unbiased set of blocks, we should get |
| 1005 | * unbiased estimates of the average numbers of live and dead rows per |
| 1006 | * block. The previous sampling method put too much credence in the row |
| 1007 | * density near the start of the table. |
| 1008 | */ |
| 1009 | static int |
| 1010 | acquire_sample_rows(Relation onerel, int elevel, |
| 1011 | HeapTuple *rows, int targrows, |
| 1012 | double *totalrows, double *totaldeadrows) |
| 1013 | { |
| 1014 | int numrows = 0; /* # rows now in reservoir */ |
| 1015 | double samplerows = 0; /* total # rows collected */ |
| 1016 | double liverows = 0; /* # live rows seen */ |
| 1017 | double deadrows = 0; /* # dead rows seen */ |
| 1018 | double rowstoskip = -1; /* -1 means not set yet */ |
| 1019 | BlockNumber totalblocks; |
| 1020 | TransactionId OldestXmin; |
| 1021 | BlockSamplerData bs; |
| 1022 | ReservoirStateData rstate; |
| 1023 | TupleTableSlot *slot; |
| 1024 | TableScanDesc scan; |
| 1025 | |
| 1026 | Assert(targrows > 0); |
| 1027 | |
| 1028 | totalblocks = RelationGetNumberOfBlocks(onerel); |
| 1029 | |
| 1030 | /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ |
| 1031 | OldestXmin = GetOldestXmin(onerel, PROCARRAY_FLAGS_VACUUM); |
| 1032 | |
| 1033 | /* Prepare for sampling block numbers */ |
| 1034 | BlockSampler_Init(&bs, totalblocks, targrows, random()); |
| 1035 | /* Prepare for sampling rows */ |
| 1036 | reservoir_init_selection_state(&rstate, targrows); |
| 1037 | |
| 1038 | scan = table_beginscan_analyze(onerel); |
| 1039 | slot = table_slot_create(onerel, NULL); |
| 1040 | |
| 1041 | /* Outer loop over blocks to sample */ |
| 1042 | while (BlockSampler_HasMore(&bs)) |
| 1043 | { |
| 1044 | BlockNumber targblock = BlockSampler_Next(&bs); |
| 1045 | |
| 1046 | vacuum_delay_point(); |
| 1047 | |
| 1048 | if (!table_scan_analyze_next_block(scan, targblock, vac_strategy)) |
| 1049 | continue; |
| 1050 | |
| 1051 | while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot)) |
| 1052 | { |
| 1053 | /* |
| 1054 | * The first targrows sample rows are simply copied into the |
| 1055 | * reservoir. Then we start replacing tuples in the sample until |
| 1056 | * we reach the end of the relation. This algorithm is from Jeff |
| 1057 | * Vitter's paper (see full citation in utils/misc/sampling.c). It |
| 1058 | * works by repeatedly computing the number of tuples to skip |
| 1059 | * before selecting a tuple, which replaces a randomly chosen |
| 1060 | * element of the reservoir (current set of tuples). At all times |
| 1061 | * the reservoir is a true random sample of the tuples we've |
| 1062 | * passed over so far, so when we fall off the end of the relation |
| 1063 | * we're done. |
| 1064 | */ |
| 1065 | if (numrows < targrows) |
| 1066 | rows[numrows++] = ExecCopySlotHeapTuple(slot); |
| 1067 | else |
| 1068 | { |
| 1069 | /* |
| 1070 | * t in Vitter's paper is the number of records already |
| 1071 | * processed. If we need to compute a new S value, we must |
| 1072 | * use the not-yet-incremented value of samplerows as t. |
| 1073 | */ |
| 1074 | if (rowstoskip < 0) |
| 1075 | rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows); |
| 1076 | |
| 1077 | if (rowstoskip <= 0) |
| 1078 | { |
| 1079 | /* |
| 1080 | * Found a suitable tuple, so save it, replacing one old |
| 1081 | * tuple at random |
| 1082 | */ |
| 1083 | int k = (int) (targrows * sampler_random_fract(rstate.randstate)); |
| 1084 | |
| 1085 | Assert(k >= 0 && k < targrows); |
| 1086 | heap_freetuple(rows[k]); |
| 1087 | rows[k] = ExecCopySlotHeapTuple(slot); |
| 1088 | } |
| 1089 | |
| 1090 | rowstoskip -= 1; |
| 1091 | } |
| 1092 | |
| 1093 | samplerows += 1; |
| 1094 | } |
| 1095 | } |
| 1096 | |
| 1097 | ExecDropSingleTupleTableSlot(slot); |
| 1098 | table_endscan(scan); |
| 1099 | |
| 1100 | /* |
| 1101 | * If we didn't find as many tuples as we wanted then we're done. No sort |
| 1102 | * is needed, since they're already in order. |
| 1103 | * |
| 1104 | * Otherwise we need to sort the collected tuples by position |
| 1105 | * (itempointer). It's not worth worrying about corner cases where the |
| 1106 | * tuples are already sorted. |
| 1107 | */ |
| 1108 | if (numrows == targrows) |
| 1109 | qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows); |
| 1110 | |
| 1111 | /* |
| 1112 | * Estimate total numbers of live and dead rows in relation, extrapolating |
| 1113 | * on the assumption that the average tuple density in pages we didn't |
| 1114 | * scan is the same as in the pages we did scan. Since what we scanned is |
| 1115 | * a random sample of the pages in the relation, this should be a good |
| 1116 | * assumption. |
| 1117 | */ |
| 1118 | if (bs.m > 0) |
| 1119 | { |
| 1120 | *totalrows = floor((liverows / bs.m) * totalblocks + 0.5); |
| 1121 | *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5); |
| 1122 | } |
| 1123 | else |
| 1124 | { |
| 1125 | *totalrows = 0.0; |
| 1126 | *totaldeadrows = 0.0; |
| 1127 | } |
| 1128 | |
| 1129 | /* |
| 1130 | * Emit some interesting relation info |
| 1131 | */ |
| 1132 | ereport(elevel, |
| 1133 | (errmsg("\"%s\": scanned %d of %u pages, " |
| 1134 | "containing %.0f live rows and %.0f dead rows; " |
| 1135 | "%d rows in sample, %.0f estimated total rows" , |
| 1136 | RelationGetRelationName(onerel), |
| 1137 | bs.m, totalblocks, |
| 1138 | liverows, deadrows, |
| 1139 | numrows, *totalrows))); |
| 1140 | |
| 1141 | return numrows; |
| 1142 | } |
| 1143 | |
| 1144 | /* |
| 1145 | * qsort comparator for sorting rows[] array |
| 1146 | */ |
| 1147 | static int |
| 1148 | compare_rows(const void *a, const void *b) |
| 1149 | { |
| 1150 | HeapTuple ha = *(const HeapTuple *) a; |
| 1151 | HeapTuple hb = *(const HeapTuple *) b; |
| 1152 | BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self); |
| 1153 | OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self); |
| 1154 | BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self); |
| 1155 | OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self); |
| 1156 | |
| 1157 | if (ba < bb) |
| 1158 | return -1; |
| 1159 | if (ba > bb) |
| 1160 | return 1; |
| 1161 | if (oa < ob) |
| 1162 | return -1; |
| 1163 | if (oa > ob) |
| 1164 | return 1; |
| 1165 | return 0; |
| 1166 | } |
| 1167 | |
| 1168 | |
| 1169 | /* |
| 1170 | * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree |
| 1171 | * |
| 1172 | * This has the same API as acquire_sample_rows, except that rows are |
| 1173 | * collected from all inheritance children as well as the specified table. |
| 1174 | * We fail and return zero if there are no inheritance children, or if all |
| 1175 | * children are foreign tables that don't support ANALYZE. |
| 1176 | */ |
| 1177 | static int |
| 1178 | acquire_inherited_sample_rows(Relation onerel, int elevel, |
| 1179 | HeapTuple *rows, int targrows, |
| 1180 | double *totalrows, double *totaldeadrows) |
| 1181 | { |
| 1182 | List *tableOIDs; |
| 1183 | Relation *rels; |
| 1184 | AcquireSampleRowsFunc *acquirefuncs; |
| 1185 | double *relblocks; |
| 1186 | double totalblocks; |
| 1187 | int numrows, |
| 1188 | nrels, |
| 1189 | i; |
| 1190 | ListCell *lc; |
| 1191 | bool has_child; |
| 1192 | |
| 1193 | /* |
| 1194 | * Find all members of inheritance set. We only need AccessShareLock on |
| 1195 | * the children. |
| 1196 | */ |
| 1197 | tableOIDs = |
| 1198 | find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL); |
| 1199 | |
| 1200 | /* |
| 1201 | * Check that there's at least one descendant, else fail. This could |
| 1202 | * happen despite analyze_rel's relhassubclass check, if table once had a |
| 1203 | * child but no longer does. In that case, we can clear the |
| 1204 | * relhassubclass field so as not to make the same mistake again later. |
| 1205 | * (This is safe because we hold ShareUpdateExclusiveLock.) |
| 1206 | */ |
| 1207 | if (list_length(tableOIDs) < 2) |
| 1208 | { |
| 1209 | /* CCI because we already updated the pg_class row in this command */ |
| 1210 | CommandCounterIncrement(); |
| 1211 | SetRelationHasSubclass(RelationGetRelid(onerel), false); |
| 1212 | ereport(elevel, |
| 1213 | (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables" , |
| 1214 | get_namespace_name(RelationGetNamespace(onerel)), |
| 1215 | RelationGetRelationName(onerel)))); |
| 1216 | return 0; |
| 1217 | } |
| 1218 | |
| 1219 | /* |
| 1220 | * Identify acquirefuncs to use, and count blocks in all the relations. |
| 1221 | * The result could overflow BlockNumber, so we use double arithmetic. |
| 1222 | */ |
| 1223 | rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation)); |
| 1224 | acquirefuncs = (AcquireSampleRowsFunc *) |
| 1225 | palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc)); |
| 1226 | relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double)); |
| 1227 | totalblocks = 0; |
| 1228 | nrels = 0; |
| 1229 | has_child = false; |
| 1230 | foreach(lc, tableOIDs) |
| 1231 | { |
| 1232 | Oid childOID = lfirst_oid(lc); |
| 1233 | Relation childrel; |
| 1234 | AcquireSampleRowsFunc acquirefunc = NULL; |
| 1235 | BlockNumber relpages = 0; |
| 1236 | |
| 1237 | /* We already got the needed lock */ |
| 1238 | childrel = table_open(childOID, NoLock); |
| 1239 | |
| 1240 | /* Ignore if temp table of another backend */ |
| 1241 | if (RELATION_IS_OTHER_TEMP(childrel)) |
| 1242 | { |
| 1243 | /* ... but release the lock on it */ |
| 1244 | Assert(childrel != onerel); |
| 1245 | table_close(childrel, AccessShareLock); |
| 1246 | continue; |
| 1247 | } |
| 1248 | |
| 1249 | /* Check table type (MATVIEW can't happen, but might as well allow) */ |
| 1250 | if (childrel->rd_rel->relkind == RELKIND_RELATION || |
| 1251 | childrel->rd_rel->relkind == RELKIND_MATVIEW) |
| 1252 | { |
| 1253 | /* Regular table, so use the regular row acquisition function */ |
| 1254 | acquirefunc = acquire_sample_rows; |
| 1255 | relpages = RelationGetNumberOfBlocks(childrel); |
| 1256 | } |
| 1257 | else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) |
| 1258 | { |
| 1259 | /* |
| 1260 | * For a foreign table, call the FDW's hook function to see |
| 1261 | * whether it supports analysis. |
| 1262 | */ |
| 1263 | FdwRoutine *fdwroutine; |
| 1264 | bool ok = false; |
| 1265 | |
| 1266 | fdwroutine = GetFdwRoutineForRelation(childrel, false); |
| 1267 | |
| 1268 | if (fdwroutine->AnalyzeForeignTable != NULL) |
| 1269 | ok = fdwroutine->AnalyzeForeignTable(childrel, |
| 1270 | &acquirefunc, |
| 1271 | &relpages); |
| 1272 | |
| 1273 | if (!ok) |
| 1274 | { |
| 1275 | /* ignore, but release the lock on it */ |
| 1276 | Assert(childrel != onerel); |
| 1277 | table_close(childrel, AccessShareLock); |
| 1278 | continue; |
| 1279 | } |
| 1280 | } |
| 1281 | else |
| 1282 | { |
| 1283 | /* |
| 1284 | * ignore, but release the lock on it. don't try to unlock the |
| 1285 | * passed-in relation |
| 1286 | */ |
| 1287 | Assert(childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); |
| 1288 | if (childrel != onerel) |
| 1289 | table_close(childrel, AccessShareLock); |
| 1290 | else |
| 1291 | table_close(childrel, NoLock); |
| 1292 | continue; |
| 1293 | } |
| 1294 | |
| 1295 | /* OK, we'll process this child */ |
| 1296 | has_child = true; |
| 1297 | rels[nrels] = childrel; |
| 1298 | acquirefuncs[nrels] = acquirefunc; |
| 1299 | relblocks[nrels] = (double) relpages; |
| 1300 | totalblocks += (double) relpages; |
| 1301 | nrels++; |
| 1302 | } |
| 1303 | |
| 1304 | /* |
| 1305 | * If we don't have at least one child table to consider, fail. If the |
| 1306 | * relation is a partitioned table, it's not counted as a child table. |
| 1307 | */ |
| 1308 | if (!has_child) |
| 1309 | { |
| 1310 | ereport(elevel, |
| 1311 | (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables" , |
| 1312 | get_namespace_name(RelationGetNamespace(onerel)), |
| 1313 | RelationGetRelationName(onerel)))); |
| 1314 | return 0; |
| 1315 | } |
| 1316 | |
| 1317 | /* |
| 1318 | * Now sample rows from each relation, proportionally to its fraction of |
| 1319 | * the total block count. (This might be less than desirable if the child |
| 1320 | * rels have radically different free-space percentages, but it's not |
| 1321 | * clear that it's worth working harder.) |
| 1322 | */ |
| 1323 | numrows = 0; |
| 1324 | *totalrows = 0; |
| 1325 | *totaldeadrows = 0; |
| 1326 | for (i = 0; i < nrels; i++) |
| 1327 | { |
| 1328 | Relation childrel = rels[i]; |
| 1329 | AcquireSampleRowsFunc acquirefunc = acquirefuncs[i]; |
| 1330 | double childblocks = relblocks[i]; |
| 1331 | |
| 1332 | if (childblocks > 0) |
| 1333 | { |
| 1334 | int childtargrows; |
| 1335 | |
| 1336 | childtargrows = (int) rint(targrows * childblocks / totalblocks); |
| 1337 | /* Make sure we don't overrun due to roundoff error */ |
| 1338 | childtargrows = Min(childtargrows, targrows - numrows); |
| 1339 | if (childtargrows > 0) |
| 1340 | { |
| 1341 | int childrows; |
| 1342 | double trows, |
| 1343 | tdrows; |
| 1344 | |
| 1345 | /* Fetch a random sample of the child's rows */ |
| 1346 | childrows = (*acquirefunc) (childrel, elevel, |
| 1347 | rows + numrows, childtargrows, |
| 1348 | &trows, &tdrows); |
| 1349 | |
| 1350 | /* We may need to convert from child's rowtype to parent's */ |
| 1351 | if (childrows > 0 && |
| 1352 | !equalTupleDescs(RelationGetDescr(childrel), |
| 1353 | RelationGetDescr(onerel))) |
| 1354 | { |
| 1355 | TupleConversionMap *map; |
| 1356 | |
| 1357 | map = convert_tuples_by_name(RelationGetDescr(childrel), |
| 1358 | RelationGetDescr(onerel), |
| 1359 | gettext_noop("could not convert row type" )); |
| 1360 | if (map != NULL) |
| 1361 | { |
| 1362 | int j; |
| 1363 | |
| 1364 | for (j = 0; j < childrows; j++) |
| 1365 | { |
| 1366 | HeapTuple newtup; |
| 1367 | |
| 1368 | newtup = execute_attr_map_tuple(rows[numrows + j], map); |
| 1369 | heap_freetuple(rows[numrows + j]); |
| 1370 | rows[numrows + j] = newtup; |
| 1371 | } |
| 1372 | free_conversion_map(map); |
| 1373 | } |
| 1374 | } |
| 1375 | |
| 1376 | /* And add to counts */ |
| 1377 | numrows += childrows; |
| 1378 | *totalrows += trows; |
| 1379 | *totaldeadrows += tdrows; |
| 1380 | } |
| 1381 | } |
| 1382 | |
| 1383 | /* |
| 1384 | * Note: we cannot release the child-table locks, since we may have |
| 1385 | * pointers to their TOAST tables in the sampled rows. |
| 1386 | */ |
| 1387 | table_close(childrel, NoLock); |
| 1388 | } |
| 1389 | |
| 1390 | return numrows; |
| 1391 | } |
| 1392 | |
| 1393 | |
| 1394 | /* |
| 1395 | * update_attstats() -- update attribute statistics for one relation |
| 1396 | * |
| 1397 | * Statistics are stored in several places: the pg_class row for the |
| 1398 | * relation has stats about the whole relation, and there is a |
| 1399 | * pg_statistic row for each (non-system) attribute that has ever |
| 1400 | * been analyzed. The pg_class values are updated by VACUUM, not here. |
| 1401 | * |
| 1402 | * pg_statistic rows are just added or updated normally. This means |
| 1403 | * that pg_statistic will probably contain some deleted rows at the |
| 1404 | * completion of a vacuum cycle, unless it happens to get vacuumed last. |
| 1405 | * |
| 1406 | * To keep things simple, we punt for pg_statistic, and don't try |
| 1407 | * to compute or store rows for pg_statistic itself in pg_statistic. |
| 1408 | * This could possibly be made to work, but it's not worth the trouble. |
| 1409 | * Note analyze_rel() has seen to it that we won't come here when |
| 1410 | * vacuuming pg_statistic itself. |
| 1411 | * |
| 1412 | * Note: there would be a race condition here if two backends could |
| 1413 | * ANALYZE the same table concurrently. Presently, we lock that out |
| 1414 | * by taking a self-exclusive lock on the relation in analyze_rel(). |
| 1415 | */ |
| 1416 | static void |
| 1417 | update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats) |
| 1418 | { |
| 1419 | Relation sd; |
| 1420 | int attno; |
| 1421 | |
| 1422 | if (natts <= 0) |
| 1423 | return; /* nothing to do */ |
| 1424 | |
| 1425 | sd = table_open(StatisticRelationId, RowExclusiveLock); |
| 1426 | |
| 1427 | for (attno = 0; attno < natts; attno++) |
| 1428 | { |
| 1429 | VacAttrStats *stats = vacattrstats[attno]; |
| 1430 | HeapTuple stup, |
| 1431 | oldtup; |
| 1432 | int i, |
| 1433 | k, |
| 1434 | n; |
| 1435 | Datum values[Natts_pg_statistic]; |
| 1436 | bool nulls[Natts_pg_statistic]; |
| 1437 | bool replaces[Natts_pg_statistic]; |
| 1438 | |
| 1439 | /* Ignore attr if we weren't able to collect stats */ |
| 1440 | if (!stats->stats_valid) |
| 1441 | continue; |
| 1442 | |
| 1443 | /* |
| 1444 | * Construct a new pg_statistic tuple |
| 1445 | */ |
| 1446 | for (i = 0; i < Natts_pg_statistic; ++i) |
| 1447 | { |
| 1448 | nulls[i] = false; |
| 1449 | replaces[i] = true; |
| 1450 | } |
| 1451 | |
| 1452 | values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid); |
| 1453 | values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum); |
| 1454 | values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh); |
| 1455 | values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac); |
| 1456 | values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth); |
| 1457 | values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct); |
| 1458 | i = Anum_pg_statistic_stakind1 - 1; |
| 1459 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1460 | { |
| 1461 | values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */ |
| 1462 | } |
| 1463 | i = Anum_pg_statistic_staop1 - 1; |
| 1464 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1465 | { |
| 1466 | values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */ |
| 1467 | } |
| 1468 | i = Anum_pg_statistic_stacoll1 - 1; |
| 1469 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1470 | { |
| 1471 | values[i++] = ObjectIdGetDatum(stats->stacoll[k]); /* stacollN */ |
| 1472 | } |
| 1473 | i = Anum_pg_statistic_stanumbers1 - 1; |
| 1474 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1475 | { |
| 1476 | int nnum = stats->numnumbers[k]; |
| 1477 | |
| 1478 | if (nnum > 0) |
| 1479 | { |
| 1480 | Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum)); |
| 1481 | ArrayType *arry; |
| 1482 | |
| 1483 | for (n = 0; n < nnum; n++) |
| 1484 | numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]); |
| 1485 | /* XXX knows more than it should about type float4: */ |
| 1486 | arry = construct_array(numdatums, nnum, |
| 1487 | FLOAT4OID, |
| 1488 | sizeof(float4), FLOAT4PASSBYVAL, 'i'); |
| 1489 | values[i++] = PointerGetDatum(arry); /* stanumbersN */ |
| 1490 | } |
| 1491 | else |
| 1492 | { |
| 1493 | nulls[i] = true; |
| 1494 | values[i++] = (Datum) 0; |
| 1495 | } |
| 1496 | } |
| 1497 | i = Anum_pg_statistic_stavalues1 - 1; |
| 1498 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1499 | { |
| 1500 | if (stats->numvalues[k] > 0) |
| 1501 | { |
| 1502 | ArrayType *arry; |
| 1503 | |
| 1504 | arry = construct_array(stats->stavalues[k], |
| 1505 | stats->numvalues[k], |
| 1506 | stats->statypid[k], |
| 1507 | stats->statyplen[k], |
| 1508 | stats->statypbyval[k], |
| 1509 | stats->statypalign[k]); |
| 1510 | values[i++] = PointerGetDatum(arry); /* stavaluesN */ |
| 1511 | } |
| 1512 | else |
| 1513 | { |
| 1514 | nulls[i] = true; |
| 1515 | values[i++] = (Datum) 0; |
| 1516 | } |
| 1517 | } |
| 1518 | |
| 1519 | /* Is there already a pg_statistic tuple for this attribute? */ |
| 1520 | oldtup = SearchSysCache3(STATRELATTINH, |
| 1521 | ObjectIdGetDatum(relid), |
| 1522 | Int16GetDatum(stats->attr->attnum), |
| 1523 | BoolGetDatum(inh)); |
| 1524 | |
| 1525 | if (HeapTupleIsValid(oldtup)) |
| 1526 | { |
| 1527 | /* Yes, replace it */ |
| 1528 | stup = heap_modify_tuple(oldtup, |
| 1529 | RelationGetDescr(sd), |
| 1530 | values, |
| 1531 | nulls, |
| 1532 | replaces); |
| 1533 | ReleaseSysCache(oldtup); |
| 1534 | CatalogTupleUpdate(sd, &stup->t_self, stup); |
| 1535 | } |
| 1536 | else |
| 1537 | { |
| 1538 | /* No, insert new tuple */ |
| 1539 | stup = heap_form_tuple(RelationGetDescr(sd), values, nulls); |
| 1540 | CatalogTupleInsert(sd, stup); |
| 1541 | } |
| 1542 | |
| 1543 | heap_freetuple(stup); |
| 1544 | } |
| 1545 | |
| 1546 | table_close(sd, RowExclusiveLock); |
| 1547 | } |
| 1548 | |
| 1549 | /* |
| 1550 | * Standard fetch function for use by compute_stats subroutines. |
| 1551 | * |
| 1552 | * This exists to provide some insulation between compute_stats routines |
| 1553 | * and the actual storage of the sample data. |
| 1554 | */ |
| 1555 | static Datum |
| 1556 | std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) |
| 1557 | { |
| 1558 | int attnum = stats->tupattnum; |
| 1559 | HeapTuple tuple = stats->rows[rownum]; |
| 1560 | TupleDesc tupDesc = stats->tupDesc; |
| 1561 | |
| 1562 | return heap_getattr(tuple, attnum, tupDesc, isNull); |
| 1563 | } |
| 1564 | |
| 1565 | /* |
| 1566 | * Fetch function for analyzing index expressions. |
| 1567 | * |
| 1568 | * We have not bothered to construct index tuples, instead the data is |
| 1569 | * just in Datum arrays. |
| 1570 | */ |
| 1571 | static Datum |
| 1572 | ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) |
| 1573 | { |
| 1574 | int i; |
| 1575 | |
| 1576 | /* exprvals and exprnulls are already offset for proper column */ |
| 1577 | i = rownum * stats->rowstride; |
| 1578 | *isNull = stats->exprnulls[i]; |
| 1579 | return stats->exprvals[i]; |
| 1580 | } |
| 1581 | |
| 1582 | |
| 1583 | /*========================================================================== |
| 1584 | * |
| 1585 | * Code below this point represents the "standard" type-specific statistics |
| 1586 | * analysis algorithms. This code can be replaced on a per-data-type basis |
| 1587 | * by setting a nonzero value in pg_type.typanalyze. |
| 1588 | * |
| 1589 | *========================================================================== |
| 1590 | */ |
| 1591 | |
| 1592 | |
| 1593 | /* |
| 1594 | * To avoid consuming too much memory during analysis and/or too much space |
| 1595 | * in the resulting pg_statistic rows, we ignore varlena datums that are wider |
| 1596 | * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV |
| 1597 | * and distinct-value calculations since a wide value is unlikely to be |
| 1598 | * duplicated at all, much less be a most-common value. For the same reason, |
| 1599 | * ignoring wide values will not affect our estimates of histogram bin |
| 1600 | * boundaries very much. |
| 1601 | */ |
| 1602 | #define WIDTH_THRESHOLD 1024 |
| 1603 | |
| 1604 | #define swapInt(a,b) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0) |
| 1605 | #define swapDatum(a,b) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0) |
| 1606 | |
| 1607 | /* |
| 1608 | * Extra information used by the default analysis routines |
| 1609 | */ |
| 1610 | typedef struct |
| 1611 | { |
| 1612 | int count; /* # of duplicates */ |
| 1613 | int first; /* values[] index of first occurrence */ |
| 1614 | } ScalarMCVItem; |
| 1615 | |
| 1616 | typedef struct |
| 1617 | { |
| 1618 | SortSupport ssup; |
| 1619 | int *tupnoLink; |
| 1620 | } CompareScalarsContext; |
| 1621 | |
| 1622 | |
| 1623 | static void compute_trivial_stats(VacAttrStatsP stats, |
| 1624 | AnalyzeAttrFetchFunc fetchfunc, |
| 1625 | int samplerows, |
| 1626 | double totalrows); |
| 1627 | static void compute_distinct_stats(VacAttrStatsP stats, |
| 1628 | AnalyzeAttrFetchFunc fetchfunc, |
| 1629 | int samplerows, |
| 1630 | double totalrows); |
| 1631 | static void compute_scalar_stats(VacAttrStatsP stats, |
| 1632 | AnalyzeAttrFetchFunc fetchfunc, |
| 1633 | int samplerows, |
| 1634 | double totalrows); |
| 1635 | static int compare_scalars(const void *a, const void *b, void *arg); |
| 1636 | static int compare_mcvs(const void *a, const void *b); |
| 1637 | static int analyze_mcv_list(int *mcv_counts, |
| 1638 | int num_mcv, |
| 1639 | double stadistinct, |
| 1640 | double stanullfrac, |
| 1641 | int samplerows, |
| 1642 | double totalrows); |
| 1643 | |
| 1644 | |
| 1645 | /* |
| 1646 | * std_typanalyze -- the default type-specific typanalyze function |
| 1647 | */ |
| 1648 | bool |
| 1649 | std_typanalyze(VacAttrStats *stats) |
| 1650 | { |
| 1651 | Form_pg_attribute attr = stats->attr; |
| 1652 | Oid ltopr; |
| 1653 | Oid eqopr; |
| 1654 | StdAnalyzeData *mystats; |
| 1655 | |
| 1656 | /* If the attstattarget column is negative, use the default value */ |
| 1657 | /* NB: it is okay to scribble on stats->attr since it's a copy */ |
| 1658 | if (attr->attstattarget < 0) |
| 1659 | attr->attstattarget = default_statistics_target; |
| 1660 | |
| 1661 | /* Look for default "<" and "=" operators for column's type */ |
| 1662 | get_sort_group_operators(stats->attrtypid, |
| 1663 | false, false, false, |
| 1664 | <opr, &eqopr, NULL, |
| 1665 | NULL); |
| 1666 | |
| 1667 | /* Save the operator info for compute_stats routines */ |
| 1668 | mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData)); |
| 1669 | mystats->eqopr = eqopr; |
| 1670 | mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid; |
| 1671 | mystats->ltopr = ltopr; |
| 1672 | stats->extra_data = mystats; |
| 1673 | |
| 1674 | /* |
| 1675 | * Determine which standard statistics algorithm to use |
| 1676 | */ |
| 1677 | if (OidIsValid(eqopr) && OidIsValid(ltopr)) |
| 1678 | { |
| 1679 | /* Seems to be a scalar datatype */ |
| 1680 | stats->compute_stats = compute_scalar_stats; |
| 1681 | /*-------------------- |
| 1682 | * The following choice of minrows is based on the paper |
| 1683 | * "Random sampling for histogram construction: how much is enough?" |
| 1684 | * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in |
| 1685 | * Proceedings of ACM SIGMOD International Conference on Management |
| 1686 | * of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5 |
| 1687 | * says that for table size n, histogram size k, maximum relative |
| 1688 | * error in bin size f, and error probability gamma, the minimum |
| 1689 | * random sample size is |
| 1690 | * r = 4 * k * ln(2*n/gamma) / f^2 |
| 1691 | * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain |
| 1692 | * r = 305.82 * k |
| 1693 | * Note that because of the log function, the dependence on n is |
| 1694 | * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66 |
| 1695 | * bin size error with probability 0.99. So there's no real need to |
| 1696 | * scale for n, which is a good thing because we don't necessarily |
| 1697 | * know it at this point. |
| 1698 | *-------------------- |
| 1699 | */ |
| 1700 | stats->minrows = 300 * attr->attstattarget; |
| 1701 | } |
| 1702 | else if (OidIsValid(eqopr)) |
| 1703 | { |
| 1704 | /* We can still recognize distinct values */ |
| 1705 | stats->compute_stats = compute_distinct_stats; |
| 1706 | /* Might as well use the same minrows as above */ |
| 1707 | stats->minrows = 300 * attr->attstattarget; |
| 1708 | } |
| 1709 | else |
| 1710 | { |
| 1711 | /* Can't do much but the trivial stuff */ |
| 1712 | stats->compute_stats = compute_trivial_stats; |
| 1713 | /* Might as well use the same minrows as above */ |
| 1714 | stats->minrows = 300 * attr->attstattarget; |
| 1715 | } |
| 1716 | |
| 1717 | return true; |
| 1718 | } |
| 1719 | |
| 1720 | |
| 1721 | /* |
| 1722 | * compute_trivial_stats() -- compute very basic column statistics |
| 1723 | * |
| 1724 | * We use this when we cannot find a hash "=" operator for the datatype. |
| 1725 | * |
| 1726 | * We determine the fraction of non-null rows and the average datum width. |
| 1727 | */ |
| 1728 | static void |
| 1729 | compute_trivial_stats(VacAttrStatsP stats, |
| 1730 | AnalyzeAttrFetchFunc fetchfunc, |
| 1731 | int samplerows, |
| 1732 | double totalrows) |
| 1733 | { |
| 1734 | int i; |
| 1735 | int null_cnt = 0; |
| 1736 | int nonnull_cnt = 0; |
| 1737 | double total_width = 0; |
| 1738 | bool is_varlena = (!stats->attrtype->typbyval && |
| 1739 | stats->attrtype->typlen == -1); |
| 1740 | bool is_varwidth = (!stats->attrtype->typbyval && |
| 1741 | stats->attrtype->typlen < 0); |
| 1742 | |
| 1743 | for (i = 0; i < samplerows; i++) |
| 1744 | { |
| 1745 | Datum value; |
| 1746 | bool isnull; |
| 1747 | |
| 1748 | vacuum_delay_point(); |
| 1749 | |
| 1750 | value = fetchfunc(stats, i, &isnull); |
| 1751 | |
| 1752 | /* Check for null/nonnull */ |
| 1753 | if (isnull) |
| 1754 | { |
| 1755 | null_cnt++; |
| 1756 | continue; |
| 1757 | } |
| 1758 | nonnull_cnt++; |
| 1759 | |
| 1760 | /* |
| 1761 | * If it's a variable-width field, add up widths for average width |
| 1762 | * calculation. Note that if the value is toasted, we use the toasted |
| 1763 | * width. We don't bother with this calculation if it's a fixed-width |
| 1764 | * type. |
| 1765 | */ |
| 1766 | if (is_varlena) |
| 1767 | { |
| 1768 | total_width += VARSIZE_ANY(DatumGetPointer(value)); |
| 1769 | } |
| 1770 | else if (is_varwidth) |
| 1771 | { |
| 1772 | /* must be cstring */ |
| 1773 | total_width += strlen(DatumGetCString(value)) + 1; |
| 1774 | } |
| 1775 | } |
| 1776 | |
| 1777 | /* We can only compute average width if we found some non-null values. */ |
| 1778 | if (nonnull_cnt > 0) |
| 1779 | { |
| 1780 | stats->stats_valid = true; |
| 1781 | /* Do the simple null-frac and width stats */ |
| 1782 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
| 1783 | if (is_varwidth) |
| 1784 | stats->stawidth = total_width / (double) nonnull_cnt; |
| 1785 | else |
| 1786 | stats->stawidth = stats->attrtype->typlen; |
| 1787 | stats->stadistinct = 0.0; /* "unknown" */ |
| 1788 | } |
| 1789 | else if (null_cnt > 0) |
| 1790 | { |
| 1791 | /* We found only nulls; assume the column is entirely null */ |
| 1792 | stats->stats_valid = true; |
| 1793 | stats->stanullfrac = 1.0; |
| 1794 | if (is_varwidth) |
| 1795 | stats->stawidth = 0; /* "unknown" */ |
| 1796 | else |
| 1797 | stats->stawidth = stats->attrtype->typlen; |
| 1798 | stats->stadistinct = 0.0; /* "unknown" */ |
| 1799 | } |
| 1800 | } |
| 1801 | |
| 1802 | |
| 1803 | /* |
| 1804 | * compute_distinct_stats() -- compute column statistics including ndistinct |
| 1805 | * |
| 1806 | * We use this when we can find only an "=" operator for the datatype. |
| 1807 | * |
| 1808 | * We determine the fraction of non-null rows, the average width, the |
| 1809 | * most common values, and the (estimated) number of distinct values. |
| 1810 | * |
| 1811 | * The most common values are determined by brute force: we keep a list |
| 1812 | * of previously seen values, ordered by number of times seen, as we scan |
| 1813 | * the samples. A newly seen value is inserted just after the last |
| 1814 | * multiply-seen value, causing the bottommost (oldest) singly-seen value |
| 1815 | * to drop off the list. The accuracy of this method, and also its cost, |
| 1816 | * depend mainly on the length of the list we are willing to keep. |
| 1817 | */ |
static void
compute_distinct_stats(VacAttrStatsP stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			i;
	int			null_cnt = 0;
	int			nonnull_cnt = 0;
	int			toowide_cnt = 0;	/* values skipped as too wide to detoast */
	double		total_width = 0;
	bool		is_varlena = (!stats->attrtype->typbyval &&
							  stats->attrtype->typlen == -1);
	bool		is_varwidth = (!stats->attrtype->typbyval &&
							   stats->attrtype->typlen < 0);
	FmgrInfo	f_cmpeq;
	typedef struct
	{
		Datum		value;
		int			count;		/* # of sample rows matching this value */
	} TrackItem;
	TrackItem  *track;			/* MCV candidates, descending by count */
	int			track_cnt,
				track_max;
	int			num_mcv = stats->attr->attstattarget;
	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

	/*
	 * We track up to 2*n values for an n-element MCV list; but at least 10
	 */
	track_max = 2 * num_mcv;
	if (track_max < 10)
		track_max = 10;
	track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
	track_cnt = 0;

	/* Set up to call the "=" function saved in our extra_data */
	fmgr_info(mystats->eqfunc, &f_cmpeq);

	for (i = 0; i < samplerows; i++)
	{
		Datum		value;
		bool		isnull;
		bool		match;
		int			firstcount1,
					j;

		vacuum_delay_point();

		value = fetchfunc(stats, i, &isnull);

		/* Check for null/nonnull */
		if (isnull)
		{
			null_cnt++;
			continue;
		}
		nonnull_cnt++;

		/*
		 * If it's a variable-width field, add up widths for average width
		 * calculation.  Note that if the value is toasted, we use the toasted
		 * width.  We don't bother with this calculation if it's a fixed-width
		 * type.
		 */
		if (is_varlena)
		{
			total_width += VARSIZE_ANY(DatumGetPointer(value));

			/*
			 * If the value is toasted, we want to detoast it just once to
			 * avoid repeated detoastings and resultant excess memory usage
			 * during the comparisons.  Also, check to see if the value is
			 * excessively wide, and if so don't detoast at all --- just
			 * ignore the value.
			 */
			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
			{
				toowide_cnt++;
				continue;
			}
			value = PointerGetDatum(PG_DETOAST_DATUM(value));
		}
		else if (is_varwidth)
		{
			/* must be cstring */
			total_width += strlen(DatumGetCString(value)) + 1;
		}

		/*
		 * See if the value matches anything we're already tracking.  As a
		 * side effect, remember in firstcount1 the index of the first track
		 * entry with count == 1 (or track_cnt if there is none); that is
		 * where a newly-seen value would be inserted.
		 */
		match = false;
		firstcount1 = track_cnt;
		for (j = 0; j < track_cnt; j++)
		{
			if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
											   stats->attrcollid,
											   value, track[j].value)))
			{
				match = true;
				break;
			}
			/* the j < firstcount1 guard makes only the first hit stick */
			if (j < firstcount1 && track[j].count == 1)
				firstcount1 = j;
		}

		if (match)
		{
			/* Found a match */
			track[j].count++;
			/* This value may now need to "bubble up" in the track list */
			while (j > 0 && track[j].count > track[j - 1].count)
			{
				swapDatum(track[j].value, track[j - 1].value);
				swapInt(track[j].count, track[j - 1].count);
				j--;
			}
		}
		else
		{
			/* No match.  Insert at head of count-1 list */
			if (track_cnt < track_max)
				track_cnt++;
			/* shift the count-1 entries down one slot to make room ... */
			for (j = track_cnt - 1; j > firstcount1; j--)
			{
				track[j].value = track[j - 1].value;
				track[j].count = track[j - 1].count;
			}
			/* ... then store the new value, unless the list is full of
			 * multiply-seen values (firstcount1 == track_cnt), in which case
			 * the new value is simply not tracked */
			if (firstcount1 < track_cnt)
			{
				track[firstcount1].value = value;
				track[firstcount1].count = 1;
			}
		}
	}

	/* We can only compute real stats if we found some non-null values. */
	if (nonnull_cnt > 0)
	{
		int			nmultiple,	/* # track values seen more than once */
					summultiple;	/* total sample rows covered by those */

		stats->stats_valid = true;
		/* Do the simple null-frac and width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		if (is_varwidth)
			stats->stawidth = total_width / (double) nonnull_cnt;
		else
			stats->stawidth = stats->attrtype->typlen;

		/*
		 * Count the number of values we found multiple times.  The track
		 * list is ordered by descending count, so the multiply-seen values
		 * form a prefix of it.
		 */
		summultiple = 0;
		for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
		{
			if (track[nmultiple].count == 1)
				break;
			summultiple += track[nmultiple].count;
		}

		if (nmultiple == 0)
		{
			/*
			 * If we found no repeated non-null values, assume it's a unique
			 * column; but be sure to discount for any nulls we found.
			 */
			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
		}
		else if (track_cnt < track_max && toowide_cnt == 0 &&
				 nmultiple == track_cnt)
		{
			/*
			 * Our track list includes every value in the sample, and every
			 * value appeared more than once.  Assume the column has just
			 * these values.  (This case is meant to address columns with
			 * small, fixed sets of possible values, such as boolean or enum
			 * columns.  If there are any values that appear just once in the
			 * sample, including too-wide values, we should assume that that's
			 * not what we're dealing with.)
			 */
			stats->stadistinct = track_cnt;
		}
		else
		{
			/*----------
			 * Estimate the number of distinct values using the estimator
			 * proposed by Haas and Stokes in IBM Research Report RJ 10025:
			 *		n*d / (n - f1 + f1*n/N)
			 * where f1 is the number of distinct values that occurred
			 * exactly once in our sample of n rows (from a total of N),
			 * and d is the total number of distinct values in the sample.
			 * This is their Duj1 estimator; the other estimators they
			 * recommend are considerably more complex, and are numerically
			 * very unstable when n is much smaller than N.
			 *
			 * In this calculation, we consider only non-nulls.  We used to
			 * include rows with null values in the n and N counts, but that
			 * leads to inaccurate answers in columns with many nulls, and
			 * it's intuitively bogus anyway considering the desired result is
			 * the number of distinct non-null values.
			 *
			 * We assume (not very reliably!) that all the multiply-occurring
			 * values are reflected in the final track[] list, and the other
			 * nonnull values all appeared but once.  (XXX this usually
			 * results in a drastic overestimate of ndistinct.  Can we do
			 * any better?)
			 *----------
			 */
			int			f1 = nonnull_cnt - summultiple;
			int			d = f1 + nmultiple;
			double		n = samplerows - null_cnt;
			double		N = totalrows * (1.0 - stats->stanullfrac);
			double		stadistinct;

			/* N == 0 shouldn't happen, but just in case ... */
			if (N > 0)
				stadistinct = (n * d) / ((n - f1) + f1 * n / N);
			else
				stadistinct = 0;

			/* Clamp to sane range in case of roundoff error */
			if (stadistinct < d)
				stadistinct = d;
			if (stadistinct > N)
				stadistinct = N;
			/* And round to integer */
			stats->stadistinct = floor(stadistinct + 0.5);
		}

		/*
		 * If we estimated the number of distinct values at more than 10% of
		 * the total row count (a very arbitrary limit), then assume that
		 * stadistinct should scale with the row count rather than be a fixed
		 * value.
		 */
		if (stats->stadistinct > 0.1 * totalrows)
			stats->stadistinct = -(stats->stadistinct / totalrows);

		/*
		 * Decide how many values are worth storing as most-common values. If
		 * we are able to generate a complete MCV list (all the values in the
		 * sample will fit, and we think these are all the ones in the table),
		 * then do so.  Otherwise, store only those values that are
		 * significantly more common than the values not in the list.
		 *
		 * Note: the first of these cases is meant to address columns with
		 * small, fixed sets of possible values, such as boolean or enum
		 * columns.  If we can *completely* represent the column population by
		 * an MCV list that will fit into the stats target, then we should do
		 * so and thus provide the planner with complete information.  But if
		 * the MCV list is not complete, it's generally worth being more
		 * selective, and not just filling it all the way up to the stats
		 * target.
		 */
		if (track_cnt < track_max && toowide_cnt == 0 &&
			stats->stadistinct > 0 &&
			track_cnt <= num_mcv)
		{
			/* Track list includes all values seen, and all will fit */
			num_mcv = track_cnt;
		}
		else
		{
			int		   *mcv_counts;

			/* Incomplete list; decide how many values are worth keeping */
			if (num_mcv > track_cnt)
				num_mcv = track_cnt;

			if (num_mcv > 0)
			{
				mcv_counts = (int *) palloc(num_mcv * sizeof(int));
				for (i = 0; i < num_mcv; i++)
					mcv_counts[i] = track[i].count;

				num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
										   stats->stadistinct,
										   stats->stanullfrac,
										   samplerows, totalrows);
			}
		}

		/* Generate MCV slot entry */
		if (num_mcv > 0)
		{
			MemoryContext old_context;
			Datum	   *mcv_values;
			float4	   *mcv_freqs;

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
			for (i = 0; i < num_mcv; i++)
			{
				mcv_values[i] = datumCopy(track[i].value,
										  stats->attrtype->typbyval,
										  stats->attrtype->typlen);
				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
			}
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCV;
			stats->staop[0] = mystats->eqopr;
			stats->stacoll[0] = stats->attrcollid;
			stats->stanumbers[0] = mcv_freqs;
			stats->numnumbers[0] = num_mcv;
			stats->stavalues[0] = mcv_values;
			stats->numvalues[0] = num_mcv;

			/*
			 * Accept the defaults for stats->statypid and others. They have
			 * been set before we were called (see vacuum.h)
			 */
		}
	}
	else if (null_cnt > 0)
	{
		/* We found only nulls; assume the column is entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		if (is_varwidth)
			stats->stawidth = 0;	/* "unknown" */
		else
			stats->stawidth = stats->attrtype->typlen;
		stats->stadistinct = 0.0;	/* "unknown" */
	}

	/* We don't need to bother cleaning up any of our temporary palloc's */
}
| 2147 | |
| 2148 | |
| 2149 | /* |
| 2150 | * compute_scalar_stats() -- compute column statistics |
| 2151 | * |
| 2152 | * We use this when we can find "=" and "<" operators for the datatype. |
| 2153 | * |
| 2154 | * We determine the fraction of non-null rows, the average width, the |
| 2155 | * most common values, the (estimated) number of distinct values, the |
| 2156 | * distribution histogram, and the correlation of physical to logical order. |
| 2157 | * |
| 2158 | * The desired stats can be determined fairly easily after sorting the |
| 2159 | * data values into order. |
| 2160 | */ |
| 2161 | static void |
| 2162 | compute_scalar_stats(VacAttrStatsP stats, |
| 2163 | AnalyzeAttrFetchFunc fetchfunc, |
| 2164 | int samplerows, |
| 2165 | double totalrows) |
| 2166 | { |
| 2167 | int i; |
| 2168 | int null_cnt = 0; |
| 2169 | int nonnull_cnt = 0; |
| 2170 | int toowide_cnt = 0; |
| 2171 | double total_width = 0; |
| 2172 | bool is_varlena = (!stats->attrtype->typbyval && |
| 2173 | stats->attrtype->typlen == -1); |
| 2174 | bool is_varwidth = (!stats->attrtype->typbyval && |
| 2175 | stats->attrtype->typlen < 0); |
| 2176 | double corr_xysum; |
| 2177 | SortSupportData ssup; |
| 2178 | ScalarItem *values; |
| 2179 | int values_cnt = 0; |
| 2180 | int *tupnoLink; |
| 2181 | ScalarMCVItem *track; |
| 2182 | int track_cnt = 0; |
| 2183 | int num_mcv = stats->attr->attstattarget; |
| 2184 | int num_bins = stats->attr->attstattarget; |
| 2185 | StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; |
| 2186 | |
| 2187 | values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem)); |
| 2188 | tupnoLink = (int *) palloc(samplerows * sizeof(int)); |
| 2189 | track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem)); |
| 2190 | |
| 2191 | memset(&ssup, 0, sizeof(ssup)); |
| 2192 | ssup.ssup_cxt = CurrentMemoryContext; |
| 2193 | ssup.ssup_collation = stats->attrcollid; |
| 2194 | ssup.ssup_nulls_first = false; |
| 2195 | |
| 2196 | /* |
| 2197 | * For now, don't perform abbreviated key conversion, because full values |
| 2198 | * are required for MCV slot generation. Supporting that optimization |
| 2199 | * would necessitate teaching compare_scalars() to call a tie-breaker. |
| 2200 | */ |
| 2201 | ssup.abbreviate = false; |
| 2202 | |
| 2203 | PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup); |
| 2204 | |
| 2205 | /* Initial scan to find sortable values */ |
| 2206 | for (i = 0; i < samplerows; i++) |
| 2207 | { |
| 2208 | Datum value; |
| 2209 | bool isnull; |
| 2210 | |
| 2211 | vacuum_delay_point(); |
| 2212 | |
| 2213 | value = fetchfunc(stats, i, &isnull); |
| 2214 | |
| 2215 | /* Check for null/nonnull */ |
| 2216 | if (isnull) |
| 2217 | { |
| 2218 | null_cnt++; |
| 2219 | continue; |
| 2220 | } |
| 2221 | nonnull_cnt++; |
| 2222 | |
| 2223 | /* |
| 2224 | * If it's a variable-width field, add up widths for average width |
| 2225 | * calculation. Note that if the value is toasted, we use the toasted |
| 2226 | * width. We don't bother with this calculation if it's a fixed-width |
| 2227 | * type. |
| 2228 | */ |
| 2229 | if (is_varlena) |
| 2230 | { |
| 2231 | total_width += VARSIZE_ANY(DatumGetPointer(value)); |
| 2232 | |
| 2233 | /* |
| 2234 | * If the value is toasted, we want to detoast it just once to |
| 2235 | * avoid repeated detoastings and resultant excess memory usage |
| 2236 | * during the comparisons. Also, check to see if the value is |
| 2237 | * excessively wide, and if so don't detoast at all --- just |
| 2238 | * ignore the value. |
| 2239 | */ |
| 2240 | if (toast_raw_datum_size(value) > WIDTH_THRESHOLD) |
| 2241 | { |
| 2242 | toowide_cnt++; |
| 2243 | continue; |
| 2244 | } |
| 2245 | value = PointerGetDatum(PG_DETOAST_DATUM(value)); |
| 2246 | } |
| 2247 | else if (is_varwidth) |
| 2248 | { |
| 2249 | /* must be cstring */ |
| 2250 | total_width += strlen(DatumGetCString(value)) + 1; |
| 2251 | } |
| 2252 | |
| 2253 | /* Add it to the list to be sorted */ |
| 2254 | values[values_cnt].value = value; |
| 2255 | values[values_cnt].tupno = values_cnt; |
| 2256 | tupnoLink[values_cnt] = values_cnt; |
| 2257 | values_cnt++; |
| 2258 | } |
| 2259 | |
| 2260 | /* We can only compute real stats if we found some sortable values. */ |
| 2261 | if (values_cnt > 0) |
| 2262 | { |
| 2263 | int ndistinct, /* # distinct values in sample */ |
| 2264 | nmultiple, /* # that appear multiple times */ |
| 2265 | num_hist, |
| 2266 | dups_cnt; |
| 2267 | int slot_idx = 0; |
| 2268 | CompareScalarsContext cxt; |
| 2269 | |
| 2270 | /* Sort the collected values */ |
| 2271 | cxt.ssup = &ssup; |
| 2272 | cxt.tupnoLink = tupnoLink; |
| 2273 | qsort_arg((void *) values, values_cnt, sizeof(ScalarItem), |
| 2274 | compare_scalars, (void *) &cxt); |
| 2275 | |
| 2276 | /* |
| 2277 | * Now scan the values in order, find the most common ones, and also |
| 2278 | * accumulate ordering-correlation statistics. |
| 2279 | * |
| 2280 | * To determine which are most common, we first have to count the |
| 2281 | * number of duplicates of each value. The duplicates are adjacent in |
| 2282 | * the sorted list, so a brute-force approach is to compare successive |
| 2283 | * datum values until we find two that are not equal. However, that |
| 2284 | * requires N-1 invocations of the datum comparison routine, which are |
| 2285 | * completely redundant with work that was done during the sort. (The |
| 2286 | * sort algorithm must at some point have compared each pair of items |
| 2287 | * that are adjacent in the sorted order; otherwise it could not know |
| 2288 | * that it's ordered the pair correctly.) We exploit this by having |
| 2289 | * compare_scalars remember the highest tupno index that each |
| 2290 | * ScalarItem has been found equal to. At the end of the sort, a |
| 2291 | * ScalarItem's tupnoLink will still point to itself if and only if it |
| 2292 | * is the last item of its group of duplicates (since the group will |
| 2293 | * be ordered by tupno). |
| 2294 | */ |
| 2295 | corr_xysum = 0; |
| 2296 | ndistinct = 0; |
| 2297 | nmultiple = 0; |
| 2298 | dups_cnt = 0; |
| 2299 | for (i = 0; i < values_cnt; i++) |
| 2300 | { |
| 2301 | int tupno = values[i].tupno; |
| 2302 | |
| 2303 | corr_xysum += ((double) i) * ((double) tupno); |
| 2304 | dups_cnt++; |
| 2305 | if (tupnoLink[tupno] == tupno) |
| 2306 | { |
| 2307 | /* Reached end of duplicates of this value */ |
| 2308 | ndistinct++; |
| 2309 | if (dups_cnt > 1) |
| 2310 | { |
| 2311 | nmultiple++; |
| 2312 | if (track_cnt < num_mcv || |
| 2313 | dups_cnt > track[track_cnt - 1].count) |
| 2314 | { |
| 2315 | /* |
| 2316 | * Found a new item for the mcv list; find its |
| 2317 | * position, bubbling down old items if needed. Loop |
| 2318 | * invariant is that j points at an empty/ replaceable |
| 2319 | * slot. |
| 2320 | */ |
| 2321 | int j; |
| 2322 | |
| 2323 | if (track_cnt < num_mcv) |
| 2324 | track_cnt++; |
| 2325 | for (j = track_cnt - 1; j > 0; j--) |
| 2326 | { |
| 2327 | if (dups_cnt <= track[j - 1].count) |
| 2328 | break; |
| 2329 | track[j].count = track[j - 1].count; |
| 2330 | track[j].first = track[j - 1].first; |
| 2331 | } |
| 2332 | track[j].count = dups_cnt; |
| 2333 | track[j].first = i + 1 - dups_cnt; |
| 2334 | } |
| 2335 | } |
| 2336 | dups_cnt = 0; |
| 2337 | } |
| 2338 | } |
| 2339 | |
| 2340 | stats->stats_valid = true; |
| 2341 | /* Do the simple null-frac and width stats */ |
| 2342 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
| 2343 | if (is_varwidth) |
| 2344 | stats->stawidth = total_width / (double) nonnull_cnt; |
| 2345 | else |
| 2346 | stats->stawidth = stats->attrtype->typlen; |
| 2347 | |
| 2348 | if (nmultiple == 0) |
| 2349 | { |
| 2350 | /* |
| 2351 | * If we found no repeated non-null values, assume it's a unique |
| 2352 | * column; but be sure to discount for any nulls we found. |
| 2353 | */ |
| 2354 | stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); |
| 2355 | } |
| 2356 | else if (toowide_cnt == 0 && nmultiple == ndistinct) |
| 2357 | { |
| 2358 | /* |
| 2359 | * Every value in the sample appeared more than once. Assume the |
| 2360 | * column has just these values. (This case is meant to address |
| 2361 | * columns with small, fixed sets of possible values, such as |
| 2362 | * boolean or enum columns. If there are any values that appear |
| 2363 | * just once in the sample, including too-wide values, we should |
| 2364 | * assume that that's not what we're dealing with.) |
| 2365 | */ |
| 2366 | stats->stadistinct = ndistinct; |
| 2367 | } |
| 2368 | else |
| 2369 | { |
| 2370 | /*---------- |
| 2371 | * Estimate the number of distinct values using the estimator |
| 2372 | * proposed by Haas and Stokes in IBM Research Report RJ 10025: |
| 2373 | * n*d / (n - f1 + f1*n/N) |
| 2374 | * where f1 is the number of distinct values that occurred |
| 2375 | * exactly once in our sample of n rows (from a total of N), |
| 2376 | * and d is the total number of distinct values in the sample. |
| 2377 | * This is their Duj1 estimator; the other estimators they |
| 2378 | * recommend are considerably more complex, and are numerically |
| 2379 | * very unstable when n is much smaller than N. |
| 2380 | * |
| 2381 | * In this calculation, we consider only non-nulls. We used to |
| 2382 | * include rows with null values in the n and N counts, but that |
| 2383 | * leads to inaccurate answers in columns with many nulls, and |
| 2384 | * it's intuitively bogus anyway considering the desired result is |
| 2385 | * the number of distinct non-null values. |
| 2386 | * |
| 2387 | * Overwidth values are assumed to have been distinct. |
| 2388 | *---------- |
| 2389 | */ |
| 2390 | int f1 = ndistinct - nmultiple + toowide_cnt; |
| 2391 | int d = f1 + nmultiple; |
| 2392 | double n = samplerows - null_cnt; |
| 2393 | double N = totalrows * (1.0 - stats->stanullfrac); |
| 2394 | double stadistinct; |
| 2395 | |
| 2396 | /* N == 0 shouldn't happen, but just in case ... */ |
| 2397 | if (N > 0) |
| 2398 | stadistinct = (n * d) / ((n - f1) + f1 * n / N); |
| 2399 | else |
| 2400 | stadistinct = 0; |
| 2401 | |
| 2402 | /* Clamp to sane range in case of roundoff error */ |
| 2403 | if (stadistinct < d) |
| 2404 | stadistinct = d; |
| 2405 | if (stadistinct > N) |
| 2406 | stadistinct = N; |
| 2407 | /* And round to integer */ |
| 2408 | stats->stadistinct = floor(stadistinct + 0.5); |
| 2409 | } |
| 2410 | |
| 2411 | /* |
| 2412 | * If we estimated the number of distinct values at more than 10% of |
| 2413 | * the total row count (a very arbitrary limit), then assume that |
| 2414 | * stadistinct should scale with the row count rather than be a fixed |
| 2415 | * value. |
| 2416 | */ |
| 2417 | if (stats->stadistinct > 0.1 * totalrows) |
| 2418 | stats->stadistinct = -(stats->stadistinct / totalrows); |
| 2419 | |
| 2420 | /* |
| 2421 | * Decide how many values are worth storing as most-common values. If |
| 2422 | * we are able to generate a complete MCV list (all the values in the |
| 2423 | * sample will fit, and we think these are all the ones in the table), |
| 2424 | * then do so. Otherwise, store only those values that are |
| 2425 | * significantly more common than the values not in the list. |
| 2426 | * |
| 2427 | * Note: the first of these cases is meant to address columns with |
| 2428 | * small, fixed sets of possible values, such as boolean or enum |
| 2429 | * columns. If we can *completely* represent the column population by |
| 2430 | * an MCV list that will fit into the stats target, then we should do |
| 2431 | * so and thus provide the planner with complete information. But if |
| 2432 | * the MCV list is not complete, it's generally worth being more |
| 2433 | * selective, and not just filling it all the way up to the stats |
| 2434 | * target. |
| 2435 | */ |
| 2436 | if (track_cnt == ndistinct && toowide_cnt == 0 && |
| 2437 | stats->stadistinct > 0 && |
| 2438 | track_cnt <= num_mcv) |
| 2439 | { |
| 2440 | /* Track list includes all values seen, and all will fit */ |
| 2441 | num_mcv = track_cnt; |
| 2442 | } |
| 2443 | else |
| 2444 | { |
| 2445 | int *mcv_counts; |
| 2446 | |
| 2447 | /* Incomplete list; decide how many values are worth keeping */ |
| 2448 | if (num_mcv > track_cnt) |
| 2449 | num_mcv = track_cnt; |
| 2450 | |
| 2451 | if (num_mcv > 0) |
| 2452 | { |
| 2453 | mcv_counts = (int *) palloc(num_mcv * sizeof(int)); |
| 2454 | for (i = 0; i < num_mcv; i++) |
| 2455 | mcv_counts[i] = track[i].count; |
| 2456 | |
| 2457 | num_mcv = analyze_mcv_list(mcv_counts, num_mcv, |
| 2458 | stats->stadistinct, |
| 2459 | stats->stanullfrac, |
| 2460 | samplerows, totalrows); |
| 2461 | } |
| 2462 | } |
| 2463 | |
| 2464 | /* Generate MCV slot entry */ |
| 2465 | if (num_mcv > 0) |
| 2466 | { |
| 2467 | MemoryContext old_context; |
| 2468 | Datum *mcv_values; |
| 2469 | float4 *mcv_freqs; |
| 2470 | |
| 2471 | /* Must copy the target values into anl_context */ |
| 2472 | old_context = MemoryContextSwitchTo(stats->anl_context); |
| 2473 | mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum)); |
| 2474 | mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4)); |
| 2475 | for (i = 0; i < num_mcv; i++) |
| 2476 | { |
| 2477 | mcv_values[i] = datumCopy(values[track[i].first].value, |
| 2478 | stats->attrtype->typbyval, |
| 2479 | stats->attrtype->typlen); |
| 2480 | mcv_freqs[i] = (double) track[i].count / (double) samplerows; |
| 2481 | } |
| 2482 | MemoryContextSwitchTo(old_context); |
| 2483 | |
| 2484 | stats->stakind[slot_idx] = STATISTIC_KIND_MCV; |
| 2485 | stats->staop[slot_idx] = mystats->eqopr; |
| 2486 | stats->stacoll[slot_idx] = stats->attrcollid; |
| 2487 | stats->stanumbers[slot_idx] = mcv_freqs; |
| 2488 | stats->numnumbers[slot_idx] = num_mcv; |
| 2489 | stats->stavalues[slot_idx] = mcv_values; |
| 2490 | stats->numvalues[slot_idx] = num_mcv; |
| 2491 | |
| 2492 | /* |
| 2493 | * Accept the defaults for stats->statypid and others. They have |
| 2494 | * been set before we were called (see vacuum.h) |
| 2495 | */ |
| 2496 | slot_idx++; |
| 2497 | } |
| 2498 | |
| 2499 | /* |
| 2500 | * Generate a histogram slot entry if there are at least two distinct |
| 2501 | * values not accounted for in the MCV list. (This ensures the |
| 2502 | * histogram won't collapse to empty or a singleton.) |
| 2503 | */ |
| 2504 | num_hist = ndistinct - num_mcv; |
| 2505 | if (num_hist > num_bins) |
| 2506 | num_hist = num_bins + 1; |
| 2507 | if (num_hist >= 2) |
| 2508 | { |
| 2509 | MemoryContext old_context; |
| 2510 | Datum *hist_values; |
| 2511 | int nvals; |
| 2512 | int pos, |
| 2513 | posfrac, |
| 2514 | delta, |
| 2515 | deltafrac; |
| 2516 | |
| 2517 | /* Sort the MCV items into position order to speed next loop */ |
| 2518 | qsort((void *) track, num_mcv, |
| 2519 | sizeof(ScalarMCVItem), compare_mcvs); |
| 2520 | |
| 2521 | /* |
| 2522 | * Collapse out the MCV items from the values[] array. |
| 2523 | * |
| 2524 | * Note we destroy the values[] array here... but we don't need it |
| 2525 | * for anything more. We do, however, still need values_cnt. |
| 2526 | * nvals will be the number of remaining entries in values[]. |
| 2527 | */ |
| 2528 | if (num_mcv > 0) |
| 2529 | { |
| 2530 | int src, |
| 2531 | dest; |
| 2532 | int j; |
| 2533 | |
| 2534 | src = dest = 0; |
| 2535 | j = 0; /* index of next interesting MCV item */ |
| 2536 | while (src < values_cnt) |
| 2537 | { |
| 2538 | int ncopy; |
| 2539 | |
| 2540 | if (j < num_mcv) |
| 2541 | { |
| 2542 | int first = track[j].first; |
| 2543 | |
| 2544 | if (src >= first) |
| 2545 | { |
| 2546 | /* advance past this MCV item */ |
| 2547 | src = first + track[j].count; |
| 2548 | j++; |
| 2549 | continue; |
| 2550 | } |
| 2551 | ncopy = first - src; |
| 2552 | } |
| 2553 | else |
| 2554 | ncopy = values_cnt - src; |
| 2555 | memmove(&values[dest], &values[src], |
| 2556 | ncopy * sizeof(ScalarItem)); |
| 2557 | src += ncopy; |
| 2558 | dest += ncopy; |
| 2559 | } |
| 2560 | nvals = dest; |
| 2561 | } |
| 2562 | else |
| 2563 | nvals = values_cnt; |
| 2564 | Assert(nvals >= num_hist); |
| 2565 | |
| 2566 | /* Must copy the target values into anl_context */ |
| 2567 | old_context = MemoryContextSwitchTo(stats->anl_context); |
| 2568 | hist_values = (Datum *) palloc(num_hist * sizeof(Datum)); |
| 2569 | |
| 2570 | /* |
| 2571 | * The object of this loop is to copy the first and last values[] |
| 2572 | * entries along with evenly-spaced values in between. So the |
| 2573 | * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)]. But |
| 2574 | * computing that subscript directly risks integer overflow when |
| 2575 | * the stats target is more than a couple thousand. Instead we |
| 2576 | * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking |
| 2577 | * the integral and fractional parts of the sum separately. |
| 2578 | */ |
| 2579 | delta = (nvals - 1) / (num_hist - 1); |
| 2580 | deltafrac = (nvals - 1) % (num_hist - 1); |
| 2581 | pos = posfrac = 0; |
| 2582 | |
| 2583 | for (i = 0; i < num_hist; i++) |
| 2584 | { |
| 2585 | hist_values[i] = datumCopy(values[pos].value, |
| 2586 | stats->attrtype->typbyval, |
| 2587 | stats->attrtype->typlen); |
| 2588 | pos += delta; |
| 2589 | posfrac += deltafrac; |
| 2590 | if (posfrac >= (num_hist - 1)) |
| 2591 | { |
| 2592 | /* fractional part exceeds 1, carry to integer part */ |
| 2593 | pos++; |
| 2594 | posfrac -= (num_hist - 1); |
| 2595 | } |
| 2596 | } |
| 2597 | |
| 2598 | MemoryContextSwitchTo(old_context); |
| 2599 | |
| 2600 | stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM; |
| 2601 | stats->staop[slot_idx] = mystats->ltopr; |
| 2602 | stats->stacoll[slot_idx] = stats->attrcollid; |
| 2603 | stats->stavalues[slot_idx] = hist_values; |
| 2604 | stats->numvalues[slot_idx] = num_hist; |
| 2605 | |
| 2606 | /* |
| 2607 | * Accept the defaults for stats->statypid and others. They have |
| 2608 | * been set before we were called (see vacuum.h) |
| 2609 | */ |
| 2610 | slot_idx++; |
| 2611 | } |
| 2612 | |
| 2613 | /* Generate a correlation entry if there are multiple values */ |
| 2614 | if (values_cnt > 1) |
| 2615 | { |
| 2616 | MemoryContext old_context; |
| 2617 | float4 *corrs; |
| 2618 | double corr_xsum, |
| 2619 | corr_x2sum; |
| 2620 | |
| 2621 | /* Must copy the target values into anl_context */ |
| 2622 | old_context = MemoryContextSwitchTo(stats->anl_context); |
| 2623 | corrs = (float4 *) palloc(sizeof(float4)); |
| 2624 | MemoryContextSwitchTo(old_context); |
| 2625 | |
| 2626 | /*---------- |
| 2627 | * Since we know the x and y value sets are both |
| 2628 | * 0, 1, ..., values_cnt-1 |
| 2629 | * we have sum(x) = sum(y) = |
| 2630 | * (values_cnt-1)*values_cnt / 2 |
| 2631 | * and sum(x^2) = sum(y^2) = |
| 2632 | * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6. |
| 2633 | *---------- |
| 2634 | */ |
| 2635 | corr_xsum = ((double) (values_cnt - 1)) * |
| 2636 | ((double) values_cnt) / 2.0; |
| 2637 | corr_x2sum = ((double) (values_cnt - 1)) * |
| 2638 | ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0; |
| 2639 | |
| 2640 | /* And the correlation coefficient reduces to */ |
| 2641 | corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) / |
| 2642 | (values_cnt * corr_x2sum - corr_xsum * corr_xsum); |
| 2643 | |
| 2644 | stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION; |
| 2645 | stats->staop[slot_idx] = mystats->ltopr; |
| 2646 | stats->stacoll[slot_idx] = stats->attrcollid; |
| 2647 | stats->stanumbers[slot_idx] = corrs; |
| 2648 | stats->numnumbers[slot_idx] = 1; |
| 2649 | slot_idx++; |
| 2650 | } |
| 2651 | } |
| 2652 | else if (nonnull_cnt > 0) |
| 2653 | { |
| 2654 | /* We found some non-null values, but they were all too wide */ |
| 2655 | Assert(nonnull_cnt == toowide_cnt); |
| 2656 | stats->stats_valid = true; |
| 2657 | /* Do the simple null-frac and width stats */ |
| 2658 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
| 2659 | if (is_varwidth) |
| 2660 | stats->stawidth = total_width / (double) nonnull_cnt; |
| 2661 | else |
| 2662 | stats->stawidth = stats->attrtype->typlen; |
| 2663 | /* Assume all too-wide values are distinct, so it's a unique column */ |
| 2664 | stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); |
| 2665 | } |
| 2666 | else if (null_cnt > 0) |
| 2667 | { |
| 2668 | /* We found only nulls; assume the column is entirely null */ |
| 2669 | stats->stats_valid = true; |
| 2670 | stats->stanullfrac = 1.0; |
| 2671 | if (is_varwidth) |
| 2672 | stats->stawidth = 0; /* "unknown" */ |
| 2673 | else |
| 2674 | stats->stawidth = stats->attrtype->typlen; |
| 2675 | stats->stadistinct = 0.0; /* "unknown" */ |
| 2676 | } |
| 2677 | |
| 2678 | /* We don't need to bother cleaning up any of our temporary palloc's */ |
| 2679 | } |
| 2680 | |
| 2681 | /* |
| 2682 | * qsort_arg comparator for sorting ScalarItems |
| 2683 | * |
| 2684 | * Aside from sorting the items, we update the tupnoLink[] array |
| 2685 | * whenever two ScalarItems are found to contain equal datums. The array |
| 2686 | * is indexed by tupno; for each ScalarItem, it contains the highest |
| 2687 | * tupno that that item's datum has been found to be equal to. This allows |
| 2688 | * us to avoid additional comparisons in compute_scalar_stats(). |
| 2689 | */ |
| 2690 | static int |
| 2691 | compare_scalars(const void *a, const void *b, void *arg) |
| 2692 | { |
| 2693 | Datum da = ((const ScalarItem *) a)->value; |
| 2694 | int ta = ((const ScalarItem *) a)->tupno; |
| 2695 | Datum db = ((const ScalarItem *) b)->value; |
| 2696 | int tb = ((const ScalarItem *) b)->tupno; |
| 2697 | CompareScalarsContext *cxt = (CompareScalarsContext *) arg; |
| 2698 | int compare; |
| 2699 | |
| 2700 | compare = ApplySortComparator(da, false, db, false, cxt->ssup); |
| 2701 | if (compare != 0) |
| 2702 | return compare; |
| 2703 | |
| 2704 | /* |
| 2705 | * The two datums are equal, so update cxt->tupnoLink[]. |
| 2706 | */ |
| 2707 | if (cxt->tupnoLink[ta] < tb) |
| 2708 | cxt->tupnoLink[ta] = tb; |
| 2709 | if (cxt->tupnoLink[tb] < ta) |
| 2710 | cxt->tupnoLink[tb] = ta; |
| 2711 | |
| 2712 | /* |
| 2713 | * For equal datums, sort by tupno |
| 2714 | */ |
| 2715 | return ta - tb; |
| 2716 | } |
| 2717 | |
| 2718 | /* |
| 2719 | * qsort comparator for sorting ScalarMCVItems by position |
| 2720 | */ |
| 2721 | static int |
| 2722 | compare_mcvs(const void *a, const void *b) |
| 2723 | { |
| 2724 | int da = ((const ScalarMCVItem *) a)->first; |
| 2725 | int db = ((const ScalarMCVItem *) b)->first; |
| 2726 | |
| 2727 | return da - db; |
| 2728 | } |
| 2729 | |
| 2730 | /* |
| 2731 | * Analyze the list of common values in the sample and decide how many are |
| 2732 | * worth storing in the table's MCV list. |
| 2733 | * |
| 2734 | * mcv_counts is assumed to be a list of the counts of the most common values |
| 2735 | * seen in the sample, starting with the most common. The return value is the |
| 2736 | * number that are significantly more common than the values not in the list, |
| 2737 | * and which are therefore deemed worth storing in the table's MCV list. |
| 2738 | */ |
| 2739 | static int |
| 2740 | analyze_mcv_list(int *mcv_counts, |
| 2741 | int num_mcv, |
| 2742 | double stadistinct, |
| 2743 | double stanullfrac, |
| 2744 | int samplerows, |
| 2745 | double totalrows) |
| 2746 | { |
| 2747 | double ndistinct_table; |
| 2748 | double sumcount; |
| 2749 | int i; |
| 2750 | |
| 2751 | /* |
| 2752 | * If the entire table was sampled, keep the whole list. This also |
| 2753 | * protects us against division by zero in the code below. |
| 2754 | */ |
| 2755 | if (samplerows == totalrows || totalrows <= 1.0) |
| 2756 | return num_mcv; |
| 2757 | |
| 2758 | /* Re-extract the estimated number of distinct nonnull values in table */ |
| 2759 | ndistinct_table = stadistinct; |
| 2760 | if (ndistinct_table < 0) |
| 2761 | ndistinct_table = -ndistinct_table * totalrows; |
| 2762 | |
| 2763 | /* |
| 2764 | * Exclude the least common values from the MCV list, if they are not |
| 2765 | * significantly more common than the estimated selectivity they would |
| 2766 | * have if they weren't in the list. All non-MCV values are assumed to be |
| 2767 | * equally common, after taking into account the frequencies of all the |
| 2768 | * values in the MCV list and the number of nulls (c.f. eqsel()). |
| 2769 | * |
| 2770 | * Here sumcount tracks the total count of all but the last (least common) |
| 2771 | * value in the MCV list, allowing us to determine the effect of excluding |
| 2772 | * that value from the list. |
| 2773 | * |
| 2774 | * Note that we deliberately do this by removing values from the full |
| 2775 | * list, rather than starting with an empty list and adding values, |
| 2776 | * because the latter approach can fail to add any values if all the most |
| 2777 | * common values have around the same frequency and make up the majority |
| 2778 | * of the table, so that the overall average frequency of all values is |
| 2779 | * roughly the same as that of the common values. This would lead to any |
| 2780 | * uncommon values being significantly overestimated. |
| 2781 | */ |
| 2782 | sumcount = 0.0; |
| 2783 | for (i = 0; i < num_mcv - 1; i++) |
| 2784 | sumcount += mcv_counts[i]; |
| 2785 | |
| 2786 | while (num_mcv > 0) |
| 2787 | { |
| 2788 | double selec, |
| 2789 | otherdistinct, |
| 2790 | N, |
| 2791 | n, |
| 2792 | K, |
| 2793 | variance, |
| 2794 | stddev; |
| 2795 | |
| 2796 | /* |
| 2797 | * Estimated selectivity the least common value would have if it |
| 2798 | * wasn't in the MCV list (c.f. eqsel()). |
| 2799 | */ |
| 2800 | selec = 1.0 - sumcount / samplerows - stanullfrac; |
| 2801 | if (selec < 0.0) |
| 2802 | selec = 0.0; |
| 2803 | if (selec > 1.0) |
| 2804 | selec = 1.0; |
| 2805 | otherdistinct = ndistinct_table - (num_mcv - 1); |
| 2806 | if (otherdistinct > 1) |
| 2807 | selec /= otherdistinct; |
| 2808 | |
| 2809 | /* |
| 2810 | * If the value is kept in the MCV list, its population frequency is |
| 2811 | * assumed to equal its sample frequency. We use the lower end of a |
| 2812 | * textbook continuity-corrected Wald-type confidence interval to |
| 2813 | * determine if that is significantly more common than the non-MCV |
| 2814 | * frequency --- specifically we assume the population frequency is |
| 2815 | * highly likely to be within around 2 standard errors of the sample |
| 2816 | * frequency, which equates to an interval of 2 standard deviations |
| 2817 | * either side of the sample count, plus an additional 0.5 for the |
| 2818 | * continuity correction. Since we are sampling without replacement, |
| 2819 | * this is a hypergeometric distribution. |
| 2820 | * |
| 2821 | * XXX: Empirically, this approach seems to work quite well, but it |
| 2822 | * may be worth considering more advanced techniques for estimating |
| 2823 | * the confidence interval of the hypergeometric distribution. |
| 2824 | */ |
| 2825 | N = totalrows; |
| 2826 | n = samplerows; |
| 2827 | K = N * mcv_counts[num_mcv - 1] / n; |
| 2828 | variance = n * K * (N - K) * (N - n) / (N * N * (N - 1)); |
| 2829 | stddev = sqrt(variance); |
| 2830 | |
| 2831 | if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5) |
| 2832 | { |
| 2833 | /* |
| 2834 | * The value is significantly more common than the non-MCV |
| 2835 | * selectivity would suggest. Keep it, and all the other more |
| 2836 | * common values in the list. |
| 2837 | */ |
| 2838 | break; |
| 2839 | } |
| 2840 | else |
| 2841 | { |
| 2842 | /* Discard this value and consider the next least common value */ |
| 2843 | num_mcv--; |
| 2844 | if (num_mcv == 0) |
| 2845 | break; |
| 2846 | sumcount -= mcv_counts[num_mcv - 1]; |
| 2847 | } |
| 2848 | } |
| 2849 | return num_mcv; |
| 2850 | } |
| 2851 | |