/*-------------------------------------------------------------------------
 *
 * analyze.c
 *    the Postgres statistics generator
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/commands/analyze.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <math.h>

#include "access/genam.h"
#include "access/multixact.h"
#include "access/relation.h"
#include "access/sysattr.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/tupconvert.h"
#include "access/tuptoaster.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "commands/dbcommands.h"
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
#include "executor/executor.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/attoptcache.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/sampling.h"
#include "utils/sortsupport.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"


/* Per-index data for ANALYZE */
typedef struct AnlIndexData
{
    IndexInfo  *indexInfo;      /* BuildIndexInfo result */
    double      tupleFract;     /* fraction of rows for partial index */
    VacAttrStats **vacattrstats;    /* index attrs to analyze */
    int         attr_cnt;
} AnlIndexData;


/* Default statistics target (GUC parameter) */
int         default_statistics_target = 100;

/* A few variables that don't seem worth passing around as parameters */
static MemoryContext anl_context = NULL;
static BufferAccessStrategy vac_strategy;


static void do_analyze_rel(Relation onerel,
                           VacuumParams *params, List *va_cols,
                           AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
                           bool inh, bool in_outer_xact, int elevel);
static void compute_index_stats(Relation onerel, double totalrows,
                                AnlIndexData *indexdata, int nindexes,
                                HeapTuple *rows, int numrows,
                                MemoryContext col_context);
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
                                       Node *index_expr);
static int  acquire_sample_rows(Relation onerel, int elevel,
                                HeapTuple *rows, int targrows,
                                double *totalrows, double *totaldeadrows);
static int  compare_rows(const void *a, const void *b);
static int  acquire_inherited_sample_rows(Relation onerel, int elevel,
                                          HeapTuple *rows, int targrows,
                                          double *totalrows, double *totaldeadrows);
static void update_attstats(Oid relid, bool inh,
                            int natts, VacAttrStats **vacattrstats);
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);


/*
 * analyze_rel() -- analyze one relation
 *
 * relid identifies the relation to analyze.  If relation is supplied, use
 * the name therein for reporting any failure to open/lock the rel; do not
 * use it once we've successfully opened the rel, since it might be stale.
 */
void
analyze_rel(Oid relid, RangeVar *relation,
            VacuumParams *params, List *va_cols, bool in_outer_xact,
            BufferAccessStrategy bstrategy)
{
    Relation    onerel;
    int         elevel;
    AcquireSampleRowsFunc acquirefunc = NULL;
    BlockNumber relpages = 0;

    /* Select logging level */
    if (params->options & VACOPT_VERBOSE)
        elevel = INFO;
    else
        elevel = DEBUG2;

    /* Set up static variables */
    vac_strategy = bstrategy;

    /*
     * Check for user-requested abort.
     */
    CHECK_FOR_INTERRUPTS();

    /*
     * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
     * ANALYZEs don't run on it concurrently.  (This also locks out a
     * concurrent VACUUM, which doesn't matter much at the moment but might
     * matter if we ever try to accumulate stats on dead tuples.)  If the rel
     * has been dropped since we last saw it, we don't need to process it.
     *
     * Make sure to generate only logs for ANALYZE in this case.
     */
    onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM),
                                  params->log_min_duration >= 0,
                                  ShareUpdateExclusiveLock);

    /* leave if relation could not be opened or locked */
    if (!onerel)
        return;

    /*
     * Check if relation needs to be skipped based on ownership.  This check
     * happens also when building the relation list to analyze for a manual
     * operation, and needs to be done additionally here as ANALYZE could
     * happen across multiple transactions where relation ownership could have
     * changed in-between.  Make sure to generate only logs for ANALYZE in
     * this case.
     */
    if (!vacuum_is_relation_owner(RelationGetRelid(onerel),
                                  onerel->rd_rel,
                                  params->options & VACOPT_ANALYZE))
    {
        relation_close(onerel, ShareUpdateExclusiveLock);
        return;
    }

    /*
     * Silently ignore tables that are temp tables of other backends ---
     * trying to analyze these is rather pointless, since their contents are
     * probably not up-to-date on disk.  (We don't throw a warning here; it
     * would just lead to chatter during a database-wide ANALYZE.)
     */
    if (RELATION_IS_OTHER_TEMP(onerel))
    {
        relation_close(onerel, ShareUpdateExclusiveLock);
        return;
    }

    /*
     * We can ANALYZE any table except pg_statistic.  See update_attstats.
     */
    if (RelationGetRelid(onerel) == StatisticRelationId)
    {
        relation_close(onerel, ShareUpdateExclusiveLock);
        return;
    }

    /*
     * Check that it's of an analyzable relkind, and set up appropriately.
     */
    if (onerel->rd_rel->relkind == RELKIND_RELATION ||
        onerel->rd_rel->relkind == RELKIND_MATVIEW)
    {
        /* Regular table, so we'll use the regular row acquisition function */
        acquirefunc = acquire_sample_rows;
        /* Also get regular table's size */
        relpages = RelationGetNumberOfBlocks(onerel);
    }
    else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
    {
        /*
         * For a foreign table, call the FDW's hook function to see whether it
         * supports analysis.
         */
        FdwRoutine *fdwroutine;
        bool        ok = false;

        fdwroutine = GetFdwRoutineForRelation(onerel, false);

        if (fdwroutine->AnalyzeForeignTable != NULL)
            ok = fdwroutine->AnalyzeForeignTable(onerel,
                                                 &acquirefunc,
                                                 &relpages);

        if (!ok)
        {
            ereport(WARNING,
                    (errmsg("skipping \"%s\" --- cannot analyze this foreign table",
                            RelationGetRelationName(onerel))));
            relation_close(onerel, ShareUpdateExclusiveLock);
            return;
        }
    }
    else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        /*
         * For partitioned tables, we want to do the recursive ANALYZE below.
         */
    }
    else
    {
        /* No need for a WARNING if we already complained during VACUUM */
        if (!(params->options & VACOPT_VACUUM))
            ereport(WARNING,
                    (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
                            RelationGetRelationName(onerel))));
        relation_close(onerel, ShareUpdateExclusiveLock);
        return;
    }

    /*
     * OK, let's do it.  First let other backends know I'm in ANALYZE.
     */
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
    LWLockRelease(ProcArrayLock);

    /*
     * Do the normal non-recursive ANALYZE.  We can skip this for partitioned
     * tables, which don't contain any rows.
     */
    if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
        do_analyze_rel(onerel, params, va_cols, acquirefunc,
                       relpages, false, in_outer_xact, elevel);

    /*
     * If there are child tables, do recursive ANALYZE.
     */
    if (onerel->rd_rel->relhassubclass)
        do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages,
                       true, in_outer_xact, elevel);

    /*
     * Close source relation now, but keep lock so that no one deletes it
     * before we commit.  (If someone did, they'd fail to clean up the entries
     * we made in pg_statistic.  Also, releasing the lock before commit would
     * expose us to concurrent-update failures in update_attstats.)
     */
    relation_close(onerel, NoLock);

    /*
     * Reset my PGXACT flag.  Note: we need this here, and not in vacuum_rel,
     * because the vacuum flag is cleared by the end-of-xact code.
     */
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    MyPgXact->vacuumFlags &= ~PROC_IN_ANALYZE;
    LWLockRelease(ProcArrayLock);
}
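
/*
 * For orientation (an illustrative note, not from the upstream comments):
 * analyze_rel() is reached from vacuum() in commands/vacuum.c.  A command
 * such as
 *
 *      ANALYZE VERBOSE mytable (col1, col2);
 *
 * arrives here with VACOPT_ANALYZE and VACOPT_VERBOSE set in
 * params->options and with va_cols listing "col1" and "col2"
 * ("mytable", "col1" and "col2" are hypothetical names).
 */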

/*
 * do_analyze_rel() -- analyze one relation, recursively or not
 *
 * Note that "acquirefunc" is only relevant for the non-inherited case.
 * For the inherited case, acquire_inherited_sample_rows() determines the
 * appropriate acquirefunc for each child table.
 */
static void
do_analyze_rel(Relation onerel, VacuumParams *params,
               List *va_cols, AcquireSampleRowsFunc acquirefunc,
               BlockNumber relpages, bool inh, bool in_outer_xact,
               int elevel)
{
    int         attr_cnt,
                tcnt,
                i,
                ind;
    Relation   *Irel;
    int         nindexes;
    bool        hasindex;
    VacAttrStats **vacattrstats;
    AnlIndexData *indexdata;
    int         targrows,
                numrows;
    double      totalrows,
                totaldeadrows;
    HeapTuple  *rows;
    PGRUsage    ru0;
    TimestampTz starttime = 0;
    MemoryContext caller_context;
    Oid         save_userid;
    int         save_sec_context;
    int         save_nestlevel;

    if (inh)
        ereport(elevel,
                (errmsg("analyzing \"%s.%s\" inheritance tree",
                        get_namespace_name(RelationGetNamespace(onerel)),
                        RelationGetRelationName(onerel))));
    else
        ereport(elevel,
                (errmsg("analyzing \"%s.%s\"",
                        get_namespace_name(RelationGetNamespace(onerel)),
                        RelationGetRelationName(onerel))));

    /*
     * Set up a working context so that we can easily free whatever junk gets
     * created.
     */
    anl_context = AllocSetContextCreate(CurrentMemoryContext,
                                        "Analyze",
                                        ALLOCSET_DEFAULT_SIZES);
    caller_context = MemoryContextSwitchTo(anl_context);

    /*
     * Switch to the table owner's userid, so that any index functions are run
     * as that user.  Also lock down security-restricted operations and
     * arrange to make GUC variable changes local to this command.
     */
    GetUserIdAndSecContext(&save_userid, &save_sec_context);
    SetUserIdAndSecContext(onerel->rd_rel->relowner,
                           save_sec_context | SECURITY_RESTRICTED_OPERATION);
    save_nestlevel = NewGUCNestLevel();

    /* measure elapsed time iff autovacuum logging requires it */
    if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
    {
        pg_rusage_init(&ru0);
        if (params->log_min_duration > 0)
            starttime = GetCurrentTimestamp();
    }

    /*
     * Determine which columns to analyze
     *
     * Note that system attributes are never analyzed, so we just reject them
     * at the lookup stage.  We also reject duplicate column mentions.  (We
     * could alternatively ignore duplicates, but analyzing a column twice
     * won't work; we'd end up making a conflicting update in pg_statistic.)
     */
    if (va_cols != NIL)
    {
        Bitmapset  *unique_cols = NULL;
        ListCell   *le;

        vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
                                                sizeof(VacAttrStats *));
        tcnt = 0;
        foreach(le, va_cols)
        {
            char       *col = strVal(lfirst(le));

            i = attnameAttNum(onerel, col, false);
            if (i == InvalidAttrNumber)
                ereport(ERROR,
                        (errcode(ERRCODE_UNDEFINED_COLUMN),
                         errmsg("column \"%s\" of relation \"%s\" does not exist",
                                col, RelationGetRelationName(onerel))));
            if (bms_is_member(i, unique_cols))
                ereport(ERROR,
                        (errcode(ERRCODE_DUPLICATE_COLUMN),
                         errmsg("column \"%s\" of relation \"%s\" appears more than once",
                                col, RelationGetRelationName(onerel))));
            unique_cols = bms_add_member(unique_cols, i);

            vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
            if (vacattrstats[tcnt] != NULL)
                tcnt++;
        }
        attr_cnt = tcnt;
    }
    else
    {
        attr_cnt = onerel->rd_att->natts;
        vacattrstats = (VacAttrStats **)
            palloc(attr_cnt * sizeof(VacAttrStats *));
        tcnt = 0;
        for (i = 1; i <= attr_cnt; i++)
        {
            vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
            if (vacattrstats[tcnt] != NULL)
                tcnt++;
        }
        attr_cnt = tcnt;
    }

    /*
     * Open all indexes of the relation, and see if there are any analyzable
     * columns in the indexes.  We do not analyze index columns if there was
     * an explicit column list in the ANALYZE command, however.  If we are
     * doing a recursive scan, we don't want to touch the parent's indexes at
     * all.
     */
    if (!inh)
        vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
    else
    {
        Irel = NULL;
        nindexes = 0;
    }
    hasindex = (nindexes > 0);
    indexdata = NULL;
    if (hasindex)
    {
        indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
        for (ind = 0; ind < nindexes; ind++)
        {
            AnlIndexData *thisdata = &indexdata[ind];
            IndexInfo  *indexInfo;

            thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
            thisdata->tupleFract = 1.0; /* fix later if partial */
            if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
            {
                ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);

                thisdata->vacattrstats = (VacAttrStats **)
                    palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
                tcnt = 0;
                for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
                {
                    int         keycol = indexInfo->ii_IndexAttrNumbers[i];

                    if (keycol == 0)
                    {
                        /* Found an index expression */
                        Node       *indexkey;

                        if (indexpr_item == NULL)   /* shouldn't happen */
                            elog(ERROR, "too few entries in indexprs list");
                        indexkey = (Node *) lfirst(indexpr_item);
                        indexpr_item = lnext(indexpr_item);
                        thisdata->vacattrstats[tcnt] =
                            examine_attribute(Irel[ind], i + 1, indexkey);
                        if (thisdata->vacattrstats[tcnt] != NULL)
                            tcnt++;
                    }
                }
                thisdata->attr_cnt = tcnt;
            }
        }
    }

    /*
     * Determine how many rows we need to sample, using the worst case from
     * all analyzable columns.  We use a lower bound of 100 rows to avoid
     * possible overflow in Vitter's algorithm.  (Note: that will also be the
     * target in the corner case where there are no analyzable columns.)
     */
    targrows = 100;
    for (i = 0; i < attr_cnt; i++)
    {
        if (targrows < vacattrstats[i]->minrows)
            targrows = vacattrstats[i]->minrows;
    }
    for (ind = 0; ind < nindexes; ind++)
    {
        AnlIndexData *thisdata = &indexdata[ind];

        for (i = 0; i < thisdata->attr_cnt; i++)
        {
            if (targrows < thisdata->vacattrstats[i]->minrows)
                targrows = thisdata->vacattrstats[i]->minrows;
        }
    }
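
    /*
     * Worked example (illustrative, not part of the original comments): with
     * the default statistics target of 100, std_typanalyze() below sets
     * minrows = 300 * attstattarget for each column, so the loops above
     * typically arrive at targrows = 30000.
     */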

    /*
     * Acquire the sample rows
     */
    rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
    if (inh)
        numrows = acquire_inherited_sample_rows(onerel, elevel,
                                                rows, targrows,
                                                &totalrows, &totaldeadrows);
    else
        numrows = (*acquirefunc) (onerel, elevel,
                                  rows, targrows,
                                  &totalrows, &totaldeadrows);

    /*
     * Compute the statistics.  Temporary results during the calculations for
     * each column are stored in a child context.  The calc routines are
     * responsible for making sure that whatever they store into the
     * VacAttrStats structure is allocated in anl_context.
     */
    if (numrows > 0)
    {
        MemoryContext col_context,
                    old_context;

        col_context = AllocSetContextCreate(anl_context,
                                            "Analyze Column",
                                            ALLOCSET_DEFAULT_SIZES);
        old_context = MemoryContextSwitchTo(col_context);

        for (i = 0; i < attr_cnt; i++)
        {
            VacAttrStats *stats = vacattrstats[i];
            AttributeOpts *aopt;

            stats->rows = rows;
            stats->tupDesc = onerel->rd_att;
            stats->compute_stats(stats,
                                 std_fetch_func,
                                 numrows,
                                 totalrows);

            /*
             * If the appropriate flavor of the n_distinct option is
             * specified, override with the corresponding value.
             */
            aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
            if (aopt != NULL)
            {
                float8      n_distinct;

                n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
                if (n_distinct != 0.0)
                    stats->stadistinct = n_distinct;
            }

            MemoryContextResetAndDeleteChildren(col_context);
        }

        if (hasindex)
            compute_index_stats(onerel, totalrows,
                                indexdata, nindexes,
                                rows, numrows,
                                col_context);

        MemoryContextSwitchTo(old_context);
        MemoryContextDelete(col_context);

        /*
         * Emit the completed stats rows into pg_statistic, replacing any
         * previous statistics for the target columns.  (If there are stats in
         * pg_statistic for columns we didn't process, we leave them alone.)
         */
        update_attstats(RelationGetRelid(onerel), inh,
                        attr_cnt, vacattrstats);

        for (ind = 0; ind < nindexes; ind++)
        {
            AnlIndexData *thisdata = &indexdata[ind];

            update_attstats(RelationGetRelid(Irel[ind]), false,
                            thisdata->attr_cnt, thisdata->vacattrstats);
        }

        /*
         * Build extended statistics (if there are any).
         *
         * For now we only build extended statistics on individual relations,
         * not for relations representing inheritance trees.
         */
        if (!inh)
            BuildRelationExtStatistics(onerel, totalrows, numrows, rows,
                                       attr_cnt, vacattrstats);
    }

    /*
     * Update pages/tuples stats in pg_class ... but not if we're doing
     * inherited stats.
     */
    if (!inh)
    {
        BlockNumber relallvisible;

        visibilitymap_count(onerel, &relallvisible, NULL);

        vac_update_relstats(onerel,
                            relpages,
                            totalrows,
                            relallvisible,
                            hasindex,
                            InvalidTransactionId,
                            InvalidMultiXactId,
                            in_outer_xact);
    }

    /*
     * Same for indexes.  Vacuum always scans all indexes, so if we're part of
     * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
     * VACUUM.
     */
    if (!inh && !(params->options & VACOPT_VACUUM))
    {
        for (ind = 0; ind < nindexes; ind++)
        {
            AnlIndexData *thisdata = &indexdata[ind];
            double      totalindexrows;

            totalindexrows = ceil(thisdata->tupleFract * totalrows);
            vac_update_relstats(Irel[ind],
                                RelationGetNumberOfBlocks(Irel[ind]),
                                totalindexrows,
                                0,
                                false,
                                InvalidTransactionId,
                                InvalidMultiXactId,
                                in_outer_xact);
        }
    }

    /*
     * Report ANALYZE to the stats collector, too.  However, if doing
     * inherited stats we shouldn't report, because the stats collector only
     * tracks per-table stats.  Reset the changes_since_analyze counter only
     * if we analyzed all columns; otherwise, there is still work for
     * auto-analyze to do.
     */
    if (!inh)
        pgstat_report_analyze(onerel, totalrows, totaldeadrows,
                              (va_cols == NIL));

    /* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
    if (!(params->options & VACOPT_VACUUM))
    {
        for (ind = 0; ind < nindexes; ind++)
        {
            IndexBulkDeleteResult *stats;
            IndexVacuumInfo ivinfo;

            ivinfo.index = Irel[ind];
            ivinfo.analyze_only = true;
            ivinfo.estimated_count = true;
            ivinfo.message_level = elevel;
            ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
            ivinfo.strategy = vac_strategy;

            stats = index_vacuum_cleanup(&ivinfo, NULL);

            if (stats)
                pfree(stats);
        }
    }

    /* Done with indexes */
    vac_close_indexes(nindexes, Irel, NoLock);

    /* Log the action if appropriate */
    if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
    {
        if (params->log_min_duration == 0 ||
            TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
                                       params->log_min_duration))
            ereport(LOG,
                    (errmsg("automatic analyze of table \"%s.%s.%s\" system usage: %s",
                            get_database_name(MyDatabaseId),
                            get_namespace_name(RelationGetNamespace(onerel)),
                            RelationGetRelationName(onerel),
                            pg_rusage_show(&ru0))));
    }

    /* Roll back any GUC changes executed by index functions */
    AtEOXact_GUC(false, save_nestlevel);

    /* Restore userid and security context */
    SetUserIdAndSecContext(save_userid, save_sec_context);

    /* Restore current context and release memory */
    MemoryContextSwitchTo(caller_context);
    MemoryContextDelete(anl_context);
    anl_context = NULL;
}

/*
 * Compute statistics about indexes of a relation
 */
static void
compute_index_stats(Relation onerel, double totalrows,
                    AnlIndexData *indexdata, int nindexes,
                    HeapTuple *rows, int numrows,
                    MemoryContext col_context)
{
    MemoryContext ind_context,
                old_context;
    Datum       values[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    int         ind,
                i;

    ind_context = AllocSetContextCreate(anl_context,
                                        "Analyze Index",
                                        ALLOCSET_DEFAULT_SIZES);
    old_context = MemoryContextSwitchTo(ind_context);

    for (ind = 0; ind < nindexes; ind++)
    {
        AnlIndexData *thisdata = &indexdata[ind];
        IndexInfo  *indexInfo = thisdata->indexInfo;
        int         attr_cnt = thisdata->attr_cnt;
        TupleTableSlot *slot;
        EState     *estate;
        ExprContext *econtext;
        ExprState  *predicate;
        Datum      *exprvals;
        bool       *exprnulls;
        int         numindexrows,
                    tcnt,
                    rowno;
        double      totalindexrows;

        /* Ignore index if no columns to analyze and not partial */
        if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
            continue;

        /*
         * Need an EState for evaluation of index expressions and
         * partial-index predicates.  Create it in the per-index context to be
         * sure it gets cleaned up at the bottom of the loop.
         */
        estate = CreateExecutorState();
        econtext = GetPerTupleExprContext(estate);
        /* Need a slot to hold the current heap tuple, too */
        slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
                                        &TTSOpsHeapTuple);

        /* Arrange for econtext's scan tuple to be the tuple under test */
        econtext->ecxt_scantuple = slot;

        /* Set up execution state for predicate. */
        predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

        /* Compute and save index expression values */
        exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
        exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
        numindexrows = 0;
        tcnt = 0;
        for (rowno = 0; rowno < numrows; rowno++)
        {
            HeapTuple   heapTuple = rows[rowno];

            vacuum_delay_point();

            /*
             * Reset the per-tuple context each time, to reclaim any cruft
             * left behind by evaluating the predicate or index expressions.
             */
            ResetExprContext(econtext);

            /* Set up for predicate or expression evaluation */
            ExecStoreHeapTuple(heapTuple, slot, false);

            /* If index is partial, check predicate */
            if (predicate != NULL)
            {
                if (!ExecQual(predicate, econtext))
                    continue;
            }
            numindexrows++;

            if (attr_cnt > 0)
            {
                /*
                 * Evaluate the index row to compute expression values.  We
                 * could do this by hand, but FormIndexDatum is convenient.
                 */
                FormIndexDatum(indexInfo,
                               slot,
                               estate,
                               values,
                               isnull);

                /*
                 * Save just the columns we care about.  We copy the values
                 * into ind_context from the estate's per-tuple context.
                 */
                for (i = 0; i < attr_cnt; i++)
                {
                    VacAttrStats *stats = thisdata->vacattrstats[i];
                    int         attnum = stats->attr->attnum;

                    if (isnull[attnum - 1])
                    {
                        exprvals[tcnt] = (Datum) 0;
                        exprnulls[tcnt] = true;
                    }
                    else
                    {
                        exprvals[tcnt] = datumCopy(values[attnum - 1],
                                                   stats->attrtype->typbyval,
                                                   stats->attrtype->typlen);
                        exprnulls[tcnt] = false;
                    }
                    tcnt++;
                }
            }
        }

        /*
         * Having counted the number of rows that pass the predicate in the
         * sample, we can estimate the total number of rows in the index.
         */
        thisdata->tupleFract = (double) numindexrows / (double) numrows;
        totalindexrows = ceil(thisdata->tupleFract * totalrows);

        /*
         * Now we can compute the statistics for the expression columns.
         */
        if (numindexrows > 0)
        {
            MemoryContextSwitchTo(col_context);
            for (i = 0; i < attr_cnt; i++)
            {
                VacAttrStats *stats = thisdata->vacattrstats[i];
                AttributeOpts *aopt =
                    get_attribute_options(stats->attr->attrelid,
                                          stats->attr->attnum);

                stats->exprvals = exprvals + i;
                stats->exprnulls = exprnulls + i;
                stats->rowstride = attr_cnt;
                stats->compute_stats(stats,
                                     ind_fetch_func,
                                     numindexrows,
                                     totalindexrows);

                /*
                 * If the n_distinct option is specified, it overrides the
                 * above computation.  For indices, we always use just
                 * n_distinct, not n_distinct_inherited.
                 */
                if (aopt != NULL && aopt->n_distinct != 0.0)
                    stats->stadistinct = aopt->n_distinct;

                MemoryContextResetAndDeleteChildren(col_context);
            }
        }

        /* And clean up */
        MemoryContextSwitchTo(ind_context);

        ExecDropSingleTupleTableSlot(slot);
        FreeExecutorState(estate);
        MemoryContextResetAndDeleteChildren(ind_context);
    }

    MemoryContextSwitchTo(old_context);
    MemoryContextDelete(ind_context);
}

/*
 * examine_attribute -- pre-analysis of a single column
 *
 * Determine whether the column is analyzable; if so, create and initialize
 * a VacAttrStats struct for it.  If not, return NULL.
 *
 * If index_expr isn't NULL, then we're trying to analyze an expression index,
 * and index_expr is the expression tree representing the column's data.
 */
static VacAttrStats *
examine_attribute(Relation onerel, int attnum, Node *index_expr)
{
    Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1);
    HeapTuple   typtuple;
    VacAttrStats *stats;
    int         i;
    bool        ok;

    /* Never analyze dropped columns */
    if (attr->attisdropped)
        return NULL;

    /* Don't analyze column if user has specified not to */
    if (attr->attstattarget == 0)
        return NULL;
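
    /*
     * (Illustrative note, not from the upstream comments: a zero
     * attstattarget is the user's explicit opt-out, e.g.
     * ALTER TABLE t ALTER COLUMN c SET STATISTICS 0, with "t" and "c"
     * standing in for real names.)
     */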

    /*
     * Create the VacAttrStats struct.  Note that we only have a copy of the
     * fixed fields of the pg_attribute tuple.
     */
    stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
    stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
    memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);

    /*
     * When analyzing an expression index, believe the expression tree's type
     * not the column datatype --- the latter might be the opckeytype storage
     * type of the opclass, which is not interesting for our purposes.  (Note:
     * if we did anything with non-expression index columns, we'd need to
     * figure out where to get the correct type info from, but for now that's
     * not a problem.)  It's not clear whether anyone will care about the
     * typmod, but we store that too just in case.
     */
    if (index_expr)
    {
        stats->attrtypid = exprType(index_expr);
        stats->attrtypmod = exprTypmod(index_expr);

        /*
         * If a collation has been specified for the index column, use that in
         * preference to anything else; but if not, fall back to whatever we
         * can get from the expression.
         */
        if (OidIsValid(onerel->rd_indcollation[attnum - 1]))
            stats->attrcollid = onerel->rd_indcollation[attnum - 1];
        else
            stats->attrcollid = exprCollation(index_expr);
    }
    else
    {
        stats->attrtypid = attr->atttypid;
        stats->attrtypmod = attr->atttypmod;
        stats->attrcollid = attr->attcollation;
    }

    typtuple = SearchSysCacheCopy1(TYPEOID,
                                   ObjectIdGetDatum(stats->attrtypid));
    if (!HeapTupleIsValid(typtuple))
        elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
    stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
    stats->anl_context = anl_context;
    stats->tupattnum = attnum;

    /*
     * The fields describing the stats->stavalues[n] element types default to
     * the type of the data being analyzed, but the type-specific typanalyze
     * function can change them if it wants to store something else.
     */
    for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
    {
        stats->statypid[i] = stats->attrtypid;
        stats->statyplen[i] = stats->attrtype->typlen;
        stats->statypbyval[i] = stats->attrtype->typbyval;
        stats->statypalign[i] = stats->attrtype->typalign;
    }

    /*
     * Call the type-specific typanalyze function.  If none is specified, use
     * std_typanalyze().
     */
    if (OidIsValid(stats->attrtype->typanalyze))
        ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
                                           PointerGetDatum(stats)));
    else
        ok = std_typanalyze(stats);

    if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
    {
        heap_freetuple(typtuple);
        pfree(stats->attr);
        pfree(stats);
        return NULL;
    }

    return stats;
}
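
/*
 * For reference (an illustrative note): a data type can bypass the default
 * path chosen above by setting pg_type.typanalyze; the in-core tsvector
 * type does so with ts_typanalyze(), which installs its own compute_stats
 * routine in the VacAttrStats struct.
 */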

/*
 * acquire_sample_rows -- acquire a random sample of rows from the table
 *
 * Selected rows are returned in the caller-allocated array rows[], which
 * must have at least targrows entries.
 * The actual number of rows selected is returned as the function result.
 * We also estimate the total numbers of live and dead rows in the table,
 * and return them into *totalrows and *totaldeadrows, respectively.
 *
 * The returned list of tuples is in order by physical position in the table.
 * (We will rely on this later to derive correlation estimates.)
 *
 * As of May 2004 we use a new two-stage method:  Stage one selects up
 * to targrows random blocks (or all blocks, if there aren't so many).
 * Stage two scans these blocks and uses the Vitter algorithm to create
 * a random sample of targrows rows (or fewer, if the sample of blocks
 * contains fewer rows).  The two stages are executed simultaneously: each
 * block is processed as soon as stage one returns its number, and while
 * the rows are read, stage two controls which ones are to be inserted
 * into the sample.
 *
 * Although every row has an equal chance of ending up in the final
 * sample, this sampling method is not perfect: not every possible
 * sample has an equal chance of being selected.  For large relations
 * the number of different blocks represented by the sample tends to be
 * too small.  We can live with that for now.  Improvements are welcome.
 *
 * An important property of this sampling method is that because we do
 * look at a statistically unbiased set of blocks, we should get
 * unbiased estimates of the average numbers of live and dead rows per
 * block.  The previous sampling method put too much credence in the row
 * density near the start of the table.
 */
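/*
 * Concrete numbers (illustrative, not from the original comment): with
 * targrows = 30000, a 1,000,000-block table has 30000 of its blocks chosen
 * in stage one, whereas a 500-block table is read in full and stage two
 * alone decides which rows enter the reservoir.
 */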
static int
acquire_sample_rows(Relation onerel, int elevel,
                    HeapTuple *rows, int targrows,
                    double *totalrows, double *totaldeadrows)
{
    int         numrows = 0;    /* # rows now in reservoir */
    double      samplerows = 0; /* total # rows collected */
    double      liverows = 0;   /* # live rows seen */
    double      deadrows = 0;   /* # dead rows seen */
    double      rowstoskip = -1;    /* -1 means not set yet */
    BlockNumber totalblocks;
    TransactionId OldestXmin;
    BlockSamplerData bs;
    ReservoirStateData rstate;
    TupleTableSlot *slot;
    TableScanDesc scan;

    Assert(targrows > 0);

    totalblocks = RelationGetNumberOfBlocks(onerel);

    /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
    OldestXmin = GetOldestXmin(onerel, PROCARRAY_FLAGS_VACUUM);

    /* Prepare for sampling block numbers */
    BlockSampler_Init(&bs, totalblocks, targrows, random());
    /* Prepare for sampling rows */
    reservoir_init_selection_state(&rstate, targrows);

    scan = table_beginscan_analyze(onerel);
    slot = table_slot_create(onerel, NULL);

    /* Outer loop over blocks to sample */
    while (BlockSampler_HasMore(&bs))
    {
        BlockNumber targblock = BlockSampler_Next(&bs);

        vacuum_delay_point();

        if (!table_scan_analyze_next_block(scan, targblock, vac_strategy))
            continue;

        while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
        {
            /*
             * The first targrows sample rows are simply copied into the
             * reservoir.  Then we start replacing tuples in the sample until
             * we reach the end of the relation.  This algorithm is from Jeff
             * Vitter's paper (see full citation in utils/misc/sampling.c).
             * It works by repeatedly computing the number of tuples to skip
             * before selecting a tuple, which replaces a randomly chosen
             * element of the reservoir (current set of tuples).  At all times
             * the reservoir is a true random sample of the tuples we've
             * passed over so far, so when we fall off the end of the relation
             * we're done.
             */
            if (numrows < targrows)
                rows[numrows++] = ExecCopySlotHeapTuple(slot);
            else
            {
                /*
                 * t in Vitter's paper is the number of records already
                 * processed.  If we need to compute a new S value, we must
                 * use the not-yet-incremented value of samplerows as t.
                 */
                if (rowstoskip < 0)
                    rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);

                if (rowstoskip <= 0)
                {
                    /*
                     * Found a suitable tuple, so save it, replacing one old
                     * tuple at random
                     */
                    int         k = (int) (targrows * sampler_random_fract(rstate.randstate));

                    Assert(k >= 0 && k < targrows);
                    heap_freetuple(rows[k]);
                    rows[k] = ExecCopySlotHeapTuple(slot);
                }

                rowstoskip -= 1;
            }

            samplerows += 1;
        }
    }

    ExecDropSingleTupleTableSlot(slot);
    table_endscan(scan);

    /*
     * If we didn't find as many tuples as we wanted then we're done.  No sort
     * is needed, since they're already in order.
     *
     * Otherwise we need to sort the collected tuples by position
     * (itempointer).  It's not worth worrying about corner cases where the
     * tuples are already sorted.
     */
    if (numrows == targrows)
        qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);

    /*
     * Estimate total numbers of live and dead rows in relation, extrapolating
     * on the assumption that the average tuple density in pages we didn't
     * scan is the same as in the pages we did scan.  Since what we scanned is
     * a random sample of the pages in the relation, this should be a good
     * assumption.
     */
    if (bs.m > 0)
    {
        *totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
        *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
    }
    else
    {
        *totalrows = 0.0;
        *totaldeadrows = 0.0;
    }

    /*
     * Emit some interesting relation info
     */
    ereport(elevel,
            (errmsg("\"%s\": scanned %d of %u pages, "
                    "containing %.0f live rows and %.0f dead rows; "
                    "%d rows in sample, %.0f estimated total rows",
                    RelationGetRelationName(onerel),
                    bs.m, totalblocks,
                    liverows, deadrows,
                    numrows, *totalrows)));

    return numrows;
}

/*
 * qsort comparator for sorting rows[] array
 */
static int
compare_rows(const void *a, const void *b)
{
    HeapTuple   ha = *(const HeapTuple *) a;
    HeapTuple   hb = *(const HeapTuple *) b;
    BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
    OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
    BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
    OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);

    if (ba < bb)
        return -1;
    if (ba > bb)
        return 1;
    if (oa < ob)
        return -1;
    if (oa > ob)
        return 1;
    return 0;
}


/*
 * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
 *
 * This has the same API as acquire_sample_rows, except that rows are
 * collected from all inheritance children as well as the specified table.
 * We fail and return zero if there are no inheritance children, or if all
 * children are foreign tables that don't support ANALYZE.
 */
static int
acquire_inherited_sample_rows(Relation onerel, int elevel,
                              HeapTuple *rows, int targrows,
                              double *totalrows, double *totaldeadrows)
{
    List       *tableOIDs;
    Relation   *rels;
    AcquireSampleRowsFunc *acquirefuncs;
    double     *relblocks;
    double      totalblocks;
    int         numrows,
                nrels,
                i;
    ListCell   *lc;
    bool        has_child;

    /*
     * Find all members of inheritance set.  We only need AccessShareLock on
     * the children.
     */
    tableOIDs =
        find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);

    /*
     * Check that there's at least one descendant, else fail.  This could
     * happen despite analyze_rel's relhassubclass check, if table once had a
     * child but no longer does.  In that case, we can clear the
     * relhassubclass field so as not to make the same mistake again later.
     * (This is safe because we hold ShareUpdateExclusiveLock.)
     */
    if (list_length(tableOIDs) < 2)
    {
        /* CCI because we already updated the pg_class row in this command */
        CommandCounterIncrement();
        SetRelationHasSubclass(RelationGetRelid(onerel), false);
        ereport(elevel,
                (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
                        get_namespace_name(RelationGetNamespace(onerel)),
                        RelationGetRelationName(onerel))));
        return 0;
    }

    /*
     * Identify acquirefuncs to use, and count blocks in all the relations.
     * The result could overflow BlockNumber, so we use double arithmetic.
     */
    rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
    acquirefuncs = (AcquireSampleRowsFunc *)
        palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
    relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
    totalblocks = 0;
    nrels = 0;
    has_child = false;
    foreach(lc, tableOIDs)
    {
        Oid         childOID = lfirst_oid(lc);
        Relation    childrel;
        AcquireSampleRowsFunc acquirefunc = NULL;
        BlockNumber relpages = 0;

        /* We already got the needed lock */
        childrel = table_open(childOID, NoLock);

        /* Ignore if temp table of another backend */
        if (RELATION_IS_OTHER_TEMP(childrel))
        {
            /* ... but release the lock on it */
            Assert(childrel != onerel);
            table_close(childrel, AccessShareLock);
            continue;
        }

        /* Check table type (MATVIEW can't happen, but might as well allow) */
        if (childrel->rd_rel->relkind == RELKIND_RELATION ||
            childrel->rd_rel->relkind == RELKIND_MATVIEW)
        {
            /* Regular table, so use the regular row acquisition function */
            acquirefunc = acquire_sample_rows;
            relpages = RelationGetNumberOfBlocks(childrel);
        }
        else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
        {
            /*
             * For a foreign table, call the FDW's hook function to see
             * whether it supports analysis.
             */
            FdwRoutine *fdwroutine;
            bool        ok = false;

            fdwroutine = GetFdwRoutineForRelation(childrel, false);

            if (fdwroutine->AnalyzeForeignTable != NULL)
                ok = fdwroutine->AnalyzeForeignTable(childrel,
                                                     &acquirefunc,
                                                     &relpages);

            if (!ok)
            {
                /* ignore, but release the lock on it */
                Assert(childrel != onerel);
                table_close(childrel, AccessShareLock);
                continue;
            }
        }
        else
        {
            /*
             * ignore, but release the lock on it.  don't try to unlock the
             * passed-in relation
             */
            Assert(childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
            if (childrel != onerel)
                table_close(childrel, AccessShareLock);
            else
                table_close(childrel, NoLock);
            continue;
        }

        /* OK, we'll process this child */
        has_child = true;
        rels[nrels] = childrel;
        acquirefuncs[nrels] = acquirefunc;
        relblocks[nrels] = (double) relpages;
        totalblocks += (double) relpages;
        nrels++;
    }

    /*
     * If we don't have at least one child table to consider, fail.  If the
     * relation is a partitioned table, it's not counted as a child table.
     */
    if (!has_child)
    {
        ereport(elevel,
                (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
                        get_namespace_name(RelationGetNamespace(onerel)),
                        RelationGetRelationName(onerel))));
        return 0;
    }

    /*
     * Now sample rows from each relation, proportionally to its fraction of
     * the total block count.  (This might be less than desirable if the child
     * rels have radically different free-space percentages, but it's not
     * clear that it's worth working harder.)
     */
    numrows = 0;
    *totalrows = 0;
    *totaldeadrows = 0;
    for (i = 0; i < nrels; i++)
    {
        Relation    childrel = rels[i];
        AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
        double      childblocks = relblocks[i];

        if (childblocks > 0)
        {
            int         childtargrows;

            childtargrows = (int) rint(targrows * childblocks / totalblocks);
            /* Make sure we don't overrun due to roundoff error */
            childtargrows = Min(childtargrows, targrows - numrows);
            if (childtargrows > 0)
            {
                int         childrows;
                double      trows,
                            tdrows;

                /* Fetch a random sample of the child's rows */
                childrows = (*acquirefunc) (childrel, elevel,
                                            rows + numrows, childtargrows,
                                            &trows, &tdrows);

                /* We may need to convert from child's rowtype to parent's */
                if (childrows > 0 &&
                    !equalTupleDescs(RelationGetDescr(childrel),
                                     RelationGetDescr(onerel)))
                {
                    TupleConversionMap *map;

                    map = convert_tuples_by_name(RelationGetDescr(childrel),
                                                 RelationGetDescr(onerel),
                                                 gettext_noop("could not convert row type"));
                    if (map != NULL)
                    {
                        int         j;

                        for (j = 0; j < childrows; j++)
                        {
                            HeapTuple   newtup;

                            newtup = execute_attr_map_tuple(rows[numrows + j], map);
                            heap_freetuple(rows[numrows + j]);
                            rows[numrows + j] = newtup;
                        }
                        free_conversion_map(map);
                    }
                }

                /* And add to counts */
                numrows += childrows;
                *totalrows += trows;
                *totaldeadrows += tdrows;
            }
        }

        /*
         * Note: we cannot release the child-table locks, since we may have
         * pointers to their TOAST tables in the sampled rows.
         */
        table_close(childrel, NoLock);
    }

    return numrows;
}


/*
 * update_attstats() -- update attribute statistics for one relation
 *
 * Statistics are stored in several places: the pg_class row for the
 * relation has stats about the whole relation, and there is a
 * pg_statistic row for each (non-system) attribute that has ever
 * been analyzed.  The pg_class values are updated by VACUUM, not here.
 *
 * pg_statistic rows are just added or updated normally.  This means
 * that pg_statistic will probably contain some deleted rows at the
 * completion of a vacuum cycle, unless it happens to get vacuumed last.
 *
 * To keep things simple, we punt for pg_statistic, and don't try
 * to compute or store rows for pg_statistic itself in pg_statistic.
 * This could possibly be made to work, but it's not worth the trouble.
 * Note analyze_rel() has seen to it that we won't come here when
 * vacuuming pg_statistic itself.
 *
 * Note: there would be a race condition here if two backends could
 * ANALYZE the same table concurrently.  Presently, we lock that out
 * by taking a self-exclusive lock on the relation in analyze_rel().
 */
static void
update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
{
    Relation    sd;
    int         attno;

    if (natts <= 0)
        return;                 /* nothing to do */

    sd = table_open(StatisticRelationId, RowExclusiveLock);

    for (attno = 0; attno < natts; attno++)
    {
        VacAttrStats *stats = vacattrstats[attno];
        HeapTuple   stup,
                    oldtup;
        int         i,
                    k,
                    n;
        Datum       values[Natts_pg_statistic];
        bool        nulls[Natts_pg_statistic];
        bool        replaces[Natts_pg_statistic];

        /* Ignore attr if we weren't able to collect stats */
        if (!stats->stats_valid)
            continue;

        /*
         * Construct a new pg_statistic tuple
         */
        for (i = 0; i < Natts_pg_statistic; ++i)
        {
            nulls[i] = false;
            replaces[i] = true;
        }

        values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
        values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
        values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
        values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
        values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
        values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
        i = Anum_pg_statistic_stakind1 - 1;
        for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
        {
            values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
        }
        i = Anum_pg_statistic_staop1 - 1;
        for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
        {
            values[i++] = ObjectIdGetDatum(stats->staop[k]);    /* staopN */
        }
        i = Anum_pg_statistic_stacoll1 - 1;
        for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
        {
            values[i++] = ObjectIdGetDatum(stats->stacoll[k]);  /* stacollN */
        }
        i = Anum_pg_statistic_stanumbers1 - 1;
        for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
        {
            int         nnum = stats->numnumbers[k];

            if (nnum > 0)
            {
                Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
                ArrayType  *arry;

                for (n = 0; n < nnum; n++)
                    numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
                /* XXX knows more than it should about type float4: */
                arry = construct_array(numdatums, nnum,
                                       FLOAT4OID,
                                       sizeof(float4), FLOAT4PASSBYVAL, 'i');
                values[i++] = PointerGetDatum(arry);    /* stanumbersN */
            }
            else
            {
                nulls[i] = true;
                values[i++] = (Datum) 0;
            }
        }
        i = Anum_pg_statistic_stavalues1 - 1;
        for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
        {
            if (stats->numvalues[k] > 0)
            {
                ArrayType  *arry;

                arry = construct_array(stats->stavalues[k],
                                       stats->numvalues[k],
                                       stats->statypid[k],
                                       stats->statyplen[k],
                                       stats->statypbyval[k],
                                       stats->statypalign[k]);
                values[i++] = PointerGetDatum(arry);    /* stavaluesN */
            }
            else
            {
                nulls[i] = true;
                values[i++] = (Datum) 0;
            }
        }

        /* Is there already a pg_statistic tuple for this attribute? */
        oldtup = SearchSysCache3(STATRELATTINH,
                                 ObjectIdGetDatum(relid),
                                 Int16GetDatum(stats->attr->attnum),
                                 BoolGetDatum(inh));

        if (HeapTupleIsValid(oldtup))
        {
            /* Yes, replace it */
            stup = heap_modify_tuple(oldtup,
                                     RelationGetDescr(sd),
                                     values,
                                     nulls,
                                     replaces);
            ReleaseSysCache(oldtup);
            CatalogTupleUpdate(sd, &stup->t_self, stup);
        }
        else
        {
            /* No, insert new tuple */
            stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
            CatalogTupleInsert(sd, stup);
        }

        heap_freetuple(stup);
    }

    table_close(sd, RowExclusiveLock);
}
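
/*
 * For reference (an illustrative note): the rows written by
 * update_attstats() surface to users through the pg_stats system view,
 * e.g.
 *
 *      SELECT attname, null_frac, n_distinct
 *      FROM pg_stats
 *      WHERE tablename = 'mytable';    -- hypothetical table name
 */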
1548 | |
1549 | /* |
1550 | * Standard fetch function for use by compute_stats subroutines. |
1551 | * |
1552 | * This exists to provide some insulation between compute_stats routines |
1553 | * and the actual storage of the sample data. |
1554 | */ |
1555 | static Datum |
1556 | std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) |
1557 | { |
1558 | int attnum = stats->tupattnum; |
1559 | HeapTuple tuple = stats->rows[rownum]; |
1560 | TupleDesc tupDesc = stats->tupDesc; |
1561 | |
1562 | return heap_getattr(tuple, attnum, tupDesc, isNull); |
1563 | } |
1564 | |
1565 | /* |
1566 | * Fetch function for analyzing index expressions. |
1567 | * |
1568 | * We have not bothered to construct index tuples, instead the data is |
1569 | * just in Datum arrays. |
1570 | */ |
1571 | static Datum |
1572 | ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) |
1573 | { |
1574 | int i; |
1575 | |
1576 | /* exprvals and exprnulls are already offset for proper column */ |
1577 | i = rownum * stats->rowstride; |
1578 | *isNull = stats->exprnulls[i]; |
1579 | return stats->exprvals[i]; |
1580 | } |
1581 | |
1582 | |
1583 | /*========================================================================== |
1584 | * |
1585 | * Code below this point represents the "standard" type-specific statistics |
1586 | * analysis algorithms. This code can be replaced on a per-data-type basis |
1587 | * by setting a nonzero value in pg_type.typanalyze. |
1588 | * |
1589 | *========================================================================== |
1590 | */ |
1591 | |
1592 | |
1593 | /* |
1594 | * To avoid consuming too much memory during analysis and/or too much space |
1595 | * in the resulting pg_statistic rows, we ignore varlena datums that are wider |
1596 | * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV |
1597 | * and distinct-value calculations since a wide value is unlikely to be |
1598 | * duplicated at all, much less be a most-common value. For the same reason, |
1599 | * ignoring wide values will not affect our estimates of histogram bin |
1600 | * boundaries very much. |
1601 | */ |
1602 | #define WIDTH_THRESHOLD 1024 |
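
/*
 * For example, a text datum that detoasts to more than 1 kB is counted in
 * toowide_cnt by the routines below and left out of the MCV and histogram
 * calculations, though it still contributes to the null-fraction and
 * average-width statistics.
 */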
1603 | |
1604 | #define swapInt(a,b) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0) |
1605 | #define swapDatum(a,b) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0) |
1606 | |
1607 | /* |
1608 | * Extra information used by the default analysis routines |
1609 | */ |
1610 | typedef struct |
1611 | { |
1612 | int count; /* # of duplicates */ |
1613 | int first; /* values[] index of first occurrence */ |
1614 | } ScalarMCVItem; |
1615 | |
1616 | typedef struct |
1617 | { |
1618 | SortSupport ssup; |
1619 | int *tupnoLink; |
1620 | } CompareScalarsContext; |
1621 | |
1622 | |
1623 | static void compute_trivial_stats(VacAttrStatsP stats, |
1624 | AnalyzeAttrFetchFunc fetchfunc, |
1625 | int samplerows, |
1626 | double totalrows); |
1627 | static void compute_distinct_stats(VacAttrStatsP stats, |
1628 | AnalyzeAttrFetchFunc fetchfunc, |
1629 | int samplerows, |
1630 | double totalrows); |
1631 | static void compute_scalar_stats(VacAttrStatsP stats, |
1632 | AnalyzeAttrFetchFunc fetchfunc, |
1633 | int samplerows, |
1634 | double totalrows); |
1635 | static int compare_scalars(const void *a, const void *b, void *arg); |
1636 | static int compare_mcvs(const void *a, const void *b); |
1637 | static int analyze_mcv_list(int *mcv_counts, |
1638 | int num_mcv, |
1639 | double stadistinct, |
1640 | double stanullfrac, |
1641 | int samplerows, |
1642 | double totalrows); |
1643 | |
1644 | |
1645 | /* |
1646 | * std_typanalyze -- the default type-specific typanalyze function |
1647 | */ |
1648 | bool |
1649 | std_typanalyze(VacAttrStats *stats) |
1650 | { |
1651 | Form_pg_attribute attr = stats->attr; |
1652 | Oid ltopr; |
1653 | Oid eqopr; |
1654 | StdAnalyzeData *mystats; |
1655 | |
1656 | /* If the attstattarget column is negative, use the default value */ |
1657 | /* NB: it is okay to scribble on stats->attr since it's a copy */ |
1658 | if (attr->attstattarget < 0) |
1659 | attr->attstattarget = default_statistics_target; |
1660 | |
1661 | /* Look for default "<" and "=" operators for column's type */ |
1662 | get_sort_group_operators(stats->attrtypid, |
1663 | false, false, false, |
&ltopr, &eqopr, NULL,
1665 | NULL); |
1666 | |
1667 | /* Save the operator info for compute_stats routines */ |
1668 | mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData)); |
1669 | mystats->eqopr = eqopr; |
1670 | mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid; |
1671 | mystats->ltopr = ltopr; |
1672 | stats->extra_data = mystats; |
1673 | |
1674 | /* |
1675 | * Determine which standard statistics algorithm to use |
1676 | */ |
1677 | if (OidIsValid(eqopr) && OidIsValid(ltopr)) |
1678 | { |
1679 | /* Seems to be a scalar datatype */ |
1680 | stats->compute_stats = compute_scalar_stats; |
1681 | /*-------------------- |
1682 | * The following choice of minrows is based on the paper |
1683 | * "Random sampling for histogram construction: how much is enough?" |
1684 | * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in |
1685 | * Proceedings of ACM SIGMOD International Conference on Management |
1686 | * of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5 |
1687 | * says that for table size n, histogram size k, maximum relative |
1688 | * error in bin size f, and error probability gamma, the minimum |
1689 | * random sample size is |
1690 | * r = 4 * k * ln(2*n/gamma) / f^2 |
1691 | * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain |
1692 | * r = 305.82 * k |
1693 | * Note that because of the log function, the dependence on n is |
1694 | * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66 |
1695 | * bin size error with probability 0.99. So there's no real need to |
1696 | * scale for n, which is a good thing because we don't necessarily |
1697 | * know it at this point. |
1698 | *-------------------- |
1699 | */ |
1700 | stats->minrows = 300 * attr->attstattarget; |
1701 | } |
1702 | else if (OidIsValid(eqopr)) |
1703 | { |
1704 | /* We can still recognize distinct values */ |
1705 | stats->compute_stats = compute_distinct_stats; |
1706 | /* Might as well use the same minrows as above */ |
1707 | stats->minrows = 300 * attr->attstattarget; |
1708 | } |
1709 | else |
1710 | { |
1711 | /* Can't do much but the trivial stuff */ |
1712 | stats->compute_stats = compute_trivial_stats; |
1713 | /* Might as well use the same minrows as above */ |
1714 | stats->minrows = 300 * attr->attstattarget; |
1715 | } |
1716 | |
1717 | return true; |
1718 | } |
1719 | |
1720 | |
1721 | /* |
1722 | * compute_trivial_stats() -- compute very basic column statistics |
1723 | * |
 * We use this when we can find neither an "=" operator nor a "<"
 * operator for the datatype.
1725 | * |
1726 | * We determine the fraction of non-null rows and the average datum width. |
1727 | */ |
1728 | static void |
1729 | compute_trivial_stats(VacAttrStatsP stats, |
1730 | AnalyzeAttrFetchFunc fetchfunc, |
1731 | int samplerows, |
1732 | double totalrows) |
1733 | { |
1734 | int i; |
1735 | int null_cnt = 0; |
1736 | int nonnull_cnt = 0; |
1737 | double total_width = 0; |
1738 | bool is_varlena = (!stats->attrtype->typbyval && |
1739 | stats->attrtype->typlen == -1); |
1740 | bool is_varwidth = (!stats->attrtype->typbyval && |
1741 | stats->attrtype->typlen < 0); |
1742 | |
1743 | for (i = 0; i < samplerows; i++) |
1744 | { |
1745 | Datum value; |
1746 | bool isnull; |
1747 | |
1748 | vacuum_delay_point(); |
1749 | |
1750 | value = fetchfunc(stats, i, &isnull); |
1751 | |
1752 | /* Check for null/nonnull */ |
1753 | if (isnull) |
1754 | { |
1755 | null_cnt++; |
1756 | continue; |
1757 | } |
1758 | nonnull_cnt++; |
1759 | |
1760 | /* |
1761 | * If it's a variable-width field, add up widths for average width |
1762 | * calculation. Note that if the value is toasted, we use the toasted |
1763 | * width. We don't bother with this calculation if it's a fixed-width |
1764 | * type. |
1765 | */ |
1766 | if (is_varlena) |
1767 | { |
1768 | total_width += VARSIZE_ANY(DatumGetPointer(value)); |
1769 | } |
1770 | else if (is_varwidth) |
1771 | { |
1772 | /* must be cstring */ |
1773 | total_width += strlen(DatumGetCString(value)) + 1; |
1774 | } |
1775 | } |
1776 | |
1777 | /* We can only compute average width if we found some non-null values. */ |
1778 | if (nonnull_cnt > 0) |
1779 | { |
1780 | stats->stats_valid = true; |
1781 | /* Do the simple null-frac and width stats */ |
1782 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
1783 | if (is_varwidth) |
1784 | stats->stawidth = total_width / (double) nonnull_cnt; |
1785 | else |
1786 | stats->stawidth = stats->attrtype->typlen; |
1787 | stats->stadistinct = 0.0; /* "unknown" */ |
1788 | } |
1789 | else if (null_cnt > 0) |
1790 | { |
1791 | /* We found only nulls; assume the column is entirely null */ |
1792 | stats->stats_valid = true; |
1793 | stats->stanullfrac = 1.0; |
1794 | if (is_varwidth) |
1795 | stats->stawidth = 0; /* "unknown" */ |
1796 | else |
1797 | stats->stawidth = stats->attrtype->typlen; |
1798 | stats->stadistinct = 0.0; /* "unknown" */ |
1799 | } |
1800 | } |
1801 | |
1802 | |
1803 | /* |
1804 | * compute_distinct_stats() -- compute column statistics including ndistinct |
1805 | * |
1806 | * We use this when we can find only an "=" operator for the datatype. |
1807 | * |
1808 | * We determine the fraction of non-null rows, the average width, the |
1809 | * most common values, and the (estimated) number of distinct values. |
1810 | * |
1811 | * The most common values are determined by brute force: we keep a list |
1812 | * of previously seen values, ordered by number of times seen, as we scan |
1813 | * the samples. A newly seen value is inserted just after the last |
1814 | * multiply-seen value, causing the bottommost (oldest) singly-seen value |
1815 | * to drop off the list. The accuracy of this method, and also its cost, |
1816 | * depend mainly on the length of the list we are willing to keep. |
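 *
 * For example (an illustrative trace): scanning the sample values
 * A B A C A B evolves the track list as
 * [A:1] -> [B:1 A:1] -> [A:2 B:1] -> [A:2 C:1 B:1] ->
 * [A:3 C:1 B:1] -> [A:3 B:2 C:1],
 * each new value entering just below the multiply-seen entries.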
1817 | */ |
1818 | static void |
1819 | compute_distinct_stats(VacAttrStatsP stats, |
1820 | AnalyzeAttrFetchFunc fetchfunc, |
1821 | int samplerows, |
1822 | double totalrows) |
1823 | { |
1824 | int i; |
1825 | int null_cnt = 0; |
1826 | int nonnull_cnt = 0; |
1827 | int toowide_cnt = 0; |
1828 | double total_width = 0; |
1829 | bool is_varlena = (!stats->attrtype->typbyval && |
1830 | stats->attrtype->typlen == -1); |
1831 | bool is_varwidth = (!stats->attrtype->typbyval && |
1832 | stats->attrtype->typlen < 0); |
1833 | FmgrInfo f_cmpeq; |
1834 | typedef struct |
1835 | { |
1836 | Datum value; |
1837 | int count; |
1838 | } TrackItem; |
1839 | TrackItem *track; |
1840 | int track_cnt, |
1841 | track_max; |
1842 | int num_mcv = stats->attr->attstattarget; |
1843 | StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; |
1844 | |
1845 | /* |
1846 | * We track up to 2*n values for an n-element MCV list; but at least 10 |
1847 | */ |
1848 | track_max = 2 * num_mcv; |
1849 | if (track_max < 10) |
1850 | track_max = 10; |
1851 | track = (TrackItem *) palloc(track_max * sizeof(TrackItem)); |
1852 | track_cnt = 0; |
1853 | |
1854 | fmgr_info(mystats->eqfunc, &f_cmpeq); |
1855 | |
1856 | for (i = 0; i < samplerows; i++) |
1857 | { |
1858 | Datum value; |
1859 | bool isnull; |
1860 | bool match; |
1861 | int firstcount1, |
1862 | j; |
1863 | |
1864 | vacuum_delay_point(); |
1865 | |
1866 | value = fetchfunc(stats, i, &isnull); |
1867 | |
1868 | /* Check for null/nonnull */ |
1869 | if (isnull) |
1870 | { |
1871 | null_cnt++; |
1872 | continue; |
1873 | } |
1874 | nonnull_cnt++; |
1875 | |
1876 | /* |
1877 | * If it's a variable-width field, add up widths for average width |
1878 | * calculation. Note that if the value is toasted, we use the toasted |
1879 | * width. We don't bother with this calculation if it's a fixed-width |
1880 | * type. |
1881 | */ |
1882 | if (is_varlena) |
1883 | { |
1884 | total_width += VARSIZE_ANY(DatumGetPointer(value)); |
1885 | |
1886 | /* |
1887 | * If the value is toasted, we want to detoast it just once to |
1888 | * avoid repeated detoastings and resultant excess memory usage |
1889 | * during the comparisons. Also, check to see if the value is |
1890 | * excessively wide, and if so don't detoast at all --- just |
1891 | * ignore the value. |
1892 | */ |
1893 | if (toast_raw_datum_size(value) > WIDTH_THRESHOLD) |
1894 | { |
1895 | toowide_cnt++; |
1896 | continue; |
1897 | } |
1898 | value = PointerGetDatum(PG_DETOAST_DATUM(value)); |
1899 | } |
1900 | else if (is_varwidth) |
1901 | { |
1902 | /* must be cstring */ |
1903 | total_width += strlen(DatumGetCString(value)) + 1; |
1904 | } |
1905 | |
1906 | /* |
1907 | * See if the value matches anything we're already tracking. |
1908 | */ |
1909 | match = false; |
1910 | firstcount1 = track_cnt; |
1911 | for (j = 0; j < track_cnt; j++) |
1912 | { |
1913 | if (DatumGetBool(FunctionCall2Coll(&f_cmpeq, |
1914 | stats->attrcollid, |
1915 | value, track[j].value))) |
1916 | { |
1917 | match = true; |
1918 | break; |
1919 | } |
1920 | if (j < firstcount1 && track[j].count == 1) |
1921 | firstcount1 = j; |
1922 | } |
1923 | |
1924 | if (match) |
1925 | { |
1926 | /* Found a match */ |
1927 | track[j].count++; |
1928 | /* This value may now need to "bubble up" in the track list */ |
1929 | while (j > 0 && track[j].count > track[j - 1].count) |
1930 | { |
1931 | swapDatum(track[j].value, track[j - 1].value); |
1932 | swapInt(track[j].count, track[j - 1].count); |
1933 | j--; |
1934 | } |
1935 | } |
1936 | else |
1937 | { |
1938 | /* No match. Insert at head of count-1 list */ |
1939 | if (track_cnt < track_max) |
1940 | track_cnt++; |
1941 | for (j = track_cnt - 1; j > firstcount1; j--) |
1942 | { |
1943 | track[j].value = track[j - 1].value; |
1944 | track[j].count = track[j - 1].count; |
1945 | } |
1946 | if (firstcount1 < track_cnt) |
1947 | { |
1948 | track[firstcount1].value = value; |
1949 | track[firstcount1].count = 1; |
1950 | } |
1951 | } |
1952 | } |
1953 | |
1954 | /* We can only compute real stats if we found some non-null values. */ |
1955 | if (nonnull_cnt > 0) |
1956 | { |
1957 | int nmultiple, |
1958 | summultiple; |
1959 | |
1960 | stats->stats_valid = true; |
1961 | /* Do the simple null-frac and width stats */ |
1962 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
1963 | if (is_varwidth) |
1964 | stats->stawidth = total_width / (double) nonnull_cnt; |
1965 | else |
1966 | stats->stawidth = stats->attrtype->typlen; |
1967 | |
1968 | /* Count the number of values we found multiple times */ |
1969 | summultiple = 0; |
1970 | for (nmultiple = 0; nmultiple < track_cnt; nmultiple++) |
1971 | { |
1972 | if (track[nmultiple].count == 1) |
1973 | break; |
1974 | summultiple += track[nmultiple].count; |
1975 | } |
1976 | |
1977 | if (nmultiple == 0) |
1978 | { |
1979 | /* |
1980 | * If we found no repeated non-null values, assume it's a unique |
1981 | * column; but be sure to discount for any nulls we found. |
1982 | */ |
1983 | stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); |
1984 | } |
1985 | else if (track_cnt < track_max && toowide_cnt == 0 && |
1986 | nmultiple == track_cnt) |
1987 | { |
1988 | /* |
1989 | * Our track list includes every value in the sample, and every |
1990 | * value appeared more than once. Assume the column has just |
1991 | * these values. (This case is meant to address columns with |
1992 | * small, fixed sets of possible values, such as boolean or enum |
1993 | * columns. If there are any values that appear just once in the |
1994 | * sample, including too-wide values, we should assume that that's |
1995 | * not what we're dealing with.) |
1996 | */ |
1997 | stats->stadistinct = track_cnt; |
1998 | } |
1999 | else |
2000 | { |
2001 | /*---------- |
2002 | * Estimate the number of distinct values using the estimator |
2003 | * proposed by Haas and Stokes in IBM Research Report RJ 10025: |
2004 | * n*d / (n - f1 + f1*n/N) |
2005 | * where f1 is the number of distinct values that occurred |
2006 | * exactly once in our sample of n rows (from a total of N), |
2007 | * and d is the total number of distinct values in the sample. |
2008 | * This is their Duj1 estimator; the other estimators they |
2009 | * recommend are considerably more complex, and are numerically |
2010 | * very unstable when n is much smaller than N. |
2011 | * |
2012 | * In this calculation, we consider only non-nulls. We used to |
2013 | * include rows with null values in the n and N counts, but that |
2014 | * leads to inaccurate answers in columns with many nulls, and |
2015 | * it's intuitively bogus anyway considering the desired result is |
2016 | * the number of distinct non-null values. |
2017 | * |
2018 | * We assume (not very reliably!) that all the multiply-occurring |
2019 | * values are reflected in the final track[] list, and the other |
2020 | * nonnull values all appeared but once. (XXX this usually |
2021 | * results in a drastic overestimate of ndistinct. Can we do |
2022 | * any better?) |
2023 | *---------- |
2024 | */ |
2025 | int f1 = nonnull_cnt - summultiple; |
2026 | int d = f1 + nmultiple; |
2027 | double n = samplerows - null_cnt; |
2028 | double N = totalrows * (1.0 - stats->stanullfrac); |
2029 | double stadistinct; |
2030 | |
2031 | /* N == 0 shouldn't happen, but just in case ... */ |
2032 | if (N > 0) |
2033 | stadistinct = (n * d) / ((n - f1) + f1 * n / N); |
2034 | else |
2035 | stadistinct = 0; |
2036 | |
2037 | /* Clamp to sane range in case of roundoff error */ |
2038 | if (stadistinct < d) |
2039 | stadistinct = d; |
2040 | if (stadistinct > N) |
2041 | stadistinct = N; |
2042 | /* And round to integer */ |
2043 | stats->stadistinct = floor(stadistinct + 0.5); |
2044 | } |
2045 | |
2046 | /* |
2047 | * If we estimated the number of distinct values at more than 10% of |
2048 | * the total row count (a very arbitrary limit), then assume that |
2049 | * stadistinct should scale with the row count rather than be a fixed |
2050 | * value. |
2051 | */ |
2052 | if (stats->stadistinct > 0.1 * totalrows) |
2053 | stats->stadistinct = -(stats->stadistinct / totalrows); |
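/*
 * For example, an estimate of 250000 distinct values in a 1000000-row
 * table is stored as stadistinct = -0.25, to be rescaled by the
 * current row count when the planner uses it.
 */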
2054 | |
2055 | /* |
2056 | * Decide how many values are worth storing as most-common values. If |
2057 | * we are able to generate a complete MCV list (all the values in the |
2058 | * sample will fit, and we think these are all the ones in the table), |
2059 | * then do so. Otherwise, store only those values that are |
2060 | * significantly more common than the values not in the list. |
2061 | * |
2062 | * Note: the first of these cases is meant to address columns with |
2063 | * small, fixed sets of possible values, such as boolean or enum |
2064 | * columns. If we can *completely* represent the column population by |
2065 | * an MCV list that will fit into the stats target, then we should do |
2066 | * so and thus provide the planner with complete information. But if |
2067 | * the MCV list is not complete, it's generally worth being more |
2068 | * selective, and not just filling it all the way up to the stats |
2069 | * target. |
2070 | */ |
2071 | if (track_cnt < track_max && toowide_cnt == 0 && |
2072 | stats->stadistinct > 0 && |
2073 | track_cnt <= num_mcv) |
2074 | { |
2075 | /* Track list includes all values seen, and all will fit */ |
2076 | num_mcv = track_cnt; |
2077 | } |
2078 | else |
2079 | { |
2080 | int *mcv_counts; |
2081 | |
2082 | /* Incomplete list; decide how many values are worth keeping */ |
2083 | if (num_mcv > track_cnt) |
2084 | num_mcv = track_cnt; |
2085 | |
2086 | if (num_mcv > 0) |
2087 | { |
2088 | mcv_counts = (int *) palloc(num_mcv * sizeof(int)); |
2089 | for (i = 0; i < num_mcv; i++) |
2090 | mcv_counts[i] = track[i].count; |
2091 | |
2092 | num_mcv = analyze_mcv_list(mcv_counts, num_mcv, |
2093 | stats->stadistinct, |
2094 | stats->stanullfrac, |
2095 | samplerows, totalrows); |
2096 | } |
2097 | } |
2098 | |
2099 | /* Generate MCV slot entry */ |
2100 | if (num_mcv > 0) |
2101 | { |
2102 | MemoryContext old_context; |
2103 | Datum *mcv_values; |
2104 | float4 *mcv_freqs; |
2105 | |
2106 | /* Must copy the target values into anl_context */ |
2107 | old_context = MemoryContextSwitchTo(stats->anl_context); |
2108 | mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum)); |
2109 | mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4)); |
2110 | for (i = 0; i < num_mcv; i++) |
2111 | { |
2112 | mcv_values[i] = datumCopy(track[i].value, |
2113 | stats->attrtype->typbyval, |
2114 | stats->attrtype->typlen); |
2115 | mcv_freqs[i] = (double) track[i].count / (double) samplerows; |
2116 | } |
2117 | MemoryContextSwitchTo(old_context); |
2118 | |
2119 | stats->stakind[0] = STATISTIC_KIND_MCV; |
2120 | stats->staop[0] = mystats->eqopr; |
2121 | stats->stacoll[0] = stats->attrcollid; |
2122 | stats->stanumbers[0] = mcv_freqs; |
2123 | stats->numnumbers[0] = num_mcv; |
2124 | stats->stavalues[0] = mcv_values; |
2125 | stats->numvalues[0] = num_mcv; |
2126 | |
2127 | /* |
2128 | * Accept the defaults for stats->statypid and others. They have |
2129 | * been set before we were called (see vacuum.h) |
2130 | */ |
2131 | } |
2132 | } |
2133 | else if (null_cnt > 0) |
2134 | { |
2135 | /* We found only nulls; assume the column is entirely null */ |
2136 | stats->stats_valid = true; |
2137 | stats->stanullfrac = 1.0; |
2138 | if (is_varwidth) |
2139 | stats->stawidth = 0; /* "unknown" */ |
2140 | else |
2141 | stats->stawidth = stats->attrtype->typlen; |
2142 | stats->stadistinct = 0.0; /* "unknown" */ |
2143 | } |
2144 | |
2145 | /* We don't need to bother cleaning up any of our temporary palloc's */ |
2146 | } |
2147 | |
2148 | |
2149 | /* |
2150 | * compute_scalar_stats() -- compute column statistics |
2151 | * |
2152 | * We use this when we can find "=" and "<" operators for the datatype. |
2153 | * |
2154 | * We determine the fraction of non-null rows, the average width, the |
2155 | * most common values, the (estimated) number of distinct values, the |
2156 | * distribution histogram, and the correlation of physical to logical order. |
2157 | * |
2158 | * The desired stats can be determined fairly easily after sorting the |
2159 | * data values into order. |
2160 | */ |
2161 | static void |
2162 | compute_scalar_stats(VacAttrStatsP stats, |
2163 | AnalyzeAttrFetchFunc fetchfunc, |
2164 | int samplerows, |
2165 | double totalrows) |
2166 | { |
2167 | int i; |
2168 | int null_cnt = 0; |
2169 | int nonnull_cnt = 0; |
2170 | int toowide_cnt = 0; |
2171 | double total_width = 0; |
2172 | bool is_varlena = (!stats->attrtype->typbyval && |
2173 | stats->attrtype->typlen == -1); |
2174 | bool is_varwidth = (!stats->attrtype->typbyval && |
2175 | stats->attrtype->typlen < 0); |
2176 | double corr_xysum; |
2177 | SortSupportData ssup; |
2178 | ScalarItem *values; |
2179 | int values_cnt = 0; |
2180 | int *tupnoLink; |
2181 | ScalarMCVItem *track; |
2182 | int track_cnt = 0; |
2183 | int num_mcv = stats->attr->attstattarget; |
2184 | int num_bins = stats->attr->attstattarget; |
2185 | StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; |
2186 | |
2187 | values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem)); |
2188 | tupnoLink = (int *) palloc(samplerows * sizeof(int)); |
2189 | track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem)); |
2190 | |
2191 | memset(&ssup, 0, sizeof(ssup)); |
2192 | ssup.ssup_cxt = CurrentMemoryContext; |
2193 | ssup.ssup_collation = stats->attrcollid; |
2194 | ssup.ssup_nulls_first = false; |
2195 | |
2196 | /* |
2197 | * For now, don't perform abbreviated key conversion, because full values |
2198 | * are required for MCV slot generation. Supporting that optimization |
2199 | * would necessitate teaching compare_scalars() to call a tie-breaker. |
2200 | */ |
2201 | ssup.abbreviate = false; |
2202 | |
2203 | PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup); |
2204 | |
2205 | /* Initial scan to find sortable values */ |
2206 | for (i = 0; i < samplerows; i++) |
2207 | { |
2208 | Datum value; |
2209 | bool isnull; |
2210 | |
2211 | vacuum_delay_point(); |
2212 | |
2213 | value = fetchfunc(stats, i, &isnull); |
2214 | |
2215 | /* Check for null/nonnull */ |
2216 | if (isnull) |
2217 | { |
2218 | null_cnt++; |
2219 | continue; |
2220 | } |
2221 | nonnull_cnt++; |
2222 | |
2223 | /* |
2224 | * If it's a variable-width field, add up widths for average width |
2225 | * calculation. Note that if the value is toasted, we use the toasted |
2226 | * width. We don't bother with this calculation if it's a fixed-width |
2227 | * type. |
2228 | */ |
2229 | if (is_varlena) |
2230 | { |
2231 | total_width += VARSIZE_ANY(DatumGetPointer(value)); |
2232 | |
2233 | /* |
2234 | * If the value is toasted, we want to detoast it just once to |
2235 | * avoid repeated detoastings and resultant excess memory usage |
2236 | * during the comparisons. Also, check to see if the value is |
2237 | * excessively wide, and if so don't detoast at all --- just |
2238 | * ignore the value. |
2239 | */ |
2240 | if (toast_raw_datum_size(value) > WIDTH_THRESHOLD) |
2241 | { |
2242 | toowide_cnt++; |
2243 | continue; |
2244 | } |
2245 | value = PointerGetDatum(PG_DETOAST_DATUM(value)); |
2246 | } |
2247 | else if (is_varwidth) |
2248 | { |
2249 | /* must be cstring */ |
2250 | total_width += strlen(DatumGetCString(value)) + 1; |
2251 | } |
2252 | |
2253 | /* Add it to the list to be sorted */ |
2254 | values[values_cnt].value = value; |
2255 | values[values_cnt].tupno = values_cnt; |
2256 | tupnoLink[values_cnt] = values_cnt; |
2257 | values_cnt++; |
2258 | } |
2259 | |
2260 | /* We can only compute real stats if we found some sortable values. */ |
2261 | if (values_cnt > 0) |
2262 | { |
2263 | int ndistinct, /* # distinct values in sample */ |
2264 | nmultiple, /* # that appear multiple times */ |
2265 | num_hist, |
2266 | dups_cnt; |
2267 | int slot_idx = 0; |
2268 | CompareScalarsContext cxt; |
2269 | |
2270 | /* Sort the collected values */ |
2271 | cxt.ssup = &ssup; |
2272 | cxt.tupnoLink = tupnoLink; |
2273 | qsort_arg((void *) values, values_cnt, sizeof(ScalarItem), |
2274 | compare_scalars, (void *) &cxt); |
2275 | |
2276 | /* |
2277 | * Now scan the values in order, find the most common ones, and also |
2278 | * accumulate ordering-correlation statistics. |
2279 | * |
2280 | * To determine which are most common, we first have to count the |
2281 | * number of duplicates of each value. The duplicates are adjacent in |
2282 | * the sorted list, so a brute-force approach is to compare successive |
2283 | * datum values until we find two that are not equal. However, that |
2284 | * requires N-1 invocations of the datum comparison routine, which are |
2285 | * completely redundant with work that was done during the sort. (The |
2286 | * sort algorithm must at some point have compared each pair of items |
2287 | * that are adjacent in the sorted order; otherwise it could not know |
2288 | * that it's ordered the pair correctly.) We exploit this by having |
2289 | * compare_scalars remember the highest tupno index that each |
2290 | * ScalarItem has been found equal to. At the end of the sort, a |
2291 | * ScalarItem's tupnoLink will still point to itself if and only if it |
2292 | * is the last item of its group of duplicates (since the group will |
2293 | * be ordered by tupno). |
2294 | */ |
2295 | corr_xysum = 0; |
2296 | ndistinct = 0; |
2297 | nmultiple = 0; |
2298 | dups_cnt = 0; |
2299 | for (i = 0; i < values_cnt; i++) |
2300 | { |
2301 | int tupno = values[i].tupno; |
2302 | |
2303 | corr_xysum += ((double) i) * ((double) tupno); |
2304 | dups_cnt++; |
2305 | if (tupnoLink[tupno] == tupno) |
2306 | { |
2307 | /* Reached end of duplicates of this value */ |
2308 | ndistinct++; |
2309 | if (dups_cnt > 1) |
2310 | { |
2311 | nmultiple++; |
2312 | if (track_cnt < num_mcv || |
2313 | dups_cnt > track[track_cnt - 1].count) |
2314 | { |
2315 | /* |
2316 | * Found a new item for the mcv list; find its |
2317 | * position, bubbling down old items if needed. Loop |
 * invariant is that j points at an empty/replaceable
 * slot.
2320 | */ |
2321 | int j; |
2322 | |
2323 | if (track_cnt < num_mcv) |
2324 | track_cnt++; |
2325 | for (j = track_cnt - 1; j > 0; j--) |
2326 | { |
2327 | if (dups_cnt <= track[j - 1].count) |
2328 | break; |
2329 | track[j].count = track[j - 1].count; |
2330 | track[j].first = track[j - 1].first; |
2331 | } |
2332 | track[j].count = dups_cnt; |
2333 | track[j].first = i + 1 - dups_cnt; |
2334 | } |
2335 | } |
2336 | dups_cnt = 0; |
2337 | } |
2338 | } |
2339 | |
2340 | stats->stats_valid = true; |
2341 | /* Do the simple null-frac and width stats */ |
2342 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
2343 | if (is_varwidth) |
2344 | stats->stawidth = total_width / (double) nonnull_cnt; |
2345 | else |
2346 | stats->stawidth = stats->attrtype->typlen; |
2347 | |
2348 | if (nmultiple == 0) |
2349 | { |
2350 | /* |
2351 | * If we found no repeated non-null values, assume it's a unique |
2352 | * column; but be sure to discount for any nulls we found. |
2353 | */ |
2354 | stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); |
2355 | } |
2356 | else if (toowide_cnt == 0 && nmultiple == ndistinct) |
2357 | { |
2358 | /* |
2359 | * Every value in the sample appeared more than once. Assume the |
2360 | * column has just these values. (This case is meant to address |
2361 | * columns with small, fixed sets of possible values, such as |
2362 | * boolean or enum columns. If there are any values that appear |
2363 | * just once in the sample, including too-wide values, we should |
2364 | * assume that that's not what we're dealing with.) |
2365 | */ |
2366 | stats->stadistinct = ndistinct; |
2367 | } |
2368 | else |
2369 | { |
2370 | /*---------- |
2371 | * Estimate the number of distinct values using the estimator |
2372 | * proposed by Haas and Stokes in IBM Research Report RJ 10025: |
2373 | * n*d / (n - f1 + f1*n/N) |
2374 | * where f1 is the number of distinct values that occurred |
2375 | * exactly once in our sample of n rows (from a total of N), |
2376 | * and d is the total number of distinct values in the sample. |
2377 | * This is their Duj1 estimator; the other estimators they |
2378 | * recommend are considerably more complex, and are numerically |
2379 | * very unstable when n is much smaller than N. |
2380 | * |
2381 | * In this calculation, we consider only non-nulls. We used to |
2382 | * include rows with null values in the n and N counts, but that |
2383 | * leads to inaccurate answers in columns with many nulls, and |
2384 | * it's intuitively bogus anyway considering the desired result is |
2385 | * the number of distinct non-null values. |
2386 | * |
2387 | * Overwidth values are assumed to have been distinct. |
2388 | *---------- |
2389 | */ |
2390 | int f1 = ndistinct - nmultiple + toowide_cnt; |
2391 | int d = f1 + nmultiple; |
2392 | double n = samplerows - null_cnt; |
2393 | double N = totalrows * (1.0 - stats->stanullfrac); |
2394 | double stadistinct; |
2395 | |
2396 | /* N == 0 shouldn't happen, but just in case ... */ |
2397 | if (N > 0) |
2398 | stadistinct = (n * d) / ((n - f1) + f1 * n / N); |
2399 | else |
2400 | stadistinct = 0; |
2401 | |
2402 | /* Clamp to sane range in case of roundoff error */ |
2403 | if (stadistinct < d) |
2404 | stadistinct = d; |
2405 | if (stadistinct > N) |
2406 | stadistinct = N; |
2407 | /* And round to integer */ |
2408 | stats->stadistinct = floor(stadistinct + 0.5); |
2409 | } |
2410 | |
2411 | /* |
2412 | * If we estimated the number of distinct values at more than 10% of |
2413 | * the total row count (a very arbitrary limit), then assume that |
2414 | * stadistinct should scale with the row count rather than be a fixed |
2415 | * value. |
2416 | */ |
2417 | if (stats->stadistinct > 0.1 * totalrows) |
2418 | stats->stadistinct = -(stats->stadistinct / totalrows); |
2419 | |
2420 | /* |
2421 | * Decide how many values are worth storing as most-common values. If |
2422 | * we are able to generate a complete MCV list (all the values in the |
2423 | * sample will fit, and we think these are all the ones in the table), |
2424 | * then do so. Otherwise, store only those values that are |
2425 | * significantly more common than the values not in the list. |
2426 | * |
2427 | * Note: the first of these cases is meant to address columns with |
2428 | * small, fixed sets of possible values, such as boolean or enum |
2429 | * columns. If we can *completely* represent the column population by |
2430 | * an MCV list that will fit into the stats target, then we should do |
2431 | * so and thus provide the planner with complete information. But if |
2432 | * the MCV list is not complete, it's generally worth being more |
2433 | * selective, and not just filling it all the way up to the stats |
2434 | * target. |
2435 | */ |
2436 | if (track_cnt == ndistinct && toowide_cnt == 0 && |
2437 | stats->stadistinct > 0 && |
2438 | track_cnt <= num_mcv) |
2439 | { |
2440 | /* Track list includes all values seen, and all will fit */ |
2441 | num_mcv = track_cnt; |
2442 | } |
2443 | else |
2444 | { |
2445 | int *mcv_counts; |
2446 | |
2447 | /* Incomplete list; decide how many values are worth keeping */ |
2448 | if (num_mcv > track_cnt) |
2449 | num_mcv = track_cnt; |
2450 | |
2451 | if (num_mcv > 0) |
2452 | { |
2453 | mcv_counts = (int *) palloc(num_mcv * sizeof(int)); |
2454 | for (i = 0; i < num_mcv; i++) |
2455 | mcv_counts[i] = track[i].count; |
2456 | |
2457 | num_mcv = analyze_mcv_list(mcv_counts, num_mcv, |
2458 | stats->stadistinct, |
2459 | stats->stanullfrac, |
2460 | samplerows, totalrows); |
2461 | } |
2462 | } |
2463 | |
2464 | /* Generate MCV slot entry */ |
2465 | if (num_mcv > 0) |
2466 | { |
2467 | MemoryContext old_context; |
2468 | Datum *mcv_values; |
2469 | float4 *mcv_freqs; |
2470 | |
2471 | /* Must copy the target values into anl_context */ |
2472 | old_context = MemoryContextSwitchTo(stats->anl_context); |
2473 | mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum)); |
2474 | mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4)); |
2475 | for (i = 0; i < num_mcv; i++) |
2476 | { |
2477 | mcv_values[i] = datumCopy(values[track[i].first].value, |
2478 | stats->attrtype->typbyval, |
2479 | stats->attrtype->typlen); |
2480 | mcv_freqs[i] = (double) track[i].count / (double) samplerows; |
2481 | } |
2482 | MemoryContextSwitchTo(old_context); |
2483 | |
2484 | stats->stakind[slot_idx] = STATISTIC_KIND_MCV; |
2485 | stats->staop[slot_idx] = mystats->eqopr; |
2486 | stats->stacoll[slot_idx] = stats->attrcollid; |
2487 | stats->stanumbers[slot_idx] = mcv_freqs; |
2488 | stats->numnumbers[slot_idx] = num_mcv; |
2489 | stats->stavalues[slot_idx] = mcv_values; |
2490 | stats->numvalues[slot_idx] = num_mcv; |
2491 | |
2492 | /* |
2493 | * Accept the defaults for stats->statypid and others. They have |
2494 | * been set before we were called (see vacuum.h) |
2495 | */ |
2496 | slot_idx++; |
2497 | } |
2498 | |
2499 | /* |
2500 | * Generate a histogram slot entry if there are at least two distinct |
2501 | * values not accounted for in the MCV list. (This ensures the |
2502 | * histogram won't collapse to empty or a singleton.) |
2503 | */ |
2504 | num_hist = ndistinct - num_mcv; |
2505 | if (num_hist > num_bins) |
2506 | num_hist = num_bins + 1; |
2507 | if (num_hist >= 2) |
2508 | { |
2509 | MemoryContext old_context; |
2510 | Datum *hist_values; |
2511 | int nvals; |
2512 | int pos, |
2513 | posfrac, |
2514 | delta, |
2515 | deltafrac; |
2516 | |
2517 | /* Sort the MCV items into position order to speed next loop */ |
2518 | qsort((void *) track, num_mcv, |
2519 | sizeof(ScalarMCVItem), compare_mcvs); |
2520 | |
2521 | /* |
2522 | * Collapse out the MCV items from the values[] array. |
2523 | * |
2524 | * Note we destroy the values[] array here... but we don't need it |
2525 | * for anything more. We do, however, still need values_cnt. |
2526 | * nvals will be the number of remaining entries in values[]. |
2527 | */ |
2528 | if (num_mcv > 0) |
2529 | { |
2530 | int src, |
2531 | dest; |
2532 | int j; |
2533 | |
2534 | src = dest = 0; |
2535 | j = 0; /* index of next interesting MCV item */ |
2536 | while (src < values_cnt) |
2537 | { |
2538 | int ncopy; |
2539 | |
2540 | if (j < num_mcv) |
2541 | { |
2542 | int first = track[j].first; |
2543 | |
2544 | if (src >= first) |
2545 | { |
2546 | /* advance past this MCV item */ |
2547 | src = first + track[j].count; |
2548 | j++; |
2549 | continue; |
2550 | } |
2551 | ncopy = first - src; |
2552 | } |
2553 | else |
2554 | ncopy = values_cnt - src; |
2555 | memmove(&values[dest], &values[src], |
2556 | ncopy * sizeof(ScalarItem)); |
2557 | src += ncopy; |
2558 | dest += ncopy; |
2559 | } |
2560 | nvals = dest; |
2561 | } |
2562 | else |
2563 | nvals = values_cnt; |
2564 | Assert(nvals >= num_hist); |
2565 | |
2566 | /* Must copy the target values into anl_context */ |
2567 | old_context = MemoryContextSwitchTo(stats->anl_context); |
2568 | hist_values = (Datum *) palloc(num_hist * sizeof(Datum)); |
2569 | |
2570 | /* |
2571 | * The object of this loop is to copy the first and last values[] |
2572 | * entries along with evenly-spaced values in between. So the |
2573 | * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)]. But |
2574 | * computing that subscript directly risks integer overflow when |
2575 | * the stats target is more than a couple thousand. Instead we |
2576 | * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking |
2577 | * the integral and fractional parts of the sum separately. |
2578 | */ |
2579 | delta = (nvals - 1) / (num_hist - 1); |
2580 | deltafrac = (nvals - 1) % (num_hist - 1); |
2581 | pos = posfrac = 0; |
2582 | |
2583 | for (i = 0; i < num_hist; i++) |
2584 | { |
2585 | hist_values[i] = datumCopy(values[pos].value, |
2586 | stats->attrtype->typbyval, |
2587 | stats->attrtype->typlen); |
2588 | pos += delta; |
2589 | posfrac += deltafrac; |
2590 | if (posfrac >= (num_hist - 1)) |
2591 | { |
2592 | /* fractional part exceeds 1, carry to integer part */ |
2593 | pos++; |
2594 | posfrac -= (num_hist - 1); |
2595 | } |
2596 | } |
2597 | |
2598 | MemoryContextSwitchTo(old_context); |
2599 | |
2600 | stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM; |
2601 | stats->staop[slot_idx] = mystats->ltopr; |
2602 | stats->stacoll[slot_idx] = stats->attrcollid; |
2603 | stats->stavalues[slot_idx] = hist_values; |
2604 | stats->numvalues[slot_idx] = num_hist; |
2605 | |
2606 | /* |
2607 | * Accept the defaults for stats->statypid and others. They have |
2608 | * been set before we were called (see vacuum.h) |
2609 | */ |
2610 | slot_idx++; |
2611 | } |
2612 | |
2613 | /* Generate a correlation entry if there are multiple values */ |
2614 | if (values_cnt > 1) |
2615 | { |
2616 | MemoryContext old_context; |
2617 | float4 *corrs; |
2618 | double corr_xsum, |
2619 | corr_x2sum; |
2620 | |
2621 | /* Must copy the target values into anl_context */ |
2622 | old_context = MemoryContextSwitchTo(stats->anl_context); |
2623 | corrs = (float4 *) palloc(sizeof(float4)); |
2624 | MemoryContextSwitchTo(old_context); |
2625 | |
2626 | /*---------- |
2627 | * Since we know the x and y value sets are both |
2628 | * 0, 1, ..., values_cnt-1 |
2629 | * we have sum(x) = sum(y) = |
2630 | * (values_cnt-1)*values_cnt / 2 |
2631 | * and sum(x^2) = sum(y^2) = |
2632 | * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6. |
2633 | *---------- |
2634 | */ |
2635 | corr_xsum = ((double) (values_cnt - 1)) * |
2636 | ((double) values_cnt) / 2.0; |
2637 | corr_x2sum = ((double) (values_cnt - 1)) * |
2638 | ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0; |
2639 | |
2640 | /* And the correlation coefficient reduces to */ |
2641 | corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) / |
2642 | (values_cnt * corr_x2sum - corr_xsum * corr_xsum); |
2643 | |
2644 | stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION; |
2645 | stats->staop[slot_idx] = mystats->ltopr; |
2646 | stats->stacoll[slot_idx] = stats->attrcollid; |
2647 | stats->stanumbers[slot_idx] = corrs; |
2648 | stats->numnumbers[slot_idx] = 1; |
2649 | slot_idx++; |
2650 | } |
2651 | } |
2652 | else if (nonnull_cnt > 0) |
2653 | { |
2654 | /* We found some non-null values, but they were all too wide */ |
2655 | Assert(nonnull_cnt == toowide_cnt); |
2656 | stats->stats_valid = true; |
2657 | /* Do the simple null-frac and width stats */ |
2658 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
2659 | if (is_varwidth) |
2660 | stats->stawidth = total_width / (double) nonnull_cnt; |
2661 | else |
2662 | stats->stawidth = stats->attrtype->typlen; |
2663 | /* Assume all too-wide values are distinct, so it's a unique column */ |
2664 | stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); |
2665 | } |
2666 | else if (null_cnt > 0) |
2667 | { |
2668 | /* We found only nulls; assume the column is entirely null */ |
2669 | stats->stats_valid = true; |
2670 | stats->stanullfrac = 1.0; |
2671 | if (is_varwidth) |
2672 | stats->stawidth = 0; /* "unknown" */ |
2673 | else |
2674 | stats->stawidth = stats->attrtype->typlen; |
2675 | stats->stadistinct = 0.0; /* "unknown" */ |
2676 | } |
2677 | |
2678 | /* We don't need to bother cleaning up any of our temporary palloc's */ |
2679 | } |
2680 | |
2681 | /* |
2682 | * qsort_arg comparator for sorting ScalarItems |
2683 | * |
2684 | * Aside from sorting the items, we update the tupnoLink[] array |
2685 | * whenever two ScalarItems are found to contain equal datums. The array |
2686 | * is indexed by tupno; for each ScalarItem, it contains the highest |
2687 | * tupno that that item's datum has been found to be equal to. This allows |
2688 | * us to avoid additional comparisons in compute_scalar_stats(). |
2689 | */ |
2690 | static int |
2691 | compare_scalars(const void *a, const void *b, void *arg) |
2692 | { |
2693 | Datum da = ((const ScalarItem *) a)->value; |
2694 | int ta = ((const ScalarItem *) a)->tupno; |
2695 | Datum db = ((const ScalarItem *) b)->value; |
2696 | int tb = ((const ScalarItem *) b)->tupno; |
2697 | CompareScalarsContext *cxt = (CompareScalarsContext *) arg; |
2698 | int compare; |
2699 | |
2700 | compare = ApplySortComparator(da, false, db, false, cxt->ssup); |
2701 | if (compare != 0) |
2702 | return compare; |
2703 | |
2704 | /* |
2705 | * The two datums are equal, so update cxt->tupnoLink[]. |
2706 | */ |
2707 | if (cxt->tupnoLink[ta] < tb) |
2708 | cxt->tupnoLink[ta] = tb; |
2709 | if (cxt->tupnoLink[tb] < ta) |
2710 | cxt->tupnoLink[tb] = ta; |
2711 | |
2712 | /* |
2713 | * For equal datums, sort by tupno |
2714 | */ |
2715 | return ta - tb; |
2716 | } |
2717 | |
2718 | /* |
2719 | * qsort comparator for sorting ScalarMCVItems by position |
2720 | */ |
2721 | static int |
2722 | compare_mcvs(const void *a, const void *b) |
2723 | { |
2724 | int da = ((const ScalarMCVItem *) a)->first; |
2725 | int db = ((const ScalarMCVItem *) b)->first; |
2726 | |
2727 | return da - db; |
2728 | } |
2729 | |
2730 | /* |
2731 | * Analyze the list of common values in the sample and decide how many are |
2732 | * worth storing in the table's MCV list. |
2733 | * |
2734 | * mcv_counts is assumed to be a list of the counts of the most common values |
2735 | * seen in the sample, starting with the most common. The return value is the |
2736 | * number that are significantly more common than the values not in the list, |
2737 | * and which are therefore deemed worth storing in the table's MCV list. |
2738 | */ |
2739 | static int |
2740 | analyze_mcv_list(int *mcv_counts, |
2741 | int num_mcv, |
2742 | double stadistinct, |
2743 | double stanullfrac, |
2744 | int samplerows, |
2745 | double totalrows) |
2746 | { |
2747 | double ndistinct_table; |
2748 | double sumcount; |
2749 | int i; |
2750 | |
2751 | /* |
2752 | * If the entire table was sampled, keep the whole list. This also |
2753 | * protects us against division by zero in the code below. |
2754 | */ |
2755 | if (samplerows == totalrows || totalrows <= 1.0) |
2756 | return num_mcv; |
2757 | |
2758 | /* Re-extract the estimated number of distinct nonnull values in table */ |
2759 | ndistinct_table = stadistinct; |
2760 | if (ndistinct_table < 0) |
2761 | ndistinct_table = -ndistinct_table * totalrows; |
2762 | |
2763 | /* |
2764 | * Exclude the least common values from the MCV list, if they are not |
2765 | * significantly more common than the estimated selectivity they would |
2766 | * have if they weren't in the list. All non-MCV values are assumed to be |
2767 | * equally common, after taking into account the frequencies of all the |
 * values in the MCV list and the number of nulls (cf. eqsel()).
2769 | * |
2770 | * Here sumcount tracks the total count of all but the last (least common) |
2771 | * value in the MCV list, allowing us to determine the effect of excluding |
2772 | * that value from the list. |
2773 | * |
2774 | * Note that we deliberately do this by removing values from the full |
2775 | * list, rather than starting with an empty list and adding values, |
2776 | * because the latter approach can fail to add any values if all the most |
2777 | * common values have around the same frequency and make up the majority |
2778 | * of the table, so that the overall average frequency of all values is |
2779 | * roughly the same as that of the common values. This would lead to any |
2780 | * uncommon values being significantly overestimated. |
2781 | */ |
2782 | sumcount = 0.0; |
2783 | for (i = 0; i < num_mcv - 1; i++) |
2784 | sumcount += mcv_counts[i]; |
2785 | |
2786 | while (num_mcv > 0) |
2787 | { |
2788 | double selec, |
2789 | otherdistinct, |
2790 | N, |
2791 | n, |
2792 | K, |
2793 | variance, |
2794 | stddev; |
2795 | |
2796 | /* |
2797 | * Estimated selectivity the least common value would have if it |
 * wasn't in the MCV list (cf. eqsel()).
2799 | */ |
2800 | selec = 1.0 - sumcount / samplerows - stanullfrac; |
2801 | if (selec < 0.0) |
2802 | selec = 0.0; |
2803 | if (selec > 1.0) |
2804 | selec = 1.0; |
2805 | otherdistinct = ndistinct_table - (num_mcv - 1); |
2806 | if (otherdistinct > 1) |
2807 | selec /= otherdistinct; |
2808 | |
2809 | /* |
2810 | * If the value is kept in the MCV list, its population frequency is |
2811 | * assumed to equal its sample frequency. We use the lower end of a |
2812 | * textbook continuity-corrected Wald-type confidence interval to |
2813 | * determine if that is significantly more common than the non-MCV |
2814 | * frequency --- specifically we assume the population frequency is |
2815 | * highly likely to be within around 2 standard errors of the sample |
2816 | * frequency, which equates to an interval of 2 standard deviations |
2817 | * either side of the sample count, plus an additional 0.5 for the |
2818 | * continuity correction. Since we are sampling without replacement, |
2819 | * this is a hypergeometric distribution. |
2820 | * |
2821 | * XXX: Empirically, this approach seems to work quite well, but it |
2822 | * may be worth considering more advanced techniques for estimating |
2823 | * the confidence interval of the hypergeometric distribution. |
2824 | */ |
2825 | N = totalrows; |
2826 | n = samplerows; |
2827 | K = N * mcv_counts[num_mcv - 1] / n; |
2828 | variance = n * K * (N - K) * (N - n) / (N * N * (N - 1)); |
2829 | stddev = sqrt(variance); |
2830 | |
2831 | if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5) |
2832 | { |
2833 | /* |
2834 | * The value is significantly more common than the non-MCV |
2835 | * selectivity would suggest. Keep it, and all the other more |
2836 | * common values in the list. |
2837 | */ |
2838 | break; |
2839 | } |
2840 | else |
2841 | { |
2842 | /* Discard this value and consider the next least common value */ |
2843 | num_mcv--; |
2844 | if (num_mcv == 0) |
2845 | break; |
2846 | sumcount -= mcv_counts[num_mcv - 1]; |
2847 | } |
2848 | } |
2849 | return num_mcv; |
2850 | } |
2851 | |