| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * analyze.c |
| 4 | * the Postgres statistics generator |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/commands/analyze.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include <math.h> |
| 18 | |
| 19 | #include "access/genam.h" |
| 20 | #include "access/multixact.h" |
| 21 | #include "access/relation.h" |
| 22 | #include "access/sysattr.h" |
| 23 | #include "access/table.h" |
| 24 | #include "access/tableam.h" |
| 25 | #include "access/transam.h" |
| 26 | #include "access/tupconvert.h" |
| 27 | #include "access/tuptoaster.h" |
| 28 | #include "access/visibilitymap.h" |
| 29 | #include "access/xact.h" |
| 30 | #include "catalog/catalog.h" |
| 31 | #include "catalog/index.h" |
| 32 | #include "catalog/indexing.h" |
| 33 | #include "catalog/pg_collation.h" |
| 34 | #include "catalog/pg_inherits.h" |
| 35 | #include "catalog/pg_namespace.h" |
| 36 | #include "catalog/pg_statistic_ext.h" |
| 37 | #include "commands/dbcommands.h" |
| 38 | #include "commands/tablecmds.h" |
| 39 | #include "commands/vacuum.h" |
| 40 | #include "executor/executor.h" |
| 41 | #include "foreign/fdwapi.h" |
| 42 | #include "miscadmin.h" |
| 43 | #include "nodes/nodeFuncs.h" |
| 44 | #include "parser/parse_oper.h" |
| 45 | #include "parser/parse_relation.h" |
| 46 | #include "pgstat.h" |
| 47 | #include "postmaster/autovacuum.h" |
| 48 | #include "statistics/extended_stats_internal.h" |
| 49 | #include "statistics/statistics.h" |
| 50 | #include "storage/bufmgr.h" |
| 51 | #include "storage/lmgr.h" |
| 52 | #include "storage/proc.h" |
| 53 | #include "storage/procarray.h" |
| 54 | #include "utils/acl.h" |
| 55 | #include "utils/attoptcache.h" |
| 56 | #include "utils/builtins.h" |
| 57 | #include "utils/datum.h" |
| 58 | #include "utils/fmgroids.h" |
| 59 | #include "utils/guc.h" |
| 60 | #include "utils/lsyscache.h" |
| 61 | #include "utils/memutils.h" |
| 62 | #include "utils/pg_rusage.h" |
| 63 | #include "utils/sampling.h" |
| 64 | #include "utils/sortsupport.h" |
| 65 | #include "utils/syscache.h" |
| 66 | #include "utils/timestamp.h" |
| 67 | |
| 68 | |
/*
 * Per-index data for ANALYZE.  One of these is built for each index of the
 * target relation; it carries the info needed to compute statistics for the
 * index's expression columns and to prorate row counts for a partial index.
 */
typedef struct AnlIndexData
{
	IndexInfo  *indexInfo;		/* BuildIndexInfo result */
	double		tupleFract;		/* fraction of rows for partial index */
	VacAttrStats **vacattrstats;	/* index attrs to analyze */
	int			attr_cnt;		/* number of entries in vacattrstats */
} AnlIndexData;
| 77 | |
| 78 | |
/* Default statistics target (GUC parameter) */
int			default_statistics_target = 100;

/*
 * A few variables that don't seem worth passing around as parameters.
 * anl_context is the per-ANALYZE working memory context (created/destroyed
 * in do_analyze_rel); vac_strategy is the caller-supplied buffer access
 * strategy, installed at the top of analyze_rel.
 */
static MemoryContext anl_context = NULL;
static BufferAccessStrategy vac_strategy;


/* Guts of ANALYZE for a single relation (or its inheritance tree) */
static void do_analyze_rel(Relation onerel,
						   VacuumParams *params, List *va_cols,
						   AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
						   bool inh, bool in_outer_xact, int elevel);
/* Statistics computation for index expression columns */
static void compute_index_stats(Relation onerel, double totalrows,
								AnlIndexData *indexdata, int nindexes,
								HeapTuple *rows, int numrows,
								MemoryContext col_context);
/* Decide whether a column is analyzable; build its VacAttrStats if so */
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
									   Node *index_expr);
/* Row-sampling routines (single rel, qsort comparator, inheritance tree) */
static int	acquire_sample_rows(Relation onerel, int elevel,
								HeapTuple *rows, int targrows,
								double *totalrows, double *totaldeadrows);
static int	compare_rows(const void *a, const void *b);
static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
										  HeapTuple *rows, int targrows,
										  double *totalrows, double *totaldeadrows);
/* Write completed per-column stats into pg_statistic */
static void update_attstats(Oid relid, bool inh,
							int natts, VacAttrStats **vacattrstats);
/* Datum-fetch callbacks handed to compute_stats (table rows vs. index exprs) */
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
| 108 | |
| 109 | |
/*
 *	analyze_rel() -- analyze one relation
 *
 * relid identifies the relation to analyze.  If relation is supplied, use
 * the name therein for reporting any failure to open/lock the rel; do not
 * use it once we've successfully opened the rel, since it might be stale.
 *
 * params carries the ANALYZE options (verbosity, autovacuum log threshold);
 * va_cols is an optional list of column names restricting the analysis;
 * in_outer_xact is passed through to pg_class stats updates; bstrategy is
 * the buffer access strategy used when reading sample pages.
 *
 * Silently returns without doing anything if the rel can't be opened,
 * isn't owned appropriately, or isn't of an analyzable kind.
 */
void
analyze_rel(Oid relid, RangeVar *relation,
			VacuumParams *params, List *va_cols, bool in_outer_xact,
			BufferAccessStrategy bstrategy)
{
	Relation	onerel;
	int			elevel;
	AcquireSampleRowsFunc acquirefunc = NULL;
	BlockNumber relpages = 0;

	/* Select logging level */
	if (params->options & VACOPT_VERBOSE)
		elevel = INFO;
	else
		elevel = DEBUG2;

	/* Set up static variables (consumed later by the sampling routines) */
	vac_strategy = bstrategy;

	/*
	 * Check for user-requested abort.
	 */
	CHECK_FOR_INTERRUPTS();

	/*
	 * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
	 * ANALYZEs don't run on it concurrently.  (This also locks out a
	 * concurrent VACUUM, which doesn't matter much at the moment but might
	 * matter if we ever try to accumulate stats on dead tuples.)  If the rel
	 * has been dropped since we last saw it, we don't need to process it.
	 *
	 * Make sure to generate only logs for ANALYZE in this case.
	 */
	onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM),
								  params->log_min_duration >= 0,
								  ShareUpdateExclusiveLock);

	/* leave if relation could not be opened or locked */
	if (!onerel)
		return;

	/*
	 * Check if relation needs to be skipped based on ownership.  This check
	 * happens also when building the relation list to analyze for a manual
	 * operation, and needs to be done additionally here as ANALYZE could
	 * happen across multiple transactions where relation ownership could have
	 * changed in-between.  Make sure to generate only logs for ANALYZE in
	 * this case.
	 */
	if (!vacuum_is_relation_owner(RelationGetRelid(onerel),
								  onerel->rd_rel,
								  params->options & VACOPT_ANALYZE))
	{
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * Silently ignore tables that are temp tables of other backends ---
	 * trying to analyze these is rather pointless, since their contents are
	 * probably not up-to-date on disk.  (We don't throw a warning here; it
	 * would just lead to chatter during a database-wide ANALYZE.)
	 */
	if (RELATION_IS_OTHER_TEMP(onerel))
	{
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * We can ANALYZE any table except pg_statistic.  See update_attstats
	 */
	if (RelationGetRelid(onerel) == StatisticRelationId)
	{
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * Check that it's of an analyzable relkind, and set up appropriately.
	 */
	if (onerel->rd_rel->relkind == RELKIND_RELATION ||
		onerel->rd_rel->relkind == RELKIND_MATVIEW)
	{
		/* Regular table, so we'll use the regular row acquisition function */
		acquirefunc = acquire_sample_rows;
		/* Also get regular table's size */
		relpages = RelationGetNumberOfBlocks(onerel);
	}
	else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
	{
		/*
		 * For a foreign table, call the FDW's hook function to see whether it
		 * supports analysis.  The FDW supplies both the acquirefunc and its
		 * own notion of relpages.
		 */
		FdwRoutine *fdwroutine;
		bool		ok = false;

		fdwroutine = GetFdwRoutineForRelation(onerel, false);

		if (fdwroutine->AnalyzeForeignTable != NULL)
			ok = fdwroutine->AnalyzeForeignTable(onerel,
												 &acquirefunc,
												 &relpages);

		if (!ok)
		{
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- cannot analyze this foreign table",
							RelationGetRelationName(onerel))));
			relation_close(onerel, ShareUpdateExclusiveLock);
			return;
		}
	}
	else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		/*
		 * For partitioned tables, we want to do the recursive ANALYZE below.
		 * (acquirefunc stays NULL; the non-recursive pass is skipped.)
		 */
	}
	else
	{
		/* No need for a WARNING if we already complained during VACUUM */
		if (!(params->options & VACOPT_VACUUM))
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
							RelationGetRelationName(onerel))));
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * OK, let's do it.  First let other backends know I'm in ANALYZE.
	 */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
	MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
	LWLockRelease(ProcArrayLock);

	/*
	 * Do the normal non-recursive ANALYZE.  We can skip this for partitioned
	 * tables, which don't contain any rows.
	 */
	if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		do_analyze_rel(onerel, params, va_cols, acquirefunc,
					   relpages, false, in_outer_xact, elevel);

	/*
	 * If there are child tables, do recursive ANALYZE.
	 */
	if (onerel->rd_rel->relhassubclass)
		do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages,
					   true, in_outer_xact, elevel);

	/*
	 * Close source relation now, but keep lock so that no one deletes it
	 * before we commit.  (If someone did, they'd fail to clean up the entries
	 * we made in pg_statistic.  Also, releasing the lock before commit would
	 * expose us to concurrent-update failures in update_attstats.)
	 */
	relation_close(onerel, NoLock);

	/*
	 * Reset my PGXACT flag.  Note: we need this here, and not in vacuum_rel,
	 * because the vacuum flag is cleared by the end-of-xact code.
	 */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
	MyPgXact->vacuumFlags &= ~PROC_IN_ANALYZE;
	LWLockRelease(ProcArrayLock);
}
| 286 | |
/*
 *	do_analyze_rel() -- analyze one relation, recursively or not
 *
 * Note that "acquirefunc" is only relevant for the non-inherited case.
 * For the inherited case, acquire_inherited_sample_rows() determines the
 * appropriate acquirefunc for each child table.
 *
 * This does the real work: pick target columns, sample rows, run each
 * column's compute_stats routine, then persist results to pg_statistic
 * and pg_class.  All transient allocations go into anl_context, which is
 * created here and destroyed on exit.  The work is done under the table
 * owner's userid with security-restricted operation in force.
 */
static void
do_analyze_rel(Relation onerel, VacuumParams *params,
			   List *va_cols, AcquireSampleRowsFunc acquirefunc,
			   BlockNumber relpages, bool inh, bool in_outer_xact,
			   int elevel)
{
	int			attr_cnt,
				tcnt,
				i,
				ind;
	Relation   *Irel;
	int			nindexes;
	bool		hasindex;
	VacAttrStats **vacattrstats;
	AnlIndexData *indexdata;
	int			targrows,
				numrows;
	double		totalrows,
				totaldeadrows;
	HeapTuple  *rows;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	MemoryContext caller_context;
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	if (inh)
		ereport(elevel,
				(errmsg("analyzing \"%s.%s\" inheritance tree",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
	else
		ereport(elevel,
				(errmsg("analyzing \"%s.%s\"",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));

	/*
	 * Set up a working context so that we can easily free whatever junk gets
	 * created.
	 */
	anl_context = AllocSetContextCreate(CurrentMemoryContext,
										"Analyze",
										ALLOCSET_DEFAULT_SIZES);
	caller_context = MemoryContextSwitchTo(anl_context);

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(onerel->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
	{
		pg_rusage_init(&ru0);
		if (params->log_min_duration > 0)
			starttime = GetCurrentTimestamp();
	}

	/*
	 * Determine which columns to analyze
	 *
	 * Note that system attributes are never analyzed, so we just reject them
	 * at the lookup stage.  We also reject duplicate column mentions.  (We
	 * could alternatively ignore duplicates, but analyzing a column twice
	 * won't work; we'd end up making a conflicting update in pg_statistic.)
	 */
	if (va_cols != NIL)
	{
		Bitmapset  *unique_cols = NULL;
		ListCell   *le;

		vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
												sizeof(VacAttrStats *));
		tcnt = 0;
		foreach(le, va_cols)
		{
			char	   *col = strVal(lfirst(le));

			i = attnameAttNum(onerel, col, false);
			if (i == InvalidAttrNumber)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_COLUMN),
						 errmsg("column \"%s\" of relation \"%s\" does not exist",
								col, RelationGetRelationName(onerel))));
			if (bms_is_member(i, unique_cols))
				ereport(ERROR,
						(errcode(ERRCODE_DUPLICATE_COLUMN),
						 errmsg("column \"%s\" of relation \"%s\" appears more than once",
								col, RelationGetRelationName(onerel))));
			unique_cols = bms_add_member(unique_cols, i);

			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
			if (vacattrstats[tcnt] != NULL)
				tcnt++;
		}
		attr_cnt = tcnt;
	}
	else
	{
		/* No explicit column list: consider every attribute of the rel */
		attr_cnt = onerel->rd_att->natts;
		vacattrstats = (VacAttrStats **)
			palloc(attr_cnt * sizeof(VacAttrStats *));
		tcnt = 0;
		for (i = 1; i <= attr_cnt; i++)
		{
			/* examine_attribute returns NULL for non-analyzable columns */
			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
			if (vacattrstats[tcnt] != NULL)
				tcnt++;
		}
		attr_cnt = tcnt;
	}

	/*
	 * Open all indexes of the relation, and see if there are any analyzable
	 * columns in the indexes.  We do not analyze index columns if there was
	 * an explicit column list in the ANALYZE command, however.  If we are
	 * doing a recursive scan, we don't want to touch the parent's indexes at
	 * all.
	 */
	if (!inh)
		vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
	else
	{
		Irel = NULL;
		nindexes = 0;
	}
	hasindex = (nindexes > 0);
	indexdata = NULL;
	if (hasindex)
	{
		indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
		for (ind = 0; ind < nindexes; ind++)
		{
			AnlIndexData *thisdata = &indexdata[ind];
			IndexInfo  *indexInfo;

			thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
			thisdata->tupleFract = 1.0; /* fix later if partial */
			if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
			{
				ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);

				thisdata->vacattrstats = (VacAttrStats **)
					palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
				tcnt = 0;
				for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
				{
					int			keycol = indexInfo->ii_IndexAttrNumbers[i];

					if (keycol == 0)
					{
						/* Found an index expression */
						Node	   *indexkey;

						if (indexpr_item == NULL)	/* shouldn't happen */
							elog(ERROR, "too few entries in indexprs list");
						indexkey = (Node *) lfirst(indexpr_item);
						indexpr_item = lnext(indexpr_item);
						thisdata->vacattrstats[tcnt] =
							examine_attribute(Irel[ind], i + 1, indexkey);
						if (thisdata->vacattrstats[tcnt] != NULL)
							tcnt++;
					}
				}
				thisdata->attr_cnt = tcnt;
			}
		}
	}

	/*
	 * Determine how many rows we need to sample, using the worst case from
	 * all analyzable columns.  We use a lower bound of 100 rows to avoid
	 * possible overflow in Vitter's algorithm.  (Note: that will also be the
	 * target in the corner case where there are no analyzable columns.)
	 */
	targrows = 100;
	for (i = 0; i < attr_cnt; i++)
	{
		if (targrows < vacattrstats[i]->minrows)
			targrows = vacattrstats[i]->minrows;
	}
	for (ind = 0; ind < nindexes; ind++)
	{
		AnlIndexData *thisdata = &indexdata[ind];

		for (i = 0; i < thisdata->attr_cnt; i++)
		{
			if (targrows < thisdata->vacattrstats[i]->minrows)
				targrows = thisdata->vacattrstats[i]->minrows;
		}
	}

	/*
	 * Acquire the sample rows.  The acquire function also reports its
	 * estimates of the table's total live and dead row counts.
	 */
	rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
	if (inh)
		numrows = acquire_inherited_sample_rows(onerel, elevel,
												rows, targrows,
												&totalrows, &totaldeadrows);
	else
		numrows = (*acquirefunc) (onerel, elevel,
								  rows, targrows,
								  &totalrows, &totaldeadrows);

	/*
	 * Compute the statistics.  Temporary results during the calculations for
	 * each column are stored in a child context.  The calc routines are
	 * responsible to make sure that whatever they store into the VacAttrStats
	 * structure is allocated in anl_context.
	 */
	if (numrows > 0)
	{
		MemoryContext col_context,
					old_context;

		col_context = AllocSetContextCreate(anl_context,
											"Analyze Column",
											ALLOCSET_DEFAULT_SIZES);
		old_context = MemoryContextSwitchTo(col_context);

		for (i = 0; i < attr_cnt; i++)
		{
			VacAttrStats *stats = vacattrstats[i];
			AttributeOpts *aopt;

			stats->rows = rows;
			stats->tupDesc = onerel->rd_att;
			stats->compute_stats(stats,
								 std_fetch_func,
								 numrows,
								 totalrows);

			/*
			 * If the appropriate flavor of the n_distinct option is
			 * specified, override with the corresponding value.
			 */
			aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
			if (aopt != NULL)
			{
				float8		n_distinct;

				n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
				if (n_distinct != 0.0)
					stats->stadistinct = n_distinct;
			}

			/* reclaim this column's temporary allocations before the next */
			MemoryContextResetAndDeleteChildren(col_context);
		}

		if (hasindex)
			compute_index_stats(onerel, totalrows,
								indexdata, nindexes,
								rows, numrows,
								col_context);

		MemoryContextSwitchTo(old_context);
		MemoryContextDelete(col_context);

		/*
		 * Emit the completed stats rows into pg_statistic, replacing any
		 * previous statistics for the target columns.  (If there are stats in
		 * pg_statistic for columns we didn't process, we leave them alone.)
		 */
		update_attstats(RelationGetRelid(onerel), inh,
						attr_cnt, vacattrstats);

		for (ind = 0; ind < nindexes; ind++)
		{
			AnlIndexData *thisdata = &indexdata[ind];

			update_attstats(RelationGetRelid(Irel[ind]), false,
							thisdata->attr_cnt, thisdata->vacattrstats);
		}

		/*
		 * Build extended statistics (if there are any).
		 *
		 * For now we only build extended statistics on individual relations,
		 * not for relations representing inheritance trees.
		 */
		if (!inh)
			BuildRelationExtStatistics(onerel, totalrows, numrows, rows,
									   attr_cnt, vacattrstats);
	}

	/*
	 * Update pages/tuples stats in pg_class ... but not if we're doing
	 * inherited stats.
	 */
	if (!inh)
	{
		BlockNumber relallvisible;

		visibilitymap_count(onerel, &relallvisible, NULL);

		vac_update_relstats(onerel,
							relpages,
							totalrows,
							relallvisible,
							hasindex,
							InvalidTransactionId,
							InvalidMultiXactId,
							in_outer_xact);
	}

	/*
	 * Same for indexes.  Vacuum always scans all indexes, so if we're part of
	 * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
	 * VACUUM.
	 */
	if (!inh && !(params->options & VACOPT_VACUUM))
	{
		for (ind = 0; ind < nindexes; ind++)
		{
			AnlIndexData *thisdata = &indexdata[ind];
			double		totalindexrows;

			/* prorate by the fraction of sample rows the index covers */
			totalindexrows = ceil(thisdata->tupleFract * totalrows);
			vac_update_relstats(Irel[ind],
								RelationGetNumberOfBlocks(Irel[ind]),
								totalindexrows,
								0,
								false,
								InvalidTransactionId,
								InvalidMultiXactId,
								in_outer_xact);
		}
	}

	/*
	 * Report ANALYZE to the stats collector, too.  However, if doing
	 * inherited stats we shouldn't report, because the stats collector only
	 * tracks per-table stats.  Reset the changes_since_analyze counter only
	 * if we analyzed all columns; otherwise, there is still work for
	 * auto-analyze to do.
	 */
	if (!inh)
		pgstat_report_analyze(onerel, totalrows, totaldeadrows,
							  (va_cols == NIL));

	/* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
	if (!(params->options & VACOPT_VACUUM))
	{
		for (ind = 0; ind < nindexes; ind++)
		{
			IndexBulkDeleteResult *stats;
			IndexVacuumInfo ivinfo;

			ivinfo.index = Irel[ind];
			ivinfo.analyze_only = true;
			ivinfo.estimated_count = true;
			ivinfo.message_level = elevel;
			ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
			ivinfo.strategy = vac_strategy;

			stats = index_vacuum_cleanup(&ivinfo, NULL);

			if (stats)
				pfree(stats);
		}
	}

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/* Log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
	{
		if (params->log_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   params->log_min_duration))
			ereport(LOG,
					(errmsg("automatic analyze of table \"%s.%s.%s\" system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							pg_rusage_show(&ru0))));
	}

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);

	/* Restore current context and release memory */
	MemoryContextSwitchTo(caller_context);
	MemoryContextDelete(anl_context);
	anl_context = NULL;
}
| 692 | |
/*
 * Compute statistics about indexes of a relation.
 *
 * For each index in indexdata[0..nindexes-1], evaluate its expression
 * columns (and partial-index predicate, if any) against the already-acquired
 * sample rows, then run the regular compute_stats machinery over the
 * resulting expression values.  Also fills in each index's tupleFract
 * (fraction of sampled rows satisfying a partial index's predicate).
 *
 * col_context is the caller's per-column working context; it is reset after
 * each column's stats are computed.  Per-index transient state lives in a
 * local "Analyze Index" context that is reset between indexes.
 */
static void
compute_index_stats(Relation onerel, double totalrows,
					AnlIndexData *indexdata, int nindexes,
					HeapTuple *rows, int numrows,
					MemoryContext col_context)
{
	MemoryContext ind_context,
				old_context;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	int			ind,
				i;

	ind_context = AllocSetContextCreate(anl_context,
										"Analyze Index",
										ALLOCSET_DEFAULT_SIZES);
	old_context = MemoryContextSwitchTo(ind_context);

	for (ind = 0; ind < nindexes; ind++)
	{
		AnlIndexData *thisdata = &indexdata[ind];
		IndexInfo  *indexInfo = thisdata->indexInfo;
		int			attr_cnt = thisdata->attr_cnt;
		TupleTableSlot *slot;
		EState	   *estate;
		ExprContext *econtext;
		ExprState  *predicate;
		Datum	   *exprvals;
		bool	   *exprnulls;
		int			numindexrows,
					tcnt,
					rowno;
		double		totalindexrows;

		/* Ignore index if no columns to analyze and not partial */
		if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
			continue;

		/*
		 * Need an EState for evaluation of index expressions and
		 * partial-index predicates.  Create it in the per-index context to be
		 * sure it gets cleaned up at the bottom of the loop.
		 */
		estate = CreateExecutorState();
		econtext = GetPerTupleExprContext(estate);
		/* Need a slot to hold the current heap tuple, too */
		slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
										&TTSOpsHeapTuple);

		/* Arrange for econtext's scan tuple to be the tuple under test */
		econtext->ecxt_scantuple = slot;

		/* Set up execution state for predicate. */
		predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

		/* Compute and save index expression values */
		exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
		exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
		numindexrows = 0;
		tcnt = 0;
		for (rowno = 0; rowno < numrows; rowno++)
		{
			HeapTuple	heapTuple = rows[rowno];

			/* honor cost-based vacuum delay and interrupts */
			vacuum_delay_point();

			/*
			 * Reset the per-tuple context each time, to reclaim any cruft
			 * left behind by evaluating the predicate or index expressions.
			 */
			ResetExprContext(econtext);

			/* Set up for predicate or expression evaluation */
			ExecStoreHeapTuple(heapTuple, slot, false);

			/* If index is partial, check predicate */
			if (predicate != NULL)
			{
				if (!ExecQual(predicate, econtext))
					continue;
			}
			numindexrows++;

			if (attr_cnt > 0)
			{
				/*
				 * Evaluate the index row to compute expression values.  We
				 * could do this by hand, but FormIndexDatum is convenient.
				 */
				FormIndexDatum(indexInfo,
							   slot,
							   estate,
							   values,
							   isnull);

				/*
				 * Save just the columns we care about.  We copy the values
				 * into ind_context from the estate's per-tuple context.
				 */
				for (i = 0; i < attr_cnt; i++)
				{
					VacAttrStats *stats = thisdata->vacattrstats[i];
					int			attnum = stats->attr->attnum;

					if (isnull[attnum - 1])
					{
						exprvals[tcnt] = (Datum) 0;
						exprnulls[tcnt] = true;
					}
					else
					{
						exprvals[tcnt] = datumCopy(values[attnum - 1],
												   stats->attrtype->typbyval,
												   stats->attrtype->typlen);
						exprnulls[tcnt] = false;
					}
					tcnt++;
				}
			}
		}

		/*
		 * Having counted the number of rows that pass the predicate in the
		 * sample, we can estimate the total number of rows in the index.
		 */
		thisdata->tupleFract = (double) numindexrows / (double) numrows;
		totalindexrows = ceil(thisdata->tupleFract * totalrows);

		/*
		 * Now we can compute the statistics for the expression columns.
		 */
		if (numindexrows > 0)
		{
			MemoryContextSwitchTo(col_context);
			for (i = 0; i < attr_cnt; i++)
			{
				VacAttrStats *stats = thisdata->vacattrstats[i];
				AttributeOpts *aopt =
				get_attribute_options(stats->attr->attrelid,
									  stats->attr->attnum);

				/* expression values are stored row-major; stride = attr_cnt */
				stats->exprvals = exprvals + i;
				stats->exprnulls = exprnulls + i;
				stats->rowstride = attr_cnt;
				stats->compute_stats(stats,
									 ind_fetch_func,
									 numindexrows,
									 totalindexrows);

				/*
				 * If the n_distinct option is specified, it overrides the
				 * above computation.  For indices, we always use just
				 * n_distinct, not n_distinct_inherited.
				 */
				if (aopt != NULL && aopt->n_distinct != 0.0)
					stats->stadistinct = aopt->n_distinct;

				MemoryContextResetAndDeleteChildren(col_context);
			}
		}

		/* And clean up */
		MemoryContextSwitchTo(ind_context);

		ExecDropSingleTupleTableSlot(slot);
		FreeExecutorState(estate);
		MemoryContextResetAndDeleteChildren(ind_context);
	}

	MemoryContextSwitchTo(old_context);
	MemoryContextDelete(ind_context);
}
| 868 | |
| 869 | /* |
| 870 | * examine_attribute -- pre-analysis of a single column |
| 871 | * |
| 872 | * Determine whether the column is analyzable; if so, create and initialize |
| 873 | * a VacAttrStats struct for it. If not, return NULL. |
| 874 | * |
| 875 | * If index_expr isn't NULL, then we're trying to analyze an expression index, |
| 876 | * and index_expr is the expression tree representing the column's data. |
| 877 | */ |
| 878 | static VacAttrStats * |
| 879 | examine_attribute(Relation onerel, int attnum, Node *index_expr) |
| 880 | { |
| 881 | Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1); |
| 882 | HeapTuple typtuple; |
| 883 | VacAttrStats *stats; |
| 884 | int i; |
| 885 | bool ok; |
| 886 | |
| 887 | /* Never analyze dropped columns */ |
| 888 | if (attr->attisdropped) |
| 889 | return NULL; |
| 890 | |
| 891 | /* Don't analyze column if user has specified not to */ |
| 892 | if (attr->attstattarget == 0) |
| 893 | return NULL; |
| 894 | |
| 895 | /* |
| 896 | * Create the VacAttrStats struct. Note that we only have a copy of the |
| 897 | * fixed fields of the pg_attribute tuple. |
| 898 | */ |
| 899 | stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); |
| 900 | stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); |
| 901 | memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE); |
| 902 | |
| 903 | /* |
| 904 | * When analyzing an expression index, believe the expression tree's type |
| 905 | * not the column datatype --- the latter might be the opckeytype storage |
| 906 | * type of the opclass, which is not interesting for our purposes. (Note: |
| 907 | * if we did anything with non-expression index columns, we'd need to |
| 908 | * figure out where to get the correct type info from, but for now that's |
| 909 | * not a problem.) It's not clear whether anyone will care about the |
| 910 | * typmod, but we store that too just in case. |
| 911 | */ |
| 912 | if (index_expr) |
| 913 | { |
| 914 | stats->attrtypid = exprType(index_expr); |
| 915 | stats->attrtypmod = exprTypmod(index_expr); |
| 916 | |
| 917 | /* |
| 918 | * If a collation has been specified for the index column, use that in |
| 919 | * preference to anything else; but if not, fall back to whatever we |
| 920 | * can get from the expression. |
| 921 | */ |
| 922 | if (OidIsValid(onerel->rd_indcollation[attnum - 1])) |
| 923 | stats->attrcollid = onerel->rd_indcollation[attnum - 1]; |
| 924 | else |
| 925 | stats->attrcollid = exprCollation(index_expr); |
| 926 | } |
| 927 | else |
| 928 | { |
| 929 | stats->attrtypid = attr->atttypid; |
| 930 | stats->attrtypmod = attr->atttypmod; |
| 931 | stats->attrcollid = attr->attcollation; |
| 932 | } |
| 933 | |
| 934 | typtuple = SearchSysCacheCopy1(TYPEOID, |
| 935 | ObjectIdGetDatum(stats->attrtypid)); |
| 936 | if (!HeapTupleIsValid(typtuple)) |
| 937 | elog(ERROR, "cache lookup failed for type %u" , stats->attrtypid); |
| 938 | stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple); |
| 939 | stats->anl_context = anl_context; |
| 940 | stats->tupattnum = attnum; |
| 941 | |
| 942 | /* |
| 943 | * The fields describing the stats->stavalues[n] element types default to |
| 944 | * the type of the data being analyzed, but the type-specific typanalyze |
| 945 | * function can change them if it wants to store something else. |
| 946 | */ |
| 947 | for (i = 0; i < STATISTIC_NUM_SLOTS; i++) |
| 948 | { |
| 949 | stats->statypid[i] = stats->attrtypid; |
| 950 | stats->statyplen[i] = stats->attrtype->typlen; |
| 951 | stats->statypbyval[i] = stats->attrtype->typbyval; |
| 952 | stats->statypalign[i] = stats->attrtype->typalign; |
| 953 | } |
| 954 | |
| 955 | /* |
| 956 | * Call the type-specific typanalyze function. If none is specified, use |
| 957 | * std_typanalyze(). |
| 958 | */ |
| 959 | if (OidIsValid(stats->attrtype->typanalyze)) |
| 960 | ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze, |
| 961 | PointerGetDatum(stats))); |
| 962 | else |
| 963 | ok = std_typanalyze(stats); |
| 964 | |
| 965 | if (!ok || stats->compute_stats == NULL || stats->minrows <= 0) |
| 966 | { |
| 967 | heap_freetuple(typtuple); |
| 968 | pfree(stats->attr); |
| 969 | pfree(stats); |
| 970 | return NULL; |
| 971 | } |
| 972 | |
| 973 | return stats; |
| 974 | } |
| 975 | |
| 976 | /* |
| 977 | * acquire_sample_rows -- acquire a random sample of rows from the table |
| 978 | * |
| 979 | * Selected rows are returned in the caller-allocated array rows[], which |
| 980 | * must have at least targrows entries. |
| 981 | * The actual number of rows selected is returned as the function result. |
| 982 | * We also estimate the total numbers of live and dead rows in the table, |
| 983 | * and return them into *totalrows and *totaldeadrows, respectively. |
| 984 | * |
| 985 | * The returned list of tuples is in order by physical position in the table. |
| 986 | * (We will rely on this later to derive correlation estimates.) |
| 987 | * |
| 988 | * As of May 2004 we use a new two-stage method: Stage one selects up |
| 989 | * to targrows random blocks (or all blocks, if there aren't so many). |
| 990 | * Stage two scans these blocks and uses the Vitter algorithm to create |
| 991 | * a random sample of targrows rows (or less, if there are less in the |
| 992 | * sample of blocks). The two stages are executed simultaneously: each |
| 993 | * block is processed as soon as stage one returns its number and while |
| 994 | * the rows are read stage two controls which ones are to be inserted |
| 995 | * into the sample. |
| 996 | * |
| 997 | * Although every row has an equal chance of ending up in the final |
| 998 | * sample, this sampling method is not perfect: not every possible |
| 999 | * sample has an equal chance of being selected. For large relations |
| 1000 | * the number of different blocks represented by the sample tends to be |
| 1001 | * too small. We can live with that for now. Improvements are welcome. |
| 1002 | * |
| 1003 | * An important property of this sampling method is that because we do |
| 1004 | * look at a statistically unbiased set of blocks, we should get |
| 1005 | * unbiased estimates of the average numbers of live and dead rows per |
| 1006 | * block. The previous sampling method put too much credence in the row |
| 1007 | * density near the start of the table. |
| 1008 | */ |
| 1009 | static int |
| 1010 | acquire_sample_rows(Relation onerel, int elevel, |
| 1011 | HeapTuple *rows, int targrows, |
| 1012 | double *totalrows, double *totaldeadrows) |
| 1013 | { |
| 1014 | int numrows = 0; /* # rows now in reservoir */ |
| 1015 | double samplerows = 0; /* total # rows collected */ |
| 1016 | double liverows = 0; /* # live rows seen */ |
| 1017 | double deadrows = 0; /* # dead rows seen */ |
| 1018 | double rowstoskip = -1; /* -1 means not set yet */ |
| 1019 | BlockNumber totalblocks; |
| 1020 | TransactionId OldestXmin; |
| 1021 | BlockSamplerData bs; |
| 1022 | ReservoirStateData rstate; |
| 1023 | TupleTableSlot *slot; |
| 1024 | TableScanDesc scan; |
| 1025 | |
| 1026 | Assert(targrows > 0); |
| 1027 | |
| 1028 | totalblocks = RelationGetNumberOfBlocks(onerel); |
| 1029 | |
| 1030 | /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ |
| 1031 | OldestXmin = GetOldestXmin(onerel, PROCARRAY_FLAGS_VACUUM); |
| 1032 | |
| 1033 | /* Prepare for sampling block numbers */ |
| 1034 | BlockSampler_Init(&bs, totalblocks, targrows, random()); |
| 1035 | /* Prepare for sampling rows */ |
| 1036 | reservoir_init_selection_state(&rstate, targrows); |
| 1037 | |
| 1038 | scan = table_beginscan_analyze(onerel); |
| 1039 | slot = table_slot_create(onerel, NULL); |
| 1040 | |
| 1041 | /* Outer loop over blocks to sample */ |
| 1042 | while (BlockSampler_HasMore(&bs)) |
| 1043 | { |
| 1044 | BlockNumber targblock = BlockSampler_Next(&bs); |
| 1045 | |
| 1046 | vacuum_delay_point(); |
| 1047 | |
| 1048 | if (!table_scan_analyze_next_block(scan, targblock, vac_strategy)) |
| 1049 | continue; |
| 1050 | |
| 1051 | while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot)) |
| 1052 | { |
| 1053 | /* |
| 1054 | * The first targrows sample rows are simply copied into the |
| 1055 | * reservoir. Then we start replacing tuples in the sample until |
| 1056 | * we reach the end of the relation. This algorithm is from Jeff |
| 1057 | * Vitter's paper (see full citation in utils/misc/sampling.c). It |
| 1058 | * works by repeatedly computing the number of tuples to skip |
| 1059 | * before selecting a tuple, which replaces a randomly chosen |
| 1060 | * element of the reservoir (current set of tuples). At all times |
| 1061 | * the reservoir is a true random sample of the tuples we've |
| 1062 | * passed over so far, so when we fall off the end of the relation |
| 1063 | * we're done. |
| 1064 | */ |
| 1065 | if (numrows < targrows) |
| 1066 | rows[numrows++] = ExecCopySlotHeapTuple(slot); |
| 1067 | else |
| 1068 | { |
| 1069 | /* |
| 1070 | * t in Vitter's paper is the number of records already |
| 1071 | * processed. If we need to compute a new S value, we must |
| 1072 | * use the not-yet-incremented value of samplerows as t. |
| 1073 | */ |
| 1074 | if (rowstoskip < 0) |
| 1075 | rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows); |
| 1076 | |
| 1077 | if (rowstoskip <= 0) |
| 1078 | { |
| 1079 | /* |
| 1080 | * Found a suitable tuple, so save it, replacing one old |
| 1081 | * tuple at random |
| 1082 | */ |
| 1083 | int k = (int) (targrows * sampler_random_fract(rstate.randstate)); |
| 1084 | |
| 1085 | Assert(k >= 0 && k < targrows); |
| 1086 | heap_freetuple(rows[k]); |
| 1087 | rows[k] = ExecCopySlotHeapTuple(slot); |
| 1088 | } |
| 1089 | |
| 1090 | rowstoskip -= 1; |
| 1091 | } |
| 1092 | |
| 1093 | samplerows += 1; |
| 1094 | } |
| 1095 | } |
| 1096 | |
| 1097 | ExecDropSingleTupleTableSlot(slot); |
| 1098 | table_endscan(scan); |
| 1099 | |
| 1100 | /* |
| 1101 | * If we didn't find as many tuples as we wanted then we're done. No sort |
| 1102 | * is needed, since they're already in order. |
| 1103 | * |
| 1104 | * Otherwise we need to sort the collected tuples by position |
| 1105 | * (itempointer). It's not worth worrying about corner cases where the |
| 1106 | * tuples are already sorted. |
| 1107 | */ |
| 1108 | if (numrows == targrows) |
| 1109 | qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows); |
| 1110 | |
| 1111 | /* |
| 1112 | * Estimate total numbers of live and dead rows in relation, extrapolating |
| 1113 | * on the assumption that the average tuple density in pages we didn't |
| 1114 | * scan is the same as in the pages we did scan. Since what we scanned is |
| 1115 | * a random sample of the pages in the relation, this should be a good |
| 1116 | * assumption. |
| 1117 | */ |
| 1118 | if (bs.m > 0) |
| 1119 | { |
| 1120 | *totalrows = floor((liverows / bs.m) * totalblocks + 0.5); |
| 1121 | *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5); |
| 1122 | } |
| 1123 | else |
| 1124 | { |
| 1125 | *totalrows = 0.0; |
| 1126 | *totaldeadrows = 0.0; |
| 1127 | } |
| 1128 | |
| 1129 | /* |
| 1130 | * Emit some interesting relation info |
| 1131 | */ |
| 1132 | ereport(elevel, |
| 1133 | (errmsg("\"%s\": scanned %d of %u pages, " |
| 1134 | "containing %.0f live rows and %.0f dead rows; " |
| 1135 | "%d rows in sample, %.0f estimated total rows" , |
| 1136 | RelationGetRelationName(onerel), |
| 1137 | bs.m, totalblocks, |
| 1138 | liverows, deadrows, |
| 1139 | numrows, *totalrows))); |
| 1140 | |
| 1141 | return numrows; |
| 1142 | } |
| 1143 | |
| 1144 | /* |
| 1145 | * qsort comparator for sorting rows[] array |
| 1146 | */ |
| 1147 | static int |
| 1148 | compare_rows(const void *a, const void *b) |
| 1149 | { |
| 1150 | HeapTuple ha = *(const HeapTuple *) a; |
| 1151 | HeapTuple hb = *(const HeapTuple *) b; |
| 1152 | BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self); |
| 1153 | OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self); |
| 1154 | BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self); |
| 1155 | OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self); |
| 1156 | |
| 1157 | if (ba < bb) |
| 1158 | return -1; |
| 1159 | if (ba > bb) |
| 1160 | return 1; |
| 1161 | if (oa < ob) |
| 1162 | return -1; |
| 1163 | if (oa > ob) |
| 1164 | return 1; |
| 1165 | return 0; |
| 1166 | } |
| 1167 | |
| 1168 | |
| 1169 | /* |
| 1170 | * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree |
| 1171 | * |
| 1172 | * This has the same API as acquire_sample_rows, except that rows are |
| 1173 | * collected from all inheritance children as well as the specified table. |
| 1174 | * We fail and return zero if there are no inheritance children, or if all |
| 1175 | * children are foreign tables that don't support ANALYZE. |
| 1176 | */ |
| 1177 | static int |
| 1178 | acquire_inherited_sample_rows(Relation onerel, int elevel, |
| 1179 | HeapTuple *rows, int targrows, |
| 1180 | double *totalrows, double *totaldeadrows) |
| 1181 | { |
| 1182 | List *tableOIDs; |
| 1183 | Relation *rels; |
| 1184 | AcquireSampleRowsFunc *acquirefuncs; |
| 1185 | double *relblocks; |
| 1186 | double totalblocks; |
| 1187 | int numrows, |
| 1188 | nrels, |
| 1189 | i; |
| 1190 | ListCell *lc; |
| 1191 | bool has_child; |
| 1192 | |
| 1193 | /* |
| 1194 | * Find all members of inheritance set. We only need AccessShareLock on |
| 1195 | * the children. |
| 1196 | */ |
| 1197 | tableOIDs = |
| 1198 | find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL); |
| 1199 | |
| 1200 | /* |
| 1201 | * Check that there's at least one descendant, else fail. This could |
| 1202 | * happen despite analyze_rel's relhassubclass check, if table once had a |
| 1203 | * child but no longer does. In that case, we can clear the |
| 1204 | * relhassubclass field so as not to make the same mistake again later. |
| 1205 | * (This is safe because we hold ShareUpdateExclusiveLock.) |
| 1206 | */ |
| 1207 | if (list_length(tableOIDs) < 2) |
| 1208 | { |
| 1209 | /* CCI because we already updated the pg_class row in this command */ |
| 1210 | CommandCounterIncrement(); |
| 1211 | SetRelationHasSubclass(RelationGetRelid(onerel), false); |
| 1212 | ereport(elevel, |
| 1213 | (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables" , |
| 1214 | get_namespace_name(RelationGetNamespace(onerel)), |
| 1215 | RelationGetRelationName(onerel)))); |
| 1216 | return 0; |
| 1217 | } |
| 1218 | |
| 1219 | /* |
| 1220 | * Identify acquirefuncs to use, and count blocks in all the relations. |
| 1221 | * The result could overflow BlockNumber, so we use double arithmetic. |
| 1222 | */ |
| 1223 | rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation)); |
| 1224 | acquirefuncs = (AcquireSampleRowsFunc *) |
| 1225 | palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc)); |
| 1226 | relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double)); |
| 1227 | totalblocks = 0; |
| 1228 | nrels = 0; |
| 1229 | has_child = false; |
| 1230 | foreach(lc, tableOIDs) |
| 1231 | { |
| 1232 | Oid childOID = lfirst_oid(lc); |
| 1233 | Relation childrel; |
| 1234 | AcquireSampleRowsFunc acquirefunc = NULL; |
| 1235 | BlockNumber relpages = 0; |
| 1236 | |
| 1237 | /* We already got the needed lock */ |
| 1238 | childrel = table_open(childOID, NoLock); |
| 1239 | |
| 1240 | /* Ignore if temp table of another backend */ |
| 1241 | if (RELATION_IS_OTHER_TEMP(childrel)) |
| 1242 | { |
| 1243 | /* ... but release the lock on it */ |
| 1244 | Assert(childrel != onerel); |
| 1245 | table_close(childrel, AccessShareLock); |
| 1246 | continue; |
| 1247 | } |
| 1248 | |
| 1249 | /* Check table type (MATVIEW can't happen, but might as well allow) */ |
| 1250 | if (childrel->rd_rel->relkind == RELKIND_RELATION || |
| 1251 | childrel->rd_rel->relkind == RELKIND_MATVIEW) |
| 1252 | { |
| 1253 | /* Regular table, so use the regular row acquisition function */ |
| 1254 | acquirefunc = acquire_sample_rows; |
| 1255 | relpages = RelationGetNumberOfBlocks(childrel); |
| 1256 | } |
| 1257 | else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) |
| 1258 | { |
| 1259 | /* |
| 1260 | * For a foreign table, call the FDW's hook function to see |
| 1261 | * whether it supports analysis. |
| 1262 | */ |
| 1263 | FdwRoutine *fdwroutine; |
| 1264 | bool ok = false; |
| 1265 | |
| 1266 | fdwroutine = GetFdwRoutineForRelation(childrel, false); |
| 1267 | |
| 1268 | if (fdwroutine->AnalyzeForeignTable != NULL) |
| 1269 | ok = fdwroutine->AnalyzeForeignTable(childrel, |
| 1270 | &acquirefunc, |
| 1271 | &relpages); |
| 1272 | |
| 1273 | if (!ok) |
| 1274 | { |
| 1275 | /* ignore, but release the lock on it */ |
| 1276 | Assert(childrel != onerel); |
| 1277 | table_close(childrel, AccessShareLock); |
| 1278 | continue; |
| 1279 | } |
| 1280 | } |
| 1281 | else |
| 1282 | { |
| 1283 | /* |
| 1284 | * ignore, but release the lock on it. don't try to unlock the |
| 1285 | * passed-in relation |
| 1286 | */ |
| 1287 | Assert(childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); |
| 1288 | if (childrel != onerel) |
| 1289 | table_close(childrel, AccessShareLock); |
| 1290 | else |
| 1291 | table_close(childrel, NoLock); |
| 1292 | continue; |
| 1293 | } |
| 1294 | |
| 1295 | /* OK, we'll process this child */ |
| 1296 | has_child = true; |
| 1297 | rels[nrels] = childrel; |
| 1298 | acquirefuncs[nrels] = acquirefunc; |
| 1299 | relblocks[nrels] = (double) relpages; |
| 1300 | totalblocks += (double) relpages; |
| 1301 | nrels++; |
| 1302 | } |
| 1303 | |
| 1304 | /* |
| 1305 | * If we don't have at least one child table to consider, fail. If the |
| 1306 | * relation is a partitioned table, it's not counted as a child table. |
| 1307 | */ |
| 1308 | if (!has_child) |
| 1309 | { |
| 1310 | ereport(elevel, |
| 1311 | (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables" , |
| 1312 | get_namespace_name(RelationGetNamespace(onerel)), |
| 1313 | RelationGetRelationName(onerel)))); |
| 1314 | return 0; |
| 1315 | } |
| 1316 | |
| 1317 | /* |
| 1318 | * Now sample rows from each relation, proportionally to its fraction of |
| 1319 | * the total block count. (This might be less than desirable if the child |
| 1320 | * rels have radically different free-space percentages, but it's not |
| 1321 | * clear that it's worth working harder.) |
| 1322 | */ |
| 1323 | numrows = 0; |
| 1324 | *totalrows = 0; |
| 1325 | *totaldeadrows = 0; |
| 1326 | for (i = 0; i < nrels; i++) |
| 1327 | { |
| 1328 | Relation childrel = rels[i]; |
| 1329 | AcquireSampleRowsFunc acquirefunc = acquirefuncs[i]; |
| 1330 | double childblocks = relblocks[i]; |
| 1331 | |
| 1332 | if (childblocks > 0) |
| 1333 | { |
| 1334 | int childtargrows; |
| 1335 | |
| 1336 | childtargrows = (int) rint(targrows * childblocks / totalblocks); |
| 1337 | /* Make sure we don't overrun due to roundoff error */ |
| 1338 | childtargrows = Min(childtargrows, targrows - numrows); |
| 1339 | if (childtargrows > 0) |
| 1340 | { |
| 1341 | int childrows; |
| 1342 | double trows, |
| 1343 | tdrows; |
| 1344 | |
| 1345 | /* Fetch a random sample of the child's rows */ |
| 1346 | childrows = (*acquirefunc) (childrel, elevel, |
| 1347 | rows + numrows, childtargrows, |
| 1348 | &trows, &tdrows); |
| 1349 | |
| 1350 | /* We may need to convert from child's rowtype to parent's */ |
| 1351 | if (childrows > 0 && |
| 1352 | !equalTupleDescs(RelationGetDescr(childrel), |
| 1353 | RelationGetDescr(onerel))) |
| 1354 | { |
| 1355 | TupleConversionMap *map; |
| 1356 | |
| 1357 | map = convert_tuples_by_name(RelationGetDescr(childrel), |
| 1358 | RelationGetDescr(onerel), |
| 1359 | gettext_noop("could not convert row type" )); |
| 1360 | if (map != NULL) |
| 1361 | { |
| 1362 | int j; |
| 1363 | |
| 1364 | for (j = 0; j < childrows; j++) |
| 1365 | { |
| 1366 | HeapTuple newtup; |
| 1367 | |
| 1368 | newtup = execute_attr_map_tuple(rows[numrows + j], map); |
| 1369 | heap_freetuple(rows[numrows + j]); |
| 1370 | rows[numrows + j] = newtup; |
| 1371 | } |
| 1372 | free_conversion_map(map); |
| 1373 | } |
| 1374 | } |
| 1375 | |
| 1376 | /* And add to counts */ |
| 1377 | numrows += childrows; |
| 1378 | *totalrows += trows; |
| 1379 | *totaldeadrows += tdrows; |
| 1380 | } |
| 1381 | } |
| 1382 | |
| 1383 | /* |
| 1384 | * Note: we cannot release the child-table locks, since we may have |
| 1385 | * pointers to their TOAST tables in the sampled rows. |
| 1386 | */ |
| 1387 | table_close(childrel, NoLock); |
| 1388 | } |
| 1389 | |
| 1390 | return numrows; |
| 1391 | } |
| 1392 | |
| 1393 | |
| 1394 | /* |
| 1395 | * update_attstats() -- update attribute statistics for one relation |
| 1396 | * |
| 1397 | * Statistics are stored in several places: the pg_class row for the |
| 1398 | * relation has stats about the whole relation, and there is a |
| 1399 | * pg_statistic row for each (non-system) attribute that has ever |
| 1400 | * been analyzed. The pg_class values are updated by VACUUM, not here. |
| 1401 | * |
| 1402 | * pg_statistic rows are just added or updated normally. This means |
| 1403 | * that pg_statistic will probably contain some deleted rows at the |
| 1404 | * completion of a vacuum cycle, unless it happens to get vacuumed last. |
| 1405 | * |
| 1406 | * To keep things simple, we punt for pg_statistic, and don't try |
| 1407 | * to compute or store rows for pg_statistic itself in pg_statistic. |
| 1408 | * This could possibly be made to work, but it's not worth the trouble. |
| 1409 | * Note analyze_rel() has seen to it that we won't come here when |
| 1410 | * vacuuming pg_statistic itself. |
| 1411 | * |
| 1412 | * Note: there would be a race condition here if two backends could |
| 1413 | * ANALYZE the same table concurrently. Presently, we lock that out |
| 1414 | * by taking a self-exclusive lock on the relation in analyze_rel(). |
| 1415 | */ |
| 1416 | static void |
| 1417 | update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats) |
| 1418 | { |
| 1419 | Relation sd; |
| 1420 | int attno; |
| 1421 | |
| 1422 | if (natts <= 0) |
| 1423 | return; /* nothing to do */ |
| 1424 | |
| 1425 | sd = table_open(StatisticRelationId, RowExclusiveLock); |
| 1426 | |
| 1427 | for (attno = 0; attno < natts; attno++) |
| 1428 | { |
| 1429 | VacAttrStats *stats = vacattrstats[attno]; |
| 1430 | HeapTuple stup, |
| 1431 | oldtup; |
| 1432 | int i, |
| 1433 | k, |
| 1434 | n; |
| 1435 | Datum values[Natts_pg_statistic]; |
| 1436 | bool nulls[Natts_pg_statistic]; |
| 1437 | bool replaces[Natts_pg_statistic]; |
| 1438 | |
| 1439 | /* Ignore attr if we weren't able to collect stats */ |
| 1440 | if (!stats->stats_valid) |
| 1441 | continue; |
| 1442 | |
| 1443 | /* |
| 1444 | * Construct a new pg_statistic tuple |
| 1445 | */ |
| 1446 | for (i = 0; i < Natts_pg_statistic; ++i) |
| 1447 | { |
| 1448 | nulls[i] = false; |
| 1449 | replaces[i] = true; |
| 1450 | } |
| 1451 | |
| 1452 | values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid); |
| 1453 | values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum); |
| 1454 | values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh); |
| 1455 | values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac); |
| 1456 | values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth); |
| 1457 | values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct); |
| 1458 | i = Anum_pg_statistic_stakind1 - 1; |
| 1459 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1460 | { |
| 1461 | values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */ |
| 1462 | } |
| 1463 | i = Anum_pg_statistic_staop1 - 1; |
| 1464 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1465 | { |
| 1466 | values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */ |
| 1467 | } |
| 1468 | i = Anum_pg_statistic_stacoll1 - 1; |
| 1469 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1470 | { |
| 1471 | values[i++] = ObjectIdGetDatum(stats->stacoll[k]); /* stacollN */ |
| 1472 | } |
| 1473 | i = Anum_pg_statistic_stanumbers1 - 1; |
| 1474 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1475 | { |
| 1476 | int nnum = stats->numnumbers[k]; |
| 1477 | |
| 1478 | if (nnum > 0) |
| 1479 | { |
| 1480 | Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum)); |
| 1481 | ArrayType *arry; |
| 1482 | |
| 1483 | for (n = 0; n < nnum; n++) |
| 1484 | numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]); |
| 1485 | /* XXX knows more than it should about type float4: */ |
| 1486 | arry = construct_array(numdatums, nnum, |
| 1487 | FLOAT4OID, |
| 1488 | sizeof(float4), FLOAT4PASSBYVAL, 'i'); |
| 1489 | values[i++] = PointerGetDatum(arry); /* stanumbersN */ |
| 1490 | } |
| 1491 | else |
| 1492 | { |
| 1493 | nulls[i] = true; |
| 1494 | values[i++] = (Datum) 0; |
| 1495 | } |
| 1496 | } |
| 1497 | i = Anum_pg_statistic_stavalues1 - 1; |
| 1498 | for (k = 0; k < STATISTIC_NUM_SLOTS; k++) |
| 1499 | { |
| 1500 | if (stats->numvalues[k] > 0) |
| 1501 | { |
| 1502 | ArrayType *arry; |
| 1503 | |
| 1504 | arry = construct_array(stats->stavalues[k], |
| 1505 | stats->numvalues[k], |
| 1506 | stats->statypid[k], |
| 1507 | stats->statyplen[k], |
| 1508 | stats->statypbyval[k], |
| 1509 | stats->statypalign[k]); |
| 1510 | values[i++] = PointerGetDatum(arry); /* stavaluesN */ |
| 1511 | } |
| 1512 | else |
| 1513 | { |
| 1514 | nulls[i] = true; |
| 1515 | values[i++] = (Datum) 0; |
| 1516 | } |
| 1517 | } |
| 1518 | |
| 1519 | /* Is there already a pg_statistic tuple for this attribute? */ |
| 1520 | oldtup = SearchSysCache3(STATRELATTINH, |
| 1521 | ObjectIdGetDatum(relid), |
| 1522 | Int16GetDatum(stats->attr->attnum), |
| 1523 | BoolGetDatum(inh)); |
| 1524 | |
| 1525 | if (HeapTupleIsValid(oldtup)) |
| 1526 | { |
| 1527 | /* Yes, replace it */ |
| 1528 | stup = heap_modify_tuple(oldtup, |
| 1529 | RelationGetDescr(sd), |
| 1530 | values, |
| 1531 | nulls, |
| 1532 | replaces); |
| 1533 | ReleaseSysCache(oldtup); |
| 1534 | CatalogTupleUpdate(sd, &stup->t_self, stup); |
| 1535 | } |
| 1536 | else |
| 1537 | { |
| 1538 | /* No, insert new tuple */ |
| 1539 | stup = heap_form_tuple(RelationGetDescr(sd), values, nulls); |
| 1540 | CatalogTupleInsert(sd, stup); |
| 1541 | } |
| 1542 | |
| 1543 | heap_freetuple(stup); |
| 1544 | } |
| 1545 | |
| 1546 | table_close(sd, RowExclusiveLock); |
| 1547 | } |
| 1548 | |
| 1549 | /* |
| 1550 | * Standard fetch function for use by compute_stats subroutines. |
| 1551 | * |
| 1552 | * This exists to provide some insulation between compute_stats routines |
| 1553 | * and the actual storage of the sample data. |
| 1554 | */ |
| 1555 | static Datum |
| 1556 | std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) |
| 1557 | { |
| 1558 | int attnum = stats->tupattnum; |
| 1559 | HeapTuple tuple = stats->rows[rownum]; |
| 1560 | TupleDesc tupDesc = stats->tupDesc; |
| 1561 | |
| 1562 | return heap_getattr(tuple, attnum, tupDesc, isNull); |
| 1563 | } |
| 1564 | |
| 1565 | /* |
| 1566 | * Fetch function for analyzing index expressions. |
| 1567 | * |
| 1568 | * We have not bothered to construct index tuples, instead the data is |
| 1569 | * just in Datum arrays. |
| 1570 | */ |
| 1571 | static Datum |
| 1572 | ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) |
| 1573 | { |
| 1574 | int i; |
| 1575 | |
| 1576 | /* exprvals and exprnulls are already offset for proper column */ |
| 1577 | i = rownum * stats->rowstride; |
| 1578 | *isNull = stats->exprnulls[i]; |
| 1579 | return stats->exprvals[i]; |
| 1580 | } |
| 1581 | |
| 1582 | |
| 1583 | /*========================================================================== |
| 1584 | * |
| 1585 | * Code below this point represents the "standard" type-specific statistics |
| 1586 | * analysis algorithms. This code can be replaced on a per-data-type basis |
| 1587 | * by setting a nonzero value in pg_type.typanalyze. |
| 1588 | * |
| 1589 | *========================================================================== |
| 1590 | */ |
| 1591 | |
| 1592 | |
| 1593 | /* |
| 1594 | * To avoid consuming too much memory during analysis and/or too much space |
| 1595 | * in the resulting pg_statistic rows, we ignore varlena datums that are wider |
| 1596 | * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV |
| 1597 | * and distinct-value calculations since a wide value is unlikely to be |
| 1598 | * duplicated at all, much less be a most-common value. For the same reason, |
| 1599 | * ignoring wide values will not affect our estimates of histogram bin |
| 1600 | * boundaries very much. |
| 1601 | */ |
| 1602 | #define WIDTH_THRESHOLD 1024 |
| 1603 | |
| 1604 | #define swapInt(a,b) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0) |
| 1605 | #define swapDatum(a,b) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0) |
| 1606 | |
| 1607 | /* |
| 1608 | * Extra information used by the default analysis routines |
| 1609 | */ |
| 1610 | typedef struct |
| 1611 | { |
| 1612 | int count; /* # of duplicates */ |
| 1613 | int first; /* values[] index of first occurrence */ |
| 1614 | } ScalarMCVItem; |
| 1615 | |
| 1616 | typedef struct |
| 1617 | { |
| 1618 | SortSupport ssup; |
| 1619 | int *tupnoLink; |
| 1620 | } CompareScalarsContext; |
| 1621 | |
| 1622 | |
| 1623 | static void compute_trivial_stats(VacAttrStatsP stats, |
| 1624 | AnalyzeAttrFetchFunc fetchfunc, |
| 1625 | int samplerows, |
| 1626 | double totalrows); |
| 1627 | static void compute_distinct_stats(VacAttrStatsP stats, |
| 1628 | AnalyzeAttrFetchFunc fetchfunc, |
| 1629 | int samplerows, |
| 1630 | double totalrows); |
| 1631 | static void compute_scalar_stats(VacAttrStatsP stats, |
| 1632 | AnalyzeAttrFetchFunc fetchfunc, |
| 1633 | int samplerows, |
| 1634 | double totalrows); |
| 1635 | static int compare_scalars(const void *a, const void *b, void *arg); |
| 1636 | static int compare_mcvs(const void *a, const void *b); |
| 1637 | static int analyze_mcv_list(int *mcv_counts, |
| 1638 | int num_mcv, |
| 1639 | double stadistinct, |
| 1640 | double stanullfrac, |
| 1641 | int samplerows, |
| 1642 | double totalrows); |
| 1643 | |
| 1644 | |
| 1645 | /* |
| 1646 | * std_typanalyze -- the default type-specific typanalyze function |
| 1647 | */ |
| 1648 | bool |
| 1649 | std_typanalyze(VacAttrStats *stats) |
| 1650 | { |
| 1651 | Form_pg_attribute attr = stats->attr; |
| 1652 | Oid ltopr; |
| 1653 | Oid eqopr; |
| 1654 | StdAnalyzeData *mystats; |
| 1655 | |
| 1656 | /* If the attstattarget column is negative, use the default value */ |
| 1657 | /* NB: it is okay to scribble on stats->attr since it's a copy */ |
| 1658 | if (attr->attstattarget < 0) |
| 1659 | attr->attstattarget = default_statistics_target; |
| 1660 | |
| 1661 | /* Look for default "<" and "=" operators for column's type */ |
| 1662 | get_sort_group_operators(stats->attrtypid, |
| 1663 | false, false, false, |
| 1664 | <opr, &eqopr, NULL, |
| 1665 | NULL); |
| 1666 | |
| 1667 | /* Save the operator info for compute_stats routines */ |
| 1668 | mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData)); |
| 1669 | mystats->eqopr = eqopr; |
| 1670 | mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid; |
| 1671 | mystats->ltopr = ltopr; |
| 1672 | stats->extra_data = mystats; |
| 1673 | |
| 1674 | /* |
| 1675 | * Determine which standard statistics algorithm to use |
| 1676 | */ |
| 1677 | if (OidIsValid(eqopr) && OidIsValid(ltopr)) |
| 1678 | { |
| 1679 | /* Seems to be a scalar datatype */ |
| 1680 | stats->compute_stats = compute_scalar_stats; |
| 1681 | /*-------------------- |
| 1682 | * The following choice of minrows is based on the paper |
| 1683 | * "Random sampling for histogram construction: how much is enough?" |
| 1684 | * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in |
| 1685 | * Proceedings of ACM SIGMOD International Conference on Management |
| 1686 | * of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5 |
| 1687 | * says that for table size n, histogram size k, maximum relative |
| 1688 | * error in bin size f, and error probability gamma, the minimum |
| 1689 | * random sample size is |
| 1690 | * r = 4 * k * ln(2*n/gamma) / f^2 |
| 1691 | * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain |
| 1692 | * r = 305.82 * k |
| 1693 | * Note that because of the log function, the dependence on n is |
| 1694 | * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66 |
| 1695 | * bin size error with probability 0.99. So there's no real need to |
| 1696 | * scale for n, which is a good thing because we don't necessarily |
| 1697 | * know it at this point. |
| 1698 | *-------------------- |
| 1699 | */ |
| 1700 | stats->minrows = 300 * attr->attstattarget; |
| 1701 | } |
| 1702 | else if (OidIsValid(eqopr)) |
| 1703 | { |
| 1704 | /* We can still recognize distinct values */ |
| 1705 | stats->compute_stats = compute_distinct_stats; |
| 1706 | /* Might as well use the same minrows as above */ |
| 1707 | stats->minrows = 300 * attr->attstattarget; |
| 1708 | } |
| 1709 | else |
| 1710 | { |
| 1711 | /* Can't do much but the trivial stuff */ |
| 1712 | stats->compute_stats = compute_trivial_stats; |
| 1713 | /* Might as well use the same minrows as above */ |
| 1714 | stats->minrows = 300 * attr->attstattarget; |
| 1715 | } |
| 1716 | |
| 1717 | return true; |
| 1718 | } |
| 1719 | |
| 1720 | |
| 1721 | /* |
| 1722 | * compute_trivial_stats() -- compute very basic column statistics |
| 1723 | * |
| 1724 | * We use this when we cannot find a hash "=" operator for the datatype. |
| 1725 | * |
| 1726 | * We determine the fraction of non-null rows and the average datum width. |
| 1727 | */ |
| 1728 | static void |
| 1729 | compute_trivial_stats(VacAttrStatsP stats, |
| 1730 | AnalyzeAttrFetchFunc fetchfunc, |
| 1731 | int samplerows, |
| 1732 | double totalrows) |
| 1733 | { |
| 1734 | int i; |
| 1735 | int null_cnt = 0; |
| 1736 | int nonnull_cnt = 0; |
| 1737 | double total_width = 0; |
| 1738 | bool is_varlena = (!stats->attrtype->typbyval && |
| 1739 | stats->attrtype->typlen == -1); |
| 1740 | bool is_varwidth = (!stats->attrtype->typbyval && |
| 1741 | stats->attrtype->typlen < 0); |
| 1742 | |
| 1743 | for (i = 0; i < samplerows; i++) |
| 1744 | { |
| 1745 | Datum value; |
| 1746 | bool isnull; |
| 1747 | |
| 1748 | vacuum_delay_point(); |
| 1749 | |
| 1750 | value = fetchfunc(stats, i, &isnull); |
| 1751 | |
| 1752 | /* Check for null/nonnull */ |
| 1753 | if (isnull) |
| 1754 | { |
| 1755 | null_cnt++; |
| 1756 | continue; |
| 1757 | } |
| 1758 | nonnull_cnt++; |
| 1759 | |
| 1760 | /* |
| 1761 | * If it's a variable-width field, add up widths for average width |
| 1762 | * calculation. Note that if the value is toasted, we use the toasted |
| 1763 | * width. We don't bother with this calculation if it's a fixed-width |
| 1764 | * type. |
| 1765 | */ |
| 1766 | if (is_varlena) |
| 1767 | { |
| 1768 | total_width += VARSIZE_ANY(DatumGetPointer(value)); |
| 1769 | } |
| 1770 | else if (is_varwidth) |
| 1771 | { |
| 1772 | /* must be cstring */ |
| 1773 | total_width += strlen(DatumGetCString(value)) + 1; |
| 1774 | } |
| 1775 | } |
| 1776 | |
| 1777 | /* We can only compute average width if we found some non-null values. */ |
| 1778 | if (nonnull_cnt > 0) |
| 1779 | { |
| 1780 | stats->stats_valid = true; |
| 1781 | /* Do the simple null-frac and width stats */ |
| 1782 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
| 1783 | if (is_varwidth) |
| 1784 | stats->stawidth = total_width / (double) nonnull_cnt; |
| 1785 | else |
| 1786 | stats->stawidth = stats->attrtype->typlen; |
| 1787 | stats->stadistinct = 0.0; /* "unknown" */ |
| 1788 | } |
| 1789 | else if (null_cnt > 0) |
| 1790 | { |
| 1791 | /* We found only nulls; assume the column is entirely null */ |
| 1792 | stats->stats_valid = true; |
| 1793 | stats->stanullfrac = 1.0; |
| 1794 | if (is_varwidth) |
| 1795 | stats->stawidth = 0; /* "unknown" */ |
| 1796 | else |
| 1797 | stats->stawidth = stats->attrtype->typlen; |
| 1798 | stats->stadistinct = 0.0; /* "unknown" */ |
| 1799 | } |
| 1800 | } |
| 1801 | |
| 1802 | |
| 1803 | /* |
| 1804 | * compute_distinct_stats() -- compute column statistics including ndistinct |
| 1805 | * |
| 1806 | * We use this when we can find only an "=" operator for the datatype. |
| 1807 | * |
| 1808 | * We determine the fraction of non-null rows, the average width, the |
| 1809 | * most common values, and the (estimated) number of distinct values. |
| 1810 | * |
| 1811 | * The most common values are determined by brute force: we keep a list |
| 1812 | * of previously seen values, ordered by number of times seen, as we scan |
| 1813 | * the samples. A newly seen value is inserted just after the last |
| 1814 | * multiply-seen value, causing the bottommost (oldest) singly-seen value |
| 1815 | * to drop off the list. The accuracy of this method, and also its cost, |
| 1816 | * depend mainly on the length of the list we are willing to keep. |
| 1817 | */ |
static void
compute_distinct_stats(VacAttrStatsP stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			i;
	int			null_cnt = 0;
	int			nonnull_cnt = 0;
	int			toowide_cnt = 0;	/* values skipped as too wide to detoast */
	double		total_width = 0;
	bool		is_varlena = (!stats->attrtype->typbyval &&
							  stats->attrtype->typlen == -1);
	bool		is_varwidth = (!stats->attrtype->typbyval &&
							   stats->attrtype->typlen < 0);
	FmgrInfo	f_cmpeq;
	typedef struct
	{
		Datum		value;
		int			count;		/* # of sample rows matching this value */
	} TrackItem;
	TrackItem  *track;			/* MCV candidates, descending by count */
	int			track_cnt,
				track_max;
	int			num_mcv = stats->attr->attstattarget;
	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

	/*
	 * We track up to 2*n values for an n-element MCV list; but at least 10
	 */
	track_max = 2 * num_mcv;
	if (track_max < 10)
		track_max = 10;
	track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
	track_cnt = 0;

	/* Set up to call the "=" function saved in our extra_data */
	fmgr_info(mystats->eqfunc, &f_cmpeq);

	for (i = 0; i < samplerows; i++)
	{
		Datum		value;
		bool		isnull;
		bool		match;
		int			firstcount1,
					j;

		vacuum_delay_point();

		value = fetchfunc(stats, i, &isnull);

		/* Check for null/nonnull */
		if (isnull)
		{
			null_cnt++;
			continue;
		}
		nonnull_cnt++;

		/*
		 * If it's a variable-width field, add up widths for average width
		 * calculation.  Note that if the value is toasted, we use the toasted
		 * width.  We don't bother with this calculation if it's a fixed-width
		 * type.
		 */
		if (is_varlena)
		{
			total_width += VARSIZE_ANY(DatumGetPointer(value));

			/*
			 * If the value is toasted, we want to detoast it just once to
			 * avoid repeated detoastings and resultant excess memory usage
			 * during the comparisons.  Also, check to see if the value is
			 * excessively wide, and if so don't detoast at all --- just
			 * ignore the value.
			 */
			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
			{
				toowide_cnt++;
				continue;
			}
			value = PointerGetDatum(PG_DETOAST_DATUM(value));
		}
		else if (is_varwidth)
		{
			/* must be cstring */
			total_width += strlen(DatumGetCString(value)) + 1;
		}

		/*
		 * See if the value matches anything we're already tracking.  As a
		 * side effect, remember in firstcount1 the index of the first track
		 * entry with count == 1 (or track_cnt if there is none); that is
		 * where a newly-seen value would be inserted.
		 */
		match = false;
		firstcount1 = track_cnt;
		for (j = 0; j < track_cnt; j++)
		{
			if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
											   stats->attrcollid,
											   value, track[j].value)))
			{
				match = true;
				break;
			}
			/* the j < firstcount1 guard makes only the first hit stick */
			if (j < firstcount1 && track[j].count == 1)
				firstcount1 = j;
		}

		if (match)
		{
			/* Found a match */
			track[j].count++;
			/* This value may now need to "bubble up" in the track list */
			while (j > 0 && track[j].count > track[j - 1].count)
			{
				swapDatum(track[j].value, track[j - 1].value);
				swapInt(track[j].count, track[j - 1].count);
				j--;
			}
		}
		else
		{
			/* No match.  Insert at head of count-1 list */
			if (track_cnt < track_max)
				track_cnt++;
			/* shift the count-1 entries down one slot to make room ... */
			for (j = track_cnt - 1; j > firstcount1; j--)
			{
				track[j].value = track[j - 1].value;
				track[j].count = track[j - 1].count;
			}
			/* ... then store the new value, unless the list is full of
			 * multiply-seen values (firstcount1 == track_cnt), in which case
			 * the new value is simply not tracked */
			if (firstcount1 < track_cnt)
			{
				track[firstcount1].value = value;
				track[firstcount1].count = 1;
			}
		}
	}

	/* We can only compute real stats if we found some non-null values. */
	if (nonnull_cnt > 0)
	{
		int			nmultiple,	/* # track values seen more than once */
					summultiple;	/* total sample rows covered by those */

		stats->stats_valid = true;
		/* Do the simple null-frac and width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		if (is_varwidth)
			stats->stawidth = total_width / (double) nonnull_cnt;
		else
			stats->stawidth = stats->attrtype->typlen;

		/*
		 * Count the number of values we found multiple times.  The track
		 * list is ordered by descending count, so the multiply-seen values
		 * form a prefix of it.
		 */
		summultiple = 0;
		for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
		{
			if (track[nmultiple].count == 1)
				break;
			summultiple += track[nmultiple].count;
		}

		if (nmultiple == 0)
		{
			/*
			 * If we found no repeated non-null values, assume it's a unique
			 * column; but be sure to discount for any nulls we found.
			 */
			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
		}
		else if (track_cnt < track_max && toowide_cnt == 0 &&
				 nmultiple == track_cnt)
		{
			/*
			 * Our track list includes every value in the sample, and every
			 * value appeared more than once.  Assume the column has just
			 * these values.  (This case is meant to address columns with
			 * small, fixed sets of possible values, such as boolean or enum
			 * columns.  If there are any values that appear just once in the
			 * sample, including too-wide values, we should assume that that's
			 * not what we're dealing with.)
			 */
			stats->stadistinct = track_cnt;
		}
		else
		{
			/*----------
			 * Estimate the number of distinct values using the estimator
			 * proposed by Haas and Stokes in IBM Research Report RJ 10025:
			 *		n*d / (n - f1 + f1*n/N)
			 * where f1 is the number of distinct values that occurred
			 * exactly once in our sample of n rows (from a total of N),
			 * and d is the total number of distinct values in the sample.
			 * This is their Duj1 estimator; the other estimators they
			 * recommend are considerably more complex, and are numerically
			 * very unstable when n is much smaller than N.
			 *
			 * In this calculation, we consider only non-nulls.  We used to
			 * include rows with null values in the n and N counts, but that
			 * leads to inaccurate answers in columns with many nulls, and
			 * it's intuitively bogus anyway considering the desired result is
			 * the number of distinct non-null values.
			 *
			 * We assume (not very reliably!) that all the multiply-occurring
			 * values are reflected in the final track[] list, and the other
			 * nonnull values all appeared but once.  (XXX this usually
			 * results in a drastic overestimate of ndistinct.  Can we do
			 * any better?)
			 *----------
			 */
			int			f1 = nonnull_cnt - summultiple;
			int			d = f1 + nmultiple;
			double		n = samplerows - null_cnt;
			double		N = totalrows * (1.0 - stats->stanullfrac);
			double		stadistinct;

			/* N == 0 shouldn't happen, but just in case ... */
			if (N > 0)
				stadistinct = (n * d) / ((n - f1) + f1 * n / N);
			else
				stadistinct = 0;

			/* Clamp to sane range in case of roundoff error */
			if (stadistinct < d)
				stadistinct = d;
			if (stadistinct > N)
				stadistinct = N;
			/* And round to integer */
			stats->stadistinct = floor(stadistinct + 0.5);
		}

		/*
		 * If we estimated the number of distinct values at more than 10% of
		 * the total row count (a very arbitrary limit), then assume that
		 * stadistinct should scale with the row count rather than be a fixed
		 * value.
		 */
		if (stats->stadistinct > 0.1 * totalrows)
			stats->stadistinct = -(stats->stadistinct / totalrows);

		/*
		 * Decide how many values are worth storing as most-common values. If
		 * we are able to generate a complete MCV list (all the values in the
		 * sample will fit, and we think these are all the ones in the table),
		 * then do so.  Otherwise, store only those values that are
		 * significantly more common than the values not in the list.
		 *
		 * Note: the first of these cases is meant to address columns with
		 * small, fixed sets of possible values, such as boolean or enum
		 * columns.  If we can *completely* represent the column population by
		 * an MCV list that will fit into the stats target, then we should do
		 * so and thus provide the planner with complete information.  But if
		 * the MCV list is not complete, it's generally worth being more
		 * selective, and not just filling it all the way up to the stats
		 * target.
		 */
		if (track_cnt < track_max && toowide_cnt == 0 &&
			stats->stadistinct > 0 &&
			track_cnt <= num_mcv)
		{
			/* Track list includes all values seen, and all will fit */
			num_mcv = track_cnt;
		}
		else
		{
			int		   *mcv_counts;

			/* Incomplete list; decide how many values are worth keeping */
			if (num_mcv > track_cnt)
				num_mcv = track_cnt;

			if (num_mcv > 0)
			{
				mcv_counts = (int *) palloc(num_mcv * sizeof(int));
				for (i = 0; i < num_mcv; i++)
					mcv_counts[i] = track[i].count;

				num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
										   stats->stadistinct,
										   stats->stanullfrac,
										   samplerows, totalrows);
			}
		}

		/* Generate MCV slot entry */
		if (num_mcv > 0)
		{
			MemoryContext old_context;
			Datum	   *mcv_values;
			float4	   *mcv_freqs;

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
			for (i = 0; i < num_mcv; i++)
			{
				mcv_values[i] = datumCopy(track[i].value,
										  stats->attrtype->typbyval,
										  stats->attrtype->typlen);
				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
			}
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCV;
			stats->staop[0] = mystats->eqopr;
			stats->stacoll[0] = stats->attrcollid;
			stats->stanumbers[0] = mcv_freqs;
			stats->numnumbers[0] = num_mcv;
			stats->stavalues[0] = mcv_values;
			stats->numvalues[0] = num_mcv;

			/*
			 * Accept the defaults for stats->statypid and others. They have
			 * been set before we were called (see vacuum.h)
			 */
		}
	}
	else if (null_cnt > 0)
	{
		/* We found only nulls; assume the column is entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		if (is_varwidth)
			stats->stawidth = 0;	/* "unknown" */
		else
			stats->stawidth = stats->attrtype->typlen;
		stats->stadistinct = 0.0;	/* "unknown" */
	}

	/* We don't need to bother cleaning up any of our temporary palloc's */
}
| 2147 | |
| 2148 | |
| 2149 | /* |
| 2150 | * compute_scalar_stats() -- compute column statistics |
| 2151 | * |
| 2152 | * We use this when we can find "=" and "<" operators for the datatype. |
| 2153 | * |
| 2154 | * We determine the fraction of non-null rows, the average width, the |
| 2155 | * most common values, the (estimated) number of distinct values, the |
| 2156 | * distribution histogram, and the correlation of physical to logical order. |
| 2157 | * |
| 2158 | * The desired stats can be determined fairly easily after sorting the |
| 2159 | * data values into order. |
| 2160 | */ |
| 2161 | static void |
| 2162 | compute_scalar_stats(VacAttrStatsP stats, |
| 2163 | AnalyzeAttrFetchFunc fetchfunc, |
| 2164 | int samplerows, |
| 2165 | double totalrows) |
| 2166 | { |
| 2167 | int i; |
| 2168 | int null_cnt = 0; |
| 2169 | int nonnull_cnt = 0; |
| 2170 | int toowide_cnt = 0; |
| 2171 | double total_width = 0; |
| 2172 | bool is_varlena = (!stats->attrtype->typbyval && |
| 2173 | stats->attrtype->typlen == -1); |
| 2174 | bool is_varwidth = (!stats->attrtype->typbyval && |
| 2175 | stats->attrtype->typlen < 0); |
| 2176 | double corr_xysum; |
| 2177 | SortSupportData ssup; |
| 2178 | ScalarItem *values; |
| 2179 | int values_cnt = 0; |
| 2180 | int *tupnoLink; |
| 2181 | ScalarMCVItem *track; |
| 2182 | int track_cnt = 0; |
| 2183 | int num_mcv = stats->attr->attstattarget; |
| 2184 | int num_bins = stats->attr->attstattarget; |
| 2185 | StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; |
| 2186 | |
| 2187 | values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem)); |
| 2188 | tupnoLink = (int *) palloc(samplerows * sizeof(int)); |
| 2189 | track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem)); |
| 2190 | |
| 2191 | memset(&ssup, 0, sizeof(ssup)); |
| 2192 | ssup.ssup_cxt = CurrentMemoryContext; |
| 2193 | ssup.ssup_collation = stats->attrcollid; |
| 2194 | ssup.ssup_nulls_first = false; |
| 2195 | |
| 2196 | /* |
| 2197 | * For now, don't perform abbreviated key conversion, because full values |
| 2198 | * are required for MCV slot generation. Supporting that optimization |
| 2199 | * would necessitate teaching compare_scalars() to call a tie-breaker. |
| 2200 | */ |
| 2201 | ssup.abbreviate = false; |
| 2202 | |
| 2203 | PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup); |
| 2204 | |
| 2205 | /* Initial scan to find sortable values */ |
| 2206 | for (i = 0; i < samplerows; i++) |
| 2207 | { |
| 2208 | Datum value; |
| 2209 | bool isnull; |
| 2210 | |
| 2211 | vacuum_delay_point(); |
| 2212 | |
| 2213 | value = fetchfunc(stats, i, &isnull); |
| 2214 | |
| 2215 | /* Check for null/nonnull */ |
| 2216 | if (isnull) |
| 2217 | { |
| 2218 | null_cnt++; |
| 2219 | continue; |
| 2220 | } |
| 2221 | nonnull_cnt++; |
| 2222 | |
| 2223 | /* |
| 2224 | * If it's a variable-width field, add up widths for average width |
| 2225 | * calculation. Note that if the value is toasted, we use the toasted |
| 2226 | * width. We don't bother with this calculation if it's a fixed-width |
| 2227 | * type. |
| 2228 | */ |
| 2229 | if (is_varlena) |
| 2230 | { |
| 2231 | total_width += VARSIZE_ANY(DatumGetPointer(value)); |
| 2232 | |
| 2233 | /* |
| 2234 | * If the value is toasted, we want to detoast it just once to |
| 2235 | * avoid repeated detoastings and resultant excess memory usage |
| 2236 | * during the comparisons. Also, check to see if the value is |
| 2237 | * excessively wide, and if so don't detoast at all --- just |
| 2238 | * ignore the value. |
| 2239 | */ |
| 2240 | if (toast_raw_datum_size(value) > WIDTH_THRESHOLD) |
| 2241 | { |
| 2242 | toowide_cnt++; |
| 2243 | continue; |
| 2244 | } |
| 2245 | value = PointerGetDatum(PG_DETOAST_DATUM(value)); |
| 2246 | } |
| 2247 | else if (is_varwidth) |
| 2248 | { |
| 2249 | /* must be cstring */ |
| 2250 | total_width += strlen(DatumGetCString(value)) + 1; |
| 2251 | } |
| 2252 | |
| 2253 | /* Add it to the list to be sorted */ |
| 2254 | values[values_cnt].value = value; |
| 2255 | values[values_cnt].tupno = values_cnt; |
| 2256 | tupnoLink[values_cnt] = values_cnt; |
| 2257 | values_cnt++; |
| 2258 | } |
| 2259 | |
| 2260 | /* We can only compute real stats if we found some sortable values. */ |
| 2261 | if (values_cnt > 0) |
| 2262 | { |
| 2263 | int ndistinct, /* # distinct values in sample */ |
| 2264 | nmultiple, /* # that appear multiple times */ |
| 2265 | num_hist, |
| 2266 | dups_cnt; |
| 2267 | int slot_idx = 0; |
| 2268 | CompareScalarsContext cxt; |
| 2269 | |
| 2270 | /* Sort the collected values */ |
| 2271 | cxt.ssup = &ssup; |
| 2272 | cxt.tupnoLink = tupnoLink; |
| 2273 | qsort_arg((void *) values, values_cnt, sizeof(ScalarItem), |
| 2274 | compare_scalars, (void *) &cxt); |
| 2275 | |
| 2276 | /* |
| 2277 | * Now scan the values in order, find the most common ones, and also |
| 2278 | * accumulate ordering-correlation statistics. |
| 2279 | * |
| 2280 | * To determine which are most common, we first have to count the |
| 2281 | * number of duplicates of each value. The duplicates are adjacent in |
| 2282 | * the sorted list, so a brute-force approach is to compare successive |
| 2283 | * datum values until we find two that are not equal. However, that |
| 2284 | * requires N-1 invocations of the datum comparison routine, which are |
| 2285 | * completely redundant with work that was done during the sort. (The |
| 2286 | * sort algorithm must at some point have compared each pair of items |
| 2287 | * that are adjacent in the sorted order; otherwise it could not know |
| 2288 | * that it's ordered the pair correctly.) We exploit this by having |
| 2289 | * compare_scalars remember the highest tupno index that each |
| 2290 | * ScalarItem has been found equal to. At the end of the sort, a |
| 2291 | * ScalarItem's tupnoLink will still point to itself if and only if it |
| 2292 | * is the last item of its group of duplicates (since the group will |
| 2293 | * be ordered by tupno). |
| 2294 | */ |
| 2295 | corr_xysum = 0; |
| 2296 | ndistinct = 0; |
| 2297 | nmultiple = 0; |
| 2298 | dups_cnt = 0; |
| 2299 | for (i = 0; i < values_cnt; i++) |
| 2300 | { |
| 2301 | int tupno = values[i].tupno; |
| 2302 | |
| 2303 | corr_xysum += ((double) i) * ((double) tupno); |
| 2304 | dups_cnt++; |
| 2305 | if (tupnoLink[tupno] == tupno) |
| 2306 | { |
| 2307 | /* Reached end of duplicates of this value */ |
| 2308 | ndistinct++; |
| 2309 | if (dups_cnt > 1) |
| 2310 | { |
| 2311 | nmultiple++; |
| 2312 | if (track_cnt < num_mcv || |
| 2313 | dups_cnt > track[track_cnt - 1].count) |
| 2314 | { |
| 2315 | /* |
| 2316 | * Found a new item for the mcv list; find its |
| 2317 | * position, bubbling down old items if needed. Loop |
| 2318 | * invariant is that j points at an empty/ replaceable |
| 2319 | * slot. |
| 2320 | */ |
| 2321 | int j; |
| 2322 | |
| 2323 | if (track_cnt < num_mcv) |
| 2324 | track_cnt++; |
| 2325 | for (j = track_cnt - 1; j > 0; j--) |
| 2326 | { |
| 2327 | if (dups_cnt <= track[j - 1].count) |
| 2328 | break; |
| 2329 | track[j].count = track[j - 1].count; |
| 2330 | track[j].first = track[j - 1].first; |
| 2331 | } |
| 2332 | track[j].count = dups_cnt; |
| 2333 | track[j].first = i + 1 - dups_cnt; |
| 2334 | } |
| 2335 | } |
| 2336 | dups_cnt = 0; |
| 2337 | } |
| 2338 | } |
| 2339 | |
| 2340 | stats->stats_valid = true; |
| 2341 | /* Do the simple null-frac and width stats */ |
| 2342 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
| 2343 | if (is_varwidth) |
| 2344 | stats->stawidth = total_width / (double) nonnull_cnt; |
| 2345 | else |
| 2346 | stats->stawidth = stats->attrtype->typlen; |
| 2347 | |
| 2348 | if (nmultiple == 0) |
| 2349 | { |
| 2350 | /* |
| 2351 | * If we found no repeated non-null values, assume it's a unique |
| 2352 | * column; but be sure to discount for any nulls we found. |
| 2353 | */ |
| 2354 | stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); |
| 2355 | } |
| 2356 | else if (toowide_cnt == 0 && nmultiple == ndistinct) |
| 2357 | { |
| 2358 | /* |
| 2359 | * Every value in the sample appeared more than once. Assume the |
| 2360 | * column has just these values. (This case is meant to address |
| 2361 | * columns with small, fixed sets of possible values, such as |
| 2362 | * boolean or enum columns. If there are any values that appear |
| 2363 | * just once in the sample, including too-wide values, we should |
| 2364 | * assume that that's not what we're dealing with.) |
| 2365 | */ |
| 2366 | stats->stadistinct = ndistinct; |
| 2367 | } |
| 2368 | else |
| 2369 | { |
| 2370 | /*---------- |
| 2371 | * Estimate the number of distinct values using the estimator |
| 2372 | * proposed by Haas and Stokes in IBM Research Report RJ 10025: |
| 2373 | * n*d / (n - f1 + f1*n/N) |
| 2374 | * where f1 is the number of distinct values that occurred |
| 2375 | * exactly once in our sample of n rows (from a total of N), |
| 2376 | * and d is the total number of distinct values in the sample. |
| 2377 | * This is their Duj1 estimator; the other estimators they |
| 2378 | * recommend are considerably more complex, and are numerically |
| 2379 | * very unstable when n is much smaller than N. |
| 2380 | * |
| 2381 | * In this calculation, we consider only non-nulls. We used to |
| 2382 | * include rows with null values in the n and N counts, but that |
| 2383 | * leads to inaccurate answers in columns with many nulls, and |
| 2384 | * it's intuitively bogus anyway considering the desired result is |
| 2385 | * the number of distinct non-null values. |
| 2386 | * |
| 2387 | * Overwidth values are assumed to have been distinct. |
| 2388 | *---------- |
| 2389 | */ |
| 2390 | int f1 = ndistinct - nmultiple + toowide_cnt; |
| 2391 | int d = f1 + nmultiple; |
| 2392 | double n = samplerows - null_cnt; |
| 2393 | double N = totalrows * (1.0 - stats->stanullfrac); |
| 2394 | double stadistinct; |
| 2395 | |
| 2396 | /* N == 0 shouldn't happen, but just in case ... */ |
| 2397 | if (N > 0) |
| 2398 | stadistinct = (n * d) / ((n - f1) + f1 * n / N); |
| 2399 | else |
| 2400 | stadistinct = 0; |
| 2401 | |
| 2402 | /* Clamp to sane range in case of roundoff error */ |
| 2403 | if (stadistinct < d) |
| 2404 | stadistinct = d; |
| 2405 | if (stadistinct > N) |
| 2406 | stadistinct = N; |
| 2407 | /* And round to integer */ |
| 2408 | stats->stadistinct = floor(stadistinct + 0.5); |
| 2409 | } |
| 2410 | |
| 2411 | /* |
| 2412 | * If we estimated the number of distinct values at more than 10% of |
| 2413 | * the total row count (a very arbitrary limit), then assume that |
| 2414 | * stadistinct should scale with the row count rather than be a fixed |
| 2415 | * value. |
| 2416 | */ |
| 2417 | if (stats->stadistinct > 0.1 * totalrows) |
| 2418 | stats->stadistinct = -(stats->stadistinct / totalrows); |
| 2419 | |
| 2420 | /* |
| 2421 | * Decide how many values are worth storing as most-common values. If |
| 2422 | * we are able to generate a complete MCV list (all the values in the |
| 2423 | * sample will fit, and we think these are all the ones in the table), |
| 2424 | * then do so. Otherwise, store only those values that are |
| 2425 | * significantly more common than the values not in the list. |
| 2426 | * |
| 2427 | * Note: the first of these cases is meant to address columns with |
| 2428 | * small, fixed sets of possible values, such as boolean or enum |
| 2429 | * columns. If we can *completely* represent the column population by |
| 2430 | * an MCV list that will fit into the stats target, then we should do |
| 2431 | * so and thus provide the planner with complete information. But if |
| 2432 | * the MCV list is not complete, it's generally worth being more |
| 2433 | * selective, and not just filling it all the way up to the stats |
| 2434 | * target. |
| 2435 | */ |
| 2436 | if (track_cnt == ndistinct && toowide_cnt == 0 && |
| 2437 | stats->stadistinct > 0 && |
| 2438 | track_cnt <= num_mcv) |
| 2439 | { |
| 2440 | /* Track list includes all values seen, and all will fit */ |
| 2441 | num_mcv = track_cnt; |
| 2442 | } |
| 2443 | else |
| 2444 | { |
| 2445 | int *mcv_counts; |
| 2446 | |
| 2447 | /* Incomplete list; decide how many values are worth keeping */ |
| 2448 | if (num_mcv > track_cnt) |
| 2449 | num_mcv = track_cnt; |
| 2450 | |
| 2451 | if (num_mcv > 0) |
| 2452 | { |
| 2453 | mcv_counts = (int *) palloc(num_mcv * sizeof(int)); |
| 2454 | for (i = 0; i < num_mcv; i++) |
| 2455 | mcv_counts[i] = track[i].count; |
| 2456 | |
| 2457 | num_mcv = analyze_mcv_list(mcv_counts, num_mcv, |
| 2458 | stats->stadistinct, |
| 2459 | stats->stanullfrac, |
| 2460 | samplerows, totalrows); |
| 2461 | } |
| 2462 | } |
| 2463 | |
| 2464 | /* Generate MCV slot entry */ |
| 2465 | if (num_mcv > 0) |
| 2466 | { |
| 2467 | MemoryContext old_context; |
| 2468 | Datum *mcv_values; |
| 2469 | float4 *mcv_freqs; |
| 2470 | |
| 2471 | /* Must copy the target values into anl_context */ |
| 2472 | old_context = MemoryContextSwitchTo(stats->anl_context); |
| 2473 | mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum)); |
| 2474 | mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4)); |
| 2475 | for (i = 0; i < num_mcv; i++) |
| 2476 | { |
| 2477 | mcv_values[i] = datumCopy(values[track[i].first].value, |
| 2478 | stats->attrtype->typbyval, |
| 2479 | stats->attrtype->typlen); |
| 2480 | mcv_freqs[i] = (double) track[i].count / (double) samplerows; |
| 2481 | } |
| 2482 | MemoryContextSwitchTo(old_context); |
| 2483 | |
| 2484 | stats->stakind[slot_idx] = STATISTIC_KIND_MCV; |
| 2485 | stats->staop[slot_idx] = mystats->eqopr; |
| 2486 | stats->stacoll[slot_idx] = stats->attrcollid; |
| 2487 | stats->stanumbers[slot_idx] = mcv_freqs; |
| 2488 | stats->numnumbers[slot_idx] = num_mcv; |
| 2489 | stats->stavalues[slot_idx] = mcv_values; |
| 2490 | stats->numvalues[slot_idx] = num_mcv; |
| 2491 | |
| 2492 | /* |
| 2493 | * Accept the defaults for stats->statypid and others. They have |
| 2494 | * been set before we were called (see vacuum.h) |
| 2495 | */ |
| 2496 | slot_idx++; |
| 2497 | } |
| 2498 | |
| 2499 | /* |
| 2500 | * Generate a histogram slot entry if there are at least two distinct |
| 2501 | * values not accounted for in the MCV list. (This ensures the |
| 2502 | * histogram won't collapse to empty or a singleton.) |
| 2503 | */ |
| 2504 | num_hist = ndistinct - num_mcv; |
| 2505 | if (num_hist > num_bins) |
| 2506 | num_hist = num_bins + 1; |
| 2507 | if (num_hist >= 2) |
| 2508 | { |
| 2509 | MemoryContext old_context; |
| 2510 | Datum *hist_values; |
| 2511 | int nvals; |
| 2512 | int pos, |
| 2513 | posfrac, |
| 2514 | delta, |
| 2515 | deltafrac; |
| 2516 | |
| 2517 | /* Sort the MCV items into position order to speed next loop */ |
| 2518 | qsort((void *) track, num_mcv, |
| 2519 | sizeof(ScalarMCVItem), compare_mcvs); |
| 2520 | |
| 2521 | /* |
| 2522 | * Collapse out the MCV items from the values[] array. |
| 2523 | * |
| 2524 | * Note we destroy the values[] array here... but we don't need it |
| 2525 | * for anything more. We do, however, still need values_cnt. |
| 2526 | * nvals will be the number of remaining entries in values[]. |
| 2527 | */ |
| 2528 | if (num_mcv > 0) |
| 2529 | { |
| 2530 | int src, |
| 2531 | dest; |
| 2532 | int j; |
| 2533 | |
| 2534 | src = dest = 0; |
| 2535 | j = 0; /* index of next interesting MCV item */ |
| 2536 | while (src < values_cnt) |
| 2537 | { |
| 2538 | int ncopy; |
| 2539 | |
| 2540 | if (j < num_mcv) |
| 2541 | { |
| 2542 | int first = track[j].first; |
| 2543 | |
| 2544 | if (src >= first) |
| 2545 | { |
| 2546 | /* advance past this MCV item */ |
| 2547 | src = first + track[j].count; |
| 2548 | j++; |
| 2549 | continue; |
| 2550 | } |
| 2551 | ncopy = first - src; |
| 2552 | } |
| 2553 | else |
| 2554 | ncopy = values_cnt - src; |
| 2555 | memmove(&values[dest], &values[src], |
| 2556 | ncopy * sizeof(ScalarItem)); |
| 2557 | src += ncopy; |
| 2558 | dest += ncopy; |
| 2559 | } |
| 2560 | nvals = dest; |
| 2561 | } |
| 2562 | else |
| 2563 | nvals = values_cnt; |
| 2564 | Assert(nvals >= num_hist); |
| 2565 | |
| 2566 | /* Must copy the target values into anl_context */ |
| 2567 | old_context = MemoryContextSwitchTo(stats->anl_context); |
| 2568 | hist_values = (Datum *) palloc(num_hist * sizeof(Datum)); |
| 2569 | |
| 2570 | /* |
| 2571 | * The object of this loop is to copy the first and last values[] |
| 2572 | * entries along with evenly-spaced values in between. So the |
| 2573 | * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)]. But |
| 2574 | * computing that subscript directly risks integer overflow when |
| 2575 | * the stats target is more than a couple thousand. Instead we |
| 2576 | * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking |
| 2577 | * the integral and fractional parts of the sum separately. |
| 2578 | */ |
| 2579 | delta = (nvals - 1) / (num_hist - 1); |
| 2580 | deltafrac = (nvals - 1) % (num_hist - 1); |
| 2581 | pos = posfrac = 0; |
| 2582 | |
| 2583 | for (i = 0; i < num_hist; i++) |
| 2584 | { |
| 2585 | hist_values[i] = datumCopy(values[pos].value, |
| 2586 | stats->attrtype->typbyval, |
| 2587 | stats->attrtype->typlen); |
| 2588 | pos += delta; |
| 2589 | posfrac += deltafrac; |
| 2590 | if (posfrac >= (num_hist - 1)) |
| 2591 | { |
| 2592 | /* fractional part exceeds 1, carry to integer part */ |
| 2593 | pos++; |
| 2594 | posfrac -= (num_hist - 1); |
| 2595 | } |
| 2596 | } |
| 2597 | |
| 2598 | MemoryContextSwitchTo(old_context); |
| 2599 | |
| 2600 | stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM; |
| 2601 | stats->staop[slot_idx] = mystats->ltopr; |
| 2602 | stats->stacoll[slot_idx] = stats->attrcollid; |
| 2603 | stats->stavalues[slot_idx] = hist_values; |
| 2604 | stats->numvalues[slot_idx] = num_hist; |
| 2605 | |
| 2606 | /* |
| 2607 | * Accept the defaults for stats->statypid and others. They have |
| 2608 | * been set before we were called (see vacuum.h) |
| 2609 | */ |
| 2610 | slot_idx++; |
| 2611 | } |
| 2612 | |
| 2613 | /* Generate a correlation entry if there are multiple values */ |
| 2614 | if (values_cnt > 1) |
| 2615 | { |
| 2616 | MemoryContext old_context; |
| 2617 | float4 *corrs; |
| 2618 | double corr_xsum, |
| 2619 | corr_x2sum; |
| 2620 | |
| 2621 | /* Must copy the target values into anl_context */ |
| 2622 | old_context = MemoryContextSwitchTo(stats->anl_context); |
| 2623 | corrs = (float4 *) palloc(sizeof(float4)); |
| 2624 | MemoryContextSwitchTo(old_context); |
| 2625 | |
| 2626 | /*---------- |
| 2627 | * Since we know the x and y value sets are both |
| 2628 | * 0, 1, ..., values_cnt-1 |
| 2629 | * we have sum(x) = sum(y) = |
| 2630 | * (values_cnt-1)*values_cnt / 2 |
| 2631 | * and sum(x^2) = sum(y^2) = |
| 2632 | * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6. |
| 2633 | *---------- |
| 2634 | */ |
| 2635 | corr_xsum = ((double) (values_cnt - 1)) * |
| 2636 | ((double) values_cnt) / 2.0; |
| 2637 | corr_x2sum = ((double) (values_cnt - 1)) * |
| 2638 | ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0; |
| 2639 | |
| 2640 | /* And the correlation coefficient reduces to */ |
| 2641 | corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) / |
| 2642 | (values_cnt * corr_x2sum - corr_xsum * corr_xsum); |
| 2643 | |
| 2644 | stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION; |
| 2645 | stats->staop[slot_idx] = mystats->ltopr; |
| 2646 | stats->stacoll[slot_idx] = stats->attrcollid; |
| 2647 | stats->stanumbers[slot_idx] = corrs; |
| 2648 | stats->numnumbers[slot_idx] = 1; |
| 2649 | slot_idx++; |
| 2650 | } |
| 2651 | } |
| 2652 | else if (nonnull_cnt > 0) |
| 2653 | { |
| 2654 | /* We found some non-null values, but they were all too wide */ |
| 2655 | Assert(nonnull_cnt == toowide_cnt); |
| 2656 | stats->stats_valid = true; |
| 2657 | /* Do the simple null-frac and width stats */ |
| 2658 | stats->stanullfrac = (double) null_cnt / (double) samplerows; |
| 2659 | if (is_varwidth) |
| 2660 | stats->stawidth = total_width / (double) nonnull_cnt; |
| 2661 | else |
| 2662 | stats->stawidth = stats->attrtype->typlen; |
| 2663 | /* Assume all too-wide values are distinct, so it's a unique column */ |
| 2664 | stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); |
| 2665 | } |
| 2666 | else if (null_cnt > 0) |
| 2667 | { |
| 2668 | /* We found only nulls; assume the column is entirely null */ |
| 2669 | stats->stats_valid = true; |
| 2670 | stats->stanullfrac = 1.0; |
| 2671 | if (is_varwidth) |
| 2672 | stats->stawidth = 0; /* "unknown" */ |
| 2673 | else |
| 2674 | stats->stawidth = stats->attrtype->typlen; |
| 2675 | stats->stadistinct = 0.0; /* "unknown" */ |
| 2676 | } |
| 2677 | |
| 2678 | /* We don't need to bother cleaning up any of our temporary palloc's */ |
| 2679 | } |
| 2680 | |
| 2681 | /* |
| 2682 | * qsort_arg comparator for sorting ScalarItems |
| 2683 | * |
| 2684 | * Aside from sorting the items, we update the tupnoLink[] array |
| 2685 | * whenever two ScalarItems are found to contain equal datums. The array |
| 2686 | * is indexed by tupno; for each ScalarItem, it contains the highest |
| 2687 | * tupno that that item's datum has been found to be equal to. This allows |
| 2688 | * us to avoid additional comparisons in compute_scalar_stats(). |
| 2689 | */ |
| 2690 | static int |
| 2691 | compare_scalars(const void *a, const void *b, void *arg) |
| 2692 | { |
| 2693 | Datum da = ((const ScalarItem *) a)->value; |
| 2694 | int ta = ((const ScalarItem *) a)->tupno; |
| 2695 | Datum db = ((const ScalarItem *) b)->value; |
| 2696 | int tb = ((const ScalarItem *) b)->tupno; |
| 2697 | CompareScalarsContext *cxt = (CompareScalarsContext *) arg; |
| 2698 | int compare; |
| 2699 | |
| 2700 | compare = ApplySortComparator(da, false, db, false, cxt->ssup); |
| 2701 | if (compare != 0) |
| 2702 | return compare; |
| 2703 | |
| 2704 | /* |
| 2705 | * The two datums are equal, so update cxt->tupnoLink[]. |
| 2706 | */ |
| 2707 | if (cxt->tupnoLink[ta] < tb) |
| 2708 | cxt->tupnoLink[ta] = tb; |
| 2709 | if (cxt->tupnoLink[tb] < ta) |
| 2710 | cxt->tupnoLink[tb] = ta; |
| 2711 | |
| 2712 | /* |
| 2713 | * For equal datums, sort by tupno |
| 2714 | */ |
| 2715 | return ta - tb; |
| 2716 | } |
| 2717 | |
| 2718 | /* |
| 2719 | * qsort comparator for sorting ScalarMCVItems by position |
| 2720 | */ |
| 2721 | static int |
| 2722 | compare_mcvs(const void *a, const void *b) |
| 2723 | { |
| 2724 | int da = ((const ScalarMCVItem *) a)->first; |
| 2725 | int db = ((const ScalarMCVItem *) b)->first; |
| 2726 | |
| 2727 | return da - db; |
| 2728 | } |
| 2729 | |
| 2730 | /* |
| 2731 | * Analyze the list of common values in the sample and decide how many are |
| 2732 | * worth storing in the table's MCV list. |
| 2733 | * |
| 2734 | * mcv_counts is assumed to be a list of the counts of the most common values |
| 2735 | * seen in the sample, starting with the most common. The return value is the |
| 2736 | * number that are significantly more common than the values not in the list, |
| 2737 | * and which are therefore deemed worth storing in the table's MCV list. |
| 2738 | */ |
| 2739 | static int |
| 2740 | analyze_mcv_list(int *mcv_counts, |
| 2741 | int num_mcv, |
| 2742 | double stadistinct, |
| 2743 | double stanullfrac, |
| 2744 | int samplerows, |
| 2745 | double totalrows) |
| 2746 | { |
| 2747 | double ndistinct_table; |
| 2748 | double sumcount; |
| 2749 | int i; |
| 2750 | |
| 2751 | /* |
| 2752 | * If the entire table was sampled, keep the whole list. This also |
| 2753 | * protects us against division by zero in the code below. |
| 2754 | */ |
| 2755 | if (samplerows == totalrows || totalrows <= 1.0) |
| 2756 | return num_mcv; |
| 2757 | |
| 2758 | /* Re-extract the estimated number of distinct nonnull values in table */ |
| 2759 | ndistinct_table = stadistinct; |
| 2760 | if (ndistinct_table < 0) |
| 2761 | ndistinct_table = -ndistinct_table * totalrows; |
| 2762 | |
| 2763 | /* |
| 2764 | * Exclude the least common values from the MCV list, if they are not |
| 2765 | * significantly more common than the estimated selectivity they would |
| 2766 | * have if they weren't in the list. All non-MCV values are assumed to be |
| 2767 | * equally common, after taking into account the frequencies of all the |
| 2768 | * values in the MCV list and the number of nulls (c.f. eqsel()). |
| 2769 | * |
| 2770 | * Here sumcount tracks the total count of all but the last (least common) |
| 2771 | * value in the MCV list, allowing us to determine the effect of excluding |
| 2772 | * that value from the list. |
| 2773 | * |
| 2774 | * Note that we deliberately do this by removing values from the full |
| 2775 | * list, rather than starting with an empty list and adding values, |
| 2776 | * because the latter approach can fail to add any values if all the most |
| 2777 | * common values have around the same frequency and make up the majority |
| 2778 | * of the table, so that the overall average frequency of all values is |
| 2779 | * roughly the same as that of the common values. This would lead to any |
| 2780 | * uncommon values being significantly overestimated. |
| 2781 | */ |
| 2782 | sumcount = 0.0; |
| 2783 | for (i = 0; i < num_mcv - 1; i++) |
| 2784 | sumcount += mcv_counts[i]; |
| 2785 | |
| 2786 | while (num_mcv > 0) |
| 2787 | { |
| 2788 | double selec, |
| 2789 | otherdistinct, |
| 2790 | N, |
| 2791 | n, |
| 2792 | K, |
| 2793 | variance, |
| 2794 | stddev; |
| 2795 | |
| 2796 | /* |
| 2797 | * Estimated selectivity the least common value would have if it |
| 2798 | * wasn't in the MCV list (c.f. eqsel()). |
| 2799 | */ |
| 2800 | selec = 1.0 - sumcount / samplerows - stanullfrac; |
| 2801 | if (selec < 0.0) |
| 2802 | selec = 0.0; |
| 2803 | if (selec > 1.0) |
| 2804 | selec = 1.0; |
| 2805 | otherdistinct = ndistinct_table - (num_mcv - 1); |
| 2806 | if (otherdistinct > 1) |
| 2807 | selec /= otherdistinct; |
| 2808 | |
| 2809 | /* |
| 2810 | * If the value is kept in the MCV list, its population frequency is |
| 2811 | * assumed to equal its sample frequency. We use the lower end of a |
| 2812 | * textbook continuity-corrected Wald-type confidence interval to |
| 2813 | * determine if that is significantly more common than the non-MCV |
| 2814 | * frequency --- specifically we assume the population frequency is |
| 2815 | * highly likely to be within around 2 standard errors of the sample |
| 2816 | * frequency, which equates to an interval of 2 standard deviations |
| 2817 | * either side of the sample count, plus an additional 0.5 for the |
| 2818 | * continuity correction. Since we are sampling without replacement, |
| 2819 | * this is a hypergeometric distribution. |
| 2820 | * |
| 2821 | * XXX: Empirically, this approach seems to work quite well, but it |
| 2822 | * may be worth considering more advanced techniques for estimating |
| 2823 | * the confidence interval of the hypergeometric distribution. |
| 2824 | */ |
| 2825 | N = totalrows; |
| 2826 | n = samplerows; |
| 2827 | K = N * mcv_counts[num_mcv - 1] / n; |
| 2828 | variance = n * K * (N - K) * (N - n) / (N * N * (N - 1)); |
| 2829 | stddev = sqrt(variance); |
| 2830 | |
| 2831 | if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5) |
| 2832 | { |
| 2833 | /* |
| 2834 | * The value is significantly more common than the non-MCV |
| 2835 | * selectivity would suggest. Keep it, and all the other more |
| 2836 | * common values in the list. |
| 2837 | */ |
| 2838 | break; |
| 2839 | } |
| 2840 | else |
| 2841 | { |
| 2842 | /* Discard this value and consider the next least common value */ |
| 2843 | num_mcv--; |
| 2844 | if (num_mcv == 0) |
| 2845 | break; |
| 2846 | sumcount -= mcv_counts[num_mcv - 1]; |
| 2847 | } |
| 2848 | } |
| 2849 | return num_mcv; |
| 2850 | } |
| 2851 | |