1/*-------------------------------------------------------------------------
2 *
3 * vacuum.c
4 * The postgres vacuum cleaner.
5 *
6 * This file now includes only control and dispatch code for VACUUM and
7 * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8 * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9 * in cluster.c.
10 *
11 *
12 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
13 * Portions Copyright (c) 1994, Regents of the University of California
14 *
15 *
16 * IDENTIFICATION
17 * src/backend/commands/vacuum.c
18 *
19 *-------------------------------------------------------------------------
20 */
21#include "postgres.h"
22
23#include <math.h>
24
25#include "access/clog.h"
26#include "access/commit_ts.h"
27#include "access/genam.h"
28#include "access/heapam.h"
29#include "access/htup_details.h"
30#include "access/multixact.h"
31#include "access/tableam.h"
32#include "access/transam.h"
33#include "access/xact.h"
34#include "catalog/namespace.h"
35#include "catalog/pg_database.h"
36#include "catalog/pg_inherits.h"
37#include "catalog/pg_namespace.h"
38#include "commands/cluster.h"
39#include "commands/defrem.h"
40#include "commands/vacuum.h"
41#include "miscadmin.h"
42#include "nodes/makefuncs.h"
43#include "pgstat.h"
44#include "postmaster/autovacuum.h"
45#include "storage/bufmgr.h"
46#include "storage/lmgr.h"
47#include "storage/proc.h"
48#include "storage/procarray.h"
49#include "utils/acl.h"
50#include "utils/fmgroids.h"
51#include "utils/guc.h"
52#include "utils/memutils.h"
53#include "utils/snapmgr.h"
54#include "utils/syscache.h"
55
56
/*
 * GUC parameters
 *
 * These are the fallback values used by vacuum_set_xid_limits() whenever its
 * caller passes a negative number for the corresponding argument
 * (freeze_min_age, freeze_table_age, multixact_freeze_min_age and
 * multixact_freeze_table_age respectively).
 */
int			vacuum_freeze_min_age;
int			vacuum_freeze_table_age;
int			vacuum_multixact_freeze_min_age;
int			vacuum_multixact_freeze_table_age;
64
65
/*
 * A few variables that don't seem worth passing around as parameters.
 *
 * vac_context is the cross-transaction memory context that vacuum() creates
 * on entry; on normal completion vacuum() deletes it and resets the pointer
 * to NULL.  vac_strategy is the buffer access strategy vacuum() settled on:
 * either the one its caller supplied or one allocated in vac_context.
 */
static MemoryContext vac_context = NULL;
static BufferAccessStrategy vac_strategy;
69
70
/* non-export function prototypes */

/* turn one VacuumRelation into the list of VacuumRelations to process */
static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
/* build the list of VacuumRelations used when no relation list is given */
static List *get_all_vacuum_rels(int options);
static void vac_truncate_clog(TransactionId frozenXID,
							  MultiXactId minMulti,
							  TransactionId lastSaneFrozenXid,
							  MultiXactId lastSaneMinMulti);
/* vacuum() skips the ANALYZE step for a relation when this returns false */
static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
/* used by ExecVacuum() to parse the index_cleanup and truncate options */
static VacOptTernaryValue get_vacopt_ternary_value(DefElem *def);
80
81/*
82 * Primary entry point for manual VACUUM and ANALYZE commands
83 *
84 * This is mainly a preparation wrapper for the real operations that will
85 * happen in vacuum().
86 */
87void
88ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
89{
90 VacuumParams params;
91 bool verbose = false;
92 bool skip_locked = false;
93 bool analyze = false;
94 bool freeze = false;
95 bool full = false;
96 bool disable_page_skipping = false;
97 ListCell *lc;
98
99 /* Set default value */
100 params.index_cleanup = VACOPT_TERNARY_DEFAULT;
101 params.truncate = VACOPT_TERNARY_DEFAULT;
102
103 /* Parse options list */
104 foreach(lc, vacstmt->options)
105 {
106 DefElem *opt = (DefElem *) lfirst(lc);
107
108 /* Parse common options for VACUUM and ANALYZE */
109 if (strcmp(opt->defname, "verbose") == 0)
110 verbose = defGetBoolean(opt);
111 else if (strcmp(opt->defname, "skip_locked") == 0)
112 skip_locked = defGetBoolean(opt);
113 else if (!vacstmt->is_vacuumcmd)
114 ereport(ERROR,
115 (errcode(ERRCODE_SYNTAX_ERROR),
116 errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
117 parser_errposition(pstate, opt->location)));
118
119 /* Parse options available on VACUUM */
120 else if (strcmp(opt->defname, "analyze") == 0)
121 analyze = defGetBoolean(opt);
122 else if (strcmp(opt->defname, "freeze") == 0)
123 freeze = defGetBoolean(opt);
124 else if (strcmp(opt->defname, "full") == 0)
125 full = defGetBoolean(opt);
126 else if (strcmp(opt->defname, "disable_page_skipping") == 0)
127 disable_page_skipping = defGetBoolean(opt);
128 else if (strcmp(opt->defname, "index_cleanup") == 0)
129 params.index_cleanup = get_vacopt_ternary_value(opt);
130 else if (strcmp(opt->defname, "truncate") == 0)
131 params.truncate = get_vacopt_ternary_value(opt);
132 else
133 ereport(ERROR,
134 (errcode(ERRCODE_SYNTAX_ERROR),
135 errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
136 parser_errposition(pstate, opt->location)));
137 }
138
139 /* Set vacuum options */
140 params.options =
141 (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
142 (verbose ? VACOPT_VERBOSE : 0) |
143 (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
144 (analyze ? VACOPT_ANALYZE : 0) |
145 (freeze ? VACOPT_FREEZE : 0) |
146 (full ? VACOPT_FULL : 0) |
147 (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0);
148
149 /* sanity checks on options */
150 Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
151 Assert((params.options & VACOPT_VACUUM) ||
152 !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
153 Assert(!(params.options & VACOPT_SKIPTOAST));
154
155 /*
156 * Make sure VACOPT_ANALYZE is specified if any column lists are present.
157 */
158 if (!(params.options & VACOPT_ANALYZE))
159 {
160 ListCell *lc;
161
162 foreach(lc, vacstmt->rels)
163 {
164 VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
165
166 if (vrel->va_cols != NIL)
167 ereport(ERROR,
168 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
169 errmsg("ANALYZE option must be specified when a column list is provided")));
170 }
171 }
172
173 /*
174 * All freeze ages are zero if the FREEZE option is given; otherwise pass
175 * them as -1 which means to use the default values.
176 */
177 if (params.options & VACOPT_FREEZE)
178 {
179 params.freeze_min_age = 0;
180 params.freeze_table_age = 0;
181 params.multixact_freeze_min_age = 0;
182 params.multixact_freeze_table_age = 0;
183 }
184 else
185 {
186 params.freeze_min_age = -1;
187 params.freeze_table_age = -1;
188 params.multixact_freeze_min_age = -1;
189 params.multixact_freeze_table_age = -1;
190 }
191
192 /* user-invoked vacuum is never "for wraparound" */
193 params.is_wraparound = false;
194
195 /* user-invoked vacuum never uses this parameter */
196 params.log_min_duration = -1;
197
198 /* Now go through the common routine */
199 vacuum(vacstmt->rels, &params, NULL, isTopLevel);
200}
201
202/*
203 * Internal entry point for VACUUM and ANALYZE commands.
204 *
205 * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
206 * we process all relevant tables in the database. For each VacuumRelation,
207 * if a valid OID is supplied, the table with that OID is what to process;
208 * otherwise, the VacuumRelation's RangeVar indicates what to process.
209 *
210 * params contains a set of parameters that can be used to customize the
211 * behavior.
212 *
213 * bstrategy is normally given as NULL, but in autovacuum it can be passed
214 * in to use the same buffer strategy object across multiple vacuum() calls.
215 *
216 * isTopLevel should be passed down from ProcessUtility.
217 *
218 * It is the caller's responsibility that all parameters are allocated in a
219 * memory context that will not disappear at transaction commit.
220 */
221void
222vacuum(List *relations, VacuumParams *params,
223 BufferAccessStrategy bstrategy, bool isTopLevel)
224{
225 static bool in_vacuum = false;
226
227 const char *stmttype;
228 volatile bool in_outer_xact,
229 use_own_xacts;
230
231 Assert(params != NULL);
232
233 stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
234
235 /*
236 * We cannot run VACUUM inside a user transaction block; if we were inside
237 * a transaction, then our commit- and start-transaction-command calls
238 * would not have the intended effect! There are numerous other subtle
239 * dependencies on this, too.
240 *
241 * ANALYZE (without VACUUM) can run either way.
242 */
243 if (params->options & VACOPT_VACUUM)
244 {
245 PreventInTransactionBlock(isTopLevel, stmttype);
246 in_outer_xact = false;
247 }
248 else
249 in_outer_xact = IsInTransactionBlock(isTopLevel);
250
251 /*
252 * Due to static variables vac_context, anl_context and vac_strategy,
253 * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
254 * calls a hostile index expression that itself calls ANALYZE.
255 */
256 if (in_vacuum)
257 ereport(ERROR,
258 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
259 errmsg("%s cannot be executed from VACUUM or ANALYZE",
260 stmttype)));
261
262 /*
263 * Sanity check DISABLE_PAGE_SKIPPING option.
264 */
265 if ((params->options & VACOPT_FULL) != 0 &&
266 (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
267 ereport(ERROR,
268 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
269 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
270
271 /*
272 * Send info about dead objects to the statistics collector, unless we are
273 * in autovacuum --- autovacuum.c does this for itself.
274 */
275 if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
276 pgstat_vacuum_stat();
277
278 /*
279 * Create special memory context for cross-transaction storage.
280 *
281 * Since it is a child of PortalContext, it will go away eventually even
282 * if we suffer an error; there's no need for special abort cleanup logic.
283 */
284 vac_context = AllocSetContextCreate(PortalContext,
285 "Vacuum",
286 ALLOCSET_DEFAULT_SIZES);
287
288 /*
289 * If caller didn't give us a buffer strategy object, make one in the
290 * cross-transaction memory context.
291 */
292 if (bstrategy == NULL)
293 {
294 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
295
296 bstrategy = GetAccessStrategy(BAS_VACUUM);
297 MemoryContextSwitchTo(old_context);
298 }
299 vac_strategy = bstrategy;
300
301 /*
302 * Build list of relation(s) to process, putting any new data in
303 * vac_context for safekeeping.
304 */
305 if (relations != NIL)
306 {
307 List *newrels = NIL;
308 ListCell *lc;
309
310 foreach(lc, relations)
311 {
312 VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
313 List *sublist;
314 MemoryContext old_context;
315
316 sublist = expand_vacuum_rel(vrel, params->options);
317 old_context = MemoryContextSwitchTo(vac_context);
318 newrels = list_concat(newrels, sublist);
319 MemoryContextSwitchTo(old_context);
320 }
321 relations = newrels;
322 }
323 else
324 relations = get_all_vacuum_rels(params->options);
325
326 /*
327 * Decide whether we need to start/commit our own transactions.
328 *
329 * For VACUUM (with or without ANALYZE): always do so, so that we can
330 * release locks as soon as possible. (We could possibly use the outer
331 * transaction for a one-table VACUUM, but handling TOAST tables would be
332 * problematic.)
333 *
334 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
335 * start/commit our own transactions. Also, there's no need to do so if
336 * only processing one relation. For multiple relations when not within a
337 * transaction block, and also in an autovacuum worker, use own
338 * transactions so we can release locks sooner.
339 */
340 if (params->options & VACOPT_VACUUM)
341 use_own_xacts = true;
342 else
343 {
344 Assert(params->options & VACOPT_ANALYZE);
345 if (IsAutoVacuumWorkerProcess())
346 use_own_xacts = true;
347 else if (in_outer_xact)
348 use_own_xacts = false;
349 else if (list_length(relations) > 1)
350 use_own_xacts = true;
351 else
352 use_own_xacts = false;
353 }
354
355 /*
356 * vacuum_rel expects to be entered with no transaction active; it will
357 * start and commit its own transaction. But we are called by an SQL
358 * command, and so we are executing inside a transaction already. We
359 * commit the transaction started in PostgresMain() here, and start
360 * another one before exiting to match the commit waiting for us back in
361 * PostgresMain().
362 */
363 if (use_own_xacts)
364 {
365 Assert(!in_outer_xact);
366
367 /* ActiveSnapshot is not set by autovacuum */
368 if (ActiveSnapshotSet())
369 PopActiveSnapshot();
370
371 /* matches the StartTransaction in PostgresMain() */
372 CommitTransactionCommand();
373 }
374
375 /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
376 PG_TRY();
377 {
378 ListCell *cur;
379
380 in_vacuum = true;
381 VacuumCostActive = (VacuumCostDelay > 0);
382 VacuumCostBalance = 0;
383 VacuumPageHit = 0;
384 VacuumPageMiss = 0;
385 VacuumPageDirty = 0;
386
387 /*
388 * Loop to process each selected relation.
389 */
390 foreach(cur, relations)
391 {
392 VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
393
394 if (params->options & VACOPT_VACUUM)
395 {
396 if (!vacuum_rel(vrel->oid, vrel->relation, params))
397 continue;
398 }
399
400 if (params->options & VACOPT_ANALYZE)
401 {
402 /*
403 * If using separate xacts, start one for analyze. Otherwise,
404 * we can use the outer transaction.
405 */
406 if (use_own_xacts)
407 {
408 StartTransactionCommand();
409 /* functions in indexes may want a snapshot set */
410 PushActiveSnapshot(GetTransactionSnapshot());
411 }
412
413 analyze_rel(vrel->oid, vrel->relation, params,
414 vrel->va_cols, in_outer_xact, vac_strategy);
415
416 if (use_own_xacts)
417 {
418 PopActiveSnapshot();
419 CommitTransactionCommand();
420 }
421 else
422 {
423 /*
424 * If we're not using separate xacts, better separate the
425 * ANALYZE actions with CCIs. This avoids trouble if user
426 * says "ANALYZE t, t".
427 */
428 CommandCounterIncrement();
429 }
430 }
431 }
432 }
433 PG_CATCH();
434 {
435 in_vacuum = false;
436 VacuumCostActive = false;
437 PG_RE_THROW();
438 }
439 PG_END_TRY();
440
441 in_vacuum = false;
442 VacuumCostActive = false;
443
444 /*
445 * Finish up processing.
446 */
447 if (use_own_xacts)
448 {
449 /* here, we are not in a transaction */
450
451 /*
452 * This matches the CommitTransaction waiting for us in
453 * PostgresMain().
454 */
455 StartTransactionCommand();
456 }
457
458 if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
459 {
460 /*
461 * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
462 * (autovacuum.c does this for itself.)
463 */
464 vac_update_datfrozenxid();
465 }
466
467 /*
468 * Clean up working storage --- note we must do this after
469 * StartTransactionCommand, else we might be trying to delete the active
470 * context!
471 */
472 MemoryContextDelete(vac_context);
473 vac_context = NULL;
474}
475
476/*
477 * Check if a given relation can be safely vacuumed or analyzed. If the
478 * user is not the relation owner, issue a WARNING log message and return
479 * false to let the caller decide what to do with this relation. This
480 * routine is used to decide if a relation can be processed for VACUUM or
481 * ANALYZE.
482 */
483bool
484vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, int options)
485{
486 char *relname;
487
488 Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
489
490 /*
491 * Check permissions.
492 *
493 * We allow the user to vacuum or analyze a table if he is superuser, the
494 * table owner, or the database owner (but in the latter case, only if
495 * it's not a shared relation). pg_class_ownercheck includes the
496 * superuser case.
497 *
498 * Note we choose to treat permissions failure as a WARNING and keep
499 * trying to vacuum or analyze the rest of the DB --- is this appropriate?
500 */
501 if (pg_class_ownercheck(relid, GetUserId()) ||
502 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared))
503 return true;
504
505 relname = NameStr(reltuple->relname);
506
507 if ((options & VACOPT_VACUUM) != 0)
508 {
509 if (reltuple->relisshared)
510 ereport(WARNING,
511 (errmsg("skipping \"%s\" --- only superuser can vacuum it",
512 relname)));
513 else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
514 ereport(WARNING,
515 (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
516 relname)));
517 else
518 ereport(WARNING,
519 (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
520 relname)));
521
522 /*
523 * For VACUUM ANALYZE, both logs could show up, but just generate
524 * information for VACUUM as that would be the first one to be
525 * processed.
526 */
527 return false;
528 }
529
530 if ((options & VACOPT_ANALYZE) != 0)
531 {
532 if (reltuple->relisshared)
533 ereport(WARNING,
534 (errmsg("skipping \"%s\" --- only superuser can analyze it",
535 relname)));
536 else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
537 ereport(WARNING,
538 (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
539 relname)));
540 else
541 ereport(WARNING,
542 (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
543 relname)));
544 }
545
546 return false;
547}
548
549
550/*
551 * vacuum_open_relation
552 *
553 * This routine is used for attempting to open and lock a relation which
554 * is going to be vacuumed or analyzed. If the relation cannot be opened
555 * or locked, a log is emitted if possible.
556 */
557Relation
558vacuum_open_relation(Oid relid, RangeVar *relation, int options,
559 bool verbose, LOCKMODE lmode)
560{
561 Relation onerel;
562 bool rel_lock = true;
563 int elevel;
564
565 Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
566
567 /*
568 * Open the relation and get the appropriate lock on it.
569 *
570 * There's a race condition here: the relation may have gone away since
571 * the last time we saw it. If so, we don't need to vacuum or analyze it.
572 *
573 * If we've been asked not to wait for the relation lock, acquire it first
574 * in non-blocking mode, before calling try_relation_open().
575 */
576 if (!(options & VACOPT_SKIP_LOCKED))
577 onerel = try_relation_open(relid, lmode);
578 else if (ConditionalLockRelationOid(relid, lmode))
579 onerel = try_relation_open(relid, NoLock);
580 else
581 {
582 onerel = NULL;
583 rel_lock = false;
584 }
585
586 /* if relation is opened, leave */
587 if (onerel)
588 return onerel;
589
590 /*
591 * Relation could not be opened, hence generate if possible a log
592 * informing on the situation.
593 *
594 * If the RangeVar is not defined, we do not have enough information to
595 * provide a meaningful log statement. Chances are that the caller has
596 * intentionally not provided this information so that this logging is
597 * skipped, anyway.
598 */
599 if (relation == NULL)
600 return NULL;
601
602 /*
603 * Determine the log level.
604 *
605 * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
606 * statements in the permission checks; otherwise, only log if the caller
607 * so requested.
608 */
609 if (!IsAutoVacuumWorkerProcess())
610 elevel = WARNING;
611 else if (verbose)
612 elevel = LOG;
613 else
614 return NULL;
615
616 if ((options & VACOPT_VACUUM) != 0)
617 {
618 if (!rel_lock)
619 ereport(elevel,
620 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
621 errmsg("skipping vacuum of \"%s\" --- lock not available",
622 relation->relname)));
623 else
624 ereport(elevel,
625 (errcode(ERRCODE_UNDEFINED_TABLE),
626 errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
627 relation->relname)));
628
629 /*
630 * For VACUUM ANALYZE, both logs could show up, but just generate
631 * information for VACUUM as that would be the first one to be
632 * processed.
633 */
634 return NULL;
635 }
636
637 if ((options & VACOPT_ANALYZE) != 0)
638 {
639 if (!rel_lock)
640 ereport(elevel,
641 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
642 errmsg("skipping analyze of \"%s\" --- lock not available",
643 relation->relname)));
644 else
645 ereport(elevel,
646 (errcode(ERRCODE_UNDEFINED_TABLE),
647 errmsg("skipping analyze of \"%s\" --- relation no longer exists",
648 relation->relname)));
649 }
650
651 return NULL;
652}
653
654
655/*
656 * Given a VacuumRelation, fill in the table OID if it wasn't specified,
657 * and optionally add VacuumRelations for partitions of the table.
658 *
659 * If a VacuumRelation does not have an OID supplied and is a partitioned
660 * table, an extra entry will be added to the output for each partition.
661 * Presently, only autovacuum supplies OIDs when calling vacuum(), and
662 * it does not want us to expand partitioned tables.
663 *
664 * We take care not to modify the input data structure, but instead build
665 * new VacuumRelation(s) to return. (But note that they will reference
666 * unmodified parts of the input, eg column lists.) New data structures
667 * are made in vac_context.
668 */
669static List *
670expand_vacuum_rel(VacuumRelation *vrel, int options)
671{
672 List *vacrels = NIL;
673 MemoryContext oldcontext;
674
675 /* If caller supplied OID, there's nothing we need do here. */
676 if (OidIsValid(vrel->oid))
677 {
678 oldcontext = MemoryContextSwitchTo(vac_context);
679 vacrels = lappend(vacrels, vrel);
680 MemoryContextSwitchTo(oldcontext);
681 }
682 else
683 {
684 /* Process a specific relation, and possibly partitions thereof */
685 Oid relid;
686 HeapTuple tuple;
687 Form_pg_class classForm;
688 bool include_parts;
689 int rvr_opts;
690
691 /*
692 * Since autovacuum workers supply OIDs when calling vacuum(), no
693 * autovacuum worker should reach this code.
694 */
695 Assert(!IsAutoVacuumWorkerProcess());
696
697 /*
698 * We transiently take AccessShareLock to protect the syscache lookup
699 * below, as well as find_all_inheritors's expectation that the caller
700 * holds some lock on the starting relation.
701 */
702 rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
703 relid = RangeVarGetRelidExtended(vrel->relation,
704 AccessShareLock,
705 rvr_opts,
706 NULL, NULL);
707
708 /*
709 * If the lock is unavailable, emit the same log statement that
710 * vacuum_rel() and analyze_rel() would.
711 */
712 if (!OidIsValid(relid))
713 {
714 if (options & VACOPT_VACUUM)
715 ereport(WARNING,
716 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
717 errmsg("skipping vacuum of \"%s\" --- lock not available",
718 vrel->relation->relname)));
719 else
720 ereport(WARNING,
721 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
722 errmsg("skipping analyze of \"%s\" --- lock not available",
723 vrel->relation->relname)));
724 return vacrels;
725 }
726
727 /*
728 * To check whether the relation is a partitioned table and its
729 * ownership, fetch its syscache entry.
730 */
731 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
732 if (!HeapTupleIsValid(tuple))
733 elog(ERROR, "cache lookup failed for relation %u", relid);
734 classForm = (Form_pg_class) GETSTRUCT(tuple);
735
736 /*
737 * Make a returnable VacuumRelation for this rel if user is a proper
738 * owner.
739 */
740 if (vacuum_is_relation_owner(relid, classForm, options))
741 {
742 oldcontext = MemoryContextSwitchTo(vac_context);
743 vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
744 relid,
745 vrel->va_cols));
746 MemoryContextSwitchTo(oldcontext);
747 }
748
749
750 include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
751 ReleaseSysCache(tuple);
752
753 /*
754 * If it is, make relation list entries for its partitions. Note that
755 * the list returned by find_all_inheritors() includes the passed-in
756 * OID, so we have to skip that. There's no point in taking locks on
757 * the individual partitions yet, and doing so would just add
758 * unnecessary deadlock risk. For this last reason we do not check
759 * yet the ownership of the partitions, which get added to the list to
760 * process. Ownership will be checked later on anyway.
761 */
762 if (include_parts)
763 {
764 List *part_oids = find_all_inheritors(relid, NoLock, NULL);
765 ListCell *part_lc;
766
767 foreach(part_lc, part_oids)
768 {
769 Oid part_oid = lfirst_oid(part_lc);
770
771 if (part_oid == relid)
772 continue; /* ignore original table */
773
774 /*
775 * We omit a RangeVar since it wouldn't be appropriate to
776 * complain about failure to open one of these relations
777 * later.
778 */
779 oldcontext = MemoryContextSwitchTo(vac_context);
780 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
781 part_oid,
782 vrel->va_cols));
783 MemoryContextSwitchTo(oldcontext);
784 }
785 }
786
787 /*
788 * Release lock again. This means that by the time we actually try to
789 * process the table, it might be gone or renamed. In the former case
790 * we'll silently ignore it; in the latter case we'll process it
791 * anyway, but we must beware that the RangeVar doesn't necessarily
792 * identify it anymore. This isn't ideal, perhaps, but there's little
793 * practical alternative, since we're typically going to commit this
794 * transaction and begin a new one between now and then. Moreover,
795 * holding locks on multiple relations would create significant risk
796 * of deadlock.
797 */
798 UnlockRelationOid(relid, AccessShareLock);
799 }
800
801 return vacrels;
802}
803
804/*
805 * Construct a list of VacuumRelations for all vacuumable rels in
806 * the current database. The list is built in vac_context.
807 */
808static List *
809get_all_vacuum_rels(int options)
810{
811 List *vacrels = NIL;
812 Relation pgclass;
813 TableScanDesc scan;
814 HeapTuple tuple;
815
816 pgclass = table_open(RelationRelationId, AccessShareLock);
817
818 scan = table_beginscan_catalog(pgclass, 0, NULL);
819
820 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
821 {
822 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
823 MemoryContext oldcontext;
824 Oid relid = classForm->oid;
825
826 /* check permissions of relation */
827 if (!vacuum_is_relation_owner(relid, classForm, options))
828 continue;
829
830 /*
831 * We include partitioned tables here; depending on which operation is
832 * to be performed, caller will decide whether to process or ignore
833 * them.
834 */
835 if (classForm->relkind != RELKIND_RELATION &&
836 classForm->relkind != RELKIND_MATVIEW &&
837 classForm->relkind != RELKIND_PARTITIONED_TABLE)
838 continue;
839
840 /*
841 * Build VacuumRelation(s) specifying the table OIDs to be processed.
842 * We omit a RangeVar since it wouldn't be appropriate to complain
843 * about failure to open one of these relations later.
844 */
845 oldcontext = MemoryContextSwitchTo(vac_context);
846 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
847 relid,
848 NIL));
849 MemoryContextSwitchTo(oldcontext);
850 }
851
852 table_endscan(scan);
853 table_close(pgclass, AccessShareLock);
854
855 return vacrels;
856}
857
858/*
859 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
860 *
861 * The output parameters are:
862 * - oldestXmin is the cutoff value used to distinguish whether tuples are
863 * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
864 * - freezeLimit is the Xid below which all Xids are replaced by
865 * FrozenTransactionId during vacuum.
866 * - xidFullScanLimit (computed from table_freeze_age parameter)
867 * represents a minimum Xid value; a table whose relfrozenxid is older than
868 * this will have a full-table vacuum applied to it, to freeze tuples across
869 * the whole table. Vacuuming a table younger than this value can use a
870 * partial scan.
871 * - multiXactCutoff is the value below which all MultiXactIds are removed from
872 * Xmax.
873 * - mxactFullScanLimit is a value against which a table's relminmxid value is
874 * compared to produce a full-table vacuum, as with xidFullScanLimit.
875 *
876 * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
877 * not interested.
878 */
879void
880vacuum_set_xid_limits(Relation rel,
881 int freeze_min_age,
882 int freeze_table_age,
883 int multixact_freeze_min_age,
884 int multixact_freeze_table_age,
885 TransactionId *oldestXmin,
886 TransactionId *freezeLimit,
887 TransactionId *xidFullScanLimit,
888 MultiXactId *multiXactCutoff,
889 MultiXactId *mxactFullScanLimit)
890{
891 int freezemin;
892 int mxid_freezemin;
893 int effective_multixact_freeze_max_age;
894 TransactionId limit;
895 TransactionId safeLimit;
896 MultiXactId mxactLimit;
897 MultiXactId safeMxactLimit;
898
899 /*
900 * We can always ignore processes running lazy vacuum. This is because we
901 * use these values only for deciding which tuples we must keep in the
902 * tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to
903 * ignore it. In theory it could be problematic to ignore lazy vacuums in
904 * a full vacuum, but keep in mind that only one vacuum process can be
905 * working on a particular table at any time, and that each vacuum is
906 * always an independent transaction.
907 */
908 *oldestXmin =
909 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
910
911 Assert(TransactionIdIsNormal(*oldestXmin));
912
913 /*
914 * Determine the minimum freeze age to use: as specified by the caller, or
915 * vacuum_freeze_min_age, but in any case not more than half
916 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
917 * wraparound won't occur too frequently.
918 */
919 freezemin = freeze_min_age;
920 if (freezemin < 0)
921 freezemin = vacuum_freeze_min_age;
922 freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
923 Assert(freezemin >= 0);
924
925 /*
926 * Compute the cutoff XID, being careful not to generate a "permanent" XID
927 */
928 limit = *oldestXmin - freezemin;
929 if (!TransactionIdIsNormal(limit))
930 limit = FirstNormalTransactionId;
931
932 /*
933 * If oldestXmin is very far back (in practice, more than
934 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
935 * freeze age of zero.
936 */
937 safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
938 if (!TransactionIdIsNormal(safeLimit))
939 safeLimit = FirstNormalTransactionId;
940
941 if (TransactionIdPrecedes(limit, safeLimit))
942 {
943 ereport(WARNING,
944 (errmsg("oldest xmin is far in the past"),
945 errhint("Close open transactions soon to avoid wraparound problems.\n"
946 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
947 limit = *oldestXmin;
948 }
949
950 *freezeLimit = limit;
951
952 /*
953 * Compute the multixact age for which freezing is urgent. This is
954 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
955 * short of multixact member space.
956 */
957 effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
958
959 /*
960 * Determine the minimum multixact freeze age to use: as specified by
961 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
962 * than half effective_multixact_freeze_max_age, so that autovacuums to
963 * prevent MultiXact wraparound won't occur too frequently.
964 */
965 mxid_freezemin = multixact_freeze_min_age;
966 if (mxid_freezemin < 0)
967 mxid_freezemin = vacuum_multixact_freeze_min_age;
968 mxid_freezemin = Min(mxid_freezemin,
969 effective_multixact_freeze_max_age / 2);
970 Assert(mxid_freezemin >= 0);
971
972 /* compute the cutoff multi, being careful to generate a valid value */
973 mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
974 if (mxactLimit < FirstMultiXactId)
975 mxactLimit = FirstMultiXactId;
976
977 safeMxactLimit =
978 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
979 if (safeMxactLimit < FirstMultiXactId)
980 safeMxactLimit = FirstMultiXactId;
981
982 if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
983 {
984 ereport(WARNING,
985 (errmsg("oldest multixact is far in the past"),
986 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
987 mxactLimit = safeMxactLimit;
988 }
989
990 *multiXactCutoff = mxactLimit;
991
992 if (xidFullScanLimit != NULL)
993 {
994 int freezetable;
995
996 Assert(mxactFullScanLimit != NULL);
997
998 /*
999 * Determine the table freeze age to use: as specified by the caller,
1000 * or vacuum_freeze_table_age, but in any case not more than
1001 * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1002 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
1003 * before anti-wraparound autovacuum is launched.
1004 */
1005 freezetable = freeze_table_age;
1006 if (freezetable < 0)
1007 freezetable = vacuum_freeze_table_age;
1008 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
1009 Assert(freezetable >= 0);
1010
1011 /*
1012 * Compute XID limit causing a full-table vacuum, being careful not to
1013 * generate a "permanent" XID.
1014 */
1015 limit = ReadNewTransactionId() - freezetable;
1016 if (!TransactionIdIsNormal(limit))
1017 limit = FirstNormalTransactionId;
1018
1019 *xidFullScanLimit = limit;
1020
		/*
		 * Similar to the above, determine the table freeze age to use for
		 * multixacts: as specified by the caller, or
		 * vacuum_multixact_freeze_table_age, but in any case not more than
		 * effective_multixact_freeze_max_age * 0.95, so that if you have
		 * e.g. a nightly VACUUM schedule, the nightly VACUUM gets a chance to
		 * freeze multixacts before anti-wraparound autovacuum is launched.
		 */
1029 freezetable = multixact_freeze_table_age;
1030 if (freezetable < 0)
1031 freezetable = vacuum_multixact_freeze_table_age;
1032 freezetable = Min(freezetable,
1033 effective_multixact_freeze_max_age * 0.95);
1034 Assert(freezetable >= 0);
1035
1036 /*
1037 * Compute MultiXact limit causing a full-table vacuum, being careful
1038 * to generate a valid MultiXact value.
1039 */
1040 mxactLimit = ReadNextMultiXactId() - freezetable;
1041 if (mxactLimit < FirstMultiXactId)
1042 mxactLimit = FirstMultiXactId;
1043
1044 *mxactFullScanLimit = mxactLimit;
1045 }
1046 else
1047 {
1048 Assert(mxactFullScanLimit == NULL);
1049 }
1050}
1051
1052/*
1053 * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1054 *
1055 * If we scanned the whole relation then we should just use the count of
1056 * live tuples seen; but if we did not, we should not blindly extrapolate
1057 * from that number, since VACUUM may have scanned a quite nonrandom
1058 * subset of the table. When we have only partial information, we take
1059 * the old value of pg_class.reltuples as a measurement of the
1060 * tuple density in the unscanned pages.
1061 *
1062 * Note: scanned_tuples should count only *live* tuples, since
1063 * pg_class.reltuples is defined that way.
1064 */
1065double
1066vac_estimate_reltuples(Relation relation,
1067 BlockNumber total_pages,
1068 BlockNumber scanned_pages,
1069 double scanned_tuples)
1070{
1071 BlockNumber old_rel_pages = relation->rd_rel->relpages;
1072 double old_rel_tuples = relation->rd_rel->reltuples;
1073 double old_density;
1074 double unscanned_pages;
1075 double total_tuples;
1076
1077 /* If we did scan the whole table, just use the count as-is */
1078 if (scanned_pages >= total_pages)
1079 return scanned_tuples;
1080
1081 /*
1082 * If scanned_pages is zero but total_pages isn't, keep the existing value
1083 * of reltuples. (Note: callers should avoid updating the pg_class
1084 * statistics in this situation, since no new information has been
1085 * provided.)
1086 */
1087 if (scanned_pages == 0)
1088 return old_rel_tuples;
1089
1090 /*
1091 * If old value of relpages is zero, old density is indeterminate; we
1092 * can't do much except scale up scanned_tuples to match total_pages.
1093 */
1094 if (old_rel_pages == 0)
1095 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1096
1097 /*
1098 * Okay, we've covered the corner cases. The normal calculation is to
1099 * convert the old measurement to a density (tuples per page), then
1100 * estimate the number of tuples in the unscanned pages using that figure,
1101 * and finally add on the number of tuples in the scanned pages.
1102 */
1103 old_density = old_rel_tuples / old_rel_pages;
1104 unscanned_pages = (double) total_pages - (double) scanned_pages;
1105 total_tuples = old_density * unscanned_pages + scanned_tuples;
1106 return floor(total_tuples + 0.5);
1107}
1108
1109
1110/*
1111 * vac_update_relstats() -- update statistics for one relation
1112 *
1113 * Update the whole-relation statistics that are kept in its pg_class
1114 * row. There are additional stats that will be updated if we are
1115 * doing ANALYZE, but we always update these stats. This routine works
1116 * for both index and heap relation entries in pg_class.
1117 *
1118 * We violate transaction semantics here by overwriting the rel's
1119 * existing pg_class tuple with the new values. This is reasonably
1120 * safe as long as we're sure that the new values are correct whether or
1121 * not this transaction commits. The reason for doing this is that if
1122 * we updated these tuples in the usual way, vacuuming pg_class itself
1123 * wouldn't work very well --- by the time we got done with a vacuum
1124 * cycle, most of the tuples in pg_class would've been obsoleted. Of
1125 * course, this only works for fixed-size not-null columns, but these are.
1126 *
1127 * Another reason for doing it this way is that when we are in a lazy
1128 * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1129 * Somebody vacuuming pg_class might think they could delete a tuple
1130 * marked with xmin = our xid.
1131 *
1132 * In addition to fundamentally nontransactional statistics such as
1133 * relpages and relallvisible, we try to maintain certain lazily-updated
1134 * DDL flags such as relhasindex, by clearing them if no longer correct.
1135 * It's safe to do this in VACUUM, which can't run in parallel with
1136 * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1137 * However, it's *not* safe to do it in an ANALYZE that's within an
1138 * outer transaction, because for example the current transaction might
1139 * have dropped the last index; then we'd think relhasindex should be
1140 * cleared, but if the transaction later rolls back this would be wrong.
1141 * So we refrain from updating the DDL flags if we're inside an outer
1142 * transaction. This is OK since postponing the flag maintenance is
1143 * always allowable.
1144 *
1145 * Note: num_tuples should count only *live* tuples, since
1146 * pg_class.reltuples is defined that way.
1147 *
1148 * This routine is shared by VACUUM and ANALYZE.
1149 */
1150void
1151vac_update_relstats(Relation relation,
1152 BlockNumber num_pages, double num_tuples,
1153 BlockNumber num_all_visible_pages,
1154 bool hasindex, TransactionId frozenxid,
1155 MultiXactId minmulti,
1156 bool in_outer_xact)
1157{
1158 Oid relid = RelationGetRelid(relation);
1159 Relation rd;
1160 HeapTuple ctup;
1161 Form_pg_class pgcform;
1162 bool dirty;
1163
1164 rd = table_open(RelationRelationId, RowExclusiveLock);
1165
1166 /* Fetch a copy of the tuple to scribble on */
1167 ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
1168 if (!HeapTupleIsValid(ctup))
1169 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1170 relid);
1171 pgcform = (Form_pg_class) GETSTRUCT(ctup);
1172
1173 /* Apply statistical updates, if any, to copied tuple */
1174
1175 dirty = false;
1176 if (pgcform->relpages != (int32) num_pages)
1177 {
1178 pgcform->relpages = (int32) num_pages;
1179 dirty = true;
1180 }
1181 if (pgcform->reltuples != (float4) num_tuples)
1182 {
1183 pgcform->reltuples = (float4) num_tuples;
1184 dirty = true;
1185 }
1186 if (pgcform->relallvisible != (int32) num_all_visible_pages)
1187 {
1188 pgcform->relallvisible = (int32) num_all_visible_pages;
1189 dirty = true;
1190 }
1191
1192 /* Apply DDL updates, but not inside an outer transaction (see above) */
1193
1194 if (!in_outer_xact)
1195 {
1196 /*
1197 * If we didn't find any indexes, reset relhasindex.
1198 */
1199 if (pgcform->relhasindex && !hasindex)
1200 {
1201 pgcform->relhasindex = false;
1202 dirty = true;
1203 }
1204
1205 /* We also clear relhasrules and relhastriggers if needed */
1206 if (pgcform->relhasrules && relation->rd_rules == NULL)
1207 {
1208 pgcform->relhasrules = false;
1209 dirty = true;
1210 }
1211 if (pgcform->relhastriggers && relation->trigdesc == NULL)
1212 {
1213 pgcform->relhastriggers = false;
1214 dirty = true;
1215 }
1216 }
1217
1218 /*
1219 * Update relfrozenxid, unless caller passed InvalidTransactionId
1220 * indicating it has no new data.
1221 *
1222 * Ordinarily, we don't let relfrozenxid go backwards: if things are
1223 * working correctly, the only way the new frozenxid could be older would
1224 * be if a previous VACUUM was done with a tighter freeze_min_age, in
1225 * which case we don't want to forget the work it already did. However,
1226 * if the stored relfrozenxid is "in the future", then it must be corrupt
1227 * and it seems best to overwrite it with the cutoff we used this time.
1228 * This should match vac_update_datfrozenxid() concerning what we consider
1229 * to be "in the future".
1230 */
1231 if (TransactionIdIsNormal(frozenxid) &&
1232 pgcform->relfrozenxid != frozenxid &&
1233 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
1234 TransactionIdPrecedes(ReadNewTransactionId(),
1235 pgcform->relfrozenxid)))
1236 {
1237 pgcform->relfrozenxid = frozenxid;
1238 dirty = true;
1239 }
1240
1241 /* Similarly for relminmxid */
1242 if (MultiXactIdIsValid(minmulti) &&
1243 pgcform->relminmxid != minmulti &&
1244 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
1245 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
1246 {
1247 pgcform->relminmxid = minmulti;
1248 dirty = true;
1249 }
1250
1251 /* If anything changed, write out the tuple. */
1252 if (dirty)
1253 heap_inplace_update(rd, ctup);
1254
1255 table_close(rd, RowExclusiveLock);
1256}
1257
1258
1259/*
1260 * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1261 *
1262 * Update pg_database's datfrozenxid entry for our database to be the
1263 * minimum of the pg_class.relfrozenxid values.
1264 *
1265 * Similarly, update our datminmxid to be the minimum of the
1266 * pg_class.relminmxid values.
1267 *
1268 * If we are able to advance either pg_database value, also try to
1269 * truncate pg_xact and pg_multixact.
1270 *
1271 * We violate transaction semantics here by overwriting the database's
1272 * existing pg_database tuple with the new values. This is reasonably
1273 * safe since the new values are correct whether or not this transaction
1274 * commits. As with vac_update_relstats, this avoids leaving dead tuples
1275 * behind after a VACUUM.
1276 */
1277void
1278vac_update_datfrozenxid(void)
1279{
1280 HeapTuple tuple;
1281 Form_pg_database dbform;
1282 Relation relation;
1283 SysScanDesc scan;
1284 HeapTuple classTup;
1285 TransactionId newFrozenXid;
1286 MultiXactId newMinMulti;
1287 TransactionId lastSaneFrozenXid;
1288 MultiXactId lastSaneMinMulti;
1289 bool bogus = false;
1290 bool dirty = false;
1291
1292 /*
1293 * Initialize the "min" calculation with GetOldestXmin, which is a
1294 * reasonable approximation to the minimum relfrozenxid for not-yet-
1295 * committed pg_class entries for new tables; see AddNewRelationTuple().
1296 * So we cannot produce a wrong minimum by starting with this.
1297 */
1298 newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
1299
1300 /*
1301 * Similarly, initialize the MultiXact "min" with the value that would be
1302 * used on pg_class for new tables. See AddNewRelationTuple().
1303 */
1304 newMinMulti = GetOldestMultiXactId();
1305
1306 /*
1307 * Identify the latest relfrozenxid and relminmxid values that we could
1308 * validly see during the scan. These are conservative values, but it's
1309 * not really worth trying to be more exact.
1310 */
1311 lastSaneFrozenXid = ReadNewTransactionId();
1312 lastSaneMinMulti = ReadNextMultiXactId();
1313
1314 /*
1315 * We must seqscan pg_class to find the minimum Xid, because there is no
1316 * index that can help us here.
1317 */
1318 relation = table_open(RelationRelationId, AccessShareLock);
1319
1320 scan = systable_beginscan(relation, InvalidOid, false,
1321 NULL, 0, NULL);
1322
1323 while ((classTup = systable_getnext(scan)) != NULL)
1324 {
1325 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1326
1327 /*
1328 * Only consider relations able to hold unfrozen XIDs (anything else
1329 * should have InvalidTransactionId in relfrozenxid anyway).
1330 */
1331 if (classForm->relkind != RELKIND_RELATION &&
1332 classForm->relkind != RELKIND_MATVIEW &&
1333 classForm->relkind != RELKIND_TOASTVALUE)
1334 {
1335 Assert(!TransactionIdIsValid(classForm->relfrozenxid));
1336 Assert(!MultiXactIdIsValid(classForm->relminmxid));
1337 continue;
1338 }
1339
1340 /*
1341 * Some table AMs might not need per-relation xid / multixid horizons.
1342 * It therefore seems reasonable to allow relfrozenxid and relminmxid
1343 * to not be set (i.e. set to their respective Invalid*Id)
1344 * independently. Thus validate and compute horizon for each only if
1345 * set.
1346 *
1347 * If things are working properly, no relation should have a
1348 * relfrozenxid or relminmxid that is "in the future". However, such
1349 * cases have been known to arise due to bugs in pg_upgrade. If we
1350 * see any entries that are "in the future", chicken out and don't do
1351 * anything. This ensures we won't truncate clog & multixact SLRUs
1352 * before those relations have been scanned and cleaned up.
1353 */
1354
1355 if (TransactionIdIsValid(classForm->relfrozenxid))
1356 {
1357 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1358
1359 /* check for values in the future */
1360 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
1361 {
1362 bogus = true;
1363 break;
1364 }
1365
1366 /* determine new horizon */
1367 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1368 newFrozenXid = classForm->relfrozenxid;
1369 }
1370
1371 if (MultiXactIdIsValid(classForm->relminmxid))
1372 {
1373 /* check for values in the future */
1374 if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1375 {
1376 bogus = true;
1377 break;
1378 }
1379
1380 /* determine new horizon */
1381 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1382 newMinMulti = classForm->relminmxid;
1383 }
1384 }
1385
1386 /* we're done with pg_class */
1387 systable_endscan(scan);
1388 table_close(relation, AccessShareLock);
1389
1390 /* chicken out if bogus data found */
1391 if (bogus)
1392 return;
1393
1394 Assert(TransactionIdIsNormal(newFrozenXid));
1395 Assert(MultiXactIdIsValid(newMinMulti));
1396
1397 /* Now fetch the pg_database tuple we need to update. */
1398 relation = table_open(DatabaseRelationId, RowExclusiveLock);
1399
1400 /* Fetch a copy of the tuple to scribble on */
1401 tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1402 if (!HeapTupleIsValid(tuple))
1403 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1404 dbform = (Form_pg_database) GETSTRUCT(tuple);
1405
1406 /*
1407 * As in vac_update_relstats(), we ordinarily don't want to let
1408 * datfrozenxid go backward; but if it's "in the future" then it must be
1409 * corrupt and it seems best to overwrite it.
1410 */
1411 if (dbform->datfrozenxid != newFrozenXid &&
1412 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1413 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1414 {
1415 dbform->datfrozenxid = newFrozenXid;
1416 dirty = true;
1417 }
1418 else
1419 newFrozenXid = dbform->datfrozenxid;
1420
1421 /* Ditto for datminmxid */
1422 if (dbform->datminmxid != newMinMulti &&
1423 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1424 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1425 {
1426 dbform->datminmxid = newMinMulti;
1427 dirty = true;
1428 }
1429 else
1430 newMinMulti = dbform->datminmxid;
1431
1432 if (dirty)
1433 heap_inplace_update(relation, tuple);
1434
1435 heap_freetuple(tuple);
1436 table_close(relation, RowExclusiveLock);
1437
1438 /*
1439 * If we were able to advance datfrozenxid or datminmxid, see if we can
1440 * truncate pg_xact and/or pg_multixact. Also do it if the shared
1441 * XID-wrap-limit info is stale, since this action will update that too.
1442 */
1443 if (dirty || ForceTransactionIdLimitUpdate())
1444 vac_truncate_clog(newFrozenXid, newMinMulti,
1445 lastSaneFrozenXid, lastSaneMinMulti);
1446}
1447
1448
1449/*
1450 * vac_truncate_clog() -- attempt to truncate the commit log
1451 *
1452 * Scan pg_database to determine the system-wide oldest datfrozenxid,
1453 * and use it to truncate the transaction commit log (pg_xact).
1454 * Also update the XID wrap limit info maintained by varsup.c.
1455 * Likewise for datminmxid.
1456 *
1457 * The passed frozenXID and minMulti are the updated values for my own
1458 * pg_database entry. They're used to initialize the "min" calculations.
1459 * The caller also passes the "last sane" XID and MXID, since it has
1460 * those at hand already.
1461 *
1462 * This routine is only invoked when we've managed to change our
1463 * DB's datfrozenxid/datminmxid values, or we found that the shared
1464 * XID-wrap-limit info is stale.
1465 */
1466static void
1467vac_truncate_clog(TransactionId frozenXID,
1468 MultiXactId minMulti,
1469 TransactionId lastSaneFrozenXid,
1470 MultiXactId lastSaneMinMulti)
1471{
1472 TransactionId nextXID = ReadNewTransactionId();
1473 Relation relation;
1474 TableScanDesc scan;
1475 HeapTuple tuple;
1476 Oid oldestxid_datoid;
1477 Oid minmulti_datoid;
1478 bool bogus = false;
1479 bool frozenAlreadyWrapped = false;
1480
1481 /* init oldest datoids to sync with my frozenXID/minMulti values */
1482 oldestxid_datoid = MyDatabaseId;
1483 minmulti_datoid = MyDatabaseId;
1484
1485 /*
1486 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1487 *
1488 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1489 * the values could change while we look at them. Fetch each one just
1490 * once to ensure sane behavior of the comparison logic. (Here, as in
1491 * many other places, we assume that fetching or updating an XID in shared
1492 * storage is atomic.)
1493 *
1494 * Note: we need not worry about a race condition with new entries being
1495 * inserted by CREATE DATABASE. Any such entry will have a copy of some
1496 * existing DB's datfrozenxid, and that source DB cannot be ours because
1497 * of the interlock against copying a DB containing an active backend.
1498 * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1499 * concurrently modify the datfrozenxid's of different databases, the
1500 * worst possible outcome is that pg_xact is not truncated as aggressively
1501 * as it could be.
1502 */
1503 relation = table_open(DatabaseRelationId, AccessShareLock);
1504
1505 scan = table_beginscan_catalog(relation, 0, NULL);
1506
1507 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1508 {
1509 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1510 TransactionId datfrozenxid = dbform->datfrozenxid;
1511 TransactionId datminmxid = dbform->datminmxid;
1512
1513 Assert(TransactionIdIsNormal(datfrozenxid));
1514 Assert(MultiXactIdIsValid(datminmxid));
1515
1516 /*
1517 * If things are working properly, no database should have a
1518 * datfrozenxid or datminmxid that is "in the future". However, such
1519 * cases have been known to arise due to bugs in pg_upgrade. If we
1520 * see any entries that are "in the future", chicken out and don't do
1521 * anything. This ensures we won't truncate clog before those
1522 * databases have been scanned and cleaned up. (We will issue the
1523 * "already wrapped" warning if appropriate, though.)
1524 */
1525 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1526 MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1527 bogus = true;
1528
1529 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1530 frozenAlreadyWrapped = true;
1531 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1532 {
1533 frozenXID = datfrozenxid;
1534 oldestxid_datoid = dbform->oid;
1535 }
1536
1537 if (MultiXactIdPrecedes(datminmxid, minMulti))
1538 {
1539 minMulti = datminmxid;
1540 minmulti_datoid = dbform->oid;
1541 }
1542 }
1543
1544 table_endscan(scan);
1545
1546 table_close(relation, AccessShareLock);
1547
1548 /*
1549 * Do not truncate CLOG if we seem to have suffered wraparound already;
1550 * the computed minimum XID might be bogus. This case should now be
1551 * impossible due to the defenses in GetNewTransactionId, but we keep the
1552 * test anyway.
1553 */
1554 if (frozenAlreadyWrapped)
1555 {
1556 ereport(WARNING,
1557 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1558 errdetail("You might have already suffered transaction-wraparound data loss.")));
1559 return;
1560 }
1561
1562 /* chicken out if data is bogus in any other way */
1563 if (bogus)
1564 return;
1565
1566 /*
1567 * Advance the oldest value for commit timestamps before truncating, so
1568 * that if a user requests a timestamp for a transaction we're truncating
1569 * away right after this point, they get NULL instead of an ugly "file not
1570 * found" error from slru.c. This doesn't matter for xact/multixact
1571 * because they are not subject to arbitrary lookups from users.
1572 */
1573 AdvanceOldestCommitTsXid(frozenXID);
1574
1575 /*
1576 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1577 */
1578 TruncateCLOG(frozenXID, oldestxid_datoid);
1579 TruncateCommitTs(frozenXID);
1580 TruncateMultiXact(minMulti, minmulti_datoid);
1581
1582 /*
1583 * Update the wrap limit for GetNewTransactionId and creation of new
1584 * MultiXactIds. Note: these functions will also signal the postmaster
1585 * for an(other) autovac cycle if needed. XXX should we avoid possibly
1586 * signalling twice?
1587 */
1588 SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1589 SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1590}
1591
1592
1593/*
1594 * vacuum_rel() -- vacuum one heap relation
1595 *
1596 * relid identifies the relation to vacuum. If relation is supplied,
1597 * use the name therein for reporting any failure to open/lock the rel;
1598 * do not use it once we've successfully opened the rel, since it might
1599 * be stale.
1600 *
1601 * Returns true if it's okay to proceed with a requested ANALYZE
1602 * operation on this table.
1603 *
1604 * Doing one heap at a time incurs extra overhead, since we need to
1605 * check that the heap exists again just before we vacuum it. The
1606 * reason that we do this is so that vacuuming can be spread across
1607 * many small transactions. Otherwise, two-phase locking would require
1608 * us to lock the entire database during one pass of the vacuum cleaner.
1609 *
1610 * At entry and exit, we are not inside a transaction.
1611 */
1612static bool
1613vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
1614{
1615 LOCKMODE lmode;
1616 Relation onerel;
1617 LockRelId onerelid;
1618 Oid toast_relid;
1619 Oid save_userid;
1620 int save_sec_context;
1621 int save_nestlevel;
1622
1623 Assert(params != NULL);
1624
1625 /* Begin a transaction for vacuuming this relation */
1626 StartTransactionCommand();
1627
1628 /*
1629 * Functions in indexes may want a snapshot set. Also, setting a snapshot
1630 * ensures that RecentGlobalXmin is kept truly recent.
1631 */
1632 PushActiveSnapshot(GetTransactionSnapshot());
1633
1634 if (!(params->options & VACOPT_FULL))
1635 {
1636 /*
1637 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1638 * other concurrent VACUUMs know that they can ignore this one while
1639 * determining their OldestXmin. (The reason we don't set it during a
1640 * full VACUUM is exactly that we may have to run user-defined
1641 * functions for functional indexes, and we want to make sure that if
1642 * they use the snapshot set above, any tuples it requires can't get
1643 * removed from other tables. An index function that depends on the
1644 * contents of other tables is arguably broken, but we won't break it
1645 * here by violating transaction semantics.)
1646 *
1647 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1648 * autovacuum; it's used to avoid canceling a vacuum that was invoked
1649 * in an emergency.
1650 *
1651 * Note: these flags remain set until CommitTransaction or
1652 * AbortTransaction. We don't want to clear them until we reset
1653 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1654 * which is probably Not Good.
1655 */
1656 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1657 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1658 if (params->is_wraparound)
1659 MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1660 LWLockRelease(ProcArrayLock);
1661 }
1662
1663 /*
1664 * Check for user-requested abort. Note we want this to be inside a
1665 * transaction, so xact.c doesn't issue useless WARNING.
1666 */
1667 CHECK_FOR_INTERRUPTS();
1668
1669 /*
1670 * Determine the type of lock we want --- hard exclusive lock for a FULL
1671 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1672 * way, we can be sure that no other backend is vacuuming the same table.
1673 */
1674 lmode = (params->options & VACOPT_FULL) ?
1675 AccessExclusiveLock : ShareUpdateExclusiveLock;
1676
1677 /* open the relation and get the appropriate lock on it */
1678 onerel = vacuum_open_relation(relid, relation, params->options,
1679 params->log_min_duration >= 0, lmode);
1680
1681 /* leave if relation could not be opened or locked */
1682 if (!onerel)
1683 {
1684 PopActiveSnapshot();
1685 CommitTransactionCommand();
1686 return false;
1687 }
1688
1689 /*
1690 * Check if relation needs to be skipped based on ownership. This check
1691 * happens also when building the relation list to vacuum for a manual
1692 * operation, and needs to be done additionally here as VACUUM could
1693 * happen across multiple transactions where relation ownership could have
1694 * changed in-between. Make sure to only generate logs for VACUUM in this
1695 * case.
1696 */
1697 if (!vacuum_is_relation_owner(RelationGetRelid(onerel),
1698 onerel->rd_rel,
1699 params->options & VACOPT_VACUUM))
1700 {
1701 relation_close(onerel, lmode);
1702 PopActiveSnapshot();
1703 CommitTransactionCommand();
1704 return false;
1705 }
1706
1707 /*
1708 * Check that it's of a vacuumable relkind.
1709 */
1710 if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1711 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1712 onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1713 onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1714 {
1715 ereport(WARNING,
1716 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1717 RelationGetRelationName(onerel))));
1718 relation_close(onerel, lmode);
1719 PopActiveSnapshot();
1720 CommitTransactionCommand();
1721 return false;
1722 }
1723
1724 /*
1725 * Silently ignore tables that are temp tables of other backends ---
1726 * trying to vacuum these will lead to great unhappiness, since their
1727 * contents are probably not up-to-date on disk. (We don't throw a
1728 * warning here; it would just lead to chatter during a database-wide
1729 * VACUUM.)
1730 */
1731 if (RELATION_IS_OTHER_TEMP(onerel))
1732 {
1733 relation_close(onerel, lmode);
1734 PopActiveSnapshot();
1735 CommitTransactionCommand();
1736 return false;
1737 }
1738
1739 /*
1740 * Silently ignore partitioned tables as there is no work to be done. The
1741 * useful work is on their child partitions, which have been queued up for
1742 * us separately.
1743 */
1744 if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1745 {
1746 relation_close(onerel, lmode);
1747 PopActiveSnapshot();
1748 CommitTransactionCommand();
1749 /* It's OK to proceed with ANALYZE on this table */
1750 return true;
1751 }
1752
1753 /*
1754 * Get a session-level lock too. This will protect our access to the
1755 * relation across multiple transactions, so that we can vacuum the
1756 * relation's TOAST table (if any) secure in the knowledge that no one is
1757 * deleting the parent relation.
1758 *
1759 * NOTE: this cannot block, even if someone else is waiting for access,
1760 * because the lock manager knows that both lock requests are from the
1761 * same process.
1762 */
1763 onerelid = onerel->rd_lockInfo.lockRelId;
1764 LockRelationIdForSession(&onerelid, lmode);
1765
1766 /* Set index cleanup option based on reloptions if not yet */
1767 if (params->index_cleanup == VACOPT_TERNARY_DEFAULT)
1768 {
1769 if (onerel->rd_options == NULL ||
1770 ((StdRdOptions *) onerel->rd_options)->vacuum_index_cleanup)
1771 params->index_cleanup = VACOPT_TERNARY_ENABLED;
1772 else
1773 params->index_cleanup = VACOPT_TERNARY_DISABLED;
1774 }
1775
1776 /* Set truncate option based on reloptions if not yet */
1777 if (params->truncate == VACOPT_TERNARY_DEFAULT)
1778 {
1779 if (onerel->rd_options == NULL ||
1780 ((StdRdOptions *) onerel->rd_options)->vacuum_truncate)
1781 params->truncate = VACOPT_TERNARY_ENABLED;
1782 else
1783 params->truncate = VACOPT_TERNARY_DISABLED;
1784 }
1785
1786 /*
1787 * Remember the relation's TOAST relation for later, if the caller asked
1788 * us to process it. In VACUUM FULL, though, the toast table is
1789 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1790 */
1791 if (!(params->options & VACOPT_SKIPTOAST) && !(params->options & VACOPT_FULL))
1792 toast_relid = onerel->rd_rel->reltoastrelid;
1793 else
1794 toast_relid = InvalidOid;
1795
1796 /*
1797 * Switch to the table owner's userid, so that any index functions are run
1798 * as that user. Also lock down security-restricted operations and
1799 * arrange to make GUC variable changes local to this command. (This is
1800 * unnecessary, but harmless, for lazy VACUUM.)
1801 */
1802 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1803 SetUserIdAndSecContext(onerel->rd_rel->relowner,
1804 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1805 save_nestlevel = NewGUCNestLevel();
1806
1807 /*
1808 * Do the actual work --- either FULL or "lazy" vacuum
1809 */
1810 if (params->options & VACOPT_FULL)
1811 {
1812 int cluster_options = 0;
1813
1814 /* close relation before vacuuming, but hold lock until commit */
1815 relation_close(onerel, NoLock);
1816 onerel = NULL;
1817
1818 if ((params->options & VACOPT_VERBOSE) != 0)
1819 cluster_options |= CLUOPT_VERBOSE;
1820
1821 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1822 cluster_rel(relid, InvalidOid, cluster_options);
1823 }
1824 else
1825 table_relation_vacuum(onerel, params, vac_strategy);
1826
1827 /* Roll back any GUC changes executed by index functions */
1828 AtEOXact_GUC(false, save_nestlevel);
1829
1830 /* Restore userid and security context */
1831 SetUserIdAndSecContext(save_userid, save_sec_context);
1832
1833 /* all done with this class, but hold lock until commit */
1834 if (onerel)
1835 relation_close(onerel, NoLock);
1836
1837 /*
1838 * Complete the transaction and free all temporary memory used.
1839 */
1840 PopActiveSnapshot();
1841 CommitTransactionCommand();
1842
1843 /*
1844 * If the relation has a secondary toast rel, vacuum that too while we
1845 * still hold the session lock on the master table. Note however that
1846 * "analyze" will not get done on the toast table. This is good, because
1847 * the toaster always uses hardcoded index access and statistics are
1848 * totally unimportant for toast relations.
1849 */
1850 if (toast_relid != InvalidOid)
1851 vacuum_rel(toast_relid, NULL, params);
1852
1853 /*
1854 * Now release the session-level lock on the master table.
1855 */
1856 UnlockRelationIdForSession(&onerelid, lmode);
1857
1858 /* Report that we really did it. */
1859 return true;
1860}
1861
1862
1863/*
1864 * Open all the vacuumable indexes of the given relation, obtaining the
1865 * specified kind of lock on each. Return an array of Relation pointers for
1866 * the indexes into *Irel, and the number of indexes into *nindexes.
1867 *
1868 * We consider an index vacuumable if it is marked insertable (indisready).
1869 * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1870 * execution, and what we have is too corrupt to be processable. We will
1871 * vacuum even if the index isn't indisvalid; this is important because in a
1872 * unique index, uniqueness checks will be performed anyway and had better not
1873 * hit dangling index pointers.
1874 */
1875void
1876vac_open_indexes(Relation relation, LOCKMODE lockmode,
1877 int *nindexes, Relation **Irel)
1878{
1879 List *indexoidlist;
1880 ListCell *indexoidscan;
1881 int i;
1882
1883 Assert(lockmode != NoLock);
1884
1885 indexoidlist = RelationGetIndexList(relation);
1886
1887 /* allocate enough memory for all indexes */
1888 i = list_length(indexoidlist);
1889
1890 if (i > 0)
1891 *Irel = (Relation *) palloc(i * sizeof(Relation));
1892 else
1893 *Irel = NULL;
1894
1895 /* collect just the ready indexes */
1896 i = 0;
1897 foreach(indexoidscan, indexoidlist)
1898 {
1899 Oid indexoid = lfirst_oid(indexoidscan);
1900 Relation indrel;
1901
1902 indrel = index_open(indexoid, lockmode);
1903 if (indrel->rd_index->indisready)
1904 (*Irel)[i++] = indrel;
1905 else
1906 index_close(indrel, lockmode);
1907 }
1908
1909 *nindexes = i;
1910
1911 list_free(indexoidlist);
1912}
1913
1914/*
1915 * Release the resources acquired by vac_open_indexes. Optionally release
1916 * the locks (say NoLock to keep 'em).
1917 */
1918void
1919vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1920{
1921 if (Irel == NULL)
1922 return;
1923
1924 while (nindexes--)
1925 {
1926 Relation ind = Irel[nindexes];
1927
1928 index_close(ind, lockmode);
1929 }
1930 pfree(Irel);
1931}
1932
1933/*
1934 * vacuum_delay_point --- check for interrupts and cost-based delay.
1935 *
1936 * This should be called in each major loop of VACUUM processing,
1937 * typically once per page processed.
1938 */
1939void
1940vacuum_delay_point(void)
1941{
1942 /* Always check for interrupts */
1943 CHECK_FOR_INTERRUPTS();
1944
1945 /* Nap if appropriate */
1946 if (VacuumCostActive && !InterruptPending &&
1947 VacuumCostBalance >= VacuumCostLimit)
1948 {
1949 double msec;
1950
1951 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1952 if (msec > VacuumCostDelay * 4)
1953 msec = VacuumCostDelay * 4;
1954
1955 pg_usleep((long) (msec * 1000));
1956
1957 VacuumCostBalance = 0;
1958
1959 /* update balance values for workers */
1960 AutoVacuumUpdateDelay();
1961
1962 /* Might have gotten an interrupt while sleeping */
1963 CHECK_FOR_INTERRUPTS();
1964 }
1965}
1966
1967/*
1968 * A wrapper function of defGetBoolean().
1969 *
1970 * This function returns VACOPT_TERNARY_ENABLED and VACOPT_TERNARY_DISABLED
1971 * instead of true and false.
1972 */
1973static VacOptTernaryValue
1974get_vacopt_ternary_value(DefElem *def)
1975{
1976 return defGetBoolean(def) ? VACOPT_TERNARY_ENABLED : VACOPT_TERNARY_DISABLED;
1977}
1978