| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * toasting.c |
| 4 | * This file contains routines to support creation of toast tables |
| 5 | * |
| 6 | * |
| 7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 8 | * Portions Copyright (c) 1994, Regents of the University of California |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/catalog/toasting.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include "access/heapam.h" |
| 18 | #include "access/xact.h" |
| 19 | #include "catalog/binary_upgrade.h" |
| 20 | #include "catalog/catalog.h" |
| 21 | #include "catalog/dependency.h" |
| 22 | #include "catalog/heap.h" |
| 23 | #include "catalog/index.h" |
| 24 | #include "catalog/namespace.h" |
| 25 | #include "catalog/pg_am.h" |
| 26 | #include "catalog/pg_namespace.h" |
| 27 | #include "catalog/pg_opclass.h" |
| 28 | #include "catalog/pg_type.h" |
| 29 | #include "catalog/toasting.h" |
| 30 | #include "miscadmin.h" |
| 31 | #include "nodes/makefuncs.h" |
| 32 | #include "storage/lock.h" |
| 33 | #include "utils/builtins.h" |
| 34 | #include "utils/rel.h" |
| 35 | #include "utils/syscache.h" |
| 36 | |
/*
 * Potentially set by pg_upgrade_support functions.
 *
 * create_toast_table() reads this in binary-upgrade mode: it is used as the
 * pg_type OID for the new toast table and is then reset to InvalidOid so
 * that it is consumed only once.
 */
Oid binary_upgrade_next_toast_pg_type_oid = InvalidOid;

/*
 * Prototypes for the file-local routines.
 *
 * CheckAndCreateToastTable opens the relation and hands it to
 * create_toast_table, which consults needs_toast_table when not in
 * binary-upgrade mode.
 */
static void CheckAndCreateToastTable(Oid relOid, Datum reloptions,
									 LOCKMODE lockmode, bool check);
static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
							   Datum reloptions, LOCKMODE lockmode, bool check);
static bool needs_toast_table(Relation rel);
| 45 | |
| 46 | |
| 47 | /* |
| 48 | * CreateToastTable variants |
| 49 | * If the table needs a toast table, and doesn't already have one, |
| 50 | * then create a toast table for it. |
| 51 | * |
| 52 | * reloptions for the toast table can be passed, too. Pass (Datum) 0 |
| 53 | * for default reloptions. |
| 54 | * |
| 55 | * We expect the caller to have verified that the relation is a table and have |
| 56 | * already done any necessary permission checks. Callers expect this function |
| 57 | * to end with CommandCounterIncrement if it makes any changes. |
| 58 | */ |
| 59 | void |
| 60 | AlterTableCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode) |
| 61 | { |
| 62 | CheckAndCreateToastTable(relOid, reloptions, lockmode, true); |
| 63 | } |
| 64 | |
| 65 | void |
| 66 | NewHeapCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode) |
| 67 | { |
| 68 | CheckAndCreateToastTable(relOid, reloptions, lockmode, false); |
| 69 | } |
| 70 | |
| 71 | void |
| 72 | NewRelationCreateToastTable(Oid relOid, Datum reloptions) |
| 73 | { |
| 74 | CheckAndCreateToastTable(relOid, reloptions, AccessExclusiveLock, false); |
| 75 | } |
| 76 | |
| 77 | static void |
| 78 | CheckAndCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode, bool check) |
| 79 | { |
| 80 | Relation rel; |
| 81 | |
| 82 | rel = table_open(relOid, lockmode); |
| 83 | |
| 84 | /* create_toast_table does all the work */ |
| 85 | (void) create_toast_table(rel, InvalidOid, InvalidOid, reloptions, lockmode, check); |
| 86 | |
| 87 | table_close(rel, NoLock); |
| 88 | } |
| 89 | |
| 90 | /* |
| 91 | * Create a toast table during bootstrap |
| 92 | * |
| 93 | * Here we need to prespecify the OIDs of the toast table and its index |
| 94 | */ |
| 95 | void |
| 96 | BootstrapToastTable(char *relName, Oid toastOid, Oid toastIndexOid) |
| 97 | { |
| 98 | Relation rel; |
| 99 | |
| 100 | rel = table_openrv(makeRangeVar(NULL, relName, -1), AccessExclusiveLock); |
| 101 | |
| 102 | if (rel->rd_rel->relkind != RELKIND_RELATION && |
| 103 | rel->rd_rel->relkind != RELKIND_MATVIEW) |
| 104 | ereport(ERROR, |
| 105 | (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| 106 | errmsg("\"%s\" is not a table or materialized view" , |
| 107 | relName))); |
| 108 | |
| 109 | /* create_toast_table does all the work */ |
| 110 | if (!create_toast_table(rel, toastOid, toastIndexOid, (Datum) 0, |
| 111 | AccessExclusiveLock, false)) |
| 112 | elog(ERROR, "\"%s\" does not require a toast table" , |
| 113 | relName); |
| 114 | |
| 115 | table_close(rel, NoLock); |
| 116 | } |
| 117 | |
| 118 | |
| 119 | /* |
| 120 | * create_toast_table --- internal workhorse |
| 121 | * |
| 122 | * rel is already opened and locked |
| 123 | * toastOid and toastIndexOid are normally InvalidOid, but during |
| 124 | * bootstrap they can be nonzero to specify hand-assigned OIDs |
| 125 | */ |
| 126 | static bool |
| 127 | create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, |
| 128 | Datum reloptions, LOCKMODE lockmode, bool check) |
| 129 | { |
| 130 | Oid relOid = RelationGetRelid(rel); |
| 131 | HeapTuple reltup; |
| 132 | TupleDesc tupdesc; |
| 133 | bool shared_relation; |
| 134 | bool mapped_relation; |
| 135 | Relation toast_rel; |
| 136 | Relation class_rel; |
| 137 | Oid toast_relid; |
| 138 | Oid toast_typid = InvalidOid; |
| 139 | Oid namespaceid; |
| 140 | char toast_relname[NAMEDATALEN]; |
| 141 | char toast_idxname[NAMEDATALEN]; |
| 142 | IndexInfo *indexInfo; |
| 143 | Oid collationObjectId[2]; |
| 144 | Oid classObjectId[2]; |
| 145 | int16 coloptions[2]; |
| 146 | ObjectAddress baseobject, |
| 147 | toastobject; |
| 148 | |
| 149 | /* |
| 150 | * Is it already toasted? |
| 151 | */ |
| 152 | if (rel->rd_rel->reltoastrelid != InvalidOid) |
| 153 | return false; |
| 154 | |
| 155 | /* |
| 156 | * Check to see whether the table actually needs a TOAST table. |
| 157 | */ |
| 158 | if (!IsBinaryUpgrade) |
| 159 | { |
| 160 | /* Normal mode, normal check */ |
| 161 | if (!needs_toast_table(rel)) |
| 162 | return false; |
| 163 | } |
| 164 | else |
| 165 | { |
| 166 | /* |
| 167 | * In binary-upgrade mode, create a TOAST table if and only if |
| 168 | * pg_upgrade told us to (ie, a TOAST table OID has been provided). |
| 169 | * |
| 170 | * This indicates that the old cluster had a TOAST table for the |
| 171 | * current table. We must create a TOAST table to receive the old |
| 172 | * TOAST file, even if the table seems not to need one. |
| 173 | * |
| 174 | * Contrariwise, if the old cluster did not have a TOAST table, we |
| 175 | * should be able to get along without one even if the new version's |
| 176 | * needs_toast_table rules suggest we should have one. There is a lot |
| 177 | * of daylight between where we will create a TOAST table and where |
| 178 | * one is really necessary to avoid failures, so small cross-version |
| 179 | * differences in the when-to-create heuristic shouldn't be a problem. |
| 180 | * If we tried to create a TOAST table anyway, we would have the |
| 181 | * problem that it might take up an OID that will conflict with some |
| 182 | * old-cluster table we haven't seen yet. |
| 183 | */ |
| 184 | if (!OidIsValid(binary_upgrade_next_toast_pg_class_oid) || |
| 185 | !OidIsValid(binary_upgrade_next_toast_pg_type_oid)) |
| 186 | return false; |
| 187 | } |
| 188 | |
| 189 | /* |
| 190 | * If requested check lockmode is sufficient. This is a cross check in |
| 191 | * case of errors or conflicting decisions in earlier code. |
| 192 | */ |
| 193 | if (check && lockmode != AccessExclusiveLock) |
| 194 | elog(ERROR, "AccessExclusiveLock required to add toast table." ); |
| 195 | |
| 196 | /* |
| 197 | * Create the toast table and its index |
| 198 | */ |
| 199 | snprintf(toast_relname, sizeof(toast_relname), |
| 200 | "pg_toast_%u" , relOid); |
| 201 | snprintf(toast_idxname, sizeof(toast_idxname), |
| 202 | "pg_toast_%u_index" , relOid); |
| 203 | |
| 204 | /* this is pretty painful... need a tuple descriptor */ |
| 205 | tupdesc = CreateTemplateTupleDesc(3); |
| 206 | TupleDescInitEntry(tupdesc, (AttrNumber) 1, |
| 207 | "chunk_id" , |
| 208 | OIDOID, |
| 209 | -1, 0); |
| 210 | TupleDescInitEntry(tupdesc, (AttrNumber) 2, |
| 211 | "chunk_seq" , |
| 212 | INT4OID, |
| 213 | -1, 0); |
| 214 | TupleDescInitEntry(tupdesc, (AttrNumber) 3, |
| 215 | "chunk_data" , |
| 216 | BYTEAOID, |
| 217 | -1, 0); |
| 218 | |
| 219 | /* |
| 220 | * Ensure that the toast table doesn't itself get toasted, or we'll be |
| 221 | * toast :-(. This is essential for chunk_data because type bytea is |
| 222 | * toastable; hit the other two just to be sure. |
| 223 | */ |
| 224 | TupleDescAttr(tupdesc, 0)->attstorage = 'p'; |
| 225 | TupleDescAttr(tupdesc, 1)->attstorage = 'p'; |
| 226 | TupleDescAttr(tupdesc, 2)->attstorage = 'p'; |
| 227 | |
| 228 | /* |
| 229 | * Toast tables for regular relations go in pg_toast; those for temp |
| 230 | * relations go into the per-backend temp-toast-table namespace. |
| 231 | */ |
| 232 | if (isTempOrTempToastNamespace(rel->rd_rel->relnamespace)) |
| 233 | namespaceid = GetTempToastNamespace(); |
| 234 | else |
| 235 | namespaceid = PG_TOAST_NAMESPACE; |
| 236 | |
| 237 | /* |
| 238 | * Use binary-upgrade override for pg_type.oid, if supplied. We might be |
| 239 | * in the post-schema-restore phase where we are doing ALTER TABLE to |
| 240 | * create TOAST tables that didn't exist in the old cluster. |
| 241 | */ |
| 242 | if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_toast_pg_type_oid)) |
| 243 | { |
| 244 | toast_typid = binary_upgrade_next_toast_pg_type_oid; |
| 245 | binary_upgrade_next_toast_pg_type_oid = InvalidOid; |
| 246 | } |
| 247 | |
| 248 | /* Toast table is shared if and only if its parent is. */ |
| 249 | shared_relation = rel->rd_rel->relisshared; |
| 250 | |
| 251 | /* It's mapped if and only if its parent is, too */ |
| 252 | mapped_relation = RelationIsMapped(rel); |
| 253 | |
| 254 | toast_relid = heap_create_with_catalog(toast_relname, |
| 255 | namespaceid, |
| 256 | rel->rd_rel->reltablespace, |
| 257 | toastOid, |
| 258 | toast_typid, |
| 259 | InvalidOid, |
| 260 | rel->rd_rel->relowner, |
| 261 | rel->rd_rel->relam, |
| 262 | tupdesc, |
| 263 | NIL, |
| 264 | RELKIND_TOASTVALUE, |
| 265 | rel->rd_rel->relpersistence, |
| 266 | shared_relation, |
| 267 | mapped_relation, |
| 268 | ONCOMMIT_NOOP, |
| 269 | reloptions, |
| 270 | false, |
| 271 | true, |
| 272 | true, |
| 273 | InvalidOid, |
| 274 | NULL); |
| 275 | Assert(toast_relid != InvalidOid); |
| 276 | |
| 277 | /* make the toast relation visible, else table_open will fail */ |
| 278 | CommandCounterIncrement(); |
| 279 | |
| 280 | /* ShareLock is not really needed here, but take it anyway */ |
| 281 | toast_rel = table_open(toast_relid, ShareLock); |
| 282 | |
| 283 | /* |
| 284 | * Create unique index on chunk_id, chunk_seq. |
| 285 | * |
| 286 | * NOTE: the normal TOAST access routines could actually function with a |
| 287 | * single-column index on chunk_id only. However, the slice access |
| 288 | * routines use both columns for faster access to an individual chunk. In |
| 289 | * addition, we want it to be unique as a check against the possibility of |
| 290 | * duplicate TOAST chunk OIDs. The index might also be a little more |
| 291 | * efficient this way, since btree isn't all that happy with large numbers |
| 292 | * of equal keys. |
| 293 | */ |
| 294 | |
| 295 | indexInfo = makeNode(IndexInfo); |
| 296 | indexInfo->ii_NumIndexAttrs = 2; |
| 297 | indexInfo->ii_NumIndexKeyAttrs = 2; |
| 298 | indexInfo->ii_IndexAttrNumbers[0] = 1; |
| 299 | indexInfo->ii_IndexAttrNumbers[1] = 2; |
| 300 | indexInfo->ii_Expressions = NIL; |
| 301 | indexInfo->ii_ExpressionsState = NIL; |
| 302 | indexInfo->ii_Predicate = NIL; |
| 303 | indexInfo->ii_PredicateState = NULL; |
| 304 | indexInfo->ii_ExclusionOps = NULL; |
| 305 | indexInfo->ii_ExclusionProcs = NULL; |
| 306 | indexInfo->ii_ExclusionStrats = NULL; |
| 307 | indexInfo->ii_Unique = true; |
| 308 | indexInfo->ii_ReadyForInserts = true; |
| 309 | indexInfo->ii_Concurrent = false; |
| 310 | indexInfo->ii_BrokenHotChain = false; |
| 311 | indexInfo->ii_ParallelWorkers = 0; |
| 312 | indexInfo->ii_Am = BTREE_AM_OID; |
| 313 | indexInfo->ii_AmCache = NULL; |
| 314 | indexInfo->ii_Context = CurrentMemoryContext; |
| 315 | |
| 316 | collationObjectId[0] = InvalidOid; |
| 317 | collationObjectId[1] = InvalidOid; |
| 318 | |
| 319 | classObjectId[0] = OID_BTREE_OPS_OID; |
| 320 | classObjectId[1] = INT4_BTREE_OPS_OID; |
| 321 | |
| 322 | coloptions[0] = 0; |
| 323 | coloptions[1] = 0; |
| 324 | |
| 325 | index_create(toast_rel, toast_idxname, toastIndexOid, InvalidOid, |
| 326 | InvalidOid, InvalidOid, |
| 327 | indexInfo, |
| 328 | list_make2("chunk_id" , "chunk_seq" ), |
| 329 | BTREE_AM_OID, |
| 330 | rel->rd_rel->reltablespace, |
| 331 | collationObjectId, classObjectId, coloptions, (Datum) 0, |
| 332 | INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL); |
| 333 | |
| 334 | table_close(toast_rel, NoLock); |
| 335 | |
| 336 | /* |
| 337 | * Store the toast table's OID in the parent relation's pg_class row |
| 338 | */ |
| 339 | class_rel = table_open(RelationRelationId, RowExclusiveLock); |
| 340 | |
| 341 | reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid)); |
| 342 | if (!HeapTupleIsValid(reltup)) |
| 343 | elog(ERROR, "cache lookup failed for relation %u" , relOid); |
| 344 | |
| 345 | ((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid; |
| 346 | |
| 347 | if (!IsBootstrapProcessingMode()) |
| 348 | { |
| 349 | /* normal case, use a transactional update */ |
| 350 | CatalogTupleUpdate(class_rel, &reltup->t_self, reltup); |
| 351 | } |
| 352 | else |
| 353 | { |
| 354 | /* While bootstrapping, we cannot UPDATE, so overwrite in-place */ |
| 355 | heap_inplace_update(class_rel, reltup); |
| 356 | } |
| 357 | |
| 358 | heap_freetuple(reltup); |
| 359 | |
| 360 | table_close(class_rel, RowExclusiveLock); |
| 361 | |
| 362 | /* |
| 363 | * Register dependency from the toast table to the master, so that the |
| 364 | * toast table will be deleted if the master is. Skip this in bootstrap |
| 365 | * mode. |
| 366 | */ |
| 367 | if (!IsBootstrapProcessingMode()) |
| 368 | { |
| 369 | baseobject.classId = RelationRelationId; |
| 370 | baseobject.objectId = relOid; |
| 371 | baseobject.objectSubId = 0; |
| 372 | toastobject.classId = RelationRelationId; |
| 373 | toastobject.objectId = toast_relid; |
| 374 | toastobject.objectSubId = 0; |
| 375 | |
| 376 | recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL); |
| 377 | } |
| 378 | |
| 379 | /* |
| 380 | * Make changes visible |
| 381 | */ |
| 382 | CommandCounterIncrement(); |
| 383 | |
| 384 | return true; |
| 385 | } |
| 386 | |
| 387 | /* |
| 388 | * Check to see whether the table needs a TOAST table. |
| 389 | */ |
| 390 | static bool |
| 391 | needs_toast_table(Relation rel) |
| 392 | { |
| 393 | /* |
| 394 | * No need to create a TOAST table for partitioned tables. |
| 395 | */ |
| 396 | if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) |
| 397 | return false; |
| 398 | |
| 399 | /* |
| 400 | * We cannot allow toasting a shared relation after initdb (because |
| 401 | * there's no way to mark it toasted in other databases' pg_class). |
| 402 | */ |
| 403 | if (rel->rd_rel->relisshared && !IsBootstrapProcessingMode()) |
| 404 | return false; |
| 405 | |
| 406 | /* |
| 407 | * Ignore attempts to create toast tables on catalog tables after initdb. |
| 408 | * Which catalogs get toast tables is explicitly chosen in |
| 409 | * catalog/toasting.h. (We could get here via some ALTER TABLE command if |
| 410 | * the catalog doesn't have a toast table.) |
| 411 | */ |
| 412 | if (IsCatalogRelation(rel) && !IsBootstrapProcessingMode()) |
| 413 | return false; |
| 414 | |
| 415 | /* Otherwise, let the AM decide. */ |
| 416 | return table_relation_needs_toast_table(rel); |
| 417 | } |
| 418 | |