1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * reinit.c |
4 | * Reinitialization of unlogged relations |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * IDENTIFICATION |
10 | * src/backend/storage/file/reinit.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | |
15 | #include "postgres.h" |
16 | |
17 | #include <unistd.h> |
18 | |
19 | #include "common/relpath.h" |
20 | #include "storage/copydir.h" |
21 | #include "storage/fd.h" |
22 | #include "storage/reinit.h" |
23 | #include "utils/hsearch.h" |
24 | #include "utils/memutils.h" |
25 | |
/*
 * Forward declarations of the file-local helpers.  The first walks one
 * tablespace directory and hands each per-database directory it finds to
 * the second; both receive the same "op" bit mask that was passed to
 * ResetUnloggedRelations().
 */
static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
												  int op);
static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
											   int op);
30 | |
/*
 * Hash table entry used while cleaning up a per-database directory.
 *
 * The whole struct doubles as the hash key.  It holds the leading digit
 * string (the "OID portion") of a relation file name; users of the struct
 * zero the buffer before copying the digits in, so the unused tail is always
 * NUL bytes.
 */
typedef struct
{
	char		oid[OIDCHARS + 1];	/* digit prefix of the file name */
} unlogged_relation_entry;
35 | |
36 | /* |
37 | * Reset unlogged relations from before the last restart. |
38 | * |
39 | * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any |
40 | * relation with an "init" fork, except for the "init" fork itself. |
41 | * |
42 | * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main |
43 | * fork. |
44 | */ |
45 | void |
46 | ResetUnloggedRelations(int op) |
47 | { |
48 | char temp_path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)]; |
49 | DIR *spc_dir; |
50 | struct dirent *spc_de; |
51 | MemoryContext tmpctx, |
52 | oldctx; |
53 | |
54 | /* Log it. */ |
55 | elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d" , |
56 | (op & UNLOGGED_RELATION_CLEANUP) != 0, |
57 | (op & UNLOGGED_RELATION_INIT) != 0); |
58 | |
59 | /* |
60 | * Just to be sure we don't leak any memory, let's create a temporary |
61 | * memory context for this operation. |
62 | */ |
63 | tmpctx = AllocSetContextCreate(CurrentMemoryContext, |
64 | "ResetUnloggedRelations" , |
65 | ALLOCSET_DEFAULT_SIZES); |
66 | oldctx = MemoryContextSwitchTo(tmpctx); |
67 | |
68 | /* |
69 | * First process unlogged files in pg_default ($PGDATA/base) |
70 | */ |
71 | ResetUnloggedRelationsInTablespaceDir("base" , op); |
72 | |
73 | /* |
74 | * Cycle through directories for all non-default tablespaces. |
75 | */ |
76 | spc_dir = AllocateDir("pg_tblspc" ); |
77 | |
78 | while ((spc_de = ReadDir(spc_dir, "pg_tblspc" )) != NULL) |
79 | { |
80 | if (strcmp(spc_de->d_name, "." ) == 0 || |
81 | strcmp(spc_de->d_name, ".." ) == 0) |
82 | continue; |
83 | |
84 | snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s" , |
85 | spc_de->d_name, TABLESPACE_VERSION_DIRECTORY); |
86 | ResetUnloggedRelationsInTablespaceDir(temp_path, op); |
87 | } |
88 | |
89 | FreeDir(spc_dir); |
90 | |
91 | /* |
92 | * Restore memory context. |
93 | */ |
94 | MemoryContextSwitchTo(oldctx); |
95 | MemoryContextDelete(tmpctx); |
96 | } |
97 | |
98 | /* |
99 | * Process one tablespace directory for ResetUnloggedRelations |
100 | */ |
101 | static void |
102 | ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op) |
103 | { |
104 | DIR *ts_dir; |
105 | struct dirent *de; |
106 | char dbspace_path[MAXPGPATH * 2]; |
107 | |
108 | ts_dir = AllocateDir(tsdirname); |
109 | |
110 | /* |
111 | * If we get ENOENT on a tablespace directory, log it and return. This |
112 | * can happen if a previous DROP TABLESPACE crashed between removing the |
113 | * tablespace directory and removing the symlink in pg_tblspc. We don't |
114 | * really want to prevent database startup in that scenario, so let it |
115 | * pass instead. Any other type of error will be reported by ReadDir |
116 | * (causing a startup failure). |
117 | */ |
118 | if (ts_dir == NULL && errno == ENOENT) |
119 | { |
120 | ereport(LOG, |
121 | (errcode_for_file_access(), |
122 | errmsg("could not open directory \"%s\": %m" , |
123 | tsdirname))); |
124 | return; |
125 | } |
126 | |
127 | while ((de = ReadDir(ts_dir, tsdirname)) != NULL) |
128 | { |
129 | /* |
130 | * We're only interested in the per-database directories, which have |
131 | * numeric names. Note that this code will also (properly) ignore "." |
132 | * and "..". |
133 | */ |
134 | if (strspn(de->d_name, "0123456789" ) != strlen(de->d_name)) |
135 | continue; |
136 | |
137 | snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s" , |
138 | tsdirname, de->d_name); |
139 | ResetUnloggedRelationsInDbspaceDir(dbspace_path, op); |
140 | } |
141 | |
142 | FreeDir(ts_dir); |
143 | } |
144 | |
145 | /* |
146 | * Process one per-dbspace directory for ResetUnloggedRelations |
147 | */ |
148 | static void |
149 | ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) |
150 | { |
151 | DIR *dbspace_dir; |
152 | struct dirent *de; |
153 | char rm_path[MAXPGPATH * 2]; |
154 | |
155 | /* Caller must specify at least one operation. */ |
156 | Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0); |
157 | |
158 | /* |
159 | * Cleanup is a two-pass operation. First, we go through and identify all |
160 | * the files with init forks. Then, we go through again and nuke |
161 | * everything with the same OID except the init fork. |
162 | */ |
163 | if ((op & UNLOGGED_RELATION_CLEANUP) != 0) |
164 | { |
165 | HTAB *hash; |
166 | HASHCTL ctl; |
167 | |
168 | /* |
169 | * It's possible that someone could create a ton of unlogged relations |
170 | * in the same database & tablespace, so we'd better use a hash table |
171 | * rather than an array or linked list to keep track of which files |
172 | * need to be reset. Otherwise, this cleanup operation would be |
173 | * O(n^2). |
174 | */ |
175 | memset(&ctl, 0, sizeof(ctl)); |
176 | ctl.keysize = sizeof(unlogged_relation_entry); |
177 | ctl.entrysize = sizeof(unlogged_relation_entry); |
178 | hash = hash_create("unlogged hash" , 32, &ctl, HASH_ELEM); |
179 | |
180 | /* Scan the directory. */ |
181 | dbspace_dir = AllocateDir(dbspacedirname); |
182 | while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) |
183 | { |
184 | ForkNumber forkNum; |
185 | int oidchars; |
186 | unlogged_relation_entry ent; |
187 | |
188 | /* Skip anything that doesn't look like a relation data file. */ |
189 | if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, |
190 | &forkNum)) |
191 | continue; |
192 | |
193 | /* Also skip it unless this is the init fork. */ |
194 | if (forkNum != INIT_FORKNUM) |
195 | continue; |
196 | |
197 | /* |
198 | * Put the OID portion of the name into the hash table, if it |
199 | * isn't already. |
200 | */ |
201 | memset(ent.oid, 0, sizeof(ent.oid)); |
202 | memcpy(ent.oid, de->d_name, oidchars); |
203 | hash_search(hash, &ent, HASH_ENTER, NULL); |
204 | } |
205 | |
206 | /* Done with the first pass. */ |
207 | FreeDir(dbspace_dir); |
208 | |
209 | /* |
210 | * If we didn't find any init forks, there's no point in continuing; |
211 | * we can bail out now. |
212 | */ |
213 | if (hash_get_num_entries(hash) == 0) |
214 | { |
215 | hash_destroy(hash); |
216 | return; |
217 | } |
218 | |
219 | /* |
220 | * Now, make a second pass and remove anything that matches. |
221 | */ |
222 | dbspace_dir = AllocateDir(dbspacedirname); |
223 | while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) |
224 | { |
225 | ForkNumber forkNum; |
226 | int oidchars; |
227 | bool found; |
228 | unlogged_relation_entry ent; |
229 | |
230 | /* Skip anything that doesn't look like a relation data file. */ |
231 | if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, |
232 | &forkNum)) |
233 | continue; |
234 | |
235 | /* We never remove the init fork. */ |
236 | if (forkNum == INIT_FORKNUM) |
237 | continue; |
238 | |
239 | /* |
240 | * See whether the OID portion of the name shows up in the hash |
241 | * table. |
242 | */ |
243 | memset(ent.oid, 0, sizeof(ent.oid)); |
244 | memcpy(ent.oid, de->d_name, oidchars); |
245 | hash_search(hash, &ent, HASH_FIND, &found); |
246 | |
247 | /* If so, nuke it! */ |
248 | if (found) |
249 | { |
250 | snprintf(rm_path, sizeof(rm_path), "%s/%s" , |
251 | dbspacedirname, de->d_name); |
252 | if (unlink(rm_path) < 0) |
253 | ereport(ERROR, |
254 | (errcode_for_file_access(), |
255 | errmsg("could not remove file \"%s\": %m" , |
256 | rm_path))); |
257 | else |
258 | elog(DEBUG2, "unlinked file \"%s\"" , rm_path); |
259 | } |
260 | } |
261 | |
262 | /* Cleanup is complete. */ |
263 | FreeDir(dbspace_dir); |
264 | hash_destroy(hash); |
265 | } |
266 | |
267 | /* |
268 | * Initialization happens after cleanup is complete: we copy each init |
269 | * fork file to the corresponding main fork file. Note that if we are |
270 | * asked to do both cleanup and init, we may never get here: if the |
271 | * cleanup code determines that there are no init forks in this dbspace, |
272 | * it will return before we get to this point. |
273 | */ |
274 | if ((op & UNLOGGED_RELATION_INIT) != 0) |
275 | { |
276 | /* Scan the directory. */ |
277 | dbspace_dir = AllocateDir(dbspacedirname); |
278 | while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) |
279 | { |
280 | ForkNumber forkNum; |
281 | int oidchars; |
282 | char oidbuf[OIDCHARS + 1]; |
283 | char srcpath[MAXPGPATH * 2]; |
284 | char dstpath[MAXPGPATH]; |
285 | |
286 | /* Skip anything that doesn't look like a relation data file. */ |
287 | if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, |
288 | &forkNum)) |
289 | continue; |
290 | |
291 | /* Also skip it unless this is the init fork. */ |
292 | if (forkNum != INIT_FORKNUM) |
293 | continue; |
294 | |
295 | /* Construct source pathname. */ |
296 | snprintf(srcpath, sizeof(srcpath), "%s/%s" , |
297 | dbspacedirname, de->d_name); |
298 | |
299 | /* Construct destination pathname. */ |
300 | memcpy(oidbuf, de->d_name, oidchars); |
301 | oidbuf[oidchars] = '\0'; |
302 | snprintf(dstpath, sizeof(dstpath), "%s/%s%s" , |
303 | dbspacedirname, oidbuf, de->d_name + oidchars + 1 + |
304 | strlen(forkNames[INIT_FORKNUM])); |
305 | |
306 | /* OK, we're ready to perform the actual copy. */ |
307 | elog(DEBUG2, "copying %s to %s" , srcpath, dstpath); |
308 | copy_file(srcpath, dstpath); |
309 | } |
310 | |
311 | FreeDir(dbspace_dir); |
312 | |
313 | /* |
314 | * copy_file() above has already called pg_flush_data() on the files |
315 | * it created. Now we need to fsync those files, because a checkpoint |
316 | * won't do it for us while we're in recovery. We do this in a |
317 | * separate pass to allow the kernel to perform all the flushes |
318 | * (especially the metadata ones) at once. |
319 | */ |
320 | dbspace_dir = AllocateDir(dbspacedirname); |
321 | while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) |
322 | { |
323 | ForkNumber forkNum; |
324 | int oidchars; |
325 | char oidbuf[OIDCHARS + 1]; |
326 | char mainpath[MAXPGPATH]; |
327 | |
328 | /* Skip anything that doesn't look like a relation data file. */ |
329 | if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, |
330 | &forkNum)) |
331 | continue; |
332 | |
333 | /* Also skip it unless this is the init fork. */ |
334 | if (forkNum != INIT_FORKNUM) |
335 | continue; |
336 | |
337 | /* Construct main fork pathname. */ |
338 | memcpy(oidbuf, de->d_name, oidchars); |
339 | oidbuf[oidchars] = '\0'; |
340 | snprintf(mainpath, sizeof(mainpath), "%s/%s%s" , |
341 | dbspacedirname, oidbuf, de->d_name + oidchars + 1 + |
342 | strlen(forkNames[INIT_FORKNUM])); |
343 | |
344 | fsync_fname(mainpath, false); |
345 | } |
346 | |
347 | FreeDir(dbspace_dir); |
348 | |
349 | /* |
350 | * Lastly, fsync the database directory itself, ensuring the |
351 | * filesystem remembers the file creations and deletions we've done. |
352 | * We don't bother with this during a call that does only |
353 | * UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we |
354 | * get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step |
355 | * too at the next startup attempt. |
356 | */ |
357 | fsync_fname(dbspacedirname, true); |
358 | } |
359 | } |
360 | |
361 | /* |
362 | * Basic parsing of putative relation filenames. |
363 | * |
364 | * This function returns true if the file appears to be in the correct format |
365 | * for a non-temporary relation and false otherwise. |
366 | * |
367 | * NB: If this function returns true, the caller is entitled to assume that |
368 | * *oidchars has been set to the a value no more than OIDCHARS, and thus |
369 | * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID |
370 | * portion of the filename. This is critical to protect against a possible |
371 | * buffer overrun. |
372 | */ |
373 | bool |
374 | parse_filename_for_nontemp_relation(const char *name, int *oidchars, |
375 | ForkNumber *fork) |
376 | { |
377 | int pos; |
378 | |
379 | /* Look for a non-empty string of digits (that isn't too long). */ |
380 | for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) |
381 | ; |
382 | if (pos == 0 || pos > OIDCHARS) |
383 | return false; |
384 | *oidchars = pos; |
385 | |
386 | /* Check for a fork name. */ |
387 | if (name[pos] != '_') |
388 | *fork = MAIN_FORKNUM; |
389 | else |
390 | { |
391 | int forkchar; |
392 | |
393 | forkchar = forkname_chars(&name[pos + 1], fork); |
394 | if (forkchar <= 0) |
395 | return false; |
396 | pos += forkchar + 1; |
397 | } |
398 | |
399 | /* Check for a segment number. */ |
400 | if (name[pos] == '.') |
401 | { |
402 | int segchar; |
403 | |
404 | for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar) |
405 | ; |
406 | if (segchar <= 1) |
407 | return false; |
408 | pos += segchar; |
409 | } |
410 | |
411 | /* Now we should be at the end. */ |
412 | if (name[pos] != '\0') |
413 | return false; |
414 | return true; |
415 | } |
416 | |