1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2014, 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file fil/fil0fil.cc |
22 | The tablespace memory cache |
23 | |
24 | Created 10/25/1995 Heikki Tuuri |
25 | *******************************************************/ |
26 | |
27 | #include "fil0fil.h" |
28 | #include "fil0crypt.h" |
29 | |
30 | #include "btr0btr.h" |
31 | #include "buf0buf.h" |
32 | #include "dict0boot.h" |
33 | #include "dict0dict.h" |
34 | #include "fsp0file.h" |
35 | #include "fsp0file.h" |
36 | #include "fsp0fsp.h" |
37 | #include "fsp0space.h" |
38 | #include "fsp0sysspace.h" |
39 | #include "hash0hash.h" |
40 | #include "log0log.h" |
41 | #include "log0recv.h" |
42 | #include "mach0data.h" |
43 | #include "mem0mem.h" |
44 | #include "mtr0log.h" |
45 | #include "os0file.h" |
46 | #include "page0zip.h" |
47 | #include "row0mysql.h" |
48 | #include "row0trunc.h" |
49 | #include "srv0start.h" |
50 | #include "trx0purge.h" |
51 | #include "ut0new.h" |
52 | #include "buf0lru.h" |
53 | #include "ibuf0ibuf.h" |
54 | #include "os0event.h" |
55 | #include "sync0sync.h" |
56 | #include "buf0flu.h" |
57 | #include "os0api.h" |
58 | |
/** Tries to close a file in the LRU list. The caller must hold
fil_system.mutex.
@param[in]	print_info	if true, prints information why it
				cannot close a file
@return true if success, false if should retry later; since i/o's
generally complete in < 100 ms, and as InnoDB writes at most 128 pages
from the buffer pool in a batch, and then immediately flushes the
files, there is a good chance that the next time we find a suitable
node from the LRU list. */
static
bool
fil_try_to_close_file_in_LRU(bool print_info);

/** Test if a tablespace file can be renamed to a new filepath by checking
that the old filepath exists and the new filepath does not exist.
@param[in]	old_path	old filepath
@param[in]	new_path	new filepath
@param[in]	is_discarded	whether the tablespace is discarded
@return innodb error code */
static dberr_t
fil_rename_tablespace_check(
	const char*	old_path,
	const char*	new_path,
	bool		is_discarded);

/** Rename a single-table tablespace.
The tablespace must exist in the memory cache.
@param[in]	id		tablespace identifier
@param[in]	old_path	old file name
@param[in]	new_name	new table name in the
				databasename/tablename format
@param[in]	new_path_in	new file name,
				or NULL if it is located in the normal
				data directory
@return true if success */
static bool
fil_rename_tablespace(
	ulint		id,
	const char*	old_path,
	const char*	new_name,
	const char*	new_path_in);
98 | |
99 | /* |
100 | IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE |
101 | ============================================= |
102 | |
103 | The tablespace cache is responsible for providing fast read/write access to |
104 | tablespaces and logs of the database. File creation and deletion is done |
105 | in other modules which know more of the logic of the operation, however. |
106 | |
107 | A tablespace consists of a chain of files. The size of the files does not |
108 | have to be divisible by the database block size, because we may just leave |
109 | the last incomplete block unused. When a new file is appended to the |
110 | tablespace, the maximum size of the file is also specified. At the moment, |
111 | we think that it is best to extend the file to its maximum size already at |
112 | the creation of the file, because then we can avoid dynamically extending |
113 | the file when more space is needed for the tablespace. |
114 | |
115 | A block's position in the tablespace is specified with a 32-bit unsigned |
116 | integer. The files in the chain are thought to be catenated, and the block |
117 | corresponding to an address n is the nth block in the catenated file (where |
118 | the first block is named the 0th block, and the incomplete block fragments |
119 | at the end of files are not taken into account). A tablespace can be extended |
120 | by appending a new file at the end of the chain. |
121 | |
122 | Our tablespace concept is similar to the one of Oracle. |
123 | |
124 | To acquire more speed in disk transfers, a technique called disk striping is |
125 | sometimes used. This means that logical block addresses are divided in a |
126 | round-robin fashion across several disks. Windows NT supports disk striping, |
127 | so there we do not need to support it in the database. Disk striping is |
128 | implemented in hardware in RAID disks. We conclude that it is not necessary |
129 | to implement it in the database. Oracle 7 does not support disk striping, |
130 | either. |
131 | |
132 | Another trick used at some database sites is replacing tablespace files by |
133 | raw disks, that is, the whole physical disk drive, or a partition of it, is |
134 | opened as a single file, and it is accessed through byte offsets calculated |
135 | from the start of the disk or the partition. This is recommended in some |
136 | books on database tuning to achieve more speed in i/o. Using raw disk |
137 | certainly prevents the OS from fragmenting disk space, but it is not clear |
138 | if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file |
139 | system + EIDE Conner disk only a negligible difference in speed when reading |
140 | from a file, versus reading from a raw disk. |
141 | |
To have fast access to a tablespace or a log file, we put the data structures
in a hash table. Each tablespace and log file is given a unique 32-bit
identifier.
145 | |
146 | Some operating systems do not support many open files at the same time, |
147 | though NT seems to tolerate at least 900 open files. Therefore, we put the |
148 | open files in an LRU-list. If we need to open another file, we may close the |
149 | file at the end of the LRU-list. When an i/o-operation is pending on a file, |
150 | the file cannot be closed. We take the file nodes with pending i/o-operations |
151 | out of the LRU-list and keep a count of pending operations. When an operation |
152 | completes, we decrement the count and return the file node to the LRU-list if |
153 | the count drops to zero. */ |
154 | |
/** Reference to the server data directory. Usually it is the
current working directory ".", but in the MySQL Embedded Server Library
it is an absolute path. */
const char*	fil_path_to_mysql_datadir;

/** Common InnoDB file extensions */
const char* dot_ext[] = { "", ".ibd", ".isl", ".cfg" };

/** The number of fsyncs done to the log */
ulint	fil_n_log_flushes			= 0;

/** Number of pending redo log flushes */
ulint	fil_n_pending_log_flushes		= 0;
/** Number of pending tablespace flushes */
ulint	fil_n_pending_tablespace_flushes	= 0;

/** Number of files currently open */
ulint	fil_n_file_opened			= 0;

/** The null file address */
const fil_addr_t	fil_addr_null = {FIL_NULL, 0};

/** The tablespace memory cache. Use fil_system.is_initialised() to find
out whether the module has been initialized. */
fil_system_t	fil_system;

/** At this age or older a space/page will be rotated */
UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age;
/* NOTE(review): declared here but defined elsewhere; presumably the mutex
of the tablespace encryption threads - confirm against fil0crypt. */
UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex;
184 | |
/** Determine if user has explicitly disabled fsync() for a tablespace:
evaluates to true when the space has purpose FIL_TYPE_TABLESPACE and
srv_file_flush_method is SRV_O_DIRECT_NO_FSYNC.
@param[in]	s	tablespace (pointer to fil_space_t) */
# define fil_buffering_disabled(s)	\
	((s)->purpose == FIL_TYPE_TABLESPACE	\
	 && srv_file_flush_method	\
	 == SRV_O_DIRECT_NO_FSYNC)
190 | |
191 | /** Determine if the space id is a user tablespace id or not. |
192 | @param[in] space_id Space ID to check |
193 | @return true if it is a user tablespace ID */ |
194 | inline |
195 | bool |
196 | fil_is_user_tablespace_id(ulint space_id) |
197 | { |
198 | return(space_id != TRX_SYS_SPACE |
199 | && space_id != SRV_TMP_SPACE_ID |
200 | && !srv_is_undo_tablespace(space_id)); |
201 | } |
202 | |
#ifdef UNIV_DEBUG
/** Initial value of the countdown between two real fil_validate() runs */
# define FIL_VALIDATE_SKIP	17

/** Check the consistency of the tablespace cache, but only on some of the
calls, because fil_validate() is costly in debug builds.
@return true if ok or the check was skipped */
static
bool
fil_validate_skip(void)
{
	/** Countdown until the next real fil_validate() call. */
	static int fil_validate_count = FIL_VALIDATE_SKIP;

	/* Decrement the shared countdown atomically. */
	const int	count = my_atomic_add32_explicit(
		&fil_validate_count, -1, MY_MEMORY_ORDER_RELAXED);

	if (count <= 0) {
		/* The countdown has expired: rearm it and validate. */
		my_atomic_store32_explicit(
			&fil_validate_count, FIL_VALIDATE_SKIP,
			MY_MEMORY_ORDER_RELAXED);

		return(fil_validate());
	}

	return(true);
}
#endif /* UNIV_DEBUG */
231 | |
232 | /********************************************************************//** |
233 | Determines if a file node belongs to the least-recently-used list. |
234 | @return true if the file belongs to fil_system.LRU mutex. */ |
235 | UNIV_INLINE |
236 | bool |
237 | fil_space_belongs_in_lru( |
238 | /*=====================*/ |
239 | const fil_space_t* space) /*!< in: file space */ |
240 | { |
241 | switch (space->purpose) { |
242 | case FIL_TYPE_TEMPORARY: |
243 | case FIL_TYPE_LOG: |
244 | return(false); |
245 | case FIL_TYPE_TABLESPACE: |
246 | return(fil_is_user_tablespace_id(space->id)); |
247 | case FIL_TYPE_IMPORT: |
248 | return(true); |
249 | } |
250 | |
251 | ut_ad(0); |
252 | return(false); |
253 | } |
254 | |
/********************************************************************//**
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!

Prepares a file node for i/o. Opens the file if it is closed. Updates the
pending i/o's field in the node and the system appropriately. Takes the node
off the LRU list if it is in the LRU list. The caller must hold the
fil_system mutex.
@return false if the file can't be opened, otherwise true */
static
bool
fil_node_prepare_for_io(
/*====================*/
	fil_node_t*	node,	/*!< in: file node */
	fil_space_t*	space);	/*!< in: space */

/** Update the data structures when an i/o operation finishes.
@param[in,out]	node	file node
@param[in]	type	IO context */
static
void
fil_node_complete_io(fil_node_t* node, const IORequest& type);
276 | |
277 | /** Reads data from a space to a buffer. Remember that the possible incomplete |
278 | blocks at the end of file are ignored: they are not taken into account when |
279 | calculating the byte offset within a space. |
280 | @param[in] page_id page id |
281 | @param[in] page_size page size |
282 | @param[in] byte_offset remainder of offset in bytes; in aio this |
283 | must be divisible by the OS block size |
284 | @param[in] len how many bytes to read; this must not cross a |
285 | file boundary; in aio this must be a block size multiple |
286 | @param[in,out] buf buffer where to store data read; in aio this |
287 | must be appropriately aligned |
288 | @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do |
289 | i/o on a tablespace which does not exist */ |
290 | UNIV_INLINE |
291 | dberr_t |
292 | fil_read( |
293 | const page_id_t& page_id, |
294 | const page_size_t& page_size, |
295 | ulint byte_offset, |
296 | ulint len, |
297 | void* buf) |
298 | { |
299 | return(fil_io(IORequestRead, true, page_id, page_size, |
300 | byte_offset, len, buf, NULL)); |
301 | } |
302 | |
303 | /** Writes data to a space from a buffer. Remember that the possible incomplete |
304 | blocks at the end of file are ignored: they are not taken into account when |
305 | calculating the byte offset within a space. |
306 | @param[in] page_id page id |
307 | @param[in] page_size page size |
308 | @param[in] byte_offset remainder of offset in bytes; in aio this |
309 | must be divisible by the OS block size |
310 | @param[in] len how many bytes to write; this must not cross |
311 | a file boundary; in aio this must be a block size multiple |
312 | @param[in] buf buffer from which to write; in aio this must |
313 | be appropriately aligned |
314 | @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do |
315 | i/o on a tablespace which does not exist */ |
316 | UNIV_INLINE |
317 | dberr_t |
318 | fil_write( |
319 | const page_id_t& page_id, |
320 | const page_size_t& page_size, |
321 | ulint byte_offset, |
322 | ulint len, |
323 | void* buf) |
324 | { |
325 | ut_ad(!srv_read_only_mode); |
326 | |
327 | return(fil_io(IORequestWrite, true, page_id, page_size, |
328 | byte_offset, len, buf, NULL)); |
329 | } |
330 | |
331 | /*******************************************************************//** |
332 | Returns the table space by a given id, NULL if not found. |
333 | It is unsafe to dereference the returned pointer. It is fine to check |
334 | for NULL. */ |
335 | fil_space_t* |
336 | fil_space_get_by_id( |
337 | /*================*/ |
338 | ulint id) /*!< in: space id */ |
339 | { |
340 | fil_space_t* space; |
341 | |
342 | ut_ad(mutex_own(&fil_system.mutex)); |
343 | |
344 | HASH_SEARCH(hash, fil_system.spaces, id, |
345 | fil_space_t*, space, |
346 | ut_ad(space->magic_n == FIL_SPACE_MAGIC_N), |
347 | space->id == id); |
348 | |
349 | return(space); |
350 | } |
351 | |
352 | /** Look up a tablespace. |
353 | The caller should hold an InnoDB table lock or a MDL that prevents |
354 | the tablespace from being dropped during the operation, |
355 | or the caller should be in single-threaded crash recovery mode |
356 | (no user connections that could drop tablespaces). |
357 | If this is not the case, fil_space_acquire() and fil_space_t::release() |
358 | should be used instead. |
359 | @param[in] id tablespace ID |
360 | @return tablespace, or NULL if not found */ |
361 | fil_space_t* |
362 | fil_space_get( |
363 | ulint id) |
364 | { |
365 | mutex_enter(&fil_system.mutex); |
366 | fil_space_t* space = fil_space_get_by_id(id); |
367 | mutex_exit(&fil_system.mutex); |
368 | ut_ad(space == NULL || space->purpose != FIL_TYPE_LOG); |
369 | return(space); |
370 | } |
371 | |
372 | /** Returns the latch of a file space. |
373 | @param[in] id space id |
374 | @param[out] flags tablespace flags |
375 | @return latch protecting storage allocation */ |
376 | rw_lock_t* |
377 | fil_space_get_latch( |
378 | ulint id, |
379 | ulint* flags) |
380 | { |
381 | fil_space_t* space; |
382 | |
383 | ut_ad(fil_system.is_initialised()); |
384 | |
385 | mutex_enter(&fil_system.mutex); |
386 | |
387 | space = fil_space_get_by_id(id); |
388 | |
389 | ut_a(space); |
390 | |
391 | if (flags) { |
392 | *flags = space->flags; |
393 | } |
394 | |
395 | mutex_exit(&fil_system.mutex); |
396 | |
397 | return(&(space->latch)); |
398 | } |
399 | |
400 | /** Note that the tablespace has been imported. |
401 | Initially, purpose=FIL_TYPE_IMPORT so that no redo log is |
402 | written while the space ID is being updated in each page. */ |
403 | void fil_space_t::set_imported() |
404 | { |
405 | ut_ad(purpose == FIL_TYPE_IMPORT); |
406 | const fil_node_t* node = UT_LIST_GET_FIRST(chain); |
407 | atomic_write_supported = node->atomic_write |
408 | && srv_use_atomic_writes |
409 | && my_test_if_atomic_write(node->handle, |
410 | int(page_size_t(flags).physical())); |
411 | purpose = FIL_TYPE_TABLESPACE; |
412 | } |
413 | |
414 | /**********************************************************************//** |
415 | Checks if all the file nodes in a space are flushed. |
416 | @return true if all are flushed */ |
417 | static |
418 | bool |
419 | fil_space_is_flushed( |
420 | /*=================*/ |
421 | fil_space_t* space) /*!< in: space */ |
422 | { |
423 | ut_ad(mutex_own(&fil_system.mutex)); |
424 | |
425 | for (const fil_node_t* node = UT_LIST_GET_FIRST(space->chain); |
426 | node != NULL; |
427 | node = UT_LIST_GET_NEXT(chain, node)) { |
428 | |
429 | if (node->modification_counter > node->flush_counter) { |
430 | |
431 | ut_ad(!fil_buffering_disabled(space)); |
432 | return(false); |
433 | } |
434 | } |
435 | |
436 | return(true); |
437 | } |
438 | |
439 | |
440 | /** Append a file to the chain of files of a space. |
441 | @param[in] name file name of a file that is not open |
442 | @param[in] size file size in entire database blocks |
443 | @param[in,out] space tablespace from fil_space_create() |
444 | @param[in] is_raw whether this is a raw device or partition |
445 | @param[in] atomic_write true if the file could use atomic write |
446 | @param[in] max_pages maximum number of pages in file, |
447 | ULINT_MAX means the file size is unlimited. |
448 | @return pointer to the file name |
449 | @retval NULL if error */ |
450 | static |
451 | fil_node_t* |
452 | fil_node_create_low( |
453 | const char* name, |
454 | ulint size, |
455 | fil_space_t* space, |
456 | bool is_raw, |
457 | bool atomic_write, |
458 | ulint max_pages = ULINT_MAX) |
459 | { |
460 | fil_node_t* node; |
461 | |
462 | ut_ad(name != NULL); |
463 | ut_ad(fil_system.is_initialised()); |
464 | |
465 | if (space == NULL) { |
466 | return(NULL); |
467 | } |
468 | |
469 | node = reinterpret_cast<fil_node_t*>(ut_zalloc_nokey(sizeof(*node))); |
470 | |
471 | node->handle = OS_FILE_CLOSED; |
472 | |
473 | node->name = mem_strdup(name); |
474 | |
475 | ut_a(!is_raw || srv_start_raw_disk_in_use); |
476 | |
477 | node->sync_event = os_event_create("fsync_event" ); |
478 | |
479 | node->is_raw_disk = is_raw; |
480 | |
481 | node->size = size; |
482 | |
483 | node->magic_n = FIL_NODE_MAGIC_N; |
484 | |
485 | node->init_size = size; |
486 | node->max_size = max_pages; |
487 | |
488 | mutex_enter(&fil_system.mutex); |
489 | |
490 | space->size += size; |
491 | |
492 | node->space = space; |
493 | |
494 | node->atomic_write = atomic_write; |
495 | |
496 | UT_LIST_ADD_LAST(space->chain, node); |
497 | mutex_exit(&fil_system.mutex); |
498 | |
499 | return(node); |
500 | } |
501 | |
502 | /** Appends a new file to the chain of files of a space. File must be closed. |
503 | @param[in] name file name (file must be closed) |
504 | @param[in] size file size in database blocks, rounded downwards to |
505 | an integer |
506 | @param[in,out] space space where to append |
507 | @param[in] is_raw true if a raw device or a raw disk partition |
508 | @param[in] atomic_write true if the file could use atomic write |
509 | @param[in] max_pages maximum number of pages in file, |
510 | ULINT_MAX means the file size is unlimited. |
511 | @return pointer to the file name |
512 | @retval NULL if error */ |
513 | char* |
514 | fil_node_create( |
515 | const char* name, |
516 | ulint size, |
517 | fil_space_t* space, |
518 | bool is_raw, |
519 | bool atomic_write, |
520 | ulint max_pages) |
521 | { |
522 | fil_node_t* node; |
523 | |
524 | node = fil_node_create_low( |
525 | name, size, space, is_raw, atomic_write, max_pages); |
526 | |
527 | return(node == NULL ? NULL : node->name); |
528 | } |
529 | |
530 | /** Open a file node of a tablespace. |
531 | @param[in,out] node File node |
532 | @return false if the file can't be opened, otherwise true */ |
533 | static |
534 | bool |
535 | fil_node_open_file( |
536 | fil_node_t* node) |
537 | { |
538 | bool success; |
539 | bool read_only_mode; |
540 | fil_space_t* space = node->space; |
541 | |
542 | ut_ad(mutex_own(&fil_system.mutex)); |
543 | ut_a(node->n_pending == 0); |
544 | ut_a(!node->is_open()); |
545 | |
546 | read_only_mode = space->purpose != FIL_TYPE_TEMPORARY |
547 | && srv_read_only_mode; |
548 | |
549 | const bool first_time_open = node->size == 0; |
550 | |
551 | if (first_time_open |
552 | || (space->purpose == FIL_TYPE_TABLESPACE |
553 | && node == UT_LIST_GET_FIRST(space->chain) |
554 | && srv_startup_is_before_trx_rollback_phase |
555 | && !undo::Truncate::was_tablespace_truncated(space->id))) { |
556 | /* We do not know the size of the file yet. First we |
557 | open the file in the normal mode, no async I/O here, |
558 | for simplicity. Then do some checks, and close the |
559 | file again. NOTE that we could not use the simple |
560 | file read function os_file_read() in Windows to read |
561 | from a file opened for async I/O! */ |
562 | |
563 | retry: |
564 | node->handle = os_file_create_simple_no_error_handling( |
565 | innodb_data_file_key, node->name, OS_FILE_OPEN, |
566 | OS_FILE_READ_ONLY, read_only_mode, &success); |
567 | |
568 | if (!success) { |
569 | /* The following call prints an error message */ |
570 | ulint err = os_file_get_last_error(true); |
571 | if (err == EMFILE + 100) { |
572 | if (fil_try_to_close_file_in_LRU(true)) |
573 | goto retry; |
574 | } |
575 | |
576 | ib::warn() << "Cannot open '" << node->name << "'." |
577 | " Have you deleted .ibd files under a" |
578 | " running mysqld server?" ; |
579 | return(false); |
580 | } |
581 | |
582 | os_offset_t size_bytes = os_file_get_size(node->handle); |
583 | ut_a(size_bytes != (os_offset_t) -1); |
584 | |
585 | ut_a(space->purpose != FIL_TYPE_LOG); |
586 | const page_size_t page_size(space->flags); |
587 | const ulint psize = page_size.physical(); |
588 | const ulint min_size = FIL_IBD_FILE_INITIAL_SIZE |
589 | * psize; |
590 | |
591 | if (size_bytes < min_size) { |
592 | ib::error() << "The size of the file " << node->name |
593 | << " is only " << size_bytes |
594 | << " bytes, should be at least " << min_size; |
595 | os_file_close(node->handle); |
596 | node->handle = OS_FILE_CLOSED; |
597 | return(false); |
598 | } |
599 | |
600 | /* Read the first page of the tablespace */ |
601 | |
602 | byte* buf2 = static_cast<byte*>(ut_malloc_nokey(2 * psize)); |
603 | |
604 | /* Align the memory for file i/o if we might have O_DIRECT |
605 | set */ |
606 | byte* page = static_cast<byte*>(ut_align(buf2, psize)); |
607 | |
608 | IORequest request(IORequest::READ); |
609 | |
610 | success = os_file_read( |
611 | request, |
612 | node->handle, page, 0, psize); |
613 | srv_stats.page0_read.add(1); |
614 | |
615 | const ulint space_id |
616 | = fsp_header_get_space_id(page); |
617 | ulint flags = fsp_header_get_flags(page); |
618 | const ulint size = fsp_header_get_field( |
619 | page, FSP_SIZE); |
620 | const ulint free_limit = fsp_header_get_field( |
621 | page, FSP_FREE_LIMIT); |
622 | const ulint free_len = flst_get_len( |
623 | FSP_HEADER_OFFSET + FSP_FREE + page); |
624 | |
625 | /* Try to read crypt_data from page 0 if it is not yet |
626 | read. */ |
627 | if (!space->crypt_data) { |
628 | space->crypt_data = fil_space_read_crypt_data( |
629 | page_size_t(space->flags), page); |
630 | } |
631 | |
632 | ut_free(buf2); |
633 | os_file_close(node->handle); |
634 | node->handle = OS_FILE_CLOSED; |
635 | |
636 | if (!fsp_flags_is_valid(flags, space->id)) { |
637 | ulint cflags = fsp_flags_convert_from_101(flags); |
638 | if (cflags == ULINT_UNDEFINED |
639 | || (cflags ^ space->flags) & ~FSP_FLAGS_MEM_MASK) { |
640 | ib::error() |
641 | << "Expected tablespace flags " |
642 | << ib::hex(space->flags) |
643 | << " but found " << ib::hex(flags) |
644 | << " in the file " << node->name; |
645 | return(false); |
646 | } |
647 | |
648 | flags = cflags; |
649 | } |
650 | |
651 | if (UNIV_UNLIKELY(space_id != space->id)) { |
652 | ib::error() |
653 | << "Expected tablespace id " << space->id |
654 | << " but found " << space_id |
655 | << " in the file" << node->name; |
656 | return(false); |
657 | } |
658 | |
659 | ut_ad(space->free_limit == 0 |
660 | || space->free_limit == free_limit); |
661 | ut_ad(space->free_len == 0 |
662 | || space->free_len == free_len); |
663 | space->size_in_header = size; |
664 | space->free_limit = free_limit; |
665 | space->free_len = free_len; |
666 | |
667 | if (first_time_open) { |
668 | /* Truncate the size to a multiple of extent size. */ |
669 | ulint mask = psize * FSP_EXTENT_SIZE - 1; |
670 | |
671 | if (size_bytes <= mask) { |
672 | /* .ibd files start smaller than an |
673 | extent size. Do not truncate valid data. */ |
674 | } else { |
675 | size_bytes &= ~os_offset_t(mask); |
676 | } |
677 | |
678 | node->size = ulint(size_bytes / psize); |
679 | space->size += node->size; |
680 | } |
681 | } |
682 | |
683 | /* printf("Opening file %s\n", node->name); */ |
684 | |
685 | /* Open the file for reading and writing, in Windows normally in the |
686 | unbuffered async I/O mode, though global variables may make |
687 | os_file_create() to fall back to the normal file I/O mode. */ |
688 | |
689 | if (space->purpose == FIL_TYPE_LOG) { |
690 | node->handle = os_file_create( |
691 | innodb_log_file_key, node->name, OS_FILE_OPEN, |
692 | OS_FILE_AIO, OS_LOG_FILE, read_only_mode, &success); |
693 | } else if (node->is_raw_disk) { |
694 | node->handle = os_file_create( |
695 | innodb_data_file_key, node->name, OS_FILE_OPEN_RAW, |
696 | OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success); |
697 | } else { |
698 | node->handle = os_file_create( |
699 | innodb_data_file_key, node->name, OS_FILE_OPEN, |
700 | OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success); |
701 | |
702 | if (first_time_open) { |
703 | /* |
704 | For the temporary tablespace and during the |
705 | non-redo-logged adjustments in |
706 | IMPORT TABLESPACE, we do not care about |
707 | the atomicity of writes. |
708 | |
709 | Atomic writes is supported if the file can be used |
710 | with atomic_writes (not log file), O_DIRECT is |
711 | used (tested in ha_innodb.cc) and the file is |
712 | device and file system that supports atomic writes |
713 | for the given block size |
714 | */ |
715 | space->atomic_write_supported |
716 | = space->purpose == FIL_TYPE_TEMPORARY |
717 | || space->purpose == FIL_TYPE_IMPORT |
718 | || (node->atomic_write |
719 | && srv_use_atomic_writes |
720 | && my_test_if_atomic_write( |
721 | node->handle, |
722 | int(page_size_t(space->flags) |
723 | .physical()))); |
724 | } |
725 | } |
726 | |
727 | ut_a(success); |
728 | ut_a(node->is_open()); |
729 | |
730 | fil_system.n_open++; |
731 | fil_n_file_opened++; |
732 | |
733 | if (fil_space_belongs_in_lru(space)) { |
734 | |
735 | /* Put the node to the LRU list */ |
736 | UT_LIST_ADD_FIRST(fil_system.LRU, node); |
737 | } |
738 | |
739 | return(true); |
740 | } |
741 | |
742 | /** Close a file node. |
743 | @param[in,out] node File node */ |
744 | static |
745 | void |
746 | fil_node_close_file( |
747 | fil_node_t* node) |
748 | { |
749 | bool ret; |
750 | |
751 | ut_ad(mutex_own(&(fil_system.mutex))); |
752 | ut_a(node->is_open()); |
753 | ut_a(node->n_pending == 0); |
754 | ut_a(node->n_pending_flushes == 0); |
755 | ut_a(!node->being_extended); |
756 | ut_a(node->modification_counter == node->flush_counter |
757 | || node->space->purpose == FIL_TYPE_TEMPORARY |
758 | || srv_fast_shutdown == 2 |
759 | || !srv_was_started); |
760 | |
761 | ret = os_file_close(node->handle); |
762 | ut_a(ret); |
763 | |
764 | /* printf("Closing file %s\n", node->name); */ |
765 | |
766 | node->handle = OS_FILE_CLOSED; |
767 | ut_ad(!node->is_open()); |
768 | ut_a(fil_system.n_open > 0); |
769 | fil_system.n_open--; |
770 | fil_n_file_opened--; |
771 | |
772 | if (fil_space_belongs_in_lru(node->space)) { |
773 | |
774 | ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0); |
775 | |
776 | /* The node is in the LRU list, remove it */ |
777 | UT_LIST_REMOVE(fil_system.LRU, node); |
778 | } |
779 | } |
780 | |
781 | /** Tries to close a file in the LRU list. The caller must hold the fil_sys |
782 | mutex. |
783 | @return true if success, false if should retry later; since i/o's |
784 | generally complete in < 100 ms, and as InnoDB writes at most 128 pages |
785 | from the buffer pool in a batch, and then immediately flushes the |
786 | files, there is a good chance that the next time we find a suitable |
787 | node from the LRU list. |
788 | @param[in] print_info if true, prints information why it |
789 | cannot close a file*/ |
790 | static |
791 | bool |
792 | fil_try_to_close_file_in_LRU( |
793 | |
794 | bool print_info) |
795 | { |
796 | fil_node_t* node; |
797 | |
798 | ut_ad(mutex_own(&fil_system.mutex)); |
799 | |
800 | if (print_info) { |
801 | ib::info() << "fil_sys open file LRU len " |
802 | << UT_LIST_GET_LEN(fil_system.LRU); |
803 | } |
804 | |
805 | for (node = UT_LIST_GET_LAST(fil_system.LRU); |
806 | node != NULL; |
807 | node = UT_LIST_GET_PREV(LRU, node)) { |
808 | |
809 | if (node->modification_counter == node->flush_counter |
810 | && node->n_pending_flushes == 0 |
811 | && !node->being_extended) { |
812 | |
813 | fil_node_close_file(node); |
814 | |
815 | return(true); |
816 | } |
817 | |
818 | if (!print_info) { |
819 | continue; |
820 | } |
821 | |
822 | if (node->n_pending_flushes > 0) { |
823 | |
824 | ib::info() << "Cannot close file " << node->name |
825 | << ", because n_pending_flushes " |
826 | << node->n_pending_flushes; |
827 | } |
828 | |
829 | if (node->modification_counter != node->flush_counter) { |
830 | ib::warn() << "Cannot close file " << node->name |
831 | << ", because modification count " |
832 | << node->modification_counter << |
833 | " != flush count " << node->flush_counter; |
834 | } |
835 | |
836 | if (node->being_extended) { |
837 | ib::info() << "Cannot close file " << node->name |
838 | << ", because it is being extended" ; |
839 | } |
840 | } |
841 | |
842 | return(false); |
843 | } |
844 | |
/** Flush any writes cached by the file system.
Every file of the tablespace whose modification_counter is ahead of its
flush_counter is passed to os_file_flush() (raw disks are skipped on
Windows), after which flush_counter is advanced to the value of
modification_counter that was sampled before the flush.
The caller must hold fil_system.mutex. The mutex is released while
os_file_flush() runs and while waiting for a concurrent flush of the
same file; it is held again when the function returns.
@param[in,out]	space	tablespace */
static
void
fil_flush_low(fil_space_t* space)
{
	ut_ad(mutex_own(&fil_system.mutex));
	ut_ad(space);
	ut_ad(!space->stop_new_ops);

	if (fil_buffering_disabled(space)) {

		/* No need to flush. User has explicitly disabled
		buffering. */
		ut_ad(!space->is_in_unflushed_spaces);
		ut_ad(fil_space_is_flushed(space));
		ut_ad(space->n_pending_flushes == 0);

#ifdef UNIV_DEBUG
		/* Verify that no file of the tablespace has unflushed
		modifications or a flush in progress. */
		for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
		     node != NULL;
		     node = UT_LIST_GET_NEXT(chain, node)) {
			ut_ad(node->modification_counter
			      == node->flush_counter);
			ut_ad(node->n_pending_flushes == 0);
		}
#endif /* UNIV_DEBUG */

		return;
	}

	/* Prevent dropping of the space while we are flushing */
	space->n_pending_flushes++;

	for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
	     node != NULL;
	     node = UT_LIST_GET_NEXT(chain, node)) {

		/* Sample the counter now: modifications made after this
		point are not covered by the flush below. */
		int64_t	old_mod_counter = node->modification_counter;

		if (old_mod_counter <= node->flush_counter) {
			/* Nothing to flush in this file. */
			continue;
		}

		ut_a(node->is_open());

		/* Account for the pending flush in the global counters;
		the matching decrement is in the switch at the end of
		the loop body. */
		switch (space->purpose) {
		case FIL_TYPE_TEMPORARY:
			ut_ad(0); // we already checked for this
			/* fall through */
		case FIL_TYPE_TABLESPACE:
		case FIL_TYPE_IMPORT:
			fil_n_pending_tablespace_flushes++;
			break;
		case FIL_TYPE_LOG:
			fil_n_pending_log_flushes++;
			fil_n_log_flushes++;
			break;
		}
#ifdef _WIN32
		if (node->is_raw_disk) {

			goto skip_flush;
		}
#endif /* _WIN32 */
retry:
		if (node->n_pending_flushes > 0) {
			/* We want to avoid calling os_file_flush() on
			the file twice at the same time, because we do
			not know what bugs OS's may contain in file
			i/o */

			int64_t	sig_count = os_event_reset(node->sync_event);

			mutex_exit(&fil_system.mutex);

			os_event_wait_low(node->sync_event, sig_count);

			mutex_enter(&fil_system.mutex);

			if (node->flush_counter >= old_mod_counter) {

				/* The flush we waited for already
				covers the modifications we sampled. */
				goto skip_flush;
			}

			goto retry;
		}

		ut_a(node->is_open());
		node->n_pending_flushes++;

		/* The flush itself runs without fil_system.mutex. */
		mutex_exit(&fil_system.mutex);

		os_file_flush(node->handle);

		mutex_enter(&fil_system.mutex);

		/* Signal threads waiting in the retry branch above. */
		os_event_set(node->sync_event);

		node->n_pending_flushes--;
skip_flush:
		if (node->flush_counter < old_mod_counter) {
			node->flush_counter = old_mod_counter;

			/* If this made the whole tablespace flushed,
			take it off the list of unflushed spaces. */
			if (space->is_in_unflushed_spaces
			    && fil_space_is_flushed(space)) {

				space->is_in_unflushed_spaces = false;

				UT_LIST_REMOVE(
					fil_system.unflushed_spaces,
					space);
			}
		}

		/* Undo the pending-flush accounting done before the
		flush and move on to the next file. Only
		FIL_TYPE_TEMPORARY leaves the switch and reaches the
		assertion below. */
		switch (space->purpose) {
		case FIL_TYPE_TEMPORARY:
			break;
		case FIL_TYPE_TABLESPACE:
		case FIL_TYPE_IMPORT:
			fil_n_pending_tablespace_flushes--;
			continue;
		case FIL_TYPE_LOG:
			fil_n_pending_log_flushes--;
			continue;
		}

		ut_ad(0);
	}

	space->n_pending_flushes--;
}
976 | |
/** Try to extend a tablespace.
The caller must hold fil_system.mutex. When the function returns false,
the mutex is held; when it returns true (another thread is extending the
file), the mutex has been released and the caller has to acquire it
again before retrying.
@param[in,out]	space		tablespace to be extended
@param[in,out]	node		last file of the tablespace
@param[in]	size		desired size in number of pages
@param[out]	success		whether the operation succeeded
@return whether the operation should be retried */
static ATTRIBUTE_COLD __attribute__((warn_unused_result, nonnull))
bool
fil_space_extend_must_retry(
	fil_space_t*	space,
	fil_node_t*	node,
	ulint		size,
	bool*		success)
{
	ut_ad(mutex_own(&fil_system.mutex));
	ut_ad(UT_LIST_GET_LAST(space->chain) == node);
	ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE);

	*success = space->size >= size;

	if (*success) {
		/* Space already big enough */
		return(false);
	}

	if (node->being_extended) {
		/* Another thread is currently extending the file. Wait
		for it to finish.
		It'd have been better to use event driven mechanism but
		the entire module is peppered with polling stuff. */
		mutex_exit(&fil_system.mutex);
		os_thread_sleep(100000);
		return(true);
	}

	node->being_extended = true;

	if (!fil_node_prepare_for_io(node, space)) {
		/* The tablespace data file, such as .ibd file, is missing */
		/* *success is still false at this point. */
		node->being_extended = false;
		return(false);
	}

	/* At this point it is safe to release fil_system.mutex. No
	other thread can rename, delete, close or extend the file because
	we have set the node->being_extended flag. */
	mutex_exit(&fil_system.mutex);

	ut_ad(size > space->size);

	/* Page number one past the current end of the tablespace, and
	the page number at which this (last) file starts. */
	ulint		last_page_no		= space->size;
	const ulint	file_start_page_no	= last_page_no - node->size;

	/* Determine correct file block size */
	if (node->block_size == 0) {
		node->block_size = os_file_get_block_size(
			node->handle, node->name);
	}

	const page_size_t	pageSize(space->flags);
	const ulint		page_size = pageSize.physical();

	/* fil_read_first_page() expects srv_page_size bytes.
	fil_node_open_file() expects at least 4 * srv_page_size bytes.*/
	/* The new file size in bytes: the requested number of pages in
	this file, but not less than the initial .ibd file size. */
	os_offset_t new_size = std::max(
		os_offset_t(size - file_start_page_no) * page_size,
		os_offset_t(FIL_IBD_FILE_INITIAL_SIZE << srv_page_size_shift));

	*success = os_file_set_size(node->name, node->handle, new_size,
		FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));

	/* NOTE(review): this stores the success flag itself, so the
	variable becomes true after a successful extension and false
	after a failed one. Judging by its name it presumably suppresses
	repeated "disk full" messages, in which case the polarity looks
	inverted - confirm against its users before changing. */
	os_has_said_disk_full = *success;
	if (*success) {
		last_page_no = size;
	} else {
		/* Let us measure the size of the file
		to determine how much we were able to
		extend it */
		os_offset_t	fsize = os_file_get_size(node->handle);
		ut_a(fsize != os_offset_t(-1));

		last_page_no = ulint(fsize / page_size)
			+ file_start_page_no;
	}
	mutex_enter(&fil_system.mutex);

	ut_a(node->being_extended);
	node->being_extended = false;
	/* The file must not have become smaller. */
	ut_a(last_page_no - file_start_page_no >= node->size);

	/* Add the growth of this file to the tablespace size. */
	ulint file_size = last_page_no - file_start_page_no;
	space->size += file_size - node->size;
	node->size = file_size;
	/* The file size in pages, rounded down to a multiple of the
	number of pages per MiB (1 << (20 - srv_page_size_shift)). */
	const ulint pages_in_MiB = node->size
		& ~ulint((1U << (20U - srv_page_size_shift)) - 1);

	/* Presumably balances the fil_node_prepare_for_io() call
	above. */
	fil_node_complete_io(node,IORequestRead);

	/* Keep the last data file size info up to date, rounded to
	full megabytes */

	switch (space->id) {
	case TRX_SYS_SPACE:
		srv_sys_space.set_last_file_size(pages_in_MiB);
		fil_flush_low(space);
		return(false);
	default:
		ut_ad(space->purpose == FIL_TYPE_TABLESPACE
		      || space->purpose == FIL_TYPE_IMPORT);
		/* Flush only ordinary tablespaces that are not being
		truncated. */
		if (space->purpose == FIL_TYPE_TABLESPACE
		    && !space->is_being_truncated) {
			fil_flush_low(space);
		}
		return(false);
	case SRV_TMP_SPACE_ID:
		/* No flush is issued for the temporary tablespace. */
		ut_ad(space->purpose == FIL_TYPE_TEMPORARY);
		srv_tmp_space.set_last_file_size(pages_in_MiB);
		return(false);
	}

}
1098 | |
/*******************************************************************//**
Reserves the fil_system.mutex and tries to make sure we can open at least one
file while holding it. This should be called before calling
fil_node_prepare_for_io(), because that function may need to open a file.
The function always returns holding fil_system.mutex, also when the
tablespace does not exist. If space->recv_size is set, the tablespace is
additionally extended to that size. */
static
void
fil_mutex_enter_and_prepare_for_io(
/*===============================*/
	ulint	space_id)	/*!< in: space id */
{
	/* count: how many times we flushed and retried because no file
	could be closed; count2: how many times we waited because i/o on
	the tablespace was stopped. */
	for (ulint count = 0, count2 = 0;;) {
		mutex_enter(&fil_system.mutex);

		if (space_id >= SRV_LOG_SPACE_FIRST_ID) {
			/* We keep log files always open. */
			break;
		}

		fil_space_t*	space = fil_space_get_by_id(space_id);

		if (space == NULL) {
			break;
		}

		if (space->stop_ios) {
			ut_ad(space->id != 0);
			/* We are going to do a rename file and want to stop
			new i/o's for a while. */

			if (count2 > 20000) {
				ib::warn() << "Tablespace " << space->name
					<< " has i/o ops stopped for a long"
					" time " << count2;
			}

			mutex_exit(&fil_system.mutex);

			/* Wake the i/o-handler threads to make sure pending
			i/o's are performed */
			os_aio_simulated_wake_handler_threads();

			/* The sleep here is just to give IO helper threads a
			bit of time to do some work. It is not required that
			all IO related to the tablespace being renamed must
			be flushed here as we do fil_flush() in
			fil_rename_tablespace() as well. */
			os_thread_sleep(20000);

			/* Flush tablespaces so that we can close modified
			files in the LRU list */
			fil_flush_file_spaces(FIL_TYPE_TABLESPACE);

			os_thread_sleep(20000);

			count2++;

			/* The mutex was released above; the next
			iteration acquires it again. */
			continue;
		}

		fil_node_t*	node = UT_LIST_GET_LAST(space->chain);
		ut_ad(space->id == 0
		      || node == UT_LIST_GET_FIRST(space->chain));

		if (space->id == 0) {
			/* We keep the system tablespace files always
			open; this is important in preventing
			deadlocks in this module, as a page read
			completion often performs another read from
			the insert buffer. The insert buffer is in
			tablespace 0, and we cannot end up waiting in
			this function. */
		} else if (!node || node->is_open()) {
			/* If the file is already open, no need to do
			anything; if the space does not exist, we handle the
			situation in the function which called this
			function */
		} else {
			while (fil_system.n_open >= srv_max_n_open_files) {
				/* Too many files are open */
				/* The argument presumably enables
				diagnostic output once we have already
				retried more than once. */
				if (fil_try_to_close_file_in_LRU(count > 1)) {
					/* No problem */
				} else if (count >= 2) {
					/* Give up closing files and
					proceed with the limit exceeded. */
					ib::warn() << "innodb_open_files="
						<< srv_max_n_open_files
						<< " is exceeded ("
						<< fil_system.n_open
						<< ") files stay open)";
					break;
				} else {
					mutex_exit(&fil_system.mutex);
					os_aio_simulated_wake_handler_threads();
					os_thread_sleep(20000);
					/* Flush tablespaces so that we can
					close modified files in the LRU list */
					fil_flush_file_spaces(FIL_TYPE_TABLESPACE);

					count++;
					mutex_enter(&fil_system.mutex);
					/* Re-evaluates the inner while
					condition with the mutex held. */
					continue;
				}
			}
		}

		/* A nonzero recv_size is a request to extend the
		tablespace to that many pages. */
		ulint size = space->recv_size;
		if (UNIV_UNLIKELY(size != 0)) {
			ut_ad(node);
			bool	success;
			if (fil_space_extend_must_retry(space, node, size,
							&success)) {
				/* The mutex was released by the callee;
				start over. */
				continue;
			}

			ut_ad(mutex_own(&fil_system.mutex));
			/* Crash recovery requires the file extension
			to succeed. */
			ut_a(success);
			/* InnoDB data files cannot shrink. */
			ut_a(space->size >= size);

			/* There could be multiple concurrent I/O requests for
			this tablespace (multiple threads trying to extend
			this tablespace).

			Also, fil_space_set_recv_size() may have been invoked
			again during the file extension while fil_system.mutex
			was not being held by us.

			Only if space->recv_size matches what we read
			originally, reset the field. In this way, a
			subsequent I/O request will handle any pending
			fil_space_set_recv_size(). */

			if (size == space->recv_size) {
				space->recv_size = 0;
			}
		}

		break;
	}
}
1239 | |
1240 | /** Try to extend a tablespace if it is smaller than the specified size. |
1241 | @param[in,out] space tablespace |
1242 | @param[in] size desired size in pages |
1243 | @return whether the tablespace is at least as big as requested */ |
1244 | bool |
1245 | fil_space_extend( |
1246 | fil_space_t* space, |
1247 | ulint size) |
1248 | { |
1249 | ut_ad(!srv_read_only_mode || space->purpose == FIL_TYPE_TEMPORARY); |
1250 | |
1251 | bool success; |
1252 | |
1253 | do { |
1254 | fil_mutex_enter_and_prepare_for_io(space->id); |
1255 | } while (fil_space_extend_must_retry( |
1256 | space, UT_LIST_GET_LAST(space->chain), size, |
1257 | &success)); |
1258 | |
1259 | mutex_exit(&fil_system.mutex); |
1260 | return(success); |
1261 | } |
1262 | |
1263 | /** Prepare to free a file node object from a tablespace memory cache. |
1264 | @param[in,out] node file node |
1265 | @param[in] space tablespace */ |
1266 | static |
1267 | void |
1268 | fil_node_close_to_free( |
1269 | fil_node_t* node, |
1270 | fil_space_t* space) |
1271 | { |
1272 | ut_ad(mutex_own(&fil_system.mutex)); |
1273 | ut_a(node->magic_n == FIL_NODE_MAGIC_N); |
1274 | ut_a(node->n_pending == 0); |
1275 | ut_a(!node->being_extended); |
1276 | |
1277 | if (node->is_open()) { |
1278 | /* We fool the assertion in fil_node_close_file() to think |
1279 | there are no unflushed modifications in the file */ |
1280 | |
1281 | node->modification_counter = node->flush_counter; |
1282 | os_event_set(node->sync_event); |
1283 | |
1284 | if (fil_buffering_disabled(space)) { |
1285 | |
1286 | ut_ad(!space->is_in_unflushed_spaces); |
1287 | ut_ad(fil_space_is_flushed(space)); |
1288 | |
1289 | } else if (space->is_in_unflushed_spaces |
1290 | && fil_space_is_flushed(space)) { |
1291 | |
1292 | space->is_in_unflushed_spaces = false; |
1293 | |
1294 | UT_LIST_REMOVE(fil_system.unflushed_spaces, space); |
1295 | } |
1296 | |
1297 | fil_node_close_file(node); |
1298 | } |
1299 | } |
1300 | |
1301 | /** Detach a space object from the tablespace memory cache. |
1302 | Closes the files in the chain but does not delete them. |
1303 | There must not be any pending i/o's or flushes on the files. |
1304 | @param[in,out] space tablespace */ |
1305 | static |
1306 | void |
1307 | fil_space_detach( |
1308 | fil_space_t* space) |
1309 | { |
1310 | ut_ad(mutex_own(&fil_system.mutex)); |
1311 | |
1312 | HASH_DELETE(fil_space_t, hash, fil_system.spaces, space->id, space); |
1313 | |
1314 | if (space->is_in_unflushed_spaces) { |
1315 | |
1316 | ut_ad(!fil_buffering_disabled(space)); |
1317 | space->is_in_unflushed_spaces = false; |
1318 | |
1319 | UT_LIST_REMOVE(fil_system.unflushed_spaces, space); |
1320 | } |
1321 | |
1322 | if (space->is_in_rotation_list) { |
1323 | space->is_in_rotation_list = false; |
1324 | |
1325 | UT_LIST_REMOVE(fil_system.rotation_list, space); |
1326 | } |
1327 | |
1328 | UT_LIST_REMOVE(fil_system.space_list, space); |
1329 | |
1330 | ut_a(space->magic_n == FIL_SPACE_MAGIC_N); |
1331 | ut_a(space->n_pending_flushes == 0); |
1332 | |
1333 | for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain); |
1334 | fil_node != NULL; |
1335 | fil_node = UT_LIST_GET_NEXT(chain, fil_node)) { |
1336 | |
1337 | fil_node_close_to_free(fil_node, space); |
1338 | } |
1339 | |
1340 | if (space == fil_system.sys_space) { |
1341 | fil_system.sys_space = NULL; |
1342 | } else if (space == fil_system.temp_space) { |
1343 | fil_system.temp_space = NULL; |
1344 | } |
1345 | } |
1346 | |
1347 | /** Free a tablespace object on which fil_space_detach() was invoked. |
1348 | There must not be any pending i/o's or flushes on the files. |
1349 | @param[in,out] space tablespace */ |
1350 | static |
1351 | void |
1352 | fil_space_free_low( |
1353 | fil_space_t* space) |
1354 | { |
1355 | /* The tablespace must not be in fil_system.named_spaces. */ |
1356 | ut_ad(srv_fast_shutdown == 2 || !srv_was_started |
1357 | || space->max_lsn == 0); |
1358 | |
1359 | /* Wait for fil_space_t::release_for_io(); after |
1360 | fil_space_detach(), the tablespace cannot be found, so |
1361 | fil_space_acquire_for_io() would return NULL */ |
1362 | while (space->pending_io()) { |
1363 | os_thread_sleep(100); |
1364 | } |
1365 | |
1366 | for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); |
1367 | node != NULL; ) { |
1368 | ut_d(space->size -= node->size); |
1369 | os_event_destroy(node->sync_event); |
1370 | ut_free(node->name); |
1371 | fil_node_t* old_node = node; |
1372 | node = UT_LIST_GET_NEXT(chain, node); |
1373 | ut_free(old_node); |
1374 | } |
1375 | |
1376 | ut_ad(space->size == 0); |
1377 | |
1378 | rw_lock_free(&space->latch); |
1379 | fil_space_destroy_crypt_data(&space->crypt_data); |
1380 | |
1381 | ut_free(space->name); |
1382 | ut_free(space); |
1383 | } |
1384 | |
1385 | /** Frees a space object from the tablespace memory cache. |
1386 | Closes the files in the chain but does not delete them. |
1387 | There must not be any pending i/o's or flushes on the files. |
1388 | @param[in] id tablespace identifier |
1389 | @param[in] x_latched whether the caller holds X-mode space->latch |
1390 | @return true if success */ |
1391 | bool |
1392 | fil_space_free( |
1393 | ulint id, |
1394 | bool x_latched) |
1395 | { |
1396 | ut_ad(id != TRX_SYS_SPACE); |
1397 | |
1398 | mutex_enter(&fil_system.mutex); |
1399 | fil_space_t* space = fil_space_get_by_id(id); |
1400 | |
1401 | if (space != NULL) { |
1402 | fil_space_detach(space); |
1403 | } |
1404 | |
1405 | mutex_exit(&fil_system.mutex); |
1406 | |
1407 | if (space != NULL) { |
1408 | if (x_latched) { |
1409 | rw_lock_x_unlock(&space->latch); |
1410 | } |
1411 | |
1412 | bool need_mutex = !recv_recovery_on; |
1413 | |
1414 | if (need_mutex) { |
1415 | log_mutex_enter(); |
1416 | } |
1417 | |
1418 | ut_ad(log_mutex_own()); |
1419 | |
1420 | if (space->max_lsn != 0) { |
1421 | ut_d(space->max_lsn = 0); |
1422 | UT_LIST_REMOVE(fil_system.named_spaces, space); |
1423 | } |
1424 | |
1425 | if (need_mutex) { |
1426 | log_mutex_exit(); |
1427 | } |
1428 | |
1429 | fil_space_free_low(space); |
1430 | } |
1431 | |
1432 | return(space != NULL); |
1433 | } |
1434 | |
1435 | /** Create a space memory object and put it to the fil_system hash table. |
1436 | Error messages are issued to the server log. |
1437 | @param[in] name tablespace name |
1438 | @param[in] id tablespace identifier |
1439 | @param[in] flags tablespace flags |
1440 | @param[in] purpose tablespace purpose |
1441 | @param[in,out] crypt_data encryption information |
1442 | @param[in] mode encryption mode |
1443 | @return pointer to created tablespace, to be filled in with fil_node_create() |
1444 | @retval NULL on failure (such as when the same tablespace exists) */ |
1445 | fil_space_t* |
1446 | fil_space_create( |
1447 | const char* name, |
1448 | ulint id, |
1449 | ulint flags, |
1450 | fil_type_t purpose, |
1451 | fil_space_crypt_t* crypt_data, |
1452 | fil_encryption_t mode) |
1453 | { |
1454 | fil_space_t* space; |
1455 | |
1456 | ut_ad(fil_system.is_initialised()); |
1457 | ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id)); |
1458 | ut_ad(purpose == FIL_TYPE_LOG |
1459 | || srv_page_size == UNIV_PAGE_SIZE_ORIG || flags != 0); |
1460 | |
1461 | DBUG_EXECUTE_IF("fil_space_create_failure" , return(NULL);); |
1462 | |
1463 | mutex_enter(&fil_system.mutex); |
1464 | |
1465 | space = fil_space_get_by_id(id); |
1466 | |
1467 | if (space != NULL) { |
1468 | ib::error() << "Trying to add tablespace '" << name |
1469 | << "' with id " << id |
1470 | << " to the tablespace memory cache, but tablespace '" |
1471 | << space->name << "' already exists in the cache!" ; |
1472 | mutex_exit(&fil_system.mutex); |
1473 | return(NULL); |
1474 | } |
1475 | |
1476 | space = static_cast<fil_space_t*>(ut_zalloc_nokey(sizeof(*space))); |
1477 | |
1478 | space->id = id; |
1479 | space->name = mem_strdup(name); |
1480 | |
1481 | UT_LIST_INIT(space->chain, &fil_node_t::chain); |
1482 | |
1483 | if ((purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT) |
1484 | && !recv_recovery_on |
1485 | && id > fil_system.max_assigned_id) { |
1486 | |
1487 | if (!fil_system.space_id_reuse_warned) { |
1488 | fil_system.space_id_reuse_warned = true; |
1489 | |
1490 | ib::warn() << "Allocated tablespace ID " << id |
1491 | << " for " << name << ", old maximum was " |
1492 | << fil_system.max_assigned_id; |
1493 | } |
1494 | |
1495 | fil_system.max_assigned_id = id; |
1496 | } |
1497 | |
1498 | space->purpose = purpose; |
1499 | space->flags = flags; |
1500 | |
1501 | space->magic_n = FIL_SPACE_MAGIC_N; |
1502 | space->crypt_data = crypt_data; |
1503 | |
1504 | DBUG_LOG("tablespace" , |
1505 | "Created metadata for " << id << " name " << name); |
1506 | if (crypt_data) { |
1507 | DBUG_LOG("crypt" , |
1508 | "Tablespace " << id << " name " << name |
1509 | << " encryption " << crypt_data->encryption |
1510 | << " key id " << crypt_data->key_id |
1511 | << ":" << fil_crypt_get_mode(crypt_data) |
1512 | << " " << fil_crypt_get_type(crypt_data)); |
1513 | } |
1514 | |
1515 | rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); |
1516 | |
1517 | if (space->purpose == FIL_TYPE_TEMPORARY) { |
1518 | ut_d(space->latch.set_temp_fsp()); |
1519 | /* SysTablespace::open_or_create() would pass |
1520 | size!=0 to fil_node_create(), so first_time_open |
1521 | would not hold in fil_node_open_file(), and we |
1522 | must assign this manually. We do not care about |
1523 | the durability or atomicity of writes to the |
1524 | temporary tablespace files. */ |
1525 | space->atomic_write_supported = true; |
1526 | } |
1527 | |
1528 | HASH_INSERT(fil_space_t, hash, fil_system.spaces, id, space); |
1529 | |
1530 | UT_LIST_ADD_LAST(fil_system.space_list, space); |
1531 | |
1532 | if (id < SRV_LOG_SPACE_FIRST_ID && id > fil_system.max_assigned_id) { |
1533 | |
1534 | fil_system.max_assigned_id = id; |
1535 | } |
1536 | |
1537 | /* Inform key rotation that there could be something |
1538 | to do */ |
1539 | if (purpose == FIL_TYPE_TABLESPACE |
1540 | && !srv_fil_crypt_rotate_key_age && fil_crypt_threads_event && |
1541 | (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || |
1542 | srv_encrypt_tables)) { |
1543 | /* Key rotation is not enabled, need to inform background |
1544 | encryption threads. */ |
1545 | UT_LIST_ADD_LAST(fil_system.rotation_list, space); |
1546 | space->is_in_rotation_list = true; |
1547 | mutex_exit(&fil_system.mutex); |
1548 | mutex_enter(&fil_crypt_threads_mutex); |
1549 | os_event_set(fil_crypt_threads_event); |
1550 | mutex_exit(&fil_crypt_threads_mutex); |
1551 | } else { |
1552 | mutex_exit(&fil_system.mutex); |
1553 | } |
1554 | |
1555 | return(space); |
1556 | } |
1557 | |
1558 | /*******************************************************************//** |
1559 | Assigns a new space id for a new single-table tablespace. This works simply by |
1560 | incrementing the global counter. If 4 billion id's is not enough, we may need |
1561 | to recycle id's. |
1562 | @return true if assigned, false if not */ |
1563 | bool |
1564 | fil_assign_new_space_id( |
1565 | /*====================*/ |
1566 | ulint* space_id) /*!< in/out: space id */ |
1567 | { |
1568 | ulint id; |
1569 | bool success; |
1570 | |
1571 | mutex_enter(&fil_system.mutex); |
1572 | |
1573 | id = *space_id; |
1574 | |
1575 | if (id < fil_system.max_assigned_id) { |
1576 | id = fil_system.max_assigned_id; |
1577 | } |
1578 | |
1579 | id++; |
1580 | |
1581 | if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { |
1582 | ib::warn() << "You are running out of new single-table" |
1583 | " tablespace id's. Current counter is " << id |
1584 | << " and it must not exceed" << SRV_LOG_SPACE_FIRST_ID |
1585 | << "! To reset the counter to zero you have to dump" |
1586 | " all your tables and recreate the whole InnoDB" |
1587 | " installation." ; |
1588 | } |
1589 | |
1590 | success = (id < SRV_LOG_SPACE_FIRST_ID); |
1591 | |
1592 | if (success) { |
1593 | *space_id = fil_system.max_assigned_id = id; |
1594 | } else { |
1595 | ib::warn() << "You have run out of single-table tablespace" |
1596 | " id's! Current counter is " << id |
1597 | << ". To reset the counter to zero" |
1598 | " you have to dump all your tables and" |
1599 | " recreate the whole InnoDB installation." ; |
1600 | *space_id = ULINT_UNDEFINED; |
1601 | } |
1602 | |
1603 | mutex_exit(&fil_system.mutex); |
1604 | |
1605 | return(success); |
1606 | } |
1607 | |
1608 | /*******************************************************************//** |
1609 | Returns a pointer to the fil_space_t that is in the memory cache |
1610 | associated with a space id. The caller must lock fil_system.mutex. |
1611 | @return file_space_t pointer, NULL if space not found */ |
1612 | UNIV_INLINE |
1613 | fil_space_t* |
1614 | fil_space_get_space( |
1615 | /*================*/ |
1616 | ulint id) /*!< in: space id */ |
1617 | { |
1618 | fil_space_t* space; |
1619 | fil_node_t* node; |
1620 | |
1621 | ut_ad(fil_system.is_initialised()); |
1622 | |
1623 | space = fil_space_get_by_id(id); |
1624 | if (space == NULL || space->size != 0) { |
1625 | return(space); |
1626 | } |
1627 | |
1628 | switch (space->purpose) { |
1629 | case FIL_TYPE_LOG: |
1630 | break; |
1631 | case FIL_TYPE_TEMPORARY: |
1632 | case FIL_TYPE_TABLESPACE: |
1633 | case FIL_TYPE_IMPORT: |
1634 | ut_a(id != 0); |
1635 | |
1636 | mutex_exit(&fil_system.mutex); |
1637 | |
1638 | /* It is possible that the space gets evicted at this point |
1639 | before the fil_mutex_enter_and_prepare_for_io() acquires |
1640 | the fil_system.mutex. Check for this after completing the |
1641 | call to fil_mutex_enter_and_prepare_for_io(). */ |
1642 | fil_mutex_enter_and_prepare_for_io(id); |
1643 | |
1644 | /* We are still holding the fil_system.mutex. Check if |
1645 | the space is still in memory cache. */ |
1646 | space = fil_space_get_by_id(id); |
1647 | |
1648 | if (space == NULL || UT_LIST_GET_LEN(space->chain) == 0) { |
1649 | return(NULL); |
1650 | } |
1651 | |
1652 | /* The following code must change when InnoDB supports |
1653 | multiple datafiles per tablespace. */ |
1654 | ut_a(1 == UT_LIST_GET_LEN(space->chain)); |
1655 | |
1656 | node = UT_LIST_GET_FIRST(space->chain); |
1657 | |
1658 | /* It must be a single-table tablespace and we have not opened |
1659 | the file yet; the following calls will open it and update the |
1660 | size fields */ |
1661 | |
1662 | if (!fil_node_prepare_for_io(node, space)) { |
1663 | /* The single-table tablespace can't be opened, |
1664 | because the ibd file is missing. */ |
1665 | return(NULL); |
1666 | } |
1667 | |
1668 | fil_node_complete_io(node, IORequestRead); |
1669 | } |
1670 | |
1671 | return(space); |
1672 | } |
1673 | |
1674 | /** Set the recovered size of a tablespace in pages. |
1675 | @param id tablespace ID |
1676 | @param size recovered size in pages */ |
1677 | UNIV_INTERN |
1678 | void |
1679 | fil_space_set_recv_size(ulint id, ulint size) |
1680 | { |
1681 | mutex_enter(&fil_system.mutex); |
1682 | ut_ad(size); |
1683 | ut_ad(id < SRV_LOG_SPACE_FIRST_ID); |
1684 | |
1685 | if (fil_space_t* space = fil_space_get_space(id)) { |
1686 | space->recv_size = size; |
1687 | } |
1688 | |
1689 | mutex_exit(&fil_system.mutex); |
1690 | } |
1691 | |
1692 | /*******************************************************************//** |
1693 | Returns the size of the space in pages. The tablespace must be cached in the |
1694 | memory cache. |
1695 | @return space size, 0 if space not found */ |
1696 | ulint |
1697 | fil_space_get_size( |
1698 | /*===============*/ |
1699 | ulint id) /*!< in: space id */ |
1700 | { |
1701 | fil_space_t* space; |
1702 | ulint size; |
1703 | |
1704 | ut_ad(fil_system.is_initialised()); |
1705 | mutex_enter(&fil_system.mutex); |
1706 | |
1707 | space = fil_space_get_space(id); |
1708 | |
1709 | size = space ? space->size : 0; |
1710 | |
1711 | mutex_exit(&fil_system.mutex); |
1712 | |
1713 | return(size); |
1714 | } |
1715 | |
1716 | /*******************************************************************//** |
1717 | Returns the flags of the space. The tablespace must be cached |
1718 | in the memory cache. |
1719 | @return flags, ULINT_UNDEFINED if space not found */ |
1720 | ulint |
1721 | fil_space_get_flags( |
1722 | /*================*/ |
1723 | ulint id) /*!< in: space id */ |
1724 | { |
1725 | fil_space_t* space; |
1726 | ulint flags; |
1727 | |
1728 | ut_ad(fil_system.is_initialised()); |
1729 | |
1730 | mutex_enter(&fil_system.mutex); |
1731 | |
1732 | space = fil_space_get_space(id); |
1733 | |
1734 | if (space == NULL) { |
1735 | mutex_exit(&fil_system.mutex); |
1736 | |
1737 | return(ULINT_UNDEFINED); |
1738 | } |
1739 | |
1740 | flags = space->flags; |
1741 | |
1742 | mutex_exit(&fil_system.mutex); |
1743 | |
1744 | return(flags); |
1745 | } |
1746 | |
1747 | /** Open each file. Only invoked on fil_system.temp_space. |
1748 | @return whether all files were opened */ |
1749 | bool fil_space_t::open() |
1750 | { |
1751 | ut_ad(fil_system.is_initialised()); |
1752 | |
1753 | mutex_enter(&fil_system.mutex); |
1754 | ut_ad(this == fil_system.temp_space |
1755 | || srv_operation == SRV_OPERATION_BACKUP |
1756 | || srv_operation == SRV_OPERATION_RESTORE |
1757 | || srv_operation == SRV_OPERATION_RESTORE_DELTA); |
1758 | |
1759 | for (fil_node_t* node = UT_LIST_GET_FIRST(chain); |
1760 | node != NULL; |
1761 | node = UT_LIST_GET_NEXT(chain, node)) { |
1762 | if (!node->is_open() && !fil_node_open_file(node)) { |
1763 | mutex_exit(&fil_system.mutex); |
1764 | return false; |
1765 | } |
1766 | } |
1767 | |
1768 | mutex_exit(&fil_system.mutex); |
1769 | return true; |
1770 | } |
1771 | |
1772 | /** Close each file. Only invoked on fil_system.temp_space. */ |
1773 | void fil_space_t::close() |
1774 | { |
1775 | if (!fil_system.is_initialised()) { |
1776 | return; |
1777 | } |
1778 | |
1779 | mutex_enter(&fil_system.mutex); |
1780 | ut_ad(this == fil_system.temp_space |
1781 | || srv_operation == SRV_OPERATION_BACKUP |
1782 | || srv_operation == SRV_OPERATION_RESTORE |
1783 | || srv_operation == SRV_OPERATION_RESTORE_DELTA); |
1784 | |
1785 | for (fil_node_t* node = UT_LIST_GET_FIRST(chain); |
1786 | node != NULL; |
1787 | node = UT_LIST_GET_NEXT(chain, node)) { |
1788 | if (node->is_open()) { |
1789 | fil_node_close_file(node); |
1790 | } |
1791 | } |
1792 | |
1793 | mutex_exit(&fil_system.mutex); |
1794 | } |
1795 | |
1796 | /** Returns the page size of the space and whether it is compressed or not. |
1797 | The tablespace must be cached in the memory cache. |
1798 | @param[in] id space id |
1799 | @param[out] found true if tablespace was found |
1800 | @return page size */ |
1801 | const page_size_t |
1802 | fil_space_get_page_size( |
1803 | ulint id, |
1804 | bool* found) |
1805 | { |
1806 | const ulint flags = fil_space_get_flags(id); |
1807 | |
1808 | if (flags == ULINT_UNDEFINED) { |
1809 | *found = false; |
1810 | return(univ_page_size); |
1811 | } |
1812 | |
1813 | *found = true; |
1814 | |
1815 | return(page_size_t(flags)); |
1816 | } |
1817 | |
1818 | void fil_system_t::create(ulint hash_size) |
1819 | { |
1820 | ut_ad(this == &fil_system); |
1821 | ut_ad(!is_initialised()); |
1822 | ut_ad(!(srv_page_size % FSP_EXTENT_SIZE)); |
1823 | ut_ad(srv_page_size); |
1824 | ut_ad(!spaces); |
1825 | |
1826 | m_initialised = true; |
1827 | |
1828 | compile_time_assert(!(UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX)); |
1829 | compile_time_assert(!(UNIV_PAGE_SIZE_MIN % FSP_EXTENT_SIZE_MIN)); |
1830 | |
1831 | ut_ad(hash_size > 0); |
1832 | |
1833 | mutex_create(LATCH_ID_FIL_SYSTEM, &mutex); |
1834 | |
1835 | spaces = hash_create(hash_size); |
1836 | |
1837 | fil_space_crypt_init(); |
1838 | } |
1839 | |
1840 | void fil_system_t::close() |
1841 | { |
1842 | ut_ad(this == &fil_system); |
1843 | ut_a(!UT_LIST_GET_LEN(LRU)); |
1844 | ut_a(!UT_LIST_GET_LEN(unflushed_spaces)); |
1845 | ut_a(!UT_LIST_GET_LEN(space_list)); |
1846 | ut_ad(!sys_space); |
1847 | ut_ad(!temp_space); |
1848 | |
1849 | if (is_initialised()) { |
1850 | m_initialised = false; |
1851 | hash_table_free(spaces); |
1852 | spaces = NULL; |
1853 | mutex_free(&mutex); |
1854 | fil_space_crypt_cleanup(); |
1855 | } |
1856 | |
1857 | ut_ad(!spaces); |
1858 | } |
1859 | |
1860 | /*******************************************************************//** |
1861 | Opens all log files and system tablespace data files. They stay open until the |
1862 | database server shutdown. This should be called at a server startup after the |
1863 | space objects for the log and the system tablespace have been created. The |
1864 | purpose of this operation is to make sure we never run out of file descriptors |
1865 | if we need to read from the insert buffer or to write to the log. */ |
1866 | void |
1867 | fil_open_log_and_system_tablespace_files(void) |
1868 | /*==========================================*/ |
1869 | { |
1870 | fil_space_t* space; |
1871 | |
1872 | mutex_enter(&fil_system.mutex); |
1873 | |
1874 | for (space = UT_LIST_GET_FIRST(fil_system.space_list); |
1875 | space != NULL; |
1876 | space = UT_LIST_GET_NEXT(space_list, space)) { |
1877 | |
1878 | fil_node_t* node; |
1879 | |
1880 | if (fil_space_belongs_in_lru(space)) { |
1881 | |
1882 | continue; |
1883 | } |
1884 | |
1885 | for (node = UT_LIST_GET_FIRST(space->chain); |
1886 | node != NULL; |
1887 | node = UT_LIST_GET_NEXT(chain, node)) { |
1888 | |
1889 | if (!node->is_open()) { |
1890 | if (!fil_node_open_file(node)) { |
1891 | /* This func is called during server's |
1892 | startup. If some file of log or system |
1893 | tablespace is missing, the server |
1894 | can't start successfully. So we should |
1895 | assert for it. */ |
1896 | ut_a(0); |
1897 | } |
1898 | } |
1899 | |
1900 | if (srv_max_n_open_files < 10 + fil_system.n_open) { |
1901 | |
1902 | ib::warn() << "You must raise the value of" |
1903 | " innodb_open_files in my.cnf!" |
1904 | " Remember that InnoDB keeps all" |
1905 | " log files and all system" |
1906 | " tablespace files open" |
1907 | " for the whole time mysqld is" |
1908 | " running, and needs to open also" |
1909 | " some .ibd files if the" |
1910 | " file-per-table storage model is used." |
1911 | " Current open files " |
1912 | << fil_system.n_open |
1913 | << ", max allowed open files " |
1914 | << srv_max_n_open_files |
1915 | << "." ; |
1916 | } |
1917 | } |
1918 | } |
1919 | |
1920 | mutex_exit(&fil_system.mutex); |
1921 | } |
1922 | |
1923 | /*******************************************************************//** |
1924 | Closes all open files. There must not be any pending i/o's or not flushed |
1925 | modifications in the files. */ |
1926 | void |
1927 | fil_close_all_files(void) |
1928 | /*=====================*/ |
1929 | { |
1930 | fil_space_t* space; |
1931 | |
1932 | /* At shutdown, we should not have any files in this list. */ |
1933 | ut_ad(fil_system.is_initialised()); |
1934 | ut_ad(srv_fast_shutdown == 2 |
1935 | || !srv_was_started |
1936 | || UT_LIST_GET_LEN(fil_system.named_spaces) == 0); |
1937 | |
1938 | mutex_enter(&fil_system.mutex); |
1939 | |
1940 | for (space = UT_LIST_GET_FIRST(fil_system.space_list); |
1941 | space != NULL; ) { |
1942 | fil_node_t* node; |
1943 | fil_space_t* prev_space = space; |
1944 | |
1945 | for (node = UT_LIST_GET_FIRST(space->chain); |
1946 | node != NULL; |
1947 | node = UT_LIST_GET_NEXT(chain, node)) { |
1948 | |
1949 | if (node->is_open()) { |
1950 | fil_node_close_file(node); |
1951 | } |
1952 | } |
1953 | |
1954 | space = UT_LIST_GET_NEXT(space_list, space); |
1955 | fil_space_detach(prev_space); |
1956 | fil_space_free_low(prev_space); |
1957 | } |
1958 | |
1959 | mutex_exit(&fil_system.mutex); |
1960 | |
1961 | ut_ad(srv_fast_shutdown == 2 |
1962 | || !srv_was_started |
1963 | || UT_LIST_GET_LEN(fil_system.named_spaces) == 0); |
1964 | } |
1965 | |
1966 | /*******************************************************************//** |
1967 | Closes the redo log files. There must not be any pending i/o's or not |
1968 | flushed modifications in the files. */ |
1969 | void |
1970 | fil_close_log_files( |
1971 | /*================*/ |
1972 | bool free) /*!< in: whether to free the memory object */ |
1973 | { |
1974 | fil_space_t* space; |
1975 | |
1976 | mutex_enter(&fil_system.mutex); |
1977 | |
1978 | space = UT_LIST_GET_FIRST(fil_system.space_list); |
1979 | |
1980 | while (space != NULL) { |
1981 | fil_node_t* node; |
1982 | fil_space_t* prev_space = space; |
1983 | |
1984 | if (space->purpose != FIL_TYPE_LOG) { |
1985 | space = UT_LIST_GET_NEXT(space_list, space); |
1986 | continue; |
1987 | } |
1988 | |
1989 | /* Log files are not in the fil_system.named_spaces list. */ |
1990 | ut_ad(space->max_lsn == 0); |
1991 | |
1992 | for (node = UT_LIST_GET_FIRST(space->chain); |
1993 | node != NULL; |
1994 | node = UT_LIST_GET_NEXT(chain, node)) { |
1995 | |
1996 | if (node->is_open()) { |
1997 | fil_node_close_file(node); |
1998 | } |
1999 | } |
2000 | |
2001 | space = UT_LIST_GET_NEXT(space_list, space); |
2002 | |
2003 | if (free) { |
2004 | fil_space_detach(prev_space); |
2005 | fil_space_free_low(prev_space); |
2006 | } |
2007 | } |
2008 | |
2009 | mutex_exit(&fil_system.mutex); |
2010 | |
2011 | if (free) { |
2012 | log_sys.log.close(); |
2013 | } |
2014 | } |
2015 | |
2016 | /*******************************************************************//** |
2017 | Sets the max tablespace id counter if the given number is bigger than the |
2018 | previous value. */ |
2019 | void |
2020 | fil_set_max_space_id_if_bigger( |
2021 | /*===========================*/ |
2022 | ulint max_id) /*!< in: maximum known id */ |
2023 | { |
2024 | if (max_id >= SRV_LOG_SPACE_FIRST_ID) { |
2025 | ib::fatal() << "Max tablespace id is too high, " << max_id; |
2026 | } |
2027 | |
2028 | mutex_enter(&fil_system.mutex); |
2029 | |
2030 | if (fil_system.max_assigned_id < max_id) { |
2031 | |
2032 | fil_system.max_assigned_id = max_id; |
2033 | } |
2034 | |
2035 | mutex_exit(&fil_system.mutex); |
2036 | } |
2037 | |
2038 | /** Write the flushed LSN to the page header of the first page in the |
2039 | system tablespace. |
2040 | @param[in] lsn flushed LSN |
2041 | @return DB_SUCCESS or error number */ |
2042 | dberr_t |
2043 | fil_write_flushed_lsn( |
2044 | lsn_t lsn) |
2045 | { |
2046 | byte* buf1; |
2047 | byte* buf; |
2048 | dberr_t err = DB_TABLESPACE_NOT_FOUND; |
2049 | |
2050 | buf1 = static_cast<byte*>(ut_malloc_nokey(2U << srv_page_size_shift)); |
2051 | buf = static_cast<byte*>(ut_align(buf1, srv_page_size)); |
2052 | |
2053 | const page_id_t page_id(TRX_SYS_SPACE, 0); |
2054 | |
2055 | err = fil_read(page_id, univ_page_size, 0, srv_page_size, |
2056 | buf); |
2057 | |
2058 | if (err == DB_SUCCESS) { |
2059 | mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, lsn); |
2060 | err = fil_write(page_id, univ_page_size, 0, |
2061 | srv_page_size, buf); |
2062 | fil_flush_file_spaces(FIL_TYPE_TABLESPACE); |
2063 | } |
2064 | |
2065 | ut_free(buf1); |
2066 | return(err); |
2067 | } |
2068 | |
2069 | /** Acquire a tablespace when it could be dropped concurrently. |
2070 | Used by background threads that do not necessarily hold proper locks |
2071 | for concurrency control. |
2072 | @param[in] id tablespace ID |
2073 | @param[in] silent whether to silently ignore missing tablespaces |
2074 | @return the tablespace |
2075 | @retval NULL if missing or being deleted or truncated */ |
2076 | UNIV_INTERN |
2077 | fil_space_t* |
2078 | fil_space_acquire_low(ulint id, bool silent) |
2079 | { |
2080 | fil_space_t* space; |
2081 | |
2082 | mutex_enter(&fil_system.mutex); |
2083 | |
2084 | space = fil_space_get_by_id(id); |
2085 | |
2086 | if (space == NULL) { |
2087 | if (!silent) { |
2088 | ib::warn() << "Trying to access missing" |
2089 | " tablespace " << id; |
2090 | } |
2091 | } else if (space->is_stopping()) { |
2092 | space = NULL; |
2093 | } else { |
2094 | space->acquire(); |
2095 | } |
2096 | |
2097 | mutex_exit(&fil_system.mutex); |
2098 | |
2099 | return(space); |
2100 | } |
2101 | |
2102 | /** Acquire a tablespace for reading or writing a block, |
2103 | when it could be dropped concurrently. |
2104 | @param[in] id tablespace ID |
2105 | @return the tablespace |
2106 | @retval NULL if missing */ |
2107 | fil_space_t* |
2108 | fil_space_acquire_for_io(ulint id) |
2109 | { |
2110 | mutex_enter(&fil_system.mutex); |
2111 | |
2112 | fil_space_t* space = fil_space_get_by_id(id); |
2113 | |
2114 | if (space) { |
2115 | space->acquire_for_io(); |
2116 | } |
2117 | |
2118 | mutex_exit(&fil_system.mutex); |
2119 | |
2120 | return(space); |
2121 | } |
2122 | |
2123 | /********************************************************//** |
2124 | Creates the database directory for a table if it does not exist yet. */ |
2125 | void |
2126 | fil_create_directory_for_tablename( |
2127 | /*===============================*/ |
2128 | const char* name) /*!< in: name in the standard |
2129 | 'databasename/tablename' format */ |
2130 | { |
2131 | const char* namend; |
2132 | char* path; |
2133 | ulint len; |
2134 | |
2135 | len = strlen(fil_path_to_mysql_datadir); |
2136 | namend = strchr(name, '/'); |
2137 | ut_a(namend); |
2138 | path = static_cast<char*>( |
2139 | ut_malloc_nokey(len + ulint(namend - name) + 2)); |
2140 | |
2141 | memcpy(path, fil_path_to_mysql_datadir, len); |
2142 | path[len] = '/'; |
2143 | memcpy(path + len + 1, name, ulint(namend - name)); |
2144 | path[len + ulint(namend - name) + 1] = 0; |
2145 | |
2146 | os_normalize_path(path); |
2147 | |
2148 | bool success = os_file_create_directory(path, false); |
2149 | ut_a(success); |
2150 | |
2151 | ut_free(path); |
2152 | } |
2153 | |
2154 | /** Write a log record about an operation on a tablespace file. |
2155 | @param[in] type MLOG_FILE_NAME or MLOG_FILE_DELETE |
2156 | or MLOG_FILE_CREATE2 or MLOG_FILE_RENAME2 |
2157 | @param[in] space_id tablespace identifier |
2158 | @param[in] first_page_no first page number in the file |
2159 | @param[in] path file path |
2160 | @param[in] new_path if type is MLOG_FILE_RENAME2, the new name |
2161 | @param[in] flags if type is MLOG_FILE_CREATE2, the space flags |
2162 | @param[in,out] mtr mini-transaction */ |
2163 | static |
2164 | void |
2165 | fil_op_write_log( |
2166 | mlog_id_t type, |
2167 | ulint space_id, |
2168 | ulint first_page_no, |
2169 | const char* path, |
2170 | const char* new_path, |
2171 | ulint flags, |
2172 | mtr_t* mtr) |
2173 | { |
2174 | byte* log_ptr; |
2175 | ulint len; |
2176 | |
2177 | ut_ad(first_page_no == 0); |
2178 | ut_ad(fsp_flags_is_valid(flags, space_id)); |
2179 | |
2180 | /* fil_name_parse() requires that there be at least one path |
2181 | separator and that the file path end with ".ibd". */ |
2182 | ut_ad(strchr(path, OS_PATH_SEPARATOR) != NULL); |
2183 | ut_ad(strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD) == 0); |
2184 | |
2185 | log_ptr = mlog_open(mtr, 11 + 4 + 2 + 1); |
2186 | |
2187 | if (log_ptr == NULL) { |
2188 | /* Logging in mtr is switched off during crash recovery: |
2189 | in that case mlog_open returns NULL */ |
2190 | return; |
2191 | } |
2192 | |
2193 | log_ptr = mlog_write_initial_log_record_low( |
2194 | type, space_id, first_page_no, log_ptr, mtr); |
2195 | |
2196 | if (type == MLOG_FILE_CREATE2) { |
2197 | mach_write_to_4(log_ptr, flags); |
2198 | log_ptr += 4; |
2199 | } |
2200 | |
2201 | /* Let us store the strings as null-terminated for easier readability |
2202 | and handling */ |
2203 | |
2204 | len = strlen(path) + 1; |
2205 | |
2206 | mach_write_to_2(log_ptr, len); |
2207 | log_ptr += 2; |
2208 | mlog_close(mtr, log_ptr); |
2209 | |
2210 | mlog_catenate_string( |
2211 | mtr, reinterpret_cast<const byte*>(path), len); |
2212 | |
2213 | switch (type) { |
2214 | case MLOG_FILE_RENAME2: |
2215 | ut_ad(strchr(new_path, OS_PATH_SEPARATOR) != NULL); |
2216 | len = strlen(new_path) + 1; |
2217 | log_ptr = mlog_open(mtr, 2 + len); |
2218 | ut_a(log_ptr); |
2219 | mach_write_to_2(log_ptr, len); |
2220 | log_ptr += 2; |
2221 | mlog_close(mtr, log_ptr); |
2222 | |
2223 | mlog_catenate_string( |
2224 | mtr, reinterpret_cast<const byte*>(new_path), len); |
2225 | break; |
2226 | case MLOG_FILE_NAME: |
2227 | case MLOG_FILE_DELETE: |
2228 | case MLOG_FILE_CREATE2: |
2229 | break; |
2230 | default: |
2231 | ut_ad(0); |
2232 | } |
2233 | } |
2234 | |
2235 | /** Write redo log for renaming a file. |
2236 | @param[in] space_id tablespace id |
2237 | @param[in] first_page_no first page number in the file |
2238 | @param[in] old_name tablespace file name |
2239 | @param[in] new_name tablespace file name after renaming |
2240 | @param[in,out] mtr mini-transaction */ |
2241 | static |
2242 | void |
2243 | fil_name_write_rename_low( |
2244 | ulint space_id, |
2245 | ulint first_page_no, |
2246 | const char* old_name, |
2247 | const char* new_name, |
2248 | mtr_t* mtr) |
2249 | { |
2250 | ut_ad(!is_predefined_tablespace(space_id)); |
2251 | |
2252 | fil_op_write_log( |
2253 | MLOG_FILE_RENAME2, |
2254 | space_id, first_page_no, old_name, new_name, 0, mtr); |
2255 | } |
2256 | |
2257 | /** Write redo log for renaming a file. |
2258 | @param[in] space_id tablespace id |
2259 | @param[in] old_name tablespace file name |
2260 | @param[in] new_name tablespace file name after renaming */ |
2261 | static void |
2262 | fil_name_write_rename( |
2263 | ulint space_id, |
2264 | const char* old_name, |
2265 | const char* new_name) |
2266 | { |
2267 | mtr_t mtr; |
2268 | mtr.start(); |
2269 | fil_name_write_rename_low(space_id, 0, old_name, new_name, &mtr); |
2270 | mtr.commit(); |
2271 | log_write_up_to(mtr.commit_lsn(), true); |
2272 | } |
2273 | |
2274 | /** Write MLOG_FILE_NAME for a file. |
2275 | @param[in] space_id tablespace id |
2276 | @param[in] first_page_no first page number in the file |
2277 | @param[in] name tablespace file name |
2278 | @param[in,out] mtr mini-transaction */ |
2279 | static |
2280 | void |
2281 | fil_name_write( |
2282 | ulint space_id, |
2283 | ulint first_page_no, |
2284 | const char* name, |
2285 | mtr_t* mtr) |
2286 | { |
2287 | fil_op_write_log( |
2288 | MLOG_FILE_NAME, space_id, first_page_no, name, NULL, 0, mtr); |
2289 | } |
2290 | /** Write MLOG_FILE_NAME for a file. |
2291 | @param[in] space tablespace |
2292 | @param[in] first_page_no first page number in the file |
2293 | @param[in] file tablespace file |
2294 | @param[in,out] mtr mini-transaction */ |
2295 | static |
2296 | void |
2297 | fil_name_write( |
2298 | const fil_space_t* space, |
2299 | ulint first_page_no, |
2300 | const fil_node_t* file, |
2301 | mtr_t* mtr) |
2302 | { |
2303 | fil_name_write(space->id, first_page_no, file->name, mtr); |
2304 | } |
2305 | |
2306 | /** Replay a file rename operation if possible. |
2307 | @param[in] space_id tablespace identifier |
2308 | @param[in] first_page_no first page number in the file |
2309 | @param[in] name old file name |
2310 | @param[in] new_name new file name |
2311 | @return whether the operation was successfully applied |
2312 | (the name did not exist, or new_name did not exist and |
2313 | name was successfully renamed to new_name) */ |
2314 | bool |
2315 | fil_op_replay_rename( |
2316 | ulint space_id, |
2317 | ulint first_page_no, |
2318 | const char* name, |
2319 | const char* new_name) |
2320 | { |
2321 | ut_ad(first_page_no == 0); |
2322 | |
2323 | /* In order to replay the rename, the following must hold: |
2324 | * The new name is not already used. |
2325 | * A tablespace exists with the old name. |
2326 | * The space ID for that tablepace matches this log entry. |
2327 | This will prevent unintended renames during recovery. */ |
2328 | fil_space_t* space = fil_space_get(space_id); |
2329 | |
2330 | if (space == NULL) { |
2331 | return(true); |
2332 | } |
2333 | |
2334 | const bool name_match |
2335 | = strcmp(name, UT_LIST_GET_FIRST(space->chain)->name) == 0; |
2336 | |
2337 | if (!name_match) { |
2338 | return(true); |
2339 | } |
2340 | |
2341 | /* Create the database directory for the new name, if |
2342 | it does not exist yet */ |
2343 | |
2344 | const char* namend = strrchr(new_name, OS_PATH_SEPARATOR); |
2345 | ut_a(namend != NULL); |
2346 | |
2347 | char* dir = static_cast<char*>( |
2348 | ut_malloc_nokey(ulint(namend - new_name) + 1)); |
2349 | |
2350 | memcpy(dir, new_name, ulint(namend - new_name)); |
2351 | dir[namend - new_name] = '\0'; |
2352 | |
2353 | bool success = os_file_create_directory(dir, false); |
2354 | ut_a(success); |
2355 | |
2356 | ulint dirlen = 0; |
2357 | |
2358 | if (const char* dirend = strrchr(dir, OS_PATH_SEPARATOR)) { |
2359 | dirlen = ulint(dirend - dir) + 1; |
2360 | } |
2361 | |
2362 | ut_free(dir); |
2363 | |
2364 | /* New path must not exist. */ |
2365 | dberr_t err = fil_rename_tablespace_check( |
2366 | name, new_name, false); |
2367 | if (err != DB_SUCCESS) { |
2368 | ib::error() << " Cannot replay file rename." |
2369 | " Remove either file and try again." ; |
2370 | return(false); |
2371 | } |
2372 | |
2373 | char* new_table = mem_strdupl( |
2374 | new_name + dirlen, |
2375 | strlen(new_name + dirlen) |
2376 | - 4 /* remove ".ibd" */); |
2377 | |
2378 | ut_ad(new_table[ulint(namend - new_name) - dirlen] |
2379 | == OS_PATH_SEPARATOR); |
2380 | #if OS_PATH_SEPARATOR != '/' |
2381 | new_table[namend - new_name - dirlen] = '/'; |
2382 | #endif |
2383 | |
2384 | if (!fil_rename_tablespace( |
2385 | space_id, name, new_table, new_name)) { |
2386 | ut_error; |
2387 | } |
2388 | |
2389 | ut_free(new_table); |
2390 | return(true); |
2391 | } |
2392 | |
/** File operations for tablespace. Passed to fil_check_pending_io() and
fil_check_pending_operations() to identify the operation that is waiting
for pending activity on the tablespace to finish. */
enum fil_operation_t {
	FIL_OPERATION_DELETE,	/*!< delete a single-table tablespace */
	FIL_OPERATION_CLOSE,	/*!< close a single-table tablespace */
	FIL_OPERATION_TRUNCATE	/*!< truncate a single-table tablespace;
				the only operation for which
				fil_check_pending_io() sets
				fil_space_t::is_being_truncated */
};
2399 | |
2400 | /** Check for pending operations. |
2401 | @param[in] space tablespace |
2402 | @param[in] count number of attempts so far |
2403 | @return 0 if no operations else count + 1. */ |
2404 | static |
2405 | ulint |
2406 | fil_check_pending_ops(const fil_space_t* space, ulint count) |
2407 | { |
2408 | ut_ad(mutex_own(&fil_system.mutex)); |
2409 | |
2410 | if (space == NULL) { |
2411 | return 0; |
2412 | } |
2413 | |
2414 | if (ulint n_pending_ops = my_atomic_loadlint(&space->n_pending_ops)) { |
2415 | |
2416 | if (count > 5000) { |
2417 | ib::warn() << "Trying to close/delete/truncate" |
2418 | " tablespace '" << space->name |
2419 | << "' but there are " << n_pending_ops |
2420 | << " pending operations on it." ; |
2421 | } |
2422 | |
2423 | return(count + 1); |
2424 | } |
2425 | |
2426 | return(0); |
2427 | } |
2428 | |
2429 | /*******************************************************************//** |
2430 | Check for pending IO. |
2431 | @return 0 if no pending else count + 1. */ |
2432 | static |
2433 | ulint |
2434 | fil_check_pending_io( |
2435 | /*=================*/ |
2436 | fil_operation_t operation, /*!< in: File operation */ |
2437 | fil_space_t* space, /*!< in/out: Tablespace to check */ |
2438 | fil_node_t** node, /*!< out: Node in space list */ |
2439 | ulint count) /*!< in: number of attempts so far */ |
2440 | { |
2441 | ut_ad(mutex_own(&fil_system.mutex)); |
2442 | ut_ad(!space->referenced()); |
2443 | |
2444 | switch (operation) { |
2445 | case FIL_OPERATION_DELETE: |
2446 | case FIL_OPERATION_CLOSE: |
2447 | break; |
2448 | case FIL_OPERATION_TRUNCATE: |
2449 | space->is_being_truncated = true; |
2450 | break; |
2451 | } |
2452 | |
2453 | /* The following code must change when InnoDB supports |
2454 | multiple datafiles per tablespace. */ |
2455 | ut_a(UT_LIST_GET_LEN(space->chain) == 1); |
2456 | |
2457 | *node = UT_LIST_GET_FIRST(space->chain); |
2458 | |
2459 | if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) { |
2460 | |
2461 | ut_a(!(*node)->being_extended); |
2462 | |
2463 | if (count > 1000) { |
2464 | ib::warn() << "Trying to delete/close/truncate" |
2465 | " tablespace '" << space->name |
2466 | << "' but there are " |
2467 | << space->n_pending_flushes |
2468 | << " flushes and " << (*node)->n_pending |
2469 | << " pending i/o's on it." ; |
2470 | } |
2471 | |
2472 | return(count + 1); |
2473 | } |
2474 | |
2475 | return(0); |
2476 | } |
2477 | |
2478 | /*******************************************************************//** |
2479 | Check pending operations on a tablespace. |
2480 | @return DB_SUCCESS or error failure. */ |
2481 | static |
2482 | dberr_t |
2483 | fil_check_pending_operations( |
2484 | /*=========================*/ |
2485 | ulint id, /*!< in: space id */ |
2486 | fil_operation_t operation, /*!< in: File operation */ |
2487 | fil_space_t** space, /*!< out: tablespace instance |
2488 | in memory */ |
2489 | char** path) /*!< out/own: tablespace path */ |
2490 | { |
2491 | ulint count = 0; |
2492 | |
2493 | ut_a(!is_system_tablespace(id)); |
2494 | ut_ad(space); |
2495 | |
2496 | *space = 0; |
2497 | |
2498 | mutex_enter(&fil_system.mutex); |
2499 | fil_space_t* sp = fil_space_get_by_id(id); |
2500 | |
2501 | if (sp) { |
2502 | sp->stop_new_ops = true; |
2503 | if (sp->crypt_data) { |
2504 | sp->acquire(); |
2505 | mutex_exit(&fil_system.mutex); |
2506 | fil_space_crypt_close_tablespace(sp); |
2507 | mutex_enter(&fil_system.mutex); |
2508 | sp->release(); |
2509 | } |
2510 | } |
2511 | |
2512 | /* Check for pending operations. */ |
2513 | |
2514 | do { |
2515 | sp = fil_space_get_by_id(id); |
2516 | |
2517 | count = fil_check_pending_ops(sp, count); |
2518 | |
2519 | mutex_exit(&fil_system.mutex); |
2520 | |
2521 | if (count > 0) { |
2522 | os_thread_sleep(20000); |
2523 | } |
2524 | |
2525 | mutex_enter(&fil_system.mutex); |
2526 | } while (count > 0); |
2527 | |
2528 | /* Check for pending IO. */ |
2529 | |
2530 | *path = 0; |
2531 | |
2532 | for (;;) { |
2533 | sp = fil_space_get_by_id(id); |
2534 | |
2535 | if (sp == NULL) { |
2536 | mutex_exit(&fil_system.mutex); |
2537 | return(DB_TABLESPACE_NOT_FOUND); |
2538 | } |
2539 | |
2540 | fil_node_t* node; |
2541 | |
2542 | count = fil_check_pending_io(operation, sp, &node, count); |
2543 | |
2544 | if (count == 0) { |
2545 | *path = mem_strdup(node->name); |
2546 | } |
2547 | |
2548 | mutex_exit(&fil_system.mutex); |
2549 | |
2550 | if (count == 0) { |
2551 | break; |
2552 | } |
2553 | |
2554 | os_thread_sleep(20000); |
2555 | mutex_enter(&fil_system.mutex); |
2556 | } |
2557 | |
2558 | ut_ad(sp); |
2559 | |
2560 | *space = sp; |
2561 | return(DB_SUCCESS); |
2562 | } |
2563 | |
2564 | /*******************************************************************//** |
2565 | Closes a single-table tablespace. The tablespace must be cached in the |
2566 | memory cache. Free all pages used by the tablespace. |
2567 | @return DB_SUCCESS or error */ |
2568 | dberr_t |
2569 | fil_close_tablespace( |
2570 | /*=================*/ |
2571 | trx_t* trx, /*!< in/out: Transaction covering the close */ |
2572 | ulint id) /*!< in: space id */ |
2573 | { |
2574 | char* path = 0; |
2575 | fil_space_t* space = 0; |
2576 | dberr_t err; |
2577 | |
2578 | ut_a(!is_system_tablespace(id)); |
2579 | |
2580 | err = fil_check_pending_operations(id, FIL_OPERATION_CLOSE, |
2581 | &space, &path); |
2582 | |
2583 | if (err != DB_SUCCESS) { |
2584 | return(err); |
2585 | } |
2586 | |
2587 | ut_a(space); |
2588 | ut_a(path != 0); |
2589 | |
2590 | rw_lock_x_lock(&space->latch); |
2591 | |
2592 | /* Invalidate in the buffer pool all pages belonging to the |
2593 | tablespace. Since we have set space->stop_new_ops = true, readahead |
2594 | or ibuf merge can no longer read more pages of this tablespace to the |
2595 | buffer pool. Thus we can clean the tablespace out of the buffer pool |
2596 | completely and permanently. The flag stop_new_ops also prevents |
2597 | fil_flush() from being applied to this tablespace. */ |
2598 | |
2599 | { |
2600 | FlushObserver observer(space, trx, NULL); |
2601 | buf_LRU_flush_or_remove_pages(id, &observer); |
2602 | } |
2603 | |
2604 | /* If the free is successful, the X lock will be released before |
2605 | the space memory data structure is freed. */ |
2606 | |
2607 | if (!fil_space_free(id, true)) { |
2608 | rw_lock_x_unlock(&space->latch); |
2609 | err = DB_TABLESPACE_NOT_FOUND; |
2610 | } else { |
2611 | err = DB_SUCCESS; |
2612 | } |
2613 | |
2614 | /* If it is a delete then also delete any generated files, otherwise |
2615 | when we drop the database the remove directory will fail. */ |
2616 | |
2617 | char* cfg_name = fil_make_filepath(path, NULL, CFG, false); |
2618 | if (cfg_name != NULL) { |
2619 | os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL); |
2620 | ut_free(cfg_name); |
2621 | } |
2622 | |
2623 | ut_free(path); |
2624 | |
2625 | return(err); |
2626 | } |
2627 | |
2628 | /** Determine whether a table can be accessed in operations that are |
2629 | not (necessarily) protected by meta-data locks. |
2630 | (Rollback would generally be protected, but rollback of |
2631 | FOREIGN KEY CASCADE/SET NULL is not protected by meta-data locks |
2632 | but only by InnoDB table locks, which may be broken by TRUNCATE TABLE.) |
2633 | @param[in] table persistent table |
2634 | checked @return whether the table is accessible */ |
2635 | bool |
2636 | fil_table_accessible(const dict_table_t* table) |
2637 | { |
2638 | if (UNIV_UNLIKELY(!table->is_readable() || table->corrupted)) { |
2639 | return(false); |
2640 | } |
2641 | |
2642 | mutex_enter(&fil_system.mutex); |
2643 | bool accessible = table->space && !table->space->is_stopping(); |
2644 | mutex_exit(&fil_system.mutex); |
2645 | ut_ad(accessible || dict_table_is_file_per_table(table)); |
2646 | return accessible; |
2647 | } |
2648 | |
2649 | /** Delete a tablespace and associated .ibd file. |
2650 | @param[in] id tablespace identifier |
2651 | @return DB_SUCCESS or error */ |
2652 | dberr_t |
2653 | fil_delete_tablespace( |
2654 | ulint id |
2655 | #ifdef BTR_CUR_HASH_ADAPT |
2656 | , bool drop_ahi /*!< whether to drop the adaptive hash index */ |
2657 | #endif /* BTR_CUR_HASH_ADAPT */ |
2658 | ) |
2659 | { |
2660 | char* path = 0; |
2661 | fil_space_t* space = 0; |
2662 | |
2663 | ut_a(!is_system_tablespace(id)); |
2664 | |
2665 | dberr_t err = fil_check_pending_operations( |
2666 | id, FIL_OPERATION_DELETE, &space, &path); |
2667 | |
2668 | if (err != DB_SUCCESS) { |
2669 | |
2670 | ib::error() << "Cannot delete tablespace " << id |
2671 | << " because it is not found in the tablespace" |
2672 | " memory cache." ; |
2673 | |
2674 | return(err); |
2675 | } |
2676 | |
2677 | ut_a(space); |
2678 | ut_a(path != 0); |
2679 | |
2680 | /* IMPORTANT: Because we have set space::stop_new_ops there |
2681 | can't be any new ibuf merges, reads or flushes. We are here |
2682 | because node::n_pending was zero above. However, it is still |
2683 | possible to have pending read and write requests: |
2684 | |
2685 | A read request can happen because the reader thread has |
2686 | gone through the ::stop_new_ops check in buf_page_init_for_read() |
2687 | before the flag was set and has not yet incremented ::n_pending |
2688 | when we checked it above. |
2689 | |
2690 | A write request can be issued any time because we don't check |
2691 | the ::stop_new_ops flag when queueing a block for write. |
2692 | |
2693 | We deal with pending write requests in the following function |
2694 | where we'd minimally evict all dirty pages belonging to this |
2695 | space from the flush_list. Note that if a block is IO-fixed |
2696 | we'll wait for IO to complete. |
2697 | |
2698 | To deal with potential read requests, we will check the |
2699 | ::stop_new_ops flag in fil_io(). */ |
2700 | |
2701 | buf_LRU_flush_or_remove_pages(id, NULL |
2702 | #ifdef BTR_CUR_HASH_ADAPT |
2703 | , drop_ahi |
2704 | #endif /* BTR_CUR_HASH_ADAPT */ |
2705 | ); |
2706 | |
2707 | /* If it is a delete then also delete any generated files, otherwise |
2708 | when we drop the database the remove directory will fail. */ |
2709 | { |
2710 | /* Before deleting the file, write a log record about |
2711 | it, so that InnoDB crash recovery will expect the file |
2712 | to be gone. */ |
2713 | mtr_t mtr; |
2714 | |
2715 | mtr_start(&mtr); |
2716 | fil_op_write_log(MLOG_FILE_DELETE, id, 0, path, NULL, 0, &mtr); |
2717 | mtr_commit(&mtr); |
2718 | /* Even if we got killed shortly after deleting the |
2719 | tablespace file, the record must have already been |
2720 | written to the redo log. */ |
2721 | log_write_up_to(mtr.commit_lsn(), true); |
2722 | |
2723 | char* cfg_name = fil_make_filepath(path, NULL, CFG, false); |
2724 | if (cfg_name != NULL) { |
2725 | os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL); |
2726 | ut_free(cfg_name); |
2727 | } |
2728 | } |
2729 | |
2730 | /* Delete the link file pointing to the ibd file we are deleting. */ |
2731 | if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) { |
2732 | RemoteDatafile::delete_link_file(space->name); |
2733 | } |
2734 | |
2735 | mutex_enter(&fil_system.mutex); |
2736 | |
2737 | /* Double check the sanity of pending ops after reacquiring |
2738 | the fil_system::mutex. */ |
2739 | if (const fil_space_t* s = fil_space_get_by_id(id)) { |
2740 | ut_a(s == space); |
2741 | ut_a(!space->referenced()); |
2742 | ut_a(UT_LIST_GET_LEN(space->chain) == 1); |
2743 | fil_node_t* node = UT_LIST_GET_FIRST(space->chain); |
2744 | ut_a(node->n_pending == 0); |
2745 | |
2746 | fil_space_detach(space); |
2747 | mutex_exit(&fil_system.mutex); |
2748 | |
2749 | log_mutex_enter(); |
2750 | |
2751 | if (space->max_lsn != 0) { |
2752 | ut_d(space->max_lsn = 0); |
2753 | UT_LIST_REMOVE(fil_system.named_spaces, space); |
2754 | } |
2755 | |
2756 | log_mutex_exit(); |
2757 | fil_space_free_low(space); |
2758 | |
2759 | if (!os_file_delete(innodb_data_file_key, path) |
2760 | && !os_file_delete_if_exists( |
2761 | innodb_data_file_key, path, NULL)) { |
2762 | |
2763 | /* Note: This is because we have removed the |
2764 | tablespace instance from the cache. */ |
2765 | |
2766 | err = DB_IO_ERROR; |
2767 | } |
2768 | } else { |
2769 | mutex_exit(&fil_system.mutex); |
2770 | err = DB_TABLESPACE_NOT_FOUND; |
2771 | } |
2772 | |
2773 | ut_free(path); |
2774 | |
2775 | return(err); |
2776 | } |
2777 | |
2778 | /** Truncate the tablespace to needed size. |
2779 | @param[in,out] space tablespace truncate |
2780 | @param[in] size_in_pages truncate size. |
2781 | @return true if truncate was successful. */ |
2782 | bool fil_truncate_tablespace(fil_space_t* space, ulint size_in_pages) |
2783 | { |
2784 | /* Step-1: Prepare tablespace for truncate. This involves |
2785 | stopping all the new operations + IO on that tablespace |
2786 | and ensuring that related pages are flushed to disk. */ |
2787 | if (fil_prepare_for_truncate(space->id) != DB_SUCCESS) { |
2788 | return(false); |
2789 | } |
2790 | |
2791 | /* Step-2: Invalidate buffer pool pages belonging to the tablespace |
2792 | to re-create. Remove all insert buffer entries for the tablespace */ |
2793 | buf_LRU_flush_or_remove_pages(space->id, NULL); |
2794 | |
2795 | /* Step-3: Truncate the tablespace and accordingly update |
2796 | the fil_space_t handler that is used to access this tablespace. */ |
2797 | mutex_enter(&fil_system.mutex); |
2798 | |
2799 | /* The following code must change when InnoDB supports |
2800 | multiple datafiles per tablespace. */ |
2801 | ut_a(UT_LIST_GET_LEN(space->chain) == 1); |
2802 | |
2803 | fil_node_t* node = UT_LIST_GET_FIRST(space->chain); |
2804 | |
2805 | ut_ad(node->is_open()); |
2806 | |
2807 | space->size = node->size = size_in_pages; |
2808 | |
2809 | bool success = os_file_truncate(node->name, node->handle, 0); |
2810 | if (success) { |
2811 | |
2812 | os_offset_t size = os_offset_t(size_in_pages) |
2813 | << srv_page_size_shift; |
2814 | |
2815 | success = os_file_set_size( |
2816 | node->name, node->handle, size, |
2817 | FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)); |
2818 | |
2819 | if (success) { |
2820 | space->stop_new_ops = false; |
2821 | space->is_being_truncated = false; |
2822 | } |
2823 | } |
2824 | |
2825 | mutex_exit(&fil_system.mutex); |
2826 | |
2827 | return(success); |
2828 | } |
2829 | |
2830 | /*******************************************************************//** |
2831 | Prepare for truncating a single-table tablespace. |
2832 | 1) Check pending operations on a tablespace; |
2833 | 2) Remove all insert buffer entries for the tablespace; |
2834 | @return DB_SUCCESS or error */ |
2835 | dberr_t |
2836 | fil_prepare_for_truncate( |
2837 | /*=====================*/ |
2838 | ulint id) /*!< in: space id */ |
2839 | { |
2840 | char* path = 0; |
2841 | fil_space_t* space = 0; |
2842 | |
2843 | ut_a(!is_system_tablespace(id)); |
2844 | |
2845 | dberr_t err = fil_check_pending_operations( |
2846 | id, FIL_OPERATION_TRUNCATE, &space, &path); |
2847 | |
2848 | ut_free(path); |
2849 | |
2850 | if (err == DB_TABLESPACE_NOT_FOUND) { |
2851 | ib::error() << "Cannot truncate tablespace " << id |
2852 | << " because it is not found in the tablespace" |
2853 | " memory cache." ; |
2854 | } |
2855 | |
2856 | return(err); |
2857 | } |
2858 | |
2859 | /*******************************************************************//** |
2860 | Allocates and builds a file name from a path, a table or tablespace name |
2861 | and a suffix. The string must be freed by caller with ut_free(). |
2862 | @param[in] path NULL or the direcory path or the full path and filename. |
2863 | @param[in] name NULL if path is full, or Table/Tablespace name |
2864 | @param[in] suffix NULL or the file extention to use. |
2865 | @param[in] trim_name true if the last name on the path should be trimmed. |
2866 | @return own: file name */ |
2867 | char* |
2868 | fil_make_filepath( |
2869 | const char* path, |
2870 | const char* name, |
2871 | ib_extention ext, |
2872 | bool trim_name) |
2873 | { |
2874 | /* The path may contain the basename of the file, if so we do not |
2875 | need the name. If the path is NULL, we can use the default path, |
2876 | but there needs to be a name. */ |
2877 | ut_ad(path != NULL || name != NULL); |
2878 | |
2879 | /* If we are going to strip a name off the path, there better be a |
2880 | path and a new name to put back on. */ |
2881 | ut_ad(!trim_name || (path != NULL && name != NULL)); |
2882 | |
2883 | if (path == NULL) { |
2884 | path = fil_path_to_mysql_datadir; |
2885 | } |
2886 | |
2887 | ulint len = 0; /* current length */ |
2888 | ulint path_len = strlen(path); |
2889 | ulint name_len = (name ? strlen(name) : 0); |
2890 | const char* suffix = dot_ext[ext]; |
2891 | ulint suffix_len = strlen(suffix); |
2892 | ulint full_len = path_len + 1 + name_len + suffix_len + 1; |
2893 | |
2894 | char* full_name = static_cast<char*>(ut_malloc_nokey(full_len)); |
2895 | if (full_name == NULL) { |
2896 | return NULL; |
2897 | } |
2898 | |
2899 | /* If the name is a relative path, do not prepend "./". */ |
2900 | if (path[0] == '.' |
2901 | && (path[1] == '\0' || path[1] == OS_PATH_SEPARATOR) |
2902 | && name != NULL && name[0] == '.') { |
2903 | path = NULL; |
2904 | path_len = 0; |
2905 | } |
2906 | |
2907 | if (path != NULL) { |
2908 | memcpy(full_name, path, path_len); |
2909 | len = path_len; |
2910 | full_name[len] = '\0'; |
2911 | os_normalize_path(full_name); |
2912 | } |
2913 | |
2914 | if (trim_name) { |
2915 | /* Find the offset of the last DIR separator and set it to |
2916 | null in order to strip off the old basename from this path. */ |
2917 | char* last_dir_sep = strrchr(full_name, OS_PATH_SEPARATOR); |
2918 | if (last_dir_sep) { |
2919 | last_dir_sep[0] = '\0'; |
2920 | len = strlen(full_name); |
2921 | } |
2922 | } |
2923 | |
2924 | if (name != NULL) { |
2925 | if (len && full_name[len - 1] != OS_PATH_SEPARATOR) { |
2926 | /* Add a DIR separator */ |
2927 | full_name[len] = OS_PATH_SEPARATOR; |
2928 | full_name[++len] = '\0'; |
2929 | } |
2930 | |
2931 | char* ptr = &full_name[len]; |
2932 | memcpy(ptr, name, name_len); |
2933 | len += name_len; |
2934 | full_name[len] = '\0'; |
2935 | os_normalize_path(ptr); |
2936 | } |
2937 | |
2938 | /* Make sure that the specified suffix is at the end of the filepath |
2939 | string provided. This assumes that the suffix starts with '.'. |
2940 | If the first char of the suffix is found in the filepath at the same |
2941 | length as the suffix from the end, then we will assume that there is |
2942 | a previous suffix that needs to be replaced. */ |
2943 | if (suffix != NULL) { |
2944 | /* Need room for the trailing null byte. */ |
2945 | ut_ad(len < full_len); |
2946 | |
2947 | if ((len > suffix_len) |
2948 | && (full_name[len - suffix_len] == suffix[0])) { |
2949 | /* Another suffix exists, make it the one requested. */ |
2950 | memcpy(&full_name[len - suffix_len], suffix, suffix_len); |
2951 | |
2952 | } else { |
2953 | /* No previous suffix, add it. */ |
2954 | ut_ad(len + suffix_len < full_len); |
2955 | memcpy(&full_name[len], suffix, suffix_len); |
2956 | full_name[len + suffix_len] = '\0'; |
2957 | } |
2958 | } |
2959 | |
2960 | return(full_name); |
2961 | } |
2962 | |
2963 | /** Test if a tablespace file can be renamed to a new filepath by checking |
2964 | if that the old filepath exists and the new filepath does not exist. |
2965 | @param[in] old_path old filepath |
2966 | @param[in] new_path new filepath |
2967 | @param[in] is_discarded whether the tablespace is discarded |
2968 | @return innodb error code */ |
2969 | static dberr_t |
2970 | fil_rename_tablespace_check( |
2971 | const char* old_path, |
2972 | const char* new_path, |
2973 | bool is_discarded) |
2974 | { |
2975 | bool exists = false; |
2976 | os_file_type_t ftype; |
2977 | |
2978 | if (!is_discarded |
2979 | && os_file_status(old_path, &exists, &ftype) |
2980 | && !exists) { |
2981 | ib::error() << "Cannot rename '" << old_path |
2982 | << "' to '" << new_path |
2983 | << "' because the source file" |
2984 | << " does not exist." ; |
2985 | return(DB_TABLESPACE_NOT_FOUND); |
2986 | } |
2987 | |
2988 | exists = false; |
2989 | if (!os_file_status(new_path, &exists, &ftype) || exists) { |
2990 | ib::error() << "Cannot rename '" << old_path |
2991 | << "' to '" << new_path |
2992 | << "' because the target file exists." |
2993 | " Remove the target file and try again." ; |
2994 | return(DB_TABLESPACE_EXISTS); |
2995 | } |
2996 | |
2997 | return(DB_SUCCESS); |
2998 | } |
2999 | |
3000 | dberr_t fil_space_t::rename(const char* name, const char* path, bool log) |
3001 | { |
3002 | ut_ad(UT_LIST_GET_LEN(chain) == 1); |
3003 | ut_ad(!is_system_tablespace(id)); |
3004 | |
3005 | if (log) { |
3006 | dberr_t err = fil_rename_tablespace_check( |
3007 | chain.start->name, path, false); |
3008 | if (err != DB_SUCCESS) { |
3009 | return(err); |
3010 | } |
3011 | fil_name_write_rename(id, chain.start->name, path); |
3012 | } |
3013 | |
3014 | return fil_rename_tablespace(id, chain.start->name, name, path) |
3015 | ? DB_SUCCESS : DB_ERROR; |
3016 | } |
3017 | |
/** Rename a single-table tablespace.
The tablespace must exist in the memory cache.

The function loops (label "retry") until the only data file of the
tablespace has no pending i/o, no pending flushes, is not being extended
and has no unflushed modifications. It then closes the file if it is open,
renames it with os_file_rename() and, on success, replaces the file name
of the node and the name of the space with newly allocated copies.
@param[in]	id		tablespace identifier
@param[in]	old_path	old file name
@param[in]	new_name	new table name in the
databasename/tablename format
@param[in]	new_path_in	new file name,
or NULL if it is located in the normal data directory
@return true if success */
static bool
fil_rename_tablespace(
	ulint		id,
	const char*	old_path,
	const char*	new_name,
	const char*	new_path_in)
{
	bool		sleep		= false;
	bool		flush		= false;
	fil_space_t*	space;
	fil_node_t*	node;
	ulint		count		= 0;
	ut_a(id != 0);

	ut_ad(strchr(new_name, '/') != NULL);
retry:
	count++;

	/* Report every 1000th attempt so that a stuck rename is visible
	in the log. */
	if (!(count % 1000)) {
		ib::warn() << "Cannot rename file " << old_path
			<< " (space id " << id << "), retried " << count
			<< " times."
			" There are either pending IOs or flushes or"
			" the file is being extended.";
	}

	mutex_enter(&fil_system.mutex);

	space = fil_space_get_by_id(id);

	/* Debug injection: pretend that the tablespace was not found. */
	DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; );

	if (space == NULL) {
		ib::error() << "Cannot find space id " << id
			<< " in the tablespace memory cache, though the file '"
			<< old_path
			<< "' in a rename operation should have that id.";
func_exit:
		/* Common failure exit: fil_system.mutex is held here. */
		mutex_exit(&fil_system.mutex);
		return(false);
	}

	/* Give up after too many attempts, undoing the stop_ios flag
	that earlier iterations have set. */
	if (count > 25000) {
		space->stop_ios = false;
		goto func_exit;
	}

	/* We temporarily close the .ibd file because we do not trust that
	operating systems can rename an open file. For the closing we have to
	wait until there are no pending i/o's or flushes on the file. */

	space->stop_ios = true;

	/* The following code must change when InnoDB supports
	multiple datafiles per tablespace. */
	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
	node = UT_LIST_GET_FIRST(space->chain);

	if (node->n_pending > 0
	    || node->n_pending_flushes > 0
	    || node->being_extended) {
		/* There are pending i/o's or flushes or the file is
		currently being extended, sleep for a while and
		retry */
		sleep = true;
	} else if (node->modification_counter > node->flush_counter) {
		/* Flush the space */
		sleep = flush = true;
	} else if (node->is_open()) {
		/* Close the file */

		fil_node_close_file(node);
	}

	mutex_exit(&fil_system.mutex);

	if (sleep) {
		/* NOTE(review): the unit of the argument is not visible
		here; presumably microseconds (20 ms) - confirm against
		os_thread_sleep(). */
		os_thread_sleep(20000);

		/* The flush is requested without holding
		fil_system.mutex. */
		if (flush) {
			fil_flush(id);
		}

		sleep = flush = false;
		goto retry;
	}
	ut_ad(space->stop_ios);
	/* Allocate the new names before the mutexes are acquired below.
	Exactly one file name and one space name are freed at the end:
	the old ones on success, the new ones on failure. */
	char*	new_file_name = new_path_in == NULL
		? fil_make_filepath(NULL, new_name, IBD, false)
		: mem_strdup(new_path_in);
	char*	old_file_name = node->name;
	char*	new_space_name = mem_strdup(new_name);
	char*	old_space_name = space->name;

	ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != NULL);
	ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != NULL);

	/* Outside recovery, write the rename log record and then take
	the log mutex. When recv_recovery_on is set, the assertion below
	expects the log mutex to be held already. */
	if (!recv_recovery_on) {
		fil_name_write_rename(id, old_file_name, new_file_name);
		log_mutex_enter();
	}

	/* log_sys.mutex is above fil_system.mutex in the latching order */
	ut_ad(log_mutex_own());
	mutex_enter(&fil_system.mutex);
	ut_ad(space->name == old_space_name);
	ut_ad(node->name == old_file_name);

	bool	success;

	/* Debug injection: skip the actual rename and report failure.
	The label skip_rename only exists when the second
	DBUG_EXECUTE_IF below expands its statement. */
	DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
			goto skip_rename; );

	success = os_file_rename(
		innodb_data_file_key, old_file_name, new_file_name);

	DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
			skip_rename: success = false; );

	ut_ad(node->name == old_file_name);

	if (success) {
		node->name = new_file_name;
	}

	/* Release the log mutex only if it was acquired in this
	function. */
	if (!recv_recovery_on) {
		log_mutex_exit();
	}

	ut_ad(space->name == old_space_name);
	if (success) {
		space->name = new_space_name;
	} else {
		/* Because nothing was renamed, we must free the new
		names, not the old ones. */
		old_file_name = new_file_name;
		old_space_name = new_space_name;
	}

	ut_ad(space->stop_ios);
	space->stop_ios = false;
	mutex_exit(&fil_system.mutex);

	ut_free(old_file_name);
	ut_free(old_space_name);

	return(success);
}
3175 | |
3176 | /** Create a tablespace file. |
3177 | @param[in] space_id Tablespace ID |
3178 | @param[in] name Tablespace name in dbname/tablename format. |
3179 | @param[in] path Path and filename of the datafile to create. |
3180 | @param[in] flags Tablespace flags |
3181 | @param[in] size Initial size of the tablespace file in pages, |
3182 | must be >= FIL_IBD_FILE_INITIAL_SIZE |
3183 | @param[in] mode MariaDB encryption mode |
3184 | @param[in] key_id MariaDB encryption key_id |
3185 | @param[out] err DB_SUCCESS or error code |
3186 | @return the created tablespace |
3187 | @retval NULL on error */ |
3188 | fil_space_t* |
3189 | fil_ibd_create( |
3190 | ulint space_id, |
3191 | const char* name, |
3192 | const char* path, |
3193 | ulint flags, |
3194 | ulint size, |
3195 | fil_encryption_t mode, |
3196 | uint32_t key_id, |
3197 | dberr_t* err) |
3198 | { |
3199 | pfs_os_file_t file; |
3200 | byte* buf2; |
3201 | byte* page; |
3202 | bool success; |
3203 | bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags) != 0; |
3204 | fil_space_t* space = NULL; |
3205 | fil_space_crypt_t *crypt_data = NULL; |
3206 | |
3207 | ut_ad(!is_system_tablespace(space_id)); |
3208 | ut_ad(!srv_read_only_mode); |
3209 | ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); |
3210 | ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); |
3211 | ut_a(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, space_id)); |
3212 | |
3213 | /* Create the subdirectories in the path, if they are |
3214 | not there already. */ |
3215 | *err = os_file_create_subdirs_if_needed(path); |
3216 | if (*err != DB_SUCCESS) { |
3217 | return NULL; |
3218 | } |
3219 | |
3220 | file = os_file_create( |
3221 | innodb_data_file_key, path, |
3222 | OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, |
3223 | OS_FILE_NORMAL, |
3224 | OS_DATA_FILE, |
3225 | srv_read_only_mode, |
3226 | &success); |
3227 | |
3228 | if (!success) { |
3229 | /* The following call will print an error message */ |
3230 | switch (os_file_get_last_error(true)) { |
3231 | case OS_FILE_ALREADY_EXISTS: |
3232 | ib::info() << "The file '" << path << "'" |
3233 | " already exists though the" |
3234 | " corresponding table did not exist" |
3235 | " in the InnoDB data dictionary." |
3236 | " You can resolve the problem by removing" |
3237 | " the file." ; |
3238 | *err = DB_TABLESPACE_EXISTS; |
3239 | break; |
3240 | case OS_FILE_DISK_FULL: |
3241 | *err = DB_OUT_OF_FILE_SPACE; |
3242 | break; |
3243 | default: |
3244 | *err = DB_ERROR; |
3245 | } |
3246 | ib::error() << "Cannot create file '" << path << "'" ; |
3247 | return NULL; |
3248 | } |
3249 | |
3250 | const bool is_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(flags); |
3251 | |
3252 | #ifdef _WIN32 |
3253 | if (is_compressed) { |
3254 | os_file_set_sparse_win32(file); |
3255 | } |
3256 | #endif |
3257 | |
3258 | if (!os_file_set_size( |
3259 | path, file, |
3260 | os_offset_t(size) << srv_page_size_shift, is_compressed)) { |
3261 | *err = DB_OUT_OF_FILE_SPACE; |
3262 | err_exit: |
3263 | os_file_close(file); |
3264 | os_file_delete(innodb_data_file_key, path); |
3265 | return NULL; |
3266 | } |
3267 | |
3268 | bool punch_hole = os_is_sparse_file_supported(file); |
3269 | |
3270 | ulint block_size = os_file_get_block_size(file, path); |
3271 | |
3272 | /* We have to write the space id to the file immediately and flush the |
3273 | file to disk. This is because in crash recovery we must be aware what |
3274 | tablespaces exist and what are their space id's, so that we can apply |
3275 | the log records to the right file. It may take quite a while until |
3276 | buffer pool flush algorithms write anything to the file and flush it to |
3277 | disk. If we would not write here anything, the file would be filled |
3278 | with zeros from the call of os_file_set_size(), until a buffer pool |
3279 | flush would write to it. */ |
3280 | |
3281 | buf2 = static_cast<byte*>(ut_malloc_nokey(3U << srv_page_size_shift)); |
3282 | /* Align the memory for file i/o if we might have O_DIRECT set */ |
3283 | page = static_cast<byte*>(ut_align(buf2, srv_page_size)); |
3284 | |
3285 | memset(page, '\0', srv_page_size); |
3286 | |
3287 | flags |= FSP_FLAGS_PAGE_SSIZE(); |
3288 | fsp_header_init_fields(page, space_id, flags); |
3289 | mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); |
3290 | |
3291 | const page_size_t page_size(flags); |
3292 | IORequest request(IORequest::WRITE); |
3293 | |
3294 | if (!page_size.is_compressed()) { |
3295 | |
3296 | buf_flush_init_for_writing(NULL, page, NULL, 0); |
3297 | |
3298 | *err = os_file_write( |
3299 | request, path, file, page, 0, page_size.physical()); |
3300 | } else { |
3301 | page_zip_des_t page_zip; |
3302 | page_zip_set_size(&page_zip, page_size.physical()); |
3303 | page_zip.data = page + srv_page_size; |
3304 | #ifdef UNIV_DEBUG |
3305 | page_zip.m_start = |
3306 | #endif /* UNIV_DEBUG */ |
3307 | page_zip.m_end = page_zip.m_nonempty = |
3308 | page_zip.n_blobs = 0; |
3309 | |
3310 | buf_flush_init_for_writing(NULL, page, &page_zip, 0); |
3311 | |
3312 | *err = os_file_write( |
3313 | request, path, file, page_zip.data, 0, |
3314 | page_size.physical()); |
3315 | } |
3316 | |
3317 | ut_free(buf2); |
3318 | |
3319 | if (*err != DB_SUCCESS) { |
3320 | ib::error() |
3321 | << "Could not write the first page to" |
3322 | << " tablespace '" << path << "'" ; |
3323 | goto err_exit; |
3324 | } |
3325 | |
3326 | if (!os_file_flush(file)) { |
3327 | ib::error() << "File flush of tablespace '" |
3328 | << path << "' failed" ; |
3329 | *err = DB_ERROR; |
3330 | goto err_exit; |
3331 | } |
3332 | |
3333 | if (has_data_dir) { |
3334 | /* Make the ISL file if the IBD file is not |
3335 | in the default location. */ |
3336 | *err = RemoteDatafile::create_link_file(name, path); |
3337 | if (*err != DB_SUCCESS) { |
3338 | goto err_exit; |
3339 | } |
3340 | } |
3341 | |
3342 | /* Create crypt data if the tablespace is either encrypted or user has |
3343 | requested it to remain unencrypted. */ |
3344 | if (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || |
3345 | srv_encrypt_tables) { |
3346 | crypt_data = fil_space_create_crypt_data(mode, key_id); |
3347 | } |
3348 | |
3349 | space = fil_space_create(name, space_id, flags, FIL_TYPE_TABLESPACE, |
3350 | crypt_data, mode); |
3351 | if (!space) { |
3352 | free(crypt_data); |
3353 | *err = DB_ERROR; |
3354 | } else { |
3355 | fil_node_t* node = fil_node_create_low(path, size, space, |
3356 | false, true); |
3357 | mtr_t mtr; |
3358 | mtr.start(); |
3359 | fil_op_write_log( |
3360 | MLOG_FILE_CREATE2, space_id, 0, node->name, |
3361 | NULL, space->flags & ~FSP_FLAGS_MEM_MASK, &mtr); |
3362 | fil_name_write(space, 0, node, &mtr); |
3363 | mtr.commit(); |
3364 | |
3365 | node->block_size = block_size; |
3366 | space->punch_hole = punch_hole; |
3367 | |
3368 | *err = DB_SUCCESS; |
3369 | } |
3370 | |
3371 | os_file_close(file); |
3372 | |
3373 | if (*err != DB_SUCCESS) { |
3374 | if (has_data_dir) { |
3375 | RemoteDatafile::delete_link_file(name); |
3376 | } |
3377 | |
3378 | os_file_delete(innodb_data_file_key, path); |
3379 | } |
3380 | |
3381 | return space; |
3382 | } |
3383 | |
/** Try to open a single-table tablespace and optionally check that the
space id in it is correct. If this does not succeed, print an error message
to the .err log. This function is used to open a tablespace when we start
mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.

NOTE that we assume this operation is used either at the database startup
or under the protection of the dictionary mutex, so that two users cannot
race here. This operation does not leave the file associated with the
tablespace open, but closes it after we have looked at the space id in it.

If the validate boolean is set, we read the first page of the file and
check that the space id in the file is what we expect. We assume that
this function runs much faster if no check is made, since accessing the
file inode probably is much faster (the OS caches them) than accessing
the first page of the file.  This boolean may be initially false, but if
a remote tablespace is found it will be changed to true.

If the fix_dict boolean is set, then it is safe to use an internal SQL
statement to update the dictionary tables if they are incorrect.

If a tablespace with the given id is already in the memory cache, it is
returned as is when its name matches tablename; a name mismatch is
reported as DB_TABLESPACE_EXISTS.

@param[in]	validate	true if we should validate the tablespace
@param[in]	fix_dict	true if the dictionary is available to be fixed
@param[in]	purpose		FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
@param[in]	id		tablespace ID
@param[in]	flags		expected FSP_SPACE_FLAGS
@param[in]	tablename	tablespace name of the datafile
If file-per-table, it is the table name in the databasename/tablename format
@param[in]	path_in		expected filepath, usually read from dictionary
@param[out]	err		DB_SUCCESS or error code; may be NULL, in
which case no error code is stored
@return tablespace
@retval	NULL	if the tablespace could not be opened */
fil_space_t*
fil_ibd_open(
	bool			validate,
	bool			fix_dict,
	fil_type_t		purpose,
	ulint			id,
	ulint			flags,
	const table_name_t&	tablename,
	const char*		path_in,
	dberr_t*		err)
{
	/* First check whether the tablespace is already in the
	memory cache. */
	mutex_enter(&fil_system.mutex);
	if (fil_space_t* space = fil_space_get_by_id(id)) {
		if (strcmp(space->name, tablename.m_name)) {
			/* The id is taken by a tablespace with a
			different name. */
			table_name_t space_name;
			space_name.m_name = space->name;
			ib::error()
				<< "Trying to open table " << tablename
				<< " with id " << id
				<< ", conflicting with " << space_name;
			space = NULL;
			if (err) *err = DB_TABLESPACE_EXISTS;
		} else if (err) *err = DB_SUCCESS;

		mutex_exit(&fil_system.mutex);

		if (space && validate && !srv_read_only_mode) {
			fsp_flags_try_adjust(space,
					     flags & ~FSP_FLAGS_MEM_MASK);
		}

		return space;
	}
	mutex_exit(&fil_system.mutex);

	bool		dict_filepath_same_as_default = false;
	bool		link_file_found = false;
	bool		link_file_is_bad = false;
	Datafile	df_default;	/* default location */
	Datafile	df_dict;	/* dictionary location */
	RemoteDatafile	df_remote;	/* remote location */
	ulint		tablespaces_found = 0;
	ulint		valid_tablespaces_found = 0;

	ut_ad(!fix_dict || rw_lock_own(dict_operation_lock, RW_LOCK_X));

	ut_ad(!fix_dict || mutex_own(&dict_sys->mutex));
	ut_ad(!fix_dict || !srv_read_only_mode);
	ut_ad(!fix_dict || srv_log_file_size != 0);
	ut_ad(fil_type_is_data(purpose));

	/* Table flags can be ULINT_UNDEFINED if
	dict_tf_to_fsp_flags_failure is set. */
	if (flags == ULINT_UNDEFINED) {
corrupted:
		/* Shared exit for all DB_CORRUPTION failures. */
		if (err) *err = DB_CORRUPTION;
		return NULL;
	}

	ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id));
	df_default.init(tablename.m_name, flags);
	df_dict.init(tablename.m_name, flags);
	df_remote.init(tablename.m_name, flags);

	/* Discover the correct file by looking in three possible locations
	while avoiding unnecessary effort. */

	/* We will always look for an ibd in the default location. */
	df_default.make_filepath(NULL, tablename.m_name, IBD);

	/* Look for a filepath embedded in an ISL where the default file
	would be. */
	if (df_remote.open_read_only(true) == DB_SUCCESS) {
		ut_ad(df_remote.is_open());

		/* Always validate a file opened from an ISL pointer */
		validate = true;
		++tablespaces_found;
		link_file_found = true;
	} else if (df_remote.filepath() != NULL) {
		/* An ISL file was found but contained a bad filepath in it.
		Better validate anything we do find. */
		validate = true;
	}

	/* Attempt to open the tablespace at the dictionary filepath. */
	if (path_in) {
		if (df_default.same_filepath_as(path_in)) {
			dict_filepath_same_as_default = true;
		} else {
			/* Dict path is not the default path. Always validate
			remote files. If default is opened, it was moved. */
			validate = true;
			df_dict.set_filepath(path_in);
			if (df_dict.open_read_only(true) == DB_SUCCESS) {
				ut_ad(df_dict.is_open());
				++tablespaces_found;
			}
		}
	}

	/* Always look for a file at the default location. But don't log
	an error if the tablespace is already open in remote or dict. */
	ut_a(df_default.filepath());
	const bool	strict = (tablespaces_found == 0);
	if (df_default.open_read_only(strict) == DB_SUCCESS) {
		ut_ad(df_default.is_open());
		++tablespaces_found;
	}

	/* Check if multiple locations point to the same file. */
	if (tablespaces_found > 1 && df_default.same_as(df_remote)) {
		/* A link file was found with the default path in it.
		Use the default path and delete the link file. */
		--tablespaces_found;
		df_remote.delete_link_file();
		df_remote.close();
	}
	if (tablespaces_found > 1 && df_default.same_as(df_dict)) {
		--tablespaces_found;
		df_dict.close();
	}
	if (tablespaces_found > 1 && df_remote.same_as(df_dict)) {
		--tablespaces_found;
		df_dict.close();
	}

	/*  We have now checked all possible tablespace locations and
	have a count of how many unique files we found.  If things are
	normal, we only found 1. */
	/* For encrypted tablespace, we need to check the
	encryption in header of first page. */
	if (!validate && tablespaces_found == 1) {
		goto skip_validate;
	}

	/* Read and validate the first page of these three tablespace
	locations, if found. */
	valid_tablespaces_found +=
		(df_remote.validate_to_dd(id, flags) == DB_SUCCESS);

	valid_tablespaces_found +=
		(df_default.validate_to_dd(id, flags) == DB_SUCCESS);

	valid_tablespaces_found +=
		(df_dict.validate_to_dd(id, flags) == DB_SUCCESS);

	/* Make sense of these three possible locations.
	First, bail out if no tablespace files were found. */
	if (valid_tablespaces_found == 0) {
		os_file_get_last_error(true);
		ib::error() << "Could not find a valid tablespace file for `"
			<< tablename << "`. " << TROUBLESHOOT_DATADICT_MSG;
		goto corrupted;
	}
	if (!validate) {
		goto skip_validate;
	}

	/* Do not open any tablespaces if more than one tablespace with
	the correct space ID and flags were found. */
	if (tablespaces_found > 1) {
		ib::error() << "A tablespace for `" << tablename
			<< "` has been found in multiple places;";

		if (df_default.is_open()) {
			ib::error() << "Default location: "
				<< df_default.filepath()
				<< ", Space ID=" << df_default.space_id()
				<< ", Flags=" << df_default.flags();
		}
		if (df_remote.is_open()) {
			ib::error() << "Remote location: "
				<< df_remote.filepath()
				<< ", Space ID=" << df_remote.space_id()
				<< ", Flags=" << df_remote.flags();
		}
		if (df_dict.is_open()) {
			ib::error() << "Dictionary location: "
				<< df_dict.filepath()
				<< ", Space ID=" << df_dict.space_id()
				<< ", Flags=" << df_dict.flags();
		}

		/* Force-recovery will allow some tablespaces to be
		skipped by REDO if there was more than one file found.
		Unlike during the REDO phase of recovery, we now know
		if the tablespace is valid according to the dictionary,
		which was not available then. So if we did not force
		recovery and there is only one good tablespace, ignore
		any bad tablespaces. */
		if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
			ib::error() << "Will not open tablespace `"
				<< tablename << "`";

			/* If the file is not open it cannot be valid. */
			ut_ad(df_default.is_open() || !df_default.is_valid());
			ut_ad(df_dict.is_open()    || !df_dict.is_valid());
			ut_ad(df_remote.is_open()  || !df_remote.is_valid());

			/* Having established that, this is an easy way to
			look for corrupted data files. */
			if (df_default.is_open() != df_default.is_valid()
			    || df_dict.is_open() != df_dict.is_valid()
			    || df_remote.is_open() != df_remote.is_valid()) {
				goto corrupted;
			}
error:
			/* Shared exit for all DB_ERROR failures; it is
			also reached by a backward goto from the end of
			the function. */
			if (err) *err = DB_ERROR;
			return NULL;
		}

		/* There is only one valid tablespace found and we did
		not use srv_force_recovery during REDO.  Use this one
		tablespace and clean up invalid tablespace pointers */
		if (df_default.is_open() && !df_default.is_valid()) {
			df_default.close();
			tablespaces_found--;
		}

		if (df_dict.is_open() && !df_dict.is_valid()) {
			df_dict.close();
			/* Leave dict.filepath so that SYS_DATAFILES
			can be corrected below. */
			tablespaces_found--;
		}

		if (df_remote.is_open() && !df_remote.is_valid()) {
			df_remote.close();
			tablespaces_found--;
			link_file_is_bad = true;
		}
	}

	/* At this point, there should be only one filepath. */
	ut_a(tablespaces_found == 1);
	ut_a(valid_tablespaces_found == 1);

	/* Only fix the dictionary at startup when there is only one thread.
	Calls to dict_load_table() can be done while holding other latches. */
	if (!fix_dict) {
		goto skip_validate;
	}

	/* We may need to update what is stored in SYS_DATAFILES or
	SYS_TABLESPACES or adjust the link file.  Since a failure to
	update SYS_TABLESPACES or SYS_DATAFILES does not prevent opening
	and using the tablespace either this time or the next, we do not
	check the return code or fail to open the tablespace. But if it
	fails, dict_update_filepath() will issue a warning to the log. */
	if (df_dict.filepath()) {
		ut_ad(path_in != NULL);
		ut_ad(df_dict.same_filepath_as(path_in));

		if (df_remote.is_open()) {
			if (!df_remote.same_filepath_as(path_in)) {
				dict_update_filepath(id, df_remote.filepath());
			}

		} else if (df_default.is_open()) {
			ut_ad(!dict_filepath_same_as_default);
			dict_update_filepath(id, df_default.filepath());
			if (link_file_is_bad) {
				RemoteDatafile::delete_link_file(
					tablename.m_name);
			}

		} else if (!link_file_found || link_file_is_bad) {
			ut_ad(df_dict.is_open());
			/* Fix the link file if we got our filepath
			from the dictionary but a link file did not
			exist or it did not point to a valid file. */
			RemoteDatafile::delete_link_file(tablename.m_name);
			RemoteDatafile::create_link_file(
				tablename.m_name, df_dict.filepath());
		}

	} else if (df_remote.is_open()) {
		if (dict_filepath_same_as_default) {
			dict_update_filepath(id, df_remote.filepath());

		} else if (path_in == NULL) {
			/* SYS_DATAFILES record for this space ID
			was not found. */
			dict_replace_tablespace_and_filepath(
				id, tablename.m_name,
				df_remote.filepath(), flags);
		}

	} else if (df_default.is_open()) {
		/* We opened the tablespace in the default location.
		SYS_DATAFILES.PATH needs to be updated if it is different
		from this default path or if the SYS_DATAFILES.PATH was not
		supplied and it should have been. Also update the dictionary
		if we found an ISL file (since !df_remote.is_open).  Since
		path_in is not supplied for file-per-table, we must assume
		that it matched the ISL. */
		if ((path_in != NULL && !dict_filepath_same_as_default)
		    || (path_in == NULL && DICT_TF_HAS_DATA_DIR(flags))
		    || df_remote.filepath() != NULL) {
			dict_replace_tablespace_and_filepath(
				id, tablename.m_name, df_default.filepath(),
				flags);
		}
	}

skip_validate:
	/* Take the first page from whichever datafile is open, checked
	in the order default, dictionary, remote. */
	const byte* first_page =
		df_default.is_open() ? df_default.get_first_page() :
		df_dict.is_open() ? df_dict.get_first_page() :
		df_remote.get_first_page();

	/* Without a first page, no crypt data is read. */
	fil_space_crypt_t* crypt_data = first_page
		? fil_space_read_crypt_data(page_size_t(flags), first_page)
		: NULL;

	fil_space_t* space = fil_space_create(
		tablename.m_name, id, flags, purpose, crypt_data);

	/* We do not measure the size of the file, that is why
	we pass the 0 below */

	/* The file path is chosen in the order remote, dictionary,
	default. NOTE(review): space is not checked for NULL before
	this call; presumably fil_node_create_low() returns NULL in
	that case - confirm. */
	if (fil_node_create_low(
		    df_remote.is_open() ? df_remote.filepath() :
		    df_dict.is_open() ? df_dict.filepath() :
		    df_default.filepath(), 0, space, false,
		    true) == NULL) {
		goto error;
	}

	if (validate && purpose != FIL_TYPE_IMPORT && !srv_read_only_mode) {
		/* Close all three datafile handles before trying to
		adjust the tablespace flags. */
		df_remote.close();
		df_dict.close();
		df_default.close();
		fsp_flags_try_adjust(space, flags & ~FSP_FLAGS_MEM_MASK);
	}

	if (err) *err = DB_SUCCESS;
	return space;
}
3755 | |
3756 | /** Looks for a pre-existing fil_space_t with the given tablespace ID |
3757 | and, if found, returns the name and filepath in newly allocated buffers |
3758 | that the caller must free. |
3759 | @param[in] space_id The tablespace ID to search for. |
3760 | @param[out] name Name of the tablespace found. |
3761 | @param[out] filepath The filepath of the first datafile for the |
3762 | tablespace. |
3763 | @return true if tablespace is found, false if not. */ |
3764 | bool |
3765 | fil_space_read_name_and_filepath( |
3766 | ulint space_id, |
3767 | char** name, |
3768 | char** filepath) |
3769 | { |
3770 | bool success = false; |
3771 | *name = NULL; |
3772 | *filepath = NULL; |
3773 | |
3774 | mutex_enter(&fil_system.mutex); |
3775 | |
3776 | fil_space_t* space = fil_space_get_by_id(space_id); |
3777 | |
3778 | if (space != NULL) { |
3779 | *name = mem_strdup(space->name); |
3780 | |
3781 | fil_node_t* node = UT_LIST_GET_FIRST(space->chain); |
3782 | *filepath = mem_strdup(node->name); |
3783 | |
3784 | success = true; |
3785 | } |
3786 | |
3787 | mutex_exit(&fil_system.mutex); |
3788 | |
3789 | return(success); |
3790 | } |
3791 | |
3792 | /** Convert a file name to a tablespace name. |
3793 | @param[in] filename directory/databasename/tablename.ibd |
3794 | @return database/tablename string, to be freed with ut_free() */ |
3795 | char* |
3796 | fil_path_to_space_name( |
3797 | const char* filename) |
3798 | { |
3799 | /* Strip the file name prefix and suffix, leaving |
3800 | only databasename/tablename. */ |
3801 | ulint filename_len = strlen(filename); |
3802 | const char* end = filename + filename_len; |
3803 | #ifdef HAVE_MEMRCHR |
3804 | const char* tablename = 1 + static_cast<const char*>( |
3805 | memrchr(filename, OS_PATH_SEPARATOR, |
3806 | filename_len)); |
3807 | const char* dbname = 1 + static_cast<const char*>( |
3808 | memrchr(filename, OS_PATH_SEPARATOR, |
3809 | tablename - filename - 1)); |
3810 | #else /* HAVE_MEMRCHR */ |
3811 | const char* tablename = filename; |
3812 | const char* dbname = NULL; |
3813 | |
3814 | while (const char* t = static_cast<const char*>( |
3815 | memchr(tablename, OS_PATH_SEPARATOR, |
3816 | ulint(end - tablename)))) { |
3817 | dbname = tablename; |
3818 | tablename = t + 1; |
3819 | } |
3820 | #endif /* HAVE_MEMRCHR */ |
3821 | |
3822 | ut_ad(dbname != NULL); |
3823 | ut_ad(tablename > dbname); |
3824 | ut_ad(tablename < end); |
3825 | ut_ad(end - tablename > 4); |
3826 | ut_ad(memcmp(end - 4, DOT_IBD, 4) == 0); |
3827 | |
3828 | char* name = mem_strdupl(dbname, ulint(end - dbname) - 4); |
3829 | |
3830 | ut_ad(name[tablename - dbname - 1] == OS_PATH_SEPARATOR); |
3831 | #if OS_PATH_SEPARATOR != '/' |
3832 | /* space->name uses '/', not OS_PATH_SEPARATOR. */ |
3833 | name[tablename - dbname - 1] = '/'; |
3834 | #endif |
3835 | |
3836 | return(name); |
3837 | } |
3838 | |
3839 | /** Discover the correct IBD file to open given a remote or missing |
3840 | filepath from the REDO log. Administrators can move a crashed |
3841 | database to another location on the same machine and try to recover it. |
3842 | Remote IBD files might be moved as well to the new location. |
3843 | The problem with this is that the REDO log contains the old location |
3844 | which may be still accessible. During recovery, if files are found in |
3845 | both locations, we can chose on based on these priorities; |
3846 | 1. Default location |
3847 | 2. ISL location |
3848 | 3. REDO location |
3849 | @param[in] space_id tablespace ID |
3850 | @param[in] df Datafile object with path from redo |
3851 | @return true if a valid datafile was found, false if not */ |
3852 | static |
3853 | bool |
3854 | fil_ibd_discover( |
3855 | ulint space_id, |
3856 | Datafile& df) |
3857 | { |
3858 | Datafile df_def_per; /* default file-per-table datafile */ |
3859 | RemoteDatafile df_rem_per; /* remote file-per-table datafile */ |
3860 | |
3861 | /* Look for the datafile in the default location. */ |
3862 | const char* filename = df.filepath(); |
3863 | const char* basename = base_name(filename); |
3864 | |
3865 | /* If this datafile is file-per-table it will have a schema dir. */ |
3866 | ulint sep_found = 0; |
3867 | const char* db = basename; |
3868 | for (; db > filename && sep_found < 2; db--) { |
3869 | if (db[0] == OS_PATH_SEPARATOR) { |
3870 | sep_found++; |
3871 | } |
3872 | } |
3873 | if (sep_found == 2) { |
3874 | db += 2; |
3875 | df_def_per.init(db, 0); |
3876 | df_def_per.make_filepath(NULL, db, IBD); |
3877 | if (df_def_per.open_read_only(false) == DB_SUCCESS |
3878 | && df_def_per.validate_for_recovery() == DB_SUCCESS |
3879 | && df_def_per.space_id() == space_id) { |
3880 | df.set_filepath(df_def_per.filepath()); |
3881 | df.open_read_only(false); |
3882 | return(true); |
3883 | } |
3884 | |
3885 | /* Look for a remote file-per-table tablespace. */ |
3886 | |
3887 | switch (srv_operation) { |
3888 | case SRV_OPERATION_BACKUP: |
3889 | case SRV_OPERATION_RESTORE_DELTA: |
3890 | ut_ad(0); |
3891 | break; |
3892 | case SRV_OPERATION_RESTORE_EXPORT: |
3893 | case SRV_OPERATION_RESTORE: |
3894 | break; |
3895 | case SRV_OPERATION_NORMAL: |
3896 | df_rem_per.set_name(db); |
3897 | if (df_rem_per.open_link_file() != DB_SUCCESS) { |
3898 | break; |
3899 | } |
3900 | |
3901 | /* An ISL file was found with contents. */ |
3902 | if (df_rem_per.open_read_only(false) != DB_SUCCESS |
3903 | || df_rem_per.validate_for_recovery() |
3904 | != DB_SUCCESS) { |
3905 | |
3906 | /* Assume that this ISL file is intended to |
3907 | be used. Do not continue looking for another |
3908 | if this file cannot be opened or is not |
3909 | a valid IBD file. */ |
3910 | ib::error() << "ISL file '" |
3911 | << df_rem_per.link_filepath() |
3912 | << "' was found but the linked file '" |
3913 | << df_rem_per.filepath() |
3914 | << "' could not be opened or is" |
3915 | " not correct." ; |
3916 | return(false); |
3917 | } |
3918 | |
3919 | /* Use this file if it has the space_id from the |
3920 | MLOG record. */ |
3921 | if (df_rem_per.space_id() == space_id) { |
3922 | df.set_filepath(df_rem_per.filepath()); |
3923 | df.open_read_only(false); |
3924 | return(true); |
3925 | } |
3926 | |
3927 | /* Since old MLOG records can use the same basename |
3928 | in multiple CREATE/DROP TABLE sequences, this ISL |
3929 | file could be pointing to a later version of this |
3930 | basename.ibd file which has a different space_id. |
3931 | Keep looking. */ |
3932 | } |
3933 | } |
3934 | |
3935 | /* No ISL files were found in the default location. Use the location |
3936 | given in the redo log. */ |
3937 | if (df.open_read_only(false) == DB_SUCCESS |
3938 | && df.validate_for_recovery() == DB_SUCCESS |
3939 | && df.space_id() == space_id) { |
3940 | return(true); |
3941 | } |
3942 | |
3943 | /* A datafile was not discovered for the filename given. */ |
3944 | return(false); |
3945 | } |
3946 | /** Open an ibd tablespace and add it to the InnoDB data structures. |
3947 | This is similar to fil_ibd_open() except that it is used while processing |
3948 | the REDO log, so the data dictionary is not available and very little |
3949 | validation is done. The tablespace name is extracred from the |
3950 | dbname/tablename.ibd portion of the filename, which assumes that the file |
3951 | is a file-per-table tablespace. Any name will do for now. General |
3952 | tablespace names will be read from the dictionary after it has been |
3953 | recovered. The tablespace flags are read at this time from the first page |
3954 | of the file in validate_for_recovery(). |
3955 | @param[in] space_id tablespace ID |
3956 | @param[in] filename path/to/databasename/tablename.ibd |
3957 | @param[out] space the tablespace, or NULL on error |
3958 | @return status of the operation */ |
3959 | enum fil_load_status |
3960 | fil_ibd_load( |
3961 | ulint space_id, |
3962 | const char* filename, |
3963 | fil_space_t*& space) |
3964 | { |
3965 | /* If the a space is already in the file system cache with this |
3966 | space ID, then there is nothing to do. */ |
3967 | mutex_enter(&fil_system.mutex); |
3968 | space = fil_space_get_by_id(space_id); |
3969 | mutex_exit(&fil_system.mutex); |
3970 | |
3971 | if (space != NULL) { |
3972 | /* Compare the filename we are trying to open with the |
3973 | filename from the first node of the tablespace we opened |
3974 | previously. Fail if it is different. */ |
3975 | fil_node_t* node = UT_LIST_GET_FIRST(space->chain); |
3976 | if (0 != strcmp(innobase_basename(filename), |
3977 | innobase_basename(node->name))) { |
3978 | ib::info() |
3979 | << "Ignoring data file '" << filename |
3980 | << "' with space ID " << space->id |
3981 | << ". Another data file called " << node->name |
3982 | << " exists with the same space ID." ; |
3983 | space = NULL; |
3984 | return(FIL_LOAD_ID_CHANGED); |
3985 | } |
3986 | return(FIL_LOAD_OK); |
3987 | } |
3988 | |
3989 | if (srv_operation == SRV_OPERATION_RESTORE) { |
3990 | /* Replace absolute DATA DIRECTORY file paths with |
3991 | short names relative to the backup directory. */ |
3992 | if (const char* name = strrchr(filename, OS_PATH_SEPARATOR)) { |
3993 | while (--name > filename |
3994 | && *name != OS_PATH_SEPARATOR); |
3995 | if (name > filename) { |
3996 | filename = name + 1; |
3997 | } |
3998 | } |
3999 | } |
4000 | |
4001 | Datafile file; |
4002 | file.set_filepath(filename); |
4003 | file.open_read_only(false); |
4004 | |
4005 | if (!file.is_open()) { |
4006 | /* The file has been moved or it is a remote datafile. */ |
4007 | if (!fil_ibd_discover(space_id, file) |
4008 | || !file.is_open()) { |
4009 | return(FIL_LOAD_NOT_FOUND); |
4010 | } |
4011 | } |
4012 | |
4013 | os_offset_t size; |
4014 | |
4015 | /* Read and validate the first page of the tablespace. |
4016 | Assign a tablespace name based on the tablespace type. */ |
4017 | switch (file.validate_for_recovery()) { |
4018 | os_offset_t minimum_size; |
4019 | case DB_SUCCESS: |
4020 | if (file.space_id() != space_id) { |
4021 | ib::info() |
4022 | << "Ignoring data file '" |
4023 | << file.filepath() |
4024 | << "' with space ID " << file.space_id() |
4025 | << ", since the redo log references " |
4026 | << file.filepath() << " with space ID " |
4027 | << space_id << "." ; |
4028 | return(FIL_LOAD_ID_CHANGED); |
4029 | } |
4030 | /* Get and test the file size. */ |
4031 | size = os_file_get_size(file.handle()); |
4032 | |
4033 | /* Every .ibd file is created >= 4 pages in size. |
4034 | Smaller files cannot be OK. */ |
4035 | minimum_size = os_offset_t(FIL_IBD_FILE_INITIAL_SIZE) |
4036 | << srv_page_size_shift; |
4037 | |
4038 | if (size == static_cast<os_offset_t>(-1)) { |
4039 | /* The following call prints an error message */ |
4040 | os_file_get_last_error(true); |
4041 | |
4042 | ib::error() << "Could not measure the size of" |
4043 | " single-table tablespace file '" |
4044 | << file.filepath() << "'" ; |
4045 | } else if (size < minimum_size) { |
4046 | ib::error() << "The size of tablespace file '" |
4047 | << file.filepath() << "' is only " << size |
4048 | << ", should be at least " << minimum_size |
4049 | << "!" ; |
4050 | } else { |
4051 | /* Everything is fine so far. */ |
4052 | break; |
4053 | } |
4054 | |
4055 | /* fall through */ |
4056 | |
4057 | case DB_TABLESPACE_EXISTS: |
4058 | return(FIL_LOAD_INVALID); |
4059 | |
4060 | default: |
4061 | return(FIL_LOAD_NOT_FOUND); |
4062 | } |
4063 | |
4064 | ut_ad(space == NULL); |
4065 | |
4066 | /* Adjust the memory-based flags that would normally be set by |
4067 | dict_tf_to_fsp_flags(). In recovery, we have no data dictionary. */ |
4068 | ulint flags = file.flags(); |
4069 | if (FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)) { |
4070 | flags |= page_zip_level |
4071 | << FSP_FLAGS_MEM_COMPRESSION_LEVEL; |
4072 | } |
4073 | |
4074 | const byte* first_page = file.get_first_page(); |
4075 | fil_space_crypt_t* crypt_data = first_page |
4076 | ? fil_space_read_crypt_data(page_size_t(flags), first_page) |
4077 | : NULL; |
4078 | space = fil_space_create( |
4079 | file.name(), space_id, flags, FIL_TYPE_TABLESPACE, crypt_data); |
4080 | |
4081 | if (space == NULL) { |
4082 | return(FIL_LOAD_INVALID); |
4083 | } |
4084 | |
4085 | ut_ad(space->id == file.space_id()); |
4086 | ut_ad(space->id == space_id); |
4087 | |
4088 | /* We do not use the size information we have about the file, because |
4089 | the rounding formula for extents and pages is somewhat complex; we |
4090 | let fil_node_open() do that task. */ |
4091 | |
4092 | if (!fil_node_create_low(file.filepath(), 0, space, false, false)) { |
4093 | ut_error; |
4094 | } |
4095 | |
4096 | return(FIL_LOAD_OK); |
4097 | } |
4098 | |
4099 | /***********************************************************************//** |
4100 | A fault-tolerant function that tries to read the next file name in the |
4101 | directory. We retry 100 times if os_file_readdir_next_file() returns -1. The |
4102 | idea is to read as much good data as we can and jump over bad data. |
4103 | @return 0 if ok, -1 if error even after the retries, 1 if at the end |
4104 | of the directory */ |
4105 | int |
4106 | fil_file_readdir_next_file( |
4107 | /*=======================*/ |
4108 | dberr_t* err, /*!< out: this is set to DB_ERROR if an error |
4109 | was encountered, otherwise not changed */ |
4110 | const char* dirname,/*!< in: directory name or path */ |
4111 | os_file_dir_t dir, /*!< in: directory stream */ |
4112 | os_file_stat_t* info) /*!< in/out: buffer where the |
4113 | info is returned */ |
4114 | { |
4115 | for (ulint i = 0; i < 100; i++) { |
4116 | int ret = os_file_readdir_next_file(dirname, dir, info); |
4117 | |
4118 | if (ret != -1) { |
4119 | |
4120 | return(ret); |
4121 | } |
4122 | |
4123 | ib::error() << "os_file_readdir_next_file() returned -1 in" |
4124 | " directory " << dirname |
4125 | << ", crash recovery may have failed" |
4126 | " for some .ibd files!" ; |
4127 | |
4128 | *err = DB_ERROR; |
4129 | } |
4130 | |
4131 | return(-1); |
4132 | } |
4133 | |
4134 | /** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations. |
4135 | (Typically when upgrading from MariaDB 10.1.0..10.1.20.) |
4136 | @param[in,out] space tablespace |
4137 | @param[in] flags desired tablespace flags */ |
4138 | void fsp_flags_try_adjust(fil_space_t* space, ulint flags) |
4139 | { |
4140 | ut_ad(!srv_read_only_mode); |
4141 | ut_ad(fsp_flags_is_valid(flags, space->id)); |
4142 | if (!space->size && (space->purpose != FIL_TYPE_TABLESPACE |
4143 | || !fil_space_get_size(space->id))) { |
4144 | return; |
4145 | } |
4146 | /* This code is executed during server startup while no |
4147 | connections are allowed. We do not need to protect against |
4148 | DROP TABLE by fil_space_acquire(). */ |
4149 | mtr_t mtr; |
4150 | mtr.start(); |
4151 | if (buf_block_t* b = buf_page_get( |
4152 | page_id_t(space->id, 0), page_size_t(flags), |
4153 | RW_X_LATCH, &mtr)) { |
4154 | ulint f = fsp_header_get_flags(b->frame); |
4155 | /* Suppress the message if only the DATA_DIR flag to differs. */ |
4156 | if ((f ^ flags) & ~(1U << FSP_FLAGS_POS_RESERVED)) { |
4157 | ib::warn() |
4158 | << "adjusting FSP_SPACE_FLAGS of file '" |
4159 | << UT_LIST_GET_FIRST(space->chain)->name |
4160 | << "' from " << ib::hex(f) |
4161 | << " to " << ib::hex(flags); |
4162 | } |
4163 | if (f != flags) { |
4164 | mtr.set_named_space(space); |
4165 | mlog_write_ulint(FSP_HEADER_OFFSET |
4166 | + FSP_SPACE_FLAGS + b->frame, |
4167 | flags, MLOG_4BYTES, &mtr); |
4168 | } |
4169 | } |
4170 | mtr.commit(); |
4171 | } |
4172 | |
4173 | /** Determine if a matching tablespace exists in the InnoDB tablespace |
4174 | memory cache. Note that if we have not done a crash recovery at the database |
4175 | startup, there may be many tablespaces which are not yet in the memory cache. |
4176 | @param[in] id Tablespace ID |
4177 | @param[in] name Tablespace name used in fil_space_create(). |
4178 | @param[in] print_error_if_does_not_exist |
4179 | Print detailed error information to the |
4180 | error log if a matching tablespace is not found from memory. |
4181 | @param[in] table_flags table flags |
4182 | @return the tablespace |
4183 | @retval NULL if no matching tablespace exists in the memory cache */ |
4184 | fil_space_t* |
4185 | fil_space_for_table_exists_in_mem( |
4186 | ulint id, |
4187 | const char* name, |
4188 | bool print_error_if_does_not_exist, |
4189 | ulint table_flags) |
4190 | { |
4191 | const ulint expected_flags = dict_tf_to_fsp_flags(table_flags); |
4192 | |
4193 | mutex_enter(&fil_system.mutex); |
4194 | if (fil_space_t* space = fil_space_get_by_id(id)) { |
4195 | if ((space->flags ^ expected_flags) & ~FSP_FLAGS_MEM_MASK) { |
4196 | goto func_exit; |
4197 | } |
4198 | |
4199 | if (strcmp(space->name, name)) { |
4200 | ib::error() << "Table " << name |
4201 | << " in InnoDB data dictionary" |
4202 | " has tablespace id " << id |
4203 | << ", but the tablespace" |
4204 | " with that id has name " << space->name << "." |
4205 | " Have you deleted or moved .ibd files?" ; |
4206 | goto error_exit; |
4207 | } |
4208 | |
4209 | /* Adjust the flags that are in FSP_FLAGS_MEM_MASK. |
4210 | FSP_SPACE_FLAGS will not be written back here. */ |
4211 | space->flags = expected_flags; |
4212 | mutex_exit(&fil_system.mutex); |
4213 | if (!srv_read_only_mode) { |
4214 | fsp_flags_try_adjust(space, expected_flags |
4215 | & ~FSP_FLAGS_MEM_MASK); |
4216 | } |
4217 | return space; |
4218 | } |
4219 | |
4220 | if (print_error_if_does_not_exist) { |
4221 | ib::error() << "Table " << name |
4222 | << " in the InnoDB data dictionary" |
4223 | " has tablespace id " << id |
4224 | << ", but tablespace with that id" |
4225 | " or name does not exist. Have" |
4226 | " you deleted or moved .ibd files?" ; |
4227 | error_exit: |
4228 | ib::info() << TROUBLESHOOT_DATADICT_MSG; |
4229 | } |
4230 | |
4231 | func_exit: |
4232 | mutex_exit(&fil_system.mutex); |
4233 | return NULL; |
4234 | } |
4235 | |
4236 | /*============================ FILE I/O ================================*/ |
4237 | |
4238 | /********************************************************************//** |
4239 | NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! |
4240 | |
4241 | Prepares a file node for i/o. Opens the file if it is closed. Updates the |
4242 | pending i/o's field in the node and the system appropriately. Takes the node |
4243 | off the LRU list if it is in the LRU list. The caller must hold the fil_sys |
4244 | mutex. |
4245 | @return false if the file can't be opened, otherwise true */ |
4246 | static |
4247 | bool |
4248 | fil_node_prepare_for_io( |
4249 | /*====================*/ |
4250 | fil_node_t* node, /*!< in: file node */ |
4251 | fil_space_t* space) /*!< in: space */ |
4252 | { |
4253 | ut_ad(node && space); |
4254 | ut_ad(mutex_own(&fil_system.mutex)); |
4255 | |
4256 | if (fil_system.n_open > srv_max_n_open_files + 5) { |
4257 | ib::warn() << "Open files " << fil_system.n_open |
4258 | << " exceeds the limit " << srv_max_n_open_files; |
4259 | } |
4260 | |
4261 | if (!node->is_open()) { |
4262 | /* File is closed: open it */ |
4263 | ut_a(node->n_pending == 0); |
4264 | |
4265 | if (!fil_node_open_file(node)) { |
4266 | return(false); |
4267 | } |
4268 | } |
4269 | |
4270 | if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) { |
4271 | /* The node is in the LRU list, remove it */ |
4272 | ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0); |
4273 | UT_LIST_REMOVE(fil_system.LRU, node); |
4274 | } |
4275 | |
4276 | node->n_pending++; |
4277 | |
4278 | return(true); |
4279 | } |
4280 | |
4281 | /** Update the data structures when an i/o operation finishes. |
4282 | @param[in,out] node file node |
4283 | @param[in] type IO context */ |
4284 | static |
4285 | void |
4286 | fil_node_complete_io(fil_node_t* node, const IORequest& type) |
4287 | { |
4288 | ut_ad(mutex_own(&fil_system.mutex)); |
4289 | ut_a(node->n_pending > 0); |
4290 | |
4291 | --node->n_pending; |
4292 | |
4293 | ut_ad(type.validate()); |
4294 | |
4295 | if (type.is_write()) { |
4296 | |
4297 | ut_ad(!srv_read_only_mode |
4298 | || node->space->purpose == FIL_TYPE_TEMPORARY); |
4299 | |
4300 | ++fil_system.modification_counter; |
4301 | |
4302 | node->modification_counter = fil_system.modification_counter; |
4303 | |
4304 | if (fil_buffering_disabled(node->space)) { |
4305 | |
4306 | /* We don't need to keep track of unflushed |
4307 | changes as user has explicitly disabled |
4308 | buffering. */ |
4309 | ut_ad(!node->space->is_in_unflushed_spaces); |
4310 | node->flush_counter = node->modification_counter; |
4311 | |
4312 | } else if (!node->space->is_in_unflushed_spaces) { |
4313 | |
4314 | node->space->is_in_unflushed_spaces = true; |
4315 | |
4316 | UT_LIST_ADD_FIRST( |
4317 | fil_system.unflushed_spaces, node->space); |
4318 | } |
4319 | } |
4320 | |
4321 | if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) { |
4322 | |
4323 | /* The node must be put back to the LRU list */ |
4324 | UT_LIST_ADD_FIRST(fil_system.LRU, node); |
4325 | } |
4326 | } |
4327 | |
4328 | /** Report information about an invalid page access. */ |
4329 | static |
4330 | void |
4331 | fil_report_invalid_page_access( |
4332 | ulint block_offset, /*!< in: block offset */ |
4333 | ulint space_id, /*!< in: space id */ |
4334 | const char* space_name, /*!< in: space name */ |
4335 | ulint byte_offset, /*!< in: byte offset */ |
4336 | ulint len, /*!< in: I/O length */ |
4337 | bool is_read) /*!< in: I/O type */ |
4338 | { |
4339 | ib::fatal() |
4340 | << "Trying to " << (is_read ? "read" : "write" ) |
4341 | << " page number " << block_offset << " in" |
4342 | " space " << space_id << ", space name " << space_name << "," |
4343 | " which is outside the tablespace bounds. Byte offset " |
4344 | << byte_offset << ", len " << len << |
4345 | (space_id == 0 && !srv_was_started |
4346 | ? "Please check that the configuration matches" |
4347 | " the InnoDB system tablespace location (ibdata files)" |
4348 | : "" ); |
4349 | } |
4350 | |
4351 | /** Reads or writes data. This operation could be asynchronous (aio). |
4352 | |
4353 | @param[in,out] type IO context |
4354 | @param[in] sync true if synchronous aio is desired |
4355 | @param[in] page_id page id |
4356 | @param[in] page_size page size |
4357 | @param[in] byte_offset remainder of offset in bytes; in aio this |
4358 | must be divisible by the OS block size |
4359 | @param[in] len how many bytes to read or write; this must |
4360 | not cross a file boundary; in aio this must |
4361 | be a block size multiple |
4362 | @param[in,out] buf buffer where to store read data or from where |
4363 | to write; in aio this must be appropriately |
4364 | aligned |
4365 | @param[in] message message for aio handler if non-sync aio |
4366 | used, else ignored |
4367 | @param[in] ignore_missing_space true=ignore missing space duging read |
4368 | @return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED |
4369 | if we are trying to do i/o on a tablespace which does not exist */ |
4370 | dberr_t |
4371 | fil_io( |
4372 | const IORequest& type, |
4373 | bool sync, |
4374 | const page_id_t& page_id, |
4375 | const page_size_t& page_size, |
4376 | ulint byte_offset, |
4377 | ulint len, |
4378 | void* buf, |
4379 | void* message, |
4380 | bool ignore_missing_space) |
4381 | { |
4382 | os_offset_t offset; |
4383 | IORequest req_type(type); |
4384 | |
4385 | ut_ad(req_type.validate()); |
4386 | |
4387 | ut_ad(len > 0); |
4388 | ut_ad(byte_offset < srv_page_size); |
4389 | ut_ad(!page_size.is_compressed() || byte_offset == 0); |
4390 | ut_ad(srv_page_size == 1UL << srv_page_size_shift); |
4391 | compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MAX) |
4392 | == UNIV_PAGE_SIZE_MAX); |
4393 | compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MIN) |
4394 | == UNIV_PAGE_SIZE_MIN); |
4395 | ut_ad(fil_validate_skip()); |
4396 | |
4397 | /* ibuf bitmap pages must be read in the sync AIO mode: */ |
4398 | ut_ad(recv_no_ibuf_operations |
4399 | || req_type.is_write() |
4400 | || !ibuf_bitmap_page(page_id, page_size) |
4401 | || sync |
4402 | || req_type.is_log()); |
4403 | |
4404 | ulint mode; |
4405 | |
4406 | if (sync) { |
4407 | |
4408 | mode = OS_AIO_SYNC; |
4409 | |
4410 | } else if (req_type.is_log()) { |
4411 | |
4412 | mode = OS_AIO_LOG; |
4413 | |
4414 | } else if (req_type.is_read() |
4415 | && !recv_no_ibuf_operations |
4416 | && ibuf_page(page_id, page_size, NULL)) { |
4417 | |
4418 | mode = OS_AIO_IBUF; |
4419 | |
4420 | /* Reduce probability of deadlock bugs in connection with ibuf: |
4421 | do not let the ibuf i/o handler sleep */ |
4422 | |
4423 | req_type.clear_do_not_wake(); |
4424 | } else { |
4425 | mode = OS_AIO_NORMAL; |
4426 | } |
4427 | |
4428 | if (req_type.is_read()) { |
4429 | |
4430 | srv_stats.data_read.add(len); |
4431 | |
4432 | } else if (req_type.is_write()) { |
4433 | |
4434 | ut_ad(!srv_read_only_mode |
4435 | || fsp_is_system_temporary(page_id.space())); |
4436 | |
4437 | srv_stats.data_written.add(len); |
4438 | } |
4439 | |
4440 | /* Reserve the fil_system mutex and make sure that we can open at |
4441 | least one file while holding it, if the file is not already open */ |
4442 | |
4443 | fil_mutex_enter_and_prepare_for_io(page_id.space()); |
4444 | |
4445 | fil_space_t* space = fil_space_get_by_id(page_id.space()); |
4446 | |
4447 | /* If we are deleting a tablespace we don't allow async read operations |
4448 | on that. However, we do allow write operations and sync read operations. */ |
4449 | if (space == NULL |
4450 | || (req_type.is_read() |
4451 | && !sync |
4452 | && space->stop_new_ops |
4453 | && !space->is_being_truncated)) { |
4454 | |
4455 | mutex_exit(&fil_system.mutex); |
4456 | |
4457 | if (!req_type.ignore_missing() && !ignore_missing_space) { |
4458 | ib::error() |
4459 | << "Trying to do I/O to a tablespace which" |
4460 | " does not exist. I/O type: " |
4461 | << (req_type.is_read() ? "read" : "write" ) |
4462 | << ", page: " << page_id |
4463 | << ", I/O length: " << len << " bytes" ; |
4464 | } |
4465 | |
4466 | return(DB_TABLESPACE_DELETED); |
4467 | } |
4468 | |
4469 | ut_ad(mode != OS_AIO_IBUF || fil_type_is_data(space->purpose)); |
4470 | |
4471 | ulint cur_page_no = page_id.page_no(); |
4472 | fil_node_t* node = UT_LIST_GET_FIRST(space->chain); |
4473 | |
4474 | for (;;) { |
4475 | |
4476 | if (node == NULL) { |
4477 | |
4478 | if (req_type.ignore_missing()) { |
4479 | mutex_exit(&fil_system.mutex); |
4480 | return(DB_ERROR); |
4481 | } |
4482 | |
4483 | fil_report_invalid_page_access( |
4484 | page_id.page_no(), page_id.space(), |
4485 | space->name, byte_offset, len, |
4486 | req_type.is_read()); |
4487 | |
4488 | } else if (fil_is_user_tablespace_id(space->id) |
4489 | && node->size == 0) { |
4490 | |
4491 | /* We do not know the size of a single-table tablespace |
4492 | before we open the file */ |
4493 | break; |
4494 | |
4495 | } else if (node->size > cur_page_no) { |
4496 | /* Found! */ |
4497 | break; |
4498 | |
4499 | } else { |
4500 | if (space->id != TRX_SYS_SPACE |
4501 | && UT_LIST_GET_LEN(space->chain) == 1 |
4502 | && (srv_is_tablespace_truncated(space->id) |
4503 | || space->is_being_truncated |
4504 | || srv_was_tablespace_truncated(space)) |
4505 | && req_type.is_read()) { |
4506 | |
4507 | /* Handle page which is outside the truncated |
4508 | tablespace bounds when recovering from a crash |
4509 | happened during a truncation */ |
4510 | mutex_exit(&fil_system.mutex); |
4511 | return(DB_TABLESPACE_TRUNCATED); |
4512 | } |
4513 | |
4514 | cur_page_no -= node->size; |
4515 | |
4516 | node = UT_LIST_GET_NEXT(chain, node); |
4517 | } |
4518 | } |
4519 | |
4520 | /* Open file if closed */ |
4521 | if (!fil_node_prepare_for_io(node, space)) { |
4522 | if (fil_type_is_data(space->purpose) |
4523 | && fil_is_user_tablespace_id(space->id)) { |
4524 | mutex_exit(&fil_system.mutex); |
4525 | |
4526 | if (!req_type.ignore_missing()) { |
4527 | ib::error() |
4528 | << "Trying to do I/O to a tablespace" |
4529 | " which exists without .ibd data file." |
4530 | " I/O type: " |
4531 | << (req_type.is_read() |
4532 | ? "read" : "write" ) |
4533 | << ", page: " |
4534 | << page_id_t(page_id.space(), |
4535 | cur_page_no) |
4536 | << ", I/O length: " << len << " bytes" ; |
4537 | } |
4538 | |
4539 | return(DB_TABLESPACE_DELETED); |
4540 | } |
4541 | |
4542 | /* The tablespace is for log. Currently, we just assert here |
4543 | to prevent handling errors along the way fil_io returns. |
4544 | Also, if the log files are missing, it would be hard to |
4545 | promise the server can continue running. */ |
4546 | ut_a(0); |
4547 | } |
4548 | |
4549 | /* Check that at least the start offset is within the bounds of a |
4550 | single-table tablespace, including rollback tablespaces. */ |
4551 | if (node->size <= cur_page_no |
4552 | && space->id != TRX_SYS_SPACE |
4553 | && fil_type_is_data(space->purpose)) { |
4554 | |
4555 | if (req_type.ignore_missing()) { |
4556 | /* If we can tolerate the non-existent pages, we |
4557 | should return with DB_ERROR and let caller decide |
4558 | what to do. */ |
4559 | fil_node_complete_io(node, req_type); |
4560 | mutex_exit(&fil_system.mutex); |
4561 | return(DB_ERROR); |
4562 | } |
4563 | |
4564 | fil_report_invalid_page_access( |
4565 | page_id.page_no(), page_id.space(), |
4566 | space->name, byte_offset, len, req_type.is_read()); |
4567 | } |
4568 | |
4569 | /* Now we have made the changes in the data structures of fil_system */ |
4570 | mutex_exit(&fil_system.mutex); |
4571 | |
4572 | /* Calculate the low 32 bits and the high 32 bits of the file offset */ |
4573 | |
4574 | if (!page_size.is_compressed()) { |
4575 | |
4576 | offset = ((os_offset_t) cur_page_no |
4577 | << srv_page_size_shift) + byte_offset; |
4578 | |
4579 | ut_a(node->size - cur_page_no |
4580 | >= ((byte_offset + len + (srv_page_size - 1)) |
4581 | >> srv_page_size_shift)); |
4582 | } else { |
4583 | ulint size_shift; |
4584 | |
4585 | switch (page_size.physical()) { |
4586 | case 1024: size_shift = 10; break; |
4587 | case 2048: size_shift = 11; break; |
4588 | case 4096: size_shift = 12; break; |
4589 | case 8192: size_shift = 13; break; |
4590 | case 16384: size_shift = 14; break; |
4591 | case 32768: size_shift = 15; break; |
4592 | case 65536: size_shift = 16; break; |
4593 | default: ut_error; |
4594 | } |
4595 | |
4596 | offset = ((os_offset_t) cur_page_no << size_shift) |
4597 | + byte_offset; |
4598 | |
4599 | ut_a(node->size - cur_page_no |
4600 | >= (len + (page_size.physical() - 1)) |
4601 | / page_size.physical()); |
4602 | } |
4603 | |
4604 | /* Do AIO */ |
4605 | |
4606 | ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0); |
4607 | ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0); |
4608 | |
4609 | const char* name = node->name == NULL ? space->name : node->name; |
4610 | |
4611 | req_type.set_fil_node(node); |
4612 | |
4613 | /* Queue the aio request */ |
4614 | dberr_t err = os_aio( |
4615 | req_type, |
4616 | mode, name, node->handle, buf, offset, len, |
4617 | space->purpose != FIL_TYPE_TEMPORARY |
4618 | && srv_read_only_mode, |
4619 | node, message); |
4620 | |
4621 | /* We an try to recover the page from the double write buffer if |
4622 | the decompression fails or the page is corrupt. */ |
4623 | |
4624 | ut_a(req_type.is_dblwr_recover() || err == DB_SUCCESS); |
4625 | |
4626 | if (sync) { |
4627 | /* The i/o operation is already completed when we return from |
4628 | os_aio: */ |
4629 | |
4630 | mutex_enter(&fil_system.mutex); |
4631 | |
4632 | fil_node_complete_io(node, req_type); |
4633 | |
4634 | mutex_exit(&fil_system.mutex); |
4635 | |
4636 | ut_ad(fil_validate_skip()); |
4637 | } |
4638 | |
4639 | return(err); |
4640 | } |
4641 | |
4642 | /**********************************************************************//** |
4643 | Waits for an aio operation to complete. This function is used to write the |
4644 | handler for completed requests. The aio array of pending requests is divided |
4645 | into segments (see os0file.cc for more info). The thread specifies which |
4646 | segment it wants to wait for. */ |
4647 | void |
4648 | fil_aio_wait( |
4649 | /*=========*/ |
4650 | ulint segment) /*!< in: the number of the segment in the aio |
4651 | array to wait for */ |
4652 | { |
4653 | fil_node_t* node; |
4654 | IORequest type; |
4655 | void* message; |
4656 | |
4657 | ut_ad(fil_validate_skip()); |
4658 | |
4659 | dberr_t err = os_aio_handler(segment, &node, &message, &type); |
4660 | |
4661 | ut_a(err == DB_SUCCESS); |
4662 | |
4663 | if (node == NULL) { |
4664 | ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); |
4665 | return; |
4666 | } |
4667 | |
4668 | srv_set_io_thread_op_info(segment, "complete io for fil node" ); |
4669 | |
4670 | mutex_enter(&fil_system.mutex); |
4671 | |
4672 | fil_node_complete_io(node, type); |
4673 | const fil_type_t purpose = node->space->purpose; |
4674 | const ulint space_id= node->space->id; |
4675 | const bool dblwr = node->space->use_doublewrite(); |
4676 | |
4677 | mutex_exit(&fil_system.mutex); |
4678 | |
4679 | ut_ad(fil_validate_skip()); |
4680 | |
4681 | /* Do the i/o handling */ |
4682 | /* IMPORTANT: since i/o handling for reads will read also the insert |
4683 | buffer in tablespace 0, you have to be very careful not to introduce |
4684 | deadlocks in the i/o system. We keep tablespace 0 data files always |
4685 | open, and use a special i/o thread to serve insert buffer requests. */ |
4686 | |
4687 | switch (purpose) { |
4688 | case FIL_TYPE_LOG: |
4689 | srv_set_io_thread_op_info(segment, "complete io for log" ); |
4690 | /* We use synchronous writing of the logs |
4691 | and can only end up here when writing a log checkpoint! */ |
4692 | ut_a(ptrdiff_t(message) == 1); |
4693 | /* It was a checkpoint write */ |
4694 | switch (srv_flush_t(srv_file_flush_method)) { |
4695 | case SRV_O_DSYNC: |
4696 | case SRV_NOSYNC: |
4697 | break; |
4698 | case SRV_FSYNC: |
4699 | case SRV_LITTLESYNC: |
4700 | case SRV_O_DIRECT: |
4701 | case SRV_O_DIRECT_NO_FSYNC: |
4702 | #ifdef _WIN32 |
4703 | case SRV_ALL_O_DIRECT_FSYNC: |
4704 | #endif |
4705 | fil_flush(SRV_LOG_SPACE_FIRST_ID); |
4706 | } |
4707 | |
4708 | DBUG_PRINT("ib_log" , ("checkpoint info written" )); |
4709 | log_sys.complete_checkpoint(); |
4710 | return; |
4711 | case FIL_TYPE_TABLESPACE: |
4712 | case FIL_TYPE_TEMPORARY: |
4713 | case FIL_TYPE_IMPORT: |
4714 | srv_set_io_thread_op_info(segment, "complete io for buf page" ); |
4715 | |
4716 | /* async single page writes from the dblwr buffer don't have |
4717 | access to the page */ |
4718 | buf_page_t* bpage = static_cast<buf_page_t*>(message); |
4719 | if (!bpage) { |
4720 | return; |
4721 | } |
4722 | |
4723 | ulint offset = bpage->id.page_no(); |
4724 | dberr_t err = buf_page_io_complete(bpage, dblwr); |
4725 | if (err == DB_SUCCESS) { |
4726 | return; |
4727 | } |
4728 | |
4729 | ut_ad(type.is_read()); |
4730 | if (recv_recovery_is_on() && !srv_force_recovery) { |
4731 | recv_sys->found_corrupt_fs = true; |
4732 | } |
4733 | |
4734 | if (fil_space_t* space = fil_space_acquire_for_io(space_id)) { |
4735 | if (space == node->space) { |
4736 | ib::error() << "Failed to read file '" |
4737 | << node->name |
4738 | << "' at offset " << offset |
4739 | << ": " << ut_strerr(err); |
4740 | } |
4741 | |
4742 | space->release_for_io(); |
4743 | } |
4744 | return; |
4745 | } |
4746 | |
4747 | ut_ad(0); |
4748 | } |
4749 | |
4750 | /**********************************************************************//** |
4751 | Flushes to disk possible writes cached by the OS. If the space does not exist |
4752 | or is being dropped, does not do anything. */ |
4753 | void |
4754 | fil_flush( |
4755 | /*======*/ |
4756 | ulint space_id) /*!< in: file space id (this can be a group of |
4757 | log files or a tablespace of the database) */ |
4758 | { |
4759 | mutex_enter(&fil_system.mutex); |
4760 | |
4761 | if (fil_space_t* space = fil_space_get_by_id(space_id)) { |
4762 | if (space->purpose != FIL_TYPE_TEMPORARY |
4763 | && !space->is_stopping()) { |
4764 | fil_flush_low(space); |
4765 | } |
4766 | } |
4767 | |
4768 | mutex_exit(&fil_system.mutex); |
4769 | } |
4770 | |
4771 | /** Flush a tablespace. |
4772 | @param[in,out] space tablespace to flush */ |
4773 | void |
4774 | fil_flush(fil_space_t* space) |
4775 | { |
4776 | ut_ad(space->pending_io()); |
4777 | ut_ad(space->purpose == FIL_TYPE_TABLESPACE |
4778 | || space->purpose == FIL_TYPE_IMPORT); |
4779 | |
4780 | if (!space->is_stopping()) { |
4781 | mutex_enter(&fil_system.mutex); |
4782 | if (!space->is_stopping()) { |
4783 | fil_flush_low(space); |
4784 | } |
4785 | mutex_exit(&fil_system.mutex); |
4786 | } |
4787 | } |
4788 | |
4789 | /** Flush to disk the writes in file spaces of the given type |
4790 | possibly cached by the OS. |
4791 | @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */ |
4792 | void |
4793 | fil_flush_file_spaces( |
4794 | fil_type_t purpose) |
4795 | { |
4796 | fil_space_t* space; |
4797 | ulint* space_ids; |
4798 | ulint n_space_ids; |
4799 | |
4800 | ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_LOG); |
4801 | |
4802 | mutex_enter(&fil_system.mutex); |
4803 | |
4804 | n_space_ids = UT_LIST_GET_LEN(fil_system.unflushed_spaces); |
4805 | if (n_space_ids == 0) { |
4806 | |
4807 | mutex_exit(&fil_system.mutex); |
4808 | return; |
4809 | } |
4810 | |
4811 | /* Assemble a list of space ids to flush. Previously, we |
4812 | traversed fil_system.unflushed_spaces and called UT_LIST_GET_NEXT() |
4813 | on a space that was just removed from the list by fil_flush(). |
4814 | Thus, the space could be dropped and the memory overwritten. */ |
4815 | space_ids = static_cast<ulint*>( |
4816 | ut_malloc_nokey(n_space_ids * sizeof(*space_ids))); |
4817 | |
4818 | n_space_ids = 0; |
4819 | |
4820 | for (space = UT_LIST_GET_FIRST(fil_system.unflushed_spaces); |
4821 | space; |
4822 | space = UT_LIST_GET_NEXT(unflushed_spaces, space)) { |
4823 | |
4824 | if (space->purpose == purpose |
4825 | && !space->is_stopping()) { |
4826 | |
4827 | space_ids[n_space_ids++] = space->id; |
4828 | } |
4829 | } |
4830 | |
4831 | mutex_exit(&fil_system.mutex); |
4832 | |
4833 | /* Flush the spaces. It will not hurt to call fil_flush() on |
4834 | a non-existing space id. */ |
4835 | for (ulint i = 0; i < n_space_ids; i++) { |
4836 | |
4837 | fil_flush(space_ids[i]); |
4838 | } |
4839 | |
4840 | ut_free(space_ids); |
4841 | } |
4842 | |
4843 | /** Functor to validate the file node list of a tablespace. */ |
4844 | struct Check { |
4845 | /** Total size of file nodes visited so far */ |
4846 | ulint size; |
4847 | /** Total number of open files visited so far */ |
4848 | ulint n_open; |
4849 | |
4850 | /** Constructor */ |
4851 | Check() : size(0), n_open(0) {} |
4852 | |
4853 | /** Visit a file node |
4854 | @param[in] elem file node to visit */ |
4855 | void operator()(const fil_node_t* elem) |
4856 | { |
4857 | ut_a(elem->is_open() || !elem->n_pending); |
4858 | n_open += elem->is_open(); |
4859 | size += elem->size; |
4860 | } |
4861 | |
4862 | /** Validate a tablespace. |
4863 | @param[in] space tablespace to validate |
4864 | @return number of open file nodes */ |
4865 | static ulint validate(const fil_space_t* space) |
4866 | { |
4867 | ut_ad(mutex_own(&fil_system.mutex)); |
4868 | Check check; |
4869 | ut_list_validate(space->chain, check); |
4870 | ut_a(space->size == check.size); |
4871 | ut_ad(space->id != TRX_SYS_SPACE |
4872 | || space == fil_system.sys_space); |
4873 | ut_ad(space->id != SRV_TMP_SPACE_ID |
4874 | || space == fil_system.temp_space); |
4875 | return(check.n_open); |
4876 | } |
4877 | }; |
4878 | |
4879 | /******************************************************************//** |
4880 | Checks the consistency of the tablespace cache. |
4881 | @return true if ok */ |
4882 | bool |
4883 | fil_validate(void) |
4884 | /*==============*/ |
4885 | { |
4886 | fil_space_t* space; |
4887 | fil_node_t* fil_node; |
4888 | ulint n_open = 0; |
4889 | |
4890 | mutex_enter(&fil_system.mutex); |
4891 | |
4892 | /* Look for spaces in the hash table */ |
4893 | |
4894 | for (ulint i = 0; i < hash_get_n_cells(fil_system.spaces); i++) { |
4895 | |
4896 | for (space = static_cast<fil_space_t*>( |
4897 | HASH_GET_FIRST(fil_system.spaces, i)); |
4898 | space != 0; |
4899 | space = static_cast<fil_space_t*>( |
4900 | HASH_GET_NEXT(hash, space))) { |
4901 | |
4902 | n_open += Check::validate(space); |
4903 | } |
4904 | } |
4905 | |
4906 | ut_a(fil_system.n_open == n_open); |
4907 | |
4908 | UT_LIST_CHECK(fil_system.LRU); |
4909 | |
4910 | for (fil_node = UT_LIST_GET_FIRST(fil_system.LRU); |
4911 | fil_node != 0; |
4912 | fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) { |
4913 | |
4914 | ut_a(fil_node->n_pending == 0); |
4915 | ut_a(!fil_node->being_extended); |
4916 | ut_a(fil_node->is_open()); |
4917 | ut_a(fil_space_belongs_in_lru(fil_node->space)); |
4918 | } |
4919 | |
4920 | mutex_exit(&fil_system.mutex); |
4921 | |
4922 | return(true); |
4923 | } |
4924 | |
4925 | /********************************************************************//** |
4926 | Returns true if file address is undefined. |
4927 | @return true if undefined */ |
4928 | bool |
4929 | fil_addr_is_null( |
4930 | /*=============*/ |
4931 | fil_addr_t addr) /*!< in: address */ |
4932 | { |
4933 | return(addr.page == FIL_NULL); |
4934 | } |
4935 | |
4936 | /********************************************************************//** |
4937 | Get the predecessor of a file page. |
4938 | @return FIL_PAGE_PREV */ |
4939 | ulint |
4940 | fil_page_get_prev( |
4941 | /*==============*/ |
4942 | const byte* page) /*!< in: file page */ |
4943 | { |
4944 | return(mach_read_from_4(page + FIL_PAGE_PREV)); |
4945 | } |
4946 | |
4947 | /********************************************************************//** |
4948 | Get the successor of a file page. |
4949 | @return FIL_PAGE_NEXT */ |
4950 | ulint |
4951 | fil_page_get_next( |
4952 | /*==============*/ |
4953 | const byte* page) /*!< in: file page */ |
4954 | { |
4955 | return(mach_read_from_4(page + FIL_PAGE_NEXT)); |
4956 | } |
4957 | |
4958 | /*********************************************************************//** |
4959 | Sets the file page type. */ |
4960 | void |
4961 | fil_page_set_type( |
4962 | /*==============*/ |
4963 | byte* page, /*!< in/out: file page */ |
4964 | ulint type) /*!< in: type */ |
4965 | { |
4966 | ut_ad(page); |
4967 | |
4968 | mach_write_to_2(page + FIL_PAGE_TYPE, type); |
4969 | } |
4970 | |
4971 | /** Reset the page type. |
4972 | Data files created before MySQL 5.1 may contain garbage in FIL_PAGE_TYPE. |
4973 | In MySQL 3.23.53, only undo log pages and index pages were tagged. |
4974 | Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE. |
4975 | @param[in] page_id page number |
4976 | @param[in,out] page page with invalid FIL_PAGE_TYPE |
4977 | @param[in] type expected page type |
4978 | @param[in,out] mtr mini-transaction */ |
4979 | void |
4980 | fil_page_reset_type( |
4981 | const page_id_t& page_id, |
4982 | byte* page, |
4983 | ulint type, |
4984 | mtr_t* mtr) |
4985 | { |
4986 | ib::info() |
4987 | << "Resetting invalid page " << page_id << " type " |
4988 | << fil_page_get_type(page) << " to " << type << "." ; |
4989 | mlog_write_ulint(page + FIL_PAGE_TYPE, type, MLOG_2BYTES, mtr); |
4990 | } |
4991 | |
4992 | /********************************************************************//** |
4993 | Delete the tablespace file and any related files like .cfg. |
4994 | This should not be called for temporary tables. |
4995 | @param[in] ibd_filepath File path of the IBD tablespace */ |
4996 | void |
4997 | fil_delete_file( |
4998 | /*============*/ |
4999 | const char* ibd_filepath) |
5000 | { |
5001 | /* Force a delete of any stale .ibd files that are lying around. */ |
5002 | |
5003 | ib::info() << "Deleting " << ibd_filepath; |
5004 | os_file_delete_if_exists(innodb_data_file_key, ibd_filepath, NULL); |
5005 | |
5006 | char* cfg_filepath = fil_make_filepath( |
5007 | ibd_filepath, NULL, CFG, false); |
5008 | if (cfg_filepath != NULL) { |
5009 | os_file_delete_if_exists( |
5010 | innodb_data_file_key, cfg_filepath, NULL); |
5011 | ut_free(cfg_filepath); |
5012 | } |
5013 | } |
5014 | |
5015 | /** Generate redo log for swapping two .ibd files |
5016 | @param[in] old_table old table |
5017 | @param[in] new_table new table |
5018 | @param[in] tmp_name temporary table name |
5019 | @param[in,out] mtr mini-transaction |
5020 | @return innodb error code */ |
5021 | dberr_t |
5022 | fil_mtr_rename_log( |
5023 | const dict_table_t* old_table, |
5024 | const dict_table_t* new_table, |
5025 | const char* tmp_name, |
5026 | mtr_t* mtr) |
5027 | { |
5028 | ut_ad(old_table->space != fil_system.temp_space); |
5029 | ut_ad(new_table->space != fil_system.temp_space); |
5030 | ut_ad(old_table->space_id == old_table->space->id); |
5031 | ut_ad(new_table->space_id == new_table->space->id); |
5032 | |
5033 | /* If neither table is file-per-table, |
5034 | there will be no renaming of files. */ |
5035 | if (!old_table->space_id && !new_table->space_id) { |
5036 | return(DB_SUCCESS); |
5037 | } |
5038 | |
5039 | const bool has_data_dir = DICT_TF_HAS_DATA_DIR(old_table->flags); |
5040 | |
5041 | if (old_table->space_id) { |
5042 | char* tmp_path = fil_make_filepath( |
5043 | has_data_dir ? old_table->data_dir_path : NULL, |
5044 | tmp_name, IBD, has_data_dir); |
5045 | if (tmp_path == NULL) { |
5046 | return(DB_OUT_OF_MEMORY); |
5047 | } |
5048 | |
5049 | const char* old_path = old_table->space->chain.start->name; |
5050 | /* Temp filepath must not exist. */ |
5051 | dberr_t err = fil_rename_tablespace_check( |
5052 | old_path, tmp_path, !old_table->space); |
5053 | if (err != DB_SUCCESS) { |
5054 | ut_free(tmp_path); |
5055 | return(err); |
5056 | } |
5057 | |
5058 | fil_name_write_rename_low( |
5059 | old_table->space_id, 0, old_path, tmp_path, mtr); |
5060 | |
5061 | ut_free(tmp_path); |
5062 | } |
5063 | |
5064 | if (new_table->space_id) { |
5065 | const char* new_path = new_table->space->chain.start->name; |
5066 | char* old_path = fil_make_filepath( |
5067 | has_data_dir ? old_table->data_dir_path : NULL, |
5068 | old_table->name.m_name, IBD, has_data_dir); |
5069 | |
5070 | /* Destination filepath must not exist unless this ALTER |
5071 | TABLE starts and ends with a file_per-table tablespace. */ |
5072 | if (!old_table->space_id) { |
5073 | dberr_t err = fil_rename_tablespace_check( |
5074 | new_path, old_path, !new_table->space); |
5075 | if (err != DB_SUCCESS) { |
5076 | ut_free(old_path); |
5077 | return(err); |
5078 | } |
5079 | } |
5080 | |
5081 | fil_name_write_rename_low( |
5082 | new_table->space_id, 0, new_path, old_path, mtr); |
5083 | ut_free(old_path); |
5084 | } |
5085 | |
5086 | return DB_SUCCESS; |
5087 | } |
5088 | |
#ifdef UNIV_DEBUG
/** Debug check that a tablespace is valid for mtr_commit().
The caller must not hold fil_system.mutex.
@param[in]	space	persistent tablespace that has been changed */
static
void
fil_space_validate_for_mtr_commit(
	const fil_space_t*	space)
{
	ut_ad(!mutex_own(&fil_system.mutex));
	ut_ad(space != NULL);
	ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
	ut_ad(!is_predefined_tablespace(space->id));

	/* We are serving mtr_commit(). While there is an active
	mini-transaction, we should have !space->stop_new_ops. This is
	guaranteed by meta-data locks or transactional locks, or
	dict_operation_lock (X-lock in DROP, S-lock in purge).

	However, a file I/O thread can invoke change buffer merge
	while fil_check_pending_operations() is waiting for operations
	to quiesce. This is not a problem, because
	ibuf_merge_or_delete_for_page() would call
	fil_space_acquire() before mtr_start() and
	fil_space_t::release() after mtr_commit(). This is why
	n_pending_ops should not be zero if stop_new_ops is set. */
	const bool	new_ops_allowed = !space->stop_new_ops;

	ut_ad(new_ops_allowed
	      || space->is_being_truncated /* TRUNCATE sets stop_new_ops */
	      || space->referenced());
}
#endif /* UNIV_DEBUG */
5119 | |
5120 | /** Write a MLOG_FILE_NAME record for a persistent tablespace. |
5121 | @param[in] space tablespace |
5122 | @param[in,out] mtr mini-transaction */ |
5123 | static |
5124 | void |
5125 | fil_names_write( |
5126 | const fil_space_t* space, |
5127 | mtr_t* mtr) |
5128 | { |
5129 | ut_ad(UT_LIST_GET_LEN(space->chain) == 1); |
5130 | fil_name_write(space, 0, UT_LIST_GET_FIRST(space->chain), mtr); |
5131 | } |
5132 | |
5133 | /** Note that a non-predefined persistent tablespace has been modified |
5134 | by redo log. |
5135 | @param[in,out] space tablespace */ |
5136 | void |
5137 | fil_names_dirty( |
5138 | fil_space_t* space) |
5139 | { |
5140 | ut_ad(log_mutex_own()); |
5141 | ut_ad(recv_recovery_is_on()); |
5142 | ut_ad(log_sys.lsn != 0); |
5143 | ut_ad(space->max_lsn == 0); |
5144 | ut_d(fil_space_validate_for_mtr_commit(space)); |
5145 | |
5146 | UT_LIST_ADD_LAST(fil_system.named_spaces, space); |
5147 | space->max_lsn = log_sys.lsn; |
5148 | } |
5149 | |
5150 | /** Write MLOG_FILE_NAME records when a non-predefined persistent |
5151 | tablespace was modified for the first time since the latest |
5152 | fil_names_clear(). |
5153 | @param[in,out] space tablespace |
5154 | @param[in,out] mtr mini-transaction */ |
5155 | void |
5156 | fil_names_dirty_and_write( |
5157 | fil_space_t* space, |
5158 | mtr_t* mtr) |
5159 | { |
5160 | ut_ad(log_mutex_own()); |
5161 | ut_d(fil_space_validate_for_mtr_commit(space)); |
5162 | ut_ad(space->max_lsn == log_sys.lsn); |
5163 | |
5164 | UT_LIST_ADD_LAST(fil_system.named_spaces, space); |
5165 | fil_names_write(space, mtr); |
5166 | |
5167 | DBUG_EXECUTE_IF("fil_names_write_bogus" , |
5168 | { |
5169 | char bogus_name[] = "./test/bogus file.ibd" ; |
5170 | os_normalize_path(bogus_name); |
5171 | fil_name_write( |
5172 | SRV_LOG_SPACE_FIRST_ID, 0, |
5173 | bogus_name, mtr); |
5174 | }); |
5175 | } |
5176 | |
5177 | /** On a log checkpoint, reset fil_names_dirty_and_write() flags |
5178 | and write out MLOG_FILE_NAME and MLOG_CHECKPOINT if needed. |
5179 | @param[in] lsn checkpoint LSN |
5180 | @param[in] do_write whether to always write MLOG_CHECKPOINT |
5181 | @return whether anything was written to the redo log |
5182 | @retval false if no flags were set and nothing written |
5183 | @retval true if anything was written to the redo log */ |
5184 | bool |
5185 | fil_names_clear( |
5186 | lsn_t lsn, |
5187 | bool do_write) |
5188 | { |
5189 | mtr_t mtr; |
5190 | ulint mtr_checkpoint_size = LOG_CHECKPOINT_FREE_PER_THREAD; |
5191 | |
5192 | DBUG_EXECUTE_IF( |
5193 | "increase_mtr_checkpoint_size" , |
5194 | mtr_checkpoint_size = 75 * 1024; |
5195 | ); |
5196 | |
5197 | ut_ad(log_mutex_own()); |
5198 | |
5199 | if (log_sys.append_on_checkpoint) { |
5200 | mtr_write_log(log_sys.append_on_checkpoint); |
5201 | do_write = true; |
5202 | } |
5203 | |
5204 | mtr.start(); |
5205 | |
5206 | for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.named_spaces); |
5207 | space != NULL; ) { |
5208 | fil_space_t* next = UT_LIST_GET_NEXT(named_spaces, space); |
5209 | |
5210 | ut_ad(space->max_lsn > 0); |
5211 | if (space->max_lsn < lsn) { |
5212 | /* The tablespace was last dirtied before the |
5213 | checkpoint LSN. Remove it from the list, so |
5214 | that if the tablespace is not going to be |
5215 | modified any more, subsequent checkpoints will |
5216 | avoid calling fil_names_write() on it. */ |
5217 | space->max_lsn = 0; |
5218 | UT_LIST_REMOVE(fil_system.named_spaces, space); |
5219 | } |
5220 | |
5221 | /* max_lsn is the last LSN where fil_names_dirty_and_write() |
5222 | was called. If we kept track of "min_lsn" (the first LSN |
5223 | where max_lsn turned nonzero), we could avoid the |
5224 | fil_names_write() call if min_lsn > lsn. */ |
5225 | |
5226 | fil_names_write(space, &mtr); |
5227 | do_write = true; |
5228 | |
5229 | const mtr_buf_t* mtr_log = mtr_get_log(&mtr); |
5230 | |
5231 | /** If the mtr buffer size exceeds the size of |
5232 | LOG_CHECKPOINT_FREE_PER_THREAD then commit the multi record |
5233 | mini-transaction, start the new mini-transaction to |
5234 | avoid the parsing buffer overflow error during recovery. */ |
5235 | |
5236 | if (mtr_log->size() > mtr_checkpoint_size) { |
5237 | ut_ad(mtr_log->size() < (RECV_PARSING_BUF_SIZE / 2)); |
5238 | mtr.commit_checkpoint(lsn, false); |
5239 | mtr.start(); |
5240 | } |
5241 | |
5242 | space = next; |
5243 | } |
5244 | |
5245 | if (do_write) { |
5246 | mtr.commit_checkpoint(lsn, true); |
5247 | } else { |
5248 | ut_ad(!mtr.has_modifications()); |
5249 | } |
5250 | |
5251 | return(do_write); |
5252 | } |
5253 | |
5254 | /** Truncate a single-table tablespace. The tablespace must be cached |
5255 | in the memory cache. |
5256 | @param space_id space id |
5257 | @param dir_path directory path |
5258 | @param tablename the table name in the usual |
5259 | databasename/tablename format of InnoDB |
5260 | @param flags tablespace flags |
5261 | @param trunc_to_default truncate to default size if tablespace |
5262 | is being newly re-initialized. |
5263 | @return DB_SUCCESS or error */ |
5264 | dberr_t |
5265 | truncate_t::truncate( |
5266 | /*=================*/ |
5267 | ulint space_id, |
5268 | const char* dir_path, |
5269 | const char* tablename, |
5270 | ulint flags, |
5271 | bool trunc_to_default) |
5272 | { |
5273 | dberr_t err = DB_SUCCESS; |
5274 | char* path; |
5275 | |
5276 | ut_a(!is_system_tablespace(space_id)); |
5277 | |
5278 | if (FSP_FLAGS_HAS_DATA_DIR(flags)) { |
5279 | ut_ad(dir_path != NULL); |
5280 | path = fil_make_filepath(dir_path, tablename, IBD, true); |
5281 | } else { |
5282 | path = fil_make_filepath(NULL, tablename, IBD, false); |
5283 | } |
5284 | |
5285 | if (path == NULL) { |
5286 | return(DB_OUT_OF_MEMORY); |
5287 | } |
5288 | |
5289 | mutex_enter(&fil_system.mutex); |
5290 | |
5291 | fil_space_t* space = fil_space_get_by_id(space_id); |
5292 | |
5293 | /* The following code must change when InnoDB supports |
5294 | multiple datafiles per tablespace. */ |
5295 | ut_a(UT_LIST_GET_LEN(space->chain) == 1); |
5296 | |
5297 | fil_node_t* node = UT_LIST_GET_FIRST(space->chain); |
5298 | |
5299 | if (trunc_to_default) { |
5300 | space->size = node->size = FIL_IBD_FILE_INITIAL_SIZE; |
5301 | } |
5302 | |
5303 | const bool already_open = node->is_open(); |
5304 | |
5305 | if (!already_open) { |
5306 | |
5307 | bool ret; |
5308 | |
5309 | node->handle = os_file_create_simple_no_error_handling( |
5310 | innodb_data_file_key, path, OS_FILE_OPEN, |
5311 | OS_FILE_READ_WRITE, |
5312 | space->purpose != FIL_TYPE_TEMPORARY |
5313 | && srv_read_only_mode, &ret); |
5314 | |
5315 | if (!ret) { |
5316 | ib::error() << "Failed to open tablespace file " |
5317 | << path << "." ; |
5318 | |
5319 | ut_free(path); |
5320 | |
5321 | return(DB_ERROR); |
5322 | } |
5323 | |
5324 | ut_a(node->is_open()); |
5325 | } |
5326 | |
5327 | os_offset_t trunc_size = trunc_to_default |
5328 | ? FIL_IBD_FILE_INITIAL_SIZE |
5329 | : space->size; |
5330 | |
5331 | const bool success = os_file_truncate( |
5332 | path, node->handle, trunc_size << srv_page_size_shift); |
5333 | |
5334 | if (!success) { |
5335 | ib::error() << "Cannot truncate file " << path |
5336 | << " in TRUNCATE TABLESPACE." ; |
5337 | err = DB_ERROR; |
5338 | } |
5339 | |
5340 | space->stop_new_ops = false; |
5341 | space->is_being_truncated = false; |
5342 | |
5343 | /* If we opened the file in this function, close it. */ |
5344 | if (!already_open) { |
5345 | bool closed = os_file_close(node->handle); |
5346 | |
5347 | if (!closed) { |
5348 | |
5349 | ib::error() << "Failed to close tablespace file " |
5350 | << path << "." ; |
5351 | |
5352 | err = DB_ERROR; |
5353 | } else { |
5354 | node->handle = OS_FILE_CLOSED; |
5355 | } |
5356 | } |
5357 | |
5358 | mutex_exit(&fil_system.mutex); |
5359 | |
5360 | ut_free(path); |
5361 | |
5362 | return(err); |
5363 | } |
5364 | |
/* Unit Tests */
#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
#define MF  fil_make_filepath
/* Log the generated path and then release it with ut_free(), as done
for other fil_make_filepath() results in this file, so that the test
does not leak one allocation per case. */
#define DISPLAY do { ib::info() << path; ut_free(path); } while (0)
/** Exercise fil_make_filepath() with a set of path, name and extension
combinations, writing each resulting path to the informational log. */
void
test_make_filepath()
{
	char* path;
	const char* long_path =
		"this/is/a/very/long/path/including/a/very/"
		"looooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooong"
		"/folder/name";
	path = MF("/this/is/a/path/with/a/filename", NULL, IBD, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename", NULL, ISL, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename", NULL, CFG, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename.dat", NULL, IBD, false); DISPLAY;
	path = MF(NULL, "tablespacename", NO_EXT, false); DISPLAY;
	path = MF(NULL, "tablespacename", IBD, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", NO_EXT, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", IBD, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", ISL, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", CFG, false); DISPLAY;
	path = MF(NULL, "dbname\\tablespacename", NO_EXT, false); DISPLAY;
	path = MF(NULL, "dbname\\tablespacename", IBD, false); DISPLAY;
	path = MF("/this/is/a/path", "dbname/tablespacename", IBD, false); DISPLAY;
	path = MF("/this/is/a/path", "dbname/tablespacename", IBD, true); DISPLAY;
	path = MF("./this/is/a/path", "dbname/tablespacename.ibd", IBD, true); DISPLAY;
	path = MF("this\\is\\a\\path", "dbname/tablespacename", IBD, true); DISPLAY;
	path = MF("/this/is/a/path", "dbname\\tablespacename", IBD, true); DISPLAY;
	path = MF(long_path, NULL, IBD, false); DISPLAY;
	path = MF(long_path, "tablespacename", IBD, false); DISPLAY;
	path = MF(long_path, "tablespacename", IBD, true); DISPLAY;
}
#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
5410 | /* @} */ |
5411 | |
5412 | /** Return the next fil_space_t. |
5413 | Once started, the caller must keep calling this until it returns NULL. |
5414 | fil_space_t::acquire() and fil_space_t::release() are invoked here which |
5415 | blocks a concurrent operation from dropping the tablespace. |
5416 | @param[in] prev_space Pointer to the previous fil_space_t. |
5417 | If NULL, use the first fil_space_t on fil_system.space_list. |
5418 | @return pointer to the next fil_space_t. |
5419 | @retval NULL if this was the last*/ |
5420 | fil_space_t* |
5421 | fil_space_next(fil_space_t* prev_space) |
5422 | { |
5423 | fil_space_t* space=prev_space; |
5424 | |
5425 | mutex_enter(&fil_system.mutex); |
5426 | |
5427 | if (!space) { |
5428 | space = UT_LIST_GET_FIRST(fil_system.space_list); |
5429 | } else { |
5430 | ut_a(space->referenced()); |
5431 | |
5432 | /* Move on to the next fil_space_t */ |
5433 | space->release(); |
5434 | space = UT_LIST_GET_NEXT(space_list, space); |
5435 | } |
5436 | |
5437 | /* Skip spaces that are being created by |
5438 | fil_ibd_create(), or dropped, or !tablespace. */ |
5439 | while (space != NULL |
5440 | && (UT_LIST_GET_LEN(space->chain) == 0 |
5441 | || space->is_stopping() |
5442 | || space->purpose != FIL_TYPE_TABLESPACE)) { |
5443 | space = UT_LIST_GET_NEXT(space_list, space); |
5444 | } |
5445 | |
5446 | if (space != NULL) { |
5447 | space->acquire(); |
5448 | } |
5449 | |
5450 | mutex_exit(&fil_system.mutex); |
5451 | |
5452 | return(space); |
5453 | } |
5454 | |
5455 | /** |
5456 | Remove space from key rotation list if there are no more |
5457 | pending operations. |
5458 | @param[in,out] space Tablespace */ |
5459 | static |
5460 | void |
5461 | fil_space_remove_from_keyrotation(fil_space_t* space) |
5462 | { |
5463 | ut_ad(mutex_own(&fil_system.mutex)); |
5464 | ut_ad(space); |
5465 | |
5466 | if (space->is_in_rotation_list && !space->referenced()) { |
5467 | space->is_in_rotation_list = false; |
5468 | ut_a(UT_LIST_GET_LEN(fil_system.rotation_list) > 0); |
5469 | UT_LIST_REMOVE(fil_system.rotation_list, space); |
5470 | } |
5471 | } |
5472 | |
5473 | |
5474 | /** Return the next fil_space_t from key rotation list. |
5475 | Once started, the caller must keep calling this until it returns NULL. |
5476 | fil_space_t::acquire() and fil_space_t::release() are invoked here which |
5477 | blocks a concurrent operation from dropping the tablespace. |
5478 | @param[in] prev_space Pointer to the previous fil_space_t. |
5479 | If NULL, use the first fil_space_t on fil_system.space_list. |
5480 | @return pointer to the next fil_space_t. |
5481 | @retval NULL if this was the last*/ |
5482 | fil_space_t* |
5483 | fil_space_keyrotate_next( |
5484 | fil_space_t* prev_space) |
5485 | { |
5486 | fil_space_t* space = prev_space; |
5487 | fil_space_t* old = NULL; |
5488 | |
5489 | mutex_enter(&fil_system.mutex); |
5490 | |
5491 | if (UT_LIST_GET_LEN(fil_system.rotation_list) == 0) { |
5492 | if (space) { |
5493 | space->release(); |
5494 | fil_space_remove_from_keyrotation(space); |
5495 | } |
5496 | mutex_exit(&fil_system.mutex); |
5497 | return(NULL); |
5498 | } |
5499 | |
5500 | if (prev_space == NULL) { |
5501 | space = UT_LIST_GET_FIRST(fil_system.rotation_list); |
5502 | |
5503 | /* We can trust that space is not NULL because we |
5504 | checked list length above */ |
5505 | } else { |
5506 | /* Move on to the next fil_space_t */ |
5507 | space->release(); |
5508 | |
5509 | old = space; |
5510 | space = UT_LIST_GET_NEXT(rotation_list, space); |
5511 | |
5512 | fil_space_remove_from_keyrotation(old); |
5513 | } |
5514 | |
5515 | /* Skip spaces that are being created by fil_ibd_create(), |
5516 | or dropped or truncated. Note that rotation_list contains only |
5517 | space->purpose == FIL_TYPE_TABLESPACE. */ |
5518 | while (space != NULL |
5519 | && (UT_LIST_GET_LEN(space->chain) == 0 |
5520 | || space->is_stopping())) { |
5521 | |
5522 | old = space; |
5523 | space = UT_LIST_GET_NEXT(rotation_list, space); |
5524 | fil_space_remove_from_keyrotation(old); |
5525 | } |
5526 | |
5527 | if (space != NULL) { |
5528 | space->acquire(); |
5529 | } |
5530 | |
5531 | mutex_exit(&fil_system.mutex); |
5532 | |
5533 | return(space); |
5534 | } |
5535 | |
5536 | /** Determine the block size of the data file. |
5537 | @param[in] space tablespace |
5538 | @param[in] offset page number |
5539 | @return block size */ |
5540 | UNIV_INTERN |
5541 | ulint |
5542 | fil_space_get_block_size(const fil_space_t* space, unsigned offset) |
5543 | { |
5544 | ulint block_size = 512; |
5545 | |
5546 | for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); |
5547 | node != NULL; |
5548 | node = UT_LIST_GET_NEXT(chain, node)) { |
5549 | block_size = node->block_size; |
5550 | if (node->size > offset) { |
5551 | ut_ad(node->size <= 0xFFFFFFFFU); |
5552 | break; |
5553 | } |
5554 | offset -= static_cast<unsigned>(node->size); |
5555 | } |
5556 | |
5557 | /* Currently supporting block size up to 4K, |
5558 | fall back to default if bigger requested. */ |
5559 | if (block_size > 4096) { |
5560 | block_size = 512; |
5561 | } |
5562 | |
5563 | return block_size; |
5564 | } |
5565 | |
5566 | /*******************************************************************//** |
5567 | Returns the table space by a given id, NULL if not found. */ |
5568 | fil_space_t* |
5569 | fil_space_found_by_id( |
5570 | /*==================*/ |
5571 | ulint id) /*!< in: space id */ |
5572 | { |
5573 | fil_space_t* space = NULL; |
5574 | mutex_enter(&fil_system.mutex); |
5575 | space = fil_space_get_by_id(id); |
5576 | |
5577 | /* Not found if space is being deleted */ |
5578 | if (space && space->stop_new_ops) { |
5579 | space = NULL; |
5580 | } |
5581 | |
5582 | mutex_exit(&fil_system.mutex); |
5583 | return space; |
5584 | } |
5585 | |
5586 | /** |
5587 | Get should we punch hole to tablespace. |
5588 | @param[in] node File node |
5589 | @return true, if punch hole should be tried, false if not. */ |
5590 | bool |
5591 | fil_node_should_punch_hole( |
5592 | const fil_node_t* node) |
5593 | { |
5594 | return (node->space->punch_hole); |
5595 | } |
5596 | |
5597 | /** |
5598 | Set punch hole to tablespace to given value. |
5599 | @param[in] node File node |
5600 | @param[in] val value to be set. */ |
5601 | void |
5602 | fil_space_set_punch_hole( |
5603 | fil_node_t* node, |
5604 | bool val) |
5605 | { |
5606 | node->space->punch_hole = val; |
5607 | } |
5608 | |