1/*****************************************************************************
2
3Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2014, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file fil/fil0fil.cc
22The tablespace memory cache
23
24Created 10/25/1995 Heikki Tuuri
25*******************************************************/
26
27#include "fil0fil.h"
28#include "fil0crypt.h"
29
30#include "btr0btr.h"
31#include "buf0buf.h"
32#include "dict0boot.h"
33#include "dict0dict.h"
34#include "fsp0file.h"
35#include "fsp0file.h"
36#include "fsp0fsp.h"
37#include "fsp0space.h"
38#include "fsp0sysspace.h"
39#include "hash0hash.h"
40#include "log0log.h"
41#include "log0recv.h"
42#include "mach0data.h"
43#include "mem0mem.h"
44#include "mtr0log.h"
45#include "os0file.h"
46#include "page0zip.h"
47#include "row0mysql.h"
48#include "row0trunc.h"
49#include "srv0start.h"
50#include "trx0purge.h"
51#include "ut0new.h"
52#include "buf0lru.h"
53#include "ibuf0ibuf.h"
54#include "os0event.h"
55#include "sync0sync.h"
56#include "buf0flu.h"
57#include "os0api.h"
58
/** Tries to close a file in the LRU list. The caller must hold the fil_sys
mutex.
Forward declaration: fil_node_open_file() calls this function before its
definition appears in this file.
@param[in] print_info if true, prints information why it
 cannot close a file
@return true if success, false if should retry later; since i/o's
generally complete in < 100 ms, and as InnoDB writes at most 128 pages
from the buffer pool in a batch, and then immediately flushes the
files, there is a good chance that the next time we find a suitable
node from the LRU list. */
static
bool
fil_try_to_close_file_in_LRU(bool print_info);
71
/** Test if a tablespace file can be renamed to a new filepath by checking
that the old filepath exists and the new filepath does not exist.
@param[in] old_path old filepath
@param[in] new_path new filepath
@param[in] is_discarded whether the tablespace is discarded
@return innodb error code */
static dberr_t
fil_rename_tablespace_check(
 const char* old_path,
 const char* new_path,
 bool is_discarded);
/** Rename a single-table tablespace.
The tablespace must exist in the memory cache.
@param[in] id tablespace identifier
@param[in] old_path old file name
@param[in] new_name new table name in the
databasename/tablename format
@param[in] new_path_in new file name,
or NULL if it is located in the normal data directory
@return true on success */
static bool
fil_rename_tablespace(
 ulint id,
 const char* old_path,
 const char* new_name,
 const char* new_path_in);
98
99/*
100 IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
101 =============================================
102
103The tablespace cache is responsible for providing fast read/write access to
104tablespaces and logs of the database. File creation and deletion is done
105in other modules which know more of the logic of the operation, however.
106
107A tablespace consists of a chain of files. The size of the files does not
108have to be divisible by the database block size, because we may just leave
109the last incomplete block unused. When a new file is appended to the
110tablespace, the maximum size of the file is also specified. At the moment,
111we think that it is best to extend the file to its maximum size already at
112the creation of the file, because then we can avoid dynamically extending
113the file when more space is needed for the tablespace.
114
115A block's position in the tablespace is specified with a 32-bit unsigned
116integer. The files in the chain are thought to be catenated, and the block
117corresponding to an address n is the nth block in the catenated file (where
118the first block is named the 0th block, and the incomplete block fragments
119at the end of files are not taken into account). A tablespace can be extended
120by appending a new file at the end of the chain.
121
122Our tablespace concept is similar to the one of Oracle.
123
124To acquire more speed in disk transfers, a technique called disk striping is
125sometimes used. This means that logical block addresses are divided in a
126round-robin fashion across several disks. Windows NT supports disk striping,
127so there we do not need to support it in the database. Disk striping is
128implemented in hardware in RAID disks. We conclude that it is not necessary
129to implement it in the database. Oracle 7 does not support disk striping,
130either.
131
132Another trick used at some database sites is replacing tablespace files by
133raw disks, that is, the whole physical disk drive, or a partition of it, is
134opened as a single file, and it is accessed through byte offsets calculated
135from the start of the disk or the partition. This is recommended in some
136books on database tuning to achieve more speed in i/o. Using raw disk
137certainly prevents the OS from fragmenting disk space, but it is not clear
138if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
139system + EIDE Conner disk only a negligible difference in speed when reading
140from a file, versus reading from a raw disk.
141
To have fast access to a tablespace or a log file, we put the data structures
in a hash table. Each tablespace and log file is given a unique 32-bit
identifier.
145
146Some operating systems do not support many open files at the same time,
147though NT seems to tolerate at least 900 open files. Therefore, we put the
148open files in an LRU-list. If we need to open another file, we may close the
149file at the end of the LRU-list. When an i/o-operation is pending on a file,
150the file cannot be closed. We take the file nodes with pending i/o-operations
151out of the LRU-list and keep a count of pending operations. When an operation
152completes, we decrement the count and return the file node to the LRU-list if
153the count drops to zero. */
154
/** Reference to the server data directory. Usually it is the
current working directory ".", but in the MySQL Embedded Server Library
it is an absolute path. */
const char* fil_path_to_mysql_datadir;

/** Common InnoDB file extensions */
const char* dot_ext[] = { "", ".ibd", ".isl", ".cfg" };

/** The number of fsyncs done to the log */
ulint fil_n_log_flushes = 0;

/** Number of pending redo log flushes */
ulint fil_n_pending_log_flushes = 0;
/** Number of pending tablespace flushes */
ulint fil_n_pending_tablespace_flushes = 0;

/** Number of files currently open */
ulint fil_n_file_opened = 0;

/** The null file address */
const fil_addr_t fil_addr_null = {FIL_NULL, 0};

/** The tablespace memory cache. This is an object, not a pointer;
functions in this file assert fil_system.is_initialised() before
relying on it. */
fil_system_t fil_system;

/** At this age or older a space/page will be rotated */
UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age;
/** Declared here, defined elsewhere.
NOTE(review): presumably protects the encryption thread state in
fil0crypt -- confirm against the definition. */
UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex;
184
/** Determine if user has explicitly disabled fsync().
@param[in] s pointer to the tablespace to check
@return whether s has purpose FIL_TYPE_TABLESPACE and
srv_file_flush_method is SRV_O_DIRECT_NO_FSYNC */
# define fil_buffering_disabled(s) \
 ((s)->purpose == FIL_TYPE_TABLESPACE \
 && srv_file_flush_method \
 == SRV_O_DIRECT_NO_FSYNC)
190
191/** Determine if the space id is a user tablespace id or not.
192@param[in] space_id Space ID to check
193@return true if it is a user tablespace ID */
194inline
195bool
196fil_is_user_tablespace_id(ulint space_id)
197{
198 return(space_id != TRX_SYS_SPACE
199 && space_id != SRV_TMP_SPACE_ID
200 && !srv_is_undo_tablespace(space_id));
201}
202
#ifdef UNIV_DEBUG
/** Try fil_validate() every this many times */
# define FIL_VALIDATE_SKIP	17

/** Run the costly fil_validate() consistency check only occasionally.
A static countdown is decremented on every call; while the value obtained
from the atomic decrement is positive the check is skipped. Otherwise the
countdown is reset to FIL_VALIDATE_SKIP and fil_validate() is executed.
@return true if ok or the check was skipped */
static
bool
fil_validate_skip(void)
{
	/** Countdown until the next real fil_validate() call. */
	static int	calls_until_check = FIL_VALIDATE_SKIP;

	const int	remaining = my_atomic_add32_explicit(
		&calls_until_check, -1, MY_MEMORY_ORDER_RELAXED);

	if (remaining <= 0) {
		/* Time for a real check: restart the countdown first. */
		my_atomic_store32_explicit(&calls_until_check,
					   FIL_VALIDATE_SKIP,
					   MY_MEMORY_ORDER_RELAXED);
		return fil_validate();
	}

	return true;
}
#endif /* UNIV_DEBUG */
231
232/********************************************************************//**
233Determines if a file node belongs to the least-recently-used list.
234@return true if the file belongs to fil_system.LRU mutex. */
235UNIV_INLINE
236bool
237fil_space_belongs_in_lru(
238/*=====================*/
239 const fil_space_t* space) /*!< in: file space */
240{
241 switch (space->purpose) {
242 case FIL_TYPE_TEMPORARY:
243 case FIL_TYPE_LOG:
244 return(false);
245 case FIL_TYPE_TABLESPACE:
246 return(fil_is_user_tablespace_id(space->id));
247 case FIL_TYPE_IMPORT:
248 return(true);
249 }
250
251 ut_ad(0);
252 return(false);
253}
254
/********************************************************************//**
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!

Prepares a file node for i/o. Opens the file if it is closed. Updates the
pending i/o's field in the node and the system appropriately. Takes the node
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
mutex.
Forward declaration: fil_space_extend_must_retry() calls this function
before its definition appears in this file.
@return false if the file can't be opened, otherwise true */
static
bool
fil_node_prepare_for_io(
/*====================*/
 fil_node_t* node, /*!< in: file node */
 fil_space_t* space); /*!< in: space */
269
/** Update the data structures when an i/o operation finishes.
Forward declaration: fil_space_extend_must_retry() calls this function
before its definition appears in this file.
@param[in,out] node file node
@param[in] type IO context */
static
void
fil_node_complete_io(fil_node_t* node, const IORequest& type);
276
277/** Reads data from a space to a buffer. Remember that the possible incomplete
278blocks at the end of file are ignored: they are not taken into account when
279calculating the byte offset within a space.
280@param[in] page_id page id
281@param[in] page_size page size
282@param[in] byte_offset remainder of offset in bytes; in aio this
283must be divisible by the OS block size
284@param[in] len how many bytes to read; this must not cross a
285file boundary; in aio this must be a block size multiple
286@param[in,out] buf buffer where to store data read; in aio this
287must be appropriately aligned
288@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
289i/o on a tablespace which does not exist */
290UNIV_INLINE
291dberr_t
292fil_read(
293 const page_id_t& page_id,
294 const page_size_t& page_size,
295 ulint byte_offset,
296 ulint len,
297 void* buf)
298{
299 return(fil_io(IORequestRead, true, page_id, page_size,
300 byte_offset, len, buf, NULL));
301}
302
303/** Writes data to a space from a buffer. Remember that the possible incomplete
304blocks at the end of file are ignored: they are not taken into account when
305calculating the byte offset within a space.
306@param[in] page_id page id
307@param[in] page_size page size
308@param[in] byte_offset remainder of offset in bytes; in aio this
309must be divisible by the OS block size
310@param[in] len how many bytes to write; this must not cross
311a file boundary; in aio this must be a block size multiple
312@param[in] buf buffer from which to write; in aio this must
313be appropriately aligned
314@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
315i/o on a tablespace which does not exist */
316UNIV_INLINE
317dberr_t
318fil_write(
319 const page_id_t& page_id,
320 const page_size_t& page_size,
321 ulint byte_offset,
322 ulint len,
323 void* buf)
324{
325 ut_ad(!srv_read_only_mode);
326
327 return(fil_io(IORequestWrite, true, page_id, page_size,
328 byte_offset, len, buf, NULL));
329}
330
331/*******************************************************************//**
332Returns the table space by a given id, NULL if not found.
333It is unsafe to dereference the returned pointer. It is fine to check
334for NULL. */
335fil_space_t*
336fil_space_get_by_id(
337/*================*/
338 ulint id) /*!< in: space id */
339{
340 fil_space_t* space;
341
342 ut_ad(mutex_own(&fil_system.mutex));
343
344 HASH_SEARCH(hash, fil_system.spaces, id,
345 fil_space_t*, space,
346 ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
347 space->id == id);
348
349 return(space);
350}
351
352/** Look up a tablespace.
353The caller should hold an InnoDB table lock or a MDL that prevents
354the tablespace from being dropped during the operation,
355or the caller should be in single-threaded crash recovery mode
356(no user connections that could drop tablespaces).
357If this is not the case, fil_space_acquire() and fil_space_t::release()
358should be used instead.
359@param[in] id tablespace ID
360@return tablespace, or NULL if not found */
361fil_space_t*
362fil_space_get(
363 ulint id)
364{
365 mutex_enter(&fil_system.mutex);
366 fil_space_t* space = fil_space_get_by_id(id);
367 mutex_exit(&fil_system.mutex);
368 ut_ad(space == NULL || space->purpose != FIL_TYPE_LOG);
369 return(space);
370}
371
372/** Returns the latch of a file space.
373@param[in] id space id
374@param[out] flags tablespace flags
375@return latch protecting storage allocation */
376rw_lock_t*
377fil_space_get_latch(
378 ulint id,
379 ulint* flags)
380{
381 fil_space_t* space;
382
383 ut_ad(fil_system.is_initialised());
384
385 mutex_enter(&fil_system.mutex);
386
387 space = fil_space_get_by_id(id);
388
389 ut_a(space);
390
391 if (flags) {
392 *flags = space->flags;
393 }
394
395 mutex_exit(&fil_system.mutex);
396
397 return(&(space->latch));
398}
399
400/** Note that the tablespace has been imported.
401Initially, purpose=FIL_TYPE_IMPORT so that no redo log is
402written while the space ID is being updated in each page. */
403void fil_space_t::set_imported()
404{
405 ut_ad(purpose == FIL_TYPE_IMPORT);
406 const fil_node_t* node = UT_LIST_GET_FIRST(chain);
407 atomic_write_supported = node->atomic_write
408 && srv_use_atomic_writes
409 && my_test_if_atomic_write(node->handle,
410 int(page_size_t(flags).physical()));
411 purpose = FIL_TYPE_TABLESPACE;
412}
413
414/**********************************************************************//**
415Checks if all the file nodes in a space are flushed.
416@return true if all are flushed */
417static
418bool
419fil_space_is_flushed(
420/*=================*/
421 fil_space_t* space) /*!< in: space */
422{
423 ut_ad(mutex_own(&fil_system.mutex));
424
425 for (const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
426 node != NULL;
427 node = UT_LIST_GET_NEXT(chain, node)) {
428
429 if (node->modification_counter > node->flush_counter) {
430
431 ut_ad(!fil_buffering_disabled(space));
432 return(false);
433 }
434 }
435
436 return(true);
437}
438
439
440/** Append a file to the chain of files of a space.
441@param[in] name file name of a file that is not open
442@param[in] size file size in entire database blocks
443@param[in,out] space tablespace from fil_space_create()
444@param[in] is_raw whether this is a raw device or partition
445@param[in] atomic_write true if the file could use atomic write
446@param[in] max_pages maximum number of pages in file,
447ULINT_MAX means the file size is unlimited.
448@return pointer to the file name
449@retval NULL if error */
450static
451fil_node_t*
452fil_node_create_low(
453 const char* name,
454 ulint size,
455 fil_space_t* space,
456 bool is_raw,
457 bool atomic_write,
458 ulint max_pages = ULINT_MAX)
459{
460 fil_node_t* node;
461
462 ut_ad(name != NULL);
463 ut_ad(fil_system.is_initialised());
464
465 if (space == NULL) {
466 return(NULL);
467 }
468
469 node = reinterpret_cast<fil_node_t*>(ut_zalloc_nokey(sizeof(*node)));
470
471 node->handle = OS_FILE_CLOSED;
472
473 node->name = mem_strdup(name);
474
475 ut_a(!is_raw || srv_start_raw_disk_in_use);
476
477 node->sync_event = os_event_create("fsync_event");
478
479 node->is_raw_disk = is_raw;
480
481 node->size = size;
482
483 node->magic_n = FIL_NODE_MAGIC_N;
484
485 node->init_size = size;
486 node->max_size = max_pages;
487
488 mutex_enter(&fil_system.mutex);
489
490 space->size += size;
491
492 node->space = space;
493
494 node->atomic_write = atomic_write;
495
496 UT_LIST_ADD_LAST(space->chain, node);
497 mutex_exit(&fil_system.mutex);
498
499 return(node);
500}
501
502/** Appends a new file to the chain of files of a space. File must be closed.
503@param[in] name file name (file must be closed)
504@param[in] size file size in database blocks, rounded downwards to
505 an integer
506@param[in,out] space space where to append
507@param[in] is_raw true if a raw device or a raw disk partition
508@param[in] atomic_write true if the file could use atomic write
509@param[in] max_pages maximum number of pages in file,
510ULINT_MAX means the file size is unlimited.
511@return pointer to the file name
512@retval NULL if error */
513char*
514fil_node_create(
515 const char* name,
516 ulint size,
517 fil_space_t* space,
518 bool is_raw,
519 bool atomic_write,
520 ulint max_pages)
521{
522 fil_node_t* node;
523
524 node = fil_node_create_low(
525 name, size, space, is_raw, atomic_write, max_pages);
526
527 return(node == NULL ? NULL : node->name);
528}
529
530/** Open a file node of a tablespace.
531@param[in,out] node File node
532@return false if the file can't be opened, otherwise true */
533static
534bool
535fil_node_open_file(
536 fil_node_t* node)
537{
538 bool success;
539 bool read_only_mode;
540 fil_space_t* space = node->space;
541
542 ut_ad(mutex_own(&fil_system.mutex));
543 ut_a(node->n_pending == 0);
544 ut_a(!node->is_open());
545
546 read_only_mode = space->purpose != FIL_TYPE_TEMPORARY
547 && srv_read_only_mode;
548
549 const bool first_time_open = node->size == 0;
550
551 if (first_time_open
552 || (space->purpose == FIL_TYPE_TABLESPACE
553 && node == UT_LIST_GET_FIRST(space->chain)
554 && srv_startup_is_before_trx_rollback_phase
555 && !undo::Truncate::was_tablespace_truncated(space->id))) {
556 /* We do not know the size of the file yet. First we
557 open the file in the normal mode, no async I/O here,
558 for simplicity. Then do some checks, and close the
559 file again. NOTE that we could not use the simple
560 file read function os_file_read() in Windows to read
561 from a file opened for async I/O! */
562
563retry:
564 node->handle = os_file_create_simple_no_error_handling(
565 innodb_data_file_key, node->name, OS_FILE_OPEN,
566 OS_FILE_READ_ONLY, read_only_mode, &success);
567
568 if (!success) {
569 /* The following call prints an error message */
570 ulint err = os_file_get_last_error(true);
571 if (err == EMFILE + 100) {
572 if (fil_try_to_close_file_in_LRU(true))
573 goto retry;
574 }
575
576 ib::warn() << "Cannot open '" << node->name << "'."
577 " Have you deleted .ibd files under a"
578 " running mysqld server?";
579 return(false);
580 }
581
582 os_offset_t size_bytes = os_file_get_size(node->handle);
583 ut_a(size_bytes != (os_offset_t) -1);
584
585 ut_a(space->purpose != FIL_TYPE_LOG);
586 const page_size_t page_size(space->flags);
587 const ulint psize = page_size.physical();
588 const ulint min_size = FIL_IBD_FILE_INITIAL_SIZE
589 * psize;
590
591 if (size_bytes < min_size) {
592 ib::error() << "The size of the file " << node->name
593 << " is only " << size_bytes
594 << " bytes, should be at least " << min_size;
595 os_file_close(node->handle);
596 node->handle = OS_FILE_CLOSED;
597 return(false);
598 }
599
600 /* Read the first page of the tablespace */
601
602 byte* buf2 = static_cast<byte*>(ut_malloc_nokey(2 * psize));
603
604 /* Align the memory for file i/o if we might have O_DIRECT
605 set */
606 byte* page = static_cast<byte*>(ut_align(buf2, psize));
607
608 IORequest request(IORequest::READ);
609
610 success = os_file_read(
611 request,
612 node->handle, page, 0, psize);
613 srv_stats.page0_read.add(1);
614
615 const ulint space_id
616 = fsp_header_get_space_id(page);
617 ulint flags = fsp_header_get_flags(page);
618 const ulint size = fsp_header_get_field(
619 page, FSP_SIZE);
620 const ulint free_limit = fsp_header_get_field(
621 page, FSP_FREE_LIMIT);
622 const ulint free_len = flst_get_len(
623 FSP_HEADER_OFFSET + FSP_FREE + page);
624
625 /* Try to read crypt_data from page 0 if it is not yet
626 read. */
627 if (!space->crypt_data) {
628 space->crypt_data = fil_space_read_crypt_data(
629 page_size_t(space->flags), page);
630 }
631
632 ut_free(buf2);
633 os_file_close(node->handle);
634 node->handle = OS_FILE_CLOSED;
635
636 if (!fsp_flags_is_valid(flags, space->id)) {
637 ulint cflags = fsp_flags_convert_from_101(flags);
638 if (cflags == ULINT_UNDEFINED
639 || (cflags ^ space->flags) & ~FSP_FLAGS_MEM_MASK) {
640 ib::error()
641 << "Expected tablespace flags "
642 << ib::hex(space->flags)
643 << " but found " << ib::hex(flags)
644 << " in the file " << node->name;
645 return(false);
646 }
647
648 flags = cflags;
649 }
650
651 if (UNIV_UNLIKELY(space_id != space->id)) {
652 ib::error()
653 << "Expected tablespace id " << space->id
654 << " but found " << space_id
655 << " in the file" << node->name;
656 return(false);
657 }
658
659 ut_ad(space->free_limit == 0
660 || space->free_limit == free_limit);
661 ut_ad(space->free_len == 0
662 || space->free_len == free_len);
663 space->size_in_header = size;
664 space->free_limit = free_limit;
665 space->free_len = free_len;
666
667 if (first_time_open) {
668 /* Truncate the size to a multiple of extent size. */
669 ulint mask = psize * FSP_EXTENT_SIZE - 1;
670
671 if (size_bytes <= mask) {
672 /* .ibd files start smaller than an
673 extent size. Do not truncate valid data. */
674 } else {
675 size_bytes &= ~os_offset_t(mask);
676 }
677
678 node->size = ulint(size_bytes / psize);
679 space->size += node->size;
680 }
681 }
682
683 /* printf("Opening file %s\n", node->name); */
684
685 /* Open the file for reading and writing, in Windows normally in the
686 unbuffered async I/O mode, though global variables may make
687 os_file_create() to fall back to the normal file I/O mode. */
688
689 if (space->purpose == FIL_TYPE_LOG) {
690 node->handle = os_file_create(
691 innodb_log_file_key, node->name, OS_FILE_OPEN,
692 OS_FILE_AIO, OS_LOG_FILE, read_only_mode, &success);
693 } else if (node->is_raw_disk) {
694 node->handle = os_file_create(
695 innodb_data_file_key, node->name, OS_FILE_OPEN_RAW,
696 OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
697 } else {
698 node->handle = os_file_create(
699 innodb_data_file_key, node->name, OS_FILE_OPEN,
700 OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
701
702 if (first_time_open) {
703 /*
704 For the temporary tablespace and during the
705 non-redo-logged adjustments in
706 IMPORT TABLESPACE, we do not care about
707 the atomicity of writes.
708
709 Atomic writes is supported if the file can be used
710 with atomic_writes (not log file), O_DIRECT is
711 used (tested in ha_innodb.cc) and the file is
712 device and file system that supports atomic writes
713 for the given block size
714 */
715 space->atomic_write_supported
716 = space->purpose == FIL_TYPE_TEMPORARY
717 || space->purpose == FIL_TYPE_IMPORT
718 || (node->atomic_write
719 && srv_use_atomic_writes
720 && my_test_if_atomic_write(
721 node->handle,
722 int(page_size_t(space->flags)
723 .physical())));
724 }
725 }
726
727 ut_a(success);
728 ut_a(node->is_open());
729
730 fil_system.n_open++;
731 fil_n_file_opened++;
732
733 if (fil_space_belongs_in_lru(space)) {
734
735 /* Put the node to the LRU list */
736 UT_LIST_ADD_FIRST(fil_system.LRU, node);
737 }
738
739 return(true);
740}
741
742/** Close a file node.
743@param[in,out] node File node */
744static
745void
746fil_node_close_file(
747 fil_node_t* node)
748{
749 bool ret;
750
751 ut_ad(mutex_own(&(fil_system.mutex)));
752 ut_a(node->is_open());
753 ut_a(node->n_pending == 0);
754 ut_a(node->n_pending_flushes == 0);
755 ut_a(!node->being_extended);
756 ut_a(node->modification_counter == node->flush_counter
757 || node->space->purpose == FIL_TYPE_TEMPORARY
758 || srv_fast_shutdown == 2
759 || !srv_was_started);
760
761 ret = os_file_close(node->handle);
762 ut_a(ret);
763
764 /* printf("Closing file %s\n", node->name); */
765
766 node->handle = OS_FILE_CLOSED;
767 ut_ad(!node->is_open());
768 ut_a(fil_system.n_open > 0);
769 fil_system.n_open--;
770 fil_n_file_opened--;
771
772 if (fil_space_belongs_in_lru(node->space)) {
773
774 ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
775
776 /* The node is in the LRU list, remove it */
777 UT_LIST_REMOVE(fil_system.LRU, node);
778 }
779}
780
781/** Tries to close a file in the LRU list. The caller must hold the fil_sys
782mutex.
783@return true if success, false if should retry later; since i/o's
784generally complete in < 100 ms, and as InnoDB writes at most 128 pages
785from the buffer pool in a batch, and then immediately flushes the
786files, there is a good chance that the next time we find a suitable
787node from the LRU list.
788@param[in] print_info if true, prints information why it
789 cannot close a file*/
790static
791bool
792fil_try_to_close_file_in_LRU(
793
794 bool print_info)
795{
796 fil_node_t* node;
797
798 ut_ad(mutex_own(&fil_system.mutex));
799
800 if (print_info) {
801 ib::info() << "fil_sys open file LRU len "
802 << UT_LIST_GET_LEN(fil_system.LRU);
803 }
804
805 for (node = UT_LIST_GET_LAST(fil_system.LRU);
806 node != NULL;
807 node = UT_LIST_GET_PREV(LRU, node)) {
808
809 if (node->modification_counter == node->flush_counter
810 && node->n_pending_flushes == 0
811 && !node->being_extended) {
812
813 fil_node_close_file(node);
814
815 return(true);
816 }
817
818 if (!print_info) {
819 continue;
820 }
821
822 if (node->n_pending_flushes > 0) {
823
824 ib::info() << "Cannot close file " << node->name
825 << ", because n_pending_flushes "
826 << node->n_pending_flushes;
827 }
828
829 if (node->modification_counter != node->flush_counter) {
830 ib::warn() << "Cannot close file " << node->name
831 << ", because modification count "
832 << node->modification_counter <<
833 " != flush count " << node->flush_counter;
834 }
835
836 if (node->being_extended) {
837 ib::info() << "Cannot close file " << node->name
838 << ", because it is being extended";
839 }
840 }
841
842 return(false);
843}
844
/** Flush any writes cached by the file system.
For every file of the tablespace whose modification_counter is ahead of
its flush_counter, os_file_flush() is called and flush_counter is advanced
to the modification count observed before the flush.
The caller must hold fil_system.mutex (asserted in debug builds). The mutex
is released and reacquired around os_file_flush() and while waiting for a
concurrent flush of the same file, so the state it protects may change
during the call.
@param[in,out] space tablespace */
static
void
fil_flush_low(fil_space_t* space)
{
 ut_ad(mutex_own(&fil_system.mutex));
 ut_ad(space);
 ut_ad(!space->stop_new_ops);

 if (fil_buffering_disabled(space)) {

 /* No need to flush. User has explicitly disabled
 buffering. */
 ut_ad(!space->is_in_unflushed_spaces);
 ut_ad(fil_space_is_flushed(space));
 ut_ad(space->n_pending_flushes == 0);

#ifdef UNIV_DEBUG
 for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
 node != NULL;
 node = UT_LIST_GET_NEXT(chain, node)) {
 ut_ad(node->modification_counter
 == node->flush_counter);
 ut_ad(node->n_pending_flushes == 0);
 }
#endif /* UNIV_DEBUG */

 return;
 }

 /* Prevent dropping of the space while we are flushing */
 space->n_pending_flushes++;

 for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
 node != NULL;
 node = UT_LIST_GET_NEXT(chain, node)) {

 /* Remember the modification count we are about to make
 durable; modifications made while the mutex is released
 below are not covered by this flush. */
 int64_t old_mod_counter = node->modification_counter;

 if (old_mod_counter <= node->flush_counter) {
 /* Nothing unflushed in this file. */
 continue;
 }

 ut_a(node->is_open());

 /* Account for the pending flush in the global counters. */
 switch (space->purpose) {
 case FIL_TYPE_TEMPORARY:
 ut_ad(0); // we already checked for this
 /* In non-debug builds this falls through. */
 case FIL_TYPE_TABLESPACE:
 case FIL_TYPE_IMPORT:
 fil_n_pending_tablespace_flushes++;
 break;
 case FIL_TYPE_LOG:
 fil_n_pending_log_flushes++;
 fil_n_log_flushes++;
 break;
 }
#ifdef _WIN32
 if (node->is_raw_disk) {

 goto skip_flush;
 }
#endif /* _WIN32 */
retry:
 if (node->n_pending_flushes > 0) {
 /* We want to avoid calling os_file_flush() on
 the file twice at the same time, because we do
 not know what bugs OS's may contain in file
 i/o */

 int64_t sig_count = os_event_reset(node->sync_event);

 mutex_exit(&fil_system.mutex);

 os_event_wait_low(node->sync_event, sig_count);

 mutex_enter(&fil_system.mutex);

 /* If the flush counter has caught up with our
 modification count, there is nothing left to do. */
 if (node->flush_counter >= old_mod_counter) {

 goto skip_flush;
 }

 goto retry;
 }

 ut_a(node->is_open());
 node->n_pending_flushes++;

 /* Do the actual flush without holding the mutex. */
 mutex_exit(&fil_system.mutex);

 os_file_flush(node->handle);

 mutex_enter(&fil_system.mutex);

 /* Wake up any threads waiting in the retry loop above. */
 os_event_set(node->sync_event);

 node->n_pending_flushes--;
skip_flush:
 if (node->flush_counter < old_mod_counter) {
 node->flush_counter = old_mod_counter;

 /* Once every file of the space is flushed, the space
 no longer belongs in fil_system.unflushed_spaces. */
 if (space->is_in_unflushed_spaces
 && fil_space_is_flushed(space)) {

 space->is_in_unflushed_spaces = false;

 UT_LIST_REMOVE(
 fil_system.unflushed_spaces,
 space);
 }
 }

 /* Undo the pending-flush accounting done above. The expected
 purposes continue with the next file; FIL_TYPE_TEMPORARY breaks
 out of the switch and hits the debug assertion below. */
 switch (space->purpose) {
 case FIL_TYPE_TEMPORARY:
 break;
 case FIL_TYPE_TABLESPACE:
 case FIL_TYPE_IMPORT:
 fil_n_pending_tablespace_flushes--;
 continue;
 case FIL_TYPE_LOG:
 fil_n_pending_log_flushes--;
 continue;
 }

 ut_ad(0);
 }

 space->n_pending_flushes--;
}
976
/** Try to extend a tablespace.
The caller must hold fil_system.mutex (asserted below).

Mutex protocol, as implemented in this function:
- When true is returned, this function has released fil_system.mutex
(and slept) because another thread was extending the same file; the
caller has to reacquire the mutex before retrying.
- When false is returned, this function either never released
fil_system.mutex itself or has reacquired it before returning.

@param[in,out]	space	tablespace to be extended
@param[in,out]	node	last file of the tablespace
@param[in]	size	desired size in number of pages
@param[out]	success	whether the operation succeeded
@return whether the operation should be retried */
static ATTRIBUTE_COLD __attribute__((warn_unused_result, nonnull))
bool
fil_space_extend_must_retry(
	fil_space_t*	space,
	fil_node_t*	node,
	ulint		size,
	bool*		success)
{
	ut_ad(mutex_own(&fil_system.mutex));
	ut_ad(UT_LIST_GET_LAST(space->chain) == node);
	ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE);

	*success = space->size >= size;

	if (*success) {
		/* Space already big enough */
		return(false);
	}

	if (node->being_extended) {
		/* Another thread is currently extending the file. Wait
		for it to finish.
		It'd have been better to use event driven mechanism but
		the entire module is peppered with polling stuff. */
		mutex_exit(&fil_system.mutex);
		os_thread_sleep(100000);
		return(true);
	}

	/* Claim the extension; the flag is cleared again on every path
	below before returning. */
	node->being_extended = true;

	if (!fil_node_prepare_for_io(node, space)) {
		/* The tablespace data file, such as .ibd file, is missing */
		node->being_extended = false;
		/* *success is still false here (set above). */
		return(false);
	}

	/* At this point it is safe to release fil_system.mutex. No
	other thread can rename, delete, close or extend the file because
	we have set the node->being_extended flag. */
	mutex_exit(&fil_system.mutex);

	ut_ad(size > space->size);

	/* node is the last file of the tablespace, so the page number at
	which it starts is the tablespace size minus the size of this
	file. */
	ulint		last_page_no		= space->size;
	const ulint	file_start_page_no	= last_page_no - node->size;

	/* Determine correct file block size */
	if (node->block_size == 0) {
		node->block_size = os_file_get_block_size(
			node->handle, node->name);
	}

	const page_size_t	pageSize(space->flags);
	const ulint		page_size = pageSize.physical();

	/* fil_read_first_page() expects srv_page_size bytes.
	fil_node_open_file() expects at least 4 * srv_page_size bytes.*/
	os_offset_t new_size = std::max(
		os_offset_t(size - file_start_page_no) * page_size,
		os_offset_t(FIL_IBD_FILE_INITIAL_SIZE << srv_page_size_shift));

	*success = os_file_set_size(node->name, node->handle, new_size,
		FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));

	/* NOTE(review): this stores true in os_has_said_disk_full when
	the extension succeeded and false when it failed. If the flag
	means "a disk-full message has already been printed", the
	assignment looks inverted -- confirm against the definition and
	the other users of os_has_said_disk_full. */
	os_has_said_disk_full = *success;
	if (*success) {
		last_page_no = size;
	} else {
		/* Let us measure the size of the file
		to determine how much we were able to
		extend it */
		os_offset_t	fsize = os_file_get_size(node->handle);
		ut_a(fsize != os_offset_t(-1));

		last_page_no = ulint(fsize / page_size)
			+ file_start_page_no;
	}
	mutex_enter(&fil_system.mutex);

	ut_a(node->being_extended);
	node->being_extended = false;
	/* The file must not have become smaller. */
	ut_a(last_page_no - file_start_page_no >= node->size);

	/* Account for the pages gained in both the tablespace size and
	the file size. */
	ulint file_size = last_page_no - file_start_page_no;
	space->size += file_size - node->size;
	node->size = file_size;
	/* Round the file size in pages down to a multiple of
	(1 MiB / page size). */
	const ulint pages_in_MiB = node->size
		& ~ulint((1U << (20U - srv_page_size_shift)) - 1);

	/* Pairs with the successful fil_node_prepare_for_io() above. */
	fil_node_complete_io(node,IORequestRead);

	/* Keep the last data file size info up to date, rounded to
	full megabytes */

	switch (space->id) {
	case TRX_SYS_SPACE:
		srv_sys_space.set_last_file_size(pages_in_MiB);
		fil_flush_low(space);
		return(false);
	default:
		ut_ad(space->purpose == FIL_TYPE_TABLESPACE
		      || space->purpose == FIL_TYPE_IMPORT);
		/* Only FIL_TYPE_TABLESPACE files that are not being
		truncated are flushed here. */
		if (space->purpose == FIL_TYPE_TABLESPACE
		    && !space->is_being_truncated) {
			fil_flush_low(space);
		}
		return(false);
	case SRV_TMP_SPACE_ID:
		/* The temporary tablespace is not flushed here. */
		ut_ad(space->purpose == FIL_TYPE_TEMPORARY);
		srv_tmp_space.set_last_file_size(pages_in_MiB);
		return(false);
	}

}
1098
/*******************************************************************//**
Reserves the fil_system.mutex and tries to make sure we can open at least one
file while holding it. This should be called before calling
fil_node_prepare_for_io(), because that function may need to open a file.

Every exit from the loop below happens after mutex_enter(), so the function
returns with fil_system.mutex held, also when the tablespace does not exist.
If the tablespace has a nonzero recv_size, its last file is additionally
extended to that size before returning. */
static
void
fil_mutex_enter_and_prepare_for_io(
/*===============================*/
	ulint	space_id)	/*!< in: space id */
{
	/* count: number of failed attempts to close a file in the LRU list;
	count2: number of rounds spent waiting for space->stop_ios. */
	for (ulint count = 0, count2 = 0;;) {
		mutex_enter(&fil_system.mutex);

		if (space_id >= SRV_LOG_SPACE_FIRST_ID) {
			/* We keep log files always open. */
			break;
		}

		fil_space_t*	space = fil_space_get_by_id(space_id);

		if (space == NULL) {
			/* Unknown tablespace: nothing to prepare. */
			break;
		}

		if (space->stop_ios) {
			ut_ad(space->id != 0);
			/* We are going to do a rename file and want to stop
			new i/o's for a while. */

			if (count2 > 20000) {
				ib::warn() << "Tablespace " << space->name
					<< " has i/o ops stopped for a long"
					" time " << count2;
			}

			mutex_exit(&fil_system.mutex);

			/* Wake the i/o-handler threads to make sure pending
			i/o's are performed */
			os_aio_simulated_wake_handler_threads();

			/* The sleep here is just to give IO helper threads a
			bit of time to do some work. It is not required that
			all IO related to the tablespace being renamed must
			be flushed here as we do fil_flush() in
			fil_rename_tablespace() as well. */
			os_thread_sleep(20000);

			/* Flush tablespaces so that we can close modified
			files in the LRU list */
			fil_flush_file_spaces(FIL_TYPE_TABLESPACE);

			os_thread_sleep(20000);

			count2++;

			/* The mutex was released above; start over and look
			the tablespace up again. */
			continue;
		}

		fil_node_t*	node = UT_LIST_GET_LAST(space->chain);
		ut_ad(space->id == 0
		      || node == UT_LIST_GET_FIRST(space->chain));

		if (space->id == 0) {
			/* We keep the system tablespace files always
			open; this is important in preventing
			deadlocks in this module, as a page read
			completion often performs another read from
			the insert buffer. The insert buffer is in
			tablespace 0, and we cannot end up waiting in
			this function. */
		} else if (!node || node->is_open()) {
			/* If the file is already open, no need to do
			anything; if the space does not exist, we handle the
			situation in the function which called this
			function */
		} else {
			while (fil_system.n_open >= srv_max_n_open_files) {
				/* Too many files are open */
				if (fil_try_to_close_file_in_LRU(count > 1)) {
					/* No problem */
				} else if (count >= 2) {
					/* NOTE(review): the message below
					prints "(<n>) files stay open)" with
					an unbalanced closing parenthesis. */
					ib::warn() << "innodb_open_files="
						<< srv_max_n_open_files
						<< " is exceeded ("
						<< fil_system.n_open
						<< ") files stay open)";
					break;
				} else {
					mutex_exit(&fil_system.mutex);
					os_aio_simulated_wake_handler_threads();
					os_thread_sleep(20000);
					/* Flush tablespaces so that we can
					close modified files in the LRU list */
					fil_flush_file_spaces(FIL_TYPE_TABLESPACE);

					count++;
					mutex_enter(&fil_system.mutex);
					/* This continues the inner while
					loop, not the outer for loop: space
					and node are not looked up again
					after the mutex was released. */
					continue;
				}
			}
		}

		ulint size = space->recv_size;
		if (UNIV_UNLIKELY(size != 0)) {
			ut_ad(node);
			bool	success;
			if (fil_space_extend_must_retry(space, node, size,
							&success)) {
				/* fil_space_extend_must_retry() released
				the mutex before returning true. */
				continue;
			}

			ut_ad(mutex_own(&fil_system.mutex));
			/* Crash recovery requires the file extension
			to succeed. */
			ut_a(success);
			/* InnoDB data files cannot shrink. */
			ut_a(space->size >= size);

			/* There could be multiple concurrent I/O requests for
			this tablespace (multiple threads trying to extend
			this tablespace).

			Also, fil_space_set_recv_size() may have been invoked
			again during the file extension while fil_system.mutex
			was not being held by us.

			Only if space->recv_size matches what we read
			originally, reset the field. In this way, a
			subsequent I/O request will handle any pending
			fil_space_set_recv_size(). */

			if (size == space->recv_size) {
				space->recv_size = 0;
			}
		}

		break;
	}
}
1239
1240/** Try to extend a tablespace if it is smaller than the specified size.
1241@param[in,out] space tablespace
1242@param[in] size desired size in pages
1243@return whether the tablespace is at least as big as requested */
1244bool
1245fil_space_extend(
1246 fil_space_t* space,
1247 ulint size)
1248{
1249 ut_ad(!srv_read_only_mode || space->purpose == FIL_TYPE_TEMPORARY);
1250
1251 bool success;
1252
1253 do {
1254 fil_mutex_enter_and_prepare_for_io(space->id);
1255 } while (fil_space_extend_must_retry(
1256 space, UT_LIST_GET_LAST(space->chain), size,
1257 &success));
1258
1259 mutex_exit(&fil_system.mutex);
1260 return(success);
1261}
1262
1263/** Prepare to free a file node object from a tablespace memory cache.
1264@param[in,out] node file node
1265@param[in] space tablespace */
1266static
1267void
1268fil_node_close_to_free(
1269 fil_node_t* node,
1270 fil_space_t* space)
1271{
1272 ut_ad(mutex_own(&fil_system.mutex));
1273 ut_a(node->magic_n == FIL_NODE_MAGIC_N);
1274 ut_a(node->n_pending == 0);
1275 ut_a(!node->being_extended);
1276
1277 if (node->is_open()) {
1278 /* We fool the assertion in fil_node_close_file() to think
1279 there are no unflushed modifications in the file */
1280
1281 node->modification_counter = node->flush_counter;
1282 os_event_set(node->sync_event);
1283
1284 if (fil_buffering_disabled(space)) {
1285
1286 ut_ad(!space->is_in_unflushed_spaces);
1287 ut_ad(fil_space_is_flushed(space));
1288
1289 } else if (space->is_in_unflushed_spaces
1290 && fil_space_is_flushed(space)) {
1291
1292 space->is_in_unflushed_spaces = false;
1293
1294 UT_LIST_REMOVE(fil_system.unflushed_spaces, space);
1295 }
1296
1297 fil_node_close_file(node);
1298 }
1299}
1300
1301/** Detach a space object from the tablespace memory cache.
1302Closes the files in the chain but does not delete them.
1303There must not be any pending i/o's or flushes on the files.
1304@param[in,out] space tablespace */
1305static
1306void
1307fil_space_detach(
1308 fil_space_t* space)
1309{
1310 ut_ad(mutex_own(&fil_system.mutex));
1311
1312 HASH_DELETE(fil_space_t, hash, fil_system.spaces, space->id, space);
1313
1314 if (space->is_in_unflushed_spaces) {
1315
1316 ut_ad(!fil_buffering_disabled(space));
1317 space->is_in_unflushed_spaces = false;
1318
1319 UT_LIST_REMOVE(fil_system.unflushed_spaces, space);
1320 }
1321
1322 if (space->is_in_rotation_list) {
1323 space->is_in_rotation_list = false;
1324
1325 UT_LIST_REMOVE(fil_system.rotation_list, space);
1326 }
1327
1328 UT_LIST_REMOVE(fil_system.space_list, space);
1329
1330 ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1331 ut_a(space->n_pending_flushes == 0);
1332
1333 for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
1334 fil_node != NULL;
1335 fil_node = UT_LIST_GET_NEXT(chain, fil_node)) {
1336
1337 fil_node_close_to_free(fil_node, space);
1338 }
1339
1340 if (space == fil_system.sys_space) {
1341 fil_system.sys_space = NULL;
1342 } else if (space == fil_system.temp_space) {
1343 fil_system.temp_space = NULL;
1344 }
1345}
1346
1347/** Free a tablespace object on which fil_space_detach() was invoked.
1348There must not be any pending i/o's or flushes on the files.
1349@param[in,out] space tablespace */
1350static
1351void
1352fil_space_free_low(
1353 fil_space_t* space)
1354{
1355 /* The tablespace must not be in fil_system.named_spaces. */
1356 ut_ad(srv_fast_shutdown == 2 || !srv_was_started
1357 || space->max_lsn == 0);
1358
1359 /* Wait for fil_space_t::release_for_io(); after
1360 fil_space_detach(), the tablespace cannot be found, so
1361 fil_space_acquire_for_io() would return NULL */
1362 while (space->pending_io()) {
1363 os_thread_sleep(100);
1364 }
1365
1366 for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
1367 node != NULL; ) {
1368 ut_d(space->size -= node->size);
1369 os_event_destroy(node->sync_event);
1370 ut_free(node->name);
1371 fil_node_t* old_node = node;
1372 node = UT_LIST_GET_NEXT(chain, node);
1373 ut_free(old_node);
1374 }
1375
1376 ut_ad(space->size == 0);
1377
1378 rw_lock_free(&space->latch);
1379 fil_space_destroy_crypt_data(&space->crypt_data);
1380
1381 ut_free(space->name);
1382 ut_free(space);
1383}
1384
1385/** Frees a space object from the tablespace memory cache.
1386Closes the files in the chain but does not delete them.
1387There must not be any pending i/o's or flushes on the files.
1388@param[in] id tablespace identifier
1389@param[in] x_latched whether the caller holds X-mode space->latch
1390@return true if success */
1391bool
1392fil_space_free(
1393 ulint id,
1394 bool x_latched)
1395{
1396 ut_ad(id != TRX_SYS_SPACE);
1397
1398 mutex_enter(&fil_system.mutex);
1399 fil_space_t* space = fil_space_get_by_id(id);
1400
1401 if (space != NULL) {
1402 fil_space_detach(space);
1403 }
1404
1405 mutex_exit(&fil_system.mutex);
1406
1407 if (space != NULL) {
1408 if (x_latched) {
1409 rw_lock_x_unlock(&space->latch);
1410 }
1411
1412 bool need_mutex = !recv_recovery_on;
1413
1414 if (need_mutex) {
1415 log_mutex_enter();
1416 }
1417
1418 ut_ad(log_mutex_own());
1419
1420 if (space->max_lsn != 0) {
1421 ut_d(space->max_lsn = 0);
1422 UT_LIST_REMOVE(fil_system.named_spaces, space);
1423 }
1424
1425 if (need_mutex) {
1426 log_mutex_exit();
1427 }
1428
1429 fil_space_free_low(space);
1430 }
1431
1432 return(space != NULL);
1433}
1434
/** Create a space memory object and put it to the fil_system hash table.
Error messages are issued to the server log.
The function acquires fil_system.mutex itself and releases it on every
return path that follows the acquisition.
@param[in]	name		tablespace name
@param[in]	id		tablespace identifier
@param[in]	flags		tablespace flags
@param[in]	purpose		tablespace purpose
@param[in,out]	crypt_data	encryption information
@param[in]	mode		encryption mode
@return pointer to created tablespace, to be filled in with fil_node_create()
@retval NULL on failure (such as when the same tablespace exists) */
fil_space_t*
fil_space_create(
	const char*		name,
	ulint			id,
	ulint			flags,
	fil_type_t		purpose,
	fil_space_crypt_t*	crypt_data,
	fil_encryption_t	mode)
{
	fil_space_t*	space;

	ut_ad(fil_system.is_initialised());
	ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id));
	ut_ad(purpose == FIL_TYPE_LOG
	      || srv_page_size == UNIV_PAGE_SIZE_ORIG || flags != 0);

	/* Debug injection point that simulates a failure. */
	DBUG_EXECUTE_IF("fil_space_create_failure", return(NULL););

	mutex_enter(&fil_system.mutex);

	space = fil_space_get_by_id(id);

	if (space != NULL) {
		/* The id is already taken: report and fail. */
		ib::error() << "Trying to add tablespace '" << name
			<< "' with id " << id
			<< " to the tablespace memory cache, but tablespace '"
			<< space->name << "' already exists in the cache!";
		mutex_exit(&fil_system.mutex);
		return(NULL);
	}

	/* Zero-filled allocation: every field not assigned below
	starts out as 0. */
	space = static_cast<fil_space_t*>(ut_zalloc_nokey(sizeof(*space)));

	space->id = id;
	/* The tablespace keeps its own copy of the name. */
	space->name = mem_strdup(name);

	UT_LIST_INIT(space->chain, &fil_node_t::chain);

	if ((purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT)
	    && !recv_recovery_on
	    && id > fil_system.max_assigned_id) {

		/* Warn at most once about an id above the old maximum. */
		if (!fil_system.space_id_reuse_warned) {
			fil_system.space_id_reuse_warned = true;

			ib::warn() << "Allocated tablespace ID " << id
				<< " for " << name << ", old maximum was "
				<< fil_system.max_assigned_id;
		}

		fil_system.max_assigned_id = id;
	}

	space->purpose = purpose;
	space->flags = flags;

	space->magic_n = FIL_SPACE_MAGIC_N;
	/* The pointer is stored as is; no copy of crypt_data is made. */
	space->crypt_data = crypt_data;

	DBUG_LOG("tablespace",
		 "Created metadata for " << id << " name " << name);
	if (crypt_data) {
		DBUG_LOG("crypt",
			 "Tablespace " << id << " name " << name
			 << " encryption " << crypt_data->encryption
			 << " key id " << crypt_data->key_id
			 << ":" << fil_crypt_get_mode(crypt_data)
			 << " " << fil_crypt_get_type(crypt_data));
	}

	rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);

	if (space->purpose == FIL_TYPE_TEMPORARY) {
		ut_d(space->latch.set_temp_fsp());
		/* SysTablespace::open_or_create() would pass
		size!=0 to fil_node_create(), so first_time_open
		would not hold in fil_node_open_file(), and we
		must assign this manually. We do not care about
		the durability or atomicity of writes to the
		temporary tablespace files. */
		space->atomic_write_supported = true;
	}

	/* Publish the tablespace: from here on it can be found by id
	and by walking fil_system.space_list. */
	HASH_INSERT(fil_space_t, hash, fil_system.spaces, id, space);

	UT_LIST_ADD_LAST(fil_system.space_list, space);

	/* Unlike the check further above, this one applies to every
	purpose and also during recovery, and issues no warning. */
	if (id < SRV_LOG_SPACE_FIRST_ID && id > fil_system.max_assigned_id) {

		fil_system.max_assigned_id = id;
	}

	/* Inform key rotation that there could be something
	to do */
	if (purpose == FIL_TYPE_TABLESPACE
	    && !srv_fil_crypt_rotate_key_age && fil_crypt_threads_event &&
	    (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF ||
		    srv_encrypt_tables)) {
		/* Key rotation is not enabled, need to inform background
		encryption threads. */
		UT_LIST_ADD_LAST(fil_system.rotation_list, space);
		space->is_in_rotation_list = true;
		/* fil_system.mutex is released before
		fil_crypt_threads_mutex is acquired, so the two are
		never held together here. */
		mutex_exit(&fil_system.mutex);
		mutex_enter(&fil_crypt_threads_mutex);
		os_event_set(fil_crypt_threads_event);
		mutex_exit(&fil_crypt_threads_mutex);
	} else {
		mutex_exit(&fil_system.mutex);
	}

	return(space);
}
1557
1558/*******************************************************************//**
1559Assigns a new space id for a new single-table tablespace. This works simply by
1560incrementing the global counter. If 4 billion id's is not enough, we may need
1561to recycle id's.
1562@return true if assigned, false if not */
1563bool
1564fil_assign_new_space_id(
1565/*====================*/
1566 ulint* space_id) /*!< in/out: space id */
1567{
1568 ulint id;
1569 bool success;
1570
1571 mutex_enter(&fil_system.mutex);
1572
1573 id = *space_id;
1574
1575 if (id < fil_system.max_assigned_id) {
1576 id = fil_system.max_assigned_id;
1577 }
1578
1579 id++;
1580
1581 if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1582 ib::warn() << "You are running out of new single-table"
1583 " tablespace id's. Current counter is " << id
1584 << " and it must not exceed" << SRV_LOG_SPACE_FIRST_ID
1585 << "! To reset the counter to zero you have to dump"
1586 " all your tables and recreate the whole InnoDB"
1587 " installation.";
1588 }
1589
1590 success = (id < SRV_LOG_SPACE_FIRST_ID);
1591
1592 if (success) {
1593 *space_id = fil_system.max_assigned_id = id;
1594 } else {
1595 ib::warn() << "You have run out of single-table tablespace"
1596 " id's! Current counter is " << id
1597 << ". To reset the counter to zero"
1598 " you have to dump all your tables and"
1599 " recreate the whole InnoDB installation.";
1600 *space_id = ULINT_UNDEFINED;
1601 }
1602
1603 mutex_exit(&fil_system.mutex);
1604
1605 return(success);
1606}
1607
/*******************************************************************//**
Returns a pointer to the fil_space_t that is in the memory cache
associated with a space id. The caller must lock fil_system.mutex.

If the cached tablespace has size 0 and is not a log, the mutex is
temporarily released and reacquired through
fil_mutex_enter_and_prepare_for_io(), and the single file of the tablespace
is prepared for I/O and immediately completed again, which according to the
comment in the body opens the file and updates the size fields.
The mutex is held again when the function returns.
@return file_space_t pointer, NULL if space not found */
UNIV_INLINE
fil_space_t*
fil_space_get_space(
/*================*/
	ulint	id)	/*!< in: space id */
{
	fil_space_t*	space;
	fil_node_t*	node;

	ut_ad(fil_system.is_initialised());

	space = fil_space_get_by_id(id);
	if (space == NULL || space->size != 0) {
		/* Not found, or the size is already known. */
		return(space);
	}

	switch (space->purpose) {
	case FIL_TYPE_LOG:
		/* Logs are returned as they are. */
		break;
	case FIL_TYPE_TEMPORARY:
	case FIL_TYPE_TABLESPACE:
	case FIL_TYPE_IMPORT:
		ut_a(id != 0);

		mutex_exit(&fil_system.mutex);

		/* It is possible that the space gets evicted at this point
		before the fil_mutex_enter_and_prepare_for_io() acquires
		the fil_system.mutex. Check for this after completing the
		call to fil_mutex_enter_and_prepare_for_io(). */
		fil_mutex_enter_and_prepare_for_io(id);

		/* We are still holding the fil_system.mutex. Check if
		the space is still in memory cache. */
		space = fil_space_get_by_id(id);

		if (space == NULL || UT_LIST_GET_LEN(space->chain) == 0) {
			return(NULL);
		}

		/* The following code must change when InnoDB supports
		multiple datafiles per tablespace. */
		ut_a(1 == UT_LIST_GET_LEN(space->chain));

		node = UT_LIST_GET_FIRST(space->chain);

		/* It must be a single-table tablespace and we have not opened
		the file yet; the following calls will open it and update the
		size fields */

		if (!fil_node_prepare_for_io(node, space)) {
			/* The single-table tablespace can't be opened,
			because the ibd file is missing. */
			return(NULL);
		}

		/* No I/O is performed here; the prepare/complete pair is
		used only for its side effects on the node. */
		fil_node_complete_io(node, IORequestRead);
	}

	return(space);
}
1673
1674/** Set the recovered size of a tablespace in pages.
1675@param id tablespace ID
1676@param size recovered size in pages */
1677UNIV_INTERN
1678void
1679fil_space_set_recv_size(ulint id, ulint size)
1680{
1681 mutex_enter(&fil_system.mutex);
1682 ut_ad(size);
1683 ut_ad(id < SRV_LOG_SPACE_FIRST_ID);
1684
1685 if (fil_space_t* space = fil_space_get_space(id)) {
1686 space->recv_size = size;
1687 }
1688
1689 mutex_exit(&fil_system.mutex);
1690}
1691
1692/*******************************************************************//**
1693Returns the size of the space in pages. The tablespace must be cached in the
1694memory cache.
1695@return space size, 0 if space not found */
1696ulint
1697fil_space_get_size(
1698/*===============*/
1699 ulint id) /*!< in: space id */
1700{
1701 fil_space_t* space;
1702 ulint size;
1703
1704 ut_ad(fil_system.is_initialised());
1705 mutex_enter(&fil_system.mutex);
1706
1707 space = fil_space_get_space(id);
1708
1709 size = space ? space->size : 0;
1710
1711 mutex_exit(&fil_system.mutex);
1712
1713 return(size);
1714}
1715
1716/*******************************************************************//**
1717Returns the flags of the space. The tablespace must be cached
1718in the memory cache.
1719@return flags, ULINT_UNDEFINED if space not found */
1720ulint
1721fil_space_get_flags(
1722/*================*/
1723 ulint id) /*!< in: space id */
1724{
1725 fil_space_t* space;
1726 ulint flags;
1727
1728 ut_ad(fil_system.is_initialised());
1729
1730 mutex_enter(&fil_system.mutex);
1731
1732 space = fil_space_get_space(id);
1733
1734 if (space == NULL) {
1735 mutex_exit(&fil_system.mutex);
1736
1737 return(ULINT_UNDEFINED);
1738 }
1739
1740 flags = space->flags;
1741
1742 mutex_exit(&fil_system.mutex);
1743
1744 return(flags);
1745}
1746
1747/** Open each file. Only invoked on fil_system.temp_space.
1748@return whether all files were opened */
1749bool fil_space_t::open()
1750{
1751 ut_ad(fil_system.is_initialised());
1752
1753 mutex_enter(&fil_system.mutex);
1754 ut_ad(this == fil_system.temp_space
1755 || srv_operation == SRV_OPERATION_BACKUP
1756 || srv_operation == SRV_OPERATION_RESTORE
1757 || srv_operation == SRV_OPERATION_RESTORE_DELTA);
1758
1759 for (fil_node_t* node = UT_LIST_GET_FIRST(chain);
1760 node != NULL;
1761 node = UT_LIST_GET_NEXT(chain, node)) {
1762 if (!node->is_open() && !fil_node_open_file(node)) {
1763 mutex_exit(&fil_system.mutex);
1764 return false;
1765 }
1766 }
1767
1768 mutex_exit(&fil_system.mutex);
1769 return true;
1770}
1771
1772/** Close each file. Only invoked on fil_system.temp_space. */
1773void fil_space_t::close()
1774{
1775 if (!fil_system.is_initialised()) {
1776 return;
1777 }
1778
1779 mutex_enter(&fil_system.mutex);
1780 ut_ad(this == fil_system.temp_space
1781 || srv_operation == SRV_OPERATION_BACKUP
1782 || srv_operation == SRV_OPERATION_RESTORE
1783 || srv_operation == SRV_OPERATION_RESTORE_DELTA);
1784
1785 for (fil_node_t* node = UT_LIST_GET_FIRST(chain);
1786 node != NULL;
1787 node = UT_LIST_GET_NEXT(chain, node)) {
1788 if (node->is_open()) {
1789 fil_node_close_file(node);
1790 }
1791 }
1792
1793 mutex_exit(&fil_system.mutex);
1794}
1795
1796/** Returns the page size of the space and whether it is compressed or not.
1797The tablespace must be cached in the memory cache.
1798@param[in] id space id
1799@param[out] found true if tablespace was found
1800@return page size */
1801const page_size_t
1802fil_space_get_page_size(
1803 ulint id,
1804 bool* found)
1805{
1806 const ulint flags = fil_space_get_flags(id);
1807
1808 if (flags == ULINT_UNDEFINED) {
1809 *found = false;
1810 return(univ_page_size);
1811 }
1812
1813 *found = true;
1814
1815 return(page_size_t(flags));
1816}
1817
1818void fil_system_t::create(ulint hash_size)
1819{
1820 ut_ad(this == &fil_system);
1821 ut_ad(!is_initialised());
1822 ut_ad(!(srv_page_size % FSP_EXTENT_SIZE));
1823 ut_ad(srv_page_size);
1824 ut_ad(!spaces);
1825
1826 m_initialised = true;
1827
1828 compile_time_assert(!(UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX));
1829 compile_time_assert(!(UNIV_PAGE_SIZE_MIN % FSP_EXTENT_SIZE_MIN));
1830
1831 ut_ad(hash_size > 0);
1832
1833 mutex_create(LATCH_ID_FIL_SYSTEM, &mutex);
1834
1835 spaces = hash_create(hash_size);
1836
1837 fil_space_crypt_init();
1838}
1839
1840void fil_system_t::close()
1841{
1842 ut_ad(this == &fil_system);
1843 ut_a(!UT_LIST_GET_LEN(LRU));
1844 ut_a(!UT_LIST_GET_LEN(unflushed_spaces));
1845 ut_a(!UT_LIST_GET_LEN(space_list));
1846 ut_ad(!sys_space);
1847 ut_ad(!temp_space);
1848
1849 if (is_initialised()) {
1850 m_initialised = false;
1851 hash_table_free(spaces);
1852 spaces = NULL;
1853 mutex_free(&mutex);
1854 fil_space_crypt_cleanup();
1855 }
1856
1857 ut_ad(!spaces);
1858}
1859
1860/*******************************************************************//**
1861Opens all log files and system tablespace data files. They stay open until the
1862database server shutdown. This should be called at a server startup after the
1863space objects for the log and the system tablespace have been created. The
1864purpose of this operation is to make sure we never run out of file descriptors
1865if we need to read from the insert buffer or to write to the log. */
1866void
1867fil_open_log_and_system_tablespace_files(void)
1868/*==========================================*/
1869{
1870 fil_space_t* space;
1871
1872 mutex_enter(&fil_system.mutex);
1873
1874 for (space = UT_LIST_GET_FIRST(fil_system.space_list);
1875 space != NULL;
1876 space = UT_LIST_GET_NEXT(space_list, space)) {
1877
1878 fil_node_t* node;
1879
1880 if (fil_space_belongs_in_lru(space)) {
1881
1882 continue;
1883 }
1884
1885 for (node = UT_LIST_GET_FIRST(space->chain);
1886 node != NULL;
1887 node = UT_LIST_GET_NEXT(chain, node)) {
1888
1889 if (!node->is_open()) {
1890 if (!fil_node_open_file(node)) {
1891 /* This func is called during server's
1892 startup. If some file of log or system
1893 tablespace is missing, the server
1894 can't start successfully. So we should
1895 assert for it. */
1896 ut_a(0);
1897 }
1898 }
1899
1900 if (srv_max_n_open_files < 10 + fil_system.n_open) {
1901
1902 ib::warn() << "You must raise the value of"
1903 " innodb_open_files in my.cnf!"
1904 " Remember that InnoDB keeps all"
1905 " log files and all system"
1906 " tablespace files open"
1907 " for the whole time mysqld is"
1908 " running, and needs to open also"
1909 " some .ibd files if the"
1910 " file-per-table storage model is used."
1911 " Current open files "
1912 << fil_system.n_open
1913 << ", max allowed open files "
1914 << srv_max_n_open_files
1915 << ".";
1916 }
1917 }
1918 }
1919
1920 mutex_exit(&fil_system.mutex);
1921}
1922
1923/*******************************************************************//**
1924Closes all open files. There must not be any pending i/o's or not flushed
1925modifications in the files. */
1926void
1927fil_close_all_files(void)
1928/*=====================*/
1929{
1930 fil_space_t* space;
1931
1932 /* At shutdown, we should not have any files in this list. */
1933 ut_ad(fil_system.is_initialised());
1934 ut_ad(srv_fast_shutdown == 2
1935 || !srv_was_started
1936 || UT_LIST_GET_LEN(fil_system.named_spaces) == 0);
1937
1938 mutex_enter(&fil_system.mutex);
1939
1940 for (space = UT_LIST_GET_FIRST(fil_system.space_list);
1941 space != NULL; ) {
1942 fil_node_t* node;
1943 fil_space_t* prev_space = space;
1944
1945 for (node = UT_LIST_GET_FIRST(space->chain);
1946 node != NULL;
1947 node = UT_LIST_GET_NEXT(chain, node)) {
1948
1949 if (node->is_open()) {
1950 fil_node_close_file(node);
1951 }
1952 }
1953
1954 space = UT_LIST_GET_NEXT(space_list, space);
1955 fil_space_detach(prev_space);
1956 fil_space_free_low(prev_space);
1957 }
1958
1959 mutex_exit(&fil_system.mutex);
1960
1961 ut_ad(srv_fast_shutdown == 2
1962 || !srv_was_started
1963 || UT_LIST_GET_LEN(fil_system.named_spaces) == 0);
1964}
1965
1966/*******************************************************************//**
1967Closes the redo log files. There must not be any pending i/o's or not
1968flushed modifications in the files. */
1969void
1970fil_close_log_files(
1971/*================*/
1972 bool free) /*!< in: whether to free the memory object */
1973{
1974 fil_space_t* space;
1975
1976 mutex_enter(&fil_system.mutex);
1977
1978 space = UT_LIST_GET_FIRST(fil_system.space_list);
1979
1980 while (space != NULL) {
1981 fil_node_t* node;
1982 fil_space_t* prev_space = space;
1983
1984 if (space->purpose != FIL_TYPE_LOG) {
1985 space = UT_LIST_GET_NEXT(space_list, space);
1986 continue;
1987 }
1988
1989 /* Log files are not in the fil_system.named_spaces list. */
1990 ut_ad(space->max_lsn == 0);
1991
1992 for (node = UT_LIST_GET_FIRST(space->chain);
1993 node != NULL;
1994 node = UT_LIST_GET_NEXT(chain, node)) {
1995
1996 if (node->is_open()) {
1997 fil_node_close_file(node);
1998 }
1999 }
2000
2001 space = UT_LIST_GET_NEXT(space_list, space);
2002
2003 if (free) {
2004 fil_space_detach(prev_space);
2005 fil_space_free_low(prev_space);
2006 }
2007 }
2008
2009 mutex_exit(&fil_system.mutex);
2010
2011 if (free) {
2012 log_sys.log.close();
2013 }
2014}
2015
2016/*******************************************************************//**
2017Sets the max tablespace id counter if the given number is bigger than the
2018previous value. */
2019void
2020fil_set_max_space_id_if_bigger(
2021/*===========================*/
2022 ulint max_id) /*!< in: maximum known id */
2023{
2024 if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
2025 ib::fatal() << "Max tablespace id is too high, " << max_id;
2026 }
2027
2028 mutex_enter(&fil_system.mutex);
2029
2030 if (fil_system.max_assigned_id < max_id) {
2031
2032 fil_system.max_assigned_id = max_id;
2033 }
2034
2035 mutex_exit(&fil_system.mutex);
2036}
2037
2038/** Write the flushed LSN to the page header of the first page in the
2039system tablespace.
2040@param[in] lsn flushed LSN
2041@return DB_SUCCESS or error number */
2042dberr_t
2043fil_write_flushed_lsn(
2044 lsn_t lsn)
2045{
2046 byte* buf1;
2047 byte* buf;
2048 dberr_t err = DB_TABLESPACE_NOT_FOUND;
2049
2050 buf1 = static_cast<byte*>(ut_malloc_nokey(2U << srv_page_size_shift));
2051 buf = static_cast<byte*>(ut_align(buf1, srv_page_size));
2052
2053 const page_id_t page_id(TRX_SYS_SPACE, 0);
2054
2055 err = fil_read(page_id, univ_page_size, 0, srv_page_size,
2056 buf);
2057
2058 if (err == DB_SUCCESS) {
2059 mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, lsn);
2060 err = fil_write(page_id, univ_page_size, 0,
2061 srv_page_size, buf);
2062 fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
2063 }
2064
2065 ut_free(buf1);
2066 return(err);
2067}
2068
2069/** Acquire a tablespace when it could be dropped concurrently.
2070Used by background threads that do not necessarily hold proper locks
2071for concurrency control.
2072@param[in] id tablespace ID
2073@param[in] silent whether to silently ignore missing tablespaces
2074@return the tablespace
2075@retval NULL if missing or being deleted or truncated */
2076UNIV_INTERN
2077fil_space_t*
2078fil_space_acquire_low(ulint id, bool silent)
2079{
2080 fil_space_t* space;
2081
2082 mutex_enter(&fil_system.mutex);
2083
2084 space = fil_space_get_by_id(id);
2085
2086 if (space == NULL) {
2087 if (!silent) {
2088 ib::warn() << "Trying to access missing"
2089 " tablespace " << id;
2090 }
2091 } else if (space->is_stopping()) {
2092 space = NULL;
2093 } else {
2094 space->acquire();
2095 }
2096
2097 mutex_exit(&fil_system.mutex);
2098
2099 return(space);
2100}
2101
2102/** Acquire a tablespace for reading or writing a block,
2103when it could be dropped concurrently.
2104@param[in] id tablespace ID
2105@return the tablespace
2106@retval NULL if missing */
2107fil_space_t*
2108fil_space_acquire_for_io(ulint id)
2109{
2110 mutex_enter(&fil_system.mutex);
2111
2112 fil_space_t* space = fil_space_get_by_id(id);
2113
2114 if (space) {
2115 space->acquire_for_io();
2116 }
2117
2118 mutex_exit(&fil_system.mutex);
2119
2120 return(space);
2121}
2122
2123/********************************************************//**
2124Creates the database directory for a table if it does not exist yet. */
2125void
2126fil_create_directory_for_tablename(
2127/*===============================*/
2128 const char* name) /*!< in: name in the standard
2129 'databasename/tablename' format */
2130{
2131 const char* namend;
2132 char* path;
2133 ulint len;
2134
2135 len = strlen(fil_path_to_mysql_datadir);
2136 namend = strchr(name, '/');
2137 ut_a(namend);
2138 path = static_cast<char*>(
2139 ut_malloc_nokey(len + ulint(namend - name) + 2));
2140
2141 memcpy(path, fil_path_to_mysql_datadir, len);
2142 path[len] = '/';
2143 memcpy(path + len + 1, name, ulint(namend - name));
2144 path[len + ulint(namend - name) + 1] = 0;
2145
2146 os_normalize_path(path);
2147
2148 bool success = os_file_create_directory(path, false);
2149 ut_a(success);
2150
2151 ut_free(path);
2152}
2153
/** Write a log record about an operation on a tablespace file.
The record consists of the initial log record, the 4-byte flags
(MLOG_FILE_CREATE2 only), a 2-byte length and the NUL-terminated path.
For MLOG_FILE_RENAME2 a second 2-byte length and the NUL-terminated
new_path are appended. Nothing is written if mlog_open() returns NULL.
@param[in]	type		MLOG_FILE_NAME or MLOG_FILE_DELETE
or MLOG_FILE_CREATE2 or MLOG_FILE_RENAME2
@param[in]	space_id	tablespace identifier
@param[in]	first_page_no	first page number in the file
@param[in]	path		file path
@param[in]	new_path	if type is MLOG_FILE_RENAME2, the new name
@param[in]	flags		if type is MLOG_FILE_CREATE2, the space flags
@param[in,out]	mtr		mini-transaction */
static
void
fil_op_write_log(
	mlog_id_t	type,
	ulint		space_id,
	ulint		first_page_no,
	const char*	path,
	const char*	new_path,
	ulint		flags,
	mtr_t*		mtr)
{
	byte*		log_ptr;
	ulint		len;

	ut_ad(first_page_no == 0);
	ut_ad(fsp_flags_is_valid(flags, space_id));

	/* fil_name_parse() requires that there be at least one path
	separator and that the file path end with ".ibd". */
	ut_ad(strchr(path, OS_PATH_SEPARATOR) != NULL);
	ut_ad(strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD) == 0);

	/* Open the log buffer for the fixed-size part of the record.
	NOTE(review): 11 + 4 + 2 + 1 presumably covers the initial
	record, the flags, the path length and one spare byte; confirm
	against mlog_write_initial_log_record_low(). */
	log_ptr = mlog_open(mtr, 11 + 4 + 2 + 1);

	if (log_ptr == NULL) {
		/* Logging in mtr is switched off during crash recovery:
		in that case mlog_open returns NULL */
		return;
	}

	log_ptr = mlog_write_initial_log_record_low(
		type, space_id, first_page_no, log_ptr, mtr);

	/* Only the create record carries the tablespace flags. */
	if (type == MLOG_FILE_CREATE2) {
		mach_write_to_4(log_ptr, flags);
		log_ptr += 4;
	}

	/* Let us store the strings as null-terminated for easier readability
	and handling */

	/* The logged length includes the terminating NUL byte. */
	len = strlen(path) + 1;

	mach_write_to_2(log_ptr, len);
	log_ptr += 2;
	mlog_close(mtr, log_ptr);

	/* The path itself is appended after the fixed-size part has
	been closed. */
	mlog_catenate_string(
		mtr, reinterpret_cast<const byte*>(path), len);

	switch (type) {
	case MLOG_FILE_RENAME2:
		/* Append the new name in the same length-prefixed,
		NUL-terminated form as the old one. */
		ut_ad(strchr(new_path, OS_PATH_SEPARATOR) != NULL);
		len = strlen(new_path) + 1;
		log_ptr = mlog_open(mtr, 2 + len);
		ut_a(log_ptr);
		mach_write_to_2(log_ptr, len);
		log_ptr += 2;
		mlog_close(mtr, log_ptr);

		mlog_catenate_string(
			mtr, reinterpret_cast<const byte*>(new_path), len);
		break;
	case MLOG_FILE_NAME:
	case MLOG_FILE_DELETE:
	case MLOG_FILE_CREATE2:
		/* Nothing follows the path for these record types. */
		break;
	default:
		/* Any other record type is a caller error. */
		ut_ad(0);
	}
}
2234
/** Write redo log for renaming a file.
Appends an MLOG_FILE_RENAME2 record to the given mini-transaction via
fil_op_write_log(), passing 0 as the flags. The caller is responsible
for committing the mini-transaction.
@param[in]	space_id	tablespace id
@param[in]	first_page_no	first page number in the file
@param[in]	old_name	tablespace file name
@param[in]	new_name	tablespace file name after renaming
@param[in,out]	mtr		mini-transaction */
static
void
fil_name_write_rename_low(
	ulint		space_id,
	ulint		first_page_no,
	const char*	old_name,
	const char*	new_name,
	mtr_t*		mtr)
{
	/* Debug builds reject predefined tablespaces here. */
	ut_ad(!is_predefined_tablespace(space_id));

	fil_op_write_log(
		MLOG_FILE_RENAME2,
		space_id, first_page_no, old_name, new_name, 0, mtr);
}
2256
/** Write redo log for renaming a file.
The record is written in a mini-transaction of its own (for first page
number 0), and log_write_up_to() is then called with the commit LSN of
that mini-transaction before returning.
@param[in]	space_id	tablespace id
@param[in]	old_name	tablespace file name
@param[in]	new_name	tablespace file name after renaming */
static void
fil_name_write_rename(
	ulint		space_id,
	const char*	old_name,
	const char*	new_name)
{
	mtr_t	mtr;
	mtr.start();
	fil_name_write_rename_low(space_id, 0, old_name, new_name, &mtr);
	mtr.commit();
	/* NOTE(review): the 'true' argument presumably requests a flush
	to disk; confirm against log_write_up_to(). */
	log_write_up_to(mtr.commit_lsn(), true);
}
2273
/** Write MLOG_FILE_NAME for a file.
Forwards to fil_op_write_log() with no new path and with flags 0.
@param[in]	space_id	tablespace id
@param[in]	first_page_no	first page number in the file
@param[in]	name		tablespace file name
@param[in,out]	mtr		mini-transaction */
static
void
fil_name_write(
	ulint		space_id,
	ulint		first_page_no,
	const char*	name,
	mtr_t*		mtr)
{
	fil_op_write_log(
		MLOG_FILE_NAME, space_id, first_page_no, name, NULL, 0, mtr);
}
/** Write MLOG_FILE_NAME for a file.
Convenience overload that takes the tablespace ID from space->id and
the file name from file->name.
@param[in]	space		tablespace
@param[in]	first_page_no	first page number in the file
@param[in]	file		tablespace file
@param[in,out]	mtr		mini-transaction */
static
void
fil_name_write(
	const fil_space_t*	space,
	ulint			first_page_no,
	const fil_node_t*	file,
	mtr_t*			mtr)
{
	fil_name_write(space->id, first_page_no, file->name, mtr);
}
2305
/** Replay a file rename operation if possible.
@param[in]	space_id	tablespace identifier
@param[in]	first_page_no	first page number in the file
@param[in]	name		old file name
@param[in]	new_name	new file name
@return	whether the operation was successfully applied
(the name did not exist, or new_name did not exist and
name was successfully renamed to new_name) */
bool
fil_op_replay_rename(
	ulint		space_id,
	ulint		first_page_no,
	const char*	name,
	const char*	new_name)
{
	ut_ad(first_page_no == 0);

	/* In order to replay the rename, the following must hold:
	* The new name is not already used.
	* A tablespace exists with the old name.
	* The space ID for that tablespace matches this log entry.
	This will prevent unintended renames during recovery. */
	fil_space_t*	space = fil_space_get(space_id);

	if (space == NULL) {
		/* There is no tablespace with this ID, so there is
		nothing to rename; this counts as success. */
		return(true);
	}

	const bool name_match
		= strcmp(name, UT_LIST_GET_FIRST(space->chain)->name) == 0;

	if (!name_match) {
		/* The first file of the tablespace is not called 'name';
		skip the rename and report success. */
		return(true);
	}

	/* Create the database directory for the new name, if
	it does not exist yet */

	const char*	namend = strrchr(new_name, OS_PATH_SEPARATOR);
	ut_a(namend != NULL);

	/* dir is a copy of new_name up to, but not including, its
	last path separator. */
	char*		dir = static_cast<char*>(
		ut_malloc_nokey(ulint(namend - new_name) + 1));

	memcpy(dir, new_name, ulint(namend - new_name));
	dir[namend - new_name] = '\0';

	bool		success = os_file_create_directory(dir, false);
	ut_a(success);

	/* dirlen is the offset in new_name at which the last two path
	components (directory and file name) start; it stays 0 if dir
	contains no separator. */
	ulint		dirlen = 0;

	if (const char* dirend = strrchr(dir, OS_PATH_SEPARATOR)) {
		dirlen = ulint(dirend - dir) + 1;
	}

	ut_free(dir);

	/* New path must not exist. */
	dberr_t		err = fil_rename_tablespace_check(
		name, new_name, false);
	if (err != DB_SUCCESS) {
		ib::error() << " Cannot replay file rename."
			" Remove either file and try again.";
		return(false);
	}

	/* Build the new table name from the last two path components
	of new_name, without the trailing 4-character ".ibd" suffix. */
	char*		new_table = mem_strdupl(
		new_name + dirlen,
		strlen(new_name + dirlen)
		- 4 /* remove ".ibd" */);

	/* The separator between the two components is replaced by '/'
	on platforms where OS_PATH_SEPARATOR differs, because
	fil_rename_tablespace() expects a name containing '/'. */
	ut_ad(new_table[ulint(namend - new_name) - dirlen]
	      == OS_PATH_SEPARATOR);
#if OS_PATH_SEPARATOR != '/'
	new_table[namend - new_name - dirlen] = '/';
#endif

	/* A failed rename at this point is treated as fatal. */
	if (!fil_rename_tablespace(
		    space_id, name, new_table, new_name)) {
		ut_error;
	}

	ut_free(new_table);
	return(true);
}
2392
/** File operations for tablespace; passed to fil_check_pending_io()
and fil_check_pending_operations() to identify the operation that is
waiting for pending activity to finish */
enum fil_operation_t {
	FIL_OPERATION_DELETE,	/*!< delete a single-table tablespace */
	FIL_OPERATION_CLOSE,	/*!< close a single-table tablespace */
	FIL_OPERATION_TRUNCATE	/*!< truncate a single-table tablespace;
				makes fil_check_pending_io() set
				fil_space_t::is_being_truncated */
};
2399
2400/** Check for pending operations.
2401@param[in] space tablespace
2402@param[in] count number of attempts so far
2403@return 0 if no operations else count + 1. */
2404static
2405ulint
2406fil_check_pending_ops(const fil_space_t* space, ulint count)
2407{
2408 ut_ad(mutex_own(&fil_system.mutex));
2409
2410 if (space == NULL) {
2411 return 0;
2412 }
2413
2414 if (ulint n_pending_ops = my_atomic_loadlint(&space->n_pending_ops)) {
2415
2416 if (count > 5000) {
2417 ib::warn() << "Trying to close/delete/truncate"
2418 " tablespace '" << space->name
2419 << "' but there are " << n_pending_ops
2420 << " pending operations on it.";
2421 }
2422
2423 return(count + 1);
2424 }
2425
2426 return(0);
2427}
2428
2429/*******************************************************************//**
2430Check for pending IO.
2431@return 0 if no pending else count + 1. */
2432static
2433ulint
2434fil_check_pending_io(
2435/*=================*/
2436 fil_operation_t operation, /*!< in: File operation */
2437 fil_space_t* space, /*!< in/out: Tablespace to check */
2438 fil_node_t** node, /*!< out: Node in space list */
2439 ulint count) /*!< in: number of attempts so far */
2440{
2441 ut_ad(mutex_own(&fil_system.mutex));
2442 ut_ad(!space->referenced());
2443
2444 switch (operation) {
2445 case FIL_OPERATION_DELETE:
2446 case FIL_OPERATION_CLOSE:
2447 break;
2448 case FIL_OPERATION_TRUNCATE:
2449 space->is_being_truncated = true;
2450 break;
2451 }
2452
2453 /* The following code must change when InnoDB supports
2454 multiple datafiles per tablespace. */
2455 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2456
2457 *node = UT_LIST_GET_FIRST(space->chain);
2458
2459 if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) {
2460
2461 ut_a(!(*node)->being_extended);
2462
2463 if (count > 1000) {
2464 ib::warn() << "Trying to delete/close/truncate"
2465 " tablespace '" << space->name
2466 << "' but there are "
2467 << space->n_pending_flushes
2468 << " flushes and " << (*node)->n_pending
2469 << " pending i/o's on it.";
2470 }
2471
2472 return(count + 1);
2473 }
2474
2475 return(0);
2476}
2477
/*******************************************************************//**
Check pending operations on a tablespace.
Sets fil_space_t::stop_new_ops on the tablespace, then waits first
until fil_check_pending_ops() and then until fil_check_pending_io()
report nothing pending, sleeping between attempts with
fil_system.mutex released. On success, *path receives a copy of the
data file name made with mem_strdup(), which the caller owns.
@return DB_SUCCESS, or DB_TABLESPACE_NOT_FOUND if the tablespace is
not found in the memory cache when checking for pending i/o. */
static
dberr_t
fil_check_pending_operations(
/*=========================*/
	ulint		id,		/*!< in: space id */
	fil_operation_t	operation,	/*!< in: File operation */
	fil_space_t**	space,		/*!< out: tablespace instance
					in memory */
	char**		path)		/*!< out/own: tablespace path */
{
	ulint		count = 0;

	ut_a(!is_system_tablespace(id));
	ut_ad(space);

	*space = 0;

	mutex_enter(&fil_system.mutex);
	fil_space_t* sp = fil_space_get_by_id(id);

	if (sp) {
		sp->stop_new_ops = true;
		if (sp->crypt_data) {
			/* Hold a reference on the tablespace while
			fil_system.mutex is released around the call
			to fil_space_crypt_close_tablespace(). */
			sp->acquire();
			mutex_exit(&fil_system.mutex);
			fil_space_crypt_close_tablespace(sp);
			mutex_enter(&fil_system.mutex);
			sp->release();
		}
	}

	/* Check for pending operations. */

	do {
		/* Look the tablespace up again on every attempt,
		because the mutex was released in between. */
		sp = fil_space_get_by_id(id);

		count = fil_check_pending_ops(sp, count);

		mutex_exit(&fil_system.mutex);

		if (count > 0) {
			os_thread_sleep(20000);
		}

		mutex_enter(&fil_system.mutex);
	} while (count > 0);

	/* Check for pending IO. */

	*path = 0;

	/* fil_system.mutex is held at the top of every iteration and
	is released before leaving the loop. */
	for (;;) {
		sp = fil_space_get_by_id(id);

		if (sp == NULL) {
			mutex_exit(&fil_system.mutex);
			return(DB_TABLESPACE_NOT_FOUND);
		}

		fil_node_t*	node;

		count = fil_check_pending_io(operation, sp, &node, count);

		if (count == 0) {
			/* Nothing is pending: copy the file name while
			still holding the mutex. */
			*path = mem_strdup(node->name);
		}

		mutex_exit(&fil_system.mutex);

		if (count == 0) {
			break;
		}

		os_thread_sleep(20000);
		mutex_enter(&fil_system.mutex);
	}

	ut_ad(sp);

	*space = sp;
	return(DB_SUCCESS);
}
2563
2564/*******************************************************************//**
2565Closes a single-table tablespace. The tablespace must be cached in the
2566memory cache. Free all pages used by the tablespace.
2567@return DB_SUCCESS or error */
2568dberr_t
2569fil_close_tablespace(
2570/*=================*/
2571 trx_t* trx, /*!< in/out: Transaction covering the close */
2572 ulint id) /*!< in: space id */
2573{
2574 char* path = 0;
2575 fil_space_t* space = 0;
2576 dberr_t err;
2577
2578 ut_a(!is_system_tablespace(id));
2579
2580 err = fil_check_pending_operations(id, FIL_OPERATION_CLOSE,
2581 &space, &path);
2582
2583 if (err != DB_SUCCESS) {
2584 return(err);
2585 }
2586
2587 ut_a(space);
2588 ut_a(path != 0);
2589
2590 rw_lock_x_lock(&space->latch);
2591
2592 /* Invalidate in the buffer pool all pages belonging to the
2593 tablespace. Since we have set space->stop_new_ops = true, readahead
2594 or ibuf merge can no longer read more pages of this tablespace to the
2595 buffer pool. Thus we can clean the tablespace out of the buffer pool
2596 completely and permanently. The flag stop_new_ops also prevents
2597 fil_flush() from being applied to this tablespace. */
2598
2599 {
2600 FlushObserver observer(space, trx, NULL);
2601 buf_LRU_flush_or_remove_pages(id, &observer);
2602 }
2603
2604 /* If the free is successful, the X lock will be released before
2605 the space memory data structure is freed. */
2606
2607 if (!fil_space_free(id, true)) {
2608 rw_lock_x_unlock(&space->latch);
2609 err = DB_TABLESPACE_NOT_FOUND;
2610 } else {
2611 err = DB_SUCCESS;
2612 }
2613
2614 /* If it is a delete then also delete any generated files, otherwise
2615 when we drop the database the remove directory will fail. */
2616
2617 char* cfg_name = fil_make_filepath(path, NULL, CFG, false);
2618 if (cfg_name != NULL) {
2619 os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL);
2620 ut_free(cfg_name);
2621 }
2622
2623 ut_free(path);
2624
2625 return(err);
2626}
2627
2628/** Determine whether a table can be accessed in operations that are
2629not (necessarily) protected by meta-data locks.
2630(Rollback would generally be protected, but rollback of
2631FOREIGN KEY CASCADE/SET NULL is not protected by meta-data locks
2632but only by InnoDB table locks, which may be broken by TRUNCATE TABLE.)
2633@param[in] table persistent table
2634checked @return whether the table is accessible */
2635bool
2636fil_table_accessible(const dict_table_t* table)
2637{
2638 if (UNIV_UNLIKELY(!table->is_readable() || table->corrupted)) {
2639 return(false);
2640 }
2641
2642 mutex_enter(&fil_system.mutex);
2643 bool accessible = table->space && !table->space->is_stopping();
2644 mutex_exit(&fil_system.mutex);
2645 ut_ad(accessible || dict_table_is_file_per_table(table));
2646 return accessible;
2647}
2648
/** Delete a tablespace and associated .ibd file.
The steps are: wait for pending operations and i/o; flush or remove
the pages of the tablespace from the buffer pool; write and flush an
MLOG_FILE_DELETE redo log record; delete the generated CFG file and,
for tablespaces with the DATA DIRECTORY flag, the link file; detach and
free the in-memory tablespace; and finally delete the data file.
@param[in]	id		tablespace identifier
@param[in]	drop_ahi	whether to drop the adaptive hash index
(only present if BTR_CUR_HASH_ADAPT is defined)
@return	DB_SUCCESS or error */
dberr_t
fil_delete_tablespace(
	ulint		id
#ifdef BTR_CUR_HASH_ADAPT
	, bool		drop_ahi /*!< whether to drop the adaptive hash index */
#endif /* BTR_CUR_HASH_ADAPT */
	)
{
	char*		path = 0;
	fil_space_t*	space = 0;

	ut_a(!is_system_tablespace(id));

	dberr_t err = fil_check_pending_operations(
		id, FIL_OPERATION_DELETE, &space, &path);

	if (err != DB_SUCCESS) {

		ib::error() << "Cannot delete tablespace " << id
			<< " because it is not found in the tablespace"
			" memory cache.";

		return(err);
	}

	ut_a(space);
	ut_a(path != 0);

	/* IMPORTANT: Because we have set space::stop_new_ops there
	can't be any new ibuf merges, reads or flushes. We are here
	because node::n_pending was zero above. However, it is still
	possible to have pending read and write requests:

	A read request can happen because the reader thread has
	gone through the ::stop_new_ops check in buf_page_init_for_read()
	before the flag was set and has not yet incremented ::n_pending
	when we checked it above.

	A write request can be issued any time because we don't check
	the ::stop_new_ops flag when queueing a block for write.

	We deal with pending write requests in the following function
	where we'd minimally evict all dirty pages belonging to this
	space from the flush_list. Note that if a block is IO-fixed
	we'll wait for IO to complete.

	To deal with potential read requests, we will check the
	::stop_new_ops flag in fil_io(). */

	buf_LRU_flush_or_remove_pages(id, NULL
#ifdef BTR_CUR_HASH_ADAPT
				      , drop_ahi
#endif /* BTR_CUR_HASH_ADAPT */
				      );

	/* If it is a delete then also delete any generated files, otherwise
	when we drop the database the remove directory will fail. */
	{
		/* Before deleting the file, write a log record about
		it, so that InnoDB crash recovery will expect the file
		to be gone. */
		mtr_t		mtr;

		mtr_start(&mtr);
		fil_op_write_log(MLOG_FILE_DELETE, id, 0, path, NULL, 0, &mtr);
		mtr_commit(&mtr);
		/* Even if we got killed shortly after deleting the
		tablespace file, the record must have already been
		written to the redo log. */
		log_write_up_to(mtr.commit_lsn(), true);

		char*	cfg_name = fil_make_filepath(path, NULL, CFG, false);
		if (cfg_name != NULL) {
			os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL);
			ut_free(cfg_name);
		}
	}

	/* Delete the link file pointing to the ibd file we are deleting. */
	if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) {
		RemoteDatafile::delete_link_file(space->name);
	}

	mutex_enter(&fil_system.mutex);

	/* Double check the sanity of pending ops after reacquiring
	the fil_system::mutex. */
	if (const fil_space_t* s = fil_space_get_by_id(id)) {
		ut_a(s == space);
		ut_a(!space->referenced());
		ut_a(UT_LIST_GET_LEN(space->chain) == 1);
		fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
		ut_a(node->n_pending == 0);

		/* Detach the tablespace while still holding
		fil_system.mutex; the remaining cleanup is done
		without that mutex. */
		fil_space_detach(space);
		mutex_exit(&fil_system.mutex);

		log_mutex_enter();

		/* A nonzero max_lsn means that the tablespace is on
		fil_system.named_spaces and must be removed from it;
		this is done while holding the log mutex. */
		if (space->max_lsn != 0) {
			ut_d(space->max_lsn = 0);
			UT_LIST_REMOVE(fil_system.named_spaces, space);
		}

		log_mutex_exit();
		fil_space_free_low(space);

		/* Fall back to os_file_delete_if_exists() if
		os_file_delete() fails; only report an error when both
		fail. */
		if (!os_file_delete(innodb_data_file_key, path)
		    && !os_file_delete_if_exists(
			    innodb_data_file_key, path, NULL)) {

			/* Note: This is because we have removed the
			tablespace instance from the cache. */

			err = DB_IO_ERROR;
		}
	} else {
		/* The tablespace disappeared from the cache while the
		mutex was not held. */
		mutex_exit(&fil_system.mutex);
		err = DB_TABLESPACE_NOT_FOUND;
	}

	ut_free(path);

	return(err);
}
2777
2778/** Truncate the tablespace to needed size.
2779@param[in,out] space tablespace truncate
2780@param[in] size_in_pages truncate size.
2781@return true if truncate was successful. */
2782bool fil_truncate_tablespace(fil_space_t* space, ulint size_in_pages)
2783{
2784 /* Step-1: Prepare tablespace for truncate. This involves
2785 stopping all the new operations + IO on that tablespace
2786 and ensuring that related pages are flushed to disk. */
2787 if (fil_prepare_for_truncate(space->id) != DB_SUCCESS) {
2788 return(false);
2789 }
2790
2791 /* Step-2: Invalidate buffer pool pages belonging to the tablespace
2792 to re-create. Remove all insert buffer entries for the tablespace */
2793 buf_LRU_flush_or_remove_pages(space->id, NULL);
2794
2795 /* Step-3: Truncate the tablespace and accordingly update
2796 the fil_space_t handler that is used to access this tablespace. */
2797 mutex_enter(&fil_system.mutex);
2798
2799 /* The following code must change when InnoDB supports
2800 multiple datafiles per tablespace. */
2801 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2802
2803 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
2804
2805 ut_ad(node->is_open());
2806
2807 space->size = node->size = size_in_pages;
2808
2809 bool success = os_file_truncate(node->name, node->handle, 0);
2810 if (success) {
2811
2812 os_offset_t size = os_offset_t(size_in_pages)
2813 << srv_page_size_shift;
2814
2815 success = os_file_set_size(
2816 node->name, node->handle, size,
2817 FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
2818
2819 if (success) {
2820 space->stop_new_ops = false;
2821 space->is_being_truncated = false;
2822 }
2823 }
2824
2825 mutex_exit(&fil_system.mutex);
2826
2827 return(success);
2828}
2829
2830/*******************************************************************//**
2831Prepare for truncating a single-table tablespace.
28321) Check pending operations on a tablespace;
28332) Remove all insert buffer entries for the tablespace;
2834@return DB_SUCCESS or error */
2835dberr_t
2836fil_prepare_for_truncate(
2837/*=====================*/
2838 ulint id) /*!< in: space id */
2839{
2840 char* path = 0;
2841 fil_space_t* space = 0;
2842
2843 ut_a(!is_system_tablespace(id));
2844
2845 dberr_t err = fil_check_pending_operations(
2846 id, FIL_OPERATION_TRUNCATE, &space, &path);
2847
2848 ut_free(path);
2849
2850 if (err == DB_TABLESPACE_NOT_FOUND) {
2851 ib::error() << "Cannot truncate tablespace " << id
2852 << " because it is not found in the tablespace"
2853 " memory cache.";
2854 }
2855
2856 return(err);
2857}
2858
2859/*******************************************************************//**
2860Allocates and builds a file name from a path, a table or tablespace name
2861and a suffix. The string must be freed by caller with ut_free().
2862@param[in] path NULL or the direcory path or the full path and filename.
2863@param[in] name NULL if path is full, or Table/Tablespace name
2864@param[in] suffix NULL or the file extention to use.
2865@param[in] trim_name true if the last name on the path should be trimmed.
2866@return own: file name */
2867char*
2868fil_make_filepath(
2869 const char* path,
2870 const char* name,
2871 ib_extention ext,
2872 bool trim_name)
2873{
2874 /* The path may contain the basename of the file, if so we do not
2875 need the name. If the path is NULL, we can use the default path,
2876 but there needs to be a name. */
2877 ut_ad(path != NULL || name != NULL);
2878
2879 /* If we are going to strip a name off the path, there better be a
2880 path and a new name to put back on. */
2881 ut_ad(!trim_name || (path != NULL && name != NULL));
2882
2883 if (path == NULL) {
2884 path = fil_path_to_mysql_datadir;
2885 }
2886
2887 ulint len = 0; /* current length */
2888 ulint path_len = strlen(path);
2889 ulint name_len = (name ? strlen(name) : 0);
2890 const char* suffix = dot_ext[ext];
2891 ulint suffix_len = strlen(suffix);
2892 ulint full_len = path_len + 1 + name_len + suffix_len + 1;
2893
2894 char* full_name = static_cast<char*>(ut_malloc_nokey(full_len));
2895 if (full_name == NULL) {
2896 return NULL;
2897 }
2898
2899 /* If the name is a relative path, do not prepend "./". */
2900 if (path[0] == '.'
2901 && (path[1] == '\0' || path[1] == OS_PATH_SEPARATOR)
2902 && name != NULL && name[0] == '.') {
2903 path = NULL;
2904 path_len = 0;
2905 }
2906
2907 if (path != NULL) {
2908 memcpy(full_name, path, path_len);
2909 len = path_len;
2910 full_name[len] = '\0';
2911 os_normalize_path(full_name);
2912 }
2913
2914 if (trim_name) {
2915 /* Find the offset of the last DIR separator and set it to
2916 null in order to strip off the old basename from this path. */
2917 char* last_dir_sep = strrchr(full_name, OS_PATH_SEPARATOR);
2918 if (last_dir_sep) {
2919 last_dir_sep[0] = '\0';
2920 len = strlen(full_name);
2921 }
2922 }
2923
2924 if (name != NULL) {
2925 if (len && full_name[len - 1] != OS_PATH_SEPARATOR) {
2926 /* Add a DIR separator */
2927 full_name[len] = OS_PATH_SEPARATOR;
2928 full_name[++len] = '\0';
2929 }
2930
2931 char* ptr = &full_name[len];
2932 memcpy(ptr, name, name_len);
2933 len += name_len;
2934 full_name[len] = '\0';
2935 os_normalize_path(ptr);
2936 }
2937
2938 /* Make sure that the specified suffix is at the end of the filepath
2939 string provided. This assumes that the suffix starts with '.'.
2940 If the first char of the suffix is found in the filepath at the same
2941 length as the suffix from the end, then we will assume that there is
2942 a previous suffix that needs to be replaced. */
2943 if (suffix != NULL) {
2944 /* Need room for the trailing null byte. */
2945 ut_ad(len < full_len);
2946
2947 if ((len > suffix_len)
2948 && (full_name[len - suffix_len] == suffix[0])) {
2949 /* Another suffix exists, make it the one requested. */
2950 memcpy(&full_name[len - suffix_len], suffix, suffix_len);
2951
2952 } else {
2953 /* No previous suffix, add it. */
2954 ut_ad(len + suffix_len < full_len);
2955 memcpy(&full_name[len], suffix, suffix_len);
2956 full_name[len + suffix_len] = '\0';
2957 }
2958 }
2959
2960 return(full_name);
2961}
2962
2963/** Test if a tablespace file can be renamed to a new filepath by checking
2964if that the old filepath exists and the new filepath does not exist.
2965@param[in] old_path old filepath
2966@param[in] new_path new filepath
2967@param[in] is_discarded whether the tablespace is discarded
2968@return innodb error code */
2969static dberr_t
2970fil_rename_tablespace_check(
2971 const char* old_path,
2972 const char* new_path,
2973 bool is_discarded)
2974{
2975 bool exists = false;
2976 os_file_type_t ftype;
2977
2978 if (!is_discarded
2979 && os_file_status(old_path, &exists, &ftype)
2980 && !exists) {
2981 ib::error() << "Cannot rename '" << old_path
2982 << "' to '" << new_path
2983 << "' because the source file"
2984 << " does not exist.";
2985 return(DB_TABLESPACE_NOT_FOUND);
2986 }
2987
2988 exists = false;
2989 if (!os_file_status(new_path, &exists, &ftype) || exists) {
2990 ib::error() << "Cannot rename '" << old_path
2991 << "' to '" << new_path
2992 << "' because the target file exists."
2993 " Remove the target file and try again.";
2994 return(DB_TABLESPACE_EXISTS);
2995 }
2996
2997 return(DB_SUCCESS);
2998}
2999
3000dberr_t fil_space_t::rename(const char* name, const char* path, bool log)
3001{
3002 ut_ad(UT_LIST_GET_LEN(chain) == 1);
3003 ut_ad(!is_system_tablespace(id));
3004
3005 if (log) {
3006 dberr_t err = fil_rename_tablespace_check(
3007 chain.start->name, path, false);
3008 if (err != DB_SUCCESS) {
3009 return(err);
3010 }
3011 fil_name_write_rename(id, chain.start->name, path);
3012 }
3013
3014 return fil_rename_tablespace(id, chain.start->name, name, path)
3015 ? DB_SUCCESS : DB_ERROR;
3016}
3017
/** Rename a single-table tablespace.
The tablespace must exist in the memory cache.
The function sets fil_space_t::stop_ios and retries until the data file
has no pending i/o or flushes, is not being extended and has been
flushed and closed; it gives up after more than 25000 attempts. On
success the file name and the tablespace name in the cache are replaced
by newly allocated copies and the old strings are freed; on failure the
new strings are freed instead.
@param[in]	id		tablespace identifier
@param[in]	old_path	old file name
@param[in]	new_name	new table name in the
databasename/tablename format
@param[in]	new_path_in	new file name,
or NULL if it is located in the normal data directory
@return true if success */
static bool
fil_rename_tablespace(
	ulint		id,
	const char*	old_path,
	const char*	new_name,
	const char*	new_path_in)
{
	bool		sleep		= false;
	bool		flush		= false;
	fil_space_t*	space;
	fil_node_t*	node;
	ulint		count		= 0;
	ut_a(id != 0);

	ut_ad(strchr(new_name, '/') != NULL);
retry:
	count++;

	/* Report every 1000th attempt. */
	if (!(count % 1000)) {
		ib::warn() << "Cannot rename file " << old_path
			<< " (space id " << id << "), retried " << count
			<< " times."
			" There are either pending IOs or flushes or"
			" the file is being extended.";
	}

	mutex_enter(&fil_system.mutex);

	space = fil_space_get_by_id(id);

	/* Debug injection point that simulates a missing tablespace. */
	DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; );

	if (space == NULL) {
		ib::error() << "Cannot find space id " << id
			<< " in the tablespace memory cache, though the file '"
			<< old_path
			<< "' in a rename operation should have that id.";
func_exit:
		/* Common failure exit; fil_system.mutex is held here. */
		mutex_exit(&fil_system.mutex);
		return(false);
	}

	/* Give up after too many attempts, and allow i/o on the
	tablespace again. */
	if (count > 25000) {
		space->stop_ios = false;
		goto func_exit;
	}

	/* We temporarily close the .ibd file because we do not trust that
	operating systems can rename an open file. For the closing we have to
	wait until there are no pending i/o's or flushes on the file. */

	space->stop_ios = true;

	/* The following code must change when InnoDB supports
	multiple datafiles per tablespace. */
	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
	node = UT_LIST_GET_FIRST(space->chain);

	if (node->n_pending > 0
	    || node->n_pending_flushes > 0
	    || node->being_extended) {
		/* There are pending i/o's or flushes or the file is
		currently being extended, sleep for a while and
		retry */
		sleep = true;
	} else if (node->modification_counter > node->flush_counter) {
		/* Flush the space */
		sleep = flush = true;
	} else if (node->is_open()) {
		/* Close the file */

		fil_node_close_file(node);
	}

	mutex_exit(&fil_system.mutex);

	if (sleep) {
		os_thread_sleep(20000);

		/* The flush is performed without holding
		fil_system.mutex. */
		if (flush) {
			fil_flush(id);
		}

		sleep = flush = false;
		goto retry;
	}
	ut_ad(space->stop_ios);
	/* Build the new names. If no explicit path was given, the file
	name is derived from the table name by fil_make_filepath(). */
	char*	new_file_name = new_path_in == NULL
		? fil_make_filepath(NULL, new_name, IBD, false)
		: mem_strdup(new_path_in);
	char*	old_file_name = node->name;
	char*	new_space_name = mem_strdup(new_name);
	char*	old_space_name = space->name;

	ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != NULL);
	ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != NULL);

	/* Outside of recovery, write the redo log record for the
	rename and then acquire the log mutex. During recovery the
	assertion below expects the log mutex to be held already. */
	if (!recv_recovery_on) {
		fil_name_write_rename(id, old_file_name, new_file_name);
		log_mutex_enter();
	}

	/* log_sys.mutex is above fil_system.mutex in the latching order */
	ut_ad(log_mutex_own());
	mutex_enter(&fil_system.mutex);
	ut_ad(space->name == old_space_name);
	ut_ad(node->name == old_file_name);

	bool	success;

	/* Debug injection point that skips the rename and makes it
	fail. */
	DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
			goto skip_rename; );

	success = os_file_rename(
		innodb_data_file_key, old_file_name, new_file_name);

	DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
			skip_rename: success = false; );

	ut_ad(node->name == old_file_name);

	if (success) {
		node->name = new_file_name;
	}

	/* Release the log mutex only if it was acquired above. */
	if (!recv_recovery_on) {
		log_mutex_exit();
	}

	ut_ad(space->name == old_space_name);
	if (success) {
		space->name = new_space_name;
	} else {
		/* Because nothing was renamed, we must free the new
		names, not the old ones. */
		old_file_name = new_file_name;
		old_space_name = new_space_name;
	}

	/* Allow i/o on the tablespace again. */
	ut_ad(space->stop_ios);
	space->stop_ios = false;
	mutex_exit(&fil_system.mutex);

	/* Free whichever pair of names is no longer referenced by
	the cache. */
	ut_free(old_file_name);
	ut_free(old_space_name);

	return(success);
}
3175
3176/** Create a tablespace file.
3177@param[in] space_id Tablespace ID
3178@param[in] name Tablespace name in dbname/tablename format.
3179@param[in] path Path and filename of the datafile to create.
3180@param[in] flags Tablespace flags
3181@param[in] size Initial size of the tablespace file in pages,
3182must be >= FIL_IBD_FILE_INITIAL_SIZE
3183@param[in] mode MariaDB encryption mode
3184@param[in] key_id MariaDB encryption key_id
3185@param[out] err DB_SUCCESS or error code
3186@return the created tablespace
3187@retval NULL on error */
3188fil_space_t*
3189fil_ibd_create(
3190 ulint space_id,
3191 const char* name,
3192 const char* path,
3193 ulint flags,
3194 ulint size,
3195 fil_encryption_t mode,
3196 uint32_t key_id,
3197 dberr_t* err)
3198{
3199 pfs_os_file_t file;
3200 byte* buf2;
3201 byte* page;
3202 bool success;
3203 bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags) != 0;
3204 fil_space_t* space = NULL;
3205 fil_space_crypt_t *crypt_data = NULL;
3206
3207 ut_ad(!is_system_tablespace(space_id));
3208 ut_ad(!srv_read_only_mode);
3209 ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
3210 ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
3211 ut_a(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, space_id));
3212
3213 /* Create the subdirectories in the path, if they are
3214 not there already. */
3215 *err = os_file_create_subdirs_if_needed(path);
3216 if (*err != DB_SUCCESS) {
3217 return NULL;
3218 }
3219
3220 file = os_file_create(
3221 innodb_data_file_key, path,
3222 OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
3223 OS_FILE_NORMAL,
3224 OS_DATA_FILE,
3225 srv_read_only_mode,
3226 &success);
3227
3228 if (!success) {
3229 /* The following call will print an error message */
3230 switch (os_file_get_last_error(true)) {
3231 case OS_FILE_ALREADY_EXISTS:
3232 ib::info() << "The file '" << path << "'"
3233 " already exists though the"
3234 " corresponding table did not exist"
3235 " in the InnoDB data dictionary."
3236 " You can resolve the problem by removing"
3237 " the file.";
3238 *err = DB_TABLESPACE_EXISTS;
3239 break;
3240 case OS_FILE_DISK_FULL:
3241 *err = DB_OUT_OF_FILE_SPACE;
3242 break;
3243 default:
3244 *err = DB_ERROR;
3245 }
3246 ib::error() << "Cannot create file '" << path << "'";
3247 return NULL;
3248 }
3249
3250 const bool is_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(flags);
3251
3252#ifdef _WIN32
3253 if (is_compressed) {
3254 os_file_set_sparse_win32(file);
3255 }
3256#endif
3257
3258 if (!os_file_set_size(
3259 path, file,
3260 os_offset_t(size) << srv_page_size_shift, is_compressed)) {
3261 *err = DB_OUT_OF_FILE_SPACE;
3262err_exit:
3263 os_file_close(file);
3264 os_file_delete(innodb_data_file_key, path);
3265 return NULL;
3266 }
3267
3268 bool punch_hole = os_is_sparse_file_supported(file);
3269
3270 ulint block_size = os_file_get_block_size(file, path);
3271
3272 /* We have to write the space id to the file immediately and flush the
3273 file to disk. This is because in crash recovery we must be aware what
3274 tablespaces exist and what are their space id's, so that we can apply
3275 the log records to the right file. It may take quite a while until
3276 buffer pool flush algorithms write anything to the file and flush it to
3277 disk. If we would not write here anything, the file would be filled
3278 with zeros from the call of os_file_set_size(), until a buffer pool
3279 flush would write to it. */
3280
3281 buf2 = static_cast<byte*>(ut_malloc_nokey(3U << srv_page_size_shift));
3282 /* Align the memory for file i/o if we might have O_DIRECT set */
3283 page = static_cast<byte*>(ut_align(buf2, srv_page_size));
3284
3285 memset(page, '\0', srv_page_size);
3286
3287 flags |= FSP_FLAGS_PAGE_SSIZE();
3288 fsp_header_init_fields(page, space_id, flags);
3289 mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
3290
3291 const page_size_t page_size(flags);
3292 IORequest request(IORequest::WRITE);
3293
3294 if (!page_size.is_compressed()) {
3295
3296 buf_flush_init_for_writing(NULL, page, NULL, 0);
3297
3298 *err = os_file_write(
3299 request, path, file, page, 0, page_size.physical());
3300 } else {
3301 page_zip_des_t page_zip;
3302 page_zip_set_size(&page_zip, page_size.physical());
3303 page_zip.data = page + srv_page_size;
3304#ifdef UNIV_DEBUG
3305 page_zip.m_start =
3306#endif /* UNIV_DEBUG */
3307 page_zip.m_end = page_zip.m_nonempty =
3308 page_zip.n_blobs = 0;
3309
3310 buf_flush_init_for_writing(NULL, page, &page_zip, 0);
3311
3312 *err = os_file_write(
3313 request, path, file, page_zip.data, 0,
3314 page_size.physical());
3315 }
3316
3317 ut_free(buf2);
3318
3319 if (*err != DB_SUCCESS) {
3320 ib::error()
3321 << "Could not write the first page to"
3322 << " tablespace '" << path << "'";
3323 goto err_exit;
3324 }
3325
3326 if (!os_file_flush(file)) {
3327 ib::error() << "File flush of tablespace '"
3328 << path << "' failed";
3329 *err = DB_ERROR;
3330 goto err_exit;
3331 }
3332
3333 if (has_data_dir) {
3334 /* Make the ISL file if the IBD file is not
3335 in the default location. */
3336 *err = RemoteDatafile::create_link_file(name, path);
3337 if (*err != DB_SUCCESS) {
3338 goto err_exit;
3339 }
3340 }
3341
3342 /* Create crypt data if the tablespace is either encrypted or user has
3343 requested it to remain unencrypted. */
3344 if (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF ||
3345 srv_encrypt_tables) {
3346 crypt_data = fil_space_create_crypt_data(mode, key_id);
3347 }
3348
3349 space = fil_space_create(name, space_id, flags, FIL_TYPE_TABLESPACE,
3350 crypt_data, mode);
3351 if (!space) {
3352 free(crypt_data);
3353 *err = DB_ERROR;
3354 } else {
3355 fil_node_t* node = fil_node_create_low(path, size, space,
3356 false, true);
3357 mtr_t mtr;
3358 mtr.start();
3359 fil_op_write_log(
3360 MLOG_FILE_CREATE2, space_id, 0, node->name,
3361 NULL, space->flags & ~FSP_FLAGS_MEM_MASK, &mtr);
3362 fil_name_write(space, 0, node, &mtr);
3363 mtr.commit();
3364
3365 node->block_size = block_size;
3366 space->punch_hole = punch_hole;
3367
3368 *err = DB_SUCCESS;
3369 }
3370
3371 os_file_close(file);
3372
3373 if (*err != DB_SUCCESS) {
3374 if (has_data_dir) {
3375 RemoteDatafile::delete_link_file(name);
3376 }
3377
3378 os_file_delete(innodb_data_file_key, path);
3379 }
3380
3381 return space;
3382}
3383
3384/** Try to open a single-table tablespace and optionally check that the
3385space id in it is correct. If this does not succeed, print an error message
3386to the .err log. This function is used to open a tablespace when we start
3387mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.
3388
3389NOTE that we assume this operation is used either at the database startup
3390or under the protection of the dictionary mutex, so that two users cannot
3391race here. This operation does not leave the file associated with the
3392tablespace open, but closes it after we have looked at the space id in it.
3393
3394If the validate boolean is set, we read the first page of the file and
3395check that the space id in the file is what we expect. We assume that
3396this function runs much faster if no check is made, since accessing the
3397file inode probably is much faster (the OS caches them) than accessing
3398the first page of the file. This boolean may be initially false, but if
3399a remote tablespace is found it will be changed to true.
3400
3401If the fix_dict boolean is set, then it is safe to use an internal SQL
3402statement to update the dictionary tables if they are incorrect.
3403
3404@param[in] validate true if we should validate the tablespace
3405@param[in] fix_dict true if the dictionary is available to be fixed
3406@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
3407@param[in] id tablespace ID
3408@param[in] flags expected FSP_SPACE_FLAGS
3409@param[in] space_name tablespace name of the datafile
3410If file-per-table, it is the table name in the databasename/tablename format
3411@param[in] path_in expected filepath, usually read from dictionary
3412@param[out] err DB_SUCCESS or error code
3413@return tablespace
3414@retval NULL if the tablespace could not be opened */
3415fil_space_t*
3416fil_ibd_open(
3417 bool validate,
3418 bool fix_dict,
3419 fil_type_t purpose,
3420 ulint id,
3421 ulint flags,
3422 const table_name_t& tablename,
3423 const char* path_in,
3424 dberr_t* err)
3425{
3426 mutex_enter(&fil_system.mutex);
3427 if (fil_space_t* space = fil_space_get_by_id(id)) {
3428 if (strcmp(space->name, tablename.m_name)) {
3429 table_name_t space_name;
3430 space_name.m_name = space->name;
3431 ib::error()
3432 << "Trying to open table " << tablename
3433 << " with id " << id
3434 << ", conflicting with " << space_name;
3435 space = NULL;
3436 if (err) *err = DB_TABLESPACE_EXISTS;
3437 } else if (err) *err = DB_SUCCESS;
3438
3439 mutex_exit(&fil_system.mutex);
3440
3441 if (space && validate && !srv_read_only_mode) {
3442 fsp_flags_try_adjust(space,
3443 flags & ~FSP_FLAGS_MEM_MASK);
3444 }
3445
3446 return space;
3447 }
3448 mutex_exit(&fil_system.mutex);
3449
3450 bool dict_filepath_same_as_default = false;
3451 bool link_file_found = false;
3452 bool link_file_is_bad = false;
3453 Datafile df_default; /* default location */
3454 Datafile df_dict; /* dictionary location */
3455 RemoteDatafile df_remote; /* remote location */
3456 ulint tablespaces_found = 0;
3457 ulint valid_tablespaces_found = 0;
3458
3459 ut_ad(!fix_dict || rw_lock_own(dict_operation_lock, RW_LOCK_X));
3460
3461 ut_ad(!fix_dict || mutex_own(&dict_sys->mutex));
3462 ut_ad(!fix_dict || !srv_read_only_mode);
3463 ut_ad(!fix_dict || srv_log_file_size != 0);
3464 ut_ad(fil_type_is_data(purpose));
3465
3466 /* Table flags can be ULINT_UNDEFINED if
3467 dict_tf_to_fsp_flags_failure is set. */
3468 if (flags == ULINT_UNDEFINED) {
3469corrupted:
3470 if (err) *err = DB_CORRUPTION;
3471 return NULL;
3472 }
3473
3474 ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK, id));
3475 df_default.init(tablename.m_name, flags);
3476 df_dict.init(tablename.m_name, flags);
3477 df_remote.init(tablename.m_name, flags);
3478
3479 /* Discover the correct file by looking in three possible locations
3480 while avoiding unecessary effort. */
3481
3482 /* We will always look for an ibd in the default location. */
3483 df_default.make_filepath(NULL, tablename.m_name, IBD);
3484
3485 /* Look for a filepath embedded in an ISL where the default file
3486 would be. */
3487 if (df_remote.open_read_only(true) == DB_SUCCESS) {
3488 ut_ad(df_remote.is_open());
3489
3490 /* Always validate a file opened from an ISL pointer */
3491 validate = true;
3492 ++tablespaces_found;
3493 link_file_found = true;
3494 } else if (df_remote.filepath() != NULL) {
3495 /* An ISL file was found but contained a bad filepath in it.
3496 Better validate anything we do find. */
3497 validate = true;
3498 }
3499
3500 /* Attempt to open the tablespace at the dictionary filepath. */
3501 if (path_in) {
3502 if (df_default.same_filepath_as(path_in)) {
3503 dict_filepath_same_as_default = true;
3504 } else {
3505 /* Dict path is not the default path. Always validate
3506 remote files. If default is opened, it was moved. */
3507 validate = true;
3508 df_dict.set_filepath(path_in);
3509 if (df_dict.open_read_only(true) == DB_SUCCESS) {
3510 ut_ad(df_dict.is_open());
3511 ++tablespaces_found;
3512 }
3513 }
3514 }
3515
3516 /* Always look for a file at the default location. But don't log
3517 an error if the tablespace is already open in remote or dict. */
3518 ut_a(df_default.filepath());
3519 const bool strict = (tablespaces_found == 0);
3520 if (df_default.open_read_only(strict) == DB_SUCCESS) {
3521 ut_ad(df_default.is_open());
3522 ++tablespaces_found;
3523 }
3524
3525 /* Check if multiple locations point to the same file. */
3526 if (tablespaces_found > 1 && df_default.same_as(df_remote)) {
3527 /* A link file was found with the default path in it.
3528 Use the default path and delete the link file. */
3529 --tablespaces_found;
3530 df_remote.delete_link_file();
3531 df_remote.close();
3532 }
3533 if (tablespaces_found > 1 && df_default.same_as(df_dict)) {
3534 --tablespaces_found;
3535 df_dict.close();
3536 }
3537 if (tablespaces_found > 1 && df_remote.same_as(df_dict)) {
3538 --tablespaces_found;
3539 df_dict.close();
3540 }
3541
3542 /* We have now checked all possible tablespace locations and
3543 have a count of how many unique files we found. If things are
3544 normal, we only found 1. */
3545 /* For encrypted tablespace, we need to check the
3546 encryption in header of first page. */
3547 if (!validate && tablespaces_found == 1) {
3548 goto skip_validate;
3549 }
3550
3551 /* Read and validate the first page of these three tablespace
3552 locations, if found. */
3553 valid_tablespaces_found +=
3554 (df_remote.validate_to_dd(id, flags) == DB_SUCCESS);
3555
3556 valid_tablespaces_found +=
3557 (df_default.validate_to_dd(id, flags) == DB_SUCCESS);
3558
3559 valid_tablespaces_found +=
3560 (df_dict.validate_to_dd(id, flags) == DB_SUCCESS);
3561
3562 /* Make sense of these three possible locations.
3563 First, bail out if no tablespace files were found. */
3564 if (valid_tablespaces_found == 0) {
3565 os_file_get_last_error(true);
3566 ib::error() << "Could not find a valid tablespace file for `"
3567 << tablename << "`. " << TROUBLESHOOT_DATADICT_MSG;
3568 goto corrupted;
3569 }
3570 if (!validate) {
3571 goto skip_validate;
3572 }
3573
3574 /* Do not open any tablespaces if more than one tablespace with
3575 the correct space ID and flags were found. */
3576 if (tablespaces_found > 1) {
3577 ib::error() << "A tablespace for `" << tablename
3578 << "` has been found in multiple places;";
3579
3580 if (df_default.is_open()) {
3581 ib::error() << "Default location: "
3582 << df_default.filepath()
3583 << ", Space ID=" << df_default.space_id()
3584 << ", Flags=" << df_default.flags();
3585 }
3586 if (df_remote.is_open()) {
3587 ib::error() << "Remote location: "
3588 << df_remote.filepath()
3589 << ", Space ID=" << df_remote.space_id()
3590 << ", Flags=" << df_remote.flags();
3591 }
3592 if (df_dict.is_open()) {
3593 ib::error() << "Dictionary location: "
3594 << df_dict.filepath()
3595 << ", Space ID=" << df_dict.space_id()
3596 << ", Flags=" << df_dict.flags();
3597 }
3598
3599 /* Force-recovery will allow some tablespaces to be
3600 skipped by REDO if there was more than one file found.
3601 Unlike during the REDO phase of recovery, we now know
3602 if the tablespace is valid according to the dictionary,
3603 which was not available then. So if we did not force
3604 recovery and there is only one good tablespace, ignore
3605 any bad tablespaces. */
3606 if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
3607 ib::error() << "Will not open tablespace `"
3608 << tablename << "`";
3609
3610 /* If the file is not open it cannot be valid. */
3611 ut_ad(df_default.is_open() || !df_default.is_valid());
3612 ut_ad(df_dict.is_open() || !df_dict.is_valid());
3613 ut_ad(df_remote.is_open() || !df_remote.is_valid());
3614
3615 /* Having established that, this is an easy way to
3616 look for corrupted data files. */
3617 if (df_default.is_open() != df_default.is_valid()
3618 || df_dict.is_open() != df_dict.is_valid()
3619 || df_remote.is_open() != df_remote.is_valid()) {
3620 goto corrupted;
3621 }
3622error:
3623 if (err) *err = DB_ERROR;
3624 return NULL;
3625 }
3626
3627 /* There is only one valid tablespace found and we did
3628 not use srv_force_recovery during REDO. Use this one
3629 tablespace and clean up invalid tablespace pointers */
3630 if (df_default.is_open() && !df_default.is_valid()) {
3631 df_default.close();
3632 tablespaces_found--;
3633 }
3634
3635 if (df_dict.is_open() && !df_dict.is_valid()) {
3636 df_dict.close();
3637 /* Leave dict.filepath so that SYS_DATAFILES
3638 can be corrected below. */
3639 tablespaces_found--;
3640 }
3641
3642 if (df_remote.is_open() && !df_remote.is_valid()) {
3643 df_remote.close();
3644 tablespaces_found--;
3645 link_file_is_bad = true;
3646 }
3647 }
3648
3649 /* At this point, there should be only one filepath. */
3650 ut_a(tablespaces_found == 1);
3651 ut_a(valid_tablespaces_found == 1);
3652
3653 /* Only fix the dictionary at startup when there is only one thread.
3654 Calls to dict_load_table() can be done while holding other latches. */
3655 if (!fix_dict) {
3656 goto skip_validate;
3657 }
3658
3659 /* We may need to update what is stored in SYS_DATAFILES or
3660 SYS_TABLESPACES or adjust the link file. Since a failure to
3661 update SYS_TABLESPACES or SYS_DATAFILES does not prevent opening
3662 and using the tablespace either this time or the next, we do not
3663 check the return code or fail to open the tablespace. But if it
3664 fails, dict_update_filepath() will issue a warning to the log. */
3665 if (df_dict.filepath()) {
3666 ut_ad(path_in != NULL);
3667 ut_ad(df_dict.same_filepath_as(path_in));
3668
3669 if (df_remote.is_open()) {
3670 if (!df_remote.same_filepath_as(path_in)) {
3671 dict_update_filepath(id, df_remote.filepath());
3672 }
3673
3674 } else if (df_default.is_open()) {
3675 ut_ad(!dict_filepath_same_as_default);
3676 dict_update_filepath(id, df_default.filepath());
3677 if (link_file_is_bad) {
3678 RemoteDatafile::delete_link_file(
3679 tablename.m_name);
3680 }
3681
3682 } else if (!link_file_found || link_file_is_bad) {
3683 ut_ad(df_dict.is_open());
3684 /* Fix the link file if we got our filepath
3685 from the dictionary but a link file did not
3686 exist or it did not point to a valid file. */
3687 RemoteDatafile::delete_link_file(tablename.m_name);
3688 RemoteDatafile::create_link_file(
3689 tablename.m_name, df_dict.filepath());
3690 }
3691
3692 } else if (df_remote.is_open()) {
3693 if (dict_filepath_same_as_default) {
3694 dict_update_filepath(id, df_remote.filepath());
3695
3696 } else if (path_in == NULL) {
3697 /* SYS_DATAFILES record for this space ID
3698 was not found. */
3699 dict_replace_tablespace_and_filepath(
3700 id, tablename.m_name,
3701 df_remote.filepath(), flags);
3702 }
3703
3704 } else if (df_default.is_open()) {
3705 /* We opened the tablespace in the default location.
3706 SYS_DATAFILES.PATH needs to be updated if it is different
3707 from this default path or if the SYS_DATAFILES.PATH was not
3708 supplied and it should have been. Also update the dictionary
3709 if we found an ISL file (since !df_remote.is_open). Since
3710 path_in is not suppled for file-per-table, we must assume
3711 that it matched the ISL. */
3712 if ((path_in != NULL && !dict_filepath_same_as_default)
3713 || (path_in == NULL && DICT_TF_HAS_DATA_DIR(flags))
3714 || df_remote.filepath() != NULL) {
3715 dict_replace_tablespace_and_filepath(
3716 id, tablename.m_name, df_default.filepath(),
3717 flags);
3718 }
3719 }
3720
3721skip_validate:
3722 const byte* first_page =
3723 df_default.is_open() ? df_default.get_first_page() :
3724 df_dict.is_open() ? df_dict.get_first_page() :
3725 df_remote.get_first_page();
3726
3727 fil_space_crypt_t* crypt_data = first_page
3728 ? fil_space_read_crypt_data(page_size_t(flags), first_page)
3729 : NULL;
3730
3731 fil_space_t* space = fil_space_create(
3732 tablename.m_name, id, flags, purpose, crypt_data);
3733
3734 /* We do not measure the size of the file, that is why
3735 we pass the 0 below */
3736
3737 if (fil_node_create_low(
3738 df_remote.is_open() ? df_remote.filepath() :
3739 df_dict.is_open() ? df_dict.filepath() :
3740 df_default.filepath(), 0, space, false,
3741 true) == NULL) {
3742 goto error;
3743 }
3744
3745 if (validate && purpose != FIL_TYPE_IMPORT && !srv_read_only_mode) {
3746 df_remote.close();
3747 df_dict.close();
3748 df_default.close();
3749 fsp_flags_try_adjust(space, flags & ~FSP_FLAGS_MEM_MASK);
3750 }
3751
3752 if (err) *err = DB_SUCCESS;
3753 return space;
3754}
3755
3756/** Looks for a pre-existing fil_space_t with the given tablespace ID
3757and, if found, returns the name and filepath in newly allocated buffers
3758that the caller must free.
3759@param[in] space_id The tablespace ID to search for.
3760@param[out] name Name of the tablespace found.
3761@param[out] filepath The filepath of the first datafile for the
3762tablespace.
3763@return true if tablespace is found, false if not. */
3764bool
3765fil_space_read_name_and_filepath(
3766 ulint space_id,
3767 char** name,
3768 char** filepath)
3769{
3770 bool success = false;
3771 *name = NULL;
3772 *filepath = NULL;
3773
3774 mutex_enter(&fil_system.mutex);
3775
3776 fil_space_t* space = fil_space_get_by_id(space_id);
3777
3778 if (space != NULL) {
3779 *name = mem_strdup(space->name);
3780
3781 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
3782 *filepath = mem_strdup(node->name);
3783
3784 success = true;
3785 }
3786
3787 mutex_exit(&fil_system.mutex);
3788
3789 return(success);
3790}
3791
3792/** Convert a file name to a tablespace name.
3793@param[in] filename directory/databasename/tablename.ibd
3794@return database/tablename string, to be freed with ut_free() */
3795char*
3796fil_path_to_space_name(
3797 const char* filename)
3798{
3799 /* Strip the file name prefix and suffix, leaving
3800 only databasename/tablename. */
3801 ulint filename_len = strlen(filename);
3802 const char* end = filename + filename_len;
3803#ifdef HAVE_MEMRCHR
3804 const char* tablename = 1 + static_cast<const char*>(
3805 memrchr(filename, OS_PATH_SEPARATOR,
3806 filename_len));
3807 const char* dbname = 1 + static_cast<const char*>(
3808 memrchr(filename, OS_PATH_SEPARATOR,
3809 tablename - filename - 1));
3810#else /* HAVE_MEMRCHR */
3811 const char* tablename = filename;
3812 const char* dbname = NULL;
3813
3814 while (const char* t = static_cast<const char*>(
3815 memchr(tablename, OS_PATH_SEPARATOR,
3816 ulint(end - tablename)))) {
3817 dbname = tablename;
3818 tablename = t + 1;
3819 }
3820#endif /* HAVE_MEMRCHR */
3821
3822 ut_ad(dbname != NULL);
3823 ut_ad(tablename > dbname);
3824 ut_ad(tablename < end);
3825 ut_ad(end - tablename > 4);
3826 ut_ad(memcmp(end - 4, DOT_IBD, 4) == 0);
3827
3828 char* name = mem_strdupl(dbname, ulint(end - dbname) - 4);
3829
3830 ut_ad(name[tablename - dbname - 1] == OS_PATH_SEPARATOR);
3831#if OS_PATH_SEPARATOR != '/'
3832 /* space->name uses '/', not OS_PATH_SEPARATOR. */
3833 name[tablename - dbname - 1] = '/';
3834#endif
3835
3836 return(name);
3837}
3838
3839/** Discover the correct IBD file to open given a remote or missing
3840filepath from the REDO log. Administrators can move a crashed
3841database to another location on the same machine and try to recover it.
3842Remote IBD files might be moved as well to the new location.
3843 The problem with this is that the REDO log contains the old location
3844which may be still accessible. During recovery, if files are found in
3845both locations, we can chose on based on these priorities;
38461. Default location
38472. ISL location
38483. REDO location
3849@param[in] space_id tablespace ID
3850@param[in] df Datafile object with path from redo
3851@return true if a valid datafile was found, false if not */
3852static
3853bool
3854fil_ibd_discover(
3855 ulint space_id,
3856 Datafile& df)
3857{
3858 Datafile df_def_per; /* default file-per-table datafile */
3859 RemoteDatafile df_rem_per; /* remote file-per-table datafile */
3860
3861 /* Look for the datafile in the default location. */
3862 const char* filename = df.filepath();
3863 const char* basename = base_name(filename);
3864
3865 /* If this datafile is file-per-table it will have a schema dir. */
3866 ulint sep_found = 0;
3867 const char* db = basename;
3868 for (; db > filename && sep_found < 2; db--) {
3869 if (db[0] == OS_PATH_SEPARATOR) {
3870 sep_found++;
3871 }
3872 }
3873 if (sep_found == 2) {
3874 db += 2;
3875 df_def_per.init(db, 0);
3876 df_def_per.make_filepath(NULL, db, IBD);
3877 if (df_def_per.open_read_only(false) == DB_SUCCESS
3878 && df_def_per.validate_for_recovery() == DB_SUCCESS
3879 && df_def_per.space_id() == space_id) {
3880 df.set_filepath(df_def_per.filepath());
3881 df.open_read_only(false);
3882 return(true);
3883 }
3884
3885 /* Look for a remote file-per-table tablespace. */
3886
3887 switch (srv_operation) {
3888 case SRV_OPERATION_BACKUP:
3889 case SRV_OPERATION_RESTORE_DELTA:
3890 ut_ad(0);
3891 break;
3892 case SRV_OPERATION_RESTORE_EXPORT:
3893 case SRV_OPERATION_RESTORE:
3894 break;
3895 case SRV_OPERATION_NORMAL:
3896 df_rem_per.set_name(db);
3897 if (df_rem_per.open_link_file() != DB_SUCCESS) {
3898 break;
3899 }
3900
3901 /* An ISL file was found with contents. */
3902 if (df_rem_per.open_read_only(false) != DB_SUCCESS
3903 || df_rem_per.validate_for_recovery()
3904 != DB_SUCCESS) {
3905
3906 /* Assume that this ISL file is intended to
3907 be used. Do not continue looking for another
3908 if this file cannot be opened or is not
3909 a valid IBD file. */
3910 ib::error() << "ISL file '"
3911 << df_rem_per.link_filepath()
3912 << "' was found but the linked file '"
3913 << df_rem_per.filepath()
3914 << "' could not be opened or is"
3915 " not correct.";
3916 return(false);
3917 }
3918
3919 /* Use this file if it has the space_id from the
3920 MLOG record. */
3921 if (df_rem_per.space_id() == space_id) {
3922 df.set_filepath(df_rem_per.filepath());
3923 df.open_read_only(false);
3924 return(true);
3925 }
3926
3927 /* Since old MLOG records can use the same basename
3928 in multiple CREATE/DROP TABLE sequences, this ISL
3929 file could be pointing to a later version of this
3930 basename.ibd file which has a different space_id.
3931 Keep looking. */
3932 }
3933 }
3934
3935 /* No ISL files were found in the default location. Use the location
3936 given in the redo log. */
3937 if (df.open_read_only(false) == DB_SUCCESS
3938 && df.validate_for_recovery() == DB_SUCCESS
3939 && df.space_id() == space_id) {
3940 return(true);
3941 }
3942
3943 /* A datafile was not discovered for the filename given. */
3944 return(false);
3945}
3946/** Open an ibd tablespace and add it to the InnoDB data structures.
3947This is similar to fil_ibd_open() except that it is used while processing
3948the REDO log, so the data dictionary is not available and very little
3949validation is done. The tablespace name is extracred from the
3950dbname/tablename.ibd portion of the filename, which assumes that the file
3951is a file-per-table tablespace. Any name will do for now. General
3952tablespace names will be read from the dictionary after it has been
3953recovered. The tablespace flags are read at this time from the first page
3954of the file in validate_for_recovery().
3955@param[in] space_id tablespace ID
3956@param[in] filename path/to/databasename/tablename.ibd
3957@param[out] space the tablespace, or NULL on error
3958@return status of the operation */
3959enum fil_load_status
3960fil_ibd_load(
3961 ulint space_id,
3962 const char* filename,
3963 fil_space_t*& space)
3964{
3965 /* If the a space is already in the file system cache with this
3966 space ID, then there is nothing to do. */
3967 mutex_enter(&fil_system.mutex);
3968 space = fil_space_get_by_id(space_id);
3969 mutex_exit(&fil_system.mutex);
3970
3971 if (space != NULL) {
3972 /* Compare the filename we are trying to open with the
3973 filename from the first node of the tablespace we opened
3974 previously. Fail if it is different. */
3975 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
3976 if (0 != strcmp(innobase_basename(filename),
3977 innobase_basename(node->name))) {
3978 ib::info()
3979 << "Ignoring data file '" << filename
3980 << "' with space ID " << space->id
3981 << ". Another data file called " << node->name
3982 << " exists with the same space ID.";
3983 space = NULL;
3984 return(FIL_LOAD_ID_CHANGED);
3985 }
3986 return(FIL_LOAD_OK);
3987 }
3988
3989 if (srv_operation == SRV_OPERATION_RESTORE) {
3990 /* Replace absolute DATA DIRECTORY file paths with
3991 short names relative to the backup directory. */
3992 if (const char* name = strrchr(filename, OS_PATH_SEPARATOR)) {
3993 while (--name > filename
3994 && *name != OS_PATH_SEPARATOR);
3995 if (name > filename) {
3996 filename = name + 1;
3997 }
3998 }
3999 }
4000
4001 Datafile file;
4002 file.set_filepath(filename);
4003 file.open_read_only(false);
4004
4005 if (!file.is_open()) {
4006 /* The file has been moved or it is a remote datafile. */
4007 if (!fil_ibd_discover(space_id, file)
4008 || !file.is_open()) {
4009 return(FIL_LOAD_NOT_FOUND);
4010 }
4011 }
4012
4013 os_offset_t size;
4014
4015 /* Read and validate the first page of the tablespace.
4016 Assign a tablespace name based on the tablespace type. */
4017 switch (file.validate_for_recovery()) {
4018 os_offset_t minimum_size;
4019 case DB_SUCCESS:
4020 if (file.space_id() != space_id) {
4021 ib::info()
4022 << "Ignoring data file '"
4023 << file.filepath()
4024 << "' with space ID " << file.space_id()
4025 << ", since the redo log references "
4026 << file.filepath() << " with space ID "
4027 << space_id << ".";
4028 return(FIL_LOAD_ID_CHANGED);
4029 }
4030 /* Get and test the file size. */
4031 size = os_file_get_size(file.handle());
4032
4033 /* Every .ibd file is created >= 4 pages in size.
4034 Smaller files cannot be OK. */
4035 minimum_size = os_offset_t(FIL_IBD_FILE_INITIAL_SIZE)
4036 << srv_page_size_shift;
4037
4038 if (size == static_cast<os_offset_t>(-1)) {
4039 /* The following call prints an error message */
4040 os_file_get_last_error(true);
4041
4042 ib::error() << "Could not measure the size of"
4043 " single-table tablespace file '"
4044 << file.filepath() << "'";
4045 } else if (size < minimum_size) {
4046 ib::error() << "The size of tablespace file '"
4047 << file.filepath() << "' is only " << size
4048 << ", should be at least " << minimum_size
4049 << "!";
4050 } else {
4051 /* Everything is fine so far. */
4052 break;
4053 }
4054
4055 /* fall through */
4056
4057 case DB_TABLESPACE_EXISTS:
4058 return(FIL_LOAD_INVALID);
4059
4060 default:
4061 return(FIL_LOAD_NOT_FOUND);
4062 }
4063
4064 ut_ad(space == NULL);
4065
4066 /* Adjust the memory-based flags that would normally be set by
4067 dict_tf_to_fsp_flags(). In recovery, we have no data dictionary. */
4068 ulint flags = file.flags();
4069 if (FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)) {
4070 flags |= page_zip_level
4071 << FSP_FLAGS_MEM_COMPRESSION_LEVEL;
4072 }
4073
4074 const byte* first_page = file.get_first_page();
4075 fil_space_crypt_t* crypt_data = first_page
4076 ? fil_space_read_crypt_data(page_size_t(flags), first_page)
4077 : NULL;
4078 space = fil_space_create(
4079 file.name(), space_id, flags, FIL_TYPE_TABLESPACE, crypt_data);
4080
4081 if (space == NULL) {
4082 return(FIL_LOAD_INVALID);
4083 }
4084
4085 ut_ad(space->id == file.space_id());
4086 ut_ad(space->id == space_id);
4087
4088 /* We do not use the size information we have about the file, because
4089 the rounding formula for extents and pages is somewhat complex; we
4090 let fil_node_open() do that task. */
4091
4092 if (!fil_node_create_low(file.filepath(), 0, space, false, false)) {
4093 ut_error;
4094 }
4095
4096 return(FIL_LOAD_OK);
4097}
4098
4099/***********************************************************************//**
4100A fault-tolerant function that tries to read the next file name in the
4101directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
4102idea is to read as much good data as we can and jump over bad data.
4103@return 0 if ok, -1 if error even after the retries, 1 if at the end
4104of the directory */
4105int
4106fil_file_readdir_next_file(
4107/*=======================*/
4108 dberr_t* err, /*!< out: this is set to DB_ERROR if an error
4109 was encountered, otherwise not changed */
4110 const char* dirname,/*!< in: directory name or path */
4111 os_file_dir_t dir, /*!< in: directory stream */
4112 os_file_stat_t* info) /*!< in/out: buffer where the
4113 info is returned */
4114{
4115 for (ulint i = 0; i < 100; i++) {
4116 int ret = os_file_readdir_next_file(dirname, dir, info);
4117
4118 if (ret != -1) {
4119
4120 return(ret);
4121 }
4122
4123 ib::error() << "os_file_readdir_next_file() returned -1 in"
4124 " directory " << dirname
4125 << ", crash recovery may have failed"
4126 " for some .ibd files!";
4127
4128 *err = DB_ERROR;
4129 }
4130
4131 return(-1);
4132}
4133
4134/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
4135(Typically when upgrading from MariaDB 10.1.0..10.1.20.)
4136@param[in,out] space tablespace
4137@param[in] flags desired tablespace flags */
4138void fsp_flags_try_adjust(fil_space_t* space, ulint flags)
4139{
4140 ut_ad(!srv_read_only_mode);
4141 ut_ad(fsp_flags_is_valid(flags, space->id));
4142 if (!space->size && (space->purpose != FIL_TYPE_TABLESPACE
4143 || !fil_space_get_size(space->id))) {
4144 return;
4145 }
4146 /* This code is executed during server startup while no
4147 connections are allowed. We do not need to protect against
4148 DROP TABLE by fil_space_acquire(). */
4149 mtr_t mtr;
4150 mtr.start();
4151 if (buf_block_t* b = buf_page_get(
4152 page_id_t(space->id, 0), page_size_t(flags),
4153 RW_X_LATCH, &mtr)) {
4154 ulint f = fsp_header_get_flags(b->frame);
4155 /* Suppress the message if only the DATA_DIR flag to differs. */
4156 if ((f ^ flags) & ~(1U << FSP_FLAGS_POS_RESERVED)) {
4157 ib::warn()
4158 << "adjusting FSP_SPACE_FLAGS of file '"
4159 << UT_LIST_GET_FIRST(space->chain)->name
4160 << "' from " << ib::hex(f)
4161 << " to " << ib::hex(flags);
4162 }
4163 if (f != flags) {
4164 mtr.set_named_space(space);
4165 mlog_write_ulint(FSP_HEADER_OFFSET
4166 + FSP_SPACE_FLAGS + b->frame,
4167 flags, MLOG_4BYTES, &mtr);
4168 }
4169 }
4170 mtr.commit();
4171}
4172
4173/** Determine if a matching tablespace exists in the InnoDB tablespace
4174memory cache. Note that if we have not done a crash recovery at the database
4175startup, there may be many tablespaces which are not yet in the memory cache.
4176@param[in] id Tablespace ID
4177@param[in] name Tablespace name used in fil_space_create().
4178@param[in] print_error_if_does_not_exist
4179 Print detailed error information to the
4180error log if a matching tablespace is not found from memory.
4181@param[in] table_flags table flags
4182@return the tablespace
4183@retval NULL if no matching tablespace exists in the memory cache */
4184fil_space_t*
4185fil_space_for_table_exists_in_mem(
4186 ulint id,
4187 const char* name,
4188 bool print_error_if_does_not_exist,
4189 ulint table_flags)
4190{
4191 const ulint expected_flags = dict_tf_to_fsp_flags(table_flags);
4192
4193 mutex_enter(&fil_system.mutex);
4194 if (fil_space_t* space = fil_space_get_by_id(id)) {
4195 if ((space->flags ^ expected_flags) & ~FSP_FLAGS_MEM_MASK) {
4196 goto func_exit;
4197 }
4198
4199 if (strcmp(space->name, name)) {
4200 ib::error() << "Table " << name
4201 << " in InnoDB data dictionary"
4202 " has tablespace id " << id
4203 << ", but the tablespace"
4204 " with that id has name " << space->name << "."
4205 " Have you deleted or moved .ibd files?";
4206 goto error_exit;
4207 }
4208
4209 /* Adjust the flags that are in FSP_FLAGS_MEM_MASK.
4210 FSP_SPACE_FLAGS will not be written back here. */
4211 space->flags = expected_flags;
4212 mutex_exit(&fil_system.mutex);
4213 if (!srv_read_only_mode) {
4214 fsp_flags_try_adjust(space, expected_flags
4215 & ~FSP_FLAGS_MEM_MASK);
4216 }
4217 return space;
4218 }
4219
4220 if (print_error_if_does_not_exist) {
4221 ib::error() << "Table " << name
4222 << " in the InnoDB data dictionary"
4223 " has tablespace id " << id
4224 << ", but tablespace with that id"
4225 " or name does not exist. Have"
4226 " you deleted or moved .ibd files?";
4227error_exit:
4228 ib::info() << TROUBLESHOOT_DATADICT_MSG;
4229 }
4230
4231func_exit:
4232 mutex_exit(&fil_system.mutex);
4233 return NULL;
4234}
4235
4236/*============================ FILE I/O ================================*/
4237
4238/********************************************************************//**
4239NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
4240
4241Prepares a file node for i/o. Opens the file if it is closed. Updates the
4242pending i/o's field in the node and the system appropriately. Takes the node
4243off the LRU list if it is in the LRU list. The caller must hold the fil_sys
4244mutex.
4245@return false if the file can't be opened, otherwise true */
4246static
4247bool
4248fil_node_prepare_for_io(
4249/*====================*/
4250 fil_node_t* node, /*!< in: file node */
4251 fil_space_t* space) /*!< in: space */
4252{
4253 ut_ad(node && space);
4254 ut_ad(mutex_own(&fil_system.mutex));
4255
4256 if (fil_system.n_open > srv_max_n_open_files + 5) {
4257 ib::warn() << "Open files " << fil_system.n_open
4258 << " exceeds the limit " << srv_max_n_open_files;
4259 }
4260
4261 if (!node->is_open()) {
4262 /* File is closed: open it */
4263 ut_a(node->n_pending == 0);
4264
4265 if (!fil_node_open_file(node)) {
4266 return(false);
4267 }
4268 }
4269
4270 if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) {
4271 /* The node is in the LRU list, remove it */
4272 ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
4273 UT_LIST_REMOVE(fil_system.LRU, node);
4274 }
4275
4276 node->n_pending++;
4277
4278 return(true);
4279}
4280
4281/** Update the data structures when an i/o operation finishes.
4282@param[in,out] node file node
4283@param[in] type IO context */
4284static
4285void
4286fil_node_complete_io(fil_node_t* node, const IORequest& type)
4287{
4288 ut_ad(mutex_own(&fil_system.mutex));
4289 ut_a(node->n_pending > 0);
4290
4291 --node->n_pending;
4292
4293 ut_ad(type.validate());
4294
4295 if (type.is_write()) {
4296
4297 ut_ad(!srv_read_only_mode
4298 || node->space->purpose == FIL_TYPE_TEMPORARY);
4299
4300 ++fil_system.modification_counter;
4301
4302 node->modification_counter = fil_system.modification_counter;
4303
4304 if (fil_buffering_disabled(node->space)) {
4305
4306 /* We don't need to keep track of unflushed
4307 changes as user has explicitly disabled
4308 buffering. */
4309 ut_ad(!node->space->is_in_unflushed_spaces);
4310 node->flush_counter = node->modification_counter;
4311
4312 } else if (!node->space->is_in_unflushed_spaces) {
4313
4314 node->space->is_in_unflushed_spaces = true;
4315
4316 UT_LIST_ADD_FIRST(
4317 fil_system.unflushed_spaces, node->space);
4318 }
4319 }
4320
4321 if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) {
4322
4323 /* The node must be put back to the LRU list */
4324 UT_LIST_ADD_FIRST(fil_system.LRU, node);
4325 }
4326}
4327
4328/** Report information about an invalid page access. */
4329static
4330void
4331fil_report_invalid_page_access(
4332 ulint block_offset, /*!< in: block offset */
4333 ulint space_id, /*!< in: space id */
4334 const char* space_name, /*!< in: space name */
4335 ulint byte_offset, /*!< in: byte offset */
4336 ulint len, /*!< in: I/O length */
4337 bool is_read) /*!< in: I/O type */
4338{
4339 ib::fatal()
4340 << "Trying to " << (is_read ? "read" : "write")
4341 << " page number " << block_offset << " in"
4342 " space " << space_id << ", space name " << space_name << ","
4343 " which is outside the tablespace bounds. Byte offset "
4344 << byte_offset << ", len " << len <<
4345 (space_id == 0 && !srv_was_started
4346 ? "Please check that the configuration matches"
4347 " the InnoDB system tablespace location (ibdata files)"
4348 : "");
4349}
4350
/** Reads or writes data. This operation could be asynchronous (aio).

The function picks the aio mode, looks up the tablespace and the file
node that contains the page, registers a pending i/o on the node, and
hands the request to os_aio(). For synchronous requests the pending i/o
is also completed here.

@param[in]	type		IO context (a private copy is modified)
@param[in]	sync		true if synchronous aio is desired
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in]	byte_offset	remainder of offset in bytes; in aio this
				must be divisible by the OS block size
@param[in]	len		how many bytes to read or write; this must
				not cross a file boundary; in aio this must
				be a block size multiple
@param[in,out]	buf		buffer where to store read data or from where
				to write; in aio this must be appropriately
				aligned
@param[in]	message		message for aio handler if non-sync aio
				used, else ignored
@param[in]	ignore_missing_space	true=ignore missing space during read
@return DB_SUCCESS or the error reported by os_aio()
@retval	DB_TABLESPACE_DELETED	if we are trying to do i/o on a tablespace
				which does not exist, or whose file cannot
				be opened
@retval	DB_TABLESPACE_TRUNCATED	if a read is outside the bounds of a
				tablespace that is or was being truncated
@retval	DB_ERROR		if the page is out of bounds and the request
				allows missing pages to be ignored */
dberr_t
fil_io(
	const IORequest&	type,
	bool			sync,
	const page_id_t&	page_id,
	const page_size_t&	page_size,
	ulint			byte_offset,
	ulint			len,
	void*			buf,
	void*			message,
	bool			ignore_missing_space)
{
	os_offset_t		offset;
	/* Work on a copy, because the request is adjusted below. */
	IORequest		req_type(type);

	ut_ad(req_type.validate());

	ut_ad(len > 0);
	ut_ad(byte_offset < srv_page_size);
	ut_ad(!page_size.is_compressed() || byte_offset == 0);
	ut_ad(srv_page_size == 1UL << srv_page_size_shift);
	compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MAX)
			    == UNIV_PAGE_SIZE_MAX);
	compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MIN)
			    == UNIV_PAGE_SIZE_MIN);
	ut_ad(fil_validate_skip());

	/* ibuf bitmap pages must be read in the sync AIO mode: */
	ut_ad(recv_no_ibuf_operations
	      || req_type.is_write()
	      || !ibuf_bitmap_page(page_id, page_size)
	      || sync
	      || req_type.is_log());

	/* Choose the aio mode: sync, log, ibuf or normal. */
	ulint	mode;

	if (sync) {

		mode = OS_AIO_SYNC;

	} else if (req_type.is_log()) {

		mode = OS_AIO_LOG;

	} else if (req_type.is_read()
		   && !recv_no_ibuf_operations
		   && ibuf_page(page_id, page_size, NULL)) {

		mode = OS_AIO_IBUF;

		/* Reduce probability of deadlock bugs in connection with ibuf:
		do not let the ibuf i/o handler sleep */

		req_type.clear_do_not_wake();
	} else {
		mode = OS_AIO_NORMAL;
	}

	/* Update the i/o statistics. */
	if (req_type.is_read()) {

		srv_stats.data_read.add(len);

	} else if (req_type.is_write()) {

		ut_ad(!srv_read_only_mode
		      || fsp_is_system_temporary(page_id.space()));

		srv_stats.data_written.add(len);
	}

	/* Reserve the fil_system mutex and make sure that we can open at
	least one file while holding it, if the file is not already open */

	fil_mutex_enter_and_prepare_for_io(page_id.space());

	fil_space_t*	space = fil_space_get_by_id(page_id.space());

	/* If we are deleting a tablespace we don't allow async read operations
	on that. However, we do allow write operations and sync read operations. */
	if (space == NULL
	    || (req_type.is_read()
		&& !sync
		&& space->stop_new_ops
		&& !space->is_being_truncated)) {

		mutex_exit(&fil_system.mutex);

		/* The error is reported unless either the request or
		the caller asked for missing spaces to be ignored. */
		if (!req_type.ignore_missing() && !ignore_missing_space) {
			ib::error()
				<< "Trying to do I/O to a tablespace which"
				" does not exist. I/O type: "
				<< (req_type.is_read() ? "read" : "write")
				<< ", page: " << page_id
				<< ", I/O length: " << len << " bytes";
		}

		return(DB_TABLESPACE_DELETED);
	}

	ut_ad(mode != OS_AIO_IBUF || fil_type_is_data(space->purpose));

	/* Walk the chain of files of the tablespace to find the node
	that contains the page. cur_page_no is made relative to the
	start of the current node as we advance. */
	ulint		cur_page_no = page_id.page_no();
	fil_node_t*	node = UT_LIST_GET_FIRST(space->chain);

	for (;;) {

		if (node == NULL) {

			/* We ran past the last file of the tablespace. */
			if (req_type.ignore_missing()) {
				mutex_exit(&fil_system.mutex);
				return(DB_ERROR);
			}

			/* NOTE(review): the loop continues with node == NULL
			after this call, so the report is presumably fatal
			and does not return - confirm. */
			fil_report_invalid_page_access(
				page_id.page_no(), page_id.space(),
				space->name, byte_offset, len,
				req_type.is_read());

		} else if (fil_is_user_tablespace_id(space->id)
			   && node->size == 0) {

			/* We do not know the size of a single-table tablespace
			before we open the file */
			break;

		} else if (node->size > cur_page_no) {
			/* Found! */
			break;

		} else {
			if (space->id != TRX_SYS_SPACE
			    && UT_LIST_GET_LEN(space->chain) == 1
			    && (srv_is_tablespace_truncated(space->id)
				|| space->is_being_truncated
				|| srv_was_tablespace_truncated(space))
			    && req_type.is_read()) {

				/* Handle page which is outside the truncated
				tablespace bounds when recovering from a crash
				happened during a truncation */
				mutex_exit(&fil_system.mutex);
				return(DB_TABLESPACE_TRUNCATED);
			}

			cur_page_no -= node->size;

			node = UT_LIST_GET_NEXT(chain, node);
		}
	}

	/* Open file if closed */
	if (!fil_node_prepare_for_io(node, space)) {
		if (fil_type_is_data(space->purpose)
		    && fil_is_user_tablespace_id(space->id)) {
			mutex_exit(&fil_system.mutex);

			if (!req_type.ignore_missing()) {
				ib::error()
					<< "Trying to do I/O to a tablespace"
					" which exists without .ibd data file."
					" I/O type: "
					<< (req_type.is_read()
					    ? "read" : "write")
					<< ", page: "
					<< page_id_t(page_id.space(),
						     cur_page_no)
					<< ", I/O length: " << len << " bytes";
			}

			return(DB_TABLESPACE_DELETED);
		}

		/* The tablespace is for log. Currently, we just assert here
		to prevent handling errors along the way fil_io returns.
		Also, if the log files are missing, it would be hard to
		promise the server can continue running. */
		ut_a(0);
	}

	/* Check that at least the start offset is within the bounds of a
	single-table tablespace, including rollback tablespaces. */
	if (node->size <= cur_page_no
	    && space->id != TRX_SYS_SPACE
	    && fil_type_is_data(space->purpose)) {

		if (req_type.ignore_missing()) {
			/* If we can tolerate the non-existent pages, we
			should return with DB_ERROR and let caller decide
			what to do. The pending i/o that was registered by
			fil_node_prepare_for_io() is undone first. */
			fil_node_complete_io(node, req_type);
			mutex_exit(&fil_system.mutex);
			return(DB_ERROR);
		}

		fil_report_invalid_page_access(
			page_id.page_no(), page_id.space(),
			space->name, byte_offset, len, req_type.is_read());
	}

	/* Now we have made the changes in the data structures of fil_system */
	mutex_exit(&fil_system.mutex);

	/* Calculate the byte offset of the request within the file, and
	check that the request does not extend past the end of the file */

	if (!page_size.is_compressed()) {

		offset = ((os_offset_t) cur_page_no
			  << srv_page_size_shift) + byte_offset;

		ut_a(node->size - cur_page_no
		     >= ((byte_offset + len + (srv_page_size - 1))
			 >> srv_page_size_shift));
	} else {
		/* For compressed pages, the shift is derived from the
		physical page size. */
		ulint	size_shift;

		switch (page_size.physical()) {
		case 1024: size_shift = 10; break;
		case 2048: size_shift = 11; break;
		case 4096: size_shift = 12; break;
		case 8192: size_shift = 13; break;
		case 16384: size_shift = 14; break;
		case 32768: size_shift = 15; break;
		case 65536: size_shift = 16; break;
		default: ut_error;
		}

		offset = ((os_offset_t) cur_page_no << size_shift)
			+ byte_offset;

		ut_a(node->size - cur_page_no
		     >= (len + (page_size.physical() - 1))
		     / page_size.physical());
	}

	/* Do AIO */

	ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);

	/* Fall back to the tablespace name if the node has no name. */
	const char* name = node->name == NULL ? space->name : node->name;

	req_type.set_fil_node(node);

	/* Queue the aio request */
	dberr_t err = os_aio(
		req_type,
		mode, name, node->handle, buf, offset, len,
		space->purpose != FIL_TYPE_TEMPORARY
		&& srv_read_only_mode,
		node, message);

	/* We can try to recover the page from the double write buffer if
	the decompression fails or the page is corrupt. */

	ut_a(req_type.is_dblwr_recover() || err == DB_SUCCESS);

	if (sync) {
		/* The i/o operation is already completed when we return from
		os_aio: */

		mutex_enter(&fil_system.mutex);

		fil_node_complete_io(node, req_type);

		mutex_exit(&fil_system.mutex);

		ut_ad(fil_validate_skip());
	}

	return(err);
}
4641
4642/**********************************************************************//**
4643Waits for an aio operation to complete. This function is used to write the
4644handler for completed requests. The aio array of pending requests is divided
4645into segments (see os0file.cc for more info). The thread specifies which
4646segment it wants to wait for. */
4647void
4648fil_aio_wait(
4649/*=========*/
4650 ulint segment) /*!< in: the number of the segment in the aio
4651 array to wait for */
4652{
4653 fil_node_t* node;
4654 IORequest type;
4655 void* message;
4656
4657 ut_ad(fil_validate_skip());
4658
4659 dberr_t err = os_aio_handler(segment, &node, &message, &type);
4660
4661 ut_a(err == DB_SUCCESS);
4662
4663 if (node == NULL) {
4664 ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
4665 return;
4666 }
4667
4668 srv_set_io_thread_op_info(segment, "complete io for fil node");
4669
4670 mutex_enter(&fil_system.mutex);
4671
4672 fil_node_complete_io(node, type);
4673 const fil_type_t purpose = node->space->purpose;
4674 const ulint space_id= node->space->id;
4675 const bool dblwr = node->space->use_doublewrite();
4676
4677 mutex_exit(&fil_system.mutex);
4678
4679 ut_ad(fil_validate_skip());
4680
4681 /* Do the i/o handling */
4682 /* IMPORTANT: since i/o handling for reads will read also the insert
4683 buffer in tablespace 0, you have to be very careful not to introduce
4684 deadlocks in the i/o system. We keep tablespace 0 data files always
4685 open, and use a special i/o thread to serve insert buffer requests. */
4686
4687 switch (purpose) {
4688 case FIL_TYPE_LOG:
4689 srv_set_io_thread_op_info(segment, "complete io for log");
4690 /* We use synchronous writing of the logs
4691 and can only end up here when writing a log checkpoint! */
4692 ut_a(ptrdiff_t(message) == 1);
4693 /* It was a checkpoint write */
4694 switch (srv_flush_t(srv_file_flush_method)) {
4695 case SRV_O_DSYNC:
4696 case SRV_NOSYNC:
4697 break;
4698 case SRV_FSYNC:
4699 case SRV_LITTLESYNC:
4700 case SRV_O_DIRECT:
4701 case SRV_O_DIRECT_NO_FSYNC:
4702#ifdef _WIN32
4703 case SRV_ALL_O_DIRECT_FSYNC:
4704#endif
4705 fil_flush(SRV_LOG_SPACE_FIRST_ID);
4706 }
4707
4708 DBUG_PRINT("ib_log", ("checkpoint info written"));
4709 log_sys.complete_checkpoint();
4710 return;
4711 case FIL_TYPE_TABLESPACE:
4712 case FIL_TYPE_TEMPORARY:
4713 case FIL_TYPE_IMPORT:
4714 srv_set_io_thread_op_info(segment, "complete io for buf page");
4715
4716 /* async single page writes from the dblwr buffer don't have
4717 access to the page */
4718 buf_page_t* bpage = static_cast<buf_page_t*>(message);
4719 if (!bpage) {
4720 return;
4721 }
4722
4723 ulint offset = bpage->id.page_no();
4724 dberr_t err = buf_page_io_complete(bpage, dblwr);
4725 if (err == DB_SUCCESS) {
4726 return;
4727 }
4728
4729 ut_ad(type.is_read());
4730 if (recv_recovery_is_on() && !srv_force_recovery) {
4731 recv_sys->found_corrupt_fs = true;
4732 }
4733
4734 if (fil_space_t* space = fil_space_acquire_for_io(space_id)) {
4735 if (space == node->space) {
4736 ib::error() << "Failed to read file '"
4737 << node->name
4738 << "' at offset " << offset
4739 << ": " << ut_strerr(err);
4740 }
4741
4742 space->release_for_io();
4743 }
4744 return;
4745 }
4746
4747 ut_ad(0);
4748}
4749
4750/**********************************************************************//**
4751Flushes to disk possible writes cached by the OS. If the space does not exist
4752or is being dropped, does not do anything. */
4753void
4754fil_flush(
4755/*======*/
4756 ulint space_id) /*!< in: file space id (this can be a group of
4757 log files or a tablespace of the database) */
4758{
4759 mutex_enter(&fil_system.mutex);
4760
4761 if (fil_space_t* space = fil_space_get_by_id(space_id)) {
4762 if (space->purpose != FIL_TYPE_TEMPORARY
4763 && !space->is_stopping()) {
4764 fil_flush_low(space);
4765 }
4766 }
4767
4768 mutex_exit(&fil_system.mutex);
4769}
4770
4771/** Flush a tablespace.
4772@param[in,out] space tablespace to flush */
4773void
4774fil_flush(fil_space_t* space)
4775{
4776 ut_ad(space->pending_io());
4777 ut_ad(space->purpose == FIL_TYPE_TABLESPACE
4778 || space->purpose == FIL_TYPE_IMPORT);
4779
4780 if (!space->is_stopping()) {
4781 mutex_enter(&fil_system.mutex);
4782 if (!space->is_stopping()) {
4783 fil_flush_low(space);
4784 }
4785 mutex_exit(&fil_system.mutex);
4786 }
4787}
4788
4789/** Flush to disk the writes in file spaces of the given type
4790possibly cached by the OS.
4791@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */
4792void
4793fil_flush_file_spaces(
4794 fil_type_t purpose)
4795{
4796 fil_space_t* space;
4797 ulint* space_ids;
4798 ulint n_space_ids;
4799
4800 ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_LOG);
4801
4802 mutex_enter(&fil_system.mutex);
4803
4804 n_space_ids = UT_LIST_GET_LEN(fil_system.unflushed_spaces);
4805 if (n_space_ids == 0) {
4806
4807 mutex_exit(&fil_system.mutex);
4808 return;
4809 }
4810
4811 /* Assemble a list of space ids to flush. Previously, we
4812 traversed fil_system.unflushed_spaces and called UT_LIST_GET_NEXT()
4813 on a space that was just removed from the list by fil_flush().
4814 Thus, the space could be dropped and the memory overwritten. */
4815 space_ids = static_cast<ulint*>(
4816 ut_malloc_nokey(n_space_ids * sizeof(*space_ids)));
4817
4818 n_space_ids = 0;
4819
4820 for (space = UT_LIST_GET_FIRST(fil_system.unflushed_spaces);
4821 space;
4822 space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
4823
4824 if (space->purpose == purpose
4825 && !space->is_stopping()) {
4826
4827 space_ids[n_space_ids++] = space->id;
4828 }
4829 }
4830
4831 mutex_exit(&fil_system.mutex);
4832
4833 /* Flush the spaces. It will not hurt to call fil_flush() on
4834 a non-existing space id. */
4835 for (ulint i = 0; i < n_space_ids; i++) {
4836
4837 fil_flush(space_ids[i]);
4838 }
4839
4840 ut_free(space_ids);
4841}
4842
4843/** Functor to validate the file node list of a tablespace. */
4844struct Check {
4845 /** Total size of file nodes visited so far */
4846 ulint size;
4847 /** Total number of open files visited so far */
4848 ulint n_open;
4849
4850 /** Constructor */
4851 Check() : size(0), n_open(0) {}
4852
4853 /** Visit a file node
4854 @param[in] elem file node to visit */
4855 void operator()(const fil_node_t* elem)
4856 {
4857 ut_a(elem->is_open() || !elem->n_pending);
4858 n_open += elem->is_open();
4859 size += elem->size;
4860 }
4861
4862 /** Validate a tablespace.
4863 @param[in] space tablespace to validate
4864 @return number of open file nodes */
4865 static ulint validate(const fil_space_t* space)
4866 {
4867 ut_ad(mutex_own(&fil_system.mutex));
4868 Check check;
4869 ut_list_validate(space->chain, check);
4870 ut_a(space->size == check.size);
4871 ut_ad(space->id != TRX_SYS_SPACE
4872 || space == fil_system.sys_space);
4873 ut_ad(space->id != SRV_TMP_SPACE_ID
4874 || space == fil_system.temp_space);
4875 return(check.n_open);
4876 }
4877};
4878
4879/******************************************************************//**
4880Checks the consistency of the tablespace cache.
4881@return true if ok */
4882bool
4883fil_validate(void)
4884/*==============*/
4885{
4886 fil_space_t* space;
4887 fil_node_t* fil_node;
4888 ulint n_open = 0;
4889
4890 mutex_enter(&fil_system.mutex);
4891
4892 /* Look for spaces in the hash table */
4893
4894 for (ulint i = 0; i < hash_get_n_cells(fil_system.spaces); i++) {
4895
4896 for (space = static_cast<fil_space_t*>(
4897 HASH_GET_FIRST(fil_system.spaces, i));
4898 space != 0;
4899 space = static_cast<fil_space_t*>(
4900 HASH_GET_NEXT(hash, space))) {
4901
4902 n_open += Check::validate(space);
4903 }
4904 }
4905
4906 ut_a(fil_system.n_open == n_open);
4907
4908 UT_LIST_CHECK(fil_system.LRU);
4909
4910 for (fil_node = UT_LIST_GET_FIRST(fil_system.LRU);
4911 fil_node != 0;
4912 fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) {
4913
4914 ut_a(fil_node->n_pending == 0);
4915 ut_a(!fil_node->being_extended);
4916 ut_a(fil_node->is_open());
4917 ut_a(fil_space_belongs_in_lru(fil_node->space));
4918 }
4919
4920 mutex_exit(&fil_system.mutex);
4921
4922 return(true);
4923}
4924
4925/********************************************************************//**
4926Returns true if file address is undefined.
4927@return true if undefined */
4928bool
4929fil_addr_is_null(
4930/*=============*/
4931 fil_addr_t addr) /*!< in: address */
4932{
4933 return(addr.page == FIL_NULL);
4934}
4935
4936/********************************************************************//**
4937Get the predecessor of a file page.
4938@return FIL_PAGE_PREV */
4939ulint
4940fil_page_get_prev(
4941/*==============*/
4942 const byte* page) /*!< in: file page */
4943{
4944 return(mach_read_from_4(page + FIL_PAGE_PREV));
4945}
4946
4947/********************************************************************//**
4948Get the successor of a file page.
4949@return FIL_PAGE_NEXT */
4950ulint
4951fil_page_get_next(
4952/*==============*/
4953 const byte* page) /*!< in: file page */
4954{
4955 return(mach_read_from_4(page + FIL_PAGE_NEXT));
4956}
4957
4958/*********************************************************************//**
4959Sets the file page type. */
4960void
4961fil_page_set_type(
4962/*==============*/
4963 byte* page, /*!< in/out: file page */
4964 ulint type) /*!< in: type */
4965{
4966 ut_ad(page);
4967
4968 mach_write_to_2(page + FIL_PAGE_TYPE, type);
4969}
4970
4971/** Reset the page type.
4972Data files created before MySQL 5.1 may contain garbage in FIL_PAGE_TYPE.
4973In MySQL 3.23.53, only undo log pages and index pages were tagged.
4974Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
4975@param[in] page_id page number
4976@param[in,out] page page with invalid FIL_PAGE_TYPE
4977@param[in] type expected page type
4978@param[in,out] mtr mini-transaction */
4979void
4980fil_page_reset_type(
4981 const page_id_t& page_id,
4982 byte* page,
4983 ulint type,
4984 mtr_t* mtr)
4985{
4986 ib::info()
4987 << "Resetting invalid page " << page_id << " type "
4988 << fil_page_get_type(page) << " to " << type << ".";
4989 mlog_write_ulint(page + FIL_PAGE_TYPE, type, MLOG_2BYTES, mtr);
4990}
4991
4992/********************************************************************//**
4993Delete the tablespace file and any related files like .cfg.
4994This should not be called for temporary tables.
4995@param[in] ibd_filepath File path of the IBD tablespace */
4996void
4997fil_delete_file(
4998/*============*/
4999 const char* ibd_filepath)
5000{
5001 /* Force a delete of any stale .ibd files that are lying around. */
5002
5003 ib::info() << "Deleting " << ibd_filepath;
5004 os_file_delete_if_exists(innodb_data_file_key, ibd_filepath, NULL);
5005
5006 char* cfg_filepath = fil_make_filepath(
5007 ibd_filepath, NULL, CFG, false);
5008 if (cfg_filepath != NULL) {
5009 os_file_delete_if_exists(
5010 innodb_data_file_key, cfg_filepath, NULL);
5011 ut_free(cfg_filepath);
5012 }
5013}
5014
5015/** Generate redo log for swapping two .ibd files
5016@param[in] old_table old table
5017@param[in] new_table new table
5018@param[in] tmp_name temporary table name
5019@param[in,out] mtr mini-transaction
5020@return innodb error code */
5021dberr_t
5022fil_mtr_rename_log(
5023 const dict_table_t* old_table,
5024 const dict_table_t* new_table,
5025 const char* tmp_name,
5026 mtr_t* mtr)
5027{
5028 ut_ad(old_table->space != fil_system.temp_space);
5029 ut_ad(new_table->space != fil_system.temp_space);
5030 ut_ad(old_table->space_id == old_table->space->id);
5031 ut_ad(new_table->space_id == new_table->space->id);
5032
5033 /* If neither table is file-per-table,
5034 there will be no renaming of files. */
5035 if (!old_table->space_id && !new_table->space_id) {
5036 return(DB_SUCCESS);
5037 }
5038
5039 const bool has_data_dir = DICT_TF_HAS_DATA_DIR(old_table->flags);
5040
5041 if (old_table->space_id) {
5042 char* tmp_path = fil_make_filepath(
5043 has_data_dir ? old_table->data_dir_path : NULL,
5044 tmp_name, IBD, has_data_dir);
5045 if (tmp_path == NULL) {
5046 return(DB_OUT_OF_MEMORY);
5047 }
5048
5049 const char* old_path = old_table->space->chain.start->name;
5050 /* Temp filepath must not exist. */
5051 dberr_t err = fil_rename_tablespace_check(
5052 old_path, tmp_path, !old_table->space);
5053 if (err != DB_SUCCESS) {
5054 ut_free(tmp_path);
5055 return(err);
5056 }
5057
5058 fil_name_write_rename_low(
5059 old_table->space_id, 0, old_path, tmp_path, mtr);
5060
5061 ut_free(tmp_path);
5062 }
5063
5064 if (new_table->space_id) {
5065 const char* new_path = new_table->space->chain.start->name;
5066 char* old_path = fil_make_filepath(
5067 has_data_dir ? old_table->data_dir_path : NULL,
5068 old_table->name.m_name, IBD, has_data_dir);
5069
5070 /* Destination filepath must not exist unless this ALTER
5071 TABLE starts and ends with a file_per-table tablespace. */
5072 if (!old_table->space_id) {
5073 dberr_t err = fil_rename_tablespace_check(
5074 new_path, old_path, !new_table->space);
5075 if (err != DB_SUCCESS) {
5076 ut_free(old_path);
5077 return(err);
5078 }
5079 }
5080
5081 fil_name_write_rename_low(
5082 new_table->space_id, 0, new_path, old_path, mtr);
5083 ut_free(old_path);
5084 }
5085
5086 return DB_SUCCESS;
5087}
5088
#ifdef UNIV_DEBUG
/** Check that a tablespace is valid for mtr_commit().
This function consists of debug assertions only: the caller must not
hold fil_system.mutex, and the tablespace must be a persistent
(FIL_TYPE_TABLESPACE), non-predefined tablespace.
@param[in]	space	persistent tablespace that has been changed */
static
void
fil_space_validate_for_mtr_commit(
	const fil_space_t*	space)
{
	ut_ad(!mutex_own(&fil_system.mutex));
	ut_ad(space != NULL);
	ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
	ut_ad(!is_predefined_tablespace(space->id));

	/* We are serving mtr_commit(). While there is an active
	mini-transaction, we should have !space->stop_new_ops. This is
	guaranteed by meta-data locks or transactional locks, or
	dict_operation_lock (X-lock in DROP, S-lock in purge).

	However, a file I/O thread can invoke change buffer merge
	while fil_check_pending_operations() is waiting for operations
	to quiesce. This is not a problem, because
	ibuf_merge_or_delete_for_page() would call
	fil_space_acquire() before mtr_start() and
	fil_space_t::release() after mtr_commit(). This is why
	the tablespace should still be referenced (see
	space->referenced() below) if stop_new_ops is set. */
	ut_ad(!space->stop_new_ops
	      || space->is_being_truncated /* TRUNCATE sets stop_new_ops */
	      || space->referenced());
}
#endif /* UNIV_DEBUG */
5119
5120/** Write a MLOG_FILE_NAME record for a persistent tablespace.
5121@param[in] space tablespace
5122@param[in,out] mtr mini-transaction */
5123static
5124void
5125fil_names_write(
5126 const fil_space_t* space,
5127 mtr_t* mtr)
5128{
5129 ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
5130 fil_name_write(space, 0, UT_LIST_GET_FIRST(space->chain), mtr);
5131}
5132
5133/** Note that a non-predefined persistent tablespace has been modified
5134by redo log.
5135@param[in,out] space tablespace */
5136void
5137fil_names_dirty(
5138 fil_space_t* space)
5139{
5140 ut_ad(log_mutex_own());
5141 ut_ad(recv_recovery_is_on());
5142 ut_ad(log_sys.lsn != 0);
5143 ut_ad(space->max_lsn == 0);
5144 ut_d(fil_space_validate_for_mtr_commit(space));
5145
5146 UT_LIST_ADD_LAST(fil_system.named_spaces, space);
5147 space->max_lsn = log_sys.lsn;
5148}
5149
5150/** Write MLOG_FILE_NAME records when a non-predefined persistent
5151tablespace was modified for the first time since the latest
5152fil_names_clear().
5153@param[in,out] space tablespace
5154@param[in,out] mtr mini-transaction */
5155void
5156fil_names_dirty_and_write(
5157 fil_space_t* space,
5158 mtr_t* mtr)
5159{
5160 ut_ad(log_mutex_own());
5161 ut_d(fil_space_validate_for_mtr_commit(space));
5162 ut_ad(space->max_lsn == log_sys.lsn);
5163
5164 UT_LIST_ADD_LAST(fil_system.named_spaces, space);
5165 fil_names_write(space, mtr);
5166
5167 DBUG_EXECUTE_IF("fil_names_write_bogus",
5168 {
5169 char bogus_name[] = "./test/bogus file.ibd";
5170 os_normalize_path(bogus_name);
5171 fil_name_write(
5172 SRV_LOG_SPACE_FIRST_ID, 0,
5173 bogus_name, mtr);
5174 });
5175}
5176
5177/** On a log checkpoint, reset fil_names_dirty_and_write() flags
5178and write out MLOG_FILE_NAME and MLOG_CHECKPOINT if needed.
5179@param[in] lsn checkpoint LSN
5180@param[in] do_write whether to always write MLOG_CHECKPOINT
5181@return whether anything was written to the redo log
5182@retval false if no flags were set and nothing written
5183@retval true if anything was written to the redo log */
5184bool
5185fil_names_clear(
5186 lsn_t lsn,
5187 bool do_write)
5188{
5189 mtr_t mtr;
5190 ulint mtr_checkpoint_size = LOG_CHECKPOINT_FREE_PER_THREAD;
5191
5192 DBUG_EXECUTE_IF(
5193 "increase_mtr_checkpoint_size",
5194 mtr_checkpoint_size = 75 * 1024;
5195 );
5196
5197 ut_ad(log_mutex_own());
5198
5199 if (log_sys.append_on_checkpoint) {
5200 mtr_write_log(log_sys.append_on_checkpoint);
5201 do_write = true;
5202 }
5203
5204 mtr.start();
5205
5206 for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.named_spaces);
5207 space != NULL; ) {
5208 fil_space_t* next = UT_LIST_GET_NEXT(named_spaces, space);
5209
5210 ut_ad(space->max_lsn > 0);
5211 if (space->max_lsn < lsn) {
5212 /* The tablespace was last dirtied before the
5213 checkpoint LSN. Remove it from the list, so
5214 that if the tablespace is not going to be
5215 modified any more, subsequent checkpoints will
5216 avoid calling fil_names_write() on it. */
5217 space->max_lsn = 0;
5218 UT_LIST_REMOVE(fil_system.named_spaces, space);
5219 }
5220
5221 /* max_lsn is the last LSN where fil_names_dirty_and_write()
5222 was called. If we kept track of "min_lsn" (the first LSN
5223 where max_lsn turned nonzero), we could avoid the
5224 fil_names_write() call if min_lsn > lsn. */
5225
5226 fil_names_write(space, &mtr);
5227 do_write = true;
5228
5229 const mtr_buf_t* mtr_log = mtr_get_log(&mtr);
5230
5231 /** If the mtr buffer size exceeds the size of
5232 LOG_CHECKPOINT_FREE_PER_THREAD then commit the multi record
5233 mini-transaction, start the new mini-transaction to
5234 avoid the parsing buffer overflow error during recovery. */
5235
5236 if (mtr_log->size() > mtr_checkpoint_size) {
5237 ut_ad(mtr_log->size() < (RECV_PARSING_BUF_SIZE / 2));
5238 mtr.commit_checkpoint(lsn, false);
5239 mtr.start();
5240 }
5241
5242 space = next;
5243 }
5244
5245 if (do_write) {
5246 mtr.commit_checkpoint(lsn, true);
5247 } else {
5248 ut_ad(!mtr.has_modifications());
5249 }
5250
5251 return(do_write);
5252}
5253
5254/** Truncate a single-table tablespace. The tablespace must be cached
5255in the memory cache.
5256@param space_id space id
5257@param dir_path directory path
5258@param tablename the table name in the usual
5259 databasename/tablename format of InnoDB
5260@param flags tablespace flags
5261@param trunc_to_default truncate to default size if tablespace
5262 is being newly re-initialized.
5263@return DB_SUCCESS or error */
5264dberr_t
5265truncate_t::truncate(
5266/*=================*/
5267 ulint space_id,
5268 const char* dir_path,
5269 const char* tablename,
5270 ulint flags,
5271 bool trunc_to_default)
5272{
5273 dberr_t err = DB_SUCCESS;
5274 char* path;
5275
5276 ut_a(!is_system_tablespace(space_id));
5277
5278 if (FSP_FLAGS_HAS_DATA_DIR(flags)) {
5279 ut_ad(dir_path != NULL);
5280 path = fil_make_filepath(dir_path, tablename, IBD, true);
5281 } else {
5282 path = fil_make_filepath(NULL, tablename, IBD, false);
5283 }
5284
5285 if (path == NULL) {
5286 return(DB_OUT_OF_MEMORY);
5287 }
5288
5289 mutex_enter(&fil_system.mutex);
5290
5291 fil_space_t* space = fil_space_get_by_id(space_id);
5292
5293 /* The following code must change when InnoDB supports
5294 multiple datafiles per tablespace. */
5295 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
5296
5297 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
5298
5299 if (trunc_to_default) {
5300 space->size = node->size = FIL_IBD_FILE_INITIAL_SIZE;
5301 }
5302
5303 const bool already_open = node->is_open();
5304
5305 if (!already_open) {
5306
5307 bool ret;
5308
5309 node->handle = os_file_create_simple_no_error_handling(
5310 innodb_data_file_key, path, OS_FILE_OPEN,
5311 OS_FILE_READ_WRITE,
5312 space->purpose != FIL_TYPE_TEMPORARY
5313 && srv_read_only_mode, &ret);
5314
5315 if (!ret) {
5316 ib::error() << "Failed to open tablespace file "
5317 << path << ".";
5318
5319 ut_free(path);
5320
5321 return(DB_ERROR);
5322 }
5323
5324 ut_a(node->is_open());
5325 }
5326
5327 os_offset_t trunc_size = trunc_to_default
5328 ? FIL_IBD_FILE_INITIAL_SIZE
5329 : space->size;
5330
5331 const bool success = os_file_truncate(
5332 path, node->handle, trunc_size << srv_page_size_shift);
5333
5334 if (!success) {
5335 ib::error() << "Cannot truncate file " << path
5336 << " in TRUNCATE TABLESPACE.";
5337 err = DB_ERROR;
5338 }
5339
5340 space->stop_new_ops = false;
5341 space->is_being_truncated = false;
5342
5343 /* If we opened the file in this function, close it. */
5344 if (!already_open) {
5345 bool closed = os_file_close(node->handle);
5346
5347 if (!closed) {
5348
5349 ib::error() << "Failed to close tablespace file "
5350 << path << ".";
5351
5352 err = DB_ERROR;
5353 } else {
5354 node->handle = OS_FILE_CLOSED;
5355 }
5356 }
5357
5358 mutex_exit(&fil_system.mutex);
5359
5360 ut_free(path);
5361
5362 return(err);
5363}
5364
5365/* Unit Tests */
#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
#define MF			fil_make_filepath
/* Log the most recently generated path and then free it. Each MF() result
is a separately allocated string (other callers in this file release it
with ut_free()), so overwriting 'path' without freeing it would leak one
buffer per test case. */
#define DISPLAY			do {				\
		ib::info() << path;				\
		ut_free(path);					\
		path = NULL;					\
	} while (0)
/** Exercise fil_make_filepath() with a range of path, name, extension
and trim combinations, writing each generated path to the info log.
Only compiled when UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH is defined. */
void
test_make_filepath()
{
	char* path;
	const char* long_path =
		"this/is/a/very/long/path/including/a/very/"
		"looooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooong"
		"/folder/name";
	path = MF("/this/is/a/path/with/a/filename", NULL, IBD, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename", NULL, ISL, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename", NULL, CFG, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename.dat", NULL, IBD, false); DISPLAY;
	path = MF(NULL, "tablespacename", NO_EXT, false); DISPLAY;
	path = MF(NULL, "tablespacename", IBD, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", NO_EXT, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", IBD, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", ISL, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", CFG, false); DISPLAY;
	path = MF(NULL, "dbname\\tablespacename", NO_EXT, false); DISPLAY;
	path = MF(NULL, "dbname\\tablespacename", IBD, false); DISPLAY;
	path = MF("/this/is/a/path", "dbname/tablespacename", IBD, false); DISPLAY;
	path = MF("/this/is/a/path", "dbname/tablespacename", IBD, true); DISPLAY;
	path = MF("./this/is/a/path", "dbname/tablespacename.ibd", IBD, true); DISPLAY;
	path = MF("this\\is\\a\\path", "dbname/tablespacename", IBD, true); DISPLAY;
	path = MF("/this/is/a/path", "dbname\\tablespacename", IBD, true); DISPLAY;
	path = MF(long_path, NULL, IBD, false); DISPLAY;
	path = MF(long_path, "tablespacename", IBD, false); DISPLAY;
	path = MF(long_path, "tablespacename", IBD, true); DISPLAY;
}
#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
5410/* @} */
5411
5412/** Return the next fil_space_t.
5413Once started, the caller must keep calling this until it returns NULL.
5414fil_space_t::acquire() and fil_space_t::release() are invoked here which
5415blocks a concurrent operation from dropping the tablespace.
5416@param[in] prev_space Pointer to the previous fil_space_t.
5417If NULL, use the first fil_space_t on fil_system.space_list.
5418@return pointer to the next fil_space_t.
5419@retval NULL if this was the last*/
5420fil_space_t*
5421fil_space_next(fil_space_t* prev_space)
5422{
5423 fil_space_t* space=prev_space;
5424
5425 mutex_enter(&fil_system.mutex);
5426
5427 if (!space) {
5428 space = UT_LIST_GET_FIRST(fil_system.space_list);
5429 } else {
5430 ut_a(space->referenced());
5431
5432 /* Move on to the next fil_space_t */
5433 space->release();
5434 space = UT_LIST_GET_NEXT(space_list, space);
5435 }
5436
5437 /* Skip spaces that are being created by
5438 fil_ibd_create(), or dropped, or !tablespace. */
5439 while (space != NULL
5440 && (UT_LIST_GET_LEN(space->chain) == 0
5441 || space->is_stopping()
5442 || space->purpose != FIL_TYPE_TABLESPACE)) {
5443 space = UT_LIST_GET_NEXT(space_list, space);
5444 }
5445
5446 if (space != NULL) {
5447 space->acquire();
5448 }
5449
5450 mutex_exit(&fil_system.mutex);
5451
5452 return(space);
5453}
5454
5455/**
5456Remove space from key rotation list if there are no more
5457pending operations.
5458@param[in,out] space Tablespace */
5459static
5460void
5461fil_space_remove_from_keyrotation(fil_space_t* space)
5462{
5463 ut_ad(mutex_own(&fil_system.mutex));
5464 ut_ad(space);
5465
5466 if (space->is_in_rotation_list && !space->referenced()) {
5467 space->is_in_rotation_list = false;
5468 ut_a(UT_LIST_GET_LEN(fil_system.rotation_list) > 0);
5469 UT_LIST_REMOVE(fil_system.rotation_list, space);
5470 }
5471}
5472
5473
5474/** Return the next fil_space_t from key rotation list.
5475Once started, the caller must keep calling this until it returns NULL.
5476fil_space_t::acquire() and fil_space_t::release() are invoked here which
5477blocks a concurrent operation from dropping the tablespace.
5478@param[in] prev_space Pointer to the previous fil_space_t.
5479If NULL, use the first fil_space_t on fil_system.space_list.
5480@return pointer to the next fil_space_t.
5481@retval NULL if this was the last*/
5482fil_space_t*
5483fil_space_keyrotate_next(
5484 fil_space_t* prev_space)
5485{
5486 fil_space_t* space = prev_space;
5487 fil_space_t* old = NULL;
5488
5489 mutex_enter(&fil_system.mutex);
5490
5491 if (UT_LIST_GET_LEN(fil_system.rotation_list) == 0) {
5492 if (space) {
5493 space->release();
5494 fil_space_remove_from_keyrotation(space);
5495 }
5496 mutex_exit(&fil_system.mutex);
5497 return(NULL);
5498 }
5499
5500 if (prev_space == NULL) {
5501 space = UT_LIST_GET_FIRST(fil_system.rotation_list);
5502
5503 /* We can trust that space is not NULL because we
5504 checked list length above */
5505 } else {
5506 /* Move on to the next fil_space_t */
5507 space->release();
5508
5509 old = space;
5510 space = UT_LIST_GET_NEXT(rotation_list, space);
5511
5512 fil_space_remove_from_keyrotation(old);
5513 }
5514
5515 /* Skip spaces that are being created by fil_ibd_create(),
5516 or dropped or truncated. Note that rotation_list contains only
5517 space->purpose == FIL_TYPE_TABLESPACE. */
5518 while (space != NULL
5519 && (UT_LIST_GET_LEN(space->chain) == 0
5520 || space->is_stopping())) {
5521
5522 old = space;
5523 space = UT_LIST_GET_NEXT(rotation_list, space);
5524 fil_space_remove_from_keyrotation(old);
5525 }
5526
5527 if (space != NULL) {
5528 space->acquire();
5529 }
5530
5531 mutex_exit(&fil_system.mutex);
5532
5533 return(space);
5534}
5535
5536/** Determine the block size of the data file.
5537@param[in] space tablespace
5538@param[in] offset page number
5539@return block size */
5540UNIV_INTERN
5541ulint
5542fil_space_get_block_size(const fil_space_t* space, unsigned offset)
5543{
5544 ulint block_size = 512;
5545
5546 for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
5547 node != NULL;
5548 node = UT_LIST_GET_NEXT(chain, node)) {
5549 block_size = node->block_size;
5550 if (node->size > offset) {
5551 ut_ad(node->size <= 0xFFFFFFFFU);
5552 break;
5553 }
5554 offset -= static_cast<unsigned>(node->size);
5555 }
5556
5557 /* Currently supporting block size up to 4K,
5558 fall back to default if bigger requested. */
5559 if (block_size > 4096) {
5560 block_size = 512;
5561 }
5562
5563 return block_size;
5564}
5565
5566/*******************************************************************//**
5567Returns the table space by a given id, NULL if not found. */
5568fil_space_t*
5569fil_space_found_by_id(
5570/*==================*/
5571 ulint id) /*!< in: space id */
5572{
5573 fil_space_t* space = NULL;
5574 mutex_enter(&fil_system.mutex);
5575 space = fil_space_get_by_id(id);
5576
5577 /* Not found if space is being deleted */
5578 if (space && space->stop_new_ops) {
5579 space = NULL;
5580 }
5581
5582 mutex_exit(&fil_system.mutex);
5583 return space;
5584}
5585
5586/**
5587Get should we punch hole to tablespace.
5588@param[in] node File node
5589@return true, if punch hole should be tried, false if not. */
5590bool
5591fil_node_should_punch_hole(
5592 const fil_node_t* node)
5593{
5594 return (node->space->punch_hole);
5595}
5596
5597/**
5598Set punch hole to tablespace to given value.
5599@param[in] node File node
5600@param[in] val value to be set. */
5601void
5602fil_space_set_punch_hole(
5603 fil_node_t* node,
5604 bool val)
5605{
5606 node->space->punch_hole = val;
5607}
5608