1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved. |
4 | Copyright (c) 2008, Google Inc. |
5 | Copyright (c) 2009, Percona Inc. |
6 | Copyright (c) 2013, 2018, MariaDB Corporation. |
7 | |
8 | Portions of this file contain modifications contributed and copyrighted by |
9 | Google, Inc. Those modifications are gratefully acknowledged and are described |
10 | briefly in the InnoDB documentation. The contributions by Google are |
11 | incorporated with their permission, and subject to the conditions contained in |
12 | the file COPYING.Google. |
13 | |
14 | Portions of this file contain modifications contributed and copyrighted |
15 | by Percona Inc.. Those modifications are |
16 | gratefully acknowledged and are described briefly in the InnoDB |
17 | documentation. The contributions by Percona Inc. are incorporated with |
18 | their permission, and subject to the conditions contained in the file |
19 | COPYING.Percona. |
20 | |
21 | This program is free software; you can redistribute it and/or modify it under |
22 | the terms of the GNU General Public License as published by the Free Software |
23 | Foundation; version 2 of the License. |
24 | |
25 | This program is distributed in the hope that it will be useful, but WITHOUT |
26 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
27 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
28 | |
29 | You should have received a copy of the GNU General Public License along with |
30 | this program; if not, write to the Free Software Foundation, Inc., |
31 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
32 | |
33 | *****************************************************************************/ |
34 | |
35 | /********************************************************************//** |
36 | @file srv/srv0start.cc |
37 | Starts the InnoDB database server |
38 | |
39 | Created 2/16/1996 Heikki Tuuri |
40 | *************************************************************************/ |
41 | |
42 | #include "my_global.h" |
43 | |
44 | #include "ha_prototypes.h" |
45 | |
46 | #include "mysqld.h" |
47 | #include "mysql/psi/mysql_stage.h" |
48 | #include "mysql/psi/psi.h" |
49 | |
50 | #include "row0ftsort.h" |
51 | #include "ut0mem.h" |
52 | #include "ut0timer.h" |
53 | #include "mem0mem.h" |
54 | #include "data0data.h" |
55 | #include "data0type.h" |
56 | #include "dict0dict.h" |
57 | #include "buf0buf.h" |
58 | #include "buf0dump.h" |
59 | #include "os0file.h" |
60 | #include "os0thread.h" |
61 | #include "fil0fil.h" |
62 | #include "fil0crypt.h" |
63 | #include "fsp0fsp.h" |
64 | #include "rem0rec.h" |
65 | #include "mtr0mtr.h" |
66 | #include "log0crypt.h" |
67 | #include "log0recv.h" |
68 | #include "page0page.h" |
69 | #include "page0cur.h" |
70 | #include "trx0trx.h" |
71 | #include "trx0sys.h" |
72 | #include "btr0btr.h" |
73 | #include "btr0cur.h" |
74 | #include "rem0rec.h" |
75 | #include "ibuf0ibuf.h" |
76 | #include "srv0start.h" |
77 | #include "srv0srv.h" |
78 | #include "btr0defragment.h" |
79 | #include "fsp0sysspace.h" |
80 | #include "row0trunc.h" |
81 | #include "mysql/service_wsrep.h" /* wsrep_recovery */ |
82 | #include "trx0rseg.h" |
83 | #include "os0proc.h" |
84 | #include "buf0flu.h" |
85 | #include "buf0rea.h" |
86 | #include "dict0boot.h" |
87 | #include "dict0load.h" |
88 | #include "dict0stats_bg.h" |
89 | #include "que0que.h" |
90 | #include "lock0lock.h" |
91 | #include "trx0roll.h" |
92 | #include "trx0purge.h" |
93 | #include "lock0lock.h" |
94 | #include "pars0pars.h" |
95 | #include "btr0sea.h" |
96 | #include "rem0cmp.h" |
97 | #include "dict0crea.h" |
98 | #include "row0ins.h" |
99 | #include "row0sel.h" |
100 | #include "row0upd.h" |
101 | #include "row0row.h" |
102 | #include "row0mysql.h" |
103 | #include "row0trunc.h" |
104 | #include "btr0pcur.h" |
105 | #include "os0event.h" |
106 | #include "zlib.h" |
107 | #include "ut0crc32.h" |
108 | #include "btr0scrub.h" |
109 | #include "ut0new.h" |
110 | |
/** Log sequence number immediately after startup */
lsn_t srv_start_lsn;
/** Log sequence number at shutdown */
lsn_t srv_shutdown_lsn;

/** TRUE if a raw partition is in use */
ibool srv_start_raw_disk_in_use;

/** Number of IO threads to use */
ulint srv_n_file_io_threads;

/** Space id of the first undo tablespace. The ids of the undo
tablespaces are consecutive, starting from this value. */
ulint srv_undo_space_id_start;

/** TRUE if the server is being started, before rolling back any
incomplete transactions */
bool srv_startup_is_before_trx_rollback_phase;
/** TRUE if the server is being started */
bool srv_is_being_started;
/** TRUE if SYS_TABLESPACES is available for lookups */
bool srv_sys_tablespaces_open;
/** TRUE if the server was successfully started */
bool srv_was_started;
/** The original value of srv_log_file_size (innodb_log_file_size).
This is the value that create_log_files() passes to log_set_capacity(). */
static ulonglong srv_log_file_size_requested;
/** Whether srv_start() has been called */
static bool srv_start_has_been_called;

/** Whether any undo log records can be generated */
UNIV_INTERN bool srv_undo_sources;

#ifdef UNIV_DEBUG
/** InnoDB system tablespace to set during recovery.
NOTE(review): judging by the name this is presumably the size of the
system tablespace; confirm against the code that reads it. */
UNIV_INTERN uint srv_sys_space_size_debug;
/** Whether redo log files have been created at startup;
set in create_log_files_rename() */
UNIV_INTERN bool srv_log_files_created;
#endif /* UNIV_DEBUG */
148 | |
/** Bit flags recording which groups of background threads have been
created. They are used to determine which threads need to be stopped
if startup has to be aborted during the initialisation step.
Every flag occupies its own bit, so the flags can be combined. */
enum srv_start_state_t {
	/** No thread started */
	SRV_START_STATE_NONE = 0,
	/** lock_wait_timeout_thread started */
	SRV_START_STATE_LOCK_SYS = 1 << 0,
	/** buf_flush_page_cleaner_coordinator,
	buf_flush_page_cleaner_worker started */
	SRV_START_STATE_IO = 1 << 1,
	/** srv_error_monitor_thread, srv_monitor_thread started */
	SRV_START_STATE_MONITOR = 1 << 2,
	/** srv_master_thread started */
	SRV_START_STATE_MASTER = 1 << 3,
	/** srv_purge_coordinator_thread, srv_worker_thread started */
	SRV_START_STATE_PURGE = 1 << 4,
	/** fil_crypt_thread, btr_defragment_thread started
	(all background threads that can generate redo log
	but not undo log) */
	SRV_START_STATE_REDO = 1 << 5
};
171 | |
172 | /** Track server thrd starting phases */ |
173 | static ulint srv_start_state; |
174 | |
175 | /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to |
176 | SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ |
177 | enum srv_shutdown_t srv_shutdown_state = SRV_SHUTDOWN_NONE; |
178 | |
179 | /** Files comprising the system tablespace */ |
180 | pfs_os_file_t files[1000]; |
181 | |
182 | /** io_handler_thread parameters for thread identification */ |
183 | static ulint n[SRV_MAX_N_IO_THREADS + 6]; |
184 | /** io_handler_thread identifiers, 32 is the maximum number of purge threads */ |
185 | /** 6 is the ? */ |
186 | #define START_OLD_THREAD_CNT (SRV_MAX_N_IO_THREADS + 6 + 32) |
187 | static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32]; |
188 | |
189 | /** Thead handles */ |
190 | static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 6 + 32]; |
191 | static os_thread_t buf_dump_thread_handle; |
192 | static os_thread_t dict_stats_thread_handle; |
193 | /** Status variables, is thread started ?*/ |
194 | static bool thread_started[SRV_MAX_N_IO_THREADS + 6 + 32] = {false}; |
195 | /** Name of srv_monitor_file */ |
196 | static char* srv_monitor_file_name; |
197 | |
198 | /** */ |
199 | #define SRV_MAX_N_PENDING_SYNC_IOS 100 |
200 | |
#ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema.
io_handler_thread() picks one of the io_*_thread_key values according
to the I/O segment number that it serves. */
mysql_pfs_key_t buf_dump_thread_key;
mysql_pfs_key_t dict_stats_thread_key;
mysql_pfs_key_t io_handler_thread_key;
mysql_pfs_key_t io_ibuf_thread_key;
mysql_pfs_key_t io_log_thread_key;
mysql_pfs_key_t io_read_thread_key;
mysql_pfs_key_t io_write_thread_key;
mysql_pfs_key_t srv_error_monitor_thread_key;
mysql_pfs_key_t srv_lock_timeout_thread_key;
mysql_pfs_key_t srv_master_thread_key;
mysql_pfs_key_t srv_monitor_thread_key;
mysql_pfs_key_t srv_purge_thread_key;
mysql_pfs_key_t srv_worker_thread_key;
#endif /* UNIV_PFS_THREAD */
217 | |
#ifdef HAVE_PSI_STAGE_INTERFACE
/** Array of all InnoDB stage events for monitoring activities via
performance schema. Each element points to a PSI_stage_info object
that is defined outside this part of the file. */
static PSI_stage_info* srv_stages[] =
{
	&srv_stage_alter_table_end,
	&srv_stage_alter_table_flush,
	&srv_stage_alter_table_insert,
	&srv_stage_alter_table_log_index,
	&srv_stage_alter_table_log_table,
	&srv_stage_alter_table_merge_sort,
	&srv_stage_alter_table_read_pk_internal_sort,
	&srv_stage_buffer_pool_load,
};
#endif /* HAVE_PSI_STAGE_INTERFACE */
233 | |
234 | /*********************************************************************//** |
235 | Check if a file can be opened in read-write mode. |
236 | @return true if it doesn't exist or can be opened in rw mode. */ |
237 | static |
238 | bool |
239 | srv_file_check_mode( |
240 | /*================*/ |
241 | const char* name) /*!< in: filename to check */ |
242 | { |
243 | os_file_stat_t stat; |
244 | |
245 | memset(&stat, 0x0, sizeof(stat)); |
246 | |
247 | dberr_t err = os_file_get_status( |
248 | name, &stat, true, srv_read_only_mode); |
249 | |
250 | if (err == DB_FAIL) { |
251 | ib::error() << "os_file_get_status() failed on '" << name |
252 | << "'. Can't determine file permissions." ; |
253 | return(false); |
254 | |
255 | } else if (err == DB_SUCCESS) { |
256 | |
257 | /* Note: stat.rw_perm is only valid of files */ |
258 | |
259 | if (stat.type == OS_FILE_TYPE_FILE) { |
260 | |
261 | if (!stat.rw_perm) { |
262 | const char* mode = srv_read_only_mode |
263 | ? "read" : "read-write" ; |
264 | ib::error() << name << " can't be opened in " |
265 | << mode << " mode." ; |
266 | return(false); |
267 | } |
268 | } else { |
269 | /* Not a regular file, bail out. */ |
270 | ib::error() << "'" << name << "' not a regular file." ; |
271 | |
272 | return(false); |
273 | } |
274 | } else { |
275 | |
276 | /* This is OK. If the file create fails on RO media, there |
277 | is nothing we can do. */ |
278 | |
279 | ut_a(err == DB_NOT_FOUND); |
280 | } |
281 | |
282 | return(true); |
283 | } |
284 | |
285 | /********************************************************************//** |
286 | I/o-handler thread function. |
287 | @return OS_THREAD_DUMMY_RETURN */ |
288 | extern "C" |
289 | os_thread_ret_t |
290 | DECLARE_THREAD(io_handler_thread)( |
291 | /*==============================*/ |
292 | void* arg) /*!< in: pointer to the number of the segment in |
293 | the aio array */ |
294 | { |
295 | ulint segment; |
296 | |
297 | segment = *((ulint*) arg); |
298 | |
299 | #ifdef UNIV_DEBUG_THREAD_CREATION |
300 | ib::info() << "Io handler thread " << segment << " starts, id " |
301 | << os_thread_pf(os_thread_get_curr_id()); |
302 | #endif |
303 | |
304 | /* For read only mode, we don't need ibuf and log I/O thread. |
305 | Please see srv_start() */ |
306 | ulint start = (srv_read_only_mode) ? 0 : 2; |
307 | |
308 | if (segment < start) { |
309 | if (segment == 0) { |
310 | pfs_register_thread(io_ibuf_thread_key); |
311 | } else { |
312 | ut_ad(segment == 1); |
313 | pfs_register_thread(io_log_thread_key); |
314 | } |
315 | } else if (segment >= start |
316 | && segment < (start + srv_n_read_io_threads)) { |
317 | pfs_register_thread(io_read_thread_key); |
318 | |
319 | } else if (segment >= (start + srv_n_read_io_threads) |
320 | && segment < (start + srv_n_read_io_threads |
321 | + srv_n_write_io_threads)) { |
322 | pfs_register_thread(io_write_thread_key); |
323 | |
324 | } else { |
325 | pfs_register_thread(io_handler_thread_key); |
326 | } |
327 | |
328 | while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS |
329 | || buf_page_cleaner_is_active |
330 | || !os_aio_all_slots_free()) { |
331 | fil_aio_wait(segment); |
332 | } |
333 | |
334 | /* We count the number of threads in os_thread_exit(). A created |
335 | thread should always use that to exit and not use return() to exit. |
336 | The thread actually never comes here because it is exited in an |
337 | os_event_wait(). */ |
338 | |
339 | os_thread_exit(); |
340 | |
341 | OS_THREAD_DUMMY_RETURN; |
342 | } |
343 | |
344 | /*********************************************************************//** |
345 | Creates a log file. |
346 | @return DB_SUCCESS or error code */ |
347 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
348 | dberr_t |
349 | create_log_file( |
350 | /*============*/ |
351 | pfs_os_file_t* file, /*!< out: file handle */ |
352 | const char* name) /*!< in: log file name */ |
353 | { |
354 | bool ret; |
355 | |
356 | *file = os_file_create( |
357 | innodb_log_file_key, name, |
358 | OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, |
359 | OS_LOG_FILE, srv_read_only_mode, &ret); |
360 | |
361 | if (!ret) { |
362 | ib::error() << "Cannot create " << name; |
363 | return(DB_ERROR); |
364 | } |
365 | |
366 | ib::info() << "Setting log file " << name << " size to " |
367 | << srv_log_file_size << " bytes" ; |
368 | |
369 | ret = os_file_set_size(name, *file, srv_log_file_size); |
370 | if (!ret) { |
371 | ib::error() << "Cannot set log file " << name << " size to " |
372 | << srv_log_file_size << " bytes" ; |
373 | return(DB_ERROR); |
374 | } |
375 | |
376 | ret = os_file_close(*file); |
377 | ut_a(ret); |
378 | |
379 | return(DB_SUCCESS); |
380 | } |
381 | |
/** Initial number of the first redo log file. create_log_files()
creates the first file under this number instead of 0, so that crash
recovery cannot find it until the creation has been completed;
create_log_files_rename() then renames it to ib_logfile0. */
#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1)
384 | |
385 | /** Delete all log files. |
386 | @param[in,out] logfilename buffer for log file name |
387 | @param[in] dirnamelen length of the directory path |
388 | @param[in] n_files number of files to delete |
389 | @param[in] i first file to delete */ |
390 | static |
391 | void |
392 | delete_log_files(char* logfilename, size_t dirnamelen, uint n_files, uint i=0) |
393 | { |
394 | /* Remove any old log files. */ |
395 | for (; i < n_files; i++) { |
396 | sprintf(logfilename + dirnamelen, "ib_logfile%u" , i); |
397 | |
398 | /* Ignore errors about non-existent files or files |
399 | that cannot be removed. The create_log_file() will |
400 | return an error when the file exists. */ |
401 | #ifdef _WIN32 |
402 | DeleteFile((LPCTSTR) logfilename); |
403 | #else |
404 | unlink(logfilename); |
405 | #endif |
406 | } |
407 | } |
408 | |
409 | /*********************************************************************//** |
410 | Creates all log files. |
411 | @return DB_SUCCESS or error code */ |
412 | static |
413 | dberr_t |
414 | create_log_files( |
415 | /*=============*/ |
416 | char* logfilename, /*!< in/out: buffer for log file name */ |
417 | size_t dirnamelen, /*!< in: length of the directory path */ |
418 | lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */ |
419 | char*& logfile0) /*!< out: name of the first log file */ |
420 | { |
421 | dberr_t err; |
422 | |
423 | if (srv_read_only_mode) { |
424 | ib::error() << "Cannot create log files in read-only mode" ; |
425 | return(DB_READ_ONLY); |
426 | } |
427 | |
428 | /* Crashing after deleting the first file should be |
429 | recoverable. The buffer pool was clean, and we can simply |
430 | create all log files from the scratch. */ |
431 | DBUG_EXECUTE_IF("innodb_log_abort_6" , |
432 | delete_log_files(logfilename, dirnamelen, 1); |
433 | return(DB_ERROR);); |
434 | |
435 | delete_log_files(logfilename, dirnamelen, INIT_LOG_FILE0 + 1); |
436 | |
437 | DBUG_PRINT("ib_log" , ("After innodb_log_abort_6" )); |
438 | ut_ad(!buf_pool_check_no_pending_io()); |
439 | |
440 | DBUG_EXECUTE_IF("innodb_log_abort_7" , return(DB_ERROR);); |
441 | DBUG_PRINT("ib_log" , ("After innodb_log_abort_7" )); |
442 | |
443 | for (unsigned i = 0; i < srv_n_log_files; i++) { |
444 | sprintf(logfilename + dirnamelen, |
445 | "ib_logfile%u" , i ? i : INIT_LOG_FILE0); |
446 | |
447 | err = create_log_file(&files[i], logfilename); |
448 | |
449 | if (err != DB_SUCCESS) { |
450 | return(err); |
451 | } |
452 | } |
453 | |
454 | DBUG_EXECUTE_IF("innodb_log_abort_8" , return(DB_ERROR);); |
455 | DBUG_PRINT("ib_log" , ("After innodb_log_abort_8" )); |
456 | |
457 | /* We did not create the first log file initially as |
458 | ib_logfile0, so that crash recovery cannot find it until it |
459 | has been completed and renamed. */ |
460 | sprintf(logfilename + dirnamelen, "ib_logfile%u" , INIT_LOG_FILE0); |
461 | |
462 | fil_space_t* log_space = fil_space_create( |
463 | "innodb_redo_log" , SRV_LOG_SPACE_FIRST_ID, 0, FIL_TYPE_LOG, |
464 | NULL/* innodb_encrypt_log works at a different level */); |
465 | |
466 | ut_a(fil_validate()); |
467 | ut_a(log_space != NULL); |
468 | |
469 | const ulint size = ulint(srv_log_file_size >> srv_page_size_shift); |
470 | |
471 | logfile0 = fil_node_create( |
472 | logfilename, size, log_space, false, false); |
473 | ut_a(logfile0); |
474 | |
475 | for (unsigned i = 1; i < srv_n_log_files; i++) { |
476 | |
477 | sprintf(logfilename + dirnamelen, "ib_logfile%u" , i); |
478 | |
479 | if (!fil_node_create(logfilename, size, |
480 | log_space, false, false)) { |
481 | |
482 | ib::error() |
483 | << "Cannot create file node for log file " |
484 | << logfilename; |
485 | |
486 | return(DB_ERROR); |
487 | } |
488 | } |
489 | |
490 | log_sys.log.create(srv_n_log_files); |
491 | if (!log_set_capacity(srv_log_file_size_requested)) { |
492 | return(DB_ERROR); |
493 | } |
494 | |
495 | fil_open_log_and_system_tablespace_files(); |
496 | |
497 | /* Create a log checkpoint. */ |
498 | log_mutex_enter(); |
499 | if (log_sys.is_encrypted() && !log_crypt_init()) { |
500 | return(DB_ERROR); |
501 | } |
502 | ut_d(recv_no_log_write = false); |
503 | recv_reset_logs(lsn); |
504 | log_mutex_exit(); |
505 | |
506 | return(DB_SUCCESS); |
507 | } |
508 | |
509 | /** Rename the first redo log file. |
510 | @param[in,out] logfilename buffer for the log file name |
511 | @param[in] dirnamelen length of the directory path |
512 | @param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value |
513 | @param[in,out] logfile0 name of the first log file |
514 | @return error code |
515 | @retval DB_SUCCESS on successful operation */ |
516 | MY_ATTRIBUTE((warn_unused_result, nonnull)) |
517 | static |
518 | dberr_t |
519 | create_log_files_rename( |
520 | /*====================*/ |
521 | char* logfilename, /*!< in/out: buffer for log file name */ |
522 | size_t dirnamelen, /*!< in: length of the directory path */ |
523 | lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */ |
524 | char* logfile0) /*!< in/out: name of the first log file */ |
525 | { |
526 | /* If innodb_flush_method=O_DSYNC, |
527 | we need to explicitly flush the log buffers. */ |
528 | fil_flush(SRV_LOG_SPACE_FIRST_ID); |
529 | |
530 | ut_ad(!srv_log_files_created); |
531 | ut_d(srv_log_files_created = true); |
532 | |
533 | DBUG_EXECUTE_IF("innodb_log_abort_9" , return(DB_ERROR);); |
534 | DBUG_PRINT("ib_log" , ("After innodb_log_abort_9" )); |
535 | |
536 | /* Close the log files, so that we can rename |
537 | the first one. */ |
538 | fil_close_log_files(false); |
539 | |
540 | /* Rename the first log file, now that a log |
541 | checkpoint has been created. */ |
542 | sprintf(logfilename + dirnamelen, "ib_logfile%u" , 0); |
543 | |
544 | ib::info() << "Renaming log file " << logfile0 << " to " |
545 | << logfilename; |
546 | |
547 | log_mutex_enter(); |
548 | ut_ad(strlen(logfile0) == 2 + strlen(logfilename)); |
549 | dberr_t err = os_file_rename( |
550 | innodb_log_file_key, logfile0, logfilename) |
551 | ? DB_SUCCESS : DB_ERROR; |
552 | |
553 | /* Replace the first file with ib_logfile0. */ |
554 | strcpy(logfile0, logfilename); |
555 | log_mutex_exit(); |
556 | |
557 | DBUG_EXECUTE_IF("innodb_log_abort_10" , err = DB_ERROR;); |
558 | |
559 | if (err == DB_SUCCESS) { |
560 | fil_open_log_and_system_tablespace_files(); |
561 | ib::info() << "New log files created, LSN=" << lsn; |
562 | } |
563 | |
564 | return(err); |
565 | } |
566 | |
567 | /*********************************************************************//** |
568 | Opens a log file. |
569 | @return DB_SUCCESS or error code */ |
570 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
571 | dberr_t |
572 | open_log_file( |
573 | /*==========*/ |
574 | pfs_os_file_t* file, /*!< out: file handle */ |
575 | const char* name, /*!< in: log file name */ |
576 | os_offset_t* size) /*!< out: file size */ |
577 | { |
578 | bool ret; |
579 | |
580 | *file = os_file_create(innodb_log_file_key, name, |
581 | OS_FILE_OPEN, OS_FILE_AIO, |
582 | OS_LOG_FILE, srv_read_only_mode, &ret); |
583 | if (!ret) { |
584 | ib::error() << "Unable to open '" << name << "'" ; |
585 | return(DB_ERROR); |
586 | } |
587 | |
588 | *size = os_file_get_size(*file); |
589 | |
590 | ret = os_file_close(*file); |
591 | ut_a(ret); |
592 | return(DB_SUCCESS); |
593 | } |
594 | |
595 | /*********************************************************************//** |
596 | Create undo tablespace. |
597 | @return DB_SUCCESS or error code */ |
598 | static |
599 | dberr_t |
600 | srv_undo_tablespace_create( |
601 | /*=======================*/ |
602 | const char* name, /*!< in: tablespace name */ |
603 | ulint size) /*!< in: tablespace size in pages */ |
604 | { |
605 | pfs_os_file_t fh; |
606 | bool ret; |
607 | dberr_t err = DB_SUCCESS; |
608 | |
609 | os_file_create_subdirs_if_needed(name); |
610 | |
611 | fh = os_file_create( |
612 | innodb_data_file_key, |
613 | name, |
614 | srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE, |
615 | OS_FILE_NORMAL, OS_DATA_FILE, srv_read_only_mode, &ret); |
616 | |
617 | if (srv_read_only_mode && ret) { |
618 | |
619 | ib::info() << name << " opened in read-only mode" ; |
620 | |
621 | } else if (ret == FALSE) { |
622 | if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS |
623 | #ifdef UNIV_AIX |
624 | /* AIX 5.1 after security patch ML7 may have |
625 | errno set to 0 here, which causes our function |
626 | to return 100; work around that AIX problem */ |
627 | && os_file_get_last_error(false) != 100 |
628 | #endif /* UNIV_AIX */ |
629 | ) { |
630 | ib::error() << "Can't create UNDO tablespace " |
631 | << name; |
632 | } |
633 | err = DB_ERROR; |
634 | } else { |
635 | ut_a(!srv_read_only_mode); |
636 | |
637 | /* We created the data file and now write it full of zeros */ |
638 | |
639 | ib::info() << "Data file " << name << " did not exist: new to" |
640 | " be created" ; |
641 | |
642 | ib::info() << "Setting file " << name << " size to " |
643 | << (size >> (20 - srv_page_size_shift)) << " MB" ; |
644 | |
645 | ib::info() << "Database physically writes the file full: " |
646 | << "wait..." ; |
647 | |
648 | ret = os_file_set_size( |
649 | name, fh, os_offset_t(size) << srv_page_size_shift); |
650 | |
651 | if (!ret) { |
652 | ib::info() << "Error in creating " << name |
653 | << ": probably out of disk space" ; |
654 | |
655 | err = DB_ERROR; |
656 | } |
657 | |
658 | os_file_close(fh); |
659 | } |
660 | |
661 | return(err); |
662 | } |
663 | /*********************************************************************//** |
664 | Open an undo tablespace. |
665 | @return DB_SUCCESS or error code */ |
666 | static |
667 | dberr_t |
668 | srv_undo_tablespace_open( |
669 | /*=====================*/ |
670 | const char* name, /*!< in: tablespace file name */ |
671 | ulint space_id) /*!< in: tablespace id */ |
672 | { |
673 | pfs_os_file_t fh; |
674 | bool ret; |
675 | dberr_t err = DB_ERROR; |
676 | char undo_name[sizeof "innodb_undo000" ]; |
677 | |
678 | snprintf(undo_name, sizeof(undo_name), |
679 | "innodb_undo%03u" , static_cast<unsigned>(space_id)); |
680 | |
681 | if (!srv_file_check_mode(name)) { |
682 | ib::error() << "UNDO tablespaces must be " << |
683 | (srv_read_only_mode ? "writable" : "readable" ) << "!" ; |
684 | |
685 | return(DB_ERROR); |
686 | } |
687 | |
688 | fh = os_file_create( |
689 | innodb_data_file_key, name, |
690 | OS_FILE_OPEN_RETRY |
691 | | OS_FILE_ON_ERROR_NO_EXIT |
692 | | OS_FILE_ON_ERROR_SILENT, |
693 | OS_FILE_NORMAL, |
694 | OS_DATA_FILE, |
695 | srv_read_only_mode, |
696 | &ret); |
697 | |
698 | /* If the file open was successful then load the tablespace. */ |
699 | |
700 | if (ret) { |
701 | os_offset_t size; |
702 | fil_space_t* space; |
703 | |
704 | size = os_file_get_size(fh); |
705 | ut_a(size != (os_offset_t) -1); |
706 | |
707 | ret = os_file_close(fh); |
708 | ut_a(ret); |
709 | |
710 | /* Load the tablespace into InnoDB's internal |
711 | data structures. */ |
712 | |
713 | /* We set the biggest space id to the undo tablespace |
714 | because InnoDB hasn't opened any other tablespace apart |
715 | from the system tablespace. */ |
716 | |
717 | fil_set_max_space_id_if_bigger(space_id); |
718 | |
719 | space = fil_space_create( |
720 | undo_name, space_id, FSP_FLAGS_PAGE_SSIZE(), |
721 | FIL_TYPE_TABLESPACE, NULL); |
722 | |
723 | ut_a(fil_validate()); |
724 | ut_a(space); |
725 | |
726 | os_offset_t n_pages = size >> srv_page_size_shift; |
727 | |
728 | /* On 32-bit platforms, ulint is 32 bits and os_offset_t |
729 | is 64 bits. It is OK to cast the n_pages to ulint because |
730 | the unit has been scaled to pages and page number is always |
731 | 32 bits. */ |
732 | if (fil_node_create( |
733 | name, (ulint) n_pages, space, false, TRUE)) { |
734 | |
735 | err = DB_SUCCESS; |
736 | } |
737 | } |
738 | |
739 | return(err); |
740 | } |
741 | |
742 | /** Check if undo tablespaces and redo log files exist before creating a |
743 | new system tablespace |
744 | @retval DB_SUCCESS if all undo and redo logs are not found |
745 | @retval DB_ERROR if any undo and redo logs are found */ |
746 | static |
747 | dberr_t |
748 | srv_check_undo_redo_logs_exists() |
749 | { |
750 | bool ret; |
751 | pfs_os_file_t fh; |
752 | char name[OS_FILE_MAX_PATH]; |
753 | |
754 | /* Check if any undo tablespaces exist */ |
755 | for (ulint i = 1; i <= srv_undo_tablespaces; ++i) { |
756 | |
757 | snprintf( |
758 | name, sizeof(name), |
759 | "%s%cundo%03zu" , |
760 | srv_undo_dir, OS_PATH_SEPARATOR, |
761 | i); |
762 | |
763 | fh = os_file_create( |
764 | innodb_data_file_key, name, |
765 | OS_FILE_OPEN_RETRY |
766 | | OS_FILE_ON_ERROR_NO_EXIT |
767 | | OS_FILE_ON_ERROR_SILENT, |
768 | OS_FILE_NORMAL, |
769 | OS_DATA_FILE, |
770 | srv_read_only_mode, |
771 | &ret); |
772 | |
773 | if (ret) { |
774 | os_file_close(fh); |
775 | ib::error() |
776 | << "undo tablespace '" << name << "' exists." |
777 | " Creating system tablespace with existing undo" |
778 | " tablespaces is not supported. Please delete" |
779 | " all undo tablespaces before creating new" |
780 | " system tablespace." ; |
781 | return(DB_ERROR); |
782 | } |
783 | } |
784 | |
785 | /* Check if any redo log files exist */ |
786 | char logfilename[OS_FILE_MAX_PATH]; |
787 | size_t dirnamelen = strlen(srv_log_group_home_dir); |
788 | memcpy(logfilename, srv_log_group_home_dir, dirnamelen); |
789 | |
790 | for (unsigned i = 0; i < srv_n_log_files; i++) { |
791 | sprintf(logfilename + dirnamelen, |
792 | "ib_logfile%u" , i); |
793 | |
794 | fh = os_file_create( |
795 | innodb_log_file_key, logfilename, |
796 | OS_FILE_OPEN_RETRY |
797 | | OS_FILE_ON_ERROR_NO_EXIT |
798 | | OS_FILE_ON_ERROR_SILENT, |
799 | OS_FILE_NORMAL, |
800 | OS_LOG_FILE, |
801 | srv_read_only_mode, |
802 | &ret); |
803 | |
804 | if (ret) { |
805 | os_file_close(fh); |
806 | ib::error() << "redo log file '" << logfilename |
807 | << "' exists. Creating system tablespace with" |
808 | " existing redo log files is not recommended." |
809 | " Please delete all redo log files before" |
810 | " creating new system tablespace." ; |
811 | return(DB_ERROR); |
812 | } |
813 | } |
814 | |
815 | return(DB_SUCCESS); |
816 | } |
817 | |
/** Definition of the static member undo::Truncate::s_fix_up_spaces.
srv_undo_tablespaces_init() appends to it the id of every undo
tablespace that it re-creates because needs_fix_up() reported an
interrupted truncate. */
undo::undo_spaces_t undo::Truncate::s_fix_up_spaces;
819 | |
820 | /** Open the configured number of dedicated undo tablespaces. |
821 | @param[in] create_new_db whether the database is being initialized |
822 | @return DB_SUCCESS or error code */ |
823 | dberr_t |
824 | srv_undo_tablespaces_init(bool create_new_db) |
825 | { |
826 | ulint i; |
827 | dberr_t err = DB_SUCCESS; |
828 | ulint prev_space_id = 0; |
829 | ulint n_undo_tablespaces; |
830 | ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1]; |
831 | |
832 | srv_undo_tablespaces_open = 0; |
833 | |
834 | ut_a(srv_undo_tablespaces <= TRX_SYS_N_RSEGS); |
835 | ut_a(!create_new_db || srv_operation == SRV_OPERATION_NORMAL); |
836 | |
837 | memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids)); |
838 | |
839 | /* Create the undo spaces only if we are creating a new |
840 | instance. We don't allow creating of new undo tablespaces |
841 | in an existing instance (yet). This restriction exists because |
842 | we check in several places for SYSTEM tablespaces to be less than |
843 | the min of user defined tablespace ids. Once we implement saving |
844 | the location of the undo tablespaces and their space ids this |
845 | restriction will/should be lifted. */ |
846 | |
847 | for (i = 0; create_new_db && i < srv_undo_tablespaces; ++i) { |
848 | char name[OS_FILE_MAX_PATH]; |
849 | ulint space_id = i + 1; |
850 | |
851 | DBUG_EXECUTE_IF("innodb_undo_upgrade" , |
852 | space_id = i + 3;); |
853 | |
854 | snprintf( |
855 | name, sizeof(name), |
856 | "%s%cundo%03zu" , |
857 | srv_undo_dir, OS_PATH_SEPARATOR, space_id); |
858 | |
859 | if (i == 0) { |
860 | srv_undo_space_id_start = space_id; |
861 | prev_space_id = srv_undo_space_id_start - 1; |
862 | } |
863 | |
864 | undo_tablespace_ids[i] = space_id; |
865 | |
866 | err = srv_undo_tablespace_create( |
867 | name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); |
868 | |
869 | if (err != DB_SUCCESS) { |
870 | ib::error() << "Could not create undo tablespace '" |
871 | << name << "'." ; |
872 | return(err); |
873 | } |
874 | } |
875 | |
876 | /* Get the tablespace ids of all the undo segments excluding |
877 | the system tablespace (0). If we are creating a new instance then |
878 | we build the undo_tablespace_ids ourselves since they don't |
879 | already exist. */ |
880 | n_undo_tablespaces = create_new_db |
881 | || srv_operation == SRV_OPERATION_BACKUP |
882 | || srv_operation == SRV_OPERATION_RESTORE_DELTA |
883 | ? srv_undo_tablespaces |
884 | : trx_rseg_get_n_undo_tablespaces(undo_tablespace_ids); |
885 | srv_undo_tablespaces_active = srv_undo_tablespaces; |
886 | |
887 | switch (srv_operation) { |
888 | case SRV_OPERATION_RESTORE_DELTA: |
889 | case SRV_OPERATION_BACKUP: |
890 | for (i = 0; i < n_undo_tablespaces; i++) { |
891 | undo_tablespace_ids[i] = i + srv_undo_space_id_start; |
892 | } |
893 | |
894 | prev_space_id = srv_undo_space_id_start - 1; |
895 | break; |
896 | case SRV_OPERATION_NORMAL: |
897 | if (create_new_db) { |
898 | break; |
899 | } |
900 | /* fall through */ |
901 | case SRV_OPERATION_RESTORE: |
902 | case SRV_OPERATION_RESTORE_EXPORT: |
903 | ut_ad(!create_new_db); |
904 | |
905 | /* Check if any of the UNDO tablespace needs fix-up because |
906 | server crashed while truncate was active on UNDO tablespace.*/ |
907 | for (i = 0; i < n_undo_tablespaces; ++i) { |
908 | |
909 | undo::Truncate undo_trunc; |
910 | |
911 | if (undo_trunc.needs_fix_up(undo_tablespace_ids[i])) { |
912 | |
913 | char name[OS_FILE_MAX_PATH]; |
914 | |
915 | snprintf(name, sizeof(name), |
916 | "%s%cundo%03zu" , |
917 | srv_undo_dir, OS_PATH_SEPARATOR, |
918 | undo_tablespace_ids[i]); |
919 | |
920 | os_file_delete(innodb_data_file_key, name); |
921 | |
922 | err = srv_undo_tablespace_create( |
923 | name, |
924 | SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); |
925 | |
926 | if (err != DB_SUCCESS) { |
927 | ib::error() << "Could not fix-up undo " |
928 | " tablespace truncate '" |
929 | << name << "'." ; |
930 | return(err); |
931 | } |
932 | |
933 | undo::Truncate::s_fix_up_spaces.push_back( |
934 | undo_tablespace_ids[i]); |
935 | } |
936 | } |
937 | break; |
938 | } |
939 | |
940 | /* Open all the undo tablespaces that are currently in use. If we |
941 | fail to open any of these it is a fatal error. The tablespace ids |
942 | should be contiguous. It is a fatal error because they are required |
943 | for recovery and are referenced by the UNDO logs (a.k.a RBS). */ |
944 | |
945 | for (i = 0; i < n_undo_tablespaces; ++i) { |
946 | char name[OS_FILE_MAX_PATH]; |
947 | |
948 | snprintf( |
949 | name, sizeof(name), |
950 | "%s%cundo%03zu" , |
951 | srv_undo_dir, OS_PATH_SEPARATOR, |
952 | undo_tablespace_ids[i]); |
953 | |
954 | /* Should be no gaps in undo tablespace ids. */ |
955 | ut_a(!i || prev_space_id + 1 == undo_tablespace_ids[i]); |
956 | |
957 | /* The system space id should not be in this array. */ |
958 | ut_a(undo_tablespace_ids[i] != 0); |
959 | ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED); |
960 | |
961 | err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]); |
962 | |
963 | if (err != DB_SUCCESS) { |
964 | ib::error() << "Unable to open undo tablespace '" |
965 | << name << "'." ; |
966 | return(err); |
967 | } |
968 | |
969 | prev_space_id = undo_tablespace_ids[i]; |
970 | |
971 | /* Note the first undo tablespace id in case of |
972 | no active undo tablespace. */ |
973 | if (0 == srv_undo_tablespaces_open++) { |
974 | srv_undo_space_id_start = undo_tablespace_ids[i]; |
975 | } |
976 | } |
977 | |
978 | /* Open any extra unused undo tablespaces. These must be contiguous. |
979 | We stop at the first failure. These are undo tablespaces that are |
980 | not in use and therefore not required by recovery. We only check |
981 | that there are no gaps. */ |
982 | |
983 | for (i = prev_space_id + 1; |
984 | i < srv_undo_space_id_start + TRX_SYS_N_RSEGS; ++i) { |
985 | char name[OS_FILE_MAX_PATH]; |
986 | |
987 | snprintf( |
988 | name, sizeof(name), |
989 | "%s%cundo%03zu" , srv_undo_dir, OS_PATH_SEPARATOR, i); |
990 | |
991 | err = srv_undo_tablespace_open(name, i); |
992 | |
993 | if (err != DB_SUCCESS) { |
994 | break; |
995 | } |
996 | |
997 | ++n_undo_tablespaces; |
998 | |
999 | ++srv_undo_tablespaces_open; |
1000 | } |
1001 | |
1002 | /* Initialize srv_undo_space_id_start=0 when there are no |
1003 | dedicated undo tablespaces. */ |
1004 | if (n_undo_tablespaces == 0) { |
1005 | srv_undo_space_id_start = 0; |
1006 | } |
1007 | |
1008 | /* If the user says that there are fewer than what we find we |
1009 | tolerate that discrepancy but not the inverse. Because there could |
1010 | be unused undo tablespaces for future use. */ |
1011 | |
1012 | if (srv_undo_tablespaces > n_undo_tablespaces) { |
1013 | ib::error() << "Expected to open innodb_undo_tablespaces=" |
1014 | << srv_undo_tablespaces |
1015 | << " but was able to find only " |
1016 | << n_undo_tablespaces; |
1017 | |
1018 | return(err != DB_SUCCESS ? err : DB_ERROR); |
1019 | |
1020 | } else if (n_undo_tablespaces > 0) { |
1021 | |
1022 | ib::info() << "Opened " << n_undo_tablespaces |
1023 | << " undo tablespaces" ; |
1024 | |
1025 | if (srv_undo_tablespaces == 0) { |
1026 | ib::warn() << "innodb_undo_tablespaces=0 disables" |
1027 | " dedicated undo log tablespaces" ; |
1028 | } |
1029 | } |
1030 | |
1031 | if (create_new_db) { |
1032 | mtr_t mtr; |
1033 | |
1034 | for (i = 0; i < n_undo_tablespaces; ++i) { |
1035 | mtr.start(); |
1036 | fsp_header_init(fil_space_get(undo_tablespace_ids[i]), |
1037 | SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, |
1038 | &mtr); |
1039 | mtr.commit(); |
1040 | } |
1041 | } |
1042 | |
1043 | if (!undo::Truncate::s_fix_up_spaces.empty()) { |
1044 | |
1045 | /* Step-1: Initialize the tablespace header and rsegs header. */ |
1046 | mtr_t mtr; |
1047 | |
1048 | mtr_start(&mtr); |
1049 | /* Turn off REDO logging. We are in server start mode and fixing |
1050 | UNDO tablespace even before REDO log is read. Let's say we |
1051 | do REDO logging here then this REDO log record will be applied |
1052 | as part of the current recovery process. We surely don't need |
1053 | that as this is fix-up action parallel to REDO logging. */ |
1054 | mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); |
1055 | buf_block_t* = trx_sysf_get(&mtr); |
1056 | if (!sys_header) { |
1057 | mtr.commit(); |
1058 | return DB_CORRUPTION; |
1059 | } |
1060 | |
1061 | for (undo::undo_spaces_t::const_iterator it |
1062 | = undo::Truncate::s_fix_up_spaces.begin(); |
1063 | it != undo::Truncate::s_fix_up_spaces.end(); |
1064 | ++it) { |
1065 | |
1066 | undo::Truncate::add_space_to_trunc_list(*it); |
1067 | |
1068 | fil_space_t* space = fil_space_get(*it); |
1069 | |
1070 | fsp_header_init(space, |
1071 | SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, |
1072 | &mtr); |
1073 | |
1074 | for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) { |
1075 | if (trx_sysf_rseg_get_space(sys_header, i) |
1076 | == *it) { |
1077 | trx_rseg_header_create( |
1078 | space, i, sys_header, &mtr); |
1079 | } |
1080 | } |
1081 | |
1082 | undo::Truncate::clear_trunc_list(); |
1083 | } |
1084 | mtr_commit(&mtr); |
1085 | |
1086 | /* Step-2: Flush the dirty pages from the buffer pool. */ |
1087 | for (undo::undo_spaces_t::const_iterator it |
1088 | = undo::Truncate::s_fix_up_spaces.begin(); |
1089 | it != undo::Truncate::s_fix_up_spaces.end(); |
1090 | ++it) { |
1091 | FlushObserver dummy(fil_system.sys_space, NULL, NULL); |
1092 | buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, &dummy); |
1093 | FlushObserver dummy2(fil_space_get(*it), NULL, NULL); |
1094 | buf_LRU_flush_or_remove_pages(*it, &dummy2); |
1095 | |
1096 | /* Remove the truncate redo log file. */ |
1097 | undo::Truncate undo_trunc; |
1098 | undo_trunc.done_logging(*it); |
1099 | } |
1100 | } |
1101 | |
1102 | return(DB_SUCCESS); |
1103 | } |
1104 | |
1105 | /** Create the temporary file tablespace. |
1106 | @param[in] create_new_db whether we are creating a new database |
1107 | @return DB_SUCCESS or error code. */ |
1108 | static |
1109 | dberr_t |
1110 | srv_open_tmp_tablespace(bool create_new_db) |
1111 | { |
1112 | ulint sum_of_new_sizes; |
1113 | |
1114 | /* Will try to remove if there is existing file left-over by last |
1115 | unclean shutdown */ |
1116 | srv_tmp_space.set_sanity_check_status(true); |
1117 | srv_tmp_space.delete_files(); |
1118 | srv_tmp_space.set_ignore_read_only(true); |
1119 | |
1120 | ib::info() << "Creating shared tablespace for temporary tables" ; |
1121 | |
1122 | bool create_new_temp_space; |
1123 | |
1124 | srv_tmp_space.set_space_id(SRV_TMP_SPACE_ID); |
1125 | |
1126 | dberr_t err = srv_tmp_space.check_file_spec( |
1127 | &create_new_temp_space, 12 * 1024 * 1024); |
1128 | |
1129 | if (err == DB_FAIL) { |
1130 | ib::error() << "The innodb_temporary" |
1131 | " data file must be writable!" ; |
1132 | err = DB_ERROR; |
1133 | } else if (err != DB_SUCCESS) { |
1134 | ib::error() << "Could not create the shared innodb_temporary." ; |
1135 | } else if ((err = srv_tmp_space.open_or_create( |
1136 | true, create_new_db, &sum_of_new_sizes, NULL)) |
1137 | != DB_SUCCESS) { |
1138 | ib::error() << "Unable to create the shared innodb_temporary" ; |
1139 | } else if (fil_system.temp_space->open()) { |
1140 | /* Initialize the header page */ |
1141 | mtr_t mtr; |
1142 | mtr.start(); |
1143 | mtr.set_log_mode(MTR_LOG_NO_REDO); |
1144 | fsp_header_init(fil_system.temp_space, |
1145 | srv_tmp_space.get_sum_of_sizes(), |
1146 | &mtr); |
1147 | mtr.commit(); |
1148 | } else { |
1149 | /* This file was just opened in the code above! */ |
1150 | ib::error() << "The innodb_temporary" |
1151 | " data file cannot be re-opened" |
1152 | " after check_file_spec() succeeded!" ; |
1153 | err = DB_ERROR; |
1154 | } |
1155 | |
1156 | return(err); |
1157 | } |
1158 | |
1159 | /****************************************************************//** |
1160 | Set state to indicate start of particular group of threads in InnoDB. */ |
1161 | UNIV_INLINE |
1162 | void |
1163 | srv_start_state_set( |
1164 | /*================*/ |
1165 | srv_start_state_t state) /*!< in: indicate current state of |
1166 | thread startup */ |
1167 | { |
1168 | srv_start_state |= ulint(state); |
1169 | } |
1170 | |
1171 | /****************************************************************//** |
1172 | Check if following group of threads is started. |
1173 | @return true if started */ |
1174 | UNIV_INLINE |
1175 | bool |
1176 | srv_start_state_is_set( |
1177 | /*===================*/ |
1178 | srv_start_state_t state) /*!< in: state to check for */ |
1179 | { |
1180 | return(srv_start_state & ulint(state)); |
1181 | } |
1182 | |
1183 | /** |
1184 | Shutdown all background threads created by InnoDB. */ |
1185 | static |
1186 | void |
1187 | srv_shutdown_all_bg_threads() |
1188 | { |
1189 | ut_ad(!srv_undo_sources); |
1190 | srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS; |
1191 | |
1192 | /* All threads end up waiting for certain events. Put those events |
1193 | to the signaled state. Then the threads will exit themselves after |
1194 | os_event_wait(). */ |
1195 | for (uint i = 0; i < 1000; ++i) { |
1196 | /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM |
1197 | HERE OR EARLIER */ |
1198 | |
1199 | if (srv_start_state_is_set(SRV_START_STATE_LOCK_SYS)) { |
1200 | /* a. Let the lock timeout thread exit */ |
1201 | os_event_set(lock_sys.timeout_event); |
1202 | } |
1203 | |
1204 | if (!srv_read_only_mode) { |
1205 | /* b. srv error monitor thread exits automatically, |
1206 | no need to do anything here */ |
1207 | |
1208 | if (srv_start_state_is_set(SRV_START_STATE_MASTER)) { |
1209 | /* c. We wake the master thread so that |
1210 | it exits */ |
1211 | srv_wake_master_thread(); |
1212 | } |
1213 | |
1214 | if (srv_start_state_is_set(SRV_START_STATE_PURGE)) { |
1215 | /* d. Wakeup purge threads. */ |
1216 | srv_purge_wakeup(); |
1217 | } |
1218 | |
1219 | if (srv_n_fil_crypt_threads_started) { |
1220 | os_event_set(fil_crypt_threads_event); |
1221 | } |
1222 | |
1223 | if (log_scrub_thread_active) { |
1224 | os_event_set(log_scrub_event); |
1225 | } |
1226 | } |
1227 | |
1228 | if (srv_start_state_is_set(SRV_START_STATE_IO)) { |
1229 | ut_ad(!srv_read_only_mode); |
1230 | |
1231 | /* e. Exit the i/o threads */ |
1232 | if (recv_sys->flush_start != NULL) { |
1233 | os_event_set(recv_sys->flush_start); |
1234 | } |
1235 | if (recv_sys->flush_end != NULL) { |
1236 | os_event_set(recv_sys->flush_end); |
1237 | } |
1238 | |
1239 | os_event_set(buf_flush_event); |
1240 | } |
1241 | |
1242 | if (!os_thread_count) { |
1243 | return; |
1244 | } |
1245 | |
1246 | switch (srv_operation) { |
1247 | case SRV_OPERATION_BACKUP: |
1248 | case SRV_OPERATION_RESTORE_DELTA: |
1249 | break; |
1250 | case SRV_OPERATION_NORMAL: |
1251 | case SRV_OPERATION_RESTORE: |
1252 | case SRV_OPERATION_RESTORE_EXPORT: |
1253 | if (!buf_page_cleaner_is_active |
1254 | && os_aio_all_slots_free()) { |
1255 | os_aio_wake_all_threads_at_shutdown(); |
1256 | } |
1257 | } |
1258 | |
1259 | os_thread_sleep(100000); |
1260 | } |
1261 | |
1262 | ib::warn() << os_thread_count << " threads created by InnoDB" |
1263 | " had not exited at shutdown!" ; |
1264 | ut_d(os_aio_print_pending_io(stderr)); |
1265 | ut_ad(0); |
1266 | } |
1267 | |
/** Abort InnoDB startup with the given error code.
Expands to a call of srv_init_abort_low(). The macro refers to a
variable named create_new_db that is not a macro argument, so it can
only be used where such a variable is in scope at the call site.
In UNIV_DEBUG builds the __FILE__ and __LINE__ of the call site are
passed along as well.
@param _db_err	error code to hand to srv_init_abort_low() */
#ifdef UNIV_DEBUG
# define srv_init_abort(_db_err) \
	srv_init_abort_low(create_new_db, __FILE__, __LINE__, _db_err)
#else
# define srv_init_abort(_db_err) \
	srv_init_abort_low(create_new_db, _db_err)
#endif /* UNIV_DEBUG */
1275 | |
1276 | /** Innobase start-up aborted. Perform cleanup actions. |
1277 | @param[in] create_new_db TRUE if new db is being created |
1278 | @param[in] file File name |
1279 | @param[in] line Line number |
1280 | @param[in] err Reason for aborting InnoDB startup |
1281 | @return DB_SUCCESS or error code. */ |
1282 | MY_ATTRIBUTE((warn_unused_result, nonnull)) |
1283 | static |
1284 | dberr_t |
1285 | srv_init_abort_low( |
1286 | bool create_new_db, |
1287 | #ifdef UNIV_DEBUG |
1288 | const char* file, |
1289 | unsigned line, |
1290 | #endif /* UNIV_DEBUG */ |
1291 | dberr_t err) |
1292 | { |
1293 | if (create_new_db) { |
1294 | ib::error() << "Database creation was aborted" |
1295 | #ifdef UNIV_DEBUG |
1296 | " at " << innobase_basename(file) << "[" << line << "]" |
1297 | #endif /* UNIV_DEBUG */ |
1298 | " with error " << ut_strerr(err) << ". You may need" |
1299 | " to delete the ibdata1 file before trying to start" |
1300 | " up again." ; |
1301 | } else { |
1302 | ib::error() << "Plugin initialization aborted" |
1303 | #ifdef UNIV_DEBUG |
1304 | " at " << innobase_basename(file) << "[" << line << "]" |
1305 | #endif /* UNIV_DEBUG */ |
1306 | " with error " << ut_strerr(err); |
1307 | } |
1308 | |
1309 | srv_shutdown_bg_undo_sources(); |
1310 | srv_shutdown_all_bg_threads(); |
1311 | return(err); |
1312 | } |
1313 | |
/** Prepare to delete the redo log files. Flush the dirty pages from all the
buffer pools. Flush the redo log buffer to the redo log file.
The whole sequence is repeated until buf_pool_check_no_pending_io()
reports no pending I/O. On every pass an informational message is logged
that says why the log is being rebuilt (upgrade, resize, encryption
change) and states the target configuration and the LSN.
Side effect: srv_start_lsn is set to the returned LSN.
@param[in]	n_files		number of old redo log files
@return lsn upto which data pages have been flushed; 0 if the
"innodb_log_abort_1" debug hook makes the function return early. */
static
lsn_t
srv_prepare_to_delete_redo_log_files(
	ulint n_files)
{
	DBUG_ENTER("srv_prepare_to_delete_redo_log_files" );

	lsn_t flushed_lsn;
	ulint pending_io = 0;
	/* Number of passes that saw pending I/O since the last
	"Waiting for ..." message. */
	ulint count = 0;

	do {
		/* Clean the buffer pool. */
		buf_flush_sync_all_buf_pools();

		/* Debug-only fault injection point: return 0 right here. */
		DBUG_EXECUTE_IF("innodb_log_abort_1" , DBUG_RETURN(0););
		DBUG_PRINT("ib_log" , ("After innodb_log_abort_1" ));

		/* log_sys.lsn is read, and the message below is composed,
		between log_mutex_enter() and log_mutex_exit(). */
		log_mutex_enter();

		fil_names_clear(log_sys.lsn, false);

		flushed_lsn = log_sys.lsn;

		{
			/* Compose one message: a prefix that depends on why
			the log is being replaced, then the requested
			configuration and the LSN. */
			ib::info info;
			if (srv_log_file_size == 0
			|| (log_sys.log.format
			& ~LOG_HEADER_FORMAT_ENCRYPTED)
			!= LOG_HEADER_FORMAT_CURRENT) {
				/* No known file size, or the format (ignoring
				the encryption flag) is not the current one. */
				info << "Upgrading redo log: " ;
			} else if (n_files != srv_n_log_files
			|| srv_log_file_size
			!= srv_log_file_size_requested) {
				/* The number or the size of the files changes;
				the encryption setting may change as well. */
				if (srv_encrypt_log
				== (my_bool)log_sys.is_encrypted()) {
					info << (srv_encrypt_log
					? "Resizing encrypted"
					: "Resizing" );
				} else if (srv_encrypt_log) {
					info << "Encrypting and resizing" ;
				} else {
					info << "Removing encryption"
					" and resizing" ;
				}

				info << " redo log from " << n_files
				<< "*" << srv_log_file_size << " to " ;
			} else if (srv_encrypt_log) {
				/* Same geometry; encryption is requested. */
				info << "Encrypting redo log: " ;
			} else {
				/* Same geometry; encryption is not requested. */
				info << "Removing redo log encryption: " ;
			}

			info << srv_n_log_files << "*"
			<< srv_log_file_size_requested
			<< " bytes; LSN=" << flushed_lsn;
		}

		srv_start_lsn = flushed_lsn;
		/* Release the log mutex, then flush the old log files. */
		log_mutex_exit();

		log_write_up_to(flushed_lsn, true);

		/* If innodb_flush_method=O_DSYNC,
		we need to explicitly flush the log buffers. */
		fil_flush(SRV_LOG_SPACE_FIRST_ID);

		ut_ad(flushed_lsn == log_get_lsn());

		/* Check if the buffer pools are clean. If not
		retry till it is clean. */
		pending_io = buf_pool_check_no_pending_io();

		if (pending_io > 0) {
			count++;
			/* Print a message every 60 seconds if we
			are waiting to clean the buffer pools
			(more than 600 passes with pending I/O;
			assumes the sleep below is 0.1 s - TODO confirm
			the unit of os_thread_sleep()). */
			if (srv_print_verbose_log && count > 600) {
				ib::info() << "Waiting for "
				<< pending_io << " buffer "
				<< "page I/Os to complete" ;
				count = 0;
			}
		}
		/* NOTE(review): the sleep is unconditional, i.e. it also
		happens when no I/O was pending above. */
		os_thread_sleep(100000);

		/* The loop condition probes the pending I/O count again; it
		does not reuse the pending_io value read above. */
	} while (buf_pool_check_no_pending_io());

	DBUG_RETURN(flushed_lsn);
}
1410 | |
1411 | /** Start InnoDB. |
1412 | @param[in] create_new_db whether to create a new database |
1413 | @return DB_SUCCESS or error code */ |
1414 | dberr_t srv_start(bool create_new_db) |
1415 | { |
1416 | lsn_t flushed_lsn; |
1417 | dberr_t err = DB_SUCCESS; |
1418 | ulint srv_n_log_files_found = srv_n_log_files; |
1419 | mtr_t mtr; |
1420 | char logfilename[10000]; |
1421 | char* logfile0 = NULL; |
1422 | size_t dirnamelen; |
1423 | unsigned i = 0; |
1424 | |
1425 | ut_ad(srv_operation == SRV_OPERATION_NORMAL |
1426 | || srv_operation == SRV_OPERATION_RESTORE |
1427 | || srv_operation == SRV_OPERATION_RESTORE_EXPORT); |
1428 | |
1429 | |
1430 | if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { |
1431 | srv_read_only_mode = true; |
1432 | } |
1433 | |
1434 | high_level_read_only = srv_read_only_mode |
1435 | || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO |
1436 | || srv_sys_space.created_new_raw(); |
1437 | |
1438 | /* Reset the start state. */ |
1439 | srv_start_state = SRV_START_STATE_NONE; |
1440 | |
1441 | compile_time_assert(sizeof(ulint) == sizeof(void*)); |
1442 | |
1443 | #ifdef UNIV_DEBUG |
1444 | ib::info() << "!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!" ; |
1445 | #endif |
1446 | |
1447 | #ifdef UNIV_IBUF_DEBUG |
1448 | ib::info() << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!" ; |
1449 | # ifdef UNIV_IBUF_COUNT_DEBUG |
1450 | ib::info() << "!!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on !!!!!!!!!" ; |
1451 | ib::error() << "Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG" ; |
1452 | # endif |
1453 | #endif |
1454 | |
1455 | #ifdef UNIV_LOG_LSN_DEBUG |
1456 | ib::info() << "!!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!" ; |
1457 | #endif /* UNIV_LOG_LSN_DEBUG */ |
1458 | |
1459 | #if defined(COMPILER_HINTS_ENABLED) |
1460 | ib::info() << "Compiler hints enabled." ; |
1461 | #endif /* defined(COMPILER_HINTS_ENABLED) */ |
1462 | |
1463 | #ifdef _WIN32 |
1464 | ib::info() << "Mutexes and rw_locks use Windows interlocked functions" ; |
1465 | #else |
1466 | ib::info() << "Mutexes and rw_locks use GCC atomic builtins" ; |
1467 | #endif |
1468 | ib::info() << MUTEX_TYPE; |
1469 | |
1470 | ib::info() << "Compressed tables use zlib " ZLIB_VERSION |
1471 | #ifdef UNIV_ZIP_DEBUG |
1472 | " with validation" |
1473 | #endif /* UNIV_ZIP_DEBUG */ |
1474 | ; |
1475 | #ifdef UNIV_ZIP_COPY |
1476 | ib::info() << "and extra copying" ; |
1477 | #endif /* UNIV_ZIP_COPY */ |
1478 | |
1479 | /* Since InnoDB does not currently clean up all its internal data |
1480 | structures in MySQL Embedded Server Library server_end(), we |
1481 | print an error message if someone tries to start up InnoDB a |
1482 | second time during the process lifetime. */ |
1483 | |
1484 | if (srv_start_has_been_called) { |
1485 | ib::error() << "Startup called second time" |
1486 | " during the process lifetime." |
1487 | " In the MySQL Embedded Server Library" |
1488 | " you cannot call server_init() more than" |
1489 | " once during the process lifetime." ; |
1490 | } |
1491 | |
1492 | srv_start_has_been_called = true; |
1493 | |
1494 | srv_is_being_started = true; |
1495 | |
1496 | /* Register performance schema stages before any real work has been |
1497 | started which may need to be instrumented. */ |
1498 | mysql_stage_register("innodb" , srv_stages, UT_ARR_SIZE(srv_stages)); |
1499 | |
1500 | /* Set the maximum number of threads which can wait for a semaphore |
1501 | inside InnoDB: this is the 'sync wait array' size, as well as the |
1502 | maximum number of threads that can wait in the 'srv_conc array' for |
1503 | their time to enter InnoDB. */ |
1504 | |
1505 | srv_max_n_threads = 1 /* io_ibuf_thread */ |
1506 | + 1 /* io_log_thread */ |
1507 | + 1 /* lock_wait_timeout_thread */ |
1508 | + 1 /* srv_error_monitor_thread */ |
1509 | + 1 /* srv_monitor_thread */ |
1510 | + 1 /* srv_master_thread */ |
1511 | + 1 /* srv_purge_coordinator_thread */ |
1512 | + 1 /* buf_dump_thread */ |
1513 | + 1 /* dict_stats_thread */ |
1514 | + 1 /* fts_optimize_thread */ |
1515 | + 1 /* recv_writer_thread */ |
1516 | + 1 /* trx_rollback_all_recovered */ |
1517 | + 128 /* added as margin, for use of |
1518 | InnoDB Memcached etc. */ |
1519 | + max_connections |
1520 | + srv_n_read_io_threads |
1521 | + srv_n_write_io_threads |
1522 | + srv_n_purge_threads |
1523 | + srv_n_page_cleaners |
1524 | /* FTS Parallel Sort */ |
1525 | + fts_sort_pll_degree * FTS_NUM_AUX_INDEX |
1526 | * max_connections; |
1527 | |
1528 | srv_boot(); |
1529 | |
1530 | ib::info() << ut_crc32_implementation; |
1531 | |
1532 | if (!srv_read_only_mode) { |
1533 | |
1534 | mutex_create(LATCH_ID_SRV_MONITOR_FILE, |
1535 | &srv_monitor_file_mutex); |
1536 | |
1537 | if (srv_innodb_status) { |
1538 | |
1539 | srv_monitor_file_name = static_cast<char*>( |
1540 | ut_malloc_nokey( |
1541 | strlen(fil_path_to_mysql_datadir) |
1542 | + 20 + sizeof "/innodb_status." )); |
1543 | |
1544 | sprintf(srv_monitor_file_name, |
1545 | "%s/innodb_status." ULINTPF, |
1546 | fil_path_to_mysql_datadir, |
1547 | os_proc_get_number()); |
1548 | |
1549 | srv_monitor_file = fopen(srv_monitor_file_name, "w+" ); |
1550 | |
1551 | if (!srv_monitor_file) { |
1552 | ib::error() << "Unable to create " |
1553 | << srv_monitor_file_name << ": " |
1554 | << strerror(errno); |
1555 | if (err == DB_SUCCESS) { |
1556 | err = DB_ERROR; |
1557 | } |
1558 | } |
1559 | } else { |
1560 | |
1561 | srv_monitor_file_name = NULL; |
1562 | srv_monitor_file = os_file_create_tmpfile(); |
1563 | |
1564 | if (!srv_monitor_file && err == DB_SUCCESS) { |
1565 | err = DB_ERROR; |
1566 | } |
1567 | } |
1568 | |
1569 | mutex_create(LATCH_ID_SRV_MISC_TMPFILE, |
1570 | &srv_misc_tmpfile_mutex); |
1571 | |
1572 | srv_misc_tmpfile = os_file_create_tmpfile(); |
1573 | |
1574 | if (!srv_misc_tmpfile && err == DB_SUCCESS) { |
1575 | err = DB_ERROR; |
1576 | } |
1577 | } |
1578 | |
1579 | if (err != DB_SUCCESS) { |
1580 | return(srv_init_abort(err)); |
1581 | } |
1582 | |
1583 | srv_n_file_io_threads = srv_n_read_io_threads; |
1584 | |
1585 | srv_n_file_io_threads += srv_n_write_io_threads; |
1586 | |
1587 | if (!srv_read_only_mode) { |
1588 | /* Add the log and ibuf IO threads. */ |
1589 | srv_n_file_io_threads += 2; |
1590 | } else { |
1591 | ib::info() << "Disabling background log and ibuf IO write" |
1592 | << " threads." ; |
1593 | } |
1594 | |
1595 | ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS); |
1596 | |
1597 | if (!os_aio_init(srv_n_read_io_threads, |
1598 | srv_n_write_io_threads, |
1599 | SRV_MAX_N_PENDING_SYNC_IOS)) { |
1600 | |
1601 | ib::error() << "Cannot initialize AIO sub-system" ; |
1602 | |
1603 | return(srv_init_abort(DB_ERROR)); |
1604 | } |
1605 | |
1606 | fil_system.create(srv_file_per_table ? 50000 : 5000); |
1607 | |
1608 | double size; |
1609 | char unit; |
1610 | |
1611 | if (srv_buf_pool_size >= 1024 * 1024 * 1024) { |
1612 | size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024); |
1613 | unit = 'G'; |
1614 | } else { |
1615 | size = ((double) srv_buf_pool_size) / (1024 * 1024); |
1616 | unit = 'M'; |
1617 | } |
1618 | |
1619 | double chunk_size; |
1620 | char chunk_unit; |
1621 | |
1622 | if (srv_buf_pool_chunk_unit >= 1024 * 1024 * 1024) { |
1623 | chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024 / 1024; |
1624 | chunk_unit = 'G'; |
1625 | } else { |
1626 | chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024; |
1627 | chunk_unit = 'M'; |
1628 | } |
1629 | |
1630 | ib::info() << "Initializing buffer pool, total size = " |
1631 | << size << unit << ", instances = " << srv_buf_pool_instances |
1632 | << ", chunk size = " << chunk_size << chunk_unit; |
1633 | |
1634 | err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances); |
1635 | |
1636 | if (err != DB_SUCCESS) { |
1637 | ib::error() << "Cannot allocate memory for the buffer pool" ; |
1638 | |
1639 | return(srv_init_abort(DB_ERROR)); |
1640 | } |
1641 | |
1642 | ib::info() << "Completed initialization of buffer pool" ; |
1643 | |
1644 | #ifdef UNIV_DEBUG |
1645 | /* We have observed deadlocks with a 5MB buffer pool but |
1646 | the actual lower limit could very well be a little higher. */ |
1647 | |
1648 | if (srv_buf_pool_size <= 5 * 1024 * 1024) { |
1649 | |
1650 | ib::info() << "Small buffer pool size (" |
1651 | << srv_buf_pool_size / 1024 / 1024 |
1652 | << "M), the flst_validate() debug function can cause a" |
1653 | << " deadlock if the buffer pool fills up." ; |
1654 | } |
1655 | #endif /* UNIV_DEBUG */ |
1656 | |
1657 | log_sys.create(); |
1658 | recv_sys_init(); |
1659 | lock_sys.create(srv_lock_table_size); |
1660 | |
1661 | /* Create i/o-handler threads: */ |
1662 | |
1663 | for (ulint t = 0; t < srv_n_file_io_threads; ++t) { |
1664 | |
1665 | n[t] = t; |
1666 | |
1667 | thread_handles[t] = os_thread_create(io_handler_thread, n + t, thread_ids + t); |
1668 | thread_started[t] = true; |
1669 | } |
1670 | |
1671 | if (!srv_read_only_mode) { |
1672 | buf_flush_page_cleaner_init(); |
1673 | |
1674 | buf_page_cleaner_is_active = true; |
1675 | os_thread_create(buf_flush_page_cleaner_coordinator, |
1676 | NULL, NULL); |
1677 | |
1678 | /* Create page cleaner workers if needed. For example |
1679 | mariabackup could set srv_n_page_cleaners = 0. */ |
1680 | if (srv_n_page_cleaners > 1) { |
1681 | buf_flush_set_page_cleaner_thread_cnt(srv_n_page_cleaners); |
1682 | } |
1683 | |
1684 | #ifdef UNIV_LINUX |
1685 | /* Wait for the setpriority() call to finish. */ |
1686 | os_event_wait(recv_sys->flush_end); |
1687 | #endif /* UNIV_LINUX */ |
1688 | srv_start_state_set(SRV_START_STATE_IO); |
1689 | } |
1690 | |
1691 | srv_startup_is_before_trx_rollback_phase = !create_new_db; |
1692 | |
1693 | /* Check if undo tablespaces and redo log files exist before creating |
1694 | a new system tablespace */ |
1695 | if (create_new_db) { |
1696 | err = srv_check_undo_redo_logs_exists(); |
1697 | if (err != DB_SUCCESS) { |
1698 | return(srv_init_abort(DB_ERROR)); |
1699 | } |
1700 | recv_sys_debug_free(); |
1701 | } |
1702 | |
1703 | /* Open or create the data files. */ |
1704 | ulint sum_of_new_sizes; |
1705 | |
1706 | err = srv_sys_space.open_or_create( |
1707 | false, create_new_db, &sum_of_new_sizes, &flushed_lsn); |
1708 | |
1709 | switch (err) { |
1710 | case DB_SUCCESS: |
1711 | break; |
1712 | case DB_CANNOT_OPEN_FILE: |
1713 | ib::error() |
1714 | << "Could not open or create the system tablespace. If" |
1715 | " you tried to add new data files to the system" |
1716 | " tablespace, and it failed here, you should now" |
1717 | " edit innodb_data_file_path in my.cnf back to what" |
1718 | " it was, and remove the new ibdata files InnoDB" |
1719 | " created in this failed attempt. InnoDB only wrote" |
1720 | " those files full of zeros, but did not yet use" |
1721 | " them in any way. But be careful: do not remove" |
1722 | " old data files which contain your precious data!" ; |
1723 | /* fall through */ |
1724 | default: |
1725 | /* Other errors might come from Datafile::validate_first_page() */ |
1726 | return(srv_init_abort(err)); |
1727 | } |
1728 | |
1729 | dirnamelen = strlen(srv_log_group_home_dir); |
1730 | ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile" ); |
1731 | memcpy(logfilename, srv_log_group_home_dir, dirnamelen); |
1732 | |
1733 | /* Add a path separator if needed. */ |
1734 | if (dirnamelen && logfilename[dirnamelen - 1] != OS_PATH_SEPARATOR) { |
1735 | logfilename[dirnamelen++] = OS_PATH_SEPARATOR; |
1736 | } |
1737 | |
1738 | srv_log_file_size_requested = srv_log_file_size; |
1739 | |
1740 | if (create_new_db) { |
1741 | |
1742 | buf_flush_sync_all_buf_pools(); |
1743 | |
1744 | flushed_lsn = log_get_lsn(); |
1745 | |
1746 | err = create_log_files( |
1747 | logfilename, dirnamelen, flushed_lsn, logfile0); |
1748 | |
1749 | if (err != DB_SUCCESS) { |
1750 | return(srv_init_abort(err)); |
1751 | } |
1752 | } else { |
1753 | for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) { |
1754 | os_offset_t size; |
1755 | os_file_stat_t stat_info; |
1756 | |
1757 | sprintf(logfilename + dirnamelen, |
1758 | "ib_logfile%u" , i); |
1759 | |
1760 | err = os_file_get_status( |
1761 | logfilename, &stat_info, false, |
1762 | srv_read_only_mode); |
1763 | |
1764 | if (err == DB_NOT_FOUND) { |
1765 | if (i == 0) { |
1766 | if (srv_operation |
1767 | == SRV_OPERATION_RESTORE |
1768 | || srv_operation |
1769 | == SRV_OPERATION_RESTORE_EXPORT) { |
1770 | return(DB_SUCCESS); |
1771 | } |
1772 | if (flushed_lsn |
1773 | < static_cast<lsn_t>(1000)) { |
1774 | ib::error() |
1775 | << "Cannot create" |
1776 | " log files because" |
1777 | " data files are" |
1778 | " corrupt or the" |
1779 | " database was not" |
1780 | " shut down cleanly" |
1781 | " after creating" |
1782 | " the data files." ; |
1783 | return(srv_init_abort( |
1784 | DB_ERROR)); |
1785 | } |
1786 | |
1787 | err = create_log_files( |
1788 | logfilename, dirnamelen, |
1789 | flushed_lsn, logfile0); |
1790 | |
1791 | if (err == DB_SUCCESS) { |
1792 | err = create_log_files_rename( |
1793 | logfilename, |
1794 | dirnamelen, |
1795 | flushed_lsn, logfile0); |
1796 | } |
1797 | |
1798 | if (err != DB_SUCCESS) { |
1799 | return(srv_init_abort(err)); |
1800 | } |
1801 | |
1802 | /* Suppress the message about |
1803 | crash recovery. */ |
1804 | flushed_lsn = log_get_lsn(); |
1805 | goto files_checked; |
1806 | } |
1807 | |
1808 | /* opened all files */ |
1809 | break; |
1810 | } |
1811 | |
1812 | if (!srv_file_check_mode(logfilename)) { |
1813 | return(srv_init_abort(DB_ERROR)); |
1814 | } |
1815 | |
1816 | err = open_log_file(&files[i], logfilename, &size); |
1817 | |
1818 | if (err != DB_SUCCESS) { |
1819 | return(srv_init_abort(err)); |
1820 | } |
1821 | |
1822 | ut_a(size != (os_offset_t) -1); |
1823 | |
1824 | if (size & (OS_FILE_LOG_BLOCK_SIZE - 1)) { |
1825 | |
1826 | ib::error() << "Log file " << logfilename |
1827 | << " size " << size << " is not a" |
1828 | " multiple of 512 bytes" ; |
1829 | return(srv_init_abort(DB_ERROR)); |
1830 | } |
1831 | |
1832 | if (i == 0) { |
1833 | if (size == 0 |
1834 | && (srv_operation |
1835 | == SRV_OPERATION_RESTORE |
1836 | || srv_operation |
1837 | == SRV_OPERATION_RESTORE_EXPORT)) { |
1838 | /* Tolerate an empty ib_logfile0 |
1839 | from a previous run of |
1840 | mariabackup --prepare. */ |
1841 | return(DB_SUCCESS); |
1842 | } |
1843 | /* The first log file must consist of |
1844 | at least the following 512-byte pages: |
1845 | header, checkpoint page 1, empty, |
1846 | checkpoint page 2, redo log page(s) */ |
1847 | if (size <= OS_FILE_LOG_BLOCK_SIZE * 4) { |
1848 | ib::error() << "Log file " |
1849 | << logfilename << " size " |
1850 | << size << " is too small" ; |
1851 | return(srv_init_abort(DB_ERROR)); |
1852 | } |
1853 | srv_log_file_size = size; |
1854 | } else if (size != srv_log_file_size) { |
1855 | |
1856 | ib::error() << "Log file " << logfilename |
1857 | << " is of different size " << size |
1858 | << " bytes than other log files " |
1859 | << srv_log_file_size << " bytes!" ; |
1860 | return(srv_init_abort(DB_ERROR)); |
1861 | } |
1862 | } |
1863 | |
1864 | srv_n_log_files_found = i; |
1865 | |
1866 | /* Create the in-memory file space objects. */ |
1867 | |
1868 | sprintf(logfilename + dirnamelen, "ib_logfile%u" , 0); |
1869 | |
1870 | /* Disable the doublewrite buffer for log files. */ |
1871 | fil_space_t* log_space = fil_space_create( |
1872 | "innodb_redo_log" , |
1873 | SRV_LOG_SPACE_FIRST_ID, 0, |
1874 | FIL_TYPE_LOG, |
1875 | NULL /* no encryption yet */); |
1876 | |
1877 | ut_a(fil_validate()); |
1878 | ut_a(log_space); |
1879 | |
1880 | ut_a(srv_log_file_size <= 512ULL << 30); |
1881 | |
1882 | const ulint size = 1 + ulint((srv_log_file_size - 1) |
1883 | >> srv_page_size_shift); |
1884 | |
1885 | for (unsigned j = 0; j < srv_n_log_files_found; j++) { |
1886 | sprintf(logfilename + dirnamelen, "ib_logfile%u" , j); |
1887 | |
1888 | if (!fil_node_create(logfilename, size, |
1889 | log_space, false, false)) { |
1890 | return(srv_init_abort(DB_ERROR)); |
1891 | } |
1892 | } |
1893 | |
1894 | log_sys.log.create(srv_n_log_files_found); |
1895 | |
1896 | if (!log_set_capacity(srv_log_file_size_requested)) { |
1897 | return(srv_init_abort(DB_ERROR)); |
1898 | } |
1899 | } |
1900 | |
1901 | files_checked: |
1902 | /* Open all log files and data files in the system |
1903 | tablespace: we keep them open until database |
1904 | shutdown */ |
1905 | |
1906 | fil_open_log_and_system_tablespace_files(); |
1907 | ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug); |
1908 | |
1909 | err = srv_undo_tablespaces_init(create_new_db); |
1910 | |
1911 | /* If the force recovery is set very high then we carry on regardless |
1912 | of all errors. Basically this is fingers crossed mode. */ |
1913 | |
1914 | if (err != DB_SUCCESS |
1915 | && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) { |
1916 | |
1917 | return(srv_init_abort(err)); |
1918 | } |
1919 | |
1920 | /* Initialize objects used by dict stats gathering thread, which |
1921 | can also be used by recovery if it tries to drop some table */ |
1922 | if (!srv_read_only_mode) { |
1923 | dict_stats_thread_init(); |
1924 | } |
1925 | |
1926 | trx_sys.create(); |
1927 | |
1928 | if (create_new_db) { |
1929 | ut_a(!srv_read_only_mode); |
1930 | |
1931 | mtr_start(&mtr); |
1932 | ut_ad(fil_system.sys_space->id == 0); |
1933 | compile_time_assert(TRX_SYS_SPACE == 0); |
1934 | compile_time_assert(IBUF_SPACE_ID == 0); |
1935 | fsp_header_init(fil_system.sys_space, sum_of_new_sizes, &mtr); |
1936 | |
1937 | ulint ibuf_root = btr_create( |
1938 | DICT_CLUSTERED | DICT_IBUF, fil_system.sys_space, |
1939 | DICT_IBUF_ID_MIN, dict_ind_redundant, NULL, &mtr); |
1940 | |
1941 | mtr_commit(&mtr); |
1942 | |
1943 | if (ibuf_root == FIL_NULL) { |
1944 | return(srv_init_abort(DB_ERROR)); |
1945 | } |
1946 | |
1947 | ut_ad(ibuf_root == IBUF_TREE_ROOT_PAGE_NO); |
1948 | |
1949 | /* To maintain backward compatibility we create only |
1950 | the first rollback segment before the double write buffer. |
1951 | All the remaining rollback segments will be created later, |
1952 | after the double write buffer has been created. */ |
1953 | trx_sys_create_sys_pages(); |
1954 | trx_lists_init_at_db_start(); |
1955 | |
1956 | err = dict_create(); |
1957 | |
1958 | if (err != DB_SUCCESS) { |
1959 | return(srv_init_abort(err)); |
1960 | } |
1961 | |
1962 | buf_flush_sync_all_buf_pools(); |
1963 | |
1964 | flushed_lsn = log_get_lsn(); |
1965 | |
1966 | err = fil_write_flushed_lsn(flushed_lsn); |
1967 | |
1968 | if (err == DB_SUCCESS) { |
1969 | err = create_log_files_rename( |
1970 | logfilename, dirnamelen, |
1971 | flushed_lsn, logfile0); |
1972 | } |
1973 | |
1974 | if (err != DB_SUCCESS) { |
1975 | return(srv_init_abort(err)); |
1976 | } |
1977 | } else { |
1978 | /* Invalidate the buffer pool to ensure that we reread |
1979 | the page that we read above, during recovery. |
1980 | Note that this is not as heavy weight as it seems. At |
1981 | this point there will be only ONE page in the buf_LRU |
1982 | and there must be no page in the buf_flush list. */ |
1983 | buf_pool_invalidate(); |
1984 | |
1985 | /* Scan and locate truncate log files. Parsed located files |
1986 | and add table to truncate information to central vector for |
1987 | truncate fix-up action post recovery. */ |
1988 | err = TruncateLogParser::scan_and_parse(srv_log_group_home_dir); |
1989 | if (err != DB_SUCCESS) { |
1990 | |
1991 | return(srv_init_abort(DB_ERROR)); |
1992 | } |
1993 | |
1994 | /* We always try to do a recovery, even if the database had |
1995 | been shut down normally: this is the normal startup path */ |
1996 | |
1997 | err = recv_recovery_from_checkpoint_start(flushed_lsn); |
1998 | |
1999 | recv_sys->dblwr.pages.clear(); |
2000 | |
2001 | if (err != DB_SUCCESS) { |
2002 | return(srv_init_abort(err)); |
2003 | } |
2004 | |
2005 | switch (srv_operation) { |
2006 | case SRV_OPERATION_NORMAL: |
2007 | case SRV_OPERATION_RESTORE_EXPORT: |
2008 | /* Initialize the change buffer. */ |
2009 | err = dict_boot(); |
2010 | if (err != DB_SUCCESS) { |
2011 | return(srv_init_abort(err)); |
2012 | } |
2013 | /* fall through */ |
2014 | case SRV_OPERATION_RESTORE: |
2015 | /* This must precede |
2016 | recv_apply_hashed_log_recs(true). */ |
2017 | trx_lists_init_at_db_start(); |
2018 | break; |
2019 | case SRV_OPERATION_RESTORE_DELTA: |
2020 | case SRV_OPERATION_BACKUP: |
2021 | ut_ad(!"wrong mariabackup mode" ); |
2022 | } |
2023 | |
2024 | if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { |
2025 | /* Apply the hashed log records to the |
2026 | respective file pages, for the last batch of |
2027 | recv_group_scan_log_recs(). */ |
2028 | |
2029 | recv_apply_hashed_log_recs(true); |
2030 | |
2031 | if (recv_sys->found_corrupt_log) { |
2032 | return(srv_init_abort(DB_CORRUPTION)); |
2033 | } |
2034 | |
2035 | DBUG_PRINT("ib_log" , ("apply completed" )); |
2036 | |
2037 | if (recv_needed_recovery) { |
2038 | trx_sys_print_mysql_binlog_offset(); |
2039 | } |
2040 | } |
2041 | |
2042 | if (!srv_read_only_mode) { |
2043 | const ulint flags = FSP_FLAGS_PAGE_SSIZE(); |
2044 | for (ulint id = 0; id <= srv_undo_tablespaces; id++) { |
2045 | if (fil_space_t* space = fil_space_get(id)) { |
2046 | fsp_flags_try_adjust(space, flags); |
2047 | } |
2048 | } |
2049 | |
2050 | if (sum_of_new_sizes > 0) { |
2051 | /* New data file(s) were added */ |
2052 | mtr.start(); |
2053 | buf_block_t* block = buf_page_get( |
2054 | page_id_t(0, 0), univ_page_size, |
2055 | RW_SX_LATCH, &mtr); |
2056 | ulint size = mach_read_from_4( |
2057 | FSP_HEADER_OFFSET + FSP_SIZE |
2058 | + block->frame); |
2059 | ut_ad(size == fil_system.sys_space |
2060 | ->size_in_header); |
2061 | size += sum_of_new_sizes; |
2062 | mlog_write_ulint(FSP_HEADER_OFFSET + FSP_SIZE |
2063 | + block->frame, size, |
2064 | MLOG_4BYTES, &mtr); |
2065 | fil_system.sys_space->size_in_header = size; |
2066 | mtr.commit(); |
2067 | /* Immediately write the log record about |
2068 | increased tablespace size to disk, so that it |
2069 | is durable even if mysqld would crash |
2070 | quickly */ |
2071 | log_buffer_flush_to_disk(); |
2072 | } |
2073 | } |
2074 | |
2075 | #ifdef UNIV_DEBUG |
2076 | { |
2077 | mtr.start(); |
2078 | buf_block_t* block = buf_page_get(page_id_t(0, 0), |
2079 | univ_page_size, |
2080 | RW_S_LATCH, &mtr); |
2081 | ut_ad(mach_read_from_4(FSP_SIZE + FSP_HEADER_OFFSET |
2082 | + block->frame) |
2083 | == fil_system.sys_space->size_in_header); |
2084 | mtr.commit(); |
2085 | } |
2086 | #endif |
2087 | const ulint |
2088 | = fil_system.sys_space->size_in_header; |
2089 | const ulint sum_of_data_file_sizes |
2090 | = srv_sys_space.get_sum_of_sizes(); |
2091 | /* Compare the system tablespace file size to what is |
2092 | stored in FSP_SIZE. In srv_sys_space.open_or_create() |
2093 | we already checked that the file sizes match the |
2094 | innodb_data_file_path specification. */ |
2095 | if (srv_read_only_mode |
2096 | || sum_of_data_file_sizes == tablespace_size_in_header) { |
2097 | /* Do not complain about the size. */ |
2098 | } else if (!srv_sys_space.can_auto_extend_last_file() |
2099 | || sum_of_data_file_sizes |
2100 | < tablespace_size_in_header) { |
2101 | ib::error() << "Tablespace size stored in header is " |
2102 | << tablespace_size_in_header |
2103 | << " pages, but the sum of data file sizes is " |
2104 | << sum_of_data_file_sizes << " pages" ; |
2105 | |
2106 | if (srv_force_recovery == 0 |
2107 | && sum_of_data_file_sizes |
2108 | < tablespace_size_in_header) { |
2109 | ib::error() << |
2110 | "Cannot start InnoDB. The tail of" |
2111 | " the system tablespace is" |
2112 | " missing. Have you edited" |
2113 | " innodb_data_file_path in my.cnf" |
2114 | " in an inappropriate way, removing" |
2115 | " data files from there?" |
2116 | " You can set innodb_force_recovery=1" |
2117 | " in my.cnf to force" |
2118 | " a startup if you are trying to" |
2119 | " recover a badly corrupt database." ; |
2120 | |
2121 | return(srv_init_abort(DB_ERROR)); |
2122 | } |
2123 | } |
2124 | |
2125 | /* recv_recovery_from_checkpoint_finish needs trx lists which |
2126 | are initialized in trx_lists_init_at_db_start(). */ |
2127 | |
2128 | recv_recovery_from_checkpoint_finish(); |
2129 | |
2130 | if (srv_operation == SRV_OPERATION_RESTORE |
2131 | || srv_operation == SRV_OPERATION_RESTORE_EXPORT) { |
2132 | /* After applying the redo log from |
2133 | SRV_OPERATION_BACKUP, flush the changes |
2134 | to the data files and truncate or delete the log. |
2135 | Unless --export is specified, no further change to |
2136 | InnoDB files is needed. */ |
2137 | ut_ad(!srv_force_recovery); |
2138 | ut_ad(srv_n_log_files_found <= 1); |
2139 | ut_ad(recv_no_log_write); |
2140 | buf_flush_sync_all_buf_pools(); |
2141 | err = fil_write_flushed_lsn(log_get_lsn()); |
2142 | ut_ad(!buf_pool_check_no_pending_io()); |
2143 | fil_close_log_files(true); |
2144 | if (err == DB_SUCCESS) { |
2145 | bool trunc = srv_operation |
2146 | == SRV_OPERATION_RESTORE; |
2147 | /* Delete subsequent log files. */ |
2148 | delete_log_files(logfilename, dirnamelen, |
2149 | (uint)srv_n_log_files_found, trunc); |
2150 | if (trunc) { |
2151 | /* Truncate the first log file. */ |
2152 | strcpy(logfilename + dirnamelen, |
2153 | "ib_logfile0" ); |
2154 | FILE* f = fopen(logfilename, "w" ); |
2155 | fclose(f); |
2156 | } |
2157 | } |
2158 | return(err); |
2159 | } |
2160 | |
2161 | /* Upgrade or resize or rebuild the redo logs before |
2162 | generating any dirty pages, so that the old redo log |
2163 | files will not be written to. */ |
2164 | |
2165 | if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { |
2166 | /* Completely ignore the redo log. */ |
2167 | } else if (srv_read_only_mode) { |
2168 | /* Leave the redo log alone. */ |
2169 | } else if (srv_log_file_size_requested == srv_log_file_size |
2170 | && srv_n_log_files_found == srv_n_log_files |
2171 | && log_sys.log.format |
2172 | == (srv_encrypt_log |
2173 | ? LOG_HEADER_FORMAT_CURRENT |
2174 | | LOG_HEADER_FORMAT_ENCRYPTED |
2175 | : LOG_HEADER_FORMAT_CURRENT)) { |
2176 | /* No need to upgrade or resize the redo log. */ |
2177 | } else { |
2178 | /* Prepare to delete the old redo log files */ |
2179 | flushed_lsn = srv_prepare_to_delete_redo_log_files(i); |
2180 | |
2181 | DBUG_EXECUTE_IF("innodb_log_abort_1" , |
2182 | return(srv_init_abort(DB_ERROR));); |
2183 | /* Prohibit redo log writes from any other |
2184 | threads until creating a log checkpoint at the |
2185 | end of create_log_files(). */ |
2186 | ut_d(recv_no_log_write = true); |
2187 | ut_ad(!buf_pool_check_no_pending_io()); |
2188 | |
2189 | DBUG_EXECUTE_IF("innodb_log_abort_3" , |
2190 | return(srv_init_abort(DB_ERROR));); |
2191 | DBUG_PRINT("ib_log" , ("After innodb_log_abort_3" )); |
2192 | |
2193 | /* Stamp the LSN to the data files. */ |
2194 | err = fil_write_flushed_lsn(flushed_lsn); |
2195 | |
2196 | DBUG_EXECUTE_IF("innodb_log_abort_4" , err = DB_ERROR;); |
2197 | DBUG_PRINT("ib_log" , ("After innodb_log_abort_4" )); |
2198 | |
2199 | if (err != DB_SUCCESS) { |
2200 | return(srv_init_abort(err)); |
2201 | } |
2202 | |
2203 | /* Close and free the redo log files, so that |
2204 | we can replace them. */ |
2205 | fil_close_log_files(true); |
2206 | |
2207 | DBUG_EXECUTE_IF("innodb_log_abort_5" , |
2208 | return(srv_init_abort(DB_ERROR));); |
2209 | DBUG_PRINT("ib_log" , ("After innodb_log_abort_5" )); |
2210 | |
2211 | ib::info() << "Starting to delete and rewrite log" |
2212 | " files." ; |
2213 | |
2214 | srv_log_file_size = srv_log_file_size_requested; |
2215 | |
2216 | err = create_log_files( |
2217 | logfilename, dirnamelen, flushed_lsn, |
2218 | logfile0); |
2219 | |
2220 | if (err == DB_SUCCESS) { |
2221 | err = create_log_files_rename( |
2222 | logfilename, dirnamelen, flushed_lsn, |
2223 | logfile0); |
2224 | } |
2225 | |
2226 | if (err != DB_SUCCESS) { |
2227 | return(srv_init_abort(err)); |
2228 | } |
2229 | } |
2230 | |
2231 | /* Validate a few system page types that were left |
2232 | uninitialized by older versions of MySQL. */ |
2233 | if (!high_level_read_only) { |
2234 | buf_block_t* block; |
2235 | mtr.start(); |
2236 | /* Bitmap page types will be reset in |
2237 | buf_dblwr_check_block() without redo logging. */ |
2238 | block = buf_page_get( |
2239 | page_id_t(IBUF_SPACE_ID, |
2240 | FSP_IBUF_HEADER_PAGE_NO), |
2241 | univ_page_size, RW_X_LATCH, &mtr); |
2242 | fil_block_check_type(block, FIL_PAGE_TYPE_SYS, &mtr); |
2243 | /* Already MySQL 3.23.53 initialized |
2244 | FSP_IBUF_TREE_ROOT_PAGE_NO to |
2245 | FIL_PAGE_INDEX. No need to reset that one. */ |
2246 | block = buf_page_get( |
2247 | page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), |
2248 | univ_page_size, RW_X_LATCH, &mtr); |
2249 | fil_block_check_type(block, FIL_PAGE_TYPE_TRX_SYS, |
2250 | &mtr); |
2251 | block = buf_page_get( |
2252 | page_id_t(TRX_SYS_SPACE, |
2253 | FSP_FIRST_RSEG_PAGE_NO), |
2254 | univ_page_size, RW_X_LATCH, &mtr); |
2255 | fil_block_check_type(block, FIL_PAGE_TYPE_SYS, &mtr); |
2256 | block = buf_page_get( |
2257 | page_id_t(TRX_SYS_SPACE, FSP_DICT_HDR_PAGE_NO), |
2258 | univ_page_size, RW_X_LATCH, &mtr); |
2259 | fil_block_check_type(block, FIL_PAGE_TYPE_SYS, &mtr); |
2260 | mtr.commit(); |
2261 | } |
2262 | |
2263 | /* Roll back any recovered data dictionary transactions, so |
2264 | that the data dictionary tables will be free of any locks. |
2265 | The data dictionary latch should guarantee that there is at |
2266 | most one data dictionary transaction active at a time. */ |
2267 | if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { |
2268 | trx_rollback_recovered(false); |
2269 | } |
2270 | |
2271 | /* Fix-up truncate of tables in the system tablespace |
2272 | if server crashed while truncate was active. The non- |
2273 | system tables are done after tablespace discovery. Do |
2274 | this now because this procedure assumes that no pages |
2275 | have changed since redo recovery. Tablespace discovery |
2276 | can do updates to pages in the system tablespace.*/ |
2277 | err = truncate_t::fixup_tables_in_system_tablespace(); |
2278 | |
2279 | if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { |
2280 | /* Open or Create SYS_TABLESPACES and SYS_DATAFILES |
2281 | so that tablespace names and other metadata can be |
2282 | found. */ |
2283 | err = dict_create_or_check_sys_tablespace(); |
2284 | if (err != DB_SUCCESS) { |
2285 | return(srv_init_abort(err)); |
2286 | } |
2287 | |
2288 | /* The following call is necessary for the insert |
2289 | buffer to work with multiple tablespaces. We must |
2290 | know the mapping between space id's and .ibd file |
2291 | names. |
2292 | |
2293 | In a crash recovery, we check that the info in data |
2294 | dictionary is consistent with what we already know |
2295 | about space id's from the calls to fil_ibd_load(). |
2296 | |
2297 | In a normal startup, we create the space objects for |
2298 | every table in the InnoDB data dictionary that has |
2299 | an .ibd file. |
2300 | |
2301 | We also determine the maximum tablespace id used. |
2302 | |
2303 | The 'validate' flag indicates that when a tablespace |
2304 | is opened, we also read the header page and validate |
2305 | the contents to the data dictionary. This is time |
2306 | consuming, especially for databases with lots of ibd |
2307 | files. So only do it after a crash and not forcing |
2308 | recovery. Open rw transactions at this point is not |
2309 | a good reason to validate. */ |
2310 | bool validate = recv_needed_recovery |
2311 | && srv_force_recovery == 0; |
2312 | |
2313 | dict_check_tablespaces_and_store_max_id(validate); |
2314 | } |
2315 | |
2316 | /* Fix-up truncate of table if server crashed while truncate |
2317 | was active. */ |
2318 | err = truncate_t::fixup_tables_in_non_system_tablespace(); |
2319 | |
2320 | if (err != DB_SUCCESS) { |
2321 | return(srv_init_abort(err)); |
2322 | } |
2323 | |
2324 | recv_recovery_rollback_active(); |
2325 | srv_startup_is_before_trx_rollback_phase = FALSE; |
2326 | } |
2327 | |
2328 | ut_ad(err == DB_SUCCESS); |
2329 | ut_a(sum_of_new_sizes != ULINT_UNDEFINED); |
2330 | |
2331 | /* Create the doublewrite buffer to a new tablespace */ |
2332 | if (!srv_read_only_mode && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO |
2333 | && !buf_dblwr_create()) { |
2334 | return(srv_init_abort(DB_ERROR)); |
2335 | } |
2336 | |
2337 | /* Here the double write buffer has already been created and so |
2338 | any new rollback segments will be allocated after the double |
2339 | write buffer. The default segment should already exist. |
2340 | We create the new segments only if it's a new database or |
2341 | the database was shutdown cleanly. */ |
2342 | |
2343 | /* Note: When creating the extra rollback segments during an upgrade |
2344 | we violate the latching order, even if the change buffer is empty. |
2345 | We make an exception in sync0sync.cc and check srv_is_being_started |
2346 | for that violation. It cannot create a deadlock because we are still |
2347 | running in single threaded mode essentially. Only the IO threads |
2348 | should be running at this stage. */ |
2349 | |
2350 | ut_a(srv_undo_logs > 0); |
2351 | ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS); |
2352 | |
2353 | if (!trx_sys_create_rsegs()) { |
2354 | return(srv_init_abort(DB_ERROR)); |
2355 | } |
2356 | |
2357 | srv_startup_is_before_trx_rollback_phase = false; |
2358 | |
2359 | if (!srv_read_only_mode) { |
2360 | /* Create the thread which watches the timeouts |
2361 | for lock waits */ |
2362 | thread_handles[2 + SRV_MAX_N_IO_THREADS] = os_thread_create( |
2363 | lock_wait_timeout_thread, |
2364 | NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS); |
2365 | thread_started[2 + SRV_MAX_N_IO_THREADS] = true; |
2366 | lock_sys.timeout_thread_active = true; |
2367 | |
2368 | /* Create the thread which warns of long semaphore waits */ |
2369 | srv_error_monitor_active = true; |
2370 | thread_handles[3 + SRV_MAX_N_IO_THREADS] = os_thread_create( |
2371 | srv_error_monitor_thread, |
2372 | NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS); |
2373 | thread_started[3 + SRV_MAX_N_IO_THREADS] = true; |
2374 | |
2375 | /* Create the thread which prints InnoDB monitor info */ |
2376 | srv_monitor_active = true; |
2377 | thread_handles[4 + SRV_MAX_N_IO_THREADS] = os_thread_create( |
2378 | srv_monitor_thread, |
2379 | NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS); |
2380 | thread_started[4 + SRV_MAX_N_IO_THREADS] = true; |
2381 | srv_start_state |= SRV_START_STATE_LOCK_SYS |
2382 | | SRV_START_STATE_MONITOR; |
2383 | |
2384 | ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN |
2385 | || !purge_sys.enabled()); |
2386 | |
2387 | if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { |
2388 | srv_undo_sources = true; |
2389 | /* Create the dict stats gathering thread */ |
2390 | srv_dict_stats_thread_active = true; |
2391 | dict_stats_thread_handle = os_thread_create( |
2392 | dict_stats_thread, NULL, NULL); |
2393 | |
2394 | /* Create the thread that will optimize the |
2395 | FULLTEXT search index subsystem. */ |
2396 | fts_optimize_init(); |
2397 | } |
2398 | } |
2399 | |
2400 | /* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */ |
2401 | err = dict_create_or_check_foreign_constraint_tables(); |
2402 | if (err == DB_SUCCESS) { |
2403 | err = dict_create_or_check_sys_tablespace(); |
2404 | if (err == DB_SUCCESS) { |
2405 | err = dict_create_or_check_sys_virtual(); |
2406 | } |
2407 | } |
2408 | switch (err) { |
2409 | case DB_SUCCESS: |
2410 | break; |
2411 | case DB_READ_ONLY: |
2412 | if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) { |
2413 | break; |
2414 | } |
2415 | ib::error() << "Cannot create system tables in read-only mode" ; |
2416 | /* fall through */ |
2417 | default: |
2418 | return(srv_init_abort(err)); |
2419 | } |
2420 | |
2421 | if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL) { |
2422 | /* Initialize the innodb_temporary tablespace and keep |
2423 | it open until shutdown. */ |
2424 | err = srv_open_tmp_tablespace(create_new_db); |
2425 | |
2426 | if (err != DB_SUCCESS) { |
2427 | return(srv_init_abort(err)); |
2428 | } |
2429 | |
2430 | trx_temp_rseg_create(); |
2431 | |
2432 | if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { |
2433 | thread_handles[1 + SRV_MAX_N_IO_THREADS] |
2434 | = os_thread_create(srv_master_thread, NULL, |
2435 | (1 + SRV_MAX_N_IO_THREADS) |
2436 | + thread_ids); |
2437 | thread_started[1 + SRV_MAX_N_IO_THREADS] = true; |
2438 | srv_start_state_set(SRV_START_STATE_MASTER); |
2439 | } |
2440 | } |
2441 | |
2442 | if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL |
2443 | && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { |
2444 | |
2445 | thread_handles[5 + SRV_MAX_N_IO_THREADS] = os_thread_create( |
2446 | srv_purge_coordinator_thread, |
2447 | NULL, thread_ids + 5 + SRV_MAX_N_IO_THREADS); |
2448 | |
2449 | thread_started[5 + SRV_MAX_N_IO_THREADS] = true; |
2450 | |
2451 | ut_a(UT_ARR_SIZE(thread_ids) |
2452 | > 5 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS); |
2453 | |
2454 | /* We've already created the purge coordinator thread above. */ |
2455 | for (i = 1; i < srv_n_purge_threads; ++i) { |
2456 | thread_handles[5 + i + SRV_MAX_N_IO_THREADS] = os_thread_create( |
2457 | srv_worker_thread, NULL, |
2458 | thread_ids + 5 + i + SRV_MAX_N_IO_THREADS); |
2459 | thread_started[5 + i + SRV_MAX_N_IO_THREADS] = true; |
2460 | } |
2461 | |
2462 | while (srv_shutdown_state == SRV_SHUTDOWN_NONE |
2463 | && srv_force_recovery < SRV_FORCE_NO_BACKGROUND |
2464 | && !purge_sys.enabled()) { |
2465 | ib::info() << "Waiting for purge to start" ; |
2466 | os_thread_sleep(50000); |
2467 | } |
2468 | |
2469 | srv_start_state_set(SRV_START_STATE_PURGE); |
2470 | } |
2471 | |
2472 | srv_is_being_started = false; |
2473 | |
2474 | if (!srv_read_only_mode) { |
2475 | /* wake main loop of page cleaner up */ |
2476 | os_event_set(buf_flush_event); |
2477 | } |
2478 | |
2479 | if (srv_print_verbose_log) { |
2480 | ib::info() << INNODB_VERSION_STR |
2481 | << " started; log sequence number " |
2482 | << srv_start_lsn |
2483 | << "; transaction id " << trx_sys.get_max_trx_id(); |
2484 | } |
2485 | |
2486 | if (srv_force_recovery > 0) { |
2487 | ib::info() << "!!! innodb_force_recovery is set to " |
2488 | << srv_force_recovery << " !!!" ; |
2489 | } |
2490 | |
2491 | if (srv_force_recovery == 0) { |
2492 | /* In the insert buffer we may have even bigger tablespace |
2493 | id's, because we may have dropped those tablespaces, but |
2494 | insert buffer merge has not had time to clean the records from |
2495 | the ibuf tree. */ |
2496 | |
2497 | ibuf_update_max_tablespace_id(); |
2498 | } |
2499 | |
2500 | if (!srv_read_only_mode) { |
2501 | if (create_new_db) { |
2502 | srv_buffer_pool_load_at_startup = FALSE; |
2503 | } |
2504 | |
2505 | #ifdef WITH_WSREP |
2506 | /* |
2507 | Create the dump/load thread only when not running with |
2508 | --wsrep-recover. |
2509 | */ |
2510 | if (!wsrep_recovery) { |
2511 | #endif /* WITH_WSREP */ |
2512 | |
2513 | /* Create the buffer pool dump/load thread */ |
2514 | srv_buf_dump_thread_active = true; |
2515 | buf_dump_thread_handle= |
2516 | os_thread_create(buf_dump_thread, NULL, NULL); |
2517 | |
2518 | #ifdef WITH_WSREP |
2519 | } else { |
2520 | ib::warn() << |
2521 | "Skipping buffer pool dump/restore during " |
2522 | "wsrep recovery." ; |
2523 | } |
2524 | #endif /* WITH_WSREP */ |
2525 | |
2526 | /* Create thread(s) that handles key rotation. This is |
2527 | needed already here as log_preflush_pool_modified_pages |
2528 | will flush dirty pages and that might need e.g. |
2529 | fil_crypt_threads_event. */ |
2530 | fil_system_enter(); |
2531 | btr_scrub_init(); |
2532 | fil_crypt_threads_init(); |
2533 | fil_system_exit(); |
2534 | |
2535 | /* Initialize online defragmentation. */ |
2536 | btr_defragment_init(); |
2537 | btr_defragment_thread_active = true; |
2538 | os_thread_create(btr_defragment_thread, NULL, NULL); |
2539 | |
2540 | srv_start_state |= SRV_START_STATE_REDO; |
2541 | } |
2542 | |
2543 | /* Create the buffer pool resize thread */ |
2544 | srv_buf_resize_thread_active = true; |
2545 | os_thread_create(buf_resize_thread, NULL, NULL); |
2546 | |
2547 | return(DB_SUCCESS); |
2548 | } |
2549 | |
2550 | #if 0 |
2551 | /******************************************************************** |
2552 | Sync all FTS cache before shutdown */ |
2553 | static |
2554 | void |
2555 | srv_fts_close(void) |
2556 | /*===============*/ |
2557 | { |
2558 | dict_table_t* table; |
2559 | |
2560 | for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); |
2561 | table; table = UT_LIST_GET_NEXT(table_LRU, table)) { |
2562 | fts_t* fts = table->fts; |
2563 | |
2564 | if (fts != NULL) { |
2565 | fts_sync_table(table); |
2566 | } |
2567 | } |
2568 | |
2569 | for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); |
2570 | table; table = UT_LIST_GET_NEXT(table_LRU, table)) { |
2571 | fts_t* fts = table->fts; |
2572 | |
2573 | if (fts != NULL) { |
2574 | fts_sync_table(table); |
2575 | } |
2576 | } |
2577 | } |
2578 | #endif |
2579 | |
2580 | /** Shut down background threads that can generate undo log. */ |
2581 | void srv_shutdown_bg_undo_sources() |
2582 | { |
2583 | if (srv_undo_sources) { |
2584 | ut_ad(!srv_read_only_mode); |
2585 | fts_optimize_shutdown(); |
2586 | dict_stats_shutdown(); |
2587 | while (row_get_background_drop_list_len_low()) { |
2588 | srv_wake_master_thread(); |
2589 | os_thread_yield(); |
2590 | } |
2591 | srv_undo_sources = false; |
2592 | } |
2593 | } |
2594 | |
2595 | /** Shut down InnoDB. */ |
2596 | void innodb_shutdown() |
2597 | { |
2598 | ut_ad(!my_atomic_loadptr_explicit(reinterpret_cast<void**> |
2599 | (&srv_running), |
2600 | MY_MEMORY_ORDER_RELAXED)); |
2601 | ut_ad(!srv_undo_sources); |
2602 | |
2603 | switch (srv_operation) { |
2604 | case SRV_OPERATION_BACKUP: |
2605 | case SRV_OPERATION_RESTORE: |
2606 | case SRV_OPERATION_RESTORE_DELTA: |
2607 | case SRV_OPERATION_RESTORE_EXPORT: |
2608 | fil_close_all_files(); |
2609 | break; |
2610 | case SRV_OPERATION_NORMAL: |
2611 | /* Shut down the persistent files. */ |
2612 | logs_empty_and_mark_files_at_shutdown(); |
2613 | |
2614 | if (ulint n_threads = srv_conc_get_active_threads()) { |
2615 | ib::warn() << "Query counter shows " |
2616 | << n_threads << " queries still" |
2617 | " inside InnoDB at shutdown" ; |
2618 | } |
2619 | } |
2620 | |
2621 | /* Exit any remaining threads. */ |
2622 | srv_shutdown_all_bg_threads(); |
2623 | |
2624 | if (srv_monitor_file) { |
2625 | fclose(srv_monitor_file); |
2626 | srv_monitor_file = 0; |
2627 | if (srv_monitor_file_name) { |
2628 | unlink(srv_monitor_file_name); |
2629 | ut_free(srv_monitor_file_name); |
2630 | } |
2631 | } |
2632 | |
2633 | if (srv_misc_tmpfile) { |
2634 | fclose(srv_misc_tmpfile); |
2635 | srv_misc_tmpfile = 0; |
2636 | } |
2637 | |
2638 | ut_ad(dict_stats_event || !srv_was_started || srv_read_only_mode); |
2639 | ut_ad(dict_sys || !srv_was_started); |
2640 | ut_ad(trx_sys.is_initialised() || !srv_was_started); |
2641 | ut_ad(buf_dblwr || !srv_was_started || srv_read_only_mode |
2642 | || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO); |
2643 | ut_ad(lock_sys.is_initialised() || !srv_was_started); |
2644 | ut_ad(log_sys.is_initialised() || !srv_was_started); |
2645 | #ifdef BTR_CUR_HASH_ADAPT |
2646 | ut_ad(btr_search_sys || !srv_was_started); |
2647 | #endif /* BTR_CUR_HASH_ADAPT */ |
2648 | ut_ad(ibuf || !srv_was_started); |
2649 | |
2650 | if (dict_stats_event) { |
2651 | dict_stats_thread_deinit(); |
2652 | } |
2653 | |
2654 | if (srv_start_state_is_set(SRV_START_STATE_REDO)) { |
2655 | ut_ad(!srv_read_only_mode); |
2656 | /* srv_shutdown_bg_undo_sources() already invoked |
2657 | fts_optimize_shutdown(); dict_stats_shutdown(); */ |
2658 | |
2659 | fil_crypt_threads_cleanup(); |
2660 | btr_scrub_cleanup(); |
2661 | btr_defragment_shutdown(); |
2662 | } |
2663 | |
2664 | /* This must be disabled before closing the buffer pool |
2665 | and closing the data dictionary. */ |
2666 | |
2667 | #ifdef BTR_CUR_HASH_ADAPT |
2668 | if (dict_sys) { |
2669 | btr_search_disable(true); |
2670 | } |
2671 | #endif /* BTR_CUR_HASH_ADAPT */ |
2672 | if (ibuf) { |
2673 | ibuf_close(); |
2674 | } |
2675 | log_sys.close(); |
2676 | purge_sys.close(); |
2677 | trx_sys.close(); |
2678 | if (buf_dblwr) { |
2679 | buf_dblwr_free(); |
2680 | } |
2681 | lock_sys.close(); |
2682 | trx_pool_close(); |
2683 | |
2684 | if (!srv_read_only_mode) { |
2685 | mutex_free(&srv_monitor_file_mutex); |
2686 | mutex_free(&srv_misc_tmpfile_mutex); |
2687 | } |
2688 | |
2689 | dict_close(); |
2690 | btr_search_sys_free(); |
2691 | |
2692 | /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside |
2693 | them */ |
2694 | os_aio_free(); |
2695 | row_mysql_close(); |
2696 | srv_free(); |
2697 | fil_system.close(); |
2698 | |
2699 | /* 4. Free all allocated memory */ |
2700 | |
2701 | pars_lexer_close(); |
2702 | recv_sys_close(); |
2703 | |
2704 | ut_ad(buf_pool_ptr || !srv_was_started); |
2705 | if (buf_pool_ptr) { |
2706 | buf_pool_free(srv_buf_pool_instances); |
2707 | } |
2708 | |
2709 | sync_check_close(); |
2710 | |
2711 | if (srv_was_started && srv_print_verbose_log) { |
2712 | ib::info() << "Shutdown completed; log sequence number " |
2713 | << srv_shutdown_lsn |
2714 | << "; transaction id " << trx_sys.get_max_trx_id(); |
2715 | } |
2716 | |
2717 | srv_start_state = SRV_START_STATE_NONE; |
2718 | srv_was_started = false; |
2719 | srv_start_has_been_called = false; |
2720 | } |
2721 | |
2722 | /** Get the meta-data filename from the table name for a |
2723 | single-table tablespace. |
2724 | @param[in] table table object |
2725 | @param[out] filename filename |
2726 | @param[in] max_len filename max length */ |
2727 | void |
2728 | srv_get_meta_data_filename( |
2729 | dict_table_t* table, |
2730 | char* filename, |
2731 | ulint max_len) |
2732 | { |
2733 | ulint len; |
2734 | char* path; |
2735 | |
2736 | /* Make sure the data_dir_path is set. */ |
2737 | dict_get_and_save_data_dir_path(table, false); |
2738 | |
2739 | if (DICT_TF_HAS_DATA_DIR(table->flags)) { |
2740 | ut_a(table->data_dir_path); |
2741 | |
2742 | path = fil_make_filepath( |
2743 | table->data_dir_path, table->name.m_name, CFG, true); |
2744 | } else { |
2745 | path = fil_make_filepath(NULL, table->name.m_name, CFG, false); |
2746 | } |
2747 | |
2748 | ut_a(path); |
2749 | len = ut_strlen(path); |
2750 | ut_a(max_len >= len); |
2751 | |
2752 | strcpy(filename, path); |
2753 | |
2754 | ut_free(path); |
2755 | } |
2756 | |