1 | /* Copyright (C) 2006, 2007 MySQL AB |
2 | Copyright (C) 2010, 2013, Monty Program Ab. |
3 | |
4 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by |
6 | the Free Software Foundation; version 2 of the License. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
16 | |
17 | /* |
18 | WL#3072 Maria recovery |
19 | First version written by Guilhem Bichot on 2006-04-27. |
20 | */ |
21 | |
22 | /* Here is the implementation of this module */ |
23 | |
24 | #include "maria_def.h" |
25 | #include "ma_recovery.h" |
26 | #include "ma_blockrec.h" |
27 | #include "ma_checkpoint.h" |
28 | #include "trnman.h" |
29 | #include "ma_key_recover.h" |
30 | #include "ma_recovery_util.h" |
31 | #include "hash.h" |
32 | #include <my_check_opt.h> |
33 | |
34 | struct st_trn_for_recovery /* used only in the REDO phase */ |
35 | { |
36 | LSN group_start_lsn, undo_lsn, first_undo_lsn; |
37 | TrID long_trid; |
38 | }; |
39 | struct st_table_for_recovery /* used in the REDO and UNDO phase */ |
40 | { |
41 | MARIA_HA *info; |
42 | }; |
43 | /* Variables used by all functions of this module. Ok as single-threaded */ |
44 | static struct st_trn_for_recovery *all_active_trans; |
45 | static struct st_table_for_recovery *all_tables; |
46 | static struct st_dirty_page *dirty_pages_pool; |
47 | static LSN current_group_end_lsn; |
48 | #ifndef DBUG_OFF |
49 | /** Current group of REDOs is about this table and only this one */ |
50 | static MARIA_HA *current_group_table; |
51 | #endif |
52 | static TrID max_long_trid= 0; /**< max long trid seen by REDO phase */ |
53 | static my_bool skip_DDLs; /**< if REDO phase should skip DDL records */ |
54 | /** @brief to avoid writing a checkpoint if recovery did nothing. */ |
55 | static my_bool checkpoint_useful; |
56 | static my_bool in_redo_phase; |
57 | static my_bool trns_created; |
58 | static ulong skipped_undo_phase; |
59 | static ulonglong now; /**< for tracking execution time of phases */ |
60 | static void (*save_error_handler_hook)(uint, const char *,myf); |
61 | static uint recovery_warnings; /**< count of warnings */ |
62 | static uint recovery_found_crashed_tables; |
63 | HASH tables_to_redo; /* For maria_read_log */ |
64 | ulong maria_recovery_force_crash_counter; |
65 | |
/*
  Signature builders for the per-record-type execution hooks. Each macro
  expands to the declarator of a static function whose name is derived from
  the record type; follow it with ';' for a forward declaration or with a
  body for the definition.
*/

/* REDO-phase hook: static int exec_REDO_LOGREC_<HOOK>(rec) */
#define prototype_redo_exec_hook(HOOK)                                     \
  static int exec_REDO_LOGREC_ ## HOOK(const TRANSLOG_HEADER_BUFFER *rec)

/* Same signature, with 'rec' marked unused for hooks that ignore it */
#define prototype_redo_exec_hook_dummy(HOOK)                               \
  static int exec_REDO_LOGREC_ ## HOOK(const TRANSLOG_HEADER_BUFFER *rec   \
                                       __attribute__ ((unused)))

/* UNDO-phase hook: static int exec_UNDO_LOGREC_<HOOK>(rec, trn) */
#define prototype_undo_exec_hook(HOOK)                                     \
  static int exec_UNDO_LOGREC_ ## HOOK(const TRANSLOG_HEADER_BUFFER *rec,  \
                                       TRN *trn)
75 | |
/*
  Forward declarations of the execution hooks.

  Hooks run during the REDO phase. Note that some UNDO record types also
  have a REDO-phase hook in addition to their UNDO-phase hook below.
*/
prototype_redo_exec_hook(LONG_TRANSACTION_ID);
prototype_redo_exec_hook_dummy(CHECKPOINT);
prototype_redo_exec_hook(REDO_CREATE_TABLE);
prototype_redo_exec_hook(REDO_RENAME_TABLE);
prototype_redo_exec_hook(REDO_REPAIR_TABLE);
prototype_redo_exec_hook(REDO_DROP_TABLE);
prototype_redo_exec_hook(FILE_ID);
prototype_redo_exec_hook(INCOMPLETE_LOG);
prototype_redo_exec_hook_dummy(INCOMPLETE_GROUP);
prototype_redo_exec_hook(UNDO_BULK_INSERT);
prototype_redo_exec_hook(IMPORTED_TABLE);
/*
  Row insertion comes in three flavours: head, tail and blobs. The blobs
  hook used to be missing here because the head hook was declared twice.
*/
prototype_redo_exec_hook(REDO_INSERT_ROW_HEAD);
prototype_redo_exec_hook(REDO_INSERT_ROW_TAIL);
prototype_redo_exec_hook(REDO_INSERT_ROW_BLOBS);
prototype_redo_exec_hook(REDO_PURGE_ROW_HEAD);
prototype_redo_exec_hook(REDO_PURGE_ROW_TAIL);
prototype_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL);
prototype_redo_exec_hook(REDO_FREE_BLOCKS);
prototype_redo_exec_hook(REDO_DELETE_ALL);
prototype_redo_exec_hook(REDO_INDEX);
prototype_redo_exec_hook(REDO_INDEX_NEW_PAGE);
prototype_redo_exec_hook(REDO_INDEX_FREE_PAGE);
prototype_redo_exec_hook(REDO_BITMAP_NEW_PAGE);
prototype_redo_exec_hook(UNDO_ROW_INSERT);
prototype_redo_exec_hook(UNDO_ROW_DELETE);
prototype_redo_exec_hook(UNDO_ROW_UPDATE);
prototype_redo_exec_hook(UNDO_KEY_INSERT);
prototype_redo_exec_hook(UNDO_KEY_DELETE);
prototype_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
prototype_redo_exec_hook(COMMIT);
prototype_redo_exec_hook(CLR_END);
prototype_redo_exec_hook(DEBUG_INFO);

/* Hooks run during the UNDO phase */
prototype_undo_exec_hook(UNDO_ROW_INSERT);
prototype_undo_exec_hook(UNDO_ROW_DELETE);
prototype_undo_exec_hook(UNDO_ROW_UPDATE);
prototype_undo_exec_hook(UNDO_KEY_INSERT);
prototype_undo_exec_hook(UNDO_KEY_DELETE);
prototype_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
prototype_undo_exec_hook(UNDO_BULK_INSERT);
115 | |
116 | static int run_redo_phase(LSN lsn, LSN end_lsn, |
117 | enum maria_apply_log_way apply); |
118 | static uint end_of_redo_phase(my_bool prepare_for_undo_phase); |
119 | static int run_undo_phase(uint uncommitted); |
120 | static void display_record_position(const LOG_DESC *log_desc, |
121 | const TRANSLOG_HEADER_BUFFER *rec, |
122 | uint number); |
123 | static int display_and_apply_record(const LOG_DESC *log_desc, |
124 | const TRANSLOG_HEADER_BUFFER *rec); |
125 | static MARIA_HA *get_MARIA_HA_from_REDO_record(const |
126 | TRANSLOG_HEADER_BUFFER *rec); |
127 | static MARIA_HA *get_MARIA_HA_from_UNDO_record(const |
128 | TRANSLOG_HEADER_BUFFER *rec); |
129 | static void prepare_table_for_close(MARIA_HA *info, TRANSLOG_ADDRESS horizon); |
130 | static LSN parse_checkpoint_record(LSN lsn); |
131 | static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn, |
132 | LSN first_undo_lsn); |
133 | static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id); |
134 | static int new_page(uint32 fileid, pgcache_page_no_t pageid, LSN rec_lsn, |
135 | struct st_dirty_page *dirty_page); |
136 | static int close_all_tables(void); |
137 | static my_bool close_one_table(const char *name, TRANSLOG_ADDRESS addr); |
138 | static void print_redo_phase_progress(TRANSLOG_ADDRESS addr); |
139 | static void delete_all_transactions(); |
140 | |
141 | /** @brief global [out] buffer for translog_read_record(); never shrinks */ |
142 | static struct |
143 | { |
144 | /* |
145 | uchar* is more adapted (less casts) than char*, thus we don't use |
146 | LEX_STRING. |
147 | */ |
148 | uchar *str; |
149 | size_t length; |
150 | } log_record_buffer; |
151 | static void enlarge_buffer(const TRANSLOG_HEADER_BUFFER *rec) |
152 | { |
153 | if (log_record_buffer.length < rec->record_length) |
154 | { |
155 | log_record_buffer.length= rec->record_length; |
156 | log_record_buffer.str= my_realloc(log_record_buffer.str, |
157 | rec->record_length, |
158 | MYF(MY_WME | MY_ALLOW_ZERO_PTR)); |
159 | } |
160 | } |
/** @brief Tells what kind of progress message was printed to the error log */
static enum recovery_message_type
{
  REC_MSG_NONE= 0,                              /* nothing printed */
  REC_MSG_REDO,
  REC_MSG_UNDO,
  REC_MSG_FLUSH
} recovery_message_printed;
166 | |
167 | |
168 | /* Hook to ensure we get nicer output if we get an error */ |
169 | |
170 | void maria_recover_error_handler_hook(uint error, const char *str, |
171 | myf flags) |
172 | { |
173 | if (procent_printed) |
174 | { |
175 | procent_printed= 0; |
176 | fputc('\n', stderr); |
177 | fflush(stderr); |
178 | } |
179 | (*save_error_handler_hook)(error, str, flags); |
180 | } |
181 | |
/*
  Define this if you want gdb to break in some interesting situations.
  As defined here it expands to nothing, so ALERT_USER() has no effect.
*/
#define ALERT_USER()
184 | |
185 | static void print_preamble() |
186 | { |
187 | ma_message_no_user(ME_JUST_INFO, "starting recovery" ); |
188 | } |
189 | |
190 | |
191 | static my_bool table_is_part_of_recovery_set(LEX_STRING *file_name) |
192 | { |
193 | uint offset =0; |
194 | if (!tables_to_redo.records) |
195 | return 1; /* Default, recover table */ |
196 | |
197 | /* Skip base directory */ |
198 | if (file_name->str[0] == '.' && |
199 | (file_name->str[1] == '/' || file_name->str[1] == '\\')) |
200 | offset= 2; |
201 | /* Only recover if table is in hash */ |
202 | return my_hash_search(&tables_to_redo, (uchar*) file_name->str + offset, |
203 | file_name->length - offset) != 0; |
204 | } |
205 | |
206 | /** |
207 | @brief Recovers from the last checkpoint. |
208 | |
209 | Runs the REDO phase using special structures, then sets up the playground |
210 | of runtime: recreates transactions inside trnman, open tables with their |
211 | two-byte-id mapping; takes a checkpoint and runs the UNDO phase. Closes all |
212 | tables. |
213 | |
214 | @return Operation status |
215 | @retval 0 OK |
216 | @retval !=0 Error |
217 | */ |
218 | |
219 | int maria_recovery_from_log(void) |
220 | { |
221 | int res= 1; |
222 | FILE *trace_file; |
223 | uint warnings_count; |
224 | #ifdef EXTRA_DEBUG |
225 | char name_buff[FN_REFLEN]; |
226 | #endif |
227 | DBUG_ENTER("maria_recovery_from_log" ); |
228 | |
229 | DBUG_ASSERT(!maria_in_recovery); |
230 | maria_in_recovery= TRUE; |
231 | |
232 | #ifdef EXTRA_DEBUG |
233 | fn_format(name_buff, "aria_recovery.trace" , maria_data_root, "" , MYF(0)); |
234 | trace_file= my_fopen(name_buff, O_WRONLY|O_APPEND|O_CREAT, MYF(MY_WME)); |
235 | #else |
236 | trace_file= NULL; /* no trace file for being fast */ |
237 | #endif |
238 | tprint(trace_file, "TRACE of the last Aria recovery from mysqld\n" ); |
239 | DBUG_ASSERT(maria_pagecache->inited); |
240 | res= maria_apply_log(LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, MARIA_LOG_APPLY, |
241 | trace_file, TRUE, TRUE, TRUE, &warnings_count); |
242 | if (!res) |
243 | { |
244 | if (warnings_count == 0 && recovery_found_crashed_tables == 0) |
245 | tprint(trace_file, "SUCCESS\n" ); |
246 | else |
247 | tprint(trace_file, "DOUBTFUL (%u warnings, check previous output)\n" , |
248 | warnings_count); |
249 | } |
250 | if (trace_file) |
251 | my_fclose(trace_file, MYF(0)); |
252 | maria_in_recovery= FALSE; |
253 | DBUG_RETURN(res); |
254 | } |
255 | |
256 | |
257 | /** |
258 | @brief Displays and/or applies the log |
259 | |
260 | @param from_lsn LSN from which log reading/applying should start; |
261 | LSN_IMPOSSIBLE means "use last checkpoint" |
262 | @param end_lsn Apply until this. LSN_IMPOSSIBLE means until end. |
263 | @param apply how log records should be applied or not |
264 | @param trace_file trace file where progress/debug messages will go |
265 | @param skip_DDLs_arg Should DDL records (CREATE/RENAME/DROP/REPAIR) |
266 | be skipped by the REDO phase or not |
267 | @param take_checkpoints Should we take checkpoints or not. |
268 | @param[out] warnings_count Count of warnings will be put there |
269 | |
270 | @todo This trace_file thing is primitive; soon we will make it similar to |
271 | ma_check_print_warning() etc, and a successful recovery does not need to |
272 | create a trace file. But for debugging now it is useful. |
273 | |
274 | @return Operation status |
275 | @retval 0 OK |
276 | @retval !=0 Error |
277 | */ |
278 | |
279 | int maria_apply_log(LSN from_lsn, LSN end_lsn, |
280 | enum maria_apply_log_way apply, |
281 | FILE *trace_file, |
282 | my_bool should_run_undo_phase, my_bool skip_DDLs_arg, |
283 | my_bool take_checkpoints, uint *warnings_count) |
284 | { |
285 | int error= 0; |
286 | uint uncommitted_trans; |
287 | ulonglong old_now; |
288 | my_bool abort_message_printed= 0; |
289 | DBUG_ENTER("maria_apply_log" ); |
290 | |
291 | DBUG_ASSERT(apply == MARIA_LOG_APPLY || !should_run_undo_phase); |
292 | DBUG_ASSERT(!maria_multi_threaded); |
293 | recovery_warnings= recovery_found_crashed_tables= 0; |
294 | maria_recovery_changed_data= 0; |
295 | /* checkpoints can happen only if TRNs have been built */ |
296 | DBUG_ASSERT(should_run_undo_phase || !take_checkpoints); |
297 | DBUG_ASSERT(end_lsn == LSN_IMPOSSIBLE || should_run_undo_phase == 0); |
298 | all_active_trans= (struct st_trn_for_recovery *) |
299 | my_malloc((SHORT_TRID_MAX + 1) * sizeof(struct st_trn_for_recovery), |
300 | MYF(MY_ZEROFILL)); |
301 | all_tables= (struct st_table_for_recovery *) |
302 | my_malloc((SHARE_ID_MAX + 1) * sizeof(struct st_table_for_recovery), |
303 | MYF(MY_ZEROFILL)); |
304 | |
305 | save_error_handler_hook= error_handler_hook; |
306 | error_handler_hook= maria_recover_error_handler_hook; |
307 | |
308 | if (!all_active_trans || !all_tables) |
309 | goto err; |
310 | |
311 | if (take_checkpoints && ma_checkpoint_init(0)) |
312 | goto err; |
313 | |
314 | recovery_message_printed= REC_MSG_NONE; |
315 | checkpoint_useful= trns_created= FALSE; |
316 | tracef= trace_file; |
317 | #ifdef INSTANT_FLUSH_OF_MESSAGES |
318 | /* enable this for instant flush of messages to trace file */ |
319 | setbuf(tracef, NULL); |
320 | #endif |
321 | skip_DDLs= skip_DDLs_arg; |
322 | skipped_undo_phase= 0; |
323 | |
324 | trnman_init(max_trid_in_control_file); |
325 | |
326 | if (from_lsn == LSN_IMPOSSIBLE) |
327 | { |
328 | if (last_checkpoint_lsn == LSN_IMPOSSIBLE) |
329 | { |
330 | from_lsn= translog_first_lsn_in_log(); |
331 | if (unlikely(from_lsn == LSN_ERROR)) |
332 | { |
333 | trnman_destroy(); |
334 | goto err; |
335 | } |
336 | } |
337 | else |
338 | { |
339 | from_lsn= parse_checkpoint_record(last_checkpoint_lsn); |
340 | if (from_lsn == LSN_ERROR) |
341 | { |
342 | trnman_destroy(); |
343 | goto err; |
344 | } |
345 | } |
346 | } |
347 | |
348 | now= microsecond_interval_timer(); |
349 | in_redo_phase= TRUE; |
350 | if (run_redo_phase(from_lsn, end_lsn, apply)) |
351 | { |
352 | ma_message_no_user(0, "Redo phase failed" ); |
353 | trnman_destroy(); |
354 | goto err; |
355 | } |
356 | trnman_destroy(); |
357 | |
358 | if (end_lsn != LSN_IMPOSSIBLE) |
359 | { |
360 | abort_message_printed= 1; |
361 | if (!trace_file) |
362 | fputc('\n', stderr); |
363 | my_message(HA_ERR_INITIALIZATION, |
364 | "Maria recovery aborted as end_lsn/end of file was reached" , |
365 | MYF(0)); |
366 | goto err2; |
367 | } |
368 | |
369 | if ((uncommitted_trans= |
370 | end_of_redo_phase(should_run_undo_phase)) == (uint)-1) |
371 | { |
372 | ma_message_no_user(0, "End of redo phase failed" ); |
373 | goto err; |
374 | } |
375 | in_redo_phase= FALSE; |
376 | |
377 | old_now= now; |
378 | now= microsecond_interval_timer(); |
379 | if (recovery_message_printed == REC_MSG_REDO) |
380 | { |
381 | double phase_took= (now - old_now)/1000000.0; |
382 | /* |
383 | Detailed progress info goes to stderr, because ma_message_no_user() |
384 | cannot put several messages on one line. |
385 | */ |
386 | procent_printed= 1; |
387 | fprintf(stderr, " (%.1f seconds); " , phase_took); |
388 | fflush(stderr); |
389 | } |
390 | |
391 | /** |
392 | REDO phase does not fill blocks' rec_lsn, so a checkpoint now would be |
393 | wrong: if a future recovery used it, the REDO phase would always |
394 | start from the checkpoint and never from before, wrongly skipping REDOs |
395 | (tested). Another problem is that the REDO phase uses |
396 | PAGECACHE_PLAIN_PAGE, while Checkpoint only collects PAGECACHE_LSN_PAGE. |
397 | |
398 | @todo fix this. pagecache_write() now can have a rec_lsn argument. And we |
399 | could make a function which goes through pages at end of REDO phase and |
400 | changes their type. |
401 | */ |
402 | #ifdef FIX_AND_ENABLE_LATER |
403 | if (take_checkpoints && checkpoint_useful) |
404 | { |
405 | /* |
406 | We take a checkpoint as it can save future recovery work if we crash |
407 | during the UNDO phase. But we don't flush pages, as UNDOs will change |
408 | them again probably. |
409 | If we wanted to take checkpoints in the middle of the REDO phase, at a |
410 | moment when we haven't reached the end of log so don't have exact data |
411 | about transactions, we could write a special checkpoint: containing only |
412 | the list of dirty pages, otherwise to be treated as if it was at the |
413 | same LSN as the last checkpoint. |
414 | */ |
415 | if (ma_checkpoint_execute(CHECKPOINT_INDIRECT, FALSE)) |
416 | goto err; |
417 | } |
418 | #endif |
419 | |
420 | if (should_run_undo_phase) |
421 | { |
422 | if (run_undo_phase(uncommitted_trans)) |
423 | { |
424 | ma_message_no_user(0, "Undo phase failed" ); |
425 | goto err; |
426 | } |
427 | } |
428 | else if (uncommitted_trans > 0) |
429 | { |
430 | eprint(tracef, "***WARNING: %u uncommitted transactions; some tables may" |
431 | " be left inconsistent!***" , uncommitted_trans); |
432 | recovery_warnings++; |
433 | } |
434 | |
435 | if (skipped_undo_phase) |
436 | { |
437 | /* |
438 | We could want to print a list of tables for which UNDOs were skipped, |
439 | but not one line per skipped UNDO. |
440 | */ |
441 | eprint(tracef, "***WARNING: %lu UNDO records skipped in UNDO phase; some" |
442 | " tables may be left inconsistent!***" , skipped_undo_phase); |
443 | recovery_warnings++; |
444 | } |
445 | |
446 | old_now= now; |
447 | now= microsecond_interval_timer(); |
448 | if (recovery_message_printed == REC_MSG_UNDO) |
449 | { |
450 | double phase_took= (now - old_now)/1000000.0; |
451 | procent_printed= 1; |
452 | fprintf(stderr, " (%.1f seconds); " , phase_took); |
453 | fflush(stderr); |
454 | } |
455 | |
456 | /* |
457 | we don't use maria_panic() because it would maria_end(), and Recovery does |
458 | not want that (we want to keep some modules initialized for runtime). |
459 | */ |
460 | if (close_all_tables()) |
461 | { |
462 | ma_message_no_user(0, "closing of tables failed" ); |
463 | goto err; |
464 | } |
465 | |
466 | old_now= now; |
467 | now= microsecond_interval_timer(); |
468 | if (recovery_message_printed == REC_MSG_FLUSH) |
469 | { |
470 | double phase_took= (now - old_now)/1000000.0; |
471 | procent_printed= 1; |
472 | fprintf(stderr, " (%.1f seconds); " , phase_took); |
473 | fflush(stderr); |
474 | } |
475 | |
476 | if (take_checkpoints && checkpoint_useful) |
477 | { |
478 | /* No dirty pages, all tables are closed, no active transactions, save: */ |
479 | if (ma_checkpoint_execute(CHECKPOINT_FULL, FALSE)) |
480 | goto err; |
481 | } |
482 | |
483 | goto end; |
484 | err: |
485 | tprint(tracef, "\nRecovery of tables with transaction logs FAILED\n" ); |
486 | err2: |
487 | if (trns_created) |
488 | delete_all_transactions(); |
489 | error= 1; |
490 | if (close_all_tables()) |
491 | { |
492 | ma_message_no_user(0, "closing of tables failed" ); |
493 | } |
494 | end: |
495 | error_handler_hook= save_error_handler_hook; |
496 | my_hash_free(&all_dirty_pages); |
497 | bzero(&all_dirty_pages, sizeof(all_dirty_pages)); |
498 | my_free(dirty_pages_pool); |
499 | dirty_pages_pool= NULL; |
500 | my_free(all_tables); |
501 | all_tables= NULL; |
502 | my_free(all_active_trans); |
503 | all_active_trans= NULL; |
504 | my_free(log_record_buffer.str); |
505 | log_record_buffer.str= NULL; |
506 | log_record_buffer.length= 0; |
507 | ma_checkpoint_end(); |
508 | *warnings_count= recovery_warnings + recovery_found_crashed_tables; |
509 | if (recovery_message_printed != REC_MSG_NONE) |
510 | { |
511 | if (procent_printed) |
512 | { |
513 | procent_printed= 0; |
514 | fprintf(stderr, "\n" ); |
515 | fflush(stderr); |
516 | } |
517 | if (!error) |
518 | { |
519 | ma_message_no_user(ME_JUST_INFO, "recovery done" ); |
520 | maria_recovery_changed_data= 1; |
521 | } |
522 | } |
523 | else if (!error && max_trid_in_control_file != max_long_trid) |
524 | { |
525 | /* |
526 | maria_end() will set max trid in log file so that one can run |
527 | maria_chk on the tables |
528 | */ |
529 | maria_recovery_changed_data= 1; |
530 | } |
531 | |
532 | if (error && !abort_message_printed) |
533 | { |
534 | if (!trace_file) |
535 | fputc('\n', stderr); |
536 | my_message(HA_ERR_INITIALIZATION, |
537 | "Aria recovery failed. Please run aria_chk -r on all Aria " |
538 | "tables and delete all aria_log.######## files" , MYF(0)); |
539 | } |
540 | procent_printed= 0; |
541 | /* |
542 | We don't cleanly close tables if we hit some error (may corrupt them by |
543 | flushing some wrong blocks made from wrong REDOs). It also leaves their |
544 | open_count>0, which ensures that --aria-recover, if used, will try to |
545 | repair them. |
546 | */ |
547 | DBUG_RETURN(error); |
548 | } |
549 | |
550 | |
551 | /* very basic info about the record's header */ |
552 | static void display_record_position(const LOG_DESC *log_desc, |
553 | const TRANSLOG_HEADER_BUFFER *rec, |
554 | uint number) |
555 | { |
556 | /* |
557 | if number==0, we're going over records which we had already seen and which |
558 | form a group, so we indent below the group's end record |
559 | */ |
560 | tprint(tracef, |
561 | "%sRec#%u LSN " LSN_FMT " short_trid %u %s(num_type:%u) len %lu\n" , |
562 | number ? "" : " " , number, LSN_IN_PARTS(rec->lsn), |
563 | rec->short_trid, log_desc->name, rec->type, |
564 | (ulong)rec->record_length); |
565 | if (rec->type == LOGREC_DEBUG_INFO) |
566 | { |
567 | /* Print some extra information */ |
568 | (*log_desc->record_execute_in_redo_phase)(rec); |
569 | } |
570 | } |
571 | |
572 | |
573 | static int display_and_apply_record(const LOG_DESC *log_desc, |
574 | const TRANSLOG_HEADER_BUFFER *rec) |
575 | { |
576 | int error; |
577 | if (log_desc->record_execute_in_redo_phase == NULL) |
578 | { |
579 | /* die on all not-yet-handled records :) */ |
580 | DBUG_ASSERT("one more hook to write" == 0); |
581 | return 1; |
582 | } |
583 | if (rec->type == LOGREC_DEBUG_INFO) |
584 | { |
585 | /* Query already printed by display_record_position() */ |
586 | return 0; |
587 | } |
588 | if ((error= (*log_desc->record_execute_in_redo_phase)(rec))) |
589 | eprint(tracef, "Got error %d when executing record %s" , |
590 | my_errno, log_desc->name); |
591 | return error; |
592 | } |
593 | |
594 | |
595 | prototype_redo_exec_hook(LONG_TRANSACTION_ID) |
596 | { |
597 | uint16 sid= rec->short_trid; |
598 | TrID long_trid= all_active_trans[sid].long_trid; |
599 | /* |
600 | Any incomplete group should be of an old crash which already had a |
601 | recovery and thus has logged INCOMPLETE_GROUP which we must have seen. |
602 | */ |
603 | DBUG_ASSERT(all_active_trans[sid].group_start_lsn == LSN_IMPOSSIBLE); |
604 | if (long_trid != 0) |
605 | { |
606 | LSN ulsn= all_active_trans[sid].undo_lsn; |
607 | /* |
608 | If the first record of that transaction is after 'rec', it's probably |
609 | because that transaction was found in the checkpoint record, and then |
610 | it's ok, we can forget about that transaction (we'll meet it later |
611 | again in the REDO phase) and replace it with the one in 'rec'. |
612 | */ |
613 | if ((ulsn != LSN_IMPOSSIBLE) && |
614 | (cmp_translog_addr(ulsn, rec->lsn) < 0)) |
615 | { |
616 | char llbuf[22]; |
617 | llstr(long_trid, llbuf); |
618 | eprint(tracef, "Found an old transaction long_trid %s short_trid %u" |
619 | " with same short id as this new transaction, and has neither" |
620 | " committed nor rollback (undo_lsn: " LSN_FMT ")" , |
621 | llbuf, sid, LSN_IN_PARTS(ulsn)); |
622 | goto err; |
623 | } |
624 | } |
625 | long_trid= uint6korr(rec->header); |
626 | new_transaction(sid, long_trid, LSN_IMPOSSIBLE, LSN_IMPOSSIBLE); |
627 | goto end; |
628 | err: |
629 | ALERT_USER(); |
630 | return 1; |
631 | end: |
632 | return 0; |
633 | } |
634 | |
635 | |
636 | static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn, |
637 | LSN first_undo_lsn) |
638 | { |
639 | char llbuf[22]; |
640 | all_active_trans[sid].long_trid= long_id; |
641 | llstr(long_id, llbuf); |
642 | tprint(tracef, "Transaction long_trid %s short_trid %u starts," |
643 | " undo_lsn " LSN_FMT " first_undo_lsn " LSN_FMT "\n" , |
644 | llbuf, sid, LSN_IN_PARTS(undo_lsn), LSN_IN_PARTS(first_undo_lsn)); |
645 | all_active_trans[sid].undo_lsn= undo_lsn; |
646 | all_active_trans[sid].first_undo_lsn= first_undo_lsn; |
647 | set_if_bigger(max_long_trid, long_id); |
648 | } |
649 | |
650 | |
651 | prototype_redo_exec_hook_dummy(CHECKPOINT) |
652 | { |
653 | /* the only checkpoint we care about was found via control file, ignore */ |
654 | tprint(tracef, "CHECKPOINT found\n" ); |
655 | return 0; |
656 | } |
657 | |
658 | |
/*
  REDO-phase hook for INCOMPLETE_GROUP: nothing to do here, as the abortion
  of the group was already made.

  @return always 0
*/
prototype_redo_exec_hook_dummy(INCOMPLETE_GROUP)
{
  return 0;
}
664 | |
665 | |
666 | prototype_redo_exec_hook(INCOMPLETE_LOG) |
667 | { |
668 | MARIA_HA *info; |
669 | |
670 | if (skip_DDLs) |
671 | { |
672 | tprint(tracef, "we skip DDLs\n" ); |
673 | return 0; |
674 | } |
675 | |
676 | if ((info= get_MARIA_HA_from_REDO_record(rec)) == NULL) |
677 | { |
678 | /* no such table, don't need to warn */ |
679 | return 0; |
680 | } |
681 | |
682 | if (maria_is_crashed(info)) |
683 | return 0; |
684 | |
685 | if (info->s->state.is_of_horizon > rec->lsn) |
686 | { |
687 | /* |
688 | This table was repaired at a time after this log entry. |
689 | We can assume that all rows was inserted sucessfully and we don't |
690 | have to warn about that the inserted data was not logged |
691 | */ |
692 | return 0; |
693 | } |
694 | |
695 | /* |
696 | Example of what can go wrong when replaying DDLs: |
697 | CREATE TABLE t (logged); INSERT INTO t VALUES(1) (logged); |
698 | ALTER TABLE t ... which does |
699 | CREATE a temporary table #sql... (logged) |
700 | INSERT data from t into #sql... (not logged) |
701 | RENAME #sql TO t (logged) |
702 | Removing tables by hand and replaying the log will leave in the |
703 | end an empty table "t": missing records. If after the RENAME an INSERT |
704 | into t was done, that row had number 1 in its page, executing the |
705 | REDO_INSERT_ROW_HEAD on the recreated empty t will fail (assertion |
706 | failure in _ma_apply_redo_insert_row_head_or_tail(): new data page is |
707 | created whereas rownr is not 0). |
708 | So when the server disables logging for ALTER TABLE or CREATE SELECT, it |
709 | logs LOGREC_INCOMPLETE_LOG to warn aria_read_log and then the user. |
710 | |
711 | Another issue is that replaying of DDLs is not correct enough to work if |
712 | there was a crash during a DDL (see comment in execution of |
713 | REDO_RENAME_TABLE ). |
714 | */ |
715 | |
716 | eprint(tracef, "***WARNING: Aria engine currently logs no records " |
717 | "about insertion of data by ALTER TABLE and CREATE SELECT, " |
718 | "as they are not necessary for recovery; " |
719 | "present applying of log records to table '%s' may well not work." |
720 | "***" , info->s->index_file_name.str); |
721 | |
722 | /* Prevent using the table for anything else than undo repair */ |
723 | _ma_mark_file_crashed(info->s); |
724 | recovery_warnings++; |
725 | return 0; |
726 | } |
727 | |
728 | |
729 | static my_bool create_database_if_not_exists(const char *name) |
730 | { |
731 | char dirname[FN_REFLEN]; |
732 | size_t length; |
733 | MY_STAT stat_info; |
734 | DBUG_ENTER("create_database_if_not_exists" ); |
735 | |
736 | dirname_part(dirname, name, &length); |
737 | if (!length) |
738 | { |
739 | /* Skip files without directores */ |
740 | DBUG_RETURN(0); |
741 | } |
742 | /* |
743 | Safety; Don't create files with hard path; |
744 | Should never happen with MariaDB |
745 | If hard path, then error will be detected when trying to create index file |
746 | */ |
747 | if (test_if_hard_path(dirname)) |
748 | DBUG_RETURN(0); |
749 | |
750 | if (my_stat(dirname,&stat_info,MYF(0))) |
751 | DBUG_RETURN(0); |
752 | |
753 | |
754 | tprint(tracef, "Creating not existing database '%s'\n" , dirname); |
755 | if (my_mkdir(dirname, 0777, MYF(MY_WME))) |
756 | { |
757 | eprint(tracef, "***WARNING: Can't create not existing database '%s'" , |
758 | dirname); |
759 | DBUG_RETURN(1); |
760 | } |
761 | DBUG_RETURN(0); |
762 | } |
763 | |
764 | |
765 | |
766 | |
767 | |
768 | prototype_redo_exec_hook(REDO_CREATE_TABLE) |
769 | { |
770 | File dfile= -1, kfile= -1; |
771 | char *linkname_ptr, filename[FN_REFLEN], *name, *ptr, *ptr2, |
772 | *data_file_name, *index_file_name; |
773 | uchar *; |
774 | myf create_flag; |
775 | uint flags; |
776 | int error= 1, create_mode= O_RDWR | O_TRUNC, i; |
777 | MARIA_HA *info= NULL; |
778 | uint kfile_size_before_extension, keystart; |
779 | DBUG_ENTER("exec_REDO_LOGREC_REDO_CREATE_TABLE" ); |
780 | |
781 | if (skip_DDLs) |
782 | { |
783 | tprint(tracef, "we skip DDLs\n" ); |
784 | DBUG_RETURN(0); |
785 | } |
786 | enlarge_buffer(rec); |
787 | if (log_record_buffer.str == NULL || |
788 | translog_read_record(rec->lsn, 0, rec->record_length, |
789 | log_record_buffer.str, NULL) != |
790 | rec->record_length) |
791 | { |
792 | eprint(tracef, "Failed to read record" ); |
793 | goto end; |
794 | } |
795 | name= (char *)log_record_buffer.str; |
796 | /* |
797 | TRUNCATE TABLE and REPAIR USE_FRM call maria_create(), so below we can |
798 | find a REDO_CREATE_TABLE for a table which we have open, that's why we |
799 | need to look for any open instances and close them first. |
800 | */ |
801 | if (close_one_table(name, rec->lsn)) |
802 | { |
803 | eprint(tracef, "Table '%s' got error %d on close" , name, my_errno); |
804 | ALERT_USER(); |
805 | goto end; |
806 | } |
807 | /* we try hard to get create_rename_lsn, to avoid mistakes if possible */ |
808 | info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR); |
809 | if (info) |
810 | { |
811 | MARIA_SHARE *share= info->s; |
812 | /* check that we're not already using it */ |
813 | if (share->reopen != 1) |
814 | { |
815 | eprint(tracef, "Table '%s is already open (reopen=%u)" , |
816 | name, share->reopen); |
817 | ALERT_USER(); |
818 | goto end; |
819 | } |
820 | DBUG_ASSERT(share->now_transactional == share->base.born_transactional); |
821 | if (!share->base.born_transactional) |
822 | { |
823 | /* |
824 | could be that transactional table was later dropped, and a non-trans |
825 | one was renamed to its name, thus create_rename_lsn is 0 and should |
826 | not be trusted. |
827 | */ |
828 | tprint(tracef, "Table '%s' is not transactional, ignoring creation\n" , |
829 | name); |
830 | ALERT_USER(); |
831 | error= 0; |
832 | goto end; |
833 | } |
834 | if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0) |
835 | { |
836 | tprint(tracef, "Table '%s' has create_rename_lsn " LSN_FMT " more " |
837 | "recent than record, ignoring creation" , |
838 | name, LSN_IN_PARTS(share->state.create_rename_lsn)); |
839 | error= 0; |
840 | goto end; |
841 | } |
842 | if (maria_is_crashed(info)) |
843 | { |
844 | eprint(tracef, "Table '%s' is crashed, can't recreate it" , name); |
845 | ALERT_USER(); |
846 | goto end; |
847 | } |
848 | maria_close(info); |
849 | info= NULL; |
850 | } |
851 | else |
852 | { |
853 | /* one or two files absent, or header corrupted... */ |
854 | tprint(tracef, "Table '%s' can't be opened (Error: %d)\n" , |
855 | name, my_errno); |
856 | } |
857 | /* if does not exist, or is older, overwrite it */ |
858 | ptr= name + strlen(name) + 1; |
859 | if ((flags= ptr[0] ? HA_DONT_TOUCH_DATA : 0)) |
860 | tprint(tracef, ", we will only touch index file" ); |
861 | ptr++; |
862 | kfile_size_before_extension= uint2korr(ptr); |
863 | ptr+= 2; |
864 | keystart= uint2korr(ptr); |
865 | ptr+= 2; |
866 | kfile_header= (uchar *)ptr; |
867 | ptr+= kfile_size_before_extension; |
868 | /* set header lsns */ |
869 | ptr2= (char *) kfile_header + sizeof(info->s->state.header) + |
870 | MARIA_FILE_CREATE_RENAME_LSN_OFFSET; |
871 | for (i= 0; i<3; i++) |
872 | { |
873 | lsn_store(ptr2, rec->lsn); |
874 | ptr2+= LSN_STORE_SIZE; |
875 | } |
876 | data_file_name= ptr; |
877 | ptr+= strlen(data_file_name) + 1; |
878 | index_file_name= ptr; |
879 | ptr+= strlen(index_file_name) + 1; |
880 | /** @todo handle symlinks */ |
881 | if (data_file_name[0] || index_file_name[0]) |
882 | { |
883 | eprint(tracef, "Table '%s' DATA|INDEX DIRECTORY clauses are not handled" , |
884 | name); |
885 | goto end; |
886 | } |
887 | if (create_database_if_not_exists(name)) |
888 | goto end; |
889 | fn_format(filename, name, "" , MARIA_NAME_IEXT, |
890 | MY_UNPACK_FILENAME | MY_RETURN_REAL_PATH | MY_APPEND_EXT); |
891 | linkname_ptr= NULL; |
892 | create_flag= MY_DELETE_OLD; |
893 | tprint(tracef, "Table '%s' creating as '%s'\n" , name, filename); |
894 | if ((kfile= mysql_file_create_with_symlink(key_file_kfile, linkname_ptr, |
895 | filename, 0, create_mode, |
896 | MYF(MY_WME|create_flag))) < 0) |
897 | { |
898 | eprint(tracef, "Failed to create index file" ); |
899 | goto end; |
900 | } |
901 | if (my_pwrite(kfile, kfile_header, |
902 | kfile_size_before_extension, 0, MYF(MY_NABP|MY_WME)) || |
903 | mysql_file_chsize(kfile, keystart, 0, MYF(MY_WME))) |
904 | { |
905 | eprint(tracef, "Failed to write to index file" ); |
906 | goto end; |
907 | } |
908 | if (!(flags & HA_DONT_TOUCH_DATA)) |
909 | { |
910 | fn_format(filename,name,"" , MARIA_NAME_DEXT, |
911 | MY_UNPACK_FILENAME | MY_APPEND_EXT); |
912 | linkname_ptr= NULL; |
913 | create_flag=MY_DELETE_OLD; |
914 | if (((dfile= |
915 | mysql_file_create_with_symlink(key_file_dfile, linkname_ptr, |
916 | filename, 0, create_mode, |
917 | MYF(MY_WME | create_flag))) < 0) || |
918 | mysql_file_close(dfile, MYF(MY_WME))) |
919 | { |
920 | eprint(tracef, "Failed to create data file" ); |
921 | goto end; |
922 | } |
923 | /* |
924 | we now have an empty data file. To be able to |
925 | _ma_initialize_data_file() we need some pieces of the share to be |
926 | correctly filled. So we just open the table (fortunately, an empty |
927 | data file does not preclude this). |
928 | */ |
929 | if (((info= maria_open(name, O_RDONLY, 0)) == NULL) || |
930 | _ma_initialize_data_file(info->s, info->dfile.file)) |
931 | { |
932 | eprint(tracef, "Failed to open new table or write to data file" ); |
933 | goto end; |
934 | } |
935 | } |
936 | error= 0; |
937 | end: |
938 | if (kfile >= 0) |
939 | error|= mysql_file_close(kfile, MYF(MY_WME)); |
940 | if (info != NULL) |
941 | error|= maria_close(info); |
942 | DBUG_RETURN(error); |
943 | } |
944 | |
945 | |
946 | prototype_redo_exec_hook(REDO_RENAME_TABLE) |
947 | { |
948 | char *old_name, *new_name; |
949 | int error= 1; |
950 | MARIA_HA *info= NULL; |
951 | DBUG_ENTER("exec_REDO_LOGREC_REDO_RENAME_TABLE" ); |
952 | |
953 | if (skip_DDLs) |
954 | { |
955 | tprint(tracef, "we skip DDLs\n" ); |
956 | DBUG_RETURN(0); |
957 | } |
958 | enlarge_buffer(rec); |
959 | if (log_record_buffer.str == NULL || |
960 | translog_read_record(rec->lsn, 0, rec->record_length, |
961 | log_record_buffer.str, NULL) != |
962 | rec->record_length) |
963 | { |
964 | eprint(tracef, "Failed to read record" ); |
965 | goto end; |
966 | } |
967 | old_name= (char *)log_record_buffer.str; |
968 | new_name= old_name + strlen(old_name) + 1; |
969 | tprint(tracef, "Table '%s' to rename to '%s'; old-name table " , old_name, |
970 | new_name); |
971 | /* |
972 | Here is why we skip CREATE/DROP/RENAME when doing a recovery from |
973 | ha_maria (whereas we do when called from aria_read_log). Consider: |
974 | CREATE TABLE t; |
975 | RENAME TABLE t to u; |
976 | DROP TABLE u; |
977 | RENAME TABLE v to u; # crash between index rename and data rename. |
978 | And do a Recovery (not removing tables beforehand). |
979 | Recovery replays CREATE, then RENAME: the maria_open("t") works, |
980 | maria_open("u") does not (no data file) so table "u" is considered |
981 | inexistent and so maria_rename() is done which overwrites u's index file, |
982 | which is lost. Ok, the data file (v.MAD) is still available, but only a |
983 | REPAIR USE_FRM can rebuild the index, which is unsafe and downtime. |
984 | So it is preferrable to not execute RENAME, and leave the "mess" of files, |
985 | rather than possibly destroy a file. DBA will manually rename files. |
986 | A safe recovery method would probably require checking the existence of |
987 | the index file and of the data file separately (not via maria_open()), and |
988 | maybe also to store a create_rename_lsn in the data file too |
989 | For now, all we risk is to leave the mess (half-renamed files) left by the |
990 | crash. We however sync files and directories at each file rename. The SQL |
991 | layer is anyway not crash-safe for DDLs (except the repartioning-related |
992 | ones). |
993 | We replay DDLs in aria_read_log to be able to recreate tables from |
994 | scratch. It means that "aria_read_log -a" should not be used on a |
995 | database which just crashed during a DDL. And also ALTER TABLE does not |
996 | log insertions of records into the temporary table, so replaying may |
997 | fail (grep for INCOMPLETE_LOG in files). |
998 | */ |
999 | info= maria_open(old_name, O_RDONLY, HA_OPEN_FOR_REPAIR); |
1000 | if (info) |
1001 | { |
1002 | MARIA_SHARE *share= info->s; |
1003 | if (!share->base.born_transactional) |
1004 | { |
1005 | tprint(tracef, ", is not transactional, ignoring renaming\n" ); |
1006 | ALERT_USER(); |
1007 | error= 0; |
1008 | goto end; |
1009 | } |
1010 | if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0) |
1011 | { |
1012 | tprint(tracef, ", has create_rename_lsn " LSN_FMT " more recent than" |
1013 | " record, ignoring renaming" , |
1014 | LSN_IN_PARTS(share->state.create_rename_lsn)); |
1015 | error= 0; |
1016 | goto end; |
1017 | } |
1018 | if (maria_is_crashed(info)) |
1019 | { |
1020 | tprint(tracef, ", is crashed, can't rename it" ); |
1021 | ALERT_USER(); |
1022 | goto end; |
1023 | } |
1024 | if (close_one_table(info->s->open_file_name.str, rec->lsn) || |
1025 | maria_close(info)) |
1026 | goto end; |
1027 | info= NULL; |
1028 | tprint(tracef, ", is ok for renaming; new-name table " ); |
1029 | } |
1030 | else /* one or two files absent, or header corrupted... */ |
1031 | { |
1032 | tprint(tracef, ", can't be opened, probably does not exist" ); |
1033 | error= 0; |
1034 | goto end; |
1035 | } |
1036 | /* |
1037 | We must also check the create_rename_lsn of the 'new_name' table if it |
1038 | exists: otherwise we may, with our rename which overwrites, destroy |
1039 | another table. For example: |
1040 | CREATE TABLE t; |
1041 | RENAME t to u; |
1042 | DROP TABLE u; |
1043 | RENAME v to u; # v is an old table, its creation/insertions not in log |
1044 | And start executing the log (without removing tables beforehand): creates |
1045 | t, renames it to u (if not testing create_rename_lsn) thus overwriting |
1046 | old-named v, drops u, and we are stuck, we have lost data. |
1047 | */ |
1048 | info= maria_open(new_name, O_RDONLY, HA_OPEN_FOR_REPAIR); |
1049 | if (info) |
1050 | { |
1051 | MARIA_SHARE *share= info->s; |
1052 | /* We should not have open instances on this table. */ |
1053 | if (share->reopen != 1) |
1054 | { |
1055 | tprint(tracef, ", is already open (reopen=%u)\n" , share->reopen); |
1056 | ALERT_USER(); |
1057 | goto end; |
1058 | } |
1059 | if (!share->base.born_transactional) |
1060 | { |
1061 | tprint(tracef, ", is not transactional, ignoring renaming\n" ); |
1062 | ALERT_USER(); |
1063 | goto drop; |
1064 | } |
1065 | if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0) |
1066 | { |
1067 | tprint(tracef, ", has create_rename_lsn " LSN_FMT " more recent than" |
1068 | " record, ignoring renaming" , |
1069 | LSN_IN_PARTS(share->state.create_rename_lsn)); |
1070 | /* |
1071 | We have to drop the old_name table. Consider: |
1072 | CREATE TABLE t; |
1073 | CREATE TABLE v; |
1074 | RENAME TABLE t to u; |
1075 | DROP TABLE u; |
1076 | RENAME TABLE v to u; |
1077 | and apply the log without removing tables beforehand. t will be |
1078 | created, v too; in REDO_RENAME u will be more recent, but we still |
1079 | have to drop t otherwise it stays. |
1080 | */ |
1081 | goto drop; |
1082 | } |
1083 | if (maria_is_crashed(info)) |
1084 | { |
1085 | tprint(tracef, ", is crashed, can't rename it" ); |
1086 | ALERT_USER(); |
1087 | goto end; |
1088 | } |
1089 | if (maria_close(info)) |
1090 | goto end; |
1091 | info= NULL; |
1092 | /* abnormal situation */ |
1093 | tprint(tracef, ", exists but is older than record, can't rename it" ); |
1094 | goto end; |
1095 | } |
1096 | else /* one or two files absent, or header corrupted... */ |
1097 | tprint(tracef, ", can't be opened, probably does not exist" ); |
1098 | tprint(tracef, ", renaming '%s'" , old_name); |
1099 | if (maria_rename(old_name, new_name)) |
1100 | { |
1101 | eprint(tracef, "Failed to rename table" ); |
1102 | goto end; |
1103 | } |
1104 | info= maria_open(new_name, O_RDONLY, 0); |
1105 | if (info == NULL) |
1106 | { |
1107 | eprint(tracef, "Failed to open renamed table" ); |
1108 | goto end; |
1109 | } |
1110 | if (_ma_update_state_lsns(info->s, rec->lsn, info->s->state.create_trid, |
1111 | TRUE, TRUE)) |
1112 | goto end; |
1113 | if (maria_close(info)) |
1114 | goto end; |
1115 | info= NULL; |
1116 | error= 0; |
1117 | goto end; |
1118 | drop: |
1119 | tprint(tracef, ", only dropping '%s'" , old_name); |
1120 | if (maria_delete_table(old_name)) |
1121 | { |
1122 | eprint(tracef, "Failed to drop table" ); |
1123 | goto end; |
1124 | } |
1125 | error= 0; |
1126 | goto end; |
1127 | end: |
1128 | tprint(tracef, "\n" ); |
1129 | if (info != NULL) |
1130 | error|= maria_close(info); |
1131 | DBUG_RETURN(error); |
1132 | } |
1133 | |
1134 | |
1135 | /* |
1136 | The record may come from REPAIR, ALTER TABLE ENABLE KEYS, OPTIMIZE. |
1137 | */ |
1138 | prototype_redo_exec_hook(REDO_REPAIR_TABLE) |
1139 | { |
1140 | int error= 1; |
1141 | MARIA_HA *info; |
1142 | HA_CHECK param; |
1143 | char *name; |
1144 | my_bool quick_repair; |
1145 | DBUG_ENTER("exec_REDO_LOGREC_REDO_REPAIR_TABLE" ); |
1146 | |
1147 | if (skip_DDLs) |
1148 | { |
1149 | /* |
1150 | REPAIR is not exactly a DDL, but it manipulates files without logging |
1151 | insertions into them. |
1152 | */ |
1153 | tprint(tracef, "we skip DDLs\n" ); |
1154 | DBUG_RETURN(0); |
1155 | } |
1156 | if ((info= get_MARIA_HA_from_REDO_record(rec)) == NULL) |
1157 | DBUG_RETURN(0); |
1158 | if (maria_is_crashed(info)) |
1159 | { |
1160 | tprint(tracef, "we skip repairing crashed table\n" ); |
1161 | DBUG_RETURN(0); |
1162 | } |
1163 | /* |
1164 | Otherwise, the mapping is newer than the table, and our record is newer |
1165 | than the mapping, so we can repair. |
1166 | */ |
1167 | tprint(tracef, " repairing...\n" ); |
1168 | |
1169 | maria_chk_init(¶m); |
1170 | param.isam_file_name= name= info->s->open_file_name.str; |
1171 | param.testflag= uint8korr(rec->header + FILEID_STORE_SIZE); |
1172 | param.tmpdir= maria_tmpdir; |
1173 | param.max_trid= max_long_trid; |
1174 | DBUG_ASSERT(maria_tmpdir); |
1175 | |
1176 | info->s->state.key_map= uint8korr(rec->header + FILEID_STORE_SIZE + 8); |
1177 | quick_repair= MY_TEST(param.testflag & T_QUICK); |
1178 | |
1179 | if (param.testflag & T_REP_PARALLEL) |
1180 | { |
1181 | if (maria_repair_parallel(¶m, info, name, quick_repair)) |
1182 | goto end; |
1183 | } |
1184 | else if (param.testflag & T_REP_BY_SORT) |
1185 | { |
1186 | if (maria_repair_by_sort(¶m, info, name, quick_repair)) |
1187 | goto end; |
1188 | } |
1189 | else if (maria_repair(¶m, info, name, quick_repair)) |
1190 | goto end; |
1191 | |
1192 | if (_ma_update_state_lsns(info->s, rec->lsn, trnman_get_min_safe_trid(), |
1193 | TRUE, !(param.testflag & T_NO_CREATE_RENAME_LSN))) |
1194 | goto end; |
1195 | error= 0; |
1196 | |
1197 | end: |
1198 | DBUG_RETURN(error); |
1199 | } |
1200 | |
1201 | |
1202 | prototype_redo_exec_hook(REDO_DROP_TABLE) |
1203 | { |
1204 | char *name; |
1205 | int error= 1; |
1206 | MARIA_HA *info; |
1207 | if (skip_DDLs) |
1208 | { |
1209 | tprint(tracef, "we skip DDLs\n" ); |
1210 | return 0; |
1211 | } |
1212 | enlarge_buffer(rec); |
1213 | if (log_record_buffer.str == NULL || |
1214 | translog_read_record(rec->lsn, 0, rec->record_length, |
1215 | log_record_buffer.str, NULL) != |
1216 | rec->record_length) |
1217 | { |
1218 | eprint(tracef, "Failed to read record" ); |
1219 | return 1; |
1220 | } |
1221 | name= (char *)log_record_buffer.str; |
1222 | tprint(tracef, "Table '%s'" , name); |
1223 | info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR); |
1224 | if (info) |
1225 | { |
1226 | MARIA_SHARE *share= info->s; |
1227 | if (!share->base.born_transactional) |
1228 | { |
1229 | tprint(tracef, ", is not transactional, ignoring removal\n" ); |
1230 | ALERT_USER(); |
1231 | error= 0; |
1232 | goto end; |
1233 | } |
1234 | if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0) |
1235 | { |
1236 | tprint(tracef, ", has create_rename_lsn " LSN_FMT " more recent than" |
1237 | " record, ignoring removal" , |
1238 | LSN_IN_PARTS(share->state.create_rename_lsn)); |
1239 | error= 0; |
1240 | goto end; |
1241 | } |
1242 | if (maria_is_crashed(info)) |
1243 | { |
1244 | tprint(tracef, ", is crashed, can't drop it" ); |
1245 | ALERT_USER(); |
1246 | goto end; |
1247 | } |
1248 | if (close_one_table(info->s->open_file_name.str, rec->lsn) || |
1249 | maria_close(info)) |
1250 | goto end; |
1251 | info= NULL; |
1252 | /* if it is older, or its header is corrupted, drop it */ |
1253 | tprint(tracef, ", dropping '%s'" , name); |
1254 | if (maria_delete_table(name)) |
1255 | { |
1256 | eprint(tracef, "Failed to drop table" ); |
1257 | goto end; |
1258 | } |
1259 | } |
1260 | else /* one or two files absent, or header corrupted... */ |
1261 | tprint(tracef,", can't be opened, probably does not exist" ); |
1262 | error= 0; |
1263 | end: |
1264 | tprint(tracef, "\n" ); |
1265 | if (info != NULL) |
1266 | error|= maria_close(info); |
1267 | return error; |
1268 | } |
1269 | |
1270 | |
1271 | prototype_redo_exec_hook(FILE_ID) |
1272 | { |
1273 | uint16 sid; |
1274 | int error= 1; |
1275 | const char *name; |
1276 | MARIA_HA *info; |
1277 | DBUG_ENTER("exec_REDO_LOGREC_FILE_ID" ); |
1278 | |
1279 | if (cmp_translog_addr(rec->lsn, checkpoint_start) < 0) |
1280 | { |
1281 | /* |
1282 | If that mapping was still true at checkpoint time, it was found in |
1283 | checkpoint record, no need to recreate it. If that mapping had ended at |
1284 | checkpoint time (table was closed or repaired), a flush and force |
1285 | happened and so mapping is not needed. |
1286 | */ |
1287 | tprint(tracef, "ignoring because before checkpoint\n" ); |
1288 | DBUG_RETURN(0); |
1289 | } |
1290 | |
1291 | enlarge_buffer(rec); |
1292 | if (log_record_buffer.str == NULL || |
1293 | translog_read_record(rec->lsn, 0, rec->record_length, |
1294 | log_record_buffer.str, NULL) != |
1295 | rec->record_length) |
1296 | { |
1297 | eprint(tracef, "Failed to read record" ); |
1298 | goto end; |
1299 | } |
1300 | sid= fileid_korr(log_record_buffer.str); |
1301 | info= all_tables[sid].info; |
1302 | if (info != NULL) |
1303 | { |
1304 | tprint(tracef, " Closing table '%s'\n" , info->s->open_file_name.str); |
1305 | prepare_table_for_close(info, rec->lsn); |
1306 | |
1307 | /* |
1308 | Ensure that open count is 1 on close. This is needed as the |
1309 | table may initially had an open_count > 0 when we initially |
1310 | opened it as the server may have crashed without closing it |
1311 | properly. As we now have applied all redo's for the table up to |
1312 | now, we know the table is ok, so it's safe to reset the open |
1313 | count to 0. |
1314 | */ |
1315 | if (info->s->state.open_count != 0 && info->s->reopen == 1) |
1316 | { |
1317 | /* let ma_close() mark the table properly closed */ |
1318 | info->s->state.open_count= 1; |
1319 | info->s->global_changed= 1; |
1320 | info->s->changed= 1; |
1321 | } |
1322 | if (maria_close(info)) |
1323 | { |
1324 | eprint(tracef, "Failed to close table" ); |
1325 | goto end; |
1326 | } |
1327 | all_tables[sid].info= NULL; |
1328 | } |
1329 | name= (char *)log_record_buffer.str + FILEID_STORE_SIZE; |
1330 | if (new_table(sid, name, rec->lsn)) |
1331 | goto end; |
1332 | error= 0; |
1333 | end: |
1334 | DBUG_RETURN(error); |
1335 | } |
1336 | |
1337 | |
/*
  Opens table 'name' for the REDO phase and registers it under short id 'sid'.

  @param sid             short table id; index into all_tables[]
  @param name            table name, as passed to maria_open()
  @param lsn_of_file_id  LSN of the FILE_ID record that created the mapping;
                         stored in the share so that older records can detect
                         that the mapping is newer than themselves

  The table is skipped (closed, and 0 returned) if it cannot be opened, is
  not transactional, has a create_rename_lsn at or after 'lsn_of_file_id', or
  is marked crashed.

  @return 0 on success or skip, 1 on error (empty name, failure to close a
          previous instance, file length cannot be determined).
*/
static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id)
{
  /*
    Meaning of 'error' when reaching the 'end' label:
    -1 (skip table): close table and return 0;
    1 (error): close table and return 1;
    0 (success): leave table open and return 0.
  */
  int error= 1;
  MARIA_HA *info;
  MARIA_SHARE *share;
  my_off_t dfile_len, kfile_len;
  DBUG_ENTER("new_table" );

  checkpoint_useful= TRUE;
  if ((name == NULL) || (name[0] == 0))
  {
    /*
      we didn't use DBUG_ASSERT() because such record corruption could
      silently pass in the "info == NULL" test below.
    */
    tprint(tracef, ", record is corrupted" );
    info= NULL;
    recovery_warnings++;
    goto end;
  }
  tprint(tracef, "Table '%s', id %u" , name, sid);
  info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR);
  if (info == NULL)
  {
    tprint(tracef, ", is absent (must have been dropped later?)"
           " or its header is so corrupted that we cannot open it;"
           " we skip it" );
    /* Anything else than "file does not exist" counts as a crashed table */
    if (my_errno != ENOENT)
      recovery_found_crashed_tables++;
    error= 0;
    goto end;
  }
  share= info->s;
  /* check that we're not already using it */
  if (share->reopen != 1)
  {
    tprint(tracef, ", is already open (reopen=%u)\n" , share->reopen);
    /*
      It could be that we have in the log
      FILE_ID(t1,10) ... (t1 was flushed) ... FILE_ID(t1,12);
    */
    if (close_one_table(share->open_file_name.str, lsn_of_file_id))
      goto end;
    /*
      We should not try to get length of data/index files as the files
      are not on disk yet.
    */
    _ma_tmp_disable_logging_for_table(info, FALSE);
    /* Skips the transactional/LSN/crashed/length checks below */
    goto set_lsn_of_file_id;
  }
  if (!share->base.born_transactional)
  {
    /*
      This can happen if one converts a transactional table to a
      not transactional table
    */
    tprint(tracef, ", is not transactional. Ignoring open request" );
    error= -1;
    recovery_warnings++;
    goto end;
  }
  if (cmp_translog_addr(lsn_of_file_id, share->state.create_rename_lsn) <= 0)
  {
    tprint(tracef, ", has create_rename_lsn " LSN_FMT " more recent than"
           " LOGREC_FILE_ID's LSN " LSN_FMT ", ignoring open request" ,
           LSN_IN_PARTS(share->state.create_rename_lsn),
           LSN_IN_PARTS(lsn_of_file_id));
    recovery_warnings++;
    error= -1;
    goto end;
    /*
      Note that we tested that before testing corruption; a recent corrupted
      table is not a blocker for the present log record.
    */
  }
  if (maria_is_crashed(info))
  {
    eprint(tracef, "Table '%s' is crashed, skipping it. Please repair it with"
           " aria_chk -r" , share->open_file_name.str);
    recovery_found_crashed_tables++;
    error= -1; /* not fatal, try with other tables */
    goto end;
    /*
      Note that if a first recovery fails to apply a REDO, it marks the table
      corrupted and stops the entire recovery. A second recovery will find the
      table is marked corrupted and skip it (and thus possibly handle other
      tables).
    */
  }
  /* don't log any records for this work */
  _ma_tmp_disable_logging_for_table(info, FALSE);
  /* execution of some REDO records relies on data_file_length */
  dfile_len= mysql_file_seek(info->dfile.file, 0, SEEK_END, MYF(MY_WME));
  kfile_len= mysql_file_seek(info->s->kfile.file, 0, SEEK_END, MYF(MY_WME));
  if ((dfile_len == MY_FILEPOS_ERROR) ||
      (kfile_len == MY_FILEPOS_ERROR))
  {
    /* 'error' is still 1 here: this is reported as a failure */
    tprint(tracef, ", length unknown\n" );
    recovery_warnings++;
    goto end;
  }
  /* Trust the real file sizes over the lengths stored in the state */
  if (share->state.state.data_file_length != dfile_len)
  {
    tprint(tracef, ", has wrong state.data_file_length (fixing it)" );
    share->state.state.data_file_length= dfile_len;
  }
  if (share->state.state.key_file_length != kfile_len)
  {
    tprint(tracef, ", has wrong state.key_file_length (fixing it)" );
    share->state.state.key_file_length= kfile_len;
  }
  if ((dfile_len % share->block_size) || (kfile_len % share->block_size))
  {
    tprint(tracef, ", has too short last page\n" );
    /* Recovery will fix this, no error */
    ALERT_USER();
  }

set_lsn_of_file_id:
  /*
    This LSN serves in this situation; assume log is:
    FILE_ID(6->"t2") REDO_INSERT(6) FILE_ID(6->"t1") CHECKPOINT(6->"t1")
    then crash, checkpoint record is parsed and opens "t1" with id 6; assume
    REDO phase starts from the REDO_INSERT above: it will wrongly try to
    update a page of "t1". With this LSN below, REDO_INSERT can realize the
    mapping is newer than itself, and not execute.
    Same example is possible with UNDO_INSERT (update of the state).
  */
  info->s->lsn_of_file_id= lsn_of_file_id;
  all_tables[sid].info= info;
  /*
    We don't set info->s->id, it would be useless (no logging in REDO phase);
    if you change that, know that some records in REDO phase call
    _ma_update_state_lsns() which resets info->s->id.
  */
  tprint(tracef, ", opened" );
  error= 0;
end:
  tprint(tracef, "\n" );
  if (error)
  {
    /* Both the "skip" (-1) and the "error" (1) outcomes close the table */
    if (info != NULL)
    {
      /* let maria_close() mark the table properly closed */
      info->s->state.open_count= 1;
      info->s->global_changed= 1;
      info->s->changed= 1;
      maria_close(info);
    }
    /* A skipped table is not a failure for the caller */
    if (error == -1)
      error= 0;
  }
  DBUG_RETURN(error);
}
1497 | |
1498 | /* |
1499 | NOTE |
1500 | This is called for REDO_INSERT_ROW_HEAD and READ_NEW_ROW_HEAD |
1501 | */ |
1502 | |
1503 | prototype_redo_exec_hook(REDO_INSERT_ROW_HEAD) |
1504 | { |
1505 | int error= 1; |
1506 | uchar *buff= NULL; |
1507 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1508 | if (info == NULL || maria_is_crashed(info)) |
1509 | |
1510 | { |
1511 | /* |
1512 | Table was skipped at open time (because later dropped/renamed, not |
1513 | transactional, or create_rename_lsn newer than LOGREC_FILE_ID), or |
1514 | record was skipped due to skip_redo_lsn; it is not an error. |
1515 | */ |
1516 | return 0; |
1517 | } |
1518 | /* |
1519 | Note that REDO is per page, we still consider it if its transaction |
1520 | committed long ago and is unknown. |
1521 | */ |
1522 | /* |
1523 | If REDO's LSN is > page's LSN (read from disk), we are going to modify the |
1524 | page and change its LSN. The normal runtime code stores the UNDO's LSN |
1525 | into the page. Here storing the REDO's LSN (rec->lsn) would work |
1526 | (we are not writing to the log here, so don't have to "flush up to UNDO's |
1527 | LSN"). But in a test scenario where we do updates at runtime, then remove |
1528 | tables, apply the log and check that this results in the same table as at |
1529 | runtime, putting the same LSN as runtime had done will decrease |
1530 | differences. So we use the UNDO's LSN which is current_group_end_lsn. |
1531 | */ |
1532 | enlarge_buffer(rec); |
1533 | if (log_record_buffer.str == NULL) |
1534 | { |
1535 | eprint(tracef, "Failed to read allocate buffer for record" ); |
1536 | goto end; |
1537 | } |
1538 | if (translog_read_record(rec->lsn, 0, rec->record_length, |
1539 | log_record_buffer.str, NULL) != |
1540 | rec->record_length) |
1541 | { |
1542 | eprint(tracef, "Failed to read record" ); |
1543 | goto end; |
1544 | } |
1545 | buff= log_record_buffer.str; |
1546 | if (_ma_apply_redo_insert_row_head_or_tail(info, current_group_end_lsn, |
1547 | HEAD_PAGE, |
1548 | (rec->type == |
1549 | LOGREC_REDO_NEW_ROW_HEAD), |
1550 | buff + FILEID_STORE_SIZE, |
1551 | buff + |
1552 | FILEID_STORE_SIZE + |
1553 | PAGE_STORE_SIZE + |
1554 | DIRPOS_STORE_SIZE, |
1555 | rec->record_length - |
1556 | (FILEID_STORE_SIZE + |
1557 | PAGE_STORE_SIZE + |
1558 | DIRPOS_STORE_SIZE))) |
1559 | goto end; |
1560 | error= 0; |
1561 | end: |
1562 | return error; |
1563 | } |
1564 | |
1565 | /* |
1566 | NOTE |
1567 | This is called for REDO_INSERT_ROW_TAIL and READ_NEW_ROW_TAIL |
1568 | */ |
1569 | |
1570 | prototype_redo_exec_hook(REDO_INSERT_ROW_TAIL) |
1571 | { |
1572 | int error= 1; |
1573 | uchar *buff; |
1574 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1575 | if (info == NULL || maria_is_crashed(info)) |
1576 | return 0; |
1577 | enlarge_buffer(rec); |
1578 | if (log_record_buffer.str == NULL || |
1579 | translog_read_record(rec->lsn, 0, rec->record_length, |
1580 | log_record_buffer.str, NULL) != |
1581 | rec->record_length) |
1582 | { |
1583 | eprint(tracef, "Failed to read record" ); |
1584 | goto end; |
1585 | } |
1586 | buff= log_record_buffer.str; |
1587 | if (_ma_apply_redo_insert_row_head_or_tail(info, current_group_end_lsn, |
1588 | TAIL_PAGE, |
1589 | (rec->type == |
1590 | LOGREC_REDO_NEW_ROW_TAIL), |
1591 | buff + FILEID_STORE_SIZE, |
1592 | buff + |
1593 | FILEID_STORE_SIZE + |
1594 | PAGE_STORE_SIZE + |
1595 | DIRPOS_STORE_SIZE, |
1596 | rec->record_length - |
1597 | (FILEID_STORE_SIZE + |
1598 | PAGE_STORE_SIZE + |
1599 | DIRPOS_STORE_SIZE))) |
1600 | goto end; |
1601 | error= 0; |
1602 | |
1603 | end: |
1604 | return error; |
1605 | } |
1606 | |
1607 | |
1608 | prototype_redo_exec_hook(REDO_INSERT_ROW_BLOBS) |
1609 | { |
1610 | int error= 1; |
1611 | uchar *buff; |
1612 | uint number_of_blobs, number_of_ranges; |
1613 | pgcache_page_no_t first_page, last_page; |
1614 | char llbuf1[22], llbuf2[22]; |
1615 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1616 | if (info == NULL || maria_is_crashed(info)) |
1617 | return 0; |
1618 | enlarge_buffer(rec); |
1619 | if (log_record_buffer.str == NULL || |
1620 | translog_read_record(rec->lsn, 0, rec->record_length, |
1621 | log_record_buffer.str, NULL) != |
1622 | rec->record_length) |
1623 | { |
1624 | eprint(tracef, "Failed to read record" ); |
1625 | goto end; |
1626 | } |
1627 | buff= log_record_buffer.str; |
1628 | if (_ma_apply_redo_insert_row_blobs(info, current_group_end_lsn, |
1629 | buff, rec->lsn, &number_of_blobs, |
1630 | &number_of_ranges, |
1631 | &first_page, &last_page)) |
1632 | goto end; |
1633 | llstr(first_page, llbuf1); |
1634 | llstr(last_page, llbuf2); |
1635 | tprint(tracef, " %u blobs %u ranges, first page %s last %s" , |
1636 | number_of_blobs, number_of_ranges, llbuf1, llbuf2); |
1637 | |
1638 | error= 0; |
1639 | |
1640 | end: |
1641 | tprint(tracef, " \n" ); |
1642 | return error; |
1643 | } |
1644 | |
1645 | |
1646 | prototype_redo_exec_hook(REDO_PURGE_ROW_HEAD) |
1647 | { |
1648 | int error= 1; |
1649 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1650 | if (info == NULL || maria_is_crashed(info)) |
1651 | return 0; |
1652 | if (_ma_apply_redo_purge_row_head_or_tail(info, current_group_end_lsn, |
1653 | HEAD_PAGE, |
1654 | rec->header + FILEID_STORE_SIZE)) |
1655 | goto end; |
1656 | error= 0; |
1657 | end: |
1658 | return error; |
1659 | } |
1660 | |
1661 | |
1662 | prototype_redo_exec_hook(REDO_PURGE_ROW_TAIL) |
1663 | { |
1664 | int error= 1; |
1665 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1666 | if (info == NULL || maria_is_crashed(info)) |
1667 | return 0; |
1668 | if (_ma_apply_redo_purge_row_head_or_tail(info, current_group_end_lsn, |
1669 | TAIL_PAGE, |
1670 | rec->header + FILEID_STORE_SIZE)) |
1671 | goto end; |
1672 | error= 0; |
1673 | end: |
1674 | return error; |
1675 | } |
1676 | |
1677 | |
1678 | prototype_redo_exec_hook(REDO_FREE_BLOCKS) |
1679 | { |
1680 | int error= 1; |
1681 | uchar *buff; |
1682 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1683 | if (info == NULL || maria_is_crashed(info)) |
1684 | return 0; |
1685 | enlarge_buffer(rec); |
1686 | |
1687 | if (log_record_buffer.str == NULL || |
1688 | translog_read_record(rec->lsn, 0, rec->record_length, |
1689 | log_record_buffer.str, NULL) != |
1690 | rec->record_length) |
1691 | { |
1692 | eprint(tracef, "Failed to read record" ); |
1693 | goto end; |
1694 | } |
1695 | |
1696 | buff= log_record_buffer.str; |
1697 | if (_ma_apply_redo_free_blocks(info, current_group_end_lsn, rec->lsn, |
1698 | buff)) |
1699 | goto end; |
1700 | error= 0; |
1701 | end: |
1702 | return error; |
1703 | } |
1704 | |
1705 | |
1706 | prototype_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL) |
1707 | { |
1708 | int error= 1; |
1709 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1710 | if (info == NULL || maria_is_crashed(info)) |
1711 | return 0; |
1712 | |
1713 | if (_ma_apply_redo_free_head_or_tail(info, current_group_end_lsn, |
1714 | rec->header + FILEID_STORE_SIZE)) |
1715 | goto end; |
1716 | error= 0; |
1717 | end: |
1718 | return error; |
1719 | } |
1720 | |
1721 | |
1722 | prototype_redo_exec_hook(REDO_DELETE_ALL) |
1723 | { |
1724 | int error= 1; |
1725 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1726 | if (info == NULL) |
1727 | return 0; |
1728 | tprint(tracef, " deleting all %lu rows\n" , |
1729 | (ulong)info->s->state.state.records); |
1730 | if (maria_delete_all_rows(info)) |
1731 | goto end; |
1732 | error= 0; |
1733 | end: |
1734 | return error; |
1735 | } |
1736 | |
1737 | |
1738 | prototype_redo_exec_hook(REDO_INDEX) |
1739 | { |
1740 | int error= 1; |
1741 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1742 | if (info == NULL || maria_is_crashed(info)) |
1743 | return 0; |
1744 | enlarge_buffer(rec); |
1745 | |
1746 | if (log_record_buffer.str == NULL || |
1747 | translog_read_record(rec->lsn, 0, rec->record_length, |
1748 | log_record_buffer.str, NULL) != |
1749 | rec->record_length) |
1750 | { |
1751 | eprint(tracef, "Failed to read record" ); |
1752 | goto end; |
1753 | } |
1754 | |
1755 | if (_ma_apply_redo_index(info, current_group_end_lsn, |
1756 | log_record_buffer.str + FILEID_STORE_SIZE, |
1757 | rec->record_length - FILEID_STORE_SIZE)) |
1758 | goto end; |
1759 | error= 0; |
1760 | end: |
1761 | return error; |
1762 | } |
1763 | |
1764 | prototype_redo_exec_hook(REDO_INDEX_NEW_PAGE) |
1765 | { |
1766 | int error= 1; |
1767 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1768 | if (info == NULL || maria_is_crashed(info)) |
1769 | return 0; |
1770 | enlarge_buffer(rec); |
1771 | |
1772 | if (log_record_buffer.str == NULL || |
1773 | translog_read_record(rec->lsn, 0, rec->record_length, |
1774 | log_record_buffer.str, NULL) != |
1775 | rec->record_length) |
1776 | { |
1777 | eprint(tracef, "Failed to read record" ); |
1778 | goto end; |
1779 | } |
1780 | |
1781 | if (_ma_apply_redo_index_new_page(info, current_group_end_lsn, |
1782 | log_record_buffer.str + FILEID_STORE_SIZE, |
1783 | rec->record_length - FILEID_STORE_SIZE)) |
1784 | goto end; |
1785 | error= 0; |
1786 | end: |
1787 | return error; |
1788 | } |
1789 | |
1790 | |
1791 | prototype_redo_exec_hook(REDO_INDEX_FREE_PAGE) |
1792 | { |
1793 | int error= 1; |
1794 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1795 | if (info == NULL || maria_is_crashed(info)) |
1796 | return 0; |
1797 | |
1798 | if (_ma_apply_redo_index_free_page(info, current_group_end_lsn, |
1799 | rec->header + FILEID_STORE_SIZE)) |
1800 | goto end; |
1801 | error= 0; |
1802 | end: |
1803 | return error; |
1804 | } |
1805 | |
1806 | |
1807 | prototype_redo_exec_hook(REDO_BITMAP_NEW_PAGE) |
1808 | { |
1809 | int error= 1; |
1810 | MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); |
1811 | if (info == NULL || maria_is_crashed(info)) |
1812 | return 0; |
1813 | enlarge_buffer(rec); |
1814 | |
1815 | if (log_record_buffer.str == NULL || |
1816 | translog_read_record(rec->lsn, 0, rec->record_length, |
1817 | log_record_buffer.str, NULL) != |
1818 | rec->record_length) |
1819 | { |
1820 | eprint(tracef, "Failed to read record" ); |
1821 | goto end; |
1822 | } |
1823 | |
1824 | if (cmp_translog_addr(rec->lsn, checkpoint_start) >= 0) |
1825 | { |
1826 | /* |
1827 | Record is potentially after the bitmap flush made by Checkpoint, so has |
1828 | to be replayed. It may overwrite a more recent state but that will be |
1829 | corrected by all upcoming REDOs for data pages. |
1830 | If the condition is false, we must not apply the record: it is unneeded |
1831 | and nocive (may not be corrected as REDOs can be skipped due to |
1832 | dirty-pages list). |
1833 | */ |
1834 | if (_ma_apply_redo_bitmap_new_page(info, current_group_end_lsn, |
1835 | log_record_buffer.str + |
1836 | FILEID_STORE_SIZE)) |
1837 | goto end; |
1838 | } |
1839 | error= 0; |
1840 | end: |
1841 | return error; |
1842 | } |
1843 | |
1844 | |
1845 | static inline void set_undo_lsn_for_active_trans(uint16 short_trid, LSN lsn) |
1846 | { |
1847 | if (all_active_trans[short_trid].long_trid == 0) |
1848 | { |
1849 | /* transaction unknown, so has committed or fully rolled back long ago */ |
1850 | return; |
1851 | } |
1852 | all_active_trans[short_trid].undo_lsn= lsn; |
1853 | if (all_active_trans[short_trid].first_undo_lsn == LSN_IMPOSSIBLE) |
1854 | all_active_trans[short_trid].first_undo_lsn= lsn; |
1855 | } |
1856 | |
1857 | |
1858 | prototype_redo_exec_hook(UNDO_ROW_INSERT) |
1859 | { |
1860 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
1861 | MARIA_SHARE *share; |
1862 | |
1863 | set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); |
1864 | if (info == NULL) |
1865 | { |
1866 | /* |
1867 | Note that we set undo_lsn anyway. So that if the transaction is later |
1868 | rolled back, this UNDO is tried for execution and we get a warning (as |
1869 | it would then be abnormal that info==NULL). |
1870 | */ |
1871 | return 0; |
1872 | } |
1873 | share= info->s; |
1874 | if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) |
1875 | { |
1876 | tprint(tracef, " state has LSN " LSN_FMT " older than record, updating" |
1877 | " rows' count\n" , LSN_IN_PARTS(share->state.is_of_horizon)); |
1878 | share->state.state.records++; |
1879 | if (share->calc_checksum) |
1880 | { |
1881 | uchar buff[HA_CHECKSUM_STORE_SIZE]; |
1882 | if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE + |
1883 | PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, |
1884 | HA_CHECKSUM_STORE_SIZE, buff, NULL) != |
1885 | HA_CHECKSUM_STORE_SIZE) |
1886 | { |
1887 | eprint(tracef, "Failed to read record" ); |
1888 | return 1; |
1889 | } |
1890 | share->state.state.checksum+= ha_checksum_korr(buff); |
1891 | } |
1892 | info->s->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
1893 | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); |
1894 | } |
1895 | tprint(tracef, " rows' count %lu\n" , (ulong)info->s->state.state.records); |
1896 | /* Unpin all pages, stamp them with UNDO's LSN */ |
1897 | _ma_unpin_all_pages(info, rec->lsn); |
1898 | return 0; |
1899 | } |
1900 | |
1901 | |
1902 | prototype_redo_exec_hook(UNDO_ROW_DELETE) |
1903 | { |
1904 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
1905 | MARIA_SHARE *share; |
1906 | |
1907 | set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); |
1908 | if (info == NULL) |
1909 | return 0; |
1910 | share= info->s; |
1911 | if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) |
1912 | { |
1913 | tprint(tracef, " state older than record\n" ); |
1914 | share->state.state.records--; |
1915 | if (share->calc_checksum) |
1916 | { |
1917 | uchar buff[HA_CHECKSUM_STORE_SIZE]; |
1918 | if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE + |
1919 | PAGE_STORE_SIZE + DIRPOS_STORE_SIZE + 2 + |
1920 | PAGERANGE_STORE_SIZE, |
1921 | HA_CHECKSUM_STORE_SIZE, buff, NULL) != |
1922 | HA_CHECKSUM_STORE_SIZE) |
1923 | { |
1924 | eprint(tracef, "Failed to read record" ); |
1925 | return 1; |
1926 | } |
1927 | share->state.state.checksum+= ha_checksum_korr(buff); |
1928 | } |
1929 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
1930 | STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED | |
1931 | STATE_NOT_MOVABLE); |
1932 | } |
1933 | tprint(tracef, " rows' count %lu\n" , (ulong)share->state.state.records); |
1934 | _ma_unpin_all_pages(info, rec->lsn); |
1935 | return 0; |
1936 | } |
1937 | |
1938 | |
1939 | prototype_redo_exec_hook(UNDO_ROW_UPDATE) |
1940 | { |
1941 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
1942 | MARIA_SHARE *share; |
1943 | |
1944 | set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); |
1945 | if (info == NULL) |
1946 | return 0; |
1947 | share= info->s; |
1948 | if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) |
1949 | { |
1950 | if (share->calc_checksum) |
1951 | { |
1952 | uchar buff[HA_CHECKSUM_STORE_SIZE]; |
1953 | if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE + |
1954 | PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, |
1955 | HA_CHECKSUM_STORE_SIZE, buff, NULL) != |
1956 | HA_CHECKSUM_STORE_SIZE) |
1957 | { |
1958 | eprint(tracef, "Failed to read record" ); |
1959 | return 1; |
1960 | } |
1961 | share->state.state.checksum+= ha_checksum_korr(buff); |
1962 | } |
1963 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
1964 | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); |
1965 | } |
1966 | _ma_unpin_all_pages(info, rec->lsn); |
1967 | return 0; |
1968 | } |
1969 | |
1970 | |
1971 | prototype_redo_exec_hook(UNDO_KEY_INSERT) |
1972 | { |
1973 | MARIA_HA *info; |
1974 | MARIA_SHARE *share; |
1975 | |
1976 | set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); |
1977 | if (!(info= get_MARIA_HA_from_UNDO_record(rec))) |
1978 | return 0; |
1979 | share= info->s; |
1980 | if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) |
1981 | { |
1982 | const uchar *ptr= rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE; |
1983 | uint keynr= key_nr_korr(ptr); |
1984 | if (share->base.auto_key == (keynr + 1)) /* it's auto-increment */ |
1985 | { |
1986 | const HA_KEYSEG *keyseg= info->s->keyinfo[keynr].seg; |
1987 | ulonglong value; |
1988 | char llbuf[22]; |
1989 | uchar reversed[MARIA_MAX_KEY_BUFF], *to; |
1990 | tprint(tracef, " state older than record\n" ); |
1991 | /* we read the record to find the auto_increment value */ |
1992 | enlarge_buffer(rec); |
1993 | if (log_record_buffer.str == NULL || |
1994 | translog_read_record(rec->lsn, 0, rec->record_length, |
1995 | log_record_buffer.str, NULL) != |
1996 | rec->record_length) |
1997 | { |
1998 | eprint(tracef, "Failed to read record" ); |
1999 | return 1; |
2000 | } |
2001 | to= log_record_buffer.str + LSN_STORE_SIZE + FILEID_STORE_SIZE + |
2002 | KEY_NR_STORE_SIZE; |
2003 | if (keyseg->flag & HA_SWAP_KEY) |
2004 | { |
2005 | /* We put key from log record to "data record" packing format... */ |
2006 | uchar *key_ptr= to; |
2007 | uchar *key_end= key_ptr + keyseg->length; |
2008 | to= reversed + keyseg->length; |
2009 | do |
2010 | { |
2011 | *--to= *key_ptr++; |
2012 | } while (key_ptr != key_end); |
2013 | /* ... so that we can read it with: */ |
2014 | } |
2015 | value= ma_retrieve_auto_increment(to, keyseg->type); |
2016 | set_if_bigger(share->state.auto_increment, value); |
2017 | llstr(share->state.auto_increment, llbuf); |
2018 | tprint(tracef, " auto-inc %s\n" , llbuf); |
2019 | } |
2020 | } |
2021 | _ma_unpin_all_pages(info, rec->lsn); |
2022 | return 0; |
2023 | } |
2024 | |
2025 | |
2026 | prototype_redo_exec_hook(UNDO_KEY_DELETE) |
2027 | { |
2028 | MARIA_HA *info; |
2029 | |
2030 | set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); |
2031 | if (!(info= get_MARIA_HA_from_UNDO_record(rec))) |
2032 | return 0; |
2033 | _ma_unpin_all_pages(info, rec->lsn); |
2034 | return 0; |
2035 | } |
2036 | |
2037 | |
2038 | prototype_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT) |
2039 | { |
2040 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
2041 | MARIA_SHARE *share; |
2042 | |
2043 | set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); |
2044 | if (info == NULL) |
2045 | return 0; |
2046 | share= info->s; |
2047 | if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) |
2048 | { |
2049 | uint key_nr; |
2050 | my_off_t page; |
2051 | key_nr= key_nr_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE); |
2052 | page= page_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE + |
2053 | KEY_NR_STORE_SIZE); |
2054 | share->state.key_root[key_nr]= (page == IMPOSSIBLE_PAGE_NO ? |
2055 | HA_OFFSET_ERROR : |
2056 | page * share->block_size); |
2057 | } |
2058 | _ma_unpin_all_pages(info, rec->lsn); |
2059 | return 0; |
2060 | } |
2061 | |
2062 | |
2063 | prototype_redo_exec_hook(UNDO_BULK_INSERT) |
2064 | { |
2065 | /* |
2066 | If the repair finished it wrote and sync the state. If it didn't finish, |
2067 | we are going to empty the table and that will fix the state. |
2068 | */ |
2069 | set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); |
2070 | return 0; |
2071 | } |
2072 | |
2073 | |
2074 | prototype_redo_exec_hook(IMPORTED_TABLE) |
2075 | { |
2076 | char *name; |
2077 | enlarge_buffer(rec); |
2078 | if (log_record_buffer.str == NULL || |
2079 | translog_read_record(rec->lsn, 0, rec->record_length, |
2080 | log_record_buffer.str, NULL) != |
2081 | rec->record_length) |
2082 | { |
2083 | eprint(tracef, "Failed to read record" ); |
2084 | return 1; |
2085 | } |
2086 | name= (char *)log_record_buffer.str; |
2087 | tprint(tracef, "Table '%s' was imported (auto-zerofilled) in this Aria instance\n" , name); |
2088 | return 0; |
2089 | } |
2090 | |
2091 | |
2092 | prototype_redo_exec_hook(COMMIT) |
2093 | { |
2094 | uint16 sid= rec->short_trid; |
2095 | TrID long_trid= all_active_trans[sid].long_trid; |
2096 | char llbuf[22]; |
2097 | if (long_trid == 0) |
2098 | { |
2099 | tprint(tracef, "We don't know about transaction with short_trid %u;" |
2100 | "it probably committed long ago, forget it\n" , sid); |
2101 | bzero(&all_active_trans[sid], sizeof(all_active_trans[sid])); |
2102 | return 0; |
2103 | } |
2104 | llstr(long_trid, llbuf); |
2105 | tprint(tracef, "Transaction long_trid %s short_trid %u committed\n" , |
2106 | llbuf, sid); |
2107 | bzero(&all_active_trans[sid], sizeof(all_active_trans[sid])); |
2108 | #ifdef MARIA_VERSIONING |
2109 | /* |
2110 | if real recovery: |
2111 | transaction was committed, move it to some separate list for later |
2112 | purging (but don't purge now! purging may have been started before, we |
2113 | may find REDO_PURGE records soon). |
2114 | */ |
2115 | #endif |
2116 | return 0; |
2117 | } |
2118 | |
2119 | prototype_redo_exec_hook(CLR_END) |
2120 | { |
2121 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
2122 | MARIA_SHARE *share; |
2123 | LSN previous_undo_lsn; |
2124 | enum translog_record_type undone_record_type; |
2125 | const LOG_DESC *log_desc; |
2126 | my_bool row_entry= 0; |
2127 | uchar *logpos; |
2128 | DBUG_ENTER("exec_REDO_LOGREC_CLR_END" ); |
2129 | |
2130 | previous_undo_lsn= lsn_korr(rec->header); |
2131 | undone_record_type= |
2132 | clr_type_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE); |
2133 | log_desc= &log_record_type_descriptor[undone_record_type]; |
2134 | |
2135 | set_undo_lsn_for_active_trans(rec->short_trid, previous_undo_lsn); |
2136 | if (info == NULL) |
2137 | DBUG_RETURN(0); |
2138 | share= info->s; |
2139 | tprint(tracef, " CLR_END was about %s, undo_lsn now LSN " LSN_FMT "\n" , |
2140 | log_desc->name, LSN_IN_PARTS(previous_undo_lsn)); |
2141 | |
2142 | enlarge_buffer(rec); |
2143 | if (log_record_buffer.str == NULL || |
2144 | translog_read_record(rec->lsn, 0, rec->record_length, |
2145 | log_record_buffer.str, NULL) != |
2146 | rec->record_length) |
2147 | { |
2148 | eprint(tracef, "Failed to read record" ); |
2149 | return 1; |
2150 | } |
2151 | logpos= (log_record_buffer.str + LSN_STORE_SIZE + FILEID_STORE_SIZE + |
2152 | CLR_TYPE_STORE_SIZE); |
2153 | |
2154 | if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) |
2155 | { |
2156 | tprint(tracef, " state older than record\n" ); |
2157 | switch (undone_record_type) { |
2158 | case LOGREC_UNDO_ROW_DELETE: |
2159 | row_entry= 1; |
2160 | share->state.state.records++; |
2161 | break; |
2162 | case LOGREC_UNDO_ROW_INSERT: |
2163 | share->state.state.records--; |
2164 | share->state.changed|= STATE_NOT_OPTIMIZED_ROWS; |
2165 | row_entry= 1; |
2166 | break; |
2167 | case LOGREC_UNDO_ROW_UPDATE: |
2168 | row_entry= 1; |
2169 | break; |
2170 | case LOGREC_UNDO_KEY_INSERT: |
2171 | case LOGREC_UNDO_KEY_DELETE: |
2172 | break; |
2173 | case LOGREC_UNDO_KEY_INSERT_WITH_ROOT: |
2174 | case LOGREC_UNDO_KEY_DELETE_WITH_ROOT: |
2175 | { |
2176 | uint key_nr; |
2177 | my_off_t page; |
2178 | key_nr= key_nr_korr(logpos); |
2179 | page= page_korr(logpos + KEY_NR_STORE_SIZE); |
2180 | share->state.key_root[key_nr]= (page == IMPOSSIBLE_PAGE_NO ? |
2181 | HA_OFFSET_ERROR : |
2182 | page * share->block_size); |
2183 | break; |
2184 | } |
2185 | case LOGREC_UNDO_BULK_INSERT: |
2186 | break; |
2187 | default: |
2188 | DBUG_ASSERT(0); |
2189 | } |
2190 | if (row_entry && share->calc_checksum) |
2191 | share->state.state.checksum+= ha_checksum_korr(logpos); |
2192 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
2193 | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); |
2194 | } |
2195 | if (row_entry) |
2196 | tprint(tracef, " rows' count %lu\n" , (ulong)share->state.state.records); |
2197 | _ma_unpin_all_pages(info, rec->lsn); |
2198 | DBUG_RETURN(0); |
2199 | } |
2200 | |
2201 | |
2202 | /** |
2203 | Hock to print debug information (like MySQL query) |
2204 | */ |
2205 | |
2206 | prototype_redo_exec_hook(DEBUG_INFO) |
2207 | { |
2208 | uchar *data; |
2209 | enum translog_debug_info_type debug_info; |
2210 | |
2211 | enlarge_buffer(rec); |
2212 | if (log_record_buffer.str == NULL || |
2213 | translog_read_record(rec->lsn, 0, rec->record_length, |
2214 | log_record_buffer.str, NULL) != |
2215 | rec->record_length) |
2216 | { |
2217 | eprint(tracef, "Failed to read record debug record" ); |
2218 | return 1; |
2219 | } |
2220 | debug_info= (enum translog_debug_info_type) log_record_buffer.str[0]; |
2221 | data= log_record_buffer.str + 1; |
2222 | switch (debug_info) { |
2223 | case LOGREC_DEBUG_INFO_QUERY: |
2224 | tprint(tracef, "Query: %.*s\n" , rec->record_length - 1, |
2225 | (char*) data); |
2226 | break; |
2227 | default: |
2228 | DBUG_ASSERT(0); |
2229 | } |
2230 | return 0; |
2231 | } |
2232 | |
2233 | |
2234 | /** |
2235 | In some cases we have to skip execution of an UNDO record during the UNDO |
2236 | phase. |
2237 | */ |
2238 | |
2239 | static void skip_undo_record(LSN previous_undo_lsn, TRN *trn) |
2240 | { |
2241 | trn->undo_lsn= previous_undo_lsn; |
2242 | if (previous_undo_lsn == LSN_IMPOSSIBLE) /* has fully rolled back */ |
2243 | trn->first_undo_lsn= LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn); |
2244 | skipped_undo_phase++; |
2245 | } |
2246 | |
2247 | |
2248 | prototype_undo_exec_hook(UNDO_ROW_INSERT) |
2249 | { |
2250 | my_bool error; |
2251 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
2252 | LSN previous_undo_lsn= lsn_korr(rec->header); |
2253 | MARIA_SHARE *share; |
2254 | const uchar *record_ptr; |
2255 | |
2256 | if (info == NULL || maria_is_crashed(info)) |
2257 | { |
2258 | /* |
2259 | Unlike for REDOs, if the table was skipped it is abnormal; we have a |
2260 | transaction to rollback which used this table, as it is not rolled back |
2261 | it was supposed to hold this table and so the table should still be |
2262 | there. Skip it (user may have repaired the table with maria_chk because |
2263 | it was so badly corrupted that a previous recovery failed) but warn. |
2264 | */ |
2265 | skip_undo_record(previous_undo_lsn, trn); |
2266 | return 0; |
2267 | } |
2268 | share= info->s; |
2269 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
2270 | STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED | |
2271 | STATE_NOT_MOVABLE); |
2272 | record_ptr= rec->header; |
2273 | if (share->calc_checksum) |
2274 | { |
2275 | /* |
2276 | We need to read more of the record to put the checksum into the record |
2277 | buffer used by _ma_apply_undo_row_insert(). |
2278 | If the table has no live checksum, rec->header will be enough. |
2279 | */ |
2280 | enlarge_buffer(rec); |
2281 | if (log_record_buffer.str == NULL || |
2282 | translog_read_record(rec->lsn, 0, rec->record_length, |
2283 | log_record_buffer.str, NULL) != |
2284 | rec->record_length) |
2285 | { |
2286 | eprint(tracef, "Failed to read record" ); |
2287 | return 1; |
2288 | } |
2289 | record_ptr= log_record_buffer.str; |
2290 | } |
2291 | |
2292 | info->trn= trn; |
2293 | error= _ma_apply_undo_row_insert(info, previous_undo_lsn, |
2294 | record_ptr + LSN_STORE_SIZE + |
2295 | FILEID_STORE_SIZE); |
2296 | info->trn= 0; |
2297 | /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */ |
2298 | tprint(tracef, " rows' count %lu\n" , (ulong)info->s->state.state.records); |
2299 | tprint(tracef, " undo_lsn now LSN " LSN_FMT "\n" , |
2300 | LSN_IN_PARTS(trn->undo_lsn)); |
2301 | return error; |
2302 | } |
2303 | |
2304 | |
2305 | prototype_undo_exec_hook(UNDO_ROW_DELETE) |
2306 | { |
2307 | my_bool error; |
2308 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
2309 | LSN previous_undo_lsn= lsn_korr(rec->header); |
2310 | MARIA_SHARE *share; |
2311 | |
2312 | if (info == NULL || maria_is_crashed(info)) |
2313 | { |
2314 | skip_undo_record(previous_undo_lsn, trn); |
2315 | return 0; |
2316 | } |
2317 | |
2318 | share= info->s; |
2319 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
2320 | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); |
2321 | enlarge_buffer(rec); |
2322 | if (log_record_buffer.str == NULL || |
2323 | translog_read_record(rec->lsn, 0, rec->record_length, |
2324 | log_record_buffer.str, NULL) != |
2325 | rec->record_length) |
2326 | { |
2327 | eprint(tracef, "Failed to read record" ); |
2328 | return 1; |
2329 | } |
2330 | |
2331 | info->trn= trn; |
2332 | error= _ma_apply_undo_row_delete(info, previous_undo_lsn, |
2333 | log_record_buffer.str + LSN_STORE_SIZE + |
2334 | FILEID_STORE_SIZE, |
2335 | rec->record_length - |
2336 | (LSN_STORE_SIZE + FILEID_STORE_SIZE)); |
2337 | info->trn= 0; |
2338 | tprint(tracef, " rows' count %lu\n undo_lsn now LSN " LSN_FMT "\n" , |
2339 | (ulong)share->state.state.records, LSN_IN_PARTS(trn->undo_lsn)); |
2340 | return error; |
2341 | } |
2342 | |
2343 | |
2344 | prototype_undo_exec_hook(UNDO_ROW_UPDATE) |
2345 | { |
2346 | my_bool error; |
2347 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
2348 | LSN previous_undo_lsn= lsn_korr(rec->header); |
2349 | MARIA_SHARE *share; |
2350 | |
2351 | if (info == NULL || maria_is_crashed(info)) |
2352 | { |
2353 | skip_undo_record(previous_undo_lsn, trn); |
2354 | return 0; |
2355 | } |
2356 | |
2357 | share= info->s; |
2358 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
2359 | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); |
2360 | enlarge_buffer(rec); |
2361 | if (log_record_buffer.str == NULL || |
2362 | translog_read_record(rec->lsn, 0, rec->record_length, |
2363 | log_record_buffer.str, NULL) != |
2364 | rec->record_length) |
2365 | { |
2366 | eprint(tracef, "Failed to read record" ); |
2367 | return 1; |
2368 | } |
2369 | |
2370 | info->trn= trn; |
2371 | error= _ma_apply_undo_row_update(info, previous_undo_lsn, |
2372 | log_record_buffer.str + LSN_STORE_SIZE + |
2373 | FILEID_STORE_SIZE, |
2374 | rec->record_length - |
2375 | (LSN_STORE_SIZE + FILEID_STORE_SIZE)); |
2376 | info->trn= 0; |
2377 | tprint(tracef, " undo_lsn now LSN " LSN_FMT "\n" , |
2378 | LSN_IN_PARTS(trn->undo_lsn)); |
2379 | return error; |
2380 | } |
2381 | |
2382 | |
2383 | prototype_undo_exec_hook(UNDO_KEY_INSERT) |
2384 | { |
2385 | my_bool error; |
2386 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
2387 | LSN previous_undo_lsn= lsn_korr(rec->header); |
2388 | MARIA_SHARE *share; |
2389 | |
2390 | if (info == NULL || maria_is_crashed(info)) |
2391 | { |
2392 | skip_undo_record(previous_undo_lsn, trn); |
2393 | return 0; |
2394 | } |
2395 | |
2396 | share= info->s; |
2397 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
2398 | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); |
2399 | |
2400 | enlarge_buffer(rec); |
2401 | if (log_record_buffer.str == NULL || |
2402 | translog_read_record(rec->lsn, 0, rec->record_length, |
2403 | log_record_buffer.str, NULL) != |
2404 | rec->record_length) |
2405 | { |
2406 | eprint(tracef, "Failed to read record" ); |
2407 | return 1; |
2408 | } |
2409 | |
2410 | info->trn= trn; |
2411 | error= _ma_apply_undo_key_insert(info, previous_undo_lsn, |
2412 | log_record_buffer.str + LSN_STORE_SIZE + |
2413 | FILEID_STORE_SIZE, |
2414 | rec->record_length - LSN_STORE_SIZE - |
2415 | FILEID_STORE_SIZE); |
2416 | info->trn= 0; |
2417 | /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */ |
2418 | tprint(tracef, " undo_lsn now LSN " LSN_FMT "\n" , |
2419 | LSN_IN_PARTS(trn->undo_lsn)); |
2420 | return error; |
2421 | } |
2422 | |
2423 | |
2424 | prototype_undo_exec_hook(UNDO_KEY_DELETE) |
2425 | { |
2426 | my_bool error; |
2427 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
2428 | LSN previous_undo_lsn= lsn_korr(rec->header); |
2429 | MARIA_SHARE *share; |
2430 | |
2431 | if (info == NULL || maria_is_crashed(info)) |
2432 | { |
2433 | skip_undo_record(previous_undo_lsn, trn); |
2434 | return 0; |
2435 | } |
2436 | |
2437 | share= info->s; |
2438 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
2439 | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); |
2440 | |
2441 | enlarge_buffer(rec); |
2442 | if (log_record_buffer.str == NULL || |
2443 | translog_read_record(rec->lsn, 0, rec->record_length, |
2444 | log_record_buffer.str, NULL) != |
2445 | rec->record_length) |
2446 | { |
2447 | eprint(tracef, "Failed to read record" ); |
2448 | return 1; |
2449 | } |
2450 | |
2451 | info->trn= trn; |
2452 | error= _ma_apply_undo_key_delete(info, previous_undo_lsn, |
2453 | log_record_buffer.str + LSN_STORE_SIZE + |
2454 | FILEID_STORE_SIZE, |
2455 | rec->record_length - LSN_STORE_SIZE - |
2456 | FILEID_STORE_SIZE, FALSE); |
2457 | info->trn= 0; |
2458 | /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */ |
2459 | tprint(tracef, " undo_lsn now LSN " LSN_FMT "\n" , |
2460 | LSN_IN_PARTS(trn->undo_lsn)); |
2461 | return error; |
2462 | } |
2463 | |
2464 | |
2465 | prototype_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT) |
2466 | { |
2467 | my_bool error; |
2468 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
2469 | LSN previous_undo_lsn= lsn_korr(rec->header); |
2470 | MARIA_SHARE *share; |
2471 | |
2472 | if (info == NULL || maria_is_crashed(info)) |
2473 | { |
2474 | skip_undo_record(previous_undo_lsn, trn); |
2475 | return 0; |
2476 | } |
2477 | |
2478 | share= info->s; |
2479 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
2480 | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); |
2481 | |
2482 | enlarge_buffer(rec); |
2483 | if (log_record_buffer.str == NULL || |
2484 | translog_read_record(rec->lsn, 0, rec->record_length, |
2485 | log_record_buffer.str, NULL) != |
2486 | rec->record_length) |
2487 | { |
2488 | eprint(tracef, "Failed to read record" ); |
2489 | return 1; |
2490 | } |
2491 | |
2492 | info->trn= trn; |
2493 | error= _ma_apply_undo_key_delete(info, previous_undo_lsn, |
2494 | log_record_buffer.str + LSN_STORE_SIZE + |
2495 | FILEID_STORE_SIZE, |
2496 | rec->record_length - LSN_STORE_SIZE - |
2497 | FILEID_STORE_SIZE, TRUE); |
2498 | info->trn= 0; |
2499 | /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */ |
2500 | tprint(tracef, " undo_lsn now LSN " LSN_FMT "\n" , |
2501 | LSN_IN_PARTS(trn->undo_lsn)); |
2502 | return error; |
2503 | } |
2504 | |
2505 | |
2506 | prototype_undo_exec_hook(UNDO_BULK_INSERT) |
2507 | { |
2508 | my_bool error; |
2509 | MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); |
2510 | LSN previous_undo_lsn= lsn_korr(rec->header); |
2511 | MARIA_SHARE *share; |
2512 | |
2513 | /* Here we don't check for crashed as we can undo the bulk insert */ |
2514 | if (info == NULL) |
2515 | { |
2516 | skip_undo_record(previous_undo_lsn, trn); |
2517 | return 0; |
2518 | } |
2519 | |
2520 | share= info->s; |
2521 | share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED | |
2522 | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); |
2523 | |
2524 | info->trn= trn; |
2525 | error= _ma_apply_undo_bulk_insert(info, previous_undo_lsn); |
2526 | info->trn= 0; |
2527 | /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */ |
2528 | tprint(tracef, " undo_lsn now LSN " LSN_FMT "\n" , |
2529 | LSN_IN_PARTS(trn->undo_lsn)); |
2530 | return error; |
2531 | } |
2532 | |
2533 | |
2534 | static int run_redo_phase(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply) |
2535 | { |
2536 | TRANSLOG_HEADER_BUFFER rec; |
2537 | struct st_translog_scanner_data scanner; |
2538 | int len; |
2539 | uint i; |
2540 | DBUG_ENTER("run_redo_phase" ); |
2541 | |
2542 | /* install hooks for execution */ |
2543 | #define install_redo_exec_hook(R) \ |
2544 | log_record_type_descriptor[LOGREC_ ## R].record_execute_in_redo_phase= \ |
2545 | exec_REDO_LOGREC_ ## R; |
2546 | #define install_redo_exec_hook_shared(R,S) \ |
2547 | log_record_type_descriptor[LOGREC_ ## R].record_execute_in_redo_phase= \ |
2548 | exec_REDO_LOGREC_ ## S; |
2549 | #define install_undo_exec_hook(R) \ |
2550 | log_record_type_descriptor[LOGREC_ ## R].record_execute_in_undo_phase= \ |
2551 | exec_UNDO_LOGREC_ ## R; |
2552 | install_redo_exec_hook(LONG_TRANSACTION_ID); |
2553 | install_redo_exec_hook(CHECKPOINT); |
2554 | install_redo_exec_hook(REDO_CREATE_TABLE); |
2555 | install_redo_exec_hook(REDO_RENAME_TABLE); |
2556 | install_redo_exec_hook(REDO_REPAIR_TABLE); |
2557 | install_redo_exec_hook(REDO_DROP_TABLE); |
2558 | install_redo_exec_hook(FILE_ID); |
2559 | install_redo_exec_hook(INCOMPLETE_LOG); |
2560 | install_redo_exec_hook(INCOMPLETE_GROUP); |
2561 | install_redo_exec_hook(REDO_INSERT_ROW_HEAD); |
2562 | install_redo_exec_hook(REDO_INSERT_ROW_TAIL); |
2563 | install_redo_exec_hook(REDO_INSERT_ROW_BLOBS); |
2564 | install_redo_exec_hook(REDO_PURGE_ROW_HEAD); |
2565 | install_redo_exec_hook(REDO_PURGE_ROW_TAIL); |
2566 | install_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL); |
2567 | install_redo_exec_hook(REDO_FREE_BLOCKS); |
2568 | install_redo_exec_hook(REDO_DELETE_ALL); |
2569 | install_redo_exec_hook(REDO_INDEX); |
2570 | install_redo_exec_hook(REDO_INDEX_NEW_PAGE); |
2571 | install_redo_exec_hook(REDO_INDEX_FREE_PAGE); |
2572 | install_redo_exec_hook(REDO_BITMAP_NEW_PAGE); |
2573 | install_redo_exec_hook(UNDO_ROW_INSERT); |
2574 | install_redo_exec_hook(UNDO_ROW_DELETE); |
2575 | install_redo_exec_hook(UNDO_ROW_UPDATE); |
2576 | install_redo_exec_hook(UNDO_KEY_INSERT); |
2577 | install_redo_exec_hook(UNDO_KEY_DELETE); |
2578 | install_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT); |
2579 | install_redo_exec_hook(COMMIT); |
2580 | install_redo_exec_hook(CLR_END); |
2581 | install_undo_exec_hook(UNDO_ROW_INSERT); |
2582 | install_undo_exec_hook(UNDO_ROW_DELETE); |
2583 | install_undo_exec_hook(UNDO_ROW_UPDATE); |
2584 | install_undo_exec_hook(UNDO_KEY_INSERT); |
2585 | install_undo_exec_hook(UNDO_KEY_DELETE); |
2586 | install_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT); |
2587 | /* REDO_NEW_ROW_HEAD shares entry with REDO_INSERT_ROW_HEAD */ |
2588 | install_redo_exec_hook_shared(REDO_NEW_ROW_HEAD, REDO_INSERT_ROW_HEAD); |
2589 | /* REDO_NEW_ROW_TAIL shares entry with REDO_INSERT_ROW_TAIL */ |
2590 | install_redo_exec_hook_shared(REDO_NEW_ROW_TAIL, REDO_INSERT_ROW_TAIL); |
2591 | install_redo_exec_hook(UNDO_BULK_INSERT); |
2592 | install_undo_exec_hook(UNDO_BULK_INSERT); |
2593 | install_redo_exec_hook(IMPORTED_TABLE); |
2594 | install_redo_exec_hook(DEBUG_INFO); |
2595 | |
2596 | current_group_end_lsn= LSN_IMPOSSIBLE; |
2597 | #ifndef DBUG_OFF |
2598 | current_group_table= NULL; |
2599 | #endif |
2600 | |
2601 | if (unlikely(lsn == LSN_IMPOSSIBLE || lsn == translog_get_horizon())) |
2602 | { |
2603 | tprint(tracef, "checkpoint address refers to the log end log or " |
2604 | "log is empty, nothing to do.\n" ); |
2605 | DBUG_RETURN(0); |
2606 | } |
2607 | |
2608 | len= translog_read_record_header(lsn, &rec); |
2609 | |
2610 | if (len == RECHEADER_READ_ERROR) |
2611 | { |
2612 | eprint(tracef, "Failed to read header of the first record." ); |
2613 | DBUG_RETURN(1); |
2614 | } |
2615 | if (translog_scanner_init(lsn, 1, &scanner, 1)) |
2616 | { |
2617 | tprint(tracef, "Scanner init failed\n" ); |
2618 | DBUG_RETURN(1); |
2619 | } |
2620 | for (i= 1;;i++) |
2621 | { |
2622 | uint16 sid= rec.short_trid; |
2623 | const LOG_DESC *log_desc= &log_record_type_descriptor[rec.type]; |
2624 | display_record_position(log_desc, &rec, i); |
2625 | /* |
2626 | A complete group is a set of log records with an "end mark" record |
2627 | (e.g. a set of REDOs for an operation, terminated by an UNDO for this |
2628 | operation); if there is no "end mark" record the group is incomplete and |
2629 | won't be executed. |
2630 | */ |
2631 | if ((log_desc->record_in_group == LOGREC_IS_GROUP_ITSELF) || |
2632 | (log_desc->record_in_group == LOGREC_LAST_IN_GROUP)) |
2633 | { |
2634 | if (all_active_trans[sid].group_start_lsn != LSN_IMPOSSIBLE) |
2635 | { |
2636 | if (log_desc->record_in_group == LOGREC_IS_GROUP_ITSELF) |
2637 | { |
2638 | /* |
2639 | Can happen if the transaction got a table write error, then |
2640 | unlocked tables thus wrote a COMMIT record. Or can be an |
2641 | INCOMPLETE_GROUP record written by a previous recovery. |
2642 | */ |
2643 | tprint(tracef, "\nDiscarding incomplete group before this record\n" ); |
2644 | all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE; |
2645 | } |
2646 | else |
2647 | { |
2648 | struct st_translog_scanner_data scanner2; |
2649 | TRANSLOG_HEADER_BUFFER rec2; |
2650 | /* |
2651 | There is a complete group for this transaction, containing more |
2652 | than this event. |
2653 | */ |
2654 | tprint(tracef, " ends a group:\n" ); |
2655 | len= |
2656 | translog_read_record_header(all_active_trans[sid].group_start_lsn, |
2657 | &rec2); |
2658 | if (len < 0) /* EOF or error */ |
2659 | { |
2660 | tprint(tracef, "Cannot find record where it should be\n" ); |
2661 | goto err; |
2662 | } |
2663 | if (lsn_end != LSN_IMPOSSIBLE && rec2.lsn >= lsn_end) |
2664 | { |
2665 | tprint(tracef, |
2666 | "lsn_end reached at " LSN_FMT ". " |
2667 | "Skipping rest of redo entries" , |
2668 | LSN_IN_PARTS(rec2.lsn)); |
2669 | translog_destroy_scanner(&scanner); |
2670 | translog_free_record_header(&rec); |
2671 | DBUG_RETURN(0); |
2672 | } |
2673 | |
2674 | if (translog_scanner_init(rec2.lsn, 1, &scanner2, 1)) |
2675 | { |
2676 | tprint(tracef, "Scanner2 init failed\n" ); |
2677 | goto err; |
2678 | } |
2679 | current_group_end_lsn= rec.lsn; |
2680 | do |
2681 | { |
2682 | if (rec2.short_trid == sid) /* it's in our group */ |
2683 | { |
2684 | const LOG_DESC *log_desc2= &log_record_type_descriptor[rec2.type]; |
2685 | display_record_position(log_desc2, &rec2, 0); |
2686 | if (apply == MARIA_LOG_CHECK) |
2687 | { |
2688 | translog_size_t read_len; |
2689 | enlarge_buffer(&rec2); |
2690 | read_len= |
2691 | translog_read_record(rec2.lsn, 0, rec2.record_length, |
2692 | log_record_buffer.str, NULL); |
2693 | if (read_len != rec2.record_length) |
2694 | { |
2695 | tprint(tracef, "Cannot read record's body: read %u of" |
2696 | " %u bytes\n" , read_len, rec2.record_length); |
2697 | translog_destroy_scanner(&scanner2); |
2698 | translog_free_record_header(&rec2); |
2699 | goto err; |
2700 | } |
2701 | } |
2702 | if (apply == MARIA_LOG_APPLY && |
2703 | display_and_apply_record(log_desc2, &rec2)) |
2704 | { |
2705 | translog_destroy_scanner(&scanner2); |
2706 | translog_free_record_header(&rec2); |
2707 | goto err; |
2708 | } |
2709 | } |
2710 | translog_free_record_header(&rec2); |
2711 | len= translog_read_next_record_header(&scanner2, &rec2); |
2712 | if (len < 0) /* EOF or error */ |
2713 | { |
2714 | tprint(tracef, "Cannot find record where it should be\n" ); |
2715 | translog_destroy_scanner(&scanner2); |
2716 | translog_free_record_header(&rec2); |
2717 | goto err; |
2718 | } |
2719 | } |
2720 | while (rec2.lsn < rec.lsn); |
2721 | /* group finished */ |
2722 | all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE; |
2723 | current_group_end_lsn= LSN_IMPOSSIBLE; /* for debugging */ |
2724 | display_record_position(log_desc, &rec, 0); |
2725 | translog_destroy_scanner(&scanner2); |
2726 | translog_free_record_header(&rec2); |
2727 | } |
2728 | } |
2729 | if (apply == MARIA_LOG_APPLY && |
2730 | display_and_apply_record(log_desc, &rec)) |
2731 | goto err; |
2732 | #ifndef DBUG_OFF |
2733 | current_group_table= NULL; |
2734 | #endif |
2735 | } |
2736 | else /* record does not end group */ |
2737 | { |
2738 | /* just record the fact, can't know if can execute yet */ |
2739 | if (all_active_trans[sid].group_start_lsn == LSN_IMPOSSIBLE) |
2740 | { |
2741 | /* group not yet started */ |
2742 | all_active_trans[sid].group_start_lsn= rec.lsn; |
2743 | } |
2744 | } |
2745 | translog_free_record_header(&rec); |
2746 | len= translog_read_next_record_header(&scanner, &rec); |
2747 | if (len < 0) |
2748 | { |
2749 | switch (len) |
2750 | { |
2751 | case RECHEADER_READ_EOF: |
2752 | tprint(tracef, "EOF on the log\n" ); |
2753 | break; |
2754 | case RECHEADER_READ_ERROR: |
2755 | tprint(tracef, "Error reading log\n" ); |
2756 | goto err; |
2757 | } |
2758 | break; |
2759 | } |
2760 | } |
2761 | translog_destroy_scanner(&scanner); |
2762 | translog_free_record_header(&rec); |
2763 | if (recovery_message_printed == REC_MSG_REDO) |
2764 | { |
2765 | fprintf(stderr, " 100%%" ); |
2766 | fflush(stderr); |
2767 | procent_printed= 1; |
2768 | } |
2769 | DBUG_RETURN(0); |
2770 | |
2771 | err: |
2772 | translog_destroy_scanner(&scanner); |
2773 | translog_free_record_header(&rec); |
2774 | DBUG_RETURN(1); |
2775 | } |
2776 | |
2777 | |
2778 | /** |
2779 | @brief Informs about any aborted groups or uncommitted transactions, |
2780 | prepares for the UNDO phase if needed. |
2781 | |
2782 | @note Observe that it may init trnman. |
2783 | */ |
2784 | static uint end_of_redo_phase(my_bool prepare_for_undo_phase) |
2785 | { |
2786 | uint sid, uncommitted= 0; |
2787 | char llbuf[22]; |
2788 | LSN addr; |
2789 | |
2790 | my_hash_free(&all_dirty_pages); |
2791 | /* |
2792 | hash_free() can be called multiple times probably, but be safe if that |
2793 | changes |
2794 | */ |
2795 | bzero(&all_dirty_pages, sizeof(all_dirty_pages)); |
2796 | my_free(dirty_pages_pool); |
2797 | dirty_pages_pool= NULL; |
2798 | |
2799 | llstr(max_long_trid, llbuf); |
2800 | tprint(tracef, "Maximum transaction long id seen: %s\n" , llbuf); |
2801 | llstr(max_trid_in_control_file, llbuf); |
2802 | tprint(tracef, "Maximum transaction long id seen in control file: %s\n" , |
2803 | llbuf); |
2804 | /* |
2805 | If logs were deleted, or lost, trid in control file is needed to set |
2806 | trnman's generator: |
2807 | */ |
2808 | set_if_bigger(max_long_trid, max_trid_in_control_file); |
2809 | if (prepare_for_undo_phase && trnman_init(max_long_trid)) |
2810 | return -1; |
2811 | |
2812 | trns_created= TRUE; |
2813 | |
2814 | for (sid= 0; sid <= SHORT_TRID_MAX; sid++) |
2815 | { |
2816 | TrID long_trid= all_active_trans[sid].long_trid; |
2817 | LSN gslsn= all_active_trans[sid].group_start_lsn; |
2818 | TRN *trn; |
2819 | if (gslsn != LSN_IMPOSSIBLE) |
2820 | { |
2821 | tprint(tracef, "Group at LSN " LSN_FMT " short_trid %u incomplete\n" , |
2822 | LSN_IN_PARTS(gslsn), sid); |
2823 | all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE; |
2824 | } |
2825 | if (all_active_trans[sid].undo_lsn != LSN_IMPOSSIBLE) |
2826 | { |
2827 | llstr(long_trid, llbuf); |
2828 | tprint(tracef, "Transaction long_trid %s short_trid %u uncommitted\n" , |
2829 | llbuf, sid); |
2830 | /* |
2831 | dummy_transaction_object serves only for DDLs, where there is never a |
2832 | rollback or incomplete group. And unknown transactions (which have |
2833 | long_trid==0) should have undo_lsn==LSN_IMPOSSIBLE. |
2834 | */ |
2835 | if (long_trid ==0) |
2836 | { |
2837 | eprint(tracef, "Transaction with long_trid 0 should not roll back" ); |
2838 | ALERT_USER(); |
2839 | return -1; |
2840 | } |
2841 | if (prepare_for_undo_phase) |
2842 | { |
2843 | if ((trn= trnman_recreate_trn_from_recovery(sid, long_trid)) == NULL) |
2844 | return -1; |
2845 | trn->undo_lsn= all_active_trans[sid].undo_lsn; |
2846 | trn->first_undo_lsn= all_active_trans[sid].first_undo_lsn | |
2847 | TRANSACTION_LOGGED_LONG_ID; /* because trn is known in log */ |
2848 | if (gslsn != LSN_IMPOSSIBLE) |
2849 | { |
2850 | /* |
2851 | UNDO phase will log some records. So, a future recovery may see: |
2852 | REDO(from incomplete group) - REDO(from rollback) - CLR_END |
2853 | and thus execute the first REDO (finding it in "a complete |
2854 | group"). To prevent that: |
2855 | */ |
2856 | LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS]; |
2857 | LSN lsn; |
2858 | if (translog_write_record(&lsn, LOGREC_INCOMPLETE_GROUP, |
2859 | trn, NULL, 0, |
2860 | TRANSLOG_INTERNAL_PARTS, log_array, |
2861 | NULL, NULL)) |
2862 | return -1; |
2863 | } |
2864 | } |
2865 | uncommitted++; |
2866 | } |
2867 | #ifdef MARIA_VERSIONING |
2868 | /* |
2869 | If real recovery: if transaction was committed, move it to some separate |
2870 | list for soon purging. |
2871 | */ |
2872 | #endif |
2873 | } |
2874 | |
2875 | my_free(all_active_trans); |
2876 | all_active_trans= NULL; |
2877 | |
2878 | /* |
2879 | The UNDO phase uses some normal run-time code of ROLLBACK: generates log |
2880 | records, etc; prepare tables for that |
2881 | */ |
2882 | addr= translog_get_horizon(); |
2883 | for (sid= 0; sid <= SHARE_ID_MAX; sid++) |
2884 | { |
2885 | MARIA_HA *info= all_tables[sid].info; |
2886 | if (info != NULL) |
2887 | { |
2888 | prepare_table_for_close(info, addr); |
2889 | /* |
2890 | But we don't close it; we leave it available for the UNDO phase; |
2891 | it's likely that the UNDO phase will need it. |
2892 | */ |
2893 | if (prepare_for_undo_phase) |
2894 | translog_assign_id_to_share_from_recovery(info->s, sid); |
2895 | } |
2896 | } |
2897 | return uncommitted; |
2898 | } |
2899 | |
2900 | |
2901 | static int run_undo_phase(uint uncommitted) |
2902 | { |
2903 | LSN last_undo __attribute__((unused)); |
2904 | DBUG_ENTER("run_undo_phase" ); |
2905 | |
2906 | if (uncommitted > 0) |
2907 | { |
2908 | checkpoint_useful= TRUE; |
2909 | if (tracef != stdout) |
2910 | { |
2911 | if (recovery_message_printed == REC_MSG_NONE) |
2912 | print_preamble(); |
2913 | fprintf(stderr, "transactions to roll back:" ); |
2914 | recovery_message_printed= REC_MSG_UNDO; |
2915 | } |
2916 | tprint(tracef, "%u transactions will be rolled back\n" , uncommitted); |
2917 | procent_printed= 1; |
2918 | for( ; ; ) |
2919 | { |
2920 | char llbuf[22]; |
2921 | TRN *trn; |
2922 | if (recovery_message_printed == REC_MSG_UNDO) |
2923 | { |
2924 | fprintf(stderr, " %u" , uncommitted); |
2925 | fflush(stderr); |
2926 | } |
2927 | if ((uncommitted--) == 0) |
2928 | break; |
2929 | trn= trnman_get_any_trn(); |
2930 | DBUG_ASSERT(trn != NULL); |
2931 | llstr(trn->trid, llbuf); |
2932 | tprint(tracef, "Rolling back transaction of long id %s\n" , llbuf); |
2933 | last_undo= trn->undo_lsn + 1; |
2934 | |
2935 | /* Execute all undo entries */ |
2936 | while (trn->undo_lsn) |
2937 | { |
2938 | TRANSLOG_HEADER_BUFFER rec; |
2939 | LOG_DESC *log_desc; |
2940 | DBUG_ASSERT(trn->undo_lsn < last_undo); |
2941 | last_undo= trn->undo_lsn; |
2942 | |
2943 | if (translog_read_record_header(trn->undo_lsn, &rec) == |
2944 | RECHEADER_READ_ERROR) |
2945 | DBUG_RETURN(1); |
2946 | log_desc= &log_record_type_descriptor[rec.type]; |
2947 | display_record_position(log_desc, &rec, 0); |
2948 | if (log_desc->record_execute_in_undo_phase(&rec, trn)) |
2949 | { |
2950 | eprint(tracef, "Got error %d when executing undo %s" , my_errno, |
2951 | log_desc->name); |
2952 | translog_free_record_header(&rec); |
2953 | DBUG_RETURN(1); |
2954 | } |
2955 | translog_free_record_header(&rec); |
2956 | } |
2957 | |
2958 | /* Force a crash to test recovery of recovery */ |
2959 | if (maria_recovery_force_crash_counter) |
2960 | { |
2961 | DBUG_ASSERT(--maria_recovery_force_crash_counter > 0); |
2962 | } |
2963 | |
2964 | if (trnman_rollback_trn(trn)) |
2965 | DBUG_RETURN(1); |
2966 | /* We could want to span a few threads (4?) instead of 1 */ |
2967 | /* In the future, we want to have this phase *online* */ |
2968 | } |
2969 | } |
2970 | procent_printed= 0; |
2971 | DBUG_RETURN(0); |
2972 | } |
2973 | |
2974 | |
2975 | /** |
2976 | In case of error in recovery, deletes all transactions from the transaction |
2977 | manager so that this module does not assert. |
2978 | |
2979 | @note no checkpoint should be taken as those transactions matter for the |
2980 | next recovery (they still haven't been properly dealt with). |
2981 | */ |
2982 | |
2983 | static void delete_all_transactions() |
2984 | { |
2985 | for( ; ; ) |
2986 | { |
2987 | TRN *trn= trnman_get_any_trn(); |
2988 | if (trn == NULL) |
2989 | break; |
2990 | trn->undo_lsn= trn->first_undo_lsn= LSN_IMPOSSIBLE; |
2991 | trnman_rollback_trn(trn); /* ignore error */ |
2992 | } |
2993 | } |
2994 | |
2995 | |
2996 | /** |
2997 | @brief re-enables transactionality, updates is_of_horizon |
2998 | |
2999 | @param info table |
3000 | @param horizon address to set is_of_horizon |
3001 | */ |
3002 | |
3003 | static void prepare_table_for_close(MARIA_HA *info, TRANSLOG_ADDRESS horizon) |
3004 | { |
3005 | MARIA_SHARE *share= info->s; |
3006 | /* |
3007 | In a fully-forward REDO phase (no checkpoint record), |
3008 | state is now at least as new as the LSN of the current record. It may be |
3009 | newer, in case we are seeing a LOGREC_FILE_ID which tells us to close a |
3010 | table, but that table was later modified further in the log. |
3011 | But if we parsed a checkpoint record, it may be this way in the log: |
3012 | FILE_ID(6->t2)... FILE_ID(6->t1)... CHECKPOINT(6->t1) |
3013 | Checkpoint parsing opened t1 with id 6; first FILE_ID above is going to |
3014 | make t1 close; the first condition below is however false (when checkpoint |
3015 | was taken it increased is_of_horizon) and so it works. For safety we |
3016 | add the second condition. |
3017 | */ |
3018 | if (cmp_translog_addr(share->state.is_of_horizon, horizon) < 0 && |
3019 | cmp_translog_addr(share->lsn_of_file_id, horizon) < 0) |
3020 | { |
3021 | share->state.is_of_horizon= horizon; |
3022 | _ma_state_info_write_sub(share->kfile.file, &share->state, |
3023 | MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET); |
3024 | } |
3025 | |
3026 | /* |
3027 | Ensure that info->state is up to date as |
3028 | _ma_renable_logging_for_table() is depending on this |
3029 | */ |
3030 | *info->state= info->s->state.state; |
3031 | |
3032 | /* |
3033 | This leaves PAGECACHE_PLAIN_PAGE pages into the cache, while the table is |
3034 | going to switch back to transactional. So the table will be a mix of |
3035 | pages, which is ok as long as we don't take any checkpoints until all |
3036 | tables get closed at the end of the UNDO phase. |
3037 | */ |
3038 | _ma_reenable_logging_for_table(info, FALSE); |
3039 | info->trn= NULL; /* safety */ |
3040 | } |
3041 | |
3042 | |
3043 | static MARIA_HA *get_MARIA_HA_from_REDO_record(const |
3044 | TRANSLOG_HEADER_BUFFER *rec) |
3045 | { |
3046 | uint16 sid; |
3047 | pgcache_page_no_t UNINIT_VAR(page); |
3048 | MARIA_HA *info; |
3049 | MARIA_SHARE *share; |
3050 | char llbuf[22]; |
3051 | my_bool index_page_redo_entry= FALSE, page_redo_entry= FALSE; |
3052 | |
3053 | print_redo_phase_progress(rec->lsn); |
3054 | sid= fileid_korr(rec->header); |
3055 | switch (rec->type) { |
3056 | /* not all REDO records have a page: */ |
3057 | case LOGREC_REDO_INDEX_NEW_PAGE: |
3058 | case LOGREC_REDO_INDEX: |
3059 | case LOGREC_REDO_INDEX_FREE_PAGE: |
3060 | index_page_redo_entry= 1; |
3061 | /* fall through*/ |
3062 | case LOGREC_REDO_INSERT_ROW_HEAD: |
3063 | case LOGREC_REDO_INSERT_ROW_TAIL: |
3064 | case LOGREC_REDO_PURGE_ROW_HEAD: |
3065 | case LOGREC_REDO_PURGE_ROW_TAIL: |
3066 | case LOGREC_REDO_NEW_ROW_HEAD: |
3067 | case LOGREC_REDO_NEW_ROW_TAIL: |
3068 | case LOGREC_REDO_FREE_HEAD_OR_TAIL: |
3069 | page_redo_entry= TRUE; |
3070 | page= page_korr(rec->header + FILEID_STORE_SIZE); |
3071 | llstr(page, llbuf); |
3072 | break; |
3073 | case LOGREC_REDO_FREE_BLOCKS: |
3074 | /* |
3075 | We are checking against the dirty pages in _ma_apply_redo_free_blocks() |
3076 | */ |
3077 | break; |
3078 | default: |
3079 | break; |
3080 | } |
3081 | tprint(tracef, " For table of short id %u" , sid); |
3082 | info= all_tables[sid].info; |
3083 | #ifndef DBUG_OFF |
3084 | DBUG_ASSERT(current_group_table == NULL || current_group_table == info); |
3085 | current_group_table= info; |
3086 | #endif |
3087 | if (info == NULL) |
3088 | { |
3089 | tprint(tracef, ", table skipped, so skipping record\n" ); |
3090 | return NULL; |
3091 | } |
3092 | share= info->s; |
3093 | tprint(tracef, ", '%s'" , share->open_file_name.str); |
3094 | DBUG_ASSERT(in_redo_phase); |
3095 | if (!table_is_part_of_recovery_set(&share->open_file_name)) |
3096 | { |
3097 | tprint(tracef, ", skipped by user\n" ); |
3098 | return NULL; |
3099 | } |
3100 | |
3101 | if (cmp_translog_addr(rec->lsn, share->lsn_of_file_id) <= 0) |
3102 | { |
3103 | /* |
3104 | This can happen only if processing a record before the checkpoint |
3105 | record. |
3106 | id->name mapping is newer than REDO record: for sure the table subject |
3107 | of the REDO has been flushed and forced (id re-assignment implies this); |
3108 | REDO can be ignored (and must be, as we don't know what this subject |
3109 | table was). |
3110 | */ |
3111 | DBUG_ASSERT(cmp_translog_addr(rec->lsn, checkpoint_start) < 0); |
3112 | tprint(tracef, ", table's LOGREC_FILE_ID has LSN " LSN_FMT " more recent" |
3113 | " than record, skipping record" , |
3114 | LSN_IN_PARTS(share->lsn_of_file_id)); |
3115 | return NULL; |
3116 | } |
3117 | if (cmp_translog_addr(rec->lsn, share->state.skip_redo_lsn) <= 0) |
3118 | { |
3119 | /* probably a bulk insert repair */ |
3120 | tprint(tracef, ", has skip_redo_lsn " LSN_FMT " more recent than" |
3121 | " record, skipping record\n" , |
3122 | LSN_IN_PARTS(share->state.skip_redo_lsn)); |
3123 | return NULL; |
3124 | } |
3125 | /* detect if an open instance of a dropped table (internal bug) */ |
3126 | DBUG_ASSERT(share->last_version != 0); |
3127 | if (page_redo_entry) |
3128 | { |
3129 | /* |
3130 | Consult dirty pages list. |
3131 | REDO_INSERT_ROW_BLOBS will consult list by itself, as it covers several |
3132 | pages. |
3133 | */ |
3134 | if (_ma_redo_not_needed_for_page(sid, rec->lsn, page, |
3135 | index_page_redo_entry)) |
3136 | return NULL; |
3137 | } |
3138 | /* |
3139 | So we are going to read the page, and if its LSN is older than the |
3140 | record's we will modify the page |
3141 | */ |
3142 | tprint(tracef, ", applying record\n" ); |
3143 | _ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE); /* to flush state on close */ |
3144 | return info; |
3145 | } |
3146 | |
3147 | |
3148 | static MARIA_HA *get_MARIA_HA_from_UNDO_record(const |
3149 | TRANSLOG_HEADER_BUFFER *rec) |
3150 | { |
3151 | uint16 sid; |
3152 | MARIA_HA *info; |
3153 | MARIA_SHARE *share; |
3154 | |
3155 | sid= fileid_korr(rec->header + LSN_STORE_SIZE); |
3156 | tprint(tracef, " For table of short id %u" , sid); |
3157 | info= all_tables[sid].info; |
3158 | #ifndef DBUG_OFF |
3159 | DBUG_ASSERT(!in_redo_phase || |
3160 | current_group_table == NULL || current_group_table == info); |
3161 | current_group_table= info; |
3162 | #endif |
3163 | if (info == NULL) |
3164 | { |
3165 | tprint(tracef, ", table skipped, so skipping record\n" ); |
3166 | return NULL; |
3167 | } |
3168 | share= info->s; |
3169 | tprint(tracef, ", '%s'" , share->open_file_name.str); |
3170 | |
3171 | if (!table_is_part_of_recovery_set(&share->open_file_name)) |
3172 | { |
3173 | tprint(tracef, ", skipped by user\n" ); |
3174 | return NULL; |
3175 | } |
3176 | |
3177 | if (cmp_translog_addr(rec->lsn, share->lsn_of_file_id) <= 0) |
3178 | { |
3179 | tprint(tracef, ", table's LOGREC_FILE_ID has LSN " LSN_FMT " more recent" |
3180 | " than record, skipping record" , |
3181 | LSN_IN_PARTS(share->lsn_of_file_id)); |
3182 | return NULL; |
3183 | } |
3184 | if (in_redo_phase && |
3185 | cmp_translog_addr(rec->lsn, share->state.skip_redo_lsn) <= 0) |
3186 | { |
3187 | /* probably a bulk insert repair */ |
3188 | tprint(tracef, ", has skip_redo_lsn " LSN_FMT " more recent than" |
3189 | " record, skipping record\n" , |
3190 | LSN_IN_PARTS(share->state.skip_redo_lsn)); |
3191 | return NULL; |
3192 | } |
3193 | DBUG_ASSERT(share->last_version != 0); |
3194 | _ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE); /* to flush state on close */ |
3195 | tprint(tracef, ", applying record\n" ); |
3196 | return info; |
3197 | } |
3198 | |
3199 | |
3200 | /** |
3201 | @brief Parses checkpoint record. |
3202 | |
3203 | Builds from it the dirty_pages list (a hash), opens tables and maps them to |
3204 | their 2-byte IDs, recreates transactions (not real TRNs though). |
3205 | |
3206 | @return LSN from where in the log the REDO phase should start |
3207 | @retval LSN_ERROR error |
3208 | @retval other ok |
3209 | */ |
3210 | |
3211 | static LSN parse_checkpoint_record(LSN lsn) |
3212 | { |
3213 | ulong i; |
3214 | ulonglong nb_dirty_pages; |
3215 | TRANSLOG_HEADER_BUFFER rec; |
3216 | TRANSLOG_ADDRESS start_address; |
3217 | int len; |
3218 | uint nb_active_transactions, nb_committed_transactions, nb_tables; |
3219 | uchar *ptr; |
3220 | LSN minimum_rec_lsn_of_active_transactions, minimum_rec_lsn_of_dirty_pages; |
3221 | struct st_dirty_page *next_dirty_page_in_pool; |
3222 | |
3223 | tprint(tracef, "Loading data from checkpoint record at LSN " LSN_FMT "\n" , |
3224 | LSN_IN_PARTS(lsn)); |
3225 | if ((len= translog_read_record_header(lsn, &rec)) == RECHEADER_READ_ERROR || |
3226 | rec.type != LOGREC_CHECKPOINT) |
3227 | { |
3228 | eprint(tracef, "Cannot find checkpoint record at LSN " LSN_FMT, |
3229 | LSN_IN_PARTS(lsn)); |
3230 | return LSN_ERROR; |
3231 | } |
3232 | |
3233 | enlarge_buffer(&rec); |
3234 | if (log_record_buffer.str == NULL || |
3235 | translog_read_record(rec.lsn, 0, rec.record_length, |
3236 | log_record_buffer.str, NULL) != |
3237 | rec.record_length) |
3238 | { |
3239 | eprint(tracef, "Failed to read record" ); |
3240 | return LSN_ERROR; |
3241 | } |
3242 | |
3243 | ptr= log_record_buffer.str; |
3244 | start_address= lsn_korr(ptr); |
3245 | ptr+= LSN_STORE_SIZE; |
3246 | tprint(tracef, "Checkpoint record has start_horizon at " LSN_FMT "\n" , |
3247 | LSN_IN_PARTS(start_address)); |
3248 | |
3249 | /* transactions */ |
3250 | nb_active_transactions= uint2korr(ptr); |
3251 | ptr+= 2; |
3252 | tprint(tracef, "%u active transactions\n" , nb_active_transactions); |
3253 | minimum_rec_lsn_of_active_transactions= lsn_korr(ptr); |
3254 | ptr+= LSN_STORE_SIZE; |
3255 | max_long_trid= transid_korr(ptr); |
3256 | ptr+= TRANSID_SIZE; |
3257 | |
3258 | /* |
3259 | how much brain juice and discussions there was to come to writing this |
3260 | line. It may make start_address slightly decrease (only by the time it |
3261 | takes to write one or a few rows, roughly). |
3262 | */ |
3263 | tprint(tracef, "Checkpoint record has min_rec_lsn of active transactions" |
3264 | " at " LSN_FMT "\n" , |
3265 | LSN_IN_PARTS(minimum_rec_lsn_of_active_transactions)); |
3266 | set_if_smaller(start_address, minimum_rec_lsn_of_active_transactions); |
3267 | |
3268 | for (i= 0; i < nb_active_transactions; i++) |
3269 | { |
3270 | uint16 sid= uint2korr(ptr); |
3271 | TrID long_id; |
3272 | LSN undo_lsn, first_undo_lsn; |
3273 | ptr+= 2; |
3274 | long_id= uint6korr(ptr); |
3275 | ptr+= 6; |
3276 | DBUG_ASSERT(sid > 0 && long_id > 0); |
3277 | undo_lsn= lsn_korr(ptr); |
3278 | ptr+= LSN_STORE_SIZE; |
3279 | first_undo_lsn= lsn_korr(ptr); |
3280 | ptr+= LSN_STORE_SIZE; |
3281 | new_transaction(sid, long_id, undo_lsn, first_undo_lsn); |
3282 | } |
3283 | nb_committed_transactions= uint4korr(ptr); |
3284 | ptr+= 4; |
3285 | tprint(tracef, "%lu committed transactions\n" , |
3286 | (ulong)nb_committed_transactions); |
3287 | /* no purging => committed transactions are not important */ |
3288 | ptr+= (6 + LSN_STORE_SIZE) * nb_committed_transactions; |
3289 | |
3290 | /* tables */ |
3291 | nb_tables= uint4korr(ptr); |
3292 | ptr+= 4; |
3293 | tprint(tracef, "%u open tables\n" , nb_tables); |
3294 | for (i= 0; i< nb_tables; i++) |
3295 | { |
3296 | char name[FN_REFLEN]; |
3297 | LSN first_log_write_lsn; |
3298 | size_t name_len; |
3299 | uint16 sid= uint2korr(ptr); |
3300 | ptr+= 2; |
3301 | DBUG_ASSERT(sid > 0); |
3302 | first_log_write_lsn= lsn_korr(ptr); |
3303 | ptr+= LSN_STORE_SIZE; |
3304 | name_len= strlen((char *)ptr) + 1; |
3305 | strmake_buf(name, (char *)ptr); |
3306 | ptr+= name_len; |
3307 | if (new_table(sid, name, first_log_write_lsn)) |
3308 | return LSN_ERROR; |
3309 | } |
3310 | |
3311 | /* dirty pages */ |
3312 | nb_dirty_pages= uint8korr(ptr); |
3313 | |
3314 | /* Ensure casts later will not loose significant bits. */ |
3315 | DBUG_ASSERT((nb_dirty_pages <= SIZE_T_MAX/sizeof(struct st_dirty_page)) && |
3316 | (nb_dirty_pages <= ULONG_MAX)); |
3317 | |
3318 | ptr+= 8; |
3319 | tprint(tracef, "%lu dirty pages\n" , (ulong) nb_dirty_pages); |
3320 | if (my_hash_init(&all_dirty_pages, &my_charset_bin, (ulong)nb_dirty_pages, |
3321 | offsetof(struct st_dirty_page, file_and_page_id), |
3322 | sizeof(((struct st_dirty_page *)NULL)->file_and_page_id), |
3323 | NULL, NULL, 0)) |
3324 | return LSN_ERROR; |
3325 | dirty_pages_pool= |
3326 | (struct st_dirty_page *)my_malloc((size_t)nb_dirty_pages * |
3327 | sizeof(struct st_dirty_page), |
3328 | MYF(MY_WME)); |
3329 | if (unlikely(dirty_pages_pool == NULL)) |
3330 | return LSN_ERROR; |
3331 | next_dirty_page_in_pool= dirty_pages_pool; |
3332 | minimum_rec_lsn_of_dirty_pages= LSN_MAX; |
3333 | if (maria_recovery_verbose) |
3334 | tprint(tracef, "Table_id Is_index Page_id Rec_lsn\n" ); |
3335 | for (i= 0; i < nb_dirty_pages ; i++) |
3336 | { |
3337 | pgcache_page_no_t page_id; |
3338 | LSN rec_lsn; |
3339 | uint32 is_index; |
3340 | uint16 table_id= uint2korr(ptr); |
3341 | ptr+= 2; |
3342 | is_index= ptr[0]; |
3343 | ptr++; |
3344 | page_id= page_korr(ptr); |
3345 | ptr+= PAGE_STORE_SIZE; |
3346 | rec_lsn= lsn_korr(ptr); |
3347 | ptr+= LSN_STORE_SIZE; |
3348 | if (new_page((is_index << 16) | table_id, |
3349 | page_id, rec_lsn, next_dirty_page_in_pool++)) |
3350 | return LSN_ERROR; |
3351 | if (maria_recovery_verbose) |
3352 | tprint(tracef, "%8u %8u %12lu " LSN_FMT "\n" , (uint) table_id, |
3353 | (uint) is_index, (ulong) page_id, LSN_IN_PARTS(rec_lsn)); |
3354 | set_if_smaller(minimum_rec_lsn_of_dirty_pages, rec_lsn); |
3355 | } |
3356 | /* after that, there will be no insert/delete into the hash */ |
3357 | /* |
3358 | sanity check on record (did we screw up with all those "ptr+=", did the |
3359 | checkpoint write code and checkpoint read code go out of sync?). |
3360 | */ |
3361 | if (ptr != (log_record_buffer.str + log_record_buffer.length)) |
3362 | { |
3363 | eprint(tracef, "checkpoint record corrupted\n" ); |
3364 | return LSN_ERROR; |
3365 | } |
3366 | |
3367 | /* |
3368 | start_address is now from where the dirty pages list can be ignored. |
3369 | Find LSN higher or equal to this TRANSLOG_ADDRESS, suitable for |
3370 | translog_read_record() functions. |
3371 | */ |
3372 | start_address= checkpoint_start= |
3373 | translog_next_LSN(start_address, LSN_IMPOSSIBLE); |
3374 | tprint(tracef, "Checkpoint record start_horizon now adjusted to" |
3375 | " LSN " LSN_FMT "\n" , LSN_IN_PARTS(start_address)); |
3376 | if (checkpoint_start == LSN_IMPOSSIBLE) |
3377 | { |
3378 | /* |
3379 | There must be a problem, as our checkpoint record exists and is >= the |
3380 | address which is stored in its first bytes, which is >= start_address. |
3381 | */ |
3382 | return LSN_ERROR; |
3383 | } |
3384 | /* now, where the REDO phase should start reading log: */ |
3385 | tprint(tracef, "Checkpoint has min_rec_lsn of dirty pages at" |
3386 | " LSN " LSN_FMT "\n" , LSN_IN_PARTS(minimum_rec_lsn_of_dirty_pages)); |
3387 | set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages); |
3388 | DBUG_PRINT("info" , |
3389 | ("checkpoint_start: " LSN_FMT " start_address: " LSN_FMT, |
3390 | LSN_IN_PARTS(checkpoint_start), LSN_IN_PARTS(start_address))); |
3391 | return start_address; |
3392 | } |
3393 | |
3394 | |
3395 | static int new_page(uint32 fileid, pgcache_page_no_t pageid, LSN rec_lsn, |
3396 | struct st_dirty_page *dirty_page) |
3397 | { |
3398 | /* serves as hash key */ |
3399 | dirty_page->file_and_page_id= (((uint64)fileid) << 40) | pageid; |
3400 | dirty_page->rec_lsn= rec_lsn; |
3401 | return my_hash_insert(&all_dirty_pages, (uchar *)dirty_page); |
3402 | } |
3403 | |
3404 | |
3405 | static int close_all_tables(void) |
3406 | { |
3407 | int error= 0; |
3408 | uint count= 0; |
3409 | LIST *list_element, *next_open; |
3410 | MARIA_HA *info; |
3411 | TRANSLOG_ADDRESS addr; |
3412 | DBUG_ENTER("close_all_tables" ); |
3413 | |
3414 | mysql_mutex_lock(&THR_LOCK_maria); |
3415 | if (maria_open_list == NULL) |
3416 | goto end; |
3417 | tprint(tracef, "Closing all tables\n" ); |
3418 | if (tracef != stdout) |
3419 | { |
3420 | if (recovery_message_printed == REC_MSG_NONE) |
3421 | print_preamble(); |
3422 | for (count= 0, list_element= maria_open_list ; |
3423 | list_element ; count++, (list_element= list_element->next)) |
3424 | ; |
3425 | fprintf(stderr, "tables to flush:" ); |
3426 | recovery_message_printed= REC_MSG_FLUSH; |
3427 | } |
3428 | /* |
3429 | Since the end of end_of_redo_phase(), we may have written new records |
3430 | (if UNDO phase ran) and thus the state is newer than at |
3431 | end_of_redo_phase(), we need to bump is_of_horizon again. |
3432 | */ |
3433 | addr= translog_get_horizon(); |
3434 | for (list_element= maria_open_list ; ; list_element= next_open) |
3435 | { |
3436 | if (recovery_message_printed == REC_MSG_FLUSH) |
3437 | { |
3438 | fprintf(stderr, " %u" , count--); |
3439 | fflush(stderr); |
3440 | } |
3441 | if (list_element == NULL) |
3442 | break; |
3443 | next_open= list_element->next; |
3444 | info= (MARIA_HA*)list_element->data; |
3445 | mysql_mutex_unlock(&THR_LOCK_maria); /* ok, UNDO phase not online yet */ |
3446 | /* |
3447 | Tables which we see here are exactly those which were open at time of |
3448 | crash. They might have open_count>0 as Checkpoint maybe flushed their |
3449 | state while they were used. As Recovery corrected them, don't alarm the |
3450 | user, don't ask for a table check: |
3451 | */ |
3452 | if (info->s->state.open_count != 0) |
3453 | { |
3454 | /* let maria_close() mark the table properly closed */ |
3455 | info->s->state.open_count= 1; |
3456 | info->s->global_changed= 1; |
3457 | info->s->changed= 1; |
3458 | } |
3459 | prepare_table_for_close(info, addr); |
3460 | error|= maria_close(info); |
3461 | mysql_mutex_lock(&THR_LOCK_maria); |
3462 | |
3463 | /* Force a crash to test recovery of recovery */ |
3464 | if (maria_recovery_force_crash_counter) |
3465 | { |
3466 | DBUG_ASSERT(--maria_recovery_force_crash_counter > 0); |
3467 | } |
3468 | } |
3469 | end: |
3470 | mysql_mutex_unlock(&THR_LOCK_maria); |
3471 | DBUG_RETURN(error); |
3472 | } |
3473 | |
3474 | |
3475 | /** |
3476 | @brief Close all table instances with a certain name which are present in |
3477 | all_tables. |
3478 | |
3479 | @param name Name of table |
3480 | @param addr Log address passed to prepare_table_for_close() |
3481 | */ |
3482 | |
3483 | static my_bool close_one_table(const char *name, TRANSLOG_ADDRESS addr) |
3484 | { |
3485 | my_bool res= 0; |
3486 | /* There are no other threads using the tables, so we don't need any locks */ |
3487 | struct st_table_for_recovery *internal_table, *end; |
3488 | for (internal_table= all_tables, end= internal_table + SHARE_ID_MAX + 1; |
3489 | internal_table < end ; |
3490 | internal_table++) |
3491 | { |
3492 | MARIA_HA *info= internal_table->info; |
3493 | if ((info != NULL) && !strcmp(info->s->open_file_name.str, name)) |
3494 | { |
3495 | prepare_table_for_close(info, addr); |
3496 | if (maria_close(info)) |
3497 | res= 1; |
3498 | internal_table->info= NULL; |
3499 | } |
3500 | } |
3501 | return res; |
3502 | } |
3503 | |
3504 | |
3505 | /** |
3506 | Temporarily disables logging for this table. |
3507 | |
3508 | If that makes the log incomplete, writes a LOGREC_INCOMPLETE_LOG to the log |
3509 | to warn log readers. |
3510 | |
3511 | @param info table |
3512 | @param log_incomplete if that disabling makes the log incomplete |
3513 | |
3514 | @note for example in the REDO phase we disable logging but that does not |
3515 | make the log incomplete. |
3516 | */ |
3517 | |
3518 | void _ma_tmp_disable_logging_for_table(MARIA_HA *info, |
3519 | my_bool log_incomplete) |
3520 | { |
3521 | MARIA_SHARE *share= info->s; |
3522 | DBUG_ENTER("_ma_tmp_disable_logging_for_table" ); |
3523 | |
3524 | /* |
3525 | We have to ensure that bitmap is flushed, as it's checking |
3526 | that share->now_transactional is set |
3527 | */ |
3528 | if (share->now_transactional && share->data_file_type == BLOCK_RECORD) |
3529 | _ma_bitmap_flush_all(share); |
3530 | |
3531 | if (log_incomplete) |
3532 | { |
3533 | uchar log_data[FILEID_STORE_SIZE]; |
3534 | LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; |
3535 | LSN lsn; |
3536 | log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; |
3537 | log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); |
3538 | translog_write_record(&lsn, LOGREC_INCOMPLETE_LOG, |
3539 | &dummy_transaction_object, info, |
3540 | (translog_size_t) sizeof(log_data), |
3541 | TRANSLOG_INTERNAL_PARTS + 1, log_array, |
3542 | log_data, NULL); |
3543 | } |
3544 | |
3545 | /* if we disabled before writing the record, record wouldn't reach log */ |
3546 | share->now_transactional= FALSE; |
3547 | |
3548 | /* |
3549 | Reset state pointers. This is needed as in ALTER table we may do |
3550 | commit followed by _ma_renable_logging_for_table and then |
3551 | info->state may point to a state that was deleted by |
3552 | _ma_trnman_end_trans_hook() |
3553 | */ |
3554 | share->state.common= *info->state; |
3555 | info->state= &share->state.common; |
3556 | info->switched_transactional= TRUE; |
3557 | |
3558 | /* |
3559 | Some code in ma_blockrec.c assumes a trn even if !now_transactional but in |
3560 | this case it only reads trn->rec_lsn, which has to be LSN_IMPOSSIBLE and |
3561 | should be now. info->trn may be NULL in maria_chk. |
3562 | */ |
3563 | if (info->trn == NULL) |
3564 | info->trn= &dummy_transaction_object; |
3565 | DBUG_ASSERT(info->trn->rec_lsn == LSN_IMPOSSIBLE); |
3566 | share->page_type= PAGECACHE_PLAIN_PAGE; |
3567 | /* Functions below will pick up now_transactional and change callbacks */ |
3568 | _ma_set_data_pagecache_callbacks(&info->dfile, share); |
3569 | _ma_set_index_pagecache_callbacks(&share->kfile, share); |
3570 | _ma_bitmap_set_pagecache_callbacks(&share->bitmap.file, share); |
3571 | DBUG_VOID_RETURN; |
3572 | } |
3573 | |
3574 | |
3575 | /** |
3576 | Re-enables logging for a table which had it temporarily disabled. |
3577 | |
3578 | Only the thread which disabled logging is allowed to reenable it. Indeed, |
3579 | re-enabling logging affects all open instances, one must have exclusive |
3580 | access to the table to do that. In practice, the one which disables has |
3581 | such access. |
3582 | |
3583 | @param info table |
3584 | @param flush_pages if function needs to flush pages first |
3585 | */ |
3586 | |
3587 | my_bool _ma_reenable_logging_for_table(MARIA_HA *info, my_bool flush_pages) |
3588 | { |
3589 | MARIA_SHARE *share= info->s; |
3590 | DBUG_ENTER("_ma_reenable_logging_for_table" ); |
3591 | |
3592 | if (share->now_transactional == share->base.born_transactional || |
3593 | !info->switched_transactional) |
3594 | { |
3595 | info->switched_transactional= FALSE; |
3596 | DBUG_RETURN(0); |
3597 | } |
3598 | info->switched_transactional= FALSE; |
3599 | |
3600 | if ((share->now_transactional= share->base.born_transactional)) |
3601 | { |
3602 | share->page_type= PAGECACHE_LSN_PAGE; |
3603 | |
3604 | /* |
3605 | Copy state information that where updated while the table was used |
3606 | in not transactional mode |
3607 | */ |
3608 | _ma_copy_nontrans_state_information(info); |
3609 | _ma_reset_history(info->s); |
3610 | |
3611 | if (flush_pages) |
3612 | { |
3613 | /* Ensure that recover is not executing any redo before this */ |
3614 | if (!maria_in_recovery) |
3615 | share->state.is_of_horizon= share->state.create_rename_lsn= |
3616 | share->state.skip_redo_lsn= translog_get_horizon(); |
3617 | /* |
3618 | We are going to change callbacks; if a page is flushed at this moment |
3619 | this can cause race conditions, that's one reason to flush pages |
3620 | now. Other reasons: a checkpoint could be running and miss pages; the |
3621 | pages have type PAGECACHE_PLAIN_PAGE which should not remain. As |
3622 | there are no REDOs for pages, them, bitmaps and the state also have to |
3623 | be flushed and synced. |
3624 | */ |
3625 | if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, |
3626 | FLUSH_RELEASE, FLUSH_RELEASE) || |
3627 | _ma_state_info_write(share, |
3628 | MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET | |
3629 | MA_STATE_INFO_WRITE_LOCK) || |
3630 | _ma_sync_table_files(info)) |
3631 | DBUG_RETURN(1); |
3632 | } |
3633 | else if (!maria_in_recovery) |
3634 | { |
3635 | /* |
3636 | Except in Recovery, we mustn't leave dirty pages (see comments above). |
3637 | Note that this does not verify that the state was flushed, but hey. |
3638 | */ |
3639 | pagecache_file_no_dirty_page(share->pagecache, &info->dfile); |
3640 | pagecache_file_no_dirty_page(share->pagecache, &share->kfile); |
3641 | } |
3642 | _ma_set_data_pagecache_callbacks(&info->dfile, share); |
3643 | _ma_set_index_pagecache_callbacks(&share->kfile, share); |
3644 | _ma_bitmap_set_pagecache_callbacks(&share->bitmap.file, share); |
3645 | /* |
3646 | info->trn was not changed in the disable/enable combo, so that it's |
3647 | still usable in this kind of combination: |
3648 | external_lock; |
3649 | start_bulk_insert; # table is empty, disables logging |
3650 | end_bulk_insert; # enables logging |
3651 | start_bulk_insert; # table is not empty, logging stays |
3652 | # so rows insertion needs the real trn. |
3653 | as happens during row-based replication on the slave. |
3654 | */ |
3655 | } |
3656 | DBUG_RETURN(0); |
3657 | } |
3658 | |
3659 | |
3660 | static void print_redo_phase_progress(TRANSLOG_ADDRESS addr) |
3661 | { |
3662 | static uint end_logno= FILENO_IMPOSSIBLE, percentage_printed= 0; |
3663 | static ulong end_offset; |
3664 | static ulonglong initial_remainder= ~(ulonglong) 0; |
3665 | |
3666 | uint cur_logno; |
3667 | ulong cur_offset; |
3668 | ulonglong local_remainder; |
3669 | uint percentage_done; |
3670 | |
3671 | if (tracef == stdout) |
3672 | return; |
3673 | if (recovery_message_printed == REC_MSG_NONE) |
3674 | { |
3675 | print_preamble(); |
3676 | fprintf(stderr, "recovered pages: 0%%" ); |
3677 | fflush(stderr); |
3678 | procent_printed= 1; |
3679 | recovery_message_printed= REC_MSG_REDO; |
3680 | } |
3681 | if (end_logno == FILENO_IMPOSSIBLE) |
3682 | { |
3683 | LSN end_addr= translog_get_horizon(); |
3684 | end_logno= LSN_FILE_NO(end_addr); |
3685 | end_offset= LSN_OFFSET(end_addr); |
3686 | } |
3687 | cur_logno= LSN_FILE_NO(addr); |
3688 | cur_offset= LSN_OFFSET(addr); |
3689 | local_remainder= (cur_logno == end_logno) ? (end_offset - cur_offset) : |
3690 | (((longlong)log_file_size) - cur_offset + |
3691 | MY_MAX(end_logno - cur_logno - 1, 0) * ((longlong)log_file_size) + |
3692 | end_offset); |
3693 | if (initial_remainder == (ulonglong)(-1)) |
3694 | initial_remainder= local_remainder; |
3695 | percentage_done= (uint) ((initial_remainder - local_remainder) * 100ULL / |
3696 | initial_remainder); |
3697 | if ((percentage_done - percentage_printed) >= 10) |
3698 | { |
3699 | percentage_printed= percentage_done; |
3700 | fprintf(stderr, " %u%%" , percentage_done); |
3701 | fflush(stderr); |
3702 | procent_printed= 1; |
3703 | } |
3704 | } |
3705 | |
3706 | |
3707 | #ifdef MARIA_EXTERNAL_LOCKING |
3708 | #error Marias Checkpoint and Recovery are really not ready for it |
3709 | #endif |
3710 | |
3711 | /* |
3712 | Recovery of the state : how it works |
3713 | ===================================== |
3714 | |
3715 | Here we ignore Checkpoints for a start. |
3716 | |
3717 | The state (MARIA_HA::MARIA_SHARE::MARIA_STATE_INFO) is updated in |
3718 | memory frequently (at least at every row write/update/delete) but goes |
3719 | to disk at few moments: maria_close() when closing the last open |
3720 | instance, and a few rare places like CHECK/REPAIR/ALTER |
3721 | (non-transactional tables also do it at maria_lock_database() but we |
3722 | needn't cover them here). |
3723 | |
3724 | In case of crash, state on disk is likely to be older than what it was |
3725 | in memory, the REDO phase needs to recreate the state as it was in |
3726 | memory at the time of crash. When we say Recovery here we will always |
3727 | mean "REDO phase". |
3728 | |
3729 | For example MARIA_STATUS_INFO::records (count of records). It is updated at |
3730 | the end of every row write/update/delete/delete_all. When Recovery sees the |
3731 | sign of such row operation (UNDO or REDO), it may need to update the records' |
3732 | count if that count does not reflect that operation (is older). How to know |
3733 | the age of the state compared to the log record: every time the state |
3734 | goes to disk at runtime, its member "is_of_horizon" is updated to the |
3735 | current end-of-log horizon. So Recovery just needs to compare is_of_horizon |
3736 | and the record's LSN to know if it should modify "records". |
3737 | |
3738 | Other operations like ALTER TABLE DISABLE KEYS update the state but |
3739 | don't write log records, thus the REDO phase cannot repeat their |
3740 | effect on the state in case of crash. But we make them sync the state |
3741 | as soon as they have finished. This reduces the window for a problem. |
3742 | |
3743 | It looks like only one thread at a time updates the state in memory or |
3744 | on disk. We assume that the upper level (normally MySQL) has protection |
3745 | against issuing HA_EXTRA_(FORCE_REOPEN|PREPARE_FOR_RENAME) so that these |
3746 | are not issued while there are any running transactions on the given table. |
3747 | If this is not done, we may write a corrupted state to disk. |
3748 | |
3749 | With checkpoints |
3750 | ================ |
3751 | |
3752 | Checkpoint module needs to read the state in memory and write it to |
3753 | disk. This may happen while some other thread is modifying the state |
3754 | in memory or on disk. Checkpoint thus may be reading changing data, it |
3755 | needs a mutex to not have it corrupted, and concurrent modifiers of |
3756 | the state need that mutex too for the same reason. |
3757 | "records" is modified for every row write/update/delete, we don't want |
3758 | to add a mutex lock/unlock there. So we re-use the mutex lock/unlock |
3759 | which is already present in these moments, namely the log's mutex which is |
3760 | taken when UNDO_ROW_INSERT|UPDATE|DELETE is written: we update "records" in |
3761 | under-log-mutex hooks when writing these records (thus "records" is |
3762 | not updated at the end of maria_write/update/delete() anymore). |
  Thus Checkpoint takes the log's lock and can read "records" from
  memory and write it to disk and release log's lock.
3765 | We however want to avoid having the disk write under the log's |
3766 | lock. So it has to be under another mutex, natural choice is |
3767 | intern_lock (as Checkpoint needs it anyway to read MARIA_SHARE::kfile, |
3768 | and as maria_close() takes it too). All state writes to disk are |
3769 | changed to be protected with intern_lock. |
3770 | So Checkpoint takes intern_lock, log's lock, reads "records" from |
3771 | memory, releases log's lock, updates is_of_horizon and writes "records" to |
3772 | disk, release intern_lock. |
3773 | In practice, not only "records" needs to be written but the full |
3774 | state. So, Checkpoint reads the full state from memory. Some other |
  thread may at this moment be modifying in memory some pieces of the
  state which are not protected by the log's lock (see ma_extra.c
  HA_EXTRA_NO_KEYS), and Checkpoint would be reading a corrupted state
3778 | from memory; to guard against that we extend the intern_lock-zone to |
3779 | changes done to the state in memory by HA_EXTRA_NO_KEYS et al, and |
3780 | also any change made in memory to create_rename_lsn/state_is_of_horizon. |
3781 | Last, we don't want in Checkpoint to do |
3782 | log lock; read state from memory; release log lock; |
3783 | for each table, it may hold the log's lock too much in total. |
3784 | So, we instead do |
3785 | log lock; read N states from memory; release log lock; |
3786 | Thus, the sequence above happens outside of any intern_lock. |
3787 | But this re-introduces the problem that some other thread may be changing the |
3788 | state in memory and on disk under intern_lock, without log's lock, like |
  HA_EXTRA_NO_KEYS, while we read the N states. However, when Checkpoint later
  comes to handling the table under intern_lock, which is serialized with
  HA_EXTRA_NO_KEYS, it can see that is_of_horizon is higher than when the state
  was read from memory under log's lock, and thus can decide to not flush the
3793 | obsolete state it has, knowing that the other thread flushed a more recent |
3794 | state already. If on the other hand is_of_horizon is not higher, the read |
3795 | state is current and can be flushed. So we have a per-table sequence: |
3796 | lock intern_lock; test if is_of_horizon is higher than when we read the state |
3797 | under log's lock; if no then flush the read state to disk. |
3798 | */ |
3799 | |
3800 | /* some comments and pseudo-code which we keep for later */ |
3801 | #if 0 |
3802 | /* |
3803 | MikaelR suggests: support checkpoints during REDO phase too: do checkpoint |
3804 | after a certain amount of log records have been executed. This helps |
3805 | against repeated crashes. Those checkpoints could not be user-requested |
3806 | (as engine is not communicating during the REDO phase), so they would be |
3807 | automatic: this changes the original assumption that we don't write to the |
3808 | log while in the REDO phase, but why not. How often should we checkpoint? |
3809 | */ |
3810 | |
3811 | /* |
3812 | We want to have two steps: |
3813 | engine->recover_with_max_memory(); |
3814 | next_engine->recover_with_max_memory(); |
3815 | engine->init_with_normal_memory(); |
3816 | next_engine->init_with_normal_memory(); |
3817 | So: in recover_with_max_memory() allocate a giant page cache, do REDO |
3818 | phase, then all page cache is flushed and emptied and freed (only retain |
3819 | small structures like TM): take full checkpoint, which is useful if |
3820 | next engine crashes in its recovery the next second. |
3821 | Destroy all shares (maria_close()), then at init_with_normal_memory() we |
3822 | do this: |
3823 | */ |
3824 | |
3825 | /**** UNDO PHASE *****/ |
3826 | |
3827 | /* |
3828 | Launch one or more threads to do the background rollback. Don't wait for |
3829 | them to complete their rollback (background rollback; for debugging, we |
  can have an option which waits). Set a counter (total_of_rollback_threads)
  to the number of threads to launch.
3832 | |
3833 | Note that InnoDB's rollback-in-background works as long as InnoDB is the |
3834 | last engine to recover, otherwise MySQL will refuse new connections until |
3835 | the last engine has recovered so it's not "background" from the user's |
3836 | point of view. InnoDB is near top of sys_table_types so all others |
3837 | (e.g. BDB) recover after it... So it's really "online rollback" only if |
3838 | InnoDB is the only engine. |
3839 | */ |
3840 | |
3841 | /* wake up delete/update handler */ |
3842 | /* tell the TM that it can now accept new transactions */ |
3843 | |
3844 | /* |
3845 | mark that checkpoint requests are now allowed. |
3846 | */ |
3847 | #endif |
3848 | |