| 1 | /* Copyright (C) 2007 MySQL AB & Guilhem Bichot & Michael Widenius |
| 2 | |
| 3 | This program is free software; you can redistribute it and/or modify |
| 4 | it under the terms of the GNU General Public License as published by |
| 5 | the Free Software Foundation; version 2 of the License. |
| 6 | |
| 7 | This program is distributed in the hope that it will be useful, |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | GNU General Public License for more details. |
| 11 | |
| 12 | You should have received a copy of the GNU General Public License |
| 13 | along with this program; if not, write to the Free Software |
| 14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
| 15 | |
| 16 | /* |
| 17 | WL#3234 Maria control file |
| 18 | First version written by Guilhem Bichot on 2006-04-27. |
| 19 | */ |
| 20 | |
| 21 | #ifndef EXTRACT_DEFINITIONS |
| 22 | #include "maria_def.h" |
| 23 | #include "ma_checkpoint.h" |
| 24 | #endif |
| 25 | |
| 26 | /* |
| 27 | A control file contains the following objects: |
| 28 | |
| 29 | Start of create time variables (at start of file): |
| 30 | - Magic string (including version number of Maria control file) |
| 31 | - Uuid |
| 32 | - Size of create time part |
| 33 | - Size of dynamic part |
| 34 | - Maria block size |
| 35 | ..... Here we can add new variables without changing format |
| 36 | - Checksum of create time part (last of block) |
| 37 | |
| 38 | Start of changeable part: |
| 39 | - Checksum of changeable part |
| 40 | - LSN of last checkpoint |
| 41 | - Number of last log file |
| 42 | - Max trid in control file (since Maria 1.5 May 2008) |
| 43 | - Number of consecutive recovery failures (since Maria 1.5 May 2008) |
| 44 | ..... Here we can add new variables without changing format |
| 45 | |
| 46 | The idea is that one can add new variables to the control file and still |
| 47 | use it with old program versions. If one needs to do an incompatible change |
| 48 | one should increment the control file version number. |
| 49 | */ |
| 50 | |
/*
  Upper bound for the size of the whole control file. It is kept within one
  512 byte disk sector so that writing it is as atomic as possible.
*/
#define CF_MAX_SIZE 512

/* Size of every checksum stored in the file (one per part) */
#define CF_CHECKSUM_SIZE 4
/* Size of each of the two stored part sizes */
#define CF_SIZE_SIZE 2

/*
  Create time variables.
  These live at the start of the file; offsets are from the start of file.
  The checksum of this part is stored in its last CF_CHECKSUM_SIZE bytes.
*/
#define CF_MAGIC_STRING "\xfe\xfe\xc"
#define CF_MAGIC_STRING_OFFSET 0
#define CF_MAGIC_STRING_SIZE (sizeof(CF_MAGIC_STRING)-1)

#define CF_VERSION_OFFSET (CF_MAGIC_STRING_OFFSET + CF_MAGIC_STRING_SIZE)
#define CF_VERSION_SIZE 1

#define CF_UUID_OFFSET (CF_VERSION_OFFSET + CF_VERSION_SIZE)
#define CF_UUID_SIZE MY_UUID_SIZE

#define CF_CREATE_TIME_SIZE_OFFSET (CF_UUID_OFFSET + CF_UUID_SIZE)
#define CF_CHANGEABLE_SIZE_OFFSET (CF_CREATE_TIME_SIZE_OFFSET + CF_SIZE_SIZE)

#define CF_BLOCKSIZE_OFFSET (CF_CHANGEABLE_SIZE_OFFSET + CF_SIZE_SIZE)
#define CF_BLOCKSIZE_SIZE 2

#define CF_CREATE_TIME_TOTAL_SIZE \
  (CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + CF_CHECKSUM_SIZE)

/*
  Start of the part that changes during execution.
  It is stored directly after the create time part, that is at file offset
  uint2korr(file + CF_CREATE_TIME_SIZE_OFFSET). The offsets below are
  relative to the start of this part; its checksum comes first.
*/
#define CF_CHECKSUM_OFFSET 0

#define CF_LSN_OFFSET (CF_CHECKSUM_OFFSET + CF_CHECKSUM_SIZE)
#define CF_LSN_SIZE LSN_STORE_SIZE

#define CF_FILENO_OFFSET (CF_LSN_OFFSET + CF_LSN_SIZE)
#define CF_FILENO_SIZE 4

#define CF_MAX_TRID_OFFSET (CF_FILENO_OFFSET + CF_FILENO_SIZE)
#define CF_MAX_TRID_SIZE TRANSID_SIZE

#define CF_RECOV_FAIL_OFFSET (CF_MAX_TRID_OFFSET + CF_MAX_TRID_SIZE)
#define CF_RECOV_FAIL_SIZE 1

#define CF_CHANGEABLE_TOTAL_SIZE (CF_RECOV_FAIL_OFFSET + CF_RECOV_FAIL_SIZE)

/*
  The following values should not be changed, except when changing version
  number of the maria control file. These are the minimum sizes of the
  parts the code can handle.
*/
#define CF_MIN_CREATE_TIME_TOTAL_SIZE \
  (CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + CF_CHECKSUM_SIZE)
#define CF_MIN_CHANGEABLE_TOTAL_SIZE \
  (CF_FILENO_OFFSET + CF_FILENO_SIZE)

/* Smallest file we accept: both parts at their minimum size */
#define CF_MIN_SIZE \
  (CF_MIN_CREATE_TIME_TOTAL_SIZE + CF_MIN_CHANGEABLE_TOTAL_SIZE)
| 99 | |
| 100 | #ifndef EXTRACT_DEFINITIONS |
| 101 | |
| 102 | /* This module owns these two vars. */ |
| 103 | /** |
| 104 | This LSN serves for the two-checkpoint rule, and also to find the |
| 105 | checkpoint record when doing a recovery. |
| 106 | */ |
| 107 | LSN last_checkpoint_lsn= LSN_IMPOSSIBLE; |
| 108 | uint32 last_logno= FILENO_IMPOSSIBLE; |
| 109 | /** |
| 110 | The maximum transaction id given to a transaction. It is only updated at |
| 111 | clean shutdown (in case of crash, logs have better information). |
| 112 | */ |
| 113 | TrID max_trid_in_control_file= 0; |
| 114 | |
| 115 | /** |
| 116 | Number of consecutive log or recovery failures. Reset to 0 after recovery's |
| 117 | success. |
| 118 | */ |
| 119 | uint8 recovery_failures= 0; |
| 120 | |
| 121 | /** |
| 122 | @brief If log's lock should be asserted when writing to control file. |
| 123 | |
| 124 | Can be re-used by any function which needs to be thread-safe except when |
| 125 | it is called at startup. |
| 126 | */ |
| 127 | my_bool maria_multi_threaded= FALSE; |
| 128 | /** @brief if currently doing a recovery */ |
| 129 | my_bool maria_in_recovery= FALSE; |
| 130 | |
| 131 | /** |
| 132 | Control file is less then 512 bytes (a disk sector), |
| 133 | to be as atomic as possible |
| 134 | */ |
| 135 | static int control_file_fd= -1; |
| 136 | |
| 137 | static uint cf_create_time_size; |
| 138 | static uint cf_changeable_size; |
| 139 | |
| 140 | /** |
| 141 | @brief Create Maria control file |
| 142 | */ |
| 143 | |
| 144 | static CONTROL_FILE_ERROR create_control_file(const char *name, |
| 145 | int open_flags) |
| 146 | { |
| 147 | uint32 sum; |
| 148 | uchar buffer[CF_CREATE_TIME_TOTAL_SIZE]; |
| 149 | ulong rnd1,rnd2; |
| 150 | |
| 151 | DBUG_ENTER("maria_create_control_file" ); |
| 152 | |
| 153 | if ((control_file_fd= mysql_file_create(key_file_control, name, 0, |
| 154 | open_flags, MYF(MY_SYNC_DIR | MY_WME))) < 0) |
| 155 | DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); |
| 156 | |
| 157 | /* Reset variables, as we are creating the file */ |
| 158 | cf_create_time_size= CF_CREATE_TIME_TOTAL_SIZE; |
| 159 | cf_changeable_size= CF_CHANGEABLE_TOTAL_SIZE; |
| 160 | |
| 161 | /* Create unique uuid for the control file */ |
| 162 | my_random_bytes((uchar *)&rnd1, sizeof (rnd1)); |
| 163 | my_random_bytes((uchar *)&rnd2, sizeof (rnd2)); |
| 164 | my_uuid_init(rnd1, rnd2); |
| 165 | my_uuid(maria_uuid); |
| 166 | |
| 167 | /* Prepare and write the file header */ |
| 168 | memcpy(buffer, CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE); |
| 169 | buffer[CF_VERSION_OFFSET]= CONTROL_FILE_VERSION; |
| 170 | memcpy(buffer + CF_UUID_OFFSET, maria_uuid, CF_UUID_SIZE); |
| 171 | int2store(buffer + CF_CREATE_TIME_SIZE_OFFSET, cf_create_time_size); |
| 172 | int2store(buffer + CF_CHANGEABLE_SIZE_OFFSET, cf_changeable_size); |
| 173 | |
| 174 | /* Write create time variables */ |
| 175 | int2store(buffer + CF_BLOCKSIZE_OFFSET, maria_block_size); |
| 176 | |
| 177 | /* Store checksum for create time parts */ |
| 178 | sum= (uint32) my_checksum(0, buffer, cf_create_time_size - |
| 179 | CF_CHECKSUM_SIZE); |
| 180 | int4store(buffer + cf_create_time_size - CF_CHECKSUM_SIZE, sum); |
| 181 | |
| 182 | if (my_pwrite(control_file_fd, buffer, cf_create_time_size, |
| 183 | 0, MYF(MY_FNABP | MY_WME))) |
| 184 | DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); |
| 185 | |
| 186 | /* |
| 187 | To be safer we should make sure that there are no logs or data/index |
| 188 | files around (indeed it could be that the control file alone was deleted |
| 189 | or not restored, and we should not go on with life at this point). |
| 190 | |
| 191 | Things should still be relatively safe as if someone tries to use |
| 192 | an old table with a new control file the different uuid:s between |
| 193 | the files will cause ma_open() to generate an HA_ERR_OLD_FILE |
| 194 | error. When used from mysqld this will cause the table to be open |
| 195 | in repair mode which will remove all dependencies between the |
| 196 | table and the old control file. |
| 197 | |
| 198 | We could have a tool which can rebuild the control file, by reading the |
| 199 | directory of logs, finding the newest log, reading it to find last |
| 200 | checkpoint... Slow but can save your db. For this to be possible, we |
| 201 | must always write to the control file right after writing the checkpoint |
| 202 | log record, and do nothing in between (i.e. the checkpoint must be |
| 203 | usable as soon as it has been written to the log). |
| 204 | */ |
| 205 | |
| 206 | /* init the file with these "undefined" values */ |
| 207 | DBUG_RETURN(ma_control_file_write_and_force(LSN_IMPOSSIBLE, |
| 208 | FILENO_IMPOSSIBLE, 0, 0)); |
| 209 | } |
| 210 | |
| 211 | |
| 212 | /** |
| 213 | Locks control file exclusively. This is kept for the duration of the engine |
| 214 | process, to prevent another Maria instance to write to our logs or control |
| 215 | file. |
| 216 | */ |
| 217 | |
| 218 | static int lock_control_file(const char *name) |
| 219 | { |
| 220 | /* |
| 221 | On Windows, my_lock() uses locking() which is mandatory locking and so |
| 222 | prevents maria-recovery.test from copying the control file. And in case of |
| 223 | crash, it may take a while for Windows to unlock file, causing downtime. |
| 224 | */ |
| 225 | /** |
| 226 | @todo BUG We should explore my_sopen(_SH_DENYWRD) to open or create the |
| 227 | file under Windows. |
| 228 | */ |
| 229 | #ifndef __WIN__ |
| 230 | uint retry= 0; |
| 231 | /* |
| 232 | We can't here use the automatic wait in my_lock() as the alarm thread |
| 233 | may not yet exists. |
| 234 | */ |
| 235 | while (my_lock(control_file_fd, F_WRLCK, 0L, F_TO_EOF, |
| 236 | MYF(MY_SEEK_NOT_DONE | MY_FORCE_LOCK | MY_NO_WAIT))) |
| 237 | { |
| 238 | if (retry == 0) |
| 239 | my_printf_error(HA_ERR_INITIALIZATION, |
| 240 | "Can't lock aria control file '%s' for exclusive use, " |
| 241 | "error: %d. Will retry for %d seconds" , 0, |
| 242 | name, my_errno, MARIA_MAX_CONTROL_FILE_LOCK_RETRY); |
| 243 | if (retry++ > MARIA_MAX_CONTROL_FILE_LOCK_RETRY) |
| 244 | return 1; |
| 245 | sleep(1); |
| 246 | } |
| 247 | #endif |
| 248 | return 0; |
| 249 | } |
| 250 | |
| 251 | |
| 252 | /* |
| 253 | @brief Initialize control file subsystem |
| 254 | |
| 255 | Looks for the control file. If none and creation is requested, creates file. |
| 256 | If present, reads it to find out last checkpoint's LSN and last log, updates |
| 257 | the last_checkpoint_lsn and last_logno global variables. |
| 258 | Called at engine's start. |
| 259 | |
| 260 | @note |
| 261 | The format of the control file is defined in the comments and defines |
| 262 | at the start of this file. |
| 263 | |
| 264 | @param create_if_missing create file if not found |
| 265 | |
| 266 | @return Operation status |
| 267 | @retval 0 OK |
| 268 | @retval 1 Error (in which case the file is left closed) |
| 269 | */ |
| 270 | |
| 271 | CONTROL_FILE_ERROR ma_control_file_open(my_bool create_if_missing, |
| 272 | my_bool print_error) |
| 273 | { |
| 274 | uchar buffer[CF_MAX_SIZE]; |
| 275 | char name[FN_REFLEN], errmsg_buff[256]; |
| 276 | const char *errmsg, *lock_failed_errmsg= "Could not get an exclusive lock;" |
| 277 | " file is probably in use by another process" ; |
| 278 | uint new_cf_create_time_size, new_cf_changeable_size, new_block_size; |
| 279 | my_off_t file_size; |
| 280 | int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR; |
| 281 | int error= CONTROL_FILE_UNKNOWN_ERROR; |
| 282 | DBUG_ENTER("ma_control_file_open" ); |
| 283 | |
| 284 | /* |
| 285 | If you change sizes in the #defines, you at least have to change the |
| 286 | "*store" and "*korr" calls in this file, and can even create backward |
| 287 | compatibility problems. Beware! |
| 288 | */ |
| 289 | DBUG_ASSERT(CF_LSN_SIZE == (3+4)); |
| 290 | DBUG_ASSERT(CF_FILENO_SIZE == 4); |
| 291 | |
| 292 | if (control_file_fd >= 0) /* already open */ |
| 293 | DBUG_RETURN(0); |
| 294 | |
| 295 | if (fn_format(name, CONTROL_FILE_BASE_NAME, |
| 296 | maria_data_root, "" , MYF(MY_WME)) == NullS) |
| 297 | DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); |
| 298 | |
| 299 | if (my_access(name,F_OK)) |
| 300 | { |
| 301 | CONTROL_FILE_ERROR create_error; |
| 302 | if (!create_if_missing) |
| 303 | { |
| 304 | error= CONTROL_FILE_MISSING; |
| 305 | errmsg= "Can't find file" ; |
| 306 | goto err; |
| 307 | } |
| 308 | if ((create_error= create_control_file(name, open_flags))) |
| 309 | { |
| 310 | error= create_error; |
| 311 | errmsg= "Can't create file" ; |
| 312 | goto err; |
| 313 | } |
| 314 | if (lock_control_file(name)) |
| 315 | { |
| 316 | errmsg= lock_failed_errmsg; |
| 317 | goto err; |
| 318 | } |
| 319 | goto ok; |
| 320 | } |
| 321 | |
| 322 | /* Otherwise, file exists */ |
| 323 | |
| 324 | if ((control_file_fd= mysql_file_open(key_file_control, name, |
| 325 | open_flags, MYF(MY_WME))) < 0) |
| 326 | { |
| 327 | errmsg= "Can't open file" ; |
| 328 | goto err; |
| 329 | } |
| 330 | |
| 331 | if (lock_control_file(name)) /* lock it before reading content */ |
| 332 | { |
| 333 | errmsg= lock_failed_errmsg; |
| 334 | goto err; |
| 335 | } |
| 336 | |
| 337 | file_size= mysql_file_seek(control_file_fd, 0, SEEK_END, MYF(MY_WME)); |
| 338 | if (file_size == MY_FILEPOS_ERROR) |
| 339 | { |
| 340 | errmsg= "Can't read size" ; |
| 341 | goto err; |
| 342 | } |
| 343 | if (file_size < CF_MIN_SIZE) |
| 344 | { |
| 345 | /* |
| 346 | Given that normally we write only a sector and it's atomic, the only |
| 347 | possibility for a file to be of too short size is if we crashed at the |
| 348 | very first startup, between file creation and file write. Quite unlikely |
| 349 | (and can be made even more unlikely by doing this: create a temp file, |
| 350 | write it, and then rename it to be the control file). |
| 351 | What's more likely is if someone forgot to restore the control file, |
| 352 | just did a "touch control" to try to get Maria to start, or if the |
| 353 | disk/filesystem has a problem. |
| 354 | So let's be rigid. |
| 355 | */ |
| 356 | error= CONTROL_FILE_TOO_SMALL; |
| 357 | errmsg= "Size of control file is smaller than expected" ; |
| 358 | goto err; |
| 359 | } |
| 360 | |
| 361 | /* Check if control file is unexpectedly big */ |
| 362 | if (file_size > CF_MAX_SIZE) |
| 363 | { |
| 364 | error= CONTROL_FILE_TOO_BIG; |
| 365 | errmsg= "File size bigger than expected" ; |
| 366 | goto err; |
| 367 | } |
| 368 | |
| 369 | if (mysql_file_pread(control_file_fd, buffer, (size_t)file_size, 0, MYF(MY_FNABP))) |
| 370 | { |
| 371 | errmsg= "Can't read file" ; |
| 372 | goto err; |
| 373 | } |
| 374 | |
| 375 | if (memcmp(buffer + CF_MAGIC_STRING_OFFSET, |
| 376 | CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE)) |
| 377 | { |
| 378 | error= CONTROL_FILE_BAD_MAGIC_STRING; |
| 379 | errmsg= "Missing valid id at start of file. File is not a valid aria control file" ; |
| 380 | goto err; |
| 381 | } |
| 382 | |
| 383 | if (buffer[CF_VERSION_OFFSET] > CONTROL_FILE_VERSION) |
| 384 | { |
| 385 | error= CONTROL_FILE_BAD_VERSION; |
| 386 | sprintf(errmsg_buff, "File is from a future aria system: %d. Current version is: %d" , |
| 387 | (int) buffer[CF_VERSION_OFFSET], CONTROL_FILE_VERSION); |
| 388 | errmsg= errmsg_buff; |
| 389 | goto err; |
| 390 | } |
| 391 | |
| 392 | new_cf_create_time_size= uint2korr(buffer + CF_CREATE_TIME_SIZE_OFFSET); |
| 393 | new_cf_changeable_size= uint2korr(buffer + CF_CHANGEABLE_SIZE_OFFSET); |
| 394 | |
| 395 | if (new_cf_create_time_size < CF_MIN_CREATE_TIME_TOTAL_SIZE || |
| 396 | new_cf_changeable_size < CF_MIN_CHANGEABLE_TOTAL_SIZE || |
| 397 | new_cf_create_time_size + new_cf_changeable_size != file_size) |
| 398 | { |
| 399 | error= CONTROL_FILE_INCONSISTENT_INFORMATION; |
| 400 | errmsg= "Sizes stored in control file are inconsistent" ; |
| 401 | goto err; |
| 402 | } |
| 403 | |
| 404 | new_block_size= uint2korr(buffer + CF_BLOCKSIZE_OFFSET); |
| 405 | if (new_block_size != maria_block_size && maria_block_size) |
| 406 | { |
| 407 | error= CONTROL_FILE_WRONG_BLOCKSIZE; |
| 408 | sprintf(errmsg_buff, |
| 409 | "Block size in control file (%u) is different than given aria_block_size: %u" , |
| 410 | new_block_size, (uint) maria_block_size); |
| 411 | errmsg= errmsg_buff; |
| 412 | goto err; |
| 413 | } |
| 414 | maria_block_size= new_block_size; |
| 415 | |
| 416 | if (my_checksum(0, buffer, new_cf_create_time_size - CF_CHECKSUM_SIZE) != |
| 417 | uint4korr(buffer + new_cf_create_time_size - CF_CHECKSUM_SIZE)) |
| 418 | { |
| 419 | error= CONTROL_FILE_BAD_HEAD_CHECKSUM; |
| 420 | errmsg= "Fixed part checksum mismatch" ; |
| 421 | goto err; |
| 422 | } |
| 423 | |
| 424 | if (my_checksum(0, buffer + new_cf_create_time_size + CF_CHECKSUM_SIZE, |
| 425 | new_cf_changeable_size - CF_CHECKSUM_SIZE) != |
| 426 | uint4korr(buffer + new_cf_create_time_size)) |
| 427 | { |
| 428 | error= CONTROL_FILE_BAD_CHECKSUM; |
| 429 | errmsg= "Changeable part (end of control file) checksum mismatch" ; |
| 430 | goto err; |
| 431 | } |
| 432 | |
| 433 | memcpy(maria_uuid, buffer + CF_UUID_OFFSET, CF_UUID_SIZE); |
| 434 | cf_create_time_size= new_cf_create_time_size; |
| 435 | cf_changeable_size= new_cf_changeable_size; |
| 436 | last_checkpoint_lsn= lsn_korr(buffer + new_cf_create_time_size + |
| 437 | CF_LSN_OFFSET); |
| 438 | last_logno= uint4korr(buffer + new_cf_create_time_size + CF_FILENO_OFFSET); |
| 439 | if (new_cf_changeable_size >= (CF_MAX_TRID_OFFSET + CF_MAX_TRID_SIZE)) |
| 440 | max_trid_in_control_file= |
| 441 | transid_korr(buffer + new_cf_create_time_size + CF_MAX_TRID_OFFSET); |
| 442 | if (new_cf_changeable_size >= (CF_RECOV_FAIL_OFFSET + CF_RECOV_FAIL_SIZE)) |
| 443 | recovery_failures= |
| 444 | (buffer + new_cf_create_time_size + CF_RECOV_FAIL_OFFSET)[0]; |
| 445 | |
| 446 | ok: |
| 447 | DBUG_RETURN(0); |
| 448 | |
| 449 | err: |
| 450 | if (print_error) |
| 451 | my_printf_error(HA_ERR_INITIALIZATION, |
| 452 | "Got error '%s' when trying to use aria control file " |
| 453 | "'%s'" , 0, errmsg, name); |
| 454 | ma_control_file_end(); /* will unlock file if needed */ |
| 455 | DBUG_RETURN(error); |
| 456 | } |
| 457 | |
| 458 | |
| 459 | /* |
| 460 | Write information durably to the control file; stores this information into |
| 461 | the last_checkpoint_lsn, last_logno, max_trid_in_control_file, |
| 462 | recovery_failures global variables. |
| 463 | Called when we have created a new log (after syncing this log's creation), |
| 464 | when we have written a checkpoint (after syncing this log record), at |
| 465 | shutdown (for storing trid in case logs are soon removed by user), and |
| 466 | before and after recovery (to store recovery_failures). |
| 467 | Variables last_checkpoint_lsn and last_logno must be protected by caller |
| 468 | using log's lock, unless this function is called at startup. |
| 469 | |
| 470 | SYNOPSIS |
| 471 | ma_control_file_write_and_force() |
| 472 | last_checkpoint_lsn_arg LSN of last checkpoint |
| 473 | last_logno_arg last log file number |
| 474 | max_trid_arg maximum transaction longid |
| 475 | recovery_failures_arg consecutive recovery failures |
| 476 | |
| 477 | NOTE |
| 478 | We always want to do one single my_pwrite() here to be as atomic as |
| 479 | possible. |
| 480 | |
| 481 | RETURN |
| 482 | 0 - OK |
| 483 | 1 - Error |
| 484 | */ |
| 485 | |
| 486 | int ma_control_file_write_and_force(LSN last_checkpoint_lsn_arg, |
| 487 | uint32 last_logno_arg, |
| 488 | TrID max_trid_arg, |
| 489 | uint8 recovery_failures_arg) |
| 490 | { |
| 491 | uchar buffer[CF_MAX_SIZE]; |
| 492 | uint32 sum; |
| 493 | my_bool no_need_sync; |
| 494 | DBUG_ENTER("ma_control_file_write_and_force" ); |
| 495 | |
| 496 | /* |
| 497 | We don't need to sync if this is just an increase of |
| 498 | recovery_failures: it's even good if that counter is not increased on disk |
| 499 | in case of power or hardware failure (less false positives when removing |
| 500 | logs). |
| 501 | */ |
| 502 | no_need_sync= ((last_checkpoint_lsn == last_checkpoint_lsn_arg) && |
| 503 | (last_logno == last_logno_arg) && |
| 504 | (max_trid_in_control_file == max_trid_arg) && |
| 505 | (recovery_failures_arg > 0)); |
| 506 | |
| 507 | if (control_file_fd < 0) |
| 508 | DBUG_RETURN(1); |
| 509 | |
| 510 | #ifndef DBUG_OFF |
| 511 | if (maria_multi_threaded) |
| 512 | translog_lock_handler_assert_owner(); |
| 513 | #endif |
| 514 | |
| 515 | lsn_store(buffer + CF_LSN_OFFSET, last_checkpoint_lsn_arg); |
| 516 | int4store(buffer + CF_FILENO_OFFSET, last_logno_arg); |
| 517 | transid_store(buffer + CF_MAX_TRID_OFFSET, max_trid_arg); |
| 518 | (buffer + CF_RECOV_FAIL_OFFSET)[0]= recovery_failures_arg; |
| 519 | |
| 520 | if (cf_changeable_size > CF_CHANGEABLE_TOTAL_SIZE) |
| 521 | { |
| 522 | /* |
| 523 | More room than needed for us. Must be a newer version. Clear part which |
| 524 | we cannot maintain, so that any future version notices we didn't |
| 525 | maintain its extra data. |
| 526 | */ |
| 527 | uint zeroed= cf_changeable_size - CF_CHANGEABLE_TOTAL_SIZE; |
| 528 | char msg[150]; |
| 529 | bzero(buffer + CF_CHANGEABLE_TOTAL_SIZE, zeroed); |
| 530 | my_snprintf(msg, sizeof(msg), |
| 531 | "Control file must be from a newer version; zero-ing out %u" |
| 532 | " unknown bytes in control file at offset %u" , zeroed, |
| 533 | cf_changeable_size + cf_create_time_size); |
| 534 | ma_message_no_user(ME_JUST_WARNING, msg); |
| 535 | } |
| 536 | else |
| 537 | { |
| 538 | /* not enough room for what we need to store: enlarge */ |
| 539 | cf_changeable_size= CF_CHANGEABLE_TOTAL_SIZE; |
| 540 | } |
| 541 | /* Note that the create-time portion is not touched */ |
| 542 | |
| 543 | /* Checksum is stored first */ |
| 544 | compile_time_assert(CF_CHECKSUM_OFFSET == 0); |
| 545 | sum= my_checksum(0, buffer + CF_CHECKSUM_SIZE, |
| 546 | cf_changeable_size - CF_CHECKSUM_SIZE); |
| 547 | int4store(buffer, sum); |
| 548 | |
| 549 | if (my_pwrite(control_file_fd, buffer, cf_changeable_size, |
| 550 | cf_create_time_size, MYF(MY_FNABP | MY_WME)) || |
| 551 | (!no_need_sync && mysql_file_sync(control_file_fd, MYF(MY_WME)))) |
| 552 | DBUG_RETURN(1); |
| 553 | |
| 554 | last_checkpoint_lsn= last_checkpoint_lsn_arg; |
| 555 | last_logno= last_logno_arg; |
| 556 | max_trid_in_control_file= max_trid_arg; |
| 557 | recovery_failures= recovery_failures_arg; |
| 558 | |
| 559 | cf_changeable_size= CF_CHANGEABLE_TOTAL_SIZE; /* no more warning */ |
| 560 | DBUG_RETURN(0); |
| 561 | } |
| 562 | |
| 563 | |
| 564 | /* |
| 565 | Free resources taken by control file subsystem |
| 566 | |
| 567 | SYNOPSIS |
| 568 | ma_control_file_end() |
| 569 | */ |
| 570 | |
| 571 | int ma_control_file_end(void) |
| 572 | { |
| 573 | int close_error; |
| 574 | DBUG_ENTER("ma_control_file_end" ); |
| 575 | |
| 576 | if (control_file_fd < 0) /* already closed */ |
| 577 | DBUG_RETURN(0); |
| 578 | |
| 579 | #ifndef __WIN__ |
| 580 | (void) my_lock(control_file_fd, F_UNLCK, 0L, F_TO_EOF, |
| 581 | MYF(MY_SEEK_NOT_DONE | MY_FORCE_LOCK)); |
| 582 | #endif |
| 583 | |
| 584 | close_error= mysql_file_close(control_file_fd, MYF(MY_WME)); |
| 585 | /* |
| 586 | As mysql_file_close() frees structures even if close() fails, we do the same, |
| 587 | i.e. we mark the file as closed in all cases. |
| 588 | */ |
| 589 | control_file_fd= -1; |
| 590 | /* |
| 591 | As this module owns these variables, closing the module forbids access to |
| 592 | them (just a safety): |
| 593 | */ |
| 594 | last_checkpoint_lsn= LSN_IMPOSSIBLE; |
| 595 | last_logno= FILENO_IMPOSSIBLE; |
| 596 | max_trid_in_control_file= recovery_failures= 0; |
| 597 | |
| 598 | DBUG_RETURN(close_error); |
| 599 | } |
| 600 | |
| 601 | |
| 602 | /** |
| 603 | Tells if control file is initialized. |
| 604 | */ |
| 605 | |
| 606 | my_bool ma_control_file_inited(void) |
| 607 | { |
| 608 | return (control_file_fd >= 0); |
| 609 | } |
| 610 | |
| 611 | #endif /* EXTRACT_DEFINITIONS */ |
| 612 | |