/*****************************************************************************

Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file trx/trx0rseg.cc
Rollback segment

Created 3/26/1996 Heikki Tuuri
*******************************************************/

#include "trx0rseg.h"
#include "trx0undo.h"
#include "fut0lst.h"
#include "srv0srv.h"
#include "trx0purge.h"
#include "srv0mon.h"
#include "fsp0sysspace.h"

#include <algorithm>

#ifdef WITH_WSREP
#include <mysql/service_wsrep.h>

#ifdef UNIV_DEBUG
/** The latest known WSREP XID sequence number */
static long long wsrep_seqno = -1;
#endif /* UNIV_DEBUG */
/** The latest known WSREP XID UUID */
static unsigned char wsrep_uuid[16];

/** Update the WSREP XID information in rollback segment header.
@param[in,out]	rseg_header	rollback segment header
@param[in]	xid		WSREP XID
@param[in,out]	mtr		mini-transaction */
void
trx_rseg_update_wsrep_checkpoint(
	trx_rsegf_t*	rseg_header,
	const XID*	xid,
	mtr_t*		mtr)
{
	ut_ad(wsrep_is_wsrep_xid(xid));

#ifdef UNIV_DEBUG
	/* Check that seqno is monotonically increasing */
	long long xid_seqno = wsrep_xid_seqno(xid);
	const byte* xid_uuid = wsrep_xid_uuid(xid);

	if (!memcmp(xid_uuid, wsrep_uuid, sizeof wsrep_uuid)) {
		ut_ad(xid_seqno > wsrep_seqno);
	} else {
		memcpy(wsrep_uuid, xid_uuid, sizeof wsrep_uuid);
	}
	wsrep_seqno = xid_seqno;
#endif /* UNIV_DEBUG */

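	/* Write the XID fields (format, gtrid/bqual lengths, data) to
	the rollback segment header page; each mlog_write_*() call also
	generates a redo log record within the mini-transaction. */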
	mlog_write_ulint(TRX_RSEG_WSREP_XID_FORMAT + rseg_header,
			 uint32_t(xid->formatID),
			 MLOG_4BYTES, mtr);

	mlog_write_ulint(TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header,
			 uint32_t(xid->gtrid_length),
			 MLOG_4BYTES, mtr);

	mlog_write_ulint(TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header,
			 uint32_t(xid->bqual_length),
			 MLOG_4BYTES, mtr);

	mlog_write_string(TRX_RSEG_WSREP_XID_DATA + rseg_header,
			  reinterpret_cast<const byte*>(xid->data),
			  XIDDATASIZE, mtr);
}

/** Update WSREP checkpoint XID in the first rollback segment header
as part of wsrep_set_SE_checkpoint() when it is guaranteed that there
are no wsrep transactions committing.
If the UUID part of the WSREP XID does not match the UUIDs of XIDs already
stored in the rollback segments, the WSREP XID in all the remaining rollback
segments will be reset.
@param[in]	xid	WSREP XID */
void trx_rseg_update_wsrep_checkpoint(const XID* xid)
{
	mtr_t	mtr;
	mtr.start();

	const trx_rseg_t*	rseg = trx_sys.rseg_array[0];

	trx_rsegf_t*	rseg_header = trx_rsegf_get(rseg->space, rseg->page_no,
						    &mtr);
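	/* A nonzero TRX_RSEG_FORMAT field means that the header is
	still in the pre-MariaDB-10.3 format; upgrade it before
	storing the WSREP XID. */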
	if (UNIV_UNLIKELY(mach_read_from_4(rseg_header + TRX_RSEG_FORMAT))) {
		trx_rseg_format_upgrade(rseg_header, &mtr);
	}

	trx_rseg_update_wsrep_checkpoint(rseg_header, xid, &mtr);

	const byte* xid_uuid = wsrep_xid_uuid(xid);
	if (memcmp(wsrep_uuid, xid_uuid, sizeof wsrep_uuid)) {
		memcpy(wsrep_uuid, xid_uuid, sizeof wsrep_uuid);

		/* Because the UUID part of the WSREP XID differed
		from current_xid_uuid, the WSREP group UUID was
		changed, and we must reset the XID in all rollback
		segment headers. */
		for (ulint rseg_id = 1; rseg_id < TRX_SYS_N_RSEGS; ++rseg_id) {
			if (const trx_rseg_t* rseg =
			    trx_sys.rseg_array[rseg_id]) {
				trx_rseg_update_wsrep_checkpoint(
					trx_rsegf_get(rseg->space,
						      rseg->page_no, &mtr),
					xid, &mtr);
			}
		}
	}

	mtr.commit();
}

/** Read the WSREP XID information in rollback segment header.
@param[in]	rseg_header	rollback segment header
@param[out]	xid		transaction XID
@return whether the WSREP XID was present */
static
bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid)
{
	int formatID = static_cast<int>(
		mach_read_from_4(
			TRX_RSEG_WSREP_XID_FORMAT + rseg_header));
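	/* A zero format field means that no WSREP XID has been
	stored in this rollback segment header. */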
	if (formatID == 0) {
		return false;
	}

	xid.formatID = formatID;
	xid.gtrid_length = static_cast<int>(
		mach_read_from_4(
			TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header));

	xid.bqual_length = static_cast<int>(
		mach_read_from_4(
			TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header));

	memcpy(xid.data, TRX_RSEG_WSREP_XID_DATA + rseg_header, XIDDATASIZE);

	return true;
}

/** Read the WSREP XID from the TRX_SYS page (in case of upgrade).
@param[in]	page	TRX_SYS page
@param[out]	xid	WSREP XID (if present)
@return whether the WSREP XID is present */
static bool trx_rseg_init_wsrep_xid(const page_t* page, XID& xid)
{
	if (mach_read_from_4(TRX_SYS + TRX_SYS_WSREP_XID_INFO
			     + TRX_SYS_WSREP_XID_MAGIC_N_FLD
			     + page)
	    != TRX_SYS_WSREP_XID_MAGIC_N) {
		return false;
	}

	xid.formatID = static_cast<int>(
		mach_read_from_4(
			TRX_SYS + TRX_SYS_WSREP_XID_INFO
			+ TRX_SYS_WSREP_XID_FORMAT + page));
	xid.gtrid_length = static_cast<int>(
		mach_read_from_4(
			TRX_SYS + TRX_SYS_WSREP_XID_INFO
			+ TRX_SYS_WSREP_XID_GTRID_LEN + page));
	xid.bqual_length = static_cast<int>(
		mach_read_from_4(
			TRX_SYS + TRX_SYS_WSREP_XID_INFO
			+ TRX_SYS_WSREP_XID_BQUAL_LEN + page));
	memcpy(xid.data,
	       TRX_SYS + TRX_SYS_WSREP_XID_INFO
	       + TRX_SYS_WSREP_XID_DATA + page, XIDDATASIZE);
	return true;
}

/** Recover the latest WSREP checkpoint XID.
@param[out]	xid	WSREP XID
@return whether the WSREP XID was found */
bool trx_rseg_read_wsrep_checkpoint(XID& xid)
{
	mtr_t		mtr;
	long long	max_xid_seqno = -1;
	bool		found = false;

	for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS;
	     rseg_id++, mtr.commit()) {
		mtr.start();
		const buf_block_t* sys = trx_sysf_get(&mtr, false);
		if (rseg_id == 0) {
			found = trx_rseg_init_wsrep_xid(sys->frame, xid);
			ut_ad(!found || xid.formatID == 1);
			if (found) {
				max_xid_seqno = wsrep_xid_seqno(&xid);
				memcpy(wsrep_uuid, wsrep_xid_uuid(&xid),
				       sizeof wsrep_uuid);
			}
		}

		const uint32_t page_no = trx_sysf_rseg_get_page_no(
			sys, rseg_id);

		if (page_no == FIL_NULL) {
			continue;
		}

		const trx_rsegf_t* rseg_header = trx_rsegf_get_new(
			trx_sysf_rseg_get_space(sys, rseg_id), page_no, &mtr);

		if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT)) {
			continue;
		}

		XID tmp_xid;
		long long tmp_seqno = 0;
		if (trx_rseg_read_wsrep_checkpoint(rseg_header, tmp_xid)
		    && (tmp_seqno = wsrep_xid_seqno(&tmp_xid))
		    > max_xid_seqno) {
			found = true;
			max_xid_seqno = tmp_seqno;
			xid = tmp_xid;
			memcpy(wsrep_uuid, wsrep_xid_uuid(&tmp_xid),
			       sizeof wsrep_uuid);
		}
	}

	return found;
}
#endif /* WITH_WSREP */

/** Upgrade a rollback segment header page to MariaDB 10.3 format.
@param[in,out]	rseg_header	rollback segment header page
@param[in,out]	mtr		mini-transaction */
void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr)
{
	ut_ad(page_offset(rseg_header) == TRX_RSEG);
	byte* rseg_format = TRX_RSEG_FORMAT + rseg_header;
	mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr);
	/* Clear also possible garbage at the end of the page. Old
	InnoDB versions did not initialize unused parts of pages. */
	byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8;
	ulint len = srv_page_size
		- (FIL_PAGE_DATA_END
		   + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8);
	memset(b, 0, len);
	mlog_log_string(b, len, mtr);
}

/** Create a rollback segment header.
@param[in,out]	space		system, undo, or temporary tablespace
@param[in]	rseg_id		rollback segment identifier
@param[in,out]	sys_header	the TRX_SYS page (NULL for temporary rseg)
@param[in,out]	mtr		mini-transaction
@return page number of the created segment, FIL_NULL on failure */
ulint
trx_rseg_header_create(
	fil_space_t*	space,
	ulint		rseg_id,
	buf_block_t*	sys_header,
	mtr_t*		mtr)
{
	ulint		page_no;
	trx_rsegf_t*	rsegf;
	buf_block_t*	block;

	ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_X_LOCK));
	ut_ad(!sys_header == (space == fil_system.temp_space));

	/* Allocate a new file segment for the rollback segment */
	block = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);

	if (block == NULL) {
		/* No space left */

		return(FIL_NULL);
	}

	buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);

	page_no = block->page.id.page_no();

	/* Get the rollback segment file page */
	rsegf = trx_rsegf_get_new(space->id, page_no, mtr);

	mlog_write_ulint(rsegf + TRX_RSEG_FORMAT, 0, MLOG_4BYTES, mtr);

	/* Initialize the history list */

	mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr);
	flst_init(rsegf + TRX_RSEG_HISTORY, mtr);

	/* Reset the undo log slots */
	for (ulint i = 0; i < TRX_RSEG_N_SLOTS; i++) {

		trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr);
	}

	if (sys_header) {
		/* Add the rollback segment info to the free slot in
		the trx system header */

		mlog_write_ulint(TRX_SYS + TRX_SYS_RSEGS
				 + TRX_SYS_RSEG_SPACE
				 + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
				 + sys_header->frame,
				 space->id, MLOG_4BYTES, mtr);
		mlog_write_ulint(TRX_SYS + TRX_SYS_RSEGS
				 + TRX_SYS_RSEG_PAGE_NO
				 + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
				 + sys_header->frame,
				 page_no, MLOG_4BYTES, mtr);
	}

	return(page_no);
}

/** Free a rollback segment in memory. */
void
trx_rseg_mem_free(trx_rseg_t* rseg)
{
	trx_undo_t*	undo;
	trx_undo_t*	next_undo;

	mutex_free(&rseg->mutex);

	/* There can't be any active transactions. */
	ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
	ut_a(UT_LIST_GET_LEN(rseg->old_insert_list) == 0);

	for (undo = UT_LIST_GET_FIRST(rseg->undo_cached);
	     undo != NULL;
	     undo = next_undo) {

		next_undo = UT_LIST_GET_NEXT(undo_list, undo);

		UT_LIST_REMOVE(rseg->undo_cached, undo);

		MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);

		ut_free(undo);
	}

	ut_free(rseg);
}

/** Create a rollback segment object.
@param[in]	id	rollback segment id
@param[in]	space	space where the segment is placed
@param[in]	page_no	page number of the segment header */
static
trx_rseg_t*
trx_rseg_mem_create(ulint id, fil_space_t* space, ulint page_no)
{
	trx_rseg_t* rseg = static_cast<trx_rseg_t*>(
		ut_zalloc_nokey(sizeof *rseg));

	rseg->id = id;
	rseg->space = space;
	rseg->page_no = page_no;
	rseg->last_page_no = FIL_NULL;
	rseg->curr_size = 1;

	mutex_create(rseg->is_persistent()
		     ? LATCH_ID_REDO_RSEG : LATCH_ID_NOREDO_RSEG,
		     &rseg->mutex);

	UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list);
	UT_LIST_INIT(rseg->old_insert_list, &trx_undo_t::undo_list);
	UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list);

	return(rseg);
}

/** Read the undo log lists.
@param[in,out]	rseg		rollback segment
@param[in,out]	max_trx_id	maximum observed transaction identifier
@param[in]	rseg_header	rollback segment header
@return the combined size of undo log segments in pages */
static
ulint
trx_undo_lists_init(trx_rseg_t* rseg, trx_id_t& max_trx_id,
		    const trx_rsegf_t* rseg_header)
{
	ut_ad(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN);

	ulint size = 0;

	for (ulint i = 0; i < TRX_RSEG_N_SLOTS; i++) {
		ulint page_no = trx_rsegf_get_nth_undo(rseg_header, i);
		if (page_no != FIL_NULL) {
			size += trx_undo_mem_create_at_db_start(
				rseg, i, page_no, max_trx_id);
			MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
		}
	}

	return(size);
}

/** Restore the state of a persistent rollback segment.
@param[in,out]	rseg		persistent rollback segment
@param[in,out]	max_trx_id	maximum observed transaction identifier
@param[in,out]	mtr		mini-transaction */
static
void
trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr)
{
	trx_rsegf_t*	rseg_header = trx_rsegf_get_new(
		rseg->space->id, rseg->page_no, mtr);

	if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT) == 0) {
		trx_id_t id = mach_read_from_8(rseg_header
					       + TRX_RSEG_MAX_TRX_ID);

		if (id > max_trx_id) {
			max_trx_id = id;
		}

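		/* A nonzero first byte of TRX_RSEG_BINLOG_NAME means
		that a binlog file name has been stored in this
		rollback segment header. */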
		if (rseg_header[TRX_RSEG_BINLOG_NAME]) {
			const char* binlog_name = reinterpret_cast<const char*>
				(rseg_header) + TRX_RSEG_BINLOG_NAME;
			compile_time_assert(TRX_RSEG_BINLOG_NAME_LEN == sizeof
					    trx_sys.recovered_binlog_filename);

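			/* Adopt the most recent binlog metadata: if
			the stored file name compares greater than the
			one recovered so far (or none has been
			recovered yet), take both the name and the
			offset; if the names are equal, keep the
			larger offset. */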
			int cmp = *trx_sys.recovered_binlog_filename
				? strncmp(binlog_name,
					  trx_sys.recovered_binlog_filename,
					  TRX_RSEG_BINLOG_NAME_LEN)
				: 1;

			if (cmp >= 0) {
				uint64_t binlog_offset = mach_read_from_8(
					rseg_header + TRX_RSEG_BINLOG_OFFSET);
				if (cmp) {
					memcpy(trx_sys.
					       recovered_binlog_filename,
					       binlog_name,
					       TRX_RSEG_BINLOG_NAME_LEN);
					trx_sys.recovered_binlog_offset
						= binlog_offset;
				} else if (binlog_offset >
					   trx_sys.recovered_binlog_offset) {
					trx_sys.recovered_binlog_offset
						= binlog_offset;
				}
			}

#ifdef WITH_WSREP
			trx_rseg_read_wsrep_checkpoint(
				rseg_header, trx_sys.recovered_wsrep_xid);
#endif
		}
	}

	if (srv_operation == SRV_OPERATION_RESTORE) {
		/* mariabackup --prepare only deals with
		the redo log and the data files, not with
		transactions or the data dictionary. */
		return;
	}

	/* Initialize the undo log lists according to the rseg header */

	rseg->curr_size = mach_read_from_4(rseg_header + TRX_RSEG_HISTORY_SIZE)
		+ 1 + trx_undo_lists_init(rseg, max_trx_id, rseg_header);

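	/* If the history list is not empty, restore the position of
	the last undo log header so that purging can resume from it. */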
	if (ulint len = flst_get_len(rseg_header + TRX_RSEG_HISTORY)) {
		trx_sys.history_add(int32(len));

		fil_addr_t node_addr = trx_purge_get_log_from_hist(
			flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr));

		rseg->last_page_no = node_addr.page;
		rseg->last_offset = node_addr.boffset;

		const trx_ulogf_t* undo_log_hdr = trx_undo_page_get(
			page_id_t(rseg->space->id, node_addr.page), mtr)
			+ node_addr.boffset;

		trx_id_t id = mach_read_from_8(undo_log_hdr + TRX_UNDO_TRX_ID);
		if (id > max_trx_id) {
			max_trx_id = id;
		}
		id = mach_read_from_8(undo_log_hdr + TRX_UNDO_TRX_NO);
		if (id > max_trx_id) {
			max_trx_id = id;
		}
		unsigned purge = mach_read_from_2(
			undo_log_hdr + TRX_UNDO_NEEDS_PURGE);
		ut_ad(purge <= 1);
		rseg->set_last_trx_no(id, purge != 0);
		rseg->needs_purge = purge != 0;

		if (rseg->last_page_no != FIL_NULL) {

			/* There is no need to cover this operation
			by the purge mutex because we are still
			bootstrapping. */
			purge_sys.purge_queue.push(*rseg);
		}
	}
}

/** Read binlog metadata from the TRX_SYS page, in case we are upgrading
from MySQL or a MariaDB version older than 10.3.5. */
static void trx_rseg_init_binlog_info(const page_t* page)
{
	if (mach_read_from_4(TRX_SYS + TRX_SYS_MYSQL_LOG_INFO
			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
			     + page)
	    == TRX_SYS_MYSQL_LOG_MAGIC_N) {
		memcpy(trx_sys.recovered_binlog_filename,
		       TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME
		       + TRX_SYS + page, TRX_SYS_MYSQL_LOG_NAME_LEN);
		trx_sys.recovered_binlog_offset = mach_read_from_8(
			TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET
			+ TRX_SYS + page);
	}

#ifdef WITH_WSREP
	trx_rseg_init_wsrep_xid(page, trx_sys.recovered_wsrep_xid);
#endif
}

/** Initialize the rollback segments in memory at database startup. */
void
trx_rseg_array_init()
{
	trx_id_t max_trx_id = 0;

	*trx_sys.recovered_binlog_filename = '\0';
	trx_sys.recovered_binlog_offset = 0;
#ifdef WITH_WSREP
	memset(&trx_sys.recovered_wsrep_xid, 0,
	       sizeof trx_sys.recovered_wsrep_xid);
	trx_sys.recovered_wsrep_xid.formatID = -1;
#endif

	for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
		mtr_t mtr;
		mtr.start();
		if (const buf_block_t* sys = trx_sysf_get(&mtr, false)) {
			if (rseg_id == 0) {
				/* In case this is an upgrade from
				before MariaDB 10.3.5, fetch the base
				information from the TRX_SYS page. */
				max_trx_id = mach_read_from_8(
					TRX_SYS + TRX_SYS_TRX_ID_STORE
					+ sys->frame);
				trx_rseg_init_binlog_info(sys->frame);
			}

			const uint32_t page_no = trx_sysf_rseg_get_page_no(
				sys, rseg_id);
			if (page_no != FIL_NULL) {
				trx_rseg_t* rseg = trx_rseg_mem_create(
					rseg_id,
					fil_space_get(trx_sysf_rseg_get_space(
							      sys, rseg_id)),
					page_no);
				ut_ad(rseg->is_persistent());
				ut_ad(rseg->id == rseg_id);
				ut_ad(!trx_sys.rseg_array[rseg_id]);
				trx_sys.rseg_array[rseg_id] = rseg;
				trx_rseg_mem_restore(rseg, max_trx_id, &mtr);
			}
		}

		mtr.commit();
	}

	trx_sys.init_max_trx_id(max_trx_id + 1);
}

/** Create a persistent rollback segment.
@param[in]	space_id	system or undo tablespace id
@return pointer to new rollback segment
@retval	NULL	on failure */
trx_rseg_t*
trx_rseg_create(ulint space_id)
{
	trx_rseg_t*	rseg = NULL;
	mtr_t		mtr;

	mtr.start();

	/* To obey the latching order, acquire the file space
	x-latch before the trx_sys.mutex. */
	fil_space_t*	space = mtr_x_lock_space(space_id, &mtr);
	ut_ad(space->purpose == FIL_TYPE_TABLESPACE);

	if (buf_block_t* sys_header = trx_sysf_get(&mtr)) {
		ulint	rseg_id = trx_sys_rseg_find_free(sys_header);
		ulint	page_no = rseg_id == ULINT_UNDEFINED
			? FIL_NULL
			: trx_rseg_header_create(space, rseg_id, sys_header,
						 &mtr);
		if (page_no != FIL_NULL) {
			ut_ad(trx_sysf_rseg_get_space(sys_header, rseg_id)
			      == space_id);
			rseg = trx_rseg_mem_create(rseg_id, space, page_no);
			ut_ad(rseg->id == rseg_id);
			ut_ad(rseg->is_persistent());
			ut_ad(!trx_sys.rseg_array[rseg->id]);
			trx_sys.rseg_array[rseg->id] = rseg;
		}
	}

	mtr.commit();

	return(rseg);
}

/** Create the temporary rollback segments. */
void
trx_temp_rseg_create()
{
	mtr_t	mtr;

	for (ulong i = 0; i < TRX_SYS_N_RSEGS; i++) {
		mtr.start();
		mtr.set_log_mode(MTR_LOG_NO_REDO);
		mtr_x_lock(&fil_system.temp_space->latch, &mtr);

		ulint page_no = trx_rseg_header_create(
			fil_system.temp_space, i, NULL, &mtr);
		trx_rseg_t* rseg = trx_rseg_mem_create(
			i, fil_system.temp_space, page_no);
		ut_ad(!rseg->is_persistent());
		ut_ad(!trx_sys.temp_rsegs[i]);
		trx_sys.temp_rsegs[i] = rseg;
		mtr.commit();
	}
}

/********************************************************************
Get the number of unique rollback tablespaces in use except space id 0.
The last space id will be the sentinel value ULINT_UNDEFINED. The array
will be sorted on space id. Note: space_ids should have space for
TRX_SYS_N_RSEGS + 1 elements.
@return number of unique rollback tablespaces in use. */
ulint
trx_rseg_get_n_undo_tablespaces(
/*============================*/
	ulint*	space_ids)	/*!< out: array of space ids of
				UNDO tablespaces */
{
	mtr_t mtr;
	mtr.start();

	buf_block_t* sys_header = trx_sysf_get(&mtr, false);
	if (!sys_header) {
		mtr.commit();
		return 0;
	}

	ulint* end = space_ids;

	for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
		uint32_t page_no = trx_sysf_rseg_get_page_no(sys_header,
							     rseg_id);

		if (page_no == FIL_NULL) {
			continue;
		}

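		/* Space id 0 (the system tablespace) is excluded;
		record each dedicated undo tablespace id only once. */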
		if (ulint space = trx_sysf_rseg_get_space(sys_header,
							  rseg_id)) {
			if (std::find(space_ids, end, space) == end) {
				*end++ = space;
			}
		}
	}

	mtr.commit();

	ut_a(end - space_ids <= TRX_SYS_N_RSEGS);
	*end = ULINT_UNDEFINED;

	std::sort(space_ids, end);

	return ulint(end - space_ids);
}

/** Update the offset information about the end of the binlog entry
which corresponds to the transaction just being committed.
In a replication slave, this updates the master binlog position
up to which replication has proceeded.
@param[in,out]	rseg_header	rollback segment header
@param[in]	trx		committing transaction
@param[in,out]	mtr		mini-transaction */
void
trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr)
{
	DBUG_LOG("trx", "trx_mysql_binlog_offset: " << trx->mysql_log_offset);

	const size_t len = strlen(trx->mysql_log_file_name) + 1;

	ut_ad(len > 1);

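	/* If the binlog file name (including the terminating NUL)
	does not fit in the fixed-size TRX_RSEG_BINLOG_NAME field,
	skip the update entirely. */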
	if (UNIV_UNLIKELY(len > TRX_RSEG_BINLOG_NAME_LEN)) {
		return;
	}

	mlog_write_ull(rseg_header + TRX_RSEG_BINLOG_OFFSET,
		       trx->mysql_log_offset, mtr);
	byte* p = rseg_header + TRX_RSEG_BINLOG_NAME;
	const byte* binlog_name = reinterpret_cast<const byte*>
		(trx->mysql_log_file_name);

	if (memcmp(binlog_name, p, len)) {
		mlog_write_string(p, binlog_name, len, mtr);
	}
}