1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2017, 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file row/row0umod.cc |
22 | Undo modify of a row |
23 | |
24 | Created 2/27/1997 Heikki Tuuri |
25 | *******************************************************/ |
26 | |
27 | #include "ha_prototypes.h" |
28 | |
29 | #include "row0umod.h" |
30 | #include "dict0dict.h" |
31 | #include "dict0stats.h" |
32 | #include "dict0boot.h" |
33 | #include "trx0undo.h" |
34 | #include "trx0roll.h" |
35 | #include "trx0purge.h" |
36 | #include "btr0btr.h" |
37 | #include "mach0data.h" |
38 | #include "ibuf0ibuf.h" |
39 | #include "row0undo.h" |
40 | #include "row0vers.h" |
41 | #include "row0log.h" |
42 | #include "trx0trx.h" |
43 | #include "trx0rec.h" |
44 | #include "row0row.h" |
45 | #include "row0upd.h" |
46 | #include "que0que.h" |
47 | #include "log0log.h" |
48 | |
49 | /* Considerations on undoing a modify operation. |
50 | (1) Undoing a delete marking: all index records should be found. Some of |
51 | them may have delete mark already FALSE, if the delete mark operation was |
52 | stopped underway, or if the undo operation ended prematurely because of a |
53 | system crash. |
54 | (2) Undoing an update of a delete unmarked record: the newer version of |
55 | an updated secondary index entry should be removed if no prior version |
56 | of the clustered index record requires its existence. Otherwise, it should |
57 | be delete marked. |
58 | (3) Undoing an update of a delete marked record. In this kind of update a |
59 | delete marked clustered index record was delete unmarked and possibly also |
60 | some of its fields were changed. Now, it is possible that the delete marked |
61 | version has become obsolete at the time the undo is started. */ |
62 | |
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before performing that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST not hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
72 | |
73 | /***********************************************************//** |
74 | Undoes a modify in a clustered index record. |
75 | @return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */ |
76 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
77 | dberr_t |
78 | row_undo_mod_clust_low( |
79 | /*===================*/ |
80 | undo_node_t* node, /*!< in: row undo node */ |
81 | ulint** offsets,/*!< out: rec_get_offsets() on the record */ |
82 | mem_heap_t** offsets_heap, |
83 | /*!< in/out: memory heap that can be emptied */ |
84 | mem_heap_t* heap, /*!< in/out: memory heap */ |
85 | const dtuple_t**rebuilt_old_pk, |
86 | /*!< out: row_log_table_get_pk() |
87 | before the update, or NULL if |
88 | the table is not being rebuilt online or |
89 | the PRIMARY KEY definition does not change */ |
90 | byte* sys, /*!< out: DB_TRX_ID, DB_ROLL_PTR |
91 | for row_log_table_delete() */ |
92 | que_thr_t* thr, /*!< in: query thread */ |
93 | mtr_t* mtr, /*!< in: mtr; must be committed before |
94 | latching any further pages */ |
95 | ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ |
96 | { |
97 | btr_pcur_t* pcur; |
98 | btr_cur_t* btr_cur; |
99 | dberr_t err; |
100 | #ifdef UNIV_DEBUG |
101 | ibool success; |
102 | #endif /* UNIV_DEBUG */ |
103 | |
104 | pcur = &node->pcur; |
105 | btr_cur = btr_pcur_get_btr_cur(pcur); |
106 | |
107 | #ifdef UNIV_DEBUG |
108 | success = |
109 | #endif /* UNIV_DEBUG */ |
110 | btr_pcur_restore_position(mode, pcur, mtr); |
111 | |
112 | ut_ad(success); |
113 | ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), |
114 | btr_cur_get_index(btr_cur)) |
115 | == thr_get_trx(thr)->id); |
116 | |
117 | if (mode != BTR_MODIFY_LEAF |
118 | && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) { |
119 | *rebuilt_old_pk = row_log_table_get_pk( |
120 | btr_cur_get_rec(btr_cur), |
121 | btr_cur_get_index(btr_cur), NULL, sys, &heap); |
122 | } else { |
123 | *rebuilt_old_pk = NULL; |
124 | } |
125 | |
126 | if (mode != BTR_MODIFY_TREE) { |
127 | ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED)) |
128 | == BTR_MODIFY_LEAF); |
129 | |
130 | err = btr_cur_optimistic_update( |
131 | BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG |
132 | | BTR_KEEP_SYS_FLAG, |
133 | btr_cur, offsets, offsets_heap, |
134 | node->update, node->cmpl_info, |
135 | thr, thr_get_trx(thr)->id, mtr); |
136 | } else { |
137 | big_rec_t* dummy_big_rec; |
138 | |
139 | err = btr_cur_pessimistic_update( |
140 | BTR_NO_LOCKING_FLAG |
141 | | BTR_NO_UNDO_LOG_FLAG |
142 | | BTR_KEEP_SYS_FLAG, |
143 | btr_cur, offsets, offsets_heap, heap, |
144 | &dummy_big_rec, node->update, |
145 | node->cmpl_info, thr, thr_get_trx(thr)->id, mtr); |
146 | |
147 | ut_a(!dummy_big_rec); |
148 | } |
149 | |
150 | return(err); |
151 | } |
152 | |
153 | /** Get the byte offset of the DB_TRX_ID column |
154 | @param[in] rec clustered index record |
155 | @param[in] index clustered index |
156 | @return the byte offset of DB_TRX_ID, from the start of rec */ |
157 | static ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index) |
158 | { |
159 | ut_ad(index->n_uniq <= MAX_REF_PARTS); |
160 | ulint trx_id_offset = index->trx_id_offset; |
161 | if (!trx_id_offset) { |
162 | /* Reserve enough offsets for the PRIMARY KEY and 2 columns |
163 | so that we can access DB_TRX_ID, DB_ROLL_PTR. */ |
164 | ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2]; |
165 | rec_offs_init(offsets_); |
166 | mem_heap_t* heap = NULL; |
167 | const ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1; |
168 | ulint* offsets = rec_get_offsets(rec, index, offsets_, true, |
169 | trx_id_pos + 1, &heap); |
170 | ut_ad(!heap); |
171 | ulint len; |
172 | trx_id_offset = rec_get_nth_field_offs( |
173 | offsets, trx_id_pos, &len); |
174 | ut_ad(len == DATA_TRX_ID_LEN); |
175 | } |
176 | |
177 | return trx_id_offset; |
178 | } |
179 | |
180 | /** Determine if rollback must execute a purge-like operation. |
181 | @param[in,out] node row undo |
182 | @param[in,out] mtr mini-transaction |
183 | @return whether the record should be purged */ |
184 | static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr) |
185 | { |
186 | ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); |
187 | ut_ad(!node->table->is_temporary()); |
188 | |
189 | btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&node->pcur); |
190 | ut_ad(btr_cur->index->is_primary()); |
191 | |
192 | mtr_s_lock(&purge_sys.latch, mtr); |
193 | |
194 | if (!purge_sys.view.changes_visible(node->new_trx_id, |
195 | node->table->name)) { |
196 | return false; |
197 | } |
198 | |
199 | const rec_t* rec = btr_cur_get_rec(btr_cur); |
200 | |
201 | return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur->index)) |
202 | == node->new_trx_id; |
203 | } |
204 | |
205 | /***********************************************************//** |
206 | Undoes a modify in a clustered index record. Sets also the node state for the |
207 | next round of undo. |
208 | @return DB_SUCCESS or error code: we may run out of file space */ |
209 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
210 | dberr_t |
211 | row_undo_mod_clust( |
212 | /*===============*/ |
213 | undo_node_t* node, /*!< in: row undo node */ |
214 | que_thr_t* thr) /*!< in: query thread */ |
215 | { |
216 | btr_pcur_t* pcur; |
217 | mtr_t mtr; |
218 | dberr_t err; |
219 | dict_index_t* index; |
220 | bool online; |
221 | |
222 | ut_ad(thr_get_trx(thr) == node->trx); |
223 | ut_ad(node->trx->dict_operation_lock_mode); |
224 | ut_ad(node->trx->in_rollback); |
225 | ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S) |
226 | || rw_lock_own(dict_operation_lock, RW_LOCK_X)); |
227 | |
228 | log_free_check(); |
229 | pcur = &node->pcur; |
230 | index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur)); |
231 | ut_ad(index->is_primary()); |
232 | |
233 | mtr.start(); |
234 | if (index->table->is_temporary()) { |
235 | mtr.set_log_mode(MTR_LOG_NO_REDO); |
236 | } else { |
237 | index->set_modified(mtr); |
238 | } |
239 | |
240 | online = dict_index_is_online_ddl(index); |
241 | if (online) { |
242 | ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH); |
243 | mtr_s_lock(dict_index_get_lock(index), &mtr); |
244 | } |
245 | |
246 | mem_heap_t* heap = mem_heap_create(1024); |
247 | mem_heap_t* offsets_heap = NULL; |
248 | ulint* offsets = NULL; |
249 | const dtuple_t* rebuilt_old_pk; |
250 | byte sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN]; |
251 | |
252 | /* Try optimistic processing of the record, keeping changes within |
253 | the index page */ |
254 | |
255 | err = row_undo_mod_clust_low(node, &offsets, &offsets_heap, |
256 | heap, &rebuilt_old_pk, sys, |
257 | thr, &mtr, online |
258 | ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED |
259 | : BTR_MODIFY_LEAF); |
260 | |
261 | if (err != DB_SUCCESS) { |
262 | btr_pcur_commit_specify_mtr(pcur, &mtr); |
263 | |
264 | /* We may have to modify tree structure: do a pessimistic |
265 | descent down the index tree */ |
266 | |
267 | mtr.start(); |
268 | if (index->table->is_temporary()) { |
269 | mtr.set_log_mode(MTR_LOG_NO_REDO); |
270 | } else { |
271 | index->set_modified(mtr); |
272 | } |
273 | |
274 | err = row_undo_mod_clust_low( |
275 | node, &offsets, &offsets_heap, |
276 | heap, &rebuilt_old_pk, sys, |
277 | thr, &mtr, BTR_MODIFY_TREE); |
278 | ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE); |
279 | } |
280 | |
281 | /* Online rebuild cannot be initiated while we are holding |
282 | dict_operation_lock and index->lock. (It can be aborted.) */ |
283 | ut_ad(online || !dict_index_is_online_ddl(index)); |
284 | |
285 | if (err == DB_SUCCESS && online) { |
286 | |
287 | ut_ad(rw_lock_own_flagged( |
288 | &index->lock, |
289 | RW_LOCK_FLAG_S | RW_LOCK_FLAG_X |
290 | | RW_LOCK_FLAG_SX)); |
291 | |
292 | switch (node->rec_type) { |
293 | case TRX_UNDO_DEL_MARK_REC: |
294 | row_log_table_insert( |
295 | btr_pcur_get_rec(pcur), index, offsets); |
296 | break; |
297 | case TRX_UNDO_UPD_EXIST_REC: |
298 | row_log_table_update( |
299 | btr_pcur_get_rec(pcur), index, offsets, |
300 | rebuilt_old_pk); |
301 | break; |
302 | case TRX_UNDO_UPD_DEL_REC: |
303 | row_log_table_delete( |
304 | btr_pcur_get_rec(pcur), index, offsets, sys); |
305 | break; |
306 | default: |
307 | ut_ad(0); |
308 | break; |
309 | } |
310 | } |
311 | |
312 | /** |
313 | * when scrubbing, and records gets cleared, |
314 | * the transaction id is not present afterwards. |
315 | * this is safe as: since the record is on free-list |
316 | * it can be reallocated at any time after this mtr-commits |
317 | * which is just below |
318 | */ |
319 | ut_ad(srv_immediate_scrub_data_uncompressed |
320 | || row_get_rec_trx_id(btr_pcur_get_rec(pcur), index, offsets) |
321 | == node->new_trx_id); |
322 | |
323 | btr_pcur_commit_specify_mtr(pcur, &mtr); |
324 | |
325 | if (err != DB_SUCCESS) { |
326 | goto func_exit; |
327 | } |
328 | |
329 | /* FIXME: Perform the below operations in the above |
330 | mini-transaction when possible. */ |
331 | |
332 | if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { |
333 | /* In delete-marked records, DB_TRX_ID must |
334 | always refer to an existing update_undo log record. */ |
335 | ut_ad(node->new_trx_id); |
336 | |
337 | mtr.start(); |
338 | if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) { |
339 | goto mtr_commit_exit; |
340 | } |
341 | |
342 | if (index->table->is_temporary()) { |
343 | mtr.set_log_mode(MTR_LOG_NO_REDO); |
344 | } else { |
345 | if (!row_undo_mod_must_purge(node, &mtr)) { |
346 | goto mtr_commit_exit; |
347 | } |
348 | index->set_modified(mtr); |
349 | } |
350 | |
351 | ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur), |
352 | dict_table_is_comp(node->table))); |
353 | if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, &mtr)) { |
354 | goto mtr_commit_exit; |
355 | } |
356 | |
357 | btr_pcur_commit_specify_mtr(pcur, &mtr); |
358 | |
359 | mtr.start(); |
360 | if (!btr_pcur_restore_position( |
361 | BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, |
362 | pcur, &mtr)) { |
363 | goto mtr_commit_exit; |
364 | } |
365 | |
366 | if (index->table->is_temporary()) { |
367 | mtr.set_log_mode(MTR_LOG_NO_REDO); |
368 | } else { |
369 | if (!row_undo_mod_must_purge(node, &mtr)) { |
370 | goto mtr_commit_exit; |
371 | } |
372 | index->set_modified(mtr); |
373 | } |
374 | |
375 | ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur), |
376 | dict_table_is_comp(node->table))); |
377 | |
378 | /* This operation is analogous to purge, we can free |
379 | also inherited externally stored fields. We can also |
380 | assume that the record was complete (including BLOBs), |
381 | because it had been delete-marked after it had been |
382 | completely inserted. Therefore, we are passing |
383 | rollback=false, just like purge does. */ |
384 | btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0, |
385 | false, &mtr); |
386 | ut_ad(err == DB_SUCCESS |
387 | || err == DB_OUT_OF_FILE_SPACE); |
388 | } else if (!index->table->is_temporary() && node->new_trx_id) { |
389 | /* We rolled back a record so that it still exists. |
390 | We must reset the DB_TRX_ID if the history is no |
391 | longer accessible by any active read view. */ |
392 | |
393 | mtr.start(); |
394 | if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) { |
395 | goto mtr_commit_exit; |
396 | } |
397 | rec_t* rec = btr_pcur_get_rec(pcur); |
398 | mtr_s_lock(&purge_sys.latch, &mtr); |
399 | if (!purge_sys.view.changes_visible(node->new_trx_id, |
400 | node->table->name)) { |
401 | goto mtr_commit_exit; |
402 | } |
403 | |
404 | ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1; |
405 | ut_ad(index->n_uniq <= MAX_REF_PARTS); |
406 | /* Reserve enough offsets for the PRIMARY KEY and 2 columns |
407 | so that we can access DB_TRX_ID, DB_ROLL_PTR. */ |
408 | ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2]; |
409 | rec_offs_init(offsets_); |
410 | offsets = rec_get_offsets( |
411 | rec, index, offsets_, true, trx_id_pos + 2, &heap); |
412 | ulint len; |
413 | ulint trx_id_offset = rec_get_nth_field_offs( |
414 | offsets, trx_id_pos, &len); |
415 | ut_ad(len == DATA_TRX_ID_LEN); |
416 | |
417 | if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) { |
418 | ut_ad(!rec_get_deleted_flag( |
419 | rec, dict_table_is_comp(node->table))); |
420 | index->set_modified(mtr); |
421 | if (page_zip_des_t* page_zip = buf_block_get_page_zip( |
422 | btr_pcur_get_block(&node->pcur))) { |
423 | page_zip_write_trx_id_and_roll_ptr( |
424 | page_zip, rec, offsets, trx_id_pos, |
425 | 0, 1ULL << ROLL_PTR_INSERT_FLAG_POS, |
426 | &mtr); |
427 | } else { |
428 | mlog_write_string(rec + trx_id_offset, |
429 | reset_trx_id, |
430 | sizeof reset_trx_id, &mtr); |
431 | } |
432 | } |
433 | } else { |
434 | goto func_exit; |
435 | } |
436 | |
437 | mtr_commit_exit: |
438 | btr_pcur_commit_specify_mtr(pcur, &mtr); |
439 | |
440 | func_exit: |
441 | node->state = UNDO_NODE_FETCH_NEXT; |
442 | |
443 | if (offsets_heap) { |
444 | mem_heap_free(offsets_heap); |
445 | } |
446 | mem_heap_free(heap); |
447 | return(err); |
448 | } |
449 | |
450 | /***********************************************************//** |
451 | Delete marks or removes a secondary index entry if found. |
452 | @return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */ |
453 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
454 | dberr_t |
455 | row_undo_mod_del_mark_or_remove_sec_low( |
456 | /*====================================*/ |
457 | undo_node_t* node, /*!< in: row undo node */ |
458 | que_thr_t* thr, /*!< in: query thread */ |
459 | dict_index_t* index, /*!< in: index */ |
460 | dtuple_t* entry, /*!< in: index entry */ |
461 | ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or |
462 | BTR_MODIFY_TREE */ |
463 | { |
464 | btr_pcur_t pcur; |
465 | btr_cur_t* btr_cur; |
466 | ibool success; |
467 | dberr_t err = DB_SUCCESS; |
468 | mtr_t mtr; |
469 | mtr_t mtr_vers; |
470 | row_search_result search_result; |
471 | const bool modify_leaf = mode == BTR_MODIFY_LEAF; |
472 | |
473 | row_mtr_start(&mtr, index, !modify_leaf); |
474 | |
475 | if (!index->is_committed()) { |
476 | /* The index->online_status may change if the index is |
477 | or was being created online, but not committed yet. It |
478 | is protected by index->lock. */ |
479 | if (modify_leaf) { |
480 | mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED; |
481 | mtr_s_lock(dict_index_get_lock(index), &mtr); |
482 | } else { |
483 | ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)); |
484 | mtr_sx_lock(dict_index_get_lock(index), &mtr); |
485 | } |
486 | |
487 | if (row_log_online_op_try(index, entry, 0)) { |
488 | goto func_exit_no_pcur; |
489 | } |
490 | } else { |
491 | /* For secondary indexes, |
492 | index->online_status==ONLINE_INDEX_COMPLETE if |
493 | index->is_committed(). */ |
494 | ut_ad(!dict_index_is_online_ddl(index)); |
495 | } |
496 | |
497 | btr_cur = btr_pcur_get_btr_cur(&pcur); |
498 | |
499 | if (dict_index_is_spatial(index)) { |
500 | if (modify_leaf) { |
501 | btr_cur->thr = thr; |
502 | mode |= BTR_RTREE_DELETE_MARK; |
503 | } |
504 | mode |= BTR_RTREE_UNDO_INS; |
505 | } |
506 | |
507 | search_result = row_search_index_entry(index, entry, mode, |
508 | &pcur, &mtr); |
509 | |
510 | switch (UNIV_EXPECT(search_result, ROW_FOUND)) { |
511 | case ROW_NOT_FOUND: |
512 | /* In crash recovery, the secondary index record may |
513 | be missing if the UPDATE did not have time to insert |
514 | the secondary index records before the crash. When we |
515 | are undoing that UPDATE in crash recovery, the record |
516 | may be missing. |
517 | |
518 | In normal processing, if an update ends in a deadlock |
519 | before it has inserted all updated secondary index |
520 | records, then the undo will not find those records. */ |
521 | goto func_exit; |
522 | case ROW_FOUND: |
523 | break; |
524 | case ROW_BUFFERED: |
525 | case ROW_NOT_DELETED_REF: |
526 | /* These are invalid outcomes, because the mode passed |
527 | to row_search_index_entry() did not include any of the |
528 | flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ |
529 | ut_error; |
530 | } |
531 | |
532 | /* We should remove the index record if no prior version of the row, |
533 | which cannot be purged yet, requires its existence. If some requires, |
534 | we should delete mark the record. */ |
535 | |
536 | mtr_vers.start(); |
537 | |
538 | success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur), |
539 | &mtr_vers); |
540 | ut_a(success); |
541 | |
542 | /* For temporary table, we can skip to check older version of |
543 | clustered index entry, because there is no MVCC or purge. */ |
544 | if (node->table->is_temporary() |
545 | || row_vers_old_has_index_entry( |
546 | FALSE, btr_pcur_get_rec(&node->pcur), |
547 | &mtr_vers, index, entry, 0, 0)) { |
548 | err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, |
549 | btr_cur, TRUE, thr, &mtr); |
550 | ut_ad(err == DB_SUCCESS); |
551 | } else { |
552 | /* Remove the index record */ |
553 | |
554 | if (dict_index_is_spatial(index)) { |
555 | rec_t* rec = btr_pcur_get_rec(&pcur); |
556 | if (rec_get_deleted_flag(rec, |
557 | dict_table_is_comp(index->table))) { |
558 | ib::error() << "Record found in index " |
559 | << index->name << " is deleted marked" |
560 | " on rollback update." ; |
561 | ut_ad(0); |
562 | } |
563 | } |
564 | |
565 | if (modify_leaf) { |
566 | err = btr_cur_optimistic_delete(btr_cur, 0, &mtr) |
567 | ? DB_SUCCESS : DB_FAIL; |
568 | } else { |
569 | /* Passing rollback=false, |
570 | because we are deleting a secondary index record: |
571 | the distinction only matters when deleting a |
572 | record that contains externally stored columns. */ |
573 | ut_ad(!index->is_primary()); |
574 | btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, |
575 | false, &mtr); |
576 | |
577 | /* The delete operation may fail if we have little |
578 | file space left: TODO: easiest to crash the database |
579 | and restart with more file space */ |
580 | } |
581 | } |
582 | |
583 | btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers); |
584 | |
585 | func_exit: |
586 | btr_pcur_close(&pcur); |
587 | func_exit_no_pcur: |
588 | mtr_commit(&mtr); |
589 | |
590 | return(err); |
591 | } |
592 | |
593 | /***********************************************************//** |
594 | Delete marks or removes a secondary index entry if found. |
595 | NOTE that if we updated the fields of a delete-marked secondary index record |
596 | so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot |
597 | return to the original values because we do not know them. But this should |
598 | not cause problems because in row0sel.cc, in queries we always retrieve the |
599 | clustered index record or an earlier version of it, if the secondary index |
600 | record through which we do the search is delete-marked. |
601 | @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ |
602 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
603 | dberr_t |
604 | row_undo_mod_del_mark_or_remove_sec( |
605 | /*================================*/ |
606 | undo_node_t* node, /*!< in: row undo node */ |
607 | que_thr_t* thr, /*!< in: query thread */ |
608 | dict_index_t* index, /*!< in: index */ |
609 | dtuple_t* entry) /*!< in: index entry */ |
610 | { |
611 | dberr_t err; |
612 | |
613 | err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index, |
614 | entry, BTR_MODIFY_LEAF); |
615 | if (err == DB_SUCCESS) { |
616 | |
617 | return(err); |
618 | } |
619 | |
620 | err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index, |
621 | entry, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE); |
622 | return(err); |
623 | } |
624 | |
625 | /***********************************************************//** |
626 | Delete unmarks a secondary index entry which must be found. It might not be |
627 | delete-marked at the moment, but it does not harm to unmark it anyway. We also |
628 | need to update the fields of the secondary index record if we updated its |
629 | fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. |
630 | @retval DB_SUCCESS on success |
631 | @retval DB_FAIL if BTR_MODIFY_TREE should be tried |
632 | @retval DB_OUT_OF_FILE_SPACE when running out of tablespace |
633 | @retval DB_DUPLICATE_KEY if the value was missing |
634 | and an insert would lead to a duplicate exists */ |
635 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
636 | dberr_t |
637 | row_undo_mod_del_unmark_sec_and_undo_update( |
638 | /*========================================*/ |
639 | ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or |
640 | BTR_MODIFY_TREE */ |
641 | que_thr_t* thr, /*!< in: query thread */ |
642 | dict_index_t* index, /*!< in: index */ |
643 | dtuple_t* entry) /*!< in: index entry */ |
644 | { |
645 | btr_pcur_t pcur; |
646 | btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); |
647 | upd_t* update; |
648 | dberr_t err = DB_SUCCESS; |
649 | big_rec_t* dummy_big_rec; |
650 | mtr_t mtr; |
651 | trx_t* trx = thr_get_trx(thr); |
652 | const ulint flags |
653 | = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG; |
654 | row_search_result search_result; |
655 | ulint orig_mode = mode; |
656 | |
657 | ut_ad(trx->id != 0); |
658 | |
659 | if (dict_index_is_spatial(index)) { |
660 | /* FIXME: Currently we do a 2-pass search for the undo |
661 | due to avoid undel-mark a wrong rec in rolling back in |
662 | partial update. Later, we could log some info in |
663 | secondary index updates to avoid this. */ |
664 | ut_ad(mode & BTR_MODIFY_LEAF); |
665 | mode |= BTR_RTREE_DELETE_MARK; |
666 | } |
667 | |
668 | try_again: |
669 | row_mtr_start(&mtr, index, !(mode & BTR_MODIFY_LEAF)); |
670 | |
671 | if (!index->is_committed()) { |
672 | /* The index->online_status may change if the index is |
673 | or was being created online, but not committed yet. It |
674 | is protected by index->lock. */ |
675 | if (mode == BTR_MODIFY_LEAF) { |
676 | mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED; |
677 | mtr_s_lock(dict_index_get_lock(index), &mtr); |
678 | } else { |
679 | ut_ad(mode == BTR_MODIFY_TREE); |
680 | mtr_sx_lock(dict_index_get_lock(index), &mtr); |
681 | } |
682 | |
683 | if (row_log_online_op_try(index, entry, trx->id)) { |
684 | goto func_exit_no_pcur; |
685 | } |
686 | } else { |
687 | /* For secondary indexes, |
688 | index->online_status==ONLINE_INDEX_COMPLETE if |
689 | index->is_committed(). */ |
690 | ut_ad(!dict_index_is_online_ddl(index)); |
691 | } |
692 | |
693 | btr_cur->thr = thr; |
694 | |
695 | search_result = row_search_index_entry(index, entry, mode, |
696 | &pcur, &mtr); |
697 | |
698 | switch (search_result) { |
699 | mem_heap_t* heap; |
700 | mem_heap_t* offsets_heap; |
701 | ulint* offsets; |
702 | case ROW_BUFFERED: |
703 | case ROW_NOT_DELETED_REF: |
704 | /* These are invalid outcomes, because the mode passed |
705 | to row_search_index_entry() did not include any of the |
706 | flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ |
707 | ut_error; |
708 | case ROW_NOT_FOUND: |
709 | /* For spatial index, if first search didn't find an |
710 | undel-marked rec, try to find a del-marked rec. */ |
711 | if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) { |
712 | if (mode != orig_mode) { |
713 | mode = orig_mode; |
714 | btr_pcur_close(&pcur); |
715 | mtr_commit(&mtr); |
716 | goto try_again; |
717 | } |
718 | } |
719 | |
720 | if (index->is_committed()) { |
721 | /* During online secondary index creation, it |
722 | is possible that MySQL is waiting for a |
723 | meta-data lock upgrade before invoking |
724 | ha_innobase::commit_inplace_alter_table() |
725 | while this ROLLBACK is executing. InnoDB has |
726 | finished building the index, but it does not |
727 | yet exist in MySQL. In this case, we suppress |
728 | the printout to the error log. */ |
729 | ib::warn() << "Record in index " << index->name |
730 | << " of table " << index->table->name |
731 | << " was not found on rollback, trying to" |
732 | " insert: " << *entry |
733 | << " at: " << rec_index_print( |
734 | btr_cur_get_rec(btr_cur), index); |
735 | } |
736 | |
737 | if (btr_cur->up_match >= dict_index_get_n_unique(index) |
738 | || btr_cur->low_match >= dict_index_get_n_unique(index)) { |
739 | if (index->is_committed()) { |
740 | ib::warn() << "Record in index " << index->name |
741 | << " was not found on rollback, and" |
742 | " a duplicate exists" ; |
743 | } |
744 | err = DB_DUPLICATE_KEY; |
745 | break; |
746 | } |
747 | |
748 | /* Insert the missing record that we were trying to |
749 | delete-unmark. */ |
750 | big_rec_t* big_rec; |
751 | rec_t* insert_rec; |
752 | offsets = NULL; |
753 | offsets_heap = NULL; |
754 | |
755 | err = btr_cur_optimistic_insert( |
756 | flags, btr_cur, &offsets, &offsets_heap, |
757 | entry, &insert_rec, &big_rec, |
758 | 0, thr, &mtr); |
759 | ut_ad(!big_rec); |
760 | |
761 | if (err == DB_FAIL && mode == BTR_MODIFY_TREE) { |
762 | err = btr_cur_pessimistic_insert( |
763 | flags, btr_cur, |
764 | &offsets, &offsets_heap, |
765 | entry, &insert_rec, &big_rec, |
766 | 0, thr, &mtr); |
767 | /* There are no off-page columns in |
768 | secondary indexes. */ |
769 | ut_ad(!big_rec); |
770 | } |
771 | |
772 | if (err == DB_SUCCESS) { |
773 | page_update_max_trx_id( |
774 | btr_cur_get_block(btr_cur), |
775 | btr_cur_get_page_zip(btr_cur), |
776 | trx->id, &mtr); |
777 | } |
778 | |
779 | if (offsets_heap) { |
780 | mem_heap_free(offsets_heap); |
781 | } |
782 | |
783 | break; |
784 | case ROW_FOUND: |
785 | err = btr_cur_del_mark_set_sec_rec( |
786 | BTR_NO_LOCKING_FLAG, |
787 | btr_cur, FALSE, thr, &mtr); |
788 | |
789 | ut_a(err == DB_SUCCESS); |
790 | heap = mem_heap_create( |
791 | sizeof(upd_t) |
792 | + dtuple_get_n_fields(entry) * sizeof(upd_field_t)); |
793 | offsets_heap = NULL; |
794 | offsets = rec_get_offsets( |
795 | btr_cur_get_rec(btr_cur), |
796 | index, NULL, true, ULINT_UNDEFINED, &offsets_heap); |
797 | update = row_upd_build_sec_rec_difference_binary( |
798 | btr_cur_get_rec(btr_cur), index, offsets, entry, heap); |
799 | if (upd_get_n_fields(update) == 0) { |
800 | |
801 | /* Do nothing */ |
802 | |
803 | } else if (mode != BTR_MODIFY_TREE) { |
804 | /* Try an optimistic updating of the record, keeping |
805 | changes within the page */ |
806 | |
807 | /* TODO: pass offsets, not &offsets */ |
808 | err = btr_cur_optimistic_update( |
809 | flags, btr_cur, &offsets, &offsets_heap, |
810 | update, 0, thr, thr_get_trx(thr)->id, &mtr); |
811 | switch (err) { |
812 | case DB_OVERFLOW: |
813 | case DB_UNDERFLOW: |
814 | case DB_ZIP_OVERFLOW: |
815 | err = DB_FAIL; |
816 | default: |
817 | break; |
818 | } |
819 | } else { |
820 | err = btr_cur_pessimistic_update( |
821 | flags, btr_cur, &offsets, &offsets_heap, |
822 | heap, &dummy_big_rec, |
823 | update, 0, thr, thr_get_trx(thr)->id, &mtr); |
824 | ut_a(!dummy_big_rec); |
825 | } |
826 | |
827 | mem_heap_free(heap); |
828 | mem_heap_free(offsets_heap); |
829 | } |
830 | |
831 | btr_pcur_close(&pcur); |
832 | func_exit_no_pcur: |
833 | mtr_commit(&mtr); |
834 | |
835 | return(err); |
836 | } |
837 | |
838 | /***********************************************************//** |
839 | Flags a secondary index corrupted. */ |
840 | static MY_ATTRIBUTE((nonnull)) |
841 | void |
842 | row_undo_mod_sec_flag_corrupted( |
843 | /*============================*/ |
844 | trx_t* trx, /*!< in/out: transaction */ |
845 | dict_index_t* index) /*!< in: secondary index */ |
846 | { |
847 | ut_ad(!dict_index_is_clust(index)); |
848 | |
849 | switch (trx->dict_operation_lock_mode) { |
850 | case RW_S_LATCH: |
851 | /* Because row_undo() is holding an S-latch |
852 | on the data dictionary during normal rollback, |
853 | we can only mark the index corrupted in the |
854 | data dictionary cache. TODO: fix this somehow.*/ |
855 | mutex_enter(&dict_sys->mutex); |
856 | dict_set_corrupted_index_cache_only(index); |
857 | mutex_exit(&dict_sys->mutex); |
858 | break; |
859 | default: |
860 | ut_ad(0); |
861 | /* fall through */ |
862 | case RW_X_LATCH: |
863 | /* This should be the rollback of a data dictionary |
864 | transaction. */ |
865 | dict_set_corrupted(index, trx, "rollback" ); |
866 | } |
867 | } |
868 | |
869 | /***********************************************************//** |
870 | Undoes a modify in secondary indexes when undo record type is UPD_DEL. |
871 | @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ |
872 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
873 | dberr_t |
874 | row_undo_mod_upd_del_sec( |
875 | /*=====================*/ |
876 | undo_node_t* node, /*!< in: row undo node */ |
877 | que_thr_t* thr) /*!< in: query thread */ |
878 | { |
879 | mem_heap_t* heap; |
880 | dberr_t err = DB_SUCCESS; |
881 | |
882 | ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); |
883 | ut_ad(!node->undo_row); |
884 | |
885 | heap = mem_heap_create(1024); |
886 | |
887 | while (node->index != NULL) { |
888 | dict_index_t* index = node->index; |
889 | dtuple_t* entry; |
890 | |
891 | if (index->type & DICT_FTS) { |
892 | dict_table_next_uncorrupted_index(node->index); |
893 | continue; |
894 | } |
895 | |
896 | /* During online index creation, |
897 | HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCk |
898 | should guarantee that any active transaction has not modified |
899 | indexed columns such that col->ord_part was 0 at the |
900 | time when the undo log record was written. When we get |
901 | to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, |
902 | it should always cover all affected indexes. */ |
903 | entry = row_build_index_entry( |
904 | node->row, node->ext, index, heap); |
905 | |
906 | if (UNIV_UNLIKELY(!entry)) { |
907 | /* The database must have crashed after |
908 | inserting a clustered index record but before |
909 | writing all the externally stored columns of |
910 | that record. Because secondary index entries |
911 | are inserted after the clustered index record, |
912 | we may assume that the secondary index record |
913 | does not exist. However, this situation may |
914 | only occur during the rollback of incomplete |
915 | transactions. */ |
916 | ut_a(thr_is_recv(thr)); |
917 | } else { |
918 | err = row_undo_mod_del_mark_or_remove_sec( |
919 | node, thr, index, entry); |
920 | |
921 | if (UNIV_UNLIKELY(err != DB_SUCCESS)) { |
922 | |
923 | break; |
924 | } |
925 | } |
926 | |
927 | mem_heap_empty(heap); |
928 | dict_table_next_uncorrupted_index(node->index); |
929 | } |
930 | |
931 | mem_heap_free(heap); |
932 | |
933 | return(err); |
934 | } |
935 | |
936 | /***********************************************************//** |
937 | Undoes a modify in secondary indexes when undo record type is DEL_MARK. |
938 | @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ |
939 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
940 | dberr_t |
941 | row_undo_mod_del_mark_sec( |
942 | /*======================*/ |
943 | undo_node_t* node, /*!< in: row undo node */ |
944 | que_thr_t* thr) /*!< in: query thread */ |
945 | { |
946 | mem_heap_t* heap; |
947 | dberr_t err = DB_SUCCESS; |
948 | |
949 | ut_ad(!node->undo_row); |
950 | |
951 | heap = mem_heap_create(1024); |
952 | |
953 | while (node->index != NULL) { |
954 | dict_index_t* index = node->index; |
955 | dtuple_t* entry; |
956 | |
957 | if (index->type == DICT_FTS) { |
958 | dict_table_next_uncorrupted_index(node->index); |
959 | continue; |
960 | } |
961 | |
962 | /* During online index creation, |
963 | HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK |
964 | should guarantee that any active transaction has not modified |
965 | indexed columns such that col->ord_part was 0 at the |
966 | time when the undo log record was written. When we get |
967 | to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, |
968 | it should always cover all affected indexes. */ |
969 | entry = row_build_index_entry( |
970 | node->row, node->ext, index, heap); |
971 | |
972 | ut_a(entry); |
973 | |
974 | err = row_undo_mod_del_unmark_sec_and_undo_update( |
975 | BTR_MODIFY_LEAF, thr, index, entry); |
976 | if (err == DB_FAIL) { |
977 | err = row_undo_mod_del_unmark_sec_and_undo_update( |
978 | BTR_MODIFY_TREE, thr, index, entry); |
979 | } |
980 | |
981 | if (err == DB_DUPLICATE_KEY) { |
982 | row_undo_mod_sec_flag_corrupted( |
983 | thr_get_trx(thr), index); |
984 | err = DB_SUCCESS; |
985 | /* Do not return any error to the caller. The |
986 | duplicate will be reported by ALTER TABLE or |
987 | CREATE UNIQUE INDEX. Unfortunately we cannot |
988 | report the duplicate key value to the DDL |
989 | thread, because the altered_table object is |
990 | private to its call stack. */ |
991 | } else if (err != DB_SUCCESS) { |
992 | break; |
993 | } |
994 | |
995 | mem_heap_empty(heap); |
996 | dict_table_next_uncorrupted_index(node->index); |
997 | } |
998 | |
999 | mem_heap_free(heap); |
1000 | |
1001 | return(err); |
1002 | } |
1003 | |
1004 | /***********************************************************//** |
1005 | Undoes a modify in secondary indexes when undo record type is UPD_EXIST. |
1006 | @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ |
1007 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
1008 | dberr_t |
1009 | row_undo_mod_upd_exist_sec( |
1010 | /*=======================*/ |
1011 | undo_node_t* node, /*!< in: row undo node */ |
1012 | que_thr_t* thr) /*!< in: query thread */ |
1013 | { |
1014 | mem_heap_t* heap; |
1015 | dberr_t err = DB_SUCCESS; |
1016 | |
1017 | if (node->index == NULL |
1018 | || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) { |
1019 | /* No change in secondary indexes */ |
1020 | |
1021 | return(err); |
1022 | } |
1023 | |
1024 | heap = mem_heap_create(1024); |
1025 | |
1026 | |
1027 | while (node->index != NULL) { |
1028 | dict_index_t* index = node->index; |
1029 | dtuple_t* entry; |
1030 | |
1031 | if (dict_index_is_spatial(index)) { |
1032 | if (!row_upd_changes_ord_field_binary_func( |
1033 | index, node->update, |
1034 | #ifdef UNIV_DEBUG |
1035 | thr, |
1036 | #endif /* UNIV_DEBUG */ |
1037 | node->row, |
1038 | node->ext, ROW_BUILD_FOR_UNDO)) { |
1039 | dict_table_next_uncorrupted_index(node->index); |
1040 | continue; |
1041 | } |
1042 | } else { |
1043 | if (index->type == DICT_FTS |
1044 | || !row_upd_changes_ord_field_binary(index, |
1045 | node->update, |
1046 | thr, node->row, |
1047 | node->ext)) { |
1048 | dict_table_next_uncorrupted_index(node->index); |
1049 | continue; |
1050 | } |
1051 | } |
1052 | |
1053 | /* Build the newest version of the index entry */ |
1054 | entry = row_build_index_entry(node->row, node->ext, |
1055 | index, heap); |
1056 | if (UNIV_UNLIKELY(!entry)) { |
1057 | /* The server must have crashed in |
1058 | row_upd_clust_rec_by_insert() before |
1059 | the updated externally stored columns (BLOBs) |
1060 | of the new clustered index entry were written. */ |
1061 | |
1062 | /* The table must be in DYNAMIC or COMPRESSED |
1063 | format. REDUNDANT and COMPACT formats |
1064 | store a local 768-byte prefix of each |
1065 | externally stored column. */ |
1066 | ut_a(dict_table_has_atomic_blobs(index->table)); |
1067 | |
1068 | /* This is only legitimate when |
1069 | rolling back an incomplete transaction |
1070 | after crash recovery. */ |
1071 | ut_a(thr_get_trx(thr)->is_recovered); |
1072 | |
1073 | /* The server must have crashed before |
1074 | completing the insert of the new |
1075 | clustered index entry and before |
1076 | inserting to the secondary indexes. |
1077 | Because node->row was not yet written |
1078 | to this index, we can ignore it. But |
1079 | we must restore node->undo_row. */ |
1080 | } else { |
1081 | /* NOTE that if we updated the fields of a |
1082 | delete-marked secondary index record so that |
1083 | alphabetically they stayed the same, e.g., |
1084 | 'abc' -> 'aBc', we cannot return to the |
1085 | original values because we do not know them. |
1086 | But this should not cause problems because |
1087 | in row0sel.cc, in queries we always retrieve |
1088 | the clustered index record or an earlier |
1089 | version of it, if the secondary index record |
1090 | through which we do the search is |
1091 | delete-marked. */ |
1092 | |
1093 | err = row_undo_mod_del_mark_or_remove_sec( |
1094 | node, thr, index, entry); |
1095 | if (err != DB_SUCCESS) { |
1096 | break; |
1097 | } |
1098 | } |
1099 | |
1100 | mem_heap_empty(heap); |
1101 | /* We may have to update the delete mark in the |
1102 | secondary index record of the previous version of |
1103 | the row. We also need to update the fields of |
1104 | the secondary index record if we updated its fields |
1105 | but alphabetically they stayed the same, e.g., |
1106 | 'abc' -> 'aBc'. */ |
1107 | if (dict_index_is_spatial(index)) { |
1108 | entry = row_build_index_entry_low(node->undo_row, |
1109 | node->undo_ext, |
1110 | index, heap, |
1111 | ROW_BUILD_FOR_UNDO); |
1112 | } else { |
1113 | entry = row_build_index_entry(node->undo_row, |
1114 | node->undo_ext, |
1115 | index, heap); |
1116 | } |
1117 | |
1118 | ut_a(entry); |
1119 | |
1120 | err = row_undo_mod_del_unmark_sec_and_undo_update( |
1121 | BTR_MODIFY_LEAF, thr, index, entry); |
1122 | if (err == DB_FAIL) { |
1123 | err = row_undo_mod_del_unmark_sec_and_undo_update( |
1124 | BTR_MODIFY_TREE, thr, index, entry); |
1125 | } |
1126 | |
1127 | if (err == DB_DUPLICATE_KEY) { |
1128 | row_undo_mod_sec_flag_corrupted( |
1129 | thr_get_trx(thr), index); |
1130 | err = DB_SUCCESS; |
1131 | } else if (err != DB_SUCCESS) { |
1132 | break; |
1133 | } |
1134 | |
1135 | mem_heap_empty(heap); |
1136 | dict_table_next_uncorrupted_index(node->index); |
1137 | } |
1138 | |
1139 | mem_heap_free(heap); |
1140 | |
1141 | return(err); |
1142 | } |
1143 | |
1144 | /***********************************************************//** |
1145 | Parses the row reference and other info in a modify undo log record. */ |
1146 | static MY_ATTRIBUTE((nonnull)) |
1147 | void |
1148 | row_undo_mod_parse_undo_rec( |
1149 | /*========================*/ |
1150 | undo_node_t* node, /*!< in: row undo node */ |
1151 | ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */ |
1152 | { |
1153 | dict_index_t* clust_index; |
1154 | byte* ptr; |
1155 | undo_no_t undo_no; |
1156 | table_id_t table_id; |
1157 | trx_id_t trx_id; |
1158 | roll_ptr_t roll_ptr; |
1159 | ulint info_bits; |
1160 | ulint type; |
1161 | ulint cmpl_info; |
1162 | bool dummy_extern; |
1163 | |
1164 | ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info, |
1165 | &dummy_extern, &undo_no, &table_id); |
1166 | node->rec_type = type; |
1167 | |
1168 | node->table = dict_table_open_on_id( |
1169 | table_id, dict_locked, DICT_TABLE_OP_NORMAL); |
1170 | |
1171 | /* TODO: other fixes associated with DROP TABLE + rollback in the |
1172 | same table by another user */ |
1173 | |
1174 | if (node->table == NULL) { |
1175 | /* Table was dropped */ |
1176 | return; |
1177 | } |
1178 | |
1179 | ut_ad(!node->table->skip_alter_undo); |
1180 | |
1181 | if (UNIV_UNLIKELY(!fil_table_accessible(node->table))) { |
1182 | close_table: |
1183 | /* Normally, tables should not disappear or become |
1184 | unaccessible during ROLLBACK, because they should be |
1185 | protected by InnoDB table locks. TRUNCATE TABLE |
1186 | or table corruption could be valid exceptions. |
1187 | |
1188 | FIXME: When running out of temporary tablespace, it |
1189 | would probably be better to just drop all temporary |
1190 | tables (and temporary undo log records) of the current |
1191 | connection, instead of doing this rollback. */ |
1192 | dict_table_close(node->table, dict_locked, FALSE); |
1193 | node->table = NULL; |
1194 | return; |
1195 | } |
1196 | |
1197 | clust_index = dict_table_get_first_index(node->table); |
1198 | |
1199 | ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, |
1200 | &info_bits); |
1201 | |
1202 | ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), |
1203 | node->heap); |
1204 | |
1205 | ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id, |
1206 | roll_ptr, info_bits, |
1207 | node->heap, &(node->update)); |
1208 | node->new_trx_id = trx_id; |
1209 | node->cmpl_info = cmpl_info; |
1210 | ut_ad(!node->ref->info_bits); |
1211 | |
1212 | if (node->update->info_bits & REC_INFO_MIN_REC_FLAG) { |
1213 | /* This must be an undo log record for a subsequent |
1214 | instant ADD COLUMN on a table, extending the |
1215 | 'default value' record. */ |
1216 | ut_ad(clust_index->is_instant()); |
1217 | if (node->update->info_bits != REC_INFO_MIN_REC_FLAG) { |
1218 | ut_ad(!"wrong info_bits in undo log record" ); |
1219 | goto close_table; |
1220 | } |
1221 | node->update->info_bits = REC_INFO_DEFAULT_ROW; |
1222 | const_cast<dtuple_t*>(node->ref)->info_bits |
1223 | = REC_INFO_DEFAULT_ROW; |
1224 | } |
1225 | |
1226 | if (!row_undo_search_clust_to_pcur(node)) { |
1227 | /* As long as this rolling-back transaction exists, |
1228 | the PRIMARY KEY value pointed to by the undo log |
1229 | record should exist. |
1230 | |
1231 | However, if InnoDB is killed during a rollback, or |
1232 | shut down during the rollback of recovered |
1233 | transactions, then after restart we may try to roll |
1234 | back some of the same undo log records again, because |
1235 | trx_roll_try_truncate() is not being invoked after |
1236 | every undo log record. |
1237 | |
1238 | It is also possible that the record |
1239 | was not modified yet (the DB_ROLL_PTR does not match |
1240 | node->roll_ptr) and thus there is nothing to roll back. |
1241 | |
1242 | btr_cur_upd_lock_and_undo() only writes the undo log |
1243 | record after successfully acquiring an exclusive lock |
1244 | on the the clustered index record. That lock will not |
1245 | be released before the transaction is committed or |
1246 | fully rolled back. (Exception: if the server was |
1247 | killed, restarted, and shut down again before the |
1248 | rollback of the recovered transaction was completed, |
1249 | it is possible that the transaction was partially |
1250 | rolled back and locks released.) */ |
1251 | goto close_table; |
1252 | } |
1253 | |
1254 | /* Extract indexed virtual columns from undo log */ |
1255 | if (node->table->n_v_cols) { |
1256 | row_upd_replace_vcol(node->row, node->table, |
1257 | node->update, false, node->undo_row, |
1258 | (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) |
1259 | ? NULL : ptr); |
1260 | } |
1261 | } |
1262 | |
1263 | /***********************************************************//** |
1264 | Undoes a modify operation on a row of a table. |
1265 | @return DB_SUCCESS or error code */ |
1266 | dberr_t |
1267 | row_undo_mod( |
1268 | /*=========*/ |
1269 | undo_node_t* node, /*!< in: row undo node */ |
1270 | que_thr_t* thr) /*!< in: query thread */ |
1271 | { |
1272 | dberr_t err; |
1273 | ibool dict_locked; |
1274 | |
1275 | ut_ad(node != NULL); |
1276 | ut_ad(thr != NULL); |
1277 | ut_ad(node->state == UNDO_NODE_MODIFY); |
1278 | ut_ad(node->trx->in_rollback); |
1279 | ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); |
1280 | |
1281 | dict_locked = thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH; |
1282 | |
1283 | ut_ad(thr_get_trx(thr) == node->trx); |
1284 | |
1285 | row_undo_mod_parse_undo_rec(node, dict_locked); |
1286 | |
1287 | if (node->table == NULL) { |
1288 | /* It is already undone, or will be undone by another query |
1289 | thread, or table was dropped */ |
1290 | |
1291 | node->state = UNDO_NODE_FETCH_NEXT; |
1292 | |
1293 | return(DB_SUCCESS); |
1294 | } |
1295 | |
1296 | node->index = dict_table_get_first_index(node->table); |
1297 | ut_ad(dict_index_is_clust(node->index)); |
1298 | |
1299 | if (node->ref->info_bits) { |
1300 | ut_ad(node->ref->info_bits == REC_INFO_DEFAULT_ROW); |
1301 | goto rollback_clust; |
1302 | } |
1303 | |
1304 | /* Skip the clustered index (the first index) */ |
1305 | node->index = dict_table_get_next_index(node->index); |
1306 | |
1307 | /* Skip all corrupted secondary index */ |
1308 | dict_table_skip_corrupt_index(node->index); |
1309 | |
1310 | switch (node->rec_type) { |
1311 | case TRX_UNDO_UPD_EXIST_REC: |
1312 | err = row_undo_mod_upd_exist_sec(node, thr); |
1313 | break; |
1314 | case TRX_UNDO_DEL_MARK_REC: |
1315 | err = row_undo_mod_del_mark_sec(node, thr); |
1316 | break; |
1317 | case TRX_UNDO_UPD_DEL_REC: |
1318 | err = row_undo_mod_upd_del_sec(node, thr); |
1319 | break; |
1320 | default: |
1321 | ut_error; |
1322 | err = DB_ERROR; |
1323 | } |
1324 | |
1325 | if (err == DB_SUCCESS) { |
1326 | rollback_clust: |
1327 | err = row_undo_mod_clust(node, thr); |
1328 | |
1329 | bool update_statistics |
1330 | = !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE); |
1331 | |
1332 | if (err == DB_SUCCESS && node->table->stat_initialized) { |
1333 | switch (node->rec_type) { |
1334 | case TRX_UNDO_UPD_EXIST_REC: |
1335 | break; |
1336 | case TRX_UNDO_DEL_MARK_REC: |
1337 | dict_table_n_rows_inc(node->table); |
1338 | update_statistics = update_statistics |
1339 | || !srv_stats_include_delete_marked; |
1340 | break; |
1341 | case TRX_UNDO_UPD_DEL_REC: |
1342 | dict_table_n_rows_dec(node->table); |
1343 | update_statistics = update_statistics |
1344 | || !srv_stats_include_delete_marked; |
1345 | break; |
1346 | } |
1347 | |
1348 | /* Do not attempt to update statistics when |
1349 | executing ROLLBACK in the InnoDB SQL |
1350 | interpreter, because in that case we would |
1351 | already be holding dict_sys->mutex, which |
1352 | would be acquired when updating statistics. */ |
1353 | if (update_statistics && !dict_locked) { |
1354 | dict_stats_update_if_needed(node->table); |
1355 | } else { |
1356 | node->table->stat_modified_counter++; |
1357 | } |
1358 | } |
1359 | } |
1360 | |
1361 | dict_table_close(node->table, dict_locked, FALSE); |
1362 | |
1363 | node->table = NULL; |
1364 | |
1365 | return(err); |
1366 | } |
1367 | |