/*****************************************************************************

Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file row/row0vers.cc
Row versions

Created 2/6/1997 Heikki Tuuri
*******************************************************/

#include "ha_prototypes.h"

#include "row0vers.h"
#include "dict0dict.h"
#include "dict0boot.h"
#include "btr0btr.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "trx0rec.h"
#include "que0que.h"
#include "row0row.h"
#include "row0upd.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "row0mysql.h"

/** Check whether all non-virtual index fields are equal.
@param[in]	index	the secondary index
@param[in]	a	first index entry to compare
@param[in]	b	second index entry to compare
@return whether all non-virtual fields are equal */
static
bool
row_vers_non_virtual_fields_equal(
	const dict_index_t*	index,
	const dfield_t*		a,
	const dfield_t*		b)
{
	const dict_field_t* end = &index->fields[index->n_fields];

	for (const dict_field_t* ifield = index->fields; ifield != end;
	     ifield++) {
		if (!ifield->col->is_virtual()
		    && cmp_dfield_dfield(a++, b++)) {
			return false;
		}
	}

	return true;
}

/** Determine if an active transaction has inserted or modified a secondary
index record.
@param[in,out]	caller_trx	trx of current thread
@param[in]	clust_rec	clustered index record
@param[in]	clust_index	clustered index
@param[in]	rec		secondary index record
@param[in]	index		secondary index
@param[in]	offsets		rec_get_offsets(rec, index)
@param[in,out]	mtr		mini-transaction
@return the active transaction; trx->release_reference() must be invoked
@retval NULL if the record was committed */
UNIV_INLINE
trx_t*
row_vers_impl_x_locked_low(
	trx_t*		caller_trx,
	const rec_t*	clust_rec,
	dict_index_t*	clust_index,
	const rec_t*	rec,
	dict_index_t*	index,
	const ulint*	offsets,
	mtr_t*		mtr)
{
	trx_id_t	trx_id;
	ulint		comp;
	ulint		rec_del;
	const rec_t*	version;
	rec_t*		prev_version = NULL;
	ulint*		clust_offsets;
	mem_heap_t*	heap;
	dtuple_t*	ientry = NULL;
	mem_heap_t*	v_heap = NULL;
	const dtuple_t*	cur_vrow = NULL;

	DBUG_ENTER("row_vers_impl_x_locked_low");

	ut_ad(rec_offs_validate(rec, index, offsets));

	if (ulint trx_id_offset = clust_index->trx_id_offset) {
		trx_id = mach_read_from_6(clust_rec + trx_id_offset);
		if (trx_id == 0) {
			/* The transaction history was already purged. */
			DBUG_RETURN(0);
		}
	}

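	/* clust_index->trx_id_offset is nonzero only when DB_TRX_ID is
	at a fixed offset within the record, that is, when it is not
	preceded by any variable-length columns. In the general case,
	compute the offsets below and read DB_TRX_ID through them. */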
	heap = mem_heap_create(1024);

	clust_offsets = rec_get_offsets(
		clust_rec, clust_index, NULL, true, ULINT_UNDEFINED, &heap);

	trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
	if (trx_id == 0) {
		/* The transaction history was already purged. */
		mem_heap_free(heap);
		DBUG_RETURN(0);
	}

	trx_t*	trx = trx_sys.find(caller_trx, trx_id);

	if (trx == 0) {
		/* The transaction that modified or inserted clust_rec is no
		longer active, or it is corrupt: no implicit lock on rec */
		lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
					 clust_offsets);
		mem_heap_free(heap);
		DBUG_RETURN(0);
	}

	comp = page_rec_is_comp(rec);
	ut_ad(index->table == clust_index->table);
	ut_ad(!!comp == dict_table_is_comp(index->table));
	ut_ad(!comp == !page_rec_is_comp(clust_rec));

	rec_del = rec_get_deleted_flag(rec, comp);

	if (dict_index_has_virtual(index)) {
		ulint	n_ext;
		ulint	est_size = DTUPLE_EST_ALLOC(index->n_fields);

		/* Allocate the dtuple of virtual columns extracted from
		the undo log in its own heap, so that it will not be
		freed as we iterate in the version loop below. */
		v_heap = mem_heap_create(est_size);
		ientry = row_rec_to_index_entry(
			rec, index, offsets, &n_ext, v_heap);
	}

	/* We look up if some earlier version, which was modified by
	the trx_id transaction, of the clustered index record would
	require rec to be in a different state (delete marked or
	unmarked, or have different field values, or not existing). If
	there is such a version, then rec was modified by the trx_id
	transaction, and it has an implicit x-lock on rec. Note that
	if clust_rec itself would require rec to be in a different
	state, then the trx_id transaction has not yet had time to
	modify rec, and does not necessarily have an implicit x-lock
	on rec. */

	for (version = clust_rec;; version = prev_version) {
		row_ext_t*	ext;
		dtuple_t*	row;
		dtuple_t*	entry;
		ulint		vers_del;
		trx_id_t	prev_trx_id;
		mem_heap_t*	old_heap = heap;
		const dtuple_t*	vrow = NULL;

		/* We keep the semaphore in mtr on the clust_rec page, so
		that no other transaction can update it and get an
		implicit x-lock on rec until mtr_commit(mtr). */

		heap = mem_heap_create(1024);

		trx_undo_prev_version_build(
			clust_rec, mtr, version, clust_index, clust_offsets,
			heap, &prev_version, NULL,
			dict_index_has_virtual(index) ? &vrow : NULL, 0);

		/* The oldest visible clustered index version must not be
		delete-marked, because we never start a transaction by
		inserting a delete-marked record. */
		ut_ad(prev_version
		      || !rec_get_deleted_flag(version, comp)
		      || !trx_sys.is_registered(caller_trx, trx_id));

		/* Free version and clust_offsets. */
		mem_heap_free(old_heap);

		if (prev_version == NULL) {

			/* We reached the oldest visible version without
			finding an older version of clust_rec that would
			match the secondary index record. If the secondary
			index record is not delete marked, then clust_rec
			is considered the correct match of the secondary
			index record and hence holds the implicit lock. */

			if (rec_del) {
				/* The secondary index record is del marked.
				So, the implicit lock holder of clust_rec
				did not modify the secondary index record yet,
				and is not holding an implicit lock on it.

				This assumes that whenever a row is inserted
				or updated, the leaf page record always is
				created with a clear delete-mark flag.
				(We never insert a delete-marked record.) */
				trx->release_reference();
				trx = 0;
			}

			break;
		}

		clust_offsets = rec_get_offsets(
			prev_version, clust_index, NULL, true,
			ULINT_UNDEFINED, &heap);

		vers_del = rec_get_deleted_flag(prev_version, comp);

		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
						 clust_offsets);

		/* The stack of versions is locked by mtr. Thus, it
		is safe to fetch the prefixes for externally stored
		columns. */

		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
				clust_offsets,
				NULL, NULL, NULL, &ext, heap);

		if (dict_index_has_virtual(index)) {
			if (vrow) {
				/* Keep the virtual row info for the next
				version */
				cur_vrow = dtuple_copy(vrow, v_heap);
				dtuple_dup_v_fld(cur_vrow, v_heap);
			}

			if (!cur_vrow) {
				/* Build index entry out of row */
				entry = row_build_index_entry(row, ext, index,
							      heap);

				/* entry could only be NULL (the
				clustered index record could contain
				BLOB pointers that are NULL) if we
				were accessing a freshly inserted
				record before it was fully inserted.
				prev_version cannot possibly be such
				an incomplete record, because its
				transaction would have to be committed
				in order for later versions of the
				record to be able to exist. */
				ut_ad(entry);

				/* If the indexed virtual columns have
				changed, there must be an undo log record
				to generate vrow. Otherwise, they have
				not changed, so there is no need to
				compare. */
				if (!row_vers_non_virtual_fields_equal(
					    index,
					    ientry->fields, entry->fields)) {
					if (rec_del != vers_del) {
						break;
					}
				} else if (!rec_del) {
					break;
				}

				goto result_check;
			} else {
				ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
				dtuple_copy_v_fields(row, cur_vrow);
			}
		}

		entry = row_build_index_entry(row, ext, index, heap);

		/* entry could only be NULL (the clustered index
		record could contain BLOB pointers that are NULL) if
		we were accessing a freshly inserted record before it
		was fully inserted. prev_version cannot possibly be
		such an incomplete record, because its transaction
		would have to be committed in order for later versions
		of the record to be able to exist. */
		ut_ad(entry);

		/* If we get here, we know that the trx_id transaction
		modified prev_version. Let us check if prev_version
		would require rec to be in a different state. */

		/* The previous version of clust_rec must be
		accessible, because clust_rec was not a fresh insert.
		There is no guarantee that the transaction is still
		active. */

		/* We check whether entry and rec compare equal in the
		alphabetical ordering. */
		if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
			/* The delete marks of rec and prev_version should be
			equal for rec to be in the state required by
			prev_version */

			if (rec_del != vers_del) {

				break;
			}

			/* It is possible that the row was updated so that
			the secondary index record remained the same in
			alphabetical ordering, but the field values still
			changed. For example, 'abc' -> 'ABC'. Check that
			case as well. */

			dtuple_set_types_binary(
				entry, dtuple_get_n_fields(entry));

			if (0 != cmp_dtuple_rec(entry, rec, offsets)) {

				break;
			}

		} else if (!rec_del) {
			/* The delete mark should be set in rec for it to be
			in the state required by prev_version */

			break;
		}

result_check:
		if (trx->id != prev_trx_id) {
			/* prev_version was the first version modified by
			the trx_id transaction: no implicit x-lock */

			trx->release_reference();
			trx = 0;
			break;
		}
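
		/* prev_version was also modified by the trx_id
		transaction: check an even older version on the next
		iteration. */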
	}

	DBUG_PRINT("info", ("Implicit lock is held by trx:" TRX_ID_FMT,
			    trx_id));

	if (v_heap != NULL) {
		mem_heap_free(v_heap);
	}

	mem_heap_free(heap);
	DBUG_RETURN(trx);
}

/** Determine if an active transaction has inserted or modified a secondary
index record.
@param[in,out]	caller_trx	trx of current thread
@param[in]	rec	secondary index record
@param[in]	index	secondary index
@param[in]	offsets	rec_get_offsets(rec, index)
@return the active transaction; trx->release_reference() must be invoked
@retval NULL if the record was committed */
trx_t*
row_vers_impl_x_locked(
	trx_t*		caller_trx,
	const rec_t*	rec,
	dict_index_t*	index,
	const ulint*	offsets)
{
	mtr_t		mtr;
	trx_t*		trx;
	const rec_t*	clust_rec;
	dict_index_t*	clust_index;

	ut_ad(!lock_mutex_own());
	ut_ad(!mutex_own(&trx_sys.mutex));

	mtr_start(&mtr);

	/* Search for the clustered index record. The latch on the
	page of clust_rec locks the top of the stack of versions. The
	bottom of the version stack is not locked; oldest versions may
	disappear by the fact that transactions may be committed and
	collected by the purge. This is not a problem, because we are
	only interested in active transactions. */

	clust_rec = row_get_clust_rec(
		BTR_SEARCH_LEAF, rec, index, &clust_index, &mtr);

	if (!clust_rec) {
		/* In a rare case it is possible that no clust rec is found
		for a secondary index record: if in row0umod.cc
		row_undo_mod_remove_clust_low() we have already removed the
		clust rec, while purge is still cleaning and removing
		secondary index records associated with earlier versions of
		the clustered index record. In that case there cannot be
		any implicit lock on the secondary index record, because
		an active transaction which has modified the secondary index
		record has also modified the clustered index record. And in
		a rollback we always undo the modifications to secondary index
		records before the clustered index record. */

		trx = 0;
	} else {
		trx = row_vers_impl_x_locked_low(
			caller_trx, clust_rec, clust_index, rec, index,
			offsets, &mtr);

		ut_ad(trx == 0 || trx->is_referenced());
	}

	mtr_commit(&mtr);

	return(trx);
}

/** Build the virtual column values from the current clustered index
record data.
@param[in,out]	row		the clustered index row in dtuple form
@param[in]	clust_index	clustered index
@param[in]	index		the secondary index
@param[in]	heap		heap used to build the virtual dtuple */
static
void
row_vers_build_clust_v_col(
	dtuple_t*	row,
	dict_index_t*	clust_index,
	dict_index_t*	index,
	mem_heap_t*	heap)
{
	mem_heap_t*	local_heap = NULL;
	for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
		const dict_field_t* ind_field = dict_index_get_nth_field(
			index, i);

		if (ind_field->col->is_virtual()) {
			const dict_v_col_t*	col;

			col = reinterpret_cast<const dict_v_col_t*>(
				ind_field->col);

			innobase_get_computed_value(
				row, col, clust_index, &local_heap,
				heap, NULL, current_thd, NULL, NULL,
				NULL, NULL);
		}
	}

	if (local_heap) {
		mem_heap_free(local_heap);
	}
}

/** Build the latest virtual column data from the undo log.
@param[in]	in_purge	whether this is the purge thread
@param[in]	rec		clustered index record
@param[in]	clust_index	clustered index
@param[in,out]	clust_offsets	offsets on the clustered index record
@param[in]	index		the secondary index
@param[in]	roll_ptr	the rollback pointer for the purging record
@param[in]	trx_id		trx id for the purging record
@param[in,out]	v_heap		heap used to build vrow
@param[out]	vrow		dtuple holding the virtual rows
@param[in,out]	mtr		mtr holding the latch on rec */
static
void
row_vers_build_cur_vrow_low(
	bool		in_purge,
	const rec_t*	rec,
	dict_index_t*	clust_index,
	ulint*		clust_offsets,
	dict_index_t*	index,
	roll_ptr_t	roll_ptr,
	trx_id_t	trx_id,
	mem_heap_t*	v_heap,
	const dtuple_t**vrow,
	mtr_t*		mtr)
{
	const rec_t*	version;
	rec_t*		prev_version;
	mem_heap_t*	heap = NULL;
	ulint		num_v = dict_table_get_n_v_cols(index->table);
	const dfield_t*	field;
	ulint		i;
	bool		all_filled = false;

	*vrow = dtuple_create_with_vcol(v_heap, 0, num_v);
	dtuple_init_v_fld(*vrow);

	for (i = 0; i < num_v; i++) {
		dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype
			= DATA_MISSING;
	}

	version = rec;

	/* If this is called by purge thread, set TRX_UNDO_PREV_IN_PURGE
	bit to search the undo log until we hit the current undo log with
	roll_ptr */
	const ulint	status = in_purge
		? TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE
		: TRX_UNDO_GET_OLD_V_VALUE;
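
	/* TRX_UNDO_GET_OLD_V_VALUE makes trx_undo_prev_version_build()
	also retrieve the old values of indexed virtual columns from
	the undo log records. */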

	while (!all_filled) {
		mem_heap_t*	heap2 = heap;
		heap = mem_heap_create(1024);
		roll_ptr_t	cur_roll_ptr = row_get_rec_roll_ptr(
			version, clust_index, clust_offsets);

		trx_undo_prev_version_build(
			rec, mtr, version, clust_index, clust_offsets,
			heap, &prev_version, NULL, vrow, status);

		if (heap2) {
			mem_heap_free(heap2);
		}

		if (!prev_version) {
			/* Versions end here */
			break;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL,
						true, ULINT_UNDEFINED, &heap);

		ulint	entry_len = dict_index_get_n_fields(index);

		all_filled = true;

		for (i = 0; i < entry_len; i++) {
			const dict_field_t*	ind_field
				= dict_index_get_nth_field(index, i);
			const dict_col_t*	col = ind_field->col;

			if (!col->is_virtual()) {
				continue;
			}

			const dict_v_col_t*	v_col
				= reinterpret_cast<const dict_v_col_t*>(col);
			field = dtuple_get_nth_v_field(*vrow, v_col->v_pos);

			if (dfield_get_type(field)->mtype == DATA_MISSING) {
				all_filled = false;
				break;
			}
		}

		trx_id_t	rec_trx_id = row_get_rec_trx_id(
			prev_version, clust_index, clust_offsets);

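		/* Stop as soon as we have reached a version written by a
		transaction older than the purging one (rec_trx_id <
		trx_id), or the undo log record of the version that is
		being purged (roll_ptr == cur_roll_ptr). */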
		if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) {
			break;
		}

		version = prev_version;
	}

	mem_heap_free(heap);
}

/** Check whether a virtual column value of a secondary index record
matches that of the current clustered index record, which is recreated
from information stored in the undo log.
@param[in]	in_purge	called by purge thread
@param[in]	rec		record in the clustered index
@param[in]	icentry		the index entry built from a cluster row
@param[in]	clust_index	clustered index
@param[in]	clust_offsets	offsets on the clustered record
@param[in]	index		the secondary index
@param[in]	ientry		the secondary index entry
@param[in]	roll_ptr	the rollback pointer for the purging record
@param[in]	trx_id		trx id for the purging record
@param[in,out]	v_heap		heap used to build virtual dtuple
@param[in,out]	vrow		dtuple holding the virtual rows (if needed)
@param[in]	mtr		mtr holding the latch on rec
@return true if matches, false otherwise */
static
bool
row_vers_vc_matches_cluster(
	bool		in_purge,
	const rec_t*	rec,
	const dtuple_t*	icentry,
	dict_index_t*	clust_index,
	ulint*		clust_offsets,
	dict_index_t*	index,
	const dtuple_t*	ientry,
	roll_ptr_t	roll_ptr,
	trx_id_t	trx_id,
	mem_heap_t*	v_heap,
	const dtuple_t**vrow,
	mtr_t*		mtr)
{
	const rec_t*	version;
	rec_t*		prev_version;
	mem_heap_t*	heap2;
	mem_heap_t*	heap = NULL;
	mem_heap_t*	tuple_heap;
	ulint		num_v = dict_table_get_n_v_cols(index->table);
	bool		compare[REC_MAX_N_FIELDS];
	ulint		n_fields = dtuple_get_n_fields(ientry);
	ulint		n_non_v_col = 0;
	ulint		n_cmp_v_col = 0;
	const dfield_t*	field1;
	dfield_t*	field2;
	ulint		i;

	/* First compare non-virtual columns (primary keys) */
	ut_ad(index->n_fields == n_fields);
	ut_ad(n_fields == dtuple_get_n_fields(icentry));
	{
		const dfield_t*	a = ientry->fields;
		const dfield_t*	b = icentry->fields;

		for (const dict_field_t *ifield = index->fields,
			     *const end = &index->fields[index->n_fields];
		     ifield != end; ifield++, a++, b++) {
			if (!ifield->col->is_virtual()) {
				if (cmp_dfield_dfield(a, b)) {
					return false;
				}
				n_non_v_col++;
			}
		}
	}

	tuple_heap = mem_heap_create(1024);

	ut_ad(n_fields > n_non_v_col);

	*vrow = dtuple_create_with_vcol(v_heap ? v_heap : tuple_heap, 0, num_v);
	dtuple_init_v_fld(*vrow);

	for (i = 0; i < num_v; i++) {
		dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype
			= DATA_MISSING;
		compare[i] = false;
	}

	version = rec;

	/* If this is called by purge thread, set TRX_UNDO_PREV_IN_PURGE
	bit to search the undo log until we hit the current undo log with
	roll_ptr */
	ulint	status = (in_purge ? TRX_UNDO_PREV_IN_PURGE : 0)
		| TRX_UNDO_GET_OLD_V_VALUE;

	while (n_cmp_v_col < n_fields - n_non_v_col) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		roll_ptr_t	cur_roll_ptr = row_get_rec_roll_ptr(
			version, clust_index, clust_offsets);

		ut_ad(cur_roll_ptr != 0);
		ut_ad(in_purge == (roll_ptr != 0));

		trx_undo_prev_version_build(
			rec, mtr, version, clust_index, clust_offsets,
			heap, &prev_version, NULL, vrow, status);

		if (heap2) {
			mem_heap_free(heap2);
		}

		if (!prev_version) {
			/* Versions end here */
			goto func_exit;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL,
						true, ULINT_UNDEFINED, &heap);

		ulint	entry_len = dict_index_get_n_fields(index);

		for (i = 0; i < entry_len; i++) {
			const dict_field_t*	ind_field
				= dict_index_get_nth_field(index, i);
			const dict_col_t*	col = ind_field->col;
			field1 = dtuple_get_nth_field(ientry, i);

			if (!col->is_virtual()) {
				continue;
			}

			const dict_v_col_t*	v_col
				= reinterpret_cast<const dict_v_col_t*>(col);
			field2 = dtuple_get_nth_v_field(*vrow, v_col->v_pos);

			if ((dfield_get_type(field2)->mtype != DATA_MISSING)
			    && (!compare[v_col->v_pos])) {

				if (ind_field->prefix_len != 0
				    && !dfield_is_null(field2)
				    && field2->len > ind_field->prefix_len) {
					field2->len = ind_field->prefix_len;
				}

				/* The index field mismatch */
				if (v_heap
				    || cmp_dfield_dfield(field2, field1) != 0) {
					if (v_heap) {
						dtuple_dup_v_fld(*vrow, v_heap);
					}

					mem_heap_free(tuple_heap);
					mem_heap_free(heap);
					return(false);
				}

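				/* The values match. Mark this virtual
				column as compared, so that it will not
				be compared again with older versions. */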
				compare[v_col->v_pos] = true;
				n_cmp_v_col++;
			}
		}

		trx_id_t	rec_trx_id = row_get_rec_trx_id(
			prev_version, clust_index, clust_offsets);

		if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) {
			break;
		}

		version = prev_version;
	}

func_exit:
	if (n_cmp_v_col == 0) {
		*vrow = NULL;
	}

	mem_heap_free(tuple_heap);
	mem_heap_free(heap);

	/* FIXME: In the case that n_cmp_v_col is not the same as
	n_fields - n_non_v_col, a callback is needed to compare the
	remaining columns. For the time being, we will need to return
	true. */
	return(true);
}

/** Build a dtuple that contains virtual column data for the current
clustered index record.
@param[in]	in_purge	called by purge thread
@param[in]	rec		clustered index record
@param[in]	clust_index	clustered index
@param[in,out]	clust_offsets	offsets on the clustered record
@param[in]	index		secondary index
@param[in]	roll_ptr	roll_ptr for the purge record
@param[in]	trx_id		transaction ID on the purging record
@param[in,out]	heap		heap memory
@param[in,out]	v_heap		heap memory to keep virtual column dtuple
@param[in]	mtr		mtr holding the latch on rec
@return dtuple that contains virtual column data */
static
const dtuple_t*
row_vers_build_cur_vrow(
	bool		in_purge,
	const rec_t*	rec,
	dict_index_t*	clust_index,
	ulint**		clust_offsets,
	dict_index_t*	index,
	roll_ptr_t	roll_ptr,
	trx_id_t	trx_id,
	mem_heap_t*	heap,
	mem_heap_t*	v_heap,
	mtr_t*		mtr)
{
	const dtuple_t*	cur_vrow = NULL;

	roll_ptr_t	t_roll_ptr = row_get_rec_roll_ptr(
		rec, clust_index, *clust_offsets);

	/* If the row is newly inserted, then the virtual
	columns need to be computed */
	if (trx_undo_roll_ptr_is_insert(t_roll_ptr)) {

		ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec)));

		/* This is a newly inserted record and cannot
		be deleted, so the externally stored field
		cannot be freed yet. */
		dtuple_t*	row = row_build(ROW_COPY_POINTERS, clust_index,
						rec, *clust_offsets,
						NULL, NULL, NULL, NULL, heap);

		row_vers_build_clust_v_col(
			row, clust_index, index, heap);
		cur_vrow = dtuple_copy(row, v_heap);
		dtuple_dup_v_fld(cur_vrow, v_heap);
	} else {
		/* Try to fetch virtual column data from the undo log */
		row_vers_build_cur_vrow_low(
			in_purge, rec, clust_index, *clust_offsets,
			index, roll_ptr, trx_id, v_heap, &cur_vrow, mtr);
	}

	*clust_offsets = rec_get_offsets(rec, clust_index, NULL, true,
					 ULINT_UNDEFINED, &heap);
	return(cur_vrow);
}

/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry and ientry compare equal
in the alphabetical ordering; exactly in this case we return TRUE.
@return TRUE if an earlier version should have the entry */
ibool
row_vers_old_has_index_entry(
/*=========================*/
	ibool		also_curr,/*!< in: TRUE if also rec is included in the
				versions to search; otherwise only versions
				prior to it are searched */
	const rec_t*	rec,	/*!< in: record in the clustered index; the
				caller must have a latch on the page */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
				also hold the latch on purge_view */
	dict_index_t*	index,	/*!< in: the secondary index */
	const dtuple_t*	ientry,	/*!< in: the secondary index entry */
	roll_ptr_t	roll_ptr,/*!< in: roll_ptr for the purge record */
	trx_id_t	trx_id)	/*!< in: transaction ID on the purging record */
{
	const rec_t*	version;
	rec_t*		prev_version;
	dict_index_t*	clust_index;
	ulint*		clust_offsets;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	const dtuple_t*	entry;
	ulint		comp;
	const dtuple_t*	vrow = NULL;
	mem_heap_t*	v_heap = NULL;
	const dtuple_t*	cur_vrow = NULL;

	ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
					     | MTR_MEMO_PAGE_S_FIX));
	ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));

	clust_index = dict_table_get_first_index(index->table);

	comp = page_rec_is_comp(rec);
	ut_ad(!dict_table_is_comp(index->table) == !comp);
	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(rec, clust_index, NULL, true,
					ULINT_UNDEFINED, &heap);

	if (dict_index_has_virtual(index)) {
		v_heap = mem_heap_create(100);
	}

	DBUG_EXECUTE_IF("ib_purge_virtual_index_crash",
			DBUG_SUICIDE(););

	if (also_curr && !rec_get_deleted_flag(rec, comp)) {
		row_ext_t*	ext;

		/* The top of the stack of versions is locked by the
		mtr holding a latch on the page containing the
		clustered index record. The bottom of the stack is
		locked by the fact that the purge_sys.view must
		'overtake' any read view of an active transaction.
		Thus, it is safe to fetch the prefixes for
		externally stored columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index,
				rec, clust_offsets,
				NULL, NULL, NULL, &ext, heap);

		if (dict_index_has_virtual(index)) {

#ifdef DBUG_OFF
# define dbug_v_purge false
#else /* DBUG_OFF */
			bool	dbug_v_purge = false;
#endif /* DBUG_OFF */

			DBUG_EXECUTE_IF(
				"ib_purge_virtual_index_callback",
				dbug_v_purge = true;);

			roll_ptr_t	t_roll_ptr = row_get_rec_roll_ptr(
				rec, clust_index, clust_offsets);

			/* if the row is newly inserted, then the virtual
			columns need to be computed */
			if (trx_undo_roll_ptr_is_insert(t_roll_ptr)
			    || dbug_v_purge) {
				row_vers_build_clust_v_col(
					row, clust_index, index, heap);

				entry = row_build_index_entry(
					row, ext, index, heap);
				if (entry && !dtuple_coll_cmp(ientry, entry)) {
					goto safe_to_purge;
				}
			} else {
				/* Build index entry out of row */
				entry = row_build_index_entry(
					row, ext, index, heap);
				/* entry could only be NULL if
				the clustered index record is an uncommitted
				inserted record whose BLOBs have not been
				written yet. The secondary index record
				can be safely removed, because it cannot
				possibly refer to this incomplete
				clustered index record. (Insert would
				always first be completed for the
				clustered index record, then proceed to
				secondary indexes.) */

				if (entry && row_vers_vc_matches_cluster(
					    also_curr, rec, entry,
					    clust_index, clust_offsets,
					    index, ientry, roll_ptr,
					    trx_id, NULL, &vrow, mtr)) {
					goto safe_to_purge;
				}
			}
			clust_offsets = rec_get_offsets(rec, clust_index,
							NULL, true,
							ULINT_UNDEFINED,
							&heap);
		} else {

			entry = row_build_index_entry(
				row, ext, index, heap);

			/* If entry == NULL, the record contains unset BLOB
			pointers. This must be a freshly inserted record. If
			this is called from
			row_purge_remove_sec_if_poss_low(), the thread will
			hold latches on the clustered index and the secondary
			index. Because the insert works in three steps:

			(1) insert the record to clustered index
			(2) store the BLOBs and update BLOB pointers
			(3) insert records to secondary indexes

			the purge thread can safely ignore freshly inserted
			records and delete the secondary index record. The
			thread that inserted the new record will be inserting
			the secondary index records. */

			/* NOTE that we cannot do the comparison as binary
			fields because the row is maybe being modified so that
			the clustered index record has already been updated to
			a different binary value in a char field, but the
			collation identifies the old and new value anyway! */
			if (entry && !dtuple_coll_cmp(ientry, entry)) {
safe_to_purge:
				mem_heap_free(heap);

				if (v_heap) {
					mem_heap_free(v_heap);
				}
				return(TRUE);
			}
		}
	} else if (dict_index_has_virtual(index)) {
		/* The current clustered index record could be
		delete-marked, but its previous version may not be. We
		will need to get the virtual column data from the undo
		record associated with the current clustered index
		record. */
		cur_vrow = row_vers_build_cur_vrow(
			also_curr, rec, clust_index, &clust_offsets,
			index, roll_ptr, trx_id, heap, v_heap, mtr);
	}

	version = rec;

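	/* Walk the stack of versions from rec backwards, checking
	whether some non-delete-marked version would have ientry as
	its secondary index entry. */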
	for (;;) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		vrow = NULL;

		trx_undo_prev_version_build(rec, mtr, version,
					    clust_index, clust_offsets,
					    heap, &prev_version, NULL,
					    dict_index_has_virtual(index)
					    ? &vrow : NULL, 0);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (!prev_version) {
			/* Versions end here */

			mem_heap_free(heap);

			if (v_heap) {
				mem_heap_free(v_heap);
			}

			return(FALSE);
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, true,
						ULINT_UNDEFINED, &heap);

		if (dict_index_has_virtual(index)) {
			if (vrow) {
				/* Keep the virtual row info for the next
				version, unless it is changed */
				mem_heap_empty(v_heap);
				cur_vrow = dtuple_copy(vrow, v_heap);
				dtuple_dup_v_fld(cur_vrow, v_heap);
			}

			if (!cur_vrow) {
				/* Nothing for this index has changed,
				continue */
				version = prev_version;
				continue;
			}
		}

		if (!rec_get_deleted_flag(prev_version, comp)) {
			row_ext_t*	ext;

			/* The stack of versions is locked by mtr.
			Thus, it is safe to fetch the prefixes for
			externally stored columns. */
			row = row_build(ROW_COPY_POINTERS, clust_index,
					prev_version, clust_offsets,
					NULL, NULL, NULL, &ext, heap);

			if (dict_index_has_virtual(index)) {
				ut_ad(cur_vrow);
				ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
				dtuple_copy_v_fields(row, cur_vrow);
			}

			entry = row_build_index_entry(row, ext, index, heap);

			/* If entry == NULL, the record contains unset
			BLOB pointers. This must be a freshly
			inserted record that we can safely ignore.
			For the justification, see the comments after
			the previous row_build_index_entry() call. */

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (entry && !dtuple_coll_cmp(ientry, entry)) {
				goto safe_to_purge;
			}
		}

		version = prev_version;
	}
}

/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return DB_SUCCESS or DB_MISSING_HISTORY */
dberr_t
row_vers_build_for_consistent_read(
/*===============================*/
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this record */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	ReadView*	view,	/*!< in: the consistent read view */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	rec_t**		old_vers,/*!< out, own: old version, or NULL
				if the history is missing or the record
				does not exist in the view, that is,
				it was freshly inserted afterwards */
	const dtuple_t**vrow)	/*!< out: virtual row */
{
	const rec_t*	version;
	rec_t*		prev_version;
	trx_id_t	trx_id;
	mem_heap_t*	heap = NULL;
	byte*		buf;
	dberr_t		err;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
					     | MTR_MEMO_PAGE_S_FIX));
	ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));

	ut_ad(rec_offs_validate(rec, index, *offsets));

	trx_id = row_get_rec_trx_id(rec, index, *offsets);

	ut_ad(!view->changes_visible(trx_id, index->table->name));

	ut_ad(!vrow || !(*vrow));

	version = rec;

	for (;;) {
		mem_heap_t*	prev_heap = heap;

		heap = mem_heap_create(1024);

		if (vrow) {
			*vrow = NULL;
		}

		/* If purge can't see the record then we can't rely on
		the UNDO log record. */

		bool	purge_sees = trx_undo_prev_version_build(
			rec, mtr, version, index, *offsets, heap,
			&prev_version, NULL, vrow, 0);

		err = (purge_sees) ? DB_SUCCESS : DB_MISSING_HISTORY;

		if (prev_heap != NULL) {
			mem_heap_free(prev_heap);
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			ut_ad(!vrow || !(*vrow));
			break;
		}

		*offsets = rec_get_offsets(
			prev_version, index, *offsets,
			true, ULINT_UNDEFINED, offset_heap);

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
		ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);

		if (view->changes_visible(trx_id, index->table->name)) {

			/* The view already sees this version: we can copy
			it to in_heap and return */

			buf = static_cast<byte*>(
				mem_heap_alloc(
					in_heap, rec_offs_size(*offsets)));

			*old_vers = rec_copy(buf, prev_version, *offsets);
			rec_offs_make_valid(*old_vers, index, true, *offsets);

			if (vrow && *vrow) {
				*vrow = dtuple_copy(*vrow, in_heap);
				dtuple_dup_v_fld(*vrow, in_heap);
			}
			break;
		}

		version = prev_version;
	}

	mem_heap_free(heap);

	return(err);
}

/*****************************************************************//**
Constructs the last committed version of a clustered index record,
which should be seen by a semi-consistent read. */
void
row_vers_build_for_semi_consistent_read(
/*====================================*/
	trx_t*		caller_trx,/*!< in/out: trx of current thread */
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this record */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	const rec_t**	old_vers,/*!< out: rec, old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
	const dtuple_t** vrow)	/*!< out: virtual row, old version, or NULL
				if it is not updated in the view */
{
	const rec_t*	version;
	mem_heap_t*	heap = NULL;
	byte*		buf;
	trx_id_t	rec_trx_id = 0;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX
					     | MTR_MEMO_PAGE_S_FIX));
	ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));

	ut_ad(rec_offs_validate(rec, index, *offsets));

	version = rec;
	ut_ad(!vrow || !(*vrow));

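	/* Walk backwards through the stack of versions until we find a
	version that belongs to a transaction that is no longer
	registered as active, that is, a committed one. */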
	for (;;) {
		mem_heap_t*	heap2;
		rec_t*		prev_version;
		trx_id_t	version_trx_id;

		version_trx_id = row_get_rec_trx_id(version, index, *offsets);
		if (rec == version) {
			rec_trx_id = version_trx_id;
		}

		if (!trx_sys.is_registered(caller_trx, version_trx_id)) {
committed_version_trx:
			/* We found a version that belongs to a
			committed transaction: return it. */

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
			ut_a(!rec_offs_any_null_extern(version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

			if (rec == version) {
				*old_vers = rec;
				if (vrow) {
					*vrow = NULL;
				}
				break;
			}

			/* We assume that a rolled-back transaction stays in
			TRX_STATE_ACTIVE state until all the changes have been
			rolled back and the transaction is removed from
			the global list of transactions. */

			if (rec_trx_id == version_trx_id) {
				/* The transaction was committed while
				we searched for earlier versions.
				Return the current version as a
				semi-consistent read. */

				version = rec;
				*offsets = rec_get_offsets(version,
							   index, *offsets,
							   true,
							   ULINT_UNDEFINED,
							   offset_heap);
			}

			buf = static_cast<byte*>(
				mem_heap_alloc(
					in_heap, rec_offs_size(*offsets)));

			*old_vers = rec_copy(buf, version, *offsets);
			rec_offs_make_valid(*old_vers, index, true, *offsets);
			if (vrow && *vrow) {
				*vrow = dtuple_copy(*vrow, in_heap);
				dtuple_dup_v_fld(*vrow, in_heap);
			}
			break;
		}

		DEBUG_SYNC_C("after_row_vers_check_trx_active");

		heap2 = heap;
		heap = mem_heap_create(1024);

		if (!trx_undo_prev_version_build(rec, mtr, version, index,
						 *offsets, heap,
						 &prev_version,
						 in_heap, vrow, 0)) {
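			/* The previous version could not be built,
			possibly because its history has already been
			purged; fall back to returning this version. */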
			mem_heap_free(heap);
			heap = heap2;
			heap2 = NULL;
			goto committed_version_trx;
		}

		if (heap2) {
			mem_heap_free(heap2); /* free version */
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			ut_ad(!vrow || !(*vrow));
			break;
		}

		version = prev_version;
		*offsets = rec_get_offsets(version, index, *offsets, true,
					   ULINT_UNDEFINED, offset_heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
		ut_a(!rec_offs_any_null_extern(version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
	}/* for (;;) */

	if (heap) {
		mem_heap_free(heap);
	}
}