1/*****************************************************************************
2
3Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2017, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file trx/trx0rec.cc
22Transaction undo log record
23
24Created 3/26/1996 Heikki Tuuri
25*******************************************************/
26
27#include "trx0rec.h"
28#include "fsp0fsp.h"
29#include "mach0data.h"
30#include "trx0undo.h"
31#include "mtr0log.h"
32#include "dict0dict.h"
33#include "ut0mem.h"
34#include "row0ext.h"
35#include "row0upd.h"
36#include "que0que.h"
37#include "trx0purge.h"
38#include "trx0rseg.h"
39#include "row0row.h"
40#include "fsp0sysspace.h"
41#include "row0mysql.h"
42
43/** The search tuple corresponding to TRX_UNDO_INSERT_DEFAULT */
44const dtuple_t trx_undo_default_rec = {
45 REC_INFO_DEFAULT_ROW, 0, 0,
46 NULL, 0, NULL,
47 UT_LIST_NODE_T(dtuple_t)()
48#ifdef UNIV_DEBUG
49 , DATA_TUPLE_MAGIC_N
50#endif /* UNIV_DEBUG */
51};
52
53/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
54
55/** Write redo log of writing an undo log record.
56@param[in] undo_block undo log page
57@param[in] old_free start offset of the undo log record
58@param[in] new_free end offset of the undo log record
59@param[in,out] mtr mini-transaction */
60static void trx_undof_page_add_undo_rec_log(const buf_block_t* undo_block,
61 ulint old_free, ulint new_free,
62 mtr_t* mtr)
63{
64 ut_ad(old_free >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
65 ut_ad(new_free >= old_free);
66 ut_ad(new_free < srv_page_size);
67 ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
68 + undo_block->frame)
69 == new_free);
70 mtr->set_modified();
71 switch (mtr->get_log_mode()) {
72 case MTR_LOG_NONE:
73 case MTR_LOG_NO_REDO:
74 return;
75 case MTR_LOG_SHORT_INSERTS:
76 ut_ad(0);
77 /* fall through */
78 case MTR_LOG_ALL:
79 break;
80 }
81
82 const uint32_t
83 len = uint32_t(new_free - old_free - 4),
84 reserved = std::min<uint32_t>(11 + 13 + len,
85 mtr->get_log()->MAX_DATA_SIZE);
86 byte* log_ptr = mtr->get_log()->open(reserved);
87 const byte* log_end = log_ptr + reserved;
88 log_ptr = mlog_write_initial_log_record_low(
89 MLOG_UNDO_INSERT,
90 undo_block->page.id.space(), undo_block->page.id.page_no(),
91 log_ptr, mtr);
92 mach_write_to_2(log_ptr, len);
93 if (log_ptr + 2 + len <= log_end) {
94 memcpy(log_ptr + 2, undo_block->frame + old_free + 2, len);
95 mlog_close(mtr, log_ptr + 2 + len);
96 } else {
97 mlog_close(mtr, log_ptr + 2);
98 mtr->get_log()->push(undo_block->frame + old_free + 2, len);
99 }
100}
101
102/** Parse MLOG_UNDO_INSERT.
103@param[in] ptr log record
104@param[in] end_ptr end of log record buffer
105@param[in,out] page page or NULL
106@return end of log record
107@retval NULL if the log record is incomplete */
108byte*
109trx_undo_parse_add_undo_rec(
110 const byte* ptr,
111 const byte* end_ptr,
112 page_t* page)
113{
114 ulint len;
115
116 if (end_ptr < ptr + 2) {
117
118 return(NULL);
119 }
120
121 len = mach_read_from_2(ptr);
122 ptr += 2;
123
124 if (end_ptr < ptr + len) {
125
126 return(NULL);
127 }
128
129 if (page) {
130 ulint first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
131 + TRX_UNDO_PAGE_FREE);
132 byte* rec = page + first_free;
133
134 mach_write_to_2(rec, first_free + 4 + len);
135 mach_write_to_2(rec + 2 + len, first_free);
136
137 mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
138 first_free + 4 + len);
139 memcpy(rec + 2, ptr, len);
140 }
141
142 return(const_cast<byte*>(ptr + len));
143}
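/* For clarity, a rough sketch of the MLOG_UNDO_INSERT redo record payload
that trx_undof_page_add_undo_rec_log() above produces and this parser
consumes. This is illustrative commentary derived from the two functions,
not a normative format description:

	[ 2 bytes ]	len = length of the undo record body
	[ len bytes ]	the undo record body, copied from the undo page
			starting at old_free + 2; the 2-byte framing fields
			at both ends of the record are not logged, they are
			reconstructed at apply time from TRX_UNDO_PAGE_FREE,
			as done in trx_undo_parse_add_undo_rec() above */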
144
/** Calculate the free space left for extending an undo log record.
@param[in]	undo_block	undo log page
@param[in]	ptr		current end of the undo log data on the page
@return bytes left */
149static ulint trx_undo_left(const buf_block_t* undo_block, const byte* ptr)
150{
151 /* The 10 is a safety margin, in case we have some small
152 calculation error below */
153 return srv_page_size - ulint(ptr - undo_block->frame)
154 - (10 + FIL_PAGE_DATA_END);
155}
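/* A worked example of the calculation above, assuming the default
srv_page_size of 16384 and FIL_PAGE_DATA_END == 8: if ptr points at byte
offset 15000 within the undo page, the space considered still available is
16384 - 15000 - (10 + 8) = 1366 bytes. The 10-byte safety margin means a
record is rejected slightly before the page is physically full. */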
156
/**********************************************************************//**
Set the next and previous pointers in the undo page for the undo record
that was written to ptr. Update the first free value by the number of bytes
written for this undo record.
@return offset of the inserted entry on the page on success, 0 on failure */
162static
163ulint
164trx_undo_page_set_next_prev_and_add(
165/*================================*/
166 buf_block_t* undo_block, /*!< in/out: undo log page */
167 byte* ptr, /*!< in: ptr up to where data has been
168 written on this undo page. */
169 mtr_t* mtr) /*!< in: mtr */
170{
171 ulint first_free; /*!< offset within undo_page */
172 ulint end_of_rec; /*!< offset within undo_page */
173 byte* ptr_to_first_free;
174 /* pointer within undo_page
175 that points to the next free
176 offset value within undo_page.*/
177
178 ut_ad(ptr > undo_block->frame);
179 ut_ad(ptr < undo_block->frame + srv_page_size);
180
181 if (UNIV_UNLIKELY(trx_undo_left(undo_block, ptr) < 2)) {
182 return(0);
183 }
184
185 ptr_to_first_free = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
186 + undo_block->frame;
187
188 first_free = mach_read_from_2(ptr_to_first_free);
189
190 /* Write offset of the previous undo log record */
191 mach_write_to_2(ptr, first_free);
192 ptr += 2;
193
194 end_of_rec = ulint(ptr - undo_block->frame);
195
196 /* Write offset of the next undo log record */
197 mach_write_to_2(undo_block->frame + first_free, end_of_rec);
198
199 /* Update the offset to first free undo record */
200 mach_write_to_2(ptr_to_first_free, end_of_rec);
201
	/* Write a redo log record about this change to the undo log */
203 trx_undof_page_add_undo_rec_log(undo_block, first_free,
204 end_of_rec, mtr);
205
206 return(first_free);
207}
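/* To illustrate the framing maintained above, each undo log record on the
page is laid out roughly as follows (a sketch derived from the code above,
not a normative format note; offsets are 2-byte page offsets):

	first_free                                     end_of_rec
	|                                              |
	v                                              v
	[ next: end_of_rec ][ record body ... ][ prev: first_free ]

The leading 2 bytes point just past this record (where the next record will
start), and the trailing 2 bytes hold the offset at which this record
starts, which the following record will read as its "previous record"
pointer. */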
208
/** Virtual column undo log version. To distinguish it from a length value
in a 5.7.8 undo log, it starts with 0xF1 */
211static const ulint VIRTUAL_COL_UNDO_FORMAT_1 = 0xF1;
212
213/** Write virtual column index info (index id and column position in index)
214to the undo log
215@param[in,out] undo_block undo log page
216@param[in] table the table
217@param[in] pos the virtual column position
218@param[in] ptr undo log record being written
219@param[in] first_v_col whether this is the first virtual column
220 which could start with a version marker
221@return new undo log pointer */
222static
223byte*
224trx_undo_log_v_idx(
225 buf_block_t* undo_block,
226 const dict_table_t* table,
227 ulint pos,
228 byte* ptr,
229 bool first_v_col)
230{
231 ut_ad(pos < table->n_v_def);
232 dict_v_col_t* vcol = dict_table_get_nth_v_col(table, pos);
233
234 ulint n_idx = vcol->v_indexes->size();
235 byte* old_ptr;
236
237 ut_ad(n_idx > 0);
238
	/* Size to reserve: at most 5 bytes for each index id and position,
	plus 5 bytes for the number of indexes, 2 bytes for the total length,
	and 1 byte for the undo log record format version marker */
242 ulint size = n_idx * (5 + 5) + 5 + 2 + (first_v_col ? 1 : 0);
243
244 if (trx_undo_left(undo_block, ptr) < size) {
245 return(NULL);
246 }
247
248 if (first_v_col) {
249 /* write the version marker */
250 mach_write_to_1(ptr, VIRTUAL_COL_UNDO_FORMAT_1);
251
252 ptr += 1;
253 }
254
255 old_ptr = ptr;
256
257 ptr += 2;
258
259 ptr += mach_write_compressed(ptr, n_idx);
260
261 dict_v_idx_list::iterator it;
262
263 for (it = vcol->v_indexes->begin();
264 it != vcol->v_indexes->end(); ++it) {
265 dict_v_idx_t v_index = *it;
266
267 ptr += mach_write_compressed(
268 ptr, static_cast<ulint>(v_index.index->id));
269
270 ptr += mach_write_compressed(ptr, v_index.nth_field);
271 }
272
273 mach_write_to_2(old_ptr, ulint(ptr - old_ptr));
274
275 return(ptr);
276}
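/* A sketch of the virtual-column index info that trx_undo_log_v_idx()
writes and trx_undo_read_v_idx_low() below reads back (illustrative only;
"compressed" denotes the variable-length integer encoding of mach0data):

	[ 0xF1 ]	version marker, written only before the first
			virtual column of the record
			(VIRTUAL_COL_UNDO_FORMAT_1)
	[ 2 bytes ]	total length of this info, counted from the length
			field itself to the end
	[ compressed ]	number of indexes containing the column
	repeated once per index:
	[ compressed ]	index id
	[ compressed ]	position of the column within that index */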
277
/** Read virtual column index info from the undo log, verify that the column
is still indexed, and return its position.
@param[in]	table		the table
@param[in]	ptr		undo log pointer
@param[out]	col_pos		the column number, or ULINT_UNDEFINED
				if the column is no longer indexed
@return remaining part of undo log record after reading these values */
285static
286const byte*
287trx_undo_read_v_idx_low(
288 const dict_table_t* table,
289 const byte* ptr,
290 ulint* col_pos)
291{
292 ulint len = mach_read_from_2(ptr);
293 const byte* old_ptr = ptr;
294
295 *col_pos = ULINT_UNDEFINED;
296
297 ptr += 2;
298
299 ulint num_idx = mach_read_next_compressed(&ptr);
300
301 ut_ad(num_idx > 0);
302
303 dict_index_t* clust_index = dict_table_get_first_index(table);
304
305 for (ulint i = 0; i < num_idx; i++) {
306 index_id_t id = mach_read_next_compressed(&ptr);
307 ulint pos = mach_read_next_compressed(&ptr);
308 dict_index_t* index = dict_table_get_next_index(clust_index);
309
310 while (index != NULL) {
311 /* Return if we find a matching index.
312 TODO: in the future, it might be worth to add
313 checks on other indexes */
314 if (index->id == id) {
315 const dict_col_t* col = dict_index_get_nth_col(
316 index, pos);
317 ut_ad(col->is_virtual());
318 const dict_v_col_t* vcol = reinterpret_cast<
319 const dict_v_col_t*>(col);
320 *col_pos = vcol->v_pos;
321 return(old_ptr + len);
322 }
323
324 index = dict_table_get_next_index(index);
325 }
326 }
327
328 return(old_ptr + len);
329}
330
/** Read virtual column index info from the undo log or the online log,
if the log contains such info. In the undo log case, also verify that the
column is still indexed, and output its position.
@param[in]	table		the table
@param[in]	ptr		undo log pointer
@param[in]	first_v_col	if this is the first virtual column, which
				carries the version marker
@param[in,out]	is_undo_log	this function is used to parse both the undo
				log and the online log of virtual columns;
				when first_v_col is true, is_undo_log is an
				output (detected from the version marker),
				when first_v_col is false it is an input
@param[in,out]	field_no	the column number
@return remaining part of undo log record after reading these values */
345const byte*
346trx_undo_read_v_idx(
347 const dict_table_t* table,
348 const byte* ptr,
349 bool first_v_col,
350 bool* is_undo_log,
351 ulint* field_no)
352{
353 /* Version marker only put on the first virtual column */
354 if (first_v_col) {
355 /* Undo log has the virtual undo log marker */
356 *is_undo_log = (mach_read_from_1(ptr)
357 == VIRTUAL_COL_UNDO_FORMAT_1);
358
359 if (*is_undo_log) {
360 ptr += 1;
361 }
362 }
363
364 if (*is_undo_log) {
365 ptr = trx_undo_read_v_idx_low(table, ptr, field_no);
366 } else {
367 *field_no -= REC_MAX_N_FIELDS;
368 }
369
370 return(ptr);
371}
372
/** Write to the undo log the virtual column information of an inserted row.
@param[in]	undo_block	undo log page
@param[in]	table		the table
@param[in]	row		dtuple containing the virtual columns
@param[in,out]	ptr		log ptr
@return true if the write succeeds, false if out of space */
379static
380bool
381trx_undo_report_insert_virtual(
382 buf_block_t* undo_block,
383 dict_table_t* table,
384 const dtuple_t* row,
385 byte** ptr)
386{
387 byte* start = *ptr;
388 bool first_v_col = true;
389
390 if (trx_undo_left(undo_block, *ptr) < 2) {
391 return(false);
392 }
393
394 /* Reserve 2 bytes to write the number
395 of bytes the stored fields take in this
396 undo record */
397 *ptr += 2;
398
399 for (ulint col_no = 0; col_no < dict_table_get_n_v_cols(table);
400 col_no++) {
401 dfield_t* vfield = NULL;
402
403 const dict_v_col_t* col
404 = dict_table_get_nth_v_col(table, col_no);
405
406 if (col->m_col.ord_part) {
407
			/* make sure there is enough space to write
			the length */
409 if (trx_undo_left(undo_block, *ptr) < 5) {
410 return(false);
411 }
412
413 ulint pos = col_no;
414 pos += REC_MAX_N_FIELDS;
415 *ptr += mach_write_compressed(*ptr, pos);
416
417 *ptr = trx_undo_log_v_idx(undo_block, table,
418 col_no, *ptr, first_v_col);
419 first_v_col = false;
420
421 if (*ptr == NULL) {
422 return(false);
423 }
424
425 vfield = dtuple_get_nth_v_field(row, col->v_pos);
426 ulint flen = vfield->len;
427
428 if (flen != UNIV_SQL_NULL) {
429 ulint max_len
430 = dict_max_v_field_len_store_undo(
431 table, col_no);
432
433 if (flen > max_len) {
434 flen = max_len;
435 }
436
437 if (trx_undo_left(undo_block, *ptr)
438 < flen + 5) {
439 return(false);
440 }
441 *ptr += mach_write_compressed(*ptr, flen);
442
443 ut_memcpy(*ptr, vfield->data, flen);
444 *ptr += flen;
445 } else {
446 if (trx_undo_left(undo_block, *ptr) < 5) {
447 return(false);
448 }
449
450 *ptr += mach_write_compressed(*ptr, flen);
451 }
452 }
453 }
454
455 /* Always mark the end of the log with 2 bytes length field */
456 mach_write_to_2(start, ulint(*ptr - start));
457
458 return(true);
459}
460
/**********************************************************************//**
Writes to the undo log a record of an insert of a clustered index record.
@return offset of the inserted entry on the page on success, 0 on failure */
464static
465ulint
466trx_undo_page_report_insert(
467/*========================*/
468 buf_block_t* undo_block, /*!< in: undo log page */
469 trx_t* trx, /*!< in: transaction */
470 dict_index_t* index, /*!< in: clustered index */
471 const dtuple_t* clust_entry, /*!< in: index entry which will be
472 inserted to the clustered index */
473 mtr_t* mtr) /*!< in: mtr */
474{
475 ulint first_free;
476 byte* ptr;
477 ulint i;
478
479 ut_ad(dict_index_is_clust(index));
480 /* MariaDB 10.3.1+ in trx_undo_page_init() always initializes
481 TRX_UNDO_PAGE_TYPE as 0, but previous versions wrote
482 TRX_UNDO_INSERT == 1 into insert_undo pages,
483 or TRX_UNDO_UPDATE == 2 into update_undo pages. */
484 ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
485 + undo_block->frame) <= 2);
486
487 first_free = mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
488 + undo_block->frame);
489 ptr = undo_block->frame + first_free;
490
491 ut_ad(first_free <= srv_page_size);
492
493 if (trx_undo_left(undo_block, ptr) < 2 + 1 + 11 + 11) {
494 /* Not enough space for writing the general parameters */
495 return(0);
496 }
497
498 /* Reserve 2 bytes for the pointer to the next undo log record */
499 ptr += 2;
500
501 /* Store first some general parameters to the undo log */
502 *ptr++ = TRX_UNDO_INSERT_REC;
503 ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
504 ptr += mach_u64_write_much_compressed(ptr, index->table->id);
505 /*----------------------------------------*/
506 /* Store then the fields required to uniquely determine the record
507 to be inserted in the clustered index */
508 if (UNIV_UNLIKELY(clust_entry->info_bits != 0)) {
509 ut_ad(clust_entry->info_bits == REC_INFO_DEFAULT_ROW);
510 ut_ad(index->is_instant());
511 ut_ad(undo_block->frame[first_free + 2]
512 == TRX_UNDO_INSERT_REC);
513 undo_block->frame[first_free + 2] = TRX_UNDO_INSERT_DEFAULT;
514 goto done;
515 }
516
517 for (i = 0; i < dict_index_get_n_unique(index); i++) {
518
519 const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
520 ulint flen = dfield_get_len(field);
521
522 if (trx_undo_left(undo_block, ptr) < 5) {
523
524 return(0);
525 }
526
527 ptr += mach_write_compressed(ptr, flen);
528
529 if (flen != UNIV_SQL_NULL) {
530 if (trx_undo_left(undo_block, ptr) < flen) {
531
532 return(0);
533 }
534
535 ut_memcpy(ptr, dfield_get_data(field), flen);
536 ptr += flen;
537 }
538 }
539
540 if (index->table->n_v_cols) {
541 if (!trx_undo_report_insert_virtual(
542 undo_block, index->table, clust_entry, &ptr)) {
543 return(0);
544 }
545 }
546
547done:
548 return(trx_undo_page_set_next_prev_and_add(undo_block, ptr, mtr));
549}
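/* For reference, the insert undo log record constructed above has roughly
this layout (a sketch based on the code in this function; "compressed"
denotes the variable-length integer encodings of mach0data):

	[ 2 bytes ]	offset of the next undo log record
	[ 1 byte ]	TRX_UNDO_INSERT_REC (or TRX_UNDO_INSERT_DEFAULT)
	[ compressed ]	undo number
	[ compressed ]	table id
	for each of the n_unique fields of the clustered index:
	[ compressed ]	field length (or UNIV_SQL_NULL)
	[ len bytes ]	field data, if not NULL
	[ ... ]		optional virtual column info (see
			trx_undo_report_insert_virtual())
	[ 2 bytes ]	offset of the start of this record */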
550
551/**********************************************************************//**
552Reads from an undo log record the general parameters.
553@return remaining part of undo log record after reading these values */
554byte*
555trx_undo_rec_get_pars(
556/*==================*/
557 trx_undo_rec_t* undo_rec, /*!< in: undo log record */
558 ulint* type, /*!< out: undo record type:
559 TRX_UNDO_INSERT_REC, ... */
560 ulint* cmpl_info, /*!< out: compiler info, relevant only
561 for update type records */
562 bool* updated_extern, /*!< out: true if we updated an
					externally stored field */
564 undo_no_t* undo_no, /*!< out: undo log record number */
565 table_id_t* table_id) /*!< out: table id */
566{
567 const byte* ptr;
568 ulint type_cmpl;
569
570 ptr = undo_rec + 2;
571
572 type_cmpl = mach_read_from_1(ptr);
573 ptr++;
574
575 *updated_extern = !!(type_cmpl & TRX_UNDO_UPD_EXTERN);
576 type_cmpl &= ~TRX_UNDO_UPD_EXTERN;
577
578 *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
579 *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
580
581 *undo_no = mach_read_next_much_compressed(&ptr);
582 *table_id = mach_read_next_much_compressed(&ptr);
583
584 return(const_cast<byte*>(ptr));
585}
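/* A worked example of the decoding above, assuming the usual constant
values TRX_UNDO_UPD_EXIST_REC == 12, TRX_UNDO_CMPL_INFO_MULT == 16 and
TRX_UNDO_UPD_EXTERN == 0x80: a stored type_cmpl byte of 0x9C
(= 12 + 16 * 1 + 0x80) decodes to *updated_extern = true, and after
clearing that bit the remaining value 28 yields
*type = 28 & 15 = TRX_UNDO_UPD_EXIST_REC and *cmpl_info = 28 / 16 = 1. */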
586
587/** Read from an undo log record a non-virtual column value.
588@param[in,out] ptr pointer to remaining part of the undo record
589@param[in,out] field stored field
590@param[in,out] len length of the field, or UNIV_SQL_NULL
591@param[in,out] orig_len original length of the locally stored part
592of an externally stored column, or 0
593@return remaining part of undo log record after reading these values */
594byte*
595trx_undo_rec_get_col_val(
596 const byte* ptr,
597 const byte** field,
598 ulint* len,
599 ulint* orig_len)
600{
601 *len = mach_read_next_compressed(&ptr);
602 *orig_len = 0;
603
604 switch (*len) {
605 case UNIV_SQL_NULL:
606 *field = NULL;
607 break;
608 case UNIV_EXTERN_STORAGE_FIELD:
609 *orig_len = mach_read_next_compressed(&ptr);
610 *len = mach_read_next_compressed(&ptr);
611 *field = ptr;
612 ptr += *len & ~SPATIAL_STATUS_MASK;
613
614 ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
615 ut_ad(*len > *orig_len);
616 /* @see dtuple_convert_big_rec() */
617 ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE);
618
619 /* we do not have access to index->table here
620 ut_ad(dict_table_has_atomic_blobs(index->table)
621 || *len >= col->max_prefix
622 + BTR_EXTERN_FIELD_REF_SIZE);
623 */
624
625 *len += UNIV_EXTERN_STORAGE_FIELD;
626 break;
627 default:
628 *field = ptr;
629 if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
630 ptr += (*len - UNIV_EXTERN_STORAGE_FIELD)
631 & ~SPATIAL_STATUS_MASK;
632 } else {
633 ptr += *len;
634 }
635 }
636
637 return(const_cast<byte*>(ptr));
638}
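/* A sketch of the column value encoding read above (illustrative only):
the value starts with a compressed length. UNIV_SQL_NULL means that no data
follows. UNIV_EXTERN_STORAGE_FIELD means an externally stored column whose
value was logged with a longer prefix, encoded as
[compressed orig_len][compressed len][len bytes of prefix data], and the
returned *len has UNIV_EXTERN_STORAGE_FIELD added so that callers can
recognize the external storage flag. Any other length is followed directly
by the column data; lengths at or above UNIV_EXTERN_STORAGE_FIELD written
by trx_undo_page_report_modify_ext() carry the flag in the length itself. */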
639
640/*******************************************************************//**
641Builds a row reference from an undo log record.
642@return pointer to remaining part of undo record */
643byte*
644trx_undo_rec_get_row_ref(
645/*=====================*/
646 byte* ptr, /*!< in: remaining part of a copy of an undo log
647 record, at the start of the row reference;
648 NOTE that this copy of the undo log record must
649 be preserved as long as the row reference is
650 used, as we do NOT copy the data in the
651 record! */
652 dict_index_t* index, /*!< in: clustered index */
653 const dtuple_t**ref, /*!< out, own: row reference */
654 mem_heap_t* heap) /*!< in: memory heap from which the memory
655 needed is allocated */
656{
657 ulint ref_len;
658 ulint i;
659
660 ut_ad(index && ptr && ref && heap);
661 ut_a(dict_index_is_clust(index));
662
663 ref_len = dict_index_get_n_unique(index);
664
665 dtuple_t* tuple = dtuple_create(heap, ref_len);
666 *ref = tuple;
667
668 dict_index_copy_types(tuple, index, ref_len);
669
670 for (i = 0; i < ref_len; i++) {
671 const byte* field;
672 ulint len;
673 ulint orig_len;
674
675 dfield_t* dfield = dtuple_get_nth_field(tuple, i);
676
677 ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
678
679 dfield_set_data(dfield, field, len);
680 }
681
682 return(ptr);
683}
684
685/*******************************************************************//**
686Skips a row reference from an undo log record.
687@return pointer to remaining part of undo record */
688static
689byte*
690trx_undo_rec_skip_row_ref(
691/*======================*/
692 byte* ptr, /*!< in: remaining part in update undo log
693 record, at the start of the row reference */
694 dict_index_t* index) /*!< in: clustered index */
695{
696 ulint ref_len;
697 ulint i;
698
699 ut_ad(index && ptr);
700 ut_a(dict_index_is_clust(index));
701
702 ref_len = dict_index_get_n_unique(index);
703
704 for (i = 0; i < ref_len; i++) {
705 const byte* field;
706 ulint len;
707 ulint orig_len;
708
709 ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
710 }
711
712 return(ptr);
713}
714
715/** Fetch a prefix of an externally stored column, for writing to the undo
716log of an update or delete marking of a clustered index record.
717@param[out] ext_buf buffer to hold the prefix data and BLOB pointer
718@param[in] prefix_len prefix size to store in the undo log
719@param[in] page_size page size
720@param[in] field an externally stored column
721@param[in,out] len input: length of field; output: used length of
722ext_buf
723@return ext_buf */
724static
725byte*
726trx_undo_page_fetch_ext(
727 byte* ext_buf,
728 ulint prefix_len,
729 const page_size_t& page_size,
730 const byte* field,
731 ulint* len)
732{
733 /* Fetch the BLOB. */
734 ulint ext_len = btr_copy_externally_stored_field_prefix(
735 ext_buf, prefix_len, page_size, field, *len);
736 /* BLOBs should always be nonempty. */
737 ut_a(ext_len);
738 /* Append the BLOB pointer to the prefix. */
739 memcpy(ext_buf + ext_len,
740 field + *len - BTR_EXTERN_FIELD_REF_SIZE,
741 BTR_EXTERN_FIELD_REF_SIZE);
742 *len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
743 return(ext_buf);
744}
745
746/** Writes to the undo log a prefix of an externally stored column.
747@param[out] ptr undo log position, at least 15 bytes must be
748available
@param[out]	ext_buf		a buffer of DICT_MAX_FIELD_LEN_BY_FORMAT()
				size, or NULL when a longer prefix should
				not be fetched
752@param[in] prefix_len prefix size to store in the undo log
753@param[in] page_size page size
754@param[in,out] field the locally stored part of the externally
755stored column
756@param[in,out] len length of field, in bytes
757@param[in] spatial_status whether the column is used by spatial index or
758 regular index
759@return undo log position */
760static
761byte*
762trx_undo_page_report_modify_ext(
763 byte* ptr,
764 byte* ext_buf,
765 ulint prefix_len,
766 const page_size_t& page_size,
767 const byte** field,
768 ulint* len,
769 spatial_status_t spatial_status)
770{
771 ulint spatial_len= 0;
772
773 switch (spatial_status) {
774 case SPATIAL_UNKNOWN:
775 case SPATIAL_NONE:
776 break;
777
778 case SPATIAL_MIXED:
779 case SPATIAL_ONLY:
780 spatial_len = DATA_MBR_LEN;
781 break;
782 }
783
784 /* Encode spatial status into length. */
785 spatial_len |= ulint(spatial_status) << SPATIAL_STATUS_SHIFT;
786
787 if (spatial_status == SPATIAL_ONLY) {
		/* If the column is only used by a spatial (gis) index,
		logging its MBR is enough. */
790 ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
791 + spatial_len);
792
793 return(ptr);
794 }
795
796 if (ext_buf) {
797 ut_a(prefix_len > 0);
798
799 /* If an ordering column is externally stored, we will
800 have to store a longer prefix of the field. In this
801 case, write to the log a marker followed by the
802 original length and the real length of the field. */
803 ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
804
805 ptr += mach_write_compressed(ptr, *len);
806
807 *field = trx_undo_page_fetch_ext(ext_buf, prefix_len,
808 page_size, *field, len);
809
810 ptr += mach_write_compressed(ptr, *len + spatial_len);
811 } else {
812 ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
813 + *len + spatial_len);
814 }
815
816 return(ptr);
817}
818
/** Get the MBR from a geometry column that is stored externally.
@param[out]	mbr		MBR to fill
@param[in]	page_size	table page size
@param[in]	field		field containing the geometry data
@param[in,out]	len		length of field, in bytes */
825static
826void
827trx_undo_get_mbr_from_ext(
828/*======================*/
829 double* mbr,
830 const page_size_t& page_size,
831 const byte* field,
832 ulint* len)
833{
834 uchar* dptr = NULL;
835 ulint dlen;
836 mem_heap_t* heap = mem_heap_create(100);
837
838 dptr = btr_copy_externally_stored_field(
839 &dlen, field, page_size, *len, heap);
840
841 if (dlen <= GEO_DATA_HEADER_SIZE) {
842 for (uint i = 0; i < SPDIMS; ++i) {
843 mbr[i * 2] = DBL_MAX;
844 mbr[i * 2 + 1] = -DBL_MAX;
845 }
846 } else {
847 rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
848 static_cast<uint>(dlen
849 - GEO_DATA_HEADER_SIZE), SPDIMS, mbr);
850 }
851
852 mem_heap_free(heap);
853}
854
/**********************************************************************//**
Writes to the undo log a record of an update or delete marking of a
clustered index record.
@return byte offset of the inserted undo log entry on the page on
success, 0 on failure */
860static
861ulint
862trx_undo_page_report_modify(
863/*========================*/
864 buf_block_t* undo_block, /*!< in: undo log page */
865 trx_t* trx, /*!< in: transaction */
866 dict_index_t* index, /*!< in: clustered index where update or
867 delete marking is done */
868 const rec_t* rec, /*!< in: clustered index record which
869 has NOT yet been modified */
870 const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
871 const upd_t* update, /*!< in: update vector which tells the
872 columns to be updated; in the case of
873 a delete, this should be set to NULL */
874 ulint cmpl_info, /*!< in: compiler info on secondary
875 index updates */
876 const dtuple_t* row, /*!< in: clustered index row contains
877 virtual column info */
878 mtr_t* mtr) /*!< in: mtr */
879{
880 ulint first_free;
881 byte* ptr;
882
883 ut_ad(index->is_primary());
884 ut_ad(rec_offs_validate(rec, index, offsets));
885 /* MariaDB 10.3.1+ in trx_undo_page_init() always initializes
886 TRX_UNDO_PAGE_TYPE as 0, but previous versions wrote
887 TRX_UNDO_INSERT == 1 into insert_undo pages,
888 or TRX_UNDO_UPDATE == 2 into update_undo pages. */
889 ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
890 + undo_block->frame) <= 2);
891
892 first_free = mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
893 + undo_block->frame);
894 ptr = undo_block->frame + first_free;
895
896 ut_ad(first_free <= srv_page_size);
897
898 if (trx_undo_left(undo_block, ptr) < 50) {
899 /* NOTE: the value 50 must be big enough so that the general
900 fields written below fit on the undo log page */
901 return 0;
902 }
903
904 /* Reserve 2 bytes for the pointer to the next undo log record */
905 ptr += 2;
906
907 dict_table_t* table = index->table;
908 const byte* field;
909 ulint flen;
910 ulint col_no;
911 ulint type_cmpl;
912 byte* type_cmpl_ptr;
913 ulint i;
914 trx_id_t trx_id;
915 ibool ignore_prefix = FALSE;
916 byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
917 + BTR_EXTERN_FIELD_REF_SIZE];
918 bool first_v_col = true;
919
920 /* Store first some general parameters to the undo log */
921
922 if (!update) {
923 ut_ad(!rec_get_deleted_flag(rec, dict_table_is_comp(table)));
924 type_cmpl = TRX_UNDO_DEL_MARK_REC;
925 } else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
926 /* In delete-marked records, DB_TRX_ID must
927 always refer to an existing update_undo log record. */
928 ut_ad(row_get_rec_trx_id(rec, index, offsets));
929
930 type_cmpl = TRX_UNDO_UPD_DEL_REC;
931 /* We are about to update a delete marked record.
932 We don't typically need the prefix in this case unless
933 the delete marking is done by the same transaction
934 (which we check below). */
935 ignore_prefix = TRUE;
936 } else {
937 type_cmpl = TRX_UNDO_UPD_EXIST_REC;
938 }
939
940 type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
941 type_cmpl_ptr = ptr;
942
943 *ptr++ = (byte) type_cmpl;
944 ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
945
946 ptr += mach_u64_write_much_compressed(ptr, table->id);
947
948 /*----------------------------------------*/
949 /* Store the state of the info bits */
950
951 *ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
952
953 /* Store the values of the system columns */
954 field = rec_get_nth_field(rec, offsets,
955 dict_index_get_sys_col_pos(
956 index, DATA_TRX_ID), &flen);
957 ut_ad(flen == DATA_TRX_ID_LEN);
958
959 trx_id = trx_read_trx_id(field);
960
961 /* If it is an update of a delete marked record, then we are
962 allowed to ignore blob prefixes if the delete marking was done
963 by some other trx as it must have committed by now for us to
964 allow an over-write. */
965 if (trx_id == trx->id) {
966 ignore_prefix = false;
967 }
968 ptr += mach_u64_write_compressed(ptr, trx_id);
969
970 field = rec_get_nth_field(rec, offsets,
971 dict_index_get_sys_col_pos(
972 index, DATA_ROLL_PTR), &flen);
973 ut_ad(flen == DATA_ROLL_PTR_LEN);
974 ut_ad(memcmp(field, field_ref_zero, DATA_ROLL_PTR_LEN));
975
976 ptr += mach_u64_write_compressed(ptr, trx_read_roll_ptr(field));
977
978 /*----------------------------------------*/
979 /* Store then the fields required to uniquely determine the
980 record which will be modified in the clustered index */
981
982 for (i = 0; i < dict_index_get_n_unique(index); i++) {
983
984 /* The ordering columns must not be instant added columns. */
985 ut_ad(!rec_offs_nth_default(offsets, i));
986 field = rec_get_nth_field(rec, offsets, i, &flen);
987
988 /* The ordering columns must not be stored externally. */
989 ut_ad(!rec_offs_nth_extern(offsets, i));
990 ut_ad(dict_index_get_nth_col(index, i)->ord_part);
991
992 if (trx_undo_left(undo_block, ptr) < 5) {
993 return(0);
994 }
995
996 ptr += mach_write_compressed(ptr, flen);
997
998 if (flen != UNIV_SQL_NULL) {
999 if (trx_undo_left(undo_block, ptr) < flen) {
1000 return(0);
1001 }
1002
1003 ut_memcpy(ptr, field, flen);
1004 ptr += flen;
1005 }
1006 }
1007
1008 /*----------------------------------------*/
1009 /* Save to the undo log the old values of the columns to be updated. */
1010
1011 if (update) {
1012 if (trx_undo_left(undo_block, ptr) < 5) {
1013 return(0);
1014 }
1015
1016 ulint n_updated = upd_get_n_fields(update);
1017
		/* If this is an online update while an inplace ALTER TABLE
		is in progress and the table has virtual columns, we need
		to double-check whether any non-indexed columns are
		registered in the update vector, in case they will be
		indexed in the new table. */
1023 if (dict_index_is_online_ddl(index) && table->n_v_cols > 0) {
1024 for (i = 0; i < upd_get_n_fields(update); i++) {
1025 upd_field_t* fld = upd_get_nth_field(
1026 update, i);
1027 ulint pos = fld->field_no;
1028
1029 /* These columns must not have an index
1030 on them */
1031 if (upd_fld_is_virtual_col(fld)
1032 && dict_table_get_nth_v_col(
1033 table, pos)->v_indexes->empty()) {
1034 n_updated--;
1035 }
1036 }
1037 }
1038
1039 ptr += mach_write_compressed(ptr, n_updated);
1040
1041 for (i = 0; i < upd_get_n_fields(update); i++) {
1042 upd_field_t* fld = upd_get_nth_field(update, i);
1043
1044 bool is_virtual = upd_fld_is_virtual_col(fld);
1045 ulint max_v_log_len = 0;
1046
1047 ulint pos = fld->field_no;
1048
1049 /* Write field number to undo log */
1050 if (trx_undo_left(undo_block, ptr) < 5) {
1051 return(0);
1052 }
1053
1054 if (is_virtual) {
1055 /* Skip the non-indexed column, during
1056 an online alter table */
1057 if (dict_index_is_online_ddl(index)
1058 && dict_table_get_nth_v_col(
1059 table, pos)->v_indexes->empty()) {
1060 continue;
1061 }
1062
1063 /* add REC_MAX_N_FIELDS to mark this
1064 is a virtual col */
1065 pos += REC_MAX_N_FIELDS;
1066 }
1067
1068 ptr += mach_write_compressed(ptr, pos);
1069
1070 /* Save the old value of field */
1071 if (is_virtual) {
1072 ut_ad(fld->field_no < table->n_v_def);
1073
1074 ptr = trx_undo_log_v_idx(undo_block, table,
1075 fld->field_no, ptr,
1076 first_v_col);
1077 if (ptr == NULL) {
1078 return(0);
1079 }
1080 first_v_col = false;
1081
1082 max_v_log_len
1083 = dict_max_v_field_len_store_undo(
1084 table, fld->field_no);
1085
1086 field = static_cast<byte*>(
1087 fld->old_v_val->data);
1088 flen = fld->old_v_val->len;
1089
1090 /* Only log sufficient bytes for index
1091 record update */
1092 if (flen != UNIV_SQL_NULL) {
1093 flen = ut_min(
1094 flen, max_v_log_len);
1095 }
1096 } else {
1097 field = rec_get_nth_cfield(
1098 rec, index, offsets, pos, &flen);
1099 }
1100
1101 if (trx_undo_left(undo_block, ptr) < 15) {
1102 return(0);
1103 }
1104
1105 if (!is_virtual && rec_offs_nth_extern(offsets, pos)) {
1106 const dict_col_t* col
1107 = dict_index_get_nth_col(index, pos);
1108 ulint prefix_len
1109 = dict_max_field_len_store_undo(
1110 table, col);
1111
1112 ut_ad(prefix_len + BTR_EXTERN_FIELD_REF_SIZE
1113 <= sizeof ext_buf);
1114
1115 ptr = trx_undo_page_report_modify_ext(
1116 ptr,
1117 col->ord_part
1118 && !ignore_prefix
1119 && flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
1120 ? ext_buf : NULL, prefix_len,
1121 dict_table_page_size(table),
1122 &field, &flen, SPATIAL_UNKNOWN);
1123
1124 *type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
1125 } else {
1126 ptr += mach_write_compressed(ptr, flen);
1127 }
1128
1129 if (flen != UNIV_SQL_NULL) {
1130 if (trx_undo_left(undo_block, ptr) < flen) {
1131 return(0);
1132 }
1133
1134 ut_memcpy(ptr, field, flen);
1135 ptr += flen;
1136 }
1137
1138 /* Also record the new value for virtual column */
1139 if (is_virtual) {
1140 field = static_cast<byte*>(fld->new_val.data);
1141 flen = fld->new_val.len;
1142 if (flen != UNIV_SQL_NULL) {
1143 flen = ut_min(
1144 flen, max_v_log_len);
1145 }
1146
1147 if (trx_undo_left(undo_block, ptr) < 15) {
1148 return(0);
1149 }
1150
1151 ptr += mach_write_compressed(ptr, flen);
1152
1153 if (flen != UNIV_SQL_NULL) {
1154 if (trx_undo_left(undo_block, ptr)
1155 < flen) {
1156 return(0);
1157 }
1158
1159 ut_memcpy(ptr, field, flen);
1160 ptr += flen;
1161 }
1162 }
1163 }
1164 }
1165
	/* Reset first_v_col, so that the virtual column undo format
	version marker will be written again when we log all the
	indexed columns. */
1168 first_v_col = true;
1169
1170 /*----------------------------------------*/
	/* In the case of a delete marking, and also in the case of an update
	where any ordering field of any index changes, store the values of all
	columns which occur as ordering fields in any index. This info is used
	in the purge of old versions, where we use it to build and search for
	the delete-marked index records, to see whether we can remove them from
	the index tree. Note that starting from 4.0.14 also externally stored
	fields can be ordering fields in some index. Starting from 5.2, we no
	longer store the first REC_MAX_INDEX_COL_LEN bytes in the undo log
	record, but we can construct the column prefix fields in the index by
	fetching the first page of the BLOB that is pointed to by the
	clustered index. This works also in crash recovery, because all pages
	(including BLOBs) are recovered before anything is rolled back. */
1183
1184 if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
1185 byte* old_ptr = ptr;
1186 double mbr[SPDIMS * 2];
1187 mem_heap_t* row_heap = NULL;
1188
1189 if (trx_undo_left(undo_block, ptr) < 5) {
1190 return(0);
1191 }
1192
1193 /* Reserve 2 bytes to write the number of bytes the stored
1194 fields take in this undo record */
1195
1196 ptr += 2;
1197
1198 for (col_no = 0; col_no < dict_table_get_n_cols(table);
1199 col_no++) {
1200
1201 const dict_col_t* col
1202 = dict_table_get_nth_col(table, col_no);
1203
1204 if (!col->ord_part) {
1205 continue;
1206 }
1207
1208 const ulint pos = dict_index_get_nth_col_pos(
1209 index, col_no, NULL);
1210 /* All non-virtual columns must be present in
1211 the clustered index. */
1212 ut_ad(pos != ULINT_UNDEFINED);
1213
1214 const bool is_ext = rec_offs_nth_extern(offsets, pos);
1215 const spatial_status_t spatial_status = is_ext
1216 ? dict_col_get_spatial_status(col)
1217 : SPATIAL_NONE;
1218
1219 switch (spatial_status) {
1220 case SPATIAL_UNKNOWN:
1221 ut_ad(0);
1222 /* fall through */
1223 case SPATIAL_MIXED:
1224 case SPATIAL_ONLY:
1225 /* Externally stored spatially indexed
1226 columns will be (redundantly) logged
1227 again, because we did not write the
1228 MBR yet, that is, the previous call to
1229 trx_undo_page_report_modify_ext()
1230 was with SPATIAL_UNKNOWN. */
1231 break;
1232 case SPATIAL_NONE:
1233 if (!update) {
1234 /* This is a DELETE operation. */
1235 break;
1236 }
1237 /* Avoid redundantly logging indexed
1238 columns that were updated. */
1239
1240 for (i = 0; i < update->n_fields; i++) {
1241 const ulint field_no
1242 = upd_get_nth_field(update, i)
1243 ->field_no;
1244 if (field_no >= index->n_fields
1245 || dict_index_get_nth_field(
1246 index, field_no)->col
1247 == col) {
1248 goto already_logged;
1249 }
1250 }
1251 }
1252
1253 if (true) {
1254 /* Write field number to undo log */
1255 if (trx_undo_left(undo_block, ptr) < 5 + 15) {
1256 return(0);
1257 }
1258
1259 ptr += mach_write_compressed(ptr, pos);
1260
1261 /* Save the old value of field */
1262 field = rec_get_nth_cfield(
1263 rec, index, offsets, pos, &flen);
1264
1265 if (is_ext) {
1266 const dict_col_t* col =
1267 dict_index_get_nth_col(
1268 index, pos);
1269 ulint prefix_len =
1270 dict_max_field_len_store_undo(
1271 table, col);
1272
1273 ut_a(prefix_len < sizeof ext_buf);
1274
1275 /* If there is a spatial index on it,
1276 log its MBR */
1277 if (spatial_status != SPATIAL_NONE) {
1278 ut_ad(DATA_GEOMETRY_MTYPE(
1279 col->mtype));
1280
1281 trx_undo_get_mbr_from_ext(
1282 mbr,
1283 dict_table_page_size(
1284 table),
1285 field, &flen);
1286 }
1287
1288 ptr = trx_undo_page_report_modify_ext(
1289 ptr,
1290 flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
1291 && !ignore_prefix
1292 ? ext_buf : NULL, prefix_len,
1293 dict_table_page_size(table),
1294 &field, &flen,
1295 spatial_status);
1296 } else {
1297 ptr += mach_write_compressed(
1298 ptr, flen);
1299 }
1300
1301 if (flen != UNIV_SQL_NULL
1302 && spatial_status != SPATIAL_ONLY) {
1303 if (trx_undo_left(undo_block, ptr)
1304 < flen) {
1305 return(0);
1306 }
1307
1308 ut_memcpy(ptr, field, flen);
1309 ptr += flen;
1310 }
1311
1312 if (spatial_status != SPATIAL_NONE) {
1313 if (trx_undo_left(undo_block, ptr)
1314 < DATA_MBR_LEN) {
1315 return(0);
1316 }
1317
1318 for (int i = 0; i < SPDIMS * 2;
1319 i++) {
1320 mach_double_write(
1321 ptr, mbr[i]);
1322 ptr += sizeof(double);
1323 }
1324 }
1325 }
1326
1327already_logged:
1328 continue;
1329 }
1330
1331 for (col_no = 0; col_no < dict_table_get_n_v_cols(table);
1332 col_no++) {
1333 dfield_t* vfield = NULL;
1334
1335 const dict_v_col_t* col
1336 = dict_table_get_nth_v_col(table, col_no);
1337
1338 if (col->m_col.ord_part) {
1339 ulint pos = col_no;
1340 ulint max_v_log_len
1341 = dict_max_v_field_len_store_undo(
1342 table, pos);
1343
				/* Write the field number to the undo log.
				Make sure there is enough space in the log. */
1346 if (trx_undo_left(undo_block, ptr) < 5) {
1347 return(0);
1348 }
1349
1350 pos += REC_MAX_N_FIELDS;
1351 ptr += mach_write_compressed(ptr, pos);
1352
1353 ut_ad(col_no < table->n_v_def);
1354 ptr = trx_undo_log_v_idx(undo_block, table,
1355 col_no, ptr,
1356 first_v_col);
1357 first_v_col = false;
1358
1359 if (!ptr) {
1360 return(0);
1361 }
1362
1363 if (update) {
1364 ut_ad(!row);
1365 if (update->old_vrow == NULL) {
1366 flen = UNIV_SQL_NULL;
1367 } else {
1368 vfield = dtuple_get_nth_v_field(
1369 update->old_vrow,
1370 col->v_pos);
1371 }
1372 } else if (row) {
1373 vfield = dtuple_get_nth_v_field(
1374 row, col->v_pos);
1375 } else {
1376 ut_ad(0);
1377 }
1378
1379 if (vfield) {
1380 field = static_cast<byte*>(vfield->data);
1381 flen = vfield->len;
1382 } else {
1383 ut_ad(flen == UNIV_SQL_NULL);
1384 }
1385
1386 if (flen != UNIV_SQL_NULL) {
1387 flen = ut_min(
1388 flen, max_v_log_len);
1389 }
1390
1391 ptr += mach_write_compressed(ptr, flen);
1392
1393 if (flen != UNIV_SQL_NULL) {
1394 if (trx_undo_left(undo_block, ptr)
1395 < flen) {
1396 return(0);
1397 }
1398
1399 ut_memcpy(ptr, field, flen);
1400 ptr += flen;
1401 }
1402 }
1403 }
1404
1405 mach_write_to_2(old_ptr, ulint(ptr - old_ptr));
1406
1407 if (row_heap) {
1408 mem_heap_free(row_heap);
1409 }
1410 }
1411
1412 /*----------------------------------------*/
1413 /* Write pointers to the previous and the next undo log records */
1414 if (trx_undo_left(undo_block, ptr) < 2) {
1415 return(0);
1416 }
1417
1418 mach_write_to_2(ptr, first_free);
1419 ptr += 2;
1420 const ulint new_free = ulint(ptr - undo_block->frame);
1421 mach_write_to_2(undo_block->frame + first_free, new_free);
1422
1423 mach_write_to_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
1424 + undo_block->frame, new_free);
1425
1426 /* Write to the REDO log about this change in the UNDO log */
1427 trx_undof_page_add_undo_rec_log(undo_block, first_free, new_free, mtr);
1428 return(first_free);
1429}
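/* For reference, a rough sketch of the update/delete-mark undo log record
constructed above (based on the code in this function; "compressed" denotes
the variable-length integer encodings of mach0data):

	[ 2 bytes ]	offset of the next undo log record
	[ 1 byte ]	type_cmpl: record type, compilation info and the
			TRX_UNDO_UPD_EXTERN flag
	[ compressed ]	undo number
	[ compressed ]	table id
	[ 1 byte ]	info bits of the record
	[ compressed ]	DB_TRX_ID of the old version
	[ compressed ]	DB_ROLL_PTR of the old version
	unique (ordering) fields of the clustered index:
	[ compressed length + data ] ...
	if this is an update: the number of updated fields, then for each
	field its number (virtual columns are offset by REC_MAX_N_FIELDS and
	followed by index info), the old value, and for virtual columns also
	the new value
	if needed for purge: a 2-byte byte count followed by the values of
	all ordering columns (plus the MBR for spatial columns)
	[ 2 bytes ]	offset of the start of this record */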
1430
1431/**********************************************************************//**
1432Reads from an undo log update record the system field values of the old
1433version.
1434@return remaining part of undo log record after reading these values */
1435byte*
1436trx_undo_update_rec_get_sys_cols(
1437/*=============================*/
1438 const byte* ptr, /*!< in: remaining part of undo
1439 log record after reading
1440 general parameters */
1441 trx_id_t* trx_id, /*!< out: trx id */
1442 roll_ptr_t* roll_ptr, /*!< out: roll ptr */
1443 ulint* info_bits) /*!< out: info bits state */
1444{
1445 /* Read the state of the info bits */
1446 *info_bits = mach_read_from_1(ptr);
1447 ptr += 1;
1448
1449 /* Read the values of the system columns */
1450
1451 *trx_id = mach_u64_read_next_compressed(&ptr);
1452 *roll_ptr = mach_u64_read_next_compressed(&ptr);
1453
1454 return(const_cast<byte*>(ptr));
1455}
1456
1457/*******************************************************************//**
1458Builds an update vector based on a remaining part of an undo log record.
@return remaining part of the record, NULL if an error is detected, which
means that the record is corrupted */
1461byte*
1462trx_undo_update_rec_get_update(
1463/*===========================*/
1464 const byte* ptr, /*!< in: remaining part in update undo log
1465 record, after reading the row reference
1466 NOTE that this copy of the undo log record must
1467 be preserved as long as the update vector is
1468 used, as we do NOT copy the data in the
1469 record! */
1470 dict_index_t* index, /*!< in: clustered index */
1471 ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
1472 TRX_UNDO_UPD_DEL_REC, or
1473 TRX_UNDO_DEL_MARK_REC; in the last case,
1474 only trx id and roll ptr fields are added to
1475 the update vector */
1476 trx_id_t trx_id, /*!< in: transaction id from this undo record */
1477 roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
1478 ulint info_bits,/*!< in: info bits from this undo record */
1479 mem_heap_t* heap, /*!< in: memory heap from which the memory
1480 needed is allocated */
1481 upd_t** upd) /*!< out, own: update vector */
1482{
1483 upd_field_t* upd_field;
1484 upd_t* update;
1485 ulint n_fields;
1486 byte* buf;
1487 ulint i;
1488 bool first_v_col = true;
1489 bool is_undo_log = true;
1490 ulint n_skip_field = 0;
1491
1492 ut_a(dict_index_is_clust(index));
1493
1494 if (type != TRX_UNDO_DEL_MARK_REC) {
1495 n_fields = mach_read_next_compressed(&ptr);
1496 } else {
1497 n_fields = 0;
1498 }
1499
1500 update = upd_create(n_fields + 2, heap);
1501
1502 update->info_bits = info_bits;
1503
1504 /* Store first trx id and roll ptr to update vector */
1505
1506 upd_field = upd_get_nth_field(update, n_fields);
1507
1508 buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_TRX_ID_LEN));
1509
1510 mach_write_to_6(buf, trx_id);
1511
1512 upd_field_set_field_no(upd_field,
1513 dict_index_get_sys_col_pos(index, DATA_TRX_ID),
1514 index);
1515 dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
1516
1517 upd_field = upd_get_nth_field(update, n_fields + 1);
1518
1519 buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_ROLL_PTR_LEN));
1520
1521 trx_write_roll_ptr(buf, roll_ptr);
1522
1523 upd_field_set_field_no(
1524 upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
1525 index);
1526 dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
1527
1528 /* Store then the updated ordinary columns to the update vector */
1529
1530 for (i = 0; i < n_fields; i++) {
1531
1532 const byte* field;
1533 ulint len;
1534 ulint field_no;
1535 ulint orig_len;
1536 bool is_virtual;
1537
1538 upd_field = upd_get_nth_field(update, i);
1539 field_no = mach_read_next_compressed(&ptr);
1540
1541 is_virtual = (field_no >= REC_MAX_N_FIELDS);
1542
1543 if (is_virtual) {
			/* In the new undo log format, we need to check the
			index list to figure out the correct virtual column
			position */
1546 ptr = trx_undo_read_v_idx(
1547 index->table, ptr, first_v_col, &is_undo_log,
1548 &field_no);
1549 first_v_col = false;
			/* This column could have been dropped or may
			no longer be indexed */
			if (field_no == ULINT_UNDEFINED) {
				/* Mark that this field is no longer needed */
1553 upd_field->field_no = REC_MAX_N_FIELDS;
1554
1555 ptr = trx_undo_rec_get_col_val(
1556 ptr, &field, &len, &orig_len);
1557 ptr = trx_undo_rec_get_col_val(
1558 ptr, &field, &len, &orig_len);
1559 n_skip_field++;
1560 continue;
1561 }
1562
1563 upd_field_set_v_field_no(upd_field, field_no, index);
1564 } else if (field_no < index->n_fields) {
1565 upd_field_set_field_no(upd_field, field_no, index);
1566 } else if (update->info_bits == REC_INFO_MIN_REC_FLAG
1567 && index->is_instant()) {
1568 /* This must be a rollback of a subsequent
1569 instant ADD COLUMN operation. This will be
1570 detected and handled by btr_cur_trim(). */
1571 upd_field->field_no = field_no;
1572 upd_field->orig_len = 0;
1573 } else {
1574 ib::error() << "Trying to access update undo rec"
1575 " field " << field_no
1576 << " in index " << index->name
1577 << " of table " << index->table->name
1578 << " but index has only "
1579 << dict_index_get_n_fields(index)
1580 << " fields " << BUG_REPORT_MSG
1581 << ". Run also CHECK TABLE "
1582 << index->table->name << "."
1583 " n_fields = " << n_fields << ", i = " << i;
1584
1585 ut_ad(0);
1586 *upd = NULL;
1587 return(NULL);
1588 }
1589
1590 ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
1591
1592 upd_field->orig_len = orig_len;
1593
1594 if (len == UNIV_SQL_NULL) {
1595 dfield_set_null(&upd_field->new_val);
1596 } else if (len < UNIV_EXTERN_STORAGE_FIELD) {
1597 dfield_set_data(&upd_field->new_val, field, len);
1598 } else {
1599 len -= UNIV_EXTERN_STORAGE_FIELD;
1600
1601 dfield_set_data(&upd_field->new_val, field, len);
1602 dfield_set_ext(&upd_field->new_val);
1603 }
1604
1605 if (is_virtual) {
1606 upd_field->old_v_val = static_cast<dfield_t*>(
1607 mem_heap_alloc(
1608 heap, sizeof *upd_field->old_v_val));
1609 ptr = trx_undo_rec_get_col_val(
1610 ptr, &field, &len, &orig_len);
1611 if (len == UNIV_SQL_NULL) {
1612 dfield_set_null(upd_field->old_v_val);
1613 } else if (len < UNIV_EXTERN_STORAGE_FIELD) {
1614 dfield_set_data(
1615 upd_field->old_v_val, field, len);
1616 } else {
1617 ut_ad(0);
1618 }
1619 }
1620 }
1621
	/* In a rare scenario, we could have skipped virtual columns (because
	they were dropped). We will regenerate an update vector without them. */
1624 if (n_skip_field > 0) {
1625 ulint n = 0;
1626 ut_ad(n_skip_field <= n_fields);
1627
1628 upd_t* new_update = upd_create(
1629 n_fields + 2 - n_skip_field, heap);
1630
1631 for (i = 0; i < n_fields + 2; i++) {
1632 upd_field = upd_get_nth_field(update, i);
1633
1634 if (upd_field->field_no == REC_MAX_N_FIELDS) {
1635 continue;
1636 }
1637
1638 upd_field_t* new_upd_field
1639 = upd_get_nth_field(new_update, n);
1640 *new_upd_field = *upd_field;
1641 n++;
1642 }
1643 ut_ad(n == n_fields + 2 - n_skip_field);
1644 *upd = new_update;
1645 } else {
1646 *upd = update;
1647 }
1648
1649 return(const_cast<byte*>(ptr));
1650}
1651
1652/*******************************************************************//**
1653Builds a partial row from an update undo log record, for purge.
It contains the columns which occur as ordering fields in any index of the table.
1655Any missing columns are indicated by col->mtype == DATA_MISSING.
1656@return pointer to remaining part of undo record */
1657byte*
1658trx_undo_rec_get_partial_row(
1659/*=========================*/
1660 const byte* ptr, /*!< in: remaining part in update undo log
1661 record of a suitable type, at the start of
1662 the stored index columns;
1663 NOTE that this copy of the undo log record must
1664 be preserved as long as the partial row is
1665 used, as we do NOT copy the data in the
1666 record! */
1667 dict_index_t* index, /*!< in: clustered index */
1668 const upd_t* update, /*!< in: updated columns */
1669 dtuple_t** row, /*!< out, own: partial row */
1670 ibool ignore_prefix, /*!< in: flag to indicate if we
1671 expect blob prefixes in undo. Used
1672 only in the assertion. */
1673 mem_heap_t* heap) /*!< in: memory heap from which the memory
1674 needed is allocated */
1675{
1676 const byte* end_ptr;
1677 bool first_v_col = true;
1678 bool is_undo_log = true;
1679
1680 ut_ad(index);
1681 ut_ad(ptr);
1682 ut_ad(row);
1683 ut_ad(heap);
1684 ut_ad(dict_index_is_clust(index));
1685
1686 *row = dtuple_create_with_vcol(
1687 heap, dict_table_get_n_cols(index->table),
1688 dict_table_get_n_v_cols(index->table));
1689
1690 /* Mark all columns in the row uninitialized, so that
1691 we can distinguish missing fields from fields that are SQL NULL. */
1692 for (ulint i = 0; i < dict_table_get_n_cols(index->table); i++) {
1693 dfield_get_type(dtuple_get_nth_field(*row, i))
1694 ->mtype = DATA_MISSING;
1695 }
1696
1697 dtuple_init_v_fld(*row);
1698
1699 for (const upd_field_t* uf = update->fields, * const ue
1700 = update->fields + update->n_fields;
1701 uf != ue; uf++) {
1702 if (uf->old_v_val) {
1703 continue;
1704 }
1705 ulint c = dict_index_get_nth_col(index, uf->field_no)->ind;
1706 *dtuple_get_nth_field(*row, c) = uf->new_val;
1707 }
1708
1709 end_ptr = ptr + mach_read_from_2(ptr);
1710 ptr += 2;
1711
1712 while (ptr != end_ptr) {
1713 dfield_t* dfield;
1714 const byte* field;
1715 ulint field_no;
1716 const dict_col_t* col;
1717 ulint col_no;
1718 ulint len;
1719 ulint orig_len;
1720 bool is_virtual;
1721
1722 field_no = mach_read_next_compressed(&ptr);
1723
1724 is_virtual = (field_no >= REC_MAX_N_FIELDS);
1725
1726 if (is_virtual) {
1727 ptr = trx_undo_read_v_idx(
1728 index->table, ptr, first_v_col, &is_undo_log,
1729 &field_no);
1730 first_v_col = false;
1731 }
1732
1733 ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
1734
		/* This column could have been dropped or may no longer
		be indexed */
1736 if (field_no == ULINT_UNDEFINED) {
1737 ut_ad(is_virtual);
1738 continue;
1739 }
1740
1741 if (is_virtual) {
1742 dict_v_col_t* vcol = dict_table_get_nth_v_col(
1743 index->table, field_no);
1744 col = &vcol->m_col;
1745 col_no = dict_col_get_no(col);
1746 dfield = dtuple_get_nth_v_field(*row, vcol->v_pos);
1747 dict_col_copy_type(
1748 &vcol->m_col,
1749 dfield_get_type(dfield));
1750 } else {
1751 col = dict_index_get_nth_col(index, field_no);
1752 col_no = dict_col_get_no(col);
1753 dfield = dtuple_get_nth_field(*row, col_no);
1754 ut_ad(dfield->type.mtype == DATA_MISSING
1755 || dict_col_type_assert_equal(col,
1756 &dfield->type));
1757 ut_ad(dfield->type.mtype == DATA_MISSING
1758 || dfield->len == len
1759 || (len != UNIV_SQL_NULL
1760 && len >= UNIV_EXTERN_STORAGE_FIELD));
1761 dict_col_copy_type(
1762 dict_table_get_nth_col(index->table, col_no),
1763 dfield_get_type(dfield));
1764 }
1765
1766 dfield_set_data(dfield, field, len);
1767
1768 if (len != UNIV_SQL_NULL
1769 && len >= UNIV_EXTERN_STORAGE_FIELD) {
1770 spatial_status_t spatial_status;
1771
1772 /* Decode spatial status. */
1773 spatial_status = static_cast<spatial_status_t>(
1774 (len & SPATIAL_STATUS_MASK)
1775 >> SPATIAL_STATUS_SHIFT);
1776 len &= ~SPATIAL_STATUS_MASK;
1777
1778 /* Keep compatible with 5.7.9 format. */
1779 if (spatial_status == SPATIAL_UNKNOWN) {
1780 spatial_status =
1781 dict_col_get_spatial_status(col);
1782 }
1783
1784 switch (spatial_status) {
1785 case SPATIAL_ONLY:
1786 ut_ad(len - UNIV_EXTERN_STORAGE_FIELD
1787 == DATA_MBR_LEN);
1788 dfield_set_len(
1789 dfield,
1790 len - UNIV_EXTERN_STORAGE_FIELD);
1791 break;
1792
1793 case SPATIAL_MIXED:
1794 dfield_set_len(
1795 dfield,
1796 len - UNIV_EXTERN_STORAGE_FIELD
1797 - DATA_MBR_LEN);
1798 break;
1799
1800 case SPATIAL_NONE:
1801 dfield_set_len(
1802 dfield,
1803 len - UNIV_EXTERN_STORAGE_FIELD);
1804 break;
1805
1806 case SPATIAL_UNKNOWN:
1807 ut_ad(0);
1808 break;
1809 }
1810
1811 dfield_set_ext(dfield);
1812 dfield_set_spatial_status(dfield, spatial_status);
1813
1814 /* If the prefix of this column is indexed,
1815 ensure that enough prefix is stored in the
1816 undo log record. */
1817 if (!ignore_prefix && col->ord_part
1818 && spatial_status != SPATIAL_ONLY) {
1819 ut_a(dfield_get_len(dfield)
1820 >= BTR_EXTERN_FIELD_REF_SIZE);
1821 ut_a(dict_table_has_atomic_blobs(index->table)
1822 || dfield_get_len(dfield)
1823 >= REC_ANTELOPE_MAX_INDEX_COL_LEN
1824 + BTR_EXTERN_FIELD_REF_SIZE);
1825 }
1826 }
1827 }
1828
1829 return(const_cast<byte*>(ptr));
1830}
1831
1832/** Erase the unused undo log page end.
1833@param[in,out] undo_page undo log page
1834@return whether the page contained something */
1835bool
1836trx_undo_erase_page_end(page_t* undo_page)
1837{
1838 ulint first_free;
1839
1840 first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
1841 + TRX_UNDO_PAGE_FREE);
1842 memset(undo_page + first_free, 0,
1843 (srv_page_size - FIL_PAGE_DATA_END) - first_free);
1844
1845 return(first_free != TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
1846}
1847
1848/** Report a RENAME TABLE operation.
1849@param[in,out] trx transaction
1850@param[in] table table that is being renamed
1851@param[in,out] block undo page
1852@param[in,out] mtr mini-transaction
1853@return byte offset of the undo log record
1854@retval 0 in case of failure */
1855static
1856ulint
1857trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table,
1858 buf_block_t* block, mtr_t* mtr)
1859{
1860 byte* ptr_first_free = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
1861 + block->frame;
1862 ulint first_free = mach_read_from_2(ptr_first_free);
1863 ut_ad(first_free >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
1864 ut_ad(first_free <= srv_page_size);
1865 byte* start = block->frame + first_free;
1866 size_t len = strlen(table->name.m_name);
1867 const size_t fixed = 2 + 1 + 11 + 11 + 2;
1868 ut_ad(len <= NAME_LEN * 2 + 1);
1869 /* The -10 is used in trx_undo_left() */
1870 compile_time_assert((NAME_LEN * 1) * 2 + fixed
1871 + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE
1872 < UNIV_PAGE_SIZE_MIN - 10 - FIL_PAGE_DATA_END);
1873
1874 if (trx_undo_left(block, start) < fixed + len) {
1875 ut_ad(first_free > TRX_UNDO_PAGE_HDR
1876 + TRX_UNDO_PAGE_HDR_SIZE);
1877 return 0;
1878 }
1879
1880 byte* ptr = start + 2;
1881 *ptr++ = TRX_UNDO_RENAME_TABLE;
1882 ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
1883 ptr += mach_u64_write_much_compressed(ptr, table->id);
1884 memcpy(ptr, table->name.m_name, len);
1885 ptr += len;
1886 mach_write_to_2(ptr, first_free);
1887 ptr += 2;
1888 ulint offset = page_offset(ptr);
1889 mach_write_to_2(start, offset);
1890 mach_write_to_2(ptr_first_free, offset);
1891
1892 trx_undof_page_add_undo_rec_log(block, first_free, offset, mtr);
1893 return first_free;
1894}
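/* The TRX_UNDO_RENAME_TABLE record written above is, roughly:

	[ 2 bytes ]	offset of the next undo log record
	[ 1 byte ]	TRX_UNDO_RENAME_TABLE
	[ compressed ]	undo number
	[ compressed ]	table id
	[ len bytes ]	the old table name, not NUL-terminated; its length
			is implied by the record length
	[ 2 bytes ]	offset of the start of this record

(An illustrative summary of the code above, not a normative format
description.) */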
1895
1896/** Report a RENAME TABLE operation.
1897@param[in,out] trx transaction
1898@param[in] table table that is being renamed
1899@return DB_SUCCESS or error code */
1900dberr_t
1901trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
1902{
1903 ut_ad(!trx->read_only);
1904 ut_ad(trx->id);
1905 ut_ad(!table->is_temporary());
1906
1907 mtr_t mtr;
1908 dberr_t err;
1909 mtr.start();
1910 if (buf_block_t* block = trx_undo_assign(trx, &err, &mtr)) {
1911 trx_undo_t* undo = trx->rsegs.m_redo.undo;
1912 ut_ad(err == DB_SUCCESS);
1913 ut_ad(undo);
1914 for (ut_d(int loop_count = 0);;) {
1915 ut_ad(++loop_count < 2);
1916 ut_ad(undo->last_page_no == block->page.id.page_no());
1917
1918 if (ulint offset = trx_undo_page_report_rename(
1919 trx, table, block, &mtr)) {
1920 undo->withdraw_clock = buf_withdraw_clock;
1921 undo->top_page_no = undo->last_page_no;
1922 undo->top_offset = offset;
1923 undo->top_undo_no = trx->undo_no++;
1924 undo->guess_block = block;
1925 ut_ad(!undo->empty());
1926
1927 err = DB_SUCCESS;
1928 break;
1929 } else {
1930 mtr.commit();
1931 mtr.start();
1932 block = trx_undo_add_page(undo, &mtr);
1933 if (!block) {
1934 err = DB_OUT_OF_FILE_SPACE;
1935 break;
1936 }
1937 }
1938 }
1939
1940 mtr.commit();
1941 }
1942
1943 return err;
1944}
1945
1946/***********************************************************************//**
1947Writes information to an undo log about an insert, update, or a delete marking
1948of a clustered index record. This information is used in a rollback of the
1949transaction and in consistent reads that must look to the history of this
1950transaction.
1951@return DB_SUCCESS or error code */
1952dberr_t
1953trx_undo_report_row_operation(
1954/*==========================*/
1955 que_thr_t* thr, /*!< in: query thread */
1956 dict_index_t* index, /*!< in: clustered index */
1957 const dtuple_t* clust_entry, /*!< in: in the case of an insert,
1958 index entry to insert into the
1959 clustered index; in updates,
1960 may contain a clustered index
1961 record tuple that also contains
1962 virtual columns of the table;
1963 otherwise, NULL */
1964 const upd_t* update, /*!< in: in the case of an update,
1965 the update vector, otherwise NULL */
1966 ulint cmpl_info, /*!< in: compiler info on secondary
1967 index updates */
1968 const rec_t* rec, /*!< in: case of an update or delete
1969 marking, the record in the clustered
1970 index; NULL if insert */
1971 const ulint* offsets, /*!< in: rec_get_offsets(rec) */
1972 roll_ptr_t* roll_ptr) /*!< out: DB_ROLL_PTR to the
1973 undo log record */
1974{
1975 trx_t* trx;
1976 mtr_t mtr;
1977#ifdef UNIV_DEBUG
1978 int loop_count = 0;
1979#endif /* UNIV_DEBUG */
1980
1981 ut_a(dict_index_is_clust(index));
1982 ut_ad(!update || rec);
1983 ut_ad(!rec || rec_offs_validate(rec, index, offsets));
1984 ut_ad(!srv_read_only_mode);
1985
1986 trx = thr_get_trx(thr);
1987 /* This function must not be invoked during rollback
1988	(of a TRX_STATE_PREPARED transaction or otherwise). */
1989 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
1990 ut_ad(!trx->in_rollback);
1991
1992 mtr.start();
1993 trx_undo_t** pundo;
1994 trx_rseg_t* rseg;
1995 const bool is_temp = index->table->is_temporary();
1996
1997 if (is_temp) {
1998 mtr.set_log_mode(MTR_LOG_NO_REDO);
1999
2000 rseg = trx->get_temp_rseg();
2001 pundo = &trx->rsegs.m_noredo.undo;
2002 } else {
2003 ut_ad(!trx->read_only);
2004 ut_ad(trx->id);
2005 pundo = &trx->rsegs.m_redo.undo;
2006 rseg = trx->rsegs.m_redo.rseg;
2007 }
2008
2009 dberr_t err;
2010 buf_block_t* undo_block = trx_undo_assign_low(trx, rseg, pundo,
2011 &err, &mtr);
2012 trx_undo_t* undo = *pundo;
2013
2014 ut_ad((err == DB_SUCCESS) == (undo_block != NULL));
2015 if (UNIV_UNLIKELY(undo_block == NULL)) {
2016 goto err_exit;
2017 }
2018
2019 ut_ad(undo != NULL);
2020
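	/* Try to write the undo log record on the last page of the
	assigned undo log. If it does not fit, extend the log by one
	page and retry; if it does not fit even on an empty page,
	report that the record is too big. */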
2021 do {
2022 ulint offset = !rec
2023 ? trx_undo_page_report_insert(
2024 undo_block, trx, index, clust_entry, &mtr)
2025 : trx_undo_page_report_modify(
2026 undo_block, trx, index, rec, offsets, update,
2027 cmpl_info, clust_entry, &mtr);
2028
2029 if (UNIV_UNLIKELY(offset == 0)) {
2030 if (!trx_undo_erase_page_end(undo_block->frame)) {
2031 /* The record did not fit on an empty
2032 undo page. Discard the freshly allocated
2033 page and return an error. */
2034
2035 /* When we remove a page from an undo
2036 log, this is analogous to a
2037 pessimistic insert in a B-tree, and we
2038 must reserve the counterpart of the
2039 tree latch, which is the rseg
2040 mutex. We must commit the mini-transaction
2041 first, because it may be holding lower-level
2042 latches, such as SYNC_FSP and SYNC_FSP_PAGE. */
2043
2044 mtr.commit();
2045 mtr.start();
2046 if (is_temp) {
2047 mtr.set_log_mode(MTR_LOG_NO_REDO);
2048 }
2049
2050 mutex_enter(&rseg->mutex);
2051 trx_undo_free_last_page(undo, &mtr);
2052 mutex_exit(&rseg->mutex);
2053
2054 err = DB_UNDO_RECORD_TOO_BIG;
2055 goto err_exit;
2056 }
2057
2058 mtr_commit(&mtr);
2059 } else {
2060 /* Success */
2061 undo->withdraw_clock = buf_withdraw_clock;
2062 mtr_commit(&mtr);
2063
2064 undo->top_page_no = undo_block->page.id.page_no();
2065 undo->top_offset = offset;
2066 undo->top_undo_no = trx->undo_no++;
2067 undo->guess_block = undo_block;
2068 ut_ad(!undo->empty());
2069
2070 if (!is_temp) {
2071 const undo_no_t limit = undo->top_undo_no;
2072				/* Register this table as modified by this
2073				transaction, and determine below whether this
2074				is its first change to a system-versioned column. */
2075 trx_mod_table_time_t& time
2076 = trx->mod_tables.insert(
2077 trx_mod_tables_t::value_type(
2078 index->table, limit))
2079 .first->second;
2080 ut_ad(time.valid(limit));
2081
2082 if (!time.is_versioned()
2083 && index->table->versioned_by_id()
2084 && (!rec /* INSERT */
2085 || (update
2086 && update->affects_versioned()))) {
2087 time.set_versioned(limit);
2088 }
2089 }
2090
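			/* Encode the location of the undo log record
			(insert flag, rollback segment id, page number
			and page offset) in the DB_ROLL_PTR that is
			returned to the caller. */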
2091 *roll_ptr = trx_undo_build_roll_ptr(
2092 !rec, rseg->id, undo->top_page_no, offset);
2093 return(DB_SUCCESS);
2094 }
2095
2096 ut_ad(undo_block->page.id.page_no() == undo->last_page_no);
2097
2098 /* We have to extend the undo log by one page */
2099
2100 ut_ad(++loop_count < 2);
2101 mtr.start();
2102
2103 if (is_temp) {
2104 mtr.set_log_mode(MTR_LOG_NO_REDO);
2105 }
2106
2107 undo_block = trx_undo_add_page(undo, &mtr);
2108
2109 DBUG_EXECUTE_IF("ib_err_ins_undo_page_add_failure",
2110 undo_block = NULL;);
2111 } while (UNIV_LIKELY(undo_block != NULL));
2112
2113 ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
2114 DB_OUT_OF_FILE_SPACE,
2115 //ER_INNODB_UNDO_LOG_FULL,
2116 "No more space left over in %s tablespace for allocating UNDO"
2117 " log pages. Please add new data file to the tablespace or"
2118 " check if filesystem is full or enable auto-extension for"
2119 " the tablespace",
2120 undo->rseg->space == fil_system.sys_space
2121 ? "system" : is_temp ? "temporary" : "undo");
2122
2123 /* Did not succeed: out of space */
2124 err = DB_OUT_OF_FILE_SPACE;
2125
2126err_exit:
2127 mtr_commit(&mtr);
2128 return(err);
2129}
2130
2131/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
2132
2133/** Copy an undo record to heap.
2134@param[in] roll_ptr roll pointer to a record that exists
2135@param[in,out] heap memory heap where copied */
2136static
2137trx_undo_rec_t*
2138trx_undo_get_undo_rec_low(
2139 roll_ptr_t roll_ptr,
2140 mem_heap_t* heap)
2141{
2142 trx_undo_rec_t* undo_rec;
2143 ulint rseg_id;
2144 ulint page_no;
2145 ulint offset;
2146 const page_t* undo_page;
2147 trx_rseg_t* rseg;
2148 ibool is_insert;
2149 mtr_t mtr;
2150
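	/* The roll pointer identifies an undo log record by the
	insert flag, the rollback segment id, the undo log page
	number and the byte offset within that page. */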
2151 trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
2152 &offset);
2153 ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO);
2154 ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
2155 rseg = trx_sys.rseg_array[rseg_id];
2156 ut_ad(rseg->is_persistent());
2157
2158 mtr_start(&mtr);
2159
2160 undo_page = trx_undo_page_get_s_latched(
2161 page_id_t(rseg->space->id, page_no), &mtr);
2162
2163 undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
2164
2165 mtr_commit(&mtr);
2166
2167 return(undo_rec);
2168}
2169
2170/** Copy an undo record to heap.
2171@param[in] roll_ptr roll pointer to record
2172@param[in,out] heap memory heap where copied
2173@param[in] trx_id id of the trx that generated
2174 the roll pointer: it points to an
2175 undo log of this transaction
2176@param[in] name table name
2177@param[out] undo_rec own: copy of the record
2178@retval true if the undo log has been
2179truncated and we cannot fetch the old version
2180@retval false if the undo log record is available
2181NOTE: the caller must have latches on the clustered index page. */
2182static MY_ATTRIBUTE((warn_unused_result))
2183bool
2184trx_undo_get_undo_rec(
2185 roll_ptr_t roll_ptr,
2186 mem_heap_t* heap,
2187 trx_id_t trx_id,
2188 const table_name_t& name,
2189 trx_undo_rec_t** undo_rec)
2190{
2191 bool missing_history;
2192
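	/* While holding purge_sys.latch, check whether the changes of
	trx_id are already visible to the purge view; if they are, the
	old version may already have been purged and must not be read. */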
2193 rw_lock_s_lock(&purge_sys.latch);
2194
2195 missing_history = purge_sys.view.changes_visible(trx_id, name);
2196 if (!missing_history) {
2197 *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
2198 }
2199
2200 rw_lock_s_unlock(&purge_sys.latch);
2201
2202 return(missing_history);
2203}
2204
2205#ifdef UNIV_DEBUG
2206#define ATTRIB_USED_ONLY_IN_DEBUG
2207#else /* UNIV_DEBUG */
2208#define ATTRIB_USED_ONLY_IN_DEBUG MY_ATTRIBUTE((unused))
2209#endif /* UNIV_DEBUG */
2210
2211/*******************************************************************//**
2212Build a previous version of a clustered index record. The caller must
2213hold a latch on the index page of the clustered index record.
2214@retval true if previous version was built, or if it was an insert
2215or the table has been rebuilt
2216@retval false if the previous version is earlier than purge_view,
2217or being purged, which means that it may have been removed */
2218bool
2219trx_undo_prev_version_build(
2220/*========================*/
2221 const rec_t* index_rec ATTRIB_USED_ONLY_IN_DEBUG,
2222 /*!< in: clustered index record in the
2223 index tree */
2224 mtr_t* index_mtr ATTRIB_USED_ONLY_IN_DEBUG,
2225 /*!< in: mtr which contains the latch to
2226 index_rec page and purge_view */
2227 const rec_t* rec, /*!< in: version of a clustered index record */
2228 dict_index_t* index, /*!< in: clustered index */
2229 ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
2230 mem_heap_t* heap, /*!< in: memory heap from which the memory
2231 needed is allocated */
2232 rec_t** old_vers,/*!< out, own: previous version, or NULL if
2233 rec is the first inserted version, or if
2234 history data has been deleted (an error),
2235 or if the purge COULD have removed the version
2236 though it has not yet done so */
2237	mem_heap_t*	v_heap,	/*!< in: memory heap used to create vrow
2238					dtuple if it is not yet created. This heap
2239					differs from "heap" above in that it could be
2240 prebuilt->old_vers_heap for selection */
2241 const dtuple_t**vrow, /*!< out: virtual column info, if any */
2242 ulint v_status)
2243				/*!< in: status flags telling whether this
2244				function is invoked by the purge thread, and
2245				whether the "after image" of the undo log is read */
2246{
2247 trx_undo_rec_t* undo_rec = NULL;
2248 dtuple_t* entry;
2249 trx_id_t rec_trx_id;
2250 ulint type;
2251 undo_no_t undo_no;
2252 table_id_t table_id;
2253 trx_id_t trx_id;
2254 roll_ptr_t roll_ptr;
2255 upd_t* update;
2256 byte* ptr;
2257 ulint info_bits;
2258 ulint cmpl_info;
2259 bool dummy_extern;
2260 byte* buf;
2261
2262 ut_ad(!index->table->is_temporary());
2263 ut_ad(!rw_lock_own(&purge_sys.latch, RW_LOCK_S));
2264 ut_ad(mtr_memo_contains_page_flagged(index_mtr, index_rec,
2265 MTR_MEMO_PAGE_S_FIX
2266 | MTR_MEMO_PAGE_X_FIX));
2267 ut_ad(rec_offs_validate(rec, index, offsets));
2268 ut_a(index->is_primary());
2269
2270 roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
2271
2272 *old_vers = NULL;
2273
2274 if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
2275 /* The record rec is the first inserted version */
2276 return(true);
2277 }
2278
2279 rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
2280
2281 ut_ad(!index->table->skip_alter_undo);
2282
2283 if (trx_undo_get_undo_rec(
2284 roll_ptr, heap, rec_trx_id, index->table->name,
2285 &undo_rec)) {
2286 if (v_status & TRX_UNDO_PREV_IN_PURGE) {
2287 /* We are fetching the record being purged */
2288 undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
2289 } else {
2290 /* The undo record may already have been purged,
2291 during purge or semi-consistent read. */
2292 return(false);
2293 }
2294 }
2295
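	/* Parse the undo log record header: type, compiler info,
	external storage flag, undo number and table id. */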
2296 ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
2297 &dummy_extern, &undo_no, &table_id);
2298
2299 if (table_id != index->table->id) {
2300 /* The table should have been rebuilt, but purge has
2301 not yet removed the undo log records for the
2302 now-dropped old table (table_id). */
2303 return(true);
2304 }
2305
2306 ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
2307 &info_bits);
2308
2309 /* (a) If a clustered index record version is such that the
2310 trx id stamp in it is bigger than purge_sys.view, then the
2311 BLOBs in that version are known to exist (the purge has not
2312 progressed that far);
2313
2314 (b) if the version is the first version such that trx id in it
2315 is less than purge_sys.view, and it is not delete-marked,
2316 then the BLOBs in that version are known to exist (the purge
2317 cannot have purged the BLOBs referenced by that version
2318 yet).
2319
2320 This function does not fetch any BLOBs. The callers might, by
2321 possibly invoking row_ext_create() via row_build(). However,
2322 they should have all needed information in the *old_vers
2323 returned by this function. This is because *old_vers is based
2324 on the transaction undo log records. The function
2325 trx_undo_page_fetch_ext() will write BLOB prefixes to the
2326 transaction undo log that are at least as long as the longest
2327 possible column prefix in a secondary index. Thus, secondary
2328 index entries for *old_vers can be constructed without
2329 dereferencing any BLOB pointers. */
2330
2331 ptr = trx_undo_rec_skip_row_ref(ptr, index);
2332
2333 ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
2334 roll_ptr, info_bits,
2335 heap, &update);
2336 ut_a(ptr);
2337
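	/* If the update changes the size of any field or its external
	storage flag, rebuild the previous version from an index entry;
	otherwise, copy rec and apply the update vector in place. */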
2338 if (row_upd_changes_field_size_or_external(index, offsets, update)) {
2339 ulint n_ext;
2340
2341 /* We should confirm the existence of disowned external data,
2342 if the previous version record is delete marked. If the trx_id
2343 of the previous record is seen by purge view, we should treat
2344 it as missing history, because the disowned external data
2345 might be purged already.
2346
2347 The inherited external data (BLOBs) can be freed (purged)
2348 after trx_id was committed, provided that no view was started
2349 before trx_id. If the purge view can see the committed
2350 delete-marked record by trx_id, no transactions need to access
2351 the BLOB. */
2352
2353 /* the row_upd_changes_disowned_external(update) call could be
2354 omitted, but the synchronization on purge_sys.latch is likely
2355 more expensive. */
2356
2357 if ((update->info_bits & REC_INFO_DELETED_FLAG)
2358 && row_upd_changes_disowned_external(update)) {
2359 bool missing_extern;
2360
2361 rw_lock_s_lock(&purge_sys.latch);
2362
2363 missing_extern = purge_sys.view.changes_visible(
2364 trx_id, index->table->name);
2365
2366 rw_lock_s_unlock(&purge_sys.latch);
2367
2368 if (missing_extern) {
2369				/* Treat this as a fresh insert, so as not
2370				to cause an assertion failure at the caller. */
2371 return(true);
2372 }
2373 }
2374
2375 /* We have to set the appropriate extern storage bits in the
2376 old version of the record: the extern bits in rec for those
2377 fields that update does NOT update, as well as the bits for
2378 those fields that update updates to become externally stored
2379 fields. Store the info: */
2380
2381 entry = row_rec_to_index_entry(
2382 rec, index, offsets, &n_ext, heap);
2383 n_ext += btr_push_update_extern_fields(entry, update, heap);
2384 /* The page containing the clustered index record
2385 corresponding to entry is latched in mtr. Thus the
2386 following call is safe. */
2387 row_upd_index_replace_new_col_vals(entry, index, update, heap);
2388
2389 buf = static_cast<byte*>(mem_heap_alloc(
2390 heap, rec_get_converted_size(index, entry, n_ext)));
2391
2392 *old_vers = rec_convert_dtuple_to_rec(buf, index,
2393 entry, n_ext);
2394 } else {
2395 buf = static_cast<byte*>(mem_heap_alloc(
2396 heap, rec_offs_size(offsets)));
2397
2398 *old_vers = rec_copy(buf, rec, offsets);
2399 rec_offs_make_valid(*old_vers, index, true, offsets);
2400 row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
2401 }
2402
2403 /* Set the old value (which is the after image of an update) in the
2404 update vector to dtuple vrow */
2405 if (v_status & TRX_UNDO_GET_OLD_V_VALUE) {
2406 row_upd_replace_vcol((dtuple_t*)*vrow, index->table, update,
2407 false, NULL, NULL);
2408 }
2409
2410#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
2411 ut_a(!rec_offs_any_null_extern(
2412 *old_vers, rec_get_offsets(*old_vers, index, NULL, true,
2413 ULINT_UNDEFINED, &heap)));
2414#endif // defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
2415
2416 if (vrow && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
2417 if (!(*vrow)) {
2418 *vrow = dtuple_create_with_vcol(
2419 v_heap ? v_heap : heap,
2420 dict_table_get_n_cols(index->table),
2421 dict_table_get_n_v_cols(index->table));
2422 dtuple_init_v_fld(*vrow);
2423 }
2424
2425 ut_ad(index->table->n_v_cols);
2426 trx_undo_read_v_cols(index->table, ptr, *vrow,
2427 v_status & TRX_UNDO_PREV_IN_PURGE);
2428 }
2429
2430 return(true);
2431}
2432
2433/** Read virtual column value from undo log
2434@param[in] table the table
2435@param[in] ptr undo log pointer
2436@param[in,out] row the dtuple to fill
2437@param[in] in_purge whether this is called by purge */
2438void
2439trx_undo_read_v_cols(
2440 const dict_table_t* table,
2441 const byte* ptr,
2442 const dtuple_t* row,
2443 bool in_purge)
2444{
2445 const byte* end_ptr;
2446 bool first_v_col = true;
2447 bool is_undo_log = true;
2448
2449 end_ptr = ptr + mach_read_from_2(ptr);
2450 ptr += 2;
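	/* The stored column information consists of a 2-byte total
	length (counted from the start of the length field) followed
	by (field number, value) pairs. Field numbers of at least
	REC_MAX_N_FIELDS denote virtual columns. */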
2451 while (ptr < end_ptr) {
2452 dfield_t* dfield;
2453 const byte* field;
2454 ulint field_no;
2455 ulint len;
2456 ulint orig_len;
2457 bool is_virtual;
2458
2459 field_no = mach_read_next_compressed(
2460 const_cast<const byte**>(&ptr));
2461
2462 is_virtual = (field_no >= REC_MAX_N_FIELDS);
2463
2464 if (is_virtual) {
2465 ptr = trx_undo_read_v_idx(
2466 table, ptr, first_v_col, &is_undo_log,
2467 &field_no);
2468 first_v_col = false;
2469 }
2470
2471 ptr = trx_undo_rec_get_col_val(
2472 ptr, &field, &len, &orig_len);
2473
2474		/* The virtual column is no longer indexed or does not exist.
2475		This check must come after trx_undo_rec_get_col_val() so that
2476		the undo log pointer still advances. */
2477 if (field_no == ULINT_UNDEFINED) {
2478 ut_ad(is_virtual);
2479 continue;
2480 }
2481
2482 if (is_virtual) {
2483 dict_v_col_t* vcol = dict_table_get_nth_v_col(
2484 table, field_no);
2485
2486 dfield = dtuple_get_nth_v_field(row, vcol->v_pos);
2487
2488 if (!in_purge
2489 || dfield_get_type(dfield)->mtype == DATA_MISSING) {
2490 dict_col_copy_type(
2491 &vcol->m_col,
2492 dfield_get_type(dfield));
2493 dfield_set_data(dfield, field, len);
2494 }
2495 }
2496 }
2497
2498 ut_ad(ptr == end_ptr);
2499}
2500