1/*****************************************************************************
2
3Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2017, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file mtr/mtr0log.cc
22Mini-transaction log routines
23
24Created 12/7/1995 Heikki Tuuri
25*******************************************************/
26
27#include "mtr0log.h"
28#include "buf0buf.h"
29#include "dict0dict.h"
30#include "log0recv.h"
31#include "page0page.h"
32#include "buf0dblwr.h"
33#include "dict0boot.h"
34
35/********************************************************//**
36Catenates n bytes to the mtr log. */
37void
38mlog_catenate_string(
39/*=================*/
40 mtr_t* mtr, /*!< in: mtr */
41 const byte* str, /*!< in: string to write */
42 ulint len) /*!< in: string length */
43{
44 if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
45
46 return;
47 }
48
49 mtr->get_log()->push(str, ib_uint32_t(len));
50}
51
52/********************************************************//**
53Writes the initial part of a log record consisting of one-byte item
54type and four-byte space and page numbers. Also pushes info
55to the mtr memo that a buffer page has been modified. */
56void
57mlog_write_initial_log_record(
58/*==========================*/
59 const byte* ptr, /*!< in: pointer to (inside) a buffer
60 frame holding the file page where
61 modification is made */
62 mlog_id_t type, /*!< in: log item type: MLOG_1BYTE, ... */
63 mtr_t* mtr) /*!< in: mini-transaction handle */
64{
65 byte* log_ptr;
66
67 ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type));
68 ut_ad(type > MLOG_8BYTES);
69
70 log_ptr = mlog_open(mtr, 11);
71
72 /* If no logging is requested, we may return now */
73 if (log_ptr == NULL) {
74
75 return;
76 }
77
78 log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
79
80 mlog_close(mtr, log_ptr);
81}
82
83/********************************************************//**
84Parses an initial log record written by mlog_write_initial_log_record.
85@return parsed record end, NULL if not a complete record */
86byte*
87mlog_parse_initial_log_record(
88/*==========================*/
89 const byte* ptr, /*!< in: buffer */
90 const byte* end_ptr,/*!< in: buffer end */
91 mlog_id_t* type, /*!< out: log record type: MLOG_1BYTE, ... */
92 ulint* space, /*!< out: space id */
93 ulint* page_no)/*!< out: page number */
94{
95 if (end_ptr < ptr + 1) {
96
97 return(NULL);
98 }
99
100 *type = mlog_id_t(*ptr & ~MLOG_SINGLE_REC_FLAG);
101 ut_ad(*type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(*type));
102
103 ptr++;
104
105 if (end_ptr < ptr + 2) {
106
107 return(NULL);
108 }
109
110 *space = mach_parse_compressed(&ptr, end_ptr);
111
112 if (ptr != NULL) {
113 *page_no = mach_parse_compressed(&ptr, end_ptr);
114 }
115
116 return(const_cast<byte*>(ptr));
117}
118
119/********************************************************//**
120Parses a log record written by mlog_write_ulint or mlog_write_ull.
121@return parsed record end, NULL if not a complete record or a corrupt record */
122byte*
123mlog_parse_nbytes(
124/*==============*/
125 mlog_id_t type, /*!< in: log record type: MLOG_1BYTE, ... */
126 const byte* ptr, /*!< in: buffer */
127 const byte* end_ptr,/*!< in: buffer end */
128 byte* page, /*!< in: page where to apply the log
129 record, or NULL */
130 void* page_zip)/*!< in/out: compressed page, or NULL */
131{
132 ulint offset;
133 ulint val;
134 ib_uint64_t dval;
135
136 ut_a(type <= MLOG_8BYTES);
137 ut_a(!page || !page_zip
138 || !fil_page_index_page_check(page));
139 if (end_ptr < ptr + 2) {
140
141 return(NULL);
142 }
143
144 offset = mach_read_from_2(ptr);
145 ptr += 2;
146
147 if (offset >= srv_page_size) {
148 recv_sys->found_corrupt_log = TRUE;
149
150 return(NULL);
151 }
152
153 if (type == MLOG_8BYTES) {
154 dval = mach_u64_parse_compressed(&ptr, end_ptr);
155
156 if (ptr == NULL) {
157
158 return(NULL);
159 }
160
161 if (page) {
162 if (page_zip) {
163 mach_write_to_8
164 (((page_zip_des_t*) page_zip)->data
165 + offset, dval);
166 }
167 mach_write_to_8(page + offset, dval);
168 }
169
170 return(const_cast<byte*>(ptr));
171 }
172
173 val = mach_parse_compressed(&ptr, end_ptr);
174
175 if (ptr == NULL) {
176
177 return(NULL);
178 }
179
180 switch (type) {
181 case MLOG_1BYTE:
182 if (val > 0xFFUL) {
183 goto corrupt;
184 }
185 if (page) {
186 if (page_zip) {
187 mach_write_to_1
188 (((page_zip_des_t*) page_zip)->data
189 + offset, val);
190 }
191 mach_write_to_1(page + offset, val);
192 }
193 break;
194 case MLOG_2BYTES:
195 if (val > 0xFFFFUL) {
196 goto corrupt;
197 }
198 if (page) {
199 if (page_zip) {
200 mach_write_to_2
201 (((page_zip_des_t*) page_zip)->data
202 + offset, val);
203 }
204 mach_write_to_2(page + offset, val);
205 }
206
207 break;
208 case MLOG_4BYTES:
209 if (page) {
210 if (page_zip) {
211 mach_write_to_4
212 (((page_zip_des_t*) page_zip)->data
213 + offset, val);
214 }
215 mach_write_to_4(page + offset, val);
216 }
217 break;
218 default:
219 corrupt:
220 recv_sys->found_corrupt_log = TRUE;
221 ptr = NULL;
222 }
223
224 return(const_cast<byte*>(ptr));
225}
226
227/********************************************************//**
228Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
229record to the mini-transaction log if mtr is not NULL. */
230void
231mlog_write_ulint(
232/*=============*/
233 byte* ptr, /*!< in: pointer where to write */
234 ulint val, /*!< in: value to write */
235 mlog_id_t type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
236 mtr_t* mtr) /*!< in: mini-transaction handle */
237{
238 switch (type) {
239 case MLOG_1BYTE:
240 mach_write_to_1(ptr, val);
241 break;
242 case MLOG_2BYTES:
243 mach_write_to_2(ptr, val);
244 break;
245 case MLOG_4BYTES:
246 mach_write_to_4(ptr, val);
247 break;
248 default:
249 ut_error;
250 }
251
252 if (mtr != 0) {
253 byte* log_ptr = mlog_open(mtr, 11 + 2 + 5);
254
255 /* If no logging is requested, we may return now */
256
257 if (log_ptr != 0) {
258
259 log_ptr = mlog_write_initial_log_record_fast(
260 ptr, type, log_ptr, mtr);
261
262 mach_write_to_2(log_ptr, page_offset(ptr));
263 log_ptr += 2;
264
265 log_ptr += mach_write_compressed(log_ptr, val);
266
267 mlog_close(mtr, log_ptr);
268 }
269 }
270}
271
272/********************************************************//**
273Writes 8 bytes to a file page. Writes the corresponding log
274record to the mini-transaction log, only if mtr is not NULL */
275void
276mlog_write_ull(
277/*===========*/
278 byte* ptr, /*!< in: pointer where to write */
279 ib_uint64_t val, /*!< in: value to write */
280 mtr_t* mtr) /*!< in: mini-transaction handle */
281{
282 mach_write_to_8(ptr, val);
283
284 if (mtr != 0) {
285 byte* log_ptr = mlog_open(mtr, 11 + 2 + 9);
286
287 /* If no logging is requested, we may return now */
288 if (log_ptr != 0) {
289
290 log_ptr = mlog_write_initial_log_record_fast(
291 ptr, MLOG_8BYTES, log_ptr, mtr);
292
293 mach_write_to_2(log_ptr, page_offset(ptr));
294 log_ptr += 2;
295
296 log_ptr += mach_u64_write_compressed(log_ptr, val);
297
298 mlog_close(mtr, log_ptr);
299 }
300 }
301}
302
303/********************************************************//**
304Writes a string to a file page buffered in the buffer pool. Writes the
305corresponding log record to the mini-transaction log. */
306void
307mlog_write_string(
308/*==============*/
309 byte* ptr, /*!< in: pointer where to write */
310 const byte* str, /*!< in: string to write */
311 ulint len, /*!< in: string length */
312 mtr_t* mtr) /*!< in: mini-transaction handle */
313{
314 ut_ad(ptr && mtr);
315 ut_a(len < srv_page_size);
316
317 memcpy(ptr, str, len);
318
319 mlog_log_string(ptr, len, mtr);
320}
321
322/********************************************************//**
323Logs a write of a string to a file page buffered in the buffer pool.
324Writes the corresponding log record to the mini-transaction log. */
325void
326mlog_log_string(
327/*============*/
328 byte* ptr, /*!< in: pointer written to */
329 ulint len, /*!< in: string length */
330 mtr_t* mtr) /*!< in: mini-transaction handle */
331{
332 byte* log_ptr;
333
334 ut_ad(ptr && mtr);
335 ut_ad(len <= srv_page_size);
336
337 log_ptr = mlog_open(mtr, 30);
338
339 /* If no logging is requested, we may return now */
340 if (log_ptr == NULL) {
341
342 return;
343 }
344
345 log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING,
346 log_ptr, mtr);
347 mach_write_to_2(log_ptr, page_offset(ptr));
348 log_ptr += 2;
349
350 mach_write_to_2(log_ptr, len);
351 log_ptr += 2;
352
353 mlog_close(mtr, log_ptr);
354
355 mlog_catenate_string(mtr, ptr, len);
356}
357
358/********************************************************//**
359Parses a log record written by mlog_write_string.
360@return parsed record end, NULL if not a complete record */
361byte*
362mlog_parse_string(
363/*==============*/
364 byte* ptr, /*!< in: buffer */
365 byte* end_ptr,/*!< in: buffer end */
366 byte* page, /*!< in: page where to apply the log record, or NULL */
367 void* page_zip)/*!< in/out: compressed page, or NULL */
368{
369 ulint offset;
370 ulint len;
371
372 ut_a(!page || !page_zip
373 || (fil_page_get_type(page) != FIL_PAGE_INDEX
374 && fil_page_get_type(page) != FIL_PAGE_RTREE));
375
376 if (end_ptr < ptr + 4) {
377
378 return(NULL);
379 }
380
381 offset = mach_read_from_2(ptr);
382 ptr += 2;
383 len = mach_read_from_2(ptr);
384 ptr += 2;
385
386 if (offset >= srv_page_size || len + offset > srv_page_size) {
387 recv_sys->found_corrupt_log = TRUE;
388
389 return(NULL);
390 }
391
392 if (end_ptr < ptr + len) {
393
394 return(NULL);
395 }
396
397 if (page) {
398 if (page_zip) {
399 memcpy(((page_zip_des_t*) page_zip)->data
400 + offset, ptr, len);
401 }
402 memcpy(page + offset, ptr, len);
403 }
404
405 return(ptr + len);
406}
407
408/********************************************************//**
409Opens a buffer for mlog, writes the initial log record and,
410if needed, the field lengths of an index.
411@return buffer, NULL if log mode MTR_LOG_NONE */
412byte*
413mlog_open_and_write_index(
414/*======================*/
415 mtr_t* mtr, /*!< in: mtr */
416 const byte* rec, /*!< in: index record or page */
417 const dict_index_t* index, /*!< in: record descriptor */
418 mlog_id_t type, /*!< in: log item type */
419 ulint size) /*!< in: requested buffer size in bytes
420 (if 0, calls mlog_close() and
421 returns NULL) */
422{
423 byte* log_ptr;
424 const byte* log_start;
425 const byte* log_end;
426
427 ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
428
429 mtr->set_modified();
430 switch (mtr->get_log_mode()) {
431 case MTR_LOG_NONE:
432 case MTR_LOG_NO_REDO:
433 return NULL;
434 case MTR_LOG_SHORT_INSERTS:
435 ut_ad(0);
436 /* fall through */
437 case MTR_LOG_ALL:
438 break;
439 }
440
441 if (!page_rec_is_comp(rec)) {
442 log_start = log_ptr = mtr->get_log()->open(11 + size);
443 log_ptr = mlog_write_initial_log_record_fast(rec, type,
444 log_ptr, mtr);
445 log_end = log_ptr + 11 + size;
446 } else {
447 ulint i;
448 bool is_instant = index->is_instant();
449 ulint n = dict_index_get_n_fields(index);
450 ulint total = 11 + (is_instant ? 2 : 0) + size + (n + 2) * 2;
451 ulint alloc = std::min(total,
452 ulint(mtr_buf_t::MAX_DATA_SIZE));
453
454 const bool is_leaf = page_is_leaf(page_align(rec));
455
456 /* For spatial index, on non-leaf page, we just keep
457 2 fields, MBR and page no. */
458 if (!is_leaf && dict_index_is_spatial(index)) {
459 n = DICT_INDEX_SPATIAL_NODEPTR_SIZE;
460 }
461
462 log_start = log_ptr = mtr->get_log()->open(alloc);
463 log_end = log_ptr + alloc;
464
465 log_ptr = mlog_write_initial_log_record_fast(
466 rec, type, log_ptr, mtr);
467
468 if (is_instant) {
469 // marked as instant index
470 mach_write_to_2(log_ptr, n | 0x8000);
471
472 log_ptr += 2;
473
474 // record the n_core_fields
475 mach_write_to_2(log_ptr, index->n_core_fields);
476 } else {
477 mach_write_to_2(log_ptr, n);
478 }
479
480 log_ptr += 2;
481 mach_write_to_2(
482 log_ptr, is_leaf
483 ? dict_index_get_n_unique_in_tree(index)
484 : dict_index_get_n_unique_in_tree_nonleaf(index));
485 log_ptr += 2;
486
487 for (i = 0; i < n; i++) {
488 dict_field_t* field;
489 const dict_col_t* col;
490 ulint len;
491
492 field = dict_index_get_nth_field(index, i);
493 col = dict_field_get_col(field);
494 len = field->fixed_len;
495 ut_ad(len < 0x7fff);
496 if (len == 0
497 && (DATA_BIG_COL(col))) {
498 /* variable-length field
499 with maximum length > 255 */
500 len = 0x7fff;
501 }
502 if (col->prtype & DATA_NOT_NULL) {
503 len |= 0x8000;
504 }
505 if (log_ptr + 2 > log_end) {
506 mlog_close(mtr, log_ptr);
507 ut_a(total > ulint(log_ptr - log_start));
508 total -= ulint(log_ptr - log_start);
509 alloc = std::min(
510 total,
511 ulint(mtr_buf_t::MAX_DATA_SIZE));
512
513 log_start = log_ptr = mtr->get_log()->open(
514 alloc);
515 log_end = log_ptr + alloc;
516 }
517 mach_write_to_2(log_ptr, len);
518 log_ptr += 2;
519 }
520 }
521 if (size == 0) {
522 mlog_close(mtr, log_ptr);
523 log_ptr = NULL;
524 } else if (log_ptr + size > log_end) {
525 mlog_close(mtr, log_ptr);
526 log_ptr = mlog_open(mtr, size);
527 }
528 return(log_ptr);
529}
530
531/********************************************************//**
532Parses a log record written by mlog_open_and_write_index.
533@return parsed record end, NULL if not a complete record */
534byte*
535mlog_parse_index(
536/*=============*/
537 byte* ptr, /*!< in: buffer */
538 const byte* end_ptr,/*!< in: buffer end */
539 ibool comp, /*!< in: TRUE=compact row format */
540 dict_index_t** index) /*!< out, own: dummy index */
541{
542 ulint i, n, n_uniq;
543 dict_table_t* table;
544 dict_index_t* ind;
545 ulint n_core_fields = 0;
546
547 ut_ad(comp == FALSE || comp == TRUE);
548
549 if (comp) {
550 if (end_ptr < ptr + 4) {
551 return(NULL);
552 }
553 n = mach_read_from_2(ptr);
554 ptr += 2;
555 if (n & 0x8000) { /* record after instant ADD COLUMN */
556 n &= 0x7FFF;
557
558 n_core_fields = mach_read_from_2(ptr);
559
560 if (!n_core_fields || n_core_fields > n) {
561 recv_sys->found_corrupt_log = TRUE;
562 return(NULL);
563 }
564
565 ptr += 2;
566
567 if (end_ptr < ptr + 2) {
568 return(NULL);
569 }
570 }
571
572 n_uniq = mach_read_from_2(ptr);
573 ptr += 2;
574 ut_ad(n_uniq <= n);
575 if (end_ptr < ptr + n * 2) {
576 return(NULL);
577 }
578 } else {
579 n = n_uniq = 1;
580 }
581 table = dict_mem_table_create("LOG_DUMMY", NULL, n, 0,
582 comp ? DICT_TF_COMPACT : 0, 0);
583 ind = dict_mem_index_create(table, "LOG_DUMMY", 0, n);
584 ind->n_uniq = (unsigned int) n_uniq;
585 if (n_uniq != n) {
586 ut_a(n_uniq + DATA_ROLL_PTR <= n);
587 ind->type = DICT_CLUSTERED;
588 }
589 if (comp) {
590 for (i = 0; i < n; i++) {
591 ulint len = mach_read_from_2(ptr);
592 ptr += 2;
593 /* The high-order bit of len is the NOT NULL flag;
594 the rest is 0 or 0x7fff for variable-length fields,
595 and 1..0x7ffe for fixed-length fields. */
596 dict_mem_table_add_col(
597 table, NULL, NULL,
598 ((len + 1) & 0x7fff) <= 1
599 ? DATA_BINARY : DATA_FIXBINARY,
600 len & 0x8000 ? DATA_NOT_NULL : 0,
601 len & 0x7fff);
602
603 dict_index_add_col(ind, table,
604 dict_table_get_nth_col(table, i),
605 0);
606 }
607 dict_table_add_system_columns(table, table->heap);
608 if (n_uniq != n) {
609 /* Identify DB_TRX_ID and DB_ROLL_PTR in the index. */
610 ut_a(DATA_TRX_ID_LEN
611 == dict_index_get_nth_col(ind, DATA_TRX_ID - 1
612 + n_uniq)->len);
613 ut_a(DATA_ROLL_PTR_LEN
614 == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1
615 + n_uniq)->len);
616 ind->fields[DATA_TRX_ID - 1 + n_uniq].col
617 = &table->cols[n + DATA_TRX_ID];
618 ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col
619 = &table->cols[n + DATA_ROLL_PTR];
620 }
621
622 ut_ad(table->n_cols == table->n_def);
623
624 if (n_core_fields) {
625 for (i = n_core_fields; i < n; i++) {
626 ind->fields[i].col->def_val.len
627 = UNIV_SQL_NULL;
628 }
629 ind->n_core_fields = n_core_fields;
630 ind->n_core_null_bytes = UT_BITS_IN_BYTES(
631 ind->get_n_nullable(n_core_fields));
632 } else {
633 ind->n_core_null_bytes = UT_BITS_IN_BYTES(
634 unsigned(ind->n_nullable));
635 ind->n_core_fields = ind->n_fields;
636 }
637 }
638 /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
639 ind->cached = TRUE;
640 ut_d(ind->is_dummy = true);
641 *index = ind;
642 return(ptr);
643}
644