1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2017, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file mtr/mtr0log.cc |
22 | Mini-transaction log routines |
23 | |
24 | Created 12/7/1995 Heikki Tuuri |
25 | *******************************************************/ |
26 | |
27 | #include "mtr0log.h" |
28 | #include "buf0buf.h" |
29 | #include "dict0dict.h" |
30 | #include "log0recv.h" |
31 | #include "page0page.h" |
32 | #include "buf0dblwr.h" |
33 | #include "dict0boot.h" |
34 | |
35 | /********************************************************//** |
36 | Catenates n bytes to the mtr log. */ |
37 | void |
38 | mlog_catenate_string( |
39 | /*=================*/ |
40 | mtr_t* mtr, /*!< in: mtr */ |
41 | const byte* str, /*!< in: string to write */ |
42 | ulint len) /*!< in: string length */ |
43 | { |
44 | if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) { |
45 | |
46 | return; |
47 | } |
48 | |
49 | mtr->get_log()->push(str, ib_uint32_t(len)); |
50 | } |
51 | |
52 | /********************************************************//** |
53 | Writes the initial part of a log record consisting of one-byte item |
54 | type and four-byte space and page numbers. Also pushes info |
55 | to the mtr memo that a buffer page has been modified. */ |
56 | void |
57 | mlog_write_initial_log_record( |
58 | /*==========================*/ |
59 | const byte* ptr, /*!< in: pointer to (inside) a buffer |
60 | frame holding the file page where |
61 | modification is made */ |
62 | mlog_id_t type, /*!< in: log item type: MLOG_1BYTE, ... */ |
63 | mtr_t* mtr) /*!< in: mini-transaction handle */ |
64 | { |
65 | byte* log_ptr; |
66 | |
67 | ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type)); |
68 | ut_ad(type > MLOG_8BYTES); |
69 | |
70 | log_ptr = mlog_open(mtr, 11); |
71 | |
72 | /* If no logging is requested, we may return now */ |
73 | if (log_ptr == NULL) { |
74 | |
75 | return; |
76 | } |
77 | |
78 | log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr); |
79 | |
80 | mlog_close(mtr, log_ptr); |
81 | } |
82 | |
83 | /********************************************************//** |
84 | Parses an initial log record written by mlog_write_initial_log_record. |
85 | @return parsed record end, NULL if not a complete record */ |
86 | byte* |
87 | mlog_parse_initial_log_record( |
88 | /*==========================*/ |
89 | const byte* ptr, /*!< in: buffer */ |
90 | const byte* end_ptr,/*!< in: buffer end */ |
91 | mlog_id_t* type, /*!< out: log record type: MLOG_1BYTE, ... */ |
92 | ulint* space, /*!< out: space id */ |
93 | ulint* page_no)/*!< out: page number */ |
94 | { |
95 | if (end_ptr < ptr + 1) { |
96 | |
97 | return(NULL); |
98 | } |
99 | |
100 | *type = mlog_id_t(*ptr & ~MLOG_SINGLE_REC_FLAG); |
101 | ut_ad(*type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(*type)); |
102 | |
103 | ptr++; |
104 | |
105 | if (end_ptr < ptr + 2) { |
106 | |
107 | return(NULL); |
108 | } |
109 | |
110 | *space = mach_parse_compressed(&ptr, end_ptr); |
111 | |
112 | if (ptr != NULL) { |
113 | *page_no = mach_parse_compressed(&ptr, end_ptr); |
114 | } |
115 | |
116 | return(const_cast<byte*>(ptr)); |
117 | } |
118 | |
119 | /********************************************************//** |
120 | Parses a log record written by mlog_write_ulint or mlog_write_ull. |
121 | @return parsed record end, NULL if not a complete record or a corrupt record */ |
122 | byte* |
123 | mlog_parse_nbytes( |
124 | /*==============*/ |
125 | mlog_id_t type, /*!< in: log record type: MLOG_1BYTE, ... */ |
126 | const byte* ptr, /*!< in: buffer */ |
127 | const byte* end_ptr,/*!< in: buffer end */ |
128 | byte* page, /*!< in: page where to apply the log |
129 | record, or NULL */ |
130 | void* page_zip)/*!< in/out: compressed page, or NULL */ |
131 | { |
132 | ulint offset; |
133 | ulint val; |
134 | ib_uint64_t dval; |
135 | |
136 | ut_a(type <= MLOG_8BYTES); |
137 | ut_a(!page || !page_zip |
138 | || !fil_page_index_page_check(page)); |
139 | if (end_ptr < ptr + 2) { |
140 | |
141 | return(NULL); |
142 | } |
143 | |
144 | offset = mach_read_from_2(ptr); |
145 | ptr += 2; |
146 | |
147 | if (offset >= srv_page_size) { |
148 | recv_sys->found_corrupt_log = TRUE; |
149 | |
150 | return(NULL); |
151 | } |
152 | |
153 | if (type == MLOG_8BYTES) { |
154 | dval = mach_u64_parse_compressed(&ptr, end_ptr); |
155 | |
156 | if (ptr == NULL) { |
157 | |
158 | return(NULL); |
159 | } |
160 | |
161 | if (page) { |
162 | if (page_zip) { |
163 | mach_write_to_8 |
164 | (((page_zip_des_t*) page_zip)->data |
165 | + offset, dval); |
166 | } |
167 | mach_write_to_8(page + offset, dval); |
168 | } |
169 | |
170 | return(const_cast<byte*>(ptr)); |
171 | } |
172 | |
173 | val = mach_parse_compressed(&ptr, end_ptr); |
174 | |
175 | if (ptr == NULL) { |
176 | |
177 | return(NULL); |
178 | } |
179 | |
180 | switch (type) { |
181 | case MLOG_1BYTE: |
182 | if (val > 0xFFUL) { |
183 | goto corrupt; |
184 | } |
185 | if (page) { |
186 | if (page_zip) { |
187 | mach_write_to_1 |
188 | (((page_zip_des_t*) page_zip)->data |
189 | + offset, val); |
190 | } |
191 | mach_write_to_1(page + offset, val); |
192 | } |
193 | break; |
194 | case MLOG_2BYTES: |
195 | if (val > 0xFFFFUL) { |
196 | goto corrupt; |
197 | } |
198 | if (page) { |
199 | if (page_zip) { |
200 | mach_write_to_2 |
201 | (((page_zip_des_t*) page_zip)->data |
202 | + offset, val); |
203 | } |
204 | mach_write_to_2(page + offset, val); |
205 | } |
206 | |
207 | break; |
208 | case MLOG_4BYTES: |
209 | if (page) { |
210 | if (page_zip) { |
211 | mach_write_to_4 |
212 | (((page_zip_des_t*) page_zip)->data |
213 | + offset, val); |
214 | } |
215 | mach_write_to_4(page + offset, val); |
216 | } |
217 | break; |
218 | default: |
219 | corrupt: |
220 | recv_sys->found_corrupt_log = TRUE; |
221 | ptr = NULL; |
222 | } |
223 | |
224 | return(const_cast<byte*>(ptr)); |
225 | } |
226 | |
227 | /********************************************************//** |
228 | Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log |
229 | record to the mini-transaction log if mtr is not NULL. */ |
230 | void |
231 | mlog_write_ulint( |
232 | /*=============*/ |
233 | byte* ptr, /*!< in: pointer where to write */ |
234 | ulint val, /*!< in: value to write */ |
235 | mlog_id_t type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ |
236 | mtr_t* mtr) /*!< in: mini-transaction handle */ |
237 | { |
238 | switch (type) { |
239 | case MLOG_1BYTE: |
240 | mach_write_to_1(ptr, val); |
241 | break; |
242 | case MLOG_2BYTES: |
243 | mach_write_to_2(ptr, val); |
244 | break; |
245 | case MLOG_4BYTES: |
246 | mach_write_to_4(ptr, val); |
247 | break; |
248 | default: |
249 | ut_error; |
250 | } |
251 | |
252 | if (mtr != 0) { |
253 | byte* log_ptr = mlog_open(mtr, 11 + 2 + 5); |
254 | |
255 | /* If no logging is requested, we may return now */ |
256 | |
257 | if (log_ptr != 0) { |
258 | |
259 | log_ptr = mlog_write_initial_log_record_fast( |
260 | ptr, type, log_ptr, mtr); |
261 | |
262 | mach_write_to_2(log_ptr, page_offset(ptr)); |
263 | log_ptr += 2; |
264 | |
265 | log_ptr += mach_write_compressed(log_ptr, val); |
266 | |
267 | mlog_close(mtr, log_ptr); |
268 | } |
269 | } |
270 | } |
271 | |
272 | /********************************************************//** |
273 | Writes 8 bytes to a file page. Writes the corresponding log |
274 | record to the mini-transaction log, only if mtr is not NULL */ |
275 | void |
276 | mlog_write_ull( |
277 | /*===========*/ |
278 | byte* ptr, /*!< in: pointer where to write */ |
279 | ib_uint64_t val, /*!< in: value to write */ |
280 | mtr_t* mtr) /*!< in: mini-transaction handle */ |
281 | { |
282 | mach_write_to_8(ptr, val); |
283 | |
284 | if (mtr != 0) { |
285 | byte* log_ptr = mlog_open(mtr, 11 + 2 + 9); |
286 | |
287 | /* If no logging is requested, we may return now */ |
288 | if (log_ptr != 0) { |
289 | |
290 | log_ptr = mlog_write_initial_log_record_fast( |
291 | ptr, MLOG_8BYTES, log_ptr, mtr); |
292 | |
293 | mach_write_to_2(log_ptr, page_offset(ptr)); |
294 | log_ptr += 2; |
295 | |
296 | log_ptr += mach_u64_write_compressed(log_ptr, val); |
297 | |
298 | mlog_close(mtr, log_ptr); |
299 | } |
300 | } |
301 | } |
302 | |
303 | /********************************************************//** |
304 | Writes a string to a file page buffered in the buffer pool. Writes the |
305 | corresponding log record to the mini-transaction log. */ |
306 | void |
307 | mlog_write_string( |
308 | /*==============*/ |
309 | byte* ptr, /*!< in: pointer where to write */ |
310 | const byte* str, /*!< in: string to write */ |
311 | ulint len, /*!< in: string length */ |
312 | mtr_t* mtr) /*!< in: mini-transaction handle */ |
313 | { |
314 | ut_ad(ptr && mtr); |
315 | ut_a(len < srv_page_size); |
316 | |
317 | memcpy(ptr, str, len); |
318 | |
319 | mlog_log_string(ptr, len, mtr); |
320 | } |
321 | |
322 | /********************************************************//** |
323 | Logs a write of a string to a file page buffered in the buffer pool. |
324 | Writes the corresponding log record to the mini-transaction log. */ |
325 | void |
326 | mlog_log_string( |
327 | /*============*/ |
328 | byte* ptr, /*!< in: pointer written to */ |
329 | ulint len, /*!< in: string length */ |
330 | mtr_t* mtr) /*!< in: mini-transaction handle */ |
331 | { |
332 | byte* log_ptr; |
333 | |
334 | ut_ad(ptr && mtr); |
335 | ut_ad(len <= srv_page_size); |
336 | |
337 | log_ptr = mlog_open(mtr, 30); |
338 | |
339 | /* If no logging is requested, we may return now */ |
340 | if (log_ptr == NULL) { |
341 | |
342 | return; |
343 | } |
344 | |
345 | log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING, |
346 | log_ptr, mtr); |
347 | mach_write_to_2(log_ptr, page_offset(ptr)); |
348 | log_ptr += 2; |
349 | |
350 | mach_write_to_2(log_ptr, len); |
351 | log_ptr += 2; |
352 | |
353 | mlog_close(mtr, log_ptr); |
354 | |
355 | mlog_catenate_string(mtr, ptr, len); |
356 | } |
357 | |
358 | /********************************************************//** |
359 | Parses a log record written by mlog_write_string. |
360 | @return parsed record end, NULL if not a complete record */ |
361 | byte* |
362 | mlog_parse_string( |
363 | /*==============*/ |
364 | byte* ptr, /*!< in: buffer */ |
365 | byte* end_ptr,/*!< in: buffer end */ |
366 | byte* page, /*!< in: page where to apply the log record, or NULL */ |
367 | void* page_zip)/*!< in/out: compressed page, or NULL */ |
368 | { |
369 | ulint offset; |
370 | ulint len; |
371 | |
372 | ut_a(!page || !page_zip |
373 | || (fil_page_get_type(page) != FIL_PAGE_INDEX |
374 | && fil_page_get_type(page) != FIL_PAGE_RTREE)); |
375 | |
376 | if (end_ptr < ptr + 4) { |
377 | |
378 | return(NULL); |
379 | } |
380 | |
381 | offset = mach_read_from_2(ptr); |
382 | ptr += 2; |
383 | len = mach_read_from_2(ptr); |
384 | ptr += 2; |
385 | |
386 | if (offset >= srv_page_size || len + offset > srv_page_size) { |
387 | recv_sys->found_corrupt_log = TRUE; |
388 | |
389 | return(NULL); |
390 | } |
391 | |
392 | if (end_ptr < ptr + len) { |
393 | |
394 | return(NULL); |
395 | } |
396 | |
397 | if (page) { |
398 | if (page_zip) { |
399 | memcpy(((page_zip_des_t*) page_zip)->data |
400 | + offset, ptr, len); |
401 | } |
402 | memcpy(page + offset, ptr, len); |
403 | } |
404 | |
405 | return(ptr + len); |
406 | } |
407 | |
408 | /********************************************************//** |
409 | Opens a buffer for mlog, writes the initial log record and, |
410 | if needed, the field lengths of an index. |
411 | @return buffer, NULL if log mode MTR_LOG_NONE */ |
412 | byte* |
413 | mlog_open_and_write_index( |
414 | /*======================*/ |
415 | mtr_t* mtr, /*!< in: mtr */ |
416 | const byte* rec, /*!< in: index record or page */ |
417 | const dict_index_t* index, /*!< in: record descriptor */ |
418 | mlog_id_t type, /*!< in: log item type */ |
419 | ulint size) /*!< in: requested buffer size in bytes |
420 | (if 0, calls mlog_close() and |
421 | returns NULL) */ |
422 | { |
423 | byte* log_ptr; |
424 | const byte* log_start; |
425 | const byte* log_end; |
426 | |
427 | ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); |
428 | |
429 | mtr->set_modified(); |
430 | switch (mtr->get_log_mode()) { |
431 | case MTR_LOG_NONE: |
432 | case MTR_LOG_NO_REDO: |
433 | return NULL; |
434 | case MTR_LOG_SHORT_INSERTS: |
435 | ut_ad(0); |
436 | /* fall through */ |
437 | case MTR_LOG_ALL: |
438 | break; |
439 | } |
440 | |
441 | if (!page_rec_is_comp(rec)) { |
442 | log_start = log_ptr = mtr->get_log()->open(11 + size); |
443 | log_ptr = mlog_write_initial_log_record_fast(rec, type, |
444 | log_ptr, mtr); |
445 | log_end = log_ptr + 11 + size; |
446 | } else { |
447 | ulint i; |
448 | bool is_instant = index->is_instant(); |
449 | ulint n = dict_index_get_n_fields(index); |
450 | ulint total = 11 + (is_instant ? 2 : 0) + size + (n + 2) * 2; |
451 | ulint alloc = std::min(total, |
452 | ulint(mtr_buf_t::MAX_DATA_SIZE)); |
453 | |
454 | const bool is_leaf = page_is_leaf(page_align(rec)); |
455 | |
456 | /* For spatial index, on non-leaf page, we just keep |
457 | 2 fields, MBR and page no. */ |
458 | if (!is_leaf && dict_index_is_spatial(index)) { |
459 | n = DICT_INDEX_SPATIAL_NODEPTR_SIZE; |
460 | } |
461 | |
462 | log_start = log_ptr = mtr->get_log()->open(alloc); |
463 | log_end = log_ptr + alloc; |
464 | |
465 | log_ptr = mlog_write_initial_log_record_fast( |
466 | rec, type, log_ptr, mtr); |
467 | |
468 | if (is_instant) { |
469 | // marked as instant index |
470 | mach_write_to_2(log_ptr, n | 0x8000); |
471 | |
472 | log_ptr += 2; |
473 | |
474 | // record the n_core_fields |
475 | mach_write_to_2(log_ptr, index->n_core_fields); |
476 | } else { |
477 | mach_write_to_2(log_ptr, n); |
478 | } |
479 | |
480 | log_ptr += 2; |
481 | mach_write_to_2( |
482 | log_ptr, is_leaf |
483 | ? dict_index_get_n_unique_in_tree(index) |
484 | : dict_index_get_n_unique_in_tree_nonleaf(index)); |
485 | log_ptr += 2; |
486 | |
487 | for (i = 0; i < n; i++) { |
488 | dict_field_t* field; |
489 | const dict_col_t* col; |
490 | ulint len; |
491 | |
492 | field = dict_index_get_nth_field(index, i); |
493 | col = dict_field_get_col(field); |
494 | len = field->fixed_len; |
495 | ut_ad(len < 0x7fff); |
496 | if (len == 0 |
497 | && (DATA_BIG_COL(col))) { |
498 | /* variable-length field |
499 | with maximum length > 255 */ |
500 | len = 0x7fff; |
501 | } |
502 | if (col->prtype & DATA_NOT_NULL) { |
503 | len |= 0x8000; |
504 | } |
505 | if (log_ptr + 2 > log_end) { |
506 | mlog_close(mtr, log_ptr); |
507 | ut_a(total > ulint(log_ptr - log_start)); |
508 | total -= ulint(log_ptr - log_start); |
509 | alloc = std::min( |
510 | total, |
511 | ulint(mtr_buf_t::MAX_DATA_SIZE)); |
512 | |
513 | log_start = log_ptr = mtr->get_log()->open( |
514 | alloc); |
515 | log_end = log_ptr + alloc; |
516 | } |
517 | mach_write_to_2(log_ptr, len); |
518 | log_ptr += 2; |
519 | } |
520 | } |
521 | if (size == 0) { |
522 | mlog_close(mtr, log_ptr); |
523 | log_ptr = NULL; |
524 | } else if (log_ptr + size > log_end) { |
525 | mlog_close(mtr, log_ptr); |
526 | log_ptr = mlog_open(mtr, size); |
527 | } |
528 | return(log_ptr); |
529 | } |
530 | |
531 | /********************************************************//** |
532 | Parses a log record written by mlog_open_and_write_index. |
533 | @return parsed record end, NULL if not a complete record */ |
534 | byte* |
535 | mlog_parse_index( |
536 | /*=============*/ |
537 | byte* ptr, /*!< in: buffer */ |
538 | const byte* end_ptr,/*!< in: buffer end */ |
539 | ibool comp, /*!< in: TRUE=compact row format */ |
540 | dict_index_t** index) /*!< out, own: dummy index */ |
541 | { |
542 | ulint i, n, n_uniq; |
543 | dict_table_t* table; |
544 | dict_index_t* ind; |
545 | ulint n_core_fields = 0; |
546 | |
547 | ut_ad(comp == FALSE || comp == TRUE); |
548 | |
549 | if (comp) { |
550 | if (end_ptr < ptr + 4) { |
551 | return(NULL); |
552 | } |
553 | n = mach_read_from_2(ptr); |
554 | ptr += 2; |
555 | if (n & 0x8000) { /* record after instant ADD COLUMN */ |
556 | n &= 0x7FFF; |
557 | |
558 | n_core_fields = mach_read_from_2(ptr); |
559 | |
560 | if (!n_core_fields || n_core_fields > n) { |
561 | recv_sys->found_corrupt_log = TRUE; |
562 | return(NULL); |
563 | } |
564 | |
565 | ptr += 2; |
566 | |
567 | if (end_ptr < ptr + 2) { |
568 | return(NULL); |
569 | } |
570 | } |
571 | |
572 | n_uniq = mach_read_from_2(ptr); |
573 | ptr += 2; |
574 | ut_ad(n_uniq <= n); |
575 | if (end_ptr < ptr + n * 2) { |
576 | return(NULL); |
577 | } |
578 | } else { |
579 | n = n_uniq = 1; |
580 | } |
581 | table = dict_mem_table_create("LOG_DUMMY" , NULL, n, 0, |
582 | comp ? DICT_TF_COMPACT : 0, 0); |
583 | ind = dict_mem_index_create(table, "LOG_DUMMY" , 0, n); |
584 | ind->n_uniq = (unsigned int) n_uniq; |
585 | if (n_uniq != n) { |
586 | ut_a(n_uniq + DATA_ROLL_PTR <= n); |
587 | ind->type = DICT_CLUSTERED; |
588 | } |
589 | if (comp) { |
590 | for (i = 0; i < n; i++) { |
591 | ulint len = mach_read_from_2(ptr); |
592 | ptr += 2; |
593 | /* The high-order bit of len is the NOT NULL flag; |
594 | the rest is 0 or 0x7fff for variable-length fields, |
595 | and 1..0x7ffe for fixed-length fields. */ |
596 | dict_mem_table_add_col( |
597 | table, NULL, NULL, |
598 | ((len + 1) & 0x7fff) <= 1 |
599 | ? DATA_BINARY : DATA_FIXBINARY, |
600 | len & 0x8000 ? DATA_NOT_NULL : 0, |
601 | len & 0x7fff); |
602 | |
603 | dict_index_add_col(ind, table, |
604 | dict_table_get_nth_col(table, i), |
605 | 0); |
606 | } |
607 | dict_table_add_system_columns(table, table->heap); |
608 | if (n_uniq != n) { |
609 | /* Identify DB_TRX_ID and DB_ROLL_PTR in the index. */ |
610 | ut_a(DATA_TRX_ID_LEN |
611 | == dict_index_get_nth_col(ind, DATA_TRX_ID - 1 |
612 | + n_uniq)->len); |
613 | ut_a(DATA_ROLL_PTR_LEN |
614 | == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1 |
615 | + n_uniq)->len); |
616 | ind->fields[DATA_TRX_ID - 1 + n_uniq].col |
617 | = &table->cols[n + DATA_TRX_ID]; |
618 | ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col |
619 | = &table->cols[n + DATA_ROLL_PTR]; |
620 | } |
621 | |
622 | ut_ad(table->n_cols == table->n_def); |
623 | |
624 | if (n_core_fields) { |
625 | for (i = n_core_fields; i < n; i++) { |
626 | ind->fields[i].col->def_val.len |
627 | = UNIV_SQL_NULL; |
628 | } |
629 | ind->n_core_fields = n_core_fields; |
630 | ind->n_core_null_bytes = UT_BITS_IN_BYTES( |
631 | ind->get_n_nullable(n_core_fields)); |
632 | } else { |
633 | ind->n_core_null_bytes = UT_BITS_IN_BYTES( |
634 | unsigned(ind->n_nullable)); |
635 | ind->n_core_fields = ind->n_fields; |
636 | } |
637 | } |
638 | /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ |
639 | ind->cached = TRUE; |
640 | ut_d(ind->is_dummy = true); |
641 | *index = ind; |
642 | return(ptr); |
643 | } |
644 | |