1/*****************************************************************************
2
3Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2012, Facebook Inc.
5Copyright (c) 2014, 2018, MariaDB Corporation.
6
7This program is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free Software
9Foundation; version 2 of the License.
10
11This program is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License along with
16this program; if not, write to the Free Software Foundation, Inc.,
1751 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
18
19*****************************************************************************/
20
21/**************************************************//**
22@file page/page0zip.cc
23Compressed page interface
24
25Created June 2005 by Marko Makela
26*******************************************************/
27
28#include "page0size.h"
29#include "page0zip.h"
30
/** A BLOB field reference full of zero, for use in assertions and tests.
Initially, BLOB field references are set to zero, in
dtuple_convert_big_rec(). This buffer is also the all-zero pattern that
the ASSERT_ZERO() and ASSERT_ZERO_BLOB() macros compare against. */
const byte field_ref_zero[FIELD_REF_SIZE] = {
	0, 0, 0, 0, 0,
	0, 0, 0, 0, 0,
	0, 0, 0, 0, 0,
	0, 0, 0, 0, 0,
};
40
41#ifndef UNIV_INNOCHECKSUM
42#include "page0page.h"
43#include "mtr0log.h"
44#include "dict0dict.h"
45#include "btr0cur.h"
46#include "page0types.h"
47#include "log0recv.h"
48#include "row0row.h"
49#include "row0trunc.h"
50#include "zlib.h"
51#include "buf0buf.h"
52#include "buf0types.h"
53#include "buf0checksum.h"
54#include "btr0sea.h"
55#include "dict0boot.h"
56#include "lock0lock.h"
57#include "srv0srv.h"
58#include "buf0lru.h"
59#include "srv0mon.h"
60#include "ut0crc32.h"
61
62#include <map>
63#include <algorithm>
64
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
/** Statistics on compression, indexed by index->id */
page_zip_stat_per_index_t page_zip_stat_per_index;

/** Compression level to be used by zlib. Settable by user.
(NOTE(review): presumably backed by a system variable such as
innodb_compression_level -- verify against the server layer.) */
uint page_zip_level;

/** Whether or not to log compressed page images to avoid possible
compression algorithm changes in zlib. */
my_bool page_zip_log_pages;
76
/* Please refer to ../include/page0zip.ic for a description of the
compressed page format. */

/* The infimum and supremum records are omitted from the compressed page.
On compress, we compare that the records are there, and on uncompress we
restore the records. */
/** Extra bytes of an infimum record */
static const byte infimum_extra[] = {
	0x01,			/* info_bits=0, n_owned=1 */
	0x00, 0x02		/* heap_no=0, status=2 */
	/* ?, ? */		/* next=(first user rec, or supremum) */
};
/** Data bytes of an infimum record */
static const byte infimum_data[] = {
	0x69, 0x6e, 0x66, 0x69,
	0x6d, 0x75, 0x6d, 0x00	/* "infimum\0" */
};
/** Extra bytes and data bytes of a supremum record.
(The first extra byte, holding info_bits and n_owned, is omitted
because n_owned varies; see the commented-out placeholder below.) */
static const byte supremum_extra_data[] = {
	/* 0x0?, */		/* info_bits=0, n_owned=1..8 */
	0x00, 0x0b,		/* heap_no=1, status=3 */
	0x00, 0x00,		/* next=0 */
	0x73, 0x75, 0x70, 0x72,
	0x65, 0x6d, 0x75, 0x6d	/* "supremum" */
};
102
/** Assert that a block of memory is filled with zero bytes.
Compare at most sizeof(field_ref_zero) bytes.
NOTE(review): the expansion already ends in a semicolon, so an
invocation written as ASSERT_ZERO(b, s); expands to two statements;
harmless in plain statement context, but it would misbehave in an
unbraced if/else -- confirm no such call sites exist.
@param b in: memory block
@param s in: size of the memory block, in bytes */
#define ASSERT_ZERO(b, s) \
	ut_ad(!memcmp(b, field_ref_zero, \
		      ut_min(static_cast<size_t>(s), sizeof field_ref_zero)));
/** Assert that a BLOB pointer is filled with zero bytes.
@param b in: BLOB pointer */
#define ASSERT_ZERO_BLOB(b) \
	ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
114
/* Enable some extra debugging output. This code can be enabled
independently of any UNIV_ debugging conditions. */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
# include <stdarg.h>
MY_ATTRIBUTE((format (printf, 1, 2)))
/**********************************************************************//**
Report a failure to decompress or compress.
Prints a timestamped diagnostic line to stderr.
@return number of characters printed */
static
int
page_zip_fail_func(
/*===============*/
	const char*	fmt,	/*!< in: printf(3) format string */
	...)			/*!< in: arguments corresponding to fmt */
{
	int	res;
	va_list	ap;

	ut_print_timestamp(stderr);
	fputs(" InnoDB: ", stderr);
	va_start(ap, fmt);
	res = vfprintf(stderr, fmt, ap);
	va_end(ap);

	return(res);
}
/** Wrapper for page_zip_fail_func()
@param fmt_args in: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/** Dummy wrapper for page_zip_fail_func()
@param fmt_args ignored: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) /* empty */
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
149
150/**********************************************************************//**
151Determine the guaranteed free space on an empty page.
152@return minimum payload size on the page */
153ulint
154page_zip_empty_size(
155/*================*/
156 ulint n_fields, /*!< in: number of columns in the index */
157 ulint zip_size) /*!< in: compressed page size in bytes */
158{
159 ulint size = zip_size
160 /* subtract the page header and the longest
161 uncompressed data needed for one record */
162 - (PAGE_DATA
163 + PAGE_ZIP_CLUST_LEAF_SLOT_SIZE
164 + 1/* encoded heap_no==2 in page_zip_write_rec() */
165 + 1/* end of modification log */
166 - REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
167 /* subtract the space for page_zip_fields_encode() */
168 - compressBound(static_cast<uLong>(2 * (n_fields + 1)));
169 return(lint(size) > 0 ? size : 0);
170}
171
172/** Check whether a tuple is too big for compressed table
173@param[in] index dict index object
174@param[in] entry entry for the index
175@return true if it's too big, otherwise false */
176bool
177page_zip_is_too_big(
178 const dict_index_t* index,
179 const dtuple_t* entry)
180{
181 const page_size_t& page_size =
182 dict_table_page_size(index->table);
183
184 /* Estimate the free space of an empty compressed page.
185 Subtract one byte for the encoded heap_no in the
186 modification log. */
187 ulint free_space_zip = page_zip_empty_size(
188 index->n_fields, page_size.physical());
189 ulint n_uniq = dict_index_get_n_unique_in_tree(index);
190
191 ut_ad(dict_table_is_comp(index->table));
192 ut_ad(page_size.is_compressed());
193
194 if (free_space_zip == 0) {
195 return(true);
196 }
197
198 /* Subtract one byte for the encoded heap_no in the
199 modification log. */
200 free_space_zip--;
201
202 /* There should be enough room for two node pointer
203 records on an empty non-leaf page. This prevents
204 infinite page splits. */
205
206 if (entry->n_fields >= n_uniq
207 && (REC_NODE_PTR_SIZE
208 + rec_get_converted_size_comp_prefix(
209 index, entry->fields, n_uniq, NULL)
210 /* On a compressed page, there is
211 a two-byte entry in the dense
212 page directory for every record.
213 But there is no record header. */
214 - (REC_N_NEW_EXTRA_BYTES - 2)
215 > free_space_zip / 2)) {
216 return(true);
217 }
218
219 return(false);
220}
221
222/*************************************************************//**
223Gets the number of elements in the dense page directory,
224including deleted records (the free list).
225@return number of elements in the dense page directory */
226UNIV_INLINE
227ulint
228page_zip_dir_elems(
229/*===============*/
230 const page_zip_des_t* page_zip) /*!< in: compressed page */
231{
232 /* Exclude the page infimum and supremum from the record count. */
233 return ulint(page_dir_get_n_heap(page_zip->data))
234 - PAGE_HEAP_NO_USER_LOW;
235}
236
237/*************************************************************//**
238Gets the size of the compressed page trailer (the dense page directory),
239including deleted records (the free list).
240@return length of dense page directory, in bytes */
241UNIV_INLINE
242ulint
243page_zip_dir_size(
244/*==============*/
245 const page_zip_des_t* page_zip) /*!< in: compressed page */
246{
247 return(PAGE_ZIP_DIR_SLOT_SIZE * page_zip_dir_elems(page_zip));
248}
249
250/*************************************************************//**
251Gets an offset to the compressed page trailer (the dense page directory),
252including deleted records (the free list).
253@return offset of the dense page directory */
254UNIV_INLINE
255ulint
256page_zip_dir_start_offs(
257/*====================*/
258 const page_zip_des_t* page_zip, /*!< in: compressed page */
259 ulint n_dense) /*!< in: directory size */
260{
261 ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip));
262
263 return(page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
264}
265
/*************************************************************//**
Gets a pointer to the compressed page trailer (the dense page directory),
including deleted records (the free list).
The offset is computed by page_zip_dir_start_offs().
@param[in] page_zip compressed page
@param[in] n_dense number of entries in the directory
@return pointer to the dense page directory */
#define page_zip_dir_start_low(page_zip, n_dense) \
	((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense))
/*************************************************************//**
Gets a pointer to the compressed page trailer (the dense page directory),
including deleted records (the free list).
Convenience form of page_zip_dir_start_low() that derives the
directory size itself.
@param[in] page_zip compressed page
@return pointer to the dense page directory */
#define page_zip_dir_start(page_zip) \
	page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip))
281
282/*************************************************************//**
283Gets the size of the compressed page trailer (the dense page directory),
284only including user records (excluding the free list).
285@return length of dense page directory comprising existing records, in bytes */
286UNIV_INLINE
287ulint
288page_zip_dir_user_size(
289/*===================*/
290 const page_zip_des_t* page_zip) /*!< in: compressed page */
291{
292 ulint size = PAGE_ZIP_DIR_SLOT_SIZE
293 * ulint(page_get_n_recs(page_zip->data));
294 ut_ad(size <= page_zip_dir_size(page_zip));
295 return(size);
296}
297
298/*************************************************************//**
299Find the slot of the given record in the dense page directory.
300@return dense directory slot, or NULL if record not found */
301UNIV_INLINE
302byte*
303page_zip_dir_find_low(
304/*==================*/
305 byte* slot, /*!< in: start of records */
306 byte* end, /*!< in: end of records */
307 ulint offset) /*!< in: offset of user record */
308{
309 ut_ad(slot <= end);
310
311 for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
312 if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
313 == offset) {
314 return(slot);
315 }
316 }
317
318 return(NULL);
319}
320
321/*************************************************************//**
322Find the slot of the given non-free record in the dense page directory.
323@return dense directory slot, or NULL if record not found */
324UNIV_INLINE
325byte*
326page_zip_dir_find(
327/*==============*/
328 page_zip_des_t* page_zip, /*!< in: compressed page */
329 ulint offset) /*!< in: offset of user record */
330{
331 byte* end = page_zip->data + page_zip_get_size(page_zip);
332
333 ut_ad(page_zip_simple_validate(page_zip));
334
335 return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
336 end,
337 offset));
338}
339
340/*************************************************************//**
341Find the slot of the given free record in the dense page directory.
342@return dense directory slot, or NULL if record not found */
343UNIV_INLINE
344byte*
345page_zip_dir_find_free(
346/*===================*/
347 page_zip_des_t* page_zip, /*!< in: compressed page */
348 ulint offset) /*!< in: offset of user record */
349{
350 byte* end = page_zip->data + page_zip_get_size(page_zip);
351
352 ut_ad(page_zip_simple_validate(page_zip));
353
354 return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
355 end - page_zip_dir_user_size(page_zip),
356 offset));
357}
358
359/*************************************************************//**
360Read a given slot in the dense page directory.
361@return record offset on the uncompressed page, possibly ORed with
362PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */
363UNIV_INLINE
364ulint
365page_zip_dir_get(
366/*=============*/
367 const page_zip_des_t* page_zip, /*!< in: compressed page */
368 ulint slot) /*!< in: slot
369 (0=first user record) */
370{
371 ut_ad(page_zip_simple_validate(page_zip));
372 ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
373 return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
374 - PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
375}
376
/**********************************************************************//**
Write a log record of compressing an index page.
The MLOG_ZIP_PAGE_COMPRESS record carries the length of the page header
and compressed stream, the length of the uncompressed trailer, the
FIL_PAGE_PREV and FIL_PAGE_NEXT fields, the header plus compressed
stream, and finally the trailer, so that recovery can reconstruct the
compressed page image without re-running the compression (see also
page_zip_log_pages). */
static
void
page_zip_compress_write_log(
/*========================*/
	const page_zip_des_t*	page_zip,/*!< in: compressed page */
	const page_t*		page,	/*!< in: uncompressed page */
	dict_index_t*		index,	/*!< in: index of the B-tree node */
	mtr_t*			mtr)	/*!< in: mini-transaction */
{
	byte*	log_ptr;
	ulint	trailer_size;

	ut_ad(!dict_index_is_ibuf(index));

	/* 11 bytes for the initial log record,
	plus 2 + 2 bytes for the two length fields written below. */
	log_ptr = mlog_open(mtr, 11 + 2 + 2);

	if (!log_ptr) {
		/* Redo logging is disabled for this mini-transaction. */
		return;
	}

	/* Read the number of dense directory entries
	(user records, including the free list). */
	trailer_size = ulint(page_dir_get_n_heap(page_zip->data))
		- PAGE_HEAP_NO_USER_LOW;
	/* Multiply by the number of uncompressed bytes that the
	trailer stores per record. */
	if (!page_is_leaf(page)) {
		/* node pointer page: directory slot + node pointer */
		trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
	} else if (dict_index_is_clust(index)) {
		/* clustered leaf: slot + DB_TRX_ID + DB_ROLL_PTR */
		trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
	} else {
		/* secondary index leaf: directory slot only */
		trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
	}
	/* Add the space occupied by BLOB pointers. */
	trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
	ut_a(page_zip->m_end > PAGE_DATA);
	compile_time_assert(FIL_PAGE_DATA <= PAGE_DATA);
	/* The compressed data and the trailer must not overlap. */
	ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));

	log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
						     MLOG_ZIP_PAGE_COMPRESS,
						     log_ptr, mtr);
	mach_write_to_2(log_ptr, ulint(page_zip->m_end - FIL_PAGE_TYPE));
	log_ptr += 2;
	mach_write_to_2(log_ptr, trailer_size);
	log_ptr += 2;
	mlog_close(mtr, log_ptr);

	/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
	mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
	mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
	/* Write most of the page header, the compressed stream and
	the modification log. */
	mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
			     ulint(page_zip->m_end - FIL_PAGE_TYPE));
	/* Write the uncompressed trailer of the compressed page. */
	mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
			     - trailer_size, trailer_size);
}
438
/******************************************************//**
Determine how many externally stored columns are contained
in existing records with smaller heap_no than rec.
@return number of externally stored columns in those records */
static
ulint
page_zip_get_n_prev_extern(
/*=======================*/
	const page_zip_des_t*	page_zip,/*!< in: dense page directory on
					compressed page */
	const rec_t*		rec,	/*!< in: compact physical record
					on a B-tree leaf page */
	const dict_index_t*	index)	/*!< in: record descriptor */
{
	const page_t*	page	= page_align(rec);
	ulint		n_ext	= 0;
	ulint		i;
	ulint		left;	/* records with smaller heap_no that
				remain to be visited */
	ulint		heap_no;
	ulint		n_recs	= page_get_n_recs(page_zip->data);

	ut_ad(page_is_leaf(page));
	ut_ad(page_is_comp(page));
	ut_ad(dict_table_is_comp(index->table));
	/* Externally stored columns only occur in clustered indexes. */
	ut_ad(dict_index_is_clust(index));
	ut_ad(!dict_index_is_ibuf(index));

	heap_no = rec_get_heap_no_new(rec);
	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
	left = heap_no - PAGE_HEAP_NO_USER_LOW;
	if (UNIV_UNLIKELY(!left)) {
		/* rec has the smallest possible user heap_no:
		there are no preceding records to inspect. */
		return(0);
	}

	/* Scan the dense directory slots of the n_recs records that
	are not on the free list, counting externally stored columns
	in every record whose heap_no is smaller than that of rec. */
	for (i = 0; i < n_recs; i++) {
		const rec_t*	r	= page + (page_zip_dir_get(page_zip, i)
						  & PAGE_ZIP_DIR_SLOT_MASK);

		if (rec_get_heap_no_new(r) < heap_no) {
			n_ext += rec_get_n_extern_new(r, index,
						      ULINT_UNDEFINED);
			if (!--left) {
				/* All records with smaller heap_no
				have been visited; stop early. */
				break;
			}
		}
	}

	return(n_ext);
}
487
488/**********************************************************************//**
489Encode the length of a fixed-length column.
490@return buf + length of encoded val */
491static
492byte*
493page_zip_fixed_field_encode(
494/*========================*/
495 byte* buf, /*!< in: pointer to buffer where to write */
496 ulint val) /*!< in: value to write */
497{
498 ut_ad(val >= 2);
499
500 if (UNIV_LIKELY(val < 126)) {
501 /*
502 0 = nullable variable field of at most 255 bytes length;
503 1 = not null variable field of at most 255 bytes length;
504 126 = nullable variable field with maximum length >255;
505 127 = not null variable field with maximum length >255
506 */
507 *buf++ = (byte) val;
508 } else {
509 *buf++ = (byte) (0x80 | val >> 8);
510 *buf++ = (byte) val;
511 }
512
513 return(buf);
514}
515
/**********************************************************************//**
Write the index information for the compressed page.
Each field (with runs of adjacent non-nullable fixed-length fields
merged) is encoded in one or two bytes via
page_zip_fixed_field_encode(); finally the position of the trx_id
column (leaf pages) or the number of nullable fields (non-leaf pages)
is appended in the same one/two-byte format.
@return used size of buf */
ulint
page_zip_fields_encode(
/*===================*/
	ulint			n,	/*!< in: number of fields
					to compress */
	const dict_index_t*	index,	/*!< in: index comprising
					at least n fields */
	ulint			trx_id_pos,
					/*!< in: position of the trx_id column
					in the index, or ULINT_UNDEFINED if
					this is a non-leaf page */
	byte*			buf)	/*!< out: buffer of (n + 1) * 2 bytes */
{
	const byte*	buf_start	= buf;
	ulint		i;
	ulint		col;	/* number of encoded entries so far */
	ulint		trx_id_col	= 0;
	/* sum of lengths of preceding non-nullable fixed fields, or 0 */
	ulint		fixed_sum	= 0;

	ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);

	for (i = col = 0; i < n; i++) {
		dict_field_t*	field = dict_index_get_nth_field(index, i);
		ulint		val;

		if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
			val = 1;	/* set the "not nullable" flag */
		} else {
			val = 0;	/* nullable field */
		}

		if (!field->fixed_len) {
			/* variable-length field */
			const dict_col_t*	column
				= dict_field_get_col(field);

			if (DATA_BIG_COL(column)) {
				val |= 0x7e;	/* max > 255 bytes */
			}

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			*buf++ = (byte) val;
			col++;
		} else if (val) {
			/* fixed-length non-nullable field:
			accumulate into fixed_sum so that adjacent
			fields are encoded as one run */

			if (fixed_sum && UNIV_UNLIKELY
			    (fixed_sum + field->fixed_len
			     > DICT_MAX_FIXED_COL_LEN)) {
				/* Write out the length of the
				preceding non-nullable fields,
				to avoid exceeding the maximum
				length of a fixed-length column. */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
				if (fixed_sum) {
					/* Write out the length of any
					preceding non-nullable fields,
					and start a new trx_id column. */
					buf = page_zip_fixed_field_encode(
						buf, fixed_sum << 1 | 1);
					col++;
				}

				/* Remember which encoded entry holds
				the DB_TRX_ID column. */
				trx_id_col = col;
				fixed_sum = field->fixed_len;
			} else {
				/* add to the sum */
				fixed_sum += field->fixed_len;
			}
		} else {
			/* fixed-length nullable field:
			always encoded on its own (bit 0 clear) */

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			buf = page_zip_fixed_field_encode(
				buf, ulint(field->fixed_len) << 1);
			col++;
		}
	}

	if (fixed_sum) {
		/* Write out the lengths of last fixed-length columns. */
		buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
	}

	if (trx_id_pos != ULINT_UNDEFINED) {
		/* Write out the position of the trx_id column */
		i = trx_id_col;
	} else {
		/* Write out the number of nullable fields */
		i = index->n_nullable;
	}

	if (i < 128) {
		*buf++ = (byte) i;
	} else {
		/* Two-byte encoding with the high bit set, as in
		page_zip_fixed_field_encode(). */
		*buf++ = (byte) (0x80 | i >> 8);
		*buf++ = (byte) i;
	}

	/* NOTE(review): the parameter comment promises a buffer of
	(n + 1) * 2 bytes while this assertion allows (n + 2) * 2;
	verify the callers allocate the larger bound. */
	ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
	return((ulint) (buf - buf_start));
}
644
/**********************************************************************//**
Populate the dense page directory from the sparse directory.
The dense directory is written backwards starting just below buf, one
PAGE_ZIP_DIR_SLOT_SIZE slot per user record: first the records in
collation order, then the records of the free list.  Each slot holds
the record offset, possibly ORed with PAGE_ZIP_DIR_SLOT_OWNED and/or
PAGE_ZIP_DIR_SLOT_DEL. */
static
void
page_zip_dir_encode(
/*================*/
	const page_t*	page,	/*!< in: compact page */
	byte*		buf,	/*!< in: pointer to dense page directory[-1];
				out: dense directory on compressed page */
	const rec_t**	recs)	/*!< in: pointer to an array of 0, or NULL;
				out: dense page directory sorted by ascending
				address (and heap_no) */
{
	const byte*	rec;
	ulint		status;
	ulint		min_mark;
	ulint		heap_no;
	ulint		i;
	ulint		n_heap;
	ulint		offs;

	min_mark = 0;

	if (page_is_leaf(page)) {
		status = REC_STATUS_ORDINARY;
	} else {
		status = REC_STATUS_NODE_PTR;
		if (UNIV_UNLIKELY(!page_has_prev(page))) {
			/* On the leftmost page of a non-leaf level,
			the first user record must carry the
			"minimum record" flag. */
			min_mark = REC_INFO_MIN_REC_FLAG;
		}
	}

	n_heap = page_dir_get_n_heap(page);

	/* Traverse the list of stored records in the collation order,
	starting from the first user record. */

	rec = page + PAGE_NEW_INFIMUM;

	i = 0;	/* number of dense directory slots written so far */

	for (;;) {
		ulint	info_bits;
		offs = rec_get_next_offs(rec, TRUE);
		if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
			/* End of the record list. */
			break;
		}
		rec = page + offs;
		heap_no = rec_get_heap_no_new(rec);
		ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
		ut_a(heap_no < n_heap);
		ut_a(offs < srv_page_size - PAGE_DIR);
		ut_a(offs >= PAGE_ZIP_START);
		/* The flag bits must not collide with valid offsets. */
		compile_time_assert(!(PAGE_ZIP_DIR_SLOT_MASK
				      & (PAGE_ZIP_DIR_SLOT_MASK + 1)));
		compile_time_assert(PAGE_ZIP_DIR_SLOT_MASK
				    >= UNIV_ZIP_SIZE_MAX - 1);

		if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) != 0)) {
			/* This record owns a slot of the sparse
			page directory. */
			offs |= PAGE_ZIP_DIR_SLOT_OWNED;
		}

		info_bits = rec_get_info_bits(rec, TRUE);
		if (info_bits & REC_INFO_DELETED_FLAG) {
			/* The delete-mark is carried in the dense
			directory slot rather than in the record. */
			info_bits &= ~REC_INFO_DELETED_FLAG;
			offs |= PAGE_ZIP_DIR_SLOT_DEL;
		}
		ut_a(info_bits == min_mark);
		/* Only the smallest user record can have
		REC_INFO_MIN_REC_FLAG set. */
		min_mark = 0;

		mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);

		if (UNIV_LIKELY_NULL(recs)) {
			/* Ensure that each heap_no occurs at most once. */
			ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
			/* exclude infimum and supremum */
			recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
		}

		ut_a(ulint(rec_get_status(rec)) == status);
	}

	offs = page_header_get_field(page, PAGE_FREE);

	/* Traverse the free list (of deleted records). */
	while (offs) {
		ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
		rec = page + offs;

		heap_no = rec_get_heap_no_new(rec);
		ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
		ut_a(heap_no < n_heap);

		ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
		ut_a(ulint(rec_get_status(rec)) == status);

		mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);

		if (UNIV_LIKELY_NULL(recs)) {
			/* Ensure that each heap_no occurs at most once. */
			ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
			/* exclude infimum and supremum */
			recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
		}

		offs = rec_get_next_offs(rec, TRUE);
	}

	/* Ensure that each heap no occurs at least once. */
	ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
}
758
extern "C" {

/**********************************************************************//**
Allocate memory for zlib.
Invoked by zlib through z_stream::zalloc; the memory comes from the
mem_heap_t that page_zip_set_alloc() stored in z_stream::opaque.
The buffer is zero-filled. */
static
void*
page_zip_zalloc(
/*============*/
	void*	opaque,	/*!< in/out: memory heap */
	uInt	items,	/*!< in: number of items to allocate */
	uInt	size)	/*!< in: size of an item in bytes */
{
	return(mem_heap_zalloc(static_cast<mem_heap_t*>(opaque), items * size));
}

/**********************************************************************//**
Deallocate memory for zlib.
Intentionally a no-op: heap-allocated memory is released in one go
when the owning mem_heap_t is freed. */
static
void
page_zip_free(
/*==========*/
	void*	opaque MY_ATTRIBUTE((unused)),	/*!< in: memory heap */
	void*	address MY_ATTRIBUTE((unused)))/*!< in: object to free */
{
}

} /* extern "C" */
786
787/**********************************************************************//**
788Configure the zlib allocator to use the given memory heap. */
789void
790page_zip_set_alloc(
791/*===============*/
792 void* stream, /*!< in/out: zlib stream */
793 mem_heap_t* heap) /*!< in: memory heap to use */
794{
795 z_stream* strm = static_cast<z_stream*>(stream);
796
797 strm->zalloc = page_zip_zalloc;
798 strm->zfree = page_zip_free;
799 strm->opaque = heap;
800}
801
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
/** Symbol for enabling compression and decompression diagnostics.
(Change the 0 in the condition above to 1 to force this on in
non-debug builds.) */
# define PAGE_ZIP_COMPRESS_DBG
#endif

#ifdef PAGE_ZIP_COMPRESS_DBG
/** Set this variable in a debugger to enable
excessive logging in page_zip_compress(). */
static bool	page_zip_compress_dbg;
/** Set this variable in a debugger to enable
binary logging of the data passed to deflate().
When this variable is nonzero, it will act
as a log file name generator. */
static unsigned	page_zip_compress_log;

/**********************************************************************//**
Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set.
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
static
int
page_zip_compress_deflate(
/*======================*/
	FILE*		logfile,/*!< in: log file, or NULL */
	z_streamp	strm,	/*!< in/out: compressed stream for deflate() */
	int		flush)	/*!< in: deflate() flushing method */
{
	int	status;
	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
		/* Dump the input buffer to stderr. */
		ut_print_buf(stderr, strm->next_in, strm->avail_in);
	}
	if (UNIV_LIKELY_NULL(logfile)) {
		/* Record the raw deflate() input for offline analysis. */
		if (fwrite(strm->next_in, 1, strm->avail_in, logfile)
		    != strm->avail_in) {
			perror("fwrite");
		}
	}
	status = deflate(strm, flush);
	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
		fprintf(stderr, " -> %d\n", status);
	}
	return(status);
}

/* Redefine deflate(). */
# undef deflate
/** Debug wrapper for the zlib compression routine deflate().
Log the operation if page_zip_compress_dbg is set.
@param strm in/out: compressed stream
@param flush in: flushing method
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
/** Declaration of the logfile parameter */
# define FILE_LOGFILE FILE* logfile,
/** The logfile parameter */
# define LOGFILE logfile,
#else /* PAGE_ZIP_COMPRESS_DBG */
/** Empty declaration of the logfile parameter */
# define FILE_LOGFILE
/** Missing logfile parameter */
# define LOGFILE
#endif /* PAGE_ZIP_COMPRESS_DBG */
863
/**********************************************************************//**
Compress the records of a node pointer page.
The node pointer of each record is excluded from the compressed stream
and copied in uncompressed form to the "storage" area at the end of
the page, indexed by heap_no.
@return Z_OK, or a zlib error code */
static
int
page_zip_compress_node_ptrs(
/*========================*/
	FILE_LOGFILE
	z_stream*	c_stream,	/*!< in/out: compressed page stream */
	const rec_t**	recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	byte*		storage,	/*!< in: end of dense page directory */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	int	err	= Z_OK;
	ulint*	offsets = NULL;

	do {
		const rec_t*	rec = *recs++;

		offsets = rec_get_offsets(rec, index, offsets, false,
					  ULINT_UNDEFINED, &heap);
		/* Only leaf nodes may contain externally stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));

		UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
		UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
				   rec_offs_extra_size(offsets));

		/* Compress the extra bytes (everything between the
		previous stopping point and the start of rec). */
		c_stream->avail_in = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES - c_stream->next_in);

		if (c_stream->avail_in) {
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {
				break;
			}
		}
		ut_ad(!c_stream->avail_in);

		/* Compress the data bytes, except node_ptr. */
		c_stream->next_in = (byte*) rec;
		c_stream->avail_in = static_cast<uInt>(
			rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);

		if (c_stream->avail_in) {
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {
				break;
			}
		}

		ut_ad(!c_stream->avail_in);

		/* Copy the node pointer, uncompressed, to the
		storage area, indexed by heap_no. */
		memcpy(storage - REC_NODE_PTR_SIZE
		       * (rec_get_heap_no_new(rec) - 1),
		       c_stream->next_in, REC_NODE_PTR_SIZE);
		c_stream->next_in += REC_NODE_PTR_SIZE;
	} while (--n_dense);

	return(err);
}
929
/**********************************************************************//**
Compress the records of a leaf node of a secondary index.
The REC_N_NEW_EXTRA_BYTES record header immediately preceding each
record is omitted from the compressed stream.
@return Z_OK, or a zlib error code */
static
int
page_zip_compress_sec(
/*==================*/
	FILE_LOGFILE
	z_stream*	c_stream,	/*!< in/out: compressed page stream */
	const rec_t**	recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense)	/*!< in: size of recs[] */
{
	int		err	= Z_OK;

	ut_ad(n_dense > 0);

	do {
		const rec_t*	rec = *recs++;

		/* Compress everything between the previous stopping
		point and the start of this record's header; the
		header itself is skipped below. */
		c_stream->avail_in = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES
			- c_stream->next_in);

		if (UNIV_LIKELY(c_stream->avail_in != 0)) {
			UNIV_MEM_ASSERT_RW(c_stream->next_in,
					   c_stream->avail_in);
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {
				break;
			}
		}

		ut_ad(!c_stream->avail_in);
		ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);

		/* Skip the REC_N_NEW_EXTRA_BYTES. */

		c_stream->next_in = (byte*) rec;
	} while (--n_dense);

	return(err);
}
974
/**********************************************************************//**
Compress a record of a leaf node of a clustered index that contains
externally stored columns.
@return Z_OK, or a zlib error code */
static
int
page_zip_compress_clust_ext(
/*========================*/
	FILE_LOGFILE
	z_stream*	c_stream,	/*!< in/out: compressed page stream */
	const rec_t*	rec,		/*!< in: record */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
	ulint		trx_id_col,	/*!< in: position of of DB_TRX_ID */
	byte*		deleted,	/*!< in: dense directory entry pointing
					to the head of the free list */
	byte*		storage,	/*!< in: end of dense page directory */
	byte**		externs,	/*!< in/out: pointer to the next
					available BLOB pointer */
	ulint*		n_blobs)	/*!< in/out: number of
					externally stored columns */
{
	int	err;
	ulint	i;

	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
			   rec_offs_extra_size(offsets));

	/* Walk the fields in order.  DB_TRX_ID/DB_ROLL_PTR and the
	BTR_EXTERN_FIELD_REF of each externally stored column are
	excluded from the zlib input and copied in uncompressed form:
	the system columns to the area before "storage", the BLOB
	pointers to the area growing downwards from "*externs". */
	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		ulint		len;
		const byte*	src;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			ut_ad(!rec_offs_nth_extern(offsets, i));
			/* Store trx_id and roll_ptr
			in uncompressed form. */
			src = rec_get_nth_field(rec, offsets, i, &len);
			/* DB_ROLL_PTR must immediately follow DB_TRX_ID. */
			ut_ad(src + DATA_TRX_ID_LEN
			      == rec_get_nth_field(rec, offsets,
						   i + 1, &len));
			ut_ad(len == DATA_ROLL_PTR_LEN);

			/* Compress any preceding bytes. */
			c_stream->avail_in = static_cast<uInt>(
				src - c_stream->next_in);

			if (c_stream->avail_in) {
				err = deflate(c_stream, Z_NO_FLUSH);
				if (UNIV_UNLIKELY(err != Z_OK)) {

					return(err);
				}
			}

			ut_ad(!c_stream->avail_in);
			ut_ad(c_stream->next_in == src);

			/* Copy the uncompressed trx_id and roll_ptr to
			the slot of this record (indexed by heap_no - 1)
			in front of the dense page directory. */
			memcpy(storage
			       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
			       * (rec_get_heap_no_new(rec) - 1),
			       c_stream->next_in,
			       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

			c_stream->next_in
				+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;

			/* Skip also roll_ptr */
			i++;
		} else if (rec_offs_nth_extern(offsets, i)) {
			src = rec_get_nth_field(rec, offsets, i, &len);
			ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
			/* The BLOB pointer occupies the last
			BTR_EXTERN_FIELD_REF_SIZE bytes of the field. */
			src += len - BTR_EXTERN_FIELD_REF_SIZE;

			c_stream->avail_in = static_cast<uInt>(
				src - c_stream->next_in);
			if (UNIV_LIKELY(c_stream->avail_in != 0)) {
				err = deflate(c_stream, Z_NO_FLUSH);
				if (UNIV_UNLIKELY(err != Z_OK)) {

					return(err);
				}
			}

			ut_ad(!c_stream->avail_in);
			ut_ad(c_stream->next_in == src);

			/* Reserve space for the data at
			the end of the space reserved for
			the compressed data and the page
			modification log. */

			if (UNIV_UNLIKELY
			    (c_stream->avail_out
			     <= BTR_EXTERN_FIELD_REF_SIZE)) {
				/* out of space */
				return(Z_BUF_ERROR);
			}

			ut_ad(*externs == c_stream->next_out
			      + c_stream->avail_out
			      + 1/* end of modif. log */);

			c_stream->next_in
				+= BTR_EXTERN_FIELD_REF_SIZE;

			/* Skip deleted records: their BLOB pointers
			are not copied and do not count in *n_blobs
			(the record is on the free list). */
			if (UNIV_LIKELY_NULL
			    (page_zip_dir_find_low(
				    storage, deleted,
				    page_offset(rec)))) {
				continue;
			}

			(*n_blobs)++;
			c_stream->avail_out
				-= BTR_EXTERN_FIELD_REF_SIZE;
			*externs -= BTR_EXTERN_FIELD_REF_SIZE;

			/* Copy the BLOB pointer */
			memcpy(*externs, c_stream->next_in
			       - BTR_EXTERN_FIELD_REF_SIZE,
			       BTR_EXTERN_FIELD_REF_SIZE);
		}
	}

	return(Z_OK);
}
1102
1103/**********************************************************************//**
1104Compress the records of a leaf node of a clustered index.
1105@return Z_OK, or a zlib error code */
1106static
1107int
1108page_zip_compress_clust(
1109/*====================*/
1110 FILE_LOGFILE
1111 z_stream* c_stream, /*!< in/out: compressed page stream */
1112 const rec_t** recs, /*!< in: dense page directory
1113 sorted by address */
1114 ulint n_dense, /*!< in: size of recs[] */
1115 dict_index_t* index, /*!< in: the index of the page */
1116 ulint* n_blobs, /*!< in: 0; out: number of
1117 externally stored columns */
1118 ulint trx_id_col, /*!< index of the trx_id column */
1119 byte* deleted, /*!< in: dense directory entry pointing
1120 to the head of the free list */
1121 byte* storage, /*!< in: end of dense page directory */
1122 mem_heap_t* heap) /*!< in: temporary memory heap */
1123{
1124 int err = Z_OK;
1125 ulint* offsets = NULL;
1126 /* BTR_EXTERN_FIELD_REF storage */
1127 byte* externs = storage - n_dense
1128 * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1129
1130 ut_ad(*n_blobs == 0);
1131
1132 do {
1133 const rec_t* rec = *recs++;
1134
1135 offsets = rec_get_offsets(rec, index, offsets, true,
1136 ULINT_UNDEFINED, &heap);
1137 ut_ad(rec_offs_n_fields(offsets)
1138 == dict_index_get_n_fields(index));
1139 UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1140 UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1141 rec_offs_extra_size(offsets));
1142
1143 /* Compress the extra bytes. */
1144 c_stream->avail_in = static_cast<uInt>(
1145 rec - REC_N_NEW_EXTRA_BYTES
1146 - c_stream->next_in);
1147
1148 if (c_stream->avail_in) {
1149 err = deflate(c_stream, Z_NO_FLUSH);
1150 if (UNIV_UNLIKELY(err != Z_OK)) {
1151
1152 goto func_exit;
1153 }
1154 }
1155 ut_ad(!c_stream->avail_in);
1156 ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
1157
1158 /* Compress the data bytes. */
1159
1160 c_stream->next_in = (byte*) rec;
1161
1162 /* Check if there are any externally stored columns.
1163 For each externally stored column, store the
1164 BTR_EXTERN_FIELD_REF separately. */
1165 if (rec_offs_any_extern(offsets)) {
1166 ut_ad(dict_index_is_clust(index));
1167
1168 err = page_zip_compress_clust_ext(
1169 LOGFILE
1170 c_stream, rec, offsets, trx_id_col,
1171 deleted, storage, &externs, n_blobs);
1172
1173 if (UNIV_UNLIKELY(err != Z_OK)) {
1174
1175 goto func_exit;
1176 }
1177 } else {
1178 ulint len;
1179 const byte* src;
1180
1181 /* Store trx_id and roll_ptr in uncompressed form. */
1182 src = rec_get_nth_field(rec, offsets,
1183 trx_id_col, &len);
1184 ut_ad(src + DATA_TRX_ID_LEN
1185 == rec_get_nth_field(rec, offsets,
1186 trx_id_col + 1, &len));
1187 ut_ad(len == DATA_ROLL_PTR_LEN);
1188 UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1189 UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1190 rec_offs_extra_size(offsets));
1191
1192 /* Compress any preceding bytes. */
1193 c_stream->avail_in = static_cast<uInt>(
1194 src - c_stream->next_in);
1195
1196 if (c_stream->avail_in) {
1197 err = deflate(c_stream, Z_NO_FLUSH);
1198 if (UNIV_UNLIKELY(err != Z_OK)) {
1199
1200 return(err);
1201 }
1202 }
1203
1204 ut_ad(!c_stream->avail_in);
1205 ut_ad(c_stream->next_in == src);
1206
1207 memcpy(storage
1208 - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
1209 * (rec_get_heap_no_new(rec) - 1),
1210 c_stream->next_in,
1211 DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1212
1213 c_stream->next_in
1214 += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1215
1216 /* Skip also roll_ptr */
1217 ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
1218 }
1219
1220 /* Compress the last bytes of the record. */
1221 c_stream->avail_in = static_cast<uInt>(
1222 rec + rec_offs_data_size(offsets) - c_stream->next_in);
1223
1224 if (c_stream->avail_in) {
1225 err = deflate(c_stream, Z_NO_FLUSH);
1226 if (UNIV_UNLIKELY(err != Z_OK)) {
1227
1228 goto func_exit;
1229 }
1230 }
1231 ut_ad(!c_stream->avail_in);
1232 } while (--n_dense);
1233
1234func_exit:
1235 return(err);}
1236
/**********************************************************************//**
Compress a page.
@return TRUE on success, FALSE on failure; page_zip will be left
intact on failure. */
ibool
page_zip_compress(
/*==============*/
	page_zip_des_t*		page_zip,	/*!< in: size; out: data,
						n_blobs, m_start, m_end,
						m_nonempty */
	const page_t*		page,		/*!< in: uncompressed page */
	dict_index_t*		index,		/*!< in: index of the B-tree
						node */
	ulint			level,		/*!< in: compression level */
	const redo_page_compress_t* page_comp_info,
						/*!< in: used for applying
						TRUNCATE log
						record during recovery */
	mtr_t*			mtr)		/*!< in/out: mini-transaction,
						or NULL */
{
	z_stream		c_stream;
	int			err;
	ulint			n_fields;	/* number of index fields
						needed */
	byte*			fields;		/*!< index field information */
	byte*			buf;		/*!< compressed payload of the
						page */
	byte*			buf_end;	/* end of buf */
	ulint			n_dense;
	ulint			slot_size;	/* amount of uncompressed bytes
						per record */
	const rec_t**		recs;		/*!< dense page directory,
						sorted by address */
	mem_heap_t*		heap;
	ulint			trx_id_col = ULINT_UNDEFINED;
	ulint			n_blobs	= 0;
	byte*			storage;	/* storage of uncompressed
						columns */
	index_id_t		ind_id;
	uintmax_t		usec = ut_time_us(NULL);
#ifdef PAGE_ZIP_COMPRESS_DBG
	FILE*			logfile = NULL;
#endif
	/* A local copy of srv_cmp_per_index_enabled to avoid reading that
	variable multiple times in this function since it can be changed at
	anytime. */
	my_bool			cmp_per_index_enabled;
	cmp_per_index_enabled	= srv_cmp_per_index_enabled;

	ut_a(page_is_comp(page));
	ut_a(fil_page_index_page_check(page));
	ut_ad(page_simple_validate_new((page_t*) page));
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(!index
	      || (index
		  && dict_table_is_comp(index->table)
		  && !dict_index_is_ibuf(index)));

	UNIV_MEM_ASSERT_RW(page, srv_page_size);

	/* Check the data that will be omitted. */
	ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
		     infimum_extra, sizeof infimum_extra));
	ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
		     infimum_data, sizeof infimum_data));
	ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
	     /* info_bits == 0, n_owned <= max */
	     <= PAGE_DIR_SLOT_MAX_N_OWNED);
	ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
		     supremum_extra_data, sizeof supremum_extra_data));

	if (page_is_empty(page)) {
		ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
		     == PAGE_NEW_SUPREMUM);
	}

	/* During TRUNCATE log fix-up there is no index object available;
	take the field count and index id from the redo record instead. */
	if (truncate_t::s_fix_up_active) {
		ut_ad(page_comp_info != NULL);
		n_fields = page_comp_info->n_fields;
		ind_id = page_comp_info->index_id;
	} else {
		if (page_is_leaf(page)) {
			n_fields = dict_index_get_n_fields(index);
		} else {
			n_fields = dict_index_get_n_unique_in_tree_nonleaf(index);
		}
		ind_id = index->id;
	}

	/* The dense directory excludes the infimum and supremum records. */
	n_dense = ulint(page_dir_get_n_heap(page)) - PAGE_HEAP_NO_USER_LOW;
#ifdef PAGE_ZIP_COMPRESS_DBG
	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
		ib::info() << "compress "
			<< static_cast<void*>(page_zip) << " "
			<< static_cast<const void*>(page) << " "
			<< page_is_leaf(page) << " "
			<< n_fields << " " << n_dense;
	}

	if (UNIV_UNLIKELY(page_zip_compress_log)) {
		/* Create a log file for every compression attempt. */
		char	logfilename[9];
		snprintf(logfilename, sizeof logfilename,
			 "%08x", page_zip_compress_log++);
		logfile = fopen(logfilename, "wb");

		if (logfile) {
			/* Write the uncompressed page to the log. */
			if (fwrite(page, 1, srv_page_size, logfile)
			    != srv_page_size) {
				perror("fwrite");
			}
			/* Record the compressed size as zero.
			This will be overwritten at successful exit. */
			putc(0, logfile);
			putc(0, logfile);
			putc(0, logfile);
			putc(0, logfile);
		}
	}
#endif /* PAGE_ZIP_COMPRESS_DBG */
	page_zip_stat[page_zip->ssize - 1].compressed++;
	if (cmp_per_index_enabled) {
		mutex_enter(&page_zip_stat_per_index_mutex);
		page_zip_stat_per_index[ind_id].compressed++;
		mutex_exit(&page_zip_stat_per_index_mutex);
	}

	/* The dense page directory alone must fit inside the
	compressed page; otherwise compression cannot succeed. */
	if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
			  >= page_zip_get_size(page_zip))) {

		goto err_exit;
	}

	MONITOR_INC(MONITOR_PAGE_COMPRESS);

	/* Simulate a compression failure with a probability determined by
	innodb_simulate_comp_failures, only if the page has 2 or more
	records. */

	if (srv_simulate_comp_failures
	    && !dict_index_is_ibuf(index)
	    && page_get_n_recs(page) >= 2
	    && ((ulint)(rand() % 100) < srv_simulate_comp_failures)
	    && strcmp(index->table->name.m_name, "IBUF_DUMMY")) {

#ifdef UNIV_DEBUG
		ib::error()
			<< "Simulating a compression failure"
			<< " for table " << index->table->name
			<< " index "
			<< index->name()
			<< " page "
			<< page_get_page_no(page)
			<< "("
			<< (page_is_leaf(page) ? "leaf" : "non-leaf")
			<< ")";

#endif

		goto err_exit;
	}

	/* Size the heap for all scratch buffers used below: the output
	buffer, encoded fields, dense directory, and zlib's workspace. */
	heap = mem_heap_create(page_zip_get_size(page_zip)
			       + n_fields * (2 + sizeof(ulint))
			       + REC_OFFS_HEADER_SIZE
			       + n_dense * ((sizeof *recs)
					    - PAGE_ZIP_DIR_SLOT_SIZE)
			       + srv_page_size * 4
			       + (512 << MAX_MEM_LEVEL));

	recs = static_cast<const rec_t**>(
		mem_heap_zalloc(heap, n_dense * sizeof *recs));

	fields = static_cast<byte*>(mem_heap_alloc(heap, (n_fields + 1) * 2));

	buf = static_cast<byte*>(
		mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA));

	buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;

	/* Compress the data payload. */
	page_zip_set_alloc(&c_stream, heap);

	err = deflateInit2(&c_stream, static_cast<int>(level),
			   Z_DEFLATED, srv_page_size_shift,
			   MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
	ut_a(err == Z_OK);

	c_stream.next_out = buf;

	/* Subtract the space reserved for uncompressed data. */
	/* Page header and the end marker of the modification log */
	c_stream.avail_out = static_cast<uInt>(buf_end - buf - 1);

	/* Dense page directory and uncompressed columns, if any */
	if (page_is_leaf(page)) {
		if ((index && dict_index_is_clust(index))
		    || (page_comp_info
			&& (page_comp_info->type & DICT_CLUSTERED))) {

			if (index) {
				trx_id_col = dict_index_get_sys_col_pos(
					index, DATA_TRX_ID);
				ut_ad(trx_id_col > 0);
				ut_ad(trx_id_col != ULINT_UNDEFINED);
			} else if (page_comp_info
				   && (page_comp_info->type
				       & DICT_CLUSTERED)) {
				trx_id_col = page_comp_info->trx_id_pos;
			}

			slot_size = PAGE_ZIP_DIR_SLOT_SIZE
				+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;

		} else {
			/* Signal the absence of trx_id
			in page_zip_fields_encode() */
			if (index) {
				ut_ad(dict_index_get_sys_col_pos(
					index, DATA_TRX_ID) == ULINT_UNDEFINED);
			}
			trx_id_col = 0;
			slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
		}
	} else {
		slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
		trx_id_col = ULINT_UNDEFINED;
	}

	if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
			  + 6/* sizeof(zlib header and footer) */)) {
		goto zlib_error;
	}

	c_stream.avail_out -= static_cast<uInt>(n_dense * slot_size);
	if (truncate_t::s_fix_up_active) {
		/* Use the field encoding saved in the TRUNCATE redo
		record instead of re-encoding from the index object. */
		ut_ad(page_comp_info != NULL);
		c_stream.avail_in = static_cast<uInt>(
			page_comp_info->field_len);
		for (ulint i = 0; i < page_comp_info->field_len; i++) {
			fields[i] = page_comp_info->fields[i];
		}
	} else {
		c_stream.avail_in = static_cast<uInt>(
			page_zip_fields_encode(
				n_fields, index, trx_id_col, fields));
	}
	c_stream.next_in = fields;

	if (UNIV_LIKELY(!trx_id_col)) {
		trx_id_col = ULINT_UNDEFINED;
	}

	/* Compress the index field information with a full flush, so
	that the records can be decompressed independently of it. */
	UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
	err = deflate(&c_stream, Z_FULL_FLUSH);
	if (err != Z_OK) {
		goto zlib_error;
	}

	ut_ad(!c_stream.avail_in);

	page_zip_dir_encode(page, buf_end, recs);

	c_stream.next_in = (byte*) page + PAGE_ZIP_START;

	storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;

	/* Compress the records in heap_no order. */
	if (UNIV_UNLIKELY(!n_dense)) {
	} else if (!page_is_leaf(page)) {
		/* This is a node pointer page. */
		err = page_zip_compress_node_ptrs(LOGFILE
						  &c_stream, recs, n_dense,
						  index, storage, heap);
		if (UNIV_UNLIKELY(err != Z_OK)) {
			goto zlib_error;
		}
	} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
		/* This is a leaf page in a secondary index. */
		err = page_zip_compress_sec(LOGFILE
					    &c_stream, recs, n_dense);
		if (UNIV_UNLIKELY(err != Z_OK)) {
			goto zlib_error;
		}
	} else {
		/* This is a leaf page in a clustered index. */
		err = page_zip_compress_clust(LOGFILE
					      &c_stream, recs, n_dense,
					      index, &n_blobs, trx_id_col,
					      buf_end - PAGE_ZIP_DIR_SLOT_SIZE
					      * page_get_n_recs(page),
					      storage, heap);
		if (UNIV_UNLIKELY(err != Z_OK)) {
			goto zlib_error;
		}
	}

	/* Finish the compression. */
	ut_ad(!c_stream.avail_in);
	/* Compress any trailing garbage, in case the last record was
	allocated from an originally longer space on the free list,
	or the data of the last record from page_zip_compress_sec(). */
	c_stream.avail_in = static_cast<uInt>(
		page_header_get_field(page, PAGE_HEAP_TOP)
		- (c_stream.next_in - page));
	ut_a(c_stream.avail_in <= srv_page_size - PAGE_ZIP_START - PAGE_DIR);

	UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
	err = deflate(&c_stream, Z_FINISH);

	if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
zlib_error:
		deflateEnd(&c_stream);
		mem_heap_free(heap);
err_exit:
		/* Failure path: update failure statistics and leave
		page_zip untouched, as promised in the contract above. */
#ifdef PAGE_ZIP_COMPRESS_DBG
		if (logfile) {
			fclose(logfile);
		}
#endif /* PAGE_ZIP_COMPRESS_DBG */
		if (page_is_leaf(page) && index) {
			dict_index_zip_failure(index);
		}

		uintmax_t	time_diff = ut_time_us(NULL) - usec;
		page_zip_stat[page_zip->ssize - 1].compressed_usec
			+= time_diff;
		if (cmp_per_index_enabled) {
			mutex_enter(&page_zip_stat_per_index_mutex);
			page_zip_stat_per_index[ind_id].compressed_usec
				+= time_diff;
			mutex_exit(&page_zip_stat_per_index_mutex);
		}
		return(FALSE);
	}

	err = deflateEnd(&c_stream);
	ut_a(err == Z_OK);

	ut_ad(buf + c_stream.total_out == c_stream.next_out);
	ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);

	/* Valgrind believes that zlib does not initialize some bits
	in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
	UNIV_MEM_VALID(buf, c_stream.total_out);

	/* Zero out the area reserved for the modification log.
	Space for the end marker of the modification log is not
	included in avail_out. */
	memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);

#ifdef UNIV_DEBUG
	page_zip->m_start =
#endif /* UNIV_DEBUG */
		page_zip->m_end = unsigned(PAGE_DATA + c_stream.total_out);
	page_zip->m_nonempty = FALSE;
	page_zip->n_blobs = unsigned(n_blobs);
	/* Copy those header fields that will not be written
	in buf_flush_init_for_writing() */
	memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
	       FIL_PAGE_LSN - FIL_PAGE_PREV);
	memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
	memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
	       PAGE_DATA - FIL_PAGE_DATA);
	/* Copy the rest of the compressed page */
	memcpy(page_zip->data + PAGE_DATA, buf,
	       page_zip_get_size(page_zip) - PAGE_DATA);
	mem_heap_free(heap);
#ifdef UNIV_ZIP_DEBUG
	ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */

	if (mtr) {
		page_zip_compress_write_log(page_zip, page, index, mtr);
	}

	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));

#ifdef PAGE_ZIP_COMPRESS_DBG
	if (logfile) {
		/* Record the compressed size of the block. */
		byte	sz[4];
		mach_write_to_4(sz, c_stream.total_out);
		fseek(logfile, srv_page_size, SEEK_SET);
		if (fwrite(sz, 1, sizeof sz, logfile) != sizeof sz) {
			perror("fwrite");
		}
		fclose(logfile);
	}
#endif /* PAGE_ZIP_COMPRESS_DBG */
	uintmax_t	time_diff = ut_time_us(NULL) - usec;
	page_zip_stat[page_zip->ssize - 1].compressed_ok++;
	page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff;
	if (cmp_per_index_enabled) {
		mutex_enter(&page_zip_stat_per_index_mutex);
		page_zip_stat_per_index[ind_id].compressed_ok++;
		page_zip_stat_per_index[ind_id].compressed_usec += time_diff;
		mutex_exit(&page_zip_stat_per_index_mutex);
	}

	if (page_is_leaf(page) && !truncate_t::s_fix_up_active) {
		dict_index_zip_success(index);
	}

	return(TRUE);
}
1646
1647/**********************************************************************//**
1648Deallocate the index information initialized by page_zip_fields_decode(). */
1649static
1650void
1651page_zip_fields_free(
1652/*=================*/
1653 dict_index_t* index) /*!< in: dummy index to be freed */
1654{
1655 if (index) {
1656 dict_table_t* table = index->table;
1657 dict_index_zip_pad_mutex_destroy(index);
1658 mem_heap_free(index->heap);
1659
1660 dict_mem_table_free(table);
1661 }
1662}
1663
/**********************************************************************//**
Read the index information for the compressed page.
@return own: dummy index describing the page, or NULL on error */
static
dict_index_t*
page_zip_fields_decode(
/*===================*/
	const byte*	buf,	/*!< in: index information */
	const byte*	end,	/*!< in: end of buf */
	ulint*		trx_id_col,/*!< in: NULL for non-leaf pages;
				for leaf pages, pointer to where to store
				the position of the trx_id column */
	bool		is_spatial)/*!< in: is spatial index or not */
{
	const byte*	b;
	ulint		n;
	ulint		i;
	ulint		val;
	dict_table_t*	table;
	dict_index_t*	index;

	/* Determine the number of fields.  Each field descriptor is
	one byte, or two bytes when the high bit of the first byte is
	set (written by page_zip_fields_encode()). */
	for (b = buf, n = 0; b < end; n++) {
		if (*b++ & 0x80) {
			b++; /* skip the second byte */
		}
	}

	n--; /* n_nullable or trx_id */

	if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {

		page_zip_fail(("page_zip_fields_decode: n = %lu\n",
			       (ulong) n));
		return(NULL);
	}

	/* A truncated two-byte descriptor at the end would make b
	overshoot the buffer; reject such corrupted input. */
	if (UNIV_UNLIKELY(b > end)) {

		page_zip_fail(("page_zip_fields_decode: %p > %p\n",
			       (const void*) b, (const void*) end));
		return(NULL);
	}

	/* Build a dummy table and index that only carry the column
	type/length information needed for decompression; freed later
	by page_zip_fields_free(). */
	table = dict_mem_table_create("ZIP_DUMMY", NULL, n, 0,
				      DICT_TF_COMPACT, 0);
	index = dict_mem_index_create(table, "ZIP_DUMMY", 0, n);
	index->n_uniq = unsigned(n);
	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
	index->cached = TRUE;

	/* Initialize the fields. */
	for (b = buf, i = 0; i < n; i++) {
		ulint	mtype;
		ulint	len;

		val = *b++;

		if (UNIV_UNLIKELY(val & 0x80)) {
			/* fixed length > 62 bytes */
			val = (val & 0x7f) << 8 | *b++;
			len = val >> 1;
			mtype = DATA_FIXBINARY;
		} else if (UNIV_UNLIKELY(val >= 126)) {
			/* variable length with max > 255 bytes */
			len = 0x7fff;
			mtype = DATA_BINARY;
		} else if (val <= 1) {
			/* variable length with max <= 255 bytes */
			len = 0;
			mtype = DATA_BINARY;
		} else {
			/* fixed length < 62 bytes */
			len = val >> 1;
			mtype = DATA_FIXBINARY;
		}

		/* The low bit of the descriptor is the NOT NULL flag. */
		dict_mem_table_add_col(table, NULL, NULL, mtype,
				       val & 1 ? DATA_NOT_NULL : 0, len);
		dict_index_add_col(index, table,
				   dict_table_get_nth_col(table, i), 0);
	}

	/* The last descriptor encodes either the trx_id position
	(leaf pages of a clustered index) or n_nullable (all other
	pages), in the same one-or-two-byte format. */
	val = *b++;
	if (UNIV_UNLIKELY(val & 0x80)) {
		val = (val & 0x7f) << 8 | *b++;
	}

	/* Decode the position of the trx_id column. */
	if (trx_id_col) {
		if (!val) {
			val = ULINT_UNDEFINED;
		} else if (UNIV_UNLIKELY(val >= n)) {
			page_zip_fail(("page_zip_fields_decode:"
				       " trx_id = %lu\n", (ulong) val));
			page_zip_fields_free(index);
			index = NULL;
		} else {
			index->type = DICT_CLUSTERED;
		}

		*trx_id_col = val;
	} else {
		/* Decode the number of nullable fields. */
		if (UNIV_UNLIKELY(index->n_nullable > val)) {
			page_zip_fields_free(index);
			index = NULL;
		} else {
			index->n_nullable = unsigned(val);
		}
	}

	/* ROW_FORMAT=COMPRESSED does not support instant ADD COLUMN */
	index->n_core_fields = index->n_fields;
	index->n_core_null_bytes
		= UT_BITS_IN_BYTES(unsigned(index->n_nullable));

	ut_ad(b == end);

	if (is_spatial) {
		index->type |= DICT_SPATIAL;
	}

	return(index);
}
1787
/**********************************************************************//**
Populate the sparse page directory from the dense directory.
@return TRUE on success, FALSE on failure */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
ibool
page_zip_dir_decode(
/*================*/
	const page_zip_des_t*	page_zip,/*!< in: dense page directory on
					compressed page */
	page_t*			page,	/*!< in: compact page with valid header;
					out: trailer and sparse page directory
					filled in */
	rec_t**			recs,	/*!< out: dense page directory sorted by
					ascending address (and heap_no) */
	ulint			n_dense)/*!< in: number of user records, and
					size of recs[] */
{
	ulint	i;
	ulint	n_recs;
	byte*	slot;

	n_recs = page_get_n_recs(page);

	/* There cannot be more in-use records than dense directory
	entries (the dense directory also covers the free list). */
	if (UNIV_UNLIKELY(n_recs > n_dense)) {
		page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
			       (ulong) n_recs, (ulong) n_dense));
		return(FALSE);
	}

	/* Traverse the list of stored records in the sorting order,
	starting from the first user record. */

	slot = page + (srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
	UNIV_PREFETCH_RW(slot);

	/* Zero out the page trailer. */
	memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);

	mach_write_to_2(slot, PAGE_NEW_INFIMUM);
	slot -= PAGE_DIR_SLOT_SIZE;
	UNIV_PREFETCH_RW(slot);

	/* Initialize the sparse directory and copy the dense directory.
	Note: "slot" walks downwards from the end of the page; a sparse
	slot is emitted whenever a dense entry has the "owned" flag. */
	for (i = 0; i < n_recs; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
			mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
			slot -= PAGE_DIR_SLOT_SIZE;
			UNIV_PREFETCH_RW(slot);
		}

		if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
				  < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
			page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
				       (unsigned) i, (unsigned) n_recs,
				       (ulong) offs));
			return(FALSE);
		}

		recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
	}

	mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
	{
		/* The downward walk must have consumed exactly the
		number of sparse slots recorded in the page header. */
		const page_dir_slot_t*	last_slot = page_dir_get_nth_slot(
			page, page_dir_get_n_slots(page) - 1U);

		if (UNIV_UNLIKELY(slot != last_slot)) {
			page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
				       (const void*) slot,
				       (const void*) last_slot));
			return(FALSE);
		}
	}

	/* Copy the rest of the dense directory (the free list;
	these entries must not carry the owned/deleted flag bits). */
	for (; i < n_dense; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
			page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
				       (unsigned) i, (unsigned) n_dense,
				       (ulong) offs));
			return(FALSE);
		}

		recs[i] = page + offs;
	}

	/* The dense directory is stored in collation order; the caller
	needs it sorted by address (heap_no order). */
	std::sort(recs, recs + n_dense);
	return(TRUE);
}
1881
/**********************************************************************//**
Initialize the REC_N_NEW_EXTRA_BYTES of each record.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_set_extra_bytes(
/*=====================*/
	const page_zip_des_t*	page_zip,/*!< in: compressed page */
	page_t*			page,	/*!< in/out: uncompressed page */
	ulint			info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */
{
	ulint	n;
	ulint	i;
	ulint	n_owned = 1;
	ulint	offs;
	rec_t*	rec;

	n = page_get_n_recs(page);
	rec = page + PAGE_NEW_INFIMUM;

	/* Rebuild the record headers of the in-use records: next
	pointers, info bits, delete-mark and n_owned, all of which were
	stripped out of the compressed stream and are recovered from
	the dense directory flags. */
	for (i = 0; i < n; i++) {
		offs = page_zip_dir_get(page_zip, i);

		if (offs & PAGE_ZIP_DIR_SLOT_DEL) {
			info_bits |= REC_INFO_DELETED_FLAG;
		}
		if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
			info_bits |= n_owned;
			n_owned = 1;
		} else {
			n_owned++;
		}
		offs &= PAGE_ZIP_DIR_SLOT_MASK;
		if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
				  + REC_N_NEW_EXTRA_BYTES)) {
			page_zip_fail(("page_zip_set_extra_bytes 1:"
				       " %u %u %lx\n",
				       (unsigned) i, (unsigned) n,
				       (ulong) offs));
			return(FALSE);
		}

		/* Link the previous record to this one, then write
		this record's info_bits/n_owned byte. */
		rec_set_next_offs_new(rec, offs);
		rec = page + offs;
		rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
		info_bits = 0;
	}

	/* Set the next pointer of the last user record. */
	rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);

	/* Set n_owned of the supremum record. */
	page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;

	/* The dense directory excludes the infimum and supremum records. */
	n = ulint(page_dir_get_n_heap(page)) - PAGE_HEAP_NO_USER_LOW;

	if (i >= n) {
		if (UNIV_LIKELY(i == n)) {
			/* No deleted records: done. */
			return(TRUE);
		}

		page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
			       (unsigned) i, (unsigned) n));
		return(FALSE);
	}

	offs = page_zip_dir_get(page_zip, i);

	/* Set the extra bytes of deleted records on the free list.
	Each iteration validates the current offset, clears the header
	byte, and links the previous free record to the current one. */
	for (;;) {
		if (UNIV_UNLIKELY(!offs)
		    || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {

			page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
				       (ulong) offs));
			return(FALSE);
		}

		rec = page + offs;
		rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */

		if (++i == n) {
			break;
		}

		offs = page_zip_dir_get(page_zip, i);
		rec_set_next_offs_new(rec, offs);
	}

	/* Terminate the free list. */
	rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
	rec_set_next_offs_new(rec, 0);

	return(TRUE);
}
1978
/**********************************************************************//**
Apply the modification log to a record containing externally stored
columns. Do not copy the fields that are stored separately.
@return pointer to modification log, or NULL on failure */
static
const byte*
page_zip_apply_log_ext(
/*===================*/
	rec_t*		rec,		/*!< in/out: record */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
	ulint		trx_id_col,	/*!< in: position of of DB_TRX_ID */
	const byte*	data,		/*!< in: modification log */
	const byte*	end)		/*!< in: end of modification log */
{
	ulint	i;
	ulint	len;
	byte*	next_out = rec;

	/* Check if there are any externally stored columns.
	For each externally stored column, skip the
	BTR_EXTERN_FIELD_REF. */

	/* Note: the ">=" bounds checks below are deliberate; one byte
	must remain in the log for the end marker. */
	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		byte*	dst;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			/* Skip trx_id and roll_ptr */
			dst = rec_get_nth_field(rec, offsets,
						i, &len);
			if (UNIV_UNLIKELY(dst - next_out >= end - data)
			    || UNIV_UNLIKELY
			    (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
			    || rec_offs_nth_extern(offsets, i)) {
				page_zip_fail(("page_zip_apply_log_ext:"
					       " trx_id len %lu,"
					       " %p - %p >= %p - %p\n",
					       (ulong) len,
					       (const void*) dst,
					       (const void*) next_out,
					       (const void*) end,
					       (const void*) data));
				return(NULL);
			}

			/* Copy the log bytes preceding the system
			columns, then resume output after them (they
			are stored separately, not in the log). */
			memcpy(next_out, data, ulint(dst - next_out));
			data += ulint(dst - next_out);
			next_out = dst + (DATA_TRX_ID_LEN
					  + DATA_ROLL_PTR_LEN);
		} else if (rec_offs_nth_extern(offsets, i)) {
			dst = rec_get_nth_field(rec, offsets,
						i, &len);
			ut_ad(len
			      >= BTR_EXTERN_FIELD_REF_SIZE);

			/* Copy up to, but not including, the
			BTR_EXTERN_FIELD_REF at the end of the field. */
			len += ulint(dst - next_out)
				- BTR_EXTERN_FIELD_REF_SIZE;

			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log_ext:"
					       " ext %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}

			memcpy(next_out, data, len);
			data += len;
			next_out += len
				+ BTR_EXTERN_FIELD_REF_SIZE;
		}
	}

	/* Copy the last bytes of the record. */
	len = ulint(rec_get_end(rec, offsets) - next_out);
	if (UNIV_UNLIKELY(data + len >= end)) {
		page_zip_fail(("page_zip_apply_log_ext:"
			       " last %p+%lu >= %p\n",
			       (const void*) data,
			       (ulong) len,
			       (const void*) end));
		return(NULL);
	}
	memcpy(next_out, data, len);
	data += len;

	return(data);
}
2067
/**********************************************************************//**
Apply the modification log to an uncompressed page.
Do not copy the fields that are stored separately.
@return pointer to end of modification log, or NULL on failure */
static
const byte*
page_zip_apply_log(
/*===============*/
	const byte*	data,	/*!< in: modification log */
	ulint		size,	/*!< in: maximum length of the log, in bytes */
	rec_t**		recs,	/*!< in: dense page directory,
				sorted by address (indexed by
				heap_no - PAGE_HEAP_NO_USER_LOW) */
	ulint		n_dense,/*!< in: size of recs[] */
	bool		is_leaf,/*!< in: whether this is a leaf page */
	ulint		trx_id_col,/*!< in: column number of trx_id in the index,
				or ULINT_UNDEFINED if none */
	ulint		heap_status,
				/*!< in: heap_no and status bits for
				the next record to uncompress */
	dict_index_t*	index,	/*!< in: index of the page */
	ulint*		offsets)/*!< in/out: work area for
				rec_get_offsets_reverse() */
{
	const byte* const end = data + size;

	for (;;) {
		ulint	val;
		rec_t*	rec;
		ulint	len;
		ulint	hs;

		/* Read the entry header: one byte, extended to two
		bytes when the 0x80 bit is set in the first byte.
		A zero byte terminates the log. */
		val = *data++;
		if (UNIV_UNLIKELY(!val)) {
			/* End-of-log marker reached; report its
			position to the caller. */
			return(data - 1);
		}
		if (val & 0x80) {
			val = (val & 0x7f) << 8 | *data++;
			if (UNIV_UNLIKELY(!val)) {
				page_zip_fail(("page_zip_apply_log:"
					       " invalid val %x%x\n",
					       data[-2], data[-1]));
				return(NULL);
			}
		}
		if (UNIV_UNLIKELY(data >= end)) {
			page_zip_fail(("page_zip_apply_log: %p >= %p\n",
				       (const void*) data,
				       (const void*) end));
			return(NULL);
		}
		/* val >> 1 is a 1-based index into recs[]; it must
		not exceed the number of user records on the page. */
		if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
			page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
				       (ulong) val, (ulong) n_dense));
			return(NULL);
		}

		/* Determine the heap number and status bits of the record. */
		rec = recs[(val >> 1) - 1];

		hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
		hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);

		/* This may either be an old record that is being
		overwritten (updated in place, or allocated from
		the free list), or a new record, with the next
		available_heap_no. */
		if (UNIV_UNLIKELY(hs > heap_status)) {
			page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
				       (ulong) hs, (ulong) heap_status));
			return(NULL);
		} else if (hs == heap_status) {
			/* A new record was allocated from the heap. */
			if (UNIV_UNLIKELY(val & 1)) {
				/* Only existing records may be cleared. */
				page_zip_fail(("page_zip_apply_log:"
					       " attempting to create"
					       " deleted rec %lu\n",
					       (ulong) hs));
				return(NULL);
			}
			heap_status += 1 << REC_HEAP_NO_SHIFT;
		}

		/* Store the heap number and status bits in the
		fixed-size record header. */
		mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);

		if (val & 1) {
			/* The least significant bit flags a record
			whose data bytes are to be cleared. */
			mem_heap_t*	heap	= NULL;
			ulint*	offs;
			offs = rec_get_offsets(rec, index, offsets, is_leaf,
					       ULINT_UNDEFINED, &heap);
			memset(rec, 0, rec_offs_data_size(offs));

			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}
			continue;
		}

		/* The assertion allows hs & REC_STATUS_NODE_PTR to be
		passed directly as the node-pointer flag below. */
		compile_time_assert(REC_STATUS_NODE_PTR == TRUE);
		rec_get_offsets_reverse(data, index,
					hs & REC_STATUS_NODE_PTR,
					offsets);
		rec_offs_make_valid(rec, index, is_leaf, offsets);

		/* Copy the extra bytes (backwards). */
		{
			byte*	start	= rec_get_start(rec, offsets);
			byte*	b	= rec - REC_N_NEW_EXTRA_BYTES;
			while (b != start) {
				*--b = *data++;
			}
		}

		/* Copy the data bytes. */
		if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
			/* Non-leaf nodes should not contain any
			externally stored columns. */
			if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
				page_zip_fail(("page_zip_apply_log:"
					       " %lu&REC_STATUS_NODE_PTR\n",
					       (ulong) hs));
				return(NULL);
			}

			/* Delegate records with BLOB pointers; the
			helper skips each BTR_EXTERN_FIELD_REF. */
			data = page_zip_apply_log_ext(
				rec, offsets, trx_id_col, data, end);

			if (UNIV_UNLIKELY(!data)) {
				return(NULL);
			}
		} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
			len = rec_offs_data_size(offsets)
				- REC_NODE_PTR_SIZE;
			/* Copy the data bytes, except node_ptr. */
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log:"
					       " node_ptr %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}
			memcpy(rec, data, len);
			data += len;
		} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
			len = rec_offs_data_size(offsets);

			/* Copy all data bytes of
			a record in a secondary index. */
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log:"
					       " sec %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}

			memcpy(rec, data, len);
			data += len;
		} else {
			/* Clustered index leaf record:
			skip DB_TRX_ID and DB_ROLL_PTR, which are
			stored separately from the log. */
			ulint	l = rec_get_nth_field_offs(offsets,
							   trx_id_col, &len);
			byte*	b;

			if (UNIV_UNLIKELY(data + l >= end)
			    || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
						    + DATA_ROLL_PTR_LEN))) {
				page_zip_fail(("page_zip_apply_log:"
					       " trx_id %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) l,
					       (const void*) end));
				return(NULL);
			}

			/* Copy any preceding data bytes. */
			memcpy(rec, data, l);
			data += l;

			/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
			b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
			len = ulint(rec_get_end(rec, offsets) - b);
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log:"
					       " clust %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}
			memcpy(b, data, len);
			data += len;
		}
	}
}
2267
2268/**********************************************************************//**
2269Set the heap_no in a record, and skip the fixed-size record header
2270that is not included in the d_stream.
2271@return TRUE on success, FALSE if d_stream does not end at rec */
2272static
2273ibool
2274page_zip_decompress_heap_no(
2275/*========================*/
2276 z_stream* d_stream, /*!< in/out: compressed page stream */
2277 rec_t* rec, /*!< in/out: record */
2278 ulint& heap_status) /*!< in/out: heap_no and status bits */
2279{
2280 if (d_stream->next_out != rec - REC_N_NEW_EXTRA_BYTES) {
2281 /* n_dense has grown since the page was last compressed. */
2282 return(FALSE);
2283 }
2284
2285 /* Skip the REC_N_NEW_EXTRA_BYTES. */
2286 d_stream->next_out = rec;
2287
2288 /* Set heap_no and the status bits. */
2289 mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2290 heap_status += 1 << REC_HEAP_NO_SHIFT;
2291 return(TRUE);
2292}
2293
/**********************************************************************//**
Decompress the records of a node pointer page.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_node_ptrs(
/*==========================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint*		offsets,	/*!< in/out: temporary offsets */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	ulint		heap_status = REC_STATUS_NODE_PTR
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	ulint		slot;
	const byte*	storage;

	/* Subtract the space reserved for uncompressed data:
	one directory slot and one node pointer per record are kept
	outside the zlib stream, in the page trailer. */
	d_stream->avail_in -= static_cast<uInt>(
		n_dense * (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE));

	/* Decompress the records in heap_no order. */
	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		/* Inflate everything up to the fixed-size header
		of this record. */
		d_stream->avail_out = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);

		ut_ad(d_stream->avail_out < srv_page_size
		      - PAGE_ZIP_START - PAGE_DIR);
		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
			/* Compressed data exhausted; still assign the
			heap_no before finishing up. */
			page_zip_decompress_heap_no(
				d_stream, rec, heap_status);
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_node_ptrs:"
				       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}

		/* Read the offsets. The status bits are needed here. */
		offsets = rec_get_offsets(rec, index, offsets, false,
					  ULINT_UNDEFINED, &heap);

		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));

		/* Decompress the data bytes, except node_ptr. */
		d_stream->avail_out =static_cast<uInt>(
			rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);

		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_node_ptrs:"
				       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}

		/* Clear the node pointer in case the record
		will be deleted and the space will be reallocated
		to a smaller record. */
		memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
		d_stream->next_out += REC_NODE_PTR_SIZE;

		ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
	}

	/* Decompress any trailing garbage, in case the last record was
	allocated from an originally longer space on the free list. */
	d_stream->avail_out = static_cast<uInt>(
		page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out));
	if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " avail_out = %u\n",
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " inflate(Z_FINISH)=%s\n",
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       ulint(page_dir_get_nth_slot(page,
						   page_dir_get_n_slots(page)
						   - 1U)
			     - d_stream->next_out));
	}

#ifdef UNIV_DEBUG
	page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in);
#endif /* UNIV_DEBUG */

	/* Apply the modification log, which starts where the
	compressed stream ended.  The "+ 1" reclaims the byte that
	the caller reserved for the log's end marker. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense, false,
						 ULINT_UNDEFINED, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = unsigned(mod_log_ptr - page_zip->data);
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	/* The uncompressed trailer must not overlap the
	modification log. */
	if (UNIV_UNLIKELY
	    (page_zip_get_trailer_len(page_zip,
				      dict_index_is_clust(index))
	     + page_zip->m_end >= page_zip_get_size(page_zip))) {
		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " %lu + %lu >= %lu, %lu\n",
			       (ulong) page_zip_get_trailer_len(
				       page_zip, dict_index_is_clust(index)),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip),
			       (ulong) dict_index_is_clust(index)));
		return(FALSE);
	}

	/* Restore the uncompressed columns in heap_no order. */
	storage = page_zip_dir_start_low(page_zip, n_dense);

	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		offsets = rec_get_offsets(rec, index, offsets, false,
					  ULINT_UNDEFINED, &heap);
		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));
		storage -= REC_NODE_PTR_SIZE;

		/* Copy the node pointer from the trailer into the
		last bytes of the record (cleared above). */
		memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
		       storage, REC_NODE_PTR_SIZE);
	}

	return(TRUE);
}
2483
/**********************************************************************//**
Decompress the records of a leaf node of a secondary index.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_sec(
/*====================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint*		offsets)	/*!< in/out: temporary offsets */
{
	ulint	heap_status	= REC_STATUS_ORDINARY
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	ulint	slot;

	ut_a(!dict_index_is_clust(index));

	/* Subtract the space reserved for uncompressed data:
	on secondary index leaf pages only the dense directory slots
	are stored outside the zlib stream. */
	d_stream->avail_in -= static_cast<uint>(
		n_dense * PAGE_ZIP_DIR_SLOT_SIZE);

	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		/* Decompress everything up to this record.
		(Unlike on clustered leaf pages, the record data here
		is inflated in one piece with the next iteration's
		leading bytes.) */
		d_stream->avail_out = static_cast<uint>(
			rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);

		if (UNIV_LIKELY(d_stream->avail_out)) {
			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
				/* Compressed data exhausted; still
				assign the heap_no before finishing. */
				page_zip_decompress_heap_no(
					d_stream, rec, heap_status);
				goto zlib_done;
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_sec:"
					       " inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				goto zlib_error;
			}
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}
	}

	/* Decompress the data of the last record and any trailing garbage,
	in case the last record was allocated from an originally longer space
	on the free list. */
	d_stream->avail_out = static_cast<uInt>(
		page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out));
	if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_sec:"
			       " avail_out = %u\n",
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_sec:"
			       " inflate(Z_FINISH)=%s\n",
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       ulint(page_dir_get_nth_slot(page,
						   page_dir_get_n_slots(page)
						   - 1U)
			     - d_stream->next_out));
	}

	ut_d(page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in));

	/* Apply the modification log.  The "+ 1" reclaims the byte
	that the caller reserved for the log's end marker. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense, true,
						 ULINT_UNDEFINED, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = unsigned(mod_log_ptr - page_zip->data);
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	/* The uncompressed trailer must not overlap the
	modification log. */
	if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE)
			  + page_zip->m_end >= page_zip_get_size(page_zip))) {

		page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
			       (ulong) page_zip_get_trailer_len(
				       page_zip, FALSE),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip)));
		return(FALSE);
	}

	/* There are no uncompressed columns on leaf pages of
	secondary indexes. */

	return(TRUE);
}
2619
/**********************************************************************//**
Decompress a record of a leaf node of a clustered index that contains
externally stored columns.
@return TRUE on success */
static
ibool
page_zip_decompress_clust_ext(
/*==========================*/
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t*		rec,		/*!< in/out: record */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
	ulint		trx_id_col)	/*!< in: position of DB_TRX_ID */
{
	ulint	i;

	/* Walk the fields in order, inflating up to each field that
	is stored outside the zlib stream (DB_TRX_ID + DB_ROLL_PTR and
	each BTR_EXTERN_FIELD_REF), then skipping over it. */
	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		ulint	len;
		byte*	dst;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			/* Skip trx_id and roll_ptr */
			dst = rec_get_nth_field(rec, offsets, i, &len);
			if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
					  + DATA_ROLL_PTR_LEN)) {

				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " len[%lu] = %lu\n",
					       (ulong) i, (ulong) len));
				return(FALSE);
			}

			if (rec_offs_nth_extern(offsets, i)) {

				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " DB_TRX_ID at %lu is ext\n",
					       (ulong) i));
				return(FALSE);
			}

			/* Inflate the compressed bytes preceding
			DB_TRX_ID. */
			d_stream->avail_out = static_cast<uInt>(
				dst - d_stream->next_out);

			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				return(FALSE);
			}

			ut_ad(d_stream->next_out == dst);

			/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
			avoid uninitialized bytes in case the record
			is affected by page_zip_apply_log(). */
			memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

			d_stream->next_out += DATA_TRX_ID_LEN
				+ DATA_ROLL_PTR_LEN;
		} else if (rec_offs_nth_extern(offsets, i)) {
			dst = rec_get_nth_field(rec, offsets, i, &len);
			ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
			/* The BLOB pointer occupies the last
			BTR_EXTERN_FIELD_REF_SIZE bytes of the field;
			the locally stored prefix is compressed. */
			dst += len - BTR_EXTERN_FIELD_REF_SIZE;

			d_stream->avail_out = static_cast<uInt>(
				dst - d_stream->next_out);
			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				return(FALSE);
			}

			ut_ad(d_stream->next_out == dst);

			/* Clear the BLOB pointer in case
			the record will be deleted and the
			space will not be reused. Note that
			the final initialization of the BLOB
			pointers (copying from "externs"
			or clearing) will have to take place
			only after the page modification log
			has been applied. Otherwise, we
			could end up with an uninitialized
			BLOB pointer when a record is deleted,
			reallocated and deleted. */
			memset(d_stream->next_out, 0,
			       BTR_EXTERN_FIELD_REF_SIZE);
			d_stream->next_out
				+= BTR_EXTERN_FIELD_REF_SIZE;
		}
	}

	return(TRUE);
}
2730
/**********************************************************************//**
Decompress the records of a leaf node of a clustered index.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_clust(
/*======================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint		trx_id_col,	/*!< index of the trx_id column */
	ulint*		offsets,	/*!< in/out: temporary offsets */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	int		err;
	ulint		slot;
	ulint		heap_status	= REC_STATUS_ORDINARY
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	const byte*	storage;
	const byte*	externs;

	ut_a(dict_index_is_clust(index));

	/* Subtract the space reserved for uncompressed data:
	one directory slot plus DB_TRX_ID and DB_ROLL_PTR per record
	are stored outside the zlib stream, in the page trailer. */
	d_stream->avail_in -= static_cast<uInt>(n_dense)
		* (PAGE_ZIP_CLUST_LEAF_SLOT_SIZE);

	/* Decompress the records in heap_no order. */
	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		/* Inflate everything up to the fixed-size header
		of this record. */
		d_stream->avail_out =static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);

		ut_ad(d_stream->avail_out < srv_page_size
		      - PAGE_ZIP_START - PAGE_DIR);
		err = inflate(d_stream, Z_SYNC_FLUSH);
		switch (err) {
		case Z_STREAM_END:
			/* Compressed data exhausted; still assign the
			heap_no before finishing up. */
			page_zip_decompress_heap_no(
				d_stream, rec, heap_status);
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (UNIV_LIKELY(!d_stream->avail_out)) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_clust:"
				       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}

		/* Read the offsets. The status bits are needed here. */
		offsets = rec_get_offsets(rec, index, offsets, true,
					  ULINT_UNDEFINED, &heap);

		/* This is a leaf page in a clustered index. */

		/* Check if there are any externally stored columns.
		For each externally stored column, restore the
		BTR_EXTERN_FIELD_REF separately. */

		if (rec_offs_any_extern(offsets)) {
			if (UNIV_UNLIKELY
			    (!page_zip_decompress_clust_ext(
				    d_stream, rec, offsets, trx_id_col))) {

				goto zlib_error;
			}
		} else {
			/* Skip trx_id and roll_ptr */
			ulint	len;
			byte*	dst = rec_get_nth_field(rec, offsets,
							trx_id_col, &len);
			if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
					  + DATA_ROLL_PTR_LEN)) {

				page_zip_fail(("page_zip_decompress_clust:"
					       " len = %lu\n", (ulong) len));
				goto zlib_error;
			}

			/* Inflate the compressed bytes preceding
			DB_TRX_ID. */
			d_stream->avail_out = static_cast<uInt>(
				dst - d_stream->next_out);

			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_clust:"
					       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				goto zlib_error;
			}

			ut_ad(d_stream->next_out == dst);

			/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
			avoid uninitialized bytes in case the record
			is affected by page_zip_apply_log(). */
			memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

			d_stream->next_out += DATA_TRX_ID_LEN
				+ DATA_ROLL_PTR_LEN;
		}

		/* Decompress the last bytes of the record. */
		d_stream->avail_out = static_cast<uInt>(
			rec_get_end(rec, offsets) - d_stream->next_out);

		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_clust:"
				       " 3 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}
	}

	/* Decompress any trailing garbage, in case the last record was
	allocated from an originally longer space on the free list. */
	d_stream->avail_out = static_cast<uInt>(
		page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out));
	if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_clust:"
			       " avail_out = %u\n",
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_clust:"
			       " inflate(Z_FINISH)=%s\n",
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       ulint(page_dir_get_nth_slot(page,
						   page_dir_get_n_slots(page)
						   - 1U)
			     - d_stream->next_out));
	}

	ut_d(page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in));

	/* Apply the modification log.  The "+ 1" reclaims the byte
	that the caller reserved for the log's end marker. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense, true,
						 trx_id_col, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = unsigned(mod_log_ptr - page_zip->data);
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	/* The uncompressed trailer must not overlap the
	modification log. */
	if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE)
			  + page_zip->m_end >= page_zip_get_size(page_zip))) {

		page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
			       (ulong) page_zip_get_trailer_len(
				       page_zip, TRUE),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip)));
		return(FALSE);
	}

	/* The DB_TRX_ID,DB_ROLL_PTR columns are stored at "storage"
	(growing downwards); the BLOB pointers are stored below them
	at "externs" (also growing downwards). */
	storage = page_zip_dir_start_low(page_zip, n_dense);

	externs = storage - n_dense
		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

	/* Restore the uncompressed columns in heap_no order. */

	for (slot = 0; slot < n_dense; slot++) {
		ulint	i;
		ulint	len;
		byte*	dst;
		rec_t*	rec	= recs[slot];
		/* A record that is not on the free list is in use. */
		bool	exists	= !page_zip_dir_find_free(
			page_zip, page_offset(rec));
		offsets = rec_get_offsets(rec, index, offsets, true,
					  ULINT_UNDEFINED, &heap);

		/* Restore DB_TRX_ID and DB_ROLL_PTR, which were
		cleared during decompression above. */
		dst = rec_get_nth_field(rec, offsets,
					trx_id_col, &len);
		ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
		memcpy(dst, storage,
		       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

		/* Check if there are any externally stored
		columns in this record. For each externally
		stored column, restore or clear the
		BTR_EXTERN_FIELD_REF. */
		if (!rec_offs_any_extern(offsets)) {
			continue;
		}

		for (i = 0; i < rec_offs_n_fields(offsets); i++) {
			if (!rec_offs_nth_extern(offsets, i)) {
				continue;
			}
			dst = rec_get_nth_field(rec, offsets, i, &len);

			if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
				page_zip_fail(("page_zip_decompress_clust:"
					       " %lu < 20\n",
					       (ulong) len));
				return(FALSE);
			}

			dst += len - BTR_EXTERN_FIELD_REF_SIZE;

			if (UNIV_LIKELY(exists)) {
				/* Existing record:
				restore the BLOB pointer */
				externs -= BTR_EXTERN_FIELD_REF_SIZE;

				/* The BLOB pointer area must not
				extend into the modification log. */
				if (UNIV_UNLIKELY
				    (externs < page_zip->data
				     + page_zip->m_end)) {
					page_zip_fail(("page_zip_"
						       "decompress_clust:"
						       " %p < %p + %lu\n",
						       (const void*) externs,
						       (const void*)
						       page_zip->data,
						       (ulong)
						       page_zip->m_end));
					return(FALSE);
				}

				memcpy(dst, externs,
				       BTR_EXTERN_FIELD_REF_SIZE);

				page_zip->n_blobs++;
			} else {
				/* Deleted record:
				clear the BLOB pointer */
				memset(dst, 0,
				       BTR_EXTERN_FIELD_REF_SIZE);
			}
		}
	}

	return(TRUE);
}
3025
3026/**********************************************************************//**
3027Decompress a page. This function should tolerate errors on the compressed
3028page. Instead of letting assertions fail, it will return FALSE if an
3029inconsistency is detected.
3030@return TRUE on success, FALSE on failure */
3031static
3032ibool
3033page_zip_decompress_low(
3034/*====================*/
3035 page_zip_des_t* page_zip,/*!< in: data, ssize;
3036 out: m_start, m_end, m_nonempty, n_blobs */
3037 page_t* page, /*!< out: uncompressed page, may be trashed */
3038 ibool all) /*!< in: TRUE=decompress the whole page;
3039 FALSE=verify but do not copy some
3040 page header fields that should not change
3041 after page creation */
3042{
3043 z_stream d_stream;
3044 dict_index_t* index = NULL;
3045 rec_t** recs; /*!< dense page directory, sorted by address */
3046 ulint n_dense;/* number of user records on the page */
3047 ulint trx_id_col = ULINT_UNDEFINED;
3048 mem_heap_t* heap;
3049 ulint* offsets;
3050
3051 ut_ad(page_zip_simple_validate(page_zip));
3052 UNIV_MEM_ASSERT_W(page, srv_page_size);
3053 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3054
3055 /* The dense directory excludes the infimum and supremum records. */
3056 n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
3057 if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
3058 >= page_zip_get_size(page_zip))) {
3059 page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
3060 (ulong) n_dense,
3061 (ulong) page_zip_get_size(page_zip)));
3062 return(FALSE);
3063 }
3064
3065 heap = mem_heap_create(n_dense * (3 * sizeof *recs) + srv_page_size);
3066
3067 recs = static_cast<rec_t**>(
3068 mem_heap_alloc(heap, n_dense * sizeof *recs));
3069
3070 if (all) {
3071 /* Copy the page header. */
3072 memcpy(page, page_zip->data, PAGE_DATA);
3073 } else {
3074 /* Check that the bytes that we skip are identical. */
3075#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3076 ut_a(!memcmp(FIL_PAGE_TYPE + page,
3077 FIL_PAGE_TYPE + page_zip->data,
3078 PAGE_HEADER - FIL_PAGE_TYPE));
3079 ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page,
3080 PAGE_HEADER + PAGE_LEVEL + page_zip->data,
3081 PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL)));
3082#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3083
3084 /* Copy the mutable parts of the page header. */
3085 memcpy(page, page_zip->data, FIL_PAGE_TYPE);
3086 memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data,
3087 PAGE_LEVEL - PAGE_N_DIR_SLOTS);
3088
3089#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3090 /* Check that the page headers match after copying. */
3091 ut_a(!memcmp(page, page_zip->data, PAGE_DATA));
3092#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3093 }
3094
3095#ifdef UNIV_ZIP_DEBUG
3096 /* Clear the uncompressed page, except the header. */
3097 memset(PAGE_DATA + page, 0x55, srv_page_size - PAGE_DATA);
3098#endif /* UNIV_ZIP_DEBUG */
3099 UNIV_MEM_INVALID(PAGE_DATA + page, srv_page_size - PAGE_DATA);
3100
3101 /* Copy the page directory. */
3102 if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
3103 n_dense))) {
3104zlib_error:
3105 mem_heap_free(heap);
3106 return(FALSE);
3107 }
3108
3109 /* Copy the infimum and supremum records. */
3110 memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
3111 infimum_extra, sizeof infimum_extra);
3112 if (page_is_empty(page)) {
3113 rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
3114 PAGE_NEW_SUPREMUM);
3115 } else {
3116 rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
3117 page_zip_dir_get(page_zip, 0)
3118 & PAGE_ZIP_DIR_SLOT_MASK);
3119 }
3120 memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
3121 memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
3122 supremum_extra_data, sizeof supremum_extra_data);
3123
3124 page_zip_set_alloc(&d_stream, heap);
3125
3126 d_stream.next_in = page_zip->data + PAGE_DATA;
3127 /* Subtract the space reserved for
3128 the page header and the end marker of the modification log. */
3129 d_stream.avail_in = static_cast<uInt>(
3130 page_zip_get_size(page_zip) - (PAGE_DATA + 1));
3131 d_stream.next_out = page + PAGE_ZIP_START;
3132 d_stream.avail_out = uInt(srv_page_size - PAGE_ZIP_START);
3133
3134 if (UNIV_UNLIKELY(inflateInit2(&d_stream, srv_page_size_shift)
3135 != Z_OK)) {
3136 ut_error;
3137 }
3138
3139 /* Decode the zlib header and the index information. */
3140 if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
3141
3142 page_zip_fail(("page_zip_decompress:"
3143 " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
3144 goto zlib_error;
3145 }
3146
3147 if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
3148
3149 page_zip_fail(("page_zip_decompress:"
3150 " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
3151 goto zlib_error;
3152 }
3153
3154 index = page_zip_fields_decode(
3155 page + PAGE_ZIP_START, d_stream.next_out,
3156 page_is_leaf(page) ? &trx_id_col : NULL,
3157 fil_page_get_type(page) == FIL_PAGE_RTREE);
3158
3159 if (UNIV_UNLIKELY(!index)) {
3160
3161 goto zlib_error;
3162 }
3163
3164 /* Decompress the user records. */
3165 page_zip->n_blobs = 0;
3166 d_stream.next_out = page + PAGE_ZIP_START;
3167
3168 {
3169 /* Pre-allocate the offsets for rec_get_offsets_reverse(). */
3170 ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
3171 + dict_index_get_n_fields(index);
3172
3173 offsets = static_cast<ulint*>(
3174 mem_heap_alloc(heap, n * sizeof(ulint)));
3175
3176 *offsets = n;
3177 }
3178
3179 /* Decompress the records in heap_no order. */
3180 if (!page_is_leaf(page)) {
3181 /* This is a node pointer page. */
3182 ulint info_bits;
3183
3184 if (UNIV_UNLIKELY
3185 (!page_zip_decompress_node_ptrs(page_zip, &d_stream,
3186 recs, n_dense, index,
3187 offsets, heap))) {
3188 goto err_exit;
3189 }
3190
3191 info_bits = page_has_prev(page) ? 0 : REC_INFO_MIN_REC_FLAG;
3192
3193 if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
3194 info_bits))) {
3195 goto err_exit;
3196 }
3197 } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
3198 /* This is a leaf page in a secondary index. */
3199 if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
3200 recs, n_dense,
3201 index, offsets))) {
3202 goto err_exit;
3203 }
3204
3205 if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
3206 page, 0))) {
3207err_exit:
3208 page_zip_fields_free(index);
3209 mem_heap_free(heap);
3210 return(FALSE);
3211 }
3212 } else {
3213 /* This is a leaf page in a clustered index. */
3214 if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
3215 &d_stream, recs,
3216 n_dense, index,
3217 trx_id_col,
3218 offsets, heap))) {
3219 goto err_exit;
3220 }
3221
3222 if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
3223 page, 0))) {
3224 goto err_exit;
3225 }
3226 }
3227
3228 ut_a(page_is_comp(page));
3229 UNIV_MEM_ASSERT_RW(page, srv_page_size);
3230
3231 page_zip_fields_free(index);
3232 mem_heap_free(heap);
3233
3234 return(TRUE);
3235}
3236
3237/**********************************************************************//**
3238Decompress a page. This function should tolerate errors on the compressed
3239page. Instead of letting assertions fail, it will return FALSE if an
3240inconsistency is detected.
3241@return TRUE on success, FALSE on failure */
3242ibool
3243page_zip_decompress(
3244/*================*/
3245 page_zip_des_t* page_zip,/*!< in: data, ssize;
3246 out: m_start, m_end, m_nonempty, n_blobs */
3247 page_t* page, /*!< out: uncompressed page, may be trashed */
3248 ibool all) /*!< in: TRUE=decompress the whole page;
3249 FALSE=verify but do not copy some
3250 page header fields that should not change
3251 after page creation */
3252{
3253 uintmax_t usec = ut_time_us(NULL);
3254
3255 if (!page_zip_decompress_low(page_zip, page, all)) {
3256 return(FALSE);
3257 }
3258
3259 uintmax_t time_diff = ut_time_us(NULL) - usec;
3260 page_zip_stat[page_zip->ssize - 1].decompressed++;
3261 page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff;
3262
3263 index_id_t index_id = btr_page_get_index_id(page);
3264
3265 if (srv_cmp_per_index_enabled) {
3266 mutex_enter(&page_zip_stat_per_index_mutex);
3267 page_zip_stat_per_index[index_id].decompressed++;
3268 page_zip_stat_per_index[index_id].decompressed_usec += time_diff;
3269 mutex_exit(&page_zip_stat_per_index_mutex);
3270 }
3271
3272 /* Update the stat counter for LRU policy. */
3273 buf_LRU_stat_inc_unzip();
3274
3275 MONITOR_INC(MONITOR_PAGE_DECOMPRESS);
3276
3277 return(TRUE);
3278}
3279
3280#ifdef UNIV_ZIP_DEBUG
3281/**********************************************************************//**
3282Dump a block of memory on the standard error stream. */
3283static
3284void
3285page_zip_hexdump_func(
3286/*==================*/
3287 const char* name, /*!< in: name of the data structure */
3288 const void* buf, /*!< in: data */
3289 ulint size) /*!< in: length of the data, in bytes */
3290{
3291 const byte* s = static_cast<const byte*>(buf);
3292 ulint addr;
3293 const ulint width = 32; /* bytes per line */
3294
3295 fprintf(stderr, "%s:\n", name);
3296
3297 for (addr = 0; addr < size; addr += width) {
3298 ulint i;
3299
3300 fprintf(stderr, "%04lx ", (ulong) addr);
3301
3302 i = ut_min(width, size - addr);
3303
3304 while (i--) {
3305 fprintf(stderr, "%02x", *s++);
3306 }
3307
3308 putc('\n', stderr);
3309 }
3310}
3311
3312/** Dump a block of memory on the standard error stream.
3313@param buf in: data
3314@param size in: length of the data, in bytes */
3315#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
3316
3317/** Flag: make page_zip_validate() compare page headers only */
3318bool page_zip_validate_header_only;
3319
3320/**********************************************************************//**
3321Check that the compressed and decompressed pages match.
3322@return TRUE if valid, FALSE if not */
ibool
page_zip_validate_low(
/*==================*/
	const page_zip_des_t*	page_zip,/*!< in: compressed page */
	const page_t*		page,	/*!< in: uncompressed page */
	const dict_index_t*	index,	/*!< in: index of the page, if known */
	ibool			sloppy)	/*!< in: FALSE=strict,
					TRUE=ignore the MIN_REC_FLAG */
{
	page_zip_des_t	temp_page_zip;
	byte*		temp_page_buf;
	page_t*		temp_page;
	ibool		valid;

	/* Compare the page header fields that are stored identically
	in both copies; the byte ranges not covered by these three
	memcmp() calls are not compared. */
	if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
		   FIL_PAGE_LSN - FIL_PAGE_PREV)
	    || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
	    || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
		      PAGE_DATA - FIL_PAGE_DATA)) {
		page_zip_fail(("page_zip_validate: page header\n"));
		page_zip_hexdump(page_zip, sizeof *page_zip);
		page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
		page_zip_hexdump(page, srv_page_size);
		return(FALSE);
	}

	ut_a(page_is_comp(page));

	/* Debug knob: optionally stop after the header comparison. */
	if (page_zip_validate_header_only) {
		return(TRUE);
	}

	/* page_zip_decompress() expects the uncompressed page to be
	srv_page_size aligned. */
	temp_page_buf = static_cast<byte*>(
		ut_malloc_nokey(2 << srv_page_size_shift));
	temp_page = static_cast<byte*>(ut_align(temp_page_buf, srv_page_size));

	UNIV_MEM_ASSERT_RW(page, srv_page_size);
	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));

	/* Decompress a scratch copy of the compressed page and
	compare the result against the uncompressed page. */
	temp_page_zip = *page_zip;
	valid = page_zip_decompress_low(&temp_page_zip, temp_page, TRUE);
	if (!valid) {
		fputs("page_zip_validate(): failed to decompress\n", stderr);
		goto func_exit;
	}
	/* Cross-check the bookkeeping fields that
	page_zip_decompress_low() recomputed. */
	if (page_zip->n_blobs != temp_page_zip.n_blobs) {
		page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
			       page_zip->n_blobs, temp_page_zip.n_blobs));
		valid = FALSE;
	}
#ifdef UNIV_DEBUG
	if (page_zip->m_start != temp_page_zip.m_start) {
		page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
			       page_zip->m_start, temp_page_zip.m_start));
		valid = FALSE;
	}
#endif /* UNIV_DEBUG */
	if (page_zip->m_end != temp_page_zip.m_end) {
		page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
			       page_zip->m_end, temp_page_zip.m_end));
		valid = FALSE;
	}
	if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
		page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
			       page_zip->m_nonempty,
			       temp_page_zip.m_nonempty));
		valid = FALSE;
	}
	if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
		   srv_page_size - PAGE_HEADER - FIL_PAGE_DATA_END)) {

		/* In crash recovery, the "minimum record" flag may be
		set incorrectly until the mini-transaction is
		committed. Let us tolerate that difference when we
		are performing a sloppy validation. */

		ulint*		offsets;
		mem_heap_t*	heap;
		const rec_t*	rec;
		const rec_t*	trec;
		byte		info_bits_diff;
		ulint		offset
			= rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE);
		ut_a(offset >= PAGE_NEW_SUPREMUM);
		offset -= 5/*REC_NEW_INFO_BITS*/;

		/* XOR isolates the info bits that differ on the first
		user record. */
		info_bits_diff = page[offset] ^ temp_page[offset];

		if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
			/* Only the MIN_REC_FLAG differs; patch the
			scratch copy and re-run the comparison. */
			temp_page[offset] = page[offset];

			if (!memcmp(page + PAGE_HEADER,
				    temp_page + PAGE_HEADER,
				    srv_page_size - PAGE_HEADER
				    - FIL_PAGE_DATA_END)) {

				/* Only the minimum record flag
				differed. Let us ignore it. */
				page_zip_fail(("page_zip_validate:"
					       " min_rec_flag"
					       " (%s%lu,%lu,0x%02lx)\n",
					       sloppy ? "ignored, " : "",
					       page_get_space_id(page),
					       page_get_page_no(page),
					       (ulong) page[offset]));
				/* We don't check for spatial index, since
				the "minimum record" could be deleted when
				doing rtr_update_mbr_field.
				GIS_FIXME: need to validate why
				rtr_update_mbr_field.() could affect this */
				if (index && dict_index_is_spatial(index)) {
					valid = true;
				} else {
					valid = sloppy;
				}
				goto func_exit;
			}
		}

		/* Compare the pointers in the PAGE_FREE list. */
		rec = page_header_get_ptr(page, PAGE_FREE);
		trec = page_header_get_ptr(temp_page, PAGE_FREE);

		while (rec || trec) {
			if (page_offset(rec) != page_offset(trec)) {
				page_zip_fail(("page_zip_validate:"
					       " PAGE_FREE list: %u!=%u\n",
					       (unsigned) page_offset(rec),
					       (unsigned) page_offset(trec)));
				valid = FALSE;
				goto func_exit;
			}

			rec = page_rec_get_next_low(rec, TRUE);
			trec = page_rec_get_next_low(trec, TRUE);
		}

		/* Compare the records.  Both record lists are walked
		in parallel; the offsets within the page must agree
		at every step. */
		heap = NULL;
		offsets = NULL;
		rec = page_rec_get_next_low(
			page + PAGE_NEW_INFIMUM, TRUE);
		trec = page_rec_get_next_low(
			temp_page + PAGE_NEW_INFIMUM, TRUE);
		const bool is_leaf = page_is_leaf(page);

		do {
			if (page_offset(rec) != page_offset(trec)) {
				page_zip_fail(("page_zip_validate:"
					       " record list: 0x%02x!=0x%02x\n",
					       (unsigned) page_offset(rec),
					       (unsigned) page_offset(trec)));
				valid = FALSE;
				break;
			}

			if (index) {
				/* Compare the data. */
				offsets = rec_get_offsets(
					rec, index, offsets, is_leaf,
					ULINT_UNDEFINED, &heap);

				if (memcmp(rec - rec_offs_extra_size(offsets),
					   trec - rec_offs_extra_size(offsets),
					   rec_offs_size(offsets))) {
					page_zip_fail(
						("page_zip_validate:"
						 " record content: 0x%02x",
						 (unsigned) page_offset(rec)));
					valid = FALSE;
					break;
				}
			}

			rec = page_rec_get_next_low(rec, TRUE);
			trec = page_rec_get_next_low(trec, TRUE);
		} while (rec || trec);

		if (heap) {
			mem_heap_free(heap);
		}
	}

func_exit:
	/* On any mismatch, dump both copies to aid debugging. */
	if (!valid) {
		page_zip_hexdump(page_zip, sizeof *page_zip);
		page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
		page_zip_hexdump(page, srv_page_size);
		page_zip_hexdump(temp_page, srv_page_size);
	}
	ut_free(temp_page_buf);
	return(valid);
}
3518
3519/**********************************************************************//**
3520Check that the compressed and decompressed pages match.
3521@return TRUE if valid, FALSE if not */
3522ibool
3523page_zip_validate(
3524/*==============*/
3525 const page_zip_des_t* page_zip,/*!< in: compressed page */
3526 const page_t* page, /*!< in: uncompressed page */
3527 const dict_index_t* index) /*!< in: index of the page, if known */
3528{
3529 return(page_zip_validate_low(page_zip, page, index,
3530 recv_recovery_is_on()));
3531}
3532#endif /* UNIV_ZIP_DEBUG */
3533
3534#ifdef UNIV_DEBUG
3535/**********************************************************************//**
3536Assert that the compressed and decompressed page headers match.
3537@return TRUE */
3538static
3539ibool
3540page_zip_header_cmp(
3541/*================*/
3542 const page_zip_des_t* page_zip,/*!< in: compressed page */
3543 const byte* page) /*!< in: uncompressed page */
3544{
3545 ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3546 FIL_PAGE_LSN - FIL_PAGE_PREV));
3547 ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
3548 2));
3549 ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3550 PAGE_DATA - FIL_PAGE_DATA));
3551
3552 return(TRUE);
3553}
3554#endif /* UNIV_DEBUG */
3555
3556/**********************************************************************//**
3557Write a record on the compressed page that contains externally stored
3558columns. The data must already have been written to the uncompressed page.
3559@return end of modification log */
static
byte*
page_zip_write_rec_ext(
/*===================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	const page_t*	page,		/*!< in: page containing rec */
	const byte*	rec,		/*!< in: record being written */
	dict_index_t*	index,		/*!< in: record descriptor */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	ulint		create,		/*!< in: nonzero=insert, zero=update */
	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
	ulint		heap_no,	/*!< in: heap number of rec */
	byte*		storage,	/*!< in: end of dense page directory */
	byte*		data)		/*!< in: end of modification log */
{
	const byte*	start	= rec;
	ulint		i;
	ulint		len;
	byte*		externs	= storage;
	ulint		n_ext	= rec_offs_n_extern(offsets);

	ut_ad(rec_offs_validate(rec, index, offsets));
	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
			   rec_offs_extra_size(offsets));

	/* The BLOB pointer area begins below the uncompressed
	DB_TRX_ID,DB_ROLL_PTR storage, which has one slot per user
	record in the heap. */
	externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
		* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);

	/* Note that this will not take into account
	the BLOB columns of rec if create==TRUE. */
	ut_ad(data + rec_offs_data_size(offsets)
	      - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
	      - n_ext * BTR_EXTERN_FIELD_REF_SIZE
	      < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);

	{
		/* Position externs at this record's first BLOB
		pointer slot, based on the number of externally
		stored columns in the preceding records. */
		ulint	blob_no = page_zip_get_n_prev_extern(
			page_zip, rec, index);
		byte*	ext_end = externs - page_zip->n_blobs
			* BTR_EXTERN_FIELD_REF_SIZE;
		ut_ad(blob_no <= page_zip->n_blobs);
		externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;

		if (create) {
			/* An insert claims n_ext new slots: move the
			BLOB pointers stored below this record's slots
			down by n_ext slots to make room. */
			page_zip->n_blobs += static_cast<unsigned>(n_ext);
			ASSERT_ZERO_BLOB(ext_end - n_ext
					 * BTR_EXTERN_FIELD_REF_SIZE);
			memmove(ext_end - n_ext
				* BTR_EXTERN_FIELD_REF_SIZE,
				ext_end,
				ulint(externs - ext_end));
		}

		ut_a(blob_no + n_ext <= page_zip->n_blobs);
	}

	/* Walk the fields, appending the compressible bytes to the
	modification log at "data" while diverting DB_TRX_ID,
	DB_ROLL_PTR and the BLOB pointers to their uncompressed
	storage areas. */
	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		const byte*	src;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			ut_ad(!rec_offs_nth_extern(offsets,
						   i));
			ut_ad(!rec_offs_nth_extern(offsets,
						   i + 1));
			/* Locate trx_id and roll_ptr. */
			src = rec_get_nth_field(rec, offsets,
						i, &len);
			ut_ad(len == DATA_TRX_ID_LEN);
			ut_ad(src + DATA_TRX_ID_LEN
			      == rec_get_nth_field(
				      rec, offsets,
				      i + 1, &len));
			ut_ad(len == DATA_ROLL_PTR_LEN);

			/* Log the preceding fields. */
			ASSERT_ZERO(data, src - start);
			memcpy(data, start, ulint(src - start));
			data += src - start;
			start = src + (DATA_TRX_ID_LEN
				       + DATA_ROLL_PTR_LEN);

			/* Store trx_id and roll_ptr. */
			memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
			       * (heap_no - 1),
			       src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
			i++; /* skip also roll_ptr */
		} else if (rec_offs_nth_extern(offsets, i)) {
			src = rec_get_nth_field(rec, offsets,
						i, &len);

			ut_ad(dict_index_is_clust(index));
			ut_ad(len
			      >= BTR_EXTERN_FIELD_REF_SIZE);
			/* Only the trailing BTR_EXTERN_FIELD_REF_SIZE
			bytes of an extern field are the pointer; the
			locally stored prefix is logged normally. */
			src += len - BTR_EXTERN_FIELD_REF_SIZE;

			ASSERT_ZERO(data, src - start);
			memcpy(data, start, ulint(src - start));
			data += src - start;
			start = src + BTR_EXTERN_FIELD_REF_SIZE;

			/* Store the BLOB pointer. */
			externs -= BTR_EXTERN_FIELD_REF_SIZE;
			ut_ad(data < externs);
			memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
		}
	}

	/* Log the last bytes of the record. */
	len = rec_offs_data_size(offsets) - ulint(start - rec);

	ASSERT_ZERO(data, len);
	memcpy(data, start, len);
	data += len;

	return(data);
}
3677
3678/**********************************************************************//**
3679Write an entire record on the compressed page. The data must already
3680have been written to the uncompressed page. */
void
page_zip_write_rec(
/*===============*/
	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
	const byte*	rec,	/*!< in: record being written */
	dict_index_t*	index,	/*!< in: the index the record belongs to */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
	ulint		create)	/*!< in: nonzero=insert, zero=update */
{
	const page_t*	page;
	byte*		data;
	byte*		storage;
	ulint		heap_no;
	byte*		slot;

	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip_get_size(page_zip)
	      > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(rec_offs_comp(offsets));
	ut_ad(rec_offs_validate(rec, index, offsets));

	ut_ad(page_zip->m_start >= PAGE_DATA);

	page = page_align(rec);

	ut_ad(page_zip_header_cmp(page_zip, page));
	ut_ad(page_simple_validate_new((page_t*) page));

	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
			   rec_offs_extra_size(offsets));

	/* Find this record's slot in the dense page directory. */
	slot = page_zip_dir_find(page_zip, page_offset(rec));
	ut_a(slot);
	/* Copy the delete mark. */
	if (rec_get_deleted_flag(rec, TRUE)) {
		/* In delete-marked records, DB_TRX_ID must
		always refer to an existing undo log record.
		On non-leaf pages, the delete-mark flag is garbage. */
		ut_ad(!index->is_primary() || !page_is_leaf(page)
		      || row_get_rec_trx_id(rec, index, offsets));
		*slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
	} else {
		*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
	}

	ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
	ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + srv_page_size
	      - PAGE_DIR - PAGE_DIR_SLOT_SIZE
	      * page_dir_get_n_slots(page));

	heap_no = rec_get_heap_no_new(rec);
	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
	ut_ad(heap_no < page_dir_get_n_heap(page));

	/* Append to the modification log. */
	data = page_zip->data + page_zip->m_end;
	ut_ad(!*data);

	/* Identify the record by writing its heap number - 1.
	0 is reserved to indicate the end of the modification log. */

	if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
		/* Heap numbers that do not fit in one byte after the
		shift below are encoded in two bytes; the first byte
		carries the high bits and the 0x80 marker. */
		*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
		ut_ad(!*data);
	}
	*data++ = (byte) ((heap_no - 1) << 1);
	ut_ad(!*data);

	{
		const byte*	start	= rec - rec_offs_extra_size(offsets);
		const byte*	b	= rec - REC_N_NEW_EXTRA_BYTES;

		/* Write the extra bytes backwards, so that
		rec_offs_extra_size() can be easily computed in
		page_zip_apply_log() by invoking
		rec_get_offsets_reverse(). */

		while (b != start) {
			*data++ = *--b;
			ut_ad(!*data);
		}
	}

	/* Write the data bytes. Store the uncompressed bytes separately. */
	storage = page_zip_dir_start(page_zip);

	if (page_is_leaf(page)) {
		ulint		len;

		if (dict_index_is_clust(index)) {
			/* Clustered index leaf: DB_TRX_ID,DB_ROLL_PTR
			and any BLOB pointers are kept uncompressed. */
			ulint		trx_id_col;

			trx_id_col = dict_index_get_sys_col_pos(index,
								DATA_TRX_ID);
			ut_ad(trx_id_col != ULINT_UNDEFINED);

			/* Store separately trx_id, roll_ptr and
			the BTR_EXTERN_FIELD_REF of each BLOB column. */
			if (rec_offs_any_extern(offsets)) {
				data = page_zip_write_rec_ext(
					page_zip, page,
					rec, index, offsets, create,
					trx_id_col, heap_no, storage, data);
			} else {
				/* Locate trx_id and roll_ptr. */
				const byte*	src
					= rec_get_nth_field(rec, offsets,
							    trx_id_col, &len);
				ut_ad(len == DATA_TRX_ID_LEN);
				ut_ad(src + DATA_TRX_ID_LEN
				      == rec_get_nth_field(
					      rec, offsets,
					      trx_id_col + 1, &len));
				ut_ad(len == DATA_ROLL_PTR_LEN);

				/* Log the preceding fields. */
				ASSERT_ZERO(data, src - rec);
				memcpy(data, rec, ulint(src - rec));
				data += src - rec;

				/* Store trx_id and roll_ptr. */
				memcpy(storage
				       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
				       * (heap_no - 1),
				       src,
				       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

				src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;

				/* Log the last bytes of the record. */
				len = rec_offs_data_size(offsets)
					- ulint(src - rec);

				ASSERT_ZERO(data, len);
				memcpy(data, src, len);
				data += len;
			}
		} else {
			/* Leaf page of a secondary index:
			no externally stored columns */
			ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
			      == ULINT_UNDEFINED);
			ut_ad(!rec_offs_any_extern(offsets));

			/* Log the entire record. */
			len = rec_offs_data_size(offsets);

			ASSERT_ZERO(data, len);
			memcpy(data, rec, len);
			data += len;
		}
	} else {
		/* This is a node pointer page. */
		ulint	len;

		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));

		/* Copy the data bytes, except node_ptr. */
		len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
		ut_ad(data + len < storage - REC_NODE_PTR_SIZE
		      * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
		ASSERT_ZERO(data, len);
		memcpy(data, rec, len);
		data += len;

		/* Copy the node pointer to the uncompressed area. */
		memcpy(storage - REC_NODE_PTR_SIZE
		       * (heap_no - 1),
		       rec + len,
		       REC_NODE_PTR_SIZE);
	}

	/* The byte after the log entry must still be the zero that
	terminates the modification log. */
	ut_a(!*data);
	ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
	page_zip->m_end = unsigned(data - page_zip->data);
	page_zip->m_nonempty = TRUE;

#ifdef UNIV_ZIP_DEBUG
	ut_a(page_zip_validate(page_zip, page_align(rec), index));
#endif /* UNIV_ZIP_DEBUG */
}
3866
3867/***********************************************************//**
3868Parses a log record of writing a BLOB pointer of a record.
3869@return end of log record or NULL */
3870byte*
3871page_zip_parse_write_blob_ptr(
3872/*==========================*/
3873 byte* ptr, /*!< in: redo log buffer */
3874 byte* end_ptr,/*!< in: redo log buffer end */
3875 page_t* page, /*!< in/out: uncompressed page */
3876 page_zip_des_t* page_zip)/*!< in/out: compressed page */
3877{
3878 ulint offset;
3879 ulint z_offset;
3880
3881 ut_ad(ptr != NULL);
3882 ut_ad(end_ptr != NULL);
3883 ut_ad(!page == !page_zip);
3884
3885 if (UNIV_UNLIKELY
3886 (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
3887
3888 return(NULL);
3889 }
3890
3891 offset = mach_read_from_2(ptr);
3892 z_offset = mach_read_from_2(ptr + 2);
3893
3894 if (offset < PAGE_ZIP_START
3895 || offset >= srv_page_size
3896 || z_offset >= srv_page_size) {
3897corrupt:
3898 recv_sys->found_corrupt_log = TRUE;
3899
3900 return(NULL);
3901 }
3902
3903 if (page) {
3904
3905 if (!page_zip || !page_is_leaf(page)) {
3906
3907 goto corrupt;
3908 }
3909
3910#ifdef UNIV_ZIP_DEBUG
3911 ut_a(page_zip_validate(page_zip, page, NULL));
3912#endif /* UNIV_ZIP_DEBUG */
3913
3914 memcpy(page + offset,
3915 ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3916 memcpy(page_zip->data + z_offset,
3917 ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3918
3919#ifdef UNIV_ZIP_DEBUG
3920 ut_a(page_zip_validate(page_zip, page, NULL));
3921#endif /* UNIV_ZIP_DEBUG */
3922 }
3923
3924 return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
3925}
3926
3927/**********************************************************************//**
3928Write a BLOB pointer of a record on the leaf page of a clustered index.
3929The information must already have been updated on the uncompressed page. */
3930void
3931page_zip_write_blob_ptr(
3932/*====================*/
3933 page_zip_des_t* page_zip,/*!< in/out: compressed page */
3934 const byte* rec, /*!< in/out: record whose data is being
3935 written */
3936 dict_index_t* index, /*!< in: index of the page */
3937 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3938 ulint n, /*!< in: column index */
3939 mtr_t* mtr) /*!< in: mini-transaction handle,
3940 or NULL if no logging is needed */
3941{
3942 const byte* field;
3943 byte* externs;
3944 const page_t* page = page_align(rec);
3945 ulint blob_no;
3946 ulint len;
3947
3948 ut_ad(page_zip != NULL);
3949 ut_ad(rec != NULL);
3950 ut_ad(index != NULL);
3951 ut_ad(offsets != NULL);
3952 ut_ad(page_simple_validate_new((page_t*) page));
3953 ut_ad(page_zip_simple_validate(page_zip));
3954 ut_ad(page_zip_get_size(page_zip)
3955 > PAGE_DATA + page_zip_dir_size(page_zip));
3956 ut_ad(rec_offs_comp(offsets));
3957 ut_ad(rec_offs_validate(rec, NULL, offsets));
3958 ut_ad(rec_offs_any_extern(offsets));
3959 ut_ad(rec_offs_nth_extern(offsets, n));
3960
3961 ut_ad(page_zip->m_start >= PAGE_DATA);
3962 ut_ad(page_zip_header_cmp(page_zip, page));
3963
3964 ut_ad(page_is_leaf(page));
3965 ut_ad(dict_index_is_clust(index));
3966
3967 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3968 UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3969 UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3970 rec_offs_extra_size(offsets));
3971
3972 blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
3973 + rec_get_n_extern_new(rec, index, n);
3974 ut_a(blob_no < page_zip->n_blobs);
3975
3976 externs = page_zip->data + page_zip_get_size(page_zip)
3977 - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3978 * PAGE_ZIP_CLUST_LEAF_SLOT_SIZE;
3979
3980 field = rec_get_nth_field(rec, offsets, n, &len);
3981
3982 externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
3983 field += len - BTR_EXTERN_FIELD_REF_SIZE;
3984
3985 memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
3986
3987#ifdef UNIV_ZIP_DEBUG
3988 ut_a(page_zip_validate(page_zip, page, index));
3989#endif /* UNIV_ZIP_DEBUG */
3990
3991 if (mtr) {
3992 byte* log_ptr = mlog_open(
3993 mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
3994 if (UNIV_UNLIKELY(!log_ptr)) {
3995 return;
3996 }
3997
3998 log_ptr = mlog_write_initial_log_record_fast(
3999 (byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
4000 mach_write_to_2(log_ptr, page_offset(field));
4001 log_ptr += 2;
4002 mach_write_to_2(log_ptr, ulint(externs - page_zip->data));
4003 log_ptr += 2;
4004 memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
4005 log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
4006 mlog_close(mtr, log_ptr);
4007 }
4008}
4009
4010/***********************************************************//**
4011Parses a log record of writing the node pointer of a record.
4012@return end of log record or NULL */
4013byte*
4014page_zip_parse_write_node_ptr(
4015/*==========================*/
4016 byte* ptr, /*!< in: redo log buffer */
4017 byte* end_ptr,/*!< in: redo log buffer end */
4018 page_t* page, /*!< in/out: uncompressed page */
4019 page_zip_des_t* page_zip)/*!< in/out: compressed page */
4020{
4021 ulint offset;
4022 ulint z_offset;
4023
4024 ut_ad(ptr != NULL);
4025 ut_ad(end_ptr!= NULL);
4026 ut_ad(!page == !page_zip);
4027
4028 if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
4029
4030 return(NULL);
4031 }
4032
4033 offset = mach_read_from_2(ptr);
4034 z_offset = mach_read_from_2(ptr + 2);
4035
4036 if (offset < PAGE_ZIP_START
4037 || offset >= srv_page_size
4038 || z_offset >= srv_page_size) {
4039corrupt:
4040 recv_sys->found_corrupt_log = TRUE;
4041
4042 return(NULL);
4043 }
4044
4045 if (page) {
4046 byte* storage_end;
4047 byte* field;
4048 byte* storage;
4049 ulint heap_no;
4050
4051 if (!page_zip || page_is_leaf(page)) {
4052
4053 goto corrupt;
4054 }
4055
4056#ifdef UNIV_ZIP_DEBUG
4057 ut_a(page_zip_validate(page_zip, page, NULL));
4058#endif /* UNIV_ZIP_DEBUG */
4059
4060 field = page + offset;
4061 storage = page_zip->data + z_offset;
4062
4063 storage_end = page_zip_dir_start(page_zip);
4064
4065 heap_no = 1 + ulint(storage_end - storage) / REC_NODE_PTR_SIZE;
4066
4067 if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
4068 || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
4069 || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
4070
4071 goto corrupt;
4072 }
4073
4074 memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
4075 memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
4076
4077#ifdef UNIV_ZIP_DEBUG
4078 ut_a(page_zip_validate(page_zip, page, NULL));
4079#endif /* UNIV_ZIP_DEBUG */
4080 }
4081
4082 return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
4083}
4084
/**********************************************************************//**
Write the node pointer of a record on a non-leaf compressed page.
The pointer is written both into the record on the uncompressed page and
into the dense node-pointer array that precedes the dense page directory
at the end of the compressed page; optionally a MLOG_ZIP_WRITE_NODE_PTR
redo log record is emitted. */
void
page_zip_write_node_ptr(
/*====================*/
	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
	byte*		rec,	/*!< in/out: record */
	ulint		size,	/*!< in: data size of rec */
	ulint		ptr,	/*!< in: node pointer */
	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL
				to skip redo logging */
{
	byte*	field;
	byte*	storage;
#ifdef UNIV_DEBUG
	page_t*	page	= page_align(rec);
#endif /* UNIV_DEBUG */

	ut_ad(page_simple_validate_new(page));
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip_get_size(page_zip)
	      > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(page_rec_is_comp(rec));

	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(page_zip_header_cmp(page_zip, page));

	/* Node pointers only exist on non-leaf pages. */
	ut_ad(!page_is_leaf(page));

	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
	UNIV_MEM_ASSERT_RW(rec, size);

	/* The mirror copy is stored uncompressed just before the dense
	page directory, indexed by (heap_no - 1); the node pointer
	itself occupies the last REC_NODE_PTR_SIZE bytes of the record
	data. */
	storage	= page_zip_dir_start(page_zip)
		- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
	field	= rec + size - REC_NODE_PTR_SIZE;

#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
	/* The two copies must agree before the update. */
	ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
	compile_time_assert(REC_NODE_PTR_SIZE == 4);
	mach_write_to_4(field, ptr);
	memcpy(storage, field, REC_NODE_PTR_SIZE);

	if (mtr) {
		/* Redo record body: 2-byte page offset of the field,
		2-byte offset of the mirror copy within the compressed
		page, then the 4-byte node pointer. */
		byte*	log_ptr	= mlog_open(mtr,
					    11 + 2 + 2 + REC_NODE_PTR_SIZE);
		if (UNIV_UNLIKELY(!log_ptr)) {
			return;
		}

		log_ptr = mlog_write_initial_log_record_fast(
			field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
		mach_write_to_2(log_ptr, page_offset(field));
		log_ptr += 2;
		mach_write_to_2(log_ptr, ulint(storage - page_zip->data));
		log_ptr += 2;
		memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
		log_ptr += REC_NODE_PTR_SIZE;
		mlog_close(mtr, log_ptr);
	}
}
4145
/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
The 13 bytes are written both into the record on the uncompressed page and
into the dense DB_TRX_ID,DB_ROLL_PTR array stored uncompressed before the
dense page directory; optionally a MLOG_ZIP_WRITE_TRX_ID redo log record
is emitted.
@param[in,out]	page_zip	compressed page
@param[in,out]	rec		record
@param[in]	offsets		rec_get_offsets(rec, index)
@param[in]	trx_id_col	field number of DB_TRX_ID (number of PK fields)
@param[in]	trx_id		DB_TRX_ID value (transaction identifier)
@param[in]	roll_ptr	DB_ROLL_PTR value (undo log pointer)
@param[in,out]	mtr		mini-transaction, or NULL to skip logging */
void
page_zip_write_trx_id_and_roll_ptr(
	page_zip_des_t*	page_zip,
	byte*		rec,
	const ulint*	offsets,
	ulint		trx_id_col,
	trx_id_t	trx_id,
	roll_ptr_t	roll_ptr,
	mtr_t*		mtr)
{
	byte*	field;
	byte*	storage;
#ifdef UNIV_DEBUG
	page_t*	page	= page_align(rec);
#endif /* UNIV_DEBUG */
	ulint	len;

	ut_ad(page_simple_validate_new(page));
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip_get_size(page_zip)
	      > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(rec_offs_validate(rec, NULL, offsets));
	ut_ad(rec_offs_comp(offsets));

	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(page_zip_header_cmp(page_zip, page));

	/* These system columns only exist on clustered index leaf pages. */
	ut_ad(page_is_leaf(page));

	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));

	/* The mirror copy precedes the dense page directory and is
	indexed by (heap_no - 1); each entry holds both columns. */
	storage	= page_zip_dir_start(page_zip)
		- (rec_get_heap_no_new(rec) - 1)
		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

	/* DB_TRX_ID and DB_ROLL_PTR are adjacent fields in the record. */
	compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
	field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
	ut_ad(len == DATA_TRX_ID_LEN);
	ut_ad(field + DATA_TRX_ID_LEN
	      == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
	ut_ad(len == DATA_ROLL_PTR_LEN);
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
	/* The two copies must agree before the update. */
	ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
	compile_time_assert(DATA_TRX_ID_LEN == 6);
	mach_write_to_6(field, trx_id);
	compile_time_assert(DATA_ROLL_PTR_LEN == 7);
	mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
	memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
			   rec_offs_extra_size(offsets));
	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));

	if (mtr) {
		/* Redo record body: 2-byte page offset of the field,
		2-byte offset of the mirror copy within the compressed
		page, then the 13 bytes of DB_TRX_ID,DB_ROLL_PTR. */
		byte*	log_ptr	= mlog_open(
			mtr, 11 + 2 + 2 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		if (UNIV_UNLIKELY(!log_ptr)) {
			return;
		}

		log_ptr = mlog_write_initial_log_record_fast(
			(byte*) field, MLOG_ZIP_WRITE_TRX_ID, log_ptr, mtr);
		mach_write_to_2(log_ptr, page_offset(field));
		log_ptr += 2;
		mach_write_to_2(log_ptr, ulint(storage - page_zip->data));
		log_ptr += 2;
		memcpy(log_ptr, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		log_ptr += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
		mlog_close(mtr, log_ptr);
	}
}
4227
/** Parse a MLOG_ZIP_WRITE_TRX_ID record.
Record body layout: 2-byte page offset of the DB_TRX_ID field, 2-byte
offset of its mirror copy within the compressed page, followed by the
13 bytes of DB_TRX_ID,DB_ROLL_PTR (matching what
page_zip_write_trx_id_and_roll_ptr() logged).
@param[in]	ptr		redo log buffer
@param[in]	end_ptr		end of redo log buffer
@param[in,out]	page		uncompressed page
@param[in,out]	page_zip	compressed page
@return end of log record
@retval	NULL if the log record is incomplete */
byte*
page_zip_parse_write_trx_id(
	byte*		ptr,
	byte*		end_ptr,
	page_t*		page,
	page_zip_des_t*	page_zip)
{
	byte* const end = 2 + 2 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + ptr;

	if (UNIV_UNLIKELY(end_ptr < end)) {
		/* Incomplete record; the caller will retry with more data. */
		return(NULL);
	}

	uint offset = mach_read_from_2(ptr);
	uint z_offset = mach_read_from_2(ptr + 2);

	/* Sanity-check the offsets against the page geometry. */
	if (offset < PAGE_ZIP_START
	    || offset >= srv_page_size
	    || z_offset >= srv_page_size) {
corrupt:
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (page) {
		/* DB_TRX_ID,DB_ROLL_PTR only exist on clustered index
		leaf pages; anything else indicates a corrupt record. */
		if (!page_zip || !page_is_leaf(page)) {
			goto corrupt;
		}

#ifdef UNIV_ZIP_DEBUG
		ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */

		byte* field = page + offset;
		byte* storage = page_zip->data + z_offset;

		/* The mirror copy must lie before the dense directory. */
		if (storage >= page_zip_dir_start(page_zip)) {
			goto corrupt;
		}

		/* Apply the 13 bytes to both the uncompressed page and
		the mirror area of the compressed page. */
		memcpy(field, ptr + 4, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		memcpy(storage, ptr + 4, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

#ifdef UNIV_ZIP_DEBUG
		ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */
	}

	return end;
}
4286
/**********************************************************************//**
Clear an area on the uncompressed and compressed page.
Do not clear the data payload, as that would grow the modification log.
Only the uncompressed side-arrays (node pointers, or
DB_TRX_ID,DB_ROLL_PTR and BLOB pointers) and their copies in the record
are zeroed, for a record that has just been moved to the free list. */
static
void
page_zip_clear_rec(
/*===============*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	byte*		rec,		/*!< in: record to clear */
	const dict_index_t*	index,	/*!< in: index of rec */
	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
{
	ulint	heap_no;
	page_t*	page	= page_align(rec);
	byte*	storage;
	byte*	field;
	ulint	len;
	/* page_zip_validate() would fail here if a record
	containing externally stored columns is being deleted. */
	ut_ad(rec_offs_validate(rec, index, offsets));
	/* The record must already be on the free list, not in the
	in-use part of the dense directory. */
	ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
	ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
	ut_ad(page_zip_header_cmp(page_zip, page));

	heap_no = rec_get_heap_no_new(rec);
	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);

	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
			   rec_offs_extra_size(offsets));

	if (!page_is_leaf(page)) {
		/* Clear node_ptr. On the compressed page,
		there is an array of node_ptr immediately before the
		dense page directory, at the very end of the page. */
		storage	= page_zip_dir_start(page_zip);
		ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index) ==
		      rec_offs_n_fields(offsets) - 1);
		/* The node pointer is always the last field. */
		field	= rec_get_nth_field(rec, offsets,
					    rec_offs_n_fields(offsets) - 1,
					    &len);
		ut_ad(len == REC_NODE_PTR_SIZE);

		ut_ad(!rec_offs_any_extern(offsets));
		memset(field, 0, REC_NODE_PTR_SIZE);
		memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
		       0, REC_NODE_PTR_SIZE);
	} else if (dict_index_is_clust(index)) {
		/* Clear trx_id and roll_ptr. On the compressed page,
		there is an array of these fields immediately before the
		dense page directory, at the very end of the page. */
		const ulint	trx_id_pos
			= dict_col_get_clust_pos(
			dict_table_get_sys_col(
				index->table, DATA_TRX_ID), index);
		storage	= page_zip_dir_start(page_zip);
		field	= rec_get_nth_field(rec, offsets, trx_id_pos, &len);
		ut_ad(len == DATA_TRX_ID_LEN);

		memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		memset(storage - (heap_no - 1)
		       * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
		       0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

		if (rec_offs_any_extern(offsets)) {
			ulint	i;

			for (i = rec_offs_n_fields(offsets); i--; ) {
				/* Clear all BLOB pointers in order to make
				page_zip_validate() pass. */
				if (rec_offs_nth_extern(offsets, i)) {
					field = rec_get_nth_field(
						rec, offsets, i, &len);
					ut_ad(len
					      == BTR_EXTERN_FIELD_REF_SIZE);
					/* Zero only the 20-byte field
					reference at the end of the column. */
					memset(field + len
					       - BTR_EXTERN_FIELD_REF_SIZE,
					       0, BTR_EXTERN_FIELD_REF_SIZE);
				}
			}
		}
	} else {
		/* Secondary index leaf page: nothing to clear. */
		ut_ad(!rec_offs_any_extern(offsets));
	}

#ifdef UNIV_ZIP_DEBUG
	ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
}
4377
4378/**********************************************************************//**
4379Write the "deleted" flag of a record on a compressed page. The flag must
4380already have been written on the uncompressed page. */
4381void
4382page_zip_rec_set_deleted(
4383/*=====================*/
4384 page_zip_des_t* page_zip,/*!< in/out: compressed page */
4385 const byte* rec, /*!< in: record on the uncompressed page */
4386 ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
4387{
4388 byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
4389 ut_a(slot);
4390 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4391 if (flag) {
4392 *slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
4393 } else {
4394 *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
4395 }
4396#ifdef UNIV_ZIP_DEBUG
4397 ut_a(page_zip_validate(page_zip, page_align(rec), NULL));
4398#endif /* UNIV_ZIP_DEBUG */
4399}
4400
4401/**********************************************************************//**
4402Write the "owned" flag of a record on a compressed page. The n_owned field
4403must already have been written on the uncompressed page. */
4404void
4405page_zip_rec_set_owned(
4406/*===================*/
4407 page_zip_des_t* page_zip,/*!< in/out: compressed page */
4408 const byte* rec, /*!< in: record on the uncompressed page */
4409 ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
4410{
4411 byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
4412 ut_a(slot);
4413 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4414 if (flag) {
4415 *slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4416 } else {
4417 *slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4418 }
4419}
4420
/**********************************************************************//**
Insert a record to the dense page directory.
The dense directory grows downwards from the end of the compressed page;
the entries between the insertion point and the free-list boundary are
shifted down by one slot to make room for the new record. */
void
page_zip_dir_insert(
/*================*/
	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
	const byte*	prev_rec,/*!< in: record after which to insert */
	const byte*	free_rec,/*!< in: record from which rec was
				allocated, or NULL */
	byte*		rec)	/*!< in: record to insert */
{
	ulint	n_dense;
	byte*	slot_rec;
	byte*	slot_free;

	ut_ad(prev_rec != rec);
	ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
	ut_ad(page_zip_simple_validate(page_zip));

	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));

	if (page_rec_is_infimum(prev_rec)) {
		/* Use the first slot. */
		slot_rec = page_zip->data + page_zip_get_size(page_zip);
	} else {
		byte*	end	= page_zip->data + page_zip_get_size(page_zip);
		byte*	start	= end - page_zip_dir_user_size(page_zip);

		if (UNIV_LIKELY(!free_rec)) {
			/* PAGE_N_RECS was already incremented
			in page_cur_insert_rec_zip(), but the
			dense directory slot at that position
			contains garbage.  Skip it. */
			start += PAGE_ZIP_DIR_SLOT_SIZE;
		}

		slot_rec = page_zip_dir_find_low(start, end,
						 page_offset(prev_rec));
		ut_a(slot_rec);
	}

	/* Read the old n_dense (n_heap may have been incremented). */
	n_dense = page_dir_get_n_heap(page_zip->data)
		- (PAGE_HEAP_NO_USER_LOW + 1U);

	if (UNIV_LIKELY_NULL(free_rec)) {
		/* The record was allocated from the free list.
		Shift the dense directory only up to that slot.
		Note that in this case, n_dense is actually
		off by one, because page_cur_insert_rec_zip()
		did not increment n_heap. */
		ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
		      + PAGE_HEAP_NO_USER_LOW);
		ut_ad(rec >= free_rec);
		slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
		ut_ad(slot_free);
		slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
	} else {
		/* The record was allocated from the heap.
		Shift the entire dense directory. */
		ut_ad(rec_get_heap_no_new(rec) == n_dense
		      + PAGE_HEAP_NO_USER_LOW);

		/* Shift to the end of the dense page directory. */
		slot_free = page_zip->data + page_zip_get_size(page_zip)
			- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
	}

	/* Shift the dense directory to allocate place for rec.
	The directory grows downwards, so slots move towards lower
	addresses by one slot size. */
	memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
		ulint(slot_rec - slot_free));

	/* Write the entry for the inserted record.
	The "owned" and "deleted" flags must be zero. */
	mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
}
4497
/**********************************************************************//**
Shift the dense page directory and the array of BLOB pointers
when a record is deleted.  The deleted record's slot is moved to the
head of the free list, its BLOB pointer entries (if any) are removed
from the dense BLOB array, and the record's side-array copies are
cleared via page_zip_clear_rec(). */
void
page_zip_dir_delete(
/*================*/
	page_zip_des_t*		page_zip,	/*!< in/out: compressed page */
	byte*			rec,		/*!< in: deleted record */
	const dict_index_t*	index,		/*!< in: index of rec */
	const ulint*		offsets,	/*!< in: rec_get_offsets(rec) */
	const byte*		free)		/*!< in: previous start of
						the free list */
{
	byte*	slot_rec;
	byte*	slot_free;
	ulint	n_ext;
	page_t*	page	= page_align(rec);

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(rec_offs_comp(offsets));

	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
			   rec_offs_extra_size(offsets));

	slot_rec = page_zip_dir_find(page_zip, page_offset(rec));

	ut_a(slot_rec);

	/* This could not be done before page_zip_dir_find(),
	because the lookup uses PAGE_N_RECS to delimit the in-use
	part of the dense directory. */
	page_header_set_field(page, page_zip, PAGE_N_RECS,
			      (ulint)(page_get_n_recs(page) - 1));

	if (UNIV_UNLIKELY(!free)) {
		/* Make the last slot the start of the free list. */
		slot_free = page_zip->data + page_zip_get_size(page_zip)
			- PAGE_ZIP_DIR_SLOT_SIZE
			* (page_dir_get_n_heap(page_zip->data)
			   - PAGE_HEAP_NO_USER_LOW);
	} else {
		slot_free = page_zip_dir_find_free(page_zip,
						   page_offset(free));
		ut_a(slot_free < slot_rec);
		/* Grow the free list by one slot by moving the start. */
		slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
	}

	if (UNIV_LIKELY(slot_rec > slot_free)) {
		/* Close the gap left by the deleted record's slot. */
		memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
			slot_free,
			ulint(slot_rec - slot_free));
	}

	/* Write the entry for the deleted record.
	The "owned" and "deleted" flags will be cleared. */
	mach_write_to_2(slot_free, page_offset(rec));

	/* BLOB pointers only exist on clustered index leaf pages. */
	if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
		ut_ad(!rec_offs_any_extern(offsets));
		goto skip_blobs;
	}

	n_ext = rec_offs_n_extern(offsets);
	if (UNIV_UNLIKELY(n_ext != 0)) {
		/* Shift and zero fill the array of BLOB pointers. */
		ulint	blob_no;
		byte*	externs;
		byte*	ext_end;

		/* Number of BLOB pointers stored before this record's. */
		blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
		ut_a(blob_no + n_ext <= page_zip->n_blobs);

		/* The BLOB pointer array lies below the per-record
		trx_id/roll_ptr entries and the dense directory. */
		externs = page_zip->data + page_zip_get_size(page_zip)
			- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
			* PAGE_ZIP_CLUST_LEAF_SLOT_SIZE;

		ext_end = externs - page_zip->n_blobs
			* BTR_EXTERN_FIELD_REF_SIZE;
		externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;

		page_zip->n_blobs -= static_cast<unsigned>(n_ext);
		/* Shift and zero fill the array. */
		memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
			ulint(page_zip->n_blobs - blob_no)
			* BTR_EXTERN_FIELD_REF_SIZE);
		memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
	}

skip_blobs:
	/* The compression algorithm expects info_bits and n_owned
	to be 0 for deleted records. */
	rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */

	page_zip_clear_rec(page_zip, rec, index, offsets);
}
4594
/**********************************************************************//**
Add a slot to the dense page directory.
Makes room for one more PAGE_ZIP_DIR_SLOT_SIZE entry by moving the
uncompressed side-arrays (node pointers, or trx_id/roll_ptr and BLOB
pointers) backwards on the compressed page. */
void
page_zip_dir_add_slot(
/*==================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	ulint		is_clustered)	/*!< in: nonzero for clustered index,
					zero for others */
{
	ulint	n_dense;
	byte*	dir;
	byte*	stored;

	ut_ad(page_is_comp(page_zip->data));
	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));

	/* Read the old n_dense (n_heap has already been incremented). */
	n_dense = page_dir_get_n_heap(page_zip->data)
		- (PAGE_HEAP_NO_USER_LOW + 1U);

	/* Start of the dense directory (it grows downwards from the
	end of the page). */
	dir = page_zip->data + page_zip_get_size(page_zip)
		- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;

	if (!page_is_leaf(page_zip->data)) {
		/* Non-leaf page: the node pointer array sits just
		below the directory; no BLOB pointers can exist. */
		ut_ad(!page_zip->n_blobs);
		stored = dir - n_dense * REC_NODE_PTR_SIZE;
	} else if (is_clustered) {
		/* Move the BLOB pointer array backwards to make space for the
		roll_ptr and trx_id columns and the dense directory slot. */
		byte*	externs;

		stored = dir - n_dense
			* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		externs = stored
			- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
		/* The area being vacated must already be zero-filled. */
		ASSERT_ZERO(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE,
			    PAGE_ZIP_CLUST_LEAF_SLOT_SIZE);
		memmove(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE,
			externs, ulint(stored - externs));
	} else {
		/* Secondary index leaf page: only BLOB pointers
		(if any) precede the directory. */
		stored = dir
			- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
		ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
			    static_cast<size_t>(PAGE_ZIP_DIR_SLOT_SIZE));
	}

	/* Move the uncompressed area backwards to make space
	for one directory slot. */
	memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, ulint(dir - stored));
}
4645
/***********************************************************//**
Parses a log record of writing to the header of a page.
Record body layout: 1-byte offset within the page header, 1-byte
length, then the data bytes (matching page_zip_write_header_log()).
The data is applied to both the uncompressed page and the compressed
page, whose headers are kept identical.
@return end of log record or NULL */
byte*
page_zip_parse_write_header(
/*========================*/
	byte*		ptr,	/*!< in: redo log buffer */
	byte*		end_ptr,/*!< in: redo log buffer end */
	page_t*		page,	/*!< in/out: uncompressed page */
	page_zip_des_t*	page_zip)/*!< in/out: compressed page */
{
	ulint	offset;
	ulint	len;

	ut_ad(ptr != NULL);
	ut_ad(end_ptr!= NULL);
	ut_ad(!page == !page_zip);

	if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
		/* Not even the offset and length bytes are present. */
		return(NULL);
	}

	offset = (ulint) *ptr++;
	len = (ulint) *ptr++;

	/* The write must stay within the page header area. */
	if (len == 0 || offset + len >= PAGE_DATA) {
corrupt:
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (end_ptr < ptr + len) {
		/* The data bytes are incomplete; wait for more. */
		return(NULL);
	}

	if (page) {
		if (!page_zip) {

			goto corrupt;
		}
#ifdef UNIV_ZIP_DEBUG
		ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */

		/* Apply to both copies of the page header. */
		memcpy(page + offset, ptr, len);
		memcpy(page_zip->data + offset, ptr, len);

#ifdef UNIV_ZIP_DEBUG
		ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */
	}

	return(ptr + len);
}
4703
4704/**********************************************************************//**
4705Write a log record of writing to the uncompressed header portion of a page. */
4706void
4707page_zip_write_header_log(
4708/*======================*/
4709 const byte* data, /*!< in: data on the uncompressed page */
4710 ulint length, /*!< in: length of the data */
4711 mtr_t* mtr) /*!< in: mini-transaction */
4712{
4713 byte* log_ptr = mlog_open(mtr, 11 + 1 + 1);
4714 ulint offset = page_offset(data);
4715
4716 ut_ad(offset < PAGE_DATA);
4717 ut_ad(offset + length < PAGE_DATA);
4718 compile_time_assert(PAGE_DATA < 256U);
4719 ut_ad(length > 0);
4720 ut_ad(length < 256);
4721
4722 /* If no logging is requested, we may return now */
4723 if (UNIV_UNLIKELY(!log_ptr)) {
4724
4725 return;
4726 }
4727
4728 log_ptr = mlog_write_initial_log_record_fast(
4729 (byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
4730 *log_ptr++ = (byte) offset;
4731 *log_ptr++ = (byte) length;
4732 mlog_close(mtr, log_ptr);
4733
4734 mlog_catenate_string(mtr, data, length);
4735}
4736
/**********************************************************************//**
Reorganize and compress a page.  This is a low-level operation for
compressed pages, to be used when page_zip_compress() fails.
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
The function btr_page_reorganize() should be preferred whenever possible.
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
non-clustered index, the caller must update the insert buffer free
bits in the same mini-transaction in such a way that the modification
will be redo-logged.
@return TRUE on success, FALSE on failure; page_zip will be left
intact on failure, but page will be overwritten. */
ibool
page_zip_reorganize(
/*================*/
	buf_block_t*	block,	/*!< in/out: page with compressed page;
				on the compressed page, in: size;
				out: data, n_blobs,
				m_start, m_end, m_nonempty */
	dict_index_t*	index,	/*!< in: index of the B-tree node */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_pool_t*	buf_pool	= buf_pool_from_block(block);
	page_zip_des_t*	page_zip	= buf_block_get_page_zip(block);
	page_t*		page		= buf_block_get_frame(block);
	buf_block_t*	temp_block;
	page_t*		temp_page;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(page_is_comp(page));
	ut_ad(!dict_index_is_ibuf(index));
	ut_ad(!index->table->is_temporary());
	/* Note that page_zip_validate(page_zip, page, index) may fail here. */
	UNIV_MEM_ASSERT_RW(page, srv_page_size);
	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));

	/* Disable logging: the rebuild below is covered by the single
	MLOG_ZIP_PAGE_COMPRESS record written on success. */
	mtr_log_t	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);

	temp_block = buf_block_alloc(buf_pool);
	/* The adaptive hash index entries would become stale after
	the records are moved. */
	btr_search_drop_page_hash_index(block);
	temp_page = temp_block->frame;

	/* Copy the old page to temporary space */
	buf_frame_copy(temp_page, page);

	/* Recreate the page: note that global data on page (possible
	segment headers, next page-field, etc.) is preserved intact */

	page_create(block, mtr, TRUE, dict_index_is_spatial(index));

	/* Copy the records from the temporary space to the recreated page;
	do not copy the lock bits yet */

	page_copy_rec_list_end_no_locks(block, temp_block,
					page_get_infimum_rec(temp_page),
					index, mtr);

	/* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. */
	memcpy(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
	       temp_page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 8);
	/* PAGE_MAX_TRX_ID must be set on secondary index leaf pages. */
	ut_ad(dict_index_is_clust(index) || !page_is_leaf(temp_page)
	      || page_get_max_trx_id(page) != 0);
	/* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than
	clustered index root pages. */
	ut_ad(page_get_max_trx_id(page) == 0
	      || (dict_index_is_clust(index)
		  ? page_is_root(temp_page)
		  : page_is_leaf(temp_page)));

	/* Restore logging. */
	mtr_set_log_mode(mtr, log_mode);

	if (!page_zip_compress(page_zip, page, index,
			       page_zip_level, NULL, mtr)) {
		/* Compression failed even after the rebuild;
		page_zip is untouched, but page was overwritten. */
		buf_block_free(temp_block);
		return(FALSE);
	}

	/* Re-point the explicit lock bits at the moved records. */
	lock_move_reorganize_page(block, temp_block);

	buf_block_free(temp_block);
	return(TRUE);
}
4822
/**********************************************************************//**
Copy the records of a page byte for byte.  Do not copy the page header
or trailer, except those B-tree header fields that are directly
related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
NOTE: The caller must update the lock table and the adaptive hash index. */
void
page_zip_copy_recs(
/*===============*/
	page_zip_des_t*		page_zip,	/*!< out: copy of src_zip
						(n_blobs, m_start, m_end,
						m_nonempty, data[0..size-1]) */
	page_t*			page,		/*!< out: copy of src */
	const page_zip_des_t*	src_zip,	/*!< in: compressed page */
	const page_t*		src,		/*!< in: page */
	dict_index_t*		index,		/*!< in: index of the B-tree */
	mtr_t*			mtr)		/*!< in: mini-transaction */
{
	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX));
	ut_ad(!dict_index_is_ibuf(index));
	ut_ad(!index->table->is_temporary());
#ifdef UNIV_ZIP_DEBUG
	/* The B-tree operations that call this function may set
	FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
	mismatch.  A strict page_zip_validate() will be executed later
	during the B-tree operations. */
	ut_a(page_zip_validate_low(src_zip, src, index, TRUE));
#endif /* UNIV_ZIP_DEBUG */
	ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
	if (UNIV_UNLIKELY(src_zip->n_blobs)) {
		/* BLOB pointers only occur on clustered index leaf pages. */
		ut_a(page_is_leaf(src));
		ut_a(dict_index_is_clust(index));
	}

	UNIV_MEM_ASSERT_W(page, srv_page_size);
	UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
	UNIV_MEM_ASSERT_RW(src, srv_page_size);
	UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));

	/* Copy those B-tree page header fields that are related to
	the records stored in the page.  Also copy the field
	PAGE_MAX_TRX_ID.  Skip the rest of the page header and
	trailer.  On the compressed page, there is no trailer. */
	compile_time_assert(PAGE_MAX_TRX_ID + 8 == PAGE_HEADER_PRIV_END);
	memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
	       PAGE_HEADER_PRIV_END);
	memcpy(PAGE_DATA + page, PAGE_DATA + src,
	       srv_page_size - PAGE_DATA - FIL_PAGE_DATA_END);
	memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
	       PAGE_HEADER_PRIV_END);
	memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
	       page_zip_get_size(page_zip) - PAGE_DATA);

	if (dict_index_is_clust(index)) {
		/* Reset the PAGE_ROOT_AUTO_INC field when copying
		from a root page. */
		memset(PAGE_HEADER + PAGE_ROOT_AUTO_INC + page, 0, 8);
		memset(PAGE_HEADER + PAGE_ROOT_AUTO_INC + page_zip->data,
		       0, 8);
	} else {
		/* The PAGE_MAX_TRX_ID must be nonzero on leaf pages
		of secondary indexes, and 0 on others. */
		ut_ad(!page_is_leaf(src) == !page_get_max_trx_id(src));
	}

	/* Copy all fields of src_zip to page_zip, except the pointer
	to the compressed data page. */
	{
		page_zip_t*	data = page_zip->data;
		memcpy(page_zip, src_zip, sizeof *page_zip);
		page_zip->data = data;
	}
	ut_ad(page_zip_get_trailer_len(page_zip, dict_index_is_clust(index))
	      + page_zip->m_end < page_zip_get_size(page_zip));

	if (!page_is_leaf(src)
	    && UNIV_UNLIKELY(!page_has_prev(src))
	    && UNIV_LIKELY(page_has_prev(page))) {
		/* Clear the REC_INFO_MIN_REC_FLAG of the first user record,
		because the copy is no longer the leftmost page of its
		level while the source was. */
		ulint	offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
						 TRUE);
		if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
			rec_t*	rec = page + offs;
			ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
			     & REC_INFO_MIN_REC_FLAG);
			rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
		}
	}

#ifdef UNIV_ZIP_DEBUG
	ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
	page_zip_compress_write_log(page_zip, page, index, mtr);
}
4917
/**********************************************************************//**
Parses a log record of compressing an index page.
Record body layout: 2-byte size of the compressed data stream, 2-byte
trailer size, 4-byte FIL_PAGE_PREV, 4-byte FIL_PAGE_NEXT, then the
compressed data stream followed by the trailer bytes.
@return end of log record or NULL */
byte*
page_zip_parse_compress(
/*====================*/
	byte*		ptr,	/*!< in: buffer */
	byte*		end_ptr,/*!< in: buffer end */
	page_t*		page,	/*!< out: uncompressed page */
	page_zip_des_t*	page_zip)/*!< out: compressed page */
{
	ulint	size;
	ulint	trailer_size;

	ut_ad(ptr != NULL);
	ut_ad(end_ptr!= NULL);
	ut_ad(!page == !page_zip);

	if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
		/* Not even the two length fields are present. */
		return(NULL);
	}

	size = mach_read_from_2(ptr);
	ptr += 2;
	trailer_size = mach_read_from_2(ptr);
	ptr += 2;

	/* 8 = FIL_PAGE_PREV (4 bytes) + FIL_PAGE_NEXT (4 bytes). */
	if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
		/* The payload is incomplete; wait for more data. */
		return(NULL);
	}

	if (page) {
		if (!page_zip || page_zip_get_size(page_zip) < size) {
corrupt:
			recv_sys->found_corrupt_log = TRUE;

			return(NULL);
		}

		/* Reassemble the compressed page image: the page links,
		the compressed stream starting at FIL_PAGE_TYPE, a
		zero-filled gap, and the trailer at the very end. */
		memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
		memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
		memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
		memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
		       page_zip_get_size(page_zip) - trailer_size
		       - (FIL_PAGE_TYPE + size));
		memcpy(page_zip->data + page_zip_get_size(page_zip)
		       - trailer_size, ptr + 8 + size, trailer_size);

		/* Rebuild the uncompressed page from the image. */
		if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page,
						       TRUE))) {

			goto corrupt;
		}
	}

	return(ptr + 8 + size + trailer_size);
}
4977#endif /* !UNIV_INNOCHECKSUM */
4978
/** Calculate the compressed page checksum.
@param[in]	data			compressed page
@param[in]	size			size of compressed page
@param[in]	algo			algorithm to use
@param[in]	use_legacy_big_endian	only used if algo is
SRV_CHECKSUM_ALGORITHM_CRC32 or SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 - if true
then use big endian byteorder when converting byte strings to integers.
@return page checksum */
uint32_t
page_zip_calc_checksum(
	const void*			data,
	ulint				size,
	srv_checksum_algorithm_t	algo,
	bool				use_legacy_big_endian /* = false */)
{
	uLong		adler;
	const Bytef*	s = static_cast<const byte*>(data);

	/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
	and FIL_PAGE_FILE_FLUSH_LSN from the checksum.
	This is done by checksumming three disjoint ranges of the
	page that together cover everything else. */

	switch (algo) {
	case SRV_CHECKSUM_ALGORITHM_CRC32:
	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
		{
			ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

			ut_crc32_func_t	crc32_func = use_legacy_big_endian
				? ut_crc32_legacy_big_endian
				: ut_crc32;

			/* XOR of the CRC-32C of the three ranges:
			[FIL_PAGE_OFFSET, FIL_PAGE_LSN),
			the 2-byte FIL_PAGE_TYPE, and everything from
			FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID onwards. */
			const uint32_t	crc32
				= crc32_func(
					s + FIL_PAGE_OFFSET,
					FIL_PAGE_LSN - FIL_PAGE_OFFSET)
				^ crc32_func(
					s + FIL_PAGE_TYPE, 2)
				^ crc32_func(
					s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
					size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

			return(crc32);
		}
	case SRV_CHECKSUM_ALGORITHM_INNODB:
	case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
		ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		/* zlib Adler-32 accumulated over the same three ranges. */
		adler = adler32(0L, s + FIL_PAGE_OFFSET,
				FIL_PAGE_LSN - FIL_PAGE_OFFSET);
		adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
		adler = adler32(
			adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
			static_cast<uInt>(size)
			- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		return(uint32_t(adler));
	case SRV_CHECKSUM_ALGORITHM_NONE:
	case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
		return(BUF_NO_CHECKSUM_MAGIC);
	/* no default so the compiler will emit a warning if new enum
	is added and not handled here */
	}

	ut_error;
	return(0);
}
5047
5048/**********************************************************************//**
5049Verify a compressed page's checksum.
5050@return TRUE if the stored checksum is valid according to the value of
5051innodb_checksum_algorithm */
5052ibool
5053page_zip_verify_checksum(
5054/*=====================*/
5055 const void* data, /*!< in: compressed page */
5056 ulint size) /*!< in: size of compressed page */
5057{
5058 ib_uint32_t stored;
5059 ib_uint32_t calc;
5060
5061 stored = static_cast<ib_uint32_t>(mach_read_from_4(
5062 static_cast<const unsigned char*>(data) + FIL_PAGE_SPACE_OR_CHKSUM));
5063
5064 ulint page_no MY_ATTRIBUTE((unused)) =
5065 mach_read_from_4(static_cast<const unsigned char*>
5066 (data) + FIL_PAGE_OFFSET);
5067 ulint space_id MY_ATTRIBUTE((unused)) =
5068 mach_read_from_4(static_cast<const unsigned char*>
5069 (data) + FIL_PAGE_SPACE_ID);
5070 const page_id_t page_id(space_id, page_no);
5071
5072 compile_time_assert(!(FIL_PAGE_LSN % 8));
5073
5074 /* Check if page is empty */
5075 if (stored == 0
5076 && *reinterpret_cast<const ib_uint64_t*>(static_cast<const char*>(
5077 data)
5078 + FIL_PAGE_LSN) == 0) {
5079 /* make sure that the page is really empty */
5080#ifdef UNIV_INNOCHECKSUM
5081 ulint i;
5082 for (i = 0; i < size; i++) {
5083 if (*((const char*) data + i) != 0)
5084 break;
5085 }
5086 if (i >= size) {
5087 if (log_file) {
5088 fprintf(log_file, "Page::%llu is empty and"
5089 " uncorrupted\n", cur_page_num);
5090 }
5091
5092 return(TRUE);
5093 }
5094#else
5095 for (ulint i = 0; i < size; i++) {
5096 if (*((const char*) data + i) != 0) {
5097 return(FALSE);
5098 }
5099 }
5100 /* Empty page */
5101 return(TRUE);
5102#endif /* UNIV_INNOCHECKSUM */
5103 }
5104
5105 const srv_checksum_algorithm_t curr_algo =
5106 static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);
5107
5108 if (curr_algo == SRV_CHECKSUM_ALGORITHM_NONE) {
5109 return(TRUE);
5110 }
5111
5112 calc = static_cast<ib_uint32_t>(page_zip_calc_checksum(
5113 data, size, curr_algo));
5114
5115#ifdef UNIV_INNOCHECKSUM
5116 if (log_file) {
5117 fprintf(log_file, "page::%llu;"
5118 " %s checksum: calculated = %u;"
5119 " recorded = %u\n", cur_page_num,
5120 buf_checksum_algorithm_name(
5121 static_cast<srv_checksum_algorithm_t>(
5122 srv_checksum_algorithm)),
5123 calc, stored);
5124 }
5125
5126 if (!strict_verify) {
5127
5128 const uint32_t crc32 = page_zip_calc_checksum(
5129 data, size, SRV_CHECKSUM_ALGORITHM_CRC32);
5130
5131 if (log_file) {
5132 fprintf(log_file, "page::%llu: crc32 checksum:"
5133 " calculated = %u; recorded = %u\n",
5134 cur_page_num, crc32, stored);
5135 fprintf(log_file, "page::%llu: none checksum:"
5136 " calculated = %lu; recorded = %u\n",
5137 cur_page_num, BUF_NO_CHECKSUM_MAGIC, stored);
5138 }
5139 }
5140#endif /* UNIV_INNOCHECKSUM */
5141
5142 if (stored == calc) {
5143 return(TRUE);
5144 }
5145
5146 bool legacy_checksum_checked = false;
5147
5148 switch (curr_algo) {
5149 case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
5150 case SRV_CHECKSUM_ALGORITHM_CRC32: {
5151
5152 if (stored == BUF_NO_CHECKSUM_MAGIC) {
5153#ifndef UNIV_INNOCHECKSUM
5154 if (curr_algo
5155 == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
5156 page_warn_strict_checksum(
5157 curr_algo,
5158 SRV_CHECKSUM_ALGORITHM_NONE,
5159 page_id);
5160 }
5161#endif /* UNIV_INNOCHECKSUM */
5162
5163 return(TRUE);
5164 }
5165
5166 /* We need to check whether the stored checksum matches legacy
5167 big endian checksum or Innodb checksum. We optimize the order
5168 based on earlier results. if earlier we have found pages
5169 matching legacy big endian checksum, we try to match it first.
5170 Otherwise we check innodb checksum first. */
5171 if (legacy_big_endian_checksum) {
5172 const uint32_t calculated =
5173 page_zip_calc_checksum(data, size, curr_algo, true);
5174 if (stored == calculated) {
5175
5176 return(TRUE);
5177 }
5178 legacy_checksum_checked = true;
5179 }
5180
5181 uint32_t calculated =
5182 page_zip_calc_checksum(data, size, SRV_CHECKSUM_ALGORITHM_INNODB);
5183
5184 if (stored == calculated) {
5185
5186#ifndef UNIV_INNOCHECKSUM
5187 if (curr_algo
5188 == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
5189 page_warn_strict_checksum(
5190 curr_algo,
5191 SRV_CHECKSUM_ALGORITHM_INNODB,
5192 page_id);
5193 }
5194#endif /* UNIV_INNOCHECKSUM */
5195
5196 return(TRUE);
5197 }
5198
5199 calculated = page_zip_calc_checksum(
5200 data, size, curr_algo, true);
5201
5202 /* If legacy checksum is not checked, do it now. */
5203 if ((legacy_checksum_checked
5204 && stored == calculated)) {
5205 legacy_big_endian_checksum = true;
5206 return(TRUE);
5207 }
5208
5209 break;
5210 }
5211 case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
5212 case SRV_CHECKSUM_ALGORITHM_INNODB: {
5213
5214 if (stored == BUF_NO_CHECKSUM_MAGIC) {
5215#ifndef UNIV_INNOCHECKSUM
5216 if (curr_algo
5217 == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
5218 page_warn_strict_checksum(
5219 curr_algo,
5220 SRV_CHECKSUM_ALGORITHM_NONE,
5221 page_id);
5222 }
5223#endif /* UNIV_INNOCHECKSUM */
5224
5225 return(TRUE);
5226 }
5227
5228 const uint32_t calculated = page_zip_calc_checksum(
5229 data, size, SRV_CHECKSUM_ALGORITHM_CRC32);
5230 uint32_t calculated1;
5231
5232 if (stored == calculated
5233 || stored == (calculated1 =
5234 page_zip_calc_checksum(data, size, SRV_CHECKSUM_ALGORITHM_CRC32, true))
5235 ) {
5236#ifndef UNIV_INNOCHECKSUM
5237 if (curr_algo
5238 == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
5239 page_warn_strict_checksum(
5240 curr_algo,
5241 SRV_CHECKSUM_ALGORITHM_CRC32,
5242 page_id);
5243 }
5244#endif /* UNIV_INNOCHECKSUM */
5245 return(TRUE);
5246 }
5247
5248 break;
5249 }
5250 case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: {
5251
5252 uint32_t calculated = page_zip_calc_checksum(
5253 data, size, SRV_CHECKSUM_ALGORITHM_CRC32);
5254 const uint32_t calculated1 = page_zip_calc_checksum(
5255 data, size, SRV_CHECKSUM_ALGORITHM_CRC32, true);
5256
5257 if (stored == calculated
5258 || stored == calculated1) {
5259#ifndef UNIV_INNOCHECKSUM
5260 page_warn_strict_checksum(
5261 curr_algo,
5262 SRV_CHECKSUM_ALGORITHM_CRC32,
5263 page_id);
5264#endif /* UNIV_INNOCHECKSUM */
5265 return(TRUE);
5266 }
5267
5268 calculated = page_zip_calc_checksum(
5269 data, size, SRV_CHECKSUM_ALGORITHM_INNODB);
5270
5271 if (stored == calculated) {
5272
5273#ifndef UNIV_INNOCHECKSUM
5274 page_warn_strict_checksum(
5275 curr_algo,
5276 SRV_CHECKSUM_ALGORITHM_INNODB,
5277 page_id);
5278#endif /* UNIV_INNOCHECKSUM */
5279 return(TRUE);
5280 }
5281
5282 break;
5283 }
5284 case SRV_CHECKSUM_ALGORITHM_NONE:
5285 ut_error;
5286 /* no default so the compiler will emit a warning if new enum
5287 is added and not handled here */
5288 }
5289
5290 return(FALSE);
5291}
5292