1/*****************************************************************************
2
3Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2012, Facebook Inc.
5Copyright (c) 2017, MariaDB Corporation.
6
7This program is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free Software
9Foundation; version 2 of the License.
10
11This program is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License along with
16this program; if not, write to the Free Software Foundation, Inc.,
1751 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
18
19*****************************************************************************/
20
21/**************************************************//**
22@file include/page0zip.h
23Compressed page interface
24
25Created June 2005 by Marko Makela
26*******************************************************/
27
28#ifndef page0zip_h
29#define page0zip_h
30
31#ifdef UNIV_MATERIALIZE
32# undef UNIV_INLINE
33# define UNIV_INLINE
34#endif
35
36#ifdef UNIV_INNOCHECKSUM
37#include "univ.i"
38#include "buf0buf.h"
39#include "ut0crc32.h"
40#include "buf0checksum.h"
41#include "mach0data.h"
42#include "zlib.h"
43#endif /* UNIV_INNOCHECKSUM */
44
45#ifndef UNIV_INNOCHECKSUM
46#include "mtr0types.h"
47#include "page0types.h"
48#endif /* !UNIV_INNOCHECKSUM */
49
50#include "buf0types.h"
51
52#ifndef UNIV_INNOCHECKSUM
53#include "dict0types.h"
54#include "srv0srv.h"
55#include "trx0types.h"
56#include "mem0mem.h"
57
58/* Compression level to be used by zlib. Settable by user. */
59extern uint page_zip_level;
60
61/* Default compression level. */
62#define DEFAULT_COMPRESSION_LEVEL 6
63/** Start offset of the area that will be compressed */
64#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END
65/** Size of a compressed page directory entry */
66#define PAGE_ZIP_DIR_SLOT_SIZE 2
67/** Predefine the sum of DIR_SLOT, TRX_ID & ROLL_PTR */
68#define PAGE_ZIP_CLUST_LEAF_SLOT_SIZE \
69 (PAGE_ZIP_DIR_SLOT_SIZE \
70 + DATA_TRX_ID_LEN \
71 + DATA_ROLL_PTR_LEN)
72/** Mask of record offsets */
73#define PAGE_ZIP_DIR_SLOT_MASK 0x3fffU
74/** 'owned' flag */
75#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000U
76/** 'deleted' flag */
77#define PAGE_ZIP_DIR_SLOT_DEL 0x8000U
78
79/* Whether or not to log compressed page images to avoid possible
80compression algorithm changes in zlib. */
81extern my_bool page_zip_log_pages;
82
83/**********************************************************************//**
84Determine the size of a compressed page in bytes.
85@return size in bytes */
86UNIV_INLINE
87ulint
88page_zip_get_size(
89/*==============*/
90 const page_zip_des_t* page_zip) /*!< in: compressed page */
91 MY_ATTRIBUTE((warn_unused_result));
92/**********************************************************************//**
93Set the size of a compressed page in bytes. */
94UNIV_INLINE
95void
96page_zip_set_size(
97/*==============*/
98 page_zip_des_t* page_zip, /*!< in/out: compressed page */
99 ulint size); /*!< in: size in bytes */
100
101/** Determine if a record is so big that it needs to be stored externally.
102@param[in] rec_size length of the record in bytes
103@param[in] comp nonzero=compact format
104@param[in] n_fields number of fields in the record; ignored if
105tablespace is not compressed
106@param[in] page_size page size
107@return FALSE if the entire record can be stored locally on the page */
108UNIV_INLINE
109ibool
110page_zip_rec_needs_ext(
111 ulint rec_size,
112 ulint comp,
113 ulint n_fields,
114 const page_size_t& page_size)
115 MY_ATTRIBUTE((warn_unused_result));
116
117/**********************************************************************//**
118Determine the guaranteed free space on an empty page.
119@return minimum payload size on the page */
120ulint
121page_zip_empty_size(
122/*================*/
123 ulint n_fields, /*!< in: number of columns in the index */
124 ulint zip_size) /*!< in: compressed page size in bytes */
125 MY_ATTRIBUTE((const));
126
127/** Check whether a tuple is too big for compressed table
128@param[in] index dict index object
129@param[in] entry entry for the index
130@return true if it's too big, otherwise false */
131bool
132page_zip_is_too_big(
133 const dict_index_t* index,
134 const dtuple_t* entry);
135
136/**********************************************************************//**
137Initialize a compressed page descriptor. */
138UNIV_INLINE
139void
140page_zip_des_init(
141/*==============*/
142 page_zip_des_t* page_zip); /*!< in/out: compressed page
143 descriptor */
144
145/**********************************************************************//**
146Configure the zlib allocator to use the given memory heap. */
147void
148page_zip_set_alloc(
149/*===============*/
150 void* stream, /*!< in/out: zlib stream */
151 mem_heap_t* heap); /*!< in: memory heap to use */
152
153/**********************************************************************//**
154Compress a page.
155@return TRUE on success, FALSE on failure; page_zip will be left
156intact on failure. */
157ibool
158page_zip_compress(
159/*==============*/
160 page_zip_des_t* page_zip, /*!< in: size; out: data,
161 n_blobs, m_start, m_end,
162 m_nonempty */
163 const page_t* page, /*!< in: uncompressed page */
164 dict_index_t* index, /*!< in: index of the B-tree
165 node */
166 ulint level, /*!< in: compression level */
167 const redo_page_compress_t* page_comp_info,
168 /*!< in: used for applying
169 TRUNCATE log
170 record during recovery */
171 mtr_t* mtr); /*!< in/out: mini-transaction,
172 or NULL */
173
174/**********************************************************************//**
175Write the index information for the compressed page.
176@return used size of buf */
177ulint
178page_zip_fields_encode(
179/*===================*/
180 ulint n, /*!< in: number of fields
181 to compress */
182 const dict_index_t* index, /*!< in: index comprising
183 at least n fields */
184 ulint trx_id_pos,
185 /*!< in: position of the trx_id column
186 in the index, or ULINT_UNDEFINED if
187 this is a non-leaf page */
188 byte* buf); /*!< out: buffer of (n + 1) * 2 bytes */
189
190/**********************************************************************//**
191Decompress a page. This function should tolerate errors on the compressed
192page. Instead of letting assertions fail, it will return FALSE if an
193inconsistency is detected.
194@return TRUE on success, FALSE on failure */
195ibool
196page_zip_decompress(
197/*================*/
198 page_zip_des_t* page_zip,/*!< in: data, ssize;
199 out: m_start, m_end, m_nonempty, n_blobs */
200 page_t* page, /*!< out: uncompressed page, may be trashed */
201 ibool all) /*!< in: TRUE=decompress the whole page;
202 FALSE=verify but do not copy some
203 page header fields that should not change
204 after page creation */
205 MY_ATTRIBUTE((nonnull(1,2)));
206
207#ifdef UNIV_DEBUG
208/**********************************************************************//**
209Validate a compressed page descriptor.
210@return TRUE if ok */
211UNIV_INLINE
212ibool
213page_zip_simple_validate(
214/*=====================*/
215 const page_zip_des_t* page_zip); /*!< in: compressed page
216 descriptor */
217#endif /* UNIV_DEBUG */
218
219#ifdef UNIV_ZIP_DEBUG
220/**********************************************************************//**
221Check that the compressed and decompressed pages match.
222@return TRUE if valid, FALSE if not */
223ibool
224page_zip_validate_low(
225/*==================*/
226 const page_zip_des_t* page_zip,/*!< in: compressed page */
227 const page_t* page, /*!< in: uncompressed page */
228 const dict_index_t* index, /*!< in: index of the page, if known */
229 ibool sloppy) /*!< in: FALSE=strict,
230 TRUE=ignore the MIN_REC_FLAG */
231 MY_ATTRIBUTE((nonnull(1,2)));
232/**********************************************************************//**
233Check that the compressed and decompressed pages match.
@return presumably TRUE if valid, FALSE if not, as documented for
page_zip_validate_low() -- TODO confirm against the definition */
234ibool
235page_zip_validate(
236/*==============*/
237 const page_zip_des_t* page_zip,/*!< in: compressed page */
238 const page_t* page, /*!< in: uncompressed page */
239 const dict_index_t* index) /*!< in: index of the page, if known */
240 MY_ATTRIBUTE((nonnull(1,2)));
241#endif /* UNIV_ZIP_DEBUG */
242
243/**********************************************************************//**
244Determine how big a record can be inserted without recompressing the page.
245@return a positive number indicating the maximum size of a record
246whose insertion is guaranteed to succeed, or zero or negative */
247UNIV_INLINE
248lint
249page_zip_max_ins_size(
250/*==================*/
251 const page_zip_des_t* page_zip,/*!< in: compressed page */
252 ibool is_clust)/*!< in: TRUE if clustered index */
253 MY_ATTRIBUTE((warn_unused_result));
254
255/**********************************************************************//**
256Determine if enough space is available in the modification log.
257@return TRUE if page_zip_write_rec() will succeed */
258UNIV_INLINE
259ibool
260page_zip_available(
261/*===============*/
262 const page_zip_des_t* page_zip,/*!< in: compressed page */
263 ibool is_clust,/*!< in: TRUE if clustered index */
264 ulint length, /*!< in: combined size of the record */
265 ulint create) /*!< in: nonzero=add the record to
266 the heap */
267 MY_ATTRIBUTE((warn_unused_result));
268
269/**********************************************************************//**
270Write data to the uncompressed header portion of a page. The data must
271already have been written to the uncompressed page. */
272UNIV_INLINE
273void
274page_zip_write_header(
275/*==================*/
276 page_zip_des_t* page_zip,/*!< in/out: compressed page */
277 const byte* str, /*!< in: address on the uncompressed page */
278 ulint length, /*!< in: length of the data */
279 mtr_t* mtr) /*!< in: mini-transaction, or NULL */
280 MY_ATTRIBUTE((nonnull(1,2)));
281
282/**********************************************************************//**
283Write an entire record on the compressed page. The data must already
284have been written to the uncompressed page. */
285void
286page_zip_write_rec(
287/*===============*/
288 page_zip_des_t* page_zip,/*!< in/out: compressed page */
289 const byte* rec, /*!< in: record being written */
290 dict_index_t* index, /*!< in: the index the record belongs to */
291 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
292 ulint create) /*!< in: nonzero=insert, zero=update */
293 MY_ATTRIBUTE((nonnull));
294
295/***********************************************************//**
296Parses a log record of writing a BLOB pointer of a record.
297@return end of log record or NULL */
298byte*
299page_zip_parse_write_blob_ptr(
300/*==========================*/
301 byte* ptr, /*!< in: redo log buffer */
302 byte* end_ptr,/*!< in: redo log buffer end */
303 page_t* page, /*!< in/out: uncompressed page */
304 page_zip_des_t* page_zip);/*!< in/out: compressed page */
305
306/**********************************************************************//**
307Write a BLOB pointer of a record on the leaf page of a clustered index.
308The information must already have been updated on the uncompressed page. */
309void
310page_zip_write_blob_ptr(
311/*====================*/
312 page_zip_des_t* page_zip,/*!< in/out: compressed page */
313 const byte* rec, /*!< in: record whose data is being
314 written (passed as a pointer to const) */
315 dict_index_t* index, /*!< in: index of the page */
316 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
317 ulint n, /*!< in: column index */
318 mtr_t* mtr); /*!< in: mini-transaction handle,
319 or NULL if no logging is needed */
320
321/***********************************************************//**
322Parses a log record of writing the node pointer of a record.
323@return end of log record or NULL */
324byte*
325page_zip_parse_write_node_ptr(
326/*==========================*/
327 byte* ptr, /*!< in: redo log buffer */
328 byte* end_ptr,/*!< in: redo log buffer end */
329 page_t* page, /*!< in/out: uncompressed page */
330 page_zip_des_t* page_zip);/*!< in/out: compressed page */
331
332/**********************************************************************//**
333Write the node pointer of a record on a non-leaf compressed page. */
334void
335page_zip_write_node_ptr(
336/*====================*/
337 page_zip_des_t* page_zip,/*!< in/out: compressed page */
338 byte* rec, /*!< in/out: record */
339 ulint size, /*!< in: data size of rec */
340 ulint ptr, /*!< in: node pointer */
341 mtr_t* mtr); /*!< in: mini-transaction, or NULL */
342
343/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
344@param[in,out] page_zip compressed page
345@param[in,out] rec record
346@param[in] offsets rec_get_offsets(rec, index)
347@param[in] trx_id_col field number of DB_TRX_ID (number of PK fields)
348@param[in] trx_id DB_TRX_ID value (transaction identifier)
349@param[in] roll_ptr DB_ROLL_PTR value (undo log pointer)
350@param[in,out] mtr mini-transaction, or NULL to skip logging */
351void
352page_zip_write_trx_id_and_roll_ptr(
353 page_zip_des_t* page_zip,
354 byte* rec,
355 const ulint* offsets,
356 ulint trx_id_col,
357 trx_id_t trx_id,
358 roll_ptr_t roll_ptr,
359 mtr_t* mtr = NULL)
360 MY_ATTRIBUTE((nonnull(1,2,3)));
361
362/** Parse a MLOG_ZIP_WRITE_TRX_ID record.
363@param[in] ptr redo log buffer
364@param[in] end_ptr end of redo log buffer
365@param[in,out] page uncompressed page
366@param[in,out] page_zip compressed page
367@return end of log record
368@retval NULL if the log record is incomplete */
369byte*
370page_zip_parse_write_trx_id(
371 byte* ptr,
372 byte* end_ptr,
373 page_t* page,
374 page_zip_des_t* page_zip)
375 MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
376
377/**********************************************************************//**
378Write the "deleted" flag of a record on a compressed page. The flag must
379already have been written on the uncompressed page. */
380void
381page_zip_rec_set_deleted(
382/*=====================*/
383 page_zip_des_t* page_zip,/*!< in/out: compressed page */
384 const byte* rec, /*!< in: record on the uncompressed page */
385 ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
386 MY_ATTRIBUTE((nonnull));
387
388/**********************************************************************//**
389Write the "owned" flag of a record on a compressed page. The n_owned field
390must already have been written on the uncompressed page. */
391void
392page_zip_rec_set_owned(
393/*===================*/
394 page_zip_des_t* page_zip,/*!< in/out: compressed page */
395 const byte* rec, /*!< in: record on the uncompressed page */
396 ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
397 MY_ATTRIBUTE((nonnull));
398
399/**********************************************************************//**
400Insert a record to the dense page directory. */
401void
402page_zip_dir_insert(
403/*================*/
404 page_zip_des_t* page_zip,/*!< in/out: compressed page */
405 const byte* prev_rec,/*!< in: record after which to insert */
406 const byte* free_rec,/*!< in: record from which rec was
407 allocated, or NULL */
408 byte* rec); /*!< in: record to insert */
409
410/**********************************************************************//**
411Shift the dense page directory and the array of BLOB pointers
412when a record is deleted. */
413void
414page_zip_dir_delete(
415/*================*/
416 page_zip_des_t* page_zip, /*!< in/out: compressed page */
417 byte* rec, /*!< in: deleted record */
418 const dict_index_t* index, /*!< in: index of rec */
419 const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
420 const byte* free) /*!< in: previous start of
421 the free list (not listed in the
 nonnull attribute below, so presumably
 may be NULL -- TODO confirm) */
422 MY_ATTRIBUTE((nonnull(1,2,3,4)));
423
424/**********************************************************************//**
425Add a slot to the dense page directory. */
426void
427page_zip_dir_add_slot(
428/*==================*/
429 page_zip_des_t* page_zip, /*!< in/out: compressed page */
430 ulint is_clustered) /*!< in: nonzero for clustered index,
431 zero for others */
432 MY_ATTRIBUTE((nonnull));
433
434/***********************************************************//**
435Parses a log record of writing to the header of a page.
436@return end of log record or NULL */
437byte*
438page_zip_parse_write_header(
439/*========================*/
440 byte* ptr, /*!< in: redo log buffer */
441 byte* end_ptr,/*!< in: redo log buffer end */
442 page_t* page, /*!< in/out: uncompressed page */
443 page_zip_des_t* page_zip);/*!< in/out: compressed page */
444
445/**********************************************************************//**
446Write data to the uncompressed header portion of a page. The data must
447already have been written to the uncompressed page.
448However, the data portion of the uncompressed page may differ from
449the compressed page when a record is being inserted in
450page_cur_insert_rec_low().
NOTE(review): page_zip_write_header() is already declared earlier in this
header with an identical signature; this second prototype is redundant and
the two declarations could be merged into one. */
451UNIV_INLINE
452void
453page_zip_write_header(
454/*==================*/
455 page_zip_des_t* page_zip,/*!< in/out: compressed page */
456 const byte* str, /*!< in: address on the uncompressed page */
457 ulint length, /*!< in: length of the data */
458 mtr_t* mtr) /*!< in: mini-transaction, or NULL */
459 MY_ATTRIBUTE((nonnull(1,2)));
460
461/**********************************************************************//**
462Reorganize and compress a page. This is a low-level operation for
463compressed pages, to be used when page_zip_compress() fails.
464On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
465The function btr_page_reorganize() should be preferred whenever possible.
466IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
467non-clustered index, the caller must update the insert buffer free
468bits in the same mini-transaction in such a way that the modification
469will be redo-logged.
470@return TRUE on success, FALSE on failure; page_zip will be left
471intact on failure, but page will be overwritten. */
472ibool
473page_zip_reorganize(
474/*================*/
475 buf_block_t* block, /*!< in/out: page with compressed page;
476 on the compressed page, in: size;
477 out: data, n_blobs,
478 m_start, m_end, m_nonempty */
479 dict_index_t* index, /*!< in: index of the B-tree node */
480 mtr_t* mtr) /*!< in: mini-transaction */
481 MY_ATTRIBUTE((nonnull));
482
483/**********************************************************************//**
484Copy the records of a page byte for byte. Do not copy the page header
485or trailer, except those B-tree header fields that are directly
486related to the storage of records. Also copy PAGE_MAX_TRX_ID.
487NOTE: The caller must update the lock table and the adaptive hash index. */
488void
489page_zip_copy_recs(
490/*===============*/
491 page_zip_des_t* page_zip, /*!< out: copy of src_zip
492 (n_blobs, m_start, m_end,
493 m_nonempty, data[0..size-1]) */
494 page_t* page, /*!< out: copy of src */
495 const page_zip_des_t* src_zip, /*!< in: compressed page */
496 const page_t* src, /*!< in: page */
497 dict_index_t* index, /*!< in: index of the B-tree */
498 mtr_t* mtr); /*!< in: mini-transaction */
499
500/**********************************************************************//**
501Parses a log record of compressing an index page.
502@return end of log record or NULL */
503byte*
504page_zip_parse_compress(
505/*====================*/
506 byte* ptr, /*!< in: buffer */
507 byte* end_ptr, /*!< in: buffer end */
508 page_t* page, /*!< out: uncompressed page */
509 page_zip_des_t* page_zip); /*!< out: compressed page */
510
511#endif /* !UNIV_INNOCHECKSUM */
512
513/** Calculate the compressed page checksum.
514@param[in] data compressed page
515@param[in] size size of compressed page
516@param[in] algo algorithm to use
517@param[in] use_legacy_big_endian only used if algo is
518SRV_CHECKSUM_ALGORITHM_CRC32 or SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 - if true
519then use big endian byteorder when converting byte strings to integers.
520@return page checksum */
521uint32_t
522page_zip_calc_checksum(
523 const void* data,
524 ulint size,
525 srv_checksum_algorithm_t algo,
526 bool use_legacy_big_endian = false);
527
528/**********************************************************************//**
529Verify a compressed page's checksum.
530@return TRUE if the stored checksum is valid according to the value of
531innodb_checksum_algorithm */
532ibool
533page_zip_verify_checksum(
534/*=====================*/
535 const void* data, /*!< in: compressed page */
536 ulint size); /*!< in: size of compressed page */
537
538#ifndef UNIV_INNOCHECKSUM
539/**********************************************************************//**
540Write a log record of compressing an index page without the data on the page. */
541UNIV_INLINE
542void
543page_zip_compress_write_log_no_data(
544/*================================*/
545 ulint level, /*!< in: compression level */
546 const page_t* page, /*!< in: page that is compressed */
547 dict_index_t* index, /*!< in: index */
548 mtr_t* mtr); /*!< in: mtr */
549/**********************************************************************//**
550Parses a log record of compressing an index page without the data.
551@return end of log record or NULL */
552UNIV_INLINE
553byte*
554page_zip_parse_compress_no_data(
555/*============================*/
556 byte* ptr, /*!< in: buffer */
557 byte* end_ptr, /*!< in: buffer end */
558 page_t* page, /*!< in: uncompressed page */
559 page_zip_des_t* page_zip, /*!< out: compressed page */
560 dict_index_t* index) /*!< in: index */
561 MY_ATTRIBUTE((nonnull(1,2)));
562
563/**********************************************************************//**
564Reset the counters used for filling
565INFORMATION_SCHEMA.innodb_cmp_per_index. */
566UNIV_INLINE
567void
568page_zip_reset_stat_per_index();
569/*===========================*/
570
571#ifdef UNIV_MATERIALIZE
572# undef UNIV_INLINE
573# define UNIV_INLINE UNIV_INLINE_ORIGINAL
574#endif
575
576#include "page0zip.ic"
577#endif /* !UNIV_INNOCHECKSUM */
578
579#endif /* page0zip_h */
580