1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2012, Facebook Inc. |
5 | Copyright (c) 2014, 2018, MariaDB Corporation. |
6 | |
7 | This program is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free Software |
9 | Foundation; version 2 of the License. |
10 | |
11 | This program is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
13 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU General Public License along with |
16 | this program; if not, write to the Free Software Foundation, Inc., |
17 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
18 | |
19 | *****************************************************************************/ |
20 | |
21 | /**************************************************//** |
22 | @file page/page0zip.cc |
23 | Compressed page interface |
24 | |
25 | Created June 2005 by Marko Makela |
26 | *******************************************************/ |
27 | |
28 | #include "page0size.h" |
29 | #include "page0zip.h" |
30 | |
31 | /** A BLOB field reference full of zero, for use in assertions and tests. |
32 | Initially, BLOB field references are set to zero, in |
33 | dtuple_convert_big_rec(). */ |
34 | const byte field_ref_zero[FIELD_REF_SIZE] = { |
35 | 0, 0, 0, 0, 0, |
36 | 0, 0, 0, 0, 0, |
37 | 0, 0, 0, 0, 0, |
38 | 0, 0, 0, 0, 0, |
39 | }; |
40 | |
41 | #ifndef UNIV_INNOCHECKSUM |
42 | #include "page0page.h" |
43 | #include "mtr0log.h" |
44 | #include "dict0dict.h" |
45 | #include "btr0cur.h" |
46 | #include "page0types.h" |
47 | #include "log0recv.h" |
48 | #include "row0row.h" |
49 | #include "row0trunc.h" |
50 | #include "zlib.h" |
51 | #include "buf0buf.h" |
52 | #include "buf0types.h" |
53 | #include "buf0checksum.h" |
54 | #include "btr0sea.h" |
55 | #include "dict0boot.h" |
56 | #include "lock0lock.h" |
57 | #include "srv0srv.h" |
58 | #include "buf0lru.h" |
59 | #include "srv0mon.h" |
60 | #include "ut0crc32.h" |
61 | |
62 | #include <map> |
63 | #include <algorithm> |
64 | |
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1
(i.e. one slot per supported compressed page size) */
page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
/** Statistics on compression, indexed by index->id */
page_zip_stat_per_index_t page_zip_stat_per_index;

/** Compression level to be used by zlib. Settable by user.
(Presumably 0..9 as in zlib deflateInit(); confirm against the
system variable definition.) */
uint page_zip_level;

/** Whether or not to log compressed page images to avoid possible
compression algorithm changes in zlib. */
my_bool page_zip_log_pages;
76 | |
77 | /* Please refer to ../include/page0zip.ic for a description of the |
78 | compressed page format. */ |
79 | |
80 | /* The infimum and supremum records are omitted from the compressed page. |
81 | On compress, we compare that the records are there, and on uncompress we |
82 | restore the records. */ |
83 | /** Extra bytes of an infimum record */ |
84 | static const byte [] = { |
85 | 0x01, /* info_bits=0, n_owned=1 */ |
86 | 0x00, 0x02 /* heap_no=0, status=2 */ |
87 | /* ?, ? */ /* next=(first user rec, or supremum) */ |
88 | }; |
89 | /** Data bytes of an infimum record */ |
static const byte infimum_data[] = {
	/* The data payload of the page infimum record: the literal
	NUL-terminated string "infimum\0" (8 bytes). */
	0x69, 0x6e, 0x66, 0x69,
	0x6d, 0x75, 0x6d, 0x00	/* "infimum\0" */
};
94 | /** Extra bytes and data bytes of a supremum record */ |
95 | static const byte [] = { |
96 | /* 0x0?, */ /* info_bits=0, n_owned=1..8 */ |
97 | 0x00, 0x0b, /* heap_no=1, status=3 */ |
98 | 0x00, 0x00, /* next=0 */ |
99 | 0x73, 0x75, 0x70, 0x72, |
100 | 0x65, 0x6d, 0x75, 0x6d /* "supremum" */ |
101 | }; |
102 | |
103 | /** Assert that a block of memory is filled with zero bytes. |
104 | Compare at most sizeof(field_ref_zero) bytes. |
105 | @param b in: memory block |
106 | @param s in: size of the memory block, in bytes */ |
/** Assert that a block of memory is filled with zero bytes.
Compare at most sizeof(field_ref_zero) bytes.
NOTE: the expansion already includes a trailing semicolon.
@param b in: memory block
@param s in: size of the memory block, in bytes */
#define ASSERT_ZERO(b, s) \
	ut_ad(!memcmp(b, field_ref_zero, \
		      ut_min(static_cast<size_t>(s), sizeof field_ref_zero)));
/** Assert that a BLOB pointer is filled with zero bytes.
NOTE: unlike ASSERT_ZERO, this expansion has no trailing semicolon.
@param b in: BLOB pointer */
#define ASSERT_ZERO_BLOB(b) \
	ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
114 | |
115 | /* Enable some extra debugging output. This code can be enabled |
116 | independently of any UNIV_ debugging conditions. */ |
117 | #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG |
118 | # include <stdarg.h> |
119 | MY_ATTRIBUTE((format (printf, 1, 2))) |
120 | /**********************************************************************//** |
121 | Report a failure to decompress or compress. |
122 | @return number of characters printed */ |
123 | static |
124 | int |
125 | page_zip_fail_func( |
126 | /*===============*/ |
127 | const char* fmt, /*!< in: printf(3) format string */ |
128 | ...) /*!< in: arguments corresponding to fmt */ |
129 | { |
130 | int res; |
131 | va_list ap; |
132 | |
133 | ut_print_timestamp(stderr); |
134 | fputs(" InnoDB: " , stderr); |
135 | va_start(ap, fmt); |
136 | res = vfprintf(stderr, fmt, ap); |
137 | va_end(ap); |
138 | |
139 | return(res); |
140 | } |
/** Wrapper for page_zip_fail_func()
@param fmt_args in: printf(3) format string and arguments,
enclosed in an extra set of parentheses at the call site */
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/** Dummy wrapper for page_zip_fail_func(); expands to nothing
so release builds pay no cost for diagnostics
@param fmt_args ignored: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) /* empty */
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
149 | |
150 | /**********************************************************************//** |
151 | Determine the guaranteed free space on an empty page. |
152 | @return minimum payload size on the page */ |
153 | ulint |
154 | page_zip_empty_size( |
155 | /*================*/ |
156 | ulint n_fields, /*!< in: number of columns in the index */ |
157 | ulint zip_size) /*!< in: compressed page size in bytes */ |
158 | { |
159 | ulint size = zip_size |
160 | /* subtract the page header and the longest |
161 | uncompressed data needed for one record */ |
162 | - (PAGE_DATA |
163 | + PAGE_ZIP_CLUST_LEAF_SLOT_SIZE |
164 | + 1/* encoded heap_no==2 in page_zip_write_rec() */ |
165 | + 1/* end of modification log */ |
166 | - REC_N_NEW_EXTRA_BYTES/* omitted bytes */) |
167 | /* subtract the space for page_zip_fields_encode() */ |
168 | - compressBound(static_cast<uLong>(2 * (n_fields + 1))); |
169 | return(lint(size) > 0 ? size : 0); |
170 | } |
171 | |
172 | /** Check whether a tuple is too big for compressed table |
173 | @param[in] index dict index object |
174 | @param[in] entry entry for the index |
175 | @return true if it's too big, otherwise false */ |
176 | bool |
177 | page_zip_is_too_big( |
178 | const dict_index_t* index, |
179 | const dtuple_t* entry) |
180 | { |
181 | const page_size_t& page_size = |
182 | dict_table_page_size(index->table); |
183 | |
184 | /* Estimate the free space of an empty compressed page. |
185 | Subtract one byte for the encoded heap_no in the |
186 | modification log. */ |
187 | ulint free_space_zip = page_zip_empty_size( |
188 | index->n_fields, page_size.physical()); |
189 | ulint n_uniq = dict_index_get_n_unique_in_tree(index); |
190 | |
191 | ut_ad(dict_table_is_comp(index->table)); |
192 | ut_ad(page_size.is_compressed()); |
193 | |
194 | if (free_space_zip == 0) { |
195 | return(true); |
196 | } |
197 | |
198 | /* Subtract one byte for the encoded heap_no in the |
199 | modification log. */ |
200 | free_space_zip--; |
201 | |
202 | /* There should be enough room for two node pointer |
203 | records on an empty non-leaf page. This prevents |
204 | infinite page splits. */ |
205 | |
206 | if (entry->n_fields >= n_uniq |
207 | && (REC_NODE_PTR_SIZE |
208 | + rec_get_converted_size_comp_prefix( |
209 | index, entry->fields, n_uniq, NULL) |
210 | /* On a compressed page, there is |
211 | a two-byte entry in the dense |
212 | page directory for every record. |
213 | But there is no record header. */ |
214 | - (REC_N_NEW_EXTRA_BYTES - 2) |
215 | > free_space_zip / 2)) { |
216 | return(true); |
217 | } |
218 | |
219 | return(false); |
220 | } |
221 | |
222 | /*************************************************************//** |
223 | Gets the number of elements in the dense page directory, |
224 | including deleted records (the free list). |
225 | @return number of elements in the dense page directory */ |
226 | UNIV_INLINE |
227 | ulint |
228 | page_zip_dir_elems( |
229 | /*===============*/ |
230 | const page_zip_des_t* page_zip) /*!< in: compressed page */ |
231 | { |
232 | /* Exclude the page infimum and supremum from the record count. */ |
233 | return ulint(page_dir_get_n_heap(page_zip->data)) |
234 | - PAGE_HEAP_NO_USER_LOW; |
235 | } |
236 | |
237 | /*************************************************************//** |
238 | Gets the size of the compressed page trailer (the dense page directory), |
239 | including deleted records (the free list). |
240 | @return length of dense page directory, in bytes */ |
241 | UNIV_INLINE |
242 | ulint |
243 | page_zip_dir_size( |
244 | /*==============*/ |
245 | const page_zip_des_t* page_zip) /*!< in: compressed page */ |
246 | { |
247 | return(PAGE_ZIP_DIR_SLOT_SIZE * page_zip_dir_elems(page_zip)); |
248 | } |
249 | |
250 | /*************************************************************//** |
251 | Gets an offset to the compressed page trailer (the dense page directory), |
252 | including deleted records (the free list). |
253 | @return offset of the dense page directory */ |
254 | UNIV_INLINE |
255 | ulint |
256 | page_zip_dir_start_offs( |
257 | /*====================*/ |
258 | const page_zip_des_t* page_zip, /*!< in: compressed page */ |
259 | ulint n_dense) /*!< in: directory size */ |
260 | { |
261 | ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip)); |
262 | |
263 | return(page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE); |
264 | } |
265 | |
266 | /*************************************************************//** |
267 | Gets a pointer to the compressed page trailer (the dense page directory), |
268 | including deleted records (the free list). |
269 | @param[in] page_zip compressed page |
270 | @param[in] n_dense number of entries in the directory |
271 | @return pointer to the dense page directory */ |
272 | #define page_zip_dir_start_low(page_zip, n_dense) \ |
273 | ((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense)) |
274 | /*************************************************************//** |
275 | Gets a pointer to the compressed page trailer (the dense page directory), |
276 | including deleted records (the free list). |
277 | @param[in] page_zip compressed page |
278 | @return pointer to the dense page directory */ |
279 | #define page_zip_dir_start(page_zip) \ |
280 | page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip)) |
281 | |
282 | /*************************************************************//** |
283 | Gets the size of the compressed page trailer (the dense page directory), |
284 | only including user records (excluding the free list). |
285 | @return length of dense page directory comprising existing records, in bytes */ |
286 | UNIV_INLINE |
287 | ulint |
288 | page_zip_dir_user_size( |
289 | /*===================*/ |
290 | const page_zip_des_t* page_zip) /*!< in: compressed page */ |
291 | { |
292 | ulint size = PAGE_ZIP_DIR_SLOT_SIZE |
293 | * ulint(page_get_n_recs(page_zip->data)); |
294 | ut_ad(size <= page_zip_dir_size(page_zip)); |
295 | return(size); |
296 | } |
297 | |
298 | /*************************************************************//** |
299 | Find the slot of the given record in the dense page directory. |
300 | @return dense directory slot, or NULL if record not found */ |
301 | UNIV_INLINE |
302 | byte* |
303 | page_zip_dir_find_low( |
304 | /*==================*/ |
305 | byte* slot, /*!< in: start of records */ |
306 | byte* end, /*!< in: end of records */ |
307 | ulint offset) /*!< in: offset of user record */ |
308 | { |
309 | ut_ad(slot <= end); |
310 | |
311 | for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) { |
312 | if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK) |
313 | == offset) { |
314 | return(slot); |
315 | } |
316 | } |
317 | |
318 | return(NULL); |
319 | } |
320 | |
321 | /*************************************************************//** |
322 | Find the slot of the given non-free record in the dense page directory. |
323 | @return dense directory slot, or NULL if record not found */ |
324 | UNIV_INLINE |
325 | byte* |
326 | page_zip_dir_find( |
327 | /*==============*/ |
328 | page_zip_des_t* page_zip, /*!< in: compressed page */ |
329 | ulint offset) /*!< in: offset of user record */ |
330 | { |
331 | byte* end = page_zip->data + page_zip_get_size(page_zip); |
332 | |
333 | ut_ad(page_zip_simple_validate(page_zip)); |
334 | |
335 | return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip), |
336 | end, |
337 | offset)); |
338 | } |
339 | |
340 | /*************************************************************//** |
341 | Find the slot of the given free record in the dense page directory. |
342 | @return dense directory slot, or NULL if record not found */ |
343 | UNIV_INLINE |
344 | byte* |
345 | page_zip_dir_find_free( |
346 | /*===================*/ |
347 | page_zip_des_t* page_zip, /*!< in: compressed page */ |
348 | ulint offset) /*!< in: offset of user record */ |
349 | { |
350 | byte* end = page_zip->data + page_zip_get_size(page_zip); |
351 | |
352 | ut_ad(page_zip_simple_validate(page_zip)); |
353 | |
354 | return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip), |
355 | end - page_zip_dir_user_size(page_zip), |
356 | offset)); |
357 | } |
358 | |
359 | /*************************************************************//** |
360 | Read a given slot in the dense page directory. |
361 | @return record offset on the uncompressed page, possibly ORed with |
362 | PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */ |
363 | UNIV_INLINE |
364 | ulint |
365 | page_zip_dir_get( |
366 | /*=============*/ |
367 | const page_zip_des_t* page_zip, /*!< in: compressed page */ |
368 | ulint slot) /*!< in: slot |
369 | (0=first user record) */ |
370 | { |
371 | ut_ad(page_zip_simple_validate(page_zip)); |
372 | ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE); |
373 | return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip) |
374 | - PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1))); |
375 | } |
376 | |
377 | /**********************************************************************//** |
378 | Write a log record of compressing an index page. */ |
static
void
page_zip_compress_write_log(
/*========================*/
	const page_zip_des_t*	page_zip,/*!< in: compressed page */
	const page_t*		page,	/*!< in: uncompressed page */
	dict_index_t*		index,	/*!< in: index of the B-tree node */
	mtr_t*			mtr)	/*!< in: mini-transaction */
{
	byte*	log_ptr;
	ulint	trailer_size;

	ut_ad(!dict_index_is_ibuf(index));

	/* 11 bytes for the initial log record, plus 2 + 2 for the
	two lengths written below. */
	log_ptr = mlog_open(mtr, 11 + 2 + 2);

	if (!log_ptr) {
		/* Redo logging is disabled; nothing to write. */
		return;
	}

	/* Read the number of user records. */
	trailer_size = ulint(page_dir_get_n_heap(page_zip->data))
		- PAGE_HEAP_NO_USER_LOW;
	/* Multiply by the number of uncompressed trailer bytes
	that are stored per record. */
	if (!page_is_leaf(page)) {
		trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
	} else if (dict_index_is_clust(index)) {
		trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
	} else {
		trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
	}
	/* Add the space occupied by BLOB pointers. */
	trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
	ut_a(page_zip->m_end > PAGE_DATA);
	compile_time_assert(FIL_PAGE_DATA <= PAGE_DATA);
	ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));

	log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
						     MLOG_ZIP_PAGE_COMPRESS,
						     log_ptr, mtr);
	/* Length of the compressed data, measured from FIL_PAGE_TYPE. */
	mach_write_to_2(log_ptr, ulint(page_zip->m_end - FIL_PAGE_TYPE));
	log_ptr += 2;
	/* Length of the uncompressed trailer. */
	mach_write_to_2(log_ptr, trailer_size);
	log_ptr += 2;
	mlog_close(mtr, log_ptr);

	/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
	mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
	mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
	/* Write most of the page header, the compressed stream and
	the modification log. */
	mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
			     ulint(page_zip->m_end - FIL_PAGE_TYPE));
	/* Write the uncompressed trailer of the compressed page. */
	mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
			     - trailer_size, trailer_size);
}
438 | |
439 | /******************************************************//** |
440 | Determine how many externally stored columns are contained |
441 | in existing records with smaller heap_no than rec. */ |
442 | static |
443 | ulint |
444 | page_zip_get_n_prev_extern( |
445 | /*=======================*/ |
446 | const page_zip_des_t* page_zip,/*!< in: dense page directory on |
447 | compressed page */ |
448 | const rec_t* rec, /*!< in: compact physical record |
449 | on a B-tree leaf page */ |
450 | const dict_index_t* index) /*!< in: record descriptor */ |
451 | { |
452 | const page_t* page = page_align(rec); |
453 | ulint n_ext = 0; |
454 | ulint i; |
455 | ulint left; |
456 | ulint heap_no; |
457 | ulint n_recs = page_get_n_recs(page_zip->data); |
458 | |
459 | ut_ad(page_is_leaf(page)); |
460 | ut_ad(page_is_comp(page)); |
461 | ut_ad(dict_table_is_comp(index->table)); |
462 | ut_ad(dict_index_is_clust(index)); |
463 | ut_ad(!dict_index_is_ibuf(index)); |
464 | |
465 | heap_no = rec_get_heap_no_new(rec); |
466 | ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); |
467 | left = heap_no - PAGE_HEAP_NO_USER_LOW; |
468 | if (UNIV_UNLIKELY(!left)) { |
469 | return(0); |
470 | } |
471 | |
472 | for (i = 0; i < n_recs; i++) { |
473 | const rec_t* r = page + (page_zip_dir_get(page_zip, i) |
474 | & PAGE_ZIP_DIR_SLOT_MASK); |
475 | |
476 | if (rec_get_heap_no_new(r) < heap_no) { |
477 | n_ext += rec_get_n_extern_new(r, index, |
478 | ULINT_UNDEFINED); |
479 | if (!--left) { |
480 | break; |
481 | } |
482 | } |
483 | } |
484 | |
485 | return(n_ext); |
486 | } |
487 | |
488 | /**********************************************************************//** |
489 | Encode the length of a fixed-length column. |
490 | @return buf + length of encoded val */ |
491 | static |
492 | byte* |
493 | page_zip_fixed_field_encode( |
494 | /*========================*/ |
495 | byte* buf, /*!< in: pointer to buffer where to write */ |
496 | ulint val) /*!< in: value to write */ |
497 | { |
498 | ut_ad(val >= 2); |
499 | |
500 | if (UNIV_LIKELY(val < 126)) { |
501 | /* |
502 | 0 = nullable variable field of at most 255 bytes length; |
503 | 1 = not null variable field of at most 255 bytes length; |
504 | 126 = nullable variable field with maximum length >255; |
505 | 127 = not null variable field with maximum length >255 |
506 | */ |
507 | *buf++ = (byte) val; |
508 | } else { |
509 | *buf++ = (byte) (0x80 | val >> 8); |
510 | *buf++ = (byte) val; |
511 | } |
512 | |
513 | return(buf); |
514 | } |
515 | |
516 | /**********************************************************************//** |
517 | Write the index information for the compressed page. |
518 | @return used size of buf */ |
ulint
page_zip_fields_encode(
/*===================*/
	ulint			n,	/*!< in: number of fields
					to compress */
	const dict_index_t*	index,	/*!< in: index comprising
					at least n fields */
	ulint			trx_id_pos,
					/*!< in: position of the trx_id column
					in the index, or ULINT_UNDEFINED if
					this is a non-leaf page */
	byte*			buf)	/*!< out: buffer of (n + 1) * 2 bytes */
{
	const byte*	buf_start = buf;
	ulint		i;
	ulint		col;
	ulint		trx_id_col = 0;
	/* sum of lengths of preceding non-nullable fixed fields, or 0 */
	ulint		fixed_sum = 0;

	ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);

	/* Emit one encoded entry per logical column.  Runs of
	consecutive non-nullable fixed-length fields are merged into
	a single entry (their lengths summed into fixed_sum); `col`
	counts the entries emitted so far. */
	for (i = col = 0; i < n; i++) {
		dict_field_t*	field = dict_index_get_nth_field(index, i);
		ulint		val;

		if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
			val = 1;	/* set the "not nullable" flag */
		} else {
			val = 0;	/* nullable field */
		}

		if (!field->fixed_len) {
			/* variable-length field */
			const dict_col_t*	column
				= dict_field_get_col(field);

			if (DATA_BIG_COL(column)) {
				val |= 0x7e;	/* max > 255 bytes */
			}

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			*buf++ = (byte) val;
			col++;
		} else if (val) {
			/* fixed-length non-nullable field */

			if (fixed_sum && UNIV_UNLIKELY
			    (fixed_sum + field->fixed_len
			     > DICT_MAX_FIXED_COL_LEN)) {
				/* Write out the length of the
				preceding non-nullable fields,
				to avoid exceeding the maximum
				length of a fixed-length column. */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
				if (fixed_sum) {
					/* Write out the length of any
					preceding non-nullable fields,
					and start a new trx_id column. */
					buf = page_zip_fixed_field_encode(
						buf, fixed_sum << 1 | 1);
					col++;
				}

				trx_id_col = col;
				fixed_sum = field->fixed_len;
			} else {
				/* add to the sum */
				fixed_sum += field->fixed_len;
			}
		} else {
			/* fixed-length nullable field */

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			buf = page_zip_fixed_field_encode(
				buf, ulint(field->fixed_len) << 1);
			col++;
		}
	}

	if (fixed_sum) {
		/* Write out the lengths of last fixed-length columns. */
		buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
	}

	if (trx_id_pos != ULINT_UNDEFINED) {
		/* Write out the position of the trx_id column */
		i = trx_id_col;
	} else {
		/* Write out the number of nullable fields */
		i = index->n_nullable;
	}

	/* The final value is encoded in one byte when it fits in
	7 bits, otherwise in two bytes with the high bit set. */
	if (i < 128) {
		*buf++ = (byte) i;
	} else {
		*buf++ = (byte) (0x80 | i >> 8);
		*buf++ = (byte) i;
	}

	ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
	return((ulint) (buf - buf_start));
}
644 | |
645 | /**********************************************************************//** |
646 | Populate the dense page directory from the sparse directory. */ |
static
void
page_zip_dir_encode(
/*================*/
	const page_t*	page,	/*!< in: compact page */
	byte*		buf,	/*!< in: pointer to dense page directory[-1];
				out: dense directory on compressed page */
	const rec_t**	recs)	/*!< in: pointer to an array of 0, or NULL;
				out: dense page directory sorted by ascending
				address (and heap_no) */
{
	const byte*	rec;
	ulint		status;
	ulint		min_mark;
	ulint		heap_no;
	ulint		i;
	ulint		n_heap;
	ulint		offs;

	min_mark = 0;

	if (page_is_leaf(page)) {
		status = REC_STATUS_ORDINARY;
	} else {
		status = REC_STATUS_NODE_PTR;
		if (UNIV_UNLIKELY(!page_has_prev(page))) {
			/* On a page with no predecessor, the first
			user record carries the minimum record flag. */
			min_mark = REC_INFO_MIN_REC_FLAG;
		}
	}

	n_heap = page_dir_get_n_heap(page);

	/* Traverse the list of stored records in the collation order,
	starting from the first user record. */

	rec = page + PAGE_NEW_INFIMUM;

	i = 0;

	for (;;) {
		ulint	info_bits;
		offs = rec_get_next_offs(rec, TRUE);
		if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
			break;
		}
		rec = page + offs;
		heap_no = rec_get_heap_no_new(rec);
		ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
		ut_a(heap_no < n_heap);
		ut_a(offs < srv_page_size - PAGE_DIR);
		ut_a(offs >= PAGE_ZIP_START);
		compile_time_assert(!(PAGE_ZIP_DIR_SLOT_MASK
				      & (PAGE_ZIP_DIR_SLOT_MASK + 1)));
		compile_time_assert(PAGE_ZIP_DIR_SLOT_MASK
				    >= UNIV_ZIP_SIZE_MAX - 1);

		/* Fold the "owned" and "deleted" flags into the
		unused high bits of the 2-byte directory entry. */
		if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) != 0)) {
			offs |= PAGE_ZIP_DIR_SLOT_OWNED;
		}

		info_bits = rec_get_info_bits(rec, TRUE);
		if (info_bits & REC_INFO_DELETED_FLAG) {
			info_bits &= ~REC_INFO_DELETED_FLAG;
			offs |= PAGE_ZIP_DIR_SLOT_DEL;
		}
		ut_a(info_bits == min_mark);
		/* Only the smallest user record can have
		REC_INFO_MIN_REC_FLAG set. */
		min_mark = 0;

		/* The directory is written backwards from buf. */
		mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);

		if (UNIV_LIKELY_NULL(recs)) {
			/* Ensure that each heap_no occurs at most once. */
			ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
			/* exclude infimum and supremum */
			recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
		}

		ut_a(ulint(rec_get_status(rec)) == status);
	}

	offs = page_header_get_field(page, PAGE_FREE);

	/* Traverse the free list (of deleted records). */
	while (offs) {
		ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
		rec = page + offs;

		heap_no = rec_get_heap_no_new(rec);
		ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
		ut_a(heap_no < n_heap);

		ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
		ut_a(ulint(rec_get_status(rec)) == status);

		mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);

		if (UNIV_LIKELY_NULL(recs)) {
			/* Ensure that each heap_no occurs at most once. */
			ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
			/* exclude infimum and supremum */
			recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
		}

		offs = rec_get_next_offs(rec, TRUE);
	}

	/* Ensure that each heap no occurs at least once. */
	ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
}
758 | |
extern "C" {

/**********************************************************************//**
Allocate memory for zlib.  Returns zero-filled memory carved from the
mem_heap_t that was installed as the stream's opaque pointer by
page_zip_set_alloc(). */
static
void*
page_zip_zalloc(
/*============*/
	void*	opaque,	/*!< in/out: memory heap */
	uInt	items,	/*!< in: number of items to allocate */
	uInt	size)	/*!< in: size of an item in bytes */
{
	return(mem_heap_zalloc(static_cast<mem_heap_t*>(opaque), items * size));
}

/**********************************************************************//**
Deallocate memory for zlib.  Intentionally a no-op: presumably the
whole memory heap is freed at once by the caller — confirm against
the users of page_zip_set_alloc(). */
static
void
page_zip_free(
/*==========*/
	void*	opaque MY_ATTRIBUTE((unused)),	/*!< in: memory heap */
	void*	address MY_ATTRIBUTE((unused)))/*!< in: object to free */
{
}

} /* extern "C" */
786 | |
787 | /**********************************************************************//** |
788 | Configure the zlib allocator to use the given memory heap. */ |
789 | void |
790 | page_zip_set_alloc( |
791 | /*===============*/ |
792 | void* stream, /*!< in/out: zlib stream */ |
793 | mem_heap_t* heap) /*!< in: memory heap to use */ |
794 | { |
795 | z_stream* strm = static_cast<z_stream*>(stream); |
796 | |
797 | strm->zalloc = page_zip_zalloc; |
798 | strm->zfree = page_zip_free; |
799 | strm->opaque = heap; |
800 | } |
801 | |
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
/** Symbol for enabling compression and decompression diagnostics.
(Change the leading 0 to 1 to enable this in non-debug builds too.) */
# define PAGE_ZIP_COMPRESS_DBG
#endif

#ifdef PAGE_ZIP_COMPRESS_DBG
/** Set this variable in a debugger to enable
excessive logging in page_zip_compress(). */
static bool	page_zip_compress_dbg;
/** Set this variable in a debugger to enable
binary logging of the data passed to deflate().
When this variable is nonzero, it will act
as a log file name generator. */
static unsigned	page_zip_compress_log;
816 | |
817 | /**********************************************************************//** |
818 | Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set. |
819 | @return deflate() status: Z_OK, Z_BUF_ERROR, ... */ |
820 | static |
821 | int |
822 | page_zip_compress_deflate( |
823 | /*======================*/ |
824 | FILE* logfile,/*!< in: log file, or NULL */ |
825 | z_streamp strm, /*!< in/out: compressed stream for deflate() */ |
826 | int flush) /*!< in: deflate() flushing method */ |
827 | { |
828 | int status; |
829 | if (UNIV_UNLIKELY(page_zip_compress_dbg)) { |
830 | ut_print_buf(stderr, strm->next_in, strm->avail_in); |
831 | } |
832 | if (UNIV_LIKELY_NULL(logfile)) { |
833 | if (fwrite(strm->next_in, 1, strm->avail_in, logfile) |
834 | != strm->avail_in) { |
835 | perror("fwrite" ); |
836 | } |
837 | } |
838 | status = deflate(strm, flush); |
839 | if (UNIV_UNLIKELY(page_zip_compress_dbg)) { |
840 | fprintf(stderr, " -> %d\n" , status); |
841 | } |
842 | return(status); |
843 | } |
844 | |
/* Redefine deflate().  NOTE: the redefinition below expects a local
variable named "logfile" to be in scope at every call site (declared
via FILE_LOGFILE in the enclosing function's parameter list). */
# undef deflate
/** Debug wrapper for the zlib compression routine deflate().
Log the operation if page_zip_compress_dbg is set.
@param strm in/out: compressed stream
@param flush in: flushing method
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
/** Declaration of the logfile parameter */
# define FILE_LOGFILE FILE* logfile,
/** The logfile parameter */
# define LOGFILE logfile,
#else /* PAGE_ZIP_COMPRESS_DBG */
/** Empty declaration of the logfile parameter */
# define FILE_LOGFILE
/** Missing logfile parameter */
# define LOGFILE
#endif /* PAGE_ZIP_COMPRESS_DBG */
863 | |
864 | /**********************************************************************//** |
865 | Compress the records of a node pointer page. |
866 | @return Z_OK, or a zlib error code */ |
static
int
page_zip_compress_node_ptrs(
/*========================*/
	FILE_LOGFILE
	z_stream*	c_stream,	/*!< in/out: compressed page stream */
	const rec_t**	recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	byte*		storage,	/*!< in: end of dense page directory */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	int	err = Z_OK;
	ulint*	offsets = NULL;

	do {
		const rec_t*	rec = *recs++;

		offsets = rec_get_offsets(rec, index, offsets, false,
					  ULINT_UNDEFINED, &heap);
		/* Only leaf nodes may contain externally stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));

		UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
		UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
				   rec_offs_extra_size(offsets));

		/* Compress the extra bytes: everything between the end
		of the previous record's input and this record's origin,
		excluding the fixed record header. */
		c_stream->avail_in = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES - c_stream->next_in);

		if (c_stream->avail_in) {
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {
				break;
			}
		}
		ut_ad(!c_stream->avail_in);

		/* Compress the data bytes, except node_ptr. */
		c_stream->next_in = (byte*) rec;
		c_stream->avail_in = static_cast<uInt>(
			rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);

		if (c_stream->avail_in) {
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {
				break;
			}
		}

		ut_ad(!c_stream->avail_in);

		/* next_in now points at the node pointer; copy it
		uncompressed into the trailer, indexed by heap_no,
		then skip it in the input stream. */
		memcpy(storage - REC_NODE_PTR_SIZE
		       * (rec_get_heap_no_new(rec) - 1),
		       c_stream->next_in, REC_NODE_PTR_SIZE);
		c_stream->next_in += REC_NODE_PTR_SIZE;
	} while (--n_dense);

	return(err);
}
929 | |
930 | /**********************************************************************//** |
931 | Compress the records of a leaf node of a secondary index. |
932 | @return Z_OK, or a zlib error code */ |
933 | static |
934 | int |
935 | page_zip_compress_sec( |
936 | /*==================*/ |
937 | FILE_LOGFILE |
938 | z_stream* c_stream, /*!< in/out: compressed page stream */ |
939 | const rec_t** recs, /*!< in: dense page directory |
940 | sorted by address */ |
941 | ulint n_dense) /*!< in: size of recs[] */ |
942 | { |
943 | int err = Z_OK; |
944 | |
945 | ut_ad(n_dense > 0); |
946 | |
947 | do { |
948 | const rec_t* rec = *recs++; |
949 | |
950 | /* Compress everything up to this record. */ |
951 | c_stream->avail_in = static_cast<uInt>( |
952 | rec - REC_N_NEW_EXTRA_BYTES |
953 | - c_stream->next_in); |
954 | |
955 | if (UNIV_LIKELY(c_stream->avail_in != 0)) { |
956 | UNIV_MEM_ASSERT_RW(c_stream->next_in, |
957 | c_stream->avail_in); |
958 | err = deflate(c_stream, Z_NO_FLUSH); |
959 | if (UNIV_UNLIKELY(err != Z_OK)) { |
960 | break; |
961 | } |
962 | } |
963 | |
964 | ut_ad(!c_stream->avail_in); |
965 | ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES); |
966 | |
967 | /* Skip the REC_N_NEW_EXTRA_BYTES. */ |
968 | |
969 | c_stream->next_in = (byte*) rec; |
970 | } while (--n_dense); |
971 | |
972 | return(err); |
973 | } |
974 | |
975 | /**********************************************************************//** |
976 | Compress a record of a leaf node of a clustered index that contains |
977 | externally stored columns. |
978 | @return Z_OK, or a zlib error code */ |
979 | static |
980 | int |
981 | page_zip_compress_clust_ext( |
982 | /*========================*/ |
983 | FILE_LOGFILE |
984 | z_stream* c_stream, /*!< in/out: compressed page stream */ |
985 | const rec_t* rec, /*!< in: record */ |
986 | const ulint* offsets, /*!< in: rec_get_offsets(rec) */ |
987 | ulint trx_id_col, /*!< in: position of of DB_TRX_ID */ |
988 | byte* deleted, /*!< in: dense directory entry pointing |
989 | to the head of the free list */ |
990 | byte* storage, /*!< in: end of dense page directory */ |
991 | byte** externs, /*!< in/out: pointer to the next |
992 | available BLOB pointer */ |
993 | ulint* n_blobs) /*!< in/out: number of |
994 | externally stored columns */ |
995 | { |
996 | int err; |
997 | ulint i; |
998 | |
999 | UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); |
1000 | UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), |
1001 | rec_offs_extra_size(offsets)); |
1002 | |
1003 | for (i = 0; i < rec_offs_n_fields(offsets); i++) { |
1004 | ulint len; |
1005 | const byte* src; |
1006 | |
1007 | if (UNIV_UNLIKELY(i == trx_id_col)) { |
1008 | ut_ad(!rec_offs_nth_extern(offsets, i)); |
1009 | /* Store trx_id and roll_ptr |
1010 | in uncompressed form. */ |
1011 | src = rec_get_nth_field(rec, offsets, i, &len); |
1012 | ut_ad(src + DATA_TRX_ID_LEN |
1013 | == rec_get_nth_field(rec, offsets, |
1014 | i + 1, &len)); |
1015 | ut_ad(len == DATA_ROLL_PTR_LEN); |
1016 | |
1017 | /* Compress any preceding bytes. */ |
1018 | c_stream->avail_in = static_cast<uInt>( |
1019 | src - c_stream->next_in); |
1020 | |
1021 | if (c_stream->avail_in) { |
1022 | err = deflate(c_stream, Z_NO_FLUSH); |
1023 | if (UNIV_UNLIKELY(err != Z_OK)) { |
1024 | |
1025 | return(err); |
1026 | } |
1027 | } |
1028 | |
1029 | ut_ad(!c_stream->avail_in); |
1030 | ut_ad(c_stream->next_in == src); |
1031 | |
1032 | memcpy(storage |
1033 | - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) |
1034 | * (rec_get_heap_no_new(rec) - 1), |
1035 | c_stream->next_in, |
1036 | DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
1037 | |
1038 | c_stream->next_in |
1039 | += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; |
1040 | |
1041 | /* Skip also roll_ptr */ |
1042 | i++; |
1043 | } else if (rec_offs_nth_extern(offsets, i)) { |
1044 | src = rec_get_nth_field(rec, offsets, i, &len); |
1045 | ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE); |
1046 | src += len - BTR_EXTERN_FIELD_REF_SIZE; |
1047 | |
1048 | c_stream->avail_in = static_cast<uInt>( |
1049 | src - c_stream->next_in); |
1050 | if (UNIV_LIKELY(c_stream->avail_in != 0)) { |
1051 | err = deflate(c_stream, Z_NO_FLUSH); |
1052 | if (UNIV_UNLIKELY(err != Z_OK)) { |
1053 | |
1054 | return(err); |
1055 | } |
1056 | } |
1057 | |
1058 | ut_ad(!c_stream->avail_in); |
1059 | ut_ad(c_stream->next_in == src); |
1060 | |
1061 | /* Reserve space for the data at |
1062 | the end of the space reserved for |
1063 | the compressed data and the page |
1064 | modification log. */ |
1065 | |
1066 | if (UNIV_UNLIKELY |
1067 | (c_stream->avail_out |
1068 | <= BTR_EXTERN_FIELD_REF_SIZE)) { |
1069 | /* out of space */ |
1070 | return(Z_BUF_ERROR); |
1071 | } |
1072 | |
1073 | ut_ad(*externs == c_stream->next_out |
1074 | + c_stream->avail_out |
1075 | + 1/* end of modif. log */); |
1076 | |
1077 | c_stream->next_in |
1078 | += BTR_EXTERN_FIELD_REF_SIZE; |
1079 | |
1080 | /* Skip deleted records. */ |
1081 | if (UNIV_LIKELY_NULL |
1082 | (page_zip_dir_find_low( |
1083 | storage, deleted, |
1084 | page_offset(rec)))) { |
1085 | continue; |
1086 | } |
1087 | |
1088 | (*n_blobs)++; |
1089 | c_stream->avail_out |
1090 | -= BTR_EXTERN_FIELD_REF_SIZE; |
1091 | *externs -= BTR_EXTERN_FIELD_REF_SIZE; |
1092 | |
1093 | /* Copy the BLOB pointer */ |
1094 | memcpy(*externs, c_stream->next_in |
1095 | - BTR_EXTERN_FIELD_REF_SIZE, |
1096 | BTR_EXTERN_FIELD_REF_SIZE); |
1097 | } |
1098 | } |
1099 | |
1100 | return(Z_OK); |
1101 | } |
1102 | |
1103 | /**********************************************************************//** |
1104 | Compress the records of a leaf node of a clustered index. |
1105 | @return Z_OK, or a zlib error code */ |
1106 | static |
1107 | int |
1108 | page_zip_compress_clust( |
1109 | /*====================*/ |
1110 | FILE_LOGFILE |
1111 | z_stream* c_stream, /*!< in/out: compressed page stream */ |
1112 | const rec_t** recs, /*!< in: dense page directory |
1113 | sorted by address */ |
1114 | ulint n_dense, /*!< in: size of recs[] */ |
1115 | dict_index_t* index, /*!< in: the index of the page */ |
1116 | ulint* n_blobs, /*!< in: 0; out: number of |
1117 | externally stored columns */ |
1118 | ulint trx_id_col, /*!< index of the trx_id column */ |
1119 | byte* deleted, /*!< in: dense directory entry pointing |
1120 | to the head of the free list */ |
1121 | byte* storage, /*!< in: end of dense page directory */ |
1122 | mem_heap_t* heap) /*!< in: temporary memory heap */ |
1123 | { |
1124 | int err = Z_OK; |
1125 | ulint* offsets = NULL; |
1126 | /* BTR_EXTERN_FIELD_REF storage */ |
1127 | byte* externs = storage - n_dense |
1128 | * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
1129 | |
1130 | ut_ad(*n_blobs == 0); |
1131 | |
1132 | do { |
1133 | const rec_t* rec = *recs++; |
1134 | |
1135 | offsets = rec_get_offsets(rec, index, offsets, true, |
1136 | ULINT_UNDEFINED, &heap); |
1137 | ut_ad(rec_offs_n_fields(offsets) |
1138 | == dict_index_get_n_fields(index)); |
1139 | UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); |
1140 | UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), |
1141 | rec_offs_extra_size(offsets)); |
1142 | |
1143 | /* Compress the extra bytes. */ |
1144 | c_stream->avail_in = static_cast<uInt>( |
1145 | rec - REC_N_NEW_EXTRA_BYTES |
1146 | - c_stream->next_in); |
1147 | |
1148 | if (c_stream->avail_in) { |
1149 | err = deflate(c_stream, Z_NO_FLUSH); |
1150 | if (UNIV_UNLIKELY(err != Z_OK)) { |
1151 | |
1152 | goto func_exit; |
1153 | } |
1154 | } |
1155 | ut_ad(!c_stream->avail_in); |
1156 | ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES); |
1157 | |
1158 | /* Compress the data bytes. */ |
1159 | |
1160 | c_stream->next_in = (byte*) rec; |
1161 | |
1162 | /* Check if there are any externally stored columns. |
1163 | For each externally stored column, store the |
1164 | BTR_EXTERN_FIELD_REF separately. */ |
1165 | if (rec_offs_any_extern(offsets)) { |
1166 | ut_ad(dict_index_is_clust(index)); |
1167 | |
1168 | err = page_zip_compress_clust_ext( |
1169 | LOGFILE |
1170 | c_stream, rec, offsets, trx_id_col, |
1171 | deleted, storage, &externs, n_blobs); |
1172 | |
1173 | if (UNIV_UNLIKELY(err != Z_OK)) { |
1174 | |
1175 | goto func_exit; |
1176 | } |
1177 | } else { |
1178 | ulint len; |
1179 | const byte* src; |
1180 | |
1181 | /* Store trx_id and roll_ptr in uncompressed form. */ |
1182 | src = rec_get_nth_field(rec, offsets, |
1183 | trx_id_col, &len); |
1184 | ut_ad(src + DATA_TRX_ID_LEN |
1185 | == rec_get_nth_field(rec, offsets, |
1186 | trx_id_col + 1, &len)); |
1187 | ut_ad(len == DATA_ROLL_PTR_LEN); |
1188 | UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); |
1189 | UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), |
1190 | rec_offs_extra_size(offsets)); |
1191 | |
1192 | /* Compress any preceding bytes. */ |
1193 | c_stream->avail_in = static_cast<uInt>( |
1194 | src - c_stream->next_in); |
1195 | |
1196 | if (c_stream->avail_in) { |
1197 | err = deflate(c_stream, Z_NO_FLUSH); |
1198 | if (UNIV_UNLIKELY(err != Z_OK)) { |
1199 | |
1200 | return(err); |
1201 | } |
1202 | } |
1203 | |
1204 | ut_ad(!c_stream->avail_in); |
1205 | ut_ad(c_stream->next_in == src); |
1206 | |
1207 | memcpy(storage |
1208 | - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) |
1209 | * (rec_get_heap_no_new(rec) - 1), |
1210 | c_stream->next_in, |
1211 | DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
1212 | |
1213 | c_stream->next_in |
1214 | += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; |
1215 | |
1216 | /* Skip also roll_ptr */ |
1217 | ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets)); |
1218 | } |
1219 | |
1220 | /* Compress the last bytes of the record. */ |
1221 | c_stream->avail_in = static_cast<uInt>( |
1222 | rec + rec_offs_data_size(offsets) - c_stream->next_in); |
1223 | |
1224 | if (c_stream->avail_in) { |
1225 | err = deflate(c_stream, Z_NO_FLUSH); |
1226 | if (UNIV_UNLIKELY(err != Z_OK)) { |
1227 | |
1228 | goto func_exit; |
1229 | } |
1230 | } |
1231 | ut_ad(!c_stream->avail_in); |
1232 | } while (--n_dense); |
1233 | |
1234 | func_exit: |
1235 | return(err);} |
1236 | |
1237 | /**********************************************************************//** |
1238 | Compress a page. |
1239 | @return TRUE on success, FALSE on failure; page_zip will be left |
1240 | intact on failure. */ |
1241 | ibool |
1242 | page_zip_compress( |
1243 | /*==============*/ |
1244 | page_zip_des_t* page_zip, /*!< in: size; out: data, |
1245 | n_blobs, m_start, m_end, |
1246 | m_nonempty */ |
1247 | const page_t* page, /*!< in: uncompressed page */ |
1248 | dict_index_t* index, /*!< in: index of the B-tree |
1249 | node */ |
1250 | ulint level, /*!< in: commpression level */ |
1251 | const redo_page_compress_t* page_comp_info, |
1252 | /*!< in: used for applying |
1253 | TRUNCATE log |
1254 | record during recovery */ |
1255 | mtr_t* mtr) /*!< in/out: mini-transaction, |
1256 | or NULL */ |
1257 | { |
1258 | z_stream c_stream; |
1259 | int err; |
1260 | ulint n_fields; /* number of index fields |
1261 | needed */ |
1262 | byte* fields; /*!< index field information */ |
1263 | byte* buf; /*!< compressed payload of the |
1264 | page */ |
1265 | byte* buf_end; /* end of buf */ |
1266 | ulint n_dense; |
1267 | ulint slot_size; /* amount of uncompressed bytes |
1268 | per record */ |
1269 | const rec_t** recs; /*!< dense page directory, |
1270 | sorted by address */ |
1271 | mem_heap_t* heap; |
1272 | ulint trx_id_col = ULINT_UNDEFINED; |
1273 | ulint n_blobs = 0; |
1274 | byte* storage; /* storage of uncompressed |
1275 | columns */ |
1276 | index_id_t ind_id; |
1277 | uintmax_t usec = ut_time_us(NULL); |
1278 | #ifdef PAGE_ZIP_COMPRESS_DBG |
1279 | FILE* logfile = NULL; |
1280 | #endif |
1281 | /* A local copy of srv_cmp_per_index_enabled to avoid reading that |
1282 | variable multiple times in this function since it can be changed at |
1283 | anytime. */ |
1284 | my_bool cmp_per_index_enabled; |
1285 | cmp_per_index_enabled = srv_cmp_per_index_enabled; |
1286 | |
1287 | ut_a(page_is_comp(page)); |
1288 | ut_a(fil_page_index_page_check(page)); |
1289 | ut_ad(page_simple_validate_new((page_t*) page)); |
1290 | ut_ad(page_zip_simple_validate(page_zip)); |
1291 | ut_ad(!index |
1292 | || (index |
1293 | && dict_table_is_comp(index->table) |
1294 | && !dict_index_is_ibuf(index))); |
1295 | |
1296 | UNIV_MEM_ASSERT_RW(page, srv_page_size); |
1297 | |
1298 | /* Check the data that will be omitted. */ |
1299 | ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES), |
1300 | infimum_extra, sizeof infimum_extra)); |
1301 | ut_a(!memcmp(page + PAGE_NEW_INFIMUM, |
1302 | infimum_data, sizeof infimum_data)); |
1303 | ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] |
1304 | /* info_bits == 0, n_owned <= max */ |
1305 | <= PAGE_DIR_SLOT_MAX_N_OWNED); |
1306 | ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1), |
1307 | supremum_extra_data, sizeof supremum_extra_data)); |
1308 | |
1309 | if (page_is_empty(page)) { |
1310 | ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE) |
1311 | == PAGE_NEW_SUPREMUM); |
1312 | } |
1313 | |
1314 | if (truncate_t::s_fix_up_active) { |
1315 | ut_ad(page_comp_info != NULL); |
1316 | n_fields = page_comp_info->n_fields; |
1317 | ind_id = page_comp_info->index_id; |
1318 | } else { |
1319 | if (page_is_leaf(page)) { |
1320 | n_fields = dict_index_get_n_fields(index); |
1321 | } else { |
1322 | n_fields = dict_index_get_n_unique_in_tree_nonleaf(index); |
1323 | } |
1324 | ind_id = index->id; |
1325 | } |
1326 | |
1327 | /* The dense directory excludes the infimum and supremum records. */ |
1328 | n_dense = ulint(page_dir_get_n_heap(page)) - PAGE_HEAP_NO_USER_LOW; |
1329 | #ifdef PAGE_ZIP_COMPRESS_DBG |
1330 | if (UNIV_UNLIKELY(page_zip_compress_dbg)) { |
1331 | ib::info() << "compress " |
1332 | << static_cast<void*>(page_zip) << " " |
1333 | << static_cast<const void*>(page) << " " |
1334 | << page_is_leaf(page) << " " |
1335 | << n_fields << " " << n_dense; |
1336 | } |
1337 | |
1338 | if (UNIV_UNLIKELY(page_zip_compress_log)) { |
1339 | /* Create a log file for every compression attempt. */ |
1340 | char logfilename[9]; |
1341 | snprintf(logfilename, sizeof logfilename, |
1342 | "%08x" , page_zip_compress_log++); |
1343 | logfile = fopen(logfilename, "wb" ); |
1344 | |
1345 | if (logfile) { |
1346 | /* Write the uncompressed page to the log. */ |
1347 | if (fwrite(page, 1, srv_page_size, logfile) |
1348 | != srv_page_size) { |
1349 | perror("fwrite" ); |
1350 | } |
1351 | /* Record the compressed size as zero. |
1352 | This will be overwritten at successful exit. */ |
1353 | putc(0, logfile); |
1354 | putc(0, logfile); |
1355 | putc(0, logfile); |
1356 | putc(0, logfile); |
1357 | } |
1358 | } |
1359 | #endif /* PAGE_ZIP_COMPRESS_DBG */ |
1360 | page_zip_stat[page_zip->ssize - 1].compressed++; |
1361 | if (cmp_per_index_enabled) { |
1362 | mutex_enter(&page_zip_stat_per_index_mutex); |
1363 | page_zip_stat_per_index[ind_id].compressed++; |
1364 | mutex_exit(&page_zip_stat_per_index_mutex); |
1365 | } |
1366 | |
1367 | if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE |
1368 | >= page_zip_get_size(page_zip))) { |
1369 | |
1370 | goto err_exit; |
1371 | } |
1372 | |
1373 | MONITOR_INC(MONITOR_PAGE_COMPRESS); |
1374 | |
1375 | /* Simulate a compression failure with a probability determined by |
1376 | innodb_simulate_comp_failures, only if the page has 2 or more |
1377 | records. */ |
1378 | |
1379 | if (srv_simulate_comp_failures |
1380 | && !dict_index_is_ibuf(index) |
1381 | && page_get_n_recs(page) >= 2 |
1382 | && ((ulint)(rand() % 100) < srv_simulate_comp_failures) |
1383 | && strcmp(index->table->name.m_name, "IBUF_DUMMY" )) { |
1384 | |
1385 | #ifdef UNIV_DEBUG |
1386 | ib::error() |
1387 | << "Simulating a compression failure" |
1388 | << " for table " << index->table->name |
1389 | << " index " |
1390 | << index->name() |
1391 | << " page " |
1392 | << page_get_page_no(page) |
1393 | << "(" |
1394 | << (page_is_leaf(page) ? "leaf" : "non-leaf" ) |
1395 | << ")" ; |
1396 | |
1397 | #endif |
1398 | |
1399 | goto err_exit; |
1400 | } |
1401 | |
1402 | heap = mem_heap_create(page_zip_get_size(page_zip) |
1403 | + n_fields * (2 + sizeof(ulint)) |
1404 | + REC_OFFS_HEADER_SIZE |
1405 | + n_dense * ((sizeof *recs) |
1406 | - PAGE_ZIP_DIR_SLOT_SIZE) |
1407 | + srv_page_size * 4 |
1408 | + (512 << MAX_MEM_LEVEL)); |
1409 | |
1410 | recs = static_cast<const rec_t**>( |
1411 | mem_heap_zalloc(heap, n_dense * sizeof *recs)); |
1412 | |
1413 | fields = static_cast<byte*>(mem_heap_alloc(heap, (n_fields + 1) * 2)); |
1414 | |
1415 | buf = static_cast<byte*>( |
1416 | mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA)); |
1417 | |
1418 | buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA; |
1419 | |
1420 | /* Compress the data payload. */ |
1421 | page_zip_set_alloc(&c_stream, heap); |
1422 | |
1423 | err = deflateInit2(&c_stream, static_cast<int>(level), |
1424 | Z_DEFLATED, srv_page_size_shift, |
1425 | MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY); |
1426 | ut_a(err == Z_OK); |
1427 | |
1428 | c_stream.next_out = buf; |
1429 | |
1430 | /* Subtract the space reserved for uncompressed data. */ |
1431 | /* Page header and the end marker of the modification log */ |
1432 | c_stream.avail_out = static_cast<uInt>(buf_end - buf - 1); |
1433 | |
1434 | /* Dense page directory and uncompressed columns, if any */ |
1435 | if (page_is_leaf(page)) { |
1436 | if ((index && dict_index_is_clust(index)) |
1437 | || (page_comp_info |
1438 | && (page_comp_info->type & DICT_CLUSTERED))) { |
1439 | |
1440 | if (index) { |
1441 | trx_id_col = dict_index_get_sys_col_pos( |
1442 | index, DATA_TRX_ID); |
1443 | ut_ad(trx_id_col > 0); |
1444 | ut_ad(trx_id_col != ULINT_UNDEFINED); |
1445 | } else if (page_comp_info |
1446 | && (page_comp_info->type |
1447 | & DICT_CLUSTERED)) { |
1448 | trx_id_col = page_comp_info->trx_id_pos; |
1449 | } |
1450 | |
1451 | slot_size = PAGE_ZIP_DIR_SLOT_SIZE |
1452 | + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; |
1453 | |
1454 | } else { |
1455 | /* Signal the absence of trx_id |
1456 | in page_zip_fields_encode() */ |
1457 | if (index) { |
1458 | ut_ad(dict_index_get_sys_col_pos( |
1459 | index, DATA_TRX_ID) == ULINT_UNDEFINED); |
1460 | } |
1461 | trx_id_col = 0; |
1462 | slot_size = PAGE_ZIP_DIR_SLOT_SIZE; |
1463 | } |
1464 | } else { |
1465 | slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE; |
1466 | trx_id_col = ULINT_UNDEFINED; |
1467 | } |
1468 | |
1469 | if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size |
1470 | + 6/* sizeof(zlib header and footer) */)) { |
1471 | goto zlib_error; |
1472 | } |
1473 | |
1474 | c_stream.avail_out -= static_cast<uInt>(n_dense * slot_size); |
1475 | if (truncate_t::s_fix_up_active) { |
1476 | ut_ad(page_comp_info != NULL); |
1477 | c_stream.avail_in = static_cast<uInt>( |
1478 | page_comp_info->field_len); |
1479 | for (ulint i = 0; i < page_comp_info->field_len; i++) { |
1480 | fields[i] = page_comp_info->fields[i]; |
1481 | } |
1482 | } else { |
1483 | c_stream.avail_in = static_cast<uInt>( |
1484 | page_zip_fields_encode( |
1485 | n_fields, index, trx_id_col, fields)); |
1486 | } |
1487 | c_stream.next_in = fields; |
1488 | |
1489 | if (UNIV_LIKELY(!trx_id_col)) { |
1490 | trx_id_col = ULINT_UNDEFINED; |
1491 | } |
1492 | |
1493 | UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in); |
1494 | err = deflate(&c_stream, Z_FULL_FLUSH); |
1495 | if (err != Z_OK) { |
1496 | goto zlib_error; |
1497 | } |
1498 | |
1499 | ut_ad(!c_stream.avail_in); |
1500 | |
1501 | page_zip_dir_encode(page, buf_end, recs); |
1502 | |
1503 | c_stream.next_in = (byte*) page + PAGE_ZIP_START; |
1504 | |
1505 | storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE; |
1506 | |
1507 | /* Compress the records in heap_no order. */ |
1508 | if (UNIV_UNLIKELY(!n_dense)) { |
1509 | } else if (!page_is_leaf(page)) { |
1510 | /* This is a node pointer page. */ |
1511 | err = page_zip_compress_node_ptrs(LOGFILE |
1512 | &c_stream, recs, n_dense, |
1513 | index, storage, heap); |
1514 | if (UNIV_UNLIKELY(err != Z_OK)) { |
1515 | goto zlib_error; |
1516 | } |
1517 | } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) { |
1518 | /* This is a leaf page in a secondary index. */ |
1519 | err = page_zip_compress_sec(LOGFILE |
1520 | &c_stream, recs, n_dense); |
1521 | if (UNIV_UNLIKELY(err != Z_OK)) { |
1522 | goto zlib_error; |
1523 | } |
1524 | } else { |
1525 | /* This is a leaf page in a clustered index. */ |
1526 | err = page_zip_compress_clust(LOGFILE |
1527 | &c_stream, recs, n_dense, |
1528 | index, &n_blobs, trx_id_col, |
1529 | buf_end - PAGE_ZIP_DIR_SLOT_SIZE |
1530 | * page_get_n_recs(page), |
1531 | storage, heap); |
1532 | if (UNIV_UNLIKELY(err != Z_OK)) { |
1533 | goto zlib_error; |
1534 | } |
1535 | } |
1536 | |
1537 | /* Finish the compression. */ |
1538 | ut_ad(!c_stream.avail_in); |
1539 | /* Compress any trailing garbage, in case the last record was |
1540 | allocated from an originally longer space on the free list, |
1541 | or the data of the last record from page_zip_compress_sec(). */ |
1542 | c_stream.avail_in = static_cast<uInt>( |
1543 | page_header_get_field(page, PAGE_HEAP_TOP) |
1544 | - (c_stream.next_in - page)); |
1545 | ut_a(c_stream.avail_in <= srv_page_size - PAGE_ZIP_START - PAGE_DIR); |
1546 | |
1547 | UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in); |
1548 | err = deflate(&c_stream, Z_FINISH); |
1549 | |
1550 | if (UNIV_UNLIKELY(err != Z_STREAM_END)) { |
1551 | zlib_error: |
1552 | deflateEnd(&c_stream); |
1553 | mem_heap_free(heap); |
1554 | err_exit: |
1555 | #ifdef PAGE_ZIP_COMPRESS_DBG |
1556 | if (logfile) { |
1557 | fclose(logfile); |
1558 | } |
1559 | #endif /* PAGE_ZIP_COMPRESS_DBG */ |
1560 | if (page_is_leaf(page) && index) { |
1561 | dict_index_zip_failure(index); |
1562 | } |
1563 | |
1564 | uintmax_t time_diff = ut_time_us(NULL) - usec; |
1565 | page_zip_stat[page_zip->ssize - 1].compressed_usec |
1566 | += time_diff; |
1567 | if (cmp_per_index_enabled) { |
1568 | mutex_enter(&page_zip_stat_per_index_mutex); |
1569 | page_zip_stat_per_index[ind_id].compressed_usec |
1570 | += time_diff; |
1571 | mutex_exit(&page_zip_stat_per_index_mutex); |
1572 | } |
1573 | return(FALSE); |
1574 | } |
1575 | |
1576 | err = deflateEnd(&c_stream); |
1577 | ut_a(err == Z_OK); |
1578 | |
1579 | ut_ad(buf + c_stream.total_out == c_stream.next_out); |
1580 | ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out); |
1581 | |
1582 | /* Valgrind believes that zlib does not initialize some bits |
1583 | in the last 7 or 8 bytes of the stream. Make Valgrind happy. */ |
1584 | UNIV_MEM_VALID(buf, c_stream.total_out); |
1585 | |
1586 | /* Zero out the area reserved for the modification log. |
1587 | Space for the end marker of the modification log is not |
1588 | included in avail_out. */ |
1589 | memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */); |
1590 | |
1591 | #ifdef UNIV_DEBUG |
1592 | page_zip->m_start = |
1593 | #endif /* UNIV_DEBUG */ |
1594 | page_zip->m_end = unsigned(PAGE_DATA + c_stream.total_out); |
1595 | page_zip->m_nonempty = FALSE; |
1596 | page_zip->n_blobs = unsigned(n_blobs); |
1597 | /* Copy those header fields that will not be written |
1598 | in buf_flush_init_for_writing() */ |
1599 | memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV, |
1600 | FIL_PAGE_LSN - FIL_PAGE_PREV); |
1601 | memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2); |
1602 | memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA, |
1603 | PAGE_DATA - FIL_PAGE_DATA); |
1604 | /* Copy the rest of the compressed page */ |
1605 | memcpy(page_zip->data + PAGE_DATA, buf, |
1606 | page_zip_get_size(page_zip) - PAGE_DATA); |
1607 | mem_heap_free(heap); |
1608 | #ifdef UNIV_ZIP_DEBUG |
1609 | ut_a(page_zip_validate(page_zip, page, index)); |
1610 | #endif /* UNIV_ZIP_DEBUG */ |
1611 | |
1612 | if (mtr) { |
1613 | page_zip_compress_write_log(page_zip, page, index, mtr); |
1614 | } |
1615 | |
1616 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
1617 | |
1618 | #ifdef PAGE_ZIP_COMPRESS_DBG |
1619 | if (logfile) { |
1620 | /* Record the compressed size of the block. */ |
1621 | byte sz[4]; |
1622 | mach_write_to_4(sz, c_stream.total_out); |
1623 | fseek(logfile, srv_page_size, SEEK_SET); |
1624 | if (fwrite(sz, 1, sizeof sz, logfile) != sizeof sz) { |
1625 | perror("fwrite" ); |
1626 | } |
1627 | fclose(logfile); |
1628 | } |
1629 | #endif /* PAGE_ZIP_COMPRESS_DBG */ |
1630 | uintmax_t time_diff = ut_time_us(NULL) - usec; |
1631 | page_zip_stat[page_zip->ssize - 1].compressed_ok++; |
1632 | page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff; |
1633 | if (cmp_per_index_enabled) { |
1634 | mutex_enter(&page_zip_stat_per_index_mutex); |
1635 | page_zip_stat_per_index[ind_id].compressed_ok++; |
1636 | page_zip_stat_per_index[ind_id].compressed_usec += time_diff; |
1637 | mutex_exit(&page_zip_stat_per_index_mutex); |
1638 | } |
1639 | |
1640 | if (page_is_leaf(page) && !truncate_t::s_fix_up_active) { |
1641 | dict_index_zip_success(index); |
1642 | } |
1643 | |
1644 | return(TRUE); |
1645 | } |
1646 | |
1647 | /**********************************************************************//** |
1648 | Deallocate the index information initialized by page_zip_fields_decode(). */ |
1649 | static |
1650 | void |
1651 | page_zip_fields_free( |
1652 | /*=================*/ |
1653 | dict_index_t* index) /*!< in: dummy index to be freed */ |
1654 | { |
1655 | if (index) { |
1656 | dict_table_t* table = index->table; |
1657 | dict_index_zip_pad_mutex_destroy(index); |
1658 | mem_heap_free(index->heap); |
1659 | |
1660 | dict_mem_table_free(table); |
1661 | } |
1662 | } |
1663 | |
1664 | /**********************************************************************//** |
1665 | Read the index information for the compressed page. |
1666 | @return own: dummy index describing the page, or NULL on error */ |
1667 | static |
1668 | dict_index_t* |
1669 | page_zip_fields_decode( |
1670 | /*===================*/ |
1671 | const byte* buf, /*!< in: index information */ |
1672 | const byte* end, /*!< in: end of buf */ |
1673 | ulint* trx_id_col,/*!< in: NULL for non-leaf pages; |
1674 | for leaf pages, pointer to where to store |
1675 | the position of the trx_id column */ |
1676 | bool is_spatial)/*< in: is spatial index or not */ |
1677 | { |
1678 | const byte* b; |
1679 | ulint n; |
1680 | ulint i; |
1681 | ulint val; |
1682 | dict_table_t* table; |
1683 | dict_index_t* index; |
1684 | |
1685 | /* Determine the number of fields. */ |
1686 | for (b = buf, n = 0; b < end; n++) { |
1687 | if (*b++ & 0x80) { |
1688 | b++; /* skip the second byte */ |
1689 | } |
1690 | } |
1691 | |
1692 | n--; /* n_nullable or trx_id */ |
1693 | |
1694 | if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) { |
1695 | |
1696 | page_zip_fail(("page_zip_fields_decode: n = %lu\n" , |
1697 | (ulong) n)); |
1698 | return(NULL); |
1699 | } |
1700 | |
1701 | if (UNIV_UNLIKELY(b > end)) { |
1702 | |
1703 | page_zip_fail(("page_zip_fields_decode: %p > %p\n" , |
1704 | (const void*) b, (const void*) end)); |
1705 | return(NULL); |
1706 | } |
1707 | |
1708 | table = dict_mem_table_create("ZIP_DUMMY" , NULL, n, 0, |
1709 | DICT_TF_COMPACT, 0); |
1710 | index = dict_mem_index_create(table, "ZIP_DUMMY" , 0, n); |
1711 | index->n_uniq = unsigned(n); |
1712 | /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ |
1713 | index->cached = TRUE; |
1714 | |
1715 | /* Initialize the fields. */ |
1716 | for (b = buf, i = 0; i < n; i++) { |
1717 | ulint mtype; |
1718 | ulint len; |
1719 | |
1720 | val = *b++; |
1721 | |
1722 | if (UNIV_UNLIKELY(val & 0x80)) { |
1723 | /* fixed length > 62 bytes */ |
1724 | val = (val & 0x7f) << 8 | *b++; |
1725 | len = val >> 1; |
1726 | mtype = DATA_FIXBINARY; |
1727 | } else if (UNIV_UNLIKELY(val >= 126)) { |
1728 | /* variable length with max > 255 bytes */ |
1729 | len = 0x7fff; |
1730 | mtype = DATA_BINARY; |
1731 | } else if (val <= 1) { |
1732 | /* variable length with max <= 255 bytes */ |
1733 | len = 0; |
1734 | mtype = DATA_BINARY; |
1735 | } else { |
1736 | /* fixed length < 62 bytes */ |
1737 | len = val >> 1; |
1738 | mtype = DATA_FIXBINARY; |
1739 | } |
1740 | |
1741 | dict_mem_table_add_col(table, NULL, NULL, mtype, |
1742 | val & 1 ? DATA_NOT_NULL : 0, len); |
1743 | dict_index_add_col(index, table, |
1744 | dict_table_get_nth_col(table, i), 0); |
1745 | } |
1746 | |
1747 | val = *b++; |
1748 | if (UNIV_UNLIKELY(val & 0x80)) { |
1749 | val = (val & 0x7f) << 8 | *b++; |
1750 | } |
1751 | |
1752 | /* Decode the position of the trx_id column. */ |
1753 | if (trx_id_col) { |
1754 | if (!val) { |
1755 | val = ULINT_UNDEFINED; |
1756 | } else if (UNIV_UNLIKELY(val >= n)) { |
1757 | page_zip_fields_free(index); |
1758 | index = NULL; |
1759 | } else { |
1760 | index->type = DICT_CLUSTERED; |
1761 | } |
1762 | |
1763 | *trx_id_col = val; |
1764 | } else { |
1765 | /* Decode the number of nullable fields. */ |
1766 | if (UNIV_UNLIKELY(index->n_nullable > val)) { |
1767 | page_zip_fields_free(index); |
1768 | index = NULL; |
1769 | } else { |
1770 | index->n_nullable = unsigned(val); |
1771 | } |
1772 | } |
1773 | |
1774 | /* ROW_FORMAT=COMPRESSED does not support instant ADD COLUMN */ |
1775 | index->n_core_fields = index->n_fields; |
1776 | index->n_core_null_bytes |
1777 | = UT_BITS_IN_BYTES(unsigned(index->n_nullable)); |
1778 | |
1779 | ut_ad(b == end); |
1780 | |
1781 | if (is_spatial) { |
1782 | index->type |= DICT_SPATIAL; |
1783 | } |
1784 | |
1785 | return(index); |
1786 | } |
1787 | |
1788 | /**********************************************************************//** |
1789 | Populate the sparse page directory from the dense directory. |
1790 | @return TRUE on success, FALSE on failure */ |
static MY_ATTRIBUTE((nonnull, warn_unused_result))
ibool
page_zip_dir_decode(
/*================*/
	const page_zip_des_t*	page_zip,/*!< in: dense page directory on
					compressed page */
	page_t*			page,	/*!< in: compact page with valid header;
					out: trailer and sparse page directory
					filled in */
	rec_t**			recs,	/*!< out: dense page directory sorted by
					ascending address (and heap_no) */
	ulint			n_dense)/*!< in: number of user records, and
					size of recs[] */
{
	ulint	i;
	ulint	n_recs;
	byte*	slot;

	n_recs = page_get_n_recs(page);

	/* Every user record counted in the page header must have a
	slot in the dense directory; a larger count indicates a
	corrupted compressed page. */
	if (UNIV_UNLIKELY(n_recs > n_dense)) {
		page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n" ,
			       (ulong) n_recs, (ulong) n_dense));
		return(FALSE);
	}

	/* Traverse the list of stored records in the sorting order,
	starting from the first user record. */

	/* The sparse directory grows downwards from the end of the
	page; this first slot will point to the infimum record. */
	slot = page + (srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
	UNIV_PREFETCH_RW(slot);

	/* Zero out the page trailer. */
	memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);

	mach_write_to_2(slot, PAGE_NEW_INFIMUM);
	slot -= PAGE_DIR_SLOT_SIZE;
	UNIV_PREFETCH_RW(slot);

	/* Initialize the sparse directory and copy the dense directory. */
	for (i = 0; i < n_recs; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		/* Only records flagged as owning a directory slot get
		an entry in the sparse directory. */
		if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
			mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
			slot -= PAGE_DIR_SLOT_SIZE;
			UNIV_PREFETCH_RW(slot);
		}

		/* A user record may not reside within the area reserved
		for the page header and the infimum/supremum records. */
		if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
				  < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
			page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n" ,
				       (unsigned) i, (unsigned) n_recs,
				       (ulong) offs));
			return(FALSE);
		}

		recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
	}

	mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
	{
		const page_dir_slot_t*	last_slot = page_dir_get_nth_slot(
			page, page_dir_get_n_slots(page) - 1U);

		/* The last sparse slot written must coincide with the
		last slot implied by the slot count in the page header. */
		if (UNIV_UNLIKELY(slot != last_slot)) {
			page_zip_fail(("page_zip_dir_decode 3: %p != %p\n" ,
				       (const void*) slot,
				       (const void*) last_slot));
			return(FALSE);
		}
	}

	/* Copy the rest of the dense directory. */
	for (; i < n_dense; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		/* Entries beyond n_recs describe free-list records;
		they must not carry the "owned" or "deleted" flag bits. */
		if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
			page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n" ,
				       (unsigned) i, (unsigned) n_dense,
				       (ulong) offs));
			return(FALSE);
		}

		recs[i] = page + offs;
	}

	/* Sort the dense directory by record address, so that recs[]
	can be indexed by heap_no - PAGE_HEAP_NO_USER_LOW. */
	std::sort(recs, recs + n_dense);
	return(TRUE);
}
1881 | |
1882 | /**********************************************************************//** |
1883 | Initialize the REC_N_NEW_EXTRA_BYTES of each record. |
1884 | @return TRUE on success, FALSE on failure */ |
1885 | static |
1886 | ibool |
1887 | ( |
1888 | /*=====================*/ |
1889 | const page_zip_des_t* page_zip,/*!< in: compressed page */ |
1890 | page_t* page, /*!< in/out: uncompressed page */ |
1891 | ulint info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */ |
1892 | { |
1893 | ulint n; |
1894 | ulint i; |
1895 | ulint n_owned = 1; |
1896 | ulint offs; |
1897 | rec_t* rec; |
1898 | |
1899 | n = page_get_n_recs(page); |
1900 | rec = page + PAGE_NEW_INFIMUM; |
1901 | |
1902 | for (i = 0; i < n; i++) { |
1903 | offs = page_zip_dir_get(page_zip, i); |
1904 | |
1905 | if (offs & PAGE_ZIP_DIR_SLOT_DEL) { |
1906 | info_bits |= REC_INFO_DELETED_FLAG; |
1907 | } |
1908 | if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) { |
1909 | info_bits |= n_owned; |
1910 | n_owned = 1; |
1911 | } else { |
1912 | n_owned++; |
1913 | } |
1914 | offs &= PAGE_ZIP_DIR_SLOT_MASK; |
1915 | if (UNIV_UNLIKELY(offs < PAGE_ZIP_START |
1916 | + REC_N_NEW_EXTRA_BYTES)) { |
1917 | page_zip_fail(("page_zip_set_extra_bytes 1:" |
1918 | " %u %u %lx\n" , |
1919 | (unsigned) i, (unsigned) n, |
1920 | (ulong) offs)); |
1921 | return(FALSE); |
1922 | } |
1923 | |
1924 | rec_set_next_offs_new(rec, offs); |
1925 | rec = page + offs; |
1926 | rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits; |
1927 | info_bits = 0; |
1928 | } |
1929 | |
1930 | /* Set the next pointer of the last user record. */ |
1931 | rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM); |
1932 | |
1933 | /* Set n_owned of the supremum record. */ |
1934 | page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned; |
1935 | |
1936 | /* The dense directory excludes the infimum and supremum records. */ |
1937 | n = ulint(page_dir_get_n_heap(page)) - PAGE_HEAP_NO_USER_LOW; |
1938 | |
1939 | if (i >= n) { |
1940 | if (UNIV_LIKELY(i == n)) { |
1941 | return(TRUE); |
1942 | } |
1943 | |
1944 | page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n" , |
1945 | (unsigned) i, (unsigned) n)); |
1946 | return(FALSE); |
1947 | } |
1948 | |
1949 | offs = page_zip_dir_get(page_zip, i); |
1950 | |
1951 | /* Set the extra bytes of deleted records on the free list. */ |
1952 | for (;;) { |
1953 | if (UNIV_UNLIKELY(!offs) |
1954 | || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) { |
1955 | |
1956 | page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n" , |
1957 | (ulong) offs)); |
1958 | return(FALSE); |
1959 | } |
1960 | |
1961 | rec = page + offs; |
1962 | rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */ |
1963 | |
1964 | if (++i == n) { |
1965 | break; |
1966 | } |
1967 | |
1968 | offs = page_zip_dir_get(page_zip, i); |
1969 | rec_set_next_offs_new(rec, offs); |
1970 | } |
1971 | |
1972 | /* Terminate the free list. */ |
1973 | rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */ |
1974 | rec_set_next_offs_new(rec, 0); |
1975 | |
1976 | return(TRUE); |
1977 | } |
1978 | |
1979 | /**********************************************************************//** |
1980 | Apply the modification log to a record containing externally stored |
1981 | columns. Do not copy the fields that are stored separately. |
1982 | @return pointer to modification log, or NULL on failure */ |
static
const byte*
page_zip_apply_log_ext(
/*===================*/
	rec_t*		rec,		/*!< in/out: record */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
	ulint		trx_id_col,	/*!< in: position of of DB_TRX_ID */
	const byte*	data,		/*!< in: modification log */
	const byte*	end)		/*!< in: end of modification log */
{
	ulint	i;
	ulint	len;
	/* Write position within rec; advances as log bytes are copied,
	skipping over the separately stored parts of the record. */
	byte*	next_out = rec;

	/* Check if there are any externally stored columns.
	For each externally stored column, skip the
	BTR_EXTERN_FIELD_REF. */

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		byte*	dst;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			/* Skip trx_id and roll_ptr */
			dst = rec_get_nth_field(rec, offsets,
						i, &len);
			if (UNIV_UNLIKELY(dst - next_out >= end - data)
			    || UNIV_UNLIKELY
			    (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
			    || rec_offs_nth_extern(offsets, i)) {
				page_zip_fail(("page_zip_apply_log_ext:"
					       " trx_id len %lu,"
					       " %p - %p >= %p - %p\n" ,
					       (ulong) len,
					       (const void*) dst,
					       (const void*) next_out,
					       (const void*) end,
					       (const void*) data));
				return(NULL);
			}

			/* Copy the log bytes preceding DB_TRX_ID, then
			jump over the system columns in the record. */
			memcpy(next_out, data, ulint(dst - next_out));
			data += ulint(dst - next_out);
			next_out = dst + (DATA_TRX_ID_LEN
					  + DATA_ROLL_PTR_LEN);
		} else if (rec_offs_nth_extern(offsets, i)) {
			dst = rec_get_nth_field(rec, offsets,
						i, &len);
			ut_ad(len
			      >= BTR_EXTERN_FIELD_REF_SIZE);

			/* Copy everything up to (but excluding) the
			BTR_EXTERN_FIELD_REF at the end of this field. */
			len += ulint(dst - next_out)
				- BTR_EXTERN_FIELD_REF_SIZE;

			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log_ext:"
					       " ext %p+%lu >= %p\n" ,
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}

			memcpy(next_out, data, len);
			data += len;
			/* The field reference itself is not in the log;
			skip over it in the output. */
			next_out += len
				+ BTR_EXTERN_FIELD_REF_SIZE;
		}
	}

	/* Copy the last bytes of the record. */
	len = ulint(rec_get_end(rec, offsets) - next_out);
	if (UNIV_UNLIKELY(data + len >= end)) {
		page_zip_fail(("page_zip_apply_log_ext:"
			       " last %p+%lu >= %p\n" ,
			       (const void*) data,
			       (ulong) len,
			       (const void*) end));
		return(NULL);
	}
	memcpy(next_out, data, len);
	data += len;

	/* Return the read position, just past this record's log bytes. */
	return(data);
}
2067 | |
2068 | /**********************************************************************//** |
2069 | Apply the modification log to an uncompressed page. |
2070 | Do not copy the fields that are stored separately. |
2071 | @return pointer to end of modification log, or NULL on failure */ |
static
const byte*
page_zip_apply_log(
/*===============*/
	const byte*	data,	/*!< in: modification log */
	ulint		size,	/*!< in: maximum length of the log, in bytes */
	rec_t**		recs,	/*!< in: dense page directory,
				sorted by address (indexed by
				heap_no - PAGE_HEAP_NO_USER_LOW) */
	ulint		n_dense,/*!< in: size of recs[] */
	bool		is_leaf,/*!< in: whether this is a leaf page */
	ulint		trx_id_col,/*!< in: column number of trx_id in the index,
				or ULINT_UNDEFINED if none */
	ulint		heap_status,
				/*!< in: heap_no and status bits for
				the next record to uncompress */
	dict_index_t*	index,	/*!< in: index of the page */
	ulint*		offsets)/*!< in/out: work area for
				rec_get_offsets_reverse() */
{
	const byte* const end = data + size;

	/* Each log entry starts with a 1- or 2-byte header "val":
	val >> 1 encodes the record's position in the dense directory
	(1-based), and val & 1 set means the record's data bytes are
	to be cleared instead of overwritten.  A zero byte terminates
	the log. */
	for (;;) {
		ulint	val;
		rec_t*	rec;
		ulint	len;
		ulint	hs;

		val = *data++;
		if (UNIV_UNLIKELY(!val)) {
			/* End-of-log marker: return its position. */
			return(data - 1);
		}
		if (val & 0x80) {
			/* Two-byte header: high bit flags continuation. */
			val = (val & 0x7f) << 8 | *data++;
			if (UNIV_UNLIKELY(!val)) {
				page_zip_fail(("page_zip_apply_log:"
					       " invalid val %x%x\n" ,
					       data[-2], data[-1]));
				return(NULL);
			}
		}
		if (UNIV_UNLIKELY(data >= end)) {
			page_zip_fail(("page_zip_apply_log: %p >= %p\n" ,
				       (const void*) data,
				       (const void*) end));
			return(NULL);
		}
		if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
			page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n" ,
				       (ulong) val, (ulong) n_dense));
			return(NULL);
		}

		/* Determine the heap number and status bits of the record. */
		rec = recs[(val >> 1) - 1];

		hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
		hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);

		/* This may either be an old record that is being
		overwritten (updated in place, or allocated from
		the free list), or a new record, with the next
		available_heap_no. */
		if (UNIV_UNLIKELY(hs > heap_status)) {
			page_zip_fail(("page_zip_apply_log: %lu > %lu\n" ,
				       (ulong) hs, (ulong) heap_status));
			return(NULL);
		} else if (hs == heap_status) {
			/* A new record was allocated from the heap. */
			if (UNIV_UNLIKELY(val & 1)) {
				/* Only existing records may be cleared. */
				page_zip_fail(("page_zip_apply_log:"
					       " attempting to create"
					       " deleted rec %lu\n" ,
					       (ulong) hs));
				return(NULL);
			}
			heap_status += 1 << REC_HEAP_NO_SHIFT;
		}

		mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);

		if (val & 1) {
			/* Clear the data bytes of the record. */
			mem_heap_t*	heap	= NULL;
			ulint*	offs;
			offs = rec_get_offsets(rec, index, offsets, is_leaf,
					       ULINT_UNDEFINED, &heap);
			memset(rec, 0, rec_offs_data_size(offs));

			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}
			continue;
		}

		compile_time_assert(REC_STATUS_NODE_PTR == TRUE);
		/* The record header in the log is stored in reverse
		order; recover the offsets from it. */
		rec_get_offsets_reverse(data, index,
					hs & REC_STATUS_NODE_PTR,
					offsets);
		rec_offs_make_valid(rec, index, is_leaf, offsets);

		/* Copy the extra bytes (backwards). */
		{
			byte*	start	= rec_get_start(rec, offsets);
			byte*	b	= rec - REC_N_NEW_EXTRA_BYTES;
			while (b != start) {
				*--b = *data++;
			}
		}

		/* Copy the data bytes. */
		if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
			/* Non-leaf nodes should not contain any
			externally stored columns. */
			if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
				page_zip_fail(("page_zip_apply_log:"
					       " %lu&REC_STATUS_NODE_PTR\n" ,
					       (ulong) hs));
				return(NULL);
			}

			data = page_zip_apply_log_ext(
				rec, offsets, trx_id_col, data, end);

			if (UNIV_UNLIKELY(!data)) {
				return(NULL);
			}
		} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
			len = rec_offs_data_size(offsets)
				- REC_NODE_PTR_SIZE;
			/* Copy the data bytes, except node_ptr. */
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log:"
					       " node_ptr %p+%lu >= %p\n" ,
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}
			memcpy(rec, data, len);
			data += len;
		} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
			len = rec_offs_data_size(offsets);

			/* Copy all data bytes of
			a record in a secondary index. */
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log:"
					       " sec %p+%lu >= %p\n" ,
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}

			memcpy(rec, data, len);
			data += len;
		} else {
			/* Skip DB_TRX_ID and DB_ROLL_PTR. */
			ulint	l = rec_get_nth_field_offs(offsets,
							   trx_id_col, &len);
			byte*	b;

			if (UNIV_UNLIKELY(data + l >= end)
			    || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
						    + DATA_ROLL_PTR_LEN))) {
				page_zip_fail(("page_zip_apply_log:"
					       " trx_id %p+%lu >= %p\n" ,
					       (const void*) data,
					       (ulong) l,
					       (const void*) end));
				return(NULL);
			}

			/* Copy any preceding data bytes. */
			memcpy(rec, data, l);
			data += l;

			/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
			b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
			len = ulint(rec_get_end(rec, offsets) - b);
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log:"
					       " clust %p+%lu >= %p\n" ,
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}
			memcpy(b, data, len);
			data += len;
		}
	}
}
2267 | |
2268 | /**********************************************************************//** |
2269 | Set the heap_no in a record, and skip the fixed-size record header |
2270 | that is not included in the d_stream. |
2271 | @return TRUE on success, FALSE if d_stream does not end at rec */ |
2272 | static |
2273 | ibool |
2274 | page_zip_decompress_heap_no( |
2275 | /*========================*/ |
2276 | z_stream* d_stream, /*!< in/out: compressed page stream */ |
2277 | rec_t* rec, /*!< in/out: record */ |
2278 | ulint& heap_status) /*!< in/out: heap_no and status bits */ |
2279 | { |
2280 | if (d_stream->next_out != rec - REC_N_NEW_EXTRA_BYTES) { |
2281 | /* n_dense has grown since the page was last compressed. */ |
2282 | return(FALSE); |
2283 | } |
2284 | |
2285 | /* Skip the REC_N_NEW_EXTRA_BYTES. */ |
2286 | d_stream->next_out = rec; |
2287 | |
2288 | /* Set heap_no and the status bits. */ |
2289 | mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status); |
2290 | heap_status += 1 << REC_HEAP_NO_SHIFT; |
2291 | return(TRUE); |
2292 | } |
2293 | |
2294 | /**********************************************************************//** |
2295 | Decompress the records of a node pointer page. |
2296 | @return TRUE on success, FALSE on failure */ |
static
ibool
page_zip_decompress_node_ptrs(
/*==========================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint*		offsets,	/*!< in/out: temporary offsets */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	ulint		heap_status = REC_STATUS_NODE_PTR
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	ulint		slot;
	const byte*	storage;

	/* Subtract the space reserved for uncompressed data. */
	d_stream->avail_in -= static_cast<uInt>(
		n_dense * (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE));

	/* Decompress the records in heap_no order. */
	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		/* First inflate everything up to this record's
		extra bytes. */
		d_stream->avail_out = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);

		ut_ad(d_stream->avail_out < srv_page_size
		      - PAGE_ZIP_START - PAGE_DIR);
		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
			page_zip_decompress_heap_no(
				d_stream, rec, heap_status);
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_node_ptrs:"
				       " 1 inflate(Z_SYNC_FLUSH)=%s\n" ,
				       d_stream->msg));
			goto zlib_error;
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}

		/* Read the offsets. The status bits are needed here. */
		offsets = rec_get_offsets(rec, index, offsets, false,
					  ULINT_UNDEFINED, &heap);

		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));

		/* Decompress the data bytes, except node_ptr. */
		d_stream->avail_out =static_cast<uInt>(
			rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);

		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_node_ptrs:"
				       " 2 inflate(Z_SYNC_FLUSH)=%s\n" ,
				       d_stream->msg));
			goto zlib_error;
		}

		/* Clear the node pointer in case the record
		will be deleted and the space will be reallocated
		to a smaller record. */
		memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
		d_stream->next_out += REC_NODE_PTR_SIZE;

		ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
	}

	/* Decompress any trailing garbage, in case the last record was
	allocated from an originally longer space on the free list. */
	d_stream->avail_out = static_cast<uInt>(
		page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out));
	if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " avail_out = %u\n" ,
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " inflate(Z_FINISH)=%s\n" ,
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       ulint(page_dir_get_nth_slot(page,
						   page_dir_get_n_slots(page)
						   - 1U)
			     - d_stream->next_out));
	}

#ifdef UNIV_DEBUG
	page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in);
#endif /* UNIV_DEBUG */

	/* Apply the modification log. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense, false,
						 ULINT_UNDEFINED, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = unsigned(mod_log_ptr - page_zip->data);
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	/* The modification log and the trailer must not overlap. */
	if (UNIV_UNLIKELY
	    (page_zip_get_trailer_len(page_zip,
				      dict_index_is_clust(index))
	     + page_zip->m_end >= page_zip_get_size(page_zip))) {
		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " %lu + %lu >= %lu, %lu\n" ,
			       (ulong) page_zip_get_trailer_len(
				       page_zip, dict_index_is_clust(index)),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip),
			       (ulong) dict_index_is_clust(index)));
		return(FALSE);
	}

	/* Restore the uncompressed columns in heap_no order. */
	storage = page_zip_dir_start_low(page_zip, n_dense);

	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec	= recs[slot];

		offsets = rec_get_offsets(rec, index, offsets, false,
					  ULINT_UNDEFINED, &heap);
		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));
		/* The node pointers are stored uncompressed, growing
		downwards from the dense directory. */
		storage -= REC_NODE_PTR_SIZE;

		memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
		       storage, REC_NODE_PTR_SIZE);
	}

	return(TRUE);
}
2483 | |
2484 | /**********************************************************************//** |
2485 | Decompress the records of a leaf node of a secondary index. |
2486 | @return TRUE on success, FALSE on failure */ |
static
ibool
page_zip_decompress_sec(
/*====================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint*		offsets)	/*!< in/out: temporary offsets */
{
	ulint	heap_status	= REC_STATUS_ORDINARY
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	ulint	slot;

	ut_a(!dict_index_is_clust(index));

	/* Subtract the space reserved for uncompressed data. */
	d_stream->avail_in -= static_cast<uint>(
		n_dense * PAGE_ZIP_DIR_SLOT_SIZE);

	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		/* Decompress everything up to this record. */
		d_stream->avail_out = static_cast<uint>(
			rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);

		if (UNIV_LIKELY(d_stream->avail_out)) {
			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
				page_zip_decompress_heap_no(
					d_stream, rec, heap_status);
				goto zlib_done;
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_sec:"
					       " inflate(Z_SYNC_FLUSH)=%s\n" ,
					       d_stream->msg));
				goto zlib_error;
			}
		}

		/* Reconstruct the record header that is not part of
		the compressed stream. */
		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}
	}

	/* Decompress the data of the last record and any trailing garbage,
	in case the last record was allocated from an originally longer space
	on the free list. */
	d_stream->avail_out = static_cast<uInt>(
		page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out));
	if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_sec:"
			       " avail_out = %u\n" ,
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_sec:"
			       " inflate(Z_FINISH)=%s\n" ,
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       ulint(page_dir_get_nth_slot(page,
						   page_dir_get_n_slots(page)
						   - 1U)
			     - d_stream->next_out));
	}

	ut_d(page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in));

	/* Apply the modification log. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense, true,
						 ULINT_UNDEFINED, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = unsigned(mod_log_ptr - page_zip->data);
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	/* The modification log and the trailer must not overlap. */
	if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE)
			  + page_zip->m_end >= page_zip_get_size(page_zip))) {

		page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n" ,
			       (ulong) page_zip_get_trailer_len(
				       page_zip, FALSE),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip)));
		return(FALSE);
	}

	/* There are no uncompressed columns on leaf pages of
	secondary indexes. */

	return(TRUE);
}
2619 | |
2620 | /**********************************************************************//** |
2621 | Decompress a record of a leaf node of a clustered index that contains |
2622 | externally stored columns. |
2623 | @return TRUE on success */ |
static
ibool
page_zip_decompress_clust_ext(
/*==========================*/
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t*		rec,		/*!< in/out: record */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
	ulint		trx_id_col)	/*!< in: position of of DB_TRX_ID */
{
	ulint	i;

	/* Inflate the record field by field, stopping at each part
	that is stored outside the compressed stream (the system
	columns and each BLOB pointer). */
	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		ulint	len;
		byte*	dst;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			/* Skip trx_id and roll_ptr */
			dst = rec_get_nth_field(rec, offsets, i, &len);
			if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
					  + DATA_ROLL_PTR_LEN)) {

				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " len[%lu] = %lu\n" ,
					       (ulong) i, (ulong) len));
				return(FALSE);
			}

			if (rec_offs_nth_extern(offsets, i)) {

				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " DB_TRX_ID at %lu is ext\n" ,
					       (ulong) i));
				return(FALSE);
			}

			/* Inflate only up to the start of DB_TRX_ID. */
			d_stream->avail_out = static_cast<uInt>(
				dst - d_stream->next_out);

			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " 1 inflate(Z_SYNC_FLUSH)=%s\n" ,
					       d_stream->msg));
				return(FALSE);
			}

			ut_ad(d_stream->next_out == dst);

			/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
			avoid uninitialized bytes in case the record
			is affected by page_zip_apply_log(). */
			memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

			d_stream->next_out += DATA_TRX_ID_LEN
				+ DATA_ROLL_PTR_LEN;
		} else if (rec_offs_nth_extern(offsets, i)) {
			dst = rec_get_nth_field(rec, offsets, i, &len);
			ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
			/* The BTR_EXTERN_FIELD_REF occupies the last
			bytes of the field; inflate up to it. */
			dst += len - BTR_EXTERN_FIELD_REF_SIZE;

			d_stream->avail_out = static_cast<uInt>(
				dst - d_stream->next_out);
			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " 2 inflate(Z_SYNC_FLUSH)=%s\n" ,
					       d_stream->msg));
				return(FALSE);
			}

			ut_ad(d_stream->next_out == dst);

			/* Clear the BLOB pointer in case
			the record will be deleted and the
			space will not be reused.  Note that
			the final initialization of the BLOB
			pointers (copying from "externs"
			or clearing) will have to take place
			only after the page modification log
			has been applied.  Otherwise, we
			could end up with an uninitialized
			BLOB pointer when a record is deleted,
			reallocated and deleted. */
			memset(d_stream->next_out, 0,
			       BTR_EXTERN_FIELD_REF_SIZE);
			d_stream->next_out
				+= BTR_EXTERN_FIELD_REF_SIZE;
		}
	}

	return(TRUE);
}
2730 | |
2731 | /**********************************************************************//** |
2732 | Compress the records of a leaf node of a clustered index. |
2733 | @return TRUE on success, FALSE on failure */ |
2734 | static |
2735 | ibool |
2736 | page_zip_decompress_clust( |
2737 | /*======================*/ |
2738 | page_zip_des_t* page_zip, /*!< in/out: compressed page */ |
2739 | z_stream* d_stream, /*!< in/out: compressed page stream */ |
2740 | rec_t** recs, /*!< in: dense page directory |
2741 | sorted by address */ |
2742 | ulint n_dense, /*!< in: size of recs[] */ |
2743 | dict_index_t* index, /*!< in: the index of the page */ |
2744 | ulint trx_id_col, /*!< index of the trx_id column */ |
2745 | ulint* offsets, /*!< in/out: temporary offsets */ |
2746 | mem_heap_t* heap) /*!< in: temporary memory heap */ |
2747 | { |
2748 | int err; |
2749 | ulint slot; |
2750 | ulint heap_status = REC_STATUS_ORDINARY |
2751 | | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT; |
2752 | const byte* storage; |
2753 | const byte* externs; |
2754 | |
2755 | ut_a(dict_index_is_clust(index)); |
2756 | |
2757 | /* Subtract the space reserved for uncompressed data. */ |
2758 | d_stream->avail_in -= static_cast<uInt>(n_dense) |
2759 | * (PAGE_ZIP_CLUST_LEAF_SLOT_SIZE); |
2760 | |
2761 | /* Decompress the records in heap_no order. */ |
2762 | for (slot = 0; slot < n_dense; slot++) { |
2763 | rec_t* rec = recs[slot]; |
2764 | |
2765 | d_stream->avail_out =static_cast<uInt>( |
2766 | rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out); |
2767 | |
2768 | ut_ad(d_stream->avail_out < srv_page_size |
2769 | - PAGE_ZIP_START - PAGE_DIR); |
2770 | err = inflate(d_stream, Z_SYNC_FLUSH); |
2771 | switch (err) { |
2772 | case Z_STREAM_END: |
2773 | page_zip_decompress_heap_no( |
2774 | d_stream, rec, heap_status); |
2775 | goto zlib_done; |
2776 | case Z_OK: |
2777 | case Z_BUF_ERROR: |
2778 | if (UNIV_LIKELY(!d_stream->avail_out)) { |
2779 | break; |
2780 | } |
2781 | /* fall through */ |
2782 | default: |
2783 | page_zip_fail(("page_zip_decompress_clust:" |
2784 | " 1 inflate(Z_SYNC_FLUSH)=%s\n" , |
2785 | d_stream->msg)); |
2786 | goto zlib_error; |
2787 | } |
2788 | |
2789 | if (!page_zip_decompress_heap_no( |
2790 | d_stream, rec, heap_status)) { |
2791 | ut_ad(0); |
2792 | } |
2793 | |
2794 | /* Read the offsets. The status bits are needed here. */ |
2795 | offsets = rec_get_offsets(rec, index, offsets, true, |
2796 | ULINT_UNDEFINED, &heap); |
2797 | |
2798 | /* This is a leaf page in a clustered index. */ |
2799 | |
2800 | /* Check if there are any externally stored columns. |
2801 | For each externally stored column, restore the |
2802 | BTR_EXTERN_FIELD_REF separately. */ |
2803 | |
2804 | if (rec_offs_any_extern(offsets)) { |
2805 | if (UNIV_UNLIKELY |
2806 | (!page_zip_decompress_clust_ext( |
2807 | d_stream, rec, offsets, trx_id_col))) { |
2808 | |
2809 | goto zlib_error; |
2810 | } |
2811 | } else { |
2812 | /* Skip trx_id and roll_ptr */ |
2813 | ulint len; |
2814 | byte* dst = rec_get_nth_field(rec, offsets, |
2815 | trx_id_col, &len); |
2816 | if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN |
2817 | + DATA_ROLL_PTR_LEN)) { |
2818 | |
2819 | page_zip_fail(("page_zip_decompress_clust:" |
2820 | " len = %lu\n" , (ulong) len)); |
2821 | goto zlib_error; |
2822 | } |
2823 | |
2824 | d_stream->avail_out = static_cast<uInt>( |
2825 | dst - d_stream->next_out); |
2826 | |
2827 | switch (inflate(d_stream, Z_SYNC_FLUSH)) { |
2828 | case Z_STREAM_END: |
2829 | case Z_OK: |
2830 | case Z_BUF_ERROR: |
2831 | if (!d_stream->avail_out) { |
2832 | break; |
2833 | } |
2834 | /* fall through */ |
2835 | default: |
2836 | page_zip_fail(("page_zip_decompress_clust:" |
2837 | " 2 inflate(Z_SYNC_FLUSH)=%s\n" , |
2838 | d_stream->msg)); |
2839 | goto zlib_error; |
2840 | } |
2841 | |
2842 | ut_ad(d_stream->next_out == dst); |
2843 | |
2844 | /* Clear DB_TRX_ID and DB_ROLL_PTR in order to |
2845 | avoid uninitialized bytes in case the record |
2846 | is affected by page_zip_apply_log(). */ |
2847 | memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
2848 | |
2849 | d_stream->next_out += DATA_TRX_ID_LEN |
2850 | + DATA_ROLL_PTR_LEN; |
2851 | } |
2852 | |
2853 | /* Decompress the last bytes of the record. */ |
2854 | d_stream->avail_out = static_cast<uInt>( |
2855 | rec_get_end(rec, offsets) - d_stream->next_out); |
2856 | |
2857 | switch (inflate(d_stream, Z_SYNC_FLUSH)) { |
2858 | case Z_STREAM_END: |
2859 | case Z_OK: |
2860 | case Z_BUF_ERROR: |
2861 | if (!d_stream->avail_out) { |
2862 | break; |
2863 | } |
2864 | /* fall through */ |
2865 | default: |
2866 | page_zip_fail(("page_zip_decompress_clust:" |
2867 | " 3 inflate(Z_SYNC_FLUSH)=%s\n" , |
2868 | d_stream->msg)); |
2869 | goto zlib_error; |
2870 | } |
2871 | } |
2872 | |
2873 | /* Decompress any trailing garbage, in case the last record was |
2874 | allocated from an originally longer space on the free list. */ |
2875 | d_stream->avail_out = static_cast<uInt>( |
2876 | page_header_get_field(page_zip->data, PAGE_HEAP_TOP) |
2877 | - page_offset(d_stream->next_out)); |
2878 | if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size |
2879 | - PAGE_ZIP_START - PAGE_DIR)) { |
2880 | |
2881 | page_zip_fail(("page_zip_decompress_clust:" |
2882 | " avail_out = %u\n" , |
2883 | d_stream->avail_out)); |
2884 | goto zlib_error; |
2885 | } |
2886 | |
2887 | if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) { |
2888 | page_zip_fail(("page_zip_decompress_clust:" |
2889 | " inflate(Z_FINISH)=%s\n" , |
2890 | d_stream->msg)); |
2891 | zlib_error: |
2892 | inflateEnd(d_stream); |
2893 | return(FALSE); |
2894 | } |
2895 | |
2896 | /* Note that d_stream->avail_out > 0 may hold here |
2897 | if the modification log is nonempty. */ |
2898 | |
2899 | zlib_done: |
2900 | if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) { |
2901 | ut_error; |
2902 | } |
2903 | |
2904 | { |
2905 | page_t* page = page_align(d_stream->next_out); |
2906 | |
2907 | /* Clear the unused heap space on the uncompressed page. */ |
2908 | memset(d_stream->next_out, 0, |
2909 | ulint(page_dir_get_nth_slot(page, |
2910 | page_dir_get_n_slots(page) |
2911 | - 1U) |
2912 | - d_stream->next_out)); |
2913 | } |
2914 | |
2915 | ut_d(page_zip->m_start = unsigned(PAGE_DATA + d_stream->total_in)); |
2916 | |
2917 | /* Apply the modification log. */ |
2918 | { |
2919 | const byte* mod_log_ptr; |
2920 | mod_log_ptr = page_zip_apply_log(d_stream->next_in, |
2921 | d_stream->avail_in + 1, |
2922 | recs, n_dense, true, |
2923 | trx_id_col, heap_status, |
2924 | index, offsets); |
2925 | |
2926 | if (UNIV_UNLIKELY(!mod_log_ptr)) { |
2927 | return(FALSE); |
2928 | } |
2929 | page_zip->m_end = unsigned(mod_log_ptr - page_zip->data); |
2930 | page_zip->m_nonempty = mod_log_ptr != d_stream->next_in; |
2931 | } |
2932 | |
2933 | if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE) |
2934 | + page_zip->m_end >= page_zip_get_size(page_zip))) { |
2935 | |
2936 | page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n" , |
2937 | (ulong) page_zip_get_trailer_len( |
2938 | page_zip, TRUE), |
2939 | (ulong) page_zip->m_end, |
2940 | (ulong) page_zip_get_size(page_zip))); |
2941 | return(FALSE); |
2942 | } |
2943 | |
2944 | storage = page_zip_dir_start_low(page_zip, n_dense); |
2945 | |
2946 | externs = storage - n_dense |
2947 | * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
2948 | |
2949 | /* Restore the uncompressed columns in heap_no order. */ |
2950 | |
2951 | for (slot = 0; slot < n_dense; slot++) { |
2952 | ulint i; |
2953 | ulint len; |
2954 | byte* dst; |
2955 | rec_t* rec = recs[slot]; |
2956 | bool exists = !page_zip_dir_find_free( |
2957 | page_zip, page_offset(rec)); |
2958 | offsets = rec_get_offsets(rec, index, offsets, true, |
2959 | ULINT_UNDEFINED, &heap); |
2960 | |
2961 | dst = rec_get_nth_field(rec, offsets, |
2962 | trx_id_col, &len); |
2963 | ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
2964 | storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; |
2965 | memcpy(dst, storage, |
2966 | DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
2967 | |
2968 | /* Check if there are any externally stored |
2969 | columns in this record. For each externally |
2970 | stored column, restore or clear the |
2971 | BTR_EXTERN_FIELD_REF. */ |
2972 | if (!rec_offs_any_extern(offsets)) { |
2973 | continue; |
2974 | } |
2975 | |
2976 | for (i = 0; i < rec_offs_n_fields(offsets); i++) { |
2977 | if (!rec_offs_nth_extern(offsets, i)) { |
2978 | continue; |
2979 | } |
2980 | dst = rec_get_nth_field(rec, offsets, i, &len); |
2981 | |
2982 | if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) { |
2983 | page_zip_fail(("page_zip_decompress_clust:" |
2984 | " %lu < 20\n" , |
2985 | (ulong) len)); |
2986 | return(FALSE); |
2987 | } |
2988 | |
2989 | dst += len - BTR_EXTERN_FIELD_REF_SIZE; |
2990 | |
2991 | if (UNIV_LIKELY(exists)) { |
2992 | /* Existing record: |
2993 | restore the BLOB pointer */ |
2994 | externs -= BTR_EXTERN_FIELD_REF_SIZE; |
2995 | |
2996 | if (UNIV_UNLIKELY |
2997 | (externs < page_zip->data |
2998 | + page_zip->m_end)) { |
2999 | page_zip_fail(("page_zip_" |
3000 | "decompress_clust:" |
3001 | " %p < %p + %lu\n" , |
3002 | (const void*) externs, |
3003 | (const void*) |
3004 | page_zip->data, |
3005 | (ulong) |
3006 | page_zip->m_end)); |
3007 | return(FALSE); |
3008 | } |
3009 | |
3010 | memcpy(dst, externs, |
3011 | BTR_EXTERN_FIELD_REF_SIZE); |
3012 | |
3013 | page_zip->n_blobs++; |
3014 | } else { |
3015 | /* Deleted record: |
3016 | clear the BLOB pointer */ |
3017 | memset(dst, 0, |
3018 | BTR_EXTERN_FIELD_REF_SIZE); |
3019 | } |
3020 | } |
3021 | } |
3022 | |
3023 | return(TRUE); |
3024 | } |
3025 | |
3026 | /**********************************************************************//** |
3027 | Decompress a page. This function should tolerate errors on the compressed |
3028 | page. Instead of letting assertions fail, it will return FALSE if an |
3029 | inconsistency is detected. |
3030 | @return TRUE on success, FALSE on failure */ |
3031 | static |
3032 | ibool |
3033 | page_zip_decompress_low( |
3034 | /*====================*/ |
3035 | page_zip_des_t* page_zip,/*!< in: data, ssize; |
3036 | out: m_start, m_end, m_nonempty, n_blobs */ |
3037 | page_t* page, /*!< out: uncompressed page, may be trashed */ |
3038 | ibool all) /*!< in: TRUE=decompress the whole page; |
3039 | FALSE=verify but do not copy some |
3040 | page header fields that should not change |
3041 | after page creation */ |
3042 | { |
3043 | z_stream d_stream; |
3044 | dict_index_t* index = NULL; |
3045 | rec_t** recs; /*!< dense page directory, sorted by address */ |
3046 | ulint n_dense;/* number of user records on the page */ |
3047 | ulint trx_id_col = ULINT_UNDEFINED; |
3048 | mem_heap_t* heap; |
3049 | ulint* offsets; |
3050 | |
3051 | ut_ad(page_zip_simple_validate(page_zip)); |
3052 | UNIV_MEM_ASSERT_W(page, srv_page_size); |
3053 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
3054 | |
3055 | /* The dense directory excludes the infimum and supremum records. */ |
3056 | n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW; |
3057 | if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE |
3058 | >= page_zip_get_size(page_zip))) { |
3059 | page_zip_fail(("page_zip_decompress 1: %lu %lu\n" , |
3060 | (ulong) n_dense, |
3061 | (ulong) page_zip_get_size(page_zip))); |
3062 | return(FALSE); |
3063 | } |
3064 | |
3065 | heap = mem_heap_create(n_dense * (3 * sizeof *recs) + srv_page_size); |
3066 | |
3067 | recs = static_cast<rec_t**>( |
3068 | mem_heap_alloc(heap, n_dense * sizeof *recs)); |
3069 | |
3070 | if (all) { |
3071 | /* Copy the page header. */ |
3072 | memcpy(page, page_zip->data, PAGE_DATA); |
3073 | } else { |
3074 | /* Check that the bytes that we skip are identical. */ |
3075 | #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG |
3076 | ut_a(!memcmp(FIL_PAGE_TYPE + page, |
3077 | FIL_PAGE_TYPE + page_zip->data, |
3078 | PAGE_HEADER - FIL_PAGE_TYPE)); |
3079 | ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page, |
3080 | PAGE_HEADER + PAGE_LEVEL + page_zip->data, |
3081 | PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL))); |
3082 | #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ |
3083 | |
3084 | /* Copy the mutable parts of the page header. */ |
3085 | memcpy(page, page_zip->data, FIL_PAGE_TYPE); |
3086 | memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data, |
3087 | PAGE_LEVEL - PAGE_N_DIR_SLOTS); |
3088 | |
3089 | #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG |
3090 | /* Check that the page headers match after copying. */ |
3091 | ut_a(!memcmp(page, page_zip->data, PAGE_DATA)); |
3092 | #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ |
3093 | } |
3094 | |
3095 | #ifdef UNIV_ZIP_DEBUG |
3096 | /* Clear the uncompressed page, except the header. */ |
3097 | memset(PAGE_DATA + page, 0x55, srv_page_size - PAGE_DATA); |
3098 | #endif /* UNIV_ZIP_DEBUG */ |
3099 | UNIV_MEM_INVALID(PAGE_DATA + page, srv_page_size - PAGE_DATA); |
3100 | |
3101 | /* Copy the page directory. */ |
3102 | if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs, |
3103 | n_dense))) { |
3104 | zlib_error: |
3105 | mem_heap_free(heap); |
3106 | return(FALSE); |
3107 | } |
3108 | |
3109 | /* Copy the infimum and supremum records. */ |
3110 | memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES), |
3111 | infimum_extra, sizeof infimum_extra); |
3112 | if (page_is_empty(page)) { |
3113 | rec_set_next_offs_new(page + PAGE_NEW_INFIMUM, |
3114 | PAGE_NEW_SUPREMUM); |
3115 | } else { |
3116 | rec_set_next_offs_new(page + PAGE_NEW_INFIMUM, |
3117 | page_zip_dir_get(page_zip, 0) |
3118 | & PAGE_ZIP_DIR_SLOT_MASK); |
3119 | } |
3120 | memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data); |
3121 | memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1), |
3122 | supremum_extra_data, sizeof supremum_extra_data); |
3123 | |
3124 | page_zip_set_alloc(&d_stream, heap); |
3125 | |
3126 | d_stream.next_in = page_zip->data + PAGE_DATA; |
3127 | /* Subtract the space reserved for |
3128 | the page header and the end marker of the modification log. */ |
3129 | d_stream.avail_in = static_cast<uInt>( |
3130 | page_zip_get_size(page_zip) - (PAGE_DATA + 1)); |
3131 | d_stream.next_out = page + PAGE_ZIP_START; |
3132 | d_stream.avail_out = uInt(srv_page_size - PAGE_ZIP_START); |
3133 | |
3134 | if (UNIV_UNLIKELY(inflateInit2(&d_stream, srv_page_size_shift) |
3135 | != Z_OK)) { |
3136 | ut_error; |
3137 | } |
3138 | |
3139 | /* Decode the zlib header and the index information. */ |
3140 | if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) { |
3141 | |
3142 | page_zip_fail(("page_zip_decompress:" |
3143 | " 1 inflate(Z_BLOCK)=%s\n" , d_stream.msg)); |
3144 | goto zlib_error; |
3145 | } |
3146 | |
3147 | if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) { |
3148 | |
3149 | page_zip_fail(("page_zip_decompress:" |
3150 | " 2 inflate(Z_BLOCK)=%s\n" , d_stream.msg)); |
3151 | goto zlib_error; |
3152 | } |
3153 | |
3154 | index = page_zip_fields_decode( |
3155 | page + PAGE_ZIP_START, d_stream.next_out, |
3156 | page_is_leaf(page) ? &trx_id_col : NULL, |
3157 | fil_page_get_type(page) == FIL_PAGE_RTREE); |
3158 | |
3159 | if (UNIV_UNLIKELY(!index)) { |
3160 | |
3161 | goto zlib_error; |
3162 | } |
3163 | |
3164 | /* Decompress the user records. */ |
3165 | page_zip->n_blobs = 0; |
3166 | d_stream.next_out = page + PAGE_ZIP_START; |
3167 | |
3168 | { |
3169 | /* Pre-allocate the offsets for rec_get_offsets_reverse(). */ |
3170 | ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE |
3171 | + dict_index_get_n_fields(index); |
3172 | |
3173 | offsets = static_cast<ulint*>( |
3174 | mem_heap_alloc(heap, n * sizeof(ulint))); |
3175 | |
3176 | *offsets = n; |
3177 | } |
3178 | |
3179 | /* Decompress the records in heap_no order. */ |
3180 | if (!page_is_leaf(page)) { |
3181 | /* This is a node pointer page. */ |
3182 | ulint info_bits; |
3183 | |
3184 | if (UNIV_UNLIKELY |
3185 | (!page_zip_decompress_node_ptrs(page_zip, &d_stream, |
3186 | recs, n_dense, index, |
3187 | offsets, heap))) { |
3188 | goto err_exit; |
3189 | } |
3190 | |
3191 | info_bits = page_has_prev(page) ? 0 : REC_INFO_MIN_REC_FLAG; |
3192 | |
3193 | if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page, |
3194 | info_bits))) { |
3195 | goto err_exit; |
3196 | } |
3197 | } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) { |
3198 | /* This is a leaf page in a secondary index. */ |
3199 | if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream, |
3200 | recs, n_dense, |
3201 | index, offsets))) { |
3202 | goto err_exit; |
3203 | } |
3204 | |
3205 | if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, |
3206 | page, 0))) { |
3207 | err_exit: |
3208 | page_zip_fields_free(index); |
3209 | mem_heap_free(heap); |
3210 | return(FALSE); |
3211 | } |
3212 | } else { |
3213 | /* This is a leaf page in a clustered index. */ |
3214 | if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip, |
3215 | &d_stream, recs, |
3216 | n_dense, index, |
3217 | trx_id_col, |
3218 | offsets, heap))) { |
3219 | goto err_exit; |
3220 | } |
3221 | |
3222 | if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, |
3223 | page, 0))) { |
3224 | goto err_exit; |
3225 | } |
3226 | } |
3227 | |
3228 | ut_a(page_is_comp(page)); |
3229 | UNIV_MEM_ASSERT_RW(page, srv_page_size); |
3230 | |
3231 | page_zip_fields_free(index); |
3232 | mem_heap_free(heap); |
3233 | |
3234 | return(TRUE); |
3235 | } |
3236 | |
3237 | /**********************************************************************//** |
3238 | Decompress a page. This function should tolerate errors on the compressed |
3239 | page. Instead of letting assertions fail, it will return FALSE if an |
3240 | inconsistency is detected. |
3241 | @return TRUE on success, FALSE on failure */ |
3242 | ibool |
3243 | page_zip_decompress( |
3244 | /*================*/ |
3245 | page_zip_des_t* page_zip,/*!< in: data, ssize; |
3246 | out: m_start, m_end, m_nonempty, n_blobs */ |
3247 | page_t* page, /*!< out: uncompressed page, may be trashed */ |
3248 | ibool all) /*!< in: TRUE=decompress the whole page; |
3249 | FALSE=verify but do not copy some |
3250 | page header fields that should not change |
3251 | after page creation */ |
3252 | { |
3253 | uintmax_t usec = ut_time_us(NULL); |
3254 | |
3255 | if (!page_zip_decompress_low(page_zip, page, all)) { |
3256 | return(FALSE); |
3257 | } |
3258 | |
3259 | uintmax_t time_diff = ut_time_us(NULL) - usec; |
3260 | page_zip_stat[page_zip->ssize - 1].decompressed++; |
3261 | page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff; |
3262 | |
3263 | index_id_t index_id = btr_page_get_index_id(page); |
3264 | |
3265 | if (srv_cmp_per_index_enabled) { |
3266 | mutex_enter(&page_zip_stat_per_index_mutex); |
3267 | page_zip_stat_per_index[index_id].decompressed++; |
3268 | page_zip_stat_per_index[index_id].decompressed_usec += time_diff; |
3269 | mutex_exit(&page_zip_stat_per_index_mutex); |
3270 | } |
3271 | |
3272 | /* Update the stat counter for LRU policy. */ |
3273 | buf_LRU_stat_inc_unzip(); |
3274 | |
3275 | MONITOR_INC(MONITOR_PAGE_DECOMPRESS); |
3276 | |
3277 | return(TRUE); |
3278 | } |
3279 | |
3280 | #ifdef UNIV_ZIP_DEBUG |
3281 | /**********************************************************************//** |
3282 | Dump a block of memory on the standard error stream. */ |
3283 | static |
3284 | void |
3285 | page_zip_hexdump_func( |
3286 | /*==================*/ |
3287 | const char* name, /*!< in: name of the data structure */ |
3288 | const void* buf, /*!< in: data */ |
3289 | ulint size) /*!< in: length of the data, in bytes */ |
3290 | { |
3291 | const byte* s = static_cast<const byte*>(buf); |
3292 | ulint addr; |
3293 | const ulint width = 32; /* bytes per line */ |
3294 | |
3295 | fprintf(stderr, "%s:\n" , name); |
3296 | |
3297 | for (addr = 0; addr < size; addr += width) { |
3298 | ulint i; |
3299 | |
3300 | fprintf(stderr, "%04lx " , (ulong) addr); |
3301 | |
3302 | i = ut_min(width, size - addr); |
3303 | |
3304 | while (i--) { |
3305 | fprintf(stderr, "%02x" , *s++); |
3306 | } |
3307 | |
3308 | putc('\n', stderr); |
3309 | } |
3310 | } |
3311 | |
3312 | /** Dump a block of memory on the standard error stream. |
3313 | @param buf in: data |
3314 | @param size in: length of the data, in bytes */ |
3315 | #define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size) |
3316 | |
3317 | /** Flag: make page_zip_validate() compare page headers only */ |
3318 | bool page_zip_validate_header_only; |
3319 | |
3320 | /**********************************************************************//** |
3321 | Check that the compressed and decompressed pages match. |
3322 | @return TRUE if valid, FALSE if not */ |
3323 | ibool |
3324 | page_zip_validate_low( |
3325 | /*==================*/ |
3326 | const page_zip_des_t* page_zip,/*!< in: compressed page */ |
3327 | const page_t* page, /*!< in: uncompressed page */ |
3328 | const dict_index_t* index, /*!< in: index of the page, if known */ |
3329 | ibool sloppy) /*!< in: FALSE=strict, |
3330 | TRUE=ignore the MIN_REC_FLAG */ |
3331 | { |
3332 | page_zip_des_t temp_page_zip; |
3333 | byte* temp_page_buf; |
3334 | page_t* temp_page; |
3335 | ibool valid; |
3336 | |
3337 | if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV, |
3338 | FIL_PAGE_LSN - FIL_PAGE_PREV) |
3339 | || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2) |
3340 | || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA, |
3341 | PAGE_DATA - FIL_PAGE_DATA)) { |
3342 | page_zip_fail(("page_zip_validate: page header\n" )); |
3343 | page_zip_hexdump(page_zip, sizeof *page_zip); |
3344 | page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip)); |
3345 | page_zip_hexdump(page, srv_page_size); |
3346 | return(FALSE); |
3347 | } |
3348 | |
3349 | ut_a(page_is_comp(page)); |
3350 | |
3351 | if (page_zip_validate_header_only) { |
3352 | return(TRUE); |
3353 | } |
3354 | |
3355 | /* page_zip_decompress() expects the uncompressed page to be |
3356 | srv_page_size aligned. */ |
3357 | temp_page_buf = static_cast<byte*>( |
3358 | ut_malloc_nokey(2 << srv_page_size_shift)); |
3359 | temp_page = static_cast<byte*>(ut_align(temp_page_buf, srv_page_size)); |
3360 | |
3361 | UNIV_MEM_ASSERT_RW(page, srv_page_size); |
3362 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
3363 | |
3364 | temp_page_zip = *page_zip; |
3365 | valid = page_zip_decompress_low(&temp_page_zip, temp_page, TRUE); |
3366 | if (!valid) { |
3367 | fputs("page_zip_validate(): failed to decompress\n" , stderr); |
3368 | goto func_exit; |
3369 | } |
3370 | if (page_zip->n_blobs != temp_page_zip.n_blobs) { |
3371 | page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n" , |
3372 | page_zip->n_blobs, temp_page_zip.n_blobs)); |
3373 | valid = FALSE; |
3374 | } |
3375 | #ifdef UNIV_DEBUG |
3376 | if (page_zip->m_start != temp_page_zip.m_start) { |
3377 | page_zip_fail(("page_zip_validate: m_start: %u!=%u\n" , |
3378 | page_zip->m_start, temp_page_zip.m_start)); |
3379 | valid = FALSE; |
3380 | } |
3381 | #endif /* UNIV_DEBUG */ |
3382 | if (page_zip->m_end != temp_page_zip.m_end) { |
3383 | page_zip_fail(("page_zip_validate: m_end: %u!=%u\n" , |
3384 | page_zip->m_end, temp_page_zip.m_end)); |
3385 | valid = FALSE; |
3386 | } |
3387 | if (page_zip->m_nonempty != temp_page_zip.m_nonempty) { |
3388 | page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n" , |
3389 | page_zip->m_nonempty, |
3390 | temp_page_zip.m_nonempty)); |
3391 | valid = FALSE; |
3392 | } |
3393 | if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER, |
3394 | srv_page_size - PAGE_HEADER - FIL_PAGE_DATA_END)) { |
3395 | |
3396 | /* In crash recovery, the "minimum record" flag may be |
3397 | set incorrectly until the mini-transaction is |
3398 | committed. Let us tolerate that difference when we |
3399 | are performing a sloppy validation. */ |
3400 | |
3401 | ulint* offsets; |
3402 | mem_heap_t* heap; |
3403 | const rec_t* rec; |
3404 | const rec_t* trec; |
3405 | byte info_bits_diff; |
3406 | ulint offset |
3407 | = rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE); |
3408 | ut_a(offset >= PAGE_NEW_SUPREMUM); |
3409 | offset -= 5/*REC_NEW_INFO_BITS*/; |
3410 | |
3411 | info_bits_diff = page[offset] ^ temp_page[offset]; |
3412 | |
3413 | if (info_bits_diff == REC_INFO_MIN_REC_FLAG) { |
3414 | temp_page[offset] = page[offset]; |
3415 | |
3416 | if (!memcmp(page + PAGE_HEADER, |
3417 | temp_page + PAGE_HEADER, |
3418 | srv_page_size - PAGE_HEADER |
3419 | - FIL_PAGE_DATA_END)) { |
3420 | |
3421 | /* Only the minimum record flag |
3422 | differed. Let us ignore it. */ |
3423 | page_zip_fail(("page_zip_validate:" |
3424 | " min_rec_flag" |
3425 | " (%s%lu,%lu,0x%02lx)\n" , |
3426 | sloppy ? "ignored, " : "" , |
3427 | page_get_space_id(page), |
3428 | page_get_page_no(page), |
3429 | (ulong) page[offset])); |
3430 | /* We don't check for spatial index, since |
3431 | the "minimum record" could be deleted when |
3432 | doing rtr_update_mbr_field. |
3433 | GIS_FIXME: need to validate why |
3434 | rtr_update_mbr_field.() could affect this */ |
3435 | if (index && dict_index_is_spatial(index)) { |
3436 | valid = true; |
3437 | } else { |
3438 | valid = sloppy; |
3439 | } |
3440 | goto func_exit; |
3441 | } |
3442 | } |
3443 | |
3444 | /* Compare the pointers in the PAGE_FREE list. */ |
3445 | rec = page_header_get_ptr(page, PAGE_FREE); |
3446 | trec = page_header_get_ptr(temp_page, PAGE_FREE); |
3447 | |
3448 | while (rec || trec) { |
3449 | if (page_offset(rec) != page_offset(trec)) { |
3450 | page_zip_fail(("page_zip_validate:" |
3451 | " PAGE_FREE list: %u!=%u\n" , |
3452 | (unsigned) page_offset(rec), |
3453 | (unsigned) page_offset(trec))); |
3454 | valid = FALSE; |
3455 | goto func_exit; |
3456 | } |
3457 | |
3458 | rec = page_rec_get_next_low(rec, TRUE); |
3459 | trec = page_rec_get_next_low(trec, TRUE); |
3460 | } |
3461 | |
3462 | /* Compare the records. */ |
3463 | heap = NULL; |
3464 | offsets = NULL; |
3465 | rec = page_rec_get_next_low( |
3466 | page + PAGE_NEW_INFIMUM, TRUE); |
3467 | trec = page_rec_get_next_low( |
3468 | temp_page + PAGE_NEW_INFIMUM, TRUE); |
3469 | const bool is_leaf = page_is_leaf(page); |
3470 | |
3471 | do { |
3472 | if (page_offset(rec) != page_offset(trec)) { |
3473 | page_zip_fail(("page_zip_validate:" |
3474 | " record list: 0x%02x!=0x%02x\n" , |
3475 | (unsigned) page_offset(rec), |
3476 | (unsigned) page_offset(trec))); |
3477 | valid = FALSE; |
3478 | break; |
3479 | } |
3480 | |
3481 | if (index) { |
3482 | /* Compare the data. */ |
3483 | offsets = rec_get_offsets( |
3484 | rec, index, offsets, is_leaf, |
3485 | ULINT_UNDEFINED, &heap); |
3486 | |
3487 | if (memcmp(rec - rec_offs_extra_size(offsets), |
3488 | trec - rec_offs_extra_size(offsets), |
3489 | rec_offs_size(offsets))) { |
3490 | page_zip_fail( |
3491 | ("page_zip_validate:" |
3492 | " record content: 0x%02x" , |
3493 | (unsigned) page_offset(rec))); |
3494 | valid = FALSE; |
3495 | break; |
3496 | } |
3497 | } |
3498 | |
3499 | rec = page_rec_get_next_low(rec, TRUE); |
3500 | trec = page_rec_get_next_low(trec, TRUE); |
3501 | } while (rec || trec); |
3502 | |
3503 | if (heap) { |
3504 | mem_heap_free(heap); |
3505 | } |
3506 | } |
3507 | |
3508 | func_exit: |
3509 | if (!valid) { |
3510 | page_zip_hexdump(page_zip, sizeof *page_zip); |
3511 | page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip)); |
3512 | page_zip_hexdump(page, srv_page_size); |
3513 | page_zip_hexdump(temp_page, srv_page_size); |
3514 | } |
3515 | ut_free(temp_page_buf); |
3516 | return(valid); |
3517 | } |
3518 | |
3519 | /**********************************************************************//** |
3520 | Check that the compressed and decompressed pages match. |
3521 | @return TRUE if valid, FALSE if not */ |
3522 | ibool |
3523 | page_zip_validate( |
3524 | /*==============*/ |
3525 | const page_zip_des_t* page_zip,/*!< in: compressed page */ |
3526 | const page_t* page, /*!< in: uncompressed page */ |
3527 | const dict_index_t* index) /*!< in: index of the page, if known */ |
3528 | { |
3529 | return(page_zip_validate_low(page_zip, page, index, |
3530 | recv_recovery_is_on())); |
3531 | } |
3532 | #endif /* UNIV_ZIP_DEBUG */ |
3533 | |
3534 | #ifdef UNIV_DEBUG |
3535 | /**********************************************************************//** |
3536 | Assert that the compressed and decompressed page headers match. |
3537 | @return TRUE */ |
3538 | static |
3539 | ibool |
3540 | page_zip_header_cmp( |
3541 | /*================*/ |
3542 | const page_zip_des_t* page_zip,/*!< in: compressed page */ |
3543 | const byte* page) /*!< in: uncompressed page */ |
3544 | { |
3545 | ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV, |
3546 | FIL_PAGE_LSN - FIL_PAGE_PREV)); |
3547 | ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, |
3548 | 2)); |
3549 | ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA, |
3550 | PAGE_DATA - FIL_PAGE_DATA)); |
3551 | |
3552 | return(TRUE); |
3553 | } |
3554 | #endif /* UNIV_DEBUG */ |
3555 | |
3556 | /**********************************************************************//** |
3557 | Write a record on the compressed page that contains externally stored |
3558 | columns. The data must already have been written to the uncompressed page. |
3559 | @return end of modification log */ |
3560 | static |
3561 | byte* |
3562 | page_zip_write_rec_ext( |
3563 | /*===================*/ |
3564 | page_zip_des_t* page_zip, /*!< in/out: compressed page */ |
3565 | const page_t* page, /*!< in: page containing rec */ |
3566 | const byte* rec, /*!< in: record being written */ |
3567 | dict_index_t* index, /*!< in: record descriptor */ |
3568 | const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */ |
3569 | ulint create, /*!< in: nonzero=insert, zero=update */ |
3570 | ulint trx_id_col, /*!< in: position of DB_TRX_ID */ |
3571 | ulint heap_no, /*!< in: heap number of rec */ |
3572 | byte* storage, /*!< in: end of dense page directory */ |
3573 | byte* data) /*!< in: end of modification log */ |
3574 | { |
3575 | const byte* start = rec; |
3576 | ulint i; |
3577 | ulint len; |
3578 | byte* externs = storage; |
3579 | ulint n_ext = rec_offs_n_extern(offsets); |
3580 | |
3581 | ut_ad(rec_offs_validate(rec, index, offsets)); |
3582 | UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); |
3583 | UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), |
3584 | rec_offs_extra_size(offsets)); |
3585 | |
3586 | externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) |
3587 | * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW); |
3588 | |
3589 | /* Note that this will not take into account |
3590 | the BLOB columns of rec if create==TRUE. */ |
3591 | ut_ad(data + rec_offs_data_size(offsets) |
3592 | - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) |
3593 | - n_ext * BTR_EXTERN_FIELD_REF_SIZE |
3594 | < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs); |
3595 | |
3596 | { |
3597 | ulint blob_no = page_zip_get_n_prev_extern( |
3598 | page_zip, rec, index); |
3599 | byte* ext_end = externs - page_zip->n_blobs |
3600 | * BTR_EXTERN_FIELD_REF_SIZE; |
3601 | ut_ad(blob_no <= page_zip->n_blobs); |
3602 | externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE; |
3603 | |
3604 | if (create) { |
3605 | page_zip->n_blobs += static_cast<unsigned>(n_ext); |
3606 | ASSERT_ZERO_BLOB(ext_end - n_ext |
3607 | * BTR_EXTERN_FIELD_REF_SIZE); |
3608 | memmove(ext_end - n_ext |
3609 | * BTR_EXTERN_FIELD_REF_SIZE, |
3610 | ext_end, |
3611 | ulint(externs - ext_end)); |
3612 | } |
3613 | |
3614 | ut_a(blob_no + n_ext <= page_zip->n_blobs); |
3615 | } |
3616 | |
3617 | for (i = 0; i < rec_offs_n_fields(offsets); i++) { |
3618 | const byte* src; |
3619 | |
3620 | if (UNIV_UNLIKELY(i == trx_id_col)) { |
3621 | ut_ad(!rec_offs_nth_extern(offsets, |
3622 | i)); |
3623 | ut_ad(!rec_offs_nth_extern(offsets, |
3624 | i + 1)); |
3625 | /* Locate trx_id and roll_ptr. */ |
3626 | src = rec_get_nth_field(rec, offsets, |
3627 | i, &len); |
3628 | ut_ad(len == DATA_TRX_ID_LEN); |
3629 | ut_ad(src + DATA_TRX_ID_LEN |
3630 | == rec_get_nth_field( |
3631 | rec, offsets, |
3632 | i + 1, &len)); |
3633 | ut_ad(len == DATA_ROLL_PTR_LEN); |
3634 | |
3635 | /* Log the preceding fields. */ |
3636 | ASSERT_ZERO(data, src - start); |
3637 | memcpy(data, start, ulint(src - start)); |
3638 | data += src - start; |
3639 | start = src + (DATA_TRX_ID_LEN |
3640 | + DATA_ROLL_PTR_LEN); |
3641 | |
3642 | /* Store trx_id and roll_ptr. */ |
3643 | memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) |
3644 | * (heap_no - 1), |
3645 | src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
3646 | i++; /* skip also roll_ptr */ |
3647 | } else if (rec_offs_nth_extern(offsets, i)) { |
3648 | src = rec_get_nth_field(rec, offsets, |
3649 | i, &len); |
3650 | |
3651 | ut_ad(dict_index_is_clust(index)); |
3652 | ut_ad(len |
3653 | >= BTR_EXTERN_FIELD_REF_SIZE); |
3654 | src += len - BTR_EXTERN_FIELD_REF_SIZE; |
3655 | |
3656 | ASSERT_ZERO(data, src - start); |
3657 | memcpy(data, start, ulint(src - start)); |
3658 | data += src - start; |
3659 | start = src + BTR_EXTERN_FIELD_REF_SIZE; |
3660 | |
3661 | /* Store the BLOB pointer. */ |
3662 | externs -= BTR_EXTERN_FIELD_REF_SIZE; |
3663 | ut_ad(data < externs); |
3664 | memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE); |
3665 | } |
3666 | } |
3667 | |
3668 | /* Log the last bytes of the record. */ |
3669 | len = rec_offs_data_size(offsets) - ulint(start - rec); |
3670 | |
3671 | ASSERT_ZERO(data, len); |
3672 | memcpy(data, start, len); |
3673 | data += len; |
3674 | |
3675 | return(data); |
3676 | } |
3677 | |
3678 | /**********************************************************************//** |
3679 | Write an entire record on the compressed page. The data must already |
3680 | have been written to the uncompressed page. */ |
3681 | void |
3682 | page_zip_write_rec( |
3683 | /*===============*/ |
3684 | page_zip_des_t* page_zip,/*!< in/out: compressed page */ |
3685 | const byte* rec, /*!< in: record being written */ |
3686 | dict_index_t* index, /*!< in: the index the record belongs to */ |
3687 | const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ |
3688 | ulint create) /*!< in: nonzero=insert, zero=update */ |
3689 | { |
3690 | const page_t* page; |
3691 | byte* data; |
3692 | byte* storage; |
3693 | ulint heap_no; |
3694 | byte* slot; |
3695 | |
3696 | ut_ad(page_zip_simple_validate(page_zip)); |
3697 | ut_ad(page_zip_get_size(page_zip) |
3698 | > PAGE_DATA + page_zip_dir_size(page_zip)); |
3699 | ut_ad(rec_offs_comp(offsets)); |
3700 | ut_ad(rec_offs_validate(rec, index, offsets)); |
3701 | |
3702 | ut_ad(page_zip->m_start >= PAGE_DATA); |
3703 | |
3704 | page = page_align(rec); |
3705 | |
3706 | ut_ad(page_zip_header_cmp(page_zip, page)); |
3707 | ut_ad(page_simple_validate_new((page_t*) page)); |
3708 | |
3709 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
3710 | UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); |
3711 | UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), |
3712 | rec_offs_extra_size(offsets)); |
3713 | |
3714 | slot = page_zip_dir_find(page_zip, page_offset(rec)); |
3715 | ut_a(slot); |
3716 | /* Copy the delete mark. */ |
3717 | if (rec_get_deleted_flag(rec, TRUE)) { |
3718 | /* In delete-marked records, DB_TRX_ID must |
3719 | always refer to an existing undo log record. |
3720 | On non-leaf pages, the delete-mark flag is garbage. */ |
3721 | ut_ad(!index->is_primary() || !page_is_leaf(page) |
3722 | || row_get_rec_trx_id(rec, index, offsets)); |
3723 | *slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8; |
3724 | } else { |
3725 | *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8); |
3726 | } |
3727 | |
3728 | ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START); |
3729 | ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + srv_page_size |
3730 | - PAGE_DIR - PAGE_DIR_SLOT_SIZE |
3731 | * page_dir_get_n_slots(page)); |
3732 | |
3733 | heap_no = rec_get_heap_no_new(rec); |
3734 | ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */ |
3735 | ut_ad(heap_no < page_dir_get_n_heap(page)); |
3736 | |
3737 | /* Append to the modification log. */ |
3738 | data = page_zip->data + page_zip->m_end; |
3739 | ut_ad(!*data); |
3740 | |
3741 | /* Identify the record by writing its heap number - 1. |
3742 | 0 is reserved to indicate the end of the modification log. */ |
3743 | |
3744 | if (UNIV_UNLIKELY(heap_no - 1 >= 64)) { |
3745 | *data++ = (byte) (0x80 | (heap_no - 1) >> 7); |
3746 | ut_ad(!*data); |
3747 | } |
3748 | *data++ = (byte) ((heap_no - 1) << 1); |
3749 | ut_ad(!*data); |
3750 | |
3751 | { |
3752 | const byte* start = rec - rec_offs_extra_size(offsets); |
3753 | const byte* b = rec - REC_N_NEW_EXTRA_BYTES; |
3754 | |
3755 | /* Write the extra bytes backwards, so that |
3756 | rec_offs_extra_size() can be easily computed in |
3757 | page_zip_apply_log() by invoking |
3758 | rec_get_offsets_reverse(). */ |
3759 | |
3760 | while (b != start) { |
3761 | *data++ = *--b; |
3762 | ut_ad(!*data); |
3763 | } |
3764 | } |
3765 | |
3766 | /* Write the data bytes. Store the uncompressed bytes separately. */ |
3767 | storage = page_zip_dir_start(page_zip); |
3768 | |
3769 | if (page_is_leaf(page)) { |
3770 | ulint len; |
3771 | |
3772 | if (dict_index_is_clust(index)) { |
3773 | ulint trx_id_col; |
3774 | |
3775 | trx_id_col = dict_index_get_sys_col_pos(index, |
3776 | DATA_TRX_ID); |
3777 | ut_ad(trx_id_col != ULINT_UNDEFINED); |
3778 | |
3779 | /* Store separately trx_id, roll_ptr and |
3780 | the BTR_EXTERN_FIELD_REF of each BLOB column. */ |
3781 | if (rec_offs_any_extern(offsets)) { |
3782 | data = page_zip_write_rec_ext( |
3783 | page_zip, page, |
3784 | rec, index, offsets, create, |
3785 | trx_id_col, heap_no, storage, data); |
3786 | } else { |
3787 | /* Locate trx_id and roll_ptr. */ |
3788 | const byte* src |
3789 | = rec_get_nth_field(rec, offsets, |
3790 | trx_id_col, &len); |
3791 | ut_ad(len == DATA_TRX_ID_LEN); |
3792 | ut_ad(src + DATA_TRX_ID_LEN |
3793 | == rec_get_nth_field( |
3794 | rec, offsets, |
3795 | trx_id_col + 1, &len)); |
3796 | ut_ad(len == DATA_ROLL_PTR_LEN); |
3797 | |
3798 | /* Log the preceding fields. */ |
3799 | ASSERT_ZERO(data, src - rec); |
3800 | memcpy(data, rec, ulint(src - rec)); |
3801 | data += src - rec; |
3802 | |
3803 | /* Store trx_id and roll_ptr. */ |
3804 | memcpy(storage |
3805 | - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) |
3806 | * (heap_no - 1), |
3807 | src, |
3808 | DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
3809 | |
3810 | src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; |
3811 | |
3812 | /* Log the last bytes of the record. */ |
3813 | len = rec_offs_data_size(offsets) |
3814 | - ulint(src - rec); |
3815 | |
3816 | ASSERT_ZERO(data, len); |
3817 | memcpy(data, src, len); |
3818 | data += len; |
3819 | } |
3820 | } else { |
3821 | /* Leaf page of a secondary index: |
3822 | no externally stored columns */ |
3823 | ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID) |
3824 | == ULINT_UNDEFINED); |
3825 | ut_ad(!rec_offs_any_extern(offsets)); |
3826 | |
3827 | /* Log the entire record. */ |
3828 | len = rec_offs_data_size(offsets); |
3829 | |
3830 | ASSERT_ZERO(data, len); |
3831 | memcpy(data, rec, len); |
3832 | data += len; |
3833 | } |
3834 | } else { |
3835 | /* This is a node pointer page. */ |
3836 | ulint len; |
3837 | |
3838 | /* Non-leaf nodes should not have any externally |
3839 | stored columns. */ |
3840 | ut_ad(!rec_offs_any_extern(offsets)); |
3841 | |
3842 | /* Copy the data bytes, except node_ptr. */ |
3843 | len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE; |
3844 | ut_ad(data + len < storage - REC_NODE_PTR_SIZE |
3845 | * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)); |
3846 | ASSERT_ZERO(data, len); |
3847 | memcpy(data, rec, len); |
3848 | data += len; |
3849 | |
3850 | /* Copy the node pointer to the uncompressed area. */ |
3851 | memcpy(storage - REC_NODE_PTR_SIZE |
3852 | * (heap_no - 1), |
3853 | rec + len, |
3854 | REC_NODE_PTR_SIZE); |
3855 | } |
3856 | |
3857 | ut_a(!*data); |
3858 | ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip)); |
3859 | page_zip->m_end = unsigned(data - page_zip->data); |
3860 | page_zip->m_nonempty = TRUE; |
3861 | |
3862 | #ifdef UNIV_ZIP_DEBUG |
3863 | ut_a(page_zip_validate(page_zip, page_align(rec), index)); |
3864 | #endif /* UNIV_ZIP_DEBUG */ |
3865 | } |
3866 | |
3867 | /***********************************************************//** |
3868 | Parses a log record of writing a BLOB pointer of a record. |
3869 | @return end of log record or NULL */ |
3870 | byte* |
3871 | page_zip_parse_write_blob_ptr( |
3872 | /*==========================*/ |
3873 | byte* ptr, /*!< in: redo log buffer */ |
3874 | byte* end_ptr,/*!< in: redo log buffer end */ |
3875 | page_t* page, /*!< in/out: uncompressed page */ |
3876 | page_zip_des_t* page_zip)/*!< in/out: compressed page */ |
3877 | { |
3878 | ulint offset; |
3879 | ulint z_offset; |
3880 | |
3881 | ut_ad(ptr != NULL); |
3882 | ut_ad(end_ptr != NULL); |
3883 | ut_ad(!page == !page_zip); |
3884 | |
3885 | if (UNIV_UNLIKELY |
3886 | (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) { |
3887 | |
3888 | return(NULL); |
3889 | } |
3890 | |
3891 | offset = mach_read_from_2(ptr); |
3892 | z_offset = mach_read_from_2(ptr + 2); |
3893 | |
3894 | if (offset < PAGE_ZIP_START |
3895 | || offset >= srv_page_size |
3896 | || z_offset >= srv_page_size) { |
3897 | corrupt: |
3898 | recv_sys->found_corrupt_log = TRUE; |
3899 | |
3900 | return(NULL); |
3901 | } |
3902 | |
3903 | if (page) { |
3904 | |
3905 | if (!page_zip || !page_is_leaf(page)) { |
3906 | |
3907 | goto corrupt; |
3908 | } |
3909 | |
3910 | #ifdef UNIV_ZIP_DEBUG |
3911 | ut_a(page_zip_validate(page_zip, page, NULL)); |
3912 | #endif /* UNIV_ZIP_DEBUG */ |
3913 | |
3914 | memcpy(page + offset, |
3915 | ptr + 4, BTR_EXTERN_FIELD_REF_SIZE); |
3916 | memcpy(page_zip->data + z_offset, |
3917 | ptr + 4, BTR_EXTERN_FIELD_REF_SIZE); |
3918 | |
3919 | #ifdef UNIV_ZIP_DEBUG |
3920 | ut_a(page_zip_validate(page_zip, page, NULL)); |
3921 | #endif /* UNIV_ZIP_DEBUG */ |
3922 | } |
3923 | |
3924 | return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE)); |
3925 | } |
3926 | |
3927 | /**********************************************************************//** |
3928 | Write a BLOB pointer of a record on the leaf page of a clustered index. |
3929 | The information must already have been updated on the uncompressed page. */ |
3930 | void |
3931 | page_zip_write_blob_ptr( |
3932 | /*====================*/ |
3933 | page_zip_des_t* page_zip,/*!< in/out: compressed page */ |
3934 | const byte* rec, /*!< in/out: record whose data is being |
3935 | written */ |
3936 | dict_index_t* index, /*!< in: index of the page */ |
3937 | const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ |
3938 | ulint n, /*!< in: column index */ |
3939 | mtr_t* mtr) /*!< in: mini-transaction handle, |
3940 | or NULL if no logging is needed */ |
3941 | { |
3942 | const byte* field; |
3943 | byte* externs; |
3944 | const page_t* page = page_align(rec); |
3945 | ulint blob_no; |
3946 | ulint len; |
3947 | |
3948 | ut_ad(page_zip != NULL); |
3949 | ut_ad(rec != NULL); |
3950 | ut_ad(index != NULL); |
3951 | ut_ad(offsets != NULL); |
3952 | ut_ad(page_simple_validate_new((page_t*) page)); |
3953 | ut_ad(page_zip_simple_validate(page_zip)); |
3954 | ut_ad(page_zip_get_size(page_zip) |
3955 | > PAGE_DATA + page_zip_dir_size(page_zip)); |
3956 | ut_ad(rec_offs_comp(offsets)); |
3957 | ut_ad(rec_offs_validate(rec, NULL, offsets)); |
3958 | ut_ad(rec_offs_any_extern(offsets)); |
3959 | ut_ad(rec_offs_nth_extern(offsets, n)); |
3960 | |
3961 | ut_ad(page_zip->m_start >= PAGE_DATA); |
3962 | ut_ad(page_zip_header_cmp(page_zip, page)); |
3963 | |
3964 | ut_ad(page_is_leaf(page)); |
3965 | ut_ad(dict_index_is_clust(index)); |
3966 | |
3967 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
3968 | UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); |
3969 | UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), |
3970 | rec_offs_extra_size(offsets)); |
3971 | |
3972 | blob_no = page_zip_get_n_prev_extern(page_zip, rec, index) |
3973 | + rec_get_n_extern_new(rec, index, n); |
3974 | ut_a(blob_no < page_zip->n_blobs); |
3975 | |
3976 | externs = page_zip->data + page_zip_get_size(page_zip) |
3977 | - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW) |
3978 | * PAGE_ZIP_CLUST_LEAF_SLOT_SIZE; |
3979 | |
3980 | field = rec_get_nth_field(rec, offsets, n, &len); |
3981 | |
3982 | externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE; |
3983 | field += len - BTR_EXTERN_FIELD_REF_SIZE; |
3984 | |
3985 | memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE); |
3986 | |
3987 | #ifdef UNIV_ZIP_DEBUG |
3988 | ut_a(page_zip_validate(page_zip, page, index)); |
3989 | #endif /* UNIV_ZIP_DEBUG */ |
3990 | |
3991 | if (mtr) { |
3992 | byte* log_ptr = mlog_open( |
3993 | mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE); |
3994 | if (UNIV_UNLIKELY(!log_ptr)) { |
3995 | return; |
3996 | } |
3997 | |
3998 | log_ptr = mlog_write_initial_log_record_fast( |
3999 | (byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr); |
4000 | mach_write_to_2(log_ptr, page_offset(field)); |
4001 | log_ptr += 2; |
4002 | mach_write_to_2(log_ptr, ulint(externs - page_zip->data)); |
4003 | log_ptr += 2; |
4004 | memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE); |
4005 | log_ptr += BTR_EXTERN_FIELD_REF_SIZE; |
4006 | mlog_close(mtr, log_ptr); |
4007 | } |
4008 | } |
4009 | |
4010 | /***********************************************************//** |
4011 | Parses a log record of writing the node pointer of a record. |
4012 | @return end of log record or NULL */ |
4013 | byte* |
4014 | page_zip_parse_write_node_ptr( |
4015 | /*==========================*/ |
4016 | byte* ptr, /*!< in: redo log buffer */ |
4017 | byte* end_ptr,/*!< in: redo log buffer end */ |
4018 | page_t* page, /*!< in/out: uncompressed page */ |
4019 | page_zip_des_t* page_zip)/*!< in/out: compressed page */ |
4020 | { |
4021 | ulint offset; |
4022 | ulint z_offset; |
4023 | |
4024 | ut_ad(ptr != NULL); |
4025 | ut_ad(end_ptr!= NULL); |
4026 | ut_ad(!page == !page_zip); |
4027 | |
4028 | if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) { |
4029 | |
4030 | return(NULL); |
4031 | } |
4032 | |
4033 | offset = mach_read_from_2(ptr); |
4034 | z_offset = mach_read_from_2(ptr + 2); |
4035 | |
4036 | if (offset < PAGE_ZIP_START |
4037 | || offset >= srv_page_size |
4038 | || z_offset >= srv_page_size) { |
4039 | corrupt: |
4040 | recv_sys->found_corrupt_log = TRUE; |
4041 | |
4042 | return(NULL); |
4043 | } |
4044 | |
4045 | if (page) { |
4046 | byte* storage_end; |
4047 | byte* field; |
4048 | byte* storage; |
4049 | ulint heap_no; |
4050 | |
4051 | if (!page_zip || page_is_leaf(page)) { |
4052 | |
4053 | goto corrupt; |
4054 | } |
4055 | |
4056 | #ifdef UNIV_ZIP_DEBUG |
4057 | ut_a(page_zip_validate(page_zip, page, NULL)); |
4058 | #endif /* UNIV_ZIP_DEBUG */ |
4059 | |
4060 | field = page + offset; |
4061 | storage = page_zip->data + z_offset; |
4062 | |
4063 | storage_end = page_zip_dir_start(page_zip); |
4064 | |
4065 | heap_no = 1 + ulint(storage_end - storage) / REC_NODE_PTR_SIZE; |
4066 | |
4067 | if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE) |
4068 | || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW) |
4069 | || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) { |
4070 | |
4071 | goto corrupt; |
4072 | } |
4073 | |
4074 | memcpy(field, ptr + 4, REC_NODE_PTR_SIZE); |
4075 | memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE); |
4076 | |
4077 | #ifdef UNIV_ZIP_DEBUG |
4078 | ut_a(page_zip_validate(page_zip, page, NULL)); |
4079 | #endif /* UNIV_ZIP_DEBUG */ |
4080 | } |
4081 | |
4082 | return(ptr + (2 + 2 + REC_NODE_PTR_SIZE)); |
4083 | } |
4084 | |
4085 | /**********************************************************************//** |
4086 | Write the node pointer of a record on a non-leaf compressed page. */ |
4087 | void |
4088 | page_zip_write_node_ptr( |
4089 | /*====================*/ |
4090 | page_zip_des_t* page_zip,/*!< in/out: compressed page */ |
4091 | byte* rec, /*!< in/out: record */ |
4092 | ulint size, /*!< in: data size of rec */ |
4093 | ulint ptr, /*!< in: node pointer */ |
4094 | mtr_t* mtr) /*!< in: mini-transaction, or NULL */ |
4095 | { |
4096 | byte* field; |
4097 | byte* storage; |
4098 | #ifdef UNIV_DEBUG |
4099 | page_t* page = page_align(rec); |
4100 | #endif /* UNIV_DEBUG */ |
4101 | |
4102 | ut_ad(page_simple_validate_new(page)); |
4103 | ut_ad(page_zip_simple_validate(page_zip)); |
4104 | ut_ad(page_zip_get_size(page_zip) |
4105 | > PAGE_DATA + page_zip_dir_size(page_zip)); |
4106 | ut_ad(page_rec_is_comp(rec)); |
4107 | |
4108 | ut_ad(page_zip->m_start >= PAGE_DATA); |
4109 | ut_ad(page_zip_header_cmp(page_zip, page)); |
4110 | |
4111 | ut_ad(!page_is_leaf(page)); |
4112 | |
4113 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
4114 | UNIV_MEM_ASSERT_RW(rec, size); |
4115 | |
4116 | storage = page_zip_dir_start(page_zip) |
4117 | - (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE; |
4118 | field = rec + size - REC_NODE_PTR_SIZE; |
4119 | |
4120 | #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG |
4121 | ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE)); |
4122 | #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ |
4123 | compile_time_assert(REC_NODE_PTR_SIZE == 4); |
4124 | mach_write_to_4(field, ptr); |
4125 | memcpy(storage, field, REC_NODE_PTR_SIZE); |
4126 | |
4127 | if (mtr) { |
4128 | byte* log_ptr = mlog_open(mtr, |
4129 | 11 + 2 + 2 + REC_NODE_PTR_SIZE); |
4130 | if (UNIV_UNLIKELY(!log_ptr)) { |
4131 | return; |
4132 | } |
4133 | |
4134 | log_ptr = mlog_write_initial_log_record_fast( |
4135 | field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr); |
4136 | mach_write_to_2(log_ptr, page_offset(field)); |
4137 | log_ptr += 2; |
4138 | mach_write_to_2(log_ptr, ulint(storage - page_zip->data)); |
4139 | log_ptr += 2; |
4140 | memcpy(log_ptr, field, REC_NODE_PTR_SIZE); |
4141 | log_ptr += REC_NODE_PTR_SIZE; |
4142 | mlog_close(mtr, log_ptr); |
4143 | } |
4144 | } |
4145 | |
/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
The values are written both into the record on the uncompressed page and
into the array of uncompressed columns below the dense page directory of
the compressed page; a MLOG_ZIP_WRITE_TRX_ID redo record is written if
mtr is given.
@param[in,out]	page_zip	compressed page
@param[in,out]	rec		record
@param[in]	offsets		rec_get_offsets(rec, index)
@param[in]	trx_id_col	field number of DB_TRX_ID (number of PK fields)
@param[in]	trx_id		DB_TRX_ID value (transaction identifier)
@param[in]	roll_ptr	DB_ROLL_PTR value (undo log pointer)
@param[in,out]	mtr		mini-transaction, or NULL to skip logging */
void
page_zip_write_trx_id_and_roll_ptr(
	page_zip_des_t*	page_zip,
	byte*		rec,
	const ulint*	offsets,
	ulint		trx_id_col,
	trx_id_t	trx_id,
	roll_ptr_t	roll_ptr,
	mtr_t*		mtr)
{
	byte*	field;
	byte*	storage;
#ifdef UNIV_DEBUG
	page_t*	page	= page_align(rec);
#endif /* UNIV_DEBUG */
	ulint	len;

	ut_ad(page_simple_validate_new(page));
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip_get_size(page_zip)
	      > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(rec_offs_validate(rec, NULL, offsets));
	ut_ad(rec_offs_comp(offsets));

	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(page_zip_header_cmp(page_zip, page));

	ut_ad(page_is_leaf(page));

	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));

	/* The (DB_TRX_ID,DB_ROLL_PTR) pairs below the dense directory
	are indexed by heap_no - 1. */
	storage = page_zip_dir_start(page_zip)
		- (rec_get_heap_no_new(rec) - 1)
		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

	/* DB_ROLL_PTR must immediately follow DB_TRX_ID in the record. */
	compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
	field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
	ut_ad(len == DATA_TRX_ID_LEN);
	ut_ad(field + DATA_TRX_ID_LEN
	      == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
	ut_ad(len == DATA_ROLL_PTR_LEN);
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
	/* The two copies must agree before the update. */
	ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
	compile_time_assert(DATA_TRX_ID_LEN == 6);
	mach_write_to_6(field, trx_id);
	compile_time_assert(DATA_ROLL_PTR_LEN == 7);
	mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
	memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
			   rec_offs_extra_size(offsets));
	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));

	if (mtr) {
		/* Redo record: initial log record, 2-byte page offset
		of the field, 2-byte offset within the compressed page,
		and the new DB_TRX_ID,DB_ROLL_PTR values. */
		byte*	log_ptr	= mlog_open(
			mtr, 11 + 2 + 2 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		if (UNIV_UNLIKELY(!log_ptr)) {
			return;
		}

		log_ptr = mlog_write_initial_log_record_fast(
			(byte*) field, MLOG_ZIP_WRITE_TRX_ID, log_ptr, mtr);
		mach_write_to_2(log_ptr, page_offset(field));
		log_ptr += 2;
		mach_write_to_2(log_ptr, ulint(storage - page_zip->data));
		log_ptr += 2;
		memcpy(log_ptr, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		log_ptr += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
		mlog_close(mtr, log_ptr);
	}
}
4227 | |
4228 | /** Parse a MLOG_ZIP_WRITE_TRX_ID record. |
4229 | @param[in] ptr redo log buffer |
4230 | @param[in] end_ptr end of redo log buffer |
4231 | @param[in,out] page uncompressed page |
4232 | @param[in,out] page_zip compressed page |
4233 | @return end of log record |
4234 | @retval NULL if the log record is incomplete */ |
4235 | byte* |
4236 | page_zip_parse_write_trx_id( |
4237 | byte* ptr, |
4238 | byte* end_ptr, |
4239 | page_t* page, |
4240 | page_zip_des_t* page_zip) |
4241 | { |
4242 | byte* const end = 2 + 2 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + ptr; |
4243 | |
4244 | if (UNIV_UNLIKELY(end_ptr < end)) { |
4245 | return(NULL); |
4246 | } |
4247 | |
4248 | uint offset = mach_read_from_2(ptr); |
4249 | uint z_offset = mach_read_from_2(ptr + 2); |
4250 | |
4251 | if (offset < PAGE_ZIP_START |
4252 | || offset >= srv_page_size |
4253 | || z_offset >= srv_page_size) { |
4254 | corrupt: |
4255 | recv_sys->found_corrupt_log = TRUE; |
4256 | |
4257 | return(NULL); |
4258 | } |
4259 | |
4260 | if (page) { |
4261 | if (!page_zip || !page_is_leaf(page)) { |
4262 | goto corrupt; |
4263 | } |
4264 | |
4265 | #ifdef UNIV_ZIP_DEBUG |
4266 | ut_a(page_zip_validate(page_zip, page, NULL)); |
4267 | #endif /* UNIV_ZIP_DEBUG */ |
4268 | |
4269 | byte* field = page + offset; |
4270 | byte* storage = page_zip->data + z_offset; |
4271 | |
4272 | if (storage >= page_zip_dir_start(page_zip)) { |
4273 | goto corrupt; |
4274 | } |
4275 | |
4276 | memcpy(field, ptr + 4, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
4277 | memcpy(storage, ptr + 4, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
4278 | |
4279 | #ifdef UNIV_ZIP_DEBUG |
4280 | ut_a(page_zip_validate(page_zip, page, NULL)); |
4281 | #endif /* UNIV_ZIP_DEBUG */ |
4282 | } |
4283 | |
4284 | return end; |
4285 | } |
4286 | |
4287 | /**********************************************************************//** |
4288 | Clear an area on the uncompressed and compressed page. |
4289 | Do not clear the data payload, as that would grow the modification log. */ |
4290 | static |
4291 | void |
4292 | page_zip_clear_rec( |
4293 | /*===============*/ |
4294 | page_zip_des_t* page_zip, /*!< in/out: compressed page */ |
4295 | byte* rec, /*!< in: record to clear */ |
4296 | const dict_index_t* index, /*!< in: index of rec */ |
4297 | const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */ |
4298 | { |
4299 | ulint heap_no; |
4300 | page_t* page = page_align(rec); |
4301 | byte* storage; |
4302 | byte* field; |
4303 | ulint len; |
4304 | /* page_zip_validate() would fail here if a record |
4305 | containing externally stored columns is being deleted. */ |
4306 | ut_ad(rec_offs_validate(rec, index, offsets)); |
4307 | ut_ad(!page_zip_dir_find(page_zip, page_offset(rec))); |
4308 | ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec))); |
4309 | ut_ad(page_zip_header_cmp(page_zip, page)); |
4310 | |
4311 | heap_no = rec_get_heap_no_new(rec); |
4312 | ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); |
4313 | |
4314 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
4315 | UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); |
4316 | UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), |
4317 | rec_offs_extra_size(offsets)); |
4318 | |
4319 | if (!page_is_leaf(page)) { |
4320 | /* Clear node_ptr. On the compressed page, |
4321 | there is an array of node_ptr immediately before the |
4322 | dense page directory, at the very end of the page. */ |
4323 | storage = page_zip_dir_start(page_zip); |
4324 | ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index) == |
4325 | rec_offs_n_fields(offsets) - 1); |
4326 | field = rec_get_nth_field(rec, offsets, |
4327 | rec_offs_n_fields(offsets) - 1, |
4328 | &len); |
4329 | ut_ad(len == REC_NODE_PTR_SIZE); |
4330 | |
4331 | ut_ad(!rec_offs_any_extern(offsets)); |
4332 | memset(field, 0, REC_NODE_PTR_SIZE); |
4333 | memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE, |
4334 | 0, REC_NODE_PTR_SIZE); |
4335 | } else if (dict_index_is_clust(index)) { |
4336 | /* Clear trx_id and roll_ptr. On the compressed page, |
4337 | there is an array of these fields immediately before the |
4338 | dense page directory, at the very end of the page. */ |
4339 | const ulint trx_id_pos |
4340 | = dict_col_get_clust_pos( |
4341 | dict_table_get_sys_col( |
4342 | index->table, DATA_TRX_ID), index); |
4343 | storage = page_zip_dir_start(page_zip); |
4344 | field = rec_get_nth_field(rec, offsets, trx_id_pos, &len); |
4345 | ut_ad(len == DATA_TRX_ID_LEN); |
4346 | |
4347 | memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
4348 | memset(storage - (heap_no - 1) |
4349 | * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN), |
4350 | 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
4351 | |
4352 | if (rec_offs_any_extern(offsets)) { |
4353 | ulint i; |
4354 | |
4355 | for (i = rec_offs_n_fields(offsets); i--; ) { |
4356 | /* Clear all BLOB pointers in order to make |
4357 | page_zip_validate() pass. */ |
4358 | if (rec_offs_nth_extern(offsets, i)) { |
4359 | field = rec_get_nth_field( |
4360 | rec, offsets, i, &len); |
4361 | ut_ad(len |
4362 | == BTR_EXTERN_FIELD_REF_SIZE); |
4363 | memset(field + len |
4364 | - BTR_EXTERN_FIELD_REF_SIZE, |
4365 | 0, BTR_EXTERN_FIELD_REF_SIZE); |
4366 | } |
4367 | } |
4368 | } |
4369 | } else { |
4370 | ut_ad(!rec_offs_any_extern(offsets)); |
4371 | } |
4372 | |
4373 | #ifdef UNIV_ZIP_DEBUG |
4374 | ut_a(page_zip_validate(page_zip, page, index)); |
4375 | #endif /* UNIV_ZIP_DEBUG */ |
4376 | } |
4377 | |
4378 | /**********************************************************************//** |
4379 | Write the "deleted" flag of a record on a compressed page. The flag must |
4380 | already have been written on the uncompressed page. */ |
4381 | void |
4382 | page_zip_rec_set_deleted( |
4383 | /*=====================*/ |
4384 | page_zip_des_t* page_zip,/*!< in/out: compressed page */ |
4385 | const byte* rec, /*!< in: record on the uncompressed page */ |
4386 | ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */ |
4387 | { |
4388 | byte* slot = page_zip_dir_find(page_zip, page_offset(rec)); |
4389 | ut_a(slot); |
4390 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
4391 | if (flag) { |
4392 | *slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8); |
4393 | } else { |
4394 | *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8); |
4395 | } |
4396 | #ifdef UNIV_ZIP_DEBUG |
4397 | ut_a(page_zip_validate(page_zip, page_align(rec), NULL)); |
4398 | #endif /* UNIV_ZIP_DEBUG */ |
4399 | } |
4400 | |
4401 | /**********************************************************************//** |
4402 | Write the "owned" flag of a record on a compressed page. The n_owned field |
4403 | must already have been written on the uncompressed page. */ |
4404 | void |
4405 | page_zip_rec_set_owned( |
4406 | /*===================*/ |
4407 | page_zip_des_t* page_zip,/*!< in/out: compressed page */ |
4408 | const byte* rec, /*!< in: record on the uncompressed page */ |
4409 | ulint flag) /*!< in: the owned flag (nonzero=TRUE) */ |
4410 | { |
4411 | byte* slot = page_zip_dir_find(page_zip, page_offset(rec)); |
4412 | ut_a(slot); |
4413 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
4414 | if (flag) { |
4415 | *slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8); |
4416 | } else { |
4417 | *slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8); |
4418 | } |
4419 | } |
4420 | |
4421 | /**********************************************************************//** |
4422 | Insert a record to the dense page directory. */ |
4423 | void |
4424 | page_zip_dir_insert( |
4425 | /*================*/ |
4426 | page_zip_des_t* page_zip,/*!< in/out: compressed page */ |
4427 | const byte* prev_rec,/*!< in: record after which to insert */ |
4428 | const byte* free_rec,/*!< in: record from which rec was |
4429 | allocated, or NULL */ |
4430 | byte* rec) /*!< in: record to insert */ |
4431 | { |
4432 | ulint n_dense; |
4433 | byte* slot_rec; |
4434 | byte* slot_free; |
4435 | |
4436 | ut_ad(prev_rec != rec); |
4437 | ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec); |
4438 | ut_ad(page_zip_simple_validate(page_zip)); |
4439 | |
4440 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
4441 | |
4442 | if (page_rec_is_infimum(prev_rec)) { |
4443 | /* Use the first slot. */ |
4444 | slot_rec = page_zip->data + page_zip_get_size(page_zip); |
4445 | } else { |
4446 | byte* end = page_zip->data + page_zip_get_size(page_zip); |
4447 | byte* start = end - page_zip_dir_user_size(page_zip); |
4448 | |
4449 | if (UNIV_LIKELY(!free_rec)) { |
4450 | /* PAGE_N_RECS was already incremented |
4451 | in page_cur_insert_rec_zip(), but the |
4452 | dense directory slot at that position |
4453 | contains garbage. Skip it. */ |
4454 | start += PAGE_ZIP_DIR_SLOT_SIZE; |
4455 | } |
4456 | |
4457 | slot_rec = page_zip_dir_find_low(start, end, |
4458 | page_offset(prev_rec)); |
4459 | ut_a(slot_rec); |
4460 | } |
4461 | |
4462 | /* Read the old n_dense (n_heap may have been incremented). */ |
4463 | n_dense = page_dir_get_n_heap(page_zip->data) |
4464 | - (PAGE_HEAP_NO_USER_LOW + 1U); |
4465 | |
4466 | if (UNIV_LIKELY_NULL(free_rec)) { |
4467 | /* The record was allocated from the free list. |
4468 | Shift the dense directory only up to that slot. |
4469 | Note that in this case, n_dense is actually |
4470 | off by one, because page_cur_insert_rec_zip() |
4471 | did not increment n_heap. */ |
4472 | ut_ad(rec_get_heap_no_new(rec) < n_dense + 1 |
4473 | + PAGE_HEAP_NO_USER_LOW); |
4474 | ut_ad(rec >= free_rec); |
4475 | slot_free = page_zip_dir_find(page_zip, page_offset(free_rec)); |
4476 | ut_ad(slot_free); |
4477 | slot_free += PAGE_ZIP_DIR_SLOT_SIZE; |
4478 | } else { |
4479 | /* The record was allocated from the heap. |
4480 | Shift the entire dense directory. */ |
4481 | ut_ad(rec_get_heap_no_new(rec) == n_dense |
4482 | + PAGE_HEAP_NO_USER_LOW); |
4483 | |
4484 | /* Shift to the end of the dense page directory. */ |
4485 | slot_free = page_zip->data + page_zip_get_size(page_zip) |
4486 | - PAGE_ZIP_DIR_SLOT_SIZE * n_dense; |
4487 | } |
4488 | |
4489 | /* Shift the dense directory to allocate place for rec. */ |
4490 | memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free, |
4491 | ulint(slot_rec - slot_free)); |
4492 | |
4493 | /* Write the entry for the inserted record. |
4494 | The "owned" and "deleted" flags must be zero. */ |
4495 | mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec)); |
4496 | } |
4497 | |
4498 | /**********************************************************************//** |
4499 | Shift the dense page directory and the array of BLOB pointers |
4500 | when a record is deleted. */ |
4501 | void |
4502 | page_zip_dir_delete( |
4503 | /*================*/ |
4504 | page_zip_des_t* page_zip, /*!< in/out: compressed page */ |
4505 | byte* rec, /*!< in: deleted record */ |
4506 | const dict_index_t* index, /*!< in: index of rec */ |
4507 | const ulint* offsets, /*!< in: rec_get_offsets(rec) */ |
4508 | const byte* free) /*!< in: previous start of |
4509 | the free list */ |
4510 | { |
4511 | byte* slot_rec; |
4512 | byte* slot_free; |
4513 | ulint n_ext; |
4514 | page_t* page = page_align(rec); |
4515 | |
4516 | ut_ad(rec_offs_validate(rec, index, offsets)); |
4517 | ut_ad(rec_offs_comp(offsets)); |
4518 | |
4519 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
4520 | UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); |
4521 | UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), |
4522 | rec_offs_extra_size(offsets)); |
4523 | |
4524 | slot_rec = page_zip_dir_find(page_zip, page_offset(rec)); |
4525 | |
4526 | ut_a(slot_rec); |
4527 | |
4528 | /* This could not be done before page_zip_dir_find(). */ |
4529 | page_header_set_field(page, page_zip, PAGE_N_RECS, |
4530 | (ulint)(page_get_n_recs(page) - 1)); |
4531 | |
4532 | if (UNIV_UNLIKELY(!free)) { |
4533 | /* Make the last slot the start of the free list. */ |
4534 | slot_free = page_zip->data + page_zip_get_size(page_zip) |
4535 | - PAGE_ZIP_DIR_SLOT_SIZE |
4536 | * (page_dir_get_n_heap(page_zip->data) |
4537 | - PAGE_HEAP_NO_USER_LOW); |
4538 | } else { |
4539 | slot_free = page_zip_dir_find_free(page_zip, |
4540 | page_offset(free)); |
4541 | ut_a(slot_free < slot_rec); |
4542 | /* Grow the free list by one slot by moving the start. */ |
4543 | slot_free += PAGE_ZIP_DIR_SLOT_SIZE; |
4544 | } |
4545 | |
4546 | if (UNIV_LIKELY(slot_rec > slot_free)) { |
4547 | memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE, |
4548 | slot_free, |
4549 | ulint(slot_rec - slot_free)); |
4550 | } |
4551 | |
4552 | /* Write the entry for the deleted record. |
4553 | The "owned" and "deleted" flags will be cleared. */ |
4554 | mach_write_to_2(slot_free, page_offset(rec)); |
4555 | |
4556 | if (!page_is_leaf(page) || !dict_index_is_clust(index)) { |
4557 | ut_ad(!rec_offs_any_extern(offsets)); |
4558 | goto skip_blobs; |
4559 | } |
4560 | |
4561 | n_ext = rec_offs_n_extern(offsets); |
4562 | if (UNIV_UNLIKELY(n_ext != 0)) { |
4563 | /* Shift and zero fill the array of BLOB pointers. */ |
4564 | ulint blob_no; |
4565 | byte* externs; |
4566 | byte* ext_end; |
4567 | |
4568 | blob_no = page_zip_get_n_prev_extern(page_zip, rec, index); |
4569 | ut_a(blob_no + n_ext <= page_zip->n_blobs); |
4570 | |
4571 | externs = page_zip->data + page_zip_get_size(page_zip) |
4572 | - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW) |
4573 | * PAGE_ZIP_CLUST_LEAF_SLOT_SIZE; |
4574 | |
4575 | ext_end = externs - page_zip->n_blobs |
4576 | * BTR_EXTERN_FIELD_REF_SIZE; |
4577 | externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE; |
4578 | |
4579 | page_zip->n_blobs -= static_cast<unsigned>(n_ext); |
4580 | /* Shift and zero fill the array. */ |
4581 | memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end, |
4582 | ulint(page_zip->n_blobs - blob_no) |
4583 | * BTR_EXTERN_FIELD_REF_SIZE); |
4584 | memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE); |
4585 | } |
4586 | |
4587 | skip_blobs: |
4588 | /* The compression algorithm expects info_bits and n_owned |
4589 | to be 0 for deleted records. */ |
4590 | rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */ |
4591 | |
4592 | page_zip_clear_rec(page_zip, rec, index, offsets); |
4593 | } |
4594 | |
4595 | /**********************************************************************//** |
4596 | Add a slot to the dense page directory. */ |
4597 | void |
4598 | page_zip_dir_add_slot( |
4599 | /*==================*/ |
4600 | page_zip_des_t* page_zip, /*!< in/out: compressed page */ |
4601 | ulint is_clustered) /*!< in: nonzero for clustered index, |
4602 | zero for others */ |
4603 | { |
4604 | ulint n_dense; |
4605 | byte* dir; |
4606 | byte* stored; |
4607 | |
4608 | ut_ad(page_is_comp(page_zip->data)); |
4609 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
4610 | |
4611 | /* Read the old n_dense (n_heap has already been incremented). */ |
4612 | n_dense = page_dir_get_n_heap(page_zip->data) |
4613 | - (PAGE_HEAP_NO_USER_LOW + 1U); |
4614 | |
4615 | dir = page_zip->data + page_zip_get_size(page_zip) |
4616 | - PAGE_ZIP_DIR_SLOT_SIZE * n_dense; |
4617 | |
4618 | if (!page_is_leaf(page_zip->data)) { |
4619 | ut_ad(!page_zip->n_blobs); |
4620 | stored = dir - n_dense * REC_NODE_PTR_SIZE; |
4621 | } else if (is_clustered) { |
4622 | /* Move the BLOB pointer array backwards to make space for the |
4623 | roll_ptr and trx_id columns and the dense directory slot. */ |
4624 | byte* externs; |
4625 | |
4626 | stored = dir - n_dense |
4627 | * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); |
4628 | externs = stored |
4629 | - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE; |
4630 | ASSERT_ZERO(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE, |
4631 | PAGE_ZIP_CLUST_LEAF_SLOT_SIZE); |
4632 | memmove(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE, |
4633 | externs, ulint(stored - externs)); |
4634 | } else { |
4635 | stored = dir |
4636 | - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE; |
4637 | ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE, |
4638 | static_cast<size_t>(PAGE_ZIP_DIR_SLOT_SIZE)); |
4639 | } |
4640 | |
4641 | /* Move the uncompressed area backwards to make space |
4642 | for one directory slot. */ |
4643 | memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, ulint(dir - stored)); |
4644 | } |
4645 | |
4646 | /***********************************************************//** |
4647 | Parses a log record of writing to the header of a page. |
4648 | @return end of log record or NULL */ |
4649 | byte* |
4650 | ( |
4651 | /*========================*/ |
4652 | byte* ptr, /*!< in: redo log buffer */ |
4653 | byte* end_ptr,/*!< in: redo log buffer end */ |
4654 | page_t* page, /*!< in/out: uncompressed page */ |
4655 | page_zip_des_t* page_zip)/*!< in/out: compressed page */ |
4656 | { |
4657 | ulint offset; |
4658 | ulint len; |
4659 | |
4660 | ut_ad(ptr != NULL); |
4661 | ut_ad(end_ptr!= NULL); |
4662 | ut_ad(!page == !page_zip); |
4663 | |
4664 | if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) { |
4665 | |
4666 | return(NULL); |
4667 | } |
4668 | |
4669 | offset = (ulint) *ptr++; |
4670 | len = (ulint) *ptr++; |
4671 | |
4672 | if (len == 0 || offset + len >= PAGE_DATA) { |
4673 | corrupt: |
4674 | recv_sys->found_corrupt_log = TRUE; |
4675 | |
4676 | return(NULL); |
4677 | } |
4678 | |
4679 | if (end_ptr < ptr + len) { |
4680 | |
4681 | return(NULL); |
4682 | } |
4683 | |
4684 | if (page) { |
4685 | if (!page_zip) { |
4686 | |
4687 | goto corrupt; |
4688 | } |
4689 | #ifdef UNIV_ZIP_DEBUG |
4690 | ut_a(page_zip_validate(page_zip, page, NULL)); |
4691 | #endif /* UNIV_ZIP_DEBUG */ |
4692 | |
4693 | memcpy(page + offset, ptr, len); |
4694 | memcpy(page_zip->data + offset, ptr, len); |
4695 | |
4696 | #ifdef UNIV_ZIP_DEBUG |
4697 | ut_a(page_zip_validate(page_zip, page, NULL)); |
4698 | #endif /* UNIV_ZIP_DEBUG */ |
4699 | } |
4700 | |
4701 | return(ptr + len); |
4702 | } |
4703 | |
4704 | /**********************************************************************//** |
4705 | Write a log record of writing to the uncompressed header portion of a page. */ |
4706 | void |
4707 | ( |
4708 | /*======================*/ |
4709 | const byte* data, /*!< in: data on the uncompressed page */ |
4710 | ulint length, /*!< in: length of the data */ |
4711 | mtr_t* mtr) /*!< in: mini-transaction */ |
4712 | { |
4713 | byte* log_ptr = mlog_open(mtr, 11 + 1 + 1); |
4714 | ulint offset = page_offset(data); |
4715 | |
4716 | ut_ad(offset < PAGE_DATA); |
4717 | ut_ad(offset + length < PAGE_DATA); |
4718 | compile_time_assert(PAGE_DATA < 256U); |
4719 | ut_ad(length > 0); |
4720 | ut_ad(length < 256); |
4721 | |
4722 | /* If no logging is requested, we may return now */ |
4723 | if (UNIV_UNLIKELY(!log_ptr)) { |
4724 | |
4725 | return; |
4726 | } |
4727 | |
4728 | log_ptr = mlog_write_initial_log_record_fast( |
4729 | (byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr); |
4730 | *log_ptr++ = (byte) offset; |
4731 | *log_ptr++ = (byte) length; |
4732 | mlog_close(mtr, log_ptr); |
4733 | |
4734 | mlog_catenate_string(mtr, data, length); |
4735 | } |
4736 | |
4737 | /**********************************************************************//** |
4738 | Reorganize and compress a page. This is a low-level operation for |
4739 | compressed pages, to be used when page_zip_compress() fails. |
4740 | On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written. |
4741 | The function btr_page_reorganize() should be preferred whenever possible. |
4742 | IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a |
4743 | non-clustered index, the caller must update the insert buffer free |
4744 | bits in the same mini-transaction in such a way that the modification |
4745 | will be redo-logged. |
4746 | @return TRUE on success, FALSE on failure; page_zip will be left |
4747 | intact on failure, but page will be overwritten. */ |
4748 | ibool |
4749 | page_zip_reorganize( |
4750 | /*================*/ |
4751 | buf_block_t* block, /*!< in/out: page with compressed page; |
4752 | on the compressed page, in: size; |
4753 | out: data, n_blobs, |
4754 | m_start, m_end, m_nonempty */ |
4755 | dict_index_t* index, /*!< in: index of the B-tree node */ |
4756 | mtr_t* mtr) /*!< in: mini-transaction */ |
4757 | { |
4758 | buf_pool_t* buf_pool = buf_pool_from_block(block); |
4759 | page_zip_des_t* page_zip = buf_block_get_page_zip(block); |
4760 | page_t* page = buf_block_get_frame(block); |
4761 | buf_block_t* temp_block; |
4762 | page_t* temp_page; |
4763 | |
4764 | ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); |
4765 | ut_ad(page_is_comp(page)); |
4766 | ut_ad(!dict_index_is_ibuf(index)); |
4767 | ut_ad(!index->table->is_temporary()); |
4768 | /* Note that page_zip_validate(page_zip, page, index) may fail here. */ |
4769 | UNIV_MEM_ASSERT_RW(page, srv_page_size); |
4770 | UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
4771 | |
4772 | /* Disable logging */ |
4773 | mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); |
4774 | |
4775 | temp_block = buf_block_alloc(buf_pool); |
4776 | btr_search_drop_page_hash_index(block); |
4777 | temp_page = temp_block->frame; |
4778 | |
4779 | /* Copy the old page to temporary space */ |
4780 | buf_frame_copy(temp_page, page); |
4781 | |
4782 | /* Recreate the page: note that global data on page (possible |
4783 | segment headers, next page-field, etc.) is preserved intact */ |
4784 | |
4785 | page_create(block, mtr, TRUE, dict_index_is_spatial(index)); |
4786 | |
4787 | /* Copy the records from the temporary space to the recreated page; |
4788 | do not copy the lock bits yet */ |
4789 | |
4790 | page_copy_rec_list_end_no_locks(block, temp_block, |
4791 | page_get_infimum_rec(temp_page), |
4792 | index, mtr); |
4793 | |
4794 | /* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. */ |
4795 | memcpy(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), |
4796 | temp_page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 8); |
4797 | /* PAGE_MAX_TRX_ID must be set on secondary index leaf pages. */ |
4798 | ut_ad(dict_index_is_clust(index) || !page_is_leaf(temp_page) |
4799 | || page_get_max_trx_id(page) != 0); |
4800 | /* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than |
4801 | clustered index root pages. */ |
4802 | ut_ad(page_get_max_trx_id(page) == 0 |
4803 | || (dict_index_is_clust(index) |
4804 | ? page_is_root(temp_page) |
4805 | : page_is_leaf(temp_page))); |
4806 | |
4807 | /* Restore logging. */ |
4808 | mtr_set_log_mode(mtr, log_mode); |
4809 | |
4810 | if (!page_zip_compress(page_zip, page, index, |
4811 | page_zip_level, NULL, mtr)) { |
4812 | |
4813 | buf_block_free(temp_block); |
4814 | return(FALSE); |
4815 | } |
4816 | |
4817 | lock_move_reorganize_page(block, temp_block); |
4818 | |
4819 | buf_block_free(temp_block); |
4820 | return(TRUE); |
4821 | } |
4822 | |
4823 | /**********************************************************************//** |
4824 | Copy the records of a page byte for byte. Do not copy the page header |
4825 | or trailer, except those B-tree header fields that are directly |
4826 | related to the storage of records. Also copy PAGE_MAX_TRX_ID. |
4827 | NOTE: The caller must update the lock table and the adaptive hash index. */ |
4828 | void |
4829 | page_zip_copy_recs( |
4830 | /*===============*/ |
4831 | page_zip_des_t* page_zip, /*!< out: copy of src_zip |
4832 | (n_blobs, m_start, m_end, |
4833 | m_nonempty, data[0..size-1]) */ |
4834 | page_t* page, /*!< out: copy of src */ |
4835 | const page_zip_des_t* src_zip, /*!< in: compressed page */ |
4836 | const page_t* src, /*!< in: page */ |
4837 | dict_index_t* index, /*!< in: index of the B-tree */ |
4838 | mtr_t* mtr) /*!< in: mini-transaction */ |
4839 | { |
4840 | ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); |
4841 | ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX)); |
4842 | ut_ad(!dict_index_is_ibuf(index)); |
4843 | ut_ad(!index->table->is_temporary()); |
4844 | #ifdef UNIV_ZIP_DEBUG |
4845 | /* The B-tree operations that call this function may set |
4846 | FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag |
4847 | mismatch. A strict page_zip_validate() will be executed later |
4848 | during the B-tree operations. */ |
4849 | ut_a(page_zip_validate_low(src_zip, src, index, TRUE)); |
4850 | #endif /* UNIV_ZIP_DEBUG */ |
4851 | ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip)); |
4852 | if (UNIV_UNLIKELY(src_zip->n_blobs)) { |
4853 | ut_a(page_is_leaf(src)); |
4854 | ut_a(dict_index_is_clust(index)); |
4855 | } |
4856 | |
4857 | UNIV_MEM_ASSERT_W(page, srv_page_size); |
4858 | UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip)); |
4859 | UNIV_MEM_ASSERT_RW(src, srv_page_size); |
4860 | UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip)); |
4861 | |
4862 | /* Copy those B-tree page header fields that are related to |
4863 | the records stored in the page. Also copy the field |
4864 | PAGE_MAX_TRX_ID. Skip the rest of the page header and |
4865 | trailer. On the compressed page, there is no trailer. */ |
4866 | compile_time_assert(PAGE_MAX_TRX_ID + 8 == PAGE_HEADER_PRIV_END); |
4867 | memcpy(PAGE_HEADER + page, PAGE_HEADER + src, |
4868 | PAGE_HEADER_PRIV_END); |
4869 | memcpy(PAGE_DATA + page, PAGE_DATA + src, |
4870 | srv_page_size - PAGE_DATA - FIL_PAGE_DATA_END); |
4871 | memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data, |
4872 | PAGE_HEADER_PRIV_END); |
4873 | memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data, |
4874 | page_zip_get_size(page_zip) - PAGE_DATA); |
4875 | |
4876 | if (dict_index_is_clust(index)) { |
4877 | /* Reset the PAGE_ROOT_AUTO_INC field when copying |
4878 | from a root page. */ |
4879 | memset(PAGE_HEADER + PAGE_ROOT_AUTO_INC + page, 0, 8); |
4880 | memset(PAGE_HEADER + PAGE_ROOT_AUTO_INC + page_zip->data, |
4881 | 0, 8); |
4882 | } else { |
4883 | /* The PAGE_MAX_TRX_ID must be nonzero on leaf pages |
4884 | of secondary indexes, and 0 on others. */ |
4885 | ut_ad(!page_is_leaf(src) == !page_get_max_trx_id(src)); |
4886 | } |
4887 | |
4888 | /* Copy all fields of src_zip to page_zip, except the pointer |
4889 | to the compressed data page. */ |
4890 | { |
4891 | page_zip_t* data = page_zip->data; |
4892 | memcpy(page_zip, src_zip, sizeof *page_zip); |
4893 | page_zip->data = data; |
4894 | } |
4895 | ut_ad(page_zip_get_trailer_len(page_zip, dict_index_is_clust(index)) |
4896 | + page_zip->m_end < page_zip_get_size(page_zip)); |
4897 | |
4898 | if (!page_is_leaf(src) |
4899 | && UNIV_UNLIKELY(!page_has_prev(src)) |
4900 | && UNIV_LIKELY(page_has_prev(page))) { |
4901 | /* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */ |
4902 | ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM, |
4903 | TRUE); |
4904 | if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) { |
4905 | rec_t* rec = page + offs; |
4906 | ut_a(rec[-REC_N_NEW_EXTRA_BYTES] |
4907 | & REC_INFO_MIN_REC_FLAG); |
4908 | rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG; |
4909 | } |
4910 | } |
4911 | |
4912 | #ifdef UNIV_ZIP_DEBUG |
4913 | ut_a(page_zip_validate(page_zip, page, index)); |
4914 | #endif /* UNIV_ZIP_DEBUG */ |
4915 | page_zip_compress_write_log(page_zip, page, index, mtr); |
4916 | } |
4917 | |
4918 | /**********************************************************************//** |
4919 | Parses a log record of compressing an index page. |
4920 | @return end of log record or NULL */ |
4921 | byte* |
4922 | page_zip_parse_compress( |
4923 | /*====================*/ |
4924 | byte* ptr, /*!< in: buffer */ |
4925 | byte* end_ptr,/*!< in: buffer end */ |
4926 | page_t* page, /*!< out: uncompressed page */ |
4927 | page_zip_des_t* page_zip)/*!< out: compressed page */ |
4928 | { |
4929 | ulint size; |
4930 | ulint trailer_size; |
4931 | |
4932 | ut_ad(ptr != NULL); |
4933 | ut_ad(end_ptr!= NULL); |
4934 | ut_ad(!page == !page_zip); |
4935 | |
4936 | if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) { |
4937 | |
4938 | return(NULL); |
4939 | } |
4940 | |
4941 | size = mach_read_from_2(ptr); |
4942 | ptr += 2; |
4943 | trailer_size = mach_read_from_2(ptr); |
4944 | ptr += 2; |
4945 | |
4946 | if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) { |
4947 | |
4948 | return(NULL); |
4949 | } |
4950 | |
4951 | if (page) { |
4952 | if (!page_zip || page_zip_get_size(page_zip) < size) { |
4953 | corrupt: |
4954 | recv_sys->found_corrupt_log = TRUE; |
4955 | |
4956 | return(NULL); |
4957 | } |
4958 | |
4959 | memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4); |
4960 | memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4); |
4961 | memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size); |
4962 | memset(page_zip->data + FIL_PAGE_TYPE + size, 0, |
4963 | page_zip_get_size(page_zip) - trailer_size |
4964 | - (FIL_PAGE_TYPE + size)); |
4965 | memcpy(page_zip->data + page_zip_get_size(page_zip) |
4966 | - trailer_size, ptr + 8 + size, trailer_size); |
4967 | |
4968 | if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page, |
4969 | TRUE))) { |
4970 | |
4971 | goto corrupt; |
4972 | } |
4973 | } |
4974 | |
4975 | return(ptr + 8 + size + trailer_size); |
4976 | } |
4977 | #endif /* !UNIV_INNOCHECKSUM */ |
4978 | |
4979 | /** Calculate the compressed page checksum. |
4980 | @param[in] data compressed page |
4981 | @param[in] size size of compressed page |
4982 | @param[in] algo algorithm to use |
@param[in]	use_legacy_big_endian	only used if algo is
SRV_CHECKSUM_ALGORITHM_CRC32 or SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 - if true
then use big endian byteorder when converting byte strings to integers.
@return page checksum */
uint32_t
page_zip_calc_checksum(
	const void*	data,
	ulint		size,
	srv_checksum_algorithm_t algo,
	bool		use_legacy_big_endian /* = false */)
{
	uLong		adler;
	const Bytef*	s = static_cast<const byte*>(data);

	/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
	and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */

	switch (algo) {
	case SRV_CHECKSUM_ALGORITHM_CRC32:
	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
	{
		ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		/* Optionally use the big-endian CRC32 variant for
		compatibility (see the function comment above). */
		ut_crc32_func_t	crc32_func = use_legacy_big_endian
			? ut_crc32_legacy_big_endian
			: ut_crc32;

		/* CRC-32C over three disjoint byte ranges of the page,
		XORed together; the excluded fields lie between them. */
		const uint32_t	crc32
			= crc32_func(
				s + FIL_PAGE_OFFSET,
				FIL_PAGE_LSN - FIL_PAGE_OFFSET)
			^ crc32_func(
				s + FIL_PAGE_TYPE, 2)
			^ crc32_func(
				s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
				size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		return(crc32);
	}
	case SRV_CHECKSUM_ALGORITHM_INNODB:
	case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
		ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		/* Adler-32 (zlib) over the same three byte ranges,
		chained rather than XORed. */
		adler = adler32(0L, s + FIL_PAGE_OFFSET,
				FIL_PAGE_LSN - FIL_PAGE_OFFSET);
		adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
		adler = adler32(
			adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
			static_cast<uInt>(size)
			- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		return(uint32_t(adler));
	case SRV_CHECKSUM_ALGORITHM_NONE:
	case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
		return(BUF_NO_CHECKSUM_MAGIC);
	/* no default so the compiler will emit a warning if new enum
	is added and not handled here */
	}

	ut_error;
	return(0);
}
5047 | |
5048 | /**********************************************************************//** |
5049 | Verify a compressed page's checksum. |
5050 | @return TRUE if the stored checksum is valid according to the value of |
5051 | innodb_checksum_algorithm */ |
5052 | ibool |
5053 | page_zip_verify_checksum( |
5054 | /*=====================*/ |
5055 | const void* data, /*!< in: compressed page */ |
5056 | ulint size) /*!< in: size of compressed page */ |
5057 | { |
5058 | ib_uint32_t stored; |
5059 | ib_uint32_t calc; |
5060 | |
5061 | stored = static_cast<ib_uint32_t>(mach_read_from_4( |
5062 | static_cast<const unsigned char*>(data) + FIL_PAGE_SPACE_OR_CHKSUM)); |
5063 | |
5064 | ulint page_no MY_ATTRIBUTE((unused)) = |
5065 | mach_read_from_4(static_cast<const unsigned char*> |
5066 | (data) + FIL_PAGE_OFFSET); |
5067 | ulint space_id MY_ATTRIBUTE((unused)) = |
5068 | mach_read_from_4(static_cast<const unsigned char*> |
5069 | (data) + FIL_PAGE_SPACE_ID); |
5070 | const page_id_t page_id(space_id, page_no); |
5071 | |
5072 | compile_time_assert(!(FIL_PAGE_LSN % 8)); |
5073 | |
5074 | /* Check if page is empty */ |
5075 | if (stored == 0 |
5076 | && *reinterpret_cast<const ib_uint64_t*>(static_cast<const char*>( |
5077 | data) |
5078 | + FIL_PAGE_LSN) == 0) { |
5079 | /* make sure that the page is really empty */ |
5080 | #ifdef UNIV_INNOCHECKSUM |
5081 | ulint i; |
5082 | for (i = 0; i < size; i++) { |
5083 | if (*((const char*) data + i) != 0) |
5084 | break; |
5085 | } |
5086 | if (i >= size) { |
5087 | if (log_file) { |
5088 | fprintf(log_file, "Page::%llu is empty and" |
5089 | " uncorrupted\n" , cur_page_num); |
5090 | } |
5091 | |
5092 | return(TRUE); |
5093 | } |
5094 | #else |
5095 | for (ulint i = 0; i < size; i++) { |
5096 | if (*((const char*) data + i) != 0) { |
5097 | return(FALSE); |
5098 | } |
5099 | } |
5100 | /* Empty page */ |
5101 | return(TRUE); |
5102 | #endif /* UNIV_INNOCHECKSUM */ |
5103 | } |
5104 | |
5105 | const srv_checksum_algorithm_t curr_algo = |
5106 | static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); |
5107 | |
5108 | if (curr_algo == SRV_CHECKSUM_ALGORITHM_NONE) { |
5109 | return(TRUE); |
5110 | } |
5111 | |
5112 | calc = static_cast<ib_uint32_t>(page_zip_calc_checksum( |
5113 | data, size, curr_algo)); |
5114 | |
5115 | #ifdef UNIV_INNOCHECKSUM |
5116 | if (log_file) { |
5117 | fprintf(log_file, "page::%llu;" |
5118 | " %s checksum: calculated = %u;" |
5119 | " recorded = %u\n" , cur_page_num, |
5120 | buf_checksum_algorithm_name( |
5121 | static_cast<srv_checksum_algorithm_t>( |
5122 | srv_checksum_algorithm)), |
5123 | calc, stored); |
5124 | } |
5125 | |
5126 | if (!strict_verify) { |
5127 | |
5128 | const uint32_t crc32 = page_zip_calc_checksum( |
5129 | data, size, SRV_CHECKSUM_ALGORITHM_CRC32); |
5130 | |
5131 | if (log_file) { |
5132 | fprintf(log_file, "page::%llu: crc32 checksum:" |
5133 | " calculated = %u; recorded = %u\n" , |
5134 | cur_page_num, crc32, stored); |
5135 | fprintf(log_file, "page::%llu: none checksum:" |
5136 | " calculated = %lu; recorded = %u\n" , |
5137 | cur_page_num, BUF_NO_CHECKSUM_MAGIC, stored); |
5138 | } |
5139 | } |
5140 | #endif /* UNIV_INNOCHECKSUM */ |
5141 | |
5142 | if (stored == calc) { |
5143 | return(TRUE); |
5144 | } |
5145 | |
5146 | bool legacy_checksum_checked = false; |
5147 | |
5148 | switch (curr_algo) { |
5149 | case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: |
5150 | case SRV_CHECKSUM_ALGORITHM_CRC32: { |
5151 | |
5152 | if (stored == BUF_NO_CHECKSUM_MAGIC) { |
5153 | #ifndef UNIV_INNOCHECKSUM |
5154 | if (curr_algo |
5155 | == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { |
5156 | page_warn_strict_checksum( |
5157 | curr_algo, |
5158 | SRV_CHECKSUM_ALGORITHM_NONE, |
5159 | page_id); |
5160 | } |
5161 | #endif /* UNIV_INNOCHECKSUM */ |
5162 | |
5163 | return(TRUE); |
5164 | } |
5165 | |
5166 | /* We need to check whether the stored checksum matches legacy |
5167 | big endian checksum or Innodb checksum. We optimize the order |
5168 | based on earlier results. if earlier we have found pages |
5169 | matching legacy big endian checksum, we try to match it first. |
5170 | Otherwise we check innodb checksum first. */ |
5171 | if (legacy_big_endian_checksum) { |
5172 | const uint32_t calculated = |
5173 | page_zip_calc_checksum(data, size, curr_algo, true); |
5174 | if (stored == calculated) { |
5175 | |
5176 | return(TRUE); |
5177 | } |
5178 | legacy_checksum_checked = true; |
5179 | } |
5180 | |
5181 | uint32_t calculated = |
5182 | page_zip_calc_checksum(data, size, SRV_CHECKSUM_ALGORITHM_INNODB); |
5183 | |
5184 | if (stored == calculated) { |
5185 | |
5186 | #ifndef UNIV_INNOCHECKSUM |
5187 | if (curr_algo |
5188 | == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { |
5189 | page_warn_strict_checksum( |
5190 | curr_algo, |
5191 | SRV_CHECKSUM_ALGORITHM_INNODB, |
5192 | page_id); |
5193 | } |
5194 | #endif /* UNIV_INNOCHECKSUM */ |
5195 | |
5196 | return(TRUE); |
5197 | } |
5198 | |
5199 | calculated = page_zip_calc_checksum( |
5200 | data, size, curr_algo, true); |
5201 | |
5202 | /* If legacy checksum is not checked, do it now. */ |
5203 | if ((legacy_checksum_checked |
5204 | && stored == calculated)) { |
5205 | legacy_big_endian_checksum = true; |
5206 | return(TRUE); |
5207 | } |
5208 | |
5209 | break; |
5210 | } |
5211 | case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: |
5212 | case SRV_CHECKSUM_ALGORITHM_INNODB: { |
5213 | |
5214 | if (stored == BUF_NO_CHECKSUM_MAGIC) { |
5215 | #ifndef UNIV_INNOCHECKSUM |
5216 | if (curr_algo |
5217 | == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { |
5218 | page_warn_strict_checksum( |
5219 | curr_algo, |
5220 | SRV_CHECKSUM_ALGORITHM_NONE, |
5221 | page_id); |
5222 | } |
5223 | #endif /* UNIV_INNOCHECKSUM */ |
5224 | |
5225 | return(TRUE); |
5226 | } |
5227 | |
5228 | const uint32_t calculated = page_zip_calc_checksum( |
5229 | data, size, SRV_CHECKSUM_ALGORITHM_CRC32); |
5230 | uint32_t calculated1; |
5231 | |
5232 | if (stored == calculated |
5233 | || stored == (calculated1 = |
5234 | page_zip_calc_checksum(data, size, SRV_CHECKSUM_ALGORITHM_CRC32, true)) |
5235 | ) { |
5236 | #ifndef UNIV_INNOCHECKSUM |
5237 | if (curr_algo |
5238 | == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { |
5239 | page_warn_strict_checksum( |
5240 | curr_algo, |
5241 | SRV_CHECKSUM_ALGORITHM_CRC32, |
5242 | page_id); |
5243 | } |
5244 | #endif /* UNIV_INNOCHECKSUM */ |
5245 | return(TRUE); |
5246 | } |
5247 | |
5248 | break; |
5249 | } |
5250 | case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: { |
5251 | |
5252 | uint32_t calculated = page_zip_calc_checksum( |
5253 | data, size, SRV_CHECKSUM_ALGORITHM_CRC32); |
5254 | const uint32_t calculated1 = page_zip_calc_checksum( |
5255 | data, size, SRV_CHECKSUM_ALGORITHM_CRC32, true); |
5256 | |
5257 | if (stored == calculated |
5258 | || stored == calculated1) { |
5259 | #ifndef UNIV_INNOCHECKSUM |
5260 | page_warn_strict_checksum( |
5261 | curr_algo, |
5262 | SRV_CHECKSUM_ALGORITHM_CRC32, |
5263 | page_id); |
5264 | #endif /* UNIV_INNOCHECKSUM */ |
5265 | return(TRUE); |
5266 | } |
5267 | |
5268 | calculated = page_zip_calc_checksum( |
5269 | data, size, SRV_CHECKSUM_ALGORITHM_INNODB); |
5270 | |
5271 | if (stored == calculated) { |
5272 | |
5273 | #ifndef UNIV_INNOCHECKSUM |
5274 | page_warn_strict_checksum( |
5275 | curr_algo, |
5276 | SRV_CHECKSUM_ALGORITHM_INNODB, |
5277 | page_id); |
5278 | #endif /* UNIV_INNOCHECKSUM */ |
5279 | return(TRUE); |
5280 | } |
5281 | |
5282 | break; |
5283 | } |
5284 | case SRV_CHECKSUM_ALGORITHM_NONE: |
5285 | ut_error; |
5286 | /* no default so the compiler will emit a warning if new enum |
5287 | is added and not handled here */ |
5288 | } |
5289 | |
5290 | return(FALSE); |
5291 | } |
5292 | |