1/*****************************************************************************
2
3Copyright (c) 2012, 2018, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2015, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file row/row0import.cc
22Import a tablespace to a running instance.
23
24Created 2012-02-08 by Sunny Bains.
25*******************************************************/
26
27#include "ha_prototypes.h"
28
29#include "row0import.h"
30#include "btr0pcur.h"
31#include "que0que.h"
32#include "dict0boot.h"
33#include "ibuf0ibuf.h"
34#include "pars0pars.h"
35#include "row0sel.h"
36#include "row0mysql.h"
37#include "srv0start.h"
38#include "row0quiesce.h"
39#include "fil0pagecompress.h"
40#include "trx0undo.h"
41#include "ut0new.h"
42
43#include <vector>
44
45#ifdef HAVE_MY_AES_H
46#include <my_aes.h>
47#endif
48
/** The size of the buffer to use for IO.
The argument is parenthesized so that an expression such as
IO_BUFFER_SIZE(a + b) divides by the whole expression.
@param n physical page size in bytes (must be non-zero)
@return number of pages of size n that fit in 1 MiB */
#define IO_BUFFER_SIZE(n)	((1024 * 1024) / (n))
53
54/** For gathering stats on records during phase I */
55struct row_stats_t {
56 ulint m_n_deleted; /*!< Number of deleted records
57 found in the index */
58
59 ulint m_n_purged; /*!< Number of records purged
60 optimisatically */
61
62 ulint m_n_rows; /*!< Number of rows */
63
64 ulint m_n_purge_failed; /*!< Number of deleted rows
65 that could not be purged */
66};
67
68/** Index information required by IMPORT. */
69struct row_index_t {
70 index_id_t m_id; /*!< Index id of the table
71 in the exporting server */
72 byte* m_name; /*!< Index name */
73
74 ulint m_space; /*!< Space where it is placed */
75
76 ulint m_page_no; /*!< Root page number */
77
78 ulint m_type; /*!< Index type */
79
80 ulint m_trx_id_offset; /*!< Relevant only for clustered
81 indexes, offset of transaction
82 id system column */
83
84 ulint m_n_user_defined_cols; /*!< User defined columns */
85
86 ulint m_n_uniq; /*!< Number of columns that can
87 uniquely identify the row */
88
89 ulint m_n_nullable; /*!< Number of nullable
90 columns */
91
92 ulint m_n_fields; /*!< Total number of fields */
93
94 dict_field_t* m_fields; /*!< Index fields */
95
96 const dict_index_t*
97 m_srv_index; /*!< Index instance in the
98 importing server */
99
100 row_stats_t m_stats; /*!< Statistics gathered during
101 the import phase */
102
103};
104
105/** Meta data required by IMPORT. */
106struct row_import {
107 row_import() UNIV_NOTHROW
108 :
109 m_table(),
110 m_version(),
111 m_hostname(),
112 m_table_name(),
113 m_autoinc(),
114 m_page_size(0, 0, false),
115 m_flags(),
116 m_n_cols(),
117 m_cols(),
118 m_col_names(),
119 m_n_indexes(),
120 m_indexes(),
121 m_missing(true) {}
122
123 ~row_import() UNIV_NOTHROW;
124
125 /** Find the index entry in in the indexes array.
126 @param name index name
127 @return instance if found else 0. */
128 row_index_t* get_index(const char* name) const UNIV_NOTHROW;
129
130 /** Get the number of rows in the index.
131 @param name index name
132 @return number of rows (doesn't include delete marked rows). */
133 ulint get_n_rows(const char* name) const UNIV_NOTHROW;
134
135 /** Find the ordinal value of the column name in the cfg table columns.
136 @param name of column to look for.
137 @return ULINT_UNDEFINED if not found. */
138 ulint find_col(const char* name) const UNIV_NOTHROW;
139
140 /** Get the number of rows for which purge failed during the
141 convert phase.
142 @param name index name
143 @return number of rows for which purge failed. */
144 ulint get_n_purge_failed(const char* name) const UNIV_NOTHROW;
145
146 /** Check if the index is clean. ie. no delete-marked records
147 @param name index name
148 @return true if index needs to be purged. */
149 bool requires_purge(const char* name) const UNIV_NOTHROW
150 {
151 return(get_n_purge_failed(name) > 0);
152 }
153
154 /** Set the index root <space, pageno> using the index name */
155 void set_root_by_name() UNIV_NOTHROW;
156
157 /** Set the index root <space, pageno> using a heuristic
158 @return DB_SUCCESS or error code */
159 dberr_t set_root_by_heuristic() UNIV_NOTHROW;
160
161 /** Check if the index schema that was read from the .cfg file
162 matches the in memory index definition.
163 Note: It will update row_import_t::m_srv_index to map the meta-data
164 read from the .cfg file to the server index instance.
165 @return DB_SUCCESS or error code. */
166 dberr_t match_index_columns(
167 THD* thd,
168 const dict_index_t* index) UNIV_NOTHROW;
169
170 /** Check if the table schema that was read from the .cfg file
171 matches the in memory table definition.
172 @param thd MySQL session variable
173 @return DB_SUCCESS or error code. */
174 dberr_t match_table_columns(
175 THD* thd) UNIV_NOTHROW;
176
177 /** Check if the table (and index) schema that was read from the
178 .cfg file matches the in memory table definition.
179 @param thd MySQL session variable
180 @return DB_SUCCESS or error code. */
181 dberr_t match_schema(
182 THD* thd) UNIV_NOTHROW;
183
184 dict_table_t* m_table; /*!< Table instance */
185
186 ulint m_version; /*!< Version of config file */
187
188 byte* m_hostname; /*!< Hostname where the
189 tablespace was exported */
190 byte* m_table_name; /*!< Exporting instance table
191 name */
192
193 ib_uint64_t m_autoinc; /*!< Next autoinc value */
194
195 page_size_t m_page_size; /*!< Tablespace page size */
196
197 ulint m_flags; /*!< Table flags */
198
199 ulint m_n_cols; /*!< Number of columns in the
200 meta-data file */
201
202 dict_col_t* m_cols; /*!< Column data */
203
204 byte** m_col_names; /*!< Column names, we store the
205 column naems separately becuase
206 there is no field to store the
207 value in dict_col_t */
208
209 ulint m_n_indexes; /*!< Number of indexes,
210 including clustered index */
211
212 row_index_t* m_indexes; /*!< Index meta data */
213
214 bool m_missing; /*!< true if a .cfg file was
215 found and was readable */
216};
217
218/** Use the page cursor to iterate over records in a block. */
219class RecIterator {
220public:
221 /** Default constructor */
222 RecIterator() UNIV_NOTHROW
223 {
224 memset(&m_cur, 0x0, sizeof(m_cur));
225 }
226
227 /** Position the cursor on the first user record. */
228 void open(buf_block_t* block) UNIV_NOTHROW
229 {
230 page_cur_set_before_first(block, &m_cur);
231
232 if (!end()) {
233 next();
234 }
235 }
236
237 /** Move to the next record. */
238 void next() UNIV_NOTHROW
239 {
240 page_cur_move_to_next(&m_cur);
241 }
242
243 /**
244 @return the current record */
245 rec_t* current() UNIV_NOTHROW
246 {
247 ut_ad(!end());
248 return(page_cur_get_rec(&m_cur));
249 }
250
251 /**
252 @return true if cursor is at the end */
253 bool end() UNIV_NOTHROW
254 {
255 return(page_cur_is_after_last(&m_cur) == TRUE);
256 }
257
258 /** Remove the current record
259 @return true on success */
260 bool remove(
261 const dict_index_t* index,
262 page_zip_des_t* page_zip,
263 ulint* offsets) UNIV_NOTHROW
264 {
265 /* We can't end up with an empty page unless it is root. */
266 if (page_get_n_recs(m_cur.block->frame) <= 1) {
267 return(false);
268 }
269
270 return(page_delete_rec(index, &m_cur, page_zip, offsets));
271 }
272
273private:
274 page_cur_t m_cur;
275};
276
277/** Class that purges delete marked reocords from indexes, both secondary
278and cluster. It does a pessimistic delete. This should only be done if we
279couldn't purge the delete marked reocrds during Phase I. */
280class IndexPurge {
281public:
282 /** Constructor
283 @param trx the user transaction covering the import tablespace
284 @param index to be imported
285 @param space_id space id of the tablespace */
286 IndexPurge(
287 trx_t* trx,
288 dict_index_t* index) UNIV_NOTHROW
289 :
290 m_trx(trx),
291 m_index(index),
292 m_n_rows(0)
293 {
294 ib::info() << "Phase II - Purge records from index "
295 << index->name;
296 }
297
298 /** Descructor */
299 ~IndexPurge() UNIV_NOTHROW { }
300
301 /** Purge delete marked records.
302 @return DB_SUCCESS or error code. */
303 dberr_t garbage_collect() UNIV_NOTHROW;
304
305 /** The number of records that are not delete marked.
306 @return total records in the index after purge */
307 ulint get_n_rows() const UNIV_NOTHROW
308 {
309 return(m_n_rows);
310 }
311
312private:
313 /** Begin import, position the cursor on the first record. */
314 void open() UNIV_NOTHROW;
315
316 /** Close the persistent curosr and commit the mini-transaction. */
317 void close() UNIV_NOTHROW;
318
319 /** Position the cursor on the next record.
320 @return DB_SUCCESS or error code */
321 dberr_t next() UNIV_NOTHROW;
322
323 /** Store the persistent cursor position and reopen the
324 B-tree cursor in BTR_MODIFY_TREE mode, because the
325 tree structure may be changed during a pessimistic delete. */
326 void purge_pessimistic_delete() UNIV_NOTHROW;
327
328 /** Purge delete-marked records.
329 @param offsets current row offsets. */
330 void purge() UNIV_NOTHROW;
331
332protected:
333 // Disable copying
334 IndexPurge();
335 IndexPurge(const IndexPurge&);
336 IndexPurge &operator=(const IndexPurge&);
337
338private:
339 trx_t* m_trx; /*!< User transaction */
340 mtr_t m_mtr; /*!< Mini-transaction */
341 btr_pcur_t m_pcur; /*!< Persistent cursor */
342 dict_index_t* m_index; /*!< Index to be processed */
343 ulint m_n_rows; /*!< Records in index */
344};
345
346/** Functor that is called for each physical page that is read from the
347tablespace file. */
348class AbstractCallback
349{
350public:
351 /** Constructor
352 @param trx covering transaction */
353 AbstractCallback(trx_t* trx, ulint space_id)
354 :
355 m_page_size(0, 0, false),
356 m_trx(trx),
357 m_space(space_id),
358 m_xdes(),
359 m_xdes_page_no(ULINT_UNDEFINED),
360 m_space_flags(ULINT_UNDEFINED) UNIV_NOTHROW { }
361
362 /** Free any extent descriptor instance */
363 virtual ~AbstractCallback()
364 {
365 UT_DELETE_ARRAY(m_xdes);
366 }
367
368 /** Determine the page size to use for traversing the tablespace
369 @param file_size size of the tablespace file in bytes
370 @param block contents of the first page in the tablespace file.
371 @retval DB_SUCCESS or error code. */
372 virtual dberr_t init(
373 os_offset_t file_size,
374 const buf_block_t* block) UNIV_NOTHROW;
375
376 /** @return true if compressed table. */
377 bool is_compressed_table() const UNIV_NOTHROW
378 {
379 return(get_page_size().is_compressed());
380 }
381
382 /** @return the tablespace flags */
383 ulint get_space_flags() const
384 {
385 return(m_space_flags);
386 }
387
388 /**
389 Set the name of the physical file and the file handle that is used
390 to open it for the file that is being iterated over.
391 @param filename the physical name of the tablespace file
392 @param file OS file handle */
393 void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
394 {
395 m_file = file;
396 m_filepath = filename;
397 }
398
399 const page_size_t& get_page_size() const { return m_page_size; }
400
401 const char* filename() const { return m_filepath; }
402
403 /**
404 Called for every page in the tablespace. If the page was not
405 updated then its state must be set to BUF_PAGE_NOT_USED. For
406 compressed tables the page descriptor memory will be at offset:
407 block->frame + srv_page_size;
408 @param offset - physical offset within the file
409 @param block - block read from file, note it is not from the buffer pool
410 @retval DB_SUCCESS or error code. */
411 virtual dberr_t operator()(
412 os_offset_t offset,
413 buf_block_t* block) UNIV_NOTHROW = 0;
414
415 /** @return the tablespace identifier */
416 ulint get_space_id() const { return m_space; }
417
418 bool is_interrupted() const { return trx_is_interrupted(m_trx); }
419
420 /**
421 Get the data page depending on the table type, compressed or not.
422 @param block - block read from disk
423 @retval the buffer frame */
424 static byte* get_frame(const buf_block_t* block)
425 {
426 return block->page.zip.data
427 ? block->page.zip.data : block->frame;
428 }
429
430protected:
431 /** Get the physical offset of the extent descriptor within the page.
432 @param page_no page number of the extent descriptor
433 @param page contents of the page containing the extent descriptor.
434 @return the start of the xdes array in a page */
435 const xdes_t* xdes(
436 ulint page_no,
437 const page_t* page) const UNIV_NOTHROW
438 {
439 ulint offset;
440
441 offset = xdes_calc_descriptor_index(get_page_size(), page_no);
442
443 return(page + XDES_ARR_OFFSET + XDES_SIZE * offset);
444 }
445
446 /** Set the current page directory (xdes). If the extent descriptor is
447 marked as free then free the current extent descriptor and set it to
448 0. This implies that all pages that are covered by this extent
449 descriptor are also freed.
450
451 @param page_no offset of page within the file
452 @param page page contents
453 @return DB_SUCCESS or error code. */
454 dberr_t set_current_xdes(
455 ulint page_no,
456 const page_t* page) UNIV_NOTHROW
457 {
458 m_xdes_page_no = page_no;
459
460 UT_DELETE_ARRAY(m_xdes);
461 m_xdes = NULL;
462
463 ulint state;
464 const xdes_t* xdesc = page + XDES_ARR_OFFSET;
465
466 state = mach_read_ulint(xdesc + XDES_STATE, MLOG_4BYTES);
467
468 if (state != XDES_FREE) {
469
470 m_xdes = UT_NEW_ARRAY_NOKEY(xdes_t,
471 m_page_size.physical());
472
473 /* Trigger OOM */
474 DBUG_EXECUTE_IF(
475 "ib_import_OOM_13",
476 UT_DELETE_ARRAY(m_xdes);
477 m_xdes = NULL;
478 );
479
480 if (m_xdes == NULL) {
481 return(DB_OUT_OF_MEMORY);
482 }
483
484 memcpy(m_xdes, page, m_page_size.physical());
485 }
486
487 return(DB_SUCCESS);
488 }
489
490 /** Check if the page is marked as free in the extent descriptor.
491 @param page_no page number to check in the extent descriptor.
492 @return true if the page is marked as free */
493 bool is_free(ulint page_no) const UNIV_NOTHROW
494 {
495 ut_a(xdes_calc_descriptor_page(get_page_size(), page_no)
496 == m_xdes_page_no);
497
498 if (m_xdes != 0) {
499 const xdes_t* xdesc = xdes(page_no, m_xdes);
500 ulint pos = page_no % FSP_EXTENT_SIZE;
501
502 return(xdes_get_bit(xdesc, XDES_FREE_BIT, pos));
503 }
504
505 /* If the current xdes was free, the page must be free. */
506 return(true);
507 }
508
509protected:
510 /** The tablespace page size. */
511 page_size_t m_page_size;
512
513 /** File handle to the tablespace */
514 pfs_os_file_t m_file;
515
516 /** Physical file path. */
517 const char* m_filepath;
518
519 /** Covering transaction. */
520 trx_t* m_trx;
521
522 /** Space id of the file being iterated over. */
523 ulint m_space;
524
525 /** Minimum page number for which the free list has not been
526 initialized: the pages >= this limit are, by definition, free;
527 note that in a single-table tablespace where size < 64 pages,
528 this number is 64, i.e., we have initialized the space about
529 the first extent, but have not physically allocted those pages
530 to the file. @see FSP_LIMIT. */
531 ulint m_free_limit;
532
533 /** Current size of the space in pages */
534 ulint m_size;
535
536 /** Current extent descriptor page */
537 xdes_t* m_xdes;
538
539 /** Physical page offset in the file of the extent descriptor */
540 ulint m_xdes_page_no;
541
542 /** Flags value read from the header page */
543 ulint m_space_flags;
544};
545
546/** Determine the page size to use for traversing the tablespace
547@param file_size size of the tablespace file in bytes
548@param block contents of the first page in the tablespace file.
549@retval DB_SUCCESS or error code. */
550dberr_t
551AbstractCallback::init(
552 os_offset_t file_size,
553 const buf_block_t* block) UNIV_NOTHROW
554{
555 const page_t* page = block->frame;
556
557 m_space_flags = fsp_header_get_flags(page);
558 if (!fsp_flags_is_valid(m_space_flags, true)) {
559 ulint cflags = fsp_flags_convert_from_101(m_space_flags);
560 if (cflags == ULINT_UNDEFINED) {
561 ib::error() << "Invalid FSP_SPACE_FLAGS="
562 << ib::hex(m_space_flags);
563 return(DB_CORRUPTION);
564 }
565 m_space_flags = cflags;
566 }
567
568 /* Clear the DATA_DIR flag, which is basically garbage. */
569 m_space_flags &= ~(1U << FSP_FLAGS_POS_RESERVED);
570 m_page_size.copy_from(page_size_t(m_space_flags));
571
572 if (!is_compressed_table() && !m_page_size.equals_to(univ_page_size)) {
573
574 ib::error() << "Page size " << m_page_size.physical()
575 << " of ibd file is not the same as the server page"
576 " size " << srv_page_size;
577
578 return(DB_CORRUPTION);
579
580 } else if (file_size % m_page_size.physical() != 0) {
581
582 ib::error() << "File size " << file_size << " is not a"
583 " multiple of the page size "
584 << m_page_size.physical();
585
586 return(DB_CORRUPTION);
587 }
588
589 m_size = mach_read_from_4(page + FSP_SIZE);
590 m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
591 if (m_space == ULINT_UNDEFINED) {
592 m_space = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID
593 + page);
594 }
595
596 return set_current_xdes(0, page);
597}
598
599/**
600Try and determine the index root pages by checking if the next/prev
601pointers are both FIL_NULL. We need to ensure that skip deleted pages. */
602struct FetchIndexRootPages : public AbstractCallback {
603
604 /** Index information gathered from the .ibd file. */
605 struct Index {
606
607 Index(index_id_t id, ulint page_no)
608 :
609 m_id(id),
610 m_page_no(page_no) { }
611
612 index_id_t m_id; /*!< Index id */
613 ulint m_page_no; /*!< Root page number */
614 };
615
616 typedef std::vector<Index, ut_allocator<Index> > Indexes;
617
618 /** Constructor
619 @param trx covering (user) transaction
620 @param table table definition in server .*/
621 FetchIndexRootPages(const dict_table_t* table, trx_t* trx)
622 :
623 AbstractCallback(trx, ULINT_UNDEFINED),
624 m_table(table) UNIV_NOTHROW { }
625
626 /** Destructor */
627 virtual ~FetchIndexRootPages() UNIV_NOTHROW { }
628
629 /** Called for each block as it is read from the file.
630 @param offset physical offset in the file
631 @param block block to convert, it is not from the buffer pool.
632 @retval DB_SUCCESS or error code. */
633 virtual dberr_t operator() (
634 os_offset_t offset,
635 buf_block_t* block) UNIV_NOTHROW;
636
637 /** Update the import configuration that will be used to import
638 the tablespace. */
639 dberr_t build_row_import(row_import* cfg) const UNIV_NOTHROW;
640
641 /** Table definition in server. */
642 const dict_table_t* m_table;
643
644 /** Index information */
645 Indexes m_indexes;
646};
647
648/** Called for each block as it is read from the file. Check index pages to
649determine the exact row format. We can't get that from the tablespace
650header flags alone.
651
652@param offset physical offset in the file
653@param block block to convert, it is not from the buffer pool.
654@retval DB_SUCCESS or error code. */
655dberr_t
656FetchIndexRootPages::operator() (
657 os_offset_t offset,
658 buf_block_t* block) UNIV_NOTHROW
659{
660 if (is_interrupted()) return DB_INTERRUPTED;
661
662 const page_t* page = get_frame(block);
663
664 ulint page_type = fil_page_get_type(page);
665
666 if (block->page.id.page_no() * m_page_size.physical() != offset) {
667
668 ib::error() << "Page offset doesn't match file offset:"
669 " page offset: " << block->page.id.page_no()
670 << ", file offset: "
671 << (offset / m_page_size.physical());
672
673 return DB_CORRUPTION;
674 } else if (page_type == FIL_PAGE_TYPE_XDES) {
675 return set_current_xdes(block->page.id.page_no(), page);
676 } else if (fil_page_index_page_check(page)
677 && !is_free(block->page.id.page_no())
678 && page_is_root(page)) {
679
680 index_id_t id = btr_page_get_index_id(page);
681
682 m_indexes.push_back(Index(id, block->page.id.page_no()));
683
684 if (m_indexes.size() == 1) {
685 /* Check that the tablespace flags match the table flags. */
686 ulint expected = dict_tf_to_fsp_flags(m_table->flags);
687 if (!fsp_flags_match(expected, m_space_flags)) {
688 ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
689 ER_TABLE_SCHEMA_MISMATCH,
690 "Expected FSP_SPACE_FLAGS=0x%x, .ibd "
691 "file contains 0x%x.",
692 unsigned(expected),
693 unsigned(m_space_flags));
694 return(DB_CORRUPTION);
695 }
696 }
697 }
698
699 return DB_SUCCESS;
700}
701
702/**
703Update the import configuration that will be used to import the tablespace.
704@return error code or DB_SUCCESS */
705dberr_t
706FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW
707{
708 Indexes::const_iterator end = m_indexes.end();
709
710 ut_a(cfg->m_table == m_table);
711 cfg->m_page_size.copy_from(m_page_size);
712 cfg->m_n_indexes = m_indexes.size();
713
714 if (cfg->m_n_indexes == 0) {
715
716 ib::error() << "No B+Tree found in tablespace";
717
718 return(DB_CORRUPTION);
719 }
720
721 cfg->m_indexes = UT_NEW_ARRAY_NOKEY(row_index_t, cfg->m_n_indexes);
722
723 /* Trigger OOM */
724 DBUG_EXECUTE_IF(
725 "ib_import_OOM_11",
726 UT_DELETE_ARRAY(cfg->m_indexes);
727 cfg->m_indexes = NULL;
728 );
729
730 if (cfg->m_indexes == NULL) {
731 return(DB_OUT_OF_MEMORY);
732 }
733
734 memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);
735
736 row_index_t* cfg_index = cfg->m_indexes;
737
738 for (Indexes::const_iterator it = m_indexes.begin();
739 it != end;
740 ++it, ++cfg_index) {
741
742 char name[BUFSIZ];
743
744 snprintf(name, sizeof(name), "index" IB_ID_FMT, it->m_id);
745
746 ulint len = strlen(name) + 1;
747
748 cfg_index->m_name = UT_NEW_ARRAY_NOKEY(byte, len);
749
750 /* Trigger OOM */
751 DBUG_EXECUTE_IF(
752 "ib_import_OOM_12",
753 UT_DELETE_ARRAY(cfg_index->m_name);
754 cfg_index->m_name = NULL;
755 );
756
757 if (cfg_index->m_name == NULL) {
758 return(DB_OUT_OF_MEMORY);
759 }
760
761 memcpy(cfg_index->m_name, name, len);
762
763 cfg_index->m_id = it->m_id;
764
765 cfg_index->m_space = m_space;
766
767 cfg_index->m_page_no = it->m_page_no;
768 }
769
770 return(DB_SUCCESS);
771}
772
773/* Functor that is called for each physical page that is read from the
774tablespace file.
775
776 1. Check each page for corruption.
777
778 2. Update the space id and LSN on every page
779 * For the header page
780 - Validate the flags
781 - Update the LSN
782
783 3. On Btree pages
784 * Set the index id
785 * Update the max trx id
786 * In a cluster index, update the system columns
787 * In a cluster index, update the BLOB ptr, set the space id
788 * Purge delete marked records, but only if they can be easily
789 removed from the page
790 * Keep a counter of number of rows, ie. non-delete-marked rows
791 * Keep a counter of number of delete marked rows
792 * Keep a counter of number of purge failure
793 * If a page is stamped with an index id that isn't in the .cfg file
794 we assume it is deleted and the page can be ignored.
795
796 4. Set the page state to dirty so that it will be written to disk.
797*/
798class PageConverter : public AbstractCallback {
799public:
800 /** Constructor
801 @param cfg config of table being imported.
802 @param space_id tablespace identifier
803 @param trx transaction covering the import */
804 PageConverter(row_import* cfg, ulint space_id, trx_t* trx)
805 :
806 AbstractCallback(trx, space_id),
807 m_cfg(cfg),
808 m_index(cfg->m_indexes),
809 m_current_lsn(log_get_lsn()),
810 m_page_zip_ptr(0),
811 m_rec_iter(),
812 m_offsets_(), m_offsets(m_offsets_),
813 m_heap(0),
814 m_cluster_index(dict_table_get_first_index(cfg->m_table))
815 {
816 ut_ad(m_current_lsn);
817 rec_offs_init(m_offsets_);
818 }
819
820 virtual ~PageConverter() UNIV_NOTHROW
821 {
822 if (m_heap != 0) {
823 mem_heap_free(m_heap);
824 }
825 }
826
827 /** Called for each block as it is read from the file.
828 @param offset physical offset in the file
829 @param block block to convert, it is not from the buffer pool.
830 @retval DB_SUCCESS or error code. */
831 virtual dberr_t operator() (
832 os_offset_t offset,
833 buf_block_t* block) UNIV_NOTHROW;
834private:
835 /** Update the page, set the space id, max trx id and index id.
836 @param block block read from file
837 @param page_type type of the page
838 @retval DB_SUCCESS or error code */
839 dberr_t update_page(
840 buf_block_t* block,
841 ulint& page_type) UNIV_NOTHROW;
842
843 /** Update the space, index id, trx id.
844 @param block block to convert
845 @return DB_SUCCESS or error code */
846 dberr_t update_index_page(buf_block_t* block) UNIV_NOTHROW;
847
848 /** Update the BLOB refrences and write UNDO log entries for
849 rows that can't be purged optimistically.
850 @param block block to update
851 @retval DB_SUCCESS or error code */
852 dberr_t update_records(buf_block_t* block) UNIV_NOTHROW;
853
854 /** Validate the space flags and update tablespace header page.
855 @param block block read from file, not from the buffer pool.
856 @retval DB_SUCCESS or error code */
857 dberr_t update_header(buf_block_t* block) UNIV_NOTHROW;
858
859 /** Adjust the BLOB reference for a single column that is externally stored
860 @param rec record to update
861 @param offsets column offsets for the record
862 @param i column ordinal value
863 @return DB_SUCCESS or error code */
864 dberr_t adjust_cluster_index_blob_column(
865 rec_t* rec,
866 const ulint* offsets,
867 ulint i) UNIV_NOTHROW;
868
869 /** Adjusts the BLOB reference in the clustered index row for all
870 externally stored columns.
871 @param rec record to update
872 @param offsets column offsets for the record
873 @return DB_SUCCESS or error code */
874 dberr_t adjust_cluster_index_blob_columns(
875 rec_t* rec,
876 const ulint* offsets) UNIV_NOTHROW;
877
878 /** In the clustered index, adjist the BLOB pointers as needed.
879 Also update the BLOB reference, write the new space id.
880 @param rec record to update
881 @param offsets column offsets for the record
882 @return DB_SUCCESS or error code */
883 dberr_t adjust_cluster_index_blob_ref(
884 rec_t* rec,
885 const ulint* offsets) UNIV_NOTHROW;
886
887 /** Purge delete-marked records, only if it is possible to do
888 so without re-organising the B+tree.
889 @retval true if purged */
890 bool purge() UNIV_NOTHROW;
891
892 /** Adjust the BLOB references and sys fields for the current record.
893 @param rec record to update
894 @param offsets column offsets for the record
895 @return DB_SUCCESS or error code. */
896 dberr_t adjust_cluster_record(
897 rec_t* rec,
898 const ulint* offsets) UNIV_NOTHROW;
899
900 /** Find an index with the matching id.
901 @return row_index_t* instance or 0 */
902 row_index_t* find_index(index_id_t id) UNIV_NOTHROW
903 {
904 row_index_t* index = &m_cfg->m_indexes[0];
905
906 for (ulint i = 0; i < m_cfg->m_n_indexes; ++i, ++index) {
907 if (id == index->m_id) {
908 return(index);
909 }
910 }
911
912 return(0);
913
914 }
915private:
916 /** Config for table that is being imported. */
917 row_import* m_cfg;
918
919 /** Current index whose pages are being imported */
920 row_index_t* m_index;
921
922 /** Current system LSN */
923 lsn_t m_current_lsn;
924
925 /** Alias for m_page_zip, only set for compressed pages. */
926 page_zip_des_t* m_page_zip_ptr;
927
928 /** Iterator over records in a block */
929 RecIterator m_rec_iter;
930
931 /** Record offset */
932 ulint m_offsets_[REC_OFFS_NORMAL_SIZE];
933
934 /** Pointer to m_offsets_ */
935 ulint* m_offsets;
936
937 /** Memory heap for the record offsets */
938 mem_heap_t* m_heap;
939
940 /** Cluster index instance */
941 dict_index_t* m_cluster_index;
942};
943
944/**
945row_import destructor. */
946row_import::~row_import() UNIV_NOTHROW
947{
948 for (ulint i = 0; m_indexes != 0 && i < m_n_indexes; ++i) {
949 UT_DELETE_ARRAY(m_indexes[i].m_name);
950
951 if (m_indexes[i].m_fields == NULL) {
952 continue;
953 }
954
955 dict_field_t* fields = m_indexes[i].m_fields;
956 ulint n_fields = m_indexes[i].m_n_fields;
957
958 for (ulint j = 0; j < n_fields; ++j) {
959 UT_DELETE_ARRAY(const_cast<char*>(fields[j].name()));
960 }
961
962 UT_DELETE_ARRAY(fields);
963 }
964
965 for (ulint i = 0; m_col_names != 0 && i < m_n_cols; ++i) {
966 UT_DELETE_ARRAY(m_col_names[i]);
967 }
968
969 UT_DELETE_ARRAY(m_cols);
970 UT_DELETE_ARRAY(m_indexes);
971 UT_DELETE_ARRAY(m_col_names);
972 UT_DELETE_ARRAY(m_table_name);
973 UT_DELETE_ARRAY(m_hostname);
974}
975
976/** Find the index entry in in the indexes array.
977@param name index name
978@return instance if found else 0. */
979row_index_t*
980row_import::get_index(
981 const char* name) const UNIV_NOTHROW
982{
983 for (ulint i = 0; i < m_n_indexes; ++i) {
984 const char* index_name;
985 row_index_t* index = &m_indexes[i];
986
987 index_name = reinterpret_cast<const char*>(index->m_name);
988
989 if (strcmp(index_name, name) == 0) {
990
991 return(index);
992 }
993 }
994
995 return(0);
996}
997
998/** Get the number of rows in the index.
999@param name index name
1000@return number of rows (doesn't include delete marked rows). */
1001ulint
1002row_import::get_n_rows(
1003 const char* name) const UNIV_NOTHROW
1004{
1005 const row_index_t* index = get_index(name);
1006
1007 ut_a(name != 0);
1008
1009 return(index->m_stats.m_n_rows);
1010}
1011
1012/** Get the number of rows for which purge failed uding the convert phase.
1013@param name index name
1014@return number of rows for which purge failed. */
1015ulint
1016row_import::get_n_purge_failed(
1017 const char* name) const UNIV_NOTHROW
1018{
1019 const row_index_t* index = get_index(name);
1020
1021 ut_a(name != 0);
1022
1023 return(index->m_stats.m_n_purge_failed);
1024}
1025
1026/** Find the ordinal value of the column name in the cfg table columns.
1027@param name of column to look for.
1028@return ULINT_UNDEFINED if not found. */
1029ulint
1030row_import::find_col(
1031 const char* name) const UNIV_NOTHROW
1032{
1033 for (ulint i = 0; i < m_n_cols; ++i) {
1034 const char* col_name;
1035
1036 col_name = reinterpret_cast<const char*>(m_col_names[i]);
1037
1038 if (strcmp(col_name, name) == 0) {
1039 return(i);
1040 }
1041 }
1042
1043 return(ULINT_UNDEFINED);
1044}
1045
1046/**
1047Check if the index schema that was read from the .cfg file matches the
1048in memory index definition.
1049@return DB_SUCCESS or error code. */
1050dberr_t
1051row_import::match_index_columns(
1052 THD* thd,
1053 const dict_index_t* index) UNIV_NOTHROW
1054{
1055 row_index_t* cfg_index;
1056 dberr_t err = DB_SUCCESS;
1057
1058 cfg_index = get_index(index->name);
1059
1060 if (cfg_index == 0) {
1061 ib_errf(thd, IB_LOG_LEVEL_ERROR,
1062 ER_TABLE_SCHEMA_MISMATCH,
1063 "Index %s not found in tablespace meta-data file.",
1064 index->name());
1065
1066 return(DB_ERROR);
1067 }
1068
1069 if (cfg_index->m_n_fields != index->n_fields) {
1070
1071 ib_errf(thd, IB_LOG_LEVEL_ERROR,
1072 ER_TABLE_SCHEMA_MISMATCH,
1073 "Index field count %u doesn't match"
1074 " tablespace metadata file value " ULINTPF,
1075 index->n_fields, cfg_index->m_n_fields);
1076
1077 return(DB_ERROR);
1078 }
1079
1080 cfg_index->m_srv_index = index;
1081
1082 const dict_field_t* field = index->fields;
1083 const dict_field_t* cfg_field = cfg_index->m_fields;
1084
1085 for (ulint i = 0; i < index->n_fields; ++i, ++field, ++cfg_field) {
1086
1087 if (strcmp(field->name(), cfg_field->name()) != 0) {
1088 ib_errf(thd, IB_LOG_LEVEL_ERROR,
1089 ER_TABLE_SCHEMA_MISMATCH,
1090 "Index field name %s doesn't match"
1091 " tablespace metadata field name %s"
1092 " for field position " ULINTPF,
1093 field->name(), cfg_field->name(), i);
1094
1095 err = DB_ERROR;
1096 }
1097
1098 if (cfg_field->prefix_len != field->prefix_len) {
1099 ib_errf(thd, IB_LOG_LEVEL_ERROR,
1100 ER_TABLE_SCHEMA_MISMATCH,
1101 "Index %s field %s prefix len %u"
1102 " doesn't match metadata file value %u",
1103 index->name(), field->name(),
1104 field->prefix_len, cfg_field->prefix_len);
1105
1106 err = DB_ERROR;
1107 }
1108
1109 if (cfg_field->fixed_len != field->fixed_len) {
1110 ib_errf(thd, IB_LOG_LEVEL_ERROR,
1111 ER_TABLE_SCHEMA_MISMATCH,
1112 "Index %s field %s fixed len %u"
1113 " doesn't match metadata file value %u",
1114 index->name(), field->name(),
1115 field->fixed_len,
1116 cfg_field->fixed_len);
1117
1118 err = DB_ERROR;
1119 }
1120 }
1121
1122 return(err);
1123}
1124
1125/** Check if the table schema that was read from the .cfg file matches the
1126in memory table definition.
1127@param thd MySQL session variable
1128@return DB_SUCCESS or error code. */
1129dberr_t
1130row_import::match_table_columns(
1131 THD* thd) UNIV_NOTHROW
1132{
1133 dberr_t err = DB_SUCCESS;
1134 const dict_col_t* col = m_table->cols;
1135
1136 for (ulint i = 0; i < m_table->n_cols; ++i, ++col) {
1137
1138 const char* col_name;
1139 ulint cfg_col_index;
1140
1141 col_name = dict_table_get_col_name(
1142 m_table, dict_col_get_no(col));
1143
1144 cfg_col_index = find_col(col_name);
1145
1146 if (cfg_col_index == ULINT_UNDEFINED) {
1147
1148 ib_errf(thd, IB_LOG_LEVEL_ERROR,
1149 ER_TABLE_SCHEMA_MISMATCH,
1150 "Column %s not found in tablespace.",
1151 col_name);
1152
1153 err = DB_ERROR;
1154 } else if (cfg_col_index != col->ind) {
1155
1156 ib_errf(thd, IB_LOG_LEVEL_ERROR,
1157 ER_TABLE_SCHEMA_MISMATCH,
1158 "Column %s ordinal value mismatch, it's at %u"
1159 " in the table and " ULINTPF
1160 " in the tablespace meta-data file",
1161 col_name, col->ind, cfg_col_index);
1162
1163 err = DB_ERROR;
1164 } else {
1165 const dict_col_t* cfg_col;
1166
1167 cfg_col = &m_cols[cfg_col_index];
1168 ut_a(cfg_col->ind == cfg_col_index);
1169
1170 if (cfg_col->prtype != col->prtype) {
1171 ib_errf(thd,
1172 IB_LOG_LEVEL_ERROR,
1173 ER_TABLE_SCHEMA_MISMATCH,
1174 "Column %s precise type mismatch.",
1175 col_name);
1176 err = DB_ERROR;
1177 }
1178
1179 if (cfg_col->mtype != col->mtype) {
1180 ib_errf(thd,
1181 IB_LOG_LEVEL_ERROR,
1182 ER_TABLE_SCHEMA_MISMATCH,
1183 "Column %s main type mismatch.",
1184 col_name);
1185 err = DB_ERROR;
1186 }
1187
1188 if (cfg_col->len != col->len) {
1189 ib_errf(thd,
1190 IB_LOG_LEVEL_ERROR,
1191 ER_TABLE_SCHEMA_MISMATCH,
1192 "Column %s length mismatch.",
1193 col_name);
1194 err = DB_ERROR;
1195 }
1196
1197 if (cfg_col->mbminlen != col->mbminlen
1198 || cfg_col->mbmaxlen != col->mbmaxlen) {
1199 ib_errf(thd,
1200 IB_LOG_LEVEL_ERROR,
1201 ER_TABLE_SCHEMA_MISMATCH,
1202 "Column %s multi-byte len mismatch.",
1203 col_name);
1204 err = DB_ERROR;
1205 }
1206
1207 if (cfg_col->ind != col->ind) {
1208 err = DB_ERROR;
1209 }
1210
1211 if (cfg_col->ord_part != col->ord_part) {
1212 ib_errf(thd,
1213 IB_LOG_LEVEL_ERROR,
1214 ER_TABLE_SCHEMA_MISMATCH,
1215 "Column %s ordering mismatch.",
1216 col_name);
1217 err = DB_ERROR;
1218 }
1219
1220 if (cfg_col->max_prefix != col->max_prefix) {
1221 ib_errf(thd,
1222 IB_LOG_LEVEL_ERROR,
1223 ER_TABLE_SCHEMA_MISMATCH,
1224 "Column %s max prefix mismatch.",
1225 col_name);
1226 err = DB_ERROR;
1227 }
1228 }
1229 }
1230
1231 return(err);
1232}
1233
1234/** Check if the table (and index) schema that was read from the .cfg file
1235matches the in memory table definition.
1236@param thd MySQL session variable
1237@return DB_SUCCESS or error code. */
1238dberr_t
1239row_import::match_schema(
1240 THD* thd) UNIV_NOTHROW
1241{
1242 /* Do some simple checks. */
1243
1244 if ((m_table->flags ^ m_flags) & ~DICT_TF_MASK_DATA_DIR) {
1245 ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
1246 "Table flags don't match, server table has 0x%x"
1247 " and the meta-data file has 0x" ULINTPFx,
1248 m_table->flags, m_flags);
1249
1250 return(DB_ERROR);
1251 } else if (m_table->n_cols != m_n_cols) {
1252 ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
1253 "Number of columns don't match, table has %u"
1254 " columns but the tablespace meta-data file has "
1255 ULINTPF " columns",
1256 m_table->n_cols, m_n_cols);
1257
1258 return(DB_ERROR);
1259 } else if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
1260
1261 /* If the number of indexes don't match then it is better
1262 to abort the IMPORT. It is easy for the user to create a
1263 table matching the IMPORT definition. */
1264
1265 ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
1266 "Number of indexes don't match, table has " ULINTPF
1267 " indexes but the tablespace meta-data file has "
1268 ULINTPF " indexes",
1269 UT_LIST_GET_LEN(m_table->indexes), m_n_indexes);
1270
1271 return(DB_ERROR);
1272 }
1273
1274 dberr_t err = match_table_columns(thd);
1275
1276 if (err != DB_SUCCESS) {
1277 return(err);
1278 }
1279
1280 /* Check if the index definitions match. */
1281
1282 const dict_index_t* index;
1283
1284 for (index = UT_LIST_GET_FIRST(m_table->indexes);
1285 index != 0;
1286 index = UT_LIST_GET_NEXT(indexes, index)) {
1287
1288 dberr_t index_err;
1289
1290 index_err = match_index_columns(thd, index);
1291
1292 if (index_err != DB_SUCCESS) {
1293 err = index_err;
1294 }
1295 }
1296
1297 return(err);
1298}
1299
1300/**
1301Set the index root <space, pageno>, using index name. */
1302void
1303row_import::set_root_by_name() UNIV_NOTHROW
1304{
1305 row_index_t* cfg_index = m_indexes;
1306
1307 for (ulint i = 0; i < m_n_indexes; ++i, ++cfg_index) {
1308 dict_index_t* index;
1309
1310 const char* index_name;
1311
1312 index_name = reinterpret_cast<const char*>(cfg_index->m_name);
1313
1314 index = dict_table_get_index_on_name(m_table, index_name);
1315
1316 /* We've already checked that it exists. */
1317 ut_a(index != 0);
1318
1319 index->page = cfg_index->m_page_no;
1320 }
1321}
1322
1323/**
1324Set the index root <space, pageno>, using a heuristic.
1325@return DB_SUCCESS or error code */
1326dberr_t
1327row_import::set_root_by_heuristic() UNIV_NOTHROW
1328{
1329 row_index_t* cfg_index = m_indexes;
1330
1331 ut_a(m_n_indexes > 0);
1332
1333 // TODO: For now use brute force, based on ordinality
1334
1335 if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
1336
1337 ib::warn() << "Table " << m_table->name << " should have "
1338 << UT_LIST_GET_LEN(m_table->indexes) << " indexes but"
1339 " the tablespace has " << m_n_indexes << " indexes";
1340 }
1341
1342 dict_mutex_enter_for_mysql();
1343
1344 ulint i = 0;
1345 dberr_t err = DB_SUCCESS;
1346
1347 for (dict_index_t* index = UT_LIST_GET_FIRST(m_table->indexes);
1348 index != 0;
1349 index = UT_LIST_GET_NEXT(indexes, index)) {
1350
1351 if (index->type & DICT_FTS) {
1352 index->type |= DICT_CORRUPT;
1353 ib::warn() << "Skipping FTS index: " << index->name;
1354 } else if (i < m_n_indexes) {
1355
1356 UT_DELETE_ARRAY(cfg_index[i].m_name);
1357
1358 ulint len = strlen(index->name) + 1;
1359
1360 cfg_index[i].m_name = UT_NEW_ARRAY_NOKEY(byte, len);
1361
1362 /* Trigger OOM */
1363 DBUG_EXECUTE_IF(
1364 "ib_import_OOM_14",
1365 UT_DELETE_ARRAY(cfg_index[i].m_name);
1366 cfg_index[i].m_name = NULL;
1367 );
1368
1369 if (cfg_index[i].m_name == NULL) {
1370 err = DB_OUT_OF_MEMORY;
1371 break;
1372 }
1373
1374 memcpy(cfg_index[i].m_name, index->name, len);
1375
1376 cfg_index[i].m_srv_index = index;
1377
1378 index->page = cfg_index[i].m_page_no;
1379
1380 ++i;
1381 }
1382 }
1383
1384 dict_mutex_exit_for_mysql();
1385
1386 return(err);
1387}
1388
1389/**
1390Purge delete marked records.
1391@return DB_SUCCESS or error code. */
1392dberr_t
1393IndexPurge::garbage_collect() UNIV_NOTHROW
1394{
1395 dberr_t err;
1396 ibool comp = dict_table_is_comp(m_index->table);
1397
1398 /* Open the persistent cursor and start the mini-transaction. */
1399
1400 open();
1401
1402 while ((err = next()) == DB_SUCCESS) {
1403
1404 rec_t* rec = btr_pcur_get_rec(&m_pcur);
1405 ibool deleted = rec_get_deleted_flag(rec, comp);
1406
1407 if (!deleted) {
1408 ++m_n_rows;
1409 } else {
1410 purge();
1411 }
1412 }
1413
1414 /* Close the persistent cursor and commit the mini-transaction. */
1415
1416 close();
1417
1418 return(err == DB_END_OF_INDEX ? DB_SUCCESS : err);
1419}
1420
1421/**
1422Begin import, position the cursor on the first record. */
1423void
1424IndexPurge::open() UNIV_NOTHROW
1425{
1426 mtr_start(&m_mtr);
1427
1428 mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
1429
1430 btr_pcur_open_at_index_side(
1431 true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, 0, &m_mtr);
1432 btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr);
1433 if (rec_is_default_row(btr_pcur_get_rec(&m_pcur), m_index)) {
1434 ut_ad(btr_pcur_is_on_user_rec(&m_pcur));
1435 /* Skip the 'default row' pseudo-record. */
1436 } else {
1437 btr_pcur_move_to_prev_on_page(&m_pcur);
1438 }
1439}
1440
/**
Close the persistent cursor and commit the mini-transaction. */
void
IndexPurge::close() UNIV_NOTHROW
{
	/* The cursor is closed first, then the mini-transaction
	that was started by open() or next() is committed. */
	btr_pcur_close(&m_pcur);
	mtr_commit(&m_mtr);
}
1449
1450/**
1451Position the cursor on the next record.
1452@return DB_SUCCESS or error code */
1453dberr_t
1454IndexPurge::next() UNIV_NOTHROW
1455{
1456 btr_pcur_move_to_next_on_page(&m_pcur);
1457
1458 /* When switching pages, commit the mini-transaction
1459 in order to release the latch on the old page. */
1460
1461 if (!btr_pcur_is_after_last_on_page(&m_pcur)) {
1462 return(DB_SUCCESS);
1463 } else if (trx_is_interrupted(m_trx)) {
1464 /* Check after every page because the check
1465 is expensive. */
1466 return(DB_INTERRUPTED);
1467 }
1468
1469 btr_pcur_store_position(&m_pcur, &m_mtr);
1470
1471 mtr_commit(&m_mtr);
1472
1473 mtr_start(&m_mtr);
1474
1475 mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
1476
1477 btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
1478
1479 if (!btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr)) {
1480
1481 return(DB_END_OF_INDEX);
1482 }
1483
1484 return(DB_SUCCESS);
1485}
1486
1487/**
1488Store the persistent cursor position and reopen the
1489B-tree cursor in BTR_MODIFY_TREE mode, because the
1490tree structure may be changed during a pessimistic delete. */
1491void
1492IndexPurge::purge_pessimistic_delete() UNIV_NOTHROW
1493{
1494 dberr_t err;
1495
1496 btr_pcur_restore_position(BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
1497 &m_pcur, &m_mtr);
1498
1499 ut_ad(rec_get_deleted_flag(
1500 btr_pcur_get_rec(&m_pcur),
1501 dict_table_is_comp(m_index->table)));
1502
1503 btr_cur_pessimistic_delete(
1504 &err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, false, &m_mtr);
1505
1506 ut_a(err == DB_SUCCESS);
1507
1508 /* Reopen the B-tree cursor in BTR_MODIFY_LEAF mode */
1509 mtr_commit(&m_mtr);
1510}
1511
/**
Purge the delete-marked record under the cursor: store the cursor
position, delete the record pessimistically (which commits the
mini-transaction), then start a new mini-transaction with redo logging
disabled and restore the cursor in BTR_MODIFY_LEAF mode. */
void
IndexPurge::purge() UNIV_NOTHROW
{
	/* Remember where we are; purge_pessimistic_delete() restores
	the cursor from this stored position. */
	btr_pcur_store_position(&m_pcur, &m_mtr);

	purge_pessimistic_delete();

	/* purge_pessimistic_delete() committed m_mtr; start it again. */
	mtr_start(&m_mtr);

	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);

	btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
}
1527
1528/** Adjust the BLOB reference for a single column that is externally stored
1529@param rec record to update
1530@param offsets column offsets for the record
1531@param i column ordinal value
1532@return DB_SUCCESS or error code */
1533inline
1534dberr_t
1535PageConverter::adjust_cluster_index_blob_column(
1536 rec_t* rec,
1537 const ulint* offsets,
1538 ulint i) UNIV_NOTHROW
1539{
1540 ulint len;
1541 byte* field;
1542
1543 field = rec_get_nth_field(rec, offsets, i, &len);
1544
1545 DBUG_EXECUTE_IF("ib_import_trigger_corruption_2",
1546 len = BTR_EXTERN_FIELD_REF_SIZE - 1;);
1547
1548 if (len < BTR_EXTERN_FIELD_REF_SIZE) {
1549
1550 ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
1551 ER_INNODB_INDEX_CORRUPT,
1552 "Externally stored column(" ULINTPF
1553 ") has a reference length of " ULINTPF
1554 " in the cluster index %s",
1555 i, len, m_cluster_index->name());
1556
1557 return(DB_CORRUPTION);
1558 }
1559
1560 field += len - (BTR_EXTERN_FIELD_REF_SIZE - BTR_EXTERN_SPACE_ID);
1561
1562 mach_write_to_4(field, get_space_id());
1563
1564 if (m_page_zip_ptr) {
1565 page_zip_write_blob_ptr(
1566 m_page_zip_ptr, rec, m_cluster_index, offsets, i, 0);
1567 }
1568
1569 return(DB_SUCCESS);
1570}
1571
1572/** Adjusts the BLOB reference in the clustered index row for all externally
1573stored columns.
1574@param rec record to update
1575@param offsets column offsets for the record
1576@return DB_SUCCESS or error code */
1577inline
1578dberr_t
1579PageConverter::adjust_cluster_index_blob_columns(
1580 rec_t* rec,
1581 const ulint* offsets) UNIV_NOTHROW
1582{
1583 ut_ad(rec_offs_any_extern(offsets));
1584
1585 /* Adjust the space_id in the BLOB pointers. */
1586
1587 for (ulint i = 0; i < rec_offs_n_fields(offsets); ++i) {
1588
1589 /* Only if the column is stored "externally". */
1590
1591 if (rec_offs_nth_extern(offsets, i)) {
1592 dberr_t err;
1593
1594 err = adjust_cluster_index_blob_column(rec, offsets, i);
1595
1596 if (err != DB_SUCCESS) {
1597 return(err);
1598 }
1599 }
1600 }
1601
1602 return(DB_SUCCESS);
1603}
1604
1605/** In the clustered index, adjust BLOB pointers as needed. Also update the
1606BLOB reference, write the new space id.
1607@param rec record to update
1608@param offsets column offsets for the record
1609@return DB_SUCCESS or error code */
1610inline
1611dberr_t
1612PageConverter::adjust_cluster_index_blob_ref(
1613 rec_t* rec,
1614 const ulint* offsets) UNIV_NOTHROW
1615{
1616 if (rec_offs_any_extern(offsets)) {
1617 dberr_t err;
1618
1619 err = adjust_cluster_index_blob_columns(rec, offsets);
1620
1621 if (err != DB_SUCCESS) {
1622 return(err);
1623 }
1624 }
1625
1626 return(DB_SUCCESS);
1627}
1628
1629/** Purge delete-marked records, only if it is possible to do so without
1630re-organising the B+tree.
1631@return true if purge succeeded */
1632inline bool PageConverter::purge() UNIV_NOTHROW
1633{
1634 const dict_index_t* index = m_index->m_srv_index;
1635
1636 /* We can't have a page that is empty and not root. */
1637 if (m_rec_iter.remove(index, m_page_zip_ptr, m_offsets)) {
1638
1639 ++m_index->m_stats.m_n_purged;
1640
1641 return(true);
1642 } else {
1643 ++m_index->m_stats.m_n_purge_failed;
1644 }
1645
1646 return(false);
1647}
1648
1649/** Adjust the BLOB references and sys fields for the current record.
1650@param rec record to update
1651@param offsets column offsets for the record
1652@return DB_SUCCESS or error code. */
1653inline
1654dberr_t
1655PageConverter::adjust_cluster_record(
1656 rec_t* rec,
1657 const ulint* offsets) UNIV_NOTHROW
1658{
1659 dberr_t err;
1660
1661 if ((err = adjust_cluster_index_blob_ref(rec, offsets)) == DB_SUCCESS) {
1662
1663 /* Reset DB_TRX_ID and DB_ROLL_PTR. Normally, these fields
1664 are only written in conjunction with other changes to the
1665 record. */
1666 ulint trx_id_pos = m_cluster_index->n_uniq
1667 ? m_cluster_index->n_uniq : 1;
1668 if (m_page_zip_ptr) {
1669 page_zip_write_trx_id_and_roll_ptr(
1670 m_page_zip_ptr, rec, m_offsets, trx_id_pos,
1671 0, roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS,
1672 NULL);
1673 } else {
1674 ulint len;
1675 byte* ptr = rec_get_nth_field(
1676 rec, m_offsets, trx_id_pos, &len);
1677 ut_ad(len == DATA_TRX_ID_LEN);
1678 memcpy(ptr, reset_trx_id, sizeof reset_trx_id);
1679 }
1680 }
1681
1682 return(err);
1683}
1684
1685/** Update the BLOB refrences and write UNDO log entries for
1686rows that can't be purged optimistically.
1687@param block block to update
1688@retval DB_SUCCESS or error code */
1689inline
1690dberr_t
1691PageConverter::update_records(
1692 buf_block_t* block) UNIV_NOTHROW
1693{
1694 ibool comp = dict_table_is_comp(m_cfg->m_table);
1695 bool clust_index = m_index->m_srv_index == m_cluster_index;
1696
1697 /* This will also position the cursor on the first user record. */
1698
1699 m_rec_iter.open(block);
1700
1701 while (!m_rec_iter.end()) {
1702 rec_t* rec = m_rec_iter.current();
1703
1704 ibool deleted = rec_get_deleted_flag(rec, comp);
1705
1706 /* For the clustered index we have to adjust the BLOB
1707 reference and the system fields irrespective of the
1708 delete marked flag. The adjustment of delete marked
1709 cluster records is required for purge to work later. */
1710
1711 if (deleted || clust_index) {
1712 m_offsets = rec_get_offsets(
1713 rec, m_index->m_srv_index, m_offsets, true,
1714 ULINT_UNDEFINED, &m_heap);
1715 }
1716
1717 if (clust_index) {
1718
1719 dberr_t err = adjust_cluster_record(rec, m_offsets);
1720
1721 if (err != DB_SUCCESS) {
1722 return(err);
1723 }
1724 }
1725
1726 /* If it is a delete marked record then try an
1727 optimistic delete. */
1728
1729 if (deleted) {
1730 /* A successful purge will move the cursor to the
1731 next record. */
1732
1733 if (!purge()) {
1734 m_rec_iter.next();
1735 }
1736
1737 ++m_index->m_stats.m_n_deleted;
1738 } else {
1739 ++m_index->m_stats.m_n_rows;
1740 m_rec_iter.next();
1741 }
1742 }
1743
1744 return(DB_SUCCESS);
1745}
1746
/** Update a B-tree page: switch the current index (m_index) if the
page belongs to a different one, write the server's index id into the
page header, set or clear PAGE_MAX_TRX_ID, and, for non-empty leaf
pages, process the records via update_records(). Pages for which
is_free() is true are left untouched.
@param block	block holding the page to update
@return DB_SUCCESS or error code
@retval DB_CORRUPTION if the page's index id is unknown, if
btr_cur_instant_root_init() fails on a clustered index root page, or
if a non-root page is empty */
inline
dberr_t
PageConverter::update_index_page(
	buf_block_t*	block) UNIV_NOTHROW
{
	index_id_t	id;
	buf_frame_t*	page = block->frame;

	if (is_free(block->page.id.page_no())) {
		return(DB_SUCCESS);
	} else if ((id = btr_page_get_index_id(page)) != m_index->m_id) {

		/* The page belongs to another index than the one
		processed last: look that index up by its id. */
		row_index_t*	index = find_index(id);

		if (index == 0) {
			ib::error() << "Page for tablespace " << m_space
				<< " is index page with id " << id
				<< " but that index is not found from"
				<< " configuration file. Current index name "
				<< m_index->m_name << " and id " <<  m_index->m_id;
			m_index = 0;
			return(DB_CORRUPTION);
		}

		/* Update current index */
		m_index = index;
	}

	/* If the .cfg file is missing and there is an index mismatch
	then ignore the error. */
	if (m_cfg->m_missing && (m_index == 0 || m_index->m_srv_index == 0)) {
		return(DB_SUCCESS);
	}

#ifdef UNIV_ZIP_DEBUG
	ut_a(!is_compressed_table()
	     || page_zip_validate(m_page_zip_ptr, page, m_index->m_srv_index));
#endif /* UNIV_ZIP_DEBUG */

	/* This has to be written to uncompressed index header. Set it to
	the current index id. */
	btr_page_set_index_id(
		page, m_page_zip_ptr, m_index->m_srv_index->id, 0);

	if (dict_index_is_clust(m_index->m_srv_index)) {
		if (page_is_root(page)) {
			/* Preserve the PAGE_ROOT_AUTO_INC. */
			/* A nonzero result of btr_cur_instant_root_init()
			is treated as corruption. */
			if (m_index->m_srv_index->table->supports_instant()
			    && btr_cur_instant_root_init(
				    const_cast<dict_index_t*>(
					    m_index->m_srv_index),
				    page)) {
				return(DB_CORRUPTION);
			}
		} else {
			/* Clear PAGE_MAX_TRX_ID so that it can be
			used for other purposes in the future. IMPORT
			in MySQL 5.6, 5.7 and MariaDB 10.0 and 10.1
			would set the field to the transaction ID even
			on clustered index pages. */
			page_set_max_trx_id(block, m_page_zip_ptr, 0, NULL);
		}
	} else {
		/* Set PAGE_MAX_TRX_ID on secondary index leaf pages,
		and clear it on non-leaf pages. */
		page_set_max_trx_id(block, m_page_zip_ptr,
				    page_is_leaf(page) ? m_trx->id : 0, NULL);
	}

	if (page_is_empty(page)) {

		/* Only a root page can be empty. */
		if (!page_is_root(page)) {
			// TODO: We should relax this and skip secondary
			// indexes. Mark them as corrupt because they can
			// always be rebuilt.
			return(DB_CORRUPTION);
		}

		return(DB_SUCCESS);
	}

	/* Records are only processed on leaf pages. */
	return page_is_leaf(block->frame) ? update_records(block) : DB_SUCCESS;
}
1833
1834/** Validate the space flags and update tablespace header page.
1835@param block block read from file, not from the buffer pool.
1836@retval DB_SUCCESS or error code */
1837inline
1838dberr_t
1839PageConverter::update_header(
1840 buf_block_t* block) UNIV_NOTHROW
1841{
1842 /* Check for valid header */
1843 switch (fsp_header_get_space_id(get_frame(block))) {
1844 case 0:
1845 return(DB_CORRUPTION);
1846 case ULINT_UNDEFINED:
1847 ib::warn() << "Space id check in the header failed: ignored";
1848 }
1849
1850 mach_write_to_8(
1851 get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
1852 m_current_lsn);
1853
1854 /* Write back the adjusted flags. */
1855 mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS
1856 + get_frame(block), m_space_flags);
1857
1858 /* Write space_id to the tablespace header, page 0. */
1859 mach_write_to_4(
1860 get_frame(block) + FSP_HEADER_OFFSET + FSP_SPACE_ID,
1861 get_space_id());
1862
1863 /* This is on every page in the tablespace. */
1864 mach_write_to_4(
1865 get_frame(block) + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
1866 get_space_id());
1867
1868 return(DB_SUCCESS);
1869}
1870
/** Update one page according to its type: the header page goes through
update_header(), B-tree pages through update_index_page(), and the
remaining known page types only get the space id rewritten.
@param block		block read from file
@param page_type	out: type of the page, as returned by
			fil_page_get_type()
@retval DB_SUCCESS or error code
@retval DB_CORRUPTION for FIL_PAGE_TYPE_SYS, for an unknown page type,
or if a compressed B-tree page cannot be decompressed */
inline
dberr_t
PageConverter::update_page(
	buf_block_t*	block,
	ulint&		page_type) UNIV_NOTHROW
{
	dberr_t		err = DB_SUCCESS;

	ut_ad(!block->page.zip.data == !is_compressed_table());

	/* Remember the compressed page descriptor, if there is one,
	for the helpers that are called below. */
	if (block->page.zip.data) {
		m_page_zip_ptr = &block->page.zip;
	} else {
		ut_ad(!m_page_zip_ptr);
	}

	switch (page_type = fil_page_get_type(get_frame(block))) {
	case FIL_PAGE_TYPE_FSP_HDR:
		ut_a(block->page.id.page_no() == 0);
		/* Work directly on the uncompressed page headers. */
		return(update_header(block));

	case FIL_PAGE_INDEX:
	case FIL_PAGE_RTREE:
		/* We need to decompress the contents into block->frame
		before we can do any thing with Btree pages. */

		if (is_compressed_table() && !buf_zip_decompress(block, TRUE)) {
			return(DB_CORRUPTION);
		}

		/* fall through */
	case FIL_PAGE_TYPE_INSTANT:
		/* This is on every page in the tablespace. */
		mach_write_to_4(
			get_frame(block)
			+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());

		/* Only update the Btree nodes. */
		return(update_index_page(block));

	case FIL_PAGE_TYPE_SYS:
		/* This is page 0 in the system tablespace. */
		return(DB_CORRUPTION);

	case FIL_PAGE_TYPE_XDES:
		/* The result of set_current_xdes() is returned after
		the space id has been written below. */
		err = set_current_xdes(
			block->page.id.page_no(), get_frame(block));
		/* fall through */
	case FIL_PAGE_INODE:
	case FIL_PAGE_TYPE_TRX_SYS:
	case FIL_PAGE_IBUF_FREE_LIST:
	case FIL_PAGE_TYPE_ALLOCATED:
	case FIL_PAGE_IBUF_BITMAP:
	case FIL_PAGE_TYPE_BLOB:
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:

		/* Work directly on the uncompressed page headers. */
		/* This is on every page in the tablespace. */
		mach_write_to_4(
			get_frame(block)
			+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());

		return(err);
	}

	/* None of the known page types matched. */
	ib::warn() << "Unknown page type (" << page_type << ")";

	return(DB_CORRUPTION);
}
1945
1946/** Called for every page in the tablespace. If the page was not
1947updated then its state must be set to BUF_PAGE_NOT_USED.
1948@param block block read from file, note it is not from the buffer pool
1949@retval DB_SUCCESS or error code. */
1950dberr_t
1951PageConverter::operator() (os_offset_t, buf_block_t* block) UNIV_NOTHROW
1952{
1953 /* If we already had an old page with matching number
1954 in the buffer pool, evict it now, because
1955 we no longer evict the pages on DISCARD TABLESPACE. */
1956 buf_page_get_gen(block->page.id, get_page_size(),
1957 RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
1958 __FILE__, __LINE__, NULL, NULL);
1959
1960 ulint page_type;
1961
1962 dberr_t err = update_page(block, page_type);
1963 if (err != DB_SUCCESS) return err;
1964
1965 if (!block->page.zip.data) {
1966 buf_flush_init_for_writing(
1967 NULL, block->frame, NULL, m_current_lsn);
1968 } else if (fil_page_type_is_index(page_type)) {
1969 buf_flush_init_for_writing(
1970 NULL, block->page.zip.data, &block->page.zip,
1971 m_current_lsn);
1972 } else {
1973 /* Calculate and update the checksum of non-index
1974 pages for ROW_FORMAT=COMPRESSED tables. */
1975 buf_flush_update_zip_checksum(
1976 block->page.zip.data, get_page_size().physical(),
1977 m_current_lsn);
1978 }
1979
1980 return DB_SUCCESS;
1981}
1982
1983/*****************************************************************//**
1984Clean up after import tablespace failure, this function will acquire
1985the dictionary latches on behalf of the transaction if the transaction
1986hasn't already acquired them. */
1987static MY_ATTRIBUTE((nonnull))
1988void
1989row_import_discard_changes(
1990/*=======================*/
1991 row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
1992 trx_t* trx, /*!< in/out: transaction for import */
1993 dberr_t err) /*!< in: error code */
1994{
1995 dict_table_t* table = prebuilt->table;
1996
1997 ut_a(err != DB_SUCCESS);
1998
1999 prebuilt->trx->error_info = NULL;
2000
2001 ib::info() << "Discarding tablespace of table "
2002 << prebuilt->table->name
2003 << ": " << ut_strerr(err);
2004
2005 if (trx->dict_operation_lock_mode != RW_X_LATCH) {
2006 ut_a(trx->dict_operation_lock_mode == 0);
2007 row_mysql_lock_data_dictionary(trx);
2008 }
2009
2010 ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
2011
2012 /* Since we update the index root page numbers on disk after
2013 we've done a successful import. The table will not be loadable.
2014 However, we need to ensure that the in memory root page numbers
2015 are reset to "NULL". */
2016
2017 for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
2018 index != 0;
2019 index = UT_LIST_GET_NEXT(indexes, index)) {
2020
2021 index->page = FIL_NULL;
2022 }
2023
2024 table->file_unreadable = true;
2025 if (table->space) {
2026 fil_close_tablespace(trx, table->space->id);
2027 table->space = NULL;
2028 }
2029}
2030
/*****************************************************************//**
Clean up after import tablespace: on failure discard the changes, then
commit and free the import transaction, release the data dictionary
lock and make a checkpoint.
@return the error code that was passed in */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_import_cleanup(
/*===============*/
	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt from handler */
	trx_t*		trx,		/*!< in/out: transaction for import */
	dberr_t		err)		/*!< in: error code */
{
	/* The import must run in its own transaction, not in the
	one of the handler. */
	ut_a(prebuilt->trx != trx);

	if (err != DB_SUCCESS) {
		row_import_discard_changes(prebuilt, trx, err);
	}

	/* The dictionary must be X-latched by now, either by the caller
	or by row_import_discard_changes(). */
	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);

	DBUG_EXECUTE_IF("ib_import_before_commit_crash", DBUG_SUICIDE(););

	trx_commit_for_mysql(trx);

	row_mysql_unlock_data_dictionary(trx);

	/* trx must not be used after this point. */
	trx_free(trx);

	prebuilt->trx->op_info = "";

	DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););

	log_make_checkpoint_at(LSN_MAX, TRUE);

	return(err);
}
2065
2066/*****************************************************************//**
2067Report error during tablespace import. */
2068static MY_ATTRIBUTE((nonnull, warn_unused_result))
2069dberr_t
2070row_import_error(
2071/*=============*/
2072 row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
2073 trx_t* trx, /*!< in/out: transaction for import */
2074 dberr_t err) /*!< in: error code */
2075{
2076 if (!trx_is_interrupted(trx)) {
2077 char table_name[MAX_FULL_NAME_LEN + 1];
2078
2079 innobase_format_name(
2080 table_name, sizeof(table_name),
2081 prebuilt->table->name.m_name);
2082
2083 ib_senderrf(
2084 trx->mysql_thd, IB_LOG_LEVEL_WARN,
2085 ER_INNODB_IMPORT_ERROR,
2086 table_name, (ulong) err, ut_strerr(err));
2087 }
2088
2089 return(row_import_cleanup(prebuilt, trx, err));
2090}
2091
2092/*****************************************************************//**
2093Adjust the root page index node and leaf node segment headers, update
2094with the new space id. For all the table's secondary indexes.
2095@return error code */
2096static MY_ATTRIBUTE((nonnull, warn_unused_result))
2097dberr_t
2098row_import_adjust_root_pages_of_secondary_indexes(
2099/*==============================================*/
2100 trx_t* trx, /*!< in: transaction used for
2101 the import */
2102 dict_table_t* table, /*!< in: table the indexes
2103 belong to */
2104 const row_import& cfg) /*!< Import context */
2105{
2106 dict_index_t* index;
2107 ulint n_rows_in_table;
2108 dberr_t err = DB_SUCCESS;
2109
2110 /* Skip the clustered index. */
2111 index = dict_table_get_first_index(table);
2112
2113 n_rows_in_table = cfg.get_n_rows(index->name);
2114
2115 DBUG_EXECUTE_IF("ib_import_sec_rec_count_mismatch_failure",
2116 n_rows_in_table++;);
2117
2118 /* Adjust the root pages of the secondary indexes only. */
2119 while ((index = dict_table_get_next_index(index)) != NULL) {
2120 ut_a(!dict_index_is_clust(index));
2121
2122 if (!(index->type & DICT_CORRUPT)
2123 && index->page != FIL_NULL) {
2124
2125 /* Update the Btree segment headers for index node and
2126 leaf nodes in the root page. Set the new space id. */
2127
2128 err = btr_root_adjust_on_import(index);
2129 } else {
2130 ib::warn() << "Skip adjustment of root pages for"
2131 " index " << index->name << ".";
2132
2133 err = DB_CORRUPTION;
2134 }
2135
2136 if (err != DB_SUCCESS) {
2137
2138 if (index->type & DICT_CLUSTERED) {
2139 break;
2140 }
2141
2142 ib_errf(trx->mysql_thd,
2143 IB_LOG_LEVEL_WARN,
2144 ER_INNODB_INDEX_CORRUPT,
2145 "Index %s not found or corrupt,"
2146 " you should recreate this index.",
2147 index->name());
2148
2149 /* Do not bail out, so that the data
2150 can be recovered. */
2151
2152 err = DB_SUCCESS;
2153 index->type |= DICT_CORRUPT;
2154 continue;
2155 }
2156
2157 /* If we failed to purge any records in the index then
2158 do it the hard way.
2159
2160 TODO: We can do this in the first pass by generating UNDO log
2161 records for the failed rows. */
2162
2163 if (!cfg.requires_purge(index->name)) {
2164 continue;
2165 }
2166
2167 IndexPurge purge(trx, index);
2168
2169 trx->op_info = "secondary: purge delete marked records";
2170
2171 err = purge.garbage_collect();
2172
2173 trx->op_info = "";
2174
2175 if (err != DB_SUCCESS) {
2176 break;
2177 } else if (purge.get_n_rows() != n_rows_in_table) {
2178
2179 ib_errf(trx->mysql_thd,
2180 IB_LOG_LEVEL_WARN,
2181 ER_INNODB_INDEX_CORRUPT,
2182 "Index '%s' contains " ULINTPF " entries, "
2183 "should be " ULINTPF ", you should recreate "
2184 "this index.", index->name(),
2185 purge.get_n_rows(), n_rows_in_table);
2186
2187 index->type |= DICT_CORRUPT;
2188
2189 /* Do not bail out, so that the data
2190 can be recovered. */
2191
2192 err = DB_SUCCESS;
2193 }
2194 }
2195
2196 return(err);
2197}
2198
2199/*****************************************************************//**
2200Ensure that dict_sys->row_id exceeds SELECT MAX(DB_ROW_ID).
2201@return error code */
2202static MY_ATTRIBUTE((nonnull, warn_unused_result))
2203dberr_t
2204row_import_set_sys_max_row_id(
2205/*==========================*/
2206 row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
2207 handler */
2208 const dict_table_t* table) /*!< in: table to import */
2209{
2210 dberr_t err;
2211 const rec_t* rec;
2212 mtr_t mtr;
2213 btr_pcur_t pcur;
2214 row_id_t row_id = 0;
2215 dict_index_t* index;
2216
2217 index = dict_table_get_first_index(table);
2218 ut_a(dict_index_is_clust(index));
2219
2220 mtr_start(&mtr);
2221
2222 mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2223
2224 btr_pcur_open_at_index_side(
2225 false, // High end
2226 index,
2227 BTR_SEARCH_LEAF,
2228 &pcur,
2229 true, // Init cursor
2230 0, // Leaf level
2231 &mtr);
2232
2233 btr_pcur_move_to_prev_on_page(&pcur);
2234 rec = btr_pcur_get_rec(&pcur);
2235
2236 /* Check for empty table. */
2237 if (page_rec_is_infimum(rec)) {
2238 /* The table is empty. */
2239 err = DB_SUCCESS;
2240 } else if (rec_is_default_row(rec, index)) {
2241 /* The clustered index contains the 'default row',
2242 that is, the table is empty. */
2243 err = DB_SUCCESS;
2244 } else {
2245 ulint len;
2246 const byte* field;
2247 mem_heap_t* heap = NULL;
2248 ulint offsets_[1 + REC_OFFS_HEADER_SIZE];
2249 ulint* offsets;
2250
2251 rec_offs_init(offsets_);
2252
2253 offsets = rec_get_offsets(
2254 rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
2255
2256 field = rec_get_nth_field(
2257 rec, offsets,
2258 dict_index_get_sys_col_pos(index, DATA_ROW_ID),
2259 &len);
2260
2261 if (len == DATA_ROW_ID_LEN) {
2262 row_id = mach_read_from_6(field);
2263 err = DB_SUCCESS;
2264 } else {
2265 err = DB_CORRUPTION;
2266 }
2267
2268 if (heap != NULL) {
2269 mem_heap_free(heap);
2270 }
2271 }
2272
2273 btr_pcur_close(&pcur);
2274 mtr_commit(&mtr);
2275
2276 DBUG_EXECUTE_IF("ib_import_set_max_rowid_failure",
2277 err = DB_CORRUPTION;);
2278
2279 if (err != DB_SUCCESS) {
2280 ib_errf(prebuilt->trx->mysql_thd,
2281 IB_LOG_LEVEL_WARN,
2282 ER_INNODB_INDEX_CORRUPT,
2283 "Index `%s` corruption detected, invalid DB_ROW_ID"
2284 " in index.", index->name());
2285
2286 return(err);
2287
2288 } else if (row_id > 0) {
2289
2290 /* Update the system row id if the imported index row id is
2291 greater than the max system row id. */
2292
2293 mutex_enter(&dict_sys->mutex);
2294
2295 if (row_id >= dict_sys->row_id) {
2296 dict_sys->row_id = row_id + 1;
2297 dict_hdr_flush_row_id();
2298 }
2299
2300 mutex_exit(&dict_sys->mutex);
2301 }
2302
2303 return(DB_SUCCESS);
2304}
2305
2306/*****************************************************************//**
2307Read the a string from the meta data file.
2308@return DB_SUCCESS or error code. */
2309static
2310dberr_t
2311row_import_cfg_read_string(
2312/*=======================*/
2313 FILE* file, /*!< in/out: File to read from */
2314 byte* ptr, /*!< out: string to read */
2315 ulint max_len) /*!< in: maximum length of the output
2316 buffer in bytes */
2317{
2318 DBUG_EXECUTE_IF("ib_import_string_read_error",
2319 errno = EINVAL; return(DB_IO_ERROR););
2320
2321 ulint len = 0;
2322
2323 while (!feof(file)) {
2324 int ch = fgetc(file);
2325
2326 if (ch == EOF) {
2327 break;
2328 } else if (ch != 0) {
2329 if (len < max_len) {
2330 ptr[len++] = ch;
2331 } else {
2332 break;
2333 }
2334 /* max_len includes the NUL byte */
2335 } else if (len != max_len - 1) {
2336 break;
2337 } else {
2338 ptr[len] = 0;
2339 return(DB_SUCCESS);
2340 }
2341 }
2342
2343 errno = EINVAL;
2344
2345 return(DB_IO_ERROR);
2346}
2347
2348/*********************************************************************//**
2349Write the meta data (index user fields) config file.
2350@return DB_SUCCESS or error code. */
2351static MY_ATTRIBUTE((nonnull, warn_unused_result))
2352dberr_t
2353row_import_cfg_read_index_fields(
2354/*=============================*/
2355 FILE* file, /*!< in: file to write to */
2356 THD* thd, /*!< in/out: session */
2357 row_index_t* index) /*!< Index being read in */
2358{
2359 byte row[sizeof(ib_uint32_t) * 3];
2360 ulint n_fields = index->m_n_fields;
2361
2362 index->m_fields = UT_NEW_ARRAY_NOKEY(dict_field_t, n_fields);
2363
2364 /* Trigger OOM */
2365 DBUG_EXECUTE_IF(
2366 "ib_import_OOM_4",
2367 UT_DELETE_ARRAY(index->m_fields);
2368 index->m_fields = NULL;
2369 );
2370
2371 if (index->m_fields == NULL) {
2372 return(DB_OUT_OF_MEMORY);
2373 }
2374
2375 dict_field_t* field = index->m_fields;
2376
2377 memset(field, 0x0, sizeof(*field) * n_fields);
2378
2379 for (ulint i = 0; i < n_fields; ++i, ++field) {
2380 byte* ptr = row;
2381
2382 /* Trigger EOF */
2383 DBUG_EXECUTE_IF("ib_import_io_read_error_1",
2384 (void) fseek(file, 0L, SEEK_END););
2385
2386 if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
2387
2388 ib_senderrf(
2389 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2390 (ulong) errno, strerror(errno),
2391 "while reading index fields.");
2392
2393 return(DB_IO_ERROR);
2394 }
2395
2396 field->prefix_len = mach_read_from_4(ptr);
2397 ptr += sizeof(ib_uint32_t);
2398
2399 field->fixed_len = mach_read_from_4(ptr);
2400 ptr += sizeof(ib_uint32_t);
2401
2402 /* Include the NUL byte in the length. */
2403 ulint len = mach_read_from_4(ptr);
2404
2405 byte* name = UT_NEW_ARRAY_NOKEY(byte, len);
2406
2407 /* Trigger OOM */
2408 DBUG_EXECUTE_IF(
2409 "ib_import_OOM_5",
2410 UT_DELETE_ARRAY(name);
2411 name = NULL;
2412 );
2413
2414 if (name == NULL) {
2415 return(DB_OUT_OF_MEMORY);
2416 }
2417
2418 field->name = reinterpret_cast<const char*>(name);
2419
2420 dberr_t err = row_import_cfg_read_string(file, name, len);
2421
2422 if (err != DB_SUCCESS) {
2423
2424 ib_senderrf(
2425 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2426 (ulong) errno, strerror(errno),
2427 "while parsing table name.");
2428
2429 return(err);
2430 }
2431 }
2432
2433 return(DB_SUCCESS);
2434}
2435
2436/*****************************************************************//**
2437Read the index names and root page numbers of the indexes and set the values.
2438Row format [root_page_no, len of str, str ... ]
2439@return DB_SUCCESS or error code. */
2440static MY_ATTRIBUTE((nonnull, warn_unused_result))
2441dberr_t
2442row_import_read_index_data(
2443/*=======================*/
2444 FILE* file, /*!< in: File to read from */
2445 THD* thd, /*!< in: session */
2446 row_import* cfg) /*!< in/out: meta-data read */
2447{
2448 byte* ptr;
2449 row_index_t* cfg_index;
2450 byte row[sizeof(index_id_t) + sizeof(ib_uint32_t) * 9];
2451
2452 /* FIXME: What is the max value? */
2453 ut_a(cfg->m_n_indexes > 0);
2454 ut_a(cfg->m_n_indexes < 1024);
2455
2456 cfg->m_indexes = UT_NEW_ARRAY_NOKEY(row_index_t, cfg->m_n_indexes);
2457
2458 /* Trigger OOM */
2459 DBUG_EXECUTE_IF(
2460 "ib_import_OOM_6",
2461 UT_DELETE_ARRAY(cfg->m_indexes);
2462 cfg->m_indexes = NULL;
2463 );
2464
2465 if (cfg->m_indexes == NULL) {
2466 return(DB_OUT_OF_MEMORY);
2467 }
2468
2469 memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);
2470
2471 cfg_index = cfg->m_indexes;
2472
2473 for (ulint i = 0; i < cfg->m_n_indexes; ++i, ++cfg_index) {
2474 /* Trigger EOF */
2475 DBUG_EXECUTE_IF("ib_import_io_read_error_2",
2476 (void) fseek(file, 0L, SEEK_END););
2477
2478 /* Read the index data. */
2479 size_t n_bytes = fread(row, 1, sizeof(row), file);
2480
2481 /* Trigger EOF */
2482 DBUG_EXECUTE_IF("ib_import_io_read_error",
2483 (void) fseek(file, 0L, SEEK_END););
2484
2485 if (n_bytes != sizeof(row)) {
2486 char msg[BUFSIZ];
2487
2488 snprintf(msg, sizeof(msg),
2489 "while reading index meta-data, expected "
2490 "to read " ULINTPF
2491 " bytes but read only " ULINTPF " bytes",
2492 sizeof(row), n_bytes);
2493
2494 ib_senderrf(
2495 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2496 (ulong) errno, strerror(errno), msg);
2497
2498 ib::error() << "IO Error: " << msg;
2499
2500 return(DB_IO_ERROR);
2501 }
2502
2503 ptr = row;
2504
2505 cfg_index->m_id = mach_read_from_8(ptr);
2506 ptr += sizeof(index_id_t);
2507
2508 cfg_index->m_space = mach_read_from_4(ptr);
2509 ptr += sizeof(ib_uint32_t);
2510
2511 cfg_index->m_page_no = mach_read_from_4(ptr);
2512 ptr += sizeof(ib_uint32_t);
2513
2514 cfg_index->m_type = mach_read_from_4(ptr);
2515 ptr += sizeof(ib_uint32_t);
2516
2517 cfg_index->m_trx_id_offset = mach_read_from_4(ptr);
2518 if (cfg_index->m_trx_id_offset != mach_read_from_4(ptr)) {
2519 ut_ad(0);
2520 /* Overflow. Pretend that the clustered index
2521 has a variable-length PRIMARY KEY. */
2522 cfg_index->m_trx_id_offset = 0;
2523 }
2524 ptr += sizeof(ib_uint32_t);
2525
2526 cfg_index->m_n_user_defined_cols = mach_read_from_4(ptr);
2527 ptr += sizeof(ib_uint32_t);
2528
2529 cfg_index->m_n_uniq = mach_read_from_4(ptr);
2530 ptr += sizeof(ib_uint32_t);
2531
2532 cfg_index->m_n_nullable = mach_read_from_4(ptr);
2533 ptr += sizeof(ib_uint32_t);
2534
2535 cfg_index->m_n_fields = mach_read_from_4(ptr);
2536 ptr += sizeof(ib_uint32_t);
2537
2538 /* The NUL byte is included in the name length. */
2539 ulint len = mach_read_from_4(ptr);
2540
2541 if (len > OS_FILE_MAX_PATH) {
2542 ib_errf(thd, IB_LOG_LEVEL_ERROR,
2543 ER_INNODB_INDEX_CORRUPT,
2544 "Index name length (" ULINTPF ") is too long, "
2545 "the meta-data is corrupt", len);
2546
2547 return(DB_CORRUPTION);
2548 }
2549
2550 cfg_index->m_name = UT_NEW_ARRAY_NOKEY(byte, len);
2551
2552 /* Trigger OOM */
2553 DBUG_EXECUTE_IF(
2554 "ib_import_OOM_7",
2555 UT_DELETE_ARRAY(cfg_index->m_name);
2556 cfg_index->m_name = NULL;
2557 );
2558
2559 if (cfg_index->m_name == NULL) {
2560 return(DB_OUT_OF_MEMORY);
2561 }
2562
2563 dberr_t err;
2564
2565 err = row_import_cfg_read_string(file, cfg_index->m_name, len);
2566
2567 if (err != DB_SUCCESS) {
2568
2569 ib_senderrf(
2570 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2571 (ulong) errno, strerror(errno),
2572 "while parsing index name.");
2573
2574 return(err);
2575 }
2576
2577 err = row_import_cfg_read_index_fields(file, thd, cfg_index);
2578
2579 if (err != DB_SUCCESS) {
2580 return(err);
2581 }
2582
2583 }
2584
2585 return(DB_SUCCESS);
2586}
2587
2588/*****************************************************************//**
2589Set the index root page number for v1 format.
2590@return DB_SUCCESS or error code. */
2591static
2592dberr_t
2593row_import_read_indexes(
2594/*====================*/
2595 FILE* file, /*!< in: File to read from */
2596 THD* thd, /*!< in: session */
2597 row_import* cfg) /*!< in/out: meta-data read */
2598{
2599 byte row[sizeof(ib_uint32_t)];
2600
2601 /* Trigger EOF */
2602 DBUG_EXECUTE_IF("ib_import_io_read_error_3",
2603 (void) fseek(file, 0L, SEEK_END););
2604
2605 /* Read the number of indexes. */
2606 if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
2607 ib_senderrf(
2608 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2609 (ulong) errno, strerror(errno),
2610 "while reading number of indexes.");
2611
2612 return(DB_IO_ERROR);
2613 }
2614
2615 cfg->m_n_indexes = mach_read_from_4(row);
2616
2617 if (cfg->m_n_indexes == 0) {
2618 ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2619 "Number of indexes in meta-data file is 0");
2620
2621 return(DB_CORRUPTION);
2622
2623 } else if (cfg->m_n_indexes > 1024) {
2624 // FIXME: What is the upper limit? */
2625 ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2626 "Number of indexes in meta-data file is too high: "
2627 ULINTPF, cfg->m_n_indexes);
2628 cfg->m_n_indexes = 0;
2629
2630 return(DB_CORRUPTION);
2631 }
2632
2633 return(row_import_read_index_data(file, thd, cfg));
2634}
2635
2636/*********************************************************************//**
2637Read the meta data (table columns) config file. Deserialise the contents of
2638dict_col_t structure, along with the column name. */
2639static MY_ATTRIBUTE((nonnull, warn_unused_result))
2640dberr_t
2641row_import_read_columns(
2642/*====================*/
2643 FILE* file, /*!< in: file to write to */
2644 THD* thd, /*!< in/out: session */
2645 row_import* cfg) /*!< in/out: meta-data read */
2646{
2647 dict_col_t* col;
2648 byte row[sizeof(ib_uint32_t) * 8];
2649
2650 /* FIXME: What should the upper limit be? */
2651 ut_a(cfg->m_n_cols > 0);
2652 ut_a(cfg->m_n_cols < 1024);
2653
2654 cfg->m_cols = UT_NEW_ARRAY_NOKEY(dict_col_t, cfg->m_n_cols);
2655
2656 /* Trigger OOM */
2657 DBUG_EXECUTE_IF(
2658 "ib_import_OOM_8",
2659 UT_DELETE_ARRAY(cfg->m_cols);
2660 cfg->m_cols = NULL;
2661 );
2662
2663 if (cfg->m_cols == NULL) {
2664 return(DB_OUT_OF_MEMORY);
2665 }
2666
2667 cfg->m_col_names = UT_NEW_ARRAY_NOKEY(byte*, cfg->m_n_cols);
2668
2669 /* Trigger OOM */
2670 DBUG_EXECUTE_IF(
2671 "ib_import_OOM_9",
2672 UT_DELETE_ARRAY(cfg->m_col_names);
2673 cfg->m_col_names = NULL;
2674 );
2675
2676 if (cfg->m_col_names == NULL) {
2677 return(DB_OUT_OF_MEMORY);
2678 }
2679
2680 memset(cfg->m_cols, 0x0, sizeof(cfg->m_cols) * cfg->m_n_cols);
2681 memset(cfg->m_col_names, 0x0, sizeof(cfg->m_col_names) * cfg->m_n_cols);
2682
2683 col = cfg->m_cols;
2684
2685 for (ulint i = 0; i < cfg->m_n_cols; ++i, ++col) {
2686 byte* ptr = row;
2687
2688 /* Trigger EOF */
2689 DBUG_EXECUTE_IF("ib_import_io_read_error_4",
2690 (void) fseek(file, 0L, SEEK_END););
2691
2692 if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
2693 ib_senderrf(
2694 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2695 (ulong) errno, strerror(errno),
2696 "while reading table column meta-data.");
2697
2698 return(DB_IO_ERROR);
2699 }
2700
2701 col->prtype = mach_read_from_4(ptr);
2702 ptr += sizeof(ib_uint32_t);
2703
2704 col->mtype = mach_read_from_4(ptr);
2705 ptr += sizeof(ib_uint32_t);
2706
2707 col->len = mach_read_from_4(ptr);
2708 ptr += sizeof(ib_uint32_t);
2709
2710 ulint mbminmaxlen = mach_read_from_4(ptr);
2711 col->mbmaxlen = mbminmaxlen / 5;
2712 col->mbminlen = mbminmaxlen % 5;
2713 ptr += sizeof(ib_uint32_t);
2714
2715 col->ind = mach_read_from_4(ptr);
2716 ptr += sizeof(ib_uint32_t);
2717
2718 col->ord_part = mach_read_from_4(ptr);
2719 ptr += sizeof(ib_uint32_t);
2720
2721 col->max_prefix = mach_read_from_4(ptr);
2722 ptr += sizeof(ib_uint32_t);
2723
2724 /* Read in the column name as [len, byte array]. The len
2725 includes the NUL byte. */
2726
2727 ulint len = mach_read_from_4(ptr);
2728
2729 /* FIXME: What is the maximum column name length? */
2730 if (len == 0 || len > 128) {
2731 ib_errf(thd, IB_LOG_LEVEL_ERROR,
2732 ER_IO_READ_ERROR,
2733 "Column name length " ULINTPF ", is invalid",
2734 len);
2735
2736 return(DB_CORRUPTION);
2737 }
2738
2739 cfg->m_col_names[i] = UT_NEW_ARRAY_NOKEY(byte, len);
2740
2741 /* Trigger OOM */
2742 DBUG_EXECUTE_IF(
2743 "ib_import_OOM_10",
2744 UT_DELETE_ARRAY(cfg->m_col_names[i]);
2745 cfg->m_col_names[i] = NULL;
2746 );
2747
2748 if (cfg->m_col_names[i] == NULL) {
2749 return(DB_OUT_OF_MEMORY);
2750 }
2751
2752 dberr_t err;
2753
2754 err = row_import_cfg_read_string(
2755 file, cfg->m_col_names[i], len);
2756
2757 if (err != DB_SUCCESS) {
2758
2759 ib_senderrf(
2760 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2761 (ulong) errno, strerror(errno),
2762 "while parsing table column name.");
2763
2764 return(err);
2765 }
2766 }
2767
2768 return(DB_SUCCESS);
2769}
2770
2771/*****************************************************************//**
2772Read the contents of the <tablespace>.cfg file.
2773@return DB_SUCCESS or error code. */
2774static MY_ATTRIBUTE((nonnull, warn_unused_result))
2775dberr_t
2776row_import_read_v1(
2777/*===============*/
2778 FILE* file, /*!< in: File to read from */
2779 THD* thd, /*!< in: session */
2780 row_import* cfg) /*!< out: meta data */
2781{
2782 byte value[sizeof(ib_uint32_t)];
2783
2784 /* Trigger EOF */
2785 DBUG_EXECUTE_IF("ib_import_io_read_error_5",
2786 (void) fseek(file, 0L, SEEK_END););
2787
2788 /* Read the hostname where the tablespace was exported. */
2789 if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
2790 ib_senderrf(
2791 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2792 (ulong) errno, strerror(errno),
2793 "while reading meta-data export hostname length.");
2794
2795 return(DB_IO_ERROR);
2796 }
2797
2798 ulint len = mach_read_from_4(value);
2799
2800 /* NUL byte is part of name length. */
2801 cfg->m_hostname = UT_NEW_ARRAY_NOKEY(byte, len);
2802
2803 /* Trigger OOM */
2804 DBUG_EXECUTE_IF(
2805 "ib_import_OOM_1",
2806 UT_DELETE_ARRAY(cfg->m_hostname);
2807 cfg->m_hostname = NULL;
2808 );
2809
2810 if (cfg->m_hostname == NULL) {
2811 return(DB_OUT_OF_MEMORY);
2812 }
2813
2814 dberr_t err = row_import_cfg_read_string(file, cfg->m_hostname, len);
2815
2816 if (err != DB_SUCCESS) {
2817
2818 ib_senderrf(
2819 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2820 (ulong) errno, strerror(errno),
2821 "while parsing export hostname.");
2822
2823 return(err);
2824 }
2825
2826 /* Trigger EOF */
2827 DBUG_EXECUTE_IF("ib_import_io_read_error_6",
2828 (void) fseek(file, 0L, SEEK_END););
2829
2830 /* Read the table name of tablespace that was exported. */
2831 if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
2832 ib_senderrf(
2833 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2834 (ulong) errno, strerror(errno),
2835 "while reading meta-data table name length.");
2836
2837 return(DB_IO_ERROR);
2838 }
2839
2840 len = mach_read_from_4(value);
2841
2842 /* NUL byte is part of name length. */
2843 cfg->m_table_name = UT_NEW_ARRAY_NOKEY(byte, len);
2844
2845 /* Trigger OOM */
2846 DBUG_EXECUTE_IF(
2847 "ib_import_OOM_2",
2848 UT_DELETE_ARRAY(cfg->m_table_name);
2849 cfg->m_table_name = NULL;
2850 );
2851
2852 if (cfg->m_table_name == NULL) {
2853 return(DB_OUT_OF_MEMORY);
2854 }
2855
2856 err = row_import_cfg_read_string(file, cfg->m_table_name, len);
2857
2858 if (err != DB_SUCCESS) {
2859 ib_senderrf(
2860 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2861 (ulong) errno, strerror(errno),
2862 "while parsing table name.");
2863
2864 return(err);
2865 }
2866
2867 ib::info() << "Importing tablespace for table '" << cfg->m_table_name
2868 << "' that was exported from host '" << cfg->m_hostname << "'";
2869
2870 byte row[sizeof(ib_uint32_t) * 3];
2871
2872 /* Trigger EOF */
2873 DBUG_EXECUTE_IF("ib_import_io_read_error_7",
2874 (void) fseek(file, 0L, SEEK_END););
2875
2876 /* Read the autoinc value. */
2877 if (fread(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
2878 ib_senderrf(
2879 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2880 (ulong) errno, strerror(errno),
2881 "while reading autoinc value.");
2882
2883 return(DB_IO_ERROR);
2884 }
2885
2886 cfg->m_autoinc = mach_read_from_8(row);
2887
2888 /* Trigger EOF */
2889 DBUG_EXECUTE_IF("ib_import_io_read_error_8",
2890 (void) fseek(file, 0L, SEEK_END););
2891
2892 /* Read the tablespace page size. */
2893 if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
2894 ib_senderrf(
2895 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2896 (ulong) errno, strerror(errno),
2897 "while reading meta-data header.");
2898
2899 return(DB_IO_ERROR);
2900 }
2901
2902 byte* ptr = row;
2903
2904 const ulint logical_page_size = mach_read_from_4(ptr);
2905 ptr += sizeof(ib_uint32_t);
2906
2907 if (logical_page_size != srv_page_size) {
2908
2909 ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
2910 "Tablespace to be imported has a different"
2911 " page size than this server. Server page size"
2912 " is %lu, whereas tablespace page size"
2913 " is " ULINTPF,
2914 srv_page_size,
2915 logical_page_size);
2916
2917 return(DB_ERROR);
2918 }
2919
2920 cfg->m_flags = mach_read_from_4(ptr);
2921 ptr += sizeof(ib_uint32_t);
2922
2923 cfg->m_page_size.copy_from(dict_tf_get_page_size(cfg->m_flags));
2924
2925 ut_a(logical_page_size == cfg->m_page_size.logical());
2926
2927 cfg->m_n_cols = mach_read_from_4(ptr);
2928
2929 if (!dict_tf_is_valid(cfg->m_flags)) {
2930 ib_errf(thd, IB_LOG_LEVEL_ERROR,
2931 ER_TABLE_SCHEMA_MISMATCH,
2932 "Invalid table flags: " ULINTPF, cfg->m_flags);
2933
2934 return(DB_CORRUPTION);
2935 }
2936
2937 err = row_import_read_columns(file, thd, cfg);
2938
2939 if (err == DB_SUCCESS) {
2940 err = row_import_read_indexes(file, thd, cfg);
2941 }
2942
2943 return(err);
2944}
2945
2946/**
2947Read the contents of the <tablespace>.cfg file.
2948@return DB_SUCCESS or error code. */
2949static MY_ATTRIBUTE((nonnull, warn_unused_result))
2950dberr_t
2951row_import_read_meta_data(
2952/*======================*/
2953 FILE* file, /*!< in: File to read from */
2954 THD* thd, /*!< in: session */
2955 row_import& cfg) /*!< out: contents of the .cfg file */
2956{
2957 byte row[sizeof(ib_uint32_t)];
2958
2959 /* Trigger EOF */
2960 DBUG_EXECUTE_IF("ib_import_io_read_error_9",
2961 (void) fseek(file, 0L, SEEK_END););
2962
2963 if (fread(&row, 1, sizeof(row), file) != sizeof(row)) {
2964 ib_senderrf(
2965 thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2966 (ulong) errno, strerror(errno),
2967 "while reading meta-data version.");
2968
2969 return(DB_IO_ERROR);
2970 }
2971
2972 cfg.m_version = mach_read_from_4(row);
2973
2974 /* Check the version number. */
2975 switch (cfg.m_version) {
2976 case IB_EXPORT_CFG_VERSION_V1:
2977
2978 return(row_import_read_v1(file, thd, &cfg));
2979 default:
2980 ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2981 "Unsupported meta-data version number (" ULINTPF "), "
2982 "file ignored", cfg.m_version);
2983 }
2984
2985 return(DB_ERROR);
2986}
2987
2988/**
2989Read the contents of the <tablename>.cfg file.
2990@return DB_SUCCESS or error code. */
2991static MY_ATTRIBUTE((nonnull, warn_unused_result))
2992dberr_t
2993row_import_read_cfg(
2994/*================*/
2995 dict_table_t* table, /*!< in: table */
2996 THD* thd, /*!< in: session */
2997 row_import& cfg) /*!< out: contents of the .cfg file */
2998{
2999 dberr_t err;
3000 char name[OS_FILE_MAX_PATH];
3001
3002 cfg.m_table = table;
3003
3004 srv_get_meta_data_filename(table, name, sizeof(name));
3005
3006 FILE* file = fopen(name, "rb");
3007
3008 if (file == NULL) {
3009 char msg[BUFSIZ];
3010
3011 snprintf(msg, sizeof(msg),
3012 "Error opening '%s', will attempt to import"
3013 " without schema verification", name);
3014
3015 ib_senderrf(
3016 thd, IB_LOG_LEVEL_WARN, ER_IO_READ_ERROR,
3017 (ulong) errno, strerror(errno), msg);
3018
3019 cfg.m_missing = true;
3020
3021 err = DB_FAIL;
3022 } else {
3023
3024 cfg.m_missing = false;
3025
3026 err = row_import_read_meta_data(file, thd, cfg);
3027 fclose(file);
3028 }
3029
3030 return(err);
3031}
3032
3033/*****************************************************************//**
3034Update the <space, root page> of a table's indexes from the values
3035in the data dictionary.
3036@return DB_SUCCESS or error code */
3037dberr_t
3038row_import_update_index_root(
3039/*=========================*/
3040 trx_t* trx, /*!< in/out: transaction that
3041 covers the update */
3042 const dict_table_t* table, /*!< in: Table for which we want
3043 to set the root page_no */
3044 bool reset, /*!< in: if true then set to
3045 FIL_NUL */
3046 bool dict_locked) /*!< in: Set to true if the
3047 caller already owns the
3048 dict_sys_t:: mutex. */
3049
3050{
3051 const dict_index_t* index;
3052 que_t* graph = 0;
3053 dberr_t err = DB_SUCCESS;
3054
3055 ut_ad(reset || table->space->id == table->space_id);
3056
3057 static const char sql[] = {
3058 "PROCEDURE UPDATE_INDEX_ROOT() IS\n"
3059 "BEGIN\n"
3060 "UPDATE SYS_INDEXES\n"
3061 "SET SPACE = :space,\n"
3062 " PAGE_NO = :page,\n"
3063 " TYPE = :type\n"
3064 "WHERE TABLE_ID = :table_id AND ID = :index_id;\n"
3065 "END;\n"};
3066
3067 if (!dict_locked) {
3068 mutex_enter(&dict_sys->mutex);
3069 }
3070
3071 for (index = dict_table_get_first_index(table);
3072 index != 0;
3073 index = dict_table_get_next_index(index)) {
3074
3075 pars_info_t* info;
3076 ib_uint32_t page;
3077 ib_uint32_t space;
3078 ib_uint32_t type;
3079 index_id_t index_id;
3080 table_id_t table_id;
3081
3082 info = (graph != 0) ? graph->info : pars_info_create();
3083
3084 mach_write_to_4(
3085 reinterpret_cast<byte*>(&type),
3086 index->type);
3087
3088 mach_write_to_4(
3089 reinterpret_cast<byte*>(&page),
3090 reset ? FIL_NULL : index->page);
3091
3092 mach_write_to_4(
3093 reinterpret_cast<byte*>(&space),
3094 reset ? FIL_NULL : index->table->space_id);
3095
3096 mach_write_to_8(
3097 reinterpret_cast<byte*>(&index_id),
3098 index->id);
3099
3100 mach_write_to_8(
3101 reinterpret_cast<byte*>(&table_id),
3102 table->id);
3103
3104 /* If we set the corrupt bit during the IMPORT phase then
3105 we need to update the system tables. */
3106 pars_info_bind_int4_literal(info, "type", &type);
3107 pars_info_bind_int4_literal(info, "space", &space);
3108 pars_info_bind_int4_literal(info, "page", &page);
3109 pars_info_bind_ull_literal(info, "index_id", &index_id);
3110 pars_info_bind_ull_literal(info, "table_id", &table_id);
3111
3112 if (graph == 0) {
3113 graph = pars_sql(info, sql);
3114 ut_a(graph);
3115 graph->trx = trx;
3116 }
3117
3118 que_thr_t* thr;
3119
3120 graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
3121
3122 ut_a(thr = que_fork_start_command(graph));
3123
3124 que_run_threads(thr);
3125
3126 DBUG_EXECUTE_IF("ib_import_internal_error",
3127 trx->error_state = DB_ERROR;);
3128
3129 err = trx->error_state;
3130
3131 if (err != DB_SUCCESS) {
3132 ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
3133 ER_INTERNAL_ERROR,
3134 "While updating the <space, root page"
3135 " number> of index %s - %s",
3136 index->name(), ut_strerr(err));
3137
3138 break;
3139 }
3140 }
3141
3142 que_graph_free(graph);
3143
3144 if (!dict_locked) {
3145 mutex_exit(&dict_sys->mutex);
3146 }
3147
3148 return(err);
3149}
3150
3151/** Callback arg for row_import_set_discarded. */
3152struct discard_t {
3153 ib_uint32_t flags2; /*!< Value read from column */
3154 bool state; /*!< New state of the flag */
3155 ulint n_recs; /*!< Number of recs processed */
3156};
3157
3158/******************************************************************//**
3159Fetch callback that sets or unsets the DISCARDED tablespace flag in
3160SYS_TABLES. The flags is stored in MIX_LEN column.
3161@return FALSE if all OK */
3162static
3163ibool
3164row_import_set_discarded(
3165/*=====================*/
3166 void* row, /*!< in: sel_node_t* */
3167 void* user_arg) /*!< in: bool set/unset flag */
3168{
3169 sel_node_t* node = static_cast<sel_node_t*>(row);
3170 discard_t* discard = static_cast<discard_t*>(user_arg);
3171 dfield_t* dfield = que_node_get_val(node->select_list);
3172 dtype_t* type = dfield_get_type(dfield);
3173 ulint len = dfield_get_len(dfield);
3174
3175 ut_a(dtype_get_mtype(type) == DATA_INT);
3176 ut_a(len == sizeof(ib_uint32_t));
3177
3178 ulint flags2 = mach_read_from_4(
3179 static_cast<byte*>(dfield_get_data(dfield)));
3180
3181 if (discard->state) {
3182 flags2 |= DICT_TF2_DISCARDED;
3183 } else {
3184 flags2 &= ~DICT_TF2_DISCARDED;
3185 }
3186
3187 mach_write_to_4(reinterpret_cast<byte*>(&discard->flags2), flags2);
3188
3189 ++discard->n_recs;
3190
3191 /* There should be at most one matching record. */
3192 ut_a(discard->n_recs == 1);
3193
3194 return(FALSE);
3195}
3196
3197/** Update the DICT_TF2_DISCARDED flag in SYS_TABLES.MIX_LEN.
3198@param[in,out] trx dictionary transaction
3199@param[in] table_id table identifier
3200@param[in] discarded whether to set or clear the flag
3201@return DB_SUCCESS or error code */
3202dberr_t row_import_update_discarded_flag(trx_t* trx, table_id_t table_id,
3203 bool discarded)
3204{
3205 pars_info_t* info;
3206 discard_t discard;
3207
3208 static const char sql[] =
3209 "PROCEDURE UPDATE_DISCARDED_FLAG() IS\n"
3210 "DECLARE FUNCTION my_func;\n"
3211 "DECLARE CURSOR c IS\n"
3212 " SELECT MIX_LEN"
3213 " FROM SYS_TABLES"
3214 " WHERE ID = :table_id FOR UPDATE;"
3215 "\n"
3216 "BEGIN\n"
3217 "OPEN c;\n"
3218 "WHILE 1 = 1 LOOP\n"
3219 " FETCH c INTO my_func();\n"
3220 " IF c % NOTFOUND THEN\n"
3221 " EXIT;\n"
3222 " END IF;\n"
3223 "END LOOP;\n"
3224 "UPDATE SYS_TABLES"
3225 " SET MIX_LEN = :flags2"
3226 " WHERE ID = :table_id;\n"
3227 "CLOSE c;\n"
3228 "END;\n";
3229
3230 discard.n_recs = 0;
3231 discard.state = discarded;
3232 discard.flags2 = ULINT32_UNDEFINED;
3233
3234 info = pars_info_create();
3235
3236 pars_info_add_ull_literal(info, "table_id", table_id);
3237 pars_info_bind_int4_literal(info, "flags2", &discard.flags2);
3238
3239 pars_info_bind_function(
3240 info, "my_func", row_import_set_discarded, &discard);
3241
3242 dberr_t err = que_eval_sql(info, sql, false, trx);
3243
3244 ut_a(discard.n_recs == 1);
3245 ut_a(discard.flags2 != ULINT32_UNDEFINED);
3246
3247 return(err);
3248}
3249
3250struct fil_iterator_t {
3251 pfs_os_file_t file; /*!< File handle */
3252 const char* filepath; /*!< File path name */
3253 os_offset_t start; /*!< From where to start */
3254 os_offset_t end; /*!< Where to stop */
3255 os_offset_t file_size; /*!< File size in bytes */
3256 ulint n_io_buffers; /*!< Number of pages to use
3257 for IO */
3258 byte* io_buffer; /*!< Buffer to use for IO */
3259 fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
3260 byte* crypt_io_buffer; /*!< IO buffer when encrypted */
3261};
3262
3263/********************************************************************//**
3264TODO: This can be made parallel trivially by chunking up the file and creating
a callback per thread. Main benefit will be to use multiple CPUs for
3266checksums and compressed tables. We have to do compressed tables block by
3267block right now. Secondly we need to decompress/compress and copy too much
3268of data. These are CPU intensive.
3269
3270Iterate over all the pages in the tablespace.
3271@param iter - Tablespace iterator
3272@param block - block to use for IO
3273@param callback - Callback to inspect and update page contents
3274@retval DB_SUCCESS or error code */
3275static
3276dberr_t
3277fil_iterate(
3278/*========*/
3279 const fil_iterator_t& iter,
3280 buf_block_t* block,
3281 AbstractCallback& callback)
3282{
3283 os_offset_t offset;
3284 const ulint size = callback.get_page_size().physical();
3285 ulint n_bytes = iter.n_io_buffers * size;
3286
3287 ut_ad(!srv_read_only_mode);
3288
3289 /* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless
3290 copying for non-index pages. Unfortunately, it is
3291 required by buf_zip_decompress() */
3292
3293 for (offset = iter.start; offset < iter.end; offset += n_bytes) {
3294 if (callback.is_interrupted()) {
3295 return DB_INTERRUPTED;
3296 }
3297
3298 byte* io_buffer = iter.io_buffer;
3299 block->frame = io_buffer;
3300
3301 if (block->page.zip.data) {
3302 /* Zip IO is done in the compressed page buffer. */
3303 io_buffer = block->page.zip.data;
3304 }
3305
3306 /* We have to read the exact number of bytes. Otherwise the
3307 InnoDB IO functions croak on failed reads. */
3308
3309 n_bytes = ulint(ut_min(os_offset_t(n_bytes),
3310 iter.end - offset));
3311
3312 ut_ad(n_bytes > 0);
3313 ut_ad(!(n_bytes % size));
3314
3315 const bool encrypted = iter.crypt_data != NULL
3316 && iter.crypt_data->should_encrypt();
3317 /* Use additional crypt io buffer if tablespace is encrypted */
3318 byte* const readptr = encrypted
3319 ? iter.crypt_io_buffer : io_buffer;
3320 byte* const writeptr = readptr;
3321
3322 IORequest read_request(IORequest::READ);
3323 read_request.disable_partial_io_warnings();
3324
3325 dberr_t err = os_file_read_no_error_handling(
3326 read_request, iter.file, readptr, offset, n_bytes, 0);
3327 if (err != DB_SUCCESS) {
3328 ib::error() << iter.filepath
3329 << ": os_file_read() failed";
3330 }
3331
3332 bool updated = false;
3333 os_offset_t page_off = offset;
3334 ulint n_pages_read = n_bytes / size;
3335 block->page.id.set_page_no(ulint(page_off / size));
3336
3337 for (ulint i = 0; i < n_pages_read;
3338 block->page.id.set_page_no(block->page.id.page_no() + 1),
3339 ++i, page_off += size, block->frame += size) {
3340 bool decrypted = false;
3341 err = DB_SUCCESS;
3342 byte* src = readptr + i * size;
3343 byte* dst = io_buffer + i * size;
3344 bool frame_changed = false;
3345 ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
3346 const bool page_compressed
3347 = page_type
3348 == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
3349 || page_type == FIL_PAGE_PAGE_COMPRESSED;
3350 const ulint page_no = page_get_page_no(src);
3351 if (!page_no && page_off) {
3352 const ulint* b = reinterpret_cast<const ulint*>
3353 (src);
3354 const ulint* const e = b + size / sizeof *b;
3355 do {
3356 if (*b++) {
3357 goto page_corrupted;
3358 }
3359 } while (b != e);
3360
3361 /* Proceed to the next page,
3362 because this one is all zero. */
3363 continue;
3364 }
3365
3366 if (page_no != page_off / size) {
3367 goto page_corrupted;
3368 }
3369
3370 if (encrypted) {
3371 decrypted = fil_space_decrypt(
3372 iter.crypt_data, dst,
3373 callback.get_page_size(), src, &err);
3374
3375 if (err != DB_SUCCESS) {
3376 return err;
3377 }
3378
3379 if (decrypted) {
3380 updated = true;
3381 } else {
3382 if (!page_compressed
3383 && !block->page.zip.data) {
3384 block->frame = src;
3385 frame_changed = true;
3386 } else {
3387 ut_ad(dst != src);
3388 memcpy(dst, src, size);
3389 }
3390 }
3391 }
3392
3393 /* If the original page is page_compressed, we need
3394 to decompress it before adjusting further. */
3395 if (page_compressed) {
3396 fil_decompress_page(NULL, dst, ulong(size),
3397 NULL);
3398 updated = true;
3399 } else if (buf_page_is_corrupted(
3400 false,
3401 encrypted && !frame_changed
3402 ? dst : src,
3403 callback.get_page_size(), NULL)) {
3404page_corrupted:
3405 ib::warn() << callback.filename()
3406 << ": Page " << (offset / size)
3407 << " at offset " << offset
3408 << " looks corrupted.";
3409 return DB_CORRUPTION;
3410 }
3411
3412 if ((err = callback(page_off, block)) != DB_SUCCESS) {
3413 return err;
3414 } else if (!updated) {
3415 updated = buf_block_get_state(block)
3416 == BUF_BLOCK_FILE_PAGE;
3417 }
3418
3419 /* If tablespace is encrypted we use additional
3420 temporary scratch area where pages are read
3421 for decrypting readptr == crypt_io_buffer != io_buffer.
3422
3423 Destination for decryption is a buffer pool block
3424 block->frame == dst == io_buffer that is updated.
3425 Pages that did not require decryption even when
3426 tablespace is marked as encrypted are not copied
3427 instead block->frame is set to src == readptr.
3428
3429 For encryption we again use temporary scratch area
3430 writeptr != io_buffer == dst
3431 that is then written to the tablespace
3432
3433 (1) For normal tables io_buffer == dst == writeptr
3434 (2) For only page compressed tables
3435 io_buffer == dst == writeptr
3436 (3) For encrypted (and page compressed)
3437 readptr != io_buffer == dst != writeptr
3438 */
3439
3440 ut_ad(!encrypted && !page_compressed ?
3441 src == dst && dst == writeptr + (i * size):1);
3442 ut_ad(page_compressed && !encrypted ?
3443 src == dst && dst == writeptr + (i * size):1);
3444 ut_ad(encrypted ?
3445 src != dst && dst != writeptr + (i * size):1);
3446
3447 /* When tablespace is encrypted or compressed its
3448 first page (i.e. page 0) is not encrypted or
3449 compressed and there is no need to copy frame. */
3450 if (encrypted && block->page.id.page_no() != 0) {
3451 byte *local_frame = callback.get_frame(block);
3452 ut_ad((writeptr + (i * size)) != local_frame);
3453 memcpy((writeptr + (i * size)), local_frame, size);
3454 }
3455
3456 if (frame_changed) {
3457 block->frame = dst;
3458 }
3459
3460 src = io_buffer + (i * size);
3461
3462 if (page_compressed) {
3463 ulint len = 0;
3464
3465 fil_compress_page(
3466 NULL,
3467 src,
3468 NULL,
3469 size,
3470 0,/* FIXME: compression level */
3471 512,/* FIXME: use proper block size */
3472 encrypted,
3473 &len);
3474 ut_ad(len <= size);
3475 memset(src + len, 0, size - len);
3476 updated = true;
3477 }
3478
3479 /* Encrypt the page if encryption was used. */
3480 if (encrypted && decrypted) {
3481 byte *dest = writeptr + i * size;
3482 byte* tmp = fil_encrypt_buf(
3483 iter.crypt_data,
3484 block->page.id.space(),
3485 block->page.id.page_no(),
3486 mach_read_from_8(src + FIL_PAGE_LSN),
3487 src, callback.get_page_size(), dest);
3488
3489 if (tmp == src) {
3490 /* TODO: remove unnecessary memcpy's */
3491 ut_ad(dest != src);
3492 memcpy(dest, src, size);
3493 }
3494
3495 updated = true;
3496 }
3497 }
3498
3499 /* A page was updated in the set, write back to disk. */
3500 if (updated) {
3501 IORequest write_request(IORequest::WRITE);
3502
3503 err = os_file_write(write_request,
3504 iter.filepath, iter.file,
3505 writeptr, offset, n_bytes);
3506
3507 if (err != DB_SUCCESS) {
3508 return err;
3509 }
3510 }
3511 }
3512
3513 return DB_SUCCESS;
3514}
3515
3516/********************************************************************//**
3517Iterate over all the pages in the tablespace.
3518@param table - the table definiton in the server
3519@param n_io_buffers - number of blocks to read and write together
3520@param callback - functor that will do the page updates
3521@return DB_SUCCESS or error code */
3522static
3523dberr_t
3524fil_tablespace_iterate(
3525/*===================*/
3526 dict_table_t* table,
3527 ulint n_io_buffers,
3528 AbstractCallback& callback)
3529{
3530 dberr_t err;
3531 pfs_os_file_t file;
3532 char* filepath;
3533
3534 ut_a(n_io_buffers > 0);
3535 ut_ad(!srv_read_only_mode);
3536
3537 DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
3538 return(DB_CORRUPTION););
3539
3540 /* Make sure the data_dir_path is set. */
3541 dict_get_and_save_data_dir_path(table, false);
3542
3543 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3544 ut_a(table->data_dir_path);
3545
3546 filepath = fil_make_filepath(
3547 table->data_dir_path, table->name.m_name, IBD, true);
3548 } else {
3549 filepath = fil_make_filepath(
3550 NULL, table->name.m_name, IBD, false);
3551 }
3552
3553 if (!filepath) {
3554 return(DB_OUT_OF_MEMORY);
3555 } else {
3556 bool success;
3557
3558 file = os_file_create_simple_no_error_handling(
3559 innodb_data_file_key, filepath,
3560 OS_FILE_OPEN, OS_FILE_READ_WRITE, false, &success);
3561
3562 if (!success) {
3563 /* The following call prints an error message */
3564 os_file_get_last_error(true);
3565 ib::error() << "Trying to import a tablespace,"
3566 " but could not open the tablespace file "
3567 << filepath;
3568 ut_free(filepath);
3569 return DB_TABLESPACE_NOT_FOUND;
3570 } else {
3571 err = DB_SUCCESS;
3572 }
3573 }
3574
3575 callback.set_file(filepath, file);
3576
3577 os_offset_t file_size = os_file_get_size(file);
3578 ut_a(file_size != (os_offset_t) -1);
3579
3580 /* Allocate a page to read in the tablespace header, so that we
3581 can determine the page size and zip_size (if it is compressed).
3582 We allocate an extra page in case it is a compressed table. One
3583 page is to ensure alignement. */
3584
3585 void* page_ptr = ut_malloc_nokey(3U << srv_page_size_shift);
3586 byte* page = static_cast<byte*>(ut_align(page_ptr, srv_page_size));
3587
3588 buf_block_t* block = reinterpret_cast<buf_block_t*>
3589 (ut_zalloc_nokey(sizeof *block));
3590 block->frame = page;
3591 block->page.id.copy_from(page_id_t(0, 0));
3592 block->page.io_fix = BUF_IO_NONE;
3593 block->page.buf_fix_count = 1;
3594 block->page.state = BUF_BLOCK_FILE_PAGE;
3595
3596 /* Read the first page and determine the page and zip size. */
3597
3598 IORequest request(IORequest::READ);
3599 request.disable_partial_io_warnings();
3600
3601 err = os_file_read_no_error_handling(request, file, page, 0,
3602 srv_page_size, 0);
3603
3604 if (err == DB_SUCCESS) {
3605 err = callback.init(file_size, block);
3606 }
3607
3608 if (err == DB_SUCCESS) {
3609 block->page.id.copy_from(
3610 page_id_t(callback.get_space_id(), 0));
3611 block->page.size.copy_from(callback.get_page_size());
3612 if (block->page.size.is_compressed()) {
3613 page_zip_set_size(&block->page.zip,
3614 callback.get_page_size().physical());
3615 /* ROW_FORMAT=COMPRESSED is not optimised for block IO
3616 for now. We do the IMPORT page by page. */
3617 n_io_buffers = 1;
3618 }
3619
3620 fil_iterator_t iter;
3621
3622 /* read (optional) crypt data */
3623 iter.crypt_data = fil_space_read_crypt_data(
3624 callback.get_page_size(), page);
3625
3626 /* If tablespace is encrypted, it needs extra buffers */
3627 if (iter.crypt_data && n_io_buffers > 1) {
3628 /* decrease io buffers so that memory
3629 consumption will not double */
3630 n_io_buffers /= 2;
3631 }
3632
3633 iter.file = file;
3634 iter.start = 0;
3635 iter.end = file_size;
3636 iter.filepath = filepath;
3637 iter.file_size = file_size;
3638 iter.n_io_buffers = n_io_buffers;
3639
3640 /* Add an extra page for compressed page scratch area. */
3641 void* io_buffer = ut_malloc_nokey(
3642 (2 + iter.n_io_buffers) << srv_page_size_shift);
3643
3644 iter.io_buffer = static_cast<byte*>(
3645 ut_align(io_buffer, srv_page_size));
3646
3647 void* crypt_io_buffer = NULL;
3648 if (iter.crypt_data) {
3649 crypt_io_buffer = ut_malloc_nokey(
3650 (2 + iter.n_io_buffers)
3651 << srv_page_size_shift);
3652 iter.crypt_io_buffer = static_cast<byte*>(
3653 ut_align(crypt_io_buffer, srv_page_size));
3654 }
3655
3656 if (block->page.zip.ssize) {
3657 ut_ad(iter.n_io_buffers == 1);
3658 block->frame = iter.io_buffer;
3659 block->page.zip.data = block->frame + srv_page_size;
3660 }
3661
3662 err = fil_iterate(iter, block, callback);
3663
3664 if (iter.crypt_data) {
3665 fil_space_destroy_crypt_data(&iter.crypt_data);
3666 }
3667
3668 ut_free(crypt_io_buffer);
3669 ut_free(io_buffer);
3670 }
3671
3672 if (err == DB_SUCCESS) {
3673 ib::info() << "Sync to disk";
3674
3675 if (!os_file_flush(file)) {
3676 ib::info() << "os_file_flush() failed!";
3677 err = DB_IO_ERROR;
3678 } else {
3679 ib::info() << "Sync to disk - done!";
3680 }
3681 }
3682
3683 os_file_close(file);
3684
3685 ut_free(page_ptr);
3686 ut_free(filepath);
3687 ut_free(block);
3688
3689 return(err);
3690}
3691
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.

Outline of the steps performed, in order:
- create a separate transaction for the import and assign it an undo log;
- read the .cfg meta-data file and match it against the table definition,
  or, when the .cfg file is missing, scan the .ibd file to find the index
  root pages;
- "Phase I": pass every page of the .ibd file through PageConverter;
- open the tablespace with fil_ibd_open() and run
  ibuf_check_bitmap_on_import();
- adjust the root pages of the clustered and secondary indexes and purge
  delete-marked records where the configuration requires it;
- "Phase III": flush the pages of the tablespace;
- update the index root pages and the discarded flag in the data
  dictionary, and set the auto-increment value if one was read.

Every return path goes through row_import_cleanup() or row_import_error().
@return error code or DB_SUCCESS */
dberr_t
row_import_for_mysql(
/*=================*/
	dict_table_t*	table,		/*!< in/out: table */
	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL */
{
	dberr_t		err;
	trx_t*		trx;
	ib_uint64_t	autoinc = 0;
	char*		filepath = NULL;
	/* Only assigned in the branch that handles a missing .cfg file;
	the value is not read anywhere in this function. */
	ulint		space_flags MY_ATTRIBUTE((unused));

	/* The caller assured that this is not read_only_mode and that no
	temporary tablespace is being imported. */
	ut_ad(!srv_read_only_mode);
	ut_ad(!table->is_temporary());

	ut_ad(table->space_id);
	ut_ad(table->space_id < SRV_LOG_SPACE_FIRST_ID);
	ut_ad(prebuilt->trx);
	ut_ad(!table->is_readable());

	ibuf_delete_for_discarded_space(table->space_id);

	trx_start_if_not_started(prebuilt->trx, true);

	/* The import itself runs in its own transaction, separate from
	the user transaction in prebuilt->trx. */
	trx = trx_create();

	/* So that the table is not DROPped during recovery. */
	trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);

	trx_start_if_not_started(trx, true);

	/* So that we can send error messages to the user. */
	trx->mysql_thd = prebuilt->trx->mysql_thd;

	/* Ensure that the table will be dropped by trx_rollback_active()
	in case of a crash. */

	trx->table_id = table->id;

	/* Assign an undo segment for the transaction, so that the
	transaction will be recovered after a crash. */

	/* TODO: Do not write any undo log for the IMPORT cleanup. */
	{
		mtr_t	mtr;
		mtr.start();
		/* The outcome of the assignment is reported through err. */
		trx_undo_assign(trx, &err, &mtr);
		mtr.commit();
	}

	DBUG_EXECUTE_IF("ib_import_undo_assign_failure",
			err = DB_TOO_MANY_CONCURRENT_TRXS;);

	if (err != DB_SUCCESS) {

		return(row_import_cleanup(prebuilt, trx, err));

	} else if (trx->rsegs.m_redo.undo == 0) {

		/* No undo log was assigned even though no error was
		reported; treat it like running out of undo slots. */
		err = DB_TOO_MANY_CONCURRENT_TRXS;
		return(row_import_cleanup(prebuilt, trx, err));
	}

	prebuilt->trx->op_info = "read meta-data file";

	/* Prevent DDL operations while we are checking. */

	rw_lock_s_lock_func(dict_operation_lock, 0, __FILE__, __LINE__);

	row_import	cfg;

	memset(&cfg, 0x0, sizeof(cfg));

	err = row_import_read_cfg(table, trx->mysql_thd, cfg);

	/* Check if the table column definitions match the contents
	of the config file. */

	/* Each of the three branches below releases the S-latch on
	dict_operation_lock that was acquired above. */
	if (err == DB_SUCCESS) {

		/* We have a schema file, try and match it with our
		data dictionary. */

		err = cfg.match_schema(trx->mysql_thd);

		/* Update index->page and SYS_INDEXES.PAGE_NO to match the
		B-tree root page numbers in the tablespace. Use the index
		name from the .cfg file to find match. */

		if (err == DB_SUCCESS) {
			cfg.set_root_by_name();
			autoinc = cfg.m_autoinc;
		}

		rw_lock_s_unlock_gen(dict_operation_lock, 0);

		DBUG_EXECUTE_IF("ib_import_set_index_root_failure",
				err = DB_TOO_MANY_CONCURRENT_TRXS;);

	} else if (cfg.m_missing) {

		rw_lock_s_unlock_gen(dict_operation_lock, 0);

		/* We don't have a schema file, we will have to discover
		the index root pages from the .ibd file and skip the schema
		matching step. */

		ut_a(err == DB_FAIL);

		cfg.m_page_size.copy_from(univ_page_size);

		FetchIndexRootPages	fetchIndexRootPages(table, trx);

		/* First pass over the .ibd file: only collect the index
		root pages. */
		err = fil_tablespace_iterate(
			table, IO_BUFFER_SIZE(cfg.m_page_size.physical()),
			fetchIndexRootPages);

		if (err == DB_SUCCESS) {

			err = fetchIndexRootPages.build_row_import(&cfg);

			/* Update index->page and SYS_INDEXES.PAGE_NO
			to match the B-tree root page numbers in the
			tablespace. */

			if (err == DB_SUCCESS) {
				err = cfg.set_root_by_heuristic();
			}
		}

		space_flags = fetchIndexRootPages.get_space_flags();

	} else {
		/* Reading the .cfg file failed for a reason other than
		the file being missing; err is handled below. */
		rw_lock_s_unlock_gen(dict_operation_lock, 0);
	}

	if (err != DB_SUCCESS) {
		return(row_import_error(prebuilt, trx, err));
	}

	prebuilt->trx->op_info = "importing tablespace";

	ib::info() << "Phase I - Update all pages";

	/* Iterate over all the pages and do the sanity checking and
	the conversion required to import the tablespace. */

	PageConverter	converter(&cfg, table->space_id, trx);

	/* Set the IO buffer size in pages. */

	err = fil_tablespace_iterate(
		table, IO_BUFFER_SIZE(cfg.m_page_size.physical()), converter);

	DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure",
			err = DB_TOO_MANY_CONCURRENT_TRXS;);

	if (err != DB_SUCCESS) {
		char	table_name[MAX_FULL_NAME_LEN + 1];

		innobase_format_name(
			table_name, sizeof(table_name),
			table->name.m_name);

		/* No additional message is sent to the client here for
		a decryption failure. */
		if (err != DB_DECRYPTION_FAILED) {

			ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
				ER_INTERNAL_ERROR,
			"Cannot reset LSNs in table %s : %s",
				table_name, ut_strerr(err));
		}

		return(row_import_cleanup(prebuilt, trx, err));
	}

	row_mysql_lock_data_dictionary(trx);

	/* If the table is stored in a remote tablespace, we need to
	determine that filepath from the link file and system tables.
	Find the space ID in SYS_TABLES since this is an ALTER TABLE. */
	dict_get_and_save_data_dir_path(table, true);

	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
		ut_a(table->data_dir_path);

		filepath = fil_make_filepath(
			table->data_dir_path, table->name.m_name, IBD, true);
	} else {
		filepath = fil_make_filepath(
			NULL, table->name.m_name, IBD, false);
	}

	DBUG_EXECUTE_IF(
		"ib_import_OOM_15",
		ut_free(filepath);
		filepath = NULL;
	);

	if (filepath == NULL) {
		row_mysql_unlock_data_dictionary(trx);
		return(row_import_cleanup(prebuilt, trx, DB_OUT_OF_MEMORY));
	}

	/* Open the tablespace so that we can access via the buffer pool.
	We set the 2nd param (fix_dict = true) here because we already
	have an x-lock on dict_operation_lock and dict_sys->mutex.
	The tablespace is initially opened as a temporary one, because
	we will not be writing any redo log for it before we have invoked
	fil_space_t::set_imported() to declare it a persistent tablespace. */

	ulint	fsp_flags = dict_tf_to_fsp_flags(table->flags);

	table->space = fil_ibd_open(
		true, true, FIL_TYPE_IMPORT, table->space_id,
		fsp_flags, table->name, filepath, &err);

	/* A NULL tablespace and an error code must go together. */
	ut_ad((table->space == NULL) == (err != DB_SUCCESS));
	DBUG_EXECUTE_IF("ib_import_open_tablespace_failure",
			err = DB_TABLESPACE_NOT_FOUND; table->space = NULL;);

	if (!table->space) {
		row_mysql_unlock_data_dictionary(trx);

		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
			ER_GET_ERRMSG,
			err, ut_strerr(err), filepath);

		ut_free(filepath);

		return(row_import_cleanup(prebuilt, trx, err));
	}

	row_mysql_unlock_data_dictionary(trx);

	/* The path was only needed for opening the file. */
	ut_free(filepath);

	err = ibuf_check_bitmap_on_import(trx, table->space);

	DBUG_EXECUTE_IF("ib_import_check_bitmap_failure", err = DB_CORRUPTION;);

	if (err != DB_SUCCESS) {
		return(row_import_cleanup(prebuilt, trx, err));
	}

	/* The first index must always be the clustered index. */

	dict_index_t*	index = dict_table_get_first_index(table);

	if (!dict_index_is_clust(index)) {
		return(row_import_error(prebuilt, trx, DB_CORRUPTION));
	}

	/* Update the Btree segment headers for index node and
	leaf nodes in the root page. Set the new space id. */

	err = btr_root_adjust_on_import(index);

	DBUG_EXECUTE_IF("ib_import_cluster_root_adjust_failure",
			err = DB_CORRUPTION;);

	if (err != DB_SUCCESS) {
		return(row_import_error(prebuilt, trx, err));
	} else if (cfg.requires_purge(index->name)) {

		/* Purge any delete-marked records that couldn't be
		purged during the page conversion phase from the
		cluster index. */

		IndexPurge	purge(trx, index);

		trx->op_info = "cluster: purging delete marked records";

		err = purge.garbage_collect();

		trx->op_info = "";
	}

	DBUG_EXECUTE_IF("ib_import_cluster_failure", err = DB_CORRUPTION;);

	if (err != DB_SUCCESS) {
		return(row_import_error(prebuilt, trx, err));
	}

	/* For secondary indexes, purge any records that couldn't be purged
	during the page conversion phase. */

	err = row_import_adjust_root_pages_of_secondary_indexes(
		trx, table, cfg);

	DBUG_EXECUTE_IF("ib_import_sec_root_adjust_failure",
			err = DB_CORRUPTION;);

	if (err != DB_SUCCESS) {
		return(row_import_error(prebuilt, trx, err));
	}

	/* Ensure that the next available DB_ROW_ID is not smaller than
	any DB_ROW_ID stored in the table. */

	if (prebuilt->clust_index_was_generated) {

		err = row_import_set_sys_max_row_id(prebuilt, table);

		if (err != DB_SUCCESS) {
			return(row_import_error(prebuilt, trx, err));
		}
	}

	ib::info() << "Phase III - Flush changes to disk";

	/* Ensure that all pages dirtied during the IMPORT make it to disk.
	The only dirty pages generated should be from the pessimistic purge
	of delete marked records that couldn't be purged in Phase I. */

	{
		FlushObserver observer(prebuilt->table->space, trx, NULL);
		buf_LRU_flush_or_remove_pages(prebuilt->table->space_id,
					      &observer);

		if (observer.is_interrupted()) {
			ib::info() << "Phase III - Flush interrupted";
			return(row_import_error(prebuilt, trx,
						DB_INTERRUPTED));
		}
	}

	ib::info() << "Phase IV - Flush complete";
	/* Only now is the tablespace declared imported; until this call
	it was still open as FIL_TYPE_IMPORT. */
	prebuilt->table->space->set_imported();

	/* The dictionary latches will be released in row_import_cleanup()
	after the transaction commit, for both success and error. */

	row_mysql_lock_data_dictionary(trx);

	/* Update the root pages of the table's indexes. */
	err = row_import_update_index_root(trx, table, false, true);

	if (err != DB_SUCCESS) {
		return(row_import_error(prebuilt, trx, err));
	}

	err = row_import_update_discarded_flag(trx, table->id, false);

	if (err != DB_SUCCESS) {
		return(row_import_error(prebuilt, trx, err));
	}

	/* The dictionary updates succeeded: mark the in-memory table
	object as readable and no longer discarded. */
	table->file_unreadable = false;
	table->flags2 &= ~DICT_TF2_DISCARDED;

	/* Set autoinc value read from .cfg file, if one was specified.
	Otherwise, keep the PAGE_ROOT_AUTO_INC as is. */
	if (autoinc) {
		ib::info() << table->name << " autoinc value set to "
			<< autoinc;

		/* Post-decrement: table->autoinc receives the value from
		the .cfg file, while the value written with
		btr_write_autoinc() is one less. */
		table->autoinc = autoinc--;
		btr_write_autoinc(dict_table_get_first_index(table), autoinc);
	}

	return(row_import_cleanup(prebuilt, trx, err));
}
4060