row0import.cc source code [MariaDB/storage/innobase/row/row0import.cc]

1	/*****************************************************************************
2
3	Copyright (c) 2012, 2018, Oracle and/or its affiliates. All Rights Reserved.
4	Copyright (c) 2015, 2018, MariaDB Corporation.
5
6	This program is free software; you can redistribute it and/or modify it under
7	the terms of the GNU General Public License as published by the Free Software
8	Foundation; version 2 of the License.
9
10	This program is distributed in the hope that it will be useful, but WITHOUT
11	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14	You should have received a copy of the GNU General Public License along with
15	this program; if not, write to the Free Software Foundation, Inc.,
16	51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18	*****************************************************************************/
19
20	/************************************************//**
21	@file row/row0import.cc
22	Import a tablespace to a running instance.
23
24	Created 2012-02-08 by Sunny Bains.
25	*******************************************************/
26
27	#include "ha_prototypes.h"
28
29	#include "row0import.h"
30	#include "btr0pcur.h"
31	#include "que0que.h"
32	#include "dict0boot.h"
33	#include "ibuf0ibuf.h"
34	#include "pars0pars.h"
35	#include "row0sel.h"
36	#include "row0mysql.h"
37	#include "srv0start.h"
38	#include "row0quiesce.h"
39	#include "fil0pagecompress.h"
40	#include "trx0undo.h"
41	#include "ut0new.h"
42
43	#include <vector>
44
45	#ifdef HAVE_MY_AES_H
46	#include <my_aes.h>
47	#endif
48
/** The size of the buffer to use for IO, expressed in pages.
The buffer is 1 MiB; the macro argument is parenthesized so that
expressions such as IO_BUFFER_SIZE(a + b) divide by the whole argument.
@param n physical page size in bytes
@return number of pages that fit in the IO buffer */
#define IO_BUFFER_SIZE(n)	((1024 * 1024) / (n))
53
54	/* For gathering stats on records during phase I /
55	struct row_stats_t {
56	ulint m_n_deleted; /!< Number of deleted records*
57	found in the index /*
58
59	ulint m_n_purged; /!< Number of records purged*
60	optimisatically /*
61
62	ulint m_n_rows; /!< Number of rows /
63
64	ulint m_n_purge_failed; /!< Number of deleted rows*
65	that could not be purged /*
66	};
67
68	/* Index information required by IMPORT. /
69	struct row_index_t {
70	index_id_t m_id; /!< Index id of the table*
71	in the exporting server /*
72	byte* m_name; /!< Index name /
73
74	ulint m_space; /!< Space where it is placed /
75
76	ulint m_page_no; /!< Root page number /
77
78	ulint m_type; /!< Index type /
79
80	ulint m_trx_id_offset; /!< Relevant only for clustered*
81	indexes, offset of transaction
82	id system column /*
83
84	ulint m_n_user_defined_cols; /!< User defined columns /
85
86	ulint m_n_uniq; /!< Number of columns that can*
87	uniquely identify the row /*
88
89	ulint m_n_nullable; /!< Number of nullable*
90	columns /*
91
92	ulint m_n_fields; /!< Total number of fields /
93
94	dict_field_t* m_fields; /!< Index fields /
95
96	const dict_index_t*
97	m_srv_index; /!< Index instance in the*
98	importing server /*
99
100	row_stats_t m_stats; /!< Statistics gathered during*
101	the import phase /*
102
103	};
104
105	/* Meta data required by IMPORT. /
106	struct row_import {
107	row_import() UNIV_NOTHROW
108	:
109	m_table(),
110	m_version(),
111	m_hostname(),
112	m_table_name(),
113	m_autoinc(),
114	m_page_size (`0`, `0`, false),
115	m_flags(),
116	m_n_cols(),
117	m_cols(),
118	m_col_names(),
119	m_n_indexes(),
120	m_indexes(),
121	m_missing(true) {}
122
123	~row_import() UNIV_NOTHROW;
124
125	/* Find the index entry in in the indexes array.*
126	@param name index name
127	@return instance if found else 0. /*
128	row_index_t* get_index(const char* name) const UNIV_NOTHROW;
129
130	/* Get the number of rows in the index.*
131	@param name index name
132	@return number of rows (doesn't include delete marked rows). /*
133	ulint get_n_rows(const char* name) const UNIV_NOTHROW;
134
135	/* Find the ordinal value of the column name in the cfg table columns.*
136	@param name of column to look for.
137	@return ULINT_UNDEFINED if not found. /*
138	ulint find_col(const char* name) const UNIV_NOTHROW;
139
140	/* Get the number of rows for which purge failed during the*
141	convert phase.
142	@param name index name
143	@return number of rows for which purge failed. /*
144	ulint get_n_purge_failed(const char* name) const UNIV_NOTHROW;
145
146	/* Check if the index is clean. ie. no delete-marked records*
147	@param name index name
148	@return true if index needs to be purged. /*
149	bool requires_purge(const char* name) const UNIV_NOTHROW
150	{
151	return(get_n_purge_failed(name) > `0`);
152	}
153
154	/* Set the index root <space, pageno> using the index name /
155	void set_root_by_name() UNIV_NOTHROW;
156
157	/* Set the index root <space, pageno> using a heuristic*
158	@return DB_SUCCESS or error code /*
159	dberr_t set_root_by_heuristic() UNIV_NOTHROW;
160
161	/* Check if the index schema that was read from the .cfg file*
162	matches the in memory index definition.
163	Note: It will update row_import_t::m_srv_index to map the meta-data
164	read from the .cfg file to the server index instance.
165	@return DB_SUCCESS or error code. /*
166	dberr_t match_index_columns(
167	THD* thd,
168	const dict_index_t* index) UNIV_NOTHROW;
169
170	/* Check if the table schema that was read from the .cfg file*
171	matches the in memory table definition.
172	@param thd MySQL session variable
173	@return DB_SUCCESS or error code. /*
174	dberr_t match_table_columns(
175	THD* thd) UNIV_NOTHROW;
176
177	/* Check if the table (and index) schema that was read from the*
178	.cfg file matches the in memory table definition.
179	@param thd MySQL session variable
180	@return DB_SUCCESS or error code. /*
181	dberr_t match_schema(
182	THD* thd) UNIV_NOTHROW;
183
184	dict_table_t* m_table; /!< Table instance /
185
186	ulint m_version; /!< Version of config file /
187
188	byte* m_hostname; /!< Hostname where the*
189	tablespace was exported /*
190	byte* m_table_name; /!< Exporting instance table*
191	name /*
192
193	ib_uint64_t m_autoinc; /!< Next autoinc value /
194
195	page_size_t m_page_size; /!< Tablespace page size /
196
197	ulint m_flags; /!< Table flags /
198
199	ulint m_n_cols; /!< Number of columns in the*
200	meta-data file /*
201
202	dict_col_t* m_cols; /!< Column data /
203
204	byte** m_col_names; /!< Column names, we store the*
205	column naems separately becuase
206	there is no field to store the
207	value in dict_col_t /*
208
209	ulint m_n_indexes; /!< Number of indexes,*
210	including clustered index /*
211
212	row_index_t* m_indexes; /!< Index meta data /
213
214	bool m_missing; /!< true if a .cfg file was*
215	found and was readable /*
216	};
217
218	/* Use the page cursor to iterate over records in a block. /
219	class RecIterator {
220	public:
221	/* Default constructor /
222	RecIterator() UNIV_NOTHROW
223	{
224	memset(&m_cur, `0x0`, sizeof(m_cur));
225	}
226
227	/* Position the cursor on the first user record. /
228	void open(buf_block_t* block) UNIV_NOTHROW
229	{
230	page_cur_set_before_first(block, &m_cur);
231
232	if (!end()) {
233	next();
234	}
235	}
236
237	/* Move to the next record. /
238	void next() UNIV_NOTHROW
239	{
240	page_cur_move_to_next(&m_cur);
241	}
242
243	/**
244	@return the current record /*
245	rec_t* current() UNIV_NOTHROW
246	{
247	ut_ad(!end());
248	return(page_cur_get_rec(&m_cur));
249	}
250
251	/**
252	@return true if cursor is at the end /*
253	bool end() UNIV_NOTHROW
254	{
255	return(page_cur_is_after_last(&m_cur) == TRUE);
256	}
257
258	/* Remove the current record*
259	@return true on success /*
260	bool remove(
261	const dict_index_t* index,
262	page_zip_des_t* page_zip,
263	ulint* offsets) UNIV_NOTHROW
264	{
265	/ We can't end up with an empty page unless it is root. /
266	if (page_get_n_recs(m_cur.block->frame) <= `1`) {
267	return(false);
268	}
269
270	return(page_delete_rec(index, &m_cur, page_zip, offsets));
271	}
272
273	private:
274	page_cur_t m_cur;
275	};
276
277	/* Class that purges delete marked reocords from indexes, both secondary*
278	and cluster. It does a pessimistic delete. This should only be done if we
279	couldn't purge the delete marked reocrds during Phase I. /*
280	class IndexPurge {
281	public:
282	/* Constructor*
283	@param trx the user transaction covering the import tablespace
284	@param index to be imported
285	@param space_id space id of the tablespace /*
286	IndexPurge(
287	trx_t* trx,
288	dict_index_t* index) UNIV_NOTHROW
289	:
290	m_trx(trx),
291	m_index(index),
292	m_n_rows(`0`)
293	{
294	ib::info () << "Phase II - Purge records from index "
295	<< index->name;
296	}
297
298	/* Descructor /
299	~IndexPurge() UNIV_NOTHROW { }
300
301	/* Purge delete marked records.*
302	@return DB_SUCCESS or error code. /*
303	dberr_t garbage_collect() UNIV_NOTHROW;
304
305	/* The number of records that are not delete marked.*
306	@return total records in the index after purge /*
307	ulint get_n_rows() const UNIV_NOTHROW
308	{
309	return(m_n_rows);
310	}
311
312	private:
313	/* Begin import, position the cursor on the first record. /
314	void open() UNIV_NOTHROW;
315
316	/* Close the persistent curosr and commit the mini-transaction. /
317	void close() UNIV_NOTHROW;
318
319	/* Position the cursor on the next record.*
320	@return DB_SUCCESS or error code /*
321	dberr_t next() UNIV_NOTHROW;
322
323	/* Store the persistent cursor position and reopen the*
324	B-tree cursor in BTR_MODIFY_TREE mode, because the
325	tree structure may be changed during a pessimistic delete. /*
326	void purge_pessimistic_delete() UNIV_NOTHROW;
327
328	/* Purge delete-marked records.*
329	@param offsets current row offsets. /*
330	void purge() UNIV_NOTHROW;
331
332	protected:
333	// Disable copying
334	IndexPurge();
335	IndexPurge(const IndexPurge&);
336	IndexPurge &operator=(const IndexPurge&);
337
338	private:
339	trx_t* m_trx; /!< User transaction /
340	mtr_t m_mtr; /!< Mini-transaction /
341	btr_pcur_t m_pcur; /!< Persistent cursor /
342	dict_index_t* m_index; /!< Index to be processed /
343	ulint m_n_rows; /!< Records in index /
344	};
345
346	/* Functor that is called for each physical page that is read from the*
347	tablespace file. /*
348	class AbstractCallback
349	{
350	public:
351	/* Constructor*
352	@param trx covering transaction /*
353	AbstractCallback(trx_t* trx, ulint space_id)
354	:
355	m_page_size (`0`, `0`, false),
356	m_trx(trx),
357	m_space(space_id),
358	m_xdes(),
359	m_xdes_page_no(ULINT_UNDEFINED),
360	m_space_flags(ULINT_UNDEFINED) UNIV_NOTHROW { }
361
362	/* Free any extent descriptor instance /
363	virtual ~AbstractCallback()
364	{
365	UT_DELETE_ARRAY(m_xdes);
366	}
367
368	/* Determine the page size to use for traversing the tablespace*
369	@param file_size size of the tablespace file in bytes
370	@param block contents of the first page in the tablespace file.
371	@retval DB_SUCCESS or error code. /*
372	virtual dberr_t init(
373	os_offset_t file_size,
374	const buf_block_t* block) UNIV_NOTHROW;
375
376	/* @return true if compressed table. /
377	bool is_compressed_table() const UNIV_NOTHROW
378	{
379	return(get_page_size().is_compressed());
380	}
381
382	/* @return the tablespace flags /
383	ulint get_space_flags() const
384	{
385	return(m_space_flags);
386	}
387
388	/**
389	Set the name of the physical file and the file handle that is used
390	to open it for the file that is being iterated over.
391	@param filename the physical name of the tablespace file
392	@param file OS file handle /*
393	void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
394	{
395	m_file = file;
396	m_filepath = filename;
397	}
398
399	const page_size_t& get_page_size() const { return m_page_size; }
400
401	const char* filename() const { return m_filepath; }
402
403	/**
404	Called for every page in the tablespace. If the page was not
405	updated then its state must be set to BUF_PAGE_NOT_USED. For
406	compressed tables the page descriptor memory will be at offset:
407	block->frame + srv_page_size;
408	@param offset - physical offset within the file
409	@param block - block read from file, note it is not from the buffer pool
410	@retval DB_SUCCESS or error code. /*
411	virtual dberr_t operator()(
412	os_offset_t offset,
413	buf_block_t* block) UNIV_NOTHROW = `0`;
414
415	/* @return the tablespace identifier /
416	ulint get_space_id() const { return m_space; }
417
418	bool is_interrupted() const { return trx_is_interrupted(m_trx); }
419
420	/**
421	Get the data page depending on the table type, compressed or not.
422	@param block - block read from disk
423	@retval the buffer frame /*
424	static byte* get_frame(const buf_block_t* block)
425	{
426	return block->page.zip.data
427	? block->page.zip.data : block->frame;
428	}
429
430	protected:
431	/* Get the physical offset of the extent descriptor within the page.*
432	@param page_no page number of the extent descriptor
433	@param page contents of the page containing the extent descriptor.
434	@return the start of the xdes array in a page /*
435	const xdes_t* xdes(
436	ulint page_no,
437	const page_t* page) const UNIV_NOTHROW
438	{
439	ulint offset;
440
441	offset = xdes_calc_descriptor_index(get_page_size(), page_no);
442
443	return(page + XDES_ARR_OFFSET + XDES_SIZE * offset);
444	}
445
446	/* Set the current page directory (xdes). If the extent descriptor is*
447	marked as free then free the current extent descriptor and set it to
448	0. This implies that all pages that are covered by this extent
449	descriptor are also freed.
450
451	@param page_no offset of page within the file
452	@param page page contents
453	@return DB_SUCCESS or error code. /*
454	dberr_t set_current_xdes(
455	ulint page_no,
456	const page_t* page) UNIV_NOTHROW
457	{
458	m_xdes_page_no = page_no;
459
460	UT_DELETE_ARRAY(m_xdes);
461	m_xdes = NULL;
462
463	ulint state;
464	const xdes_t* xdesc = page + XDES_ARR_OFFSET;
465
466	state = mach_read_ulint(xdesc + XDES_STATE, MLOG_4BYTES);
467
468	if (state != XDES_FREE) {
469
470	m_xdes = UT_NEW_ARRAY_NOKEY(xdes_t,
471	m_page_size.physical());
472
473	/ Trigger OOM /
474	DBUG_EXECUTE_IF(
475	"ib_import_OOM_13",
476	UT_DELETE_ARRAY(m_xdes);
477	m_xdes = NULL;
478	);
479
480	if (m_xdes == NULL) {
481	return(DB_OUT_OF_MEMORY);
482	}
483
484	memcpy(m_xdes, page, m_page_size.physical());
485	}
486
487	return(DB_SUCCESS);
488	}
489
490	/* Check if the page is marked as free in the extent descriptor.*
491	@param page_no page number to check in the extent descriptor.
492	@return true if the page is marked as free /*
493	bool is_free(ulint page_no) const UNIV_NOTHROW
494	{
495	ut_a(xdes_calc_descriptor_page(get_page_size(), page_no)
496	== m_xdes_page_no);
497
498	if (m_xdes != `0`) {
499	const xdes_t* xdesc = xdes(page_no, m_xdes);
500	ulint pos = page_no % FSP_EXTENT_SIZE;
501
502	return(xdes_get_bit(xdesc, XDES_FREE_BIT, pos));
503	}
504
505	/ If the current xdes was free, the page must be free. /
506	return(true);
507	}
508
509	protected:
510	/* The tablespace page size. /
511	page_size_t m_page_size;
512
513	/* File handle to the tablespace /
514	pfs_os_file_t m_file;
515
516	/* Physical file path. /
517	const char* m_filepath;
518
519	/* Covering transaction. /
520	trx_t* m_trx;
521
522	/* Space id of the file being iterated over. /
523	ulint m_space;
524
525	/* Minimum page number for which the free list has not been*
526	initialized: the pages >= this limit are, by definition, free;
527	note that in a single-table tablespace where size < 64 pages,
528	this number is 64, i.e., we have initialized the space about
529	the first extent, but have not physically allocted those pages
530	to the file. @see FSP_LIMIT. /*
531	ulint m_free_limit;
532
533	/* Current size of the space in pages /
534	ulint m_size;
535
536	/* Current extent descriptor page /
537	xdes_t* m_xdes;
538
539	/* Physical page offset in the file of the extent descriptor /
540	ulint m_xdes_page_no;
541
542	/* Flags value read from the header page /
543	ulint m_space_flags;
544	};
545
546	/* Determine the page size to use for traversing the tablespace*
547	@param file_size size of the tablespace file in bytes
548	@param block contents of the first page in the tablespace file.
549	@retval DB_SUCCESS or error code. /*
550	dberr_t
551	AbstractCallback::init(
552	os_offset_t file_size,
553	const buf_block_t* block) UNIV_NOTHROW
554	{
555	const page_t* page = block->frame;
556
557	m_space_flags = fsp_header_get_flags(page);
558	if (!fsp_flags_is_valid(m_space_flags, true)) {
559	ulint cflags = fsp_flags_convert_from_101(m_space_flags);
560	if (cflags == ULINT_UNDEFINED) {
561	ib::error () << "Invalid FSP_SPACE_FLAGS="
562	<< ib::hex (m_space_flags);
563	return(DB_CORRUPTION);
564	}
565	m_space_flags = cflags;
566	}
567
568	/ Clear the DATA_DIR flag, which is basically garbage. /
569	m_space_flags &= ~(`1U` << FSP_FLAGS_POS_RESERVED);
570	m_page_size.copy_from(page_size_t (m_space_flags));
571
572	if (!is_compressed_table() && !m_page_size.equals_to(univ_page_size)) {
573
574	ib::error () << "Page size " << m_page_size.physical()
575	<< " of ibd file is not the same as the server page"
576	" size " << srv_page_size;
577
578	return(DB_CORRUPTION);
579
580	} else if (file_size % m_page_size.physical() != `0`) {
581
582	ib::error () << "File size " << file_size << " is not a"
583	" multiple of the page size "
584	<< m_page_size.physical();
585
586	return(DB_CORRUPTION);
587	}
588
589	m_size = mach_read_from_4(page + FSP_SIZE);
590	m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
591	if (m_space == ULINT_UNDEFINED) {
592	m_space = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID
593	+ page);
594	}
595
596	return set_current_xdes(`0`, page);
597	}
598
599	/**
600	Try and determine the index root pages by checking if the next/prev
601	pointers are both FIL_NULL. We need to ensure that skip deleted pages. /*
602	struct FetchIndexRootPages : public AbstractCallback {
603
604	/* Index information gathered from the .ibd file. /
605	struct Index {
606
607	Index(index_id_t id, ulint page_no)
608	:
609	m_id(id),
610	m_page_no(page_no) { }
611
612	index_id_t m_id; /!< Index id /
613	ulint m_page_no; /!< Root page number /
614	};
615
616	typedef std::vector<Index, ut_allocator<Index> > Indexes;
617
618	/* Constructor*
619	@param trx covering (user) transaction
620	@param table table definition in server ./*
621	FetchIndexRootPages(const dict_table_t* table, trx_t* trx)
622	:
623	AbstractCallback (trx, ULINT_UNDEFINED),
624	m_table(table) UNIV_NOTHROW { }
625
626	/* Destructor /
627	virtual ~FetchIndexRootPages() UNIV_NOTHROW { }
628
629	/* Called for each block as it is read from the file.*
630	@param offset physical offset in the file
631	@param block block to convert, it is not from the buffer pool.
632	@retval DB_SUCCESS or error code. /*
633	virtual dberr_t operator() (
634	os_offset_t offset,
635	buf_block_t* block) UNIV_NOTHROW;
636
637	/* Update the import configuration that will be used to import*
638	the tablespace. /*
639	dberr_t build_row_import(row_import* cfg) const UNIV_NOTHROW;
640
641	/* Table definition in server. /
642	const dict_table_t* m_table;
643
644	/* Index information /
645	Indexes m_indexes;
646	};
647
648	/* Called for each block as it is read from the file. Check index pages to*
649	determine the exact row format. We can't get that from the tablespace
650	header flags alone.
651
652	@param offset physical offset in the file
653	@param block block to convert, it is not from the buffer pool.
654	@retval DB_SUCCESS or error code. /*
655	dberr_t
656	FetchIndexRootPages::operator() (
657	os_offset_t offset,
658	buf_block_t* block) UNIV_NOTHROW
659	{
660	if (is_interrupted()) return DB_INTERRUPTED;
661
662	const page_t* page = get_frame(block);
663
664	ulint page_type = fil_page_get_type(page);
665
666	if (block->page.id.page_no() * m_page_size.physical() != offset) {
667
668	ib::error () << "Page offset doesn't match file offset:"
669	" page offset: " << block->page.id.page_no()
670	<< ", file offset: "
671	<< (offset / m_page_size.physical());
672
673	return DB_CORRUPTION;
674	} else if (page_type == FIL_PAGE_TYPE_XDES) {
675	return set_current_xdes(block->page.id.page_no(), page);
676	} else if (fil_page_index_page_check(page)
677	&& !is_free(block->page.id.page_no())
678	&& page_is_root(page)) {
679
680	index_id_t id = btr_page_get_index_id(page);
681
682	m_indexes.push_back(Index (id, block->page.id.page_no()));
683
684	if (m_indexes.size() == `1`) {
685	/ Check that the tablespace flags match the table flags. /
686	ulint expected = dict_tf_to_fsp_flags(m_table->flags);
687	if (!fsp_flags_match(expected, m_space_flags)) {
688	ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
689	ER_TABLE_SCHEMA_MISMATCH,
690	"Expected FSP_SPACE_FLAGS=0x%x, .ibd "
691	"file contains 0x%x.",
692	unsigned(expected),
693	unsigned(m_space_flags));
694	return(DB_CORRUPTION);
695	}
696	}
697	}
698
699	return DB_SUCCESS;
700	}
701
702	/**
703	Update the import configuration that will be used to import the tablespace.
704	@return error code or DB_SUCCESS /*
705	dberr_t
706	FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW
707	{
708	Indexes::const_iterator end = m_indexes.end();
709
710	ut_a(cfg->m_table == m_table);
711	cfg->m_page_size.copy_from(m_page_size);
712	cfg->m_n_indexes = m_indexes.size();
713
714	if (cfg->m_n_indexes == `0`) {
715
716	ib::error () << "No B+Tree found in tablespace";
717
718	return(DB_CORRUPTION);
719	}
720
721	cfg->m_indexes = UT_NEW_ARRAY_NOKEY(row_index_t, cfg->m_n_indexes);
722
723	/ Trigger OOM /
724	DBUG_EXECUTE_IF(
725	"ib_import_OOM_11",
726	UT_DELETE_ARRAY(cfg->m_indexes);
727	cfg->m_indexes = NULL;
728	);
729
730	if (cfg->m_indexes == NULL) {
731	return(DB_OUT_OF_MEMORY);
732	}
733
734	memset(cfg->m_indexes, `0x0`, sizeof(cfg->m_indexes) cfg->m_n_indexes);
735
736	row_index_t* cfg_index = cfg->m_indexes;
737
738	for (Indexes::const_iterator it = m_indexes.begin();
739	it != end;
740	++it, ++cfg_index) {
741
742	char name[BUFSIZ];
743
744	snprintf(name, sizeof(name), "index" IB_ID_FMT, it->m_id);
745
746	ulint len = strlen(name) + `1`;
747
748	cfg_index->m_name = UT_NEW_ARRAY_NOKEY(byte, len);
749
750	/ Trigger OOM /
751	DBUG_EXECUTE_IF(
752	"ib_import_OOM_12",
753	UT_DELETE_ARRAY(cfg_index->m_name);
754	cfg_index->m_name = NULL;
755	);
756
757	if (cfg_index->m_name == NULL) {
758	return(DB_OUT_OF_MEMORY);
759	}
760
761	memcpy(cfg_index->m_name, name, len);
762
763	cfg_index->m_id = it ->m_id;
764
765	cfg_index->m_space = m_space;
766
767	cfg_index->m_page_no = it ->m_page_no;
768	}
769
770	return(DB_SUCCESS);
771	}
772
773	/ Functor that is called for each physical page that is read from the*
774	tablespace file.
775
776	1. Check each page for corruption.
777
778	2. Update the space id and LSN on every page
779	* For the header page
780	- Validate the flags
781	- Update the LSN
782
783	3. On Btree pages
784	* Set the index id
785	* Update the max trx id
786	* In a cluster index, update the system columns
787	* In a cluster index, update the BLOB ptr, set the space id
788	* Purge delete marked records, but only if they can be easily
789	removed from the page
790	* Keep a counter of number of rows, ie. non-delete-marked rows
791	* Keep a counter of number of delete marked rows
792	* Keep a counter of number of purge failure
793	* If a page is stamped with an index id that isn't in the .cfg file
794	we assume it is deleted and the page can be ignored.
795
796	4. Set the page state to dirty so that it will be written to disk.
797	*/
798	class PageConverter : public AbstractCallback {
799	public:
800	/* Constructor*
801	@param cfg config of table being imported.
802	@param space_id tablespace identifier
803	@param trx transaction covering the import /*
804	PageConverter(row_import* cfg, ulint space_id, trx_t* trx)
805	:
806	AbstractCallback (trx, space_id),
807	m_cfg(cfg),
808	m_index(cfg->m_indexes),
809	m_current_lsn(log_get_lsn()),
810	m_page_zip_ptr(`0`),
811	m_rec_iter (),
812	m_offsets_(), m_offsets(m_offsets_),
813	m_heap(`0`),
814	m_cluster_index(dict_table_get_first_index(cfg->m_table))
815	{
816	ut_ad(m_current_lsn);
817	rec_offs_init(m_offsets_);
818	}
819
820	virtual ~PageConverter() UNIV_NOTHROW
821	{
822	if (m_heap != `0`) {
823	mem_heap_free(m_heap);
824	}
825	}
826
827	/* Called for each block as it is read from the file.*
828	@param offset physical offset in the file
829	@param block block to convert, it is not from the buffer pool.
830	@retval DB_SUCCESS or error code. /*
831	virtual dberr_t operator() (
832	os_offset_t offset,
833	buf_block_t* block) UNIV_NOTHROW;
834	private:
835	/* Update the page, set the space id, max trx id and index id.*
836	@param block block read from file
837	@param page_type type of the page
838	@retval DB_SUCCESS or error code /*
839	dberr_t update_page(
840	buf_block_t* block,
841	ulint& page_type) UNIV_NOTHROW;
842
843	/* Update the space, index id, trx id.*
844	@param block block to convert
845	@return DB_SUCCESS or error code /*
846	dberr_t update_index_page(buf_block_t* block) UNIV_NOTHROW;
847
848	/* Update the BLOB refrences and write UNDO log entries for*
849	rows that can't be purged optimistically.
850	@param block block to update
851	@retval DB_SUCCESS or error code /*
852	dberr_t update_records(buf_block_t* block) UNIV_NOTHROW;
853
854	/* Validate the space flags and update tablespace header page.*
855	@param block block read from file, not from the buffer pool.
856	@retval DB_SUCCESS or error code /*
857	dberr_t update_header(buf_block_t* block) UNIV_NOTHROW;
858
859	/* Adjust the BLOB reference for a single column that is externally stored*
860	@param rec record to update
861	@param offsets column offsets for the record
862	@param i column ordinal value
863	@return DB_SUCCESS or error code /*
864	dberr_t adjust_cluster_index_blob_column(
865	rec_t* rec,
866	const ulint* offsets,
867	ulint i) UNIV_NOTHROW;
868
869	/* Adjusts the BLOB reference in the clustered index row for all*
870	externally stored columns.
871	@param rec record to update
872	@param offsets column offsets for the record
873	@return DB_SUCCESS or error code /*
874	dberr_t adjust_cluster_index_blob_columns(
875	rec_t* rec,
876	const ulint* offsets) UNIV_NOTHROW;
877
878	/* In the clustered index, adjist the BLOB pointers as needed.*
879	Also update the BLOB reference, write the new space id.
880	@param rec record to update
881	@param offsets column offsets for the record
882	@return DB_SUCCESS or error code /*
883	dberr_t adjust_cluster_index_blob_ref(
884	rec_t* rec,
885	const ulint* offsets) UNIV_NOTHROW;
886
887	/* Purge delete-marked records, only if it is possible to do*
888	so without re-organising the B+tree.
889	@retval true if purged /*
890	bool purge() UNIV_NOTHROW;
891
892	/* Adjust the BLOB references and sys fields for the current record.*
893	@param rec record to update
894	@param offsets column offsets for the record
895	@return DB_SUCCESS or error code. /*
896	dberr_t adjust_cluster_record(
897	rec_t* rec,
898	const ulint* offsets) UNIV_NOTHROW;
899
900	/* Find an index with the matching id.*
901	@return row_index_t instance or 0 /
902	row_index_t* find_index(index_id_t id) UNIV_NOTHROW
903	{
904	row_index_t* index = &m_cfg->m_indexes[`0`];
905
906	for (ulint i = `0`; i < m_cfg->m_n_indexes; ++i, ++index) {
907	if (id == index->m_id) {
908	return(index);
909	}
910	}
911
912	return(`0`);
913
914	}
915	private:
916	/* Config for table that is being imported. /
917	row_import* m_cfg;
918
919	/* Current index whose pages are being imported /
920	row_index_t* m_index;
921
922	/* Current system LSN /
923	lsn_t m_current_lsn;
924
925	/* Alias for m_page_zip, only set for compressed pages. /
926	page_zip_des_t* m_page_zip_ptr;
927
928	/* Iterator over records in a block /
929	RecIterator m_rec_iter;
930
931	/* Record offset /
932	ulint m_offsets_[REC_OFFS_NORMAL_SIZE];
933
934	/* Pointer to m_offsets_ /
935	ulint* m_offsets;
936
937	/* Memory heap for the record offsets /
938	mem_heap_t* m_heap;
939
940	/* Cluster index instance /
941	dict_index_t* m_cluster_index;
942	};
943
944	/**
945	row_import destructor. /*
946	row_import::~row_import() UNIV_NOTHROW
947	{
948	for (ulint i = `0`; m_indexes != `0` && i < m_n_indexes; ++i) {
949	UT_DELETE_ARRAY(m_indexes[i].m_name);
950
951	if (m_indexes[i].m_fields == NULL) {
952	continue;
953	}
954
955	dict_field_t* fields = m_indexes[i].m_fields;
956	ulint n_fields = m_indexes[i].m_n_fields;
957
958	for (ulint j = `0`; j < n_fields; ++j) {
959	UT_DELETE_ARRAY(const_cast<char*>(fields[j].name()));
960	}
961
962	UT_DELETE_ARRAY(fields);
963	}
964
965	for (ulint i = `0`; m_col_names != `0` && i < m_n_cols; ++i) {
966	UT_DELETE_ARRAY(m_col_names[i]);
967	}
968
969	UT_DELETE_ARRAY(m_cols);
970	UT_DELETE_ARRAY(m_indexes);
971	UT_DELETE_ARRAY(m_col_names);
972	UT_DELETE_ARRAY(m_table_name);
973	UT_DELETE_ARRAY(m_hostname);
974	}
975
976	/* Find the index entry in in the indexes array.*
977	@param name index name
978	@return instance if found else 0. /*
979	row_index_t*
980	row_import::get_index(
981	const char* name) const UNIV_NOTHROW
982	{
983	for (ulint i = `0`; i < m_n_indexes; ++i) {
984	const char* index_name;
985	row_index_t* index = &m_indexes[i];
986
987	index_name = reinterpret_cast<const char*>(index->m_name);
988
989	if (strcmp(index_name, name) == `0`) {
990
991	return(index);
992	}
993	}
994
995	return(`0`);
996	}
997
998	/* Get the number of rows in the index.*
999	@param name index name
1000	@return number of rows (doesn't include delete marked rows). /*
1001	ulint
1002	row_import::get_n_rows(
1003	const char* name) const UNIV_NOTHROW
1004	{
1005	const row_index_t* index = get_index(name);
1006
1007	ut_a(name != `0`);
1008
1009	return(index->m_stats.m_n_rows);
1010	}
1011
1012	/* Get the number of rows for which purge failed uding the convert phase.*
1013	@param name index name
1014	@return number of rows for which purge failed. /*
1015	ulint
1016	row_import::get_n_purge_failed(
1017	const char* name) const UNIV_NOTHROW
1018	{
1019	const row_index_t* index = get_index(name);
1020
1021	ut_a(name != `0`);
1022
1023	return(index->m_stats.m_n_purge_failed);
1024	}
1025
1026	/* Find the ordinal value of the column name in the cfg table columns.*
1027	@param name of column to look for.
1028	@return ULINT_UNDEFINED if not found. /*
1029	ulint
1030	row_import::find_col(
1031	const char* name) const UNIV_NOTHROW
1032	{
1033	for (ulint i = `0`; i < m_n_cols; ++i) {
1034	const char* col_name;
1035
1036	col_name = reinterpret_cast<const char*>(m_col_names[i]);
1037
1038	if (strcmp(col_name, name) == `0`) {
1039	return(i);
1040	}
1041	}
1042
1043	return(ULINT_UNDEFINED);
1044	}
1045
1046	/**
1047	Check if the index schema that was read from the .cfg file matches the
1048	in memory index definition.
1049	@return DB_SUCCESS or error code. /*
1050	dberr_t
1051	row_import::match_index_columns(
1052	THD* thd,
1053	const dict_index_t* index) UNIV_NOTHROW
1054	{
1055	row_index_t* cfg_index;
1056	dberr_t err = DB_SUCCESS;
1057
1058	cfg_index = get_index(index->name);
1059
1060	if (cfg_index == `0`) {
1061	ib_errf(thd, IB_LOG_LEVEL_ERROR,
1062	ER_TABLE_SCHEMA_MISMATCH,
1063	"Index %s not found in tablespace meta-data file.",
1064	index->name ());
1065
1066	return(DB_ERROR);
1067	}
1068
1069	if (cfg_index->m_n_fields != index->n_fields) {
1070
1071	ib_errf(thd, IB_LOG_LEVEL_ERROR,
1072	ER_TABLE_SCHEMA_MISMATCH,
1073	"Index field count %u doesn't match"
1074	" tablespace metadata file value " ULINTPF,
1075	index->n_fields, cfg_index->m_n_fields);
1076
1077	return(DB_ERROR);
1078	}
1079
1080	cfg_index->m_srv_index = index;
1081
1082	const dict_field_t* field = index->fields;
1083	const dict_field_t* cfg_field = cfg_index->m_fields;
1084
1085	for (ulint i = `0`; i < index->n_fields; ++i, ++field, ++cfg_field) {
1086
1087	if (strcmp(field->name (), cfg_field->name ()) != `0`) {
1088	ib_errf(thd, IB_LOG_LEVEL_ERROR,
1089	ER_TABLE_SCHEMA_MISMATCH,
1090	"Index field name %s doesn't match"
1091	" tablespace metadata field name %s"
1092	" for field position " ULINTPF,
1093	field->name (), cfg_field->name (), i);
1094
1095	err = DB_ERROR;
1096	}
1097
1098	if (cfg_field->prefix_len != field->prefix_len) {
1099	ib_errf(thd, IB_LOG_LEVEL_ERROR,
1100	ER_TABLE_SCHEMA_MISMATCH,
1101	"Index %s field %s prefix len %u"
1102	" doesn't match metadata file value %u",
1103	index->name (), field->name (),
1104	field->prefix_len, cfg_field->prefix_len);
1105
1106	err = DB_ERROR;
1107	}
1108
1109	if (cfg_field->fixed_len != field->fixed_len) {
1110	ib_errf(thd, IB_LOG_LEVEL_ERROR,
1111	ER_TABLE_SCHEMA_MISMATCH,
1112	"Index %s field %s fixed len %u"
1113	" doesn't match metadata file value %u",
1114	index->name (), field->name (),
1115	field->fixed_len,
1116	cfg_field->fixed_len);
1117
1118	err = DB_ERROR;
1119	}
1120	}
1121
1122	return(err);
1123	}
1124
1125	/* Check if the table schema that was read from the .cfg file matches the*
1126	in memory table definition.
1127	@param thd MySQL session variable
1128	@return DB_SUCCESS or error code. /*
1129	dberr_t
1130	row_import::match_table_columns(
1131	THD* thd) UNIV_NOTHROW
1132	{
1133	dberr_t err = DB_SUCCESS;
1134	const dict_col_t* col = m_table->cols;
1135
1136	for (ulint i = `0`; i < m_table->n_cols; ++i, ++col) {
1137
1138	const char* col_name;
1139	ulint cfg_col_index;
1140
1141	col_name = dict_table_get_col_name(
1142	m_table, dict_col_get_no(col));
1143
1144	cfg_col_index = find_col(col_name);
1145
1146	if (cfg_col_index == ULINT_UNDEFINED) {
1147
1148	ib_errf(thd, IB_LOG_LEVEL_ERROR,
1149	ER_TABLE_SCHEMA_MISMATCH,
1150	"Column %s not found in tablespace.",
1151	col_name);
1152
1153	err = DB_ERROR;
1154	} else if (cfg_col_index != col->ind) {
1155
1156	ib_errf(thd, IB_LOG_LEVEL_ERROR,
1157	ER_TABLE_SCHEMA_MISMATCH,
1158	"Column %s ordinal value mismatch, it's at %u"
1159	" in the table and " ULINTPF
1160	" in the tablespace meta-data file",
1161	col_name, col->ind, cfg_col_index);
1162
1163	err = DB_ERROR;
1164	} else {
1165	const dict_col_t* cfg_col;
1166
1167	cfg_col = &m_cols[cfg_col_index];
1168	ut_a(cfg_col->ind == cfg_col_index);
1169
1170	if (cfg_col->prtype != col->prtype) {
1171	ib_errf(thd,
1172	IB_LOG_LEVEL_ERROR,
1173	ER_TABLE_SCHEMA_MISMATCH,
1174	"Column %s precise type mismatch.",
1175	col_name);
1176	err = DB_ERROR;
1177	}
1178
1179	if (cfg_col->mtype != col->mtype) {
1180	ib_errf(thd,
1181	IB_LOG_LEVEL_ERROR,
1182	ER_TABLE_SCHEMA_MISMATCH,
1183	"Column %s main type mismatch.",
1184	col_name);
1185	err = DB_ERROR;
1186	}
1187
1188	if (cfg_col->len != col->len) {
1189	ib_errf(thd,
1190	IB_LOG_LEVEL_ERROR,
1191	ER_TABLE_SCHEMA_MISMATCH,
1192	"Column %s length mismatch.",
1193	col_name);
1194	err = DB_ERROR;
1195	}
1196
1197	if (cfg_col->mbminlen != col->mbminlen
1198	\|\| cfg_col->mbmaxlen != col->mbmaxlen) {
1199	ib_errf(thd,
1200	IB_LOG_LEVEL_ERROR,
1201	ER_TABLE_SCHEMA_MISMATCH,
1202	"Column %s multi-byte len mismatch.",
1203	col_name);
1204	err = DB_ERROR;
1205	}
1206
1207	if (cfg_col->ind != col->ind) {
1208	err = DB_ERROR;
1209	}
1210
1211	if (cfg_col->ord_part != col->ord_part) {
1212	ib_errf(thd,
1213	IB_LOG_LEVEL_ERROR,
1214	ER_TABLE_SCHEMA_MISMATCH,
1215	"Column %s ordering mismatch.",
1216	col_name);
1217	err = DB_ERROR;
1218	}
1219
1220	if (cfg_col->max_prefix != col->max_prefix) {
1221	ib_errf(thd,
1222	IB_LOG_LEVEL_ERROR,
1223	ER_TABLE_SCHEMA_MISMATCH,
1224	"Column %s max prefix mismatch.",
1225	col_name);
1226	err = DB_ERROR;
1227	}
1228	}
1229	}
1230
1231	return(err);
1232	}
1233
1234	/* Check if the table (and index) schema that was read from the .cfg file*
1235	matches the in memory table definition.
1236	@param thd MySQL session variable
1237	@return DB_SUCCESS or error code. /*
1238	dberr_t
1239	row_import::match_schema(
1240	THD* thd) UNIV_NOTHROW
1241	{
1242	/ Do some simple checks. /
1243
1244	if ((m_table->flags ^ m_flags) & ~DICT_TF_MASK_DATA_DIR) {
1245	ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
1246	"Table flags don't match, server table has 0x%x"
1247	" and the meta-data file has 0x" ULINTPFx,
1248	m_table->flags, m_flags);
1249
1250	return(DB_ERROR);
1251	} else if (m_table->n_cols != m_n_cols) {
1252	ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
1253	"Number of columns don't match, table has %u"
1254	" columns but the tablespace meta-data file has "
1255	ULINTPF " columns",
1256	m_table->n_cols, m_n_cols);
1257
1258	return(DB_ERROR);
1259	} else if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
1260
1261	/ If the number of indexes don't match then it is better*
1262	to abort the IMPORT. It is easy for the user to create a
1263	table matching the IMPORT definition. /*
1264
1265	ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
1266	"Number of indexes don't match, table has " ULINTPF
1267	" indexes but the tablespace meta-data file has "
1268	ULINTPF " indexes",
1269	UT_LIST_GET_LEN(m_table->indexes), m_n_indexes);
1270
1271	return(DB_ERROR);
1272	}
1273
1274	dberr_t err = match_table_columns(thd);
1275
1276	if (err != DB_SUCCESS) {
1277	return(err);
1278	}
1279
1280	/ Check if the index definitions match. /
1281
1282	const dict_index_t* index;
1283
1284	for (index = UT_LIST_GET_FIRST(m_table->indexes);
1285	index != `0`;
1286	index = UT_LIST_GET_NEXT(indexes, index)) {
1287
1288	dberr_t index_err;
1289
1290	index_err = match_index_columns(thd, index);
1291
1292	if (index_err != DB_SUCCESS) {
1293	err = index_err;
1294	}
1295	}
1296
1297	return(err);
1298	}
1299
1300	/**
1301	Set the index root <space, pageno>, using index name. /*
1302	void
1303	row_import::set_root_by_name() UNIV_NOTHROW
1304	{
1305	row_index_t* cfg_index = m_indexes;
1306
1307	for (ulint i = `0`; i < m_n_indexes; ++i, ++cfg_index) {
1308	dict_index_t* index;
1309
1310	const char* index_name;
1311
1312	index_name = reinterpret_cast<const char*>(cfg_index->m_name);
1313
1314	index = dict_table_get_index_on_name(m_table, index_name);
1315
1316	/ We've already checked that it exists. /
1317	ut_a(index != `0`);
1318
1319	index->page = cfg_index->m_page_no;
1320	}
1321	}
1322
1323	/**
1324	Set the index root <space, pageno>, using a heuristic.
1325	@return DB_SUCCESS or error code /*
1326	dberr_t
1327	row_import::set_root_by_heuristic() UNIV_NOTHROW
1328	{
1329	row_index_t* cfg_index = m_indexes;
1330
1331	ut_a(m_n_indexes > `0`);
1332
1333	// TODO: For now use brute force, based on ordinality
1334
1335	if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
1336
1337	ib::warn () << "Table " << m_table->name << " should have "
1338	<< UT_LIST_GET_LEN(m_table->indexes) << " indexes but"
1339	" the tablespace has " << m_n_indexes << " indexes";
1340	}
1341
1342	dict_mutex_enter_for_mysql();
1343
1344	ulint i = `0`;
1345	dberr_t err = DB_SUCCESS;
1346
1347	for (dict_index_t* index = UT_LIST_GET_FIRST(m_table->indexes);
1348	index != `0`;
1349	index = UT_LIST_GET_NEXT(indexes, index)) {
1350
1351	if (index->type & DICT_FTS) {
1352	index->type \|= DICT_CORRUPT;
1353	ib::warn () << "Skipping FTS index: " << index->name;
1354	} else if (i < m_n_indexes) {
1355
1356	UT_DELETE_ARRAY(cfg_index[i].m_name);
1357
1358	ulint len = strlen(index->name) + `1`;
1359
1360	cfg_index[i].m_name = UT_NEW_ARRAY_NOKEY(byte, len);
1361
1362	/ Trigger OOM /
1363	DBUG_EXECUTE_IF(
1364	"ib_import_OOM_14",
1365	UT_DELETE_ARRAY(cfg_index[i].m_name);
1366	cfg_index[i].m_name = NULL;
1367	);
1368
1369	if (cfg_index[i].m_name == NULL) {
1370	err = DB_OUT_OF_MEMORY;
1371	break;
1372	}
1373
1374	memcpy(cfg_index[i].m_name, index->name, len);
1375
1376	cfg_index[i].m_srv_index = index;
1377
1378	index->page = cfg_index[i].m_page_no;
1379
1380	++i;
1381	}
1382	}
1383
1384	dict_mutex_exit_for_mysql();
1385
1386	return(err);
1387	}
1388
1389	/**
1390	Purge delete marked records.
1391	@return DB_SUCCESS or error code. /*
1392	dberr_t
1393	IndexPurge::garbage_collect() UNIV_NOTHROW
1394	{
1395	dberr_t err;
1396	ibool comp = dict_table_is_comp(m_index->table);
1397
1398	/ Open the persistent cursor and start the mini-transaction. /
1399
1400	open();
1401
1402	while ((err = next()) == DB_SUCCESS) {
1403
1404	rec_t* rec = btr_pcur_get_rec(&m_pcur);
1405	ibool deleted = rec_get_deleted_flag(rec, comp);
1406
1407	if (!deleted) {
1408	++m_n_rows;
1409	} else {
1410	purge();
1411	}
1412	}
1413
1414	/ Close the persistent cursor and commit the mini-transaction. /
1415
1416	close();
1417
1418	return(err == DB_END_OF_INDEX ? DB_SUCCESS : err);
1419	}
1420
1421	/**
1422	Begin import, position the cursor on the first record. /*
1423	void
1424	IndexPurge::open() UNIV_NOTHROW
1425	{
1426	mtr_start(&m_mtr);
1427
1428	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
1429
1430	btr_pcur_open_at_index_side(
1431	true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, `0`, &m_mtr);
1432	btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr);
1433	if (rec_is_default_row(btr_pcur_get_rec(&m_pcur), m_index)) {
1434	ut_ad(btr_pcur_is_on_user_rec(&m_pcur));
1435	/ Skip the 'default row' pseudo-record. /
1436	} else {
1437	btr_pcur_move_to_prev_on_page(&m_pcur);
1438	}
1439	}
1440
1441	/**
1442	Close the persistent curosr and commit the mini-transaction. /*
1443	void
1444	IndexPurge::close() UNIV_NOTHROW
1445	{
1446	btr_pcur_close(&m_pcur);
1447	mtr_commit(&m_mtr);
1448	}
1449
1450	/**
1451	Position the cursor on the next record.
1452	@return DB_SUCCESS or error code /*
1453	dberr_t
1454	IndexPurge::next() UNIV_NOTHROW
1455	{
1456	btr_pcur_move_to_next_on_page(&m_pcur);
1457
1458	/ When switching pages, commit the mini-transaction*
1459	in order to release the latch on the old page. /*
1460
1461	if (!btr_pcur_is_after_last_on_page(&m_pcur)) {
1462	return(DB_SUCCESS);
1463	} else if (trx_is_interrupted(m_trx)) {
1464	/ Check after every page because the check*
1465	is expensive. /*
1466	return(DB_INTERRUPTED);
1467	}
1468
1469	btr_pcur_store_position(&m_pcur, &m_mtr);
1470
1471	mtr_commit(&m_mtr);
1472
1473	mtr_start(&m_mtr);
1474
1475	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
1476
1477	btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
1478
1479	if (!btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr)) {
1480
1481	return(DB_END_OF_INDEX);
1482	}
1483
1484	return(DB_SUCCESS);
1485	}
1486
1487	/**
1488	Store the persistent cursor position and reopen the
1489	B-tree cursor in BTR_MODIFY_TREE mode, because the
1490	tree structure may be changed during a pessimistic delete. /*
1491	void
1492	IndexPurge::purge_pessimistic_delete() UNIV_NOTHROW
1493	{
1494	dberr_t err;
1495
1496	btr_pcur_restore_position(BTR_MODIFY_TREE \| BTR_LATCH_FOR_DELETE,
1497	&m_pcur, &m_mtr);
1498
1499	ut_ad(rec_get_deleted_flag(
1500	btr_pcur_get_rec(&m_pcur),
1501	dict_table_is_comp(m_index->table)));
1502
1503	btr_cur_pessimistic_delete(
1504	&err, FALSE, btr_pcur_get_btr_cur(&m_pcur), `0`, false, &m_mtr);
1505
1506	ut_a(err == DB_SUCCESS);
1507
1508	/ Reopen the B-tree cursor in BTR_MODIFY_LEAF mode /
1509	mtr_commit(&m_mtr);
1510	}
1511
1512	/**
1513	Purge delete-marked records. /*
1514	void
1515	IndexPurge::purge() UNIV_NOTHROW
1516	{
1517	btr_pcur_store_position(&m_pcur, &m_mtr);
1518
1519	purge_pessimistic_delete();
1520
1521	mtr_start(&m_mtr);
1522
1523	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
1524
1525	btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
1526	}
1527
1528	/* Adjust the BLOB reference for a single column that is externally stored*
1529	@param rec record to update
1530	@param offsets column offsets for the record
1531	@param i column ordinal value
1532	@return DB_SUCCESS or error code /*
1533	inline
1534	dberr_t
1535	PageConverter::adjust_cluster_index_blob_column(
1536	rec_t* rec,
1537	const ulint* offsets,
1538	ulint i) UNIV_NOTHROW
1539	{
1540	ulint len;
1541	byte* field;
1542
1543	field = rec_get_nth_field(rec, offsets, i, &len);
1544
1545	DBUG_EXECUTE_IF("ib_import_trigger_corruption_2",
1546	len = BTR_EXTERN_FIELD_REF_SIZE - `1`;);
1547
1548	if (len < BTR_EXTERN_FIELD_REF_SIZE) {
1549
1550	ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
1551	ER_INNODB_INDEX_CORRUPT,
1552	"Externally stored column(" ULINTPF
1553	") has a reference length of " ULINTPF
1554	" in the cluster index %s",
1555	i, len, m_cluster_index->name ());
1556
1557	return(DB_CORRUPTION);
1558	}
1559
1560	field += len - (BTR_EXTERN_FIELD_REF_SIZE - BTR_EXTERN_SPACE_ID);
1561
1562	mach_write_to_4(field, get_space_id());
1563
1564	if (m_page_zip_ptr) {
1565	page_zip_write_blob_ptr(
1566	m_page_zip_ptr, rec, m_cluster_index, offsets, i, `0`);
1567	}
1568
1569	return(DB_SUCCESS);
1570	}
1571
1572	/* Adjusts the BLOB reference in the clustered index row for all externally*
1573	stored columns.
1574	@param rec record to update
1575	@param offsets column offsets for the record
1576	@return DB_SUCCESS or error code /*
1577	inline
1578	dberr_t
1579	PageConverter::adjust_cluster_index_blob_columns(
1580	rec_t* rec,
1581	const ulint* offsets) UNIV_NOTHROW
1582	{
1583	ut_ad(rec_offs_any_extern(offsets));
1584
1585	/ Adjust the space_id in the BLOB pointers. /
1586
1587	for (ulint i = `0`; i < rec_offs_n_fields(offsets); ++i) {
1588
1589	/ Only if the column is stored "externally". /
1590
1591	if (rec_offs_nth_extern(offsets, i)) {
1592	dberr_t err;
1593
1594	err = adjust_cluster_index_blob_column(rec, offsets, i);
1595
1596	if (err != DB_SUCCESS) {
1597	return(err);
1598	}
1599	}
1600	}
1601
1602	return(DB_SUCCESS);
1603	}
1604
1605	/* In the clustered index, adjust BLOB pointers as needed. Also update the*
1606	BLOB reference, write the new space id.
1607	@param rec record to update
1608	@param offsets column offsets for the record
1609	@return DB_SUCCESS or error code /*
1610	inline
1611	dberr_t
1612	PageConverter::adjust_cluster_index_blob_ref(
1613	rec_t* rec,
1614	const ulint* offsets) UNIV_NOTHROW
1615	{
1616	if (rec_offs_any_extern(offsets)) {
1617	dberr_t err;
1618
1619	err = adjust_cluster_index_blob_columns(rec, offsets);
1620
1621	if (err != DB_SUCCESS) {
1622	return(err);
1623	}
1624	}
1625
1626	return(DB_SUCCESS);
1627	}
1628
1629	/* Purge delete-marked records, only if it is possible to do so without*
1630	re-organising the B+tree.
1631	@return true if purge succeeded /*
1632	inline bool PageConverter::purge() UNIV_NOTHROW
1633	{
1634	const dict_index_t* index = m_index->m_srv_index;
1635
1636	/ We can't have a page that is empty and not root. /
1637	if (m_rec_iter.remove(index, m_page_zip_ptr, m_offsets)) {
1638
1639	++m_index->m_stats.m_n_purged;
1640
1641	return(true);
1642	} else {
1643	++m_index->m_stats.m_n_purge_failed;
1644	}
1645
1646	return(false);
1647	}
1648
1649	/* Adjust the BLOB references and sys fields for the current record.*
1650	@param rec record to update
1651	@param offsets column offsets for the record
1652	@return DB_SUCCESS or error code. /*
1653	inline
1654	dberr_t
1655	PageConverter::adjust_cluster_record(
1656	rec_t* rec,
1657	const ulint* offsets) UNIV_NOTHROW
1658	{
1659	dberr_t err;
1660
1661	if ((err = adjust_cluster_index_blob_ref(rec, offsets)) == DB_SUCCESS) {
1662
1663	/ Reset DB_TRX_ID and DB_ROLL_PTR. Normally, these fields*
1664	are only written in conjunction with other changes to the
1665	record. /*
1666	ulint trx_id_pos = m_cluster_index->n_uniq
1667	? m_cluster_index->n_uniq : `1`;
1668	if (m_page_zip_ptr) {
1669	page_zip_write_trx_id_and_roll_ptr(
1670	m_page_zip_ptr, rec, m_offsets, trx_id_pos,
1671	`0`, roll_ptr_t(`1`) << ROLL_PTR_INSERT_FLAG_POS,
1672	NULL);
1673	} else {
1674	ulint len;
1675	byte* ptr = rec_get_nth_field(
1676	rec, m_offsets, trx_id_pos, &len);
1677	ut_ad(len == DATA_TRX_ID_LEN);
1678	memcpy(ptr, reset_trx_id, sizeof reset_trx_id);
1679	}
1680	}
1681
1682	return(err);
1683	}
1684
1685	/* Update the BLOB refrences and write UNDO log entries for*
1686	rows that can't be purged optimistically.
1687	@param block block to update
1688	@retval DB_SUCCESS or error code /*
1689	inline
1690	dberr_t
1691	PageConverter::update_records(
1692	buf_block_t* block) UNIV_NOTHROW
1693	{
1694	ibool comp = dict_table_is_comp(m_cfg->m_table);
1695	bool clust_index = m_index->m_srv_index == m_cluster_index;
1696
1697	/ This will also position the cursor on the first user record. /
1698
1699	m_rec_iter.open(block);
1700
1701	while (!m_rec_iter.end()) {
1702	rec_t* rec = m_rec_iter.current();
1703
1704	ibool deleted = rec_get_deleted_flag(rec, comp);
1705
1706	/ For the clustered index we have to adjust the BLOB*
1707	reference and the system fields irrespective of the
1708	delete marked flag. The adjustment of delete marked
1709	cluster records is required for purge to work later. /*
1710
1711	if (deleted \|\| clust_index) {
1712	m_offsets = rec_get_offsets(
1713	rec, m_index->m_srv_index, m_offsets, true,
1714	ULINT_UNDEFINED, &m_heap);
1715	}
1716
1717	if (clust_index) {
1718
1719	dberr_t err = adjust_cluster_record(rec, m_offsets);
1720
1721	if (err != DB_SUCCESS) {
1722	return(err);
1723	}
1724	}
1725
1726	/ If it is a delete marked record then try an*
1727	optimistic delete. /*
1728
1729	if (deleted) {
1730	/ A successful purge will move the cursor to the*
1731	next record. /*
1732
1733	if (!purge()) {
1734	m_rec_iter.next();
1735	}
1736
1737	++m_index->m_stats.m_n_deleted;
1738	} else {
1739	++m_index->m_stats.m_n_rows;
1740	m_rec_iter.next();
1741	}
1742	}
1743
1744	return(DB_SUCCESS);
1745	}
1746
1747	/* Update the space, index id, trx id.*
1748	@return DB_SUCCESS or error code /*
1749	inline
1750	dberr_t
1751	PageConverter::update_index_page(
1752	buf_block_t* block) UNIV_NOTHROW
1753	{
1754	index_id_t id;
1755	buf_frame_t* page = block->frame;
1756
1757	if (is_free(block->page.id.page_no())) {
1758	return(DB_SUCCESS);
1759	} else if ((id = btr_page_get_index_id(page)) != m_index->m_id) {
1760
1761	row_index_t* index = find_index(id);
1762
1763	if (index == `0`) {
1764	ib::error () << "Page for tablespace " << m_space
1765	<< " is index page with id " << id
1766	<< " but that index is not found from"
1767	<< " configuration file. Current index name "
1768	<< m_index->m_name << " and id " << m_index->m_id;
1769	m_index = `0`;
1770	return(DB_CORRUPTION);
1771	}
1772
1773	/ Update current index /
1774	m_index = index;
1775	}
1776
1777	/ If the .cfg file is missing and there is an index mismatch*
1778	then ignore the error. /*
1779	if (m_cfg->m_missing && (m_index == `0` \|\| m_index->m_srv_index == `0`)) {
1780	return(DB_SUCCESS);
1781	}
1782
1783	#ifdef UNIV_ZIP_DEBUG
1784	ut_a(!is_compressed_table()
1785	\|\| page_zip_validate(m_page_zip_ptr, page, m_index->m_srv_index));
1786	#endif /* UNIV_ZIP_DEBUG */
1787
1788	/ This has to be written to uncompressed index header. Set it to*
1789	the current index id. /*
1790	btr_page_set_index_id(
1791	page, m_page_zip_ptr, m_index->m_srv_index->id, `0`);
1792
1793	if (dict_index_is_clust(m_index->m_srv_index)) {
1794	if (page_is_root(page)) {
1795	/ Preserve the PAGE_ROOT_AUTO_INC. /
1796	if (m_index->m_srv_index->table->supports_instant()
1797	&& btr_cur_instant_root_init(
1798	const_cast<dict_index_t*>(
1799	m_index->m_srv_index),
1800	page)) {
1801	return(DB_CORRUPTION);
1802	}
1803	} else {
1804	/ Clear PAGE_MAX_TRX_ID so that it can be*
1805	used for other purposes in the future. IMPORT
1806	in MySQL 5.6, 5.7 and MariaDB 10.0 and 10.1
1807	would set the field to the transaction ID even
1808	on clustered index pages. /*
1809	page_set_max_trx_id(block, m_page_zip_ptr, `0`, NULL);
1810	}
1811	} else {
1812	/ Set PAGE_MAX_TRX_ID on secondary index leaf pages,*
1813	and clear it on non-leaf pages. /*
1814	page_set_max_trx_id(block, m_page_zip_ptr,
1815	page_is_leaf(page) ? m_trx->id : `0`, NULL);
1816	}
1817
1818	if (page_is_empty(page)) {
1819
1820	/ Only a root page can be empty. /
1821	if (!page_is_root(page)) {
1822	// TODO: We should relax this and skip secondary
1823	// indexes. Mark them as corrupt because they can
1824	// always be rebuilt.
1825	return(DB_CORRUPTION);
1826	}
1827
1828	return(DB_SUCCESS);
1829	}
1830
1831	return page_is_leaf(block->frame) ? update_records(block) : DB_SUCCESS;
1832	}
1833
1834	/* Validate the space flags and update tablespace header page.*
1835	@param block block read from file, not from the buffer pool.
1836	@retval DB_SUCCESS or error code /*
1837	inline
1838	dberr_t
1839	PageConverter::update_header(
1840	buf_block_t* block) UNIV_NOTHROW
1841	{
1842	/ Check for valid header /
1843	switch (fsp_header_get_space_id(get_frame(block))) {
1844	case `0`:
1845	return(DB_CORRUPTION);
1846	case ULINT_UNDEFINED:
1847	ib::warn () << "Space id check in the header failed: ignored";
1848	}
1849
1850	mach_write_to_8(
1851	get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
1852	m_current_lsn);
1853
1854	/ Write back the adjusted flags. /
1855	mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS
1856	+ get_frame(block), m_space_flags);
1857
1858	/ Write space_id to the tablespace header, page 0. /
1859	mach_write_to_4(
1860	get_frame(block) + FSP_HEADER_OFFSET + FSP_SPACE_ID,
1861	get_space_id());
1862
1863	/ This is on every page in the tablespace. /
1864	mach_write_to_4(
1865	get_frame(block) + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
1866	get_space_id());
1867
1868	return(DB_SUCCESS);
1869	}
1870
1871	/* Update the page, set the space id, max trx id and index id.*
1872	@param block block read from file
1873	@retval DB_SUCCESS or error code /*
1874	inline
1875	dberr_t
1876	PageConverter::update_page(
1877	buf_block_t* block,
1878	ulint& page_type) UNIV_NOTHROW
1879	{
1880	dberr_t err = DB_SUCCESS;
1881
1882	ut_ad(!block->page.zip.data == !is_compressed_table());
1883
1884	if (block->page.zip.data) {
1885	m_page_zip_ptr = &block->page.zip;
1886	} else {
1887	ut_ad(!m_page_zip_ptr);
1888	}
1889
1890	switch (page_type = fil_page_get_type(get_frame(block))) {
1891	case FIL_PAGE_TYPE_FSP_HDR:
1892	ut_a(block->page.id.page_no() == `0`);
1893	/ Work directly on the uncompressed page headers. /
1894	return(update_header(block));
1895
1896	case FIL_PAGE_INDEX:
1897	case FIL_PAGE_RTREE:
1898	/ We need to decompress the contents into block->frame*
1899	before we can do any thing with Btree pages. /*
1900
1901	if (is_compressed_table() && !buf_zip_decompress(block, TRUE)) {
1902	return(DB_CORRUPTION);
1903	}
1904
1905	/ fall through /
1906	case FIL_PAGE_TYPE_INSTANT:
1907	/ This is on every page in the tablespace. /
1908	mach_write_to_4(
1909	get_frame(block)
1910	+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
1911
1912	/ Only update the Btree nodes. /
1913	return(update_index_page(block));
1914
1915	case FIL_PAGE_TYPE_SYS:
1916	/ This is page 0 in the system tablespace. /
1917	return(DB_CORRUPTION);
1918
1919	case FIL_PAGE_TYPE_XDES:
1920	err = set_current_xdes(
1921	block->page.id.page_no(), get_frame(block));
1922	/ fall through /
1923	case FIL_PAGE_INODE:
1924	case FIL_PAGE_TYPE_TRX_SYS:
1925	case FIL_PAGE_IBUF_FREE_LIST:
1926	case FIL_PAGE_TYPE_ALLOCATED:
1927	case FIL_PAGE_IBUF_BITMAP:
1928	case FIL_PAGE_TYPE_BLOB:
1929	case FIL_PAGE_TYPE_ZBLOB:
1930	case FIL_PAGE_TYPE_ZBLOB2:
1931
1932	/ Work directly on the uncompressed page headers. /
1933	/ This is on every page in the tablespace. /
1934	mach_write_to_4(
1935	get_frame(block)
1936	+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
1937
1938	return(err);
1939	}
1940
1941	ib::warn () << "Unknown page type (" << page_type << ")";
1942
1943	return(DB_CORRUPTION);
1944	}
1945
1946	/* Called for every page in the tablespace. If the page was not*
1947	updated then its state must be set to BUF_PAGE_NOT_USED.
1948	@param block block read from file, note it is not from the buffer pool
1949	@retval DB_SUCCESS or error code. /*
1950	dberr_t
1951	PageConverter::operator() (os_offset_t, buf_block_t* block) UNIV_NOTHROW
1952	{
1953	/ If we already had an old page with matching number*
1954	in the buffer pool, evict it now, because
1955	we no longer evict the pages on DISCARD TABLESPACE. /*
1956	buf_page_get_gen(block->page.id, get_page_size(),
1957	RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
1958	__FILE__, __LINE__, NULL, NULL);
1959
1960	ulint page_type;
1961
1962	dberr_t err = update_page(block, page_type);
1963	if (err != DB_SUCCESS) return err;
1964
1965	if (!block->page.zip.data) {
1966	buf_flush_init_for_writing(
1967	NULL, block->frame, NULL, m_current_lsn);
1968	} else if (fil_page_type_is_index(page_type)) {
1969	buf_flush_init_for_writing(
1970	NULL, block->page.zip.data, &block->page.zip,
1971	m_current_lsn);
1972	} else {
1973	/ Calculate and update the checksum of non-index*
1974	pages for ROW_FORMAT=COMPRESSED tables. /*
1975	buf_flush_update_zip_checksum(
1976	block->page.zip.data, get_page_size().physical(),
1977	m_current_lsn);
1978	}
1979
1980	return DB_SUCCESS;
1981	}
1982
1983	/***************************************************************//**
1984	Clean up after import tablespace failure, this function will acquire
1985	the dictionary latches on behalf of the transaction if the transaction
1986	hasn't already acquired them. /*
1987	static MY_ATTRIBUTE((nonnull))
1988	void
1989	row_import_discard_changes(
1990	/=======================/
1991	row_prebuilt_t* prebuilt, /!< in/out: prebuilt from handler /
1992	trx_t* trx, /!< in/out: transaction for import /
1993	dberr_t err) /!< in: error code /
1994	{
1995	dict_table_t* table = prebuilt->table;
1996
1997	ut_a(err != DB_SUCCESS);
1998
1999	prebuilt->trx->error_info = NULL;
2000
2001	ib::info () << "Discarding tablespace of table "
2002	<< prebuilt->table->name
2003	<< ": " << ut_strerr(err);
2004
2005	if (trx->dict_operation_lock_mode != RW_X_LATCH) {
2006	ut_a(trx->dict_operation_lock_mode == `0`);
2007	row_mysql_lock_data_dictionary(trx);
2008	}
2009
2010	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
2011
2012	/ Since we update the index root page numbers on disk after*
2013	we've done a successful import. The table will not be loadable.
2014	However, we need to ensure that the in memory root page numbers
2015	are reset to "NULL". /*
2016
2017	for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
2018	index != `0`;
2019	index = UT_LIST_GET_NEXT(indexes, index)) {
2020
2021	index->page = FIL_NULL;
2022	}
2023
2024	table->file_unreadable = true;
2025	if (table->space) {
2026	fil_close_tablespace(trx, table->space->id);
2027	table->space = NULL;
2028	}
2029	}
2030
2031	/***************************************************************//**
2032	Clean up after import tablespace. /*
2033	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2034	dberr_t
2035	row_import_cleanup(
2036	/===============/
2037	row_prebuilt_t* prebuilt, /!< in/out: prebuilt from handler /
2038	trx_t* trx, /!< in/out: transaction for import /
2039	dberr_t err) /!< in: error code /
2040	{
2041	ut_a(prebuilt->trx != trx);
2042
2043	if (err != DB_SUCCESS) {
2044	row_import_discard_changes(prebuilt, trx, err);
2045	}
2046
2047	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
2048
2049	DBUG_EXECUTE_IF("ib_import_before_commit_crash", DBUG_SUICIDE(););
2050
2051	trx_commit_for_mysql(trx);
2052
2053	row_mysql_unlock_data_dictionary(trx);
2054
2055	trx_free(trx);
2056
2057	prebuilt->trx->op_info = "";
2058
2059	DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
2060
2061	log_make_checkpoint_at(LSN_MAX, TRUE);
2062
2063	return(err);
2064	}
2065
2066	/***************************************************************//**
2067	Report error during tablespace import. /*
2068	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2069	dberr_t
2070	row_import_error(
2071	/=============/
2072	row_prebuilt_t* prebuilt, /!< in/out: prebuilt from handler /
2073	trx_t* trx, /!< in/out: transaction for import /
2074	dberr_t err) /!< in: error code /
2075	{
2076	if (!trx_is_interrupted(trx)) {
2077	char table_name[MAX_FULL_NAME_LEN + `1`];
2078
2079	innobase_format_name(
2080	table_name, sizeof(table_name),
2081	prebuilt->table->name.m_name);
2082
2083	ib_senderrf(
2084	trx->mysql_thd, IB_LOG_LEVEL_WARN,
2085	ER_INNODB_IMPORT_ERROR,
2086	table_name, (ulong) err, ut_strerr(err));
2087	}
2088
2089	return(row_import_cleanup(prebuilt, trx, err));
2090	}
2091
2092	/***************************************************************//**
2093	Adjust the root page index node and leaf node segment headers, update
2094	with the new space id. For all the table's secondary indexes.
2095	@return error code /*
2096	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2097	dberr_t
2098	row_import_adjust_root_pages_of_secondary_indexes(
2099	/==============================================/
2100	trx_t* trx, /!< in: transaction used for*
2101	the import /*
2102	dict_table_t* table, /!< in: table the indexes*
2103	belong to /*
2104	const row_import& cfg) /!< Import context /
2105	{
2106	dict_index_t* index;
2107	ulint n_rows_in_table;
2108	dberr_t err = DB_SUCCESS;
2109
2110	/ Skip the clustered index. /
2111	index = dict_table_get_first_index(table);
2112
2113	n_rows_in_table = cfg.get_n_rows(index->name);
2114
2115	DBUG_EXECUTE_IF("ib_import_sec_rec_count_mismatch_failure",
2116	n_rows_in_table++;);
2117
2118	/ Adjust the root pages of the secondary indexes only. /
2119	while ((index = dict_table_get_next_index(index)) != NULL) {
2120	ut_a(!dict_index_is_clust(index));
2121
2122	if (!(index->type & DICT_CORRUPT)
2123	&& index->page != FIL_NULL) {
2124
2125	/ Update the Btree segment headers for index node and*
2126	leaf nodes in the root page. Set the new space id. /*
2127
2128	err = btr_root_adjust_on_import(index);
2129	} else {
2130	ib::warn () << "Skip adjustment of root pages for"
2131	" index " << index->name << ".";
2132
2133	err = DB_CORRUPTION;
2134	}
2135
2136	if (err != DB_SUCCESS) {
2137
2138	if (index->type & DICT_CLUSTERED) {
2139	break;
2140	}
2141
2142	ib_errf(trx->mysql_thd,
2143	IB_LOG_LEVEL_WARN,
2144	ER_INNODB_INDEX_CORRUPT,
2145	"Index %s not found or corrupt,"
2146	" you should recreate this index.",
2147	index->name ());
2148
2149	/ Do not bail out, so that the data*
2150	can be recovered. /*
2151
2152	err = DB_SUCCESS;
2153	index->type \|= DICT_CORRUPT;
2154	continue;
2155	}
2156
2157	/ If we failed to purge any records in the index then*
2158	do it the hard way.
2159
2160	TODO: We can do this in the first pass by generating UNDO log
2161	records for the failed rows. /*
2162
2163	if (!cfg.requires_purge(index->name)) {
2164	continue;
2165	}
2166
2167	IndexPurge purge(trx, index);
2168
2169	trx->op_info = "secondary: purge delete marked records";
2170
2171	err = purge.garbage_collect();
2172
2173	trx->op_info = "";
2174
2175	if (err != DB_SUCCESS) {
2176	break;
2177	} else if (purge.get_n_rows() != n_rows_in_table) {
2178
2179	ib_errf(trx->mysql_thd,
2180	IB_LOG_LEVEL_WARN,
2181	ER_INNODB_INDEX_CORRUPT,
2182	"Index '%s' contains " ULINTPF " entries, "
2183	"should be " ULINTPF ", you should recreate "
2184	"this index.", index->name (),
2185	purge.get_n_rows(), n_rows_in_table);
2186
2187	index->type \|= DICT_CORRUPT;
2188
2189	/ Do not bail out, so that the data*
2190	can be recovered. /*
2191
2192	err = DB_SUCCESS;
2193	}
2194	}
2195
2196	return(err);
2197	}
2198
2199	/***************************************************************//**
2200	Ensure that dict_sys->row_id exceeds SELECT MAX(DB_ROW_ID).
2201	@return error code /*
2202	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2203	dberr_t
2204	row_import_set_sys_max_row_id(
2205	/==========================/
2206	row_prebuilt_t* prebuilt, /!< in/out: prebuilt from*
2207	handler /*
2208	const dict_table_t* table) /!< in: table to import /
2209	{
2210	dberr_t err;
2211	const rec_t* rec;
2212	mtr_t mtr;
2213	btr_pcur_t pcur;
2214	row_id_t row_id = `0`;
2215	dict_index_t* index;
2216
2217	index = dict_table_get_first_index(table);
2218	ut_a(dict_index_is_clust(index));
2219
2220	mtr_start(&mtr);
2221
2222	mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2223
2224	btr_pcur_open_at_index_side(
2225	false, // High end
2226	index,
2227	BTR_SEARCH_LEAF,
2228	&pcur,
2229	true, // Init cursor
2230	`0`, // Leaf level
2231	&mtr);
2232
2233	btr_pcur_move_to_prev_on_page(&pcur);
2234	rec = btr_pcur_get_rec(&pcur);
2235
2236	/ Check for empty table. /
2237	if (page_rec_is_infimum(rec)) {
2238	/ The table is empty. /
2239	err = DB_SUCCESS;
2240	} else if (rec_is_default_row(rec, index)) {
2241	/ The clustered index contains the 'default row',*
2242	that is, the table is empty. /*
2243	err = DB_SUCCESS;
2244	} else {
2245	ulint len;
2246	const byte* field;
2247	mem_heap_t* heap = NULL;
2248	ulint offsets_[`1` + REC_OFFS_HEADER_SIZE];
2249	ulint* offsets;
2250
2251	rec_offs_init(offsets_);
2252
2253	offsets = rec_get_offsets(
2254	rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
2255
2256	field = rec_get_nth_field(
2257	rec, offsets,
2258	dict_index_get_sys_col_pos(index, DATA_ROW_ID),
2259	&len);
2260
2261	if (len == DATA_ROW_ID_LEN) {
2262	row_id = mach_read_from_6(field);
2263	err = DB_SUCCESS;
2264	} else {
2265	err = DB_CORRUPTION;
2266	}
2267
2268	if (heap != NULL) {
2269	mem_heap_free(heap);
2270	}
2271	}
2272
2273	btr_pcur_close(&pcur);
2274	mtr_commit(&mtr);
2275
2276	DBUG_EXECUTE_IF("ib_import_set_max_rowid_failure",
2277	err = DB_CORRUPTION;);
2278
2279	if (err != DB_SUCCESS) {
2280	ib_errf(prebuilt->trx->mysql_thd,
2281	IB_LOG_LEVEL_WARN,
2282	ER_INNODB_INDEX_CORRUPT,
2283	"Index `%s` corruption detected, invalid DB_ROW_ID"
2284	" in index.", index->name ());
2285
2286	return(err);
2287
2288	} else if (row_id > `0`) {
2289
2290	/ Update the system row id if the imported index row id is*
2291	greater than the max system row id. /*
2292
2293	mutex_enter(&dict_sys->mutex);
2294
2295	if (row_id >= dict_sys->row_id) {
2296	dict_sys->row_id = row_id + `1`;
2297	dict_hdr_flush_row_id();
2298	}
2299
2300	mutex_exit(&dict_sys->mutex);
2301	}
2302
2303	return(DB_SUCCESS);
2304	}
2305
2306	/***************************************************************//**
2307	Read the a string from the meta data file.
2308	@return DB_SUCCESS or error code. /*
2309	static
2310	dberr_t
2311	row_import_cfg_read_string(
2312	/=======================/
2313	FILE* file, /!< in/out: File to read from /
2314	byte* ptr, /!< out: string to read /
2315	ulint max_len) /!< in: maximum length of the output*
2316	buffer in bytes /*
2317	{
2318	DBUG_EXECUTE_IF("ib_import_string_read_error",
2319	errno = EINVAL; return(DB_IO_ERROR););
2320
2321	ulint len = `0`;
2322
2323	while (!feof(file)) {
2324	int ch = fgetc(file);
2325
2326	if (ch == EOF) {
2327	break;
2328	} else if (ch != `0`) {
2329	if (len < max_len) {
2330	ptr[len++] = ch;
2331	} else {
2332	break;
2333	}
2334	/ max_len includes the NUL byte /
2335	} else if (len != max_len - `1`) {
2336	break;
2337	} else {
2338	ptr[len] = `0`;
2339	return(DB_SUCCESS);
2340	}
2341	}
2342
2343	errno = EINVAL;
2344
2345	return(DB_IO_ERROR);
2346	}
2347
2348	/*******************************************************************//**
2349	Write the meta data (index user fields) config file.
2350	@return DB_SUCCESS or error code. /*
2351	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2352	dberr_t
2353	row_import_cfg_read_index_fields(
2354	/=============================/
2355	FILE* file, /!< in: file to write to /
2356	THD* thd, /!< in/out: session /
2357	row_index_t* index) /!< Index being read in /
2358	{
2359	byte row[sizeof(ib_uint32_t) * `3`];
2360	ulint n_fields = index->m_n_fields;
2361
2362	index->m_fields = UT_NEW_ARRAY_NOKEY(dict_field_t, n_fields);
2363
2364	/ Trigger OOM /
2365	DBUG_EXECUTE_IF(
2366	"ib_import_OOM_4",
2367	UT_DELETE_ARRAY(index->m_fields);
2368	index->m_fields = NULL;
2369	);
2370
2371	if (index->m_fields == NULL) {
2372	return(DB_OUT_OF_MEMORY);
2373	}
2374
2375	dict_field_t* field = index->m_fields;
2376
2377	memset(field, `0x0`, sizeof(field) n_fields);
2378
2379	for (ulint i = `0`; i < n_fields; ++i, ++field) {
2380	byte* ptr = row;
2381
2382	/ Trigger EOF /
2383	DBUG_EXECUTE_IF("ib_import_io_read_error_1",
2384	(void) fseek(file, `0L`, SEEK_END););
2385
2386	if (fread(row, `1`, sizeof(row), file) != sizeof(row)) {
2387
2388	ib_senderrf(
2389	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2390	(ulong) errno, strerror(errno),
2391	"while reading index fields.");
2392
2393	return(DB_IO_ERROR);
2394	}
2395
2396	field->prefix_len = mach_read_from_4(ptr);
2397	ptr += sizeof(ib_uint32_t);
2398
2399	field->fixed_len = mach_read_from_4(ptr);
2400	ptr += sizeof(ib_uint32_t);
2401
2402	/ Include the NUL byte in the length. /
2403	ulint len = mach_read_from_4(ptr);
2404
2405	byte* name = UT_NEW_ARRAY_NOKEY(byte, len);
2406
2407	/ Trigger OOM /
2408	DBUG_EXECUTE_IF(
2409	"ib_import_OOM_5",
2410	UT_DELETE_ARRAY(name);
2411	name = NULL;
2412	);
2413
2414	if (name == NULL) {
2415	return(DB_OUT_OF_MEMORY);
2416	}
2417
2418	field->name = reinterpret_cast<const char*>(name);
2419
2420	dberr_t err = row_import_cfg_read_string(file, name, len);
2421
2422	if (err != DB_SUCCESS) {
2423
2424	ib_senderrf(
2425	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2426	(ulong) errno, strerror(errno),
2427	"while parsing table name.");
2428
2429	return(err);
2430	}
2431	}
2432
2433	return(DB_SUCCESS);
2434	}
2435
2436	/***************************************************************//**
2437	Read the index names and root page numbers of the indexes and set the values.
2438	Row format [root_page_no, len of str, str ... ]
2439	@return DB_SUCCESS or error code. /*
2440	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2441	dberr_t
2442	row_import_read_index_data(
2443	/=======================/
2444	FILE* file, /!< in: File to read from /
2445	THD* thd, /!< in: session /
2446	row_import* cfg) /!< in/out: meta-data read /
2447	{
2448	byte* ptr;
2449	row_index_t* cfg_index;
2450	byte row[sizeof(index_id_t) + sizeof(ib_uint32_t) * `9`];
2451
2452	/ FIXME: What is the max value? /
2453	ut_a(cfg->m_n_indexes > `0`);
2454	ut_a(cfg->m_n_indexes < `1024`);
2455
2456	cfg->m_indexes = UT_NEW_ARRAY_NOKEY(row_index_t, cfg->m_n_indexes);
2457
2458	/ Trigger OOM /
2459	DBUG_EXECUTE_IF(
2460	"ib_import_OOM_6",
2461	UT_DELETE_ARRAY(cfg->m_indexes);
2462	cfg->m_indexes = NULL;
2463	);
2464
2465	if (cfg->m_indexes == NULL) {
2466	return(DB_OUT_OF_MEMORY);
2467	}
2468
2469	memset(cfg->m_indexes, `0x0`, sizeof(cfg->m_indexes) cfg->m_n_indexes);
2470
2471	cfg_index = cfg->m_indexes;
2472
2473	for (ulint i = `0`; i < cfg->m_n_indexes; ++i, ++cfg_index) {
2474	/ Trigger EOF /
2475	DBUG_EXECUTE_IF("ib_import_io_read_error_2",
2476	(void) fseek(file, `0L`, SEEK_END););
2477
2478	/ Read the index data. /
2479	size_t n_bytes = fread(row, `1`, sizeof(row), file);
2480
2481	/ Trigger EOF /
2482	DBUG_EXECUTE_IF("ib_import_io_read_error",
2483	(void) fseek(file, `0L`, SEEK_END););
2484
2485	if (n_bytes != sizeof(row)) {
2486	char msg[BUFSIZ];
2487
2488	snprintf(msg, sizeof(msg),
2489	"while reading index meta-data, expected "
2490	"to read " ULINTPF
2491	" bytes but read only " ULINTPF " bytes",
2492	sizeof(row), n_bytes);
2493
2494	ib_senderrf(
2495	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2496	(ulong) errno, strerror(errno), msg);
2497
2498	ib::error () << "IO Error: " << msg;
2499
2500	return(DB_IO_ERROR);
2501	}
2502
2503	ptr = row;
2504
2505	cfg_index->m_id = mach_read_from_8(ptr);
2506	ptr += sizeof(index_id_t);
2507
2508	cfg_index->m_space = mach_read_from_4(ptr);
2509	ptr += sizeof(ib_uint32_t);
2510
2511	cfg_index->m_page_no = mach_read_from_4(ptr);
2512	ptr += sizeof(ib_uint32_t);
2513
2514	cfg_index->m_type = mach_read_from_4(ptr);
2515	ptr += sizeof(ib_uint32_t);
2516
2517	cfg_index->m_trx_id_offset = mach_read_from_4(ptr);
2518	if (cfg_index->m_trx_id_offset != mach_read_from_4(ptr)) {
2519	ut_ad(`0`);
2520	/ Overflow. Pretend that the clustered index*
2521	has a variable-length PRIMARY KEY. /*
2522	cfg_index->m_trx_id_offset = `0`;
2523	}
2524	ptr += sizeof(ib_uint32_t);
2525
2526	cfg_index->m_n_user_defined_cols = mach_read_from_4(ptr);
2527	ptr += sizeof(ib_uint32_t);
2528
2529	cfg_index->m_n_uniq = mach_read_from_4(ptr);
2530	ptr += sizeof(ib_uint32_t);
2531
2532	cfg_index->m_n_nullable = mach_read_from_4(ptr);
2533	ptr += sizeof(ib_uint32_t);
2534
2535	cfg_index->m_n_fields = mach_read_from_4(ptr);
2536	ptr += sizeof(ib_uint32_t);
2537
2538	/ The NUL byte is included in the name length. /
2539	ulint len = mach_read_from_4(ptr);
2540
2541	if (len > OS_FILE_MAX_PATH) {
2542	ib_errf(thd, IB_LOG_LEVEL_ERROR,
2543	ER_INNODB_INDEX_CORRUPT,
2544	"Index name length (" ULINTPF ") is too long, "
2545	"the meta-data is corrupt", len);
2546
2547	return(DB_CORRUPTION);
2548	}
2549
2550	cfg_index->m_name = UT_NEW_ARRAY_NOKEY(byte, len);
2551
2552	/ Trigger OOM /
2553	DBUG_EXECUTE_IF(
2554	"ib_import_OOM_7",
2555	UT_DELETE_ARRAY(cfg_index->m_name);
2556	cfg_index->m_name = NULL;
2557	);
2558
2559	if (cfg_index->m_name == NULL) {
2560	return(DB_OUT_OF_MEMORY);
2561	}
2562
2563	dberr_t err;
2564
2565	err = row_import_cfg_read_string(file, cfg_index->m_name, len);
2566
2567	if (err != DB_SUCCESS) {
2568
2569	ib_senderrf(
2570	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2571	(ulong) errno, strerror(errno),
2572	"while parsing index name.");
2573
2574	return(err);
2575	}
2576
2577	err = row_import_cfg_read_index_fields(file, thd, cfg_index);
2578
2579	if (err != DB_SUCCESS) {
2580	return(err);
2581	}
2582
2583	}
2584
2585	return(DB_SUCCESS);
2586	}
2587
2588	/***************************************************************//**
2589	Set the index root page number for v1 format.
2590	@return DB_SUCCESS or error code. /*
2591	static
2592	dberr_t
2593	row_import_read_indexes(
2594	/====================/
2595	FILE* file, /!< in: File to read from /
2596	THD* thd, /!< in: session /
2597	row_import* cfg) /!< in/out: meta-data read /
2598	{
2599	byte row[sizeof(ib_uint32_t)];
2600
2601	/ Trigger EOF /
2602	DBUG_EXECUTE_IF("ib_import_io_read_error_3",
2603	(void) fseek(file, `0L`, SEEK_END););
2604
2605	/ Read the number of indexes. /
2606	if (fread(row, `1`, sizeof(row), file) != sizeof(row)) {
2607	ib_senderrf(
2608	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2609	(ulong) errno, strerror(errno),
2610	"while reading number of indexes.");
2611
2612	return(DB_IO_ERROR);
2613	}
2614
2615	cfg->m_n_indexes = mach_read_from_4(row);
2616
2617	if (cfg->m_n_indexes == `0`) {
2618	ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2619	"Number of indexes in meta-data file is 0");
2620
2621	return(DB_CORRUPTION);
2622
2623	} else if (cfg->m_n_indexes > `1024`) {
2624	// FIXME: What is the upper limit? /*
2625	ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2626	"Number of indexes in meta-data file is too high: "
2627	ULINTPF, cfg->m_n_indexes);
2628	cfg->m_n_indexes = `0`;
2629
2630	return(DB_CORRUPTION);
2631	}
2632
2633	return(row_import_read_index_data(file, thd, cfg));
2634	}
2635
2636	/*******************************************************************//**
2637	Read the meta data (table columns) config file. Deserialise the contents of
2638	dict_col_t structure, along with the column name. /*
2639	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2640	dberr_t
2641	row_import_read_columns(
2642	/====================/
2643	FILE* file, /!< in: file to write to /
2644	THD* thd, /!< in/out: session /
2645	row_import* cfg) /!< in/out: meta-data read /
2646	{
2647	dict_col_t* col;
2648	byte row[sizeof(ib_uint32_t) * `8`];
2649
2650	/ FIXME: What should the upper limit be? /
2651	ut_a(cfg->m_n_cols > `0`);
2652	ut_a(cfg->m_n_cols < `1024`);
2653
2654	cfg->m_cols = UT_NEW_ARRAY_NOKEY(dict_col_t, cfg->m_n_cols);
2655
2656	/ Trigger OOM /
2657	DBUG_EXECUTE_IF(
2658	"ib_import_OOM_8",
2659	UT_DELETE_ARRAY(cfg->m_cols);
2660	cfg->m_cols = NULL;
2661	);
2662
2663	if (cfg->m_cols == NULL) {
2664	return(DB_OUT_OF_MEMORY);
2665	}
2666
2667	cfg->m_col_names = UT_NEW_ARRAY_NOKEY(byte*, cfg->m_n_cols);
2668
2669	/ Trigger OOM /
2670	DBUG_EXECUTE_IF(
2671	"ib_import_OOM_9",
2672	UT_DELETE_ARRAY(cfg->m_col_names);
2673	cfg->m_col_names = NULL;
2674	);
2675
2676	if (cfg->m_col_names == NULL) {
2677	return(DB_OUT_OF_MEMORY);
2678	}
2679
2680	memset(cfg->m_cols, `0x0`, sizeof(cfg->m_cols) * cfg->m_n_cols);
2681	memset(cfg->m_col_names, `0x0`, sizeof(cfg->m_col_names) * cfg->m_n_cols);
2682
2683	col = cfg->m_cols;
2684
2685	for (ulint i = `0`; i < cfg->m_n_cols; ++i, ++col) {
2686	byte* ptr = row;
2687
2688	/ Trigger EOF /
2689	DBUG_EXECUTE_IF("ib_import_io_read_error_4",
2690	(void) fseek(file, `0L`, SEEK_END););
2691
2692	if (fread(row, `1`, sizeof(row), file) != sizeof(row)) {
2693	ib_senderrf(
2694	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2695	(ulong) errno, strerror(errno),
2696	"while reading table column meta-data.");
2697
2698	return(DB_IO_ERROR);
2699	}
2700
2701	col->prtype = mach_read_from_4(ptr);
2702	ptr += sizeof(ib_uint32_t);
2703
2704	col->mtype = mach_read_from_4(ptr);
2705	ptr += sizeof(ib_uint32_t);
2706
2707	col->len = mach_read_from_4(ptr);
2708	ptr += sizeof(ib_uint32_t);
2709
2710	ulint mbminmaxlen = mach_read_from_4(ptr);
2711	col->mbmaxlen = mbminmaxlen / `5`;
2712	col->mbminlen = mbminmaxlen % `5`;
2713	ptr += sizeof(ib_uint32_t);
2714
2715	col->ind = mach_read_from_4(ptr);
2716	ptr += sizeof(ib_uint32_t);
2717
2718	col->ord_part = mach_read_from_4(ptr);
2719	ptr += sizeof(ib_uint32_t);
2720
2721	col->max_prefix = mach_read_from_4(ptr);
2722	ptr += sizeof(ib_uint32_t);
2723
2724	/ Read in the column name as [len, byte array]. The len*
2725	includes the NUL byte. /*
2726
2727	ulint len = mach_read_from_4(ptr);
2728
2729	/ FIXME: What is the maximum column name length? /
2730	if (len == `0` \|\| len > `128`) {
2731	ib_errf(thd, IB_LOG_LEVEL_ERROR,
2732	ER_IO_READ_ERROR,
2733	"Column name length " ULINTPF ", is invalid",
2734	len);
2735
2736	return(DB_CORRUPTION);
2737	}
2738
2739	cfg->m_col_names[i] = UT_NEW_ARRAY_NOKEY(byte, len);
2740
2741	/ Trigger OOM /
2742	DBUG_EXECUTE_IF(
2743	"ib_import_OOM_10",
2744	UT_DELETE_ARRAY(cfg->m_col_names[i]);
2745	cfg->m_col_names[i] = NULL;
2746	);
2747
2748	if (cfg->m_col_names[i] == NULL) {
2749	return(DB_OUT_OF_MEMORY);
2750	}
2751
2752	dberr_t err;
2753
2754	err = row_import_cfg_read_string(
2755	file, cfg->m_col_names[i], len);
2756
2757	if (err != DB_SUCCESS) {
2758
2759	ib_senderrf(
2760	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2761	(ulong) errno, strerror(errno),
2762	"while parsing table column name.");
2763
2764	return(err);
2765	}
2766	}
2767
2768	return(DB_SUCCESS);
2769	}
2770
2771	/***************************************************************//**
2772	Read the contents of the <tablespace>.cfg file.
2773	@return DB_SUCCESS or error code. /*
2774	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2775	dberr_t
2776	row_import_read_v1(
2777	/===============/
2778	FILE* file, /!< in: File to read from /
2779	THD* thd, /!< in: session /
2780	row_import* cfg) /!< out: meta data /
2781	{
2782	byte value[sizeof(ib_uint32_t)];
2783
2784	/ Trigger EOF /
2785	DBUG_EXECUTE_IF("ib_import_io_read_error_5",
2786	(void) fseek(file, `0L`, SEEK_END););
2787
2788	/ Read the hostname where the tablespace was exported. /
2789	if (fread(value, `1`, sizeof(value), file) != sizeof(value)) {
2790	ib_senderrf(
2791	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2792	(ulong) errno, strerror(errno),
2793	"while reading meta-data export hostname length.");
2794
2795	return(DB_IO_ERROR);
2796	}
2797
2798	ulint len = mach_read_from_4(value);
2799
2800	/ NUL byte is part of name length. /
2801	cfg->m_hostname = UT_NEW_ARRAY_NOKEY(byte, len);
2802
2803	/ Trigger OOM /
2804	DBUG_EXECUTE_IF(
2805	"ib_import_OOM_1",
2806	UT_DELETE_ARRAY(cfg->m_hostname);
2807	cfg->m_hostname = NULL;
2808	);
2809
2810	if (cfg->m_hostname == NULL) {
2811	return(DB_OUT_OF_MEMORY);
2812	}
2813
2814	dberr_t err = row_import_cfg_read_string(file, cfg->m_hostname, len);
2815
2816	if (err != DB_SUCCESS) {
2817
2818	ib_senderrf(
2819	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2820	(ulong) errno, strerror(errno),
2821	"while parsing export hostname.");
2822
2823	return(err);
2824	}
2825
2826	/ Trigger EOF /
2827	DBUG_EXECUTE_IF("ib_import_io_read_error_6",
2828	(void) fseek(file, `0L`, SEEK_END););
2829
2830	/ Read the table name of tablespace that was exported. /
2831	if (fread(value, `1`, sizeof(value), file) != sizeof(value)) {
2832	ib_senderrf(
2833	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2834	(ulong) errno, strerror(errno),
2835	"while reading meta-data table name length.");
2836
2837	return(DB_IO_ERROR);
2838	}
2839
2840	len = mach_read_from_4(value);
2841
2842	/ NUL byte is part of name length. /
2843	cfg->m_table_name = UT_NEW_ARRAY_NOKEY(byte, len);
2844
2845	/ Trigger OOM /
2846	DBUG_EXECUTE_IF(
2847	"ib_import_OOM_2",
2848	UT_DELETE_ARRAY(cfg->m_table_name);
2849	cfg->m_table_name = NULL;
2850	);
2851
2852	if (cfg->m_table_name == NULL) {
2853	return(DB_OUT_OF_MEMORY);
2854	}
2855
2856	err = row_import_cfg_read_string(file, cfg->m_table_name, len);
2857
2858	if (err != DB_SUCCESS) {
2859	ib_senderrf(
2860	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2861	(ulong) errno, strerror(errno),
2862	"while parsing table name.");
2863
2864	return(err);
2865	}
2866
2867	ib::info () << "Importing tablespace for table '" << cfg->m_table_name
2868	<< "' that was exported from host '" << cfg->m_hostname << "'";
2869
2870	byte row[sizeof(ib_uint32_t) * `3`];
2871
2872	/ Trigger EOF /
2873	DBUG_EXECUTE_IF("ib_import_io_read_error_7",
2874	(void) fseek(file, `0L`, SEEK_END););
2875
2876	/ Read the autoinc value. /
2877	if (fread(row, `1`, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
2878	ib_senderrf(
2879	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2880	(ulong) errno, strerror(errno),
2881	"while reading autoinc value.");
2882
2883	return(DB_IO_ERROR);
2884	}
2885
2886	cfg->m_autoinc = mach_read_from_8(row);
2887
2888	/ Trigger EOF /
2889	DBUG_EXECUTE_IF("ib_import_io_read_error_8",
2890	(void) fseek(file, `0L`, SEEK_END););
2891
2892	/ Read the tablespace page size. /
2893	if (fread(row, `1`, sizeof(row), file) != sizeof(row)) {
2894	ib_senderrf(
2895	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2896	(ulong) errno, strerror(errno),
2897	"while reading meta-data header.");
2898
2899	return(DB_IO_ERROR);
2900	}
2901
2902	byte* ptr = row;
2903
2904	const ulint logical_page_size = mach_read_from_4(ptr);
2905	ptr += sizeof(ib_uint32_t);
2906
2907	if (logical_page_size != srv_page_size) {
2908
2909	ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
2910	"Tablespace to be imported has a different"
2911	" page size than this server. Server page size"
2912	" is %lu, whereas tablespace page size"
2913	" is " ULINTPF,
2914	srv_page_size,
2915	logical_page_size);
2916
2917	return(DB_ERROR);
2918	}
2919
2920	cfg->m_flags = mach_read_from_4(ptr);
2921	ptr += sizeof(ib_uint32_t);
2922
2923	cfg->m_page_size.copy_from(dict_tf_get_page_size(cfg->m_flags));
2924
2925	ut_a(logical_page_size == cfg->m_page_size.logical());
2926
2927	cfg->m_n_cols = mach_read_from_4(ptr);
2928
2929	if (!dict_tf_is_valid(cfg->m_flags)) {
2930	ib_errf(thd, IB_LOG_LEVEL_ERROR,
2931	ER_TABLE_SCHEMA_MISMATCH,
2932	"Invalid table flags: " ULINTPF, cfg->m_flags);
2933
2934	return(DB_CORRUPTION);
2935	}
2936
2937	err = row_import_read_columns(file, thd, cfg);
2938
2939	if (err == DB_SUCCESS) {
2940	err = row_import_read_indexes(file, thd, cfg);
2941	}
2942
2943	return(err);
2944	}
2945
2946	/**
2947	Read the contents of the <tablespace>.cfg file.
2948	@return DB_SUCCESS or error code. /*
2949	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2950	dberr_t
2951	row_import_read_meta_data(
2952	/======================/
2953	FILE* file, /!< in: File to read from /
2954	THD* thd, /!< in: session /
2955	row_import& cfg) /!< out: contents of the .cfg file /
2956	{
2957	byte row[sizeof(ib_uint32_t)];
2958
2959	/ Trigger EOF /
2960	DBUG_EXECUTE_IF("ib_import_io_read_error_9",
2961	(void) fseek(file, `0L`, SEEK_END););
2962
2963	if (fread(&row, `1`, sizeof(row), file) != sizeof(row)) {
2964	ib_senderrf(
2965	thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2966	(ulong) errno, strerror(errno),
2967	"while reading meta-data version.");
2968
2969	return(DB_IO_ERROR);
2970	}
2971
2972	cfg.m_version = mach_read_from_4(row);
2973
2974	/ Check the version number. /
2975	switch (cfg.m_version) {
2976	case IB_EXPORT_CFG_VERSION_V1:
2977
2978	return(row_import_read_v1(file, thd, &cfg));
2979	default:
2980	ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
2981	"Unsupported meta-data version number (" ULINTPF "), "
2982	"file ignored", cfg.m_version);
2983	}
2984
2985	return(DB_ERROR);
2986	}
2987
2988	/**
2989	Read the contents of the <tablename>.cfg file.
2990	@return DB_SUCCESS or error code. /*
2991	static MY_ATTRIBUTE((nonnull, warn_unused_result))
2992	dberr_t
2993	row_import_read_cfg(
2994	/================/
2995	dict_table_t* table, /!< in: table /
2996	THD* thd, /!< in: session /
2997	row_import& cfg) /!< out: contents of the .cfg file /
2998	{
2999	dberr_t err;
3000	char name[OS_FILE_MAX_PATH];
3001
3002	cfg.m_table = table;
3003
3004	srv_get_meta_data_filename(table, name, sizeof(name));
3005
3006	FILE* file = fopen(name, "rb");
3007
3008	if (file == NULL) {
3009	char msg[BUFSIZ];
3010
3011	snprintf(msg, sizeof(msg),
3012	"Error opening '%s', will attempt to import"
3013	" without schema verification", name);
3014
3015	ib_senderrf(
3016	thd, IB_LOG_LEVEL_WARN, ER_IO_READ_ERROR,
3017	(ulong) errno, strerror(errno), msg);
3018
3019	cfg.m_missing = true;
3020
3021	err = DB_FAIL;
3022	} else {
3023
3024	cfg.m_missing = false;
3025
3026	err = row_import_read_meta_data(file, thd, cfg);
3027	fclose(file);
3028	}
3029
3030	return(err);
3031	}
3032
3033	/***************************************************************//**
3034	Update the <space, root page> of a table's indexes from the values
3035	in the data dictionary.
3036	@return DB_SUCCESS or error code /*
3037	dberr_t
3038	row_import_update_index_root(
3039	/=========================/
3040	trx_t* trx, /!< in/out: transaction that*
3041	covers the update /*
3042	const dict_table_t* table, /!< in: Table for which we want*
3043	to set the root page_no /*
3044	bool reset, /!< in: if true then set to*
3045	FIL_NUL /*
3046	bool dict_locked) /!< in: Set to true if the*
3047	caller already owns the
3048	dict_sys_t:: mutex. /*
3049
3050	{
3051	const dict_index_t* index;
3052	que_t* graph = `0`;
3053	dberr_t err = DB_SUCCESS;
3054
3055	ut_ad(reset \|\| table->space->id == table->space_id);
3056
3057	static const char sql[] = {
3058	"PROCEDURE UPDATE_INDEX_ROOT() IS\n"
3059	"BEGIN\n"
3060	"UPDATE SYS_INDEXES\n"
3061	"SET SPACE = :space,\n"
3062	" PAGE_NO = :page,\n"
3063	" TYPE = :type\n"
3064	"WHERE TABLE_ID = :table_id AND ID = :index_id;\n"
3065	"END;\n"};
3066
3067	if (!dict_locked) {
3068	mutex_enter(&dict_sys->mutex);
3069	}
3070
3071	for (index = dict_table_get_first_index(table);
3072	index != `0`;
3073	index = dict_table_get_next_index(index)) {
3074
3075	pars_info_t* info;
3076	ib_uint32_t page;
3077	ib_uint32_t space;
3078	ib_uint32_t type;
3079	index_id_t index_id;
3080	table_id_t table_id;
3081
3082	info = (graph != `0`) ? graph->info : pars_info_create();
3083
3084	mach_write_to_4(
3085	reinterpret_cast<byte*>(&type),
3086	index->type);
3087
3088	mach_write_to_4(
3089	reinterpret_cast<byte*>(&page),
3090	reset ? FIL_NULL : index->page);
3091
3092	mach_write_to_4(
3093	reinterpret_cast<byte*>(&space),
3094	reset ? FIL_NULL : index->table->space_id);
3095
3096	mach_write_to_8(
3097	reinterpret_cast<byte*>(&index_id),
3098	index->id);
3099
3100	mach_write_to_8(
3101	reinterpret_cast<byte*>(&table_id),
3102	table->id);
3103
3104	/ If we set the corrupt bit during the IMPORT phase then*
3105	we need to update the system tables. /*
3106	pars_info_bind_int4_literal(info, "type", &type);
3107	pars_info_bind_int4_literal(info, "space", &space);
3108	pars_info_bind_int4_literal(info, "page", &page);
3109	pars_info_bind_ull_literal(info, "index_id", &index_id);
3110	pars_info_bind_ull_literal(info, "table_id", &table_id);
3111
3112	if (graph == `0`) {
3113	graph = pars_sql(info, sql);
3114	ut_a(graph);
3115	graph->trx = trx;
3116	}
3117
3118	que_thr_t* thr;
3119
3120	graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
3121
3122	ut_a(thr = que_fork_start_command(graph));
3123
3124	que_run_threads(thr);
3125
3126	DBUG_EXECUTE_IF("ib_import_internal_error",
3127	trx->error_state = DB_ERROR;);
3128
3129	err = trx->error_state;
3130
3131	if (err != DB_SUCCESS) {
3132	ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
3133	ER_INTERNAL_ERROR,
3134	"While updating the <space, root page"
3135	" number> of index %s - %s",
3136	index->name (), ut_strerr(err));
3137
3138	break;
3139	}
3140	}
3141
3142	que_graph_free(graph);
3143
3144	if (!dict_locked) {
3145	mutex_exit(&dict_sys->mutex);
3146	}
3147
3148	return(err);
3149	}
3150
3151	/* Callback arg for row_import_set_discarded. /
3152	struct discard_t {
3153	ib_uint32_t flags2; /!< Value read from column /
3154	bool state; /!< New state of the flag /
3155	ulint n_recs; /!< Number of recs processed /
3156	};
3157
3158	/****************************************************************//**
3159	Fetch callback that sets or unsets the DISCARDED tablespace flag in
3160	SYS_TABLES. The flags is stored in MIX_LEN column.
3161	@return FALSE if all OK /*
3162	static
3163	ibool
3164	row_import_set_discarded(
3165	/=====================/
3166	void* row, /!< in: sel_node_t* /
3167	void* user_arg) /!< in: bool set/unset flag /
3168	{
3169	sel_node_t* node = static_cast<sel_node_t*>(row);
3170	discard_t* discard = static_cast<discard_t*>(user_arg);
3171	dfield_t* dfield = que_node_get_val(node->select_list);
3172	dtype_t* type = dfield_get_type(dfield);
3173	ulint len = dfield_get_len(dfield);
3174
3175	ut_a(dtype_get_mtype(type) == DATA_INT);
3176	ut_a(len == sizeof(ib_uint32_t));
3177
3178	ulint flags2 = mach_read_from_4(
3179	static_cast<byte*>(dfield_get_data(dfield)));
3180
3181	if (discard->state) {
3182	flags2 \|= DICT_TF2_DISCARDED;
3183	} else {
3184	flags2 &= ~DICT_TF2_DISCARDED;
3185	}
3186
3187	mach_write_to_4(reinterpret_cast<byte*>(&discard->flags2), flags2);
3188
3189	++discard->n_recs;
3190
3191	/ There should be at most one matching record. /
3192	ut_a(discard->n_recs == `1`);
3193
3194	return(FALSE);
3195	}
3196
3197	/* Update the DICT_TF2_DISCARDED flag in SYS_TABLES.MIX_LEN.*
3198	@param[in,out] trx dictionary transaction
3199	@param[in] table_id table identifier
3200	@param[in] discarded whether to set or clear the flag
3201	@return DB_SUCCESS or error code /*
3202	dberr_t row_import_update_discarded_flag(trx_t* trx, table_id_t table_id,
3203	bool discarded)
3204	{
3205	pars_info_t* info;
3206	discard_t discard;
3207
3208	static const char sql[] =
3209	"PROCEDURE UPDATE_DISCARDED_FLAG() IS\n"
3210	"DECLARE FUNCTION my_func;\n"
3211	"DECLARE CURSOR c IS\n"
3212	" SELECT MIX_LEN"
3213	" FROM SYS_TABLES"
3214	" WHERE ID = :table_id FOR UPDATE;"
3215	"\n"
3216	"BEGIN\n"
3217	"OPEN c;\n"
3218	"WHILE 1 = 1 LOOP\n"
3219	" FETCH c INTO my_func();\n"
3220	" IF c % NOTFOUND THEN\n"
3221	" EXIT;\n"
3222	" END IF;\n"
3223	"END LOOP;\n"
3224	"UPDATE SYS_TABLES"
3225	" SET MIX_LEN = :flags2"
3226	" WHERE ID = :table_id;\n"
3227	"CLOSE c;\n"
3228	"END;\n";
3229
3230	discard.n_recs = `0`;
3231	discard.state = discarded;
3232	discard.flags2 = ULINT32_UNDEFINED;
3233
3234	info = pars_info_create();
3235
3236	pars_info_add_ull_literal(info, "table_id", table_id);
3237	pars_info_bind_int4_literal(info, "flags2", &discard.flags2);
3238
3239	pars_info_bind_function(
3240	info, "my_func", row_import_set_discarded, &discard);
3241
3242	dberr_t err = que_eval_sql(info, sql, false, trx);
3243
3244	ut_a(discard.n_recs == `1`);
3245	ut_a(discard.flags2 != ULINT32_UNDEFINED);
3246
3247	return(err);
3248	}
3249
3250	struct fil_iterator_t {
3251	pfs_os_file_t file; /!< File handle /
3252	const char* filepath; /!< File path name /
3253	os_offset_t start; /!< From where to start /
3254	os_offset_t end; /!< Where to stop /
3255	os_offset_t file_size; /!< File size in bytes /
3256	ulint n_io_buffers; /!< Number of pages to use*
3257	for IO /*
3258	byte* io_buffer; /!< Buffer to use for IO /
3259	fil_space_crypt_t crypt_data; /!< Crypt data (if encrypted) /*
3260	byte* crypt_io_buffer; /!< IO buffer when encrypted /
3261	};
3262
/******************************************************************//**
TODO: This can be made parallel trivially by chunking up the file and creating
a callback per thread. The main benefit will be to use multiple CPUs for
checksums and compressed tables. We have to do compressed tables block by
block right now. Secondly we need to decompress/compress and copy too much
of data. These are CPU intensive.

Iterate over all the pages in the tablespace.
@param iter - Tablespace iterator
@param block - block to use for IO
@param callback - Callback to inspect and update page contents
@retval DB_SUCCESS or error code */
3275	static
3276	dberr_t
3277	fil_iterate(
3278	/========/
3279	const fil_iterator_t& iter,
3280	buf_block_t* block,
3281	AbstractCallback& callback)
3282	{
3283	os_offset_t offset;
3284	const ulint size = callback.get_page_size().physical();
3285	ulint n_bytes = iter.n_io_buffers * size;
3286
3287	ut_ad(!srv_read_only_mode);
3288
3289	/ TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless*
3290	copying for non-index pages. Unfortunately, it is
3291	required by buf_zip_decompress() /*
3292
3293	for (offset = iter.start; offset < iter.end; offset += n_bytes) {
3294	if (callback.is_interrupted()) {
3295	return DB_INTERRUPTED;
3296	}
3297
3298	byte* io_buffer = iter.io_buffer;
3299	block->frame = io_buffer;
3300
3301	if (block->page.zip.data) {
3302	/ Zip IO is done in the compressed page buffer. /
3303	io_buffer = block->page.zip.data;
3304	}
3305
3306	/ We have to read the exact number of bytes. Otherwise the*
3307	InnoDB IO functions croak on failed reads. /*
3308
3309	n_bytes = ulint(ut_min(os_offset_t(n_bytes),
3310	iter.end - offset));
3311
3312	ut_ad(n_bytes > `0`);
3313	ut_ad(!(n_bytes % size));
3314
3315	const bool encrypted = iter.crypt_data != NULL
3316	&& iter.crypt_data->should_encrypt();
3317	/ Use additional crypt io buffer if tablespace is encrypted /
3318	byte* const readptr = encrypted
3319	? iter.crypt_io_buffer : io_buffer;
3320	byte* const writeptr = readptr;
3321
3322	IORequest read_request(IORequest::READ);
3323	read_request.disable_partial_io_warnings();
3324
3325	dberr_t err = os_file_read_no_error_handling(
3326	read_request, iter.file, readptr, offset, n_bytes, `0`);
3327	if (err != DB_SUCCESS) {
3328	ib::error () << iter.filepath
3329	<< ": os_file_read() failed";
3330	}
3331
3332	bool updated = false;
3333	os_offset_t page_off = offset;
3334	ulint n_pages_read = n_bytes / size;
3335	block->page.id.set_page_no(ulint(page_off / size));
3336
3337	for (ulint i = `0`; i < n_pages_read;
3338	block->page.id.set_page_no(block->page.id.page_no() + `1`),
3339	++i, page_off += size, block->frame += size) {
3340	bool decrypted = false;
3341	err = DB_SUCCESS;
3342	byte* src = readptr + i * size;
3343	byte* dst = io_buffer + i * size;
3344	bool frame_changed = false;
3345	ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
3346	const bool page_compressed
3347	= page_type
3348	== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
3349	\|\| page_type == FIL_PAGE_PAGE_COMPRESSED;
3350	const ulint page_no = page_get_page_no(src);
3351	if (!page_no && page_off) {
3352	const ulint* b = reinterpret_cast<const ulint*>
3353	(src);
3354	const ulint* const e = b + size / sizeof *b;
3355	do {
3356	if (*b++) {
3357	goto page_corrupted;
3358	}
3359	} while (b != e);
3360
3361	/ Proceed to the next page,*
3362	because this one is all zero. /*
3363	continue;
3364	}
3365
3366	if (page_no != page_off / size) {
3367	goto page_corrupted;
3368	}
3369
3370	if (encrypted) {
3371	decrypted = fil_space_decrypt(
3372	iter.crypt_data, dst,
3373	callback.get_page_size(), src, &err);
3374
3375	if (err != DB_SUCCESS) {
3376	return err;
3377	}
3378
3379	if (decrypted) {
3380	updated = true;
3381	} else {
3382	if (!page_compressed
3383	&& !block->page.zip.data) {
3384	block->frame = src;
3385	frame_changed = true;
3386	} else {
3387	ut_ad(dst != src);
3388	memcpy(dst, src, size);
3389	}
3390	}
3391	}
3392
3393	/ If the original page is page_compressed, we need*
3394	to decompress it before adjusting further. /*
3395	if (page_compressed) {
3396	fil_decompress_page(NULL, dst, ulong(size),
3397	NULL);
3398	updated = true;
3399	} else if (buf_page_is_corrupted(
3400	false,
3401	encrypted && !frame_changed
3402	? dst : src,
3403	callback.get_page_size(), NULL)) {
3404	page_corrupted:
3405	ib::warn () << callback.filename()
3406	<< ": Page " << (offset / size)
3407	<< " at offset " << offset
3408	<< " looks corrupted.";
3409	return DB_CORRUPTION;
3410	}
3411
3412	if ((err = callback (page_off, block)) != DB_SUCCESS) {
3413	return err;
3414	} else if (!updated) {
3415	updated = buf_block_get_state(block)
3416	== BUF_BLOCK_FILE_PAGE;
3417	}
3418
3419	/ If tablespace is encrypted we use additional*
3420	temporary scratch area where pages are read
3421	for decrypting readptr == crypt_io_buffer != io_buffer.
3422
3423	Destination for decryption is a buffer pool block
3424	block->frame == dst == io_buffer that is updated.
3425	Pages that did not require decryption even when
3426	tablespace is marked as encrypted are not copied
3427	instead block->frame is set to src == readptr.
3428
3429	For encryption we again use temporary scratch area
3430	writeptr != io_buffer == dst
3431	that is then written to the tablespace
3432
3433	(1) For normal tables io_buffer == dst == writeptr
3434	(2) For only page compressed tables
3435	io_buffer == dst == writeptr
3436	(3) For encrypted (and page compressed)
3437	readptr != io_buffer == dst != writeptr
3438	*/
3439
3440	ut_ad(!encrypted && !page_compressed ?
3441	src == dst && dst == writeptr + (i * size):`1`);
3442	ut_ad(page_compressed && !encrypted ?
3443	src == dst && dst == writeptr + (i * size):`1`);
3444	ut_ad(encrypted ?
3445	src != dst && dst != writeptr + (i * size):`1`);
3446
3447	/ When tablespace is encrypted or compressed its*
3448	first page (i.e. page 0) is not encrypted or
3449	compressed and there is no need to copy frame. /*
3450	if (encrypted && block->page.id.page_no() != `0`) {
3451	byte *local_frame = callback.get_frame(block);
3452	ut_ad((writeptr + (i * size)) != local_frame);
3453	memcpy((writeptr + (i * size)), local_frame, size);
3454	}
3455
3456	if (frame_changed) {
3457	block->frame = dst;
3458	}
3459
3460	src = io_buffer + (i * size);
3461
3462	if (page_compressed) {
3463	ulint len = `0`;
3464
3465	fil_compress_page(
3466	NULL,
3467	src,
3468	NULL,
3469	size,
3470	`0`,/ FIXME: compression level /
3471	`512`,/ FIXME: use proper block size /
3472	encrypted,
3473	&len);
3474	ut_ad(len <= size);
3475	memset(src + len, `0`, size - len);
3476	updated = true;
3477	}
3478
3479	/ Encrypt the page if encryption was used. /
3480	if (encrypted && decrypted) {
3481	byte dest = writeptr + i size;
3482	byte* tmp = fil_encrypt_buf(
3483	iter.crypt_data,
3484	block->page.id.space(),
3485	block->page.id.page_no(),
3486	mach_read_from_8(src + FIL_PAGE_LSN),
3487	src, callback.get_page_size(), dest);
3488
3489	if (tmp == src) {
3490	/ TODO: remove unnecessary memcpy's /
3491	ut_ad(dest != src);
3492	memcpy(dest, src, size);
3493	}
3494
3495	updated = true;
3496	}
3497	}
3498
3499	/ A page was updated in the set, write back to disk. /
3500	if (updated) {
3501	IORequest write_request(IORequest::WRITE);
3502
3503	err = os_file_write(write_request,
3504	iter.filepath, iter.file,
3505	writeptr, offset, n_bytes);
3506
3507	if (err != DB_SUCCESS) {
3508	return err;
3509	}
3510	}
3511	}
3512
3513	return DB_SUCCESS;
3514	}
3515
3516	/******************************************************************//**
3517	Iterate over all the pages in the tablespace.
3518	@param table - the table definiton in the server
3519	@param n_io_buffers - number of blocks to read and write together
3520	@param callback - functor that will do the page updates
3521	@return DB_SUCCESS or error code /*
3522	static
3523	dberr_t
3524	fil_tablespace_iterate(
3525	/===================/
3526	dict_table_t* table,
3527	ulint n_io_buffers,
3528	AbstractCallback& callback)
3529	{
3530	dberr_t err;
3531	pfs_os_file_t file;
3532	char* filepath;
3533
3534	ut_a(n_io_buffers > `0`);
3535	ut_ad(!srv_read_only_mode);
3536
3537	DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
3538	return(DB_CORRUPTION););
3539
3540	/ Make sure the data_dir_path is set. /
3541	dict_get_and_save_data_dir_path(table, false);
3542
3543	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3544	ut_a(table->data_dir_path);
3545
3546	filepath = fil_make_filepath(
3547	table->data_dir_path, table->name.m_name, IBD, true);
3548	} else {
3549	filepath = fil_make_filepath(
3550	NULL, table->name.m_name, IBD, false);
3551	}
3552
3553	if (!filepath) {
3554	return(DB_OUT_OF_MEMORY);
3555	} else {
3556	bool success;
3557
3558	file = os_file_create_simple_no_error_handling(
3559	innodb_data_file_key, filepath,
3560	OS_FILE_OPEN, OS_FILE_READ_WRITE, false, &success);
3561
3562	if (!success) {
3563	/ The following call prints an error message /
3564	os_file_get_last_error(true);
3565	ib::error () << "Trying to import a tablespace,"
3566	" but could not open the tablespace file "
3567	<< filepath;
3568	ut_free(filepath);
3569	return DB_TABLESPACE_NOT_FOUND;
3570	} else {
3571	err = DB_SUCCESS;
3572	}
3573	}
3574
3575	callback.set_file(filepath, file);
3576
3577	os_offset_t file_size = os_file_get_size(file);
3578	ut_a(file_size != (os_offset_t) -`1`);
3579
3580	/ Allocate a page to read in the tablespace header, so that we*
3581	can determine the page size and zip_size (if it is compressed).
3582	We allocate an extra page in case it is a compressed table. One
3583	page is to ensure alignement. /*
3584
3585	void* page_ptr = ut_malloc_nokey(`3U` << srv_page_size_shift);
3586	byte* page = static_cast<byte*>(ut_align(page_ptr, srv_page_size));
3587
3588	buf_block_t* block = reinterpret_cast<buf_block_t*>
3589	(ut_zalloc_nokey(sizeof *block));
3590	block->frame = page;
3591	block->page.id.copy_from(page_id_t (`0`, `0`));
3592	block->page.io_fix = BUF_IO_NONE;
3593	block->page.buf_fix_count = `1`;
3594	block->page.state = BUF_BLOCK_FILE_PAGE;
3595
3596	/ Read the first page and determine the page and zip size. /
3597
3598	IORequest request(IORequest::READ);
3599	request.disable_partial_io_warnings();
3600
3601	err = os_file_read_no_error_handling(request, file, page, `0`,
3602	srv_page_size, `0`);
3603
3604	if (err == DB_SUCCESS) {
3605	err = callback.init(file_size, block);
3606	}
3607
3608	if (err == DB_SUCCESS) {
3609	block->page.id.copy_from(
3610	page_id_t (callback.get_space_id(), `0`));
3611	block->page.size.copy_from(callback.get_page_size());
3612	if (block->page.size.is_compressed()) {
3613	page_zip_set_size(&block->page.zip,
3614	callback.get_page_size().physical());
3615	/ ROW_FORMAT=COMPRESSED is not optimised for block IO*
3616	for now. We do the IMPORT page by page. /*
3617	n_io_buffers = `1`;
3618	}
3619
3620	fil_iterator_t iter;
3621
3622	/ read (optional) crypt data /
3623	iter.crypt_data = fil_space_read_crypt_data(
3624	callback.get_page_size(), page);
3625
3626	/ If tablespace is encrypted, it needs extra buffers /
3627	if (iter.crypt_data && n_io_buffers > `1`) {
3628	/ decrease io buffers so that memory*
3629	consumption will not double /*
3630	n_io_buffers /= `2`;
3631	}
3632
3633	iter.file = file;
3634	iter.start = `0`;
3635	iter.end = file_size;
3636	iter.filepath = filepath;
3637	iter.file_size = file_size;
3638	iter.n_io_buffers = n_io_buffers;
3639
3640	/ Add an extra page for compressed page scratch area. /
3641	void* io_buffer = ut_malloc_nokey(
3642	(`2` + iter.n_io_buffers) << srv_page_size_shift);
3643
3644	iter.io_buffer = static_cast<byte*>(
3645	ut_align(io_buffer, srv_page_size));
3646
3647	void* crypt_io_buffer = NULL;
3648	if (iter.crypt_data) {
3649	crypt_io_buffer = ut_malloc_nokey(
3650	(`2` + iter.n_io_buffers)
3651	<< srv_page_size_shift);
3652	iter.crypt_io_buffer = static_cast<byte*>(
3653	ut_align(crypt_io_buffer, srv_page_size));
3654	}
3655
3656	if (block->page.zip.ssize) {
3657	ut_ad(iter.n_io_buffers == `1`);
3658	block->frame = iter.io_buffer;
3659	block->page.zip.data = block->frame + srv_page_size;
3660	}
3661
3662	err = fil_iterate(iter, block, callback);
3663
3664	if (iter.crypt_data) {
3665	fil_space_destroy_crypt_data(&iter.crypt_data);
3666	}
3667
3668	ut_free(crypt_io_buffer);
3669	ut_free(io_buffer);
3670	}
3671
3672	if (err == DB_SUCCESS) {
3673	ib::info () << "Sync to disk";
3674
3675	if (!os_file_flush(file)) {
3676	ib::info () << "os_file_flush() failed!";
3677	err = DB_IO_ERROR;
3678	} else {
3679	ib::info () << "Sync to disk - done!";
3680	}
3681	}
3682
3683	os_file_close(file);
3684
3685	ut_free(page_ptr);
3686	ut_free(filepath);
3687	ut_free(block);
3688
3689	return(err);
3690	}
3691
3692	/***************************************************************//**
3693	Imports a tablespace. The space id in the .ibd file must match the space id
3694	of the table in the data dictionary.
3695	@return error code or DB_SUCCESS /*
3696	dberr_t
3697	row_import_for_mysql(
3698	/=================/
3699	dict_table_t* table, /!< in/out: table /
3700	row_prebuilt_t* prebuilt) /!< in: prebuilt struct in MySQL /
3701	{
3702	dberr_t err;
3703	trx_t* trx;
3704	ib_uint64_t autoinc = `0`;
3705	char* filepath = NULL;
3706	ulint space_flags MY_ATTRIBUTE((unused));
3707
3708	/ The caller assured that this is not read_only_mode and that no*
3709	temorary tablespace is being imported. /*
3710	ut_ad(!srv_read_only_mode);
3711	ut_ad(!table->is_temporary());
3712
3713	ut_ad(table->space_id);
3714	ut_ad(table->space_id < SRV_LOG_SPACE_FIRST_ID);
3715	ut_ad(prebuilt->trx);
3716	ut_ad(!table->is_readable());
3717
3718	ibuf_delete_for_discarded_space(table->space_id);
3719
3720	trx_start_if_not_started(prebuilt->trx, true);
3721
3722	trx = trx_create();
3723
3724	/ So that the table is not DROPped during recovery. /
3725	trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
3726
3727	trx_start_if_not_started(trx, true);
3728
3729	/ So that we can send error messages to the user. /
3730	trx->mysql_thd = prebuilt->trx->mysql_thd;
3731
3732	/ Ensure that the table will be dropped by trx_rollback_active()*
3733	in case of a crash. /*
3734
3735	trx->table_id = table->id;
3736
3737	/ Assign an undo segment for the transaction, so that the*
3738	transaction will be recovered after a crash. /*
3739
3740	/ TODO: Do not write any undo log for the IMPORT cleanup. /
3741	{
3742	mtr_t mtr;
3743	mtr.start();
3744	trx_undo_assign(trx, &err, &mtr);
3745	mtr.commit();
3746	}
3747
3748	DBUG_EXECUTE_IF("ib_import_undo_assign_failure",
3749	err = DB_TOO_MANY_CONCURRENT_TRXS;);
3750
3751	if (err != DB_SUCCESS) {
3752
3753	return(row_import_cleanup(prebuilt, trx, err));
3754
3755	} else if (trx->rsegs.m_redo.undo == `0`) {
3756
3757	err = DB_TOO_MANY_CONCURRENT_TRXS;
3758	return(row_import_cleanup(prebuilt, trx, err));
3759	}
3760
3761	prebuilt->trx->op_info = "read meta-data file";
3762
3763	/ Prevent DDL operations while we are checking. /
3764
3765	rw_lock_s_lock_func(dict_operation_lock, `0`, __FILE__, __LINE__);
3766
3767	row_import cfg;
3768
3769	memset(&cfg, `0x0`, sizeof(cfg));
3770
3771	err = row_import_read_cfg(table, trx->mysql_thd, cfg);
3772
3773	/ Check if the table column definitions match the contents*
3774	of the config file. /*
3775
3776	if (err == DB_SUCCESS) {
3777
3778	/ We have a schema file, try and match it with our*
3779	data dictionary. /*
3780
3781	err = cfg.match_schema(trx->mysql_thd);
3782
3783	/ Update index->page and SYS_INDEXES.PAGE_NO to match the*
3784	B-tree root page numbers in the tablespace. Use the index
3785	name from the .cfg file to find match. /*
3786
3787	if (err == DB_SUCCESS) {
3788	cfg.set_root_by_name();
3789	autoinc = cfg.m_autoinc;
3790	}
3791
3792	rw_lock_s_unlock_gen(dict_operation_lock, `0`);
3793
3794	DBUG_EXECUTE_IF("ib_import_set_index_root_failure",
3795	err = DB_TOO_MANY_CONCURRENT_TRXS;);
3796
3797	} else if (cfg.m_missing) {
3798
3799	rw_lock_s_unlock_gen(dict_operation_lock, `0`);
3800
3801	/ We don't have a schema file, we will have to discover*
3802	the index root pages from the .ibd file and skip the schema
3803	matching step. /*
3804
3805	ut_a(err == DB_FAIL);
3806
3807	cfg.m_page_size.copy_from(univ_page_size);
3808
3809	FetchIndexRootPages fetchIndexRootPages(table, trx);
3810
3811	err = fil_tablespace_iterate(
3812	table, IO_BUFFER_SIZE(cfg.m_page_size.physical()),
3813	fetchIndexRootPages);
3814
3815	if (err == DB_SUCCESS) {
3816
3817	err = fetchIndexRootPages.build_row_import(&cfg);
3818
3819	/ Update index->page and SYS_INDEXES.PAGE_NO*
3820	to match the B-tree root page numbers in the
3821	tablespace. /*
3822
3823	if (err == DB_SUCCESS) {
3824	err = cfg.set_root_by_heuristic();
3825	}
3826	}
3827
3828	space_flags = fetchIndexRootPages.get_space_flags();
3829
3830	} else {
3831	rw_lock_s_unlock_gen(dict_operation_lock, `0`);
3832	}
3833
3834	if (err != DB_SUCCESS) {
3835	return(row_import_error(prebuilt, trx, err));
3836	}
3837
3838	prebuilt->trx->op_info = "importing tablespace";
3839
3840	ib::info () << "Phase I - Update all pages";
3841
3842	/ Iterate over all the pages and do the sanity checking and*
3843	the conversion required to import the tablespace. /*
3844
3845	PageConverter converter(&cfg, table->space_id, trx);
3846
3847	/ Set the IO buffer size in pages. /
3848
3849	err = fil_tablespace_iterate(
3850	table, IO_BUFFER_SIZE(cfg.m_page_size.physical()), converter);
3851
3852	DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure",
3853	err = DB_TOO_MANY_CONCURRENT_TRXS;);
3854
3855	if (err != DB_SUCCESS) {
3856	char table_name[MAX_FULL_NAME_LEN + `1`];
3857
3858	innobase_format_name(
3859	table_name, sizeof(table_name),
3860	table->name.m_name);
3861
3862	if (err != DB_DECRYPTION_FAILED) {
3863
3864	ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
3865	ER_INTERNAL_ERROR,
3866	"Cannot reset LSNs in table %s : %s",
3867	table_name, ut_strerr(err));
3868	}
3869
3870	return(row_import_cleanup(prebuilt, trx, err));
3871	}
3872
3873	row_mysql_lock_data_dictionary(trx);
3874
3875	/ If the table is stored in a remote tablespace, we need to*
3876	determine that filepath from the link file and system tables.
3877	Find the space ID in SYS_TABLES since this is an ALTER TABLE. /*
3878	dict_get_and_save_data_dir_path(table, true);
3879
3880	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3881	ut_a(table->data_dir_path);
3882
3883	filepath = fil_make_filepath(
3884	table->data_dir_path, table->name.m_name, IBD, true);
3885	} else {
3886	filepath = fil_make_filepath(
3887	NULL, table->name.m_name, IBD, false);
3888	}
3889
3890	DBUG_EXECUTE_IF(
3891	"ib_import_OOM_15",
3892	ut_free(filepath);
3893	filepath = NULL;
3894	);
3895
3896	if (filepath == NULL) {
3897	row_mysql_unlock_data_dictionary(trx);
3898	return(row_import_cleanup(prebuilt, trx, DB_OUT_OF_MEMORY));
3899	}
3900
3901	/ Open the tablespace so that we can access via the buffer pool.*
3902	We set the 2nd param (fix_dict = true) here because we already
3903	have an x-lock on dict_operation_lock and dict_sys->mutex.
3904	The tablespace is initially opened as a temporary one, because
3905	we will not be writing any redo log for it before we have invoked
3906	fil_space_t::set_imported() to declare it a persistent tablespace. /*
3907
3908	ulint fsp_flags = dict_tf_to_fsp_flags(table->flags);
3909
3910	table->space = fil_ibd_open(
3911	true, true, FIL_TYPE_IMPORT, table->space_id,
3912	fsp_flags, table->name, filepath, &err);
3913
3914	ut_ad((table->space == NULL) == (err != DB_SUCCESS));
3915	DBUG_EXECUTE_IF("ib_import_open_tablespace_failure",
3916	err = DB_TABLESPACE_NOT_FOUND; table->space = NULL;);
3917
3918	if (!table->space) {
3919	row_mysql_unlock_data_dictionary(trx);
3920
3921	ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
3922	ER_GET_ERRMSG,
3923	err, ut_strerr(err), filepath);
3924
3925	ut_free(filepath);
3926
3927	return(row_import_cleanup(prebuilt, trx, err));
3928	}
3929
3930	row_mysql_unlock_data_dictionary(trx);
3931
3932	ut_free(filepath);
3933
3934	err = ibuf_check_bitmap_on_import(trx, table->space);
3935
3936	DBUG_EXECUTE_IF("ib_import_check_bitmap_failure", err = DB_CORRUPTION;);
3937
3938	if (err != DB_SUCCESS) {
3939	return(row_import_cleanup(prebuilt, trx, err));
3940	}
3941
3942	/ The first index must always be the clustered index. /
3943
3944	dict_index_t* index = dict_table_get_first_index(table);
3945
3946	if (!dict_index_is_clust(index)) {
3947	return(row_import_error(prebuilt, trx, DB_CORRUPTION));
3948	}
3949
3950	/ Update the Btree segment headers for index node and*
3951	leaf nodes in the root page. Set the new space id. /*
3952
3953	err = btr_root_adjust_on_import(index);
3954
3955	DBUG_EXECUTE_IF("ib_import_cluster_root_adjust_failure",
3956	err = DB_CORRUPTION;);
3957
3958	if (err != DB_SUCCESS) {
3959	return(row_import_error(prebuilt, trx, err));
3960	} else if (cfg.requires_purge(index->name)) {
3961
3962	/ Purge any delete-marked records that couldn't be*
3963	purged during the page conversion phase from the
3964	cluster index. /*
3965
3966	IndexPurge purge(trx, index);
3967
3968	trx->op_info = "cluster: purging delete marked records";
3969
3970	err = purge.garbage_collect();
3971
3972	trx->op_info = "";
3973	}
3974
3975	DBUG_EXECUTE_IF("ib_import_cluster_failure", err = DB_CORRUPTION;);
3976
3977	if (err != DB_SUCCESS) {
3978	return(row_import_error(prebuilt, trx, err));
3979	}
3980
3981	/ For secondary indexes, purge any records that couldn't be purged*
3982	during the page conversion phase. /*
3983
3984	err = row_import_adjust_root_pages_of_secondary_indexes(
3985	trx, table, cfg);
3986
3987	DBUG_EXECUTE_IF("ib_import_sec_root_adjust_failure",
3988	err = DB_CORRUPTION;);
3989
3990	if (err != DB_SUCCESS) {
3991	return(row_import_error(prebuilt, trx, err));
3992	}
3993
3994	/ Ensure that the next available DB_ROW_ID is not smaller than*
3995	any DB_ROW_ID stored in the table. /*
3996
3997	if (prebuilt->clust_index_was_generated) {
3998
3999	err = row_import_set_sys_max_row_id(prebuilt, table);
4000
4001	if (err != DB_SUCCESS) {
4002	return(row_import_error(prebuilt, trx, err));
4003	}
4004	}
4005
4006	ib::info () << "Phase III - Flush changes to disk";
4007
4008	/ Ensure that all pages dirtied during the IMPORT make it to disk.*
4009	The only dirty pages generated should be from the pessimistic purge
4010	of delete marked records that couldn't be purged in Phase I. /*
4011
4012	{
4013	FlushObserver observer(prebuilt->table->space, trx, NULL);
4014	buf_LRU_flush_or_remove_pages(prebuilt->table->space_id,
4015	&observer);
4016
4017	if (observer.is_interrupted()) {
4018	ib::info () << "Phase III - Flush interrupted";
4019	return(row_import_error(prebuilt, trx,
4020	DB_INTERRUPTED));
4021	}
4022	}
4023
4024	ib::info () << "Phase IV - Flush complete";
4025	prebuilt->table->space->set_imported();
4026
4027	/ The dictionary latches will be released in in row_import_cleanup()*
4028	after the transaction commit, for both success and error. /*
4029
4030	row_mysql_lock_data_dictionary(trx);
4031
4032	/ Update the root pages of the table's indexes. /
4033	err = row_import_update_index_root(trx, table, false, true);
4034
4035	if (err != DB_SUCCESS) {
4036	return(row_import_error(prebuilt, trx, err));
4037	}
4038
4039	err = row_import_update_discarded_flag(trx, table->id, false);
4040
4041	if (err != DB_SUCCESS) {
4042	return(row_import_error(prebuilt, trx, err));
4043	}
4044
4045	table->file_unreadable = false;
4046	table->flags2 &= ~DICT_TF2_DISCARDED;
4047
4048	/ Set autoinc value read from .cfg file, if one was specified.*
4049	Otherwise, keep the PAGE_ROOT_AUTO_INC as is. /*
4050	if (autoinc) {
4051	ib::info () << table->name << " autoinc value set to "
4052	<< autoinc;
4053
4054	table->autoinc = autoinc--;
4055	btr_write_autoinc(dict_table_get_first_index(table), autoinc);
4056	}
4057
4058	return(row_import_cleanup(prebuilt, trx, err));
4059	}
4060

/* Source: MariaDB/storage/innobase/row/row0import.cc */