1/*****************************************************************************
2
3Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2016, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file dict/dict0boot.cc
22Data dictionary creation and booting
23
24Created 4/18/1996 Heikki Tuuri
25*******************************************************/
26
27#include "ha_prototypes.h"
28
29#include "dict0boot.h"
30#include "dict0crea.h"
31#include "btr0btr.h"
32#include "dict0load.h"
33#include "trx0trx.h"
34#include "srv0srv.h"
35#include "ibuf0ibuf.h"
36#include "buf0flu.h"
37#include "log0recv.h"
38#include "os0file.h"
39
40/**********************************************************************//**
41Gets a pointer to the dictionary header and x-latches its page.
42@return pointer to the dictionary header, page x-latched */
43dict_hdr_t*
44dict_hdr_get(
45/*=========*/
46 mtr_t* mtr) /*!< in: mtr */
47{
48 buf_block_t* block;
49 dict_hdr_t* header;
50
51 block = buf_page_get(page_id_t(DICT_HDR_SPACE, DICT_HDR_PAGE_NO),
52 univ_page_size, RW_X_LATCH, mtr);
53 header = DICT_HDR + buf_block_get_frame(block);
54
55 buf_block_dbg_add_level(block, SYNC_DICT_HEADER);
56
57 return(header);
58}
59
60/**********************************************************************//**
61Returns a new table, index, or space id. */
62void
63dict_hdr_get_new_id(
64/*================*/
65 table_id_t* table_id, /*!< out: table id
66 (not assigned if NULL) */
67 index_id_t* index_id, /*!< out: index id
68 (not assigned if NULL) */
69 ulint* space_id, /*!< out: space id
70 (not assigned if NULL) */
71 const dict_table_t* table, /*!< in: table */
72 bool disable_redo) /*!< in: if true and table
73 object is NULL
74 then disable-redo */
75{
76 dict_hdr_t* dict_hdr;
77 ib_id_t id;
78 mtr_t mtr;
79
80 mtr_start(&mtr);
81 if (table) {
82 if (table->is_temporary()) {
83 mtr.set_log_mode(MTR_LOG_NO_REDO);
84 }
85 } else if (disable_redo) {
86 /* In non-read-only mode we need to ensure that space-id header
87 page is written to disk else if page is removed from buffer
88 cache and re-loaded it would assign temporary tablespace id
89 to another tablespace.
90 This is not a case with read-only mode as there is no new object
91 that is created except temporary tablespace. */
92 mtr.set_log_mode(srv_read_only_mode
93 ? MTR_LOG_NONE : MTR_LOG_NO_REDO);
94 }
95
96 /* Server started and let's say space-id = x
97 - table created with file-per-table
98 - space-id = x + 1
99 - crash
100 Case 1: If it was redo logged then we know that it will be
101 restored to x + 1
102 Case 2: if not redo-logged
103 Header will have the old space-id = x
104 This is OK because on restart there is no object with
105 space id = x + 1
106 Case 3:
107 space-id = x (on start)
108 space-id = x+1 (temp-table allocation) - no redo logging
109 space-id = x+2 (non-temp-table allocation), this get's
110 redo logged.
111 If there is a crash there will be only 2 entries
112 x (original) and x+2 (new) and disk hdr will be updated
113 to reflect x + 2 entry.
114 We cannot allocate the same space id to different objects. */
115 dict_hdr = dict_hdr_get(&mtr);
116
117 if (table_id) {
118 id = mach_read_from_8(dict_hdr + DICT_HDR_TABLE_ID);
119 id++;
120 mlog_write_ull(dict_hdr + DICT_HDR_TABLE_ID, id, &mtr);
121 *table_id = id;
122 }
123
124 if (index_id) {
125 id = mach_read_from_8(dict_hdr + DICT_HDR_INDEX_ID);
126 id++;
127 mlog_write_ull(dict_hdr + DICT_HDR_INDEX_ID, id, &mtr);
128 *index_id = id;
129 }
130
131 if (space_id) {
132 *space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID,
133 MLOG_4BYTES, &mtr);
134 if (fil_assign_new_space_id(space_id)) {
135 mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID,
136 *space_id, MLOG_4BYTES, &mtr);
137 }
138 }
139
140 mtr_commit(&mtr);
141}
142
143/**********************************************************************//**
144Writes the current value of the row id counter to the dictionary header file
145page. */
146void
147dict_hdr_flush_row_id(void)
148/*=======================*/
149{
150 dict_hdr_t* dict_hdr;
151 row_id_t id;
152 mtr_t mtr;
153
154 ut_ad(mutex_own(&dict_sys->mutex));
155
156 id = dict_sys->row_id;
157
158 mtr_start(&mtr);
159
160 dict_hdr = dict_hdr_get(&mtr);
161
162 mlog_write_ull(dict_hdr + DICT_HDR_ROW_ID, id, &mtr);
163
164 mtr_commit(&mtr);
165}
166
167/*****************************************************************//**
168Creates the file page for the dictionary header. This function is
169called only at the database creation.
170@return TRUE if succeed */
171static
172ibool
173dict_hdr_create(
174/*============*/
175 mtr_t* mtr) /*!< in: mtr */
176{
177 buf_block_t* block;
178 dict_hdr_t* dict_header;
179 ulint root_page_no;
180
181 ut_ad(mtr);
182 compile_time_assert(DICT_HDR_SPACE == 0);
183
184 /* Create the dictionary header file block in a new, allocated file
185 segment in the system tablespace */
186 block = fseg_create(fil_system.sys_space, 0,
187 DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
188
189 ut_a(DICT_HDR_PAGE_NO == block->page.id.page_no());
190
191 dict_header = dict_hdr_get(mtr);
192
193 /* Start counting row, table, index, and tree ids from
194 DICT_HDR_FIRST_ID */
195 mlog_write_ull(dict_header + DICT_HDR_ROW_ID,
196 DICT_HDR_FIRST_ID, mtr);
197
198 mlog_write_ull(dict_header + DICT_HDR_TABLE_ID,
199 DICT_HDR_FIRST_ID, mtr);
200
201 mlog_write_ull(dict_header + DICT_HDR_INDEX_ID,
202 DICT_HDR_FIRST_ID, mtr);
203
204 mlog_write_ulint(dict_header + DICT_HDR_MAX_SPACE_ID,
205 0, MLOG_4BYTES, mtr);
206
207 /* Obsolete, but we must initialize it anyway. */
208 mlog_write_ulint(dict_header + DICT_HDR_MIX_ID_LOW,
209 DICT_HDR_FIRST_ID, MLOG_4BYTES, mtr);
210
211 /* Create the B-tree roots for the clustered indexes of the basic
212 system tables */
213
214 /*--------------------------*/
215 root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
216 fil_system.sys_space, DICT_TABLES_ID,
217 dict_ind_redundant, NULL, mtr);
218 if (root_page_no == FIL_NULL) {
219
220 return(FALSE);
221 }
222
223 mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
224 MLOG_4BYTES, mtr);
225 /*--------------------------*/
226 root_page_no = btr_create(DICT_UNIQUE,
227 fil_system.sys_space, DICT_TABLE_IDS_ID,
228 dict_ind_redundant, NULL, mtr);
229 if (root_page_no == FIL_NULL) {
230
231 return(FALSE);
232 }
233
234 mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
235 MLOG_4BYTES, mtr);
236 /*--------------------------*/
237 root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
238 fil_system.sys_space, DICT_COLUMNS_ID,
239 dict_ind_redundant, NULL, mtr);
240 if (root_page_no == FIL_NULL) {
241
242 return(FALSE);
243 }
244
245 mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
246 MLOG_4BYTES, mtr);
247 /*--------------------------*/
248 root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
249 fil_system.sys_space, DICT_INDEXES_ID,
250 dict_ind_redundant, NULL, mtr);
251 if (root_page_no == FIL_NULL) {
252
253 return(FALSE);
254 }
255
256 mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
257 MLOG_4BYTES, mtr);
258 /*--------------------------*/
259 root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
260 fil_system.sys_space, DICT_FIELDS_ID,
261 dict_ind_redundant, NULL, mtr);
262 if (root_page_no == FIL_NULL) {
263
264 return(FALSE);
265 }
266
267 mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
268 MLOG_4BYTES, mtr);
269 /*--------------------------*/
270
271 return(TRUE);
272}
273
274/*****************************************************************//**
275Initializes the data dictionary memory structures when the database is
276started. This function is also called when the data dictionary is created.
277@return DB_SUCCESS or error code. */
278dberr_t
279dict_boot(void)
280/*===========*/
281{
282 dict_table_t* table;
283 dict_index_t* index;
284 dict_hdr_t* dict_hdr;
285 mem_heap_t* heap;
286 mtr_t mtr;
287
288 /* Be sure these constants do not ever change. To avoid bloat,
289 only check the *NUM_FIELDS* in each table */
290
291 ut_ad(DICT_NUM_COLS__SYS_TABLES == 8);
292 ut_ad(DICT_NUM_FIELDS__SYS_TABLES == 10);
293 ut_ad(DICT_NUM_FIELDS__SYS_TABLE_IDS == 2);
294 ut_ad(DICT_NUM_COLS__SYS_COLUMNS == 7);
295 ut_ad(DICT_NUM_FIELDS__SYS_COLUMNS == 9);
296 ut_ad(DICT_NUM_COLS__SYS_INDEXES == 8);
297 ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 10);
298 ut_ad(DICT_NUM_COLS__SYS_FIELDS == 3);
299 ut_ad(DICT_NUM_FIELDS__SYS_FIELDS == 5);
300 ut_ad(DICT_NUM_COLS__SYS_FOREIGN == 4);
301 ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN == 6);
302 ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2);
303 ut_ad(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4);
304 ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6);
305
306 mtr_start(&mtr);
307
308 /* Create the hash tables etc. */
309 dict_init();
310
311 heap = mem_heap_create(450);
312
313 mutex_enter(&dict_sys->mutex);
314
315 /* Get the dictionary header */
316 dict_hdr = dict_hdr_get(&mtr);
317
318 /* Because we only write new row ids to disk-based data structure
319 (dictionary header) when it is divisible by
320 DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
321 the latest value of the row id counter. Therefore we advance
322 the counter at the database startup to avoid overlapping values.
323 Note that when a user after database startup first time asks for
324 a new row id, then because the counter is now divisible by
325 ..._MARGIN, it will immediately be updated to the disk-based
326 header. */
327
328 dict_sys->row_id = DICT_HDR_ROW_ID_WRITE_MARGIN
329 + ut_uint64_align_up(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID),
330 DICT_HDR_ROW_ID_WRITE_MARGIN);
331
332 /* Insert into the dictionary cache the descriptions of the basic
333 system tables */
334 /*-------------------------*/
335 table = dict_mem_table_create("SYS_TABLES", fil_system.sys_space,
336 8, 0, 0, 0);
337
338 dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0,
339 MAX_FULL_NAME_LEN);
340 dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 8);
341 /* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
342 dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
343 /* The low order bit of TYPE is always set to 1. If ROW_FORMAT
344 is not REDUNDANT or COMPACT, this field matches table->flags. */
345 dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
346 dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
347 /* MIX_LEN may contain additional table flags when
348 ROW_FORMAT!=REDUNDANT. */
349 dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
350 dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
351 dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
352
353 table->id = DICT_TABLES_ID;
354
355 dict_table_add_system_columns(table, heap);
356 table->add_to_cache();
357 dict_sys->sys_tables = table;
358 mem_heap_empty(heap);
359
360 index = dict_mem_index_create(table, "CLUST_IND",
361 DICT_UNIQUE | DICT_CLUSTERED, 1);
362
363 dict_mem_index_add_field(index, "NAME", 0);
364
365 index->id = DICT_TABLES_ID;
366 index = dict_index_add_to_cache(
367 index, mach_read_from_4(dict_hdr + DICT_HDR_TABLES));
368 ut_a(index);
369 ut_ad(!table->is_instant());
370 table->indexes.start->n_core_null_bytes = UT_BITS_IN_BYTES(
371 unsigned(table->indexes.start->n_nullable));
372
373 /*-------------------------*/
374 index = dict_mem_index_create(table, "ID_IND", DICT_UNIQUE, 1);
375 dict_mem_index_add_field(index, "ID", 0);
376
377 index->id = DICT_TABLE_IDS_ID;
378 index = dict_index_add_to_cache(
379 index, mach_read_from_4(dict_hdr + DICT_HDR_TABLE_IDS));
380 ut_a(index);
381
382 /*-------------------------*/
383 table = dict_mem_table_create("SYS_COLUMNS", fil_system.sys_space,
384 7, 0, 0, 0);
385
386 dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8);
387 dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
388 dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
389 dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
390 dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4);
391 dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4);
392 dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4);
393
394 table->id = DICT_COLUMNS_ID;
395
396 dict_table_add_system_columns(table, heap);
397 table->add_to_cache();
398 dict_sys->sys_columns = table;
399 mem_heap_empty(heap);
400
401 index = dict_mem_index_create(table, "CLUST_IND",
402 DICT_UNIQUE | DICT_CLUSTERED, 2);
403
404 dict_mem_index_add_field(index, "TABLE_ID", 0);
405 dict_mem_index_add_field(index, "POS", 0);
406
407 index->id = DICT_COLUMNS_ID;
408 index = dict_index_add_to_cache(
409 index, mach_read_from_4(dict_hdr + DICT_HDR_COLUMNS));
410 ut_a(index);
411 ut_ad(!table->is_instant());
412 table->indexes.start->n_core_null_bytes = UT_BITS_IN_BYTES(
413 unsigned(table->indexes.start->n_nullable));
414
415 /*-------------------------*/
416 table = dict_mem_table_create("SYS_INDEXES", fil_system.sys_space,
417 DICT_NUM_COLS__SYS_INDEXES, 0, 0, 0);
418
419 dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8);
420 dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 8);
421 dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
422 dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
423 dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
424 /* SYS_INDEXES.SPACE is redundant and not being read;
425 SYS_TABLES.SPACE is being used instead. */
426 dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
427 dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
428 dict_mem_table_add_col(table, heap, "MERGE_THRESHOLD", DATA_INT, 0, 4);
429
430 table->id = DICT_INDEXES_ID;
431
432 dict_table_add_system_columns(table, heap);
433 /* The column SYS_INDEXES.MERGE_THRESHOLD was "instantly"
434 added in MySQL 5.7 and MariaDB 10.2.2. Assign it DEFAULT NULL.
435 Because of file format compatibility, we must treat SYS_INDEXES
436 as a special case, relaxing some debug assertions
437 for DICT_INDEXES_ID. */
438 dict_table_get_nth_col(table, DICT_COL__SYS_INDEXES__MERGE_THRESHOLD)
439 ->def_val.len = UNIV_SQL_NULL;
440 table->add_to_cache();
441 dict_sys->sys_indexes = table;
442 mem_heap_empty(heap);
443
444 index = dict_mem_index_create(table, "CLUST_IND",
445 DICT_UNIQUE | DICT_CLUSTERED, 2);
446
447 dict_mem_index_add_field(index, "TABLE_ID", 0);
448 dict_mem_index_add_field(index, "ID", 0);
449
450 index->id = DICT_INDEXES_ID;
451 index = dict_index_add_to_cache(
452 index, mach_read_from_4(dict_hdr + DICT_HDR_INDEXES));
453 ut_a(index);
454 ut_ad(!table->is_instant());
455 table->indexes.start->n_core_null_bytes = UT_BITS_IN_BYTES(
456 unsigned(table->indexes.start->n_nullable));
457
458 /*-------------------------*/
459 table = dict_mem_table_create("SYS_FIELDS", fil_system.sys_space,
460 3, 0, 0, 0);
461
462 dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 8);
463 dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
464 dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);
465
466 table->id = DICT_FIELDS_ID;
467
468 dict_table_add_system_columns(table, heap);
469 table->add_to_cache();
470 dict_sys->sys_fields = table;
471 mem_heap_free(heap);
472
473 index = dict_mem_index_create(table, "CLUST_IND",
474 DICT_UNIQUE | DICT_CLUSTERED, 2);
475
476 dict_mem_index_add_field(index, "INDEX_ID", 0);
477 dict_mem_index_add_field(index, "POS", 0);
478
479 index->id = DICT_FIELDS_ID;
480 index = dict_index_add_to_cache(
481 index, mach_read_from_4(dict_hdr + DICT_HDR_FIELDS));
482 ut_a(index);
483 ut_ad(!table->is_instant());
484 table->indexes.start->n_core_null_bytes = UT_BITS_IN_BYTES(
485 unsigned(table->indexes.start->n_nullable));
486
487 mtr_commit(&mtr);
488
489 /*-------------------------*/
490
491 /* Initialize the insert buffer table and index for each tablespace */
492
493 dberr_t err = DB_SUCCESS;
494
495 err = ibuf_init_at_db_start();
496
497 if (err == DB_SUCCESS) {
498 if (srv_read_only_mode
499 && srv_force_recovery != SRV_FORCE_NO_LOG_REDO
500 && !ibuf_is_empty()) {
501
502 if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
503 ib::error() << "Change buffer must be empty when"
504 " --innodb-read-only is set!"
505 "You can try to recover the database with innodb_force_recovery=5";
506
507 err = DB_ERROR;
508 } else {
509 ib::warn() << "Change buffer not empty when --innodb-read-only "
510 "is set! but srv_force_recovery = " << srv_force_recovery
511 << " , ignoring.";
512 }
513 }
514
515 if (err == DB_SUCCESS) {
516 /* Load definitions of other indexes on system tables */
517
518 dict_load_sys_table(dict_sys->sys_tables);
519 dict_load_sys_table(dict_sys->sys_columns);
520 dict_load_sys_table(dict_sys->sys_indexes);
521 dict_load_sys_table(dict_sys->sys_fields);
522 }
523 }
524
525 mutex_exit(&dict_sys->mutex);
526
527 return(err);
528}
529
/*****************************************************************//**
Placeholder for inserting the basic system table rows into the system
tables themselves at database creation. Currently a no-op. */
static
void
dict_insert_initial_data(void)
/*==========================*/
{
	/* Intentionally empty: nothing is inserted yet. */
}
540
541/*****************************************************************//**
542Creates and initializes the data dictionary at the server bootstrap.
543@return DB_SUCCESS or error code. */
544dberr_t
545dict_create(void)
546/*=============*/
547{
548 mtr_t mtr;
549
550 mtr_start(&mtr);
551
552 dict_hdr_create(&mtr);
553
554 mtr_commit(&mtr);
555
556 dberr_t err = dict_boot();
557
558 if (err == DB_SUCCESS) {
559 dict_insert_initial_data();
560 }
561
562 return(err);
563}
564