1/* Copyright (C) 2007 MySQL AB & Sanja Belkin
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
16#ifndef _ma_loghandler_h
17#define _ma_loghandler_h
18
/* One megabyte (1024 * 1024), as an unsigned long constant */
#define MB                        (1024UL * 1024)

/* Default size of the transaction log cache (TODO: make it global variable) */
#define TRANSLOG_PAGECACHE_SIZE   (2 * MB)
/* Default size of one transaction log file */
#define TRANSLOG_FILE_SIZE        (1024U * MB)
/* Smallest possible transaction log size */
#define TRANSLOG_MIN_FILE_SIZE    (8 * MB)
/* Default transaction log flags (TODO: make it global variable) */
#define TRANSLOG_DEFAULT_FLAGS    0
29
/*
  Transaction log flags.

  All kinds of protection may be switched on together, for people who
  really do not trust their hardware/OS.
*/
#define TRANSLOG_PAGE_CRC             1U
#define TRANSLOG_SECTOR_PROTECTION    (1U << 1)
#define TRANSLOG_RECORD_CRC           (1U << 2)
/* All flag bits OR-ed together, plus one: number of possible flag values */
#define TRANSLOG_FLAGS_NUM \
  ((TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | TRANSLOG_RECORD_CRC) + 1)
41
/*
  Negative status codes for reading a record header.
  NOTE(review): presumably returned by the int-returning
  translog_read_record_header() / translog_read_next_record_header()
  declared in this header -- confirm against the implementation.

  The values are parenthesized so that each macro stays a single operand
  in whatever expression it is expanded into.
*/
#define RECHEADER_READ_ERROR  (-1)
#define RECHEADER_READ_EOF    (-2)
44
/*
  Size of one page of the transaction log.
  It must be a power of 2 and a multiple of DISK_DRIVE_SECTOR_SIZE,
  i.e. DISK_DRIVE_SECTOR_SIZE * 2^N.
*/
#define TRANSLOG_PAGE_SIZE    (8U * 1024)
51
52#include "ma_loghandler_lsn.h"
53#include "trnman_public.h"
54
55/* short transaction ID type */
56typedef uint16 SHORT_TRANSACTION_ID;
57
58struct st_maria_handler;
59
/* Changing one of the "SIZE" below will break backward-compatibility! */

/*
  NOTE(review): judging by the names, these are the stored sizes of the page
  and of the page count of a row extent -- confirm. Neither is a CRC length.
*/
#define ROW_EXTENT_PAGE_SIZE      5
#define ROW_EXTENT_COUNT_SIZE     2

/* Size of file id in logs */
#define FILEID_STORE_SIZE         2
/* Size of page reference in log (same as the row extent page size) */
#define PAGE_STORE_SIZE           ROW_EXTENT_PAGE_SIZE
/* Size of page ranges in log (same as the row extent count size) */
#define PAGERANGE_STORE_SIZE      ROW_EXTENT_COUNT_SIZE
#define DIRPOS_STORE_SIZE         1
#define CLR_TYPE_STORE_SIZE       1
/* If table has live checksum we store its changes in UNDOs */
#define HA_CHECKSUM_STORE_SIZE    4
#define KEY_NR_STORE_SIZE         1
#define PAGE_LENGTH_STORE_SIZE    2
76
/*
  Store/read helpers whose widths match the *_STORE_SIZE constants:
  2 bytes for file id and page range, 5 bytes for a page reference,
  4 bytes for a checksum and 1 byte for dirpos, CLR type and key number.

  T is the destination buffer, A the value to store, P the buffer to read.
  The one-byte variants access the buffer directly through an uchar pointer;
  the value argument is parenthesized so that any expression can be passed.
*/
#define fileid_store(T,A)       int2store(T,A)
#define page_store(T,A)         int5store(T,((ulonglong)(A)))
#define dirpos_store(T,A)       ((*(uchar*) (T)) = (A))
#define pagerange_store(T,A)    int2store(T,A)
#define clr_type_store(T,A)     ((*(uchar*) (T)) = (A))
#define key_nr_store(T, A)      ((*(uchar*) (T)) = (A))
#define ha_checksum_store(T,A)  int4store(T,A)
#define fileid_korr(P)          uint2korr(P)
#define page_korr(P)            uint5korr(P)
#define dirpos_korr(P)          (*(const uchar *) (P))
#define pagerange_korr(P)       uint2korr(P)
#define clr_type_korr(P)        (*(const uchar *) (P))
#define key_nr_korr(P)          (*(const uchar *) (P))
#define ha_checksum_korr(P)     uint4korr(P)
92
/*
  Size of a disk drive sector. Writing one sector to disk is assumed to be
  an atomic operation.
*/
#define DISK_DRIVE_SECTOR_SIZE    512U

/* Number of positions reserved in an array of parts of a log record */
#define TRANSLOG_INTERNAL_PARTS   2
101
/*
  Types of records in the transaction log.

  Every enumerator has its value written out: they run consecutively from 0
  up to LOGREC_FIRST_FREE, and LOGREC_RESERVED_FUTURE_EXTENSION is pinned
  at 63.
  NOTE(review): these values presumably end up in the log files, so existing
  entries should not be renumbered -- confirm.
*/
/* TODO: Set numbers for these when we have all entries figured out */

enum translog_record_type
{
  LOGREC_RESERVED_FOR_CHUNKS23=      0,
  LOGREC_REDO_INSERT_ROW_HEAD=       1,
  LOGREC_REDO_INSERT_ROW_TAIL=       2,
  LOGREC_REDO_NEW_ROW_HEAD=          3,
  LOGREC_REDO_NEW_ROW_TAIL=          4,
  LOGREC_REDO_INSERT_ROW_BLOBS=      5,
  LOGREC_REDO_PURGE_ROW_HEAD=        6,
  LOGREC_REDO_PURGE_ROW_TAIL=        7,
  LOGREC_REDO_FREE_BLOCKS=           8,
  LOGREC_REDO_FREE_HEAD_OR_TAIL=     9,
  LOGREC_REDO_DELETE_ROW=            10,  /* unused */
  LOGREC_REDO_UPDATE_ROW_HEAD=       11,  /* unused */
  LOGREC_REDO_INDEX=                 12,
  LOGREC_REDO_INDEX_NEW_PAGE=        13,
  LOGREC_REDO_INDEX_FREE_PAGE=       14,
  LOGREC_REDO_UNDELETE_ROW=          15,
  LOGREC_CLR_END=                    16,
  LOGREC_PURGE_END=                  17,
  LOGREC_UNDO_ROW_INSERT=            18,
  LOGREC_UNDO_ROW_DELETE=            19,
  LOGREC_UNDO_ROW_UPDATE=            20,
  LOGREC_UNDO_KEY_INSERT=            21,
  LOGREC_UNDO_KEY_INSERT_WITH_ROOT=  22,
  LOGREC_UNDO_KEY_DELETE=            23,
  LOGREC_UNDO_KEY_DELETE_WITH_ROOT=  24,
  LOGREC_PREPARE=                    25,
  LOGREC_PREPARE_WITH_UNDO_PURGE=    26,
  LOGREC_COMMIT=                     27,
  LOGREC_COMMIT_WITH_UNDO_PURGE=     28,
  LOGREC_CHECKPOINT=                 29,
  LOGREC_REDO_CREATE_TABLE=          30,
  LOGREC_REDO_RENAME_TABLE=          31,
  LOGREC_REDO_DROP_TABLE=            32,
  LOGREC_REDO_DELETE_ALL=            33,
  LOGREC_REDO_REPAIR_TABLE=          34,
  LOGREC_FILE_ID=                    35,
  LOGREC_LONG_TRANSACTION_ID=        36,
  LOGREC_INCOMPLETE_LOG=             37,
  LOGREC_INCOMPLETE_GROUP=           38,
  LOGREC_UNDO_BULK_INSERT=           39,
  LOGREC_REDO_BITMAP_NEW_PAGE=       40,
  LOGREC_IMPORTED_TABLE=             41,
  LOGREC_DEBUG_INFO=                 42,
  LOGREC_FIRST_FREE=                 43,
  LOGREC_RESERVED_FUTURE_EXTENSION=  63
};
#define LOGREC_NUMBER_OF_TYPES 64             /* Maximum, can't be extended */
154
/* Operation codes used inside a LOGREC_REDO_INDEX record */

enum en_key_op
{
  /* Not used */
  KEY_OP_NONE=            0,
  /* Set current position */
  KEY_OP_OFFSET=          1,
  /* Shift up/or down at current position */
  KEY_OP_SHIFT=           2,
  /* Change data at current position */
  KEY_OP_CHANGE=          3,
  /* Insert data at start of page */
  KEY_OP_ADD_PREFIX=      4,
  /* Delete data at start of page */
  KEY_OP_DEL_PREFIX=      5,
  /* Insert data at end of page */
  KEY_OP_ADD_SUFFIX=      6,
  /* Delete data at end of page */
  KEY_OP_DEL_SUFFIX=      7,
  /* For debugging; CRC of used part of page */
  KEY_OP_CHECK=           8,
  /* List of memcpy()s with fixed-len sources in page */
  KEY_OP_MULTI_COPY=      9,
  /* Set pageflag from next byte */
  KEY_OP_SET_PAGEFLAG=    10,
  /* Compact key page */
  KEY_OP_COMPACT_PAGE=    11,
  /* Set page to max page length */
  KEY_OP_MAX_PAGELENGTH=  12,
  /* Entry for storing what triggered redo_index */
  KEY_OP_DEBUG=           13,
  /* Entry for pagelengths */
  KEY_OP_DEBUG_2=         14
};
175
/*
  Debug markers; the values are consecutive from 0 and are written out
  explicitly.
  NOTE(review): presumably stored together with KEY_OP_DEBUG entries to tell
  what triggered a redo_index record -- confirm.
*/
enum en_key_debug
{
  KEY_OP_DEBUG_RTREE_COMBINE=      0,
  KEY_OP_DEBUG_RTREE_SPLIT=        1,
  KEY_OP_DEBUG_RTREE_SET_KEY=      2,
  KEY_OP_DEBUG_FATHER_CHANGED_1=   3,
  KEY_OP_DEBUG_FATHER_CHANGED_2=   4,
  KEY_OP_DEBUG_LOG_SPLIT=          5,
  KEY_OP_DEBUG_LOG_ADD_1=          6,
  KEY_OP_DEBUG_LOG_ADD_2=          7,
  KEY_OP_DEBUG_LOG_ADD_3=          8,
  KEY_OP_DEBUG_LOG_ADD_4=          9,
  KEY_OP_DEBUG_LOG_PREFIX_1=       10,
  KEY_OP_DEBUG_LOG_PREFIX_2=       11,
  KEY_OP_DEBUG_LOG_PREFIX_3=       12,
  KEY_OP_DEBUG_LOG_PREFIX_4=       13,
  KEY_OP_DEBUG_LOG_PREFIX_5=       14,
  KEY_OP_DEBUG_LOG_DEL_CHANGE_1=   15,
  KEY_OP_DEBUG_LOG_DEL_CHANGE_2=   16,
  KEY_OP_DEBUG_LOG_DEL_CHANGE_3=   17,
  KEY_OP_DEBUG_LOG_DEL_CHANGE_RT=  18,
  KEY_OP_DEBUG_LOG_DEL_PREFIX=     19,
  KEY_OP_DEBUG_LOG_MIDDLE=         20
};
200
201
/* Kind of debug information passed to translog_log_debug_info() */
enum translog_debug_info_type
{
  LOGREC_DEBUG_INFO_QUERY= 0
};
206
207/* Size of log file; One log file is restricted to 4G */
208typedef uint32 translog_size_t;
209
210#define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024U
211
212typedef struct st_translog_group_descriptor
213{
214 TRANSLOG_ADDRESS addr;
215 uint8 num;
216} TRANSLOG_GROUP;
217
218
219typedef struct st_translog_header_buffer
220{
221 /* LSN of the read record */
222 LSN lsn;
223 /* array of groups descriptors, can be used only if groups_no > 0 */
224 TRANSLOG_GROUP *groups;
225 /* short transaction ID or 0 if it has no sense for the record */
226 SHORT_TRANSACTION_ID short_trid;
227 /*
228 The Record length in buffer (including read header, but excluding
229 hidden part of record (type, short TrID, length)
230 */
231 translog_size_t record_length;
232 /*
233 Buffer for write decoded header of the record (depend on the record
234 type)
235 */
236 uchar header[TRANSLOG_RECORD_HEADER_MAX_SIZE];
237 /* number of groups listed in */
238 uint groups_no;
239 /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */
240 uint chunk0_pages;
241 /* type of the read record */
242 enum translog_record_type type;
243 /* chunk 0 data address (valid only if groups_no > 0) */
244 TRANSLOG_ADDRESS chunk0_data_addr;
245 /*
246 Real compressed LSN(s) size economy (<number of LSN(s)>*7 - <real_size>)
247 */
248 int16 compressed_LSN_economy;
249 /* short transaction ID or 0 if it has no sense for the record */
250 uint16 non_header_data_start_offset;
251 /* non read body data length in this first chunk */
252 uint16 non_header_data_len;
253 /* chunk 0 data size (valid only if groups_no > 0) */
254 uint16 chunk0_data_len;
255} TRANSLOG_HEADER_BUFFER;
256
257
258typedef struct st_translog_scanner_data
259{
260 uchar buffer[TRANSLOG_PAGE_SIZE]; /* buffer for page content */
261 TRANSLOG_ADDRESS page_addr; /* current page address */
262 /* end of the log which we saw last time */
263 TRANSLOG_ADDRESS horizon;
264 TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */
265 uchar *page; /* page content pointer */
266 /* direct link on the current page or NULL if not supported/requested */
267 PAGECACHE_BLOCK_LINK *direct_link;
268 /* offset of the chunk in the page */
269 translog_size_t page_offset;
270 /* set horizon only once at init */
271 my_bool fixed_horizon;
272 /* try to get direct link on the page if it is possible */
273 my_bool use_direct_link;
274} TRANSLOG_SCANNER_DATA;
275
276
277typedef struct st_translog_reader_data
278{
279 TRANSLOG_HEADER_BUFFER header; /* Header */
280 TRANSLOG_SCANNER_DATA scanner; /* chunks scanner */
281 translog_size_t body_offset; /* current chunk body offset */
282 /* data offset from the record beginning */
283 translog_size_t current_offset;
284 /* number of bytes read in header */
285 uint16 read_header;
286 uint16 chunk_size; /* current chunk size */
287 uint current_group; /* current group */
288 uint current_chunk; /* current chunk in the group */
289 my_bool eor; /* end of the record */
290} TRANSLOG_READER_DATA;
291
292C_MODE_START
293
/*
  Record types for unittests: fixed and variable length example records
  with 0, 1 and 2 LSNs.
*/
#define LOGREC_FIXED_RECORD_0LSN_EXAMPLE       1
#define LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE    2
#define LOGREC_FIXED_RECORD_1LSN_EXAMPLE       3
#define LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE    4
#define LOGREC_FIXED_RECORD_2LSN_EXAMPLE       5
#define LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE    6
301
302extern void translog_example_table_init();
303extern void translog_table_init();
304#define translog_init(D,M,V,I,C,F,R) \
305 translog_init_with_table(D,M,V,I,C,F,R,&translog_table_init,0)
306extern my_bool translog_init_with_table(const char *directory,
307 uint32 log_file_max_size,
308 uint32 server_version,
309 uint32 server_id,
310 PAGECACHE *pagecache,
311 uint flags,
312 my_bool readonly,
313 void (*init_table_func)(),
314 my_bool no_error);
315#ifndef DBUG_OFF
316void check_translog_description_table(int num);
317#endif
318
319extern my_bool
320translog_write_record(LSN *lsn, enum translog_record_type type, TRN *trn,
321 MARIA_HA *tbl_info,
322 translog_size_t rec_len, uint part_no,
323 LEX_CUSTRING *parts_data, uchar *store_share_id,
324 void *hook_arg);
325
326extern void translog_destroy();
327
328extern int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff);
329
330extern void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff);
331
332extern translog_size_t translog_read_record(LSN lsn,
333 translog_size_t offset,
334 translog_size_t length,
335 uchar *buffer,
336 struct st_translog_reader_data
337 *data);
338
339extern my_bool translog_flush(TRANSLOG_ADDRESS lsn);
340
341extern my_bool translog_scanner_init(LSN lsn,
342 my_bool fixed_horizon,
343 struct st_translog_scanner_data *scanner,
344 my_bool use_direct_link);
345extern void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner);
346
347extern int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
348 TRANSLOG_HEADER_BUFFER *buff);
349extern LSN translog_get_file_max_lsn_stored(uint32 file);
350extern my_bool translog_purge(TRANSLOG_ADDRESS low);
351extern my_bool translog_is_file(uint file_no);
352extern void translog_lock();
353extern void translog_unlock();
354extern void translog_lock_handler_assert_owner();
355extern TRANSLOG_ADDRESS translog_get_horizon();
356extern TRANSLOG_ADDRESS translog_get_horizon_no_lock();
357extern int translog_assign_id_to_share(struct st_maria_handler *tbl_info,
358 TRN *trn);
359extern void translog_deassign_id_from_share(struct st_maria_share *share);
360extern void
361translog_assign_id_to_share_from_recovery(struct st_maria_share *share,
362 uint16 id);
363extern my_bool translog_walk_filenames(const char *directory,
364 my_bool (*callback)(const char *,
365 const char *));
366extern void dump_page(uchar *buffer, File handler);
367extern my_bool translog_log_debug_info(TRN *trn,
368 enum translog_debug_info_type type,
369 uchar *info, size_t length);
370
/* States of the log handler */
enum enum_translog_status
{
  /* No initialization done, or error during initialization */
  TRANSLOG_UNINITED=  0,
  /* Transaction log is functioning */
  TRANSLOG_OK=        1,
  /* Read only mode due to write errors */
  TRANSLOG_READONLY=  2,
  /* Going to shutdown the loghandler */
  TRANSLOG_SHUTDOWN=  3
};
378extern enum enum_translog_status translog_status;
379extern ulonglong translog_syncs; /* Number of sync()s */
380
381void translog_soft_sync(my_bool mode);
382void translog_hard_group_commit(my_bool mode);
383int translog_soft_sync_start(void);
384void translog_soft_sync_end(void);
385void translog_sync();
386void translog_set_group_commit_interval(uint32 interval);
387
/*
  Everything below was added because of recovery; should it be moved to a
  separate ma_loghandler_for_recovery.h?
*/
392
393/*
394 Information from transaction log file header
395*/
396
397typedef struct st_loghandler_file_info
398{
399 /*
400 LSN_IMPOSSIBLE for current file (not finished file).
401 Maximum LSN of the record which parts stored in the
402 file.
403 */
404 LSN max_lsn;
405 ulonglong timestamp; /* Time stamp */
406 ulong maria_version; /* Version of maria loghandler */
407 ulong mysql_version; /* Version of mysql server */
408 ulong server_id; /* Server ID */
409 ulong page_size; /* Loghandler page size */
410 ulong file_number; /* Number of the file (from the file header) */
411} LOGHANDLER_FILE_INFO;
412
413#define SHARE_ID_MAX 65535 /* array's size */
414
415extern void translog_fill_overhead_table();
416extern void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
417 uchar *page_buff);
418extern LSN translog_first_lsn_in_log();
419extern LSN translog_first_theoretical_lsn();
420extern LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);
421extern my_bool translog_purge_at_flush();
422extern uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon);
423extern uint32 translog_get_first_needed_file();
424extern char *translog_filename_by_fileno(uint32 file_no, char *path);
425extern void translog_set_file_size(uint32 size);
426
427/* record parts descriptor */
428struct st_translog_parts
429{
430 /* full record length */
431 translog_size_t record_length;
432 /* full record length with chunk headers */
433 translog_size_t total_record_length;
434 /* current part index */
435 uint current;
436 /* total number of elements in parts */
437 uint elements;
438 /* array of parts */
439 LEX_CUSTRING *parts;
440};
441
442typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type,
443 TRN *trn,
444 struct st_maria_handler *tbl_info,
445 void *hook_arg);
446
447typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type,
448 TRN *trn,
449 struct st_maria_handler *tbl_info,
450 LSN *lsn, void *hook_arg);
451
452typedef uint16(*read_rec_hook) (enum translog_record_type type,
453 uint16 read_length, uchar *read_buff,
454 uchar *decoded_buff);
455
456
/* Internal classes of log records (see LOG_DESC.rclass) */
enum record_class
{
  LOGRECTYPE_NOT_ALLOWED=        0,
  LOGRECTYPE_VARIABLE_LENGTH=    1,
  LOGRECTYPE_PSEUDOFIXEDLENGTH=  2,
  LOGRECTYPE_FIXEDLENGTH=        3
};

/* Relation of a record to a group (see LOG_DESC.record_in_group) */
enum enum_record_in_group
{
  LOGREC_NOT_LAST_IN_GROUP=  0,
  LOGREC_LAST_IN_GROUP=      1,
  LOGREC_IS_GROUP_ITSELF=    2
};
469
470/*
471 Descriptor of log record type
472*/
473typedef struct st_log_record_type_descriptor
474{
475 /* internal class of the record */
476 enum record_class rclass;
477 /*
478 length for fixed-size record, pseudo-fixed record
479 length with uncompressed LSNs
480 */
481 uint16 fixed_length;
482 /* how much record body (belonged to headers too) read with headers */
483 uint16 read_header_len;
484 /* HOOK for writing the record called before lock */
485 prewrite_rec_hook prewrite_hook;
486 /* HOOK for writing the record called when LSN is known, inside lock */
487 inwrite_rec_hook inwrite_hook;
488 /* HOOK for reading headers */
489 read_rec_hook read_hook;
490 /*
491 For pseudo fixed records number of compressed LSNs followed by
492 system header
493 */
494 int16 compressed_LSN;
495 /* the rest is for maria_read_log & Recovery */
496 /** @brief for debug error messages or "maria_read_log" command-line tool */
497 const char *name;
498 enum enum_record_in_group record_in_group;
499 /* a function to execute when we see the record during the REDO phase */
500 int (*record_execute_in_redo_phase)(const TRANSLOG_HEADER_BUFFER *);
501 /* a function to execute when we see the record during the UNDO phase */
502 int (*record_execute_in_undo_phase)(const TRANSLOG_HEADER_BUFFER *, TRN *);
503} LOG_DESC;
504
505extern LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
506
507typedef enum
508{
509 TRANSLOG_GCOMMIT_NONE,
510 TRANSLOG_GCOMMIT_HARD,
511 TRANSLOG_GCOMMIT_SOFT
512} enum_maria_group_commit;
513extern ulong maria_group_commit;
514extern ulong maria_group_commit_interval;
515typedef enum
516{
517 TRANSLOG_PURGE_IMMIDIATE,
518 TRANSLOG_PURGE_EXTERNAL,
519 TRANSLOG_PURGE_ONDEMAND
520} enum_maria_translog_purge_type;
521extern ulong log_purge_type;
522extern ulong log_file_size;
523
524typedef enum
525{
526 TRANSLOG_SYNC_DIR_NEVER,
527 TRANSLOG_SYNC_DIR_NEWFILE,
528 TRANSLOG_SYNC_DIR_ALWAYS
529} enum_maria_sync_log_dir;
530extern ulong sync_log_dir;
531
532C_MODE_END
533#endif
534