1/* Copyright (C) 2007 MySQL AB & Sanja Belkin. 2010 Monty Program Ab.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
16#include "maria_def.h"
17#include "trnman.h"
18#include "ma_blockrec.h" /* for some constants and in-write hooks */
19#include "ma_key_recover.h" /* For some in-write hooks */
20#include "ma_checkpoint.h"
21#include "ma_servicethread.h"
22
/*
  On Windows, neither my_open() nor mysql_file_sync() works for directories,
  and there is no need to flush filesystem changes, i.e. to sync()
  directories. sync_dir() therefore expands to the constant 0 on Windows
  and to mysql_file_sync() everywhere else.
*/
#ifdef __WIN__
#define sync_dir(A,B) 0
#else
#define sync_dir(A,B) mysql_file_sync(A,B)
#endif
33
34/**
35 @file
36 @brief Module which writes and reads to a transaction log
37*/
38
/* 0xFF can never be a valid first byte of a chunk */
#define TRANSLOG_FILLER 0xFF

/* number of opened log files in the pagecache (should be at least 2) */
#define OPENED_FILES_NUM 3
#define CACHED_FILES_NUM 5
#define CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT 7
/*
  <hash.h> and <m_ctype.h> are only pulled in when CACHED_FILES_NUM is above
  the direct search limit (with the values above it is not).
*/
#if CACHED_FILES_NUM > CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT
#include <hash.h>
#include <m_ctype.h>
#endif
50
/**
  @brief mutex handed to soft_sync_control below

  NOTE(review): the previous comment said "protects checkpoint_in_progress",
  which looks copied from the checkpoint module -- confirm what it guards.
*/
static mysql_mutex_t LOCK_soft_sync;
/**
  @brief condition variable handed to soft_sync_control below
  (presumably used to wake up/stop the soft sync service thread -- confirm)
*/
static mysql_cond_t COND_soft_sync;
/** @brief control structure for the soft sync background (service) thread */
static MA_SERVICE_THREAD_CONTROL soft_sync_control=
  {0, FALSE, FALSE, &LOCK_soft_sync, &COND_soft_sync};
58
59
/* transaction log file descriptor */
typedef struct st_translog_file
{
  /* NOTE(review): presumably the number of the log file -- confirm */
  uint32 number;
  /* page cache file object of this log file */
  PAGECACHE_FILE handler;
  my_bool was_recovered;
  /* NOTE(review): presumably true when the file has been synced -- confirm */
  my_bool is_sync;
} TRANSLOG_FILE;
68
/* records buffer size (should be TRANSLOG_PAGE_SIZE * n) */
#define TRANSLOG_WRITE_BUFFER (1024*1024)
/*
  pagecache_read/write/inject() use bmove512() on their buffers so those must
  be long-aligned, which we guarantee by using the type below: the ulonglong
  member gives the union (and so 'buffer') the alignment of ulonglong.
*/
typedef union
{
  ulonglong dummy;                       /* present for alignment (see above) */
  uchar buffer[TRANSLOG_PAGE_SIZE];      /* one log page */
} TRANSLOG_PAGE_SIZE_BUFF;
80
/* min chunk length */
#define TRANSLOG_MIN_CHUNK 3
/*
  Number of buffers used by loghandler

  Should be at least 4, because one thread can block up to 2 buffers in
  normal circumstances (less than half of one and full other, or just
  switched one and other). But if we meet the end of the file in the middle
  and have to switch buffer it will be 3. + 1 buffer for flushing/writing.
  We have a bigger number here for higher concurrency and to make division
  faster.

  The number should be a power of 2 to be fast.
*/
#define TRANSLOG_BUFFERS_NO 8
/* number of bytes (+ header) which can be unused on first page in sequence */
#define TRANSLOG_MINCHUNK_CONTENT 1
/* version of log file */
#define TRANSLOG_VERSION_ID 10000 /* 1.00.00 */

#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */

/* Maximum length of compressed LSNs (the worst case of whole LSN storing) */
#define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE)
#define MAX_NUMBER_OF_LSNS_PER_RECORD 2


/*
  max lsn calculation for buffer: the buffer's own last_lsn, or its
  prev_last_lsn when last_lsn is LSN_IMPOSSIBLE
*/
#define BUFFER_MAX_LSN(B) \
  ((B)->last_lsn == LSN_IMPOSSIBLE ? (B)->prev_last_lsn : (B)->last_lsn)
111
/* log write buffer descriptor */
struct st_translog_buffer
{
  /*
    Cache for current log. Comes first to be aligned for bmove512() in
    pagecache_inject()
  */
  uchar buffer[TRANSLOG_WRITE_BUFFER];
  /*
    Maximum LSN of records which end in this buffer (or LSN_IMPOSSIBLE
    if no LSN ends here)
  */
  LSN last_lsn;
  /* last_lsn of previous buffer or LSN_IMPOSSIBLE if it is the very first one */
  LSN prev_last_lsn;
  /* This buffer offset in the file */
  TRANSLOG_ADDRESS offset;
  /*
    Next buffer offset in the file (it is not always offset + size,
    in case of flush by LSN it can be offset + size - TRANSLOG_PAGE_SIZE)
  */
  TRANSLOG_ADDRESS next_buffer_offset;
  /* Previous buffer offset, used to detect that its flush has finished */
  TRANSLOG_ADDRESS prev_buffer_offset;
  /*
    If the buffer was forced to close, this saves the value of its horizon,
    otherwise LSN_IMPOSSIBLE
  */
  TRANSLOG_ADDRESS pre_force_close_horizon;
  /*
    How much is written (or will be written when copy_to_buffer_in_progress
    becomes 0) to this buffer
  */
  translog_size_t size;
  /*
    When moving from one log buffer to another, we write the last of the
    previous buffer to file and then move to start using the new log
    buffer. In the case of a partly filled last page, this page is not moved
    to the start of the new buffer but instead we set the 'skipped_data'
    variable to tell us how much data at the beginning of the buffer is not
    relevant.
  */
  uint skipped_data;
  /* File handler for this buffer */
  TRANSLOG_FILE *file;
  /* Threads which are waiting for buffer filling/freeing */
  mysql_cond_t waiting_filling_buffer;
  /* Number of records which are in copy progress */
  uint copy_to_buffer_in_progress;
  /* list of threads waiting for the buffer to become ready */
  struct st_my_thread_var *waiting_flush;
  /*
    If true then the previous buffer overlaps with this one (due to flush of
    loghandler, the last page of that buffer is the same as the first page
    of this buffer) and has to be written first (because it contains the old
    content of the page which is present in both buffers)
  */
  my_bool overlay;
  /* Number of this buffer (checked against the cursor's buffer_no) */
  uint buffer_no;
  /*
    Lock for the buffer.

    The current buffer also locks the whole handler (if one wants to lock
    the handler one should lock the current buffer).

    Buffers are locked only in one direction (with overflow and beginning
    from the first buffer). If we keep lock on buffer N we can lock only
    buffer N+1 (never N-1).

    One thread does not lock more than 2 buffers at a time, so to make a
    deadlock it would take N threads (where N equals the number of buffers)
    each taking one buffer and trying to lock the next. But it is impossible
    because there are only 2 cases when a thread takes 2 buffers: 1) one
    thread finishes the current buffer (where horizon is) and starts the
    next (to which horizon moves). 2) flush starts from the buffer after
    the current (oldest) one and goes till the current one, crabbing by
    buffer sequence. And there is only one flush at a moment (they are
    serialised).

    Because of the above and the number of buffers we can't get a deadlock
    (it is impossible to get all buffers locked simultaneously).
    NOTE(review): this comment used to say the number of buffers is 5, but
    TRANSLOG_BUFFERS_NO is 8.
  */
  mysql_mutex_t mutex;
  /*
    Some thread is going to close the buffer and it should be
    done only by that thread
  */
  my_bool is_closing_buffer;
  /*
    Version of the buffer; increases every time the buffer is flushed.
    Together with file and offset it allows detecting buffer changes
  */
  uint8 ver;

  /*
    When the previous buffer is sent to disk it sets its address here to
    allow detecting when it is done
    (we have to keep it in this buffer to lock buffers only in one direction).
  */
  TRANSLOG_ADDRESS prev_sent_to_disk;
  mysql_cond_t prev_sent_to_disk_cond;
};
213
214
/* Write position inside a log buffer */
struct st_buffer_cursor
{
  /* pointer into the buffer */
  uchar *ptr;
  /* current buffer */
  struct st_translog_buffer *buffer;
  /* How many bytes we wrote on the current page */
  uint16 current_page_fill;
  /*
    How many times we wrote the page to the disk during the flushing process
    (for sector protection).
  */
  uint16 write_counter;
  /* previous write offset */
  uint16 previous_offset;
  /* Number of current buffer */
  uint8 buffer_no;
  /*
    True if it is just filling the buffer after advancing the pointer to
    the horizon.
  */
  my_bool chaser;
  /*
    Is the current page of the cursor already finished (sector protection
    should be applied if it is needed)
  */
  my_bool protected;
};
243
244
/*
  Bit mask with one bit per log buffer (see dirty_buffer_mask in
  st_translog_descriptor); its 8 bits match TRANSLOG_BUFFERS_NO == 8.
*/
typedef uint8 dirty_buffer_mask_t;
246
/* Parameters and current state of the log handler */
struct st_translog_descriptor
{
  /* *** Parameters of the log handler *** */

  /* Page cache for the log reads */
  PAGECACHE *pagecache;
  uint flags;
  /* File open flags */
  uint open_flags;
  /* max size of one log file (for new logs creation) */
  uint32 log_file_max_size;
  uint32 server_version;
  /* server ID (used for replication) */
  uint32 server_id;
  /* Loghandler's buffer capacity in case of chunk 2 filling */
  uint32 buffer_capacity_chunk_2;
  /*
    Half of the buffer capacity in case of chunk 2 filling,
    used to decide whether we write a record in one group or many.
    It is stored in a variable just to avoid a division every
    time we need it.
  */
  uint32 half_buffer_capacity_chunk_2;
  /* Page overhead calculated by flags (whether CRC is enabled, etc) */
  uint16 page_overhead;
  /*
    Page capacity ("useful load") calculated by flags
    (TRANSLOG_PAGE_SIZE - page_overhead-1)
  */
  uint16 page_capacity_chunk_2;
  /* Path to the directory where we store log files */
  char directory[FN_REFLEN];

  /* *** Current state of the log handler *** */
  /* list of opened files */
  DYNAMIC_ARRAY open_files;
  /* max/min number of file in the array */
  uint32 max_file, min_file;
  /* the opened files list guard */
  mysql_rwlock_t open_files_lock;

  /*
    File descriptor of the directory where we store log files, used for
    syncing it.
  */
  File directory_fd;
  /* buffers for log writing */
  struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
  /* Mask where 1 in position N means that buffer N is not flushed */
  dirty_buffer_mask_t dirty_buffer_mask;
  /* Protection of the above variable */
  mysql_mutex_t dirty_buffer_mask_lock;
  /*
     horizon - visible end of the log (here it is the absolute end of the
     log: the position where the next chunk can start)
  */
  TRANSLOG_ADDRESS horizon;
  /* horizon buffer cursor */
  struct st_buffer_cursor bc;
  /* maximum LSN of the current (not finished) file */
  LSN max_lsn;

  /*
    Last flushed LSN (protected by log_flush_lock).
    Pointers in the log are ordered like this:
    last_lsn_checked <= flushed <= sent_to_disk <= in_buffers_only <=
    max_lsn <= horizon
  */
  LSN flushed;
  /* Last LSN sent to the disk (but maybe not written yet) */
  LSN sent_to_disk;
  /* Horizon from which log started after initialization */
  TRANSLOG_ADDRESS log_start;
  TRANSLOG_ADDRESS previous_flush_horizon;
  /* Everything after this address is not sent to disk yet */
  TRANSLOG_ADDRESS in_buffers_only;
  /* protection of sent_to_disk and in_buffers_only */
  mysql_mutex_t sent_to_disk_lock;
  /*
    Protects 'flushed' (see above) and is used for flush serialization
    (will be removed in v1.5)
  */
  mysql_mutex_t log_flush_lock;
  mysql_cond_t log_flush_cond;
  mysql_cond_t new_goal_cond;

  /* Protects changing of headers of finished files (max_lsn) */
  mysql_mutex_t file_header_lock;

  /*
    Sorted array (with protection) of files where we started the writing
    process and so we can't give the last LSN yet
  */
  mysql_mutex_t unfinished_files_lock;
  DYNAMIC_ARRAY unfinished_files;

  /*
    minimum number of the still needed file, calculated during the last
    translog_purge call
  */
  uint32 min_need_file;
  /* Purger data: minimum file in the log (or 0 if unknown) */
  uint32 min_file_number;
  /* Protects the purger from many calls, and its data */
  mysql_mutex_t purger_lock;
  /* last low water mark checked */
  LSN last_lsn_checked;
  /**
    Must be set to 0 under loghandler lock every time a new LSN
    is generated.
  */
  my_bool is_everything_flushed;
  /* True when flush pass is in progress */
  my_bool flush_in_progress;
  /* The flush number (used to distinguish two flushes going one by one) */
  volatile int flush_no;
  /* Next flush pass variables */
  TRANSLOG_ADDRESS next_pass_max_lsn;
  pthread_t max_lsn_requester;
};
367
/* The log handler state used throughout this file */
static struct st_translog_descriptor log_descriptor;

ulong log_purge_type= TRANSLOG_PURGE_IMMIDIATE;
ulong log_file_size= TRANSLOG_FILE_SIZE;
/* sync() of log files directory mode */
ulong sync_log_dir= TRANSLOG_SYNC_DIR_NEWFILE;
ulong maria_group_commit= TRANSLOG_GCOMMIT_NONE;
ulong maria_group_commit_interval= 0;

/* Marker for end of log: END_OF_LOG is the address of this byte */
static uchar end_of_log= 0;
#define END_OF_LOG &end_of_log
/**
  Switch for "soft" sync (no real sync() but periodical sync by service
  thread)
*/
static volatile my_bool soft_sync= FALSE;
/**
  Switch for "hard" group commit mode
*/
static volatile my_bool hard_group_commit= FALSE;
/**
  Interval of file numbers which have to be sync()ed
*/
static uint32 soft_sync_min= 0;
static uint32 soft_sync_max= 0;
static uint32 soft_need_sync= 1;
/**
  stores interval in microseconds
*/
static uint32 group_commit_wait= 0;

enum enum_translog_status translog_status= TRANSLOG_UNINITED;
ulonglong translog_syncs= 0; /* Number of sync()s */

/* time of last flush */
static ulonglong flush_start= 0;
405
/*
  chunk types: the two highest bits of the first byte of a chunk hold the
  chunk type (mask TRANSLOG_CHUNK_TYPE), the six lowest bits hold the record
  type (mask TRANSLOG_REC_TYPE)
*/
#define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head or tail) */
#define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */
#define TRANSLOG_CHUNK_NOHDR (2 << 6) /* 2 no head chunk (till page end) */
#define TRANSLOG_CHUNK_LNGTH (3 << 6) /* 3 chunk with chunk length */
#define TRANSLOG_CHUNK_TYPE (3 << 6) /* Mask to get chunk type */
#define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */
#define TRANSLOG_CHUNK_0_CONT 0x3F /* the type to mark chunk 0 continue */

/* compressed (relative) LSN constants */
#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */
417
418
419#include <my_atomic.h>
/* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */
static MARIA_SHARE **id_to_share= NULL;

/* Forward declarations */
static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args);

static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner);
static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected);
LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);
429
/*
  Table of log record type descriptors, indexed by record type.
  It is filled by translog_table_init().
*/

LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
435
436
437#ifndef DBUG_OFF
438
/* Assert ownership of the mutex of buffer B */
#define translog_buffer_lock_assert_owner(B) \
  mysql_mutex_assert_owner(&(B)->mutex)
/*
  Assert ownership of the mutex of the current buffer, i.e. the buffer the
  horizon cursor (log_descriptor.bc) points to
*/
#define translog_lock_assert_owner() \
  mysql_mutex_assert_owner(&log_descriptor.bc.buffer->mutex)
/* Function wrapper around the translog_lock_assert_owner() macro */
void translog_lock_handler_assert_owner()
{
  translog_lock_assert_owner();
}
447
448/**
449 @brief check the description table validity
450
451 @param num how many records should be filled
452*/
453
454static uint max_allowed_translog_type= 0;
455
456void check_translog_description_table(int num)
457{
458 int i;
459 DBUG_ENTER("check_translog_description_table");
460 DBUG_PRINT("enter", ("last record: %d", num));
461 DBUG_ASSERT(num > 0);
462 /* last is reserved for extending the table */
463 DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
464 DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
465 max_allowed_translog_type= num;
466
467 for (i= 0; i <= num; i++)
468 {
469 DBUG_PRINT("info",
470 ("record type: %d class: %d fixed: %u header: %u LSNs: %u "
471 "name: %s",
472 i, log_record_type_descriptor[i].rclass,
473 (uint)log_record_type_descriptor[i].fixed_length,
474 (uint)log_record_type_descriptor[i].read_header_len,
475 (uint)log_record_type_descriptor[i].compressed_LSN,
476 log_record_type_descriptor[i].name));
477 switch (log_record_type_descriptor[i].rclass) {
478 case LOGRECTYPE_NOT_ALLOWED:
479 DBUG_ASSERT(i == 0);
480 break;
481 case LOGRECTYPE_VARIABLE_LENGTH:
482 DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
483 DBUG_ASSERT((log_record_type_descriptor[i].compressed_LSN == 0) ||
484 ((log_record_type_descriptor[i].compressed_LSN == 1) &&
485 (log_record_type_descriptor[i].read_header_len >=
486 LSN_STORE_SIZE)) ||
487 ((log_record_type_descriptor[i].compressed_LSN == 2) &&
488 (log_record_type_descriptor[i].read_header_len >=
489 LSN_STORE_SIZE * 2)));
490 break;
491 case LOGRECTYPE_PSEUDOFIXEDLENGTH:
492 DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
493 log_record_type_descriptor[i].read_header_len);
494 DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN > 0);
495 DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN <= 2);
496 break;
497 case LOGRECTYPE_FIXEDLENGTH:
498 DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
499 log_record_type_descriptor[i].read_header_len);
500 DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN == 0);
501 break;
502 default:
503 DBUG_ASSERT(0);
504 }
505 }
506 for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++)
507 {
508 DBUG_ASSERT(log_record_type_descriptor[i].rclass ==
509 LOGRECTYPE_NOT_ALLOWED);
510 }
511 DBUG_VOID_RETURN;
512}
513#else
/* When DBUG_OFF is defined the lock ownership checks expand to empty blocks */
#define translog_buffer_lock_assert_owner(B) {}
#define translog_lock_assert_owner() {}
516#endif
517
/*
  Initial values of the log record type descriptors, one per record type.
  translog_table_init() copies each of them into log_record_type_descriptor[].

  NOTE(review): LOG_DESC is declared elsewhere. Judging from what
  check_translog_description_table() verifies, the first three fields look
  like the record class, fixed_length and read_header_len, the 7th field
  like compressed_LSN (number of LSNs in the header) and the 8th is the
  name -- confirm against the LOG_DESC declaration.
*/
static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23=
{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0,
 "reserved", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL };

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_insert_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_insert_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_new_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_TAIL=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_new_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_insert_row_blobs", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_purge_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_purge_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_FREE_BLOCKS=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_free_blocks", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_free_head_or_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

/* not yet used; for when we have versioning */
static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW=
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
 "redo_delete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

/** @todo RECOVERY BUG unused, remove? */
static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
 "redo_update_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
 "redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX_NEW_PAGE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_new_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX_FREE_PAGE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_free_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
 "redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_CLR_END=
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE +
 CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1,
 "clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_PURGE_END=
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
 "purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_undo_row_insert, NULL, 1,
 "undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_undo_row_delete, NULL, 1,
 "undo_row_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_undo_row_update, NULL, 1,
 "undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
 NULL, write_hook_for_undo_key_insert, NULL, 1,
 "undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};

/* This will never be in the log, only in the clr */
static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key, NULL, 1,
 "undo_key_insert_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
 NULL, write_hook_for_undo_key_delete, NULL, 1,
 "undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key_delete, NULL, 1,
 "undo_key_delete_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_PREPARE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE, NULL, NULL, NULL, 1,
 "prepare_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_COMMIT=
{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
 write_hook_for_commit, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
 NULL};

static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_commit, NULL, 1,
 "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_CHECKPOINT=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "checkpoint", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0,
 "redo_create_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "redo_rename_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "redo_drop_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
 NULL, write_hook_for_redo_delete_all, NULL, 0,
 "redo_delete_all", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 8 + 8, FILEID_STORE_SIZE + 8 + 8,
 NULL, NULL, NULL, 0,
 "redo_repair_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_FILE_ID=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, write_hook_for_file_id, NULL, 0,
 "file_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
 "long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_INCOMPLETE_LOG=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
 NULL, NULL, NULL, 0,
 "incomplete_log", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_INCOMPLETE_GROUP=
{LOGRECTYPE_FIXEDLENGTH, 0, 0,
 NULL, NULL, NULL, 0,
 "incomplete_group", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_BULK_INSERT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE,
 NULL, write_hook_for_undo_bulk_insert, NULL, 1,
 "undo_bulk_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_BITMAP_NEW_PAGE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 NULL, NULL, NULL, 0,
 "redo_create_bitmap", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_IMPORTED_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "imported_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_DEBUG_INFO=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "info", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
740
/* my_sys flags combination kept for log file writes */
const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL;
742
743void translog_table_init()
744{
745 int i;
746 log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]=
747 INIT_LOGREC_RESERVED_FOR_CHUNKS23;
748 log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]=
749 INIT_LOGREC_REDO_INSERT_ROW_HEAD;
750 log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]=
751 INIT_LOGREC_REDO_INSERT_ROW_TAIL;
752 log_record_type_descriptor[LOGREC_REDO_NEW_ROW_HEAD]=
753 INIT_LOGREC_REDO_NEW_ROW_HEAD;
754 log_record_type_descriptor[LOGREC_REDO_NEW_ROW_TAIL]=
755 INIT_LOGREC_REDO_NEW_ROW_TAIL;
756 log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]=
757 INIT_LOGREC_REDO_INSERT_ROW_BLOBS;
758 log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]=
759 INIT_LOGREC_REDO_PURGE_ROW_HEAD;
760 log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]=
761 INIT_LOGREC_REDO_PURGE_ROW_TAIL;
762 log_record_type_descriptor[LOGREC_REDO_FREE_BLOCKS]=
763 INIT_LOGREC_REDO_FREE_BLOCKS;
764 log_record_type_descriptor[LOGREC_REDO_FREE_HEAD_OR_TAIL]=
765 INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL;
766 log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]=
767 INIT_LOGREC_REDO_DELETE_ROW;
768 log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]=
769 INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
770 log_record_type_descriptor[LOGREC_REDO_INDEX]=
771 INIT_LOGREC_REDO_INDEX;
772 log_record_type_descriptor[LOGREC_REDO_INDEX_NEW_PAGE]=
773 INIT_LOGREC_REDO_INDEX_NEW_PAGE;
774 log_record_type_descriptor[LOGREC_REDO_INDEX_FREE_PAGE]=
775 INIT_LOGREC_REDO_INDEX_FREE_PAGE;
776 log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
777 INIT_LOGREC_REDO_UNDELETE_ROW;
778 log_record_type_descriptor[LOGREC_CLR_END]=
779 INIT_LOGREC_CLR_END;
780 log_record_type_descriptor[LOGREC_PURGE_END]=
781 INIT_LOGREC_PURGE_END;
782 log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]=
783 INIT_LOGREC_UNDO_ROW_INSERT;
784 log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]=
785 INIT_LOGREC_UNDO_ROW_DELETE;
786 log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]=
787 INIT_LOGREC_UNDO_ROW_UPDATE;
788 log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]=
789 INIT_LOGREC_UNDO_KEY_INSERT;
790 log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT_WITH_ROOT]=
791 INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT;
792 log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
793 INIT_LOGREC_UNDO_KEY_DELETE;
794 log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE_WITH_ROOT]=
795 INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
796 log_record_type_descriptor[LOGREC_PREPARE]=
797 INIT_LOGREC_PREPARE;
798 log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
799 INIT_LOGREC_PREPARE_WITH_UNDO_PURGE;
800 log_record_type_descriptor[LOGREC_COMMIT]=
801 INIT_LOGREC_COMMIT;
802 log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]=
803 INIT_LOGREC_COMMIT_WITH_UNDO_PURGE;
804 log_record_type_descriptor[LOGREC_CHECKPOINT]=
805 INIT_LOGREC_CHECKPOINT;
806 log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]=
807 INIT_LOGREC_REDO_CREATE_TABLE;
808 log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]=
809 INIT_LOGREC_REDO_RENAME_TABLE;
810 log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]=
811 INIT_LOGREC_REDO_DROP_TABLE;
812 log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]=
813 INIT_LOGREC_REDO_DELETE_ALL;
814 log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]=
815 INIT_LOGREC_REDO_REPAIR_TABLE;
816 log_record_type_descriptor[LOGREC_FILE_ID]=
817 INIT_LOGREC_FILE_ID;
818 log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
819 INIT_LOGREC_LONG_TRANSACTION_ID;
820 log_record_type_descriptor[LOGREC_INCOMPLETE_LOG]=
821 INIT_LOGREC_INCOMPLETE_LOG;
822 log_record_type_descriptor[LOGREC_INCOMPLETE_GROUP]=
823 INIT_LOGREC_INCOMPLETE_GROUP;
824 log_record_type_descriptor[LOGREC_UNDO_BULK_INSERT]=
825 INIT_LOGREC_UNDO_BULK_INSERT;
826 log_record_type_descriptor[LOGREC_REDO_BITMAP_NEW_PAGE]=
827 INIT_LOGREC_REDO_BITMAP_NEW_PAGE;
828 log_record_type_descriptor[LOGREC_IMPORTED_TABLE]=
829 INIT_LOGREC_IMPORTED_TABLE;
830 log_record_type_descriptor[LOGREC_DEBUG_INFO]=
831 INIT_LOGREC_DEBUG_INFO;
832
833 for (i= LOGREC_FIRST_FREE; i < LOGREC_NUMBER_OF_TYPES; i++)
834 log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
835#ifndef DBUG_OFF
836 check_translog_description_table(LOGREC_FIRST_FREE -1);
837#endif
838}
839
840
/* page overheads for all possible combinations of page flags */
static uint page_overhead[TRANSLOG_FLAGS_NUM];

/*
  NOTE(review): presumably the data handed to translog_page_validator()
  through the page cache hook arguments -- confirm against its users.
*/
typedef struct st_translog_validator_data
{
  TRANSLOG_ADDRESS *addr;
  my_bool was_recovered;
} TRANSLOG_VALIDATOR_DATA;
849
850
/**
  @brief Check cursor/buffer consistency

  @param cursor          cursor which will be checked

  @note The function consists of DBUG_ASSERT() checks only, which is why
  the parameter is marked as possibly unused.
*/

static void translog_check_cursor(struct st_buffer_cursor *cursor
                                  __attribute__((unused)))
{
  /*
    Unless the cursor is a "chaser", its write position must be exactly at
    the end of the data accounted for in the buffer
  */
  DBUG_ASSERT(cursor->chaser ||
              ((ulong) (cursor->ptr - cursor->buffer->buffer) ==
               cursor->buffer->size));
  /* The cursor must carry the number of the buffer it points to */
  DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
  /* Offset inside the page must agree with the recorded page fill */
  DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
              cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
  /* A page can never hold more than TRANSLOG_PAGE_SIZE bytes */
  DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
}
870
871
872/**
873 @brief switch the loghandler in read only mode in case of write error
874*/
875
876void translog_stop_writing()
877{
878 DBUG_ENTER("translog_stop_writing");
879 DBUG_PRINT("error", ("errno: %d my_errno: %d", errno, my_errno));
880 translog_status= (translog_status == TRANSLOG_SHUTDOWN ?
881 TRANSLOG_UNINITED :
882 TRANSLOG_READONLY);
883 log_descriptor.is_everything_flushed= 1;
884 log_descriptor.open_flags= O_BINARY | O_RDONLY;
885 DBUG_ASSERT(0);
886 DBUG_VOID_RETURN;
887}
888
889
890/*
891 @brief Get file name of the log by log number
892
893 @param file_no Number of the log we want to open
894 @param path Pointer to buffer where file name will be
895 stored (must be FN_REFLEN bytes at least)
896
897 @return pointer to path
898*/
899
900char *translog_filename_by_fileno(uint32 file_no, char *path)
901{
902 char buff[11], *end;
903 uint length;
904 DBUG_ENTER("translog_filename_by_fileno");
905 DBUG_ASSERT(file_no <= 0xfffffff);
906
907 /* log_descriptor.directory is already formated */
908 end= strxmov(path, log_descriptor.directory, "aria_log.0000000", NullS);
909 length= (uint) (int10_to_str(file_no, buff, 10) - buff);
910 strmov(end - length +1, buff);
911
912 DBUG_PRINT("info", ("Path: '%s' path: %p", path, path));
913 DBUG_RETURN(path);
914}
915
916
917/**
918 @brief Create log file with given number without cache
919
920 @param file_no Number of the log we want to open
921
922 retval -1 error
923 retval # file descriptor number
924*/
925
926static File create_logfile_by_number_no_cache(uint32 file_no)
927{
928 File file;
929 char path[FN_REFLEN];
930 DBUG_ENTER("create_logfile_by_number_no_cache");
931
932 if (translog_status != TRANSLOG_OK)
933 DBUG_RETURN(-1);
934
935 /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
936 if ((file= mysql_file_create(key_file_translog,
937 translog_filename_by_fileno(file_no, path),
938 0, O_BINARY | O_RDWR, MYF(MY_WME))) < 0)
939 {
940 DBUG_PRINT("error", ("Error %d during creating file '%s'", errno, path));
941 translog_stop_writing();
942 DBUG_RETURN(-1);
943 }
944 if (sync_log_dir >= TRANSLOG_SYNC_DIR_NEWFILE &&
945 sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD)))
946 {
947 DBUG_PRINT("error", ("Error %d during syncing directory '%s'",
948 errno, log_descriptor.directory));
949 mysql_file_close(file, MYF(0));
950 translog_stop_writing();
951 DBUG_RETURN(-1);
952 }
953 DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
954 DBUG_RETURN(file);
955}
956
957/**
958 @brief Open (not create) log file with given number without cache
959
960 @param file_no Number of the log we want to open
961
962 retval -1 error
963 retval # file descriptor number
964*/
965
966static File open_logfile_by_number_no_cache(uint32 file_no)
967{
968 File file;
969 char path[FN_REFLEN];
970 DBUG_ENTER("open_logfile_by_number_no_cache");
971
972 /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
973 /* TODO: use mysql_file_create() */
974 if ((file= mysql_file_open(key_file_translog,
975 translog_filename_by_fileno(file_no, path),
976 log_descriptor.open_flags,
977 MYF(MY_WME))) < 0)
978 {
979 DBUG_PRINT("error", ("Error %d during opening file '%s'", errno, path));
980 DBUG_RETURN(-1);
981 }
982 DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
983 DBUG_RETURN(file);
984}
985
986
987/**
988 @brief get file descriptor by given number using cache
989
990 @param file_no Number of the log we want to open
991
992 retval # file descriptor
993 retval NULL file is not opened
994*/
995
996static TRANSLOG_FILE *get_logfile_by_number(uint32 file_no)
997{
998 TRANSLOG_FILE *file;
999 DBUG_ENTER("get_logfile_by_number");
1000 mysql_rwlock_rdlock(&log_descriptor.open_files_lock);
1001 if (log_descriptor.max_file - file_no >=
1002 log_descriptor.open_files.elements)
1003 {
1004 DBUG_PRINT("info", ("File #%u is not opened", file_no));
1005 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1006 DBUG_RETURN(NULL);
1007 }
1008 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1009 log_descriptor.open_files.elements);
1010 DBUG_ASSERT(log_descriptor.max_file >= file_no);
1011 DBUG_ASSERT(log_descriptor.min_file <= file_no);
1012
1013 file= *dynamic_element(&log_descriptor.open_files,
1014 log_descriptor.max_file - file_no, TRANSLOG_FILE **);
1015 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1016 DBUG_PRINT("info", ("File %p File no: %u, File handler: %d",
1017 file, file_no,
1018 (file ? file->handler.file : -1)));
1019 DBUG_ASSERT(!file || file->number == file_no);
1020 DBUG_RETURN(file);
1021}
1022
1023
1024/**
1025 @brief get current file descriptor
1026
1027 retval # file descriptor
1028*/
1029
1030static TRANSLOG_FILE *get_current_logfile()
1031{
1032 TRANSLOG_FILE *file;
1033 DBUG_ENTER("get_current_logfile");
1034 mysql_rwlock_rdlock(&log_descriptor.open_files_lock);
1035 DBUG_PRINT("info", ("max_file: %lu min_file: %lu open_files: %lu",
1036 (ulong) log_descriptor.max_file,
1037 (ulong) log_descriptor.min_file,
1038 (ulong) log_descriptor.open_files.elements));
1039 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1040 log_descriptor.open_files.elements);
1041 file= *dynamic_element(&log_descriptor.open_files, 0, TRANSLOG_FILE **);
1042 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1043 DBUG_RETURN(file);
1044}
1045
/* Tag written at the very beginning of every transaction log file */
uchar maria_trans_file_magic[]=
{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
 'L', 'O', 'G' };
/*
  Size of the meaningful part of the log file header, as written by
  translog_write_file_header(): the file tag followed by
    8 bytes  timestamp
    4 bytes  maria (log format) version
    4 bytes  mysql version
    4 bytes  server id
    2 bytes  loghandler page size - 1
    3 bytes  file number
  and LSN_STORE_SIZE bytes of max LSN, which is the last field.
*/
#define LOG_HEADER_DATA_SIZE (sizeof(maria_trans_file_magic) + \
                              8 + 4 + 4 + 4 + 2 + 3 + \
                              LSN_STORE_SIZE)
1052
1053
1054/*
1055 Write log file page header in the just opened new log file
1056
1057 SYNOPSIS
1058 translog_write_file_header();
1059
1060 NOTES
1061 First page is just a marker page; We don't store any real log data in it.
1062
1063 RETURN
1064 0 OK
1065 1 ERROR
1066*/
1067
1068static my_bool translog_write_file_header()
1069{
1070 TRANSLOG_FILE *file;
1071 ulonglong timestamp;
1072 uchar page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
1073 my_bool rc;
1074 DBUG_ENTER("translog_write_file_header");
1075
1076 /* file tag */
1077 memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
1078 page+= sizeof(maria_trans_file_magic);
1079 /* timestamp */
1080 timestamp= my_hrtime().val;
1081 int8store(page, timestamp);
1082 page+= 8;
1083 /* maria version */
1084 int4store(page, TRANSLOG_VERSION_ID);
1085 page+= 4;
1086 /* mysql version (MYSQL_VERSION_ID) */
1087 int4store(page, log_descriptor.server_version);
1088 page+= 4;
1089 /* server ID */
1090 int4store(page, log_descriptor.server_id);
1091 page+= 4;
1092 /* loghandler page_size */
1093 int2store(page, TRANSLOG_PAGE_SIZE - 1);
1094 page+= 2;
1095 /* file number */
1096 int3store(page, LSN_FILE_NO(log_descriptor.horizon));
1097 page+= 3;
1098 lsn_store(page, LSN_IMPOSSIBLE);
1099 page+= LSN_STORE_SIZE;
1100 memset(page, TRANSLOG_FILLER, sizeof(page_buff) - (page- page_buff));
1101
1102 file= get_current_logfile();
1103 rc= my_pwrite(file->handler.file, page_buff, sizeof(page_buff), 0,
1104 log_write_flags) != 0;
1105 /*
1106 Dropping the flag in such way can make false alarm: signalling than the
1107 file in not sync when it is sync, but the situation is quite rare and
1108 protections with mutexes give much more overhead to the whole engine
1109 */
1110 file->is_sync= 0;
1111 DBUG_RETURN(rc);
1112}
1113
1114/*
1115 @brief write the new LSN on the given file header
1116
1117 @param file The file descriptor
1118 @param lsn That LSN which should be written
1119
1120 @retval 0 OK
1121 @retval 1 Error
1122*/
1123
1124static my_bool translog_max_lsn_to_header(File file, LSN lsn)
1125{
1126 uchar lsn_buff[LSN_STORE_SIZE];
1127 my_bool rc;
1128 DBUG_ENTER("translog_max_lsn_to_header");
1129 DBUG_PRINT("enter", ("File descriptor: %ld "
1130 "lsn: " LSN_FMT,
1131 (long) file,
1132 LSN_IN_PARTS(lsn)));
1133
1134 lsn_store(lsn_buff, lsn);
1135
1136 rc= (my_pwrite(file, lsn_buff,
1137 LSN_STORE_SIZE,
1138 (LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE),
1139 log_write_flags) != 0 ||
1140 mysql_file_sync(file, MYF(MY_WME)) != 0);
1141 /*
1142 We should not increase counter in case of error above, but it is so
1143 unlikely that we can ignore this case
1144 */
1145 translog_syncs++;
1146 DBUG_RETURN(rc);
1147}
1148
1149
1150/*
1151 @brief Extract hander file information from loghandler file page
1152
1153 @param desc header information descriptor to be filled with information
1154 @param page_buff buffer with the page content
1155*/
1156
1157void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
1158 uchar *page_buff)
1159{
1160 uchar *ptr;
1161
1162 ptr= page_buff + sizeof(maria_trans_file_magic);
1163 desc->timestamp= uint8korr(ptr);
1164 ptr+= 8;
1165 desc->maria_version= uint4korr(ptr);
1166 ptr+= 4;
1167 desc->mysql_version= uint4korr(ptr);
1168 ptr+= 4;
1169 desc->server_id= uint4korr(ptr + 4);
1170 ptr+= 4;
1171 desc->page_size= uint2korr(ptr) + 1;
1172 ptr+= 2;
1173 desc->file_number= uint3korr(ptr);
1174 ptr+=3;
1175 desc->max_lsn= lsn_korr(ptr);
1176}
1177
1178
1179/*
1180 @brief Read hander file information from loghandler file
1181
1182 @param desc header information descriptor to be filled with information
1183 @param file file descriptor to read
1184
1185 @retval 0 OK
1186 @retval 1 Error
1187*/
1188
1189my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file)
1190{
1191 uchar page_buff[LOG_HEADER_DATA_SIZE];
1192 DBUG_ENTER("translog_read_file_header");
1193
1194 if (mysql_file_pread(file, page_buff,
1195 sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME)))
1196 {
1197 DBUG_PRINT("info", ("log read fail error: %d", my_errno));
1198 DBUG_RETURN(1);
1199 }
1200 translog_interpret_file_header(desc, page_buff);
1201 DBUG_PRINT("info", ("timestamp: %llu aria ver: %lu mysql ver: %lu "
1202 "server id %lu page size %lu file number %lu "
1203 "max lsn: " LSN_FMT,
1204 (ulonglong) desc->timestamp,
1205 (ulong) desc->maria_version,
1206 (ulong) desc->mysql_version,
1207 (ulong) desc->server_id,
1208 desc->page_size, (ulong) desc->file_number,
1209 LSN_IN_PARTS(desc->max_lsn)));
1210 DBUG_RETURN(0);
1211}
1212
1213
1214/*
1215 @brief set the lsn to the files from_file - to_file if it is greater
1216 then written in the file
1217
1218 @param from_file first file number (min)
1219 @param to_file last file number (max)
1220 @param lsn the lsn for writing
1221 @param is_locked true if current thread locked the log handler
1222
1223 @retval 0 OK
1224 @retval 1 Error
1225*/
1226
1227static my_bool translog_set_lsn_for_files(uint32 from_file, uint32 to_file,
1228 LSN lsn, my_bool is_locked)
1229{
1230 uint32 file;
1231 DBUG_ENTER("translog_set_lsn_for_files");
1232 DBUG_PRINT("enter", ("From: %lu to: %lu lsn: " LSN_FMT " locked: %d",
1233 (ulong) from_file, (ulong) to_file,
1234 LSN_IN_PARTS(lsn),
1235 is_locked));
1236 DBUG_ASSERT(from_file <= to_file);
1237 DBUG_ASSERT(from_file > 0); /* we have not file 0 */
1238
1239 /* Checks the current file (not finished yet file) */
1240 if (!is_locked)
1241 translog_lock();
1242 if (to_file == (uint32) LSN_FILE_NO(log_descriptor.horizon))
1243 {
1244 if (likely(cmp_translog_addr(lsn, log_descriptor.max_lsn) > 0))
1245 log_descriptor.max_lsn= lsn;
1246 to_file--;
1247 }
1248 if (!is_locked)
1249 translog_unlock();
1250
1251 /* Checks finished files if they are */
1252 mysql_mutex_lock(&log_descriptor.file_header_lock);
1253 for (file= from_file; file <= to_file; file++)
1254 {
1255 LOGHANDLER_FILE_INFO info;
1256 File fd;
1257
1258 fd= open_logfile_by_number_no_cache(file);
1259 if ((fd < 0) ||
1260 ((translog_read_file_header(&info, fd) ||
1261 (cmp_translog_addr(lsn, info.max_lsn) > 0 &&
1262 translog_max_lsn_to_header(fd, lsn))) |
1263 mysql_file_close(fd, MYF(MY_WME))))
1264 {
1265 translog_stop_writing();
1266 mysql_mutex_unlock(&log_descriptor.file_header_lock);
1267 DBUG_RETURN(1);
1268 }
1269 }
1270 mysql_mutex_unlock(&log_descriptor.file_header_lock);
1271
1272 DBUG_RETURN(0);
1273}
1274
1275
/*
  Descriptor of a file in log_descriptor.unfinished_files.
  An entry is created or has its counter increased by
  translog_mark_file_unfinished() and is removed by
  translog_mark_file_finished() when the counter drops to zero.
*/
struct st_file_counter
{
  uint32 file;            /* file number */
  uint32 counter;         /* counter for started writes */
};
1282
1283
1284/*
1285 @brief mark file "in progress" (for multi-group records)
1286
1287 @param file log file number
1288*/
1289
1290static void translog_mark_file_unfinished(uint32 file)
1291{
1292 int place, i;
1293 struct st_file_counter fc, *fc_ptr;
1294
1295 DBUG_ENTER("translog_mark_file_unfinished");
1296 DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1297
1298 fc.file= file; fc.counter= 1;
1299 mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
1300
1301 if (log_descriptor.unfinished_files.elements == 0)
1302 {
1303 insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
1304 DBUG_PRINT("info", ("The first element inserted"));
1305 goto end;
1306 }
1307
1308 for (place= log_descriptor.unfinished_files.elements - 1;
1309 place >= 0;
1310 place--)
1311 {
1312 fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1313 place, struct st_file_counter *);
1314 if (fc_ptr->file <= file)
1315 break;
1316 }
1317
1318 if (place >= 0 && fc_ptr->file == file)
1319 {
1320 fc_ptr->counter++;
1321 DBUG_PRINT("info", ("counter increased"));
1322 goto end;
1323 }
1324
1325 if (place == (int)log_descriptor.unfinished_files.elements)
1326 {
1327 insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
1328 DBUG_PRINT("info", ("The last element inserted"));
1329 goto end;
1330 }
1331 /* shift and assign new element */
1332 insert_dynamic(&log_descriptor.unfinished_files,
1333 (uchar*)
1334 dynamic_element(&log_descriptor.unfinished_files,
1335 log_descriptor.unfinished_files.elements- 1,
1336 struct st_file_counter *));
1337 for(i= log_descriptor.unfinished_files.elements - 1; i > place; i--)
1338 {
1339 /* we do not use set_dynamic() to avoid unneeded checks */
1340 memcpy(dynamic_element(&log_descriptor.unfinished_files,
1341 i, struct st_file_counter *),
1342 dynamic_element(&log_descriptor.unfinished_files,
1343 i + 1, struct st_file_counter *),
1344 sizeof(struct st_file_counter));
1345 }
1346 memcpy(dynamic_element(&log_descriptor.unfinished_files,
1347 place + 1, struct st_file_counter *),
1348 &fc, sizeof(struct st_file_counter));
1349end:
1350 mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
1351 DBUG_VOID_RETURN;
1352}
1353
1354
1355/*
1356 @brief remove file mark "in progress" (for multi-group records)
1357
1358 @param file log file number
1359*/
1360
1361static void translog_mark_file_finished(uint32 file)
1362{
1363 int i;
1364 struct st_file_counter *UNINIT_VAR(fc_ptr);
1365 DBUG_ENTER("translog_mark_file_finished");
1366 DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1367
1368 mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
1369
1370 DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0);
1371 for (i= 0;
1372 i < (int) log_descriptor.unfinished_files.elements;
1373 i++)
1374 {
1375 fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1376 i, struct st_file_counter *);
1377 if (fc_ptr->file == file)
1378 {
1379 break;
1380 }
1381 }
1382 DBUG_ASSERT(i < (int) log_descriptor.unfinished_files.elements);
1383
1384 if (! --fc_ptr->counter)
1385 delete_dynamic_element(&log_descriptor.unfinished_files, i);
1386 mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
1387 DBUG_VOID_RETURN;
1388}
1389
1390
1391/*
1392 @brief get max LSN of the record which parts stored in this file
1393
1394 @param file file number
1395
1396 @return requested LSN or LSN_IMPOSSIBLE/LSN_ERROR
1397 @retval LSN_IMPOSSIBLE File is still not finished
1398 @retval LSN_ERROR Error opening file
1399 @retval # LSN of the record which parts stored in this file
1400*/
1401
1402LSN translog_get_file_max_lsn_stored(uint32 file)
1403{
1404 uint32 limit= FILENO_IMPOSSIBLE;
1405 DBUG_ENTER("translog_get_file_max_lsn_stored");
1406 DBUG_PRINT("enter", ("file: %lu", (ulong)file));
1407 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
1408 translog_status == TRANSLOG_READONLY);
1409
1410 mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
1411
1412 /* find file with minimum file number "in progress" */
1413 if (log_descriptor.unfinished_files.elements > 0)
1414 {
1415 struct st_file_counter *fc_ptr;
1416 fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1417 0, struct st_file_counter *);
1418 limit= fc_ptr->file; /* minimal file number "in progress" */
1419 }
1420 mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
1421
1422 /*
1423 if there is no "in progress file" then unfinished file is in progress
1424 for sure
1425 */
1426 if (limit == FILENO_IMPOSSIBLE)
1427 {
1428 TRANSLOG_ADDRESS horizon= translog_get_horizon();
1429 limit= LSN_FILE_NO(horizon);
1430 }
1431
1432 if (file >= limit)
1433 {
1434 DBUG_PRINT("info", ("The file in in progress"));
1435 DBUG_RETURN(LSN_IMPOSSIBLE);
1436 }
1437
1438 {
1439 LOGHANDLER_FILE_INFO info;
1440 File fd;
1441
1442 fd= open_logfile_by_number_no_cache(file);
1443 if(fd < 0)
1444 {
1445 DBUG_PRINT("error", ("Can't open file"));
1446 DBUG_RETURN(LSN_ERROR);
1447 }
1448
1449 if (translog_read_file_header(&info, fd))
1450 {
1451 DBUG_PRINT("error", ("Can't read file header"));
1452 info.max_lsn= LSN_ERROR;
1453 }
1454
1455 if (mysql_file_close(fd, MYF(MY_WME)))
1456 {
1457 DBUG_PRINT("error", ("Can't close file"));
1458 info.max_lsn= LSN_ERROR;
1459 }
1460
1461 DBUG_PRINT("info", ("Max lsn: " LSN_FMT, LSN_IN_PARTS(info.max_lsn)));
1462 DBUG_RETURN(info.max_lsn);
1463 }
1464}
1465
/**
  @brief Initialize transaction log file buffer

  @param buffer          The buffer to initialize
  @param num             Number of this buffer (stored as uint8)

  @note If the initialization of a condition variable or of the mutex
  fails, the function returns immediately and the fields which are set
  after that point stay untouched.

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_buffer_init(struct st_translog_buffer *buffer, int num)
{
  DBUG_ENTER("translog_buffer_init");
  /* No LSN has been stored in or around this buffer yet */
  buffer->pre_force_close_horizon=
    buffer->prev_last_lsn= buffer->last_lsn=
    LSN_IMPOSSIBLE;
  DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: %p",
                      buffer));

  buffer->buffer_no= (uint8) num;
  /* The file this buffer belongs to (none yet) */
  buffer->file= NULL;
  buffer->overlay= 0;
  /* Cache for the current log: filled with the "no data" byte */
  memset(buffer->buffer, TRANSLOG_FILLER, TRANSLOG_WRITE_BUFFER);
  /* Buffer size */
  buffer->size= 0;
  buffer->skipped_data= 0;
  /* Condition for threads which are waiting for the buffer to be filled */
  if (mysql_cond_init(key_TRANSLOG_BUFFER_waiting_filling_buffer,
                      &buffer->waiting_filling_buffer, 0))
    DBUG_RETURN(1);
  /* Number of records which are in copy progress */
  buffer->copy_to_buffer_in_progress= 0;
  /* List of threads waiting for the buffer to be flushed */
  buffer->waiting_flush= 0;
  /*
    Buffers are locked by the following mutex. As the buffers form a
    logical circle (the first buffer follows the last one), locking them
    triggers false alarms of the deadlock detection system, so deadlock
    detection is switched off for this mutex. Indeed, all mutex locks are
    concentrated around the current buffer, except for the flushing thread
    (but it is only one thread). One thread can't take more than 2 buffer
    locks at once, so a deadlock is impossible here.

    NOTE(review): the original comment said that detection is switched off
    for only one buffer in the middle of the chain ("one of eight"), but
    the mysql_mutex_setflags() call below is unconditional, i.e. it is
    applied to every buffer passed to this function - confirm which one is
    intended.
  */

  if (mysql_mutex_init(key_TRANSLOG_BUFFER_mutex,
                       &buffer->mutex, MY_MUTEX_INIT_FAST) ||
      mysql_cond_init(key_TRANSLOG_BUFFER_prev_sent_to_disk_cond,
                      &buffer->prev_sent_to_disk_cond, 0))
    DBUG_RETURN(1);
  mysql_mutex_setflags(&buffer->mutex, MYF_NO_DEADLOCK_DETECTION);
  buffer->is_closing_buffer= 0;
  buffer->prev_sent_to_disk= LSN_IMPOSSIBLE;
  buffer->prev_buffer_offset= LSN_IMPOSSIBLE;
  buffer->ver= 0;
  DBUG_RETURN(0);
}
1531
1532
1533/*
1534 @brief close transaction log file by descriptor
1535
1536 @param file pagegecache file descriptor reference
1537
1538 @return Operation status
1539 @retval 0 OK
1540 @retval 1 Error
1541*/
1542
1543static my_bool translog_close_log_file(TRANSLOG_FILE *file)
1544{
1545 int rc= 0;
1546 flush_pagecache_blocks(log_descriptor.pagecache, &file->handler,
1547 FLUSH_RELEASE);
1548 /*
1549 Sync file when we close it
1550 TODO: sync only we have changed the log
1551 */
1552 if (!file->is_sync)
1553 {
1554 rc= mysql_file_sync(file->handler.file, MYF(MY_WME));
1555 translog_syncs++;
1556 }
1557 rc|= mysql_file_close(file->handler.file, MYF(MY_WME));
1558 my_free(file);
1559 return MY_TEST(rc);
1560}
1561
1562
1563/**
1564 @brief Initializes TRANSLOG_FILE structure
1565
1566 @param file reference on the file to initialize
1567 @param number file number
1568 @param is_sync is file synced on disk
1569*/
1570
1571static void translog_file_init(TRANSLOG_FILE *file, uint32 number,
1572 my_bool is_sync)
1573{
1574 pagecache_file_set_null_hooks(&file->handler);
1575 file->handler.post_read_hook= translog_page_validator;
1576 file->handler.flush_log_callback= maria_flush_log_for_page_none;
1577 file->handler.callback_data= (uchar*)file;
1578
1579 file->number= number;
1580 file->was_recovered= 0;
1581 file->is_sync= is_sync;
1582}
1583
1584
/**
  @brief Create and fill header of new file.

  @note the caller must call it right after it has increased
  log_descriptor.horizon to the new file
  (log_descriptor.horizon+= LSN_ONE_FILE)

  @note Steps: the max LSN is stored in the header of the previous current
  file, the new file is created and put at index 0 of
  log_descriptor.open_files (under the write lock on open_files_lock), the
  header page of the new file is written and the new file number is saved
  in the control file.

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_create_new_file()
{
  TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(sizeof(TRANSLOG_FILE),
                                                 MYF(0));

  /* The file which has been current until now */
  TRANSLOG_FILE *old= get_current_logfile();
  uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
  DBUG_ENTER("translog_create_new_file");

  if (file == NULL)
    goto error;

  /*
    Writes max_lsn to the file header before finishing it (there is no need
    to lock file header buffer because it is still unfinished file, so only
    one thread can finish the file and nobody interested of LSN of current
    (unfinished) file, because no one can purge it).
  */
  if (translog_max_lsn_to_header(old->handler.file, log_descriptor.max_lsn))
    goto error;

  mysql_rwlock_wrlock(&log_descriptor.open_files_lock);
  DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
              log_descriptor.open_files.elements);
  DBUG_ASSERT(file_no == log_descriptor.max_file + 1);
  /* Make sure there is room for one more element */
  if (allocate_dynamic(&log_descriptor.open_files,
                       log_descriptor.max_file - log_descriptor.min_file + 2))
    goto error_lock;

  /* this call just expands the array; the element is moved to index 0 below */
  if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
    goto error_lock;

  /*
    NOTE(review): if the creation below fails, 'file' is freed at the
    'error' label while the pointer appended by insert_dynamic() above
    seems to stay in open_files - confirm whether this can be reached in
    practice.
  */
  if ((file->handler.file= create_logfile_by_number_no_cache(file_no)) == -1)
    goto error_lock;
  translog_file_init(file, file_no, 0);

  log_descriptor.max_file++;
  {
    /* Shift all old elements by one to free index 0 for the new file */
    char *start= (char*) dynamic_element(&log_descriptor.open_files, 0,
                                         TRANSLOG_FILE**);
    memmove(start + sizeof(TRANSLOG_FILE*), start,
            sizeof(TRANSLOG_FILE*) *
            (log_descriptor.max_file - log_descriptor.min_file + 1 - 1));
  }
  /* can't fail because we have expanded the array */
  set_dynamic(&log_descriptor.open_files, (uchar*)&file, 0);
  DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
              log_descriptor.open_files.elements);
  mysql_rwlock_unlock(&log_descriptor.open_files_lock);

  DBUG_PRINT("info", ("file_no: %lu", (ulong)file_no));

  /* The new file is the current one now, so the header goes into it */
  if (translog_write_file_header())
    DBUG_RETURN(1);

  /* Save the number of the new file in the control file */
  if (ma_control_file_write_and_force(last_checkpoint_lsn, file_no,
                                      max_trid_in_control_file,
                                      recovery_failures))
  {
    translog_stop_writing();
    DBUG_RETURN(1);
  }

  DBUG_RETURN(0);

error_lock:
  mysql_rwlock_unlock(&log_descriptor.open_files_lock);
error:
  translog_stop_writing();
  my_free(file);
  DBUG_RETURN(1);
}
1670
1671
/**
  @brief Locks the loghandler buffer.

  @param buffer          This buffer which should be locked

  @note See comment before buffer 'mutex' variable.

  @note The function returns nothing; it locks buffer->mutex.
*/

static void translog_buffer_lock(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_buffer_lock");
  DBUG_PRINT("enter",
             ("Lock buffer #%u: %p", buffer->buffer_no,
              buffer));
  mysql_mutex_lock(&buffer->mutex);
  DBUG_VOID_RETURN;
}
1692
1693
/**
  @brief Unlock the loghandler buffer

  @param buffer          This buffer which should be unlocked

  @note The function returns nothing; it unlocks buffer->mutex.
*/

static void translog_buffer_unlock(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_buffer_unlock");
  DBUG_PRINT("enter", ("Unlock buffer... #%u (%p)",
                       (uint) buffer->buffer_no, buffer));

  mysql_mutex_unlock(&buffer->mutex);
  DBUG_VOID_RETURN;
}
1715
1716
1717/*
1718 Write a header on the page
1719
1720 SYNOPSIS
1721 translog_new_page_header()
1722 horizon Where to write the page
1723 cursor Where to write the page
1724
1725 NOTE
1726 - space for page header should be checked before
1727*/
1728
1729static uchar translog_sector_random;
1730
1731static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
1732 struct st_buffer_cursor *cursor)
1733{
1734 uchar *ptr;
1735
1736 DBUG_ENTER("translog_new_page_header");
1737 DBUG_ASSERT(cursor->ptr);
1738
1739 cursor->protected= 0;
1740
1741 ptr= cursor->ptr;
1742 /* Page number */
1743 int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
1744 ptr+= 3;
1745 /* File number */
1746 int3store(ptr, LSN_FILE_NO(*horizon));
1747 ptr+= 3;
1748 DBUG_ASSERT(TRANSLOG_PAGE_FLAGS == (ptr - cursor->ptr));
1749 cursor->ptr[TRANSLOG_PAGE_FLAGS]= (uchar) log_descriptor.flags;
1750 ptr++;
1751 if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
1752 {
1753#ifndef DBUG_OFF
1754 DBUG_PRINT("info", ("write 0x11223344 CRC to " LSN_FMT,
1755 LSN_IN_PARTS(*horizon)));
1756 /* This will be overwritten by real CRC; This is just for debugging */
1757 int4store(ptr, 0x11223344);
1758#endif
1759 /* CRC will be put when page is finished */
1760 ptr+= CRC_SIZE;
1761 }
1762 if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
1763 {
1764 /*
1765 translog_sector_randmo works like "random" values producer because
1766 it is enough to have such "random" for this purpose and it will
1767 not interfere with higher level pseudo random value generator
1768 */
1769 ptr[0]= translog_sector_random++;
1770 ptr+= TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
1771 }
1772 {
1773 size_t len= (ptr - cursor->ptr);
1774 (*horizon)+= len; /* increasing the offset part of the address */
1775 cursor->current_page_fill= (uint16)len;
1776 if (!cursor->chaser)
1777 cursor->buffer->size+= (translog_size_t)len;
1778 }
1779 cursor->ptr= ptr;
1780 DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu) "
1781 "Horizon: " LSN_FMT,
1782 (uint) cursor->buffer->buffer_no, cursor->buffer,
1783 cursor->chaser, (ulong) cursor->buffer->size,
1784 (ulong) (cursor->ptr - cursor->buffer->buffer),
1785 LSN_IN_PARTS(*horizon)));
1786 translog_check_cursor(cursor);
1787 DBUG_VOID_RETURN;
1788}
1789
1790
/**
  @brief Put sector protection on the page image

  @param page            reference on the page content
  @param cursor          cursor of the buffer

  @note We put a sector protection on all following sectors on the page,
  except the first sector that is protected by page header.

  @note How it works: the protection table is the last
  TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE bytes of the page header
  (one byte per sector). For every protected sector the first byte of the
  sector is saved in the table and replaced in the page by a marker value,
  which is table[0] plus the cursor's write counter.
*/

static void translog_put_sector_protection(uchar *page,
                                           struct st_buffer_cursor *cursor)
{
  /* The table occupies the end of the page header */
  uchar *table= page + log_descriptor.page_overhead -
    TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
  uint i, offset;
  /* Sector which holds the last byte before cursor->previous_offset */
  uint16 last_protected_sector= ((cursor->previous_offset - 1) /
                                 DISK_DRIVE_SECTOR_SIZE);
  /* Sector which holds cursor->previous_offset itself */
  uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
  /* Marker: differs for every write of the same page (uint8 wrap-around) */
  uint8 value= table[0] + cursor->write_counter;
  DBUG_ENTER("translog_put_sector_protection");

  if (start_sector == 0)
  {
    /* First sector is protected by file & page numbers in the page header. */
    start_sector= 1;
  }

  DBUG_PRINT("enter", ("Write counter:%u value:%u offset:%u, "
                       "last protected:%u start sector:%u",
                       (uint) cursor->write_counter,
                       (uint) value,
                       (uint) cursor->previous_offset,
                       (uint) last_protected_sector, (uint) start_sector));
  if (last_protected_sector == start_sector)
  {
    i= last_protected_sector;
    offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
    /* restore data, because we modified sector which was protected */
    if (offset < cursor->previous_offset)
      page[offset]= table[i];
  }
  /* Save the first byte of each sector in the table and put the marker */
  for (i= start_sector, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
       i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
       i++, (offset+= DISK_DRIVE_SECTOR_SIZE))
  {
    DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
                        i, offset, (uint) page[offset]));
    table[i]= page[offset];
    page[offset]= value;
    DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
                        i, offset, (uint) page[offset]));
  }
  DBUG_VOID_RETURN;
}
1849
1850
1851/*
1852 Calculate CRC32 of given area
1853
1854 SYNOPSIS
1855 translog_crc()
1856 area Pointer of the area beginning
1857 length The Area length
1858
1859 RETURN
1860 CRC32
1861*/
1862
1863static uint32 translog_crc(uchar *area, uint length)
1864{
1865 DBUG_ENTER("translog_crc");
1866 DBUG_RETURN(crc32(0L, (unsigned char*) area, length));
1867}
1868
1869
/**
  @brief Finish current page: fill the rest of it with TRANSLOG_FILLER
  bytes and put sector protection and CRC on it if they are switched on

  @param horizon         \ horizon & buffer pointers; both are advanced to
  @param cursor          / the end of the page

  @note Nothing is done if the page is already marked as protected, so
  calling the function twice for the same page is harmless.
*/

static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
                                 struct st_buffer_cursor *cursor)
{
  /* Number of unused bytes at the end of the page */
  uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
  /* Start of the page; must be taken before cursor->ptr is moved below */
  uchar *page= cursor->ptr - cursor->current_page_fill;
  DBUG_ENTER("translog_finish_page");
  DBUG_PRINT("enter", ("Buffer: #%u %p "
                       "Buffer addr: " LSN_FMT " "
                       "Page addr: " LSN_FMT " "
                       "size:%u (%u) Pg:%u left:%u",
                       (uint) cursor->buffer_no, cursor->buffer,
                       LSN_IN_PARTS(cursor->buffer->offset),
                       (uint)LSN_FILE_NO(*horizon),
                       (uint)(LSN_OFFSET(*horizon) -
                              cursor->current_page_fill),
                       (uint) cursor->buffer->size,
                       (uint) (cursor->ptr -cursor->buffer->buffer),
                       (uint) cursor->current_page_fill, (uint) left));
  DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset));
  translog_check_cursor(cursor);
  if (cursor->protected)
  {
    DBUG_PRINT("info", ("Already protected and finished"));
    DBUG_VOID_RETURN;
  }
  cursor->protected= 1;

  DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
  if (left != 0)
  {
    DBUG_PRINT("info", ("left: %u", (uint) left));
    /* Pad the page and move the cursor and the horizon to its end */
    memset(cursor->ptr, TRANSLOG_FILLER, left);
    cursor->ptr+= left;
    (*horizon)+= left; /* offset increasing */
    if (!cursor->chaser)
      cursor->buffer->size+= left;
    /* We are finishing the page so reset the counter */
    cursor->current_page_fill= 0;
    DBUG_PRINT("info", ("Finish Page buffer #%u: %p "
                        "chaser: %d Size: %lu (%lu)",
                        (uint) cursor->buffer->buffer_no,
                        cursor->buffer, cursor->chaser,
                        (ulong) cursor->buffer->size,
                        (ulong) (cursor->ptr - cursor->buffer->buffer)));
    translog_check_cursor(cursor);
  }
  /*
    When we are finishing the page other thread might not finish the page
    header yet (in case if we started from the middle of the page) so we
    have to read log_descriptor.flags but not the flags from the page.
  */
  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
  {
    translog_put_sector_protection(page, cursor);
    DBUG_PRINT("info", ("drop write_counter"));
    cursor->write_counter= 0;
    cursor->previous_offset= 0;
  }
  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
  {
    /* The CRC covers everything after the page header */
    uint32 crc= translog_crc(page + log_descriptor.page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             log_descriptor.page_overhead);
    DBUG_PRINT("info", ("CRC: %lx", (ulong) crc));
    /* We have page number, file number and flag before crc */
    int4store(page + 3 + 3 + 1, crc);
  }
  DBUG_VOID_RETURN;
}
1948
1949
1950/*
1951 @brief Wait until all threads have finished closing this buffer.
1952
1953 @param buffer This buffer should be check
1954*/
1955
1956static void translog_wait_for_closing(struct st_translog_buffer *buffer)
1957{
1958 DBUG_ENTER("translog_wait_for_closing");
1959 DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
1960 "is closing %u File: %d size: %lu",
1961 (uint) buffer->buffer_no, buffer,
1962 (uint) buffer->copy_to_buffer_in_progress,
1963 (uint) buffer->is_closing_buffer,
1964 (buffer->file ? buffer->file->handler.file : -1),
1965 (ulong) buffer->size));
1966 translog_buffer_lock_assert_owner(buffer);
1967
1968 while (buffer->is_closing_buffer)
1969 {
1970 DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
1971 (uint) buffer->buffer_no, buffer));
1972 DBUG_ASSERT(buffer->file != NULL);
1973 mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
1974 DBUG_PRINT("info", ("wait for writers done buffer: #%u %p",
1975 (uint) buffer->buffer_no, buffer));
1976 }
1977
1978 DBUG_VOID_RETURN;
1979}
1980
1981
1982/*
1983 @brief Wait until all threads have finished filling this buffer.
1984
1985 @param buffer This buffer should be check
1986*/
1987
1988static void translog_wait_for_writers(struct st_translog_buffer *buffer)
1989{
1990 DBUG_ENTER("translog_wait_for_writers");
1991 DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
1992 "is closing %u File: %d size: %lu",
1993 (uint) buffer->buffer_no, buffer,
1994 (uint) buffer->copy_to_buffer_in_progress,
1995 (uint) buffer->is_closing_buffer,
1996 (buffer->file ? buffer->file->handler.file : -1),
1997 (ulong) buffer->size));
1998 translog_buffer_lock_assert_owner(buffer);
1999
2000 while (buffer->copy_to_buffer_in_progress)
2001 {
2002 DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
2003 (uint) buffer->buffer_no, buffer));
2004 DBUG_ASSERT(buffer->file != NULL);
2005 mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
2006 DBUG_PRINT("info", ("wait for writers done buffer: #%u %p",
2007 (uint) buffer->buffer_no, buffer));
2008 }
2009
2010 DBUG_VOID_RETURN;
2011}
2012
2013
2014/*
2015
2016 Wait for buffer to become free
2017
2018 SYNOPSIS
2019 translog_wait_for_buffer_free()
2020 buffer The buffer we are waiting for
2021
2022 NOTE
2023 - this buffer should be locked
2024*/
2025
2026static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
2027{
2028 TRANSLOG_ADDRESS offset= buffer->offset;
2029 TRANSLOG_FILE *file= buffer->file;
2030 uint8 ver= buffer->ver;
2031 DBUG_ENTER("translog_wait_for_buffer_free");
2032 DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
2033 "is closing %u File: %d size: %lu",
2034 (uint) buffer->buffer_no, buffer,
2035 (uint) buffer->copy_to_buffer_in_progress,
2036 (uint) buffer->is_closing_buffer,
2037 (buffer->file ? buffer->file->handler.file : -1),
2038 (ulong) buffer->size));
2039
2040 translog_wait_for_writers(buffer);
2041
2042 if (offset != buffer->offset || file != buffer->file || ver != buffer->ver)
2043 DBUG_VOID_RETURN; /* the buffer if already freed */
2044
2045 while (buffer->file != NULL)
2046 {
2047 DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
2048 (uint) buffer->buffer_no, buffer));
2049 mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
2050 DBUG_PRINT("info", ("wait for writers done. buffer: #%u %p",
2051 (uint) buffer->buffer_no, buffer));
2052 }
2053 DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
2054 DBUG_VOID_RETURN;
2055}
2056
2057
2058/*
2059 Initialize the cursor for a buffer
2060
2061 SYNOPSIS
2062 translog_cursor_init()
2063 buffer The buffer
2064 cursor It's cursor
2065 buffer_no Number of buffer
2066*/
2067
2068static void translog_cursor_init(struct st_buffer_cursor *cursor,
2069 struct st_translog_buffer *buffer,
2070 uint8 buffer_no)
2071{
2072 DBUG_ENTER("translog_cursor_init");
2073 cursor->ptr= buffer->buffer;
2074 cursor->buffer= buffer;
2075 cursor->buffer_no= buffer_no;
2076 cursor->current_page_fill= 0;
2077 cursor->chaser= (cursor != &log_descriptor.bc);
2078 cursor->write_counter= 0;
2079 cursor->previous_offset= 0;
2080 cursor->protected= 0;
2081 DBUG_VOID_RETURN;
2082}
2083
2084
2085/*
2086 @brief Initialize buffer for the current file, and a cursor for this buffer.
2087
2088 @param buffer The buffer
2089 @param cursor It's cursor
2090 @param buffer_no Number of buffer
2091*/
2092
2093static void translog_start_buffer(struct st_translog_buffer *buffer,
2094 struct st_buffer_cursor *cursor,
2095 uint buffer_no)
2096{
2097 DBUG_ENTER("translog_start_buffer");
2098 DBUG_PRINT("enter",
2099 ("Assign buffer: #%u (%p) offset: 0x%x(%u)",
2100 (uint) buffer->buffer_no, buffer,
2101 (uint) LSN_OFFSET(log_descriptor.horizon),
2102 (uint) LSN_OFFSET(log_descriptor.horizon)));
2103 DBUG_ASSERT(buffer_no == buffer->buffer_no);
2104 buffer->pre_force_close_horizon=
2105 buffer->prev_last_lsn= buffer->last_lsn= LSN_IMPOSSIBLE;
2106 DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: %p",
2107 buffer));
2108 buffer->offset= log_descriptor.horizon;
2109 buffer->next_buffer_offset= LSN_IMPOSSIBLE;
2110 buffer->file= get_current_logfile();
2111 buffer->overlay= 0;
2112 buffer->size= 0;
2113 buffer->skipped_data= 0;
2114 translog_cursor_init(cursor, buffer, buffer_no);
2115 DBUG_PRINT("info", ("file: #%ld (%d) init cursor #%u: %p "
2116 "chaser: %d Size: %lu (%lu)",
2117 (long) (buffer->file ? buffer->file->number : 0),
2118 (buffer->file ? buffer->file->handler.file : -1),
2119 (uint) cursor->buffer->buffer_no, cursor->buffer,
2120 cursor->chaser, (ulong) cursor->buffer->size,
2121 (ulong) (cursor->ptr - cursor->buffer->buffer)));
2122 translog_check_cursor(cursor);
2123 mysql_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
2124 log_descriptor.dirty_buffer_mask|= (1 << buffer->buffer_no);
2125 mysql_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
2126
2127 DBUG_VOID_RETURN;
2128}
2129
2130
2131/*
2132 @brief Switch to the next buffer in a chain.
2133
2134 @param horizon \ Pointers on current position in file and buffer
2135 @param cursor /
2136 @param new_file Also start new file
2137
2138 @note
2139 - loghandler should be locked
2140 - after return new and old buffer still are locked
2141
2142 @retval 0 OK
2143 @retval 1 Error
2144*/
2145
2146static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
2147 struct st_buffer_cursor *cursor,
2148 my_bool new_file)
2149{
2150 uint old_buffer_no= cursor->buffer_no;
2151 uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
2152 struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
2153 my_bool chasing= cursor->chaser;
2154 DBUG_ENTER("translog_buffer_next");
2155
2156 DBUG_PRINT("info", ("horizon: " LSN_FMT " chasing: %d",
2157 LSN_IN_PARTS(log_descriptor.horizon), chasing));
2158
2159 DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);
2160
2161 translog_finish_page(horizon, cursor);
2162
2163 if (!chasing)
2164 {
2165 translog_buffer_lock(new_buffer);
2166#ifndef DBUG_OFF
2167 {
2168 TRANSLOG_ADDRESS offset= new_buffer->offset;
2169 TRANSLOG_FILE *file= new_buffer->file;
2170 uint8 ver= new_buffer->ver;
2171 translog_lock_assert_owner();
2172#endif
2173 translog_wait_for_buffer_free(new_buffer);
2174#ifndef DBUG_OFF
2175 /* We keep the handler locked so nobody can start this new buffer */
2176 DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
2177 (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
2178 }
2179#endif
2180 }
2181 else
2182 DBUG_ASSERT(new_buffer->file != NULL);
2183
2184 if (new_file)
2185 {
2186 /* move the horizon to the next file and its header page */
2187 (*horizon)+= LSN_ONE_FILE;
2188 (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
2189 if (!chasing && translog_create_new_file())
2190 {
2191 DBUG_RETURN(1);
2192 }
2193 }
2194
2195 /* prepare next page */
2196 if (chasing)
2197 translog_cursor_init(cursor, new_buffer, new_buffer_no);
2198 else
2199 {
2200 translog_lock_assert_owner();
2201 translog_start_buffer(new_buffer, cursor, new_buffer_no);
2202 new_buffer->prev_buffer_offset=
2203 log_descriptor.buffers[old_buffer_no].offset;
2204 new_buffer->prev_last_lsn=
2205 BUFFER_MAX_LSN(log_descriptor.buffers + old_buffer_no);
2206 }
2207 log_descriptor.buffers[old_buffer_no].next_buffer_offset= new_buffer->offset;
2208 DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT " buffer:%p",
2209 LSN_IN_PARTS(new_buffer->prev_last_lsn),
2210 new_buffer));
2211 translog_new_page_header(horizon, cursor);
2212 DBUG_RETURN(0);
2213}
2214
2215
2216/*
2217 Sets max LSN sent to file, and address from which data is only in the buffer
2218
2219 SYNOPSIS
2220 translog_set_sent_to_disk()
2221 buffer buffer which we have sent to disk
2222
2223 TODO: use atomic operations if possible (64bit architectures?)
2224*/
2225
2226static void translog_set_sent_to_disk(struct st_translog_buffer *buffer)
2227{
2228 LSN lsn= buffer->last_lsn;
2229 TRANSLOG_ADDRESS in_buffers= buffer->next_buffer_offset;
2230
2231 DBUG_ENTER("translog_set_sent_to_disk");
2232 mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2233 DBUG_PRINT("enter", ("lsn: " LSN_FMT " in_buffers: " LSN_FMT " "
2234 "in_buffers_only: " LSN_FMT " start: " LSN_FMT " "
2235 "sent_to_disk: " LSN_FMT,
2236 LSN_IN_PARTS(lsn),
2237 LSN_IN_PARTS(in_buffers),
2238 LSN_IN_PARTS(log_descriptor.log_start),
2239 LSN_IN_PARTS(log_descriptor.in_buffers_only),
2240 LSN_IN_PARTS(log_descriptor.sent_to_disk)));
2241 /*
2242 We write sequentially (first part of following assert) but we rewrite
2243 the same page in case we started mysql and shut it down immediately
2244 (second part of the following assert)
2245 */
2246 DBUG_ASSERT(cmp_translog_addr(lsn, log_descriptor.sent_to_disk) >= 0 ||
2247 cmp_translog_addr(lsn, log_descriptor.log_start) < 0);
2248 log_descriptor.sent_to_disk= lsn;
2249 /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
2250 if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
2251 {
2252 log_descriptor.in_buffers_only= in_buffers;
2253 DBUG_PRINT("info", ("set new in_buffers_only"));
2254 }
2255 mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2256 DBUG_VOID_RETURN;
2257}
2258
2259
2260/*
2261 Sets address from which data is only in the buffer
2262
2263 SYNOPSIS
2264 translog_set_only_in_buffers()
2265 lsn LSN to assign
2266 in_buffers to assign to in_buffers_only
2267*/
2268
2269static void translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)
2270{
2271 DBUG_ENTER("translog_set_only_in_buffers");
2272 mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2273 DBUG_PRINT("enter", ("in_buffers: " LSN_FMT " "
2274 "in_buffers_only: " LSN_FMT,
2275 LSN_IN_PARTS(in_buffers),
2276 LSN_IN_PARTS(log_descriptor.in_buffers_only)));
2277 /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
2278 if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
2279 {
2280 if (translog_status != TRANSLOG_OK)
2281 goto end;
2282 log_descriptor.in_buffers_only= in_buffers;
2283 DBUG_PRINT("info", ("set new in_buffers_only"));
2284 }
2285end:
2286 mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2287 DBUG_VOID_RETURN;
2288}
2289
2290
2291/*
2292 Gets address from which data is only in the buffer
2293
2294 SYNOPSIS
2295 translog_only_in_buffers()
2296
2297 RETURN
2298 address from which data is only in the buffer
2299*/
2300
2301static TRANSLOG_ADDRESS translog_only_in_buffers()
2302{
2303 register TRANSLOG_ADDRESS addr;
2304 DBUG_ENTER("translog_only_in_buffers");
2305 mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2306 addr= log_descriptor.in_buffers_only;
2307 mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2308 DBUG_RETURN(addr);
2309}
2310
2311
2312/*
2313 Get max LSN sent to file
2314
2315 SYNOPSIS
2316 translog_get_sent_to_disk()
2317
2318 RETURN
2319 max LSN send to file
2320*/
2321
2322static LSN translog_get_sent_to_disk()
2323{
2324 register LSN lsn;
2325 DBUG_ENTER("translog_get_sent_to_disk");
2326 mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2327 lsn= log_descriptor.sent_to_disk;
2328 DBUG_PRINT("info", ("sent to disk up to " LSN_FMT, LSN_IN_PARTS(lsn)));
2329 mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2330 DBUG_RETURN(lsn);
2331}
2332
2333
2334/*
2335 Get first chunk address on the given page
2336
2337 SYNOPSIS
2338 translog_get_first_chunk_offset()
2339 page The page where to find first chunk
2340
2341 RETURN
2342 first chunk offset
2343*/
2344
2345static my_bool translog_get_first_chunk_offset(uchar *page)
2346{
2347 DBUG_ENTER("translog_get_first_chunk_offset");
2348 DBUG_ASSERT(page[TRANSLOG_PAGE_FLAGS] < TRANSLOG_FLAGS_NUM);
2349 DBUG_RETURN(page_overhead[page[TRANSLOG_PAGE_FLAGS]]);
2350}
2351
2352
2353/*
2354 Write coded length of record
2355
2356 SYNOPSIS
2357 translog_write_variable_record_1group_code_len
2358 dst Destination buffer pointer
2359 length Length which should be coded
2360 header_len Calculated total header length
2361*/
2362
2363static void
2364translog_write_variable_record_1group_code_len(uchar *dst,
2365 translog_size_t length,
2366 uint16 header_len)
2367{
2368 switch (header_len) {
2369 case 6: /* (5 + 1) */
2370 DBUG_ASSERT(length <= 250);
2371 *dst= (uint8) length;
2372 return;
2373 case 8: /* (5 + 3) */
2374 DBUG_ASSERT(length <= 0xFFFF);
2375 *dst= 251;
2376 int2store(dst + 1, length);
2377 return;
2378 case 9: /* (5 + 4) */
2379 DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
2380 *dst= 252;
2381 int3store(dst + 1, length);
2382 return;
2383 case 10: /* (5 + 5) */
2384 *dst= 253;
2385 int4store(dst + 1, length);
2386 return;
2387 default:
2388 DBUG_ASSERT(0);
2389 }
2390 return;
2391}
2392
2393
2394/*
2395 Decode record data length and advance given pointer to the next field
2396
2397 SYNOPSIS
2398 translog_variable_record_1group_decode_len()
2399 src The pointer to the pointer to the length beginning
2400
2401 RETURN
2402 decoded length
2403*/
2404
2405static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
2406{
2407 uint8 first= (uint8) (**src);
2408 switch (first) {
2409 case 251:
2410 (*src)+= 3;
2411 return (uint2korr((*src) - 2));
2412 case 252:
2413 (*src)+= 4;
2414 return (uint3korr((*src) - 3));
2415 case 253:
2416 (*src)+= 5;
2417 return (uint4korr((*src) - 4));
2418 case 254:
2419 case 255:
2420 DBUG_ASSERT(0); /* reserved for future use */
2421 return (0);
2422 default:
2423 (*src)++;
2424 return (first);
2425 }
2426}
2427
2428
2429/*
2430 Get total length of this chunk (not only body)
2431
2432 SYNOPSIS
2433 translog_get_total_chunk_length()
2434 page The page where chunk placed
2435 offset Offset of the chunk on this place
2436
2437 RETURN
2438 total length of the chunk
2439*/
2440
2441static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
2442{
2443 DBUG_ENTER("translog_get_total_chunk_length");
2444 switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
2445 case TRANSLOG_CHUNK_LSN:
2446 {
2447 /* 0 chunk referred as LSN (head or tail) */
2448 translog_size_t rec_len;
2449 uchar *start= page + offset;
2450 uchar *ptr= start + 1 + 2; /* chunk type and short trid */
2451 uint16 chunk_len, header_len, page_rest;
2452 DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
2453 rec_len= translog_variable_record_1group_decode_len(&ptr);
2454 chunk_len= uint2korr(ptr);
2455 header_len= (uint16) (ptr -start) + 2;
2456 DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
2457 (ulong) rec_len, (uint) chunk_len, (uint) header_len));
2458 if (chunk_len)
2459 {
2460 DBUG_PRINT("info", ("chunk len: %u + %u = %u",
2461 (uint) header_len, (uint) chunk_len,
2462 (uint) (chunk_len + header_len)));
2463 DBUG_RETURN(chunk_len + header_len);
2464 }
2465 page_rest= TRANSLOG_PAGE_SIZE - offset;
2466 DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
2467 if (rec_len + header_len < page_rest)
2468 DBUG_RETURN(rec_len + header_len);
2469 DBUG_RETURN(page_rest);
2470 }
2471 case TRANSLOG_CHUNK_FIXED:
2472 {
2473 uchar *ptr;
2474 uint type= page[offset] & TRANSLOG_REC_TYPE;
2475 uint length;
2476 int i;
2477 /* 1 (pseudo)fixed record (also LSN) */
2478 DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
2479 DBUG_ASSERT(log_record_type_descriptor[type].rclass ==
2480 LOGRECTYPE_FIXEDLENGTH ||
2481 log_record_type_descriptor[type].rclass ==
2482 LOGRECTYPE_PSEUDOFIXEDLENGTH);
2483 if (log_record_type_descriptor[type].rclass == LOGRECTYPE_FIXEDLENGTH)
2484 {
2485 DBUG_PRINT("info",
2486 ("Fixed length: %u",
2487 (uint) (log_record_type_descriptor[type].fixed_length + 3)));
2488 DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
2489 }
2490
2491 ptr= page + offset + 3; /* first compressed LSN */
2492 length= log_record_type_descriptor[type].fixed_length + 3;
2493 for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++)
2494 {
2495 /* first 2 bits is length - 2 */
2496 uint len= (((uint8) (*ptr)) >> 6) + 2;
2497 if (ptr[0] == 0 && ((uint8) ptr[1]) == 1)
2498 len+= LSN_STORE_SIZE; /* case of full LSN storing */
2499 ptr+= len;
2500 /* subtract saved bytes */
2501 length-= (LSN_STORE_SIZE - len);
2502 }
2503 DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
2504 DBUG_RETURN(length);
2505 }
2506 case TRANSLOG_CHUNK_NOHDR:
2507 /* 2 no header chunk (till page end) */
2508 DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR length: %u",
2509 (uint) (TRANSLOG_PAGE_SIZE - offset)));
2510 DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
2511 case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */
2512 DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
2513 DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
2514 DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3));
2515 DBUG_RETURN(uint2korr(page + offset + 1) + 3);
2516 default:
2517 DBUG_ASSERT(0);
2518 DBUG_RETURN(0);
2519 }
2520}
2521
2522/*
2523 @brief Waits previous buffer flush finish
2524
2525 @param buffer buffer for check
2526
2527 @retval 0 previous buffer flushed and this thread have to flush this one
2528 @retval 1 previous buffer flushed and this buffer flushed by other thread too
2529*/
2530
2531my_bool translog_prev_buffer_flush_wait(struct st_translog_buffer *buffer)
2532{
2533 TRANSLOG_ADDRESS offset= buffer->offset;
2534 TRANSLOG_FILE *file= buffer->file;
2535 uint8 ver= buffer->ver;
2536 DBUG_ENTER("translog_prev_buffer_flush_wait");
2537 DBUG_PRINT("enter", ("buffer: %p #%u offset: " LSN_FMT " "
2538 "prev sent: " LSN_FMT " prev offset: " LSN_FMT,
2539 buffer, (uint) buffer->buffer_no,
2540 LSN_IN_PARTS(buffer->offset),
2541 LSN_IN_PARTS(buffer->prev_sent_to_disk),
2542 LSN_IN_PARTS(buffer->prev_buffer_offset)));
2543 translog_buffer_lock_assert_owner(buffer);
2544 if (buffer->prev_buffer_offset != buffer->prev_sent_to_disk)
2545 {
2546 do {
2547 mysql_cond_wait(&buffer->prev_sent_to_disk_cond, &buffer->mutex);
2548 if (buffer->file != file || buffer->offset != offset ||
2549 buffer->ver != ver)
2550 DBUG_RETURN(1); /* some the thread flushed the buffer already */
2551 } while(buffer->prev_buffer_offset != buffer->prev_sent_to_disk);
2552 }
2553 DBUG_RETURN(0);
2554}
2555
2556
2557/*
2558 Flush given buffer
2559
2560 SYNOPSIS
2561 translog_buffer_flush()
2562 buffer This buffer should be flushed
2563
2564 RETURN
2565 0 OK
2566 1 Error
2567*/
2568
2569static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
2570{
2571 uint32 i, pg;
2572 TRANSLOG_ADDRESS offset= buffer->offset;
2573 TRANSLOG_FILE *file= buffer->file;
2574 uint8 ver= buffer->ver;
2575 uint skipped_data;
2576 DBUG_ENTER("translog_buffer_flush");
2577 DBUG_PRINT("enter",
2578 ("Buffer: #%u %p file: %d offset: " LSN_FMT " size: %lu",
2579 (uint) buffer->buffer_no, buffer,
2580 buffer->file->handler.file,
2581 LSN_IN_PARTS(buffer->offset),
2582 (ulong) buffer->size));
2583 translog_buffer_lock_assert_owner(buffer);
2584
2585 if (buffer->file == NULL)
2586 DBUG_RETURN(0);
2587
2588 translog_wait_for_writers(buffer);
2589
2590 if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
2591 DBUG_RETURN(0); /* some the thread flushed the buffer already */
2592
2593 if (buffer->is_closing_buffer)
2594 {
2595 /* some other flush in progress */
2596 translog_wait_for_closing(buffer);
2597 if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
2598 DBUG_RETURN(0); /* some the thread flushed the buffer already */
2599 }
2600
2601 if (buffer->overlay && translog_prev_buffer_flush_wait(buffer))
2602 DBUG_RETURN(0); /* some the thread flushed the buffer already */
2603
2604 /*
2605 Send page by page in the pagecache what we are going to write on the
2606 disk
2607 */
2608 file= buffer->file;
2609 skipped_data= buffer->skipped_data;
2610 DBUG_ASSERT(skipped_data < TRANSLOG_PAGE_SIZE);
2611 for (i= 0, pg= LSN_OFFSET(buffer->offset) / TRANSLOG_PAGE_SIZE;
2612 i < buffer->size;
2613 i+= TRANSLOG_PAGE_SIZE, pg++)
2614 {
2615#ifndef DBUG_OFF
2616 TRANSLOG_ADDRESS addr= (buffer->offset + i);
2617#endif
2618 DBUG_PRINT("info", ("send log form %lu till %lu address: " LSN_FMT " "
2619 "page #: %lu buffer size: %lu buffer: %p",
2620 (ulong) i, (ulong) (i + TRANSLOG_PAGE_SIZE),
2621 LSN_IN_PARTS(addr), (ulong) pg, (ulong) buffer->size,
2622 buffer));
2623 DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE);
2624 DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size);
2625 if (translog_status != TRANSLOG_OK && translog_status != TRANSLOG_SHUTDOWN)
2626 DBUG_RETURN(1);
2627 if (pagecache_write_part(log_descriptor.pagecache,
2628 &file->handler, pg, 3,
2629 buffer->buffer + i,
2630 PAGECACHE_PLAIN_PAGE,
2631 PAGECACHE_LOCK_LEFT_UNLOCKED,
2632 PAGECACHE_PIN_LEFT_UNPINNED,
2633 PAGECACHE_WRITE_DONE, 0,
2634 LSN_IMPOSSIBLE,
2635 skipped_data,
2636 TRANSLOG_PAGE_SIZE - skipped_data))
2637 {
2638 DBUG_PRINT("error",
2639 ("Can't write page " LSN_FMT " to pagecache, error: %d",
2640 buffer->file->number,
2641 (uint)(LSN_OFFSET(buffer->offset)+ i),
2642 my_errno));
2643 translog_stop_writing();
2644 DBUG_RETURN(1);
2645 }
2646 skipped_data= 0;
2647 }
2648 file->is_sync= 0;
2649 if (my_pwrite(file->handler.file, buffer->buffer + buffer->skipped_data,
2650 buffer->size - buffer->skipped_data,
2651 LSN_OFFSET(buffer->offset) + buffer->skipped_data,
2652 log_write_flags))
2653 {
2654 DBUG_PRINT("error", ("Can't write buffer " LSN_FMT " size %lu "
2655 "to the disk (%d)",
2656 (uint) file->handler.file,
2657 (uint) LSN_OFFSET(buffer->offset),
2658 (ulong) buffer->size, errno));
2659 translog_stop_writing();
2660 DBUG_RETURN(1);
2661 }
2662 /*
2663 Dropping the flag in such way can make false alarm: signalling than the
2664 file in not sync when it is sync, but the situation is quite rare and
2665 protections with mutexes give much more overhead to the whole engine
2666 */
2667 file->is_sync= 0;
2668
2669 if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */
2670 {
2671 if (translog_prev_buffer_flush_wait(buffer))
2672 DBUG_RETURN(0); /* some the thread flushed the buffer already */
2673 translog_set_sent_to_disk(buffer);
2674 }
2675 else
2676 translog_set_only_in_buffers(buffer->next_buffer_offset);
2677
2678 /* say to next buffer that we are finished */
2679 {
2680 struct st_translog_buffer *next_buffer=
2681 log_descriptor.buffers + ((buffer->buffer_no + 1) % TRANSLOG_BUFFERS_NO);
2682 if (likely(translog_status == TRANSLOG_OK)){
2683 translog_buffer_lock(next_buffer);
2684 next_buffer->prev_sent_to_disk= buffer->offset;
2685 translog_buffer_unlock(next_buffer);
2686 mysql_cond_broadcast(&next_buffer->prev_sent_to_disk_cond);
2687 }
2688 else
2689 {
2690 /*
2691 It is shutdown =>
2692 1) there is only one thread
2693 2) mutexes of other buffers can be destroyed => we can't use them
2694 */
2695 next_buffer->prev_sent_to_disk= buffer->offset;
2696 }
2697 }
2698 /* Free buffer */
2699 buffer->file= NULL;
2700 buffer->overlay= 0;
2701 buffer->ver++;
2702 mysql_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
2703 log_descriptor.dirty_buffer_mask&= ~(1 << buffer->buffer_no);
2704 mysql_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
2705 mysql_cond_broadcast(&buffer->waiting_filling_buffer);
2706 DBUG_RETURN(0);
2707}
2708
2709
2710/*
2711 Recover page with sector protection (wipe out failed chunks)
2712
2713 SYNOPSYS
2714 translog_recover_page_up_to_sector()
2715 page reference on the page
2716 offset offset of failed sector
2717
2718 RETURN
2719 0 OK
2720 1 Error
2721*/
2722
2723static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
2724{
2725 uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
2726 DBUG_ENTER("translog_recover_page_up_to_sector");
2727 DBUG_PRINT("enter", ("offset: %u first chunk: %u",
2728 (uint) offset, (uint) chunk_offset));
2729
2730 while (chunk_offset < offset && page[chunk_offset] != TRANSLOG_FILLER)
2731 {
2732 uint16 chunk_length;
2733 if ((chunk_length=
2734 translog_get_total_chunk_length(page, chunk_offset)) == 0)
2735 {
2736 DBUG_PRINT("error", ("cant get chunk length (offset %u)",
2737 (uint) chunk_offset));
2738 DBUG_RETURN(1);
2739 }
2740 DBUG_PRINT("info", ("chunk: offset: %u length %u",
2741 (uint) chunk_offset, (uint) chunk_length));
2742 if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
2743 {
2744 DBUG_PRINT("error", ("damaged chunk (offset %u) in trusted area",
2745 (uint) chunk_offset));
2746 DBUG_RETURN(1);
2747 }
2748 chunk_offset+= chunk_length;
2749 }
2750
2751 valid_chunk_end= chunk_offset;
2752 /* end of trusted area - sector parsing */
2753 while (page[chunk_offset] != TRANSLOG_FILLER)
2754 {
2755 uint16 chunk_length;
2756 if ((chunk_length=
2757 translog_get_total_chunk_length(page, chunk_offset)) == 0)
2758 break;
2759
2760 DBUG_PRINT("info", ("chunk: offset: %u length %u",
2761 (uint) chunk_offset, (uint) chunk_length));
2762 if (((ulong) chunk_offset) + ((ulong) chunk_length) >
2763 (uint) (offset + DISK_DRIVE_SECTOR_SIZE))
2764 break;
2765
2766 chunk_offset+= chunk_length;
2767 valid_chunk_end= chunk_offset;
2768 }
2769 DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));
2770
2771 memset(page + valid_chunk_end, TRANSLOG_FILLER,
2772 TRANSLOG_PAGE_SIZE - valid_chunk_end);
2773
2774 DBUG_RETURN(0);
2775}
2776
2777
2778/**
2779 @brief Checks and removes sector protection.
2780
2781 @param page reference on the page content.
2782 @param file transaction log descriptor.
2783
2784 @retvat 0 OK
2785 @retval 1 Error
2786*/
2787
2788static my_bool
2789translog_check_sector_protection(uchar *page, TRANSLOG_FILE *file)
2790{
2791 uint i, offset;
2792 uchar *table= page + page_overhead[page[TRANSLOG_PAGE_FLAGS]] -
2793 TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2794 uint8 current= table[0];
2795 DBUG_ENTER("translog_check_sector_protection");
2796
2797 for (i= 1, offset= DISK_DRIVE_SECTOR_SIZE;
2798 i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2799 i++, offset+= DISK_DRIVE_SECTOR_SIZE)
2800 {
2801 /*
2802 TODO: add chunk counting for "suspecting" sectors (difference is
2803 more than 1-2), if difference more then present chunks then it is
2804 the problem.
2805 */
2806 uint8 test= page[offset];
2807 DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
2808 "read: 0x%x stored: 0x%x%x",
2809 i, offset, (ulong) current,
2810 (uint) uint2korr(page + offset), (uint) table[i],
2811 (uint) table[i + 1]));
2812 /*
2813 3 is minimal possible record length. So we can have "distance"
2814 between 2 sectors value more then DISK_DRIVE_SECTOR_SIZE / 3
2815 only if it is old value, i.e. the sector was not written.
2816 */
2817 if (((test < current) &&
2818 ((uint)(0xFFL - current + test) > DISK_DRIVE_SECTOR_SIZE / 3)) ||
2819 ((test >= current) &&
2820 ((uint)(test - current) > DISK_DRIVE_SECTOR_SIZE / 3)))
2821 {
2822 if (translog_recover_page_up_to_sector(page, offset))
2823 DBUG_RETURN(1);
2824 file->was_recovered= 1;
2825 DBUG_RETURN(0);
2826 }
2827
2828 /* Restore value on the page */
2829 page[offset]= table[i];
2830 current= test;
2831 DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
2832 "read: 0x%x stored: 0x%x",
2833 i, offset, (ulong) current,
2834 (uint) page[offset], (uint) table[i]));
2835 }
2836 DBUG_RETURN(0);
2837}
2838
2839
2840/**
2841 @brief Log page validator (read callback)
2842
2843 @param page The page data to check
2844 @param page_no The page number (<offset>/<page length>)
2845 @param data_ptr Read callback data pointer (pointer to TRANSLOG_FILE)
2846
2847 @todo: add turning loghandler to read-only mode after merging with
2848 that patch.
2849
2850 @retval 0 OK
2851 @retval 1 Error
2852*/
2853
2854static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args)
2855{
2856 uchar *page= args->page;
2857 pgcache_page_no_t page_no= args->pageno;
2858 uint this_page_page_overhead;
2859 uint flags;
2860 uchar *page_pos;
2861 TRANSLOG_FILE *data= (TRANSLOG_FILE *) args->data;
2862#ifndef DBUG_OFF
2863 pgcache_page_no_t offset= page_no * TRANSLOG_PAGE_SIZE;
2864#endif
2865 DBUG_ENTER("translog_page_validator");
2866
2867 data->was_recovered= 0;
2868
2869 if (res)
2870 {
2871 DBUG_RETURN(1);
2872 }
2873
2874 if ((pgcache_page_no_t) uint3korr(page) != page_no ||
2875 (uint32) uint3korr(page + 3) != data->number)
2876 {
2877 DBUG_PRINT("error", ("Page " LSN_FMT ": "
2878 "page address written in the page is incorrect: "
2879 "File %lu instead of %lu or page %lu instead of %lu",
2880 (uint)data->number, (uint)offset,
2881 (ulong) uint3korr(page + 3), (ulong) data->number,
2882 (ulong) uint3korr(page),
2883 (ulong) page_no));
2884 DBUG_RETURN(1);
2885 }
2886 flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
2887 this_page_page_overhead= page_overhead[flags];
2888 if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
2889 TRANSLOG_RECORD_CRC))
2890 {
2891 DBUG_PRINT("error", ("Page " LSN_FMT ": "
2892 "Garbage in the page flags field detected : %x",
2893 (uint) data->number, (uint) offset,
2894 (uint) flags));
2895 DBUG_RETURN(1);
2896 }
2897 page_pos= page + (3 + 3 + 1);
2898 if (flags & TRANSLOG_PAGE_CRC)
2899 {
2900 uint32 crc= translog_crc(page + this_page_page_overhead,
2901 TRANSLOG_PAGE_SIZE -
2902 this_page_page_overhead);
2903 if (crc != uint4korr(page_pos))
2904 {
2905 DBUG_PRINT("error", ("Page " LSN_FMT ": "
2906 "CRC mismatch: calculated: %lx on the page %lx",
2907 (uint) data->number, (uint) offset,
2908 (ulong) crc, (ulong) uint4korr(page_pos)));
2909 DBUG_RETURN(1);
2910 }
2911 page_pos+= CRC_SIZE; /* Skip crc */
2912 }
2913 if (flags & TRANSLOG_SECTOR_PROTECTION &&
2914 translog_check_sector_protection(page, data))
2915 {
2916 DBUG_RETURN(1);
2917 }
2918 DBUG_RETURN(0);
2919}
2920
2921
2922/**
2923 @brief Locks the loghandler.
2924*/
2925
2926void translog_lock()
2927{
2928 uint8 current_buffer;
2929 DBUG_ENTER("translog_lock");
2930
2931 /*
2932 Locking the loghandler mean locking current buffer, but it can change
2933 during locking, so we should check it
2934 */
2935 for (;;)
2936 {
2937 /*
2938 log_descriptor.bc.buffer_no is only one byte so its reading is
2939 an atomic operation
2940 */
2941 current_buffer= log_descriptor.bc.buffer_no;
2942 translog_buffer_lock(log_descriptor.buffers + current_buffer);
2943 if (log_descriptor.bc.buffer_no == current_buffer)
2944 break;
2945 translog_buffer_unlock(log_descriptor.buffers + current_buffer);
2946 }
2947 DBUG_VOID_RETURN;
2948}
2949
2950
2951/*
2952 Unlock the loghandler
2953
2954 SYNOPSIS
2955 translog_unlock()
2956
2957 RETURN
2958 0 OK
2959 1 Error
2960*/
2961
2962void translog_unlock()
2963{
2964 translog_buffer_unlock(log_descriptor.bc.buffer);
2965}
2966
2967
2968/**
2969 @brief Get log page by file number and offset of the beginning of the page
2970
2971 @param data validator data, which contains the page address
2972 @param buffer buffer for page placing
2973 (might not be used in some cache implementations)
2974 @param direct_link if it is not NULL then caller can accept direct
2975 link to the page cache
2976
2977 @retval NULL Error
2978 @retval # pointer to the page cache which should be used to read this page
2979*/
2980
2981static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
2982 PAGECACHE_BLOCK_LINK **direct_link)
2983{
2984 TRANSLOG_ADDRESS addr= *(data->addr), in_buffers;
2985 uint32 file_no= LSN_FILE_NO(addr);
2986 TRANSLOG_FILE *file;
2987 DBUG_ENTER("translog_get_page");
2988 DBUG_PRINT("enter", ("File: %u Offset: %u(0x%x)",
2989 file_no,
2990 (uint) LSN_OFFSET(addr),
2991 (uint) LSN_OFFSET(addr)));
2992
2993 /* it is really page address */
2994 DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
2995 if (direct_link)
2996 *direct_link= NULL;
2997
2998restart:
2999
3000 in_buffers= translog_only_in_buffers();
3001 DBUG_PRINT("info", ("in_buffers: " LSN_FMT,
3002 LSN_IN_PARTS(in_buffers)));
3003 if (in_buffers != LSN_IMPOSSIBLE &&
3004 cmp_translog_addr(addr, in_buffers) >= 0)
3005 {
3006 translog_lock();
3007 DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
3008 /* recheck with locked loghandler */
3009 in_buffers= translog_only_in_buffers();
3010 if (cmp_translog_addr(addr, in_buffers) >= 0)
3011 {
3012 uint16 buffer_no= log_descriptor.bc.buffer_no;
3013#ifdef DBUG_ASSERT_EXISTS
3014 uint16 buffer_start= buffer_no;
3015#endif
3016 struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
3017 struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer;
3018 for (;;)
3019 {
3020 /*
3021 if the page is in the buffer and it is the last version of the
3022 page (in case of division the page by buffer flush)
3023 */
3024 if (curr_buffer->file != NULL &&
3025 cmp_translog_addr(addr, curr_buffer->offset) >= 0 &&
3026 cmp_translog_addr(addr,
3027 (curr_buffer->next_buffer_offset ?
3028 curr_buffer->next_buffer_offset:
3029 curr_buffer->offset + curr_buffer->size)) < 0)
3030 {
3031 TRANSLOG_ADDRESS offset= curr_buffer->offset;
3032 TRANSLOG_FILE *fl= curr_buffer->file;
3033 uchar *from, *table= NULL;
3034 int is_last_unfinished_page;
3035 uint last_protected_sector= 0;
3036 uint skipped_data= curr_buffer->skipped_data;
3037 TRANSLOG_FILE file_copy;
3038 uint8 ver= curr_buffer->ver;
3039 translog_wait_for_writers(curr_buffer);
3040 if (offset != curr_buffer->offset || fl != curr_buffer->file ||
3041 ver != curr_buffer->ver)
3042 {
3043 DBUG_ASSERT(buffer_unlock == curr_buffer);
3044 translog_buffer_unlock(buffer_unlock);
3045 goto restart;
3046 }
3047 DBUG_ASSERT(LSN_FILE_NO(addr) == LSN_FILE_NO(curr_buffer->offset));
3048 from= curr_buffer->buffer + (addr - curr_buffer->offset);
3049 if (skipped_data && addr == curr_buffer->offset)
3050 {
3051 /*
3052 We read page part of which is not present in buffer,
3053 so we should read absent part from file (page cache actually)
3054 */
3055 file= get_logfile_by_number(file_no);
3056 DBUG_ASSERT(file != NULL);
3057 /*
3058 it's ok to not lock the page because:
3059 - The log handler has it's own page cache.
3060 - There is only one thread that can access the log
3061 cache at a time
3062 */
3063 if (!(buffer= pagecache_read(log_descriptor.pagecache,
3064 &file->handler,
3065 LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
3066 3, buffer,
3067 PAGECACHE_PLAIN_PAGE,
3068 PAGECACHE_LOCK_LEFT_UNLOCKED,
3069 NULL)))
3070 DBUG_RETURN(NULL);
3071 }
3072 else
3073 skipped_data= 0; /* Read after skipped in buffer data */
3074 /*
3075 Now we have correct data in buffer up to 'skipped_data'. The
3076 following memcpy() will move the data from the internal buffer
3077 that was not yet on disk.
3078 */
3079 memcpy(buffer + skipped_data, from + skipped_data,
3080 TRANSLOG_PAGE_SIZE - skipped_data);
3081 /*
3082 We can use copy then in translog_page_validator() because it
3083 do not put it permanently somewhere.
3084 We have to use copy because after releasing log lock we can't
3085 guaranty that the file still be present (in real life it will be
3086 present but theoretically possible that it will be released
3087 already from last files cache);
3088 */
3089 file_copy= *(curr_buffer->file);
3090 file_copy.handler.callback_data= (uchar*) &file_copy;
3091 is_last_unfinished_page= ((log_descriptor.bc.buffer ==
3092 curr_buffer) &&
3093 (log_descriptor.bc.ptr >= from) &&
3094 (log_descriptor.bc.ptr <
3095 from + TRANSLOG_PAGE_SIZE));
3096 if (is_last_unfinished_page &&
3097 (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION))
3098 {
3099 last_protected_sector= ((log_descriptor.bc.previous_offset - 1) /
3100 DISK_DRIVE_SECTOR_SIZE);
3101 table= buffer + log_descriptor.page_overhead -
3102 TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
3103 }
3104
3105 DBUG_ASSERT(buffer_unlock == curr_buffer);
3106 translog_buffer_unlock(buffer_unlock);
3107 if (is_last_unfinished_page)
3108 {
3109 uint i;
3110 /*
3111 This is last unfinished page => we should not check CRC and
3112 remove only that protection which already installed (no need
3113 to check it)
3114
3115 We do not check the flag of sector protection, because if
3116 (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) is
3117 not set then last_protected_sector will be 0 so following loop
3118 will be never executed
3119 */
3120 DBUG_PRINT("info", ("This is last unfinished page, "
3121 "last protected sector %u",
3122 last_protected_sector));
3123 for (i= 1; i <= last_protected_sector; i++)
3124 {
3125 uint offset= i * DISK_DRIVE_SECTOR_SIZE;
3126 DBUG_PRINT("info", ("Sector %u: 0x%02x <- 0x%02x",
3127 i, buffer[offset],
3128 table[i]));
3129 buffer[offset]= table[i];
3130 }
3131 }
3132 else
3133 {
3134 /*
3135 This IF should be true because we use in-memory data which
3136 supposed to be correct.
3137 */
3138 PAGECACHE_IO_HOOK_ARGS args;
3139 args.page= buffer;
3140 args.pageno= LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE;
3141 args.data= (uchar*) &file_copy;
3142 if (translog_page_validator(0, &args))
3143 {
3144 DBUG_ASSERT(0);
3145 buffer= NULL;
3146 }
3147 }
3148 DBUG_RETURN(buffer);
3149 }
3150 buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
3151 curr_buffer= log_descriptor.buffers + buffer_no;
3152 translog_buffer_lock(curr_buffer);
3153 translog_buffer_unlock(buffer_unlock);
3154 buffer_unlock= curr_buffer;
3155 /* we can't make a full circle */
3156 DBUG_ASSERT(buffer_start != buffer_no);
3157 }
3158 }
3159 translog_unlock();
3160 }
3161 file= get_logfile_by_number(file_no);
3162 DBUG_ASSERT(file != NULL);
3163 buffer= pagecache_read(log_descriptor.pagecache, &file->handler,
3164 LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
3165 3, (direct_link ? NULL : buffer),
3166 PAGECACHE_PLAIN_PAGE,
3167 (direct_link ?
3168 PAGECACHE_LOCK_READ :
3169 PAGECACHE_LOCK_LEFT_UNLOCKED),
3170 direct_link);
3171 DBUG_PRINT("info", ("Direct link is assigned to : %p * %p",
3172 direct_link,
3173 (direct_link ? *direct_link : NULL)));
3174 data->was_recovered= file->was_recovered;
3175 DBUG_RETURN(buffer);
3176}
3177
3178
3179/**
3180 @brief free direct log page link
3181
3182 @param direct_link the direct log page link to be freed
3183
3184*/
3185
3186static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
3187{
3188 DBUG_ENTER("translog_free_link");
3189 DBUG_PRINT("info", ("Direct link: %p",
3190 direct_link));
3191 if (direct_link)
3192 pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
3193 PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
3194 LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE);
3195 DBUG_VOID_RETURN;
3196}
3197
3198
3199/**
3200 @brief Finds last full page of the given log file.
3201
3202 @param addr address structure to fill with data, which contain
3203 file number of the log file
3204 @param last_page_ok Result of the check whether last page OK.
3205 (for now only we check only that file length
3206 divisible on page length).
3207 @param no_errors suppress messages about non-critical errors
3208
3209 @retval 0 OK
3210 @retval 1 Error
3211*/
3212
3213static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
3214 my_bool *last_page_ok,
3215 my_bool no_errors)
3216{
3217 char path[FN_REFLEN];
3218 uint32 rec_offset;
3219 my_off_t file_size;
3220 uint32 file_no= LSN_FILE_NO(*addr);
3221 TRANSLOG_FILE *file;
3222#ifndef DBUG_OFF
3223 char buff[21];
3224#endif
3225 DBUG_ENTER("translog_get_last_page_addr");
3226
3227 if (likely((file= get_logfile_by_number(file_no)) != NULL))
3228 {
3229 /*
3230 This function used only during initialization of loghandler or in
3231 scanner (which mean we need read that part of the log), so the
3232 requested log file have to be opened and can't be freed after
3233 returning pointer on it (file_size).
3234 */
3235 file_size= mysql_file_seek(file->handler.file, 0, SEEK_END, MYF(0));
3236 }
3237 else
3238 {
3239 /*
3240 This branch is used only during very early initialization
3241 when files are not opened.
3242 */
3243 File fd;
3244 if ((fd= mysql_file_open(key_file_translog,
3245 translog_filename_by_fileno(file_no, path),
3246 O_RDONLY, (no_errors ? MYF(0) : MYF(MY_WME)))) < 0)
3247 {
3248 my_errno= errno;
3249 DBUG_PRINT("error", ("Error %d during opening file #%d",
3250 errno, file_no));
3251 DBUG_RETURN(1);
3252 }
3253 file_size= mysql_file_seek(fd, 0, SEEK_END, MYF(0));
3254 mysql_file_close(fd, MYF(0));
3255 }
3256 DBUG_PRINT("info", ("File size: %s", llstr(file_size, buff)));
3257 if (file_size == MY_FILEPOS_ERROR)
3258 DBUG_RETURN(1);
3259 DBUG_ASSERT(file_size < 0xffffffffULL);
3260 if (((uint32)file_size) > TRANSLOG_PAGE_SIZE)
3261 {
3262 rec_offset= (((((uint32)file_size) / TRANSLOG_PAGE_SIZE) - 1) *
3263 TRANSLOG_PAGE_SIZE);
3264 *last_page_ok= (((uint32)file_size) == rec_offset + TRANSLOG_PAGE_SIZE);
3265 }
3266 else
3267 {
3268 *last_page_ok= 0;
3269 rec_offset= 0;
3270 }
3271 *addr= MAKE_LSN(file_no, rec_offset);
3272 DBUG_PRINT("info", ("Last page: 0x%lx ok: %d", (ulong) rec_offset,
3273 *last_page_ok));
3274 DBUG_RETURN(0);
3275}
3276
3277
3278/**
3279 @brief Get number bytes for record length storing
3280
3281 @param length Record length which will be encoded
3282
3283 @return 1,3,4,5 - number of bytes to store given length
3284*/
3285
3286static uint translog_variable_record_length_bytes(translog_size_t length)
3287{
3288 if (length < 250)
3289 return 1;
3290 if (length < 0xFFFF)
3291 return 3;
3292 if (length < (ulong) 0xFFFFFF)
3293 return 4;
3294 return 5;
3295}
3296
3297
3298/**
3299 @brief Gets header of this chunk.
3300
3301 @param chunk The pointer to the chunk beginning
3302
3303 @retval # total length of the chunk
3304 @retval 0 Error
3305*/
3306
3307static uint16 translog_get_chunk_header_length(uchar *chunk)
3308{
3309 DBUG_ENTER("translog_get_chunk_header_length");
3310 switch (*chunk & TRANSLOG_CHUNK_TYPE) {
3311 case TRANSLOG_CHUNK_LSN:
3312 {
3313 /* 0 chunk referred as LSN (head or tail) */
3314 translog_size_t rec_len __attribute__((unused));
3315 uchar *start= chunk;
3316 uchar *ptr= start + 1 + 2;
3317 uint16 chunk_len, header_len;
3318 DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
3319 rec_len= translog_variable_record_1group_decode_len(&ptr);
3320 chunk_len= uint2korr(ptr);
3321 header_len= (uint16) (ptr - start) +2;
3322 DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
3323 (ulong) rec_len, (uint) chunk_len, (uint) header_len));
3324 if (chunk_len)
3325 {
3326 /* TODO: fine header end */
3327 /*
3328 The last chunk of multi-group record can be base for it header
3329 calculation (we skip to the first group to read the header) so if we
3330 stuck here something is wrong.
3331 */
3332 DBUG_ASSERT(0);
3333 DBUG_RETURN(0); /* Keep compiler happy */
3334 }
3335 DBUG_RETURN(header_len);
3336 }
3337 case TRANSLOG_CHUNK_FIXED:
3338 {
3339 /* 1 (pseudo)fixed record (also LSN) */
3340 DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
3341 DBUG_RETURN(3);
3342 }
3343 case TRANSLOG_CHUNK_NOHDR:
3344 /* 2 no header chunk (till page end) */
3345 DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
3346 DBUG_RETURN(1);
3347 break;
3348 case TRANSLOG_CHUNK_LNGTH:
3349 /* 3 chunk with chunk length */
3350 DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
3351 DBUG_RETURN(3);
3352 break;
3353 default:
3354 DBUG_ASSERT(0);
3355 DBUG_RETURN(0); /* Keep compiler happy */
3356 }
3357}
3358
3359
3360/**
3361 @brief Truncate the log to the given address. Used during the startup if the
3362 end of log if corrupted.
3363
3364 @param addr new horizon
3365
3366 @retval 0 OK
3367 @retval 1 Error
3368*/
3369
3370static my_bool translog_truncate_log(TRANSLOG_ADDRESS addr)
3371{
3372 uchar *page;
3373 TRANSLOG_ADDRESS current_page;
3374 uint32 next_page_offset, page_rest;
3375 uint32 i;
3376 File fd;
3377 int rc;
3378 TRANSLOG_VALIDATOR_DATA data;
3379 char path[FN_REFLEN];
3380 uchar page_buff[TRANSLOG_PAGE_SIZE];
3381 DBUG_ENTER("translog_truncate_log");
3382 /* TODO: write warning to the client */
3383 DBUG_PRINT("warning", ("removing all records from " LSN_FMT " "
3384 "till " LSN_FMT,
3385 LSN_IN_PARTS(addr),
3386 LSN_IN_PARTS(log_descriptor.horizon)));
3387 DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
3388 /* remove files between the address and horizon */
3389 for (i= LSN_FILE_NO(addr) + 1; i <= LSN_FILE_NO(log_descriptor.horizon); i++)
3390 if (mysql_file_delete(key_file_translog,
3391 translog_filename_by_fileno(i, path), MYF(MY_WME)))
3392 {
3393 translog_unlock();
3394 DBUG_RETURN(1);
3395 }
3396
3397 /* truncate the last file up to the last page */
3398 next_page_offset= LSN_OFFSET(addr);
3399 next_page_offset= (next_page_offset -
3400 ((next_page_offset - 1) % TRANSLOG_PAGE_SIZE + 1) +
3401 TRANSLOG_PAGE_SIZE);
3402 page_rest= next_page_offset - LSN_OFFSET(addr);
3403 memset(page_buff, TRANSLOG_FILLER, page_rest);
3404 rc= ((fd= open_logfile_by_number_no_cache(LSN_FILE_NO(addr))) < 0 ||
3405 ((mysql_file_chsize(fd, next_page_offset, TRANSLOG_FILLER, MYF(MY_WME)) ||
3406 (page_rest && my_pwrite(fd, page_buff, page_rest, LSN_OFFSET(addr),
3407 log_write_flags)) ||
3408 mysql_file_sync(fd, MYF(MY_WME)))));
3409 translog_syncs++;
3410 rc|= (fd > 0 && mysql_file_close(fd, MYF(MY_WME)));
3411 if (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS)
3412 {
3413 rc|= sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD));
3414 translog_syncs++;
3415 }
3416 if (rc)
3417 DBUG_RETURN(1);
3418
3419 /* fix the horizon */
3420 log_descriptor.horizon= addr;
3421 /* fix the buffer data */
3422 current_page= MAKE_LSN(LSN_FILE_NO(addr), (next_page_offset -
3423 TRANSLOG_PAGE_SIZE));
3424 data.addr= &current_page;
3425 if ((page= translog_get_page(&data, log_descriptor.buffers->buffer, NULL)) ==
3426 NULL)
3427 DBUG_RETURN(1);
3428 if (page != log_descriptor.buffers->buffer)
3429 memcpy(log_descriptor.buffers->buffer, page, TRANSLOG_PAGE_SIZE);
3430 log_descriptor.bc.buffer->offset= current_page;
3431 log_descriptor.bc.buffer->size= LSN_OFFSET(addr) - LSN_OFFSET(current_page);
3432 log_descriptor.bc.ptr=
3433 log_descriptor.buffers->buffer + log_descriptor.bc.buffer->size;
3434 log_descriptor.bc.current_page_fill= log_descriptor.bc.buffer->size;
3435 DBUG_RETURN(0);
3436}
3437
3438
3439/**
3440 Applies function 'callback' to all files (in a directory) which
3441 name looks like a log's name (aria_log.[0-9]{7}).
3442 If 'callback' returns TRUE this interrupts the walk and returns
3443 TRUE. Otherwise FALSE is returned after processing all log files.
3444 It cannot just use log_descriptor.directory because that may not yet have
3445 been initialized.
3446
3447 @param directory directory to scan
3448 @param callback function to apply; is passed directory and base
3449 name of found file
3450*/
3451
3452my_bool translog_walk_filenames(const char *directory,
3453 my_bool (*callback)(const char *,
3454 const char *))
3455{
3456 MY_DIR *dirp;
3457 uint i;
3458 my_bool rc= FALSE;
3459
3460 /* Finds and removes transaction log files */
3461 if (!(dirp = my_dir(directory, MYF(MY_DONT_SORT))))
3462 return FALSE;
3463
3464 for (i= 0; i < dirp->number_of_files; i++)
3465 {
3466 char *file= dirp->dir_entry[i].name;
3467 if (strncmp(file, "aria_log.", 10) == 0 &&
3468 file[10] >= '0' && file[10] <= '9' &&
3469 file[11] >= '0' && file[11] <= '9' &&
3470 file[12] >= '0' && file[12] <= '9' &&
3471 file[13] >= '0' && file[13] <= '9' &&
3472 file[14] >= '0' && file[14] <= '9' &&
3473 file[15] >= '0' && file[15] <= '9' &&
3474 file[16] >= '0' && file[16] <= '9' &&
3475 file[17] >= '0' && file[17] <= '9' &&
3476 file[18] == '\0' && (*callback)(directory, file))
3477 {
3478 rc= TRUE;
3479 break;
3480 }
3481 }
3482 my_dirend(dirp);
3483 return rc;
3484}
3485
3486
3487/**
3488 @brief Fills table of dependence length of page header from page flags
3489*/
3490
3491void translog_fill_overhead_table()
3492{
3493 uint i;
3494 for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
3495 {
3496 page_overhead[i]= 7;
3497 if (i & TRANSLOG_PAGE_CRC)
3498 page_overhead[i]+= CRC_SIZE;
3499 if (i & TRANSLOG_SECTOR_PROTECTION)
3500 page_overhead[i]+= TRANSLOG_PAGE_SIZE /
3501 DISK_DRIVE_SECTOR_SIZE;
3502 }
3503}
3504
3505
3506/**
3507 Callback to find first log in directory.
3508*/
3509
3510static my_bool translog_callback_search_first(const char *directory
3511 __attribute__((unused)),
3512 const char *filename
3513 __attribute__((unused)))
3514{
3515 return TRUE;
3516}
3517
3518
3519/**
3520 @brief Checks that chunk is LSN one
3521
3522 @param type type of the chunk
3523
3524 @retval 1 the chunk is LNS
3525 @retval 0 the chunk is not LSN
3526*/
3527
3528static my_bool translog_is_LSN_chunk(uchar type)
3529{
3530 DBUG_ENTER("translog_is_LSN_chunk");
3531 DBUG_PRINT("info", ("byte: %x chunk type: %u record type: %u",
3532 type, type >> 6, type & TRANSLOG_REC_TYPE));
3533 DBUG_RETURN(((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_FIXED) ||
3534 (((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_LSN) &&
3535 ((type & TRANSLOG_REC_TYPE)) != TRANSLOG_CHUNK_0_CONT));
3536}
3537
3538
3539/**
3540 @brief Initialize transaction log
3541
3542 @param directory Directory where log files are put
  @param log_file_max_size  max size of one log file (for new logs creation)
3544 @param server_version version of MySQL server (MYSQL_VERSION_ID)
3545 @param server_id server ID (replication & Co)
3546 @param pagecache Page cache for the log reads
3547 @param flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
3548 TRANSLOG_RECORD_CRC)
  @param readonly      Put transaction log in read-only mode
3550 @param init_table_func function to initialize record descriptors table
3551 @param no_errors suppress messages about non-critical errors
3552
3553 @todo
3554 Free used resources in case of error.
3555
3556 @retval 0 OK
3557 @retval 1 Error
3558*/
3559
3560my_bool translog_init_with_table(const char *directory,
3561 uint32 log_file_max_size,
3562 uint32 server_version,
3563 uint32 server_id, PAGECACHE *pagecache,
3564 uint flags, my_bool readonly,
3565 void (*init_table_func)(),
3566 my_bool no_errors)
3567{
3568 int i;
3569 int old_log_was_recovered= 0, logs_found= 0;
3570 uint old_flags= flags;
3571 uint32 start_file_num= 1;
3572 TRANSLOG_ADDRESS sure_page, last_page, last_valid_page, checkpoint_lsn;
3573 my_bool version_changed= 0;
3574 DBUG_ENTER("translog_init_with_table");
3575
3576 translog_syncs= 0;
3577 flush_start= 0;
3578 id_to_share= NULL;
3579
3580 log_descriptor.directory_fd= -1;
3581 log_descriptor.is_everything_flushed= 1;
3582 log_descriptor.flush_in_progress= 0;
3583 log_descriptor.flush_no= 0;
3584 log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
3585
3586 /* Normally in Aria this this calls translog_table_init() */
3587 (*init_table_func)();
3588 compile_time_assert(sizeof(log_descriptor.dirty_buffer_mask) * 8 >=
3589 TRANSLOG_BUFFERS_NO);
3590 log_descriptor.dirty_buffer_mask= 0;
3591 if (readonly)
3592 log_descriptor.open_flags= O_BINARY | O_RDONLY;
3593 else
3594 log_descriptor.open_flags= O_BINARY | O_RDWR;
3595 if (mysql_mutex_init(key_TRANSLOG_BUFFER_mutex,
3596 &log_descriptor.sent_to_disk_lock, MY_MUTEX_INIT_FAST) ||
3597 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_file_header_lock,
3598 &log_descriptor.file_header_lock, MY_MUTEX_INIT_FAST) ||
3599 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_unfinished_files_lock,
3600 &log_descriptor.unfinished_files_lock, MY_MUTEX_INIT_FAST) ||
3601 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_purger_lock,
3602 &log_descriptor.purger_lock, MY_MUTEX_INIT_FAST) ||
3603 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_log_flush_lock,
3604 &log_descriptor.log_flush_lock, MY_MUTEX_INIT_FAST) ||
3605 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_dirty_buffer_mask_lock,
3606 &log_descriptor.dirty_buffer_mask_lock, MY_MUTEX_INIT_FAST) ||
3607 mysql_cond_init(key_TRANSLOG_DESCRIPTOR_log_flush_cond,
3608 &log_descriptor.log_flush_cond, 0) ||
3609 mysql_cond_init(key_TRANSLOG_DESCRIPTOR_new_goal_cond,
3610 &log_descriptor.new_goal_cond, 0) ||
3611 mysql_rwlock_init(key_TRANSLOG_DESCRIPTOR_open_files_lock,
3612 &log_descriptor.open_files_lock) ||
3613 my_init_dynamic_array(&log_descriptor.open_files,
3614 sizeof(TRANSLOG_FILE*), 10, 10, MYF(0)) ||
3615 my_init_dynamic_array(&log_descriptor.unfinished_files,
3616 sizeof(struct st_file_counter),
3617 10, 10, MYF(0)))
3618 goto err;
3619 log_descriptor.min_need_file= 0;
3620 log_descriptor.min_file_number= 0;
3621 log_descriptor.last_lsn_checked= LSN_IMPOSSIBLE;
3622
3623 /* Directory to store files */
3624 unpack_dirname(log_descriptor.directory, directory);
3625#ifndef __WIN__
3626 if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
3627 O_RDONLY, MYF(MY_WME))) < 0)
3628 {
3629 my_errno= errno;
3630 DBUG_PRINT("error", ("Error %d during opening directory '%s'",
3631 errno, log_descriptor.directory));
3632 goto err;
3633 }
3634#endif
3635 log_descriptor.in_buffers_only= LSN_IMPOSSIBLE;
3636 DBUG_ASSERT(log_file_max_size % TRANSLOG_PAGE_SIZE == 0 &&
3637 log_file_max_size >= TRANSLOG_MIN_FILE_SIZE);
3638 /* max size of one log size (for new logs creation) */
3639 log_file_size= log_descriptor.log_file_max_size=
3640 log_file_max_size;
3641 /* server version */
3642 log_descriptor.server_version= server_version;
3643 /* server ID */
3644 log_descriptor.server_id= server_id;
3645 /* Page cache for the log reads */
3646 log_descriptor.pagecache= pagecache;
3647 /* Flags */
3648 DBUG_ASSERT((flags &
3649 ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
3650 TRANSLOG_RECORD_CRC)) == 0);
3651 log_descriptor.flags= flags;
3652 translog_fill_overhead_table();
3653 log_descriptor.page_overhead= page_overhead[flags];
3654 log_descriptor.page_capacity_chunk_2=
3655 TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
3656 compile_time_assert(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
3657 log_descriptor.buffer_capacity_chunk_2=
3658 (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
3659 log_descriptor.page_capacity_chunk_2;
3660 log_descriptor.half_buffer_capacity_chunk_2=
3661 log_descriptor.buffer_capacity_chunk_2 / 2;
3662 DBUG_PRINT("info",
3663 ("Overhead: %u pc2: %u bc2: %u, bc2/2: %u",
3664 log_descriptor.page_overhead,
3665 log_descriptor.page_capacity_chunk_2,
3666 log_descriptor.buffer_capacity_chunk_2,
3667 log_descriptor.half_buffer_capacity_chunk_2));
3668
3669 /* Just to init it somehow (hack for bootstrap)*/
3670 {
3671 TRANSLOG_FILE *file= 0;
3672 log_descriptor.min_file = log_descriptor.max_file= 1;
3673 insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
3674 translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3675 pop_dynamic(&log_descriptor.open_files);
3676 }
3677
3678 /* Buffers for log writing */
3679 for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
3680 {
3681 if (translog_buffer_init(log_descriptor.buffers + i, i))
3682 goto err;
3683 DBUG_PRINT("info", ("translog_buffer buffer #%u:%p",
3684 i, log_descriptor.buffers + i));
3685 }
3686
3687 /*
3688 last_logno and last_checkpoint_lsn were set in
3689 ma_control_file_create_or_open()
3690 */
3691 logs_found= (last_logno != FILENO_IMPOSSIBLE);
3692
3693 translog_status= (readonly ? TRANSLOG_READONLY : TRANSLOG_OK);
3694 checkpoint_lsn= last_checkpoint_lsn;
3695
3696 if (logs_found)
3697 {
3698 my_bool pageok;
3699 DBUG_PRINT("info", ("log found..."));
3700 /*
3701 TODO: scan directory for aria_log.XXXXXXXX files and find
3702 highest XXXXXXXX & set logs_found
3703 TODO: check that last checkpoint within present log addresses space
3704
3705 find the log end
3706 */
3707 if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE)
3708 {
3709 DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
3710 /* only last log needs to be checked */
3711 sure_page= MAKE_LSN(last_logno, TRANSLOG_PAGE_SIZE);
3712 }
3713 else
3714 {
3715 sure_page= last_checkpoint_lsn;
3716 DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
3717 sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
3718 }
3719 /* Set horizon to the beginning of the last file first */
3720 log_descriptor.horizon= last_page= MAKE_LSN(last_logno, 0);
3721 if (translog_get_last_page_addr(&last_page, &pageok, no_errors))
3722 {
3723 if (!translog_walk_filenames(log_descriptor.directory,
3724 &translog_callback_search_first))
3725 {
3726 /*
3727 Files was deleted, just start from the next log number, so that
3728 existing tables are in the past.
3729 */
3730 start_file_num= last_logno + 1;
3731 checkpoint_lsn= LSN_IMPOSSIBLE; /* no log so no checkpoint */
3732 logs_found= 0;
3733 }
3734 else
3735 goto err;
3736 }
3737 else if (LSN_OFFSET(last_page) == 0)
3738 {
3739 if (LSN_FILE_NO(last_page) == 1)
3740 {
3741 logs_found= 0; /* file #1 has no pages */
3742 DBUG_PRINT("info", ("log found. But is is empty => no log assumed"));
3743 }
3744 else
3745 {
3746 last_page-= LSN_ONE_FILE;
3747 if (translog_get_last_page_addr(&last_page, &pageok, 0))
3748 goto err;
3749 }
3750 }
3751 if (logs_found)
3752 {
3753 uint32 i;
3754 log_descriptor.min_file= translog_first_file(log_descriptor.horizon, 1);
3755 log_descriptor.max_file= last_logno;
3756 /* Open all files */
3757 if (allocate_dynamic(&log_descriptor.open_files,
3758 log_descriptor.max_file -
3759 log_descriptor.min_file + 1))
3760 goto err;
3761 for (i = log_descriptor.max_file; i >= log_descriptor.min_file; i--)
3762 {
3763 /*
3764 We can't allocate all file together because they will be freed
3765 one by one
3766 */
3767 TRANSLOG_FILE *file= (TRANSLOG_FILE *)my_malloc(sizeof(TRANSLOG_FILE),
3768 MYF(0));
3769
3770 compile_time_assert(MY_FILEPOS_ERROR > 0xffffffffULL);
3771 if (file == NULL ||
3772 (file->handler.file=
3773 open_logfile_by_number_no_cache(i)) < 0 ||
3774 mysql_file_seek(file->handler.file, 0, SEEK_END, MYF(0)) >=
3775 0xffffffffULL)
3776 {
3777 int j;
3778 for (j= i - log_descriptor.min_file - 1; j > 0; j--)
3779 {
3780 TRANSLOG_FILE *el=
3781 *dynamic_element(&log_descriptor.open_files, j,
3782 TRANSLOG_FILE **);
3783 mysql_file_close(el->handler.file, MYF(MY_WME));
3784 my_free(el);
3785 }
3786 if (file)
3787 {
3788 free(file);
3789 goto err;
3790 }
3791 else
3792 goto err;
3793 }
3794 translog_file_init(file, i, 1);
3795 /* we allocated space so it can't fail */
3796 insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
3797 }
3798 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
3799 log_descriptor.open_files.elements);
3800 }
3801 }
3802 else if (readonly)
3803 {
3804 /* There is no logs and there is read-only mode => nothing to read */
3805 DBUG_PRINT("error", ("No logs and read-only mode"));
3806 goto err;
3807 }
3808
3809 if (logs_found)
3810 {
3811 TRANSLOG_ADDRESS current_page= sure_page;
3812 my_bool pageok;
3813
3814 DBUG_PRINT("info", ("The log is really present"));
3815 DBUG_ASSERT(sure_page <= last_page);
3816
3817 /* TODO: check page size */
3818
3819 last_valid_page= LSN_IMPOSSIBLE;
3820 /*
3821 Scans and validate pages. We need it to show "outside" only for sure
3822 valid part of the log. If the log was damaged then fixed we have to
3823 cut off damaged part before some other process start write something
3824 in the log.
3825 */
3826 do
3827 {
3828 TRANSLOG_ADDRESS current_file_last_page;
3829 current_file_last_page= current_page;
3830 if (translog_get_last_page_addr(&current_file_last_page, &pageok, 0))
3831 goto err;
3832 if (!pageok)
3833 {
3834 DBUG_PRINT("error", ("File %lu have no complete last page",
3835 (ulong) LSN_FILE_NO(current_file_last_page)));
3836 old_log_was_recovered= 1;
3837 /* This file is not written till the end so it should be last */
3838 last_page= current_file_last_page;
3839 /* TODO: issue warning */
3840 }
3841 do
3842 {
3843 TRANSLOG_VALIDATOR_DATA data;
3844 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
3845 uchar *page;
3846 data.addr= &current_page;
3847 if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
3848 goto err;
3849 if (data.was_recovered)
3850 {
3851 DBUG_PRINT("error", ("file no: %lu (%d) "
3852 "rec_offset: 0x%lx (%lu) (%d)",
3853 (ulong) LSN_FILE_NO(current_page),
3854 (uint3korr(page + 3) !=
3855 LSN_FILE_NO(current_page)),
3856 (ulong) LSN_OFFSET(current_page),
3857 (ulong) (LSN_OFFSET(current_page) /
3858 TRANSLOG_PAGE_SIZE),
3859 (uint3korr(page) !=
3860 LSN_OFFSET(current_page) /
3861 TRANSLOG_PAGE_SIZE)));
3862 old_log_was_recovered= 1;
3863 break;
3864 }
3865 old_flags= page[TRANSLOG_PAGE_FLAGS];
3866 last_valid_page= current_page;
3867 current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
3868 } while (current_page <= current_file_last_page);
3869 current_page+= LSN_ONE_FILE;
3870 current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
3871 } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
3872 !old_log_was_recovered);
3873 if (last_valid_page == LSN_IMPOSSIBLE)
3874 {
3875 /* Panic!!! Even page which should be valid is invalid */
3876 /* TODO: issue error */
3877 goto err;
3878 }
3879 DBUG_PRINT("info", ("Last valid page is in file: %lu "
3880 "offset: %lu (0x%lx) "
3881 "Logs found: %d was recovered: %d "
3882 "flags match: %d",
3883 (ulong) LSN_FILE_NO(last_valid_page),
3884 (ulong) LSN_OFFSET(last_valid_page),
3885 (ulong) LSN_OFFSET(last_valid_page),
3886 logs_found, old_log_was_recovered,
3887 (old_flags == flags)));
3888
3889 /* TODO: check server ID */
3890 if (logs_found && !old_log_was_recovered && old_flags == flags)
3891 {
3892 TRANSLOG_VALIDATOR_DATA data;
3893 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
3894 uchar *page;
3895 uint16 chunk_offset;
3896 data.addr= &last_valid_page;
3897 /* continue old log */
3898 DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
3899 LSN_FILE_NO(log_descriptor.horizon));
3900 if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
3901 (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
3902 goto err;
3903
3904 /* Puts filled part of old page in the buffer */
3905 log_descriptor.horizon= last_valid_page;
3906 translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3907 /*
3908 Free space if filled with TRANSLOG_FILLER and first uchar of
3909 real chunk can't be TRANSLOG_FILLER
3910 */
3911 while (chunk_offset < TRANSLOG_PAGE_SIZE &&
3912 page[chunk_offset] != TRANSLOG_FILLER)
3913 {
3914 uint16 chunk_length;
3915 if ((chunk_length=
3916 translog_get_total_chunk_length(page, chunk_offset)) == 0)
3917 goto err;
3918 DBUG_PRINT("info", ("chunk: offset: %u length: %u",
3919 (uint) chunk_offset, (uint) chunk_length));
3920 chunk_offset+= chunk_length;
3921
3922 /* chunk can't cross the page border */
3923 DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
3924 }
3925 memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
3926 log_descriptor.bc.buffer->size+= chunk_offset;
3927 log_descriptor.bc.ptr+= chunk_offset;
3928 log_descriptor.bc.current_page_fill= chunk_offset;
3929 log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
3930 (chunk_offset +
3931 LSN_OFFSET(last_valid_page)));
3932 DBUG_PRINT("info", ("Move Page #%u: %p chaser: %d Size: %lu (%lu)",
3933 (uint) log_descriptor.bc.buffer_no,
3934 log_descriptor.bc.buffer,
3935 log_descriptor.bc.chaser,
3936 (ulong) log_descriptor.bc.buffer->size,
3937 (ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
3938 buffer->buffer)));
3939 translog_check_cursor(&log_descriptor.bc);
3940 }
3941 if (!old_log_was_recovered && old_flags == flags)
3942 {
3943 LOGHANDLER_FILE_INFO info;
3944
3945 /*
3946 Accessing &log_descriptor.open_files without mutex is safe
3947 because it is initialization
3948 */
3949 if (translog_read_file_header(&info,
3950 (*dynamic_element(&log_descriptor.
3951 open_files,
3952 0, TRANSLOG_FILE **))->
3953 handler.file))
3954 goto err;
3955 version_changed= (info.maria_version != TRANSLOG_VERSION_ID);
3956 }
3957 }
3958 DBUG_PRINT("info", ("Logs found: %d was recovered: %d",
3959 logs_found, old_log_was_recovered));
3960 if (!logs_found)
3961 {
3962 TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(sizeof(TRANSLOG_FILE),
3963 MYF(0));
3964 DBUG_PRINT("info", ("The log is not found => we will create new log"));
3965 if (file == NULL)
3966 goto err;
3967 /* Start new log system from scratch */
3968 log_descriptor.horizon= MAKE_LSN(start_file_num,
3969 TRANSLOG_PAGE_SIZE); /* header page */
3970 translog_file_init(file, start_file_num, 0);
3971 if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
3972 {
3973 my_free(file);
3974 goto err;
3975 }
3976 if ((file->handler.file=
3977 create_logfile_by_number_no_cache(start_file_num)) == -1)
3978 goto err;
3979 log_descriptor.min_file= log_descriptor.max_file= start_file_num;
3980 if (translog_write_file_header())
3981 goto err;
3982 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
3983 log_descriptor.open_files.elements);
3984
3985 if (ma_control_file_write_and_force(checkpoint_lsn, start_file_num,
3986 max_trid_in_control_file,
3987 recovery_failures))
3988 goto err;
3989 /* assign buffer 0 */
3990 translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3991 translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
3992 }
3993 else if ((old_log_was_recovered || old_flags != flags || version_changed) &&
3994 !readonly)
3995 {
3996 /* leave the damaged file untouched */
3997 log_descriptor.horizon+= LSN_ONE_FILE;
3998 /* header page */
3999 log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
4000 TRANSLOG_PAGE_SIZE);
4001 if (translog_create_new_file())
4002 goto err;
4003 /*
4004 Buffer system left untouched after recovery => we should init it
4005 (starting from buffer 0)
4006 */
4007 translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
4008 translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
4009 }
4010
4011 /* all LSNs that are on disk are flushed */
4012 log_descriptor.log_start= log_descriptor.sent_to_disk=
4013 log_descriptor.flushed= log_descriptor.horizon;
4014 log_descriptor.in_buffers_only= log_descriptor.bc.buffer->offset;
4015 log_descriptor.max_lsn= LSN_IMPOSSIBLE; /* set to 0 */
  /*
    Now 'flushed' is set to the 'horizon' value, but 'horizon' is
    (potentially) the address of the next LSN, and we want to indicate that
    all LSNs that are already on the disk are flushed, so we need to
    decrease the value by 1 (we are sure that there is no LSN on the disk
    which is greater than 'flushed', and no LSN will be created that is
    equal to or less than the value of 'flushed').
  */
4024 log_descriptor.flushed--; /* offset decreased */
4025 log_descriptor.sent_to_disk--; /* offset decreased */
4026 /*
4027 Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up
4028 structures for generating 2-byte ids:
4029 */
4030 id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX * sizeof(MARIA_SHARE*),
4031 MYF(MY_WME | MY_ZEROFILL));
4032 if (unlikely(!id_to_share))
4033 goto err;
4034 id_to_share--; /* min id is 1 */
4035
4036 /* Check the last LSN record integrity */
4037 if (logs_found)
4038 {
4039 TRANSLOG_SCANNER_DATA scanner;
4040 TRANSLOG_ADDRESS page_addr;
4041 LSN last_lsn= LSN_IMPOSSIBLE;
4042 /*
4043 take very last page address and try to find LSN record on it
4044 if it fail take address of previous page and so on
4045 */
4046 page_addr= (log_descriptor.horizon -
4047 ((log_descriptor.horizon - 1) % TRANSLOG_PAGE_SIZE + 1));
4048 if (translog_scanner_init(page_addr, 1, &scanner, 1))
4049 goto err;
4050 scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
4051 for (;;)
4052 {
4053 uint chunk_1byte;
4054 chunk_1byte= scanner.page[scanner.page_offset];
4055 while (!translog_is_LSN_chunk(chunk_1byte) &&
4056 scanner.page != END_OF_LOG &&
4057 scanner.page[scanner.page_offset] != TRANSLOG_FILLER &&
4058 scanner.page_addr == page_addr)
4059 {
4060 if (translog_get_next_chunk(&scanner))
4061 {
4062 translog_destroy_scanner(&scanner);
4063 goto err;
4064 }
4065 if (scanner.page != END_OF_LOG)
4066 chunk_1byte= scanner.page[scanner.page_offset];
4067 }
4068 if (translog_is_LSN_chunk(chunk_1byte))
4069 {
4070 last_lsn= scanner.page_addr + scanner.page_offset;
4071 if (translog_get_next_chunk(&scanner))
4072 {
4073 translog_destroy_scanner(&scanner);
4074 goto err;
4075 }
4076 if (scanner.page == END_OF_LOG)
4077 break; /* it was the last record */
4078 chunk_1byte= scanner.page[scanner.page_offset];
4079 continue; /* try to find other record on this page */
4080 }
4081
4082 if (last_lsn != LSN_IMPOSSIBLE)
4083 break; /* there is no more records on the page */
4084
4085 /* We have to make step back */
4086 if (unlikely(LSN_OFFSET(page_addr) == TRANSLOG_PAGE_SIZE))
4087 {
4088 uint32 file_no= LSN_FILE_NO(page_addr);
4089 my_bool last_page_ok;
4090 /* it is beginning of the current file */
4091 if (unlikely(file_no == 1))
4092 {
4093 /*
4094 It is beginning of the log => there is no LSNs in the log =>
4095 There is no harm in leaving it "as-is".
4096 */
4097 log_descriptor.previous_flush_horizon= log_descriptor.horizon;
4098 DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
4099 LSN_IN_PARTS(log_descriptor.
4100 previous_flush_horizon)));
4101 DBUG_RETURN(0);
4102 }
4103 file_no--;
4104 page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
4105 translog_get_last_page_addr(&page_addr, &last_page_ok, 0);
4106 /* page should be OK as it is not the last file */
4107 DBUG_ASSERT(last_page_ok);
4108 }
4109 else
4110 {
4111 page_addr-= TRANSLOG_PAGE_SIZE;
4112 }
4113 translog_destroy_scanner(&scanner);
4114 if (translog_scanner_init(page_addr, 1, &scanner, 1))
4115 goto err;
4116 scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
4117 }
4118 translog_destroy_scanner(&scanner);
4119
4120 /* Now scanner points to the last LSN chunk, lets check it */
4121 {
4122 TRANSLOG_HEADER_BUFFER rec;
4123 translog_size_t rec_len;
4124 int len;
4125 uchar buffer[1];
4126 DBUG_PRINT("info", ("going to check the last found record " LSN_FMT,
4127 LSN_IN_PARTS(last_lsn)));
4128
4129 len=
4130 translog_read_record_header(last_lsn, &rec);
4131 if (unlikely (len == RECHEADER_READ_ERROR ||
4132 len == RECHEADER_READ_EOF))
4133 {
4134 DBUG_PRINT("error", ("unexpected end of log or record during "
4135 "reading record header: " LSN_FMT " len: %d",
4136 LSN_IN_PARTS(last_lsn), len));
4137 if (readonly)
4138 log_descriptor.log_start= log_descriptor.horizon= last_lsn;
4139 else if (translog_truncate_log(last_lsn))
4140 {
4141 translog_free_record_header(&rec);
4142 goto err;
4143 }
4144 }
4145 else
4146 {
4147 DBUG_ASSERT(last_lsn == rec.lsn);
4148 if (likely(rec.record_length != 0))
4149 {
4150 /*
4151 Reading the last byte of record will trigger scanning all
4152 record chunks for now
4153 */
4154 rec_len= translog_read_record(rec.lsn, rec.record_length - 1, 1,
4155 buffer, NULL);
4156 if (rec_len != 1)
4157 {
4158 DBUG_PRINT("error", ("unexpected end of log or record during "
4159 "reading record body: " LSN_FMT " len: %d",
4160 LSN_IN_PARTS(rec.lsn),
4161 len));
4162 if (readonly)
4163 log_descriptor.log_start= log_descriptor.horizon= last_lsn;
4164
4165 else if (translog_truncate_log(last_lsn))
4166 {
4167 translog_free_record_header(&rec);
4168 goto err;
4169 }
4170 }
4171 }
4172 }
4173 translog_free_record_header(&rec);
4174 }
4175 }
4176 log_descriptor.previous_flush_horizon= log_descriptor.horizon;
4177 DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
4178 LSN_IN_PARTS(log_descriptor.previous_flush_horizon)));
4179 DBUG_RETURN(0);
4180err:
4181 ma_message_no_user(0, "log initialization failed");
4182 DBUG_RETURN(1);
4183}
4184
4185
4186/*
4187 @brief Free transaction log file buffer.
4188
4189 @param buffer_no The buffer to free
4190*/
4191
4192static void translog_buffer_destroy(struct st_translog_buffer *buffer)
4193{
4194 DBUG_ENTER("translog_buffer_destroy");
4195 DBUG_PRINT("enter",
4196 ("Buffer #%u: %p file: %d offset: " LSN_FMT " size: %lu",
4197 (uint) buffer->buffer_no, buffer,
4198 (buffer->file ? buffer->file->handler.file : -1),
4199 LSN_IN_PARTS(buffer->offset),
4200 (ulong) buffer->size));
4201 if (buffer->file != NULL)
4202 {
4203 /*
4204 We ignore errors here, because we can't do something about it
4205 (it is shutting down)
4206
4207 We also have to take the locks even if there can't be any other
4208 threads running, because translog_buffer_flush()
4209 requires that we have the buffer locked.
4210 */
4211 translog_buffer_lock(buffer);
4212 translog_buffer_flush(buffer);
4213 translog_buffer_unlock(buffer);
4214 }
4215 DBUG_PRINT("info", ("Destroy mutex: %p", &buffer->mutex));
4216 mysql_mutex_destroy(&buffer->mutex);
4217 mysql_cond_destroy(&buffer->waiting_filling_buffer);
4218 DBUG_VOID_RETURN;
4219}
4220
4221
4222/*
4223 Free log handler resources
4224
4225 SYNOPSIS
4226 translog_destroy()
4227*/
4228
4229void translog_destroy()
4230{
4231 TRANSLOG_FILE **file;
4232 uint i;
4233 uint8 current_buffer;
4234 DBUG_ENTER("translog_destroy");
4235
4236 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
4237 translog_status == TRANSLOG_READONLY);
4238 translog_lock();
4239 current_buffer= log_descriptor.bc.buffer_no;
4240 translog_status= (translog_status == TRANSLOG_READONLY ?
4241 TRANSLOG_UNINITED :
4242 TRANSLOG_SHUTDOWN);
4243 if (log_descriptor.bc.buffer->file != NULL)
4244 translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
4245 translog_unlock();
4246
4247 for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
4248 {
4249 struct st_translog_buffer *buffer= (log_descriptor.buffers +
4250 ((i + current_buffer + 1) %
4251 TRANSLOG_BUFFERS_NO));
4252 translog_buffer_destroy(buffer);
4253 }
4254 translog_status= TRANSLOG_UNINITED;
4255
4256 /* close files */
4257 while ((file= (TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files)))
4258 translog_close_log_file(*file);
4259 mysql_mutex_destroy(&log_descriptor.sent_to_disk_lock);
4260 mysql_mutex_destroy(&log_descriptor.file_header_lock);
4261 mysql_mutex_destroy(&log_descriptor.unfinished_files_lock);
4262 mysql_mutex_destroy(&log_descriptor.purger_lock);
4263 mysql_mutex_destroy(&log_descriptor.log_flush_lock);
4264 mysql_mutex_destroy(&log_descriptor.dirty_buffer_mask_lock);
4265 mysql_cond_destroy(&log_descriptor.log_flush_cond);
4266 mysql_cond_destroy(&log_descriptor.new_goal_cond);
4267 mysql_rwlock_destroy(&log_descriptor.open_files_lock);
4268 delete_dynamic(&log_descriptor.open_files);
4269 delete_dynamic(&log_descriptor.unfinished_files);
4270
4271 if (log_descriptor.directory_fd >= 0)
4272 mysql_file_close(log_descriptor.directory_fd, MYF(MY_WME));
4273 if (id_to_share != NULL)
4274 my_free(id_to_share + 1);
4275 DBUG_VOID_RETURN;
4276}
4277
4278
4279/*
4280 @brief Starts new page.
4281
4282 @param horizon \ Position in file and buffer where we are
4283 @param cursor /
4284 @param prev_buffer Buffer which should be flushed will be assigned here.
4285 This is always set (to NULL if nothing to flush).
4286
4287 @note We do not want to flush the buffer immediately because we want to
4288 let caller of this function first advance 'horizon' pointer and unlock the
4289 loghandler and only then flush the log which can take some time.
4290
4291 @retval 0 OK
4292 @retval 1 Error
4293*/
4294
4295static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
4296 struct st_buffer_cursor *cursor,
4297 struct st_translog_buffer **prev_buffer)
4298{
4299 struct st_translog_buffer *buffer= cursor->buffer;
4300 DBUG_ENTER("translog_page_next");
4301
4302 *prev_buffer= NULL;
4303 if ((cursor->ptr + TRANSLOG_PAGE_SIZE >
4304 cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) ||
4305 (LSN_OFFSET(*horizon) >
4306 log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE))
4307 {
4308 DBUG_PRINT("info", ("Switch to next buffer Buffer Size: %lu (%lu) => %d "
4309 "File size: %lu max: %lu => %d",
4310 (ulong) cursor->buffer->size,
4311 (ulong) (cursor->ptr - cursor->buffer->buffer),
4312 (cursor->ptr + TRANSLOG_PAGE_SIZE >
4313 cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER),
4314 (ulong) LSN_OFFSET(*horizon),
4315 (ulong) log_descriptor.log_file_max_size,
4316 (LSN_OFFSET(*horizon) >
4317 (log_descriptor.log_file_max_size -
4318 TRANSLOG_PAGE_SIZE))));
4319 if (translog_buffer_next(horizon, cursor,
4320 LSN_OFFSET(*horizon) >
4321 (log_descriptor.log_file_max_size -
4322 TRANSLOG_PAGE_SIZE)))
4323 DBUG_RETURN(1);
4324 *prev_buffer= buffer;
4325 DBUG_PRINT("info", ("Buffer #%u (%p): have to be flushed",
4326 (uint) buffer->buffer_no, buffer));
4327 }
4328 else
4329 {
4330 DBUG_PRINT("info", ("Use the same buffer #%u (%p): "
4331 "Buffer Size: %lu (%lu)",
4332 (uint) buffer->buffer_no,
4333 buffer,
4334 (ulong) cursor->buffer->size,
4335 (ulong) (cursor->ptr - cursor->buffer->buffer)));
4336 translog_finish_page(horizon, cursor);
4337 translog_new_page_header(horizon, cursor);
4338 }
4339 DBUG_RETURN(0);
4340}
4341
4342
4343/*
4344 Write data of given length to the current page
4345
4346 SYNOPSIS
4347 translog_write_data_on_page()
4348 horizon \ Pointers on file and buffer
4349 cursor /
4350 length IN length of the chunk
4351 buffer buffer with data
4352
4353 RETURN
4354 0 OK
4355 1 Error
4356*/
4357
4358static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
4359 struct st_buffer_cursor *cursor,
4360 translog_size_t length,
4361 uchar *buffer)
4362{
4363 DBUG_ENTER("translog_write_data_on_page");
4364 DBUG_PRINT("enter", ("Chunk length: %lu Page size %u",
4365 (ulong) length, (uint) cursor->current_page_fill));
4366 DBUG_ASSERT(length > 0);
4367 DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
4368 DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
4369 TRANSLOG_WRITE_BUFFER);
4370
4371 memcpy(cursor->ptr, buffer, length);
4372 cursor->ptr+= length;
4373 (*horizon)+= length; /* adds offset */
4374 cursor->current_page_fill+= length;
4375 if (!cursor->chaser)
4376 cursor->buffer->size+= length;
4377 DBUG_PRINT("info", ("Write data buffer #%u: %p "
4378 "chaser: %d Size: %lu (%lu)",
4379 (uint) cursor->buffer->buffer_no, cursor->buffer,
4380 cursor->chaser, (ulong) cursor->buffer->size,
4381 (ulong) (cursor->ptr - cursor->buffer->buffer)));
4382 translog_check_cursor(cursor);
4383
4384 DBUG_RETURN(0);
4385}
4386
4387
4388/*
4389 Write data from parts of given length to the current page
4390
4391 SYNOPSIS
4392 translog_write_parts_on_page()
4393 horizon \ Pointers on file and buffer
4394 cursor /
4395 length IN length of the chunk
4396 parts IN/OUT chunk source
4397
4398 RETURN
4399 0 OK
4400 1 Error
4401*/
4402
4403static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
4404 struct st_buffer_cursor *cursor,
4405 translog_size_t length,
4406 struct st_translog_parts *parts)
4407{
4408 translog_size_t left= length;
4409 uint cur= (uint) parts->current;
4410 DBUG_ENTER("translog_write_parts_on_page");
4411 DBUG_PRINT("enter", ("Chunk length: %lu parts: %u of %u. Page size: %u "
4412 "Buffer size: %lu (%lu)",
4413 (ulong) length,
4414 (uint) (cur + 1), (uint) parts->elements,
4415 (uint) cursor->current_page_fill,
4416 (ulong) cursor->buffer->size,
4417 (ulong) (cursor->ptr - cursor->buffer->buffer)));
4418 DBUG_ASSERT(length > 0);
4419 DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
4420 DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
4421 TRANSLOG_WRITE_BUFFER);
4422
4423 do
4424 {
4425 translog_size_t len;
4426 LEX_CUSTRING *part;
4427 const uchar *buff;
4428
4429 DBUG_ASSERT(cur < parts->elements);
4430 part= parts->parts + cur;
4431 buff= part->str;
4432 DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu buff: %p",
4433 (uint) (cur + 1), (ulong) part->length, (ulong) left,
4434 buff));
4435
4436 if (part->length > left)
4437 {
4438 /* we should write less then the current part */
4439 len= left;
4440 part->length-= len;
4441 part->str+= len;
4442 DBUG_PRINT("info", ("Set new part: %u Length: %lu",
4443 (uint) (cur + 1), (ulong) part->length));
4444 }
4445 else
4446 {
4447 len= (translog_size_t) part->length;
4448 cur++;
4449 DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
4450 }
4451 DBUG_PRINT("info", ("copy: %p <- %p %u",
4452 cursor->ptr, buff, len));
4453 if (likely(len))
4454 {
4455 memcpy(cursor->ptr, buff, len);
4456 left-= len;
4457 cursor->ptr+= len;
4458 }
4459 } while (left);
4460
4461 DBUG_PRINT("info", ("Horizon: " LSN_FMT " Length %u(0x%x)",
4462 LSN_IN_PARTS(*horizon),
4463 length, length));
4464 parts->current= cur;
4465 (*horizon)+= length; /* offset increasing */
4466 cursor->current_page_fill+= length;
4467 if (!cursor->chaser)
4468 cursor->buffer->size+= length;
4469 /*
4470 We do not not updating parts->total_record_length here because it is
4471 need only before writing record to have total length
4472 */
4473 DBUG_PRINT("info", ("Write parts buffer #%u: %p "
4474 "chaser: %d Size: %lu (%lu) "
4475 "Horizon: " LSN_FMT " buff offset: 0x%x",
4476 (uint) cursor->buffer->buffer_no, cursor->buffer,
4477 cursor->chaser, (ulong) cursor->buffer->size,
4478 (ulong) (cursor->ptr - cursor->buffer->buffer),
4479 LSN_IN_PARTS(*horizon),
4480 (uint) (LSN_OFFSET(cursor->buffer->offset) +
4481 cursor->buffer->size)));
4482 translog_check_cursor(cursor);
4483
4484 DBUG_RETURN(0);
4485}
4486
4487
4488/*
4489 Put 1 group chunk type 0 header into parts array
4490
4491 SYNOPSIS
4492 translog_write_variable_record_1group_header()
4493 parts Descriptor of record source parts
4494 type The log record type
4495 short_trid Short transaction ID or 0 if it has no sense
4496 header_length Calculated header length of chunk type 0
4497 chunk0_header Buffer for the chunk header writing
4498*/
4499
4500static void
4501translog_write_variable_record_1group_header(struct st_translog_parts *parts,
4502 enum translog_record_type type,
4503 SHORT_TRANSACTION_ID short_trid,
4504 uint16 header_length,
4505 uchar *chunk0_header)
4506{
4507 LEX_CUSTRING *part;
4508 DBUG_ASSERT(parts->current != 0); /* first part is left for header */
4509 part= parts->parts + (--parts->current);
4510 parts->total_record_length+= (translog_size_t) (part->length= header_length);
4511 part->str= chunk0_header;
4512 /* puts chunk type */
4513 *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
4514 int2store(chunk0_header + 1, short_trid);
4515 /* puts record length */
4516 translog_write_variable_record_1group_code_len(chunk0_header + 3,
4517 parts->record_length,
4518 header_length);
4519 /* puts 0 as chunk length which indicate 1 group record */
4520 int2store(chunk0_header + header_length - 2, 0);
4521}
4522
4523
4524/*
4525 Increase number of writers for this buffer
4526
4527 SYNOPSIS
4528 translog_buffer_increase_writers()
4529 buffer target buffer
4530*/
4531
4532static inline void
4533translog_buffer_increase_writers(struct st_translog_buffer *buffer)
4534{
4535 DBUG_ENTER("translog_buffer_increase_writers");
4536 translog_buffer_lock_assert_owner(buffer);
4537 buffer->copy_to_buffer_in_progress++;
4538 DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u %p progress: %d",
4539 (uint) buffer->buffer_no, buffer,
4540 buffer->copy_to_buffer_in_progress));
4541 DBUG_VOID_RETURN;
4542}
4543
4544
4545/*
4546 Decrease number of writers for this buffer
4547
4548 SYNOPSIS
4549 translog_buffer_decrease_writers()
4550 buffer target buffer
4551*/
4552
4553static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
4554{
4555 DBUG_ENTER("translog_buffer_decrease_writers");
4556 translog_buffer_lock_assert_owner(buffer);
4557 buffer->copy_to_buffer_in_progress--;
4558 DBUG_PRINT("info",
4559 ("copy_to_buffer_in_progress. Buffer #%u %p progress: %d",
4560 (uint) buffer->buffer_no, buffer,
4561 buffer->copy_to_buffer_in_progress));
4562 if (buffer->copy_to_buffer_in_progress == 0)
4563 mysql_cond_broadcast(&buffer->waiting_filling_buffer);
4564 DBUG_VOID_RETURN;
4565}
4566
4567
4568/**
4569 @brief Skip to the next page for chaser (thread which advanced horizon
4570 pointer and now feeling the buffer)
4571
4572 @param horizon \ Pointers on file position and buffer
4573 @param cursor /
4574
4575 @retval 1 OK
4576 @retval 0 Error
4577*/
4578
4579static my_bool translog_chaser_page_next(TRANSLOG_ADDRESS *horizon,
4580 struct st_buffer_cursor *cursor)
4581{
4582 struct st_translog_buffer *buffer_to_flush;
4583 my_bool rc;
4584 DBUG_ENTER("translog_chaser_page_next");
4585 DBUG_ASSERT(cursor->chaser);
4586 rc= translog_page_next(horizon, cursor, &buffer_to_flush);
4587 if (buffer_to_flush != NULL)
4588 {
4589 translog_buffer_lock(buffer_to_flush);
4590 translog_buffer_decrease_writers(buffer_to_flush);
4591 if (!rc)
4592 rc= translog_buffer_flush(buffer_to_flush);
4593 translog_buffer_unlock(buffer_to_flush);
4594 }
4595 DBUG_RETURN(rc);
4596}
4597
4598/*
4599 Put chunk 2 from new page beginning
4600
4601 SYNOPSIS
4602 translog_write_variable_record_chunk2_page()
4603 parts Descriptor of record source parts
4604 horizon \ Pointers on file position and buffer
4605 cursor /
4606
4607 RETURN
4608 0 OK
4609 1 Error
4610*/
4611
4612static my_bool
4613translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
4614 TRANSLOG_ADDRESS *horizon,
4615 struct st_buffer_cursor *cursor)
4616{
4617 uchar chunk2_header[1];
4618 DBUG_ENTER("translog_write_variable_record_chunk2_page");
4619 chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
4620
4621 if (translog_chaser_page_next(horizon, cursor))
4622 DBUG_RETURN(1);
4623
4624 /* Puts chunk type */
4625 translog_write_data_on_page(horizon, cursor, 1, chunk2_header);
4626 /* Puts chunk body */
4627 translog_write_parts_on_page(horizon, cursor,
4628 log_descriptor.page_capacity_chunk_2, parts);
4629 DBUG_RETURN(0);
4630}
4631
4632
4633/*
4634 Put chunk 3 of requested length in the buffer from new page beginning
4635
4636 SYNOPSIS
4637 translog_write_variable_record_chunk3_page()
4638 parts Descriptor of record source parts
4639 length Length of this chunk
4640 horizon \ Pointers on file position and buffer
4641 cursor /
4642
4643 RETURN
4644 0 OK
4645 1 Error
4646*/
4647
4648static my_bool
4649translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
4650 uint16 length,
4651 TRANSLOG_ADDRESS *horizon,
4652 struct st_buffer_cursor *cursor)
4653{
4654 LEX_CUSTRING *part;
4655 uchar chunk3_header[1 + 2];
4656 DBUG_ENTER("translog_write_variable_record_chunk3_page");
4657
4658 if (translog_chaser_page_next(horizon, cursor))
4659 DBUG_RETURN(1);
4660
4661 if (length == 0)
4662 {
4663 /* It was call to write page header only (no data for chunk 3) */
4664 DBUG_PRINT("info", ("It is a call to make page header only"));
4665 DBUG_RETURN(0);
4666 }
4667
4668 DBUG_ASSERT(parts->current != 0); /* first part is left for header */
4669 part= parts->parts + (--parts->current);
4670 parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
4671 part->str= chunk3_header;
4672 /* Puts chunk type */
4673 *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH);
4674 /* Puts chunk length */
4675 int2store(chunk3_header + 1, length);
4676
4677 translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
4678 DBUG_RETURN(0);
4679}
4680
/*
  Move the log pointer (horizon) forward by the rest of the current page,
  a given number of full pages starting from the next page, and a given
  amount of data (plus page overhead) on the last page

  SYNOPSIS
    translog_advance_pointer()
    pages                Number of full pages starting from the next one;
                         -1 is a special value meaning "stay on the current
                         page and advance by last_page_data only"
    last_page_data       Plus this data on the last page

  NOTE
    The loghandler must be locked by the caller (asserted).
    The space is only reserved here (pointers and sizes are moved, no data
    is copied). The writers counter is increased for every buffer which is
    touched, including the final one.

  RETURN
    0  OK
    1  Error (creating a new log file failed)
*/

static my_bool translog_advance_pointer(int pages, uint16 last_page_data)
{
  /* Fill of the last page after the advance: page overhead + data */
  translog_size_t last_page_offset= (log_descriptor.page_overhead +
                                     last_page_data);
  /*
    Total number of bytes to advance: rest of the current page + all full
    pages + used part of the last page
  */
  translog_size_t offset= (TRANSLOG_PAGE_SIZE -
                           log_descriptor.bc.current_page_fill +
                           pages * TRANSLOG_PAGE_SIZE + last_page_offset);
  translog_size_t buffer_end_offset, file_end_offset, min_offset;
  DBUG_ENTER("translog_advance_pointer");
  DBUG_PRINT("enter", ("Pointer: " LSN_FMT " + %u + %u pages + %u + %u",
                       LSN_IN_PARTS(log_descriptor.horizon),
                       (uint) (TRANSLOG_PAGE_SIZE -
                               log_descriptor.bc.current_page_fill),
                       pages, (uint) log_descriptor.page_overhead,
                       (uint) last_page_data));
  translog_lock_assert_owner();

  if (pages == -1)
  {
    /*
      It is a special case when we advance the pointer on the same page.
      It can happen when we write the last part of a multi-group record.
    */
    DBUG_ASSERT(last_page_data + log_descriptor.bc.current_page_fill <=
                TRANSLOG_PAGE_SIZE);
    /* Override the values computed above: no page overhead is added */
    offset= last_page_data;
    last_page_offset= log_descriptor.bc.current_page_fill + last_page_data;
    goto end;
  }
  DBUG_PRINT("info", ("last_page_offset %lu", (ulong) last_page_offset));
  DBUG_ASSERT(last_page_offset <= TRANSLOG_PAGE_SIZE);

  /*
    The loop will be executed 1-3 times. Usually we advance the
    pointer to fill only the current buffer (if we have more than 1/2 of
    the buffer free) or 2 buffers (rest of the current and all of the
    next). In case of a really huge record end, where we write the last
    group with the "table of content" of all groups and ignore buffer
    borders, we can occupy 3 buffers.
  */
  for (;;)
  {
    uint8 new_buffer_no;
    struct st_translog_buffer *new_buffer;
    struct st_translog_buffer *old_buffer;
    /* Free space left in the current buffer */
    buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size;
    /* Space left in the current file before the size limit is reached */
    if (likely(log_descriptor.log_file_max_size >=
               LSN_OFFSET(log_descriptor.horizon)))
      file_end_offset= (log_descriptor.log_file_max_size -
                        LSN_OFFSET(log_descriptor.horizon));
    else
    {
      /*
        We have already written more than the current file limit allows,
        so we will finish this page and start a new file
      */
      file_end_offset= (TRANSLOG_PAGE_SIZE -
                        log_descriptor.bc.current_page_fill);
    }
    DBUG_PRINT("info", ("offset: %u buffer_end_offs: %u, "
                        "file_end_offs: %u",
                        offset, buffer_end_offset,
                        file_end_offset));
    DBUG_PRINT("info", ("Buff #%u %u (%p) offset 0x%x + size 0x%x = "
                        "0x%x (0x%x)",
                        log_descriptor.bc.buffer->buffer_no,
                        log_descriptor.bc.buffer_no,
                        log_descriptor.bc.buffer,
                        (uint) LSN_OFFSET(log_descriptor.bc.buffer->offset),
                        log_descriptor.bc.buffer->size,
                        (uint) (LSN_OFFSET(log_descriptor.bc.buffer->offset) +
                                log_descriptor.bc.buffer->size),
                        (uint) LSN_OFFSET(log_descriptor.horizon)));
    /* The end of the current buffer must be exactly at the horizon */
    DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) +
                log_descriptor.bc.buffer->size ==
                LSN_OFFSET(log_descriptor.horizon));

    /* Everything which is left fits into the current buffer and file */
    if (offset <= buffer_end_offset && offset <= file_end_offset)
      break;
    /* Otherwise take the next buffer in the ring */
    old_buffer= log_descriptor.bc.buffer;
    new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
    new_buffer= log_descriptor.buffers + new_buffer_no;

    translog_buffer_lock(new_buffer);
#ifndef DBUG_OFF
    {
      /* Snapshot of the new buffer state, checked after the wait below */
      TRANSLOG_ADDRESS offset= new_buffer->offset;
      TRANSLOG_FILE *file= new_buffer->file;
      uint8 ver= new_buffer->ver;
      translog_lock_assert_owner();
#endif
      translog_wait_for_buffer_free(new_buffer);
#ifndef DBUG_OFF
      /* We keep the handler locked so nobody can start this new buffer */
      DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
                  (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
    }
#endif

    /* Use up what is left of the current buffer or of the current file */
    min_offset= MY_MIN(buffer_end_offset, file_end_offset);
    /* TODO: check is it ptr or size enough */
    log_descriptor.bc.buffer->size+= min_offset;
    log_descriptor.bc.ptr+= min_offset;
    DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu)",
                        (uint) log_descriptor.bc.buffer->buffer_no,
                        log_descriptor.bc.buffer,
                        log_descriptor.bc.chaser,
                        (ulong) log_descriptor.bc.buffer->size,
                        (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
                                 buffer->buffer)));
    DBUG_ASSERT((ulong) (log_descriptor.bc.ptr -
                         log_descriptor.bc.buffer->buffer) ==
                log_descriptor.bc.buffer->size);
    DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
                log_descriptor.bc.buffer_no);
    /* Register a writer for the space just reserved in the old buffer */
    translog_buffer_increase_writers(log_descriptor.bc.buffer);

    if (file_end_offset <= buffer_end_offset)
    {
      /* The file limit is hit first: continue after the header page of the
         next file */
      log_descriptor.horizon+= LSN_ONE_FILE;
      log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                                 TRANSLOG_PAGE_SIZE);
      DBUG_PRINT("info", ("New file: %lu",
                          (ulong) LSN_FILE_NO(log_descriptor.horizon)));
      if (translog_create_new_file())
      {
        /*
          NOTE(review): this early return leaves new_buffer locked (it was
          locked above and is not unlocked on this path) - confirm that
          callers treat this failure as fatal.
        */
        DBUG_RETURN(1);
      }
    }
    else
    {
      DBUG_PRINT("info", ("The same file"));
      log_descriptor.horizon+= min_offset; /* offset increasing */
    }
    /* Make new_buffer the current buffer and chain it with the old one */
    translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
    old_buffer->next_buffer_offset= new_buffer->offset;
    new_buffer->prev_buffer_offset= old_buffer->offset;
    translog_buffer_unlock(old_buffer);
    /* This much of the requested advance is already done */
    offset-= min_offset;
  }
  DBUG_PRINT("info", ("drop write_counter"));
  log_descriptor.bc.write_counter= 0;
  log_descriptor.bc.previous_offset= 0;
end:
  /* Reserve the remaining bytes in the (now) current buffer */
  log_descriptor.bc.ptr+= offset;
  log_descriptor.bc.buffer->size+= offset;
  translog_buffer_increase_writers(log_descriptor.bc.buffer);
  log_descriptor.horizon+= offset; /* offset increasing */
  log_descriptor.bc.current_page_fill= last_page_offset;
  DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu) "
                      "offset: %u last page: %u",
                      (uint) log_descriptor.bc.buffer->buffer_no,
                      log_descriptor.bc.buffer,
                      log_descriptor.bc.chaser,
                      (ulong) log_descriptor.bc.buffer->size,
                      (ulong) (log_descriptor.bc.ptr -
                               log_descriptor.bc.buffer->
                               buffer), (uint) offset,
                      (uint) last_page_offset));
  DBUG_PRINT("info",
             ("pointer moved to: " LSN_FMT,
              LSN_IN_PARTS(log_descriptor.horizon)));
  translog_check_cursor(&log_descriptor.bc);
  log_descriptor.bc.protected= 0;
  DBUG_RETURN(0);
}
4861
4862
4863/*
4864 Get page rest
4865
4866 SYNOPSIS
4867 translog_get_current_page_rest()
4868
4869 NOTE loghandler should be locked
4870
4871 RETURN
4872 number of bytes left on the current page
4873*/
4874
4875static uint translog_get_current_page_rest()
4876{
4877 return (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill);
4878}
4879
4880
4881/*
4882 Get buffer rest in full pages
4883
4884 SYNOPSIS
4885 translog_get_current_buffer_rest()
4886
4887 NOTE loghandler should be locked
4888
4889 RETURN
4890 number of full pages left on the current buffer
4891*/
4892
4893static uint translog_get_current_buffer_rest()
4894{
4895 return (uint)((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER -
4896 log_descriptor.bc.ptr) /
4897 TRANSLOG_PAGE_SIZE);
4898}
4899
4900/*
4901 Calculate possible group size without first (current) page
4902
4903 SYNOPSIS
4904 translog_get_current_group_size()
4905
4906 NOTE loghandler should be locked
4907
4908 RETURN
4909 group size without first (current) page
4910*/
4911
4912static translog_size_t translog_get_current_group_size()
4913{
4914 /* buffer rest in full pages */
4915 translog_size_t buffer_rest= translog_get_current_buffer_rest();
4916 DBUG_ENTER("translog_get_current_group_size");
4917 DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
4918
4919 buffer_rest*= log_descriptor.page_capacity_chunk_2;
4920 /* in case of only half of buffer free we can write this and next buffer */
4921 if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
4922 {
4923 DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
4924 (ulong) buffer_rest,
4925 (ulong) log_descriptor.buffer_capacity_chunk_2));
4926 buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
4927 }
4928
4929 DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
4930
4931 DBUG_RETURN(buffer_rest);
4932}
4933
4934
4935static inline void set_lsn(LSN *lsn, LSN value)
4936{
4937 DBUG_ENTER("set_lsn");
4938 translog_lock_assert_owner();
4939 *lsn= value;
4940 /* we generate LSN so something is not flushed in log */
4941 log_descriptor.is_everything_flushed= 0;
4942 DBUG_PRINT("info", ("new LSN appeared: " LSN_FMT, LSN_IN_PARTS(value)));
4943 DBUG_VOID_RETURN;
4944}
4945
4946
4947/**
4948 @brief Write variable record in 1 group.
4949
4950 @param lsn LSN of the record will be written here
4951 @param type the log record type
4952 @param short_trid Short transaction ID or 0 if it has no sense
4953 @param parts Descriptor of record source parts
4954 @param buffer_to_flush Buffer which have to be flushed if it is not 0
4955 @param header_length Calculated header length of chunk type 0
4956 @param trn Transaction structure pointer for hooks by
4957 record log type, for short_id
4958 @param hook_arg Argument which will be passed to pre-write and
4959 in-write hooks of this record.
4960
4961 @note
4962 We must have a translog_lock() when entering this function
4963 We must have buffer_to_flush locked (if not null)
4964
4965 @return Operation status
4966 @retval 0 OK
4967 @retval 1 Error
4968*/
4969
4970static my_bool
4971translog_write_variable_record_1group(LSN *lsn,
4972 enum translog_record_type type,
4973 MARIA_HA *tbl_info,
4974 SHORT_TRANSACTION_ID short_trid,
4975 struct st_translog_parts *parts,
4976 struct st_translog_buffer
4977 *buffer_to_flush, uint16 header_length,
4978 TRN *trn, void *hook_arg)
4979{
4980 TRANSLOG_ADDRESS horizon;
4981 struct st_buffer_cursor cursor;
4982 int rc= 0;
4983 uint i;
4984 translog_size_t record_rest, full_pages, first_page;
4985 uint additional_chunk3_page= 0;
4986 uchar chunk0_header[1 + 2 + 5 + 2];
4987 DBUG_ENTER("translog_write_variable_record_1group");
4988 translog_lock_assert_owner();
4989 if (buffer_to_flush)
4990 translog_buffer_lock_assert_owner(buffer_to_flush);
4991
4992 set_lsn(lsn, horizon= log_descriptor.horizon);
4993 if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
4994 *lsn, TRUE) ||
4995 (log_record_type_descriptor[type].inwrite_hook &&
4996 (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
4997 lsn, hook_arg)))
4998 {
4999 translog_unlock();
5000 DBUG_RETURN(1);
5001 }
5002 cursor= log_descriptor.bc;
5003 cursor.chaser= 1;
5004
5005 /* Advance pointer to be able unlock the loghandler */
5006 first_page= translog_get_current_page_rest();
5007 record_rest= parts->record_length - (first_page - header_length);
5008 full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
5009 record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
5010
5011 if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
5012 {
5013 DBUG_PRINT("info", ("2 chunks type 3 is needed"));
5014 /* We will write 2 chunks type 3 at the end of this group */
5015 additional_chunk3_page= 1;
5016 record_rest= 1;
5017 }
5018
5019 DBUG_PRINT("info", ("first_page: %u (%u) full_pages: %u (%lu) "
5020 "additional: %u (%u) rest %u = %u",
5021 first_page, first_page - header_length,
5022 full_pages,
5023 (ulong) full_pages *
5024 log_descriptor.page_capacity_chunk_2,
5025 additional_chunk3_page,
5026 additional_chunk3_page *
5027 (log_descriptor.page_capacity_chunk_2 - 1),
5028 record_rest, parts->record_length));
5029 /* record_rest + 3 is chunk type 3 overhead + record_rest */
5030 rc|= translog_advance_pointer((int)(full_pages + additional_chunk3_page),
5031 (record_rest ? record_rest + 3 : 0));
5032 log_descriptor.bc.buffer->last_lsn= *lsn;
5033 DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
5034 LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
5035 log_descriptor.bc.buffer));
5036
5037 translog_unlock();
5038
5039 /*
5040 Check if we switched buffer and need process it (current buffer is
5041 unlocked already => we will not delay other threads
5042 */
5043 if (buffer_to_flush != NULL)
5044 {
5045 if (!rc)
5046 rc= translog_buffer_flush(buffer_to_flush);
5047 translog_buffer_unlock(buffer_to_flush);
5048 }
5049 if (rc)
5050 DBUG_RETURN(1);
5051
5052 translog_write_variable_record_1group_header(parts, type, short_trid,
5053 header_length, chunk0_header);
5054
5055 /* fill the pages */
5056 translog_write_parts_on_page(&horizon, &cursor, first_page, parts);
5057
5058 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5059 LSN_IN_PARTS(log_descriptor.horizon),
5060 LSN_IN_PARTS(horizon)));
5061
5062 for (i= 0; i < full_pages; i++)
5063 {
5064 if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5065 DBUG_RETURN(1);
5066
5067 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5068 LSN_IN_PARTS(log_descriptor.horizon),
5069 LSN_IN_PARTS(horizon)));
5070 }
5071
5072 if (additional_chunk3_page)
5073 {
5074 if (translog_write_variable_record_chunk3_page(parts,
5075 log_descriptor.
5076 page_capacity_chunk_2 - 2,
5077 &horizon, &cursor))
5078 DBUG_RETURN(1);
5079 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5080 LSN_IN_PARTS(log_descriptor.horizon),
5081 LSN_IN_PARTS(horizon)));
5082 DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
5083 }
5084
5085 if (translog_write_variable_record_chunk3_page(parts,
5086 record_rest,
5087 &horizon, &cursor))
5088 DBUG_RETURN(1);
5089 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5090 (uint) LSN_FILE_NO(log_descriptor.horizon),
5091 (uint) LSN_OFFSET(log_descriptor.horizon),
5092 (uint) LSN_FILE_NO(horizon),
5093 (uint) LSN_OFFSET(horizon)));
5094
5095 translog_buffer_lock(cursor.buffer);
5096 translog_buffer_decrease_writers(cursor.buffer);
5097 translog_buffer_unlock(cursor.buffer);
5098 DBUG_RETURN(rc);
5099}
5100
5101
5102/**
5103 @brief Write variable record in 1 chunk.
5104
5105 @param lsn LSN of the record will be written here
5106 @param type the log record type
5107 @param short_trid Short transaction ID or 0 if it has no sense
5108 @param parts Descriptor of record source parts
5109 @param buffer_to_flush Buffer which have to be flushed if it is not 0
5110 @param header_length Calculated header length of chunk type 0
5111 @param trn Transaction structure pointer for hooks by
5112 record log type, for short_id
5113 @param hook_arg Argument which will be passed to pre-write and
5114 in-write hooks of this record.
5115
5116 @note
5117 We must have a translog_lock() when entering this function
5118 We must have buffer_to_flush locked (if not null)
5119
5120 @return Operation status
5121 @retval 0 OK
5122 @retval 1 Error
5123*/
5124
5125static my_bool
5126translog_write_variable_record_1chunk(LSN *lsn,
5127 enum translog_record_type type,
5128 MARIA_HA *tbl_info,
5129 SHORT_TRANSACTION_ID short_trid,
5130 struct st_translog_parts *parts,
5131 struct st_translog_buffer
5132 *buffer_to_flush, uint16 header_length,
5133 TRN *trn, void *hook_arg)
5134{
5135 int rc;
5136 uchar chunk0_header[1 + 2 + 5 + 2];
5137 DBUG_ENTER("translog_write_variable_record_1chunk");
5138 translog_lock_assert_owner();
5139 if (buffer_to_flush)
5140 translog_buffer_lock_assert_owner(buffer_to_flush);
5141
5142 translog_write_variable_record_1group_header(parts, type, short_trid,
5143 header_length, chunk0_header);
5144 set_lsn(lsn, log_descriptor.horizon);
5145 if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
5146 *lsn, TRUE) ||
5147 (log_record_type_descriptor[type].inwrite_hook &&
5148 (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
5149 lsn, hook_arg)))
5150 {
5151 translog_unlock();
5152 DBUG_RETURN(1);
5153 }
5154
5155 rc= translog_write_parts_on_page(&log_descriptor.horizon,
5156 &log_descriptor.bc,
5157 parts->total_record_length, parts);
5158 log_descriptor.bc.buffer->last_lsn= *lsn;
5159 DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
5160 LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
5161 log_descriptor.bc.buffer));
5162 translog_unlock();
5163
5164 /*
5165 check if we switched buffer and need process it (current buffer is
5166 unlocked already => we will not delay other threads
5167 */
5168 if (buffer_to_flush != NULL)
5169 {
5170 if (!rc)
5171 rc= translog_buffer_flush(buffer_to_flush);
5172 translog_buffer_unlock(buffer_to_flush);
5173 }
5174
5175 DBUG_RETURN(rc);
5176}
5177
5178
5179/*
5180 @brief Calculates and write LSN difference (compressed LSN).
5181
5182 @param base_lsn LSN from which we calculate difference
5183 @param lsn LSN for codding
5184 @param dst Result will be written to dst[-pack_length] .. dst[-1]
5185
5186 @note To store an LSN in a compact way we will use the following compression:
5187 If a log record has LSN1, and it contains the LSN2 as a back reference,
5188 Instead of LSN2 we write LSN1-LSN2, encoded as:
5189 two bits the number N (see below)
5190 14 bits
5191 N bytes
5192 That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
5193 is stored in the first two bits.
5194
5195 @note function made to write the result in backward direction with no
5196 special sense or tricks both directions are equal in complicity
5197
5198 @retval # pointer on coded LSN
5199*/
5200
5201static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst)
5202{
5203 uint64 diff;
5204 DBUG_ENTER("translog_put_LSN_diff");
5205 DBUG_PRINT("enter", ("Base: " LSN_FMT " val: " LSN_FMT " dst:%p",
5206 LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn),
5207 dst));
5208 DBUG_ASSERT(base_lsn > lsn);
5209 diff= base_lsn - lsn;
5210 DBUG_PRINT("info", ("Diff: 0x%llx", (ulonglong) diff));
5211 if (diff <= 0x3FFF)
5212 {
5213 dst-= 2;
5214 /*
5215 Note we store this high uchar first to ensure that first uchar has
5216 0 in the 3 upper bits.
5217 */
5218 dst[0]= (uchar)(diff >> 8);
5219 dst[1]= (uchar)(diff & 0xFF);
5220 }
5221 else if (diff <= 0x3FFFFFL)
5222 {
5223 dst-= 3;
5224 dst[0]= (uchar)(0x40 | (diff >> 16));
5225 int2store(dst + 1, diff & 0xFFFF);
5226 }
5227 else if (diff <= 0x3FFFFFFFL)
5228 {
5229 dst-= 4;
5230 dst[0]= (uchar)(0x80 | (diff >> 24));
5231 int3store(dst + 1, diff & 0xFFFFFFL);
5232 }
5233 else if (diff <= 0x3FFFFFFFFFLL)
5234
5235 {
5236 dst-= 5;
5237 dst[0]= (uchar)(0xC0 | (diff >> 32));
5238 int4store(dst + 1, diff & 0xFFFFFFFFL);
5239 }
5240 else
5241 {
5242 /*
5243 It is full LSN after special 1 diff (which is impossible
5244 in real life)
5245 */
5246 dst-= 2 + LSN_STORE_SIZE;
5247 dst[0]= 0;
5248 dst[1]= 1;
5249 lsn_store(dst + 2, lsn);
5250 }
5251 DBUG_PRINT("info", ("new dst:%p", dst));
5252 DBUG_RETURN(dst);
5253}
5254
5255
5256/*
5257 Get LSN from LSN-difference (compressed LSN)
5258
5259 SYNOPSIS
5260 translog_get_LSN_from_diff()
5261 base_lsn LSN from which we calculate difference
5262 src pointer to coded lsn
5263 dst pointer to buffer where to write 7byte LSN
5264
5265 NOTE:
5266 To store an LSN in a compact way we will use the following compression:
5267
5268 If a log record has LSN1, and it contains the lSN2 as a back reference,
5269 Instead of LSN2 we write LSN1-LSN2, encoded as:
5270
5271 two bits the number N (see below)
5272 14 bits
5273 N bytes
5274
5275 That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
5276 is stored in the first two bits.
5277
5278 RETURN
5279 pointer to buffer after decoded LSN
5280*/
5281
5282static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
5283{
5284 LSN lsn;
5285 uint32 diff;
5286 uint32 first_byte;
5287 uint32 file_no, rec_offset;
5288 uint8 code;
5289 DBUG_ENTER("translog_get_LSN_from_diff");
5290 DBUG_PRINT("enter", ("Base: " LSN_FMT " src:%p dst %p",
5291 LSN_IN_PARTS(base_lsn), src, dst));
5292 first_byte= *((uint8*) src);
5293 code= first_byte >> 6; /* Length is in 2 most significant bits */
5294 first_byte&= 0x3F;
5295 src++; /* Skip length + encode */
5296 file_no= LSN_FILE_NO(base_lsn); /* Assume relative */
5297 DBUG_PRINT("info", ("code: %u first byte: %lu",
5298 (uint) code, (ulong) first_byte));
5299 switch (code) {
5300 case 0:
5301 if (first_byte == 0 && *((uint8*)src) == 1)
5302 {
5303 /*
5304 It is full LSN after special 1 diff (which is impossible
5305 in real life)
5306 */
5307 memcpy(dst, src + 1, LSN_STORE_SIZE);
5308 DBUG_PRINT("info", ("Special case of full LSN, new src:%p",
5309 src + 1 + LSN_STORE_SIZE));
5310 DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
5311 }
5312 rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src));
5313 break;
5314 case 1:
5315 diff= uint2korr(src);
5316 rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff);
5317 break;
5318 case 2:
5319 diff= uint3korr(src);
5320 rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff);
5321 break;
5322 case 3:
5323 {
5324 ulonglong base_offset= LSN_OFFSET(base_lsn);
5325 diff= uint4korr(src);
5326 if (diff > LSN_OFFSET(base_lsn))
5327 {
5328 /* take 1 from file offset */
5329 first_byte++;
5330 base_offset+= 0x100000000LL;
5331 }
5332 file_no= LSN_FILE_NO(base_lsn) - first_byte;
5333 DBUG_ASSERT(base_offset - diff <= UINT_MAX);
5334 rec_offset= (uint32)(base_offset - diff);
5335 break;
5336 }
5337 default:
5338 DBUG_ASSERT(0);
5339 DBUG_RETURN(NULL);
5340 }
5341 lsn= MAKE_LSN(file_no, rec_offset);
5342 src+= code + 1;
5343 lsn_store(dst, lsn);
5344 DBUG_PRINT("info", ("new src:%p", src));
5345 DBUG_RETURN(src);
5346}
5347
5348
5349/**
5350 @brief Encodes relative LSNs listed in the parameters.
5351
5352 @param parts Parts list with encoded LSN(s)
5353 @param base_lsn LSN which is base for encoding
5354 @param lsns number of LSN(s) to encode
5355 @param compressed_LSNs buffer which can be used for storing compressed LSN(s)
5356*/
5357
5358static void translog_relative_LSN_encode(struct st_translog_parts *parts,
5359 LSN base_lsn,
5360 uint lsns, uchar *compressed_LSNs)
5361{
5362 LEX_CUSTRING *part;
5363 uint lsns_len= lsns * LSN_STORE_SIZE;
5364 uchar buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
5365 uchar *buffer= buffer_src;
5366 const uchar *cbuffer;
5367
5368 DBUG_ENTER("translog_relative_LSN_encode");
5369
5370 DBUG_ASSERT(parts->current != 0);
5371 part= parts->parts + parts->current;
5372
5373 /* collect all LSN(s) in one chunk if it (they) is (are) divided */
5374 if (part->length < lsns_len)
5375 {
5376 size_t copied= part->length;
5377 LEX_CUSTRING *next_part;
5378 DBUG_PRINT("info", ("Using buffer:%p", compressed_LSNs));
5379 memcpy(buffer, part->str, part->length);
5380 next_part= parts->parts + parts->current + 1;
5381 do
5382 {
5383 DBUG_ASSERT(next_part < parts->parts + parts->elements);
5384 if ((next_part->length + copied) < lsns_len)
5385 {
5386 memcpy(buffer + copied, next_part->str,
5387 next_part->length);
5388 copied+= next_part->length;
5389 next_part->length= 0; next_part->str= 0;
5390 /* delete_dynamic_element(&parts->parts, parts->current + 1); */
5391 next_part++;
5392 parts->current++;
5393 part= parts->parts + parts->current;
5394 }
5395 else
5396 {
5397 size_t len= lsns_len - copied;
5398 memcpy(buffer + copied, next_part->str, len);
5399 copied= lsns_len;
5400 next_part->str+= len;
5401 next_part->length-= len;
5402 }
5403 } while (copied < lsns_len);
5404 cbuffer= buffer;
5405 }
5406 else
5407 {
5408 cbuffer= part->str;
5409 part->str+= lsns_len;
5410 part->length-= lsns_len;
5411 parts->current--;
5412 part= parts->parts + parts->current;
5413 }
5414
5415 {
5416 /* Compress */
5417 LSN ref;
5418 int economy;
5419 const uchar *src_ptr;
5420 uchar *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD *
5421 COMPRESSED_LSN_MAX_STORE_SIZE);
5422 /*
5423 We write the result in backward direction with no special sense or
5424 tricks both directions are equal in complicity
5425 */
5426 for (src_ptr= cbuffer + lsns_len - LSN_STORE_SIZE;
5427 src_ptr >= (const uchar*)cbuffer;
5428 src_ptr-= LSN_STORE_SIZE)
5429 {
5430 ref= lsn_korr(src_ptr);
5431 dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr);
5432 }
5433 part->length= (size_t)((compressed_LSNs +
5434 (MAX_NUMBER_OF_LSNS_PER_RECORD *
5435 COMPRESSED_LSN_MAX_STORE_SIZE)) -
5436 dst_ptr);
5437 economy= lsns_len - (uint)part->length;
5438 parts->record_length-= economy;
5439 DBUG_PRINT("info", ("new length of LSNs: %lu economy: %d",
5440 (ulong)part->length, economy));
5441 parts->total_record_length-= economy;
5442 part->str= dst_ptr;
5443 }
5444 DBUG_VOID_RETURN;
5445}
5446
5447
5448/**
5449 @brief Write multi-group variable-size record.
5450
5451 @param lsn LSN of the record will be written here
5452 @param type the log record type
5453 @param short_trid Short transaction ID or 0 if it has no sense
5454 @param parts Descriptor of record source parts
5455 @param buffer_to_flush Buffer which have to be flushed if it is not 0
5456 @param header_length Header length calculated for 1 group
5457 @param buffer_rest Beginning from which we plan to write in full pages
5458 @param trn Transaction structure pointer for hooks by
5459 record log type, for short_id
5460 @param hook_arg Argument which will be passed to pre-write and
5461 in-write hooks of this record.
5462
5463 @note
5464 We must have a translog_lock() when entering this function
5465
5466 We must have buffer_to_flush locked (if not null)
5467 buffer_to_flush should *NOT* be locked when calling this function.
    (This note is here as this is different from most other
5469 translog_write...() functions which require the buffer to be locked)
5470
5471 @return Operation status
5472 @retval 0 OK
5473 @retval 1 Error
5474*/
5475
5476static my_bool
5477translog_write_variable_record_mgroup(LSN *lsn,
5478 enum translog_record_type type,
5479 MARIA_HA *tbl_info,
5480 SHORT_TRANSACTION_ID short_trid,
5481 struct st_translog_parts *parts,
5482 struct st_translog_buffer
5483 *buffer_to_flush,
5484 uint16 header_length,
5485 translog_size_t buffer_rest,
5486 TRN *trn, void *hook_arg)
5487{
5488 TRANSLOG_ADDRESS horizon;
5489 struct st_buffer_cursor cursor;
5490 int rc= 0;
5491 uint i, chunk2_page, full_pages;
5492 uint curr_group= 0;
5493 translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
5494 translog_size_t done= 0;
5495 struct st_translog_group_descriptor group;
5496 DYNAMIC_ARRAY groups;
5497 uint16 chunk3_size;
5498 uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
5499 uint16 last_page_capacity;
5500 my_bool new_page_before_chunk0= 1, first_chunk0= 1;
5501 uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
5502 uchar chunk2_header[1];
5503 uint header_fixed_part= header_length + 2;
5504 uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
5505 uint file_of_the_first_group;
5506 int pages_to_skip;
5507 struct st_translog_buffer *buffer_of_last_lsn;
5508 DBUG_ENTER("translog_write_variable_record_mgroup");
5509 translog_lock_assert_owner();
5510
5511 chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
5512
5513 if (my_init_dynamic_array(&groups,
5514 sizeof(struct st_translog_group_descriptor),
5515 10, 10, MYF(0)))
5516 {
5517 translog_unlock();
5518 DBUG_PRINT("error", ("init array failed"));
5519 DBUG_RETURN(1);
5520 }
5521
5522 first_page= translog_get_current_page_rest();
5523 record_rest= parts->record_length - (first_page - 1);
5524 DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));
5525
5526 if (record_rest < buffer_rest)
5527 {
5528 /*
5529 The record (group 1 type) is larger than the free space on the page
5530 - we need to split it in two. But when we split it in two, the first
5531 part is big enough to hold all the data of the record (because the
5532 header of the first part of the split is smaller than the header of
5533 the record as a whole when it takes only one chunk)
5534 */
5535 DBUG_PRINT("info", ("too many free space because changing header"));
5536 buffer_rest-= log_descriptor.page_capacity_chunk_2;
5537 DBUG_ASSERT(record_rest >= buffer_rest);
5538 }
5539
5540 file_of_the_first_group= LSN_FILE_NO(log_descriptor.horizon);
5541 translog_mark_file_unfinished(file_of_the_first_group);
5542 do
5543 {
5544 group.addr= horizon= log_descriptor.horizon;
5545 cursor= log_descriptor.bc;
5546 cursor.chaser= 1;
5547 if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
5548 {
5549 /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */
5550 full_pages= 255;
5551 buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
5552 }
5553 /*
5554 group chunks =
5555 full pages + first page (which actually can be full, too).
5556 But here we assign number of chunks - 1
5557 */
5558 group.num= full_pages;
5559 if (insert_dynamic(&groups, (uchar*) &group))
5560 {
5561 DBUG_PRINT("error", ("insert into array failed"));
5562 goto err_unlock;
5563 }
5564
5565 DBUG_PRINT("info", ("chunk: #%u first_page: %u (%u) "
5566 "full_pages: %lu (%lu) "
5567 "Left %lu",
5568 groups.elements,
5569 first_page, first_page - 1,
5570 (ulong) full_pages,
5571 (ulong) (full_pages *
5572 log_descriptor.page_capacity_chunk_2),
5573 (ulong)(parts->record_length - (first_page - 1 +
5574 buffer_rest) -
5575 done)));
5576 rc|= translog_advance_pointer((int)full_pages, 0);
5577
5578 translog_unlock();
5579
5580 if (buffer_to_flush != NULL)
5581 {
5582 translog_buffer_decrease_writers(buffer_to_flush);
5583 if (!rc)
5584 rc= translog_buffer_flush(buffer_to_flush);
5585 translog_buffer_unlock(buffer_to_flush);
5586 buffer_to_flush= NULL;
5587 }
5588 if (rc)
5589 {
5590 DBUG_PRINT("error", ("flush of unlock buffer failed"));
5591 goto err;
5592 }
5593
5594 translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
5595 translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
5596 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
5597 "Left %lu",
5598 LSN_IN_PARTS(log_descriptor.horizon),
5599 LSN_IN_PARTS(horizon),
5600 (ulong) (parts->record_length - (first_page - 1) -
5601 done)));
5602
5603 for (i= 0; i < full_pages; i++)
5604 {
5605 if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5606 goto err;
5607
5608 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " "
5609 "local: " LSN_FMT " "
5610 "Left: %lu",
5611 LSN_IN_PARTS(log_descriptor.horizon),
5612 LSN_IN_PARTS(horizon),
5613 (ulong) (parts->record_length - (first_page - 1) -
5614 i * log_descriptor.page_capacity_chunk_2 -
5615 done)));
5616 }
5617
5618 done+= (first_page - 1 + buffer_rest);
5619
5620 if (translog_chaser_page_next(&horizon, &cursor))
5621 {
5622 DBUG_PRINT("error", ("flush of unlock buffer failed"));
5623 goto err;
5624 }
5625 translog_buffer_lock(cursor.buffer);
5626 translog_buffer_decrease_writers(cursor.buffer);
5627 translog_buffer_unlock(cursor.buffer);
5628
5629 translog_lock();
5630
5631 /* Check that we have place for chunk type 2 */
5632 first_page= translog_get_current_page_rest();
5633 if (first_page <= 1)
5634 {
5635 if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
5636 &buffer_to_flush))
5637 goto err_unlock;
5638 first_page= translog_get_current_page_rest();
5639 }
5640 buffer_rest= translog_get_current_group_size();
5641 } while ((translog_size_t)(first_page + buffer_rest) <
5642 (translog_size_t)(parts->record_length - done));
5643
5644 group.addr= horizon= log_descriptor.horizon;
5645 cursor= log_descriptor.bc;
5646 cursor.chaser= 1;
5647 group.num= 0; /* 0 because it does not matter */
5648 if (insert_dynamic(&groups, (uchar*) &group))
5649 {
5650 DBUG_PRINT("error", ("insert into array failed"));
5651 goto err_unlock;
5652 }
5653 record_rest= parts->record_length - done;
5654 DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
5655 if (first_page > record_rest + 1)
5656 {
5657 /*
5658 We have not so much data to fill all first page
5659 (no speaking about full pages)
5660 so it will be:
5661 <chunk0 <data>>
5662 or
5663 <chunk0>...<chunk0><chunk0 <data>>
5664 or
5665 <chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
5666 */
5667 chunk2_page= full_pages= 0;
5668 last_page_capacity= first_page;
5669 pages_to_skip= -1;
5670 }
5671 else
5672 {
5673 /*
5674 We will have:
5675 <chunk2 <data>>...<chunk2 <data>><chunk0 <data>>
5676 or
5677 <chunk2 <data>>...<chunk2 <data>><chunk0>...<chunk0><chunk0 <data>>
5678 or
5679 <chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
5680 */
5681 chunk2_page= 1;
5682 record_rest-= (first_page - 1);
5683 pages_to_skip= full_pages=
5684 record_rest / log_descriptor.page_capacity_chunk_2;
5685 record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
5686 last_page_capacity= page_capacity;
5687 }
5688 chunk3_size= 0;
5689 chunk3_pages= 0;
5690 if (last_page_capacity > record_rest + 1 && record_rest != 0)
5691 {
5692 if (last_page_capacity >
5693 record_rest + header_fixed_part + groups.elements * (7 + 1))
5694 {
5695 /* 1 record of type 0 */
5696 chunk3_pages= 0;
5697 }
5698 else
5699 {
5700 pages_to_skip++;
5701 chunk3_pages= 1;
5702 if (record_rest + 2 == last_page_capacity)
5703 {
5704 chunk3_size= record_rest - 1;
5705 record_rest= 1;
5706 }
5707 else
5708 {
5709 chunk3_size= record_rest;
5710 record_rest= 0;
5711 }
5712 }
5713 }
5714 /*
5715 A first non-full page will hold type 0 chunk only if it fit in it with
5716 all its headers
5717 */
5718 while (page_capacity <
5719 record_rest + header_fixed_part +
5720 (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
5721 chunk0_pages++;
5722 DBUG_PRINT("info", ("chunk0_pages: %u groups %u groups per full page: %u "
5723 "Group on last page: %u",
5724 chunk0_pages, groups.elements,
5725 groups_per_page,
5726 (groups.elements -
5727 ((page_capacity - header_fixed_part) / (7 + 1)) *
5728 (chunk0_pages - 1))));
5729 DBUG_PRINT("info", ("first_page: %u chunk2: %u full_pages: %u (%lu) "
5730 "chunk3: %u (%u) rest: %u",
5731 first_page,
5732 chunk2_page, full_pages,
5733 (ulong) full_pages *
5734 log_descriptor.page_capacity_chunk_2,
5735 chunk3_pages, (uint) chunk3_size, (uint) record_rest));
5736 rc= translog_advance_pointer(pages_to_skip + (int)(chunk0_pages - 1),
5737 record_rest + header_fixed_part +
5738 (groups.elements -
5739 ((page_capacity -
5740 header_fixed_part) / (7 + 1)) *
5741 (chunk0_pages - 1)) * (7 + 1));
5742 buffer_of_last_lsn= log_descriptor.bc.buffer;
5743 translog_unlock();
5744
5745 if (buffer_to_flush != NULL)
5746 {
5747 translog_buffer_decrease_writers(buffer_to_flush);
5748 if (!rc)
5749 rc= translog_buffer_flush(buffer_to_flush);
5750 translog_buffer_unlock(buffer_to_flush);
5751 buffer_to_flush= NULL;
5752 }
5753 if (rc)
5754 {
5755 DBUG_PRINT("error", ("flush of unlock buffer failed"));
5756 goto err;
5757 }
5758
5759 if (rc)
5760 goto err;
5761
5762 if (chunk2_page)
5763 {
5764 DBUG_PRINT("info", ("chunk 2 to finish first page"));
5765 translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
5766 translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
5767 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
5768 "Left: %lu",
5769 LSN_IN_PARTS(log_descriptor.horizon),
5770 LSN_IN_PARTS(horizon),
5771 (ulong) (parts->record_length - (first_page - 1) -
5772 done)));
5773 }
5774 else if (chunk3_pages)
5775 {
5776 uchar chunk3_header[3];
5777 DBUG_PRINT("info", ("chunk 3"));
5778 DBUG_ASSERT(full_pages == 0);
5779 chunk3_pages= 0;
5780 chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
5781 int2store(chunk3_header + 1, chunk3_size);
5782 translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
5783 translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
5784 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
5785 "Left: %lu",
5786 LSN_IN_PARTS(log_descriptor.horizon),
5787 LSN_IN_PARTS(horizon),
5788 (ulong) (parts->record_length - chunk3_size - done)));
5789 }
5790 else
5791 {
5792 DBUG_PRINT("info", ("no new_page_before_chunk0"));
5793 new_page_before_chunk0= 0;
5794 }
5795
5796 for (i= 0; i < full_pages; i++)
5797 {
5798 DBUG_ASSERT(chunk2_page != 0);
5799 if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5800 goto err;
5801
5802 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
5803 "Left: %lu",
5804 LSN_IN_PARTS(log_descriptor.horizon),
5805 LSN_IN_PARTS(horizon),
5806 (ulong) (parts->record_length - (first_page - 1) -
5807 i * log_descriptor.page_capacity_chunk_2 -
5808 done)));
5809 }
5810
5811 if (chunk3_pages &&
5812 translog_write_variable_record_chunk3_page(parts,
5813 chunk3_size,
5814 &horizon, &cursor))
5815 goto err;
5816 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5817 LSN_IN_PARTS(log_descriptor.horizon),
5818 LSN_IN_PARTS(horizon)));
5819
5820 *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
5821 int2store(chunk0_header + 1, short_trid);
5822 translog_write_variable_record_1group_code_len(chunk0_header + 3,
5823 parts->record_length,
5824 header_length);
5825 do
5826 {
5827 int limit;
5828 if (new_page_before_chunk0 &&
5829 translog_chaser_page_next(&horizon, &cursor))
5830 {
5831 DBUG_PRINT("error", ("flush of unlock buffer failed"));
5832 goto err;
5833 }
5834 new_page_before_chunk0= 1;
5835
5836 if (first_chunk0)
5837 {
5838 first_chunk0= 0;
5839
5840 /*
5841 We can drop "log_descriptor.is_everything_flushed" earlier when have
5842 lock on loghandler and assign initial value of "horizon" variable or
5843 before unlocking loghandler (because we will increase writers
5844 counter on the buffer and every thread which wanted flush the buffer
5845 will wait till we finish with it). But IMHO better here take short
5846 lock and do not bother other threads with waiting.
5847 */
5848 translog_lock();
5849 set_lsn(lsn, horizon);
5850 buffer_of_last_lsn->last_lsn= *lsn;
5851 DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
5852 LSN_IN_PARTS(buffer_of_last_lsn->last_lsn),
5853 buffer_of_last_lsn));
5854 if (log_record_type_descriptor[type].inwrite_hook &&
5855 (*log_record_type_descriptor[type].inwrite_hook) (type, trn,
5856 tbl_info,
5857 lsn, hook_arg))
5858 goto err_unlock;
5859 translog_unlock();
5860 }
5861
5862 /*
5863 A first non-full page will hold type 0 chunk only if it fit in it with
5864 all its headers => the fist page is full or number of groups less then
5865 possible number of full page.
5866 */
5867 limit= (groups_per_page < groups.elements - curr_group ?
5868 groups_per_page : groups.elements - curr_group);
5869 DBUG_PRINT("info", ("Groups: %u curr: %u limit: %u",
5870 (uint) groups.elements, (uint) curr_group,
5871 (uint) limit));
5872
5873 if (chunk0_pages == 1)
5874 {
5875 DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
5876 (uint) limit, (uint) record_rest,
5877 (uint) (2 + limit * (7 + 1) + record_rest)));
5878 int2store(chunk0_header + header_length - 2,
5879 2 + limit * (7 + 1) + record_rest);
5880 }
5881 else
5882 {
5883 DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
5884 (uint) limit, (uint) (2 + limit * (7 + 1))));
5885 int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
5886 }
5887 int2store(chunk0_header + header_length, groups.elements - curr_group);
5888 translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
5889 chunk0_header);
5890 for (i= curr_group; i < limit + curr_group; i++)
5891 {
5892 struct st_translog_group_descriptor *grp_ptr;
5893 grp_ptr= dynamic_element(&groups, i,
5894 struct st_translog_group_descriptor *);
5895 lsn_store(group_desc, grp_ptr->addr);
5896 group_desc[7]= grp_ptr->num;
5897 translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
5898 }
5899
5900 if (chunk0_pages == 1 && record_rest != 0)
5901 translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);
5902
5903 chunk0_pages--;
5904 curr_group+= limit;
5905 /* put special type to indicate that it is not LSN chunk */
5906 *chunk0_header= (uchar) (TRANSLOG_CHUNK_LSN | TRANSLOG_CHUNK_0_CONT);
5907 } while (chunk0_pages != 0);
5908 translog_buffer_lock(cursor.buffer);
5909 translog_buffer_decrease_writers(cursor.buffer);
5910 translog_buffer_unlock(cursor.buffer);
5911 rc= 0;
5912
5913 if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn),
5914 *lsn, FALSE))
5915 goto err;
5916
5917 translog_mark_file_finished(file_of_the_first_group);
5918
5919 delete_dynamic(&groups);
5920 DBUG_RETURN(rc);
5921
5922err_unlock:
5923
5924 translog_unlock();
5925
5926err:
5927 if (buffer_to_flush != NULL)
5928 {
5929 /* This is to prevent locking buffer forever in case of error */
5930 translog_buffer_decrease_writers(buffer_to_flush);
5931 if (!rc)
5932 rc= translog_buffer_flush(buffer_to_flush);
5933 translog_buffer_unlock(buffer_to_flush);
5934 buffer_to_flush= NULL;
5935 }
5936
5937
5938 translog_mark_file_finished(file_of_the_first_group);
5939
5940 delete_dynamic(&groups);
5941 DBUG_RETURN(1);
5942}
5943
5944
5945/**
5946 @brief Write the variable length log record.
5947
5948 @param lsn LSN of the record will be written here
5949 @param type the log record type
5950 @param short_trid Short transaction ID or 0 if it has no sense
5951 @param parts Descriptor of record source parts
5952 @param trn Transaction structure pointer for hooks by
5953 record log type, for short_id
5954 @param hook_arg Argument which will be passed to pre-write and
5955 in-write hooks of this record.
5956
5957 @return Operation status
5958 @retval 0 OK
5959 @retval 1 Error
5960*/
5961
5962static my_bool translog_write_variable_record(LSN *lsn,
5963 enum translog_record_type type,
5964 MARIA_HA *tbl_info,
5965 SHORT_TRANSACTION_ID short_trid,
5966 struct st_translog_parts *parts,
5967 TRN *trn, void *hook_arg)
5968{
5969 struct st_translog_buffer *buffer_to_flush= NULL;
5970 uint header_length1= 1 + 2 + 2 +
5971 translog_variable_record_length_bytes(parts->record_length);
5972 ulong buffer_rest;
5973 uint page_rest;
5974 /* Max number of such LSNs per record is 2 */
5975 uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
5976 COMPRESSED_LSN_MAX_STORE_SIZE];
5977 my_bool res;
5978 DBUG_ENTER("translog_write_variable_record");
5979
5980 translog_lock();
5981 DBUG_PRINT("info", ("horizon: " LSN_FMT,
5982 LSN_IN_PARTS(log_descriptor.horizon)));
5983 page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
5984 DBUG_PRINT("info", ("header length: %u page_rest: %u",
5985 header_length1, page_rest));
5986
5987 /*
5988 header and part which we should read have to fit in one chunk
5989 TODO: allow to divide readable header
5990 */
5991 if (page_rest <
5992 (header_length1 + log_record_type_descriptor[type].read_header_len))
5993 {
5994 DBUG_PRINT("info",
5995 ("Next page, size: %u header: %u + %u",
5996 log_descriptor.bc.current_page_fill,
5997 header_length1,
5998 log_record_type_descriptor[type].read_header_len));
5999 translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
6000 &buffer_to_flush);
6001 /* Chunk 2 header is 1 byte, so full page capacity will be one uchar more */
6002 page_rest= log_descriptor.page_capacity_chunk_2 + 1;
6003 DBUG_PRINT("info", ("page_rest: %u", page_rest));
6004 }
6005
6006 /*
6007 To minimize compressed size we will compress always relative to
6008 very first chunk address (log_descriptor.horizon for now)
6009 */
6010 if (log_record_type_descriptor[type].compressed_LSN > 0)
6011 {
6012 translog_relative_LSN_encode(parts, log_descriptor.horizon,
6013 log_record_type_descriptor[type].
6014 compressed_LSN, compressed_LSNs);
6015 /* recalculate header length after compression */
6016 header_length1= 1 + 2 + 2 +
6017 translog_variable_record_length_bytes(parts->record_length);
6018 DBUG_PRINT("info", ("after compressing LSN(s) header length: %u "
6019 "record length: %lu",
6020 header_length1, (ulong)parts->record_length));
6021 }
6022
6023 /* TODO: check space on current page for header + few bytes */
6024 if (page_rest >= parts->record_length + header_length1)
6025 {
6026 /* following function makes translog_unlock(); */
6027 res= translog_write_variable_record_1chunk(lsn, type, tbl_info,
6028 short_trid,
6029 parts, buffer_to_flush,
6030 header_length1, trn, hook_arg);
6031 DBUG_RETURN(res);
6032 }
6033
6034 buffer_rest= translog_get_current_group_size();
6035
6036 if (buffer_rest >= parts->record_length + header_length1 - page_rest)
6037 {
6038 /* following function makes translog_unlock(); */
6039 res= translog_write_variable_record_1group(lsn, type, tbl_info,
6040 short_trid,
6041 parts, buffer_to_flush,
6042 header_length1, trn, hook_arg);
6043 DBUG_RETURN(res);
6044 }
6045 /* following function makes translog_unlock(); */
6046 res= translog_write_variable_record_mgroup(lsn, type, tbl_info,
6047 short_trid,
6048 parts, buffer_to_flush,
6049 header_length1,
6050 buffer_rest, trn, hook_arg);
6051 DBUG_RETURN(res);
6052}
6053
6054
6055/**
6056 @brief Write the fixed and pseudo-fixed log record.
6057
6058 @param lsn LSN of the record will be written here
6059 @param type the log record type
6060 @param short_trid Short transaction ID or 0 if it has no sense
6061 @param parts Descriptor of record source parts
6062 @param trn Transaction structure pointer for hooks by
6063 record log type, for short_id
6064 @param hook_arg Argument which will be passed to pre-write and
6065 in-write hooks of this record.
6066
6067 @return Operation status
6068 @retval 0 OK
6069 @retval 1 Error
6070*/
6071
6072static my_bool translog_write_fixed_record(LSN *lsn,
6073 enum translog_record_type type,
6074 MARIA_HA *tbl_info,
6075 SHORT_TRANSACTION_ID short_trid,
6076 struct st_translog_parts *parts,
6077 TRN *trn, void *hook_arg)
6078{
6079 struct st_translog_buffer *buffer_to_flush= NULL;
6080 uchar chunk1_header[1 + 2];
6081 /* Max number of such LSNs per record is 2 */
6082 uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
6083 COMPRESSED_LSN_MAX_STORE_SIZE];
6084 LEX_CUSTRING *part;
6085 int rc= 1;
6086 DBUG_ENTER("translog_write_fixed_record");
6087 DBUG_ASSERT((log_record_type_descriptor[type].rclass ==
6088 LOGRECTYPE_FIXEDLENGTH &&
6089 parts->record_length ==
6090 log_record_type_descriptor[type].fixed_length) ||
6091 (log_record_type_descriptor[type].rclass ==
6092 LOGRECTYPE_PSEUDOFIXEDLENGTH &&
6093 parts->record_length ==
6094 log_record_type_descriptor[type].fixed_length));
6095
6096 translog_lock();
6097 DBUG_PRINT("info", ("horizon: " LSN_FMT,
6098 LSN_IN_PARTS(log_descriptor.horizon)));
6099
6100 DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
6101 DBUG_PRINT("info",
6102 ("Page size: %u record: %u next cond: %d",
6103 log_descriptor.bc.current_page_fill,
6104 (parts->record_length +
6105 log_record_type_descriptor[type].compressed_LSN * 2 + 3),
6106 ((((uint) log_descriptor.bc.current_page_fill) +
6107 (parts->record_length +
6108 log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
6109 TRANSLOG_PAGE_SIZE)));
6110 /*
6111 check that there is enough place on current page.
6112 NOTE: compressing may increase page LSN size on two bytes for every LSN
6113 */
6114 if ((((uint) log_descriptor.bc.current_page_fill) +
6115 (parts->record_length +
6116 log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
6117 TRANSLOG_PAGE_SIZE)
6118 {
6119 DBUG_PRINT("info", ("Next page"));
6120 if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
6121 &buffer_to_flush))
6122 goto err; /* rc == 1 */
6123 if (buffer_to_flush)
6124 translog_buffer_lock_assert_owner(buffer_to_flush);
6125 }
6126
6127 set_lsn(lsn, log_descriptor.horizon);
6128 if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
6129 *lsn, TRUE) ||
6130 (log_record_type_descriptor[type].inwrite_hook &&
6131 (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
6132 lsn, hook_arg)))
6133 goto err;
6134
6135 /* compress LSNs */
6136 if (log_record_type_descriptor[type].rclass ==
6137 LOGRECTYPE_PSEUDOFIXEDLENGTH)
6138 {
6139 DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
6140 translog_relative_LSN_encode(parts, *lsn,
6141 log_record_type_descriptor[type].
6142 compressed_LSN, compressed_LSNs);
6143 }
6144
6145 /*
6146 Write the whole record at once (we know that there is enough place on
6147 the destination page)
6148 */
6149 DBUG_ASSERT(parts->current != 0); /* first part is left for header */
6150 part= parts->parts + (--parts->current);
6151 parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
6152 part->str= chunk1_header;
6153 *chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
6154 int2store(chunk1_header + 1, short_trid);
6155
6156 rc= translog_write_parts_on_page(&log_descriptor.horizon,
6157 &log_descriptor.bc,
6158 parts->total_record_length, parts);
6159
6160 log_descriptor.bc.buffer->last_lsn= *lsn;
6161 DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
6162 LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
6163 log_descriptor.bc.buffer));
6164
6165err:
6166 translog_unlock();
6167
6168 /*
6169 check if we switched buffer and need process it (current buffer is
6170 unlocked already => we will not delay other threads
6171 */
6172 if (buffer_to_flush != NULL)
6173 {
6174 if (!rc)
6175 rc= translog_buffer_flush(buffer_to_flush);
6176 translog_buffer_unlock(buffer_to_flush);
6177 }
6178
6179 DBUG_RETURN(rc);
6180}
6181
6182
6183/**
6184 @brief Writes the log record
6185
6186 If share has no 2-byte-id yet, gives an id to the share and logs
6187 LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID
6188 yet, logs it.
6189
6190 @param lsn LSN of the record will be written here
6191 @param type the log record type
6192 @param trn Transaction structure pointer for hooks by
6193 record log type, for short_id
6194 @param tbl_info MARIA_HA of table or NULL
6195 @param rec_len record length or 0 (count it)
6196 @param part_no number of parts or 0 (count it)
6197 @param parts_data zero ended (in case of number of parts is 0)
6198 array of LEX_STRINGs (parts), first
6199 TRANSLOG_INTERNAL_PARTS positions in the log
6200 should be unused (need for loghandler)
6201 @param store_share_id if tbl_info!=NULL then share's id will
6202 automatically be stored in the two first bytes
6203 pointed (so pointer is assumed to be !=NULL)
6204 @param hook_arg argument which will be passed to pre-write and
6205 in-write hooks of this record.
6206
6207 @return Operation status
6208 @retval 0 OK
6209 @retval 1 Error
6210*/
6211
6212my_bool translog_write_record(LSN *lsn,
6213 enum translog_record_type type,
6214 TRN *trn, MARIA_HA *tbl_info,
6215 translog_size_t rec_len,
6216 uint part_no,
6217 LEX_CUSTRING *parts_data,
6218 uchar *store_share_id,
6219 void *hook_arg)
6220{
6221 struct st_translog_parts parts;
6222 LEX_CUSTRING *part;
6223 int rc;
6224 uint short_trid= trn->short_id;
6225 DBUG_ENTER("translog_write_record");
6226 DBUG_PRINT("enter", ("type: %u (%s) ShortTrID: %u rec_len: %lu",
6227 (uint) type, log_record_type_descriptor[type].name,
6228 (uint) short_trid, (ulong) rec_len));
6229 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6230 translog_status == TRANSLOG_READONLY);
6231 DBUG_ASSERT(type != 0);
6232 DBUG_SLOW_ASSERT((uint)type <= max_allowed_translog_type);
6233 if (unlikely(translog_status != TRANSLOG_OK))
6234 {
6235 DBUG_PRINT("error", ("Transaction log is write protected"));
6236 DBUG_RETURN(1);
6237 }
6238
6239 if (tbl_info && type != LOGREC_FILE_ID)
6240 {
6241 MARIA_SHARE *share= tbl_info->s;
6242 DBUG_ASSERT(share->now_transactional);
6243 if (unlikely(share->id == 0))
6244 {
6245 /*
6246 First log write for this MARIA_SHARE; give it a short id.
6247 When the lock manager is enabled and needs a short id, it should be
6248 assigned in the lock manager (because row locks will be taken before
6249 log records are written; for example SELECT FOR UPDATE takes locks but
6250 writes no log record.
6251 */
6252 if (unlikely(translog_assign_id_to_share(tbl_info, trn)))
6253 DBUG_RETURN(1);
6254 }
6255 fileid_store(store_share_id, share->id);
6256 }
6257 if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID)))
6258 {
6259 LSN dummy_lsn;
6260 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6261 uchar log_data[6];
6262 DBUG_ASSERT(trn->undo_lsn == LSN_IMPOSSIBLE);
6263 int6store(log_data, trn->trid);
6264 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
6265 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6266 trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */
6267 if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID,
6268 trn, NULL, sizeof(log_data),
6269 sizeof(log_array)/sizeof(log_array[0]),
6270 log_array, NULL, NULL)))
6271 DBUG_RETURN(1);
6272 }
6273
6274 parts.parts= parts_data;
6275
6276 /* count parts if they are not counted by upper level */
6277 if (part_no == 0)
6278 {
6279 for (part_no= TRANSLOG_INTERNAL_PARTS;
6280 parts_data[part_no].length != 0;
6281 part_no++);
6282 }
6283 parts.elements= part_no;
6284 parts.current= TRANSLOG_INTERNAL_PARTS;
6285
6286 /* clear TRANSLOG_INTERNAL_PARTS */
6287 compile_time_assert(TRANSLOG_INTERNAL_PARTS != 0);
6288 parts_data[0].str= 0;
6289 parts_data[0].length= 0;
6290
6291 /* count length of the record */
6292 if (rec_len == 0)
6293 {
6294 for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\
6295 part < parts_data + part_no;
6296 part++)
6297 {
6298 rec_len+= (translog_size_t) part->length;
6299 }
6300 }
6301 parts.record_length= rec_len;
6302
6303#ifndef DBUG_OFF
6304 {
6305 uint i;
6306 size_t len= 0;
6307#ifdef HAVE_valgrind
6308 ha_checksum checksum= 0;
6309#endif
6310 for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++)
6311 {
6312#ifdef HAVE_valgrind
6313 /* Find unitialized bytes early */
6314 checksum+= my_checksum(checksum, parts_data[i].str,
6315 parts_data[i].length);
6316#endif
6317 len+= parts_data[i].length;
6318 }
6319 DBUG_ASSERT(len == rec_len);
6320 }
6321#endif
6322 /*
6323 Start total_record_length from record_length then overhead will
6324 be add
6325 */
6326 parts.total_record_length= parts.record_length;
6327 DBUG_PRINT("info", ("record length: %lu", (ulong) parts.record_length));
6328
6329 /* process this parts */
6330 if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
6331 (*log_record_type_descriptor[type].prewrite_hook)(type, trn,
6332 tbl_info,
6333 hook_arg))))
6334 {
6335 switch (log_record_type_descriptor[type].rclass) {
6336 case LOGRECTYPE_VARIABLE_LENGTH:
6337 rc= translog_write_variable_record(lsn, type, tbl_info,
6338 short_trid, &parts, trn, hook_arg);
6339 break;
6340 case LOGRECTYPE_PSEUDOFIXEDLENGTH:
6341 case LOGRECTYPE_FIXEDLENGTH:
6342 rc= translog_write_fixed_record(lsn, type, tbl_info,
6343 short_trid, &parts, trn, hook_arg);
6344 break;
6345 case LOGRECTYPE_NOT_ALLOWED:
6346 default:
6347 DBUG_ASSERT(0);
6348 rc= 1;
6349 }
6350 }
6351
6352 DBUG_PRINT("info", ("LSN: " LSN_FMT, LSN_IN_PARTS(*lsn)));
6353 DBUG_RETURN(rc);
6354}
6355
6356
6357/*
6358 Decode compressed (relative) LSN(s)
6359
6360 SYNOPSIS
6361 translog_relative_lsn_decode()
6362 base_lsn LSN for encoding
6363 src Decode LSN(s) from here
6364 dst Put decoded LSNs here
6365 lsns number of LSN(s)
6366
6367 RETURN
6368 position in sources after decoded LSN(s)
6369*/
6370
6371static uchar *translog_relative_LSN_decode(LSN base_lsn,
6372 uchar *src, uchar *dst, uint lsns)
6373{
6374 uint i;
6375 for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
6376 {
6377 src= translog_get_LSN_from_diff(base_lsn, src, dst);
6378 }
6379 return src;
6380}
6381
6382/**
6383 @brief Get header of fixed/pseudo length record and call hook for
6384 it processing
6385
6386 @param page Pointer to the buffer with page where LSN chunk is
6387 placed
6388 @param page_offset Offset of the first chunk in the page
6389 @param buff Buffer to be filled with header data
6390
6391 @return Length of header or operation status
6392 @retval # number of bytes in TRANSLOG_HEADER_BUFFER::header where
6393 stored decoded part of the header
6394*/
6395
6396static int translog_fixed_length_header(uchar *page,
6397 translog_size_t page_offset,
6398 TRANSLOG_HEADER_BUFFER *buff)
6399{
6400 struct st_log_record_type_descriptor *desc=
6401 log_record_type_descriptor + buff->type;
6402 uchar *src= page + page_offset + 3;
6403 uchar *dst= buff->header;
6404 uchar *start= src;
6405 int lsns= desc->compressed_LSN;
6406 uint length= desc->fixed_length;
6407 DBUG_ENTER("translog_fixed_length_header");
6408
6409 buff->record_length= length;
6410
6411 if (desc->rclass == LOGRECTYPE_PSEUDOFIXEDLENGTH)
6412 {
6413 DBUG_ASSERT(lsns > 0);
6414 src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
6415 lsns*= LSN_STORE_SIZE;
6416 dst+= lsns;
6417 length-= lsns;
6418 buff->compressed_LSN_economy= (lsns - (int) (src - start));
6419 }
6420 else
6421 buff->compressed_LSN_economy= 0;
6422
6423 memcpy(dst, src, length);
6424 buff->non_header_data_start_offset= (uint16) (page_offset +
6425 ((src + length) -
6426 (page + page_offset)));
6427 buff->non_header_data_len= 0;
6428 DBUG_RETURN(buff->record_length);
6429}
6430
6431
6432/*
6433 Free resources used by TRANSLOG_HEADER_BUFFER
6434
6435 SYNOPSIS
6436 translog_free_record_header();
6437*/
6438
6439void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
6440{
6441 DBUG_ENTER("translog_free_record_header");
6442 if (buff->groups_no != 0)
6443 {
6444 my_free(buff->groups);
6445 buff->groups_no= 0;
6446 }
6447 DBUG_VOID_RETURN;
6448}
6449
6450
6451/**
6452 @brief Returns the current horizon at the end of the current log
6453
6454 @return Horizon
6455 @retval LSN_ERROR error
6456 @retvar # Horizon
6457*/
6458
6459TRANSLOG_ADDRESS translog_get_horizon()
6460{
6461 TRANSLOG_ADDRESS res;
6462 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6463 translog_status == TRANSLOG_READONLY);
6464 translog_lock();
6465 res= log_descriptor.horizon;
6466 translog_unlock();
6467 return res;
6468}
6469
6470
6471/**
6472 @brief Returns the current horizon at the end of the current log, caller is
6473 assumed to already hold the lock
6474
6475 @return Horizon
6476 @retval LSN_ERROR error
6477 @retvar # Horizon
6478*/
6479
6480TRANSLOG_ADDRESS translog_get_horizon_no_lock()
6481{
6482 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6483 translog_status == TRANSLOG_READONLY);
6484 translog_lock_assert_owner();
6485 return log_descriptor.horizon;
6486}
6487
6488
6489/*
6490 Set last page in the scanner data structure
6491
6492 SYNOPSIS
6493 translog_scanner_set_last_page()
6494 scanner Information about current chunk during scanning
6495
6496 RETURN
6497 0 OK
6498 1 Error
6499*/
6500
6501static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner)
6502{
6503 my_bool page_ok;
6504 if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon))
6505 {
6506 /* It is last file => we can easy find last page address by horizon */
6507 uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE;
6508 scanner->last_file_page= (scanner->horizon -
6509 (pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE));
6510 return (0);
6511 }
6512 scanner->last_file_page= scanner->page_addr;
6513 return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok, 0));
6514}
6515
6516
6517/**
6518 @brief Get page from page cache according to requested method
6519
6520 @param scanner The scanner data
6521
6522 @return operation status
6523 @retval 0 OK
6524 @retval 1 Error
6525*/
6526
6527static my_bool
6528translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner)
6529{
6530 TRANSLOG_VALIDATOR_DATA data;
6531 DBUG_ENTER("translog_scanner_get_page");
6532 data.addr= &scanner->page_addr;
6533 data.was_recovered= 0;
6534 DBUG_RETURN((scanner->page=
6535 translog_get_page(&data, scanner->buffer,
6536 (scanner->use_direct_link ?
6537 &scanner->direct_link :
6538 NULL))) ==
6539 NULL);
6540}
6541
6542
6543/**
6544 @brief Initialize reader scanner.
6545
6546 @param lsn LSN with which it have to be inited
6547 @param fixed_horizon true if it is OK do not read records which was written
6548 after scanning beginning
6549 @param scanner scanner which have to be inited
6550 @param use_direct prefer using direct lings from page handler
6551 where it is possible.
6552
6553 @note If direct link was used translog_destroy_scanner should be
6554 called after it using
6555
6556 @return status of the operation
6557 @retval 0 OK
6558 @retval 1 Error
6559*/
6560
6561my_bool translog_scanner_init(LSN lsn,
6562 my_bool fixed_horizon,
6563 TRANSLOG_SCANNER_DATA *scanner,
6564 my_bool use_direct)
6565{
6566 DBUG_ENTER("translog_scanner_init");
6567 DBUG_PRINT("enter", ("Scanner: %p LSN: " LSN_FMT,
6568 scanner, LSN_IN_PARTS(lsn)));
6569 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6570 translog_status == TRANSLOG_READONLY);
6571
6572 scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
6573
6574 scanner->fixed_horizon= fixed_horizon;
6575 scanner->use_direct_link= use_direct;
6576 scanner->direct_link= NULL;
6577
6578 scanner->horizon= translog_get_horizon();
6579 DBUG_PRINT("info", ("horizon: " LSN_FMT, LSN_IN_PARTS(scanner->horizon)));
6580
6581 /* lsn < horizon */
6582 DBUG_ASSERT(lsn <= scanner->horizon);
6583
6584 scanner->page_addr= lsn;
6585 scanner->page_addr-= scanner->page_offset; /*decrease offset */
6586
6587 if (translog_scanner_set_last_page(scanner))
6588 DBUG_RETURN(1);
6589
6590 if (translog_scanner_get_page(scanner))
6591 DBUG_RETURN(1);
6592 DBUG_RETURN(0);
6593}
6594
6595
/**
  @brief Destroy scanner object

  The only resource released here is the scanner's direct link, which is
  passed to translog_free_link(); the scanner structure itself is not
  freed.

  @param scanner         The scanner object to destroy
*/

void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
{
  DBUG_ENTER("translog_destroy_scanner");
  DBUG_PRINT("enter", ("Scanner: %p", scanner));
  translog_free_link(scanner->direct_link);
  DBUG_VOID_RETURN;
}
6609
6610
6611/*
6612 Checks End of the Log
6613
6614 SYNOPSIS
6615 translog_scanner_eol()
6616 scanner Information about current chunk during scanning
6617
6618 RETURN
6619 1 End of the Log
6620 0 OK
6621*/
6622
6623static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
6624{
6625 DBUG_ENTER("translog_scanner_eol");
6626 DBUG_PRINT("enter",
6627 ("Horizon: " LSN_FMT " Current: (%u, 0x%x+0x%x=0x%x)",
6628 LSN_IN_PARTS(scanner->horizon),
6629 LSN_IN_PARTS(scanner->page_addr),
6630 (uint) scanner->page_offset,
6631 (uint) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
6632 if (scanner->horizon > (scanner->page_addr +
6633 scanner->page_offset))
6634 {
6635 DBUG_PRINT("info", ("Horizon is not reached"));
6636 DBUG_RETURN(0);
6637 }
6638 if (scanner->fixed_horizon)
6639 {
6640 DBUG_PRINT("info", ("Horizon is fixed and reached"));
6641 DBUG_RETURN(1);
6642 }
6643 scanner->horizon= translog_get_horizon();
6644 DBUG_PRINT("info",
6645 ("Horizon is re-read, EOL: %d",
6646 scanner->horizon <= (scanner->page_addr +
6647 scanner->page_offset)));
6648 DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
6649 scanner->page_offset));
6650}
6651
6652
6653/**
6654 @brief Cheks End of the Page
6655
6656 @param scanner Information about current chunk during scanning
6657
6658 @retval 1 End of the Page
6659 @retval 0 OK
6660*/
6661
6662static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
6663{
6664 DBUG_ENTER("translog_scanner_eop");
6665 DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
6666 scanner->page[scanner->page_offset] == TRANSLOG_FILLER);
6667}
6668
6669
6670/**
6671 @brief Checks End of the File (i.e. we are scanning last page, which do not
6672 mean end of this page)
6673
6674 @param scanner Information about current chunk during scanning
6675
6676 @retval 1 End of the File
6677 @retval 0 OK
6678*/
6679
6680static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
6681{
6682 DBUG_ENTER("translog_scanner_eof");
6683 DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
6684 LSN_FILE_NO(scanner->last_file_page));
6685 DBUG_PRINT("enter", ("curr Page: 0x%lx last page: 0x%lx "
6686 "normal EOF: %d",
6687 (ulong) LSN_OFFSET(scanner->page_addr),
6688 (ulong) LSN_OFFSET(scanner->last_file_page),
6689 LSN_OFFSET(scanner->page_addr) ==
6690 LSN_OFFSET(scanner->last_file_page)));
6691 /*
6692 TODO: detect damaged file EOF,
6693 TODO: issue warning if damaged file EOF detected
6694 */
6695 DBUG_RETURN(scanner->page_addr ==
6696 scanner->last_file_page);
6697}
6698
6699/*
6700 Move scanner to the next chunk
6701
6702 SYNOPSIS
6703 translog_get_next_chunk()
6704 scanner Information about current chunk during scanning
6705
6706 RETURN
6707 0 OK
6708 1 Error
6709*/
6710
6711static my_bool
6712translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
6713{
6714 uint16 len;
6715 DBUG_ENTER("translog_get_next_chunk");
6716
6717 if (translog_scanner_eop(scanner))
6718 len= TRANSLOG_PAGE_SIZE - scanner->page_offset;
6719 else if ((len= translog_get_total_chunk_length(scanner->page,
6720 scanner->page_offset)) == 0)
6721 DBUG_RETURN(1);
6722 scanner->page_offset+= len;
6723
6724 if (translog_scanner_eol(scanner))
6725 {
6726 scanner->page= END_OF_LOG;
6727 scanner->page_offset= 0;
6728 DBUG_RETURN(0);
6729 }
6730 if (translog_scanner_eop(scanner))
6731 {
6732 /* before reading next page we should unpin current one if it was pinned */
6733 translog_free_link(scanner->direct_link);
6734 if (translog_scanner_eof(scanner))
6735 {
6736 DBUG_PRINT("info", ("horizon: " LSN_FMT " pageaddr: " LSN_FMT,
6737 LSN_IN_PARTS(scanner->horizon),
6738 LSN_IN_PARTS(scanner->page_addr)));
6739 /* if it is log end it have to be caught before */
6740 DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
6741 LSN_FILE_NO(scanner->page_addr));
6742 scanner->page_addr+= LSN_ONE_FILE;
6743 scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
6744 TRANSLOG_PAGE_SIZE);
6745 if (translog_scanner_set_last_page(scanner))
6746 DBUG_RETURN(1);
6747 }
6748 else
6749 {
6750 scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
6751 }
6752
6753 if (translog_scanner_get_page(scanner))
6754 DBUG_RETURN(1);
6755
6756 scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
6757 if (translog_scanner_eol(scanner))
6758 {
6759 scanner->page= END_OF_LOG;
6760 scanner->page_offset= 0;
6761 DBUG_RETURN(0);
6762 }
6763 DBUG_ASSERT(scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
6764 }
6765 DBUG_RETURN(0);
6766}
6767
6768
6769/**
6770 @brief Get header of variable length record and call hook for it processing
6771
6772 @param page Pointer to the buffer with page where LSN chunk is
6773 placed
6774 @param page_offset Offset of the first chunk in the page
6775 @param buff Buffer to be filled with header data
6776 @param scanner If present should be moved to the header page if
6777 it differ from LSN page
6778
6779 @return Length of header or operation status
6780 @retval RECHEADER_READ_ERROR error
6781 @retval RECHEADER_READ_EOF End of the log reached during the read
6782 @retval # number of bytes in
6783 TRANSLOG_HEADER_BUFFER::header where
6784 stored decoded part of the header
6785*/
6786
6787static int
6788translog_variable_length_header(uchar *page, translog_size_t page_offset,
6789 TRANSLOG_HEADER_BUFFER *buff,
6790 TRANSLOG_SCANNER_DATA *scanner)
6791{
6792 struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
6793 buff->type);
6794 uchar *src= page + page_offset + 1 + 2;
6795 uchar *dst= buff->header;
6796 LSN base_lsn;
6797 uint lsns= desc->compressed_LSN;
6798 uint16 chunk_len;
6799 uint16 length= desc->read_header_len;
6800 uint16 buffer_length= length;
6801 uint16 body_len;
6802 int rc;
6803 TRANSLOG_SCANNER_DATA internal_scanner;
6804 DBUG_ENTER("translog_variable_length_header");
6805
6806 buff->record_length= translog_variable_record_1group_decode_len(&src);
6807 chunk_len= uint2korr(src);
6808 DBUG_PRINT("info", ("rec len: %lu chunk len: %u length: %u bufflen: %u",
6809 (ulong) buff->record_length, (uint) chunk_len,
6810 (uint) length, (uint) buffer_length));
6811 if (chunk_len == 0)
6812 {
6813 uint16 page_rest;
6814 DBUG_PRINT("info", ("1 group"));
6815 src+= 2;
6816 page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6817
6818 base_lsn= buff->lsn;
6819 body_len= MY_MIN(page_rest, buff->record_length);
6820 }
6821 else
6822 {
6823 uint grp_no, curr;
6824 uint header_to_skip;
6825 uint16 page_rest;
6826
6827 DBUG_PRINT("info", ("multi-group"));
6828 grp_no= buff->groups_no= uint2korr(src + 2);
6829 if (!(buff->groups=
6830 (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no,
6831 MYF(0))))
6832 DBUG_RETURN(RECHEADER_READ_ERROR);
6833 DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
6834 src+= (2 + 2);
6835 page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6836 curr= 0;
6837 header_to_skip= (uint) (src - (page + page_offset));
6838 buff->chunk0_pages= 0;
6839
6840 for (;;)
6841 {
6842 uint i, read_length= grp_no;
6843
6844 buff->chunk0_pages++;
6845 if (page_rest < grp_no * (7 + 1))
6846 read_length= page_rest / (7 + 1);
6847 DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u "
6848 "start from: %u",
6849 buff->chunk0_pages, read_length, grp_no, curr));
6850 for (i= 0; i < read_length; i++, curr++)
6851 {
6852 DBUG_ASSERT(curr < buff->groups_no);
6853 buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
6854 buff->groups[curr].num= src[i * (7 + 1) + 7];
6855 DBUG_PRINT("info", ("group #%u " LSN_FMT " chunks: %u",
6856 curr,
6857 LSN_IN_PARTS(buff->groups[curr].addr),
6858 (uint) buff->groups[curr].num));
6859 }
6860 grp_no-= read_length;
6861 if (grp_no == 0)
6862 {
6863 if (scanner)
6864 {
6865 buff->chunk0_data_addr= scanner->page_addr;
6866 /* offset increased */
6867 buff->chunk0_data_addr+= (page_offset + header_to_skip +
6868 read_length * (7 + 1));
6869 }
6870 else
6871 {
6872 buff->chunk0_data_addr= buff->lsn;
6873 /* offset increased */
6874 buff->chunk0_data_addr+= (header_to_skip + read_length * (7 + 1));
6875 }
6876 buff->chunk0_data_len= chunk_len - 2 - read_length * (7 + 1);
6877 DBUG_PRINT("info", ("Data address: " LSN_FMT " len: %u",
6878 LSN_IN_PARTS(buff->chunk0_data_addr),
6879 buff->chunk0_data_len));
6880 break;
6881 }
6882 if (scanner == NULL)
6883 {
6884 DBUG_PRINT("info", ("use internal scanner for header reading"));
6885 scanner= &internal_scanner;
6886 if (translog_scanner_init(buff->lsn, 1, scanner, 0))
6887 {
6888 rc= RECHEADER_READ_ERROR;
6889 goto exit_and_free;
6890 }
6891 }
6892 if (translog_get_next_chunk(scanner))
6893 {
6894 if (scanner == &internal_scanner)
6895 translog_destroy_scanner(scanner);
6896 rc= RECHEADER_READ_ERROR;
6897 goto exit_and_free;
6898 }
6899 if (scanner->page == END_OF_LOG)
6900 {
6901 if (scanner == &internal_scanner)
6902 translog_destroy_scanner(scanner);
6903 rc= RECHEADER_READ_EOF;
6904 goto exit_and_free;
6905 }
6906 page= scanner->page;
6907 page_offset= scanner->page_offset;
6908 src= page + page_offset + header_to_skip;
6909 chunk_len= uint2korr(src - 2 - 2);
6910 DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
6911 page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6912 }
6913
6914 if (scanner == NULL)
6915 {
6916 DBUG_PRINT("info", ("use internal scanner"));
6917 scanner= &internal_scanner;
6918 }
6919 else
6920 {
6921 translog_destroy_scanner(scanner);
6922 }
6923 base_lsn= buff->groups[0].addr;
6924 translog_scanner_init(base_lsn, 1, scanner, scanner == &internal_scanner);
6925 /* first group chunk is always chunk type 2 */
6926 page= scanner->page;
6927 page_offset= scanner->page_offset;
6928 src= page + page_offset + 1;
6929 page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6930 body_len= page_rest;
6931 if (scanner == &internal_scanner)
6932 translog_destroy_scanner(scanner);
6933 }
6934 if (lsns)
6935 {
6936 uchar *start= src;
6937 src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
6938 lsns*= LSN_STORE_SIZE;
6939 dst+= lsns;
6940 length-= lsns;
6941 buff->record_length+= (buff->compressed_LSN_economy=
6942 (int) (lsns - (src - start)));
6943 DBUG_PRINT("info", ("lsns: %u length: %u economy: %d new length: %lu",
6944 lsns / LSN_STORE_SIZE, (uint) length,
6945 (int) buff->compressed_LSN_economy,
6946 (ulong) buff->record_length));
6947 body_len-= (uint16) (src - start);
6948 }
6949 else
6950 buff->compressed_LSN_economy= 0;
6951
6952 DBUG_ASSERT(body_len >= length);
6953 body_len-= length;
6954 memcpy(dst, src, length);
6955 buff->non_header_data_start_offset= (uint16) (src + length - page);
6956 buff->non_header_data_len= body_len;
6957 DBUG_PRINT("info", ("non_header_data_start_offset: %u len: %u buffer: %u",
6958 buff->non_header_data_start_offset,
6959 buff->non_header_data_len, buffer_length));
6960 DBUG_RETURN(buffer_length);
6961
6962exit_and_free:
6963 my_free(buff->groups);
6964 buff->groups_no= 0; /* prevent try to use of buff->groups */
6965 DBUG_RETURN(rc);
6966}
6967
6968
6969/**
6970 @brief Read record header from the given buffer
6971
6972 @param page page content buffer
6973 @param page_offset offset of the chunk in the page
6974 @param buff destination buffer
6975 @param scanner If this is set the scanner will be moved to the
6976 record header page (differ from LSN page in case of
6977 multi-group records)
6978
6979 @return Length of header or operation status
6980 @retval RECHEADER_READ_ERROR error
6981 @retval # number of bytes in
6982 TRANSLOG_HEADER_BUFFER::header where
6983 stored decoded part of the header
6984*/
6985
6986int translog_read_record_header_from_buffer(uchar *page,
6987 uint16 page_offset,
6988 TRANSLOG_HEADER_BUFFER *buff,
6989 TRANSLOG_SCANNER_DATA *scanner)
6990{
6991 translog_size_t res;
6992 DBUG_ENTER("translog_read_record_header_from_buffer");
6993 DBUG_PRINT("info", ("page byte: 0x%x offset: %u",
6994 (uint) page[page_offset], (uint) page_offset));
6995 DBUG_ASSERT(translog_is_LSN_chunk(page[page_offset]));
6996 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6997 translog_status == TRANSLOG_READONLY);
6998 buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
6999 buff->short_trid= uint2korr(page + page_offset + 1);
7000 DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN " LSN_FMT,
7001 (uint) buff->type, (uint)buff->short_trid,
7002 LSN_IN_PARTS(buff->lsn)));
7003 /* Read required bytes from the header and call hook */
7004 switch (log_record_type_descriptor[buff->type].rclass) {
7005 case LOGRECTYPE_VARIABLE_LENGTH:
7006 res= translog_variable_length_header(page, page_offset, buff,
7007 scanner);
7008 break;
7009 case LOGRECTYPE_PSEUDOFIXEDLENGTH:
7010 case LOGRECTYPE_FIXEDLENGTH:
7011 res= translog_fixed_length_header(page, page_offset, buff);
7012 break;
7013 default:
7014 DBUG_ASSERT(0); /* we read some junk (got no LSN) */
7015 res= RECHEADER_READ_ERROR;
7016 }
7017 DBUG_RETURN(res);
7018}
7019
7020
7021/**
7022 @brief Read record header and some fixed part of a record (the part depend
7023 on record type).
7024
7025 @param lsn log record serial number (address of the record)
7026 @param buff log record header buffer
7027
7028 @note Some type of record can be read completely by this call
7029 @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
7030 LSN can be translated to absolute one), some fields can be added (like
7031 actual header length in the record if the header has variable length)
7032
7033 @return Length of header or operation status
7034 @retval RECHEADER_READ_ERROR error
7035 @retval # number of bytes in
7036 TRANSLOG_HEADER_BUFFER::header where
7037 stored decoded part of the header
7038*/
7039
7040int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff)
7041{
7042 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
7043 uchar *page;
7044 translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
7045 PAGECACHE_BLOCK_LINK *direct_link;
7046 TRANSLOG_ADDRESS addr;
7047 TRANSLOG_VALIDATOR_DATA data;
7048 DBUG_ENTER("translog_read_record_header");
7049 DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7050 DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
7051 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7052 translog_status == TRANSLOG_READONLY);
7053
7054 buff->lsn= lsn;
7055 buff->groups_no= 0;
7056 data.addr= &addr;
7057 data.was_recovered= 0;
7058 addr= lsn;
7059 addr-= page_offset; /* offset decreasing */
7060 res= (!(page= translog_get_page(&data, psize_buff.buffer, &direct_link))) ?
7061 RECHEADER_READ_ERROR :
7062 translog_read_record_header_from_buffer(page, page_offset, buff, 0);
7063 translog_free_link(direct_link);
7064 DBUG_RETURN(res);
7065}
7066
7067
7068/**
7069 @brief Read record header and some fixed part of a record (the part depend
7070 on record type).
7071
7072 @param scan scanner position to read
7073 @param buff log record header buffer
7074 @param move_scanner request to move scanner to the header position
7075
7076 @note Some type of record can be read completely by this call
7077 @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
7078 LSN can be translated to absolute one), some fields can be added (like
7079 actual header length in the record if the header has variable length)
7080
7081 @return Length of header or operation status
7082 @retval RECHEADER_READ_ERROR error
7083 @retval # number of bytes in
7084 TRANSLOG_HEADER_BUFFER::header where stored
7085 decoded part of the header
7086*/
7087
7088int translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner,
7089 TRANSLOG_HEADER_BUFFER *buff,
7090 my_bool move_scanner)
7091{
7092 translog_size_t res;
7093 DBUG_ENTER("translog_read_record_header_scan");
7094 DBUG_PRINT("enter", ("Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
7095 "Lst: " LSN_FMT " Offset: %u(%x) fixed %d",
7096 LSN_IN_PARTS(scanner->page_addr),
7097 LSN_IN_PARTS(scanner->horizon),
7098 LSN_IN_PARTS(scanner->last_file_page),
7099 (uint) scanner->page_offset,
7100 (uint) scanner->page_offset, scanner->fixed_horizon));
7101 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7102 translog_status == TRANSLOG_READONLY);
7103 buff->groups_no= 0;
7104 buff->lsn= scanner->page_addr;
7105 buff->lsn+= scanner->page_offset; /* offset increasing */
7106 res= translog_read_record_header_from_buffer(scanner->page,
7107 scanner->page_offset,
7108 buff,
7109 (move_scanner ?
7110 scanner : 0));
7111 DBUG_RETURN(res);
7112}
7113
7114
7115/**
7116 @brief Read record header and some fixed part of the next record (the part
7117 depend on record type).
7118
7119 @param scanner data for scanning if lsn is NULL scanner data
7120 will be used for continue scanning.
7121 The scanner can be NULL.
7122
7123 @param buff log record header buffer
7124
7125 @return Length of header or operation status
7126 @retval RECHEADER_READ_ERROR error
7127 @retval RECHEADER_READ_EOF EOF
7128 @retval # number of bytes in
7129 TRANSLOG_HEADER_BUFFER::header where
7130 stored decoded part of the header
7131*/
7132
7133int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
7134 TRANSLOG_HEADER_BUFFER *buff)
7135{
7136 translog_size_t res;
7137
7138 DBUG_ENTER("translog_read_next_record_header");
7139 buff->groups_no= 0; /* to be sure that we will free it right */
7140 DBUG_PRINT("enter", ("scanner: %p", scanner));
7141 DBUG_PRINT("info", ("Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
7142 "Lst: " LSN_FMT " Offset: %u(%x) fixed: %d",
7143 LSN_IN_PARTS(scanner->page_addr),
7144 LSN_IN_PARTS(scanner->horizon),
7145 LSN_IN_PARTS(scanner->last_file_page),
7146 (uint) scanner->page_offset,
7147 (uint) scanner->page_offset, scanner->fixed_horizon));
7148 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7149 translog_status == TRANSLOG_READONLY);
7150
7151 do
7152 {
7153 if (translog_get_next_chunk(scanner))
7154 DBUG_RETURN(RECHEADER_READ_ERROR);
7155 if (scanner->page == END_OF_LOG)
7156 {
7157 DBUG_PRINT("info", ("End of file from the scanner"));
7158 /* Last record was read */
7159 buff->lsn= LSN_IMPOSSIBLE;
7160 DBUG_RETURN(RECHEADER_READ_EOF);
7161 }
7162 DBUG_PRINT("info", ("Page: " LSN_FMT " offset: %lu byte: %x",
7163 LSN_IN_PARTS(scanner->page_addr),
7164 (ulong) scanner->page_offset,
7165 (uint) scanner->page[scanner->page_offset]));
7166 } while (!translog_is_LSN_chunk(scanner->page[scanner->page_offset]) &&
7167 scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
7168
7169 if (scanner->page[scanner->page_offset] == TRANSLOG_FILLER)
7170 {
7171 DBUG_PRINT("info", ("End of file"));
7172 /* Last record was read */
7173 buff->lsn= LSN_IMPOSSIBLE;
7174 /* Return 'end of log' marker */
7175 res= RECHEADER_READ_EOF;
7176 }
7177 else
7178 res= translog_read_record_header_scan(scanner, buff, 0);
7179 DBUG_RETURN(res);
7180}
7181
7182
7183/*
7184 Moves record data reader to the next chunk and fill the data reader
7185 information about that chunk.
7186
7187 SYNOPSIS
7188 translog_record_read_next_chunk()
7189 data data cursor
7190
7191 RETURN
7192 0 OK
7193 1 Error
7194*/
7195
7196static my_bool translog_record_read_next_chunk(TRANSLOG_READER_DATA *data)
7197{
7198 translog_size_t new_current_offset= data->current_offset + data->chunk_size;
7199 uint16 chunk_header_len, chunk_len;
7200 uint8 type;
7201 DBUG_ENTER("translog_record_read_next_chunk");
7202
7203 if (data->eor)
7204 {
7205 DBUG_PRINT("info", ("end of the record flag set"));
7206 DBUG_RETURN(1);
7207 }
7208
7209 if (data->header.groups_no &&
7210 data->header.groups_no - 1 != data->current_group &&
7211 data->header.groups[data->current_group].num == data->current_chunk)
7212 {
7213 /* Goto next group */
7214 data->current_group++;
7215 data->current_chunk= 0;
7216 DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
7217 translog_destroy_scanner(&data->scanner);
7218 translog_scanner_init(data->header.groups[data->current_group].addr,
7219 1, &data->scanner, 1);
7220 }
7221 else
7222 {
7223 data->current_chunk++;
7224 if (translog_get_next_chunk(&data->scanner))
7225 DBUG_RETURN(1);
7226 if (data->scanner.page == END_OF_LOG)
7227 {
7228 /*
7229 Actually it should not happened, but we want to quit nicely in case
7230 of a truncated log
7231 */
7232 DBUG_RETURN(1);
7233 }
7234 }
7235 type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
7236
7237 if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
7238 {
7239 DBUG_PRINT("info",
7240 ("Last chunk: data len: %u offset: %u group: %u of %u",
7241 data->header.chunk0_data_len, data->scanner.page_offset,
7242 data->current_group, data->header.groups_no - 1));
7243 DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
7244 DBUG_ASSERT(data->header.lsn ==
7245 data->scanner.page_addr + data->scanner.page_offset);
7246 translog_destroy_scanner(&data->scanner);
7247 translog_scanner_init(data->header.chunk0_data_addr, 1, &data->scanner, 1);
7248 data->chunk_size= data->header.chunk0_data_len;
7249 data->body_offset= data->scanner.page_offset;
7250 data->current_offset= new_current_offset;
7251 data->eor= 1;
7252 DBUG_RETURN(0);
7253 }
7254
7255 if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
7256 {
7257 data->eor= 1;
7258 DBUG_RETURN(1); /* End of record */
7259 }
7260
7261 chunk_header_len=
7262 translog_get_chunk_header_length(data->scanner.page +
7263 data->scanner.page_offset);
7264 chunk_len= translog_get_total_chunk_length(data->scanner.page,
7265 data->scanner.page_offset);
7266 data->chunk_size= chunk_len - chunk_header_len;
7267 data->body_offset= data->scanner.page_offset + chunk_header_len;
7268 data->current_offset= new_current_offset;
7269 DBUG_PRINT("info", ("grp: %u chunk: %u body_offset: %u chunk_size: %u "
7270 "current_offset: %lu",
7271 (uint) data->current_group,
7272 (uint) data->current_chunk,
7273 (uint) data->body_offset,
7274 (uint) data->chunk_size, (ulong) data->current_offset));
7275 DBUG_RETURN(0);
7276}
7277
7278
7279/*
7280 Initialize record reader data from LSN
7281
7282 SYNOPSIS
7283 translog_init_reader_data()
7284 lsn reference to LSN we should start from
7285 data reader data to initialize
7286
7287 RETURN
7288 0 OK
7289 1 Error
7290*/
7291
7292static my_bool translog_init_reader_data(LSN lsn,
7293 TRANSLOG_READER_DATA *data)
7294{
7295 int read_header;
7296 DBUG_ENTER("translog_init_reader_data");
7297 if (translog_scanner_init(lsn, 1, &data->scanner, 1) ||
7298 ((read_header=
7299 translog_read_record_header_scan(&data->scanner, &data->header, 1))
7300 == RECHEADER_READ_ERROR))
7301 DBUG_RETURN(1);
7302 data->read_header= read_header;
7303 data->body_offset= data->header.non_header_data_start_offset;
7304 data->chunk_size= data->header.non_header_data_len;
7305 data->current_offset= data->read_header;
7306 data->current_group= 0;
7307 data->current_chunk= 0;
7308 data->eor= 0;
7309 DBUG_PRINT("info", ("read_header: %u "
7310 "body_offset: %u chunk_size: %u current_offset: %lu",
7311 (uint) data->read_header,
7312 (uint) data->body_offset,
7313 (uint) data->chunk_size, (ulong) data->current_offset));
7314 DBUG_RETURN(0);
7315}
7316
7317
7318/**
7319 @brief Destroy reader data object
7320*/
7321
7322static void translog_destroy_reader_data(TRANSLOG_READER_DATA *data)
7323{
7324 translog_destroy_scanner(&data->scanner);
7325 translog_free_record_header(&data->header);
7326}
7327
7328
7329/*
7330 Read a part of the record.
7331
7332 SYNOPSIS
7333 translog_read_record_header()
7334 lsn log record serial number (address of the record)
7335 offset From the beginning of the record beginning (read
7336 by translog_read_record_header).
7337 length Length of record part which have to be read.
7338 buffer Buffer where to read the record part (have to be at
7339 least 'length' bytes length)
7340
7341 RETURN
7342 length of data actually read
7343*/
7344
7345translog_size_t translog_read_record(LSN lsn,
7346 translog_size_t offset,
7347 translog_size_t length,
7348 uchar *buffer,
7349 TRANSLOG_READER_DATA *data)
7350{
7351 translog_size_t requested_length= length;
7352 translog_size_t end= offset + length;
7353 TRANSLOG_READER_DATA internal_data;
7354 DBUG_ENTER("translog_read_record");
7355 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7356 translog_status == TRANSLOG_READONLY);
7357
7358 if (data == NULL)
7359 {
7360 DBUG_ASSERT(lsn != LSN_IMPOSSIBLE);
7361 data= &internal_data;
7362 }
7363 if (lsn ||
7364 (offset < data->current_offset &&
7365 !(offset < data->read_header && offset + length < data->read_header)))
7366 {
7367 if (translog_init_reader_data(lsn, data))
7368 DBUG_RETURN(0);
7369 }
7370 DBUG_PRINT("info", ("Offset: %lu length: %lu "
7371 "Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
7372 "Lst: " LSN_FMT " Offset: %u(%x) fixed: %d",
7373 (ulong) offset, (ulong) length,
7374 LSN_IN_PARTS(data->scanner.page_addr),
7375 LSN_IN_PARTS(data->scanner.horizon),
7376 LSN_IN_PARTS(data->scanner.last_file_page),
7377 (uint) data->scanner.page_offset,
7378 (uint) data->scanner.page_offset,
7379 data->scanner.fixed_horizon));
7380 if (offset < data->read_header)
7381 {
7382 uint16 len= MY_MIN(data->read_header, end) - offset;
7383 DBUG_PRINT("info",
7384 ("enter header offset: %lu length: %lu",
7385 (ulong) offset, (ulong) length));
7386 memcpy(buffer, data->header.header + offset, len);
7387 length-= len;
7388 if (length == 0)
7389 {
7390 translog_destroy_reader_data(data);
7391 DBUG_RETURN(requested_length);
7392 }
7393 offset+= len;
7394 buffer+= len;
7395 DBUG_PRINT("info",
7396 ("len: %u offset: %lu curr: %lu length: %lu",
7397 len, (ulong) offset, (ulong) data->current_offset,
7398 (ulong) length));
7399 }
7400 /* TODO: find first page which we should read by offset */
7401
7402 /* read the record chunk by chunk */
7403 for(;;)
7404 {
7405 uint page_end= data->current_offset + data->chunk_size;
7406 DBUG_PRINT("info",
7407 ("enter body offset: %lu curr: %lu "
7408 "length: %lu page_end: %lu",
7409 (ulong) offset, (ulong) data->current_offset, (ulong) length,
7410 (ulong) page_end));
7411 if (offset < page_end)
7412 {
7413 uint len= page_end - offset;
7414 set_if_smaller(len, length); /* in case we read beyond record's end */
7415 DBUG_ASSERT(offset >= data->current_offset);
7416 memcpy(buffer,
7417 data->scanner.page + data->body_offset +
7418 (offset - data->current_offset), len);
7419 length-= len;
7420 if (length == 0)
7421 {
7422 translog_destroy_reader_data(data);
7423 DBUG_RETURN(requested_length);
7424 }
7425 offset+= len;
7426 buffer+= len;
7427 DBUG_PRINT("info",
7428 ("len: %u offset: %lu curr: %lu length: %lu",
7429 len, (ulong) offset, (ulong) data->current_offset,
7430 (ulong) length));
7431 }
7432 if (translog_record_read_next_chunk(data))
7433 {
7434 translog_destroy_reader_data(data);
7435 DBUG_RETURN(requested_length - length);
7436 }
7437 }
7438}
7439
7440
/**
  @brief Force skipping to the next buffer

  Closes the buffer the log cursor (log_descriptor.bc) is writing to and
  makes the next buffer in the ring the current one. If the page being
  written is only partly filled, the rest of it is padded with
  TRANSLOG_FILLER in the old buffer and the new buffer is made to begin at
  the start of that same page, so the page is continued there (the new
  buffer is marked as 'overlay'). Sector protection and the page CRC are
  put on the old page after all writers of the old buffer have finished.

  The translog lock has to be held by the caller (it is asserted). The
  lock on the new buffer is taken and released here; nothing in this
  function releases the old buffer.

  @todo Do not copy old page content if all page protections are switched off
  (because we do not need calculate something or change old parts of the page)
*/

static void translog_force_current_buffer_to_finish()
{
  TRANSLOG_ADDRESS new_buff_beginning;
  uint16 old_buffer_no= log_descriptor.bc.buffer_no;
  uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
  struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
                                          new_buffer_no);
  struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
  /* Start of the page the cursor is writing to (in the old buffer) */
  uchar *data= log_descriptor.bc.ptr - log_descriptor.bc.current_page_fill;
  /* Number of still unused bytes on that page */
  uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
  uint16 UNINIT_VAR(current_page_fill), write_counter, previous_offset;
  DBUG_ENTER("translog_force_current_buffer_to_finish");

  DBUG_PRINT("enter", ("Buffer #%u %p "
                       "Buffer addr: " LSN_FMT " "
                       "Page addr: " LSN_FMT " "
                       "size: %lu (%lu) Pg: %u left: %u in progress %u",
                       (uint) old_buffer_no,
                       old_buffer,
                       LSN_IN_PARTS(old_buffer->offset),
                       LSN_FILE_NO(log_descriptor.horizon),
                       (uint)(LSN_OFFSET(log_descriptor.horizon) -
                              log_descriptor.bc.current_page_fill),
                       (ulong) old_buffer->size,
                       (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
                                buffer->buffer),
                       (uint) log_descriptor.bc.current_page_fill,
                       (uint) left,
                       (uint) old_buffer->
                       copy_to_buffer_in_progress));
  translog_lock_assert_owner();
  /* By default the new buffer continues right after the old buffer's data */
  new_buff_beginning= old_buffer->offset;
  new_buff_beginning+= old_buffer->size; /* increase offset */

  DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
  DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
              LSN_FILE_NO(old_buffer->offset));
  translog_check_cursor(&log_descriptor.bc);
  DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
  if (left)
  {
    /*
      TODO: if 'left' is so small that can't hold any other record
      then do not move the page
    */
    DBUG_PRINT("info", ("left: %u", (uint) left));

    /* Remember where the real data ended before the padding is added */
    old_buffer->pre_force_close_horizon=
      old_buffer->offset + old_buffer->size;
    /* decrease offset */
    /* The new buffer begins at the start of the unfinished page */
    new_buff_beginning-= log_descriptor.bc.current_page_fill;
    /* Saved because the cursor is re-targeted to the new buffer below */
    current_page_fill= log_descriptor.bc.current_page_fill;

    /* Pad the rest of the page in the old buffer and count it in its size */
    memset(log_descriptor.bc.ptr, TRANSLOG_FILLER, left);
    old_buffer->size+= left;
    DBUG_PRINT("info", ("Finish Page buffer #%u: %p "
                        "Size: %lu",
                        (uint) old_buffer->buffer_no,
                        old_buffer,
                        (ulong) old_buffer->size));
    DBUG_ASSERT(old_buffer->buffer_no ==
                log_descriptor.bc.buffer_no);
  }
  else
  {
    /* The page is full: the new buffer starts with a fresh page */
    log_descriptor.bc.current_page_fill= 0;
  }

  translog_buffer_lock(new_buffer);
#ifndef DBUG_OFF
  {
    /* Debug only: snapshot of the new buffer to verify the wait below */
    TRANSLOG_ADDRESS offset= new_buffer->offset;
    TRANSLOG_FILE *file= new_buffer->file;
    uint8 ver= new_buffer->ver;
    translog_lock_assert_owner();
#endif
    translog_wait_for_buffer_free(new_buffer);
#ifndef DBUG_OFF
    /* We keep the handler locked so nobody can start this new buffer */
    DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
                (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
  }
#endif

  /*
    Keep the cursor's write_counter and previous_offset across
    translog_start_buffer() (presumably it resets them).
  */
  write_counter= log_descriptor.bc.write_counter;
  previous_offset= log_descriptor.bc.previous_offset;
  translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
  /* Fix buffer offset (which was incorrectly set to horizon) */
  log_descriptor.bc.buffer->offset= new_buff_beginning;
  log_descriptor.bc.write_counter= write_counter;
  log_descriptor.bc.previous_offset= previous_offset;
  new_buffer->prev_last_lsn= BUFFER_MAX_LSN(old_buffer);
  DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT " buffer: %p",
                      LSN_IN_PARTS(new_buffer->prev_last_lsn),
                      new_buffer));

  /*
    Advances this log pointer, increases writers and let other threads to
    write to the log while we process old page content
  */
  if (left)
  {
    /*
      Reserve room at the start of the new buffer for the part of the page
      already written in the old buffer; writing continues right after it.
    */
    log_descriptor.bc.ptr+= current_page_fill;
    log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
      current_page_fill;
    new_buffer->overlay= 1;
  }
  else
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  /*
    Registered as a writer of the new buffer until the end of this
    function (the matching decrease is the last thing done below).
  */
  translog_buffer_increase_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

  /*
    We have to wait until all writers finish before start changing the
    pages by applying protection and copying the page content in the
    new buffer.
  */
#ifndef DBUG_OFF
  {
    /* Debug only: snapshot of the old buffer to verify the wait below */
    TRANSLOG_ADDRESS offset= old_buffer->offset;
    TRANSLOG_FILE *file= old_buffer->file;
    uint8 ver= old_buffer->ver;
#endif
    /*
      Only one thread at a time can flush the log (a buffer can be flushed
      by many threads, but the log flush in which this function is used is
      done by one thread only), so no other thread can set
      is_closing_buffer.
    */
    DBUG_ASSERT(!old_buffer->is_closing_buffer);
    old_buffer->is_closing_buffer= 1; /* Other flushes will wait */
    DBUG_PRINT("enter", ("Buffer #%u %p is_closing_buffer set",
                         (uint) old_buffer->buffer_no, old_buffer));
    translog_wait_for_writers(old_buffer);
#ifndef DBUG_OFF
    /* We blocked flushing this buffer so the buffer should not changed */
    DBUG_ASSERT(offset == old_buffer->offset && file == old_buffer->file &&
                ver == old_buffer->ver);
  }
#endif

  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
  {
    /* 'data' still points to the last page of the old buffer */
    translog_put_sector_protection(data, &log_descriptor.bc);
    if (left)
    {
      /* The same page is continued in the new buffer */
      log_descriptor.bc.write_counter++;
      log_descriptor.bc.previous_offset= current_page_fill;
    }
    else
    {
      DBUG_PRINT("info", ("drop write_counter"));
      log_descriptor.bc.write_counter= 0;
      log_descriptor.bc.previous_offset= 0;
    }
  }

  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
  {
    /* CRC of everything on the page after the page overhead */
    uint32 crc= translog_crc(data + log_descriptor.page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             log_descriptor.page_overhead);
    DBUG_PRINT("info", ("CRC: 0x%x", crc));
    /* Stored at byte 7 (3 + 3 + 1) of the page header */
    int4store(data + 3 + 3 + 1, crc);
  }
  /* The old page is final now: let the waiting flushes go on */
  old_buffer->is_closing_buffer= 0;
  DBUG_PRINT("enter", ("Buffer #%u %p is_closing_buffer cleared",
                       (uint) old_buffer->buffer_no, old_buffer));
  mysql_cond_broadcast(&old_buffer->waiting_filling_buffer);

  if (left)
  {
    if (log_descriptor.flags &
        (TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION))
      /* Carry the already written part of the page into the new buffer */
      memcpy(new_buffer->buffer, data, current_page_fill);
    else
    {
      /*
        This page header does not change if we add more data to the page so
        we can not copy it and will not overwrite later
      */
      new_buffer->skipped_data= current_page_fill;
      TRASH_ALLOC(new_buffer->buffer, current_page_fill);
      DBUG_ASSERT(new_buffer->skipped_data < TRANSLOG_PAGE_SIZE);
    }
  }
  /* Link the two buffers to each other and stop being a writer */
  old_buffer->next_buffer_offset= new_buffer->offset;
  translog_buffer_lock(new_buffer);
  new_buffer->prev_buffer_offset= old_buffer->offset;
  translog_buffer_decrease_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

  DBUG_VOID_RETURN;
}
7641
7642
7643/**
7644 @brief Waits while given lsn will be flushed
7645
7646 @param lsn log record serial number up to which (inclusive)
7647 the log has to be flushed
7648*/
7649
7650void translog_flush_wait_for_end(LSN lsn)
7651{
7652 DBUG_ENTER("translog_flush_wait_for_end");
7653 DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7654 mysql_mutex_assert_owner(&log_descriptor.log_flush_lock);
7655 while (cmp_translog_addr(log_descriptor.flushed, lsn) < 0)
7656 mysql_cond_wait(&log_descriptor.log_flush_cond,
7657 &log_descriptor.log_flush_lock);
7658 DBUG_VOID_RETURN;
7659}
7660
7661
7662/**
7663 @brief Sets goal for the next flush pass and waits for this pass end.
7664
7665 @param lsn log record serial number up to which (inclusive)
7666 the log has to be flushed
7667*/
7668
7669void translog_flush_set_new_goal_and_wait(TRANSLOG_ADDRESS lsn)
7670{
7671 int flush_no= log_descriptor.flush_no;
7672 DBUG_ENTER("translog_flush_set_new_goal_and_wait");
7673 DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7674 mysql_mutex_assert_owner(&log_descriptor.log_flush_lock);
7675 if (cmp_translog_addr(lsn, log_descriptor.next_pass_max_lsn) > 0)
7676 {
7677 log_descriptor.next_pass_max_lsn= lsn;
7678 log_descriptor.max_lsn_requester= pthread_self();
7679 mysql_cond_broadcast(&log_descriptor.new_goal_cond);
7680 }
7681 while (flush_no == log_descriptor.flush_no)
7682 {
7683 mysql_cond_wait(&log_descriptor.log_flush_cond,
7684 &log_descriptor.log_flush_lock);
7685 }
7686 DBUG_VOID_RETURN;
7687}
7688
7689
7690/**
7691 @brief sync() range of files (inclusive) and directory (by request)
7692
7693 @param min min internal file number to flush
7694 @param max max internal file number to flush
7695 @param sync_dir need sync directory
7696
7697 return Operation status
7698 @retval 0 OK
7699 @retval 1 Error
7700*/
7701
7702static my_bool translog_sync_files(uint32 min, uint32 max,
7703 my_bool sync_dir)
7704{
7705 uint fn;
7706 my_bool rc= 0;
7707 ulonglong flush_interval;
7708 DBUG_ENTER("translog_sync_files");
7709 DBUG_PRINT("info", ("min: %lu max: %lu sync dir: %d",
7710 (ulong) min, (ulong) max, (int) sync_dir));
7711 DBUG_ASSERT(min <= max);
7712
7713 flush_interval= group_commit_wait;
7714 if (flush_interval)
7715 flush_start= microsecond_interval_timer();
7716 for (fn= min; fn <= max; fn++)
7717 {
7718 TRANSLOG_FILE *file= get_logfile_by_number(fn);
7719 DBUG_ASSERT(file != NULL);
7720 if (!file->is_sync)
7721 {
7722 if (mysql_file_sync(file->handler.file, MYF(MY_WME)))
7723 {
7724 rc= 1;
7725 translog_stop_writing();
7726 DBUG_RETURN(rc);
7727 }
7728 translog_syncs++;
7729 file->is_sync= 1;
7730 }
7731 }
7732
7733 if (sync_dir)
7734 {
7735 if (!(rc= sync_dir(log_descriptor.directory_fd,
7736 MYF(MY_WME | MY_IGNORE_BADFD))))
7737 translog_syncs++;
7738 }
7739
7740 DBUG_RETURN(rc);
7741}
7742
7743
7744/*
7745 @brief Flushes buffers with LSNs in them less or equal address <lsn>
7746
7747 @param lsn address up to which all LSNs should be flushed,
7748 can be reset to real last LSN address
7749 @parem sent_to_disk returns 'sent to disk' position
7750 @param flush_horizon returns horizon of the flush
7751
7752 @note About terminology see comment to translog_flush().
7753*/
7754
7755void translog_flush_buffers(TRANSLOG_ADDRESS *lsn,
7756 TRANSLOG_ADDRESS *sent_to_disk,
7757 TRANSLOG_ADDRESS *flush_horizon)
7758{
7759 dirty_buffer_mask_t dirty_buffer_mask;
7760 uint i;
7761 uint8 UNINIT_VAR(last_buffer_no), start_buffer_no;
7762 DBUG_ENTER("translog_flush_buffers");
7763
7764 /*
7765 We will recheck information when will lock buffers one by
7766 one so we can use unprotected read here (this is just for
7767 speed up buffers processing)
7768 */
7769 dirty_buffer_mask= log_descriptor.dirty_buffer_mask;
7770 DBUG_PRINT("info", ("Dirty buffer mask: %lx current buffer: %u",
7771 (ulong) dirty_buffer_mask,
7772 (uint) log_descriptor.bc.buffer_no));
7773 for (i= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
7774 i != log_descriptor.bc.buffer_no && !(dirty_buffer_mask & (1 << i));
7775 i= (i + 1) % TRANSLOG_BUFFERS_NO) {}
7776 start_buffer_no= i;
7777
7778 DBUG_PRINT("info",
7779 ("start from: %u current: %u prev last lsn: " LSN_FMT,
7780 (uint) start_buffer_no, (uint) log_descriptor.bc.buffer_no,
7781 LSN_IN_PARTS(log_descriptor.bc.buffer->prev_last_lsn)));
7782
7783 /*
7784 if LSN up to which we have to flush bigger then maximum LSN of previous
7785 buffer and at least one LSN was saved in the current buffer (last_lsn !=
7786 LSN_IMPOSSIBLE) then we have to close the current buffer.
7787 */
7788 if (cmp_translog_addr(*lsn, log_descriptor.bc.buffer->prev_last_lsn) > 0 &&
7789 log_descriptor.bc.buffer->last_lsn != LSN_IMPOSSIBLE)
7790 {
7791 struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
7792 *lsn= log_descriptor.bc.buffer->last_lsn; /* fix lsn if it was horizon */
7793 DBUG_PRINT("info", ("LSN to flush fixed to last lsn: " LSN_FMT,
7794 LSN_IN_PARTS(*lsn)));
7795 last_buffer_no= log_descriptor.bc.buffer_no;
7796 log_descriptor.is_everything_flushed= 1;
7797 translog_force_current_buffer_to_finish();
7798 translog_buffer_unlock(buffer);
7799 }
7800 else
7801 {
7802 if (log_descriptor.bc.buffer->last_lsn == LSN_IMPOSSIBLE)
7803 {
7804 /*
7805 In this case both last_lsn & prev_last_lsn are LSN_IMPOSSIBLE
7806 otherwise it will go in the first IF because LSN_IMPOSSIBLE less
7807 then any real LSN and cmp_translog_addr(*lsn,
7808 log_descriptor.bc.buffer->prev_last_lsn) will be TRUE
7809 */
7810 DBUG_ASSERT(log_descriptor.bc.buffer->prev_last_lsn ==
7811 LSN_IMPOSSIBLE);
7812 DBUG_PRINT("info", ("There is no LSNs yet generated => do nothing"));
7813 translog_unlock();
7814 DBUG_VOID_RETURN;
7815 }
7816
7817 DBUG_ASSERT(log_descriptor.bc.buffer->prev_last_lsn != LSN_IMPOSSIBLE);
7818 /* fix lsn if it was horizon */
7819 *lsn= log_descriptor.bc.buffer->prev_last_lsn;
7820 DBUG_PRINT("info", ("LSN to flush fixed to prev last lsn: " LSN_FMT,
7821 LSN_IN_PARTS(*lsn)));
7822 last_buffer_no= ((log_descriptor.bc.buffer_no + TRANSLOG_BUFFERS_NO -1) %
7823 TRANSLOG_BUFFERS_NO);
7824 translog_unlock();
7825 }
7826 /* flush buffers */
7827 *sent_to_disk= translog_get_sent_to_disk();
7828 if (cmp_translog_addr(*lsn, *sent_to_disk) > 0)
7829 {
7830
7831 DBUG_PRINT("info", ("Start buffer #: %u last buffer #: %u",
7832 (uint) start_buffer_no, (uint) last_buffer_no));
7833 last_buffer_no= (last_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
7834 i= start_buffer_no;
7835 do
7836 {
7837 struct st_translog_buffer *buffer= log_descriptor.buffers + i;
7838 translog_buffer_lock(buffer);
7839 DBUG_PRINT("info", ("Check buffer:%p #: %u "
7840 "prev last LSN: " LSN_FMT " "
7841 "last LSN: " LSN_FMT " status: %s",
7842 buffer,
7843 (uint) i,
7844 LSN_IN_PARTS(buffer->prev_last_lsn),
7845 LSN_IN_PARTS(buffer->last_lsn),
7846 (buffer->file ?
7847 "dirty" : "closed")));
7848 if (buffer->prev_last_lsn <= *lsn &&
7849 buffer->file != NULL)
7850 {
7851 DBUG_ASSERT(*flush_horizon <= buffer->offset + buffer->size);
7852 *flush_horizon= (buffer->pre_force_close_horizon != LSN_IMPOSSIBLE ?
7853 buffer->pre_force_close_horizon :
7854 buffer->offset + buffer->size);
7855 /* pre_force_close_horizon is reset during new buffer start */
7856 DBUG_PRINT("info", ("flush_horizon: " LSN_FMT,
7857 LSN_IN_PARTS(*flush_horizon)));
7858 DBUG_ASSERT(*flush_horizon <= log_descriptor.horizon);
7859
7860 translog_buffer_flush(buffer);
7861 }
7862 translog_buffer_unlock(buffer);
7863 i= (i + 1) % TRANSLOG_BUFFERS_NO;
7864 } while (i != last_buffer_no);
7865 *sent_to_disk= translog_get_sent_to_disk();
7866 }
7867
7868 DBUG_VOID_RETURN;
7869}
7870
7871/**
7872 @brief Flush the log up to given LSN (included)
7873
7874 @param lsn log record serial number up to which (inclusive)
7875 the log has to be flushed
7876
7877 @return Operation status
7878 @retval 0 OK
7879 @retval 1 Error
7880
7881 @note
7882
7883 - Non group commit logic: Commits made in passes. Thread which started
7884 flush first is performing actual flush, other threads sets new goal (LSN)
7885 of the next pass (if it is maximum) and waits for the pass end or just
7886 wait for the pass end.
7887
7888 - If hard group commit enabled and rate set to zero:
7889 The first thread sends all changed buffers to disk. This is repeated
7890 as long as there are new LSNs added. The process can not loop
7891 forever because we have limited number of threads and they will wait
7892 for the data to be synced.
7893 Pseudo code:
7894
7895 do
7896 send changed buffers to disk
7897 while new_goal
7898 sync
7899
7900 - If hard group commit switched ON and less than rate microseconds has
7901 passed from last sync, then after buffers have been sent to disk
7902 wait until rate microseconds has passed since last sync, do sync and return.
7903 This ensures that if we call sync infrequently we don't do any waits.
7904
7905 - If soft group commit enabled everything works as with 'non group commit'
7906 but the thread doesn't do any real sync(). If rate is not zero the
7907 sync() will be performed by a service thread with the given rate
7908 when needed (new LSN appears).
7909
7910 @note Terminology:
7911 'sent to disk' means written to disk but not sync()ed,
7912 'flushed' mean sent to disk and synced().
7913*/
7914
7915my_bool translog_flush(TRANSLOG_ADDRESS lsn)
7916{
7917 struct timespec abstime;
7918 ulonglong UNINIT_VAR(flush_interval);
7919 ulonglong time_spent;
7920 LSN sent_to_disk= LSN_IMPOSSIBLE;
7921 TRANSLOG_ADDRESS flush_horizon;
7922 my_bool rc= 0;
7923 my_bool hgroup_commit_at_start;
7924 DBUG_ENTER("translog_flush");
7925 DBUG_PRINT("enter", ("Flush up to LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7926 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7927 translog_status == TRANSLOG_READONLY);
7928
7929 mysql_mutex_lock(&log_descriptor.log_flush_lock);
7930 DBUG_PRINT("info", ("Everything is flushed up to " LSN_FMT,
7931 LSN_IN_PARTS(log_descriptor.flushed)));
7932 if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
7933 {
7934 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
7935 DBUG_RETURN(0);
7936 }
7937 if (log_descriptor.flush_in_progress)
7938 {
7939 translog_lock();
7940 /* fix lsn if it was horizon */
7941 if (cmp_translog_addr(lsn, log_descriptor.bc.buffer->last_lsn) > 0)
7942 lsn= BUFFER_MAX_LSN(log_descriptor.bc.buffer);
7943 translog_unlock();
7944 translog_flush_set_new_goal_and_wait(lsn);
7945 if (!pthread_equal(log_descriptor.max_lsn_requester, pthread_self()))
7946 {
7947 /*
7948 translog_flush_wait_for_end() release log_flush_lock while is
7949 waiting then acquire it again
7950 */
7951 translog_flush_wait_for_end(lsn);
7952 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
7953 DBUG_RETURN(0);
7954 }
7955 log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
7956 }
7957 log_descriptor.flush_in_progress= 1;
7958 flush_horizon= log_descriptor.previous_flush_horizon;
7959 DBUG_PRINT("info", ("flush_in_progress is set, flush_horizon: " LSN_FMT,
7960 LSN_IN_PARTS(flush_horizon)));
7961 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
7962
7963 hgroup_commit_at_start= hard_group_commit;
7964 if (hgroup_commit_at_start)
7965 flush_interval= group_commit_wait;
7966
7967 translog_lock();
7968 if (log_descriptor.is_everything_flushed)
7969 {
7970 DBUG_PRINT("info", ("everything is flushed"));
7971 translog_unlock();
7972 mysql_mutex_lock(&log_descriptor.log_flush_lock);
7973 goto out;
7974 }
7975
7976 for (;;)
7977 {
7978 /* Following function flushes buffers and makes translog_unlock() */
7979 translog_flush_buffers(&lsn, &sent_to_disk, &flush_horizon);
7980
7981 if (!hgroup_commit_at_start)
7982 break; /* flush pass is ended */
7983
7984retest:
7985 /*
7986 We do not check time here because mysql_mutex_lock rarely takes
7987 a lot of time so we can sacrifice a bit precision to performance
7988 (taking into account that microsecond_interval_timer() might be
7989 expensive call).
7990 */
7991 if (flush_interval == 0)
7992 break; /* flush pass is ended */
7993
7994 mysql_mutex_lock(&log_descriptor.log_flush_lock);
7995 if (log_descriptor.next_pass_max_lsn == LSN_IMPOSSIBLE)
7996 {
7997 if (flush_interval == 0 ||
7998 (time_spent= (microsecond_interval_timer() - flush_start)) >=
7999 flush_interval)
8000 {
8001 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8002 break;
8003 }
8004 DBUG_PRINT("info", ("flush waits: %llu interval: %llu spent: %llu",
8005 flush_interval - time_spent,
8006 flush_interval, time_spent));
8007 /* wait time or next goal */
8008 set_timespec_nsec(abstime, flush_interval - time_spent);
8009 mysql_cond_timedwait(&log_descriptor.new_goal_cond,
8010 &log_descriptor.log_flush_lock,
8011 &abstime);
8012 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8013 DBUG_PRINT("info", ("retest conditions"));
8014 goto retest;
8015 }
8016
8017 /* take next goal */
8018 lsn= log_descriptor.next_pass_max_lsn;
8019 log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
8020 /* prevent other thread from continue */
8021 log_descriptor.max_lsn_requester= pthread_self();
8022 DBUG_PRINT("info", ("flush took next goal: " LSN_FMT,
8023 LSN_IN_PARTS(lsn)));
8024 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8025
8026 /* next flush pass */
8027 DBUG_PRINT("info", ("next flush pass"));
8028 translog_lock();
8029 }
8030
8031 /*
8032 sync() files from previous flush till current one
8033 */
8034 if (!soft_sync || hgroup_commit_at_start)
8035 {
8036 if ((rc=
8037 translog_sync_files(LSN_FILE_NO(log_descriptor.flushed),
8038 LSN_FILE_NO(lsn),
8039 sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
8040 (LSN_FILE_NO(log_descriptor.
8041 previous_flush_horizon) !=
8042 LSN_FILE_NO(flush_horizon) ||
8043 (LSN_OFFSET(log_descriptor.
8044 previous_flush_horizon) /
8045 TRANSLOG_PAGE_SIZE) !=
8046 (LSN_OFFSET(flush_horizon) /
8047 TRANSLOG_PAGE_SIZE)))))
8048 {
8049 sent_to_disk= LSN_IMPOSSIBLE;
8050 mysql_mutex_lock(&log_descriptor.log_flush_lock);
8051 goto out;
8052 }
8053 /* keep values for soft sync() and forced sync() actual */
8054 {
8055 uint32 fileno= LSN_FILE_NO(lsn);
8056 soft_sync_min= fileno;
8057 soft_sync_max= fileno;
8058 }
8059 }
8060 else
8061 {
8062 soft_sync_max= LSN_FILE_NO(lsn);
8063 soft_need_sync= 1;
8064 }
8065
8066 DBUG_ASSERT(flush_horizon <= log_descriptor.horizon);
8067
8068 mysql_mutex_lock(&log_descriptor.log_flush_lock);
8069 log_descriptor.previous_flush_horizon= flush_horizon;
8070out:
8071 if (sent_to_disk != LSN_IMPOSSIBLE)
8072 log_descriptor.flushed= sent_to_disk;
8073 log_descriptor.flush_in_progress= 0;
8074 log_descriptor.flush_no++;
8075 DBUG_PRINT("info", ("flush_in_progress is dropped"));
8076 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8077 mysql_cond_broadcast(&log_descriptor.log_flush_cond);
8078 DBUG_RETURN(rc);
8079}
8080
8081
8082/**
8083 @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact
8084
8085 If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently
8086 open MARIA_SHAREs), give it one and record this assignment in the log
8087 (LOGREC_FILE_ID log record).
8088
8089 @param tbl_info table
8090 @param trn calling transaction
8091
8092 @return Operation status
8093 @retval 0 OK
8094 @retval 1 Error
8095
8096 @note Can be called even if share already has an id (then will do nothing)
8097*/
8098
8099int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
8100{
8101 uint16 id;
8102 MARIA_SHARE *share= tbl_info->s;
8103 /*
8104 If you give an id to a non-BLOCK_RECORD table, you also need to release
8105 this id somewhere. Then you can change the assertion.
8106 */
8107 DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
8108 /* re-check under mutex to avoid having 2 ids for the same share */
8109 mysql_mutex_lock(&share->intern_lock);
8110 if (unlikely(share->id == 0))
8111 {
8112 LSN lsn;
8113 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
8114 uchar log_data[FILEID_STORE_SIZE];
8115 /* Inspired by set_short_trid() of trnman.c */
8116 uint i= share->kfile.file % SHARE_ID_MAX + 1;
8117 id= 0;
8118 do
8119 {
8120 for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */
8121 {
8122 void *tmp= NULL;
8123 if (id_to_share[i] == NULL &&
8124 my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
8125 {
8126 id= (uint16) i;
8127 break;
8128 }
8129 }
8130 i= 1; /* scan the whole array */
8131 } while (id == 0);
8132 DBUG_PRINT("info", ("id_to_share: %p -> %u", share, id));
8133 fileid_store(log_data, id);
8134 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
8135 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
8136 /*
8137 open_file_name is an unresolved name (symlinks are not resolved, datadir
8138 is not realpath-ed, etc) which is good: the log can be moved to another
8139 directory and continue working.
8140 */
8141 log_array[TRANSLOG_INTERNAL_PARTS + 1].str=
8142 (uchar *)share->open_file_name.str;
8143 log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
8144 share->open_file_name.length + 1;
8145 /*
8146 We can't unlock share->intern_lock before the log entry is written to
8147 ensure no one uses the id before it's logged.
8148 */
8149 if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, tbl_info,
8150 (translog_size_t)
8151 (sizeof(log_data) +
8152 log_array[TRANSLOG_INTERNAL_PARTS +
8153 1].length),
8154 sizeof(log_array)/sizeof(log_array[0]),
8155 log_array, NULL, NULL)))
8156 {
8157 mysql_mutex_unlock(&share->intern_lock);
8158 return 1;
8159 }
8160 /*
8161 Now when translog record is done, we can set share->id.
8162 If we set it before, then translog_write_record may pick up the id
8163 before it's written to the log.
8164 */
8165 share->id= id;
8166 share->state.logrec_file_id= lsn;
8167 }
8168 mysql_mutex_unlock(&share->intern_lock);
8169 return 0;
8170}
8171
8172
8173/**
8174 @brief Recycles a MARIA_SHARE's short id.
8175
8176 @param share table
8177
8178 @note Must be called only if share has an id (i.e. id != 0)
8179*/
8180
8181void translog_deassign_id_from_share(MARIA_SHARE *share)
8182{
8183 DBUG_PRINT("info", ("id_to_share: %p id %u -> 0",
8184 share, share->id));
8185 /*
8186 We don't need any mutex as we are called only when closing the last
8187 instance of the table or at the end of REPAIR: no writes can be
8188 happening. But a Checkpoint may be reading share->id, so we require this
8189 mutex:
8190 */
8191 mysql_mutex_assert_owner(&share->intern_lock);
8192 my_atomic_storeptr((void **)&id_to_share[share->id], 0);
8193 share->id= 0;
8194 /* useless but safety: */
8195 share->lsn_of_file_id= LSN_IMPOSSIBLE;
8196}
8197
8198
8199void translog_assign_id_to_share_from_recovery(MARIA_SHARE *share,
8200 uint16 id)
8201{
8202 DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
8203 DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
8204 DBUG_ASSERT(share->id == 0);
8205 DBUG_ASSERT(id_to_share[id] == NULL);
8206 id_to_share[share->id= id]= share;
8207}
8208
8209
8210/**
8211 @brief check if such log file exists
8212
8213 @param file_no number of the file to test
8214
8215 @retval 0 no such file
8216 @retval 1 there is file with such number
8217*/
8218
8219my_bool translog_is_file(uint file_no)
8220{
8221 MY_STAT stat_buff;
8222 char path[FN_REFLEN];
8223 return (MY_TEST(mysql_file_stat(key_file_translog,
8224 translog_filename_by_fileno(file_no, path),
8225 &stat_buff, MYF(0))));
8226}
8227
8228
8229/**
8230 @brief returns minimum log file number
8231
8232 @param horizon the end of the log
8233 @param is_protected true if it is under purge_log protection
8234
8235 @retval minimum file number
8236 @retval 0 no files found
8237*/
8238
8239static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected)
8240{
8241 uint min_file= 0, max_file;
8242 DBUG_ENTER("translog_first_file");
8243 if (!is_protected)
8244 mysql_mutex_lock(&log_descriptor.purger_lock);
8245 if (log_descriptor.min_file_number &&
8246 translog_is_file(log_descriptor.min_file_number))
8247 {
8248 DBUG_PRINT("info", ("cached %lu",
8249 (ulong) log_descriptor.min_file_number));
8250 if (!is_protected)
8251 mysql_mutex_unlock(&log_descriptor.purger_lock);
8252 DBUG_RETURN(log_descriptor.min_file_number);
8253 }
8254
8255 max_file= LSN_FILE_NO(horizon);
8256
8257 /* binary search for last file */
8258 while (min_file != max_file && min_file != (max_file - 1))
8259 {
8260 uint test= (min_file + max_file) / 2;
8261 DBUG_PRINT("info", ("min_file: %u test: %u max_file: %u",
8262 min_file, test, max_file));
8263 if (test == max_file)
8264 test--;
8265 if (translog_is_file(test))
8266 max_file= test;
8267 else
8268 min_file= test;
8269 }
8270 log_descriptor.min_file_number= max_file;
8271 if (!is_protected)
8272 mysql_mutex_unlock(&log_descriptor.purger_lock);
8273 DBUG_PRINT("info", ("first file :%lu", (ulong) max_file));
8274 DBUG_ASSERT(max_file >= 1);
8275 DBUG_RETURN(max_file);
8276}
8277
8278
8279/**
8280 @brief returns the most close LSN higher the given chunk address
8281
8282 @param addr the chunk address to start from
8283 @param horizon the horizon if it is known or LSN_IMPOSSIBLE
8284
8285 @retval LSN_ERROR Error
8286 @retval LSN_IMPOSSIBLE no LSNs after the address
8287 @retval # LSN of the most close LSN higher the given chunk address
8288*/
8289
8290LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
8291{
8292 TRANSLOG_SCANNER_DATA scanner;
8293 LSN result;
8294 DBUG_ENTER("translog_next_LSN");
8295
8296 if (horizon == LSN_IMPOSSIBLE)
8297 horizon= translog_get_horizon();
8298
8299 if (addr == horizon)
8300 DBUG_RETURN(LSN_IMPOSSIBLE);
8301
8302 translog_scanner_init(addr, 0, &scanner, 1);
8303 /*
8304 addr can point not to a chunk beginning but page end so next
8305 page beginning.
8306 */
8307 if (addr % TRANSLOG_PAGE_SIZE == 0)
8308 {
8309 /*
8310 We are emulating the page end which cased such horizon value to
8311 trigger translog_scanner_eop().
8312
8313 We can't just increase addr on page header overhead because it
8314 can be file end so we allow translog_get_next_chunk() to skip
8315 to the next page in correct way
8316 */
8317 scanner.page_addr-= TRANSLOG_PAGE_SIZE;
8318 scanner.page_offset= TRANSLOG_PAGE_SIZE;
8319#ifndef DBUG_OFF
8320 scanner.page= NULL; /* prevent using incorrect page content */
8321#endif
8322 }
8323 /* addr can point not to a chunk beginning but to a page end */
8324 if (translog_scanner_eop(&scanner))
8325 {
8326 if (translog_get_next_chunk(&scanner))
8327 {
8328 result= LSN_ERROR;
8329 goto out;
8330 }
8331 if (scanner.page == END_OF_LOG)
8332 {
8333 result= LSN_IMPOSSIBLE;
8334 goto out;
8335 }
8336 }
8337
8338 while (!translog_is_LSN_chunk(scanner.page[scanner.page_offset]) &&
8339 scanner.page[scanner.page_offset] != TRANSLOG_FILLER)
8340 {
8341 if (translog_get_next_chunk(&scanner))
8342 {
8343 result= LSN_ERROR;
8344 goto out;
8345 }
8346 if (scanner.page == END_OF_LOG)
8347 {
8348 result= LSN_IMPOSSIBLE;
8349 goto out;
8350 }
8351 }
8352
8353 if (scanner.page[scanner.page_offset] == TRANSLOG_FILLER)
8354 result= LSN_IMPOSSIBLE; /* reached page filler */
8355 else
8356 result= scanner.page_addr + scanner.page_offset;
8357out:
8358 translog_destroy_scanner(&scanner);
8359 DBUG_RETURN(result);
8360}
8361
8362
8363/**
8364 @brief returns the LSN of the first record starting in this log
8365
8366 @retval LSN_ERROR Error
8367 @retval LSN_IMPOSSIBLE no log or the log is empty
8368 @retval # LSN of the first record
8369*/
8370
8371LSN translog_first_lsn_in_log()
8372{
8373 TRANSLOG_ADDRESS addr, horizon= translog_get_horizon();
8374 TRANSLOG_VALIDATOR_DATA data;
8375 uint file;
8376 uint16 chunk_offset;
8377 uchar *page;
8378 DBUG_ENTER("translog_first_lsn_in_log");
8379 DBUG_PRINT("info", ("Horizon: " LSN_FMT, LSN_IN_PARTS(horizon)));
8380 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8381 translog_status == TRANSLOG_READONLY);
8382
8383 if (!(file= translog_first_file(horizon, 0)))
8384 {
8385 /* log has no records yet */
8386 DBUG_RETURN(LSN_IMPOSSIBLE);
8387 }
8388
8389 addr= MAKE_LSN(file, TRANSLOG_PAGE_SIZE); /* the first page of the file */
8390 data.addr= &addr;
8391 {
8392 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
8393 if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
8394 (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
8395 DBUG_RETURN(LSN_ERROR);
8396 }
8397 addr+= chunk_offset;
8398
8399 DBUG_RETURN(translog_next_LSN(addr, horizon));
8400}
8401
8402
8403/**
8404 @brief Returns theoretical first LSN if first log is present
8405
8406 @retval LSN_ERROR Error
8407 @retval LSN_IMPOSSIBLE no log
8408 @retval # LSN of the first record
8409*/
8410
8411LSN translog_first_theoretical_lsn()
8412{
8413 TRANSLOG_ADDRESS addr= translog_get_horizon();
8414 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
8415 uchar *page;
8416 TRANSLOG_VALIDATOR_DATA data;
8417 DBUG_ENTER("translog_first_theoretical_lsn");
8418 DBUG_PRINT("info", ("Horizon: " LSN_FMT, LSN_IN_PARTS(addr)));
8419 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8420 translog_status == TRANSLOG_READONLY);
8421
8422 if (!translog_is_file(1))
8423 DBUG_RETURN(LSN_IMPOSSIBLE);
8424 if (addr == MAKE_LSN(1, TRANSLOG_PAGE_SIZE))
8425 {
8426 /* log has no records yet */
8427 DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
8428 log_descriptor.page_overhead));
8429 }
8430
8431 addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */
8432 data.addr= &addr;
8433 if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
8434 DBUG_RETURN(LSN_ERROR);
8435
8436 DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
8437 page_overhead[page[TRANSLOG_PAGE_FLAGS]]));
8438}
8439
8440
8441/**
8442 @brief Checks given low water mark and purge files if it is need
8443
8444 @param low the last (minimum) address which is need
8445
8446 @retval 0 OK
8447 @retval 1 Error
8448*/
8449
8450my_bool translog_purge(TRANSLOG_ADDRESS low)
8451{
8452 uint32 last_need_file= LSN_FILE_NO(low);
8453 uint32 min_unsync;
8454 int soft;
8455 TRANSLOG_ADDRESS horizon= translog_get_horizon();
8456 int rc= 0;
8457 DBUG_ENTER("translog_purge");
8458 DBUG_PRINT("enter", ("low: " LSN_FMT, LSN_IN_PARTS(low)));
8459 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8460 translog_status == TRANSLOG_READONLY);
8461
8462 soft= soft_sync;
8463 min_unsync= soft_sync_min;
8464 DBUG_PRINT("info", ("min_unsync: %lu", (ulong) min_unsync));
8465 if (soft && min_unsync < last_need_file)
8466 {
8467 last_need_file= min_unsync;
8468 DBUG_PRINT("info", ("last_need_file set to %lu", (ulong)last_need_file));
8469 }
8470
8471 mysql_mutex_lock(&log_descriptor.purger_lock);
8472 DBUG_PRINT("info", ("last_lsn_checked file: %lu:",
8473 (ulong) log_descriptor.last_lsn_checked));
8474 if (LSN_FILE_NO(log_descriptor.last_lsn_checked) < last_need_file)
8475 {
8476 uint32 i;
8477 uint32 min_file= translog_first_file(horizon, 1);
8478 DBUG_ASSERT(min_file != 0); /* log is already started */
8479 DBUG_PRINT("info", ("min_file: %lu:",(ulong) min_file));
8480 for(i= min_file; i < last_need_file && rc == 0; i++)
8481 {
8482 LSN lsn= translog_get_file_max_lsn_stored(i);
8483 if (lsn == LSN_IMPOSSIBLE)
8484 break; /* files are still in writing */
8485 if (lsn == LSN_ERROR)
8486 {
8487 rc= 1;
8488 break;
8489 }
8490 if (cmp_translog_addr(lsn, low) >= 0)
8491 break;
8492
8493 DBUG_PRINT("info", ("purge file %lu", (ulong) i));
8494
8495 /* remove file descriptor from the cache */
8496 /*
8497 log_descriptor.min_file can be changed only here during execution
8498 and the function is serialized, so we can access it without problems
8499 */
8500 if (i >= log_descriptor.min_file)
8501 {
8502 TRANSLOG_FILE *file;
8503 mysql_rwlock_wrlock(&log_descriptor.open_files_lock);
8504 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
8505 log_descriptor.open_files.elements);
8506 DBUG_ASSERT(log_descriptor.min_file == i);
8507 file= *((TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files));
8508 DBUG_PRINT("info", ("Files : %d", log_descriptor.open_files.elements));
8509 DBUG_ASSERT(i == file->number);
8510 log_descriptor.min_file++;
8511 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
8512 log_descriptor.open_files.elements);
8513 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
8514 translog_close_log_file(file);
8515 }
8516 if (log_purge_type == TRANSLOG_PURGE_IMMIDIATE)
8517 {
8518 char path[FN_REFLEN], *file_name;
8519 file_name= translog_filename_by_fileno(i, path);
8520 rc= MY_TEST(mysql_file_delete(key_file_translog,
8521 file_name, MYF(MY_WME)));
8522 }
8523 }
8524 if (unlikely(rc == 1))
8525 log_descriptor.min_need_file= 0; /* impossible value */
8526 else
8527 log_descriptor.min_need_file= i;
8528 }
8529
8530 mysql_mutex_unlock(&log_descriptor.purger_lock);
8531 DBUG_RETURN(rc);
8532}
8533
8534
8535/**
8536 @brief Purges files by stored min need file in case of
8537 "ondemend" purge type
8538
8539 @note This function do real work only if it is "ondemend" purge type
8540 and translog_purge() was called at least once and last time without
8541 errors
8542
8543 @retval 0 OK
8544 @retval 1 Error
8545*/
8546
8547my_bool translog_purge_at_flush()
8548{
8549 uint32 i, min_file;
8550 int rc= 0;
8551 DBUG_ENTER("translog_purge_at_flush");
8552 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8553 translog_status == TRANSLOG_READONLY);
8554
8555 if (unlikely(translog_status == TRANSLOG_READONLY))
8556 {
8557 DBUG_PRINT("info", ("The log is read only => exit"));
8558 DBUG_RETURN(0);
8559 }
8560
8561 if (log_purge_type != TRANSLOG_PURGE_ONDEMAND)
8562 {
8563 DBUG_PRINT("info", ("It is not \"at_flush\" => exit"));
8564 DBUG_RETURN(0);
8565 }
8566
8567 mysql_mutex_lock(&log_descriptor.purger_lock);
8568
8569 if (unlikely(log_descriptor.min_need_file == 0))
8570 {
8571 DBUG_PRINT("info", ("No info about min need file => exit"));
8572 mysql_mutex_unlock(&log_descriptor.purger_lock);
8573 DBUG_RETURN(0);
8574 }
8575
8576 min_file= translog_first_file(translog_get_horizon(), 1);
8577 DBUG_ASSERT(min_file != 0); /* log is already started */
8578 for(i= min_file; i < log_descriptor.min_need_file && rc == 0; i++)
8579 {
8580 char path[FN_REFLEN], *file_name;
8581 DBUG_PRINT("info", ("purge file %lu\n", (ulong) i));
8582 file_name= translog_filename_by_fileno(i, path);
8583 rc= MY_TEST(mysql_file_delete(key_file_translog,
8584 file_name, MYF(MY_WME)));
8585 }
8586
8587 mysql_mutex_unlock(&log_descriptor.purger_lock);
8588 DBUG_RETURN(rc);
8589}
8590
8591
8592/**
8593 @brief Gets min file number
8594
8595 @param horizon the end of the log
8596
8597 @retval minimum file number
8598 @retval 0 no files found
8599*/
8600
8601uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon)
8602{
8603 return translog_first_file(horizon, 0);
8604}
8605
8606
8607/**
8608 @brief Gets min file number which is needed
8609
8610 @retval minimum file number
8611 @retval 0 unknown
8612*/
8613
8614uint32 translog_get_first_needed_file()
8615{
8616 uint32 file_no;
8617 mysql_mutex_lock(&log_descriptor.purger_lock);
8618 file_no= log_descriptor.min_need_file;
8619 mysql_mutex_unlock(&log_descriptor.purger_lock);
8620 return file_no;
8621}
8622
8623
8624/**
8625 @brief Gets transaction log file size
8626
8627 @return transaction log file size
8628*/
8629
8630uint32 translog_get_file_size()
8631{
8632 uint32 res;
8633 translog_lock();
8634 res= log_descriptor.log_file_max_size;
8635 translog_unlock();
8636 return (res);
8637}
8638
8639
8640/**
8641 @brief Sets transaction log file size
8642
8643 @return Returns actually set transaction log size
8644*/
8645
8646void translog_set_file_size(uint32 size)
8647{
8648 struct st_translog_buffer *old_buffer= NULL;
8649 DBUG_ENTER("translog_set_file_size");
8650 translog_lock();
8651 DBUG_PRINT("enter", ("Size: %lu", (ulong) size));
8652 DBUG_ASSERT(size % TRANSLOG_PAGE_SIZE == 0);
8653 DBUG_ASSERT(size >= TRANSLOG_MIN_FILE_SIZE);
8654 log_descriptor.log_file_max_size= size;
8655 /* if current file longer then finish it*/
8656 if (LSN_OFFSET(log_descriptor.horizon) >= log_descriptor.log_file_max_size)
8657 {
8658 old_buffer= log_descriptor.bc.buffer;
8659 translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, 1);
8660 translog_buffer_unlock(old_buffer);
8661 }
8662 translog_unlock();
8663 if (old_buffer)
8664 {
8665 translog_buffer_lock(old_buffer);
8666 translog_buffer_flush(old_buffer);
8667 translog_buffer_unlock(old_buffer);
8668 }
8669 DBUG_VOID_RETURN;
8670}
8671
8672
8673/**
8674 Write debug information to log if we EXTRA_DEBUG is enabled
8675*/
8676
8677my_bool translog_log_debug_info(TRN *trn __attribute__((unused)),
8678 enum translog_debug_info_type type
8679 __attribute__((unused)),
8680 uchar *info __attribute__((unused)),
8681 size_t length __attribute__((unused)))
8682{
8683#ifdef EXTRA_DEBUG
8684 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
8685 uchar debug_type;
8686 LSN lsn;
8687
8688 if (!trn)
8689 {
8690 /*
8691 We can't log the current transaction because we don't have
8692 an active transaction. Use a temporary transaction object instead
8693 */
8694 trn= &dummy_transaction_object;
8695 }
8696 debug_type= (uchar) type;
8697 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= &debug_type;
8698 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 1;
8699 log_array[TRANSLOG_INTERNAL_PARTS + 1].str= info;
8700 log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length;
8701 return translog_write_record(&lsn, LOGREC_DEBUG_INFO,
8702 trn, NULL,
8703 (translog_size_t) (1+ length),
8704 sizeof(log_array)/sizeof(log_array[0]),
8705 log_array, NULL, NULL);
8706#else
8707 return 0;
8708#endif
8709}
8710
8711
8712
8713/**
8714 Sets soft sync mode
8715
8716 @param mode TRUE if we need switch soft sync on else off
8717*/
8718
8719void translog_soft_sync(my_bool mode)
8720{
8721 soft_sync= mode;
8722}
8723
8724
8725/**
8726 Sets hard group commit
8727
8728 @param mode TRUE if we need switch hard group commit on else off
8729*/
8730
8731void translog_hard_group_commit(my_bool mode)
8732{
8733 hard_group_commit= mode;
8734}
8735
8736
8737/**
8738 @brief forced log sync (used when we are switching modes)
8739*/
8740
8741void translog_sync()
8742{
8743 uint32 max= get_current_logfile()->number;
8744 uint32 min;
8745 DBUG_ENTER("ma_translog_sync");
8746
8747 min= soft_sync_min;
8748 if (!min)
8749 min= max;
8750
8751 translog_sync_files(min, max, sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS);
8752
8753 DBUG_VOID_RETURN;
8754}
8755
8756
8757/**
8758 @brief set rate for group commit
8759
8760 @param interval interval to set.
8761
8762 @note We use this function with additional variable because have to
8763 restart service thread with new value which we can't make inside changing
8764 variable routine (update_maria_group_commit_interval)
8765*/
8766
8767void translog_set_group_commit_interval(uint32 interval)
8768{
8769 DBUG_ENTER("translog_set_group_commit_interval");
8770 group_commit_wait= interval;
8771 DBUG_PRINT("info", ("wait: %llu",
8772 (ulonglong)group_commit_wait));
8773 DBUG_VOID_RETURN;
8774}
8775
8776
8777/**
8778 @brief syncing service thread
8779*/
8780
8781static pthread_handler_t
8782ma_soft_sync_background( void *arg __attribute__((unused)))
8783{
8784
8785 my_thread_init();
8786 {
8787 DBUG_ENTER("ma_soft_sync_background");
8788 for(;;)
8789 {
8790 ulonglong prev_loop= microsecond_interval_timer();
8791 ulonglong time, sleep;
8792 uint32 min, max, sync_request;
8793 min= soft_sync_min;
8794 max= soft_sync_max;
8795 sync_request= soft_need_sync;
8796 soft_sync_min= max;
8797 soft_need_sync= 0;
8798
8799 sleep= group_commit_wait;
8800 if (sync_request)
8801 translog_sync_files(min, max, FALSE);
8802 time= microsecond_interval_timer() - prev_loop;
8803 if (time > sleep)
8804 sleep= 0;
8805 else
8806 sleep-= time;
8807 if (my_service_thread_sleep(&soft_sync_control, sleep))
8808 break;
8809 }
8810 my_thread_end();
8811 DBUG_RETURN(0);
8812 }
8813}
8814
8815
8816/**
8817 @brief Starts syncing thread
8818*/
8819
8820int translog_soft_sync_start(void)
8821{
8822 int res= 0;
8823 uint32 min, max;
8824 DBUG_ENTER("translog_soft_sync_start");
8825
8826 /* check and init variables */
8827 min= soft_sync_min;
8828 max= soft_sync_max;
8829 if (!max)
8830 soft_sync_max= max= get_current_logfile()->number;
8831 if (!min)
8832 soft_sync_min= max;
8833 soft_need_sync= 1;
8834
8835 if (!(res= ma_service_thread_control_init(&soft_sync_control)))
8836 if ((res= mysql_thread_create(key_thread_soft_sync,
8837 &soft_sync_control.thread, NULL,
8838 ma_soft_sync_background, NULL)))
8839 soft_sync_control.killed= TRUE;
8840 DBUG_RETURN(res);
8841}
8842
8843
8844/**
8845 @brief Stops syncing thread
8846*/
8847
8848void translog_soft_sync_end(void)
8849{
8850 DBUG_ENTER("translog_soft_sync_end");
8851 if (soft_sync_control.inited)
8852 {
8853 ma_service_thread_control_end(&soft_sync_control);
8854 }
8855 DBUG_VOID_RETURN;
8856}
8857
8858
8859/**
8860 @brief Dump information about file header page.
8861*/
8862
8863static void dump_header_page(uchar *buff)
8864{
8865 LOGHANDLER_FILE_INFO desc;
8866 char strbuff[21];
8867 translog_interpret_file_header(&desc, buff);
8868 printf(" This can be header page:\n"
8869 " Timestamp: %s\n"
8870 " Aria log version: %lu\n"
8871 " Server version: %lu\n"
8872 " Server id %lu\n"
8873 " Page size %lu\n",
8874 llstr(desc.timestamp, strbuff),
8875 desc.maria_version,
8876 desc.mysql_version,
8877 desc.server_id,
8878 desc.page_size);
8879 if (desc.page_size != TRANSLOG_PAGE_SIZE)
8880 printf(" WARNING: page size is not equal compiled in one %lu!!!\n",
8881 (ulong) TRANSLOG_PAGE_SIZE);
8882 printf(" File number %lu\n"
8883 " Max lsn: " LSN_FMT "\n",
8884 desc.file_number,
8885 LSN_IN_PARTS(desc.max_lsn));
8886}
8887
/*
  Printable names of the log record classes; dump_chunk() indexes this
  table with the rclass field of a log_record_type_descriptor entry.
*/
static const char *record_class_string[]=
{
  "LOGRECTYPE_NOT_ALLOWED", "LOGRECTYPE_VARIABLE_LENGTH",
  "LOGRECTYPE_PSEUDOFIXEDLENGTH", "LOGRECTYPE_FIXEDLENGTH"
};
8895
8896
8897/**
8898 @brief dump information about transaction log chunk
8899
8900 @param buffer reference to the whole page
8901 @param ptr pointer to the chunk
8902
  @retval #      reference to the next chunk
8904 @retval NULL can't interpret data
8905*/
8906
8907static uchar *dump_chunk(uchar *buffer, uchar *ptr)
8908{
8909 uint length;
8910 if (*ptr == TRANSLOG_FILLER)
8911 {
8912 printf(" Filler till the page end\n");
8913 for (; ptr < buffer + TRANSLOG_PAGE_SIZE; ptr++)
8914 {
8915 if (*ptr != TRANSLOG_FILLER)
8916 {
8917 printf(" WARNING: non filler character met before page end "
8918 "(page + 0x%04x: 0x%02x) (stop interpretation)!!!",
8919 (uint) (ptr - buffer), (uint) ptr[0]);
8920 return NULL;
8921 }
8922 }
8923 return ptr;
8924 }
8925 if (*ptr == 0 || *ptr == 0xFF)
8926 {
8927 printf(" WARNING: chunk can't start from 0x0 "
8928 "(stop interpretation)!!!\n");
8929 return NULL;
8930 }
8931 switch (ptr[0] & TRANSLOG_CHUNK_TYPE) {
8932 case TRANSLOG_CHUNK_LSN:
8933 printf(" LSN chunk type 0 (variable length)\n");
8934 if (likely((ptr[0] & TRANSLOG_REC_TYPE) != TRANSLOG_CHUNK_0_CONT))
8935 {
8936 printf(" Record type %u: %s record class %s compressed LSNs: %u\n",
8937 ptr[0] & TRANSLOG_REC_TYPE,
8938 (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
8939 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
8940 "NULL"),
8941 record_class_string[log_record_type_descriptor[ptr[0] &
8942 TRANSLOG_REC_TYPE].
8943 rclass],
8944 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
8945 compressed_LSN);
8946 if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
8947 LOGRECTYPE_VARIABLE_LENGTH)
8948 {
8949 printf(" WARNING: this record class here can't be used "
8950 "(stop interpretation)!!!\n");
8951 break;
8952 }
8953 }
8954 else
8955 printf(" Continuation of previous chunk 0 header \n");
8956 printf(" Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
8957 {
8958 uchar *hdr_ptr= ptr + 1 + 2; /* chunk type and short trid */
8959 uint16 chunk_len;
8960 printf (" Record length: %lu\n",
8961 (ulong) translog_variable_record_1group_decode_len(&hdr_ptr));
8962 chunk_len= uint2korr(hdr_ptr);
8963 if (chunk_len == 0)
8964 printf (" It is 1 group record (chunk length == 0)\n");
8965 else
8966 {
8967 uint16 groups, i;
8968
8969 printf (" Chunk length %u\n", (uint) chunk_len);
8970 groups= uint2korr(hdr_ptr + 2);
8971 hdr_ptr+= 4;
8972 printf (" Number of groups left to the end %u:\n", (uint) groups);
8973 for(i= 0;
8974 i < groups && hdr_ptr < buffer + TRANSLOG_PAGE_SIZE;
8975 i++, hdr_ptr+= LSN_STORE_SIZE + 1)
8976 {
8977 TRANSLOG_ADDRESS gpr_addr= lsn_korr(hdr_ptr);
8978 uint pages= hdr_ptr[LSN_STORE_SIZE];
8979 printf (" Group +#%u: " LSN_FMT " pages: %u\n",
8980 (uint) i, LSN_IN_PARTS(gpr_addr), pages);
8981 }
8982 }
8983 }
8984 break;
8985 case TRANSLOG_CHUNK_FIXED:
8986 printf(" LSN chunk type 1 (fixed size)\n");
8987 printf(" Record type %u: %s record class %s compressed LSNs: %u\n",
8988 ptr[0] & TRANSLOG_REC_TYPE,
8989 (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
8990 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
8991 "NULL"),
8992 record_class_string[log_record_type_descriptor[ptr[0] &
8993 TRANSLOG_REC_TYPE].
8994 rclass],
8995 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
8996 compressed_LSN);
8997 if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
8998 LOGRECTYPE_PSEUDOFIXEDLENGTH &&
8999 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
9000 LOGRECTYPE_FIXEDLENGTH)
9001 {
9002 printf(" WARNING: this record class here can't be used "
9003 "(stop interpretation)!!!\n");
9004 }
9005 printf(" Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
9006 break;
9007 case TRANSLOG_CHUNK_NOHDR:
9008 printf(" No header chunk type 2(till the end of the page)\n");
9009 if (ptr[0] & TRANSLOG_REC_TYPE)
9010 {
9011 printf(" WARNING: chunk header content record type: 0x%02x "
9012 "(dtop interpretation)!!!",
9013 (uint) ptr[0]);
9014 return NULL;
9015 }
9016 break;
9017 case TRANSLOG_CHUNK_LNGTH:
9018 printf(" Chunk with length type 3\n");
9019 if (ptr[0] & TRANSLOG_REC_TYPE)
9020 {
9021 printf(" WARNING: chunk header content record type: 0x%02x "
9022 "(dtop interpretation)!!!",
9023 (uint) ptr[0]);
9024 return NULL;
9025 }
9026 break;
9027 }
9028 {
9029 intptr offset= ptr - buffer;
9030 DBUG_ASSERT(offset <= UINT_MAX16);
9031 length= translog_get_total_chunk_length(buffer, (uint16)offset);
9032 }
9033 printf(" Length %u\n", length);
9034 ptr+= length;
9035 return ptr;
9036}
9037
9038
9039/**
9040 @brief Dump information about page with data.
9041*/
9042
9043static void dump_datapage(uchar *buffer, File handler)
9044{
9045 uchar *ptr;
9046 ulong offset;
9047 uint32 page, file;
9048 uint header_len;
9049 printf(" Page: %ld File number: %ld\n",
9050 (ulong) (page= uint3korr(buffer)),
9051 (ulong) (file= uint3korr(buffer + 3)));
9052 if (page == 0)
9053 printf(" WARNING: page == 0!!!\n");
9054 if (file == 0)
9055 printf(" WARNING: file == 0!!!\n");
9056 offset= page * TRANSLOG_PAGE_SIZE;
9057 printf(" Flags (0x%x):\n", (uint) buffer[TRANSLOG_PAGE_FLAGS]);
9058 if (buffer[TRANSLOG_PAGE_FLAGS])
9059 {
9060 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
9061 printf(" Page CRC\n");
9062 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
9063 printf(" Sector protection\n");
9064 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_RECORD_CRC)
9065 printf(" Record CRC (WARNING: not yet implemented!!!)\n");
9066 if (buffer[TRANSLOG_PAGE_FLAGS] & ~(TRANSLOG_PAGE_CRC |
9067 TRANSLOG_SECTOR_PROTECTION |
9068 TRANSLOG_RECORD_CRC))
9069 {
9070 printf(" WARNING: unknown flags (stop interpretation)!!!\n");
9071 return;
9072 }
9073 }
9074 else
9075 printf(" No flags\n");
9076 printf(" Page header length: %u\n",
9077 (header_len= page_overhead[buffer[TRANSLOG_PAGE_FLAGS]]));
9078 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_RECORD_CRC)
9079 {
9080 uint32 crc= uint4korr(buffer + TRANSLOG_PAGE_FLAGS + 1);
9081 uint32 ccrc;
9082 printf (" Page CRC 0x%04lx\n", (ulong) crc);
9083 ccrc= translog_crc(buffer + header_len, TRANSLOG_PAGE_SIZE - header_len);
9084 if (crc != ccrc)
9085 printf(" WARNING: calculated CRC: 0x%04lx!!!\n", (ulong) ccrc);
9086 }
9087 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
9088 {
9089 TRANSLOG_FILE tfile;
9090 {
9091 uchar *table= buffer + header_len -
9092 TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
9093 uint i;
9094 printf(" Sector protection current value: 0x%02x\n", (uint) table[0]);
9095 for (i= 1; i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE; i++)
9096 {
9097 printf(" Sector protection in sector: 0x%02x saved value 0x%02x\n",
9098 (uint)buffer[i * DISK_DRIVE_SECTOR_SIZE],
9099 (uint)table[i]);
9100 }
9101 }
9102 tfile.number= file;
9103 bzero(&tfile.handler, sizeof(tfile.handler));
9104 tfile.handler.file= handler;
9105 tfile.was_recovered= 0;
9106 tfile.is_sync= 1;
9107 if (translog_check_sector_protection(buffer, &tfile))
9108 printf(" WARNING: sector protection found problems!!!\n");
9109 }
9110 ptr= buffer + header_len;
9111 while (ptr && ptr < buffer + TRANSLOG_PAGE_SIZE)
9112 {
9113 printf(" Chunk %d %lld:\n",
9114 file,((longlong) (ptr - buffer)+ offset));
9115 ptr= dump_chunk(buffer, ptr);
9116 }
9117}
9118
9119
9120/**
9121 @brief Dump information about page.
9122*/
9123
9124void dump_page(uchar *buffer, File handler)
9125{
9126 if (strncmp((char*)maria_trans_file_magic, (char*)buffer,
9127 sizeof(maria_trans_file_magic)) == 0)
9128 {
9129 dump_header_page(buffer);
9130 }
9131 dump_datapage(buffer, handler);
9132}
9133