1/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2017, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
16
17
18/**
19 @file
20
21 @brief
22 logging of commands
23
24 @todo
25 Abort logging when we get an error in reading or writing log files
26*/
27
28#include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */
29#include "sql_priv.h"
30#include "log.h"
31#include "sql_base.h" // open_log_table
32#include "sql_repl.h"
33#include "sql_delete.h" // mysql_truncate
34#include "sql_parse.h" // command_name
35#include "sql_time.h" // calc_time_from_sec, my_time_compare
36#include "tztime.h" // my_tz_OFFSET0, struct Time_zone
37#include "sql_acl.h" // SUPER_ACL
38#include "log_event.h" // Query_log_event
39#include "rpl_filter.h"
40#include "rpl_rli.h"
41#include "sql_audit.h"
42#include "mysqld.h"
43
44#include <my_dir.h>
45#include <m_ctype.h> // For test_if_number
46
47#ifdef _WIN32
48#include "message.h"
49#endif
50
51#include "sql_plugin.h"
52#include "debug_sync.h"
53#include "sql_show.h"
54#include "my_pthread.h"
55#include "semisync_master.h"
56#include "wsrep_mysqld.h"
57#include "sp_rcontext.h"
58#include "sp_head.h"
59
/* max size of the log message */
#define MAX_LOG_BUFFER_SIZE 1024
#define MAX_TIME_SIZE 32
/* my_off_t value with all bits set; used as an "undefined position" marker */
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
/* Truncate cache log files bigger than this */
#define CACHE_FILE_TRUNC_SIZE 65536

/*
  Expands to the name of flag F followed by a space when any bit of F is
  set in V, and to an empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
68
/* Global binlog handlerton pointer and the global logger object */
handlerton *binlog_hton;
LOGGER logger;

/* Both names start out unset (null pointers) */
const char *log_bin_index= 0;
const char *log_bin_basename= 0;

/* The global binary log object, constructed with &sync_binlog_period */
MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);

/*
  Forward declarations of file-local functions.
  NOTE(review): the binlog_* functions taking a handlerton look like the
  binlog handlerton callbacks - confirm against binlog_init().
*/
static bool test_if_number(const char *str,
                           ulong *res, bool allow_wildcards);
static int binlog_init(void *p);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
89
/* Message text for a failed write to the binary log */
static const LEX_CSTRING write_error_msg=
{ STRING_WITH_LEN("error writing to the binary log") };

static my_bool opt_optimize_thread_scheduling= TRUE;
ulong binlog_checksum_options;
#ifndef DBUG_OFF
/* Only defined when DBUG_OFF is not set (debug builds) */
ulong opt_binlog_dbug_fsync_sleep= 0;
#endif

mysql_mutex_t LOCK_prepare_ordered;
mysql_cond_t COND_prepare_ordered;
mysql_mutex_t LOCK_after_binlog_sync;
mysql_mutex_t LOCK_commit_ordered;

/*
  Counters and snapshot information that are exported through the
  binlog_status_vars_detail[] table.
*/
static ulonglong binlog_status_var_num_commits;
static ulonglong binlog_status_var_num_group_commits;
static ulonglong binlog_status_group_commit_trigger_count;
static ulonglong binlog_status_group_commit_trigger_lock_wait;
static ulonglong binlog_status_group_commit_trigger_timeout;
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;

/*
  printf-style format used when a log can not be used any more; it takes
  the log name (%s) and an error number (%d).
*/
static const char *fatal_log_error=
  "Could not use %s for logging (error %d). "
  "Turning logging off for the whole duration of the MariaDB server process. "
  "To turn it on again: fix the cause, shutdown the MariaDB server and "
  "restart it.";
117
118
/*
  Table that maps status variable names to the file-local counters and
  snapshot buffers they expose. The list ends with a NullS entry.
*/
static SHOW_VAR binlog_status_vars_detail[]=
{
  {"commits",
    (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
  {"group_commits",
    (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
  {"group_commit_trigger_count",
    (char *)&binlog_status_group_commit_trigger_count, SHOW_LONGLONG},
  {"group_commit_trigger_lock_wait",
    (char *)&binlog_status_group_commit_trigger_lock_wait, SHOW_LONGLONG},
  {"group_commit_trigger_timeout",
    (char *)&binlog_status_group_commit_trigger_timeout, SHOW_LONGLONG},
  {"snapshot_file",
    (char *)&binlog_snapshot_file, SHOW_CHAR},
  {"snapshot_position",
    (char *)&binlog_snapshot_position, SHOW_LONGLONG},
  /* End-of-list marker */
  {NullS, NullS, SHOW_LONG}
};
137
/*
  Variables for the binlog background thread.
  Protected by the MYSQL_BIN_LOG::LOCK_binlog_background_thread mutex.
 */
static bool binlog_background_thread_started= false;
static bool binlog_background_thread_stop= false;
/* Starts out as NULL, i.e. nothing queued */
static MYSQL_BIN_LOG::xid_count_per_binlog *
  binlog_background_thread_queue= NULL;

static bool start_binlog_background_thread();

/* File-local GTID binlog state; its init() is called by setup_log_handling() */
static rpl_binlog_state rpl_global_gtid_binlog_state;
150
/**
  Prepare the file-local logging state for use.

  Currently this only calls init() on rpl_global_gtid_binlog_state.
*/
void setup_log_handling()
{
  rpl_global_gtid_binlog_state.init();
}
155
156
157/**
158 purge logs, master and slave sides both, related error code
159 convertor.
160 Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
161
162 @param res an internal to purging routines error code
163
164 @return the user level error code ER_*
165*/
166uint purge_log_get_error_code(int res)
167{
168 uint errcode= 0;
169
170 switch (res) {
171 case 0: break;
172 case LOG_INFO_EOF: errcode= ER_UNKNOWN_TARGET_BINLOG; break;
173 case LOG_INFO_IO: errcode= ER_IO_ERR_LOG_INDEX_READ; break;
174 case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
175 case LOG_INFO_SEEK: errcode= ER_FSEEK_FAIL; break;
176 case LOG_INFO_MEM: errcode= ER_OUT_OF_RESOURCES; break;
177 case LOG_INFO_FATAL: errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
178 case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
179 case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
180 default: errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
181 }
182
183 return errcode;
184}
185
/**
  Silence all errors and warnings reported when performing a write
  to a log table.
  Errors and warnings are not reported to the client or SQL exception
  handlers, so that the presence of logging does not interfere and affect
  the logic of an application.

  The text of the last condition seen is kept and can be read back with
  message().
*/
class Silence_log_table_errors : public Internal_error_handler
{
  /* Text of the last condition passed to handle_condition() */
  char m_message[MYSQL_ERRMSG_SIZE];
public:
  /* Start with an empty message */
  Silence_log_table_errors()
  {
    m_message[0]= '\0';
  }

  virtual ~Silence_log_table_errors() {}

  virtual bool handle_condition(THD *thd,
                                uint sql_errno,
                                const char* sql_state,
                                Sql_condition::enum_warning_level *level,
                                const char* msg,
                                Sql_condition ** cond_hdl);
  /* Returns the stored message text (empty string if nothing was stored) */
  const char *message() const { return m_message; }
};
212
213bool
214Silence_log_table_errors::handle_condition(THD *,
215 uint,
216 const char*,
217 Sql_condition::enum_warning_level*,
218 const char* msg,
219 Sql_condition ** cond_hdl)
220{
221 *cond_hdl= NULL;
222 strmake_buf(m_message, msg);
223 return TRUE;
224}
225
/*
  The three message printing functions, in the order information,
  warning, error.
  NOTE(review): presumably indexed by message severity - confirm against
  the users of this table.
*/
sql_print_message_func sql_print_message_handlers[3] =
{
  sql_print_information,
  sql_print_warning,
  sql_print_error
};
232
233
234/**
235 Create the name of the log file
236
237 @param[OUT] out a pointer to a new allocated name will go there
238 @param[IN] log_ext The extension for the file (e.g .log)
239 @param[IN] once whether to use malloc_once or a normal malloc.
240*/
241void make_default_log_name(char **out, const char* log_ext, bool once)
242{
243 char buff[FN_REFLEN+10];
244 fn_format(buff, opt_log_basename, "", log_ext, MYF(MY_REPLACE_EXT));
245 if (once)
246 *out= my_once_strdup(buff, MYF(MY_WME));
247 else
248 {
249 my_free(*out);
250 *out= my_strdup(buff, MYF(MY_WME));
251 }
252}
253
254
255/*
256 Helper classes to store non-transactional and transactional data
257 before copying it to the binary log.
258*/
259class binlog_cache_data
260{
261public:
262 binlog_cache_data(): m_pending(0), status(0),
263 before_stmt_pos(MY_OFF_T_UNDEF),
264 incident(FALSE), changes_to_non_trans_temp_table_flag(FALSE),
265 saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0),
266 ptr_binlog_cache_disk_use(0)
267 { }
268
269 ~binlog_cache_data()
270 {
271 DBUG_ASSERT(empty());
272 close_cached_file(&cache_log);
273 }
274
275 /*
276 Return 1 if there is no relevant entries in the cache
277
278 This is:
279 - Cache is empty
280 - There are row or critical (DDL?) events in the cache
281
282 The status test is needed to avoid writing entries with only
283 a table map entry, which would crash in do_apply_event() on the slave
284 as it assumes that there is always a row entry after a table map.
285 */
286 bool empty() const
287 {
288 return (pending() == NULL &&
289 (my_b_write_tell(&cache_log) == 0 ||
290 ((status & (LOGGED_ROW_EVENT | LOGGED_CRITICAL)) == 0)));
291 }
292
293 Rows_log_event *pending() const
294 {
295 return m_pending;
296 }
297
298 void set_pending(Rows_log_event *const pending_arg)
299 {
300 m_pending= pending_arg;
301 }
302
303 void set_incident(void)
304 {
305 incident= TRUE;
306 }
307
308 bool has_incident(void)
309 {
310 return(incident);
311 }
312
313 void set_changes_to_non_trans_temp_table()
314 {
315 changes_to_non_trans_temp_table_flag= TRUE;
316 }
317
318 bool changes_to_non_trans_temp_table()
319 {
320 return (changes_to_non_trans_temp_table_flag);
321 }
322
323 void reset()
324 {
325 bool cache_was_empty= empty();
326 bool truncate_file= (cache_log.file != -1 &&
327 my_b_write_tell(&cache_log) > CACHE_FILE_TRUNC_SIZE);
328 truncate(0,1); // Forget what's in cache
329 if (!cache_was_empty)
330 compute_statistics();
331 if (truncate_file)
332 my_chsize(cache_log.file, 0, 0, MYF(MY_WME));
333
334 changes_to_non_trans_temp_table_flag= FALSE;
335 status= 0;
336 incident= FALSE;
337 before_stmt_pos= MY_OFF_T_UNDEF;
338 DBUG_ASSERT(empty());
339 }
340
341 my_off_t get_byte_position() const
342 {
343 return my_b_tell(&cache_log);
344 }
345
346 my_off_t get_prev_position()
347 {
348 return(before_stmt_pos);
349 }
350
351 void set_prev_position(my_off_t pos)
352 {
353 before_stmt_pos= pos;
354 }
355
356 void restore_prev_position()
357 {
358 truncate(before_stmt_pos);
359 }
360
361 void restore_savepoint(my_off_t pos)
362 {
363 truncate(pos);
364 if (pos < before_stmt_pos)
365 before_stmt_pos= MY_OFF_T_UNDEF;
366 }
367
368 void set_binlog_cache_info(my_off_t param_max_binlog_cache_size,
369 ulong *param_ptr_binlog_cache_use,
370 ulong *param_ptr_binlog_cache_disk_use)
371 {
372 /*
373 The assertions guarantee that the set_binlog_cache_info is
374 called just once and information passed as parameters are
375 never zero.
376
377 This is done while calling the constructor binlog_cache_mngr.
378 We cannot set informaton in the constructor binlog_cache_data
379 because the space for binlog_cache_mngr is allocated through
380 a placement new.
381
382 In the future, we can refactor this and change it to avoid
383 the set_binlog_info.
384 */
385 DBUG_ASSERT(saved_max_binlog_cache_size == 0 &&
386 param_max_binlog_cache_size != 0 &&
387 ptr_binlog_cache_use == 0 &&
388 param_ptr_binlog_cache_use != 0 &&
389 ptr_binlog_cache_disk_use == 0 &&
390 param_ptr_binlog_cache_disk_use != 0);
391
392 saved_max_binlog_cache_size= param_max_binlog_cache_size;
393 ptr_binlog_cache_use= param_ptr_binlog_cache_use;
394 ptr_binlog_cache_disk_use= param_ptr_binlog_cache_disk_use;
395 cache_log.end_of_file= saved_max_binlog_cache_size;
396 }
397
398 void add_status(enum_logged_status status_arg)
399 {
400 status|= status_arg;
401 }
402
403 /*
404 Cache to store data before copying it to the binary log.
405 */
406 IO_CACHE cache_log;
407
408private:
409 /*
410 Pending binrows event. This event is the event where the rows are currently
411 written.
412 */
413 Rows_log_event *m_pending;
414
415 /*
416 Bit flags for what has been writting to cache. Used to
417 discard logs without any data changes.
418 see enum_logged_status;
419 */
420 uint32 status;
421
422 /*
423 Binlog position before the start of the current statement.
424 */
425 my_off_t before_stmt_pos;
426
427 /*
428 This indicates that some events did not get into the cache and most likely
429 it is corrupted.
430 */
431 bool incident;
432
433 /*
434 This flag indicates if the cache has changes to temporary tables.
435 @TODO This a temporary fix and should be removed after BUG#54562.
436 */
437 bool changes_to_non_trans_temp_table_flag;
438
439 /**
440 This function computes binlog cache and disk usage.
441 */
442 void compute_statistics()
443 {
444 statistic_increment(*ptr_binlog_cache_use, &LOCK_status);
445 if (cache_log.disk_writes != 0)
446 {
447#ifdef REAL_STATISTICS
448 statistic_add(*ptr_binlog_cache_disk_use,
449 cache_log.disk_writes, &LOCK_status);
450#else
451 statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status);
452#endif
453 cache_log.disk_writes= 0;
454 }
455 }
456
457 /*
458 Stores the values of maximum size of the cache allowed when this cache
459 is configured. This corresponds to either
460 . max_binlog_cache_size or max_binlog_stmt_cache_size.
461 */
462 my_off_t saved_max_binlog_cache_size;
463
464 /*
465 Stores a pointer to the status variable that keeps track of the in-memory
466 cache usage. This corresponds to either
467 . binlog_cache_use or binlog_stmt_cache_use.
468 */
469 ulong *ptr_binlog_cache_use;
470
471 /*
472 Stores a pointer to the status variable that keeps track of the disk
473 cache usage. This corresponds to either
474 . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
475 */
476 ulong *ptr_binlog_cache_disk_use;
477
478 /*
479 It truncates the cache to a certain position. This includes deleting the
480 pending event.
481 */
482 void truncate(my_off_t pos, bool reset_cache=0)
483 {
484 DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
485 if (pending())
486 {
487 delete pending();
488 set_pending(0);
489 }
490 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, reset_cache);
491 cache_log.end_of_file= saved_max_binlog_cache_size;
492 }
493
494 binlog_cache_data& operator=(const binlog_cache_data& info);
495 binlog_cache_data(const binlog_cache_data& info);
496};
497
498
499void Log_event_writer::add_status(enum_logged_status status)
500{
501 if (likely(cache_data))
502 cache_data->add_status(status);
503}
504
505class binlog_cache_mngr {
506public:
507 binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,
508 my_off_t param_max_binlog_cache_size,
509 ulong *param_ptr_binlog_stmt_cache_use,
510 ulong *param_ptr_binlog_stmt_cache_disk_use,
511 ulong *param_ptr_binlog_cache_use,
512 ulong *param_ptr_binlog_cache_disk_use)
513 : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0)
514 {
515 stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size,
516 param_ptr_binlog_stmt_cache_use,
517 param_ptr_binlog_stmt_cache_disk_use);
518 trx_cache.set_binlog_cache_info(param_max_binlog_cache_size,
519 param_ptr_binlog_cache_use,
520 param_ptr_binlog_cache_disk_use);
521 last_commit_pos_file[0]= 0;
522 }
523
524 void reset(bool do_stmt, bool do_trx)
525 {
526 if (do_stmt)
527 stmt_cache.reset();
528 if (do_trx)
529 {
530 trx_cache.reset();
531 using_xa= FALSE;
532 last_commit_pos_file[0]= 0;
533 last_commit_pos_offset= 0;
534 }
535 }
536
537 binlog_cache_data* get_binlog_cache_data(bool is_transactional)
538 {
539 return (is_transactional ? &trx_cache : &stmt_cache);
540 }
541
542 IO_CACHE* get_binlog_cache_log(bool is_transactional)
543 {
544 return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
545 }
546
547 binlog_cache_data stmt_cache;
548
549 binlog_cache_data trx_cache;
550
551 /*
552 Binlog position for current transaction.
553 For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
554 position corresponding to the snapshot taken. During (and after) commit,
555 this is set to the binlog position corresponding to just after the
556 commit (so storage engines can store it in their transaction log).
557 */
558 char last_commit_pos_file[FN_REFLEN];
559 my_off_t last_commit_pos_offset;
560
561 /*
562 Flag set true if this transaction is committed with log_xid() as part of
563 XA, false if not.
564 */
565 bool using_xa;
566 my_xid xa_xid;
567 bool need_unlog;
568 /*
569 Id of binlog that transaction was written to; only needed if need_unlog is
570 true.
571 */
572 ulong binlog_id;
573 /* Set if we get an error during commit that must be returned from unlog(). */
574 bool delayed_error;
575
576private:
577
578 binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
579 binlog_cache_mngr(const binlog_cache_mngr& info);
580};
581
582bool LOGGER::is_log_table_enabled(uint log_table_type)
583{
584 switch (log_table_type) {
585 case QUERY_LOG_SLOW:
586 return (table_log_handler != NULL) && global_system_variables.sql_log_slow;
587 case QUERY_LOG_GENERAL:
588 return (table_log_handler != NULL) && opt_log ;
589 default:
590 DBUG_ASSERT(0);
591 return FALSE; /* make compiler happy */
592 }
593}
594
595/**
596 Check if a given table is opened log table
597
598 @param table Table to check
599 @param check_if_opened Only fail if it's a log table in use
600 @param error_msg String to put in error message if not ok.
601 No error message if 0
602 @return 0 ok
603 @return # Type of log file
604 */
605
606int check_if_log_table(const TABLE_LIST *table,
607 bool check_if_opened,
608 const char *error_msg)
609{
610 int result= 0;
611 if (table->db.length == 5 &&
612 !my_strcasecmp(table_alias_charset, table->db.str, "mysql"))
613 {
614 const char *table_name= table->table_name.str;
615
616 if (table->table_name.length == 11 &&
617 !my_strcasecmp(table_alias_charset, table_name, "general_log"))
618 {
619 result= QUERY_LOG_GENERAL;
620 goto end;
621 }
622
623 if (table->table_name.length == 8 &&
624 !my_strcasecmp(table_alias_charset, table_name, "slow_log"))
625 {
626 result= QUERY_LOG_SLOW;
627 goto end;
628 }
629 }
630 return 0;
631
632end:
633 if (!check_if_opened || logger.is_log_table_enabled(result))
634 {
635 if (error_msg)
636 my_error(ER_BAD_LOG_STATEMENT, MYF(0), error_msg);
637 return result;
638 }
639 return 0;
640}
641
642
/* Nothing to set up at construction time */
Log_to_csv_event_handler::Log_to_csv_event_handler()
{
}
646
647
/* Nothing to release at destruction time */
Log_to_csv_event_handler::~Log_to_csv_event_handler()
{
}
651
652
/* Marks the log tables of the global logger as not initialized */
void Log_to_csv_event_handler::cleanup()
{
  logger.is_log_tables_initialized= FALSE;
}
657
658/* log event handlers */
659
660/**
661 Log command to the general log table
662
663 Log given command to the general log table.
664
665 @param event_time command start timestamp
666 @param user_host the pointer to the string with user@host info
667 @param user_host_len length of the user_host string. this is computed
668 once and passed to all general log event handlers
669 @param thread_id Id of the thread, issued a query
670 @param command_type the type of the command being logged
671 @param command_type_len the length of the string above
672 @param sql_text the very text of the query being executed
673 @param sql_text_len the length of sql_text string
674
675
676 @return This function attempts to never call my_error(). This is
677 necessary, because general logging happens already after a statement
678 status has been sent to the client, so the client can not see the
679 error anyway. Besides, the error is not related to the statement
680 being executed and is internal, and thus should be handled
681 internally (@todo: how?).
682 If a write to the table has failed, the function attempts to
683 write to a short error message to the file. The failure is also
684 indicated in the return value.
685
686 @retval FALSE OK
687 @retval TRUE error occurred
688*/
689
690bool Log_to_csv_event_handler::
691 log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
692 const char *command_type, size_t command_type_len,
693 const char *sql_text, size_t sql_text_len,
694 CHARSET_INFO *client_cs)
695{
696 TABLE_LIST table_list;
697 TABLE *table;
698 bool result= TRUE;
699 bool need_close= FALSE;
700 bool need_pop= FALSE;
701 bool need_rnd_end= FALSE;
702 uint field_index;
703 Silence_log_table_errors error_handler;
704 Open_tables_backup open_tables_backup;
705 ulonglong save_thd_options;
706 bool save_time_zone_used;
707 DBUG_ENTER("log_general");
708
709 /*
710 CSV uses TIME_to_timestamp() internally if table needs to be repaired
711 which will set thd->time_zone_used
712 */
713 save_time_zone_used= thd->time_zone_used;
714
715 save_thd_options= thd->variables.option_bits;
716 thd->variables.option_bits&= ~OPTION_BIN_LOG;
717
718 table_list.init_one_table(&MYSQL_SCHEMA_NAME, &GENERAL_LOG_NAME, 0,
719 TL_WRITE_CONCURRENT_INSERT);
720
721 /*
722 1) open_log_table generates an error of the
723 table can not be opened or is corrupted.
724 2) "INSERT INTO general_log" can generate warning sometimes.
725
726 Suppress these warnings and errors, they can't be dealt with
727 properly anyway.
728
729 QQ: this problem needs to be studied in more detail.
730 Comment this 2 lines and run "cast.test" to see what's happening.
731 */
732 thd->push_internal_handler(& error_handler);
733 need_pop= TRUE;
734
735 if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
736 goto err;
737
738 need_close= TRUE;
739
740 if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
741 table->file->ha_rnd_init_with_error(0))
742 goto err;
743
744 need_rnd_end= TRUE;
745
746 /* Honor next number columns if present */
747 table->next_number_field= table->found_next_number_field;
748
749 /*
750 NOTE: we do not call restore_record() here, as all fields are
751 filled by the Logger (=> no need to load default ones).
752 */
753
754 /*
755 We do not set a value for table->field[0], as it will use
756 default value (which is CURRENT_TIMESTAMP).
757 */
758
759 /* check that all columns exist */
760 if (table->s->fields < 6)
761 goto err;
762
763 DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
764
765 ((Field_timestamp*) table->field[0])->store_TIME(
766 hrtime_to_my_time(event_time), hrtime_sec_part(event_time));
767
768 /* do a write */
769 if (table->field[1]->store(user_host, user_host_len, client_cs) ||
770 table->field[2]->store((longlong) thread_id_arg, TRUE) ||
771 table->field[3]->store((longlong) global_system_variables.server_id,
772 TRUE) ||
773 table->field[4]->store(command_type, command_type_len, client_cs))
774 goto err;
775
776 /*
777 A positive return value in store() means truncation.
778 Still logging a message in the log in this case.
779 */
780 table->field[5]->flags|= FIELDFLAG_HEX_ESCAPE;
781 if (table->field[5]->store(sql_text, sql_text_len, client_cs) < 0)
782 goto err;
783
784 /* mark all fields as not null */
785 table->field[1]->set_notnull();
786 table->field[2]->set_notnull();
787 table->field[3]->set_notnull();
788 table->field[4]->set_notnull();
789 table->field[5]->set_notnull();
790
791 /* Set any extra columns to their default values */
792 for (field_index= 6 ; field_index < table->s->fields ; field_index++)
793 {
794 table->field[field_index]->set_default();
795 }
796
797 /* log table entries are not replicated */
798 if (table->file->ha_write_row(table->record[0]))
799 goto err;
800
801 result= FALSE;
802
803err:
804 if (result && !thd->killed)
805 sql_print_error("Failed to write to mysql.general_log: %s",
806 error_handler.message());
807
808 if (need_rnd_end)
809 {
810 table->file->ha_rnd_end();
811 table->file->ha_release_auto_increment();
812 }
813 if (need_pop)
814 thd->pop_internal_handler();
815 if (need_close)
816 close_log_table(thd, &open_tables_backup);
817
818 thd->variables.option_bits= save_thd_options;
819 thd->time_zone_used= save_time_zone_used;
820 DBUG_RETURN(result);
821}
822
823
824/*
825 Log a query to the slow log table
826
827 SYNOPSIS
828 log_slow()
829 thd THD of the query
830 current_time current timestamp
831 user_host the pointer to the string with user@host info
832 user_host_len length of the user_host string. this is computed once
833 and passed to all general log event handlers
834 query_time Amount of time the query took to execute (in microseconds)
835 lock_time Amount of time the query was locked (in microseconds)
836 is_command The flag, which determines, whether the sql_text is a
837 query or an administrator command (these are treated
838 differently by the old logging routines)
839 sql_text the very text of the query or administrator command
840 processed
841 sql_text_len the length of sql_text string
842
843 DESCRIPTION
844
845 Log a query to the slow log table
846
847 RETURN
848 FALSE - OK
849 TRUE - error occurred
850*/
851
852bool Log_to_csv_event_handler::
853 log_slow(THD *thd, my_hrtime_t current_time,
854 const char *user_host, size_t user_host_len,
855 ulonglong query_utime, ulonglong lock_utime, bool is_command,
856 const char *sql_text, size_t sql_text_len)
857{
858 TABLE_LIST table_list;
859 TABLE *table;
860 bool result= TRUE;
861 bool need_close= FALSE;
862 bool need_rnd_end= FALSE;
863 Silence_log_table_errors error_handler;
864 Open_tables_backup open_tables_backup;
865 CHARSET_INFO *client_cs= thd->variables.character_set_client;
866 bool save_time_zone_used;
867 long query_time= (long) MY_MIN(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
868 long lock_time= (long) MY_MIN(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
869 long query_time_micro= (long) (query_utime % 1000000);
870 long lock_time_micro= (long) (lock_utime % 1000000);
871
872 DBUG_ENTER("Log_to_csv_event_handler::log_slow");
873
874 thd->push_internal_handler(& error_handler);
875 /*
876 CSV uses TIME_to_timestamp() internally if table needs to be repaired
877 which will set thd->time_zone_used
878 */
879 save_time_zone_used= thd->time_zone_used;
880
881 table_list.init_one_table(&MYSQL_SCHEMA_NAME, &SLOW_LOG_NAME, 0,
882 TL_WRITE_CONCURRENT_INSERT);
883
884 if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
885 goto err;
886
887 need_close= TRUE;
888
889 if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
890 table->file->ha_rnd_init_with_error(0))
891 goto err;
892
893 need_rnd_end= TRUE;
894
895 /* Honor next number columns if present */
896 table->next_number_field= table->found_next_number_field;
897
898 restore_record(table, s->default_values); // Get empty record
899
900 /* check that all columns exist */
901 if (table->s->fields < 13)
902 goto err;
903
904 /* store the time and user values */
905 DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
906 ((Field_timestamp*) table->field[0])->store_TIME(
907 hrtime_to_my_time(current_time), hrtime_sec_part(current_time));
908 if (table->field[1]->store(user_host, user_host_len, client_cs))
909 goto err;
910
911 /*
912 A TIME field can not hold the full longlong range; query_time or
913 lock_time may be truncated without warning here, if greater than
914 839 hours (~35 days)
915 */
916 MYSQL_TIME t;
917 t.neg= 0;
918
919 /* fill in query_time field */
920 calc_time_from_sec(&t, query_time, query_time_micro);
921 if (table->field[2]->store_time(&t))
922 goto err;
923 /* lock_time */
924 calc_time_from_sec(&t, lock_time, lock_time_micro);
925 if (table->field[3]->store_time(&t))
926 goto err;
927 /* rows_sent */
928 if (table->field[4]->store((longlong) thd->get_sent_row_count(), TRUE))
929 goto err;
930 /* rows_examined */
931 if (table->field[5]->store((longlong) thd->get_examined_row_count(), TRUE))
932 goto err;
933
934 /* fill database field */
935 if (thd->db.str)
936 {
937 if (table->field[6]->store(thd->db.str, thd->db.length, client_cs))
938 goto err;
939 table->field[6]->set_notnull();
940 }
941
942 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
943 {
944 if (table->
945 field[7]->store((longlong)
946 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
947 TRUE))
948 goto err;
949 table->field[7]->set_notnull();
950 }
951
952 /*
953 Set value if we do an insert on autoincrement column. Note that for
954 some engines (those for which get_auto_increment() does not leave a
955 table lock until the statement ends), this is just the first value and
956 the next ones used may not be contiguous to it.
957 */
958 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
959 {
960 if (table->
961 field[8]->store((longlong)
962 thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), TRUE))
963 goto err;
964 table->field[8]->set_notnull();
965 }
966
967 if (table->field[9]->store((longlong)global_system_variables.server_id, TRUE))
968 goto err;
969 table->field[9]->set_notnull();
970
971 /*
972 Column sql_text.
973 A positive return value in store() means truncation.
974 Still logging a message in the log in this case.
975 */
976 if (table->field[10]->store(sql_text, sql_text_len, client_cs) < 0)
977 goto err;
978
979 if (table->field[11]->store((longlong) thd->thread_id, TRUE))
980 goto err;
981
982 /* Rows_affected */
983 if (table->field[12]->store(thd->get_stmt_da()->is_ok() ?
984 (longlong) thd->get_stmt_da()->affected_rows() :
985 0, TRUE))
986 goto err;
987
988 /* log table entries are not replicated */
989 if (table->file->ha_write_row(table->record[0]))
990 goto err;
991
992 result= FALSE;
993
994err:
995 thd->pop_internal_handler();
996
997 if (result && !thd->killed)
998 sql_print_error("Failed to write to mysql.slow_log: %s",
999 error_handler.message());
1000
1001 if (need_rnd_end)
1002 {
1003 table->file->ha_rnd_end();
1004 table->file->ha_release_auto_increment();
1005 }
1006 if (need_close)
1007 close_log_table(thd, &open_tables_backup);
1008 thd->time_zone_used= save_time_zone_used;
1009 DBUG_RETURN(result);
1010}
1011
1012int Log_to_csv_event_handler::
1013 activate_log(THD *thd, uint log_table_type)
1014{
1015 TABLE_LIST table_list;
1016 TABLE *table;
1017 LEX_CSTRING *UNINIT_VAR(log_name);
1018 int result;
1019 Open_tables_backup open_tables_backup;
1020
1021 DBUG_ENTER("Log_to_csv_event_handler::activate_log");
1022
1023 if (log_table_type == QUERY_LOG_GENERAL)
1024 {
1025 log_name= &GENERAL_LOG_NAME;
1026 }
1027 else
1028 {
1029 DBUG_ASSERT(log_table_type == QUERY_LOG_SLOW);
1030
1031 log_name= &SLOW_LOG_NAME;
1032 }
1033 table_list.init_one_table(&MYSQL_SCHEMA_NAME, log_name, 0, TL_WRITE_CONCURRENT_INSERT);
1034
1035 table= open_log_table(thd, &table_list, &open_tables_backup);
1036 if (table)
1037 {
1038 result= 0;
1039 close_log_table(thd, &open_tables_backup);
1040 }
1041 else
1042 result= 1;
1043
1044 DBUG_RETURN(result);
1045}
1046
/*
  Error logging to a table is not supported: this function asserts in
  debug builds and otherwise returns FALSE without using its arguments.
*/
bool Log_to_csv_event_handler::
  log_error(enum loglevel level, const char *format, va_list args)
{
  /* No log table is implemented */
  DBUG_ASSERT(0);
  return FALSE;
}
1054
/*
  Pass an error log message on to vprint_msg_to_log() and return its
  result.
*/
bool Log_to_file_event_handler::
  log_error(enum loglevel level, const char *format,
            va_list args)
{
  return vprint_msg_to_log(level, format, args);
}
1061
/* Calls init_pthread_objects() on the general log and on the slow log */
void Log_to_file_event_handler::init_pthread_objects()
{
  mysql_log.init_pthread_objects();
  mysql_slow_log.init_pthread_objects();
}
1067
1068
1069/** Wrapper around MYSQL_LOG::write() for slow log. */
1070
1071bool Log_to_file_event_handler::
1072 log_slow(THD *thd, my_hrtime_t current_time,
1073 const char *user_host, size_t user_host_len,
1074 ulonglong query_utime, ulonglong lock_utime, bool is_command,
1075 const char *sql_text, size_t sql_text_len)
1076{
1077 Silence_log_table_errors error_handler;
1078 thd->push_internal_handler(&error_handler);
1079 bool retval= mysql_slow_log.write(thd, hrtime_to_my_time(current_time),
1080 user_host, user_host_len,
1081 query_utime, lock_utime, is_command,
1082 sql_text, sql_text_len);
1083 thd->pop_internal_handler();
1084 return retval;
1085}
1086
1087
1088/**
1089 Wrapper around MYSQL_LOG::write() for general log. We need it since we
1090 want all log event handlers to have the same signature.
1091*/
1092
1093bool Log_to_file_event_handler::
1094 log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
1095 const char *command_type, size_t command_type_len,
1096 const char *sql_text, size_t sql_text_len,
1097 CHARSET_INFO *client_cs)
1098{
1099 Silence_log_table_errors error_handler;
1100 thd->push_internal_handler(&error_handler);
1101 bool retval= mysql_log.write(hrtime_to_time(event_time), user_host,
1102 user_host_len,
1103 thread_id_arg, command_type, command_type_len,
1104 sql_text, sql_text_len);
1105 thd->pop_internal_handler();
1106 return retval;
1107}
1108
1109
1110bool Log_to_file_event_handler::init()
1111{
1112 if (!is_initialized)
1113 {
1114 if (global_system_variables.sql_log_slow)
1115 mysql_slow_log.open_slow_log(opt_slow_logname);
1116
1117 if (opt_log)
1118 mysql_log.open_query_log(opt_logname);
1119
1120 is_initialized= TRUE;
1121 }
1122
1123 return FALSE;
1124}
1125
1126
1127void Log_to_file_event_handler::cleanup()
1128{
1129 mysql_log.cleanup();
1130 mysql_slow_log.cleanup();
1131}
1132
1133void Log_to_file_event_handler::flush()
1134{
1135 /* reopen log files */
1136 if (opt_log)
1137 mysql_log.reopen_file();
1138 if (global_system_variables.sql_log_slow)
1139 mysql_slow_log.reopen_file();
1140}
1141
1142/*
1143 Log error with all enabled log event handlers
1144
1145 SYNOPSIS
1146 error_log_print()
1147
1148 level The level of the error significance: NOTE,
1149 WARNING or ERROR.
1150 format format string for the error message
1151 args list of arguments for the format string
1152
1153 RETURN
1154 FALSE - OK
1155 TRUE - error occurred
1156*/
1157
1158bool LOGGER::error_log_print(enum loglevel level, const char *format,
1159 va_list args)
1160{
1161 bool error= FALSE;
1162 Log_event_handler **current_handler;
1163
1164 /* currently we don't need locking here as there is no error_log table */
1165 for (current_handler= error_log_handler_list ; *current_handler ;)
1166 error= (*current_handler++)->log_error(level, format, args) || error;
1167
1168 return error;
1169}
1170
1171
1172void LOGGER::cleanup_base()
1173{
1174 DBUG_ASSERT(inited == 1);
1175 mysql_rwlock_destroy(&LOCK_logger);
1176 if (table_log_handler)
1177 {
1178 table_log_handler->cleanup();
1179 delete table_log_handler;
1180 table_log_handler= NULL;
1181 }
1182 if (file_log_handler)
1183 file_log_handler->cleanup();
1184}
1185
1186
1187void LOGGER::cleanup_end()
1188{
1189 DBUG_ASSERT(inited == 1);
1190 if (file_log_handler)
1191 {
1192 delete file_log_handler;
1193 file_log_handler=NULL;
1194 }
1195 inited= 0;
1196}
1197
1198
1199/**
1200 Perform basic log initialization: create file-based log handler and
1201 init error log.
1202*/
1203void LOGGER::init_base()
1204{
1205 DBUG_ASSERT(inited == 0);
1206 inited= 1;
1207
1208 /*
1209 Here we create file log handler. We don't do it for the table log handler
1210 here as it cannot be created so early. The reason is THD initialization,
1211 which depends on the system variables (parsed later).
1212 */
1213 if (!file_log_handler)
1214 file_log_handler= new Log_to_file_event_handler;
1215
1216 /* by default we use traditional error log */
1217 init_error_log(LOG_FILE);
1218
1219 file_log_handler->init_pthread_objects();
1220 mysql_rwlock_init(key_rwlock_LOCK_logger, &LOCK_logger);
1221}
1222
1223
1224void LOGGER::init_log_tables()
1225{
1226 if (!table_log_handler)
1227 table_log_handler= new Log_to_csv_event_handler;
1228
1229 if (!is_log_tables_initialized &&
1230 !table_log_handler->init() && !file_log_handler->init())
1231 is_log_tables_initialized= TRUE;
1232}
1233
1234
1235/**
1236 Close and reopen the slow log (with locks).
1237
1238 @returns FALSE.
1239*/
1240bool LOGGER::flush_slow_log()
1241{
1242 /*
1243 Now we lock logger, as nobody should be able to use logging routines while
1244 log tables are closed
1245 */
1246 logger.lock_exclusive();
1247
1248 /* Reopen slow log file */
1249 if (global_system_variables.sql_log_slow)
1250 file_log_handler->get_mysql_slow_log()->reopen_file();
1251
1252 /* End of log flush */
1253 logger.unlock();
1254
1255 return 0;
1256}
1257
1258
1259/**
1260 Close and reopen the general log (with locks).
1261
1262 @returns FALSE.
1263*/
1264bool LOGGER::flush_general_log()
1265{
1266 /*
1267 Now we lock logger, as nobody should be able to use logging routines while
1268 log tables are closed
1269 */
1270 logger.lock_exclusive();
1271
1272 /* Reopen general log file */
1273 if (opt_log)
1274 file_log_handler->get_mysql_log()->reopen_file();
1275
1276 /* End of log flush */
1277 logger.unlock();
1278
1279 return 0;
1280}
1281
1282
1283/*
1284 Log slow query with all enabled log event handlers
1285
1286 SYNOPSIS
1287 slow_log_print()
1288
1289 thd THD of the query being logged
1290 query The query being logged
1291 query_length The length of the query string
1292 current_utime Current time in microseconds (from undefined start)
1293
1294 RETURN
1295 FALSE OK
1296 TRUE error occurred
1297*/
1298
1299bool LOGGER::slow_log_print(THD *thd, const char *query, size_t query_length,
1300 ulonglong current_utime)
1301
1302{
1303 bool error= FALSE;
1304 Log_event_handler **current_handler;
1305 bool is_command= FALSE;
1306 char user_host_buff[MAX_USER_HOST_SIZE + 1];
1307 Security_context *sctx= thd->security_ctx;
1308 uint user_host_len= 0;
1309 ulonglong query_utime, lock_utime;
1310
1311 DBUG_ASSERT(thd->enable_slow_log);
1312 /*
1313 Print the message to the buffer if we have slow log enabled
1314 */
1315
1316 if (*slow_log_handler_list)
1317 {
1318 /* do not log slow queries from replication threads */
1319 if (!thd->variables.sql_log_slow)
1320 return 0;
1321
1322 lock_shared();
1323 if (!global_system_variables.sql_log_slow)
1324 {
1325 unlock();
1326 return 0;
1327 }
1328
1329 /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
1330 user_host_len= (uint)(strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
1331 sctx->priv_user, "[",
1332 sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
1333 sctx->host ? sctx->host : "", " [",
1334 sctx->ip ? sctx->ip : "", "]", NullS) -
1335 user_host_buff);
1336
1337 DBUG_ASSERT(thd->start_utime);
1338 DBUG_ASSERT(thd->start_time);
1339 query_utime= (current_utime - thd->start_utime);
1340 lock_utime= (thd->utime_after_lock - thd->start_utime);
1341 my_hrtime_t current_time= { hrtime_from_time(thd->start_time) +
1342 thd->start_time_sec_part + query_utime };
1343
1344 if (!query)
1345 {
1346 is_command= TRUE;
1347 query= command_name[thd->get_command()].str;
1348 query_length= (uint)command_name[thd->get_command()].length;
1349 }
1350
1351 for (current_handler= slow_log_handler_list; *current_handler ;)
1352 error= (*current_handler++)->log_slow(thd, current_time,
1353 user_host_buff, user_host_len,
1354 query_utime, lock_utime, is_command,
1355 query, query_length) || error;
1356
1357 unlock();
1358 }
1359 return error;
1360}
1361
1362bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
1363 const char *query, size_t query_length)
1364{
1365 bool error= FALSE;
1366 Log_event_handler **current_handler= general_log_handler_list;
1367 char user_host_buff[MAX_USER_HOST_SIZE + 1];
1368 uint user_host_len= 0;
1369 my_hrtime_t current_time;
1370
1371 DBUG_ASSERT(thd);
1372
1373 user_host_len= make_user_name(thd, user_host_buff);
1374
1375 current_time= my_hrtime();
1376
1377 mysql_audit_general_log(thd, hrtime_to_time(current_time),
1378 user_host_buff, user_host_len,
1379 command_name[(uint) command].str,
1380 (uint)command_name[(uint) command].length,
1381 query, (uint)query_length);
1382
1383 if (opt_log && log_command(thd, command))
1384 {
1385 lock_shared();
1386 while (*current_handler)
1387 error|= (*current_handler++)->
1388 log_general(thd, current_time, user_host_buff,
1389 user_host_len, thd->thread_id,
1390 command_name[(uint) command].str,
1391 command_name[(uint) command].length,
1392 query, query_length,
1393 thd->variables.character_set_client) || error;
1394 unlock();
1395 }
1396
1397 return error;
1398}
1399
1400bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
1401 const char *format, va_list args)
1402{
1403 size_t message_buff_len= 0;
1404 char message_buff[MAX_LOG_BUFFER_SIZE];
1405
1406 /* prepare message */
1407 if (format)
1408 message_buff_len= my_vsnprintf(message_buff, sizeof(message_buff),
1409 format, args);
1410 else
1411 message_buff[0]= '\0';
1412
1413 return general_log_write(thd, command, message_buff, message_buff_len);
1414}
1415
1416void LOGGER::init_error_log(ulonglong error_log_printer)
1417{
1418 if (error_log_printer & LOG_NONE)
1419 {
1420 error_log_handler_list[0]= 0;
1421 return;
1422 }
1423
1424 switch (error_log_printer) {
1425 case LOG_FILE:
1426 error_log_handler_list[0]= file_log_handler;
1427 error_log_handler_list[1]= 0;
1428 break;
1429 /* these two are disabled for now */
1430 case LOG_TABLE:
1431 DBUG_ASSERT(0);
1432 break;
1433 case LOG_TABLE|LOG_FILE:
1434 DBUG_ASSERT(0);
1435 break;
1436 }
1437}
1438
1439void LOGGER::init_slow_log(ulonglong slow_log_printer)
1440{
1441 if (slow_log_printer & LOG_NONE)
1442 {
1443 slow_log_handler_list[0]= 0;
1444 return;
1445 }
1446
1447 switch (slow_log_printer) {
1448 case LOG_FILE:
1449 slow_log_handler_list[0]= file_log_handler;
1450 slow_log_handler_list[1]= 0;
1451 break;
1452 case LOG_TABLE:
1453 slow_log_handler_list[0]= table_log_handler;
1454 slow_log_handler_list[1]= 0;
1455 break;
1456 case LOG_TABLE|LOG_FILE:
1457 slow_log_handler_list[0]= file_log_handler;
1458 slow_log_handler_list[1]= table_log_handler;
1459 slow_log_handler_list[2]= 0;
1460 break;
1461 }
1462}
1463
1464void LOGGER::init_general_log(ulonglong general_log_printer)
1465{
1466 if (general_log_printer & LOG_NONE)
1467 {
1468 general_log_handler_list[0]= 0;
1469 return;
1470 }
1471
1472 switch (general_log_printer) {
1473 case LOG_FILE:
1474 general_log_handler_list[0]= file_log_handler;
1475 general_log_handler_list[1]= 0;
1476 break;
1477 case LOG_TABLE:
1478 general_log_handler_list[0]= table_log_handler;
1479 general_log_handler_list[1]= 0;
1480 break;
1481 case LOG_TABLE|LOG_FILE:
1482 general_log_handler_list[0]= file_log_handler;
1483 general_log_handler_list[1]= table_log_handler;
1484 general_log_handler_list[2]= 0;
1485 break;
1486 }
1487}
1488
1489
1490bool LOGGER::activate_log_handler(THD* thd, uint log_type)
1491{
1492 MYSQL_QUERY_LOG *file_log;
1493 bool res= FALSE;
1494 lock_exclusive();
1495 switch (log_type) {
1496 case QUERY_LOG_SLOW:
1497 if (!global_system_variables.sql_log_slow)
1498 {
1499 file_log= file_log_handler->get_mysql_slow_log();
1500
1501 file_log->open_slow_log(opt_slow_logname);
1502 if (table_log_handler->activate_log(thd, QUERY_LOG_SLOW))
1503 {
1504 /* Error printed by open table in activate_log() */
1505 res= TRUE;
1506 file_log->close(0);
1507 }
1508 else
1509 {
1510 init_slow_log(log_output_options);
1511 global_system_variables.sql_log_slow= TRUE;
1512 }
1513 }
1514 break;
1515 case QUERY_LOG_GENERAL:
1516 if (!opt_log)
1517 {
1518 file_log= file_log_handler->get_mysql_log();
1519
1520 file_log->open_query_log(opt_logname);
1521 if (table_log_handler->activate_log(thd, QUERY_LOG_GENERAL))
1522 {
1523 /* Error printed by open table in activate_log() */
1524 res= TRUE;
1525 file_log->close(0);
1526 }
1527 else
1528 {
1529 init_general_log(log_output_options);
1530 opt_log= TRUE;
1531 }
1532 }
1533 break;
1534 default:
1535 DBUG_ASSERT(0);
1536 }
1537 unlock();
1538 return res;
1539}
1540
1541
1542void LOGGER::deactivate_log_handler(THD *thd, uint log_type)
1543{
1544 my_bool *tmp_opt= 0;
1545 MYSQL_LOG *UNINIT_VAR(file_log);
1546
1547 switch (log_type) {
1548 case QUERY_LOG_SLOW:
1549 tmp_opt= &global_system_variables.sql_log_slow;
1550 file_log= file_log_handler->get_mysql_slow_log();
1551 break;
1552 case QUERY_LOG_GENERAL:
1553 tmp_opt= &opt_log;
1554 file_log= file_log_handler->get_mysql_log();
1555 break;
1556 default:
1557 MY_ASSERT_UNREACHABLE();
1558 }
1559
1560 if (!(*tmp_opt))
1561 return;
1562
1563 lock_exclusive();
1564 file_log->close(0);
1565 *tmp_opt= FALSE;
1566 unlock();
1567}
1568
1569
1570/* the parameters are unused for the log tables */
1571bool Log_to_csv_event_handler::init()
1572{
1573 return 0;
1574}
1575
1576int LOGGER::set_handlers(ulonglong error_log_printer,
1577 ulonglong slow_log_printer,
1578 ulonglong general_log_printer)
1579{
1580 /* error log table is not supported yet */
1581 DBUG_ASSERT(error_log_printer < LOG_TABLE);
1582
1583 lock_exclusive();
1584
1585 if ((slow_log_printer & LOG_TABLE || general_log_printer & LOG_TABLE) &&
1586 !is_log_tables_initialized)
1587 {
1588 slow_log_printer= (slow_log_printer & ~LOG_TABLE) | LOG_FILE;
1589 general_log_printer= (general_log_printer & ~LOG_TABLE) | LOG_FILE;
1590
1591 sql_print_error("Failed to initialize log tables. "
1592 "Falling back to the old-fashioned logs");
1593 }
1594
1595 init_error_log(error_log_printer);
1596 init_slow_log(slow_log_printer);
1597 init_general_log(general_log_printer);
1598
1599 unlock();
1600
1601 return 0;
1602}
1603
1604 /*
1605 Save position of binary log transaction cache.
1606
1607 SYNPOSIS
1608 binlog_trans_log_savepos()
1609
1610 thd The thread to take the binlog data from
1611 pos Pointer to variable where the position will be stored
1612
1613 DESCRIPTION
1614
1615 Save the current position in the binary log transaction cache into
1616 the variable pointed to by 'pos'
1617 */
1618
1619static void
1620binlog_trans_log_savepos(THD *thd, my_off_t *pos)
1621{
1622 DBUG_ENTER("binlog_trans_log_savepos");
1623 DBUG_ASSERT(pos != NULL);
1624 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
1625 DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open());
1626 *pos= cache_mngr->trx_cache.get_byte_position();
1627 DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
1628 DBUG_VOID_RETURN;
1629}
1630
1631
1632/*
1633 Truncate the binary log transaction cache.
1634
1635 SYNPOSIS
1636 binlog_trans_log_truncate()
1637
1638 thd The thread to take the binlog data from
1639 pos Position to truncate to
1640
1641 DESCRIPTION
1642
1643 Truncate the binary log to the given position. Will not change
1644 anything else.
1645
1646 */
1647static void
1648binlog_trans_log_truncate(THD *thd, my_off_t pos)
1649{
1650 DBUG_ENTER("binlog_trans_log_truncate");
1651 DBUG_PRINT("enter", ("pos: %lu", (ulong) pos));
1652
1653 DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL);
1654 /* Only true if binlog_trans_log_savepos() wasn't called before */
1655 DBUG_ASSERT(pos != ~(my_off_t) 0);
1656
1657 binlog_cache_mngr *const cache_mngr=
1658 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1659 cache_mngr->trx_cache.restore_savepoint(pos);
1660 DBUG_VOID_RETURN;
1661}
1662
1663
1664/*
1665 this function is mostly a placeholder.
1666 conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1667 should be moved here.
1668*/
1669
1670int binlog_init(void *p)
1671{
1672 binlog_hton= (handlerton *)p;
1673 binlog_hton->state= (WSREP_ON || opt_bin_log) ? SHOW_OPTION_YES
1674 : SHOW_OPTION_NO;
1675 binlog_hton->db_type=DB_TYPE_BINLOG;
1676 binlog_hton->savepoint_offset= sizeof(my_off_t);
1677 binlog_hton->close_connection= binlog_close_connection;
1678 binlog_hton->savepoint_set= binlog_savepoint_set;
1679 binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
1680 binlog_hton->savepoint_rollback_can_release_mdl=
1681 binlog_savepoint_rollback_can_release_mdl;
1682 binlog_hton->commit= binlog_commit;
1683 binlog_hton->rollback= binlog_rollback;
1684 binlog_hton->prepare= binlog_prepare;
1685 binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
1686 binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
1687 return 0;
1688}
1689
1690#ifdef WITH_WSREP
1691#include "wsrep_binlog.h"
1692#endif /* WITH_WSREP */
1693static int binlog_close_connection(handlerton *hton, THD *thd)
1694{
1695 DBUG_ENTER("binlog_close_connection");
1696 binlog_cache_mngr *const cache_mngr=
1697 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1698#ifdef WITH_WSREP
1699 if (cache_mngr && !cache_mngr->trx_cache.empty()) {
1700 IO_CACHE* cache= get_trans_log(thd);
1701 uchar *buf;
1702 size_t len=0;
1703 wsrep_write_cache_buf(cache, &buf, &len);
1704 WSREP_WARN("binlog trx cache not empty (%lu bytes) @ connection close %lld",
1705 (ulong) len, (longlong) thd->thread_id);
1706 if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
1707
1708 cache = cache_mngr->get_binlog_cache_log(false);
1709 wsrep_write_cache_buf(cache, &buf, &len);
1710 WSREP_WARN("binlog stmt cache not empty (%lu bytes) @ connection close %lld",
1711 (ulong) len, (longlong) thd->thread_id);
1712 if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
1713 }
1714#endif /* WITH_WSREP */
1715 DBUG_ASSERT(cache_mngr->trx_cache.empty() && cache_mngr->stmt_cache.empty());
1716 thd_set_ha_data(thd, binlog_hton, NULL);
1717 cache_mngr->~binlog_cache_mngr();
1718 my_free(cache_mngr);
1719 DBUG_RETURN(0);
1720}
1721
1722/*
1723 This function flushes a cache upon commit/rollback.
1724
1725 SYNOPSIS
1726 binlog_flush_cache()
1727
1728 thd The thread whose transaction should be ended
1729 cache_mngr Pointer to the binlog_cache_mngr to use
1730 all True if the entire transaction should be ended, false if
1731 only the statement transaction should be ended.
1732 end_ev The end event to use (COMMIT, ROLLBACK, or commit XID)
1733 using_stmt True if the statement cache should be flushed
1734 using_trx True if the transaction cache should be flushed
1735
1736 DESCRIPTION
1737
1738 End the currently transaction or statement. The transaction can be either
1739 a real transaction or a statement transaction.
1740
1741 This can be to commit a transaction, with a COMMIT query event or an XA
1742 commit XID event. But it can also be to rollback a transaction with a
1743 ROLLBACK query event, used for rolling back transactions which also
1744 contain updates to non-transactional tables. Or it can be a flush of
1745 a statement cache.
1746 */
1747
1748static int
1749binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
1750 Log_event *end_ev, bool all, bool using_stmt,
1751 bool using_trx)
1752{
1753 int error= 0;
1754 DBUG_ENTER("binlog_flush_cache");
1755 DBUG_PRINT("enter", ("end_ev: %p", end_ev));
1756
1757 if ((using_stmt && !cache_mngr->stmt_cache.empty()) ||
1758 (using_trx && !cache_mngr->trx_cache.empty()))
1759 {
1760 if (using_stmt && thd->binlog_flush_pending_rows_event(TRUE, FALSE))
1761 DBUG_RETURN(1);
1762 if (using_trx && thd->binlog_flush_pending_rows_event(TRUE, TRUE))
1763 DBUG_RETURN(1);
1764
1765 /*
1766 Doing a commit or a rollback including non-transactional tables,
1767 i.e., ending a transaction where we might write the transaction
1768 cache to the binary log.
1769
1770 We can always end the statement when ending a transaction since
1771 transactions are not allowed inside stored functions. If they
1772 were, we would have to ensure that we're not ending a statement
1773 inside a stored function.
1774 */
1775 error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr,
1776 end_ev, all,
1777 using_stmt, using_trx);
1778 }
1779 else
1780 {
1781 /*
1782 This can happen in row-format binlog with something like
1783 BEGIN; INSERT INTO nontrans_table; INSERT IGNORE INTO trans_table;
1784 The nontrans_table is written directly into the binlog before commit,
1785 and if the trans_table is ignored there will be no rows to write when
1786 we get here.
1787
1788 So there is no work to do. Therefore, we will not increment any XID
1789 count, so we must not decrement any XID count in unlog().
1790 */
1791 cache_mngr->need_unlog= 0;
1792 }
1793 cache_mngr->reset(using_stmt, using_trx);
1794
1795 DBUG_ASSERT((!using_stmt || cache_mngr->stmt_cache.empty()) &&
1796 (!using_trx || cache_mngr->trx_cache.empty()));
1797 DBUG_RETURN(error);
1798}
1799
1800
1801/**
1802 This function flushes the stmt-cache upon commit.
1803
1804 @param thd The thread whose transaction should be flushed
1805 @param cache_mngr Pointer to the cache manager
1806
1807 @return
1808 nonzero if an error pops up when flushing the cache.
1809*/
1810static inline int
1811binlog_commit_flush_stmt_cache(THD *thd, bool all,
1812 binlog_cache_mngr *cache_mngr)
1813{
1814 DBUG_ENTER("binlog_commit_flush_stmt_cache");
1815#ifdef WITH_WSREP
1816 if (thd->wsrep_mysql_replicated > 0)
1817 {
1818 DBUG_ASSERT(WSREP_ON);
1819 WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d",
1820 thd->wsrep_mysql_replicated);
1821 return 0;
1822 }
1823#endif
1824
1825 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1826 FALSE, TRUE, TRUE, 0);
1827 DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE));
1828}
1829
1830/**
1831 This function flushes the trx-cache upon commit.
1832
1833 @param thd The thread whose transaction should be flushed
1834 @param cache_mngr Pointer to the cache manager
1835
1836 @return
1837 nonzero if an error pops up when flushing the cache.
1838*/
1839static inline int
1840binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr)
1841{
1842 DBUG_ENTER("binlog_commit_flush_trx_cache");
1843 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1844 TRUE, TRUE, TRUE, 0);
1845 DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1846}
1847
1848/**
1849 This function flushes the trx-cache upon rollback.
1850
1851 @param thd The thread whose transaction should be flushed
1852 @param cache_mngr Pointer to the cache manager
1853
1854 @return
1855 nonzero if an error pops up when flushing the cache.
1856*/
1857static inline int
1858binlog_rollback_flush_trx_cache(THD *thd, bool all,
1859 binlog_cache_mngr *cache_mngr)
1860{
1861 Query_log_event end_evt(thd, STRING_WITH_LEN("ROLLBACK"),
1862 TRUE, TRUE, TRUE, 0);
1863 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1864}
1865
1866/**
1867 This function flushes the trx-cache upon commit.
1868
1869 @param thd The thread whose transaction should be flushed
1870 @param cache_mngr Pointer to the cache manager
1871 @param xid Transaction Id
1872
1873 @return
1874 nonzero if an error pops up when flushing the cache.
1875*/
1876static inline int
1877binlog_commit_flush_xid_caches(THD *thd, binlog_cache_mngr *cache_mngr,
1878 bool all, my_xid xid)
1879{
1880 if (xid)
1881 {
1882 Xid_log_event end_evt(thd, xid, TRUE);
1883 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1884 }
1885 else
1886 {
1887 /*
1888 Empty xid occurs in XA COMMIT ... ONE PHASE.
1889 In this case, we do not have a MySQL xid for the transaction, and the
1890 external XA transaction coordinator will have to handle recovery if
1891 needed. So we end the transaction with a plain COMMIT query event.
1892 */
1893 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1894 TRUE, TRUE, TRUE, 0);
1895 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1896 }
1897}
1898
1899/**
1900 This function truncates the transactional cache upon committing or rolling
1901 back either a transaction or a statement.
1902
1903 @param thd The thread whose transaction should be flushed
1904 @param cache_mngr Pointer to the cache data to be flushed
1905 @param all @c true means truncate the transaction, otherwise the
1906 statement must be truncated.
1907
1908 @return
1909 nonzero if an error pops up when truncating the transactional cache.
1910*/
1911static int
1912binlog_truncate_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all)
1913{
1914 DBUG_ENTER("binlog_truncate_trx_cache");
1915 int error=0;
1916 /*
1917 This function handles transactional changes and as such this flag
1918 equals to true.
1919 */
1920 bool const is_transactional= TRUE;
1921
1922 DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1923 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1924 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1925 all ? "all" : "stmt"));
1926
1927 thd->binlog_remove_pending_rows_event(TRUE, is_transactional);
1928 /*
1929 If rolling back an entire transaction or a single statement not
1930 inside a transaction, we reset the transaction cache.
1931 */
1932 if (ending_trans(thd, all))
1933 {
1934 if (cache_mngr->trx_cache.has_incident())
1935 error= mysql_bin_log.write_incident(thd);
1936
1937 thd->clear_binlog_table_maps();
1938
1939 cache_mngr->reset(false, true);
1940 }
1941 /*
1942 If rolling back a statement in a transaction, we truncate the
1943 transaction cache to remove the statement.
1944 */
1945 else
1946 cache_mngr->trx_cache.restore_prev_position();
1947
1948 DBUG_ASSERT(thd->binlog_get_pending_rows_event(is_transactional) == NULL);
1949 DBUG_RETURN(error);
1950}
1951
1952static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1953{
1954 /*
1955 do nothing.
1956 just pretend we can do 2pc, so that MySQL won't
1957 switch to 1pc.
1958 real work will be done in MYSQL_BIN_LOG::log_and_order()
1959 */
1960 return 0;
1961}
1962
1963/*
1964 We flush the cache wrapped in a beging/rollback if:
1965 . aborting a single or multi-statement transaction and;
1966 . the OPTION_KEEP_LOG is active or;
1967 . the format is STMT and a non-trans table was updated or;
1968 . the format is MIXED and a temporary non-trans table was
1969 updated or;
1970 . the format is MIXED, non-trans table was updated and
1971 aborting a single statement transaction;
1972*/
1973static bool trans_cannot_safely_rollback(THD *thd, bool all)
1974{
1975 binlog_cache_mngr *const cache_mngr=
1976 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1977
1978 return ((thd->variables.option_bits & OPTION_KEEP_LOG) ||
1979 (trans_has_updated_non_trans_table(thd) &&
1980 thd->wsrep_binlog_format() == BINLOG_FORMAT_STMT) ||
1981 (cache_mngr->trx_cache.changes_to_non_trans_temp_table() &&
1982 thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED) ||
1983 (trans_has_updated_non_trans_table(thd) &&
1984 ending_single_stmt_trans(thd,all) &&
1985 thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED));
1986}
1987
1988
1989/**
1990 This function is called once after each statement.
1991
1992 It has the responsibility to flush the caches to the binary log on commits.
1993
1994 @param hton The binlog handlerton.
1995 @param thd The client thread that executes the transaction.
1996 @param all This is @c true if this is a real transaction commit, and
1997 @false otherwise.
1998
1999 @see handlerton::commit
2000*/
2001static int binlog_commit(handlerton *hton, THD *thd, bool all)
2002{
2003 int error= 0;
2004 PSI_stage_info org_stage;
2005 DBUG_ENTER("binlog_commit");
2006
2007 binlog_cache_mngr *const cache_mngr=
2008 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
2009
2010 if (!cache_mngr)
2011 {
2012 DBUG_ASSERT(WSREP(thd));
2013 DBUG_RETURN(0);
2014 }
2015
2016 DBUG_PRINT("debug",
2017 ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
2018 all,
2019 YESNO(thd->in_multi_stmt_transaction_mode()),
2020 YESNO(thd->transaction.all.modified_non_trans_table),
2021 YESNO(thd->transaction.stmt.modified_non_trans_table)));
2022
2023
2024 thd->backup_stage(&org_stage);
2025 THD_STAGE_INFO(thd, stage_binlog_write);
2026 if (!cache_mngr->stmt_cache.empty())
2027 {
2028 error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
2029 }
2030
2031 if (cache_mngr->trx_cache.empty())
2032 {
2033 /*
2034 we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
2035 */
2036 cache_mngr->reset(false, true);
2037 THD_STAGE_INFO(thd, org_stage);
2038 DBUG_RETURN(error);
2039 }
2040
2041 /*
2042 We commit the transaction if:
2043 - We are not in a transaction and committing a statement, or
2044 - We are in a transaction and a full transaction is committed.
2045 Otherwise, we accumulate the changes.
2046 */
2047 if (likely(!error) && ending_trans(thd, all))
2048 error= binlog_commit_flush_trx_cache(thd, all, cache_mngr);
2049
2050 /*
2051 This is part of the stmt rollback.
2052 */
2053 if (!all)
2054 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2055
2056 THD_STAGE_INFO(thd, org_stage);
2057 DBUG_RETURN(error);
2058}
2059
2060/**
2061 This function is called when a transaction or a statement is rolled back.
2062
2063 @param hton The binlog handlerton.
2064 @param thd The client thread that executes the transaction.
2065 @param all This is @c true if this is a real transaction rollback, and
2066 @false otherwise.
2067
2068 @see handlerton::rollback
2069*/
2070static int binlog_rollback(handlerton *hton, THD *thd, bool all)
2071{
2072 DBUG_ENTER("binlog_rollback");
2073 int error= 0;
2074 binlog_cache_mngr *const cache_mngr=
2075 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
2076
2077 if (!cache_mngr)
2078 {
2079 DBUG_ASSERT(WSREP(thd));
2080 DBUG_RETURN(0);
2081 }
2082
2083 DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
2084 YESNO(all),
2085 YESNO(thd->transaction.all.modified_non_trans_table),
2086 YESNO(thd->transaction.stmt.modified_non_trans_table)));
2087
2088 /*
2089 If an incident event is set we do not flush the content of the statement
2090 cache because it may be corrupted.
2091 */
2092 if (cache_mngr->stmt_cache.has_incident())
2093 {
2094 error= mysql_bin_log.write_incident(thd);
2095 cache_mngr->reset(true, false);
2096 }
2097 else if (!cache_mngr->stmt_cache.empty())
2098 {
2099 error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
2100 }
2101
2102 if (cache_mngr->trx_cache.empty())
2103 {
2104 /*
2105 we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
2106 */
2107 cache_mngr->reset(false, true);
2108 DBUG_RETURN(error);
2109 }
2110 if (!wsrep_emulate_bin_log && mysql_bin_log.check_write_error(thd))
2111 {
2112 /*
2113 "all == true" means that a "rollback statement" triggered the error and
2114 this function was called. However, this must not happen as a rollback
2115 is written directly to the binary log. And in auto-commit mode, a single
2116 statement that is rolled back has the flag all == false.
2117 */
2118 DBUG_ASSERT(!all);
2119 /*
2120 We reach this point if the effect of a statement did not properly get into
2121 a cache and need to be rolled back.
2122 */
2123 error |= binlog_truncate_trx_cache(thd, cache_mngr, all);
2124 }
2125 else if (likely(!error))
2126 {
2127 if (ending_trans(thd, all) && trans_cannot_safely_rollback(thd, all))
2128 error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr);
2129 /*
2130 Truncate the cache if:
2131 . aborting a single or multi-statement transaction or;
2132 . the OPTION_KEEP_LOG is not active and;
2133 . the format is not STMT or no non-trans table was
2134 updated and;
2135 . the format is not MIXED or no temporary non-trans table
2136 was updated.
2137 */
2138 else if (ending_trans(thd, all) ||
2139 (!(thd->variables.option_bits & OPTION_KEEP_LOG) &&
2140 (!stmt_has_updated_non_trans_table(thd) ||
2141 thd->wsrep_binlog_format() != BINLOG_FORMAT_STMT) &&
2142 (!cache_mngr->trx_cache.changes_to_non_trans_temp_table() ||
2143 thd->wsrep_binlog_format() != BINLOG_FORMAT_MIXED)))
2144 error= binlog_truncate_trx_cache(thd, cache_mngr, all);
2145 }
2146
2147 /*
2148 This is part of the stmt rollback.
2149 */
2150 if (!all)
2151 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2152
2153 DBUG_RETURN(error);
2154}
2155
2156
2157void binlog_reset_cache(THD *thd)
2158{
2159 binlog_cache_mngr *const cache_mngr= opt_bin_log ?
2160 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton) : 0;
2161 DBUG_ENTER("binlog_reset_cache");
2162 if (cache_mngr)
2163 {
2164 thd->binlog_remove_pending_rows_event(TRUE, TRUE);
2165 cache_mngr->reset(true, true);
2166 }
2167 DBUG_VOID_RETURN;
2168}
2169
2170
2171void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
2172{
2173 DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
2174
2175 write_error= 1;
2176
2177 if (unlikely(check_write_error(thd)))
2178 DBUG_VOID_RETURN;
2179
2180 if (my_errno == EFBIG)
2181 {
2182 if (is_transactional)
2183 {
2184 my_message(ER_TRANS_CACHE_FULL, ER_THD(thd, ER_TRANS_CACHE_FULL), MYF(MY_WME));
2185 }
2186 else
2187 {
2188 my_message(ER_STMT_CACHE_FULL, ER_THD(thd, ER_STMT_CACHE_FULL), MYF(MY_WME));
2189 }
2190 }
2191 else
2192 {
2193 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name, errno);
2194 }
2195
2196 DBUG_VOID_RETURN;
2197}
2198
2199bool MYSQL_BIN_LOG::check_write_error(THD *thd)
2200{
2201 DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
2202
2203 bool checked= FALSE;
2204
2205 if (likely(!thd->is_error()))
2206 DBUG_RETURN(checked);
2207
2208 switch (thd->get_stmt_da()->sql_errno())
2209 {
2210 case ER_TRANS_CACHE_FULL:
2211 case ER_STMT_CACHE_FULL:
2212 case ER_ERROR_ON_WRITE:
2213 case ER_BINLOG_LOGGING_IMPOSSIBLE:
2214 checked= TRUE;
2215 break;
2216 }
2217
2218 DBUG_RETURN(checked);
2219}
2220
2221
2222/**
2223 @note
2224 How do we handle this (unlikely but legal) case:
2225 @verbatim
2226 [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2227 @endverbatim
2228 The problem occurs when a savepoint is before the update to the
2229 non-transactional table. Then when there's a rollback to the savepoint, if we
2230 simply truncate the binlog cache, we lose the part of the binlog cache where
2231 the update is. If we want to not lose it, we need to write the SAVEPOINT
2232 command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
2233 is easy: it's just write at the end of the binlog cache, but the former
2234 should be *inserted* to the place where the user called SAVEPOINT. The
2235 solution is that when the user calls SAVEPOINT, we write it to the binlog
2236 cache (so no need to later insert it). As transactions are never intermixed
2237 in the binary log (i.e. they are serialized), we won't have conflicts with
2238 savepoint names when using mysqlbinlog or in the slave SQL thread.
2239 Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2240 non-transactional table, we don't truncate the binlog cache but instead write
2241 ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2242 will chop the SAVEPOINT command from the binlog cache, which is good as in
2243 that case there is no need to have it in the binlog).
2244*/
2245
2246static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2247{
2248 int error= 1;
2249 DBUG_ENTER("binlog_savepoint_set");
2250
2251 if (wsrep_emulate_bin_log)
2252 DBUG_RETURN(0);
2253
2254 char buf[1024];
2255
2256 String log_query(buf, sizeof(buf), &my_charset_bin);
2257 if (log_query.copy(STRING_WITH_LEN("SAVEPOINT "), &my_charset_bin) ||
2258 append_identifier(thd, &log_query, &thd->lex->ident))
2259 DBUG_RETURN(1);
2260 int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2261 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2262 TRUE, FALSE, TRUE, errcode);
2263 /*
2264 We cannot record the position before writing the statement
2265 because a rollback to a savepoint (.e.g. consider it "S") would
2266 prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2267 written to the binary log despite the fact that the server could
2268 still issue other rollback statements to the same savepoint (i.e.
2269 "S").
2270 Given that the savepoint is valid until the server releases it,
2271 ie, until the transaction commits or it is released explicitly,
2272 we need to log it anyway so that we don't have "ROLLBACK TO S"
2273 or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2274 log.
2275 */
2276 if (likely(!(error= mysql_bin_log.write(&qinfo))))
2277 binlog_trans_log_savepos(thd, (my_off_t*) sv);
2278
2279 DBUG_RETURN(error);
2280}
2281
2282static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2283{
2284 DBUG_ENTER("binlog_savepoint_rollback");
2285
2286 if (wsrep_emulate_bin_log)
2287 DBUG_RETURN(0);
2288
2289 /*
2290 Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2291 non-transactional table. Otherwise, truncate the binlog cache starting
2292 from the SAVEPOINT command.
2293 */
2294 if (unlikely(trans_has_updated_non_trans_table(thd) ||
2295 (thd->variables.option_bits & OPTION_KEEP_LOG)))
2296 {
2297 char buf[1024];
2298 String log_query(buf, sizeof(buf), &my_charset_bin);
2299 if (log_query.copy(STRING_WITH_LEN("ROLLBACK TO "), &my_charset_bin) ||
2300 append_identifier(thd, &log_query, &thd->lex->ident))
2301 DBUG_RETURN(1);
2302 int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2303 Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
2304 TRUE, FALSE, TRUE, errcode);
2305 DBUG_RETURN(mysql_bin_log.write(&qinfo));
2306 }
2307
2308 binlog_trans_log_truncate(thd, *(my_off_t*)sv);
2309
2310 /*
2311 When a SAVEPOINT is executed inside a stored function/trigger we force the
2312 pending event to be flushed with a STMT_END_F flag and clear the table maps
2313 as well to ensure that following DMLs will have a clean state to start
2314 with. ROLLBACK inside a stored routine has to finalize possibly existing
2315 current row-based pending event with cleaning up table maps. That ensures
2316 that following DMLs will have a clean state to start with.
2317 */
2318 if (thd->in_sub_stmt)
2319 thd->clear_binlog_table_maps();
2320
2321 DBUG_RETURN(0);
2322}
2323
2324
2325/**
2326 Check whether binlog state allows to safely release MDL locks after
2327 rollback to savepoint.
2328
2329 @param hton The binlog handlerton.
2330 @param thd The client thread that executes the transaction.
2331
2332 @return true - It is safe to release MDL locks.
2333 false - If it is not.
2334*/
2335static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2336 THD *thd)
2337{
2338 DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2339 /*
2340 If we have not updated any non-transactional tables rollback
2341 to savepoint will simply truncate binlog cache starting from
2342 SAVEPOINT command. So it should be safe to release MDL acquired
2343 after SAVEPOINT command in this case.
2344 */
2345 DBUG_RETURN(!trans_cannot_safely_rollback(thd, true));
2346}
2347
2348
2349int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2350{
2351 uchar magic[4];
2352 DBUG_ASSERT(my_b_tell(log) == 0);
2353
2354 if (my_b_read(log, magic, sizeof(magic)))
2355 {
2356 *errmsg = "I/O error reading the header from the binary log";
2357 sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
2358 log->error);
2359 return 1;
2360 }
2361 if (bcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2362 {
2363 *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
2364 return 1;
2365 }
2366 return 0;
2367}
2368
2369
2370File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2371{
2372 File file;
2373 DBUG_ENTER("open_binlog");
2374
2375 if ((file= mysql_file_open(key_file_binlog,
2376 log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2377 MYF(MY_WME))) < 0)
2378 {
2379 sql_print_error("Failed to open log (file '%s', errno %d)",
2380 log_file_name, my_errno);
2381 *errmsg = "Could not open log file";
2382 goto err;
2383 }
2384 if (init_io_cache(log, file, (size_t)binlog_file_cache_size, READ_CACHE, 0, 0,
2385 MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
2386 {
2387 sql_print_error("Failed to create a cache on log (file '%s')",
2388 log_file_name);
2389 *errmsg = "Could not open log file";
2390 goto err;
2391 }
2392 if (check_binlog_magic(log,errmsg))
2393 goto err;
2394 DBUG_RETURN(file);
2395
2396err:
2397 if (file >= 0)
2398 {
2399 mysql_file_close(file, MYF(0));
2400 end_io_cache(log);
2401 }
2402 DBUG_RETURN(-1);
2403}
2404
#ifdef _WIN32
/* Non-zero once setup_windows_event_source() has run. */
static int eventSource = 0;

/**
  Create the "MySQL" event source key under the Application event log in the
  registry and set its EventMessageFile and TypesSupported values.

  Only the first call does any work. The results of the registry calls are
  stored but not acted upon.
*/
static void setup_windows_event_source()
{
  HKEY event_key= NULL;
  DWORD status= 0;
  TCHAR module_path[MAX_PATH];
  DWORD supported_types;

  if (eventSource)               // Ensure that we are only called once
    return;
  eventSource= 1;

  // Create the event source registry key
  status= RegCreateKey(HKEY_LOCAL_MACHINE,
                       "SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\MySQL",
                       &event_key);

  /* Name of the PE module that contains the message resource */
  GetModuleFileName(NULL, module_path, MAX_PATH);

  /* Register EventMessageFile (value size includes the terminating NUL) */
  status= RegSetValueEx(event_key, "EventMessageFile", 0, REG_EXPAND_SZ,
                        (PBYTE) module_path,
                        (DWORD) (strlen(module_path) + 1));

  /* Register supported event types */
  supported_types= (EVENTLOG_ERROR_TYPE | EVENTLOG_WARNING_TYPE |
                    EVENTLOG_INFORMATION_TYPE);
  status= RegSetValueEx(event_key, "TypesSupported", 0, REG_DWORD,
                        (LPBYTE) &supported_types, sizeof supported_types);

  RegCloseKey(event_key);
}

#endif /* _WIN32 */
2441
2442
2443/**
2444 Find a unique filename for 'filename.#'.
2445
2446 Set '#' to the number next to the maximum found in the most
2447 recent log file extension.
2448
2449 This function will return nonzero if: (i) the generated name
2450 exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
2451 or (iii) some other error happened while examining the filesystem.
2452
2453 @return
2454 nonzero if not possible to get unique filename.
2455*/
2456
2457static int find_uniq_filename(char *name, ulong next_log_number)
2458{
2459 uint i;
2460 char buff[FN_REFLEN], ext_buf[FN_REFLEN];
2461 struct st_my_dir *dir_info;
2462 struct fileinfo *file_info;
2463 ulong max_found, next, UNINIT_VAR(number);
2464 size_t buf_length, length;
2465 char *start, *end;
2466 int error= 0;
2467 DBUG_ENTER("find_uniq_filename");
2468
2469 length= dirname_part(buff, name, &buf_length);
2470 start= name + length;
2471 end= strend(start);
2472
2473 *end='.';
2474 length= (size_t) (end - start + 1);
2475
2476 if ((DBUG_EVALUATE_IF("error_unique_log_filename", 1,
2477 unlikely(!(dir_info= my_dir(buff,
2478 MYF(MY_DONT_SORT)))))))
2479 { // This shouldn't happen
2480 strmov(end,".1"); // use name+1
2481 DBUG_RETURN(1);
2482 }
2483 file_info= dir_info->dir_entry;
2484 max_found= next_log_number ? next_log_number-1 : 0;
2485 for (i= dir_info->number_of_files ; i-- ; file_info++)
2486 {
2487 if (strncmp(file_info->name, start, length) == 0 &&
2488 test_if_number(file_info->name+length, &number,0))
2489 {
2490 set_if_bigger(max_found,(ulong) number);
2491 }
2492 }
2493 my_dirend(dir_info);
2494
2495 /* check if reached the maximum possible extension number */
2496 if (max_found >= MAX_LOG_UNIQUE_FN_EXT)
2497 {
2498 sql_print_error("Log filename extension number exhausted: %06lu. \
2499Please fix this by archiving old logs and \
2500updating the index files.", max_found);
2501 error= 1;
2502 goto end;
2503 }
2504
2505 next= max_found + 1;
2506 if (sprintf(ext_buf, "%06lu", next)<0)
2507 {
2508 error= 1;
2509 goto end;
2510 }
2511 *end++='.';
2512
2513 /*
2514 Check if the generated extension size + the file name exceeds the
2515 buffer size used. If one did not check this, then the filename might be
2516 truncated, resulting in error.
2517 */
2518 if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
2519 {
2520 sql_print_error("Log filename too large: %s%s (%zu). \
2521Please fix this by archiving old logs and updating the \
2522index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
2523 error= 1;
2524 goto end;
2525 }
2526
2527 if (sprintf(end, "%06lu", next)<0)
2528 {
2529 error= 1;
2530 goto end;
2531 }
2532
2533 /* print warning if reaching the end of available extensions. */
2534 if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
2535 sql_print_warning("Next log extension: %lu. \
2536Remaining log filename extensions: %lu. \
2537Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));
2538
2539end:
2540 DBUG_RETURN(error);
2541}
2542
2543
2544void MYSQL_LOG::init(enum_log_type log_type_arg,
2545 enum cache_type io_cache_type_arg)
2546{
2547 DBUG_ENTER("MYSQL_LOG::init");
2548 log_type= log_type_arg;
2549 io_cache_type= io_cache_type_arg;
2550 DBUG_PRINT("info",("log_type: %d", log_type));
2551 DBUG_VOID_RETURN;
2552}
2553
2554
2555bool MYSQL_LOG::init_and_set_log_file_name(const char *log_name,
2556 const char *new_name,
2557 ulong next_log_number,
2558 enum_log_type log_type_arg,
2559 enum cache_type io_cache_type_arg)
2560{
2561 init(log_type_arg, io_cache_type_arg);
2562
2563 if (new_name)
2564 {
2565 strmov(log_file_name, new_name);
2566 }
2567 else if (!new_name && generate_new_name(log_file_name, log_name,
2568 next_log_number))
2569 return TRUE;
2570
2571 return FALSE;
2572}
2573
2574
2575/*
2576 Open a (new) log file.
2577
2578 SYNOPSIS
2579 open()
2580
2581 log_name The name of the log to open
2582 log_type_arg The type of the log. E.g. LOG_NORMAL
2583 new_name The new name for the logfile. This is only needed
2584 when the method is used to open the binlog file.
2585 io_cache_type_arg The type of the IO_CACHE to use for this log file
2586
2587 DESCRIPTION
2588 Open the logfile, init IO_CACHE and write startup messages
2589 (in case of general and slow query logs).
2590
2591 RETURN VALUES
2592 0 ok
2593 1 error
2594*/
2595
2596bool MYSQL_LOG::open(
2597#ifdef HAVE_PSI_INTERFACE
2598 PSI_file_key log_file_key,
2599#endif
2600 const char *log_name, enum_log_type log_type_arg,
2601 const char *new_name, ulong next_log_number,
2602 enum cache_type io_cache_type_arg)
2603{
2604 char buff[FN_REFLEN];
2605 MY_STAT f_stat;
2606 File file= -1;
2607 my_off_t seek_offset;
2608 bool is_fifo = false;
2609 int open_flags= O_CREAT | O_BINARY;
2610 DBUG_ENTER("MYSQL_LOG::open");
2611 DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg));
2612
2613 write_error= 0;
2614
2615 if (!(name= my_strdup(log_name, MYF(MY_WME))))
2616 {
2617 name= (char *)log_name; // for the error message
2618 goto err;
2619 }
2620
2621 /*
2622 log_type is LOG_UNKNOWN if we should not generate a new name
2623 This is only used when called from MYSQL_BINARY_LOG::open, which
2624 has already updated log_file_name.
2625 */
2626 if (log_type_arg != LOG_UNKNOWN &&
2627 init_and_set_log_file_name(name, new_name, next_log_number,
2628 log_type_arg, io_cache_type_arg))
2629 goto err;
2630
2631 is_fifo = my_stat(log_file_name, &f_stat, MYF(0)) &&
2632 MY_S_ISFIFO(f_stat.st_mode);
2633
2634 if (io_cache_type == SEQ_READ_APPEND)
2635 open_flags |= O_RDWR | O_APPEND;
2636 else
2637 open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
2638
2639 if (is_fifo)
2640 open_flags |= O_NONBLOCK;
2641
2642 db[0]= 0;
2643
2644#ifdef HAVE_PSI_INTERFACE
2645 /* Keep the key for reopen */
2646 m_log_file_key= log_file_key;
2647#endif
2648
2649 if ((file= mysql_file_open(log_file_key, log_file_name, open_flags,
2650 MYF(MY_WME | ME_WAITTANG))) < 0)
2651 goto err;
2652
2653 if (is_fifo)
2654 seek_offset= 0;
2655 else if ((seek_offset= mysql_file_tell(file, MYF(MY_WME))))
2656 goto err;
2657
2658 if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, seek_offset, 0,
2659 MYF(MY_WME | MY_NABP |
2660 ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
2661 goto err;
2662
2663 if (log_type == LOG_NORMAL)
2664 {
2665 char *end;
2666 size_t len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
2667#ifdef EMBEDDED_LIBRARY
2668 "embedded library\n",
2669 my_progname, server_version, MYSQL_COMPILATION_COMMENT
2670#elif defined(_WIN32)
2671 "started with:\nTCP Port: %d, Named Pipe: %s\n",
2672 my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2673 mysqld_port, mysqld_unix_port
2674#else
2675 "started with:\nTcp port: %d Unix socket: %s\n",
2676 my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2677 mysqld_port, mysqld_unix_port
2678#endif
2679 );
2680 end= strnmov(buff + len, "Time\t\t Id Command\tArgument\n",
2681 sizeof(buff) - len);
2682 if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
2683 flush_io_cache(&log_file))
2684 goto err;
2685 }
2686
2687 log_state= LOG_OPENED;
2688 DBUG_RETURN(0);
2689
2690err:
2691 sql_print_error(fatal_log_error, name, errno);
2692 if (file >= 0)
2693 mysql_file_close(file, MYF(0));
2694 end_io_cache(&log_file);
2695 my_free(name);
2696 name= NULL;
2697 log_state= LOG_CLOSED;
2698 DBUG_RETURN(1);
2699}
2700
2701MYSQL_LOG::MYSQL_LOG()
2702 : name(0), write_error(FALSE), inited(FALSE), log_type(LOG_UNKNOWN),
2703 log_state(LOG_CLOSED)
2704{
2705 /*
2706 We don't want to initialize LOCK_Log here as such initialization depends on
2707 safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
2708 called only in main(). Doing initialization here would make it happen
2709 before main().
2710 */
2711 bzero((char*) &log_file, sizeof(log_file));
2712}
2713
2714void MYSQL_LOG::init_pthread_objects()
2715{
2716 DBUG_ASSERT(inited == 0);
2717 inited= 1;
2718 mysql_mutex_init(key_LOG_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
2719}
2720
2721/*
2722 Close the log file
2723
2724 SYNOPSIS
2725 close()
2726 exiting Bitmask. LOG_CLOSE_TO_BE_OPENED is used if we intend to call
2727 open at once after close. LOG_CLOSE_DELAYED_CLOSE is used for
2728 binlog rotation, to delay actual close of the old file until
2729 we have successfully created the new file.
2730
2731 NOTES
2732 One can do an open on the object at once after doing a close.
2733 The internal structures are not freed until cleanup() is called
2734*/
2735
2736void MYSQL_LOG::close(uint exiting)
2737{ // One can't set log_type here!
2738 DBUG_ENTER("MYSQL_LOG::close");
2739 DBUG_PRINT("enter",("exiting: %d", (int) exiting));
2740 if (log_state == LOG_OPENED)
2741 {
2742 end_io_cache(&log_file);
2743
2744 if (log_type == LOG_BIN && mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
2745 {
2746 write_error= 1;
2747 sql_print_error(ER_THD_OR_DEFAULT(current_thd, ER_ERROR_ON_WRITE), name, errno);
2748 }
2749
2750 if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) &&
2751 mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
2752 {
2753 write_error= 1;
2754 sql_print_error(ER_THD_OR_DEFAULT(current_thd, ER_ERROR_ON_WRITE), name, errno);
2755 }
2756 }
2757
2758 log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
2759 my_free(name);
2760 name= NULL;
2761 DBUG_VOID_RETURN;
2762}
2763
2764/** This is called only once. */
2765
2766void MYSQL_LOG::cleanup()
2767{
2768 DBUG_ENTER("cleanup");
2769 if (inited)
2770 {
2771 inited= 0;
2772 mysql_mutex_destroy(&LOCK_log);
2773 close(0);
2774 }
2775 DBUG_VOID_RETURN;
2776}
2777
2778
2779int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name,
2780 ulong next_log_number)
2781{
2782 fn_format(new_name, log_name, mysql_data_home, "", 4);
2783 if (log_type == LOG_BIN)
2784 {
2785 if (!fn_ext(log_name)[0])
2786 {
2787 if (DBUG_EVALUATE_IF("binlog_inject_new_name_error", TRUE, FALSE) ||
2788 unlikely(find_uniq_filename(new_name, next_log_number)))
2789 {
2790 THD *thd= current_thd;
2791 if (unlikely(thd))
2792 my_error(ER_NO_UNIQUE_LOGFILE, MYF(ME_FATALERROR), log_name);
2793 sql_print_error(ER_DEFAULT(ER_NO_UNIQUE_LOGFILE), log_name);
2794 return 1;
2795 }
2796 }
2797 }
2798 return 0;
2799}
2800
2801
2802/*
2803 Reopen the log file
2804
2805 SYNOPSIS
2806 reopen_file()
2807
2808 DESCRIPTION
2809 Reopen the log file. The method is used during FLUSH LOGS
2810 and locks LOCK_log mutex
2811*/
2812
2813
2814void MYSQL_QUERY_LOG::reopen_file()
2815{
2816 char *save_name;
2817 DBUG_ENTER("MYSQL_LOG::reopen_file");
2818
2819 mysql_mutex_lock(&LOCK_log);
2820 if (!is_open())
2821 {
2822 DBUG_PRINT("info",("log is closed"));
2823 mysql_mutex_unlock(&LOCK_log);
2824 DBUG_VOID_RETURN;
2825 }
2826
2827 save_name= name;
2828 name= 0; // Don't free name
2829 close(LOG_CLOSE_TO_BE_OPENED);
2830
2831 /*
2832 Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2833 */
2834
2835 open(
2836#ifdef HAVE_PSI_INTERFACE
2837 m_log_file_key,
2838#endif
2839 save_name, log_type, 0, 0, io_cache_type);
2840 my_free(save_name);
2841
2842 mysql_mutex_unlock(&LOCK_log);
2843
2844 DBUG_VOID_RETURN;
2845}
2846
2847
2848/*
2849 Write a command to traditional general log file
2850
2851 SYNOPSIS
2852 write()
2853
2854 event_time command start timestamp
2855 user_host the pointer to the string with user@host info
2856 user_host_len length of the user_host string. this is computed once
2857 and passed to all general log event handlers
2858 thread_id Id of the thread, issued a query
2859 command_type the type of the command being logged
2860 command_type_len the length of the string above
2861 sql_text the very text of the query being executed
2862 sql_text_len the length of sql_text string
2863
2864 DESCRIPTION
2865
2866 Log given command to to normal (not rotable) log file
2867
2868 RETURN
2869 FASE - OK
2870 TRUE - error occurred
2871*/
2872
2873bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
2874 const char *command_type, size_t command_type_len,
2875 const char *sql_text, size_t sql_text_len)
2876{
2877 char buff[32];
2878 char local_time_buff[MAX_TIME_SIZE];
2879 struct tm start;
2880 size_t time_buff_len= 0;
2881
2882 mysql_mutex_lock(&LOCK_log);
2883
2884 /* Test if someone closed between the is_open test and lock */
2885 if (is_open())
2886 {
2887 /* for testing output of timestamp and thread id */
2888 DBUG_EXECUTE_IF("reset_log_last_time", last_time= 0;);
2889
2890 /* Note that my_b_write() assumes it knows the length for this */
2891 if (event_time != last_time)
2892 {
2893 last_time= event_time;
2894
2895 localtime_r(&event_time, &start);
2896
2897 time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE,
2898 "%02d%02d%02d %2d:%02d:%02d\t",
2899 start.tm_year % 100, start.tm_mon + 1,
2900 start.tm_mday, start.tm_hour,
2901 start.tm_min, start.tm_sec);
2902
2903 if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
2904 goto err;
2905 }
2906 else
2907 if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
2908 goto err;
2909
2910 /* command_type, thread_id */
2911 size_t length= my_snprintf(buff, 32, "%6llu ", thread_id_arg);
2912
2913 if (my_b_write(&log_file, (uchar*) buff, length))
2914 goto err;
2915
2916 if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
2917 goto err;
2918
2919 if (my_b_write(&log_file, (uchar*) "\t", 1))
2920 goto err;
2921
2922 /* sql_text */
2923 if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
2924 goto err;
2925
2926 if (my_b_write(&log_file, (uchar*) "\n", 1) ||
2927 flush_io_cache(&log_file))
2928 goto err;
2929 }
2930
2931 mysql_mutex_unlock(&LOCK_log);
2932 return FALSE;
2933err:
2934
2935 if (!write_error)
2936 {
2937 write_error= 1;
2938 sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
2939 }
2940 mysql_mutex_unlock(&LOCK_log);
2941 return TRUE;
2942}
2943
2944
/*
  Log a query to the traditional slow log file

  SYNOPSIS
    write()

    thd               THD of the query
    current_time      current timestamp
    user_host         the pointer to the string with user@host info
    user_host_len     length of the user_host string. this is computed once
                      and passed to all general log event handlers
    query_utime       Amount of time the query took to execute (in microseconds)
    lock_utime        Amount of time the query was locked (in microseconds)
    is_command        The flag, which determines, whether the sql_text is a
                      query or an administrator command.
    sql_text          the very text of the query or administrator command
                      processed
    sql_text_len      the length of sql_text string

  DESCRIPTION

   Log a query to the slow log file.

   The entry consists of "#"-prefixed header lines (time, user@host,
   thread/schema, timings and row counts, plus optional query plan and
   EXPLAIN details selected by log_slow_verbosity), an optional "use <db>;"
   line when the current database differs from the last one logged, a
   "SET ...;" line carrying last_insert_id / insert_id / timestamp, and
   finally the statement text followed by ";".

   All writes happen under LOCK_log. The first write failure sets
   write_error and is reported to the error log; later failures are
   returned without being reported again.

  RETURN
    FALSE - OK
    TRUE - error occurred
*/

bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
                            const char *user_host, size_t user_host_len, ulonglong query_utime,
                            ulonglong lock_utime, bool is_command,
                            const char *sql_text, size_t sql_text_len)
{
  bool error= 0;
  char llbuff[22];
  DBUG_ENTER("MYSQL_QUERY_LOG::write");

  mysql_mutex_lock(&LOCK_log);
  if (is_open())
  {                                             // Safety against reopen
    /*
      buff is first used for the "# Time:" line, then reused to collect the
      comma-separated items of the SET statement ('end' is the write
      position), and finally for the administrator command prefix.
    */
    char buff[80], *end;
    char query_time_buff[22+7], lock_time_buff[22+7];
    size_t buff_len;
    end= buff;

    /*
      NOTE(review): the block opened here is only closed after the statement
      text has been written, so with SPECIAL_SHORT_LOG_FORMAT set nothing at
      all is written for the query - confirm that this is intended.
    */
    if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
    {
      /* The time line is only written when the second has changed. */
      if (current_time != last_time)
      {
        last_time= current_time;
        struct tm start;
        localtime_r(&current_time, &start);

        buff_len= my_snprintf(buff, sizeof buff,
                              "# Time: %02d%02d%02d %2d:%02d:%02d\n",
                              start.tm_year % 100, start.tm_mon + 1,
                              start.tm_mday, start.tm_hour,
                              start.tm_min, start.tm_sec);

        /* Note that my_b_write() assumes it knows the length for this */
        if (my_b_write(&log_file, (uchar*) buff, buff_len))
          goto err;
      }
      const uchar uh[]= "# User@Host: ";
      if (my_b_write(&log_file, uh, sizeof(uh) - 1) ||
          my_b_write(&log_file, (uchar*) user_host, user_host_len) ||
          my_b_write(&log_file, (uchar*) "\n", 1))
        goto err;

      /*
        For slow query log: the microsecond counters are printed as seconds
        with six decimals.
      */
      sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
      sprintf(lock_time_buff, "%.6f", ulonglong2double(lock_utime)/1000000.0);
      if (my_b_printf(&log_file,
                      "# Thread_id: %lu Schema: %s QC_hit: %s\n"
                      "# Query_time: %s Lock_time: %s Rows_sent: %lu Rows_examined: %lu\n"
                      "# Rows_affected: %lu Bytes_sent: %lu\n",
                      (ulong) thd->thread_id, thd->get_db(),
                      ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
                      query_time_buff, lock_time_buff,
                      (ulong) thd->get_sent_row_count(),
                      (ulong) thd->get_examined_row_count(),
                      (ulong) thd->get_affected_rows(),
                      (ulong) (thd->status_var.bytes_sent - thd->bytes_sent_old)))
        goto err;

      /* Temporary table statistics: query-plan verbosity and tables used. */
      if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN)
          && thd->tmp_tables_used &&
          my_b_printf(&log_file,
                      "# Tmp_tables: %lu Tmp_disk_tables: %lu "
                      "Tmp_table_sizes: %s\n",
                      (ulong) thd->tmp_tables_used,
                      (ulong) thd->tmp_tables_disk_used,
                      llstr(thd->tmp_tables_size, llbuff)))
        goto err;

      /* Name the stored routine when running inside one (thd->spcont set). */
      if (thd->spcont &&
          my_b_printf(&log_file, "# Stored_routine: %s\n",
                      ErrConvDQName(thd->spcont->m_sp).ptr()))
        goto err;

      /*
        Query plan summary: written only with query-plan verbosity and when
        at least one of the listed plan flags is set.
      */
      if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) &&
          (thd->query_plan_flags &
           (QPLAN_FULL_SCAN | QPLAN_FULL_JOIN | QPLAN_TMP_TABLE |
            QPLAN_TMP_DISK | QPLAN_FILESORT | QPLAN_FILESORT_DISK |
            QPLAN_FILESORT_PRIORITY_QUEUE)) &&
          my_b_printf(&log_file,
                      "# Full_scan: %s Full_join: %s "
                      "Tmp_table: %s Tmp_table_on_disk: %s\n"
                      "# Filesort: %s Filesort_on_disk: %s Merge_passes: %lu "
                      "Priority_queue: %s\n",
                      ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
                      ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
                      (thd->tmp_tables_used ? "Yes" : "No"),
                      (thd->tmp_tables_disk_used ? "Yes" : "No"),
                      ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
                      ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ?
                       "Yes" : "No"),
                      thd->query_plan_fsort_passes,
                      ((thd->query_plan_flags & QPLAN_FILESORT_PRIORITY_QUEUE) ?
                       "Yes" : "No")
                      ))
        goto err;
      /* EXPLAIN output: explain verbosity and an available explain plan. */
      if (thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_EXPLAIN &&
          thd->lex->explain)
      {
        StringBuffer<128> buf;
        DBUG_ASSERT(!thd->free_list);
        /* A zero result from print_explain_for_slow_log() means buf is usable. */
        if (!print_explain_for_slow_log(thd->lex, thd, &buf))
          if (my_b_printf(&log_file, "%s", buf.c_ptr_safe()))
            goto err;
        /*
          NOTE(review): presumably releases items created while printing the
          explain (free_list is asserted empty above) - confirm.
        */
        thd->free_items();
      }
      if (thd->db.str && strcmp(thd->db.str, db))
      {                                         // Database changed
        if (my_b_printf(&log_file,"use %s;\n",thd->db.str))
          goto err;
        /* Remember the database last written to this log. */
        strmov(db,thd->db.str);
      }
      if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
      {
        end=strmov(end, ",last_insert_id=");
        end=longlong10_to_str((longlong)
                              thd->first_successful_insert_id_in_prev_stmt_for_binlog,
                              end, -10);
      }
      // Save value if we do an insert.
      if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
      {
        /* Always true here: we are already inside the same test above. */
        if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
        {
          end=strmov(end,",insert_id=");
          end=longlong10_to_str((longlong)
                                thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
                                end, -10);
        }
      }

      /*
        This info used to show up randomly, depending on whether the query
        checked the query start time or not. now we always write current
        timestamp to the slow log
      */
      end= strmov(end, ",timestamp=");
      end= int10_to_str((long) current_time, end, 10);

      /* Always true at this point, since the timestamp item was just added. */
      if (end != buff)
      {
        *end++=';';
        *end='\n';
        /*
          buff + 1 skips the leading comma of the first item; the length
          (end - buff) therefore runs up to and including the '\n' that
          'end' points at.
        */
        if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
            my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
          goto err;
      }
      if (is_command)
      {
        /* Administrator commands are prefixed so the line stays a comment. */
        end= strxmov(buff, "# administrator command: ", NullS);
        buff_len= (ulong) (end - buff);
        DBUG_EXECUTE_IF("simulate_slow_log_write_error",
                        {DBUG_SET("+d,simulate_file_write_error");});
        if(my_b_write(&log_file, (uchar*) buff, buff_len))
          goto err;
      }
      if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
          my_b_write(&log_file, (uchar*) ";\n",2) ||
          flush_io_cache(&log_file))
        goto err;

    }
  }
end:
  /* Single exit point: the mutex is released on success and on failure. */
  mysql_mutex_unlock(&LOCK_log);
  DBUG_RETURN(error);

err:
  error= 1;
  /* Report only the first write failure on this log. */
  if (!write_error)
  {
    write_error= 1;
    sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, errno);
  }
  goto end;
}
3147
3148
3149/**
3150 @todo
3151 The following should be using fn_format(); We just need to
3152 first change fn_format() to cut the file name if it's too long.
3153*/
3154const char *MYSQL_LOG::generate_name(const char *log_name,
3155 const char *suffix,
3156 bool strip_ext, char *buff)
3157{
3158 if (!log_name || !log_name[0])
3159 {
3160 strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
3161 return (const char *)
3162 fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3163 }
3164 // get rid of extension if the log is binary to avoid problems
3165 if (strip_ext)
3166 {
3167 char *p= fn_ext(log_name);
3168 uint length= (uint) (p - log_name);
3169 strmake(buff, log_name, MY_MIN(length, FN_REFLEN-1));
3170 return (const char*)buff;
3171 }
3172 return log_name;
3173}
3174
3175
/*
  Print some additional information about addition/removal of
  XID list entries.

  X is a format string taking the binlog name (%s) and the binlog id; Y is a
  pointer to the list entry. The message is only produced when wsrep_debug
  is set.

  Both variants expand to a single do { } while(0) statement, so the macro
  behaves like a function call: it needs a trailing semicolon and is safe
  as the unbraced body of an if/else.

  TODO: Remove once MDEV-9510 is fixed.
*/
#ifdef WITH_WSREP
#define WSREP_XID_LIST_ENTRY(X, Y)                              \
  do                                                            \
  {                                                             \
    if (wsrep_debug)                                            \
    {                                                           \
      char buf[FN_REFLEN];                                      \
      strmake(buf, (Y)->binlog_name, (Y)->binlog_name_len);     \
      WSREP_DEBUG(X, buf, (Y)->binlog_id);                      \
    }                                                           \
  } while(0)
#else
#define WSREP_XID_LIST_ENTRY(X, Y) do { } while(0)
#endif
3192
3193MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
3194 :reset_master_pending(0), mark_xid_done_waiting(0),
3195 bytes_written(0), file_id(1), open_count(1),
3196 group_commit_queue(0), group_commit_queue_busy(FALSE),
3197 num_commits(0), num_group_commits(0),
3198 group_commit_trigger_count(0), group_commit_trigger_timeout(0),
3199 group_commit_trigger_lock_wait(0),
3200 sync_period_ptr(sync_period), sync_counter(0),
3201 state_file_deleted(false), binlog_state_recover_done(false),
3202 is_relay_log(0), relay_signal_cnt(0),
3203 checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
3204 relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
3205 description_event_for_exec(0), description_event_for_queue(0),
3206 current_binlog_id(0)
3207{
3208 /*
3209 We don't want to initialize locks here as such initialization depends on
3210 safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
3211 called only in main(). Doing initialization here would make it happen
3212 before main().
3213 */
3214 index_file_name[0] = 0;
3215 bzero((char*) &index_file, sizeof(index_file));
3216 bzero((char*) &purge_index_file, sizeof(purge_index_file));
3217}
3218
3219void MYSQL_BIN_LOG::stop_background_thread()
3220{
3221 if (binlog_background_thread_started)
3222 {
3223 mysql_mutex_lock(&LOCK_binlog_background_thread);
3224 binlog_background_thread_stop= true;
3225 mysql_cond_signal(&COND_binlog_background_thread);
3226 while (binlog_background_thread_stop)
3227 mysql_cond_wait(&COND_binlog_background_thread_end,
3228 &LOCK_binlog_background_thread);
3229 mysql_mutex_unlock(&LOCK_binlog_background_thread);
3230 binlog_background_thread_started= false;
3231 }
3232}
3233
3234/* this is called only once */
3235
3236void MYSQL_BIN_LOG::cleanup()
3237{
3238 DBUG_ENTER("cleanup");
3239 if (inited)
3240 {
3241 xid_count_per_binlog *b;
3242
3243 /* Wait for the binlog background thread to stop. */
3244 if (!is_relay_log)
3245 stop_background_thread();
3246
3247 inited= 0;
3248 mysql_mutex_lock(&LOCK_log);
3249 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
3250 mysql_mutex_unlock(&LOCK_log);
3251 delete description_event_for_queue;
3252 delete description_event_for_exec;
3253
3254 while ((b= binlog_xid_count_list.get()))
3255 {
3256 /*
3257 There should be no pending XIDs at shutdown, and only one entry (for
3258 the active binlog file) in the list.
3259 */
3260 DBUG_ASSERT(b->xid_count == 0);
3261 DBUG_ASSERT(!binlog_xid_count_list.head());
3262 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::cleanup(): Removing xid_list_entry "
3263 "for %s (%lu)", b);
3264 my_free(b);
3265 }
3266
3267 mysql_mutex_destroy(&LOCK_log);
3268 mysql_mutex_destroy(&LOCK_index);
3269 mysql_mutex_destroy(&LOCK_xid_list);
3270 mysql_mutex_destroy(&LOCK_binlog_background_thread);
3271 mysql_mutex_destroy(&LOCK_binlog_end_pos);
3272 mysql_cond_destroy(&COND_relay_log_updated);
3273 mysql_cond_destroy(&COND_bin_log_updated);
3274 mysql_cond_destroy(&COND_queue_busy);
3275 mysql_cond_destroy(&COND_xid_list);
3276 mysql_cond_destroy(&COND_binlog_background_thread);
3277 mysql_cond_destroy(&COND_binlog_background_thread_end);
3278 }
3279
3280 /*
3281 Free data for global binlog state.
3282 We can't do that automaticly as we need to do this before
3283 safemalloc is shut down
3284 */
3285 if (!is_relay_log)
3286 rpl_global_gtid_binlog_state.free();
3287 DBUG_VOID_RETURN;
3288}
3289
3290
3291/* Init binlog-specific vars */
3292void MYSQL_BIN_LOG::init(ulong max_size_arg)
3293{
3294 DBUG_ENTER("MYSQL_BIN_LOG::init");
3295 max_size= max_size_arg;
3296 DBUG_PRINT("info",("max_size: %lu", max_size));
3297 DBUG_VOID_RETURN;
3298}
3299
3300
3301void MYSQL_BIN_LOG::init_pthread_objects()
3302{
3303 MYSQL_LOG::init_pthread_objects();
3304 mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
3305 mysql_mutex_setflags(&LOCK_index, MYF_NO_DEADLOCK_DETECTION);
3306 mysql_mutex_init(key_BINLOG_LOCK_xid_list,
3307 &LOCK_xid_list, MY_MUTEX_INIT_FAST);
3308 mysql_cond_init(m_key_relay_log_update, &COND_relay_log_updated, 0);
3309 mysql_cond_init(m_key_bin_log_update, &COND_bin_log_updated, 0);
3310 mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
3311 mysql_cond_init(key_BINLOG_COND_xid_list, &COND_xid_list, 0);
3312
3313 mysql_mutex_init(key_BINLOG_LOCK_binlog_background_thread,
3314 &LOCK_binlog_background_thread, MY_MUTEX_INIT_FAST);
3315 mysql_cond_init(key_BINLOG_COND_binlog_background_thread,
3316 &COND_binlog_background_thread, 0);
3317 mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
3318 &COND_binlog_background_thread_end, 0);
3319
3320 mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
3321 MY_MUTEX_INIT_SLOW);
3322}
3323
3324
3325bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
3326 const char *log_name, bool need_mutex)
3327{
3328 File index_file_nr= -1;
3329 DBUG_ASSERT(!my_b_inited(&index_file));
3330
3331 /*
3332 First open of this class instance
3333 Create an index file that will hold all file names uses for logging.
3334 Add new entries to the end of it.
3335 */
3336 myf opt= MY_UNPACK_FILENAME;
3337 if (!index_file_name_arg)
3338 {
3339 index_file_name_arg= log_name; // Use same basename for index file
3340 opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
3341 }
3342 fn_format(index_file_name, index_file_name_arg, mysql_data_home,
3343 ".index", opt);
3344 if ((index_file_nr= mysql_file_open(m_key_file_log_index,
3345 index_file_name,
3346 O_RDWR | O_CREAT | O_BINARY,
3347 MYF(MY_WME))) < 0 ||
3348 mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
3349 init_io_cache(&index_file, index_file_nr,
3350 IO_SIZE, WRITE_CACHE,
3351 mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
3352 0, MYF(MY_WME | MY_WAIT_IF_FULL)) ||
3353 DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
3354 {
3355 /*
3356 TODO: all operations creating/deleting the index file or a log, should
3357 call my_sync_dir() or my_sync_dir_by_file() to be durable.
3358 TODO: file creation should be done with mysql_file_create()
3359 not mysql_file_open().
3360 */
3361 if (index_file_nr >= 0)
3362 mysql_file_close(index_file_nr, MYF(0));
3363 return TRUE;
3364 }
3365
3366#ifdef HAVE_REPLICATION
3367 /*
3368 Sync the index by purging any binary log file that is not registered.
3369 In other words, either purge binary log files that were removed from
3370 the index but not purged from the file system due to a crash or purge
3371 any binary log file that was created but not register in the index
3372 due to a crash.
3373 */
3374
3375 if (set_purge_index_file_name(index_file_name_arg) ||
3376 open_purge_index_file(FALSE) ||
3377 purge_index_entry(NULL, NULL, need_mutex) ||
3378 close_purge_index_file() ||
3379 DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
3380 {
3381 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
3382 "file.");
3383 return TRUE;
3384 }
3385#endif
3386
3387 return FALSE;
3388}
3389
3390
3391/**
3392 Open a (new) binlog file.
3393
3394 - Open the log file and the index file. Register the new
3395 file name in it
3396 - When calling this when the file is in use, you must have a locks
3397 on LOCK_log and LOCK_index.
3398
3399 @retval
3400 0 ok
3401 @retval
3402 1 error
3403*/
3404
3405bool MYSQL_BIN_LOG::open(const char *log_name,
3406 enum_log_type log_type_arg,
3407 const char *new_name,
3408 ulong next_log_number,
3409 enum cache_type io_cache_type_arg,
3410 ulong max_size_arg,
3411 bool null_created_arg,
3412 bool need_mutex)
3413{
3414 File file= -1;
3415 xid_count_per_binlog *new_xid_list_entry= NULL, *b;
3416 DBUG_ENTER("MYSQL_BIN_LOG::open");
3417 DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
3418
3419 mysql_mutex_assert_owner(&LOCK_log);
3420
3421 if (!is_relay_log)
3422 {
3423 if (!binlog_state_recover_done)
3424 {
3425 binlog_state_recover_done= true;
3426 if (do_binlog_recovery(opt_bin_logname, false))
3427 DBUG_RETURN(1);
3428 }
3429
3430 if (!binlog_background_thread_started &&
3431 start_binlog_background_thread())
3432 DBUG_RETURN(1);
3433 }
3434
3435 /* We need to calculate new log file name for purge to delete old */
3436 if (init_and_set_log_file_name(log_name, new_name, next_log_number,
3437 log_type_arg, io_cache_type_arg))
3438 {
3439 sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
3440 DBUG_RETURN(1);
3441 }
3442
3443#ifdef HAVE_REPLICATION
3444 if (open_purge_index_file(TRUE) ||
3445 register_create_index_entry(log_file_name) ||
3446 sync_purge_index_file() ||
3447 DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
3448 {
3449 /**
3450 TODO:
3451 Although this was introduced to appease valgrind when
3452 injecting emulated faults using
3453 fault_injection_registering_index it may be good to consider
3454 what actually happens when open_purge_index_file succeeds but
3455 register or sync fails.
3456
3457 Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
3458 for "real life" purposes as well?
3459 */
3460 DBUG_EXECUTE_IF("fault_injection_registering_index", {
3461 if (my_b_inited(&purge_index_file))
3462 {
3463 end_io_cache(&purge_index_file);
3464 my_close(purge_index_file.file, MYF(0));
3465 }
3466 });
3467
3468 sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
3469 DBUG_RETURN(1);
3470 }
3471 DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
3472#endif
3473
3474 write_error= 0;
3475
3476 /* open the main log file */
3477 if (MYSQL_LOG::open(
3478#ifdef HAVE_PSI_INTERFACE
3479 m_key_file_log,
3480#endif
3481 log_name,
3482 LOG_UNKNOWN, /* Don't generate new name */
3483 0, 0, io_cache_type_arg))
3484 {
3485#ifdef HAVE_REPLICATION
3486 close_purge_index_file();
3487#endif
3488 DBUG_RETURN(1); /* all warnings issued */
3489 }
3490
3491 init(max_size_arg);
3492
3493 open_count++;
3494
3495 DBUG_ASSERT(log_type == LOG_BIN);
3496
3497 {
3498 bool write_file_name_to_index_file=0;
3499
3500 if (!my_b_filelength(&log_file))
3501 {
3502 /*
3503 The binary log file was empty (probably newly created)
3504 This is the normal case and happens when the user doesn't specify
3505 an extension for the binary log files.
3506 In this case we write a standard header to it.
3507 */
3508 if (my_b_safe_write(&log_file, BINLOG_MAGIC,
3509 BIN_LOG_HEADER_SIZE))
3510 goto err;
3511 bytes_written+= BIN_LOG_HEADER_SIZE;
3512 write_file_name_to_index_file= 1;
3513 }
3514
3515 {
3516 /*
3517 In 4.x we put Start event only in the first binlog. But from 5.0 we
3518 want a Start event even if this is not the very first binlog.
3519 */
3520 Format_description_log_event s(BINLOG_VERSION);
3521 /*
3522 don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
3523 as we won't be able to reset it later
3524 */
3525 if (io_cache_type == WRITE_CACHE)
3526 s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
3527
3528 if (is_relay_log)
3529 {
3530 if (relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
3531 relay_log_checksum_alg=
3532 opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
3533 : BINLOG_CHECKSUM_ALG_OFF;
3534 s.checksum_alg= relay_log_checksum_alg;
3535 }
3536 else
3537 s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;
3538
3539 crypto.scheme = 0;
3540 DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
3541 if (!s.is_valid())
3542 goto err;
3543 s.dont_set_created= null_created_arg;
3544 if (write_event(&s))
3545 goto err;
3546 bytes_written+= s.data_written;
3547
3548 if (encrypt_binlog)
3549 {
3550 uint key_version= encryption_key_get_latest_version(ENCRYPTION_KEY_SYSTEM_DATA);
3551 if (key_version == ENCRYPTION_KEY_VERSION_INVALID)
3552 {
3553 sql_print_error("Failed to enable encryption of binary logs");
3554 goto err;
3555 }
3556
3557 if (key_version != ENCRYPTION_KEY_NOT_ENCRYPTED)
3558 {
3559 if (my_random_bytes(crypto.nonce, sizeof(crypto.nonce)))
3560 goto err;
3561
3562 Start_encryption_log_event sele(1, key_version, crypto.nonce);
3563 sele.checksum_alg= s.checksum_alg;
3564 if (write_event(&sele))
3565 goto err;
3566
3567 // Start_encryption_log_event is written, enable the encryption
3568 if (crypto.init(sele.crypto_scheme, key_version))
3569 goto err;
3570 }
3571 }
3572
3573 if (!is_relay_log)
3574 {
3575 char buf[FN_REFLEN];
3576
3577 /*
3578 Output a Gtid_list_log_event at the start of the binlog file.
3579
3580 This is used to quickly determine which GTIDs are found in binlog
3581 files earlier than this one, and which are found in this (or later)
3582 binlogs.
3583
3584 The list gives a mapping from (domain_id, server_id) -> seq_no (so
3585 this means that there is at most one entry for every unique pair
3586 (domain_id, server_id) in the list). It indicates that this seq_no is
3587 the last one found in an earlier binlog file for this (domain_id,
3588 server_id) combination - so any higher seq_no should be search for
3589 from this binlog file, or a later one.
3590
3591 This allows to locate the binlog file containing a given GTID by
3592 scanning backwards, reading just the Gtid_list_log_event at the
3593 start of each file, and scanning only the relevant binlog file when
3594 found, not all binlog files.
3595
3596 The existence of a given entry (domain_id, server_id, seq_no)
3597 guarantees only that this seq_no will not be found in this or any
3598 later binlog file. It does not guarantee that it can be found it an
3599 earlier binlog file, for example the file may have been purged.
3600
3601 If there is no entry for a given (domain_id, server_id) pair, then
3602 it means that no such GTID exists in any earlier binlog. It is
3603 permissible to remove such pair from future Gtid_list_log_events
3604 if all previous binlog files containing such GTIDs have been purged
3605 (though such optimization is not performed at the time of this
3606 writing). So if there is no entry for given GTID it means that such
3607 GTID should be search for in this or later binlog file, same as if
3608 there had been an entry (domain_id, server_id, 0).
3609 */
3610
3611 Gtid_list_log_event gl_ev(&rpl_global_gtid_binlog_state, 0);
3612 if (write_event(&gl_ev))
3613 goto err;
3614
3615 /* Output a binlog checkpoint event at the start of the binlog file. */
3616
3617 /*
3618 Construct an entry in the binlog_xid_count_list for the new binlog
3619 file (we will not link it into the list until we know the new file
3620 is successfully created; otherwise we would have to remove it again
3621 if creation failed, which gets tricky since other threads may have
3622 seen the entry in the meantime - and we do not want to hold
3623 LOCK_xid_list for long periods of time).
3624
3625 Write the current binlog checkpoint into the log, so XA recovery will
3626 know from where to start recovery.
3627 */
3628 size_t off= dirname_length(log_file_name);
3629 size_t len= strlen(log_file_name) - off;
3630 char *entry_mem, *name_mem;
3631 if (!(new_xid_list_entry = (xid_count_per_binlog *)
3632 my_multi_malloc(MYF(MY_WME),
3633 &entry_mem, sizeof(xid_count_per_binlog),
3634 &name_mem, len,
3635 NULL)))
3636 goto err;
3637 memcpy(name_mem, log_file_name+off, len);
3638 new_xid_list_entry->binlog_name= name_mem;
3639 new_xid_list_entry->binlog_name_len= (int)len;
3640 new_xid_list_entry->xid_count= 0;
3641 new_xid_list_entry->notify_count= 0;
3642
3643 /*
3644 Find the name for the Initial binlog checkpoint.
3645
3646 Normally this will just be the first entry, as we delete entries
3647 when their count drops to zero. But we scan the list to handle any
3648 corner case, eg. for the first binlog file opened after startup, the
3649 list will be empty.
3650 */
3651 mysql_mutex_lock(&LOCK_xid_list);
3652 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
3653 while ((b= it++) && b->xid_count == 0)
3654 ;
3655 mysql_mutex_unlock(&LOCK_xid_list);
3656 if (!b)
3657 b= new_xid_list_entry;
3658 strmake(buf, b->binlog_name, b->binlog_name_len);
3659 Binlog_checkpoint_log_event ev(buf, (uint)len);
3660 DBUG_EXECUTE_IF("crash_before_write_checkpoint_event",
3661 flush_io_cache(&log_file);
3662 mysql_file_sync(log_file.file, MYF(MY_WME));
3663 DBUG_SUICIDE(););
3664 if (write_event(&ev))
3665 goto err;
3666 bytes_written+= ev.data_written;
3667 }
3668 }
3669 if (description_event_for_queue &&
3670 description_event_for_queue->binlog_version>=4)
3671 {
3672 /*
3673 This is a relay log written to by the I/O slave thread.
3674 Write the event so that others can later know the format of this relay
3675 log.
3676 Note that this event is very close to the original event from the
3677 master (it has binlog version of the master, event types of the
3678 master), so this is suitable to parse the next relay log's event. It
3679 has been produced by
3680 Format_description_log_event::Format_description_log_event(char* buf,).
3681 Why don't we want to write the description_event_for_queue if this
3682 event is for format<4 (3.23 or 4.x): this is because in that case, the
3683 description_event_for_queue describes the data received from the
3684 master, but not the data written to the relay log (*conversion*),
3685 which is in format 4 (slave's).
3686 */
3687 /*
3688 Set 'created' to 0, so that in next relay logs this event does not
3689 trigger cleaning actions on the slave in
3690 Format_description_log_event::apply_event_impl().
3691 */
3692 description_event_for_queue->created= 0;
3693 /* Don't set log_pos in event header */
3694 description_event_for_queue->set_artificial_event();
3695
3696 if (write_event(description_event_for_queue))
3697 goto err;
3698 bytes_written+= description_event_for_queue->data_written;
3699 }
3700 if (flush_io_cache(&log_file) ||
3701 mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3702 goto err;
3703
3704 my_off_t offset= my_b_tell(&log_file);
3705
3706 if (!is_relay_log)
3707 {
3708 /* update binlog_end_pos so that it can be read by after sync hook */
3709 reset_binlog_end_pos(log_file_name, offset);
3710
3711 mysql_mutex_lock(&LOCK_commit_ordered);
3712 strmake_buf(last_commit_pos_file, log_file_name);
3713 last_commit_pos_offset= offset;
3714 mysql_mutex_unlock(&LOCK_commit_ordered);
3715 }
3716
3717 if (write_file_name_to_index_file)
3718 {
3719#ifdef HAVE_REPLICATION
3720#ifdef ENABLED_DEBUG_SYNC
3721 if (current_thd)
3722 DEBUG_SYNC(current_thd, "binlog_open_before_update_index");
3723#endif
3724 DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
3725#endif
3726
3727 DBUG_ASSERT(my_b_inited(&index_file) != 0);
3728 reinit_io_cache(&index_file, WRITE_CACHE,
3729 my_b_filelength(&index_file), 0, 0);
3730 /*
3731 As this is a new log file, we write the file name to the index
3732 file. As every time we write to the index file, we sync it.
3733 */
3734 if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
3735 my_b_write(&index_file, (uchar*) log_file_name,
3736 strlen(log_file_name)) ||
3737 my_b_write(&index_file, (uchar*) "\n", 1) ||
3738 flush_io_cache(&index_file) ||
3739 mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3740 goto err;
3741
3742#ifdef HAVE_REPLICATION
3743 DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
3744#endif
3745 }
3746 }
3747
3748 if (!is_relay_log)
3749 {
3750 /*
3751 Now the file was created successfully, so we can link in the entry for
3752 the new binlog file in binlog_xid_count_list.
3753 */
3754 mysql_mutex_lock(&LOCK_xid_list);
3755 ++current_binlog_id;
3756 new_xid_list_entry->binlog_id= current_binlog_id;
3757 /* Remove any initial entries with no pending XIDs. */
3758 while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
3759 {
3760 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Removing xid_list_entry for "
3761 "%s (%lu)", b);
3762 my_free(binlog_xid_count_list.get());
3763 }
3764 mysql_cond_broadcast(&COND_xid_list);
3765 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Adding new xid_list_entry for "
3766 "%s (%lu)", new_xid_list_entry);
3767 binlog_xid_count_list.push_back(new_xid_list_entry);
3768 mysql_mutex_unlock(&LOCK_xid_list);
3769
3770 /*
3771 Now that we have synced a new binlog file with an initial Gtid_list
3772 event, it is safe to delete the binlog state file. We will write out
3773 a new, updated file at shutdown, and if we crash before we can recover
3774 the state from the newly written binlog file.
3775
3776 Since the state file will contain out-of-date data as soon as the first
3777 new GTID is binlogged, it is better to remove it, to avoid any risk of
3778 accidentally reading incorrect data later.
3779 */
3780 if (!state_file_deleted)
3781 {
3782 char buf[FN_REFLEN];
3783 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
3784 MY_UNPACK_FILENAME);
3785 my_delete(buf, MY_SYNC_DIR);
3786 state_file_deleted= true;
3787 }
3788 }
3789
3790 log_state= LOG_OPENED;
3791
3792#ifdef HAVE_REPLICATION
3793 close_purge_index_file();
3794#endif
3795
3796 /* Notify the io thread that binlog is rotated to a new file */
3797 if (is_relay_log)
3798 signal_relay_log_update();
3799 else
3800 update_binlog_end_pos();
3801 DBUG_RETURN(0);
3802
3803err:
3804 int tmp_errno= errno;
3805#ifdef HAVE_REPLICATION
3806 if (is_inited_purge_index_file())
3807 purge_index_entry(NULL, NULL, need_mutex);
3808 close_purge_index_file();
3809#endif
3810 sql_print_error(fatal_log_error, name, tmp_errno);
3811 if (new_xid_list_entry)
3812 my_free(new_xid_list_entry);
3813 if (file >= 0)
3814 mysql_file_close(file, MYF(0));
3815 close(LOG_CLOSE_INDEX);
3816 DBUG_RETURN(1);
3817}
3818
3819
3820int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
3821{
3822 mysql_mutex_lock(&LOCK_log);
3823 int ret = raw_get_current_log(linfo);
3824 mysql_mutex_unlock(&LOCK_log);
3825 return ret;
3826}
3827
3828int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
3829{
3830 mysql_mutex_assert_owner(&LOCK_log);
3831 strmake_buf(linfo->log_file_name, log_file_name);
3832 linfo->pos = my_b_tell(&log_file);
3833 return 0;
3834}
3835
/**
  Move all data up in a file in an filename index file.

  Everything from 'offset' to the end of the file is copied towards the
  beginning of the file, so that the byte at 'offset' ends up at position 0.
  The copy works directly on the file descriptor rather than through the
  IO_CACHE, as the cache buffers would only make things slower and more
  complicated.  In most cases a single read is expected to be enough.

  @param index_file			File to move
  @param offset			Move everything from here to beginning

  @note
    File will be truncated to be 'offset' shorter or filled up with newlines

  @retval
    0	ok
  @retval
    1	error while reading, writing, resizing or syncing the file
*/

#ifdef HAVE_REPLICATION

static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
{
  const my_off_t shift= offset;           /* distance every byte moves up */
  const File fd= index_file->file;
  my_off_t read_pos= offset;              /* where the next chunk is read */
  uchar chunk[IO_SIZE*2];
  DBUG_ENTER("copy_up_file_and_fill");

  for (;;)
  {
    int got;

    mysql_file_seek(fd, read_pos, MY_SEEK_SET, MYF(0));
    got= (int) mysql_file_read(fd, chunk, sizeof(chunk), MYF(MY_WME));
    if (got < 0)
      DBUG_RETURN(1);
    if (got == 0)
      break;					// end of file

    /* Write the chunk 'shift' bytes earlier than where it was read. */
    mysql_file_seek(fd, read_pos - shift, MY_SEEK_SET, MYF(0));
    if (mysql_file_write(fd, chunk, got,
                         MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
      DBUG_RETURN(1);

    read_pos+= got;
  }

  /* The following will either truncate the file or fill the end with '\n' */
  if (mysql_file_chsize(fd, read_pos - shift, '\n', MYF(MY_WME)) ||
      mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE)))
    DBUG_RETURN(1);

  /* Reset data in old index cache */
  reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
  DBUG_RETURN(0);
}

#endif /* HAVE_REPLICATION */
3891
3892/**
3893 Find the position in the log-index-file for the given log name.
3894
3895 @param linfo Store here the found log file name and position to
3896 the NEXT log file name in the index file.
3897 @param log_name Filename to find in the index file.
3898 Is a null pointer if we want to read the first entry
3899 @param need_lock Set this to 1 if the parent doesn't already have a
3900 lock on LOCK_index
3901
3902 @note
3903 On systems without the truncate function the file will end with one or
3904 more empty lines. These will be ignored when reading the file.
3905
3906 @retval
3907 0 ok
3908 @retval
3909 LOG_INFO_EOF End of log-index-file found
3910 @retval
3911 LOG_INFO_IO Got IO error while reading file
3912*/
3913
3914int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
3915 bool need_lock)
3916{
3917 int error= 0;
3918 char *full_fname= linfo->log_file_name;
3919 char full_log_name[FN_REFLEN], fname[FN_REFLEN];
3920 uint log_name_len= 0, fname_len= 0;
3921 DBUG_ENTER("find_log_pos");
3922 full_log_name[0]= full_fname[0]= 0;
3923
3924 /*
3925 Mutex needed because we need to make sure the file pointer does not
3926 move from under our feet
3927 */
3928 if (need_lock)
3929 mysql_mutex_lock(&LOCK_index);
3930 mysql_mutex_assert_owner(&LOCK_index);
3931
3932 // extend relative paths for log_name to be searched
3933 if (log_name)
3934 {
3935 if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
3936 {
3937 error= LOG_INFO_EOF;
3938 goto end;
3939 }
3940 }
3941
3942 log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
3943 DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
3944 log_name ? log_name : "NULL", full_log_name));
3945
3946 /* As the file is flushed, we can't get an error here */
3947 (void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
3948
3949 for (;;)
3950 {
3951 size_t length;
3952 my_off_t offset= my_b_tell(&index_file);
3953
3954 DBUG_EXECUTE_IF("simulate_find_log_pos_error",
3955 error= LOG_INFO_EOF; break;);
3956 /* If we get 0 or 1 characters, this is the end of the file */
3957 if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
3958 {
3959 /* Did not find the given entry; Return not found or error */
3960 error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
3961 break;
3962 }
3963 if (fname[length-1] != '\n')
3964 continue; // Not a log entry
3965 fname[length-1]= 0; // Remove end \n
3966
3967 // extend relative paths and match against full path
3968 if (normalize_binlog_name(full_fname, fname, is_relay_log))
3969 {
3970 error= LOG_INFO_EOF;
3971 break;
3972 }
3973 fname_len= (uint) strlen(full_fname);
3974
3975 // if the log entry matches, null string matching anything
3976 if (!log_name ||
3977 (log_name_len == fname_len &&
3978 !memcmp(full_fname, full_log_name, log_name_len)))
3979 {
3980 DBUG_PRINT("info", ("Found log file entry"));
3981 linfo->index_file_start_offset= offset;
3982 linfo->index_file_offset = my_b_tell(&index_file);
3983 break;
3984 }
3985 }
3986
3987end:
3988 if (need_lock)
3989 mysql_mutex_unlock(&LOCK_index);
3990 DBUG_RETURN(error);
3991}
3992
3993
3994/**
3995 Find the position in the log-index-file for the given log name.
3996
3997 @param
3998 linfo Store here the next log file name and position to
3999 the file name after that.
4000 @param
4001 need_lock Set this to 1 if the parent doesn't already have a
4002 lock on LOCK_index
4003
4004 @note
4005 - Before calling this function, one has to call find_log_pos()
4006 to set up 'linfo'
4007 - Mutex needed because we need to make sure the file pointer does not move
4008 from under our feet
4009
4010 @retval
4011 0 ok
4012 @retval
4013 LOG_INFO_EOF End of log-index-file found
4014 @retval
4015 LOG_INFO_IO Got IO error while reading file
4016*/
4017
4018int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
4019{
4020 int error= 0;
4021 size_t length;
4022 char fname[FN_REFLEN];
4023 char *full_fname= linfo->log_file_name;
4024
4025 if (need_lock)
4026 mysql_mutex_lock(&LOCK_index);
4027 mysql_mutex_assert_owner(&LOCK_index);
4028
4029 /* As the file is flushed, we can't get an error here */
4030 (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
4031 0);
4032
4033 linfo->index_file_start_offset= linfo->index_file_offset;
4034 if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4035 {
4036 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4037 goto err;
4038 }
4039
4040 if (fname[0] != 0)
4041 {
4042 if(normalize_binlog_name(full_fname, fname, is_relay_log))
4043 {
4044 error= LOG_INFO_EOF;
4045 goto err;
4046 }
4047 length= strlen(full_fname);
4048 }
4049
4050 full_fname[length-1]= 0; // kill \n
4051 linfo->index_file_offset= my_b_tell(&index_file);
4052
4053err:
4054 if (need_lock)
4055 mysql_mutex_unlock(&LOCK_index);
4056 return error;
4057}
4058
4059
4060/**
  Delete all logs referred to in the index file.
4062
4063 The new index file will only contain this file.
4064
4065 @param thd Thread id. This can be zero in case of resetting
4066 relay logs
4067 @param create_new_log 1 if we should start writing to a new log file
4068 @param next_log_number min number of next log file to use, if possible.
4069
4070 @note
4071 If not called from slave thread, write start event to new log
4072
4073 @retval
4074 0 ok
4075 @retval
4076 1 error
4077*/
4078
4079bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
4080 rpl_gtid *init_state, uint32 init_state_len,
4081 ulong next_log_number)
4082{
4083 LOG_INFO linfo;
4084 bool error=0;
4085 int err;
4086 const char* save_name;
4087 DBUG_ENTER("reset_logs");
4088
4089 if (!is_relay_log)
4090 {
4091 if (init_state && !is_empty_state())
4092 {
4093 my_error(ER_BINLOG_MUST_BE_EMPTY, MYF(0));
4094 DBUG_RETURN(1);
4095 }
4096
4097 /*
4098 Mark that a RESET MASTER is in progress.
4099 This ensures that a binlog checkpoint will not try to write binlog
4100 checkpoint events, which would be useless (as we are deleting the binlog
4101 anyway) and could deadlock, as we are holding LOCK_log.
4102
4103 Wait for any mark_xid_done() calls that might be already running to
4104 complete (mark_xid_done_waiting counter to drop to zero); we need to
4105 do this before we take the LOCK_log to not deadlock.
4106 */
4107 mysql_mutex_lock(&LOCK_xid_list);
4108 reset_master_pending++;
4109 while (mark_xid_done_waiting > 0)
4110 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4111 mysql_mutex_unlock(&LOCK_xid_list);
4112 }
4113
4114 DEBUG_SYNC_C_IF_THD(thd, "reset_logs_after_set_reset_master_pending");
4115 /*
4116 We need to get both locks to be sure that no one is trying to
4117 write to the index log file.
4118 */
4119 mysql_mutex_lock(&LOCK_log);
4120 mysql_mutex_lock(&LOCK_index);
4121
4122 if (!is_relay_log)
4123 {
4124 /*
4125 We are going to nuke all binary log files.
4126 Without binlog, we cannot XA recover prepared-but-not-committed
4127 transactions in engines. So force a commit checkpoint first.
4128
4129 Note that we take and immediately
4130 release LOCK_after_binlog_sync/LOCK_commit_ordered. This has
4131 the effect to ensure that any on-going group commit (in
4132 trx_group_commit_leader()) has completed before we request the checkpoint,
4133 due to the chaining of LOCK_log and LOCK_commit_ordered in that function.
4134 (We are holding LOCK_log, so no new group commit can start).
4135
4136 Without this, it is possible (though perhaps unlikely) that the RESET
4137 MASTER could run in-between the write to the binlog and the
4138 commit_ordered() in the engine of some transaction, and then a crash
4139 later would leave such transaction not recoverable.
4140 */
4141
4142 mysql_mutex_lock(&LOCK_after_binlog_sync);
4143 mysql_mutex_lock(&LOCK_commit_ordered);
4144 mysql_mutex_unlock(&LOCK_after_binlog_sync);
4145 mysql_mutex_unlock(&LOCK_commit_ordered);
4146
4147 mark_xids_active(current_binlog_id, 1);
4148 do_checkpoint_request(current_binlog_id);
4149
4150 /* Now wait for all checkpoint requests and pending unlog() to complete. */
4151 mysql_mutex_lock(&LOCK_xid_list);
4152 for (;;)
4153 {
4154 if (is_xidlist_idle_nolock())
4155 break;
4156 /*
4157 Wait until signalled that one more binlog dropped to zero, then check
4158 again.
4159 */
4160 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4161 }
4162
4163 /*
4164 Now all XIDs are fully flushed to disk, and we are holding LOCK_log so
4165 no new ones will be written. So we can proceed to delete the logs.
4166 */
4167 mysql_mutex_unlock(&LOCK_xid_list);
4168 }
4169
4170 /* Save variables so that we can reopen the log */
4171 save_name=name;
4172 name=0; // Protect against free
4173 close(LOG_CLOSE_TO_BE_OPENED);
4174
4175 /*
4176 First delete all old log files and then update the index file.
4177 As we first delete the log files and do not use sort of logging,
4178 a crash may lead to an inconsistent state where the index has
4179 references to non-existent files.
4180
4181 We need to invert the steps and use the purge_index_file methods
4182 in order to make the operation safe.
4183 */
4184
4185 if ((err= find_log_pos(&linfo, NullS, 0)) != 0)
4186 {
4187 uint errcode= purge_log_get_error_code(err);
4188 sql_print_error("Failed to locate old binlog or relay log files");
4189 my_message(errcode, ER_THD_OR_DEFAULT(thd, errcode), MYF(0));
4190 error= 1;
4191 goto err;
4192 }
4193
4194 for (;;)
4195 {
4196 if (unlikely((error= my_delete(linfo.log_file_name, MYF(0)))))
4197 {
4198 if (my_errno == ENOENT)
4199 {
4200 if (thd)
4201 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4202 ER_LOG_PURGE_NO_FILE,
4203 ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4204 linfo.log_file_name);
4205
4206 sql_print_information("Failed to delete file '%s'",
4207 linfo.log_file_name);
4208 my_errno= 0;
4209 error= 0;
4210 }
4211 else
4212 {
4213 if (thd)
4214 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4215 ER_BINLOG_PURGE_FATAL_ERR,
4216 "a problem with deleting %s; "
4217 "consider examining correspondence "
4218 "of your binlog index file "
4219 "to the actual binlog files",
4220 linfo.log_file_name);
4221 error= 1;
4222 goto err;
4223 }
4224 }
4225 if (find_next_log(&linfo, 0))
4226 break;
4227 }
4228
4229 if (!is_relay_log)
4230 {
4231 if (init_state)
4232 rpl_global_gtid_binlog_state.load(init_state, init_state_len);
4233 else
4234 rpl_global_gtid_binlog_state.reset();
4235 }
4236
4237 /* Start logging with a new file */
4238 close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED);
4239 // Reset (open will update)
4240 if (unlikely((error= my_delete(index_file_name, MYF(0)))))
4241 {
4242 if (my_errno == ENOENT)
4243 {
4244 if (thd)
4245 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4246 ER_LOG_PURGE_NO_FILE,
4247 ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4248 index_file_name);
4249 sql_print_information("Failed to delete file '%s'",
4250 index_file_name);
4251 my_errno= 0;
4252 error= 0;
4253 }
4254 else
4255 {
4256 if (thd)
4257 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4258 ER_BINLOG_PURGE_FATAL_ERR,
4259 "a problem with deleting %s; "
4260 "consider examining correspondence "
4261 "of your binlog index file "
4262 "to the actual binlog files",
4263 index_file_name);
4264 error= 1;
4265 goto err;
4266 }
4267 }
4268 if (create_new_log && !open_index_file(index_file_name, 0, FALSE))
4269 if (unlikely((error= open(save_name, log_type, 0, next_log_number,
4270 io_cache_type, max_size, 0, FALSE))))
4271 goto err;
4272 my_free((void *) save_name);
4273
4274err:
4275 if (error == 1)
4276 name= const_cast<char*>(save_name);
4277
4278 if (!is_relay_log)
4279 {
4280 xid_count_per_binlog *b;
4281 /*
4282 Remove all entries in the xid_count list except the last.
4283 Normally we will just be deleting all the entries that we waited for to
4284 drop to zero above. But if we fail during RESET MASTER for some reason
4285 then we will not have created any new log file, and we may keep the last
4286 of the old entries.
4287 */
4288 mysql_mutex_lock(&LOCK_xid_list);
4289 for (;;)
4290 {
4291 b= binlog_xid_count_list.head();
4292 DBUG_ASSERT(b /* List can never become empty. */);
4293 if (b->binlog_id == current_binlog_id)
4294 break;
4295 DBUG_ASSERT(b->xid_count == 0);
4296 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
4297 "xid_list_entry for %s (%lu)", b);
4298 my_free(binlog_xid_count_list.get());
4299 }
4300 mysql_cond_broadcast(&COND_xid_list);
4301 reset_master_pending--;
4302 mysql_mutex_unlock(&LOCK_xid_list);
4303 }
4304
4305 mysql_mutex_unlock(&LOCK_index);
4306 mysql_mutex_unlock(&LOCK_log);
4307 DBUG_RETURN(error);
4308}
4309
4310
4311void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
4312{
4313 mysql_mutex_lock(&LOCK_xid_list);
4314 for (;;)
4315 {
4316 if (binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
4317 break;
4318 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4319 }
4320 mysql_mutex_unlock(&LOCK_xid_list);
4321
4322 /*
4323 LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be
4324 obtained after mark_xid_done() has written the last checkpoint event.
4325 */
4326 mysql_mutex_lock(&LOCK_log);
4327 mysql_mutex_unlock(&LOCK_log);
4328}
4329
4330
4331/**
4332 Delete relay log files prior to rli->group_relay_log_name
4333 (i.e. all logs which are not involved in a non-finished group
4334 (transaction)), remove them from the index file and start on next
4335 relay log.
4336
4337 IMPLEMENTATION
4338
4339 - You must hold rli->data_lock before calling this function, since
4340 it writes group_relay_log_pos and similar fields of
4341 Relay_log_info.
4342 - Protects index file with LOCK_index
4343 - Delete relevant relay log files
4344 - Copy all file names after these ones to the front of the index file
4345 - If the OS has truncate, truncate the file, else fill it with \n'
4346 - Read the next file name from the index file and store in rli->linfo
4347
4348 @param rli Relay log information
4349 @param included If false, all relay logs that are strictly before
4350 rli->group_relay_log_name are deleted ; if true, the
4351 latter is deleted too (i.e. all relay logs
4352 read by the SQL slave thread are deleted).
4353
4354 @note
4355 - This is only called from the slave SQL thread when it has read
4356 all commands from a relay log and want to switch to a new relay log.
4357 - When this happens, we can be in an active transaction as
4358 a transaction can span over two relay logs
4359 (although it is always written as a single block to the master's binary
4360 log, hence cannot span over two master's binary logs).
4361
4362 @retval
4363 0 ok
4364 @retval
4365 LOG_INFO_EOF End of log-index-file found
4366 @retval
4367 LOG_INFO_SEEK Could not allocate IO cache
4368 @retval
4369 LOG_INFO_IO Got IO error while reading file
4370*/
4371
4372#ifdef HAVE_REPLICATION
4373
4374int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
4375{
4376 int error, errcode;
4377 char *to_purge_if_included= NULL;
4378 inuse_relaylog *ir;
4379 ulonglong log_space_reclaimed= 0;
4380 DBUG_ENTER("purge_first_log");
4381
4382 DBUG_ASSERT(is_open());
4383 DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT);
4384 DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
4385
4386 mysql_mutex_assert_owner(&rli->data_lock);
4387
4388 mysql_mutex_lock(&LOCK_index);
4389
4390 ir= rli->inuse_relaylog_list;
4391 while (ir)
4392 {
4393 inuse_relaylog *next= ir->next;
4394 if (!ir->completed || ir->dequeued_count < ir->queued_count)
4395 {
4396 included= false;
4397 break;
4398 }
4399 if (!included && !strcmp(ir->name, rli->group_relay_log_name))
4400 break;
4401 if (!next)
4402 {
4403 rli->last_inuse_relaylog= NULL;
4404 included= 1;
4405 to_purge_if_included= my_strdup(ir->name, MYF(0));
4406 }
4407 rli->free_inuse_relaylog(ir);
4408 ir= next;
4409 }
4410 rli->inuse_relaylog_list= ir;
4411 if (ir)
4412 to_purge_if_included= my_strdup(ir->name, MYF(0));
4413
4414 /*
4415 Read the next log file name from the index file and pass it back to
4416 the caller.
4417 */
4418 if (unlikely((error=find_log_pos(&rli->linfo, rli->event_relay_log_name,
4419 0))) ||
4420 unlikely((error=find_next_log(&rli->linfo, 0))))
4421 {
4422 sql_print_error("next log error: %d offset: %llu log: %s included: %d",
4423 error, rli->linfo.index_file_offset,
4424 rli->event_relay_log_name, included);
4425 goto err;
4426 }
4427
4428 /*
4429 Reset rli's coordinates to the current log.
4430 */
4431 rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
4432 strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name);
4433
4434 /*
4435 If we removed the rli->group_relay_log_name file,
4436 we must update the rli->group* coordinates, otherwise do not touch it as the
4437 group's execution is not finished (e.g. COMMIT not executed)
4438 */
4439 if (included)
4440 {
4441 rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
4442 strmake_buf(rli->group_relay_log_name,rli->linfo.log_file_name);
4443 rli->notify_group_relay_log_name_update();
4444 }
4445
4446 /* Store where we are in the new file for the execution thread */
4447 if (rli->flush())
4448 error= LOG_INFO_IO;
4449
4450 DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););
4451
4452 rli->relay_log.purge_logs(to_purge_if_included, included,
4453 0, 0, &log_space_reclaimed);
4454
4455 mysql_mutex_lock(&rli->log_space_lock);
4456 rli->log_space_total-= log_space_reclaimed;
4457 mysql_cond_broadcast(&rli->log_space_cond);
4458 mysql_mutex_unlock(&rli->log_space_lock);
4459
4460 /*
4461 * Need to update the log pos because purge logs has been called
4462 * after fetching initially the log pos at the beginning of the method.
4463 */
4464 if ((errcode= find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)))
4465 {
4466 sql_print_error("next log error: %d offset: %llu log: %s included: %d",
4467 errcode, rli->linfo.index_file_offset,
4468 rli->group_relay_log_name, included);
4469 goto err;
4470 }
4471
4472 /* If included was passed, rli->linfo should be the first entry. */
4473 DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
4474
4475err:
4476 my_free(to_purge_if_included);
4477 mysql_mutex_unlock(&LOCK_index);
4478 DBUG_RETURN(error);
4479}
4480
4481/**
4482 Update log index_file.
4483*/
4484
4485int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
4486{
4487 if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
4488 return LOG_INFO_IO;
4489
4490 // now update offsets in index file for running threads
4491 if (need_update_threads)
4492 adjust_linfo_offsets(log_info->index_file_start_offset);
4493 return 0;
4494}
4495
4496/**
4497 Remove all logs before the given log from disk and from the index file.
4498
4499 @param to_log Delete all log file name before this file.
4500 @param included If true, to_log is deleted too.
4501 @param need_mutex
4502 @param need_update_threads If we want to update the log coordinates of
4503 all threads. False for relay logs, true otherwise.
4504 @param reclaimeed_log_space If not null, increment this variable to
4505 the amount of log space freed
4506
4507 @note
4508 If any of the logs before the deleted one is in use,
4509 only purge logs up to this one.
4510
4511 @retval
4512 0 ok
4513 @retval
4514 LOG_INFO_EOF to_log not found
4515 LOG_INFO_EMFILE too many files opened
4516 LOG_INFO_FATAL if any other than ENOENT error from
4517 mysql_file_stat() or mysql_file_delete()
4518*/
4519
4520int MYSQL_BIN_LOG::purge_logs(const char *to_log,
4521 bool included,
4522 bool need_mutex,
4523 bool need_update_threads,
4524 ulonglong *reclaimed_space)
4525{
4526 int error= 0;
4527 bool exit_loop= 0;
4528 LOG_INFO log_info;
4529 THD *thd= current_thd;
4530 DBUG_ENTER("purge_logs");
4531 DBUG_PRINT("info",("to_log= %s",to_log));
4532
4533 if (need_mutex)
4534 mysql_mutex_lock(&LOCK_index);
4535 if (unlikely((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) )
4536 {
4537 sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
4538 "listed in the index.", to_log);
4539 goto err;
4540 }
4541
4542 if (unlikely((error= open_purge_index_file(TRUE))))
4543 {
4544 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
4545 goto err;
4546 }
4547
4548 /*
4549 File name exists in index file; delete until we find this file
4550 or a file that is used.
4551 */
4552 if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4553 goto err;
4554 while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
4555 can_purge_log(log_info.log_file_name))
4556 {
4557 if (unlikely((error= register_purge_index_entry(log_info.log_file_name))))
4558 {
4559 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
4560 log_info.log_file_name);
4561 goto err;
4562 }
4563
4564 if (find_next_log(&log_info, 0) || exit_loop)
4565 break;
4566 }
4567
4568 DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););
4569
4570 if (unlikely((error= sync_purge_index_file())))
4571 {
4572 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
4573 goto err;
4574 }
4575
4576 /* We know how many files to delete. Update index file. */
4577 if (unlikely((error=update_log_index(&log_info, need_update_threads))))
4578 {
4579 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
4580 goto err;
4581 }
4582
4583 DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););
4584
4585err:
4586 /* Read each entry from purge_index_file and delete the file. */
4587 if (is_inited_purge_index_file() &&
4588 (error= purge_index_entry(thd, reclaimed_space, FALSE)))
4589 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
4590 " that would be purged.");
4591 close_purge_index_file();
4592
4593 DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););
4594
4595 if (need_mutex)
4596 mysql_mutex_unlock(&LOCK_index);
4597 DBUG_RETURN(error);
4598}
4599
4600int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
4601{
4602 int error= 0;
4603 DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
4604 if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
4605 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4606 MY_REPLACE_EXT)) == NULL)
4607 {
4608 error= 1;
4609 sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
4610 "file name.");
4611 }
4612 DBUG_RETURN(error);
4613}
4614
4615int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
4616{
4617 int error= 0;
4618 File file= -1;
4619
4620 DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
4621
4622 if (destroy)
4623 close_purge_index_file();
4624
4625 if (!my_b_inited(&purge_index_file))
4626 {
4627 if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4628 MYF(MY_WME | ME_WAITTANG))) < 0 ||
4629 init_io_cache(&purge_index_file, file, IO_SIZE,
4630 (destroy ? WRITE_CACHE : READ_CACHE),
4631 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4632 {
4633 error= 1;
4634 sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
4635 " file.");
4636 }
4637 }
4638 DBUG_RETURN(error);
4639}
4640
4641int MYSQL_BIN_LOG::close_purge_index_file()
4642{
4643 int error= 0;
4644
4645 DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
4646
4647 if (my_b_inited(&purge_index_file))
4648 {
4649 end_io_cache(&purge_index_file);
4650 error= my_close(purge_index_file.file, MYF(0));
4651 }
4652 my_delete(purge_index_file_name, MYF(0));
4653 bzero((char*) &purge_index_file, sizeof(purge_index_file));
4654
4655 DBUG_RETURN(error);
4656}
4657
4658bool MYSQL_BIN_LOG::is_inited_purge_index_file()
4659{
4660 return my_b_inited(&purge_index_file);
4661}
4662
4663int MYSQL_BIN_LOG::sync_purge_index_file()
4664{
4665 int error= 0;
4666 DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
4667
4668 if (unlikely((error= flush_io_cache(&purge_index_file))) ||
4669 unlikely((error= my_sync(purge_index_file.file,
4670 MYF(MY_WME | MY_SYNC_FILESIZE)))))
4671 DBUG_RETURN(error);
4672
4673 DBUG_RETURN(error);
4674}
4675
4676int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
4677{
4678 int error= 0;
4679 DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
4680
4681 if (unlikely((error=my_b_write(&purge_index_file, (const uchar*)entry,
4682 strlen(entry)))) ||
4683 unlikely((error=my_b_write(&purge_index_file, (const uchar*)"\n", 1))))
4684 DBUG_RETURN (error);
4685
4686 DBUG_RETURN(error);
4687}
4688
4689int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
4690{
4691 DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
4692 DBUG_RETURN(register_purge_index_entry(entry));
4693}
4694
4695int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *reclaimed_space,
4696 bool need_mutex)
4697{
4698 DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
4699 MY_STAT s;
4700 int error= 0;
4701 LOG_INFO log_info;
4702 LOG_INFO check_log_info;
4703
4704 DBUG_ASSERT(my_b_inited(&purge_index_file));
4705
4706 if (unlikely((error= reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0,
4707 0))))
4708 {
4709 sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
4710 "for read");
4711 goto err;
4712 }
4713
4714 for (;;)
4715 {
4716 size_t length;
4717
4718 if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
4719 FN_REFLEN)) <= 1)
4720 {
4721 if (purge_index_file.error)
4722 {
4723 error= purge_index_file.error;
4724 sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
4725 "register file.", error);
4726 goto err;
4727 }
4728
4729 /* Reached EOF */
4730 break;
4731 }
4732
4733 /* Get rid of the trailing '\n' */
4734 log_info.log_file_name[length-1]= 0;
4735
4736 if (unlikely(!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s,
4737 MYF(0))))
4738 {
4739 if (my_errno == ENOENT)
4740 {
4741 /*
4742 It's not fatal if we can't stat a log file that does not exist;
4743 If we could not stat, we won't delete.
4744 */
4745 if (thd)
4746 {
4747 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4748 ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4749 log_info.log_file_name);
4750 }
4751 sql_print_information("Failed to execute mysql_file_stat on file '%s'",
4752 log_info.log_file_name);
4753 my_errno= 0;
4754 }
4755 else
4756 {
4757 /*
4758 Other than ENOENT are fatal
4759 */
4760 if (thd)
4761 {
4762 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4763 ER_BINLOG_PURGE_FATAL_ERR,
4764 "a problem with getting info on being purged %s; "
4765 "consider examining correspondence "
4766 "of your binlog index file "
4767 "to the actual binlog files",
4768 log_info.log_file_name);
4769 }
4770 else
4771 {
4772 sql_print_information("Failed to delete log file '%s'; "
4773 "consider examining correspondence "
4774 "of your binlog index file "
4775 "to the actual binlog files",
4776 log_info.log_file_name);
4777 }
4778 error= LOG_INFO_FATAL;
4779 goto err;
4780 }
4781 }
4782 else
4783 {
4784 if (unlikely((error= find_log_pos(&check_log_info,
4785 log_info.log_file_name, need_mutex))))
4786 {
4787 if (error != LOG_INFO_EOF)
4788 {
4789 if (thd)
4790 {
4791 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4792 ER_BINLOG_PURGE_FATAL_ERR,
4793 "a problem with deleting %s and "
4794 "reading the binlog index file",
4795 log_info.log_file_name);
4796 }
4797 else
4798 {
4799 sql_print_information("Failed to delete file '%s' and "
4800 "read the binlog index file",
4801 log_info.log_file_name);
4802 }
4803 goto err;
4804 }
4805
4806 error= 0;
4807
4808 DBUG_PRINT("info",("purging %s",log_info.log_file_name));
4809 if (!my_delete(log_info.log_file_name, MYF(0)))
4810 {
4811 if (reclaimed_space)
4812 *reclaimed_space+= s.st_size;
4813 }
4814 else
4815 {
4816 if (my_errno == ENOENT)
4817 {
4818 if (thd)
4819 {
4820 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4821 ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4822 log_info.log_file_name);
4823 }
4824 sql_print_information("Failed to delete file '%s'",
4825 log_info.log_file_name);
4826 my_errno= 0;
4827 }
4828 else
4829 {
4830 if (thd)
4831 {
4832 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4833 ER_BINLOG_PURGE_FATAL_ERR,
4834 "a problem with deleting %s; "
4835 "consider examining correspondence "
4836 "of your binlog index file "
4837 "to the actual binlog files",
4838 log_info.log_file_name);
4839 }
4840 else
4841 {
4842 sql_print_information("Failed to delete file '%s'; "
4843 "consider examining correspondence "
4844 "of your binlog index file "
4845 "to the actual binlog files",
4846 log_info.log_file_name);
4847 }
4848 if (my_errno == EMFILE)
4849 {
4850 DBUG_PRINT("info",
4851 ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
4852 error= LOG_INFO_EMFILE;
4853 goto err;
4854 }
4855 error= LOG_INFO_FATAL;
4856 goto err;
4857 }
4858 }
4859 }
4860 }
4861 }
4862
4863err:
4864 DBUG_RETURN(error);
4865}
4866
4867/**
4868 Remove all logs before the given file date from disk and from the
4869 index file.
4870
4871 @param thd Thread pointer
4872 @param purge_time Delete all log files before given date.
4873
4874 @note
4875 If any of the logs before the deleted one is in use,
4876 only purge logs up to this one.
4877
4878 @retval
4879 0 ok
4880 @retval
4881 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
4882 LOG_INFO_FATAL if any other than ENOENT error from
4883 mysql_file_stat() or mysql_file_delete()
4884*/
4885
4886int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
4887{
4888 int error;
4889 char to_log[FN_REFLEN];
4890 LOG_INFO log_info;
4891 MY_STAT stat_area;
4892 THD *thd= current_thd;
4893 DBUG_ENTER("purge_logs_before_date");
4894
4895 mysql_mutex_lock(&LOCK_index);
4896 to_log[0]= 0;
4897
4898 if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4899 goto err;
4900
4901 while (strcmp(log_file_name, log_info.log_file_name) &&
4902 can_purge_log(log_info.log_file_name))
4903 {
4904 if (!mysql_file_stat(m_key_file_log,
4905 log_info.log_file_name, &stat_area, MYF(0)))
4906 {
4907 if (my_errno == ENOENT)
4908 {
4909 /*
4910 It's not fatal if we can't stat a log file that does not exist.
4911 */
4912 my_errno= 0;
4913 }
4914 else
4915 {
4916 /*
4917 Other than ENOENT are fatal
4918 */
4919 if (thd)
4920 {
4921 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4922 ER_BINLOG_PURGE_FATAL_ERR,
4923 "a problem with getting info on being purged %s; "
4924 "consider examining correspondence "
4925 "of your binlog index file "
4926 "to the actual binlog files",
4927 log_info.log_file_name);
4928 }
4929 else
4930 {
4931 sql_print_information("Failed to delete log file '%s'",
4932 log_info.log_file_name);
4933 }
4934 error= LOG_INFO_FATAL;
4935 goto err;
4936 }
4937 }
4938 else
4939 {
4940 if (stat_area.st_mtime < purge_time)
4941 strmake_buf(to_log, log_info.log_file_name);
4942 else
4943 break;
4944 }
4945 if (find_next_log(&log_info, 0))
4946 break;
4947 }
4948
4949 error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
4950
4951err:
4952 mysql_mutex_unlock(&LOCK_index);
4953 DBUG_RETURN(error);
4954}
4955
4956
4957bool
4958MYSQL_BIN_LOG::can_purge_log(const char *log_file_name_arg)
4959{
4960 xid_count_per_binlog *b;
4961
4962 if (is_active(log_file_name_arg))
4963 return false;
4964 mysql_mutex_lock(&LOCK_xid_list);
4965 {
4966 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
4967 while ((b= it++) &&
4968 0 != strncmp(log_file_name_arg+dirname_length(log_file_name_arg),
4969 b->binlog_name, b->binlog_name_len))
4970 ;
4971 }
4972 mysql_mutex_unlock(&LOCK_xid_list);
4973 if (b)
4974 return false;
4975 return !log_in_use(log_file_name_arg);
4976}
4977#endif /* HAVE_REPLICATION */
4978
4979
4980bool
4981MYSQL_BIN_LOG::is_xidlist_idle()
4982{
4983 bool res;
4984 mysql_mutex_lock(&LOCK_xid_list);
4985 res= is_xidlist_idle_nolock();
4986 mysql_mutex_unlock(&LOCK_xid_list);
4987 return res;
4988}
4989
4990
4991bool
4992MYSQL_BIN_LOG::is_xidlist_idle_nolock()
4993{
4994 xid_count_per_binlog *b;
4995
4996 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
4997 while ((b= it++))
4998 {
4999 if (b->xid_count > 0)
5000 return false;
5001 }
5002 return true;
5003}
5004
5005#ifdef WITH_WSREP
5006inline bool
5007is_gtid_cached_internal(IO_CACHE *file)
5008{
5009 uchar data[EVENT_TYPE_OFFSET+1];
5010 bool result= false;
5011 my_off_t write_pos= my_b_tell(file);
5012 if (reinit_io_cache(file, READ_CACHE, 0, 0, 0))
5013 return false;
5014 /*
5015 In the cache we have gtid event if , below condition is true,
5016 */
5017 my_b_read(file, data, sizeof(data));
5018 uint event_type= (uchar)data[EVENT_TYPE_OFFSET];
5019 if (event_type == GTID_LOG_EVENT)
5020 result= true;
5021 /*
5022 Cleanup , Why because we have not read the full buffer
5023 and this will cause next to next reinit_io_cache(called in write_cache)
5024 to make cache empty.
5025 */
5026 file->read_pos= file->read_end;
5027 if (reinit_io_cache(file, WRITE_CACHE, write_pos, 0, 0))
5028 return false;
5029 return result;
5030}
5031#endif
5032
5033#ifdef WITH_WSREP
5034inline bool
5035MYSQL_BIN_LOG::is_gtid_cached(THD *thd)
5036{
5037 binlog_cache_mngr *mngr= (binlog_cache_mngr *) thd_get_ha_data(
5038 thd, binlog_hton);
5039 if (!mngr)
5040 return false;
5041 binlog_cache_data *cache_trans= mngr->get_binlog_cache_data(
5042 use_trans_cache(thd, true));
5043 binlog_cache_data *cache_stmt= mngr->get_binlog_cache_data(
5044 use_trans_cache(thd, false));
5045 if (cache_trans && !cache_trans->empty() &&
5046 is_gtid_cached_internal(&cache_trans->cache_log))
5047 return true;
5048 if (cache_stmt && !cache_stmt->empty() &&
5049 is_gtid_cached_internal(&cache_stmt->cache_log))
5050 return true;
5051 return false;
5052}
5053#endif
5054/**
5055 Create a new log file name.
5056
5057 @param buf buf of at least FN_REFLEN where new name is stored
5058
5059 @note
5060 If file name will be longer then FN_REFLEN it will be truncated
5061*/
5062
5063void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
5064{
5065 size_t dir_len = dirname_length(log_file_name);
5066 if (dir_len >= FN_REFLEN)
5067 dir_len=FN_REFLEN-1;
5068 strnmov(buf, log_file_name, dir_len);
5069 strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
5070}
5071
5072
5073/**
5074 Check if we are writing/reading to the given log file.
5075*/
5076
5077bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
5078{
5079 /**
5080 * there should/must be mysql_mutex_assert_owner(&LOCK_log) here...
5081 * but code violates this! (scary monsters and super creeps!)
5082 *
5083 * example stacktrace:
5084 * #8 MYSQL_BIN_LOG::is_active
5085 * #9 MYSQL_BIN_LOG::can_purge_log
5086 * #10 MYSQL_BIN_LOG::purge_logs
5087 * #11 MYSQL_BIN_LOG::purge_first_log
5088 * #12 next_event
5089 * #13 exec_relay_log_event
5090 *
5091 * I didn't investigate if this is ligit...(i.e if my comment is wrong)
5092 */
5093 return !strcmp(log_file_name, log_file_name_arg);
5094}
5095
5096
5097/*
5098 Wrappers around new_file_impl to avoid using argument
5099 to control locking. The argument 1) less readable 2) breaks
5100 incapsulation 3) allows external access to the class without
5101 a lock (which is not possible with private new_file_without_locking
5102 method).
5103
5104 @retval
5105 nonzero - error
5106*/
5107
5108int MYSQL_BIN_LOG::new_file()
5109{
5110 return new_file_impl(1);
5111}
5112
5113/*
5114 @retval
5115 nonzero - error
5116 */
5117int MYSQL_BIN_LOG::new_file_without_locking()
5118{
5119 return new_file_impl(0);
5120}
5121
5122
5123/**
5124 Start writing to a new log file or reopen the old file.
5125
5126 @param need_lock Set to 1 if caller has not locked LOCK_log
5127
5128 @retval
5129 nonzero - error
5130
5131 @note
5132 The new file name is stored last in the index file
5133*/
5134
5135int MYSQL_BIN_LOG::new_file_impl(bool need_lock)
5136{
5137 int error= 0, close_on_error= FALSE;
5138 char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open;
5139 uint close_flag;
5140 bool delay_close= false;
5141 File UNINIT_VAR(old_file);
5142 DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
5143
5144 if (need_lock)
5145 mysql_mutex_lock(&LOCK_log);
5146 mysql_mutex_assert_owner(&LOCK_log);
5147
5148 if (!is_open())
5149 {
5150 DBUG_PRINT("info",("log is closed"));
5151 mysql_mutex_unlock(&LOCK_log);
5152 DBUG_RETURN(error);
5153 }
5154
5155 mysql_mutex_lock(&LOCK_index);
5156
5157 /* Reuse old name if not binlog and not update log */
5158 new_name_ptr= name;
5159
5160 /*
5161 If user hasn't specified an extension, generate a new log name
5162 We have to do this here and not in open as we want to store the
5163 new file name in the current binary log file.
5164 */
5165 if (unlikely((error= generate_new_name(new_name, name, 0))))
5166 {
5167#ifdef ENABLE_AND_FIX_HANG
5168 close_on_error= TRUE;
5169#endif
5170 goto end;
5171 }
5172 new_name_ptr=new_name;
5173
5174 if (log_type == LOG_BIN)
5175 {
5176 {
5177 /*
5178 We log the whole file name for log file as the user may decide
5179 to change base names at some point.
5180 */
5181 Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
5182 is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
5183 /*
5184 The current relay-log's closing Rotate event must have checksum
5185 value computed with an algorithm of the last relay-logged FD event.
5186 */
5187 if (is_relay_log)
5188 r.checksum_alg= relay_log_checksum_alg;
5189 DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
5190 if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
5191 (error= write_event(&r)))
5192 {
5193 DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
5194 close_on_error= TRUE;
5195 my_printf_error(ER_ERROR_ON_WRITE,
5196 ER_THD_OR_DEFAULT(current_thd, ER_CANT_OPEN_FILE),
5197 MYF(ME_FATALERROR), name, errno);
5198 goto end;
5199 }
5200 bytes_written += r.data_written;
5201 }
5202 }
5203
5204 /*
5205 Update needs to be signalled even if there is no rotate event
5206 log rotation should give the waiting thread a signal to
5207 discover EOF and move on to the next log.
5208 */
5209 if (unlikely((error= flush_io_cache(&log_file))))
5210 {
5211 close_on_error= TRUE;
5212 goto end;
5213 }
5214 update_binlog_end_pos();
5215
5216 old_name=name;
5217 name=0; // Don't free name
5218 close_flag= LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX;
5219 if (!is_relay_log)
5220 {
5221 /*
5222 We need to keep the old binlog file open (and marked as in-use) until
5223 the new one is fully created and synced to disk and index. Otherwise we
5224 leave a window where if we crash, there is no binlog file marked as
5225 crashed for server restart to detect the need for recovery.
5226 */
5227 old_file= log_file.file;
5228 close_flag|= LOG_CLOSE_DELAYED_CLOSE;
5229 delay_close= true;
5230 }
5231 close(close_flag);
5232 if (log_type == LOG_BIN && checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
5233 {
5234 DBUG_ASSERT(!is_relay_log);
5235 DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
5236 binlog_checksum_options= checksum_alg_reset;
5237 }
5238 /*
5239 Note that at this point, log_state != LOG_CLOSED
5240 (important for is_open()).
5241 */
5242
5243 /*
5244 new_file() is only used for rotation (in FLUSH LOGS or because size >
5245 max_binlog_size or max_relay_log_size).
5246 If this is a binary log, the Format_description_log_event at the
5247 beginning of the new file should have created=0 (to distinguish with the
5248 Format_description_log_event written at server startup, which should
5249 trigger temp tables deletion on slaves.
5250 */
5251
5252 /* reopen index binlog file, BUG#34582 */
5253 file_to_open= index_file_name;
5254 error= open_index_file(index_file_name, 0, FALSE);
5255 if (likely(!error))
5256 {
5257 /* reopen the binary log file. */
5258 file_to_open= new_name_ptr;
5259 error= open(old_name, log_type, new_name_ptr, 0, io_cache_type,
5260 max_size, 1, FALSE);
5261 }
5262
5263 /* handle reopening errors */
5264 if (unlikely(error))
5265 {
5266 my_error(ER_CANT_OPEN_FILE, MYF(ME_FATALERROR), file_to_open, error);
5267 close_on_error= TRUE;
5268 }
5269
5270 my_free(old_name);
5271
5272end:
5273
5274 if (delay_close)
5275 {
5276 clear_inuse_flag_when_closing(old_file);
5277 mysql_file_close(old_file, MYF(MY_WME));
5278 }
5279
5280 if (unlikely(error && close_on_error)) /* rotate or reopen failed */
5281 {
5282 /*
5283 Close whatever was left opened.
5284
5285 We are keeping the behavior as it exists today, ie,
5286 we disable logging and move on (see: BUG#51014).
5287
5288 TODO: as part of WL#1790 consider other approaches:
5289 - kill mysql (safety);
5290 - try multiple locations for opening a log file;
5291 - switch server to protected/readonly mode
5292 - ...
5293 */
5294 close(LOG_CLOSE_INDEX);
5295 sql_print_error(fatal_log_error, new_name_ptr, errno);
5296 }
5297
5298 mysql_mutex_unlock(&LOCK_index);
5299 if (need_lock)
5300 mysql_mutex_unlock(&LOCK_log);
5301
5302 DBUG_RETURN(error);
5303}
5304
5305bool MYSQL_BIN_LOG::write_event(Log_event *ev, binlog_cache_data *cache_data,
5306 IO_CACHE *file)
5307{
5308 Log_event_writer writer(file, 0, &crypto);
5309 if (crypto.scheme && file == &log_file)
5310 writer.ctx= alloca(crypto.ctx_size);
5311 if (cache_data)
5312 cache_data->add_status(ev->logged_status());
5313 return writer.write(ev);
5314}
5315
5316bool MYSQL_BIN_LOG::append(Log_event *ev)
5317{
5318 bool res;
5319 mysql_mutex_lock(&LOCK_log);
5320 res= append_no_lock(ev);
5321 mysql_mutex_unlock(&LOCK_log);
5322 return res;
5323}
5324
5325
5326bool MYSQL_BIN_LOG::append_no_lock(Log_event* ev)
5327{
5328 bool error = 0;
5329 DBUG_ENTER("MYSQL_BIN_LOG::append");
5330
5331 mysql_mutex_assert_owner(&LOCK_log);
5332 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5333
5334 if (write_event(ev))
5335 {
5336 error=1;
5337 goto err;
5338 }
5339 bytes_written+= ev->data_written;
5340 DBUG_PRINT("info",("max_size: %lu",max_size));
5341 if (flush_and_sync(0))
5342 goto err;
5343 if (my_b_append_tell(&log_file) > max_size)
5344 error= new_file_without_locking();
5345err:
5346 update_binlog_end_pos();
5347 DBUG_RETURN(error);
5348}
5349
/**
  Append an already serialized event to log_file, encrypting it first when
  a crypto scheme is configured.

  The caller must hold LOCK_log (asserted) and log_file must be of type
  SEQ_READ_APPEND (asserted). After the append the log is flushed, and a
  new file is started via new_file_without_locking() when the append
  position exceeds max_size. update_binlog_end_pos() is only called when no
  error is recorded.

  @param buf  serialized event. When encryption is active, the 4 bytes at
              buf + EVENT_LEN_OFFSET are overwritten in place with the
              first 4 bytes of the buffer.
  @param len  length of the event in bytes

  @return 0 on success; 1 if the temporary buffer could not be allocated,
          encryption failed or the append failed; otherwise the result of
          new_file_without_locking() when a new file was started

  NOTE(review): `error` is cleared before flush_and_sync() is called, so a
  failing flush jumps to `err` with error == 0 and is reported as success
  -- confirm this is intended.
*/
bool MYSQL_BIN_LOG::write_event_buffer(uchar* buf, uint len)
{
  bool error= 1;
  uchar *ebuf= 0;
  DBUG_ENTER("MYSQL_BIN_LOG::write_event_buffer");

  DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);

  mysql_mutex_assert_owner(&LOCK_log);

  if (crypto.scheme != 0)
  {
    DBUG_ASSERT(crypto.scheme == 1);

    uint elen;
    uchar iv[BINLOG_IV_LENGTH];

    /* Temporary output buffer for the encrypted copy; released at `err`. */
    ebuf= (uchar*)my_safe_alloca(len);
    if (!ebuf)
      goto err;

    /* The IV is set up from the current append position in log_file. */
    crypto.set_iv(iv, (uint32)my_b_append_tell(&log_file));

    /*
      we want to encrypt everything, excluding the event length:
      massage the data before the encryption
      (the first 4 bytes of the event are copied over the length field, and
      encryption then starts at offset 4)
    */
    memcpy(buf + EVENT_LEN_OFFSET, buf, 4);

    if (encryption_crypt(buf + 4, len - 4,
                         ebuf + 4, &elen,
                         crypto.key, crypto.key_length, iv, sizeof(iv),
                         ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD,
                         ENCRYPTION_KEY_SYSTEM_DATA, crypto.key_version))
      goto err;

    DBUG_ASSERT(elen == len - 4);

    /*
      massage the data after the encryption
      (the 4 encrypted bytes stored at EVENT_LEN_OFFSET move to the start of
      the output, and the length is stored unencrypted at EVENT_LEN_OFFSET)
    */
    memcpy(ebuf, ebuf + EVENT_LEN_OFFSET, 4);
    int4store(ebuf + EVENT_LEN_OFFSET, len);

    /* From here on the encrypted copy is what gets appended. */
    buf= ebuf;
  }
  if (my_b_append(&log_file, buf, len))
    goto err;
  bytes_written+= len;

  /* The event is in the cache: later failures must set `error` explicitly. */
  error= 0;
  DBUG_PRINT("info",("max_size: %lu",max_size));
  if (flush_and_sync(0))
    goto err;
  if (my_b_append_tell(&log_file) > max_size)
    error= new_file_without_locking();
err:
  /* ebuf is still 0 here when no encryption was attempted. */
  my_safe_afree(ebuf, len);
  if (likely(!error))
    update_binlog_end_pos();
  DBUG_RETURN(error);
}
5410
5411bool MYSQL_BIN_LOG::flush_and_sync(bool *synced)
5412{
5413 int err=0, fd=log_file.file;
5414 if (synced)
5415 *synced= 0;
5416 mysql_mutex_assert_owner(&LOCK_log);
5417 if (flush_io_cache(&log_file))
5418 return 1;
5419 uint sync_period= get_sync_period();
5420 if (sync_period && ++sync_counter >= sync_period)
5421 {
5422 sync_counter= 0;
5423 err= mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE));
5424 if (synced)
5425 *synced= 1;
5426#ifndef DBUG_OFF
5427 if (opt_binlog_dbug_fsync_sleep > 0)
5428 my_sleep(opt_binlog_dbug_fsync_sleep);
5429#endif
5430 }
5431 return err;
5432}
5433
5434void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
5435{
5436 DBUG_ASSERT(!thd->binlog_evt_union.do_union);
5437 thd->binlog_evt_union.do_union= TRUE;
5438 thd->binlog_evt_union.unioned_events= FALSE;
5439 thd->binlog_evt_union.unioned_events_trans= FALSE;
5440 thd->binlog_evt_union.first_query_id= query_id_param;
5441}
5442
5443void MYSQL_BIN_LOG::stop_union_events(THD *thd)
5444{
5445 DBUG_ASSERT(thd->binlog_evt_union.do_union);
5446 thd->binlog_evt_union.do_union= FALSE;
5447}
5448
5449bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
5450{
5451 return (thd->binlog_evt_union.do_union &&
5452 query_id_param >= thd->binlog_evt_union.first_query_id);
5453}
5454
5455/**
5456 This function checks if a transactional table was updated by the
5457 current transaction.
5458
5459 @param thd The client thread that executed the current statement.
5460 @return
5461 @c true if a transactional table was updated, @c false otherwise.
5462*/
5463bool
5464trans_has_updated_trans_table(const THD* thd)
5465{
5466 binlog_cache_mngr *const cache_mngr=
5467 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5468
5469 return (cache_mngr ? !cache_mngr->trx_cache.empty() : 0);
5470}
5471
5472/**
5473 This function checks if a transactional table was updated by the
5474 current statement.
5475
5476 @param thd The client thread that executed the current statement.
5477 @return
5478 @c true if a transactional table was updated, @c false otherwise.
5479*/
5480bool
5481stmt_has_updated_trans_table(const THD *thd)
5482{
5483 Ha_trx_info *ha_info;
5484
5485 for (ha_info= thd->transaction.stmt.ha_list; ha_info;
5486 ha_info= ha_info->next())
5487 {
5488 if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
5489 return (TRUE);
5490 }
5491 return (FALSE);
5492}
5493
5494/**
5495 This function checks if either a trx-cache or a non-trx-cache should
5496 be used. If @c bin_log_direct_non_trans_update is active or the format
5497 is either MIXED or ROW, the cache to be used depends on the flag @c
5498 is_transactional.
5499
5500 On the other hand, if binlog_format is STMT or direct option is
5501 OFF, the trx-cache should be used if and only if the statement is
5502 transactional or the trx-cache is not empty. Otherwise, the
5503 non-trx-cache should be used.
5504
5505 @param thd The client thread.
5506 @param is_transactional The changes are related to a trx-table.
5507 @return
5508 @c true if a trx-cache should be used, @c false otherwise.
5509*/
5510bool use_trans_cache(const THD* thd, bool is_transactional)
5511{
5512 if (is_transactional)
5513 return 1;
5514 binlog_cache_mngr *const cache_mngr=
5515 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5516
5517 return ((thd->is_current_stmt_binlog_format_row() ||
5518 thd->variables.binlog_direct_non_trans_update) ? 0 :
5519 !cache_mngr->trx_cache.empty());
5520}
5521
5522/**
5523 This function checks if a transaction, either a multi-statement
5524 or a single statement transaction is about to commit or not.
5525
5526 @param thd The client thread that executed the current statement.
5527 @param all Committing a transaction (i.e. TRUE) or a statement
5528 (i.e. FALSE).
5529 @return
5530 @c true if committing a transaction, otherwise @c false.
5531*/
5532bool ending_trans(THD* thd, const bool all)
5533{
5534 return (all || ending_single_stmt_trans(thd, all));
5535}
5536
5537/**
5538 This function checks if a single statement transaction is about
5539 to commit or not.
5540
5541 @param thd The client thread that executed the current statement.
5542 @param all Committing a transaction (i.e. TRUE) or a statement
5543 (i.e. FALSE).
5544 @return
5545 @c true if committing a single statement transaction, otherwise
5546 @c false.
5547*/
5548bool ending_single_stmt_trans(THD* thd, const bool all)
5549{
5550 return (!all && !thd->in_multi_stmt_transaction_mode());
5551}
5552
5553/**
5554 This function checks if a non-transactional table was updated by
5555 the current transaction.
5556
5557 @param thd The client thread that executed the current statement.
5558 @return
5559 @c true if a non-transactional table was updated, @c false
5560 otherwise.
5561*/
5562bool trans_has_updated_non_trans_table(const THD* thd)
5563{
5564 return (thd->transaction.all.modified_non_trans_table ||
5565 thd->transaction.stmt.modified_non_trans_table);
5566}
5567
5568/**
5569 This function checks if a non-transactional table was updated by the
5570 current statement.
5571
5572 @param thd The client thread that executed the current statement.
5573 @return
5574 @c true if a non-transactional table was updated, @c false otherwise.
5575*/
5576bool stmt_has_updated_non_trans_table(const THD* thd)
5577{
5578 return (thd->transaction.stmt.modified_non_trans_table);
5579}
5580
5581/*
5582 These functions are placed in this file since they need access to
5583 binlog_hton, which has internal linkage.
5584*/
5585
5586binlog_cache_mngr *THD::binlog_setup_trx_data()
5587{
5588 DBUG_ENTER("THD::binlog_setup_trx_data");
5589 binlog_cache_mngr *cache_mngr=
5590 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5591
5592 if (cache_mngr)
5593 DBUG_RETURN(cache_mngr); // Already set up
5594
5595 cache_mngr= (binlog_cache_mngr*) my_malloc(sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
5596 if (!cache_mngr ||
5597 open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir,
5598 LOG_PREFIX, (size_t)binlog_stmt_cache_size, MYF(MY_WME)) ||
5599 open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir,
5600 LOG_PREFIX, (size_t)binlog_cache_size, MYF(MY_WME)))
5601 {
5602 my_free(cache_mngr);
5603 DBUG_RETURN(0); // Didn't manage to set it up
5604 }
5605 thd_set_ha_data(this, binlog_hton, cache_mngr);
5606
5607 cache_mngr= new (cache_mngr)
5608 binlog_cache_mngr(max_binlog_stmt_cache_size,
5609 max_binlog_cache_size,
5610 &binlog_stmt_cache_use,
5611 &binlog_stmt_cache_disk_use,
5612 &binlog_cache_use,
5613 &binlog_cache_disk_use);
5614 DBUG_RETURN(cache_mngr);
5615}
5616
/*
  Function to start a statement and optionally a transaction for the
  binary log.

  SYNOPSIS
    binlog_start_trans_and_stmt()

  DESCRIPTION

    This function does three things:
      - Start a transaction if not in autocommit mode or if a BEGIN
        statement has been seen.

      - Start a statement transaction to allow us to truncate the cache.

      - Save the current binlog position so that we can roll back the
        statement by truncating the cache.

      We only update the saved position if the old one was undefined,
      the reason is that there are some cases (e.g., for CREATE-SELECT)
      where the position is saved twice (e.g., both in
      select_create::prepare() and THD::binlog_write_table_map()), but
      we should use the first. This means that calls to this function
      can be used to start the statement before the first table map
      event, to include some extra events.

    Nothing is done when the session already has a cache manager whose
    transactional cache has a defined saved position.
 */

void
THD::binlog_start_trans_and_stmt()
{
  binlog_cache_mngr *cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
  DBUG_ENTER("binlog_start_trans_and_stmt");
  DBUG_PRINT("enter", ("cache_mngr: %p  cache_mngr->trx_cache.get_prev_position(): %lu",
                       cache_mngr,
                       (cache_mngr ? (ulong) cache_mngr->trx_cache.get_prev_position() :
                        (ulong) 0)));

  /* Only act when no position has been saved yet for this statement. */
  if (cache_mngr == NULL ||
      cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
  {
    this->binlog_set_stmt_begin();
    bool mstmt_mode= in_multi_stmt_transaction_mode();
#ifdef WITH_WSREP
    /*
      Write a Gtid event.
      The domain id is only taken into account when gtid mode is set.
      If this event is replicated through a master, the same gtid is
      forwarded to the other nodes.
      This has to be done only once per transaction. Since this function
      is called multiple times, ha_info->is_started() is checked.
    */
    Ha_trx_info *ha_info;
    /* Entry 0 is used unless in multi-statement transaction mode, then entry 1. */
    ha_info= this->ha_data[binlog_hton->slot].ha_info + (mstmt_mode ? 1 : 0);

    if (!ha_info->is_started() && wsrep_gtid_mode
        && this->variables.gtid_seq_no)
    {
      /*
        The outer cache_mngr may have been NULL on entry, so the manager is
        fetched again here (this declaration shadows the outer variable).
      */
      binlog_cache_mngr *const cache_mngr=
        (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
      binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(1);
      IO_CACHE *file= &cache_data->cache_log;
      Log_event_writer writer(file, cache_data);
      Gtid_log_event gtid_event(this, this->variables.gtid_seq_no,
                            this->variables.gtid_domain_id,
                            true, LOG_EVENT_SUPPRESS_USE_F,
                            true, 0);
      gtid_event.server_id= this->variables.server_id;
      /* NOTE(review): the result of this write is not checked. */
      writer.write(&gtid_event);
    }
#endif
    if (mstmt_mode)
      trans_register_ha(this, TRUE, binlog_hton);
    trans_register_ha(this, FALSE, binlog_hton);
    /*
      Mark statement transaction as read/write. We never start
      a binary log transaction and keep it read-only,
      therefore it's best to mark the transaction read/write just
      at the same time we start it.
      Not necessary to mark the normal transaction read/write
      since the statement-level flag will be propagated automatically
      inside ha_commit_trans.
    */
    ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
  }
  DBUG_VOID_RETURN;
}
5703
5704void THD::binlog_set_stmt_begin() {
5705 binlog_cache_mngr *cache_mngr=
5706 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5707
5708 /*
5709 The call to binlog_trans_log_savepos() might create the cache_mngr
5710 structure, if it didn't exist before, so we save the position
5711 into an auto variable and then write it into the transaction
5712 data for the binary log (i.e., cache_mngr).
5713 */
5714 my_off_t pos= 0;
5715 binlog_trans_log_savepos(this, &pos);
5716 cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5717 cache_mngr->trx_cache.set_prev_position(pos);
5718}
5719
5720static int
5721binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
5722{
5723 int err= 0;
5724 DBUG_ENTER("binlog_start_consistent_snapshot");
5725
5726 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
5727
5728 /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
5729 mysql_mutex_assert_owner(&LOCK_commit_ordered);
5730 strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file);
5731 cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
5732
5733 trans_register_ha(thd, TRUE, hton);
5734
5735 DBUG_RETURN(err);
5736}
5737
/**
  This function writes a table map to one of the session's binlog caches.
  Note that in order to keep the signature uniform with related methods,
  we use a redundant parameter to indicate whether a transactional table
  was changed or not.

  If with_annotate != NULL and
  *with_annotate = TRUE write also Annotate_rows before the table map.
  *with_annotate is reset to 0 once the Annotate_rows event has been
  attempted, so it is written at most once.

  When this is the first table map of the statement (binlog_table_maps is
  0), binlog_start_trans_and_stmt() is called first. On success
  binlog_table_maps is incremented.

  @param table             a pointer to the table.
  @param is_transactional  @c true indicates a transactional table,
                           otherwise @c false a non-transactional.
                           Forced to true while OPTION_GTID_BEGIN is set.
  @param with_annotate     see above; may be NULL.
  @return
    nonzero if an error pops up when writing the table map event.
*/
int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
                                my_bool *with_annotate)
{
  int error;
  DBUG_ENTER("THD::binlog_write_table_map");
  DBUG_PRINT("enter", ("table: %p  (%s: #%lu)",
                       table, table->s->table_name.str,
                       table->s->table_map_id));

  /* Ensure that all events in a GTID group are in the same cache */
  if (variables.option_bits & OPTION_GTID_BEGIN)
    is_transactional= 1;

  /* Pre-conditions */
  DBUG_ASSERT(is_current_stmt_binlog_format_row());
  DBUG_ASSERT(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open());
  DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);

  Table_map_log_event
    the_event(this, table, table->s->table_map_id, is_transactional);

  /* First table map of the statement: start the binlog statement. */
  if (binlog_table_maps == 0)
    binlog_start_trans_and_stmt();

  /* The cache manager is looked up only now, and is used without a NULL check. */
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
  binlog_cache_data *cache_data= (cache_mngr->
                                  get_binlog_cache_data(is_transactional));
  IO_CACHE *file= &cache_data->cache_log;
  Log_event_writer writer(file, cache_data);

  if (with_annotate && *with_annotate)
  {
    Annotate_rows_log_event anno(table->in_use, is_transactional, false);
    /* Annotate event should be written not more than once */
    *with_annotate= 0;
    if (unlikely((error= writer.write(&anno))))
    {
      /* Only an EFBIG failure marks the cache with an incident. */
      if (my_errno == EFBIG)
        cache_data->set_incident();
      DBUG_RETURN(error);
    }
  }
  if (unlikely((error= writer.write(&the_event))))
    DBUG_RETURN(error);

  binlog_table_maps++;
  DBUG_RETURN(0);
}
5802
5803/**
5804 This function retrieves a pending row event from a cache which is
5805 specified through the parameter @c is_transactional. Respectively, when it
5806 is @c true, the pending event is returned from the transactional cache.
5807 Otherwise from the non-transactional cache.
5808
5809 @param is_transactional @c true indicates a transactional cache,
5810 otherwise @c false a non-transactional.
5811 @return
5812 The row event if any.
5813*/
5814Rows_log_event*
5815THD::binlog_get_pending_rows_event(bool is_transactional) const
5816{
5817 Rows_log_event* rows= NULL;
5818 binlog_cache_mngr *const cache_mngr=
5819 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5820
5821 /*
5822 This is less than ideal, but here's the story: If there is no cache_mngr,
5823 prepare_pending_rows_event() has never been called (since the cache_mngr
5824 is set up there). In that case, we just return NULL.
5825 */
5826 if (cache_mngr)
5827 {
5828 binlog_cache_data *cache_data=
5829 cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5830
5831 rows= cache_data->pending();
5832 }
5833 return (rows);
5834}
5835
5836/**
5837 This function stores a pending row event into a cache which is specified
5838 through the parameter @c is_transactional. Respectively, when it is @c
5839 true, the pending event is stored into the transactional cache. Otherwise
5840 into the non-transactional cache.
5841
5842 @param evt a pointer to the row event.
5843 @param is_transactional @c true indicates a transactional cache,
5844 otherwise @c false a non-transactional.
5845*/
5846void
5847THD::binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional)
5848{
5849 binlog_cache_mngr *const cache_mngr= binlog_setup_trx_data();
5850
5851 DBUG_ASSERT(cache_mngr);
5852
5853 binlog_cache_data *cache_data=
5854 cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5855
5856 cache_data->set_pending(ev);
5857}
5858
5859
5860/**
5861 This function removes the pending rows event, discarding any outstanding
5862 rows. If there is no pending rows event available, this is effectively a
5863 no-op.
5864
5865 @param thd a pointer to the user thread.
5866 @param is_transactional @c true indicates a transactional cache,
5867 otherwise @c false a non-transactional.
5868*/
5869int
5870MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd, bool is_transactional)
5871{
5872 DBUG_ENTER("MYSQL_BIN_LOG::remove_pending_rows_event");
5873
5874 binlog_cache_mngr *const cache_mngr=
5875 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5876
5877 DBUG_ASSERT(cache_mngr);
5878
5879 binlog_cache_data *cache_data=
5880 cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
5881
5882 if (Rows_log_event* pending= cache_data->pending())
5883 {
5884 delete pending;
5885 cache_data->set_pending(NULL);
5886 }
5887
5888 DBUG_RETURN(0);
5889}
5890
5891/*
5892 Moves the last bunch of rows from the pending Rows event to a cache (either
5893 transactional cache if is_transaction is @c true, or the non-transactional
5894 cache otherwise. Sets a new pending event.
5895
5896 @param thd a pointer to the user thread.
5897 @param evt a pointer to the row event.
5898 @param is_transactional @c true indicates a transactional cache,
5899 otherwise @c false a non-transactional.
5900*/
5901int
5902MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
5903 Rows_log_event* event,
5904 bool is_transactional)
5905{
5906 DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
5907 DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
5908 DBUG_PRINT("enter", ("event: %p", event));
5909
5910 int error= 0;
5911 binlog_cache_mngr *const cache_mngr=
5912 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5913
5914 DBUG_ASSERT(cache_mngr);
5915
5916 binlog_cache_data *cache_data=
5917 cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
5918
5919 DBUG_PRINT("info", ("cache_mngr->pending(): %p", cache_data->pending()));
5920
5921 if (Rows_log_event* pending= cache_data->pending())
5922 {
5923 Log_event_writer writer(&cache_data->cache_log, cache_data);
5924
5925 /*
5926 Write pending event to the cache.
5927 */
5928 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
5929 {DBUG_SET("+d,simulate_file_write_error");});
5930 if (writer.write(pending))
5931 {
5932 set_write_error(thd, is_transactional);
5933 if (check_write_error(thd) && cache_data &&
5934 stmt_has_updated_non_trans_table(thd))
5935 cache_data->set_incident();
5936 delete pending;
5937 cache_data->set_pending(NULL);
5938 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
5939 {DBUG_SET("-d,simulate_file_write_error");});
5940 DBUG_RETURN(1);
5941 }
5942
5943 delete pending;
5944 }
5945
5946 thd->binlog_set_pending_rows_event(event, is_transactional);
5947
5948 DBUG_RETURN(error);
5949}
5950
5951
/**
  Generate a new global transaction ID, and write it to the binlog.

  The sequence number is taken from the session variable gtid_seq_no when
  that is non-zero; otherwise the next sequence number is allocated from
  rpl_global_gtid_binlog_state. The session variable is reset to 0 in
  either case. OPTION_GTID_BEGIN is cleared if it was set.

  On success the resulting GTID is stored in thd->last_commit_gtid and the
  size of the written event is added to the session's
  binlog_bytes_written status counter.

  @param thd               session on whose behalf the GTID is written
  @param standalone        passed on to the Gtid_log_event constructor
  @param is_transactional  passed on to the Gtid_log_event constructor
  @param commit_id         passed on to the Gtid_log_event constructor

  @return false on success (including the WSREP case where
          is_gtid_cached() is true and nothing is written here), true if
          the binlog state could not be updated or the event could not be
          written
*/

bool
MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone,
                                bool is_transactional, uint64 commit_id)
{
  rpl_gtid gtid;
  uint32 domain_id;
  uint32 local_server_id;
  uint64 seq_no;
  int err;
  DBUG_ENTER("write_gtid_event");
  DBUG_PRINT("enter", ("standalone: %d", standalone));

  /*
    Pick the domain id. In WSREP builds wsrep_gtid_domain_id is used when
    the condition below holds; in all other cases the session's
    gtid_domain_id is used.
  */
#ifdef WITH_WSREP
  if (WSREP(thd) && thd->wsrep_trx_meta.gtid.seqno != -1 && wsrep_gtid_mode && !thd->variables.gtid_seq_no)
  {
    domain_id= wsrep_gtid_domain_id;
  } else {
#endif /* WITH_WSREP */
    domain_id= thd->variables.gtid_domain_id;
#ifdef WITH_WSREP
  }
#endif /* WITH_WSREP */
  local_server_id= thd->variables.server_id;
  seq_no= thd->variables.gtid_seq_no;

  DBUG_ASSERT(local_server_id != 0);

  if (thd->variables.option_bits & OPTION_GTID_BEGIN)
  {
    DBUG_PRINT("error", ("OPTION_GTID_BEGIN is set. "
                       "Master and slave will have different GTID values"));
    /* Reset the flag, as we will write out a GTID anyway */
    thd->variables.option_bits&= ~OPTION_GTID_BEGIN;
  }

  /*
    Reset the session variable gtid_seq_no, to reduce the risk of accidentally
    producing a duplicate GTID.
  */
  thd->variables.gtid_seq_no= 0;
  if (seq_no != 0)
  {
    /* Use the specified sequence number. */
    gtid.domain_id= domain_id;
    gtid.server_id= local_server_id;
    gtid.seq_no= seq_no;
    err= rpl_global_gtid_binlog_state.update(&gtid, opt_gtid_strict_mode);
    /*
      On a strict-mode out-of-order failure the error code is also stored in
      errno.
      NOTE(review): presumably read by a caller when reporting -- confirm.
    */
    if (err && thd->get_stmt_da()->sql_errno()==ER_GTID_STRICT_OUT_OF_ORDER)
      errno= ER_GTID_STRICT_OUT_OF_ORDER;
  }
  else
  {
    /* Allocate the next sequence number for the GTID. */
    err= rpl_global_gtid_binlog_state.update_with_next_gtid(domain_id,
                                                            local_server_id, &gtid);
    seq_no= gtid.seq_no;
  }
  if (err)
    DBUG_RETURN(true);
  thd->last_commit_gtid= gtid;

  Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone,
                            LOG_EVENT_SUPPRESS_USE_F, is_transactional,
                            commit_id);

  /* Write the event to the binary log. */
  DBUG_ASSERT(this == &mysql_bin_log);

#ifdef WITH_WSREP
  /* Nothing is written here when is_gtid_cached() reports true. */
  if (wsrep_gtid_mode && is_gtid_cached(thd))
    DBUG_RETURN(false);
#endif

  if (write_event(&gtid_event))
    DBUG_RETURN(true);
  status_var_add(thd->status_var.binlog_bytes_written, gtid_event.data_written);

  DBUG_RETURN(false);
}
6033
6034
6035int
6036MYSQL_BIN_LOG::write_state_to_file()
6037{
6038 File file_no;
6039 IO_CACHE cache;
6040 char buf[FN_REFLEN];
6041 int err;
6042 bool opened= false;
6043 bool log_inited= false;
6044
6045 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6046 MY_UNPACK_FILENAME);
6047 if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6048 O_RDWR|O_CREAT|O_TRUNC|O_BINARY,
6049 MYF(MY_WME))) < 0)
6050 {
6051 err= 1;
6052 goto err;
6053 }
6054 opened= true;
6055 if ((err= init_io_cache(&cache, file_no, IO_SIZE, WRITE_CACHE, 0, 0,
6056 MYF(MY_WME|MY_WAIT_IF_FULL))))
6057 goto err;
6058 log_inited= true;
6059 if ((err= rpl_global_gtid_binlog_state.write_to_iocache(&cache)))
6060 goto err;
6061 log_inited= false;
6062 if ((err= end_io_cache(&cache)))
6063 goto err;
6064 if ((err= mysql_file_sync(file_no, MYF(MY_WME|MY_SYNC_FILESIZE))))
6065 goto err;
6066 goto end;
6067
6068err:
6069 sql_print_error("Error writing binlog state to file '%s'.\n", buf);
6070 if (log_inited)
6071 end_io_cache(&cache);
6072end:
6073 if (opened)
6074 mysql_file_close(file_no, MYF(0));
6075
6076 return err;
6077}
6078
6079
6080/*
6081 Initialize the binlog state from the master-bin.state file, at server startup.
6082
6083 Returns:
6084 0 for success.
6085 2 for when .state file did not exist.
6086 1 for other error.
6087*/
6088int
6089MYSQL_BIN_LOG::read_state_from_file()
6090{
6091 File file_no;
6092 IO_CACHE cache;
6093 char buf[FN_REFLEN];
6094 int err;
6095 bool opened= false;
6096 bool log_inited= false;
6097
6098 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6099 MY_UNPACK_FILENAME);
6100 if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6101 O_RDONLY|O_BINARY, MYF(0))) < 0)
6102 {
6103 if (my_errno != ENOENT)
6104 {
6105 err= 1;
6106 goto err;
6107 }
6108 else
6109 {
6110 /*
6111 If the state file does not exist, this is the first server startup
6112 with GTID enabled. So initialize to empty state.
6113 */
6114 rpl_global_gtid_binlog_state.reset();
6115 err= 2;
6116 goto end;
6117 }
6118 }
6119 opened= true;
6120 if ((err= init_io_cache(&cache, file_no, IO_SIZE, READ_CACHE, 0, 0,
6121 MYF(MY_WME|MY_WAIT_IF_FULL))))
6122 goto err;
6123 log_inited= true;
6124 if ((err= rpl_global_gtid_binlog_state.read_from_iocache(&cache)))
6125 goto err;
6126 goto end;
6127
6128err:
6129 sql_print_error("Error reading binlog GTID state from file '%s'.\n", buf);
6130end:
6131 if (log_inited)
6132 end_io_cache(&cache);
6133 if (opened)
6134 mysql_file_close(file_no, MYF(0));
6135
6136 return err;
6137}
6138
6139
6140int
6141MYSQL_BIN_LOG::get_most_recent_gtid_list(rpl_gtid **list, uint32 *size)
6142{
6143 return rpl_global_gtid_binlog_state.get_most_recent_gtid_list(list, size);
6144}
6145
6146
6147bool
6148MYSQL_BIN_LOG::append_state_pos(String *str)
6149{
6150 return rpl_global_gtid_binlog_state.append_pos(str);
6151}
6152
6153
6154bool
6155MYSQL_BIN_LOG::append_state(String *str)
6156{
6157 return rpl_global_gtid_binlog_state.append_state(str);
6158}
6159
6160
6161bool
6162MYSQL_BIN_LOG::is_empty_state()
6163{
6164 return (rpl_global_gtid_binlog_state.count() == 0);
6165}
6166
6167
6168bool
6169MYSQL_BIN_LOG::find_in_binlog_state(uint32 domain_id, uint32 server_id_arg,
6170 rpl_gtid *out_gtid)
6171{
6172 rpl_gtid *gtid;
6173 if ((gtid= rpl_global_gtid_binlog_state.find(domain_id, server_id_arg)))
6174 *out_gtid= *gtid;
6175 return gtid != NULL;
6176}
6177
6178
6179bool
6180MYSQL_BIN_LOG::lookup_domain_in_binlog_state(uint32 domain_id,
6181 rpl_gtid *out_gtid)
6182{
6183 rpl_gtid *found_gtid;
6184
6185 if ((found_gtid= rpl_global_gtid_binlog_state.find_most_recent(domain_id)))
6186 {
6187 *out_gtid= *found_gtid;
6188 return true;
6189 }
6190
6191 return false;
6192}
6193
6194
6195int
6196MYSQL_BIN_LOG::bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no)
6197{
6198 return rpl_global_gtid_binlog_state.bump_seq_no_if_needed(domain_id, seq_no);
6199}
6200
6201
6202bool
6203MYSQL_BIN_LOG::check_strict_gtid_sequence(uint32 domain_id,
6204 uint32 server_id_arg,
6205 uint64 seq_no)
6206{
6207 return rpl_global_gtid_binlog_state.check_strict_sequence(domain_id,
6208 server_id_arg,
6209 seq_no);
6210}
6211
6212
6213/**
6214 Write an event to the binary log. If with_annotate != NULL and
6215 *with_annotate = TRUE write also Annotate_rows before the event
6216 (this should happen only if the event is a Table_map).
6217*/
6218
6219bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
6220{
6221 THD *thd= event_info->thd;
6222 bool error= 1;
6223 binlog_cache_data *cache_data= 0;
6224 bool is_trans_cache= FALSE;
6225 bool using_trans= event_info->use_trans_cache();
6226 bool direct= event_info->use_direct_logging();
6227 ulong UNINIT_VAR(prev_binlog_id);
6228 DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");
6229
6230 /*
6231 When binary logging is not enabled (--log-bin=0), wsrep-patch partially
6232 enables it without opening the binlog file (MYSQL_BIN_LOG::open().
6233 So, avoid writing to binlog file.
6234 */
6235 if (direct &&
6236 (wsrep_emulate_bin_log ||
6237 (WSREP(thd) && !(thd->variables.option_bits & OPTION_BIN_LOG))))
6238 DBUG_RETURN(0);
6239
6240 if (thd->variables.option_bits & OPTION_GTID_BEGIN)
6241 {
6242 DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set"));
6243 /* Wait for commit from binary log before we commit */
6244 direct= 0;
6245 using_trans= 1;
6246 }
6247
6248 if (thd->binlog_evt_union.do_union)
6249 {
6250 /*
6251 In Stored function; Remember that function call caused an update.
6252 We will log the function call to the binary log on function exit
6253 */
6254 thd->binlog_evt_union.unioned_events= TRUE;
6255 thd->binlog_evt_union.unioned_events_trans |= using_trans;
6256 DBUG_RETURN(0);
6257 }
6258
6259 /*
6260 We only end the statement if we are in a top-level statement. If
6261 we are inside a stored function, we do not end the statement since
6262 this will close all tables on the slave. But there can be a special case
6263 where we are inside a stored function/trigger and a SAVEPOINT is being
6264 set in side the stored function/trigger. This SAVEPOINT execution will
6265 force the pending event to be flushed without an STMT_END_F flag. This
6266 will result in a case where following DMLs will be considered as part of
6267 same statement and result in data loss on slave. Hence in this case we
6268 force the end_stmt to be true.
6269 */
6270 bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
6271 SQLCOM_SAVEPOINT) ? true :
6272 (thd->locked_tables_mode && thd->lex->requires_prelocking());
6273 if (thd->binlog_flush_pending_rows_event(end_stmt, using_trans))
6274 DBUG_RETURN(error);
6275
6276 /*
6277 In most cases this is only called if 'is_open()' is true; in fact this is
6278 mostly called if is_open() *was* true a few instructions before, but it
6279 could have changed since.
6280 */
6281 /* applier and replayer can skip writing binlog events */
6282 if ((WSREP_EMULATE_BINLOG(thd) &&
6283 IF_WSREP(thd->wsrep_exec_mode != REPL_RECV, 0)) || is_open())
6284 {
6285 my_off_t UNINIT_VAR(my_org_b_tell);
6286#ifdef HAVE_REPLICATION
6287 /*
6288 In the future we need to add to the following if tests like
6289 "do the involved tables match (to be implemented)
6290 binlog_[wild_]{do|ignore}_table?" (WL#1049)"
6291 */
6292 const char *local_db= event_info->get_db();
6293
6294 bool option_bin_log_flag= (thd->variables.option_bits & OPTION_BIN_LOG);
6295
6296 /*
6297 Log all updates to binlog cache so that they can get replicated to other
6298 nodes. A check has been added to stop them from getting logged into
6299 binary log files.
6300 */
6301 if (WSREP(thd)) option_bin_log_flag= true;
6302
6303 if ((!(option_bin_log_flag)) ||
6304 (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
6305 thd->lex->sql_command != SQLCOM_SAVEPOINT &&
6306 !binlog_filter->db_ok(local_db)))
6307 DBUG_RETURN(0);
6308#endif /* HAVE_REPLICATION */
6309
6310 IO_CACHE *file= NULL;
6311
6312 if (direct)
6313 {
6314 int res;
6315 uint64 commit_id= 0;
6316 DBUG_PRINT("info", ("direct is set"));
6317 if ((res= thd->wait_for_prior_commit()))
6318 DBUG_RETURN(res);
6319 file= &log_file;
6320 my_org_b_tell= my_b_tell(file);
6321 mysql_mutex_lock(&LOCK_log);
6322 prev_binlog_id= current_binlog_id;
6323 DBUG_EXECUTE_IF("binlog_force_commit_id",
6324 {
6325 const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
6326 bool null_value;
6327 user_var_entry *entry=
6328 (user_var_entry*) my_hash_search(&thd->user_vars,
6329 (uchar*) commit_name.str,
6330 commit_name.length);
6331 commit_id= entry->val_int(&null_value);
6332 });
6333 if (write_gtid_event(thd, true, using_trans, commit_id))
6334 goto err;
6335 }
6336 else
6337 {
6338 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
6339 if (!cache_mngr)
6340 goto err;
6341
6342 is_trans_cache= use_trans_cache(thd, using_trans);
6343 cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);
6344 file= &cache_data->cache_log;
6345
6346 if (thd->lex->stmt_accessed_non_trans_temp_table())
6347 cache_data->set_changes_to_non_trans_temp_table();
6348
6349 thd->binlog_start_trans_and_stmt();
6350 }
6351 DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
6352
6353 /*
6354 No check for auto events flag here - this write method should
6355 never be called if auto-events are enabled.
6356
6357 Write first log events which describe the 'run environment'
6358 of the SQL command. If row-based binlogging, Insert_id, Rand
6359 and other kind of "setting context" events are not needed.
6360 */
6361
6362 if (with_annotate && *with_annotate)
6363 {
6364 DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT);
6365 Annotate_rows_log_event anno(thd, using_trans, direct);
6366 /* Annotate event should be written not more than once */
6367 *with_annotate= 0;
6368 if (write_event(&anno, cache_data, file))
6369 goto err;
6370 }
6371
6372 {
6373 if (!thd->is_current_stmt_binlog_format_row())
6374 {
6375 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
6376 {
6377 Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
6378 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
6379 using_trans, direct);
6380 if (write_event(&e, cache_data, file))
6381 goto err;
6382 }
6383 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
6384 {
6385 DBUG_PRINT("info",("number of auto_inc intervals: %u",
6386 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6387 nb_elements()));
6388 Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
6389 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6390 minimum(), using_trans, direct);
6391 if (write_event(&e, cache_data, file))
6392 goto err;
6393 }
6394 if (thd->rand_used)
6395 {
6396 Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
6397 using_trans, direct);
6398 if (write_event(&e, cache_data, file))
6399 goto err;
6400 }
6401 if (thd->user_var_events.elements)
6402 {
6403 for (uint i= 0; i < thd->user_var_events.elements; i++)
6404 {
6405 BINLOG_USER_VAR_EVENT *user_var_event;
6406 get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
6407
6408 /* setting flags for user var log event */
6409 uchar flags= User_var_log_event::UNDEF_F;
6410 if (user_var_event->unsigned_flag)
6411 flags|= User_var_log_event::UNSIGNED_F;
6412
6413 User_var_log_event e(thd, user_var_event->user_var_event->name.str,
6414 user_var_event->user_var_event->name.length,
6415 user_var_event->value,
6416 user_var_event->length,
6417 user_var_event->type,
6418 user_var_event->charset_number,
6419 flags,
6420 using_trans,
6421 direct);
6422 if (write_event(&e, cache_data, file))
6423 goto err;
6424 }
6425 }
6426 }
6427 }
6428
6429 /*
6430 Write the event.
6431 */
6432 if (write_event(event_info, cache_data, file) ||
6433 DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
6434 goto err;
6435
6436 error= 0;
6437err:
6438 if (direct)
6439 {
6440 my_off_t offset= my_b_tell(file);
6441 bool check_purge= false;
6442 DBUG_ASSERT(!is_relay_log);
6443
6444 if (likely(!error))
6445 {
6446 bool synced;
6447
6448 if ((error= flush_and_sync(&synced)))
6449 {
6450 }
6451 else
6452 {
6453 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6454 mysql_mutex_assert_owner(&LOCK_log);
6455 mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
6456 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6457#ifdef HAVE_REPLICATION
6458 if (repl_semisync_master.report_binlog_update(thd, log_file_name,
6459 file->pos_in_file))
6460 {
6461 sql_print_error("Failed to run 'after_flush' hooks");
6462 error= 1;
6463 }
6464 else
6465#endif
6466 {
6467 /*
6468 update binlog_end_pos so it can be read by dump thread
6469 note: must be _after_ the RUN_HOOK(after_flush) or else
6470 semi-sync might not have put the transaction into
            its list before dump-thread tries to send it
6472 */
6473 update_binlog_end_pos(offset);
6474 if (unlikely((error= rotate(false, &check_purge))))
6475 check_purge= false;
6476 }
6477 }
6478 }
6479
6480 status_var_add(thd->status_var.binlog_bytes_written,
6481 offset - my_org_b_tell);
6482
6483 mysql_mutex_lock(&LOCK_after_binlog_sync);
6484 mysql_mutex_unlock(&LOCK_log);
6485
6486 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6487 mysql_mutex_assert_not_owner(&LOCK_log);
6488 mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
6489 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6490#ifdef HAVE_REPLICATION
6491 if (repl_semisync_master.wait_after_sync(log_file_name,
6492 file->pos_in_file))
6493 {
6494 error=1;
6495 /* error is already printed inside hook */
6496 }
6497#endif
6498
6499 /*
6500 Take mutex to protect against a reader seeing partial writes of 64-bit
6501 offset on 32-bit CPUs.
6502 */
6503 mysql_mutex_lock(&LOCK_commit_ordered);
6504 mysql_mutex_unlock(&LOCK_after_binlog_sync);
6505 last_commit_pos_offset= offset;
6506 mysql_mutex_unlock(&LOCK_commit_ordered);
6507
6508 if (check_purge)
6509 checkpoint_and_purge(prev_binlog_id);
6510 }
6511
6512 if (unlikely(error))
6513 {
6514 set_write_error(thd, is_trans_cache);
6515 if (check_write_error(thd) && cache_data &&
6516 stmt_has_updated_non_trans_table(thd))
6517 cache_data->set_incident();
6518 }
6519 }
6520
6521 DBUG_RETURN(error);
6522}
6523
6524
6525int error_log_print(enum loglevel level, const char *format,
6526 va_list args)
6527{
6528 return logger.error_log_print(level, format, args);
6529}
6530
6531
6532bool slow_log_print(THD *thd, const char *query, uint query_length,
6533 ulonglong current_utime)
6534{
6535 return logger.slow_log_print(thd, query, query_length, current_utime);
6536}
6537
6538
6539/**
6540 Decide if we should log the command to general log
6541
6542 @retval
6543 FALSE No logging
6544 TRUE Ok to log
6545*/
6546
6547bool LOGGER::log_command(THD *thd, enum enum_server_command command)
6548{
6549 /*
6550 Log command if we have at least one log event handler enabled and want
6551 to log this king of commands
6552 */
6553 if (!(*general_log_handler_list && (what_to_log & (1L << (uint) command))))
6554 return FALSE;
6555
6556 /*
6557 If LOG_SLOW_DISABLE_SLAVE is set when slave thread starts, then
6558 OPTION_LOG_OFF is set.
6559 Only the super user can set this bit.
6560 */
6561 return !(thd->variables.option_bits & OPTION_LOG_OFF);
6562}
6563
6564
6565bool general_log_print(THD *thd, enum enum_server_command command,
6566 const char *format, ...)
6567{
6568 va_list args;
6569 uint error= 0;
6570
6571 /* Print the message to the buffer if we want to log this kind of commands */
6572 if (! logger.log_command(thd, command))
6573 return FALSE;
6574
6575 va_start(args, format);
6576 error= logger.general_log_print(thd, command, format, args);
6577 va_end(args);
6578
6579 return error;
6580}
6581
6582bool general_log_write(THD *thd, enum enum_server_command command,
6583 const char *query, size_t query_length)
6584{
6585 /* Write the message to the log if we want to log this king of commands */
6586 if (logger.log_command(thd, command) || mysql_audit_general_enabled())
6587 return logger.general_log_write(thd, command, query, query_length);
6588
6589 return FALSE;
6590}
6591
6592
6593static void
6594binlog_checkpoint_callback(void *cookie)
6595{
6596 MYSQL_BIN_LOG::xid_count_per_binlog *entry=
6597 (MYSQL_BIN_LOG::xid_count_per_binlog *)cookie;
6598 /*
6599 For every supporting engine, we increment the xid_count and issue a
6600 commit_checkpoint_request(). Then we can count when all
6601 commit_checkpoint_notify() callbacks have occurred, and then log a new
6602 binlog checkpoint event.
6603 */
6604 mysql_bin_log.mark_xids_active(entry->binlog_id, 1);
6605}
6606
6607
6608/*
6609 Request a commit checkpoint from each supporting engine.
6610 This must be called after each binlog rotate, and after LOCK_log has been
6611 released. The xid_count value in the xid_count_per_binlog entry was
6612 incremented by 1 and will be decremented in this function; this ensures
6613 that the entry will not go away early despite LOCK_log not being held.
6614*/
6615void
6616MYSQL_BIN_LOG::do_checkpoint_request(ulong binlog_id)
6617{
6618 xid_count_per_binlog *entry;
6619
6620 /*
6621 Find the binlog entry, and invoke commit_checkpoint_request() on it in
6622 each supporting storage engine.
6623 */
6624 mysql_mutex_lock(&LOCK_xid_list);
6625 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
6626 do {
6627 entry= it++;
6628 DBUG_ASSERT(entry /* binlog_id is always somewhere in the list. */);
6629 } while (entry->binlog_id != binlog_id);
6630 mysql_mutex_unlock(&LOCK_xid_list);
6631
6632 ha_commit_checkpoint_request(entry, binlog_checkpoint_callback);
6633 /*
6634 When we rotated the binlog, we incremented xid_count to make sure the
6635 entry would not go away until this point, where we have done all necessary
6636 commit_checkpoint_request() calls.
6637 So now we can (and must) decrease the count - when it reaches zero, we
6638 will know that both all pending unlog() and all pending
6639 commit_checkpoint_notify() calls are done, and we can log a new binlog
6640 checkpoint.
6641 */
6642 mark_xid_done(binlog_id, true);
6643}
6644
6645
6646/**
6647 The method executes rotation when LOCK_log is already acquired
6648 by the caller.
6649
6650 @param force_rotate caller can request the log rotation
6651 @param check_purge is set to true if rotation took place
6652
6653 @note
6654 Caller _must_ check the check_purge variable. If this is set, it means
6655 that the binlog was rotated, and caller _must_ ensure that
6656 do_checkpoint_request() is called later with the binlog_id of the rotated
6657 binlog file. The call to do_checkpoint_request() must happen after
6658 LOCK_log is released (which is why we cannot simply do it here).
6659 Usually, checkpoint_and_purge() is appropriate, as it will both handle
6660 the checkpointing and any needed purging of old logs.
6661
6662 @note
6663 If rotation fails, for instance the server was unable
6664 to create a new log file, we still try to write an
6665 incident event to the current log.
6666
6667 @retval
6668 nonzero - error in rotating routine.
6669*/
6670int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
6671{
6672 int error= 0;
6673 DBUG_ENTER("MYSQL_BIN_LOG::rotate");
6674
6675 if (wsrep_to_isolation)
6676 {
6677 DBUG_ASSERT(WSREP_ON);
6678 *check_purge= false;
6679 WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
6680 wsrep_to_isolation);
6681 DBUG_RETURN(0);
6682 }
6683
6684 //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log);
6685 *check_purge= false;
6686
6687 if (force_rotate || (my_b_tell(&log_file) >= (my_off_t) max_size))
6688 {
6689 ulong binlog_id= current_binlog_id;
6690 /*
6691 We rotate the binlog, so we need to start a commit checkpoint in all
6692 supporting engines - when it finishes, we can log a new binlog checkpoint
6693 event.
6694
6695 But we cannot start the checkpoint here - there could be a group commit
6696 still in progress which needs to be included in the checkpoint, and
6697 besides we do not want to do the (possibly expensive) checkpoint while
6698 LOCK_log is held.
6699
6700 On the other hand, we must be sure that the xid_count entry for the
6701 previous log does not go away until we start the checkpoint - which it
6702 could do as it is no longer the most recent. So we increment xid_count
6703 (to count the pending checkpoint request) - this will fix the entry in
6704 place until we decrement again in do_checkpoint_request().
6705 */
6706 mark_xids_active(binlog_id, 1);
6707
6708 if (unlikely((error= new_file_without_locking())))
6709 {
6710 /**
6711 Be conservative... There are possible lost events (eg,
6712 failing to log the Execute_load_query_log_event
6713 on a LOAD DATA while using a non-transactional
6714 table)!
6715
6716 We give it a shot and try to write an incident event anyway
6717 to the current log.
6718 */
6719 if (!write_incident_already_locked(current_thd))
6720 flush_and_sync(0);
6721
6722 /*
6723 We failed to rotate - so we have to decrement the xid_count back that
6724 we incremented before attempting the rotate.
6725 */
6726 mark_xid_done(binlog_id, false);
6727 }
6728 else
6729 *check_purge= true;
6730 }
6731 DBUG_RETURN(error);
6732}
6733
6734/**
6735 The method executes logs purging routine.
6736
6737 @retval
6738 nonzero - error in rotating routine.
6739*/
6740void MYSQL_BIN_LOG::purge()
6741{
6742 mysql_mutex_assert_not_owner(&LOCK_log);
6743#ifdef HAVE_REPLICATION
6744 if (expire_logs_days)
6745 {
6746 DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
6747 time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
6748 if (purge_time >= 0)
6749 {
6750 purge_logs_before_date(purge_time);
6751 }
6752 DEBUG_SYNC(current_thd, "after_purge_logs_before_date");
6753 }
6754#endif
6755}
6756
6757
6758void MYSQL_BIN_LOG::checkpoint_and_purge(ulong binlog_id)
6759{
6760 do_checkpoint_request(binlog_id);
6761 purge();
6762}
6763
6764
6765/**
6766 Searches for the first (oldest) binlog file name in in the binlog index.
6767
6768 @param[in,out] buf_arg pointer to a buffer to hold found
6769 the first binary log file name
6770 @return NULL on success, otherwise error message
6771*/
6772static const char* get_first_binlog(char* buf_arg)
6773{
6774 IO_CACHE *index_file;
6775 size_t length;
6776 char fname[FN_REFLEN];
6777 const char* errmsg= NULL;
6778
6779 DBUG_ENTER("get_first_binlog");
6780
6781 DBUG_ASSERT(mysql_bin_log.is_open());
6782
6783 mysql_bin_log.lock_index();
6784
6785 index_file=mysql_bin_log.get_index_file();
6786 if (reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0))
6787 {
6788 errmsg= "failed to create a cache on binlog index";
6789 goto end;
6790 }
6791 /* The file ends with EOF or empty line */
6792 if ((length=my_b_gets(index_file, fname, sizeof(fname))) <= 1)
6793 {
6794 errmsg= "empty binlog index";
6795 goto end;
6796 }
6797 else
6798 {
6799 fname[length-1]= 0; // Remove end \n
6800 }
6801 if (normalize_binlog_name(buf_arg, fname, false))
6802 {
6803 errmsg= "cound not normalize the first file name in the binlog index";
6804 goto end;
6805 }
6806end:
6807 mysql_bin_log.unlock_index();
6808
6809 DBUG_RETURN(errmsg);
6810}
6811
6812/**
6813 Check weather the gtid binlog state can safely remove gtid
6814 domains passed as the argument. A safety condition is satisfied when
6815 there are no events from the being deleted domains in the currently existing
6816 binlog files. Upon successful check the supplied domains are removed
6817 from @@gtid_binlog_state. The caller is supposed to rotate binlog so that
6818 the active latest file won't have the deleted domains in its Gtid_list header.
6819
6820 @param domain_drop_lex gtid domain id sequence from lex.
6821 Passed as a pointer to dynamic array must be not empty
6822 unless pointer value NULL.
6823 @retval zero on success
6824 @retval > 0 ineffective call none from the *non* empty
6825 gtid domain sequence is deleted
6826 @retval < 0 on error
6827*/
6828static int do_delete_gtid_domain(DYNAMIC_ARRAY *domain_drop_lex)
6829{
6830 int rc= 0;
6831 Gtid_list_log_event *glev= NULL;
6832 char buf[FN_REFLEN];
6833 File file;
6834 IO_CACHE cache;
6835 const char* errmsg= NULL;
6836 char errbuf[MYSQL_ERRMSG_SIZE]= {0};
6837
6838 if (!domain_drop_lex)
6839 return 0; // still "effective" having empty domain sequence to delete
6840
6841 DBUG_ASSERT(domain_drop_lex->elements > 0);
6842 mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
6843
6844 if ((errmsg= get_first_binlog(buf)) != NULL)
6845 goto end;
6846 bzero((char*) &cache, sizeof(cache));
6847 if ((file= open_binlog(&cache, buf, &errmsg)) == (File) -1)
6848 goto end;
6849 errmsg= get_gtid_list_event(&cache, &glev);
6850 end_io_cache(&cache);
6851 mysql_file_close(file, MYF(MY_WME));
6852
6853 DBUG_EXECUTE_IF("inject_binlog_delete_domain_init_error",
6854 errmsg= "injected error";);
6855 if (errmsg)
6856 goto end;
6857 errmsg= rpl_global_gtid_binlog_state.drop_domain(domain_drop_lex,
6858 glev, errbuf);
6859
6860end:
6861 if (errmsg)
6862 {
6863 if (strlen(errmsg) > 0)
6864 {
6865 my_error(ER_BINLOG_CANT_DELETE_GTID_DOMAIN, MYF(0), errmsg);
6866 rc= -1;
6867 }
6868 else
6869 {
6870 rc= 1;
6871 }
6872 }
6873 delete glev;
6874
6875 return rc;
6876}
6877
6878/**
6879 The method is a shortcut of @c rotate() and @c purge().
6880 LOCK_log is acquired prior to rotate and is released after it.
6881
6882 @param force_rotate caller can request the log rotation
6883
6884 @retval
6885 nonzero - error in rotating routine.
6886*/
6887int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
6888 DYNAMIC_ARRAY *domain_drop_lex)
6889{
6890 int err_gtid=0, error= 0;
6891 ulong prev_binlog_id;
6892 DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
6893 bool check_purge= false;
6894
6895 mysql_mutex_lock(&LOCK_log);
6896 prev_binlog_id= current_binlog_id;
6897
6898 if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
6899 {
6900 // inffective attempt to delete merely skips rotate and purge
6901 if (err_gtid < 0)
6902 error= 1; // otherwise error is propagated the user
6903 }
6904 else if (unlikely((error= rotate(force_rotate, &check_purge))))
6905 check_purge= false;
6906 /*
6907 NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
6908 the mutex. Otherwise causes various deadlocks.
6909 */
6910 mysql_mutex_unlock(&LOCK_log);
6911
6912 if (check_purge)
6913 checkpoint_and_purge(prev_binlog_id);
6914
6915 DBUG_RETURN(error);
6916}
6917
6918uint MYSQL_BIN_LOG::next_file_id()
6919{
6920 uint res;
6921 mysql_mutex_lock(&LOCK_log);
6922 res = file_id++;
6923 mysql_mutex_unlock(&LOCK_log);
6924 return res;
6925}
6926
6927class CacheWriter: public Log_event_writer
6928{
6929public:
6930 size_t remains;
6931
6932 CacheWriter(THD *thd_arg, IO_CACHE *file_arg, bool do_checksum,
6933 Binlog_crypt_data *cr)
6934 : Log_event_writer(file_arg, 0, cr), remains(0), thd(thd_arg),
6935 first(true)
6936 { checksum_len= do_checksum ? BINLOG_CHECKSUM_LEN : 0; }
6937
6938 ~CacheWriter()
6939 { status_var_add(thd->status_var.binlog_bytes_written, bytes_written); }
6940
6941 int write(uchar* pos, size_t len)
6942 {
6943 DBUG_ENTER("CacheWriter::write");
6944 if (first)
6945 write_header(pos, len);
6946 else
6947 write_data(pos, len);
6948
6949 remains -= len;
6950 if ((first= !remains))
6951 write_footer();
6952 DBUG_RETURN(0);
6953 }
6954private:
6955 THD *thd;
6956 bool first;
6957};
6958
/*
  Write the contents of a cache to the binary log.

  SYNOPSIS
    write_cache()
    thd      Current_thread
    cache    Cache to write to the binary log

  DESCRIPTION
    Write the contents of the cache to the binary log. The cache will
    be reset as a READ_CACHE to be able to read the contents from it.

    The events are read from the cache and, depending on
    @c binlog_checksum_options, get a checksum added. The length and the
    end_log_pos stored in each event header are fixed up before the event
    is written to the binary log.

    The caller must own LOCK_log.

  RETURN
    0                  All events were written
    ER_ERROR_ON_WRITE  The cache could not be reinitialized for reading,
                       or a write to the binary log failed
*/

int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
{
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache");

  mysql_mutex_assert_owner(&LOCK_log);
  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  /*
    length   - number of bytes available in the current cache segment
    group    - position in the binary log where this cache starts
    carry    - number of bytes of a split event header saved in header[]
    hdr_offs - offset of the next event header in the current segment
  */
  size_t length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
  size_t val;
  /* grows by writer.checksum_len for every event header processed */
  size_t end_log_pos_inc= 0;
  uchar header[LOG_EVENT_HEADER_LEN];
  CacheWriter writer(thd, &log_file, binlog_checksum_options, &crypto);

  if (crypto.scheme)
    writer.ctx= alloca(crypto.ctx_size);

  // while there is just one alg the following must hold:
  DBUG_ASSERT(binlog_checksum_options == BINLOG_CHECKSUM_ALG_OFF ||
              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);

  /*
    The events in the buffer have incorrect end_log_pos data
    (relative to beginning of group rather than absolute),
    so we'll recalculate them in situ so the binlog is always
    correct, even in the middle of a group. This is possible
    because we now know the start position of the group (the
    offset of this cache in the log, if you will); all we need
    to do is to find all event-headers, and add the position of
    the group to the end_log_pos of each event.  This is pretty
    straight forward, except that we read the cache in segments,
    so an event-header might end up on the cache-border and get
    split.
  */

  group= (size_t)my_b_tell(&log_file);
  hdr_offs= carry= 0;

  do
  {
    /*
      if we only got a partial header in the last iteration,
      get the other half now and process a full header.
    */
    if (unlikely(carry > 0))
    {
      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
      size_t tail= LOG_EVENT_HEADER_LEN - carry;

      /* assemble both halves */
      memcpy(&header[carry], (char *)cache->read_pos, tail);

      uint32 len= uint4korr(header + EVENT_LEN_OFFSET);
      /* the writer counts down the original (checksum-less) event length */
      writer.remains= len;

      /* fix end_log_pos */
      end_log_pos_inc += writer.checksum_len;
      val= uint4korr(header + LOG_POS_OFFSET) + group + end_log_pos_inc;
      int4store(header + LOG_POS_OFFSET, val);

      /* fix len */
      len+= writer.checksum_len;
      int4store(header + EVENT_LEN_OFFSET, len);

      if (writer.write(header, LOG_EVENT_HEADER_LEN))
        DBUG_RETURN(ER_ERROR_ON_WRITE);

      /* skip the second half of the header, it has been written already */
      cache->read_pos+= tail;
      length-= tail;
      carry= 0;

      /* next event header at ... */
      hdr_offs= len - LOG_EVENT_HEADER_LEN - writer.checksum_len;
    }

    /* if there is anything to write, process it. */

    if (likely(length > 0))
    {
      DBUG_EXECUTE_IF("fail_binlog_write_1",
                      errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE););
      /*
        process all event-headers in this (partial) cache.
        if next header is beyond current read-buffer,
        we'll get it later (though not necessarily in the
        very next iteration, just "eventually").
      */

      /* no header in this segment: all of it belongs to the current event */
      if (hdr_offs >= length)
      {
        if (writer.write(cache->read_pos, length))
          DBUG_RETURN(ER_ERROR_ON_WRITE);
      }

      while (hdr_offs < length)
      {
        /*
          finish off with remains of the last event that crawls
          from previous into the current buffer
        */
        if (writer.remains != 0)
        {
          if (writer.write(cache->read_pos, hdr_offs))
            DBUG_RETURN(ER_ERROR_ON_WRITE);
        }

        /*
          partial header only? save what we can get, process once
          we get the rest.
        */
        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
        {
          carry= length - hdr_offs;
          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
          /* shrinking length to hdr_offs ends the while loop */
          length= hdr_offs;
        }
        else
        {
          /* we've got a full event-header, and it came in one piece */
          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
          uint ev_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
          uchar *log_pos= ev + LOG_POS_OFFSET;

          end_log_pos_inc += writer.checksum_len;
          /* fix end_log_pos */
          val= uint4korr(log_pos) + group + end_log_pos_inc;
          int4store(log_pos, val);

          /* fix length */
          int4store(ev + EVENT_LEN_OFFSET, ev_len + writer.checksum_len);

          /* write as much of the event as this segment holds */
          writer.remains= ev_len;
          if (writer.write(ev, MY_MIN(ev_len, length - hdr_offs)))
            DBUG_RETURN(ER_ERROR_ON_WRITE);

          /* next event header at ... */
          hdr_offs += ev_len; // incr by the netto len

          DBUG_ASSERT(!writer.checksum_len || writer.remains == 0 || hdr_offs >= length);
        }
      }

      /*
        Adjust hdr_offs. Note that it may still point beyond the segment
        read in the next iteration; if the current event is very long,
        it may take a couple of read-iterations (and subsequent adjustments
        of hdr_offs) for it to point into the then-current segment.
        If we have a split header (carry != 0), hdr_offs will be set at the
        beginning of the next iteration, overwriting the value we set here:
      */
      hdr_offs -= length;
    }
  } while ((length= my_b_fill(cache)));

  /* a complete cache never ends in the middle of a header or an event */
  DBUG_ASSERT(carry == 0);
  DBUG_ASSERT(!writer.checksum_len || writer.remains == 0);

  DBUG_RETURN(0);                               // All OK
}
7134
7135/*
7136 Helper function to get the error code of the query to be binlogged.
7137 */
7138int query_error_code(THD *thd, bool not_killed)
7139{
7140 int error;
7141
7142 if (not_killed || (killed_mask_hard(thd->killed) == KILL_BAD_DATA))
7143 {
7144 error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
7145 if (!error)
7146 return error;
7147
7148 /* thd->get_get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
7149 ER_QUERY_INTERRUPTED, So here we need to make sure that error
7150 is not set to these errors when specified not_killed by the
7151 caller.
7152 */
7153 if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED ||
7154 error == ER_NEW_ABORTING_CONNECTION || error == ER_CONNECTION_KILLED)
7155 error= 0;
7156 }
7157 else
7158 {
7159 /* killed status for DELAYED INSERT thread should never be used */
7160 DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
7161 error= thd->killed_errno();
7162 }
7163
7164 return error;
7165}
7166
7167
7168bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
7169{
7170 uint error= 0;
7171 DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
7172 Incident incident= INCIDENT_LOST_EVENTS;
7173 Incident_log_event ev(thd, incident, &write_error_msg);
7174
7175 if (likely(is_open()))
7176 {
7177 error= write_event(&ev);
7178 status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
7179 }
7180
7181 DBUG_RETURN(error);
7182}
7183
7184
7185bool MYSQL_BIN_LOG::write_incident(THD *thd)
7186{
7187 uint error= 0;
7188 my_off_t offset;
7189 bool check_purge= false;
7190 ulong prev_binlog_id;
7191 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
7192
7193 mysql_mutex_lock(&LOCK_log);
7194 if (likely(is_open()))
7195 {
7196 prev_binlog_id= current_binlog_id;
7197 if (likely(!(error= write_incident_already_locked(thd))) &&
7198 likely(!(error= flush_and_sync(0))))
7199 {
7200 update_binlog_end_pos();
7201 if (unlikely((error= rotate(false, &check_purge))))
7202 check_purge= false;
7203 }
7204
7205 offset= my_b_tell(&log_file);
7206
7207 update_binlog_end_pos(offset);
7208
7209 /*
7210 Take mutex to protect against a reader seeing partial writes of 64-bit
7211 offset on 32-bit CPUs.
7212 */
7213 mysql_mutex_lock(&LOCK_commit_ordered);
7214 last_commit_pos_offset= offset;
7215 mysql_mutex_unlock(&LOCK_commit_ordered);
7216 mysql_mutex_unlock(&LOCK_log);
7217
7218 if (check_purge)
7219 checkpoint_and_purge(prev_binlog_id);
7220 }
7221 else
7222 {
7223 mysql_mutex_unlock(&LOCK_log);
7224 }
7225
7226 DBUG_RETURN(error);
7227}
7228
7229void
7230MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name_arg, uint len)
7231{
7232 my_off_t offset;
7233 Binlog_checkpoint_log_event ev(name_arg, len);
7234 /*
7235 Note that we must sync the binlog checkpoint to disk.
7236 Otherwise a subsequent log purge could delete binlogs that XA recovery
7237 thinks are needed (even though they are not really).
7238 */
7239 if (!write_event(&ev) && !flush_and_sync(0))
7240 {
7241 update_binlog_end_pos();
7242 }
7243 else
7244 {
7245 /*
7246 If we fail to write the checkpoint event, something is probably really
7247 bad with the binlog. We complain in the error log.
7248
7249 Note that failure to write binlog checkpoint does not compromise the
7250 ability to do crash recovery - crash recovery will just have to scan a
7251 bit more of the binlog than strictly necessary.
7252 */
7253 sql_print_error("Failed to write binlog checkpoint event to binary log\n");
7254 }
7255
7256 offset= my_b_tell(&log_file);
7257
7258 update_binlog_end_pos(offset);
7259
7260 /*
7261 Take mutex to protect against a reader seeing partial writes of 64-bit
7262 offset on 32-bit CPUs.
7263 */
7264 mysql_mutex_lock(&LOCK_commit_ordered);
7265 last_commit_pos_offset= offset;
7266 mysql_mutex_unlock(&LOCK_commit_ordered);
7267}
7268
7269
7270/**
7271 Write a cached log entry to the binary log.
7272 - To support transaction over replication, we wrap the transaction
7273 with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
7274 We want to write a BEGIN/ROLLBACK block when a non-transactional table
7275 was updated in a transaction which was rolled back. This is to ensure
7276 that the same updates are run on the slave.
7277
7278 @param thd
7279 @param cache The cache to copy to the binlog
7280 @param commit_event The commit event to print after writing the
7281 contents of the cache.
7282 @param incident Defines if an incident event should be created to
7283 notify that some non-transactional changes did
7284 not get into the binlog.
7285
7286 @note
7287 We only come here if there is something in the cache.
7288 @note
7289 The thing in the cache is always a complete transaction.
7290 @note
7291 'cache' needs to be reinitialized after this functions returns.
7292*/
7293
7294bool
7295MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
7296 binlog_cache_mngr *cache_mngr,
7297 Log_event *end_ev, bool all,
7298 bool using_stmt_cache,
7299 bool using_trx_cache)
7300{
7301 group_commit_entry entry;
7302 Ha_trx_info *ha_info;
7303 DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
7304
7305 /*
7306 Control should not be allowed beyond this point in wsrep_emulate_bin_log
7307 mode. Also, do not write the cached updates to binlog if binary logging is
7308 disabled (log-bin/sql_log_bin).
7309 */
7310 if (wsrep_emulate_bin_log)
7311 {
7312 DBUG_RETURN(0);
7313 }
7314 else if (!(thd->variables.option_bits & OPTION_BIN_LOG))
7315 {
7316 cache_mngr->need_unlog= false;
7317 DBUG_RETURN(0);
7318 }
7319
7320 entry.thd= thd;
7321 entry.cache_mngr= cache_mngr;
7322 entry.error= 0;
7323 entry.all= all;
7324 entry.using_stmt_cache= using_stmt_cache;
7325 entry.using_trx_cache= using_trx_cache;
7326 entry.need_unlog= false;
7327 ha_info= all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
7328
7329 for (; ha_info; ha_info= ha_info->next())
7330 {
7331 if (ha_info->is_started() && ha_info->ht() != binlog_hton &&
7332 !ha_info->ht()->commit_checkpoint_request)
7333 entry.need_unlog= true;
7334 break;
7335 }
7336
7337 entry.end_event= end_ev;
7338 if (cache_mngr->stmt_cache.has_incident() ||
7339 cache_mngr->trx_cache.has_incident())
7340 {
7341 Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, &write_error_msg);
7342 entry.incident_event= &inc_ev;
7343 DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7344 }
7345 else
7346 {
7347 entry.incident_event= NULL;
7348 DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7349 }
7350}
7351
7352
/*
  Put a transaction that is ready to commit in the group commit queue.
  The transaction is identified by the ENTRY object passed into this function.

  To facilitate group commit for the binlog, we first queue up ourselves in
  this function. Then later the first thread to enter the queue waits for
  the LOCK_log mutex, and commits for everyone in the queue once it gets the
  lock. Any other threads in the queue just wait for the first one to finish
  the commit and wake them up. This way, all transactions in the queue get
  committed in a single disk operation.

  The main work in this function is when the commit in one transaction has
  been marked to wait for the commit of another transaction to happen
  first. This is used to support in-order parallel replication, where
  transactions can execute out-of-order but need to be committed in-order with
  how they happened on the master. The waiting of one commit on another needs
  to be integrated with the group commit queue, to ensure that the waiting
  transaction can participate in the same group commit as the waited-for
  transaction.

  So when we put a transaction in the queue, we check if there were other
  transactions already prepared to commit but just waiting for the first one
  to commit. If so, we add those to the queue as well, transitively for all
  waiters.

  And if a transaction is marked to wait for a prior transaction, but that
  prior transaction is already queued for group commit, then we can queue the
  new transaction directly to participate in the group commit.

  @param orig_entry  Group commit entry of the transaction to queue. Its
                     queued_by_other flag is reset on entry, and is set by
                     another thread if that thread queues the entry for us.

  @retval < 0   Error
  @retval > 0   If queued as the first entry in the queue (meaning this
                is the leader)
  @retval 0     Otherwise (queued as participant, leader handles the commit)
*/

int
MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
{
  group_commit_entry *entry, *orig_queue, *last;
  wait_for_commit *cur;
  wait_for_commit *wfc;
  DBUG_ENTER("MYSQL_BIN_LOG::queue_for_group_commit");

  /*
    Check if we need to wait for another transaction to commit before us.

    It is safe to do a quick check without lock first in the case where we do
    not have to wait. But if the quick check shows we need to wait, we must do
    another safe check under lock, to avoid the race where the other
    transaction wakes us up between the check and the wait.
  */
  wfc= orig_entry->thd->wait_for_commit_ptr;
  orig_entry->queued_by_other= false;
  if (wfc && wfc->waitee)
  {
    mysql_mutex_lock(&wfc->LOCK_wait_commit);
    /*
      Do an extra check here, this time safely under lock.

      If waitee->commit_started is set, it means that the transaction we need
      to wait for has already queued up for group commit. In this case it is
      safe for us to queue up immediately as well, increasing the opportunities
      for group commit. Since the waitee took LOCK_prepare_ordered before
      setting the flag, there is no risk that we can queue ahead of it.
    */
    if (wfc->waitee && !wfc->waitee->commit_started)
    {
      PSI_stage_info old_stage;
      wait_for_commit *loc_waitee;

      /*
        By setting wfc->opaque_pointer to our own entry, we mark that we are
        ready to commit, but waiting for another transaction to commit before
        us.

        This other transaction may then take over the commit process for us to
        get us included in its own group commit. If this happens, the
        queued_by_other flag is set.

        Setting this flag may or may not be seen by the other thread, but we
        are safe in any case: The other thread will set queued_by_other under
        its LOCK_wait_commit, and we will check queued_by_other only after
        we have been woken up.
      */
      wfc->opaque_pointer= orig_entry;
      DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior");
      orig_entry->thd->ENTER_COND(&wfc->COND_wait_commit,
                                  &wfc->LOCK_wait_commit,
                                  &stage_waiting_for_prior_transaction_to_commit,
                                  &old_stage);
      /*
        Sleep until wfc->waitee has been cleared or this thread is killed.
        loc_waitee keeps the last waitee value seen, so a non-NULL value after
        the loop means the wait ended because of a kill.
      */
      while ((loc_waitee= wfc->waitee) && !orig_entry->thd->check_killed())
        mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
      wfc->opaque_pointer= NULL;
      DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d",
                 orig_entry->queued_by_other));

      if (loc_waitee)
      {
        /* Wait terminated due to kill. */
        mysql_mutex_lock(&loc_waitee->LOCK_wait_commit);
        if (loc_waitee->wakeup_subsequent_commits_running ||
            orig_entry->queued_by_other)
        {
          /* Our waitee is already waking us up, so ignore the kill. */
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          do
          {
            mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
          } while (wfc->waitee);
        }
        else
        {
          /* We were killed, so remove us from the list of waitee. */
          wfc->remove_from_list(&loc_waitee->subsequent_commits_list);
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          wfc->waitee= NULL;

          orig_entry->thd->EXIT_COND(&old_stage);
          /* Interrupted by kill. */
          DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior_killed");
          /* Fall back to ER_QUERY_INTERRUPTED if no kill error code is set. */
          wfc->wakeup_error= orig_entry->thd->killed_errno();
          if (!wfc->wakeup_error)
            wfc->wakeup_error= ER_QUERY_INTERRUPTED;
          my_message(wfc->wakeup_error,
                     ER_THD(orig_entry->thd, wfc->wakeup_error), MYF(0));
          DBUG_RETURN(-1);
        }
      }
      orig_entry->thd->EXIT_COND(&old_stage);
    }
    else
      mysql_mutex_unlock(&wfc->LOCK_wait_commit);
  }
  /*
    If the transaction we were waiting for has already put us into the group
    commit queue (and possibly already done the entire binlog commit for us),
    then there is nothing else to do.
  */
  if (orig_entry->queued_by_other)
    DBUG_RETURN(0);

  /* A non-zero wakeup_error is reported as a failed prior commit. */
  if (wfc && wfc->wakeup_error)
  {
    my_error(ER_PRIOR_COMMIT_FAILED, MYF(0));
    DBUG_RETURN(-1);
  }

  /* Now enqueue ourselves in the group commit queue. */
  DEBUG_SYNC(orig_entry->thd, "commit_before_enqueue");
  orig_entry->thd->clear_wakeup_ready();
  mysql_mutex_lock(&LOCK_prepare_ordered);
  /* Remember the old queue head; if it was NULL, we become the leader. */
  orig_queue= group_commit_queue;

  /*
    Iteratively process everything added to the queue, looking for waiters,
    and their waiters, and so on. If a waiter is ready to commit, we
    immediately add it to the queue, and mark it as queued_by_other.

    This would be natural to do with recursion, but we want to avoid
    potentially unbounded recursion blowing the C stack, so we use the list
    approach instead.

    We keep a list of the group_commit_entry of all the waiters that need to
    be processed. Initially this list contains only the entry passed into this
    function.

    We process entries in the list one by one. The element currently being
    processed is pointed to by `entry`, and the element at the end of the list
    is pointed to by `last` (we do not use NULL to terminate the list).

    As we process an entry, any waiters for that entry are added at the end of
    the list, to be processed in subsequent iterations. Then the entry is added
    to the group_commit_queue. This continues until the list is exhausted,
    with all entries ever added eventually processed.

    The end result is a breadth-first traversal of the tree of waiters,
    re-using the `next' pointers of the group_commit_entry objects in place of
    extra stack space in a recursive traversal.

    The temporary list linked through these `next' pointers is not used by the
    caller or any other function; it only exists while doing the iterative
    tree traversal. After, all the processed entries are linked into the
    group_commit_queue.
  */

  cur= wfc;
  last= orig_entry;
  entry= orig_entry;
  for (;;)
  {
    group_commit_entry *next_entry;

    if (entry->cache_mngr->using_xa)
    {
      DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered");
      run_prepare_ordered(entry->thd, entry->all);
      DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
    }

    if (cur)
    {
      /*
        Now that we have taken LOCK_prepare_ordered and will queue up in the
        group commit queue, it is safe for following transactions to queue
        themselves. We will grab here any transaction that is now ready to
        queue up, but after that, more transactions may become ready while the
        leader is waiting to start the group commit. So set the flag
        `commit_started', so that later transactions can still participate in
        the group commit.
      */
      cur->commit_started= true;

      /*
        Check if this transaction has other transaction waiting for it to
        commit.

        If so, process the waiting transactions, and their waiters and so on,
        transitively.
      */
      if (cur->subsequent_commits_list)
      {
        wait_for_commit *waiter, **waiter_ptr;

        mysql_mutex_lock(&cur->LOCK_wait_commit);
        /*
          Grab the list, now safely under lock, and process it if still
          non-empty.
        */
        waiter= cur->subsequent_commits_list;
        /*
          waiter_ptr addresses the link that points at the current waiter, so
          that a waiter can be unlinked by overwriting *waiter_ptr.
        */
        waiter_ptr= &cur->subsequent_commits_list;
        while (waiter)
        {
          wait_for_commit *next_waiter= waiter->next_subsequent_commit;
          group_commit_entry *entry2=
            (group_commit_entry *)waiter->opaque_pointer;
          if (entry2)
          {
            /*
              This is another transaction ready to be written to the binary
              log. We can put it into the queue directly, without needing a
              separate context switch to the other thread. We just set a flag
              so that the other thread will know when it wakes up that it was
              already processed.

              So remove it from the list of our waiters, and instead put it at
              the end of the list to be processed in a subsequent iteration of
              the outer loop.
            */
            *waiter_ptr= next_waiter;
            entry2->queued_by_other= true;
            last->next= entry2;
            last= entry2;
            /*
              As a small optimisation, we do not actually need to set
              entry2->next to NULL, as we can use the pointer `last' to check
              for end-of-list.
            */
          }
          else
          {
            /*
              This transaction is not ready to participate in the group commit
              yet, so leave it in the waiter list. It might join the group
              commit later, if it completes soon enough to do so (it will see
              our wfc->commit_started flag set), or it might commit later in a
              later group commit.
            */
            waiter_ptr= &waiter->next_subsequent_commit;
          }
          waiter= next_waiter;
        }
        mysql_mutex_unlock(&cur->LOCK_wait_commit);
      }
    }

    /*
      Handle the heuristics that if another transaction is waiting for this
      transaction (or if it does so later), then we want to trigger group
      commit immediately, without waiting for the binlog_commit_wait_usec
      timeout to expire.
    */
    entry->thd->waiting_on_group_commit= true;

    /*
      Add the entry to the group commit queue. entry->next is saved first, as
      it still links the temporary traversal list and is overwritten here.
    */
    next_entry= entry->next;
    entry->next= group_commit_queue;
    group_commit_queue= entry;
    if (entry == last)
      break;
    /*
      Move to the next entry in the flattened list of waiting transactions
      that still need to be processed transitively.
    */
    entry= next_entry;
    DBUG_ASSERT(entry != NULL);
    cur= entry->thd->wait_for_commit_ptr;
  }

  /*
    With binlog_commit_wait_count in effect and a queue that was already
    non-empty, signal COND_prepare_ordered (the condition that
    wait_for_sufficient_commits() waits on).
  */
  if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL)
    mysql_cond_signal(&COND_prepare_ordered);
  mysql_mutex_unlock(&LOCK_prepare_ordered);
  DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered");

  DBUG_PRINT("info", ("Queued for group commit as %s\n",
                      (orig_queue == NULL) ? "leader" : "participant"));
  /* 1 if the queue was empty before we queued (leader), 0 otherwise. */
  DBUG_RETURN(orig_queue == NULL);
}
7661
7662bool
7663MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
7664{
7665 int is_leader= queue_for_group_commit(entry);
7666
7667 /*
7668 The first in the queue handles group commit for all; the others just wait
7669 to be signalled when group commit is done.
7670 */
7671 if (is_leader < 0)
7672 return true; /* Error */
7673 else if (is_leader)
7674 trx_group_commit_leader(entry);
7675 else if (!entry->queued_by_other)
7676 {
7677 DEBUG_SYNC(entry->thd, "after_semisync_queue");
7678
7679 entry->thd->wait_for_wakeup_ready();
7680 }
7681 else
7682 {
7683 /*
7684 If we were queued by another prior commit, then we are woken up
7685 only when the leader has already completed the commit for us.
7686 So nothing to do here then.
7687 */
7688 }
7689
7690 if (!opt_optimize_thread_scheduling)
7691 {
7692 /* For the leader, trx_group_commit_leader() already took the lock. */
7693 if (!is_leader)
7694 mysql_mutex_lock(&LOCK_commit_ordered);
7695
7696 DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
7697 ++num_commits;
7698 if (entry->cache_mngr->using_xa && !entry->error)
7699 run_commit_ordered(entry->thd, entry->all);
7700
7701 group_commit_entry *next= entry->next;
7702 if (!next)
7703 {
7704 group_commit_queue_busy= FALSE;
7705 mysql_cond_signal(&COND_queue_busy);
7706 DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
7707 }
7708 mysql_mutex_unlock(&LOCK_commit_ordered);
7709 entry->thd->wakeup_subsequent_commits(entry->error);
7710
7711 if (next)
7712 {
7713 /*
7714 Wake up the next thread in the group commit.
7715
7716 The next thread can be waiting in two different ways, depending on
7717 whether it put itself in the queue, or if it was put in queue by us
7718 because it had to wait for us to commit first.
7719
7720 So execute the appropriate wakeup, identified by the queued_by_other
7721 field.
7722 */
7723 if (next->queued_by_other)
7724 next->thd->wait_for_commit_ptr->wakeup(entry->error);
7725 else
7726 next->thd->signal_wakeup_ready();
7727 }
7728 else
7729 {
7730 /*
7731 If we rotated the binlog, and if we are using the unoptimized thread
7732 scheduling where every thread runs its own commit_ordered(), then we
7733 must do the commit checkpoint and log purge here, after all
7734 commit_ordered() calls have finished, and locks have been released.
7735 */
7736 if (entry->check_purge)
7737 checkpoint_and_purge(entry->binlog_id);
7738 }
7739
7740 }
7741
7742 if (likely(!entry->error))
7743 return entry->thd->wait_for_prior_commit();
7744
7745 switch (entry->error)
7746 {
7747 case ER_ERROR_ON_WRITE:
7748 my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, entry->commit_errno);
7749 break;
7750 case ER_ERROR_ON_READ:
7751 my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
7752 entry->error_cache->file_name, entry->commit_errno);
7753 break;
7754 default:
7755 /*
7756 There are not (and should not be) any errors thrown not covered above.
7757 But just in case one is added later without updating the above switch
7758 statement, include a catch-all.
7759 */
7760 my_printf_error(entry->error,
7761 "Error writing transaction to binary log: %d",
7762 MYF(ME_NOREFRESH), entry->error);
7763 }
7764
7765 /*
7766 Since we return error, this transaction XID will not be committed, so
7767 we need to mark it as not needed for recovery (unlog() is not called
7768 for a transaction if log_xid() fails).
7769 */
7770 if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid &&
7771 entry->cache_mngr->need_unlog)
7772 mark_xid_done(entry->cache_mngr->binlog_id, true);
7773
7774 return 1;
7775}
7776
/*
  Do binlog group commit as the lead thread.

  This must be called when this statement/transaction is queued at the start of
  the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
  commit all the transactions in the queue (more may have entered while waiting
  for LOCK_log). After commit is done, all other threads in the queue will be
  signalled.

  @param leader  Group commit entry of the calling thread; asserted to be
                 first in the queue once the queue has been reversed.

  @note
    When opt_optimize_thread_scheduling is off, this function returns with
    LOCK_commit_ordered still locked, and stores check_purge and binlog_id in
    the last queue entry instead of calling checkpoint_and_purge() itself.
*/
void
MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
{
  uint xid_count= 0;
  my_off_t UNINIT_VAR(commit_offset);
  group_commit_entry *current, *last_in_queue;
  group_commit_entry *queue= NULL;
  bool check_purge= false;
  ulong UNINIT_VAR(binlog_id);
  uint64 commit_id;
  DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");

  {
    DBUG_EXECUTE_IF("inject_binlog_commit_before_get_LOCK_log",
      DBUG_ASSERT(!debug_sync_set_action(leader->thd, STRING_WITH_LEN
        ("commit_before_get_LOCK_log SIGNAL waiting WAIT_FOR cont TIMEOUT 1")));
    );
    /*
      Lock the LOCK_log(), and once we get it, collect any additional writes
      that queued up while we were waiting.
    */
    DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_log");
    mysql_mutex_lock(&LOCK_log);
    DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");

    mysql_mutex_lock(&LOCK_prepare_ordered);
    if (opt_binlog_commit_wait_count)
      wait_for_sufficient_commits();
    /*
      Note that wait_for_sufficient_commits() may have released and
      re-acquired the LOCK_log and LOCK_prepare_ordered if it needed to wait.
    */
    /* Take over the whole queue, leaving an empty one for the next group. */
    current= group_commit_queue;
    group_commit_queue= NULL;
    mysql_mutex_unlock(&LOCK_prepare_ordered);
    binlog_id= current_binlog_id;

    /* As the queue is in reverse order of entering, reverse it. */
    last_in_queue= current;
    while (current)
    {
      group_commit_entry *next= current->next;
      /*
        Now that group commit is started, we can clear the flag; there is no
        longer any use in waiters on this commit trying to trigger it early.
      */
      current->thd->waiting_on_group_commit= false;
      current->next= queue;
      queue= current;
      current= next;
    }
    DBUG_ASSERT(leader == queue /* the leader should be first in queue */);

    /* Now we have in queue the list of transactions to be committed in order. */
  }

  DBUG_ASSERT(is_open());
  if (likely(is_open()))                       // Should always be true
  {
    /*
      Use commit id 0 when the leader is the only entry in the queue,
      otherwise the query_id of the leader.
    */
    commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id);
    DBUG_EXECUTE_IF("binlog_force_commit_id",
      {
        const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
        bool null_value;
        user_var_entry *entry=
          (user_var_entry*) my_hash_search(&leader->thd->user_vars,
                                           (uchar*) commit_name.str,
                                           commit_name.length);
        commit_id= entry->val_int(&null_value);
      });
    /*
      Commit every transaction in the queue.

      Note that we are doing this in a different thread than the one running
      the transaction! So we are limited in the operations we can do. In
      particular, we cannot call my_error() on behalf of a transaction, as
      that obtains the THD from thread local storage. Instead, we must set
      current->error and let the thread do the error reporting itself once
      we wake it up.
    */
    for (current= queue; current != NULL; current= current->next)
    {
      binlog_cache_mngr *cache_mngr= current->cache_mngr;

      /*
        We already checked before that at least one cache is non-empty; if both
        are empty we would have skipped calling into here.
      */
      DBUG_ASSERT(!cache_mngr->stmt_cache.empty() || !cache_mngr->trx_cache.empty());

      /* On failure, keep errno so the owning thread can report it later. */
      if (unlikely((current->error= write_transaction_or_stmt(current,
                                                              commit_id))))
        current->commit_errno= errno;

      /* Record the binlog file name and offset after this entry's write. */
      strmake_buf(cache_mngr->last_commit_pos_file, log_file_name);
      commit_offset= my_b_write_tell(&log_file);
      cache_mngr->last_commit_pos_offset= commit_offset;
      if (cache_mngr->using_xa && cache_mngr->xa_xid)
      {
        /*
          If all storage engines support commit_checkpoint_request(), then we
          do not need to keep track of when this XID is durably committed.
          Instead we will just ask the storage engine to durably commit all its
          XIDs when we rotate a binlog file.
        */
        if (current->need_unlog)
        {
          xid_count++;
          cache_mngr->need_unlog= true;
          cache_mngr->binlog_id= binlog_id;
        }
        else
          cache_mngr->need_unlog= false;

        cache_mngr->delayed_error= false;
      }
    }

    bool synced= 0;
    if (unlikely(flush_and_sync(&synced)))
    {
      /* Flush/sync failed: flag every entry that has no error of its own. */
      for (current= queue; current != NULL; current= current->next)
      {
        if (!current->error)
        {
          current->error= ER_ERROR_ON_WRITE;
          current->commit_errno= errno;
          current->error_cache= NULL;
        }
      }
    }
    else
    {
      bool any_error= false;

      mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
      mysql_mutex_assert_owner(&LOCK_log);
      mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
      mysql_mutex_assert_not_owner(&LOCK_commit_ordered);

      /* Report each successfully written entry to semi-sync replication. */
      for (current= queue; current != NULL; current= current->next)
      {
#ifdef HAVE_REPLICATION
        if (likely(!current->error) &&
            unlikely(repl_semisync_master.
                     report_binlog_update(current->thd,
                                          current->cache_mngr->
                                          last_commit_pos_file,
                                          current->cache_mngr->
                                          last_commit_pos_offset)))
        {
          current->error= ER_ERROR_ON_WRITE;
          current->commit_errno= -1;
          current->error_cache= NULL;
          any_error= true;
        }
#endif
      }

      /*
        update binlog_end_pos so it can be read by dump thread
        Note: must be _after_ the RUN_HOOK(after_flush) or else
        semi-sync might not have put the transaction into
        its list before dump-thread tries to send it
      */
      update_binlog_end_pos(commit_offset);

      if (unlikely(any_error))
        sql_print_error("Failed to run 'after_flush' hooks");
    }

    /*
      If any commit_events are Xid_log_event, increase the number of pending
      XIDs in current binlog (it's decreased in ::unlog()). When the count in
      a (not active) binlog file reaches zero, we know that it is no longer
      needed in XA recovery, and we can log a new binlog checkpoint event.
    */
    if (xid_count > 0)
    {
      mark_xids_active(binlog_id, xid_count);
    }

    if (rotate(false, &check_purge))
    {
      /*
        If we fail to rotate, which thread should get the error?
        We give the error to the leader, as any my_error() thrown inside
        rotate() will have been registered for the leader THD.

        However we must not return error from here - that would cause
        ha_commit_trans() to abort and rollback the transaction, which would
        leave an inconsistent state with the transaction committed in the
        binlog but rolled back in the engine.

        Instead set a flag so that we can return error later, from unlog(),
        when the transaction has been safely committed in the engine.
      */
      leader->cache_mngr->delayed_error= true;
      my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, errno);
      check_purge= false;
    }
    /* In case of binlog rotate, update the correct current binlog offset. */
    commit_offset= my_b_write_tell(&log_file);
  }

  DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_after_binlog_sync");
  mysql_mutex_lock(&LOCK_after_binlog_sync);
  /*
    We cannot unlock LOCK_log until we have locked LOCK_after_binlog_sync;
    otherwise scheduling could allow the next group commit to run ahead of us,
    messing up the order of commit_ordered() calls. But as soon as
    LOCK_after_binlog_sync is obtained, we can let the next group commit start.
  */
  mysql_mutex_unlock(&LOCK_log);

  DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");

  /*
    Loop through threads and run the binlog_sync hook
  */
  {
    mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
    mysql_mutex_assert_not_owner(&LOCK_log);
    mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
    mysql_mutex_assert_not_owner(&LOCK_commit_ordered);

    /* first/last are maintained by the loop but not read in this function. */
    bool first __attribute__((unused))= true;
    bool last __attribute__((unused));
    for (current= queue; current != NULL; current= current->next)
    {
      last= current->next == NULL;
#ifdef HAVE_REPLICATION
      if (likely(!current->error))
        current->error=
          repl_semisync_master.wait_after_sync(current->cache_mngr->
                                               last_commit_pos_file,
                                               current->cache_mngr->
                                               last_commit_pos_offset);
#endif
      first= false;
    }
  }

  DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
  mysql_mutex_lock(&LOCK_commit_ordered);
  last_commit_pos_offset= commit_offset;

  /*
    Unlock LOCK_after_binlog_sync only *after* LOCK_commit_ordered has been
    acquired so that groups can not reorder for the different stages of
    the group commit procedure.
  */
  mysql_mutex_unlock(&LOCK_after_binlog_sync);
  DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_after_binlog_sync");
  ++num_group_commits;

  if (!opt_optimize_thread_scheduling)
  {
    /*
      If we want to run commit_ordered() each in the transaction's own thread
      context, then we need to mark the queue reserved; we need to finish all
      threads in one group commit before the next group commit can be allowed
      to proceed, and we cannot unlock a simple pthreads mutex in a different
      thread from the one that locked it.
    */

    while (group_commit_queue_busy)
      mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
    group_commit_queue_busy= TRUE;

    /*
      Set these so parent can run checkpoint_and_purge() in last thread.
      (When using optimized thread scheduling, we run checkpoint_and_purge()
      in this function, so parent does not need to and we need not set these
      values).
    */
    last_in_queue->check_purge= check_purge;
    last_in_queue->binlog_id= binlog_id;

    /* Note that we return with LOCK_commit_ordered locked! */
    DBUG_VOID_RETURN;
  }

  /*
    Wakeup each participant waiting for our group commit, first calling the
    commit_ordered() methods for any transactions doing 2-phase commit.
  */
  current= queue;
  while (current != NULL)
  {
    group_commit_entry *next;

    DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
    ++num_commits;
    if (current->cache_mngr->using_xa && likely(!current->error) &&
        DBUG_EVALUATE_IF("skip_commit_ordered", 0, 1))
      run_commit_ordered(current->thd, current->all);
    current->thd->wakeup_subsequent_commits(current->error);

    /*
      Careful not to access current->next after waking up the other thread! As
      it may change immediately after wakeup.
    */
    next= current->next;
    if (current != leader)                      // Don't wake up ourself
    {
      /* Pick the wakeup that matches how the entry entered the queue. */
      if (current->queued_by_other)
        current->thd->wait_for_commit_ptr->wakeup(current->error);
      else
        current->thd->signal_wakeup_ready();
    }
    current= next;
  }
  DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
  mysql_mutex_unlock(&LOCK_commit_ordered);
  DEBUG_SYNC(leader->thd, "commit_after_group_release_commit_ordered");

  if (check_purge)
    checkpoint_and_purge(binlog_id);

  DBUG_VOID_RETURN;
}
8109
8110
8111int
8112MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
8113 uint64 commit_id)
8114{
8115 binlog_cache_mngr *mngr= entry->cache_mngr;
8116 DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");
8117
8118 if (write_gtid_event(entry->thd, false, entry->using_trx_cache, commit_id))
8119 DBUG_RETURN(ER_ERROR_ON_WRITE);
8120
8121 if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
8122 write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
8123 {
8124 entry->error_cache= &mngr->stmt_cache.cache_log;
8125 DBUG_RETURN(ER_ERROR_ON_WRITE);
8126 }
8127
8128 if (entry->using_trx_cache && !mngr->trx_cache.empty())
8129 {
8130 DBUG_EXECUTE_IF("crash_before_writing_xid",
8131 {
8132 if ((write_cache(entry->thd,
8133 mngr->get_binlog_cache_log(TRUE))))
8134 DBUG_PRINT("info", ("error writing binlog cache"));
8135 else
8136 flush_and_sync(0);
8137
8138 DBUG_PRINT("info", ("crashing before writing xid"));
8139 DBUG_SUICIDE();
8140 });
8141
8142 if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE)))
8143 {
8144 entry->error_cache= &mngr->trx_cache.cache_log;
8145 DBUG_RETURN(ER_ERROR_ON_WRITE);
8146 }
8147 }
8148
8149 DBUG_EXECUTE_IF("inject_error_writing_xid",
8150 {
8151 entry->error_cache= NULL;
8152 errno= 28;
8153 DBUG_RETURN(ER_ERROR_ON_WRITE);
8154 });
8155
8156 if (write_event(entry->end_event))
8157 {
8158 entry->error_cache= NULL;
8159 DBUG_RETURN(ER_ERROR_ON_WRITE);
8160 }
8161 status_var_add(entry->thd->status_var.binlog_bytes_written,
8162 entry->end_event->data_written);
8163
8164 if (entry->incident_event)
8165 {
8166 if (write_event(entry->incident_event))
8167 {
8168 entry->error_cache= NULL;
8169 DBUG_RETURN(ER_ERROR_ON_WRITE);
8170 }
8171 }
8172
8173 if (unlikely(mngr->get_binlog_cache_log(FALSE)->error))
8174 {
8175 entry->error_cache= &mngr->stmt_cache.cache_log;
8176 DBUG_RETURN(ER_ERROR_ON_WRITE);
8177 }
8178 if (unlikely(mngr->get_binlog_cache_log(TRUE)->error)) // Error on read
8179 {
8180 entry->error_cache= &mngr->trx_cache.cache_log;
8181 DBUG_RETURN(ER_ERROR_ON_WRITE);
8182 }
8183
8184 DBUG_RETURN(0);
8185}
8186
8187
8188/*
8189 Wait for sufficient commits to queue up for group commit, according to the
8190 values of binlog_commit_wait_count and binlog_commit_wait_usec.
8191
8192 Note that this function may release and re-acquire LOCK_log and
8193 LOCK_prepare_ordered if it needs to wait.
8194*/
8195
8196void
8197MYSQL_BIN_LOG::wait_for_sufficient_commits()
8198{
8199 size_t count;
8200 group_commit_entry *e;
8201 group_commit_entry *last_head;
8202 struct timespec wait_until;
8203
8204 mysql_mutex_assert_owner(&LOCK_log);
8205 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8206
8207 for (e= last_head= group_commit_queue, count= 0; e; e= e->next)
8208 {
8209 if (++count >= opt_binlog_commit_wait_count)
8210 {
8211 group_commit_trigger_count++;
8212 return;
8213 }
8214 if (unlikely(e->thd->has_waiter))
8215 {
8216 group_commit_trigger_lock_wait++;
8217 return;
8218 }
8219 }
8220
8221 mysql_mutex_unlock(&LOCK_log);
8222 set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec);
8223
8224 for (;;)
8225 {
8226 int err;
8227 group_commit_entry *head;
8228
8229 err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered,
8230 &wait_until);
8231 if (err == ETIMEDOUT)
8232 {
8233 group_commit_trigger_timeout++;
8234 break;
8235 }
8236 if (unlikely(last_head->thd->has_waiter))
8237 {
8238 group_commit_trigger_lock_wait++;
8239 break;
8240 }
8241 head= group_commit_queue;
8242 for (e= head; e && e != last_head; e= e->next)
8243 {
8244 ++count;
8245 if (unlikely(e->thd->has_waiter))
8246 {
8247 group_commit_trigger_lock_wait++;
8248 goto after_loop;
8249 }
8250 }
8251 if (count >= opt_binlog_commit_wait_count)
8252 {
8253 group_commit_trigger_count++;
8254 break;
8255 }
8256 last_head= head;
8257 }
8258after_loop:
8259
8260 /*
8261 We must not wait for LOCK_log while holding LOCK_prepare_ordered.
8262 LOCK_log can be held for long periods (eg. we do I/O under it), while
8263 LOCK_prepare_ordered must only be held for short periods.
8264
8265 In addition, waiting for LOCK_log while holding LOCK_prepare_ordered would
8266 violate locking order of LOCK_log-before-LOCK_prepare_ordered. This could
8267 cause SAFEMUTEX warnings (even if it cannot actually deadlock with current
8268 code, as there can be at most one group commit leader thread at a time).
8269
8270 So release and re-acquire LOCK_prepare_ordered if we need to wait for the
8271 LOCK_log.
8272 */
8273 if (mysql_mutex_trylock(&LOCK_log))
8274 {
8275 mysql_mutex_unlock(&LOCK_prepare_ordered);
8276 mysql_mutex_lock(&LOCK_log);
8277 mysql_mutex_lock(&LOCK_prepare_ordered);
8278 }
8279}
8280
8281
8282void
8283MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit()
8284{
8285 group_commit_entry *head;
8286 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8287 head= group_commit_queue;
8288 if (head)
8289 {
8290 head->thd->has_waiter= true;
8291 mysql_cond_signal(&COND_prepare_ordered);
8292 }
8293}
8294
8295
8296/*
8297 This function is called when a transaction T1 goes to wait for another
8298 transaction T2. It is used to cut short any binlog group commit delay from
8299 --binlog-commit-wait-count in the case where another transaction is stalled
8300 on the wait due to conflicting row locks.
8301
8302 If T2 is already ready to group commit, any waiting group commit will be
8303 signalled to proceed immediately. Otherwise, a flag will be set in T2, and
8304 when T2 later becomes ready, immediate group commit will be triggered.
8305*/
8306void
8307binlog_report_wait_for(THD *thd1, THD *thd2)
8308{
8309 if (opt_binlog_commit_wait_count == 0)
8310 return;
8311 mysql_mutex_lock(&LOCK_prepare_ordered);
8312 thd2->has_waiter= true;
8313 if (thd2->waiting_on_group_commit)
8314 mysql_bin_log.binlog_trigger_immediate_group_commit();
8315 mysql_mutex_unlock(&LOCK_prepare_ordered);
8316}
8317
8318
8319/**
8320 Wait until we get a signal that the relay log has been updated.
8321
8322 @param thd Thread variable
8323
8324 @note
8325 One must have a lock on LOCK_log before calling this function.
8326 This lock will be released before return! That's required by
8327 THD::enter_cond() (see NOTES in sql_class.h).
8328*/
8329
8330void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
8331{
8332 PSI_stage_info old_stage;
8333 DBUG_ENTER("wait_for_update_relay_log");
8334
8335 mysql_mutex_assert_owner(&LOCK_log);
8336 thd->ENTER_COND(&COND_relay_log_updated, &LOCK_log,
8337 &stage_slave_has_read_all_relay_log,
8338 &old_stage);
8339 mysql_cond_wait(&COND_relay_log_updated, &LOCK_log);
8340 thd->EXIT_COND(&old_stage);
8341 DBUG_VOID_RETURN;
8342}
8343
8344/**
8345 Wait until we get a signal that the binary log has been updated.
8346 Applies to master only.
8347
8348 NOTES
8349 @param[in] thd a THD struct
8350 @param[in] timeout a pointer to a timespec;
8351 NULL means to wait w/o timeout.
8352 @retval 0 if got signalled on update
8353 @retval non-0 if wait timeout elapsed
  @note
    The mutex returned by get_binlog_end_pos_lock() must be held when calling
    this function. That mutex is released while the thread is waiting and is
    held again on return; releasing it is left to the caller.
8358*/
8359
8360int MYSQL_BIN_LOG::wait_for_update_binlog_end_pos(THD* thd,
8361 struct timespec *timeout)
8362{
8363 int ret= 0;
8364 DBUG_ENTER("wait_for_update_binlog_end_pos");
8365
8366 thd_wait_begin(thd, THD_WAIT_BINLOG);
8367 mysql_mutex_assert_owner(get_binlog_end_pos_lock());
8368 if (!timeout)
8369 mysql_cond_wait(&COND_bin_log_updated, get_binlog_end_pos_lock());
8370 else
8371 ret= mysql_cond_timedwait(&COND_bin_log_updated, get_binlog_end_pos_lock(),
8372 timeout);
8373 thd_wait_end(thd);
8374 DBUG_RETURN(ret);
8375}
8376
8377
8378/**
8379 Close the log file.
8380
8381 @param exiting Bitmask for one or more of the following bits:
8382 - LOG_CLOSE_INDEX : if we should close the index file
8383 - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
8384 at once after close.
8385 - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
8386 - LOG_CLOSE_DELAYED_CLOSE : do not yet close the file and clear the
8387 LOG_EVENT_BINLOG_IN_USE_F flag
8388
8389 @note
8390 One can do an open on the object at once after doing a close.
8391 The internal structures are not freed until cleanup() is called
8392*/
8393
void MYSQL_BIN_LOG::close(uint exiting)
{ // One can't set log_type here!
  /*
    Becomes true when write_state_to_file() fails below; in that case the
    in-use flag is deliberately NOT cleared in the binlog file.
  */
  bool failed_to_save_state= false;
  DBUG_ENTER("MYSQL_BIN_LOG::close");
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));

  /* The caller is required to hold LOCK_log. */
  mysql_mutex_assert_owner(&LOCK_log);

  /* The log file itself is only touched when the log is currently open. */
  if (log_state == LOG_OPENED)
  {
#ifdef HAVE_REPLICATION
    if (log_type == LOG_BIN &&
        (exiting & LOG_CLOSE_STOP_EVENT))
    {
      /* Append a Stop event, flush it and publish the new end position. */
      Stop_log_event s;
      // the checksumming rule for relay-log case is similar to Rotate
      s.checksum_alg= is_relay_log ? relay_log_checksum_alg
                                   : (enum_binlog_checksum_alg)binlog_checksum_options;
      DBUG_ASSERT(!is_relay_log ||
                  relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
      write_event(&s);
      bytes_written+= s.data_written;
      flush_io_cache(&log_file);
      update_binlog_end_pos();

      /*
        When we shut down server, write out the binlog state to a separate
        file so we do not have to scan an entire binlog file to recover it
        at next server start.

        Note that this must be written and synced to disk before marking the
        last binlog file as "not crashed".
      */
      if (!is_relay_log && write_state_to_file())
      {
        sql_print_error("Failed to save binlog GTID state during shutdown. "
                        "Binlog will be marked as crashed, so that crash "
                        "recovery can recover the state at next server "
                        "startup.");
        /*
          Leave binlog file marked as crashed, so we can recover state by
          scanning it now that we failed to write out the state properly.
        */
        failed_to_save_state= true;
      }
    }
#endif /* HAVE_REPLICATION */

    /* don't pwrite in a file opened with O_APPEND - it doesn't work */
    if (log_file.type == WRITE_CACHE && log_type == LOG_BIN
        && !(exiting & LOG_CLOSE_DELAYED_CLOSE))
    {
      /* Remember the current file position; it is restored after the pwrite. */
      my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
      /* Mark the file as cleanly closed unless saving the state failed. */
      if (!failed_to_save_state)
        clear_inuse_flag_when_closing(log_file.file);
      /*
        Restore position so that anything we have in the IO_cache is written
        to the correct position.
        We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
        original position on system that doesn't support pwrite().
      */
      mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
    }

    /* this will cleanup IO_CACHE, sync and close the file */
    MYSQL_LOG::close(exiting);
  }

  /*
    The following test is needed even if is_open() is not set, as we may have
    called a not complete close earlier and the index file is still open.
  */

  if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
  {
    end_io_cache(&index_file);
    /*
      A failing close of the index file is reported only when no write error
      has been recorded yet; write_error is then set.
    */
    if (unlikely(mysql_file_close(index_file.file, MYF(0)) < 0) &&
        ! write_error)
    {
      write_error= 1;
      sql_print_error(ER_THD_OR_DEFAULT(current_thd, ER_ERROR_ON_WRITE),
                      index_file_name, errno);
    }
  }
  /* Final state depends on whether the caller announced an immediate re-open. */
  log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
  /* The log name is released and reset on every close. */
  my_free(name);
  name= NULL;
  DBUG_VOID_RETURN;
}
8483
8484
8485/*
8486 Clear the LOG_EVENT_BINLOG_IN_USE_F; this marks the binlog file as cleanly
8487 closed and not needing crash recovery.
8488*/
8489void MYSQL_BIN_LOG::clear_inuse_flag_when_closing(File file)
8490{
8491 my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
8492 uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
8493 mysql_file_pwrite(file, &flags, 1, offset, MYF(0));
8494}
8495
8496
8497void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8498{
8499 /*
8500 We need to take locks, otherwise this may happen:
8501 new_file() is called, calls open(old_max_size), then before open() starts,
8502 set_max_size() sets max_size to max_size_arg, then open() starts and
8503 uses the old_max_size argument, so max_size_arg has been overwritten and
8504 it's like if the SET command was never run.
8505 */
8506 DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8507 mysql_mutex_lock(&LOCK_log);
8508 if (is_open())
8509 max_size= max_size_arg;
8510 mysql_mutex_unlock(&LOCK_log);
8511 DBUG_VOID_RETURN;
8512}
8513
8514
8515/**
8516 Check if a string is a valid number.
8517
8518 @param str String to test
8519 @param res Store value here
8520 @param allow_wildcards Set to 1 if we should ignore '%' and '_'
8521
8522 @note
    For the moment the allow_wildcards argument is not used.
    This function should be moved to some other file.
8525
8526 @retval
8527 1 String is a number
8528 @retval
8529 0 String is not a number
8530*/
8531
8532static bool test_if_number(const char *str, ulong *res, bool allow_wildcards)
8533{
8534 int flag;
8535 const char *start;
8536 DBUG_ENTER("test_if_number");
8537
8538 flag=0; start=str;
8539 while (*str++ == ' ') ;
8540 if (*--str == '-' || *str == '+')
8541 str++;
8542 while (my_isdigit(files_charset_info,*str) ||
8543 (allow_wildcards && (*str == wild_many || *str == wild_one)))
8544 {
8545 flag=1;
8546 str++;
8547 }
8548 if (*str == '.')
8549 {
8550 for (str++ ;
8551 my_isdigit(files_charset_info,*str) ||
8552 (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
8553 str++, flag=1) ;
8554 }
8555 if (*str != 0 || flag == 0)
8556 DBUG_RETURN(0);
8557 if (res)
8558 *res=atol(start);
8559 DBUG_RETURN(1); /* Number ok */
8560} /* test_if_number */
8561
8562
/**
  Report @a message together with a description of the last system error.

  On Windows the text for GetLastError() is obtained with FormatMessage();
  if that yields nothing, only the message itself is logged. Elsewhere the
  text comes from strerror(errno) when HAVE_STRERROR is defined, and from
  perror() otherwise.

  @param message  Text that prefixes the system error description
*/
void sql_perror(const char *message)
{
#if defined(_WIN32)
  char *system_text;
  DWORD last_error= GetLastError();
  DWORD text_length=
    FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
                  FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_error,
                  MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                  (LPSTR)&system_text, 0, NULL );

  if (text_length == 0)
  {
    /* No system text available: log the caller's message alone. */
    sql_print_error("%s", message);
    return;
  }

  sql_print_error("%s: %s",message, system_text);
  /* The buffer was allocated by FormatMessage (ALLOCATE_BUFFER flag). */
  LocalFree((HLOCAL)system_text);
#elif defined(HAVE_STRERROR)
  sql_print_error("%s: %s",message, strerror(errno));
#else
  perror(message);
#endif
}
8585
8586
8587/*
8588 Change the file associated with two output streams. Used to
8589 redirect stdout and stderr to a file. The streams are reopened
8590 only for appending (writing at end of file).
8591*/
8592extern "C" my_bool reopen_fstreams(const char *filename,
8593 FILE *outstream, FILE *errstream)
8594{
8595 if (outstream && !my_freopen(filename, "a", outstream))
8596 return TRUE;
8597
8598 if (errstream && !my_freopen(filename, "a", errstream))
8599 return TRUE;
8600
8601 /* The error stream must be unbuffered. */
8602 if (errstream)
8603 setbuf(errstream, NULL);
8604
8605 return FALSE;
8606}
8607
8608
8609/*
8610 Unfortunately, there seems to be no good way
8611 to restore the original streams upon failure.
8612*/
8613static bool redirect_std_streams(const char *file)
8614{
8615 if (reopen_fstreams(file, stdout, stderr))
8616 return TRUE;
8617
8618 setbuf(stderr, NULL);
8619 return FALSE;
8620}
8621
8622
8623bool flush_error_log()
8624{
8625 bool result= 0;
8626 if (opt_error_log)
8627 {
8628 mysql_mutex_lock(&LOCK_error_log);
8629 if (redirect_std_streams(log_error_file))
8630 result= 1;
8631 mysql_mutex_unlock(&LOCK_error_log);
8632 }
8633 return result;
8634}
8635
8636#ifdef _WIN32
8637static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
8638 size_t length, size_t buffLen)
8639{
8640 HANDLE event;
8641 char *buffptr= buff;
8642 DBUG_ENTER("print_buffer_to_nt_eventlog");
8643
8644 /* Add ending CR/LF's to string, overwrite last chars if necessary */
8645 strmov(buffptr+MY_MIN(length, buffLen-5), "\r\n\r\n");
8646
8647 setup_windows_event_source();
8648 if ((event= RegisterEventSource(NULL,"MySQL")))
8649 {
8650 switch (level) {
8651 case ERROR_LEVEL:
8652 ReportEvent(event, EVENTLOG_ERROR_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8653 (LPCSTR*)&buffptr, NULL);
8654 break;
8655 case WARNING_LEVEL:
8656 ReportEvent(event, EVENTLOG_WARNING_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8657 (LPCSTR*) &buffptr, NULL);
8658 break;
8659 case INFORMATION_LEVEL:
8660 ReportEvent(event, EVENTLOG_INFORMATION_TYPE, 0, MSG_DEFAULT, NULL, 1,
8661 0, (LPCSTR*) &buffptr, NULL);
8662 break;
8663 }
8664 DeregisterEventSource(event);
8665 }
8666
8667 DBUG_VOID_RETURN;
8668}
8669#endif /* _WIN32 */
8670
8671
8672#ifndef EMBEDDED_LIBRARY
8673static void print_buffer_to_file(enum loglevel level, const char *buffer,
8674 size_t length)
8675{
8676 time_t skr;
8677 struct tm tm_tmp;
8678 struct tm *start;
8679 THD *thd= 0;
8680 size_t tag_length= 0;
8681 char tag[NAME_LEN];
8682 DBUG_ENTER("print_buffer_to_file");
8683 DBUG_PRINT("enter",("buffer: %s", buffer));
8684
8685 if (mysqld_server_initialized && (thd= current_thd))
8686 {
8687 if (thd->connection_name.length)
8688 {
8689 /*
8690 Add tag for slaves so that the user can see from which connection
8691 the error originates.
8692 */
8693 tag_length= my_snprintf(tag, sizeof(tag),
8694 ER_THD(thd, ER_MASTER_LOG_PREFIX),
8695 (int) thd->connection_name.length,
8696 thd->connection_name.str);
8697 }
8698 }
8699
8700 mysql_mutex_lock(&LOCK_error_log);
8701
8702 skr= my_time(0);
8703 localtime_r(&skr, &tm_tmp);
8704 start=&tm_tmp;
8705
8706 fprintf(stderr, "%d-%02d-%02d %2d:%02d:%02d %lu [%s] %.*s%.*s\n",
8707 start->tm_year + 1900,
8708 start->tm_mon+1,
8709 start->tm_mday,
8710 start->tm_hour,
8711 start->tm_min,
8712 start->tm_sec,
8713 (unsigned long) (thd ? thd->thread_id : 0),
8714 (level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
8715 "Warning" : "Note"),
8716 (int) tag_length, tag,
8717 (int) length, buffer);
8718
8719 fflush(stderr);
8720
8721 mysql_mutex_unlock(&LOCK_error_log);
8722 DBUG_VOID_RETURN;
8723}
8724
8725/**
8726 Prints a printf style message to the error log and, under NT, to the
8727 Windows event log.
8728
8729 This function prints the message into a buffer and then sends that buffer
8730 to other functions to write that message to other logging sources.
8731
8732 @param level The level of the msg significance
8733 @param format Printf style format of message
8734 @param args va_list list of arguments for the message
8735
8736 @returns
8737 The function always returns 0. The return value is present in the
8738 signature to be compatible with other logging routines, which could
8739 return an error (e.g. logging to the log tables)
8740*/
8741int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
8742{
8743 char buff[1024];
8744 size_t length;
8745 DBUG_ENTER("vprint_msg_to_log");
8746
8747 length= my_vsnprintf(buff, sizeof(buff), format, args);
8748 print_buffer_to_file(level, buff, length);
8749
8750#ifdef _WIN32
8751 print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff));
8752#endif
8753
8754 DBUG_RETURN(0);
8755}
8756#endif /* EMBEDDED_LIBRARY */
8757
8758
8759void sql_print_error(const char *format, ...)
8760{
8761 va_list args;
8762 DBUG_ENTER("sql_print_error");
8763
8764 va_start(args, format);
8765 error_log_print(ERROR_LEVEL, format, args);
8766 va_end(args);
8767
8768 DBUG_VOID_RETURN;
8769}
8770
8771
8772void sql_print_warning(const char *format, ...)
8773{
8774 va_list args;
8775 DBUG_ENTER("sql_print_warning");
8776
8777 va_start(args, format);
8778 error_log_print(WARNING_LEVEL, format, args);
8779 va_end(args);
8780
8781 DBUG_VOID_RETURN;
8782}
8783
8784
8785void sql_print_information(const char *format, ...)
8786{
8787 va_list args;
8788 DBUG_ENTER("sql_print_information");
8789
8790 va_start(args, format);
8791 sql_print_information_v(format, args);
8792 va_end(args);
8793
8794 DBUG_VOID_RETURN;
8795}
8796
8797void sql_print_information_v(const char *format, va_list ap)
8798{
8799 if (disable_log_notes)
8800 return; // Skip notes during start/shutdown
8801
8802 error_log_print(INFORMATION_LEVEL, format, ap);
8803}
8804
8805void
8806TC_LOG::run_prepare_ordered(THD *thd, bool all)
8807{
8808 Ha_trx_info *ha_info=
8809 all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
8810
8811 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8812 for (; ha_info; ha_info= ha_info->next())
8813 {
8814 handlerton *ht= ha_info->ht();
8815 if (!ht->prepare_ordered)
8816 continue;
8817 ht->prepare_ordered(ht, thd, all);
8818 }
8819}
8820
8821
8822void
8823TC_LOG::run_commit_ordered(THD *thd, bool all)
8824{
8825 Ha_trx_info *ha_info=
8826 all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
8827
8828 mysql_mutex_assert_owner(&LOCK_commit_ordered);
8829 for (; ha_info; ha_info= ha_info->next())
8830 {
8831 handlerton *ht= ha_info->ht();
8832 if (!ht->commit_ordered)
8833 continue;
8834 ht->commit_ordered(ht, thd, all);
8835 DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
8836 }
8837}
8838
8839
/*
  Log one transaction in the mmap-based TC log and run the ordered engine
  hooks around it.

  When need_prepare_ordered is set, run_prepare_ordered() is called under
  LOCK_prepare_ordered. When need_commit_ordered is set as well, this thread
  also links itself into commit_ordered_queue so that run_commit_ordered()
  can later be executed in the same sequence.

  Returns the cookie produced by log_one_transaction(), or 0 when xid is 0,
  when logging failed, or when thd->wait_for_prior_commit() reported an
  error.
*/
int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
                               bool need_prepare_ordered,
                               bool need_commit_ordered)
{
  int cookie;
  /* Queue node for this thread; it lives on this function's stack. */
  struct commit_entry entry;
  /* Only assigned (and only read) when both need_* flags are set. */
  bool UNINIT_VAR(is_group_commit_leader);

  if (need_prepare_ordered)
  {
    mysql_mutex_lock(&LOCK_prepare_ordered);
    run_prepare_ordered(thd, all);
    if (need_commit_ordered)
    {
      /*
        Must put us in queue so we can run_commit_ordered() in same sequence
        as we did run_prepare_ordered().
      */
      thd->clear_wakeup_ready();
      entry.thd= thd;
      /* Push at the head; the thread that found the queue empty leads. */
      commit_entry *previous_queue= commit_ordered_queue;
      entry.next= previous_queue;
      commit_ordered_queue= &entry;
      is_group_commit_leader= (previous_queue == NULL);
    }
    mysql_mutex_unlock(&LOCK_prepare_ordered);
  }

  /* A non-zero result from wait_for_prior_commit() ends the call with 0. */
  if (thd->wait_for_prior_commit())
    return 0;

  /* Without an xid nothing is logged and the cookie stays 0. */
  cookie= 0;
  if (xid)
    cookie= log_one_transaction(xid);

  if (need_commit_ordered)
  {
    if (need_prepare_ordered)
    {
      /*
        We did the run_prepare_ordered() serialised, then ran the log_xid() in
        parallel. Now we have to do run_commit_ordered() serialised in the
        same sequence as run_prepare_ordered().

        We do this starting from the head of the queue, each thread doing
        run_commit_ordered() and signalling the next in queue.
      */
      if (is_group_commit_leader)
      {
        /* The first in queue starts the ball rolling. */
        mysql_mutex_lock(&LOCK_prepare_ordered);
        /* Wait until the previously detached queue has been fully processed. */
        while (commit_ordered_queue_busy)
          mysql_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
        /* Detach the whole queue; later arrivals start a new one. */
        commit_entry *queue= commit_ordered_queue;
        commit_ordered_queue= NULL;
        /*
          Mark the queue busy while we bounce it from one thread to the
          next.
        */
        commit_ordered_queue_busy= true;
        mysql_mutex_unlock(&LOCK_prepare_ordered);

        /* Reverse the queue list so we get correct order. */
        commit_entry *prev= NULL;
        while (queue)
        {
          commit_entry *next= queue->next;
          queue->next= prev;
          prev= queue;
          queue= next;
        }
        /* After the reversal our own entry is the first element. */
        DBUG_ASSERT(prev == &entry && prev->thd == thd);
      }
      else
      {
        /* Not first in queue; just wait until previous thread wakes us up. */
        thd->wait_for_wakeup_ready();
      }
    }

    /* Only run commit_ordered() if log_xid was successful. */
    if (cookie)
    {
      mysql_mutex_lock(&LOCK_commit_ordered);
      run_commit_ordered(thd, all);
      mysql_mutex_unlock(&LOCK_commit_ordered);
    }

    if (need_prepare_ordered)
    {
      /* Pass the baton on, or release the queue if we were the last entry. */
      commit_entry *next= entry.next;
      if (next)
      {
        next->thd->signal_wakeup_ready();
      }
      else
      {
        mysql_mutex_lock(&LOCK_prepare_ordered);
        commit_ordered_queue_busy= false;
        mysql_cond_signal(&COND_queue_busy);
        mysql_mutex_unlock(&LOCK_prepare_ordered);
      }
    }
  }

  return cookie;
}
8947
8948
8949/********* transaction coordinator log for 2pc - mmap() based solution *******/
8950
8951/*
8952 the log consists of a file, mapped to memory.
8953 file is divided into pages of tc_log_page_size size.
8954 (usable size of the first page is smaller because of the log header)
8955 there is a PAGE control structure for each page
8956 each page (or rather its PAGE control structure) can be in one of
8957 the three states - active, syncing, pool.
8958 there could be only one page in the active or syncing state,
8959 but many in pool - pool is a fifo queue.
8960 the usual lifecycle of a page is pool->active->syncing->pool.
8961 the "active" page is a page where new xid's are logged.
8962 the page stays active as long as the syncing slot is taken.
8963 the "syncing" page is being synced to disk. no new xid can be added to it.
8964 when the syncing is done the page is moved to a pool and an active page
8965 becomes "syncing".
8966
8967 the result of such an architecture is a natural "commit grouping" -
8968 If commits are coming faster than the system can sync, they do not
8969 stall. Instead, all commits that came since the last sync are
8970 logged to the same "active" page, and they all are synced with the next -
  one - sync. Thus, though individual commits are delayed, throughput
8972 is not decreasing.
8973
8974 when an xid is added to an active page, the thread of this xid waits
8975 for a page's condition until the page is synced. when syncing slot
8976 becomes vacant one of these waiters is awaken to take care of syncing.
8977 it syncs the page and signals all waiters that the page is synced.
8978 PAGE::waiters is used to count these waiters, and a page may never
8979 become active again until waiters==0 (that is all waiters from the
8980 previous sync have noticed that the sync was completed)
8981
8982 note, that the page becomes "dirty" and has to be synced only when a
8983 new xid is added into it. Removing a xid from a page does not make it
8984 dirty - we don't sync xid removals to disk.
8985*/
8986
/* Incremented by TC_LOG_MMAP::overflow() each time it waits for a page. */
ulong tc_log_page_waits= 0;

#ifdef HAVE_MMAP

/*
  Bytes reserved at the start of the first page: the magic signature plus
  one byte that TC_LOG_MMAP::open() fills with total_ha_2pc.
*/
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)

/* Signature copied to the beginning of the mapped log by open(). */
static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};

/* Length given to the log file when open() has to create it. */
ulong opt_tc_log_size;
/*
  tc_log_page_size is set from my_getpagesize() in open(). The two
  "pages used" counters are updated in get_active_from_pool() (and the
  current one is decremented in delete_entry()).
*/
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
8997
/*
  Open (or create) the memory-mapped TC log named by opt_name and set up
  the page control structures.

  An existing file is treated as left over from a crash and triggers
  recover(); a missing file is created with opt_tc_log_size bytes.

  'inited' records how far initialisation got (1 .. 6) so that close(),
  called on the error path, knows what to undo.

  Returns 0 on success and 1 on failure.
*/
int TC_LOG_MMAP::open(const char *opt_name)
{
  uint i;
  bool crashed=FALSE;
  PAGE *pg;

  DBUG_ASSERT(total_ha_2pc > 1);
  DBUG_ASSERT(opt_name && opt_name[0]);

  tc_log_page_size= my_getpagesize();

  fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
  if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR, MYF(0))) < 0)
  {
    /* Only "file does not exist" leads to creating a fresh log. */
    if (my_errno != ENOENT)
      goto err;
    /* Returns directly, without going through close(). */
    if (using_heuristic_recover())
      return 1;
    if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE,
                               O_RDWR, MYF(MY_WME))) < 0)
      goto err;
    inited=1;
    file_length= opt_tc_log_size;
    if (mysql_file_chsize(fd, file_length, 0, MYF(MY_WME)))
      goto err;
  }
  else
  {
    /* The file already exists: handle it as a log left over from a crash. */
    inited= 1;
    crashed= TRUE;
    sql_print_information("Recovering after a crash using %s", opt_name);
    if (tc_heuristic_recover)
    {
      sql_print_error("Cannot perform automatic crash recovery when "
                      "--tc-heuristic-recover is used");
      goto err;
    }
    /* The existing length must be a whole number of pages. */
    file_length= mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
    if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
      goto err;
  }

  data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
                         MAP_NOSYNC|MAP_SHARED, fd, 0);
  if (data == MAP_FAILED)
  {
    my_errno=errno;
    goto err;
  }
  inited=2;

  /*
    NOTE(review): the cast binds tighter than the division, so file_length
    is converted to uint before it is divided. If file_length is wider than
    uint, a very large value would be truncated first - confirm whether
    that is intended.
  */
  npages=(uint)file_length/tc_log_page_size;
  if (npages < 3) // to guarantee non-empty pool
    goto err;
  if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
    goto err;
  inited=3;
  /* Every page starts in the pool, chained to its successor in the array. */
  for (pg=pages, i=0; i < npages; i++, pg++)
  {
    pg->next=pg+1;
    pg->waiters=0;
    pg->state=PS_POOL;
    mysql_mutex_init(key_PAGE_lock, &pg->lock, MY_MUTEX_INIT_FAST);
    mysql_cond_init(key_PAGE_cond, &pg->cond, 0);
    pg->ptr= pg->start=(my_xid *)(data + i*tc_log_page_size);
    pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
    pg->end=pg->start + pg->size;
  }
  /* The first page has fewer slots because it also holds the log header. */
  pages[0].size=pages[0].free=
    (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
  pages[0].start=pages[0].end-pages[0].size;
  /* Terminate the chain at the last page. */
  pages[npages-1].next=0;
  inited=4;

  if (crashed && recover())
    goto err;

  /* Write the header (magic + total_ha_2pc) and sync the first page. */
  memcpy(data, tc_log_magic, sizeof(tc_log_magic));
  data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
  my_msync(fd, data, tc_log_page_size, MS_SYNC);
  inited=5;

  mysql_mutex_init(key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_LOCK_active, &LOCK_active, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_LOCK_pending_checkpoint, &LOCK_pending_checkpoint,
                   MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_COND_active, &COND_active, 0);
  mysql_cond_init(key_COND_pool, &COND_pool, 0);
  mysql_cond_init(key_TC_LOG_MMAP_COND_queue_busy, &COND_queue_busy, 0);

  inited=6;

  /* Page 0 becomes the active page; the pool starts at page 1. */
  syncing= 0;
  active=pages;
  DBUG_ASSERT(npages >= 2);
  pool=pages+1;
  pool_last_ptr= &((pages+npages-1)->next);
  commit_ordered_queue= NULL;
  commit_ordered_queue_busy= false;

  return 0;

err:
  /* close() undoes whatever the current value of 'inited' says was set up. */
  close();
  return 1;
}
9105
9106/**
  There is no active page, so get one from the pool.
9108
9109 Two strategies here:
9110 -# take the first from the pool
9111 -# if there're waiters - take the one with the most free space.
9112
9113 @todo
9114 page merging. try to allocate adjacent page first,
9115 so that they can be flushed both in one sync
9116*/
9117
/*
  Locking: the caller must hold LOCK_active (asserted below). LOCK_pool is
  taken and released inside this function. On return the lock of the newly
  selected active page (active->lock) is held and is NOT released here.
*/
void TC_LOG_MMAP::get_active_from_pool()
{
  /* p walks the pool's link pointers; best_p is the link to the chosen page. */
  PAGE **p, **best_p=0;
  int best_free;

  mysql_mutex_lock(&LOCK_pool);

  do
  {
    best_p= p= &pool;
    if ((*p)->waiters == 0 && (*p)->free > 0) // can the first page be used ?
      break; // yes - take it.

    best_free=0; // no - trying second strategy
    /* Scan the rest of the pool for the waiter-free page with most room. */
    for (p=&(*p)->next; *p; p=&(*p)->next)
    {
      if ((*p)->waiters == 0 && (*p)->free > best_free)
      {
        best_free=(*p)->free;
        best_p=p;
      }
    }
  }
  /*
    Nothing usable found: overflow() waits on COND_pool and always returns 1,
    so the search is repeated after every wake-up.
  */
  while ((*best_p == 0 || best_free == 0) && overflow());

  mysql_mutex_assert_owner(&LOCK_active);
  active=*best_p;

  /* Unlink the page from the pool. */
  if (!(*best_p)->next)
    pool_last_ptr= best_p;              // the last page is being removed
  *best_p=(*best_p)->next;
  mysql_mutex_unlock(&LOCK_pool);

  mysql_mutex_lock(&active->lock);
  if (active->free == active->size) // we've chosen an empty page
  {
    /* An empty page comes into use: update the usage statistics. */
    tc_log_cur_pages_used++;
    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
  }
}
9159
9160/**
9161 @todo
9162 perhaps, increase log size ?
9163*/
9164int TC_LOG_MMAP::overflow()
9165{
9166 /*
9167 simple overflow handling - just wait
9168 TODO perhaps, increase log size ?
9169 let's check the behaviour of tc_log_page_waits first
9170 */
9171 tc_log_page_waits++;
9172 mysql_cond_wait(&COND_pool, &LOCK_pool);
9173 return 1; // always return 1
9174}
9175
9176/**
9177 Record that transaction XID is committed on the persistent storage.
9178
9179 This function is called in the middle of two-phase commit:
9180 First all resources prepare the transaction, then tc_log->log() is called,
9181 then all resources commit the transaction, then tc_log->unlog() is called.
9182
9183 All access to active page is serialized but it's not a problem, as
9184 we're assuming that fsync() will be a main bottleneck.
9185 That is, parallelizing writes to log pages we'll decrease number of
9186 threads waiting for a page, but then all these threads will be waiting
9187 for a fsync() anyway
9188
9189 If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
9190 records XID in a special Xid_log_event.
9191 If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
9192 log.
9193
9194 @retval
9195 0 - error
9196 @retval
9197 \# - otherwise, "cookie", a number that will be passed as an argument
9198 to unlog() call. tc_log can define it any way it wants,
9199 and use for whatever purposes. TC_LOG_MMAP sets it
9200 to the position in memory where xid was logged to.
9201*/
9202
int TC_LOG_MMAP::log_one_transaction(my_xid xid)
{
  int err;
  PAGE *p;
  ulong cookie;

  mysql_mutex_lock(&LOCK_active);

  /*
    if the active page is full - just wait...
    frankly speaking, active->free here accessed outside of mutex
    protection, but it's safe, because it only means we may miss an
    unlog() for the active page, and we're not waiting for it here -
    unlog() does not signal COND_active.
  */
  while (unlikely(active && active->free == 0))
    mysql_cond_wait(&COND_active, &LOCK_active);

  /* no active page ? take one from the pool */
  if (active == 0)
    get_active_from_pool();             // comes back with active->lock held
  else
    mysql_mutex_lock(&active->lock);

  /* From here on both LOCK_active and p->lock are held. */
  p=active;

  /*
    p->free is always > 0 here because to decrease it one needs
    to take p->lock and before it one needs to take LOCK_active.
    But checked that active->free > 0 under LOCK_active and
    haven't release it ever since
  */

  /* searching for an empty slot */
  while (*p->ptr)
  {
    p->ptr++;
    DBUG_ASSERT(p->ptr < p->end); // because p->free > 0
  }

  /* found! store xid there and mark the page dirty */
  /* The cookie is the byte offset of the slot from the start of 'data'. */
  cookie= (ulong)((uchar *)p->ptr - data); // can never be zero
  *p->ptr++= xid;
  p->free--;
  p->state= PS_DIRTY;
  mysql_mutex_unlock(&p->lock);

  mysql_mutex_lock(&LOCK_sync);
  if (syncing)
  { // somebody's syncing. let's wait
    mysql_mutex_unlock(&LOCK_active);
    mysql_mutex_lock(&p->lock);
    p->waiters++;
    /*
      Sleep until either our page is no longer dirty or the syncing slot
      becomes vacant. p->lock is dropped around the wait, which itself
      uses LOCK_sync.
    */
    while (p->state == PS_DIRTY && syncing)
    {
      mysql_mutex_unlock(&p->lock);
      mysql_cond_wait(&p->cond, &LOCK_sync);
      mysql_mutex_lock(&p->lock);
    }
    p->waiters--;
    err= p->state == PS_ERROR;
    if (p->state != PS_DIRTY) // page was synced
    {
      mysql_mutex_unlock(&LOCK_sync);
      if (p->waiters == 0)
        mysql_cond_signal(&COND_pool); // in case somebody's waiting
      mysql_mutex_unlock(&p->lock);
      goto done; // we're done
    }
    /* Still dirty and the slot is vacant: this thread does the sync itself. */
    DBUG_ASSERT(!syncing);
    mysql_mutex_unlock(&p->lock);
    syncing = p;
    mysql_mutex_unlock(&LOCK_sync);

    mysql_mutex_lock(&LOCK_active);
    active=0; // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  else
  {
    syncing = p; // place is vacant - take it
    mysql_mutex_unlock(&LOCK_sync);
    active = 0; // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  err= sync();

done:
  /* A failed sync is reported to the caller as cookie 0. */
  return err ? 0 : cookie;
}
9295
/*
  Flush the page in the 'syncing' slot with my_msync(), append it to the
  end of the pool, and free the syncing slot.

  Returns the result of my_msync(); when it is non-zero the page is put
  into state PS_ERROR instead of PS_POOL.
*/
int TC_LOG_MMAP::sync()
{
  int err;

  DBUG_ASSERT(syncing != active);

  /*
    sit down and relax - this can take a while...
    note - no locks are held at this point
  */
  err= my_msync(fd, syncing->start, syncing->size * sizeof(my_xid), MS_SYNC);

  /* page is synced. let's move it to the pool */
  mysql_mutex_lock(&LOCK_pool);
  /* Append at the tail of the pool list and advance the tail pointer. */
  (*pool_last_ptr)=syncing;
  pool_last_ptr=&(syncing->next);
  syncing->next=0;
  syncing->state= err ? PS_ERROR : PS_POOL;
  mysql_cond_signal(&COND_pool); // in case somebody's waiting
  mysql_mutex_unlock(&LOCK_pool);

  /* marking 'syncing' slot free */
  mysql_mutex_lock(&LOCK_sync);
  mysql_cond_broadcast(&syncing->cond); // signal "sync done"
  syncing=0;
  /*
    we check the "active" pointer without LOCK_active. Still, it's safe -
    "active" can change from NULL to not NULL any time, but it
    will take LOCK_sync before waiting on active->cond. That is, it can never
    miss a signal.
    And "active" can change to NULL only by the syncing thread
    (the thread that will send a signal below)
  */
  if (active)
    mysql_cond_signal(&active->cond); // wake up a new syncer
  mysql_mutex_unlock(&LOCK_sync);
  return err;
}
9334
9335static void
9336mmap_do_checkpoint_callback(void *data)
9337{
9338 TC_LOG_MMAP::pending_cookies *pending=
9339 static_cast<TC_LOG_MMAP::pending_cookies *>(data);
9340 ++pending->pending_count;
9341}
9342
9343int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
9344{
9345 pending_cookies *full_buffer= NULL;
9346 uint32 ncookies= tc_log_page_size / sizeof(my_xid);
9347 DBUG_ASSERT(*(my_xid *)(data+cookie) == xid);
9348
9349 /*
9350 Do not delete the entry immediately, as there may be participating storage
9351 engines which implement commit_checkpoint_request(), and thus have not yet
9352 flushed the commit durably to disk.
9353
9354 Instead put it in a queue - and periodically, we will request a checkpoint
9355 from all engines and delete a whole batch at once.
9356 */
9357 mysql_mutex_lock(&LOCK_pending_checkpoint);
9358 if (pending_checkpoint == NULL)
9359 {
9360 uint32 size= sizeof(*pending_checkpoint) + sizeof(ulong) * (ncookies - 1);
9361 if (!(pending_checkpoint=
9362 (pending_cookies *)my_malloc(size, MYF(MY_ZEROFILL))))
9363 {
9364 my_error(ER_OUTOFMEMORY, MYF(0), size);
9365 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9366 return 1;
9367 }
9368 }
9369
9370 pending_checkpoint->cookies[pending_checkpoint->count++]= cookie;
9371 if (pending_checkpoint->count == ncookies)
9372 {
9373 full_buffer= pending_checkpoint;
9374 pending_checkpoint= NULL;
9375 }
9376 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9377
9378 if (full_buffer)
9379 {
9380 /*
9381 We do an extra increment and notify here - this ensures that
9382 things work also if there are no engines at all that support
9383 commit_checkpoint_request.
9384 */
9385 ++full_buffer->pending_count;
9386 ha_commit_checkpoint_request(full_buffer, mmap_do_checkpoint_callback);
9387 commit_checkpoint_notify(full_buffer);
9388 }
9389 return 0;
9390}
9391
9392
9393void
9394TC_LOG_MMAP::commit_checkpoint_notify(void *cookie)
9395{
9396 uint count;
9397 pending_cookies *pending= static_cast<pending_cookies *>(cookie);
9398 mysql_mutex_lock(&LOCK_pending_checkpoint);
9399 DBUG_ASSERT(pending->pending_count > 0);
9400 count= --pending->pending_count;
9401 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9402 if (count == 0)
9403 {
9404 uint i;
9405 for (i= 0; i < tc_log_page_size / sizeof(my_xid); ++i)
9406 delete_entry(pending->cookies[i]);
9407 my_free(pending);
9408 }
9409}
9410
9411
9412/**
9413 erase xid from the page, update page free space counters/pointers.
9414 cookie points directly to the memory where xid was logged.
9415*/
9416
9417int TC_LOG_MMAP::delete_entry(ulong cookie)
9418{
9419 PAGE *p=pages+(cookie/tc_log_page_size);
9420 my_xid *x=(my_xid *)(data+cookie);
9421
9422 DBUG_ASSERT(x >= p->start && x < p->end);
9423
9424 mysql_mutex_lock(&p->lock);
9425 *x=0;
9426 p->free++;
9427 DBUG_ASSERT(p->free <= p->size);
9428 set_if_smaller(p->ptr, x);
9429 if (p->free == p->size) // the page is completely empty
9430 statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
9431 if (p->waiters == 0) // the page is in pool and ready to rock
9432 mysql_cond_signal(&COND_pool); // ping ... for overflow()
9433 mysql_mutex_unlock(&p->lock);
9434 return 0;
9435}
9436
9437void TC_LOG_MMAP::close()
9438{
9439 uint i;
9440 switch (inited) {
9441 case 6:
9442 mysql_mutex_destroy(&LOCK_sync);
9443 mysql_mutex_destroy(&LOCK_active);
9444 mysql_mutex_destroy(&LOCK_pool);
9445 mysql_mutex_destroy(&LOCK_pending_checkpoint);
9446 mysql_cond_destroy(&COND_pool);
9447 mysql_cond_destroy(&COND_active);
9448 mysql_cond_destroy(&COND_queue_busy);
9449 /* fall through */
9450 case 5:
9451 data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails
9452 /* fall through */
9453 case 4:
9454 for (i=0; i < npages; i++)
9455 {
9456 if (pages[i].ptr == 0)
9457 break;
9458 mysql_mutex_destroy(&pages[i].lock);
9459 mysql_cond_destroy(&pages[i].cond);
9460 }
9461 /* fall through */
9462 case 3:
9463 my_free(pages);
9464 /* fall through */
9465 case 2:
9466 my_munmap((char*)data, (size_t)file_length);
9467 /* fall through */
9468 case 1:
9469 mysql_file_close(fd, MYF(0));
9470 }
9471 if (inited>=5) // cannot do in the switch because of Windows
9472 mysql_file_delete(key_file_tclog, logname, MYF(MY_WME));
9473 if (pending_checkpoint)
9474 my_free(pending_checkpoint);
9475 inited=0;
9476}
9477
9478
9479int TC_LOG_MMAP::recover()
9480{
9481 HASH xids;
9482 PAGE *p=pages, *end_p=pages+npages;
9483
9484 if (bcmp(data, tc_log_magic, sizeof(tc_log_magic)))
9485 {
9486 sql_print_error("Bad magic header in tc log");
9487 goto err1;
9488 }
9489
9490 /*
9491 the first byte after magic signature is set to current
9492 number of storage engines on startup
9493 */
9494 if (data[sizeof(tc_log_magic)] > total_ha_2pc)
9495 {
9496 sql_print_error("Recovery failed! You must enable "
9497 "all engines that were enabled at the moment of the crash");
9498 goto err1;
9499 }
9500
9501 if (my_hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
9502 sizeof(my_xid), 0, 0, MYF(0)))
9503 goto err1;
9504
9505 for ( ; p < end_p ; p++)
9506 {
9507 for (my_xid *x=p->start; x < p->end; x++)
9508 if (*x && my_hash_insert(&xids, (uchar *)x))
9509 goto err2; // OOM
9510 }
9511
9512 if (ha_recover(&xids))
9513 goto err2;
9514
9515 my_hash_free(&xids);
9516 bzero(data, (size_t)file_length);
9517 return 0;
9518
9519err2:
9520 my_hash_free(&xids);
9521err1:
9522 sql_print_error("Crash recovery failed. Either correct the problem "
9523 "(if it's, for example, out of memory error) and restart, "
9524 "or delete tc log and start mysqld with "
9525 "--tc-heuristic-recover={commit|rollback}");
9526 return 1;
9527}
9528#endif
9529
/* Pointer to a transaction coordinator log; it is not assigned in this part of the file. */
TC_LOG *tc_log;
/* Global instance of the TC_LOG_DUMMY implementation. */
TC_LOG_DUMMY tc_log_dummy;
/* Global instance of the mmap-based implementation (TC_LOG_MMAP). */
TC_LOG_MMAP tc_log_mmap;
9533
9534/**
9535 Perform heuristic recovery, if --tc-heuristic-recover was used.
9536
9537 @note
9538 no matter whether heuristic recovery was successful or not
9539 mysqld must exit. So, return value is the same in both cases.
9540
9541 @retval
9542 0 no heuristic recovery was requested
9543 @retval
9544 1 heuristic recovery was performed
9545*/
9546
9547int TC_LOG::using_heuristic_recover()
9548{
9549 if (!tc_heuristic_recover)
9550 return 0;
9551
9552 sql_print_information("Heuristic crash recovery mode");
9553 if (ha_recover(0))
9554 sql_print_error("Heuristic crash recovery failed");
9555 sql_print_information("Please restart mysqld without --tc-heuristic-recover");
9556 return 1;
9557}
9558
/****** transaction coordinator log for 2pc - binlog() based solution ******/
/*
  TC_LOG_BINLOG is only an alias: every method defined below as
  TC_LOG_BINLOG::xxx() is a member of MYSQL_BIN_LOG.
*/
#define TC_LOG_BINLOG MYSQL_BIN_LOG
9561
9562int TC_LOG_BINLOG::open(const char *opt_name)
9563{
9564 int error= 1;
9565
9566 DBUG_ASSERT(total_ha_2pc > 1);
9567 DBUG_ASSERT(opt_name && opt_name[0]);
9568
9569 if (!my_b_inited(&index_file))
9570 {
9571 /* There was a failure to open the index file, can't open the binlog */
9572 cleanup();
9573 return 1;
9574 }
9575
9576 if (using_heuristic_recover())
9577 {
9578 mysql_mutex_lock(&LOCK_log);
9579 /* generate a new binlog to mask a corrupted one */
9580 open(opt_name, LOG_BIN, 0, 0, WRITE_CACHE, max_binlog_size, 0, TRUE);
9581 mysql_mutex_unlock(&LOCK_log);
9582 cleanup();
9583 return 1;
9584 }
9585
9586 error= do_binlog_recovery(opt_name, true);
9587 binlog_state_recover_done= true;
9588 return error;
9589}
9590
/**
  This is called on shutdown, after ha_panic.

  The body is empty: this method performs no work.
*/
void TC_LOG_BINLOG::close()
{
}
9595
9596/*
9597 Do a binlog log_xid() for a group of transactions, linked through
9598 thd->next_commit_ordered.
9599*/
9600int
9601TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
9602 bool need_prepare_ordered __attribute__((unused)),
9603 bool need_commit_ordered __attribute__((unused)))
9604{
9605 int err;
9606 DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
9607
9608 binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data();
9609 if (!cache_mngr)
9610 {
9611 WSREP_DEBUG("Skipping empty log_xid: %s", thd->query());
9612 DBUG_RETURN(0);
9613 }
9614
9615 cache_mngr->using_xa= TRUE;
9616 cache_mngr->xa_xid= xid;
9617 err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
9618
9619 DEBUG_SYNC(thd, "binlog_after_log_and_order");
9620
9621 if (err)
9622 DBUG_RETURN(0);
9623
9624 bool need_unlog= cache_mngr->need_unlog;
9625 /*
9626 The transaction won't need the flag anymore.
9627 Todo/fixme: consider to move the statement into cache_mngr->reset()
9628 relocated to the current or later point.
9629 */
9630 cache_mngr->need_unlog= false;
9631 /*
9632 If using explicit user XA, we will not have XID. We must still return a
9633 non-zero cookie (as zero cookie signals error).
9634 */
9635 if (!xid || !need_unlog)
9636 DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error));
9637 else
9638 DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
9639 cache_mngr->delayed_error));
9640}
9641
9642/*
9643 After an XID is logged, we need to hold on to the current binlog file until
9644 it is fully committed in the storage engine. The reason is that crash
9645 recovery only looks at the latest binlog, so we must make sure there are no
9646 outstanding prepared (but not committed) transactions before rotating the
9647 binlog.
9648
9649 To handle this, we keep a count of outstanding XIDs. This function is used
9650 to increase this count when committing one or more transactions to the
9651 binary log.
9652*/
9653void
9654TC_LOG_BINLOG::mark_xids_active(ulong binlog_id, uint xid_count)
9655{
9656 xid_count_per_binlog *b;
9657
9658 DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
9659 DBUG_PRINT("info", ("binlog_id=%lu xid_count=%u", binlog_id, xid_count));
9660
9661 mysql_mutex_lock(&LOCK_xid_list);
9662 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
9663 while ((b= it++))
9664 {
9665 if (b->binlog_id == binlog_id)
9666 {
9667 b->xid_count += xid_count;
9668 break;
9669 }
9670 }
9671 /*
9672 As we do not delete elements until count reach zero, elements should always
9673 be found.
9674 */
9675 DBUG_ASSERT(b);
9676 mysql_mutex_unlock(&LOCK_xid_list);
9677 DBUG_VOID_RETURN;
9678}
9679
/*
  Once an XID is committed, it can no longer be needed during crash recovery,
  as it has been durably recorded on disk as "committed".

  This function is called to mark an XID this way. It needs to decrease the
  count of pending XIDs in the corresponding binlog. When the count reaches
  zero (for an "old" binlog that is not the active one), that binlog file no
  longer need to be scanned during crash recovery, so we can log a new binlog
  checkpoint.

  Parameters:
    binlog_id         id of the binlog whose pending-XID count is decremented
    write_checkpoint  when false, the function returns after the decrement
                      and never writes a binlog checkpoint event
*/
void
TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
{
  xid_count_per_binlog *b;
  bool first;
  ulong current;

  DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");

  mysql_mutex_lock(&LOCK_xid_list);
  current= current_binlog_id;
  I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
  /* 'first' stays true only if the matching entry is the head of the list. */
  first= true;
  while ((b= it++))
  {
    if (b->binlog_id == binlog_id)
    {
      --b->xid_count;

      DBUG_ASSERT(b->xid_count >= 0); // catch unmatched (++) decrement

      break;
    }
    first= false;
  }
  /* Binlog is always found, as we do not remove until count reaches 0 */
  DBUG_ASSERT(b);
  /*
    If a RESET MASTER is pending, we are about to remove all log files, and
    the RESET MASTER thread is waiting for all pending unlog() calls to
    complete while holding LOCK_log. In this case we should not log a binlog
    checkpoint event (it would be deleted immediately anyway and we would
    deadlock on LOCK_log) but just signal the thread.
  */
  if (unlikely(reset_master_pending))
  {
    mysql_cond_broadcast(&COND_xid_list);
    mysql_mutex_unlock(&LOCK_xid_list);
    DBUG_VOID_RETURN;
  }

  /*
    A checkpoint is only written when all of these hold: the binlog is not
    the current one, its count just reached zero, it is the first entry in
    the list, and the caller asked for a checkpoint.
  */
  if (likely(binlog_id == current) || b->xid_count != 0 || !first ||
      !write_checkpoint)
  {
    /* No new binlog checkpoint reached yet. */
    mysql_mutex_unlock(&LOCK_xid_list);
    DBUG_VOID_RETURN;
  }

  /*
    Now log a binlog checkpoint for the first binlog file with a non-zero count.

    Note that it is possible (though perhaps unlikely) that when count of
    binlog (N-2) drops to zero, binlog (N-1) is already at zero. So we may
    need to skip several entries before we find the one to log in the binlog
    checkpoint event.

    We chain the locking of LOCK_xid_list and LOCK_log, so that we ensure that
    Binlog_checkpoint_events are logged in order. This simplifies recovery a
    bit, as it can just take the last binlog checkpoint in the log, rather
    than compare all found against each other to find the one pointing to the
    most recent binlog.

    Note also that we need to first release LOCK_xid_list, then acquire
    LOCK_log, then re-acquire LOCK_xid_list. If we were to take LOCK_log while
    holding LOCK_xid_list, we might deadlock with other threads that take the
    locks in the opposite order.
  */

  /* Counter is raised while this thread is between the two lock holds. */
  ++mark_xid_done_waiting;
  mysql_mutex_unlock(&LOCK_xid_list);
  mysql_mutex_lock(&LOCK_log);
  mysql_mutex_lock(&LOCK_xid_list);
  --mark_xid_done_waiting;
  mysql_cond_broadcast(&COND_xid_list);
  /* We need to reload current_binlog_id due to release/re-take of lock. */
  current= current_binlog_id;

  for (;;)
  {
    /* Remove initial element(s) with zero count. */
    b= binlog_xid_count_list.head();
    /*
      We must not remove all elements in the list - the entry for the current
      binlog must be present always.
    */
    DBUG_ASSERT(b);
    if (b->binlog_id == current || b->xid_count > 0)
      break;
    WSREP_XID_LIST_ENTRY("TC_LOG_BINLOG::mark_xid_done(): Removing "
                         "xid_list_entry for %s (%lu)", b);
    my_free(binlog_xid_count_list.get());
  }

  /* LOCK_log is still held while the checkpoint event is written. */
  mysql_mutex_unlock(&LOCK_xid_list);
  write_binlog_checkpoint_event_already_locked(b->binlog_name,
                                               b->binlog_name_len);
  mysql_mutex_unlock(&LOCK_log);
  DBUG_VOID_RETURN;
}
9790
9791int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
9792{
9793 DBUG_ENTER("TC_LOG_BINLOG::unlog");
9794 if (!xid)
9795 DBUG_RETURN(0);
9796
9797 if (!BINLOG_COOKIE_IS_DUMMY(cookie))
9798 mark_xid_done(BINLOG_COOKIE_GET_ID(cookie), true);
9799 /*
9800 See comment in trx_group_commit_leader() - if rotate() gave a failure,
9801 we delay the return of error code to here.
9802 */
9803 DBUG_RETURN(BINLOG_COOKIE_GET_ERROR_FLAG(cookie));
9804}
9805
9806void
9807TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
9808{
9809 xid_count_per_binlog *entry= static_cast<xid_count_per_binlog *>(cookie);
9810 bool found_entry= false;
9811 mysql_mutex_lock(&LOCK_binlog_background_thread);
9812 /* count the same notification kind from different engines */
9813 for (xid_count_per_binlog *link= binlog_background_thread_queue;
9814 link && !found_entry; link= link->next_in_queue)
9815 {
9816 if ((found_entry= (entry == link)))
9817 entry->notify_count++;
9818 }
9819 if (!found_entry)
9820 {
9821 entry->next_in_queue= binlog_background_thread_queue;
9822 binlog_background_thread_queue= entry;
9823 }
9824 mysql_cond_signal(&COND_binlog_background_thread);
9825 mysql_mutex_unlock(&LOCK_binlog_background_thread);
9826}
9827
/*
  Binlog background thread.

  This thread is used to log binlog checkpoints in the background, rather than
  in the context of random storage engine threads that happen to call
  commit_checkpoint_notify_ha() and may not like the delays while syncing
  binlog to disk or may not be setup with all my_thread_init() and other
  necessary stuff.

  In the future, this thread could also be used to do log rotation in the
  background, which could eliminate all stalls around binlog rotations.

  The argument is unused; the function always returns 0.
*/
pthread_handler_t
binlog_background_thread(void *arg __attribute__((unused)))
{
  bool stop;
  MYSQL_BIN_LOG::xid_count_per_binlog *queue, *next;
  THD *thd;
  my_thread_init();
  DBUG_ENTER("binlog_background_thread");

  /* Set up a THD of its own for this system thread. */
  thd= new THD(next_thread_id());
  thd->system_thread= SYSTEM_THREAD_BINLOG_BACKGROUND;
  thd->thread_stack= (char*) &thd;              /* Set approximate stack start */
  thd->store_globals();
  thd->security_ctx->skip_grants();
  thd->set_command(COM_DAEMON);

  /*
    Load the slave replication GTID state from the mysql.gtid_slave_pos
    table.

    This is mostly so that we can start our seq_no counter from the highest
    seq_no seen by a slave. This way, we have a way to tell if a transaction
    logged by ourselves as master is newer or older than a replicated
    transaction.
  */
#ifdef HAVE_REPLICATION
  if (rpl_load_gtid_slave_state(thd))
    sql_print_warning("Failed to load slave replication state from table "
                      "%s.%s: %u: %s", "mysql",
                      rpl_gtid_slave_state_table_name.str,
                      thd->get_stmt_da()->sql_errno(),
                      thd->get_stmt_da()->message());
#endif

  /* Tell start_binlog_background_thread() that startup is complete. */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_started= true;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  for (;;)
  {
    /*
      Wait until there is something in the queue to process, or we are asked
      to shut down.
    */
    THD_STAGE_INFO(thd, stage_binlog_waiting_background_tasks);
    mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
    for (;;)
    {
      stop= binlog_background_thread_stop;
      queue= binlog_background_thread_queue;
      if (stop && !mysql_bin_log.is_xidlist_idle())
      {
        /*
          Delay stop until all pending binlog checkpoints have been processed.
        */
        stop= false;
      }
      if (stop || queue)
        break;
      mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread,
                      &mysql_bin_log.LOCK_binlog_background_thread);
    }
    /* Grab the queue, if any. */
    binlog_background_thread_queue= NULL;
    mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

    /* Process any incoming commit_checkpoint_notify() calls. */
    DBUG_EXECUTE_IF("inject_binlog_background_thread_before_mark_xid_done",
      DBUG_ASSERT(!debug_sync_set_action(
        thd,
        STRING_WITH_LEN("binlog_background_thread_before_mark_xid_done "
                        "SIGNAL injected_binlog_background_thread "
                        "WAIT_FOR something_that_will_never_happen "
                        "TIMEOUT 2")));
      );
    while (queue)
    {
      long count= queue->notify_count;
      THD_STAGE_INFO(thd, stage_binlog_processing_checkpoint_notify);
      DEBUG_SYNC(thd, "binlog_background_thread_before_mark_xid_done");
      /* Set the thread start time */
      thd->set_time();
      /* Grab next pointer first, as mark_xid_done() may free the element. */
      next= queue->next_in_queue;
      queue->notify_count= 0;
      /*
        count + 1 calls: commit_checkpoint_notify() links the entry on the
        first notification without touching notify_count and only increments
        it for later ones (presumably notify_count is 0 when an entry is
        first queued - TODO confirm).
      */
      for (long i= 0; i <= count; i++)
        mysql_bin_log.mark_xid_done(queue->binlog_id, true);
      queue= next;

      DBUG_EXECUTE_IF("binlog_background_checkpoint_processed",
        DBUG_ASSERT(!debug_sync_set_action(
          thd,
          STRING_WITH_LEN("now SIGNAL binlog_background_checkpoint_processed")));
        );
    }

    if (stop)
      break;
  }

  THD_STAGE_INFO(thd, stage_binlog_stopping_background_thread);

  /* No need to use mutex as thd is not linked into other threads */
  delete thd;

  my_thread_end();

  /* Signal that we are (almost) stopped. */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_stop= false;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  DBUG_RETURN(0);
}
9956
#ifdef HAVE_PSI_INTERFACE
/* Instrumentation key passed to mysql_thread_create() for the binlog background thread. */
static PSI_thread_key key_thread_binlog;

/*
  Thread instrumentation records registered by
  start_binlog_background_thread(); the only entry is "binlog_background".
*/
static PSI_thread_info all_binlog_threads[]=
{
  { &key_thread_binlog, "binlog_background", PSI_FLAG_GLOBAL},
};
#endif /* HAVE_PSI_INTERFACE */
9965
9966static bool
9967start_binlog_background_thread()
9968{
9969 pthread_t th;
9970
9971#ifdef HAVE_PSI_INTERFACE
9972 if (PSI_server)
9973 PSI_server->register_thread("sql", all_binlog_threads,
9974 array_elements(all_binlog_threads));
9975#endif
9976
9977 if (mysql_thread_create(key_thread_binlog, &th, &connection_attrib,
9978 binlog_background_thread, NULL))
9979 return 1;
9980
9981 /*
9982 Wait for the thread to have started (so we know that the slave replication
9983 state is loaded and we have correct global_gtid_counter).
9984 */
9985 mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
9986 while (!binlog_background_thread_started)
9987 mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread_end,
9988 &mysql_bin_log.LOCK_binlog_background_thread);
9989 mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);
9990
9991 return 0;
9992}
9993
9994
/**
  Scan binlog file(s) to rebuild the GTID binlog state and, when do_xa is
  set, to collect the logged XIDs and pass them to ha_recover().

  The first round reads first_log (the latest binlog). With do_xa, if a
  binlog checkpoint event naming another file was found, the files from that
  one up to (but not including) last_log_name are scanned as well.

  @param linfo          binlog index position used with find_log_pos() and
                        find_next_log() to walk the older files
  @param last_log_name  name of the latest binlog file
  @param first_log      IO_CACHE read during the first round
  @param fdle           format description event used when reading events
                        from every file; its LOG_EVENT_BINLOG_IN_USE_F flag
                        is cleared here
  @param do_xa          when true, collect XIDs, follow the binlog
                        checkpoint to older files and call ha_recover()

  @retval 0  success
  @retval 1  failure (an error was printed)
*/
int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
                           IO_CACHE *first_log,
                           Format_description_log_event *fdle, bool do_xa)
{
  Log_event *ev= NULL;
  HASH xids;
  MEM_ROOT mem_root;
  char binlog_checkpoint_name[FN_REFLEN];
  bool binlog_checkpoint_found;
  bool first_round;
  IO_CACHE log;
  File file= -1;
  const char *errmsg;
#ifdef HAVE_REPLICATION
  rpl_gtid last_gtid;
  bool last_gtid_standalone= false;
  bool last_gtid_valid= false;
#endif

  /* xids and mem_root are only initialised (and later freed) when do_xa. */
  if (! fdle->is_valid() ||
      (do_xa && my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
                             sizeof(my_xid), 0, 0, MYF(0))))
    goto err1;

  if (do_xa)
    init_alloc_root(&mem_root, "TC_LOG_BINLOG", TC_LOG_PAGE_SIZE,
                    TC_LOG_PAGE_SIZE, MYF(0));

  fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error

  /*
    Scan the binlog for XIDs that need to be committed if still in the
    prepared stage.

    Start with the latest binlog file, then continue with any other binlog
    files if the last found binlog checkpoint indicates it is needed.
  */

  binlog_checkpoint_found= false;
  first_round= true;
  for (;;)
  {
    /*
      NOTE(review): if this loop ever ends on a non-NULL event for which
      is_valid() is false, that event is not deleted before ev is
      overwritten or the function returns 0 - confirm whether
      read_log_event() can return such an event.
    */
    while ((ev= Log_event::read_log_event(first_round ? first_log : &log,
                                          fdle, opt_master_verify_checksum))
           && ev->is_valid())
    {
      enum Log_event_type typ= ev->get_type_code();
      switch (typ)
      {
      case XID_EVENT:
      {
        if (do_xa)
        {
          /* Copy the xid into mem_root; the event itself is deleted below. */
          Xid_log_event *xev=(Xid_log_event *)ev;
          uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
                                          sizeof(xev->xid));
          if (!x || my_hash_insert(&xids, x))
            goto err2;
        }
        break;
      }
      case BINLOG_CHECKPOINT_EVENT:
        if (first_round && do_xa)
        {
          size_t dir_len;
          Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev;
          if (cev->binlog_file_len >= FN_REFLEN)
            sql_print_warning("Incorrect binlog checkpoint event with too "
                              "long file name found.");
          else
          {
            /*
              Note that we cannot use make_log_name() here, as we have not yet
              initialised MYSQL_BIN_LOG::log_file_name.
            */
            /* Directory part of last_log_name + file name from the event. */
            dir_len= dirname_length(last_log_name);
            strmake(strnmov(binlog_checkpoint_name, last_log_name, dir_len),
                    cev->binlog_file_name, FN_REFLEN - 1 - dir_len);
            binlog_checkpoint_found= true;
          }
        }
        break;
      case GTID_LIST_EVENT:
        if (first_round)
        {
          Gtid_list_log_event *glev= (Gtid_list_log_event *)ev;

          /* Initialise the binlog state from the Gtid_list event. */
          if (rpl_global_gtid_binlog_state.load(glev->list, glev->count))
            goto err2;
        }
        break;

#ifdef HAVE_REPLICATION
      case GTID_EVENT:
        if (first_round)
        {
          Gtid_log_event *gev= (Gtid_log_event *)ev;

          /* Update the binlog state with any GTID logged after Gtid_list. */
          last_gtid.domain_id= gev->domain_id;
          last_gtid.server_id= gev->server_id;
          last_gtid.seq_no= gev->seq_no;
          last_gtid_standalone=
            ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false);
          last_gtid_valid= true;
        }
        break;
#endif

      case START_ENCRYPTION_EVENT:
        {
          if (fdle->start_decryption((Start_encryption_log_event*) ev))
            goto err2;
        }
        break;

      default:
        /* Nothing. */
        break;
      }

#ifdef HAVE_REPLICATION
      /*
        The remembered GTID is added to the binlog state when its event group
        ends: for a standalone GTID at the first event that is not part of a
        group, otherwise at an XID event or a COMMIT/ROLLBACK query event.
      */
      if (last_gtid_valid &&
          ((last_gtid_standalone && !ev->is_part_of_group(typ)) ||
           (!last_gtid_standalone &&
            (typ == XID_EVENT ||
             (LOG_EVENT_IS_QUERY(typ) &&
              (((Query_log_event *)ev)->is_commit() ||
               ((Query_log_event *)ev)->is_rollback()))))))
      {
        if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false))
          goto err2;
        last_gtid_valid= false;
      }
#endif

      delete ev;
      ev= NULL;
    }

    /* Without XA recovery only the latest binlog file is scanned. */
    if (!do_xa)
      break;
    /*
      If the last binlog checkpoint event points to an older log, we have to
      scan all logs from there also, to get all possible XIDs to recover.

      If there was no binlog checkpoint event at all, this means the log was
      written by an older version of MariaDB (or MySQL) - these always have an
      (implicit) binlog checkpoint event at the start of the last binlog file.
    */
    if (first_round)
    {
      if (!binlog_checkpoint_found)
        break;
      first_round= false;
      DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004",
          if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) &&
              0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name))
            DBUG_SUICIDE();
        );
      if (find_log_pos(linfo, binlog_checkpoint_name, 1))
      {
        sql_print_error("Binlog file '%s' not found in binlog index, needed "
                        "for recovery. Aborting.", binlog_checkpoint_name);
        goto err2;
      }
    }
    else
    {
      /* Done with the older file opened in the previous iteration. */
      end_io_cache(&log);
      mysql_file_close(file, MYF(MY_WME));
      file= -1;
    }

    if (!strcmp(linfo->log_file_name, last_log_name))
      break;                                    // No more files to do
    if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0)
    {
      sql_print_error("%s", errmsg);
      goto err2;
    }
    /*
      We do not need to read the Format_description_log_event of other binlog
      files. It is not possible for a binlog checkpoint to span multiple
      binlog files written by different versions of the server. So we can use
      the first one read for reading from all binlog files.
    */
    /* Advance linfo now; the file just opened is read in the next iteration. */
    if (find_next_log(linfo, 1))
    {
      sql_print_error("Error reading binlog files during recovery. Aborting.");
      goto err2;
    }
    fdle->reset_crypto();
  }

  if (do_xa)
  {
    if (ha_recover(&xids))
      goto err2;

    free_root(&mem_root, MYF(0));
    my_hash_free(&xids);
  }
  return 0;

err2:
  /* Release the event in flight, any older file still open, and the XA data. */
  delete ev;
  if (file >= 0)
  {
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
  }
  if (do_xa)
  {
    free_root(&mem_root, MYF(0));
    my_hash_free(&xids);
  }
err1:
  sql_print_error("Crash recovery failed. Either correct the problem "
                  "(if it's, for example, out of memory error) and restart, "
                  "or delete (or rename) binary log and start mysqld with "
                  "--tc-heuristic-recover={commit|rollback}");
  return 1;
}
10220
10221
10222int
10223MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
10224{
10225 LOG_INFO log_info;
10226 const char *errmsg;
10227 IO_CACHE log;
10228 File file;
10229 Log_event *ev= 0;
10230 Format_description_log_event fdle(BINLOG_VERSION);
10231 char log_name[FN_REFLEN];
10232 int error;
10233
10234 if (unlikely((error= find_log_pos(&log_info, NullS, 1))))
10235 {
10236 /*
10237 If there are no binlog files (LOG_INFO_EOF), then we still try to read
10238 the .state file to restore the binlog state. This allows to copy a server
10239 to provision a new one without copying the binlog files (except the
10240 master-bin.state file) and still preserve the correct binlog state.
10241 */
10242 if (error != LOG_INFO_EOF)
10243 sql_print_error("find_log_pos() failed (error: %d)", error);
10244 else
10245 {
10246 error= read_state_from_file();
10247 if (error == 2)
10248 {
10249 /*
10250 No binlog files and no binlog state is not an error (eg. just initial
10251 server start after fresh installation).
10252 */
10253 error= 0;
10254 }
10255 }
10256 return error;
10257 }
10258
10259 if (! fdle.is_valid())
10260 return 1;
10261
10262 do
10263 {
10264 strmake_buf(log_name, log_info.log_file_name);
10265 } while (!(error= find_next_log(&log_info, 1)));
10266
10267 if (error != LOG_INFO_EOF)
10268 {
10269 sql_print_error("find_log_pos() failed (error: %d)", error);
10270 return error;
10271 }
10272
10273 if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
10274 {
10275 sql_print_error("%s", errmsg);
10276 return 1;
10277 }
10278
10279 if ((ev= Log_event::read_log_event(&log, &fdle,
10280 opt_master_verify_checksum)) &&
10281 ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
10282 {
10283 if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
10284 {
10285 sql_print_information("Recovering after a crash using %s", opt_name);
10286 error= recover(&log_info, log_name, &log,
10287 (Format_description_log_event *)ev, do_xa_recovery);
10288 }
10289 else
10290 {
10291 error= read_state_from_file();
10292 if (unlikely(error == 2))
10293 {
10294 /*
10295 The binlog exists, but the .state file is missing. This is normal if
10296 this is the first master start after a major upgrade to 10.0 (with
10297 GTID support).
10298
10299 However, it could also be that the .state file was lost somehow, and
10300 in this case it could be a serious issue, as we would set the wrong
10301 binlog state in the next binlog file to be created, and GTID
10302 processing would be corrupted. A common way would be copying files
10303 from an old server to a new one and forgetting the .state file.
10304
10305 So in this case, we want to try to recover the binlog state by
10306 scanning the last binlog file (but we do not need any XA recovery).
10307
10308 ToDo: We could avoid one scan at first start after major upgrade, by
10309 detecting that there is no GTID_LIST event at the start of the
10310 binlog file, and stopping the scan in that case.
10311 */
10312 error= recover(&log_info, log_name, &log,
10313 (Format_description_log_event *)ev, false);
10314 }
10315 }
10316 }
10317
10318 delete ev;
10319 end_io_cache(&log);
10320 mysql_file_close(file, MYF(MY_WME));
10321
10322 return error;
10323}
10324
10325
10326#ifdef INNODB_COMPATIBILITY_HOOKS
10327/**
10328 Get the file name of the MySQL binlog.
10329 @return the name of the binlog file
10330*/
10331extern "C"
10332const char* mysql_bin_log_file_name(void)
10333{
10334 return mysql_bin_log.get_log_fname();
10335}
10336/**
10337 Get the current position of the MySQL binlog.
10338 @return byte offset from the beginning of the binlog
10339*/
10340extern "C"
10341ulonglong mysql_bin_log_file_pos(void)
10342{
10343 return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
10344}
10345/*
10346 Get the current position of the MySQL binlog for transaction currently being
10347 committed.
10348
10349 This is valid to call from within storage engine commit_ordered() and
10350 commit() methods only.
10351
10352 Since it stores the position inside THD, it is safe to call without any
10353 locking.
10354*/
10355void
10356mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
10357{
10358 binlog_cache_mngr *cache_mngr;
10359 if (opt_bin_log &&
10360 (cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton)))
10361 {
10362 *out_file= cache_mngr->last_commit_pos_file;
10363 *out_pos= (ulonglong)(cache_mngr->last_commit_pos_offset);
10364 }
10365 else
10366 {
10367 *out_file= NULL;
10368 *out_pos= 0;
10369 }
10370}
10371#endif /* INNODB_COMPATIBILITY_HOOKS */
10372
10373
10374static void
10375binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
10376 void *var_ptr, const void *save)
10377{
10378 ulong value= *((ulong *)save);
10379 bool check_purge= false;
10380 ulong UNINIT_VAR(prev_binlog_id);
10381
10382 mysql_mutex_lock(mysql_bin_log.get_log_lock());
10383 if(mysql_bin_log.is_open())
10384 {
10385 prev_binlog_id= mysql_bin_log.current_binlog_id;
10386 if (binlog_checksum_options != value)
10387 mysql_bin_log.checksum_alg_reset= (enum_binlog_checksum_alg)value;
10388 if (mysql_bin_log.rotate(true, &check_purge))
10389 check_purge= false;
10390 }
10391 else
10392 {
10393 binlog_checksum_options= value;
10394 }
10395 DBUG_ASSERT(binlog_checksum_options == value);
10396 mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF;
10397 mysql_mutex_unlock(mysql_bin_log.get_log_lock());
10398 if (check_purge)
10399 mysql_bin_log.checkpoint_and_purge(prev_binlog_id);
10400}
10401
10402
10403static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
10404{
10405 mysql_bin_log.set_status_variables(thd);
10406 var->type= SHOW_ARRAY;
10407 var->value= (char *)&binlog_status_vars_detail;
10408 return 0;
10409}
10410
/*
  Top-level status variable table: one "Binlog" entry of type SHOW_FUNC bound
  to show_binlog_vars(), followed by the NullS terminator row.
*/
static SHOW_VAR binlog_status_vars_top[]= {
  {"Binlog", (char *) &show_binlog_vars, SHOW_FUNC},
  {NullS, NullS, SHOW_LONG}
};
10415
10416static MYSQL_SYSVAR_BOOL(
10417 optimize_thread_scheduling,
10418 opt_optimize_thread_scheduling,
10419 PLUGIN_VAR_READONLY,
10420 "Run fast part of group commit in a single thread, to optimize kernel "
10421 "thread scheduling. On by default. Disable to run each transaction in group "
10422 "commit in its own thread, which can be slower at very high concurrency. "
10423 "This option is mostly for testing one algorithm versus the other, and it "
10424 "should not normally be necessary to change it.",
10425 NULL,
10426 NULL,
10427 1);
10428
/*
  Enum system variable "checksum", backed by binlog_checksum_options, with
  values taken from binlog_checksum_typelib. binlog_checksum_update is
  passed as a callback and BINLOG_CHECKSUM_ALG_CRC32 as a constant;
  presumably these are the update hook and the default value (the NULL
  before the callback would then be the check hook) -- see the
  MYSQL_SYSVAR_ENUM definition to confirm.
*/
static MYSQL_SYSVAR_ENUM(
  checksum,
  binlog_checksum_options,
  PLUGIN_VAR_RQCMDARG,
  "Type of BINLOG_CHECKSUM_ALG. Include checksum for "
  "log events in the binary log",
  NULL,
  binlog_checksum_update,
  BINLOG_CHECKSUM_ALG_CRC32,
  &binlog_checksum_typelib);
10439
/*
  NULL-terminated list of the system variables declared above; it is handed
  to the plugin descriptor as its system variable table.
*/
static struct st_mysql_sys_var *binlog_sys_vars[]=
{
  MYSQL_SYSVAR(optimize_thread_scheduling),
  MYSQL_SYSVAR(checksum),
  NULL
};
10446
10447
10448/*
10449 Copy out the non-directory part of binlog position filename for the
10450 `binlog_snapshot_file' status variable, same way as it is done for
10451 SHOW MASTER STATUS.
10452*/
10453static void
10454set_binlog_snapshot_file(const char *src)
10455{
10456 size_t dir_len = dirname_length(src);
10457 strmake_buf(binlog_snapshot_file, src + dir_len);
10458}
10459
10460/*
10461 Copy out current values of status variables, for SHOW STATUS or
10462 information_schema.global_status.
10463
10464 This is called only under LOCK_show_status, so we can fill in a static array.
10465*/
10466void
10467TC_LOG_BINLOG::set_status_variables(THD *thd)
10468{
10469 binlog_cache_mngr *cache_mngr;
10470
10471 if (thd && opt_bin_log)
10472 cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10473 else
10474 cache_mngr= 0;
10475
10476 bool have_snapshot= (cache_mngr && cache_mngr->last_commit_pos_file[0] != 0);
10477 mysql_mutex_lock(&LOCK_commit_ordered);
10478 binlog_status_var_num_commits= this->num_commits;
10479 binlog_status_var_num_group_commits= this->num_group_commits;
10480 if (!have_snapshot)
10481 {
10482 set_binlog_snapshot_file(last_commit_pos_file);
10483 binlog_snapshot_position= last_commit_pos_offset;
10484 }
10485 mysql_mutex_unlock(&LOCK_commit_ordered);
10486 mysql_mutex_lock(&LOCK_prepare_ordered);
10487 binlog_status_group_commit_trigger_count= this->group_commit_trigger_count;
10488 binlog_status_group_commit_trigger_timeout= this->group_commit_trigger_timeout;
10489 binlog_status_group_commit_trigger_lock_wait= this->group_commit_trigger_lock_wait;
10490 mysql_mutex_unlock(&LOCK_prepare_ordered);
10491
10492 if (have_snapshot)
10493 {
10494 set_binlog_snapshot_file(cache_mngr->last_commit_pos_file);
10495 binlog_snapshot_position= cache_mngr->last_commit_pos_offset;
10496 }
10497}
10498
10499
10500/*
10501 Find the Gtid_list_log_event at the start of a binlog.
10502
10503 NULL for ok, non-NULL error message for error.
10504
10505 If ok, then the event is returned in *out_gtid_list. This can be NULL if we
10506 get back to binlogs written by old server version without GTID support. If
10507 so, it means we have reached the point to start from, as no GTID events can
10508 exist in earlier binlogs.
10509*/
10510const char *
10511get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list)
10512{
10513 Format_description_log_event init_fdle(BINLOG_VERSION);
10514 Format_description_log_event *fdle;
10515 Log_event *ev;
10516 const char *errormsg = NULL;
10517
10518 *out_gtid_list= NULL;
10519
10520 if (!(ev= Log_event::read_log_event(cache, &init_fdle,
10521 opt_master_verify_checksum)) ||
10522 ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
10523 {
10524 if (ev)
10525 delete ev;
10526 return "Could not read format description log event while looking for "
10527 "GTID position in binlog";
10528 }
10529
10530 fdle= static_cast<Format_description_log_event *>(ev);
10531
10532 for (;;)
10533 {
10534 Log_event_type typ;
10535
10536 ev= Log_event::read_log_event(cache, fdle, opt_master_verify_checksum);
10537 if (!ev)
10538 {
10539 errormsg= "Could not read GTID list event while looking for GTID "
10540 "position in binlog";
10541 break;
10542 }
10543 typ= ev->get_type_code();
10544 if (typ == GTID_LIST_EVENT)
10545 break; /* Done, found it */
10546 if (typ == START_ENCRYPTION_EVENT)
10547 {
10548 if (fdle->start_decryption((Start_encryption_log_event*) ev))
10549 errormsg= "Could not set up decryption for binlog.";
10550 }
10551 delete ev;
10552 if (typ == ROTATE_EVENT || typ == STOP_EVENT ||
10553 typ == FORMAT_DESCRIPTION_EVENT || typ == START_ENCRYPTION_EVENT)
10554 continue; /* Continue looking */
10555
10556 /* We did not find any Gtid_list_log_event, must be old binlog. */
10557 ev= NULL;
10558 break;
10559 }
10560
10561 delete fdle;
10562 *out_gtid_list= static_cast<Gtid_list_log_event *>(ev);
10563 return errormsg;
10564}
10565
10566
/*
  Storage engine descriptor for the binlog pseudo engine; it carries only the
  handlerton interface version and is referenced from the plugin declaration.
*/
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
10569
/*
  Plugin declaration for the binlog pseudo storage engine. It wires together
  the storage engine descriptor, the init function, and the status and system
  variable tables defined in this file.
*/
maria_declare_plugin(binlog)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,                  /* plugin type */
  &binlog_storage_engine,                       /* type-specific descriptor */
  "binlog",                                     /* plugin name */
  "MySQL AB",                                   /* author */
  "This is a pseudo storage engine to represent the binlog in a transaction",
  PLUGIN_LICENSE_GPL,                           /* license */
  binlog_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100 /* 1.0 */,
  binlog_status_vars_top, /* status variables */
  binlog_sys_vars, /* system variables */
  "1.0", /* string version */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
}
maria_declare_plugin_end;
10587
10588#ifdef WITH_WSREP
10589IO_CACHE * get_trans_log(THD * thd)
10590{
10591 DBUG_ASSERT(binlog_hton->slot != HA_SLOT_UNDEF);
10592 binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*)
10593 thd_get_ha_data(thd, binlog_hton);
10594 if (cache_mngr)
10595 return cache_mngr->get_binlog_cache_log(true);
10596
10597 WSREP_DEBUG("binlog cache not initialized, conn: %llu",
10598 thd->thread_id);
10599 return NULL;
10600}
10601
10602
10603bool wsrep_trans_cache_is_empty(THD *thd)
10604{
10605 binlog_cache_mngr *const cache_mngr=
10606 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10607 return (!cache_mngr || cache_mngr->trx_cache.empty());
10608}
10609
10610
10611void thd_binlog_trx_reset(THD * thd)
10612{
10613 /*
10614 todo: fix autocommit select to not call the caller
10615 */
10616 if (thd_get_ha_data(thd, binlog_hton) != NULL)
10617 {
10618 binlog_cache_mngr *const cache_mngr=
10619 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10620 if (cache_mngr)
10621 {
10622 cache_mngr->reset(false, true);
10623 if (!cache_mngr->stmt_cache.empty())
10624 {
10625 WSREP_DEBUG("pending events in stmt cache, sql: %s", thd->query());
10626 cache_mngr->stmt_cache.reset();
10627 }
10628 }
10629 }
10630 thd->clear_binlog_table_maps();
10631}
10632
10633
10634void thd_binlog_rollback_stmt(THD * thd)
10635{
10636 WSREP_DEBUG("thd_binlog_rollback_stmt connection: %llu",
10637 thd->thread_id);
10638 binlog_cache_mngr *const cache_mngr=
10639 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10640 if (cache_mngr)
10641 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
10642}
10643#endif /* WITH_WSREP */
10644