1/* Copyright (c) 2000, 2017, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2017, MariaDB Corporation
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17
18/**
19 @addtogroup Replication
20 @{
21
22 @file
23
24 @brief Code to run the io thread and the sql thread on the
25 replication slave.
26*/
27
28#include "mariadb.h"
29#include "sql_priv.h"
30#include "slave.h"
31#include "sql_parse.h" // execute_init_command
32#include "sql_table.h" // mysql_rm_table
33#include "rpl_mi.h"
34#include "rpl_rli.h"
35#include "sql_repl.h"
36#include "rpl_filter.h"
37#include "repl_failsafe.h"
38#include "transaction.h"
39#include <thr_alarm.h>
40#include <my_dir.h>
41#include <sql_common.h>
42#include <errmsg.h>
43#include <ssl_compat.h>
44#include "unireg.h"
45#include <mysys_err.h>
46#include <signal.h>
47#include <mysql.h>
48#include <myisam.h>
49
50#include "sql_base.h" // close_thread_tables
51#include "tztime.h" // struct Time_zone
52#include "log_event.h" // Rotate_log_event,
53 // Create_file_log_event,
54 // Format_description_log_event
55#include "wsrep_mysqld.h"
56
57#ifdef HAVE_REPLICATION
58
59#include "rpl_tblmap.h"
60#include "debug_sync.h"
61#include "rpl_parallel.h"
62#include "sql_show.h"
63#include "semisync_slave.h"
64
/*
  Expands to the stringified name of flag F followed by one space when any
  bit of F is set in V, and to the empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")

#define MAX_SLAVE_RETRY_PAUSE 5
/*
  a parameter of sql_slave_killed() to defer the killed status
*/
#define SLAVE_WAIT_GROUP_DONE 60
/* Set to 1 by init_slave_skip_errors() once slave_error_mask is initialized */
bool use_slave_mask = 0;
/* Bitmap indexed by error number; a set bit marks an error to be skipped */
MY_BITMAP slave_error_mask;
/* Printable form of slave_error_mask, see make_slave_skip_errors_printable() */
char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
/* Error codes array allocated by init_slave_transaction_retry_errors() */
uint *slave_transaction_retry_errors;
/* Number of valid entries in slave_transaction_retry_errors */
uint slave_transaction_retry_error_length= 0;
/* Printable, comma separated form of slave_transaction_retry_errors */
char slave_transaction_retry_error_names[SHOW_VAR_FUNC_BUFF_SIZE];

char* slave_load_tmpdir = 0;
/* Master_info created by init_slave() for default_master_connection_name */
Master_info *active_mi= 0;
/* Created by init_slave(); active_mi is registered in it */
Master_info_index *master_info_index;
my_bool replicate_same_server_id;
ulonglong relay_log_space_limit = 0;
ulonglong opt_read_binlog_speed_limit = 0;

const char *relay_log_index= 0;
const char *relay_log_basename= 0;

/* Empty (zero length) name used for the default master connection */
LEX_CSTRING default_master_connection_name= { (char*) "", 0 };
90
91/*
92 When slave thread exits, we need to remember the temporary tables so we
93 can re-use them on slave start.
94
95 TODO: move the vars below under Master_info
96*/
97
int disconnect_slave_event_count = 0, abort_slave_event_count = 0;

static pthread_key(Master_info*, RPL_MASTER_INFO);

/*
  Kinds of operation after which a reconnect is attempted.
  Used as the first index into reconnect_messages[][].
*/
enum enum_slave_reconnect_actions
{
  SLAVE_RECON_ACT_REG= 0,
  SLAVE_RECON_ACT_DUMP= 1,
  SLAVE_RECON_ACT_EVENT= 2,
  SLAVE_RECON_ACT_MAX
};

/*
  Kinds of message available for every reconnect action.
  Used as the second index into reconnect_messages[][].
*/
enum enum_slave_reconnect_messages
{
  SLAVE_RECON_MSG_WAIT= 0,
  SLAVE_RECON_MSG_KILLED_WAITING= 1,
  SLAVE_RECON_MSG_AFTER= 2,
  SLAVE_RECON_MSG_FAILED= 3,
  SLAVE_RECON_MSG_COMMAND= 4,
  SLAVE_RECON_MSG_KILLED_AFTER= 5,
  SLAVE_RECON_MSG_MAX
};

/*
  Message table indexed by [enum_slave_reconnect_actions]
  [enum_slave_reconnect_messages]. The SLAVE_RECON_MSG_FAILED entries are
  format strings taking a log name (%s), a position (%llu) and a trailing
  string (%s).
*/
static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
{
  /* SLAVE_RECON_ACT_REG */
  {
    "Waiting to reconnect after a failed registration on master",
    "Slave I/O thread killed while waiting to reconnect after a failed \
registration on master",
    "Reconnecting after a failed registration on master",
    "failed registering on master, reconnecting to try again, \
log '%s' at position %llu%s",
    "COM_REGISTER_SLAVE",
    "Slave I/O thread killed during or after reconnect"
  },
  /* SLAVE_RECON_ACT_DUMP */
  {
    "Waiting to reconnect after a failed binlog dump request",
    "Slave I/O thread killed while retrying master dump",
    "Reconnecting after a failed binlog dump request",
    "failed dump request, reconnecting to try again, log '%s' at position %llu%s",
    "COM_BINLOG_DUMP",
    "Slave I/O thread killed during or after reconnect"
  },
  /* SLAVE_RECON_ACT_EVENT */
  {
    "Waiting to reconnect after a failed master event read",
    "Slave I/O thread killed while waiting to reconnect after a failed read",
    "Reconnecting after a failed master event read",
    "Slave I/O thread: Failed reading log event, reconnecting to retry, \
log '%s' at position %llu%s",
    "",
    "Slave I/O thread killed during or after a reconnect done to recover from \
failed read"
  }
};
152
153
/* Selects which kind of slave thread init_slave_thread() sets up */
typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE;

/* Forward declarations of file-local helpers */
static int process_io_rotate(Master_info* mi, Rotate_log_event* rev);
static int process_io_create_file(Master_info* mi, Create_file_log_event* cev);
static bool wait_for_relay_log_space(Relay_log_info* rli);
static bool io_slave_killed(Master_info* mi);
static bool sql_slave_killed(rpl_group_info *rgi);
static int init_slave_thread(THD*, Master_info *, SLAVE_THD_TYPE);
static void make_slave_skip_errors_printable(void);
static void make_slave_transaction_retry_errors_printable(void);
static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
static int safe_reconnect(THD*, MYSQL*, Master_info*, bool);
static int connect_to_master(THD*, MYSQL*, Master_info*, bool, bool);
static Log_event* next_event(rpl_group_info* rgi, ulonglong *event_size);
static int queue_event(Master_info* mi,const char* buf,ulong event_len);
static int terminate_slave_thread(THD *, mysql_mutex_t *, mysql_cond_t *,
                                  volatile uint *, bool);
static bool check_io_slave_killed(Master_info *mi, const char *info);
static bool send_show_master_info_data(THD *, Master_info *, bool, String *);
173/*
174 Function to set the slave's max_allowed_packet based on the value
175 of slave_max_allowed_packet.
176
177 @in_param thd Thread handler for slave
178 @in_param mysql MySQL connection handle
179*/
180
181static void set_slave_max_allowed_packet(THD *thd, MYSQL *mysql)
182{
183 DBUG_ENTER("set_slave_max_allowed_packet");
184 // thd and mysql must be valid
185 DBUG_ASSERT(thd && mysql);
186
187 thd->variables.max_allowed_packet= slave_max_allowed_packet;
188 thd->net.max_packet_size= slave_max_allowed_packet;
189 /*
190 Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
191 thread and the mysql->option max_allowed_packet, since a
192 replication event can become this much larger than
193 the corresponding packet (query) sent from client to master.
194 */
195 thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
196 /*
197 Skipping the setting of mysql->net.max_packet size to slave
198 max_allowed_packet since this is done during mysql_real_connect.
199 */
200 mysql->options.max_allowed_packet=
201 slave_max_allowed_packet+MAX_LOG_EVENT_HEADER;
202 DBUG_VOID_RETURN;
203}
204
205/*
206 Find out which replications threads are running
207
208 SYNOPSIS
209 init_thread_mask()
210 mask Return value here
211 mi master_info for slave
212 inverse If set, returns which threads are not running
213
214 IMPLEMENTATION
215 Get a bit mask for which threads are running so that we can later restart
216 these threads.
217
218 RETURN
219 mask If inverse == 0, running threads
220 If inverse == 1, stopped threads
221*/
222
223void init_thread_mask(int* mask,Master_info* mi,bool inverse)
224{
225 bool set_io = mi->slave_running, set_sql = mi->rli.slave_running;
226 int tmp_mask=0;
227 DBUG_ENTER("init_thread_mask");
228
229 if (set_io)
230 tmp_mask |= SLAVE_IO;
231 if (set_sql)
232 tmp_mask |= SLAVE_SQL;
233 if (inverse)
234 tmp_mask^= (SLAVE_IO | SLAVE_SQL);
235 *mask = tmp_mask;
236 DBUG_VOID_RETURN;
237}
238
239
240/*
241 lock_slave_threads() against other threads doing STOP, START or RESET SLAVE
242
243*/
244
245void Master_info::lock_slave_threads()
246{
247 DBUG_ENTER("lock_slave_threads");
248 mysql_mutex_lock(&start_stop_lock);
249 DBUG_VOID_RETURN;
250}
251
252
253/*
254 unlock_slave_threads()
255*/
256
257void Master_info::unlock_slave_threads()
258{
259 DBUG_ENTER("unlock_slave_threads");
260 mysql_mutex_unlock(&start_stop_lock);
261 DBUG_VOID_RETURN;
262}
263
#ifdef HAVE_PSI_INTERFACE
/* Instrumentation keys for the slave IO and SQL threads */
static PSI_thread_key key_thread_slave_io, key_thread_slave_sql;

/* Registration records handed to the performance schema interface */
static PSI_thread_info all_slave_threads[]=
{
  { &key_thread_slave_io, "slave_io", PSI_FLAG_GLOBAL},
  { &key_thread_slave_sql, "slave_sql", PSI_FLAG_GLOBAL}
};

/*
  Register the slave thread keys under category "sql".
  Does nothing when no PSI server is installed.
*/
static void init_slave_psi_keys(void)
{
  if (PSI_server != NULL)
  {
    int n_threads= array_elements(all_slave_threads);
    PSI_server->register_thread("sql", all_slave_threads, n_threads);
  }
}
#endif /* HAVE_PSI_INTERFACE */
285
286
/*
  Note: This definition needs to be kept in sync with the one in
  mysql_system_tables.sql which is used by mysql_create_db.

  The statement is split in two fixed parts: the table name is appended
  after part 1 and the engine name after part 2, see
  build_gtid_pos_create_query().
*/
static const char gtid_pos_table_definition1[]=
  "CREATE TABLE ";
static const char gtid_pos_table_definition2[]=
  " (domain_id INT UNSIGNED NOT NULL, "
  "sub_id BIGINT UNSIGNED NOT NULL, "
  "server_id INT UNSIGNED NOT NULL, "
  "seq_no BIGINT UNSIGNED NOT NULL, "
  "PRIMARY KEY (domain_id, sub_id)) CHARSET=latin1 "
  "COMMENT='Replication slave GTID position' "
  "ENGINE=";
301
302/*
303 Build a query string
304 CREATE TABLE mysql.gtid_slave_pos_<engine> ... ENGINE=<engine>
305*/
306static bool
307build_gtid_pos_create_query(THD *thd, String *query,
308 LEX_CSTRING *table_name,
309 LEX_CSTRING *engine_name)
310{
311 bool err= false;
312 err|= query->append(gtid_pos_table_definition1);
313 err|= append_identifier(thd, query, table_name);
314 err|= query->append(gtid_pos_table_definition2);
315 err|= append_identifier(thd, query, engine_name);
316 return err;
317}
318
319
320static int
321gtid_pos_table_creation(THD *thd, plugin_ref engine, LEX_CSTRING *table_name)
322{
323 int err;
324 StringBuffer<sizeof(gtid_pos_table_definition1) +
325 sizeof(gtid_pos_table_definition1) +
326 2*FN_REFLEN> query;
327
328 if (build_gtid_pos_create_query(thd, &query, table_name, plugin_name(engine)))
329 {
330 my_error(ER_OUT_OF_RESOURCES, MYF(0));
331 return 1;
332 }
333
334 thd->set_db(&MYSQL_SCHEMA_NAME);
335 thd->clear_error();
336 ulonglong thd_saved_option= thd->variables.option_bits;
337 /* This query shuold not be binlogged. */
338 thd->variables.option_bits&= ~(ulonglong)OPTION_BIN_LOG;
339 thd->set_query_and_id(query.c_ptr(), query.length(), thd->charset(),
340 next_query_id());
341 Parser_state parser_state;
342 err= parser_state.init(thd, thd->query(), thd->query_length());
343 if (err)
344 goto end;
345 mysql_parse(thd, thd->query(), thd->query_length(), &parser_state,
346 FALSE, FALSE);
347 if (unlikely(thd->is_error()))
348 err= 1;
349 /* The warning is relevant to 10.3 and earlier. */
350 sql_print_warning("The automatically created table '%s' name may not be "
351 "entirely in lowercase. The table name will be converted "
352 "to lowercase to any future upgrade to 10.4.0 and later "
353 "version where it will be auto-created at once "
354 "in lowercase.",
355 table_name->str);
356end:
357 thd->variables.option_bits= thd_saved_option;
358 thd->reset_query();
359 return err;
360}
361
362
363static void
364handle_gtid_pos_auto_create_request(THD *thd, void *hton)
365{
366 int UNINIT_VAR(err);
367 plugin_ref engine= NULL, *auto_engines;
368 rpl_slave_state::gtid_pos_table *entry;
369 StringBuffer<FN_REFLEN> loc_table_name;
370 LEX_CSTRING table_name;
371
372 /*
373 Check that the plugin is still in @@gtid_pos_auto_engines, and lock
374 it.
375 */
376 mysql_mutex_lock(&LOCK_global_system_variables);
377 engine= NULL;
378 for (auto_engines= opt_gtid_pos_auto_plugins;
379 auto_engines && *auto_engines;
380 ++auto_engines)
381 {
382 if (plugin_hton(*auto_engines) == hton)
383 {
384 engine= my_plugin_lock(NULL, *auto_engines);
385 break;
386 }
387 }
388 mysql_mutex_unlock(&LOCK_global_system_variables);
389 if (!engine)
390 {
391 /* The engine is gone from @@gtid_pos_auto_engines, so no action. */
392 goto end;
393 }
394
395 /* Find the entry for the table to auto-create. */
396 mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
397 entry= (rpl_slave_state::gtid_pos_table *)
398 rpl_global_gtid_slave_state->gtid_pos_tables;
399 while (entry)
400 {
401 if (entry->table_hton == hton &&
402 entry->state == rpl_slave_state::GTID_POS_CREATE_REQUESTED)
403 break;
404 entry= entry->next;
405 }
406 if (entry)
407 {
408 entry->state = rpl_slave_state::GTID_POS_CREATE_IN_PROGRESS;
409 err= loc_table_name.append(entry->table_name.str, entry->table_name.length);
410 }
411 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
412 if (!entry)
413 goto end;
414 if (err)
415 {
416 sql_print_error("Out of memory while trying to auto-create GTID position table");
417 goto end;
418 }
419 table_name.str= loc_table_name.c_ptr_safe();
420 table_name.length= loc_table_name.length();
421
422 err= gtid_pos_table_creation(thd, engine, &table_name);
423 if (err)
424 {
425 sql_print_error("Error auto-creating GTID position table `mysql.%s`: %s Error_code: %d",
426 table_name.str, thd->get_stmt_da()->message(),
427 thd->get_stmt_da()->sql_errno());
428 thd->clear_error();
429 goto end;
430 }
431
432 /* Now enable the entry for the auto-created table. */
433 mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
434 entry= (rpl_slave_state::gtid_pos_table *)
435 rpl_global_gtid_slave_state->gtid_pos_tables;
436 while (entry)
437 {
438 if (entry->table_hton == hton &&
439 entry->state == rpl_slave_state::GTID_POS_CREATE_IN_PROGRESS)
440 {
441 entry->state= rpl_slave_state::GTID_POS_AVAILABLE;
442 break;
443 }
444 entry= entry->next;
445 }
446 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
447
448end:
449 if (engine)
450 plugin_unlock(NULL, engine);
451}
452
453
/*
  State of the slave background thread. The flags are waited on and
  signalled through LOCK_slave_background / COND_slave_background.
*/
/* True from start_slave_background_thread() until the thread is exiting */
static bool slave_background_thread_running;
/* Set by stop_slave_background_thread() to ask the thread to terminate */
static bool slave_background_thread_stop;
/* Set by the thread once it has attempted to load the slave GTID state */
static bool slave_background_thread_gtid_loaded;

/* Singly linked list of pending requests to kill a THD */
static struct slave_background_kill_t {
  slave_background_kill_t *next;
  THD *to_kill;
} *slave_background_kill_list;

/* Singly linked list of pending requests to auto-create a GTID pos table */
static struct slave_background_gtid_pos_create_t {
  slave_background_gtid_pos_create_t *next;
  void *hton;
} *slave_background_gtid_pos_create_list;
467
468
469pthread_handler_t
470handle_slave_background(void *arg __attribute__((unused)))
471{
472 THD *thd;
473 PSI_stage_info old_stage;
474 bool stop;
475
476 my_thread_init();
477 thd= new THD(next_thread_id());
478 thd->thread_stack= (char*) &thd; /* Set approximate stack start */
479 thd->system_thread = SYSTEM_THREAD_SLAVE_BACKGROUND;
480 thread_safe_increment32(&service_thread_count);
481 thd->store_globals();
482 thd->security_ctx->skip_grants();
483 thd->set_command(COM_DAEMON);
484
485 thd_proc_info(thd, "Loading slave GTID position from table");
486 if (rpl_load_gtid_slave_state(thd))
487 sql_print_warning("Failed to load slave replication state from table "
488 "%s.%s: %u: %s", "mysql",
489 rpl_gtid_slave_state_table_name.str,
490 thd->get_stmt_da()->sql_errno(),
491 thd->get_stmt_da()->message());
492
493 mysql_mutex_lock(&LOCK_slave_background);
494 slave_background_thread_gtid_loaded= true;
495 mysql_cond_broadcast(&COND_slave_background);
496
497 THD_STAGE_INFO(thd, stage_slave_background_process_request);
498 do
499 {
500 slave_background_kill_t *kill_list;
501 slave_background_gtid_pos_create_t *create_list;
502
503 thd->ENTER_COND(&COND_slave_background, &LOCK_slave_background,
504 &stage_slave_background_wait_request,
505 &old_stage);
506 for (;;)
507 {
508 stop= abort_loop || thd->killed || slave_background_thread_stop;
509 kill_list= slave_background_kill_list;
510 create_list= slave_background_gtid_pos_create_list;
511 if (stop || kill_list || create_list)
512 break;
513 mysql_cond_wait(&COND_slave_background, &LOCK_slave_background);
514 }
515
516 slave_background_kill_list= NULL;
517 slave_background_gtid_pos_create_list= NULL;
518 thd->EXIT_COND(&old_stage);
519
520 while (kill_list)
521 {
522 slave_background_kill_t *p = kill_list;
523 THD *to_kill= p->to_kill;
524 kill_list= p->next;
525
526 to_kill->awake(KILL_CONNECTION);
527 mysql_mutex_lock(&to_kill->LOCK_wakeup_ready);
528 to_kill->rgi_slave->killed_for_retry=
529 rpl_group_info::RETRY_KILL_KILLED;
530 mysql_cond_broadcast(&to_kill->COND_wakeup_ready);
531 mysql_mutex_unlock(&to_kill->LOCK_wakeup_ready);
532 my_free(p);
533 }
534
535 while (create_list)
536 {
537 slave_background_gtid_pos_create_t *next= create_list->next;
538 void *hton= create_list->hton;
539 handle_gtid_pos_auto_create_request(thd, hton);
540 my_free(create_list);
541 create_list= next;
542 }
543
544 mysql_mutex_lock(&LOCK_slave_background);
545 } while (!stop);
546
547 slave_background_thread_running= false;
548 mysql_cond_broadcast(&COND_slave_background);
549 mysql_mutex_unlock(&LOCK_slave_background);
550
551 delete thd;
552 thread_safe_decrement32(&service_thread_count);
553 signal_thd_deleted();
554
555 my_thread_end();
556 return 0;
557}
558
559
560
561void
562slave_background_kill_request(THD *to_kill)
563{
564 if (to_kill->rgi_slave->killed_for_retry)
565 return; // Already deadlock killed.
566 slave_background_kill_t *p=
567 (slave_background_kill_t *)my_malloc(sizeof(*p), MYF(MY_WME));
568 if (p)
569 {
570 p->to_kill= to_kill;
571 to_kill->rgi_slave->killed_for_retry=
572 rpl_group_info::RETRY_KILL_PENDING;
573 mysql_mutex_lock(&LOCK_slave_background);
574 p->next= slave_background_kill_list;
575 slave_background_kill_list= p;
576 mysql_cond_signal(&COND_slave_background);
577 mysql_mutex_unlock(&LOCK_slave_background);
578 }
579}
580
581
582/*
583 This function must only be called from a slave SQL thread (or worker thread),
584 to ensure that the table_entry will not go away before we can lock the
585 LOCK_slave_state.
586*/
587void
588slave_background_gtid_pos_create_request(
589 rpl_slave_state::gtid_pos_table *table_entry)
590{
591 slave_background_gtid_pos_create_t *p;
592
593 if (table_entry->state != rpl_slave_state::GTID_POS_AUTO_CREATE)
594 return;
595 p= (slave_background_gtid_pos_create_t *)my_malloc(sizeof(*p), MYF(MY_WME));
596 if (!p)
597 return;
598 mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
599 if (table_entry->state != rpl_slave_state::GTID_POS_AUTO_CREATE)
600 {
601 my_free(p);
602 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
603 return;
604 }
605 table_entry->state= rpl_slave_state::GTID_POS_CREATE_REQUESTED;
606 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
607
608 p->hton= table_entry->table_hton;
609 mysql_mutex_lock(&LOCK_slave_background);
610 p->next= slave_background_gtid_pos_create_list;
611 slave_background_gtid_pos_create_list= p;
612 mysql_cond_signal(&COND_slave_background);
613 mysql_mutex_unlock(&LOCK_slave_background);
614}
615
616
617/*
618 Start the slave background thread.
619
620 This thread is currently used for two purposes:
621
622 1. To load the GTID state from mysql.gtid_slave_pos at server start; reading
623 from table requires valid THD, which is otherwise not available during
624 server init.
625
626 2. To kill worker thread transactions during parallel replication, when a
627 storage engine attempts to take an errorneous conflicting lock that would
628 cause a deadlock. Killing is done asynchroneously, as the kill may not
629 be safe within the context of a callback from inside storage engine
630 locking code.
631*/
632static int
633start_slave_background_thread()
634{
635 pthread_t th;
636
637 slave_background_thread_running= true;
638 slave_background_thread_stop= false;
639 slave_background_thread_gtid_loaded= false;
640 if (mysql_thread_create(key_thread_slave_background,
641 &th, &connection_attrib, handle_slave_background,
642 NULL))
643 {
644 sql_print_error("Failed to create thread while initialising slave");
645 return 1;
646 }
647 mysql_mutex_lock(&LOCK_slave_background);
648 while (!slave_background_thread_gtid_loaded)
649 mysql_cond_wait(&COND_slave_background, &LOCK_slave_background);
650 mysql_mutex_unlock(&LOCK_slave_background);
651
652 return 0;
653}
654
655
656static void
657stop_slave_background_thread()
658{
659 mysql_mutex_lock(&LOCK_slave_background);
660 slave_background_thread_stop= true;
661 mysql_cond_broadcast(&COND_slave_background);
662 while (slave_background_thread_running)
663 mysql_cond_wait(&COND_slave_background, &LOCK_slave_background);
664 mysql_mutex_unlock(&LOCK_slave_background);
665}
666
667
668/* Initialize slave structures */
669
670int init_slave()
671{
672 DBUG_ENTER("init_slave");
673 int error= 0;
674
675#ifdef HAVE_PSI_INTERFACE
676 init_slave_psi_keys();
677#endif
678
679 if (start_slave_background_thread())
680 return 1;
681
682 if (global_rpl_thread_pool.init(opt_slave_parallel_threads))
683 return 1;
684
685 /*
686 This is called when mysqld starts. Before client connections are
687 accepted. However bootstrap may conflict with us if it does START SLAVE.
688 So it's safer to take the lock.
689 */
690
691 if (pthread_key_create(&RPL_MASTER_INFO, NULL))
692 goto err;
693
694 master_info_index= new Master_info_index;
695 if (!master_info_index || master_info_index->init_all_master_info())
696 {
697 sql_print_error("Failed to initialize multi master structures");
698 DBUG_RETURN(1);
699 }
700 if (!(active_mi= new Master_info(&default_master_connection_name,
701 relay_log_recovery)) ||
702 active_mi->error())
703 {
704 delete active_mi;
705 active_mi= 0;
706 sql_print_error("Failed to allocate memory for the Master Info structure");
707 goto err;
708 }
709
710 if (master_info_index->add_master_info(active_mi, FALSE))
711 {
712 delete active_mi;
713 active_mi= 0;
714 goto err;
715 }
716
717 /*
718 If master_host is not specified, try to read it from the master_info file.
719 If master_host is specified, create the master_info file if it doesn't
720 exists.
721 */
722
723 if (init_master_info(active_mi,master_info_file,relay_log_info_file,
724 1, (SLAVE_IO | SLAVE_SQL)))
725 {
726 sql_print_error("Failed to initialize the master info structure");
727 goto err;
728 }
729
730 /* If server id is not set, start_slave_thread() will say it */
731
732 if (active_mi->host[0] && !opt_skip_slave_start)
733 {
734 int error;
735 THD *thd= new THD(next_thread_id());
736 thd->thread_stack= (char*) &thd;
737 thd->store_globals();
738
739 error= start_slave_threads(0, /* No active thd */
740 1 /* need mutex */,
741 1 /* wait for start*/,
742 active_mi,
743 master_info_file,
744 relay_log_info_file,
745 SLAVE_IO | SLAVE_SQL);
746
747 thd->reset_globals();
748 delete thd;
749 if (unlikely(error))
750 {
751 sql_print_error("Failed to create slave threads");
752 goto err;
753 }
754 }
755
756end:
757 DBUG_RETURN(error);
758
759err:
760 error= 1;
761 goto end;
762}
763
764/*
765 Updates the master info based on the information stored in the
766 relay info and ignores relay logs previously retrieved by the IO
767 thread, which thus starts fetching again based on to the
768 group_master_log_pos and group_master_log_name. Eventually, the old
769 relay logs will be purged by the normal purge mechanism.
770
771 In the feature, we should improve this routine in order to avoid throwing
772 away logs that are safely stored in the disk. Note also that this recovery
773 routine relies on the correctness of the relay-log.info and only tolerates
774 coordinate problems in master.info.
775
776 In this function, there is no need for a mutex as the caller
777 (i.e. init_slave) already has one acquired.
778
779 Specifically, the following structures are updated:
780
781 1 - mi->master_log_pos <-- rli->group_master_log_pos
782 2 - mi->master_log_name <-- rli->group_master_log_name
783 3 - It moves the relay log to the new relay log file, by
784 rli->group_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
785 rli->event_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
786 rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
787 rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
788
789 If there is an error, it returns (1), otherwise returns (0).
790 */
791int init_recovery(Master_info* mi, const char** errmsg)
792{
793 DBUG_ENTER("init_recovery");
794
795 Relay_log_info *rli= &mi->rli;
796 if (rli->group_master_log_name[0])
797 {
798 mi->master_log_pos= MY_MAX(BIN_LOG_HEADER_SIZE,
799 rli->group_master_log_pos);
800 strmake_buf(mi->master_log_name, rli->group_master_log_name);
801
802 sql_print_warning("Recovery from master pos %ld and file %s.",
803 (ulong) mi->master_log_pos, mi->master_log_name);
804
805 strmake_buf(rli->group_relay_log_name, rli->relay_log.get_log_fname());
806 strmake_buf(rli->event_relay_log_name, rli->relay_log.get_log_fname());
807
808 rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
809 }
810
811 DBUG_RETURN(0);
812}
813
814
815/**
816 Convert slave skip errors bitmap into a printable string.
817*/
818
819static void make_slave_skip_errors_printable(void)
820{
821 /*
822 To be safe, we want 10 characters of room in the buffer for a number
823 plus terminators. Also, we need some space for constant strings.
824 10 characters must be sufficient for a number plus {',' | '...'}
825 plus a NUL terminator. That is a max 6 digit number.
826 */
827 const size_t MIN_ROOM= 10;
828 DBUG_ENTER("make_slave_skip_errors_printable");
829 DBUG_ASSERT(sizeof(slave_skip_error_names) > MIN_ROOM);
830 DBUG_ASSERT(MAX_SLAVE_ERROR <= 999999); // 6 digits
831
832 /* Make @@slave_skip_errors show the nice human-readable value. */
833 opt_slave_skip_errors= slave_skip_error_names;
834
835 if (!use_slave_mask || bitmap_is_clear_all(&slave_error_mask))
836 {
837 /* purecov: begin tested */
838 memcpy(slave_skip_error_names, STRING_WITH_LEN("OFF"));
839 /* purecov: end */
840 }
841 else if (bitmap_is_set_all(&slave_error_mask))
842 {
843 /* purecov: begin tested */
844 memcpy(slave_skip_error_names, STRING_WITH_LEN("ALL"));
845 /* purecov: end */
846 }
847 else
848 {
849 char *buff= slave_skip_error_names;
850 char *bend= buff + sizeof(slave_skip_error_names) - MIN_ROOM;
851 int errnum;
852
853 for (errnum= 0; errnum < MAX_SLAVE_ERROR; errnum++)
854 {
855 if (bitmap_is_set(&slave_error_mask, errnum))
856 {
857 if (buff >= bend)
858 break; /* purecov: tested */
859 buff= int10_to_str(errnum, buff, 10);
860 *buff++= ',';
861 }
862 }
863 if (buff != slave_skip_error_names)
864 buff--; // Remove last ','
865 if (errnum < MAX_SLAVE_ERROR)
866 {
867 /* Couldn't show all errors */
868 buff= strmov(buff, "..."); /* purecov: tested */
869 }
870 *buff=0;
871 }
872 DBUG_PRINT("init", ("error_names: '%s'", slave_skip_error_names));
873 DBUG_VOID_RETURN;
874}
875
876/*
877 Init function to set up array for errors that should be skipped for slave
878
879 SYNOPSIS
880 init_slave_skip_errors()
881 arg List of errors numbers to skip, separated with ','
882
883 NOTES
884 Called from get_options() in mysqld.cc on start-up
885*/
886
887bool init_slave_skip_errors(const char* arg)
888{
889 const char *p;
890 DBUG_ENTER("init_slave_skip_errors");
891
892 if (!arg || !*arg) // No errors defined
893 goto end;
894
895 if (unlikely(my_bitmap_init(&slave_error_mask,0,MAX_SLAVE_ERROR,0)))
896 DBUG_RETURN(1);
897
898 use_slave_mask= 1;
899 for (;my_isspace(system_charset_info,*arg);++arg)
900 /* empty */;
901 if (!my_strnncoll(system_charset_info,(uchar*)arg,4,(const uchar*)"all",4))
902 {
903 bitmap_set_all(&slave_error_mask);
904 goto end;
905 }
906 for (p= arg ; *p; )
907 {
908 long err_code;
909 if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
910 break;
911 if (err_code < MAX_SLAVE_ERROR)
912 bitmap_set_bit(&slave_error_mask,(uint)err_code);
913 while (!my_isdigit(system_charset_info,*p) && *p)
914 p++;
915 }
916
917end:
918 make_slave_skip_errors_printable();
919 DBUG_RETURN(0);
920}
921
922/**
923 Make printable version if slave_transaction_retry_errors
924 This is never empty as at least ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT
925 will be there
926*/
927
928static void make_slave_transaction_retry_errors_printable(void)
929{
930 /*
931 To be safe, we want 10 characters of room in the buffer for a number
932 plus terminators. Also, we need some space for constant strings.
933 10 characters must be sufficient for a number plus {',' | '...'}
934 plus a NUL terminator. That is a max 6 digit number.
935 */
936 const size_t MIN_ROOM= 10;
937 char *buff= slave_transaction_retry_error_names;
938 char *bend= buff + sizeof(slave_transaction_retry_error_names) - MIN_ROOM;
939 uint i;
940 DBUG_ENTER("make_slave_transaction_retry_errors_printable");
941 DBUG_ASSERT(sizeof(slave_transaction_retry_error_names) > MIN_ROOM);
942
943 /* Make @@slave_transaction_retry_errors show a human-readable value */
944 opt_slave_transaction_retry_errors= slave_transaction_retry_error_names;
945
946 for (i= 0; i < slave_transaction_retry_error_length && buff < bend; i++)
947 {
948 buff= int10_to_str(slave_transaction_retry_errors[i], buff, 10);
949 *buff++= ',';
950 }
951 if (buff != slave_transaction_retry_error_names)
952 buff--; // Remove last ','
953 if (i < slave_transaction_retry_error_length)
954 {
955 /* Couldn't show all errors */
956 buff= strmov(buff, "..."); /* purecov: tested */
957 }
958 *buff=0;
959 DBUG_PRINT("exit", ("error_names: '%s'",
960 slave_transaction_retry_error_names));
961 DBUG_VOID_RETURN;
962}
963
964
965bool init_slave_transaction_retry_errors(const char* arg)
966{
967 const char *p;
968 long err_code;
969 uint i;
970 DBUG_ENTER("init_slave_transaction_retry_errors");
971
972 /* Handle empty strings */
973 if (!arg)
974 arg= "";
975
976 slave_transaction_retry_error_length= 2;
977 for (;my_isspace(system_charset_info,*arg);++arg)
978 /* empty */;
979 for (p= arg; *p; )
980 {
981 if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
982 break;
983 slave_transaction_retry_error_length++;
984 while (!my_isdigit(system_charset_info,*p) && *p)
985 p++;
986 }
987
988 if (unlikely(!(slave_transaction_retry_errors=
989 (uint *) my_once_alloc(sizeof(int) *
990 slave_transaction_retry_error_length,
991 MYF(MY_WME)))))
992 DBUG_RETURN(1);
993
994 /*
995 Temporary error codes:
996 currently, InnoDB deadlock detected by InnoDB or lock
997 wait timeout (innodb_lock_wait_timeout exceeded
998 */
999 slave_transaction_retry_errors[0]= ER_LOCK_DEADLOCK;
1000 slave_transaction_retry_errors[1]= ER_LOCK_WAIT_TIMEOUT;
1001
1002 /* Add user codes after this */
1003 for (p= arg, i= 2; *p; )
1004 {
1005 if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
1006 break;
1007 if (err_code > 0 && err_code < ER_ERROR_LAST)
1008 slave_transaction_retry_errors[i++]= (uint) err_code;
1009 while (!my_isdigit(system_charset_info,*p) && *p)
1010 p++;
1011 }
1012 slave_transaction_retry_error_length= i;
1013
1014 make_slave_transaction_retry_errors_printable();
1015 DBUG_RETURN(0);
1016}
1017
1018
1019int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock)
1020{
1021 DBUG_ENTER("terminate_slave_threads");
1022
1023 if (!mi->inited)
1024 DBUG_RETURN(0); /* successfully do nothing */
1025 int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
1026 int retval= 0;
1027 mysql_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
1028 mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock();
1029
1030 if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
1031 {
1032 DBUG_PRINT("info",("Terminating SQL thread"));
1033 if (mi->using_parallel() && mi->rli.abort_slave && mi->rli.stop_for_until)
1034 {
1035 mi->rli.stop_for_until= false;
1036 mi->rli.parallel.stop_during_until();
1037 }
1038 else
1039 mi->rli.abort_slave=1;
1040 if (unlikely((error= terminate_slave_thread(mi->rli.sql_driver_thd,
1041 sql_lock,
1042 &mi->rli.stop_cond,
1043 &mi->rli.slave_running,
1044 skip_lock))) &&
1045 !force_all)
1046 DBUG_RETURN(error);
1047 retval= error;
1048
1049 mysql_mutex_lock(log_lock);
1050
1051 DBUG_PRINT("info",("Flushing relay-log info file."));
1052 if (current_thd)
1053 THD_STAGE_INFO(current_thd, stage_flushing_relay_log_info_file);
1054 if (mi->rli.flush() || my_sync(mi->rli.info_fd, MYF(MY_WME)))
1055 retval= ER_ERROR_DURING_FLUSH_LOGS;
1056
1057 mysql_mutex_unlock(log_lock);
1058 }
1059 if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
1060 {
1061 DBUG_PRINT("info",("Terminating IO thread"));
1062 mi->abort_slave=1;
1063 if (unlikely((error= terminate_slave_thread(mi->io_thd, io_lock,
1064 &mi->stop_cond,
1065 &mi->slave_running,
1066 skip_lock))) &&
1067 !force_all)
1068 DBUG_RETURN(error);
1069 if (!retval)
1070 retval= error;
1071
1072 mysql_mutex_lock(log_lock);
1073
1074 DBUG_PRINT("info",("Flushing relay log and master info file."));
1075 if (current_thd)
1076 THD_STAGE_INFO(current_thd, stage_flushing_relay_log_and_master_info_repository);
1077 if (likely(mi->fd >= 0))
1078 {
1079 if (flush_master_info(mi, TRUE, FALSE) || my_sync(mi->fd, MYF(MY_WME)))
1080 retval= ER_ERROR_DURING_FLUSH_LOGS;
1081 }
1082 if (mi->rli.relay_log.is_open() &&
1083 my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME)))
1084 retval= ER_ERROR_DURING_FLUSH_LOGS;
1085
1086 mysql_mutex_unlock(log_lock);
1087 }
1088 DBUG_RETURN(retval);
1089}
1090
1091
1092/**
1093 Wait for a slave thread to terminate.
1094
1095 This function is called after requesting the thread to terminate
1096 (by setting @c abort_slave member of @c Relay_log_info or @c
1097 Master_info structure to 1). Termination of the thread is
1098 controlled with the the predicate <code>*slave_running</code>.
1099
1100 Function will acquire @c term_lock before waiting on the condition
1101 unless @c skip_lock is true in which case the mutex should be owned
1102 by the caller of this function and will remain acquired after
1103 return from the function.
1104
1105 @param term_lock
1106 Associated lock to use when waiting for @c term_cond
1107
1108 @param term_cond
1109 Condition that is signalled when the thread has terminated
1110
1111 @param slave_running
1112 Pointer to predicate to check for slave thread termination
1113
1114 @param skip_lock
1115 If @c true the lock will not be acquired before waiting on
1116 the condition. In this case, it is assumed that the calling
1117 function acquires the lock before calling this function.
1118
1119 @retval 0 All OK ER_SLAVE_NOT_RUNNING otherwise.
1120
1121 @note If the executing thread has to acquire term_lock (skip_lock
1122 is false), the negative running status does not represent
1123 any issue therefore no error is reported.
1124
1125 */
1126static int
1127terminate_slave_thread(THD *thd,
1128 mysql_mutex_t *term_lock,
1129 mysql_cond_t *term_cond,
1130 volatile uint *slave_running,
1131 bool skip_lock)
1132{
1133 DBUG_ENTER("terminate_slave_thread");
1134 if (!skip_lock)
1135 {
1136 mysql_mutex_lock(term_lock);
1137 }
1138 else
1139 {
1140 mysql_mutex_assert_owner(term_lock);
1141 }
1142 if (!*slave_running)
1143 {
1144 if (!skip_lock)
1145 {
1146 /*
1147 if run_lock (term_lock) is acquired locally then either
1148 slave_running status is fine
1149 */
1150 mysql_mutex_unlock(term_lock);
1151 DBUG_RETURN(0);
1152 }
1153 else
1154 {
1155 DBUG_RETURN(ER_SLAVE_NOT_RUNNING);
1156 }
1157 }
1158 DBUG_ASSERT(thd != 0);
1159 THD_CHECK_SENTRY(thd);
1160
1161 /*
1162 Is is critical to test if the slave is running. Otherwise, we might
1163 be referening freed memory trying to kick it
1164 */
1165
1166 while (*slave_running) // Should always be true
1167 {
1168 int error __attribute__((unused));
1169 DBUG_PRINT("loop", ("killing slave thread"));
1170
1171 mysql_mutex_lock(&thd->LOCK_thd_kill);
1172#ifndef DONT_USE_THR_ALARM
1173 /*
1174 Error codes from pthread_kill are:
1175 EINVAL: invalid signal number (can't happen)
1176 ESRCH: thread already killed (can happen, should be ignored)
1177 */
1178 int err __attribute__((unused))= pthread_kill(thd->real_id, thr_client_alarm);
1179 DBUG_ASSERT(err != EINVAL);
1180#endif
1181 thd->awake_no_mutex(NOT_KILLED);
1182
1183 mysql_mutex_unlock(&thd->LOCK_thd_kill);
1184
1185 /*
1186 There is a small chance that slave thread might miss the first
1187 alarm. To protect againts it, resend the signal until it reacts
1188 */
1189 struct timespec abstime;
1190 set_timespec(abstime,2);
1191 error= mysql_cond_timedwait(term_cond, term_lock, &abstime);
1192 DBUG_ASSERT(error == ETIMEDOUT || error == 0);
1193 }
1194
1195 DBUG_ASSERT(*slave_running == 0);
1196
1197 if (!skip_lock)
1198 mysql_mutex_unlock(term_lock);
1199 DBUG_RETURN(0);
1200}
1201
1202
1203int start_slave_thread(
1204#ifdef HAVE_PSI_INTERFACE
1205 PSI_thread_key thread_key,
1206#endif
1207 pthread_handler h_func, mysql_mutex_t *start_lock,
1208 mysql_mutex_t *cond_lock,
1209 mysql_cond_t *start_cond,
1210 volatile uint *slave_running,
1211 volatile ulong *slave_run_id,
1212 Master_info* mi)
1213{
1214 pthread_t th;
1215 ulong start_id;
1216 int error;
1217 DBUG_ENTER("start_slave_thread");
1218
1219 DBUG_ASSERT(mi->inited);
1220
1221 if (start_lock)
1222 mysql_mutex_lock(start_lock);
1223 if (!global_system_variables.server_id)
1224 {
1225 if (start_cond)
1226 mysql_cond_broadcast(start_cond);
1227 if (start_lock)
1228 mysql_mutex_unlock(start_lock);
1229 sql_print_error("Server id not set, will not start slave");
1230 DBUG_RETURN(ER_BAD_SLAVE);
1231 }
1232
1233 if (*slave_running)
1234 {
1235 if (start_cond)
1236 mysql_cond_broadcast(start_cond);
1237 if (start_lock)
1238 mysql_mutex_unlock(start_lock);
1239 DBUG_RETURN(ER_SLAVE_MUST_STOP);
1240 }
1241 start_id= *slave_run_id;
1242 DBUG_PRINT("info",("Creating new slave thread"));
1243 if (unlikely((error= mysql_thread_create(thread_key,
1244 &th, &connection_attrib, h_func,
1245 (void*)mi))))
1246 {
1247 sql_print_error("Can't create slave thread (errno= %d).", error);
1248 if (start_lock)
1249 mysql_mutex_unlock(start_lock);
1250 DBUG_RETURN(ER_SLAVE_THREAD);
1251 }
1252
1253 /*
1254 In the following loop we can't check for thd->killed as we have to
1255 wait until THD structures for the slave thread are created
1256 before we can return.
1257 This should be ok as there is no major work done in the slave
1258 threads before they signal that we can stop waiting.
1259 */
1260
1261 if (start_cond && cond_lock) // caller has cond_lock
1262 {
1263 THD* thd = current_thd;
1264 while (start_id == *slave_run_id)
1265 {
1266 DBUG_PRINT("sleep",("Waiting for slave thread to start"));
1267 PSI_stage_info saved_stage= {0, "", 0};
1268 thd->ENTER_COND(start_cond, cond_lock,
1269 & stage_waiting_for_slave_thread_to_start,
1270 & saved_stage);
1271 /*
1272 It is not sufficient to test this at loop bottom. We must test
1273 it after registering the mutex in enter_cond(). If the kill
1274 happens after testing of thd->killed and before the mutex is
1275 registered, we could otherwise go waiting though thd->killed is
1276 set.
1277 */
1278 mysql_cond_wait(start_cond, cond_lock);
1279 thd->EXIT_COND(& saved_stage);
1280 mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
1281 }
1282 }
1283 if (start_lock)
1284 mysql_mutex_unlock(start_lock);
1285 DBUG_RETURN(0);
1286}
1287
1288
1289/*
1290 start_slave_threads()
1291
1292 NOTES
1293 SLAVE_FORCE_ALL is not implemented here on purpose since it does not make
1294 sense to do that for starting a slave--we always care if it actually
1295 started the threads that were not previously running
1296*/
1297
1298int start_slave_threads(THD *thd,
1299 bool need_slave_mutex, bool wait_for_start,
1300 Master_info* mi, const char* master_info_fname,
1301 const char* slave_info_fname, int thread_mask)
1302{
1303 mysql_mutex_t *lock_io=0, *lock_sql=0, *lock_cond_io=0, *lock_cond_sql=0;
1304 mysql_cond_t* cond_io=0, *cond_sql=0;
1305 int error=0;
1306 const char *errmsg;
1307 DBUG_ENTER("start_slave_threads");
1308
1309 if (need_slave_mutex)
1310 {
1311 lock_io = &mi->run_lock;
1312 lock_sql = &mi->rli.run_lock;
1313 }
1314 if (wait_for_start)
1315 {
1316 cond_io = &mi->start_cond;
1317 cond_sql = &mi->rli.start_cond;
1318 lock_cond_io = &mi->run_lock;
1319 lock_cond_sql = &mi->rli.run_lock;
1320 }
1321
1322 /*
1323 If we are using GTID and both SQL and IO threads are stopped, then get
1324 rid of all relay logs.
1325
1326 Relay logs are not very useful when using GTID, except as a buffer
1327 between the fetch in the IO thread and the apply in SQL thread. However
1328 while one of the threads is running, they are in use and cannot be
1329 removed.
1330 */
1331 if (mi->using_gtid != Master_info::USE_GTID_NO &&
1332 !mi->slave_running && !mi->rli.slave_running)
1333 {
1334 /*
1335 purge_relay_logs() clears the mi->rli.group_master_log_pos.
1336 So save and restore them, like we do in CHANGE MASTER.
1337 (We are not going to use them for GTID, but it might be worth to
1338 keep them in case connection with GTID fails and user wants to go
1339 back and continue with previous old-style replication coordinates).
1340 */
1341 mi->master_log_pos = MY_MAX(BIN_LOG_HEADER_SIZE,
1342 mi->rli.group_master_log_pos);
1343 strmake(mi->master_log_name, mi->rli.group_master_log_name,
1344 sizeof(mi->master_log_name)-1);
1345 purge_relay_logs(&mi->rli, thd, 0, &errmsg);
1346 mi->rli.group_master_log_pos= mi->master_log_pos;
1347 strmake(mi->rli.group_master_log_name, mi->master_log_name,
1348 sizeof(mi->rli.group_master_log_name)-1);
1349
1350 error= rpl_load_gtid_state(&mi->gtid_current_pos, mi->using_gtid ==
1351 Master_info::USE_GTID_CURRENT_POS);
1352 mi->events_queued_since_last_gtid= 0;
1353 mi->gtid_reconnect_event_skip_count= 0;
1354
1355 mi->rli.restart_gtid_pos.reset();
1356 }
1357
1358 if (likely(!error) && likely((thread_mask & SLAVE_IO)))
1359 error= start_slave_thread(
1360#ifdef HAVE_PSI_INTERFACE
1361 key_thread_slave_io,
1362#endif
1363 handle_slave_io, lock_io, lock_cond_io,
1364 cond_io,
1365 &mi->slave_running, &mi->slave_run_id,
1366 mi);
1367 if (likely(!error) && likely(thread_mask & SLAVE_SQL))
1368 {
1369 error= start_slave_thread(
1370#ifdef HAVE_PSI_INTERFACE
1371 key_thread_slave_sql,
1372#endif
1373 handle_slave_sql, lock_sql, lock_cond_sql,
1374 cond_sql,
1375 &mi->rli.slave_running, &mi->rli.slave_run_id,
1376 mi);
1377 if (unlikely(error))
1378 terminate_slave_threads(mi, thread_mask & SLAVE_IO, !need_slave_mutex);
1379 }
1380 DBUG_RETURN(error);
1381}
1382
1383
1384/*
1385 Kill slaves preparing for shutdown
1386*/
1387
1388void slave_prepare_for_shutdown()
1389{
1390 mysql_mutex_lock(&LOCK_active_mi);
1391 master_info_index->free_connections();
1392 mysql_mutex_unlock(&LOCK_active_mi);
1393 stop_slave_background_thread();
1394}
1395
1396/*
1397 Release slave threads at time of executing shutdown.
1398*/
1399
1400void end_slave()
1401{
1402 DBUG_ENTER("end_slave");
1403
1404 /*
1405 This is called when the server terminates, in close_connections().
1406 It terminates slave threads. However, some CHANGE MASTER etc may still be
1407 running presently. If a START SLAVE was in progress, the mutex lock below
1408 will make us wait until slave threads have started, and START SLAVE
1409 returns, then we terminate them here.
1410
1411 We can also be called by cleanup(), which only happens if some
1412 startup parameter to the server was wrong.
1413 */
1414 mysql_mutex_lock(&LOCK_active_mi);
1415 /*
1416 master_info_index should not have any threads anymore as they where
1417 killed as part of slave_prepare_for_shutdown()
1418 */
1419 delete master_info_index;
1420 master_info_index= 0;
1421 active_mi= 0;
1422 mysql_mutex_unlock(&LOCK_active_mi);
1423
1424 stop_slave_background_thread();
1425
1426 global_rpl_thread_pool.destroy();
1427 free_all_rpl_filters();
1428 DBUG_VOID_RETURN;
1429}
1430
1431static bool io_slave_killed(Master_info* mi)
1432{
1433 DBUG_ENTER("io_slave_killed");
1434
1435 DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
1436 DBUG_RETURN(mi->abort_slave || abort_loop || mi->io_thd->killed);
1437}
1438
1439/**
1440 The function analyzes a possible killed status and makes
1441 a decision whether to accept it or not.
1442 Normally upon accepting the sql thread goes to shutdown.
1443 In the event of deffering decision @rli->last_event_start_time waiting
1444 timer is set to force the killed status be accepted upon its expiration.
1445
1446 @param thd pointer to a THD instance
1447 @param rli pointer to Relay_log_info instance
1448
1449 @return TRUE the killed status is recognized, FALSE a possible killed
1450 status is deferred.
1451*/
1452static bool sql_slave_killed(rpl_group_info *rgi)
1453{
1454 bool ret= FALSE;
1455 Relay_log_info *rli= rgi->rli;
1456 THD *thd= rgi->thd;
1457 DBUG_ENTER("sql_slave_killed");
1458
1459 DBUG_ASSERT(rli->sql_driver_thd == thd);
1460 DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
1461 if (abort_loop || rli->sql_driver_thd->killed || rli->abort_slave)
1462 {
1463 /*
1464 The transaction should always be binlogged if OPTION_KEEP_LOG is
1465 set (it implies that something can not be rolled back). And such
1466 case should be regarded similarly as modifing a
1467 non-transactional table because retrying of the transaction will
1468 lead to an error or inconsistency as well.
1469
1470 Example: OPTION_KEEP_LOG is set if a temporary table is created
1471 or dropped.
1472
1473 Note that transaction.all.modified_non_trans_table may be 1
1474 if last statement was a single row transaction without begin/end.
1475 Testing this flag must always be done in connection with
1476 rli->is_in_group().
1477 */
1478
1479 if ((thd->transaction.all.modified_non_trans_table ||
1480 (thd->variables.option_bits & OPTION_KEEP_LOG)) &&
1481 rli->is_in_group())
1482 {
1483 char msg_stopped[]=
1484 "... Slave SQL Thread stopped with incomplete event group "
1485 "having non-transactional changes. "
1486 "If the group consists solely of row-based events, you can try "
1487 "to restart the slave with --slave-exec-mode=IDEMPOTENT, which "
1488 "ignores duplicate key, key not found, and similar errors (see "
1489 "documentation for details).";
1490
1491 DBUG_PRINT("info", ("modified_non_trans_table: %d OPTION_BEGIN: %d "
1492 "OPTION_KEEP_LOG: %d is_in_group: %d",
1493 thd->transaction.all.modified_non_trans_table,
1494 MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
1495 MY_TEST(thd->variables.option_bits & OPTION_KEEP_LOG),
1496 rli->is_in_group()));
1497
1498 if (rli->abort_slave)
1499 {
1500 DBUG_PRINT("info",
1501 ("Request to stop slave SQL Thread received while "
1502 "applying a group that has non-transactional "
1503 "changes; waiting for completion of the group ... "));
1504
1505 /*
1506 Slave sql thread shutdown in face of unfinished group
1507 modified Non-trans table is handled via a timer. The slave
1508 may eventually give out to complete the current group and in
1509 that case there might be issues at consequent slave restart,
1510 see the error message. WL#2975 offers a robust solution
1511 requiring to store the last exectuted event's coordinates
1512 along with the group's coordianates instead of waiting with
1513 @c last_event_start_time the timer.
1514 */
1515
1516 if (rgi->last_event_start_time == 0)
1517 rgi->last_event_start_time= my_time(0);
1518 ret= difftime(my_time(0), rgi->last_event_start_time) <=
1519 SLAVE_WAIT_GROUP_DONE ? FALSE : TRUE;
1520
1521 DBUG_EXECUTE_IF("stop_slave_middle_group",
1522 DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
1523 ret= TRUE;);); // time is over
1524
1525 if (ret == 0)
1526 {
1527 rli->report(WARNING_LEVEL, 0, rgi->gtid_info(),
1528 "Request to stop slave SQL Thread received while "
1529 "applying a group that has non-transactional "
1530 "changes; waiting for completion of the group ... ");
1531 }
1532 else
1533 {
1534 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(),
1535 ER_THD(thd, ER_SLAVE_FATAL_ERROR), msg_stopped);
1536 }
1537 }
1538 else
1539 {
1540 ret= TRUE;
1541 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(),
1542 ER_THD(thd, ER_SLAVE_FATAL_ERROR),
1543 msg_stopped);
1544 }
1545 }
1546 else
1547 {
1548 ret= TRUE;
1549 }
1550 }
1551 if (ret)
1552 rgi->last_event_start_time= 0;
1553
1554 DBUG_RETURN(ret);
1555}
1556
1557
1558/*
1559 skip_load_data_infile()
1560
1561 NOTES
1562 This is used to tell a 3.23 master to break send_file()
1563*/
1564
1565void skip_load_data_infile(NET *net)
1566{
1567 DBUG_ENTER("skip_load_data_infile");
1568
1569 (void)net_request_file(net, "/dev/null");
1570 (void)my_net_read(net); // discard response
1571 (void)net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0); // ok
1572 DBUG_VOID_RETURN;
1573}
1574
1575
1576bool net_request_file(NET* net, const char* fname)
1577{
1578 DBUG_ENTER("net_request_file");
1579 DBUG_RETURN(net_write_command(net, 251, (uchar*) fname, strlen(fname),
1580 (uchar*) "", 0));
1581}
1582
1583/*
1584 From other comments and tests in code, it looks like
1585 sometimes Query_log_event and Load_log_event can have db == 0
1586 (see rewrite_db() above for example)
1587 (cases where this happens are unclear; it may be when the master is 3.23).
1588*/
1589
1590const char *print_slave_db_safe(const char* db)
1591{
1592 DBUG_ENTER("*print_slave_db_safe");
1593
1594 DBUG_RETURN((db ? db : ""));
1595}
1596
1597#endif /* HAVE_REPLICATION */
1598
1599int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
1600 const char *default_val)
1601{
1602 size_t length;
1603 DBUG_ENTER("init_strvar_from_file");
1604
1605 if ((length=my_b_gets(f,var, max_size)))
1606 {
1607 char* last_p = var + length -1;
1608 if (*last_p == '\n')
1609 *last_p = 0; // if we stopped on newline, kill it
1610 else
1611 {
1612 /*
1613 If we truncated a line or stopped on last char, remove all chars
1614 up to and including newline.
1615 */
1616 int c;
1617 while (((c=my_b_get(f)) != '\n' && c != my_b_EOF)) ;
1618 }
1619 DBUG_RETURN(0);
1620 }
1621 else if (default_val)
1622 {
1623 strmake(var, default_val, max_size-1);
1624 DBUG_RETURN(0);
1625 }
1626 DBUG_RETURN(1);
1627}
1628
1629/*
1630 when moving these functions to mysys, don't forget to
1631 remove slave.cc from libmysqld/CMakeLists.txt
1632*/
1633int init_intvar_from_file(int* var, IO_CACHE* f, int default_val)
1634{
1635 char buf[32];
1636 DBUG_ENTER("init_intvar_from_file");
1637
1638
1639 if (my_b_gets(f, buf, sizeof(buf)))
1640 {
1641 *var = atoi(buf);
1642 DBUG_RETURN(0);
1643 }
1644 else if (default_val)
1645 {
1646 *var = default_val;
1647 DBUG_RETURN(0);
1648 }
1649 DBUG_RETURN(1);
1650}
1651
1652int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val)
1653{
1654 char buf[16];
1655 DBUG_ENTER("init_floatvar_from_file");
1656
1657
1658 if (my_b_gets(f, buf, sizeof(buf)))
1659 {
1660 if (sscanf(buf, "%f", var) != 1)
1661 DBUG_RETURN(1);
1662 else
1663 DBUG_RETURN(0);
1664 }
1665 else if (default_val != 0.0)
1666 {
1667 *var = default_val;
1668 DBUG_RETURN(0);
1669 }
1670 DBUG_RETURN(1);
1671}
1672
1673
1674/**
1675 A master info read method
1676
1677 This function is called from @c init_master_info() along with
1678 relatives to restore some of @c active_mi members.
1679 Particularly, this function is responsible for restoring
1680 IGNORE_SERVER_IDS list of servers whose events the slave is
1681 going to ignore (to not log them in the relay log).
1682 Items being read are supposed to be decimal output of values of a
1683 type shorter or equal of @c long and separated by the single space.
1684 It also used to restore DO_DOMAIN_IDS & IGNORE_DOMAIN_IDS lists.
1685
1686 @param arr @c DYNAMIC_ARRAY pointer to storage for servers id
1687 @param f @c IO_CACHE pointer to the source file
1688
1689 @retval 0 All OK
1690 @retval non-zero An error
1691*/
1692
1693int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f)
1694{
1695 int ret= 0;
1696 char buf[16 * (sizeof(long)*4 + 1)]; // static buffer to use most of times
1697 char *buf_act= buf; // actual buffer can be dynamic if static is short
1698 char *token, *last;
1699 uint num_items; // number of items of `arr'
1700 size_t read_size;
1701 DBUG_ENTER("init_dynarray_intvar_from_file");
1702
1703 if ((read_size= my_b_gets(f, buf_act, sizeof(buf))) == 0)
1704 {
1705 DBUG_RETURN(0); // no line in master.info
1706 }
1707 if (read_size + 1 == sizeof(buf) && buf[sizeof(buf) - 2] != '\n')
1708 {
1709 /*
1710 short read happend; allocate sufficient memory and make the 2nd read
1711 */
1712 char buf_work[(sizeof(long)*3 + 1)*16];
1713 memcpy(buf_work, buf, sizeof(buf_work));
1714 num_items= atoi(strtok_r(buf_work, " ", &last));
1715 size_t snd_size;
1716 /*
1717 max size lower bound approximate estimation bases on the formula:
1718 (the items number + items themselves) *
1719 (decimal size + space) - 1 + `\n' + '\0'
1720 */
1721 size_t max_size= (1 + num_items) * (sizeof(long)*3 + 1) + 1;
1722 buf_act= (char*) my_malloc(max_size, MYF(MY_WME));
1723 memcpy(buf_act, buf, read_size);
1724 snd_size= my_b_gets(f, buf_act + read_size, max_size - read_size);
1725 if (snd_size == 0 ||
1726 ((snd_size + 1 == max_size - read_size) && buf_act[max_size - 2] != '\n'))
1727 {
1728 /*
1729 failure to make the 2nd read or short read again
1730 */
1731 ret= 1;
1732 goto err;
1733 }
1734 }
1735 token= strtok_r(buf_act, " ", &last);
1736 if (token == NULL)
1737 {
1738 ret= 1;
1739 goto err;
1740 }
1741 num_items= atoi(token);
1742 for (uint i=0; i < num_items; i++)
1743 {
1744 token= strtok_r(NULL, " ", &last);
1745 if (token == NULL)
1746 {
1747 ret= 1;
1748 goto err;
1749 }
1750 else
1751 {
1752 ulong val= atol(token);
1753 insert_dynamic(arr, (uchar *) &val);
1754 }
1755 }
1756err:
1757 if (buf_act != buf)
1758 my_free(buf_act);
1759 DBUG_RETURN(ret);
1760}
1761
1762#ifdef HAVE_REPLICATION
1763
1764/*
1765 Check if the error is caused by network.
1766 @param[in] errorno Number of the error.
1767 RETURNS:
1768 TRUE network error
1769 FALSE not network error
1770*/
1771
1772bool is_network_error(uint errorno)
1773{
1774 if (errorno == CR_CONNECTION_ERROR ||
1775 errorno == CR_CONN_HOST_ERROR ||
1776 errorno == CR_SERVER_GONE_ERROR ||
1777 errorno == CR_SERVER_LOST ||
1778 errorno == ER_CON_COUNT_ERROR ||
1779 errorno == ER_CONNECTION_KILLED ||
1780 errorno == ER_NEW_ABORTING_CONNECTION ||
1781 errorno == ER_NET_READ_INTERRUPTED ||
1782 errorno == ER_SERVER_SHUTDOWN)
1783 return TRUE;
1784#ifdef WITH_WSREP
1785 if (errorno == ER_UNKNOWN_COM_ERROR)
1786 return TRUE;
1787#endif
1788
1789 return FALSE;
1790}
1791
1792
1793/*
1794 Note that we rely on the master's version (3.23, 4.0.14 etc) instead of
1795 relying on the binlog's version. This is not perfect: imagine an upgrade
1796 of the master without waiting that all slaves are in sync with the master;
1797 then a slave could be fooled about the binlog's format. This is what happens
1798 when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0
1799 slaves are fooled. So we do this only to distinguish between 3.23 and more
1800 recent masters (it's too late to change things for 3.23).
1801
1802 RETURNS
1803 0 ok
1804 1 error
1805 2 transient network problem, the caller should try to reconnect
1806*/
1807
1808static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
1809{
1810 char err_buff[MAX_SLAVE_ERRMSG], err_buff2[MAX_SLAVE_ERRMSG];
1811 const char* errmsg= 0;
1812 int err_code= 0;
1813 MYSQL_RES *master_res= 0;
1814 MYSQL_ROW master_row;
1815 uint version= mysql_get_server_version(mysql) / 10000;
1816 DBUG_ENTER("get_master_version_and_clock");
1817
1818 /*
1819 Free old description_event_for_queue (that is needed if we are in
1820 a reconnection).
1821 */
1822 delete mi->rli.relay_log.description_event_for_queue;
1823 mi->rli.relay_log.description_event_for_queue= 0;
1824
1825 if (!my_isdigit(&my_charset_bin,*mysql->server_version))
1826 {
1827 errmsg= err_buff2;
1828 snprintf(err_buff2, sizeof(err_buff2),
1829 "Master reported unrecognized MySQL version: %s",
1830 mysql->server_version);
1831 err_code= ER_SLAVE_FATAL_ERROR;
1832 sprintf(err_buff, ER_DEFAULT(err_code), err_buff2);
1833 }
1834 else
1835 {
1836 /*
1837 Note the following switch will bug when we have MySQL branch 30 ;)
1838 */
1839 switch (version) {
1840 case 0:
1841 case 1:
1842 case 2:
1843 errmsg= err_buff2;
1844 snprintf(err_buff2, sizeof(err_buff2),
1845 "Master reported unrecognized MySQL version: %s",
1846 mysql->server_version);
1847 err_code= ER_SLAVE_FATAL_ERROR;
1848 sprintf(err_buff, ER_DEFAULT(err_code), err_buff2);
1849 break;
1850 case 3:
1851 mi->rli.relay_log.description_event_for_queue= new
1852 Format_description_log_event(1, mysql->server_version);
1853 break;
1854 case 4:
1855 mi->rli.relay_log.description_event_for_queue= new
1856 Format_description_log_event(3, mysql->server_version);
1857 break;
1858 default:
1859 /*
1860 Master is MySQL >=5.0. Give a default Format_desc event, so that we can
1861 take the early steps (like tests for "is this a 3.23 master") which we
1862 have to take before we receive the real master's Format_desc which will
1863 override this one. Note that the Format_desc we create below is garbage
1864 (it has the format of the *slave*); it's only good to help know if the
1865 master is 3.23, 4.0, etc.
1866 */
1867 mi->rli.relay_log.description_event_for_queue= new
1868 Format_description_log_event(4, mysql->server_version);
1869 break;
1870 }
1871 }
1872
1873 /*
1874 This does not mean that a 5.0 slave will be able to read a 6.0 master; but
1875 as we don't know yet, we don't want to forbid this for now. If a 5.0 slave
1876 can't read a 6.0 master, this will show up when the slave can't read some
1877 events sent by the master, and there will be error messages.
1878 */
1879
1880 if (errmsg)
1881 goto err;
1882
1883 /* as we are here, we tried to allocate the event */
1884 if (!mi->rli.relay_log.description_event_for_queue)
1885 {
1886 errmsg= "default Format_description_log_event";
1887 err_code= ER_SLAVE_CREATE_EVENT_FAILURE;
1888 sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
1889 goto err;
1890 }
1891
1892 /*
1893 FD_q's (A) is set initially from RL's (A): FD_q.(A) := RL.(A).
1894 It's necessary to adjust FD_q.(A) at this point because in the following
1895 course FD_q is going to be dumped to RL.
1896 Generally FD_q is derived from a received FD_m (roughly FD_q := FD_m)
1897 in queue_event and the master's (A) is installed.
1898 At one step with the assignment the Relay-Log's checksum alg is set to
1899 a new value: RL.(A) := FD_q.(A). If the slave service is stopped
1900 the last time assigned RL.(A) will be passed over to the restarting
1901 service (to the current execution point).
1902 RL.A is a "codec" to verify checksum in queue_event() almost all the time
1903 the first fake Rotate event.
1904 Starting from this point IO thread will executes the following checksum
1905 warmup sequence of actions:
1906
1907 FD_q.A := RL.A,
1908 A_m^0 := master.@@global.binlog_checksum,
1909 {queue_event(R_f): verifies(R_f, A_m^0)},
1910 {queue_event(FD_m): verifies(FD_m, FD_m.A), dump(FD_q), rotate(RL),
1911 FD_q := FD_m, RL.A := FD_q.A)}
1912
1913 See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg
1914 docs lines (binlog.h).
1915 In above A_m^0 - the value of master's
1916 @@binlog_checksum determined in the upcoming handshake (stored in
1917 mi->checksum_alg_before_fd).
1918
1919
1920 After the warm-up sequence IO gets to "normal" checksum verification mode
1921 to use RL.A in
1922
1923 {queue_event(E_m): verifies(E_m, RL.A)}
1924
1925 until it has received a new FD_m.
1926 */
1927 mi->rli.relay_log.description_event_for_queue->checksum_alg=
1928 mi->rli.relay_log.relay_log_checksum_alg;
1929
1930 DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg !=
1931 BINLOG_CHECKSUM_ALG_UNDEF);
1932 DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
1933 BINLOG_CHECKSUM_ALG_UNDEF);
1934 /*
1935 Compare the master and slave's clock. Do not die if master's clock is
1936 unavailable (very old master not supporting UNIX_TIMESTAMP()?).
1937 */
1938
1939#ifdef ENABLED_DEBUG_SYNC
1940 DBUG_EXECUTE_IF("dbug.before_get_UNIX_TIMESTAMP",
1941 {
1942 const char act[]=
1943 "now "
1944 "wait_for signal.get_unix_timestamp";
1945 DBUG_ASSERT(debug_sync_service);
1946 DBUG_ASSERT(!debug_sync_set_action(current_thd,
1947 STRING_WITH_LEN(act)));
1948 };);
1949#endif
1950
1951 master_res= NULL;
1952 if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
1953 (master_res= mysql_store_result(mysql)) &&
1954 (master_row= mysql_fetch_row(master_res)))
1955 {
1956 mysql_mutex_lock(&mi->data_lock);
1957 mi->clock_diff_with_master=
1958 (long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10));
1959 mysql_mutex_unlock(&mi->data_lock);
1960 }
1961 else if (check_io_slave_killed(mi, NULL))
1962 goto slave_killed_err;
1963 else if (is_network_error(mysql_errno(mysql)))
1964 {
1965 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
1966 "Get master clock failed with error: %s", mysql_error(mysql));
1967 goto network_err;
1968 }
1969 else
1970 {
1971 mysql_mutex_lock(&mi->data_lock);
1972 mi->clock_diff_with_master= 0; /* The "most sensible" value */
1973 mysql_mutex_unlock(&mi->data_lock);
1974 sql_print_warning("\"SELECT UNIX_TIMESTAMP()\" failed on master, "
1975 "do not trust column Seconds_Behind_Master of SHOW "
1976 "SLAVE STATUS. Error: %s (%d)",
1977 mysql_error(mysql), mysql_errno(mysql));
1978 }
1979 if (master_res)
1980 {
1981 mysql_free_result(master_res);
1982 master_res= NULL;
1983 }
1984
1985 /*
1986 Check that the master's server id and ours are different. Because if they
1987 are equal (which can result from a simple copy of master's datadir to slave,
1988 thus copying some my.cnf), replication will work but all events will be
1989 skipped.
1990 Do not die if SHOW VARIABLES LIKE 'SERVER_ID' fails on master (very old
1991 master?).
1992 Note: we could have put a @@SERVER_ID in the previous SELECT
1993 UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
1994 */
1995#ifdef ENABLED_DEBUG_SYNC
1996 DBUG_EXECUTE_IF("dbug.before_get_SERVER_ID",
1997 {
1998 const char act[]=
1999 "now "
2000 "wait_for signal.get_server_id";
2001 DBUG_ASSERT(debug_sync_service);
2002 DBUG_ASSERT(!debug_sync_set_action(current_thd,
2003 STRING_WITH_LEN(act)));
2004 };);
2005#endif
2006 master_res= NULL;
2007 master_row= NULL;
2008 if (!mysql_real_query(mysql,
2009 STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) &&
2010 (master_res= mysql_store_result(mysql)) &&
2011 (master_row= mysql_fetch_row(master_res)))
2012 {
2013 if ((global_system_variables.server_id ==
2014 (mi->master_id= strtoul(master_row[1], 0, 10))) &&
2015 !mi->rli.replicate_same_server_id)
2016 {
2017 errmsg= "The slave I/O thread stops because master and slave have equal \
2018MySQL server ids; these ids must be different for replication to work (or \
2019the --replicate-same-server-id option must be used on slave but this does \
2020not always make sense; please check the manual before using it).";
2021 err_code= ER_SLAVE_FATAL_ERROR;
2022 sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
2023 goto err;
2024 }
2025 }
2026 else if (mysql_errno(mysql))
2027 {
2028 if (check_io_slave_killed(mi, NULL))
2029 goto slave_killed_err;
2030 else if (is_network_error(mysql_errno(mysql)))
2031 {
2032 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2033 "Get master SERVER_ID failed with error: %s", mysql_error(mysql));
2034 goto network_err;
2035 }
2036 /* Fatal error */
2037 errmsg= "The slave I/O thread stops because a fatal error is encountered \
2038when it try to get the value of SERVER_ID variable from master.";
2039 err_code= mysql_errno(mysql);
2040 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2041 goto err;
2042 }
2043 else if (!master_row && master_res)
2044 {
2045 mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE, NULL,
2046 "Unknown system variable 'SERVER_ID' on master, \
2047maybe it is a *VERY OLD MASTER*.");
2048 }
2049 if (master_res)
2050 {
2051 mysql_free_result(master_res);
2052 master_res= NULL;
2053 }
2054 if (mi->master_id == 0 && mi->ignore_server_ids.elements > 0)
2055 {
2056 errmsg= "Slave configured with server id filtering could not detect the master server id.";
2057 err_code= ER_SLAVE_FATAL_ERROR;
2058 sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
2059 goto err;
2060 }
2061
  /*
    Check that the master's global character_set_server and ours are the same.
    Not fatal if query fails (old master?).
    Note that we don't check for equality of global character_set_client and
    collation_connection (neither do we prevent their setting in
    set_var.cc). That's because from what I (Guilhem) have tested, the global
    values of these 2 are never used (new connections don't use them).
    We don't test equality of global collation_database either, as it is
    going to be deprecated (made read-only) in 4.1 very soon.
    The test is only relevant if master < 5.0.3 (we'll test only if it's older
    than the 5 branch; < 5.0.3 was alpha...), as >= 5.0.3 master stores
    charset info in each binlog event.
    We don't do it for 3.23 because masters <3.23.50 hang on
    SELECT @@unknown_var (BUG#7965 - see changelog of 3.23.50). So finally we
    test only if master is 4.x.
  */
2078
2079 /* redundant with rest of code but safer against later additions */
2080 if (version == 3)
2081 goto err;
2082
2083 if (version == 4)
2084 {
2085 master_res= NULL;
2086 if (!mysql_real_query(mysql,
2087 STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) &&
2088 (master_res= mysql_store_result(mysql)) &&
2089 (master_row= mysql_fetch_row(master_res)))
2090 {
2091 if (strcmp(master_row[0], global_system_variables.collation_server->name))
2092 {
2093 errmsg= "The slave I/O thread stops because master and slave have \
2094different values for the COLLATION_SERVER global variable. The values must \
2095be equal for the Statement-format replication to work";
2096 err_code= ER_SLAVE_FATAL_ERROR;
2097 sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
2098 goto err;
2099 }
2100 }
2101 else if (check_io_slave_killed(mi, NULL))
2102 goto slave_killed_err;
2103 else if (is_network_error(mysql_errno(mysql)))
2104 {
2105 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2106 "Get master COLLATION_SERVER failed with error: %s", mysql_error(mysql));
2107 goto network_err;
2108 }
2109 else if (mysql_errno(mysql) != ER_UNKNOWN_SYSTEM_VARIABLE)
2110 {
2111 /* Fatal error */
2112 errmsg= "The slave I/O thread stops because a fatal error is encountered \
2113when it try to get the value of COLLATION_SERVER global variable from master.";
2114 err_code= mysql_errno(mysql);
2115 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2116 goto err;
2117 }
2118 else
2119 mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE, NULL,
2120 "Unknown system variable 'COLLATION_SERVER' on master, \
2121maybe it is a *VERY OLD MASTER*. *NOTE*: slave may experience \
2122inconsistency if replicated data deals with collation.");
2123
2124 if (master_res)
2125 {
2126 mysql_free_result(master_res);
2127 master_res= NULL;
2128 }
2129 }
2130
  /*
    Perform an analogous check for the time zone. Theoretically we should
    also check here that the SYSTEM time zones are the same on slave and
    master, but we can't rely on the value of the @@system_time_zone
    variable (it is a time zone abbreviation) since it is determined at
    start time and so could differ between slave and master even if they
    are really in the same system time zone. So we are omitting this check
    and just relying on documentation. Also, according to Monty, there are
    many users who replicate between servers in various time zones. Hence
    such a check would break everything for them. (And now everything will
    work for them because by default both their master and slave will have
    the 'SYSTEM' time zone).
    This check is only necessary for 4.x masters (and < 5.0.4 masters but
    those were alpha).
  */
2146 if (version == 4)
2147 {
2148 master_res= NULL;
2149 if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) &&
2150 (master_res= mysql_store_result(mysql)) &&
2151 (master_row= mysql_fetch_row(master_res)))
2152 {
2153 if (strcmp(master_row[0],
2154 global_system_variables.time_zone->get_name()->ptr()))
2155 {
2156 errmsg= "The slave I/O thread stops because master and slave have \
2157different values for the TIME_ZONE global variable. The values must \
2158be equal for the Statement-format replication to work";
2159 err_code= ER_SLAVE_FATAL_ERROR;
2160 sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
2161 goto err;
2162 }
2163 }
2164 else if (check_io_slave_killed(mi, NULL))
2165 goto slave_killed_err;
2166 else if (is_network_error(err_code= mysql_errno(mysql)))
2167 {
2168 mi->report(ERROR_LEVEL, err_code, NULL,
2169 "Get master TIME_ZONE failed with error: %s",
2170 mysql_error(mysql));
2171 goto network_err;
2172 }
2173 else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE)
2174 {
2175 /* We use ERROR_LEVEL to get the error logged to file */
2176 mi->report(ERROR_LEVEL, err_code, NULL,
2177
2178 "MySQL master doesn't have a TIME_ZONE variable. Note that"
2179 "if your timezone is not same between master and slave, your "
2180 "slave may get wrong data into timestamp columns");
2181 }
2182 else
2183 {
2184 /* Fatal error */
2185 errmsg= "The slave I/O thread stops because a fatal error is encountered \
2186when it try to get the value of TIME_ZONE global variable from master.";
2187 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2188 goto err;
2189 }
2190 if (master_res)
2191 {
2192 mysql_free_result(master_res);
2193 master_res= NULL;
2194 }
2195 }
2196
2197 if (mi->heartbeat_period != 0.0)
2198 {
2199 const char query_format[]= "SET @master_heartbeat_period= %llu";
2200 char query[sizeof(query_format) + 32];
2201 /*
2202 the period is an ulonglong of nano-secs.
2203 */
2204 my_snprintf(query, sizeof(query), query_format,
2205 (ulonglong) (mi->heartbeat_period*1000000000UL));
2206
2207 DBUG_EXECUTE_IF("simulate_slave_heartbeat_network_error",
2208 { static ulong dbug_count= 0;
2209 if (++dbug_count < 3)
2210 goto heartbeat_network_error;
2211 });
2212 if (mysql_real_query(mysql, query, (ulong)strlen(query)))
2213 {
2214 if (check_io_slave_killed(mi, NULL))
2215 goto slave_killed_err;
2216
2217 if (is_network_error(mysql_errno(mysql)))
2218 {
2219 IF_DBUG(heartbeat_network_error: , )
2220 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2221 "SET @master_heartbeat_period to master failed with error: %s",
2222 mysql_error(mysql));
2223 mysql_free_result(mysql_store_result(mysql));
2224 goto network_err;
2225 }
2226 else
2227 {
2228 /* Fatal error */
2229 errmsg= "The slave I/O thread stops because a fatal error is encountered "
2230 "when it tries to SET @master_heartbeat_period on master.";
2231 err_code= ER_SLAVE_FATAL_ERROR;
2232 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2233 mysql_free_result(mysql_store_result(mysql));
2234 goto err;
2235 }
2236 }
2237 mysql_free_result(mysql_store_result(mysql));
2238 }
2239
2240 /*
2241 Querying if master is capable to checksum and notifying it about own
2242 CRC-awareness. The master's side instant value of @@global.binlog_checksum
2243 is stored in the dump thread's uservar area as well as cached locally
2244 to become known in consensus by master and slave.
2245 */
2246 DBUG_EXECUTE_IF("simulate_slave_unaware_checksum",
2247 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;
2248 goto past_checksum;);
2249 {
2250 int rc;
2251 const char query[]= "SET @master_binlog_checksum= @@global.binlog_checksum";
2252 master_res= NULL;
2253 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; //initially undefined
2254 /*
2255 @c checksum_alg_before_fd is queried from master in this block.
2256 If master is old checksum-unaware the value stays undefined.
2257 Once the first FD will be received its alg descriptor will replace
2258 the being queried one.
2259 */
2260 rc= mysql_real_query(mysql, query,(ulong)strlen(query));
2261 if (rc != 0)
2262 {
2263 if (check_io_slave_killed(mi, NULL))
2264 goto slave_killed_err;
2265
2266 if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE)
2267 {
2268 /* Ignore this expected error if not a high error level */
2269 if (global_system_variables.log_warnings > 1)
2270 {
2271 // this is tolerable as OM -> NS is supported
2272 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2273 "Notifying master by %s failed with "
2274 "error: %s", query, mysql_error(mysql));
2275 }
2276 }
2277 else
2278 {
2279 if (is_network_error(mysql_errno(mysql)))
2280 {
2281 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2282 "Notifying master by %s failed with "
2283 "error: %s", query, mysql_error(mysql));
2284 mysql_free_result(mysql_store_result(mysql));
2285 goto network_err;
2286 }
2287 else
2288 {
2289 errmsg= "The slave I/O thread stops because a fatal error is encountered "
2290 "when it tried to SET @master_binlog_checksum on master.";
2291 err_code= ER_SLAVE_FATAL_ERROR;
2292 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2293 mysql_free_result(mysql_store_result(mysql));
2294 goto err;
2295 }
2296 }
2297 }
2298 else
2299 {
2300 mysql_free_result(mysql_store_result(mysql));
2301 if (!mysql_real_query(mysql,
2302 STRING_WITH_LEN("SELECT @master_binlog_checksum")) &&
2303 (master_res= mysql_store_result(mysql)) &&
2304 (master_row= mysql_fetch_row(master_res)) &&
2305 (master_row[0] != NULL))
2306 {
2307 mi->checksum_alg_before_fd= (enum_binlog_checksum_alg)
2308 (find_type(master_row[0], &binlog_checksum_typelib, 1) - 1);
2309 // valid outcome is either of
2310 DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF ||
2311 mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32);
2312 }
2313 else if (check_io_slave_killed(mi, NULL))
2314 goto slave_killed_err;
2315 else if (is_network_error(mysql_errno(mysql)))
2316 {
2317 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2318 "Get master BINLOG_CHECKSUM failed with error: %s", mysql_error(mysql));
2319 goto network_err;
2320 }
2321 else
2322 {
2323 errmsg= "The slave I/O thread stops because a fatal error is encountered "
2324 "when it tried to SELECT @master_binlog_checksum.";
2325 err_code= ER_SLAVE_FATAL_ERROR;
2326 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2327 mysql_free_result(mysql_store_result(mysql));
2328 goto err;
2329 }
2330 }
2331 if (master_res)
2332 {
2333 mysql_free_result(master_res);
2334 master_res= NULL;
2335 }
2336 }
2337
2338#ifndef DBUG_OFF
2339past_checksum:
2340#endif
2341
2342 /*
2343 Request the master to filter away events with the @@skip_replication flag
2344 set, if we are running with
2345 --replicate-events-marked-for-skip=FILTER_ON_MASTER.
2346 */
2347 if (opt_replicate_events_marked_for_skip == RPL_SKIP_FILTER_ON_MASTER)
2348 {
2349 if (unlikely(mysql_real_query(mysql,
2350 STRING_WITH_LEN("SET skip_replication=1"))))
2351 {
2352 err_code= mysql_errno(mysql);
2353 if (is_network_error(err_code))
2354 {
2355 mi->report(ERROR_LEVEL, err_code, NULL,
2356 "Setting master-side filtering of @@skip_replication failed "
2357 "with error: %s", mysql_error(mysql));
2358 goto network_err;
2359 }
2360 else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE)
2361 {
2362 /*
2363 The master is older than the slave and does not support the
2364 @@skip_replication feature.
2365 This is not a problem, as such master will not generate events with
2366 the @@skip_replication flag set in the first place. We will still
2367 do slave-side filtering of such events though, to handle the (rare)
2368 case of downgrading a master and receiving old events generated from
2369 before the downgrade with the @@skip_replication flag set.
2370 */
2371 DBUG_PRINT("info", ("Old master does not support master-side filtering "
2372 "of @@skip_replication events."));
2373 }
2374 else
2375 {
2376 /* Fatal error */
2377 errmsg= "The slave I/O thread stops because a fatal error is "
2378 "encountered when it tries to request filtering of events marked "
2379 "with the @@skip_replication flag.";
2380 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2381 goto err;
2382 }
2383 }
2384 }
2385
2386 /* Announce MariaDB slave capabilities. */
2387 DBUG_EXECUTE_IF("simulate_slave_capability_none", goto after_set_capability;);
2388 {
2389 int rc= DBUG_EVALUATE_IF("simulate_slave_capability_old_53",
2390 mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
2391 STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_ANNOTATE))),
2392 mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
2393 STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_MINE))));
2394 if (unlikely(rc))
2395 {
2396 err_code= mysql_errno(mysql);
2397 if (is_network_error(err_code))
2398 {
2399 mi->report(ERROR_LEVEL, err_code, NULL,
2400 "Setting @mariadb_slave_capability failed with error: %s",
2401 mysql_error(mysql));
2402 goto network_err;
2403 }
2404 else
2405 {
2406 /* Fatal error */
2407 errmsg= "The slave I/O thread stops because a fatal error is "
2408 "encountered when it tries to set @mariadb_slave_capability.";
2409 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2410 goto err;
2411 }
2412 }
2413 }
2414#ifndef DBUG_OFF
2415after_set_capability:
2416#endif
2417
2418 if (mi->using_gtid != Master_info::USE_GTID_NO)
2419 {
2420 /* Request dump to start from slave replication GTID state. */
2421 int rc;
2422 char str_buf[256];
2423 String query_str(str_buf, sizeof(str_buf), system_charset_info);
2424 query_str.length(0);
2425
2426 /*
2427 Read the master @@GLOBAL.gtid_domain_id variable.
2428 This is mostly to check that master is GTID aware, but we could later
2429 perhaps use it to check that different multi-source masters are correctly
2430 configured with distinct domain_id.
2431 */
2432 if (mysql_real_query(mysql,
2433 STRING_WITH_LEN("SELECT @@GLOBAL.gtid_domain_id")) ||
2434 !(master_res= mysql_store_result(mysql)) ||
2435 !(master_row= mysql_fetch_row(master_res)))
2436 {
2437 err_code= mysql_errno(mysql);
2438 if (is_network_error(err_code))
2439 {
2440 mi->report(ERROR_LEVEL, err_code, NULL,
2441 "Get master @@GLOBAL.gtid_domain_id failed with error: %s",
2442 mysql_error(mysql));
2443 goto network_err;
2444 }
2445 else
2446 {
2447 errmsg= "The slave I/O thread stops because master does not support "
2448 "MariaDB global transaction id. A fatal error is encountered when "
2449 "it tries to SELECT @@GLOBAL.gtid_domain_id.";
2450 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2451 goto err;
2452 }
2453 }
2454 mysql_free_result(master_res);
2455 master_res= NULL;
2456
2457 query_str.append(STRING_WITH_LEN("SET @slave_connect_state='"),
2458 system_charset_info);
2459 if (mi->gtid_current_pos.append_to_string(&query_str))
2460 {
2461 err_code= ER_OUTOFMEMORY;
2462 errmsg= "The slave I/O thread stops because a fatal out-of-memory "
2463 "error is encountered when it tries to compute @slave_connect_state.";
2464 sprintf(err_buff, "%s Error: Out of memory", errmsg);
2465 goto err;
2466 }
2467 query_str.append(STRING_WITH_LEN("'"), system_charset_info);
2468
2469 rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
2470 if (unlikely(rc))
2471 {
2472 err_code= mysql_errno(mysql);
2473 if (is_network_error(err_code))
2474 {
2475 mi->report(ERROR_LEVEL, err_code, NULL,
2476 "Setting @slave_connect_state failed with error: %s",
2477 mysql_error(mysql));
2478 goto network_err;
2479 }
2480 else
2481 {
2482 /* Fatal error */
2483 errmsg= "The slave I/O thread stops because a fatal error is "
2484 "encountered when it tries to set @slave_connect_state.";
2485 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2486 goto err;
2487 }
2488 }
2489
2490 query_str.length(0);
2491 if (query_str.append(STRING_WITH_LEN("SET @slave_gtid_strict_mode="),
2492 system_charset_info) ||
2493 query_str.append_ulonglong(opt_gtid_strict_mode != false))
2494 {
2495 err_code= ER_OUTOFMEMORY;
2496 errmsg= "The slave I/O thread stops because a fatal out-of-memory "
2497 "error is encountered when it tries to set @slave_gtid_strict_mode.";
2498 sprintf(err_buff, "%s Error: Out of memory", errmsg);
2499 goto err;
2500 }
2501
2502 rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
2503 if (unlikely(rc))
2504 {
2505 err_code= mysql_errno(mysql);
2506 if (is_network_error(err_code))
2507 {
2508 mi->report(ERROR_LEVEL, err_code, NULL,
2509 "Setting @slave_gtid_strict_mode failed with error: %s",
2510 mysql_error(mysql));
2511 goto network_err;
2512 }
2513 else
2514 {
2515 /* Fatal error */
2516 errmsg= "The slave I/O thread stops because a fatal error is "
2517 "encountered when it tries to set @slave_gtid_strict_mode.";
2518 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2519 goto err;
2520 }
2521 }
2522
2523 query_str.length(0);
2524 if (query_str.append(STRING_WITH_LEN("SET @slave_gtid_ignore_duplicates="),
2525 system_charset_info) ||
2526 query_str.append_ulonglong(opt_gtid_ignore_duplicates != false))
2527 {
2528 err_code= ER_OUTOFMEMORY;
2529 errmsg= "The slave I/O thread stops because a fatal out-of-memory error "
2530 "is encountered when it tries to set @slave_gtid_ignore_duplicates.";
2531 sprintf(err_buff, "%s Error: Out of memory", errmsg);
2532 goto err;
2533 }
2534
2535 rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
2536 if (unlikely(rc))
2537 {
2538 err_code= mysql_errno(mysql);
2539 if (is_network_error(err_code))
2540 {
2541 mi->report(ERROR_LEVEL, err_code, NULL,
2542 "Setting @slave_gtid_ignore_duplicates failed with "
2543 "error: %s", mysql_error(mysql));
2544 goto network_err;
2545 }
2546 else
2547 {
2548 /* Fatal error */
2549 errmsg= "The slave I/O thread stops because a fatal error is "
2550 "encountered when it tries to set @slave_gtid_ignore_duplicates.";
2551 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2552 goto err;
2553 }
2554 }
2555
2556 if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID)
2557 {
2558 query_str.length(0);
2559 query_str.append(STRING_WITH_LEN("SET @slave_until_gtid='"),
2560 system_charset_info);
2561 if (mi->rli.until_gtid_pos.append_to_string(&query_str))
2562 {
2563 err_code= ER_OUTOFMEMORY;
2564 errmsg= "The slave I/O thread stops because a fatal out-of-memory "
2565 "error is encountered when it tries to compute @slave_until_gtid.";
2566 sprintf(err_buff, "%s Error: Out of memory", errmsg);
2567 goto err;
2568 }
2569 query_str.append(STRING_WITH_LEN("'"), system_charset_info);
2570
2571 rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
2572 if (unlikely(rc))
2573 {
2574 err_code= mysql_errno(mysql);
2575 if (is_network_error(err_code))
2576 {
2577 mi->report(ERROR_LEVEL, err_code, NULL,
2578 "Setting @slave_until_gtid failed with error: %s",
2579 mysql_error(mysql));
2580 goto network_err;
2581 }
2582 else
2583 {
2584 /* Fatal error */
2585 errmsg= "The slave I/O thread stops because a fatal error is "
2586 "encountered when it tries to set @slave_until_gtid.";
2587 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2588 goto err;
2589 }
2590 }
2591 }
2592 }
2593 else
2594 {
2595 /*
2596 If we are not using GTID to connect this time, then instead request
2597 the corresponding GTID position from the master, so that the user
2598 can reconnect the next time using MASTER_GTID_POS=AUTO.
2599 */
2600 char quote_buf[2*sizeof(mi->master_log_name)+1];
2601 char str_buf[28+2*sizeof(mi->master_log_name)+10];
2602 String query(str_buf, sizeof(str_buf), system_charset_info);
2603 query.length(0);
2604
2605 query.append("SELECT binlog_gtid_pos('");
2606 escape_quotes_for_mysql(&my_charset_bin, quote_buf, sizeof(quote_buf),
2607 mi->master_log_name, strlen(mi->master_log_name));
2608 query.append(quote_buf);
2609 query.append("',");
2610 query.append_ulonglong(mi->master_log_pos);
2611 query.append(")");
2612
2613 if (!mysql_real_query(mysql, query.c_ptr_safe(), query.length()) &&
2614 (master_res= mysql_store_result(mysql)) &&
2615 (master_row= mysql_fetch_row(master_res)) &&
2616 (master_row[0] != NULL))
2617 {
2618 rpl_global_gtid_slave_state->load(mi->io_thd, master_row[0],
2619 strlen(master_row[0]), false, false);
2620 }
2621 else if (check_io_slave_killed(mi, NULL))
2622 goto slave_killed_err;
2623 else if (is_network_error(mysql_errno(mysql)))
2624 {
2625 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2626 "Get master GTID position failed with error: %s", mysql_error(mysql));
2627 goto network_err;
2628 }
2629 else
2630 {
2631 /*
2632 ToDo: If the master does not have the binlog_gtid_pos() function, it
2633 just means that it is an old master with no GTID support, so we should
2634 do nothing.
2635
2636 However, if binlog_gtid_pos() exists, but fails or returns NULL, then
2637 it means that the requested position is not valid. We could use this
2638 to catch attempts to replicate from within the middle of an event,
2639 avoiding strange failures or possible corruption.
2640 */
2641 }
2642 if (master_res)
2643 {
2644 mysql_free_result(master_res);
2645 master_res= NULL;
2646 }
2647 }
2648
2649err:
2650 if (errmsg)
2651 {
2652 if (master_res)
2653 mysql_free_result(master_res);
2654 DBUG_ASSERT(err_code != 0);
2655 mi->report(ERROR_LEVEL, err_code, NULL, "%s", err_buff);
2656 DBUG_RETURN(1);
2657 }
2658
2659 DBUG_RETURN(0);
2660
2661network_err:
2662 if (master_res)
2663 mysql_free_result(master_res);
2664 DBUG_RETURN(2);
2665
2666slave_killed_err:
2667 if (master_res)
2668 mysql_free_result(master_res);
2669 DBUG_RETURN(2);
2670}
2671
2672
2673static bool wait_for_relay_log_space(Relay_log_info* rli)
2674{
2675 bool slave_killed=0;
2676 bool ignore_log_space_limit;
2677 Master_info* mi = rli->mi;
2678 PSI_stage_info old_stage;
2679 THD* thd = mi->io_thd;
2680 DBUG_ENTER("wait_for_relay_log_space");
2681
2682 mysql_mutex_lock(&rli->log_space_lock);
2683 thd->ENTER_COND(&rli->log_space_cond,
2684 &rli->log_space_lock,
2685 &stage_waiting_for_relay_log_space,
2686 &old_stage);
2687 while (rli->log_space_limit < rli->log_space_total &&
2688 !(slave_killed=io_slave_killed(mi)) &&
2689 !rli->ignore_log_space_limit)
2690 mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
2691
2692 ignore_log_space_limit= rli->ignore_log_space_limit;
2693 rli->ignore_log_space_limit= 0;
2694
2695 thd->EXIT_COND(&old_stage);
2696
2697 /*
2698 Makes the IO thread read only one event at a time
2699 until the SQL thread is able to purge the relay
2700 logs, freeing some space.
2701
2702 Therefore, once the SQL thread processes this next
2703 event, it goes to sleep (no more events in the queue),
2704 sets ignore_log_space_limit=true and wakes the IO thread.
2705 However, this event may have been enough already for
2706 the SQL thread to purge some log files, freeing
2707 rli->log_space_total .
2708
2709 This guarantees that the SQL and IO thread move
2710 forward only one event at a time (to avoid deadlocks),
2711 when the relay space limit is reached. It also
2712 guarantees that when the SQL thread is prepared to
2713 rotate (to be able to purge some logs), the IO thread
2714 will know about it and will rotate.
2715
2716 NOTE: The ignore_log_space_limit is only set when the SQL
2717 thread sleeps waiting for events.
2718
2719 */
2720
2721 if (ignore_log_space_limit)
2722 {
2723#ifndef DBUG_OFF
2724 {
2725 DBUG_PRINT("info", ("log_space_limit=%llu log_space_total=%llu "
2726 "ignore_log_space_limit=%d "
2727 "sql_force_rotate_relay=%d",
2728 rli->log_space_limit, rli->log_space_total,
2729 (int) rli->ignore_log_space_limit,
2730 (int) rli->sql_force_rotate_relay));
2731 }
2732#endif
2733 if (rli->sql_force_rotate_relay)
2734 {
2735 mysql_mutex_lock(&mi->data_lock);
2736 rotate_relay_log(rli->mi);
2737 mysql_mutex_unlock(&mi->data_lock);
2738 rli->sql_force_rotate_relay= false;
2739 }
2740 }
2741
2742 DBUG_RETURN(slave_killed);
2743}
2744
2745
2746/*
2747 Builds a Rotate from the ignored events' info and writes it to relay log.
2748
2749 SYNOPSIS
2750 write_ignored_events_info_to_relay_log()
2751 thd pointer to I/O thread's thd
2752 mi
2753
2754 DESCRIPTION
2755 Slave I/O thread, going to die, must leave a durable trace of the
2756 ignored events' end position for the use of the slave SQL thread, by
2757 calling this function. Only that thread can call it (see assertion).
2758 */
2759static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
2760{
2761 Relay_log_info *rli= &mi->rli;
2762 mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
2763 DBUG_ENTER("write_ignored_events_info_to_relay_log");
2764
2765 DBUG_ASSERT(thd == mi->io_thd);
2766 mysql_mutex_lock(log_lock);
2767 if (rli->ign_master_log_name_end[0] || rli->ign_gtids.count())
2768 {
2769 Rotate_log_event *rev= NULL;
2770 Gtid_list_log_event *glev= NULL;
2771 if (rli->ign_master_log_name_end[0])
2772 {
2773 rev= new Rotate_log_event(rli->ign_master_log_name_end,
2774 0, rli->ign_master_log_pos_end,
2775 Rotate_log_event::DUP_NAME);
2776 rli->ign_master_log_name_end[0]= 0;
2777 if (unlikely(!(bool)rev))
2778 mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE, NULL,
2779 ER_THD(thd, ER_SLAVE_CREATE_EVENT_FAILURE),
2780 "Rotate_event (out of memory?),"
2781 " SHOW SLAVE STATUS may be inaccurate");
2782 }
2783 if (rli->ign_gtids.count())
2784 {
2785 DBUG_ASSERT(!rli->is_in_group()); // Ensure no active transaction
2786 glev= new Gtid_list_log_event(&rli->ign_gtids,
2787 Gtid_list_log_event::FLAG_IGN_GTIDS);
2788 rli->ign_gtids.reset();
2789 if (unlikely(!(bool)glev))
2790 mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE, NULL,
2791 ER_THD(thd, ER_SLAVE_CREATE_EVENT_FAILURE),
2792 "Gtid_list_event (out of memory?),"
2793 " gtid_slave_pos may be inaccurate");
2794 }
2795
2796 /* Can unlock before writing as slave SQL thd will soon see our event. */
2797 mysql_mutex_unlock(log_lock);
2798 if (rev)
2799 {
2800 DBUG_PRINT("info",("writing a Rotate event to track down ignored events"));
2801 rev->server_id= 0; // don't be ignored by slave SQL thread
2802 if (unlikely(rli->relay_log.append(rev)))
2803 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
2804 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
2805 "failed to write a Rotate event"
2806 " to the relay log, SHOW SLAVE STATUS may be"
2807 " inaccurate");
2808 delete rev;
2809 }
2810 if (glev)
2811 {
2812 DBUG_PRINT("info",("writing a Gtid_list event to track down ignored events"));
2813 glev->server_id= 0; // don't be ignored by slave SQL thread
2814 glev->set_artificial_event(); // Don't mess up Exec_Master_Log_Pos
2815 if (unlikely(rli->relay_log.append(glev)))
2816 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
2817 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
2818 "failed to write a Gtid_list event to the relay log, "
2819 "gtid_slave_pos may be inaccurate");
2820 delete glev;
2821 }
2822 if (likely (rev || glev))
2823 {
2824 rli->relay_log.harvest_bytes_written(&rli->log_space_total);
2825 if (flush_master_info(mi, TRUE, TRUE))
2826 sql_print_error("Failed to flush master info file");
2827 }
2828 }
2829 else
2830 mysql_mutex_unlock(log_lock);
2831 DBUG_VOID_RETURN;
2832}
2833
2834
2835int register_slave_on_master(MYSQL* mysql, Master_info *mi,
2836 bool *suppress_warnings)
2837{
2838 uchar buf[1024], *pos= buf;
2839 size_t report_host_len=0, report_user_len=0, report_password_len=0;
2840 DBUG_ENTER("register_slave_on_master");
2841
2842 *suppress_warnings= FALSE;
2843 if (report_host)
2844 report_host_len= strlen(report_host);
2845 if (report_host_len > HOSTNAME_LENGTH)
2846 {
2847 sql_print_warning("The length of report_host is %zu. "
2848 "It is larger than the max length(%d), so this "
2849 "slave cannot be registered to the master.",
2850 report_host_len, HOSTNAME_LENGTH);
2851 DBUG_RETURN(0);
2852 }
2853
2854 if (report_user)
2855 report_user_len= strlen(report_user);
2856 if (report_user_len > USERNAME_LENGTH)
2857 {
2858 sql_print_warning("The length of report_user is %zu. "
2859 "It is larger than the max length(%d), so this "
2860 "slave cannot be registered to the master.",
2861 report_user_len, USERNAME_LENGTH);
2862 DBUG_RETURN(0);
2863 }
2864
2865 if (report_password)
2866 report_password_len= strlen(report_password);
2867 if (report_password_len > MAX_PASSWORD_LENGTH)
2868 {
2869 sql_print_warning("The length of report_password is %zu. "
2870 "It is larger than the max length(%d), so this "
2871 "slave cannot be registered to the master.",
2872 report_password_len, MAX_PASSWORD_LENGTH);
2873 DBUG_RETURN(0);
2874 }
2875
2876 int4store(pos, global_system_variables.server_id); pos+= 4;
2877 pos= net_store_data(pos, (uchar*) report_host, report_host_len);
2878 pos= net_store_data(pos, (uchar*) report_user, report_user_len);
2879 pos= net_store_data(pos, (uchar*) report_password, report_password_len);
2880 int2store(pos, (uint16) report_port); pos+= 2;
2881 /*
2882 Fake rpl_recovery_rank, which was removed in BUG#13963,
2883 so that this server can register itself on old servers,
2884 see BUG#49259.
2885 */
2886 int4store(pos, /* rpl_recovery_rank */ 0); pos+= 4;
2887 /* The master will fill in master_id */
2888 int4store(pos, 0); pos+= 4;
2889
2890 if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (ulong) (pos- buf), 0))
2891 {
2892 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
2893 {
2894 *suppress_warnings= TRUE; // Suppress reconnect warning
2895 }
2896 else if (!check_io_slave_killed(mi, NULL))
2897 {
2898 char buf[256];
2899 my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql),
2900 mysql_errno(mysql));
2901 mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, NULL,
2902 ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf);
2903 }
2904 DBUG_RETURN(1);
2905 }
2906 DBUG_RETURN(0);
2907}
2908
2909
2910/**
2911 Execute a SHOW SLAVE STATUS statement.
2912
2913 @param thd Pointer to THD object for the client thread executing the
2914 statement.
2915
2916 @param mi Pointer to Master_info object for the IO thread.
2917
2918 @retval FALSE success
2919 @retval TRUE failure
2920*/
2921
2922bool show_master_info(THD *thd, Master_info *mi, bool full)
2923{
2924 DBUG_ENTER("show_master_info");
2925 String gtid_pos;
2926 List<Item> field_list;
2927
2928 if (full && rpl_global_gtid_slave_state->tostring(&gtid_pos, NULL, 0))
2929 DBUG_RETURN(TRUE);
2930 show_master_info_get_fields(thd, &field_list, full, gtid_pos.length());
2931 if (thd->protocol->send_result_set_metadata(&field_list,
2932 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
2933 DBUG_RETURN(TRUE);
2934 if (send_show_master_info_data(thd, mi, full, &gtid_pos))
2935 DBUG_RETURN(TRUE);
2936 my_eof(thd);
2937 DBUG_RETURN(FALSE);
2938}
2939
2940void show_master_info_get_fields(THD *thd, List<Item> *field_list,
2941 bool full, size_t gtid_pos_length)
2942{
2943 Master_info *mi;
2944 MEM_ROOT *mem_root= thd->mem_root;
2945 DBUG_ENTER("show_master_info_get_fields");
2946
2947 if (full)
2948 {
2949 field_list->push_back(new (mem_root)
2950 Item_empty_string(thd, "Connection_name",
2951 MAX_CONNECTION_NAME),
2952 mem_root);
2953 field_list->push_back(new (mem_root)
2954 Item_empty_string(thd, "Slave_SQL_State", 30),
2955 mem_root);
2956 }
2957
2958 field_list->push_back(new (mem_root)
2959 Item_empty_string(thd, "Slave_IO_State", 30),
2960 mem_root);
2961 field_list->push_back(new (mem_root)
2962 Item_empty_string(thd, "Master_Host", sizeof(mi->host)),
2963 mem_root);
2964 field_list->push_back(new (mem_root)
2965 Item_empty_string(thd, "Master_User", sizeof(mi->user)),
2966 mem_root);
2967 field_list->push_back(new (mem_root)
2968 Item_return_int(thd, "Master_Port", 7, MYSQL_TYPE_LONG),
2969 mem_root);
2970 field_list->push_back(new (mem_root)
2971 Item_return_int(thd, "Connect_Retry", 10,
2972 MYSQL_TYPE_LONG),
2973 mem_root);
2974 field_list->push_back(new (mem_root)
2975 Item_empty_string(thd, "Master_Log_File", FN_REFLEN),
2976 mem_root);
2977 field_list->push_back(new (mem_root)
2978 Item_return_int(thd, "Read_Master_Log_Pos", 10,
2979 MYSQL_TYPE_LONGLONG),
2980 mem_root);
2981 field_list->push_back(new (mem_root)
2982 Item_empty_string(thd, "Relay_Log_File", FN_REFLEN),
2983 mem_root);
2984 field_list->push_back(new (mem_root)
2985 Item_return_int(thd, "Relay_Log_Pos", 10,
2986 MYSQL_TYPE_LONGLONG),
2987 mem_root);
2988 field_list->push_back(new (mem_root)
2989 Item_empty_string(thd, "Relay_Master_Log_File",
2990 FN_REFLEN),
2991 mem_root);
2992 field_list->push_back(new (mem_root)
2993 Item_empty_string(thd, "Slave_IO_Running", 3),
2994 mem_root);
2995 field_list->push_back(new (mem_root)
2996 Item_empty_string(thd, "Slave_SQL_Running", 3),
2997 mem_root);
2998 field_list->push_back(new (mem_root)
2999 Item_empty_string(thd, "Replicate_Do_DB", 20),
3000 mem_root);
3001 field_list->push_back(new (mem_root)
3002 Item_empty_string(thd, "Replicate_Ignore_DB", 20),
3003 mem_root);
3004 field_list->push_back(new (mem_root)
3005 Item_empty_string(thd, "Replicate_Do_Table", 20),
3006 mem_root);
3007 field_list->push_back(new (mem_root)
3008 Item_empty_string(thd, "Replicate_Ignore_Table", 23),
3009 mem_root);
3010 field_list->push_back(new (mem_root)
3011 Item_empty_string(thd, "Replicate_Wild_Do_Table", 24),
3012 mem_root);
3013 field_list->push_back(new (mem_root)
3014 Item_empty_string(thd, "Replicate_Wild_Ignore_Table",
3015 28),
3016 mem_root);
3017 field_list->push_back(new (mem_root)
3018 Item_return_int(thd, "Last_Errno", 4, MYSQL_TYPE_LONG),
3019 mem_root);
3020 field_list->push_back(new (mem_root)
3021 Item_empty_string(thd, "Last_Error", 20),
3022 mem_root);
3023 field_list->push_back(new (mem_root)
3024 Item_return_int(thd, "Skip_Counter", 10,
3025 MYSQL_TYPE_LONG),
3026 mem_root);
3027 field_list->push_back(new (mem_root)
3028 Item_return_int(thd, "Exec_Master_Log_Pos", 10,
3029 MYSQL_TYPE_LONGLONG),
3030 mem_root);
3031 field_list->push_back(new (mem_root)
3032 Item_return_int(thd, "Relay_Log_Space", 10,
3033 MYSQL_TYPE_LONGLONG),
3034 mem_root);
3035 field_list->push_back(new (mem_root)
3036 Item_empty_string(thd, "Until_Condition", 6),
3037 mem_root);
3038 field_list->push_back(new (mem_root)
3039 Item_empty_string(thd, "Until_Log_File", FN_REFLEN),
3040 mem_root);
3041 field_list->push_back(new (mem_root)
3042 Item_return_int(thd, "Until_Log_Pos", 10,
3043 MYSQL_TYPE_LONGLONG),
3044 mem_root);
3045 field_list->push_back(new (mem_root)
3046 Item_empty_string(thd, "Master_SSL_Allowed", 7),
3047 mem_root);
3048 field_list->push_back(new (mem_root)
3049 Item_empty_string(thd, "Master_SSL_CA_File",
3050 sizeof(mi->ssl_ca)),
3051 mem_root);
3052 field_list->push_back(new (mem_root)
3053 Item_empty_string(thd, "Master_SSL_CA_Path",
3054 sizeof(mi->ssl_capath)),
3055 mem_root);
3056 field_list->push_back(new (mem_root)
3057 Item_empty_string(thd, "Master_SSL_Cert",
3058 sizeof(mi->ssl_cert)),
3059 mem_root);
3060 field_list->push_back(new (mem_root)
3061 Item_empty_string(thd, "Master_SSL_Cipher",
3062 sizeof(mi->ssl_cipher)),
3063 mem_root);
3064 field_list->push_back(new (mem_root)
3065 Item_empty_string(thd, "Master_SSL_Key",
3066 sizeof(mi->ssl_key)),
3067 mem_root);
3068 field_list->push_back(new (mem_root)
3069 Item_return_int(thd, "Seconds_Behind_Master", 10,
3070 MYSQL_TYPE_LONGLONG),
3071 mem_root);
3072 field_list->push_back(new (mem_root)
3073 Item_empty_string(thd, "Master_SSL_Verify_Server_Cert",
3074 3),
3075 mem_root);
3076 field_list->push_back(new (mem_root)
3077 Item_return_int(thd, "Last_IO_Errno", 4,
3078 MYSQL_TYPE_LONG),
3079 mem_root);
3080 field_list->push_back(new (mem_root)
3081 Item_empty_string(thd, "Last_IO_Error", 20),
3082 mem_root);
3083 field_list->push_back(new (mem_root)
3084 Item_return_int(thd, "Last_SQL_Errno", 4,
3085 MYSQL_TYPE_LONG),
3086 mem_root);
3087 field_list->push_back(new (mem_root)
3088 Item_empty_string(thd, "Last_SQL_Error", 20),
3089 mem_root);
3090 field_list->push_back(new (mem_root)
3091 Item_empty_string(thd, "Replicate_Ignore_Server_Ids",
3092 FN_REFLEN),
3093 mem_root);
3094 field_list->push_back(new (mem_root)
3095 Item_return_int(thd, "Master_Server_Id", sizeof(ulong),
3096 MYSQL_TYPE_LONG),
3097 mem_root);
3098 field_list->push_back(new (mem_root)
3099 Item_empty_string(thd, "Master_SSL_Crl",
3100 sizeof(mi->ssl_crl)),
3101 mem_root);
3102 field_list->push_back(new (mem_root)
3103 Item_empty_string(thd, "Master_SSL_Crlpath",
3104 sizeof(mi->ssl_crlpath)),
3105 mem_root);
3106 field_list->push_back(new (mem_root)
3107 Item_empty_string(thd, "Using_Gtid",
3108 sizeof("Current_Pos")-1),
3109 mem_root);
3110 field_list->push_back(new (mem_root)
3111 Item_empty_string(thd, "Gtid_IO_Pos", 30),
3112 mem_root);
3113 field_list->push_back(new (mem_root)
3114 Item_empty_string(thd, "Replicate_Do_Domain_Ids",
3115 FN_REFLEN),
3116 mem_root);
3117 field_list->push_back(new (mem_root)
3118 Item_empty_string(thd, "Replicate_Ignore_Domain_Ids",
3119 FN_REFLEN),
3120 mem_root);
3121 field_list->push_back(new (mem_root)
3122 Item_empty_string(thd, "Parallel_Mode",
3123 sizeof("conservative")-1),
3124 mem_root);
3125 field_list->push_back(new (mem_root)
3126 Item_return_int(thd, "SQL_Delay", 10,
3127 MYSQL_TYPE_LONG));
3128 field_list->push_back(new (mem_root)
3129 Item_return_int(thd, "SQL_Remaining_Delay", 8,
3130 MYSQL_TYPE_LONG));
3131 field_list->push_back(new (mem_root)
3132 Item_empty_string(thd, "Slave_SQL_Running_State",
3133 20));
3134 field_list->push_back(new (mem_root)
3135 Item_return_int(thd, "Slave_DDL_Groups", 20,
3136 MYSQL_TYPE_LONGLONG),
3137 mem_root);
3138 field_list->push_back(new (mem_root)
3139 Item_return_int(thd, "Slave_Non_Transactional_Groups", 20,
3140 MYSQL_TYPE_LONGLONG),
3141 mem_root);
3142 field_list->push_back(new (mem_root)
3143 Item_return_int(thd, "Slave_Transactional_Groups", 20,
3144 MYSQL_TYPE_LONGLONG),
3145 mem_root);
3146
3147 if (full)
3148 {
3149 field_list->push_back(new (mem_root)
3150 Item_return_int(thd, "Retried_transactions", 10,
3151 MYSQL_TYPE_LONG),
3152 mem_root);
3153 field_list->push_back(new (mem_root)
3154 Item_return_int(thd, "Max_relay_log_size", 10,
3155 MYSQL_TYPE_LONGLONG),
3156 mem_root);
3157 field_list->push_back(new (mem_root)
3158 Item_return_int(thd, "Executed_log_entries", 10,
3159 MYSQL_TYPE_LONG),
3160 mem_root);
3161 field_list->push_back(new (mem_root)
3162 Item_return_int(thd, "Slave_received_heartbeats", 10,
3163 MYSQL_TYPE_LONG),
3164 mem_root);
3165 field_list->push_back(new (mem_root)
3166 Item_float(thd, "Slave_heartbeat_period", 0.0, 3, 10),
3167 mem_root);
3168 field_list->push_back(new (mem_root)
3169 Item_empty_string(thd, "Gtid_Slave_Pos",
3170 (uint)gtid_pos_length),
3171 mem_root);
3172 }
3173 DBUG_VOID_RETURN;
3174}
3175
3176/* Text for Slave_IO_Running */
3177static const char *slave_running[]= { "No", "Connecting", "Preparing", "Yes" };
3178
3179static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
3180 String *gtid_pos)
3181{
3182 DBUG_ENTER("send_show_master_info_data");
3183
3184 if (mi->host[0])
3185 {
3186 DBUG_PRINT("info",("host is set: '%s'", mi->host));
3187 String *packet= &thd->packet;
3188 Protocol *protocol= thd->protocol;
3189 Rpl_filter *rpl_filter= mi->rpl_filter;
3190 StringBuffer<256> tmp;
3191
3192 protocol->prepare_for_resend();
3193
3194 /*
3195 slave_running can be accessed without run_lock but not other
3196 non-volotile members like mi->io_thd, which is guarded by the mutex.
3197 */
3198 if (full)
3199 protocol->store(mi->connection_name.str, mi->connection_name.length,
3200 &my_charset_bin);
3201 mysql_mutex_lock(&mi->run_lock);
3202 if (full)
3203 {
3204 /*
3205 Show what the sql driver replication thread is doing
3206 This is only meaningful if there is only one slave thread.
3207 */
3208 protocol->store(mi->rli.sql_driver_thd ?
3209 mi->rli.sql_driver_thd->get_proc_info() : "",
3210 &my_charset_bin);
3211 }
3212 protocol->store(mi->io_thd ? mi->io_thd->get_proc_info() : "", &my_charset_bin);
3213 mysql_mutex_unlock(&mi->run_lock);
3214
3215 mysql_mutex_lock(&mi->data_lock);
3216 mysql_mutex_lock(&mi->rli.data_lock);
3217 /* err_lock is to protect mi->last_error() */
3218 mysql_mutex_lock(&mi->err_lock);
3219 /* err_lock is to protect mi->rli.last_error() */
3220 mysql_mutex_lock(&mi->rli.err_lock);
3221 protocol->store(mi->host, &my_charset_bin);
3222 protocol->store(mi->user, &my_charset_bin);
3223 protocol->store((uint32) mi->port);
3224 protocol->store((uint32) mi->connect_retry);
3225 protocol->store(mi->master_log_name, &my_charset_bin);
3226 protocol->store((ulonglong) mi->master_log_pos);
3227 protocol->store(mi->rli.group_relay_log_name +
3228 dirname_length(mi->rli.group_relay_log_name),
3229 &my_charset_bin);
3230 protocol->store((ulonglong) mi->rli.group_relay_log_pos);
3231 protocol->store(mi->rli.group_master_log_name, &my_charset_bin);
3232 protocol->store(slave_running[mi->slave_running], &my_charset_bin);
3233 protocol->store(mi->rli.slave_running ? "Yes":"No", &my_charset_bin);
3234 protocol->store(rpl_filter->get_do_db());
3235 protocol->store(rpl_filter->get_ignore_db());
3236
3237 rpl_filter->get_do_table(&tmp);
3238 protocol->store(&tmp);
3239 rpl_filter->get_ignore_table(&tmp);
3240 protocol->store(&tmp);
3241 rpl_filter->get_wild_do_table(&tmp);
3242 protocol->store(&tmp);
3243 rpl_filter->get_wild_ignore_table(&tmp);
3244 protocol->store(&tmp);
3245
3246 protocol->store(mi->rli.last_error().number);
3247 protocol->store(mi->rli.last_error().message, &my_charset_bin);
3248 protocol->store((uint32) mi->rli.slave_skip_counter);
3249 protocol->store((ulonglong) mi->rli.group_master_log_pos);
3250 protocol->store((ulonglong) mi->rli.log_space_total);
3251
3252 protocol->store(
3253 mi->rli.until_condition==Relay_log_info::UNTIL_NONE ? "None":
3254 ( mi->rli.until_condition==Relay_log_info::UNTIL_MASTER_POS? "Master":
3255 ( mi->rli.until_condition==Relay_log_info::UNTIL_RELAY_POS? "Relay":
3256 "Gtid")), &my_charset_bin);
3257 protocol->store(mi->rli.until_log_name, &my_charset_bin);
3258 protocol->store((ulonglong) mi->rli.until_log_pos);
3259
3260#ifdef HAVE_OPENSSL
3261 protocol->store(mi->ssl? "Yes":"No", &my_charset_bin);
3262#else
3263 protocol->store(mi->ssl? "Ignored":"No", &my_charset_bin);
3264#endif
3265 protocol->store(mi->ssl_ca, &my_charset_bin);
3266 protocol->store(mi->ssl_capath, &my_charset_bin);
3267 protocol->store(mi->ssl_cert, &my_charset_bin);
3268 protocol->store(mi->ssl_cipher, &my_charset_bin);
3269 protocol->store(mi->ssl_key, &my_charset_bin);
3270
3271 /*
3272 Seconds_Behind_Master: if SQL thread is running and I/O thread is
3273 connected, we can compute it otherwise show NULL (i.e. unknown).
3274 */
3275 if ((mi->slave_running == MYSQL_SLAVE_RUN_READING) &&
3276 mi->rli.slave_running)
3277 {
3278 long time_diff;
3279 bool idle;
3280 time_t stamp= mi->rli.last_master_timestamp;
3281
3282 if (!stamp)
3283 idle= true;
3284 else
3285 {
3286 idle= mi->rli.sql_thread_caught_up;
3287 if (mi->using_parallel() && idle && !mi->rli.parallel.workers_idle())
3288 idle= false;
3289 }
3290 if (idle)
3291 time_diff= 0;
3292 else
3293 {
3294 time_diff= ((long)(time(0) - stamp) - mi->clock_diff_with_master);
3295 /*
3296 Apparently on some systems time_diff can be <0. Here are possible
3297 reasons related to MySQL:
3298 - the master is itself a slave of another master whose time is ahead.
3299 - somebody used an explicit SET TIMESTAMP on the master.
3300 Possible reason related to granularity-to-second of time functions
3301 (nothing to do with MySQL), which can explain a value of -1:
3302 assume the master's and slave's time are perfectly synchronized, and
3303 that at slave's connection time, when the master's timestamp is read,
3304 it is at the very end of second 1, and (a very short time later) when
3305 the slave's timestamp is read it is at the very beginning of second
3306 2. Then the recorded value for master is 1 and the recorded value for
3307 slave is 2. At SHOW SLAVE STATUS time, assume that the difference
3308 between timestamp of slave and rli->last_master_timestamp is 0
3309 (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
3310 This confuses users, so we don't go below 0.
3311
3312 last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
3313 special marker to say "consider we have caught up".
3314 */
3315 if (time_diff < 0)
3316 time_diff= 0;
3317 }
3318 protocol->store((longlong)time_diff);
3319 }
3320 else
3321 {
3322 protocol->store_null();
3323 }
3324 protocol->store(mi->ssl_verify_server_cert? "Yes":"No", &my_charset_bin);
3325
3326 // Last_IO_Errno
3327 protocol->store(mi->last_error().number);
3328 // Last_IO_Error
3329 protocol->store(mi->last_error().message, &my_charset_bin);
3330 // Last_SQL_Errno
3331 protocol->store(mi->rli.last_error().number);
3332 // Last_SQL_Error
3333 protocol->store(mi->rli.last_error().message, &my_charset_bin);
3334 // Replicate_Ignore_Server_Ids
3335 prot_store_ids(thd, &mi->ignore_server_ids);
3336 // Master_Server_id
3337 protocol->store((uint32) mi->master_id);
3338 // SQL_Delay
3339 // Master_Ssl_Crl
3340 protocol->store(mi->ssl_ca, &my_charset_bin);
3341 // Master_Ssl_Crlpath
3342 protocol->store(mi->ssl_capath, &my_charset_bin);
3343 // Using_Gtid
3344 protocol->store(mi->using_gtid_astext(mi->using_gtid), &my_charset_bin);
3345 // Gtid_IO_Pos
3346 {
3347 mi->gtid_current_pos.to_string(&tmp);
3348 protocol->store(tmp.ptr(), tmp.length(), &my_charset_bin);
3349 }
3350
3351 // Replicate_Do_Domain_Ids & Replicate_Ignore_Domain_Ids
3352 mi->domain_id_filter.store_ids(thd);
3353
3354 // Parallel_Mode
3355 {
3356 const char *mode_name= get_type(&slave_parallel_mode_typelib,
3357 mi->parallel_mode);
3358 protocol->store(mode_name, strlen(mode_name), &my_charset_bin);
3359 }
3360
3361 protocol->store((uint32) mi->rli.get_sql_delay());
3362 // SQL_Remaining_Delay
3363 // THD::proc_info is not protected by any lock, so we read it once
3364 // to ensure that we use the same value throughout this function.
3365 const char *slave_sql_running_state=
3366 mi->rli.sql_driver_thd ? mi->rli.sql_driver_thd->proc_info : "";
3367 if (slave_sql_running_state == Relay_log_info::state_delaying_string)
3368 {
3369 time_t t= my_time(0), sql_delay_end= mi->rli.get_sql_delay_end();
3370 protocol->store((uint32)(t < sql_delay_end ? sql_delay_end - t : 0));
3371 }
3372 else
3373 protocol->store_null();
3374 // Slave_SQL_Running_State
3375 protocol->store(slave_sql_running_state, &my_charset_bin);
3376
3377 uint64 events;
3378 events= (uint64)my_atomic_load64_explicit((volatile int64 *)
3379 &mi->total_ddl_groups, MY_MEMORY_ORDER_RELAXED);
3380 protocol->store(events);
3381 events= (uint64)my_atomic_load64_explicit((volatile int64 *)
3382 &mi->total_non_trans_groups, MY_MEMORY_ORDER_RELAXED);
3383 protocol->store(events);
3384 events= (uint64)my_atomic_load64_explicit((volatile int64 *)
3385 &mi->total_trans_groups, MY_MEMORY_ORDER_RELAXED);
3386 protocol->store(events);
3387
3388 if (full)
3389 {
3390 protocol->store((uint32) mi->rli.retried_trans);
3391 protocol->store((ulonglong) mi->rli.max_relay_log_size);
3392 protocol->store((uint32) mi->rli.executed_entries);
3393 protocol->store((uint32) mi->received_heartbeats);
3394 protocol->store((double) mi->heartbeat_period, 3, &tmp);
3395 protocol->store(gtid_pos->ptr(), gtid_pos->length(), &my_charset_bin);
3396 }
3397
3398 mysql_mutex_unlock(&mi->rli.err_lock);
3399 mysql_mutex_unlock(&mi->err_lock);
3400 mysql_mutex_unlock(&mi->rli.data_lock);
3401 mysql_mutex_unlock(&mi->data_lock);
3402
3403 if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length()))
3404 DBUG_RETURN(TRUE);
3405 }
3406 DBUG_RETURN(FALSE);
3407}
3408
3409
3410/* Used to sort connections by name */
3411
3412static int cmp_mi_by_name(const Master_info **arg1,
3413 const Master_info **arg2)
3414{
3415 return my_strcasecmp(system_charset_info, (*arg1)->connection_name.str,
3416 (*arg2)->connection_name.str);
3417}
3418
3419
3420/**
3421 Execute a SHOW FULL SLAVE STATUS statement.
3422
3423 @param thd Pointer to THD object for the client thread executing the
3424 statement.
3425
3426 Elements are sorted according to the original connection_name.
3427
3428 @retval FALSE success
3429 @retval TRUE failure
3430
3431 @note
3432 master_info_index is protected by LOCK_active_mi.
3433*/
3434
3435bool show_all_master_info(THD* thd)
3436{
3437 uint i, elements;
3438 String gtid_pos;
3439 Master_info **tmp;
3440 List<Item> field_list;
3441 DBUG_ENTER("show_master_info");
3442 mysql_mutex_assert_owner(&LOCK_active_mi);
3443
3444 gtid_pos.length(0);
3445 if (rpl_append_gtid_state(&gtid_pos, true))
3446 {
3447 my_error(ER_OUT_OF_RESOURCES, MYF(0));
3448 DBUG_RETURN(TRUE);
3449 }
3450
3451 show_master_info_get_fields(thd, &field_list, 1, gtid_pos.length());
3452 if (thd->protocol->send_result_set_metadata(&field_list,
3453 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
3454 DBUG_RETURN(TRUE);
3455
3456 if (!master_info_index ||
3457 !(elements= master_info_index->master_info_hash.records))
3458 goto end;
3459
3460 /*
3461 Sort lines to get them into a predicted order
3462 (needed for test cases and to not confuse users)
3463 */
3464 if (!(tmp= (Master_info**) thd->alloc(sizeof(Master_info*) * elements)))
3465 DBUG_RETURN(TRUE);
3466
3467 for (i= 0; i < elements; i++)
3468 {
3469 tmp[i]= (Master_info *) my_hash_element(&master_info_index->
3470 master_info_hash, i);
3471 }
3472 my_qsort(tmp, elements, sizeof(Master_info*), (qsort_cmp) cmp_mi_by_name);
3473
3474 for (i= 0; i < elements; i++)
3475 {
3476 if (send_show_master_info_data(thd, tmp[i], 1, &gtid_pos))
3477 DBUG_RETURN(TRUE);
3478 }
3479
3480end:
3481 my_eof(thd);
3482 DBUG_RETURN(FALSE);
3483}
3484
3485
3486void set_slave_thread_options(THD* thd)
3487{
3488 DBUG_ENTER("set_slave_thread_options");
3489 /*
3490 It's nonsense to constrain the slave threads with max_join_size; if a
3491 query succeeded on master, we HAVE to execute it. So set
3492 OPTION_BIG_SELECTS. Setting max_join_size to HA_POS_ERROR is not enough
3493 (and it's not needed if we have OPTION_BIG_SELECTS) because an INSERT
3494 SELECT examining more than 4 billion rows would still fail (yes, because
3495 when max_join_size is 4G, OPTION_BIG_SELECTS is automatically set, but
3496 only for client threads.
3497 */
3498 ulonglong options= thd->variables.option_bits | OPTION_BIG_SELECTS;
3499 if (opt_log_slave_updates)
3500 options|= OPTION_BIN_LOG;
3501 else
3502 options&= ~OPTION_BIN_LOG;
3503 thd->variables.option_bits= options;
3504 thd->variables.completion_type= 0;
3505
3506 /* For easier test in LOGGER::log_command */
3507 if (thd->variables.log_disabled_statements & LOG_DISABLE_SLAVE)
3508 thd->variables.option_bits|= OPTION_LOG_OFF;
3509
3510 thd->variables.sql_log_slow= !MY_TEST(thd->variables.log_slow_disabled_statements &
3511 LOG_SLOW_DISABLE_SLAVE);
3512 DBUG_VOID_RETURN;
3513}
3514
3515void set_slave_thread_default_charset(THD* thd, rpl_group_info *rgi)
3516{
3517 DBUG_ENTER("set_slave_thread_default_charset");
3518
3519 thd->variables.collation_server=
3520 global_system_variables.collation_server;
3521 thd->update_charset(global_system_variables.character_set_client,
3522 global_system_variables.collation_connection);
3523
3524 thd->system_thread_info.rpl_sql_info->cached_charset_invalidate();
3525 DBUG_VOID_RETURN;
3526}
3527
3528/*
3529 init_slave_thread()
3530*/
3531
3532static int init_slave_thread(THD* thd, Master_info *mi,
3533 SLAVE_THD_TYPE thd_type)
3534{
3535 DBUG_ENTER("init_slave_thread");
3536 int simulate_error __attribute__((unused))= 0;
3537 DBUG_EXECUTE_IF("simulate_io_slave_error_on_init",
3538 simulate_error|= (1 << SLAVE_THD_IO););
3539 DBUG_EXECUTE_IF("simulate_sql_slave_error_on_init",
3540 simulate_error|= (1 << SLAVE_THD_SQL););
3541
3542 thd->system_thread = (thd_type == SLAVE_THD_SQL) ?
3543 SYSTEM_THREAD_SLAVE_SQL : SYSTEM_THREAD_SLAVE_IO;
3544 thread_safe_increment32(&service_thread_count);
3545
3546 /* We must call store_globals() before doing my_net_init() */
3547 if (init_thr_lock() || thd->store_globals() ||
3548 my_net_init(&thd->net, 0, thd, MYF(MY_THREAD_SPECIFIC)) ||
3549 IF_DBUG(simulate_error & (1<< thd_type), 0))
3550 {
3551 thd->cleanup();
3552 DBUG_RETURN(-1);
3553 }
3554
3555 thd->security_ctx->skip_grants();
3556 thd->slave_thread= 1;
3557 thd->connection_name= mi->connection_name;
3558 thd->variables.sql_log_slow= !MY_TEST(thd->variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_SLAVE);
3559 set_slave_thread_options(thd);
3560
3561 if (thd_type == SLAVE_THD_SQL)
3562 THD_STAGE_INFO(thd, stage_waiting_for_the_next_event_in_relay_log);
3563 else
3564 THD_STAGE_INFO(thd, stage_waiting_for_master_update);
3565 thd->set_time();
3566 /* Do not use user-supplied timeout value for system threads. */
3567 thd->variables.lock_wait_timeout= LONG_TIMEOUT;
3568 DBUG_RETURN(0);
3569}
3570
3571/*
3572 Sleep for a given amount of time or until killed.
3573
3574 @param thd Thread context of the current thread.
3575 @param seconds The number of seconds to sleep.
3576 @param func Function object to check if the thread has been killed.
3577 @param info The Rpl_info object associated with this sleep.
3578
3579 @retval True if the thread has been killed, false otherwise.
3580*/
3581template <typename killed_func, typename rpl_info>
3582static bool slave_sleep(THD *thd, time_t seconds,
3583 killed_func func, rpl_info info)
3584{
3585
3586 bool ret;
3587 struct timespec abstime;
3588
3589 mysql_mutex_t *lock= &info->sleep_lock;
3590 mysql_cond_t *cond= &info->sleep_cond;
3591
3592 /* Absolute system time at which the sleep time expires. */
3593 set_timespec(abstime, seconds);
3594 mysql_mutex_lock(lock);
3595 thd->ENTER_COND(cond, lock, NULL, NULL);
3596
3597 while (! (ret= func(info)))
3598 {
3599 int error= mysql_cond_timedwait(cond, lock, &abstime);
3600 if (error == ETIMEDOUT || error == ETIME)
3601 break;
3602 }
3603 /* Implicitly unlocks the mutex. */
3604 thd->EXIT_COND(NULL);
3605 return ret;
3606}
3607
3608
3609static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi,
3610 bool *suppress_warnings)
3611{
3612 uchar buf[FN_REFLEN + 10];
3613 int len;
3614 ushort binlog_flags = 0; // for now
3615 char* logname = mi->master_log_name;
3616 DBUG_ENTER("request_dump");
3617
3618 *suppress_warnings= FALSE;
3619
3620 if (opt_log_slave_updates && opt_replicate_annotate_row_events)
3621 binlog_flags|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;
3622
3623 if (repl_semisync_slave.request_transmit(mi))
3624 DBUG_RETURN(1);
3625
3626 // TODO if big log files: Change next to int8store()
3627 int4store(buf, (ulong) mi->master_log_pos);
3628 int2store(buf + 4, binlog_flags);
3629 int4store(buf + 6, global_system_variables.server_id);
3630 len = (uint) strlen(logname);
3631 memcpy(buf + 10, logname,len);
3632 if (simple_command(mysql, COM_BINLOG_DUMP, buf, len + 10, 1))
3633 {
3634 /*
3635 Something went wrong, so we will just reconnect and retry later
3636 in the future, we should do a better error analysis, but for
3637 now we just fill up the error log :-)
3638 */
3639 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
3640 *suppress_warnings= TRUE; // Suppress reconnect warning
3641 else
3642 sql_print_error("Error on COM_BINLOG_DUMP: %d %s, will retry in %d secs",
3643 mysql_errno(mysql), mysql_error(mysql),
3644 mi->connect_retry);
3645 DBUG_RETURN(1);
3646 }
3647
3648 DBUG_RETURN(0);
3649}
3650
3651
3652/*
3653 Read one event from the master
3654
3655 SYNOPSIS
3656 read_event()
3657 mysql MySQL connection
3658 mi Master connection information
3659 suppress_warnings TRUE when a normal net read timeout has caused us to
3660 try a reconnect. We do not want to print anything to
3661 the error log in this case because this a anormal
3662 event in an idle server.
3663 network_read_len get the real network read length in VIO, especially using compressed protocol
3664
3665 RETURN VALUES
3666 'packet_error' Error
3667 number Length of packet
3668*/
3669
3670static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings,
3671 ulong* network_read_len)
3672{
3673 ulong len;
3674 DBUG_ENTER("read_event");
3675
3676 *suppress_warnings= FALSE;
3677 /*
3678 my_real_read() will time us out
3679 We check if we were told to die, and if not, try reading again
3680 */
3681#ifndef DBUG_OFF
3682 if (disconnect_slave_event_count && !(mi->events_till_disconnect--))
3683 DBUG_RETURN(packet_error);
3684#endif
3685
3686 len = cli_safe_read_reallen(mysql, network_read_len);
3687 if (unlikely(len == packet_error || (long) len < 1))
3688 {
3689 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
3690 {
3691 /*
3692 We are trying a normal reconnect after a read timeout;
3693 we suppress prints to .err file as long as the reconnect
3694 happens without problems
3695 */
3696 *suppress_warnings=
3697 global_system_variables.log_warnings < 2 ? TRUE : FALSE;
3698 }
3699 else
3700 {
3701 if (!mi->rli.abort_slave)
3702 {
3703 sql_print_error("Error reading packet from server: %s (server_errno=%d)",
3704 mysql_error(mysql), mysql_errno(mysql));
3705 }
3706 }
3707 DBUG_RETURN(packet_error);
3708 }
3709
3710 /* Check if eof packet */
3711 if (len < 8 && mysql->net.read_pos[0] == 254)
3712 {
3713 sql_print_information("Slave: received end packet from server, apparent "
3714 "master shutdown: %s",
3715 mysql_error(mysql));
3716 DBUG_RETURN(packet_error);
3717 }
3718
3719 DBUG_PRINT("exit", ("len: %lu net->read_pos[4]: %d",
3720 len, mysql->net.read_pos[4]));
3721 DBUG_RETURN(len - 1);
3722}
3723
3724
3725/**
3726 Check if the current error is of temporary nature of not.
3727 Some errors are temporary in nature, such as
3728 ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT.
3729
3730 @retval 0 if fatal error
3731 @retval 1 temporary error, do retry
3732*/
3733
3734int
3735has_temporary_error(THD *thd)
3736{
3737 uint current_errno;
3738 DBUG_ENTER("has_temporary_error");
3739
3740 DBUG_EXECUTE_IF("all_errors_are_temporary_errors",
3741 if (thd->get_stmt_da()->is_error())
3742 {
3743 thd->clear_error();
3744 my_error(ER_LOCK_DEADLOCK, MYF(0));
3745 });
3746
3747 /*
3748 If there is no message in THD, we can't say if it's a temporary
3749 error or not. This is currently the case for Incident_log_event,
3750 which sets no message. Return FALSE.
3751 */
3752 if (!likely(thd->is_error()))
3753 DBUG_RETURN(0);
3754
3755 current_errno= thd->get_stmt_da()->sql_errno();
3756 for (uint i= 0; i < slave_transaction_retry_error_length; i++)
3757 {
3758 if (current_errno == slave_transaction_retry_errors[i])
3759 DBUG_RETURN(1);
3760 }
3761
3762 DBUG_RETURN(0);
3763}
3764
3765
3766/**
3767 If this is a lagging slave (specified with CHANGE MASTER TO MASTER_DELAY = X), delays accordingly. Also unlocks rli->data_lock.
3768
3769 Design note: this is the place to unlock rli->data_lock. The lock
3770 must be held when reading delay info from rli, but it should not be
3771 held while sleeping.
3772
3773 @param ev Event that is about to be executed.
3774
3775 @param thd The sql thread's THD object.
3776
3777 @param rli The sql thread's Relay_log_info structure.
3778
3779 @retval 0 If the delay timed out and the event shall be executed.
3780
3781 @retval nonzero If the delay was interrupted and the event shall be skipped.
3782*/
3783int
3784sql_delay_event(Log_event *ev, THD *thd, rpl_group_info *rgi)
3785{
3786 Relay_log_info* rli= rgi->rli;
3787 long sql_delay= rli->get_sql_delay();
3788
3789 DBUG_ENTER("sql_delay_event");
3790 mysql_mutex_assert_owner(&rli->data_lock);
3791 DBUG_ASSERT(!rli->belongs_to_client());
3792
3793 int type= ev->get_type_code();
3794 if (sql_delay && type != ROTATE_EVENT &&
3795 type != FORMAT_DESCRIPTION_EVENT && type != START_EVENT_V3)
3796 {
3797 // The time when we should execute the event.
3798 time_t sql_delay_end=
3799 ev->when + rli->mi->clock_diff_with_master + sql_delay;
3800 // The current time.
3801 time_t now= my_time(0);
3802 // The time we will have to sleep before executing the event.
3803 unsigned long nap_time= 0;
3804 if (sql_delay_end > now)
3805 nap_time= (ulong)(sql_delay_end - now);
3806
3807 DBUG_PRINT("info", ("sql_delay= %lu "
3808 "ev->when= %lu "
3809 "rli->mi->clock_diff_with_master= %lu "
3810 "now= %ld "
3811 "sql_delay_end= %llu "
3812 "nap_time= %ld",
3813 sql_delay, (long)ev->when,
3814 rli->mi->clock_diff_with_master,
3815 (long)now, (ulonglong)sql_delay_end, (long)nap_time));
3816
3817 if (sql_delay_end > now)
3818 {
3819 DBUG_PRINT("info", ("delaying replication event %lu secs",
3820 nap_time));
3821 rli->start_sql_delay(sql_delay_end);
3822 mysql_mutex_unlock(&rli->data_lock);
3823 DBUG_RETURN(slave_sleep(thd, nap_time, sql_slave_killed, rgi));
3824 }
3825 }
3826
3827 mysql_mutex_unlock(&rli->data_lock);
3828
3829 DBUG_RETURN(0);
3830}
3831
3832
3833/*
3834 First half of apply_event_and_update_pos(), see below.
3835 Setup some THD variables for applying the event.
3836
3837 Split out so that it can run with rli->data_lock held in non-parallel
3838 replication, but without the mutex held in the parallel case.
3839*/
3840static int
3841apply_event_and_update_pos_setup(Log_event* ev, THD* thd, rpl_group_info *rgi)
3842{
3843 DBUG_ENTER("apply_event_and_update_pos_setup");
3844
3845 DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)",
3846 ev->get_type_str(), ev->get_type_code(),
3847 ev->server_id));
3848 DBUG_PRINT("info", ("thd->options: '%s%s%s' rgi->last_event_start_time: %lu",
3849 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
3850 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
3851 FLAGSTR(thd->variables.option_bits, OPTION_GTID_BEGIN),
3852 (ulong) rgi->last_event_start_time));
3853
3854 /*
3855 Execute the event to change the database and update the binary
3856 log coordinates, but first we set some data that is needed for
3857 the thread.
3858
3859 The event will be executed unless it is supposed to be skipped.
3860
3861 Queries originating from this server must be skipped. Low-level
3862 events (Format_description_log_event, Rotate_log_event,
3863 Stop_log_event) from this server must also be skipped. But for
3864 those we don't want to modify 'group_master_log_pos', because
3865 these events did not exist on the master.
3866 Format_description_log_event is not completely skipped.
3867
3868 Skip queries specified by the user in 'slave_skip_counter'. We
3869 can't however skip events that has something to do with the log
3870 files themselves.
3871
3872 Filtering on own server id is extremely important, to ignore
3873 execution of events created by the creation/rotation of the relay
3874 log (remember that now the relay log starts with its Format_desc,
3875 has a Rotate etc).
3876 */
3877
3878 /* Use the original server id for logging. */
3879 thd->variables.server_id = ev->server_id;
3880 thd->set_time(); // time the query
3881 thd->lex->current_select= 0;
3882 if (!ev->when)
3883 {
3884 my_hrtime_t hrtime= my_hrtime();
3885 ev->when= hrtime_to_my_time(hrtime);
3886 ev->when_sec_part= hrtime_sec_part(hrtime);
3887 }
3888 thd->variables.option_bits=
3889 (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) |
3890 (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0);
3891 ev->thd = thd; // because up to this point, ev->thd == 0
3892
3893 DBUG_RETURN(ev->shall_skip(rgi));
3894}
3895
3896
3897/*
3898 Second half of apply_event_and_update_pos(), see below.
3899
3900 Do the actual event apply (or skip), and position update.
3901 */
3902static int
3903apply_event_and_update_pos_apply(Log_event* ev, THD* thd, rpl_group_info *rgi,
3904 int reason)
3905{
3906 int exec_res= 0;
3907 Relay_log_info* rli= rgi->rli;
3908
3909 DBUG_ENTER("apply_event_and_update_pos_apply");
3910 DBUG_EXECUTE_IF("inject_slave_sql_before_apply_event",
3911 {
3912 DBUG_ASSERT(!debug_sync_set_action
3913 (thd, STRING_WITH_LEN("now WAIT_FOR continue")));
3914 DBUG_SET_INITIAL("-d,inject_slave_sql_before_apply_event");
3915 };);
3916 if (reason == Log_event::EVENT_SKIP_NOT)
3917 exec_res= ev->apply_event(rgi);
3918
3919#ifdef WITH_WSREP
3920 if (exec_res && thd->wsrep_conflict_state != NO_CONFLICT)
3921 {
3922 WSREP_DEBUG("SQL apply failed, res %d conflict state: %d",
3923 exec_res, thd->wsrep_conflict_state);
3924 rli->abort_slave= 1;
3925 rli->report(ERROR_LEVEL, ER_UNKNOWN_COM_ERROR, rgi->gtid_info(),
3926 "Node has dropped from cluster");
3927 }
3928#endif
3929
3930#ifndef DBUG_OFF
3931 /*
3932 This only prints information to the debug trace.
3933
3934 TODO: Print an informational message to the error log?
3935 */
3936 static const char *const explain[] = {
3937 // EVENT_SKIP_NOT,
3938 "not skipped",
3939 // EVENT_SKIP_IGNORE,
3940 "skipped because event should be ignored",
3941 // EVENT_SKIP_COUNT
3942 "skipped because event skip counter was non-zero"
3943 };
3944 DBUG_PRINT("info", ("OPTION_BEGIN: %d IN_STMT: %d IN_TRANSACTION: %d",
3945 MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
3946 rli->get_flag(Relay_log_info::IN_STMT),
3947 rli->get_flag(Relay_log_info::IN_TRANSACTION)));
3948 DBUG_PRINT("skip_event", ("%s event was %s",
3949 ev->get_type_str(), explain[reason]));
3950#endif
3951
3952 DBUG_PRINT("info", ("apply_event error = %d", exec_res));
3953 if (exec_res == 0)
3954 {
3955 int error= ev->update_pos(rgi);
3956 #ifndef DBUG_OFF
3957 DBUG_PRINT("info", ("update_pos error = %d", error));
3958 if (!rli->belongs_to_client())
3959 {
3960 DBUG_PRINT("info", ("group %llu %s", rli->group_relay_log_pos,
3961 rli->group_relay_log_name));
3962 DBUG_PRINT("info", ("event %llu %s", rli->event_relay_log_pos,
3963 rli->event_relay_log_name));
3964 }
3965#endif
3966 /*
3967 The update should not fail, so print an error message and
3968 return an error code.
3969
3970 TODO: Replace this with a decent error message when merged
3971 with BUG#24954 (which adds several new error message).
3972 */
3973 if (unlikely(error))
3974 {
3975 rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR, rgi->gtid_info(),
3976 "It was not possible to update the positions"
3977 " of the relay log information: the slave may"
3978 " be in an inconsistent state."
3979 " Stopped in %s position %llu",
3980 rli->group_relay_log_name, rli->group_relay_log_pos);
3981 DBUG_RETURN(2);
3982 }
3983 }
3984 else
3985 {
3986 /*
3987 Make sure we do not erroneously update gtid_slave_pos with a lingering
3988 GTID from this failed event group (MDEV-4906).
3989 */
3990 rgi->gtid_pending= false;
3991 }
3992
3993 DBUG_RETURN(exec_res ? 1 : 0);
3994}
3995
3996
3997/**
3998 Applies the given event and advances the relay log position.
3999
4000 This is needed by the sql thread to execute events from the binlog,
4001 and by clients executing BINLOG statements. Conceptually, this
4002 function does:
4003
4004 @code
4005 ev->apply_event(rli);
4006 ev->update_pos(rli);
4007 @endcode
4008
4009 It also does the following maintainance:
4010
4011 - Initializes the thread's server_id and time; and the event's
4012 thread.
4013
4014 - If !rli->belongs_to_client() (i.e., if it belongs to the slave
4015 sql thread instead of being used for executing BINLOG
4016 statements), it does the following things: (1) skips events if it
4017 is needed according to the server id or slave_skip_counter; (2)
4018 unlocks rli->data_lock; (3) sleeps if required by 'CHANGE MASTER
4019 TO MASTER_DELAY=X'; (4) maintains the running state of the sql
4020 thread (rli->thread_state).
4021
4022 - Reports errors as needed.
4023
4024 @param ev The event to apply.
4025
4026 @param thd The client thread that executes the event (i.e., the
4027 slave sql thread if called from a replication slave, or the client
4028 thread if called to execute a BINLOG statement).
4029
4030 @param rli The relay log info (i.e., the slave's rli if called from
4031 a replication slave, or the client's thd->rli_fake if called to
4032 execute a BINLOG statement).
4033
4034 @retval 0 OK.
4035
4036 @retval 1 Error calling ev->apply_event().
4037
4038 @retval 2 No error calling ev->apply_event(), but error calling
4039 ev->update_pos().
4040
4041 This function is only used in non-parallel replication, where it is called
4042 with rli->data_lock held; this lock is released during this function.
4043*/
4044int
4045apply_event_and_update_pos(Log_event* ev, THD* thd, rpl_group_info *rgi)
4046{
4047 Relay_log_info* rli= rgi->rli;
4048 mysql_mutex_assert_owner(&rli->data_lock);
4049 int reason= apply_event_and_update_pos_setup(ev, thd, rgi);
4050 if (reason == Log_event::EVENT_SKIP_COUNT)
4051 {
4052 DBUG_ASSERT(rli->slave_skip_counter > 0);
4053 rli->slave_skip_counter--;
4054 }
4055
4056 if (reason == Log_event::EVENT_SKIP_NOT)
4057 {
4058 // Sleeps if needed, and unlocks rli->data_lock.
4059 if (sql_delay_event(ev, thd, rgi))
4060 return 0;
4061 }
4062 else
4063 mysql_mutex_unlock(&rli->data_lock);
4064
4065 return apply_event_and_update_pos_apply(ev, thd, rgi, reason);
4066}
4067
4068
4069/*
4070 The version of above apply_event_and_update_pos() used in parallel
4071 replication. Unlike the non-parallel case, this function is called without
4072 rli->data_lock held.
4073*/
4074int
4075apply_event_and_update_pos_for_parallel(Log_event* ev, THD* thd,
4076 rpl_group_info *rgi)
4077{
4078 mysql_mutex_assert_not_owner(&rgi->rli->data_lock);
4079 int reason= apply_event_and_update_pos_setup(ev, thd, rgi);
4080 /*
4081 In parallel replication, sql_slave_skip_counter is handled in the SQL
4082 driver thread, so 23 should never see EVENT_SKIP_COUNT here.
4083 */
4084 DBUG_ASSERT(reason != Log_event::EVENT_SKIP_COUNT);
4085 /*
4086 Calling sql_delay_event() was handled in the SQL driver thread when
4087 doing parallel replication.
4088 */
4089 return apply_event_and_update_pos_apply(ev, thd, rgi, reason);
4090}
4091
4092
4093/**
4094 Keep the relay log transaction state up to date.
4095
4096 The state reflects how things are after the given event, that has just been
4097 read from the relay log, is executed.
4098
4099 This is only needed to ensure we:
4100 - Don't abort the sql driver thread in the middle of an event group.
4101 - Don't rotate the io thread in the middle of a statement or transaction.
4102 The mechanism is that the io thread, when it needs to rotate the relay
4103 log, will wait until the sql driver has read all the cached events
4104 and then continue reading events one by one from the master until
4105 the sql threads signals that log doesn't have an active group anymore.
4106
4107 There are two possible cases. We keep them as 2 separate flags mainly
4108 to make debugging easier.
4109
4110 - IN_STMT is set when we have read an event that should be used
4111 together with the next event. This is for example setting a
4112 variable that is used when executing the next statement.
4113 - IN_TRANSACTION is set when we are inside a BEGIN...COMMIT group
4114
4115 To test the state one should use the is_in_group() function.
4116*/
4117
4118inline void update_state_of_relay_log(Relay_log_info *rli, Log_event *ev)
4119{
4120 Log_event_type typ= ev->get_type_code();
4121
4122 /* check if we are in a multi part event */
4123 if (ev->is_part_of_group())
4124 rli->set_flag(Relay_log_info::IN_STMT);
4125 else if (Log_event::is_group_event(typ))
4126 {
4127 /*
4128 If it was not a is_part_of_group() and not a group event (like
4129 rotate) then we can reset the IN_STMT flag. We have the above
4130 if only to allow us to have a rotate element anywhere.
4131 */
4132 rli->clear_flag(Relay_log_info::IN_STMT);
4133 }
4134
4135 /* Check for an event that starts or stops a transaction */
4136 if (LOG_EVENT_IS_QUERY(typ))
4137 {
4138 Query_log_event *qev= (Query_log_event*) ev;
4139 /*
4140 Trivial optimization to avoid the following somewhat expensive
4141 checks.
4142 */
4143 if (qev->q_len <= sizeof("ROLLBACK"))
4144 {
4145 if (qev->is_begin())
4146 rli->set_flag(Relay_log_info::IN_TRANSACTION);
4147 if (qev->is_commit() || qev->is_rollback())
4148 rli->clear_flag(Relay_log_info::IN_TRANSACTION);
4149 }
4150 }
4151 if (typ == XID_EVENT)
4152 rli->clear_flag(Relay_log_info::IN_TRANSACTION);
4153 if (typ == GTID_EVENT &&
4154 !(((Gtid_log_event*) ev)->flags2 & Gtid_log_event::FL_STANDALONE))
4155 {
4156 /* This GTID_EVENT will generate a BEGIN event */
4157 rli->set_flag(Relay_log_info::IN_TRANSACTION);
4158 }
4159
4160 DBUG_PRINT("info", ("event: %u IN_STMT: %d IN_TRANSACTION: %d",
4161 (uint) typ,
4162 rli->get_flag(Relay_log_info::IN_STMT),
4163 rli->get_flag(Relay_log_info::IN_TRANSACTION)));
4164}
4165
4166
/**
  Top-level function for executing the next event in the relay log.
  This is called from the SQL thread.

  This function reads the event from the relay log, executes it, and
  advances the relay log position. It also handles errors, etc.

  This function may fail to apply the event for the following reasons:

   - The position specified by the UNTIL condition of the START SLAVE
     command is reached.

   - It was not possible to read the event from the log.

   - The slave is killed.

   - An error occurred when applying the event, and the event has been
     tried slave_trans_retries times. If the event has been retried
     fewer times, 0 is returned.

   - init_master_info or init_relay_log_pos failed. (These are called
     if a failure occurs when applying the event.)

   - An error occurred when updating the binlog position.

  @param thd         Thread that executes the event.
  @param rli         Relay log info; its data_lock is acquired here.
  @param serial_rgi  Group info passed to next_event(), sql_slave_killed()
                     and apply_event_and_update_pos() for the
                     non-parallel execution path.

  @retval 0 The event was applied.

  @retval 1 The event was not applied.

  When parallel replication is in use, a non-negative result of
  rli->parallel.do_event() is returned as is.
*/

static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
                                rpl_group_info *serial_rgi)
{
  ulonglong event_size;
  DBUG_ENTER("exec_relay_log_event");

  /*
    We acquire this mutex since we need it for all operations except
    event execution. But we will release it in places where we will
    wait for something for example inside of next_event().
  */
  mysql_mutex_lock(&rli->data_lock);

  Log_event *ev= next_event(serial_rgi, &event_size);

  /* Check for a kill first; ev may be NULL here, which delete tolerates. */
  if (sql_slave_killed(serial_rgi))
  {
    mysql_mutex_unlock(&rli->data_lock);
    delete ev;
    DBUG_RETURN(1);
  }
  if (ev)
  {
    int exec_res;
    Log_event_type typ= ev->get_type_code();

    /*
      Even if we don't execute this event, we keep the master timestamp,
      so that seconds behind master shows correct delta (there are events
      that are not replayed, so we keep falling behind).

      If it is an artificial event, or a relay log event (IO thread generated
      event) or ev->when is set to 0, we don't update the
      last_master_timestamp.

      In parallel replication, we might queue a large number of events, and
      the user might be surprised to see a claim that the slave is up to date
      long before those queued events are actually executed.
    */
    if (!rli->mi->using_parallel() &&
        !(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
    {
      rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
      DBUG_ASSERT(rli->last_master_timestamp >= 0);
    }

    /*
      This tests if the position of the beginning of the current event
      hits the UNTIL barrier.

      Inside a transaction, or when the event carries no log_pos, the
      group_master_log_pos is used instead of the event's own start position.
    */
    if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
         rli->until_condition == Relay_log_info::UNTIL_RELAY_POS) &&
        (ev->server_id != global_system_variables.server_id ||
         rli->replicate_same_server_id) &&
        rli->is_until_satisfied((rli->get_flag(Relay_log_info::IN_TRANSACTION) || !ev->log_pos)
                                ? rli->group_master_log_pos
                                : ev->log_pos - ev->data_written))
    {
      sql_print_information("Slave SQL thread stopped because it reached its"
                            " UNTIL position %llu", rli->until_pos());
      /*
        Setting abort_slave flag because we do not want additional
        message about error in query execution to be printed.
      */
      rli->abort_slave= 1;
      rli->stop_for_until= true;
      mysql_mutex_unlock(&rli->data_lock);
      delete ev;
      DBUG_RETURN(1);
    }

    { /**
         The following failure injection works in cooperation with tests
         setting @@global.debug= 'd,incomplete_group_in_relay_log'.
         Xid or Commit events are not executed to force the slave sql
         read hanging if the relay log does not have any more events.
      */
      DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
                      if ((typ == XID_EVENT) ||
                          (LOG_EVENT_IS_QUERY(typ) &&
                           strcmp("COMMIT", ((Query_log_event *) ev)->query) == 0))
                      {
                        DBUG_ASSERT(thd->transaction.all.modified_non_trans_table);
                        rli->abort_slave= 1;
                        mysql_mutex_unlock(&rli->data_lock);
                        delete ev;
                        serial_rgi->inc_event_relay_log_pos();
                        DBUG_RETURN(0);
                      };);
    }

    /* Track IN_STMT / IN_TRANSACTION before the event is handed off. */
    update_state_of_relay_log(rli, ev);

    if (rli->mi->using_parallel())
    {
      int res= rli->parallel.do_event(serial_rgi, ev, event_size);
      /*
        In parallel replication, we need to update the relay log position
        immediately so that it will be the correct position from which to
        read the next event.
      */
      if (res == 0)
        rli->event_relay_log_pos= rli->future_event_relay_log_pos;
      /*
        NOTE(review): this return path has no explicit unlock of
        rli->data_lock and no delete of ev in this function; presumably
        do_event() takes care of both - confirm against rpl_parallel.
      */
      if (res >= 0)
        DBUG_RETURN(res);
      /*
        Else we proceed to execute the event non-parallel.
        This is the case for pre-10.0 events without GTID, and for handling
        slave_skip_counter.
      */
    }

    if (typ == GTID_EVENT)
    {
      Gtid_log_event *gev= static_cast<Gtid_log_event *>(ev);

      /*
        For GTID, allocate a new sub_id for the given domain_id.
        The sub_id must be allocated in increasing order of binlog order.
      */
      if (event_group_new_gtid(serial_rgi, gev))
      {
        sql_print_error("Error reading relay log event: %s", "slave SQL thread "
                        "aborted because of out-of-memory error");
        mysql_mutex_unlock(&rli->data_lock);
        delete ev;
        DBUG_RETURN(1);
      }

      if (opt_gtid_ignore_duplicates &&
          rli->mi->using_gtid != Master_info::USE_GTID_NO)
      {
        int res= rpl_global_gtid_slave_state->check_duplicate_gtid
          (&serial_rgi->current_gtid, serial_rgi);
        if (res < 0)
        {
          sql_print_error("Error processing GTID event: %s", "slave SQL "
                          "thread aborted because of out-of-memory error");
          mysql_mutex_unlock(&rli->data_lock);
          delete ev;
          DBUG_RETURN(1);
        }
        /*
          If we need to skip this event group (because the GTID was already
          applied), then do it using the code for slave_skip_counter, which
          is able to handle skipping until the end of the event group.
        */
        if (!res)
          rli->slave_skip_counter= 1;
      }
    }

    /* Copy the current relay log coordinates into the group info. */
    serial_rgi->future_event_relay_log_pos= rli->future_event_relay_log_pos;
    serial_rgi->event_relay_log_name= rli->event_relay_log_name;
    serial_rgi->event_relay_log_pos= rli->event_relay_log_pos;
    /*
      Called with rli->data_lock held; the lock is released inside
      apply_event_and_update_pos().
    */
    exec_res= apply_event_and_update_pos(ev, thd, serial_rgi);

#ifdef WITH_WSREP
    WSREP_DEBUG("apply_event_and_update_pos() result: %d", exec_res);
#endif /* WITH_WSREP */

    /* From here on 'ev' must not be used; only its saved type 'typ' is. */
    delete_or_keep_event_post_apply(serial_rgi, typ, ev);

    /*
      update_log_pos failed: this should not happen, so we don't
      retry.
    */
    if (unlikely(exec_res == 2))
      DBUG_RETURN(1);

#ifdef WITH_WSREP
    /*
      With WSREP, the retry handling below only runs when there is no
      conflict. The opening brace here is closed by the matching
      #ifdef WITH_WSREP section after the retry block.
    */
    mysql_mutex_lock(&thd->LOCK_thd_data);
    if (thd->wsrep_conflict_state == NO_CONFLICT)
    {
      mysql_mutex_unlock(&thd->LOCK_thd_data);
#endif /* WITH_WSREP */
    if (slave_trans_retries)
    {
      int UNINIT_VAR(temp_err);
      /* temp_err is only assigned when exec_res is non-zero. */
      if (unlikely(exec_res) && (temp_err= has_temporary_error(thd)))
      {
        const char *errmsg;
        rli->clear_error();
        /*
          We were in a transaction which has been rolled back because of a
          temporary error;
          let's seek back to BEGIN log event and retry it all again.
	  Note, if lock wait timeout (innodb_lock_wait_timeout exceeded)
	  there is no rollback since 5.0.13 (ref: manual).
          We have to not only seek but also

          a) init_master_info(), to seek back to hot relay log's start
          for later (for when we will come back to this hot log after
          re-processing the possibly existing old logs where BEGIN is:
          check_binlog_magic() will then need the cache to be at
          position 0 (see comments at beginning of
          init_master_info()).
          b) init_relay_log_pos(), because the BEGIN may be an older relay log.
        */
        if (serial_rgi->trans_retries < slave_trans_retries)
        {
          if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL))
            sql_print_error("Failed to initialize the master info structure");
          else if (init_relay_log_pos(rli,
                                      rli->group_relay_log_name,
                                      rli->group_relay_log_pos,
                                      1, &errmsg, 1))
            sql_print_error("Error initializing relay log position: %s",
                            errmsg);
          else
          {
            /* Retry set up successfully: report success to the caller. */
            exec_res= 0;
            serial_rgi->cleanup_context(thd, 1);
            /* chance for concurrent connection to get more locks */
            slave_sleep(thd, MY_MAX(MY_MIN(serial_rgi->trans_retries,
                                           MAX_SLAVE_RETRY_PAUSE),
                                    slave_trans_retry_interval),
                       sql_slave_killed, serial_rgi);
            serial_rgi->trans_retries++;
            mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
            rli->retried_trans++;
            statistic_increment(slave_retried_transactions, LOCK_status);
            mysql_mutex_unlock(&rli->data_lock);
            DBUG_PRINT("info", ("Slave retries transaction "
                                "rgi->trans_retries: %lu",
                                serial_rgi->trans_retries));
          }
        }
        else
          sql_print_error("Slave SQL thread retried transaction %lu time(s) "
                          "in vain, giving up. Consider raising the value of "
                          "the slave_transaction_retries variable.",
                          slave_trans_retries);
      }
      else if ((exec_res && !temp_err) ||
               (opt_using_transactions &&
                rli->group_relay_log_pos == rli->event_relay_log_pos))
      {
        /*
          Only reset the retry counter if the entire group succeeded
          or failed with a non-transient error.  On a successful
          event, the execution will proceed as usual; in the case of a
          non-transient error, the slave will stop with an error.
         */
        serial_rgi->trans_retries= 0; // restart from fresh
        DBUG_PRINT("info", ("Resetting retry counter, rgi->trans_retries: %lu",
                            serial_rgi->trans_retries));
      }
    }
#ifdef WITH_WSREP
    }
    else
      mysql_mutex_unlock(&thd->LOCK_thd_data);
#endif /* WITH_WSREP */

    thread_safe_increment64(&rli->executed_entries);
    DBUG_RETURN(exec_res);
  }
  /* next_event() returned no event although the thread was not killed. */
  mysql_mutex_unlock(&rli->data_lock);
  rli->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_READ_FAILURE, NULL,
              ER_THD(thd, ER_SLAVE_RELAY_LOG_READ_FAILURE), "\
Could not parse relay log event entry. The possible reasons are: the master's \
binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
binary log), the slave's relay log is corrupted (you can check this by running \
'mysqlbinlog' on the relay log), a network problem, or a bug in the master's \
or slave's MySQL code. If you want to check the master's binary log or slave's \
relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' \
on this slave.\
");
  DBUG_RETURN(1);
}
4468
4469
4470static bool check_io_slave_killed(Master_info *mi, const char *info)
4471{
4472 if (io_slave_killed(mi))
4473 {
4474 if (info && global_system_variables.log_warnings)
4475 sql_print_information("%s", info);
4476 return TRUE;
4477 }
4478 return FALSE;
4479}
4480
4481/**
4482 @brief Try to reconnect slave IO thread.
4483
4484 @details Terminates current connection to master, sleeps for
4485 @c mi->connect_retry msecs and initiates new connection with
4486 @c safe_reconnect(). Variable pointed by @c retry_count is increased -
4487 if it exceeds @c master_retry_count then connection is not re-established
4488 and function signals error.
4489 Unless @c suppres_warnings is TRUE, a warning is put in the server error log
4490 when reconnecting. The warning message and messages used to report errors
4491 are taken from @c messages array. In case @c master_retry_count is exceeded,
4492 no messages are added to the log.
4493
4494 @param[in] thd Thread context.
4495 @param[in] mysql MySQL connection.
4496 @param[in] mi Master connection information.
4497 @param[in,out] retry_count Number of attempts to reconnect.
4498 @param[in] suppress_warnings TRUE when a normal net read timeout
4499 has caused to reconnecting.
4500 @param[in] messages Messages to print/log, see
4501 reconnect_messages[] array.
4502
4503 @retval 0 OK.
4504 @retval 1 There was an error.
4505*/
4506
4507static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
4508 uint *retry_count, bool suppress_warnings,
4509 const char *messages[SLAVE_RECON_MSG_MAX])
4510{
4511 mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
4512 thd->proc_info= messages[SLAVE_RECON_MSG_WAIT];
4513#ifdef SIGNAL_WITH_VIO_CLOSE
4514 thd->clear_active_vio();
4515#endif
4516 end_server(mysql);
4517 if ((*retry_count)++)
4518 {
4519 if (*retry_count > master_retry_count)
4520 return 1; // Don't retry forever
4521 slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
4522 }
4523 if (check_io_slave_killed(mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
4524 return 1;
4525 thd->proc_info = messages[SLAVE_RECON_MSG_AFTER];
4526 if (!suppress_warnings)
4527 {
4528 char buf[256];
4529 StringBuffer<100> tmp;
4530 if (mi->using_gtid != Master_info::USE_GTID_NO)
4531 {
4532 tmp.append(STRING_WITH_LEN("; GTID position '"));
4533 mi->gtid_current_pos.append_to_string(&tmp);
4534 if (mi->events_queued_since_last_gtid == 0)
4535 tmp.append(STRING_WITH_LEN("'"));
4536 else
4537 {
4538 tmp.append(STRING_WITH_LEN("', GTID event skip "));
4539 tmp.append_ulonglong((ulonglong)mi->events_queued_since_last_gtid);
4540 }
4541 }
4542 my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED],
4543 IO_RPL_LOG_NAME, mi->master_log_pos,
4544 tmp.c_ptr_safe());
4545 /*
4546 Raise a warining during registering on master/requesting dump.
4547 Log a message reading event.
4548 */
4549 if (messages[SLAVE_RECON_MSG_COMMAND][0])
4550 {
4551 mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, NULL,
4552 ER_THD(thd, ER_SLAVE_MASTER_COM_FAILURE),
4553 messages[SLAVE_RECON_MSG_COMMAND], buf);
4554 }
4555 else
4556 {
4557 sql_print_information("%s", buf);
4558 }
4559 }
4560 if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(mi))
4561 {
4562 if (global_system_variables.log_warnings)
4563 sql_print_information("%s", messages[SLAVE_RECON_MSG_KILLED_AFTER]);
4564 return 1;
4565 }
4566 return 0;
4567}
4568
4569
4570/**
4571 Slave IO thread entry point.
4572
4573 @param arg Pointer to Master_info struct that holds information for
4574 the IO thread.
4575
4576 @return Always 0.
4577*/
4578pthread_handler_t handle_slave_io(void *arg)
4579{
4580 THD *thd; // needs to be first for thread_stack
4581 MYSQL *mysql;
4582 Master_info *mi = (Master_info*)arg;
4583 Relay_log_info *rli= &mi->rli;
4584 uint retry_count;
4585 bool suppress_warnings;
4586 int ret;
4587 rpl_io_thread_info io_info;
4588#ifndef DBUG_OFF
4589 mi->dbug_do_disconnect= false;
4590#endif
4591 // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
4592 my_thread_init();
4593 DBUG_ENTER("handle_slave_io");
4594
4595 DBUG_ASSERT(mi->inited);
4596 mysql= NULL ;
4597 retry_count= 0;
4598
4599 thd= new THD(next_thread_id()); // note that contructor of THD uses DBUG_ !
4600
4601 mysql_mutex_lock(&mi->run_lock);
4602 /* Inform waiting threads that slave has started */
4603 mi->slave_run_id++;
4604
4605#ifndef DBUG_OFF
4606 mi->events_till_disconnect = disconnect_slave_event_count;
4607#endif
4608
4609 THD_CHECK_SENTRY(thd);
4610 mi->io_thd = thd;
4611
4612 pthread_detach_this_thread();
4613 thd->thread_stack= (char*) &thd; // remember where our stack is
4614 mi->clear_error();
4615 if (init_slave_thread(thd, mi, SLAVE_THD_IO))
4616 {
4617 mysql_cond_broadcast(&mi->start_cond);
4618 sql_print_error("Failed during slave I/O thread initialization");
4619 goto err_during_init;
4620 }
4621 thd->system_thread_info.rpl_io_info= &io_info;
4622 add_to_active_threads(thd);
4623 mi->slave_running = MYSQL_SLAVE_RUN_NOT_CONNECT;
4624 mi->abort_slave = 0;
4625 mysql_mutex_unlock(&mi->run_lock);
4626 mysql_cond_broadcast(&mi->start_cond);
4627 mi->rows_event_tracker.reset();
4628
4629 DBUG_PRINT("master_info",("log_file_name: '%s' position: %llu",
4630 mi->master_log_name, mi->master_log_pos));
4631
4632 /* This must be called before run any binlog_relay_io hooks */
4633 my_pthread_setspecific_ptr(RPL_MASTER_INFO, mi);
4634
4635 /* Load the set of seen GTIDs, if we did not already. */
4636 if (rpl_load_gtid_slave_state(thd))
4637 {
4638 mi->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
4639 "Unable to load replication GTID slave state from mysql.%s: %s",
4640 rpl_gtid_slave_state_table_name.str,
4641 thd->get_stmt_da()->message());
4642 /*
4643 If we are using old-style replication, we can continue, even though we
4644 then will not be able to record the GTIDs we receive. But if using GTID,
4645 we must give up.
4646 */
4647 if (mi->using_gtid != Master_info::USE_GTID_NO || opt_gtid_strict_mode)
4648 goto err;
4649 }
4650
4651
4652 if (DBUG_EVALUATE_IF("failed_slave_start", 1, 0)
4653 || repl_semisync_slave.slave_start(mi))
4654 {
4655 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
4656 ER_THD(thd, ER_SLAVE_FATAL_ERROR),
4657 "Failed to run 'thread_start' hook");
4658 goto err;
4659 }
4660
4661 if (!(mi->mysql = mysql = mysql_init(NULL)))
4662 {
4663 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
4664 ER_THD(thd, ER_SLAVE_FATAL_ERROR), "error in mysql_init()");
4665 goto err;
4666 }
4667
4668 THD_STAGE_INFO(thd, stage_connecting_to_master);
4669 // we can get killed during safe_connect
4670 if (!safe_connect(thd, mysql, mi))
4671 {
4672 if (mi->using_gtid == Master_info::USE_GTID_NO)
4673 sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
4674 "replication started in log '%s' at position %llu",
4675 mi->user, mi->host, mi->port,
4676 IO_RPL_LOG_NAME, mi->master_log_pos);
4677 else
4678 {
4679 StringBuffer<100> tmp;
4680 mi->gtid_current_pos.to_string(&tmp);
4681 sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
4682 "replication starts at GTID position '%s'",
4683 mi->user, mi->host, mi->port, tmp.c_ptr_safe());
4684 }
4685 }
4686 else
4687 {
4688 sql_print_information("Slave I/O thread killed while connecting to master");
4689 goto err;
4690 }
4691
4692connected:
4693
4694 if (mi->using_gtid != Master_info::USE_GTID_NO)
4695 {
4696 /*
4697 When the IO thread (re)connects to the master using GTID, it will
4698 connect at the start of an event group. But the IO thread may have
4699 previously logged part of the following event group to the relay
4700 log.
4701
4702 When the IO and SQL thread are started together, we erase any previous
4703 relay logs, but this is not possible/desirable while the SQL thread is
4704 running. To avoid duplicating partial event groups in the relay logs in
4705 this case, we remember the count of events in any partially logged event
4706 group before the reconnect, and then here at connect we set up a counter
4707 to skip the already-logged part of the group.
4708 */
4709 mi->gtid_reconnect_event_skip_count= mi->events_queued_since_last_gtid;
4710 mi->gtid_event_seen= false;
4711 /*
4712 Reset stale state of the rows-event group tracker at reconnect.
4713 */
4714 mi->rows_event_tracker.reset();
4715 }
4716
4717#ifdef ENABLED_DEBUG_SYNC
4718 DBUG_EXECUTE_IF("dbug.before_get_running_status_yes",
4719 {
4720 const char act[]=
4721 "now "
4722 "wait_for signal.io_thread_let_running";
4723 DBUG_ASSERT(debug_sync_service);
4724 DBUG_ASSERT(!debug_sync_set_action(thd,
4725 STRING_WITH_LEN(act)));
4726 };);
4727#endif
4728
4729 mysql_mutex_lock(&mi->run_lock);
4730 mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
4731 mysql_mutex_unlock(&mi->run_lock);
4732
4733 thd->slave_net = &mysql->net;
4734 THD_STAGE_INFO(thd, stage_checking_master_version);
4735 ret= get_master_version_and_clock(mysql, mi);
4736 if (ret == 1)
4737 /* Fatal error */
4738 goto err;
4739
4740 if (ret == 2)
4741 {
4742 if (check_io_slave_killed(mi, "Slave I/O thread killed "
4743 "while calling get_master_version_and_clock(...)"))
4744 goto err;
4745 suppress_warnings= FALSE;
4746 /*
4747 Try to reconnect because the error was caused by a transient network
4748 problem
4749 */
4750 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4751 reconnect_messages[SLAVE_RECON_ACT_REG]))
4752 goto err;
4753 goto connected;
4754 }
4755
4756 if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1)
4757 {
4758 /*
4759 Register ourselves with the master.
4760 */
4761 THD_STAGE_INFO(thd, stage_registering_slave_on_master);
4762 if (register_slave_on_master(mysql, mi, &suppress_warnings))
4763 {
4764 if (!check_io_slave_killed(mi, "Slave I/O thread killed "
4765 "while registering slave on master"))
4766 {
4767 sql_print_error("Slave I/O thread couldn't register on master");
4768 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4769 reconnect_messages[SLAVE_RECON_ACT_REG]))
4770 goto err;
4771 }
4772 else
4773 goto err;
4774 goto connected;
4775 }
4776 }
4777
4778 DBUG_PRINT("info",("Starting reading binary log from master"));
4779 thd->set_command(COM_SLAVE_IO);
4780 while (!io_slave_killed(mi))
4781 {
4782 THD_STAGE_INFO(thd, stage_requesting_binlog_dump);
4783 if (request_dump(thd, mysql, mi, &suppress_warnings))
4784 {
4785 sql_print_error("Failed on request_dump()");
4786 if (check_io_slave_killed(mi, NullS) ||
4787 try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4788 reconnect_messages[SLAVE_RECON_ACT_DUMP]))
4789 goto err;
4790 goto connected;
4791 }
4792
4793 const char *event_buf;
4794
4795 mi->slave_running= MYSQL_SLAVE_RUN_READING;
4796 DBUG_ASSERT(mi->last_error().number == 0);
4797 ulonglong lastchecktime = my_hrtime().val;
4798 ulonglong tokenamount = opt_read_binlog_speed_limit*1024;
4799 while (!io_slave_killed(mi))
4800 {
4801 ulong event_len, network_read_len = 0;
4802 /*
4803 We say "waiting" because read_event() will wait if there's nothing to
4804 read. But if there's something to read, it will not wait. The
4805 important thing is to not confuse users by saying "reading" whereas
4806 we're in fact receiving nothing.
4807 */
4808 THD_STAGE_INFO(thd, stage_waiting_for_master_to_send_event);
4809 event_len= read_event(mysql, mi, &suppress_warnings, &network_read_len);
4810 if (check_io_slave_killed(mi, NullS))
4811 goto err;
4812
4813 if (unlikely(event_len == packet_error))
4814 {
4815 uint mysql_error_number= mysql_errno(mysql);
4816 switch (mysql_error_number) {
4817 case CR_NET_PACKET_TOO_LARGE:
4818 sql_print_error("\
4819Log entry on master is longer than slave_max_allowed_packet (%lu) on \
4820slave. If the entry is correct, restart the server with a higher value of \
4821slave_max_allowed_packet",
4822 slave_max_allowed_packet);
4823 mi->report(ERROR_LEVEL, ER_NET_PACKET_TOO_LARGE, NULL,
4824 "%s", "Got a packet bigger than 'slave_max_allowed_packet' bytes");
4825 goto err;
4826 case ER_MASTER_FATAL_ERROR_READING_BINLOG:
4827 mi->report(ERROR_LEVEL, ER_MASTER_FATAL_ERROR_READING_BINLOG, NULL,
4828 ER_THD(thd, ER_MASTER_FATAL_ERROR_READING_BINLOG),
4829 mysql_error_number, mysql_error(mysql));
4830 goto err;
4831 case ER_OUT_OF_RESOURCES:
4832 sql_print_error("\
4833Stopping slave I/O thread due to out-of-memory error from master");
4834 mi->report(ERROR_LEVEL, ER_OUT_OF_RESOURCES, NULL,
4835 "%s", ER_THD(thd, ER_OUT_OF_RESOURCES));
4836 goto err;
4837 }
4838 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4839 reconnect_messages[SLAVE_RECON_ACT_EVENT]))
4840 goto err;
4841 goto connected;
4842 } // if (event_len == packet_error)
4843
4844 retry_count=0; // ok event, reset retry counter
4845 THD_STAGE_INFO(thd, stage_queueing_master_event_to_the_relay_log);
4846 event_buf= (const char*)mysql->net.read_pos + 1;
4847 mi->semi_ack= 0;
4848 if (repl_semisync_slave.
4849 slave_read_sync_header((const char*)mysql->net.read_pos + 1, event_len,
4850 &(mi->semi_ack), &event_buf, &event_len))
4851 {
4852 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
4853 ER_THD(thd, ER_SLAVE_FATAL_ERROR),
4854 "Failed to run 'after_read_event' hook");
4855 goto err;
4856 }
4857
4858 /* Control the binlog read speed of master
4859 when read_binlog_speed_limit is non-zero
4860 */
4861 ulonglong speed_limit_in_bytes = opt_read_binlog_speed_limit * 1024;
4862 if (speed_limit_in_bytes)
4863 {
4864 /* Prevent the tokenamount become a large value,
4865 for example, the IO thread doesn't work for a long time
4866 */
4867 if (tokenamount > speed_limit_in_bytes * 2)
4868 {
4869 lastchecktime = my_hrtime().val;
4870 tokenamount = speed_limit_in_bytes * 2;
4871 }
4872
4873 do
4874 {
4875 ulonglong currenttime = my_hrtime().val;
4876 tokenamount += (currenttime - lastchecktime) * speed_limit_in_bytes / (1000*1000);
4877 lastchecktime = currenttime;
4878 if(tokenamount < network_read_len)
4879 {
4880 ulonglong duration =1000ULL*1000 * (network_read_len - tokenamount) / speed_limit_in_bytes;
4881 time_t second_time = (time_t)(duration / (1000 * 1000));
4882 uint micro_time = duration % (1000 * 1000);
4883
4884 // at least sleep 1000 micro second
4885 my_sleep(MY_MAX(micro_time,1000));
4886
4887 /*
4888 If it sleep more than one second,
4889 it should use slave_sleep() to avoid the STOP SLAVE hang.
4890 */
4891 if (second_time)
4892 slave_sleep(thd, second_time, io_slave_killed, mi);
4893
4894 }
4895 }while(tokenamount < network_read_len);
4896 tokenamount -= network_read_len;
4897 }
4898
4899 if (queue_event(mi, event_buf, event_len))
4900 {
4901 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
4902 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
4903 "could not queue event from master");
4904 goto err;
4905 }
4906
4907 if (rpl_semi_sync_slave_status && (mi->semi_ack & SEMI_SYNC_NEED_ACK) &&
4908 repl_semisync_slave.slave_reply(mi))
4909 {
4910 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
4911 ER_THD(thd, ER_SLAVE_FATAL_ERROR),
4912 "Failed to run 'after_queue_event' hook");
4913 goto err;
4914 }
4915
4916 if (mi->using_gtid == Master_info::USE_GTID_NO &&
4917 /*
4918 If rpl_semi_sync_slave_delay_master is enabled, we will flush
4919 master info only when ack is needed. This may lead to at least one
4920 group transaction delay but affords better performance improvement.
4921 */
4922 (!repl_semisync_slave.get_slave_enabled() ||
4923 (!(mi->semi_ack & SEMI_SYNC_SLAVE_DELAY_SYNC) ||
4924 (mi->semi_ack & (SEMI_SYNC_NEED_ACK)))) &&
4925 (DBUG_EVALUATE_IF("failed_flush_master_info", 1, 0) ||
4926 flush_master_info(mi, TRUE, TRUE)))
4927 {
4928 sql_print_error("Failed to flush master info file");
4929 goto err;
4930 }
4931 /*
4932 See if the relay logs take too much space.
4933 We don't lock mi->rli.log_space_lock here; this dirty read saves time
4934 and does not introduce any problem:
4935 - if mi->rli.ignore_log_space_limit is 1 but becomes 0 just after (so
4936 the clean value is 0), then we are reading only one more event as we
4937 should, and we'll block only at the next event. No big deal.
4938 - if mi->rli.ignore_log_space_limit is 0 but becomes 1 just
4939 after (so the clean value is 1), then we are going into
4940 wait_for_relay_log_space() for no reason, but this function
4941 will do a clean read, notice the clean value and exit
4942 immediately.
4943 */
4944#ifndef DBUG_OFF
4945 {
4946 DBUG_PRINT("info", ("log_space_limit=%llu log_space_total=%llu "
4947 "ignore_log_space_limit=%d",
4948 rli->log_space_limit, rli->log_space_total,
4949 (int) rli->ignore_log_space_limit));
4950 }
4951#endif
4952
4953 if (rli->log_space_limit && rli->log_space_limit <
4954 rli->log_space_total &&
4955 !rli->ignore_log_space_limit)
4956 if (wait_for_relay_log_space(rli))
4957 {
4958 sql_print_error("Slave I/O thread aborted while waiting for relay \
4959log space");
4960 goto err;
4961 }
4962 }
4963 }
4964
4965 // error = 0;
4966err:
4967 // print the current replication position
4968 if (mi->using_gtid == Master_info::USE_GTID_NO)
4969 sql_print_information("Slave I/O thread exiting, read up to log '%s', "
4970 "position %llu", IO_RPL_LOG_NAME, mi->master_log_pos);
4971 else
4972 {
4973 StringBuffer<100> tmp;
4974 mi->gtid_current_pos.to_string(&tmp);
4975 sql_print_information("Slave I/O thread exiting, read up to log '%s', "
4976 "position %llu; GTID position %s",
4977 IO_RPL_LOG_NAME, mi->master_log_pos,
4978 tmp.c_ptr_safe());
4979 }
4980 repl_semisync_slave.slave_stop(mi);
4981 thd->reset_query();
4982 thd->reset_db(&null_clex_str);
4983 if (mysql)
4984 {
4985 /*
4986 Here we need to clear the active VIO before closing the
4987 connection with the master. The reason is that THD::awake()
4988 might be called from terminate_slave_thread() because somebody
4989 issued a STOP SLAVE. If that happends, the close_active_vio()
4990 can be called in the middle of closing the VIO associated with
4991 the 'mysql' object, causing a crash.
4992 */
4993#ifdef SIGNAL_WITH_VIO_CLOSE
4994 thd->clear_active_vio();
4995#endif
4996 mysql_close(mysql);
4997 mi->mysql=0;
4998 }
4999 write_ignored_events_info_to_relay_log(thd, mi);
5000 if (mi->using_gtid != Master_info::USE_GTID_NO)
5001 flush_master_info(mi, TRUE, TRUE);
5002 THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
5003 thd->add_status_to_global();
5004 unlink_not_visible_thd(thd);
5005 mysql_mutex_lock(&mi->run_lock);
5006
5007err_during_init:
5008 /* Forget the relay log's format */
5009 delete mi->rli.relay_log.description_event_for_queue;
5010 mi->rli.relay_log.description_event_for_queue= 0;
5011 // TODO: make rpl_status part of Master_info
5012 change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE);
5013
5014 thd->assert_not_linked();
5015 delete thd;
5016 thread_safe_decrement32(&service_thread_count);
5017 signal_thd_deleted();
5018
5019 mi->abort_slave= 0;
5020 mi->slave_running= MYSQL_SLAVE_NOT_RUN;
5021 mi->io_thd= 0;
5022 /*
5023 Note: the order of the two following calls (first broadcast, then unlock)
5024 is important. Otherwise a killer_thread can execute between the calls and
5025 delete the mi structure leading to a crash! (see BUG#25306 for details)
5026 */
5027 mysql_cond_broadcast(&mi->stop_cond); // tell the world we are done
5028 DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
5029 mysql_mutex_unlock(&mi->run_lock);
5030
5031 DBUG_LEAVE; // Must match DBUG_ENTER()
5032 my_thread_end();
5033 ERR_remove_state(0);
5034 pthread_exit(0);
5035 return 0; // Avoid compiler warnings
5036}
5037
5038/*
5039 Check the temporary directory used by commands like
5040 LOAD DATA INFILE.
5041
5042 As the directory never changes during a mysqld run, we only
5043 test this once and cache the result. This also resolve a race condition
5044 when this can be run by multiple threads at the same time.
5045 */
5046
5047static bool check_temp_dir_run= 0;
5048static int check_temp_dir_result= 0;
5049
5050static
5051int check_temp_dir(char* tmp_file)
5052{
5053 File fd;
5054 int result= 1; // Assume failure
5055 MY_DIR *dirp;
5056 char tmp_dir[FN_REFLEN];
5057 size_t tmp_dir_size;
5058 DBUG_ENTER("check_temp_dir");
5059
5060 /* This look is safe to use as this function is only called once */
5061 mysql_mutex_lock(&LOCK_start_thread);
5062 if (check_temp_dir_run)
5063 {
5064 if ((result= check_temp_dir_result))
5065 my_message(result, tmp_file, MYF(0));
5066 goto end;
5067 }
5068 check_temp_dir_run= 1;
5069
5070 /*
5071 Get the directory from the temporary file.
5072 */
5073 dirname_part(tmp_dir, tmp_file, &tmp_dir_size);
5074
5075 /*
5076 Check if the directory exists.
5077 */
5078 if (!(dirp=my_dir(tmp_dir,MYF(MY_WME))))
5079 goto end;
5080 my_dirend(dirp);
5081
5082 /*
5083 Check permissions to create a file. We use O_TRUNC to ensure that
5084 things works even if we happen to have and old file laying around.
5085 */
5086 if ((fd= mysql_file_create(key_file_misc,
5087 tmp_file, CREATE_MODE,
5088 O_WRONLY | O_BINARY | O_TRUNC | O_NOFOLLOW,
5089 MYF(MY_WME))) < 0)
5090 goto end;
5091
5092 result= 0; // Directory name ok
5093 /*
5094 Clean up.
5095 */
5096 mysql_file_close(fd, MYF(0));
5097 mysql_file_delete(key_file_misc, tmp_file, MYF(0));
5098
5099end:
5100 mysql_mutex_unlock(&LOCK_start_thread);
5101 DBUG_RETURN(result);
5102}
5103
5104
5105void
5106slave_output_error_info(rpl_group_info *rgi, THD *thd)
5107{
5108 /*
5109 retrieve as much info as possible from the thd and, error
5110 codes and warnings and print this to the error log as to
5111 allow the user to locate the error
5112 */
5113 Relay_log_info *rli= rgi->rli;
5114 uint32 const last_errno= rli->last_error().number;
5115
5116 if (unlikely(thd->is_error()))
5117 {
5118 char const *const errmsg= thd->get_stmt_da()->message();
5119
5120 DBUG_PRINT("info",
5121 ("thd->get_stmt_da()->sql_errno()=%d; rli->last_error.number=%d",
5122 thd->get_stmt_da()->sql_errno(), last_errno));
5123 if (last_errno == 0)
5124 {
5125 /*
5126 This function is reporting an error which was not reported
5127 while executing exec_relay_log_event().
5128 */
5129 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
5130 rgi->gtid_info(), "%s", errmsg);
5131 }
5132 else if (last_errno != thd->get_stmt_da()->sql_errno())
5133 {
5134 /*
5135 * An error was reported while executing exec_relay_log_event()
5136 * however the error code differs from what is in the thread.
5137 * This function prints out more information to help finding
5138 * what caused the problem.
5139 */
5140 sql_print_error("Slave (additional info): %s Error_code: %d",
5141 errmsg, thd->get_stmt_da()->sql_errno());
5142 }
5143 }
5144
5145 /* Print any warnings issued */
5146 Diagnostics_area::Sql_condition_iterator it=
5147 thd->get_stmt_da()->sql_conditions();
5148 const Sql_condition *err;
5149 /*
5150 Added controlled slave thread cancel for replication
5151 of user-defined variables.
5152 */
5153 bool udf_error = false;
5154 while ((err= it++))
5155 {
5156 if (err->get_sql_errno() == ER_CANT_OPEN_LIBRARY)
5157 udf_error = true;
5158 sql_print_warning("Slave: %s Error_code: %d", err->get_message_text(), err->get_sql_errno());
5159 }
5160 if (unlikely(udf_error))
5161 {
5162 StringBuffer<100> tmp;
5163 if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
5164 {
5165 tmp.append(STRING_WITH_LEN("; GTID position '"));
5166 rpl_append_gtid_state(&tmp, false);
5167 tmp.append(STRING_WITH_LEN("'"));
5168 }
5169 sql_print_error("Error loading user-defined library, slave SQL "
5170 "thread aborted. Install the missing library, and restart the "
5171 "slave SQL thread with \"SLAVE START\". We stopped at log '%s' "
5172 "position %llu%s", RPL_LOG_NAME, rli->group_master_log_pos,
5173 tmp.c_ptr_safe());
5174 }
5175 else
5176 {
5177 StringBuffer<100> tmp;
5178 if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
5179 {
5180 tmp.append(STRING_WITH_LEN("; GTID position '"));
5181 rpl_append_gtid_state(&tmp, false);
5182 tmp.append(STRING_WITH_LEN("'"));
5183 }
5184 sql_print_error("Error running query, slave SQL thread aborted. "
5185 "Fix the problem, and restart the slave SQL thread "
5186 "with \"SLAVE START\". We stopped at log '%s' position "
5187 "%llu%s", RPL_LOG_NAME, rli->group_master_log_pos,
5188 tmp.c_ptr_safe());
5189 }
5190}
5191
5192
5193/**
5194 Slave SQL thread entry point.
5195
5196 @param arg Pointer to Relay_log_info object that holds information
5197 for the SQL thread.
5198
5199 @return Always 0.
5200*/
5201pthread_handler_t handle_slave_sql(void *arg)
5202{
5203 THD *thd; /* needs to be first for thread_stack */
5204 char saved_log_name[FN_REFLEN];
5205 char saved_master_log_name[FN_REFLEN];
5206 my_off_t UNINIT_VAR(saved_log_pos);
5207 my_off_t UNINIT_VAR(saved_master_log_pos);
5208 String saved_skip_gtid_pos;
5209 my_off_t saved_skip= 0;
5210 Master_info *mi= ((Master_info*)arg);
5211 Relay_log_info* rli = &mi->rli;
5212 my_bool wsrep_node_dropped __attribute__((unused)) = FALSE;
5213 const char *errmsg;
5214 rpl_group_info *serial_rgi;
5215 rpl_sql_thread_info sql_info(mi->rpl_filter);
5216
5217 // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
5218 my_thread_init();
5219 DBUG_ENTER("handle_slave_sql");
5220
5221#ifdef WITH_WSREP
5222 wsrep_restart_point:
5223#endif
5224
5225 serial_rgi= new rpl_group_info(rli);
5226 thd = new THD(next_thread_id()); // note that contructor of THD uses DBUG_ !
5227 thd->thread_stack = (char*)&thd; // remember where our stack is
5228 thd->system_thread_info.rpl_sql_info= &sql_info;
5229
5230 DBUG_ASSERT(rli->inited);
5231 DBUG_ASSERT(rli->mi == mi);
5232 mysql_mutex_lock(&rli->run_lock);
5233 DBUG_ASSERT(!rli->slave_running);
5234 errmsg= 0;
5235#ifndef DBUG_OFF
5236 rli->events_till_abort = abort_slave_event_count;
5237#endif
5238
5239 /*
5240 THD for the sql driver thd. In parallel replication this is the thread
5241 that reads things from the relay log and calls rpl_parallel::do_event()
5242 to execute queries.
5243
5244 In single thread replication this is the THD for the thread that is
5245 executing SQL queries too.
5246 */
5247 serial_rgi->thd= rli->sql_driver_thd= thd;
5248
5249 /* Inform waiting threads that slave has started */
5250 rli->slave_run_id++;
5251 rli->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
5252
5253 pthread_detach_this_thread();
5254
5255 if (opt_slave_parallel_threads > 0 &&
5256 rpl_parallel_activate_pool(&global_rpl_thread_pool))
5257 {
5258 mysql_cond_broadcast(&rli->start_cond);
5259 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
5260 "Failed during parallel slave pool activation");
5261 goto err_during_init;
5262 }
5263
5264 if (init_slave_thread(thd, mi, SLAVE_THD_SQL))
5265 {
5266 /*
5267 TODO: this is currently broken - slave start and change master
5268 will be stuck if we fail here
5269 */
5270 mysql_cond_broadcast(&rli->start_cond);
5271 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
5272 "Failed during slave thread initialization");
5273 goto err_during_init;
5274 }
5275 thd->init_for_queries();
5276 thd->rgi_slave= serial_rgi;
5277 if ((serial_rgi->deferred_events_collecting= mi->rpl_filter->is_on()))
5278 {
5279 serial_rgi->deferred_events= new Deferred_log_events(rli);
5280 }
5281
5282 /*
5283 binlog_annotate_row_events must be TRUE only after an Annotate_rows event
5284 has been received and only till the last corresponding rbr event has been
5285 applied. In all other cases it must be FALSE.
5286 */
5287 thd->variables.binlog_annotate_row_events= 0;
5288
5289 /* Ensure that slave can exeute any alter table it gets from master */
5290 thd->variables.alter_algorithm= (ulong) Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT;
5291
5292 add_to_active_threads(thd);
5293 /*
5294 We are going to set slave_running to 1. Assuming slave I/O thread is
5295 alive and connected, this is going to make Seconds_Behind_Master be 0
5296 i.e. "caught up". Even if we're just at start of thread. Well it's ok, at
5297 the moment we start we can think we are caught up, and the next second we
5298 start receiving data so we realize we are not caught up and
5299 Seconds_Behind_Master grows. No big deal.
5300 */
5301 rli->abort_slave = 0;
5302 rli->stop_for_until= false;
5303 mysql_mutex_unlock(&rli->run_lock);
5304 mysql_cond_broadcast(&rli->start_cond);
5305
5306 /*
5307 Reset errors for a clean start (otherwise, if the master is idle, the SQL
5308 thread may execute no Query_log_event, so the error will remain even
5309 though there's no problem anymore). Do not reset the master timestamp
5310 (imagine the slave has caught everything, the STOP SLAVE and START SLAVE:
5311 as we are not sure that we are going to receive a query, we want to
5312 remember the last master timestamp (to say how many seconds behind we are
5313 now.
5314 But the master timestamp is reset by RESET SLAVE & CHANGE MASTER.
5315 */
5316 rli->clear_error();
5317 rli->parallel.reset();
5318
5319 //tell the I/O thread to take relay_log_space_limit into account from now on
5320 rli->ignore_log_space_limit= 0;
5321
5322 serial_rgi->gtid_sub_id= 0;
5323 serial_rgi->gtid_pending= false;
5324 if (mi->using_gtid != Master_info::USE_GTID_NO && mi->using_parallel() &&
5325 rli->restart_gtid_pos.count() > 0)
5326 {
5327 /*
5328 With parallel replication in GTID mode, if we have a multi-domain GTID
5329 position, we need to start some way back in the relay log and skip any
5330 GTID that was already applied before. Since event groups can be split
5331 across multiple relay logs, this earlier starting point may be in the
5332 middle of an already applied event group, so we also need to skip any
5333 remaining part of such group.
5334 */
5335 rli->gtid_skip_flag = GTID_SKIP_TRANSACTION;
5336 }
5337 else
5338 rli->gtid_skip_flag = GTID_SKIP_NOT;
5339 if (init_relay_log_pos(rli,
5340 rli->group_relay_log_name,
5341 rli->group_relay_log_pos,
5342 1 /*need data lock*/, &errmsg,
5343 1 /*look for a description_event*/))
5344 {
5345 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
5346 "Error initializing relay log position: %s", errmsg);
5347 goto err_before_start;
5348 }
5349 rli->reset_inuse_relaylog();
5350 if (rli->alloc_inuse_relaylog(rli->group_relay_log_name))
5351 goto err_before_start;
5352
5353 strcpy(rli->future_event_master_log_name, rli->group_master_log_name);
5354 THD_CHECK_SENTRY(thd);
5355#ifndef DBUG_OFF
5356 {
5357 DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%llu "
5358 "rli->event_relay_log_pos=%llu",
5359 my_b_tell(rli->cur_log), rli->event_relay_log_pos));
5360 DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
5361 /*
5362 Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the
5363 correct position when it's called just after my_b_seek() (the questionable
5364 stuff is those "seek is done on next read" comments in the my_b_seek()
5365 source code).
5366 The crude reality is that this assertion randomly fails whereas
5367 replication seems to work fine. And there is no easy explanation why it
5368 fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of
5369 init_relay_log_pos() called above). Maybe the assertion would be
5370 meaningful if we held rli->data_lock between the my_b_seek() and the
5371 DBUG_ASSERT().
5372 */
5373#ifdef SHOULD_BE_CHECKED
5374 DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
5375#endif
5376 }
5377#endif
5378
5379#ifdef WITH_WSREP
5380 thd->wsrep_exec_mode= LOCAL_STATE;
5381 /* synchronize with wsrep replication */
5382 if (WSREP_ON)
5383 wsrep_ready_wait();
5384#endif
5385 DBUG_PRINT("master_info",("log_file_name: %s position: %llu",
5386 rli->group_master_log_name,
5387 rli->group_master_log_pos));
5388 if (global_system_variables.log_warnings)
5389 {
5390 StringBuffer<100> tmp;
5391 if (mi->using_gtid != Master_info::USE_GTID_NO)
5392 {
5393 tmp.append(STRING_WITH_LEN("; GTID position '"));
5394 rpl_append_gtid_state(&tmp,
5395 mi->using_gtid==Master_info::USE_GTID_CURRENT_POS);
5396 tmp.append(STRING_WITH_LEN("'"));
5397 }
5398 sql_print_information("Slave SQL thread initialized, starting replication "
5399 "in log '%s' at position %llu, relay log '%s' "
5400 "position: %llu%s", RPL_LOG_NAME,
5401 rli->group_master_log_pos, rli->group_relay_log_name,
5402 rli->group_relay_log_pos, tmp.c_ptr_safe());
5403 }
5404
5405 if (check_temp_dir(rli->slave_patternload_file))
5406 {
5407 check_temp_dir_result= thd->get_stmt_da()->sql_errno();
5408 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
5409 "Unable to use slave's temporary directory %s - %s",
5410 slave_load_tmpdir, thd->get_stmt_da()->message());
5411 goto err;
5412 }
5413 else
5414 check_temp_dir_result= 0;
5415
5416 /* Load the set of seen GTIDs, if we did not already. */
5417 if (rpl_load_gtid_slave_state(thd))
5418 {
5419 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
5420 "Unable to load replication GTID slave state from mysql.%s: %s",
5421 rpl_gtid_slave_state_table_name.str,
5422 thd->get_stmt_da()->message());
5423 /*
5424 If we are using old-style replication, we can continue, even though we
5425 then will not be able to record the GTIDs we receive. But if using GTID,
5426 we must give up.
5427 */
5428 if (mi->using_gtid != Master_info::USE_GTID_NO || opt_gtid_strict_mode)
5429 goto err;
5430 }
5431 /* Re-load the set of mysql.gtid_slave_posXXX tables available. */
5432 if (find_gtid_slave_pos_tables(thd))
5433 {
5434 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
5435 "Error processing replication GTID position tables: %s",
5436 thd->get_stmt_da()->message());
5437 goto err;
5438 }
5439
5440 /* execute init_slave variable */
5441 if (opt_init_slave.length)
5442 {
5443 execute_init_command(thd, &opt_init_slave, &LOCK_sys_init_slave);
5444 if (unlikely(thd->is_slave_error))
5445 {
5446 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
5447 "Slave SQL thread aborted. Can't execute init_slave query");
5448 goto err;
5449 }
5450 }
5451
5452 /*
5453 First check until condition - probably there is nothing to execute. We
5454 do not want to wait for next event in this case.
5455 */
5456 mysql_mutex_lock(&rli->data_lock);
5457 if (rli->slave_skip_counter)
5458 {
5459 strmake_buf(saved_log_name, rli->group_relay_log_name);
5460 strmake_buf(saved_master_log_name, rli->group_master_log_name);
5461 saved_log_pos= rli->group_relay_log_pos;
5462 saved_master_log_pos= rli->group_master_log_pos;
5463 if (mi->using_gtid != Master_info::USE_GTID_NO)
5464 {
5465 saved_skip_gtid_pos.append(STRING_WITH_LEN(", GTID '"));
5466 rpl_append_gtid_state(&saved_skip_gtid_pos, false);
5467 saved_skip_gtid_pos.append(STRING_WITH_LEN("'; "));
5468 }
5469 saved_skip= rli->slave_skip_counter;
5470 }
5471 if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
5472 rli->until_condition == Relay_log_info::UNTIL_RELAY_POS) &&
5473 rli->is_until_satisfied(rli->group_master_log_pos))
5474 {
5475 sql_print_information("Slave SQL thread stopped because it reached its"
5476 " UNTIL position %llu", rli->until_pos());
5477 mysql_mutex_unlock(&rli->data_lock);
5478 goto err;
5479 }
5480 mysql_mutex_unlock(&rli->data_lock);
5481
5482 /* Read queries from the IO/THREAD until this thread is killed */
5483
5484 thd->set_command(COM_SLAVE_SQL);
5485 while (!sql_slave_killed(serial_rgi))
5486 {
5487 THD_STAGE_INFO(thd, stage_reading_event_from_the_relay_log);
5488 THD_CHECK_SENTRY(thd);
5489
5490 if (saved_skip && rli->slave_skip_counter == 0)
5491 {
5492 StringBuffer<100> tmp;
5493 if (mi->using_gtid != Master_info::USE_GTID_NO)
5494 {
5495 tmp.append(STRING_WITH_LEN(", GTID '"));
5496 rpl_append_gtid_state(&tmp, false);
5497 tmp.append(STRING_WITH_LEN("'; "));
5498 }
5499
5500 sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at "
5501 "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
5502 "master_log_pos='%ld'%s and new position at "
5503 "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
5504 "master_log_pos='%ld'%s ",
5505 (ulong) saved_skip, saved_log_name, (ulong) saved_log_pos,
5506 saved_master_log_name, (ulong) saved_master_log_pos,
5507 saved_skip_gtid_pos.c_ptr_safe(),
5508 rli->group_relay_log_name, (ulong) rli->group_relay_log_pos,
5509 rli->group_master_log_name, (ulong) rli->group_master_log_pos,
5510 tmp.c_ptr_safe());
5511 saved_skip= 0;
5512 saved_skip_gtid_pos.free();
5513 }
5514
5515 if (exec_relay_log_event(thd, rli, serial_rgi))
5516 {
5517#ifdef WITH_WSREP
5518 if (thd->wsrep_conflict_state != NO_CONFLICT)
5519 {
5520 wsrep_node_dropped= TRUE;
5521 rli->abort_slave= TRUE;
5522 }
5523#endif /* WITH_WSREP */
5524
5525 DBUG_PRINT("info", ("exec_relay_log_event() failed"));
5526 // do not scare the user if SQL thread was simply killed or stopped
5527 if (!sql_slave_killed(serial_rgi))
5528 {
5529 slave_output_error_info(serial_rgi, thd);
5530 if (WSREP_ON && rli->last_error().number == ER_UNKNOWN_COM_ERROR)
5531 wsrep_node_dropped= TRUE;
5532 }
5533 goto err;
5534 }
5535 }
5536
5537 err:
5538 if (mi->using_parallel())
5539 rli->parallel.wait_for_done(thd, rli);
5540
5541 /* Thread stopped. Print the current replication position to the log */
5542 {
5543 StringBuffer<100> tmp;
5544 if (mi->using_gtid != Master_info::USE_GTID_NO)
5545 {
5546 tmp.append(STRING_WITH_LEN("; GTID position '"));
5547 rpl_append_gtid_state(&tmp, false);
5548 tmp.append(STRING_WITH_LEN("'"));
5549 }
5550 sql_print_information("Slave SQL thread exiting, replication stopped in "
5551 "log '%s' at position %llu%s", RPL_LOG_NAME,
5552 rli->group_master_log_pos, tmp.c_ptr_safe());
5553 }
5554
5555 err_before_start:
5556
5557 /*
5558 Some events set some playgrounds, which won't be cleared because thread
5559 stops. Stopping of this thread may not be known to these events ("stop"
5560 request is detected only by the present function, not by events), so we
5561 must "proactively" clear playgrounds:
5562 */
5563 thd->clear_error();
5564 serial_rgi->cleanup_context(thd, 1);
5565 /*
5566 Some extra safety, which should not been needed (normally, event deletion
5567 should already have done these assignments (each event which sets these
5568 variables is supposed to set them to 0 before terminating)).
5569 */
5570 thd->catalog= 0;
5571 thd->reset_query();
5572 thd->reset_db(&null_clex_str);
5573 if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
5574 {
5575 ulong domain_count;
5576 my_bool save_log_all_errors= thd->log_all_errors;
5577
5578 /*
5579 We don't need to check return value for rli->flush()
5580 as any errors should be logged to stderr
5581 */
5582 thd->log_all_errors= 1;
5583 rli->flush();
5584 thd->log_all_errors= save_log_all_errors;
5585 if (mi->using_parallel())
5586 {
5587 /*
5588 In parallel replication GTID mode, we may stop with different domains
5589 at different positions in the relay log.
5590
5591 To handle this when we restart the SQL thread, mark the current
5592 per-domain position in the Relay_log_info.
5593 */
5594 mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
5595 domain_count= rpl_global_gtid_slave_state->count();
5596 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
5597 if (domain_count > 1)
5598 {
5599 inuse_relaylog *ir;
5600
5601 /*
5602 Load the starting GTID position, so that we can skip already applied
5603 GTIDs when we restart the SQL thread. And set the start position in
5604 the relay log back to a known safe place to start (prior to any not
5605 yet applied transaction in any domain).
5606 */
5607 rli->restart_gtid_pos.load(rpl_global_gtid_slave_state, NULL, 0);
5608 if ((ir= rli->inuse_relaylog_list))
5609 {
5610 rpl_gtid *gtid= ir->relay_log_state;
5611 uint32 count= ir->relay_log_state_count;
5612 while (count > 0)
5613 {
5614 process_gtid_for_restart_pos(rli, gtid);
5615 ++gtid;
5616 --count;
5617 }
5618 strmake_buf(rli->group_relay_log_name, ir->name);
5619 rli->group_relay_log_pos= BIN_LOG_HEADER_SIZE;
5620 rli->relay_log_state.load(ir->relay_log_state, ir->relay_log_state_count);
5621 }
5622 }
5623 }
5624 }
5625 THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
5626 thd->add_status_to_global();
5627 unlink_not_visible_thd(thd);
5628 mysql_mutex_lock(&rli->run_lock);
5629
5630err_during_init:
5631 /* We need data_lock, at least to wake up any waiting master_pos_wait() */
5632 mysql_mutex_lock(&rli->data_lock);
5633 DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT); // tracking buffer overrun
5634 /* When master_pos_wait() wakes up it will check this and terminate */
5635 rli->slave_running= MYSQL_SLAVE_NOT_RUN;
5636 /* Forget the relay log's format */
5637 delete rli->relay_log.description_event_for_exec;
5638 rli->relay_log.description_event_for_exec= 0;
5639 rli->reset_inuse_relaylog();
5640 /* Wake up master_pos_wait() */
5641 mysql_mutex_unlock(&rli->data_lock);
5642 DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
5643 mysql_cond_broadcast(&rli->data_cond);
5644 rli->ignore_log_space_limit= 0; /* don't need any lock */
5645 /* we die so won't remember charset - re-update them on next thread start */
5646 thd->system_thread_info.rpl_sql_info->cached_charset_invalidate();
5647
5648 /*
5649 TODO: see if we can do this conditionally in next_event() instead
5650 to avoid unneeded position re-init
5651
5652 We only reset THD::temporary_tables to 0 here and not free it, as this
5653 could be used by slave through Relay_log_info::save_temporary_tables.
5654 */
5655 thd->temporary_tables= 0;
5656 rli->sql_driver_thd= 0;
5657 thd->rgi_fake= thd->rgi_slave= NULL;
5658
5659#ifdef WITH_WSREP
5660 /*
5661 If slave stopped due to node going non primary, we set global flag to
5662 trigger automatic restart of slave when node joins back to cluster.
5663 */
5664 if (WSREP_ON && wsrep_node_dropped && wsrep_restart_slave)
5665 {
5666 if (wsrep_ready)
5667 {
5668 WSREP_INFO("Slave error due to node temporarily non-primary"
5669 "SQL slave will continue");
5670 wsrep_node_dropped= FALSE;
5671 mysql_mutex_unlock(&rli->run_lock);
5672 WSREP_DEBUG("wsrep_conflict_state now: %d", thd->wsrep_conflict_state);
5673 WSREP_INFO("slave restart: %d", thd->wsrep_conflict_state);
5674 thd->wsrep_conflict_state= NO_CONFLICT;
5675 goto wsrep_restart_point;
5676 } else {
5677 WSREP_INFO("Slave error due to node going non-primary");
5678 WSREP_INFO("wsrep_restart_slave was set and therefore slave will be "
5679 "automatically restarted when node joins back to cluster.");
5680 wsrep_restart_slave_activated= TRUE;
5681 }
5682 }
5683#endif /* WITH_WSREP */
5684
5685 /*
5686 Note: the order of the broadcast and unlock calls below (first
5687 broadcast, then unlock) is important. Otherwise a killer_thread can
5688 execute between the calls and delete the mi structure leading to a
5689 crash! (see BUG#25306 for details)
5690 */
5691 mysql_cond_broadcast(&rli->stop_cond);
5692 DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
5693 mysql_mutex_unlock(&rli->run_lock); // tell the world we are done
5694
5695 rpl_parallel_resize_pool_if_no_slaves();
5696
5697 delete serial_rgi;
5698 delete thd;
5699 thread_safe_decrement32(&service_thread_count);
5700 signal_thd_deleted();
5701
5702 DBUG_LEAVE; // Must match DBUG_ENTER()
5703 my_thread_end();
5704 ERR_remove_state(0);
5705 pthread_exit(0);
5706 return 0; // Avoid compiler warnings
5707}
5708
5709
5710/*
5711 process_io_create_file()
5712*/
5713
5714static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
5715{
5716 int error = 1;
5717 ulong num_bytes;
5718 bool cev_not_written;
5719 THD *thd = mi->io_thd;
5720 NET *net = &mi->mysql->net;
5721 DBUG_ENTER("process_io_create_file");
5722
5723 if (unlikely(!cev->is_valid()))
5724 DBUG_RETURN(1);
5725
5726 if (!mi->rpl_filter->db_ok(cev->db))
5727 {
5728 skip_load_data_infile(net);
5729 DBUG_RETURN(0);
5730 }
5731 DBUG_ASSERT(cev->inited_from_old);
5732 thd->file_id = cev->file_id = mi->file_id++;
5733 thd->variables.server_id = cev->server_id;
5734 cev_not_written = 1;
5735
5736 if (unlikely(net_request_file(net,cev->fname)))
5737 {
5738 sql_print_error("Slave I/O: failed requesting download of '%s'",
5739 cev->fname);
5740 goto err;
5741 }
5742
5743 /*
5744 This dummy block is so we could instantiate Append_block_log_event
5745 once and then modify it slightly instead of doing it multiple times
5746 in the loop
5747 */
5748 {
5749 Append_block_log_event aev(thd,0,0,0,0);
5750
5751 for (;;)
5752 {
5753 if (unlikely((num_bytes=my_net_read(net)) == packet_error))
5754 {
5755 sql_print_error("Network read error downloading '%s' from master",
5756 cev->fname);
5757 goto err;
5758 }
5759 if (unlikely(!num_bytes)) /* eof */
5760 {
5761 /* 3.23 master wants it */
5762 net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0);
5763 /*
5764 If we wrote Create_file_log_event, then we need to write
5765 Execute_load_log_event. If we did not write Create_file_log_event,
5766 then this is an empty file and we can just do as if the LOAD DATA
5767 INFILE had not existed, i.e. write nothing.
5768 */
5769 if (unlikely(cev_not_written))
5770 break;
5771 Execute_load_log_event xev(thd,0,0);
5772 xev.log_pos = cev->log_pos;
5773 if (unlikely(mi->rli.relay_log.append(&xev)))
5774 {
5775 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
5776 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5777 "error writing Exec_load event to relay log");
5778 goto err;
5779 }
5780 mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
5781 break;
5782 }
5783 if (unlikely(cev_not_written))
5784 {
5785 cev->block = net->read_pos;
5786 cev->block_len = num_bytes;
5787 if (unlikely(mi->rli.relay_log.append(cev)))
5788 {
5789 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
5790 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5791 "error writing Create_file event to relay log");
5792 goto err;
5793 }
5794 cev_not_written=0;
5795 mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
5796 }
5797 else
5798 {
5799 aev.block = net->read_pos;
5800 aev.block_len = num_bytes;
5801 aev.log_pos = cev->log_pos;
5802 if (unlikely(mi->rli.relay_log.append(&aev)))
5803 {
5804 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
5805 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5806 "error writing Append_block event to relay log");
5807 goto err;
5808 }
5809 mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total) ;
5810 }
5811 }
5812 }
5813 error=0;
5814err:
5815 DBUG_RETURN(error);
5816}
5817
5818
5819/*
5820 Start using a new binary log on the master
5821
5822 SYNOPSIS
5823 process_io_rotate()
5824 mi master_info for the slave
5825 rev The rotate log event read from the binary log
5826
5827 DESCRIPTION
5828 Updates the master info with the place in the next binary
5829 log where we should start reading.
5830 Rotate the relay log to avoid mixed-format relay logs.
5831
5832 NOTES
5833 We assume we already locked mi->data_lock
5834
5835 RETURN VALUES
5836 0 ok
5837 1 Log event is illegal
5838
5839*/
5840
5841static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
5842{
5843 DBUG_ENTER("process_io_rotate");
5844 mysql_mutex_assert_owner(&mi->data_lock);
5845
5846 if (unlikely(!rev->is_valid()))
5847 DBUG_RETURN(1);
5848
5849 /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */
5850 memcpy(mi->master_log_name, rev->new_log_ident, rev->ident_len+1);
5851 mi->master_log_pos= rev->pos;
5852 DBUG_PRINT("info", ("master_log_pos: '%s' %lu",
5853 mi->master_log_name, (ulong) mi->master_log_pos));
5854#ifndef DBUG_OFF
5855 /*
5856 If we do not do this, we will be getting the first
5857 rotate event forever, so we need to not disconnect after one.
5858 */
5859 if (disconnect_slave_event_count)
5860 mi->events_till_disconnect++;
5861#endif
5862
5863 /*
5864 If description_event_for_queue is format <4, there is conversion in the
5865 relay log to the slave's format (4). And Rotate can mean upgrade or
5866 nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so
5867 no need to reset description_event_for_queue now. And if it's nothing (same
5868 master version as before), no need (still using the slave's format).
5869 */
5870 if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4)
5871 {
5872 DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
5873 mi->rli.relay_log.relay_log_checksum_alg);
5874
5875 delete mi->rli.relay_log.description_event_for_queue;
5876 /* start from format 3 (MySQL 4.0) again */
5877 mi->rli.relay_log.description_event_for_queue= new
5878 Format_description_log_event(3);
5879 mi->rli.relay_log.description_event_for_queue->checksum_alg=
5880 mi->rli.relay_log.relay_log_checksum_alg;
5881 }
5882 /*
5883 Rotate the relay log makes binlog format detection easier (at next slave
5884 start or mysqlbinlog)
5885 */
5886 DBUG_RETURN(rotate_relay_log(mi) /* will take the right mutexes */);
5887}
5888
5889/*
5890 Reads a 3.23 event and converts it to the slave's format. This code was
5891 copied from MySQL 4.0.
5892*/
5893static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
5894 ulong event_len)
5895{
5896 const char *errmsg = 0;
5897 ulong inc_pos;
5898 bool ignore_event= 0;
5899 char *tmp_buf = 0;
5900 Relay_log_info *rli= &mi->rli;
5901 DBUG_ENTER("queue_binlog_ver_1_event");
5902
5903 /*
5904 If we get Load event, we need to pass a non-reusable buffer
5905 to read_log_event, so we do a trick
5906 */
5907 if ((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
5908 {
5909 if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME)))))
5910 {
5911 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
5912 ER(ER_SLAVE_FATAL_ERROR), "Memory allocation failed");
5913 DBUG_RETURN(1);
5914 }
5915 memcpy(tmp_buf,buf,event_len);
5916 /*
5917 Create_file constructor wants a 0 as last char of buffer, this 0 will
5918 serve as the string-termination char for the file's name (which is at the
5919 end of the buffer)
5920 We must increment event_len, otherwise the event constructor will not see
5921 this end 0, which leads to segfault.
5922 */
5923 tmp_buf[event_len++]=0;
5924 int4store(tmp_buf+EVENT_LEN_OFFSET, event_len);
5925 buf = (const char*)tmp_buf;
5926 }
5927 /*
5928 This will transform LOAD_EVENT into CREATE_FILE_EVENT, ask the master to
5929 send the loaded file, and write it to the relay log in the form of
5930 Append_block/Exec_load (the SQL thread needs the data, as that thread is not
5931 connected to the master).
5932 */
5933 Log_event *ev=
5934 Log_event::read_log_event(buf, event_len, &errmsg,
5935 mi->rli.relay_log.description_event_for_queue, 0);
5936 if (unlikely(!ev))
5937 {
5938 sql_print_error("Read invalid event from master: '%s',\
5939 master could be corrupt but a more likely cause of this is a bug",
5940 errmsg);
5941 my_free(tmp_buf);
5942 DBUG_RETURN(1);
5943 }
5944
5945 mysql_mutex_lock(&mi->data_lock);
5946 ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */
5947 switch (ev->get_type_code()) {
5948 case STOP_EVENT:
5949 ignore_event= 1;
5950 inc_pos= event_len;
5951 break;
5952 case ROTATE_EVENT:
5953 if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
5954 {
5955 delete ev;
5956 mysql_mutex_unlock(&mi->data_lock);
5957 DBUG_RETURN(1);
5958 }
5959 inc_pos= 0;
5960 break;
5961 case CREATE_FILE_EVENT:
5962 /*
5963 Yes it's possible to have CREATE_FILE_EVENT here, even if we're in
5964 queue_old_event() which is for 3.23 events which don't comprise
5965 CREATE_FILE_EVENT. This is because read_log_event() above has just
5966 transformed LOAD_EVENT into CREATE_FILE_EVENT.
5967 */
5968 {
5969 /* We come here when and only when tmp_buf != 0 */
5970 DBUG_ASSERT(tmp_buf != 0);
5971 inc_pos=event_len;
5972 ev->log_pos+= inc_pos;
5973 int error = process_io_create_file(mi,(Create_file_log_event*)ev);
5974 delete ev;
5975 mi->master_log_pos += inc_pos;
5976 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
5977 mysql_mutex_unlock(&mi->data_lock);
5978 my_free(tmp_buf);
5979 DBUG_RETURN(error);
5980 }
5981 default:
5982 inc_pos= event_len;
5983 break;
5984 }
5985 if (likely(!ignore_event))
5986 {
5987 if (ev->log_pos)
5988 /*
5989 Don't do it for fake Rotate events (see comment in
5990 Log_event::Log_event(const char* buf...) in log_event.cc).
5991 */
5992 ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */
5993 if (unlikely(rli->relay_log.append(ev)))
5994 {
5995 delete ev;
5996 mysql_mutex_unlock(&mi->data_lock);
5997 DBUG_RETURN(1);
5998 }
5999 rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6000 }
6001 delete ev;
6002 mi->master_log_pos+= inc_pos;
6003 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
6004 mysql_mutex_unlock(&mi->data_lock);
6005 DBUG_RETURN(0);
6006}
6007
6008/*
6009 Reads a 4.0 event and converts it to the slave's format. This code was copied
6010 from queue_binlog_ver_1_event(), with some affordable simplifications.
6011*/
6012static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
6013 ulong event_len)
6014{
6015 const char *errmsg = 0;
6016 ulong inc_pos;
6017 char *tmp_buf = 0;
6018 Relay_log_info *rli= &mi->rli;
6019 DBUG_ENTER("queue_binlog_ver_3_event");
6020
6021 /* read_log_event() will adjust log_pos to be end_log_pos */
6022 Log_event *ev=
6023 Log_event::read_log_event(buf,event_len, &errmsg,
6024 mi->rli.relay_log.description_event_for_queue, 0);
6025 if (unlikely(!ev))
6026 {
6027 sql_print_error("Read invalid event from master: '%s',\
6028 master could be corrupt but a more likely cause of this is a bug",
6029 errmsg);
6030 my_free(tmp_buf);
6031 DBUG_RETURN(1);
6032 }
6033 mysql_mutex_lock(&mi->data_lock);
6034 switch (ev->get_type_code()) {
6035 case STOP_EVENT:
6036 goto err;
6037 case ROTATE_EVENT:
6038 if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
6039 {
6040 delete ev;
6041 mysql_mutex_unlock(&mi->data_lock);
6042 DBUG_RETURN(1);
6043 }
6044 inc_pos= 0;
6045 break;
6046 default:
6047 inc_pos= event_len;
6048 break;
6049 }
6050
6051 if (unlikely(rli->relay_log.append(ev)))
6052 {
6053 delete ev;
6054 mysql_mutex_unlock(&mi->data_lock);
6055 DBUG_RETURN(1);
6056 }
6057 rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6058 delete ev;
6059 mi->master_log_pos+= inc_pos;
6060err:
6061 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
6062 mysql_mutex_unlock(&mi->data_lock);
6063 DBUG_RETURN(0);
6064}
6065
6066/*
6067 queue_old_event()
6068
6069 Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0
6070 (exactly, slave's) format. To do the conversion, we create a 5.0 event from
6071 the 3.23/4.0 bytes, then write this event to the relay log.
6072
6073 TODO:
6074 Test this code before release - it has to be tested on a separate
6075 setup with 3.23 master or 4.0 master
6076*/
6077
6078static int queue_old_event(Master_info *mi, const char *buf,
6079 ulong event_len)
6080{
6081 DBUG_ENTER("queue_old_event");
6082
6083 switch (mi->rli.relay_log.description_event_for_queue->binlog_version)
6084 {
6085 case 1:
6086 DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len));
6087 case 3:
6088 DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len));
6089 default: /* unsupported format; eg version 2 */
6090 DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
6091 mi->rli.relay_log.description_event_for_queue->binlog_version));
6092 DBUG_RETURN(1);
6093 }
6094}
6095
6096/*
6097 queue_event()
6098
6099 If the event is 3.23/4.0, passes it to queue_old_event() which will convert
6100 it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is
6101 no format conversion, it's pure read/write of bytes.
6102 So a 5.0.0 slave's relay log can contain events in the slave's format or in
6103 any >=5.0.0 format.
6104*/
6105
6106static int queue_event(Master_info* mi,const char* buf, ulong event_len)
6107{
6108 int error= 0;
6109 StringBuffer<1024> error_msg;
6110 ulonglong inc_pos= 0;
6111 ulonglong event_pos;
6112 Relay_log_info *rli= &mi->rli;
6113 mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
6114 ulong s_id;
6115 bool unlock_data_lock= TRUE;
6116 bool gtid_skip_enqueue= false;
6117 bool got_gtid_event= false;
6118 rpl_gtid event_gtid;
6119 static uint dbug_rows_event_count __attribute__((unused))= 0;
6120 bool is_compress_event = false;
6121 char* new_buf = NULL;
6122 char new_buf_arr[4096];
6123 bool is_malloc = false;
6124 bool is_rows_event= false;
6125 /*
6126 FD_q must have been prepared for the first R_a event
6127 inside get_master_version_and_clock()
6128 Show-up of FD:s affects checksum_alg at once because
6129 that changes FD_queue.
6130 */
6131 enum enum_binlog_checksum_alg checksum_alg=
6132 mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF ?
6133 mi->checksum_alg_before_fd : mi->rli.relay_log.relay_log_checksum_alg;
6134
6135 char *save_buf= NULL; // needed for checksumming the fake Rotate event
6136 char rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN];
6137
6138 DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
6139 checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
6140 checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);
6141
6142 DBUG_ENTER("queue_event");
6143 /*
6144 FD_queue checksum alg description does not apply in a case of
6145 FD itself. The one carries both parts of the checksum data.
6146 */
6147 if (buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT)
6148 {
6149 checksum_alg= get_checksum_alg(buf, event_len);
6150 }
6151 else if (buf[EVENT_TYPE_OFFSET] == START_EVENT_V3)
6152 {
6153 // checksum behaviour is similar to the pre-checksum FD handling
6154 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
6155 mi->rli.relay_log.description_event_for_queue->checksum_alg=
6156 mi->rli.relay_log.relay_log_checksum_alg= checksum_alg=
6157 BINLOG_CHECKSUM_ALG_OFF;
6158 }
6159
6160 // does not hold always because of old binlog can work with NM
6161 // DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
6162
6163 // should hold unless manipulations with RL. Tests that do that
6164 // will have to refine the clause.
6165 DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
6166 BINLOG_CHECKSUM_ALG_UNDEF);
6167
6168 // Emulate the network corruption
6169 DBUG_EXECUTE_IF("corrupt_queue_event",
6170 if ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT)
6171 {
6172 char *debug_event_buf_c = (char*) buf;
6173 int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
6174 debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
6175 DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
6176 DBUG_SET("-d,corrupt_queue_event");
6177 }
6178 );
6179
6180 if (event_checksum_test((uchar *) buf, event_len, checksum_alg))
6181 {
6182 error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE;
6183 unlock_data_lock= FALSE;
6184 goto err;
6185 }
6186
6187 if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
6188 (uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
6189 DBUG_RETURN(queue_old_event(mi,buf,event_len));
6190
6191#ifdef ENABLED_DEBUG_SYNC
6192 /*
6193 A (+d,dbug.rows_events_to_delay_relay_logging)-test is supposed to
6194 create a few Write_log_events and after receiving the 1st of them
6195 the IO thread signals to launch the SQL thread, and sets itself to
6196 wait for a release signal.
6197 */
6198 DBUG_EXECUTE_IF("dbug.rows_events_to_delay_relay_logging",
6199 if ((buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT_V1 ||
6200 buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT) &&
6201 ++dbug_rows_event_count == 2)
6202 {
6203 const char act[]=
6204 "now SIGNAL start_sql_thread "
6205 "WAIT_FOR go_on_relay_logging";
6206 DBUG_ASSERT(debug_sync_service);
6207 DBUG_ASSERT(!debug_sync_set_action(current_thd,
6208 STRING_WITH_LEN(act)));
6209 };);
6210#endif
6211 mysql_mutex_lock(&mi->data_lock);
6212
6213 switch ((uchar)buf[EVENT_TYPE_OFFSET]) {
6214 case STOP_EVENT:
6215 /*
6216 We needn't write this event to the relay log. Indeed, it just indicates a
6217 master server shutdown. The only thing this does is cleaning. But
6218 cleaning is already done on a per-master-thread basis (as the master
6219 server is shutting down cleanly, it has written all DROP TEMPORARY TABLE
6220 prepared statements' deletion are TODO only when we binlog prep stmts).
6221
6222 We don't even increment mi->master_log_pos, because we may be just after
6223 a Rotate event. Btw, in a few milliseconds we are going to have a Start
6224 event from the next binlog (unless the master is presently running
6225 without --log-bin).
6226 */
6227 goto err;
6228 case ROTATE_EVENT:
6229 {
6230 Rotate_log_event rev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
6231 event_len - BINLOG_CHECKSUM_LEN : event_len,
6232 mi->rli.relay_log.description_event_for_queue);
6233
6234 if (unlikely(mi->gtid_reconnect_event_skip_count) &&
6235 unlikely(!mi->gtid_event_seen) &&
6236 rev.is_artificial_event() &&
6237 (mi->prev_master_id != mi->master_id ||
6238 strcmp(rev.new_log_ident, mi->master_log_name) != 0))
6239 {
6240 /*
6241 Artificial Rotate_log_event is the first event we receive at the start
6242 of each master binlog file. It gives the name of the new binlog file.
6243
6244 Normally, we already have this name from the real rotate event at the
6245 end of the previous binlog file (unless we are making a new connection
6246 using GTID). But if the master server restarted/crashed, there is no
6247 rotate event at the end of the prior binlog file, so the name is new.
6248
6249 We use this fact to handle a special case of master crashing. If the
6250 master crashed while writing the binlog, it might end with a partial
6251 event group lacking the COMMIT/XID event, which must be rolled
6252 back. If the slave IO thread happens to get a disconnect in the middle
6253 of exactly this event group, it will try to reconnect at the same GTID
6254 and skip already fetched events. However, that GTID did not commit on
6255 the master before the crash, so it does not really exist, and the
6256 master will connect the slave at the next following GTID starting in
6257 the next binlog. This could confuse the slave and make it mix the
6258 start of one event group with the end of another.
6259
6260 But we detect this case here, by noticing the change of binlog name
6261 which detects the missing rotate event at the end of the previous
6262 binlog file. In this case, we reset the counters to make us not skip
6263 the next event group, and queue an artificial Format Description
6264 event. The previously fetched incomplete event group will then be
6265 rolled back when the Format Description event is executed by the SQL
6266 thread.
6267
6268 A similar case is if the reconnect somehow connects to a different
6269 master server (like due to a network proxy or IP address takeover).
6270 We detect this case by noticing a change of server_id and in this
6271 case likewise rollback the partially received event group.
6272 */
6273 Format_description_log_event fdle(4);
6274
6275 if (mi->prev_master_id != mi->master_id)
6276 sql_print_warning("The server_id of master server changed in the "
6277 "middle of GTID %u-%u-%llu. Assuming a change of "
6278 "master server, so rolling back the previously "
6279 "received partial transaction. Expected: %lu, "
6280 "received: %lu", mi->last_queued_gtid.domain_id,
6281 mi->last_queued_gtid.server_id,
6282 mi->last_queued_gtid.seq_no,
6283 mi->prev_master_id, mi->master_id);
6284 else if (strcmp(rev.new_log_ident, mi->master_log_name) != 0)
6285 sql_print_warning("Unexpected change of master binlog file name in the "
6286 "middle of GTID %u-%u-%llu, assuming that master has "
6287 "crashed and rolling back the transaction. Expected: "
6288 "'%s', received: '%s'",
6289 mi->last_queued_gtid.domain_id,
6290 mi->last_queued_gtid.server_id,
6291 mi->last_queued_gtid.seq_no,
6292 mi->master_log_name, rev.new_log_ident);
6293
6294 mysql_mutex_lock(log_lock);
6295 if (likely(!rli->relay_log.write_event(&fdle) &&
6296 !rli->relay_log.flush_and_sync(NULL)))
6297 {
6298 rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6299 }
6300 else
6301 {
6302 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6303 mysql_mutex_unlock(log_lock);
6304 goto err;
6305 }
6306 rli->relay_log.signal_relay_log_update();
6307 mysql_mutex_unlock(log_lock);
6308
6309 mi->gtid_reconnect_event_skip_count= 0;
6310 mi->events_queued_since_last_gtid= 0;
6311 }
6312 mi->prev_master_id= mi->master_id;
6313
6314 if (unlikely(process_io_rotate(mi, &rev)))
6315 {
6316 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6317 goto err;
6318 }
6319 /*
6320 Checksum special cases for the fake Rotate (R_f) event caused by the protocol
6321 of events generation and serialization in RL where Rotate of master is
6322 queued right next to FD of slave.
6323 Since it's only FD that carries the alg desc of FD_s has to apply to R_m.
6324 Two special rules apply only to the first R_f which comes in before any FD_m.
6325 The 2nd R_f should be compatible with the FD_s that must have taken over
6326 the last seen FD_m's (A).
6327
6328 RSC_1: If OM \and fake Rotate \and slave is configured to
6329 to compute checksum for its first FD event for RL
6330 the fake Rotate gets checksummed here.
6331 */
6332 if (uint4korr(&buf[0]) == 0 && checksum_alg == BINLOG_CHECKSUM_ALG_OFF &&
6333 mi->rli.relay_log.relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
6334 {
6335 ha_checksum rot_crc= 0;
6336 event_len += BINLOG_CHECKSUM_LEN;
6337 memcpy(rot_buf, buf, event_len - BINLOG_CHECKSUM_LEN);
6338 int4store(&rot_buf[EVENT_LEN_OFFSET],
6339 uint4korr(&rot_buf[EVENT_LEN_OFFSET]) + BINLOG_CHECKSUM_LEN);
6340 rot_crc= my_checksum(rot_crc, (const uchar *) rot_buf,
6341 event_len - BINLOG_CHECKSUM_LEN);
6342 int4store(&rot_buf[event_len - BINLOG_CHECKSUM_LEN], rot_crc);
6343 DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
6344 DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
6345 mi->rli.relay_log.relay_log_checksum_alg);
6346 /* the first one */
6347 DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
6348 save_buf= (char *) buf;
6349 buf= rot_buf;
6350 }
6351 else
6352 /*
6353 RSC_2: If NM \and fake Rotate \and slave does not compute checksum
6354 the fake Rotate's checksum is stripped off before relay-logging.
6355 */
6356 if (uint4korr(&buf[0]) == 0 && checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
6357 mi->rli.relay_log.relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_OFF)
6358 {
6359 event_len -= BINLOG_CHECKSUM_LEN;
6360 memcpy(rot_buf, buf, event_len);
6361 int4store(&rot_buf[EVENT_LEN_OFFSET],
6362 uint4korr(&rot_buf[EVENT_LEN_OFFSET]) - BINLOG_CHECKSUM_LEN);
6363 DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
6364 DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
6365 mi->rli.relay_log.relay_log_checksum_alg);
6366 /* the first one */
6367 DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
6368 save_buf= (char *) buf;
6369 buf= rot_buf;
6370 }
6371 /*
6372 Now the I/O thread has just changed its mi->master_log_name, so
6373 incrementing mi->master_log_pos is nonsense.
6374 */
6375 inc_pos= 0;
6376 break;
6377 }
6378 case FORMAT_DESCRIPTION_EVENT:
6379 {
6380 /*
6381 Create an event, and save it (when we rotate the relay log, we will have
6382 to write this event again).
6383 */
6384 /*
6385 We are the only thread which reads/writes description_event_for_queue.
6386 The relay_log struct does not move (though some members of it can
6387 change), so we needn't any lock (no rli->data_lock, no log lock).
6388 */
6389 Format_description_log_event* tmp;
6390 const char* errmsg;
6391 // mark it as undefined that is irrelevant anymore
6392 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
6393 if (!(tmp= (Format_description_log_event*)
6394 Log_event::read_log_event(buf, event_len, &errmsg,
6395 mi->rli.relay_log.description_event_for_queue,
6396 1)))
6397 {
6398 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6399 goto err;
6400 }
6401 tmp->copy_crypto_data(mi->rli.relay_log.description_event_for_queue);
6402 delete mi->rli.relay_log.description_event_for_queue;
6403 mi->rli.relay_log.description_event_for_queue= tmp;
6404 if (tmp->checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
6405 tmp->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
6406
6407 /* installing new value of checksum Alg for relay log */
6408 mi->rli.relay_log.relay_log_checksum_alg= tmp->checksum_alg;
6409
6410 /*
6411 Do not queue any format description event that we receive after a
6412 reconnect where we are skipping over a partial event group received
6413 before the reconnect.
6414
6415 (If we queued such an event, and it was the first format_description
6416 event after master restart, the slave SQL thread would think that
6417 the partial event group before it in the relay log was from a
6418 previous master crash and should be rolled back).
6419 */
6420 if (unlikely(mi->gtid_reconnect_event_skip_count && !mi->gtid_event_seen))
6421 gtid_skip_enqueue= true;
6422
6423 /*
6424 Though this does some conversion to the slave's format, this will
6425 preserve the master's binlog format version, and number of event types.
6426 */
6427 /*
6428 If the event was not requested by the slave (the slave did not ask for
6429 it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
6430 */
6431 inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
6432 DBUG_PRINT("info",("binlog format is now %d",
6433 mi->rli.relay_log.description_event_for_queue->binlog_version));
6434
6435 }
6436 break;
6437
6438 case HEARTBEAT_LOG_EVENT:
6439 {
6440 /*
6441 HB (heartbeat) cannot come before RL (Relay)
6442 */
6443 Heartbeat_log_event hb(buf,
6444 mi->rli.relay_log.relay_log_checksum_alg
6445 != BINLOG_CHECKSUM_ALG_OFF ?
6446 event_len - BINLOG_CHECKSUM_LEN : event_len,
6447 mi->rli.relay_log.description_event_for_queue);
6448 if (!hb.is_valid())
6449 {
6450 error= ER_SLAVE_HEARTBEAT_FAILURE;
6451 error_msg.append(STRING_WITH_LEN("inconsistent heartbeat event content;"));
6452 error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
6453 error_msg.append(hb.get_log_ident(), (uint) strlen(hb.get_log_ident()));
6454 error_msg.append(STRING_WITH_LEN(" log_pos "));
6455 error_msg.append_ulonglong(hb.log_pos);
6456 goto err;
6457 }
6458 mi->received_heartbeats++;
6459 /*
6460 compare local and event's versions of log_file, log_pos.
6461
6462 Heartbeat is sent only after an event corresponding to the corrdinates
6463 the heartbeat carries.
6464 Slave can not have a higher coordinate except in the only
6465 special case when mi->master_log_name, master_log_pos have never
6466 been updated by Rotate event i.e when slave does not have any history
6467 with the master (and thereafter mi->master_log_pos is NULL).
6468
6469 Slave can have lower coordinates, if some event from master was omitted.
6470
6471 TODO: handling `when' for SHOW SLAVE STATUS' snds behind
6472 */
6473 if (memcmp(mi->master_log_name, hb.get_log_ident(), hb.get_ident_len()) ||
6474 mi->master_log_pos > hb.log_pos) {
6475 /* missed events of heartbeat from the past */
6476 error= ER_SLAVE_HEARTBEAT_FAILURE;
6477 error_msg.append(STRING_WITH_LEN("heartbeat is not compatible with local info;"));
6478 error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
6479 error_msg.append(hb.get_log_ident(), (uint) strlen(hb.get_log_ident()));
6480 error_msg.append(STRING_WITH_LEN(" log_pos "));
6481 error_msg.append_ulonglong(hb.log_pos);
6482 goto err;
6483 }
6484
6485 /*
6486 Heartbeat events doesn't count in the binlog size, so we don't have to
6487 increment mi->master_log_pos
6488 */
6489 goto skip_relay_logging;
6490 }
6491 break;
6492
6493 case GTID_LIST_EVENT:
6494 {
6495 const char *errmsg;
6496 Gtid_list_log_event *glev;
6497 Log_event *tmp;
6498 uint32 flags;
6499
6500 if (!(tmp= Log_event::read_log_event(buf, event_len, &errmsg,
6501 mi->rli.relay_log.description_event_for_queue,
6502 opt_slave_sql_verify_checksum)))
6503 {
6504 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6505 goto err;
6506 }
6507 glev= static_cast<Gtid_list_log_event *>(tmp);
6508 event_pos= glev->log_pos;
6509 flags= glev->gl_flags;
6510 delete glev;
6511
6512 /*
6513 We use fake Gtid_list events to update the old-style position (among
6514 other things).
6515
6516 Early code created fake Gtid_list events with zero log_pos, those should
6517 not modify old-style position.
6518 */
6519 if (event_pos == 0 || event_pos <= mi->master_log_pos)
6520 inc_pos= 0;
6521 else
6522 inc_pos= event_pos - mi->master_log_pos;
6523
6524 if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID &&
6525 flags & Gtid_list_log_event::FLAG_UNTIL_REACHED)
6526 {
6527 char str_buf[128];
6528 String str(str_buf, sizeof(str_buf), system_charset_info);
6529 mi->rli.until_gtid_pos.to_string(&str);
6530 sql_print_information("Slave I/O thread stops because it reached its"
6531 " UNTIL master_gtid_pos %s", str.c_ptr_safe());
6532 mi->abort_slave= true;
6533 }
6534 }
6535 break;
6536
6537 case GTID_EVENT:
6538 {
6539 DBUG_EXECUTE_IF("kill_slave_io_after_2_events",
6540 {
6541 mi->dbug_do_disconnect= true;
6542 mi->dbug_event_counter= 2;
6543 };);
6544
6545 uchar gtid_flag;
6546
6547 if (Gtid_log_event::peek(buf, event_len, checksum_alg,
6548 &event_gtid.domain_id, &event_gtid.server_id,
6549 &event_gtid.seq_no, &gtid_flag,
6550 rli->relay_log.description_event_for_queue))
6551 {
6552 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6553 goto err;
6554 }
6555 got_gtid_event= true;
6556 if (mi->using_gtid == Master_info::USE_GTID_NO)
6557 goto default_action;
6558 if (unlikely(mi->gtid_reconnect_event_skip_count))
6559 {
6560 if (likely(!mi->gtid_event_seen))
6561 {
6562 mi->gtid_event_seen= true;
6563 /*
6564 If we are reconnecting, and we need to skip a partial event group
6565 already queued to the relay log before the reconnect, then we check
6566 that we actually get the same event group (same GTID) as before, so
6567 we do not end up with half of one group and half another.
6568
6569 The only way we should be able to receive a different GTID than what
6570 we expect is if the binlog on the master (or more likely the whole
6571 master server) was replaced with a different one, on the same IP
6572 address, _and_ the new master happens to have domains in a different
6573 order so we get the GTID from a different domain first. Still, it is
6574 best to protect against this case.
6575 */
6576 if (event_gtid.domain_id != mi->last_queued_gtid.domain_id ||
6577 event_gtid.server_id != mi->last_queued_gtid.server_id ||
6578 event_gtid.seq_no != mi->last_queued_gtid.seq_no)
6579 {
6580 bool first;
6581 error= ER_SLAVE_UNEXPECTED_MASTER_SWITCH;
6582 error_msg.append(STRING_WITH_LEN("Expected: "));
6583 first= true;
6584 rpl_slave_state_tostring_helper(&error_msg, &mi->last_queued_gtid,
6585 &first);
6586 error_msg.append(STRING_WITH_LEN(", received: "));
6587 first= true;
6588 rpl_slave_state_tostring_helper(&error_msg, &event_gtid, &first);
6589 goto err;
6590 }
6591 if (global_system_variables.log_warnings > 1)
6592 {
6593 bool first= true;
6594 StringBuffer<1024> gtid_text;
6595 rpl_slave_state_tostring_helper(&gtid_text, &mi->last_queued_gtid,
6596 &first);
6597 sql_print_information("Slave IO thread is reconnected to "
6598 "receive Gtid_log_event %s. It is to skip %llu "
6599 "already received events including the gtid one",
6600 gtid_text.ptr(),
6601 mi->events_queued_since_last_gtid);
6602 }
6603 goto default_action;
6604 }
6605 else
6606 {
6607 bool first;
6608 StringBuffer<1024> gtid_text;
6609
6610 gtid_text.append(STRING_WITH_LEN("Last received gtid: "));
6611 first= true;
6612 rpl_slave_state_tostring_helper(&gtid_text, &mi->last_queued_gtid,
6613 &first);
6614 gtid_text.append(STRING_WITH_LEN(", currently received: "));
6615 first= true;
6616 rpl_slave_state_tostring_helper(&gtid_text, &event_gtid, &first);
6617
6618 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6619 sql_print_error("Slave IO thread has received a new Gtid_log_event "
6620 "while skipping already logged events "
6621 "after reconnect. %s. %llu remains to be skipped. "
6622 "The number of originally read events was %llu",
6623 gtid_text.ptr(),
6624 mi->gtid_reconnect_event_skip_count,
6625 mi->events_queued_since_last_gtid);
6626 goto err;
6627 }
6628 }
6629 mi->gtid_event_seen= true;
6630
6631 /*
6632 We have successfully queued to relay log everything before this GTID, so
6633 in case of reconnect we can start from after any previous GTID.
6634 (Normally we would have updated gtid_current_pos earlier at the end of
6635 the previous event group, but better leave an extra check here for
6636 safety).
6637 */
6638 if (mi->events_queued_since_last_gtid)
6639 {
6640 mi->gtid_current_pos.update(&mi->last_queued_gtid);
6641 mi->events_queued_since_last_gtid= 0;
6642 }
6643 mi->last_queued_gtid= event_gtid;
6644 mi->last_queued_gtid_standalone=
6645 (gtid_flag & Gtid_log_event::FL_STANDALONE) != 0;
6646
6647 /* Should filter all the subsequent events in the current GTID group? */
6648 mi->domain_id_filter.do_filter(event_gtid.domain_id);
6649
6650 ++mi->events_queued_since_last_gtid;
6651 inc_pos= event_len;
6652 }
6653 break;
6654 /*
6655 Binlog compressed event should uncompress in IO thread
6656 */
6657 case QUERY_COMPRESSED_EVENT:
6658 inc_pos= event_len;
6659 if (query_event_uncompress(rli->relay_log.description_event_for_queue,
6660 checksum_alg == BINLOG_CHECKSUM_ALG_CRC32,
6661 buf, event_len, new_buf_arr, sizeof(new_buf_arr),
6662 &is_malloc, (char **)&new_buf, &event_len))
6663 {
6664 char llbuf[22];
6665 error = ER_BINLOG_UNCOMPRESS_ERROR;
6666 error_msg.append(STRING_WITH_LEN("binlog uncompress error, master log_pos: "));
6667 llstr(mi->master_log_pos, llbuf);
6668 error_msg.append(llbuf, strlen(llbuf));
6669 goto err;
6670 }
6671 buf = new_buf;
6672 is_compress_event = true;
6673 goto default_action;
6674
6675 case WRITE_ROWS_COMPRESSED_EVENT:
6676 case UPDATE_ROWS_COMPRESSED_EVENT:
6677 case DELETE_ROWS_COMPRESSED_EVENT:
6678 case WRITE_ROWS_COMPRESSED_EVENT_V1:
6679 case UPDATE_ROWS_COMPRESSED_EVENT_V1:
6680 case DELETE_ROWS_COMPRESSED_EVENT_V1:
6681 inc_pos = event_len;
6682 {
6683 if (row_log_event_uncompress(rli->relay_log.description_event_for_queue,
6684 checksum_alg == BINLOG_CHECKSUM_ALG_CRC32,
6685 buf, event_len, new_buf_arr, sizeof(new_buf_arr),
6686 &is_malloc, (char **)&new_buf, &event_len))
6687 {
6688 char llbuf[22];
6689 error = ER_BINLOG_UNCOMPRESS_ERROR;
6690 error_msg.append(STRING_WITH_LEN("binlog uncompress error, master log_pos: "));
6691 llstr(mi->master_log_pos, llbuf);
6692 error_msg.append(llbuf, strlen(llbuf));
6693 goto err;
6694 }
6695 }
6696 is_compress_event = true;
6697 buf = new_buf;
6698 /*
6699 As we are uncertain about compressed V2 rows events, we don't track
6700 them
6701 */
6702 if (LOG_EVENT_IS_ROW_V2((Log_event_type) buf[EVENT_TYPE_OFFSET]))
6703 goto default_action;
6704 /* fall through */
6705 case WRITE_ROWS_EVENT_V1:
6706 case UPDATE_ROWS_EVENT_V1:
6707 case DELETE_ROWS_EVENT_V1:
6708 case WRITE_ROWS_EVENT:
6709 case UPDATE_ROWS_EVENT:
6710 case DELETE_ROWS_EVENT:
6711 {
6712 is_rows_event= true;
6713 mi->rows_event_tracker.update(mi->master_log_name,
6714 mi->master_log_pos,
6715 buf,
6716 mi->rli.relay_log.
6717 description_event_for_queue);
6718
6719 DBUG_EXECUTE_IF("simulate_stmt_end_rows_event_loss",
6720 {
6721 mi->rows_event_tracker.stmt_end_seen= false;
6722 });
6723 }
6724 goto default_action;
6725
6726#ifndef DBUG_OFF
6727 case XID_EVENT:
6728 DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000",
6729 {
6730 /* Inject an event group that is missing its XID commit event. */
6731 if (mi->last_queued_gtid.domain_id == 0 &&
6732 mi->last_queued_gtid.seq_no == 1000)
6733 goto skip_relay_logging;
6734 });
6735#endif
6736 /* fall through */
6737 default:
6738 default_action:
6739 DBUG_EXECUTE_IF("kill_slave_io_after_2_events",
6740 {
6741 if (mi->dbug_do_disconnect &&
6742 (LOG_EVENT_IS_QUERY((Log_event_type)(uchar)buf[EVENT_TYPE_OFFSET]) ||
6743 ((uchar)buf[EVENT_TYPE_OFFSET] == TABLE_MAP_EVENT))
6744 && (--mi->dbug_event_counter == 0))
6745 {
6746 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6747 mi->dbug_do_disconnect= false; /* Safety */
6748 goto err;
6749 }
6750 };);
6751
6752 DBUG_EXECUTE_IF("kill_slave_io_before_commit",
6753 {
6754 if ((uchar)buf[EVENT_TYPE_OFFSET] == XID_EVENT ||
6755 ((uchar)buf[EVENT_TYPE_OFFSET] == QUERY_EVENT && /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */
6756 Query_log_event::peek_is_commit_rollback(buf, event_len,
6757 checksum_alg)))
6758 {
6759 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6760 goto err;
6761 }
6762 };);
6763
6764 if (mi->using_gtid != Master_info::USE_GTID_NO && mi->gtid_event_seen)
6765 {
6766 if (unlikely(mi->gtid_reconnect_event_skip_count))
6767 {
6768 --mi->gtid_reconnect_event_skip_count;
6769 gtid_skip_enqueue= true;
6770 }
6771 else if (mi->events_queued_since_last_gtid)
6772 ++mi->events_queued_since_last_gtid;
6773 }
6774
6775 if (!is_compress_event)
6776 inc_pos= event_len;
6777
6778 break;
6779 }
6780
6781 /*
6782 Integrity of Rows- event group check.
6783 A sequence of Rows- events must end with STMT_END_F flagged one.
6784 Even when Heartbeat event interrupts Rows- events flow this must indicate a
6785 malfunction e.g logging on the master.
6786 */
6787 if (((uchar) buf[EVENT_TYPE_OFFSET] != HEARTBEAT_LOG_EVENT) &&
6788 !is_rows_event &&
6789 mi->rows_event_tracker.check_and_report(mi->master_log_name,
6790 mi->master_log_pos))
6791 {
6792 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6793 goto err;
6794 }
6795
6796 /*
6797 If we filter events master-side (eg. @@skip_replication), we will see holes
6798 in the event positions from the master. If we see such a hole, adjust
6799 mi->master_log_pos accordingly so we maintain the correct position (for
6800 reconnect, MASTER_POS_WAIT(), etc.)
6801 */
6802 if (inc_pos > 0 &&
6803 event_len >= LOG_POS_OFFSET+4 &&
6804 (event_pos= uint4korr(buf+LOG_POS_OFFSET)) > mi->master_log_pos + inc_pos)
6805 {
6806 inc_pos= event_pos - mi->master_log_pos;
6807 DBUG_PRINT("info", ("Adjust master_log_pos %llu->%llu to account for "
6808 "master-side filtering",
6809 mi->master_log_pos + inc_pos, event_pos));
6810 }
6811
6812 /*
6813 If this event is originating from this server, don't queue it.
6814 We don't check this for 3.23 events because it's simpler like this; 3.23
6815 will be filtered anyway by the SQL slave thread which also tests the
6816 server id (we must also keep this test in the SQL thread, in case somebody
6817 upgrades a 4.0 slave which has a not-filtered relay log).
6818
6819 ANY event coming from ourselves can be ignored: it is obvious for queries;
6820 for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves
6821 (--log-slave-updates would not log that) unless this slave is also its
6822 direct master (an unsupported, useless setup!).
6823 */
6824
6825 mysql_mutex_lock(log_lock);
6826 s_id= uint4korr(buf + SERVER_ID_OFFSET);
6827 /*
6828 Write the event to the relay log, unless we reconnected in the middle
6829 of an event group and now need to skip the initial part of the group that
6830 we already wrote before reconnecting.
6831 */
6832 if (unlikely(gtid_skip_enqueue))
6833 {
6834 mi->master_log_pos+= inc_pos;
6835 if ((uchar)buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT &&
6836 s_id == mi->master_id)
6837 {
6838 /*
6839 If we write this master's description event in the middle of an event
6840 group due to GTID reconnect, SQL thread will think that master crashed
6841 in the middle of the group and roll back the first half, so we must not.
6842
6843 But we still have to write an artificial copy of the masters description
6844 event, to override the initial slave-version description event so that
6845 SQL thread has the right information for parsing the events it reads.
6846 */
6847 rli->relay_log.description_event_for_queue->created= 0;
6848 rli->relay_log.description_event_for_queue->set_artificial_event();
6849 if (rli->relay_log.append_no_lock
6850 (rli->relay_log.description_event_for_queue))
6851 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6852 else
6853 rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6854 }
6855 else if (mi->gtid_reconnect_event_skip_count == 0)
6856 {
6857 /*
6858 Add a fake rotate event so that SQL thread can see the old-style
6859 position where we re-connected in the middle of a GTID event group.
6860 */
6861 Rotate_log_event fake_rev(mi->master_log_name, 0, mi->master_log_pos, 0);
6862 fake_rev.server_id= mi->master_id;
6863 if (rli->relay_log.append_no_lock(&fake_rev))
6864 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6865 else
6866 rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6867 }
6868 }
6869 else
6870 if ((s_id == global_system_variables.server_id &&
6871 !mi->rli.replicate_same_server_id) ||
6872 event_that_should_be_ignored(buf) ||
6873 /*
6874 the following conjunction deals with IGNORE_SERVER_IDS, if set
6875 If the master is on the ignore list, execution of
6876 format description log events and rotate events is necessary.
6877 */
6878 (mi->ignore_server_ids.elements > 0 &&
6879 mi->shall_ignore_server_id(s_id) &&
6880 /* everything is filtered out from non-master */
6881 (s_id != mi->master_id ||
6882 /* for the master meta information is necessary */
6883 ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT &&
6884 (uchar)buf[EVENT_TYPE_OFFSET] != ROTATE_EVENT))) ||
6885
6886 /*
6887 Check whether it needs to be filtered based on domain_id
6888 (DO_DOMAIN_IDS/IGNORE_DOMAIN_IDS).
6889 */
6890 (mi->domain_id_filter.is_group_filtered() &&
6891 Log_event::is_group_event((Log_event_type)(uchar)
6892 buf[EVENT_TYPE_OFFSET])))
6893 {
6894 /*
6895 Do not write it to the relay log.
6896 a) We still want to increment mi->master_log_pos, so that we won't
6897 re-read this event from the master if the slave IO thread is now
6898 stopped/restarted (more efficient if the events we are ignoring are big
6899 LOAD DATA INFILE).
6900 b) We want to record that we are skipping events, for the information of
6901 the slave SQL thread, otherwise that thread may let
6902 rli->group_relay_log_pos stay too small if the last binlog's event is
6903 ignored.
6904 But events which were generated by this slave and which do not exist in
6905 the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
6906 mi->master_log_pos.
6907 If the event is originated remotely and is being filtered out by
6908 IGNORE_SERVER_IDS it increments mi->master_log_pos
6909 as well as rli->group_relay_log_pos.
6910 */
6911 if (!(s_id == global_system_variables.server_id &&
6912 !mi->rli.replicate_same_server_id) ||
6913 ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT &&
6914 (uchar)buf[EVENT_TYPE_OFFSET] != ROTATE_EVENT &&
6915 (uchar)buf[EVENT_TYPE_OFFSET] != STOP_EVENT))
6916 {
6917 mi->master_log_pos+= inc_pos;
6918 memcpy(rli->ign_master_log_name_end, mi->master_log_name, FN_REFLEN);
6919 DBUG_ASSERT(rli->ign_master_log_name_end[0]);
6920 rli->ign_master_log_pos_end= mi->master_log_pos;
6921 if (got_gtid_event)
6922 rli->ign_gtids.update(&event_gtid);
6923 }
6924 // the slave SQL thread needs to re-check
6925 rli->relay_log.signal_relay_log_update();
6926 DBUG_PRINT("info", ("master_log_pos: %lu, event originating from %u server, ignored",
6927 (ulong) mi->master_log_pos, uint4korr(buf + SERVER_ID_OFFSET)));
6928 }
6929 else
6930 {
6931 if (likely(!rli->relay_log.write_event_buffer((uchar*)buf, event_len)))
6932 {
6933 mi->master_log_pos+= inc_pos;
6934 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
6935 rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6936 }
6937 else
6938 {
6939 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6940 }
6941 rli->ign_master_log_name_end[0]= 0; // last event is not ignored
6942 if (got_gtid_event)
6943 rli->ign_gtids.remove_if_present(&event_gtid);
6944 if (save_buf != NULL)
6945 buf= save_buf;
6946 }
6947 mysql_mutex_unlock(log_lock);
6948
6949 if (likely(!error) &&
6950 mi->using_gtid != Master_info::USE_GTID_NO &&
6951 mi->events_queued_since_last_gtid > 0 &&
6952 ( (mi->last_queued_gtid_standalone &&
6953 !Log_event::is_part_of_group((Log_event_type)(uchar)
6954 buf[EVENT_TYPE_OFFSET])) ||
6955 (!mi->last_queued_gtid_standalone &&
6956 ((uchar)buf[EVENT_TYPE_OFFSET] == XID_EVENT ||
6957 ((uchar)buf[EVENT_TYPE_OFFSET] == QUERY_EVENT && /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */
6958 Query_log_event::peek_is_commit_rollback(buf, event_len,
6959 checksum_alg))))))
6960 {
6961 /*
6962 The whole of the current event group is queued. So in case of
6963 reconnect we can start from after the current GTID.
6964 */
6965 if (mi->gtid_reconnect_event_skip_count)
6966 {
6967 bool first= true;
6968 StringBuffer<1024> gtid_text;
6969
6970 rpl_slave_state_tostring_helper(&gtid_text, &mi->last_queued_gtid,
6971 &first);
6972 sql_print_error("Slave IO thread received a terminal event from "
6973 "group %s whose retrieval was interrupted "
6974 "with reconnect. We still had %llu events to read. "
6975 "The number of originally read events was %llu",
6976 gtid_text.ptr(),
6977 mi->gtid_reconnect_event_skip_count,
6978 mi->events_queued_since_last_gtid);
6979 }
6980 mi->gtid_current_pos.update(&mi->last_queued_gtid);
6981 mi->events_queued_since_last_gtid= 0;
6982
6983 /* Reset the domain_id_filter flag. */
6984 mi->domain_id_filter.reset_filter();
6985 }
6986
6987skip_relay_logging:
6988
6989err:
6990 if (unlock_data_lock)
6991 mysql_mutex_unlock(&mi->data_lock);
6992 DBUG_PRINT("info", ("error: %d", error));
6993
6994 /*
6995 Do not print ER_SLAVE_RELAY_LOG_WRITE_FAILURE error here, as the caller
6996 handle_slave_io() prints it on return.
6997 */
6998 if (unlikely(error) && error != ER_SLAVE_RELAY_LOG_WRITE_FAILURE)
6999 mi->report(ERROR_LEVEL, error, NULL, ER_DEFAULT(error),
7000 error_msg.ptr());
7001
7002 if (unlikely(is_malloc))
7003 my_free((void *)new_buf);
7004
7005 DBUG_RETURN(error);
7006}
7007
7008
7009void end_relay_log_info(Relay_log_info* rli)
7010{
7011 mysql_mutex_t *log_lock;
7012 DBUG_ENTER("end_relay_log_info");
7013
7014 rli->error_on_rli_init_info= false;
7015 if (!rli->inited)
7016 DBUG_VOID_RETURN;
7017 if (rli->info_fd >= 0)
7018 {
7019 end_io_cache(&rli->info_file);
7020 mysql_file_close(rli->info_fd, MYF(MY_WME));
7021 rli->info_fd = -1;
7022 }
7023 if (rli->cur_log_fd >= 0)
7024 {
7025 end_io_cache(&rli->cache_buf);
7026 mysql_file_close(rli->cur_log_fd, MYF(MY_WME));
7027 rli->cur_log_fd = -1;
7028 }
7029 rli->inited = 0;
7030 log_lock= rli->relay_log.get_log_lock();
7031 mysql_mutex_lock(log_lock);
7032 rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
7033 rli->relay_log.harvest_bytes_written(&rli->log_space_total);
7034 mysql_mutex_unlock(log_lock);
7035 /*
7036 Delete the slave's temporary tables from memory.
7037 In the future there will be other actions than this, to ensure persistance
7038 of slave's temp tables after shutdown.
7039 */
7040 rli->close_temporary_tables();
7041 DBUG_VOID_RETURN;
7042}
7043
7044
/**
  Hook to detach the active VIO before closing a connection handle.

  The client API might close the connection (and associated data)
  in case it encounters an unrecoverable (network) error. This hook
  is called from the client code before the VIO handle is deleted; it
  allows the thread to detach the active vio so it does not point
  to freed memory.

  Other calls to THD::clear_active_vio throughout this module are
  redundant due to the hook but are left in place for illustrative
  purposes.

  The body is compiled only when SIGNAL_WITH_VIO_CLOSE is defined, and it
  acts only when there is a current THD that is flagged as a slave thread.
*/

extern "C" void slave_io_thread_detach_vio()
{
#ifdef SIGNAL_WITH_VIO_CLOSE
  THD *io_thd= current_thd;

  /* Nothing to detach without a THD, or for a non-slave thread. */
  if (!io_thd || !io_thd->slave_thread)
    return;

  io_thd->clear_active_vio();
#endif
}
7067
7068
7069/*
7070 Try to connect until successful or slave killed
7071
7072 SYNPOSIS
7073 safe_connect()
7074 thd Thread handler for slave
7075 mysql MySQL connection handle
7076 mi Replication handle
7077
7078 RETURN
7079 0 ok
7080 # Error
7081*/
7082
7083static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi)
7084{
7085 DBUG_ENTER("safe_connect");
7086
7087 DBUG_RETURN(connect_to_master(thd, mysql, mi, 0, 0));
7088}
7089
7090
7091/*
7092 SYNPOSIS
7093 connect_to_master()
7094
7095 IMPLEMENTATION
7096 Try to connect until successful or slave killed or we have retried
7097 master_retry_count times
7098*/
7099
7100static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
7101 bool reconnect, bool suppress_warnings)
7102{
7103 int slave_was_killed;
7104 int last_errno= -2; // impossible error
7105 ulong err_count=0;
7106 my_bool my_true= 1;
7107 DBUG_ENTER("connect_to_master");
7108 set_slave_max_allowed_packet(thd, mysql);
7109#ifndef DBUG_OFF
7110 mi->events_till_disconnect = disconnect_slave_event_count;
7111#endif
7112 ulong client_flag= CLIENT_REMEMBER_OPTIONS;
7113 if (opt_slave_compressed_protocol)
7114 client_flag|= CLIENT_COMPRESS; /* We will use compression */
7115
7116 mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
7117 mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
7118 mysql_options(mysql, MYSQL_OPT_USE_THREAD_SPECIFIC_MEMORY,
7119 (char*) &my_true);
7120
7121#ifdef HAVE_OPENSSL
7122 if (mi->ssl)
7123 {
7124 mysql_ssl_set(mysql,
7125 mi->ssl_key[0]?mi->ssl_key:0,
7126 mi->ssl_cert[0]?mi->ssl_cert:0,
7127 mi->ssl_ca[0]?mi->ssl_ca:0,
7128 mi->ssl_capath[0]?mi->ssl_capath:0,
7129 mi->ssl_cipher[0]?mi->ssl_cipher:0);
7130 mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
7131 &mi->ssl_verify_server_cert);
7132 mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH,
7133 mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0);
7134 mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
7135 &mi->ssl_verify_server_cert);
7136 }
7137#endif
7138
7139 /*
7140 If server's default charset is not supported (like utf16, utf32) as client
7141 charset, then set client charset to 'latin1' (default client charset).
7142 */
7143 if (is_supported_parser_charset(default_charset_info))
7144 mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname);
7145 else
7146 {
7147 sql_print_information("'%s' can not be used as client character set. "
7148 "'%s' will be used as default client character set "
7149 "while connecting to master.",
7150 default_charset_info->csname,
7151 default_client_charset_info->csname);
7152 mysql_options(mysql, MYSQL_SET_CHARSET_NAME,
7153 default_client_charset_info->csname);
7154 }
7155
7156 /* This one is not strictly needed but we have it here for completeness */
7157 mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);
7158
7159 /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */
7160 if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr)
7161 mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr);
7162
7163 /* we disallow empty users */
7164 if (mi->user[0] == 0)
7165 {
7166 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
7167 ER_THD(thd, ER_SLAVE_FATAL_ERROR),
7168 "Invalid (empty) username when attempting to "
7169 "connect to the master server. Connection attempt "
7170 "terminated.");
7171 DBUG_RETURN(1);
7172 }
7173 while (!(slave_was_killed = io_slave_killed(mi)) &&
7174 (reconnect ? mysql_reconnect(mysql) != 0 :
7175 mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0,
7176 mi->port, 0, client_flag) == 0))
7177 {
7178 /* Don't repeat last error */
7179 if ((int)mysql_errno(mysql) != last_errno)
7180 {
7181 last_errno=mysql_errno(mysql);
7182 suppress_warnings= 0;
7183 mi->report(ERROR_LEVEL, last_errno, NULL,
7184 "error %s to master '%s@%s:%d'"
7185 " - retry-time: %d maximum-retries: %lu message: %s",
7186 (reconnect ? "reconnecting" : "connecting"),
7187 mi->user, mi->host, mi->port,
7188 mi->connect_retry, master_retry_count,
7189 mysql_error(mysql));
7190 }
7191 /*
7192 By default we try forever. The reason is that failure will trigger
7193 master election, so if the user did not set master_retry_count we
7194 do not want to have election triggered on the first failure to
7195 connect
7196 */
7197 if (++err_count == master_retry_count)
7198 {
7199 slave_was_killed=1;
7200 if (reconnect)
7201 change_rpl_status(RPL_ACTIVE_SLAVE,RPL_LOST_SOLDIER);
7202 break;
7203 }
7204 slave_sleep(thd,mi->connect_retry,io_slave_killed, mi);
7205 }
7206
7207 if (!slave_was_killed)
7208 {
7209 mi->clear_error(); // clear possible left over reconnect error
7210 if (reconnect)
7211 {
7212 if (!suppress_warnings && global_system_variables.log_warnings)
7213 sql_print_information("Slave: connected to master '%s@%s:%d',"
7214 "replication resumed in log '%s' at "
7215 "position %llu", mi->user, mi->host, mi->port,
7216 IO_RPL_LOG_NAME, mi->master_log_pos);
7217 }
7218 else
7219 {
7220 change_rpl_status(RPL_IDLE_SLAVE,RPL_ACTIVE_SLAVE);
7221 general_log_print(thd, COM_CONNECT_OUT, "%s@%s:%d",
7222 mi->user, mi->host, mi->port);
7223 }
7224#ifdef SIGNAL_WITH_VIO_CLOSE
7225 thd->set_active_vio(mysql->net.vio);
7226#endif
7227 }
7228 mysql->reconnect= 1;
7229 DBUG_PRINT("exit",("slave_was_killed: %d", slave_was_killed));
7230 DBUG_RETURN(slave_was_killed);
7231}
7232
7233
7234/*
7235 safe_reconnect()
7236
7237 IMPLEMENTATION
7238 Try to connect until successful or slave killed or we have retried
7239 master_retry_count times
7240*/
7241
7242static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
7243 bool suppress_warnings)
7244{
7245 DBUG_ENTER("safe_reconnect");
7246 DBUG_RETURN(connect_to_master(thd, mysql, mi, 1, suppress_warnings));
7247}
7248
7249
7250#ifdef NOT_USED
7251MYSQL *rpl_connect_master(MYSQL *mysql)
7252{
7253 Master_info *mi= my_pthread_getspecific_ptr(Master_info*, RPL_MASTER_INFO);
7254 bool allocated= false;
7255 my_bool my_true= 1;
7256 THD *thd;
7257
7258 if (!mi)
7259 {
7260 sql_print_error("'rpl_connect_master' must be called in slave I/O thread context.");
7261 return NULL;
7262 }
7263 thd= mi->io_thd;
7264 if (!mysql)
7265 {
7266 if(!(mysql= mysql_init(NULL)))
7267 {
7268 sql_print_error("rpl_connect_master: failed in mysql_init()");
7269 return NULL;
7270 }
7271 allocated= true;
7272 }
7273
7274 /*
7275 XXX: copied from connect_to_master, this function should not
7276 change the slave status, so we cannot use connect_to_master
7277 directly
7278
7279 TODO: make this part a seperate function to eliminate duplication
7280 */
7281 mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
7282 mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
7283 mysql_options(mysql, MYSQL_OPT_USE_THREAD_SPECIFIC_MEMORY,
7284 (char*) &my_true);
7285
7286#ifdef HAVE_OPENSSL
7287 if (mi->ssl)
7288 {
7289 mysql_ssl_set(mysql,
7290 mi->ssl_key[0]?mi->ssl_key:0,
7291 mi->ssl_cert[0]?mi->ssl_cert:0,
7292 mi->ssl_ca[0]?mi->ssl_ca:0,
7293 mi->ssl_capath[0]?mi->ssl_capath:0,
7294 mi->ssl_cipher[0]?mi->ssl_cipher:0);
7295 mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
7296 &mi->ssl_verify_server_cert);
7297 }
7298#endif
7299
7300 mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname);
7301 /* This one is not strictly needed but we have it here for completeness */
7302 mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);
7303
7304 if (mi->user == NULL
7305 || mi->user[0] == 0
7306 || io_slave_killed( mi)
7307 || !mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0,
7308 mi->port, 0, 0))
7309 {
7310 if (!io_slave_killed( mi))
7311 sql_print_error("rpl_connect_master: error connecting to master: %s (server_error: %d)",
7312 mysql_error(mysql), mysql_errno(mysql));
7313
7314 if (allocated)
7315 mysql_close(mysql); // this will free the object
7316 return NULL;
7317 }
7318 return mysql;
7319}
7320#endif
7321
7322
7323/*
7324 Called when we notice that the current "hot" log got rotated under our feet.
7325*/
7326
7327static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg)
7328{
7329 DBUG_ENTER("reopen_relay_log");
7330 DBUG_ASSERT(rli->cur_log != &rli->cache_buf);
7331 DBUG_ASSERT(rli->cur_log_fd == -1);
7332
7333 IO_CACHE *cur_log = rli->cur_log=&rli->cache_buf;
7334 if ((rli->cur_log_fd=open_binlog(cur_log,rli->event_relay_log_name,
7335 errmsg)) <0)
7336 DBUG_RETURN(0);
7337 /*
7338 We want to start exactly where we was before:
7339 relay_log_pos Current log pos
7340 pending Number of bytes already processed from the event
7341 */
7342 rli->event_relay_log_pos= MY_MAX(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
7343 my_b_seek(cur_log,rli->event_relay_log_pos);
7344 DBUG_RETURN(cur_log);
7345}
7346
7347
/**
  Reads next event from the relay log. Should be called from the
  slave SQL thread.

  @param rgi         Group info of the calling thread; the relay log is read
                     through its rli member (a Relay_log_info).
  @param event_size  Out: the size of the read event (in bytes). It is set
                     to 0 on entry.

  @return The event read, or NULL on error. If an error occurs, the
  error is reported through the sql_print_information() or
  sql_print_error() functions.

  The size of the read event (in bytes) is returned in *event_size.
*/
7360static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
7361{
7362 Log_event* ev;
7363 Relay_log_info *rli= rgi->rli;
7364 IO_CACHE* cur_log = rli->cur_log;
7365 mysql_mutex_t *log_lock = rli->relay_log.get_log_lock();
7366 const char* errmsg=0;
7367 DBUG_ENTER("next_event");
7368
7369 DBUG_ASSERT(rgi->thd != 0 && rgi->thd == rli->sql_driver_thd);
7370 *event_size= 0;
7371
7372#ifndef DBUG_OFF
7373 if (abort_slave_event_count && !rli->events_till_abort--)
7374 DBUG_RETURN(0);
7375#endif
7376
7377 /*
7378 For most operations we need to protect rli members with data_lock,
7379 so we assume calling function acquired this mutex for us and we will
7380 hold it for the most of the loop below However, we will release it
7381 whenever it is worth the hassle, and in the cases when we go into a
7382 mysql_cond_wait() with the non-data_lock mutex
7383 */
7384 mysql_mutex_assert_owner(&rli->data_lock);
7385
7386 while (!sql_slave_killed(rgi))
7387 {
7388 /*
7389 We can have two kinds of log reading:
7390 hot_log:
7391 rli->cur_log points at the IO_CACHE of relay_log, which
7392 is actively being updated by the I/O thread. We need to be careful
7393 in this case and make sure that we are not looking at a stale log that
7394 has already been rotated. If it has been, we reopen the log.
7395
7396 The other case is much simpler:
7397 We just have a read only log that nobody else will be updating.
7398 */
7399 ulonglong old_pos;
7400 bool hot_log;
7401 if ((hot_log = (cur_log != &rli->cache_buf)))
7402 {
7403 DBUG_ASSERT(rli->cur_log_fd == -1); // foreign descriptor
7404 mysql_mutex_lock(log_lock);
7405
7406 /*
7407 Reading xxx_file_id is safe because the log will only
7408 be rotated when we hold relay_log.LOCK_log
7409 */
7410 if (rli->relay_log.get_open_count() != rli->cur_log_old_open_count)
7411 {
7412 // The master has switched to a new log file; Reopen the old log file
7413 cur_log=reopen_relay_log(rli, &errmsg);
7414 mysql_mutex_unlock(log_lock);
7415 if (!cur_log) // No more log files
7416 goto err;
7417 hot_log=0; // Using old binary log
7418 }
7419 }
7420 /*
7421 As there is no guarantee that the relay is open (for example, an I/O
7422 error during a write by the slave I/O thread may have closed it), we
7423 have to test it.
7424 */
7425 if (!my_b_inited(cur_log))
7426 goto err;
7427#ifndef DBUG_OFF
7428 {
7429 /* This is an assertion which sometimes fails, let's try to track it */
7430 DBUG_PRINT("info", ("my_b_tell(cur_log)=%llu rli->event_relay_log_pos=%llu",
7431 my_b_tell(cur_log), rli->event_relay_log_pos));
7432 DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
7433 DBUG_ASSERT(rli->mi->using_parallel() ||
7434 my_b_tell(cur_log) == rli->event_relay_log_pos);
7435 }
7436#endif
7437 /*
7438 Relay log is always in new format - if the master is 3.23, the
7439 I/O thread will convert the format for us.
7440 A problem: the description event may be in a previous relay log. So if
7441 the slave has been shutdown meanwhile, we would have to look in old relay
7442 logs, which may even have been deleted. So we need to write this
7443 description event at the beginning of the relay log.
7444 When the relay log is created when the I/O thread starts, easy: the
7445 master will send the description event and we will queue it.
7446 But if the relay log is created by new_file(): then the solution is:
7447 MYSQL_BIN_LOG::open() will write the buffered description event.
7448 */
7449 old_pos= rli->event_relay_log_pos;
7450 if ((ev= Log_event::read_log_event(cur_log,
7451 rli->relay_log.description_event_for_exec,
7452 opt_slave_sql_verify_checksum)))
7453
7454 {
7455 /*
7456 read it while we have a lock, to avoid a mutex lock in
7457 inc_event_relay_log_pos()
7458 */
7459 rli->future_event_relay_log_pos= my_b_tell(cur_log);
7460 *event_size= rli->future_event_relay_log_pos - old_pos;
7461
7462 if (hot_log)
7463 mysql_mutex_unlock(log_lock);
7464 rli->sql_thread_caught_up= false;
7465 DBUG_RETURN(ev);
7466 }
7467 if (opt_reckless_slave) // For mysql-test
7468 cur_log->error = 0;
7469 if (unlikely(cur_log->error < 0))
7470 {
7471 errmsg = "slave SQL thread aborted because of I/O error";
7472 if (hot_log)
7473 mysql_mutex_unlock(log_lock);
7474 goto err;
7475 }
7476 if (!cur_log->error) /* EOF */
7477 {
7478 /*
7479 On a hot log, EOF means that there are no more updates to
7480 process and we must block until I/O thread adds some and
7481 signals us to continue
7482 */
7483 if (hot_log)
7484 {
7485 /*
7486 We say in Seconds_Behind_Master that we have "caught up". Note that
7487 for example if network link is broken but I/O slave thread hasn't
7488 noticed it (slave_net_timeout not elapsed), then we'll say "caught
7489 up" whereas we're not really caught up. Fixing that would require
7490 internally cutting timeout in smaller pieces in network read, no
7491 thanks. Another example: SQL has caught up on I/O, now I/O has read
7492 a new event and is queuing it; the false "0" will exist until SQL
7493 finishes executing the new event; it will be look abnormal only if
7494 the events have old timestamps (then you get "many", 0, "many").
7495
          Transient phases like this can be fixed by implementing a
          Heartbeat event which provides the slave the status of the
          master at time the master does not have any new update to send.
          Seconds_Behind_Master would be zero only when master has no
          more updates in binlog for slave. The heartbeat can be sent
          in a (small) fraction of slave_net_timeout. Until it's done
          rli->sql_thread_caught_up is temporarily (for time of waiting for
          the following event) set whenever EOF is reached.
7504 */
7505 rli->sql_thread_caught_up= true;
7506
7507 DBUG_ASSERT(rli->relay_log.get_open_count() ==
7508 rli->cur_log_old_open_count);
7509
7510 if (rli->ign_master_log_name_end[0])
7511 {
7512 /* We generate and return a Rotate, to make our positions advance */
7513 DBUG_PRINT("info",("seeing an ignored end segment"));
7514 ev= new Rotate_log_event(rli->ign_master_log_name_end,
7515 0, rli->ign_master_log_pos_end,
7516 Rotate_log_event::DUP_NAME);
7517 rli->ign_master_log_name_end[0]= 0;
7518 mysql_mutex_unlock(log_lock);
7519 if (unlikely(!ev))
7520 {
7521 errmsg= "Slave SQL thread failed to create a Rotate event "
7522 "(out of memory?), SHOW SLAVE STATUS may be inaccurate";
7523 goto err;
7524 }
7525 ev->server_id= 0; // don't be ignored by slave SQL thread
7526 DBUG_RETURN(ev);
7527 }
7528
7529 if (rli->ign_gtids.count() && !rli->is_in_group())
7530 {
7531 /*
7532 We generate and return a Gtid_list, to update gtid_slave_pos,
7533 unless being in the middle of a group.
7534 */
7535 DBUG_PRINT("info",("seeing ignored end gtids"));
7536 ev= new Gtid_list_log_event(&rli->ign_gtids,
7537 Gtid_list_log_event::FLAG_IGN_GTIDS);
7538 rli->ign_gtids.reset();
7539 mysql_mutex_unlock(log_lock);
7540 if (unlikely(!ev))
7541 {
7542 errmsg= "Slave SQL thread failed to create a Gtid_list event "
7543 "(out of memory?), gtid_slave_pos may be inaccurate";
7544 goto err;
7545 }
7546 ev->server_id= 0; // don't be ignored by slave SQL thread
7547 ev->set_artificial_event(); // Don't mess up Exec_Master_Log_Pos
7548 DBUG_RETURN(ev);
7549 }
7550
7551 /*
7552 We have to check sql_slave_killed() here an extra time.
7553 Otherwise we may miss a wakeup, since last check was done
7554 without holding LOCK_log.
7555 */
7556 if (sql_slave_killed(rgi))
7557 {
7558 mysql_mutex_unlock(log_lock);
7559 break;
7560 }
7561
7562 /*
7563 We can, and should release data_lock while we are waiting for
7564 update. If we do not, show slave status will block
7565 */
7566 mysql_mutex_unlock(&rli->data_lock);
7567
7568 /*
7569 Possible deadlock :
7570 - the I/O thread has reached log_space_limit
7571 - the SQL thread has read all relay logs, but cannot purge for some
7572 reason:
7573 * it has already purged all logs except the current one
7574 * there are other logs than the current one but they're involved in
7575 a transaction that finishes in the current one (or is not finished)
7576 Solution :
7577 Wake up the possibly waiting I/O thread, and set a boolean asking
7578 the I/O thread to temporarily ignore the log_space_limit
7579 constraint, because we do not want the I/O thread to block because of
7580 space (it's ok if it blocks for any other reason (e.g. because the
7581 master does not send anything). Then the I/O thread stops waiting
7582 and reads one more event and starts honoring log_space_limit again.
7583
7584 If the SQL thread needs more events to be able to rotate the log (it
7585 might need to finish the current group first), then it can ask for
7586 one more at a time. Thus we don't outgrow the relay log indefinitely,
7587 but rather in a controlled manner, until the next rotate.
7588
7589 When the SQL thread starts it sets ignore_log_space_limit to false.
7590 We should also reset ignore_log_space_limit to 0 when the user does
7591 RESET SLAVE, but in fact, no need as RESET SLAVE requires that the
7592 slave be stopped, and the SQL thread sets ignore_log_space_limit
7593 to 0 when
7594 it stops.
7595 */
7596 mysql_mutex_lock(&rli->log_space_lock);
7597
7598 /*
7599 If we have reached the limit of the relay space and we
7600 are going to sleep, waiting for more events:
7601
7602 1. If outside a group, SQL thread asks the IO thread
7603 to force a rotation so that the SQL thread purges
7604 logs next time it processes an event (thus space is
7605 freed).
7606
7607 2. If in a group, SQL thread asks the IO thread to
7608 ignore the limit and queues yet one more event
7609 so that the SQL thread finishes the group and
             is able to rotate and purge sometime soon.
7611 */
7612 if (rli->log_space_limit &&
7613 rli->log_space_limit < rli->log_space_total)
7614 {
7615 /* force rotation if not in an unfinished group */
7616 rli->sql_force_rotate_relay= !rli->is_in_group();
7617
7618 /* ask for one more event */
7619 rli->ignore_log_space_limit= true;
7620 }
7621
7622 mysql_cond_broadcast(&rli->log_space_cond);
7623 mysql_mutex_unlock(&rli->log_space_lock);
7624 // Note that wait_for_update_relay_log unlocks lock_log !
7625 rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd);
7626 // re-acquire data lock since we released it earlier
7627 mysql_mutex_lock(&rli->data_lock);
7628 rli->sql_thread_caught_up= false;
7629 continue;
7630 }
7631 /*
7632 If the log was not hot, we need to move to the next log in
7633 sequence. The next log could be hot or cold, we deal with both
7634 cases separately after doing some common initialization
7635 */
7636 end_io_cache(cur_log);
7637 DBUG_ASSERT(rli->cur_log_fd >= 0);
7638 mysql_file_close(rli->cur_log_fd, MYF(MY_WME));
7639 rli->cur_log_fd = -1;
7640 rli->last_inuse_relaylog->completed= true;
7641 rli->relay_log.description_event_for_exec->reset_crypto();
7642
7643 if (relay_log_purge)
7644 {
7645 /*
7646 purge_first_log will properly set up relay log coordinates in rli.
7647 If the group's coordinates are equal to the event's coordinates
7648 (i.e. the relay log was not rotated in the middle of a group),
7649 we can purge this relay log too.
7650 We do ulonglong and string comparisons, this may be slow but
7651 - purging the last relay log is nice (it can save 1GB of disk), so we
7652 like to detect the case where we can do it, and given this,
7653 - I see no better detection method
7654 - purge_first_log is not called that often
7655 */
7656 if (rli->relay_log.purge_first_log
7657 (rli,
7658 rli->group_relay_log_pos == rli->event_relay_log_pos
7659 && !strcmp(rli->group_relay_log_name,rli->event_relay_log_name)))
7660 {
7661 errmsg = "Error purging processed logs";
7662 goto err;
7663 }
7664 }
7665 else
7666 {
7667 /*
7668 If hot_log is set, then we already have a lock on
7669 LOCK_log. If not, we have to get the lock.
7670
7671 According to Sasha, the only time this code will ever be executed
7672 is if we are recovering from a bug.
7673 */
7674 if (rli->relay_log.find_next_log(&rli->linfo, !hot_log))
7675 {
7676 errmsg = "error switching to the next log";
7677 goto err;
7678 }
7679 rli->event_relay_log_pos = BIN_LOG_HEADER_SIZE;
7680 strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name);
7681 if (rli->flush())
7682 {
7683 errmsg= "error flushing relay log";
7684 goto err;
7685 }
7686 }
7687 /*
7688 Now we want to open this next log. To know if it's a hot log (the one
7689 being written by the I/O thread now) or a cold log, we can use
7690 is_active(); if it is hot, we use the I/O cache; if it's cold we open
7691 the file normally. But if is_active() reports that the log is hot, this
7692 may change between the test and the consequence of the test. So we may
7693 open the I/O cache whereas the log is now cold, which is nonsense.
7694 To guard against this, we need to have LOCK_log.
7695 */
7696
7697 DBUG_PRINT("info",("hot_log: %d",hot_log));
7698 if (!hot_log) /* if hot_log, we already have this mutex */
7699 mysql_mutex_lock(log_lock);
7700 if (rli->relay_log.is_active(rli->linfo.log_file_name))
7701 {
7702 rli->cur_log= cur_log= rli->relay_log.get_log_file();
7703 rli->cur_log_old_open_count= rli->relay_log.get_open_count();
7704 DBUG_ASSERT(rli->cur_log_fd == -1);
7705
7706 /*
7707 When the SQL thread is [stopped and] (re)started the
7708 following may happen:
7709
7710 1. Log was hot at stop time and remains hot at restart
7711
7712 SQL thread reads again from hot_log (SQL thread was
7713 reading from the active log when it was stopped and the
7714 very same log is still active on SQL thread restart).
7715
7716 In this case, my_b_seek is performed on cur_log, while
7717 cur_log points to relay_log.get_log_file();
7718
7719 2. Log was hot at stop time but got cold before restart
7720
7721 The log was hot when SQL thread stopped, but it is not
7722 anymore when the SQL thread restarts.
7723
7724 In this case, the SQL thread reopens the log, using
7725 cache_buf, ie, cur_log points to &cache_buf, and thence
7726 its coordinates are reset.
7727
7728 3. Log was already cold at stop time
7729
7730 The log was not hot when the SQL thread stopped, and, of
7731 course, it will not be hot when it restarts.
7732
7733 In this case, the SQL thread opens the cold log again,
7734 using cache_buf, ie, cur_log points to &cache_buf, and
7735 thence its coordinates are reset.
7736
7737 4. Log was hot at stop time, DBA changes to previous cold
7738 log and restarts SQL thread
7739
7740 The log was hot when the SQL thread was stopped, but the
7741 user changed the coordinates of the SQL thread to
7742 restart from a previous cold log.
7743
7744 In this case, at start time, cur_log points to a cold
7745 log, opened using &cache_buf as cache, and coordinates
7746 are reset. However, as it moves on to the next logs, it
7747 will eventually reach the hot log. If the hot log is the
7748 same at the time the SQL thread was stopped, then
7749 coordinates were not reset - the cur_log will point to
7750 relay_log.get_log_file(), and not a freshly opened
7751 IO_CACHE through cache_buf. For this reason we need to
7752 deploy a my_b_seek before calling check_binlog_magic at
7753 this point of the code (see: BUG#55263 for more
7754 details).
7755
7756 NOTES:
7757 - We must keep the LOCK_log to read the 4 first bytes, as
7758 this is a hot log (same as when we call read_log_event()
7759 above: for a hot log we take the mutex).
7760
7761 - Because of scenario #4 above, we need to have a
7762 my_b_seek here. Otherwise, we might hit the assertion
7763 inside check_binlog_magic.
7764 */
7765
7766 my_b_seek(cur_log, (my_off_t) 0);
7767 if (check_binlog_magic(cur_log,&errmsg))
7768 {
7769 if (!hot_log)
7770 mysql_mutex_unlock(log_lock);
7771 goto err;
7772 }
7773 if (rli->alloc_inuse_relaylog(rli->linfo.log_file_name))
7774 {
7775 if (!hot_log)
7776 mysql_mutex_unlock(log_lock);
7777 goto err;
7778 }
7779 if (!hot_log)
7780 mysql_mutex_unlock(log_lock);
7781 continue;
7782 }
7783 if (!hot_log)
7784 mysql_mutex_unlock(log_lock);
7785 /*
7786 if we get here, the log was not hot, so we will have to open it
7787 ourselves. We are sure that the log is still not hot now (a log can get
7788 from hot to cold, but not from cold to hot). No need for LOCK_log.
7789 */
7790 // open_binlog() will check the magic header
7791 if ((rli->cur_log_fd=open_binlog(cur_log,rli->linfo.log_file_name,
7792 &errmsg)) <0)
7793 goto err;
7794 if (rli->alloc_inuse_relaylog(rli->linfo.log_file_name))
7795 goto err;
7796 }
7797 else
7798 {
7799 /*
7800 Read failed with a non-EOF error.
7801 TODO: come up with something better to handle this error
7802 */
7803 if (hot_log)
7804 mysql_mutex_unlock(log_lock);
7805 sql_print_error("Slave SQL thread: I/O error reading \
7806event(errno: %d cur_log->error: %d)",
7807 my_errno,cur_log->error);
7808 // set read position to the beginning of the event
7809 my_b_seek(cur_log,rli->event_relay_log_pos);
7810 /* otherwise, we have had a partial read */
7811 errmsg = "Aborting slave SQL thread because of partial event read";
7812 break; // To end of function
7813 }
7814 }
7815 if (!errmsg && global_system_variables.log_warnings)
7816 {
7817 sql_print_information("Error reading relay log event: %s",
7818 "slave SQL thread was killed");
7819 DBUG_RETURN(0);
7820 }
7821
7822err:
7823 if (errmsg)
7824 sql_print_error("Error reading relay log event: %s", errmsg);
7825 DBUG_RETURN(0);
7826}
7827
7828/*
7829 Rotate a relay log (this is used only by FLUSH LOGS; the automatic rotation
7830 because of size is simpler because when we do it we already have all relevant
7831 locks; here we don't, so this function is mainly taking locks).
7832 Returns nothing as we cannot catch any error (MYSQL_BIN_LOG::new_file()
7833 is void).
7834*/
7835
7836int rotate_relay_log(Master_info* mi)
7837{
7838 DBUG_ENTER("rotate_relay_log");
7839 Relay_log_info* rli= &mi->rli;
7840 int error= 0;
7841
7842 DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_SUICIDE(););
7843
7844 /*
7845 We need to test inited because otherwise, new_file() will attempt to lock
7846 LOCK_log, which may not be inited (if we're not a slave).
7847 */
7848 if (!rli->inited)
7849 {
7850 DBUG_PRINT("info", ("rli->inited == 0"));
7851 goto end;
7852 }
7853
7854 /* If the relay log is closed, new_file() will do nothing. */
7855 if ((error= rli->relay_log.new_file()))
7856 goto end;
7857
7858 /*
7859 We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
7860 be counted, so imagine a succession of FLUSH LOGS and assume the slave
7861 threads are started:
7862 relay_log_space decreases by the size of the deleted relay log, but does
7863 not increase, so flush-after-flush we may become negative, which is wrong.
7864 Even if this will be corrected as soon as a query is replicated on the
7865 slave (because the I/O thread will then call harvest_bytes_written() which
7866 will harvest all these BIN_LOG_HEADER_SIZE we forgot), it may give strange
7867 output in SHOW SLAVE STATUS meanwhile. So we harvest now.
7868 If the log is closed, then this will just harvest the last writes, probably
7869 0 as they probably have been harvested.
7870
7871 Note that it needs to be protected by mi->data_lock.
7872 */
7873 mysql_mutex_assert_owner(&mi->data_lock);
7874 rli->relay_log.harvest_bytes_written(&rli->log_space_total);
7875end:
7876 DBUG_RETURN(error);
7877}
7878
7879
7880/**
7881 Detects, based on master's version (as found in the relay log), if master
7882 has a certain bug.
7883 @param rli Relay_log_info which tells the master's version
7884 @param bug_id Number of the bug as found in bugs.mysql.com
7885 @param report bool report error message, default TRUE
7886
7887 @param pred Predicate function that will be called with @c param to
7888 check for the bug. If the function return @c true, the bug is present,
7889 otherwise, it is not.
7890
7891 @param param State passed to @c pred function.
7892
7893 @return TRUE if master has the bug, FALSE if it does not.
7894*/
7895bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
7896 bool (*pred)(const void *), const void *param)
7897{
7898 struct st_version_range_for_one_bug {
7899 uint bug_id;
7900 const uchar introduced_in[3]; // first version with bug
7901 const uchar fixed_in[3]; // first version with fix
7902 };
7903 static struct st_version_range_for_one_bug versions_for_all_bugs[]=
7904 {
7905 {24432, { 5, 0, 24 }, { 5, 0, 38 } },
7906 {24432, { 5, 1, 12 }, { 5, 1, 17 } },
7907 {33029, { 5, 0, 0 }, { 5, 0, 58 } },
7908 {33029, { 5, 1, 0 }, { 5, 1, 12 } },
7909 {37426, { 5, 1, 0 }, { 5, 1, 26 } },
7910 };
7911 const uchar *master_ver=
7912 rli->relay_log.description_event_for_exec->server_version_split.ver;
7913
7914 DBUG_ASSERT(sizeof(rli->relay_log.description_event_for_exec->server_version_split.ver) == 3);
7915
7916 for (uint i= 0;
7917 i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++)
7918 {
7919 const uchar *introduced_in= versions_for_all_bugs[i].introduced_in,
7920 *fixed_in= versions_for_all_bugs[i].fixed_in;
7921 if ((versions_for_all_bugs[i].bug_id == bug_id) &&
7922 (memcmp(introduced_in, master_ver, 3) <= 0) &&
7923 (memcmp(fixed_in, master_ver, 3) > 0) &&
7924 (pred == NULL || (*pred)(param)))
7925 {
7926 if (!report)
7927 return TRUE;
7928 // a short message for SHOW SLAVE STATUS (message length constraints)
7929 my_printf_error(ER_UNKNOWN_ERROR, "master may suffer from"
7930 " http://bugs.mysql.com/bug.php?id=%u"
7931 " so slave stops; check error log on slave"
7932 " for more info", MYF(0), bug_id);
7933 // a verbose message for the error log
7934 rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR, NULL,
7935 "According to the master's version ('%s'),"
7936 " it is probable that master suffers from this bug:"
7937 " http://bugs.mysql.com/bug.php?id=%u"
7938 " and thus replicating the current binary log event"
7939 " may make the slave's data become different from the"
7940 " master's data."
7941 " To take no risk, slave refuses to replicate"
7942 " this event and stops."
7943 " We recommend that all updates be stopped on the"
7944 " master and slave, that the data of both be"
7945 " manually synchronized,"
7946 " that master's binary logs be deleted,"
7947 " that master be upgraded to a version at least"
7948 " equal to '%d.%d.%d'. Then replication can be"
7949 " restarted.",
7950 rli->relay_log.description_event_for_exec->server_version,
7951 bug_id,
7952 fixed_in[0], fixed_in[1], fixed_in[2]);
7953 return TRUE;
7954 }
7955 }
7956 return FALSE;
7957}
7958
7959/**
7960 BUG#33029, For all 5.0 up to 5.0.58 exclusive, and 5.1 up to 5.1.12
7961 exclusive, if one statement in a SP generated AUTO_INCREMENT value
7962 by the top statement, all statements after it would be considered
7963 generated AUTO_INCREMENT value by the top statement, and a
7964 erroneous INSERT_ID value might be associated with these statement,
7965 which could cause duplicate entry error and stop the slave.
7966
7967 Detect buggy master to work around.
7968 */
7969bool rpl_master_erroneous_autoinc(THD *thd)
7970{
7971 if (thd->rgi_slave)
7972 {
7973 DBUG_EXECUTE_IF("simulate_bug33029", return TRUE;);
7974 return rpl_master_has_bug(thd->rgi_slave->rli, 33029, FALSE, NULL, NULL);
7975 }
7976 return FALSE;
7977}
7978
7979
7980static bool get_row_event_stmt_end(const char* buf,
7981 const Format_description_log_event *fdle)
7982{
7983 uint8 const common_header_len= fdle->common_header_len;
7984 Log_event_type event_type= (Log_event_type)(uchar)buf[EVENT_TYPE_OFFSET];
7985
7986 uint8 const post_header_len= fdle->post_header_len[event_type-1];
7987 const char *flag_start= buf + common_header_len;
7988 /*
7989 The term 4 below signifies that master is of 'an intermediate source', see
7990 Rows_log_event::Rows_log_event.
7991 */
7992 flag_start += RW_MAPID_OFFSET + ((post_header_len == 6) ? 4 : RW_FLAGS_OFFSET);
7993
7994 return (uint2korr(flag_start) & Rows_log_event::STMT_END_F) != 0;
7995}
7996
7997
7998/*
7999 Reset log event tracking data.
8000*/
8001
8002void Rows_event_tracker::reset()
8003{
8004 binlog_file_name[0]= 0;
8005 first_seen= last_seen= 0;
8006 stmt_end_seen= false;
8007}
8008
8009
8010/*
8011 Update log event tracking data.
8012
8013 The first- and last- seen event binlog position get memorized, as
8014 well as the end-of-statement status of the last one.
8015*/
8016
8017void Rows_event_tracker::update(const char* file_name, my_off_t pos,
8018 const char* buf,
8019 const Format_description_log_event *fdle)
8020{
8021 if (!first_seen)
8022 {
8023 first_seen= pos;
8024 strmake(binlog_file_name, file_name, sizeof(binlog_file_name) - 1);
8025 }
8026 last_seen= pos;
8027 DBUG_ASSERT(stmt_end_seen == 0); // We can only have one
8028 stmt_end_seen= get_row_event_stmt_end(buf, fdle);
8029};
8030
8031
8032/**
8033 The function is called at next event reading
8034 after a sequence of Rows- log-events. It checks the end-of-statement status
8035 of the past sequence to report on any isssue.
8036 In the positive case the tracker gets reset.
8037
8038 @return true when the Rows- event group integrity found compromised,
8039 false otherwise.
8040*/
8041bool Rows_event_tracker::check_and_report(const char* file_name,
8042 my_off_t pos)
8043{
8044 if (last_seen)
8045 {
8046 // there was at least one "block" event previously
8047 if (!stmt_end_seen)
8048 {
8049 sql_print_error("Slave IO thread did not receive an expected "
8050 "Rows-log end-of-statement for event starting "
8051 "at log '%s' position %llu "
8052 "whose last block was seen at log '%s' position %llu. "
8053 "The end-of-statement should have been delivered "
8054 "before the current one at log '%s' position %llu",
8055 binlog_file_name, first_seen,
8056 binlog_file_name, last_seen, file_name, pos);
8057 return true;
8058 }
8059 reset();
8060 }
8061
8062 return false;
8063}
8064
8065/**
8066 @} (end of group Replication)
8067*/
8068
8069#endif /* HAVE_REPLICATION */
8070