| 1 | /* Copyright (c) 2005, 2017, Oracle and/or its affiliates. |
| 2 | Copyright (c) 2009, 2017, MariaDB Corporation |
| 3 | |
| 4 | This program is free software; you can redistribute it and/or modify |
| 5 | it under the terms of the GNU General Public License as published by |
| 6 | the Free Software Foundation; version 2 of the License. |
| 7 | |
| 8 | This program is distributed in the hope that it will be useful, |
| 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License |
| 14 | along with this program; if not, write to the Free Software |
| 15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
| 16 | |
| 17 | #ifndef RPL_RLI_H |
| 18 | #define RPL_RLI_H |
| 19 | |
| 20 | #include "rpl_tblmap.h" |
| 21 | #include "rpl_reporting.h" |
| 22 | #include "rpl_utility.h" |
| 23 | #include "log.h" /* LOG_INFO, MYSQL_BIN_LOG */ |
| 24 | #include "sql_class.h" /* THD */ |
| 25 | #include "log_event.h" |
| 26 | #include "rpl_parallel.h" |
| 27 | |
| 28 | struct RPL_TABLE_LIST; |
| 29 | class Master_info; |
| 30 | class Rpl_filter; |
| 31 | |
| 32 | |
| 33 | /**************************************************************************** |
| 34 | |
| 35 | Replication SQL Thread |
| 36 | |
| 37 | Relay_log_info contains: |
| 38 | - the current relay log |
| 39 | - the current relay log offset |
| 40 | - master log name |
| 41 | - master log sequence corresponding to the last update |
| 42 | - misc information specific to the SQL thread |
| 43 | |
| 44 | Relay_log_info is initialized from the slave.info file if such |
| 45 | exists. Otherwise, data members are initialized with defaults. The |
| 46 | initialization is done with Relay_log_info::init() call. |
| 47 | |
| 48 | The format of slave.info file: |
| 49 | |
| 50 | relay_log_name |
| 51 | relay_log_pos |
| 52 | master_log_name |
| 53 | master_log_pos |
| 54 | |
| 55 | To clean up, call end_relay_log_info() |
| 56 | |
| 57 | *****************************************************************************/ |
| 58 | |
| 59 | struct rpl_group_info; |
| 60 | struct inuse_relaylog; |
| 61 | |
| 62 | class Relay_log_info : public Slave_reporting_capability |
| 63 | { |
| 64 | public: |
| 65 | /** |
| 66 | Flags for the state of reading the relay log. Note that these are |
| 67 | bit masks. |
| 68 | */ |
| 69 | enum enum_state_flag { |
| 70 | /** We are inside a group of events forming a statement */ |
| 71 | IN_STMT=1, |
| 72 | /** We have inside a transaction */ |
| 73 | IN_TRANSACTION=2 |
| 74 | }; |
| 75 | |
| 76 | /* |
| 77 | The SQL thread owns one Relay_log_info, and each client that has |
| 78 | executed a BINLOG statement owns one Relay_log_info. This function |
| 79 | returns zero for the Relay_log_info object that belongs to the SQL |
| 80 | thread and nonzero for Relay_log_info objects that belong to |
| 81 | clients. |
| 82 | */ |
| 83 | inline bool belongs_to_client() |
| 84 | { |
| 85 | DBUG_ASSERT(sql_driver_thd); |
| 86 | return !sql_driver_thd->slave_thread; |
| 87 | } |
| 88 | |
| 89 | /* |
| 90 | If true, events with the same server id should be replicated. This |
| 91 | field is set on creation of a relay log info structure by copying |
| 92 | the value of ::replicate_same_server_id and can be overridden if |
| 93 | necessary. For example of when this is done, check sql_binlog.cc, |
| 94 | where the BINLOG statement can be used to execute "raw" events. |
| 95 | */ |
| 96 | bool replicate_same_server_id; |
| 97 | |
| 98 | /*** The following variables can only be read when protect by data lock ****/ |
| 99 | |
| 100 | /* |
| 101 | info_fd - file descriptor of the info file. set only during |
| 102 | initialization or clean up - safe to read anytime |
| 103 | cur_log_fd - file descriptor of the current read relay log |
| 104 | */ |
| 105 | File info_fd,cur_log_fd; |
| 106 | |
| 107 | /* |
| 108 | Protected with internal locks. |
| 109 | Must get data_lock when resetting the logs. |
| 110 | */ |
| 111 | MYSQL_BIN_LOG relay_log; |
| 112 | LOG_INFO linfo; |
| 113 | |
| 114 | /* |
| 115 | cur_log |
| 116 | Pointer that either points at relay_log.get_log_file() or |
| 117 | &rli->cache_buf, depending on whether the log is hot or there was |
| 118 | the need to open a cold relay_log. |
| 119 | |
| 120 | cache_buf |
| 121 | IO_CACHE used when opening cold relay logs. |
| 122 | */ |
| 123 | IO_CACHE cache_buf,*cur_log; |
| 124 | |
| 125 | /* |
| 126 | Keeps track of the number of transactions that commits |
| 127 | before fsyncing. The option --sync-relay-log-info determines |
| 128 | how many transactions should commit before fsyncing. |
| 129 | */ |
| 130 | uint sync_counter; |
| 131 | |
| 132 | /* |
| 133 | Identifies when the recovery process is going on. |
| 134 | See sql/slave.cc:init_recovery for further details. |
| 135 | */ |
| 136 | bool is_relay_log_recovery; |
| 137 | |
| 138 | /* The following variables are safe to read any time */ |
| 139 | |
| 140 | /* IO_CACHE of the info file - set only during init or end */ |
| 141 | IO_CACHE info_file; |
| 142 | |
| 143 | /* |
| 144 | List of temporary tables used by this connection. |
| 145 | This is updated when a temporary table is created or dropped by |
| 146 | a replication thread. |
| 147 | |
| 148 | Not reset when replication ends, to allow one to access the tables |
| 149 | when replication restarts. |
| 150 | |
| 151 | Protected by data_lock. |
| 152 | */ |
| 153 | All_tmp_tables_list *save_temporary_tables; |
| 154 | |
| 155 | /* |
| 156 | standard lock acquisition order to avoid deadlocks: |
| 157 | run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index |
| 158 | */ |
| 159 | mysql_mutex_t data_lock, run_lock; |
| 160 | /* |
| 161 | start_cond is broadcast when SQL thread is started |
| 162 | stop_cond - when stopped |
| 163 | data_cond - when data protected by data_lock changes |
| 164 | */ |
| 165 | mysql_cond_t start_cond, stop_cond, data_cond; |
| 166 | /* parent Master_info structure */ |
| 167 | Master_info *mi; |
| 168 | |
| 169 | /* |
| 170 | List of active relay log files. |
| 171 | (This can be more than one in case of parallel replication). |
| 172 | */ |
| 173 | inuse_relaylog *inuse_relaylog_list; |
| 174 | inuse_relaylog *last_inuse_relaylog; |
| 175 | |
| 176 | /* |
| 177 | Needed to deal properly with cur_log getting closed and re-opened with |
| 178 | a different log under our feet |
| 179 | */ |
| 180 | uint32 cur_log_old_open_count; |
| 181 | |
| 182 | /* |
| 183 | If on init_info() call error_on_rli_init_info is true that means |
| 184 | that previous call to init_info() terminated with an error, RESET |
| 185 | SLAVE must be executed and the problem fixed manually. |
| 186 | */ |
| 187 | bool error_on_rli_init_info; |
| 188 | |
| 189 | /* |
| 190 | Let's call a group (of events) : |
| 191 | - a transaction |
| 192 | or |
| 193 | - an autocommiting query + its associated events (INSERT_ID, |
| 194 | TIMESTAMP...) |
| 195 | We need these rli coordinates : |
| 196 | - relay log name and position of the beginning of the group we currently |
| 197 | are executing. Needed to know where we have to restart when replication has |
| 198 | stopped in the middle of a group (which has been rolled back by the slave). |
| 199 | - relay log name and position just after the event we have just |
| 200 | executed. This event is part of the current group. |
| 201 | Formerly we only had the immediately above coordinates, plus a 'pending' |
| 202 | variable, but this dealt wrong with the case of a transaction starting on a |
| 203 | relay log and finishing (commiting) on another relay log. Case which can |
| 204 | happen when, for example, the relay log gets rotated because of |
| 205 | max_binlog_size. |
| 206 | |
| 207 | Note: group_relay_log_name, group_relay_log_pos must only be |
| 208 | written from the thread owning the Relay_log_info (SQL thread if |
| 209 | !belongs_to_client(); client thread executing BINLOG statement if |
| 210 | belongs_to_client()). |
| 211 | */ |
| 212 | char group_relay_log_name[FN_REFLEN]; |
| 213 | ulonglong group_relay_log_pos; |
| 214 | char event_relay_log_name[FN_REFLEN]; |
| 215 | ulonglong event_relay_log_pos; |
| 216 | ulonglong future_event_relay_log_pos; |
| 217 | /* |
| 218 | The master log name for current event. Only used in parallel replication. |
| 219 | */ |
| 220 | char future_event_master_log_name[FN_REFLEN]; |
| 221 | |
| 222 | /* |
| 223 | Original log name and position of the group we're currently executing |
| 224 | (whose coordinates are group_relay_log_name/pos in the relay log) |
| 225 | in the master's binlog. These concern the *group*, because in the master's |
| 226 | binlog the log_pos that comes with each event is the position of the |
| 227 | beginning of the group. |
| 228 | |
| 229 | Note: group_master_log_name, group_master_log_pos must only be |
| 230 | written from the thread owning the Relay_log_info (SQL thread if |
| 231 | !belongs_to_client(); client thread executing BINLOG statement if |
| 232 | belongs_to_client()). |
| 233 | */ |
| 234 | char group_master_log_name[FN_REFLEN]; |
| 235 | volatile my_off_t group_master_log_pos; |
| 236 | |
| 237 | /* |
| 238 | Handling of the relay_log_space_limit optional constraint. |
| 239 | ignore_log_space_limit is used to resolve a deadlock between I/O and SQL |
| 240 | threads, the SQL thread sets it to unblock the I/O thread and make it |
| 241 | temporarily forget about the constraint. |
| 242 | */ |
| 243 | ulonglong log_space_limit,log_space_total; |
| 244 | bool ignore_log_space_limit; |
| 245 | |
| 246 | /* |
| 247 | Used by the SQL thread to instructs the IO thread to rotate |
| 248 | the logs when the SQL thread needs to purge to release some |
| 249 | disk space. |
| 250 | */ |
| 251 | bool sql_force_rotate_relay; |
| 252 | |
| 253 | time_t last_master_timestamp; |
| 254 | /* |
| 255 | The SQL driver thread sets this true while it is waiting at the end of the |
| 256 | relay log for more events to arrive. SHOW SLAVE STATUS uses this to report |
| 257 | Seconds_Behind_Master as zero while the SQL thread is so waiting. |
| 258 | */ |
| 259 | bool sql_thread_caught_up; |
| 260 | |
| 261 | void clear_until_condition(); |
| 262 | /** |
| 263 | Reset the delay. |
| 264 | This is used by RESET SLAVE to clear the delay. |
| 265 | */ |
| 266 | void clear_sql_delay() |
| 267 | { |
| 268 | sql_delay= 0; |
| 269 | } |
| 270 | |
| 271 | |
| 272 | /* |
| 273 | Needed for problems when slave stops and we want to restart it |
| 274 | skipping one or more events in the master log that have caused |
| 275 | errors, and have been manually applied by DBA already. |
| 276 | Must be ulong as it's refered to from set_var.cc |
| 277 | */ |
| 278 | volatile ulonglong slave_skip_counter; |
| 279 | ulonglong max_relay_log_size; |
| 280 | |
| 281 | volatile ulong abort_pos_wait; /* Incremented on change master */ |
| 282 | volatile ulong slave_run_id; /* Incremented on slave start */ |
| 283 | mysql_mutex_t log_space_lock; |
| 284 | mysql_cond_t log_space_cond; |
| 285 | /* |
| 286 | THD for the main sql thread, the one that starts threads to process |
| 287 | slave requests. If there is only one thread, then this THD is also |
| 288 | used for SQL processing. |
| 289 | A kill sent to this THD will kill the replication. |
| 290 | */ |
| 291 | THD *sql_driver_thd; |
| 292 | #ifndef DBUG_OFF |
| 293 | int events_till_abort; |
| 294 | #endif |
| 295 | |
| 296 | enum_gtid_skip_type gtid_skip_flag; |
| 297 | |
| 298 | /* |
| 299 | inited changes its value within LOCK_active_mi-guarded critical |
| 300 | sections at times of start_slave_threads() (0->1) and end_slave() (1->0). |
| 301 | Readers may not acquire the mutex while they realize potential concurrency |
| 302 | issue. |
| 303 | If not set, the value of other members of the structure are undefined. |
| 304 | */ |
| 305 | volatile bool inited; |
| 306 | volatile bool abort_slave; |
| 307 | volatile bool stop_for_until; |
| 308 | volatile uint slave_running; |
| 309 | |
| 310 | /* |
| 311 | Condition and its parameters from START SLAVE UNTIL clause. |
| 312 | |
| 313 | UNTIL condition is tested with is_until_satisfied() method that is |
| 314 | called by exec_relay_log_event(). is_until_satisfied() caches the result |
| 315 | of the comparison of log names because log names don't change very often; |
| 316 | this cache is invalidated by parts of code which change log names with |
| 317 | notify_*_log_name_updated() methods. (They need to be called only if SQL |
| 318 | thread is running). |
| 319 | */ |
| 320 | |
| 321 | enum { |
| 322 | UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS, UNTIL_GTID |
| 323 | } until_condition; |
| 324 | char until_log_name[FN_REFLEN]; |
| 325 | ulonglong until_log_pos; |
| 326 | /* extension extracted from log_name and converted to int */ |
| 327 | ulong until_log_name_extension; |
| 328 | /* |
| 329 | Cached result of comparison of until_log_name and current log name |
| 330 | -2 means unitialised, -1,0,1 are comarison results |
| 331 | */ |
| 332 | enum |
| 333 | { |
| 334 | UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1, |
| 335 | UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1 |
| 336 | } until_log_names_cmp_result; |
| 337 | /* Condition for UNTIL master_gtid_pos. */ |
| 338 | slave_connection_state until_gtid_pos; |
| 339 | |
| 340 | /* |
| 341 | retried_trans is a cumulative counter: how many times the slave |
| 342 | has retried a transaction (any) since slave started. |
| 343 | Protected by data_lock. |
| 344 | */ |
| 345 | ulong retried_trans; |
| 346 | /* |
| 347 | Number of executed events for SLAVE STATUS. |
| 348 | Protected by slave_executed_entries_lock |
| 349 | */ |
| 350 | int64 executed_entries; |
| 351 | |
| 352 | /* |
| 353 | If the end of the hot relay log is made of master's events ignored by the |
| 354 | slave I/O thread, these two keep track of the coords (in the master's |
| 355 | binlog) of the last of these events seen by the slave I/O thread. If not, |
| 356 | ign_master_log_name_end[0] == 0. |
| 357 | As they are like a Rotate event read/written from/to the relay log, they |
| 358 | are both protected by rli->relay_log.LOCK_log. |
| 359 | */ |
| 360 | char ign_master_log_name_end[FN_REFLEN]; |
| 361 | ulonglong ign_master_log_pos_end; |
| 362 | /* Similar for ignored GTID events. */ |
| 363 | slave_connection_state ign_gtids; |
| 364 | |
| 365 | /* |
| 366 | Indentifies where the SQL Thread should create temporary files for the |
| 367 | LOAD DATA INFILE. This is used for security reasons. |
| 368 | */ |
| 369 | char slave_patternload_file[FN_REFLEN]; |
| 370 | size_t slave_patternload_file_size; |
| 371 | |
| 372 | rpl_parallel parallel; |
| 373 | /* |
| 374 | The relay_log_state keeps track of the current binlog state of the |
| 375 | execution of the relay log. This is used to know where to resume |
| 376 | current GTID position if the slave thread is stopped and |
| 377 | restarted. It is only accessed from the SQL thread, so it does |
| 378 | not need any locking. |
| 379 | */ |
| 380 | rpl_binlog_state relay_log_state; |
| 381 | /* |
| 382 | The restart_gtid_state is used when the SQL thread restarts on a relay log |
| 383 | in GTID mode. In multi-domain parallel replication, each domain may have a |
| 384 | separat position, so some events in more progressed domains may need to be |
| 385 | skipped. This keeps track of the domains that have not yet reached their |
| 386 | starting event. |
| 387 | */ |
| 388 | slave_connection_state restart_gtid_pos; |
| 389 | |
| 390 | Relay_log_info(bool is_slave_recovery); |
| 391 | ~Relay_log_info(); |
| 392 | |
| 393 | /* |
| 394 | Invalidate cached until_log_name and group_relay_log_name comparison |
| 395 | result. Should be called after any update of group_realy_log_name if |
| 396 | there chances that sql_thread is running. |
| 397 | */ |
| 398 | inline void notify_group_relay_log_name_update() |
| 399 | { |
| 400 | if (until_condition==UNTIL_RELAY_POS) |
| 401 | until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; |
| 402 | } |
| 403 | |
| 404 | /* |
| 405 | The same as previous but for group_master_log_name. |
| 406 | */ |
| 407 | inline void notify_group_master_log_name_update() |
| 408 | { |
| 409 | if (until_condition==UNTIL_MASTER_POS) |
| 410 | until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; |
| 411 | } |
| 412 | |
| 413 | void inc_group_relay_log_pos(ulonglong log_pos, |
| 414 | rpl_group_info *rgi, |
| 415 | bool skip_lock=0); |
| 416 | |
| 417 | int wait_for_pos(THD* thd, String* log_name, longlong log_pos, |
| 418 | longlong timeout); |
| 419 | void close_temporary_tables(); |
| 420 | |
| 421 | /* Check if UNTIL condition is satisfied. See slave.cc for more. */ |
| 422 | bool is_until_satisfied(my_off_t); |
| 423 | inline ulonglong until_pos() |
| 424 | { |
| 425 | DBUG_ASSERT(until_condition == UNTIL_MASTER_POS || |
| 426 | until_condition == UNTIL_RELAY_POS); |
| 427 | return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos : |
| 428 | group_relay_log_pos); |
| 429 | } |
| 430 | |
| 431 | /** |
| 432 | Helper function to do after statement completion. |
| 433 | |
| 434 | This function is called from an event to complete the group by |
| 435 | either stepping the group position, if the "statement" is not |
| 436 | inside a transaction; or increase the event position, if the |
| 437 | "statement" is inside a transaction. |
| 438 | |
| 439 | @param event_log_pos |
| 440 | Master log position of the event. The position is recorded in the |
| 441 | relay log info and used to produce information for <code>SHOW |
| 442 | SLAVE STATUS</code>. |
| 443 | */ |
| 444 | bool stmt_done(my_off_t event_log_pos, THD *thd, rpl_group_info *rgi); |
| 445 | int alloc_inuse_relaylog(const char *name); |
| 446 | void free_inuse_relaylog(inuse_relaylog *ir); |
| 447 | void reset_inuse_relaylog(); |
| 448 | int update_relay_log_state(rpl_gtid *gtid_list, uint32 count); |
| 449 | |
| 450 | /** |
| 451 | Is the replication inside a group? |
| 452 | |
| 453 | The reader of the relay log is inside a group if either: |
| 454 | - The IN_TRANSACTION flag is set, meaning we're inside a transaction |
| 455 | - The IN_STMT flag is set, meaning we have read at least one row from |
| 456 | a multi-event entry. |
| 457 | |
| 458 | This flag reflects the state of the log 'just now', ie after the last |
| 459 | read event would be executed. |
| 460 | This allow us to test if we can stop replication before reading |
| 461 | the next entry. |
| 462 | |
| 463 | @retval true Replication thread is currently inside a group |
| 464 | @retval false Replication thread is currently not inside a group |
| 465 | */ |
| 466 | bool is_in_group() const { |
| 467 | return (m_flags & (IN_STMT | IN_TRANSACTION)); |
| 468 | } |
| 469 | |
| 470 | /** |
| 471 | Set the value of a replication state flag. |
| 472 | |
| 473 | @param flag Flag to set |
| 474 | */ |
| 475 | void set_flag(enum_state_flag flag) |
| 476 | { |
| 477 | m_flags|= flag; |
| 478 | } |
| 479 | |
| 480 | /** |
| 481 | Get the value of a replication state flag. |
| 482 | |
| 483 | @param flag Flag to get value of |
| 484 | |
| 485 | @return @c true if the flag was set, @c false otherwise. |
| 486 | */ |
| 487 | bool get_flag(enum_state_flag flag) |
| 488 | { |
| 489 | return m_flags & flag; |
| 490 | } |
| 491 | |
| 492 | /** |
| 493 | Clear the value of a replication state flag. |
| 494 | |
| 495 | @param flag Flag to clear |
| 496 | */ |
| 497 | void clear_flag(enum_state_flag flag) |
| 498 | { |
| 499 | m_flags&= ~flag; |
| 500 | } |
| 501 | |
| 502 | /** |
| 503 | Text used in THD::proc_info when the slave SQL thread is delaying. |
| 504 | */ |
| 505 | static const char *const state_delaying_string; |
| 506 | |
| 507 | bool flush(); |
| 508 | |
| 509 | /** |
| 510 | Reads the relay_log.info file. |
| 511 | */ |
| 512 | int init(const char* info_filename); |
| 513 | |
| 514 | /** |
| 515 | Indicate that a delay starts. |
| 516 | |
| 517 | This does not actually sleep; it only sets the state of this |
| 518 | Relay_log_info object to delaying so that the correct state can be |
| 519 | reported by SHOW SLAVE STATUS and SHOW PROCESSLIST. |
| 520 | |
| 521 | Requires rli->data_lock. |
| 522 | |
| 523 | @param delay_end The time when the delay shall end. |
| 524 | */ |
| 525 | void start_sql_delay(time_t delay_end) |
| 526 | { |
| 527 | mysql_mutex_assert_owner(&data_lock); |
| 528 | sql_delay_end= delay_end; |
| 529 | thd_proc_info(sql_driver_thd, state_delaying_string); |
| 530 | } |
| 531 | |
| 532 | int32 get_sql_delay() { return sql_delay; } |
| 533 | void set_sql_delay(int32 _sql_delay) { sql_delay= _sql_delay; } |
| 534 | time_t get_sql_delay_end() { return sql_delay_end; } |
| 535 | |
| 536 | private: |
| 537 | |
| 538 | |
| 539 | /** |
| 540 | Delay slave SQL thread by this amount, compared to master (in |
| 541 | seconds). This is set with CHANGE MASTER TO MASTER_DELAY=X. |
| 542 | |
| 543 | Guarded by data_lock. Initialized by the client thread executing |
| 544 | START SLAVE. Written by client threads executing CHANGE MASTER TO |
| 545 | MASTER_DELAY=X. Read by SQL thread and by client threads |
| 546 | executing SHOW SLAVE STATUS. Note: must not be written while the |
| 547 | slave SQL thread is running, since the SQL thread reads it without |
| 548 | a lock when executing Relay_log_info::flush(). |
| 549 | */ |
| 550 | int sql_delay; |
| 551 | |
| 552 | /** |
| 553 | During a delay, specifies the point in time when the delay ends. |
| 554 | |
| 555 | This is used for the SQL_Remaining_Delay column in SHOW SLAVE STATUS. |
| 556 | |
| 557 | Guarded by data_lock. Written by the sql thread. Read by client |
| 558 | threads executing SHOW SLAVE STATUS. |
| 559 | */ |
| 560 | time_t sql_delay_end; |
| 561 | |
| 562 | /* |
| 563 | Before the MASTER_DELAY parameter was added (WL#344), |
| 564 | relay_log.info had 4 lines. Now it has 5 lines. |
| 565 | */ |
| 566 | static const int LINES_IN_RELAY_LOG_INFO_WITH_DELAY= 5; |
| 567 | |
| 568 | /* |
| 569 | Holds the state of the data in the relay log. |
| 570 | We need this to ensure that we are not in the middle of a |
| 571 | statement or inside BEGIN ... COMMIT when should rotate the |
| 572 | relay log. |
| 573 | */ |
| 574 | uint32 m_flags; |
| 575 | }; |
| 576 | |
| 577 | |
| 578 | /* |
| 579 | In parallel replication, if we need to re-try a transaction due to a |
| 580 | deadlock or other temporary error, we may need to go back and re-read events |
| 581 | out of an earlier relay log. |
| 582 | |
| 583 | This structure keeps track of the relaylogs that are potentially in use. |
| 584 | Each rpl_group_info has a pointer to one of those, corresponding to the |
| 585 | first GTID event. |
| 586 | |
| 587 | A pair of reference counts keeps track of how long a relay log is potentially |
| 588 | in use. When the `completed' flag is set, all events have been read out of |
| 589 | the relay log, but the log might still be needed for retry in worker |
| 590 | threads. As worker threads complete an event group, they atomically increment |
| 591 | `dequeued_count' by the number of events that were queued for it. Thus, when |
| 592 | completed is set and dequeued_count equals queued_count, the relay log file |
| 593 | is finally done with and can be purged. |
| 594 | |
| 595 | By separating the queued and dequeued count, only the dequeued_count needs |
| 596 | multi-thread synchronisation; the completed flag and queued_count fields |
| 597 | are only accessed by the SQL driver thread and need no synchronisation. |
| 598 | */ |
| 599 | struct inuse_relaylog { |
| 600 | inuse_relaylog *next; |
| 601 | Relay_log_info *rli; |
| 602 | /* |
| 603 | relay_log_state holds the binlog state corresponding to the start of this |
| 604 | relay log file. It is an array with relay_log_state_count elements. |
| 605 | */ |
| 606 | rpl_gtid *relay_log_state; |
| 607 | uint32 relay_log_state_count; |
| 608 | /* Number of events in this relay log queued for worker threads. */ |
| 609 | int64 queued_count; |
| 610 | /* Number of events completed by worker threads. */ |
| 611 | volatile int64 dequeued_count; |
| 612 | /* Set when all events have been read from a relaylog. */ |
| 613 | bool completed; |
| 614 | char name[FN_REFLEN]; |
| 615 | }; |
| 616 | |
| 617 | |
| 618 | /* |
| 619 | This is data for various state needed to be kept for the processing of |
| 620 | one event group (transaction) during replication. |
| 621 | |
| 622 | In single-threaded replication, there will be one global rpl_group_info and |
| 623 | one global Relay_log_info per master connection. They will be linked |
| 624 | together. |
| 625 | |
| 626 | In parallel replication, there will be one rpl_group_info object for |
| 627 | each running sql thread, each having their own thd. |
| 628 | |
| 629 | All rpl_group_info will share the same Relay_log_info. |
| 630 | */ |
| 631 | |
| 632 | struct rpl_group_info |
| 633 | { |
| 634 | rpl_group_info *next; /* For free list in rpl_parallel_thread */ |
| 635 | Relay_log_info *rli; |
| 636 | THD *thd; |
| 637 | /* |
| 638 | Current GTID being processed. |
| 639 | The sub_id gives the binlog order within one domain_id. A zero sub_id |
| 640 | means that there is no active GTID. |
| 641 | */ |
| 642 | uint64 gtid_sub_id; |
| 643 | rpl_gtid current_gtid; |
| 644 | uint64 commit_id; |
| 645 | /* |
| 646 | This is used to keep transaction commit order. |
| 647 | We will signal this when we commit, and can register it to wait for the |
| 648 | commit_orderer of the previous commit to signal us. |
| 649 | */ |
| 650 | wait_for_commit commit_orderer; |
| 651 | /* |
| 652 | If non-zero, the sub_id of a prior event group whose commit we have to wait |
| 653 | for before committing ourselves. Then wait_commit_group_info points to the |
| 654 | event group to wait for. |
| 655 | |
| 656 | Before using this, rpl_parallel_entry::last_committed_sub_id should be |
| 657 | compared against wait_commit_sub_id. Only if last_committed_sub_id is |
| 658 | smaller than wait_commit_sub_id must the wait be done (otherwise the |
| 659 | waited-for transaction is already committed, so we would otherwise wait |
| 660 | for the wrong commit). |
| 661 | */ |
| 662 | uint64 wait_commit_sub_id; |
| 663 | rpl_group_info *wait_commit_group_info; |
| 664 | /* |
| 665 | This holds a pointer to a struct that keeps track of the need to wait |
| 666 | for the previous batch of event groups to reach the commit stage, before |
| 667 | this batch can start to execute. |
| 668 | |
| 669 | (When we execute in parallel the transactions that group committed |
| 670 | together on the master, we still need to wait for any prior transactions |
| 671 | to have reached the commit stage). |
| 672 | |
| 673 | The pointed-to gco is only valid for as long as |
| 674 | gtid_sub_id < parallel_entry->last_committed_sub_id. After that, it can |
| 675 | be freed by another thread. |
| 676 | */ |
| 677 | group_commit_orderer *gco; |
| 678 | |
| 679 | struct rpl_parallel_entry *parallel_entry; |
| 680 | |
| 681 | /* |
| 682 | A container to hold Intvar-, Rand- and Uservar- log events in case |
| 683 | the slave is configured with table filtering rules. |
| 684 | The withheld events are executed once their parent Query event is |
| 685 | itself determined to be executed. |
| 686 | */ |
| 687 | Deferred_log_events *deferred_events; |
| 688 | |
| 689 | /* |
| 690 | State of the container: true means IRU (Intvar, Rand, Uservar) events are |
| 691 | being gathered; false means events are executed, either deferred or directly. |
| 692 | */ |
| 693 | bool deferred_events_collecting; |
| 694 | |
| 695 | Annotate_rows_log_event *m_annotate_event; |
| 696 | |
| 697 | RPL_TABLE_LIST *tables_to_lock; /* RBR: Tables to lock */ |
| 698 | uint tables_to_lock_count; /* RBR: Count of tables to lock */ |
| 699 | table_mapping m_table_map; /* RBR: Mapping table-id to table */ |
| 700 | mysql_mutex_t sleep_lock; |
| 701 | mysql_cond_t sleep_cond; |
| 702 | |
| 703 | /* |
| 704 | trans_retries varies between 0 and slave_transaction_retries and counts how |
| 705 | many times the slave has retried the present transaction; gets reset to 0 |
| 706 | when the transaction finally succeeds. |
| 707 | */ |
| 708 | ulong trans_retries; |
| 709 | |
| 710 | /* |
| 711 | Used to defer stopping the SQL thread to give it a chance |
| 712 | to finish up the current group of events. |
| 713 | The timestamp is set and reset in @c sql_slave_killed(). |
| 714 | */ |
| 715 | time_t last_event_start_time; |
| 716 | |
| 717 | char *event_relay_log_name; |
| 718 | char event_relay_log_name_buf[FN_REFLEN]; |
| 719 | ulonglong event_relay_log_pos; |
| 720 | ulonglong future_event_relay_log_pos; |
| 721 | /* |
| 722 | The master log name for current event. Only used in parallel replication. |
| 723 | */ |
| 724 | char future_event_master_log_name[FN_REFLEN]; |
| 725 | bool is_parallel_exec; |
| 726 | /* When gtid_pending is true, we have not yet done record_gtid(). */ |
| 727 | bool gtid_pending; |
| 728 | int worker_error; |
| 729 | /* |
| 730 | Set true when we signalled that we reach the commit phase. Used to avoid |
| 731 | counting one event group twice. |
| 732 | */ |
| 733 | bool did_mark_start_commit; |
| 734 | /* Copy of flags2 from GTID event. */ |
| 735 | uchar gtid_ev_flags2; |
| 736 | enum { |
| 737 | GTID_DUPLICATE_NULL=0, |
| 738 | GTID_DUPLICATE_IGNORE=1, |
| 739 | GTID_DUPLICATE_OWNER=2 |
| 740 | }; |
| 741 | /* |
| 742 | When --gtid-ignore-duplicates, this is set to one of the above three |
| 743 | values: |
| 744 | GTID_DUPLICATE_NULL - Not using --gtid-ignore-duplicates. |
| 745 | GTID_DUPLICATE_IGNORE - This gtid already applied, skip the event group. |
| 746 | GTID_DUPLICATE_OWNER - We are the current owner of the domain, and must |
| 747 | apply the event group and then release the domain. |
| 748 | */ |
| 749 | uint8 gtid_ignore_duplicate_state; |
| 750 | |
| 751 | /* |
| 752 | Runtime state for printing a note when slave is taking |
| 753 | too long while processing a row event. |
| 754 | */ |
| 755 | longlong row_stmt_start_timestamp; |
| 756 | bool long_find_row_note_printed; |
| 757 | /* Needs room for "Gtid D-S-N\x00". */ |
| 758 | char gtid_info_buf[5+10+1+10+1+20+1]; |
| 759 | |
| 760 | /* |
| 761 | The timestamp, from the master, of the commit event. |
| 762 | Used to do delayed update of rli->last_master_timestamp, for getting |
| 763 | reasonable values out of Seconds_Behind_Master in SHOW SLAVE STATUS. |
| 764 | */ |
| 765 | time_t last_master_timestamp; |
| 766 | |
| 767 | /* |
| 768 | Information to be able to re-try an event group in case of a deadlock or |
| 769 | other temporary error. |
| 770 | */ |
| 771 | inuse_relaylog *relay_log; |
| 772 | uint64 retry_start_offset; |
| 773 | uint64 retry_event_count; |
| 774 | /* |
| 775 | If `speculation' is != SPECULATE_NO, then we are optimistically running |
| 776 | this transaction in parallel, even though it might not be safe (there may |
| 777 | be a conflict with a prior event group). |
| 778 | |
| 779 | In this case, a conflict can cause other errors than deadlocks (like |
| 780 | duplicate key for example). So in case of _any_ error, we need to roll |
| 781 | back and retry the event group. |
| 782 | */ |
/*
  The enumerators below carry no explicit values, so SPECULATE_NO is 0.
  The member `speculation' is declared together with the enum type.
*/
enum enum_speculation {
  /*
    This transaction was group-committed together on the master with the
    other transactions with which it is replicated in parallel.
  */
  SPECULATE_NO,
  /*
    We will optimistically try to run this transaction in parallel with
    other transactions, even though it is not known to be conflict free.
    If we get a conflict, we will detect it as a deadlock, roll back and
    retry.
  */
  SPECULATE_OPTIMISTIC,
  /*
    This transaction got a conflict during speculative parallel apply, or
    it was marked on the master as likely to cause a conflict or unsafe to
    speculate. So it will wait for the prior transaction to commit before
    starting to replicate.
  */
  SPECULATE_WAIT
} speculation;
/*
  Retry-kill states. RETRY_KILL_NONE is explicitly 0; the two following
  enumerators take the next values (1 and 2).
*/
enum enum_retry_killed {
  RETRY_KILL_NONE = 0,
  RETRY_KILL_PENDING,
  RETRY_KILL_KILLED
};
/*
  Declared as uchar rather than as enum_retry_killed.
  NOTE(review): presumably holds one of the enum_retry_killed values above;
  confirm against the code that assigns it.
*/
uchar killed_for_retry;
| 810 | |
/*
  Constructor, destructor and reinit(); all three are defined outside this
  header. Both the constructor and reinit() take a Relay_log_info pointer.
*/
rpl_group_info(Relay_log_info *rli_);
~rpl_group_info();
void reinit(Relay_log_info *rli);
| 814 | |
/*
  Returns true if the argument event resides in the container;
  more specifically, the checking is done against the last added event.
*/
| 819 | bool is_deferred_event(Log_event * ev) |
| 820 | { |
| 821 | return deferred_events_collecting ? deferred_events->is_last(ev) : false; |
| 822 | }; |
| 823 | /* The general cleanup that slave applier may need at the end of query. */ |
| 824 | inline void cleanup_after_query() |
| 825 | { |
| 826 | if (deferred_events) |
| 827 | deferred_events->rewind(); |
| 828 | }; |
| 829 | /* The general cleanup that slave applier may need at the end of session. */ |
| 830 | void cleanup_after_session() |
| 831 | { |
| 832 | if (deferred_events) |
| 833 | { |
| 834 | delete deferred_events; |
| 835 | deferred_events= NULL; |
| 836 | } |
| 837 | }; |
| 838 | |
| 839 | /** |
| 840 | Save pointer to Annotate_rows event and switch on the |
| 841 | binlog_annotate_row_events for this sql thread. |
| 842 | To be called when sql thread receives an Annotate_rows event. |
| 843 | */ |
| 844 | inline void set_annotate_event(Annotate_rows_log_event *event) |
| 845 | { |
| 846 | DBUG_ASSERT(m_annotate_event == NULL); |
| 847 | m_annotate_event= event; |
| 848 | this->thd->variables.binlog_annotate_row_events= 1; |
| 849 | } |
| 850 | |
| 851 | /** |
| 852 | Returns pointer to the saved Annotate_rows event or NULL if there is |
| 853 | no saved event. |
| 854 | */ |
| 855 | inline Annotate_rows_log_event* get_annotate_event() |
| 856 | { |
| 857 | return m_annotate_event; |
| 858 | } |
| 859 | |
| 860 | /** |
| 861 | Delete saved Annotate_rows event (if any) and switch off the |
| 862 | binlog_annotate_row_events for this sql thread. |
| 863 | To be called when sql thread has applied the last (i.e. with |
| 864 | STMT_END_F flag) rbr event. |
| 865 | */ |
| 866 | inline void free_annotate_event() |
| 867 | { |
| 868 | if (m_annotate_event) |
| 869 | { |
| 870 | this->thd->variables.binlog_annotate_row_events= 0; |
| 871 | delete m_annotate_event; |
| 872 | m_annotate_event= 0; |
| 873 | } |
| 874 | } |
| 875 | |
| 876 | bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE **conv_table_var) const |
| 877 | { |
| 878 | DBUG_ASSERT(tabledef_var && conv_table_var); |
| 879 | for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= ptr->next_global) |
| 880 | if (ptr->table == table_arg) |
| 881 | { |
| 882 | *tabledef_var= &static_cast<RPL_TABLE_LIST*>(ptr)->m_tabledef; |
| 883 | *conv_table_var= static_cast<RPL_TABLE_LIST*>(ptr)->m_conv_table; |
| 884 | DBUG_PRINT("debug" , ("Fetching table data for table %s.%s:" |
| 885 | " tabledef: %p, conv_table: %p" , |
| 886 | table_arg->s->db.str, table_arg->s->table_name.str, |
| 887 | *tabledef_var, *conv_table_var)); |
| 888 | return true; |
| 889 | } |
| 890 | return false; |
| 891 | } |
| 892 | |
/*
  Member functions defined outside this header; only their declarations are
  visible here.
*/
void clear_tables_to_lock();
/* Takes a THD pointer and a bool; the parameters are unnamed here. */
void cleanup_context(THD *, bool);
void slave_close_thread_tables(THD *);
void mark_start_commit_no_lock();
void mark_start_commit();
/*
  Returns a char pointer.
  NOTE(review): presumably the "Gtid D-S-N" text built in gtid_info_buf;
  confirm against the definition.
*/
char *gtid_info();
void unmark_start_commit();
| 900 | |
| 901 | longlong get_row_stmt_start_timestamp() |
| 902 | { |
| 903 | return row_stmt_start_timestamp; |
| 904 | } |
| 905 | |
| 906 | void set_row_stmt_start_timestamp() |
| 907 | { |
| 908 | if (row_stmt_start_timestamp == 0) |
| 909 | row_stmt_start_timestamp= microsecond_interval_timer(); |
| 910 | } |
| 911 | |
| 912 | void reset_row_stmt_start_timestamp() |
| 913 | { |
| 914 | row_stmt_start_timestamp= 0; |
| 915 | } |
| 916 | |
| 917 | void set_long_find_row_note_printed() |
| 918 | { |
| 919 | long_find_row_note_printed= true; |
| 920 | } |
| 921 | |
| 922 | void unset_long_find_row_note_printed() |
| 923 | { |
| 924 | long_find_row_note_printed= false; |
| 925 | } |
| 926 | |
| 927 | bool is_long_find_row_note_printed() |
| 928 | { |
| 929 | return long_find_row_note_printed; |
| 930 | } |
| 931 | |
| 932 | inline void inc_event_relay_log_pos() |
| 933 | { |
| 934 | if (!is_parallel_exec) |
| 935 | rli->event_relay_log_pos= future_event_relay_log_pos; |
| 936 | } |
| 937 | }; |
| 938 | |
| 939 | |
| 940 | /* |
| 941 | The class rpl_sql_thread_info is the THD::system_thread_info for an SQL |
| 942 | thread; this is either the driver SQL thread or a worker thread for parallel |
| 943 | replication. |
| 944 | */ |
class rpl_sql_thread_info
{
public:
  /* 6-byte charset cache; see the comment above cached_charset_invalidate(). */
  char cached_charset[6];
  /*
    NOTE(review): presumably the filter passed to the constructor; the
    constructor is defined elsewhere, so confirm there.
  */
  Rpl_filter* rpl_filter;

  rpl_sql_thread_info(Rpl_filter *filter);

  /*
    Last charset (6 bytes) seen by slave SQL thread is cached here; it helps
    the thread save 3 get_charset() per Query_log_event if the charset is not
    changing from event to event (common situation).
    All 6 bytes being equal to 0 means "cache is invalidated".
  */
  void cached_charset_invalidate();
  bool cached_charset_compare(char *charset) const;
};
| 962 | |
| 963 | |
/* Global GTID objects; declared extern here, so they are defined elsewhere. */
extern struct rpl_slave_state *rpl_global_gtid_slave_state;
extern gtid_waiting rpl_global_gtid_waiting;

/* Free functions declared here; their definitions are not in this header. */
int rpl_load_gtid_slave_state(THD *thd);
int find_gtid_slave_pos_tables(THD *thd);
int event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev);
void delete_or_keep_event_post_apply(rpl_group_info *rgi,
                                     Log_event_type typ, Log_event *ev);
| 972 | |
| 973 | #endif /* RPL_RLI_H */ |
| 974 | |