| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved. |
| 4 | Copyright (c) 2017, MariaDB Corporation. |
| 5 | |
| 6 | This program is free software; you can redistribute it and/or modify it under |
| 7 | the terms of the GNU General Public License as published by the Free Software |
| 8 | Foundation; version 2 of the License. |
| 9 | |
| 10 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU General Public License along with |
| 15 | this program; if not, write to the Free Software Foundation, Inc., |
| 16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 17 | |
| 18 | *****************************************************************************/ |
| 19 | |
| 20 | /**************************************************//** |
| 21 | @file dict/dict0stats_bg.cc |
| 22 | Code used for background table and index stats gathering. |
| 23 | |
| 24 | Created Apr 25, 2012 Vasil Dimov |
| 25 | *******************************************************/ |
| 26 | |
| 27 | #include "dict0dict.h" |
| 28 | #include "dict0stats.h" |
| 29 | #include "dict0stats_bg.h" |
| 30 | #include "dict0defrag_bg.h" |
| 31 | #include "row0mysql.h" |
| 32 | #include "srv0start.h" |
| 33 | #include "ut0new.h" |
| 34 | #include "fil0fil.h" |
| 35 | |
| 36 | #include <vector> |
| 37 | |
/** Minimum time interval between stats recalc for a given table */
#define MIN_RECALC_INTERVAL	10 /* seconds */

/** Event used to wake up dict_stats_thread. It is signalled by
dict_stats_recalc_pool_add() and on shutdown.
Not protected by any mutex. */
os_event_t			dict_stats_event;

/** Flag that initiates the shutdown of the dict stats thread. Note that
'srv_shutdown_state' is not used for this purpose, because the dict stats
thread has to be shut down before the purge thread. */
bool				dict_stats_start_shutdown;

/** Event to wait for the shutdown of the dict stats thread */
os_event_t			dict_stats_shutdown_event;

#ifdef UNIV_DEBUG
/** Used by SET GLOBAL innodb_dict_stats_disabled_debug = 1; */
my_bool				innodb_dict_stats_disabled_debug;

/** Event set by dict_stats_thread while it is disabled for debugging;
dict_stats_disabled_debug_update() waits on it. */
static os_event_t		dict_stats_disabled_event;
#endif /* UNIV_DEBUG */

/** This mutex protects the "recalc_pool" variable. */
static ib_mutex_t		recalc_pool_mutex;

/** Allocator type, used by std::vector */
typedef ut_allocator<table_id_t>
	recalc_pool_allocator_t;

/** The set of tables whose stats are to be automatically
recalculated - an STL vector of table ids */
typedef std::vector<table_id_t, recalc_pool_allocator_t>
	recalc_pool_t;

/** Iterator type for iterating over the elements of objects of type
recalc_pool_t. */
typedef recalc_pool_t::iterator
	recalc_pool_iterator_t;

/** Pool where we store information on which tables are to be processed
by background statistics gathering. Allocated in
dict_stats_recalc_pool_init() and reset to NULL in
dict_stats_recalc_pool_deinit(). */
static recalc_pool_t*		recalc_pool;
| 80 | |
| 81 | |
| 82 | /*****************************************************************//** |
| 83 | Initialize the recalc pool, called once during thread initialization. */ |
| 84 | static |
| 85 | void |
| 86 | dict_stats_recalc_pool_init() |
| 87 | /*=========================*/ |
| 88 | { |
| 89 | ut_ad(!srv_read_only_mode); |
| 90 | /* JAN: TODO: MySQL 5.7 PSI |
| 91 | const PSI_memory_key key = mem_key_dict_stats_bg_recalc_pool_t; |
| 92 | |
| 93 | recalc_pool = UT_NEW(recalc_pool_t(recalc_pool_allocator_t(key)), key); |
| 94 | |
| 95 | recalc_pool->reserve(RECALC_POOL_INITIAL_SLOTS); |
| 96 | */ |
| 97 | recalc_pool = new std::vector<table_id_t, recalc_pool_allocator_t>(); |
| 98 | } |
| 99 | |
/*****************************************************************//**
Free the resources occupied by the recalc pool, called once during
thread de-initialization. After the call recalc_pool is NULL. */
static
void
dict_stats_recalc_pool_deinit()
/*===========================*/
{
	ut_ad(!srv_read_only_mode);

	/* Drop any table ids that are still queued. */
	recalc_pool->clear();

	/* NOTE(review): UT_DELETE() must be paired with the way the pool
	object is allocated in dict_stats_recalc_pool_init() - confirm
	that both sides use the same allocation scheme. */
	UT_DELETE(recalc_pool);
	/* Reset the pointer; dict_stats_thread_deinit() tests it for NULL. */
	recalc_pool = NULL;
}
| 115 | |
| 116 | /*****************************************************************//** |
| 117 | Add a table to the recalc pool, which is processed by the |
| 118 | background stats gathering thread. Only the table id is added to the |
| 119 | list, so the table can be closed after being enqueued and it will be |
| 120 | opened when needed. If the table does not exist later (has been DROPped), |
| 121 | then it will be removed from the pool and skipped. */ |
| 122 | static |
| 123 | void |
| 124 | dict_stats_recalc_pool_add( |
| 125 | /*=======================*/ |
| 126 | const dict_table_t* table) /*!< in: table to add */ |
| 127 | { |
| 128 | ut_ad(!srv_read_only_mode); |
| 129 | |
| 130 | mutex_enter(&recalc_pool_mutex); |
| 131 | |
| 132 | /* quit if already in the list */ |
| 133 | for (recalc_pool_iterator_t iter = recalc_pool->begin(); |
| 134 | iter != recalc_pool->end(); |
| 135 | ++iter) { |
| 136 | |
| 137 | if (*iter == table->id) { |
| 138 | mutex_exit(&recalc_pool_mutex); |
| 139 | return; |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | recalc_pool->push_back(table->id); |
| 144 | |
| 145 | mutex_exit(&recalc_pool_mutex); |
| 146 | |
| 147 | os_event_set(dict_stats_event); |
| 148 | } |
| 149 | |
| 150 | /** Update the table modification counter and if necessary, |
| 151 | schedule new estimates for table and index statistics to be calculated. |
| 152 | @param[in,out] table persistent or temporary table */ |
| 153 | void |
| 154 | dict_stats_update_if_needed(dict_table_t* table) |
| 155 | { |
| 156 | ut_ad(table->stat_initialized); |
| 157 | ut_ad(!mutex_own(&dict_sys->mutex)); |
| 158 | |
| 159 | ulonglong counter = table->stat_modified_counter++; |
| 160 | ulonglong n_rows = dict_table_get_n_rows(table); |
| 161 | |
| 162 | if (dict_stats_is_persistent_enabled(table)) { |
| 163 | if (counter > n_rows / 10 /* 10% */ |
| 164 | && dict_stats_auto_recalc_is_enabled(table)) { |
| 165 | |
| 166 | dict_stats_recalc_pool_add(table); |
| 167 | table->stat_modified_counter = 0; |
| 168 | } |
| 169 | return; |
| 170 | } |
| 171 | |
| 172 | /* Calculate new statistics if 1 / 16 of table has been modified |
| 173 | since the last time a statistics batch was run. |
| 174 | We calculate statistics at most every 16th round, since we may have |
| 175 | a counter table which is very small and updated very often. */ |
| 176 | ulonglong threshold = 16 + n_rows / 16; /* 6.25% */ |
| 177 | |
| 178 | if (srv_stats_modified_counter) { |
| 179 | threshold = std::min(srv_stats_modified_counter, threshold); |
| 180 | } |
| 181 | |
| 182 | if (counter > threshold) { |
| 183 | /* this will reset table->stat_modified_counter to 0 */ |
| 184 | dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT); |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | /*****************************************************************//** |
| 189 | Get a table from the auto recalc pool. The returned table id is removed |
| 190 | from the pool. |
| 191 | @return true if the pool was non-empty and "id" was set, false otherwise */ |
| 192 | static |
| 193 | bool |
| 194 | dict_stats_recalc_pool_get( |
| 195 | /*=======================*/ |
| 196 | table_id_t* id) /*!< out: table id, or unmodified if list is |
| 197 | empty */ |
| 198 | { |
| 199 | ut_ad(!srv_read_only_mode); |
| 200 | |
| 201 | mutex_enter(&recalc_pool_mutex); |
| 202 | |
| 203 | if (recalc_pool->empty()) { |
| 204 | mutex_exit(&recalc_pool_mutex); |
| 205 | return(false); |
| 206 | } |
| 207 | |
| 208 | *id = recalc_pool->at(0); |
| 209 | |
| 210 | recalc_pool->erase(recalc_pool->begin()); |
| 211 | |
| 212 | mutex_exit(&recalc_pool_mutex); |
| 213 | |
| 214 | return(true); |
| 215 | } |
| 216 | |
| 217 | /*****************************************************************//** |
| 218 | Delete a given table from the auto recalc pool. |
| 219 | dict_stats_recalc_pool_del() */ |
| 220 | void |
| 221 | dict_stats_recalc_pool_del( |
| 222 | /*=======================*/ |
| 223 | const dict_table_t* table) /*!< in: table to remove */ |
| 224 | { |
| 225 | ut_ad(!srv_read_only_mode); |
| 226 | ut_ad(mutex_own(&dict_sys->mutex)); |
| 227 | |
| 228 | mutex_enter(&recalc_pool_mutex); |
| 229 | |
| 230 | ut_ad(table->id > 0); |
| 231 | |
| 232 | for (recalc_pool_iterator_t iter = recalc_pool->begin(); |
| 233 | iter != recalc_pool->end(); |
| 234 | ++iter) { |
| 235 | |
| 236 | if (*iter == table->id) { |
| 237 | /* erase() invalidates the iterator */ |
| 238 | recalc_pool->erase(iter); |
| 239 | break; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | mutex_exit(&recalc_pool_mutex); |
| 244 | } |
| 245 | |
| 246 | /*****************************************************************//** |
| 247 | Wait until background stats thread has stopped using the specified table. |
| 248 | The caller must have locked the data dictionary using |
| 249 | row_mysql_lock_data_dictionary() and this function may unlock it temporarily |
| 250 | and restore the lock before it exits. |
| 251 | The background stats thread is guaranteed not to start using the specified |
| 252 | table after this function returns and before the caller unlocks the data |
| 253 | dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag |
| 254 | under dict_sys->mutex. */ |
| 255 | void |
| 256 | dict_stats_wait_bg_to_stop_using_table( |
| 257 | /*===================================*/ |
| 258 | dict_table_t* table, /*!< in/out: table */ |
| 259 | trx_t* trx) /*!< in/out: transaction to use for |
| 260 | unlocking/locking the data dict */ |
| 261 | { |
| 262 | while (!dict_stats_stop_bg(table)) { |
| 263 | DICT_BG_YIELD(trx); |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | /*****************************************************************//** |
| 268 | Initialize global variables needed for the operation of dict_stats_thread() |
| 269 | Must be called before dict_stats_thread() is started. */ |
| 270 | void |
| 271 | dict_stats_thread_init() |
| 272 | { |
| 273 | ut_a(!srv_read_only_mode); |
| 274 | |
| 275 | dict_stats_event = os_event_create(0); |
| 276 | dict_stats_shutdown_event = os_event_create(0); |
| 277 | |
| 278 | ut_d(dict_stats_disabled_event = os_event_create(0)); |
| 279 | |
| 280 | /* The recalc_pool_mutex is acquired from: |
| 281 | 1) the background stats gathering thread before any other latch |
| 282 | and released without latching anything else in between (thus |
| 283 | any level would do here) |
| 284 | 2) from dict_stats_update_if_needed() |
| 285 | and released without latching anything else in between. We know |
| 286 | that dict_sys->mutex (SYNC_DICT) is not acquired when |
| 287 | dict_stats_update_if_needed() is called and it may be acquired |
| 288 | inside that function (thus a level <=SYNC_DICT would do). |
| 289 | 3) from row_drop_table_for_mysql() after dict_sys->mutex (SYNC_DICT) |
| 290 | and dict_operation_lock (SYNC_DICT_OPERATION) have been locked |
| 291 | (thus a level <SYNC_DICT && <SYNC_DICT_OPERATION would do) |
| 292 | So we choose SYNC_STATS_AUTO_RECALC to be about below SYNC_DICT. */ |
| 293 | |
| 294 | mutex_create(LATCH_ID_RECALC_POOL, &recalc_pool_mutex); |
| 295 | |
| 296 | dict_stats_recalc_pool_init(); |
| 297 | dict_defrag_pool_init(); |
| 298 | |
| 299 | } |
| 300 | |
| 301 | /*****************************************************************//** |
| 302 | Free resources allocated by dict_stats_thread_init(), must be called |
| 303 | after dict_stats_thread() has exited. */ |
| 304 | void |
| 305 | dict_stats_thread_deinit() |
| 306 | /*======================*/ |
| 307 | { |
| 308 | ut_a(!srv_read_only_mode); |
| 309 | ut_ad(!srv_dict_stats_thread_active); |
| 310 | |
| 311 | if (recalc_pool == NULL) { |
| 312 | return; |
| 313 | } |
| 314 | |
| 315 | dict_stats_recalc_pool_deinit(); |
| 316 | dict_defrag_pool_deinit(); |
| 317 | |
| 318 | mutex_free(&recalc_pool_mutex); |
| 319 | |
| 320 | ut_d(os_event_destroy(dict_stats_disabled_event)); |
| 321 | os_event_destroy(dict_stats_event); |
| 322 | os_event_destroy(dict_stats_shutdown_event); |
| 323 | dict_stats_start_shutdown = false; |
| 324 | } |
| 325 | |
| 326 | /*****************************************************************//** |
| 327 | Get the first table that has been added for auto recalc and eventually |
| 328 | update its stats. */ |
| 329 | static |
| 330 | void |
| 331 | dict_stats_process_entry_from_recalc_pool() |
| 332 | /*=======================================*/ |
| 333 | { |
| 334 | table_id_t table_id; |
| 335 | |
| 336 | ut_ad(!srv_read_only_mode); |
| 337 | |
| 338 | /* pop the first table from the auto recalc pool */ |
| 339 | if (!dict_stats_recalc_pool_get(&table_id)) { |
| 340 | /* no tables for auto recalc */ |
| 341 | return; |
| 342 | } |
| 343 | |
| 344 | dict_table_t* table; |
| 345 | |
| 346 | mutex_enter(&dict_sys->mutex); |
| 347 | |
| 348 | table = dict_table_open_on_id(table_id, TRUE, DICT_TABLE_OP_NORMAL); |
| 349 | |
| 350 | if (table == NULL) { |
| 351 | /* table does not exist, must have been DROPped |
| 352 | after its id was enqueued */ |
| 353 | mutex_exit(&dict_sys->mutex); |
| 354 | return; |
| 355 | } |
| 356 | |
| 357 | ut_ad(!table->is_temporary()); |
| 358 | |
| 359 | if (!fil_table_accessible(table)) { |
| 360 | dict_table_close(table, TRUE, FALSE); |
| 361 | mutex_exit(&dict_sys->mutex); |
| 362 | return; |
| 363 | } |
| 364 | |
| 365 | table->stats_bg_flag |= BG_STAT_IN_PROGRESS; |
| 366 | |
| 367 | mutex_exit(&dict_sys->mutex); |
| 368 | |
| 369 | /* ut_time() could be expensive, the current function |
| 370 | is called once every time a table has been changed more than 10% and |
| 371 | on a system with lots of small tables, this could become hot. If we |
| 372 | find out that this is a problem, then the check below could eventually |
| 373 | be replaced with something else, though a time interval is the natural |
| 374 | approach. */ |
| 375 | |
| 376 | if (ut_difftime(ut_time(), table->stats_last_recalc) |
| 377 | < MIN_RECALC_INTERVAL) { |
| 378 | |
| 379 | /* Stats were (re)calculated not long ago. To avoid |
| 380 | too frequent stats updates we put back the table on |
| 381 | the auto recalc list and do nothing. */ |
| 382 | |
| 383 | dict_stats_recalc_pool_add(table); |
| 384 | |
| 385 | } else { |
| 386 | |
| 387 | dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT); |
| 388 | } |
| 389 | |
| 390 | mutex_enter(&dict_sys->mutex); |
| 391 | |
| 392 | table->stats_bg_flag = BG_STAT_NONE; |
| 393 | |
| 394 | dict_table_close(table, TRUE, FALSE); |
| 395 | |
| 396 | mutex_exit(&dict_sys->mutex); |
| 397 | } |
| 398 | |
#ifdef UNIV_DEBUG
/** Enable or disable the dict stats thread. Used by:
	SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0).
When disabling, the stats thread is woken up and the call blocks until the
thread has signalled dict_stats_disabled_event.
@param[in]	save		immediate result from check function */
void dict_stats_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
				      const void* save)
{
	/* This method is protected by mutex, as every SET GLOBAL .. */
	ut_ad(dict_stats_disabled_event != NULL);

	const bool	want_disabled = *static_cast<const my_bool*>(save);

	/* reset the event before publishing the new setting, and remember
	the signal count to wait on */
	const int64_t	reset_count = os_event_reset(dict_stats_disabled_event);

	innodb_dict_stats_disabled_debug = want_disabled;

	if (!want_disabled) {
		return;
	}

	os_event_set(dict_stats_event);
	os_event_wait_low(dict_stats_disabled_event, reset_count);
}
#endif /* UNIV_DEBUG */
| 421 | |
| 422 | |
/*****************************************************************//**
This is the thread for background stats gathering. It pops tables from
the auto recalc list and processes them, eventually recalculating their
statistics. After each recalc pool entry it also processes the defrag
pool. The loop runs until dict_stats_start_shutdown is set.
@return this function does not return, it calls os_thread_exit() */
extern "C"
os_thread_ret_t
DECLARE_THREAD(dict_stats_thread)(void*)
{
	my_thread_init();
	ut_a(!srv_read_only_mode);

#ifdef UNIV_PFS_THREAD
	/* JAN: TODO: MySQL 5.7 PSI
	pfs_register_thread(dict_stats_thread_key);
	*/
#endif /* UNIV_PFS_THREAD */

	while (!dict_stats_start_shutdown) {

		/* Wake up periodically even if not signaled. This is
		because we may lose an event - if the below call to
		dict_stats_process_entry_from_recalc_pool() puts the entry back
		in the list, the os_event_set() will be lost by the subsequent
		os_event_reset(). */
		os_event_wait_time(
			dict_stats_event, MIN_RECALC_INTERVAL * 1000000);

#ifdef UNIV_DEBUG
		/* While the thread is disabled for debugging, keep
		signalling dict_stats_disabled_event (which
		dict_stats_disabled_debug_update() waits on) and do no
		work; a shutdown request still ends the wait. */
		while (innodb_dict_stats_disabled_debug) {
			os_event_set(dict_stats_disabled_event);
			if (dict_stats_start_shutdown) {
				break;
			}
			os_event_wait_time(
				dict_stats_event, 100000);
		}
#endif /* UNIV_DEBUG */

		/* Shutdown may have been requested while we were waiting. */
		if (dict_stats_start_shutdown) {
			break;
		}

		dict_stats_process_entry_from_recalc_pool();
		dict_defrag_process_entries_from_defrag_pool();

		os_event_reset(dict_stats_event);
	}

	srv_dict_stats_thread_active = false;

	/* Let dict_stats_shutdown() know that the thread is done. */
	os_event_set(dict_stats_shutdown_event);
	my_thread_end();

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit instead of return(). */
	os_thread_exit();

	OS_THREAD_DUMMY_RETURN;
}
| 483 | |
/** Shut down the dict_stats_thread: request the shutdown, wake the thread
up and block until it has signalled dict_stats_shutdown_event. */
void
dict_stats_shutdown()
{
	/* The flag has to be set before the thread is woken up, so that
	the thread sees the request when it checks the flag. */
	dict_stats_start_shutdown = true;
	os_event_set(dict_stats_event);
	os_event_wait(dict_stats_shutdown_event);
}
| 492 | |