| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 2016, 2018, MariaDB Corporation. |
| 4 | |
| 5 | This program is free software; you can redistribute it and/or modify it under |
| 6 | the terms of the GNU General Public License as published by the Free Software |
| 7 | Foundation; version 2 of the License. |
| 8 | |
| 9 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 10 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 11 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License along with |
| 14 | this program; if not, write to the Free Software Foundation, Inc., |
| 15 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 16 | |
| 17 | *****************************************************************************/ |
| 18 | |
| 19 | /**************************************************//** |
| 20 | @file dict/dict0defrag_bg.cc |
| 21 | Defragmentation routines. |
| 22 | |
| 23 | Created 25/08/2016 Jan Lindström |
| 24 | *******************************************************/ |
| 25 | |
| 26 | #include "dict0dict.h" |
| 27 | #include "dict0stats.h" |
| 28 | #include "dict0stats_bg.h" |
| 29 | #include "dict0defrag_bg.h" |
| 30 | #include "row0mysql.h" |
| 31 | #include "srv0start.h" |
| 32 | #include "ut0new.h" |
| 33 | |
| 34 | #include <vector> |
| 35 | |
/** Mutex taken by the pool get/add/del routines in this file around
every modification of defrag_pool. Created in dict_defrag_pool_init()
and freed in dict_defrag_pool_deinit(). */
static ib_mutex_t defrag_pool_mutex;

#ifdef MYSQL_PFS
/** Performance schema key intended for defrag_pool_mutex.
NOTE(review): not referenced anywhere in the visible part of this file;
confirm whether it is still needed. */
static mysql_pfs_key_t defrag_pool_mutex_key;
#endif
| 41 | |
/** One entry of the defrag pool: identifies an index whose defrag stats
need to be saved to persistent storage. Only ids are stored, not
pointers, so the table does not have to stay open while queued. */
struct defrag_pool_item_t {
	/** id of the table the index belongs to (index->table->id) */
	table_id_t table_id;
	/** id of the index itself (index->id) */
	index_id_t index_id;
};
| 47 | |
/** Allocator type, used by std::vector */
typedef ut_allocator<defrag_pool_item_t>
	defrag_pool_allocator_t;

/** Container type of the defrag pool: an STL vector of
(table id, index id) pairs. */
typedef std::vector<defrag_pool_item_t, defrag_pool_allocator_t>
	defrag_pool_t;

/** Iterator type for iterating over the elements of objects of type
defrag_pool_t. */
typedef defrag_pool_t::iterator defrag_pool_iterator_t;

/** Pool where we store information on which indexes are to be processed
by background defragmentation stats saving. Allocated in
dict_defrag_pool_init() and released in dict_defrag_pool_deinit(). */
static defrag_pool_t* defrag_pool;

/** Shutdown flag defined outside this file. When it is set,
dict_defrag_process_entries_from_defrag_pool() stops draining the pool. */
extern bool dict_stats_start_shutdown;
| 65 | |
| 66 | /*****************************************************************//** |
| 67 | Initialize the defrag pool, called once during thread initialization. */ |
| 68 | void |
| 69 | dict_defrag_pool_init(void) |
| 70 | /*=======================*/ |
| 71 | { |
| 72 | ut_ad(!srv_read_only_mode); |
| 73 | /* JAN: TODO: MySQL 5.7 PSI |
| 74 | const PSI_memory_key key2 = mem_key_dict_defrag_pool_t; |
| 75 | |
| 76 | defrag_pool = UT_NEW(defrag_pool_t(defrag_pool_allocator_t(key2)), key2); |
| 77 | |
| 78 | recalc_pool->reserve(RECALC_POOL_INITIAL_SLOTS); |
| 79 | */ |
| 80 | defrag_pool = new std::vector<defrag_pool_item_t, defrag_pool_allocator_t>(); |
| 81 | |
| 82 | /* We choose SYNC_STATS_DEFRAG to be below SYNC_FSP_PAGE. */ |
| 83 | mutex_create(LATCH_ID_DEFRAGMENT_MUTEX, &defrag_pool_mutex); |
| 84 | } |
| 85 | |
| 86 | /*****************************************************************//** |
| 87 | Free the resources occupied by the defrag pool, called once during |
| 88 | thread de-initialization. */ |
| 89 | void |
| 90 | dict_defrag_pool_deinit(void) |
| 91 | /*=========================*/ |
| 92 | { |
| 93 | ut_ad(!srv_read_only_mode); |
| 94 | |
| 95 | defrag_pool->clear(); |
| 96 | mutex_free(&defrag_pool_mutex); |
| 97 | |
| 98 | UT_DELETE(defrag_pool); |
| 99 | } |
| 100 | |
| 101 | /*****************************************************************//** |
| 102 | Get an index from the auto defrag pool. The returned index id is removed |
| 103 | from the pool. |
| 104 | @return true if the pool was non-empty and "id" was set, false otherwise */ |
| 105 | static |
| 106 | bool |
| 107 | dict_stats_defrag_pool_get( |
| 108 | /*=======================*/ |
| 109 | table_id_t* table_id, /*!< out: table id, or unmodified if |
| 110 | list is empty */ |
| 111 | index_id_t* index_id) /*!< out: index id, or unmodified if |
| 112 | list is empty */ |
| 113 | { |
| 114 | ut_ad(!srv_read_only_mode); |
| 115 | |
| 116 | mutex_enter(&defrag_pool_mutex); |
| 117 | |
| 118 | if (defrag_pool->empty()) { |
| 119 | mutex_exit(&defrag_pool_mutex); |
| 120 | return(false); |
| 121 | } |
| 122 | |
| 123 | defrag_pool_item_t& item = defrag_pool->back(); |
| 124 | *table_id = item.table_id; |
| 125 | *index_id = item.index_id; |
| 126 | |
| 127 | defrag_pool->pop_back(); |
| 128 | |
| 129 | mutex_exit(&defrag_pool_mutex); |
| 130 | |
| 131 | return(true); |
| 132 | } |
| 133 | |
| 134 | /*****************************************************************//** |
| 135 | Add an index in a table to the defrag pool, which is processed by the |
| 136 | background stats gathering thread. Only the table id and index id are |
| 137 | added to the list, so the table can be closed after being enqueued and |
| 138 | it will be opened when needed. If the table or index does not exist later |
| 139 | (has been DROPped), then it will be removed from the pool and skipped. */ |
| 140 | void |
| 141 | dict_stats_defrag_pool_add( |
| 142 | /*=======================*/ |
| 143 | const dict_index_t* index) /*!< in: table to add */ |
| 144 | { |
| 145 | defrag_pool_item_t item; |
| 146 | |
| 147 | ut_ad(!srv_read_only_mode); |
| 148 | |
| 149 | mutex_enter(&defrag_pool_mutex); |
| 150 | |
| 151 | /* quit if already in the list */ |
| 152 | for (defrag_pool_iterator_t iter = defrag_pool->begin(); |
| 153 | iter != defrag_pool->end(); |
| 154 | ++iter) { |
| 155 | if ((*iter).table_id == index->table->id |
| 156 | && (*iter).index_id == index->id) { |
| 157 | mutex_exit(&defrag_pool_mutex); |
| 158 | return; |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | item.table_id = index->table->id; |
| 163 | item.index_id = index->id; |
| 164 | defrag_pool->push_back(item); |
| 165 | |
| 166 | mutex_exit(&defrag_pool_mutex); |
| 167 | |
| 168 | os_event_set(dict_stats_event); |
| 169 | } |
| 170 | |
| 171 | /*****************************************************************//** |
| 172 | Delete a given index from the auto defrag pool. */ |
| 173 | void |
| 174 | dict_stats_defrag_pool_del( |
| 175 | /*=======================*/ |
| 176 | const dict_table_t* table, /*!<in: if given, remove |
| 177 | all entries for the table */ |
| 178 | const dict_index_t* index) /*!< in: if given, remove this index */ |
| 179 | { |
| 180 | ut_a((table && !index) || (!table && index)); |
| 181 | ut_ad(!srv_read_only_mode); |
| 182 | ut_ad(mutex_own(&dict_sys->mutex)); |
| 183 | |
| 184 | mutex_enter(&defrag_pool_mutex); |
| 185 | |
| 186 | defrag_pool_iterator_t iter = defrag_pool->begin(); |
| 187 | while (iter != defrag_pool->end()) { |
| 188 | if ((table && (*iter).table_id == table->id) |
| 189 | || (index |
| 190 | && (*iter).table_id == index->table->id |
| 191 | && (*iter).index_id == index->id)) { |
| 192 | /* erase() invalidates the iterator */ |
| 193 | iter = defrag_pool->erase(iter); |
| 194 | if (index) |
| 195 | break; |
| 196 | } else { |
| 197 | iter++; |
| 198 | } |
| 199 | } |
| 200 | |
| 201 | mutex_exit(&defrag_pool_mutex); |
| 202 | } |
| 203 | |
| 204 | /*****************************************************************//** |
| 205 | Get the first index that has been added for updating persistent defrag |
| 206 | stats and eventually save its stats. */ |
| 207 | static |
| 208 | void |
| 209 | dict_stats_process_entry_from_defrag_pool() |
| 210 | { |
| 211 | table_id_t table_id; |
| 212 | index_id_t index_id; |
| 213 | |
| 214 | ut_ad(!srv_read_only_mode); |
| 215 | |
| 216 | /* pop the first index from the auto defrag pool */ |
| 217 | if (!dict_stats_defrag_pool_get(&table_id, &index_id)) { |
| 218 | /* no index in defrag pool */ |
| 219 | return; |
| 220 | } |
| 221 | |
| 222 | dict_table_t* table; |
| 223 | |
| 224 | mutex_enter(&dict_sys->mutex); |
| 225 | |
| 226 | /* If the table is no longer cached, we've already lost the in |
| 227 | memory stats so there's nothing really to write to disk. */ |
| 228 | table = dict_table_open_on_id(table_id, TRUE, |
| 229 | DICT_TABLE_OP_OPEN_ONLY_IF_CACHED); |
| 230 | |
| 231 | dict_index_t* index = table && !table->corrupted |
| 232 | ? dict_table_find_index_on_id(table, index_id) |
| 233 | : NULL; |
| 234 | |
| 235 | if (!index || index->is_corrupted()) { |
| 236 | if (table) { |
| 237 | dict_table_close(table, TRUE, FALSE); |
| 238 | } |
| 239 | mutex_exit(&dict_sys->mutex); |
| 240 | return; |
| 241 | } |
| 242 | |
| 243 | mutex_exit(&dict_sys->mutex); |
| 244 | dict_stats_save_defrag_stats(index); |
| 245 | dict_table_close(table, FALSE, FALSE); |
| 246 | } |
| 247 | |
| 248 | /*****************************************************************//** |
| 249 | Get the first index that has been added for updating persistent defrag |
| 250 | stats and eventually save its stats. */ |
| 251 | void |
| 252 | dict_defrag_process_entries_from_defrag_pool() |
| 253 | /*==========================================*/ |
| 254 | { |
| 255 | while (defrag_pool->size() && !dict_stats_start_shutdown) { |
| 256 | dict_stats_process_entry_from_defrag_pool(); |
| 257 | } |
| 258 | } |
| 259 | |
| 260 | /*********************************************************************//** |
| 261 | Save defragmentation result. |
| 262 | @return DB_SUCCESS or error code */ |
| 263 | dberr_t |
| 264 | dict_stats_save_defrag_summary( |
| 265 | /*============================*/ |
| 266 | dict_index_t* index) /*!< in: index */ |
| 267 | { |
| 268 | dberr_t ret=DB_SUCCESS; |
| 269 | lint now = (lint) ut_time(); |
| 270 | |
| 271 | if (dict_index_is_ibuf(index)) { |
| 272 | return DB_SUCCESS; |
| 273 | } |
| 274 | |
| 275 | rw_lock_x_lock(dict_operation_lock); |
| 276 | mutex_enter(&dict_sys->mutex); |
| 277 | |
| 278 | ret = dict_stats_save_index_stat(index, now, "n_pages_freed" , |
| 279 | index->stat_defrag_n_pages_freed, |
| 280 | NULL, |
| 281 | "Number of pages freed during" |
| 282 | " last defragmentation run." , |
| 283 | NULL); |
| 284 | |
| 285 | mutex_exit(&dict_sys->mutex); |
| 286 | rw_lock_x_unlock(dict_operation_lock); |
| 287 | |
| 288 | return (ret); |
| 289 | } |
| 290 | |
| 291 | /*********************************************************************//** |
| 292 | Save defragmentation stats for a given index. |
| 293 | @return DB_SUCCESS or error code */ |
| 294 | dberr_t |
| 295 | dict_stats_save_defrag_stats( |
| 296 | /*============================*/ |
| 297 | dict_index_t* index) /*!< in: index */ |
| 298 | { |
| 299 | dberr_t ret; |
| 300 | |
| 301 | if (dict_index_is_ibuf(index)) { |
| 302 | return DB_SUCCESS; |
| 303 | } |
| 304 | |
| 305 | if (!index->is_readable()) { |
| 306 | return dict_stats_report_error(index->table, true); |
| 307 | } |
| 308 | |
| 309 | lint now = (lint) ut_time(); |
| 310 | mtr_t mtr; |
| 311 | ulint n_leaf_pages; |
| 312 | ulint n_leaf_reserved; |
| 313 | mtr_start(&mtr); |
| 314 | mtr_s_lock(dict_index_get_lock(index), &mtr); |
| 315 | n_leaf_reserved = btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES, |
| 316 | &n_leaf_pages, &mtr); |
| 317 | mtr_commit(&mtr); |
| 318 | |
| 319 | if (n_leaf_reserved == ULINT_UNDEFINED) { |
| 320 | // The index name is different during fast index creation, |
| 321 | // so the stats won't be associated with the right index |
| 322 | // for later use. We just return without saving. |
| 323 | return DB_SUCCESS; |
| 324 | } |
| 325 | |
| 326 | rw_lock_x_lock(dict_operation_lock); |
| 327 | |
| 328 | mutex_enter(&dict_sys->mutex); |
| 329 | ret = dict_stats_save_index_stat(index, now, "n_page_split" , |
| 330 | index->stat_defrag_n_page_split, |
| 331 | NULL, |
| 332 | "Number of new page splits on leaves" |
| 333 | " since last defragmentation." , |
| 334 | NULL); |
| 335 | if (ret != DB_SUCCESS) { |
| 336 | goto end; |
| 337 | } |
| 338 | |
| 339 | ret = dict_stats_save_index_stat( |
| 340 | index, now, "n_leaf_pages_defrag" , |
| 341 | n_leaf_pages, |
| 342 | NULL, |
| 343 | "Number of leaf pages when this stat is saved to disk" , |
| 344 | NULL); |
| 345 | if (ret != DB_SUCCESS) { |
| 346 | goto end; |
| 347 | } |
| 348 | |
| 349 | ret = dict_stats_save_index_stat( |
| 350 | index, now, "n_leaf_pages_reserved" , |
| 351 | n_leaf_reserved, |
| 352 | NULL, |
| 353 | "Number of pages reserved for this index leaves when this stat " |
| 354 | "is saved to disk" , |
| 355 | NULL); |
| 356 | |
| 357 | end: |
| 358 | mutex_exit(&dict_sys->mutex); |
| 359 | rw_lock_x_unlock(dict_operation_lock); |
| 360 | |
| 361 | return (ret); |
| 362 | } |
| 363 | |