| 1 | /* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. |
| 2 | Copyright (c) 2014, SkySQL Ab. |
| 3 | Copyright (c) 2016, MariaDB Corporation |
| 4 | |
| 5 | This program is free software; you can redistribute it and/or modify |
| 6 | it under the terms of the GNU General Public License as published by |
| 7 | the Free Software Foundation; version 2 of the License. |
| 8 | |
| 9 | This program is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | GNU General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU General Public License |
| 15 | along with this program; if not, write to the Free Software |
| 16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
| 17 | |
| 18 | #include "mariadb.h" |
| 19 | #include "sql_parse.h" // check_one_table_access |
| 20 | // check_merge_table_access |
| 21 | // check_one_table_access |
| 22 | #include "sql_table.h" // mysql_alter_table, etc. |
| 23 | #include "sql_cmd.h" // Sql_cmd |
| 24 | #include "sql_alter.h" // Sql_cmd_alter_table |
| 25 | #include "sql_partition.h" // struct partition_info, etc. |
| 26 | #include "debug_sync.h" // DEBUG_SYNC |
| 27 | #include "sql_truncate.h" // mysql_truncate_table, |
| 28 | // Sql_cmd_truncate_table |
| 29 | #include "sql_admin.h" // Analyze/Check/.._table_statement |
| 30 | #include "sql_partition_admin.h" // Alter_table_*_partition |
| 31 | #ifdef WITH_PARTITION_STORAGE_ENGINE |
| 32 | #include "ha_partition.h" // ha_partition |
| 33 | #endif |
| 34 | #include "sql_base.h" // open_and_lock_tables |
| 35 | |
| 36 | #ifndef WITH_PARTITION_STORAGE_ENGINE |
| 37 | |
| 38 | bool Sql_cmd_partition_unsupported::execute(THD *) |
| 39 | { |
| 40 | DBUG_ENTER("Sql_cmd_partition_unsupported::execute" ); |
| 41 | /* error, partitioning support not compiled in... */ |
| 42 | my_error(ER_FEATURE_DISABLED, MYF(0), "partitioning" , |
| 43 | "--with-plugin-partition" ); |
| 44 | DBUG_RETURN(TRUE); |
| 45 | } |
| 46 | |
| 47 | #else |
| 48 | |
| 49 | bool Sql_cmd_alter_table_exchange_partition::execute(THD *thd) |
| 50 | { |
| 51 | /* Moved from mysql_execute_command */ |
| 52 | LEX *lex= thd->lex; |
| 53 | /* first SELECT_LEX (have special meaning for many of non-SELECTcommands) */ |
| 54 | SELECT_LEX *select_lex= &lex->select_lex; |
| 55 | /* first table of first SELECT_LEX */ |
| 56 | TABLE_LIST *first_table= (TABLE_LIST*) select_lex->table_list.first; |
| 57 | /* |
| 58 | Code in mysql_alter_table() may modify its HA_CREATE_INFO argument, |
| 59 | so we have to use a copy of this structure to make execution |
| 60 | prepared statement- safe. A shallow copy is enough as no memory |
| 61 | referenced from this structure will be modified. |
| 62 | @todo move these into constructor... |
| 63 | */ |
| 64 | HA_CREATE_INFO create_info(lex->create_info); |
| 65 | Alter_info alter_info(lex->alter_info, thd->mem_root); |
| 66 | ulong priv_needed= ALTER_ACL | DROP_ACL | INSERT_ACL | CREATE_ACL; |
| 67 | |
| 68 | DBUG_ENTER("Sql_cmd_alter_table_exchange_partition::execute" ); |
| 69 | |
| 70 | if (unlikely(thd->is_fatal_error)) |
| 71 | { |
| 72 | /* out of memory creating a copy of alter_info */ |
| 73 | DBUG_RETURN(TRUE); |
| 74 | } |
| 75 | |
| 76 | /* Must be set in the parser */ |
| 77 | DBUG_ASSERT(select_lex->db.str); |
| 78 | /* also check the table to be exchanged with the partition */ |
| 79 | DBUG_ASSERT(alter_info.partition_flags & ALTER_PARTITION_EXCHANGE); |
| 80 | |
| 81 | if (unlikely(check_access(thd, priv_needed, first_table->db.str, |
| 82 | &first_table->grant.privilege, |
| 83 | &first_table->grant.m_internal, |
| 84 | 0, 0)) || |
| 85 | unlikely(check_access(thd, priv_needed, first_table->next_local->db.str, |
| 86 | &first_table->next_local->grant.privilege, |
| 87 | &first_table->next_local->grant.m_internal, |
| 88 | 0, 0))) |
| 89 | DBUG_RETURN(TRUE); |
| 90 | |
| 91 | if (unlikely(check_grant(thd, priv_needed, first_table, FALSE, UINT_MAX, |
| 92 | FALSE))) |
| 93 | DBUG_RETURN(TRUE); |
| 94 | |
| 95 | /* Not allowed with EXCHANGE PARTITION */ |
| 96 | DBUG_ASSERT(!create_info.data_file_name && !create_info.index_file_name); |
| 97 | WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table); |
| 98 | |
| 99 | thd->prepare_logs_for_admin_command(); |
| 100 | DBUG_RETURN(exchange_partition(thd, first_table, &alter_info)); |
| 101 | #ifdef WITH_WSREP |
| 102 | error: |
| 103 | /* handle errors in TO_ISOLATION here */ |
| 104 | DBUG_RETURN(true); |
| 105 | #endif /* WITH_WSREP */ |
| 106 | } |
| 107 | |
| 108 | |
| 109 | /** |
| 110 | @brief Checks that the tables will be able to be used for EXCHANGE PARTITION. |
| 111 | @param table Non partitioned table. |
| 112 | @param part_table Partitioned table. |
| 113 | |
| 114 | @retval FALSE if OK, otherwise error is reported and TRUE is returned. |
| 115 | */ |
| 116 | |
| 117 | static bool check_exchange_partition(TABLE *table, TABLE *part_table) |
| 118 | { |
| 119 | DBUG_ENTER("check_exchange_partition" ); |
| 120 | |
| 121 | /* Both tables must exist */ |
| 122 | if (unlikely(!part_table || !table)) |
| 123 | { |
| 124 | my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0)); |
| 125 | DBUG_RETURN(TRUE); |
| 126 | } |
| 127 | |
| 128 | /* The first table must be partitioned, and the second must not */ |
| 129 | if (unlikely(!part_table->part_info)) |
| 130 | { |
| 131 | my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); |
| 132 | DBUG_RETURN(TRUE); |
| 133 | } |
| 134 | if (unlikely(table->part_info)) |
| 135 | { |
| 136 | my_error(ER_PARTITION_EXCHANGE_PART_TABLE, MYF(0), |
| 137 | table->s->table_name.str); |
| 138 | DBUG_RETURN(TRUE); |
| 139 | } |
| 140 | |
| 141 | if (unlikely(part_table->file->ht != partition_hton)) |
| 142 | { |
| 143 | /* |
| 144 | Only allowed on partitioned tables throught the generic ha_partition |
| 145 | handler, i.e not yet for native partitioning. |
| 146 | */ |
| 147 | my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); |
| 148 | DBUG_RETURN(TRUE); |
| 149 | } |
| 150 | |
| 151 | if (unlikely(table->file->ht != part_table->part_info->default_engine_type)) |
| 152 | { |
| 153 | my_error(ER_MIX_HANDLER_ERROR, MYF(0)); |
| 154 | DBUG_RETURN(TRUE); |
| 155 | } |
| 156 | |
| 157 | /* Verify that table is not tmp table, partitioned tables cannot be tmp. */ |
| 158 | if (unlikely(table->s->tmp_table != NO_TMP_TABLE)) |
| 159 | { |
| 160 | my_error(ER_PARTITION_EXCHANGE_TEMP_TABLE, MYF(0), |
| 161 | table->s->table_name.str); |
| 162 | DBUG_RETURN(TRUE); |
| 163 | } |
| 164 | |
| 165 | /* The table cannot have foreign keys constraints or be referenced */ |
| 166 | if (unlikely(!table->file->can_switch_engines())) |
| 167 | { |
| 168 | my_error(ER_PARTITION_EXCHANGE_FOREIGN_KEY, MYF(0), |
| 169 | table->s->table_name.str); |
| 170 | DBUG_RETURN(TRUE); |
| 171 | } |
| 172 | DBUG_RETURN(FALSE); |
| 173 | } |
| 174 | |
| 175 | |
| 176 | /** |
| 177 | @brief Compare table structure/options between a non partitioned table |
| 178 | and a specific partition of a partitioned table. |
| 179 | |
| 180 | @param thd Thread object. |
| 181 | @param table Non partitioned table. |
| 182 | @param part_table Partitioned table. |
| 183 | @param part_elem Partition element to use for partition specific compare. |
| 184 | */ |
| 185 | static bool compare_table_with_partition(THD *thd, TABLE *table, |
| 186 | TABLE *part_table, |
| 187 | partition_element *part_elem, |
| 188 | uint part_id) |
| 189 | { |
| 190 | HA_CREATE_INFO table_create_info, part_create_info; |
| 191 | Alter_info part_alter_info; |
| 192 | Alter_table_ctx part_alter_ctx; // Not used |
| 193 | DBUG_ENTER("compare_table_with_partition" ); |
| 194 | |
| 195 | bool metadata_equal= false; |
| 196 | memset(&part_create_info, 0, sizeof(HA_CREATE_INFO)); |
| 197 | memset(&table_create_info, 0, sizeof(HA_CREATE_INFO)); |
| 198 | |
| 199 | update_create_info_from_table(&table_create_info, table); |
| 200 | /* get the current auto_increment value */ |
| 201 | table->file->update_create_info(&table_create_info); |
| 202 | /* mark all columns used, since they are used when preparing the new table */ |
| 203 | part_table->use_all_columns(); |
| 204 | table->use_all_columns(); |
| 205 | if (unlikely(mysql_prepare_alter_table(thd, part_table, &part_create_info, |
| 206 | &part_alter_info, &part_alter_ctx))) |
| 207 | { |
| 208 | my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); |
| 209 | DBUG_RETURN(TRUE); |
| 210 | } |
| 211 | /* db_type is not set in prepare_alter_table */ |
| 212 | part_create_info.db_type= part_table->part_info->default_engine_type; |
| 213 | ((ha_partition*)(part_table->file))->update_part_create_info(&part_create_info, part_id); |
| 214 | /* |
| 215 | Since we exchange the partition with the table, allow exchanging |
| 216 | auto_increment value as well. |
| 217 | */ |
| 218 | part_create_info.auto_increment_value= |
| 219 | table_create_info.auto_increment_value; |
| 220 | |
| 221 | /* Check compatible row_types and set create_info accordingly. */ |
| 222 | { |
| 223 | enum row_type part_row_type= part_table->file->get_row_type(); |
| 224 | enum row_type table_row_type= table->file->get_row_type(); |
| 225 | if (part_row_type != table_row_type) |
| 226 | { |
| 227 | my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), |
| 228 | "ROW_FORMAT" ); |
| 229 | DBUG_RETURN(true); |
| 230 | } |
| 231 | part_create_info.row_type= table->s->row_type; |
| 232 | } |
| 233 | |
| 234 | /* |
| 235 | NOTE: ha_blackhole does not support check_if_compatible_data, |
| 236 | so this always fail for blackhole tables. |
| 237 | ha_myisam compares pointers to verify that DATA/INDEX DIRECTORY is |
| 238 | the same, so any table using data/index_file_name will fail. |
| 239 | */ |
| 240 | if (mysql_compare_tables(table, &part_alter_info, &part_create_info, |
| 241 | &metadata_equal)) |
| 242 | { |
| 243 | my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); |
| 244 | DBUG_RETURN(TRUE); |
| 245 | } |
| 246 | |
| 247 | DEBUG_SYNC(thd, "swap_partition_after_compare_tables" ); |
| 248 | if (!metadata_equal) |
| 249 | { |
| 250 | my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); |
| 251 | DBUG_RETURN(TRUE); |
| 252 | } |
| 253 | DBUG_ASSERT(table->s->db_create_options == |
| 254 | part_table->s->db_create_options); |
| 255 | DBUG_ASSERT(table->s->db_options_in_use == |
| 256 | part_table->s->db_options_in_use); |
| 257 | |
| 258 | if (table_create_info.avg_row_length != part_create_info.avg_row_length) |
| 259 | { |
| 260 | my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), |
| 261 | "AVG_ROW_LENGTH" ); |
| 262 | DBUG_RETURN(TRUE); |
| 263 | } |
| 264 | |
| 265 | if (table_create_info.table_options != part_create_info.table_options) |
| 266 | { |
| 267 | my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), |
| 268 | "TABLE OPTION" ); |
| 269 | DBUG_RETURN(TRUE); |
| 270 | } |
| 271 | |
| 272 | if (table->s->table_charset != part_table->s->table_charset) |
| 273 | { |
| 274 | my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), |
| 275 | "CHARACTER SET" ); |
| 276 | DBUG_RETURN(TRUE); |
| 277 | } |
| 278 | |
| 279 | /* |
| 280 | NOTE: We do not support update of frm-file, i.e. change |
| 281 | max/min_rows, data/index_file_name etc. |
| 282 | The workaround is to use REORGANIZE PARTITION to rewrite |
| 283 | the frm file and then use EXCHANGE PARTITION when they are the same. |
| 284 | */ |
| 285 | if (compare_partition_options(&table_create_info, part_elem)) |
| 286 | DBUG_RETURN(TRUE); |
| 287 | |
| 288 | DBUG_RETURN(FALSE); |
| 289 | } |
| 290 | |
| 291 | |
| 292 | /** |
| 293 | @brief Exchange partition/table with ddl log. |
| 294 | |
| 295 | @details How to handle a crash in the middle of the rename (break on error): |
| 296 | 1) register in ddl_log that we are going to exchange swap_table with part. |
| 297 | 2) do the first rename (swap_table -> tmp-name) and sync the ddl_log. |
| 298 | 3) do the second rename (part -> swap_table) and sync the ddl_log. |
| 299 | 4) do the last rename (tmp-name -> part). |
| 300 | 5) mark the entry done. |
| 301 | |
| 302 | Recover by: |
| 303 | 5) is done, All completed. Nothing to recover. |
| 304 | 4) is done see 3). (No mark or sync in the ddl_log...) |
| 305 | 3) is done -> try rename part -> tmp-name (ignore failure) goto 2). |
| 306 | 2) is done -> try rename swap_table -> part (ignore failure) goto 1). |
| 307 | 1) is done -> try rename tmp-name -> swap_table (ignore failure). |
| 308 | before 1) Nothing to recover... |
| 309 | |
| 310 | @param thd Thread handle |
| 311 | @param name name of table/partition 1 (to be exchanged with 2) |
| 312 | @param from_name name of table/partition 2 (to be exchanged with 1) |
| 313 | @param tmp_name temporary name to use while exchaning |
| 314 | @param ht handlerton of the table/partitions |
| 315 | |
| 316 | @return Operation status |
| 317 | @retval TRUE Error |
| 318 | @retval FALSE Success |
| 319 | |
| 320 | @note ha_heap always succeeds in rename (since it is created upon usage). |
| 321 | This is OK when to recover from a crash since all heap are empty and the |
| 322 | recover is done early in the startup of the server (right before |
| 323 | read_init_file which can populate the tables). |
| 324 | |
| 325 | And if no crash we can trust the syncs in the ddl_log. |
| 326 | |
| 327 | What about if the rename is put into a background thread? That will cause |
| 328 | corruption and is avoided by the exlusive metadata lock. |
| 329 | */ |
| 330 | static bool exchange_name_with_ddl_log(THD *thd, |
| 331 | const char *name, |
| 332 | const char *from_name, |
| 333 | const char *tmp_name, |
| 334 | handlerton *ht) |
| 335 | { |
| 336 | DDL_LOG_ENTRY exchange_entry; |
| 337 | DDL_LOG_MEMORY_ENTRY *log_entry= NULL; |
| 338 | DDL_LOG_MEMORY_ENTRY *exec_log_entry= NULL; |
| 339 | bool error= TRUE; |
| 340 | bool error_set= FALSE; |
| 341 | handler *file= NULL; |
| 342 | DBUG_ENTER("exchange_name_with_ddl_log" ); |
| 343 | |
| 344 | if (unlikely(!(file= get_new_handler(NULL, thd->mem_root, ht)))) |
| 345 | DBUG_RETURN(TRUE); |
| 346 | |
| 347 | /* prepare the action entry */ |
| 348 | exchange_entry.entry_type= DDL_LOG_ENTRY_CODE; |
| 349 | exchange_entry.action_type= DDL_LOG_EXCHANGE_ACTION; |
| 350 | exchange_entry.next_entry= 0; |
| 351 | exchange_entry.name= name; |
| 352 | exchange_entry.from_name= from_name; |
| 353 | exchange_entry.tmp_name= tmp_name; |
| 354 | exchange_entry.handler_name= ha_resolve_storage_engine_name(ht); |
| 355 | exchange_entry.phase= EXCH_PHASE_NAME_TO_TEMP; |
| 356 | |
| 357 | mysql_mutex_lock(&LOCK_gdl); |
| 358 | /* |
| 359 | write to the ddl log what to do by: |
| 360 | 1) write the action entry (i.e. which names to be exchanged) |
| 361 | 2) write the execution entry with a link to the action entry |
| 362 | */ |
| 363 | DBUG_EXECUTE_IF("exchange_partition_fail_1" , goto err_no_action_written;); |
| 364 | DBUG_EXECUTE_IF("exchange_partition_abort_1" , DBUG_SUICIDE();); |
| 365 | if (unlikely(write_ddl_log_entry(&exchange_entry, &log_entry))) |
| 366 | goto err_no_action_written; |
| 367 | |
| 368 | DBUG_EXECUTE_IF("exchange_partition_fail_2" , goto err_no_execute_written;); |
| 369 | DBUG_EXECUTE_IF("exchange_partition_abort_2" , DBUG_SUICIDE();); |
| 370 | if (unlikely(write_execute_ddl_log_entry(log_entry->entry_pos, FALSE, |
| 371 | &exec_log_entry))) |
| 372 | goto err_no_execute_written; |
| 373 | /* ddl_log is written and synced */ |
| 374 | |
| 375 | mysql_mutex_unlock(&LOCK_gdl); |
| 376 | /* |
| 377 | Execute the name exchange. |
| 378 | Do one rename, increase the phase, update the action entry and sync. |
| 379 | In case of errors in the ddl_log we must fail and let the ddl_log try |
| 380 | to revert the changes, since otherwise it could revert the command after |
| 381 | we sent OK to the client. |
| 382 | */ |
| 383 | /* call rename table from table to tmp-name */ |
| 384 | DBUG_EXECUTE_IF("exchange_partition_fail_3" , |
| 385 | my_error(ER_ERROR_ON_RENAME, MYF(0), name, tmp_name, 0); |
| 386 | error_set= TRUE; |
| 387 | goto err_rename;); |
| 388 | DBUG_EXECUTE_IF("exchange_partition_abort_3" , DBUG_SUICIDE();); |
| 389 | if (unlikely(file->ha_rename_table(name, tmp_name))) |
| 390 | { |
| 391 | my_error(ER_ERROR_ON_RENAME, MYF(0), name, tmp_name, my_errno); |
| 392 | error_set= TRUE; |
| 393 | goto err_rename; |
| 394 | } |
| 395 | DBUG_EXECUTE_IF("exchange_partition_fail_4" , goto err_rename;); |
| 396 | DBUG_EXECUTE_IF("exchange_partition_abort_4" , DBUG_SUICIDE();); |
| 397 | if (unlikely(deactivate_ddl_log_entry(log_entry->entry_pos))) |
| 398 | goto err_rename; |
| 399 | |
| 400 | /* call rename table from partition to table */ |
| 401 | DBUG_EXECUTE_IF("exchange_partition_fail_5" , |
| 402 | my_error(ER_ERROR_ON_RENAME, MYF(0), from_name, name, 0); |
| 403 | error_set= TRUE; |
| 404 | goto err_rename;); |
| 405 | DBUG_EXECUTE_IF("exchange_partition_abort_5" , DBUG_SUICIDE();); |
| 406 | if (unlikely(file->ha_rename_table(from_name, name))) |
| 407 | { |
| 408 | my_error(ER_ERROR_ON_RENAME, MYF(0), from_name, name, my_errno); |
| 409 | error_set= TRUE; |
| 410 | goto err_rename; |
| 411 | } |
| 412 | DBUG_EXECUTE_IF("exchange_partition_fail_6" , goto err_rename;); |
| 413 | DBUG_EXECUTE_IF("exchange_partition_abort_6" , DBUG_SUICIDE();); |
| 414 | if (unlikely(deactivate_ddl_log_entry(log_entry->entry_pos))) |
| 415 | goto err_rename; |
| 416 | |
| 417 | /* call rename table from tmp-nam to partition */ |
| 418 | DBUG_EXECUTE_IF("exchange_partition_fail_7" , |
| 419 | my_error(ER_ERROR_ON_RENAME, MYF(0), tmp_name, from_name, 0); |
| 420 | error_set= TRUE; |
| 421 | goto err_rename;); |
| 422 | DBUG_EXECUTE_IF("exchange_partition_abort_7" , DBUG_SUICIDE();); |
| 423 | if (unlikely(file->ha_rename_table(tmp_name, from_name))) |
| 424 | { |
| 425 | my_error(ER_ERROR_ON_RENAME, MYF(0), tmp_name, from_name, my_errno); |
| 426 | error_set= TRUE; |
| 427 | goto err_rename; |
| 428 | } |
| 429 | DBUG_EXECUTE_IF("exchange_partition_fail_8" , goto err_rename;); |
| 430 | DBUG_EXECUTE_IF("exchange_partition_abort_8" , DBUG_SUICIDE();); |
| 431 | if (unlikely(deactivate_ddl_log_entry(log_entry->entry_pos))) |
| 432 | goto err_rename; |
| 433 | |
| 434 | /* The exchange is complete and ddl_log is deactivated */ |
| 435 | DBUG_EXECUTE_IF("exchange_partition_fail_9" , goto err_rename;); |
| 436 | DBUG_EXECUTE_IF("exchange_partition_abort_9" , DBUG_SUICIDE();); |
| 437 | /* all OK */ |
| 438 | error= FALSE; |
| 439 | delete file; |
| 440 | DBUG_RETURN(error); |
| 441 | err_rename: |
| 442 | /* |
| 443 | Nothing to do if any of these commands fails :( the commands itselfs |
| 444 | will log to the error log about the failures... |
| 445 | */ |
| 446 | /* execute the ddl log entry to revert the renames */ |
| 447 | (void) execute_ddl_log_entry(current_thd, log_entry->entry_pos); |
| 448 | mysql_mutex_lock(&LOCK_gdl); |
| 449 | /* mark the execute log entry done */ |
| 450 | (void) write_execute_ddl_log_entry(0, TRUE, &exec_log_entry); |
| 451 | /* release the execute log entry */ |
| 452 | (void) release_ddl_log_memory_entry(exec_log_entry); |
| 453 | err_no_execute_written: |
| 454 | /* release the action log entry */ |
| 455 | (void) release_ddl_log_memory_entry(log_entry); |
| 456 | err_no_action_written: |
| 457 | mysql_mutex_unlock(&LOCK_gdl); |
| 458 | delete file; |
| 459 | if (!error_set) |
| 460 | my_error(ER_DDL_LOG_ERROR, MYF(0)); |
| 461 | DBUG_RETURN(error); |
| 462 | } |
| 463 | |
| 464 | |
| 465 | /** |
| 466 | @brief Swap places between a partition and a table. |
| 467 | |
| 468 | @details Verify that the tables are compatible (same engine, definition etc), |
| 469 | verify that all rows in the table will fit in the partition, |
| 470 | if all OK, rename table to tmp name, rename partition to table |
| 471 | and finally rename tmp name to partition. |
| 472 | |
| 473 | 1) Take upgradable mdl, open tables and then lock them (inited in parse) |
| 474 | 2) Verify that metadata matches |
| 475 | 3) verify data |
| 476 | 4) Upgrade to exclusive mdl for both tables |
| 477 | 5) Rename table <-> partition |
| 478 | 6) Rely on close_thread_tables to release mdl and table locks |
| 479 | |
| 480 | @param thd Thread handle |
| 481 | @param table_list Table where the partition exists as first table, |
| 482 | Table to swap with the partition as second table |
| 483 | @param alter_info Contains partition name to swap |
| 484 | |
| 485 | @note This is a DDL operation so triggers will not be used. |
| 486 | */ |
| 487 | bool Sql_cmd_alter_table_exchange_partition:: |
| 488 | exchange_partition(THD *thd, TABLE_LIST *table_list, Alter_info *alter_info) |
| 489 | { |
| 490 | TABLE *part_table, *swap_table; |
| 491 | TABLE_LIST *swap_table_list; |
| 492 | handlerton *table_hton; |
| 493 | partition_element *part_elem; |
| 494 | const char *partition_name; |
| 495 | char temp_name[FN_REFLEN+1]; |
| 496 | char part_file_name[2*FN_REFLEN+1]; |
| 497 | char swap_file_name[FN_REFLEN+1]; |
| 498 | char temp_file_name[FN_REFLEN+1]; |
| 499 | uint swap_part_id; |
| 500 | uint part_file_name_len; |
| 501 | Alter_table_prelocking_strategy alter_prelocking_strategy; |
| 502 | MDL_ticket *swap_table_mdl_ticket= NULL; |
| 503 | MDL_ticket *part_table_mdl_ticket= NULL; |
| 504 | uint table_counter; |
| 505 | bool error= TRUE; |
| 506 | DBUG_ENTER("mysql_exchange_partition" ); |
| 507 | DBUG_ASSERT(alter_info->partition_flags & ALTER_PARTITION_EXCHANGE); |
| 508 | |
| 509 | /* Don't allow to exchange with log table */ |
| 510 | swap_table_list= table_list->next_local; |
| 511 | if (check_if_log_table(swap_table_list, FALSE, "ALTER PARTITION" )) |
| 512 | DBUG_RETURN(TRUE); |
| 513 | |
| 514 | /* |
| 515 | Currently no MDL lock that allows both read and write and is upgradeable |
| 516 | to exclusive, so leave the lock type to TL_WRITE_ALLOW_READ also on the |
| 517 | partitioned table. |
| 518 | |
| 519 | TODO: add MDL lock that allows both read and write and is upgradable to |
| 520 | exclusive lock. This would allow to continue using the partitioned table |
| 521 | also with update/insert/delete while the verification of the swap table |
| 522 | is running. |
| 523 | */ |
| 524 | |
| 525 | /* |
| 526 | NOTE: It is not possible to exchange a crashed partition/table since |
| 527 | we need some info from the engine, which we can only access after open, |
| 528 | to be able to verify the structure/metadata. |
| 529 | */ |
| 530 | table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE); |
| 531 | if (unlikely(open_tables(thd, &table_list, &table_counter, 0, |
| 532 | &alter_prelocking_strategy))) |
| 533 | DBUG_RETURN(true); |
| 534 | |
| 535 | part_table= table_list->table; |
| 536 | swap_table= swap_table_list->table; |
| 537 | |
| 538 | if (unlikely(check_exchange_partition(swap_table, part_table))) |
| 539 | DBUG_RETURN(TRUE); |
| 540 | |
| 541 | /* set lock pruning on first table */ |
| 542 | partition_name= alter_info->partition_names.head(); |
| 543 | if (unlikely(table_list->table->part_info-> |
| 544 | set_named_partition_bitmap(partition_name, |
| 545 | strlen(partition_name)))) |
| 546 | DBUG_RETURN(true); |
| 547 | |
| 548 | if (unlikely(lock_tables(thd, table_list, table_counter, 0))) |
| 549 | DBUG_RETURN(true); |
| 550 | |
| 551 | |
| 552 | table_hton= swap_table->file->ht; |
| 553 | |
| 554 | THD_STAGE_INFO(thd, stage_verifying_table); |
| 555 | |
| 556 | /* Will append the partition name later in part_info->get_part_elem() */ |
| 557 | part_file_name_len= build_table_filename(part_file_name, |
| 558 | sizeof(part_file_name), |
| 559 | table_list->db.str, |
| 560 | table_list->table_name.str, |
| 561 | "" , 0); |
| 562 | build_table_filename(swap_file_name, |
| 563 | sizeof(swap_file_name), |
| 564 | swap_table_list->db.str, |
| 565 | swap_table_list->table_name.str, |
| 566 | "" , 0); |
| 567 | /* create a unique temp name #sqlx-nnnn_nnnn, x for eXchange */ |
| 568 | my_snprintf(temp_name, sizeof(temp_name), "%sx-%lx_%llx" , |
| 569 | tmp_file_prefix, current_pid, thd->thread_id); |
| 570 | if (lower_case_table_names) |
| 571 | my_casedn_str(files_charset_info, temp_name); |
| 572 | build_table_filename(temp_file_name, sizeof(temp_file_name), |
| 573 | table_list->next_local->db.str, |
| 574 | temp_name, "" , FN_IS_TMP); |
| 575 | |
| 576 | if (unlikely(!(part_elem= |
| 577 | part_table->part_info->get_part_elem(partition_name, |
| 578 | part_file_name + |
| 579 | part_file_name_len, |
| 580 | sizeof(part_file_name) - |
| 581 | part_file_name_len, |
| 582 | &swap_part_id)))) |
| 583 | { |
| 584 | DBUG_RETURN(TRUE); |
| 585 | } |
| 586 | |
| 587 | if (unlikely(swap_part_id == NOT_A_PARTITION_ID)) |
| 588 | { |
| 589 | DBUG_ASSERT(part_table->part_info->is_sub_partitioned()); |
| 590 | my_error(ER_PARTITION_INSTEAD_OF_SUBPARTITION, MYF(0)); |
| 591 | DBUG_RETURN(TRUE); |
| 592 | } |
| 593 | |
| 594 | if (unlikely(compare_table_with_partition(thd, swap_table, part_table, |
| 595 | part_elem, |
| 596 | swap_part_id))) |
| 597 | DBUG_RETURN(TRUE); |
| 598 | |
| 599 | /* Table and partition has same structure/options, OK to exchange */ |
| 600 | |
| 601 | thd_proc_info(thd, "Verifying data with partition" ); |
| 602 | |
| 603 | if (unlikely(verify_data_with_partition(swap_table, part_table, |
| 604 | swap_part_id))) |
| 605 | DBUG_RETURN(TRUE); |
| 606 | |
| 607 | /* |
| 608 | Get exclusive mdl lock on both tables, alway the non partitioned table |
| 609 | first. Remember the tickets for downgrading locks later. |
| 610 | */ |
| 611 | swap_table_mdl_ticket= swap_table->mdl_ticket; |
| 612 | part_table_mdl_ticket= part_table->mdl_ticket; |
| 613 | |
| 614 | /* |
| 615 | No need to set used_partitions to only propagate |
| 616 | HA_EXTRA_PREPARE_FOR_RENAME to one part since no built in engine uses |
| 617 | that flag. And the action would probably be to force close all other |
| 618 | instances which is what we are doing any way. |
| 619 | */ |
| 620 | if (wait_while_table_is_used(thd, swap_table, HA_EXTRA_PREPARE_FOR_RENAME) || |
| 621 | wait_while_table_is_used(thd, part_table, HA_EXTRA_PREPARE_FOR_RENAME)) |
| 622 | goto err; |
| 623 | |
| 624 | DEBUG_SYNC(thd, "swap_partition_after_wait" ); |
| 625 | |
| 626 | close_all_tables_for_name(thd, swap_table->s, HA_EXTRA_NOT_USED, NULL); |
| 627 | close_all_tables_for_name(thd, part_table->s, HA_EXTRA_NOT_USED, NULL); |
| 628 | |
| 629 | DEBUG_SYNC(thd, "swap_partition_before_rename" ); |
| 630 | |
| 631 | if (unlikely(exchange_name_with_ddl_log(thd, swap_file_name, part_file_name, |
| 632 | temp_file_name, table_hton))) |
| 633 | goto err; |
| 634 | |
| 635 | /* |
| 636 | Reopen tables under LOCK TABLES. Ignore the return value for now. It's |
| 637 | better to keep master/slave in consistent state. Alternative would be to |
| 638 | try to revert the exchange operation and issue error. |
| 639 | */ |
| 640 | (void) thd->locked_tables_list.reopen_tables(thd, false); |
| 641 | |
| 642 | if (unlikely((error= write_bin_log(thd, TRUE, thd->query(), |
| 643 | thd->query_length())))) |
| 644 | { |
| 645 | /* |
| 646 | The error is reported in write_bin_log(). |
| 647 | We try to revert to make it easier to keep the master/slave in sync. |
| 648 | */ |
| 649 | (void) exchange_name_with_ddl_log(thd, part_file_name, swap_file_name, |
| 650 | temp_file_name, table_hton); |
| 651 | } |
| 652 | |
| 653 | err: |
| 654 | if (thd->locked_tables_mode) |
| 655 | { |
| 656 | if (swap_table_mdl_ticket) |
| 657 | swap_table_mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); |
| 658 | if (part_table_mdl_ticket) |
| 659 | part_table_mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); |
| 660 | } |
| 661 | |
| 662 | if (unlikely(!error)) |
| 663 | my_ok(thd); |
| 664 | |
| 665 | // For query cache |
| 666 | table_list->table= NULL; |
| 667 | table_list->next_local->table= NULL; |
| 668 | query_cache_invalidate3(thd, table_list, FALSE); |
| 669 | |
| 670 | DBUG_RETURN(error); |
| 671 | } |
| 672 | |
| 673 | bool Sql_cmd_alter_table_analyze_partition::execute(THD *thd) |
| 674 | { |
| 675 | bool res; |
| 676 | DBUG_ENTER("Sql_cmd_alter_table_analyze_partition::execute" ); |
| 677 | |
| 678 | /* |
| 679 | Flag that it is an ALTER command which administrates partitions, used |
| 680 | by ha_partition |
| 681 | */ |
| 682 | thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; |
| 683 | |
| 684 | res= Sql_cmd_analyze_table::execute(thd); |
| 685 | |
| 686 | DBUG_RETURN(res); |
| 687 | } |
| 688 | |
| 689 | |
| 690 | bool Sql_cmd_alter_table_check_partition::execute(THD *thd) |
| 691 | { |
| 692 | bool res; |
| 693 | DBUG_ENTER("Sql_cmd_alter_table_check_partition::execute" ); |
| 694 | |
| 695 | /* |
| 696 | Flag that it is an ALTER command which administrates partitions, used |
| 697 | by ha_partition |
| 698 | */ |
| 699 | thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; |
| 700 | |
| 701 | res= Sql_cmd_check_table::execute(thd); |
| 702 | |
| 703 | DBUG_RETURN(res); |
| 704 | } |
| 705 | |
| 706 | |
| 707 | bool Sql_cmd_alter_table_optimize_partition::execute(THD *thd) |
| 708 | { |
| 709 | bool res; |
| 710 | DBUG_ENTER("Alter_table_optimize_partition_statement::execute" ); |
| 711 | |
| 712 | /* |
| 713 | Flag that it is an ALTER command which administrates partitions, used |
| 714 | by ha_partition |
| 715 | */ |
| 716 | thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; |
| 717 | |
| 718 | res= Sql_cmd_optimize_table::execute(thd); |
| 719 | |
| 720 | DBUG_RETURN(res); |
| 721 | } |
| 722 | |
| 723 | |
| 724 | bool Sql_cmd_alter_table_repair_partition::execute(THD *thd) |
| 725 | { |
| 726 | bool res; |
| 727 | DBUG_ENTER("Sql_cmd_alter_table_repair_partition::execute" ); |
| 728 | |
| 729 | /* |
| 730 | Flag that it is an ALTER command which administrates partitions, used |
| 731 | by ha_partition |
| 732 | */ |
| 733 | thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; |
| 734 | |
| 735 | res= Sql_cmd_repair_table::execute(thd); |
| 736 | |
| 737 | DBUG_RETURN(res); |
| 738 | } |
| 739 | |
| 740 | |
| 741 | bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) |
| 742 | { |
| 743 | int error; |
| 744 | ha_partition *partition; |
| 745 | ulong timeout= thd->variables.lock_wait_timeout; |
| 746 | TABLE_LIST *first_table= thd->lex->select_lex.table_list.first; |
| 747 | Alter_info *alter_info= &thd->lex->alter_info; |
| 748 | uint table_counter, i; |
| 749 | List<String> partition_names_list; |
| 750 | bool binlog_stmt; |
| 751 | DBUG_ENTER("Sql_cmd_alter_table_truncate_partition::execute" ); |
| 752 | |
| 753 | /* |
| 754 | Flag that it is an ALTER command which administrates partitions, used |
| 755 | by ha_partition. |
| 756 | */ |
| 757 | thd->lex->alter_info.partition_flags|= (ALTER_PARTITION_ADMIN | |
| 758 | ALTER_PARTITION_TRUNCATE); |
| 759 | |
| 760 | /* Fix the lock types (not the same as ordinary ALTER TABLE). */ |
| 761 | first_table->lock_type= TL_WRITE; |
| 762 | first_table->mdl_request.set_type(MDL_EXCLUSIVE); |
| 763 | |
| 764 | /* |
| 765 | Check table permissions and open it with a exclusive lock. |
| 766 | Ensure it is a partitioned table and finally, upcast the |
| 767 | handler and invoke the partition truncate method. Lastly, |
| 768 | write the statement to the binary log if necessary. |
| 769 | */ |
| 770 | |
| 771 | if (check_one_table_access(thd, DROP_ACL, first_table)) |
| 772 | DBUG_RETURN(TRUE); |
| 773 | |
| 774 | #ifdef WITH_WSREP |
| 775 | if (WSREP(thd) && |
| 776 | (!thd->is_current_stmt_binlog_format_row() || |
| 777 | !thd->find_temporary_table(first_table)) && |
| 778 | wsrep_to_isolation_begin( |
| 779 | thd, first_table->db.str, first_table->table_name.str, NULL) |
| 780 | ) |
| 781 | { |
| 782 | WSREP_WARN("ALTER TABLE TRUNCATE PARTITION isolation failure" ); |
| 783 | DBUG_RETURN(TRUE); |
| 784 | } |
| 785 | #endif /* WITH_WSREP */ |
| 786 | |
| 787 | if (open_tables(thd, &first_table, &table_counter, 0)) |
| 788 | DBUG_RETURN(true); |
| 789 | |
| 790 | if (!first_table->table || first_table->view || |
| 791 | first_table->table->s->db_type() != partition_hton) |
| 792 | { |
| 793 | my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); |
| 794 | DBUG_RETURN(TRUE); |
| 795 | } |
| 796 | |
| 797 | |
| 798 | /* |
| 799 | Prune all, but named partitions, |
| 800 | to avoid excessive calls to external_lock(). |
| 801 | */ |
| 802 | List_iterator<const char> partition_names_it(alter_info->partition_names); |
| 803 | uint num_names= alter_info->partition_names.elements; |
| 804 | for (i= 0; i < num_names; i++) |
| 805 | { |
| 806 | const char *partition_name= partition_names_it++; |
| 807 | String *str_partition_name= new (thd->mem_root) |
| 808 | String(partition_name, system_charset_info); |
| 809 | if (!str_partition_name) |
| 810 | DBUG_RETURN(true); |
| 811 | partition_names_list.push_back(str_partition_name, thd->mem_root); |
| 812 | } |
| 813 | if (first_table->table-> |
| 814 | part_info->set_partition_bitmaps(&partition_names_list)) |
| 815 | DBUG_RETURN(true); |
| 816 | |
| 817 | if (lock_tables(thd, first_table, table_counter, 0)) |
| 818 | DBUG_RETURN(true); |
| 819 | |
| 820 | /* |
| 821 | Under locked table modes this might still not be an exclusive |
| 822 | lock. Hence, upgrade the lock since the handler truncate method |
| 823 | mandates an exclusive metadata lock. |
| 824 | */ |
| 825 | MDL_ticket *ticket= first_table->table->mdl_ticket; |
| 826 | if (thd->mdl_context.upgrade_shared_lock(ticket, MDL_EXCLUSIVE, timeout)) |
| 827 | DBUG_RETURN(TRUE); |
| 828 | |
| 829 | tdc_remove_table(thd, TDC_RT_REMOVE_NOT_OWN, first_table->db.str, |
| 830 | first_table->table_name.str, FALSE); |
| 831 | |
| 832 | partition= (ha_partition*) first_table->table->file; |
| 833 | /* Invoke the handler method responsible for truncating the partition. */ |
| 834 | if (unlikely(error= partition->truncate_partition(alter_info, |
| 835 | &binlog_stmt))) |
| 836 | partition->print_error(error, MYF(0)); |
| 837 | |
| 838 | /* |
| 839 | All effects of a truncate operation are committed even if the |
| 840 | operation fails. Thus, the query must be written to the binary |
| 841 | log. The exception is a unimplemented truncate method or failure |
| 842 | before any call to handler::truncate() is done. |
| 843 | Also, it is logged in statement format, regardless of the binlog format. |
| 844 | |
| 845 | Since we've changed data within the table, we also have to invalidate |
| 846 | the query cache for it. |
| 847 | */ |
| 848 | if (likely(error != HA_ERR_WRONG_COMMAND)) |
| 849 | { |
| 850 | query_cache_invalidate3(thd, first_table, FALSE); |
| 851 | if (binlog_stmt) |
| 852 | error|= write_bin_log(thd, !error, thd->query(), thd->query_length()); |
| 853 | } |
| 854 | |
| 855 | /* |
| 856 | A locked table ticket was upgraded to a exclusive lock. After the |
| 857 | the query has been written to the binary log, downgrade the lock |
| 858 | to a shared one. |
| 859 | */ |
| 860 | if (thd->locked_tables_mode) |
| 861 | ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); |
| 862 | |
| 863 | if (likely(!error)) |
| 864 | my_ok(thd); |
| 865 | |
| 866 | // Invalidate query cache |
| 867 | DBUG_ASSERT(!first_table->next_local); |
| 868 | query_cache_invalidate3(thd, first_table, FALSE); |
| 869 | |
| 870 | DBUG_RETURN(error); |
| 871 | } |
| 872 | |
| 873 | #endif /* WITH_PARTITION_STORAGE_ENGINE */ |
| 874 | |