1 | /* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. |
2 | Copyright (c) 2014, SkySQL Ab. |
3 | Copyright (c) 2016, MariaDB Corporation |
4 | |
5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by |
7 | the Free Software Foundation; version 2 of the License. |
8 | |
9 | This program is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License |
15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
17 | |
18 | #include "mariadb.h" |
19 | #include "sql_parse.h" // check_one_table_access |
20 | // check_merge_table_access |
21 | // check_one_table_access |
22 | #include "sql_table.h" // mysql_alter_table, etc. |
23 | #include "sql_cmd.h" // Sql_cmd |
24 | #include "sql_alter.h" // Sql_cmd_alter_table |
25 | #include "sql_partition.h" // struct partition_info, etc. |
26 | #include "debug_sync.h" // DEBUG_SYNC |
27 | #include "sql_truncate.h" // mysql_truncate_table, |
28 | // Sql_cmd_truncate_table |
29 | #include "sql_admin.h" // Analyze/Check/.._table_statement |
30 | #include "sql_partition_admin.h" // Alter_table_*_partition |
31 | #ifdef WITH_PARTITION_STORAGE_ENGINE |
32 | #include "ha_partition.h" // ha_partition |
33 | #endif |
34 | #include "sql_base.h" // open_and_lock_tables |
35 | |
36 | #ifndef WITH_PARTITION_STORAGE_ENGINE |
37 | |
38 | bool Sql_cmd_partition_unsupported::execute(THD *) |
39 | { |
40 | DBUG_ENTER("Sql_cmd_partition_unsupported::execute" ); |
41 | /* error, partitioning support not compiled in... */ |
42 | my_error(ER_FEATURE_DISABLED, MYF(0), "partitioning" , |
43 | "--with-plugin-partition" ); |
44 | DBUG_RETURN(TRUE); |
45 | } |
46 | |
47 | #else |
48 | |
49 | bool Sql_cmd_alter_table_exchange_partition::execute(THD *thd) |
50 | { |
51 | /* Moved from mysql_execute_command */ |
52 | LEX *lex= thd->lex; |
53 | /* first SELECT_LEX (have special meaning for many of non-SELECTcommands) */ |
54 | SELECT_LEX *select_lex= &lex->select_lex; |
55 | /* first table of first SELECT_LEX */ |
56 | TABLE_LIST *first_table= (TABLE_LIST*) select_lex->table_list.first; |
57 | /* |
58 | Code in mysql_alter_table() may modify its HA_CREATE_INFO argument, |
59 | so we have to use a copy of this structure to make execution |
60 | prepared statement- safe. A shallow copy is enough as no memory |
61 | referenced from this structure will be modified. |
62 | @todo move these into constructor... |
63 | */ |
64 | HA_CREATE_INFO create_info(lex->create_info); |
65 | Alter_info alter_info(lex->alter_info, thd->mem_root); |
66 | ulong priv_needed= ALTER_ACL | DROP_ACL | INSERT_ACL | CREATE_ACL; |
67 | |
68 | DBUG_ENTER("Sql_cmd_alter_table_exchange_partition::execute" ); |
69 | |
70 | if (unlikely(thd->is_fatal_error)) |
71 | { |
72 | /* out of memory creating a copy of alter_info */ |
73 | DBUG_RETURN(TRUE); |
74 | } |
75 | |
76 | /* Must be set in the parser */ |
77 | DBUG_ASSERT(select_lex->db.str); |
78 | /* also check the table to be exchanged with the partition */ |
79 | DBUG_ASSERT(alter_info.partition_flags & ALTER_PARTITION_EXCHANGE); |
80 | |
81 | if (unlikely(check_access(thd, priv_needed, first_table->db.str, |
82 | &first_table->grant.privilege, |
83 | &first_table->grant.m_internal, |
84 | 0, 0)) || |
85 | unlikely(check_access(thd, priv_needed, first_table->next_local->db.str, |
86 | &first_table->next_local->grant.privilege, |
87 | &first_table->next_local->grant.m_internal, |
88 | 0, 0))) |
89 | DBUG_RETURN(TRUE); |
90 | |
91 | if (unlikely(check_grant(thd, priv_needed, first_table, FALSE, UINT_MAX, |
92 | FALSE))) |
93 | DBUG_RETURN(TRUE); |
94 | |
95 | /* Not allowed with EXCHANGE PARTITION */ |
96 | DBUG_ASSERT(!create_info.data_file_name && !create_info.index_file_name); |
97 | WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table); |
98 | |
99 | thd->prepare_logs_for_admin_command(); |
100 | DBUG_RETURN(exchange_partition(thd, first_table, &alter_info)); |
101 | #ifdef WITH_WSREP |
102 | error: |
103 | /* handle errors in TO_ISOLATION here */ |
104 | DBUG_RETURN(true); |
105 | #endif /* WITH_WSREP */ |
106 | } |
107 | |
108 | |
109 | /** |
110 | @brief Checks that the tables will be able to be used for EXCHANGE PARTITION. |
111 | @param table Non partitioned table. |
112 | @param part_table Partitioned table. |
113 | |
114 | @retval FALSE if OK, otherwise error is reported and TRUE is returned. |
115 | */ |
116 | |
117 | static bool check_exchange_partition(TABLE *table, TABLE *part_table) |
118 | { |
119 | DBUG_ENTER("check_exchange_partition" ); |
120 | |
121 | /* Both tables must exist */ |
122 | if (unlikely(!part_table || !table)) |
123 | { |
124 | my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0)); |
125 | DBUG_RETURN(TRUE); |
126 | } |
127 | |
128 | /* The first table must be partitioned, and the second must not */ |
129 | if (unlikely(!part_table->part_info)) |
130 | { |
131 | my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); |
132 | DBUG_RETURN(TRUE); |
133 | } |
134 | if (unlikely(table->part_info)) |
135 | { |
136 | my_error(ER_PARTITION_EXCHANGE_PART_TABLE, MYF(0), |
137 | table->s->table_name.str); |
138 | DBUG_RETURN(TRUE); |
139 | } |
140 | |
141 | if (unlikely(part_table->file->ht != partition_hton)) |
142 | { |
143 | /* |
144 | Only allowed on partitioned tables throught the generic ha_partition |
145 | handler, i.e not yet for native partitioning. |
146 | */ |
147 | my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); |
148 | DBUG_RETURN(TRUE); |
149 | } |
150 | |
151 | if (unlikely(table->file->ht != part_table->part_info->default_engine_type)) |
152 | { |
153 | my_error(ER_MIX_HANDLER_ERROR, MYF(0)); |
154 | DBUG_RETURN(TRUE); |
155 | } |
156 | |
157 | /* Verify that table is not tmp table, partitioned tables cannot be tmp. */ |
158 | if (unlikely(table->s->tmp_table != NO_TMP_TABLE)) |
159 | { |
160 | my_error(ER_PARTITION_EXCHANGE_TEMP_TABLE, MYF(0), |
161 | table->s->table_name.str); |
162 | DBUG_RETURN(TRUE); |
163 | } |
164 | |
165 | /* The table cannot have foreign keys constraints or be referenced */ |
166 | if (unlikely(!table->file->can_switch_engines())) |
167 | { |
168 | my_error(ER_PARTITION_EXCHANGE_FOREIGN_KEY, MYF(0), |
169 | table->s->table_name.str); |
170 | DBUG_RETURN(TRUE); |
171 | } |
172 | DBUG_RETURN(FALSE); |
173 | } |
174 | |
175 | |
176 | /** |
177 | @brief Compare table structure/options between a non partitioned table |
178 | and a specific partition of a partitioned table. |
179 | |
180 | @param thd Thread object. |
181 | @param table Non partitioned table. |
182 | @param part_table Partitioned table. |
183 | @param part_elem Partition element to use for partition specific compare. |
184 | */ |
185 | static bool compare_table_with_partition(THD *thd, TABLE *table, |
186 | TABLE *part_table, |
187 | partition_element *part_elem, |
188 | uint part_id) |
189 | { |
190 | HA_CREATE_INFO table_create_info, part_create_info; |
191 | Alter_info part_alter_info; |
192 | Alter_table_ctx part_alter_ctx; // Not used |
193 | DBUG_ENTER("compare_table_with_partition" ); |
194 | |
195 | bool metadata_equal= false; |
196 | memset(&part_create_info, 0, sizeof(HA_CREATE_INFO)); |
197 | memset(&table_create_info, 0, sizeof(HA_CREATE_INFO)); |
198 | |
199 | update_create_info_from_table(&table_create_info, table); |
200 | /* get the current auto_increment value */ |
201 | table->file->update_create_info(&table_create_info); |
202 | /* mark all columns used, since they are used when preparing the new table */ |
203 | part_table->use_all_columns(); |
204 | table->use_all_columns(); |
205 | if (unlikely(mysql_prepare_alter_table(thd, part_table, &part_create_info, |
206 | &part_alter_info, &part_alter_ctx))) |
207 | { |
208 | my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); |
209 | DBUG_RETURN(TRUE); |
210 | } |
211 | /* db_type is not set in prepare_alter_table */ |
212 | part_create_info.db_type= part_table->part_info->default_engine_type; |
213 | ((ha_partition*)(part_table->file))->update_part_create_info(&part_create_info, part_id); |
214 | /* |
215 | Since we exchange the partition with the table, allow exchanging |
216 | auto_increment value as well. |
217 | */ |
218 | part_create_info.auto_increment_value= |
219 | table_create_info.auto_increment_value; |
220 | |
221 | /* Check compatible row_types and set create_info accordingly. */ |
222 | { |
223 | enum row_type part_row_type= part_table->file->get_row_type(); |
224 | enum row_type table_row_type= table->file->get_row_type(); |
225 | if (part_row_type != table_row_type) |
226 | { |
227 | my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), |
228 | "ROW_FORMAT" ); |
229 | DBUG_RETURN(true); |
230 | } |
231 | part_create_info.row_type= table->s->row_type; |
232 | } |
233 | |
234 | /* |
235 | NOTE: ha_blackhole does not support check_if_compatible_data, |
236 | so this always fail for blackhole tables. |
237 | ha_myisam compares pointers to verify that DATA/INDEX DIRECTORY is |
238 | the same, so any table using data/index_file_name will fail. |
239 | */ |
240 | if (mysql_compare_tables(table, &part_alter_info, &part_create_info, |
241 | &metadata_equal)) |
242 | { |
243 | my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); |
244 | DBUG_RETURN(TRUE); |
245 | } |
246 | |
247 | DEBUG_SYNC(thd, "swap_partition_after_compare_tables" ); |
248 | if (!metadata_equal) |
249 | { |
250 | my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0)); |
251 | DBUG_RETURN(TRUE); |
252 | } |
253 | DBUG_ASSERT(table->s->db_create_options == |
254 | part_table->s->db_create_options); |
255 | DBUG_ASSERT(table->s->db_options_in_use == |
256 | part_table->s->db_options_in_use); |
257 | |
258 | if (table_create_info.avg_row_length != part_create_info.avg_row_length) |
259 | { |
260 | my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), |
261 | "AVG_ROW_LENGTH" ); |
262 | DBUG_RETURN(TRUE); |
263 | } |
264 | |
265 | if (table_create_info.table_options != part_create_info.table_options) |
266 | { |
267 | my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), |
268 | "TABLE OPTION" ); |
269 | DBUG_RETURN(TRUE); |
270 | } |
271 | |
272 | if (table->s->table_charset != part_table->s->table_charset) |
273 | { |
274 | my_error(ER_PARTITION_EXCHANGE_DIFFERENT_OPTION, MYF(0), |
275 | "CHARACTER SET" ); |
276 | DBUG_RETURN(TRUE); |
277 | } |
278 | |
279 | /* |
280 | NOTE: We do not support update of frm-file, i.e. change |
281 | max/min_rows, data/index_file_name etc. |
282 | The workaround is to use REORGANIZE PARTITION to rewrite |
283 | the frm file and then use EXCHANGE PARTITION when they are the same. |
284 | */ |
285 | if (compare_partition_options(&table_create_info, part_elem)) |
286 | DBUG_RETURN(TRUE); |
287 | |
288 | DBUG_RETURN(FALSE); |
289 | } |
290 | |
291 | |
292 | /** |
293 | @brief Exchange partition/table with ddl log. |
294 | |
295 | @details How to handle a crash in the middle of the rename (break on error): |
296 | 1) register in ddl_log that we are going to exchange swap_table with part. |
297 | 2) do the first rename (swap_table -> tmp-name) and sync the ddl_log. |
298 | 3) do the second rename (part -> swap_table) and sync the ddl_log. |
299 | 4) do the last rename (tmp-name -> part). |
300 | 5) mark the entry done. |
301 | |
302 | Recover by: |
303 | 5) is done, All completed. Nothing to recover. |
304 | 4) is done see 3). (No mark or sync in the ddl_log...) |
305 | 3) is done -> try rename part -> tmp-name (ignore failure) goto 2). |
306 | 2) is done -> try rename swap_table -> part (ignore failure) goto 1). |
307 | 1) is done -> try rename tmp-name -> swap_table (ignore failure). |
308 | before 1) Nothing to recover... |
309 | |
310 | @param thd Thread handle |
311 | @param name name of table/partition 1 (to be exchanged with 2) |
312 | @param from_name name of table/partition 2 (to be exchanged with 1) |
313 | @param tmp_name temporary name to use while exchaning |
314 | @param ht handlerton of the table/partitions |
315 | |
316 | @return Operation status |
317 | @retval TRUE Error |
318 | @retval FALSE Success |
319 | |
320 | @note ha_heap always succeeds in rename (since it is created upon usage). |
321 | This is OK when to recover from a crash since all heap are empty and the |
322 | recover is done early in the startup of the server (right before |
323 | read_init_file which can populate the tables). |
324 | |
325 | And if no crash we can trust the syncs in the ddl_log. |
326 | |
327 | What about if the rename is put into a background thread? That will cause |
328 | corruption and is avoided by the exlusive metadata lock. |
329 | */ |
330 | static bool exchange_name_with_ddl_log(THD *thd, |
331 | const char *name, |
332 | const char *from_name, |
333 | const char *tmp_name, |
334 | handlerton *ht) |
335 | { |
336 | DDL_LOG_ENTRY exchange_entry; |
337 | DDL_LOG_MEMORY_ENTRY *log_entry= NULL; |
338 | DDL_LOG_MEMORY_ENTRY *exec_log_entry= NULL; |
339 | bool error= TRUE; |
340 | bool error_set= FALSE; |
341 | handler *file= NULL; |
342 | DBUG_ENTER("exchange_name_with_ddl_log" ); |
343 | |
344 | if (unlikely(!(file= get_new_handler(NULL, thd->mem_root, ht)))) |
345 | DBUG_RETURN(TRUE); |
346 | |
347 | /* prepare the action entry */ |
348 | exchange_entry.entry_type= DDL_LOG_ENTRY_CODE; |
349 | exchange_entry.action_type= DDL_LOG_EXCHANGE_ACTION; |
350 | exchange_entry.next_entry= 0; |
351 | exchange_entry.name= name; |
352 | exchange_entry.from_name= from_name; |
353 | exchange_entry.tmp_name= tmp_name; |
354 | exchange_entry.handler_name= ha_resolve_storage_engine_name(ht); |
355 | exchange_entry.phase= EXCH_PHASE_NAME_TO_TEMP; |
356 | |
357 | mysql_mutex_lock(&LOCK_gdl); |
358 | /* |
359 | write to the ddl log what to do by: |
360 | 1) write the action entry (i.e. which names to be exchanged) |
361 | 2) write the execution entry with a link to the action entry |
362 | */ |
363 | DBUG_EXECUTE_IF("exchange_partition_fail_1" , goto err_no_action_written;); |
364 | DBUG_EXECUTE_IF("exchange_partition_abort_1" , DBUG_SUICIDE();); |
365 | if (unlikely(write_ddl_log_entry(&exchange_entry, &log_entry))) |
366 | goto err_no_action_written; |
367 | |
368 | DBUG_EXECUTE_IF("exchange_partition_fail_2" , goto err_no_execute_written;); |
369 | DBUG_EXECUTE_IF("exchange_partition_abort_2" , DBUG_SUICIDE();); |
370 | if (unlikely(write_execute_ddl_log_entry(log_entry->entry_pos, FALSE, |
371 | &exec_log_entry))) |
372 | goto err_no_execute_written; |
373 | /* ddl_log is written and synced */ |
374 | |
375 | mysql_mutex_unlock(&LOCK_gdl); |
376 | /* |
377 | Execute the name exchange. |
378 | Do one rename, increase the phase, update the action entry and sync. |
379 | In case of errors in the ddl_log we must fail and let the ddl_log try |
380 | to revert the changes, since otherwise it could revert the command after |
381 | we sent OK to the client. |
382 | */ |
383 | /* call rename table from table to tmp-name */ |
384 | DBUG_EXECUTE_IF("exchange_partition_fail_3" , |
385 | my_error(ER_ERROR_ON_RENAME, MYF(0), name, tmp_name, 0); |
386 | error_set= TRUE; |
387 | goto err_rename;); |
388 | DBUG_EXECUTE_IF("exchange_partition_abort_3" , DBUG_SUICIDE();); |
389 | if (unlikely(file->ha_rename_table(name, tmp_name))) |
390 | { |
391 | my_error(ER_ERROR_ON_RENAME, MYF(0), name, tmp_name, my_errno); |
392 | error_set= TRUE; |
393 | goto err_rename; |
394 | } |
395 | DBUG_EXECUTE_IF("exchange_partition_fail_4" , goto err_rename;); |
396 | DBUG_EXECUTE_IF("exchange_partition_abort_4" , DBUG_SUICIDE();); |
397 | if (unlikely(deactivate_ddl_log_entry(log_entry->entry_pos))) |
398 | goto err_rename; |
399 | |
400 | /* call rename table from partition to table */ |
401 | DBUG_EXECUTE_IF("exchange_partition_fail_5" , |
402 | my_error(ER_ERROR_ON_RENAME, MYF(0), from_name, name, 0); |
403 | error_set= TRUE; |
404 | goto err_rename;); |
405 | DBUG_EXECUTE_IF("exchange_partition_abort_5" , DBUG_SUICIDE();); |
406 | if (unlikely(file->ha_rename_table(from_name, name))) |
407 | { |
408 | my_error(ER_ERROR_ON_RENAME, MYF(0), from_name, name, my_errno); |
409 | error_set= TRUE; |
410 | goto err_rename; |
411 | } |
412 | DBUG_EXECUTE_IF("exchange_partition_fail_6" , goto err_rename;); |
413 | DBUG_EXECUTE_IF("exchange_partition_abort_6" , DBUG_SUICIDE();); |
414 | if (unlikely(deactivate_ddl_log_entry(log_entry->entry_pos))) |
415 | goto err_rename; |
416 | |
417 | /* call rename table from tmp-nam to partition */ |
418 | DBUG_EXECUTE_IF("exchange_partition_fail_7" , |
419 | my_error(ER_ERROR_ON_RENAME, MYF(0), tmp_name, from_name, 0); |
420 | error_set= TRUE; |
421 | goto err_rename;); |
422 | DBUG_EXECUTE_IF("exchange_partition_abort_7" , DBUG_SUICIDE();); |
423 | if (unlikely(file->ha_rename_table(tmp_name, from_name))) |
424 | { |
425 | my_error(ER_ERROR_ON_RENAME, MYF(0), tmp_name, from_name, my_errno); |
426 | error_set= TRUE; |
427 | goto err_rename; |
428 | } |
429 | DBUG_EXECUTE_IF("exchange_partition_fail_8" , goto err_rename;); |
430 | DBUG_EXECUTE_IF("exchange_partition_abort_8" , DBUG_SUICIDE();); |
431 | if (unlikely(deactivate_ddl_log_entry(log_entry->entry_pos))) |
432 | goto err_rename; |
433 | |
434 | /* The exchange is complete and ddl_log is deactivated */ |
435 | DBUG_EXECUTE_IF("exchange_partition_fail_9" , goto err_rename;); |
436 | DBUG_EXECUTE_IF("exchange_partition_abort_9" , DBUG_SUICIDE();); |
437 | /* all OK */ |
438 | error= FALSE; |
439 | delete file; |
440 | DBUG_RETURN(error); |
441 | err_rename: |
442 | /* |
443 | Nothing to do if any of these commands fails :( the commands itselfs |
444 | will log to the error log about the failures... |
445 | */ |
446 | /* execute the ddl log entry to revert the renames */ |
447 | (void) execute_ddl_log_entry(current_thd, log_entry->entry_pos); |
448 | mysql_mutex_lock(&LOCK_gdl); |
449 | /* mark the execute log entry done */ |
450 | (void) write_execute_ddl_log_entry(0, TRUE, &exec_log_entry); |
451 | /* release the execute log entry */ |
452 | (void) release_ddl_log_memory_entry(exec_log_entry); |
453 | err_no_execute_written: |
454 | /* release the action log entry */ |
455 | (void) release_ddl_log_memory_entry(log_entry); |
456 | err_no_action_written: |
457 | mysql_mutex_unlock(&LOCK_gdl); |
458 | delete file; |
459 | if (!error_set) |
460 | my_error(ER_DDL_LOG_ERROR, MYF(0)); |
461 | DBUG_RETURN(error); |
462 | } |
463 | |
464 | |
465 | /** |
466 | @brief Swap places between a partition and a table. |
467 | |
468 | @details Verify that the tables are compatible (same engine, definition etc), |
469 | verify that all rows in the table will fit in the partition, |
470 | if all OK, rename table to tmp name, rename partition to table |
471 | and finally rename tmp name to partition. |
472 | |
473 | 1) Take upgradable mdl, open tables and then lock them (inited in parse) |
474 | 2) Verify that metadata matches |
475 | 3) verify data |
476 | 4) Upgrade to exclusive mdl for both tables |
477 | 5) Rename table <-> partition |
478 | 6) Rely on close_thread_tables to release mdl and table locks |
479 | |
480 | @param thd Thread handle |
481 | @param table_list Table where the partition exists as first table, |
482 | Table to swap with the partition as second table |
483 | @param alter_info Contains partition name to swap |
484 | |
485 | @note This is a DDL operation so triggers will not be used. |
486 | */ |
487 | bool Sql_cmd_alter_table_exchange_partition:: |
488 | exchange_partition(THD *thd, TABLE_LIST *table_list, Alter_info *alter_info) |
489 | { |
490 | TABLE *part_table, *swap_table; |
491 | TABLE_LIST *swap_table_list; |
492 | handlerton *table_hton; |
493 | partition_element *part_elem; |
494 | const char *partition_name; |
495 | char temp_name[FN_REFLEN+1]; |
496 | char part_file_name[2*FN_REFLEN+1]; |
497 | char swap_file_name[FN_REFLEN+1]; |
498 | char temp_file_name[FN_REFLEN+1]; |
499 | uint swap_part_id; |
500 | uint part_file_name_len; |
501 | Alter_table_prelocking_strategy alter_prelocking_strategy; |
502 | MDL_ticket *swap_table_mdl_ticket= NULL; |
503 | MDL_ticket *part_table_mdl_ticket= NULL; |
504 | uint table_counter; |
505 | bool error= TRUE; |
506 | DBUG_ENTER("mysql_exchange_partition" ); |
507 | DBUG_ASSERT(alter_info->partition_flags & ALTER_PARTITION_EXCHANGE); |
508 | |
509 | /* Don't allow to exchange with log table */ |
510 | swap_table_list= table_list->next_local; |
511 | if (check_if_log_table(swap_table_list, FALSE, "ALTER PARTITION" )) |
512 | DBUG_RETURN(TRUE); |
513 | |
514 | /* |
515 | Currently no MDL lock that allows both read and write and is upgradeable |
516 | to exclusive, so leave the lock type to TL_WRITE_ALLOW_READ also on the |
517 | partitioned table. |
518 | |
519 | TODO: add MDL lock that allows both read and write and is upgradable to |
520 | exclusive lock. This would allow to continue using the partitioned table |
521 | also with update/insert/delete while the verification of the swap table |
522 | is running. |
523 | */ |
524 | |
525 | /* |
526 | NOTE: It is not possible to exchange a crashed partition/table since |
527 | we need some info from the engine, which we can only access after open, |
528 | to be able to verify the structure/metadata. |
529 | */ |
530 | table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE); |
531 | if (unlikely(open_tables(thd, &table_list, &table_counter, 0, |
532 | &alter_prelocking_strategy))) |
533 | DBUG_RETURN(true); |
534 | |
535 | part_table= table_list->table; |
536 | swap_table= swap_table_list->table; |
537 | |
538 | if (unlikely(check_exchange_partition(swap_table, part_table))) |
539 | DBUG_RETURN(TRUE); |
540 | |
541 | /* set lock pruning on first table */ |
542 | partition_name= alter_info->partition_names.head(); |
543 | if (unlikely(table_list->table->part_info-> |
544 | set_named_partition_bitmap(partition_name, |
545 | strlen(partition_name)))) |
546 | DBUG_RETURN(true); |
547 | |
548 | if (unlikely(lock_tables(thd, table_list, table_counter, 0))) |
549 | DBUG_RETURN(true); |
550 | |
551 | |
552 | table_hton= swap_table->file->ht; |
553 | |
554 | THD_STAGE_INFO(thd, stage_verifying_table); |
555 | |
556 | /* Will append the partition name later in part_info->get_part_elem() */ |
557 | part_file_name_len= build_table_filename(part_file_name, |
558 | sizeof(part_file_name), |
559 | table_list->db.str, |
560 | table_list->table_name.str, |
561 | "" , 0); |
562 | build_table_filename(swap_file_name, |
563 | sizeof(swap_file_name), |
564 | swap_table_list->db.str, |
565 | swap_table_list->table_name.str, |
566 | "" , 0); |
567 | /* create a unique temp name #sqlx-nnnn_nnnn, x for eXchange */ |
568 | my_snprintf(temp_name, sizeof(temp_name), "%sx-%lx_%llx" , |
569 | tmp_file_prefix, current_pid, thd->thread_id); |
570 | if (lower_case_table_names) |
571 | my_casedn_str(files_charset_info, temp_name); |
572 | build_table_filename(temp_file_name, sizeof(temp_file_name), |
573 | table_list->next_local->db.str, |
574 | temp_name, "" , FN_IS_TMP); |
575 | |
576 | if (unlikely(!(part_elem= |
577 | part_table->part_info->get_part_elem(partition_name, |
578 | part_file_name + |
579 | part_file_name_len, |
580 | sizeof(part_file_name) - |
581 | part_file_name_len, |
582 | &swap_part_id)))) |
583 | { |
584 | DBUG_RETURN(TRUE); |
585 | } |
586 | |
587 | if (unlikely(swap_part_id == NOT_A_PARTITION_ID)) |
588 | { |
589 | DBUG_ASSERT(part_table->part_info->is_sub_partitioned()); |
590 | my_error(ER_PARTITION_INSTEAD_OF_SUBPARTITION, MYF(0)); |
591 | DBUG_RETURN(TRUE); |
592 | } |
593 | |
594 | if (unlikely(compare_table_with_partition(thd, swap_table, part_table, |
595 | part_elem, |
596 | swap_part_id))) |
597 | DBUG_RETURN(TRUE); |
598 | |
599 | /* Table and partition has same structure/options, OK to exchange */ |
600 | |
601 | thd_proc_info(thd, "Verifying data with partition" ); |
602 | |
603 | if (unlikely(verify_data_with_partition(swap_table, part_table, |
604 | swap_part_id))) |
605 | DBUG_RETURN(TRUE); |
606 | |
607 | /* |
608 | Get exclusive mdl lock on both tables, alway the non partitioned table |
609 | first. Remember the tickets for downgrading locks later. |
610 | */ |
611 | swap_table_mdl_ticket= swap_table->mdl_ticket; |
612 | part_table_mdl_ticket= part_table->mdl_ticket; |
613 | |
614 | /* |
615 | No need to set used_partitions to only propagate |
616 | HA_EXTRA_PREPARE_FOR_RENAME to one part since no built in engine uses |
617 | that flag. And the action would probably be to force close all other |
618 | instances which is what we are doing any way. |
619 | */ |
620 | if (wait_while_table_is_used(thd, swap_table, HA_EXTRA_PREPARE_FOR_RENAME) || |
621 | wait_while_table_is_used(thd, part_table, HA_EXTRA_PREPARE_FOR_RENAME)) |
622 | goto err; |
623 | |
624 | DEBUG_SYNC(thd, "swap_partition_after_wait" ); |
625 | |
626 | close_all_tables_for_name(thd, swap_table->s, HA_EXTRA_NOT_USED, NULL); |
627 | close_all_tables_for_name(thd, part_table->s, HA_EXTRA_NOT_USED, NULL); |
628 | |
629 | DEBUG_SYNC(thd, "swap_partition_before_rename" ); |
630 | |
631 | if (unlikely(exchange_name_with_ddl_log(thd, swap_file_name, part_file_name, |
632 | temp_file_name, table_hton))) |
633 | goto err; |
634 | |
635 | /* |
636 | Reopen tables under LOCK TABLES. Ignore the return value for now. It's |
637 | better to keep master/slave in consistent state. Alternative would be to |
638 | try to revert the exchange operation and issue error. |
639 | */ |
640 | (void) thd->locked_tables_list.reopen_tables(thd, false); |
641 | |
642 | if (unlikely((error= write_bin_log(thd, TRUE, thd->query(), |
643 | thd->query_length())))) |
644 | { |
645 | /* |
646 | The error is reported in write_bin_log(). |
647 | We try to revert to make it easier to keep the master/slave in sync. |
648 | */ |
649 | (void) exchange_name_with_ddl_log(thd, part_file_name, swap_file_name, |
650 | temp_file_name, table_hton); |
651 | } |
652 | |
653 | err: |
654 | if (thd->locked_tables_mode) |
655 | { |
656 | if (swap_table_mdl_ticket) |
657 | swap_table_mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); |
658 | if (part_table_mdl_ticket) |
659 | part_table_mdl_ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); |
660 | } |
661 | |
662 | if (unlikely(!error)) |
663 | my_ok(thd); |
664 | |
665 | // For query cache |
666 | table_list->table= NULL; |
667 | table_list->next_local->table= NULL; |
668 | query_cache_invalidate3(thd, table_list, FALSE); |
669 | |
670 | DBUG_RETURN(error); |
671 | } |
672 | |
673 | bool Sql_cmd_alter_table_analyze_partition::execute(THD *thd) |
674 | { |
675 | bool res; |
676 | DBUG_ENTER("Sql_cmd_alter_table_analyze_partition::execute" ); |
677 | |
678 | /* |
679 | Flag that it is an ALTER command which administrates partitions, used |
680 | by ha_partition |
681 | */ |
682 | thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; |
683 | |
684 | res= Sql_cmd_analyze_table::execute(thd); |
685 | |
686 | DBUG_RETURN(res); |
687 | } |
688 | |
689 | |
690 | bool Sql_cmd_alter_table_check_partition::execute(THD *thd) |
691 | { |
692 | bool res; |
693 | DBUG_ENTER("Sql_cmd_alter_table_check_partition::execute" ); |
694 | |
695 | /* |
696 | Flag that it is an ALTER command which administrates partitions, used |
697 | by ha_partition |
698 | */ |
699 | thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; |
700 | |
701 | res= Sql_cmd_check_table::execute(thd); |
702 | |
703 | DBUG_RETURN(res); |
704 | } |
705 | |
706 | |
707 | bool Sql_cmd_alter_table_optimize_partition::execute(THD *thd) |
708 | { |
709 | bool res; |
710 | DBUG_ENTER("Alter_table_optimize_partition_statement::execute" ); |
711 | |
712 | /* |
713 | Flag that it is an ALTER command which administrates partitions, used |
714 | by ha_partition |
715 | */ |
716 | thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; |
717 | |
718 | res= Sql_cmd_optimize_table::execute(thd); |
719 | |
720 | DBUG_RETURN(res); |
721 | } |
722 | |
723 | |
724 | bool Sql_cmd_alter_table_repair_partition::execute(THD *thd) |
725 | { |
726 | bool res; |
727 | DBUG_ENTER("Sql_cmd_alter_table_repair_partition::execute" ); |
728 | |
729 | /* |
730 | Flag that it is an ALTER command which administrates partitions, used |
731 | by ha_partition |
732 | */ |
733 | thd->lex->alter_info.partition_flags|= ALTER_PARTITION_ADMIN; |
734 | |
735 | res= Sql_cmd_repair_table::execute(thd); |
736 | |
737 | DBUG_RETURN(res); |
738 | } |
739 | |
740 | |
741 | bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) |
742 | { |
743 | int error; |
744 | ha_partition *partition; |
745 | ulong timeout= thd->variables.lock_wait_timeout; |
746 | TABLE_LIST *first_table= thd->lex->select_lex.table_list.first; |
747 | Alter_info *alter_info= &thd->lex->alter_info; |
748 | uint table_counter, i; |
749 | List<String> partition_names_list; |
750 | bool binlog_stmt; |
751 | DBUG_ENTER("Sql_cmd_alter_table_truncate_partition::execute" ); |
752 | |
753 | /* |
754 | Flag that it is an ALTER command which administrates partitions, used |
755 | by ha_partition. |
756 | */ |
757 | thd->lex->alter_info.partition_flags|= (ALTER_PARTITION_ADMIN | |
758 | ALTER_PARTITION_TRUNCATE); |
759 | |
760 | /* Fix the lock types (not the same as ordinary ALTER TABLE). */ |
761 | first_table->lock_type= TL_WRITE; |
762 | first_table->mdl_request.set_type(MDL_EXCLUSIVE); |
763 | |
764 | /* |
765 | Check table permissions and open it with a exclusive lock. |
766 | Ensure it is a partitioned table and finally, upcast the |
767 | handler and invoke the partition truncate method. Lastly, |
768 | write the statement to the binary log if necessary. |
769 | */ |
770 | |
771 | if (check_one_table_access(thd, DROP_ACL, first_table)) |
772 | DBUG_RETURN(TRUE); |
773 | |
774 | #ifdef WITH_WSREP |
775 | if (WSREP(thd) && |
776 | (!thd->is_current_stmt_binlog_format_row() || |
777 | !thd->find_temporary_table(first_table)) && |
778 | wsrep_to_isolation_begin( |
779 | thd, first_table->db.str, first_table->table_name.str, NULL) |
780 | ) |
781 | { |
782 | WSREP_WARN("ALTER TABLE TRUNCATE PARTITION isolation failure" ); |
783 | DBUG_RETURN(TRUE); |
784 | } |
785 | #endif /* WITH_WSREP */ |
786 | |
787 | if (open_tables(thd, &first_table, &table_counter, 0)) |
788 | DBUG_RETURN(true); |
789 | |
790 | if (!first_table->table || first_table->view || |
791 | first_table->table->s->db_type() != partition_hton) |
792 | { |
793 | my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); |
794 | DBUG_RETURN(TRUE); |
795 | } |
796 | |
797 | |
798 | /* |
799 | Prune all, but named partitions, |
800 | to avoid excessive calls to external_lock(). |
801 | */ |
802 | List_iterator<const char> partition_names_it(alter_info->partition_names); |
803 | uint num_names= alter_info->partition_names.elements; |
804 | for (i= 0; i < num_names; i++) |
805 | { |
806 | const char *partition_name= partition_names_it++; |
807 | String *str_partition_name= new (thd->mem_root) |
808 | String(partition_name, system_charset_info); |
809 | if (!str_partition_name) |
810 | DBUG_RETURN(true); |
811 | partition_names_list.push_back(str_partition_name, thd->mem_root); |
812 | } |
813 | if (first_table->table-> |
814 | part_info->set_partition_bitmaps(&partition_names_list)) |
815 | DBUG_RETURN(true); |
816 | |
817 | if (lock_tables(thd, first_table, table_counter, 0)) |
818 | DBUG_RETURN(true); |
819 | |
820 | /* |
821 | Under locked table modes this might still not be an exclusive |
822 | lock. Hence, upgrade the lock since the handler truncate method |
823 | mandates an exclusive metadata lock. |
824 | */ |
825 | MDL_ticket *ticket= first_table->table->mdl_ticket; |
826 | if (thd->mdl_context.upgrade_shared_lock(ticket, MDL_EXCLUSIVE, timeout)) |
827 | DBUG_RETURN(TRUE); |
828 | |
829 | tdc_remove_table(thd, TDC_RT_REMOVE_NOT_OWN, first_table->db.str, |
830 | first_table->table_name.str, FALSE); |
831 | |
832 | partition= (ha_partition*) first_table->table->file; |
833 | /* Invoke the handler method responsible for truncating the partition. */ |
834 | if (unlikely(error= partition->truncate_partition(alter_info, |
835 | &binlog_stmt))) |
836 | partition->print_error(error, MYF(0)); |
837 | |
838 | /* |
839 | All effects of a truncate operation are committed even if the |
840 | operation fails. Thus, the query must be written to the binary |
841 | log. The exception is a unimplemented truncate method or failure |
842 | before any call to handler::truncate() is done. |
843 | Also, it is logged in statement format, regardless of the binlog format. |
844 | |
845 | Since we've changed data within the table, we also have to invalidate |
846 | the query cache for it. |
847 | */ |
848 | if (likely(error != HA_ERR_WRONG_COMMAND)) |
849 | { |
850 | query_cache_invalidate3(thd, first_table, FALSE); |
851 | if (binlog_stmt) |
852 | error|= write_bin_log(thd, !error, thd->query(), thd->query_length()); |
853 | } |
854 | |
855 | /* |
856 | A locked table ticket was upgraded to a exclusive lock. After the |
857 | the query has been written to the binary log, downgrade the lock |
858 | to a shared one. |
859 | */ |
860 | if (thd->locked_tables_mode) |
861 | ticket->downgrade_lock(MDL_SHARED_NO_READ_WRITE); |
862 | |
863 | if (likely(!error)) |
864 | my_ok(thd); |
865 | |
866 | // Invalidate query cache |
867 | DBUG_ASSERT(!first_table->next_local); |
868 | query_cache_invalidate3(thd, first_table, FALSE); |
869 | |
870 | DBUG_RETURN(error); |
871 | } |
872 | |
873 | #endif /* WITH_PARTITION_STORAGE_ENGINE */ |
874 | |