1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of TokuDB |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
TokuDB is free software: you can redistribute it and/or modify
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | TokuDB is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with TokuDB. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ======= */ |
23 | |
24 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
25 | |
26 | #if TOKU_INCLUDE_ALTER_56 |
27 | |
28 | #if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 101099 |
29 | #define TOKU_ALTER_RENAME ALTER_RENAME |
30 | #define DYNAMIC_ARRAY_ELEMENTS_TYPE size_t |
31 | #elif (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \ |
32 | (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) |
33 | #define TOKU_ALTER_RENAME ALTER_RENAME |
34 | #define DYNAMIC_ARRAY_ELEMENTS_TYPE int |
35 | #elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599 |
36 | #define TOKU_ALTER_RENAME ALTER_RENAME_56 |
37 | #define DYNAMIC_ARRAY_ELEMENTS_TYPE int |
38 | #else |
39 | #error |
40 | #endif |
41 | |
42 | #include "ha_tokudb_alter_common.cc" |
43 | #include <sql_array.h> |
44 | #include <sql_base.h> |
45 | |
46 | // The tokudb alter context contains the alter state that is set in the check if supported method and used |
47 | // later when the alter operation is executed. |
48 | class tokudb_alter_ctx : public inplace_alter_handler_ctx { |
49 | public: |
50 | tokudb_alter_ctx() : |
51 | handler_flags(0), |
52 | alter_txn(NULL), |
53 | add_index_changed(false), |
54 | drop_index_changed(false), |
55 | reset_card(false), |
56 | compression_changed(false), |
57 | expand_varchar_update_needed(false), |
58 | expand_fixed_update_needed(false), |
59 | expand_blob_update_needed(false), |
60 | optimize_needed(false), |
61 | table_kc_info(NULL), |
62 | altered_table_kc_info(NULL) { |
63 | } |
64 | ~tokudb_alter_ctx() { |
65 | if (altered_table_kc_info) |
66 | free_key_and_col_info(altered_table_kc_info); |
67 | } |
68 | public: |
69 | ulong handler_flags; |
70 | DB_TXN* alter_txn; |
71 | bool add_index_changed; |
72 | bool incremented_num_DBs, modified_DBs; |
73 | bool drop_index_changed; |
74 | bool reset_card; |
75 | bool compression_changed; |
76 | enum toku_compression_method orig_compression_method; |
77 | bool expand_varchar_update_needed; |
78 | bool expand_fixed_update_needed; |
79 | bool expand_blob_update_needed; |
80 | bool optimize_needed; |
81 | Dynamic_array<uint> changed_fields; |
82 | KEY_AND_COL_INFO* table_kc_info; |
83 | KEY_AND_COL_INFO* altered_table_kc_info; |
84 | KEY_AND_COL_INFO altered_table_kc_info_base; |
85 | }; |
86 | |
87 | // Debug function to print out an alter table operation |
88 | void ha_tokudb::print_alter_info( |
89 | TABLE* altered_table, |
90 | Alter_inplace_info* ha_alter_info) { |
91 | |
92 | TOKUDB_TRACE( |
93 | "***are keys of two tables same? %d" , |
94 | tables_have_same_keys(table, altered_table, false, false)); |
95 | if (ha_alter_info->handler_flags) { |
96 | TOKUDB_TRACE("***alter flags set ***" ); |
97 | for (int i = 0; i < 32; i++) { |
98 | if (ha_alter_info->handler_flags & (1 << i)) |
99 | TOKUDB_TRACE("%d" , i); |
100 | } |
101 | } |
102 | |
103 | // everyone calculates data by doing some default_values - record[0], but |
104 | // I do not see why that is necessary |
105 | TOKUDB_TRACE("******" ); |
106 | TOKUDB_TRACE("***orig table***" ); |
107 | for (uint i = 0; i < table->s->fields; i++) { |
108 | // |
109 | // make sure to use table->field, and NOT table->s->field |
110 | // |
111 | Field* curr_field = table->field[i]; |
112 | uint null_offset = get_null_offset(table, curr_field); |
113 | TOKUDB_TRACE( |
114 | "name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: " |
115 | "%d, is_null %d, pack_length %u" , |
116 | curr_field->field_name.str, |
117 | curr_field->real_type(), |
118 | mysql_to_toku_type(curr_field), |
119 | curr_field->null_bit, |
120 | null_offset, |
121 | curr_field->real_maybe_null(), |
122 | curr_field->real_maybe_null() ? |
123 | table->s->default_values[null_offset] & curr_field->null_bit : |
124 | 0xffffffff, |
125 | curr_field->pack_length()); |
126 | } |
127 | TOKUDB_TRACE("******" ); |
128 | TOKUDB_TRACE("***altered table***" ); |
129 | for (uint i = 0; i < altered_table->s->fields; i++) { |
130 | Field* curr_field = altered_table->field[i]; |
131 | uint null_offset = get_null_offset(altered_table, curr_field); |
132 | TOKUDB_TRACE( |
133 | "name: %s, types: %u %u, nullable: %d, null_offset: %d, " |
134 | "is_null_field: %d, is_null %d, pack_length %u" , |
135 | curr_field->field_name.str, |
136 | curr_field->real_type(), |
137 | mysql_to_toku_type(curr_field), |
138 | curr_field->null_bit, |
139 | null_offset, |
140 | curr_field->real_maybe_null(), |
141 | curr_field->real_maybe_null() ? |
142 | altered_table->s->default_values[null_offset] & |
143 | curr_field->null_bit : 0xffffffff, |
144 | curr_field->pack_length()); |
145 | } |
146 | TOKUDB_TRACE("******" ); |
147 | } |
148 | |
149 | // Given two tables with equal number of fields, find all of the fields with |
150 | // different types and return the indexes of the different fields in the |
151 | // changed_fields array. This function ignores field name differences. |
152 | static int find_changed_fields( |
153 | TABLE* table_a, |
154 | TABLE* table_b, |
155 | Dynamic_array<uint>& changed_fields) { |
156 | |
157 | for (uint i = 0; i < table_a->s->fields; i++) { |
158 | Field* field_a = table_a->field[i]; |
159 | Field* field_b = table_b->field[i]; |
160 | if (!fields_are_same_type(field_a, field_b)) |
161 | changed_fields.append(i); |
162 | } |
163 | return changed_fields.elements(); |
164 | } |
165 | |
// Forward declaration. check_if_supported_inplace_alter() calls this for a
// column length change and only allows the inplace alter when it returns
// true. The definition is not part of this section of the file.
static bool change_length_is_supported(
    TABLE* table,
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info,
    tokudb_alter_ctx* ctx);
171 | |
// Forward declaration. check_if_supported_inplace_alter() calls this for a
// column type change and only allows the inplace alter when it returns
// true. The definition is not part of this section of the file.
static bool change_type_is_supported(
    TABLE* table,
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info,
    tokudb_alter_ctx* ctx);
177 | |
178 | // The ha_alter_info->handler_flags can not be trusted. |
179 | // This function maps the bogus handler flags to something we like. |
180 | static ulong fix_handler_flags( |
181 | THD* thd, |
182 | TABLE* table, |
183 | TABLE* altered_table, |
184 | Alter_inplace_info* ha_alter_info) { |
185 | |
186 | ulong handler_flags = ha_alter_info->handler_flags; |
187 | |
188 | #if 100000 <= MYSQL_VERSION_ID |
189 | // This is automatically supported, hide the flag from later checks |
190 | handler_flags &= ~ALTER_PARTITIONED; |
191 | #endif |
192 | |
193 | // workaround for fill_alter_inplace_info bug (#5193) |
194 | // the function erroneously sets the ADD_INDEX and DROP_INDEX flags for a |
195 | // column addition that does not change the keys. |
196 | // the following code turns the ADD_INDEX and DROP_INDEX flags so that |
197 | // we can do hot column addition later. |
198 | if (handler_flags & |
199 | (ALTER_ADD_COLUMN + ALTER_DROP_COLUMN)) { |
200 | if (handler_flags & |
201 | (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX)) { |
202 | if (tables_have_same_keys( |
203 | table, |
204 | altered_table, |
205 | tokudb::sysvars::alter_print_error(thd) != 0, false)) { |
206 | handler_flags &= |
207 | ~(ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + |
208 | ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX); |
209 | } |
210 | } |
211 | } |
212 | |
213 | // always allow rename table + any other operation, so turn off the |
214 | // rename flag |
215 | handler_flags &= ~ALTER_RENAME; |
216 | |
217 | // ALTER_STORED_COLUMN_TYPE may be set when no columns have been changed, |
218 | // so turn off the flag |
219 | if (handler_flags & ALTER_STORED_COLUMN_TYPE) { |
220 | if (all_fields_are_same_type(table, altered_table)) { |
221 | handler_flags &= ~ALTER_STORED_COLUMN_TYPE; |
222 | } |
223 | } |
224 | |
225 | return handler_flags; |
226 | } |
227 | |
228 | // Require that there is no intersection of add and drop names. |
229 | static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) { |
230 | for (uint d = 0; d < ha_alter_info->index_drop_count; d++) { |
231 | KEY* drop_key = ha_alter_info->index_drop_buffer[d]; |
232 | for (uint a = 0; a < ha_alter_info->index_add_count; a++) { |
233 | KEY* add_key = |
234 | &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[a]]; |
235 | if (strcmp(drop_key->name.str, add_key->name.str) == 0) { |
236 | return false; |
237 | } |
238 | } |
239 | } |
240 | return true; |
241 | } |
242 | |
// True exactly when bits has at least one bit from mask set and has no bit
// set outside of mask.
static bool only_flags(ulong bits, ulong mask) {
    const ulong outside_mask = bits & ~mask;
    if (outside_mask != 0) {
        return false;
    }
    const ulong inside_mask = bits & mask;
    return inside_mask != 0;
}
248 | |
249 | // Check if an alter table operation on this table and described by the alter |
250 | // table parameters is supported inplace and if so, what type of locking is |
251 | // needed to execute it. return values: |
252 | |
253 | // HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an |
254 | // inplace operation, a table copy is required |
255 | |
256 | // HA_ALTER_ERROR: the alter table operation should fail |
257 | |
258 | // HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X |
259 | |
260 | // HA_ALTER_INPLACE_COPY_LOCK: prepare runs with MDL X, |
261 | // alter runs with MDL SNW |
262 | |
263 | // HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW, |
264 | // concurrent reads, no writes |
265 | |
266 | // HA_ALTER_INPLACE_COPY_NO_LOCK: prepare runs with MDL X, |
267 | // alter runs with MDL SW |
268 | |
269 | // HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW, |
270 | // concurrent reads, writes. |
271 | // must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid |
272 | // deadlocks with the MDL lock and the table lock |
273 | enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter( |
274 | TABLE* altered_table, |
275 | Alter_inplace_info* ha_alter_info) { |
276 | |
277 | TOKUDB_HANDLER_DBUG_ENTER("" ); |
278 | |
279 | if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) { |
280 | print_alter_info(altered_table, ha_alter_info); |
281 | } |
282 | |
283 | // default is NOT inplace |
284 | enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED; |
285 | THD* thd = ha_thd(); |
286 | |
287 | // setup context |
288 | tokudb_alter_ctx* ctx = new tokudb_alter_ctx; |
289 | ha_alter_info->handler_ctx = ctx; |
290 | ctx->handler_flags = |
291 | fix_handler_flags(thd, table, altered_table, ha_alter_info); |
292 | ctx->table_kc_info = &share->kc_info; |
293 | ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base; |
294 | memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO)); |
295 | |
296 | if (tokudb::sysvars::disable_hot_alter(thd)) { |
297 | ; // do nothing |
298 | } else if (only_flags( |
299 | ctx->handler_flags, |
300 | ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX + |
301 | ALTER_DROP_UNIQUE_INDEX + |
302 | ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + |
303 | ALTER_ADD_UNIQUE_INDEX)) { |
304 | // add or drop index |
305 | if (table->s->null_bytes == altered_table->s->null_bytes && |
306 | (ha_alter_info->index_add_count > 0 || |
307 | ha_alter_info->index_drop_count > 0) && |
308 | !tables_have_same_keys( |
309 | table, |
310 | altered_table, |
311 | tokudb::sysvars::alter_print_error(thd) != 0, false) && |
312 | is_disjoint_add_drop(ha_alter_info)) { |
313 | |
314 | if (ctx->handler_flags & |
315 | (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX + |
316 | ALTER_DROP_UNIQUE_INDEX)) { |
317 | // the fractal tree can not handle dropping an index concurrent |
318 | // with querying with the index. |
319 | // we grab an exclusive MDL for the drop index. |
320 | result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; |
321 | } else { |
322 | /* FIXME: MDEV-16099 Use alter algorithm=nocopy |
323 | or algorithm=instant for non-InnoDB engine */ |
324 | result = HA_ALTER_INPLACE_COPY_LOCK; |
325 | |
326 | // someday, allow multiple hot indexes via alter table add key. |
327 | // don't forget to change the store_lock function. |
328 | // for now, hot indexing is only supported via session variable |
329 | // with the create index sql command |
330 | if (ha_alter_info->index_add_count == 1 && |
331 | // only one add or drop |
332 | ha_alter_info->index_drop_count == 0 && |
333 | // must be add index not add unique index |
334 | ctx->handler_flags == ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX && |
335 | // must be a create index command |
336 | thd_sql_command(thd) == SQLCOM_CREATE_INDEX && |
337 | // must be enabled |
338 | tokudb::sysvars::create_index_online(thd)) { |
339 | // external_lock set WRITE_ALLOW_WRITE which allows writes |
340 | // concurrent with the index creation |
341 | /* FIXME: MDEV-16099 Use alter algorithm=nocopy |
342 | or algorithm=instant for non-InnoDB engine */ |
343 | result = HA_ALTER_INPLACE_COPY_NO_LOCK; |
344 | } |
345 | } |
346 | } |
347 | } else if (only_flags( |
348 | ctx->handler_flags, |
349 | ALTER_COLUMN_DEFAULT)) { |
350 | // column default |
351 | if (table->s->null_bytes == altered_table->s->null_bytes) |
352 | result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; |
353 | } else if (ctx->handler_flags & ALTER_COLUMN_NAME && |
354 | only_flags( |
355 | ctx->handler_flags, |
356 | ALTER_COLUMN_NAME | |
357 | ALTER_COLUMN_DEFAULT)) { |
358 | // column rename |
359 | // we have identified a possible column rename, |
360 | // but let's do some more checks |
361 | |
362 | // we will only allow an hcr if there are no changes |
363 | // in column positions (ALTER_STORED_COLUMN_ORDER is not set) |
364 | |
365 | // now need to verify that one and only one column |
366 | // has changed only its name. If we find anything to |
367 | // the contrary, we don't allow it, also check indexes |
368 | if (table->s->null_bytes == altered_table->s->null_bytes) { |
369 | bool cr_supported = |
370 | column_rename_supported( |
371 | table, |
372 | altered_table, |
373 | (ctx->handler_flags & |
374 | ALTER_STORED_COLUMN_ORDER) != 0); |
375 | if (cr_supported) |
376 | result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; |
377 | } |
378 | } else if (ctx->handler_flags & ALTER_ADD_COLUMN && |
379 | only_flags( |
380 | ctx->handler_flags, |
381 | ALTER_ADD_COLUMN | |
382 | ALTER_STORED_COLUMN_ORDER) && |
383 | setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) { |
384 | |
385 | // add column |
386 | uint32_t added_columns[altered_table->s->fields]; |
387 | uint32_t num_added_columns = 0; |
388 | int r = |
389 | find_changed_columns( |
390 | added_columns, |
391 | &num_added_columns, |
392 | table, |
393 | altered_table); |
394 | if (r == 0) { |
395 | if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) { |
396 | for (uint32_t i = 0; i < num_added_columns; i++) { |
397 | uint32_t curr_added_index = added_columns[i]; |
398 | Field* curr_added_field = |
399 | altered_table->field[curr_added_index]; |
400 | TOKUDB_TRACE( |
401 | "Added column: index %d, name %s" , |
402 | curr_added_index, |
403 | curr_added_field->field_name.str); |
404 | } |
405 | } |
406 | result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; |
407 | } |
408 | } else if (ctx->handler_flags & ALTER_DROP_COLUMN && |
409 | only_flags( |
410 | ctx->handler_flags, |
411 | ALTER_DROP_COLUMN | |
412 | ALTER_STORED_COLUMN_ORDER) && |
413 | setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) { |
414 | |
415 | // drop column |
416 | uint32_t dropped_columns[table->s->fields]; |
417 | uint32_t num_dropped_columns = 0; |
418 | int r = |
419 | find_changed_columns( |
420 | dropped_columns, |
421 | &num_dropped_columns, |
422 | altered_table, |
423 | table); |
424 | if (r == 0) { |
425 | if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) { |
426 | for (uint32_t i = 0; i < num_dropped_columns; i++) { |
427 | uint32_t curr_dropped_index = dropped_columns[i]; |
428 | Field* curr_dropped_field = table->field[curr_dropped_index]; |
429 | TOKUDB_TRACE( |
430 | "Dropped column: index %d, name %s" , |
431 | curr_dropped_index, |
432 | curr_dropped_field->field_name.str); |
433 | } |
434 | } |
435 | result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; |
436 | } |
437 | } else if ((ctx->handler_flags & |
438 | ALTER_COLUMN_EQUAL_PACK_LENGTH) && |
439 | only_flags( |
440 | ctx->handler_flags, |
441 | ALTER_COLUMN_EQUAL_PACK_LENGTH | |
442 | ALTER_COLUMN_DEFAULT) && |
443 | table->s->fields == altered_table->s->fields && |
444 | find_changed_fields( |
445 | table, |
446 | altered_table, |
447 | ctx->changed_fields) > 0 && |
448 | setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) { |
449 | |
450 | // change column length |
451 | if (change_length_is_supported( |
452 | table, |
453 | altered_table, |
454 | ha_alter_info, ctx)) { |
455 | result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; |
456 | } |
457 | } else if ((ctx->handler_flags & ALTER_STORED_COLUMN_TYPE) && |
458 | only_flags( |
459 | ctx->handler_flags, |
460 | ALTER_STORED_COLUMN_TYPE | |
461 | ALTER_COLUMN_DEFAULT) && |
462 | table->s->fields == altered_table->s->fields && |
463 | find_changed_fields( |
464 | table, |
465 | altered_table, |
466 | ctx->changed_fields) > 0 && |
467 | setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) { |
468 | |
469 | // change column type |
470 | if (change_type_is_supported( |
471 | table, |
472 | altered_table, |
473 | ha_alter_info, ctx)) { |
474 | result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; |
475 | } |
476 | } else if (only_flags( |
477 | ctx->handler_flags, |
478 | ALTER_CHANGE_CREATE_OPTION)) { |
479 | |
480 | HA_CREATE_INFO* create_info = ha_alter_info->create_info; |
481 | #if TOKU_INCLUDE_OPTION_STRUCTS |
482 | // set the USED_ROW_FORMAT flag for use later in this file for changes in the table's |
483 | // compression |
484 | if (create_info->option_struct->row_format != |
485 | table_share->option_struct->row_format) |
486 | create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT; |
487 | #endif |
488 | // alter auto_increment |
489 | if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) { |
490 | // do a sanity check that the table is what we think it is |
491 | if (tables_have_same_keys_and_columns( |
492 | table, |
493 | altered_table, |
494 | tokudb::sysvars::alter_print_error(thd) != 0)) { |
495 | result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; |
496 | } |
497 | } else if (only_flags( |
498 | create_info->used_fields, |
499 | HA_CREATE_USED_ROW_FORMAT)) { |
500 | // alter row_format |
501 | // do a sanity check that the table is what we think it is |
502 | if (tables_have_same_keys_and_columns( |
503 | table, |
504 | altered_table, |
505 | tokudb::sysvars::alter_print_error(thd) != 0)) { |
506 | result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; |
507 | } |
508 | } |
509 | } |
510 | #if TOKU_OPTIMIZE_WITH_RECREATE |
511 | else if (only_flags( |
512 | ctx->handler_flags, |
513 | ALTER_RECREATE_TABLE | |
514 | ALTER_COLUMN_DEFAULT)) { |
515 | ctx->optimize_needed = true; |
516 | /* FIXME: MDEV-16099 Use alter algorithm=nocopy |
517 | or algorithm=instant for non-InnoDB engine */ |
518 | result = HA_ALTER_INPLACE_COPY_NO_LOCK; |
519 | } |
520 | #endif |
521 | |
522 | if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE)) && |
523 | result != HA_ALTER_INPLACE_NOT_SUPPORTED && |
524 | table->s->null_bytes != altered_table->s->null_bytes) { |
525 | |
526 | TOKUDB_HANDLER_TRACE("q %s" , thd->query()); |
527 | TOKUDB_HANDLER_TRACE( |
528 | "null bytes %u -> %u" , |
529 | table->s->null_bytes, |
530 | altered_table->s->null_bytes); |
531 | } |
532 | |
533 | // turn a not supported result into an error if the slow alter table |
534 | // (copy) is disabled |
535 | if (result == HA_ALTER_INPLACE_NOT_SUPPORTED && |
536 | tokudb::sysvars::disable_slow_alter(thd)) { |
537 | print_error(HA_ERR_UNSUPPORTED, MYF(0)); |
538 | result = HA_ALTER_ERROR; |
539 | } |
540 | |
541 | DBUG_RETURN(result); |
542 | } |
543 | |
544 | // Prepare for the alter operations |
545 | bool ha_tokudb::prepare_inplace_alter_table( |
546 | TABLE* altered_table, |
547 | Alter_inplace_info* ha_alter_info) { |
548 | |
549 | TOKUDB_HANDLER_DBUG_ENTER("" ); |
550 | tokudb_alter_ctx* ctx = |
551 | static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); |
552 | assert_always(transaction); // transaction must exist after table is locked |
553 | ctx->alter_txn = transaction; |
554 | bool result = false; // success |
555 | DBUG_RETURN(result); |
556 | } |
557 | |
558 | // Execute the alter operations. |
559 | bool ha_tokudb::inplace_alter_table( |
560 | TABLE* altered_table, |
561 | Alter_inplace_info* ha_alter_info) { |
562 | |
563 | TOKUDB_HANDLER_DBUG_ENTER("" ); |
564 | |
565 | int error = 0; |
566 | tokudb_alter_ctx* ctx = |
567 | static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); |
568 | HA_CREATE_INFO* create_info = ha_alter_info->create_info; |
569 | |
570 | // this should be enough to handle locking as the higher level MDL |
571 | // on this table should prevent any new analyze tasks. |
572 | share->cancel_background_jobs(); |
573 | |
574 | if (error == 0 && |
575 | (ctx->handler_flags & |
576 | (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX | |
577 | ALTER_DROP_UNIQUE_INDEX))) { |
578 | error = alter_table_drop_index(altered_table, ha_alter_info); |
579 | } |
580 | if (error == 0 && |
581 | (ctx->handler_flags & |
582 | (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + |
583 | ALTER_ADD_UNIQUE_INDEX))) { |
584 | error = alter_table_add_index(altered_table, ha_alter_info); |
585 | } |
586 | if (error == 0 && |
587 | (ctx->handler_flags & |
588 | (ALTER_ADD_COLUMN | |
589 | ALTER_DROP_COLUMN))) { |
590 | error = alter_table_add_or_drop_column(altered_table, ha_alter_info); |
591 | } |
592 | if (error == 0 && |
593 | (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) && |
594 | (create_info->used_fields & HA_CREATE_USED_AUTO)) { |
595 | error = write_auto_inc_create( |
596 | share->status_block, |
597 | create_info->auto_increment_value, |
598 | ctx->alter_txn); |
599 | } |
600 | if (error == 0 && |
601 | (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) && |
602 | (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { |
603 | // Get the current compression |
604 | DB *db = share->key_file[0]; |
605 | error = db->get_compression_method(db, &ctx->orig_compression_method); |
606 | assert_always(error == 0); |
607 | |
608 | // Set the new compression |
609 | #if TOKU_INCLUDE_OPTION_STRUCTS |
610 | toku_compression_method method = |
611 | row_format_to_toku_compression_method( |
612 | (tokudb::sysvars::row_format_t)create_info->option_struct->row_format); |
613 | #else |
614 | toku_compression_method method = |
615 | row_type_to_toku_compression_method(create_info->row_type); |
616 | #endif |
617 | uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); |
618 | for (uint32_t i = 0; i < curr_num_DBs; i++) { |
619 | db = share->key_file[i]; |
620 | error = db->change_compression_method(db, method); |
621 | if (error) |
622 | break; |
623 | ctx->compression_changed = true; |
624 | } |
625 | } |
626 | |
627 | // note: only one column expansion is allowed |
628 | |
629 | if (error == 0 && ctx->expand_fixed_update_needed) |
630 | error = alter_table_expand_columns(altered_table, ha_alter_info); |
631 | |
632 | if (error == 0 && ctx->expand_varchar_update_needed) |
633 | error = alter_table_expand_varchar_offsets( |
634 | altered_table, |
635 | ha_alter_info); |
636 | |
637 | if (error == 0 && ctx->expand_blob_update_needed) |
638 | error = alter_table_expand_blobs(altered_table, ha_alter_info); |
639 | |
640 | if (error == 0 && ctx->reset_card) { |
641 | error = tokudb::alter_card( |
642 | share->status_block, |
643 | ctx->alter_txn, |
644 | table->s, |
645 | altered_table->s); |
646 | } |
647 | if (error == 0 && ctx->optimize_needed) { |
648 | error = do_optimize(ha_thd()); |
649 | } |
650 | |
651 | #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \ |
652 | (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) |
653 | #if WITH_PARTITION_STORAGE_ENGINE |
654 | if (error == 0 && |
655 | (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL)) { |
656 | #else |
657 | if (error == 0) { |
658 | #endif |
659 | error = write_frm_data( |
660 | share->status_block, |
661 | ctx->alter_txn, |
662 | altered_table->s->path.str); |
663 | } |
664 | #endif |
665 | |
666 | bool result = false; // success |
667 | if (error) { |
668 | print_error(error, MYF(0)); |
669 | result = true; // failure |
670 | } |
671 | |
672 | DBUG_RETURN(result); |
673 | } |
674 | |
675 | int ha_tokudb::alter_table_add_index( |
676 | TABLE* altered_table, |
677 | Alter_inplace_info* ha_alter_info) { |
678 | |
679 | // sort keys in add index order |
680 | KEY* key_info = (KEY*)tokudb::memory::malloc( |
681 | sizeof(KEY) * ha_alter_info->index_add_count, |
682 | MYF(MY_WME)); |
683 | for (uint i = 0; i < ha_alter_info->index_add_count; i++) { |
684 | KEY *key = &key_info[i]; |
685 | *key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]]; |
686 | for (KEY_PART_INFO* key_part = key->key_part; |
687 | key_part < key->key_part + key->user_defined_key_parts; |
688 | key_part++) { |
689 | key_part->field = table->field[key_part->fieldnr]; |
690 | } |
691 | } |
692 | |
693 | tokudb_alter_ctx* ctx = |
694 | static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); |
695 | ctx->add_index_changed = true; |
696 | int error = tokudb_add_index( |
697 | table, |
698 | key_info, |
699 | ha_alter_info->index_add_count, |
700 | ctx->alter_txn, |
701 | &ctx->incremented_num_DBs, |
702 | &ctx->modified_DBs); |
703 | if (error == HA_ERR_FOUND_DUPP_KEY) { |
704 | // hack for now, in case of duplicate key error, |
705 | // because at the moment we cannot display the right key |
706 | // information to the user, so that he knows potentially what went |
707 | // wrong. |
708 | last_dup_key = MAX_KEY; |
709 | } |
710 | |
711 | tokudb::memory::free(key_info); |
712 | |
713 | if (error == 0) |
714 | ctx->reset_card = true; |
715 | |
716 | return error; |
717 | } |
718 | |
719 | static bool find_index_of_key( |
720 | const char* key_name, |
721 | TABLE* table, |
722 | uint* index_offset_ptr) { |
723 | |
724 | for (uint i = 0; i < table->s->keys; i++) { |
725 | if (strcmp(key_name, table->key_info[i].name.str) == 0) { |
726 | *index_offset_ptr = i; |
727 | return true; |
728 | } |
729 | } |
730 | return false; |
731 | } |
732 | |
733 | static bool find_index_of_key( |
734 | const char* key_name, |
735 | KEY* key_info, |
736 | uint key_count, |
737 | uint* index_offset_ptr) { |
738 | |
739 | for (uint i = 0; i < key_count; i++) { |
740 | if (strcmp(key_name, key_info[i].name.str) == 0) { |
741 | *index_offset_ptr = i; |
742 | return true; |
743 | } |
744 | } |
745 | return false; |
746 | } |
747 | |
748 | int ha_tokudb::alter_table_drop_index( |
749 | TABLE* altered_table, |
750 | Alter_inplace_info* ha_alter_info) { |
751 | |
752 | KEY *key_info = table->key_info; |
753 | // translate key names to indexes into the key_info array |
754 | uint index_drop_offsets[ha_alter_info->index_drop_count]; |
755 | for (uint i = 0; i < ha_alter_info->index_drop_count; i++) { |
756 | bool found; |
757 | found = find_index_of_key( |
758 | ha_alter_info->index_drop_buffer[i]->name.str, |
759 | table, |
760 | &index_drop_offsets[i]); |
761 | if (!found) { |
762 | // undo of add key in partition engine |
763 | found = find_index_of_key( |
764 | ha_alter_info->index_drop_buffer[i]->name.str, |
765 | ha_alter_info->key_info_buffer, |
766 | ha_alter_info->key_count, |
767 | &index_drop_offsets[i]); |
768 | assert_always(found); |
769 | key_info = ha_alter_info->key_info_buffer; |
770 | } |
771 | } |
772 | |
773 | // drop indexes |
774 | tokudb_alter_ctx* ctx = |
775 | static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); |
776 | ctx->drop_index_changed = true; |
777 | |
778 | int error = drop_indexes( |
779 | table, |
780 | index_drop_offsets, |
781 | ha_alter_info->index_drop_count, |
782 | key_info, |
783 | ctx->alter_txn); |
784 | |
785 | if (error == 0) |
786 | ctx->reset_card = true; |
787 | |
788 | return error; |
789 | } |
790 | |
791 | int ha_tokudb::alter_table_add_or_drop_column( |
792 | TABLE* altered_table, |
793 | Alter_inplace_info* ha_alter_info) { |
794 | |
795 | tokudb_alter_ctx* ctx = |
796 | static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); |
797 | int error; |
798 | uchar *column_extra = NULL; |
799 | uint32_t max_column_extra_size; |
800 | uint32_t num_column_extra; |
801 | uint32_t num_columns = 0; |
802 | uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); |
803 | // set size such that we know it is big enough for both cases |
804 | uint32_t columns[table->s->fields + altered_table->s->fields]; |
805 | memset(columns, 0, sizeof(columns)); |
806 | |
807 | // generate the array of columns |
808 | if (ha_alter_info->handler_flags & ALTER_DROP_COLUMN) { |
809 | find_changed_columns( |
810 | columns, |
811 | &num_columns, |
812 | altered_table, |
813 | table); |
814 | } else if (ha_alter_info->handler_flags & ALTER_ADD_COLUMN) { |
815 | find_changed_columns( |
816 | columns, |
817 | &num_columns, |
818 | table, |
819 | altered_table); |
820 | } else { |
821 | assert_unreachable(); |
822 | } |
823 | max_column_extra_size = |
824 | // max static row_mutator |
825 | STATIC_ROW_MUTATOR_SIZE + |
826 | // max dynamic row_mutator |
827 | 4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength + |
828 | // max static blob size |
829 | (4 + share->kc_info.num_blobs) + |
830 | // max dynamic blob size |
831 | (num_columns*(1+4+1+4)); |
832 | column_extra = (uchar*)tokudb::memory::malloc( |
833 | max_column_extra_size, |
834 | MYF(MY_WME)); |
835 | if (column_extra == NULL) { |
836 | error = ENOMEM; |
837 | goto cleanup; |
838 | } |
839 | |
840 | for (uint32_t i = 0; i < curr_num_DBs; i++) { |
841 | // change to a new descriptor |
842 | DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor); |
843 | error = new_row_descriptor( |
844 | table, |
845 | altered_table, |
846 | ha_alter_info, |
847 | i, |
848 | &row_descriptor); |
849 | if (error) |
850 | goto cleanup; |
851 | error = share->key_file[i]->change_descriptor( |
852 | share->key_file[i], |
853 | ctx->alter_txn, |
854 | &row_descriptor, |
855 | 0); |
856 | tokudb::memory::free(row_descriptor.data); |
857 | if (error) |
858 | goto cleanup; |
859 | |
860 | if (i == primary_key || key_is_clustering(&table_share->key_info[i])) { |
861 | num_column_extra = fill_row_mutator( |
862 | column_extra, |
863 | columns, |
864 | num_columns, |
865 | altered_table, |
866 | ctx->altered_table_kc_info, |
867 | i, |
868 | // true if adding columns, otherwise is a drop |
869 | (ha_alter_info->handler_flags & |
870 | ALTER_ADD_COLUMN) != 0); |
871 | |
872 | DBT column_dbt; memset(&column_dbt, 0, sizeof column_dbt); |
873 | column_dbt.data = column_extra; |
874 | column_dbt.size = num_column_extra; |
875 | DBUG_ASSERT(num_column_extra <= max_column_extra_size); |
876 | error = share->key_file[i]->update_broadcast( |
877 | share->key_file[i], |
878 | ctx->alter_txn, |
879 | &column_dbt, |
880 | DB_IS_RESETTING_OP); |
881 | if (error) { |
882 | goto cleanup; |
883 | } |
884 | } |
885 | } |
886 | |
887 | error = 0; |
888 | cleanup: |
889 | tokudb::memory::free(column_extra); |
890 | return error; |
891 | } |
892 | |
// Commit or abort the alter operations.
// If commit then write the new frm data to the status using the alter
// transaction.
// If abort then abort the alter transaction and try to rollback the
// non-transactional changes.
//
// Parameters:
//   altered_table  supplies part_info and the path handed to write_frm_data
//   ha_alter_info  alter description; its handler_ctx is the tokudb_alter_ctx
//   commit         true to commit, false to abort
// Returns false on success. The only place the result becomes true is a
// failed write_frm_data call, which also turns the commit into an abort.
bool ha_tokudb::commit_inplace_alter_table(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info,
    bool commit) {

    TOKUDB_HANDLER_DBUG_ENTER("" );

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    bool result = false; // success
    THD *thd = ha_thd();

    if (commit) {
#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
    (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
    (100000 <= MYSQL_VERSION_ID)
        // reset the group commit context when one was provided
        if (ha_alter_info->group_commit_ctx) {
            ha_alter_info->group_commit_ctx = NULL;
        }
#endif
#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) || \
    (100000 <= MYSQL_VERSION_ID)
        // the opening "if" differs per build configuration; both variants
        // share the body and closing brace below
#if WITH_PARTITION_STORAGE_ENGINE
        if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
#else
        if (true) {
#endif
            int error = write_frm_data(
                share->status_block,
                ctx->alter_txn,
                altered_table->s->path.str);
            if (error) {
                // fall through to the abort path below and report failure
                commit = false;
                result = true;
                print_error(error, MYF(0));
            }
        }
#endif
    }

    if (!commit) {
        // index or compression changes are undone below; make sure the
        // table's metadata lock is exclusive before doing so
        if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE &&
            (ctx->add_index_changed || ctx->drop_index_changed ||
             ctx->compression_changed)) {

            // get exclusive lock no matter what
#if defined(MARIADB_BASE_VERSION)
            // remember the killed state, clear it for the duration of the
            // wait, and retry once a second while
            // wait_while_table_is_used() keeps returning non-zero
            killed_state saved_killed_state = thd->killed;
            thd->killed = NOT_KILLED;
            for (volatile uint i = 0;
                 wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED);
                 i++) {
                if (thd->killed != NOT_KILLED)
                    thd->killed = NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            // put the saved state back unless a new kill arrived meanwhile
            if (thd->killed == NOT_KILLED)
                thd->killed = saved_killed_state;
#else
            THD::killed_state saved_killed_state = thd->killed;
            thd->killed = THD::NOT_KILLED;
            // MySQL does not handle HA_EXTRA_NOT_USED so we use
            // HA_EXTRA_PREPARE_FOR_RENAME since it is passed through
            // the partition storage engine and is treated as a NOP by tokudb
            for (volatile uint i = 0;
                 wait_while_table_is_used(
                     thd,
                     table,
                     HA_EXTRA_PREPARE_FOR_RENAME);
                 i++) {
                if (thd->killed != THD::NOT_KILLED)
                    thd->killed = THD::NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            // put the saved state back unless a new kill arrived meanwhile
            if (thd->killed == THD::NOT_KILLED)
                thd->killed = saved_killed_state;
#endif
        }

        // abort the alter transaction NOW so that any alters are rolled back.
        // this allows the following restores to work.
        tokudb_trx_data* trx =
            (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
        assert_always(ctx->alter_txn == trx->stmt);
        assert_always(trx->tokudb_lock_count > 0);
        // for partitioned tables, we use a single transaction to do all of the
        // partition changes.  the tokudb_lock_count is a reference count for
        // each of the handlers to the same transaction.  obviously, we want
        // to only abort once.
        if (trx->tokudb_lock_count > 0) {
            // drop this handler's reference; the transaction is aborted only
            // once the count falls to create_lock_count or below
            if (--trx->tokudb_lock_count <= trx->create_lock_count) {
                trx->create_lock_count = 0;
                abort_txn(ctx->alter_txn);
                ctx->alter_txn = NULL;
                trx->stmt = NULL;
                trx->sub_sp_level = NULL;
            }
            transaction = NULL;
        }

        // undo the non-transactional changes recorded in the alter context
        if (ctx->add_index_changed) {
            restore_add_index(
                table,
                ha_alter_info->index_add_count,
                ctx->incremented_num_DBs,
                ctx->modified_DBs);
        }
        if (ctx->drop_index_changed) {
            // translate key names to indexes into the key_info array
            // NOTE(review): variable-length array, a compiler extension in C++
            uint index_drop_offsets[ha_alter_info->index_drop_count];
            for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
                bool found = find_index_of_key(
                    ha_alter_info->index_drop_buffer[i]->name.str,
                    table,
                    &index_drop_offsets[i]);
                assert_always(found);
            }
            restore_drop_indexes(
                table,
                index_drop_offsets,
                ha_alter_info->index_drop_count);
        }
        if (ctx->compression_changed) {
            // put every dictionary back on the original compression method
            uint32_t curr_num_DBs =
                table->s->keys + tokudb_test(hidden_primary_key);
            for (uint32_t i = 0; i < curr_num_DBs; i++) {
                DB *db = share->key_file[i];
                int error = db->change_compression_method(
                    db,
                    ctx->orig_compression_method);
                assert_always(error == 0);
            }
        }
    }
    DBUG_RETURN(result);
}
1036 | |
1037 | // Setup the altered table's key and col info. |
1038 | int ha_tokudb::setup_kc_info( |
1039 | TABLE* altered_table, |
1040 | KEY_AND_COL_INFO* altered_kc_info) { |
1041 | |
1042 | int error = allocate_key_and_col_info(altered_table->s, altered_kc_info); |
1043 | if (error == 0) |
1044 | error = initialize_key_and_col_info( |
1045 | altered_table->s, |
1046 | altered_table, |
1047 | altered_kc_info, |
1048 | hidden_primary_key, |
1049 | primary_key); |
1050 | return error; |
1051 | } |
1052 | |
1053 | // Expand the variable length fields offsets from 1 to 2 bytes. |
1054 | int ha_tokudb::alter_table_expand_varchar_offsets( |
1055 | TABLE* altered_table, |
1056 | Alter_inplace_info* ha_alter_info) { |
1057 | |
1058 | int error = 0; |
1059 | tokudb_alter_ctx* ctx = |
1060 | static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); |
1061 | |
1062 | uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); |
1063 | for (uint32_t i = 0; i < curr_num_DBs; i++) { |
1064 | // change to a new descriptor |
1065 | DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor); |
1066 | error = new_row_descriptor( |
1067 | table, |
1068 | altered_table, |
1069 | ha_alter_info, |
1070 | i, |
1071 | &row_descriptor); |
1072 | if (error) |
1073 | break; |
1074 | error = share->key_file[i]->change_descriptor( |
1075 | share->key_file[i], |
1076 | ctx->alter_txn, |
1077 | &row_descriptor, |
1078 | 0); |
1079 | tokudb::memory::free(row_descriptor.data); |
1080 | if (error) |
1081 | break; |
1082 | |
1083 | // for all trees that have values, make an update variable offsets |
1084 | // message and broadcast it into the tree |
1085 | if (i == primary_key || key_is_clustering(&table_share->key_info[i])) { |
1086 | uint32_t offset_start = |
1087 | table_share->null_bytes + |
1088 | share->kc_info.mcp_info[i].fixed_field_size; |
1089 | uint32_t offset_end = |
1090 | offset_start + |
1091 | share->kc_info.mcp_info[i].len_of_offsets; |
1092 | uint32_t number_of_offsets = offset_end - offset_start; |
1093 | |
1094 | // make the expand variable offsets message |
1095 | DBT expand; memset(&expand, 0, sizeof expand); |
1096 | expand.size = |
1097 | sizeof(uchar) + sizeof(offset_start) + sizeof(offset_end); |
1098 | expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME)); |
1099 | if (!expand.data) { |
1100 | error = ENOMEM; |
1101 | break; |
1102 | } |
1103 | uchar* expand_ptr = (uchar*)expand.data; |
1104 | expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS; |
1105 | expand_ptr += sizeof(uchar); |
1106 | |
1107 | memcpy(expand_ptr, &number_of_offsets, sizeof(number_of_offsets)); |
1108 | expand_ptr += sizeof(number_of_offsets); |
1109 | |
1110 | memcpy(expand_ptr, &offset_start, sizeof(offset_start)); |
1111 | expand_ptr += sizeof(offset_start); |
1112 | |
1113 | // and broadcast it into the tree |
1114 | error = share->key_file[i]->update_broadcast( |
1115 | share->key_file[i], |
1116 | ctx->alter_txn, |
1117 | &expand, |
1118 | DB_IS_RESETTING_OP); |
1119 | tokudb::memory::free(expand.data); |
1120 | if (error) |
1121 | break; |
1122 | } |
1123 | } |
1124 | |
1125 | return error; |
1126 | } |
1127 | |
1128 | // Return true if a field is part of a key |
1129 | static bool field_in_key(KEY *key, Field *field) { |
1130 | for (uint i = 0; i < key->user_defined_key_parts; i++) { |
1131 | KEY_PART_INFO *key_part = &key->key_part[i]; |
1132 | if (strcmp(key_part->field->field_name.str, field->field_name.str) == 0) |
1133 | return true; |
1134 | } |
1135 | return false; |
1136 | } |
1137 | |
1138 | // Return true if a field is part of any key |
1139 | static bool field_in_key_of_table(TABLE *table, Field *field) { |
1140 | for (uint i = 0; i < table->s->keys; i++) { |
1141 | if (field_in_key(&table->key_info[i], field)) |
1142 | return true; |
1143 | } |
1144 | return false; |
1145 | } |
1146 | |
1147 | // Return true if all changed varchar/varbinary field lengths can be changed |
1148 | // inplace, otherwise return false |
1149 | static bool change_varchar_length_is_supported( |
1150 | Field* old_field, |
1151 | Field* new_field, |
1152 | TABLE* table, |
1153 | TABLE* altered_table, |
1154 | Alter_inplace_info* ha_alter_info, |
1155 | tokudb_alter_ctx* ctx) { |
1156 | |
1157 | if (old_field->real_type() != MYSQL_TYPE_VARCHAR || |
1158 | new_field->real_type() != MYSQL_TYPE_VARCHAR || |
1159 | old_field->binary() != new_field->binary() || |
1160 | old_field->charset()->number != new_field->charset()->number || |
1161 | old_field->field_length > new_field->field_length) |
1162 | return false; |
1163 | if (ctx->table_kc_info->num_offset_bytes > |
1164 | ctx->altered_table_kc_info->num_offset_bytes) |
1165 | return false; // shrink is not supported |
1166 | if (ctx->table_kc_info->num_offset_bytes < |
1167 | ctx->altered_table_kc_info->num_offset_bytes) |
1168 | // sum of varchar lengths changed from 1 to 2 |
1169 | ctx->expand_varchar_update_needed = true; |
1170 | return true; |
1171 | } |
1172 | |
1173 | // Return true if all changed field lengths can be changed inplace, otherwise |
1174 | // return false |
1175 | static bool change_length_is_supported( |
1176 | TABLE* table, |
1177 | TABLE* altered_table, |
1178 | Alter_inplace_info* ha_alter_info, |
1179 | tokudb_alter_ctx* ctx) { |
1180 | |
1181 | if (table->s->fields != altered_table->s->fields) |
1182 | return false; |
1183 | if (table->s->null_bytes != altered_table->s->null_bytes) |
1184 | return false; |
1185 | if (ctx->changed_fields.elements() > 1) |
1186 | return false; // only support one field change |
1187 | for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0; |
1188 | ai < ctx->changed_fields.elements(); |
1189 | ai++) { |
1190 | uint i = ctx->changed_fields.at(ai); |
1191 | Field *old_field = table->field[i]; |
1192 | Field *new_field = altered_table->field[i]; |
1193 | if (old_field->real_type() != new_field->real_type()) |
1194 | return false; // no type conversions |
1195 | if (old_field->real_type() != MYSQL_TYPE_VARCHAR) |
1196 | return false; // only varchar |
1197 | if (field_in_key_of_table(table, old_field) || |
1198 | field_in_key_of_table(altered_table, new_field)) |
1199 | return false; // not in any key |
1200 | if (!change_varchar_length_is_supported( |
1201 | old_field, |
1202 | new_field, |
1203 | table, |
1204 | altered_table, |
1205 | ha_alter_info, |
1206 | ctx)) |
1207 | return false; |
1208 | } |
1209 | |
1210 | return true; |
1211 | } |
1212 | |
1213 | // Debug function that ensures that the array is sorted |
1214 | static bool is_sorted(Dynamic_array<uint> &a) { |
1215 | bool r = true; |
1216 | if (a.elements() > 0) { |
1217 | uint lastelement = a.at(0); |
1218 | for (DYNAMIC_ARRAY_ELEMENTS_TYPE i = 1; i < a.elements(); i++) |
1219 | if (lastelement > a.at(i)) |
1220 | r = false; |
1221 | } |
1222 | return r; |
1223 | } |
1224 | |
1225 | int ha_tokudb::alter_table_expand_columns( |
1226 | TABLE* altered_table, |
1227 | Alter_inplace_info* ha_alter_info) { |
1228 | |
1229 | int error = 0; |
1230 | tokudb_alter_ctx* ctx = |
1231 | static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); |
1232 | // since we build the changed_fields array in field order, it must be sorted |
1233 | assert_always(is_sorted(ctx->changed_fields)); |
1234 | for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0; |
1235 | error == 0 && ai < ctx->changed_fields.elements(); |
1236 | ai++) { |
1237 | uint expand_field_num = ctx->changed_fields.at(ai); |
1238 | error = alter_table_expand_one_column( |
1239 | altered_table, |
1240 | ha_alter_info, |
1241 | expand_field_num); |
1242 | } |
1243 | |
1244 | return error; |
1245 | } |
1246 | |
1247 | // Return true if the field is an unsigned int |
1248 | static bool is_unsigned(Field *f) { |
1249 | return (f->flags & UNSIGNED_FLAG) != 0; |
1250 | } |
1251 | |
1252 | // Return the starting offset in the value for a particular index (selected by |
1253 | // idx) of a particular field (selected by expand_field_num) |
1254 | // TODO: replace this? |
1255 | static uint32_t alter_table_field_offset( |
1256 | uint32_t null_bytes, |
1257 | KEY_AND_COL_INFO* kc_info, |
1258 | int idx, |
1259 | int expand_field_num) { |
1260 | |
1261 | uint32_t offset = null_bytes; |
1262 | for (int i = 0; i < expand_field_num; i++) { |
1263 | if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields |
1264 | continue; |
1265 | offset += kc_info->field_lengths[i]; |
1266 | } |
1267 | return offset; |
1268 | } |
1269 | |
// Send an expand message into all clustered indexes including the primary
//
// Every dictionary of the table first gets a new row descriptor built for the
// altered table. Dictionaries that hold row values then receive a broadcast
// update message laid out as:
//   uchar     operation (UPDATE_OP_EXPAND_UINT / _INT / _CHAR / _BINARY)
//   uint32_t  offset of the field in the altered table's value
//   uint32_t  old field length
//   uint32_t  new field length
//   uchar     pad character (CHAR and BINARY operations only)
//
// Parameters:
//   altered_table     table object for the definition after the alter
//   ha_alter_info     alter description; handler_ctx is the tokudb_alter_ctx
//   expand_field_num  index of the field to expand in table->field
// Returns 0 on success, ENOMEM if the message cannot be allocated, or the
// first error from new_row_descriptor, change_descriptor or update_broadcast.
int ha_tokudb::alter_table_expand_one_column(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info,
    int expand_field_num) {

    int error = 0;
    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);

    // the old and new field must map to the same toku type
    Field *old_field = table->field[expand_field_num];
    TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
    Field *new_field = altered_table->field[expand_field_num];
    TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
    assert_always(old_field_type == new_field_type);

    // pick the update operation and pad character from the field type; any
    // other type reaches assert_unreachable()
    uchar operation;
    uchar pad_char;
    switch (old_field_type) {
    case toku_type_int:
        assert_always(is_unsigned(old_field) == is_unsigned(new_field));
        if (is_unsigned(old_field))
            operation = UPDATE_OP_EXPAND_UINT;
        else
            operation = UPDATE_OP_EXPAND_INT;
        pad_char = 0;
        break;
    case toku_type_fixstring:
        operation = UPDATE_OP_EXPAND_CHAR;
        pad_char = old_field->charset()->pad_char;
        break;
    case toku_type_fixbinary:
        operation = UPDATE_OP_EXPAND_BINARY;
        pad_char = 0;
        break;
    default:
        assert_unreachable();
    }

    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
    for (uint32_t i = 0; i < curr_num_DBs; i++) {
        // change to a new descriptor
        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
        error = new_row_descriptor(
            table,
            altered_table,
            ha_alter_info,
            i,
            &row_descriptor);
        if (error)
            break;
        error = share->key_file[i]->change_descriptor(
            share->key_file[i],
            ctx->alter_txn,
            &row_descriptor,
            0);
        // the descriptor buffer is released whether or not the change worked
        tokudb::memory::free(row_descriptor.data);
        if (error)
            break;

        // for all trees that have values, make an expand update message and
        // broadcast it into the tree
        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
            // field position computed from the old and from the new layout;
            // both use the current table's null byte count
            uint32_t old_offset = alter_table_field_offset(
                table_share->null_bytes,
                ctx->table_kc_info,
                i,
                expand_field_num);
            uint32_t new_offset = alter_table_field_offset(
                table_share->null_bytes,
                ctx->altered_table_kc_info,
                i,
                expand_field_num);
            assert_always(old_offset <= new_offset);

            uint32_t old_length =
                ctx->table_kc_info->field_lengths[expand_field_num];
            assert_always(old_length == old_field->pack_length());

            uint32_t new_length =
                ctx->altered_table_kc_info->field_lengths[expand_field_num];
            assert_always(new_length == new_field->pack_length());

            // size the message from the fields serialized below; the pad
            // character is present only for CHAR and BINARY expansion
            DBT expand; memset(&expand, 0, sizeof(expand));
            expand.size =
                sizeof(operation) + sizeof(new_offset) +
                sizeof(old_length) + sizeof(new_length);
            if (operation == UPDATE_OP_EXPAND_CHAR ||
                operation == UPDATE_OP_EXPAND_BINARY)
                expand.size += sizeof(pad_char);
            expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
            if (!expand.data) {
                error = ENOMEM;
                break;
            }
            uchar *expand_ptr = (uchar *)expand.data;
            expand_ptr[0] = operation;
            expand_ptr += sizeof operation;

            // for the first altered field, old_offset == new_offset.
            // for the subsequent altered fields, the new_offset
            // should be used as it includes the length changes from the
            // previous altered fields.
            memcpy(expand_ptr, &new_offset, sizeof(new_offset));
            expand_ptr += sizeof(new_offset);

            memcpy(expand_ptr, &old_length, sizeof(old_length));
            expand_ptr += sizeof(old_length);

            memcpy(expand_ptr, &new_length, sizeof(new_length));
            expand_ptr += sizeof(new_length);

            if (operation == UPDATE_OP_EXPAND_CHAR ||
                operation == UPDATE_OP_EXPAND_BINARY) {
                memcpy(expand_ptr, &pad_char, sizeof(pad_char));
                expand_ptr += sizeof(pad_char);
            }

            // the writes above must have filled the buffer exactly
            assert_always(expand_ptr == (uchar*)expand.data + expand.size);

            // and broadcast it into the tree
            error = share->key_file[i]->update_broadcast(
                share->key_file[i],
                ctx->alter_txn,
                &expand,
                DB_IS_RESETTING_OP);
            tokudb::memory::free(expand.data);
            if (error)
                break;
        }
    }

    return error;
}
1404 | |
1405 | static void marshall_blob_lengths( |
1406 | tokudb::buffer& b, |
1407 | uint32_t n, |
1408 | TABLE* table, |
1409 | KEY_AND_COL_INFO* kc_info) { |
1410 | |
1411 | for (uint i = 0; i < n; i++) { |
1412 | uint blob_field_index = kc_info->blob_fields[i]; |
1413 | assert_always(blob_field_index < table->s->fields); |
1414 | uint8_t blob_field_length = |
1415 | table->s->field[blob_field_index]->row_pack_length(); |
1416 | b.append(&blob_field_length, sizeof blob_field_length); |
1417 | } |
1418 | } |
1419 | |
1420 | int ha_tokudb::alter_table_expand_blobs( |
1421 | TABLE* altered_table, |
1422 | Alter_inplace_info* ha_alter_info) { |
1423 | |
1424 | int error = 0; |
1425 | tokudb_alter_ctx* ctx = |
1426 | static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); |
1427 | |
1428 | uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); |
1429 | for (uint32_t i = 0; i < curr_num_DBs; i++) { |
1430 | // change to a new descriptor |
1431 | DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor); |
1432 | error = new_row_descriptor( |
1433 | table, |
1434 | altered_table, |
1435 | ha_alter_info, |
1436 | i, |
1437 | &row_descriptor); |
1438 | if (error) |
1439 | break; |
1440 | error = share->key_file[i]->change_descriptor( |
1441 | share->key_file[i], |
1442 | ctx->alter_txn, |
1443 | &row_descriptor, |
1444 | 0); |
1445 | tokudb::memory::free(row_descriptor.data); |
1446 | if (error) |
1447 | break; |
1448 | |
1449 | // for all trees that have values, make an update blobs message and |
1450 | // broadcast it into the tree |
1451 | if (i == primary_key || key_is_clustering(&table_share->key_info[i])) { |
1452 | tokudb::buffer b; |
1453 | uint8_t op = UPDATE_OP_EXPAND_BLOB; |
1454 | b.append(&op, sizeof op); |
1455 | b.append_ui<uint32_t>( |
1456 | table->s->null_bytes + |
1457 | ctx->table_kc_info->mcp_info[i].fixed_field_size); |
1458 | uint32_t var_offset_bytes = |
1459 | ctx->table_kc_info->mcp_info[i].len_of_offsets; |
1460 | b.append_ui<uint32_t>(var_offset_bytes); |
1461 | b.append_ui<uint32_t>( |
1462 | var_offset_bytes == 0 ? 0 : |
1463 | ctx->table_kc_info->num_offset_bytes); |
1464 | |
1465 | // add blobs info |
1466 | uint32_t num_blobs = ctx->table_kc_info->num_blobs; |
1467 | b.append_ui<uint32_t>(num_blobs); |
1468 | marshall_blob_lengths(b, num_blobs, table, ctx->table_kc_info); |
1469 | marshall_blob_lengths( |
1470 | b, |
1471 | num_blobs, |
1472 | altered_table, |
1473 | ctx->altered_table_kc_info); |
1474 | |
1475 | // and broadcast it into the tree |
1476 | DBT expand; memset(&expand, 0, sizeof expand); |
1477 | expand.data = b.data(); |
1478 | expand.size = b.size(); |
1479 | error = share->key_file[i]->update_broadcast( |
1480 | share->key_file[i], |
1481 | ctx->alter_txn, |
1482 | &expand, |
1483 | DB_IS_RESETTING_OP); |
1484 | if (error) |
1485 | break; |
1486 | } |
1487 | } |
1488 | |
1489 | return error; |
1490 | } |
1491 | |
1492 | // Return true if two fixed length fields can be changed inplace |
1493 | static bool change_fixed_length_is_supported( |
1494 | TABLE* table, |
1495 | TABLE* altered_table, |
1496 | Field* old_field, |
1497 | Field* new_field, |
1498 | tokudb_alter_ctx* ctx) { |
1499 | |
1500 | // no change in size is supported |
1501 | if (old_field->pack_length() == new_field->pack_length()) |
1502 | return true; |
1503 | // shrink is not supported |
1504 | if (old_field->pack_length() > new_field->pack_length()) |
1505 | return false; |
1506 | ctx->expand_fixed_update_needed = true; |
1507 | return true; |
1508 | } |
1509 | |
1510 | static bool change_blob_length_is_supported( |
1511 | TABLE* table, |
1512 | TABLE* altered_table, |
1513 | Field* old_field, |
1514 | Field* new_field, |
1515 | tokudb_alter_ctx* ctx) { |
1516 | |
1517 | // blob -> longer or equal length blob |
1518 | if (old_field->binary() && new_field->binary() && |
1519 | old_field->pack_length() <= new_field->pack_length()) { |
1520 | ctx->expand_blob_update_needed = true; |
1521 | return true; |
1522 | } |
1523 | // text -> longer or equal length text |
1524 | if (!old_field->binary() && !new_field->binary() && |
1525 | old_field->pack_length() <= new_field->pack_length() && |
1526 | old_field->charset()->number == new_field->charset()->number) { |
1527 | ctx->expand_blob_update_needed = true; |
1528 | return true; |
1529 | } |
1530 | return false; |
1531 | } |
1532 | |
1533 | // Return true if the MySQL type is an int or unsigned int type |
1534 | static bool is_int_type(enum_field_types t) { |
1535 | switch (t) { |
1536 | case MYSQL_TYPE_TINY: |
1537 | case MYSQL_TYPE_SHORT: |
1538 | case MYSQL_TYPE_INT24: |
1539 | case MYSQL_TYPE_LONG: |
1540 | case MYSQL_TYPE_LONGLONG: |
1541 | return true; |
1542 | default: |
1543 | return false; |
1544 | } |
1545 | } |
1546 | |
1547 | // Return true if two field types can be changed inplace |
1548 | static bool change_field_type_is_supported( |
1549 | Field* old_field, |
1550 | Field* new_field, |
1551 | TABLE* table, |
1552 | TABLE* altered_table, |
1553 | Alter_inplace_info* ha_alter_info, |
1554 | tokudb_alter_ctx* ctx) { |
1555 | |
1556 | enum_field_types old_type = old_field->real_type(); |
1557 | enum_field_types new_type = new_field->real_type(); |
1558 | if (is_int_type(old_type)) { |
1559 | // int and unsigned int expansion |
1560 | if (is_int_type(new_type) && |
1561 | is_unsigned(old_field) == is_unsigned(new_field)) |
1562 | return change_fixed_length_is_supported( |
1563 | table, |
1564 | altered_table, |
1565 | old_field, |
1566 | new_field, |
1567 | ctx); |
1568 | else |
1569 | return false; |
1570 | } else if (old_type == MYSQL_TYPE_STRING) { |
1571 | // char(X) -> char(Y) and binary(X) -> binary(Y) expansion |
1572 | if (new_type == MYSQL_TYPE_STRING && |
1573 | old_field->binary() == new_field->binary() && |
1574 | old_field->charset()->number == new_field->charset()->number) |
1575 | return change_fixed_length_is_supported( |
1576 | table, |
1577 | altered_table, |
1578 | old_field, |
1579 | new_field, |
1580 | ctx); |
1581 | else |
1582 | return false; |
1583 | } else if (old_type == MYSQL_TYPE_VARCHAR) { |
1584 | // varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion |
1585 | // where X < 256 <= Y the ALTER_STORED_COLUMN_TYPE handler flag is set for |
1586 | // these cases |
1587 | return change_varchar_length_is_supported( |
1588 | old_field, |
1589 | new_field, |
1590 | table, |
1591 | altered_table, |
1592 | ha_alter_info, |
1593 | ctx); |
1594 | } else if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB) { |
1595 | return change_blob_length_is_supported( |
1596 | table, |
1597 | altered_table, |
1598 | old_field, |
1599 | new_field, |
1600 | ctx); |
1601 | } else |
1602 | return false; |
1603 | } |
1604 | |
1605 | // Return true if all changed field types can be changed inplace |
1606 | static bool change_type_is_supported( |
1607 | TABLE* table, |
1608 | TABLE* altered_table, |
1609 | Alter_inplace_info* ha_alter_info, |
1610 | tokudb_alter_ctx* ctx) { |
1611 | |
1612 | if (table->s->null_bytes != altered_table->s->null_bytes) |
1613 | return false; |
1614 | if (table->s->fields != altered_table->s->fields) |
1615 | return false; |
1616 | if (ctx->changed_fields.elements() > 1) |
1617 | return false; // only support one field change |
1618 | for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0; |
1619 | ai < ctx->changed_fields.elements(); |
1620 | ai++) { |
1621 | uint i = ctx->changed_fields.at(ai); |
1622 | Field *old_field = table->field[i]; |
1623 | Field *new_field = altered_table->field[i]; |
1624 | if (field_in_key_of_table(table, old_field) || |
1625 | field_in_key_of_table(altered_table, new_field)) |
1626 | return false; |
1627 | if (!change_field_type_is_supported( |
1628 | old_field, |
1629 | new_field, |
1630 | table, |
1631 | altered_table, |
1632 | ha_alter_info, |
1633 | ctx)) |
1634 | return false; |
1635 | } |
1636 | return true; |
1637 | } |
1638 | |
1639 | // Allocate and initialize a new descriptor for a dictionary in the altered |
1640 | // table identified with idx. |
1641 | // Return the new descriptor in the row_descriptor DBT. |
1642 | // Return non-zero on error. |
1643 | int ha_tokudb::new_row_descriptor( |
1644 | TABLE* table, |
1645 | TABLE* altered_table, |
1646 | Alter_inplace_info* ha_alter_info, |
1647 | uint32_t idx, |
1648 | DBT* row_descriptor) { |
1649 | |
1650 | int error = 0; |
1651 | tokudb_alter_ctx* ctx = |
1652 | static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); |
1653 | row_descriptor->size = |
1654 | get_max_desc_size(ctx->altered_table_kc_info, altered_table); |
1655 | row_descriptor->data = |
1656 | (uchar*)tokudb::memory::malloc(row_descriptor->size, MYF(MY_WME)); |
1657 | if (row_descriptor->data == NULL) { |
1658 | error = ENOMEM; |
1659 | } else { |
1660 | KEY* prim_key = |
1661 | hidden_primary_key ? NULL : |
1662 | &altered_table->s->key_info[primary_key]; |
1663 | if (idx == primary_key) { |
1664 | row_descriptor->size = create_main_key_descriptor( |
1665 | (uchar*)row_descriptor->data, |
1666 | prim_key, |
1667 | hidden_primary_key, |
1668 | primary_key, |
1669 | altered_table, |
1670 | ctx->altered_table_kc_info); |
1671 | } else { |
1672 | row_descriptor->size = create_secondary_key_descriptor( |
1673 | (uchar*)row_descriptor->data, |
1674 | &altered_table->key_info[idx], |
1675 | prim_key, |
1676 | hidden_primary_key, |
1677 | altered_table, |
1678 | primary_key, |
1679 | idx, |
1680 | ctx->altered_table_kc_info); |
1681 | } |
1682 | error = 0; |
1683 | } |
1684 | return error; |
1685 | } |
1686 | |
1687 | #endif |
1688 | |