1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of TokuDB
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
    TokuDB is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 TokuDB is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with TokuDB. If not, see <http://www.gnu.org/licenses/>.
21
22======= */
23
24#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
25
26#if TOKU_INCLUDE_ALTER_56
27
28#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 101099
29#define TOKU_ALTER_RENAME ALTER_RENAME
30#define DYNAMIC_ARRAY_ELEMENTS_TYPE size_t
31#elif (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
32 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
33#define TOKU_ALTER_RENAME ALTER_RENAME
34#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
35#elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
36#define TOKU_ALTER_RENAME ALTER_RENAME_56
37#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
38#else
39#error
40#endif
41
42#include "ha_tokudb_alter_common.cc"
43#include <sql_array.h>
44#include <sql_base.h>
45
46// The tokudb alter context contains the alter state that is set in the check if supported method and used
47// later when the alter operation is executed.
48class tokudb_alter_ctx : public inplace_alter_handler_ctx {
49public:
50 tokudb_alter_ctx() :
51 handler_flags(0),
52 alter_txn(NULL),
53 add_index_changed(false),
54 drop_index_changed(false),
55 reset_card(false),
56 compression_changed(false),
57 expand_varchar_update_needed(false),
58 expand_fixed_update_needed(false),
59 expand_blob_update_needed(false),
60 optimize_needed(false),
61 table_kc_info(NULL),
62 altered_table_kc_info(NULL) {
63 }
64 ~tokudb_alter_ctx() {
65 if (altered_table_kc_info)
66 free_key_and_col_info(altered_table_kc_info);
67 }
68public:
69 ulong handler_flags;
70 DB_TXN* alter_txn;
71 bool add_index_changed;
72 bool incremented_num_DBs, modified_DBs;
73 bool drop_index_changed;
74 bool reset_card;
75 bool compression_changed;
76 enum toku_compression_method orig_compression_method;
77 bool expand_varchar_update_needed;
78 bool expand_fixed_update_needed;
79 bool expand_blob_update_needed;
80 bool optimize_needed;
81 Dynamic_array<uint> changed_fields;
82 KEY_AND_COL_INFO* table_kc_info;
83 KEY_AND_COL_INFO* altered_table_kc_info;
84 KEY_AND_COL_INFO altered_table_kc_info_base;
85};
86
87// Debug function to print out an alter table operation
88void ha_tokudb::print_alter_info(
89 TABLE* altered_table,
90 Alter_inplace_info* ha_alter_info) {
91
92 TOKUDB_TRACE(
93 "***are keys of two tables same? %d",
94 tables_have_same_keys(table, altered_table, false, false));
95 if (ha_alter_info->handler_flags) {
96 TOKUDB_TRACE("***alter flags set ***");
97 for (int i = 0; i < 32; i++) {
98 if (ha_alter_info->handler_flags & (1 << i))
99 TOKUDB_TRACE("%d", i);
100 }
101 }
102
103 // everyone calculates data by doing some default_values - record[0], but
104 // I do not see why that is necessary
105 TOKUDB_TRACE("******");
106 TOKUDB_TRACE("***orig table***");
107 for (uint i = 0; i < table->s->fields; i++) {
108 //
109 // make sure to use table->field, and NOT table->s->field
110 //
111 Field* curr_field = table->field[i];
112 uint null_offset = get_null_offset(table, curr_field);
113 TOKUDB_TRACE(
114 "name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: "
115 "%d, is_null %d, pack_length %u",
116 curr_field->field_name.str,
117 curr_field->real_type(),
118 mysql_to_toku_type(curr_field),
119 curr_field->null_bit,
120 null_offset,
121 curr_field->real_maybe_null(),
122 curr_field->real_maybe_null() ?
123 table->s->default_values[null_offset] & curr_field->null_bit :
124 0xffffffff,
125 curr_field->pack_length());
126 }
127 TOKUDB_TRACE("******");
128 TOKUDB_TRACE("***altered table***");
129 for (uint i = 0; i < altered_table->s->fields; i++) {
130 Field* curr_field = altered_table->field[i];
131 uint null_offset = get_null_offset(altered_table, curr_field);
132 TOKUDB_TRACE(
133 "name: %s, types: %u %u, nullable: %d, null_offset: %d, "
134 "is_null_field: %d, is_null %d, pack_length %u",
135 curr_field->field_name.str,
136 curr_field->real_type(),
137 mysql_to_toku_type(curr_field),
138 curr_field->null_bit,
139 null_offset,
140 curr_field->real_maybe_null(),
141 curr_field->real_maybe_null() ?
142 altered_table->s->default_values[null_offset] &
143 curr_field->null_bit : 0xffffffff,
144 curr_field->pack_length());
145 }
146 TOKUDB_TRACE("******");
147}
148
149// Given two tables with equal number of fields, find all of the fields with
150// different types and return the indexes of the different fields in the
151// changed_fields array. This function ignores field name differences.
152static int find_changed_fields(
153 TABLE* table_a,
154 TABLE* table_b,
155 Dynamic_array<uint>& changed_fields) {
156
157 for (uint i = 0; i < table_a->s->fields; i++) {
158 Field* field_a = table_a->field[i];
159 Field* field_b = table_b->field[i];
160 if (!fields_are_same_type(field_a, field_b))
161 changed_fields.append(i);
162 }
163 return changed_fields.elements();
164}
165
166static bool change_length_is_supported(
167 TABLE* table,
168 TABLE* altered_table,
169 Alter_inplace_info* ha_alter_info,
170 tokudb_alter_ctx* ctx);
171
172static bool change_type_is_supported(
173 TABLE* table,
174 TABLE* altered_table,
175 Alter_inplace_info* ha_alter_info,
176 tokudb_alter_ctx* ctx);
177
178// The ha_alter_info->handler_flags can not be trusted.
179// This function maps the bogus handler flags to something we like.
180static ulong fix_handler_flags(
181 THD* thd,
182 TABLE* table,
183 TABLE* altered_table,
184 Alter_inplace_info* ha_alter_info) {
185
186 ulong handler_flags = ha_alter_info->handler_flags;
187
188#if 100000 <= MYSQL_VERSION_ID
189 // This is automatically supported, hide the flag from later checks
190 handler_flags &= ~ALTER_PARTITIONED;
191#endif
192
193 // workaround for fill_alter_inplace_info bug (#5193)
194 // the function erroneously sets the ADD_INDEX and DROP_INDEX flags for a
195 // column addition that does not change the keys.
196 // the following code turns the ADD_INDEX and DROP_INDEX flags so that
197 // we can do hot column addition later.
198 if (handler_flags &
199 (ALTER_ADD_COLUMN + ALTER_DROP_COLUMN)) {
200 if (handler_flags &
201 (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX)) {
202 if (tables_have_same_keys(
203 table,
204 altered_table,
205 tokudb::sysvars::alter_print_error(thd) != 0, false)) {
206 handler_flags &=
207 ~(ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
208 ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX);
209 }
210 }
211 }
212
213 // always allow rename table + any other operation, so turn off the
214 // rename flag
215 handler_flags &= ~ALTER_RENAME;
216
217 // ALTER_STORED_COLUMN_TYPE may be set when no columns have been changed,
218 // so turn off the flag
219 if (handler_flags & ALTER_STORED_COLUMN_TYPE) {
220 if (all_fields_are_same_type(table, altered_table)) {
221 handler_flags &= ~ALTER_STORED_COLUMN_TYPE;
222 }
223 }
224
225 return handler_flags;
226}
227
228// Require that there is no intersection of add and drop names.
229static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) {
230 for (uint d = 0; d < ha_alter_info->index_drop_count; d++) {
231 KEY* drop_key = ha_alter_info->index_drop_buffer[d];
232 for (uint a = 0; a < ha_alter_info->index_add_count; a++) {
233 KEY* add_key =
234 &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[a]];
235 if (strcmp(drop_key->name.str, add_key->name.str) == 0) {
236 return false;
237 }
238 }
239 }
240 return true;
241}
242
// Return true if some bit in mask is set in bits and no bit outside of mask
// is set in bits, otherwise return false.
static bool only_flags(ulong bits, ulong mask) {
    // any bit outside of the mask disqualifies right away
    if ((bits & ~mask) != 0) {
        return false;
    }
    // otherwise at least one bit of the mask has to be present
    return (bits & mask) != 0;
}
248
249// Check if an alter table operation on this table and described by the alter
250// table parameters is supported inplace and if so, what type of locking is
251// needed to execute it. return values:
252
253// HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an
254// inplace operation, a table copy is required
255
256// HA_ALTER_ERROR: the alter table operation should fail
257
258// HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X
259
260// HA_ALTER_INPLACE_COPY_LOCK: prepare runs with MDL X,
261// alter runs with MDL SNW
262
263// HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW,
264// concurrent reads, no writes
265
266// HA_ALTER_INPLACE_COPY_NO_LOCK: prepare runs with MDL X,
267// alter runs with MDL SW
268
269// HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW,
270// concurrent reads, writes.
271// must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid
272// deadlocks with the MDL lock and the table lock
273enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
274 TABLE* altered_table,
275 Alter_inplace_info* ha_alter_info) {
276
277 TOKUDB_HANDLER_DBUG_ENTER("");
278
279 if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
280 print_alter_info(altered_table, ha_alter_info);
281 }
282
283 // default is NOT inplace
284 enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED;
285 THD* thd = ha_thd();
286
287 // setup context
288 tokudb_alter_ctx* ctx = new tokudb_alter_ctx;
289 ha_alter_info->handler_ctx = ctx;
290 ctx->handler_flags =
291 fix_handler_flags(thd, table, altered_table, ha_alter_info);
292 ctx->table_kc_info = &share->kc_info;
293 ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base;
294 memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO));
295
296 if (tokudb::sysvars::disable_hot_alter(thd)) {
297 ; // do nothing
298 } else if (only_flags(
299 ctx->handler_flags,
300 ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX +
301 ALTER_DROP_UNIQUE_INDEX +
302 ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
303 ALTER_ADD_UNIQUE_INDEX)) {
304 // add or drop index
305 if (table->s->null_bytes == altered_table->s->null_bytes &&
306 (ha_alter_info->index_add_count > 0 ||
307 ha_alter_info->index_drop_count > 0) &&
308 !tables_have_same_keys(
309 table,
310 altered_table,
311 tokudb::sysvars::alter_print_error(thd) != 0, false) &&
312 is_disjoint_add_drop(ha_alter_info)) {
313
314 if (ctx->handler_flags &
315 (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX +
316 ALTER_DROP_UNIQUE_INDEX)) {
317 // the fractal tree can not handle dropping an index concurrent
318 // with querying with the index.
319 // we grab an exclusive MDL for the drop index.
320 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
321 } else {
322 /* FIXME: MDEV-16099 Use alter algorithm=nocopy
323 or algorithm=instant for non-InnoDB engine */
324 result = HA_ALTER_INPLACE_COPY_LOCK;
325
326 // someday, allow multiple hot indexes via alter table add key.
327 // don't forget to change the store_lock function.
328 // for now, hot indexing is only supported via session variable
329 // with the create index sql command
330 if (ha_alter_info->index_add_count == 1 &&
331 // only one add or drop
332 ha_alter_info->index_drop_count == 0 &&
333 // must be add index not add unique index
334 ctx->handler_flags == ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX &&
335 // must be a create index command
336 thd_sql_command(thd) == SQLCOM_CREATE_INDEX &&
337 // must be enabled
338 tokudb::sysvars::create_index_online(thd)) {
339 // external_lock set WRITE_ALLOW_WRITE which allows writes
340 // concurrent with the index creation
341 /* FIXME: MDEV-16099 Use alter algorithm=nocopy
342 or algorithm=instant for non-InnoDB engine */
343 result = HA_ALTER_INPLACE_COPY_NO_LOCK;
344 }
345 }
346 }
347 } else if (only_flags(
348 ctx->handler_flags,
349 ALTER_COLUMN_DEFAULT)) {
350 // column default
351 if (table->s->null_bytes == altered_table->s->null_bytes)
352 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
353 } else if (ctx->handler_flags & ALTER_COLUMN_NAME &&
354 only_flags(
355 ctx->handler_flags,
356 ALTER_COLUMN_NAME |
357 ALTER_COLUMN_DEFAULT)) {
358 // column rename
359 // we have identified a possible column rename,
360 // but let's do some more checks
361
362 // we will only allow an hcr if there are no changes
363 // in column positions (ALTER_STORED_COLUMN_ORDER is not set)
364
365 // now need to verify that one and only one column
366 // has changed only its name. If we find anything to
367 // the contrary, we don't allow it, also check indexes
368 if (table->s->null_bytes == altered_table->s->null_bytes) {
369 bool cr_supported =
370 column_rename_supported(
371 table,
372 altered_table,
373 (ctx->handler_flags &
374 ALTER_STORED_COLUMN_ORDER) != 0);
375 if (cr_supported)
376 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
377 }
378 } else if (ctx->handler_flags & ALTER_ADD_COLUMN &&
379 only_flags(
380 ctx->handler_flags,
381 ALTER_ADD_COLUMN |
382 ALTER_STORED_COLUMN_ORDER) &&
383 setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
384
385 // add column
386 uint32_t added_columns[altered_table->s->fields];
387 uint32_t num_added_columns = 0;
388 int r =
389 find_changed_columns(
390 added_columns,
391 &num_added_columns,
392 table,
393 altered_table);
394 if (r == 0) {
395 if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
396 for (uint32_t i = 0; i < num_added_columns; i++) {
397 uint32_t curr_added_index = added_columns[i];
398 Field* curr_added_field =
399 altered_table->field[curr_added_index];
400 TOKUDB_TRACE(
401 "Added column: index %d, name %s",
402 curr_added_index,
403 curr_added_field->field_name.str);
404 }
405 }
406 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
407 }
408 } else if (ctx->handler_flags & ALTER_DROP_COLUMN &&
409 only_flags(
410 ctx->handler_flags,
411 ALTER_DROP_COLUMN |
412 ALTER_STORED_COLUMN_ORDER) &&
413 setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
414
415 // drop column
416 uint32_t dropped_columns[table->s->fields];
417 uint32_t num_dropped_columns = 0;
418 int r =
419 find_changed_columns(
420 dropped_columns,
421 &num_dropped_columns,
422 altered_table,
423 table);
424 if (r == 0) {
425 if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
426 for (uint32_t i = 0; i < num_dropped_columns; i++) {
427 uint32_t curr_dropped_index = dropped_columns[i];
428 Field* curr_dropped_field = table->field[curr_dropped_index];
429 TOKUDB_TRACE(
430 "Dropped column: index %d, name %s",
431 curr_dropped_index,
432 curr_dropped_field->field_name.str);
433 }
434 }
435 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
436 }
437 } else if ((ctx->handler_flags &
438 ALTER_COLUMN_EQUAL_PACK_LENGTH) &&
439 only_flags(
440 ctx->handler_flags,
441 ALTER_COLUMN_EQUAL_PACK_LENGTH |
442 ALTER_COLUMN_DEFAULT) &&
443 table->s->fields == altered_table->s->fields &&
444 find_changed_fields(
445 table,
446 altered_table,
447 ctx->changed_fields) > 0 &&
448 setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
449
450 // change column length
451 if (change_length_is_supported(
452 table,
453 altered_table,
454 ha_alter_info, ctx)) {
455 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
456 }
457 } else if ((ctx->handler_flags & ALTER_STORED_COLUMN_TYPE) &&
458 only_flags(
459 ctx->handler_flags,
460 ALTER_STORED_COLUMN_TYPE |
461 ALTER_COLUMN_DEFAULT) &&
462 table->s->fields == altered_table->s->fields &&
463 find_changed_fields(
464 table,
465 altered_table,
466 ctx->changed_fields) > 0 &&
467 setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
468
469 // change column type
470 if (change_type_is_supported(
471 table,
472 altered_table,
473 ha_alter_info, ctx)) {
474 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
475 }
476 } else if (only_flags(
477 ctx->handler_flags,
478 ALTER_CHANGE_CREATE_OPTION)) {
479
480 HA_CREATE_INFO* create_info = ha_alter_info->create_info;
481#if TOKU_INCLUDE_OPTION_STRUCTS
482 // set the USED_ROW_FORMAT flag for use later in this file for changes in the table's
483 // compression
484 if (create_info->option_struct->row_format !=
485 table_share->option_struct->row_format)
486 create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT;
487#endif
488 // alter auto_increment
489 if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) {
490 // do a sanity check that the table is what we think it is
491 if (tables_have_same_keys_and_columns(
492 table,
493 altered_table,
494 tokudb::sysvars::alter_print_error(thd) != 0)) {
495 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
496 }
497 } else if (only_flags(
498 create_info->used_fields,
499 HA_CREATE_USED_ROW_FORMAT)) {
500 // alter row_format
501 // do a sanity check that the table is what we think it is
502 if (tables_have_same_keys_and_columns(
503 table,
504 altered_table,
505 tokudb::sysvars::alter_print_error(thd) != 0)) {
506 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
507 }
508 }
509 }
510#if TOKU_OPTIMIZE_WITH_RECREATE
511 else if (only_flags(
512 ctx->handler_flags,
513 ALTER_RECREATE_TABLE |
514 ALTER_COLUMN_DEFAULT)) {
515 ctx->optimize_needed = true;
516 /* FIXME: MDEV-16099 Use alter algorithm=nocopy
517 or algorithm=instant for non-InnoDB engine */
518 result = HA_ALTER_INPLACE_COPY_NO_LOCK;
519 }
520#endif
521
522 if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE)) &&
523 result != HA_ALTER_INPLACE_NOT_SUPPORTED &&
524 table->s->null_bytes != altered_table->s->null_bytes) {
525
526 TOKUDB_HANDLER_TRACE("q %s", thd->query());
527 TOKUDB_HANDLER_TRACE(
528 "null bytes %u -> %u",
529 table->s->null_bytes,
530 altered_table->s->null_bytes);
531 }
532
533 // turn a not supported result into an error if the slow alter table
534 // (copy) is disabled
535 if (result == HA_ALTER_INPLACE_NOT_SUPPORTED &&
536 tokudb::sysvars::disable_slow_alter(thd)) {
537 print_error(HA_ERR_UNSUPPORTED, MYF(0));
538 result = HA_ALTER_ERROR;
539 }
540
541 DBUG_RETURN(result);
542}
543
544// Prepare for the alter operations
545bool ha_tokudb::prepare_inplace_alter_table(
546 TABLE* altered_table,
547 Alter_inplace_info* ha_alter_info) {
548
549 TOKUDB_HANDLER_DBUG_ENTER("");
550 tokudb_alter_ctx* ctx =
551 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
552 assert_always(transaction); // transaction must exist after table is locked
553 ctx->alter_txn = transaction;
554 bool result = false; // success
555 DBUG_RETURN(result);
556}
557
558// Execute the alter operations.
559bool ha_tokudb::inplace_alter_table(
560 TABLE* altered_table,
561 Alter_inplace_info* ha_alter_info) {
562
563 TOKUDB_HANDLER_DBUG_ENTER("");
564
565 int error = 0;
566 tokudb_alter_ctx* ctx =
567 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
568 HA_CREATE_INFO* create_info = ha_alter_info->create_info;
569
570 // this should be enough to handle locking as the higher level MDL
571 // on this table should prevent any new analyze tasks.
572 share->cancel_background_jobs();
573
574 if (error == 0 &&
575 (ctx->handler_flags &
576 (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX |
577 ALTER_DROP_UNIQUE_INDEX))) {
578 error = alter_table_drop_index(altered_table, ha_alter_info);
579 }
580 if (error == 0 &&
581 (ctx->handler_flags &
582 (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
583 ALTER_ADD_UNIQUE_INDEX))) {
584 error = alter_table_add_index(altered_table, ha_alter_info);
585 }
586 if (error == 0 &&
587 (ctx->handler_flags &
588 (ALTER_ADD_COLUMN |
589 ALTER_DROP_COLUMN))) {
590 error = alter_table_add_or_drop_column(altered_table, ha_alter_info);
591 }
592 if (error == 0 &&
593 (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) &&
594 (create_info->used_fields & HA_CREATE_USED_AUTO)) {
595 error = write_auto_inc_create(
596 share->status_block,
597 create_info->auto_increment_value,
598 ctx->alter_txn);
599 }
600 if (error == 0 &&
601 (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) &&
602 (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) {
603 // Get the current compression
604 DB *db = share->key_file[0];
605 error = db->get_compression_method(db, &ctx->orig_compression_method);
606 assert_always(error == 0);
607
608 // Set the new compression
609#if TOKU_INCLUDE_OPTION_STRUCTS
610 toku_compression_method method =
611 row_format_to_toku_compression_method(
612 (tokudb::sysvars::row_format_t)create_info->option_struct->row_format);
613#else
614 toku_compression_method method =
615 row_type_to_toku_compression_method(create_info->row_type);
616#endif
617 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
618 for (uint32_t i = 0; i < curr_num_DBs; i++) {
619 db = share->key_file[i];
620 error = db->change_compression_method(db, method);
621 if (error)
622 break;
623 ctx->compression_changed = true;
624 }
625 }
626
627 // note: only one column expansion is allowed
628
629 if (error == 0 && ctx->expand_fixed_update_needed)
630 error = alter_table_expand_columns(altered_table, ha_alter_info);
631
632 if (error == 0 && ctx->expand_varchar_update_needed)
633 error = alter_table_expand_varchar_offsets(
634 altered_table,
635 ha_alter_info);
636
637 if (error == 0 && ctx->expand_blob_update_needed)
638 error = alter_table_expand_blobs(altered_table, ha_alter_info);
639
640 if (error == 0 && ctx->reset_card) {
641 error = tokudb::alter_card(
642 share->status_block,
643 ctx->alter_txn,
644 table->s,
645 altered_table->s);
646 }
647 if (error == 0 && ctx->optimize_needed) {
648 error = do_optimize(ha_thd());
649 }
650
651#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
652 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
653#if WITH_PARTITION_STORAGE_ENGINE
654 if (error == 0 &&
655 (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL)) {
656#else
657 if (error == 0) {
658#endif
659 error = write_frm_data(
660 share->status_block,
661 ctx->alter_txn,
662 altered_table->s->path.str);
663 }
664#endif
665
666 bool result = false; // success
667 if (error) {
668 print_error(error, MYF(0));
669 result = true; // failure
670 }
671
672 DBUG_RETURN(result);
673}
674
675int ha_tokudb::alter_table_add_index(
676 TABLE* altered_table,
677 Alter_inplace_info* ha_alter_info) {
678
679 // sort keys in add index order
680 KEY* key_info = (KEY*)tokudb::memory::malloc(
681 sizeof(KEY) * ha_alter_info->index_add_count,
682 MYF(MY_WME));
683 for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
684 KEY *key = &key_info[i];
685 *key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
686 for (KEY_PART_INFO* key_part = key->key_part;
687 key_part < key->key_part + key->user_defined_key_parts;
688 key_part++) {
689 key_part->field = table->field[key_part->fieldnr];
690 }
691 }
692
693 tokudb_alter_ctx* ctx =
694 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
695 ctx->add_index_changed = true;
696 int error = tokudb_add_index(
697 table,
698 key_info,
699 ha_alter_info->index_add_count,
700 ctx->alter_txn,
701 &ctx->incremented_num_DBs,
702 &ctx->modified_DBs);
703 if (error == HA_ERR_FOUND_DUPP_KEY) {
704 // hack for now, in case of duplicate key error,
705 // because at the moment we cannot display the right key
706 // information to the user, so that he knows potentially what went
707 // wrong.
708 last_dup_key = MAX_KEY;
709 }
710
711 tokudb::memory::free(key_info);
712
713 if (error == 0)
714 ctx->reset_card = true;
715
716 return error;
717}
718
719static bool find_index_of_key(
720 const char* key_name,
721 TABLE* table,
722 uint* index_offset_ptr) {
723
724 for (uint i = 0; i < table->s->keys; i++) {
725 if (strcmp(key_name, table->key_info[i].name.str) == 0) {
726 *index_offset_ptr = i;
727 return true;
728 }
729 }
730 return false;
731}
732
733static bool find_index_of_key(
734 const char* key_name,
735 KEY* key_info,
736 uint key_count,
737 uint* index_offset_ptr) {
738
739 for (uint i = 0; i < key_count; i++) {
740 if (strcmp(key_name, key_info[i].name.str) == 0) {
741 *index_offset_ptr = i;
742 return true;
743 }
744 }
745 return false;
746}
747
748int ha_tokudb::alter_table_drop_index(
749 TABLE* altered_table,
750 Alter_inplace_info* ha_alter_info) {
751
752 KEY *key_info = table->key_info;
753 // translate key names to indexes into the key_info array
754 uint index_drop_offsets[ha_alter_info->index_drop_count];
755 for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
756 bool found;
757 found = find_index_of_key(
758 ha_alter_info->index_drop_buffer[i]->name.str,
759 table,
760 &index_drop_offsets[i]);
761 if (!found) {
762 // undo of add key in partition engine
763 found = find_index_of_key(
764 ha_alter_info->index_drop_buffer[i]->name.str,
765 ha_alter_info->key_info_buffer,
766 ha_alter_info->key_count,
767 &index_drop_offsets[i]);
768 assert_always(found);
769 key_info = ha_alter_info->key_info_buffer;
770 }
771 }
772
773 // drop indexes
774 tokudb_alter_ctx* ctx =
775 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
776 ctx->drop_index_changed = true;
777
778 int error = drop_indexes(
779 table,
780 index_drop_offsets,
781 ha_alter_info->index_drop_count,
782 key_info,
783 ctx->alter_txn);
784
785 if (error == 0)
786 ctx->reset_card = true;
787
788 return error;
789}
790
791int ha_tokudb::alter_table_add_or_drop_column(
792 TABLE* altered_table,
793 Alter_inplace_info* ha_alter_info) {
794
795 tokudb_alter_ctx* ctx =
796 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
797 int error;
798 uchar *column_extra = NULL;
799 uint32_t max_column_extra_size;
800 uint32_t num_column_extra;
801 uint32_t num_columns = 0;
802 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
803 // set size such that we know it is big enough for both cases
804 uint32_t columns[table->s->fields + altered_table->s->fields];
805 memset(columns, 0, sizeof(columns));
806
807 // generate the array of columns
808 if (ha_alter_info->handler_flags & ALTER_DROP_COLUMN) {
809 find_changed_columns(
810 columns,
811 &num_columns,
812 altered_table,
813 table);
814 } else if (ha_alter_info->handler_flags & ALTER_ADD_COLUMN) {
815 find_changed_columns(
816 columns,
817 &num_columns,
818 table,
819 altered_table);
820 } else {
821 assert_unreachable();
822 }
823 max_column_extra_size =
824 // max static row_mutator
825 STATIC_ROW_MUTATOR_SIZE +
826 // max dynamic row_mutator
827 4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength +
828 // max static blob size
829 (4 + share->kc_info.num_blobs) +
830 // max dynamic blob size
831 (num_columns*(1+4+1+4));
832 column_extra = (uchar*)tokudb::memory::malloc(
833 max_column_extra_size,
834 MYF(MY_WME));
835 if (column_extra == NULL) {
836 error = ENOMEM;
837 goto cleanup;
838 }
839
840 for (uint32_t i = 0; i < curr_num_DBs; i++) {
841 // change to a new descriptor
842 DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
843 error = new_row_descriptor(
844 table,
845 altered_table,
846 ha_alter_info,
847 i,
848 &row_descriptor);
849 if (error)
850 goto cleanup;
851 error = share->key_file[i]->change_descriptor(
852 share->key_file[i],
853 ctx->alter_txn,
854 &row_descriptor,
855 0);
856 tokudb::memory::free(row_descriptor.data);
857 if (error)
858 goto cleanup;
859
860 if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
861 num_column_extra = fill_row_mutator(
862 column_extra,
863 columns,
864 num_columns,
865 altered_table,
866 ctx->altered_table_kc_info,
867 i,
868 // true if adding columns, otherwise is a drop
869 (ha_alter_info->handler_flags &
870 ALTER_ADD_COLUMN) != 0);
871
872 DBT column_dbt; memset(&column_dbt, 0, sizeof column_dbt);
873 column_dbt.data = column_extra;
874 column_dbt.size = num_column_extra;
875 DBUG_ASSERT(num_column_extra <= max_column_extra_size);
876 error = share->key_file[i]->update_broadcast(
877 share->key_file[i],
878 ctx->alter_txn,
879 &column_dbt,
880 DB_IS_RESETTING_OP);
881 if (error) {
882 goto cleanup;
883 }
884 }
885 }
886
887 error = 0;
888 cleanup:
889 tokudb::memory::free(column_extra);
890 return error;
891}
892
893// Commit or abort the alter operations.
894// If commit then write the new frm data to the status using the alter
895// transaction.
896// If abort then abort the alter transaction and try to rollback the
897// non-transactional changes.
898bool ha_tokudb::commit_inplace_alter_table(
899 TABLE* altered_table,
900 Alter_inplace_info* ha_alter_info,
901 bool commit) {
902
903 TOKUDB_HANDLER_DBUG_ENTER("");
904
905 tokudb_alter_ctx* ctx =
906 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
907 bool result = false; // success
908 THD *thd = ha_thd();
909
910 if (commit) {
911#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
912 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
913 (100000 <= MYSQL_VERSION_ID)
914 if (ha_alter_info->group_commit_ctx) {
915 ha_alter_info->group_commit_ctx = NULL;
916 }
917#endif
918#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) || \
919 (100000 <= MYSQL_VERSION_ID)
920#if WITH_PARTITION_STORAGE_ENGINE
921 if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
922#else
923 if (true) {
924#endif
925 int error = write_frm_data(
926 share->status_block,
927 ctx->alter_txn,
928 altered_table->s->path.str);
929 if (error) {
930 commit = false;
931 result = true;
932 print_error(error, MYF(0));
933 }
934 }
935#endif
936 }
937
938 if (!commit) {
939 if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE &&
940 (ctx->add_index_changed || ctx->drop_index_changed ||
941 ctx->compression_changed)) {
942
943 // get exclusive lock no matter what
944#if defined(MARIADB_BASE_VERSION)
945 killed_state saved_killed_state = thd->killed;
946 thd->killed = NOT_KILLED;
947 for (volatile uint i = 0;
948 wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED);
949 i++) {
950 if (thd->killed != NOT_KILLED)
951 thd->killed = NOT_KILLED;
952 sleep(1);
953 }
954 assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
955 if (thd->killed == NOT_KILLED)
956 thd->killed = saved_killed_state;
957#else
958 THD::killed_state saved_killed_state = thd->killed;
959 thd->killed = THD::NOT_KILLED;
960 // MySQL does not handle HA_EXTRA_NOT_USED so we use
961 // HA_EXTRA_PREPARE_FOR_RENAME since it is passed through
962 // the partition storage engine and is treated as a NOP by tokudb
963 for (volatile uint i = 0;
964 wait_while_table_is_used(
965 thd,
966 table,
967 HA_EXTRA_PREPARE_FOR_RENAME);
968 i++) {
969 if (thd->killed != THD::NOT_KILLED)
970 thd->killed = THD::NOT_KILLED;
971 sleep(1);
972 }
973 assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
974 if (thd->killed == THD::NOT_KILLED)
975 thd->killed = saved_killed_state;
976#endif
977 }
978
979 // abort the alter transaction NOW so that any alters are rolled back.
980 // this allows the following restores to work.
981 tokudb_trx_data* trx =
982 (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
983 assert_always(ctx->alter_txn == trx->stmt);
984 assert_always(trx->tokudb_lock_count > 0);
985 // for partitioned tables, we use a single transaction to do all of the
986 // partition changes. the tokudb_lock_count is a reference count for
987 // each of the handlers to the same transaction. obviously, we want
988 // to only abort once.
989 if (trx->tokudb_lock_count > 0) {
990 if (--trx->tokudb_lock_count <= trx->create_lock_count) {
991 trx->create_lock_count = 0;
992 abort_txn(ctx->alter_txn);
993 ctx->alter_txn = NULL;
994 trx->stmt = NULL;
995 trx->sub_sp_level = NULL;
996 }
997 transaction = NULL;
998 }
999
1000 if (ctx->add_index_changed) {
1001 restore_add_index(
1002 table,
1003 ha_alter_info->index_add_count,
1004 ctx->incremented_num_DBs,
1005 ctx->modified_DBs);
1006 }
1007 if (ctx->drop_index_changed) {
1008 // translate key names to indexes into the key_info array
1009 uint index_drop_offsets[ha_alter_info->index_drop_count];
1010 for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
1011 bool found = find_index_of_key(
1012 ha_alter_info->index_drop_buffer[i]->name.str,
1013 table,
1014 &index_drop_offsets[i]);
1015 assert_always(found);
1016 }
1017 restore_drop_indexes(
1018 table,
1019 index_drop_offsets,
1020 ha_alter_info->index_drop_count);
1021 }
1022 if (ctx->compression_changed) {
1023 uint32_t curr_num_DBs =
1024 table->s->keys + tokudb_test(hidden_primary_key);
1025 for (uint32_t i = 0; i < curr_num_DBs; i++) {
1026 DB *db = share->key_file[i];
1027 int error = db->change_compression_method(
1028 db,
1029 ctx->orig_compression_method);
1030 assert_always(error == 0);
1031 }
1032 }
1033 }
1034 DBUG_RETURN(result);
1035}
1036
1037// Setup the altered table's key and col info.
1038int ha_tokudb::setup_kc_info(
1039 TABLE* altered_table,
1040 KEY_AND_COL_INFO* altered_kc_info) {
1041
1042 int error = allocate_key_and_col_info(altered_table->s, altered_kc_info);
1043 if (error == 0)
1044 error = initialize_key_and_col_info(
1045 altered_table->s,
1046 altered_table,
1047 altered_kc_info,
1048 hidden_primary_key,
1049 primary_key);
1050 return error;
1051}
1052
1053// Expand the variable length fields offsets from 1 to 2 bytes.
1054int ha_tokudb::alter_table_expand_varchar_offsets(
1055 TABLE* altered_table,
1056 Alter_inplace_info* ha_alter_info) {
1057
1058 int error = 0;
1059 tokudb_alter_ctx* ctx =
1060 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1061
1062 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
1063 for (uint32_t i = 0; i < curr_num_DBs; i++) {
1064 // change to a new descriptor
1065 DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
1066 error = new_row_descriptor(
1067 table,
1068 altered_table,
1069 ha_alter_info,
1070 i,
1071 &row_descriptor);
1072 if (error)
1073 break;
1074 error = share->key_file[i]->change_descriptor(
1075 share->key_file[i],
1076 ctx->alter_txn,
1077 &row_descriptor,
1078 0);
1079 tokudb::memory::free(row_descriptor.data);
1080 if (error)
1081 break;
1082
1083 // for all trees that have values, make an update variable offsets
1084 // message and broadcast it into the tree
1085 if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
1086 uint32_t offset_start =
1087 table_share->null_bytes +
1088 share->kc_info.mcp_info[i].fixed_field_size;
1089 uint32_t offset_end =
1090 offset_start +
1091 share->kc_info.mcp_info[i].len_of_offsets;
1092 uint32_t number_of_offsets = offset_end - offset_start;
1093
1094 // make the expand variable offsets message
1095 DBT expand; memset(&expand, 0, sizeof expand);
1096 expand.size =
1097 sizeof(uchar) + sizeof(offset_start) + sizeof(offset_end);
1098 expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
1099 if (!expand.data) {
1100 error = ENOMEM;
1101 break;
1102 }
1103 uchar* expand_ptr = (uchar*)expand.data;
1104 expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
1105 expand_ptr += sizeof(uchar);
1106
1107 memcpy(expand_ptr, &number_of_offsets, sizeof(number_of_offsets));
1108 expand_ptr += sizeof(number_of_offsets);
1109
1110 memcpy(expand_ptr, &offset_start, sizeof(offset_start));
1111 expand_ptr += sizeof(offset_start);
1112
1113 // and broadcast it into the tree
1114 error = share->key_file[i]->update_broadcast(
1115 share->key_file[i],
1116 ctx->alter_txn,
1117 &expand,
1118 DB_IS_RESETTING_OP);
1119 tokudb::memory::free(expand.data);
1120 if (error)
1121 break;
1122 }
1123 }
1124
1125 return error;
1126}
1127
1128// Return true if a field is part of a key
1129static bool field_in_key(KEY *key, Field *field) {
1130 for (uint i = 0; i < key->user_defined_key_parts; i++) {
1131 KEY_PART_INFO *key_part = &key->key_part[i];
1132 if (strcmp(key_part->field->field_name.str, field->field_name.str) == 0)
1133 return true;
1134 }
1135 return false;
1136}
1137
1138// Return true if a field is part of any key
1139static bool field_in_key_of_table(TABLE *table, Field *field) {
1140 for (uint i = 0; i < table->s->keys; i++) {
1141 if (field_in_key(&table->key_info[i], field))
1142 return true;
1143 }
1144 return false;
1145}
1146
1147// Return true if all changed varchar/varbinary field lengths can be changed
1148// inplace, otherwise return false
1149static bool change_varchar_length_is_supported(
1150 Field* old_field,
1151 Field* new_field,
1152 TABLE* table,
1153 TABLE* altered_table,
1154 Alter_inplace_info* ha_alter_info,
1155 tokudb_alter_ctx* ctx) {
1156
1157 if (old_field->real_type() != MYSQL_TYPE_VARCHAR ||
1158 new_field->real_type() != MYSQL_TYPE_VARCHAR ||
1159 old_field->binary() != new_field->binary() ||
1160 old_field->charset()->number != new_field->charset()->number ||
1161 old_field->field_length > new_field->field_length)
1162 return false;
1163 if (ctx->table_kc_info->num_offset_bytes >
1164 ctx->altered_table_kc_info->num_offset_bytes)
1165 return false; // shrink is not supported
1166 if (ctx->table_kc_info->num_offset_bytes <
1167 ctx->altered_table_kc_info->num_offset_bytes)
1168 // sum of varchar lengths changed from 1 to 2
1169 ctx->expand_varchar_update_needed = true;
1170 return true;
1171}
1172
1173// Return true if all changed field lengths can be changed inplace, otherwise
1174// return false
1175static bool change_length_is_supported(
1176 TABLE* table,
1177 TABLE* altered_table,
1178 Alter_inplace_info* ha_alter_info,
1179 tokudb_alter_ctx* ctx) {
1180
1181 if (table->s->fields != altered_table->s->fields)
1182 return false;
1183 if (table->s->null_bytes != altered_table->s->null_bytes)
1184 return false;
1185 if (ctx->changed_fields.elements() > 1)
1186 return false; // only support one field change
1187 for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
1188 ai < ctx->changed_fields.elements();
1189 ai++) {
1190 uint i = ctx->changed_fields.at(ai);
1191 Field *old_field = table->field[i];
1192 Field *new_field = altered_table->field[i];
1193 if (old_field->real_type() != new_field->real_type())
1194 return false; // no type conversions
1195 if (old_field->real_type() != MYSQL_TYPE_VARCHAR)
1196 return false; // only varchar
1197 if (field_in_key_of_table(table, old_field) ||
1198 field_in_key_of_table(altered_table, new_field))
1199 return false; // not in any key
1200 if (!change_varchar_length_is_supported(
1201 old_field,
1202 new_field,
1203 table,
1204 altered_table,
1205 ha_alter_info,
1206 ctx))
1207 return false;
1208 }
1209
1210 return true;
1211}
1212
1213// Debug function that ensures that the array is sorted
1214static bool is_sorted(Dynamic_array<uint> &a) {
1215 bool r = true;
1216 if (a.elements() > 0) {
1217 uint lastelement = a.at(0);
1218 for (DYNAMIC_ARRAY_ELEMENTS_TYPE i = 1; i < a.elements(); i++)
1219 if (lastelement > a.at(i))
1220 r = false;
1221 }
1222 return r;
1223}
1224
1225int ha_tokudb::alter_table_expand_columns(
1226 TABLE* altered_table,
1227 Alter_inplace_info* ha_alter_info) {
1228
1229 int error = 0;
1230 tokudb_alter_ctx* ctx =
1231 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1232 // since we build the changed_fields array in field order, it must be sorted
1233 assert_always(is_sorted(ctx->changed_fields));
1234 for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
1235 error == 0 && ai < ctx->changed_fields.elements();
1236 ai++) {
1237 uint expand_field_num = ctx->changed_fields.at(ai);
1238 error = alter_table_expand_one_column(
1239 altered_table,
1240 ha_alter_info,
1241 expand_field_num);
1242 }
1243
1244 return error;
1245}
1246
1247// Return true if the field is an unsigned int
1248static bool is_unsigned(Field *f) {
1249 return (f->flags & UNSIGNED_FLAG) != 0;
1250}
1251
1252// Return the starting offset in the value for a particular index (selected by
1253// idx) of a particular field (selected by expand_field_num)
1254// TODO: replace this?
1255static uint32_t alter_table_field_offset(
1256 uint32_t null_bytes,
1257 KEY_AND_COL_INFO* kc_info,
1258 int idx,
1259 int expand_field_num) {
1260
1261 uint32_t offset = null_bytes;
1262 for (int i = 0; i < expand_field_num; i++) {
1263 if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
1264 continue;
1265 offset += kc_info->field_lengths[i];
1266 }
1267 return offset;
1268}
1269
1270// Send an expand message into all clustered indexes including the primary
1271int ha_tokudb::alter_table_expand_one_column(
1272 TABLE* altered_table,
1273 Alter_inplace_info* ha_alter_info,
1274 int expand_field_num) {
1275
1276 int error = 0;
1277 tokudb_alter_ctx* ctx =
1278 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1279
1280 Field *old_field = table->field[expand_field_num];
1281 TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
1282 Field *new_field = altered_table->field[expand_field_num];
1283 TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
1284 assert_always(old_field_type == new_field_type);
1285
1286 uchar operation;
1287 uchar pad_char;
1288 switch (old_field_type) {
1289 case toku_type_int:
1290 assert_always(is_unsigned(old_field) == is_unsigned(new_field));
1291 if (is_unsigned(old_field))
1292 operation = UPDATE_OP_EXPAND_UINT;
1293 else
1294 operation = UPDATE_OP_EXPAND_INT;
1295 pad_char = 0;
1296 break;
1297 case toku_type_fixstring:
1298 operation = UPDATE_OP_EXPAND_CHAR;
1299 pad_char = old_field->charset()->pad_char;
1300 break;
1301 case toku_type_fixbinary:
1302 operation = UPDATE_OP_EXPAND_BINARY;
1303 pad_char = 0;
1304 break;
1305 default:
1306 assert_unreachable();
1307 }
1308
1309 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
1310 for (uint32_t i = 0; i < curr_num_DBs; i++) {
1311 // change to a new descriptor
1312 DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
1313 error = new_row_descriptor(
1314 table,
1315 altered_table,
1316 ha_alter_info,
1317 i,
1318 &row_descriptor);
1319 if (error)
1320 break;
1321 error = share->key_file[i]->change_descriptor(
1322 share->key_file[i],
1323 ctx->alter_txn,
1324 &row_descriptor,
1325 0);
1326 tokudb::memory::free(row_descriptor.data);
1327 if (error)
1328 break;
1329
1330 // for all trees that have values, make an expand update message and
1331 // broadcast it into the tree
1332 if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
1333 uint32_t old_offset = alter_table_field_offset(
1334 table_share->null_bytes,
1335 ctx->table_kc_info,
1336 i,
1337 expand_field_num);
1338 uint32_t new_offset = alter_table_field_offset(
1339 table_share->null_bytes,
1340 ctx->altered_table_kc_info,
1341 i,
1342 expand_field_num);
1343 assert_always(old_offset <= new_offset);
1344
1345 uint32_t old_length =
1346 ctx->table_kc_info->field_lengths[expand_field_num];
1347 assert_always(old_length == old_field->pack_length());
1348
1349 uint32_t new_length =
1350 ctx->altered_table_kc_info->field_lengths[expand_field_num];
1351 assert_always(new_length == new_field->pack_length());
1352
1353 DBT expand; memset(&expand, 0, sizeof(expand));
1354 expand.size =
1355 sizeof(operation) + sizeof(new_offset) +
1356 sizeof(old_length) + sizeof(new_length);
1357 if (operation == UPDATE_OP_EXPAND_CHAR ||
1358 operation == UPDATE_OP_EXPAND_BINARY)
1359 expand.size += sizeof(pad_char);
1360 expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
1361 if (!expand.data) {
1362 error = ENOMEM;
1363 break;
1364 }
1365 uchar *expand_ptr = (uchar *)expand.data;
1366 expand_ptr[0] = operation;
1367 expand_ptr += sizeof operation;
1368
1369 // for the first altered field, old_offset == new_offset.
1370 // for the subsequent altered fields, the new_offset
1371 // should be used as it includes the length changes from the
1372 // previous altered fields.
1373 memcpy(expand_ptr, &new_offset, sizeof(new_offset));
1374 expand_ptr += sizeof(new_offset);
1375
1376 memcpy(expand_ptr, &old_length, sizeof(old_length));
1377 expand_ptr += sizeof(old_length);
1378
1379 memcpy(expand_ptr, &new_length, sizeof(new_length));
1380 expand_ptr += sizeof(new_length);
1381
1382 if (operation == UPDATE_OP_EXPAND_CHAR ||
1383 operation == UPDATE_OP_EXPAND_BINARY) {
1384 memcpy(expand_ptr, &pad_char, sizeof(pad_char));
1385 expand_ptr += sizeof(pad_char);
1386 }
1387
1388 assert_always(expand_ptr == (uchar*)expand.data + expand.size);
1389
1390 // and broadcast it into the tree
1391 error = share->key_file[i]->update_broadcast(
1392 share->key_file[i],
1393 ctx->alter_txn,
1394 &expand,
1395 DB_IS_RESETTING_OP);
1396 tokudb::memory::free(expand.data);
1397 if (error)
1398 break;
1399 }
1400 }
1401
1402 return error;
1403}
1404
1405static void marshall_blob_lengths(
1406 tokudb::buffer& b,
1407 uint32_t n,
1408 TABLE* table,
1409 KEY_AND_COL_INFO* kc_info) {
1410
1411 for (uint i = 0; i < n; i++) {
1412 uint blob_field_index = kc_info->blob_fields[i];
1413 assert_always(blob_field_index < table->s->fields);
1414 uint8_t blob_field_length =
1415 table->s->field[blob_field_index]->row_pack_length();
1416 b.append(&blob_field_length, sizeof blob_field_length);
1417 }
1418}
1419
1420int ha_tokudb::alter_table_expand_blobs(
1421 TABLE* altered_table,
1422 Alter_inplace_info* ha_alter_info) {
1423
1424 int error = 0;
1425 tokudb_alter_ctx* ctx =
1426 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1427
1428 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
1429 for (uint32_t i = 0; i < curr_num_DBs; i++) {
1430 // change to a new descriptor
1431 DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
1432 error = new_row_descriptor(
1433 table,
1434 altered_table,
1435 ha_alter_info,
1436 i,
1437 &row_descriptor);
1438 if (error)
1439 break;
1440 error = share->key_file[i]->change_descriptor(
1441 share->key_file[i],
1442 ctx->alter_txn,
1443 &row_descriptor,
1444 0);
1445 tokudb::memory::free(row_descriptor.data);
1446 if (error)
1447 break;
1448
1449 // for all trees that have values, make an update blobs message and
1450 // broadcast it into the tree
1451 if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
1452 tokudb::buffer b;
1453 uint8_t op = UPDATE_OP_EXPAND_BLOB;
1454 b.append(&op, sizeof op);
1455 b.append_ui<uint32_t>(
1456 table->s->null_bytes +
1457 ctx->table_kc_info->mcp_info[i].fixed_field_size);
1458 uint32_t var_offset_bytes =
1459 ctx->table_kc_info->mcp_info[i].len_of_offsets;
1460 b.append_ui<uint32_t>(var_offset_bytes);
1461 b.append_ui<uint32_t>(
1462 var_offset_bytes == 0 ? 0 :
1463 ctx->table_kc_info->num_offset_bytes);
1464
1465 // add blobs info
1466 uint32_t num_blobs = ctx->table_kc_info->num_blobs;
1467 b.append_ui<uint32_t>(num_blobs);
1468 marshall_blob_lengths(b, num_blobs, table, ctx->table_kc_info);
1469 marshall_blob_lengths(
1470 b,
1471 num_blobs,
1472 altered_table,
1473 ctx->altered_table_kc_info);
1474
1475 // and broadcast it into the tree
1476 DBT expand; memset(&expand, 0, sizeof expand);
1477 expand.data = b.data();
1478 expand.size = b.size();
1479 error = share->key_file[i]->update_broadcast(
1480 share->key_file[i],
1481 ctx->alter_txn,
1482 &expand,
1483 DB_IS_RESETTING_OP);
1484 if (error)
1485 break;
1486 }
1487 }
1488
1489 return error;
1490}
1491
1492// Return true if two fixed length fields can be changed inplace
1493static bool change_fixed_length_is_supported(
1494 TABLE* table,
1495 TABLE* altered_table,
1496 Field* old_field,
1497 Field* new_field,
1498 tokudb_alter_ctx* ctx) {
1499
1500 // no change in size is supported
1501 if (old_field->pack_length() == new_field->pack_length())
1502 return true;
1503 // shrink is not supported
1504 if (old_field->pack_length() > new_field->pack_length())
1505 return false;
1506 ctx->expand_fixed_update_needed = true;
1507 return true;
1508}
1509
1510static bool change_blob_length_is_supported(
1511 TABLE* table,
1512 TABLE* altered_table,
1513 Field* old_field,
1514 Field* new_field,
1515 tokudb_alter_ctx* ctx) {
1516
1517 // blob -> longer or equal length blob
1518 if (old_field->binary() && new_field->binary() &&
1519 old_field->pack_length() <= new_field->pack_length()) {
1520 ctx->expand_blob_update_needed = true;
1521 return true;
1522 }
1523 // text -> longer or equal length text
1524 if (!old_field->binary() && !new_field->binary() &&
1525 old_field->pack_length() <= new_field->pack_length() &&
1526 old_field->charset()->number == new_field->charset()->number) {
1527 ctx->expand_blob_update_needed = true;
1528 return true;
1529 }
1530 return false;
1531}
1532
1533// Return true if the MySQL type is an int or unsigned int type
1534static bool is_int_type(enum_field_types t) {
1535 switch (t) {
1536 case MYSQL_TYPE_TINY:
1537 case MYSQL_TYPE_SHORT:
1538 case MYSQL_TYPE_INT24:
1539 case MYSQL_TYPE_LONG:
1540 case MYSQL_TYPE_LONGLONG:
1541 return true;
1542 default:
1543 return false;
1544 }
1545}
1546
1547// Return true if two field types can be changed inplace
1548static bool change_field_type_is_supported(
1549 Field* old_field,
1550 Field* new_field,
1551 TABLE* table,
1552 TABLE* altered_table,
1553 Alter_inplace_info* ha_alter_info,
1554 tokudb_alter_ctx* ctx) {
1555
1556 enum_field_types old_type = old_field->real_type();
1557 enum_field_types new_type = new_field->real_type();
1558 if (is_int_type(old_type)) {
1559 // int and unsigned int expansion
1560 if (is_int_type(new_type) &&
1561 is_unsigned(old_field) == is_unsigned(new_field))
1562 return change_fixed_length_is_supported(
1563 table,
1564 altered_table,
1565 old_field,
1566 new_field,
1567 ctx);
1568 else
1569 return false;
1570 } else if (old_type == MYSQL_TYPE_STRING) {
1571 // char(X) -> char(Y) and binary(X) -> binary(Y) expansion
1572 if (new_type == MYSQL_TYPE_STRING &&
1573 old_field->binary() == new_field->binary() &&
1574 old_field->charset()->number == new_field->charset()->number)
1575 return change_fixed_length_is_supported(
1576 table,
1577 altered_table,
1578 old_field,
1579 new_field,
1580 ctx);
1581 else
1582 return false;
1583 } else if (old_type == MYSQL_TYPE_VARCHAR) {
1584 // varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion
1585 // where X < 256 <= Y the ALTER_STORED_COLUMN_TYPE handler flag is set for
1586 // these cases
1587 return change_varchar_length_is_supported(
1588 old_field,
1589 new_field,
1590 table,
1591 altered_table,
1592 ha_alter_info,
1593 ctx);
1594 } else if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB) {
1595 return change_blob_length_is_supported(
1596 table,
1597 altered_table,
1598 old_field,
1599 new_field,
1600 ctx);
1601 } else
1602 return false;
1603}
1604
1605// Return true if all changed field types can be changed inplace
1606static bool change_type_is_supported(
1607 TABLE* table,
1608 TABLE* altered_table,
1609 Alter_inplace_info* ha_alter_info,
1610 tokudb_alter_ctx* ctx) {
1611
1612 if (table->s->null_bytes != altered_table->s->null_bytes)
1613 return false;
1614 if (table->s->fields != altered_table->s->fields)
1615 return false;
1616 if (ctx->changed_fields.elements() > 1)
1617 return false; // only support one field change
1618 for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
1619 ai < ctx->changed_fields.elements();
1620 ai++) {
1621 uint i = ctx->changed_fields.at(ai);
1622 Field *old_field = table->field[i];
1623 Field *new_field = altered_table->field[i];
1624 if (field_in_key_of_table(table, old_field) ||
1625 field_in_key_of_table(altered_table, new_field))
1626 return false;
1627 if (!change_field_type_is_supported(
1628 old_field,
1629 new_field,
1630 table,
1631 altered_table,
1632 ha_alter_info,
1633 ctx))
1634 return false;
1635 }
1636 return true;
1637}
1638
1639// Allocate and initialize a new descriptor for a dictionary in the altered
1640// table identified with idx.
1641// Return the new descriptor in the row_descriptor DBT.
1642// Return non-zero on error.
1643int ha_tokudb::new_row_descriptor(
1644 TABLE* table,
1645 TABLE* altered_table,
1646 Alter_inplace_info* ha_alter_info,
1647 uint32_t idx,
1648 DBT* row_descriptor) {
1649
1650 int error = 0;
1651 tokudb_alter_ctx* ctx =
1652 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1653 row_descriptor->size =
1654 get_max_desc_size(ctx->altered_table_kc_info, altered_table);
1655 row_descriptor->data =
1656 (uchar*)tokudb::memory::malloc(row_descriptor->size, MYF(MY_WME));
1657 if (row_descriptor->data == NULL) {
1658 error = ENOMEM;
1659 } else {
1660 KEY* prim_key =
1661 hidden_primary_key ? NULL :
1662 &altered_table->s->key_info[primary_key];
1663 if (idx == primary_key) {
1664 row_descriptor->size = create_main_key_descriptor(
1665 (uchar*)row_descriptor->data,
1666 prim_key,
1667 hidden_primary_key,
1668 primary_key,
1669 altered_table,
1670 ctx->altered_table_kc_info);
1671 } else {
1672 row_descriptor->size = create_secondary_key_descriptor(
1673 (uchar*)row_descriptor->data,
1674 &altered_table->key_info[idx],
1675 prim_key,
1676 hidden_primary_key,
1677 altered_table,
1678 primary_key,
1679 idx,
1680 ctx->altered_table_kc_info);
1681 }
1682 error = 0;
1683 }
1684 return error;
1685}
1686
1687#endif
1688