1/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2018, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17/** @file handler.cc
18
19 @brief
20 Handler-calling-functions
21*/
22
23#include "mariadb.h"
24#include "sql_priv.h"
25#include "unireg.h"
26#include "rpl_rli.h"
27#include "sql_cache.h" // query_cache, query_cache_*
28#include "sql_connect.h" // global_table_stats
29#include "key.h" // key_copy, key_unpack, key_cmp_if_same, key_cmp
30#include "sql_table.h" // build_table_filename
31#include "sql_parse.h" // check_stack_overrun
32#include "sql_acl.h" // SUPER_ACL
33#include "sql_base.h" // TDC_element
34#include "discover.h" // extension_based_table_discovery, etc
35#include "log_event.h" // *_rows_log_event
36#include "create_options.h"
37#include <myisampack.h>
38#include "transaction.h"
39#include "myisam.h"
40#include "probes_mysql.h"
41#include <mysql/psi/mysql_table.h>
42#include "debug_sync.h" // DEBUG_SYNC
43#include "sql_audit.h"
44#include "ha_sequence.h"
45
46#ifdef WITH_PARTITION_STORAGE_ENGINE
47#include "ha_partition.h"
48#endif
49
50#ifdef WITH_ARIA_STORAGE_ENGINE
51#include "../storage/maria/ha_maria.h"
52#endif
53#include "semisync_master.h"
54
55#include "wsrep_mysqld.h"
56#include "wsrep.h"
57#include "wsrep_xid.h"
58
59/*
60 While we have legacy_db_type, we have this array to
61 check for dups and to find handlerton from legacy_db_type.
62 Remove when legacy_db_type is finally gone
63*/
64st_plugin_int *hton2plugin[MAX_HA];
65
66static handlerton *installed_htons[128];
67
68#define BITMAP_STACKBUF_SIZE (128/8)
69
70KEY_CREATE_INFO default_key_create_info=
71{ HA_KEY_ALG_UNDEF, 0, 0, {NullS, 0}, {NullS, 0}, true };
72
73/* number of entries in handlertons[] */
74ulong total_ha= 0;
75/* number of storage engines (from handlertons[]) that support 2pc */
76ulong total_ha_2pc= 0;
77#ifndef DBUG_OFF
/*
  Number of non-mandatory 2pc handlertons whose initialization failed.
  Used to estimate what total_ha_2pc would have been had the failures
  not occurred.
*/
83ulong failed_ha_2pc= 0;
84#endif
85/* size of savepoint storage area (see ha_init) */
86ulong savepoint_alloc_size= 0;
87
88static const LEX_CSTRING sys_table_aliases[]=
89{
90 { STRING_WITH_LEN("INNOBASE") }, { STRING_WITH_LEN("INNODB") },
91 { STRING_WITH_LEN("HEAP") }, { STRING_WITH_LEN("MEMORY") },
92 { STRING_WITH_LEN("MERGE") }, { STRING_WITH_LEN("MRG_MYISAM") },
93 { STRING_WITH_LEN("Maria") }, { STRING_WITH_LEN("Aria") },
94 {NullS, 0}
95};
96
97const char *ha_row_type[] = {
98 "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE"
99};
100
101const char *tx_isolation_names[] =
102{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
103 NullS};
104TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
105 tx_isolation_names, NULL};
106
107static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
108uint known_extensions_id= 0;
109
110static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
111 bool is_real_trans);
112
113
114static plugin_ref ha_default_plugin(THD *thd)
115{
116 if (thd->variables.table_plugin)
117 return thd->variables.table_plugin;
118 return my_plugin_lock(thd, global_system_variables.table_plugin);
119}
120
121static plugin_ref ha_default_tmp_plugin(THD *thd)
122{
123 if (thd->variables.tmp_table_plugin)
124 return thd->variables.tmp_table_plugin;
125 if (global_system_variables.tmp_table_plugin)
126 return my_plugin_lock(thd, global_system_variables.tmp_table_plugin);
127 return ha_default_plugin(thd);
128}
129
130
131/** @brief
132 Return the default storage engine handlerton for thread
133
134 SYNOPSIS
135 ha_default_handlerton(thd)
136 thd current thread
137
138 RETURN
139 pointer to handlerton
140*/
141handlerton *ha_default_handlerton(THD *thd)
142{
143 plugin_ref plugin= ha_default_plugin(thd);
144 DBUG_ASSERT(plugin);
145 handlerton *hton= plugin_hton(plugin);
146 DBUG_ASSERT(hton);
147 return hton;
148}
149
150
151handlerton *ha_default_tmp_handlerton(THD *thd)
152{
153 plugin_ref plugin= ha_default_tmp_plugin(thd);
154 DBUG_ASSERT(plugin);
155 handlerton *hton= plugin_hton(plugin);
156 DBUG_ASSERT(hton);
157 return hton;
158}
159
160
161/** @brief
162 Return the storage engine handlerton for the supplied name
163
164 SYNOPSIS
165 ha_resolve_by_name(thd, name)
166 thd current thread
167 name name of storage engine
168
169 RETURN
170 pointer to storage engine plugin handle
171*/
172plugin_ref ha_resolve_by_name(THD *thd, const LEX_CSTRING *name,
173 bool tmp_table)
174{
175 const LEX_CSTRING *table_alias;
176 plugin_ref plugin;
177
178redo:
179 /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
180 if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
181 (const uchar *)name->str, name->length,
182 (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
183 return tmp_table ? ha_default_tmp_plugin(thd) : ha_default_plugin(thd);
184
185 if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
186 {
187 handlerton *hton= plugin_hton(plugin);
188 if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
189 return plugin;
190
191 /*
192 unlocking plugin immediately after locking is relatively low cost.
193 */
194 plugin_unlock(thd, plugin);
195 }
196
197 /*
198 We check for the historical aliases.
199 */
200 for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
201 {
202 if (!my_strnncoll(&my_charset_latin1,
203 (const uchar *)name->str, name->length,
204 (const uchar *)table_alias->str, table_alias->length))
205 {
206 name= table_alias + 1;
207 goto redo;
208 }
209 }
210
211 return NULL;
212}
213
214
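/**
  Lock (pin) the plugin that implements the given handlerton so that it
  cannot be unloaded while the reference is held. Returns NULL if hton is
  NULL.
*/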
215plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
216{
217 if (hton)
218 {
219 st_plugin_int *plugin= hton2plugin[hton->slot];
220 return my_plugin_lock(thd, plugin_int_to_ref(plugin));
221 }
222 return NULL;
223}
224
225
226handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
227{
228 plugin_ref plugin;
229 switch (db_type) {
230 case DB_TYPE_DEFAULT:
231 return ha_default_handlerton(thd);
232 default:
233 if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
234 (plugin= ha_lock_engine(thd, installed_htons[db_type])))
235 return plugin_hton(plugin);
236 /* fall through */
237 case DB_TYPE_UNKNOWN:
238 return NULL;
239 }
240}
241
242
/**
  Use another database handler if the requested one is not compiled in
  (or is disabled).
*/
246handlerton *ha_checktype(THD *thd, handlerton *hton, bool no_substitute)
247{
248 if (ha_storage_engine_is_enabled(hton))
249 return hton;
250
251 if (no_substitute)
252 return NULL;
253
254 return ha_default_handlerton(thd);
255} /* ha_checktype */
256
257
258handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
259 handlerton *db_type)
260{
261 handler *file;
262 DBUG_ENTER("get_new_handler");
263 DBUG_PRINT("enter", ("alloc: %p", alloc));
264
265 if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
266 {
267 if ((file= db_type->create(db_type, share, alloc)))
268 file->init();
269 DBUG_RETURN(file);
270 }
271 /*
272 Try the default table type
273 Here the call to current_thd() is ok as we call this function a lot of
274 times but we enter this branch very seldom.
275 */
276 file= get_new_handler(share, alloc, ha_default_handlerton(current_thd));
277 DBUG_RETURN(file);
278}
279
280
281#ifdef WITH_PARTITION_STORAGE_ENGINE
282handler *get_ha_partition(partition_info *part_info)
283{
284 ha_partition *partition;
285 DBUG_ENTER("get_ha_partition");
286 if ((partition= new ha_partition(partition_hton, part_info)))
287 {
288 if (partition->initialize_partition(current_thd->mem_root))
289 {
290 delete partition;
291 partition= 0;
292 }
293 else
294 partition->init();
295 }
296 else
297 {
298 my_error(ER_OUTOFMEMORY, MYF(ME_FATALERROR),
299 static_cast<int>(sizeof(ha_partition)));
300 }
301 DBUG_RETURN(((handler*) partition));
302}
303#endif
304
305static const char **handler_errmsgs;
306
307C_MODE_START
308static const char **get_handler_errmsgs(int nr)
309{
310 return handler_errmsgs;
311}
312C_MODE_END
313
314
315/**
316 Register handler error messages for use with my_error().
317
318 @retval
319 0 OK
320 @retval
321 !=0 Error
322*/
323
324int ha_init_errors(void)
325{
326#define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
327
328 /* Allocate a pointer array for the error message strings. */
329 /* Zerofill it to avoid uninitialized gaps. */
330 if (! (handler_errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
331 MYF(MY_WME | MY_ZEROFILL))))
332 return 1;
333
334 /* Set the dedicated error messages. */
335 SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND));
336 SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY));
337 SETMSG(HA_ERR_RECORD_CHANGED, "Update which is recoverable");
338 SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
339 SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE));
340 SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE));
341 SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
342 SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
343 SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
344 SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE));
345 SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");
346 SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted");
347 SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL));
348 SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");
349 SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");
350 SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA));
351 SETMSG(HA_ERR_TO_BIG_ROW, "Too big row");
352 SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");
353 SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE));
354 SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");
355 SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE));
356 SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR));
357 SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE));
358 SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
359 SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL));
360 SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
361 SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK));
362 SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
363 SETMSG(HA_ERR_NO_REFERENCED_ROW, ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
364 SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
365 SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");
366 SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");
367 SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");
368 SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
369 SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");
370 SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED));
371 SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key");
372 SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
373 SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY));
374 SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED));
375 SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
376 SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
377 SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
378 SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
379 SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID");
380 SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
381 SETMSG(HA_ERR_DISK_FULL, ER_DEFAULT(ER_DISK_FULL));
382 SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search");
383 SETMSG(HA_ERR_FK_DEPTH_EXCEEDED, "Foreign key cascade delete/update exceeds");
384 SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
385
386 /* Register the error messages for use with my_error(). */
387 return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
388}
389
390
391/**
392 Unregister handler error messages.
393
394 @retval
395 0 OK
396 @retval
397 !=0 Error
398*/
399static int ha_finish_errors(void)
400{
  /* Unregister the error messages and free the message array. */
402 my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
403 my_free(handler_errmsgs);
404 handler_errmsgs= 0;
405 return 0;
406}
407
408static volatile int32 need_full_discover_for_existence= 0;
409static volatile int32 engines_with_discover_file_names= 0;
410static volatile int32 engines_with_discover= 0;
411
412static int full_discover_for_existence(handlerton *, const char *, const char *)
413{ return 0; }
414
415static int ext_based_existence(handlerton *, const char *, const char *)
416{ return 0; }
417
418static int hton_ext_based_table_discovery(handlerton *hton, LEX_CSTRING *db,
419 MY_DIR *dir, handlerton::discovered_list *result)
420{
421 /*
422 tablefile_extensions[0] is the metadata file, see
423 the comment above tablefile_extensions declaration
424 */
425 return extension_based_table_discovery(dir, hton->tablefile_extensions[0],
426 result);
427}
428
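/**
  Update the global counters that track how many installed engines support
  the various discovery features. Called with val=1 when an engine is
  installed and with val=-1 when it is uninstalled.
*/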
429static void update_discovery_counters(handlerton *hton, int val)
430{
431 if (hton->discover_table_existence == full_discover_for_existence)
432 my_atomic_add32(&need_full_discover_for_existence, val);
433
434 if (hton->discover_table_names && hton->tablefile_extensions[0])
435 my_atomic_add32(&engines_with_discover_file_names, val);
436
437 if (hton->discover_table)
438 my_atomic_add32(&engines_with_discover, val);
439}
440
441int ha_finalize_handlerton(st_plugin_int *plugin)
442{
443 handlerton *hton= (handlerton *)plugin->data;
444 DBUG_ENTER("ha_finalize_handlerton");
445
446 /* hton can be NULL here, if ha_initialize_handlerton() failed. */
447 if (!hton)
448 goto end;
449
450 switch (hton->state) {
451 case SHOW_OPTION_NO:
452 case SHOW_OPTION_DISABLED:
453 break;
454 case SHOW_OPTION_YES:
455 if (installed_htons[hton->db_type] == hton)
456 installed_htons[hton->db_type]= NULL;
457 break;
458 };
459
460 if (hton->panic)
461 hton->panic(hton, HA_PANIC_CLOSE);
462
463 if (plugin->plugin->deinit)
464 {
465 /*
466 Today we have no defined/special behavior for uninstalling
467 engine plugins.
468 */
469 DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
470 if (plugin->plugin->deinit(NULL))
471 {
472 DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
473 plugin->name.str));
474 }
475 }
476
477 free_sysvar_table_options(hton);
478 update_discovery_counters(hton, -1);
479
480 /*
481 In case a plugin is uninstalled and re-installed later, it should
482 reuse an array slot. Otherwise the number of uninstall/install
483 cycles would be limited.
484 */
485 if (hton->slot != HA_SLOT_UNDEF)
486 {
    /* Make sure we are not unplugging another plugin */
488 DBUG_ASSERT(hton2plugin[hton->slot] == plugin);
489 DBUG_ASSERT(hton->slot < MAX_HA);
490 hton2plugin[hton->slot]= NULL;
491 }
492
493 my_free(hton);
494
495 end:
496 DBUG_RETURN(0);
497}
498
499
500int ha_initialize_handlerton(st_plugin_int *plugin)
501{
502 handlerton *hton;
503 static const char *no_exts[]= { 0 };
504 DBUG_ENTER("ha_initialize_handlerton");
505 DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
506
507 hton= (handlerton *)my_malloc(sizeof(handlerton),
508 MYF(MY_WME | MY_ZEROFILL));
509 if (hton == NULL)
510 {
511 sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
512 plugin->name.str);
513 goto err_no_hton_memory;
514 }
515
516 hton->tablefile_extensions= no_exts;
517 hton->discover_table_names= hton_ext_based_table_discovery;
518
519 hton->slot= HA_SLOT_UNDEF;
520 /* Historical Requirement */
521 plugin->data= hton; // shortcut for the future
522 if (plugin->plugin->init && plugin->plugin->init(hton))
523 {
524 sql_print_error("Plugin '%s' init function returned error.",
525 plugin->name.str);
526 goto err;
527 }
528
  // hton_ext_based_table_discovery() works only when discovery
  // is supported and the engine is file-based.
531 if (hton->discover_table_names == hton_ext_based_table_discovery &&
532 (!hton->discover_table || !hton->tablefile_extensions[0]))
533 hton->discover_table_names= NULL;
534
535 // default discover_table_existence implementation
536 if (!hton->discover_table_existence && hton->discover_table)
537 {
538 if (hton->tablefile_extensions[0])
539 hton->discover_table_existence= ext_based_existence;
540 else
541 hton->discover_table_existence= full_discover_for_existence;
542 }
543
544 switch (hton->state) {
545 case SHOW_OPTION_NO:
546 break;
547 case SHOW_OPTION_YES:
548 {
549 uint tmp;
550 ulong fslot;
551
552 DBUG_EXECUTE_IF("unstable_db_type", {
553 static int i= (int) DB_TYPE_FIRST_DYNAMIC;
554 hton->db_type= (enum legacy_db_type)++i;
555 });
556
557 /* now check the db_type for conflict */
558 if (hton->db_type <= DB_TYPE_UNKNOWN ||
559 hton->db_type >= DB_TYPE_DEFAULT ||
560 installed_htons[hton->db_type])
561 {
562 int idx= (int) DB_TYPE_FIRST_DYNAMIC;
563
564 while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
565 idx++;
566
567 if (idx == (int) DB_TYPE_DEFAULT)
568 {
569 sql_print_warning("Too many storage engines!");
570 goto err_deinit;
571 }
572 if (hton->db_type != DB_TYPE_UNKNOWN)
573 sql_print_warning("Storage engine '%s' has conflicting typecode. "
574 "Assigning value %d.", plugin->plugin->name, idx);
575 hton->db_type= (enum legacy_db_type) idx;
576 }
577
578 /*
579 In case a plugin is uninstalled and re-installed later, it should
580 reuse an array slot. Otherwise the number of uninstall/install
581 cycles would be limited. So look for a free slot.
582 */
583 DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
584 for (fslot= 0; fslot < total_ha; fslot++)
585 {
586 if (!hton2plugin[fslot])
587 break;
588 }
589 if (fslot < total_ha)
590 hton->slot= fslot;
591 else
592 {
593 if (total_ha >= MAX_HA)
594 {
595 sql_print_error("Too many plugins loaded. Limit is %lu. "
596 "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
597 goto err_deinit;
598 }
599 hton->slot= total_ha++;
600 }
601 installed_htons[hton->db_type]= hton;
602 tmp= hton->savepoint_offset;
603 hton->savepoint_offset= savepoint_alloc_size;
604 savepoint_alloc_size+= tmp;
605 hton2plugin[hton->slot]=plugin;
606 if (hton->prepare)
607 {
608 total_ha_2pc++;
609 if (tc_log && tc_log != get_tc_log_implementation())
610 {
611 total_ha_2pc--;
612 hton->prepare= 0;
613 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
614 ER_UNKNOWN_ERROR,
615 "Cannot enable tc-log at run-time. "
616 "XA features of %s are disabled",
617 plugin->name.str);
618 }
619 }
620 break;
621 }
622 /* fall through */
623 default:
624 hton->state= SHOW_OPTION_DISABLED;
625 break;
626 }
627
  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable long-term. We should be able to
    remove partition.
  */
633 switch (hton->db_type) {
634 case DB_TYPE_HEAP:
635 heap_hton= hton;
636 break;
637 case DB_TYPE_MYISAM:
638 myisam_hton= hton;
639 break;
640 case DB_TYPE_PARTITION_DB:
641 partition_hton= hton;
642 break;
643 case DB_TYPE_SEQUENCE:
644 sql_sequence_hton= hton;
645 break;
646 default:
647 break;
648 };
649
650 resolve_sysvar_table_options(hton);
651 update_discovery_counters(hton, 1);
652
653 DBUG_RETURN(0);
654
655err_deinit:
656 /*
657 Let plugin do its inner deinitialization as plugin->init()
658 was successfully called before.
659 */
660 if (plugin->plugin->deinit)
661 (void) plugin->plugin->deinit(NULL);
662
663err:
664#ifndef DBUG_OFF
665 if (hton->prepare && hton->state == SHOW_OPTION_YES)
666 failed_ha_2pc++;
667#endif
668 my_free(hton);
669err_no_hton_memory:
670 plugin->data= NULL;
671 DBUG_RETURN(1);
672}
673
674int ha_init()
675{
676 int error= 0;
677 DBUG_ENTER("ha_init");
678
679 DBUG_ASSERT(total_ha < MAX_HA);
680 /*
681 Check if there is a transaction-capable storage engine besides the
682 binary log (which is considered a transaction-capable storage engine in
683 counting total_ha)
684 */
685 opt_using_transactions= total_ha>(ulong)opt_bin_log;
686 savepoint_alloc_size+= sizeof(SAVEPOINT);
687 DBUG_RETURN(error);
688}
689
690int ha_end()
691{
692 int error= 0;
693 DBUG_ENTER("ha_end");
694
695
  /*
    This should eventually be based on the graceful shutdown flag.
    So if the flag is equal to HA_PANIC_CLOSE, then deallocate
    the errors.
  */
701 if (unlikely(ha_finish_errors()))
702 error= 1;
703
704 DBUG_RETURN(error);
705}
706
707static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
708 void *path)
709{
710 handlerton *hton= plugin_hton(plugin);
711 if (hton->state == SHOW_OPTION_YES && hton->drop_database)
712 hton->drop_database(hton, (char *)path);
713 return FALSE;
714}
715
716
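/**
  Ask every installed storage engine that implements drop_database() to drop
  whatever it stores for the database located at the given path.
*/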
717void ha_drop_database(char* path)
718{
719 plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
720}
721
722
723static my_bool checkpoint_state_handlerton(THD *unused1, plugin_ref plugin,
724 void *disable)
725{
726 handlerton *hton= plugin_hton(plugin);
727 if (hton->state == SHOW_OPTION_YES && hton->checkpoint_state)
728 hton->checkpoint_state(hton, (int) *(bool*) disable);
729 return FALSE;
730}
731
732
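/**
  Disable (disable=true) or re-enable (disable=false) internal checkpointing
  in every installed storage engine that implements checkpoint_state().
*/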
733void ha_checkpoint_state(bool disable)
734{
735 plugin_foreach(NULL, checkpoint_state_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &disable);
736}
737
738
739struct st_commit_checkpoint_request {
740 void *cookie;
741 void (*pre_hook)(void *);
742};
743
744static my_bool commit_checkpoint_request_handlerton(THD *unused1, plugin_ref plugin,
745 void *data)
746{
747 st_commit_checkpoint_request *st= (st_commit_checkpoint_request *)data;
748 handlerton *hton= plugin_hton(plugin);
749 if (hton->state == SHOW_OPTION_YES && hton->commit_checkpoint_request)
750 {
751 void *cookie= st->cookie;
752 if (st->pre_hook)
753 (*st->pre_hook)(cookie);
754 (*hton->commit_checkpoint_request)(hton, cookie);
755 }
756 return FALSE;
757}
758
759
760/*
761 Invoke commit_checkpoint_request() in all storage engines that implement it.
762
763 If pre_hook is non-NULL, the hook will be called prior to each invocation.
764*/
765void
766ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *))
767{
768 st_commit_checkpoint_request st;
769 st.cookie= cookie;
770 st.pre_hook= pre_hook;
771 plugin_foreach(NULL, commit_checkpoint_request_handlerton,
772 MYSQL_STORAGE_ENGINE_PLUGIN, &st);
773}
774
775
776
777static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
778 void *unused)
779{
780 handlerton *hton= plugin_hton(plugin);
  /*
    There's no need to roll back here as all transactions must
    be rolled back already.
  */
785 if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
786 {
787 if (hton->close_connection)
788 hton->close_connection(hton, thd);
789 /* make sure ha_data is reset and ha_data_lock is released */
790 thd_set_ha_data(thd, hton, NULL);
791 }
792 return FALSE;
793}
794
795/**
796 @note
797 don't bother to rollback here, it's done already
798*/
799void ha_close_connection(THD* thd)
800{
801 plugin_foreach_with_mask(thd, closecon_handlerton,
802 MYSQL_STORAGE_ENGINE_PLUGIN,
803 PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0);
804}
805
806static my_bool kill_handlerton(THD *thd, plugin_ref plugin,
807 void *level)
808{
809 handlerton *hton= plugin_hton(plugin);
810
811 if (hton->state == SHOW_OPTION_YES && hton->kill_query &&
812 thd_get_ha_data(thd, hton))
813 hton->kill_query(hton, thd, *(enum thd_kill_levels *) level);
814 return FALSE;
815}
816
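/**
  Propagate a kill request to every storage engine that has per-connection
  data for this THD and implements kill_query(), so that long-running engine
  operations can be aborted at the requested level.
*/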
817void ha_kill_query(THD* thd, enum thd_kill_levels level)
818{
819 DBUG_ENTER("ha_kill_query");
820 plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &level);
821 DBUG_VOID_RETURN;
822}
823
824
825/* ========================================================================
826 ======================= TRANSACTIONS ===================================*/
827
828/**
829 Transaction handling in the server
830 ==================================
831
832 In each client connection, MySQL maintains two transactional
833 states:
834 - a statement transaction,
835 - a standard, also called normal transaction.
836
837 Historical note
838 ---------------
839 "Statement transaction" is a non-standard term that comes
840 from the times when MySQL supported BerkeleyDB storage engine.
841
  First of all, it should be said that BerkeleyDB's auto-commit
  mode auto-commits operations that are atomic to the storage
  engine itself, such as a write of a record, but are too
  high-granular to be atomic from the application (MySQL)
  perspective. One SQL statement could involve many BerkeleyDB
  auto-committed operations and thus BerkeleyDB auto-commit was of
  little use to MySQL.
849
  Secondly, instead of SQL standard savepoints, BerkeleyDB
  provided the concept of "nested transactions". In a nutshell,
  transactions could be arbitrarily nested, but when the parent
  transaction was committed or aborted, all its child (nested)
  transactions were committed or aborted as well.
  Commit of a nested transaction, in turn, made its changes
  visible, but not durable: it destroyed the nested transaction,
  and all its changes became available to the parent and to the
  currently active nested transactions of this parent.
859
860 So the mechanism of nested transactions was employed to
861 provide "all or nothing" guarantee of SQL statements
862 required by the standard.
863 A nested transaction would be created at start of each SQL
864 statement, and destroyed (committed or aborted) at statement
865 end. Such nested transaction was internally referred to as
866 a "statement transaction" and gave birth to the term.
867
868 (Historical note ends)
869
870 Since then a statement transaction is started for each statement
871 that accesses transactional tables or uses the binary log. If
872 the statement succeeds, the statement transaction is committed.
873 If the statement fails, the transaction is rolled back. Commits
874 of statement transactions are not durable -- each such
875 transaction is nested in the normal transaction, and if the
876 normal transaction is rolled back, the effects of all enclosed
877 statement transactions are undone as well. Technically,
878 a statement transaction can be viewed as a savepoint which is
879 maintained automatically in order to make effects of one
880 statement atomic.
881
882 The normal transaction is started by the user and is ended
883 usually upon a user request as well. The normal transaction
884 encloses transactions of all statements issued between
885 its beginning and its end.
886 In autocommit mode, the normal transaction is equivalent
887 to the statement transaction.
888
889 Since MySQL supports PSEA (pluggable storage engine
890 architecture), more than one transactional engine can be
891 active at a time. Hence transactions, from the server
892 point of view, are always distributed. In particular,
893 transactional state is maintained independently for each
894 engine. In order to commit a transaction the two phase
895 commit protocol is employed.
896
897 Not all statements are executed in context of a transaction.
898 Administrative and status information statements do not modify
899 engine data, and thus do not start a statement transaction and
900 also have no effect on the normal transaction. Examples of such
901 statements are SHOW STATUS and RESET SLAVE.
902
903 Similarly DDL statements are not transactional,
904 and therefore a transaction is [almost] never started for a DDL
905 statement. The difference between a DDL statement and a purely
906 administrative statement though is that a DDL statement always
907 commits the current transaction before proceeding, if there is
908 any.
909
  Finally, SQL statements that work with non-transactional
  engines also have no effect on the transaction state of the
  connection. Even though they are written to the binary log,
  and the binary log is, overall, transactional, the writes
  are done in "write-through" mode, directly to the binlog
  file, followed by an OS cache sync, in other words,
  bypassing the binlog undo log (translog).
  They do not commit the current normal transaction.
  A failure of a statement that uses non-transactional tables
  would cause a rollback of the statement transaction, but
  when only non-transactional tables are used,
  no statement transaction is started.
922
923 Data layout
924 -----------
925
926 The server stores its transaction-related data in
927 thd->transaction. This structure has two members of type
928 THD_TRANS. These members correspond to the statement and
929 normal transactions respectively:
930
931 - thd->transaction.stmt contains a list of engines
932 that are participating in the given statement
933 - thd->transaction.all contains a list of engines that
934 have participated in any of the statement transactions started
935 within the context of the normal transaction.
936 Each element of the list contains a pointer to the storage
937 engine, engine-specific transactional data, and engine-specific
938 transaction flags.
939
940 In autocommit mode thd->transaction.all is empty.
941 Instead, data of thd->transaction.stmt is
942 used to commit/rollback the normal transaction.
943
  The list of registered engines has a few important properties:
  - no engine is registered in the list twice
  - engines are present in the list in reverse temporal order --
    new participants are always added to the beginning of the list.
948
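  As a rough sketch only (the field names below are approximate, not the
  actual declarations), this layout can be pictured as:

    struct THD_TRANS                  // thd->transaction.stmt / .all
    {
      Ha_trx_info *ha_list;           // registered engines, newest first
      bool         no_2pc;            // true if any engine lacks prepare()
    };

    struct Ha_trx_info                // one node per participating engine
    {
      Ha_trx_info *next;              // next registered engine
      handlerton  *ht;                // the engine itself
      // plus engine-specific data and the read-write flag
    };
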
949 Transaction life cycle
950 ----------------------
951
952 When a new connection is established, thd->transaction
953 members are initialized to an empty state.
954 If a statement uses any tables, all affected engines
955 are registered in the statement engine list. In
956 non-autocommit mode, the same engines are registered in
957 the normal transaction list.
958 At the end of the statement, the server issues a commit
959 or a roll back for all engines in the statement list.
960 At this point transaction flags of an engine, if any, are
961 propagated from the statement list to the list of the normal
962 transaction.
963 When commit/rollback is finished, the statement list is
964 cleared. It will be filled in again by the next statement,
965 and emptied again at the next statement's end.
966
967 The normal transaction is committed in a similar way
968 (by going over all engines in thd->transaction.all list)
969 but at different times:
970 - upon COMMIT SQL statement is issued by the user
971 - implicitly, by the server, at the beginning of a DDL statement
972 or SET AUTOCOMMIT={0|1} statement.
973
974 The normal transaction can be rolled back as well:
975 - if the user has requested so, by issuing ROLLBACK SQL
976 statement
977 - if one of the storage engines requested a rollback
978 by setting thd->transaction_rollback_request. This may
979 happen in case, e.g., when the transaction in the engine was
980 chosen a victim of the internal deadlock resolution algorithm
981 and rolled back internally. When such a situation happens, there
982 is little the server can do and the only option is to rollback
983 transactions in all other participating engines. In this case
984 the rollback is accompanied by an error sent to the user.
985
986 As follows from the use cases above, the normal transaction
987 is never committed when there is an outstanding statement
988 transaction. In most cases there is no conflict, since
989 commits of the normal transaction are issued by a stand-alone
990 administrative or DDL statement, thus no outstanding statement
991 transaction of the previous statement exists. Besides,
  all statements that manipulate the normal transaction
993 are prohibited in stored functions and triggers, therefore
994 no conflicting situation can occur in a sub-statement either.
995 The remaining rare cases when the server explicitly has
996 to commit the statement transaction prior to committing the normal
997 one cover error-handling scenarios (see for example
998 SQLCOM_LOCK_TABLES).
999
1000 When committing a statement or a normal transaction, the server
1001 either uses the two-phase commit protocol, or issues a commit
1002 in each engine independently. The two-phase commit protocol
1003 is used only if:
1004 - all participating engines support two-phase commit (provide
1005 handlerton::prepare PSEA API call) and
1006 - transactions in at least two engines modify data (i.e. are
1007 not read-only).
1008
1009 Note that the two phase commit is used for
1010 statement transactions, even though they are not durable anyway.
1011 This is done to ensure logical consistency of data in a multiple-
1012 engine transaction.
1013 For example, imagine that some day MySQL supports unique
1014 constraint checks deferred till the end of statement. In such
1015 case a commit in one of the engines may yield ER_DUP_KEY,
1016 and MySQL should be able to gracefully abort statement
1017 transactions of other participants.
1018
1019 After the normal transaction has been committed,
1020 thd->transaction.all list is cleared.
1021
1022 When a connection is closed, the current normal transaction, if
1023 any, is rolled back.
1024
1025 Roles and responsibilities
1026 --------------------------
1027
1028 The server has no way to know that an engine participates in
1029 the statement and a transaction has been started
1030 in it unless the engine says so. Thus, in order to be
1031 a part of a transaction, the engine must "register" itself.
1032 This is done by invoking trans_register_ha() server call.
1033 Normally the engine registers itself whenever handler::external_lock()
1034 is called. trans_register_ha() can be invoked many times: if
1035 an engine is already registered, the call does nothing.
1036 In case autocommit is not set, the engine must register itself
1037 twice -- both in the statement list and in the normal transaction
1038 list.
1039 In which list to register is a parameter of trans_register_ha().
1040
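  As an illustration only (simplified and not taken from any particular
  engine), a typical registration in handler::external_lock() looks
  roughly like this:

    int my_handler::external_lock(THD *thd, int lock_type)
    {
      if (lock_type != F_UNLCK)
      {
        /* always register in the statement transaction */
        trans_register_ha(thd, FALSE, ht);
        /* and also in the normal transaction unless in autocommit mode */
        if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
          trans_register_ha(thd, TRUE, ht);
      }
      return 0;
    }
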
1041 Note, that although the registration interface in itself is
1042 fairly clear, the current usage practice often leads to undesired
1043 effects. E.g. since a call to trans_register_ha() in most engines
1044 is embedded into implementation of handler::external_lock(), some
1045 DDL statements start a transaction (at least from the server
1046 point of view) even though they are not expected to. E.g.
1047 CREATE TABLE does not start a transaction, since
1048 handler::external_lock() is never called during CREATE TABLE. But
1049 CREATE TABLE ... SELECT does, since handler::external_lock() is
1050 called for the table that is being selected from. This has no
1051 practical effects currently, but must be kept in mind
1052 nevertheless.
1053
1054 Once an engine is registered, the server will do the rest
1055 of the work.
1056
1057 During statement execution, whenever any of data-modifying
1058 PSEA API methods is used, e.g. handler::write_row() or
1059 handler::update_row(), the read-write flag is raised in the
1060 statement transaction for the involved engine.
  Currently all PSEA calls are "traced", and the data cannot be
1062 changed in a way other than issuing a PSEA call. Important:
1063 unless this invariant is preserved the server will not know that
1064 a transaction in a given engine is read-write and will not
1065 involve the two-phase commit protocol!
1066
  At the end of a statement, the server call trans_commit_stmt is
  invoked. This call in turn invokes handlerton::prepare()
  for every involved engine. Prepare is followed by a call
  to handlerton::commit_one_phase(). If a one-phase commit
  will suffice, handlerton::prepare() is not invoked and
  the server only calls handlerton::commit_one_phase().
1073 At statement commit, the statement-related read-write
1074 engine flag is propagated to the corresponding flag in the
1075 normal transaction. When the commit is complete, the list
1076 of registered engines is cleared.
1077
1078 Rollback is handled in a similar fashion.
1079
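  In simplified pseudo code the commit of a transaction with the registered
  engines therefore proceeds roughly as:

    if (!trans->no_2pc && number_of_read_write_engines > 1)
    {
      for each read-write engine:          // phase one
        ht->prepare(ht, thd, all);
      write the transaction to the transaction coordinator log (tc_log);
      for each engine:                     // phase two
        ht->commit(ht, thd, all);
    }
    else
    {
      for each engine:                     // one-phase commit
        ht->commit(ht, thd, all);
    }
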
1080 Additional notes on DDL and the normal transaction.
1081 ---------------------------------------------------
1082
1083 DDLs and operations with non-transactional engines
1084 do not "register" in thd->transaction lists, and thus do not
1085 modify the transaction state. Besides, each DDL in
1086 MySQL is prefixed with an implicit normal transaction commit
1087 (a call to trans_commit_implicit()), and thus leaves nothing
1088 to modify.
1089 However, as it has been pointed out with CREATE TABLE .. SELECT,
1090 some DDL statements can start a *new* transaction.
1091
1092 Behaviour of the server in this case is currently badly
1093 defined.
  DDL statements use a form of "semantic" logging
  to maintain atomicity: if CREATE TABLE .. SELECT failed,
  the newly created table is deleted.
  In addition, some DDL statements issue interim transaction
  commits: e.g. ALTER TABLE issues a commit after data is copied
  from the original table to the internal temporary table. Other
  statements, e.g. CREATE TABLE ... SELECT, do not always commit
  after themselves.
  And finally there is a group of DDL statements, such as
  RENAME/DROP TABLE, that don't start a new transaction
  and don't commit.
1105
  This diversity makes it hard to say what will happen if
  by chance a stored function is invoked during a DDL --
  whether any modifications it makes will be committed or not
  is not clear. Fortunately, the SQL grammar of only a few DDLs
  allows invocation of a stored function.
1111
1112 A consistent behaviour is perhaps to always commit the normal
1113 transaction after all DDLs, just like the statement transaction
1114 is always committed at the end of all statements.
1115*/
1116
1117/**
1118 Register a storage engine for a transaction.
1119
1120 Every storage engine MUST call this function when it starts
1121 a transaction or a statement (that is it must be called both for the
1122 "beginning of transaction" and "beginning of statement").
1123 Only storage engines registered for the transaction/statement
1124 will know when to commit/rollback it.
1125
1126 @note
  trans_register_ha is idempotent - a storage engine may register many
  times per transaction.
1129
1130*/
1131void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
1132{
1133 THD_TRANS *trans;
1134 Ha_trx_info *ha_info;
1135 DBUG_ENTER("trans_register_ha");
1136 DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1137
1138 if (all)
1139 {
1140 trans= &thd->transaction.all;
1141 thd->server_status|= SERVER_STATUS_IN_TRANS;
1142 if (thd->tx_read_only)
1143 thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1144 DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1145 }
1146 else
1147 trans= &thd->transaction.stmt;
1148
1149 ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1150
1151 if (ha_info->is_started())
1152 DBUG_VOID_RETURN; /* already registered, return */
1153
1154 ha_info->register_ha(trans, ht_arg);
1155
1156 trans->no_2pc|=(ht_arg->prepare==0);
1157 if (thd->transaction.xid_state.xid.is_null())
1158 thd->transaction.xid_state.xid.set(thd->query_id);
1159 DBUG_VOID_RETURN;
1160}
1161
1162
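/**
  Call the engine's prepare() method and, if it fails, report
  ER_ERROR_DURING_COMMIT (unless the failure is one that wsrep will handle
  by replaying the transaction). Returns the engine's error code.
*/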
1163static int prepare_or_error(handlerton *ht, THD *thd, bool all)
1164{
1165 int err= ht->prepare(ht, thd, all);
1166 status_var_increment(thd->status_var.ha_prepare_count);
1167 if (err)
1168 {
1169 /* avoid sending error, if we're going to replay the transaction */
1170#ifdef WITH_WSREP
1171 if (ht != wsrep_hton ||
1172 err == EMSGSIZE || thd->wsrep_conflict_state != MUST_REPLAY)
1173#endif
1174 my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1175 }
1176 return err;
1177}
1178
1179
1180/**
1181 @retval
1182 0 ok
1183 @retval
1184 1 error, transaction was rolled back
1185*/
1186int ha_prepare(THD *thd)
1187{
1188 int error=0, all=1;
1189 THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1190 Ha_trx_info *ha_info= trans->ha_list;
1191 DBUG_ENTER("ha_prepare");
1192
1193 if (ha_info)
1194 {
1195 for (; ha_info; ha_info= ha_info->next())
1196 {
1197 handlerton *ht= ha_info->ht();
1198 if (ht->prepare)
1199 {
1200 if (unlikely(prepare_or_error(ht, thd, all)))
1201 {
1202 ha_rollback_trans(thd, all);
1203 error=1;
1204 break;
1205 }
1206 }
1207 else
1208 {
1209 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1210 ER_GET_ERRNO, ER_THD(thd, ER_GET_ERRNO),
1211 HA_ERR_WRONG_COMMAND,
1212 ha_resolve_storage_engine_name(ht));
1213
1214 }
1215 }
1216 }
1217
1218 DBUG_RETURN(error);
1219}
1220
1221/**
1222 Check if we can skip the two-phase commit.
1223
1224 A helper function to evaluate if two-phase commit is mandatory.
1225 As a side effect, propagates the read-only/read-write flags
1226 of the statement transaction to its enclosing normal transaction.
1227
1228 If we have at least two engines with read-write changes we must
1229 run a two-phase commit. Otherwise we can run several independent
1230 commits as the only transactional engine has read-write changes
1231 and others are read-only.
1232
1233 @retval 0 All engines are read-only.
1234 @retval 1 We have the only engine with read-write changes.
1235 @retval >1 More than one engine have read-write changes.
1236 Note: return value might NOT be the exact number of
1237 engines with read-write changes.
1238*/
1239
1240static
1241uint
1242ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1243 bool all)
1244{
1245 /* The number of storage engines that have actual changes. */
1246 unsigned rw_ha_count= 0;
1247 Ha_trx_info *ha_info;
1248
1249 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1250 {
1251 if (ha_info->is_trx_read_write())
1252 ++rw_ha_count;
1253
1254 if (! all)
1255 {
1256 Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1257 DBUG_ASSERT(ha_info != ha_info_all);
1258 /*
1259 Merge read-only/read-write information about statement
1260 transaction to its enclosing normal transaction. Do this
1261 only if in a real transaction -- that is, if we know
1262 that ha_info_all is registered in thd->transaction.all.
1263 Since otherwise we only clutter the normal transaction flags.
1264 */
1265 if (ha_info_all->is_started()) /* FALSE if autocommit. */
1266 ha_info_all->coalesce_trx_with(ha_info);
1267 }
1268 else if (rw_ha_count > 1)
1269 {
1270 /*
1271 It is a normal transaction, so we don't need to merge read/write
1272 information up, and the need for two-phase commit has been
1273 already established. Break the loop prematurely.
1274 */
1275 break;
1276 }
1277 }
1278 return rw_ha_count;
1279}
1280
1281
1282/**
1283 @retval
1284 0 ok
1285 @retval
1286 1 transaction was rolled back
1287 @retval
1288 2 error during commit, data may be inconsistent
1289
1290 @todo
1291 Since we don't support nested statement transactions in 5.0,
1292 we can't commit or rollback stmt transactions while we are inside
1293 stored functions or triggers. So we simply do nothing now.
1294 TODO: This should be fixed in later ( >= 5.1) releases.
1295*/
1296int ha_commit_trans(THD *thd, bool all)
1297{
1298 int error= 0, cookie;
1299 /*
1300 'all' means that this is either an explicit commit issued by
1301 user, or an implicit commit issued by a DDL.
1302 */
1303 THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
1304 /*
    "real" is a nickname for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside an 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
1310 */
1311 bool is_real_trans= ((all || thd->transaction.all.ha_list == 0) &&
1312 !(thd->variables.option_bits & OPTION_GTID_BEGIN));
1313 Ha_trx_info *ha_info= trans->ha_list;
1314 bool need_prepare_ordered, need_commit_ordered;
1315 my_xid xid;
1316 DBUG_ENTER("ha_commit_trans");
1317 DBUG_PRINT("info",("thd: %p option_bits: %lu all: %d",
1318 thd, (ulong) thd->variables.option_bits, all));
1319
1320 /* Just a random warning to test warnings pushed during autocommit. */
1321 DBUG_EXECUTE_IF("warn_during_ha_commit_trans",
1322 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
1323 ER_WARNING_NOT_COMPLETE_ROLLBACK,
1324 ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK)););
1325
1326 DBUG_PRINT("info",
1327 ("all: %d thd->in_sub_stmt: %d ha_info: %p is_real_trans: %d",
1328 all, thd->in_sub_stmt, ha_info, is_real_trans));
1329 /*
1330 We must not commit the normal transaction if a statement
1331 transaction is pending. Otherwise statement transaction
1332 flags will not get propagated to its normal transaction's
1333 counterpart.
1334 */
1335 DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
1336 trans == &thd->transaction.stmt);
1337
1338 if (thd->in_sub_stmt)
1339 {
1340 DBUG_ASSERT(0);
1341 /*
1342 Since we don't support nested statement transactions in 5.0,
1343 we can't commit or rollback stmt transactions while we are inside
1344 stored functions or triggers. So we simply do nothing now.
1345 TODO: This should be fixed in later ( >= 5.1) releases.
1346 */
1347 if (!all)
1348 DBUG_RETURN(0);
1349 /*
1350 We assume that all statements which commit or rollback main transaction
1351 are prohibited inside of stored functions or triggers. So they should
1352 bail out with error even before ha_commit_trans() call. To be 100% safe
1353 let us throw error in non-debug builds.
1354 */
1355 my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1356 DBUG_RETURN(2);
1357 }
1358
1359#ifdef WITH_ARIA_STORAGE_ENGINE
1360 ha_maria::implicit_commit(thd, TRUE);
1361#endif
1362
1363 if (!ha_info)
1364 {
1365 /*
1366 Free resources and perform other cleanup even for 'empty' transactions.
1367 */
1368 if (is_real_trans)
1369 thd->transaction.cleanup();
1370 DBUG_RETURN(0);
1371 }
1372
1373 DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1374
1375 /* Close all cursors that can not survive COMMIT */
1376 if (is_real_trans) /* not a statement commit */
1377 thd->stmt_map.close_transient_cursors();
1378
1379 uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
  /* rw_trans is TRUE when we are in a transaction that changes data */
1381 bool rw_trans= is_real_trans &&
1382 (rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U));
1383 MDL_request mdl_request;
1384 DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d",
1385 is_real_trans, rw_trans, rw_ha_count));
1386
1387 if (rw_trans)
1388 {
1389 /*
1390 Acquire a metadata lock which will ensure that COMMIT is blocked
1391 by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1392 COMMIT in progress blocks FTWRL).
1393
1394 We allow the owner of FTWRL to COMMIT; we assume that it knows
1395 what it does.
1396 */
1397 mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
1398 MDL_EXPLICIT);
1399
1400 if (!WSREP(thd) &&
1401 thd->mdl_context.acquire_lock(&mdl_request,
1402 thd->variables.lock_wait_timeout))
1403 {
1404 ha_rollback_trans(thd, all);
1405 DBUG_RETURN(1);
1406 }
1407
1408 DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1409 }
1410
1411 if (rw_trans &&
1412 opt_readonly &&
1413 !(thd->security_ctx->master_access & SUPER_ACL) &&
1414 !thd->slave_thread)
1415 {
1416 my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1417 goto err;
1418 }
1419
1420#if 1 // FIXME: This should be done in ha_prepare().
1421 if (rw_trans || (thd->lex->sql_command == SQLCOM_ALTER_TABLE &&
1422 thd->lex->alter_info.flags & ALTER_ADD_SYSTEM_VERSIONING))
1423 {
1424 ulonglong trx_start_id= 0, trx_end_id= 0;
1425 for (Ha_trx_info *ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
1426 {
1427 if (ha_info->ht()->prepare_commit_versioned)
1428 {
1429 trx_end_id= ha_info->ht()->prepare_commit_versioned(thd, &trx_start_id);
1430 if (trx_end_id)
1431 break; // FIXME: use a common ID for cross-engine transactions
1432 }
1433 }
1434
1435 if (trx_end_id)
1436 {
1437 if (!TR_table::use_transaction_registry)
1438 {
1439 my_error(ER_VERS_TRT_IS_DISABLED, MYF(0));
1440 goto err;
1441 }
1442 DBUG_ASSERT(trx_start_id);
1443 TR_table trt(thd, true);
1444 if (trt.update(trx_start_id, trx_end_id))
1445 goto err;
      // Here, the call will not commit inside InnoDB. It only works
      // around closing thd->transaction.stmt opened by TR_table::open().
1448 if (all)
1449 commit_one_phase_2(thd, false, &thd->transaction.stmt, false);
1450 }
1451 }
1452#endif
1453
1454 if (trans->no_2pc || (rw_ha_count <= 1))
1455 {
1456 error= ha_commit_one_phase(thd, all);
1457 goto done;
1458 }
1459
1460 need_prepare_ordered= FALSE;
1461 need_commit_ordered= FALSE;
1462 xid= thd->transaction.xid_state.xid.get_my_xid();
1463
1464 for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
1465 {
1466 handlerton *ht= hi->ht();
1467 /*
1468 Do not call two-phase commit if this particular
1469 transaction is read-only. This allows for simpler
1470 implementation in engines that are always read-only.
1471 */
1472 if (! hi->is_trx_read_write())
1473 continue;
1474 /*
1475 Sic: we know that prepare() is not NULL since otherwise
1476 trans->no_2pc would have been set.
1477 */
1478 if (unlikely(prepare_or_error(ht, thd, all)))
1479 goto err;
1480
1481 need_prepare_ordered|= (ht->prepare_ordered != NULL);
1482 need_commit_ordered|= (ht->commit_ordered != NULL);
1483 }
1484 DEBUG_SYNC(thd, "ha_commit_trans_after_prepare");
1485 DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
1486
1487#ifdef WITH_WSREP
1488 if (!error && WSREP_ON && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid))
1489 {
1490 // xid was rewritten by wsrep
1491 xid= wsrep_xid_seqno(thd->transaction.xid_state.xid);
1492 }
1493#endif /* WITH_WSREP */
1494
1495 if (!is_real_trans)
1496 {
1497 error= commit_one_phase_2(thd, all, trans, is_real_trans);
1498 goto done;
1499 }
1500
1501 DEBUG_SYNC(thd, "ha_commit_trans_before_log_and_order");
1502 cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
1503 need_commit_ordered);
1504 if (!cookie)
1505 goto err;
1506
1507 DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order");
1508 DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
1509
1510 error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
1511
1512 DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
1513 if (tc_log->unlog(cookie, xid))
1514 {
1515 error= 2; /* Error during commit */
1516 goto end;
1517 }
1518
1519done:
1520 DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
1521
1522 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
1523 mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock());
1524 mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
1525 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
1526#ifdef HAVE_REPLICATION
1527 repl_semisync_master.wait_after_commit(thd, all);
1528 DEBUG_SYNC(thd, "after_group_after_commit");
1529#endif
1530 goto end;
1531
1532 /* Come here if error and we need to rollback. */
1533err:
1534 error= 1; /* Transaction was rolled back */
1535 /*
1536 In parallel replication, rollback is delayed, as there is extra replication
1537 book-keeping to be done before rolling back and allowing a conflicting
1538 transaction to continue (MDEV-7458).
1539 */
1540 if (!(thd->rgi_slave && thd->rgi_slave->is_parallel_exec))
1541 ha_rollback_trans(thd, all);
1542
1543end:
1544 if (rw_trans && mdl_request.ticket)
1545 {
1546 /*
1547 We do not always immediately release transactional locks
1548 after ha_commit_trans() (see uses of ha_enable_transaction()),
1549 thus we release the commit blocker lock as soon as it's
1550 not needed.
1551 */
1552 thd->mdl_context.release_lock(mdl_request.ticket);
1553 }
1554 DBUG_RETURN(error);
1555}
1556
1557/**
1558 @note
1559 This function does not care about global read lock. A caller should.
1560
1561 @param[in] all Is set in case of explicit commit
1562 (COMMIT statement), or implicit commit
1563 issued by DDL. Is not set when called
1564 at the end of statement, even if
1565 autocommit=1.
1566*/
1567
1568int ha_commit_one_phase(THD *thd, bool all)
1569{
1570 THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
  /*
    "real" is a nickname for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside an 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
    We establish the value of 'is_real_trans' by checking
    if it's an explicit COMMIT/BEGIN statement, or an implicit
    commit issued by DDL (all == TRUE), or if we're running
    in autocommit mode (it's only in autocommit mode that
    ha_commit_one_phase() can be called with an empty
    transaction.all.ha_list; see why in trans_register_ha()).
1583 */
1584 bool is_real_trans= ((all || thd->transaction.all.ha_list == 0) &&
1585 !(thd->variables.option_bits & OPTION_GTID_BEGIN));
1586 int res;
1587 DBUG_ENTER("ha_commit_one_phase");
1588 if (is_real_trans)
1589 {
1590 DEBUG_SYNC(thd, "ha_commit_one_phase");
1591 if ((res= thd->wait_for_prior_commit()))
1592 DBUG_RETURN(res);
1593 }
1594 res= commit_one_phase_2(thd, all, trans, is_real_trans);
1595 DBUG_RETURN(res);
1596}
1597
1598
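/**
  Commit the transaction in every registered engine without a prepare phase,
  clear the list of participants, invalidate the query cache for changed
  tables (when committing the normal transaction) and, for a real
  transaction, perform the final cleanup.
*/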
1599static int
1600commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
1601{
1602 int error= 0;
1603 uint count= 0;
1604 Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1605 DBUG_ENTER("commit_one_phase_2");
1606 if (is_real_trans)
1607 DEBUG_SYNC(thd, "commit_one_phase_2");
1608 if (ha_info)
1609 {
1610 for (; ha_info; ha_info= ha_info_next)
1611 {
1612 int err;
1613 handlerton *ht= ha_info->ht();
1614 if ((err= ht->commit(ht, thd, all)))
1615 {
1616 my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1617 error=1;
1618 }
1619 /* Should this be done only if is_real_trans is set ? */
1620 status_var_increment(thd->status_var.ha_commit_count);
1621 if (is_real_trans && ht != binlog_hton && ha_info->is_trx_read_write())
1622 ++count;
1623 ha_info_next= ha_info->next();
1624 ha_info->reset(); /* keep it conveniently zero-filled */
1625 }
1626 trans->ha_list= 0;
1627 trans->no_2pc=0;
1628 if (all)
1629 {
1630#ifdef HAVE_QUERY_CACHE
1631 if (thd->transaction.changed_tables)
1632 query_cache.invalidate(thd, thd->transaction.changed_tables);
1633#endif
1634 }
1635 }
1636 /* Free resources and perform other cleanup even for 'empty' transactions. */
1637 if (is_real_trans)
1638 {
1639 thd->has_waiter= false;
1640 thd->transaction.cleanup();
1641 if (count >= 2)
1642 statistic_increment(transactions_multi_engine, LOCK_status);
1643 }
1644
1645 DBUG_RETURN(error);
1646}
1647
1648
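/**
  Roll back the statement or normal transaction in all registered storage
  engines.

  @param thd  the current thread
  @param all  TRUE  - roll back the normal (whole) transaction
              FALSE - roll back only the statement transaction
*/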
1649int ha_rollback_trans(THD *thd, bool all)
1650{
1651 int error=0;
1652 THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1653 Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
  /*
    "real" is a nickname for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside an 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
    We establish the value of 'is_real_trans' by checking
    if it's an explicit COMMIT or BEGIN statement, or an implicit
    commit issued by DDL (in these cases all == TRUE),
    or if we're running in autocommit mode (it's only in autocommit mode
    that ha_commit_one_phase() is called with an empty
    transaction.all.ha_list; see why in trans_register_ha()).
1666 */
1667 bool is_real_trans=all || thd->transaction.all.ha_list == 0;
1668 DBUG_ENTER("ha_rollback_trans");
1669
1670 /*
1671 We must not rollback the normal transaction if a statement
1672 transaction is pending.
1673 */
1674 DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
1675 trans == &thd->transaction.stmt);
1676
1677#ifdef HAVE_REPLICATION
1678 if (is_real_trans)
1679 {
1680 /*
1681 In parallel replication, if we need to rollback during commit, we must
1682 first inform following transactions that we are going to abort our commit
1683 attempt. Otherwise those following transactions can run too early, and
1684 possibly cause replication to fail. See comments in retry_event_group().
1685
      There were several bugs with this in the past that were very hard to
      track down (MDEV-7458, MDEV-8302). So we assert here that we never
      roll back without first signalling following transactions, and in
      release builds we explicitly do the signalling before rolling back.
1690 */
1691 DBUG_ASSERT(!(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit));
1692 if (thd->rgi_slave && thd->rgi_slave->did_mark_start_commit)
1693 thd->rgi_slave->unmark_start_commit();
1694 }
1695#endif
1696
1697 if (thd->in_sub_stmt)
1698 {
1699 DBUG_ASSERT(0);
    /*
      If we are inside a stored function or trigger we should not commit or
      roll back the current statement transaction. See the comment in
      ha_commit_trans() for more information.
    */
1705 if (!all)
1706 DBUG_RETURN(0);
1707 my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1708 DBUG_RETURN(1);
1709 }
1710
1711 if (ha_info)
1712 {
    /* Close all cursors that cannot survive ROLLBACK */
1714 if (is_real_trans) /* not a statement commit */
1715 thd->stmt_map.close_transient_cursors();
1716
1717 for (; ha_info; ha_info= ha_info_next)
1718 {
1719 int err;
1720 handlerton *ht= ha_info->ht();
1721 if ((err= ht->rollback(ht, thd, all)))
1722 { // cannot happen
1723 my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1724 error=1;
1725#ifdef WITH_WSREP
1726 WSREP_WARN("handlerton rollback failed, thd %llu %lld conf %d SQL %s",
1727 thd->thread_id, thd->query_id, thd->wsrep_conflict_state,
1728 thd->query());
1729#endif /* WITH_WSREP */
1730 }
1731 status_var_increment(thd->status_var.ha_rollback_count);
1732 ha_info_next= ha_info->next();
1733 ha_info->reset(); /* keep it conveniently zero-filled */
1734 }
1735 trans->ha_list= 0;
1736 trans->no_2pc=0;
1737 }
1738
  /*
    Because of the possibility of an MDL deadlock, a rollback request can
    arrive even if the transaction hasn't been started in any transactional
    storage engine.
  */
1743 if (is_real_trans && thd->transaction_rollback_request &&
1744 thd->transaction.xid_state.xa_state != XA_NOTR)
1745 thd->transaction.xid_state.rm_error= thd->get_stmt_da()->sql_errno();
1746
  /* Always cleanup, even if no handlerton was involved: there may be savepoints. */
1748 if (is_real_trans)
1749 {
1750 thd->has_waiter= false;
1751 thd->transaction.cleanup();
1752 }
1753 if (all)
1754 thd->transaction_rollback_request= FALSE;
1755
1756 /*
    If a non-transactional table was updated, warn; don't warn if this is a
    slave thread (because when a slave thread executes a ROLLBACK, it has
    been read from the binary log, so it's 100% sure and normal to produce
    error ER_WARNING_NOT_COMPLETE_ROLLBACK). If we sent the warning to the
    slave SQL thread, it would not stop the thread but just be printed in
    the error log; we don't want users to wonder why they have this
    message in the error log, so we don't send it.
1764
1765 We don't have to test for thd->killed == KILL_SYSTEM_THREAD as
1766 it doesn't matter if a warning is pushed to a system thread or not:
1767 No one will see it...
1768 */
1769 if (is_real_trans && thd->transaction.all.modified_non_trans_table &&
1770 !thd->slave_thread && thd->killed < KILL_CONNECTION)
1771 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
1772 ER_WARNING_NOT_COMPLETE_ROLLBACK,
1773 ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK));
1774#ifdef HAVE_REPLICATION
1775 repl_semisync_master.wait_after_rollback(thd, all);
1776#endif
1777 DBUG_RETURN(error);
1778}
1779
1780
1781struct xahton_st {
1782 XID *xid;
1783 int result;
1784};
1785
1786static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin,
1787 void *arg)
1788{
1789 handlerton *hton= plugin_hton(plugin);
1790 if (hton->state == SHOW_OPTION_YES && hton->recover)
1791 {
1792 hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
1793 ((struct xahton_st *)arg)->result= 0;
1794 }
1795 return FALSE;
1796}
1797
1798static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin,
1799 void *arg)
1800{
1801 handlerton *hton= plugin_hton(plugin);
1802 if (hton->state == SHOW_OPTION_YES && hton->recover)
1803 {
1804 hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
1805 ((struct xahton_st *)arg)->result= 0;
1806 }
1807 return FALSE;
1808}
1809
1810
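/**
  Commit or roll back a prepared transaction, identified by its XID, in every
  installed engine that supports XA recovery.

  @note xaop.result is reset to 0 as soon as any engine with recovery support
  is visited; it does not indicate whether the XID was actually found by
  commit_by_xid() or rollback_by_xid().
*/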
1811int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
1812{
1813 struct xahton_st xaop;
1814 xaop.xid= xid;
1815 xaop.result= 1;
1816
1817 plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
1818 MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
1819
1820 return xaop.result;
1821}
1822
1823
1824#ifndef DBUG_OFF
1825/**
1826 @note
1827 This does not need to be multi-byte safe or anything
1828*/
1829static char* xid_to_str(char *buf, XID *xid)
1830{
1831 int i;
1832 char *s=buf;
1833 *s++='\'';
1834 for (i=0; i < xid->gtrid_length+xid->bqual_length; i++)
1835 {
1836 uchar c=(uchar)xid->data[i];
1837 /* is_next_dig is set if next character is a number */
1838 bool is_next_dig= FALSE;
1839 if (i < XIDDATASIZE)
1840 {
1841 char ch= xid->data[i+1];
1842 is_next_dig= (ch >= '0' && ch <='9');
1843 }
1844 if (i == xid->gtrid_length)
1845 {
1846 *s++='\'';
1847 if (xid->bqual_length)
1848 {
1849 *s++='.';
1850 *s++='\'';
1851 }
1852 }
1853 if (c < 32 || c > 126)
1854 {
1855 *s++='\\';
      /*
        If the next character is a digit, write the current character with
        3 octal digits to ensure that the following digit is not seen
        as part of the octal escape sequence
      */
1861 if (c > 077 || is_next_dig)
1862 *s++=_dig_vec_lower[c >> 6];
1863 if (c > 007 || is_next_dig)
1864 *s++=_dig_vec_lower[(c >> 3) & 7];
1865 *s++=_dig_vec_lower[c & 7];
1866 }
1867 else
1868 {
1869 if (c == '\'' || c == '\\')
1870 *s++='\\';
1871 *s++=c;
1872 }
1873 }
1874 *s++='\'';
1875 *s=0;
1876 return buf;
1877}
1878#endif
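
/*
  Illustration (example values, not taken from the code above): for an XID
  with gtrid = "ab\x01" (gtrid_length == 3) and bqual = "1"
  (bqual_length == 1), xid_to_str() would produce

    'ab\001'.'1'

  The non-printable byte 0x01 is written as a full 3-digit octal escape
  because the following character is a digit, and the gtrid and bqual parts
  are quoted separately with a '.' between them.
*/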
1879
1880/**
1881 recover() step of xa.
1882
1883 @note
1884 there are three modes of operation:
1885 - automatic recover after a crash
1886 in this case commit_list != 0, tc_heuristic_recover==0
1887 all xids from commit_list are committed, others are rolled back
1888 - manual (heuristic) recover
1889 in this case commit_list==0, tc_heuristic_recover != 0
1890 DBA has explicitly specified that all prepared transactions should
1891 be committed (or rolled back).
1892 - no recovery (MySQL did not detect a crash)
1893 in this case commit_list==0, tc_heuristic_recover == 0
1894 there should be no prepared transactions in this case.
1895*/
1896struct xarecover_st
1897{
1898 int len, found_foreign_xids, found_my_xids;
1899 XID *list;
1900 HASH *commit_list;
1901 bool dry_run;
1902};
1903
1904static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
1905 void *arg)
1906{
1907 handlerton *hton= plugin_hton(plugin);
1908 struct xarecover_st *info= (struct xarecover_st *) arg;
1909 int got;
1910
1911 if (hton->state == SHOW_OPTION_YES && hton->recover)
1912 {
1913 while ((got= hton->recover(hton, info->list, info->len)) > 0 )
1914 {
1915 sql_print_information("Found %d prepared transaction(s) in %s",
1916 got, hton_name(hton)->str);
1917 for (int i=0; i < got; i ++)
1918 {
1919 my_xid x= IF_WSREP(WSREP_ON && wsrep_is_wsrep_xid(&info->list[i]) ?
1920 wsrep_xid_seqno(info->list[i]) :
1921 info->list[i].get_my_xid(),
1922 info->list[i].get_my_xid());
1923 if (!x) // not "mine" - that is generated by external TM
1924 {
1925#ifndef DBUG_OFF
1926 char buf[XIDDATASIZE*4+6]; // see xid_to_str
1927 DBUG_PRINT("info", ("ignore xid %s", xid_to_str(buf, info->list+i)));
1928#endif
1929 xid_cache_insert(info->list+i, XA_PREPARED);
1930 info->found_foreign_xids++;
1931 continue;
1932 }
1933 if (info->dry_run)
1934 {
1935 info->found_my_xids++;
1936 continue;
1937 }
1938 // recovery mode
1939 if (info->commit_list ?
1940 my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
1941 tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
1942 {
1943#ifndef DBUG_OFF
1944 int rc=
1945#endif
1946 hton->commit_by_xid(hton, info->list+i);
1947#ifndef DBUG_OFF
1948 if (rc == 0)
1949 {
1950 char buf[XIDDATASIZE*4+6]; // see xid_to_str
1951 DBUG_PRINT("info", ("commit xid %s", xid_to_str(buf, info->list+i)));
1952 }
1953#endif
1954 }
1955 else
1956 {
1957#ifndef DBUG_OFF
1958 int rc=
1959#endif
1960 hton->rollback_by_xid(hton, info->list+i);
1961#ifndef DBUG_OFF
1962 if (rc == 0)
1963 {
1964 char buf[XIDDATASIZE*4+6]; // see xid_to_str
1965 DBUG_PRINT("info", ("rollback xid %s",
1966 xid_to_str(buf, info->list+i)));
1967 }
1968#endif
1969 }
1970 }
1971 if (got < info->len)
1972 break;
1973 }
1974 }
1975 return FALSE;
1976}
1977
1978int ha_recover(HASH *commit_list)
1979{
1980 struct xarecover_st info;
1981 DBUG_ENTER("ha_recover");
1982 info.found_foreign_xids= info.found_my_xids= 0;
1983 info.commit_list= commit_list;
1984 info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
1985 info.list= NULL;
1986
  /* commit_list and tc_heuristic_recover cannot both be set */
1988 DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
1989 /* if either is set, total_ha_2pc must be set too */
1990 DBUG_ASSERT(info.dry_run ||
1991 (failed_ha_2pc + total_ha_2pc) > (ulong)opt_bin_log);
1992
1993 if (total_ha_2pc <= (ulong)opt_bin_log)
1994 DBUG_RETURN(0);
1995
1996 if (info.commit_list)
1997 sql_print_information("Starting crash recovery...");
1998
1999 for (info.len= MAX_XID_LIST_SIZE ;
2000 info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
2001 {
2002 info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0));
2003 }
2004 if (!info.list)
2005 {
2006 sql_print_error(ER(ER_OUTOFMEMORY),
2007 static_cast<int>(info.len*sizeof(XID)));
2008 DBUG_RETURN(1);
2009 }
2010
2011 plugin_foreach(NULL, xarecover_handlerton,
2012 MYSQL_STORAGE_ENGINE_PLUGIN, &info);
2013
2014 my_free(info.list);
2015 if (info.found_foreign_xids)
2016 sql_print_warning("Found %d prepared XA transactions",
2017 info.found_foreign_xids);
2018 if (info.dry_run && info.found_my_xids)
2019 {
2020 sql_print_error("Found %d prepared transactions! It means that mysqld was "
2021 "not shut down properly last time and critical recovery "
2022 "information (last binlog or %s file) was manually deleted "
2023 "after a crash. You have to start mysqld with "
2024 "--tc-heuristic-recover switch to commit or rollback "
2025 "pending transactions.",
2026 info.found_my_xids, opt_tc_log_file);
2027 DBUG_RETURN(1);
2028 }
2029 if (info.commit_list)
2030 sql_print_information("Crash recovery finished.");
2031 DBUG_RETURN(0);
2032}
2033
/**
  Return the XID formatted as it appears in XA statement arguments,
  so that the string can be passed to XA START, XA PREPARE etc.

  @note
    'buf' has to have space for at least SQL_XIDSIZE bytes.
*/
2041
2042
/*
  The characters 'a'..'z', 'A'..'Z', '0'..'9'
  and the symbols '-', '_', ' ' don't have to be
  converted.
*/
2048
2049static const char xid_needs_conv[128]=
2050{
2051 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2052 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2053 0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,
2054 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
2055 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2056 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,
2057 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2058 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1
2059};
2060
2061uint get_sql_xid(XID *xid, char *buf)
2062{
2063 int tot_len= xid->gtrid_length + xid->bqual_length;
2064 int i;
2065 const char *orig_buf= buf;
2066
2067 for (i=0; i<tot_len; i++)
2068 {
2069 uchar c= ((uchar *) xid->data)[i];
2070 if (c >= 128 || xid_needs_conv[c])
2071 break;
2072 }
2073
2074 if (i >= tot_len)
2075 {
2076 /* No need to convert characters to hexadecimals. */
2077 *buf++= '\'';
2078 memcpy(buf, xid->data, xid->gtrid_length);
2079 buf+= xid->gtrid_length;
2080 *buf++= '\'';
2081 if (xid->bqual_length > 0 || xid->formatID != 1)
2082 {
2083 *buf++= ',';
2084 *buf++= '\'';
2085 memcpy(buf, xid->data+xid->gtrid_length, xid->bqual_length);
2086 buf+= xid->bqual_length;
2087 *buf++= '\'';
2088 }
2089 }
2090 else
2091 {
2092 *buf++= 'X';
2093 *buf++= '\'';
2094 for (i= 0; i < xid->gtrid_length; i++)
2095 {
2096 *buf++=_dig_vec_lower[((uchar*) xid->data)[i] >> 4];
2097 *buf++=_dig_vec_lower[((uchar*) xid->data)[i] & 0x0f];
2098 }
2099 *buf++= '\'';
2100 if (xid->bqual_length > 0 || xid->formatID != 1)
2101 {
2102 *buf++= ',';
2103 *buf++= 'X';
2104 *buf++= '\'';
2105 for (; i < tot_len; i++)
2106 {
2107 *buf++=_dig_vec_lower[((uchar*) xid->data)[i] >> 4];
2108 *buf++=_dig_vec_lower[((uchar*) xid->data)[i] & 0x0f];
2109 }
2110 *buf++= '\'';
2111 }
2112 }
2113
2114 if (xid->formatID != 1)
2115 {
2116 *buf++= ',';
2117 buf+= my_longlong10_to_str_8bit(&my_charset_bin, buf,
2118 MY_INT64_NUM_DECIMAL_DIGITS, -10, xid->formatID);
2119 }
2120
2121 return (uint)(buf - orig_buf);
2122}
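
/*
  Illustration (example values): for an XID created with
  XA START 'tx1','grp1',3 (gtrid "tx1", bqual "grp1", formatID 3) all bytes
  are in the "safe" set above, so get_sql_xid() produces

    'tx1','grp1',3

  If any byte needs conversion, the affected parts are emitted as hex
  literals instead, e.g. X'....',X'....',3.
*/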
2123
2124
2125/**
2126 return the list of XID's to a client, the same way SHOW commands do.
2127
2128 @note
2129 I didn't find in XA specs that an RM cannot return the same XID twice,
2130 so mysql_xa_recover does not filter XID's to ensure uniqueness.
2131 It can be easily fixed later, if necessary.
2132*/
2133
2134static my_bool xa_recover_callback(XID_STATE *xs, Protocol *protocol,
2135 char *data, uint data_len, CHARSET_INFO *data_cs)
2136{
2137 if (xs->xa_state == XA_PREPARED)
2138 {
2139 protocol->prepare_for_resend();
2140 protocol->store_longlong((longlong) xs->xid.formatID, FALSE);
2141 protocol->store_longlong((longlong) xs->xid.gtrid_length, FALSE);
2142 protocol->store_longlong((longlong) xs->xid.bqual_length, FALSE);
2143 protocol->store(data, data_len, data_cs);
2144 if (protocol->write())
2145 return TRUE;
2146 }
2147 return FALSE;
2148}
2149
2150
2151static my_bool xa_recover_callback_short(XID_STATE *xs, Protocol *protocol)
2152{
2153 return xa_recover_callback(xs, protocol, xs->xid.data,
2154 xs->xid.gtrid_length + xs->xid.bqual_length, &my_charset_bin);
2155}
2156
2157
2158static my_bool xa_recover_callback_verbose(XID_STATE *xs, Protocol *protocol)
2159{
2160 char buf[SQL_XIDSIZE];
2161 uint len= get_sql_xid(&xs->xid, buf);
2162 return xa_recover_callback(xs, protocol, buf, len,
2163 &my_charset_utf8_general_ci);
2164}
2165
2166
2167bool mysql_xa_recover(THD *thd)
2168{
2169 List<Item> field_list;
2170 Protocol *protocol= thd->protocol;
2171 MEM_ROOT *mem_root= thd->mem_root;
2172 my_hash_walk_action action;
2173 DBUG_ENTER("mysql_xa_recover");
2174
2175 field_list.push_back(new (mem_root)
2176 Item_int(thd, "formatID", 0,
2177 MY_INT32_NUM_DECIMAL_DIGITS), mem_root);
2178 field_list.push_back(new (mem_root)
2179 Item_int(thd, "gtrid_length", 0,
2180 MY_INT32_NUM_DECIMAL_DIGITS), mem_root);
2181 field_list.push_back(new (mem_root)
2182 Item_int(thd, "bqual_length", 0,
2183 MY_INT32_NUM_DECIMAL_DIGITS), mem_root);
2184 {
2185 uint len;
2186 CHARSET_INFO *cs;
2187
2188 if (thd->lex->verbose)
2189 {
2190 len= SQL_XIDSIZE;
2191 cs= &my_charset_utf8_general_ci;
2192 action= (my_hash_walk_action) xa_recover_callback_verbose;
2193 }
2194 else
2195 {
2196 len= XIDDATASIZE;
2197 cs= &my_charset_bin;
2198 action= (my_hash_walk_action) xa_recover_callback_short;
2199 }
2200
2201 field_list.push_back(new (mem_root)
2202 Item_empty_string(thd, "data", len, cs), mem_root);
2203 }
2204
2205 if (protocol->send_result_set_metadata(&field_list,
2206 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
2207 DBUG_RETURN(1);
2208
2209 if (xid_cache_iterate(thd, action, protocol))
2210 DBUG_RETURN(1);
2211 my_eof(thd);
2212 DBUG_RETURN(0);
2213}
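
/*
  Illustration (example values): with a single prepared transaction started
  as XA START 'tx1','grp1',3, plain XA RECOVER returns the raw concatenated
  data via xa_recover_callback_short():

    formatID  gtrid_length  bqual_length  data
    3         3             4             tx1grp1

  The verbose form (XA RECOVER FORMAT='SQL', where supported) goes through
  xa_recover_callback_verbose() and returns data as 'tx1','grp1',3, a string
  that can be pasted directly into XA COMMIT or XA ROLLBACK.
*/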
2214
2215/*
2216 Called by engine to notify TC that a new commit checkpoint has been reached.
2217 See comments on handlerton method commit_checkpoint_request() for details.
2218*/
2219void
2220commit_checkpoint_notify_ha(handlerton *hton, void *cookie)
2221{
2222 tc_log->commit_checkpoint_notify(cookie);
2223}
2224
2225
2226/**
2227 Check if all storage engines used in transaction agree that after
2228 rollback to savepoint it is safe to release MDL locks acquired after
2229 savepoint creation.
2230
2231 @param thd The client thread that executes the transaction.
2232
2233 @return true - It is safe to release MDL locks.
2234 false - If it is not.
2235*/
2236bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2237{
2238 Ha_trx_info *ha_info;
2239 THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2240 &thd->transaction.all);
2241
2242 DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2243
2244 /**
2245 Checking whether it is safe to release metadata locks after rollback to
2246 savepoint in all the storage engines that are part of the transaction.
2247 */
2248 for (ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
2249 {
2250 handlerton *ht= ha_info->ht();
2251 DBUG_ASSERT(ht);
2252
2253 if (ht->savepoint_rollback_can_release_mdl == 0 ||
2254 ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2255 DBUG_RETURN(false);
2256 }
2257
2258 DBUG_RETURN(true);
2259}
2260
2261int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
2262{
2263 int error=0;
2264 THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2265 &thd->transaction.all);
2266 Ha_trx_info *ha_info, *ha_info_next;
2267
2268 DBUG_ENTER("ha_rollback_to_savepoint");
2269
2270 trans->no_2pc=0;
2271 /*
2272 rolling back to savepoint in all storage engines that were part of the
2273 transaction when the savepoint was set
2274 */
2275 for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
2276 {
2277 int err;
2278 handlerton *ht= ha_info->ht();
2279 DBUG_ASSERT(ht);
2280 DBUG_ASSERT(ht->savepoint_set != 0);
2281 if ((err= ht->savepoint_rollback(ht, thd,
2282 (uchar *)(sv+1)+ht->savepoint_offset)))
2283 { // cannot happen
2284 my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2285 error=1;
2286 }
2287 status_var_increment(thd->status_var.ha_savepoint_rollback_count);
2288 trans->no_2pc|= ht->prepare == 0;
2289 }
2290 /*
2291 rolling back the transaction in all storage engines that were not part of
2292 the transaction when the savepoint was set
2293 */
2294 for (ha_info= trans->ha_list; ha_info != sv->ha_list;
2295 ha_info= ha_info_next)
2296 {
2297 int err;
2298 handlerton *ht= ha_info->ht();
2299 if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2300 { // cannot happen
2301 my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2302 error=1;
2303 }
2304 status_var_increment(thd->status_var.ha_rollback_count);
2305 ha_info_next= ha_info->next();
2306 ha_info->reset(); /* keep it conveniently zero-filled */
2307 }
2308 trans->ha_list= sv->ha_list;
2309 DBUG_RETURN(error);
2310}
2311
/**
  @note
  According to the SQL standard (ISO/IEC 9075-2:2003),
  section "4.33.4 SQL-statements and transaction states",
  SAVEPOINT is *not* a transaction-initiating SQL-statement.
*/
2318int ha_savepoint(THD *thd, SAVEPOINT *sv)
2319{
2320 int error=0;
2321 THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2322 &thd->transaction.all);
2323 Ha_trx_info *ha_info= trans->ha_list;
2324 DBUG_ENTER("ha_savepoint");
2325
2326 for (; ha_info; ha_info= ha_info->next())
2327 {
2328 int err;
2329 handlerton *ht= ha_info->ht();
2330 DBUG_ASSERT(ht);
2331 if (! ht->savepoint_set)
2332 {
2333 my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2334 error=1;
2335 break;
2336 }
2337 if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2338 { // cannot happen
2339 my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str);
2340 error=1;
2341 }
2342 status_var_increment(thd->status_var.ha_savepoint_count);
2343 }
2344 /*
2345 Remember the list of registered storage engines. All new
2346 engines are prepended to the beginning of the list.
2347 */
2348 sv->ha_list= trans->ha_list;
2349
2350 DBUG_RETURN(error);
2351}
2352
2353int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2354{
2355 int error=0;
2356 Ha_trx_info *ha_info= sv->ha_list;
2357 DBUG_ENTER("ha_release_savepoint");
2358
2359 for (; ha_info; ha_info= ha_info->next())
2360 {
2361 int err;
2362 handlerton *ht= ha_info->ht();
    /* Savepoint lifetime is enclosed within the transaction lifetime. */
2364 DBUG_ASSERT(ht);
2365 if (!ht->savepoint_release)
2366 continue;
2367 if ((err= ht->savepoint_release(ht, thd,
2368 (uchar *)(sv+1) + ht->savepoint_offset)))
2369 { // cannot happen
2370 my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str);
2371 error=1;
2372 }
2373 }
2374 DBUG_RETURN(error);
2375}
2376
2377
2378static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2379 void *arg)
2380{
2381 handlerton *hton= plugin_hton(plugin);
2382 if (hton->state == SHOW_OPTION_YES &&
2383 hton->start_consistent_snapshot)
2384 {
2385 if (hton->start_consistent_snapshot(hton, thd))
2386 return TRUE;
2387 *((bool *)arg)= false;
2388 }
2389 return FALSE;
2390}
2391
2392int ha_start_consistent_snapshot(THD *thd)
2393{
2394 bool err, warn= true;
2395
2396 /*
2397 Holding the LOCK_commit_ordered mutex ensures that we get the same
2398 snapshot for all engines (including the binary log). This allows us
2399 among other things to do backups with
2400 START TRANSACTION WITH CONSISTENT SNAPSHOT and
2401 have a consistent binlog position.
2402 */
2403 mysql_mutex_lock(&LOCK_commit_ordered);
2404 err= plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2405 mysql_mutex_unlock(&LOCK_commit_ordered);
2406
2407 if (err)
2408 {
2409 ha_rollback_trans(thd, true);
2410 return 1;
2411 }
2412
  /*
    Same idea as when one wants to CREATE TABLE in an engine which does not
    exist: issue a warning, but let the statement succeed.
  */
2417 if (warn)
2418 push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2419 "This MariaDB server does not support any "
2420 "consistent-read capable storage engine");
2421 return 0;
2422}
2423
2424
2425static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2426 void *arg)
2427{
2428 handlerton *hton= plugin_hton(plugin);
2429 if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2430 hton->flush_logs(hton))
2431 return TRUE;
2432 return FALSE;
2433}
2434
2435
2436bool ha_flush_logs(handlerton *db_type)
2437{
2438 if (db_type == NULL)
2439 {
2440 if (plugin_foreach(NULL, flush_handlerton,
2441 MYSQL_STORAGE_ENGINE_PLUGIN, 0))
2442 return TRUE;
2443 }
2444 else
2445 {
2446 if (db_type->state != SHOW_OPTION_YES ||
2447 (db_type->flush_logs && db_type->flush_logs(db_type)))
2448 return TRUE;
2449 }
2450 return FALSE;
2451}
2452
2453
2454/**
2455 @brief make canonical filename
2456
2457 @param[in] file table handler
2458 @param[in] path original path
2459 @param[out] tmp_path buffer for canonized path
2460
2461 @details Lower case db name and table name path parts for
2462 non file based tables when lower_case_table_names
2463 is 2 (store as is, compare in lower case).
2464 Filesystem path prefix (mysql_data_home or tmpdir)
2465 is left intact.
2466
2467 @note tmp_path may be left intact if no conversion was
2468 performed.
2469
2470 @retval canonized path
2471
2472 @todo This may be done more efficiently when table path
2473 gets built. Convert this function to something like
2474 ASSERT_CANONICAL_FILENAME.
2475*/
2476const char *get_canonical_filename(handler *file, const char *path,
2477 char *tmp_path)
2478{
2479 uint i;
2480 if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2481 return path;
2482
2483 for (i= 0; i <= mysql_tmpdir_list.max; i++)
2484 {
2485 if (is_prefix(path, mysql_tmpdir_list.list[i]))
2486 return path;
2487 }
2488
  /* Ensure that the table handler gets the path in lower case */
2490 if (tmp_path != path)
2491 strmov(tmp_path, path);
2492
  /*
    We should only convert the database/table part to lower case,
    so start the conversion after the data home directory prefix.
  */
2497 my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2498 return tmp_path;
2499}
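
/*
  Illustration (example paths): with lower_case_table_names == 2 and an
  engine that is not HA_FILE_BASED, a path such as "./TestDB/MyTable" is
  returned as "./testdb/mytable"; only the part after mysql_data_home_len is
  lowercased, and paths under any tmpdir are returned unchanged.
*/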
2500
2501
2502/** delete a table in the engine
2503
2504 @note
2505 ENOENT and HA_ERR_NO_SUCH_TABLE are not considered errors.
2506 The .frm file will be deleted only if we return 0.
2507*/
2508int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
2509 const LEX_CSTRING *db, const LEX_CSTRING *alias, bool generate_warning)
2510{
2511 handler *file;
2512 char tmp_path[FN_REFLEN];
2513 int error;
2514 TABLE dummy_table;
2515 TABLE_SHARE dummy_share;
2516 DBUG_ENTER("ha_delete_table");
2517
2518 /* table_type is NULL in ALTER TABLE when renaming only .frm files */
2519 if (table_type == NULL || table_type == view_pseudo_hton ||
2520 ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
2521 DBUG_RETURN(0);
2522
2523 bzero((char*) &dummy_table, sizeof(dummy_table));
2524 bzero((char*) &dummy_share, sizeof(dummy_share));
2525 dummy_table.s= &dummy_share;
2526
2527 path= get_canonical_filename(file, path, tmp_path);
2528 if (unlikely((error= file->ha_delete_table(path))))
2529 {
    /*
      It's not an error if the table doesn't exist in the engine;
      warn the user, but still report the DROP as a success.
    */
2534 bool intercept= error == ENOENT || error == HA_ERR_NO_SUCH_TABLE;
2535
2536 if (!intercept || generate_warning)
2537 {
      /* Fill up structures that print_error may need */
2539 dummy_share.path.str= (char*) path;
2540 dummy_share.path.length= strlen(path);
2541 dummy_share.normalized_path= dummy_share.path;
2542 dummy_share.db= *db;
2543 dummy_share.table_name= *alias;
2544 dummy_table.alias.set(alias->str, alias->length, table_alias_charset);
2545 file->change_table_ptr(&dummy_table, &dummy_share);
2546 file->print_error(error, MYF(intercept ? ME_JUST_WARNING : 0));
2547 }
2548 if (intercept)
2549 error= 0;
2550 }
2551 delete file;
2552
2553 DBUG_RETURN(error);
2554}
2555
2556/****************************************************************************
2557** General handler functions
2558****************************************************************************/
2559
2560
2561/**
2562 Clone a handler
2563
2564 @param name name of new table instance
2565 @param mem_root Where 'this->ref' should be allocated. It can't be
2566 in this->table->mem_root as otherwise we will not be
2567 able to reclaim that memory when the clone handler
2568 object is destroyed.
2569*/
2570
2571handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2572{
2573 handler *new_handler= get_new_handler(table->s, mem_root, ht);
2574
2575 if (!new_handler)
2576 return NULL;
2577 if (new_handler->set_ha_share_ref(ha_share))
2578 goto err;
2579
2580 /*
2581 TODO: Implement a more efficient way to have more than one index open for
    the same table instance. The ha_open call is not cacheable for clone.
2583
2584 This is not critical as the engines already have the table open
2585 and should be able to use the original instance of the table.
2586 */
2587 if (new_handler->ha_open(table, name, table->db_stat,
2588 HA_OPEN_IGNORE_IF_LOCKED, mem_root))
2589 goto err;
2590
2591 return new_handler;
2592
2593err:
2594 delete new_handler;
2595 return NULL;
2596}
2597
2598LEX_CSTRING *handler::engine_name()
2599{
2600 return hton_name(ht);
2601}
2602
2603
2604double handler::keyread_time(uint index, uint ranges, ha_rows rows)
2605{
  /*
    It is assumed that we will read through the whole key range and that all
    key blocks are half full (normally things are much better). It is also
    assumed that each time we read the next key from the index, the handler
    performs a random seek, thus the cost is proportional to the number of
    blocks read. This model does not take into account clustered indexes -
    engines that support them (e.g. InnoDB) may want to override this method.
    The model also accounts for the time to read index entries from the cache.
  */
2615 size_t len= table->key_info[index].key_length + ref_length;
2616 if (index == table->s->primary_key && table->file->primary_key_is_clustered())
2617 len= table->s->stored_rec_length;
2618 double keys_per_block= (stats.block_size/2.0/len+1);
2619 return (rows + keys_per_block-1)/ keys_per_block +
2620 len*rows/(stats.block_size+1)/TIME_FOR_COMPARE ;
2621}
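
/*
  Worked example (numbers chosen for illustration only): with
  stats.block_size = 8192, len = 100 and rows = 1000,
  keys_per_block = 8192/2.0/100 + 1 = 41.96, so the estimate is roughly
  (1000 + 41.96 - 1)/41.96 + 100*1000/8193/TIME_FOR_COMPARE,
  i.e. about 24.8 block reads plus 12.2/TIME_FOR_COMPARE for the comparisons.
*/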
2622
2623void **handler::ha_data(THD *thd) const
2624{
2625 return thd_ha_data(thd, ht);
2626}
2627
2628THD *handler::ha_thd(void) const
2629{
2630 DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
2631 return (table && table->in_use) ? table->in_use : current_thd;
2632}
2633
2634void handler::unbind_psi()
2635{
2636 /*
2637 Notify the instrumentation that this table is not owned
2638 by this thread any more.
2639 */
2640 PSI_CALL_unbind_table(m_psi);
2641}
2642
2643void handler::rebind_psi()
2644{
2645 /*
2646 Notify the instrumentation that this table is now owned
2647 by this thread.
2648 */
2649 m_psi= PSI_CALL_rebind_table(ha_table_share_psi(), this, m_psi);
2650}
2651
2652
2653PSI_table_share *handler::ha_table_share_psi() const
2654{
2655 return table_share->m_psi;
2656}
2657
2658/** @brief
2659 Open database-handler.
2660
2661 IMPLEMENTATION
    Try O_RDONLY if the table cannot be opened as O_RDWR.
    Don't wait for locks if HA_OPEN_WAIT_IF_LOCKED is not set.
2664*/
2665int handler::ha_open(TABLE *table_arg, const char *name, int mode,
2666 uint test_if_locked, MEM_ROOT *mem_root,
2667 List<String> *partitions_to_open)
2668{
2669 int error;
2670 DBUG_ENTER("handler::ha_open");
2671 DBUG_PRINT("enter",
2672 ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
2673 name, ht->db_type, table_arg->db_stat, mode,
2674 test_if_locked));
2675
2676 table= table_arg;
2677 DBUG_ASSERT(table->s == table_share);
2678 DBUG_ASSERT(m_lock_type == F_UNLCK);
2679 DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
2680 DBUG_ASSERT(alloc_root_inited(&table->mem_root));
2681
2682 set_partitions_to_open(partitions_to_open);
2683
2684 if (unlikely((error=open(name,mode,test_if_locked))))
2685 {
2686 if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
2687 (table->db_stat & HA_TRY_READ_ONLY))
2688 {
2689 table->db_stat|=HA_READ_ONLY;
2690 error=open(name,O_RDONLY,test_if_locked);
2691 }
2692 }
2693 if (unlikely(error))
2694 {
2695 my_errno= error; /* Safeguard */
2696 DBUG_PRINT("error",("error: %d errno: %d",error,errno));
2697 }
2698 else
2699 {
2700 DBUG_ASSERT(m_psi == NULL);
2701 DBUG_ASSERT(table_share != NULL);
2702 /*
      Do not call this for partition handlers, since it may take too many
      resources.
      So only use m_psi on the table level, not for individual partitions.
2706 */
2707 if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
2708 {
2709 m_psi= PSI_CALL_open_table(ha_table_share_psi(), this);
2710 }
2711
2712 if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
2713 table->db_stat|=HA_READ_ONLY;
2714 (void) extra(HA_EXTRA_NO_READCHECK); // Not needed in SQL
2715
2716 /* Allocate ref in thd or on the table's mem_root */
2717 if (!(ref= (uchar*) alloc_root(mem_root ? mem_root : &table->mem_root,
2718 ALIGN_SIZE(ref_length)*2)))
2719 {
2720 ha_close();
2721 error=HA_ERR_OUT_OF_MEM;
2722 }
2723 else
2724 dup_ref=ref+ALIGN_SIZE(ref_length);
2725 cached_table_flags= table_flags();
2726 }
2727 reset_statistics();
2728 internal_tmp_table= MY_TEST(test_if_locked & HA_OPEN_INTERNAL_TABLE);
2729
2730 DBUG_RETURN(error);
2731}
2732
2733int handler::ha_close(void)
2734{
2735 DBUG_ENTER("ha_close");
2736 /*
2737 Increment global statistics for temporary tables.
    In_use is 0 for tables that were closed from the table cache.
2739 */
2740 if (table->in_use)
2741 status_var_add(table->in_use->status_var.rows_tmp_read, rows_tmp_read);
2742 PSI_CALL_close_table(m_psi);
2743 m_psi= NULL; /* instrumentation handle, invalid after close_table() */
2744
2745 /* Detach from ANALYZE tracker */
2746 tracker= NULL;
2747
2748 DBUG_ASSERT(m_lock_type == F_UNLCK);
2749 DBUG_ASSERT(inited == NONE);
2750 DBUG_RETURN(close());
2751}
2752
2753
2754int handler::ha_rnd_next(uchar *buf)
2755{
2756 int result;
2757 DBUG_ENTER("handler::ha_rnd_next");
2758 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2759 m_lock_type != F_UNLCK);
2760 DBUG_ASSERT(inited == RND);
2761
2762 do
2763 {
2764 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
2765 { result= rnd_next(buf); })
2766 if (result != HA_ERR_RECORD_DELETED)
2767 break;
2768 status_var_increment(table->in_use->status_var.ha_read_rnd_deleted_count);
2769 } while (!table->in_use->check_killed());
2770
2771 if (result == HA_ERR_RECORD_DELETED)
2772 result= HA_ERR_ABORTED_BY_USER;
2773 else
2774 {
2775 if (!result)
2776 {
2777 update_rows_read();
2778 if (table->vfield && buf == table->record[0])
2779 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2780 }
2781 increment_statistics(&SSV::ha_read_rnd_next_count);
2782 }
2783
2784 table->status=result ? STATUS_NOT_FOUND: 0;
2785 DBUG_RETURN(result);
2786}
2787
2788int handler::ha_rnd_pos(uchar *buf, uchar *pos)
2789{
2790 int result;
2791 DBUG_ENTER("handler::ha_rnd_pos");
2792 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2793 m_lock_type != F_UNLCK);
2794 /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
2795 /* DBUG_ASSERT(inited == RND); */
2796
2797 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
2798 { result= rnd_pos(buf, pos); })
2799 increment_statistics(&SSV::ha_read_rnd_count);
2800 if (result == HA_ERR_RECORD_DELETED)
2801 result= HA_ERR_KEY_NOT_FOUND;
2802 else if (!result)
2803 {
2804 update_rows_read();
2805 if (table->vfield && buf == table->record[0])
2806 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2807 }
2808 table->status=result ? STATUS_NOT_FOUND: 0;
2809 DBUG_RETURN(result);
2810}
2811
2812int handler::ha_index_read_map(uchar *buf, const uchar *key,
2813 key_part_map keypart_map,
2814 enum ha_rkey_function find_flag)
2815{
2816 int result;
2817 DBUG_ENTER("handler::ha_index_read_map");
2818 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2819 m_lock_type != F_UNLCK);
2820 DBUG_ASSERT(inited==INDEX);
2821
2822 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2823 { result= index_read_map(buf, key, keypart_map, find_flag); })
2824 increment_statistics(&SSV::ha_read_key_count);
2825 if (!result)
2826 {
2827 update_index_statistics();
2828 if (table->vfield && buf == table->record[0])
2829 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2830 }
2831 table->status=result ? STATUS_NOT_FOUND: 0;
2832 DBUG_RETURN(result);
2833}
2834
2835/*
2836 @note: Other index lookup/navigation functions require prior
2837 handler->index_init() call. This function is different, it requires
2838 that the scan is not initialized, and accepts "uint index" as an argument.
2839*/
2840
2841int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
2842 key_part_map keypart_map,
2843 enum ha_rkey_function find_flag)
2844{
2845 int result;
2846 DBUG_ASSERT(inited==NONE);
2847 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2848 m_lock_type != F_UNLCK);
2849 DBUG_ASSERT(end_range == NULL);
2850 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, index, 0,
2851 { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
2852 increment_statistics(&SSV::ha_read_key_count);
2853 if (!result)
2854 {
2855 update_rows_read();
2856 index_rows_read[index]++;
2857 if (table->vfield && buf == table->record[0])
2858 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2859 }
2860 table->status=result ? STATUS_NOT_FOUND: 0;
2861 return result;
2862}
2863
2864int handler::ha_index_next(uchar * buf)
2865{
2866 int result;
2867 DBUG_ENTER("handler::ha_index_next");
2868 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2869 m_lock_type != F_UNLCK);
2870 DBUG_ASSERT(inited==INDEX);
2871
2872 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2873 { result= index_next(buf); })
2874 increment_statistics(&SSV::ha_read_next_count);
2875 if (!result)
2876 {
2877 update_index_statistics();
2878 if (table->vfield && buf == table->record[0])
2879 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2880 }
2881 table->status=result ? STATUS_NOT_FOUND: 0;
2882 DBUG_RETURN(result);
2883}
2884
2885int handler::ha_index_prev(uchar * buf)
2886{
2887 int result;
2888 DBUG_ENTER("handler::ha_index_prev");
2889 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2890 m_lock_type != F_UNLCK);
2891 DBUG_ASSERT(inited==INDEX);
2892
2893 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2894 { result= index_prev(buf); })
2895 increment_statistics(&SSV::ha_read_prev_count);
2896 if (!result)
2897 {
2898 update_index_statistics();
2899 if (table->vfield && buf == table->record[0])
2900 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2901 }
2902 table->status=result ? STATUS_NOT_FOUND: 0;
2903 DBUG_RETURN(result);
2904}
2905
2906int handler::ha_index_first(uchar * buf)
2907{
2908 int result;
2909 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2910 m_lock_type != F_UNLCK);
2911 DBUG_ASSERT(inited==INDEX);
2912
2913 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2914 { result= index_first(buf); })
2915 increment_statistics(&SSV::ha_read_first_count);
2916 if (!result)
2917 {
2918 update_index_statistics();
2919 if (table->vfield && buf == table->record[0])
2920 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2921 }
2922 table->status=result ? STATUS_NOT_FOUND: 0;
2923 return result;
2924}
2925
2926int handler::ha_index_last(uchar * buf)
2927{
2928 int result;
2929 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2930 m_lock_type != F_UNLCK);
2931 DBUG_ASSERT(inited==INDEX);
2932
2933 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2934 { result= index_last(buf); })
2935 increment_statistics(&SSV::ha_read_last_count);
2936 if (!result)
2937 {
2938 update_index_statistics();
2939 if (table->vfield && buf == table->record[0])
2940 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2941 }
2942 table->status=result ? STATUS_NOT_FOUND: 0;
2943 return result;
2944}
2945
2946int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
2947{
2948 int result;
2949 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2950 m_lock_type != F_UNLCK);
2951 DBUG_ASSERT(inited==INDEX);
2952
2953 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2954 { result= index_next_same(buf, key, keylen); })
2955 increment_statistics(&SSV::ha_read_next_count);
2956 if (!result)
2957 {
2958 update_index_statistics();
2959 if (table->vfield && buf == table->record[0])
2960 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2961 }
2962 table->status=result ? STATUS_NOT_FOUND: 0;
2963 return result;
2964}
2965
2966
2967bool handler::ha_was_semi_consistent_read()
2968{
2969 bool result= was_semi_consistent_read();
2970 if (result)
2971 increment_statistics(&SSV::ha_read_retry_count);
2972 return result;
2973}
2974
2975/* Initialize handler for random reading, with error handling */
2976
2977int handler::ha_rnd_init_with_error(bool scan)
2978{
2979 int error;
2980 if (likely(!(error= ha_rnd_init(scan))))
2981 return 0;
2982 table->file->print_error(error, MYF(0));
2983 return error;
2984}
2985
2986
2987/**
2988 Read first row (only) from a table. Used for reading tables with
2989 only one row, either based on table statistics or if table is a SEQUENCE.
2990
2991 This is never called for normal InnoDB tables, as these table types
  do not have HA_STATS_RECORDS_IS_EXACT set.
2993*/
2994int handler::read_first_row(uchar * buf, uint primary_key)
2995{
2996 int error;
2997 DBUG_ENTER("handler::read_first_row");
2998
2999 /*
    If there are very few deleted rows in the table, find the first row by
3001 scanning the table.
3002 TODO remove the test for HA_READ_ORDER
3003 */
3004 if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3005 !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
3006 {
3007 if (likely(!(error= ha_rnd_init(1))))
3008 {
3009 error= ha_rnd_next(buf);
3010 const int end_error= ha_rnd_end();
3011 if (likely(!error))
3012 error= end_error;
3013 }
3014 }
3015 else
3016 {
3017 /* Find the first row through the primary key */
3018 if (likely(!(error= ha_index_init(primary_key, 0))))
3019 {
3020 error= ha_index_first(buf);
3021 const int end_error= ha_index_end();
3022 if (likely(!error))
3023 error= end_error;
3024 }
3025 }
3026 DBUG_RETURN(error);
3027}
3028
3029/**
  Generate the next auto-increment number based on increment and offset.
  Computes the lowest number
  - strictly greater than "nr"
  - of the form: auto_increment_offset + N * auto_increment_increment
  If overflow happens, ULONGLONG_MAX is returned as an
  indication of overflow.
  In most cases increment= offset= 1, in which case we get:
  @verbatim 1,2,3,4,5,... @endverbatim
  If increment=10 and offset=5 and the previous number is 1, we get:
  @verbatim 5,15,25,35,... @endverbatim
3040*/
3041inline ulonglong
3042compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3043{
3044 const ulonglong save_nr= nr;
3045
3046 if (variables->auto_increment_increment == 1)
3047 nr= nr + 1; // optimization of the formula below
3048 else
3049 {
3050 nr= (((nr+ variables->auto_increment_increment -
3051 variables->auto_increment_offset)) /
3052 (ulonglong) variables->auto_increment_increment);
3053 nr= (nr* (ulonglong) variables->auto_increment_increment +
3054 variables->auto_increment_offset);
3055 }
3056
3057 if (unlikely(nr <= save_nr))
3058 return ULONGLONG_MAX;
3059
3060 return nr;
3061}
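
/*
  Worked example (values chosen for illustration only): with
  auto_increment_increment = 10, auto_increment_offset = 5 and nr = 7, the
  formula gives ((7 + 10 - 5) / 10) * 10 + 5 = 1 * 10 + 5 = 15, i.e. the
  smallest value of the form 5 + N*10 that is strictly greater than 7.
  With increment = 1 the shortcut simply returns nr + 1 = 8.
*/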
3062
3063
3064void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3065{
3066 /*
3067 If we have set THD::next_insert_id previously and plan to insert an
3068 explicitly-specified value larger than this, we need to increase
3069 THD::next_insert_id to be greater than the explicit value.
3070 */
3071 if ((next_insert_id > 0) && (nr >= next_insert_id))
3072 set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3073}
3074
3075
3076/**
3077 Update the auto_increment field if necessary.
3078
3079 Updates columns with type NEXT_NUMBER if:
3080
3081 - If column value is set to NULL (in which case
3082 auto_increment_field_not_null is 0)
3083 - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3084 set. In the future we will only set NEXT_NUMBER fields if one sets them
3085 to NULL (or they are not included in the insert list).
3086
3087 In those cases, we check if the currently reserved interval still has
3088 values we have not used. If yes, we pick the smallest one and use it.
3089 Otherwise:
3090
3091 - If a list of intervals has been provided to the statement via SET
3092 INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3093 first unused interval from this list, consider it as reserved.
3094
  - Otherwise we set the column for the first row to the value
    next_insert_id(get_auto_increment(column)), which is usually
    max-used-column-value+1.
3098 We call get_auto_increment() for the first row in a multi-row
3099 statement. get_auto_increment() will tell us the interval of values it
3100 reserved for us.
3101
3102 - In both cases, for the following rows we use those reserved values without
3103 calling the handler again (we just progress in the interval, computing
3104 each new value from the previous one). Until we have exhausted them, then
3105 we either take the next provided interval or call get_auto_increment()
3106 again to reserve a new interval.
3107
3108 - In both cases, the reserved intervals are remembered in
3109 thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3110 binlogging; the last reserved interval is remembered in
3111 auto_inc_interval_for_cur_row. The number of reserved intervals is
3112 remembered in auto_inc_intervals_count. It differs from the number of
3113 elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3114 latter list is cumulative over all statements forming one binlog event
3115 (when stored functions and triggers are used), and collapses two
3116 contiguous intervals in one (see its append() method).
3117
3118 The idea is that generated auto_increment values are predictable and
3119 independent of the column values in the table. This is needed to be
3120 able to replicate into a table that already has rows with a higher
3121 auto-increment value than the one that is inserted.
3122
3123 After we have already generated an auto-increment number and the user
3124 inserts a column with a higher value than the last used one, we will
3125 start counting from the inserted value.
3126
3127 This function's "outputs" are: the table's auto_increment field is filled
3128 with a value, thd->next_insert_id is filled with the value to use for the
3129 next row, if a value was autogenerated for the current row it is stored in
3130 thd->insert_id_for_cur_row, if get_auto_increment() was called
3131 thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3132 present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3133 this list.
3134
3135 @todo
3136 Replace all references to "next number" or NEXT_NUMBER to
3137 "auto_increment", everywhere (see below: there is
3138 table->auto_increment_field_not_null, and there also exists
3139 table->next_number_field, it's not consistent).
3140
3141 @retval
3142 0 ok
3143 @retval
3144 HA_ERR_AUTOINC_READ_FAILED get_auto_increment() was called and
3145 returned ~(ulonglong) 0
3146 @retval
3147 HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3148 failure.
3149*/
3150
3151#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
3152#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
3153#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
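
/*
  Illustration (using the defaults above): when no row estimate is available,
  successive get_auto_increment() calls within one statement reserve
  AUTO_INC_DEFAULT_NB_ROWS * 2^auto_inc_intervals_count values, i.e.
  1, 2, 4, 8, ... values per call, capped at AUTO_INC_DEFAULT_NB_MAX (65535).
*/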
3154
3155int handler::update_auto_increment()
3156{
3157 ulonglong nr, nb_reserved_values;
3158 bool append= FALSE;
3159 THD *thd= table->in_use;
3160 struct system_variables *variables= &thd->variables;
3161 int result=0, tmp;
3162 enum enum_check_fields save_count_cuted_fields;
3163 DBUG_ENTER("handler::update_auto_increment");
3164
3165 /*
3166 next_insert_id is a "cursor" into the reserved interval, it may go greater
3167 than the interval, but not smaller.
3168 */
3169 DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
3170
3171 if ((nr= table->next_number_field->val_int()) != 0 ||
3172 (table->auto_increment_field_not_null &&
3173 thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
3174 {
3175 /*
3176 Update next_insert_id if we had already generated a value in this
3177 statement (case of INSERT VALUES(null),(3763),(null):
3178 the last NULL needs to insert 3764, not the value of the first NULL plus
3179 1).
3180 Ignore negative values.
3181 */
3182 if ((longlong) nr > 0 || (table->next_number_field->flags & UNSIGNED_FLAG))
3183 adjust_next_insert_id_after_explicit_value(nr);
3184 insert_id_for_cur_row= 0; // didn't generate anything
3185 DBUG_RETURN(0);
3186 }
3187
3188 // ALTER TABLE ... ADD COLUMN ... AUTO_INCREMENT
3189 if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
3190 {
3191 if (table->versioned())
3192 {
3193 Field *end= table->vers_end_field();
3194 DBUG_ASSERT(end);
3195 bitmap_set_bit(table->read_set, end->field_index);
3196 if (!end->is_max())
3197 {
3198 if (!table->next_number_field->real_maybe_null())
3199 DBUG_RETURN(HA_ERR_UNSUPPORTED);
3200 table->next_number_field->set_null();
3201 DBUG_RETURN(0);
3202 }
3203 }
3204 table->next_number_field->set_notnull();
3205 }
3206
3207 if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
3208 {
3209 /* next_insert_id is beyond what is reserved, so we reserve more. */
3210 const Discrete_interval *forced=
3211 thd->auto_inc_intervals_forced.get_next();
3212 if (forced != NULL)
3213 {
3214 nr= forced->minimum();
3215 nb_reserved_values= forced->values();
3216 }
3217 else
3218 {
3219 /*
3220 handler::estimation_rows_to_insert was set by
3221 handler::ha_start_bulk_insert(); if 0 it means "unknown".
3222 */
3223 ulonglong nb_desired_values;
3224 /*
3225 If an estimation was given to the engine:
3226 - use it.
3227 - if we already reserved numbers, it means the estimation was
3228 not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
3229 time, twice that the 3rd time etc.
3230 If no estimation was given, use those increasing defaults from the
3231 start, starting from AUTO_INC_DEFAULT_NB_ROWS.
3232 Don't go beyond a max to not reserve "way too much" (because
3233 reservation means potentially losing unused values).
3234 Note that in prelocked mode no estimation is given.
3235 */
3236
3237 if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
3238 nb_desired_values= estimation_rows_to_insert;
3239 else if ((auto_inc_intervals_count == 0) &&
3240 (thd->lex->many_values.elements > 0))
3241 {
3242 /*
3243 For multi-row inserts, if the bulk inserts cannot be started, the
3244 handler::estimation_rows_to_insert will not be set. But we still
3245 want to reserve the autoinc values.
3246 */
3247 nb_desired_values= thd->lex->many_values.elements;
3248 }
3249 else /* go with the increasing defaults */
3250 {
3251 /* avoid overflow in formula, with this if() */
3252 if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
3253 {
3254 nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
3255 (1 << auto_inc_intervals_count);
3256 set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
3257 }
3258 else
3259 nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
3260 }
3261 get_auto_increment(variables->auto_increment_offset,
3262 variables->auto_increment_increment,
3263 nb_desired_values, &nr,
3264 &nb_reserved_values);
3265 if (nr == ULONGLONG_MAX)
3266 DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED); // Mark failure
3267
3268 /*
3269 That rounding below should not be needed when all engines actually
3270 respect offset and increment in get_auto_increment(). But they don't
3271 so we still do it. Wonder if for the not-first-in-index we should do
3272 it. Hope that this rounding didn't push us out of the interval; even
3273 if it did we cannot do anything about it (calling the engine again
3274 will not help as we inserted no row).
3275 */
3276 nr= compute_next_insert_id(nr-1, variables);
3277 }
3278
3279 if (table->s->next_number_keypart == 0)
3280 {
3281 /* We must defer the appending until "nr" has been possibly truncated */
3282 append= TRUE;
3283 }
3284 else
3285 {
3286 /*
3287 For such auto_increment there is no notion of interval, just a
3288 singleton. The interval is not even stored in
3289 thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
3290 for next row.
3291 */
3292 DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
3293 }
3294 }
3295
3296 if (unlikely(nr == ULONGLONG_MAX))
3297 DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3298
3299 DBUG_ASSERT(nr != 0);
3300 DBUG_PRINT("info",("auto_increment: %llu nb_reserved_values: %llu",
3301 nr, append ? nb_reserved_values : 0));
3302
3303 /* Store field without warning (Warning will be printed by insert) */
3304 save_count_cuted_fields= thd->count_cuted_fields;
3305 thd->count_cuted_fields= CHECK_FIELD_IGNORE;
3306 tmp= table->next_number_field->store((longlong)nr, TRUE);
3307 thd->count_cuted_fields= save_count_cuted_fields;
3308
3309 if (unlikely(tmp)) // Out of range value in store
3310 {
3311 /*
3312 It's better to return an error here than getting a confusing
3313 'duplicate key error' later.
3314 */
3315 result= HA_ERR_AUTOINC_ERANGE;
3316 }
3317 if (append)
3318 {
3319 auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
3320 variables->auto_increment_increment);
3321 auto_inc_intervals_count++;
3322 /* Row-based replication does not need to store intervals in binlog */
3323 if (((WSREP(thd) && wsrep_emulate_bin_log ) || mysql_bin_log.is_open())
3324 && !thd->is_current_stmt_binlog_format_row())
3325 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3326 append(auto_inc_interval_for_cur_row.minimum(),
3327 auto_inc_interval_for_cur_row.values(),
3328 variables->auto_increment_increment);
3329 }
3330
3331 /*
3332 Record this autogenerated value. If the caller then
3333 succeeds to insert this value, it will call
3334 record_first_successful_insert_id_in_cur_stmt()
3335 which will set first_successful_insert_id_in_cur_stmt if it's not
3336 already set.
3337 */
3338 insert_id_for_cur_row= nr;
3339
3340 if (result) // overflow
3341 DBUG_RETURN(result);
3342
3343 /*
3344 Set next insert id to point to next auto-increment value to be able to
3345 handle multi-row statements.
3346 */
3347 set_next_insert_id(compute_next_insert_id(nr, variables));
3348
3349 DBUG_RETURN(0);
3350}
3351
3352
3353/** @brief
  MySQL signals that it changed the column bitmap.

  USAGE
    This is for handlers that need to set up their own column bitmaps.
    Normally the handler should set up its own column bitmaps in
    index_init() or rnd_init() and in any column_bitmaps_signal() call after
    this.

    The handler is allowed to change the bitmap after an index_init() or
    rnd_init() call is made, as after this MySQL will not use the bitmap
    for any program logic checking.
3365*/
3366void handler::column_bitmaps_signal()
3367{
3368 DBUG_ENTER("column_bitmaps_signal");
3369 if (table)
3370 DBUG_PRINT("info", ("read_set: %p write_set: %p",
3371 table->read_set, table->write_set));
3372 DBUG_VOID_RETURN;
3373}
3374
3375
3376/** @brief
3377 Reserves an interval of auto_increment values from the handler.
3378
3379 SYNOPSIS
3380 get_auto_increment()
3381 offset
3382 increment
3383 nb_desired_values how many values we want
3384 first_value (OUT) the first value reserved by the handler
3385 nb_reserved_values (OUT) how many values the handler reserved
3386
  offset and increment mean that we want values to be of the form
  offset + N * increment, where N>=0 is an integer.
  If the function sets *first_value to ~(ulonglong)0 it means an error.
  If the function sets *nb_reserved_values to ULONGLONG_MAX it means it has
  reserved up to "positive infinity".
3392*/
3393void handler::get_auto_increment(ulonglong offset, ulonglong increment,
3394 ulonglong nb_desired_values,
3395 ulonglong *first_value,
3396 ulonglong *nb_reserved_values)
3397{
3398 ulonglong nr;
3399 int error;
3400 MY_BITMAP *old_read_set;
3401
3402 old_read_set= table->prepare_for_keyread(table->s->next_number_index);
3403
3404 if (ha_index_init(table->s->next_number_index, 1))
3405 {
3406 /* This should never happen, assert in debug, and fail in release build */
3407 DBUG_ASSERT(0);
3408 (void) extra(HA_EXTRA_NO_KEYREAD);
3409 *first_value= ULONGLONG_MAX;
3410 return;
3411 }
3412
3413 if (table->s->next_number_keypart == 0)
3414 { // Autoincrement at key-start
3415 error= ha_index_last(table->record[1]);
3416 /*
      MySQL implicitly assumes that such a method does locking (as MySQL
      decides to use nr+increment without checking again with the handler,
      in handler::update_auto_increment()), so we reserve to infinity.
3420 */
3421 *nb_reserved_values= ULONGLONG_MAX;
3422 }
3423 else
3424 {
3425 uchar key[MAX_KEY_LENGTH];
3426 key_copy(key, table->record[0],
3427 table->key_info + table->s->next_number_index,
3428 table->s->next_number_key_offset);
3429 error= ha_index_read_map(table->record[1], key,
3430 make_prev_keypart_map(table->s->
3431 next_number_keypart),
3432 HA_READ_PREFIX_LAST);
3433 /*
      MySQL needs to call us for the next row: assume we are inserting
      ("a",null) here and we return 3; next, this statement will want to
      insert ("b",null): there is no reason why ("b",3+1) would be the right
      row to insert: maybe it already exists, maybe 3+1 is too large...
3438 */
3439 *nb_reserved_values= 1;
3440 }
3441
3442 if (unlikely(error))
3443 {
3444 if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
3445 /* No entry found, that's fine */;
3446 else
3447 print_error(error, MYF(0));
3448 nr= 1;
3449 }
3450 else
3451 nr= ((ulonglong) table->next_number_field->
3452 val_int_offset(table->s->rec_buff_length)+1);
3453 ha_index_end();
3454 table->restore_column_maps_after_keyread(old_read_set);
3455 *first_value= nr;
3456 return;
3457}
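
/*
  Illustration (example values): for an auto-increment column that is first
  in its index, if the last index entry holds 42 the default implementation
  above returns *first_value = 43 and *nb_reserved_values = ULONGLONG_MAX;
  for an empty table it returns *first_value = 1.
*/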
3458
3459
3460void handler::ha_release_auto_increment()
3461{
3462 DBUG_ENTER("ha_release_auto_increment");
3463 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3464 m_lock_type != F_UNLCK ||
3465 (!next_insert_id && !insert_id_for_cur_row));
3466 release_auto_increment();
3467 insert_id_for_cur_row= 0;
3468 auto_inc_interval_for_cur_row.replace(0, 0, 0);
3469 auto_inc_intervals_count= 0;
3470 if (next_insert_id > 0)
3471 {
3472 next_insert_id= 0;
3473 /*
3474 this statement used forced auto_increment values if there were some,
3475 wipe them away for other statements.
3476 */
3477 table->in_use->auto_inc_intervals_forced.empty();
3478 }
3479 DBUG_VOID_RETURN;
3480}
3481
3482
3483/**
3484 Construct and emit duplicate key error message using information
3485 from table's record buffer.
3486
3487 @param table TABLE object which record buffer should be used as
3488 source for column values.
3489 @param key Key description.
3490 @param msg Error message template to which key value should be
3491 added.
3492 @param errflag Flags for my_error() call.
3493
  @notes
    The error message is from ER_DUP_ENTRY_WITH_KEY_NAME, but to stay
    compatible with old code, the error number is ER_DUP_ENTRY.
3497*/
3498
3499void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3500{
3501 /* Write the duplicated key in the error message */
3502 char key_buff[MAX_KEY_LENGTH];
3503 String str(key_buff,sizeof(key_buff),system_charset_info);
3504
3505 if (key == NULL)
3506 {
3507 /*
3508 Key is unknown. Should only happen if storage engine reports wrong
3509 duplicate key number.
3510 */
3511 my_printf_error(ER_DUP_ENTRY, msg, errflag, "", "*UNKNOWN*");
3512 }
3513 else
3514 {
3515 /* Table is opened and defined at this point */
3516 key_unpack(&str,table, key);
3517 uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
3518 if (str.length() >= max_length)
3519 {
3520 str.length(max_length-4);
3521 str.append(STRING_WITH_LEN("..."));
3522 }
3523 my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(),
3524 key->name.str);
3525 }
3526}
3527
3528/**
3529 Construct and emit duplicate key error message using information
3530 from table's record buffer.
3531
3532 @sa print_keydup_error(table, key, msg, errflag).
3533*/
3534
3535void print_keydup_error(TABLE *table, KEY *key, myf errflag)
3536{
3537 print_keydup_error(table, key,
3538 ER_THD(table->in_use, ER_DUP_ENTRY_WITH_KEY_NAME),
3539 errflag);
3540}
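/*
  For illustration only (assuming the default English error messages): a
  duplicate value "joe@example.org" on a unique key named "uk_email" is
  reported roughly as

    ERROR 1062 (23000): Duplicate entry 'joe@example.org' for key 'uk_email'

  where 1062 is ER_DUP_ENTRY, kept for compatibility as noted above.
*/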
3541
3542
3543/**
3544 Print error that we got from handler function.
3545
3546 @note
    In the case of delete table, it is only safe to use the following parts
    of the 'table' structure:
3549 - table->s->path
3550 - table->alias
3551*/
3552
3553#define SET_FATAL_ERROR fatal_error=1
3554
3555void handler::print_error(int error, myf errflag)
3556{
3557 bool fatal_error= 0;
3558 DBUG_ENTER("handler::print_error");
3559 DBUG_PRINT("enter",("error: %d",error));
3560
3561 if (ha_thd()->transaction_rollback_request)
3562 {
3563 /* Ensure this becomes a true error */
3564 errflag&= ~(ME_JUST_WARNING | ME_JUST_INFO);
3565 }
3566
3567 int textno= -1; // impossible value
3568 switch (error) {
3569 case EACCES:
3570 textno=ER_OPEN_AS_READONLY;
3571 break;
3572 case EAGAIN:
3573 textno=ER_FILE_USED;
3574 break;
3575 case ENOENT:
3576 case ENOTDIR:
3577 case ELOOP:
3578 textno=ER_FILE_NOT_FOUND;
3579 break;
3580 case ENOSPC:
3581 case HA_ERR_DISK_FULL:
3582 textno= ER_DISK_FULL;
3583 SET_FATAL_ERROR; // Ensure error is logged
3584 break;
3585 case HA_ERR_KEY_NOT_FOUND:
3586 case HA_ERR_NO_ACTIVE_RECORD:
3587 case HA_ERR_RECORD_DELETED:
3588 case HA_ERR_END_OF_FILE:
    /*
      These errors are not normally fatal (for example for reads). However,
      if you get one during an update or delete, it is fatal.
      As the caller invokes print_error() (which is not done on reads), we
      assume something went wrong with the update or delete.
    */
3595 SET_FATAL_ERROR;
3596 textno=ER_KEY_NOT_FOUND;
3597 break;
3598 case HA_ERR_ABORTED_BY_USER:
3599 {
3600 DBUG_ASSERT(table->in_use->killed);
3601 table->in_use->send_kill_message();
3602 DBUG_VOID_RETURN;
3603 }
3604 case HA_ERR_WRONG_MRG_TABLE_DEF:
3605 textno=ER_WRONG_MRG_TABLE;
3606 break;
3607 case HA_ERR_FOUND_DUPP_KEY:
3608 {
3609 if (table)
3610 {
3611 uint key_nr=get_dup_key(error);
3612 if ((int) key_nr >= 0 && key_nr < table->s->keys)
3613 {
3614 print_keydup_error(table, &table->key_info[key_nr], errflag);
3615 DBUG_VOID_RETURN;
3616 }
3617 }
3618 textno=ER_DUP_KEY;
3619 break;
3620 }
3621 case HA_ERR_FOREIGN_DUPLICATE_KEY:
3622 {
3623 char rec_buf[MAX_KEY_LENGTH];
3624 String rec(rec_buf, sizeof(rec_buf), system_charset_info);
3625 /* Table is opened and defined at this point */
3626
3627 /*
3628 Just print the subset of fields that are part of the first index,
3629 printing the whole row from there is not easy.
3630 */
3631 key_unpack(&rec, table, &table->key_info[0]);
3632
3633 char child_table_name[NAME_LEN + 1];
3634 char child_key_name[NAME_LEN + 1];
3635 if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
3636 child_key_name, sizeof(child_key_name)))
3637 {
3638 my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
3639 table_share->table_name.str, rec.c_ptr_safe(),
3640 child_table_name, child_key_name);
3641 }
3642 else
3643 {
3644 my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
3645 table_share->table_name.str, rec.c_ptr_safe());
3646 }
3647 DBUG_VOID_RETURN;
3648 }
3649 case HA_ERR_NULL_IN_SPATIAL:
3650 my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
3651 DBUG_VOID_RETURN;
3652 case HA_ERR_FOUND_DUPP_UNIQUE:
3653 textno=ER_DUP_UNIQUE;
3654 break;
3655 case HA_ERR_RECORD_CHANGED:
3656 /*
3657 This is not fatal error when using HANDLER interface
3658 SET_FATAL_ERROR;
3659 */
3660 textno=ER_CHECKREAD;
3661 break;
3662 case HA_ERR_CRASHED:
3663 SET_FATAL_ERROR;
3664 textno=ER_NOT_KEYFILE;
3665 break;
3666 case HA_ERR_WRONG_IN_RECORD:
3667 SET_FATAL_ERROR;
3668 textno= ER_CRASHED_ON_USAGE;
3669 break;
3670 case HA_ERR_CRASHED_ON_USAGE:
3671 SET_FATAL_ERROR;
3672 textno=ER_CRASHED_ON_USAGE;
3673 break;
3674 case HA_ERR_NOT_A_TABLE:
3675 textno= error;
3676 break;
3677 case HA_ERR_CRASHED_ON_REPAIR:
3678 SET_FATAL_ERROR;
3679 textno=ER_CRASHED_ON_REPAIR;
3680 break;
3681 case HA_ERR_OUT_OF_MEM:
3682 textno=ER_OUT_OF_RESOURCES;
3683 break;
3684 case HA_ERR_WRONG_COMMAND:
3685 my_error(ER_ILLEGAL_HA, MYF(0), table_type(), table_share->db.str,
3686 table_share->table_name.str);
3687 DBUG_VOID_RETURN;
3688 break;
3689 case HA_ERR_OLD_FILE:
3690 textno=ER_OLD_KEYFILE;
3691 break;
3692 case HA_ERR_UNSUPPORTED:
3693 textno=ER_UNSUPPORTED_EXTENSION;
3694 break;
3695 case HA_ERR_RECORD_FILE_FULL:
3696 {
3697 textno=ER_RECORD_FILE_FULL;
3698 /* Write the error message to error log */
3699 errflag|= ME_NOREFRESH;
3700 break;
3701 }
3702 case HA_ERR_INDEX_FILE_FULL:
3703 {
3704 textno=ER_INDEX_FILE_FULL;
3705 /* Write the error message to error log */
3706 errflag|= ME_NOREFRESH;
3707 break;
3708 }
3709 case HA_ERR_LOCK_WAIT_TIMEOUT:
3710 textno=ER_LOCK_WAIT_TIMEOUT;
3711 break;
3712 case HA_ERR_LOCK_TABLE_FULL:
3713 textno=ER_LOCK_TABLE_FULL;
3714 break;
3715 case HA_ERR_LOCK_DEADLOCK:
3716 {
3717 String str, full_err_msg(ER_DEFAULT(ER_LOCK_DEADLOCK), system_charset_info);
3718
3719 get_error_message(error, &str);
3720 full_err_msg.append(str);
3721 my_printf_error(ER_LOCK_DEADLOCK, "%s", errflag, full_err_msg.c_ptr_safe());
3722 DBUG_VOID_RETURN;
3723 }
3724 case HA_ERR_READ_ONLY_TRANSACTION:
3725 textno=ER_READ_ONLY_TRANSACTION;
3726 break;
3727 case HA_ERR_CANNOT_ADD_FOREIGN:
3728 textno=ER_CANNOT_ADD_FOREIGN;
3729 break;
3730 case HA_ERR_ROW_IS_REFERENCED:
3731 {
3732 String str;
3733 get_error_message(error, &str);
3734 my_printf_error(ER_ROW_IS_REFERENCED_2,
3735 ER(str.length() ? ER_ROW_IS_REFERENCED_2 : ER_ROW_IS_REFERENCED),
3736 errflag, str.c_ptr_safe());
3737 DBUG_VOID_RETURN;
3738 }
3739 case HA_ERR_NO_REFERENCED_ROW:
3740 {
3741 String str;
3742 get_error_message(error, &str);
3743 my_printf_error(ER_NO_REFERENCED_ROW_2,
3744 ER(str.length() ? ER_NO_REFERENCED_ROW_2 : ER_NO_REFERENCED_ROW),
3745 errflag, str.c_ptr_safe());
3746 DBUG_VOID_RETURN;
3747 }
3748 case HA_ERR_TABLE_DEF_CHANGED:
3749 textno=ER_TABLE_DEF_CHANGED;
3750 break;
3751 case HA_ERR_NO_SUCH_TABLE:
3752 my_error(ER_NO_SUCH_TABLE_IN_ENGINE, errflag, table_share->db.str,
3753 table_share->table_name.str);
3754 DBUG_VOID_RETURN;
3755 case HA_ERR_RBR_LOGGING_FAILED:
3756 textno= ER_BINLOG_ROW_LOGGING_FAILED;
3757 break;
3758 case HA_ERR_DROP_INDEX_FK:
3759 {
3760 const char *ptr= "???";
3761 uint key_nr= get_dup_key(error);
3762 if ((int) key_nr >= 0)
3763 ptr= table->key_info[key_nr].name.str;
3764 my_error(ER_DROP_INDEX_FK, errflag, ptr);
3765 DBUG_VOID_RETURN;
3766 }
3767 case HA_ERR_TABLE_NEEDS_UPGRADE:
3768 textno= ER_TABLE_NEEDS_UPGRADE;
3769 my_error(ER_TABLE_NEEDS_UPGRADE, errflag,
3770 "TABLE", table_share->table_name.str);
3771 DBUG_VOID_RETURN;
3772 case HA_ERR_NO_PARTITION_FOUND:
3773 textno=ER_WRONG_PARTITION_NAME;
3774 break;
3775 case HA_ERR_TABLE_READONLY:
3776 textno= ER_OPEN_AS_READONLY;
3777 break;
3778 case HA_ERR_AUTOINC_READ_FAILED:
3779 textno= ER_AUTOINC_READ_FAILED;
3780 break;
3781 case HA_ERR_AUTOINC_ERANGE:
3782 textno= error;
3783 my_error(textno, errflag, table->next_number_field->field_name.str,
3784 table->in_use->get_stmt_da()->current_row_for_warning());
3785 DBUG_VOID_RETURN;
3786 break;
3787 case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
3788 textno= ER_TOO_MANY_CONCURRENT_TRXS;
3789 break;
3790 case HA_ERR_INDEX_COL_TOO_LONG:
3791 textno= ER_INDEX_COLUMN_TOO_LONG;
3792 break;
3793 case HA_ERR_NOT_IN_LOCK_PARTITIONS:
3794 textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
3795 break;
3796 case HA_ERR_INDEX_CORRUPT:
3797 textno= ER_INDEX_CORRUPT;
3798 break;
3799 case HA_ERR_UNDO_REC_TOO_BIG:
3800 textno= ER_UNDO_RECORD_TOO_BIG;
3801 break;
3802 case HA_ERR_TABLE_IN_FK_CHECK:
3803 textno= ER_TABLE_IN_FK_CHECK;
3804 break;
3805 default:
3806 {
    /* The error was "unknown" to this function.
       Ask the handler if it has a message for this error */
3809 bool temporary= FALSE;
3810 String str;
3811 temporary= get_error_message(error, &str);
3812 if (!str.is_empty())
3813 {
3814 const char* engine= table_type();
3815 if (temporary)
3816 my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.c_ptr(),
3817 engine);
3818 else
3819 {
3820 SET_FATAL_ERROR;
3821 my_error(ER_GET_ERRMSG, errflag, error, str.c_ptr(), engine);
3822 }
3823 }
3824 else
3825 my_error(ER_GET_ERRNO, errflag, error, table_type());
3826 DBUG_VOID_RETURN;
3827 }
3828 }
3829 DBUG_ASSERT(textno > 0);
3830 if (unlikely(fatal_error))
3831 {
3832 /* Ensure this becomes a true error */
3833 errflag&= ~(ME_JUST_WARNING | ME_JUST_INFO);
3834 if ((debug_assert_if_crashed_table ||
3835 global_system_variables.log_warnings > 1))
3836 {
3837 /*
3838 Log error to log before we crash or if extended warnings are requested
3839 */
3840 errflag|= ME_NOREFRESH;
3841 }
3842 }
3843
  /* If we got an OS error from a file-based engine, include the file path in the error */
3845 if (error < HA_ERR_FIRST && bas_ext()[0])
3846 {
3847 char buff[FN_REFLEN];
3848 strxnmov(buff, sizeof(buff),
3849 table_share->normalized_path.str, bas_ext()[0], NULL);
3850 my_error(textno, errflag, buff, error);
3851 }
3852 else
3853 my_error(textno, errflag, table_share->table_name.str, error);
3854 DBUG_VOID_RETURN;
3855}
3856
3857
3858/**
3859 Return an error message specific to this handler.
3860
3861 @param error error code previously returned by handler
3862 @param buf pointer to String where to add error message
3863
3864 @return
3865 Returns true if this is a temporary error
3866*/
3867bool handler::get_error_message(int error, String* buf)
3868{
3869 DBUG_EXECUTE_IF("external_lock_failure",
3870 buf->set_ascii(STRING_WITH_LEN("KABOOM!")););
3871 return FALSE;
3872}
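/*
  A minimal sketch (hypothetical engine "ha_example", not actual server code)
  of a typical override: the engine maps its internal error codes to text and
  reports whether the error is temporary.

    bool ha_example::get_error_message(int error, String *buf)
    {
      if (error == HA_ERR_LOCK_DEADLOCK)
      {
        // Text shown via ER_GET_ERRMSG in handler::print_error()
        buf->copy(STRING_WITH_LEN("example: deadlock detected"),
                  system_charset_info);
      }
      return FALSE;   // FALSE means "not a temporary error"
    }
*/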
3873
3874/**
3875 Check for incompatible collation changes.
3876
3877 @retval
3878 HA_ADMIN_NEEDS_UPGRADE Table may have data requiring upgrade.
3879 @retval
3880 0 No upgrade required.
3881*/
3882
3883int handler::check_collation_compatibility()
3884{
3885 ulong mysql_version= table->s->mysql_version;
3886
3887 if (mysql_version < 50124)
3888 {
3889 KEY *key= table->key_info;
3890 KEY *key_end= key + table->s->keys;
3891 for (; key < key_end; key++)
3892 {
3893 KEY_PART_INFO *key_part= key->key_part;
3894 KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
3895 for (; key_part < key_part_end; key_part++)
3896 {
3897 if (!key_part->fieldnr)
3898 continue;
3899 Field *field= table->field[key_part->fieldnr - 1];
3900 uint cs_number= field->charset()->number;
3901 if ((mysql_version < 50048 &&
3902 (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
3903 cs_number == 41 || /* latin7_general_ci - bug #29461 */
3904 cs_number == 42 || /* latin7_general_cs - bug #29461 */
3905 cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
3906 cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
3907 cs_number == 22 || /* koi8u_general_ci - bug #29461 */
3908 cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
3909 cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
3910 (mysql_version < 50124 &&
3911 (cs_number == 33 || /* utf8_general_ci - bug #27877 */
3912 cs_number == 35))) /* ucs2_general_ci - bug #27877 */
3913 return HA_ADMIN_NEEDS_UPGRADE;
3914 }
3915 }
3916 }
3917
3918 return 0;
3919}
3920
3921
3922int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
3923{
3924 int error;
3925 KEY *keyinfo, *keyend;
3926 KEY_PART_INFO *keypart, *keypartend;
3927
3928 if (table->s->incompatible_version)
3929 return HA_ADMIN_NEEDS_ALTER;
3930
3931 if (!table->s->mysql_version)
3932 {
3933 /* check for blob-in-key error */
3934 keyinfo= table->key_info;
3935 keyend= table->key_info + table->s->keys;
3936 for (; keyinfo < keyend; keyinfo++)
3937 {
3938 keypart= keyinfo->key_part;
3939 keypartend= keypart + keyinfo->user_defined_key_parts;
3940 for (; keypart < keypartend; keypart++)
3941 {
3942 if (!keypart->fieldnr)
3943 continue;
3944 Field *field= table->field[keypart->fieldnr-1];
3945 if (field->type() == MYSQL_TYPE_BLOB)
3946 {
3947 if (check_opt->sql_flags & TT_FOR_UPGRADE)
3948 check_opt->flags= T_MEDIUM;
3949 return HA_ADMIN_NEEDS_CHECK;
3950 }
3951 }
3952 }
3953 }
3954 if (table->s->frm_version < FRM_VER_TRUE_VARCHAR)
3955 return HA_ADMIN_NEEDS_ALTER;
3956
3957 if (unlikely((error= check_collation_compatibility())))
3958 return error;
3959
3960 return check_for_upgrade(check_opt);
3961}
3962
3963
3964int handler::check_old_types()
3965{
3966 Field** field;
3967
3968 if (!table->s->mysql_version)
3969 {
3970 /* check for bad DECIMAL field */
3971 for (field= table->field; (*field); field++)
3972 {
3973 if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL)
3974 {
3975 return HA_ADMIN_NEEDS_ALTER;
3976 }
3977 if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
3978 {
3979 return HA_ADMIN_NEEDS_ALTER;
3980 }
3981 }
3982 }
3983 return 0;
3984}
3985
3986
3987static bool update_frm_version(TABLE *table)
3988{
3989 char path[FN_REFLEN];
3990 File file;
3991 int result= 1;
3992 DBUG_ENTER("update_frm_version");
3993
3994 /*
3995 No need to update frm version in case table was created or checked
3996 by server with the same version. This also ensures that we do not
3997 update frm version for temporary tables as this code doesn't support
3998 temporary tables.
3999 */
4000 if (table->s->mysql_version == MYSQL_VERSION_ID)
4001 DBUG_RETURN(0);
4002
4003 strxmov(path, table->s->normalized_path.str, reg_ext, NullS);
4004
4005 if ((file= mysql_file_open(key_file_frm,
4006 path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
4007 {
4008 uchar version[4];
4009
4010 int4store(version, MYSQL_VERSION_ID);
4011
4012 if ((result= (int)mysql_file_pwrite(file, (uchar*) version, 4, 51L, MYF_RW)))
4013 goto err;
4014
4015 table->s->mysql_version= MYSQL_VERSION_ID;
4016 }
4017err:
4018 if (file >= 0)
4019 (void) mysql_file_close(file, MYF(MY_WME));
4020 DBUG_RETURN(result);
4021}
4022
4023
4024
4025/**
  @return
    Number of the key that caused the duplicate-key error, or (uint) -1 if
    the key could not be determined.
4028*/
4029uint handler::get_dup_key(int error)
4030{
4031 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4032 m_lock_type != F_UNLCK);
4033 DBUG_ENTER("handler::get_dup_key");
4034 table->file->errkey = (uint) -1;
4035 if (error == HA_ERR_FOUND_DUPP_KEY ||
4036 error == HA_ERR_FOREIGN_DUPLICATE_KEY ||
4037 error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
4038 error == HA_ERR_DROP_INDEX_FK)
4039 table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4040 DBUG_RETURN(table->file->errkey);
4041}
4042
4043
4044/**
4045 Delete all files with extension from bas_ext().
4046
4047 @param name Base name of table
4048
4049 @note
    We assume that the handler may return more extensions than
    were actually used for the files.
4052
4053 @retval
    0   If we successfully deleted at least one file from bas_ext() and
        did not get any errors other than ENOENT
4056 @retval
4057 !0 Error
4058*/
4059int handler::delete_table(const char *name)
4060{
4061 int saved_error= 0;
4062 int error= 0;
4063 int enoent_or_zero;
4064
4065 if (ht->discover_table)
4066 enoent_or_zero= 0; // the table may not exist in the engine, it's ok
4067 else
4068 enoent_or_zero= ENOENT; // the first file of bas_ext() *must* exist
4069
4070 for (const char **ext=bas_ext(); *ext ; ext++)
4071 {
4072 if (mysql_file_delete_with_symlink(key_file_misc, name, *ext, 0))
4073 {
4074 if (my_errno != ENOENT)
4075 {
4076 /*
4077 If error on the first existing file, return the error.
4078 Otherwise delete as much as possible.
4079 */
4080 if (enoent_or_zero)
4081 return my_errno;
4082 saved_error= my_errno;
4083 }
4084 }
4085 else
4086 enoent_or_zero= 0; // No error for ENOENT
4087 error= enoent_or_zero;
4088 }
4089 return saved_error ? saved_error : error;
4090}
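/*
  A minimal sketch (hypothetical engine; the registration details are an
  assumption): the default delete_table() relies on the engine listing its
  file extensions and removes "name + ext" for each of them, e.g. t1.dat and
  t1.idx below.

    static const char *ha_example_exts[]= { ".dat", ".idx", NullS };

    // during engine initialization (hypothetical):
    //   example_hton->tablefile_extensions= ha_example_exts;
*/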
4091
4092
4093int handler::rename_table(const char * from, const char * to)
4094{
4095 int error= 0;
4096 const char **ext, **start_ext;
4097 start_ext= bas_ext();
4098 for (ext= start_ext; *ext ; ext++)
4099 {
4100 if (unlikely(rename_file_ext(from, to, *ext)))
4101 {
4102 if ((error=my_errno) != ENOENT)
4103 break;
4104 error= 0;
4105 }
4106 }
4107 if (unlikely(error))
4108 {
4109 /* Try to revert the rename. Ignore errors. */
4110 for (; ext >= start_ext; ext--)
4111 rename_file_ext(to, from, *ext);
4112 }
4113 return error;
4114}
4115
4116
4117void handler::drop_table(const char *name)
4118{
4119 ha_close();
4120 delete_table(name);
4121}
4122
4123
4124/**
4125 Performs checks upon the table.
4126
4127 @param thd thread doing CHECK TABLE operation
4128 @param check_opt options from the parser
4129
4130 @retval
4131 HA_ADMIN_OK Successful upgrade
4132 @retval
4133 HA_ADMIN_NEEDS_UPGRADE Table has structures requiring upgrade
4134 @retval
4135 HA_ADMIN_NEEDS_ALTER Table has structures requiring ALTER TABLE
4136 @retval
4137 HA_ADMIN_NOT_IMPLEMENTED
4138*/
4139int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
4140{
4141 int error;
4142 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4143 m_lock_type != F_UNLCK);
4144
4145 if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4146 (check_opt->sql_flags & TT_FOR_UPGRADE))
4147 return 0;
4148
4149 if (table->s->mysql_version < MYSQL_VERSION_ID)
4150 {
4151 if (unlikely((error= check_old_types())))
4152 return error;
4153 error= ha_check_for_upgrade(check_opt);
4154 if (unlikely(error && (error != HA_ADMIN_NEEDS_CHECK)))
4155 return error;
4156 if (unlikely(!error && (check_opt->sql_flags & TT_FOR_UPGRADE)))
4157 return 0;
4158 }
4159 if (unlikely((error= check(thd, check_opt))))
4160 return error;
4161 /* Skip updating frm version if not main handler. */
4162 if (table->file != this)
4163 return error;
4164 return update_frm_version(table);
4165}
4166
4167/**
4168 A helper function to mark a transaction read-write,
4169 if it is started.
4170*/
4171
4172void handler::mark_trx_read_write_internal()
4173{
4174 Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4175 /*
4176 When a storage engine method is called, the transaction must
4177 have been started, unless it's a DDL call, for which the
4178 storage engine starts the transaction internally, and commits
4179 it internally, without registering in the ha_list.
    Unfortunately, here we can't know for sure if the engine
4181 has registered the transaction or not, so we must check.
4182 */
4183 if (ha_info->is_started())
4184 {
4185 DBUG_ASSERT(has_transaction_manager());
4186 /*
4187 table_share can be NULL in ha_delete_table(). See implementation
4188 of standalone function ha_delete_table() in sql_base.cc.
4189 */
4190 if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4191 ha_info->set_trx_read_write();
4192 }
4193}
4194
4195
4196/**
4197 Repair table: public interface.
4198
4199 @sa handler::repair()
4200*/
4201
4202int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
4203{
4204 int result;
4205
4206 mark_trx_read_write();
4207
4208 result= repair(thd, check_opt);
4209 DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED ||
4210 ha_table_flags() & HA_CAN_REPAIR);
4211
4212 if (result == HA_ADMIN_OK)
4213 result= update_frm_version(table);
4214 return result;
4215}
4216
4217
4218/**
4219 Bulk update row: public interface.
4220
4221 @sa handler::bulk_update_row()
4222*/
4223
4224int
4225handler::ha_bulk_update_row(const uchar *old_data, const uchar *new_data,
4226 ha_rows *dup_key_found)
4227{
4228 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4229 m_lock_type == F_WRLCK);
4230 mark_trx_read_write();
4231
4232 return bulk_update_row(old_data, new_data, dup_key_found);
4233}
4234
4235
4236/**
4237 Delete all rows: public interface.
4238
4239 @sa handler::delete_all_rows()
4240*/
4241
4242int
4243handler::ha_delete_all_rows()
4244{
4245 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4246 m_lock_type == F_WRLCK);
4247 mark_trx_read_write();
4248
4249 return delete_all_rows();
4250}
4251
4252
4253/**
4254 Truncate table: public interface.
4255
4256 @sa handler::truncate()
4257*/
4258
4259int
4260handler::ha_truncate()
4261{
4262 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4263 m_lock_type == F_WRLCK);
4264 mark_trx_read_write();
4265
4266 return truncate();
4267}
4268
4269
4270/**
4271 Reset auto increment: public interface.
4272
4273 @sa handler::reset_auto_increment()
4274*/
4275
4276int
4277handler::ha_reset_auto_increment(ulonglong value)
4278{
4279 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4280 m_lock_type == F_WRLCK);
4281 mark_trx_read_write();
4282
4283 return reset_auto_increment(value);
4284}
4285
4286
4287/**
4288 Optimize table: public interface.
4289
4290 @sa handler::optimize()
4291*/
4292
4293int
4294handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
4295{
4296 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4297 m_lock_type == F_WRLCK);
4298 mark_trx_read_write();
4299
4300 return optimize(thd, check_opt);
4301}
4302
4303
4304/**
4305 Analyze table: public interface.
4306
4307 @sa handler::analyze()
4308*/
4309
4310int
4311handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
4312{
4313 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4314 m_lock_type != F_UNLCK);
4315 mark_trx_read_write();
4316
4317 return analyze(thd, check_opt);
4318}
4319
4320
4321/**
4322 Check and repair table: public interface.
4323
4324 @sa handler::check_and_repair()
4325*/
4326
4327bool
4328handler::ha_check_and_repair(THD *thd)
4329{
4330 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4331 m_lock_type == F_UNLCK);
4332 mark_trx_read_write();
4333
4334 return check_and_repair(thd);
4335}
4336
4337
4338/**
4339 Disable indexes: public interface.
4340
4341 @sa handler::disable_indexes()
4342*/
4343
4344int
4345handler::ha_disable_indexes(uint mode)
4346{
4347 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4348 m_lock_type != F_UNLCK);
4349 mark_trx_read_write();
4350
4351 return disable_indexes(mode);
4352}
4353
4354
4355/**
4356 Enable indexes: public interface.
4357
4358 @sa handler::enable_indexes()
4359*/
4360
4361int
4362handler::ha_enable_indexes(uint mode)
4363{
4364 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4365 m_lock_type != F_UNLCK);
4366 mark_trx_read_write();
4367
4368 return enable_indexes(mode);
4369}
4370
4371
4372/**
4373 Discard or import tablespace: public interface.
4374
4375 @sa handler::discard_or_import_tablespace()
4376*/
4377
4378int
4379handler::ha_discard_or_import_tablespace(my_bool discard)
4380{
4381 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4382 m_lock_type == F_WRLCK);
4383 mark_trx_read_write();
4384
4385 return discard_or_import_tablespace(discard);
4386}
4387
4388
4389bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4390 Alter_inplace_info *ha_alter_info)
4391{
4392 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4393 m_lock_type != F_UNLCK);
4394 mark_trx_read_write();
4395
4396 return prepare_inplace_alter_table(altered_table, ha_alter_info);
4397}
4398
4399
4400bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
4401 Alter_inplace_info *ha_alter_info,
4402 bool commit)
4403{
4404 /*
4405 At this point we should have an exclusive metadata lock on the table.
4406 The exception is if we're about to roll back changes (commit= false).
4407 In this case, we might be rolling back after a failed lock upgrade,
4408 so we could be holding the same lock level as for inplace_alter_table().
4409 */
4410 DBUG_ASSERT(ha_thd()->mdl_context.is_lock_owner(MDL_key::TABLE,
4411 table->s->db.str,
4412 table->s->table_name.str,
4413 MDL_EXCLUSIVE) ||
4414 !commit);
4415
4416 return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
4417}
4418
4419
4420/*
4421 Default implementation to support in-place alter table
4422 and old online add/drop index API
4423*/
4424
4425enum_alter_inplace_result
4426handler::check_if_supported_inplace_alter(TABLE *altered_table,
4427 Alter_inplace_info *ha_alter_info)
4428{
4429 DBUG_ENTER("handler::check_if_supported_inplace_alter");
4430
4431 HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4432
4433 if (altered_table->versioned(VERS_TIMESTAMP))
4434 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4435
4436 alter_table_operations inplace_offline_operations=
4437 ALTER_COLUMN_EQUAL_PACK_LENGTH |
4438 ALTER_COLUMN_NAME |
4439 ALTER_RENAME_COLUMN |
4440 ALTER_CHANGE_COLUMN_DEFAULT |
4441 ALTER_COLUMN_DEFAULT |
4442 ALTER_COLUMN_OPTION |
4443 ALTER_CHANGE_CREATE_OPTION |
4444 ALTER_DROP_CHECK_CONSTRAINT |
4445 ALTER_PARTITIONED |
4446 ALTER_VIRTUAL_GCOL_EXPR |
4447 ALTER_RENAME;
4448
4449 /* Is there at least one operation that requires copy algorithm? */
4450 if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4451 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4452
4453 /*
4454 The following checks for changes related to ALTER_OPTIONS
4455
4456 ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4457 ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4458 change column charsets and so not supported in-place through
4459 old API.
4460
4461 Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4462 not supported as in-place operations in old API either.
4463 */
4464 if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
4465 HA_CREATE_USED_DEFAULT_CHARSET |
4466 HA_CREATE_USED_PACK_KEYS |
4467 HA_CREATE_USED_CHECKSUM |
4468 HA_CREATE_USED_MAX_ROWS) ||
4469 (table->s->row_type != create_info->row_type))
4470 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4471
4472 uint table_changes= (ha_alter_info->handler_flags &
4473 ALTER_COLUMN_EQUAL_PACK_LENGTH) ?
4474 IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
4475 if (table->file->check_if_incompatible_data(create_info, table_changes)
4476 == COMPATIBLE_DATA_YES)
4477 DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
4478
4479 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4480}
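/*
  A minimal sketch (hypothetical engine, not actual server code) of how an
  engine-specific override typically narrows this decision: any operation it
  cannot perform in-place falls back to the copy algorithm.

    enum_alter_inplace_result
    ha_example::check_if_supported_inplace_alter(TABLE *altered_table,
                                                 Alter_inplace_info *ha_alter_info)
    {
      // This hypothetical engine only supports a plain rename in-place.
      if (ha_alter_info->handler_flags & ~ALTER_RENAME)
        return HA_ALTER_INPLACE_NOT_SUPPORTED;
      return HA_ALTER_INPLACE_NO_LOCK;
    }
*/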
4481
4482void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4483 const char *try_instead) const
4484{
4485 if (unsupported_reason == NULL)
4486 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
4487 not_supported, try_instead);
4488 else
4489 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
4490 not_supported, unsupported_reason, try_instead);
4491}
4492
4493
4494/**
4495 Rename table: public interface.
4496
4497 @sa handler::rename_table()
4498*/
4499
4500int
4501handler::ha_rename_table(const char *from, const char *to)
4502{
4503 DBUG_ASSERT(m_lock_type == F_UNLCK);
4504 mark_trx_read_write();
4505
4506 return rename_table(from, to);
4507}
4508
4509
4510/**
4511 Delete table: public interface.
4512
4513 @sa handler::delete_table()
4514*/
4515
4516int
4517handler::ha_delete_table(const char *name)
4518{
4519 mark_trx_read_write();
4520 return delete_table(name);
4521}
4522
4523
4524/**
4525 Drop table in the engine: public interface.
4526
4527 @sa handler::drop_table()
4528
4529 The difference between this and delete_table() is that the table is open in
4530 drop_table().
4531*/
4532
4533void
4534handler::ha_drop_table(const char *name)
4535{
4536 DBUG_ASSERT(m_lock_type == F_UNLCK);
4537 mark_trx_read_write();
4538
4539 return drop_table(name);
4540}
4541
4542
4543/**
4544 Create a table in the engine: public interface.
4545
4546 @sa handler::create()
4547*/
4548
4549int
4550handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info_arg)
4551{
4552 DBUG_ASSERT(m_lock_type == F_UNLCK);
4553 mark_trx_read_write();
4554 int error= create(name, form, info_arg);
4555 if (!error &&
4556 !(info_arg->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER)))
4557 mysql_audit_create_table(form);
4558 return error;
4559}
4560
4561
4562/**
4563 Create handler files for CREATE TABLE: public interface.
4564
4565 @sa handler::create_partitioning_metadata()
4566*/
4567
4568int
4569handler::ha_create_partitioning_metadata(const char *name,
4570 const char *old_name,
4571 int action_flag)
4572{
4573 /*
4574 Normally this is done when unlocked, but in fast_alter_partition_table,
4575 it is done on an already locked handler when preparing to alter/rename
4576 partitions.
4577 */
4578 DBUG_ASSERT(m_lock_type == F_UNLCK ||
4579 (!old_name && strcmp(name, table_share->path.str)));
4580
4581 return create_partitioning_metadata(name, old_name, action_flag);
4582}
4583
4584
4585/**
4586 Change partitions: public interface.
4587
4588 @sa handler::change_partitions()
4589*/
4590
4591int
4592handler::ha_change_partitions(HA_CREATE_INFO *create_info,
4593 const char *path,
4594 ulonglong * const copied,
4595 ulonglong * const deleted,
4596 const uchar *pack_frm_data,
4597 size_t pack_frm_len)
4598{
4599 /*
4600 Must have at least RDLCK or be a TMP table. Read lock is needed to read
4601 from current partitions and write lock will be taken on new partitions.
4602 */
4603 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4604 m_lock_type != F_UNLCK);
4605
4606 mark_trx_read_write();
4607
4608 return change_partitions(create_info, path, copied, deleted,
4609 pack_frm_data, pack_frm_len);
4610}
4611
4612
4613/**
4614 Drop partitions: public interface.
4615
4616 @sa handler::drop_partitions()
4617*/
4618
4619int
4620handler::ha_drop_partitions(const char *path)
4621{
4622 DBUG_ASSERT(!table->db_stat);
4623
4624 mark_trx_read_write();
4625
4626 return drop_partitions(path);
4627}
4628
4629
4630/**
4631 Rename partitions: public interface.
4632
4633 @sa handler::rename_partitions()
4634*/
4635
4636int
4637handler::ha_rename_partitions(const char *path)
4638{
4639 DBUG_ASSERT(!table->db_stat);
4640
4641 mark_trx_read_write();
4642
4643 return rename_partitions(path);
4644}
4645
4646
4647/**
  Tell the storage engine that it is allowed to "disable transactions" in the
  handler. It is a hint that ACID is not required - it was used in NDB for
  ALTER TABLE, for example, when data are copied to a temporary table.
  A storage engine may treat this hint any way it likes; NDB, for example,
  started to commit automatically every now and then.
  This hint can be safely ignored.
4654*/
4655int ha_enable_transaction(THD *thd, bool on)
4656{
4657 int error=0;
4658 DBUG_ENTER("ha_enable_transaction");
4659 DBUG_PRINT("enter", ("on: %d", (int) on));
4660
4661 if ((thd->transaction.on= on))
4662 {
4663 /*
4664 Now all storage engines should have transaction handling enabled.
4665 But some may have it enabled all the time - "disabling" transactions
4666 is an optimization hint that storage engine is free to ignore.
4667 So, let's commit an open transaction (if any) now.
4668 */
4669 if (likely(!(error= ha_commit_trans(thd, 0))))
4670 error= trans_commit_implicit(thd);
4671 }
4672 DBUG_RETURN(error);
4673}
4674
4675int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
4676{
4677 int error;
4678 DBUG_ENTER("handler::index_next_same");
4679 if (!(error=index_next(buf)))
4680 {
4681 my_ptrdiff_t ptrdiff= buf - table->record[0];
4682 uchar *UNINIT_VAR(save_record_0);
4683 KEY *UNINIT_VAR(key_info);
4684 KEY_PART_INFO *UNINIT_VAR(key_part);
4685 KEY_PART_INFO *UNINIT_VAR(key_part_end);
4686
4687 /*
4688 key_cmp_if_same() compares table->record[0] against 'key'.
4689 In parts it uses table->record[0] directly, in parts it uses
4690 field objects with their local pointers into table->record[0].
4691 If 'buf' is distinct from table->record[0], we need to move
4692 all record references. This is table->record[0] itself and
4693 the field pointers of the fields used in this key.
4694 */
4695 if (ptrdiff)
4696 {
4697 save_record_0= table->record[0];
4698 table->record[0]= buf;
4699 key_info= table->key_info + active_index;
4700 key_part= key_info->key_part;
4701 key_part_end= key_part + key_info->user_defined_key_parts;
4702 for (; key_part < key_part_end; key_part++)
4703 {
4704 DBUG_ASSERT(key_part->field);
4705 key_part->field->move_field_offset(ptrdiff);
4706 }
4707 }
4708
4709 if (key_cmp_if_same(table, key, active_index, keylen))
4710 {
4711 table->status=STATUS_NOT_FOUND;
4712 error=HA_ERR_END_OF_FILE;
4713 }
4714
4715 /* Move back if necessary. */
4716 if (ptrdiff)
4717 {
4718 table->record[0]= save_record_0;
4719 for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
4720 key_part->field->move_field_offset(-ptrdiff);
4721 }
4722 }
4723 DBUG_PRINT("return",("%i", error));
4724 DBUG_RETURN(error);
4725}
4726
4727
4728void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
4729 uint part_id)
4730{
4731 info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE |
4732 HA_STATUS_NO_LOCK);
4733 stat_info->records= stats.records;
4734 stat_info->mean_rec_length= stats.mean_rec_length;
4735 stat_info->data_file_length= stats.data_file_length;
4736 stat_info->max_data_file_length= stats.max_data_file_length;
4737 stat_info->index_file_length= stats.index_file_length;
4738 stat_info->max_index_file_length=stats.max_index_file_length;
4739 stat_info->delete_length= stats.delete_length;
4740 stat_info->create_time= stats.create_time;
4741 stat_info->update_time= stats.update_time;
4742 stat_info->check_time= stats.check_time;
4743 stat_info->check_sum= 0;
4744 if (table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
4745 stat_info->check_sum= checksum();
4746 return;
4747}
4748
4749
4750/*
4751 Updates the global table stats with the TABLE this handler represents
4752*/
4753
4754void handler::update_global_table_stats()
4755{
4756 TABLE_STATS * table_stats;
4757
4758 status_var_add(table->in_use->status_var.rows_read, rows_read);
4759 DBUG_ASSERT(rows_tmp_read == 0);
4760
4761 if (!table->in_use->userstat_running)
4762 {
4763 rows_read= rows_changed= 0;
4764 return;
4765 }
4766
4767 if (rows_read + rows_changed == 0)
4768 return; // Nothing to update.
4769
4770 DBUG_ASSERT(table->s);
4771 DBUG_ASSERT(table->s->table_cache_key.str);
4772
4773 mysql_mutex_lock(&LOCK_global_table_stats);
4774 /* Gets the global table stats, creating one if necessary. */
4775 if (!(table_stats= (TABLE_STATS*)
4776 my_hash_search(&global_table_stats,
4777 (uchar*) table->s->table_cache_key.str,
4778 table->s->table_cache_key.length)))
4779 {
4780 if (!(table_stats = ((TABLE_STATS*)
4781 my_malloc(sizeof(TABLE_STATS),
4782 MYF(MY_WME | MY_ZEROFILL)))))
4783 {
4784 /* Out of memory error already given */
4785 goto end;
4786 }
4787 memcpy(table_stats->table, table->s->table_cache_key.str,
4788 table->s->table_cache_key.length);
4789 table_stats->table_name_length= (uint)table->s->table_cache_key.length;
4790 table_stats->engine_type= ht->db_type;
4791 /* No need to set variables to 0, as we use MY_ZEROFILL above */
4792
4793 if (my_hash_insert(&global_table_stats, (uchar*) table_stats))
4794 {
4795 /* Out of memory error is already given */
4796 my_free(table_stats);
4797 goto end;
4798 }
4799 }
4800 // Updates the global table stats.
4801 table_stats->rows_read+= rows_read;
4802 table_stats->rows_changed+= rows_changed;
4803 table_stats->rows_changed_x_indexes+= (rows_changed *
4804 (table->s->keys ? table->s->keys :
4805 1));
4806 rows_read= rows_changed= 0;
4807end:
4808 mysql_mutex_unlock(&LOCK_global_table_stats);
4809}
4810
4811
4812/*
4813 Updates the global index stats with this handler's accumulated index reads.
4814*/
4815
4816void handler::update_global_index_stats()
4817{
4818 DBUG_ASSERT(table->s);
4819
4820 if (!table->in_use->userstat_running)
4821 {
4822 /* Reset all index read values */
4823 bzero(index_rows_read, sizeof(index_rows_read[0]) * table->s->keys);
4824 return;
4825 }
4826
4827 for (uint index = 0; index < table->s->keys; index++)
4828 {
4829 if (index_rows_read[index])
4830 {
4831 INDEX_STATS* index_stats;
4832 size_t key_length;
4833 KEY *key_info = &table->key_info[index]; // Rows were read using this
4834
4835 DBUG_ASSERT(key_info->cache_name);
4836 if (!key_info->cache_name)
4837 continue;
4838 key_length= table->s->table_cache_key.length + key_info->name.length + 1;
4839 mysql_mutex_lock(&LOCK_global_index_stats);
4840 // Gets the global index stats, creating one if necessary.
4841 if (!(index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
4842 key_info->cache_name,
4843 key_length)))
4844 {
4845 if (!(index_stats = ((INDEX_STATS*)
4846 my_malloc(sizeof(INDEX_STATS),
4847 MYF(MY_WME | MY_ZEROFILL)))))
4848 goto end; // Error is already given
4849
4850 memcpy(index_stats->index, key_info->cache_name, key_length);
4851 index_stats->index_name_length= key_length;
4852 if (my_hash_insert(&global_index_stats, (uchar*) index_stats))
4853 {
4854 my_free(index_stats);
4855 goto end;
4856 }
4857 }
4858 /* Updates the global index stats. */
4859 index_stats->rows_read+= index_rows_read[index];
4860 index_rows_read[index]= 0;
4861end:
4862 mysql_mutex_unlock(&LOCK_global_index_stats);
4863 }
4864 }
4865}
4866
4867
4868/****************************************************************************
** Some general functions that aren't in the handler class
4870****************************************************************************/
4871
4872/**
4873 Initiates table-file and calls appropriate database-creator.
  Initiates the table file and calls the appropriate database creator.
4875 @retval
4876 0 ok
4877 @retval
4878 1 error
4879*/
4880int ha_create_table(THD *thd, const char *path,
4881 const char *db, const char *table_name,
4882 HA_CREATE_INFO *create_info, LEX_CUSTRING *frm)
4883{
4884 int error= 1;
4885 TABLE table;
4886 char name_buff[FN_REFLEN];
4887 const char *name;
4888 TABLE_SHARE share;
4889 bool temp_table __attribute__((unused)) =
4890 create_info->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER);
4891 DBUG_ENTER("ha_create_table");
4892
4893 init_tmp_table_share(thd, &share, db, 0, table_name, path);
4894
4895 if (frm)
4896 {
4897 bool write_frm_now= !create_info->db_type->discover_table &&
4898 !create_info->tmp_table();
4899
4900 share.frm_image= frm;
4901
4902 // open an frm image
4903 if (share.init_from_binary_frm_image(thd, write_frm_now,
4904 frm->str, frm->length))
4905 goto err;
4906 }
4907 else
4908 {
4909 // open an frm file
4910 share.db_plugin= ha_lock_engine(thd, create_info->db_type);
4911
4912 if (open_table_def(thd, &share))
4913 goto err;
4914 }
4915
4916 share.m_psi= PSI_CALL_get_table_share(temp_table, &share);
4917
4918 if (open_table_from_share(thd, &share, &empty_clex_str, 0, READ_ALL, 0,
4919 &table, true))
4920 goto err;
4921
4922 update_create_info_from_table(create_info, &table);
4923
4924 name= get_canonical_filename(table.file, share.path.str, name_buff);
4925
4926 error= table.file->ha_create(name, &table, create_info);
4927
4928 if (unlikely(error))
4929 {
4930 if (!thd->is_error())
4931 my_error(ER_CANT_CREATE_TABLE, MYF(0), db, table_name, error);
4932 table.file->print_error(error, MYF(ME_JUST_WARNING));
4933 PSI_CALL_drop_table_share(temp_table, share.db.str, (uint)share.db.length,
4934 share.table_name.str, (uint)share.table_name.length);
4935 }
4936
4937 (void) closefrm(&table);
4938
4939err:
4940 free_table_share(&share);
4941 DBUG_RETURN(error != 0);
4942}
4943
4944void st_ha_check_opt::init()
4945{
4946 flags= sql_flags= 0;
4947 start_time= my_time(0);
4948}
4949
4950
4951/*****************************************************************************
4952 Key cache handling.
4953
4954 This code is only relevant for ISAM/MyISAM tables
4955
4956 key_cache->cache may be 0 only in the case where a key cache is not
  initialized or when we were not able to init the key cache in a previous
4958 call to ha_init_key_cache() (probably out of memory)
4959*****************************************************************************/
4960
4961/**
  Init a key cache if it has not been initialized before.
4963*/
4964int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *unused
4965 __attribute__((unused)))
4966{
4967 DBUG_ENTER("ha_init_key_cache");
4968
4969 if (!key_cache->key_cache_inited)
4970 {
4971 mysql_mutex_lock(&LOCK_global_system_variables);
4972 size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
4973 uint tmp_block_size= (uint) key_cache->param_block_size;
4974 uint division_limit= (uint)key_cache->param_division_limit;
4975 uint age_threshold= (uint)key_cache->param_age_threshold;
4976 uint partitions= (uint)key_cache->param_partitions;
4977 uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size;
4978 mysql_mutex_unlock(&LOCK_global_system_variables);
4979 DBUG_RETURN(!init_key_cache(key_cache,
4980 tmp_block_size,
4981 tmp_buff_size,
4982 division_limit, age_threshold,
4983 changed_blocks_hash_size,
4984 partitions));
4985 }
4986 DBUG_RETURN(0);
4987}
4988
4989
4990/**
4991 Resize key cache.
4992*/
4993int ha_resize_key_cache(KEY_CACHE *key_cache)
4994{
4995 DBUG_ENTER("ha_resize_key_cache");
4996
4997 if (key_cache->key_cache_inited)
4998 {
4999 mysql_mutex_lock(&LOCK_global_system_variables);
5000 size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5001 long tmp_block_size= (long) key_cache->param_block_size;
5002 uint division_limit= (uint)key_cache->param_division_limit;
5003 uint age_threshold= (uint)key_cache->param_age_threshold;
5004 uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size;
5005 mysql_mutex_unlock(&LOCK_global_system_variables);
5006 DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size,
5007 tmp_buff_size,
5008 division_limit, age_threshold,
5009 changed_blocks_hash_size));
5010 }
5011 DBUG_RETURN(0);
5012}
5013
5014
5015/**
5016 Change parameters for key cache (like division_limit)
5017*/
5018int ha_change_key_cache_param(KEY_CACHE *key_cache)
5019{
5020 DBUG_ENTER("ha_change_key_cache_param");
5021
5022 if (key_cache->key_cache_inited)
5023 {
5024 mysql_mutex_lock(&LOCK_global_system_variables);
5025 uint division_limit= (uint)key_cache->param_division_limit;
5026 uint age_threshold= (uint)key_cache->param_age_threshold;
5027 mysql_mutex_unlock(&LOCK_global_system_variables);
5028 change_key_cache_param(key_cache, division_limit, age_threshold);
5029 }
5030 DBUG_RETURN(0);
5031}
5032
5033
5034/**
5035 Repartition key cache
5036*/
5037int ha_repartition_key_cache(KEY_CACHE *key_cache)
5038{
5039 DBUG_ENTER("ha_repartition_key_cache");
5040
5041 if (key_cache->key_cache_inited)
5042 {
5043 mysql_mutex_lock(&LOCK_global_system_variables);
5044 size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5045 long tmp_block_size= (long) key_cache->param_block_size;
5046 uint division_limit= (uint)key_cache->param_division_limit;
5047 uint age_threshold= (uint)key_cache->param_age_threshold;
5048 uint partitions= (uint)key_cache->param_partitions;
5049 uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size;
5050 mysql_mutex_unlock(&LOCK_global_system_variables);
5051 DBUG_RETURN(!repartition_key_cache(key_cache, tmp_block_size,
5052 tmp_buff_size,
5053 division_limit, age_threshold,
5054 changed_blocks_hash_size,
5055 partitions));
5056 }
5057 DBUG_RETURN(0);
5058}
5059
5060
5061/**
5062 Move all tables from one key cache to another one.
5063*/
5064int ha_change_key_cache(KEY_CACHE *old_key_cache,
5065 KEY_CACHE *new_key_cache)
5066{
5067 mi_change_key_cache(old_key_cache, new_key_cache);
5068 return 0;
5069}
5070
5071
5072static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5073 void *arg)
5074{
5075 TABLE_SHARE *share= (TABLE_SHARE *)arg;
5076 handlerton *hton= plugin_hton(plugin);
5077 if (hton->state == SHOW_OPTION_YES && hton->discover_table)
5078 {
5079 share->db_plugin= plugin;
5080 int error= hton->discover_table(hton, thd, share);
5081 if (error != HA_ERR_NO_SUCH_TABLE)
5082 {
5083 if (unlikely(error))
5084 {
5085 if (!share->error)
5086 {
5087 share->error= OPEN_FRM_ERROR_ALREADY_ISSUED;
5088 plugin_unlock(0, share->db_plugin);
5089 }
5090
5091 /*
5092 report an error, unless it is "generic" and a more
5093 specific one was already reported
5094 */
5095 if (error != HA_ERR_GENERIC || !thd->is_error())
5096 my_error(ER_GET_ERRNO, MYF(0), error, plugin_name(plugin)->str);
5097 share->db_plugin= 0;
5098 }
5099 else
5100 share->error= OPEN_FRM_OK;
5101
5102 status_var_increment(thd->status_var.ha_discover_count);
5103 return TRUE; // abort the search
5104 }
5105 share->db_plugin= 0;
5106 }
5107
5108 DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR);
5109 return FALSE; // continue with the next engine
5110}
5111
5112int ha_discover_table(THD *thd, TABLE_SHARE *share)
5113{
5114 DBUG_ENTER("ha_discover_table");
5115 int found;
5116
5117 DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR); // share is not OK yet
5118
5119 if (!engines_with_discover)
5120 found= FALSE;
5121 else if (share->db_plugin)
5122 found= discover_handlerton(thd, share->db_plugin, share);
5123 else
5124 found= plugin_foreach(thd, discover_handlerton,
5125 MYSQL_STORAGE_ENGINE_PLUGIN, share);
5126
5127 if (!found)
5128 open_table_error(share, OPEN_FRM_OPEN_ERROR, ENOENT); // not found
5129
5130 DBUG_RETURN(share->error != OPEN_FRM_OK);
5131}
5132
5133static my_bool file_ext_exists(char *path, size_t path_len, const char *ext)
5134{
5135 strmake(path + path_len, ext, FN_REFLEN - path_len);
5136 return !access(path, F_OK);
5137}
5138
5139struct st_discover_existence_args
5140{
5141 char *path;
5142 size_t path_len;
5143 const char *db, *table_name;
5144 handlerton *hton;
5145 bool frm_exists;
5146};
5147
5148static my_bool discover_existence(THD *thd, plugin_ref plugin,
5149 void *arg)
5150{
5151 st_discover_existence_args *args= (st_discover_existence_args*)arg;
5152 handlerton *ht= plugin_hton(plugin);
5153 if (ht->state != SHOW_OPTION_YES || !ht->discover_table_existence)
5154 return args->frm_exists;
5155
5156 args->hton= ht;
5157
5158 if (ht->discover_table_existence == ext_based_existence)
5159 return file_ext_exists(args->path, args->path_len,
5160 ht->tablefile_extensions[0]);
5161
5162 return ht->discover_table_existence(ht, args->db, args->table_name);
5163}
5164
5165class Table_exists_error_handler : public Internal_error_handler
5166{
5167public:
5168 Table_exists_error_handler()
5169 : m_handled_errors(0), m_unhandled_errors(0)
5170 {}
5171
5172 bool handle_condition(THD *thd,
5173 uint sql_errno,
5174 const char* sqlstate,
5175 Sql_condition::enum_warning_level *level,
5176 const char* msg,
5177 Sql_condition ** cond_hdl)
5178 {
5179 *cond_hdl= NULL;
5180 if (sql_errno == ER_NO_SUCH_TABLE ||
5181 sql_errno == ER_NO_SUCH_TABLE_IN_ENGINE ||
5182 sql_errno == ER_WRONG_OBJECT)
5183 {
5184 m_handled_errors++;
5185 return TRUE;
5186 }
5187
5188 if (*level == Sql_condition::WARN_LEVEL_ERROR)
5189 m_unhandled_errors++;
5190 return FALSE;
5191 }
5192
5193 bool safely_trapped_errors()
5194 {
5195 return ((m_handled_errors > 0) && (m_unhandled_errors == 0));
5196 }
5197
5198private:
5199 int m_handled_errors;
5200 int m_unhandled_errors;
5201};
5202
5203/**
  Check if a given table exists, without doing full discovery, if possible.
5205
5206 If the 'hton' is not NULL, it's set to the handlerton of the storage engine
5207 of this table, or to view_pseudo_hton if the frm belongs to a view.
5208
5209 This function takes discovery correctly into account. If frm is found,
5210 it discovers the table to make sure it really exists in the engine.
5211 If no frm is found it discovers the table, in case it still exists in
5212 the engine.
5213
  While it tries to cut corners (it does not open the .frm if no discovering
  engine is enabled, and skips full discovery if all discovering engines
  support discover_table_existence, etc.), it still *may* be quite expensive
  and must be used sparingly.
5218
  @retval true    Table exists (even if an error occurred, e.g. a bad frm)
5220 @retval false Table does not exist (one can do CREATE TABLE table_name)
5221
5222 @note if frm exists and the table in engine doesn't, *hton will be set,
5223 but the return value will be false.
5224
5225 @note if frm file exists, but the table cannot be opened (engine not
5226 loaded, frm is invalid), the return value will be true, but
5227 *hton will be NULL.
5228*/
5229
5230bool ha_table_exists(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table_name,
5231 handlerton **hton, bool *is_sequence)
5232{
5233 handlerton *dummy;
5234 bool dummy2;
5235 DBUG_ENTER("ha_table_exists");
5236
5237 if (hton)
5238 *hton= 0;
5239 else if (engines_with_discover)
5240 hton= &dummy;
5241 if (!is_sequence)
5242 is_sequence= &dummy2;
5243 *is_sequence= 0;
5244
5245 TDC_element *element= tdc_lock_share(thd, db->str, table_name->str);
5246 if (element && element != MY_ERRPTR)
5247 {
5248 if (hton)
5249 *hton= element->share->db_type();
5250 *is_sequence= element->share->table_type == TABLE_TYPE_SEQUENCE;
5251 tdc_unlock_share(element);
5252 DBUG_RETURN(TRUE);
5253 }
5254
5255 char path[FN_REFLEN + 1];
5256 size_t path_len = build_table_filename(path, sizeof(path) - 1,
5257 db->str, table_name->str, "", 0);
5258 st_discover_existence_args args= {path, path_len, db->str, table_name->str, 0, true};
5259
5260 if (file_ext_exists(path, path_len, reg_ext))
5261 {
5262 bool exists= true;
5263 if (hton)
5264 {
5265 char engine_buf[NAME_CHAR_LEN + 1];
5266 LEX_CSTRING engine= { engine_buf, 0 };
5267 Table_type type;
5268
5269 if ((type= dd_frm_type(thd, path, &engine, is_sequence)) ==
5270 TABLE_TYPE_UNKNOWN)
5271 DBUG_RETURN(0);
5272
5273 if (type != TABLE_TYPE_VIEW)
5274 {
5275 plugin_ref p= plugin_lock_by_name(thd, &engine,
5276 MYSQL_STORAGE_ENGINE_PLUGIN);
5277 *hton= p ? plugin_hton(p) : NULL;
5278 if (*hton)
5279 // verify that the table really exists
5280 exists= discover_existence(thd, p, &args);
5281 }
5282 else
5283 *hton= view_pseudo_hton;
5284 }
5285 DBUG_RETURN(exists);
5286 }
5287
5288 args.frm_exists= false;
5289 if (plugin_foreach(thd, discover_existence, MYSQL_STORAGE_ENGINE_PLUGIN,
5290 &args))
5291 {
5292 if (hton)
5293 *hton= args.hton;
5294 DBUG_RETURN(TRUE);
5295 }
5296
5297 if (need_full_discover_for_existence)
5298 {
5299 TABLE_LIST table;
5300 uint flags = GTS_TABLE | GTS_VIEW;
5301 if (!hton)
5302 flags|= GTS_NOLOCK;
5303
5304 Table_exists_error_handler no_such_table_handler;
5305 thd->push_internal_handler(&no_such_table_handler);
5306 table.init_one_table(db, table_name, 0, TL_READ);
5307 TABLE_SHARE *share= tdc_acquire_share(thd, &table, flags);
5308 thd->pop_internal_handler();
5309
5310 if (hton && share)
5311 {
5312 *hton= share->db_type();
5313 tdc_release_share(share);
5314 }
5315
5316 // the table doesn't exist if we've caught ER_NO_SUCH_TABLE and nothing else
5317 DBUG_RETURN(!no_such_table_handler.safely_trapped_errors());
5318 }
5319
5320 DBUG_RETURN(FALSE);
5321}
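/*
  Hypothetical usage sketch (the caller context and the chosen error are
  assumptions, not taken from an actual call site): a CREATE TABLE style
  check for a pre-existing table could look like

    handlerton *hton;
    bool is_sequence;
    if (ha_table_exists(thd, &db, &table_name, &hton, &is_sequence))
      my_error(ER_TABLE_EXISTS_ERROR, MYF(0), table_name.str);
*/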
5322
5323/**
5324 Discover all table names in a given database
5325*/
5326extern "C" {
5327
5328static int cmp_file_names(const void *a, const void *b)
5329{
5330 CHARSET_INFO *cs= character_set_filesystem;
5331 char *aa= ((FILEINFO *)a)->name;
5332 char *bb= ((FILEINFO *)b)->name;
5333 return my_strnncoll(cs, (uchar*)aa, strlen(aa), (uchar*)bb, strlen(bb));
5334}
5335
5336static int cmp_table_names(LEX_CSTRING * const *a, LEX_CSTRING * const *b)
5337{
5338 return my_strnncoll(&my_charset_bin, (uchar*)((*a)->str), (*a)->length,
5339 (uchar*)((*b)->str), (*b)->length);
5340}
5341
5342#ifndef DBUG_OFF
5343static int cmp_table_names_desc(LEX_CSTRING * const *a, LEX_CSTRING * const *b)
5344{
5345 return -cmp_table_names(a, b);
5346}
5347#endif
5348
5349}
5350
5351Discovered_table_list::Discovered_table_list(THD *thd_arg,
5352 Dynamic_array<LEX_CSTRING*> *tables_arg,
5353 const LEX_CSTRING *wild_arg) :
5354 thd(thd_arg), with_temps(false), tables(tables_arg)
5355{
5356 if (wild_arg->str && wild_arg->str[0])
5357 {
5358 wild= wild_arg->str;
5359 wend= wild + wild_arg->length;
5360 }
5361 else
5362 wild= 0;
5363}
5364
5365bool Discovered_table_list::add_table(const char *tname, size_t tlen)
5366{
5367 /*
5368 TODO Check with_temps and filter out temp tables.
    Implement the check when we have at least one affected engine (with a
    custom discover_table_names() method that calls add_table() directly).
5371 Note: avoid comparing the same name twice (here and in add_file).
5372 */
5373 if (wild && my_wildcmp(table_alias_charset, tname, tname + tlen, wild, wend,
5374 wild_prefix, wild_one, wild_many))
5375 return 0;
5376
5377 LEX_CSTRING *name= thd->make_clex_string(tname, tlen);
5378 if (!name || tables->append(name))
5379 return 1;
5380 return 0;
5381}
5382
5383bool Discovered_table_list::add_file(const char *fname)
5384{
5385 bool is_temp= strncmp(fname, STRING_WITH_LEN(tmp_file_prefix)) == 0;
5386
5387 if (is_temp && !with_temps)
5388 return 0;
5389
5390 char tname[SAFE_NAME_LEN + 1];
5391 size_t tlen= filename_to_tablename(fname, tname, sizeof(tname), is_temp);
5392 return add_table(tname, tlen);
5393}
5394
5395
5396void Discovered_table_list::sort()
5397{
5398 tables->sort(cmp_table_names);
5399}
5400
5401
5402#ifndef DBUG_OFF
5403void Discovered_table_list::sort_desc()
5404{
5405 tables->sort(cmp_table_names_desc);
5406}
5407#endif
5408
5409
5410void Discovered_table_list::remove_duplicates()
5411{
5412 LEX_CSTRING **src= tables->front();
5413 LEX_CSTRING **dst= src;
5414 sort();
5415 while (++dst <= tables->back())
5416 {
5417 LEX_CSTRING *s= *src, *d= *dst;
5418 DBUG_ASSERT(strncmp(s->str, d->str, MY_MIN(s->length, d->length)) <= 0);
5419 if ((s->length != d->length || strncmp(s->str, d->str, d->length)))
5420 {
5421 src++;
5422 if (src != dst)
5423 *src= *dst;
5424 }
5425 }
5426 tables->elements(src - tables->front() + 1);
5427}
5428
5429struct st_discover_names_args
5430{
5431 LEX_CSTRING *db;
5432 MY_DIR *dirp;
5433 Discovered_table_list *result;
5434 uint possible_duplicates;
5435};
5436
5437static my_bool discover_names(THD *thd, plugin_ref plugin,
5438 void *arg)
5439{
5440 st_discover_names_args *args= (st_discover_names_args *)arg;
5441 handlerton *ht= plugin_hton(plugin);
5442
5443 if (ht->state == SHOW_OPTION_YES && ht->discover_table_names)
5444 {
5445 size_t old_elements= args->result->tables->elements();
5446 if (ht->discover_table_names(ht, args->db, args->dirp, args->result))
5447 return 1;
5448
5449 /*
5450 hton_ext_based_table_discovery never discovers a table that has
5451 a corresponding .frm file; but custom engine discover methods might
5452 */
5453 if (ht->discover_table_names != hton_ext_based_table_discovery)
5454 args->possible_duplicates+= (uint)(args->result->tables->elements() - old_elements);
5455 }
5456
5457 return 0;
5458}
5459
5460/**
5461 Return the list of tables
5462
5463 @param thd
5464 @param db database to look into
5465 @param dirp list of files in this database (as returned by my_dir())
  @param result   the object to return the list of table names in
5467 @param reusable if true, on return, 'dirp' will be a valid list of all
5468 non-table files. If false, discovery will work much faster,
5469 but it will leave 'dirp' corrupted and completely unusable,
5470 only good for my_dirend().
5471
5472 Normally, reusable=false for SHOW and INFORMATION_SCHEMA, and reusable=true
5473 for DROP DATABASE (as it needs to know and delete non-table files).
5474*/
5475
5476int ha_discover_table_names(THD *thd, LEX_CSTRING *db, MY_DIR *dirp,
5477 Discovered_table_list *result, bool reusable)
5478{
5479 int error;
5480 DBUG_ENTER("ha_discover_table_names");
5481
5482 if (engines_with_discover_file_names == 0 && !reusable)
5483 {
5484 st_discover_names_args args= {db, NULL, result, 0};
5485 error= ext_table_discovery_simple(dirp, result) ||
5486 plugin_foreach(thd, discover_names,
5487 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5488 }
5489 else
5490 {
5491 st_discover_names_args args= {db, dirp, result, 0};
5492
5493 /* extension_based_table_discovery relies on dirp being sorted */
5494 my_qsort(dirp->dir_entry, dirp->number_of_files,
5495 sizeof(FILEINFO), cmp_file_names);
5496
5497 error= extension_based_table_discovery(dirp, reg_ext, result) ||
5498 plugin_foreach(thd, discover_names,
5499 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5500 if (args.possible_duplicates > 0)
5501 result->remove_duplicates();
5502 }
5503
5504 DBUG_RETURN(error);
5505}
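/*
  A minimal call-site sketch for ha_discover_table_names(). It assumes a
  Discovered_table_list 'result' set up by the caller, a LEX_CSTRING 'db' and
  a filesystem path 'path' for the database directory; these names are
  illustrative, not a copy of an existing call site.

    MY_DIR *dirp= my_dir(path, MYF(MY_THREAD_SPECIFIC));
    if (dirp)
    {
      // reusable=false: faster, but afterwards dirp is only good for my_dirend()
      int error= ha_discover_table_names(thd, &db, dirp, &result, false);
      my_dirend(dirp);
      if (!error)
        result.sort();
    }
*/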
5506
5507
5508/*
5509int handler::pre_read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
5510 KEY_MULTI_RANGE *ranges,
5511 uint range_count,
5512 bool sorted, HANDLER_BUFFER *buffer,
5513 bool use_parallel)
5514{
5515 int result;
5516 DBUG_ENTER("handler::pre_read_multi_range_first");
5517 result = pre_read_range_first(ranges->start_key.keypart_map ?
5518 &ranges->start_key : 0,
5519 ranges->end_key.keypart_map ?
5520 &ranges->end_key : 0,
5521 test(ranges->range_flag & EQ_RANGE),
5522 sorted,
5523 use_parallel);
5524 DBUG_RETURN(result);
5525}
5526*/
5527
5528
5529/**
5530 Read the first row in a range.
5531 Store the range bounds for future calls to read_range_next().
5532
5533 @param start_key Start key. Is 0 if no min range
5534 @param end_key End key. Is 0 if no max range
5535 @param eq_range_arg Set to 1 if start_key == end_key
5536 @param sorted Set to 1 if result should be sorted per key
5537
5538 @note
5539 Record is read into table->record[0]
5540
5541 @retval
5542 0 Found row
5543 @retval
5544 HA_ERR_END_OF_FILE No rows in range
5545 @retval
5546 \# Error code
5547*/
5548int handler::read_range_first(const key_range *start_key,
5549 const key_range *end_key,
5550 bool eq_range_arg, bool sorted)
5551{
5552 int result;
5553 DBUG_ENTER("handler::read_range_first");
5554
5555 eq_range= eq_range_arg;
5556 set_end_range(end_key);
5557 range_key_part= table->key_info[active_index].key_part;
5558
5559 if (!start_key) // Read first record
5560 result= ha_index_first(table->record[0]);
5561 else
5562 result= ha_index_read_map(table->record[0],
5563 start_key->key,
5564 start_key->keypart_map,
5565 start_key->flag);
5566 if (result)
5567 DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
5568 ? HA_ERR_END_OF_FILE
5569 : result);
5570
5571 if (compare_key(end_range) <= 0)
5572 {
5573 DBUG_RETURN(0);
5574 }
5575 else
5576 {
5577 /*
5578 The last read row does not fall in the range. So request
5579 storage engine to release row lock if possible.
5580 */
5581 unlock_row();
5582 DBUG_RETURN(HA_ERR_END_OF_FILE);
5583 }
5584}
5585
5586
5587/**
5588 Read the next row in the current range.
5589
5590 @note
5591 Record is read into table->record[0]
5592
5593 @retval
5594 0 Found row
5595 @retval
5596 HA_ERR_END_OF_FILE No rows in range
5597 @retval
5598 \# Error code
5599*/
5600int handler::read_range_next()
5601{
5602 int result;
5603 DBUG_ENTER("handler::read_range_next");
5604
5605 if (eq_range)
5606 {
5607 /* We trust that index_next_same always gives a row in range */
5608 DBUG_RETURN(ha_index_next_same(table->record[0],
5609 end_range->key,
5610 end_range->length));
5611 }
5612 result= ha_index_next(table->record[0]);
5613 if (result)
5614 DBUG_RETURN(result);
5615
5616 if (compare_key(end_range) <= 0)
5617 {
5618 DBUG_RETURN(0);
5619 }
5620 else
5621 {
5622 /*
5623 The last read row does not fall in the range. So request
5624 storage engine to release row lock if possible.
5625 */
5626 unlock_row();
5627 DBUG_RETURN(HA_ERR_END_OF_FILE);
5628 }
5629}
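/*
  Caller-side sketch of a full range scan using read_range_first() and
  read_range_next() above. It assumes ha_index_init() has already been called
  for the scanned index; 'h', 'start_key', 'end_key', 'process' and
  'handle_error' are hypothetical placeholders.

    int error= h->read_range_first(&start_key, &end_key,
                                   false,   // eq_range
                                   true);   // sorted
    while (!error)
    {
      process(table->record[0]);            // consume the current row
      error= h->read_range_next();
    }
    if (error != HA_ERR_END_OF_FILE)
      handle_error(error);                  // a real error, not just end of range
*/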
5630
5631
5632void handler::set_end_range(const key_range *end_key)
5633{
5634 end_range= 0;
5635 if (end_key)
5636 {
5637 end_range= &save_end_range;
5638 save_end_range= *end_key;
5639 key_compare_result_on_equal=
5640 ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
5641 (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
5642 }
5643}
5644
5645
5646/**
5647 Check if the found key (in the current row) is past the range's max value.
5648
5649 @param range range to compare to row. May be 0 for no range
5650
5651 @see also
5652 key.cc::key_cmp()
5653
5654 @return
5655 The return value is SIGN(key_in_row - range_key):
5656
5657 - 0 : Key is equal to range or 'range' == 0 (no range)
5658 - -1 : Key is less than range
5659 - 1 : Key is larger than range
5660*/
5661int handler::compare_key(key_range *range)
5662{
5663 int cmp;
5664 if (!range || in_range_check_pushed_down)
5665 return 0; // No max range
5666 cmp= key_cmp(range_key_part, range->key, range->length);
5667 if (!cmp)
5668 cmp= key_compare_result_on_equal;
5669 return cmp;
5670}
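/*
  A worked example of the sign convention above, assuming an index on an INT
  column 'a' and an end range built from "a < 10" (end_key flag
  HA_READ_BEFORE_KEY, so key_compare_result_on_equal == 1):

    row key  9 : key_cmp() <  0               -> compare_key() == -1 (in range)
    row key 10 : key_cmp() == 0, equal -> +1  -> compare_key() ==  1 (past range)
    row key 11 : key_cmp() >  0               -> compare_key() ==  1 (past range)

  With "a <= 10" (HA_READ_AFTER_KEY, key_compare_result_on_equal == -1) the
  equal case yields -1 instead, so the row with key 10 is still in range.
*/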
5671
5672
5673/*
5674 Same as compare_key(), but doesn't check the in_range_check_pushed_down flag.
5675 This is used by the index condition pushdown implementation.
5676*/
5677
5678int handler::compare_key2(key_range *range) const
5679{
5680 int cmp;
5681 if (!range)
5682 return 0; // no max range
5683 cmp= key_cmp(range_key_part, range->key, range->length);
5684 if (!cmp)
5685 cmp= key_compare_result_on_equal;
5686 return cmp;
5687}
5688
5689
5690/**
5691 ICP callback - to be called by an engine to check the pushed condition
5692*/
5693extern "C" enum icp_result handler_index_cond_check(void* h_arg)
5694{
5695 handler *h= (handler*)h_arg;
5696 THD *thd= h->table->in_use;
5697 enum icp_result res;
5698
5699 enum thd_kill_levels abort_at= h->has_transactions() ?
5700 THD_ABORT_SOFTLY : THD_ABORT_ASAP;
5701 if (thd_kill_level(thd) > abort_at)
5702 return ICP_ABORTED_BY_USER;
5703
5704 if (h->end_range && h->compare_key2(h->end_range) > 0)
5705 return ICP_OUT_OF_RANGE;
5706 h->increment_statistics(&SSV::ha_icp_attempts);
5707 if ((res= h->pushed_idx_cond->val_int()? ICP_MATCH : ICP_NO_MATCH) ==
5708 ICP_MATCH)
5709 h->increment_statistics(&SSV::ha_icp_match);
5710 return res;
5711}
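/*
  Sketch of how a storage engine is expected to drive this callback while
  walking index entries with a pushed condition. The loop below is an
  illustrative pseudo-flow, not an existing engine API; the engine stores the
  callback and its argument ('callback_arg') when the condition is pushed.

    while (the engine has a next index entry)
    {
      unpack the indexed columns into table->record[0];
      switch (handler_index_cond_check(callback_arg))
      {
      case ICP_MATCH:           return this row (fetch the full record if needed)
      case ICP_NO_MATCH:        skip it, continue with the next index entry
      case ICP_OUT_OF_RANGE:    stop, report HA_ERR_END_OF_FILE
      case ICP_ABORTED_BY_USER: stop, the statement was killed
      }
    }
*/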
5712
5713int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
5714 key_part_map keypart_map,
5715 enum ha_rkey_function find_flag)
5716{
5717 int error, UNINIT_VAR(error1);
5718
5719 error= ha_index_init(index, 0);
5720 if (likely(!error))
5721 {
5722 error= index_read_map(buf, key, keypart_map, find_flag);
5723 error1= ha_index_end();
5724 }
5725 return error ? error : error1;
5726}
5727
5728
5729/**
5730 Returns a list of all known extensions.
5731
5732 No mutexes are taken; the worst-case race is a minor surplus memory
5733 allocation. We have to recreate the extension map if mysqld is restarted
5734 (for example within libmysqld).
5735
5736 @retval
5737 pointer pointer to TYPELIB structure
5738*/
5739static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
5740 void *arg)
5741{
5742 List<char> *found_exts= (List<char> *) arg;
5743 handlerton *hton= plugin_hton(plugin);
5744 List_iterator_fast<char> it(*found_exts);
5745 const char **ext, *old_ext;
5746
5747 for (ext= hton->tablefile_extensions; *ext; ext++)
5748 {
5749 while ((old_ext= it++))
5750 {
5751 if (!strcmp(old_ext, *ext))
5752 break;
5753 }
5754 if (!old_ext)
5755 found_exts->push_back((char *) *ext);
5756
5757 it.rewind();
5758 }
5759 return FALSE;
5760}
5761
5762TYPELIB *ha_known_exts(void)
5763{
5764 if (!known_extensions.type_names || mysys_usage_id != known_extensions_id)
5765 {
5766 List<char> found_exts;
5767 const char **ext, *old_ext;
5768
5769 known_extensions_id= mysys_usage_id;
5770 found_exts.push_back((char*) TRG_EXT);
5771 found_exts.push_back((char*) TRN_EXT);
5772
5773 plugin_foreach(NULL, exts_handlerton,
5774 MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
5775
5776 ext= (const char **) my_once_alloc(sizeof(char *)*
5777 (found_exts.elements+1),
5778 MYF(MY_WME | MY_FAE));
5779
5780 DBUG_ASSERT(ext != 0);
5781 known_extensions.count= found_exts.elements;
5782 known_extensions.type_names= ext;
5783
5784 List_iterator_fast<char> it(found_exts);
5785 while ((old_ext= it++))
5786 *ext++= old_ext;
5787 *ext= 0;
5788 }
5789 return &known_extensions;
5790}
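/*
  Minimal consumer sketch for the TYPELIB returned above (count and
  type_names are the fields filled in by ha_known_exts()):

    TYPELIB *exts= ha_known_exts();
    for (uint i= 0; i < exts->count; i++)
      fprintf(stderr, "known extension: %s\n", exts->type_names[i]);
*/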
5791
5792
5793static bool stat_print(THD *thd, const char *type, size_t type_len,
5794 const char *file, size_t file_len,
5795 const char *status, size_t status_len)
5796{
5797 Protocol *protocol= thd->protocol;
5798 protocol->prepare_for_resend();
5799 protocol->store(type, type_len, system_charset_info);
5800 protocol->store(file, file_len, system_charset_info);
5801 protocol->store(status, status_len, system_charset_info);
5802 if (protocol->write())
5803 return TRUE;
5804 return FALSE;
5805}
5806
5807
5808static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
5809 void *arg)
5810{
5811 enum ha_stat_type stat= *(enum ha_stat_type *) arg;
5812 handlerton *hton= plugin_hton(plugin);
5813 if (hton->state == SHOW_OPTION_YES && hton->show_status &&
5814 hton->show_status(hton, thd, stat_print, stat))
5815 return TRUE;
5816 return FALSE;
5817}
5818
5819bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
5820{
5821 List<Item> field_list;
5822 Protocol *protocol= thd->protocol;
5823 MEM_ROOT *mem_root= thd->mem_root;
5824 bool result;
5825
5826 field_list.push_back(new (mem_root) Item_empty_string(thd, "Type", 10),
5827 mem_root);
5828 field_list.push_back(new (mem_root)
5829 Item_empty_string(thd, "Name", FN_REFLEN), mem_root);
5830 field_list.push_back(new (mem_root)
5831 Item_empty_string(thd, "Status", 10),
5832 mem_root);
5833
5834 if (protocol->send_result_set_metadata(&field_list,
5835 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
5836 return TRUE;
5837
5838 if (db_type == NULL)
5839 {
5840 result= plugin_foreach(thd, showstat_handlerton,
5841 MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
5842 }
5843 else
5844 {
5845 if (db_type->state != SHOW_OPTION_YES)
5846 {
5847 const LEX_CSTRING *name= hton_name(db_type);
5848 result= stat_print(thd, name->str, name->length,
5849 "", 0, "DISABLED", 8) ? 1 : 0;
5850 }
5851 else
5852 {
5853 result= db_type->show_status &&
5854 db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
5855 }
5856 }
5857
5858 /*
5859 We also check thd->is_error() as InnoDB may return 0 even if
5860 there was an error.
5861 */
5862 if (likely(!result && !thd->is_error()))
5863 my_eof(thd);
5864 else if (!thd->is_error())
5865 my_error(ER_GET_ERRNO, MYF(0), errno, hton_name(db_type)->str);
5866 return result;
5867}
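/*
  For orientation: this is the function behind SHOW ENGINE ... STATUS/MUTEX.
  A rough equivalent of "SHOW ENGINE INNODB STATUS" is the call sketched
  below, where 'innodb_hton' stands for InnoDB's handlerton and is only an
  illustrative name:

    ha_show_status(thd, innodb_hton, HA_ENGINE_STATUS);

  With db_type == NULL every loaded engine's show_status hook is polled via
  plugin_foreach(), as in the code above.
*/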
5868
5869/*
5870 Function to check whether the conditions for row-based binlogging are
5871 met for the table.
5872
5873 A row in the given table should be replicated if:
5874 - It is not called by the partitioning engine
5875 - Row-based replication is enabled in the current thread
5876 - The binlog is enabled
5877 - It is not a temporary table
5878 - The binary log is open
5879 - The database the table resides in shall be binlogged (binlog_*_db rules)
5880 - The table is not mysql.event
5881
5882 RETURN VALUE
5883 0 No binary logging in row format
5884 1 Row needs to be logged
5885*/
5886
5887bool handler::check_table_binlog_row_based(bool binlog_row)
5888{
5889 if (table->versioned(VERS_TRX_ID))
5890 return false;
5891 if (unlikely((table->in_use->variables.sql_log_bin_off)))
5892 return 0; /* Called by partitioning engine */
5893 if (unlikely((!check_table_binlog_row_based_done)))
5894 {
5895 check_table_binlog_row_based_done= 1;
5896 check_table_binlog_row_based_result=
5897 check_table_binlog_row_based_internal(binlog_row);
5898 }
5899 return check_table_binlog_row_based_result;
5900}
5901
5902bool handler::check_table_binlog_row_based_internal(bool binlog_row)
5903{
5904 THD *thd= table->in_use;
5905
5906 return (table->s->can_do_row_logging &&
5907 thd->is_current_stmt_binlog_format_row() &&
5908 /*
5909 Wsrep partially enables binary logging if it has not been
5910 explicitly turned on. As a result we return 'true' if we are in
5911 wsrep binlog emulation mode and the current thread is not a wsrep
5912 applier or replayer thread. This decision is not affected by
5913 @@sql_log_bin, as we want the events to make it into the binlog
5914 cache only, so that they can be filtered out later, before they
5915 make it into the binary log file.
5916
5917 However, we do return 'false' if binary logging was temporarily
5918 turned off (see tmp_disable_binlog(A)).
5919
5920 Otherwise, return 'true' if binary logging is on.
5921 */
5922 IF_WSREP(((WSREP_EMULATE_BINLOG(thd) &&
5923 (thd->wsrep_exec_mode != REPL_RECV)) ||
5924 ((WSREP(thd) ||
5925 (thd->variables.option_bits & OPTION_BIN_LOG)) &&
5926 mysql_bin_log.is_open())),
5927 (thd->variables.option_bits & OPTION_BIN_LOG) &&
5928 mysql_bin_log.is_open()));
5929}
5930
5931
5932/** @brief
5933 Write table maps for all (manually or automatically) locked tables
5934 to the binary log. Also, if binlog_annotate_row_events is ON,
5935 write Annotate_rows event before the first table map.
5936
5937 SYNOPSIS
5938 write_locked_table_maps()
5939 thd Pointer to THD structure
5940
5941 DESCRIPTION
5942 This function will generate and write table maps for all tables
5943 that are locked by the thread 'thd'.
5944
5945 RETURN VALUE
5946 0 All OK
5947 1 Failed to write all table maps
5948
5949 SEE ALSO
5950 THD::lock
5951*/
5952
5953static int write_locked_table_maps(THD *thd)
5954{
5955 DBUG_ENTER("write_locked_table_maps");
5956 DBUG_PRINT("enter", ("thd:%p thd->lock:%p "
5957 "thd->extra_lock: %p",
5958 thd, thd->lock, thd->extra_lock));
5959
5960 DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));
5961
5962 MYSQL_LOCK *locks[2];
5963 locks[0]= thd->extra_lock;
5964 locks[1]= thd->lock;
5965 my_bool with_annotate= thd->variables.binlog_annotate_row_events &&
5966 thd->query() && thd->query_length();
5967
5968 for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
5969 {
5970 MYSQL_LOCK const *const lock= locks[i];
5971 if (lock == NULL)
5972 continue;
5973
5974 TABLE **const end_ptr= lock->table + lock->table_count;
5975 for (TABLE **table_ptr= lock->table ;
5976 table_ptr != end_ptr ;
5977 ++table_ptr)
5978 {
5979 TABLE *const table= *table_ptr;
5980 DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
5981 if (table->current_lock == F_WRLCK &&
5982 table->file->check_table_binlog_row_based(0))
5983 {
5984 /*
5985 We need to have a transactional behavior for SQLCOM_CREATE_TABLE
5986 (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
5987 compatible behavior with the STMT based replication even when
5988 the table is not transactional. In other words, if the operation
5989 fails while executing the insert phase nothing is written to the
5990 binlog.
5991
5992 Note that at this point, we check the type of a set of tables to
5993 create the table map events. In the function binlog_log_row(),
5994 which calls the current function, we check the type of the table
5995 of the current row.
5996 */
5997 bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
5998 table->file->has_transactions();
5999 int const error= thd->binlog_write_table_map(table, has_trans,
6000 &with_annotate);
6001 /*
6002 If an error occurs, it is the responsibility of the caller to
6003 roll back the transaction.
6004 */
6005 if (unlikely(error))
6006 DBUG_RETURN(1);
6007 }
6008 }
6009 }
6010 DBUG_RETURN(0);
6011}
6012
6013
6014static int binlog_log_row_internal(TABLE* table,
6015 const uchar *before_record,
6016 const uchar *after_record,
6017 Log_func *log_func)
6018{
6019 bool error= 0;
6020 THD *const thd= table->in_use;
6021
6022 /*
6023 If there are no table maps written to the binary log, this is
6024 the first row handled in this statement. In that case, we need
6025 to write table maps for all locked tables to the binary log.
6026 */
6027 if (likely(!(error= ((thd->get_binlog_table_maps() == 0 &&
6028 write_locked_table_maps(thd))))))
6029 {
6030 /*
6031 We need to have a transactional behavior for SQLCOM_CREATE_TABLE
6032 (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
6033 compatible behavior with the STMT based replication even when
6034 the table is not transactional. In other words, if the operation
6035 fails while executing the insert phase nothing is written to the
6036 binlog.
6037 */
6038 bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
6039 table->file->has_transactions();
6040 error= (*log_func)(thd, table, has_trans, before_record, after_record);
6041 }
6042 return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
6043}
6044
6045int binlog_log_row(TABLE* table, const uchar *before_record,
6046 const uchar *after_record, Log_func *log_func)
6047{
6048#ifdef WITH_WSREP
6049 THD *const thd= table->in_use;
6050
6051 /* only InnoDB tables will be replicated through binlog emulation */
6052 if ((WSREP_EMULATE_BINLOG(thd) &&
6053 table->file->partition_ht()->db_type != DB_TYPE_INNODB) ||
6054 (thd->wsrep_ignore_table == true))
6055 return 0;
6056
6057 /* enforce wsrep_max_ws_rows */
6058 if (WSREP(thd) && table->s->tmp_table == NO_TMP_TABLE)
6059 {
6060 thd->wsrep_affected_rows++;
6061 if (wsrep_max_ws_rows &&
6062 thd->wsrep_exec_mode != REPL_RECV &&
6063 thd->wsrep_affected_rows > wsrep_max_ws_rows)
6064 {
6065 trans_rollback_stmt(thd) || trans_rollback(thd);
6066 my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0));
6067 return ER_ERROR_DURING_COMMIT;
6068 }
6069 }
6070#endif
6071
6072 if (!table->file->check_table_binlog_row_based(1))
6073 return 0;
6074 return binlog_log_row_internal(table, before_record, after_record, log_func);
6075}
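/*
  Call-flow sketch of the row logging path, as driven by the ha_*_row()
  wrappers further down in this file (simplified, error handling omitted):

    handler::ha_write_row(buf)
      -> write_row(buf)                                // engine does the write
      -> binlog_log_row(table, NULL, buf, log_func)
           -> wsrep checks (see above)
           -> check_table_binlog_row_based(1)          // cheap, cached check
           -> binlog_log_row_internal()
                -> write_locked_table_maps(thd)        // only for the first row
                                                       // of the statement
                -> (*log_func)(thd, table, has_trans, NULL, buf)
*/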
6076
6077
6078int handler::ha_external_lock(THD *thd, int lock_type)
6079{
6080 int error;
6081 DBUG_ENTER("handler::ha_external_lock");
6082 /*
6083 Whether this is a lock or an unlock call, next_insert_id should be 0 here;
6084 this verifies that if get_auto_increment() was called (and thus may have
6085 reserved intervals or taken a table lock), ha_release_auto_increment() was called too.
6086 */
6087 DBUG_ASSERT(next_insert_id == 0);
6088 /* Consecutive lock calls without unlocking in between are not allowed */
6089 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
6090 ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
6091 lock_type == F_UNLCK));
6092 /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
6093 DBUG_ASSERT(inited == NONE || table->open_by_handler);
6094
6095 if (MYSQL_HANDLER_RDLOCK_START_ENABLED() ||
6096 MYSQL_HANDLER_WRLOCK_START_ENABLED() ||
6097 MYSQL_HANDLER_UNLOCK_START_ENABLED())
6098 {
6099 if (lock_type == F_RDLCK)
6100 {
6101 MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
6102 table_share->table_name.str);
6103 }
6104 else if (lock_type == F_WRLCK)
6105 {
6106 MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
6107 table_share->table_name.str);
6108 }
6109 else if (lock_type == F_UNLCK)
6110 {
6111 MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
6112 table_share->table_name.str);
6113 }
6114 }
6115
6116 /*
6117 We cache the table flags if the locking succeeded. Otherwise, we
6118 keep them as they were when they were fetched in ha_open().
6119 */
6120 MYSQL_TABLE_LOCK_WAIT(m_psi, PSI_TABLE_EXTERNAL_LOCK, lock_type,
6121 { error= external_lock(thd, lock_type); })
6122
6123 DBUG_EXECUTE_IF("external_lock_failure", error= HA_ERR_GENERIC;);
6124
6125 if (likely(error == 0 || lock_type == F_UNLCK))
6126 {
6127 m_lock_type= lock_type;
6128 cached_table_flags= table_flags();
6129 if (table_share->tmp_table == NO_TMP_TABLE)
6130 mysql_audit_external_lock(thd, table_share, lock_type);
6131 }
6132
6133 if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() ||
6134 MYSQL_HANDLER_WRLOCK_DONE_ENABLED() ||
6135 MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
6136 {
6137 if (lock_type == F_RDLCK)
6138 {
6139 MYSQL_HANDLER_RDLOCK_DONE(error);
6140 }
6141 else if (lock_type == F_WRLCK)
6142 {
6143 MYSQL_HANDLER_WRLOCK_DONE(error);
6144 }
6145 else if (lock_type == F_UNLCK)
6146 {
6147 MYSQL_HANDLER_UNLOCK_DONE(error);
6148 }
6149 }
6150 DBUG_RETURN(error);
6151}
6152
6153
6154/** @brief
6155 Check handler usage and reset state of file to after 'open'
6156*/
6157int handler::ha_reset()
6158{
6159 DBUG_ENTER("ha_reset");
6160 /* Check that we have called all proper deallocation functions */
6161 DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
6162 table->s->column_bitmap_size ==
6163 (uchar*) table->def_write_set.bitmap);
6164 DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
6165 DBUG_ASSERT(!table->file->keyread_enabled());
6166 /* ensure that ha_index_end / ha_rnd_end has been called */
6167 DBUG_ASSERT(inited == NONE);
6168 /* reset the bitmaps to point to defaults */
6169 table->default_column_bitmaps();
6170 pushed_cond= NULL;
6171 tracker= NULL;
6172 mark_trx_read_write_done= check_table_binlog_row_based_done=
6173 check_table_binlog_row_based_result= 0;
6174 /* Reset information about pushed engine conditions */
6175 cancel_pushed_idx_cond();
6176 /* Reset information about pushed index conditions */
6177 clear_top_table_fields();
6178 DBUG_RETURN(reset());
6179}
6180
6181
6182int handler::ha_write_row(uchar *buf)
6183{
6184 int error;
6185 Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
6186 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
6187 m_lock_type == F_WRLCK);
6188 DBUG_ENTER("handler::ha_write_row");
6189 DEBUG_SYNC_C("ha_write_row_start");
6190
6191 MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
6192 mark_trx_read_write();
6193 increment_statistics(&SSV::ha_write_count);
6194
6195 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_WRITE_ROW, MAX_KEY, 0,
6196 { error= write_row(buf); })
6197
6198 MYSQL_INSERT_ROW_DONE(error);
6199 if (likely(!error) && !row_already_logged)
6200 {
6201 rows_changed++;
6202 error= binlog_log_row(table, 0, buf, log_func);
6203 }
6204 DEBUG_SYNC_C("ha_write_row_end");
6205 DBUG_RETURN(error);
6206}
6207
6208
6209int handler::ha_update_row(const uchar *old_data, const uchar *new_data)
6210{
6211 int error;
6212 Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
6213 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
6214 m_lock_type == F_WRLCK);
6215
6216 /*
6217 Some storage engines require that the new record is in record[0]
6218 (and the old record is in record[1]).
6219 */
6220 DBUG_ASSERT(new_data == table->record[0]);
6221 DBUG_ASSERT(old_data == table->record[1]);
6222
6223 MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
6224 mark_trx_read_write();
6225 increment_statistics(&SSV::ha_update_count);
6226
6227 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_UPDATE_ROW, active_index, 0,
6228 { error= update_row(old_data, new_data);})
6229
6230 MYSQL_UPDATE_ROW_DONE(error);
6231 if (likely(!error) && !row_already_logged)
6232 {
6233 rows_changed++;
6234 error= binlog_log_row(table, old_data, new_data, log_func);
6235 }
6236 return error;
6237}
6238
6239/*
6240 Update first row. Only used by sequence tables
6241*/
6242
6243int handler::update_first_row(uchar *new_data)
6244{
6245 int error;
6246 if (likely(!(error= ha_rnd_init(1))))
6247 {
6248 int end_error;
6249 if (likely(!(error= ha_rnd_next(table->record[1]))))
6250 {
6251 /*
6252 We have to do the memcmp as otherwise we may get error 169 (HA_ERR_RECORD_IS_THE_SAME) from InnoDB
6253 */
6254 if (memcmp(new_data, table->record[1], table->s->reclength))
6255 error= update_row(table->record[1], new_data);
6256 }
6257 end_error= ha_rnd_end();
6258 if (likely(!error))
6259 error= end_error;
6260 /* Logging would be wrong if update_row works but ha_rnd_end fails */
6261 DBUG_ASSERT(!end_error || error != 0);
6262 }
6263 return error;
6264}
6265
6266
6267int handler::ha_delete_row(const uchar *buf)
6268{
6269 int error;
6270 Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
6271 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
6272 m_lock_type == F_WRLCK);
6273 /*
6274 Normally table->record[0] is used, but sometimes table->record[1] is used.
6275 */
6276 DBUG_ASSERT(buf == table->record[0] ||
6277 buf == table->record[1]);
6278
6279 MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
6280 mark_trx_read_write();
6281 increment_statistics(&SSV::ha_delete_count);
6282
6283 TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_DELETE_ROW, active_index, 0,
6284 { error= delete_row(buf);})
6285 MYSQL_DELETE_ROW_DONE(error);
6286 if (likely(!error))
6287 {
6288 rows_changed++;
6289 error= binlog_log_row(table, buf, 0, log_func);
6290 }
6291 return error;
6292}
6293
6294
6295/**
6296 Execute a direct update request. A direct update request updates all
6297 qualified rows in a single operation, rather than one row at a time.
6298 In a Spider cluster the direct update operation is pushed down to the
6299 child levels of the cluster.
6300
6301 Note that this can't be used in case of statement logging
6302
6303 @param update_rows Number of updated rows.
6304
6305 @retval 0 Success.
6306 @retval != 0 Failure.
6307*/
6308
6309int handler::ha_direct_update_rows(ha_rows *update_rows)
6310{
6311 int error;
6312
6313 MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
6314 mark_trx_read_write();
6315
6316 error = direct_update_rows(update_rows);
6317 MYSQL_UPDATE_ROW_DONE(error);
6318 return error;
6319}
6320
6321
6322/**
6323 Execute a direct delete request. A direct delete request deletes all
6324 qualified rows in a single operation, rather than one row at a time.
6325 In a Spider cluster the direct delete operation is pushed down to the
6326 child levels of the cluster.
6327
6328 @param delete_rows Number of deleted rows.
6329
6330 @retval 0 Success.
6331 @retval != 0 Failure.
6332*/
6333
6334int handler::ha_direct_delete_rows(ha_rows *delete_rows)
6335{
6336 int error;
6337 /* Ensure we are not using row-based binary logging */
6338 DBUG_ASSERT(!table->in_use->is_current_stmt_binlog_format_row());
6339
6340 MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
6341 mark_trx_read_write();
6342
6343 error = direct_delete_rows(delete_rows);
6344 MYSQL_DELETE_ROW_DONE(error);
6345 return error;
6346}
6347
6348
6349/** @brief
6350 use_hidden_primary_key() is called in case of an update/delete when
6351 (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
6352 but we don't have a primary key
6353*/
6354void handler::use_hidden_primary_key()
6355{
6356 /* Fall back to using all columns in the table to identify the row */
6357 table->column_bitmaps_set(&table->s->all_set, table->write_set);
6358}
6359
6360
6361/**
6362 Get an initialized ha_share.
6363
6364 @return Initialized ha_share
6365 @retval NULL ha_share is not yet initialized.
6366 @retval != NULL previous initialized ha_share.
6367
6368 @note
6369 If not a temp table, then LOCK_ha_data must be held.
6370*/
6371
6372Handler_share *handler::get_ha_share_ptr()
6373{
6374 DBUG_ENTER("handler::get_ha_share_ptr");
6375 DBUG_ASSERT(ha_share);
6376 DBUG_ASSERT(table_share);
6377
6378#ifndef DBUG_OFF
6379 if (table_share->tmp_table == NO_TMP_TABLE)
6380 mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
6381#endif
6382
6383 DBUG_RETURN(*ha_share);
6384}
6385
6386
6387/**
6388 Set ha_share to be used by all instances of the same table/partition.
6389
6390 @param ha_share Handler_share to be shared.
6391
6392 @note
6393 If not a temp table, then LOCK_ha_data must be held.
6394*/
6395
6396void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
6397{
6398 DBUG_ENTER("handler::set_ha_share_ptr");
6399 DBUG_ASSERT(ha_share);
6400#ifndef DBUG_OFF
6401 if (table_share->tmp_table == NO_TMP_TABLE)
6402 mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
6403#endif
6404
6405 *ha_share= arg_ha_share;
6406 DBUG_VOID_RETURN;
6407}
6408
6409
6410/**
6411 Take a lock for protecting shared handler data.
6412*/
6413
6414void handler::lock_shared_ha_data()
6415{
6416 DBUG_ASSERT(table_share);
6417 if (table_share->tmp_table == NO_TMP_TABLE)
6418 mysql_mutex_lock(&table_share->LOCK_ha_data);
6419}
6420
6421
6422/**
6423 Release lock for protecting ha_share.
6424*/
6425
6426void handler::unlock_shared_ha_data()
6427{
6428 DBUG_ASSERT(table_share);
6429 if (table_share->tmp_table == NO_TMP_TABLE)
6430 mysql_mutex_unlock(&table_share->LOCK_ha_data);
6431}
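/*
  Intended usage pattern for engines sharing per-table state through
  ha_share (a sketch; Example_share and create_share() are hypothetical
  engine-side names, with Example_share derived from Handler_share):

    Example_share *share;
    lock_shared_ha_data();
    if (!(share= static_cast<Example_share*>(get_ha_share_ptr())))
    {
      share= create_share();        // allocate the engine-specific share
      set_ha_share_ptr(share);      // publish it for other handler instances
    }
    unlock_shared_ha_data();
*/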
6432
6433/** @brief
6434 Dummy function which accepts information about log files that is not
6435 needed by handlers
6436*/
6437void signal_log_not_needed(struct handlerton, char *log_file)
6438{
6439 DBUG_ENTER("signal_log_not_needed");
6440 DBUG_PRINT("enter", ("logfile '%s'", log_file));
6441 DBUG_VOID_RETURN;
6442}
6443
6444void handler::set_lock_type(enum thr_lock_type lock)
6445{
6446 table->reginfo.lock_type= lock;
6447}
6448
6449#ifdef WITH_WSREP
6450/**
6451 @details
6452 This function makes the storage engine to force the victim transaction
6453 to abort. Currently, only innodb has this functionality, but any SE
6454 implementing the wsrep API should provide this service to support
6455 multi-master operation.
6456
6457 @note Aborting the transaction does NOT end it, it still has to
6458 be rolled back with hton->rollback().
6459
6460 @note It is safe to abort, from one thread (bf_thd), the transaction
6461 running in another thread (victim_thd), because InnoDB's lock_sys and
6462 trx_mutex guarantee the necessary protection. However, it's not safe
6463 to access victim_thd->transaction, because it's not protected from
6464 concurrent accesses. And it's an overkill to take LOCK_plugin and
6465 iterate the whole installed_htons[] array every time.
6466
6467 @param bf_thd brute force THD asking for the abort
6468 @param victim_thd victim THD to be aborted
6469
6470 @return
6471 always 0
6472*/
6473
6474int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
6475{
6476 DBUG_ENTER("ha_abort_transaction");
6477 if (!WSREP(bf_thd) &&
6478 !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
6479 bf_thd->wsrep_exec_mode == TOTAL_ORDER)) {
6480 DBUG_RETURN(0);
6481 }
6482
6483 handlerton *hton= installed_htons[DB_TYPE_INNODB];
6484 if (hton && hton->abort_transaction)
6485 {
6486 hton->abort_transaction(hton, bf_thd, victim_thd, signal);
6487 }
6488 else
6489 {
6490 WSREP_WARN("Cannot abort InnoDB transaction");
6491 }
6492
6493 DBUG_RETURN(0);
6494}
6495
6496void ha_fake_trx_id(THD *thd)
6497{
6498 DBUG_ENTER("ha_fake_trx_id");
6499
6500 bool no_fake_trx_id= true;
6501
6502 if (!WSREP(thd))
6503 {
6504 DBUG_VOID_RETURN;
6505 }
6506
6507 /* Try statement transaction if standard one is not set. */
6508 THD_TRANS *trans= (thd->transaction.all.ha_list) ? &thd->transaction.all :
6509 &thd->transaction.stmt;
6510
6511 Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
6512
6513 for (; ha_info; ha_info= ha_info_next)
6514 {
6515 handlerton *hton= ha_info->ht();
6516 if (hton->fake_trx_id)
6517 {
6518 hton->fake_trx_id(hton, thd);
6519
6520 /* Got a fake trx id. */
6521 no_fake_trx_id= false;
6522
6523 /*
6524 We need transaction ID from just one storage engine providing
6525 fake_trx_id (which will most likely be the case).
6526 */
6527 break;
6528 }
6529 ha_info_next= ha_info->next();
6530 }
6531
6532 if (unlikely(no_fake_trx_id))
6533 WSREP_WARN("Cannot get fake transaction ID from storage engine.");
6534
6535 DBUG_VOID_RETURN;
6536}
6537#endif /* WITH_WSREP */
6538
6539
6540#ifdef TRANS_LOG_MGM_EXAMPLE_CODE
6541/*
6542 Example of transaction log management functions based on assumption that logs
6543 placed into a directory
6544*/
6545#include <my_dir.h>
6546#include <my_sys.h>
6547int example_of_iterator_using_for_logs_cleanup(handlerton *hton)
6548{
6549 void *buffer;
6550 int res= 1;
6551 struct handler_iterator iterator;
6552 struct handler_log_file_data data;
6553
6554 if (!hton->create_iterator)
6555 return 1; /* iterator creator is not supported */
6556
6557 if ((*hton->create_iterator)(hton, HA_TRANSACTLOG_ITERATOR, &iterator) !=
6558 HA_ITERATOR_OK)
6559 {
6560 /* error during creation of log iterator or iterator is not supported */
6561 return 1;
6562 }
6563 while((*iterator.next)(&iterator, (void*)&data) == 0)
6564 {
6565 printf("%s\n", data.filename.str);
6566 if (data.status == HA_LOG_STATUS_FREE &&
6567 mysql_file_delete(INSTRUMENT_ME,
6568 data.filename.str, MYF(MY_WME)))
6569 goto err;
6570 }
6571 res= 0;
6572err:
6573 (*iterator.destroy)(&iterator);
6574 return res;
6575}
6576
6577
6578/*
6579 Here we should get info from the handler about where it saves its logs,
6580 but as this is just an example, we use a constant.
6581 FN_ROOTDIR ("/") is safe enough for an example, because nobody has
6582 rights on it except root and it consists of directories only, at least
6583 on *nix (there is no Windows-safe solution here, but it is only an example).
6584*/
6585#define fl_dir FN_ROOTDIR
6586
6587
6588/** @brief
6589 Dummy function to return the log status. It should be replaced by a
6590 function which really detects the log status and checks that the file
6591 is a log of this handler.
6592*/
6593enum log_status fl_get_log_status(char *log)
6594{
6595 MY_STAT stat_buff;
6596 if (mysql_file_stat(INSTRUMENT_ME, log, &stat_buff, MYF(0)))
6597 return HA_LOG_STATUS_INUSE;
6598 return HA_LOG_STATUS_NOSUCHLOG;
6599}
6600
6601
6602struct fl_buff
6603{
6604 LEX_STRING *names;
6605 enum log_status *statuses;
6606 uint32 entries;
6607 uint32 current;
6608};
6609
6610
6611int fl_log_iterator_next(struct handler_iterator *iterator,
6612 void *iterator_object)
6613{
6614 struct fl_buff *buff= (struct fl_buff *)iterator->buffer;
6615 struct handler_log_file_data *data=
6616 (struct handler_log_file_data *) iterator_object;
6617 if (buff->current >= buff->entries)
6618 return 1;
6619 data->filename= buff->names[buff->current];
6620 data->status= buff->statuses[buff->current];
6621 buff->current++;
6622 return 0;
6623}
6624
6625
6626void fl_log_iterator_destroy(struct handler_iterator *iterator)
6627{
6628 my_free(iterator->buffer);
6629}
6630
6631
6632/** @brief
6633 Returns a buffer to be assigned in the handler_iterator struct.
6634*/
6635enum handler_create_iterator_result
6636fl_log_iterator_buffer_init(struct handler_iterator *iterator)
6637{
6638 MY_DIR *dirp;
6639 struct fl_buff *buff;
6640 char *name_ptr;
6641 uchar *ptr;
6642 FILEINFO *file;
6643 uint32 i;
6644
6645 /* to be able to call my_free() without a crash in case of error */
6646 iterator->buffer= 0;
6647
6648 if (!(dirp = my_dir(fl_dir, MYF(MY_THREAD_SPECIFIC))))
6649 {
6650 return HA_ITERATOR_ERROR;
6651 }
6652 if ((ptr= (uchar*)my_malloc(ALIGN_SIZE(sizeof(fl_buff)) +
6653 ((ALIGN_SIZE(sizeof(LEX_STRING)) +
6654 sizeof(enum log_status) +
6655 + FN_REFLEN + 1) *
6656 (uint) dirp->number_of_files),
6657 MYF(MY_THREAD_SPECIFIC))) == 0)
6658 {
6659 return HA_ITERATOR_ERROR;
6660 }
6661 buff= (struct fl_buff *)ptr;
6662 buff->entries= buff->current= 0;
6663 ptr= ptr + (ALIGN_SIZE(sizeof(fl_buff)));
6664 buff->names= (LEX_STRING*) (ptr);
6665 ptr= ptr + ((ALIGN_SIZE(sizeof(LEX_STRING)) *
6666 (uint) dirp->number_of_files));
6667 buff->statuses= (enum log_status *)(ptr);
6668 name_ptr= (char *)(ptr + (sizeof(enum log_status) *
6669 (uint) dirp->number_of_files));
6670 for (i=0 ; i < (uint) dirp->number_of_files ; i++)
6671 {
6672 enum log_status st;
6673 file= dirp->dir_entry + i;
6674 if ((file->name[0] == '.' &&
6675 ((file->name[1] == '.' && file->name[2] == '\0') ||
6676 file->name[1] == '\0')))
6677 continue;
6678 if ((st= fl_get_log_status(file->name)) == HA_LOG_STATUS_NOSUCHLOG)
6679 continue;
6680 name_ptr= strxnmov(buff->names[buff->entries].str= name_ptr,
6681 FN_REFLEN, fl_dir, file->name, NullS);
6682 buff->names[buff->entries].length= (name_ptr -
6683 buff->names[buff->entries].str);
6684 buff->statuses[buff->entries]= st;
6685 buff->entries++;
6686 }
6687
6688 iterator->buffer= buff;
6689 iterator->next= &fl_log_iterator_next;
6690 iterator->destroy= &fl_log_iterator_destroy;
6691 my_dirend(dirp);
6692 return HA_ITERATOR_OK;
6693}
6694
6695
6696/* An example of an iterator creator */
6697enum handler_create_iterator_result
6698fl_create_iterator(enum handler_iterator_type type,
6699 struct handler_iterator *iterator)
6700{
6701 switch(type) {
6702 case HA_TRANSACTLOG_ITERATOR:
6703 return fl_log_iterator_buffer_init(iterator);
6704 default:
6705 return HA_ITERATOR_UNSUPPORTED;
6706 }
6707}
6708#endif /*TRANS_LOG_MGM_EXAMPLE_CODE*/
6709
6710
6711bool HA_CREATE_INFO::check_conflicting_charset_declarations(CHARSET_INFO *cs)
6712{
6713 if ((used_fields & HA_CREATE_USED_DEFAULT_CHARSET) &&
6714 /* DEFAULT vs explicit, or explicit vs DEFAULT */
6715 (((default_table_charset == NULL) != (cs == NULL)) ||
6716 /* Two different explicit character sets */
6717 (default_table_charset && cs &&
6718 !my_charset_same(default_table_charset, cs))))
6719 {
6720 my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
6721 "CHARACTER SET ", default_table_charset ?
6722 default_table_charset->csname : "DEFAULT",
6723 "CHARACTER SET ", cs ? cs->csname : "DEFAULT");
6724 return true;
6725 }
6726 return false;
6727}
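/*
  What this catches, roughly (SQL quoted only as illustration): two different
  explicit character sets in the same table options, e.g.
  "CHARACTER SET latin1 CHARACTER SET utf8", or CHARACTER SET DEFAULT mixed
  with an explicit one, both raise ER_CONFLICTING_DECLARATIONS; repeating the
  same explicit character set is accepted.
*/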
6728
6729/* Remove all indexes for a given table from global index statistics */
6730
6731static
6732int del_global_index_stats_for_table(THD *thd, uchar* cache_key, size_t cache_key_length)
6733{
6734 int res = 0;
6735 DBUG_ENTER("del_global_index_stats_for_table");
6736
6737 mysql_mutex_lock(&LOCK_global_index_stats);
6738
6739 for (uint i= 0; i < global_index_stats.records;)
6740 {
6741 INDEX_STATS *index_stats =
6742 (INDEX_STATS*) my_hash_element(&global_index_stats, i);
6743
6744 /* We search for the matching db\0table_name\0 string */
6745 if (index_stats &&
6746 index_stats->index_name_length >= cache_key_length &&
6747 !memcmp(index_stats->index, cache_key, cache_key_length))
6748 {
6749 res= my_hash_delete(&global_index_stats, (uchar*)index_stats);
6750 /*
6751 In our HASH implementation, on deletion one element is moved into
6752 the place where the deleted element was, and the last element is
6753 moved into the empty space. Thus we need to re-examine the current
6754 element, but we don't have to restart the search from the
6755 beginning.
6756 */
6757 }
6758 else
6759 i++;
6760 }
6761
6762 mysql_mutex_unlock(&LOCK_global_index_stats);
6763 DBUG_RETURN(res);
6764}
6765
6766/* Remove a table from global table statistics */
6767
6768int del_global_table_stat(THD *thd, LEX_CSTRING *db, LEX_CSTRING *table)
6769{
6770 TABLE_STATS *table_stats;
6771 int res = 0;
6772 uchar *cache_key;
6773 size_t cache_key_length;
6774 DBUG_ENTER("del_global_table_stat");
6775
6776 cache_key_length= db->length + 1 + table->length + 1;
6777
6778 if(!(cache_key= (uchar *)my_malloc(cache_key_length,
6779 MYF(MY_WME | MY_ZEROFILL))))
6780 {
6781 /* Out of memory error already given */
6782 res = 1;
6783 goto end;
6784 }
6785
6786 memcpy(cache_key, db->str, db->length);
6787 memcpy(cache_key + db->length + 1, table->str, table->length);
6788
6789 res= del_global_index_stats_for_table(thd, cache_key, cache_key_length);
6790
6791 mysql_mutex_lock(&LOCK_global_table_stats);
6792
6793 if((table_stats= (TABLE_STATS*) my_hash_search(&global_table_stats,
6794 cache_key,
6795 cache_key_length)))
6796 res= my_hash_delete(&global_table_stats, (uchar*)table_stats);
6797
6798 my_free(cache_key);
6799 mysql_mutex_unlock(&LOCK_global_table_stats);
6800
6801end:
6802 DBUG_RETURN(res);
6803}
6804
6805/* Remove an index from global index statistics */
6806
6807int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info)
6808{
6809 INDEX_STATS *index_stats;
6810 size_t key_length= table->s->table_cache_key.length + key_info->name.length + 1;
6811 int res = 0;
6812 DBUG_ENTER("del_global_index_stat");
6813 mysql_mutex_lock(&LOCK_global_index_stats);
6814
6815 if((index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
6816 key_info->cache_name,
6817 key_length)))
6818 res= my_hash_delete(&global_index_stats, (uchar*)index_stats);
6819
6820 mysql_mutex_unlock(&LOCK_global_index_stats);
6821 DBUG_RETURN(res);
6822}
6823
6824bool Vers_parse_info::is_start(const char *name) const
6825{
6826 DBUG_ASSERT(name);
6827 return as_row.start && as_row.start.streq(name);
6828}
6829bool Vers_parse_info::is_end(const char *name) const
6830{
6831 DBUG_ASSERT(name);
6832 return as_row.end && as_row.end.streq(name);
6833}
6834bool Vers_parse_info::is_start(const Create_field &f) const
6835{
6836 return f.flags & VERS_SYS_START_FLAG;
6837}
6838bool Vers_parse_info::is_end(const Create_field &f) const
6839{
6840 return f.flags & VERS_SYS_END_FLAG;
6841}
6842
6843static Create_field *vers_init_sys_field(THD *thd, const char *field_name, int flags, bool integer)
6844{
6845 Create_field *f= new (thd->mem_root) Create_field();
6846 if (!f)
6847 return NULL;
6848
6849 memset(f, 0, sizeof(*f));
6850 f->field_name.str= field_name;
6851 f->field_name.length= strlen(field_name);
6852 f->charset= system_charset_info;
6853 f->flags= flags | NOT_NULL_FLAG;
6854 if (integer)
6855 {
6856 DBUG_ASSERT(0); // Not implemented yet
6857 f->set_handler(&type_handler_vers_trx_id);
6858 f->length= MY_INT64_NUM_DECIMAL_DIGITS - 1;
6859 f->flags|= UNSIGNED_FLAG;
6860 }
6861 else
6862 {
6863 f->set_handler(&type_handler_timestamp2);
6864 f->length= MAX_DATETIME_PRECISION;
6865 }
6866 f->invisible= DBUG_EVALUATE_IF("sysvers_show", VISIBLE, INVISIBLE_SYSTEM);
6867
6868 if (f->check(thd))
6869 return NULL;
6870
6871 return f;
6872}
6873
6874static bool vers_create_sys_field(THD *thd, const char *field_name,
6875 Alter_info *alter_info, int flags)
6876{
6877 Create_field *f= vers_init_sys_field(thd, field_name, flags, false);
6878 if (!f)
6879 return true;
6880
6881 alter_info->flags|= ALTER_PARSER_ADD_COLUMN;
6882 alter_info->create_list.push_back(f);
6883
6884 return false;
6885}
6886
6887const Lex_ident Vers_parse_info::default_start= "row_start";
6888const Lex_ident Vers_parse_info::default_end= "row_end";
6889
6890bool Vers_parse_info::fix_implicit(THD *thd, Alter_info *alter_info)
6891{
6892 // If the user specified some of these, they must have specified the others too. Do nothing.
6893 if (*this)
6894 return false;
6895
6896 alter_info->flags|= ALTER_PARSER_ADD_COLUMN;
6897
6898 system_time= start_end_t(default_start, default_end);
6899 as_row= system_time;
6900
6901 if (vers_create_sys_field(thd, default_start, alter_info, VERS_SYS_START_FLAG) ||
6902 vers_create_sys_field(thd, default_end, alter_info, VERS_SYS_END_FLAG))
6903 {
6904 return true;
6905 }
6906 return false;
6907}
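/*
  Effect in SQL terms (a sketch of the implicit case handled above): a table
  declared as

    CREATE TABLE t1 (x INT) WITH SYSTEM VERSIONING;

  gets the invisible columns row_start and row_end (TIMESTAMP(6) NOT NULL)
  and an implicit PERIOD FOR SYSTEM_TIME(row_start, row_end), which is what
  fix_implicit() adds through vers_create_sys_field().
*/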
6908
6909bool Table_scope_and_contents_source_st::vers_native(THD *thd) const
6910{
6911 if (ha_check_storage_engine_flag(db_type, HTON_NATIVE_SYS_VERSIONING))
6912 return true;
6913
6914#ifdef WITH_PARTITION_STORAGE_ENGINE
6915 partition_info *info= thd->work_part_info;
6916 if (info && !(used_fields & HA_CREATE_USED_ENGINE))
6917 {
6918 if (handlerton *hton= info->default_engine_type)
6919 return ha_check_storage_engine_flag(hton, HTON_NATIVE_SYS_VERSIONING);
6920
6921 List_iterator_fast<partition_element> it(info->partitions);
6922 while (partition_element *partition_element= it++)
6923 {
6924 if (partition_element->find_engine_flag(HTON_NATIVE_SYS_VERSIONING))
6925 return true;
6926 }
6927 }
6928#endif
6929 return false;
6930}
6931
6932bool Table_scope_and_contents_source_st::vers_fix_system_fields(
6933 THD *thd, Alter_info *alter_info, const TABLE_LIST &create_table,
6934 bool create_select)
6935{
6936 DBUG_ASSERT(!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING));
6937
6938 DBUG_EXECUTE_IF("sysvers_force", if (!tmp_table()) {
6939 alter_info->flags|= ALTER_ADD_SYSTEM_VERSIONING;
6940 options|= HA_VERSIONED_TABLE; });
6941
6942 if (!vers_info.need_check(alter_info))
6943 return false;
6944
6945 if (!vers_info.versioned_fields && vers_info.unversioned_fields &&
6946 !(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING))
6947 {
6948 // All is correct but this table is not versioned.
6949 options&= ~HA_VERSIONED_TABLE;
6950 return false;
6951 }
6952
6953 if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING) && vers_info)
6954 {
6955 my_error(ER_MISSING, MYF(0), create_table.table_name.str,
6956 "WITH SYSTEM VERSIONING");
6957 return true;
6958 }
6959
6960 List_iterator<Create_field> it(alter_info->create_list);
6961 while (Create_field *f= it++)
6962 {
6963 if ((f->versioning == Column_definition::VERSIONING_NOT_SET &&
6964 !(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING)) ||
6965 f->versioning == Column_definition::WITHOUT_VERSIONING)
6966 {
6967 f->flags|= VERS_UPDATE_UNVERSIONED_FLAG;
6968 }
6969 } // while (Create_field *f= it++)
6970
6971 if (vers_info.fix_implicit(thd, alter_info))
6972 return true;
6973
6974 int plain_cols= 0; // columns with neither WITH nor WITHOUT SYSTEM VERSIONING
6975 int vers_cols= 0; // columns with explicit WITH SYSTEM VERSIONING
6976 it.rewind();
6977 while (const Create_field *f= it++)
6978 {
6979 if (vers_info.is_start(*f) || vers_info.is_end(*f))
6980 continue;
6981
6982 if (f->versioning == Column_definition::VERSIONING_NOT_SET)
6983 plain_cols++;
6984 else if (f->versioning == Column_definition::WITH_VERSIONING)
6985 vers_cols++;
6986 }
6987
6988 if (!thd->lex->tmp_table() &&
6989 // CREATE from SELECT (Create_fields are not yet added)
6990 !create_select && vers_cols == 0 && (plain_cols == 0 || !vers_info))
6991 {
6992 my_error(ER_VERS_TABLE_MUST_HAVE_COLUMNS, MYF(0),
6993 create_table.table_name.str);
6994 return true;
6995 }
6996
6997 return false;
6998}
6999
7000
7001bool Table_scope_and_contents_source_st::vers_check_system_fields(
7002 THD *thd, Alter_info *alter_info, const TABLE_LIST &create_table)
7003{
7004 if (!(options & HA_VERSIONED_TABLE))
7005 return false;
7006 return vers_info.check_sys_fields(create_table.table_name, create_table.db,
7007 alter_info, vers_native(thd));
7008}
7009
7010
7011bool Vers_parse_info::fix_alter_info(THD *thd, Alter_info *alter_info,
7012 HA_CREATE_INFO *create_info, TABLE *table)
7013{
7014 TABLE_SHARE *share= table->s;
7015 const char *table_name= share->table_name.str;
7016
7017 if (!need_check(alter_info) && !share->versioned)
7018 return false;
7019
7020 if (DBUG_EVALUATE_IF("sysvers_force", 0, share->tmp_table))
7021 {
7022 my_error(ER_VERS_TEMPORARY, MYF(0));
7023 return true;
7024 }
7025
7026 if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING &&
7027 table->versioned())
7028 {
7029 my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name);
7030 return true;
7031 }
7032
7033 if (alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING)
7034 {
7035 if (!share->versioned)
7036 {
7037 my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name);
7038 return true;
7039 }
7040#ifdef WITH_PARTITION_STORAGE_ENGINE
7041 if (table->part_info &&
7042 table->part_info->part_type == VERSIONING_PARTITION)
7043 {
7044 my_error(ER_DROP_VERSIONING_SYSTEM_TIME_PARTITION, MYF(0), table_name);
7045 return true;
7046 }
7047#endif
7048
7049 return false;
7050 }
7051
7052 {
7053 List_iterator_fast<Create_field> it(alter_info->create_list);
7054 while (Create_field *f= it++)
7055 {
7056 if (f->change.length && f->flags & VERS_SYSTEM_FIELD)
7057 {
7058 my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(0), f->field_name.str);
7059 return true;
7060 }
7061 }
7062 }
7063
7064 if ((alter_info->flags & ALTER_DROP_PERIOD ||
7065 versioned_fields || unversioned_fields) && !share->versioned)
7066 {
7067 my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name);
7068 return true;
7069 }
7070
7071 if (share->versioned)
7072 {
7073 if (alter_info->flags & ALTER_ADD_PERIOD)
7074 {
7075 my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name);
7076 return true;
7077 }
7078
7079 // copy info from existing table
7080 create_info->options|= HA_VERSIONED_TABLE;
7081
7082 DBUG_ASSERT(share->vers_start_field());
7083 DBUG_ASSERT(share->vers_end_field());
7084 Lex_ident start(share->vers_start_field()->field_name);
7085 Lex_ident end(share->vers_end_field()->field_name);
7086 DBUG_ASSERT(start.str);
7087 DBUG_ASSERT(end.str);
7088
7089 as_row= start_end_t(start, end);
7090 system_time= as_row;
7091
7092 if (alter_info->create_list.elements)
7093 {
7094 List_iterator_fast<Create_field> it(alter_info->create_list);
7095 while (Create_field *f= it++)
7096 {
7097 if (f->versioning == Column_definition::WITHOUT_VERSIONING)
7098 f->flags|= VERS_UPDATE_UNVERSIONED_FLAG;
7099
7100 if (f->change.str && (start.streq(f->change) || end.streq(f->change)))
7101 {
7102 my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(0), f->change.str);
7103 return true;
7104 }
7105 }
7106 }
7107
7108 return false;
7109 }
7110
7111 if (fix_implicit(thd, alter_info))
7112 return true;
7113
7114 if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING)
7115 {
7116 bool native= create_info->vers_native(thd);
7117 if (check_sys_fields(table_name, share->db, alter_info, native))
7118 return true;
7119 }
7120
7121 return false;
7122}
7123
7124bool
7125Vers_parse_info::fix_create_like(Alter_info &alter_info, HA_CREATE_INFO &create_info,
7126 TABLE_LIST &src_table, TABLE_LIST &table)
7127{
7128 List_iterator<Create_field> it(alter_info.create_list);
7129 Create_field *f, *f_start=NULL, *f_end= NULL;
7130
7131 DBUG_ASSERT(alter_info.create_list.elements > 2);
7132
7133 if (create_info.tmp_table())
7134 {
7135 int remove= 2;
7136 while (remove && (f= it++))
7137 {
7138 if (f->flags & VERS_SYSTEM_FIELD)
7139 {
7140 it.remove();
7141 remove--;
7142 }
7143 }
7144 DBUG_ASSERT(remove == 0);
7145 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
7146 ER_UNKNOWN_ERROR,
7147 "System versioning is stripped from temporary `%s.%s`",
7148 table.db.str, table.table_name.str);
7149 return false;
7150 }
7151
7152 while ((f= it++))
7153 {
7154 if (f->flags & VERS_SYS_START_FLAG)
7155 {
7156 f_start= f;
7157 if (f_end)
7158 break;
7159 }
7160 else if (f->flags & VERS_SYS_END_FLAG)
7161 {
7162 f_end= f;
7163 if (f_start)
7164 break;
7165 }
7166 }
7167
7168 if (!f_start || !f_end)
7169 {
7170 my_error(ER_MISSING, MYF(0), src_table.table_name.str,
7171 f_start ? "AS ROW END" : "AS ROW START");
7172 return true;
7173 }
7174
7175 as_row= start_end_t(f_start->field_name, f_end->field_name);
7176 system_time= as_row;
7177
7178 create_info.options|= HA_VERSIONED_TABLE;
7179 return false;
7180}
7181
7182bool Vers_parse_info::need_check(const Alter_info *alter_info) const
7183{
7184 return versioned_fields || unversioned_fields ||
7185 alter_info->flags & ALTER_ADD_PERIOD ||
7186 alter_info->flags & ALTER_DROP_PERIOD ||
7187 alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING ||
7188 alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING || *this;
7189}
7190
7191bool Vers_parse_info::check_conditions(const Lex_table_name &table_name,
7192 const Lex_table_name &db) const
7193{
7194 if (!as_row.start || !as_row.end)
7195 {
7196 my_error(ER_MISSING, MYF(0), table_name.str,
7197 as_row.start ? "AS ROW END" : "AS ROW START");
7198 return true;
7199 }
7200
7201 if (!system_time.start || !system_time.end)
7202 {
7203 my_error(ER_MISSING, MYF(0), table_name.str, "PERIOD FOR SYSTEM_TIME");
7204 return true;
7205 }
7206
7207 if (!as_row.start.streq(system_time.start) ||
7208 !as_row.end.streq(system_time.end))
7209 {
7210 my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str);
7211 return true;
7212 }
7213
7214 if (db.streq(MYSQL_SCHEMA_NAME))
7215 {
7216 my_error(ER_VERS_DB_NOT_SUPPORTED, MYF(0), MYSQL_SCHEMA_NAME.str);
7217 return true;
7218 }
7219 return false;
7220}
7221
7222bool Vers_parse_info::check_sys_fields(const Lex_table_name &table_name,
7223 const Lex_table_name &db,
7224 Alter_info *alter_info, bool native)
7225{
7226 if (check_conditions(table_name, db))
7227 return true;
7228
7229 List_iterator<Create_field> it(alter_info->create_list);
7230 uint found_flag= 0;
7231 while (Create_field *f= it++)
7232 {
7233 vers_sys_type_t f_check_unit= VERS_UNDEFINED;
7234 uint sys_flag= f->flags & VERS_SYSTEM_FIELD;
7235
7236 if (!sys_flag)
7237 continue;
7238
7239 if (sys_flag & found_flag)
7240 {
7241 my_error(ER_VERS_DUPLICATE_ROW_START_END, MYF(0),
7242 found_flag & VERS_SYS_START_FLAG ? "START" : "END",
7243 f->field_name.str);
7244 return true;
7245 }
7246
7247 found_flag|= sys_flag;
7248
7249 if ((f->type_handler() == &type_handler_datetime2 ||
7250 f->type_handler() == &type_handler_timestamp2) &&
7251 f->length == MAX_DATETIME_FULL_WIDTH)
7252 {
7253 f_check_unit= VERS_TIMESTAMP;
7254 }
7255 else if (native
7256 && f->type_handler() == &type_handler_longlong
7257 && (f->flags & UNSIGNED_FLAG)
7258 && f->length == (MY_INT64_NUM_DECIMAL_DIGITS - 1))
7259 {
7260 f_check_unit= VERS_TRX_ID;
7261 }
7262 else
7263 {
7264 if (!check_unit)
7265 check_unit= VERS_TIMESTAMP;
7266 goto error;
7267 }
7268
7269 if (f_check_unit)
7270 {
7271 if (check_unit)
7272 {
7273 if (check_unit == f_check_unit)
7274 {
7275 if (check_unit == VERS_TRX_ID && !TR_table::use_transaction_registry)
7276 {
7277 my_error(ER_VERS_TRT_IS_DISABLED, MYF(0));
7278 return true;
7279 }
7280 return false;
7281 }
7282 error:
7283 my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), f->field_name.str,
7284 check_unit == VERS_TIMESTAMP ?
7285 "TIMESTAMP(6)" :
7286 "BIGINT(20) UNSIGNED",
7287 table_name.str);
7288 return true;
7289 }
7290 check_unit= f_check_unit;
7291 }
7292 }
7293
7294 my_error(ER_MISSING, MYF(0), table_name.str, found_flag & VERS_SYS_START_FLAG ?
7295 "ROW END" : found_flag ? "ROW START" : "ROW START/END");
7296 return true;
7297}
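/*
  For reference, an explicit declaration that satisfies these checks looks
  like the sketch below; TIMESTAMP(6) is the generic variant, while natively
  versioned engines may instead use BIGINT(20) UNSIGNED transaction-id
  columns:

    CREATE TABLE t1 (
      x INT,
      row_start TIMESTAMP(6) GENERATED ALWAYS AS ROW START,
      row_end   TIMESTAMP(6) GENERATED ALWAYS AS ROW END,
      PERIOD FOR SYSTEM_TIME(row_start, row_end)
    ) WITH SYSTEM VERSIONING;
*/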
7298