handler.cc source code [MariaDB/sql/handler.cc]

/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
   Copyright (c) 2009, 2018, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
/** @file handler.cc

    @brief
  Handler-calling-functions
*/
22
23	#include "mariadb.h"
24	#include "sql_priv.h"
25	#include "unireg.h"
26	#include "rpl_rli.h"
27	#include "sql_cache.h" // query_cache, query_cache_*
28	#include "sql_connect.h" // global_table_stats
29	#include "key.h" // key_copy, key_unpack, key_cmp_if_same, key_cmp
30	#include "sql_table.h" // build_table_filename
31	#include "sql_parse.h" // check_stack_overrun
32	#include "sql_acl.h" // SUPER_ACL
33	#include "sql_base.h" // TDC_element
34	#include "discover.h" // extension_based_table_discovery, etc
35	#include "log_event.h" // *_rows_log_event
36	#include "create_options.h"
37	#include <myisampack.h>
38	#include "transaction.h"
39	#include "myisam.h"
40	#include "probes_mysql.h"
41	#include <mysql/psi/mysql_table.h>
42	#include "debug_sync.h" // DEBUG_SYNC
43	#include "sql_audit.h"
44	#include "ha_sequence.h"
45
46	#ifdef WITH_PARTITION_STORAGE_ENGINE
47	#include "ha_partition.h"
48	#endif
49
50	#ifdef WITH_ARIA_STORAGE_ENGINE
51	#include "../storage/maria/ha_maria.h"
52	#endif
53	#include "semisync_master.h"
54
55	#include "wsrep_mysqld.h"
56	#include "wsrep.h"
57	#include "wsrep_xid.h"
58
59	/*
60	While we have legacy_db_type, we have this array to
61	check for dups and to find handlerton from legacy_db_type.
62	Remove when legacy_db_type is finally gone
63	*/
64	st_plugin_int *hton2plugin[MAX_HA];
65
66	static handlerton *installed_htons[`128`];
67
68	#define BITMAP_STACKBUF_SIZE (128/8)
69
70	KEY_CREATE_INFO default_key_create_info=
71	{ HA_KEY_ALG_UNDEF, `0`, `0`, {NullS, `0`}, {NullS, `0`}, true };
72
73	/ number of entries in handlertons[] /
74	ulong total_ha= `0`;
75	/ number of storage engines (from handlertons[]) that support 2pc /
76	ulong total_ha_2pc= `0`;
77	#ifndef DBUG_OFF
78	/*
79	Number of non-mandatory 2pc handlertons whose initialization failed
80	to estimate total_ha_2pc value under supposition of the failures
81	have not occcured.
82	*/
83	ulong failed_ha_2pc= `0`;
84	#endif
85	/ size of savepoint storage area (see ha_init) /
86	ulong savepoint_alloc_size= `0`;
87
88	static const LEX_CSTRING sys_table_aliases[]=
89	{
90	{ STRING_WITH_LEN("INNOBASE") }, { STRING_WITH_LEN("INNODB") },
91	{ STRING_WITH_LEN("HEAP") }, { STRING_WITH_LEN("MEMORY") },
92	{ STRING_WITH_LEN("MERGE") }, { STRING_WITH_LEN("MRG_MYISAM") },
93	{ STRING_WITH_LEN("Maria") }, { STRING_WITH_LEN("Aria") },
94	{NullS, `0`}
95	};
96
97	const char *ha_row_type[] = {
98	"", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE"
99	};
100
101	const char *tx_isolation_names[] =
102	{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
103	NullS};
104	TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-`1`,"",
105	tx_isolation_names, NULL};
106
107	static TYPELIB known_extensions= {`0`,"known_exts", NULL, NULL};
108	uint known_extensions_id= `0`;
109
110	static int commit_one_phase_2(THD thd, bool* all, THD_TRANS *trans,
111	bool is_real_trans);
112
113
114	static plugin_ref ha_default_plugin(THD *thd)
115	{
116	if (thd->variables.table_plugin)
117	return thd->variables.table_plugin;
118	return my_plugin_lock(thd, global_system_variables.table_plugin);
119	}
120
121	static plugin_ref ha_default_tmp_plugin(THD *thd)
122	{
123	if (thd->variables.tmp_table_plugin)
124	return thd->variables.tmp_table_plugin;
125	if (global_system_variables.tmp_table_plugin)
126	return my_plugin_lock(thd, global_system_variables.tmp_table_plugin);
127	return ha_default_plugin(thd);
128	}
129
130
131	/* @brief*
132	Return the default storage engine handlerton for thread
133
134	SYNOPSIS
135	ha_default_handlerton(thd)
136	thd current thread
137
138	RETURN
139	pointer to handlerton
140	*/
141	handlerton ha_default_handlerton(THD thd)
142	{
143	plugin_ref plugin= ha_default_plugin(thd);
144	DBUG_ASSERT(plugin);
145	handlerton *hton= plugin_hton(plugin);
146	DBUG_ASSERT(hton);
147	return hton;
148	}
149
150
151	handlerton ha_default_tmp_handlerton(THD thd)
152	{
153	plugin_ref plugin= ha_default_tmp_plugin(thd);
154	DBUG_ASSERT(plugin);
155	handlerton *hton= plugin_hton(plugin);
156	DBUG_ASSERT(hton);
157	return hton;
158	}
159
160
161	/* @brief*
162	Return the storage engine handlerton for the supplied name
163
164	SYNOPSIS
165	ha_resolve_by_name(thd, name)
166	thd current thread
167	name name of storage engine
168
169	RETURN
170	pointer to storage engine plugin handle
171	*/
172	plugin_ref ha_resolve_by_name(THD thd, const* LEX_CSTRING *name,
173	bool tmp_table)
174	{
175	const LEX_CSTRING *table_alias;
176	plugin_ref plugin;
177
178	redo:
179	/ my_strnncoll is a macro and gcc doesn't do early expansion of macro /
180	if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
181	(const uchar *)name->str, name->length,
182	(const uchar *)STRING_WITH_LEN("DEFAULT"), `0`))
183	return tmp_table ? ha_default_tmp_plugin(thd) : ha_default_plugin(thd);
184
185	if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
186	{
187	handlerton *hton= plugin_hton(plugin);
188	if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
189	return plugin;
190
191	/*
192	unlocking plugin immediately after locking is relatively low cost.
193	*/
194	plugin_unlock(thd, plugin);
195	}
196
197	/*
198	We check for the historical aliases.
199	*/
200	for (table_alias= sys_table_aliases; table_alias->str; table_alias+= `2`)
201	{
202	if (!my_strnncoll(&my_charset_latin1,
203	(const uchar *)name->str, name->length,
204	(const uchar *)table_alias->str, table_alias->length))
205	{
206	name= table_alias + `1`;
207	goto redo;
208	}
209	}
210
211	return NULL;
212	}
213
214
215	plugin_ref ha_lock_engine(THD thd, const* handlerton *hton)
216	{
217	if (hton)
218	{
219	st_plugin_int *plugin= hton2plugin[hton->slot];
220	return my_plugin_lock(thd, plugin_int_to_ref(plugin));
221	}
222	return NULL;
223	}
224
225
226	handlerton ha_resolve_by_legacy_type(THD thd, enum legacy_db_type db_type)
227	{
228	plugin_ref plugin;
229	switch (db_type) {
230	case DB_TYPE_DEFAULT:
231	return ha_default_handlerton(thd);
232	default:
233	if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
234	(plugin= ha_lock_engine(thd, installed_htons[db_type])))
235	return plugin_hton(plugin);
236	/ fall through /
237	case DB_TYPE_UNKNOWN:
238	return NULL;
239	}
240	}
241
242
243	/**
244	Use other database handler if databasehandler is not compiled in.
245	*/
246	handlerton ha_checktype(THD thd, handlerton hton, bool* no_substitute)
247	{
248	if (ha_storage_engine_is_enabled(hton))
249	return hton;
250
251	if (no_substitute)
252	return NULL;
253
254	return ha_default_handlerton(thd);
255	} / ha_checktype /
256
257
258	handler get_new_handler(TABLE_SHARE share, MEM_ROOT *alloc,
259	handlerton *db_type)
260	{
261	handler *file;
262	DBUG_ENTER("get_new_handler");
263	DBUG_PRINT("enter", ("alloc: %p", alloc));
264
265	if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
266	{
267	if ((file= db_type->create(db_type, share, alloc)))
268	file->init();
269	DBUG_RETURN(file);
270	}
271	/*
272	Try the default table type
273	Here the call to current_thd() is ok as we call this function a lot of
274	times but we enter this branch very seldom.
275	*/
276	file= get_new_handler(share, alloc, ha_default_handlerton(current_thd));
277	DBUG_RETURN(file);
278	}
279
280
#ifdef WITH_PARTITION_STORAGE_ENGINE
/**
  Create and initialise a partition handler for the given partition info.

  @param part_info  partitioning description passed to the ha_partition
                    constructor

  @return the new handler, or NULL if allocation failed (an
          ER_OUTOFMEMORY error is raised) or initialize_partition()
          reported an error
*/
handler *get_ha_partition(partition_info *part_info)
{
  ha_partition *partition;
  DBUG_ENTER("get_ha_partition");
  if ((partition= new ha_partition(partition_hton, part_info)))
  {
    if (partition->initialize_partition(current_thd->mem_root))
    {
      delete partition;
      partition= 0;
    }
    else
      partition->init();
  }
  else
  {
    my_error(ER_OUTOFMEMORY, MYF(ME_FATALERROR),
             static_cast<int>(sizeof(ha_partition)));
  }
  DBUG_RETURN(((handler*) partition));
}
#endif
304
305	static const char **handler_errmsgs;
306
307	C_MODE_START
308	static const char *get_handler_errmsgs(int* nr)
309	{
310	return handler_errmsgs;
311	}
312	C_MODE_END
313
314
315	/**
316	Register handler error messages for use with my_error().
317
318	@retval
319	0 OK
320	@retval
321	!=0 Error
322	*/
323
324	int ha_init_errors(void)
325	{
326	#define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
327
328	/ Allocate a pointer array for the error message strings. /
329	/ Zerofill it to avoid uninitialized gaps. /
330	if (! (handler_errmsgs= (const char*) my_malloc(HA_ERR_ERRORS sizeof(char*),
331	MYF(MY_WME \| MY_ZEROFILL))))
332	return `1`;
333
334	/ Set the dedicated error messages. /
335	SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND));
336	SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY));
337	SETMSG(HA_ERR_RECORD_CHANGED, "Update which is recoverable");
338	SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
339	SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE));
340	SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE));
341	SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
342	SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
343	SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
344	SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE));
345	SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");
346	SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted");
347	SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL));
348	SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");
349	SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");
350	SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA));
351	SETMSG(HA_ERR_TO_BIG_ROW, "Too big row");
352	SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");
353	SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE));
354	SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");
355	SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE));
356	SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR));
357	SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE));
358	SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
359	SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL));
360	SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
361	SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK));
362	SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
363	SETMSG(HA_ERR_NO_REFERENCED_ROW, ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
364	SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
365	SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");
366	SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");
367	SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");
368	SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
369	SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");
370	SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED));
371	SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key");
372	SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
373	SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY));
374	SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED));
375	SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
376	SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
377	SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
378	SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
379	SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID");
380	SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
381	SETMSG(HA_ERR_DISK_FULL, ER_DEFAULT(ER_DISK_FULL));
382	SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search");
383	SETMSG(HA_ERR_FK_DEPTH_EXCEEDED, "Foreign key cascade delete/update exceeds");
384	SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
385
386	/ Register the error messages for use with my_error(). /
387	return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
388	}
389
390
391	/**
392	Unregister handler error messages.
393
394	@retval
395	0 OK
396	@retval
397	!=0 Error
398	*/
399	static int ha_finish_errors(void)
400	{
401	/ Allocate a pointer array for the error message strings. /
402	my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
403	my_free(handler_errmsgs);
404	handler_errmsgs= `0`;
405	return `0`;
406	}
407
408	static volatile int32 need_full_discover_for_existence= `0`;
409	static volatile int32 engines_with_discover_file_names= `0`;
410	static volatile int32 engines_with_discover= `0`;
411
412	static int full_discover_for_existence(handlerton , const* char , const* char *)
413	{ return `0`; }
414
415	static int ext_based_existence(handlerton , const* char , const* char *)
416	{ return `0`; }
417
418	static int hton_ext_based_table_discovery(handlerton hton, LEX_CSTRING db,
419	MY_DIR dir, handlerton::discovered_list result)
420	{
421	/*
422	tablefile_extensions[0] is the metadata file, see
423	the comment above tablefile_extensions declaration
424	*/
425	return extension_based_table_discovery(dir, hton->tablefile_extensions[`0`],
426	result);
427	}
428
429	static void update_discovery_counters(handlerton hton, int* val)
430	{
431	if (hton->discover_table_existence == full_discover_for_existence)
432	my_atomic_add32(&need_full_discover_for_existence, val);
433
434	if (hton->discover_table_names && hton->tablefile_extensions[`0`])
435	my_atomic_add32(&engines_with_discover_file_names, val);
436
437	if (hton->discover_table)
438	my_atomic_add32(&engines_with_discover, val);
439	}
440
441	int ha_finalize_handlerton(st_plugin_int *plugin)
442	{
443	handlerton hton= (handlerton )plugin->data;
444	DBUG_ENTER("ha_finalize_handlerton");
445
446	/ hton can be NULL here, if ha_initialize_handlerton() failed. /
447	if (!hton)
448	goto end;
449
450	switch (hton->state) {
451	case SHOW_OPTION_NO:
452	case SHOW_OPTION_DISABLED:
453	break;
454	case SHOW_OPTION_YES:
455	if (installed_htons[hton->db_type] == hton)
456	installed_htons[hton->db_type]= NULL;
457	break;
458	};
459
460	if (hton->panic)
461	hton->panic(hton, HA_PANIC_CLOSE);
462
463	if (plugin->plugin->deinit)
464	{
465	/*
466	Today we have no defined/special behavior for uninstalling
467	engine plugins.
468	*/
469	DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
470	if (plugin->plugin->deinit(NULL))
471	{
472	DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
473	plugin->name.str));
474	}
475	}
476
477	free_sysvar_table_options(hton);
478	update_discovery_counters(hton, -`1`);
479
480	/*
481	In case a plugin is uninstalled and re-installed later, it should
482	reuse an array slot. Otherwise the number of uninstall/install
483	cycles would be limited.
484	*/
485	if (hton->slot != HA_SLOT_UNDEF)
486	{
487	/ Make sure we are not unpluging another plugin /
488	DBUG_ASSERT(hton2plugin[hton->slot] == plugin);
489	DBUG_ASSERT(hton->slot < MAX_HA);
490	hton2plugin[hton->slot]= NULL;
491	}
492
493	my_free(hton);
494
495	end:
496	DBUG_RETURN(`0`);
497	}
498
499
500	int ha_initialize_handlerton(st_plugin_int *plugin)
501	{
502	handlerton *hton;
503	static const char *no_exts[]= { `0` };
504	DBUG_ENTER("ha_initialize_handlerton");
505	DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
506
507	hton= (handlerton )my_malloc(sizeof*(handlerton),
508	MYF(MY_WME \| MY_ZEROFILL));
509	if (hton == NULL)
510	{
511	sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
512	plugin->name.str);
513	goto err_no_hton_memory;
514	}
515
516	hton->tablefile_extensions= no_exts;
517	hton->discover_table_names= hton_ext_based_table_discovery;
518
519	hton->slot= HA_SLOT_UNDEF;
520	/ Historical Requirement /
521	plugin->data= hton; // shortcut for the future
522	if (plugin->plugin->init && plugin->plugin->init(hton))
523	{
524	sql_print_error("Plugin '%s' init function returned error.",
525	plugin->name.str);
526	goto err;
527	}
528
529	// hton_ext_based_table_discovery() works only when discovery
530	// is supported and the engine if file-based.
531	if (hton->discover_table_names == hton_ext_based_table_discovery &&
532	(!hton->discover_table \|\| !hton->tablefile_extensions[`0`]))
533	hton->discover_table_names= NULL;
534
535	// default discover_table_existence implementation
536	if (!hton->discover_table_existence && hton->discover_table)
537	{
538	if (hton->tablefile_extensions[`0`])
539	hton->discover_table_existence= ext_based_existence;
540	else
541	hton->discover_table_existence= full_discover_for_existence;
542	}
543
544	switch (hton->state) {
545	case SHOW_OPTION_NO:
546	break;
547	case SHOW_OPTION_YES:
548	{
549	uint tmp;
550	ulong fslot;
551
552	DBUG_EXECUTE_IF("unstable_db_type", {
553	static int i= (int) DB_TYPE_FIRST_DYNAMIC;
554	hton->db_type= (enum legacy_db_type)++i;
555	});
556
557	/ now check the db_type for conflict /
558	if (hton->db_type <= DB_TYPE_UNKNOWN \|\|
559	hton->db_type >= DB_TYPE_DEFAULT \|\|
560	installed_htons[hton->db_type])
561	{
562	int idx= (int) DB_TYPE_FIRST_DYNAMIC;
563
564	while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
565	idx++;
566
567	if (idx == (int) DB_TYPE_DEFAULT)
568	{
569	sql_print_warning("Too many storage engines!");
570	goto err_deinit;
571	}
572	if (hton->db_type != DB_TYPE_UNKNOWN)
573	sql_print_warning("Storage engine '%s' has conflicting typecode. "
574	"Assigning value %d.", plugin->plugin->name, idx);
575	hton->db_type= (enum legacy_db_type) idx;
576	}
577
578	/*
579	In case a plugin is uninstalled and re-installed later, it should
580	reuse an array slot. Otherwise the number of uninstall/install
581	cycles would be limited. So look for a free slot.
582	*/
583	DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
584	for (fslot= `0`; fslot < total_ha; fslot++)
585	{
586	if (!hton2plugin[fslot])
587	break;
588	}
589	if (fslot < total_ha)
590	hton->slot= fslot;
591	else
592	{
593	if (total_ha >= MAX_HA)
594	{
595	sql_print_error("Too many plugins loaded. Limit is %lu. "
596	"Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
597	goto err_deinit;
598	}
599	hton->slot= total_ha++;
600	}
601	installed_htons[hton->db_type]= hton;
602	tmp= hton->savepoint_offset;
603	hton->savepoint_offset= savepoint_alloc_size;
604	savepoint_alloc_size+= tmp;
605	hton2plugin[hton->slot]=plugin;
606	if (hton->prepare)
607	{
608	total_ha_2pc++;
609	if (tc_log && tc_log != get_tc_log_implementation())
610	{
611	total_ha_2pc--;
612	hton->prepare= `0`;
613	push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
614	ER_UNKNOWN_ERROR,
615	"Cannot enable tc-log at run-time. "
616	"XA features of %s are disabled",
617	plugin->name.str);
618	}
619	}
620	break;
621	}
622	/ fall through /
623	default:
624	hton->state= SHOW_OPTION_DISABLED;
625	break;
626	}
627
628	/*
629	This is entirely for legacy. We will create a new "disk based" hton and a
630	"memory" hton which will be configurable longterm. We should be able to
631	remove partition.
632	*/
633	switch (hton->db_type) {
634	case DB_TYPE_HEAP:
635	heap_hton= hton;
636	break;
637	case DB_TYPE_MYISAM:
638	myisam_hton= hton;
639	break;
640	case DB_TYPE_PARTITION_DB:
641	partition_hton= hton;
642	break;
643	case DB_TYPE_SEQUENCE:
644	sql_sequence_hton= hton;
645	break;
646	default:
647	break;
648	};
649
650	resolve_sysvar_table_options(hton);
651	update_discovery_counters(hton, `1`);
652
653	DBUG_RETURN(`0`);
654
655	err_deinit:
656	/*
657	Let plugin do its inner deinitialization as plugin->init()
658	was successfully called before.
659	*/
660	if (plugin->plugin->deinit)
661	(void) plugin->plugin->deinit(NULL);
662
663	err:
664	#ifndef DBUG_OFF
665	if (hton->prepare && hton->state == SHOW_OPTION_YES)
666	failed_ha_2pc++;
667	#endif
668	my_free(hton);
669	err_no_hton_memory:
670	plugin->data= NULL;
671	DBUG_RETURN(`1`);
672	}
673
674	int ha_init()
675	{
676	int error= `0`;
677	DBUG_ENTER("ha_init");
678
679	DBUG_ASSERT(total_ha < MAX_HA);
680	/*
681	Check if there is a transaction-capable storage engine besides the
682	binary log (which is considered a transaction-capable storage engine in
683	counting total_ha)
684	*/
685	opt_using_transactions= total_ha>(ulong)opt_bin_log;
686	savepoint_alloc_size+= sizeof(SAVEPOINT);
687	DBUG_RETURN(error);
688	}
689
690	int ha_end()
691	{
692	int error= `0`;
693	DBUG_ENTER("ha_end");
694
695
696	/*
697	This should be eventualy based on the graceful shutdown flag.
698	So if flag is equal to HA_PANIC_CLOSE, the deallocate
699	the errors.
700	*/
701	if (unlikely(ha_finish_errors()))
702	error= `1`;
703
704	DBUG_RETURN(error);
705	}
706
707	static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
708	void *path)
709	{
710	handlerton *hton= plugin_hton(plugin);
711	if (hton->state == SHOW_OPTION_YES && hton->drop_database)
712	hton->drop_database(hton, (char *)path);
713	return FALSE;
714	}
715
716
717	void ha_drop_database(char* path)
718	{
719	plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
720	}
721
722
723	static my_bool checkpoint_state_handlerton(THD *unused1, plugin_ref plugin,
724	void *disable)
725	{
726	handlerton *hton= plugin_hton(plugin);
727	if (hton->state == SHOW_OPTION_YES && hton->checkpoint_state)
728	hton->checkpoint_state(hton, (int) (bool**) disable);
729	return FALSE;
730	}
731
732
733	void ha_checkpoint_state(bool disable)
734	{
735	plugin_foreach(NULL, checkpoint_state_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &disable);
736	}
737
738
/* Argument bundle handed to commit_checkpoint_request_handlerton(). */
struct st_commit_checkpoint_request {
  void *cookie;                 /* value passed through to hook and engine */
  void (*pre_hook)(void *);     /* optional; called before each engine */
};
743
744	static my_bool commit_checkpoint_request_handlerton(THD *unused1, plugin_ref plugin,
745	void *data)
746	{
747	st_commit_checkpoint_request st= (st_commit_checkpoint_request )data;
748	handlerton *hton= plugin_hton(plugin);
749	if (hton->state == SHOW_OPTION_YES && hton->commit_checkpoint_request)
750	{
751	void *cookie= st->cookie;
752	if (st->pre_hook)
753	(*st->pre_hook)(cookie);
754	(*hton->commit_checkpoint_request)(hton, cookie);
755	}
756	return FALSE;
757	}
758
759
760	/*
761	Invoke commit_checkpoint_request() in all storage engines that implement it.
762
763	If pre_hook is non-NULL, the hook will be called prior to each invocation.
764	*/
765	void
766	ha_commit_checkpoint_request(void cookie, void* (pre_hook)(void* *))
767	{
768	st_commit_checkpoint_request st;
769	st.cookie= cookie;
770	st.pre_hook= pre_hook;
771	plugin_foreach(NULL, commit_checkpoint_request_handlerton,
772	MYSQL_STORAGE_ENGINE_PLUGIN, &st);
773	}
774
775
776
777	static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
778	void *unused)
779	{
780	handlerton *hton= plugin_hton(plugin);
781	/*
782	there's no need to rollback here as all transactions must
783	be rolled back already
784	*/
785	if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
786	{
787	if (hton->close_connection)
788	hton->close_connection(hton, thd);
789	/ make sure ha_data is reset and ha_data_lock is released /
790	thd_set_ha_data(thd, hton, NULL);
791	}
792	return FALSE;
793	}
794
795	/**
796	@note
797	don't bother to rollback here, it's done already
798	*/
799	void ha_close_connection(THD* thd)
800	{
801	plugin_foreach_with_mask(thd, closecon_handlerton,
802	MYSQL_STORAGE_ENGINE_PLUGIN,
803	PLUGIN_IS_DELETED\|PLUGIN_IS_READY, `0`);
804	}
805
806	static my_bool kill_handlerton(THD *thd, plugin_ref plugin,
807	void *level)
808	{
809	handlerton *hton= plugin_hton(plugin);
810
811	if (hton->state == SHOW_OPTION_YES && hton->kill_query &&
812	thd_get_ha_data(thd, hton))
813	hton->kill_query(hton, thd, (enum* thd_kill_levels *) level);
814	return FALSE;
815	}
816
817	void ha_kill_query(THD* thd, enum thd_kill_levels level)
818	{
819	DBUG_ENTER("ha_kill_query");
820	plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &level);
821	DBUG_VOID_RETURN;
822	}
823
824
/* ========================================================================
 ======================= TRANSACTIONS ===================================*/
827
828	/**
829	Transaction handling in the server
830	==================================
831
832	In each client connection, MySQL maintains two transactional
833	states:
834	- a statement transaction,
835	- a standard, also called normal transaction.
836
837	Historical note
838	---------------
839	"Statement transaction" is a non-standard term that comes
840	from the times when MySQL supported BerkeleyDB storage engine.
841
842	First of all, it should be said that in BerkeleyDB auto-commit
843	mode auto-commits operations that are atomic to the storage
844	engine itself, such as a write of a record, and are too
845	high-granular to be atomic from the application perspective
846	(MySQL). One SQL statement could involve many BerkeleyDB
847	auto-committed operations and thus BerkeleyDB auto-commit was of
848	little use to MySQL.
849
850	Secondly, instead of SQL standard savepoints, BerkeleyDB
851	provided the concept of "nested transactions". In a nutshell,
852	transactions could be arbitrarily nested, but when the parent
853	transaction was committed or aborted, all its child (nested)
854	transactions were handled committed or aborted as well.
855	Commit of a nested transaction, in turn, made its changes
856	visible, but not durable: it destroyed the nested transaction,
857	all its changes would become available to the parent and
858	currently active nested transactions of this parent.
859
860	So the mechanism of nested transactions was employed to
861	provide "all or nothing" guarantee of SQL statements
862	required by the standard.
863	A nested transaction would be created at start of each SQL
864	statement, and destroyed (committed or aborted) at statement
865	end. Such nested transaction was internally referred to as
866	a "statement transaction" and gave birth to the term.
867
868	(Historical note ends)
869
870	Since then a statement transaction is started for each statement
871	that accesses transactional tables or uses the binary log. If
872	the statement succeeds, the statement transaction is committed.
873	If the statement fails, the transaction is rolled back. Commits
874	of statement transactions are not durable -- each such
875	transaction is nested in the normal transaction, and if the
876	normal transaction is rolled back, the effects of all enclosed
877	statement transactions are undone as well. Technically,
878	a statement transaction can be viewed as a savepoint which is
879	maintained automatically in order to make effects of one
880	statement atomic.
881
882	The normal transaction is started by the user and is ended
883	usually upon a user request as well. The normal transaction
884	encloses transactions of all statements issued between
885	its beginning and its end.
886	In autocommit mode, the normal transaction is equivalent
887	to the statement transaction.
888
889	Since MySQL supports PSEA (pluggable storage engine
890	architecture), more than one transactional engine can be
891	active at a time. Hence transactions, from the server
892	point of view, are always distributed. In particular,
893	transactional state is maintained independently for each
894	engine. In order to commit a transaction the two phase
895	commit protocol is employed.
896
897	Not all statements are executed in context of a transaction.
898	Administrative and status information statements do not modify
899	engine data, and thus do not start a statement transaction and
900	also have no effect on the normal transaction. Examples of such
901	statements are SHOW STATUS and RESET SLAVE.
902
903	Similarly DDL statements are not transactional,
904	and therefore a transaction is [almost] never started for a DDL
905	statement. The difference between a DDL statement and a purely
906	administrative statement though is that a DDL statement always
907	commits the current transaction before proceeding, if there is
908	any.
909
910	At last, SQL statements that work with non-transactional
911	engines also have no effect on the transaction state of the
912	connection. Even though they are written to the binary log,
913	and the binary log is, overall, transactional, the writes
914	are done in "write-through" mode, directly to the binlog
915	file, followed with a OS cache sync, in other words,
916	bypassing the binlog undo log (translog).
917	They do not commit the current normal transaction.
918	A failure of a statement that uses non-transactional tables
919	would cause a rollback of the statement transaction, but
920	in case there no non-transactional tables are used,
921	no statement transaction is started.
922
923	Data layout
924	-----------
925
926	The server stores its transaction-related data in
927	thd->transaction. This structure has two members of type
928	THD_TRANS. These members correspond to the statement and
929	normal transactions respectively:
930
931	- thd->transaction.stmt contains a list of engines
932	that are participating in the given statement
933	- thd->transaction.all contains a list of engines that
934	have participated in any of the statement transactions started
935	within the context of the normal transaction.
936	Each element of the list contains a pointer to the storage
937	engine, engine-specific transactional data, and engine-specific
938	transaction flags.
939
940	In autocommit mode thd->transaction.all is empty.
941	Instead, data of thd->transaction.stmt is
942	used to commit/rollback the normal transaction.
943
944	The list of registered engines has a few important properties:
945	- no engine is registered in the list twice
  - engines are present in the list in a reverse temporal order --
    new participants are always added to the beginning of the list.
948
949	Transaction life cycle
950	----------------------
951
952	When a new connection is established, thd->transaction
953	members are initialized to an empty state.
954	If a statement uses any tables, all affected engines
955	are registered in the statement engine list. In
956	non-autocommit mode, the same engines are registered in
957	the normal transaction list.
958	At the end of the statement, the server issues a commit
959	or a roll back for all engines in the statement list.
960	At this point transaction flags of an engine, if any, are
961	propagated from the statement list to the list of the normal
962	transaction.
963	When commit/rollback is finished, the statement list is
964	cleared. It will be filled in again by the next statement,
965	and emptied again at the next statement's end.
966
967	The normal transaction is committed in a similar way
968	(by going over all engines in thd->transaction.all list)
969	but at different times:
  - when a COMMIT SQL statement is issued by the user
  - implicitly, by the server, at the beginning of a DDL statement
    or SET AUTOCOMMIT={0|1} statement.
973
974	The normal transaction can be rolled back as well:
975	- if the user has requested so, by issuing ROLLBACK SQL
976	statement
977	- if one of the storage engines requested a rollback
978	by setting thd->transaction_rollback_request. This may
979	happen, e.g., when the transaction in the engine was
980	chosen as a victim of the internal deadlock resolution algorithm
981	and rolled back internally. When such a situation happens, there
982	is little the server can do and the only option is to rollback
983	transactions in all other participating engines. In this case
984	the rollback is accompanied by an error sent to the user.
985
986	As follows from the use cases above, the normal transaction
987	is never committed when there is an outstanding statement
988	transaction. In most cases there is no conflict, since
989	commits of the normal transaction are issued by a stand-alone
990	administrative or DDL statement, thus no outstanding statement
991	transaction of the previous statement exists. Besides,
992	all statements that manipulate with the normal transaction
993	are prohibited in stored functions and triggers, therefore
994	no conflicting situation can occur in a sub-statement either.
995	The remaining rare cases when the server explicitly has
996	to commit the statement transaction prior to committing the normal
997	one cover error-handling scenarios (see for example
998	SQLCOM_LOCK_TABLES).
999
1000	When committing a statement or a normal transaction, the server
1001	either uses the two-phase commit protocol, or issues a commit
1002	in each engine independently. The two-phase commit protocol
1003	is used only if:
1004	- all participating engines support two-phase commit (provide
1005	handlerton::prepare PSEA API call) and
1006	- transactions in at least two engines modify data (i.e. are
1007	not read-only).
1008
1009	Note that the two phase commit is used for
1010	statement transactions, even though they are not durable anyway.
1011	This is done to ensure logical consistency of data in a multiple-
1012	engine transaction.
1013	For example, imagine that some day MySQL supports unique
1014	constraint checks deferred till the end of statement. In such
1015	case a commit in one of the engines may yield ER_DUP_KEY,
1016	and MySQL should be able to gracefully abort statement
1017	transactions of other participants.
1018
1019	After the normal transaction has been committed,
1020	thd->transaction.all list is cleared.
1021
1022	When a connection is closed, the current normal transaction, if
1023	any, is rolled back.
1024
1025	Roles and responsibilities
1026	--------------------------
1027
1028	The server has no way to know that an engine participates in
1029	the statement and a transaction has been started
1030	in it unless the engine says so. Thus, in order to be
1031	a part of a transaction, the engine must "register" itself.
1032	This is done by invoking trans_register_ha() server call.
1033	Normally the engine registers itself whenever handler::external_lock()
1034	is called. trans_register_ha() can be invoked many times: if
1035	an engine is already registered, the call does nothing.
1036	In case autocommit is not set, the engine must register itself
1037	twice -- both in the statement list and in the normal transaction
1038	list.
1039	In which list to register is a parameter of trans_register_ha().
1040
1041	Note, that although the registration interface in itself is
1042	fairly clear, the current usage practice often leads to undesired
1043	effects. E.g. since a call to trans_register_ha() in most engines
1044	is embedded into implementation of handler::external_lock(), some
1045	DDL statements start a transaction (at least from the server
1046	point of view) even though they are not expected to. E.g.
1047	CREATE TABLE does not start a transaction, since
1048	handler::external_lock() is never called during CREATE TABLE. But
1049	CREATE TABLE ... SELECT does, since handler::external_lock() is
1050	called for the table that is being selected from. This has no
1051	practical effects currently, but must be kept in mind
1052	nevertheless.
1053
1054	Once an engine is registered, the server will do the rest
1055	of the work.
1056
1057	During statement execution, whenever any of data-modifying
1058	PSEA API methods is used, e.g. handler::write_row() or
1059	handler::update_row(), the read-write flag is raised in the
1060	statement transaction for the involved engine.
1061	Currently all PSEA calls are "traced", and the data can not be
1062	changed in a way other than issuing a PSEA call. Important:
1063	unless this invariant is preserved the server will not know that
1064	a transaction in a given engine is read-write and will not
1065	involve the two-phase commit protocol!
1066
1067	At the end of a statement, server call trans_commit_stmt is
1068	invoked. This call in turn invokes handlerton::prepare()
1069	for every involved engine. Prepare is followed by a call
1070	to handlerton::commit_one_phase(). If a one-phase commit
1071	will suffice, handlerton::prepare() is not invoked and
1072	the server only calls handlerton::commit_one_phase().
1073	At statement commit, the statement-related read-write
1074	engine flag is propagated to the corresponding flag in the
1075	normal transaction. When the commit is complete, the list
1076	of registered engines is cleared.
1077
1078	Rollback is handled in a similar fashion.
1079
1080	Additional notes on DDL and the normal transaction.
1081	---------------------------------------------------
1082
1083	DDLs and operations with non-transactional engines
1084	do not "register" in thd->transaction lists, and thus do not
1085	modify the transaction state. Besides, each DDL in
1086	MySQL is prefixed with an implicit normal transaction commit
1087	(a call to trans_commit_implicit()), and thus leaves nothing
1088	to modify.
1089	However, as it has been pointed out with CREATE TABLE .. SELECT,
1090	some DDL statements can start a *new* transaction.
1091
1092	Behaviour of the server in this case is currently badly
1093	defined.
1094	DDL statements use a form of "semantic" logging
1095	to maintain atomicity: if CREATE TABLE .. SELECT failed,
1096	the newly created table is deleted.
1097	In addition, some DDL statements issue interim transaction
1098	commits: e.g. ALTER TABLE issues a commit after data is copied
1099	from the original table to the internal temporary table. Other
1100	statements, e.g. CREATE TABLE ... SELECT do not always commit
1101	after itself.
1102	And finally there is a group of DDL statements such as
1103	RENAME/DROP TABLE that doesn't start a new transaction
1104	and doesn't commit.
1105
1106	This diversity makes it hard to say what will happen if
1107	by chance a stored function is invoked during a DDL --
1108	whether any modifications it makes will be committed or not
1109	is not clear. Fortunately, SQL grammar of few DDLs allows
1110	invocation of a stored function.
1111
1112	A consistent behaviour is perhaps to always commit the normal
1113	transaction after all DDLs, just like the statement transaction
1114	is always committed at the end of all statements.
1115	*/
1116
1117	/**
1118	Register a storage engine for a transaction.
1119
1120	Every storage engine MUST call this function when it starts
1121	a transaction or a statement (that is it must be called both for the
1122	"beginning of transaction" and "beginning of statement").
1123	Only storage engines registered for the transaction/statement
1124	will know when to commit/rollback it.
1125
1126	@note
1127	trans_register_ha is idempotent - storage engine may register many
1128	times per transaction.
1129
1130	*/
1131	void trans_register_ha(THD thd, bool* all, handlerton *ht_arg)
1132	{
1133	THD_TRANS *trans;
1134	Ha_trx_info *ha_info;
1135	DBUG_ENTER("trans_register_ha");
1136	DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1137
1138	if (all)
1139	{
1140	trans= &thd->transaction.all;
1141	thd->server_status\|= SERVER_STATUS_IN_TRANS;
1142	if (thd->tx_read_only)
1143	thd->server_status\|= SERVER_STATUS_IN_TRANS_READONLY;
1144	DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1145	}
1146	else
1147	trans= &thd->transaction.stmt;
1148
1149	ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? `1` : `0`);
1150
1151	if (ha_info->is_started())
1152	DBUG_VOID_RETURN; / already registered, return /
1153
1154	ha_info->register_ha(trans, ht_arg);
1155
1156	trans->no_2pc\|=(ht_arg->prepare==`0`);
1157	if (thd->transaction.xid_state.xid.is_null())
1158	thd->transaction.xid_state.xid.set(thd->query_id);
1159	DBUG_VOID_RETURN;
1160	}
1161
1162
1163	static int prepare_or_error(handlerton ht, THD thd, bool all)
1164	{
1165	int err= ht->prepare(ht, thd, all);
1166	status_var_increment(thd->status_var.ha_prepare_count);
1167	if (err)
1168	{
1169	/ avoid sending error, if we're going to replay the transaction /
1170	#ifdef WITH_WSREP
1171	if (ht != wsrep_hton \|\|
1172	err == EMSGSIZE \|\| thd->wsrep_conflict_state != MUST_REPLAY)
1173	#endif
1174	my_error(ER_ERROR_DURING_COMMIT, MYF(`0`), err);
1175	}
1176	return err;
1177	}
1178
1179
1180	/**
1181	@retval
1182	0 ok
1183	@retval
1184	1 error, transaction was rolled back
1185	*/
1186	int ha_prepare(THD *thd)
1187	{
1188	int error=`0`, all=`1`;
1189	THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1190	Ha_trx_info *ha_info= trans->ha_list;
1191	DBUG_ENTER("ha_prepare");
1192
1193	if (ha_info)
1194	{
1195	for (; ha_info; ha_info= ha_info->next())
1196	{
1197	handlerton *ht= ha_info->ht();
1198	if (ht->prepare)
1199	{
1200	if (unlikely(prepare_or_error(ht, thd, all)))
1201	{
1202	ha_rollback_trans(thd, all);
1203	error=`1`;
1204	break;
1205	}
1206	}
1207	else
1208	{
1209	push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1210	ER_GET_ERRNO, ER_THD(thd, ER_GET_ERRNO),
1211	HA_ERR_WRONG_COMMAND,
1212	ha_resolve_storage_engine_name(ht));
1213
1214	}
1215	}
1216	}
1217
1218	DBUG_RETURN(error);
1219	}
1220
1221	/**
1222	Check if we can skip the two-phase commit.
1223
1224	A helper function to evaluate if two-phase commit is mandatory.
1225	As a side effect, propagates the read-only/read-write flags
1226	of the statement transaction to its enclosing normal transaction.
1227
1228	If we have at least two engines with read-write changes we must
1229	run a two-phase commit. Otherwise we can run several independent
1230	commits as the only transactional engine has read-write changes
1231	and others are read-only.
1232
1233	@retval 0 All engines are read-only.
1234	@retval 1 We have the only engine with read-write changes.
1235	@retval >1 More than one engine have read-write changes.
1236	Note: return value might NOT be the exact number of
1237	engines with read-write changes.
1238	*/
1239
1240	static
1241	uint
1242	ha_check_and_coalesce_trx_read_only(THD thd, Ha_trx_info ha_list,
1243	bool all)
1244	{
1245	/ The number of storage engines that have actual changes. /
1246	unsigned rw_ha_count= `0`;
1247	Ha_trx_info *ha_info;
1248
1249	for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1250	{
1251	if (ha_info->is_trx_read_write())
1252	++rw_ha_count;
1253
1254	if (! all)
1255	{
1256	Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[`1`];
1257	DBUG_ASSERT(ha_info != ha_info_all);
1258	/*
1259	Merge read-only/read-write information about statement
1260	transaction to its enclosing normal transaction. Do this
1261	only if in a real transaction -- that is, if we know
1262	that ha_info_all is registered in thd->transaction.all.
1263	Since otherwise we only clutter the normal transaction flags.
1264	*/
1265	if (ha_info_all->is_started()) / FALSE if autocommit. /
1266	ha_info_all->coalesce_trx_with(ha_info);
1267	}
1268	else if (rw_ha_count > `1`)
1269	{
1270	/*
1271	It is a normal transaction, so we don't need to merge read/write
1272	information up, and the need for two-phase commit has been
1273	already established. Break the loop prematurely.
1274	*/
1275	break;
1276	}
1277	}
1278	return rw_ha_count;
1279	}
1280
1281
1282	/**
1283	@retval
1284	0 ok
1285	@retval
1286	1 transaction was rolled back
1287	@retval
1288	2 error during commit, data may be inconsistent
1289
1290	@todo
1291	Since we don't support nested statement transactions in 5.0,
1292	we can't commit or rollback stmt transactions while we are inside
1293	stored functions or triggers. So we simply do nothing now.
1294	TODO: This should be fixed in later ( >= 5.1) releases.
1295	*/
1296	int ha_commit_trans(THD thd, bool* all)
1297	{
1298	int error= `0`, cookie;
1299	/*
1300	'all' means that this is either an explicit commit issued by
1301	user, or an implicit commit issued by a DDL.
1302	*/
1303	THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
1304	/*
1305	"real" is a nick name for a transaction for which a commit will
1306	make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1307	transation is not 'real': even though it's possible to commit it,
1308	the changes are not durable as they might be rolled back if the
1309	enclosing 'all' transaction is rolled back.
1310	*/
1311	bool is_real_trans= ((all \|\| thd->transaction.all.ha_list == `0`) &&
1312	!(thd->variables.option_bits & OPTION_GTID_BEGIN));
1313	Ha_trx_info *ha_info= trans->ha_list;
1314	bool need_prepare_ordered, need_commit_ordered;
1315	my_xid xid;
1316	DBUG_ENTER("ha_commit_trans");
1317	DBUG_PRINT("info",("thd: %p option_bits: %lu all: %d",
1318	thd, (ulong) thd->variables.option_bits, all));
1319
1320	/ Just a random warning to test warnings pushed during autocommit. /
1321	DBUG_EXECUTE_IF("warn_during_ha_commit_trans",
1322	push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
1323	ER_WARNING_NOT_COMPLETE_ROLLBACK,
1324	ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK)););
1325
1326	DBUG_PRINT("info",
1327	("all: %d thd->in_sub_stmt: %d ha_info: %p is_real_trans: %d",
1328	all, thd->in_sub_stmt, ha_info, is_real_trans));
1329	/*
1330	We must not commit the normal transaction if a statement
1331	transaction is pending. Otherwise statement transaction
1332	flags will not get propagated to its normal transaction's
1333	counterpart.
1334	*/
1335	DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL \|\|
1336	trans == &thd->transaction.stmt);
1337
1338	if (thd->in_sub_stmt)
1339	{
1340	DBUG_ASSERT(`0`);
1341	/*
1342	Since we don't support nested statement transactions in 5.0,
1343	we can't commit or rollback stmt transactions while we are inside
1344	stored functions or triggers. So we simply do nothing now.
1345	TODO: This should be fixed in later ( >= 5.1) releases.
1346	*/
1347	if (!all)
1348	DBUG_RETURN(`0`);
1349	/*
1350	We assume that all statements which commit or rollback main transaction
1351	are prohibited inside of stored functions or triggers. So they should
1352	bail out with error even before ha_commit_trans() call. To be 100% safe
1353	let us throw error in non-debug builds.
1354	*/
1355	my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(`0`));
1356	DBUG_RETURN(`2`);
1357	}
1358
1359	#ifdef WITH_ARIA_STORAGE_ENGINE
1360	ha_maria::implicit_commit(thd, TRUE);
1361	#endif
1362
1363	if (!ha_info)
1364	{
1365	/*
1366	Free resources and perform other cleanup even for 'empty' transactions.
1367	*/
1368	if (is_real_trans)
1369	thd->transaction.cleanup();
1370	DBUG_RETURN(`0`);
1371	}
1372
1373	DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1374
1375	/ Close all cursors that can not survive COMMIT /
1376	if (is_real_trans) / not a statement commit /
1377	thd->stmt_map.close_transient_cursors();
1378
1379	uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1380	/ rw_trans is TRUE when we in a transaction changing data /
1381	bool rw_trans= is_real_trans &&
1382	(rw_ha_count > (thd->is_current_stmt_binlog_disabled()?`0U`:`1U`));
1383	MDL_request mdl_request;
1384	DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d",
1385	is_real_trans, rw_trans, rw_ha_count));
1386
1387	if (rw_trans)
1388	{
1389	/*
1390	Acquire a metadata lock which will ensure that COMMIT is blocked
1391	by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1392	COMMIT in progress blocks FTWRL).
1393
1394	We allow the owner of FTWRL to COMMIT; we assume that it knows
1395	what it does.
1396	*/
1397	mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
1398	MDL_EXPLICIT);
1399
1400	if (!WSREP(thd) &&
1401	thd->mdl_context.acquire_lock(&mdl_request,
1402	thd->variables.lock_wait_timeout))
1403	{
1404	ha_rollback_trans(thd, all);
1405	DBUG_RETURN(`1`);
1406	}
1407
1408	DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1409	}
1410
1411	if (rw_trans &&
1412	opt_readonly &&
1413	!(thd->security_ctx->master_access & SUPER_ACL) &&
1414	!thd->slave_thread)
1415	{
1416	my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(`0`), "--read-only");
1417	goto err;
1418	}
1419
1420	#if 1 // FIXME: This should be done in ha_prepare().
1421	if (rw_trans \|\| (thd->lex->sql_command == SQLCOM_ALTER_TABLE &&
1422	thd->lex->alter_info.flags & ALTER_ADD_SYSTEM_VERSIONING))
1423	{
1424	ulonglong trx_start_id= `0`, trx_end_id= `0`;
1425	for (Ha_trx_info *ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
1426	{
1427	if (ha_info->ht()->prepare_commit_versioned)
1428	{
1429	trx_end_id= ha_info->ht()->prepare_commit_versioned(thd, &trx_start_id);
1430	if (trx_end_id)
1431	break; // FIXME: use a common ID for cross-engine transactions
1432	}
1433	}
1434
1435	if (trx_end_id)
1436	{
1437	if (!TR_table::use_transaction_registry)
1438	{
1439	my_error(ER_VERS_TRT_IS_DISABLED, MYF(`0`));
1440	goto err;
1441	}
1442	DBUG_ASSERT(trx_start_id);
1443	TR_table trt(thd, true);
1444	if (trt.update(trx_start_id, trx_end_id))
1445	goto err;
1446	// Here, the call will not commit inside InnoDB. It is only working
1447	// around closing thd->transaction.stmt open by TR_table::open().
1448	if (all)
1449	commit_one_phase_2(thd, false, &thd->transaction.stmt, false);
1450	}
1451	}
1452	#endif
1453
1454	if (trans->no_2pc \|\| (rw_ha_count <= `1`))
1455	{
1456	error= ha_commit_one_phase(thd, all);
1457	goto done;
1458	}
1459
1460	need_prepare_ordered= FALSE;
1461	need_commit_ordered= FALSE;
1462	xid= thd->transaction.xid_state.xid.get_my_xid();
1463
1464	for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
1465	{
1466	handlerton *ht= hi->ht();
1467	/*
1468	Do not call two-phase commit if this particular
1469	transaction is read-only. This allows for simpler
1470	implementation in engines that are always read-only.
1471	*/
1472	if (! hi->is_trx_read_write())
1473	continue;
1474	/*
1475	Sic: we know that prepare() is not NULL since otherwise
1476	trans->no_2pc would have been set.
1477	*/
1478	if (unlikely(prepare_or_error(ht, thd, all)))
1479	goto err;
1480
1481	need_prepare_ordered\|= (ht->prepare_ordered != NULL);
1482	need_commit_ordered\|= (ht->commit_ordered != NULL);
1483	}
1484	DEBUG_SYNC(thd, "ha_commit_trans_after_prepare");
1485	DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
1486
1487	#ifdef WITH_WSREP
1488	if (!error && WSREP_ON && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid))
1489	{
1490	// xid was rewritten by wsrep
1491	xid= wsrep_xid_seqno(thd->transaction.xid_state.xid);
1492	}
1493	#endif /* WITH_WSREP */
1494
1495	if (!is_real_trans)
1496	{
1497	error= commit_one_phase_2(thd, all, trans, is_real_trans);
1498	goto done;
1499	}
1500
1501	DEBUG_SYNC(thd, "ha_commit_trans_before_log_and_order");
1502	cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
1503	need_commit_ordered);
1504	if (!cookie)
1505	goto err;
1506
1507	DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order");
1508	DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
1509
1510	error= commit_one_phase_2(thd, all, trans, is_real_trans) ? `2` : `0`;
1511
1512	DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
1513	if (tc_log->unlog(cookie, xid))
1514	{
1515	error= `2`; / Error during commit /
1516	goto end;
1517	}
1518
1519	done:
1520	DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
1521
1522	mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
1523	mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock());
1524	mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
1525	mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
1526	#ifdef HAVE_REPLICATION
1527	repl_semisync_master.wait_after_commit(thd, all);
1528	DEBUG_SYNC(thd, "after_group_after_commit");
1529	#endif
1530	goto end;
1531
1532	/ Come here if error and we need to rollback. /
1533	err:
1534	error= `1`; / Transaction was rolled back /
1535	/*
1536	In parallel replication, rollback is delayed, as there is extra replication
1537	book-keeping to be done before rolling back and allowing a conflicting
1538	transaction to continue (MDEV-7458).
1539	*/
1540	if (!(thd->rgi_slave && thd->rgi_slave->is_parallel_exec))
1541	ha_rollback_trans(thd, all);
1542
1543	end:
1544	if (rw_trans && mdl_request.ticket)
1545	{
1546	/*
1547	We do not always immediately release transactional locks
1548	after ha_commit_trans() (see uses of ha_enable_transaction()),
1549	thus we release the commit blocker lock as soon as it's
1550	not needed.
1551	*/
1552	thd->mdl_context.release_lock(mdl_request.ticket);
1553	}
1554	DBUG_RETURN(error);
1555	}
1556
1557	/**
1558	@note
1559	This function does not care about global read lock. A caller should.
1560
1561	@param[in] all Is set in case of explicit commit
1562	(COMMIT statement), or implicit commit
1563	issued by DDL. Is not set when called
1564	at the end of statement, even if
1565	autocommit=1.
1566	*/
1567
1568	int ha_commit_one_phase(THD thd, bool* all)
1569	{
1570	THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1571	/*
1572	"real" is a nick name for a transaction for which a commit will
1573	make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1574	transaction is not 'real': even though it's possible to commit it,
1575	the changes are not durable as they might be rolled back if the
1576	enclosing 'all' transaction is rolled back.
1577	We establish the value of 'is_real_trans' by checking
1578	if it's an explicit COMMIT/BEGIN statement, or implicit
1579	commit issued by DDL (all == TRUE), or if we're running
1580	in autocommit mode (it's only in the autocommit mode
1581	ha_commit_one_phase() can be called with an empty
1582	transaction.all.ha_list, see why in trans_register_ha()).
1583	*/
1584	bool is_real_trans= ((all \|\| thd->transaction.all.ha_list == `0`) &&
1585	!(thd->variables.option_bits & OPTION_GTID_BEGIN));
1586	int res;
1587	DBUG_ENTER("ha_commit_one_phase");
1588	if (is_real_trans)
1589	{
1590	DEBUG_SYNC(thd, "ha_commit_one_phase");
1591	if ((res= thd->wait_for_prior_commit()))
1592	DBUG_RETURN(res);
1593	}
1594	res= commit_one_phase_2(thd, all, trans, is_real_trans);
1595	DBUG_RETURN(res);
1596	}
1597
1598
1599	static int
1600	commit_one_phase_2(THD thd, bool* all, THD_TRANS trans, bool* is_real_trans)
1601	{
1602	int error= `0`;
1603	uint count= `0`;
1604	Ha_trx_info ha_info= trans->ha_list, ha_info_next;
1605	DBUG_ENTER("commit_one_phase_2");
1606	if (is_real_trans)
1607	DEBUG_SYNC(thd, "commit_one_phase_2");
1608	if (ha_info)
1609	{
1610	for (; ha_info; ha_info= ha_info_next)
1611	{
1612	int err;
1613	handlerton *ht= ha_info->ht();
1614	if ((err= ht->commit(ht, thd, all)))
1615	{
1616	my_error(ER_ERROR_DURING_COMMIT, MYF(`0`), err);
1617	error=`1`;
1618	}
1619	/ Should this be done only if is_real_trans is set ? /
1620	status_var_increment(thd->status_var.ha_commit_count);
1621	if (is_real_trans && ht != binlog_hton && ha_info->is_trx_read_write())
1622	++count;
1623	ha_info_next= ha_info->next();
1624	ha_info->reset(); / keep it conveniently zero-filled /
1625	}
1626	trans->ha_list= `0`;
1627	trans->no_2pc=`0`;
1628	if (all)
1629	{
1630	#ifdef HAVE_QUERY_CACHE
1631	if (thd->transaction.changed_tables)
1632	query_cache.invalidate(thd, thd->transaction.changed_tables);
1633	#endif
1634	}
1635	}
1636	/ Free resources and perform other cleanup even for 'empty' transactions. /
1637	if (is_real_trans)
1638	{
1639	thd->has_waiter= false;
1640	thd->transaction.cleanup();
1641	if (count >= `2`)
1642	statistic_increment(transactions_multi_engine, LOCK_status);
1643	}
1644
1645	DBUG_RETURN(error);
1646	}
1647
1648
1649	int ha_rollback_trans(THD thd, bool* all)
1650	{
1651	int error=`0`;
1652	THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1653	Ha_trx_info ha_info= trans->ha_list, ha_info_next;
1654	/*
1655	"real" is a nick name for a transaction for which a commit will
1656	make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1657	transaction is not 'real': even though it's possible to commit it,
1658	the changes are not durable as they might be rolled back if the
1659	enclosing 'all' transaction is rolled back.
1660	We establish the value of 'is_real_trans' by checking
1661	if it's an explicit COMMIT or BEGIN statement, or implicit
1662	commit issued by DDL (in these cases all == TRUE),
1663	or if we're running in autocommit mode (it's only in the autocommit mode
1664	ha_commit_one_phase() is called with an empty
1665	transaction.all.ha_list, see why in trans_register_ha()).
1666	*/
1667	bool is_real_trans=all \|\| thd->transaction.all.ha_list == `0`;
1668	DBUG_ENTER("ha_rollback_trans");
1669
1670	/*
1671	We must not rollback the normal transaction if a statement
1672	transaction is pending.
1673	*/
1674	DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL \|\|
1675	trans == &thd->transaction.stmt);
1676
1677	#ifdef HAVE_REPLICATION
1678	if (is_real_trans)
1679	{
1680	/*
1681	In parallel replication, if we need to rollback during commit, we must
1682	first inform following transactions that we are going to abort our commit
1683	attempt. Otherwise those following transactions can run too early, and
1684	possibly cause replication to fail. See comments in retry_event_group().
1685
1686	There were several bugs with this in the past that were very hard to
1687	track down (MDEV-7458, MDEV-8302). So we add here an assertion for
1688	rollback without signalling following transactions. And in release
1689	builds, we explicitly do the signalling before rolling back.
1690	*/
1691	DBUG_ASSERT(!(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit));
1692	if (thd->rgi_slave && thd->rgi_slave->did_mark_start_commit)
1693	thd->rgi_slave->unmark_start_commit();
1694	}
1695	#endif
1696
1697	if (thd->in_sub_stmt)
1698	{
1699	DBUG_ASSERT(`0`);
1700	/*
1701	If we are inside stored function or trigger we should not commit or
1702	rollback current statement transaction. See comment in ha_commit_trans()
1703	call for more information.
1704	*/
1705	if (!all)
1706	DBUG_RETURN(`0`);
1707	my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(`0`));
1708	DBUG_RETURN(`1`);
1709	}
1710
1711	if (ha_info)
1712	{
1713	/ Close all cursors that can not survive ROLLBACK /
1714	if (is_real_trans) / not a statement commit /
1715	thd->stmt_map.close_transient_cursors();
1716
1717	for (; ha_info; ha_info= ha_info_next)
1718	{
1719	int err;
1720	handlerton *ht= ha_info->ht();
1721	if ((err= ht->rollback(ht, thd, all)))
1722	{ // cannot happen
1723	my_error(ER_ERROR_DURING_ROLLBACK, MYF(`0`), err);
1724	error=`1`;
1725	#ifdef WITH_WSREP
1726	WSREP_WARN("handlerton rollback failed, thd %llu %lld conf %d SQL %s",
1727	thd->thread_id, thd->query_id, thd->wsrep_conflict_state,
1728	thd->query());
1729	#endif /* WITH_WSREP */
1730	}
1731	status_var_increment(thd->status_var.ha_rollback_count);
1732	ha_info_next= ha_info->next();
1733	ha_info->reset(); / keep it conveniently zero-filled /
1734	}
1735	trans->ha_list= `0`;
1736	trans->no_2pc=`0`;
1737	}
1738
1739	/*
1740	Thanks to possibility of MDL deadlock rollback request can come even if
1741	transaction hasn't been started in any transactional storage engine.
1742	*/
1743	if (is_real_trans && thd->transaction_rollback_request &&
1744	thd->transaction.xid_state.xa_state != XA_NOTR)
1745	thd->transaction.xid_state.rm_error= thd->get_stmt_da()->sql_errno();
1746
1747	/ Always cleanup. Even if nht==0. There may be savepoints. /
1748	if (is_real_trans)
1749	{
1750	thd->has_waiter= false;
1751	thd->transaction.cleanup();
1752	}
1753	if (all)
1754	thd->transaction_rollback_request= FALSE;
1755
1756	/*
1757	If a non-transactional table was updated, warn; don't warn if this is a
1758	slave thread (because when a slave thread executes a ROLLBACK, it has
1759	been read from the binary log, so it's 100% sure and normal to produce
1760	error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
1761	slave SQL thread, it would not stop the thread but just be printed in
1762	the error log; but we don't want users to wonder why they have this
1763	message in the error log, so we don't send it.
1764
1765	We don't have to test for thd->killed == KILL_SYSTEM_THREAD as
1766	it doesn't matter if a warning is pushed to a system thread or not:
1767	No one will see it...
1768	*/
1769	if (is_real_trans && thd->transaction.all.modified_non_trans_table &&
1770	!thd->slave_thread && thd->killed < KILL_CONNECTION)
1771	push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
1772	ER_WARNING_NOT_COMPLETE_ROLLBACK,
1773	ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK));
1774	#ifdef HAVE_REPLICATION
1775	repl_semisync_master.wait_after_rollback(thd, all);
1776	#endif
1777	DBUG_RETURN(error);
1778	}
1779
1780
1781	struct xahton_st {
1782	XID *xid;
1783	int result;
1784	};
1785
1786	static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin,
1787	void *arg)
1788	{
1789	handlerton *hton= plugin_hton(plugin);
1790	if (hton->state == SHOW_OPTION_YES && hton->recover)
1791	{
1792	hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
1793	((struct xahton_st *)arg)->result= `0`;
1794	}
1795	return FALSE;
1796	}
1797
1798	static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin,
1799	void *arg)
1800	{
1801	handlerton *hton= plugin_hton(plugin);
1802	if (hton->state == SHOW_OPTION_YES && hton->recover)
1803	{
1804	hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
1805	((struct xahton_st *)arg)->result= `0`;
1806	}
1807	return FALSE;
1808	}
1809
1810
1811	int ha_commit_or_rollback_by_xid(XID xid, bool* commit)
1812	{
1813	struct xahton_st xaop;
1814	xaop.xid= xid;
1815	xaop.result= `1`;
1816
1817	plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
1818	MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
1819
1820	return xaop.result;
1821	}
1822
1823
1824	#ifndef DBUG_OFF
1825	/**
1826	@note
1827	This does not need to be multi-byte safe or anything
1828	*/
1829	static char* xid_to_str(char buf, XID xid)
1830	{
1831	int i;
1832	char *s=buf;
1833	*s++=`'\''`;
1834	for (i=`0`; i < xid->gtrid_length+xid->bqual_length; i++)
1835	{
1836	uchar c=(uchar)xid->data[i];
1837	/ is_next_dig is set if next character is a number /
1838	bool is_next_dig= FALSE;
1839	if (i < XIDDATASIZE)
1840	{
1841	char ch= xid->data[i+`1`];
1842	is_next_dig= (ch >= `'0'` && ch <=`'9'`);
1843	}
1844	if (i == xid->gtrid_length)
1845	{
1846	*s++=`'\''`;
1847	if (xid->bqual_length)
1848	{
1849	*s++=`'.'`;
1850	*s++=`'\''`;
1851	}
1852	}
1853	if (c < `32` \|\| c > `126`)
1854	{
1855	*s++=`'\\'`;
1856	/*
1857	If next character is a number, write current character with
1858	3 octal numbers to ensure that the next number is not seen
1859	as part of the octal number
1860	*/
1861	if (c > `077` \|\| is_next_dig)
1862	*s++=_dig_vec_lower[c >> `6`];
1863	if (c > `007` \|\| is_next_dig)
1864	*s++=_dig_vec_lower[(c >> `3`) & `7`];
1865	*s++=_dig_vec_lower[c & `7`];
1866	}
1867	else
1868	{
1869	if (c == `'\''` \|\| c == `'\\'`)
1870	*s++=`'\\'`;
1871	*s++=c;
1872	}
1873	}
1874	*s++=`'\''`;
1875	*s=`0`;
1876	return buf;
1877	}
1878	#endif
1879
1880	/**
1881	recover() step of xa.
1882
1883	@note
1884	there are three modes of operation:
1885	- automatic recover after a crash
1886	in this case commit_list != 0, tc_heuristic_recover==0
1887	all xids from commit_list are committed, others are rolled back
1888	- manual (heuristic) recover
1889	in this case commit_list==0, tc_heuristic_recover != 0
1890	DBA has explicitly specified that all prepared transactions should
1891	be committed (or rolled back).
1892	- no recovery (MySQL did not detect a crash)
1893	in this case commit_list==0, tc_heuristic_recover == 0
1894	there should be no prepared transactions in this case.
1895	*/
1896	struct xarecover_st
1897	{
1898	int len, found_foreign_xids, found_my_xids;
1899	XID *list;
1900	HASH *commit_list;
1901	bool dry_run;
1902	};
1903
1904	static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
1905	void *arg)
1906	{
1907	handlerton *hton= plugin_hton(plugin);
1908	struct xarecover_st info= (struct* xarecover_st *) arg;
1909	int got;
1910
1911	if (hton->state == SHOW_OPTION_YES && hton->recover)
1912	{
1913	while ((got= hton->recover(hton, info->list, info->len)) > `0` )
1914	{
1915	sql_print_information("Found %d prepared transaction(s) in %s",
1916	got, hton_name(hton)->str);
1917	for (int i=`0`; i < got; i ++)
1918	{
1919	my_xid x= IF_WSREP(WSREP_ON && wsrep_is_wsrep_xid(&info->list[i]) ?
1920	wsrep_xid_seqno(info->list[i]) :
1921	info->list[i].get_my_xid(),
1922	info->list[i].get_my_xid());
1923	if (!x) // not "mine" - that is generated by external TM
1924	{
1925	#ifndef DBUG_OFF
1926	char buf[XIDDATASIZE`4`+`6`]; // see xid_to_str*
1927	DBUG_PRINT("info", ("ignore xid %s", xid_to_str(buf, info->list+i)));
1928	#endif
1929	xid_cache_insert(info->list+i, XA_PREPARED);
1930	info->found_foreign_xids++;
1931	continue;
1932	}
1933	if (info->dry_run)
1934	{
1935	info->found_my_xids++;
1936	continue;
1937	}
1938	// recovery mode
1939	if (info->commit_list ?
1940	my_hash_search(info->commit_list, (uchar )&x, sizeof*(x)) != `0` :
1941	tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
1942	{
1943	#ifndef DBUG_OFF
1944	int rc=
1945	#endif
1946	hton->commit_by_xid(hton, info->list+i);
1947	#ifndef DBUG_OFF
1948	if (rc == `0`)
1949	{
1950	char buf[XIDDATASIZE`4`+`6`]; // see xid_to_str*
1951	DBUG_PRINT("info", ("commit xid %s", xid_to_str(buf, info->list+i)));
1952	}
1953	#endif
1954	}
1955	else
1956	{
1957	#ifndef DBUG_OFF
1958	int rc=
1959	#endif
1960	hton->rollback_by_xid(hton, info->list+i);
1961	#ifndef DBUG_OFF
1962	if (rc == `0`)
1963	{
1964	char buf[XIDDATASIZE`4`+`6`]; // see xid_to_str*
1965	DBUG_PRINT("info", ("rollback xid %s",
1966	xid_to_str(buf, info->list+i)));
1967	}
1968	#endif
1969	}
1970	}
1971	if (got < info->len)
1972	break;
1973	}
1974	}
1975	return FALSE;
1976	}
1977
1978	int ha_recover(HASH *commit_list)
1979	{
1980	struct xarecover_st info;
1981	DBUG_ENTER("ha_recover");
1982	info.found_foreign_xids= info.found_my_xids= `0`;
1983	info.commit_list= commit_list;
1984	info.dry_run= (info.commit_list==`0` && tc_heuristic_recover==`0`);
1985	info.list= NULL;
1986
1987	/ commit_list and tc_heuristic_recover cannot be set both /
1988	DBUG_ASSERT(info.commit_list==`0` \|\| tc_heuristic_recover==`0`);
1989	/ if either is set, total_ha_2pc must be set too /
1990	DBUG_ASSERT(info.dry_run \|\|
1991	(failed_ha_2pc + total_ha_2pc) > (ulong)opt_bin_log);
1992
1993	if (total_ha_2pc <= (ulong)opt_bin_log)
1994	DBUG_RETURN(`0`);
1995
1996	if (info.commit_list)
1997	sql_print_information("Starting crash recovery...");
1998
1999	for (info.len= MAX_XID_LIST_SIZE ;
2000	info.list==`0` && info.len > MIN_XID_LIST_SIZE; info.len/=`2`)
2001	{
2002	info.list=(XID )my_malloc(info.lensizeof(XID), MYF(`0`));
2003	}
2004	if (!info.list)
2005	{
2006	sql_print_error(ER(ER_OUTOFMEMORY),
2007	static_cast<int>(info.len*sizeof(XID)));
2008	DBUG_RETURN(`1`);
2009	}
2010
2011	plugin_foreach(NULL, xarecover_handlerton,
2012	MYSQL_STORAGE_ENGINE_PLUGIN, &info);
2013
2014	my_free(info.list);
2015	if (info.found_foreign_xids)
2016	sql_print_warning("Found %d prepared XA transactions",
2017	info.found_foreign_xids);
2018	if (info.dry_run && info.found_my_xids)
2019	{
2020	sql_print_error("Found %d prepared transactions! It means that mysqld was "
2021	"not shut down properly last time and critical recovery "
2022	"information (last binlog or %s file) was manually deleted "
2023	"after a crash. You have to start mysqld with "
2024	"--tc-heuristic-recover switch to commit or rollback "
2025	"pending transactions.",
2026	info.found_my_xids, opt_tc_log_file);
2027	DBUG_RETURN(`1`);
2028	}
2029	if (info.commit_list)
2030	sql_print_information("Crash recovery finished.");
2031	DBUG_RETURN(`0`);
2032	}
2033
/**
  get_sql_xid(): return the XID as it appears in the SQL function's
  arguments, so that the resulting string can be passed to XA START,
  XA PREPARE, etc.

  @note
    The 'buf' has to have space for at least SQL_XIDSIZE bytes.
*/
2041
2042
/*
  Lookup table indexed by a 7-bit character code: 1 if the character has to
  be converted (hex-encoded) when an XID is printed as SQL, 0 if it can be
  written as is.

  'a'..'z' 'A'..'Z', '0'..'9'
  and '-' '_' ' ' symbols don't have to be
  converted.
*/

static const char xid_needs_conv[128]=
{
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,
  0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1
};
2060
2061	uint get_sql_xid(XID xid, char* *buf)
2062	{
2063	int tot_len= xid->gtrid_length + xid->bqual_length;
2064	int i;
2065	const char *orig_buf= buf;
2066
2067	for (i=`0`; i<tot_len; i++)
2068	{
2069	uchar c= ((uchar *) xid->data)[i];
2070	if (c >= `128` \|\| xid_needs_conv[c])
2071	break;
2072	}
2073
2074	if (i >= tot_len)
2075	{
2076	/ No need to convert characters to hexadecimals. /
2077	*buf++= `'\''`;
2078	memcpy(buf, xid->data, xid->gtrid_length);
2079	buf+= xid->gtrid_length;
2080	*buf++= `'\''`;
2081	if (xid->bqual_length > `0` \|\| xid->formatID != `1`)
2082	{
2083	*buf++= `','`;
2084	*buf++= `'\''`;
2085	memcpy(buf, xid->data+xid->gtrid_length, xid->bqual_length);
2086	buf+= xid->bqual_length;
2087	*buf++= `'\''`;
2088	}
2089	}
2090	else
2091	{
2092	*buf++= `'X'`;
2093	*buf++= `'\''`;
2094	for (i= `0`; i < xid->gtrid_length; i++)
2095	{
2096	buf++=_dig_vec_lower[((uchar) xid->data)[i] >> `4`];
2097	buf++=_dig_vec_lower[((uchar) xid->data)[i] & `0x0f`];
2098	}
2099	*buf++= `'\''`;
2100	if (xid->bqual_length > `0` \|\| xid->formatID != `1`)
2101	{
2102	*buf++= `','`;
2103	*buf++= `'X'`;
2104	*buf++= `'\''`;
2105	for (; i < tot_len; i++)
2106	{
2107	buf++=_dig_vec_lower[((uchar) xid->data)[i] >> `4`];
2108	buf++=_dig_vec_lower[((uchar) xid->data)[i] & `0x0f`];
2109	}
2110	*buf++= `'\''`;
2111	}
2112	}
2113
2114	if (xid->formatID != `1`)
2115	{
2116	*buf++= `','`;
2117	buf+= my_longlong10_to_str_8bit(&my_charset_bin, buf,
2118	MY_INT64_NUM_DECIMAL_DIGITS, -`10`, xid->formatID);
2119	}
2120
2121	return (uint)(buf - orig_buf);
2122	}
2123
2124
2125	/**
2126	return the list of XID's to a client, the same way SHOW commands do.
2127
2128	@note
2129	I didn't find in XA specs that an RM cannot return the same XID twice,
2130	so mysql_xa_recover does not filter XID's to ensure uniqueness.
2131	It can be easily fixed later, if necessary.
2132	*/
2133
2134	static my_bool xa_recover_callback(XID_STATE xs, Protocol protocol,
2135	char data, uint data_len, CHARSET_INFO data_cs)
2136	{
2137	if (xs->xa_state == XA_PREPARED)
2138	{
2139	protocol->prepare_for_resend();
2140	protocol->store_longlong((longlong) xs->xid.formatID, FALSE);
2141	protocol->store_longlong((longlong) xs->xid.gtrid_length, FALSE);
2142	protocol->store_longlong((longlong) xs->xid.bqual_length, FALSE);
2143	protocol->store(data, data_len, data_cs);
2144	if (protocol->write())
2145	return TRUE;
2146	}
2147	return FALSE;
2148	}
2149
2150
2151	static my_bool xa_recover_callback_short(XID_STATE xs, Protocol protocol)
2152	{
2153	return xa_recover_callback(xs, protocol, xs->xid.data,
2154	xs->xid.gtrid_length + xs->xid.bqual_length, &my_charset_bin);
2155	}
2156
2157
2158	static my_bool xa_recover_callback_verbose(XID_STATE xs, Protocol protocol)
2159	{
2160	char buf[SQL_XIDSIZE];
2161	uint len= get_sql_xid(&xs->xid, buf);
2162	return xa_recover_callback(xs, protocol, buf, len,
2163	&my_charset_utf8_general_ci);
2164	}
2165
2166
2167	bool mysql_xa_recover(THD *thd)
2168	{
2169	List<Item> field_list;
2170	Protocol *protocol= thd->protocol;
2171	MEM_ROOT *mem_root= thd->mem_root;
2172	my_hash_walk_action action;
2173	DBUG_ENTER("mysql_xa_recover");
2174
2175	field_list.push_back(new (mem_root)
2176	Item_int (thd, "formatID", `0`,
2177	MY_INT32_NUM_DECIMAL_DIGITS), mem_root);
2178	field_list.push_back(new (mem_root)
2179	Item_int (thd, "gtrid_length", `0`,
2180	MY_INT32_NUM_DECIMAL_DIGITS), mem_root);
2181	field_list.push_back(new (mem_root)
2182	Item_int (thd, "bqual_length", `0`,
2183	MY_INT32_NUM_DECIMAL_DIGITS), mem_root);
2184	{
2185	uint len;
2186	CHARSET_INFO *cs;
2187
2188	if (thd->lex->verbose)
2189	{
2190	len= SQL_XIDSIZE;
2191	cs= &my_charset_utf8_general_ci;
2192	action= (my_hash_walk_action) xa_recover_callback_verbose;
2193	}
2194	else
2195	{
2196	len= XIDDATASIZE;
2197	cs= &my_charset_bin;
2198	action= (my_hash_walk_action) xa_recover_callback_short;
2199	}
2200
2201	field_list.push_back(new (mem_root)
2202	Item_empty_string (thd, "data", len, cs), mem_root);
2203	}
2204
2205	if (protocol->send_result_set_metadata(&field_list,
2206	Protocol::SEND_NUM_ROWS \| Protocol::SEND_EOF))
2207	DBUG_RETURN(`1`);
2208
2209	if (xid_cache_iterate(thd, action, protocol))
2210	DBUG_RETURN(`1`);
2211	my_eof(thd);
2212	DBUG_RETURN(`0`);
2213	}
2214
2215	/*
2216	Called by engine to notify TC that a new commit checkpoint has been reached.
2217	See comments on handlerton method commit_checkpoint_request() for details.
2218	*/
2219	void
2220	commit_checkpoint_notify_ha(handlerton hton, void* *cookie)
2221	{
2222	tc_log->commit_checkpoint_notify(cookie);
2223	}
2224
2225
2226	/**
2227	Check if all storage engines used in transaction agree that after
2228	rollback to savepoint it is safe to release MDL locks acquired after
2229	savepoint creation.
2230
2231	@param thd The client thread that executes the transaction.
2232
2233	@return true - It is safe to release MDL locks.
2234	false - If it is not.
2235	*/
2236	bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2237	{
2238	Ha_trx_info *ha_info;
2239	THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2240	&thd->transaction.all);
2241
2242	DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2243
2244	/**
2245	Checking whether it is safe to release metadata locks after rollback to
2246	savepoint in all the storage engines that are part of the transaction.
2247	*/
2248	for (ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
2249	{
2250	handlerton *ht= ha_info->ht();
2251	DBUG_ASSERT(ht);
2252
2253	if (ht->savepoint_rollback_can_release_mdl == `0` \|\|
2254	ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2255	DBUG_RETURN(false);
2256	}
2257
2258	DBUG_RETURN(true);
2259	}
2260
2261	int ha_rollback_to_savepoint(THD thd, SAVEPOINT sv)
2262	{
2263	int error=`0`;
2264	THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2265	&thd->transaction.all);
2266	Ha_trx_info ha_info, ha_info_next;
2267
2268	DBUG_ENTER("ha_rollback_to_savepoint");
2269
2270	trans->no_2pc=`0`;
2271	/*
2272	rolling back to savepoint in all storage engines that were part of the
2273	transaction when the savepoint was set
2274	*/
2275	for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
2276	{
2277	int err;
2278	handlerton *ht= ha_info->ht();
2279	DBUG_ASSERT(ht);
2280	DBUG_ASSERT(ht->savepoint_set != `0`);
2281	if ((err= ht->savepoint_rollback(ht, thd,
2282	(uchar *)(sv+`1`)+ht->savepoint_offset)))
2283	{ // cannot happen
2284	my_error(ER_ERROR_DURING_ROLLBACK, MYF(`0`), err);
2285	error=`1`;
2286	}
2287	status_var_increment(thd->status_var.ha_savepoint_rollback_count);
2288	trans->no_2pc\|= ht->prepare == `0`;
2289	}
2290	/*
2291	rolling back the transaction in all storage engines that were not part of
2292	the transaction when the savepoint was set
2293	*/
2294	for (ha_info= trans->ha_list; ha_info != sv->ha_list;
2295	ha_info= ha_info_next)
2296	{
2297	int err;
2298	handlerton *ht= ha_info->ht();
2299	if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2300	{ // cannot happen
2301	my_error(ER_ERROR_DURING_ROLLBACK, MYF(`0`), err);
2302	error=`1`;
2303	}
2304	status_var_increment(thd->status_var.ha_rollback_count);
2305	ha_info_next= ha_info->next();
2306	ha_info->reset(); / keep it conveniently zero-filled /
2307	}
2308	trans->ha_list= sv->ha_list;
2309	DBUG_RETURN(error);
2310	}
2311
2312	/**
2313	@note
2314	according to the sql standard (ISO/IEC 9075-2:2003)
2315	section "4.33.4 SQL-statements and transaction states",
2316	SAVEPOINT is not* transaction-initiating SQL-statement*
2317	*/
2318	int ha_savepoint(THD thd, SAVEPOINT sv)
2319	{
2320	int error=`0`;
2321	THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2322	&thd->transaction.all);
2323	Ha_trx_info *ha_info= trans->ha_list;
2324	DBUG_ENTER("ha_savepoint");
2325
2326	for (; ha_info; ha_info= ha_info->next())
2327	{
2328	int err;
2329	handlerton *ht= ha_info->ht();
2330	DBUG_ASSERT(ht);
2331	if (! ht->savepoint_set)
2332	{
2333	my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(`0`), "SAVEPOINT");
2334	error=`1`;
2335	break;
2336	}
2337	if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+`1`)+ht->savepoint_offset)))
2338	{ // cannot happen
2339	my_error(ER_GET_ERRNO, MYF(`0`), err, hton_name(ht)->str);
2340	error=`1`;
2341	}
2342	status_var_increment(thd->status_var.ha_savepoint_count);
2343	}
2344	/*
2345	Remember the list of registered storage engines. All new
2346	engines are prepended to the beginning of the list.
2347	*/
2348	sv->ha_list= trans->ha_list;
2349
2350	DBUG_RETURN(error);
2351	}
2352
2353	int ha_release_savepoint(THD thd, SAVEPOINT sv)
2354	{
2355	int error=`0`;
2356	Ha_trx_info *ha_info= sv->ha_list;
2357	DBUG_ENTER("ha_release_savepoint");
2358
2359	for (; ha_info; ha_info= ha_info->next())
2360	{
2361	int err;
2362	handlerton *ht= ha_info->ht();
2363	/ Savepoint life time is enclosed into transaction life time. /
2364	DBUG_ASSERT(ht);
2365	if (!ht->savepoint_release)
2366	continue;
2367	if ((err= ht->savepoint_release(ht, thd,
2368	(uchar *)(sv+`1`) + ht->savepoint_offset)))
2369	{ // cannot happen
2370	my_error(ER_GET_ERRNO, MYF(`0`), err, hton_name(ht)->str);
2371	error=`1`;
2372	}
2373	}
2374	DBUG_RETURN(error);
2375	}
2376
2377
2378	static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2379	void *arg)
2380	{
2381	handlerton *hton= plugin_hton(plugin);
2382	if (hton->state == SHOW_OPTION_YES &&
2383	hton->start_consistent_snapshot)
2384	{
2385	if (hton->start_consistent_snapshot(hton, thd))
2386	return TRUE;
2387	((bool* )arg)= false*;
2388	}
2389	return FALSE;
2390	}
2391
2392	int ha_start_consistent_snapshot(THD *thd)
2393	{
2394	bool err, warn= true;
2395
2396	/*
2397	Holding the LOCK_commit_ordered mutex ensures that we get the same
2398	snapshot for all engines (including the binary log). This allows us
2399	among other things to do backups with
2400	START TRANSACTION WITH CONSISTENT SNAPSHOT and
2401	have a consistent binlog position.
2402	*/
2403	mysql_mutex_lock(&LOCK_commit_ordered);
2404	err= plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2405	mysql_mutex_unlock(&LOCK_commit_ordered);
2406
2407	if (err)
2408	{
2409	ha_rollback_trans(thd, true);
2410	return `1`;
2411	}
2412
2413	/*
2414	Same idea as when one wants to CREATE TABLE in one engine which does not
2415	exist:
2416	*/
2417	if (warn)
2418	push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2419	"This MariaDB server does not support any "
2420	"consistent-read capable storage engine");
2421	return `0`;
2422	}
2423
2424
2425	static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2426	void *arg)
2427	{
2428	handlerton *hton= plugin_hton(plugin);
2429	if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2430	hton->flush_logs(hton))
2431	return TRUE;
2432	return FALSE;
2433	}
2434
2435
2436	bool ha_flush_logs(handlerton *db_type)
2437	{
2438	if (db_type == NULL)
2439	{
2440	if (plugin_foreach(NULL, flush_handlerton,
2441	MYSQL_STORAGE_ENGINE_PLUGIN, `0`))
2442	return TRUE;
2443	}
2444	else
2445	{
2446	if (db_type->state != SHOW_OPTION_YES \|\|
2447	(db_type->flush_logs && db_type->flush_logs(db_type)))
2448	return TRUE;
2449	}
2450	return FALSE;
2451	}
2452
2453
2454	/**
2455	@brief make canonical filename
2456
2457	@param[in] file table handler
2458	@param[in] path original path
2459	@param[out] tmp_path buffer for canonized path
2460
2461	@details Lower case db name and table name path parts for
2462	non file based tables when lower_case_table_names
2463	is 2 (store as is, compare in lower case).
2464	Filesystem path prefix (mysql_data_home or tmpdir)
2465	is left intact.
2466
2467	@note tmp_path may be left intact if no conversion was
2468	performed.
2469
2470	@retval canonized path
2471
2472	@todo This may be done more efficiently when table path
2473	gets built. Convert this function to something like
2474	ASSERT_CANONICAL_FILENAME.
2475	*/
2476	const char get_canonical_filename(handler file, const char *path,
2477	char *tmp_path)
2478	{
2479	uint i;
2480	if (lower_case_table_names != `2` \|\| (file->ha_table_flags() & HA_FILE_BASED))
2481	return path;
2482
2483	for (i= `0`; i <= mysql_tmpdir_list.max; i++)
2484	{
2485	if (is_prefix(path, mysql_tmpdir_list.list[i]))
2486	return path;
2487	}
2488
2489	/ Ensure that table handler get path in lower case /
2490	if (tmp_path != path)
2491	strmov(tmp_path, path);
2492
2493	/*
2494	we only should turn into lowercase database/table part
2495	so start the process after homedirectory
2496	*/
2497	my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2498	return tmp_path;
2499	}
2500
2501
2502	/* delete a table in the engine*
2503
2504	@note
2505	ENOENT and HA_ERR_NO_SUCH_TABLE are not considered errors.
2506	The .frm file will be deleted only if we return 0.
2507	*/
2508	int ha_delete_table(THD thd, handlerton table_type, const char *path,
2509	const LEX_CSTRING db, const* LEX_CSTRING alias, bool* generate_warning)
2510	{
2511	handler *file;
2512	char tmp_path[FN_REFLEN];
2513	int error;
2514	TABLE dummy_table;
2515	TABLE_SHARE dummy_share;
2516	DBUG_ENTER("ha_delete_table");
2517
2518	/ table_type is NULL in ALTER TABLE when renaming only .frm files /
2519	if (table_type == NULL \|\| table_type == view_pseudo_hton \|\|
2520	! (file=get_new_handler((TABLE_SHARE*)`0`, thd->mem_root, table_type)))
2521	DBUG_RETURN(`0`);
2522
2523	bzero((char) &dummy_table, sizeof*(dummy_table));
2524	bzero((char) &dummy_share, sizeof*(dummy_share));
2525	dummy_table.s= &dummy_share;
2526
2527	path= get_canonical_filename(file, path, tmp_path);
2528	if (unlikely((error= file->ha_delete_table(path))))
2529	{
2530	/*
2531	it's not an error if the table doesn't exist in the engine.
2532	warn the user, but still report DROP being a success
2533	*/
2534	bool intercept= error == ENOENT \|\| error == HA_ERR_NO_SUCH_TABLE;
2535
2536	if (!intercept \|\| generate_warning)
2537	{
2538	/ Fill up strucutures that print_error may need /
2539	dummy_share.path.str= (char*) path;
2540	dummy_share.path.length= strlen(path);
2541	dummy_share.normalized_path = dummy_share.path;
2542	dummy_share.db = *db;
2543	dummy_share.table_name = *alias;
2544	dummy_table.alias.set(alias->str, alias->length, table_alias_charset);
2545	file->change_table_ptr(&dummy_table, &dummy_share);
2546	file->print_error(error, MYF(intercept ? ME_JUST_WARNING : `0`));
2547	}
2548	if (intercept)
2549	error= `0`;
2550	}
2551	delete file;
2552
2553	DBUG_RETURN(error);
2554	}
2555
2556	/****************************************************************************
2557	** General handler functions
2558	****************************************************************************/
2559
2560
2561	/**
2562	Clone a handler
2563
2564	@param name name of new table instance
2565	@param mem_root Where 'this->ref' should be allocated. It can't be
2566	in this->table->mem_root as otherwise we will not be
2567	able to reclaim that memory when the clone handler
2568	object is destroyed.
2569	*/
2570
2571	handler handler::clone(const* char name, MEM_ROOT mem_root)
2572	{
2573	handler *new_handler= get_new_handler(table->s, mem_root, ht);
2574
2575	if (!new_handler)
2576	return NULL;
2577	if (new_handler->set_ha_share_ref(ha_share))
2578	goto err;
2579
2580	/*
2581	TODO: Implement a more efficient way to have more than one index open for
2582	the same table instance. The ha_open call is not cachable for clone.
2583
2584	This is not critical as the engines already have the table open
2585	and should be able to use the original instance of the table.
2586	*/
2587	if (new_handler->ha_open(table, name, table->db_stat,
2588	HA_OPEN_IGNORE_IF_LOCKED, mem_root))
2589	goto err;
2590
2591	return new_handler;
2592
2593	err:
2594	delete new_handler;
2595	return NULL;
2596	}
2597
2598	LEX_CSTRING *handler::engine_name()
2599	{
2600	return hton_name(ht);
2601	}
2602
2603
2604	double handler::keyread_time(uint index, uint ranges, ha_rows rows)
2605	{
2606	/*
2607	It is assumed that we will read trough the whole key range and that all
2608	key blocks are half full (normally things are much better). It is also
2609	assumed that each time we read the next key from the index, the handler
2610	performs a random seek, thus the cost is proportional to the number of
2611	blocks read. This model does not take into account clustered indexes -
2612	engines that support that (e.g. InnoDB) may want to overwrite this method.
2613	The model counts in the time to read index entries from cache.
2614	*/
2615	size_t len= table->key_info[index].key_length + ref_length;
2616	if (index == table->s->primary_key && table->file->primary_key_is_clustered())
2617	len= table->s->stored_rec_length;
2618	double keys_per_block= (stats.block_size/`2.0`/len+`1`);
2619	return (rows + keys_per_block-`1`)/ keys_per_block +
2620	len*rows/(stats.block_size+`1`)/TIME_FOR_COMPARE ;
2621	}
2622
2623	void *handler::ha_data(THD thd) const
2624	{
2625	return thd_ha_data(thd, ht);
2626	}
2627
2628	THD handler::ha_thd(void) const*
2629	{
2630	DBUG_ASSERT(!table \|\| !table->in_use \|\| table->in_use == current_thd);
2631	return (table && table->in_use) ? table->in_use : current_thd;
2632	}
2633
2634	void handler::unbind_psi()
2635	{
2636	/*
2637	Notify the instrumentation that this table is not owned
2638	by this thread any more.
2639	*/
2640	PSI_CALL_unbind_table(m_psi);
2641	}
2642
2643	void handler::rebind_psi()
2644	{
2645	/*
2646	Notify the instrumentation that this table is now owned
2647	by this thread.
2648	*/
2649	m_psi= PSI_CALL_rebind_table(ha_table_share_psi(), this, m_psi);
2650	}
2651
2652
2653	PSI_table_share handler::ha_table_share_psi() const*
2654	{
2655	return table_share->m_psi;
2656	}
2657
2658	/* @brief*
2659	Open database-handler.
2660
2661	IMPLEMENTATION
2662	Try O_RDONLY if cannot open as O_RDWR
2663	Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
2664	*/
2665	int handler::ha_open(TABLE table_arg, const* char name, int* mode,
2666	uint test_if_locked, MEM_ROOT *mem_root,
2667	List<String> *partitions_to_open)
2668	{
2669	int error;
2670	DBUG_ENTER("handler::ha_open");
2671	DBUG_PRINT("enter",
2672	("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
2673	name, ht->db_type, table_arg->db_stat, mode,
2674	test_if_locked));
2675
2676	table= table_arg;
2677	DBUG_ASSERT(table->s == table_share);
2678	DBUG_ASSERT(m_lock_type == F_UNLCK);
2679	DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
2680	DBUG_ASSERT(alloc_root_inited(&table->mem_root));
2681
2682	set_partitions_to_open(partitions_to_open);
2683
2684	if (unlikely((error=open(name,mode,test_if_locked))))
2685	{
2686	if ((error == EACCES \|\| error == EROFS) && mode == O_RDWR &&
2687	(table->db_stat & HA_TRY_READ_ONLY))
2688	{
2689	table->db_stat\|=HA_READ_ONLY;
2690	error=open(name,O_RDONLY,test_if_locked);
2691	}
2692	}
2693	if (unlikely(error))
2694	{
2695	my_errno= error; / Safeguard /
2696	DBUG_PRINT("error",("error: %d errno: %d",error,errno));
2697	}
2698	else
2699	{
2700	DBUG_ASSERT(m_psi == NULL);
2701	DBUG_ASSERT(table_share != NULL);
2702	/*
2703	Do not call this for partitions handlers, since it may take too much
2704	resources.
2705	So only use the m_psi on table level, not for individual partitions.
2706	*/
2707	if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
2708	{
2709	m_psi= PSI_CALL_open_table(ha_table_share_psi(), this);
2710	}
2711
2712	if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
2713	table->db_stat\|=HA_READ_ONLY;
2714	(void) extra(HA_EXTRA_NO_READCHECK); // Not needed in SQL
2715
2716	/ Allocate ref in thd or on the table's mem_root /
2717	if (!(ref= (uchar*) alloc_root(mem_root ? mem_root : &table->mem_root,
2718	ALIGN_SIZE(ref_length)*`2`)))
2719	{
2720	ha_close();
2721	error=HA_ERR_OUT_OF_MEM;
2722	}
2723	else
2724	dup_ref=ref+ALIGN_SIZE(ref_length);
2725	cached_table_flags= table_flags();
2726	}
2727	reset_statistics();
2728	internal_tmp_table= MY_TEST(test_if_locked & HA_OPEN_INTERNAL_TABLE);
2729
2730	DBUG_RETURN(error);
2731	}
2732
2733	int handler::ha_close(void)
2734	{
2735	DBUG_ENTER("ha_close");
2736	/*
2737	Increment global statistics for temporary tables.
2738	In_use is 0 for tables that was closed from the table cache.
2739	*/
2740	if (table->in_use)
2741	status_var_add(table->in_use->status_var.rows_tmp_read, rows_tmp_read);
2742	PSI_CALL_close_table(m_psi);
2743	m_psi= NULL; / instrumentation handle, invalid after close_table() /
2744
2745	/ Detach from ANALYZE tracker /
2746	tracker= NULL;
2747
2748	DBUG_ASSERT(m_lock_type == F_UNLCK);
2749	DBUG_ASSERT(inited == NONE);
2750	DBUG_RETURN(close());
2751	}
2752
2753
2754	int handler::ha_rnd_next(uchar *buf)
2755	{
2756	int result;
2757	DBUG_ENTER("handler::ha_rnd_next");
2758	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
2759	m_lock_type != F_UNLCK);
2760	DBUG_ASSERT(inited == RND);
2761
2762	do
2763	{
2764	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, `0`,
2765	{ result= rnd_next(buf); })
2766	if (result != HA_ERR_RECORD_DELETED)
2767	break;
2768	status_var_increment(table->in_use->status_var.ha_read_rnd_deleted_count);
2769	} while (!table->in_use->check_killed());
2770
2771	if (result == HA_ERR_RECORD_DELETED)
2772	result= HA_ERR_ABORTED_BY_USER;
2773	else
2774	{
2775	if (!result)
2776	{
2777	update_rows_read();
2778	if (table->vfield && buf == table->record[`0`])
2779	table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2780	}
2781	increment_statistics(&SSV::ha_read_rnd_next_count);
2782	}
2783
2784	table->status=result ? STATUS_NOT_FOUND: `0`;
2785	DBUG_RETURN(result);
2786	}
2787
2788	int handler::ha_rnd_pos(uchar buf, uchar pos)
2789	{
2790	int result;
2791	DBUG_ENTER("handler::ha_rnd_pos");
2792	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
2793	m_lock_type != F_UNLCK);
2794	/ TODO: Find out how to solve ha_rnd_pos when finding duplicate update. /
2795	/ DBUG_ASSERT(inited == RND); /
2796
2797	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, `0`,
2798	{ result= rnd_pos(buf, pos); })
2799	increment_statistics(&SSV::ha_read_rnd_count);
2800	if (result == HA_ERR_RECORD_DELETED)
2801	result= HA_ERR_KEY_NOT_FOUND;
2802	else if (!result)
2803	{
2804	update_rows_read();
2805	if (table->vfield && buf == table->record[`0`])
2806	table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2807	}
2808	table->status=result ? STATUS_NOT_FOUND: `0`;
2809	DBUG_RETURN(result);
2810	}
2811
2812	int handler::ha_index_read_map(uchar buf, const* uchar *key,
2813	key_part_map keypart_map,
2814	enum ha_rkey_function find_flag)
2815	{
2816	int result;
2817	DBUG_ENTER("handler::ha_index_read_map");
2818	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
2819	m_lock_type != F_UNLCK);
2820	DBUG_ASSERT(inited==INDEX);
2821
2822	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, `0`,
2823	{ result= index_read_map(buf, key, keypart_map, find_flag); })
2824	increment_statistics(&SSV::ha_read_key_count);
2825	if (!result)
2826	{
2827	update_index_statistics();
2828	if (table->vfield && buf == table->record[`0`])
2829	table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2830	}
2831	table->status=result ? STATUS_NOT_FOUND: `0`;
2832	DBUG_RETURN(result);
2833	}
2834
2835	/*
2836	@note: Other index lookup/navigation functions require prior
2837	handler->index_init() call. This function is different, it requires
2838	that the scan is not initialized, and accepts "uint index" as an argument.
2839	*/
2840
2841	int handler::ha_index_read_idx_map(uchar buf, uint index, const* uchar *key,
2842	key_part_map keypart_map,
2843	enum ha_rkey_function find_flag)
2844	{
2845	int result;
2846	DBUG_ASSERT(inited==NONE);
2847	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
2848	m_lock_type != F_UNLCK);
2849	DBUG_ASSERT(end_range == NULL);
2850	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, index, `0`,
2851	{ result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
2852	increment_statistics(&SSV::ha_read_key_count);
2853	if (!result)
2854	{
2855	update_rows_read();
2856	index_rows_read[index]++;
2857	if (table->vfield && buf == table->record[`0`])
2858	table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2859	}
2860	table->status=result ? STATUS_NOT_FOUND: `0`;
2861	return result;
2862	}
2863
2864	int handler::ha_index_next(uchar * buf)
2865	{
2866	int result;
2867	DBUG_ENTER("handler::ha_index_next");
2868	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
2869	m_lock_type != F_UNLCK);
2870	DBUG_ASSERT(inited==INDEX);
2871
2872	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, `0`,
2873	{ result= index_next(buf); })
2874	increment_statistics(&SSV::ha_read_next_count);
2875	if (!result)
2876	{
2877	update_index_statistics();
2878	if (table->vfield && buf == table->record[`0`])
2879	table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2880	}
2881	table->status=result ? STATUS_NOT_FOUND: `0`;
2882	DBUG_RETURN(result);
2883	}
2884
2885	int handler::ha_index_prev(uchar * buf)
2886	{
2887	int result;
2888	DBUG_ENTER("handler::ha_index_prev");
2889	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
2890	m_lock_type != F_UNLCK);
2891	DBUG_ASSERT(inited==INDEX);
2892
2893	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, `0`,
2894	{ result= index_prev(buf); })
2895	increment_statistics(&SSV::ha_read_prev_count);
2896	if (!result)
2897	{
2898	update_index_statistics();
2899	if (table->vfield && buf == table->record[`0`])
2900	table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2901	}
2902	table->status=result ? STATUS_NOT_FOUND: `0`;
2903	DBUG_RETURN(result);
2904	}
2905
2906	int handler::ha_index_first(uchar * buf)
2907	{
2908	int result;
2909	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
2910	m_lock_type != F_UNLCK);
2911	DBUG_ASSERT(inited==INDEX);
2912
2913	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, `0`,
2914	{ result= index_first(buf); })
2915	increment_statistics(&SSV::ha_read_first_count);
2916	if (!result)
2917	{
2918	update_index_statistics();
2919	if (table->vfield && buf == table->record[`0`])
2920	table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2921	}
2922	table->status=result ? STATUS_NOT_FOUND: `0`;
2923	return result;
2924	}
2925
2926	int handler::ha_index_last(uchar * buf)
2927	{
2928	int result;
2929	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
2930	m_lock_type != F_UNLCK);
2931	DBUG_ASSERT(inited==INDEX);
2932
2933	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, `0`,
2934	{ result= index_last(buf); })
2935	increment_statistics(&SSV::ha_read_last_count);
2936	if (!result)
2937	{
2938	update_index_statistics();
2939	if (table->vfield && buf == table->record[`0`])
2940	table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2941	}
2942	table->status=result ? STATUS_NOT_FOUND: `0`;
2943	return result;
2944	}
2945
2946	int handler::ha_index_next_same(uchar buf, const* uchar *key, uint keylen)
2947	{
2948	int result;
2949	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
2950	m_lock_type != F_UNLCK);
2951	DBUG_ASSERT(inited==INDEX);
2952
2953	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, `0`,
2954	{ result= index_next_same(buf, key, keylen); })
2955	increment_statistics(&SSV::ha_read_next_count);
2956	if (!result)
2957	{
2958	update_index_statistics();
2959	if (table->vfield && buf == table->record[`0`])
2960	table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2961	}
2962	table->status=result ? STATUS_NOT_FOUND: `0`;
2963	return result;
2964	}
2965
2966
2967	bool handler::ha_was_semi_consistent_read()
2968	{
2969	bool result= was_semi_consistent_read();
2970	if (result)
2971	increment_statistics(&SSV::ha_read_retry_count);
2972	return result;
2973	}
2974
2975	/ Initialize handler for random reading, with error handling /
2976
2977	int handler::ha_rnd_init_with_error(bool scan)
2978	{
2979	int error;
2980	if (likely(!(error= ha_rnd_init(scan))))
2981	return `0`;
2982	table->file->print_error(error, MYF(`0`));
2983	return error;
2984	}
2985
2986
2987	/**
2988	Read first row (only) from a table. Used for reading tables with
2989	only one row, either based on table statistics or if table is a SEQUENCE.
2990
2991	This is never called for normal InnoDB tables, as these table types
2992	does not have HA_STATS_RECORDS_IS_EXACT set.
2993	*/
2994	int handler::read_first_row(uchar * buf, uint primary_key)
2995	{
2996	int error;
2997	DBUG_ENTER("handler::read_first_row");
2998
2999	/*
3000	If there is very few deleted rows in the table, find the first row by
3001	scanning the table.
3002	TODO remove the test for HA_READ_ORDER
3003	*/
3004	if (stats.deleted < `10` \|\| primary_key >= MAX_KEY \|\|
3005	!(index_flags(primary_key, `0`, `0`) & HA_READ_ORDER))
3006	{
3007	if (likely(!(error= ha_rnd_init(`1`))))
3008	{
3009	error= ha_rnd_next(buf);
3010	const int end_error= ha_rnd_end();
3011	if (likely(!error))
3012	error= end_error;
3013	}
3014	}
3015	else
3016	{
3017	/ Find the first row through the primary key /
3018	if (likely(!(error= ha_index_init(primary_key, `0`))))
3019	{
3020	error= ha_index_first(buf);
3021	const int end_error= ha_index_end();
3022	if (likely(!error))
3023	error= end_error;
3024	}
3025	}
3026	DBUG_RETURN(error);
3027	}
3028
3029	/**
3030	Generate the next auto-increment number based on increment and offset.
3031	computes the lowest number
3032	- strictly greater than "nr"
3033	- of the form: auto_increment_offset + N auto_increment_increment*
3034	If overflow happened then return MAX_ULONGLONG value as an
3035	indication of overflow.
3036	In most cases increment= offset= 1, in which case we get:
3037	@verbatim 1,2,3,4,5,... @endverbatim
3038	If increment=10 and offset=5 and previous number is 1, we get:
3039	@verbatim 1,5,15,25,35,... @endverbatim
3040	*/
3041	inline ulonglong
3042	compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3043	{
3044	const ulonglong save_nr= nr;
3045
3046	if (variables->auto_increment_increment == `1`)
3047	nr= nr + `1`; // optimization of the formula below
3048	else
3049	{
3050	nr= (((nr+ variables->auto_increment_increment -
3051	variables->auto_increment_offset)) /
3052	(ulonglong) variables->auto_increment_increment);
3053	nr= (nr* (ulonglong) variables->auto_increment_increment +
3054	variables->auto_increment_offset);
3055	}
3056
3057	if (unlikely(nr <= save_nr))
3058	return ULONGLONG_MAX;
3059
3060	return nr;
3061	}
3062
3063
3064	void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3065	{
3066	/*
3067	If we have set THD::next_insert_id previously and plan to insert an
3068	explicitly-specified value larger than this, we need to increase
3069	THD::next_insert_id to be greater than the explicit value.
3070	*/
3071	if ((next_insert_id > `0`) && (nr >= next_insert_id))
3072	set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3073	}
3074
3075
3076	/**
3077	Update the auto_increment field if necessary.
3078
3079	Updates columns with type NEXT_NUMBER if:
3080
3081	- If column value is set to NULL (in which case
3082	auto_increment_field_not_null is 0)
3083	- If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3084	set. In the future we will only set NEXT_NUMBER fields if one sets them
3085	to NULL (or they are not included in the insert list).
3086
3087	In those cases, we check if the currently reserved interval still has
3088	values we have not used. If yes, we pick the smallest one and use it.
3089	Otherwise:
3090
3091	- If a list of intervals has been provided to the statement via SET
3092	INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3093	first unused interval from this list, consider it as reserved.
3094
3095	- Otherwise we set the column for the first row to the value
3096	next_insert_id(get_auto_increment(column))) which is usually
3097	max-used-column-value+1.
3098	We call get_auto_increment() for the first row in a multi-row
3099	statement. get_auto_increment() will tell us the interval of values it
3100	reserved for us.
3101
3102	- In both cases, for the following rows we use those reserved values without
3103	calling the handler again (we just progress in the interval, computing
3104	each new value from the previous one). Until we have exhausted them, then
3105	we either take the next provided interval or call get_auto_increment()
3106	again to reserve a new interval.
3107
3108	- In both cases, the reserved intervals are remembered in
3109	thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3110	binlogging; the last reserved interval is remembered in
3111	auto_inc_interval_for_cur_row. The number of reserved intervals is
3112	remembered in auto_inc_intervals_count. It differs from the number of
3113	elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3114	latter list is cumulative over all statements forming one binlog event
3115	(when stored functions and triggers are used), and collapses two
3116	contiguous intervals in one (see its append() method).
3117
3118	The idea is that generated auto_increment values are predictable and
3119	independent of the column values in the table. This is needed to be
3120	able to replicate into a table that already has rows with a higher
3121	auto-increment value than the one that is inserted.
3122
3123	After we have already generated an auto-increment number and the user
3124	inserts a column with a higher value than the last used one, we will
3125	start counting from the inserted value.
3126
3127	This function's "outputs" are: the table's auto_increment field is filled
3128	with a value, thd->next_insert_id is filled with the value to use for the
3129	next row, if a value was autogenerated for the current row it is stored in
3130	thd->insert_id_for_cur_row, if get_auto_increment() was called
3131	thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3132	present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3133	this list.
3134
3135	@todo
3136	Replace all references to "next number" or NEXT_NUMBER to
3137	"auto_increment", everywhere (see below: there is
3138	table->auto_increment_field_not_null, and there also exists
3139	table->next_number_field, it's not consistent).
3140
3141	@retval
3142	0 ok
3143	@retval
3144	HA_ERR_AUTOINC_READ_FAILED get_auto_increment() was called and
3145	returned ~(ulonglong) 0
3146	@retval
3147	HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3148	failure.
3149	*/
3150
3151	#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
3152	#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
3153	#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
3154
3155	int handler::update_auto_increment()
3156	{
3157	ulonglong nr, nb_reserved_values;
3158	bool append= FALSE;
3159	THD *thd= table->in_use;
3160	struct system_variables *variables= &thd->variables;
3161	int result=`0`, tmp;
3162	enum enum_check_fields save_count_cuted_fields;
3163	DBUG_ENTER("handler::update_auto_increment");
3164
3165	/*
3166	next_insert_id is a "cursor" into the reserved interval, it may go greater
3167	than the interval, but not smaller.
3168	*/
3169	DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
3170
3171	if ((nr= table->next_number_field->val_int()) != `0` \|\|
3172	(table->auto_increment_field_not_null &&
3173	thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
3174	{
3175	/*
3176	Update next_insert_id if we had already generated a value in this
3177	statement (case of INSERT VALUES(null),(3763),(null):
3178	the last NULL needs to insert 3764, not the value of the first NULL plus
3179	1).
3180	Ignore negative values.
3181	*/
3182	if ((longlong) nr > `0` \|\| (table->next_number_field->flags & UNSIGNED_FLAG))
3183	adjust_next_insert_id_after_explicit_value(nr);
3184	insert_id_for_cur_row= `0`; // didn't generate anything
3185	DBUG_RETURN(`0`);
3186	}
3187
3188	// ALTER TABLE ... ADD COLUMN ... AUTO_INCREMENT
3189	if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
3190	{
3191	if (table->versioned())
3192	{
3193	Field *end= table->vers_end_field();
3194	DBUG_ASSERT(end);
3195	bitmap_set_bit(table->read_set, end->field_index);
3196	if (!end->is_max())
3197	{
3198	if (!table->next_number_field->real_maybe_null())
3199	DBUG_RETURN(HA_ERR_UNSUPPORTED);
3200	table->next_number_field->set_null();
3201	DBUG_RETURN(`0`);
3202	}
3203	}
3204	table->next_number_field->set_notnull();
3205	}
3206
3207	if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
3208	{
3209	/ next_insert_id is beyond what is reserved, so we reserve more. /
3210	const Discrete_interval *forced=
3211	thd->auto_inc_intervals_forced.get_next();
3212	if (forced != NULL)
3213	{
3214	nr= forced->minimum();
3215	nb_reserved_values= forced->values();
3216	}
3217	else
3218	{
3219	/*
3220	handler::estimation_rows_to_insert was set by
3221	handler::ha_start_bulk_insert(); if 0 it means "unknown".
3222	*/
3223	ulonglong nb_desired_values;
3224	/*
3225	If an estimation was given to the engine:
3226	- use it.
3227	- if we already reserved numbers, it means the estimation was
3228	not accurate, then we'll reserve 2AUTO_INC_DEFAULT_NB_ROWS the 2nd*
3229	time, twice that the 3rd time etc.
3230	If no estimation was given, use those increasing defaults from the
3231	start, starting from AUTO_INC_DEFAULT_NB_ROWS.
3232	Don't go beyond a max to not reserve "way too much" (because
3233	reservation means potentially losing unused values).
3234	Note that in prelocked mode no estimation is given.
3235	*/
3236
3237	if ((auto_inc_intervals_count == `0`) && (estimation_rows_to_insert > `0`))
3238	nb_desired_values= estimation_rows_to_insert;
3239	else if ((auto_inc_intervals_count == `0`) &&
3240	(thd->lex->many_values.elements > `0`))
3241	{
3242	/*
3243	For multi-row inserts, if the bulk inserts cannot be started, the
3244	handler::estimation_rows_to_insert will not be set. But we still
3245	want to reserve the autoinc values.
3246	*/
3247	nb_desired_values= thd->lex->many_values.elements;
3248	}
3249	else / go with the increasing defaults /
3250	{
3251	/ avoid overflow in formula, with this if() /
3252	if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
3253	{
3254	nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
3255	(`1` << auto_inc_intervals_count);
3256	set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
3257	}
3258	else
3259	nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
3260	}
3261	get_auto_increment(variables->auto_increment_offset,
3262	variables->auto_increment_increment,
3263	nb_desired_values, &nr,
3264	&nb_reserved_values);
3265	if (nr == ULONGLONG_MAX)
3266	DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED); // Mark failure
3267
3268	/*
3269	That rounding below should not be needed when all engines actually
3270	respect offset and increment in get_auto_increment(). But they don't
3271	so we still do it. Wonder if for the not-first-in-index we should do
3272	it. Hope that this rounding didn't push us out of the interval; even
3273	if it did we cannot do anything about it (calling the engine again
3274	will not help as we inserted no row).
3275	*/
3276	nr= compute_next_insert_id(nr-`1`, variables);
3277	}
3278
3279	if (table->s->next_number_keypart == `0`)
3280	{
3281	/ We must defer the appending until "nr" has been possibly truncated /
3282	append= TRUE;
3283	}
3284	else
3285	{
3286	/*
3287	For such auto_increment there is no notion of interval, just a
3288	singleton. The interval is not even stored in
3289	thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
3290	for next row.
3291	*/
3292	DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
3293	}
3294	}
3295
3296	if (unlikely(nr == ULONGLONG_MAX))
3297	DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3298
3299	DBUG_ASSERT(nr != `0`);
3300	DBUG_PRINT("info",("auto_increment: %llu nb_reserved_values: %llu",
3301	nr, append ? nb_reserved_values : `0`));
3302
3303	/ Store field without warning (Warning will be printed by insert) /
3304	save_count_cuted_fields= thd->count_cuted_fields;
3305	thd->count_cuted_fields= CHECK_FIELD_IGNORE;
3306	tmp= table->next_number_field->store((longlong)nr, TRUE);
3307	thd->count_cuted_fields= save_count_cuted_fields;
3308
3309	if (unlikely(tmp)) // Out of range value in store
3310	{
3311	/*
3312	It's better to return an error here than getting a confusing
3313	'duplicate key error' later.
3314	*/
3315	result= HA_ERR_AUTOINC_ERANGE;
3316	}
3317	if (append)
3318	{
3319	auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
3320	variables->auto_increment_increment);
3321	auto_inc_intervals_count++;
3322	/ Row-based replication does not need to store intervals in binlog /
3323	if (((WSREP(thd) && wsrep_emulate_bin_log ) \|\| mysql_bin_log.is_open())
3324	&& !thd->is_current_stmt_binlog_format_row())
3325	thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3326	append(auto_inc_interval_for_cur_row.minimum(),
3327	auto_inc_interval_for_cur_row.values(),
3328	variables->auto_increment_increment);
3329	}
3330
3331	/*
3332	Record this autogenerated value. If the caller then
3333	succeeds to insert this value, it will call
3334	record_first_successful_insert_id_in_cur_stmt()
3335	which will set first_successful_insert_id_in_cur_stmt if it's not
3336	already set.
3337	*/
3338	insert_id_for_cur_row= nr;
3339
3340	if (result) // overflow
3341	DBUG_RETURN(result);
3342
3343	/*
3344	Set next insert id to point to next auto-increment value to be able to
3345	handle multi-row statements.
3346	*/
3347	set_next_insert_id(compute_next_insert_id(nr, variables));
3348
3349	DBUG_RETURN(`0`);
3350	}
3351
3352
3353	/* @brief*
3354	MySQL signal that it changed the column bitmap
3355
3356	USAGE
3357	This is for handlers that needs to setup their own column bitmaps.
3358	Normally the handler should set up their own column bitmaps in
3359	index_init() or rnd_init() and in any column_bitmaps_signal() call after
3360	this.
3361
3362	The handler is allowd to do changes to the bitmap after a index_init or
3363	rnd_init() call is made as after this, MySQL will not use the bitmap
3364	for any program logic checking.
3365	*/
3366	void handler::column_bitmaps_signal()
3367	{
3368	DBUG_ENTER("column_bitmaps_signal");
3369	if (table)
3370	DBUG_PRINT("info", ("read_set: %p write_set: %p",
3371	table->read_set, table->write_set));
3372	DBUG_VOID_RETURN;
3373	}
3374
3375
3376	/* @brief*
3377	Reserves an interval of auto_increment values from the handler.
3378
3379	SYNOPSIS
3380	get_auto_increment()
3381	offset
3382	increment
3383	nb_desired_values how many values we want
3384	first_value (OUT) the first value reserved by the handler
3385	nb_reserved_values (OUT) how many values the handler reserved
3386
3387	offset and increment means that we want values to be of the form
3388	offset + N increment, where N>=0 is integer.*
3389	If the function sets first_value to ~(ulonglong)0 it means an error.*
3390	If the function sets nb_reserved_values to ULONGLONG_MAX it means it has*
3391	reserved to "positive infinite".
3392	*/
3393	void handler::get_auto_increment(ulonglong offset, ulonglong increment,
3394	ulonglong nb_desired_values,
3395	ulonglong *first_value,
3396	ulonglong *nb_reserved_values)
3397	{
3398	ulonglong nr;
3399	int error;
3400	MY_BITMAP *old_read_set;
3401
3402	old_read_set= table->prepare_for_keyread(table->s->next_number_index);
3403
3404	if (ha_index_init(table->s->next_number_index, `1`))
3405	{
3406	/ This should never happen, assert in debug, and fail in release build /
3407	DBUG_ASSERT(`0`);
3408	(void) extra(HA_EXTRA_NO_KEYREAD);
3409	*first_value= ULONGLONG_MAX;
3410	return;
3411	}
3412
3413	if (table->s->next_number_keypart == `0`)
3414	{ // Autoincrement at key-start
3415	error= ha_index_last(table->record[`1`]);
3416	/*
3417	MySQL implicitely assumes such method does locking (as MySQL decides to
3418	use nr+increment without checking again with the handler, in
3419	handler::update_auto_increment()), so reserves to infinite.
3420	*/
3421	*nb_reserved_values= ULONGLONG_MAX;
3422	}
3423	else
3424	{
3425	uchar key[MAX_KEY_LENGTH];
3426	key_copy(key, table->record[`0`],
3427	table->key_info + table->s->next_number_index,
3428	table->s->next_number_key_offset);
3429	error= ha_index_read_map(table->record[`1`], key,
3430	make_prev_keypart_map(table->s->
3431	next_number_keypart),
3432	HA_READ_PREFIX_LAST);
3433	/*
3434	MySQL needs to call us for next row: assume we are inserting ("a",null)
3435	here, we return 3, and next this statement will want to insert
3436	("b",null): there is no reason why ("b",3+1) would be the good row to
3437	insert: maybe it already exists, maybe 3+1 is too large...
3438	*/
3439	*nb_reserved_values= `1`;
3440	}
3441
3442	if (unlikely(error))
3443	{
3444	if (error == HA_ERR_END_OF_FILE \|\| error == HA_ERR_KEY_NOT_FOUND)
3445	/ No entry found, that's fine /;
3446	else
3447	print_error(error, MYF(`0`));
3448	nr= `1`;
3449	}
3450	else
3451	nr= ((ulonglong) table->next_number_field->
3452	val_int_offset(table->s->rec_buff_length)+`1`);
3453	ha_index_end();
3454	table->restore_column_maps_after_keyread(old_read_set);
3455	*first_value= nr;
3456	return;
3457	}
3458
3459
3460	void handler::ha_release_auto_increment()
3461	{
3462	DBUG_ENTER("ha_release_auto_increment");
3463	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
3464	m_lock_type != F_UNLCK \|\|
3465	(!next_insert_id && !insert_id_for_cur_row));
3466	release_auto_increment();
3467	insert_id_for_cur_row= `0`;
3468	auto_inc_interval_for_cur_row.replace(`0`, `0`, `0`);
3469	auto_inc_intervals_count= `0`;
3470	if (next_insert_id > `0`)
3471	{
3472	next_insert_id= `0`;
3473	/*
3474	this statement used forced auto_increment values if there were some,
3475	wipe them away for other statements.
3476	*/
3477	table->in_use->auto_inc_intervals_forced.empty();
3478	}
3479	DBUG_VOID_RETURN;
3480	}
3481
3482
3483	/**
3484	Construct and emit duplicate key error message using information
3485	from table's record buffer.
3486
3487	@param table TABLE object which record buffer should be used as
3488	source for column values.
3489	@param key Key description.
3490	@param msg Error message template to which key value should be
3491	added.
3492	@param errflag Flags for my_error() call.
3493
3494	@notes
3495	The error message is from ER_DUP_ENTRY_WITH_KEY_NAME but to keep things compatibly
3496	with old code, the error number is ER_DUP_ENTRY
3497	*/
3498
3499	void print_keydup_error(TABLE table, KEY key, const char *msg, myf errflag)
3500	{
3501	/ Write the duplicated key in the error message /
3502	char key_buff[MAX_KEY_LENGTH];
3503	String str(key_buff,sizeof(key_buff),system_charset_info);
3504
3505	if (key == NULL)
3506	{
3507	/*
3508	Key is unknown. Should only happen if storage engine reports wrong
3509	duplicate key number.
3510	*/
3511	my_printf_error(ER_DUP_ENTRY, msg, errflag, "", "UNKNOWN");
3512	}
3513	else
3514	{
3515	/ Table is opened and defined at this point /
3516	key_unpack(&str,table, key);
3517	uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
3518	if (str.length() >= max_length)
3519	{
3520	str.length(max_length-`4`);
3521	str.append(STRING_WITH_LEN("..."));
3522	}
3523	my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(),
3524	key->name.str);
3525	}
3526	}
3527
3528	/**
3529	Construct and emit duplicate key error message using information
3530	from table's record buffer.
3531
3532	@sa print_keydup_error(table, key, msg, errflag).
3533	*/
3534
3535	void print_keydup_error(TABLE table, KEY key, myf errflag)
3536	{
3537	print_keydup_error(table, key,
3538	ER_THD(table->in_use, ER_DUP_ENTRY_WITH_KEY_NAME),
3539	errflag);
3540	}
3541
3542
3543	/**
3544	Print error that we got from handler function.
3545
3546	@note
3547	In case of delete table it's only safe to use the following parts of
3548	the 'table' structure:
3549	- table->s->path
3550	- table->alias
3551	*/
3552
3553	#define SET_FATAL_ERROR fatal_error=1
3554
3555	void handler::print_error(int error, myf errflag)
3556	{
3557	bool fatal_error= `0`;
3558	DBUG_ENTER("handler::print_error");
3559	DBUG_PRINT("enter",("error: %d",error));
3560
3561	if (ha_thd()->transaction_rollback_request)
3562	{
3563	/ Ensure this becomes a true error /
3564	errflag&= ~(ME_JUST_WARNING \| ME_JUST_INFO);
3565	}
3566
3567	int textno= -`1`; // impossible value
3568	switch (error) {
3569	case EACCES:
3570	textno=ER_OPEN_AS_READONLY;
3571	break;
3572	case EAGAIN:
3573	textno=ER_FILE_USED;
3574	break;
3575	case ENOENT:
3576	case ENOTDIR:
3577	case ELOOP:
3578	textno=ER_FILE_NOT_FOUND;
3579	break;
3580	case ENOSPC:
3581	case HA_ERR_DISK_FULL:
3582	textno= ER_DISK_FULL;
3583	SET_FATAL_ERROR; // Ensure error is logged
3584	break;
3585	case HA_ERR_KEY_NOT_FOUND:
3586	case HA_ERR_NO_ACTIVE_RECORD:
3587	case HA_ERR_RECORD_DELETED:
3588	case HA_ERR_END_OF_FILE:
3589	/*
3590	This errors is not not normally fatal (for example for reads). However
3591	if you get it during an update or delete, then its fatal.
3592	As the user is calling print_error() (which is not done on read), we
3593	assume something when wrong with the update or delete.
3594	*/
3595	SET_FATAL_ERROR;
3596	textno=ER_KEY_NOT_FOUND;
3597	break;
3598	case HA_ERR_ABORTED_BY_USER:
3599	{
3600	DBUG_ASSERT(table->in_use->killed);
3601	table->in_use->send_kill_message();
3602	DBUG_VOID_RETURN;
3603	}
3604	case HA_ERR_WRONG_MRG_TABLE_DEF:
3605	textno=ER_WRONG_MRG_TABLE;
3606	break;
3607	case HA_ERR_FOUND_DUPP_KEY:
3608	{
3609	if (table)
3610	{
3611	uint key_nr=get_dup_key(error);
3612	if ((int) key_nr >= `0` && key_nr < table->s->keys)
3613	{
3614	print_keydup_error(table, &table->key_info[key_nr], errflag);
3615	DBUG_VOID_RETURN;
3616	}
3617	}
3618	textno=ER_DUP_KEY;
3619	break;
3620	}
3621	case HA_ERR_FOREIGN_DUPLICATE_KEY:
3622	{
3623	char rec_buf[MAX_KEY_LENGTH];
3624	String rec(rec_buf, sizeof(rec_buf), system_charset_info);
3625	/ Table is opened and defined at this point /
3626
3627	/*
3628	Just print the subset of fields that are part of the first index,
3629	printing the whole row from there is not easy.
3630	*/
3631	key_unpack(&rec, table, &table->key_info[`0`]);
3632
3633	char child_table_name[NAME_LEN + `1`];
3634	char child_key_name[NAME_LEN + `1`];
3635	if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
3636	child_key_name, sizeof(child_key_name)))
3637	{
3638	my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
3639	table_share->table_name.str, rec.c_ptr_safe(),
3640	child_table_name, child_key_name);
3641	}
3642	else
3643	{
3644	my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
3645	table_share->table_name.str, rec.c_ptr_safe());
3646	}
3647	DBUG_VOID_RETURN;
3648	}
3649	case HA_ERR_NULL_IN_SPATIAL:
3650	my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
3651	DBUG_VOID_RETURN;
3652	case HA_ERR_FOUND_DUPP_UNIQUE:
3653	textno=ER_DUP_UNIQUE;
3654	break;
3655	case HA_ERR_RECORD_CHANGED:
3656	/*
3657	This is not fatal error when using HANDLER interface
3658	SET_FATAL_ERROR;
3659	*/
3660	textno=ER_CHECKREAD;
3661	break;
3662	case HA_ERR_CRASHED:
3663	SET_FATAL_ERROR;
3664	textno=ER_NOT_KEYFILE;
3665	break;
3666	case HA_ERR_WRONG_IN_RECORD:
3667	SET_FATAL_ERROR;
3668	textno= ER_CRASHED_ON_USAGE;
3669	break;
3670	case HA_ERR_CRASHED_ON_USAGE:
3671	SET_FATAL_ERROR;
3672	textno=ER_CRASHED_ON_USAGE;
3673	break;
3674	case HA_ERR_NOT_A_TABLE:
3675	textno= error;
3676	break;
3677	case HA_ERR_CRASHED_ON_REPAIR:
3678	SET_FATAL_ERROR;
3679	textno=ER_CRASHED_ON_REPAIR;
3680	break;
3681	case HA_ERR_OUT_OF_MEM:
3682	textno=ER_OUT_OF_RESOURCES;
3683	break;
3684	case HA_ERR_WRONG_COMMAND:
3685	my_error(ER_ILLEGAL_HA, MYF(`0`), table_type(), table_share->db.str,
3686	table_share->table_name.str);
3687	DBUG_VOID_RETURN;
3688	break;
3689	case HA_ERR_OLD_FILE:
3690	textno=ER_OLD_KEYFILE;
3691	break;
3692	case HA_ERR_UNSUPPORTED:
3693	textno=ER_UNSUPPORTED_EXTENSION;
3694	break;
3695	case HA_ERR_RECORD_FILE_FULL:
3696	{
3697	textno=ER_RECORD_FILE_FULL;
3698	/ Write the error message to error log /
3699	errflag\|= ME_NOREFRESH;
3700	break;
3701	}
3702	case HA_ERR_INDEX_FILE_FULL:
3703	{
3704	textno=ER_INDEX_FILE_FULL;
3705	/ Write the error message to error log /
3706	errflag\|= ME_NOREFRESH;
3707	break;
3708	}
3709	case HA_ERR_LOCK_WAIT_TIMEOUT:
3710	textno=ER_LOCK_WAIT_TIMEOUT;
3711	break;
3712	case HA_ERR_LOCK_TABLE_FULL:
3713	textno=ER_LOCK_TABLE_FULL;
3714	break;
3715	case HA_ERR_LOCK_DEADLOCK:
3716	{
3717	String str, full_err_msg(ER_DEFAULT(ER_LOCK_DEADLOCK), system_charset_info);
3718
3719	get_error_message(error, &str);
3720	full_err_msg.append(str);
3721	my_printf_error(ER_LOCK_DEADLOCK, "%s", errflag, full_err_msg.c_ptr_safe());
3722	DBUG_VOID_RETURN;
3723	}
3724	case HA_ERR_READ_ONLY_TRANSACTION:
3725	textno=ER_READ_ONLY_TRANSACTION;
3726	break;
3727	case HA_ERR_CANNOT_ADD_FOREIGN:
3728	textno=ER_CANNOT_ADD_FOREIGN;
3729	break;
3730	case HA_ERR_ROW_IS_REFERENCED:
3731	{
3732	String str;
3733	get_error_message(error, &str);
3734	my_printf_error(ER_ROW_IS_REFERENCED_2,
3735	ER(str.length() ? ER_ROW_IS_REFERENCED_2 : ER_ROW_IS_REFERENCED),
3736	errflag, str.c_ptr_safe());
3737	DBUG_VOID_RETURN;
3738	}
3739	case HA_ERR_NO_REFERENCED_ROW:
3740	{
3741	String str;
3742	get_error_message(error, &str);
3743	my_printf_error(ER_NO_REFERENCED_ROW_2,
3744	ER(str.length() ? ER_NO_REFERENCED_ROW_2 : ER_NO_REFERENCED_ROW),
3745	errflag, str.c_ptr_safe());
3746	DBUG_VOID_RETURN;
3747	}
3748	case HA_ERR_TABLE_DEF_CHANGED:
3749	textno=ER_TABLE_DEF_CHANGED;
3750	break;
3751	case HA_ERR_NO_SUCH_TABLE:
3752	my_error(ER_NO_SUCH_TABLE_IN_ENGINE, errflag, table_share->db.str,
3753	table_share->table_name.str);
3754	DBUG_VOID_RETURN;
3755	case HA_ERR_RBR_LOGGING_FAILED:
3756	textno= ER_BINLOG_ROW_LOGGING_FAILED;
3757	break;
3758	case HA_ERR_DROP_INDEX_FK:
3759	{
3760	const char *ptr= "???";
3761	uint key_nr= get_dup_key(error);
3762	if ((int) key_nr >= `0`)
3763	ptr= table->key_info[key_nr].name.str;
3764	my_error(ER_DROP_INDEX_FK, errflag, ptr);
3765	DBUG_VOID_RETURN;
3766	}
3767	case HA_ERR_TABLE_NEEDS_UPGRADE:
3768	textno= ER_TABLE_NEEDS_UPGRADE;
3769	my_error(ER_TABLE_NEEDS_UPGRADE, errflag,
3770	"TABLE", table_share->table_name.str);
3771	DBUG_VOID_RETURN;
3772	case HA_ERR_NO_PARTITION_FOUND:
3773	textno=ER_WRONG_PARTITION_NAME;
3774	break;
3775	case HA_ERR_TABLE_READONLY:
3776	textno= ER_OPEN_AS_READONLY;
3777	break;
3778	case HA_ERR_AUTOINC_READ_FAILED:
3779	textno= ER_AUTOINC_READ_FAILED;
3780	break;
3781	case HA_ERR_AUTOINC_ERANGE:
3782	textno= error;
3783	my_error(textno, errflag, table->next_number_field->field_name.str,
3784	table->in_use->get_stmt_da()->current_row_for_warning());
3785	DBUG_VOID_RETURN;
3786	break;
3787	case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
3788	textno= ER_TOO_MANY_CONCURRENT_TRXS;
3789	break;
3790	case HA_ERR_INDEX_COL_TOO_LONG:
3791	textno= ER_INDEX_COLUMN_TOO_LONG;
3792	break;
3793	case HA_ERR_NOT_IN_LOCK_PARTITIONS:
3794	textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
3795	break;
3796	case HA_ERR_INDEX_CORRUPT:
3797	textno= ER_INDEX_CORRUPT;
3798	break;
3799	case HA_ERR_UNDO_REC_TOO_BIG:
3800	textno= ER_UNDO_RECORD_TOO_BIG;
3801	break;
3802	case HA_ERR_TABLE_IN_FK_CHECK:
3803	textno= ER_TABLE_IN_FK_CHECK;
3804	break;
3805	default:
3806	{
3807	/ The error was "unknown" to this function.*
3808	Ask handler if it has got a message for this error /*
3809	bool temporary= FALSE;
3810	String str;
3811	temporary= get_error_message(error, &str);
3812	if (!str.is_empty())
3813	{
3814	const char* engine= table_type();
3815	if (temporary)
3816	my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.c_ptr(),
3817	engine);
3818	else
3819	{
3820	SET_FATAL_ERROR;
3821	my_error(ER_GET_ERRMSG, errflag, error, str.c_ptr(), engine);
3822	}
3823	}
3824	else
3825	my_error(ER_GET_ERRNO, errflag, error, table_type());
3826	DBUG_VOID_RETURN;
3827	}
3828	}
3829	DBUG_ASSERT(textno > `0`);
3830	if (unlikely(fatal_error))
3831	{
3832	/ Ensure this becomes a true error /
3833	errflag&= ~(ME_JUST_WARNING \| ME_JUST_INFO);
3834	if ((debug_assert_if_crashed_table \|\|
3835	global_system_variables.log_warnings > `1`))
3836	{
3837	/*
3838	Log error to log before we crash or if extended warnings are requested
3839	*/
3840	errflag\|= ME_NOREFRESH;
3841	}
3842	}
3843
3844	/ if we got an OS error from a file-based engine, specify a path of error /
3845	if (error < HA_ERR_FIRST && bas_ext()[`0`])
3846	{
3847	char buff[FN_REFLEN];
3848	strxnmov(buff, sizeof(buff),
3849	table_share->normalized_path.str, bas_ext()[`0`], NULL);
3850	my_error(textno, errflag, buff, error);
3851	}
3852	else
3853	my_error(textno, errflag, table_share->table_name.str, error);
3854	DBUG_VOID_RETURN;
3855	}
3856
3857
3858	/**
3859	Return an error message specific to this handler.
3860
3861	@param error error code previously returned by handler
3862	@param buf pointer to String where to add error message
3863
3864	@return
3865	Returns true if this is a temporary error
3866	*/
3867	bool handler::get_error_message(int error, String* buf)
3868	{
3869	DBUG_EXECUTE_IF("external_lock_failure",
3870	buf->set_ascii(STRING_WITH_LEN("KABOOM!")););
3871	return FALSE;
3872	}
3873
3874	/**
3875	Check for incompatible collation changes.
3876
3877	@retval
3878	HA_ADMIN_NEEDS_UPGRADE Table may have data requiring upgrade.
3879	@retval
3880	0 No upgrade required.
3881	*/
3882
3883	int handler::check_collation_compatibility()
3884	{
3885	ulong mysql_version= table->s->mysql_version;
3886
3887	if (mysql_version < `50124`)
3888	{
3889	KEY *key= table->key_info;
3890	KEY *key_end= key + table->s->keys;
3891	for (; key < key_end; key++)
3892	{
3893	KEY_PART_INFO *key_part= key->key_part;
3894	KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
3895	for (; key_part < key_part_end; key_part++)
3896	{
3897	if (!key_part->fieldnr)
3898	continue;
3899	Field *field= table->field[key_part->fieldnr - `1`];
3900	uint cs_number= field->charset()->number;
3901	if ((mysql_version < `50048` &&
3902	(cs_number == `11` \|\| / ascii_general_ci - bug #29499, bug #27562 /
3903	cs_number == `41` \|\| / latin7_general_ci - bug #29461 /
3904	cs_number == `42` \|\| / latin7_general_cs - bug #29461 /
3905	cs_number == `20` \|\| / latin7_estonian_cs - bug #29461 /
3906	cs_number == `21` \|\| / latin2_hungarian_ci - bug #29461 /
3907	cs_number == `22` \|\| / koi8u_general_ci - bug #29461 /
3908	cs_number == `23` \|\| / cp1251_ukrainian_ci - bug #29461 /
3909	cs_number == `26`)) \|\| / cp1250_general_ci - bug #29461 /
3910	(mysql_version < `50124` &&
3911	(cs_number == `33` \|\| / utf8_general_ci - bug #27877 /
3912	cs_number == `35`))) / ucs2_general_ci - bug #27877 /
3913	return HA_ADMIN_NEEDS_UPGRADE;
3914	}
3915	}
3916	}
3917
3918	return `0`;
3919	}
3920
3921
3922	int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
3923	{
3924	int error;
3925	KEY keyinfo, keyend;
3926	KEY_PART_INFO keypart, keypartend;
3927
3928	if (table->s->incompatible_version)
3929	return HA_ADMIN_NEEDS_ALTER;
3930
3931	if (!table->s->mysql_version)
3932	{
3933	/ check for blob-in-key error /
3934	keyinfo= table->key_info;
3935	keyend= table->key_info + table->s->keys;
3936	for (; keyinfo < keyend; keyinfo++)
3937	{
3938	keypart= keyinfo->key_part;
3939	keypartend= keypart + keyinfo->user_defined_key_parts;
3940	for (; keypart < keypartend; keypart++)
3941	{
3942	if (!keypart->fieldnr)
3943	continue;
3944	Field *field= table->field[keypart->fieldnr-`1`];
3945	if (field->type() == MYSQL_TYPE_BLOB)
3946	{
3947	if (check_opt->sql_flags & TT_FOR_UPGRADE)
3948	check_opt->flags= T_MEDIUM;
3949	return HA_ADMIN_NEEDS_CHECK;
3950	}
3951	}
3952	}
3953	}
3954	if (table->s->frm_version < FRM_VER_TRUE_VARCHAR)
3955	return HA_ADMIN_NEEDS_ALTER;
3956
3957	if (unlikely((error= check_collation_compatibility())))
3958	return error;
3959
3960	return check_for_upgrade(check_opt);
3961	}
3962
3963
3964	int handler::check_old_types()
3965	{
3966	Field** field;
3967
3968	if (!table->s->mysql_version)
3969	{
3970	/ check for bad DECIMAL field /
3971	for (field= table->field; (*field); field++)
3972	{
3973	if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL)
3974	{
3975	return HA_ADMIN_NEEDS_ALTER;
3976	}
3977	if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
3978	{
3979	return HA_ADMIN_NEEDS_ALTER;
3980	}
3981	}
3982	}
3983	return `0`;
3984	}
3985
3986
3987	static bool update_frm_version(TABLE *table)
3988	{
3989	char path[FN_REFLEN];
3990	File file;
3991	int result= `1`;
3992	DBUG_ENTER("update_frm_version");
3993
3994	/*
3995	No need to update frm version in case table was created or checked
3996	by server with the same version. This also ensures that we do not
3997	update frm version for temporary tables as this code doesn't support
3998	temporary tables.
3999	*/
4000	if (table->s->mysql_version == MYSQL_VERSION_ID)
4001	DBUG_RETURN(`0`);
4002
4003	strxmov(path, table->s->normalized_path.str, reg_ext, NullS);
4004
4005	if ((file= mysql_file_open(key_file_frm,
4006	path, O_RDWR\|O_BINARY, MYF(MY_WME))) >= `0`)
4007	{
4008	uchar version[`4`];
4009
4010	int4store(version, MYSQL_VERSION_ID);
4011
4012	if ((result= (int)mysql_file_pwrite(file, (uchar*) version, `4`, `51L`, MYF_RW)))
4013	goto err;
4014
4015	table->s->mysql_version= MYSQL_VERSION_ID;
4016	}
4017	err:
4018	if (file >= `0`)
4019	(void) mysql_file_close(file, MYF(MY_WME));
4020	DBUG_RETURN(result);
4021	}
4022
4023
4024
4025	/**
4026	@return
4027	key if error because of duplicated keys
4028	*/
4029	uint handler::get_dup_key(int error)
4030	{
4031	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4032	m_lock_type != F_UNLCK);
4033	DBUG_ENTER("handler::get_dup_key");
4034	table->file->errkey = (uint) -`1`;
4035	if (error == HA_ERR_FOUND_DUPP_KEY \|\|
4036	error == HA_ERR_FOREIGN_DUPLICATE_KEY \|\|
4037	error == HA_ERR_FOUND_DUPP_UNIQUE \|\| error == HA_ERR_NULL_IN_SPATIAL \|\|
4038	error == HA_ERR_DROP_INDEX_FK)
4039	table->file->info(HA_STATUS_ERRKEY \| HA_STATUS_NO_LOCK);
4040	DBUG_RETURN(table->file->errkey);
4041	}
4042
4043
4044	/**
4045	Delete all files with extension from bas_ext().
4046
4047	@param name Base name of table
4048
4049	@note
4050	We assume that the handler may return more extensions than
4051	was actually used for the file.
4052
4053	@retval
4054	0 If we successfully deleted at least one file from base_ext and
4055	didn't get any other errors than ENOENT
4056	@retval
4057	!0 Error
4058	*/
4059	int handler::delete_table(const char *name)
4060	{
4061	int saved_error= `0`;
4062	int error= `0`;
4063	int enoent_or_zero;
4064
4065	if (ht->discover_table)
4066	enoent_or_zero= `0`; // the table may not exist in the engine, it's ok
4067	else
4068	enoent_or_zero= ENOENT; // the first file of bas_ext() must* exist*
4069
4070	for (const char *ext=bas_ext(); ext ; ext++)
4071	{
4072	if (mysql_file_delete_with_symlink(key_file_misc, name, *ext, `0`))
4073	{
4074	if (my_errno != ENOENT)
4075	{
4076	/*
4077	If error on the first existing file, return the error.
4078	Otherwise delete as much as possible.
4079	*/
4080	if (enoent_or_zero)
4081	return my_errno;
4082	saved_error= my_errno;
4083	}
4084	}
4085	else
4086	enoent_or_zero= `0`; // No error for ENOENT
4087	error= enoent_or_zero;
4088	}
4089	return saved_error ? saved_error : error;
4090	}
4091
4092
4093	int handler::rename_table(const char * from, const char * to)
4094	{
4095	int error= `0`;
4096	const char ext, start_ext;
4097	start_ext= bas_ext();
4098	for (ext= start_ext; *ext ; ext++)
4099	{
4100	if (unlikely(rename_file_ext(from, to, *ext)))
4101	{
4102	if ((error=my_errno) != ENOENT)
4103	break;
4104	error= `0`;
4105	}
4106	}
4107	if (unlikely(error))
4108	{
4109	/ Try to revert the rename. Ignore errors. /
4110	for (; ext >= start_ext; ext--)
4111	rename_file_ext(to, from, *ext);
4112	}
4113	return error;
4114	}
4115
4116
4117	void handler::drop_table(const char *name)
4118	{
4119	ha_close();
4120	delete_table(name);
4121	}
4122
4123
4124	/**
4125	Performs checks upon the table.
4126
4127	@param thd thread doing CHECK TABLE operation
4128	@param check_opt options from the parser
4129
4130	@retval
4131	HA_ADMIN_OK Successful upgrade
4132	@retval
4133	HA_ADMIN_NEEDS_UPGRADE Table has structures requiring upgrade
4134	@retval
4135	HA_ADMIN_NEEDS_ALTER Table has structures requiring ALTER TABLE
4136	@retval
4137	HA_ADMIN_NOT_IMPLEMENTED
4138	*/
4139	int handler::ha_check(THD thd, HA_CHECK_OPT check_opt)
4140	{
4141	int error;
4142	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4143	m_lock_type != F_UNLCK);
4144
4145	if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4146	(check_opt->sql_flags & TT_FOR_UPGRADE))
4147	return `0`;
4148
4149	if (table->s->mysql_version < MYSQL_VERSION_ID)
4150	{
4151	if (unlikely((error= check_old_types())))
4152	return error;
4153	error= ha_check_for_upgrade(check_opt);
4154	if (unlikely(error && (error != HA_ADMIN_NEEDS_CHECK)))
4155	return error;
4156	if (unlikely(!error && (check_opt->sql_flags & TT_FOR_UPGRADE)))
4157	return `0`;
4158	}
4159	if (unlikely((error= check(thd, check_opt))))
4160	return error;
4161	/ Skip updating frm version if not main handler. /
4162	if (table->file != this)
4163	return error;
4164	return update_frm_version(table);
4165	}
4166
4167	/**
4168	A helper function to mark a transaction read-write,
4169	if it is started.
4170	*/
4171
4172	void handler::mark_trx_read_write_internal()
4173	{
4174	Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[`0`];
4175	/*
4176	When a storage engine method is called, the transaction must
4177	have been started, unless it's a DDL call, for which the
4178	storage engine starts the transaction internally, and commits
4179	it internally, without registering in the ha_list.
4180	Unfortunately here we can't know know for sure if the engine
4181	has registered the transaction or not, so we must check.
4182	*/
4183	if (ha_info->is_started())
4184	{
4185	DBUG_ASSERT(has_transaction_manager());
4186	/*
4187	table_share can be NULL in ha_delete_table(). See implementation
4188	of standalone function ha_delete_table() in sql_base.cc.
4189	*/
4190	if (table_share == NULL \|\| table_share->tmp_table == NO_TMP_TABLE)
4191	ha_info->set_trx_read_write();
4192	}
4193	}
4194
4195
4196	/**
4197	Repair table: public interface.
4198
4199	@sa handler::repair()
4200	*/
4201
4202	int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
4203	{
4204	int result;
4205
4206	mark_trx_read_write();
4207
4208	result= repair(thd, check_opt);
4209	DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED \|\|
4210	ha_table_flags() & HA_CAN_REPAIR);
4211
4212	if (result == HA_ADMIN_OK)
4213	result= update_frm_version(table);
4214	return result;
4215	}
4216
4217
4218	/**
4219	Bulk update row: public interface.
4220
4221	@sa handler::bulk_update_row()
4222	*/
4223
4224	int
4225	handler::ha_bulk_update_row(const uchar old_data, const* uchar *new_data,
4226	ha_rows *dup_key_found)
4227	{
4228	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4229	m_lock_type == F_WRLCK);
4230	mark_trx_read_write();
4231
4232	return bulk_update_row(old_data, new_data, dup_key_found);
4233	}
4234
4235
4236	/**
4237	Delete all rows: public interface.
4238
4239	@sa handler::delete_all_rows()
4240	*/
4241
4242	int
4243	handler::ha_delete_all_rows()
4244	{
4245	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4246	m_lock_type == F_WRLCK);
4247	mark_trx_read_write();
4248
4249	return delete_all_rows();
4250	}
4251
4252
4253	/**
4254	Truncate table: public interface.
4255
4256	@sa handler::truncate()
4257	*/
4258
4259	int
4260	handler::ha_truncate()
4261	{
4262	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4263	m_lock_type == F_WRLCK);
4264	mark_trx_read_write();
4265
4266	return truncate();
4267	}
4268
4269
4270	/**
4271	Reset auto increment: public interface.
4272
4273	@sa handler::reset_auto_increment()
4274	*/
4275
4276	int
4277	handler::ha_reset_auto_increment(ulonglong value)
4278	{
4279	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4280	m_lock_type == F_WRLCK);
4281	mark_trx_read_write();
4282
4283	return reset_auto_increment(value);
4284	}
4285
4286
4287	/**
4288	Optimize table: public interface.
4289
4290	@sa handler::optimize()
4291	*/
4292
4293	int
4294	handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
4295	{
4296	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4297	m_lock_type == F_WRLCK);
4298	mark_trx_read_write();
4299
4300	return optimize(thd, check_opt);
4301	}
4302
4303
4304	/**
4305	Analyze table: public interface.
4306
4307	@sa handler::analyze()
4308	*/
4309
4310	int
4311	handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
4312	{
4313	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4314	m_lock_type != F_UNLCK);
4315	mark_trx_read_write();
4316
4317	return analyze(thd, check_opt);
4318	}
4319
4320
4321	/**
4322	Check and repair table: public interface.
4323
4324	@sa handler::check_and_repair()
4325	*/
4326
4327	bool
4328	handler::ha_check_and_repair(THD *thd)
4329	{
4330	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4331	m_lock_type == F_UNLCK);
4332	mark_trx_read_write();
4333
4334	return check_and_repair(thd);
4335	}
4336
4337
4338	/**
4339	Disable indexes: public interface.
4340
4341	@sa handler::disable_indexes()
4342	*/
4343
4344	int
4345	handler::ha_disable_indexes(uint mode)
4346	{
4347	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4348	m_lock_type != F_UNLCK);
4349	mark_trx_read_write();
4350
4351	return disable_indexes(mode);
4352	}
4353
4354
4355	/**
4356	Enable indexes: public interface.
4357
4358	@sa handler::enable_indexes()
4359	*/
4360
4361	int
4362	handler::ha_enable_indexes(uint mode)
4363	{
4364	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4365	m_lock_type != F_UNLCK);
4366	mark_trx_read_write();
4367
4368	return enable_indexes(mode);
4369	}
4370
4371
4372	/**
4373	Discard or import tablespace: public interface.
4374
4375	@sa handler::discard_or_import_tablespace()
4376	*/
4377
4378	int
4379	handler::ha_discard_or_import_tablespace(my_bool discard)
4380	{
4381	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4382	m_lock_type == F_WRLCK);
4383	mark_trx_read_write();
4384
4385	return discard_or_import_tablespace(discard);
4386	}
4387
4388
4389	bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4390	Alter_inplace_info *ha_alter_info)
4391	{
4392	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4393	m_lock_type != F_UNLCK);
4394	mark_trx_read_write();
4395
4396	return prepare_inplace_alter_table(altered_table, ha_alter_info);
4397	}
4398
4399
4400	bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
4401	Alter_inplace_info *ha_alter_info,
4402	bool commit)
4403	{
4404	/*
4405	At this point we should have an exclusive metadata lock on the table.
4406	The exception is if we're about to roll back changes (commit= false).
4407	In this case, we might be rolling back after a failed lock upgrade,
4408	so we could be holding the same lock level as for inplace_alter_table().
4409	*/
4410	DBUG_ASSERT(ha_thd()->mdl_context.is_lock_owner(MDL_key::TABLE,
4411	table->s->db.str,
4412	table->s->table_name.str,
4413	MDL_EXCLUSIVE) \|\|
4414	!commit);
4415
4416	return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
4417	}
4418
4419
4420	/*
4421	Default implementation to support in-place alter table
4422	and old online add/drop index API
4423	*/
4424
4425	enum_alter_inplace_result
4426	handler::check_if_supported_inplace_alter(TABLE *altered_table,
4427	Alter_inplace_info *ha_alter_info)
4428	{
4429	DBUG_ENTER("handler::check_if_supported_inplace_alter");
4430
4431	HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4432
4433	if (altered_table->versioned(VERS_TIMESTAMP))
4434	DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4435
4436	alter_table_operations inplace_offline_operations=
4437	ALTER_COLUMN_EQUAL_PACK_LENGTH \|
4438	ALTER_COLUMN_NAME \|
4439	ALTER_RENAME_COLUMN \|
4440	ALTER_CHANGE_COLUMN_DEFAULT \|
4441	ALTER_COLUMN_DEFAULT \|
4442	ALTER_COLUMN_OPTION \|
4443	ALTER_CHANGE_CREATE_OPTION \|
4444	ALTER_DROP_CHECK_CONSTRAINT \|
4445	ALTER_PARTITIONED \|
4446	ALTER_VIRTUAL_GCOL_EXPR \|
4447	ALTER_RENAME;
4448
4449	/ Is there at least one operation that requires copy algorithm? /
4450	if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4451	DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4452
4453	/*
4454	The following checks for changes related to ALTER_OPTIONS
4455
4456	ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4457	ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4458	change column charsets and so not supported in-place through
4459	old API.
4460
4461	Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4462	not supported as in-place operations in old API either.
4463	*/
4464	if (create_info->used_fields & (HA_CREATE_USED_CHARSET \|
4465	HA_CREATE_USED_DEFAULT_CHARSET \|
4466	HA_CREATE_USED_PACK_KEYS \|
4467	HA_CREATE_USED_CHECKSUM \|
4468	HA_CREATE_USED_MAX_ROWS) \|\|
4469	(table->s->row_type != create_info->row_type))
4470	DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4471
4472	uint table_changes= (ha_alter_info->handler_flags &
4473	ALTER_COLUMN_EQUAL_PACK_LENGTH) ?
4474	IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
4475	if (table->file->check_if_incompatible_data(create_info, table_changes)
4476	== COMPATIBLE_DATA_YES)
4477	DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
4478
4479	DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4480	}
4481
4482	void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4483	const char try_instead) const*
4484	{
4485	if (unsupported_reason == NULL)
4486	my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(`0`),
4487	not_supported, try_instead);
4488	else
4489	my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(`0`),
4490	not_supported, unsupported_reason, try_instead);
4491	}
4492
4493
4494	/**
4495	Rename table: public interface.
4496
4497	@sa handler::rename_table()
4498	*/
4499
4500	int
4501	handler::ha_rename_table(const char from, const* char *to)
4502	{
4503	DBUG_ASSERT(m_lock_type == F_UNLCK);
4504	mark_trx_read_write();
4505
4506	return rename_table(from, to);
4507	}
4508
4509
4510	/**
4511	Delete table: public interface.
4512
4513	@sa handler::delete_table()
4514	*/
4515
4516	int
4517	handler::ha_delete_table(const char *name)
4518	{
4519	mark_trx_read_write();
4520	return delete_table(name);
4521	}
4522
4523
4524	/**
4525	Drop table in the engine: public interface.
4526
4527	@sa handler::drop_table()
4528
4529	The difference between this and delete_table() is that the table is open in
4530	drop_table().
4531	*/
4532
4533	void
4534	handler::ha_drop_table(const char *name)
4535	{
4536	DBUG_ASSERT(m_lock_type == F_UNLCK);
4537	mark_trx_read_write();
4538
4539	return drop_table(name);
4540	}
4541
4542
4543	/**
4544	Create a table in the engine: public interface.
4545
4546	@sa handler::create()
4547	*/
4548
4549	int
4550	handler::ha_create(const char name, TABLE form, HA_CREATE_INFO *info_arg)
4551	{
4552	DBUG_ASSERT(m_lock_type == F_UNLCK);
4553	mark_trx_read_write();
4554	int error= create(name, form, info_arg);
4555	if (!error &&
4556	!(info_arg->options & (HA_LEX_CREATE_TMP_TABLE \| HA_CREATE_TMP_ALTER)))
4557	mysql_audit_create_table(form);
4558	return error;
4559	}
4560
4561
4562	/**
4563	Create handler files for CREATE TABLE: public interface.
4564
4565	@sa handler::create_partitioning_metadata()
4566	*/
4567
4568	int
4569	handler::ha_create_partitioning_metadata(const char *name,
4570	const char *old_name,
4571	int action_flag)
4572	{
4573	/*
4574	Normally this is done when unlocked, but in fast_alter_partition_table,
4575	it is done on an already locked handler when preparing to alter/rename
4576	partitions.
4577	*/
4578	DBUG_ASSERT(m_lock_type == F_UNLCK \|\|
4579	(!old_name && strcmp(name, table_share->path.str)));
4580
4581	return create_partitioning_metadata(name, old_name, action_flag);
4582	}
4583
4584
4585	/**
4586	Change partitions: public interface.
4587
4588	@sa handler::change_partitions()
4589	*/
4590
4591	int
4592	handler::ha_change_partitions(HA_CREATE_INFO *create_info,
4593	const char *path,
4594	ulonglong * const copied,
4595	ulonglong * const deleted,
4596	const uchar *pack_frm_data,
4597	size_t pack_frm_len)
4598	{
4599	/*
4600	Must have at least RDLCK or be a TMP table. Read lock is needed to read
4601	from current partitions and write lock will be taken on new partitions.
4602	*/
4603	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
4604	m_lock_type != F_UNLCK);
4605
4606	mark_trx_read_write();
4607
4608	return change_partitions(create_info, path, copied, deleted,
4609	pack_frm_data, pack_frm_len);
4610	}
4611
4612
4613	/**
4614	Drop partitions: public interface.
4615
4616	@sa handler::drop_partitions()
4617	*/
4618
4619	int
4620	handler::ha_drop_partitions(const char *path)
4621	{
4622	DBUG_ASSERT(!table->db_stat);
4623
4624	mark_trx_read_write();
4625
4626	return drop_partitions(path);
4627	}
4628
4629
4630	/**
4631	Rename partitions: public interface.
4632
4633	@sa handler::rename_partitions()
4634	*/
4635
4636	int
4637	handler::ha_rename_partitions(const char *path)
4638	{
4639	DBUG_ASSERT(!table->db_stat);
4640
4641	mark_trx_read_write();
4642
4643	return rename_partitions(path);
4644	}
4645
4646
4647	/**
4648	Tell the storage engine that it is allowed to "disable transaction" in the
4649	handler. It is a hint that ACID is not required - it was used in NDB for
4650	ALTER TABLE, for example, when data are copied to temporary table.
4651	A storage engine may treat this hint any way it likes. NDB for example
4652	started to commit every now and then automatically.
4653	This hint can be safely ignored.
4654	*/
4655	int ha_enable_transaction(THD thd, bool* on)
4656	{
4657	int error=`0`;
4658	DBUG_ENTER("ha_enable_transaction");
4659	DBUG_PRINT("enter", ("on: %d", (int) on));
4660
4661	if ((thd->transaction.on= on))
4662	{
4663	/*
4664	Now all storage engines should have transaction handling enabled.
4665	But some may have it enabled all the time - "disabling" transactions
4666	is an optimization hint that storage engine is free to ignore.
4667	So, let's commit an open transaction (if any) now.
4668	*/
4669	if (likely(!(error= ha_commit_trans(thd, `0`))))
4670	error= trans_commit_implicit(thd);
4671	}
4672	DBUG_RETURN(error);
4673	}
4674
4675	int handler::index_next_same(uchar buf, const* uchar *key, uint keylen)
4676	{
4677	int error;
4678	DBUG_ENTER("handler::index_next_same");
4679	if (!(error=index_next(buf)))
4680	{
4681	my_ptrdiff_t ptrdiff= buf - table->record[`0`];
4682	uchar *UNINIT_VAR(save_record_0);
4683	KEY *UNINIT_VAR(key_info);
4684	KEY_PART_INFO *UNINIT_VAR(key_part);
4685	KEY_PART_INFO *UNINIT_VAR(key_part_end);
4686
4687	/*
4688	key_cmp_if_same() compares table->record[0] against 'key'.
4689	In parts it uses table->record[0] directly, in parts it uses
4690	field objects with their local pointers into table->record[0].
4691	If 'buf' is distinct from table->record[0], we need to move
4692	all record references. This is table->record[0] itself and
4693	the field pointers of the fields used in this key.
4694	*/
4695	if (ptrdiff)
4696	{
4697	save_record_0= table->record[`0`];
4698	table->record[`0`]= buf;
4699	key_info= table->key_info + active_index;
4700	key_part= key_info->key_part;
4701	key_part_end= key_part + key_info->user_defined_key_parts;
4702	for (; key_part < key_part_end; key_part++)
4703	{
4704	DBUG_ASSERT(key_part->field);
4705	key_part->field->move_field_offset(ptrdiff);
4706	}
4707	}
4708
4709	if (key_cmp_if_same(table, key, active_index, keylen))
4710	{
4711	table->status=STATUS_NOT_FOUND;
4712	error=HA_ERR_END_OF_FILE;
4713	}
4714
4715	/ Move back if necessary. /
4716	if (ptrdiff)
4717	{
4718	table->record[`0`]= save_record_0;
4719	for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
4720	key_part->field->move_field_offset(-ptrdiff);
4721	}
4722	}
4723	DBUG_PRINT("return",("%i", error));
4724	DBUG_RETURN(error);
4725	}
4726
4727
4728	void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
4729	uint part_id)
4730	{
4731	info(HA_STATUS_CONST \| HA_STATUS_TIME \| HA_STATUS_VARIABLE \|
4732	HA_STATUS_NO_LOCK);
4733	stat_info->records= stats.records;
4734	stat_info->mean_rec_length= stats.mean_rec_length;
4735	stat_info->data_file_length= stats.data_file_length;
4736	stat_info->max_data_file_length= stats.max_data_file_length;
4737	stat_info->index_file_length= stats.index_file_length;
4738	stat_info->max_index_file_length=stats.max_index_file_length;
4739	stat_info->delete_length= stats.delete_length;
4740	stat_info->create_time= stats.create_time;
4741	stat_info->update_time= stats.update_time;
4742	stat_info->check_time= stats.check_time;
4743	stat_info->check_sum= `0`;
4744	if (table_flags() & (HA_HAS_OLD_CHECKSUM \| HA_HAS_NEW_CHECKSUM))
4745	stat_info->check_sum= checksum();
4746	return;
4747	}
4748
4749
4750	/*
4751	Updates the global table stats with the TABLE this handler represents
4752	*/
4753
4754	void handler::update_global_table_stats()
4755	{
4756	TABLE_STATS * table_stats;
4757
4758	status_var_add(table->in_use->status_var.rows_read, rows_read);
4759	DBUG_ASSERT(rows_tmp_read == `0`);
4760
4761	if (!table->in_use->userstat_running)
4762	{
4763	rows_read= rows_changed= `0`;
4764	return;
4765	}
4766
4767	if (rows_read + rows_changed == `0`)
4768	return; // Nothing to update.
4769
4770	DBUG_ASSERT(table->s);
4771	DBUG_ASSERT(table->s->table_cache_key.str);
4772
4773	mysql_mutex_lock(&LOCK_global_table_stats);
4774	/ Gets the global table stats, creating one if necessary. /
4775	if (!(table_stats= (TABLE_STATS*)
4776	my_hash_search(&global_table_stats,
4777	(uchar*) table->s->table_cache_key.str,
4778	table->s->table_cache_key.length)))
4779	{
4780	if (!(table_stats = ((TABLE_STATS*)
4781	my_malloc(sizeof(TABLE_STATS),
4782	MYF(MY_WME \| MY_ZEROFILL)))))
4783	{
4784	/ Out of memory error already given /
4785	goto end;
4786	}
4787	memcpy(table_stats->table, table->s->table_cache_key.str,
4788	table->s->table_cache_key.length);
4789	table_stats->table_name_length= (uint)table->s->table_cache_key.length;
4790	table_stats->engine_type= ht->db_type;
4791	/ No need to set variables to 0, as we use MY_ZEROFILL above /
4792
4793	if (my_hash_insert(&global_table_stats, (uchar*) table_stats))
4794	{
4795	/ Out of memory error is already given /
4796	my_free(table_stats);
4797	goto end;
4798	}
4799	}
4800	// Updates the global table stats.
4801	table_stats->rows_read+= rows_read;
4802	table_stats->rows_changed+= rows_changed;
4803	table_stats->rows_changed_x_indexes+= (rows_changed *
4804	(table->s->keys ? table->s->keys :
4805	`1`));
4806	rows_read= rows_changed= `0`;
4807	end:
4808	mysql_mutex_unlock(&LOCK_global_table_stats);
4809	}
4810
4811
4812	/*
4813	Updates the global index stats with this handler's accumulated index reads.
4814	*/
4815
4816	void handler::update_global_index_stats()
4817	{
4818	DBUG_ASSERT(table->s);
4819
4820	if (!table->in_use->userstat_running)
4821	{
4822	/ Reset all index read values /
4823	bzero(index_rows_read, sizeof(index_rows_read[`0`]) * table->s->keys);
4824	return;
4825	}
4826
4827	for (uint index = `0`; index < table->s->keys; index++)
4828	{
4829	if (index_rows_read[index])
4830	{
4831	INDEX_STATS* index_stats;
4832	size_t key_length;
4833	KEY key_info = &table->key_info[index]; // Rows were read using this*
4834
4835	DBUG_ASSERT(key_info->cache_name);
4836	if (!key_info->cache_name)
4837	continue;
4838	key_length= table->s->table_cache_key.length + key_info->name.length + `1`;
4839	mysql_mutex_lock(&LOCK_global_index_stats);
4840	// Gets the global index stats, creating one if necessary.
4841	if (!(index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
4842	key_info->cache_name,
4843	key_length)))
4844	{
4845	if (!(index_stats = ((INDEX_STATS*)
4846	my_malloc(sizeof(INDEX_STATS),
4847	MYF(MY_WME \| MY_ZEROFILL)))))
4848	goto end; // Error is already given
4849
4850	memcpy(index_stats->index, key_info->cache_name, key_length);
4851	index_stats->index_name_length= key_length;
4852	if (my_hash_insert(&global_index_stats, (uchar*) index_stats))
4853	{
4854	my_free(index_stats);
4855	goto end;
4856	}
4857	}
4858	/ Updates the global index stats. /
4859	index_stats->rows_read+= index_rows_read[index];
4860	index_rows_read[index]= `0`;
4861	end:
4862	mysql_mutex_unlock(&LOCK_global_index_stats);
4863	}
4864	}
4865	}
4866
4867
/****************************************************************************
** Some general functions that are not in the handler class
****************************************************************************/
4871
4872	/**
4873	Initiates table-file and calls appropriate database-creator.
4874
4875	@retval
4876	0 ok
4877	@retval
4878	1 error
4879	*/
4880	int ha_create_table(THD thd, const* char *path,
4881	const char db, const* char *table_name,
4882	HA_CREATE_INFO create_info, LEX_CUSTRING frm)
4883	{
4884	int error= `1`;
4885	TABLE table;
4886	char name_buff[FN_REFLEN];
4887	const char *name;
4888	TABLE_SHARE share;
4889	bool temp_table __attribute__((unused)) =
4890	create_info->options & (HA_LEX_CREATE_TMP_TABLE \| HA_CREATE_TMP_ALTER);
4891	DBUG_ENTER("ha_create_table");
4892
4893	init_tmp_table_share(thd, &share, db, `0`, table_name, path);
4894
4895	if (frm)
4896	{
4897	bool write_frm_now= !create_info->db_type->discover_table &&
4898	!create_info->tmp_table();
4899
4900	share.frm_image= frm;
4901
4902	// open an frm image
4903	if (share.init_from_binary_frm_image(thd, write_frm_now,
4904	frm->str, frm->length))
4905	goto err;
4906	}
4907	else
4908	{
4909	// open an frm file
4910	share.db_plugin= ha_lock_engine(thd, create_info->db_type);
4911
4912	if (open_table_def(thd, &share))
4913	goto err;
4914	}
4915
4916	share.m_psi= PSI_CALL_get_table_share(temp_table, &share);
4917
4918	if (open_table_from_share(thd, &share, &empty_clex_str, `0`, READ_ALL, `0`,
4919	&table, true))
4920	goto err;
4921
4922	update_create_info_from_table(create_info, &table);
4923
4924	name= get_canonical_filename(table.file, share.path.str, name_buff);
4925
4926	error= table.file->ha_create(name, &table, create_info);
4927
4928	if (unlikely(error))
4929	{
4930	if (!thd->is_error())
4931	my_error(ER_CANT_CREATE_TABLE, MYF(`0`), db, table_name, error);
4932	table.file->print_error(error, MYF(ME_JUST_WARNING));
4933	PSI_CALL_drop_table_share(temp_table, share.db.str, (uint)share.db.length,
4934	share.table_name.str, (uint)share.table_name.length);
4935	}
4936
4937	(void) closefrm(&table);
4938
4939	err:
4940	free_table_share(&share);
4941	DBUG_RETURN(error != `0`);
4942	}
4943
4944	void st_ha_check_opt::init()
4945	{
4946	flags= sql_flags= `0`;
4947	start_time= my_time(`0`);
4948	}
4949
4950
/*****************************************************************************
  Key cache handling.

  This code is only relevant for ISAM/MyISAM tables

  key_cache->cache may be 0 only in the case where a key cache is not
  initialized or when we were not able to init the key cache in a previous
  call to ha_init_key_cache() (probably out of memory)
*****************************************************************************/
4960
4961	/**
4962	Init a key cache if it has not been initied before.
4963	*/
4964	int ha_init_key_cache(const char name, KEY_CACHE key_cache, void *unused
4965	__attribute__((unused)))
4966	{
4967	DBUG_ENTER("ha_init_key_cache");
4968
4969	if (!key_cache->key_cache_inited)
4970	{
4971	mysql_mutex_lock(&LOCK_global_system_variables);
4972	size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
4973	uint tmp_block_size= (uint) key_cache->param_block_size;
4974	uint division_limit= (uint)key_cache->param_division_limit;
4975	uint age_threshold= (uint)key_cache->param_age_threshold;
4976	uint partitions= (uint)key_cache->param_partitions;
4977	uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size;
4978	mysql_mutex_unlock(&LOCK_global_system_variables);
4979	DBUG_RETURN(!init_key_cache(key_cache,
4980	tmp_block_size,
4981	tmp_buff_size,
4982	division_limit, age_threshold,
4983	changed_blocks_hash_size,
4984	partitions));
4985	}
4986	DBUG_RETURN(`0`);
4987	}
4988
4989
4990	/**
4991	Resize key cache.
4992	*/
4993	int ha_resize_key_cache(KEY_CACHE *key_cache)
4994	{
4995	DBUG_ENTER("ha_resize_key_cache");
4996
4997	if (key_cache->key_cache_inited)
4998	{
4999	mysql_mutex_lock(&LOCK_global_system_variables);
5000	size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5001	long tmp_block_size= (long) key_cache->param_block_size;
5002	uint division_limit= (uint)key_cache->param_division_limit;
5003	uint age_threshold= (uint)key_cache->param_age_threshold;
5004	uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size;
5005	mysql_mutex_unlock(&LOCK_global_system_variables);
5006	DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size,
5007	tmp_buff_size,
5008	division_limit, age_threshold,
5009	changed_blocks_hash_size));
5010	}
5011	DBUG_RETURN(`0`);
5012	}
5013
5014
5015	/**
5016	Change parameters for key cache (like division_limit)
5017	*/
5018	int ha_change_key_cache_param(KEY_CACHE *key_cache)
5019	{
5020	DBUG_ENTER("ha_change_key_cache_param");
5021
5022	if (key_cache->key_cache_inited)
5023	{
5024	mysql_mutex_lock(&LOCK_global_system_variables);
5025	uint division_limit= (uint)key_cache->param_division_limit;
5026	uint age_threshold= (uint)key_cache->param_age_threshold;
5027	mysql_mutex_unlock(&LOCK_global_system_variables);
5028	change_key_cache_param(key_cache, division_limit, age_threshold);
5029	}
5030	DBUG_RETURN(`0`);
5031	}
5032
5033
5034	/**
5035	Repartition key cache
5036	*/
5037	int ha_repartition_key_cache(KEY_CACHE *key_cache)
5038	{
5039	DBUG_ENTER("ha_repartition_key_cache");
5040
5041	if (key_cache->key_cache_inited)
5042	{
5043	mysql_mutex_lock(&LOCK_global_system_variables);
5044	size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5045	long tmp_block_size= (long) key_cache->param_block_size;
5046	uint division_limit= (uint)key_cache->param_division_limit;
5047	uint age_threshold= (uint)key_cache->param_age_threshold;
5048	uint partitions= (uint)key_cache->param_partitions;
5049	uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size;
5050	mysql_mutex_unlock(&LOCK_global_system_variables);
5051	DBUG_RETURN(!repartition_key_cache(key_cache, tmp_block_size,
5052	tmp_buff_size,
5053	division_limit, age_threshold,
5054	changed_blocks_hash_size,
5055	partitions));
5056	}
5057	DBUG_RETURN(`0`);
5058	}
5059
5060
5061	/**
5062	Move all tables from one key cache to another one.
5063	*/
5064	int ha_change_key_cache(KEY_CACHE *old_key_cache,
5065	KEY_CACHE *new_key_cache)
5066	{
5067	mi_change_key_cache(old_key_cache, new_key_cache);
5068	return `0`;
5069	}
5070
5071
5072	static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5073	void *arg)
5074	{
5075	TABLE_SHARE share= (TABLE_SHARE )arg;
5076	handlerton *hton= plugin_hton(plugin);
5077	if (hton->state == SHOW_OPTION_YES && hton->discover_table)
5078	{
5079	share->db_plugin= plugin;
5080	int error= hton->discover_table(hton, thd, share);
5081	if (error != HA_ERR_NO_SUCH_TABLE)
5082	{
5083	if (unlikely(error))
5084	{
5085	if (!share->error)
5086	{
5087	share->error= OPEN_FRM_ERROR_ALREADY_ISSUED;
5088	plugin_unlock(`0`, share->db_plugin);
5089	}
5090
5091	/*
5092	report an error, unless it is "generic" and a more
5093	specific one was already reported
5094	*/
5095	if (error != HA_ERR_GENERIC \|\| !thd->is_error())
5096	my_error(ER_GET_ERRNO, MYF(`0`), error, plugin_name(plugin)->str);
5097	share->db_plugin= `0`;
5098	}
5099	else
5100	share->error= OPEN_FRM_OK;
5101
5102	status_var_increment(thd->status_var.ha_discover_count);
5103	return TRUE; // abort the search
5104	}
5105	share->db_plugin= `0`;
5106	}
5107
5108	DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR);
5109	return FALSE; // continue with the next engine
5110	}
5111
5112	int ha_discover_table(THD thd, TABLE_SHARE share)
5113	{
5114	DBUG_ENTER("ha_discover_table");
5115	int found;
5116
5117	DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR); // share is not OK yet
5118
5119	if (!engines_with_discover)
5120	found= FALSE;
5121	else if (share->db_plugin)
5122	found= discover_handlerton(thd, share->db_plugin, share);
5123	else
5124	found= plugin_foreach(thd, discover_handlerton,
5125	MYSQL_STORAGE_ENGINE_PLUGIN, share);
5126
5127	if (!found)
5128	open_table_error(share, OPEN_FRM_OPEN_ERROR, ENOENT); // not found
5129
5130	DBUG_RETURN(share->error != OPEN_FRM_OK);
5131	}
5132
5133	static my_bool file_ext_exists(char path, size_t path_len, const* char *ext)
5134	{
5135	strmake(path + path_len, ext, FN_REFLEN - path_len);
5136	return !access(path, F_OK);
5137	}
5138
5139	struct st_discover_existence_args
5140	{
5141	char *path;
5142	size_t path_len;
5143	const char db, table_name;
5144	handlerton *hton;
5145	bool frm_exists;
5146	};
5147
5148	static my_bool discover_existence(THD *thd, plugin_ref plugin,
5149	void *arg)
5150	{
5151	st_discover_existence_args args= (st_discover_existence_args)arg;
5152	handlerton *ht= plugin_hton(plugin);
5153	if (ht->state != SHOW_OPTION_YES \|\| !ht->discover_table_existence)
5154	return args->frm_exists;
5155
5156	args->hton= ht;
5157
5158	if (ht->discover_table_existence == ext_based_existence)
5159	return file_ext_exists(args->path, args->path_len,
5160	ht->tablefile_extensions[`0`]);
5161
5162	return ht->discover_table_existence(ht, args->db, args->table_name);
5163	}
5164
5165	class Table_exists_error_handler : public Internal_error_handler
5166	{
5167	public:
5168	Table_exists_error_handler()
5169	: m_handled_errors(`0`), m_unhandled_errors(`0`)
5170	{}
5171
5172	bool handle_condition(THD *thd,
5173	uint sql_errno,
5174	const char* sqlstate,
5175	Sql_condition::enum_warning_level *level,
5176	const char* msg,
5177	Sql_condition ** cond_hdl)
5178	{
5179	*cond_hdl= NULL;
5180	if (sql_errno == ER_NO_SUCH_TABLE \|\|
5181	sql_errno == ER_NO_SUCH_TABLE_IN_ENGINE \|\|
5182	sql_errno == ER_WRONG_OBJECT)
5183	{
5184	m_handled_errors++;
5185	return TRUE;
5186	}
5187
5188	if (*level == Sql_condition::WARN_LEVEL_ERROR)
5189	m_unhandled_errors++;
5190	return FALSE;
5191	}
5192
5193	bool safely_trapped_errors()
5194	{
5195	return ((m_handled_errors > `0`) && (m_unhandled_errors == `0`));
5196	}
5197
5198	private:
5199	int m_handled_errors;
5200	int m_unhandled_errors;
5201	};
5202
5203	/**
5204	Check if a given table exists, without doing a full discover, if possible
5205
5206	If the 'hton' is not NULL, it's set to the handlerton of the storage engine
5207	of this table, or to view_pseudo_hton if the frm belongs to a view.
5208
5209	This function takes discovery correctly into account. If frm is found,
5210	it discovers the table to make sure it really exists in the engine.
5211	If no frm is found it discovers the table, in case it still exists in
5212	the engine.
5213
5214	While it tries to cut corners (don't open .frm if no discovering engine is
5215	enabled, no full discovery if all discovering engines support
5216	discover_table_existence, etc), it still may* be quite expensive*
5217	and must be used sparingly.
5218
5219	@retval true Table exists (even if the error occurred, like bad frm)
5220	@retval false Table does not exist (one can do CREATE TABLE table_name)
5221
5222	@note if frm exists and the table in engine doesn't, hton will be set,*
5223	but the return value will be false.
5224
5225	@note if frm file exists, but the table cannot be opened (engine not
5226	loaded, frm is invalid), the return value will be true, but
5227	*hton will be NULL.
5228	*/
5229
5230	bool ha_table_exists(THD thd, const* LEX_CSTRING db, const* LEX_CSTRING *table_name,
5231	handlerton *hton, bool* *is_sequence)
5232	{
5233	handlerton *dummy;
5234	bool dummy2;
5235	DBUG_ENTER("ha_table_exists");
5236
5237	if (hton)
5238	*hton= `0`;
5239	else if (engines_with_discover)
5240	hton= &dummy;
5241	if (!is_sequence)
5242	is_sequence= &dummy2;
5243	*is_sequence= `0`;
5244
5245	TDC_element *element= tdc_lock_share(thd, db->str, table_name->str);
5246	if (element && element != MY_ERRPTR)
5247	{
5248	if (hton)
5249	*hton= element->share->db_type();
5250	*is_sequence= element->share->table_type == TABLE_TYPE_SEQUENCE;
5251	tdc_unlock_share(element);
5252	DBUG_RETURN(TRUE);
5253	}
5254
5255	char path[FN_REFLEN + `1`];
5256	size_t path_len = build_table_filename(path, sizeof(path) - `1`,
5257	db->str, table_name->str, "", `0`);
5258	st_discover_existence_args args= {path, path_len, db->str, table_name->str, `0`, true};
5259
5260	if (file_ext_exists(path, path_len, reg_ext))
5261	{
5262	bool exists= true;
5263	if (hton)
5264	{
5265	char engine_buf[NAME_CHAR_LEN + `1`];
5266	LEX_CSTRING engine= { engine_buf, `0` };
5267	Table_type type;
5268
5269	if ((type= dd_frm_type(thd, path, &engine, is_sequence)) ==
5270	TABLE_TYPE_UNKNOWN)
5271	DBUG_RETURN(`0`);
5272
5273	if (type != TABLE_TYPE_VIEW)
5274	{
5275	plugin_ref p= plugin_lock_by_name(thd, &engine,
5276	MYSQL_STORAGE_ENGINE_PLUGIN);
5277	*hton= p ? plugin_hton(p) : NULL;
5278	if (*hton)
5279	// verify that the table really exists
5280	exists= discover_existence(thd, p, &args);
5281	}
5282	else
5283	*hton= view_pseudo_hton;
5284	}
5285	DBUG_RETURN(exists);
5286	}
5287
5288	args.frm_exists= false;
5289	if (plugin_foreach(thd, discover_existence, MYSQL_STORAGE_ENGINE_PLUGIN,
5290	&args))
5291	{
5292	if (hton)
5293	*hton= args.hton;
5294	DBUG_RETURN(TRUE);
5295	}
5296
5297	if (need_full_discover_for_existence)
5298	{
5299	TABLE_LIST table;
5300	uint flags = GTS_TABLE \| GTS_VIEW;
5301	if (!hton)
5302	flags\|= GTS_NOLOCK;
5303
5304	Table_exists_error_handler no_such_table_handler;
5305	thd->push_internal_handler(&no_such_table_handler);
5306	table.init_one_table(db, table_name, `0`, TL_READ);
5307	TABLE_SHARE *share= tdc_acquire_share(thd, &table, flags);
5308	thd->pop_internal_handler();
5309
5310	if (hton && share)
5311	{
5312	*hton= share->db_type();
5313	tdc_release_share(share);
5314	}
5315
5316	// the table doesn't exist if we've caught ER_NO_SUCH_TABLE and nothing else
5317	DBUG_RETURN(!no_such_table_handler.safely_trapped_errors());
5318	}
5319
5320	DBUG_RETURN(FALSE);
5321	}
5322
5323	/**
5324	Discover all table names in a given database
5325	*/
5326	extern "C" {
5327
5328	static int cmp_file_names(const void a, const* void *b)
5329	{
5330	CHARSET_INFO *cs= character_set_filesystem;
5331	char aa= ((FILEINFO )a)->name;
5332	char bb= ((FILEINFO )b)->name;
5333	return my_strnncoll(cs, (uchar)aa, strlen(aa), (uchar)bb, strlen(bb));
5334	}
5335
5336	static int cmp_table_names(LEX_CSTRING * const a, LEX_CSTRING const *b)
5337	{
5338	return my_strnncoll(&my_charset_bin, (uchar)((a)->str), (*a)->length,
5339	(uchar)((b)->str), (*b)->length);
5340	}
5341
5342	#ifndef DBUG_OFF
5343	static int cmp_table_names_desc(LEX_CSTRING * const a, LEX_CSTRING const *b)
5344	{
5345	return -cmp_table_names(a, b);
5346	}
5347	#endif
5348
5349	}
5350
5351	Discovered_table_list::Discovered_table_list(THD *thd_arg,
5352	Dynamic_array<LEX_CSTRING> tables_arg,
5353	const LEX_CSTRING *wild_arg) :
5354	thd(thd_arg), with_temps(false), tables(tables_arg)
5355	{
5356	if (wild_arg->str && wild_arg->str[`0`])
5357	{
5358	wild= wild_arg->str;
5359	wend= wild + wild_arg->length;
5360	}
5361	else
5362	wild= `0`;
5363	}
5364
5365	bool Discovered_table_list::add_table(const char *tname, size_t tlen)
5366	{
5367	/*
5368	TODO Check with_temps and filter out temp tables.
5369	Implement the check, when we'll have at least one affected engine (with
5370	custom discover_table_names() method, that calls add_table() directly).
5371	Note: avoid comparing the same name twice (here and in add_file).
5372	*/
5373	if (wild && my_wildcmp(table_alias_charset, tname, tname + tlen, wild, wend,
5374	wild_prefix, wild_one, wild_many))
5375	return `0`;
5376
5377	LEX_CSTRING *name= thd->make_clex_string(tname, tlen);
5378	if (!name \|\| tables->append(name))
5379	return `1`;
5380	return `0`;
5381	}
5382
5383	bool Discovered_table_list::add_file(const char *fname)
5384	{
5385	bool is_temp= strncmp(fname, STRING_WITH_LEN(tmp_file_prefix)) == `0`;
5386
5387	if (is_temp && !with_temps)
5388	return `0`;
5389
5390	char tname[SAFE_NAME_LEN + `1`];
5391	size_t tlen= filename_to_tablename(fname, tname, sizeof(tname), is_temp);
5392	return add_table(tname, tlen);
5393	}
5394
5395
5396	void Discovered_table_list::sort()
5397	{
5398	tables->sort(cmp_table_names);
5399	}
5400
5401
5402	#ifndef DBUG_OFF
5403	void Discovered_table_list::sort_desc()
5404	{
5405	tables->sort(cmp_table_names_desc);
5406	}
5407	#endif
5408
5409
5410	void Discovered_table_list::remove_duplicates()
5411	{
5412	LEX_CSTRING **src= tables->front();
5413	LEX_CSTRING **dst= src;
5414	sort();
5415	while (++dst <= tables->back())
5416	{
5417	LEX_CSTRING s= src, d= dst;
5418	DBUG_ASSERT(strncmp(s->str, d->str, MY_MIN(s->length, d->length)) <= `0`);
5419	if ((s->length != d->length \|\| strncmp(s->str, d->str, d->length)))
5420	{
5421	src++;
5422	if (src != dst)
5423	src= dst;
5424	}
5425	}
5426	tables->elements(src - tables->front() + `1`);
5427	}
5428
5429	struct st_discover_names_args
5430	{
5431	LEX_CSTRING *db;
5432	MY_DIR *dirp;
5433	Discovered_table_list *result;
5434	uint possible_duplicates;
5435	};
5436
5437	static my_bool discover_names(THD *thd, plugin_ref plugin,
5438	void *arg)
5439	{
5440	st_discover_names_args args= (st_discover_names_args )arg;
5441	handlerton *ht= plugin_hton(plugin);
5442
5443	if (ht->state == SHOW_OPTION_YES && ht->discover_table_names)
5444	{
5445	size_t old_elements= args->result->tables->elements();
5446	if (ht->discover_table_names(ht, args->db, args->dirp, args->result))
5447	return `1`;
5448
5449	/*
5450	hton_ext_based_table_discovery never discovers a table that has
5451	a corresponding .frm file; but custom engine discover methods might
5452	*/
5453	if (ht->discover_table_names != hton_ext_based_table_discovery)
5454	args->possible_duplicates+= (uint)(args->result->tables->elements() - old_elements);
5455	}
5456
5457	return `0`;
5458	}
5459
5460	/**
5461	Return the list of tables
5462
5463	@param thd
5464	@param db database to look into
5465	@param dirp list of files in this database (as returned by my_dir())
5466	@param result the object to return the list of files in
5467	@param reusable if true, on return, 'dirp' will be a valid list of all
5468	non-table files. If false, discovery will work much faster,
5469	but it will leave 'dirp' corrupted and completely unusable,
5470	only good for my_dirend().
5471
5472	Normally, reusable=false for SHOW and INFORMATION_SCHEMA, and reusable=true
5473	for DROP DATABASE (as it needs to know and delete non-table files).
5474	*/
5475
5476	int ha_discover_table_names(THD thd, LEX_CSTRING db, MY_DIR *dirp,
5477	Discovered_table_list result, bool* reusable)
5478	{
5479	int error;
5480	DBUG_ENTER("ha_discover_table_names");
5481
5482	if (engines_with_discover_file_names == `0` && !reusable)
5483	{
5484	st_discover_names_args args= {db, NULL, result, `0`};
5485	error= ext_table_discovery_simple(dirp, result) \|\|
5486	plugin_foreach(thd, discover_names,
5487	MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5488	}
5489	else
5490	{
5491	st_discover_names_args args= {db, dirp, result, `0`};
5492
5493	/ extension_based_table_discovery relies on dirp being sorted /
5494	my_qsort(dirp->dir_entry, dirp->number_of_files,
5495	sizeof(FILEINFO), cmp_file_names);
5496
5497	error= extension_based_table_discovery(dirp, reg_ext, result) \|\|
5498	plugin_foreach(thd, discover_names,
5499	MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5500	if (args.possible_duplicates > `0`)
5501	result->remove_duplicates();
5502	}
5503
5504	DBUG_RETURN(error);
5505	}
5506
5507
5508	/*
5509	int handler::pre_read_multi_range_first(KEY_MULTI_RANGE found_range_p,
5510	KEY_MULTI_RANGE ranges,*
5511	uint range_count,
5512	bool sorted, HANDLER_BUFFER buffer,*
5513	bool use_parallel)
5514	{
5515	int result;
5516	DBUG_ENTER("handler::pre_read_multi_range_first");
5517	result = pre_read_range_first(ranges->start_key.keypart_map ?
5518	&ranges->start_key : 0,
5519	ranges->end_key.keypart_map ?
5520	&ranges->end_key : 0,
5521	test(ranges->range_flag & EQ_RANGE),
5522	sorted,
5523	use_parallel);
5524	DBUG_RETURN(result);
5525	}
5526	*/
5527
5528
5529	/**
5530	Read first row between two ranges.
5531	Store ranges for future calls to read_range_next.
5532
5533	@param start_key Start key. Is 0 if no min range
5534	@param end_key End key. Is 0 if no max range
5535	@param eq_range_arg Set to 1 if start_key == end_key
5536	@param sorted Set to 1 if result should be sorted per key
5537
5538	@note
5539	Record is read into table->record[0]
5540
5541	@retval
5542	0 Found row
5543	@retval
5544	HA_ERR_END_OF_FILE No rows in range
5545	@retval
5546	\# Error code
5547	*/
5548	int handler::read_range_first(const key_range *start_key,
5549	const key_range *end_key,
5550	bool eq_range_arg, bool sorted)
5551	{
5552	int result;
5553	DBUG_ENTER("handler::read_range_first");
5554
5555	eq_range= eq_range_arg;
5556	set_end_range(end_key);
5557	range_key_part= table->key_info[active_index].key_part;
5558
5559	if (!start_key) // Read first record
5560	result= ha_index_first(table->record[`0`]);
5561	else
5562	result= ha_index_read_map(table->record[`0`],
5563	start_key->key,
5564	start_key->keypart_map,
5565	start_key->flag);
5566	if (result)
5567	DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
5568	? HA_ERR_END_OF_FILE
5569	: result);
5570
5571	if (compare_key(end_range) <= `0`)
5572	{
5573	DBUG_RETURN(`0`);
5574	}
5575	else
5576	{
5577	/*
5578	The last read row does not fall in the range. So request
5579	storage engine to release row lock if possible.
5580	*/
5581	unlock_row();
5582	DBUG_RETURN(HA_ERR_END_OF_FILE);
5583	}
5584	}
5585
5586
5587	/**
5588	Read next row between two ranges.
5589
5590	@note
5591	Record is read into table->record[0]
5592
5593	@retval
5594	0 Found row
5595	@retval
5596	HA_ERR_END_OF_FILE No rows in range
5597	@retval
5598	\# Error code
5599	*/
5600	int handler::read_range_next()
5601	{
5602	int result;
5603	DBUG_ENTER("handler::read_range_next");
5604
5605	if (eq_range)
5606	{
5607	/ We trust that index_next_same always gives a row in range /
5608	DBUG_RETURN(ha_index_next_same(table->record[`0`],
5609	end_range->key,
5610	end_range->length));
5611	}
5612	result= ha_index_next(table->record[`0`]);
5613	if (result)
5614	DBUG_RETURN(result);
5615
5616	if (compare_key(end_range) <= `0`)
5617	{
5618	DBUG_RETURN(`0`);
5619	}
5620	else
5621	{
5622	/*
5623	The last read row does not fall in the range. So request
5624	storage engine to release row lock if possible.
5625	*/
5626	unlock_row();
5627	DBUG_RETURN(HA_ERR_END_OF_FILE);
5628	}
5629	}
5630
5631
5632	void handler::set_end_range(const key_range *end_key)
5633	{
5634	end_range= `0`;
5635	if (end_key)
5636	{
5637	end_range= &save_end_range;
5638	save_end_range = *end_key;
5639	key_compare_result_on_equal=
5640	((end_key->flag == HA_READ_BEFORE_KEY) ? `1` :
5641	(end_key->flag == HA_READ_AFTER_KEY) ? -`1` : `0`);
5642	}
5643	}
5644
5645
5646	/**
5647	Compare if found key (in row) is over max-value.
5648
5649	@param range range to compare to row. May be 0 for no range
5650
5651	@see also
5652	key.cc::key_cmp()
5653
5654	@return
5655	The return value is SIGN(key_in_row - range_key):
5656
5657	- 0 : Key is equal to range or 'range' == 0 (no range)
5658	- -1 : Key is less than range
5659	- 1 : Key is larger than range
5660	*/
5661	int handler::compare_key(key_range *range)
5662	{
5663	int cmp;
5664	if (!range \|\| in_range_check_pushed_down)
5665	return `0`; // No max range
5666	cmp= key_cmp(range_key_part, range->key, range->length);
5667	if (!cmp)
5668	cmp= key_compare_result_on_equal;
5669	return cmp;
5670	}
5671
5672
5673	/*
5674	Same as compare_key() but doesn't check have in_range_check_pushed_down.
5675	This is used by index condition pushdown implementation.
5676	*/
5677
5678	int handler::compare_key2(key_range range) const*
5679	{
5680	int cmp;
5681	if (!range)
5682	return `0`; // no max range
5683	cmp= key_cmp(range_key_part, range->key, range->length);
5684	if (!cmp)
5685	cmp= key_compare_result_on_equal;
5686	return cmp;
5687	}
5688
5689
5690	/**
5691	ICP callback - to be called by an engine to check the pushed condition
5692	*/
5693	extern "C" enum icp_result handler_index_cond_check(void* h_arg)
5694	{
5695	handler h= (handler)h_arg;
5696	THD *thd= h->table->in_use;
5697	enum icp_result res;
5698
5699	enum thd_kill_levels abort_at= h->has_transactions() ?
5700	THD_ABORT_SOFTLY : THD_ABORT_ASAP;
5701	if (thd_kill_level(thd) > abort_at)
5702	return ICP_ABORTED_BY_USER;
5703
5704	if (h->end_range && h->compare_key2(h->end_range) > `0`)
5705	return ICP_OUT_OF_RANGE;
5706	h->increment_statistics(&SSV::ha_icp_attempts);
5707	if ((res= h->pushed_idx_cond->val_int()? ICP_MATCH : ICP_NO_MATCH) ==
5708	ICP_MATCH)
5709	h->increment_statistics(&SSV::ha_icp_match);
5710	return res;
5711	}
5712
5713	int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
5714	key_part_map keypart_map,
5715	enum ha_rkey_function find_flag)
5716	{
5717	int error, UNINIT_VAR(error1);
5718
5719	error= ha_index_init(index, `0`);
5720	if (likely(!error))
5721	{
5722	error= index_read_map(buf, key, keypart_map, find_flag);
5723	error1= ha_index_end();
5724	}
5725	return error ? error : error1;
5726	}
5727
5728
5729	/**
5730	Returns a list of all known extensions.
5731
5732	No mutexes, worst case race is a minor surplus memory allocation
5733	We have to recreate the extension map if mysqld is restarted (for example
5734	within libmysqld)
5735
5736	@retval
5737	pointer pointer to TYPELIB structure
5738	*/
5739	static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
5740	void *arg)
5741	{
5742	List<char> found_exts= (List<char> ) arg;
5743	handlerton *hton= plugin_hton(plugin);
5744	List_iterator_fast<char> it(*found_exts);
5745	const char *ext, old_ext;
5746
5747	for (ext= hton->tablefile_extensions; *ext; ext++)
5748	{
5749	while ((old_ext= it ++))
5750	{
5751	if (!strcmp(old_ext, *ext))
5752	break;
5753	}
5754	if (!old_ext)
5755	found_exts->push_back((char ) ext);
5756
5757	it.rewind();
5758	}
5759	return FALSE;
5760	}
5761
5762	TYPELIB ha_known_exts(void*)
5763	{
5764	if (!known_extensions.type_names \|\| mysys_usage_id != known_extensions_id)
5765	{
5766	List<char> found_exts;
5767	const char *ext, old_ext;
5768
5769	known_extensions_id= mysys_usage_id;
5770	found_exts.push_back((char*) TRG_EXT);
5771	found_exts.push_back((char*) TRN_EXT);
5772
5773	plugin_foreach(NULL, exts_handlerton,
5774	MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
5775
5776	ext= (const char ) my_once_alloc(sizeof*(char* )
5777	(found_exts.elements+`1`),
5778	MYF(MY_WME \| MY_FAE));
5779
5780	DBUG_ASSERT(ext != `0`);
5781	known_extensions.count= found_exts.elements;
5782	known_extensions.type_names= ext;
5783
5784	List_iterator_fast<char> it(found_exts);
5785	while ((old_ext= it ++))
5786	*ext++= old_ext;
5787	*ext= `0`;
5788	}
5789	return &known_extensions;
5790	}
5791
5792
5793	static bool stat_print(THD thd, const* char *type, size_t type_len,
5794	const char *file, size_t file_len,
5795	const char *status, size_t status_len)
5796	{
5797	Protocol *protocol= thd->protocol;
5798	protocol->prepare_for_resend();
5799	protocol->store(type, type_len, system_charset_info);
5800	protocol->store(file, file_len, system_charset_info);
5801	protocol->store(status, status_len, system_charset_info);
5802	if (protocol->write())
5803	return TRUE;
5804	return FALSE;
5805	}
5806
5807
5808	static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
5809	void *arg)
5810	{
5811	enum ha_stat_type stat= (enum* ha_stat_type *) arg;
5812	handlerton *hton= plugin_hton(plugin);
5813	if (hton->state == SHOW_OPTION_YES && hton->show_status &&
5814	hton->show_status(hton, thd, stat_print, stat))
5815	return TRUE;
5816	return FALSE;
5817	}
5818
5819	bool ha_show_status(THD thd, handlerton db_type, enum ha_stat_type stat)
5820	{
5821	List<Item> field_list;
5822	Protocol *protocol= thd->protocol;
5823	MEM_ROOT *mem_root= thd->mem_root;
5824	bool result;
5825
5826	field_list.push_back(new (mem_root) Item_empty_string (thd, "Type", `10`),
5827	mem_root);
5828	field_list.push_back(new (mem_root)
5829	Item_empty_string (thd, "Name", FN_REFLEN), mem_root);
5830	field_list.push_back(new (mem_root)
5831	Item_empty_string (thd, "Status", `10`),
5832	mem_root);
5833
5834	if (protocol->send_result_set_metadata(&field_list,
5835	Protocol::SEND_NUM_ROWS \| Protocol::SEND_EOF))
5836	return TRUE;
5837
5838	if (db_type == NULL)
5839	{
5840	result= plugin_foreach(thd, showstat_handlerton,
5841	MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
5842	}
5843	else
5844	{
5845	if (db_type->state != SHOW_OPTION_YES)
5846	{
5847	const LEX_CSTRING *name= hton_name(db_type);
5848	result= stat_print(thd, name->str, name->length,
5849	"", `0`, "DISABLED", `8`) ? `1` : `0`;
5850	}
5851	else
5852	{
5853	result= db_type->show_status &&
5854	db_type->show_status(db_type, thd, stat_print, stat) ? `1` : `0`;
5855	}
5856	}
5857
5858	/*
5859	We also check thd->is_error() as Innodb may return 0 even if
5860	there was an error.
5861	*/
5862	if (likely(!result && !thd->is_error()))
5863	my_eof(thd);
5864	else if (!thd->is_error())
5865	my_error(ER_GET_ERRNO, MYF(`0`), errno, hton_name(db_type)->str);
5866	return result;
5867	}
5868
5869	/*
5870	Function to check if the conditions for row-based binlogging is
5871	correct for the table.
5872
5873	A row in the given table should be replicated if:
5874	- It's not called by partition engine
5875	- Row-based replication is enabled in the current thread
5876	- The binlog is enabled
5877	- It is not a temporary table
5878	- The binary log is open
5879	- The database the table resides in shall be binlogged (binlog__db rules)*
5880	- table is not mysql.event
5881
5882	RETURN VALUE
5883	0 No binary logging in row format
5884	1 Row needs to be logged
5885	*/
5886
5887	bool handler::check_table_binlog_row_based(bool binlog_row)
5888	{
5889	if (table->versioned(VERS_TRX_ID))
5890	return false;
5891	if (unlikely((table->in_use->variables.sql_log_bin_off)))
5892	return `0`; / Called by partitioning engine /
5893	if (unlikely((!check_table_binlog_row_based_done)))
5894	{
5895	check_table_binlog_row_based_done= `1`;
5896	check_table_binlog_row_based_result=
5897	check_table_binlog_row_based_internal(binlog_row);
5898	}
5899	return check_table_binlog_row_based_result;
5900	}
5901
5902	bool handler::check_table_binlog_row_based_internal(bool binlog_row)
5903	{
5904	THD *thd= table->in_use;
5905
5906	return (table->s->can_do_row_logging &&
5907	thd->is_current_stmt_binlog_format_row() &&
5908	/*
5909	Wsrep partially enables binary logging if it have not been
5910	explicitly turned on. As a result we return 'true' if we are in
5911	wsrep binlog emulation mode and the current thread is not a wsrep
5912	applier or replayer thread. This decision is not affected by
5913	@@sql_log_bin as we want the events to make into the binlog
5914	cache only to filter them later before they make into binary log
5915	file.
5916
5917	However, we do return 'false' if binary logging was temporarily
5918	turned off (see tmp_disable_binlog(A)).
5919
5920	Otherwise, return 'true' if binary logging is on.
5921	*/
5922	IF_WSREP(((WSREP_EMULATE_BINLOG(thd) &&
5923	(thd->wsrep_exec_mode != REPL_RECV)) \|\|
5924	((WSREP(thd) \|\|
5925	(thd->variables.option_bits & OPTION_BIN_LOG)) &&
5926	mysql_bin_log.is_open())),
5927	(thd->variables.option_bits & OPTION_BIN_LOG) &&
5928	mysql_bin_log.is_open()));
5929	}
5930
5931
5932	/* @brief*
5933	Write table maps for all (manually or automatically) locked tables
5934	to the binary log. Also, if binlog_annotate_row_events is ON,
5935	write Annotate_rows event before the first table map.
5936
5937	SYNOPSIS
5938	write_locked_table_maps()
5939	thd Pointer to THD structure
5940
5941	DESCRIPTION
5942	This function will generate and write table maps for all tables
5943	that are locked by the thread 'thd'.
5944
5945	RETURN VALUE
5946	0 All OK
5947	1 Failed to write all table maps
5948
5949	SEE ALSO
5950	THD::lock
5951	*/
5952
5953	static int write_locked_table_maps(THD *thd)
5954	{
5955	DBUG_ENTER("write_locked_table_maps");
5956	DBUG_PRINT("enter", ("thd:%p thd->lock:%p "
5957	"thd->extra_lock: %p",
5958	thd, thd->lock, thd->extra_lock));
5959
5960	DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));
5961
5962	MYSQL_LOCK *locks[`2`];
5963	locks[`0`]= thd->extra_lock;
5964	locks[`1`]= thd->lock;
5965	my_bool with_annotate= thd->variables.binlog_annotate_row_events &&
5966	thd->query() && thd->query_length();
5967
5968	for (uint i= `0` ; i < sizeof(locks)/sizeof(*locks) ; ++i )
5969	{
5970	MYSQL_LOCK const *const lock= locks[i];
5971	if (lock == NULL)
5972	continue;
5973
5974	TABLE **const end_ptr= lock->table + lock->table_count;
5975	for (TABLE **table_ptr= lock->table ;
5976	table_ptr != end_ptr ;
5977	++table_ptr)
5978	{
5979	TABLE *const table= *table_ptr;
5980	DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
5981	if (table->current_lock == F_WRLCK &&
5982	table->file->check_table_binlog_row_based(`0`))
5983	{
5984	/*
5985	We need to have a transactional behavior for SQLCOM_CREATE_TABLE
5986	(e.g. CREATE TABLE... SELECT FROM TABLE) in order to keep a*
5987	compatible behavior with the STMT based replication even when
5988	the table is not transactional. In other words, if the operation
5989	fails while executing the insert phase nothing is written to the
5990	binlog.
5991
5992	Note that at this point, we check the type of a set of tables to
5993	create the table map events. In the function binlog_log_row(),
5994	which calls the current function, we check the type of the table
5995	of the current row.
5996	*/
5997	bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE \|\|
5998	table->file->has_transactions();
5999	int const error= thd->binlog_write_table_map(table, has_trans,
6000	&with_annotate);
6001	/*
6002	If an error occurs, it is the responsibility of the caller to
6003	roll back the transaction.
6004	*/
6005	if (unlikely(error))
6006	DBUG_RETURN(`1`);
6007	}
6008	}
6009	}
6010	DBUG_RETURN(`0`);
6011	}
6012
6013
6014	static int binlog_log_row_internal(TABLE* table,
6015	const uchar *before_record,
6016	const uchar *after_record,
6017	Log_func *log_func)
6018	{
6019	bool error= `0`;
6020	THD *const thd= table->in_use;
6021
6022	/*
6023	If there are no table maps written to the binary log, this is
6024	the first row handled in this statement. In that case, we need
6025	to write table maps for all locked tables to the binary log.
6026	*/
6027	if (likely(!(error= ((thd->get_binlog_table_maps() == `0` &&
6028	write_locked_table_maps(thd))))))
6029	{
6030	/*
6031	We need to have a transactional behavior for SQLCOM_CREATE_TABLE
6032	(i.e. CREATE TABLE... SELECT FROM TABLE) in order to keep a*
6033	compatible behavior with the STMT based replication even when
6034	the table is not transactional. In other words, if the operation
6035	fails while executing the insert phase nothing is written to the
6036	binlog.
6037	*/
6038	bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE \|\|
6039	table->file->has_transactions();
6040	error= (*log_func)(thd, table, has_trans, before_record, after_record);
6041	}
6042	return error ? HA_ERR_RBR_LOGGING_FAILED : `0`;
6043	}
6044
6045	int binlog_log_row(TABLE* table, const uchar *before_record,
6046	const uchar after_record, Log_func log_func)
6047	{
6048	#ifdef WITH_WSREP
6049	THD *const thd= table->in_use;
6050
6051	/ only InnoDB tables will be replicated through binlog emulation /
6052	if ((WSREP_EMULATE_BINLOG(thd) &&
6053	table->file->partition_ht()->db_type != DB_TYPE_INNODB) \|\|
6054	(thd->wsrep_ignore_table == true))
6055	return `0`;
6056
6057	/ enforce wsrep_max_ws_rows /
6058	if (WSREP(thd) && table->s->tmp_table == NO_TMP_TABLE)
6059	{
6060	thd->wsrep_affected_rows++;
6061	if (wsrep_max_ws_rows &&
6062	thd->wsrep_exec_mode != REPL_RECV &&
6063	thd->wsrep_affected_rows > wsrep_max_ws_rows)
6064	{
6065	trans_rollback_stmt(thd) \|\| trans_rollback(thd);
6066	my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(`0`));
6067	return ER_ERROR_DURING_COMMIT;
6068	}
6069	}
6070	#endif
6071
6072	if (!table->file->check_table_binlog_row_based(`1`))
6073	return `0`;
6074	return binlog_log_row_internal(table, before_record, after_record, log_func);
6075	}
6076
6077
6078	int handler::ha_external_lock(THD thd, int* lock_type)
6079	{
6080	int error;
6081	DBUG_ENTER("handler::ha_external_lock");
6082	/*
6083	Whether this is lock or unlock, this should be true, and is to verify that
6084	if get_auto_increment() was called (thus may have reserved intervals or
6085	taken a table lock), ha_release_auto_increment() was too.
6086	*/
6087	DBUG_ASSERT(next_insert_id == `0`);
6088	/ Consecutive calls for lock without unlocking in between is not allowed /
6089	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
6090	((lock_type != F_UNLCK && m_lock_type == F_UNLCK) \|\|
6091	lock_type == F_UNLCK));
6092	/ SQL HANDLER call locks/unlock while scanning (RND/INDEX). /
6093	DBUG_ASSERT(inited == NONE \|\| table->open_by_handler);
6094
6095	if (MYSQL_HANDLER_RDLOCK_START_ENABLED() \|\|
6096	MYSQL_HANDLER_WRLOCK_START_ENABLED() \|\|
6097	MYSQL_HANDLER_UNLOCK_START_ENABLED())
6098	{
6099	if (lock_type == F_RDLCK)
6100	{
6101	MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
6102	table_share->table_name.str);
6103	}
6104	else if (lock_type == F_WRLCK)
6105	{
6106	MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
6107	table_share->table_name.str);
6108	}
6109	else if (lock_type == F_UNLCK)
6110	{
6111	MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
6112	table_share->table_name.str);
6113	}
6114	}
6115
6116	/*
6117	We cache the table flags if the locking succeeded. Otherwise, we
6118	keep them as they were when they were fetched in ha_open().
6119	*/
6120	MYSQL_TABLE_LOCK_WAIT(m_psi, PSI_TABLE_EXTERNAL_LOCK, lock_type,
6121	{ error= external_lock(thd, lock_type); })
6122
6123	DBUG_EXECUTE_IF("external_lock_failure", error= HA_ERR_GENERIC;);
6124
6125	if (likely(error == `0` \|\| lock_type == F_UNLCK))
6126	{
6127	m_lock_type= lock_type;
6128	cached_table_flags= table_flags();
6129	if (table_share->tmp_table == NO_TMP_TABLE)
6130	mysql_audit_external_lock(thd, table_share, lock_type);
6131	}
6132
6133	if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() \|\|
6134	MYSQL_HANDLER_WRLOCK_DONE_ENABLED() \|\|
6135	MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
6136	{
6137	if (lock_type == F_RDLCK)
6138	{
6139	MYSQL_HANDLER_RDLOCK_DONE(error);
6140	}
6141	else if (lock_type == F_WRLCK)
6142	{
6143	MYSQL_HANDLER_WRLOCK_DONE(error);
6144	}
6145	else if (lock_type == F_UNLCK)
6146	{
6147	MYSQL_HANDLER_UNLOCK_DONE(error);
6148	}
6149	}
6150	DBUG_RETURN(error);
6151	}
6152
6153
6154	/* @brief*
6155	Check handler usage and reset state of file to after 'open'
6156	*/
6157	int handler::ha_reset()
6158	{
6159	DBUG_ENTER("ha_reset");
6160	/ Check that we have called all proper deallocation functions /
6161	DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
6162	table->s->column_bitmap_size ==
6163	(uchar*) table->def_write_set.bitmap);
6164	DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
6165	DBUG_ASSERT(!table->file->keyread_enabled());
6166	/ ensure that ha_index_end / ha_rnd_end has been called /
6167	DBUG_ASSERT(inited == NONE);
6168	/ reset the bitmaps to point to defaults /
6169	table->default_column_bitmaps();
6170	pushed_cond= NULL;
6171	tracker= NULL;
6172	mark_trx_read_write_done= check_table_binlog_row_based_done=
6173	check_table_binlog_row_based_result= `0`;
6174	/ Reset information about pushed engine conditions /
6175	cancel_pushed_idx_cond();
6176	/ Reset information about pushed index conditions /
6177	clear_top_table_fields();
6178	DBUG_RETURN(reset());
6179	}
6180
6181
6182	int handler::ha_write_row(uchar *buf)
6183	{
6184	int error;
6185	Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
6186	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
6187	m_lock_type == F_WRLCK);
6188	DBUG_ENTER("handler::ha_write_row");
6189	DEBUG_SYNC_C("ha_write_row_start");
6190
6191	MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
6192	mark_trx_read_write();
6193	increment_statistics(&SSV::ha_write_count);
6194
6195	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_WRITE_ROW, MAX_KEY, `0`,
6196	{ error= write_row(buf); })
6197
6198	MYSQL_INSERT_ROW_DONE(error);
6199	if (likely(!error) && !row_already_logged)
6200	{
6201	rows_changed++;
6202	error= binlog_log_row(table, `0`, buf, log_func);
6203	}
6204	DEBUG_SYNC_C("ha_write_row_end");
6205	DBUG_RETURN(error);
6206	}
6207
6208
6209	int handler::ha_update_row(const uchar old_data, const* uchar *new_data)
6210	{
6211	int error;
6212	Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
6213	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
6214	m_lock_type == F_WRLCK);
6215
6216	/*
6217	Some storage engines require that the new record is in record[0]
6218	(and the old record is in record[1]).
6219	*/
6220	DBUG_ASSERT(new_data == table->record[`0`]);
6221	DBUG_ASSERT(old_data == table->record[`1`]);
6222
6223	MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
6224	mark_trx_read_write();
6225	increment_statistics(&SSV::ha_update_count);
6226
6227	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_UPDATE_ROW, active_index, `0`,
6228	{ error= update_row(old_data, new_data);})
6229
6230	MYSQL_UPDATE_ROW_DONE(error);
6231	if (likely(!error) && !row_already_logged)
6232	{
6233	rows_changed++;
6234	error= binlog_log_row(table, old_data, new_data, log_func);
6235	}
6236	return error;
6237	}
6238
6239	/*
6240	Update first row. Only used by sequence tables
6241	*/
6242
6243	int handler::update_first_row(uchar *new_data)
6244	{
6245	int error;
6246	if (likely(!(error= ha_rnd_init(`1`))))
6247	{
6248	int end_error;
6249	if (likely(!(error= ha_rnd_next(table->record[`1`]))))
6250	{
6251	/*
6252	We have to do the memcmp as otherwise we may get error 169 from InnoDB
6253	*/
6254	if (memcmp(new_data, table->record[`1`], table->s->reclength))
6255	error= update_row(table->record[`1`], new_data);
6256	}
6257	end_error= ha_rnd_end();
6258	if (likely(!error))
6259	error= end_error;
6260	/ Logging would be wrong if update_row works but ha_rnd_end fails /
6261	DBUG_ASSERT(!end_error \|\| error != `0`);
6262	}
6263	return error;
6264	}
6265
6266
6267	int handler::ha_delete_row(const uchar *buf)
6268	{
6269	int error;
6270	Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
6271	DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE \|\|
6272	m_lock_type == F_WRLCK);
6273	/*
6274	Normally table->record[0] is used, but sometimes table->record[1] is used.
6275	*/
6276	DBUG_ASSERT(buf == table->record[`0`] \|\|
6277	buf == table->record[`1`]);
6278
6279	MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
6280	mark_trx_read_write();
6281	increment_statistics(&SSV::ha_delete_count);
6282
6283	TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_DELETE_ROW, active_index, `0`,
6284	{ error= delete_row(buf);})
6285	MYSQL_DELETE_ROW_DONE(error);
6286	if (likely(!error))
6287	{
6288	rows_changed++;
6289	error= binlog_log_row(table, buf, `0`, log_func);
6290	}
6291	return error;
6292	}
6293
6294
6295	/**
6296	Execute a direct update request. A direct update request updates all
6297	qualified rows in a single operation, rather than one row at a time.
6298	In a Spider cluster the direct update operation is pushed down to the
6299	child levels of the cluster.
6300
6301	Note that this can't be used in case of statment logging
6302
6303	@param update_rows Number of updated rows.
6304
6305	@retval 0 Success.
6306	@retval != 0 Failure.
6307	*/
6308
6309	int handler::ha_direct_update_rows(ha_rows *update_rows)
6310	{
6311	int error;
6312
6313	MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
6314	mark_trx_read_write();
6315
6316	error = direct_update_rows(update_rows);
6317	MYSQL_UPDATE_ROW_DONE(error);
6318	return error;
6319	}
6320
6321
6322	/**
6323	Execute a direct delete request. A direct delete request deletes all
6324	qualified rows in a single operation, rather than one row at a time.
6325	In a Spider cluster the direct delete operation is pushed down to the
6326	child levels of the cluster.
6327
6328	@param delete_rows Number of deleted rows.
6329
6330	@retval 0 Success.
6331	@retval != 0 Failure.
6332	*/
6333
6334	int handler::ha_direct_delete_rows(ha_rows *delete_rows)
6335	{
6336	int error;
6337	/ Ensure we are not using binlog row /
6338	DBUG_ASSERT(!table->in_use->is_current_stmt_binlog_format_row());
6339
6340	MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
6341	mark_trx_read_write();
6342
6343	error = direct_delete_rows(delete_rows);
6344	MYSQL_DELETE_ROW_DONE(error);
6345	return error;
6346	}
6347
6348
6349	/* @brief*
6350	use_hidden_primary_key() is called in case of an update/delete when
6351	(table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
6352	but we don't have a primary key
6353	*/
6354	void handler::use_hidden_primary_key()
6355	{
6356	/ fallback to use all columns in the table to identify row /
6357	table->column_bitmaps_set(&table->s->all_set, table->write_set);
6358	}
6359
6360
6361	/**
6362	Get an initialized ha_share.
6363
6364	@return Initialized ha_share
6365	@retval NULL ha_share is not yet initialized.
6366	@retval != NULL previous initialized ha_share.
6367
6368	@note
6369	If not a temp table, then LOCK_ha_data must be held.
6370	*/
6371
6372	Handler_share *handler::get_ha_share_ptr()
6373	{
6374	DBUG_ENTER("handler::get_ha_share_ptr");
6375	DBUG_ASSERT(ha_share);
6376	DBUG_ASSERT(table_share);
6377
6378	#ifndef DBUG_OFF
6379	if (table_share->tmp_table == NO_TMP_TABLE)
6380	mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
6381	#endif
6382
6383	DBUG_RETURN(*ha_share);
6384	}
6385
6386
6387	/**
6388	Set ha_share to be used by all instances of the same table/partition.
6389
6390	@param ha_share Handler_share to be shared.
6391
6392	@note
6393	If not a temp table, then LOCK_ha_data must be held.
6394	*/
6395
6396	void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
6397	{
6398	DBUG_ENTER("handler::set_ha_share_ptr");
6399	DBUG_ASSERT(ha_share);
6400	#ifndef DBUG_OFF
6401	if (table_share->tmp_table == NO_TMP_TABLE)
6402	mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
6403	#endif
6404
6405	*ha_share= arg_ha_share;
6406	DBUG_VOID_RETURN;
6407	}
6408
6409
6410	/**
6411	Take a lock for protecting shared handler data.
6412	*/
6413
6414	void handler::lock_shared_ha_data()
6415	{
6416	DBUG_ASSERT(table_share);
6417	if (table_share->tmp_table == NO_TMP_TABLE)
6418	mysql_mutex_lock(&table_share->LOCK_ha_data);
6419	}
6420
6421
6422	/**
6423	Release lock for protecting ha_share.
6424	*/
6425
6426	void handler::unlock_shared_ha_data()
6427	{
6428	DBUG_ASSERT(table_share);
6429	if (table_share->tmp_table == NO_TMP_TABLE)
6430	mysql_mutex_unlock(&table_share->LOCK_ha_data);
6431	}
6432
6433	/* @brief*
6434	Dummy function which accept information about log files which is not need
6435	by handlers
6436	*/
6437	void signal_log_not_needed(struct handlerton, char *log_file)
6438	{
6439	DBUG_ENTER("signal_log_not_needed");
6440	DBUG_PRINT("enter", ("logfile '%s'", log_file));
6441	DBUG_VOID_RETURN;
6442	}
6443
6444	void handler::set_lock_type(enum thr_lock_type lock)
6445	{
6446	table->reginfo.lock_type= lock;
6447	}
6448
6449	#ifdef WITH_WSREP
6450	/**
6451	@details
6452	This function makes the storage engine to force the victim transaction
6453	to abort. Currently, only innodb has this functionality, but any SE
6454	implementing the wsrep API should provide this service to support
6455	multi-master operation.
6456
6457	@note Aborting the transaction does NOT end it, it still has to
6458	be rolled back with hton->rollback().
6459
6460	@note It is safe to abort from one thread (bf_thd) the transaction,
6461	running in another thread (victim_thd), because InnoDB's lock_sys and
6462	trx_mutex guarantee the necessary protection. However, its not safe
6463	to access victim_thd->transaction, because it's not protected from
6464	concurrent accesses. And it's an overkill to take LOCK_plugin and
6465	iterate the whole installed_htons[] array every time.
6466
6467	@param bf_thd brute force THD asking for the abort
6468	@param victim_thd victim THD to be aborted
6469
6470	@return
6471	always 0
6472	*/
6473
6474	int ha_abort_transaction(THD bf_thd, THD victim_thd, my_bool signal)
6475	{
6476	DBUG_ENTER("ha_abort_transaction");
6477	if (!WSREP(bf_thd) &&
6478	!(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
6479	bf_thd->wsrep_exec_mode == TOTAL_ORDER)) {
6480	DBUG_RETURN(`0`);
6481	}
6482
6483	handlerton *hton= installed_htons[DB_TYPE_INNODB];
6484	if (hton && hton->abort_transaction)
6485	{
6486	hton->abort_transaction(hton, bf_thd, victim_thd, signal);
6487	}
6488	else
6489	{
6490	WSREP_WARN("Cannot abort InnoDB transaction");
6491	}
6492
6493	DBUG_RETURN(`0`);
6494	}
6495
6496	void ha_fake_trx_id(THD *thd)
6497	{
6498	DBUG_ENTER("ha_fake_trx_id");
6499
6500	bool no_fake_trx_id= true;
6501
6502	if (!WSREP(thd))
6503	{
6504	DBUG_VOID_RETURN;
6505	}
6506
6507	/ Try statement transaction if standard one is not set. /
6508	THD_TRANS *trans= (thd->transaction.all.ha_list) ? &thd->transaction.all :
6509	&thd->transaction.stmt;
6510
6511	Ha_trx_info ha_info= trans->ha_list, ha_info_next;
6512
6513	for (; ha_info; ha_info= ha_info_next)
6514	{
6515	handlerton *hton= ha_info->ht();
6516	if (hton->fake_trx_id)
6517	{
6518	hton->fake_trx_id(hton, thd);
6519
6520	/ Got a fake trx id. /
6521	no_fake_trx_id= false;
6522
6523	/*
6524	We need transaction ID from just one storage engine providing
6525	fake_trx_id (which will most likely be the case).
6526	*/
6527	break;
6528	}
6529	ha_info_next= ha_info->next();
6530	}
6531
6532	if (unlikely(no_fake_trx_id))
6533	WSREP_WARN("Cannot get fake transaction ID from storage engine.");
6534
6535	DBUG_VOID_RETURN;
6536	}
6537	#endif /* WITH_WSREP */
6538
6539
6540	#ifdef TRANS_LOG_MGM_EXAMPLE_CODE
6541	/*
6542	Example of transaction log management functions based on assumption that logs
6543	placed into a directory
6544	*/
6545	#include <my_dir.h>
6546	#include <my_sys.h>
6547	int example_of_iterator_using_for_logs_cleanup(handlerton *hton)
6548	{
6549	void *buffer;
6550	int res= `1`;
6551	struct handler_iterator iterator;
6552	struct handler_log_file_data data;
6553
6554	if (!hton->create_iterator)
6555	return `1`; / iterator creator is not supported /
6556
6557	if ((*hton->create_iterator)(hton, HA_TRANSACTLOG_ITERATOR, &iterator) !=
6558	HA_ITERATOR_OK)
6559	{
6560	/ error during creation of log iterator or iterator is not supported /
6561	return `1`;
6562	}
6563	while((iterator.next)(&iterator, (void**)&data) == `0`)
6564	{
6565	printf("%s\n", data.filename.str);
6566	if (data.status == HA_LOG_STATUS_FREE &&
6567	mysql_file_delete(INSTRUMENT_ME,
6568	data.filename.str, MYF(MY_WME)))
6569	goto err;
6570	}
6571	res= `0`;
6572	err:
6573	(*iterator.destroy)(&iterator);
6574	return res;
6575	}
6576
6577
/*
  A real implementation would ask the handler where it saves its logs, but
  this is just an example, so a constant is used.
  IMHO FN_ROOTDIR ("/") is safe enough for an example, because nobody has
  rights on it except root and it consists of directories only, at least on
  *nix (sorry, can't find a windows-safe solution here, but it is only an
  example).
*/
#define fl_dir FN_ROOTDIR
6586
6587
6588	/* @brief*
6589	Dummy function to return log status should be replaced by function which
6590	really detect the log status and check that the file is a log of this
6591	handler.
6592	*/
6593	enum log_status fl_get_log_status(char *log)
6594	{
6595	MY_STAT stat_buff;
6596	if (mysql_file_stat(INSTRUMENT_ME, log, &stat_buff, MYF(`0`)))
6597	return HA_LOG_STATUS_INUSE;
6598	return HA_LOG_STATUS_NOSUCHLOG;
6599	}
6600
6601
6602	struct fl_buff
6603	{
6604	LEX_STRING *names;
6605	enum log_status *statuses;
6606	uint32 entries;
6607	uint32 current;
6608	};
6609
6610
6611	int fl_log_iterator_next(struct handler_iterator *iterator,
6612	void *iterator_object)
6613	{
6614	struct fl_buff buff= (struct* fl_buff *)iterator->buffer;
6615	struct handler_log_file_data *data=
6616	(struct handler_log_file_data *) iterator_object;
6617	if (buff->current >= buff->entries)
6618	return `1`;
6619	data->filename= buff->names[buff->current];
6620	data->status= buff->statuses[buff->current];
6621	buff->current++;
6622	return `0`;
6623	}
6624
6625
6626	void fl_log_iterator_destroy(struct handler_iterator *iterator)
6627	{
6628	my_free(iterator->buffer);
6629	}
6630
6631
6632	/* @brief*
6633	returns buffer, to be assigned in handler_iterator struct
6634	*/
6635	enum handler_create_iterator_result
6636	fl_log_iterator_buffer_init(struct handler_iterator *iterator)
6637	{
6638	MY_DIR *dirp;
6639	struct fl_buff *buff;
6640	char *name_ptr;
6641	uchar *ptr;
6642	FILEINFO *file;
6643	uint32 i;
6644
6645	/ to be able to make my_free without crash in case of error /
6646	iterator->buffer= `0`;
6647
6648	if (!(dirp = my_dir(fl_dir, MYF(MY_THREAD_SPECIFIC))))
6649	{
6650	return HA_ITERATOR_ERROR;
6651	}
6652	if ((ptr= (uchar)my_malloc(ALIGN_SIZE(sizeof*(fl_buff)) +
6653	((ALIGN_SIZE(sizeof(LEX_STRING)) +
6654	sizeof(enum log_status) +
6655	+ FN_REFLEN + `1`) *
6656	(uint) dirp->number_off_files),
6657	MYF(MY_THREAD_SPECIFIC))) == `0`)
6658	{
6659	return HA_ITERATOR_ERROR;
6660	}
6661	buff= (struct fl_buff *)ptr;
6662	buff->entries= buff->current= `0`;
6663	ptr= ptr + (ALIGN_SIZE(sizeof(fl_buff)));
6664	buff->names= (LEX_STRING*) (ptr);
6665	ptr= ptr + ((ALIGN_SIZE(sizeof(LEX_STRING)) *
6666	(uint) dirp->number_off_files));
6667	buff->statuses= (enum log_status *)(ptr);
6668	name_ptr= (char )(ptr + (sizeof(enum* log_status) *
6669	(uint) dirp->number_off_files));
6670	for (i=`0` ; i < (uint) dirp->number_off_files ; i++)
6671	{
6672	enum log_status st;
6673	file= dirp->dir_entry + i;
6674	if ((file->name[`0`] == `'.'` &&
6675	((file->name[`1`] == `'.'` && file->name[`2`] == `'\0'`) \|\|
6676	file->name[`1`] == `'\0'`)))
6677	continue;
6678	if ((st= fl_get_log_status(file->name)) == HA_LOG_STATUS_NOSUCHLOG)
6679	continue;
6680	name_ptr= strxnmov(buff->names[buff->entries].str= name_ptr,
6681	FN_REFLEN, fl_dir, file->name, NullS);
6682	buff->names[buff->entries].length= (name_ptr -
6683	buff->names[buff->entries].str);
6684	buff->statuses[buff->entries]= st;
6685	buff->entries++;
6686	}
6687
6688	iterator->buffer= buff;
6689	iterator->next= &fl_log_iterator_next;
6690	iterator->destroy= &fl_log_iterator_destroy;
6691	my_dirend(dirp);
6692	return HA_ITERATOR_OK;
6693	}
6694
6695
6696	/ An example of a iterator creator /
6697	enum handler_create_iterator_result
6698	fl_create_iterator(enum handler_iterator_type type,
6699	struct handler_iterator *iterator)
6700	{
6701	switch(type) {
6702	case HA_TRANSACTLOG_ITERATOR:
6703	return fl_log_iterator_buffer_init(iterator);
6704	default:
6705	return HA_ITERATOR_UNSUPPORTED;
6706	}
6707	}
6708	#endif /TRANS_LOG_MGM_EXAMPLE_CODE/
6709
6710
6711	bool HA_CREATE_INFO::check_conflicting_charset_declarations(CHARSET_INFO *cs)
6712	{
6713	if ((used_fields & HA_CREATE_USED_DEFAULT_CHARSET) &&
6714	/ DEFAULT vs explicit, or explicit vs DEFAULT /
6715	(((default_table_charset == NULL) != (cs == NULL)) \|\|
6716	/ Two different explicit character sets /
6717	(default_table_charset && cs &&
6718	!my_charset_same(default_table_charset, cs))))
6719	{
6720	my_error(ER_CONFLICTING_DECLARATIONS, MYF(`0`),
6721	"CHARACTER SET ", default_table_charset ?
6722	default_table_charset->csname : "DEFAULT",
6723	"CHARACTER SET ", cs ? cs->csname : "DEFAULT");
6724	return true;
6725	}
6726	return false;
6727	}
6728
6729	/ Remove all indexes for a given table from global index statistics /
6730
6731	static
6732	int del_global_index_stats_for_table(THD thd, uchar cache_key, size_t cache_key_length)
6733	{
6734	int res = `0`;
6735	DBUG_ENTER("del_global_index_stats_for_table");
6736
6737	mysql_mutex_lock(&LOCK_global_index_stats);
6738
6739	for (uint i= `0`; i < global_index_stats.records;)
6740	{
6741	INDEX_STATS *index_stats =
6742	(INDEX_STATS*) my_hash_element(&global_index_stats, i);
6743
6744	/ We search correct db\0table_name\0 string /
6745	if (index_stats &&
6746	index_stats->index_name_length >= cache_key_length &&
6747	!memcmp(index_stats->index, cache_key, cache_key_length))
6748	{
6749	res= my_hash_delete(&global_index_stats, (uchar*)index_stats);
6750	/*
6751	In our HASH implementation on deletion one elements
6752	is moved into a place where a deleted element was,
6753	and the last element is moved into the empty space.
6754	Thus we need to re-examine the current element, but
6755	we don't have to restart the search from the beginning.
6756	*/
6757	}
6758	else
6759	i++;
6760	}
6761
6762	mysql_mutex_unlock(&LOCK_global_index_stats);
6763	DBUG_RETURN(res);
6764	}
6765
6766	/ Remove a table from global table statistics /
6767
6768	int del_global_table_stat(THD thd, LEX_CSTRING db, LEX_CSTRING *table)
6769	{
6770	TABLE_STATS *table_stats;
6771	int res = `0`;
6772	uchar *cache_key;
6773	size_t cache_key_length;
6774	DBUG_ENTER("del_global_table_stat");
6775
6776	cache_key_length= db->length + `1` + table->length + `1`;
6777
6778	if(!(cache_key= (uchar *)my_malloc(cache_key_length,
6779	MYF(MY_WME \| MY_ZEROFILL))))
6780	{
6781	/ Out of memory error already given /
6782	res = `1`;
6783	goto end;
6784	}
6785
6786	memcpy(cache_key, db->str, db->length);
6787	memcpy(cache_key + db->length + `1`, table->str, table->length);
6788
6789	res= del_global_index_stats_for_table(thd, cache_key, cache_key_length);
6790
6791	mysql_mutex_lock(&LOCK_global_table_stats);
6792
6793	if((table_stats= (TABLE_STATS*) my_hash_search(&global_table_stats,
6794	cache_key,
6795	cache_key_length)))
6796	res= my_hash_delete(&global_table_stats, (uchar*)table_stats);
6797
6798	my_free(cache_key);
6799	mysql_mutex_unlock(&LOCK_global_table_stats);
6800
6801	end:
6802	DBUG_RETURN(res);
6803	}
6804
6805	/ Remove a index from global index statistics /
6806
6807	int del_global_index_stat(THD thd, TABLE table, KEY* key_info)
6808	{
6809	INDEX_STATS *index_stats;
6810	size_t key_length= table->s->table_cache_key.length + key_info->name.length + `1`;
6811	int res = `0`;
6812	DBUG_ENTER("del_global_index_stat");
6813	mysql_mutex_lock(&LOCK_global_index_stats);
6814
6815	if((index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
6816	key_info->cache_name,
6817	key_length)))
6818	res= my_hash_delete(&global_index_stats, (uchar*)index_stats);
6819
6820	mysql_mutex_unlock(&LOCK_global_index_stats);
6821	DBUG_RETURN(res);
6822	}
6823
6824	bool Vers_parse_info::is_start(const char name) const*
6825	{
6826	DBUG_ASSERT(name);
6827	return as_row.start && as_row.start.streq(name);
6828	}
6829	bool Vers_parse_info::is_end(const char name) const*
6830	{
6831	DBUG_ASSERT(name);
6832	return as_row.end && as_row.end.streq(name);
6833	}
6834	bool Vers_parse_info::is_start(const Create_field &f) const
6835	{
6836	return f.flags & VERS_SYS_START_FLAG;
6837	}
6838	bool Vers_parse_info::is_end(const Create_field &f) const
6839	{
6840	return f.flags & VERS_SYS_END_FLAG;
6841	}
6842
6843	static Create_field vers_init_sys_field(THD thd, const char field_name, int* flags, bool integer)
6844	{
6845	Create_field f= new* (thd->mem_root) Create_field ();
6846	if (!f)
6847	return NULL;
6848
6849	memset(f, `0`, sizeof(*f));
6850	f->field_name.str= field_name;
6851	f->field_name.length= strlen(field_name);
6852	f->charset= system_charset_info;
6853	f->flags= flags \| NOT_NULL_FLAG;
6854	if (integer)
6855	{
6856	DBUG_ASSERT(`0`); // Not implemented yet
6857	f->set_handler(&type_handler_vers_trx_id);
6858	f->length= MY_INT64_NUM_DECIMAL_DIGITS - `1`;
6859	f->flags\|= UNSIGNED_FLAG;
6860	}
6861	else
6862	{
6863	f->set_handler(&type_handler_timestamp2);
6864	f->length= MAX_DATETIME_PRECISION;
6865	}
6866	f->invisible= DBUG_EVALUATE_IF("sysvers_show", VISIBLE, INVISIBLE_SYSTEM);
6867
6868	if (f->check(thd))
6869	return NULL;
6870
6871	return f;
6872	}
6873
6874	static bool vers_create_sys_field(THD thd, const* char *field_name,
6875	Alter_info alter_info, int* flags)
6876	{
6877	Create_field f= vers_init_sys_field(thd, field_name, flags, false*);
6878	if (!f)
6879	return true;
6880
6881	alter_info->flags\|= ALTER_PARSER_ADD_COLUMN;
6882	alter_info->create_list.push_back(f);
6883
6884	return false;
6885	}
6886
6887	const Lex_ident Vers_parse_info::default_start= "row_start";
6888	const Lex_ident Vers_parse_info::default_end= "row_end";
6889
6890	bool Vers_parse_info::fix_implicit(THD thd, Alter_info alter_info)
6891	{
6892	// If user specified some of these he must specify the others too. Do nothing.
6893	if (*this)
6894	return false;
6895
6896	alter_info->flags\|= ALTER_PARSER_ADD_COLUMN;
6897
6898	system_time = start_end_t (default_start, default_end);
6899	as_row = system_time;
6900
6901	if (vers_create_sys_field(thd, default_start, alter_info, VERS_SYS_START_FLAG) \|\|
6902	vers_create_sys_field(thd, default_end, alter_info, VERS_SYS_END_FLAG))
6903	{
6904	return true;
6905	}
6906	return false;
6907	}
6908
6909	bool Table_scope_and_contents_source_st::vers_native(THD thd) const*
6910	{
6911	if (ha_check_storage_engine_flag(db_type, HTON_NATIVE_SYS_VERSIONING))
6912	return true;
6913
6914	#ifdef WITH_PARTITION_STORAGE_ENGINE
6915	partition_info *info= thd->work_part_info;
6916	if (info && !(used_fields & HA_CREATE_USED_ENGINE))
6917	{
6918	if (handlerton *hton= info->default_engine_type)
6919	return ha_check_storage_engine_flag(hton, HTON_NATIVE_SYS_VERSIONING);
6920
6921	List_iterator_fast<partition_element> it(info->partitions);
6922	while (partition_element *partition_element= it ++)
6923	{
6924	if (partition_element->find_engine_flag(HTON_NATIVE_SYS_VERSIONING))
6925	return true;
6926	}
6927	}
6928	#endif
6929	return false;
6930	}
6931
6932	bool Table_scope_and_contents_source_st::vers_fix_system_fields(
6933	THD thd, Alter_info alter_info, const TABLE_LIST &create_table,
6934	bool create_select)
6935	{
6936	DBUG_ASSERT(!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING));
6937
6938	DBUG_EXECUTE_IF("sysvers_force", if (!tmp_table()) {
6939	alter_info->flags\|= ALTER_ADD_SYSTEM_VERSIONING;
6940	options\|= HA_VERSIONED_TABLE; });
6941
6942	if (!vers_info.need_check(alter_info))
6943	return false;
6944
6945	if (!vers_info.versioned_fields && vers_info.unversioned_fields &&
6946	!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING))
6947	{
6948	// All is correct but this table is not versioned.
6949	options&= ~HA_VERSIONED_TABLE;
6950	return false;
6951	}
6952
6953	if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING) && vers_info)
6954	{
6955	my_error(ER_MISSING, MYF(`0`), create_table.table_name.str,
6956	"WITH SYSTEM VERSIONING");
6957	return true;
6958	}
6959
6960	List_iterator<Create_field> it(alter_info->create_list);
6961	while (Create_field *f= it ++)
6962	{
6963	if ((f->versioning == Column_definition::VERSIONING_NOT_SET &&
6964	!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING)) \|\|
6965	f->versioning == Column_definition::WITHOUT_VERSIONING)
6966	{
6967	f->flags\|= VERS_UPDATE_UNVERSIONED_FLAG;
6968	}
6969	} // while (Create_field f= it++)*
6970
6971	if (vers_info.fix_implicit(thd, alter_info))
6972	return true;
6973
6974	int plain_cols= `0`; // columns don't have WITH or WITHOUT SYSTEM VERSIONING
6975	int vers_cols= `0`; // columns have WITH SYSTEM VERSIONING
6976	it.rewind();
6977	while (const Create_field *f= it ++)
6978	{
6979	if (vers_info.is_start(f) \|\| vers_info.is_end(f))
6980	continue;
6981
6982	if (f->versioning == Column_definition::VERSIONING_NOT_SET)
6983	plain_cols++;
6984	else if (f->versioning == Column_definition::WITH_VERSIONING)
6985	vers_cols++;
6986	}
6987
6988	if (!thd->lex->tmp_table() &&
6989	// CREATE from SELECT (Create_fields are not yet added)
6990	!create_select && vers_cols == `0` && (plain_cols == `0` \|\| !vers_info))
6991	{
6992	my_error(ER_VERS_TABLE_MUST_HAVE_COLUMNS, MYF(`0`),
6993	create_table.table_name.str);
6994	return true;
6995	}
6996
6997	return false;
6998	}
6999
7000
7001	bool Table_scope_and_contents_source_st::vers_check_system_fields(
7002	THD thd, Alter_info alter_info, const TABLE_LIST &create_table)
7003	{
7004	if (!(options & HA_VERSIONED_TABLE))
7005	return false;
7006	return vers_info.check_sys_fields(create_table.table_name, create_table.db,
7007	alter_info, vers_native(thd));
7008	}
7009
7010
7011	bool Vers_parse_info::fix_alter_info(THD thd, Alter_info alter_info,
7012	HA_CREATE_INFO create_info, TABLE table)
7013	{
7014	TABLE_SHARE *share= table->s;
7015	const char *table_name= share->table_name.str;
7016
7017	if (!need_check(alter_info) && !share->versioned)
7018	return false;
7019
7020	if (DBUG_EVALUATE_IF("sysvers_force", `0`, share->tmp_table))
7021	{
7022	my_error(ER_VERS_TEMPORARY, MYF(`0`));
7023	return true;
7024	}
7025
7026	if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING &&
7027	table->versioned())
7028	{
7029	my_error(ER_VERS_ALREADY_VERSIONED, MYF(`0`), table_name);
7030	return true;
7031	}
7032
7033	if (alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING)
7034	{
7035	if (!share->versioned)
7036	{
7037	my_error(ER_VERS_NOT_VERSIONED, MYF(`0`), table_name);
7038	return true;
7039	}
7040	#ifdef WITH_PARTITION_STORAGE_ENGINE
7041	if (table->part_info &&
7042	table->part_info->part_type == VERSIONING_PARTITION)
7043	{
7044	my_error(ER_DROP_VERSIONING_SYSTEM_TIME_PARTITION, MYF(`0`), table_name);
7045	return true;
7046	}
7047	#endif
7048
7049	return false;
7050	}
7051
7052	{
7053	List_iterator_fast<Create_field> it(alter_info->create_list);
7054	while (Create_field *f= it ++)
7055	{
7056	if (f->change.length && f->flags & VERS_SYSTEM_FIELD)
7057	{
7058	my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(`0`), f->field_name.str);
7059	return true;
7060	}
7061	}
7062	}
7063
7064	if ((alter_info->flags & ALTER_DROP_PERIOD \|\|
7065	versioned_fields \|\| unversioned_fields) && !share->versioned)
7066	{
7067	my_error(ER_VERS_NOT_VERSIONED, MYF(`0`), table_name);
7068	return true;
7069	}
7070
7071	if (share->versioned)
7072	{
7073	if (alter_info->flags & ALTER_ADD_PERIOD)
7074	{
7075	my_error(ER_VERS_ALREADY_VERSIONED, MYF(`0`), table_name);
7076	return true;
7077	}
7078
7079	// copy info from existing table
7080	create_info->options\|= HA_VERSIONED_TABLE;
7081
7082	DBUG_ASSERT(share->vers_start_field());
7083	DBUG_ASSERT(share->vers_end_field());
7084	Lex_ident start(share->vers_start_field()->field_name);
7085	Lex_ident end(share->vers_end_field()->field_name);
7086	DBUG_ASSERT(start.str);
7087	DBUG_ASSERT(end.str);
7088
7089	as_row = start_end_t (start, end);
7090	system_time = as_row;
7091
7092	if (alter_info->create_list.elements)
7093	{
7094	List_iterator_fast<Create_field> it(alter_info->create_list);
7095	while (Create_field *f= it ++)
7096	{
7097	if (f->versioning == Column_definition::WITHOUT_VERSIONING)
7098	f->flags\|= VERS_UPDATE_UNVERSIONED_FLAG;
7099
7100	if (f->change.str && (start.streq(f->change) \|\| end.streq(f->change)))
7101	{
7102	my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(`0`), f->change.str);
7103	return true;
7104	}
7105	}
7106	}
7107
7108	return false;
7109	}
7110
7111	if (fix_implicit(thd, alter_info))
7112	return true;
7113
7114	if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING)
7115	{
7116	bool native= create_info->vers_native(thd);
7117	if (check_sys_fields(table_name, share->db, alter_info, native))
7118	return true;
7119	}
7120
7121	return false;
7122	}
7123
7124	bool
7125	Vers_parse_info::fix_create_like(Alter_info &alter_info, HA_CREATE_INFO &create_info,
7126	TABLE_LIST &src_table, TABLE_LIST &table)
7127	{
7128	List_iterator<Create_field> it(alter_info.create_list);
7129	Create_field f, f_start=NULL, *f_end= NULL;
7130
7131	DBUG_ASSERT(alter_info.create_list.elements > `2`);
7132
7133	if (create_info.tmp_table())
7134	{
7135	int remove= `2`;
7136	while (remove && (f= it ++))
7137	{
7138	if (f->flags & VERS_SYSTEM_FIELD)
7139	{
7140	it.remove();
7141	remove--;
7142	}
7143	}
7144	DBUG_ASSERT(remove == `0`);
7145	push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
7146	ER_UNKNOWN_ERROR,
7147	"System versioning is stripped from temporary `%s.%s`",
7148	table.db.str, table.table_name.str);
7149	return false;
7150	}
7151
7152	while ((f= it ++))
7153	{
7154	if (f->flags & VERS_SYS_START_FLAG)
7155	{
7156	f_start= f;
7157	if (f_end)
7158	break;
7159	}
7160	else if (f->flags & VERS_SYS_END_FLAG)
7161	{
7162	f_end= f;
7163	if (f_start)
7164	break;
7165	}
7166	}
7167
7168	if (!f_start \|\| !f_end)
7169	{
7170	my_error(ER_MISSING, MYF(`0`), src_table.table_name.str,
7171	f_start ? "AS ROW END" : "AS ROW START");
7172	return true;
7173	}
7174
7175	as_row = start_end_t (f_start->field_name, f_end->field_name);
7176	system_time = as_row;
7177
7178	create_info.options\|= HA_VERSIONED_TABLE;
7179	return false;
7180	}
7181
7182	bool Vers_parse_info::need_check(const Alter_info alter_info) const*
7183	{
7184	return versioned_fields \|\| unversioned_fields \|\|
7185	alter_info->flags & ALTER_ADD_PERIOD \|\|
7186	alter_info->flags & ALTER_DROP_PERIOD \|\|
7187	alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING \|\|
7188	alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING \|\| *this;
7189	}
7190
7191	bool Vers_parse_info::check_conditions(const Lex_table_name &table_name,
7192	const Lex_table_name &db) const
7193	{
7194	if (!as_row.start \|\| !as_row.end)
7195	{
7196	my_error(ER_MISSING, MYF(`0`), table_name.str,
7197	as_row.start ? "AS ROW END" : "AS ROW START");
7198	return true;
7199	}
7200
7201	if (!system_time.start \|\| !system_time.end)
7202	{
7203	my_error(ER_MISSING, MYF(`0`), table_name.str, "PERIOD FOR SYSTEM_TIME");
7204	return true;
7205	}
7206
7207	if (!as_row.start.streq(system_time.start) \|\|
7208	!as_row.end.streq(system_time.end))
7209	{
7210	my_error(ER_VERS_PERIOD_COLUMNS, MYF(`0`), as_row.start.str, as_row.end.str);
7211	return true;
7212	}
7213
7214	if (db.streq(MYSQL_SCHEMA_NAME))
7215	{
7216	my_error(ER_VERS_DB_NOT_SUPPORTED, MYF(`0`), MYSQL_SCHEMA_NAME.str);
7217	return true;
7218	}
7219	return false;
7220	}
7221
7222	bool Vers_parse_info::check_sys_fields(const Lex_table_name &table_name,
7223	const Lex_table_name &db,
7224	Alter_info alter_info, bool* native)
7225	{
7226	if (check_conditions(table_name, db))
7227	return true;
7228
7229	List_iterator<Create_field> it(alter_info->create_list);
7230	uint found_flag= `0`;
7231	while (Create_field *f= it ++)
7232	{
7233	vers_sys_type_t f_check_unit= VERS_UNDEFINED;
7234	uint sys_flag= f->flags & VERS_SYSTEM_FIELD;
7235
7236	if (!sys_flag)
7237	continue;
7238
7239	if (sys_flag & found_flag)
7240	{
7241	my_error(ER_VERS_DUPLICATE_ROW_START_END, MYF(`0`),
7242	found_flag & VERS_SYS_START_FLAG ? "START" : "END",
7243	f->field_name.str);
7244	return true;
7245	}
7246
7247	sys_flag\|= found_flag;
7248
7249	if ((f->type_handler() == &type_handler_datetime2 \|\|
7250	f->type_handler() == &type_handler_timestamp2) &&
7251	f->length == MAX_DATETIME_FULL_WIDTH)
7252	{
7253	f_check_unit= VERS_TIMESTAMP;
7254	}
7255	else if (native
7256	&& f->type_handler() == &type_handler_longlong
7257	&& (f->flags & UNSIGNED_FLAG)
7258	&& f->length == (MY_INT64_NUM_DECIMAL_DIGITS - `1`))
7259	{
7260	f_check_unit= VERS_TRX_ID;
7261	}
7262	else
7263	{
7264	if (!check_unit)
7265	check_unit= VERS_TIMESTAMP;
7266	goto error;
7267	}
7268
7269	if (f_check_unit)
7270	{
7271	if (check_unit)
7272	{
7273	if (check_unit == f_check_unit)
7274	{
7275	if (check_unit == VERS_TRX_ID && !TR_table::use_transaction_registry)
7276	{
7277	my_error(ER_VERS_TRT_IS_DISABLED, MYF(`0`));
7278	return true;
7279	}
7280	return false;
7281	}
7282	error:
7283	my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(`0`), f->field_name.str,
7284	check_unit == VERS_TIMESTAMP ?
7285	"TIMESTAMP(6)" :
7286	"BIGINT(20) UNSIGNED",
7287	table_name.str);
7288	return true;
7289	}
7290	check_unit= f_check_unit;
7291	}
7292	}
7293
7294	my_error(ER_MISSING, MYF(`0`), table_name.str, found_flag & VERS_SYS_START_FLAG ?
7295	"ROW END" : found_flag ? "ROW START" : "ROW START/END");
7296	return true;
7297	}
7298

Browse the source code of MariaDB/sql/handler.cc