/*****************************************************************************

Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2018, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file lock/lock0lock.cc
The transaction lock system

Created 5/7/1996 Heikki Tuuri
*******************************************************/

#define LOCK_MODULE_IMPLEMENTATION


#include "ha_prototypes.h"

#include <mysql/service_thd_error_context.h>
#include <sql_class.h>

#include "lock0lock.h"
#include "lock0priv.h"
#include "dict0mem.h"
#include "trx0purge.h"
#include "trx0sys.h"
#include "srv0mon.h"
#include "ut0vec.h"
#include "btr0btr.h"
#include "dict0boot.h"
#include "ut0new.h"
#include "row0sel.h"
#include "row0mysql.h"
#include "row0vers.h"
#include "pars0pars.h"

#include <set>

#ifdef WITH_WSREP
#include <mysql/service_wsrep.h>
#endif /* WITH_WSREP */

/** Lock scheduling algorithm */
ulong innodb_lock_schedule_algorithm;

/** The value of innodb_deadlock_detect */
my_bool innobase_deadlock_detect;

/** Total number of cached record locks */
static const ulint	REC_LOCK_CACHE = 8;

/** Maximum record lock size in bytes */
static const ulint	REC_LOCK_SIZE = sizeof(ib_lock_t) + 256;

/** Total number of cached table locks */
static const ulint	TABLE_LOCK_CACHE = 8;

/** Size in bytes of a table lock instance */
static const ulint	TABLE_LOCK_SIZE = sizeof(ib_lock_t);

/*********************************************************************//**
Checks if a waiting record lock request still has to wait in a queue.
@return lock that is causing the wait */
static
const lock_t*
lock_rec_has_to_wait_in_queue(
/*==========================*/
	const lock_t*	wait_lock);	/*!< in: waiting record lock */

/** Grant a lock to a waiting lock request and release the waiting transaction
after lock_reset_lock_and_trx_wait() has been called. */
static void lock_grant_after_reset(lock_t* lock);

extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd);
extern "C" int thd_need_wait_reports(const MYSQL_THD thd);
extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);

/** Print info of a table lock.
@param[in,out]	file	output stream
@param[in]	lock	table lock */
static
void
lock_table_print(FILE* file, const lock_t* lock);

/** Print info of a record lock.
@param[in,out]	file	output stream
@param[in]	lock	record lock */
static
void
lock_rec_print(FILE* file, const lock_t* lock);

/** Deadlock checker. */
class DeadlockChecker {
public:
	/** Check if a joining lock request results in a deadlock.
	If a deadlock is found, this function will resolve the deadlock
	by choosing a victim transaction and rolling it back. It will
	attempt to resolve all deadlocks. The returned transaction
	will be the joining transaction, or NULL if some other
	transaction was chosen as a victim and rolled back, or if no
	deadlock was found.

	@param lock lock the transaction is requesting
	@param trx transaction requesting the lock

	@return transaction chosen as victim, or NULL */
	static const trx_t* check_and_resolve(
		const lock_t*	lock,
		trx_t*		trx);

private:
	/** Do a shallow copy. Default destructor OK.
	@param trx the start transaction (start node)
	@param wait_lock lock that a transaction wants
	@param mark_start visited node counter
	@param report_waiters whether to call thd_rpl_deadlock_check() */
	DeadlockChecker(
		const trx_t*	trx,
		const lock_t*	wait_lock,
		ib_uint64_t	mark_start,
		bool		report_waiters)
		:
		m_cost(),
		m_start(trx),
		m_too_deep(),
		m_wait_lock(wait_lock),
		m_mark_start(mark_start),
		m_n_elems(),
		m_report_waiters(report_waiters)
	{
	}

	/** Check if the search is too deep. */
	bool is_too_deep() const
	{
		return(m_n_elems > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
		       || m_cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK);
	}

	/** Save current state.
	@param lock lock to push on the stack.
	@param heap_no the heap number to push on the stack.
	@return false if stack is full. */
	bool push(const lock_t* lock, ulint heap_no)
	{
		ut_ad((lock_get_type_low(lock) & LOCK_REC)
		      || (lock_get_type_low(lock) & LOCK_TABLE));

		ut_ad(((lock_get_type_low(lock) & LOCK_TABLE) != 0)
		      == (heap_no == ULINT_UNDEFINED));

		/* Ensure that the stack is bounded. */
		if (m_n_elems >= UT_ARR_SIZE(s_states)) {
			return(false);
		}

		state_t&	state = s_states[m_n_elems++];

		state.m_lock = lock;
		state.m_wait_lock = m_wait_lock;
		state.m_heap_no = heap_no;

		return(true);
	}

	/** Restore state.
	@param[out] lock current lock
	@param[out] heap_no current heap_no */
	void pop(const lock_t*& lock, ulint& heap_no)
	{
		ut_a(m_n_elems > 0);

		const state_t&	state = s_states[--m_n_elems];

		lock = state.m_lock;
		heap_no = state.m_heap_no;
		m_wait_lock = state.m_wait_lock;
	}

	/** Check whether the node has been visited.
	@param lock lock to check
	@return true if the node has been visited */
	bool is_visited(const lock_t* lock) const
	{
		return(lock->trx->lock.deadlock_mark > m_mark_start);
	}

	/** Get the next lock in the queue that is owned by a transaction
	whose sub-tree has not already been searched.
	Note: "next" here means PREV for table locks.
	@param lock Lock in queue
	@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
	@return next lock or NULL if at end of queue */
	const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const;

	/** Get the first lock to search. The search starts from the current
	wait_lock. What we are really interested in is an edge from the
	current wait_lock's owning transaction to another transaction that has
	a lock ahead in the queue. We skip locks where the owning transaction's
	sub-tree has already been searched.

	Note: The record locks are traversed from the oldest lock to the
	latest. For table locks we go from latest to oldest.

	For record locks, we first position the iterator on the first lock on
	the page and then reposition on the actual heap_no. This is required
	due to the way the record lock hash is implemented.

	@param[out] heap_no if rec lock, else ULINT_UNDEFINED.

	@return first lock or NULL */
	const lock_t* get_first_lock(ulint* heap_no) const;

	/** Notify that a deadlock has been detected and print the conflicting
	transaction info.
	@param lock lock causing deadlock */
	void notify(const lock_t* lock) const;

	/** Select the victim transaction that should be rolled back.
	@return victim transaction */
	const trx_t* select_victim() const;

	/** Rollback transaction selected as the victim. */
	void trx_rollback();

	/** Looks iteratively for a deadlock. Note: the joining transaction
	may have been granted its lock by the deadlock checks.

	@return 0 if no deadlock else the victim transaction.*/
	const trx_t* search();

	/** Print transaction data to the deadlock file and possibly to stderr.
	@param trx transaction
	@param max_query_len max query length to print */
	static void print(const trx_t* trx, ulint max_query_len);

	/** rewind(3) the file used for storing the latest detected deadlock
	and print a heading message to stderr if printing of all deadlocks to
	stderr is enabled. */
	static void start_print();

	/** Print lock data to the deadlock file and possibly to stderr.
	@param lock record or table type lock */
	static void print(const lock_t* lock);

	/** Print a message to the deadlock file and possibly to stderr.
	@param msg message to print */
	static void print(const char* msg);

	/** Print info about transaction that was rolled back.
	@param trx transaction rolled back
	@param lock lock trx wants */
	static void rollback_print(const trx_t* trx, const lock_t* lock);

private:
	/** DFS state information, used during deadlock checking. */
	struct state_t {
		const lock_t*	m_lock;		/*!< Current lock */
		const lock_t*	m_wait_lock;	/*!< Waiting for lock */
		ulint		m_heap_no;	/*!< heap number if rec lock */
	};

	/** Used in deadlock tracking. Protected by lock_sys.mutex. */
	static ib_uint64_t	s_lock_mark_counter;

	/** Calculation steps thus far. It is the count of the nodes visited. */
	ulint			m_cost;

	/** Joining transaction that is requesting a lock in an
	incompatible mode */
	const trx_t*		m_start;

	/** TRUE if search was too deep and was aborted */
	bool			m_too_deep;

	/** Lock that trx wants */
	const lock_t*		m_wait_lock;

	/** Value of lock_mark_count at the start of the deadlock check. */
	ib_uint64_t		m_mark_start;

	/** Number of states pushed onto the stack */
	size_t			m_n_elems;

	/** This is to avoid malloc/free calls. */
	static state_t		s_states[MAX_STACK_SIZE];

	/** Set if thd_rpl_deadlock_check() should be called for waits. */
	const bool		m_report_waiters;
};

/** Counter to mark visited nodes during deadlock search. */
ib_uint64_t	DeadlockChecker::s_lock_mark_counter = 0;

/** The stack used for deadlock searches. */
DeadlockChecker::state_t	DeadlockChecker::s_states[MAX_STACK_SIZE];
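
/* Illustrative sketch (simplified, not part of the server code): the
deadlock check is a bounded DFS over the wait-for graph, driven by the
explicit push()/pop() stack above instead of recursion. Assuming a caller
that already holds lock_sys.mutex and trx->mutex, the entry point is used
roughly like this:

	const trx_t*	victim = DeadlockChecker::check_and_resolve(lock, trx);

	if (victim == trx) {
		// the joining transaction itself was chosen as victim;
		// the caller must report DB_DEADLOCK to the SQL layer
	} else {
		// victim == NULL: either no deadlock was found, or some
		// other transaction was rolled back and the wait can proceed
	}
*/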

#ifdef UNIV_DEBUG
/*********************************************************************//**
Validates the lock system.
@return TRUE if ok */
static
bool
lock_validate();
/*============*/

/*********************************************************************//**
Validates the record lock queues on a page.
@return TRUE if ok */
static
ibool
lock_rec_validate_page(
/*===================*/
	const buf_block_t*	block)	/*!< in: buffer block */
	MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */

/* The lock system */
lock_sys_t lock_sys;

/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
static bool	lock_deadlock_found = false;

/** Only created if !srv_read_only_mode */
static FILE*	lock_latest_err_file;

/*********************************************************************//**
Reports that a transaction id is insensible, i.e., in the future. */
void
lock_report_trx_id_insanity(
/*========================*/
	trx_id_t	trx_id,		/*!< in: trx id */
	const rec_t*	rec,		/*!< in: user record */
	dict_index_t*	index,		/*!< in: index */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	trx_id_t	max_trx_id)	/*!< in: trx_sys.get_max_trx_id() */
{
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!rec_is_default_row(rec, index));

	ib::error()
		<< "Transaction id " << trx_id
		<< " associated with record" << rec_offsets_print(rec, offsets)
		<< " in index " << index->name
		<< " of table " << index->table->name
		<< " is greater than the global counter " << max_trx_id
		<< "! The table is corrupted.";
}

/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
@return true if ok */
bool
lock_check_trx_id_sanity(
/*=====================*/
	trx_id_t	trx_id,		/*!< in: trx id */
	const rec_t*	rec,		/*!< in: user record */
	dict_index_t*	index,		/*!< in: index */
	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
{
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!rec_is_default_row(rec, index));

	trx_id_t	max_trx_id = trx_sys.get_max_trx_id();
	ut_ad(max_trx_id || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN);

	if (max_trx_id && trx_id >= max_trx_id) {
		lock_report_trx_id_insanity(
			trx_id, rec, index, offsets, max_trx_id);
		return false;
	}
	return(true);
}

/*********************************************************************//**
Checks that a record is seen in a consistent read.
@return true if sees, or false if an earlier version of the record
should be retrieved */
bool
lock_clust_rec_cons_read_sees(
/*==========================*/
	const rec_t*	rec,	/*!< in: user record which should be read or
				passed over by a read cursor */
	dict_index_t*	index,	/*!< in: clustered index */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
	ReadView*	view)	/*!< in: consistent read view */
{
	ut_ad(dict_index_is_clust(index));
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!rec_is_default_row(rec, index));

	/* Temp-tables are not shared across connections, and multiple
	transactions from different connections cannot simultaneously
	operate on the same temp-table, so a read of a temp-table is
	always a consistent read. */
	if (index->table->is_temporary()) {
		return(true);
	}

	/* NOTE that we call this function while holding the search
	system latch. */

	trx_id_t	trx_id = row_get_rec_trx_id(rec, index, offsets);

	return(view->changes_visible(trx_id, index->table->name));
}

/*********************************************************************//**
Checks that a non-clustered index record is seen in a consistent read.

NOTE that a non-clustered index page contains so little information on
its modifications that even when this function returns false, the present
version of rec may still be the right one; we must check that from the
clustered index record.

@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
bool
lock_sec_rec_cons_read_sees(
/*========================*/
	const rec_t*		rec,	/*!< in: user record which
					should be read or passed over
					by a read cursor */
	const dict_index_t*	index,	/*!< in: index */
	const ReadView*		view)	/*!< in: consistent read view */
{
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(!index->is_primary());
	ut_ad(!rec_is_default_row(rec, index));

	/* NOTE that we might call this function while holding the search
	system latch. */

	if (index->table->is_temporary()) {

		/* Temp-tables are not shared across connections, and
		multiple transactions from different connections cannot
		simultaneously operate on the same temp-table, so a read
		of a temp-table is always a consistent read. */

		return(true);
	}

	trx_id_t	max_trx_id = page_get_max_trx_id(page_align(rec));

	ut_ad(max_trx_id > 0);

	return(view->sees(max_trx_id));
}
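
/* Illustrative note (simplified, not part of the server code): the
secondary-index check above is a conservative page-level shortcut.
PAGE_MAX_TRX_ID is the largest trx id that modified any record on the
page, so:

	if (view->sees(page_get_max_trx_id(page)))
		// every change on the page is visible: rec is usable as-is
	else
		// fall back to the clustered index record and its undo-log
		// versions to find the visible version of the row

A false return is therefore not an error; it merely forces the caller to
do the precise per-record visibility check in the clustered index. */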

/**
  Creates the lock system at database start.

  @param[in] n_cells number of slots in lock hash table
*/
void lock_sys_t::create(ulint n_cells)
{
	ut_ad(this == &lock_sys);

	m_initialised= true;

	waiting_threads = static_cast<srv_slot_t*>
		(ut_zalloc_nokey(srv_max_n_threads * sizeof *waiting_threads));
	last_slot = waiting_threads;

	mutex_create(LATCH_ID_LOCK_SYS, &mutex);

	mutex_create(LATCH_ID_LOCK_SYS_WAIT, &wait_mutex);

	timeout_event = os_event_create(0);

	rec_hash = hash_create(n_cells);
	prdt_hash = hash_create(n_cells);
	prdt_page_hash = hash_create(n_cells);

	if (!srv_read_only_mode) {
		lock_latest_err_file = os_file_create_tmpfile();
		ut_a(lock_latest_err_file);
	}
}

/** Calculates the fold value of a lock: used in migrating the hash table.
@param[in]	lock	record lock object
@return folded value */
static
ulint
lock_rec_lock_fold(
	const lock_t*	lock)
{
	return(lock_rec_fold(lock->un_member.rec_lock.space,
			     lock->un_member.rec_lock.page_no));
}
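
/* Illustrative example (values made up): the fold collapses the
(space, page_no) pair into one hash key, so all record locks of a page
land in the same hash chain. Assuming space=5 and page_no=42:

	ulint	fold = lock_rec_fold(5, 42);
	// the same key is used when inserting a lock into the hash,
	// e.g. HASH_INSERT(lock_t, hash, lock_sys.rec_hash, fold, lock),
	// and by HASH_MIGRATE() in lock_sys_t::resize() below
*/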

/**
  Resize the lock hash table.

  @param[in] n_cells number of slots in lock hash table
*/
void lock_sys_t::resize(ulint n_cells)
{
	ut_ad(this == &lock_sys);

	mutex_enter(&mutex);

	hash_table_t* old_hash = rec_hash;
	rec_hash = hash_create(n_cells);
	HASH_MIGRATE(old_hash, rec_hash, lock_t, hash,
		     lock_rec_lock_fold);
	hash_table_free(old_hash);

	old_hash = prdt_hash;
	prdt_hash = hash_create(n_cells);
	HASH_MIGRATE(old_hash, prdt_hash, lock_t, hash,
		     lock_rec_lock_fold);
	hash_table_free(old_hash);

	old_hash = prdt_page_hash;
	prdt_page_hash = hash_create(n_cells);
	HASH_MIGRATE(old_hash, prdt_page_hash, lock_t, hash,
		     lock_rec_lock_fold);
	hash_table_free(old_hash);

	/* need to update block->lock_hash_val */
	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
		buf_pool_t*	buf_pool = buf_pool_from_array(i);

		buf_pool_mutex_enter(buf_pool);
		buf_page_t*	bpage;
		bpage = UT_LIST_GET_FIRST(buf_pool->LRU);

		while (bpage != NULL) {
			if (buf_page_get_state(bpage)
			    == BUF_BLOCK_FILE_PAGE) {
				buf_block_t*	block;
				block = reinterpret_cast<buf_block_t*>(
					bpage);

				block->lock_hash_val
					= lock_rec_hash(
						bpage->id.space(),
						bpage->id.page_no());
			}
			bpage = UT_LIST_GET_NEXT(LRU, bpage);
		}
		buf_pool_mutex_exit(buf_pool);
	}

	mutex_exit(&mutex);
}

/** Closes the lock system at database shutdown. */
void lock_sys_t::close()
{
	ut_ad(this == &lock_sys);

	if (!m_initialised) return;

	if (lock_latest_err_file != NULL) {
		fclose(lock_latest_err_file);
		lock_latest_err_file = NULL;
	}

	hash_table_free(rec_hash);
	hash_table_free(prdt_hash);
	hash_table_free(prdt_page_hash);

	os_event_destroy(timeout_event);

	mutex_destroy(&mutex);
	mutex_destroy(&wait_mutex);

	for (ulint i = srv_max_n_threads; i--; ) {
		if (os_event_t& event = waiting_threads[i].event) {
			os_event_destroy(event);
		}
	}

	ut_free(waiting_threads);
	m_initialised= false;
}
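
/* Illustrative lifecycle sketch (simplified; the actual call sites live
in the startup/shutdown and buffer pool resizing code outside this file):
lock_sys is a single global object whose hash tables can be rebuilt
online:

	lock_sys.create(n_cells);	// once, at database start
	lock_sys.resize(new_n_cells);	// when the buffer pool is resized
	lock_sys.close();		// once, at shutdown

resize() rebuilds rec_hash, prdt_hash and prdt_page_hash, then walks every
buffer pool LRU list to refresh the cached block->lock_hash_val values. */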

/*********************************************************************//**
Gets the size of a lock struct.
@return size in bytes */
ulint
lock_get_size(void)
/*===============*/
{
	return((ulint) sizeof(lock_t));
}

static inline void lock_grant_have_trx_mutex(lock_t* lock)
{
	lock_reset_lock_and_trx_wait(lock);
	lock_grant_after_reset(lock);
}

/*********************************************************************//**
Gets the gap flag of a record lock.
@return LOCK_GAP or 0 */
UNIV_INLINE
ulint
lock_rec_get_gap(
/*=============*/
	const lock_t*	lock)	/*!< in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	return(lock->type_mode & LOCK_GAP);
}

/*********************************************************************//**
Gets the LOCK_REC_NOT_GAP flag of a record lock.
@return LOCK_REC_NOT_GAP or 0 */
UNIV_INLINE
ulint
lock_rec_get_rec_not_gap(
/*=====================*/
	const lock_t*	lock)	/*!< in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	return(lock->type_mode & LOCK_REC_NOT_GAP);
}

/*********************************************************************//**
Gets the waiting insert flag of a record lock.
@return LOCK_INSERT_INTENTION or 0 */
UNIV_INLINE
ulint
lock_rec_get_insert_intention(
/*==========================*/
	const lock_t*	lock)	/*!< in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	return(lock->type_mode & LOCK_INSERT_INTENTION);
}

/*********************************************************************//**
Checks if a lock request for a new lock has to wait for request lock2.
@return TRUE if new lock has to wait for lock2 to be removed */
UNIV_INLINE
bool
lock_rec_has_to_wait(
/*=================*/
	bool		for_locking,
				/*!< in: whether called for locking
				or releasing */
	const trx_t*	trx,	/*!< in: trx of new lock */
	ulint		type_mode,/*!< in: precise mode of the new lock
				to set: LOCK_S or LOCK_X, possibly
				ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
				LOCK_INSERT_INTENTION */
	const lock_t*	lock2,	/*!< in: another record lock; NOTE that
				it is assumed that this has a lock bit
				set on the same record as in the new
				lock we are setting */
	bool		lock_is_on_supremum)
				/*!< in: TRUE if we are setting the
				lock on the 'supremum' record of an
				index page: we know then that the lock
				request is really for a 'gap' type lock */
{
	ut_ad(trx && lock2);
	ut_ad(lock_get_type_low(lock2) == LOCK_REC);

	if (trx == lock2->trx
	    || lock_mode_compatible(
		    static_cast<lock_mode>(LOCK_MODE_MASK & type_mode),
		    lock_get_mode(lock2))) {
		return false;
	}

	/* We have somewhat complex rules when gap type record locks
	cause waits */

	if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
	    && !(type_mode & LOCK_INSERT_INTENTION)) {

		/* Gap type locks without LOCK_INSERT_INTENTION flag
		do not need to wait for anything. This is because
		different users can have conflicting lock types
		on gaps. */

		return false;
	}

	if (!(type_mode & LOCK_INSERT_INTENTION) && lock_rec_get_gap(lock2)) {

		/* A record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
		does not need to wait for a gap type lock */

		return false;
	}

	if ((type_mode & LOCK_GAP) && lock_rec_get_rec_not_gap(lock2)) {

		/* Lock on gap does not need to wait for
		a LOCK_REC_NOT_GAP type lock */

		return false;
	}

	if (lock_rec_get_insert_intention(lock2)) {

		/* No lock request needs to wait for an insert
		intention lock to be removed. This is ok since our
		rules allow conflicting locks on gaps. This eliminates
		a spurious deadlock caused by a next-key lock waiting
		for an insert intention lock; when the insert
		intention lock was granted, the insert deadlocked on
		the waiting next-key lock.

		Also, insert intention locks do not disturb each
		other. */

		return false;
	}

	if ((type_mode & LOCK_GAP || lock_rec_get_gap(lock2))
	    && !thd_need_ordering_with(trx->mysql_thd, lock2->trx->mysql_thd)) {
		/* If the upper server layer has already decided on the
		commit order between the transaction requesting the
		lock and the transaction owning the lock, we do not
		need to wait for gap locks. Such ordering by the upper
		server layer happens in parallel replication, where the
		commit order is fixed to match the original order on the
		master.

		Such gap locks are mainly needed to get serialisability
		between transactions so that they will be binlogged in
		the correct order so that statement-based replication
		will give the correct results. Since the right order
		was already determined on the master, we do not need
		to enforce it again here.

		Skipping the locks is not essential for correctness,
		since in case of deadlock we will just kill the later
		transaction and retry it. But it can save some
		unnecessary rollbacks and retries. */

		return false;
	}

#ifdef WITH_WSREP
	/* if BF thread is locking and has conflict with another BF
	thread, we need to look at trx ordering and lock types */
	if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)
	    && wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) {

		if (wsrep_debug) {
			ib::info() << "BF-BF lock conflict, locking: "
				   << for_locking;
			lock_rec_print(stderr, lock2);
			ib::info()
				<< " SQL1: " << wsrep_thd_query(trx->mysql_thd)
				<< " SQL2: "
				<< wsrep_thd_query(lock2->trx->mysql_thd);
		}

		if (wsrep_trx_order_before(trx->mysql_thd,
					   lock2->trx->mysql_thd)
		    && (type_mode & LOCK_MODE_MASK) == LOCK_X
		    && (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) {
			if (for_locking || wsrep_debug) {
				/* exclusive lock conflicts are not
				accepted */
				ib::info()
					<< "BF-BF X lock conflict, mode: "
					<< type_mode
					<< " supremum: " << lock_is_on_supremum
					<< " conflict states: my "
					<< wsrep_thd_conflict_state(
						trx->mysql_thd, FALSE)
					<< " locked "
					<< wsrep_thd_conflict_state(
						lock2->trx->mysql_thd,
						FALSE);
				lock_rec_print(stderr, lock2);
				ib::info() << " SQL1: "
					   << wsrep_thd_query(trx->mysql_thd)
					   << " SQL2: "
					   << wsrep_thd_query(
						   lock2->trx->mysql_thd);

				if (for_locking) {
					return false;
				}
			}
		} else {
			/* if lock2->index->n_uniq <=
			lock2->index->n_user_defined_cols
			operation is on uniq index
			*/
			if (wsrep_debug) {
				ib::info()
					<< "BF conflict, modes: " << type_mode
					<< ":" << lock2->type_mode
					<< " idx: " << lock2->index->name()
					<< " table: "
					<< lock2->index->table->name.m_name
					<< " n_uniq: " << lock2->index->n_uniq
					<< " n_user: "
					<< lock2->index->n_user_defined_cols
					<< " SQL1: "
					<< wsrep_thd_query(trx->mysql_thd)
					<< " SQL2: "
					<< wsrep_thd_query(
						lock2->trx->mysql_thd);
			}
			return false;
		}
	}
#endif /* WITH_WSREP */

	return true;
}
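
/* Illustrative summary (informal) of the gap rules implemented above,
applied only after the basic mode compatibility check has already failed:

	requested lock				waits for held lock?
	---------------------------------	----------------------------
	gap, without insert intention		never (gap locks coexist)
	anything, if held lock is II		never (nothing waits for II)
	not-gap (or next-key)			only if held lock covers
						the record itself
	insert intention			only if held lock covers
						the gap

In words: plain gap locks never block each other, no request ever waits
for an insert intention (II) lock, and an insert intention request waits
only for gap or next-key locks held by other transactions. */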

/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
@return TRUE if lock1 has to wait for lock2 to be removed */
bool
lock_has_to_wait(
/*=============*/
	const lock_t*	lock1,	/*!< in: waiting lock */
	const lock_t*	lock2)	/*!< in: another lock; NOTE that it is
				assumed that this has a lock bit set
				on the same record as in lock1 if the
				locks are record locks */
{
	ut_ad(lock1 && lock2);

	if (lock1->trx == lock2->trx
	    || lock_mode_compatible(lock_get_mode(lock1),
				    lock_get_mode(lock2))) {
		return false;
	}

	if (lock_get_type_low(lock1) != LOCK_REC) {
		return true;
	}

	ut_ad(lock_get_type_low(lock2) == LOCK_REC);

	if (lock1->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
		return lock_prdt_has_to_wait(lock1->trx, lock1->type_mode,
					     lock_get_prdt_from_lock(lock1),
					     lock2);
	}

	return lock_rec_has_to_wait(
		false, lock1->trx, lock1->type_mode, lock2,
		lock_rec_get_nth_bit(lock1, PAGE_HEAP_NO_SUPREMUM));
}

/*============== RECORD LOCK BASIC FUNCTIONS ============================*/

/**********************************************************************//**
Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED
if none is found.
@return bit index == heap number of the record, or ULINT_UNDEFINED if
none found */
ulint
lock_rec_find_set_bit(
/*==================*/
	const lock_t*	lock)	/*!< in: record lock with at least one
				bit set */
{
	for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {

		if (lock_rec_get_nth_bit(lock, i)) {

			return(i);
		}
	}

	return(ULINT_UNDEFINED);
}
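
/* Illustrative layout note (simplified): a single lock_t covers a whole
page; the bitmap stored immediately after the struct has one bit per
record heap number. Bit i set means "this lock applies to the record with
heap_no i". For example, one lock object holding heap numbers 2 and 5 of
a page looks like:

	[lock_t header][bitmap: 0 0 1 0 0 1 0 ...]
	                        bit 2 ^     ^ bit 5

lock_rec_find_set_bit() above simply scans for the lowest such bit. */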

/*********************************************************************//**
Determines if there are explicit record locks on a page.
@return an explicit record lock on the page, or NULL if there are none */
lock_t*
lock_rec_expl_exist_on_page(
/*========================*/
	ulint	space,	/*!< in: space id */
	ulint	page_no)/*!< in: page number */
{
	lock_t*	lock;

	lock_mutex_enter();
	/* Only used in ibuf pages, so rec_hash is good enough */
	lock = lock_rec_get_first_on_page_addr(lock_sys.rec_hash,
					       space, page_no);
	lock_mutex_exit();

	return(lock);
}

/*********************************************************************//**
Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
pointer in the transaction! This function is used in lock object creation
and resetting. */
static
void
lock_rec_bitmap_reset(
/*==================*/
	lock_t*	lock)	/*!< in: record lock */
{
	ulint	n_bytes;

	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	/* Reset to zero the bitmap which resides immediately after the lock
	struct */

	n_bytes = lock_rec_get_n_bits(lock) / 8;

	ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);

	memset(&lock[1], 0, n_bytes);
}

/*********************************************************************//**
Copies a record lock to heap.
@return copy of lock */
static
lock_t*
lock_rec_copy(
/*==========*/
	const lock_t*	lock,	/*!< in: record lock */
	mem_heap_t*	heap)	/*!< in: memory heap */
{
	ulint	size;

	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;

	return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size)));
}

/*********************************************************************//**
Gets the previous record lock set on a record.
@return previous lock on the same record, NULL if none exists */
const lock_t*
lock_rec_get_prev(
/*==============*/
	const lock_t*	in_lock,/*!< in: record lock */
	ulint		heap_no)/*!< in: heap number of the record */
{
	lock_t*		lock;
	ulint		space;
	ulint		page_no;
	lock_t*		found_lock	= NULL;
	hash_table_t*	hash;

	ut_ad(lock_mutex_own());
	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;

	hash = lock_hash_get(in_lock->type_mode);

	for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
	     /* No op */;
	     lock = lock_rec_get_next_on_page(lock)) {

		ut_ad(lock);

		if (lock == in_lock) {

			return(found_lock);
		}

		if (lock_rec_get_nth_bit(lock, heap_no)) {

			found_lock = lock;
		}
	}
}

/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/

/*********************************************************************//**
Checks if a transaction has a GRANTED explicit lock on rec that is stronger
than or equal to precise_mode.
@return lock or NULL */
UNIV_INLINE
lock_t*
lock_rec_has_expl(
/*==============*/
	ulint			precise_mode,/*!< in: LOCK_S or LOCK_X
					possibly ORed to LOCK_GAP or
					LOCK_REC_NOT_GAP, for a
					supremum record we regard this
					always a gap type request */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: transaction */
{
	lock_t*	lock;

	ut_ad(lock_mutex_own());
	ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
	      || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
	ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));

	for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		if (lock->trx == trx
		    && !lock_rec_get_insert_intention(lock)
		    && lock_mode_stronger_or_eq(
			    lock_get_mode(lock),
			    static_cast<lock_mode>(
				    precise_mode & LOCK_MODE_MASK))
		    && !lock_get_wait(lock)
		    && (!lock_rec_get_rec_not_gap(lock)
			|| (precise_mode & LOCK_REC_NOT_GAP)
			|| heap_no == PAGE_HEAP_NO_SUPREMUM)
		    && (!lock_rec_get_gap(lock)
			|| (precise_mode & LOCK_GAP)
			|| heap_no == PAGE_HEAP_NO_SUPREMUM)) {

			return(lock);
		}
	}

	return(NULL);
}
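
/* Illustrative example (informal): precise_mode asks "does trx already
hold something at least this strong on this record?". For instance,

	lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, block, heap_no, trx)

is satisfied by a granted LOCK_X next-key lock of trx on the same record
(X is stronger than S, and an ordinary lock covers the record itself),
but not by a waiting lock or by a pure gap lock of the same trx. */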

#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some other transaction has a lock request in the queue.
@return lock or NULL */
static
lock_t*
lock_rec_other_has_expl_req(
/*========================*/
	lock_mode		mode,	/*!< in: LOCK_S or LOCK_X */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	bool			wait,	/*!< in: whether also waiting locks
					are taken into account */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: transaction, or NULL if
					requests by all transactions
					are taken into account */
{

	ut_ad(lock_mutex_own());
	ut_ad(mode == LOCK_X || mode == LOCK_S);

	/* Only a GAP lock can be on the supremum, and we are not looking
	for GAP locks */
	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
		return(NULL);
	}

	for (lock_t* lock = lock_rec_get_first(lock_sys.rec_hash,
					       block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		if (lock->trx != trx
		    && !lock_rec_get_gap(lock)
		    && (wait || !lock_get_wait(lock))
		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {

			return(lock);
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */

#ifdef WITH_WSREP
static
void
wsrep_kill_victim(
/*==============*/
	const trx_t* const	trx,
	const lock_t*		lock)
{
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(lock->trx));

	/* quit for native mysql */
	if (!wsrep_on(trx->mysql_thd)) {
		return;
	}

	my_bool	bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE);
	my_bool	bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE);

	if ((bf_this && !bf_other) ||
	    (bf_this && bf_other && wsrep_trx_order_before(
		    trx->mysql_thd, lock->trx->mysql_thd))) {

		if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
			if (wsrep_debug) {
				ib::info() << "WSREP: BF victim waiting";
			}
			/* cannot release lock, until our lock
			is in the queue*/
		} else if (lock->trx != trx) {
			if (wsrep_log_conflicts) {
				if (bf_this) {
					ib::info() << "*** Priority TRANSACTION:";
				} else {
					ib::info() << "*** Victim TRANSACTION:";
				}

				trx_print_latched(stderr, trx, 3000);

				if (bf_other) {
					ib::info() << "*** Priority TRANSACTION:";
				} else {
					ib::info() << "*** Victim TRANSACTION:";
				}
				trx_print_latched(stderr, lock->trx, 3000);

				ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:";

				if (lock_get_type(lock) == LOCK_REC) {
					lock_rec_print(stderr, lock);
				} else {
					lock_table_print(stderr, lock);
				}

				ib::info() << " SQL1: "
					   << wsrep_thd_query(trx->mysql_thd);
				ib::info() << " SQL2: "
					   << wsrep_thd_query(lock->trx->mysql_thd);
			}

			wsrep_innobase_kill_one_trx(trx->mysql_thd,
						    trx, lock->trx, TRUE);
		}
	}
}
#endif /* WITH_WSREP */

/*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
@return lock or NULL */
static
lock_t*
lock_rec_other_has_conflicting(
/*===========================*/
	ulint			mode,	/*!< in: LOCK_S or LOCK_X,
					possibly ORed to LOCK_GAP or
					LOCK_REC_NOT_GAP,
					LOCK_INSERT_INTENTION */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: our transaction */
{
	lock_t*	lock;

	ut_ad(lock_mutex_own());

	bool	is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);

	for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) {
#ifdef WITH_WSREP
			if (wsrep_on_trx(trx)) {
				trx_mutex_enter(lock->trx);
				/* Below function will roll back either trx
				or lock->trx depending on priority of the
				transaction. */
				wsrep_kill_victim(const_cast<trx_t*>(trx), lock);
				trx_mutex_exit(lock->trx);
			}
#endif /* WITH_WSREP */
			return(lock);
		}
	}

	return(NULL);
}

/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a secondary
index.
@return transaction id of the transaction which has the x-lock, or 0;
NOTE that this function can return false positives but never false
negatives. The caller must confirm all positive results by calling
trx_is_active(). */
static
trx_t*
lock_sec_rec_some_has_impl(
/*=======================*/
	trx_t*		caller_trx,/*!< in/out: trx of current thread */
	const rec_t*	rec,	/*!< in: user record */
	dict_index_t*	index,	/*!< in: secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
	trx_t*		trx;
	trx_id_t	max_trx_id;
	const page_t*	page = page_align(rec);

	ut_ad(!lock_mutex_own());
	ut_ad(!dict_index_is_clust(index));
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!rec_is_default_row(rec, index));

	max_trx_id = page_get_max_trx_id(page);

	/* Some transaction may have an implicit x-lock on the record only
	if the max trx id for the page >= min trx id for the trx list, or
	database recovery is running. We do not write the changes of a page
	max trx id to the log, and therefore during recovery, this value
	for a page may be incorrect. */

	if (max_trx_id < trx_sys.get_min_trx_id()) {

		trx = 0;

	} else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {

		/* The page is corrupt: try to avoid a crash by returning 0 */
		trx = 0;

	} else {

		/* In this case it is possible that some transaction has an
		implicit x-lock. We have to look in the clustered index. */

		trx = row_vers_impl_x_locked(caller_trx, rec, index, offsets);
	}

	return(trx);
}
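
/* Illustrative flow (simplified) of the implicit-lock heuristic above,
where m = PAGE_MAX_TRX_ID of the page and min_id = oldest trx id that
could still be active:

	if (m < min_id)         -> no active trx touched the page: no lock
	else if (m is insane)   -> corrupted page: report it, assume none
	else                    -> maybe locked: do the expensive undo-log
	                           lookup via row_vers_impl_x_locked()

This is why callers must treat a non-NULL result as "possibly locked" and
confirm it with trx_is_active(), never as proof of an implicit lock. */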

/*********************************************************************//**
Return the approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise.
The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_rows_locked(
/*=======================*/
	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
{
	ut_ad(lock_mutex_own());

	return(trx_lock->n_rec_locks);
}

/*********************************************************************//**
Return the number of table locks for a transaction.
The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_tables_locked(
/*=========================*/
	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
{
	const lock_t*	lock;
	ulint		n_tables = 0;

	ut_ad(lock_mutex_own());

	for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks);
	     lock != NULL;
	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {

		if (lock_get_type_low(lock) == LOCK_TABLE) {
			n_tables++;
		}
	}

	return(n_tables);
}

/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/

#ifdef WITH_WSREP
static
void
wsrep_print_wait_locks(
/*===================*/
	lock_t*	c_lock)	/* conflicting lock to print */
{
	if (wsrep_debug && c_lock->trx->lock.wait_lock != c_lock) {
		ib::info() << "WSREP: c_lock != wait lock";
		ib::info() << " SQL: "
			   << wsrep_thd_query(c_lock->trx->mysql_thd);

		if (lock_get_type_low(c_lock) & LOCK_TABLE) {
			lock_table_print(stderr, c_lock);
		} else {
			lock_rec_print(stderr, c_lock);
		}

		if (lock_get_type_low(c_lock->trx->lock.wait_lock) & LOCK_TABLE) {
			lock_table_print(stderr, c_lock->trx->lock.wait_lock);
		} else {
			lock_rec_print(stderr, c_lock->trx->lock.wait_lock);
		}
	}
}
#endif /* WITH_WSREP */

/** Create a new record lock and insert it into the lock queue,
without checking for deadlocks or conflicts.
@param[in]	type_mode	lock mode and wait flag; type will be replaced
				with LOCK_REC
@param[in]	space		tablespace id
@param[in]	page_no		index page number
@param[in]	page		R-tree index page, or NULL
@param[in]	heap_no		record heap number in the index page
@param[in]	index		the index tree
@param[in,out]	trx		transaction
@param[in]	holds_trx_mutex	whether the caller holds trx->mutex
@return created lock */
lock_t*
lock_rec_create_low(
#ifdef WITH_WSREP
	lock_t*		c_lock,	/*!< conflicting lock */
	que_thr_t*	thr,	/*!< thread owning trx */
#endif
	ulint		type_mode,
	ulint		space,
	ulint		page_no,
	const page_t*	page,
	ulint		heap_no,
	dict_index_t*	index,
	trx_t*		trx,
	bool		holds_trx_mutex)
{
	lock_t*	lock;
	ulint	n_bits;
	ulint	n_bytes;

	ut_ad(lock_mutex_own());
	ut_ad(holds_trx_mutex == trx_mutex_own(trx));
	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));

#ifdef UNIV_DEBUG
	/* Non-locking autocommit read-only transactions should not set
	any locks. See comment in trx_set_rw_mode explaining why this
	conditional check is required in debug code. */
	if (holds_trx_mutex) {
		check_trx_state(trx);
	}
#endif /* UNIV_DEBUG */

	/* If rec is the supremum record, then we reset the gap and
	LOCK_REC_NOT_GAP bits, as all locks on the supremum are
	automatically of the gap type */

	if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
		type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
	}

	if (UNIV_LIKELY(!(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)))) {
		/* Make lock bitmap bigger by a safety margin */
		n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
		n_bytes = 1 + n_bits / 8;
	} else {
		ut_ad(heap_no == PRDT_HEAPNO);

		/* The lock is always on PAGE_HEAP_NO_INFIMUM (0), so
		we only need 1 bit (which rounds up to 1 byte) for
		lock bit setting */
		n_bytes = 1;

		if (type_mode & LOCK_PREDICATE) {
			ulint	tmp = UNIV_WORD_SIZE - 1;

			/* We will attach the predicate structure after the
			lock. Make sure the memory is aligned on 8 bytes;
			the mem_heap_alloc will align it with
			MEM_SPACE_NEEDED anyway. */
			n_bytes = (n_bytes + sizeof(lock_prdt_t) + tmp) & ~tmp;
			ut_ad(n_bytes == sizeof(lock_prdt_t) + UNIV_WORD_SIZE);
		}
	}

	if (trx->lock.rec_cached >= trx->lock.rec_pool.size()
	    || sizeof *lock + n_bytes > REC_LOCK_SIZE) {
		lock = static_cast<lock_t*>(
			mem_heap_alloc(trx->lock.lock_heap,
				       sizeof *lock + n_bytes));
	} else {
		lock = trx->lock.rec_pool[trx->lock.rec_cached++];
	}

	lock->trx = trx;
	lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
	lock->index = index;
	lock->un_member.rec_lock.space = uint32_t(space);
	lock->un_member.rec_lock.page_no = uint32_t(page_no);

	if (UNIV_LIKELY(!(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)))) {
		lock->un_member.rec_lock.n_bits = uint32_t(n_bytes * 8);
	} else {
		/* Predicate lock always on INFIMUM (0) */
		lock->un_member.rec_lock.n_bits = 8;
	}
	lock_rec_bitmap_reset(lock);
	lock_rec_set_nth_bit(lock, heap_no);
	index->table->n_rec_locks++;
	ut_ad(index->table->n_ref_count > 0 || !index->table->can_be_evicted);

#ifdef WITH_WSREP
	if (c_lock && wsrep_on_trx(trx)
	    && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
		lock_t*	hash	= (lock_t*) c_lock->hash;
		lock_t*	prev	= NULL;

		while (hash && wsrep_thd_is_BF(hash->trx->mysql_thd, TRUE)
		       && wsrep_trx_order_before(hash->trx->mysql_thd,
						 trx->mysql_thd)) {
			prev = hash;
			hash = (lock_t*) hash->hash;
		}
		lock->hash = hash;
		if (prev) {
			prev->hash = lock;
		} else {
			c_lock->hash = lock;
		}
		/*
		 * delayed conflict resolution '...kill_one_trx' was not called,
		 * if victim was waiting for some other lock
		 */
		trx_mutex_enter(c_lock->trx);
		if (c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

			c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;

			if (wsrep_debug) {
				wsrep_print_wait_locks(c_lock);
			}

			trx->lock.que_state = TRX_QUE_LOCK_WAIT;
			lock_set_lock_and_trx_wait(lock, trx);
			UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);

			trx->lock.wait_thr = thr;
			thr->state = QUE_THR_LOCK_WAIT;

			/* have to release trx mutex for the duration of
			victim lock release. This will eventually call
			lock_grant, which wants to grant trx mutex again
			*/
			if (holds_trx_mutex) {
				trx_mutex_exit(trx);
			}
			lock_cancel_waiting_and_release(
				c_lock->trx->lock.wait_lock);

			if (holds_trx_mutex) {
				trx_mutex_enter(trx);
			}

			trx_mutex_exit(c_lock->trx);

			if (wsrep_debug) {
				ib::info() << "WSREP: c_lock canceled "
					   << ib::hex(c_lock->trx->id)
					   << " SQL: "
					   << wsrep_thd_query(
						   c_lock->trx->mysql_thd);
			}

			/* have to bail out here to avoid lock_set_lock... */
			return(lock);
		}
		trx_mutex_exit(c_lock->trx);
	} else
#endif /* WITH_WSREP */
	if (!(type_mode & (LOCK_WAIT | LOCK_PREDICATE | LOCK_PRDT_PAGE))
	    && innodb_lock_schedule_algorithm
	    == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
	    && !thd_is_replication_slave_thread(trx->mysql_thd)) {
		HASH_PREPEND(lock_t, hash, lock_sys.rec_hash,
			     lock_rec_fold(space, page_no), lock);
	} else {
		HASH_INSERT(lock_t, hash, lock_hash_get(type_mode),
			    lock_rec_fold(space, page_no), lock);
	}

	if (!holds_trx_mutex) {
		trx_mutex_enter(trx);
	}
	ut_ad(trx_mutex_own(trx));
	if (type_mode & LOCK_WAIT) {
		lock_set_lock_and_trx_wait(lock, trx);
	}
	UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
	if (!holds_trx_mutex) {
		trx_mutex_exit(trx);
	}
	MONITOR_INC(MONITOR_RECLOCK_CREATED);
	MONITOR_INC(MONITOR_NUM_RECLOCK);

	return lock;
}
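
/* Illustrative sizing example (record count made up): for an ordinary
record lock on a page whose directory currently has 100 heap entries,
the allocation computed above is

	n_bits  = 100 + LOCK_PAGE_BITMAP_MARGIN;  // margin for future inserts
	n_bytes = 1 + n_bits / 8;                 // the +1 byte rounds up
	alloc   = sizeof(lock_t) + n_bytes;       // bitmap follows the struct

so later locks on other records of the same page can usually reuse this
object by setting one more bit instead of allocating a new lock_t. */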

/*********************************************************************//**
Check if lock1 has higher priority than lock2.
NULL has the lowest priority.
If neither of them is a wait lock, the first one has higher priority.
If only one of them is a wait lock, it has lower priority.
Otherwise, the one with the older transaction has higher priority.
@returns true if lock1 has higher priority, false otherwise. */
static
bool
has_higher_priority(
	lock_t*	lock1,
	lock_t*	lock2)
{
	if (lock1 == NULL) {
		return false;
	} else if (lock2 == NULL) {
		return true;
	}
	// Granted locks have higher priority.
	if (!lock_get_wait(lock1)) {
		return true;
	} else if (!lock_get_wait(lock2)) {
		return false;
	}
	return lock1->trx->start_time_micro <= lock2->trx->start_time_micro;
}
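
/* Illustrative example (made-up values): under VATS scheduling the queue
keeps granted locks first, then waiting locks ordered by transaction age.
Given waiting locks W1 (trx started at t=100us) and W2 (t=200us) and a
granted lock G:

	has_higher_priority(W1, W2) == true	// the older trx wins
	has_higher_priority(G,  W1) == true	// granted beats any waiter

lock_rec_insert_by_trx_age() below relies on exactly this ordering when
walking the hash chain to find an insertion point. */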

/*********************************************************************//**
Insert a lock into the hash list according to the mode (whether it is a
wait lock) and the age of the transaction that it is associated with.
If the lock is not a wait lock, insert it at the head of the hash list.
Otherwise, insert it into the middle of the wait locks according to the
age of the transaction. */
static
dberr_t
lock_rec_insert_by_trx_age(
	lock_t*	in_lock)	/*!< in: lock to be inserted */
{
	ulint		space;
	ulint		page_no;
	ulint		rec_fold;
	lock_t*		node;
	lock_t*		next;
	hash_table_t*	hash;
	hash_cell_t*	cell;

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;
	rec_fold = lock_rec_fold(space, page_no);
	hash = lock_hash_get(in_lock->type_mode);
	cell = hash_get_nth_cell(hash,
				 hash_calc_hash(rec_fold, hash));

	node = (lock_t*) cell->node;
	// If in_lock is not a wait lock, we insert it to the head of the list.
	if (node == NULL || !lock_get_wait(in_lock)
	    || has_higher_priority(in_lock, node)) {
		cell->node = in_lock;
		in_lock->hash = node;
		if (lock_get_wait(in_lock)) {
			lock_grant_have_trx_mutex(in_lock);
			return DB_SUCCESS_LOCKED_REC;
		}
		return DB_SUCCESS;
	}
	while (node != NULL && has_higher_priority((lock_t*) node->hash,
						   in_lock)) {
		node = (lock_t*) node->hash;
	}
	next = (lock_t*) node->hash;
	node->hash = in_lock;
	in_lock->hash = next;

	if (lock_get_wait(in_lock) && !lock_rec_has_to_wait_in_queue(in_lock)) {
		lock_grant_have_trx_mutex(in_lock);
		if (cell->node != in_lock) {
			// Move it to the front of the queue
			node->hash = in_lock->hash;
			next = (lock_t*) cell->node;
			cell->node = in_lock;
			in_lock->hash = next;
		}
		return DB_SUCCESS_LOCKED_REC;
	}

	return DB_SUCCESS;
}
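
/* Illustrative walk-through (simplified): assume the hash chain for a
page is G1 -> W_old -> W_new (one granted lock, then waiters ordered by
age) and a waiting lock W_mid of intermediate age is inserted. The scan
above stops as soon as the next node no longer has higher priority,
yielding

	G1 -> W_old -> W_mid -> W_new

and if W_mid then turns out not to conflict with anything ahead of it
(lock_rec_has_to_wait_in_queue() returns NULL), it is granted immediately
and moved to the head of the chain. */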

#ifdef UNIV_DEBUG
static
bool
lock_queue_validate(
	const lock_t*	in_lock)	/*!< in: lock whose hash list is to
					be validated */
{
	ulint		space;
	ulint		page_no;
	ulint		rec_fold;
	hash_table_t*	hash;
	hash_cell_t*	cell;
	lock_t*		next;
	bool		wait_lock __attribute__((unused)) = false;

	if (in_lock == NULL) {
		return true;
	}

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;
	rec_fold = lock_rec_fold(space, page_no);
	hash = lock_hash_get(in_lock->type_mode);
	cell = hash_get_nth_cell(hash,
				 hash_calc_hash(rec_fold, hash));
	next = (lock_t*) cell->node;
	while (next != NULL) {
		// If this is a granted lock, check that there's no wait
		// lock before it.
		if (!lock_get_wait(next)) {
			ut_ad(!wait_lock);
		} else {
			wait_lock = true;
		}
		next = next->hash;
	}
	return true;
}
#endif /* UNIV_DEBUG */

static
void
lock_rec_insert_to_head(
	lock_t*	in_lock,	/*!< in: lock to be inserted */
	ulint	rec_fold)	/*!< in: rec_fold of the page */
{
	hash_table_t*	hash;
	hash_cell_t*	cell;
	lock_t*		node;

	if (in_lock == NULL) {
		return;
	}

	hash = lock_hash_get(in_lock->type_mode);
	cell = hash_get_nth_cell(hash,
				 hash_calc_hash(rec_fold, hash));
	node = (lock_t*) cell->node;
	if (node != in_lock) {
		cell->node = in_lock;
		in_lock->hash = node;
	}
}

/** Enqueue a waiting request for a lock which cannot be granted immediately.
Check for deadlocks.
@param[in]	type_mode	the requested lock mode (LOCK_S or LOCK_X)
				possibly ORed with LOCK_GAP or
				LOCK_REC_NOT_GAP, ORed with
				LOCK_INSERT_INTENTION if this
				waiting lock request is set
				when performing an insert of
				an index record
@param[in]	block		leaf page in the index
@param[in]	heap_no		record heap number in the block
@param[in]	index		index tree
@param[in,out]	thr		query thread
@param[in]	prdt		minimum bounding box (spatial index)
@retval	DB_LOCK_WAIT		if the waiting lock was enqueued
@retval	DB_DEADLOCK		if this transaction was chosen as the victim
@retval	DB_SUCCESS_LOCKED_REC	if the other transaction was chosen as a victim
				(or it happened to commit) */
dberr_t
lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
	lock_t*			c_lock,	/*!< conflicting lock */
#endif
	ulint			type_mode,
	const buf_block_t*	block,
	ulint			heap_no,
	dict_index_t*		index,
	que_thr_t*		thr,
	lock_prdt_t*		prdt)
{
	ut_ad(lock_mutex_own());
	ut_ad(!srv_read_only_mode);
	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));

	trx_t* trx = thr_get_trx(thr);

	ut_ad(trx_mutex_own(trx));
	ut_a(!que_thr_stop(thr));

	switch (trx_get_dict_operation(trx)) {
	case TRX_DICT_OP_NONE:
		break;
	case TRX_DICT_OP_TABLE:
	case TRX_DICT_OP_INDEX:
		ib::error() << "A record lock wait happens in a dictionary"
			" operation. index "
			<< index->name
			<< " of table "
			<< index->table->name
			<< ". " << BUG_REPORT_MSG;
		ut_ad(0);
	}

	if (trx->mysql_thd && thd_lock_wait_timeout(trx->mysql_thd) == 0) {
		trx->error_state = DB_LOCK_WAIT_TIMEOUT;
		return DB_LOCK_WAIT_TIMEOUT;
	}

	/* Enqueue the lock request that will wait to be granted, note that
	we already own the trx mutex. */
	lock_t* lock = lock_rec_create(
#ifdef WITH_WSREP
		c_lock, thr,
#endif
		type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE);

	if (prdt && type_mode & LOCK_PREDICATE) {
		lock_prdt_set_prdt(lock, prdt);
	}

	if (const trx_t* victim =
	    DeadlockChecker::check_and_resolve(lock, trx)) {
		ut_ad(victim == trx);
		lock_reset_lock_and_trx_wait(lock);
		lock_rec_reset_nth_bit(lock, heap_no);
		return DB_DEADLOCK;
	}

	if (!trx->lock.wait_lock) {
		/* If there was a deadlock but we chose another
		transaction as a victim, it is possible that we
		already have the lock now granted! */
#ifdef WITH_WSREP
		if (wsrep_debug) {
			ib::info() << "WSREP: BF thread got lock granted early, ID "
				   << ib::hex(trx->id)
				   << " query: " << wsrep_thd_query(trx->mysql_thd);
		}
#endif
		return DB_SUCCESS_LOCKED_REC;
	}

	trx->lock.que_state = TRX_QUE_LOCK_WAIT;

	trx->lock.was_chosen_as_deadlock_victim = false;
	trx->lock.wait_started = ut_time();

	ut_a(que_thr_stop(thr));

	DBUG_LOG("ib_lock", "trx " << ib::hex(trx->id)
		 << " waits for lock in index " << index->name
		 << " of table " << index->table->name);

	MONITOR_INC(MONITOR_LOCKREC_WAIT);

	if (innodb_lock_schedule_algorithm
	    == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
	    && !prdt
	    && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
		HASH_DELETE(lock_t, hash, lock_sys.rec_hash,
			    lock_rec_lock_fold(lock), lock);
		dberr_t	res = lock_rec_insert_by_trx_age(lock);
		if (res != DB_SUCCESS) {
			return res;
		}
	}

	return DB_LOCK_WAIT;
}

/*********************************************************************//**
Adds a record lock request to the record queue. The request is normally
added as the last in the queue, but if there are no waiting lock requests
on the record, and the request to be added is not a waiting request, we
can reuse a suitable record lock object already existing on the same page,
just setting the appropriate bit in its bitmap. This is a low-level function
which does NOT check for deadlocks or lock compatibility! */
static
void
lock_rec_add_to_queue(
/*==================*/
	ulint			type_mode,/*!< in: lock mode, wait, gap
					etc. flags; type is ignored
					and replaced by LOCK_REC */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	dict_index_t*		index,	/*!< in: index of record */
	trx_t*			trx,	/*!< in/out: transaction */
	bool			caller_owns_trx_mutex)
					/*!< in: TRUE if caller owns the
					transaction mutex */
{
#ifdef UNIV_DEBUG
	ut_ad(lock_mutex_own());
	ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
	ut_ad(dict_index_is_clust(index)
	      || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
	switch (type_mode & LOCK_MODE_MASK) {
	case LOCK_X:
	case LOCK_S:
		break;
	default:
		ut_error;
	}

	if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
		lock_mode	mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
			? LOCK_X
			: LOCK_S;
		const lock_t*	other_lock
			= lock_rec_other_has_expl_req(
				mode, block, false, heap_no, trx);
#ifdef WITH_WSREP
		//ut_a(!other_lock || (wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
		//	wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)));
		if (other_lock &&
		    wsrep_on(trx->mysql_thd) &&
		    !wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
		    !wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)) {

			ib::info() << "WSREP BF lock conflict for my lock:\n BF:" <<
				((wsrep_thd_is_BF(trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
				wsrep_thd_exec_mode(trx->mysql_thd) << " conflict: " <<
				wsrep_thd_conflict_state(trx->mysql_thd, false) << " seqno: " <<
				wsrep_thd_trx_seqno(trx->mysql_thd) << " SQL: " <<
				wsrep_thd_query(trx->mysql_thd);
			trx_t* otrx = other_lock->trx;
			ib::info() << "WSREP other lock:\n BF:" <<
				((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
				wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
				wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " <<
				wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
				wsrep_thd_query(otrx->mysql_thd);
		}
#else
		ut_a(!other_lock);
#endif /* WITH_WSREP */
	}
#endif /* UNIV_DEBUG */

	type_mode |= LOCK_REC;

	/* If rec is the supremum record, then we can reset the gap bit, as
	all locks on the supremum are automatically of the gap type, and we
	try to avoid unnecessary memory consumption of a new record lock
	struct for a gap type lock */

	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));

		/* There should never be LOCK_REC_NOT_GAP on a supremum
		record, but let us play safe */

		type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
	}

	lock_t*		lock;
	lock_t*		first_lock;
	hash_table_t*	hash = lock_hash_get(type_mode);

	/* Look for a waiting lock request on the same record or on a gap */

	for (first_lock = lock = lock_rec_get_first_on_page(hash, block);
	     lock != NULL;
	     lock = lock_rec_get_next_on_page(lock)) {

		if (lock_get_wait(lock)
		    && lock_rec_get_nth_bit(lock, heap_no)) {

			break;
		}
	}

	if (lock == NULL && !(type_mode & LOCK_WAIT)) {

		/* Look for a similar record lock on the same page:
		if one is found and there are no waiting lock requests,
		we can just set the bit */

		lock = lock_rec_find_similar_on_page(
			type_mode, heap_no, first_lock, trx);

		if (lock != NULL) {

			lock_rec_set_nth_bit(lock, heap_no);

			return;
		}
	}

	lock_rec_create(
#ifdef WITH_WSREP
		NULL, NULL,
#endif
		type_mode, block, heap_no, index, trx, caller_owns_trx_mutex);
}
1930
1931/*********************************************************************//**
1932Tries to lock the specified record in the mode requested. If not immediately
1933possible, enqueues a waiting lock request. This is a low-level function
1934which does NOT look at implicit locks! Checks lock compatibility within
1935explicit locks. This function sets a normal next-key lock, or in the case
1936of a page supremum record, a gap type lock.
1937@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
1938static
1939dberr_t
1940lock_rec_lock(
1941/*==========*/
1942 bool impl, /*!< in: if true, no lock is set
1943 if no wait is necessary: we
1944 assume that the caller will
1945 set an implicit lock */
1946 ulint mode, /*!< in: lock mode: LOCK_X or
1947 LOCK_S possibly ORed to either
1948 LOCK_GAP or LOCK_REC_NOT_GAP */
1949 const buf_block_t* block, /*!< in: buffer block containing
1950 the record */
1951 ulint heap_no,/*!< in: heap number of record */
1952 dict_index_t* index, /*!< in: index of record */
1953 que_thr_t* thr) /*!< in: query thread */
1954{
1955 trx_t *trx= thr_get_trx(thr);
1956 dberr_t err= DB_SUCCESS;
1957
1958 ut_ad(!srv_read_only_mode);
1959 ut_ad((LOCK_MODE_MASK & mode) == LOCK_S ||
1960 (LOCK_MODE_MASK & mode) == LOCK_X);
1961 ut_ad((mode & LOCK_TYPE_MASK) == LOCK_GAP ||
1962 (mode & LOCK_TYPE_MASK) == LOCK_REC_NOT_GAP ||
1963 (mode & LOCK_TYPE_MASK) == 0);
1964 ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
1965 DBUG_EXECUTE_IF("innodb_report_deadlock", return DB_DEADLOCK;);
1966
1967 lock_mutex_enter();
1968 ut_ad((LOCK_MODE_MASK & mode) != LOCK_S ||
1969 lock_table_has(trx, index->table, LOCK_IS));
1970 ut_ad((LOCK_MODE_MASK & mode) != LOCK_X ||
1971 lock_table_has(trx, index->table, LOCK_IX));
1972
1973 if (lock_t *lock= lock_rec_get_first_on_page(lock_sys.rec_hash, block))
1974 {
1975 trx_mutex_enter(trx);
1976 if (lock_rec_get_next_on_page(lock) ||
1977 lock->trx != trx ||
1978 lock->type_mode != (ulint(mode) | LOCK_REC) ||
1979 lock_rec_get_n_bits(lock) <= heap_no)
1980 {
1981 /* Do nothing if the trx already has a strong enough lock on rec */
1982 if (!lock_rec_has_expl(mode, block, heap_no, trx))
1983 {
1984 if (
1985#ifdef WITH_WSREP
1986 lock_t *c_lock=
1987#endif
1988 lock_rec_other_has_conflicting(mode, block, heap_no, trx))
1989 {
        /*
          If another transaction has a conflicting non-gap lock request
          in the queue, and this transaction does not already have a
          strong enough lock granted on the record, we have to wait. */
1995 err = lock_rec_enqueue_waiting(
1996#ifdef WITH_WSREP
1997 c_lock,
1998#endif /* WITH_WSREP */
1999 mode, block, heap_no, index, thr, NULL);
2000 }
2001 else if (!impl)
2002 {
2003 /* Set the requested lock on the record. */
2004 lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx,
2005 true);
2006 err= DB_SUCCESS_LOCKED_REC;
2007 }
2008 }
2009 }
2010 else if (!impl)
2011 {
      /*
        If the nth bit of the record lock is already set, we do not set
        a new lock bit; otherwise we set it.
      */
2016 if (!lock_rec_get_nth_bit(lock, heap_no))
2017 {
2018 lock_rec_set_nth_bit(lock, heap_no);
2019 err= DB_SUCCESS_LOCKED_REC;
2020 }
2021 }
2022 trx_mutex_exit(trx);
2023 }
2024 else
2025 {
    /*
      Simplified and faster path for the most common cases.
      Note that we don't own the trx mutex.
    */
2030 if (!impl)
2031 lock_rec_create(
2032#ifdef WITH_WSREP
2033 NULL, NULL,
2034#endif
2035 mode, block, heap_no, index, trx, false);
2036
2037 err= DB_SUCCESS_LOCKED_REC;
2038 }
2039 lock_mutex_exit();
2040 MONITOR_ATOMIC_INC(MONITOR_NUM_RECLOCK_REQ);
2041 return err;
2042}
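
/* Illustrative sketch (not part of the server): the "set one more bit"
shortcut above applies only when the page carries exactly one lock, owned
by this transaction, with exactly the requested mode, and with a bitmap
wide enough for heap_no. A standalone model of that test, with invented
simplified types:

   #include <cstdint>

   struct ToyLock {
           const void*     trx;            // owning transaction
           uint32_t        type_mode;      // full mode bits
           uint32_t        n_bits;         // bitmap width
           ToyLock*        next_on_page;   // hash chain on the same page
   };

   // True if we may simply set one more bit in the existing lock object.
   static bool can_reuse_single_lock(const ToyLock* first, const void* trx,
                                     uint32_t wanted_mode, uint32_t heap_no)
   {
           return first->next_on_page == nullptr
                  && first->trx == trx
                  && first->type_mode == wanted_mode
                  && first->n_bits > heap_no;
   }
*/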
2043
2044/*********************************************************************//**
2045Checks if a waiting record lock request still has to wait in a queue.
2046@return lock that is causing the wait */
2047static
2048const lock_t*
2049lock_rec_has_to_wait_in_queue(
2050/*==========================*/
2051 const lock_t* wait_lock) /*!< in: waiting record lock */
2052{
2053 const lock_t* lock;
2054 ulint space;
2055 ulint page_no;
2056 ulint heap_no;
2057 ulint bit_mask;
2058 ulint bit_offset;
2059 hash_table_t* hash;
2060
2061 ut_ad(lock_mutex_own());
2062 ut_ad(lock_get_wait(wait_lock));
2063 ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
2064
2065 space = wait_lock->un_member.rec_lock.space;
2066 page_no = wait_lock->un_member.rec_lock.page_no;
2067 heap_no = lock_rec_find_set_bit(wait_lock);
2068
2069 bit_offset = heap_no / 8;
2070 bit_mask = static_cast<ulint>(1) << (heap_no % 8);
2071
2072 hash = lock_hash_get(wait_lock->type_mode);
2073
2074 for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
2075 lock != wait_lock;
2076 lock = lock_rec_get_next_on_page_const(lock)) {
2077
2078 const byte* p = (const byte*) &lock[1];
2079
2080 if (heap_no < lock_rec_get_n_bits(lock)
2081 && (p[bit_offset] & bit_mask)
2082 && lock_has_to_wait(wait_lock, lock)) {
2083#ifdef WITH_WSREP
2084 if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) &&
2085 wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) {
2086 if (wsrep_debug) {
2087 ib::info() << "WSREP: waiting BF trx: " << ib::hex(wait_lock->trx->id)
2088 << " query: " << wsrep_thd_query(wait_lock->trx->mysql_thd);
2089 lock_rec_print(stderr, wait_lock);
2090 ib::info() << "WSREP: do not wait another BF trx: " << ib::hex(lock->trx->id)
2091 << " query: " << wsrep_thd_query(lock->trx->mysql_thd);
2092 lock_rec_print(stderr, lock);
2093 }
2094 /* don't wait for another BF lock */
2095 continue;
2096 }
2097#endif /* WITH_WSREP */
2098
2099 return(lock);
2100 }
2101 }
2102
2103 return(NULL);
2104}
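
/* Illustrative sketch (not part of the server): a record lock covers many
records of one page through a bitmap stored directly after the lock
struct; heap_no is mapped to a byte offset and bit mask exactly as in the
loop above. A standalone model of that addressing:

   #include <cstddef>
   #include <cstdint>

   // Check whether bit heap_no is set in a lock bitmap of n_bits bits.
   static bool bit_is_set(const uint8_t* bitmap, size_t n_bits,
                          size_t heap_no)
   {
           if (heap_no >= n_bits) {
                   return false;   // bitmap too small for this record
           }
           const size_t  bit_offset = heap_no / 8;
           const uint8_t bit_mask   = uint8_t(1U << (heap_no % 8));
           return (bitmap[bit_offset] & bit_mask) != 0;
   }
*/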
2105
2106/** Grant a lock to a waiting lock request and release the waiting transaction
2107after lock_reset_lock_and_trx_wait() has been called. */
2108static void lock_grant_after_reset(lock_t* lock)
2109{
2110 ut_ad(lock_mutex_own());
2111 ut_ad(trx_mutex_own(lock->trx));
2112
2113 if (lock_get_mode(lock) == LOCK_AUTO_INC) {
2114 dict_table_t* table = lock->un_member.tab_lock.table;
2115
2116 if (table->autoinc_trx == lock->trx) {
2117 ib::error() << "Transaction already had an"
2118 << " AUTO-INC lock!";
2119 } else {
2120 table->autoinc_trx = lock->trx;
2121
2122 ib_vector_push(lock->trx->autoinc_locks, &lock);
2123 }
2124 }
2125
2126 DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends",
2127 trx_get_id_for_print(lock->trx)));
2128
2129 /* If we are resolving a deadlock by choosing another transaction
2130 as a victim, then our original transaction may not be in the
2131 TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
2132 for it */
2133
2134 if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
2135 que_thr_t* thr;
2136
2137 thr = que_thr_end_lock_wait(lock->trx);
2138
2139 if (thr != NULL) {
2140 lock_wait_release_thread_if_suspended(thr);
2141 }
2142 }
2143}
2144
2145/** Grant a lock to a waiting lock request and release the waiting transaction. */
2146static void lock_grant(lock_t* lock)
2147{
2148 lock_reset_lock_and_trx_wait(lock);
2149 trx_mutex_enter(lock->trx);
2150 lock_grant_after_reset(lock);
2151 trx_mutex_exit(lock->trx);
2152}
2153
2154/*************************************************************//**
2155Cancels a waiting record lock request and releases the waiting transaction
2156that requested it. NOTE: does NOT check if waiting lock requests behind this
2157one can now be granted! */
2158static
2159void
2160lock_rec_cancel(
2161/*============*/
2162 lock_t* lock) /*!< in: waiting record lock request */
2163{
2164 que_thr_t* thr;
2165
2166 ut_ad(lock_mutex_own());
2167 ut_ad(lock_get_type_low(lock) == LOCK_REC);
2168
2169 /* Reset the bit (there can be only one set bit) in the lock bitmap */
2170 lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
2171
2172 /* Reset the wait flag and the back pointer to lock in trx */
2173
2174 lock_reset_lock_and_trx_wait(lock);
2175
2176 /* The following function releases the trx from lock wait */
2177
2178 trx_mutex_enter(lock->trx);
2179
2180 thr = que_thr_end_lock_wait(lock->trx);
2181
2182 if (thr != NULL) {
2183 lock_wait_release_thread_if_suspended(thr);
2184 }
2185
2186 trx_mutex_exit(lock->trx);
2187}
2188
/** Grant locks on a page which no longer have to wait, and move any
lock that is granted to the front of its hash chain. Used by the VATS
lock scheduling algorithm.
@param[in]	rec_fold	fold value of the page address
@param[in]	space		tablespace id of the page
@param[in]	page_no		page number */
static
void
lock_grant_and_move_on_page(ulint rec_fold, ulint space, ulint page_no)
2192{
2193 lock_t* lock;
2194 lock_t* previous = static_cast<lock_t*>(
2195 hash_get_nth_cell(lock_sys.rec_hash,
2196 hash_calc_hash(rec_fold, lock_sys.rec_hash))
2197 ->node);
2198 if (previous == NULL) {
2199 return;
2200 }
2201 if (previous->un_member.rec_lock.space == space &&
2202 previous->un_member.rec_lock.page_no == page_no) {
2203 lock = previous;
2204 }
2205 else {
2206 while (previous->hash &&
2207 (previous->hash->un_member.rec_lock.space != space ||
2208 previous->hash->un_member.rec_lock.page_no != page_no)) {
2209 previous = previous->hash;
2210 }
2211 lock = previous->hash;
2212 }
2213
2214 ut_ad(previous->hash == lock || previous == lock);
2215 /* Grant locks if there are no conflicting locks ahead.
2216 Move granted locks to the head of the list. */
2217 while (lock) {
		/* If the lock is a waiting lock on this page and it no
		longer needs to wait, grant it. */
2219 if (lock_get_wait(lock)
2220 && lock->un_member.rec_lock.space == space
2221 && lock->un_member.rec_lock.page_no == page_no
2222 && !lock_rec_has_to_wait_in_queue(lock)) {
2223 lock_grant(lock);
2224
2225 if (previous != NULL) {
2226 /* Move the lock to the head of the list. */
2227 HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
2228 lock_rec_insert_to_head(lock, rec_fold);
2229 } else {
2230 /* Already at the head of the list. */
2231 previous = lock;
2232 }
2233 /* Move on to the next lock. */
2234 lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
2235 } else {
2236 previous = lock;
2237 lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
2238 }
2239 }
2240}
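
/* Illustrative sketch (not part of the server): moving a granted lock to
the head of its singly linked hash chain requires unlinking it through
its predecessor, which is why the loop above tracks "previous". The same
operation on an invented node type:

   struct ToyNode { ToyNode* next; };

   // Unlink *node (whose predecessor is prev) and push it to the front.
   static void move_to_head(ToyNode*& head, ToyNode* prev, ToyNode* node)
   {
           if (prev == nullptr) {
                   return;                 // node is already the head
           }
           prev->next = node->next;        // unlink from the chain
           node->next = head;              // relink at the front
           head = node;
   }
*/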
2241
2242/** Remove a record lock request, waiting or granted, from the queue and
2243grant locks to other transactions in the queue if they now are entitled
2244to a lock. NOTE: all record locks contained in in_lock are removed.
2245@param[in,out] in_lock record lock */
2246static void lock_rec_dequeue_from_page(lock_t* in_lock)
2247{
2248 ulint space;
2249 ulint page_no;
2250 hash_table_t* lock_hash;
2251
2252 ut_ad(lock_mutex_own());
2253 ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2254 /* We may or may not be holding in_lock->trx->mutex here. */
2255
2256 space = in_lock->un_member.rec_lock.space;
2257 page_no = in_lock->un_member.rec_lock.page_no;
2258
2259 in_lock->index->table->n_rec_locks--;
2260
2261 lock_hash = lock_hash_get(in_lock->type_mode);
2262
2263 ulint rec_fold = lock_rec_fold(space, page_no);
2264
2265 HASH_DELETE(lock_t, hash, lock_hash, rec_fold, in_lock);
2266 UT_LIST_REMOVE(in_lock->trx->lock.trx_locks, in_lock);
2267
2268 MONITOR_INC(MONITOR_RECLOCK_REMOVED);
2269 MONITOR_DEC(MONITOR_NUM_RECLOCK);
2270
2271 if (innodb_lock_schedule_algorithm
2272 == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS
2273 || lock_hash != lock_sys.rec_hash
2274 || thd_is_replication_slave_thread(in_lock->trx->mysql_thd)) {
2275 /* Check if waiting locks in the queue can now be granted:
2276 grant locks if there are no conflicting locks ahead. Stop at
2277 the first X lock that is waiting or has been granted. */
2278
2279 for (lock_t* lock = lock_rec_get_first_on_page_addr(
2280 lock_hash, space, page_no);
2281 lock != NULL;
2282 lock = lock_rec_get_next_on_page(lock)) {
2283
2284 if (lock_get_wait(lock)
2285 && !lock_rec_has_to_wait_in_queue(lock)) {
2286 /* Grant the lock */
2287 ut_ad(lock->trx != in_lock->trx);
2288 lock_grant(lock);
2289 }
2290 }
2291 } else {
2292 lock_grant_and_move_on_page(rec_fold, space, page_no);
2293 }
2294}
2295
2296/*************************************************************//**
2297Removes a record lock request, waiting or granted, from the queue. */
2298void
2299lock_rec_discard(
2300/*=============*/
2301 lock_t* in_lock) /*!< in: record lock object: all
2302 record locks which are contained
2303 in this lock object are removed */
2304{
2305 ulint space;
2306 ulint page_no;
2307 trx_lock_t* trx_lock;
2308
2309 ut_ad(lock_mutex_own());
2310 ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2311
2312 trx_lock = &in_lock->trx->lock;
2313
2314 space = in_lock->un_member.rec_lock.space;
2315 page_no = in_lock->un_member.rec_lock.page_no;
2316
2317 in_lock->index->table->n_rec_locks--;
2318
2319 HASH_DELETE(lock_t, hash, lock_hash_get(in_lock->type_mode),
2320 lock_rec_fold(space, page_no), in_lock);
2321
2322 UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);
2323
2324 MONITOR_INC(MONITOR_RECLOCK_REMOVED);
2325 MONITOR_DEC(MONITOR_NUM_RECLOCK);
2326}
2327
2328/*************************************************************//**
Removes record lock objects set on an index page which is discarded. This
function neither moves locks nor checks for waiting locks; therefore the
lock bitmaps must already be reset when this function is called. */
2332static
2333void
2334lock_rec_free_all_from_discard_page_low(
2335/*====================================*/
2336 ulint space,
2337 ulint page_no,
2338 hash_table_t* lock_hash)
2339{
2340 lock_t* lock;
2341 lock_t* next_lock;
2342
2343 lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
2344
2345 while (lock != NULL) {
2346 ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
2347 ut_ad(!lock_get_wait(lock));
2348
2349 next_lock = lock_rec_get_next_on_page(lock);
2350
2351 lock_rec_discard(lock);
2352
2353 lock = next_lock;
2354 }
2355}
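
/* Illustrative sketch (not part of the server): when every lock on a page
is discarded, the next pointer must be saved before the current element is
removed, as the loop above does. The same pattern over a plain singly
linked list that owns its nodes:

   struct ToyNode { ToyNode* next; };

   static void free_all(ToyNode*& head)
   {
           ToyNode* node = head;
           while (node != nullptr) {
                   ToyNode* next = node->next;     // save before freeing
                   delete node;
                   node = next;
           }
           head = nullptr;
   }
*/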
2356
2357/*************************************************************//**
Removes record lock objects set on an index page which is discarded. This
function neither moves locks nor checks for waiting locks; therefore the
lock bitmaps must already be reset when this function is called. */
2361void
2362lock_rec_free_all_from_discard_page(
2363/*================================*/
2364 const buf_block_t* block) /*!< in: page to be discarded */
2365{
2366 ulint space;
2367 ulint page_no;
2368
2369 ut_ad(lock_mutex_own());
2370
2371 space = block->page.id.space();
2372 page_no = block->page.id.page_no();
2373
2374 lock_rec_free_all_from_discard_page_low(
2375 space, page_no, lock_sys.rec_hash);
2376 lock_rec_free_all_from_discard_page_low(
2377 space, page_no, lock_sys.prdt_hash);
2378 lock_rec_free_all_from_discard_page_low(
2379 space, page_no, lock_sys.prdt_page_hash);
2380}
2381
2382/*============= RECORD LOCK MOVING AND INHERITING ===================*/
2383
2384/*************************************************************//**
2385Resets the lock bits for a single record. Releases transactions waiting for
2386lock requests here. */
2387static
2388void
2389lock_rec_reset_and_release_wait_low(
2390/*================================*/
2391 hash_table_t* hash, /*!< in: hash table */
2392 const buf_block_t* block, /*!< in: buffer block containing
2393 the record */
2394 ulint heap_no)/*!< in: heap number of record */
2395{
2396 lock_t* lock;
2397
2398 ut_ad(lock_mutex_own());
2399
2400 for (lock = lock_rec_get_first(hash, block, heap_no);
2401 lock != NULL;
2402 lock = lock_rec_get_next(heap_no, lock)) {
2403
2404 if (lock_get_wait(lock)) {
2405 lock_rec_cancel(lock);
2406 } else {
2407 lock_rec_reset_nth_bit(lock, heap_no);
2408 }
2409 }
2410}
2411
2412/*************************************************************//**
2413Resets the lock bits for a single record. Releases transactions waiting for
2414lock requests here. */
2415static
2416void
2417lock_rec_reset_and_release_wait(
2418/*============================*/
2419 const buf_block_t* block, /*!< in: buffer block containing
2420 the record */
2421 ulint heap_no)/*!< in: heap number of record */
2422{
2423 lock_rec_reset_and_release_wait_low(
2424 lock_sys.rec_hash, block, heap_no);
2425
2426 lock_rec_reset_and_release_wait_low(
2427 lock_sys.prdt_hash, block, PAGE_HEAP_NO_INFIMUM);
2428 lock_rec_reset_and_release_wait_low(
2429 lock_sys.prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
2430}
2431
2432/*************************************************************//**
Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of
the other record. Also waiting lock requests on rec are inherited as
GRANTED gap locks. */
2437static
2438void
2439lock_rec_inherit_to_gap(
2440/*====================*/
2441 const buf_block_t* heir_block, /*!< in: block containing the
2442 record which inherits */
2443 const buf_block_t* block, /*!< in: block containing the
2444 record from which inherited;
2445 does NOT reset the locks on
2446 this record */
2447 ulint heir_heap_no, /*!< in: heap_no of the
2448 inheriting record */
2449 ulint heap_no) /*!< in: heap_no of the
2450 donating record */
2451{
2452 lock_t* lock;
2453
2454 ut_ad(lock_mutex_own());
2455
	/* If srv_locks_unsafe_for_binlog is TRUE or the session is using
	the READ COMMITTED isolation level, we do not want locks set
	by an UPDATE or a DELETE to be inherited as gap type locks. But we
	DO want S-locks/X-locks (taken for REPLACE) set by a consistency
	constraint to be inherited also then. */
2461
2462 for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
2463 lock != NULL;
2464 lock = lock_rec_get_next(heap_no, lock)) {
2465
2466 if (!lock_rec_get_insert_intention(lock)
2467 && !((srv_locks_unsafe_for_binlog
2468 || lock->trx->isolation_level
2469 <= TRX_ISO_READ_COMMITTED)
2470 && lock_get_mode(lock) ==
2471 (lock->trx->duplicates ? LOCK_S : LOCK_X))) {
2472 lock_rec_add_to_queue(
2473 LOCK_REC | LOCK_GAP
2474 | ulint(lock_get_mode(lock)),
2475 heir_block, heir_heap_no, lock->index,
2476 lock->trx, FALSE);
2477 }
2478 }
2479}
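
/* Illustrative sketch (not part of the server): the loop above inherits a
lock as a gap lock unless it is an insert-intention lock, or it is an
UPDATE/DELETE-style lock (LOCK_X, or LOCK_S when REPLACE sets
trx->duplicates) under READ COMMITTED or srv_locks_unsafe_for_binlog. A
standalone model of that filter, with invented simplified flags:

   struct ToyLock {
           bool    is_insert_intention;
           bool    read_committed;         // isolation <= READ COMMITTED
           bool    is_update_delete_mode;  // the mode skipped above
   };

   static bool should_inherit_as_gap(const ToyLock& lk)
   {
           return !lk.is_insert_intention
                  && !(lk.read_committed && lk.is_update_delete_mode);
   }
*/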
2480
2481/*************************************************************//**
Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of the
other record. Also waiting lock requests are inherited as GRANTED gap locks. */
2485static
2486void
2487lock_rec_inherit_to_gap_if_gap_lock(
2488/*================================*/
2489 const buf_block_t* block, /*!< in: buffer block */
2490 ulint heir_heap_no, /*!< in: heap_no of
2491 record which inherits */
2492 ulint heap_no) /*!< in: heap_no of record
2493 from which inherited;
2494 does NOT reset the locks
2495 on this record */
2496{
2497 lock_t* lock;
2498
2499 lock_mutex_enter();
2500
2501 for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
2502 lock != NULL;
2503 lock = lock_rec_get_next(heap_no, lock)) {
2504
2505 if (!lock_rec_get_insert_intention(lock)
2506 && (heap_no == PAGE_HEAP_NO_SUPREMUM
2507 || !lock_rec_get_rec_not_gap(lock))) {
2508
2509 lock_rec_add_to_queue(
2510 LOCK_REC | LOCK_GAP
2511 | ulint(lock_get_mode(lock)),
2512 block, heir_heap_no, lock->index,
2513 lock->trx, FALSE);
2514 }
2515 }
2516
2517 lock_mutex_exit();
2518}
2519
2520/*************************************************************//**
2521Moves the locks of a record to another record and resets the lock bits of
2522the donating record. */
2523static
2524void
2525lock_rec_move_low(
2526/*==============*/
2527 hash_table_t* lock_hash, /*!< in: hash table to use */
2528 const buf_block_t* receiver, /*!< in: buffer block containing
2529 the receiving record */
2530 const buf_block_t* donator, /*!< in: buffer block containing
2531 the donating record */
2532 ulint receiver_heap_no,/*!< in: heap_no of the record
2533 which gets the locks; there
2534 must be no lock requests
2535 on it! */
2536 ulint donator_heap_no)/*!< in: heap_no of the record
2537 which gives the locks */
2538{
2539 lock_t* lock;
2540
2541 ut_ad(lock_mutex_own());
2542
	/* If the lock is a predicate lock, it resides on the infimum record */
2544 ut_ad(lock_rec_get_first(
2545 lock_hash, receiver, receiver_heap_no) == NULL
2546 || lock_hash == lock_sys.prdt_hash
2547 || lock_hash == lock_sys.prdt_page_hash);
2548
2549 for (lock = lock_rec_get_first(lock_hash,
2550 donator, donator_heap_no);
2551 lock != NULL;
2552 lock = lock_rec_get_next(donator_heap_no, lock)) {
2553
2554 const ulint type_mode = lock->type_mode;
2555
2556 lock_rec_reset_nth_bit(lock, donator_heap_no);
2557
2558 if (type_mode & LOCK_WAIT) {
2559 lock_reset_lock_and_trx_wait(lock);
2560 }
2561
		/* Note that we FIRST reset the bit, and then set the lock:
		this way the function also works when donator == receiver */
2564
2565 lock_rec_add_to_queue(
2566 type_mode, receiver, receiver_heap_no,
2567 lock->index, lock->trx, FALSE);
2568 }
2569
2570 ut_ad(lock_rec_get_first(lock_sys.rec_hash,
2571 donator, donator_heap_no) == NULL);
2572}
2573
2574/** Move all the granted locks to the front of the given lock list.
2575All the waiting locks will be at the end of the list.
2576@param[in,out] lock_list the given lock list. */
2577static
2578void
2579lock_move_granted_locks_to_front(
2580 UT_LIST_BASE_NODE_T(lock_t)& lock_list)
2581{
2582 lock_t* lock;
2583
2584 bool seen_waiting_lock = false;
2585
2586 for (lock = UT_LIST_GET_FIRST(lock_list); lock;
2587 lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
2588
2589 if (!seen_waiting_lock) {
2590 if (lock->is_waiting()) {
2591 seen_waiting_lock = true;
2592 }
2593 continue;
2594 }
2595
2596 ut_ad(seen_waiting_lock);
2597
2598 if (!lock->is_waiting()) {
2599 lock_t* prev = UT_LIST_GET_PREV(trx_locks, lock);
2600 ut_a(prev);
2601 UT_LIST_MOVE_TO_FRONT(lock_list, lock);
2602 lock = prev;
2603 }
2604 }
2605}
2606
2607/*************************************************************//**
2608Moves the locks of a record to another record and resets the lock bits of
2609the donating record. */
2610UNIV_INLINE
2611void
2612lock_rec_move(
2613/*==========*/
2614 const buf_block_t* receiver, /*!< in: buffer block containing
2615 the receiving record */
2616 const buf_block_t* donator, /*!< in: buffer block containing
2617 the donating record */
2618 ulint receiver_heap_no,/*!< in: heap_no of the record
2619 which gets the locks; there
2620 must be no lock requests
2621 on it! */
2622 ulint donator_heap_no)/*!< in: heap_no of the record
2623 which gives the locks */
2624{
2625 lock_rec_move_low(lock_sys.rec_hash, receiver, donator,
2626 receiver_heap_no, donator_heap_no);
2627}
2628
2629/*************************************************************//**
Updates the lock table when we have reorganized a page. NOTE: we also copy
the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
2634void
2635lock_move_reorganize_page(
2636/*======================*/
2637 const buf_block_t* block, /*!< in: old index page, now
2638 reorganized */
2639 const buf_block_t* oblock) /*!< in: copy of the old, not
2640 reorganized page */
2641{
2642 lock_t* lock;
2643 UT_LIST_BASE_NODE_T(lock_t) old_locks;
2644 mem_heap_t* heap = NULL;
2645 ulint comp;
2646
2647 lock_mutex_enter();
2648
	/* FIXME: This needs to deal with predicate locks too */
2650 lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block);
2651
2652 if (lock == NULL) {
2653 lock_mutex_exit();
2654
2655 return;
2656 }
2657
2658 heap = mem_heap_create(256);
2659
	/* First, copy all the locks on the page to the heap and reset the
	bitmaps in the original locks; chain the copies of the locks
	using the trx_locks field in them. */
2663
2664 UT_LIST_INIT(old_locks, &lock_t::trx_locks);
2665
2666 do {
2667 /* Make a copy of the lock */
2668 lock_t* old_lock = lock_rec_copy(lock, heap);
2669
2670 UT_LIST_ADD_LAST(old_locks, old_lock);
2671
2672 /* Reset bitmap of lock */
2673 lock_rec_bitmap_reset(lock);
2674
2675 if (lock_get_wait(lock)) {
2676
2677 lock_reset_lock_and_trx_wait(lock);
2678 }
2679
2680 lock = lock_rec_get_next_on_page(lock);
2681 } while (lock != NULL);
2682
2683 comp = page_is_comp(block->frame);
2684 ut_ad(comp == page_is_comp(oblock->frame));
2685
2686 lock_move_granted_locks_to_front(old_locks);
2687
2688 DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize",
2689 UT_LIST_REVERSE(old_locks););
2690
2691 for (lock = UT_LIST_GET_FIRST(old_locks); lock;
2692 lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
2693
		/* NOTE: we also copy the locks set on the infimum and
		supremum of the page; the infimum may carry locks if an
		update of a record is occurring on the page, and its locks
		were temporarily stored on the infimum */
2698 const rec_t* rec1 = page_get_infimum_rec(
2699 buf_block_get_frame(block));
2700 const rec_t* rec2 = page_get_infimum_rec(
2701 buf_block_get_frame(oblock));
2702
2703 /* Set locks according to old locks */
2704 for (;;) {
2705 ulint old_heap_no;
2706 ulint new_heap_no;
2707 ut_d(const rec_t* const orec = rec1);
2708 ut_ad(page_rec_is_default_row(rec1)
2709 == page_rec_is_default_row(rec2));
2710
2711 if (comp) {
2712 old_heap_no = rec_get_heap_no_new(rec2);
2713 new_heap_no = rec_get_heap_no_new(rec1);
2714
2715 rec1 = page_rec_get_next_low(rec1, TRUE);
2716 rec2 = page_rec_get_next_low(rec2, TRUE);
2717 } else {
2718 old_heap_no = rec_get_heap_no_old(rec2);
2719 new_heap_no = rec_get_heap_no_old(rec1);
2720 ut_ad(!memcmp(rec1, rec2,
2721 rec_get_data_size_old(rec2)));
2722
2723 rec1 = page_rec_get_next_low(rec1, FALSE);
2724 rec2 = page_rec_get_next_low(rec2, FALSE);
2725 }
2726
2727 /* Clear the bit in old_lock. */
2728 if (old_heap_no < lock->un_member.rec_lock.n_bits
2729 && lock_rec_reset_nth_bit(lock, old_heap_no)) {
2730 ut_ad(!page_rec_is_default_row(orec));
2731
2732 /* NOTE that the old lock bitmap could be too
2733 small for the new heap number! */
2734
2735 lock_rec_add_to_queue(
2736 lock->type_mode, block, new_heap_no,
2737 lock->index, lock->trx, FALSE);
2738 }
2739
2740 if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2741 ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
2742 break;
2743 }
2744 }
2745
2746 ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
2747 }
2748
2749 lock_mutex_exit();
2750
2751 mem_heap_free(heap);
2752
2753#ifdef UNIV_DEBUG_LOCK_VALIDATE
2754 ut_ad(lock_rec_validate_page(block));
2755#endif
2756}
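
/* Illustrative sketch (not part of the server): after a reorganize, the
old and the new page contain the same records in the same order, so
walking both record lists in lockstep yields the old-to-new heap number
mapping used above. A standalone model over plain arrays:

   #include <algorithm>
   #include <cstdint>
   #include <utility>
   #include <vector>

   // Given the heap numbers of the records in list order on the old and
   // on the new page, produce (old_heap_no, new_heap_no) pairs.
   static std::vector<std::pair<uint32_t, uint32_t> >
   map_heap_nos(const std::vector<uint32_t>& old_order,
                const std::vector<uint32_t>& new_order)
   {
           std::vector<std::pair<uint32_t, uint32_t> > mapping;
           const size_t n = std::min(old_order.size(), new_order.size());
           for (size_t i = 0; i < n; i++) {
                   mapping.emplace_back(old_order[i], new_order[i]);
           }
           return mapping;
   }
*/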
2757
2758/*************************************************************//**
2759Moves the explicit locks on user records to another page if a record
2760list end is moved to another page. */
2761void
2762lock_move_rec_list_end(
2763/*===================*/
2764 const buf_block_t* new_block, /*!< in: index page to move to */
2765 const buf_block_t* block, /*!< in: index page */
2766 const rec_t* rec) /*!< in: record on page: this
2767 is the first record moved */
2768{
2769 lock_t* lock;
2770 const ulint comp = page_rec_is_comp(rec);
2771
2772 ut_ad(buf_block_get_frame(block) == page_align(rec));
2773 ut_ad(comp == page_is_comp(buf_block_get_frame(new_block)));
2774
2775 lock_mutex_enter();
2776
	/* Note: when we move locks from record to record, waiting locks
	and possibly granted gap type locks behind them are enqueued in
	the original order, because new elements are inserted at the end
	of the hash chain, and lock_rec_add_to_queue does not reuse locks
	if there are waiters in the queue. */
2782
2783 for (lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block); lock;
2784 lock = lock_rec_get_next_on_page(lock)) {
2785 const rec_t* rec1 = rec;
2786 const rec_t* rec2;
2787 const ulint type_mode = lock->type_mode;
2788
2789 if (comp) {
2790 if (page_offset(rec1) == PAGE_NEW_INFIMUM) {
2791 rec1 = page_rec_get_next_low(rec1, TRUE);
2792 }
2793
2794 rec2 = page_rec_get_next_low(
2795 buf_block_get_frame(new_block)
2796 + PAGE_NEW_INFIMUM, TRUE);
2797 } else {
2798 if (page_offset(rec1) == PAGE_OLD_INFIMUM) {
2799 rec1 = page_rec_get_next_low(rec1, FALSE);
2800 }
2801
2802 rec2 = page_rec_get_next_low(
2803 buf_block_get_frame(new_block)
2804 + PAGE_OLD_INFIMUM, FALSE);
2805 }
2806
2807 /* Copy lock requests on user records to new page and
2808 reset the lock bits on the old */
2809
2810 for (;;) {
2811 ut_ad(page_rec_is_default_row(rec1)
2812 == page_rec_is_default_row(rec2));
2813 ut_d(const rec_t* const orec = rec1);
2814
2815 ulint rec1_heap_no;
2816 ulint rec2_heap_no;
2817
2818 if (comp) {
2819 rec1_heap_no = rec_get_heap_no_new(rec1);
2820
2821 if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2822 break;
2823 }
2824
2825 rec2_heap_no = rec_get_heap_no_new(rec2);
2826 rec1 = page_rec_get_next_low(rec1, TRUE);
2827 rec2 = page_rec_get_next_low(rec2, TRUE);
2828 } else {
2829 rec1_heap_no = rec_get_heap_no_old(rec1);
2830
2831 if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2832 break;
2833 }
2834
2835 rec2_heap_no = rec_get_heap_no_old(rec2);
2836
2837 ut_ad(rec_get_data_size_old(rec1)
2838 == rec_get_data_size_old(rec2));
2839
2840 ut_ad(!memcmp(rec1, rec2,
2841 rec_get_data_size_old(rec1)));
2842
2843 rec1 = page_rec_get_next_low(rec1, FALSE);
2844 rec2 = page_rec_get_next_low(rec2, FALSE);
2845 }
2846
2847 if (rec1_heap_no < lock->un_member.rec_lock.n_bits
2848 && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
2849 ut_ad(!page_rec_is_default_row(orec));
2850
2851 if (type_mode & LOCK_WAIT) {
2852 lock_reset_lock_and_trx_wait(lock);
2853 }
2854
2855 lock_rec_add_to_queue(
2856 type_mode, new_block, rec2_heap_no,
2857 lock->index, lock->trx, FALSE);
2858 }
2859 }
2860 }
2861
2862 lock_mutex_exit();
2863
2864#ifdef UNIV_DEBUG_LOCK_VALIDATE
2865 ut_ad(lock_rec_validate_page(block));
2866 ut_ad(lock_rec_validate_page(new_block));
2867#endif
2868}
2869
2870/*************************************************************//**
2871Moves the explicit locks on user records to another page if a record
2872list start is moved to another page. */
2873void
2874lock_move_rec_list_start(
2875/*=====================*/
2876 const buf_block_t* new_block, /*!< in: index page to
2877 move to */
2878 const buf_block_t* block, /*!< in: index page */
2879 const rec_t* rec, /*!< in: record on page:
2880 this is the first
2881 record NOT copied */
2882 const rec_t* old_end) /*!< in: old
2883 previous-to-last
2884 record on new_page
2885 before the records
2886 were copied */
2887{
2888 lock_t* lock;
2889 const ulint comp = page_rec_is_comp(rec);
2890
2891 ut_ad(block->frame == page_align(rec));
2892 ut_ad(new_block->frame == page_align(old_end));
2893 ut_ad(comp == page_rec_is_comp(old_end));
2894 ut_ad(!page_rec_is_default_row(rec));
2895
2896 lock_mutex_enter();
2897
2898 for (lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block); lock;
2899 lock = lock_rec_get_next_on_page(lock)) {
2900 const rec_t* rec1;
2901 const rec_t* rec2;
2902 const ulint type_mode = lock->type_mode;
2903
2904 if (comp) {
2905 rec1 = page_rec_get_next_low(
2906 buf_block_get_frame(block)
2907 + PAGE_NEW_INFIMUM, TRUE);
2908 rec2 = page_rec_get_next_low(old_end, TRUE);
2909 } else {
2910 rec1 = page_rec_get_next_low(
2911 buf_block_get_frame(block)
2912 + PAGE_OLD_INFIMUM, FALSE);
2913 rec2 = page_rec_get_next_low(old_end, FALSE);
2914 }
2915
2916 /* Copy lock requests on user records to new page and
2917 reset the lock bits on the old */
2918
2919 while (rec1 != rec) {
2920 ut_ad(page_rec_is_default_row(rec1)
2921 == page_rec_is_default_row(rec2));
2922 ut_d(const rec_t* const prev = rec1);
2923
2924 ulint rec1_heap_no;
2925 ulint rec2_heap_no;
2926
2927 if (comp) {
2928 rec1_heap_no = rec_get_heap_no_new(rec1);
2929 rec2_heap_no = rec_get_heap_no_new(rec2);
2930
2931 rec1 = page_rec_get_next_low(rec1, TRUE);
2932 rec2 = page_rec_get_next_low(rec2, TRUE);
2933 } else {
2934 rec1_heap_no = rec_get_heap_no_old(rec1);
2935 rec2_heap_no = rec_get_heap_no_old(rec2);
2936
2937 ut_ad(!memcmp(rec1, rec2,
2938 rec_get_data_size_old(rec2)));
2939
2940 rec1 = page_rec_get_next_low(rec1, FALSE);
2941 rec2 = page_rec_get_next_low(rec2, FALSE);
2942 }
2943
2944 if (rec1_heap_no < lock->un_member.rec_lock.n_bits
2945 && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
2946 ut_ad(!page_rec_is_default_row(prev));
2947
2948 if (type_mode & LOCK_WAIT) {
2949 lock_reset_lock_and_trx_wait(lock);
2950 }
2951
2952 lock_rec_add_to_queue(
2953 type_mode, new_block, rec2_heap_no,
2954 lock->index, lock->trx, FALSE);
2955 }
2956 }
2957
2958#ifdef UNIV_DEBUG
2959 if (page_rec_is_supremum(rec)) {
2960 ulint i;
2961
2962 for (i = PAGE_HEAP_NO_USER_LOW;
2963 i < lock_rec_get_n_bits(lock); i++) {
2964 if (lock_rec_get_nth_bit(lock, i)) {
				ib::fatal()
					<< "lock_move_rec_list_start(): "
					<< i << " not moved in "
					<< (void*) lock;
2969 }
2970 }
2971 }
2972#endif /* UNIV_DEBUG */
2973 }
2974
2975 lock_mutex_exit();
2976
2977#ifdef UNIV_DEBUG_LOCK_VALIDATE
2978 ut_ad(lock_rec_validate_page(block));
2979#endif
2980}
2981
2982/*************************************************************//**
Moves the explicit locks on user records to another page when R-tree
records are moved to that page. */
2985void
2986lock_rtr_move_rec_list(
2987/*===================*/
2988 const buf_block_t* new_block, /*!< in: index page to
2989 move to */
2990 const buf_block_t* block, /*!< in: index page */
2991 rtr_rec_move_t* rec_move, /*!< in: recording records
2992 moved */
2993 ulint num_move) /*!< in: num of rec to move */
2994{
2995 lock_t* lock;
2996 ulint comp;
2997
2998 if (!num_move) {
2999 return;
3000 }
3001
3002 comp = page_rec_is_comp(rec_move[0].old_rec);
3003
3004 ut_ad(block->frame == page_align(rec_move[0].old_rec));
3005 ut_ad(new_block->frame == page_align(rec_move[0].new_rec));
3006 ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec));
3007
3008 lock_mutex_enter();
3009
3010 for (lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block); lock;
3011 lock = lock_rec_get_next_on_page(lock)) {
3012 ulint moved = 0;
3013 const rec_t* rec1;
3014 const rec_t* rec2;
3015 const ulint type_mode = lock->type_mode;
3016
3017 /* Copy lock requests on user records to new page and
3018 reset the lock bits on the old */
3019
3020 while (moved < num_move) {
3021 ulint rec1_heap_no;
3022 ulint rec2_heap_no;
3023
3024 rec1 = rec_move[moved].old_rec;
3025 rec2 = rec_move[moved].new_rec;
3026 ut_ad(!page_rec_is_default_row(rec1));
3027 ut_ad(!page_rec_is_default_row(rec2));
3028
3029 if (comp) {
3030 rec1_heap_no = rec_get_heap_no_new(rec1);
3031 rec2_heap_no = rec_get_heap_no_new(rec2);
3032
3033 } else {
3034 rec1_heap_no = rec_get_heap_no_old(rec1);
3035 rec2_heap_no = rec_get_heap_no_old(rec2);
3036
3037 ut_ad(!memcmp(rec1, rec2,
3038 rec_get_data_size_old(rec2)));
3039 }
3040
3041 if (rec1_heap_no < lock->un_member.rec_lock.n_bits
3042 && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
3043 if (type_mode & LOCK_WAIT) {
3044 lock_reset_lock_and_trx_wait(lock);
3045 }
3046
3047 lock_rec_add_to_queue(
3048 type_mode, new_block, rec2_heap_no,
3049 lock->index, lock->trx, FALSE);
3050
3051 rec_move[moved].moved = true;
3052 }
3053
3054 moved++;
3055 }
3056 }
3057
3058 lock_mutex_exit();
3059
3060#ifdef UNIV_DEBUG_LOCK_VALIDATE
3061 ut_ad(lock_rec_validate_page(block));
3062#endif
}

/*************************************************************//**
3065Updates the lock table when a page is split to the right. */
3066void
3067lock_update_split_right(
3068/*====================*/
3069 const buf_block_t* right_block, /*!< in: right page */
3070 const buf_block_t* left_block) /*!< in: left page */
3071{
3072 ulint heap_no = lock_get_min_heap_no(right_block);
3073
3074 lock_mutex_enter();
3075
3076 /* Move the locks on the supremum of the left page to the supremum
3077 of the right page */
3078
3079 lock_rec_move(right_block, left_block,
3080 PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3081
3082 /* Inherit the locks to the supremum of left page from the successor
3083 of the infimum on right page */
3084
3085 lock_rec_inherit_to_gap(left_block, right_block,
3086 PAGE_HEAP_NO_SUPREMUM, heap_no);
3087
3088 lock_mutex_exit();
3089}
3090
3091/*************************************************************//**
3092Updates the lock table when a page is merged to the right. */
3093void
3094lock_update_merge_right(
3095/*====================*/
3096 const buf_block_t* right_block, /*!< in: right page to
3097 which merged */
3098 const rec_t* orig_succ, /*!< in: original
3099 successor of infimum
3100 on the right page
3101 before merge */
3102 const buf_block_t* left_block) /*!< in: merged index
3103 page which will be
3104 discarded */
3105{
3106 ut_ad(!page_rec_is_default_row(orig_succ));
3107
3108 lock_mutex_enter();
3109
3110 /* Inherit the locks from the supremum of the left page to the
3111 original successor of infimum on the right page, to which the left
3112 page was merged */
3113
3114 lock_rec_inherit_to_gap(right_block, left_block,
3115 page_rec_get_heap_no(orig_succ),
3116 PAGE_HEAP_NO_SUPREMUM);
3117
3118 /* Reset the locks on the supremum of the left page, releasing
3119 waiting transactions */
3120
3121 lock_rec_reset_and_release_wait_low(
3122 lock_sys.rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
3123
	/* There should be no predicate page lock on the left page;
	otherwise the merge would have been blocked. */
3126 ut_ad(!lock_rec_get_first_on_page_addr(lock_sys.prdt_page_hash,
3127 left_block->page.id.space(),
3128 left_block->page.id.page_no()));
3129
3130 lock_rec_free_all_from_discard_page(left_block);
3131
3132 lock_mutex_exit();
3133}
3134
3135/*************************************************************//**
Updates the lock table when the root page is copied to another page in
btr_root_raise_and_insert. Note that we leave lock structs on the
root page, even though they do not make sense on pages other than leaf
pages: the reason is that in a pessimistic update the infimum record
of the root page will act as a dummy carrier of the locks of the record
to be updated. */
3142void
3143lock_update_root_raise(
3144/*===================*/
3145 const buf_block_t* block, /*!< in: index page to which copied */
3146 const buf_block_t* root) /*!< in: root page */
3147{
3148 lock_mutex_enter();
3149
3150 /* Move the locks on the supremum of the root to the supremum
3151 of block */
3152
3153 lock_rec_move(block, root,
3154 PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3155 lock_mutex_exit();
3156}
3157
3158/*************************************************************//**
3159Updates the lock table when a page is copied to another and the original page
3160is removed from the chain of leaf pages, except if page is the root! */
3161void
3162lock_update_copy_and_discard(
3163/*=========================*/
3164 const buf_block_t* new_block, /*!< in: index page to
3165 which copied */
3166 const buf_block_t* block) /*!< in: index page;
3167 NOT the root! */
3168{
3169 lock_mutex_enter();
3170
3171 /* Move the locks on the supremum of the old page to the supremum
3172 of new_page */
3173
3174 lock_rec_move(new_block, block,
3175 PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3176 lock_rec_free_all_from_discard_page(block);
3177
3178 lock_mutex_exit();
3179}
3180
3181/*************************************************************//**
3182Updates the lock table when a page is split to the left. */
3183void
3184lock_update_split_left(
3185/*===================*/
3186 const buf_block_t* right_block, /*!< in: right page */
3187 const buf_block_t* left_block) /*!< in: left page */
3188{
3189 ulint heap_no = lock_get_min_heap_no(right_block);
3190
3191 lock_mutex_enter();
3192
3193 /* Inherit the locks to the supremum of the left page from the
3194 successor of the infimum on the right page */
3195
3196 lock_rec_inherit_to_gap(left_block, right_block,
3197 PAGE_HEAP_NO_SUPREMUM, heap_no);
3198
3199 lock_mutex_exit();
3200}
3201
3202/*************************************************************//**
3203Updates the lock table when a page is merged to the left. */
3204void
3205lock_update_merge_left(
3206/*===================*/
3207 const buf_block_t* left_block, /*!< in: left page to
3208 which merged */
3209 const rec_t* orig_pred, /*!< in: original predecessor
3210 of supremum on the left page
3211 before merge */
3212 const buf_block_t* right_block) /*!< in: merged index page
3213 which will be discarded */
3214{
3215 const rec_t* left_next_rec;
3216
3217 ut_ad(left_block->frame == page_align(orig_pred));
3218
3219 lock_mutex_enter();
3220
3221 left_next_rec = page_rec_get_next_const(orig_pred);
3222
3223 if (!page_rec_is_supremum(left_next_rec)) {
3224
3225 /* Inherit the locks on the supremum of the left page to the
3226 first record which was moved from the right page */
3227
3228 lock_rec_inherit_to_gap(left_block, left_block,
3229 page_rec_get_heap_no(left_next_rec),
3230 PAGE_HEAP_NO_SUPREMUM);
3231
3232 /* Reset the locks on the supremum of the left page,
3233 releasing waiting transactions */
3234
3235 lock_rec_reset_and_release_wait_low(
3236 lock_sys.rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
3237 }
3238
3239 /* Move the locks from the supremum of right page to the supremum
3240 of the left page */
3241
3242 lock_rec_move(left_block, right_block,
3243 PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3244
	/* There should be no predicate page lock on the right page;
	otherwise the merge would have been blocked. */
3247 ut_ad(!lock_rec_get_first_on_page_addr(
3248 lock_sys.prdt_page_hash,
3249 right_block->page.id.space(),
3250 right_block->page.id.page_no()));
3251
3252 lock_rec_free_all_from_discard_page(right_block);
3253
3254 lock_mutex_exit();
3255}
3256
3257/*************************************************************//**
3258Resets the original locks on heir and replaces them with gap type locks
3259inherited from rec. */
3260void
3261lock_rec_reset_and_inherit_gap_locks(
3262/*=================================*/
3263 const buf_block_t* heir_block, /*!< in: block containing the
3264 record which inherits */
3265 const buf_block_t* block, /*!< in: block containing the
3266 record from which inherited;
3267 does NOT reset the locks on
3268 this record */
3269 ulint heir_heap_no, /*!< in: heap_no of the
3270 inheriting record */
3271 ulint heap_no) /*!< in: heap_no of the
3272 donating record */
3273{
3274 lock_mutex_enter();
3275
3276 lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
3277
3278 lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
3279
3280 lock_mutex_exit();
3281}
3282
3283/*************************************************************//**
3284Updates the lock table when a page is discarded. */
3285void
3286lock_update_discard(
3287/*================*/
3288 const buf_block_t* heir_block, /*!< in: index page
3289 which will inherit the locks */
3290 ulint heir_heap_no, /*!< in: heap_no of the record
3291 which will inherit the locks */
3292 const buf_block_t* block) /*!< in: index page
3293 which will be discarded */
3294{
3295 const page_t* page = block->frame;
3296 const rec_t* rec;
3297 ulint heap_no;
3298
3299 lock_mutex_enter();
3300
3301 if (!lock_rec_get_first_on_page(lock_sys.rec_hash, block)
3302 && (!lock_rec_get_first_on_page(lock_sys.prdt_hash, block))) {
3303 /* No locks exist on page, nothing to do */
3304
3305 lock_mutex_exit();
3306
3307 return;
3308 }
3309
3310 /* Inherit all the locks on the page to the record and reset all
3311 the locks on the page */
3312
3313 if (page_is_comp(page)) {
3314 rec = page + PAGE_NEW_INFIMUM;
3315
3316 do {
3317 heap_no = rec_get_heap_no_new(rec);
3318
3319 lock_rec_inherit_to_gap(heir_block, block,
3320 heir_heap_no, heap_no);
3321
3322 lock_rec_reset_and_release_wait(block, heap_no);
3323
3324 rec = page + rec_get_next_offs(rec, TRUE);
3325 } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3326 } else {
3327 rec = page + PAGE_OLD_INFIMUM;
3328
3329 do {
3330 heap_no = rec_get_heap_no_old(rec);
3331
3332 lock_rec_inherit_to_gap(heir_block, block,
3333 heir_heap_no, heap_no);
3334
3335 lock_rec_reset_and_release_wait(block, heap_no);
3336
3337 rec = page + rec_get_next_offs(rec, FALSE);
3338 } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3339 }
3340
3341 lock_rec_free_all_from_discard_page(block);
3342
3343 lock_mutex_exit();
3344}
3345
3346/*************************************************************//**
3347Updates the lock table when a new user record is inserted. */
3348void
3349lock_update_insert(
3350/*===============*/
3351 const buf_block_t* block, /*!< in: buffer block containing rec */
3352 const rec_t* rec) /*!< in: the inserted record */
3353{
3354 ulint receiver_heap_no;
3355 ulint donator_heap_no;
3356
3357 ut_ad(block->frame == page_align(rec));
3358 ut_ad(!page_rec_is_default_row(rec));
3359
	/* Inherit the gap locks for rec, in gap mode, from the next
	record */
3362
3363 if (page_rec_is_comp(rec)) {
3364 receiver_heap_no = rec_get_heap_no_new(rec);
3365 donator_heap_no = rec_get_heap_no_new(
3366 page_rec_get_next_low(rec, TRUE));
3367 } else {
3368 receiver_heap_no = rec_get_heap_no_old(rec);
3369 donator_heap_no = rec_get_heap_no_old(
3370 page_rec_get_next_low(rec, FALSE));
3371 }
3372
3373 lock_rec_inherit_to_gap_if_gap_lock(
3374 block, receiver_heap_no, donator_heap_no);
3375}
3376
3377/*************************************************************//**
3378Updates the lock table when a record is removed. */
3379void
3380lock_update_delete(
3381/*===============*/
3382 const buf_block_t* block, /*!< in: buffer block containing rec */
3383 const rec_t* rec) /*!< in: the record to be removed */
3384{
3385 const page_t* page = block->frame;
3386 ulint heap_no;
3387 ulint next_heap_no;
3388
3389 ut_ad(page == page_align(rec));
3390 ut_ad(!page_rec_is_default_row(rec));
3391
3392 if (page_is_comp(page)) {
3393 heap_no = rec_get_heap_no_new(rec);
3394 next_heap_no = rec_get_heap_no_new(page
3395 + rec_get_next_offs(rec,
3396 TRUE));
3397 } else {
3398 heap_no = rec_get_heap_no_old(rec);
3399 next_heap_no = rec_get_heap_no_old(page
3400 + rec_get_next_offs(rec,
3401 FALSE));
3402 }
3403
3404 lock_mutex_enter();
3405
3406 /* Let the next record inherit the locks from rec, in gap mode */
3407
3408 lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
3409
3410 /* Reset the lock bits on rec and release waiting transactions */
3411
3412 lock_rec_reset_and_release_wait(block, heap_no);
3413
3414 lock_mutex_exit();
3415}
3416
3417/*********************************************************************//**
3418Stores on the page infimum record the explicit locks of another record.
3419This function is used to store the lock state of a record when it is
3420updated and the size of the record changes in the update. The record
3421is moved in such an update, perhaps to another page. The infimum record
3422acts as a dummy carrier record, taking care of lock releases while the
3423actual record is being moved. */
3424void
3425lock_rec_store_on_page_infimum(
3426/*===========================*/
3427 const buf_block_t* block, /*!< in: buffer block containing rec */
3428 const rec_t* rec) /*!< in: record whose lock state
3429 is stored on the infimum
3430 record of the same page; lock
3431 bits are reset on the
3432 record */
3433{
3434 ulint heap_no = page_rec_get_heap_no(rec);
3435
3436 ut_ad(block->frame == page_align(rec));
3437
3438 lock_mutex_enter();
3439
3440 lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
3441
3442 lock_mutex_exit();
3443}
3444
3445/*********************************************************************//**
3446Restores the state of explicit lock requests on a single record, where the
3447state was stored on the infimum of the page. */
3448void
3449lock_rec_restore_from_page_infimum(
3450/*===============================*/
3451 const buf_block_t* block, /*!< in: buffer block containing rec */
3452 const rec_t* rec, /*!< in: record whose lock state
3453 is restored */
3454 const buf_block_t* donator)/*!< in: page (rec is not
3455 necessarily on this page)
3456 whose infimum stored the lock
3457 state; lock bits are reset on
3458 the infimum */
3459{
3460 ulint heap_no = page_rec_get_heap_no(rec);
3461
3462 lock_mutex_enter();
3463
3464 lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
3465
3466 lock_mutex_exit();
3467}
3468
3469/*========================= TABLE LOCKS ==============================*/
3470
3471/** Functor for accessing the embedded node within a table lock. */
3472struct TableLockGetNode {
3473 ut_list_node<lock_t>& operator() (lock_t& elem)
3474 {
3475 return(elem.un_member.tab_lock.locks);
3476 }
3477};
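
/* Illustrative sketch (not part of the server): the ut_list_* primitives
take such a functor so that one lock_t can sit on several intrusive lists
at once; the functor tells the list code where this particular list's
node lives inside the element. A standalone model with invented types:

   template <typename T>
   struct ToyNode { T* prev; T* next; };

   struct ToyLock {
           ToyNode<ToyLock> table_node;    // chain of locks on a table
           ToyNode<ToyLock> trx_node;      // chain of locks of a trx
   };

   struct TableNodeGetter {
           ToyNode<ToyLock>& operator()(ToyLock& elem)
           {
                   return elem.table_node;
           }
   };
*/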
3478
3479/*********************************************************************//**
3480Creates a table lock object and adds it as the last in the lock queue
3481of the table. Does NOT check for deadlocks or lock compatibility.
3482@return own: new lock object */
3483UNIV_INLINE
3484lock_t*
3485lock_table_create(
3486/*==============*/
3487 dict_table_t* table, /*!< in/out: database table
3488 in dictionary cache */
3489 ulint type_mode,/*!< in: lock mode possibly ORed with
3490 LOCK_WAIT */
3491 trx_t* trx /*!< in: trx */
3492#ifdef WITH_WSREP
3493 , lock_t* c_lock = NULL /*!< in: conflicting lock */
3494#endif
3495 )
3496{
3497 lock_t* lock;
3498
3499 ut_ad(table && trx);
3500 ut_ad(lock_mutex_own());
3501 ut_ad(trx_mutex_own(trx));
3502
3503 check_trx_state(trx);
3504
3505 if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
3506 ++table->n_waiting_or_granted_auto_inc_locks;
3507 }
3508
	/* For AUTOINC locking we reuse the lock instance only if
	there is no wait involved; otherwise we allocate the waiting lock
	from the transaction lock heap. */
3512 if (type_mode == LOCK_AUTO_INC) {
3513
3514 lock = table->autoinc_lock;
3515
3516 table->autoinc_trx = trx;
3517
3518 ib_vector_push(trx->autoinc_locks, &lock);
3519
3520 } else if (trx->lock.table_cached < trx->lock.table_pool.size()) {
3521 lock = trx->lock.table_pool[trx->lock.table_cached++];
3522 } else {
3523
3524 lock = static_cast<lock_t*>(
3525 mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));
3526
3527 }
3528
3529 lock->type_mode = ib_uint32_t(type_mode | LOCK_TABLE);
3530 lock->trx = trx;
3531
3532 lock->un_member.tab_lock.table = table;
3533
3534 ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);
3535
3536 UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
3537
3538#ifdef WITH_WSREP
3539 if (c_lock) {
3540 if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
3541 ut_list_insert(table->locks, c_lock, lock,
3542 TableLockGetNode());
3543 if (wsrep_debug) {
3544 ib::info() << "table lock BF conflict for "
3545 << ib::hex(c_lock->trx->id)
3546 << " SQL: "
3547 << wsrep_thd_query(
3548 c_lock->trx->mysql_thd);
3549 }
3550 } else {
3551 ut_list_append(table->locks, lock, TableLockGetNode());
3552 }
3553
3554 trx_mutex_enter(c_lock->trx);
3555
3556 if (c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
3557 c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;
3558
3559 if (wsrep_debug) {
3560 wsrep_print_wait_locks(c_lock);
3561 }
3562
3563 /* The lock release will call lock_grant(),
3564 which would acquire trx->mutex again. */
3565 trx_mutex_exit(trx);
3566 lock_cancel_waiting_and_release(
3567 c_lock->trx->lock.wait_lock);
3568 trx_mutex_enter(trx);
3569
3570 if (wsrep_debug) {
3571 ib::info() << "WSREP: c_lock canceled "
3572 << ib::hex(c_lock->trx->id)
3573 << " SQL: "
3574 << wsrep_thd_query(
3575 c_lock->trx->mysql_thd);
3576 }
3577 }
3578
3579 trx_mutex_exit(c_lock->trx);
3580 } else
3581#endif /* WITH_WSREP */
3582 ut_list_append(table->locks, lock, TableLockGetNode());
3583
3584 if (type_mode & LOCK_WAIT) {
3585
3586 lock_set_lock_and_trx_wait(lock, trx);
3587 }
3588
3589 lock->trx->lock.table_locks.push_back(lock);
3590
3591 MONITOR_INC(MONITOR_TABLELOCK_CREATED);
3592 MONITOR_INC(MONITOR_NUM_TABLELOCK);
3593
3594 return(lock);
3595}
3596
3597/*************************************************************//**
Pops autoinc lock requests from the transaction's autoinc_locks vector.
Handles the case where there are gaps (NULL entries) in the vector that
need to be popped off the stack. */
3601UNIV_INLINE
3602void
3603lock_table_pop_autoinc_locks(
3604/*=========================*/
3605 trx_t* trx) /*!< in/out: transaction that owns the AUTOINC locks */
3606{
3607 ut_ad(lock_mutex_own());
3608 ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
3609
3610 /* Skip any gaps, gaps are NULL lock entries in the
3611 trx->autoinc_locks vector. */
3612
3613 do {
3614 ib_vector_pop(trx->autoinc_locks);
3615
3616 if (ib_vector_is_empty(trx->autoinc_locks)) {
3617 return;
3618 }
3619
3620 } while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL);
3621}
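
/* Illustrative sketch (not part of the server): releasing an AUTOINC lock
from the middle of the vector leaves a NULL "gap" behind, so popping must
discard the top element together with any trailing gaps, as above. The
same invariant on a std::vector used as a stack:

   #include <vector>

   struct ToyLock;

   static void pop_autoinc(std::vector<ToyLock*>& stack)
   {
           if (stack.empty()) {
                   return;
           }
           do {
                   stack.pop_back();
           } while (!stack.empty() && stack.back() == nullptr);
   }
*/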
3622
3623/*************************************************************//**
3624Removes an autoinc lock request from the transaction's autoinc_locks. */
3625UNIV_INLINE
3626void
3627lock_table_remove_autoinc_lock(
3628/*===========================*/
3629 lock_t* lock, /*!< in: table lock */
3630 trx_t* trx) /*!< in/out: transaction that owns the lock */
3631{
3632 lock_t* autoinc_lock;
3633 lint i = ib_vector_size(trx->autoinc_locks) - 1;
3634
3635 ut_ad(lock_mutex_own());
3636 ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC);
3637 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
3638 ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
3639
3640 /* With stored functions and procedures the user may drop
3641 a table within the same "statement". This special case has
3642 to be handled by deleting only those AUTOINC locks that were
3643 held by the table being dropped. */
3644
3645 autoinc_lock = *static_cast<lock_t**>(
3646 ib_vector_get(trx->autoinc_locks, i));
3647
3648 /* This is the default fast case. */
3649
3650 if (autoinc_lock == lock) {
3651 lock_table_pop_autoinc_locks(trx);
3652 } else {
3653 /* The last element should never be NULL */
3654 ut_a(autoinc_lock != NULL);
3655
3656 /* Handle freeing the locks from within the stack. */
3657
3658 while (--i >= 0) {
3659 autoinc_lock = *static_cast<lock_t**>(
3660 ib_vector_get(trx->autoinc_locks, i));
3661
3662 if (autoinc_lock == lock) {
3663 void* null_var = NULL;
3664 ib_vector_set(trx->autoinc_locks, i, &null_var);
3665 return;
3666 }
3667 }
3668
3669 /* Must find the autoinc lock. */
3670 ut_error;
3671 }
3672}
3673
3674/*************************************************************//**
3675Removes a table lock request from the queue and the trx list of locks;
3676this is a low-level function which does NOT check if waiting requests
3677can now be granted. */
3678UNIV_INLINE
3679void
3680lock_table_remove_low(
3681/*==================*/
3682 lock_t* lock) /*!< in/out: table lock */
3683{
3684 trx_t* trx;
3685 dict_table_t* table;
3686
3687 ut_ad(lock_mutex_own());
3688
3689 trx = lock->trx;
3690 table = lock->un_member.tab_lock.table;
3691
	/* Remove the lock from the transaction's AUTOINC vector, if
	the lock that is being released is an AUTOINC lock. */
3694 if (lock_get_mode(lock) == LOCK_AUTO_INC) {
3695
3696 /* The table's AUTOINC lock can get transferred to
3697 another transaction before we get here. */
3698 if (table->autoinc_trx == trx) {
3699 table->autoinc_trx = NULL;
3700 }
3701
3702 /* The locks must be freed in the reverse order from
3703 the one in which they were acquired. This is to avoid
3704 traversing the AUTOINC lock vector unnecessarily.
3705
3706 We only store locks that were granted in the
3707 trx->autoinc_locks vector (see lock_table_create()
3708 and lock_grant()). Therefore it can be empty and we
3709 need to check for that. */
3710
3711 if (!lock_get_wait(lock)
3712 && !ib_vector_is_empty(trx->autoinc_locks)) {
3713
3714 lock_table_remove_autoinc_lock(lock, trx);
3715 }
3716
3717 ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
3718 table->n_waiting_or_granted_auto_inc_locks--;
3719 }
3720
3721 UT_LIST_REMOVE(trx->lock.trx_locks, lock);
3722 ut_list_remove(table->locks, lock, TableLockGetNode());
3723
3724 MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
3725 MONITOR_DEC(MONITOR_NUM_TABLELOCK);
3726}
3727
3728/*********************************************************************//**
3729Enqueues a waiting request for a table lock which cannot be granted
3730immediately. Checks for deadlocks.
3731@retval DB_LOCK_WAIT if the waiting lock was enqueued
3732@retval DB_DEADLOCK if this transaction was chosen as the victim
3733@retval DB_SUCCESS if the other transaction committed or aborted */
3734static
3735dberr_t
3736lock_table_enqueue_waiting(
3737/*=======================*/
3738 ulint mode, /*!< in: lock mode this transaction is
3739 requesting */
3740 dict_table_t* table, /*!< in/out: table */
3741 que_thr_t* thr /*!< in: query thread */
3742#ifdef WITH_WSREP
3743 , lock_t* c_lock /*!< in: conflicting lock or NULL */
3744#endif
3745)
3746{
3747 trx_t* trx;
3748 lock_t* lock;
3749
3750 ut_ad(lock_mutex_own());
3751 ut_ad(!srv_read_only_mode);
3752
3753 trx = thr_get_trx(thr);
3754 ut_ad(trx_mutex_own(trx));
3755 ut_a(!que_thr_stop(thr));
3756
3757 switch (trx_get_dict_operation(trx)) {
3758 case TRX_DICT_OP_NONE:
3759 break;
3760 case TRX_DICT_OP_TABLE:
3761 case TRX_DICT_OP_INDEX:
3762 ib::error() << "A table lock wait happens in a dictionary"
3763 " operation. Table " << table->name
3764 << ". " << BUG_REPORT_MSG;
3765 ut_ad(0);
3766 }
3767
3768#ifdef WITH_WSREP
3769 if (trx->lock.was_chosen_as_deadlock_victim) {
3770 return(DB_DEADLOCK);
3771 }
3772#endif /* WITH_WSREP */
3773
3774 /* Enqueue the lock request that will wait to be granted */
3775 lock = lock_table_create(table, ulint(mode) | LOCK_WAIT, trx
3776#ifdef WITH_WSREP
3777 , c_lock
3778#endif
3779 );
3780
3781 const trx_t* victim_trx =
3782 DeadlockChecker::check_and_resolve(lock, trx);
3783
3784 if (victim_trx != 0) {
3785 ut_ad(victim_trx == trx);
3786
		/* The order here is important: we don't want to
		lose the state of the lock before calling remove. */
3789 lock_table_remove_low(lock);
3790 lock_reset_lock_and_trx_wait(lock);
3791
3792 return(DB_DEADLOCK);
3793
3794 } else if (trx->lock.wait_lock == NULL) {
3795 /* Deadlock resolution chose another transaction as a victim,
3796 and we accidentally got our lock granted! */
3797
3798 return(DB_SUCCESS);
3799 }
3800
3801 trx->lock.que_state = TRX_QUE_LOCK_WAIT;
3802
3803 trx->lock.wait_started = ut_time();
3804 trx->lock.was_chosen_as_deadlock_victim = false;
3805
3806 ut_a(que_thr_stop(thr));
3807
3808 MONITOR_INC(MONITOR_TABLELOCK_WAIT);
3809
3810 return(DB_LOCK_WAIT);
3811}
3812
3813/*********************************************************************//**
3814Checks if other transactions have an incompatible mode lock request in
3815the lock queue.
3816@return lock or NULL */
3817UNIV_INLINE
3818lock_t*
3819lock_table_other_has_incompatible(
3820/*==============================*/
3821 const trx_t* trx, /*!< in: transaction, or NULL if all
3822 transactions should be included */
3823 ulint wait, /*!< in: LOCK_WAIT if also
3824 waiting locks are taken into
3825 account, or 0 if not */
3826 const dict_table_t* table, /*!< in: table */
3827 lock_mode mode) /*!< in: lock mode */
3828{
3829 lock_t* lock;
3830
3831 ut_ad(lock_mutex_own());
3832
3833 for (lock = UT_LIST_GET_LAST(table->locks);
3834 lock != NULL;
3835 lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
3836
3837 if (lock->trx != trx
3838 && !lock_mode_compatible(lock_get_mode(lock), mode)
3839 && (wait || !lock_get_wait(lock))) {
3840
3841#ifdef WITH_WSREP
3842 if (wsrep_on(lock->trx->mysql_thd)) {
3843 if (wsrep_debug) {
3844 ib::info() << "WSREP: table lock abort for table:"
3845 << table->name.m_name;
3846 ib::info() << " SQL: "
3847 << wsrep_thd_query(lock->trx->mysql_thd);
3848 }
3849 trx_mutex_enter(lock->trx);
3850 wsrep_kill_victim((trx_t *)trx, (lock_t *)lock);
3851 trx_mutex_exit(lock->trx);
3852 }
3853#endif /* WITH_WSREP */
3854
3855 return(lock);
3856 }
3857 }
3858
3859 return(NULL);
3860}
3861
3862/*********************************************************************//**
3863Locks the specified database table in the mode given. If the lock cannot
3864be granted immediately, the query thread is put to wait.
3865@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
3866dberr_t
3867lock_table(
3868/*=======*/
3869 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
3870 does nothing */
3871 dict_table_t* table, /*!< in/out: database table
3872 in dictionary cache */
3873 lock_mode mode, /*!< in: lock mode */
3874 que_thr_t* thr) /*!< in: query thread */
3875{
3876 trx_t* trx;
3877 dberr_t err;
3878 lock_t* wait_for;
3879
3880 ut_ad(table && thr);
3881
	/* Given the limited visibility of temporary tables, we can
	avoid the locking overhead. */
3884 if ((flags & BTR_NO_LOCKING_FLAG)
3885 || srv_read_only_mode
3886 || table->is_temporary()) {
3887
3888 return(DB_SUCCESS);
3889 }
3890
3891 ut_a(flags == 0);
3892
3893 trx = thr_get_trx(thr);
3894
	/* Look for equal or stronger locks the same trx already
	has on the table. No need to acquire the lock mutex here
	because only this transaction can add entries to
	trx_t::table_locks or read them. */
3899
3900 if (lock_table_has(trx, table, mode)) {
3901
3902 return(DB_SUCCESS);
3903 }
3904
	/* Read-only transactions can write to temporary tables; we
	don't want to promote them to RW transactions. Their updates
	cannot be visible to other transactions, so we can keep them
	out of the read views. */
3909
3910 if ((mode == LOCK_IX || mode == LOCK_X)
3911 && !trx->read_only
3912 && trx->rsegs.m_redo.rseg == 0) {
3913
3914 trx_set_rw_mode(trx);
3915 }
3916
3917 lock_mutex_enter();
3918
3919 DBUG_EXECUTE_IF("fatal-semaphore-timeout",
3920 { os_thread_sleep(3600000000LL); });
3921
3922 /* We have to check if the new lock is compatible with any locks
3923 other transactions have in the table lock queue. */
3924
3925 wait_for = lock_table_other_has_incompatible(
3926 trx, LOCK_WAIT, table, mode);
3927
3928 trx_mutex_enter(trx);
3929
3930 /* Another trx has a request on the table in an incompatible
3931 mode: this trx may have to wait */
3932
3933 if (wait_for != NULL) {
3934 err = lock_table_enqueue_waiting(ulint(mode) | flags, table,
3935 thr
3936#ifdef WITH_WSREP
3937 , wait_for
3938#endif
3939 );
3940 } else {
3941 lock_table_create(table, ulint(mode) | flags, trx);
3942
3943 ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
3944
3945 err = DB_SUCCESS;
3946 }
3947
3948 lock_mutex_exit();
3949
3950 trx_mutex_exit(trx);
3951
3952 return(err);
3953}
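
/* Illustrative caller sketch (not part of the original sources): a caller
typically records the result in trx->error_state and suspends the query
thread on DB_LOCK_WAIT, roughly:

	err = lock_table(0, table, mode, thr);
	trx->error_state = err;

	if (err == DB_SUCCESS) {
		que_thr_stop_for_mysql_no_error(thr, trx);
	} else {
		que_thr_stop_for_mysql(thr);
		// row_mysql_handle_errors() may suspend and retry
	}

See lock_table_for_trx() below for the complete pattern. */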
3954
3955/*********************************************************************//**
3956Creates a table IX lock object for a resurrected transaction. */
3957void
3958lock_table_ix_resurrect(
3959/*====================*/
3960 dict_table_t* table, /*!< in/out: table */
3961 trx_t* trx) /*!< in/out: transaction */
3962{
3963 ut_ad(trx->is_recovered);
3964
3965 if (lock_table_has(trx, table, LOCK_IX)) {
3966 return;
3967 }
3968
3969 lock_mutex_enter();
3970
3971 /* We have to check if the new lock is compatible with any locks
3972 other transactions have in the table lock queue. */
3973
3974 ut_ad(!lock_table_other_has_incompatible(
3975 trx, LOCK_WAIT, table, LOCK_IX));
3976
3977 trx_mutex_enter(trx);
3978 lock_table_create(table, LOCK_IX, trx);
3979 lock_mutex_exit();
3980 trx_mutex_exit(trx);
3981}
3982
3983/*********************************************************************//**
3984Checks if a waiting table lock request still has to wait in a queue.
3985@return TRUE if still has to wait */
3986static
3987bool
3988lock_table_has_to_wait_in_queue(
3989/*============================*/
3990 const lock_t* wait_lock) /*!< in: waiting table lock */
3991{
3992 const dict_table_t* table;
3993 const lock_t* lock;
3994
3995 ut_ad(lock_mutex_own());
3996 ut_ad(lock_get_wait(wait_lock));
3997
3998 table = wait_lock->un_member.tab_lock.table;
3999
4000 for (lock = UT_LIST_GET_FIRST(table->locks);
4001 lock != wait_lock;
4002 lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4003
4004 if (lock_has_to_wait(wait_lock, lock)) {
4005
4006 return(true);
4007 }
4008 }
4009
4010 return(false);
4011}
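
/* Worked example (not part of the original sources): with a queue
[T1: S granted, T2: X waiting, T3: S waiting], the check for T3's request
scans T1's S lock (compatible) and T2's X lock (incompatible) and returns
true. Waiting requests are thus served in queue order: T3 may not jump
ahead of T2's earlier incompatible request. */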
4012
4013/*************************************************************//**
4014Removes a table lock request, waiting or granted, from the queue and grants
4015locks to other transactions in the queue, if they now are entitled to a
4016lock. */
4017static
4018void
4019lock_table_dequeue(
4020/*===============*/
4021 lock_t* in_lock)/*!< in/out: table lock object; transactions waiting
4022 behind will get their lock requests granted, if
4023 they are now qualified to it */
4024{
4025 ut_ad(lock_mutex_own());
4026 ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
4027
4028 lock_t* lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
4029
4030 lock_table_remove_low(in_lock);
4031
4032 /* Check if waiting locks in the queue can now be granted: grant
4033 locks if there are no conflicting locks ahead. */
4034
4035 for (/* No op */;
4036 lock != NULL;
4037 lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4038
4039 if (lock_get_wait(lock)
4040 && !lock_table_has_to_wait_in_queue(lock)) {
4041
4042 /* Grant the lock */
4043 ut_ad(in_lock->trx != lock->trx);
4044 lock_grant(lock);
4045 }
4046 }
4047}
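
/* Worked example (not part of the original sources): when the X lock at
the head of [T1: X granted, T2: S waiting, T3: S waiting] is dequeued,
the loop above grants both waiting S requests, because neither has an
incompatible lock ahead of it in the queue any more. */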
4048
4049/** Sets a lock on a table based on the given mode.
4050@param[in] table table to lock
4051@param[in,out] trx transaction
4052@param[in] mode LOCK_X or LOCK_S
4053@return error code or DB_SUCCESS. */
4054dberr_t
4055lock_table_for_trx(
4056 dict_table_t* table,
4057 trx_t* trx,
4058 enum lock_mode mode)
4059{
4060 mem_heap_t* heap;
4061 que_thr_t* thr;
4062 dberr_t err;
4063 sel_node_t* node;
4064 heap = mem_heap_create(512);
4065
4066 node = sel_node_create(heap);
4067 thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
4068 thr->graph->state = QUE_FORK_ACTIVE;
4069
4070 /* We use the select query graph as the dummy graph needed
4071 in the lock module call */
4072
4073 thr = static_cast<que_thr_t*>(
4074 que_fork_get_first_thr(
4075 static_cast<que_fork_t*>(que_node_get_parent(thr))));
4076
4077 que_thr_move_to_run_state_for_mysql(thr, trx);
4078
4079run_again:
4080 thr->run_node = thr;
4081 thr->prev_node = thr->common.parent;
4082
4083 err = lock_table(0, table, mode, thr);
4084
4085 trx->error_state = err;
4086
4087 if (UNIV_LIKELY(err == DB_SUCCESS)) {
4088 que_thr_stop_for_mysql_no_error(thr, trx);
4089 } else {
4090 que_thr_stop_for_mysql(thr);
4091
4092 if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
4093 goto run_again;
4094 }
4095 }
4096
4097 que_graph_free(thr->graph);
4098 trx->op_info = "";
4099
4100 return(err);
4101}
4102
4103/*=========================== LOCK RELEASE ==============================*/
4104static
4105void
4106lock_grant_and_move_on_rec(
4107 hash_table_t* lock_hash,
4108 lock_t* first_lock,
4109 ulint heap_no)
4110{
4111 lock_t* lock;
4112 lock_t* previous;
4113 ulint space;
4114 ulint page_no;
4115 ulint rec_fold;
4116
4117 space = first_lock->un_member.rec_lock.space;
4118 page_no = first_lock->un_member.rec_lock.page_no;
4119 rec_fold = lock_rec_fold(space, page_no);
4120
4121 previous = (lock_t *) hash_get_nth_cell(lock_hash,
4122 hash_calc_hash(rec_fold, lock_hash))->node;
4123 if (previous == NULL) {
4124 return;
4125 }
4126 if (previous == first_lock) {
4127 lock = previous;
4128 } else {
4129 while (previous->hash &&
4130 previous->hash != first_lock) {
4131 previous = previous->hash;
4132 }
4133 lock = previous->hash;
4134 }
4135 /* Grant locks if there are no conflicting locks ahead.
4136 Move granted locks to the head of the list. */
	while (lock != NULL) {
4138
		/* Grant the lock if it is a waiting lock on this page
		and it no longer needs to wait. */
4140 if (lock->un_member.rec_lock.space == space
4141 && lock->un_member.rec_lock.page_no == page_no
4142 && lock_rec_get_nth_bit(lock, heap_no)
4143 && lock_get_wait(lock)
4144 && !lock_rec_has_to_wait_in_queue(lock)) {
4145
4146 lock_grant(lock);
4147
4148 if (previous != NULL) {
4149 /* Move the lock to the head of the list. */
4150 HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
4151 lock_rec_insert_to_head(lock, rec_fold);
4152 } else {
4153 /* Already at the head of the list. */
4154 previous = lock;
4155 }
4156 /* Move on to the next lock. */
4157 lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
4158 } else {
4159 previous = lock;
4160 lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
4161 }
4162 }
4163}
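
/* Illustrative example (not part of the original sources): this is the
non-FCFS scheduling path (see the INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS
checks in lock_rec_unlock() below). If the hash chain for the page is
[A granted, B waiting, C waiting] and B no longer has to wait, B is
granted, unlinked, and reinserted at the head, yielding
[B granted, A granted, C waiting]: granted locks accumulate in front of
the remaining waiters. */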
4164
4165/*************************************************************//**
4166Removes a granted record lock of a transaction from the queue and grants
4167locks to other transactions waiting in the queue if they now are entitled
4168to a lock. */
4169void
4170lock_rec_unlock(
4171/*============*/
4172 trx_t* trx, /*!< in/out: transaction that has
4173 set a record lock */
4174 const buf_block_t* block, /*!< in: buffer block containing rec */
4175 const rec_t* rec, /*!< in: record */
4176 lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */
4177{
4178 lock_t* first_lock;
4179 lock_t* lock;
4180 ulint heap_no;
4181
4182 ut_ad(trx);
4183 ut_ad(rec);
4184 ut_ad(block->frame == page_align(rec));
4185 ut_ad(!trx->lock.wait_lock);
4186 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
4187 ut_ad(!page_rec_is_default_row(rec));
4188
4189 heap_no = page_rec_get_heap_no(rec);
4190
4191 lock_mutex_enter();
4192 trx_mutex_enter(trx);
4193
4194 first_lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
4195
	/* Find the first lock with the same lock_mode and transaction
	on the record. */
4198
4199 for (lock = first_lock; lock != NULL;
4200 lock = lock_rec_get_next(heap_no, lock)) {
4201 if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
4202 goto released;
4203 }
4204 }
4205
4206 lock_mutex_exit();
4207 trx_mutex_exit(trx);
4208
4209 {
4210 ib::error err;
4211 err << "Unlock row could not find a " << lock_mode
4212 << " mode lock on the record. Current statement: ";
4213 size_t stmt_len;
4214 if (const char* stmt = innobase_get_stmt_unsafe(
4215 trx->mysql_thd, &stmt_len)) {
4216 err.write(stmt, stmt_len);
4217 }
4218 }
4219
4220 return;
4221
4222released:
4223 ut_a(!lock_get_wait(lock));
4224 lock_rec_reset_nth_bit(lock, heap_no);
4225
4226 if (innodb_lock_schedule_algorithm
4227 == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
4228 thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
4229
4230 /* Check if we can now grant waiting lock requests */
4231
4232 for (lock = first_lock; lock != NULL;
4233 lock = lock_rec_get_next(heap_no, lock)) {
4234 if (lock_get_wait(lock)
4235 && !lock_rec_has_to_wait_in_queue(lock)) {
4236
4237 /* Grant the lock */
4238 ut_ad(trx != lock->trx);
4239 lock_grant(lock);
4240 }
4241 }
4242 } else {
4243 lock_grant_and_move_on_rec(lock_sys.rec_hash, first_lock, heap_no);
4244 }
4245
4246 lock_mutex_exit();
4247 trx_mutex_exit(trx);
4248}
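
/* Example scenario (an assumption, not stated in this file): under
READ COMMITTED, row_unlock_for_mysql() may use this function to release
the record lock on a row that a locking SELECT examined but did not
match, which is why the transaction is asserted above to be still ACTIVE
and not waiting for any lock. */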
4249
4250#ifdef UNIV_DEBUG
4251/*********************************************************************//**
4252Check if a transaction that has X or IX locks has set the dict_op
4253code correctly. */
4254static
4255void
4256lock_check_dict_lock(
4257/*==================*/
4258 const lock_t* lock) /*!< in: lock to check */
4259{
4260 if (lock_get_type_low(lock) == LOCK_REC) {
4261
		/* Check if the transaction locked a record
		in a system table in X mode. It should have set
		the dict_op code correctly if it did. */
4265 if (lock->index->table->id < DICT_HDR_FIRST_ID
4266 && lock_get_mode(lock) == LOCK_X) {
4267
4268 ut_ad(lock_get_mode(lock) != LOCK_IX);
4269 ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
4270 }
4271 } else {
4272 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4273
4274 const dict_table_t* table;
4275
4276 table = lock->un_member.tab_lock.table;
4277
		/* Check if the transaction locked a system table
		in IX mode. It should have set the dict_op code
		correctly if it did. */
4281 if (table->id < DICT_HDR_FIRST_ID
4282 && (lock_get_mode(lock) == LOCK_X
4283 || lock_get_mode(lock) == LOCK_IX)) {
4284
4285 ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
4286 }
4287 }
4288}
4289#endif /* UNIV_DEBUG */
4290
4291/*********************************************************************//**
4292Releases transaction locks, and releases possible other transactions waiting
4293because of these locks. */
4294static
4295void
4296lock_release(
4297/*=========*/
4298 trx_t* trx) /*!< in/out: transaction */
4299{
4300 lock_t* lock;
4301 ulint count = 0;
4302 trx_id_t max_trx_id = trx_sys.get_max_trx_id();
4303
4304 ut_ad(lock_mutex_own());
4305 ut_ad(!trx_mutex_own(trx));
4306
4307 for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
4308 lock != NULL;
4309 lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
4310
4311 ut_d(lock_check_dict_lock(lock));
4312
4313 if (lock_get_type_low(lock) == LOCK_REC) {
4314
4315 lock_rec_dequeue_from_page(lock);
4316 } else {
4317 dict_table_t* table;
4318
4319 table = lock->un_member.tab_lock.table;
4320
4321 if (lock_get_mode(lock) != LOCK_IS
4322 && trx->undo_no != 0) {
4323
4324 /* The trx may have modified the table. We
4325 block the use of the MySQL query cache for
4326 all currently active transactions. */
4327
4328 table->query_cache_inv_id = max_trx_id;
4329 }
4330
4331 lock_table_dequeue(lock);
4332 }
4333
4334 if (count == LOCK_RELEASE_INTERVAL) {
			/* Release the mutex for a while, so that we
			do not monopolize it. This is safe because the
			loop refetches the tail of trx->lock.trx_locks
			on every iteration, so no list cursor survives
			across the release. */
4337
4338 lock_mutex_exit();
4339
4340 lock_mutex_enter();
4341
4342 count = 0;
4343 }
4344
4345 ++count;
4346 }
4347}
4348
4349/* True if a lock mode is S or X */
4350#define IS_LOCK_S_OR_X(lock) \
4351 (lock_get_mode(lock) == LOCK_S \
4352 || lock_get_mode(lock) == LOCK_X)
4353
4354/*********************************************************************//**
4355Removes table locks of the transaction on a table to be dropped. */
4356static
4357void
4358lock_trx_table_locks_remove(
4359/*========================*/
4360 const lock_t* lock_to_remove) /*!< in: lock to remove */
4361{
4362 trx_t* trx = lock_to_remove->trx;
4363
4364 ut_ad(lock_mutex_own());
4365
4366 /* It is safe to read this because we are holding the lock mutex */
4367 if (!trx->lock.cancel) {
4368 trx_mutex_enter(trx);
4369 } else {
4370 ut_ad(trx_mutex_own(trx));
4371 }
4372
4373 typedef lock_pool_t::reverse_iterator iterator;
4374
4375 iterator end = trx->lock.table_locks.rend();
4376
4377 for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
4378
4379 const lock_t* lock = *it;
4380
4381 if (lock == NULL) {
4382 continue;
4383 }
4384
4385 ut_a(trx == lock->trx);
4386 ut_a(lock_get_type_low(lock) & LOCK_TABLE);
4387 ut_a(lock->un_member.tab_lock.table != NULL);
4388
4389 if (lock == lock_to_remove) {
4390
4391 *it = NULL;
4392
4393 if (!trx->lock.cancel) {
4394 trx_mutex_exit(trx);
4395 }
4396
4397 return;
4398 }
4399 }
4400
4401 if (!trx->lock.cancel) {
4402 trx_mutex_exit(trx);
4403 }
4404
4405 /* Lock must exist in the vector. */
4406 ut_error;
4407}
4408
4409/*===================== VALIDATION AND DEBUGGING ====================*/
4410
4411/** Print info of a table lock.
4412@param[in,out] file output stream
4413@param[in] lock table lock */
4414static
4415void
4416lock_table_print(FILE* file, const lock_t* lock)
4417{
4418 ut_ad(lock_mutex_own());
4419 ut_a(lock_get_type_low(lock) == LOCK_TABLE);
4420
4421 fputs("TABLE LOCK table ", file);
4422 ut_print_name(file, lock->trx,
4423 lock->un_member.tab_lock.table->name.m_name);
4424 fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4425
4426 if (lock_get_mode(lock) == LOCK_S) {
4427 fputs(" lock mode S", file);
4428 } else if (lock_get_mode(lock) == LOCK_X) {
4429 ut_ad(lock->trx->id != 0);
4430 fputs(" lock mode X", file);
4431 } else if (lock_get_mode(lock) == LOCK_IS) {
4432 fputs(" lock mode IS", file);
4433 } else if (lock_get_mode(lock) == LOCK_IX) {
4434 ut_ad(lock->trx->id != 0);
4435 fputs(" lock mode IX", file);
4436 } else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
4437 fputs(" lock mode AUTO-INC", file);
4438 } else {
4439 fprintf(file, " unknown lock mode %lu",
4440 (ulong) lock_get_mode(lock));
4441 }
4442
4443 if (lock_get_wait(lock)) {
4444 fputs(" waiting", file);
4445 }
4446
4447 putc('\n', file);
4448}
4449
4450/** Print info of a record lock.
4451@param[in,out] file output stream
4452@param[in] lock record lock */
4453static
4454void
4455lock_rec_print(FILE* file, const lock_t* lock)
4456{
4457 ulint space;
4458 ulint page_no;
4459 mtr_t mtr;
4460 mem_heap_t* heap = NULL;
4461 ulint offsets_[REC_OFFS_NORMAL_SIZE];
4462 ulint* offsets = offsets_;
4463 rec_offs_init(offsets_);
4464
4465 ut_ad(lock_mutex_own());
4466 ut_a(lock_get_type_low(lock) == LOCK_REC);
4467
4468 space = lock->un_member.rec_lock.space;
4469 page_no = lock->un_member.rec_lock.page_no;
4470
4471 fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu "
4472 "index %s of table ",
4473 (ulong) space, (ulong) page_no,
4474 (ulong) lock_rec_get_n_bits(lock),
4475 lock->index->name());
4476 ut_print_name(file, lock->trx, lock->index->table->name.m_name);
4477 fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4478
4479 if (lock_get_mode(lock) == LOCK_S) {
4480 fputs(" lock mode S", file);
4481 } else if (lock_get_mode(lock) == LOCK_X) {
4482 fputs(" lock_mode X", file);
4483 } else {
4484 ut_error;
4485 }
4486
4487 if (lock_rec_get_gap(lock)) {
4488 fputs(" locks gap before rec", file);
4489 }
4490
4491 if (lock_rec_get_rec_not_gap(lock)) {
4492 fputs(" locks rec but not gap", file);
4493 }
4494
4495 if (lock_rec_get_insert_intention(lock)) {
4496 fputs(" insert intention", file);
4497 }
4498
4499 if (lock_get_wait(lock)) {
4500 fputs(" waiting", file);
4501 }
4502
4503 mtr_start(&mtr);
4504
4505 putc('\n', file);
4506
4507 const buf_block_t* block;
4508
4509 block = buf_page_try_get(page_id_t(space, page_no), &mtr);
4510
4511 for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
4512
4513 if (!lock_rec_get_nth_bit(lock, i)) {
4514 continue;
4515 }
4516
4517 fprintf(file, "Record lock, heap no %lu", (ulong) i);
4518
4519 if (block) {
4520 ut_ad(page_is_leaf(block->frame));
4521 const rec_t* rec;
4522
4523 rec = page_find_rec_with_heap_no(
4524 buf_block_get_frame(block), i);
4525 ut_ad(!page_rec_is_default_row(rec));
4526
4527 offsets = rec_get_offsets(
4528 rec, lock->index, offsets, true,
4529 ULINT_UNDEFINED, &heap);
4530
4531 putc(' ', file);
4532 rec_print_new(file, rec, offsets);
4533 }
4534
4535 putc('\n', file);
4536 }
4537
4538 mtr_commit(&mtr);
4539
4540 if (heap) {
4541 mem_heap_free(heap);
4542 }
4543}
4544
4545#ifdef UNIV_DEBUG
4546/* Print the number of lock structs from lock_print_info_summary() only
4547in non-production builds for performance reasons, see
4548http://bugs.mysql.com/36942 */
4549#define PRINT_NUM_OF_LOCK_STRUCTS
4550#endif /* UNIV_DEBUG */
4551
4552#ifdef PRINT_NUM_OF_LOCK_STRUCTS
4553/*********************************************************************//**
4554Calculates the number of record lock structs in the record lock hash table.
4555@return number of record locks */
4556static
4557ulint
4558lock_get_n_rec_locks(void)
4559/*======================*/
4560{
4561 ulint n_locks = 0;
4562 ulint i;
4563
4564 ut_ad(lock_mutex_own());
4565
4566 for (i = 0; i < hash_get_n_cells(lock_sys.rec_hash); i++) {
4567 const lock_t* lock;
4568
4569 for (lock = static_cast<const lock_t*>(
4570 HASH_GET_FIRST(lock_sys.rec_hash, i));
4571 lock != 0;
4572 lock = static_cast<const lock_t*>(
4573 HASH_GET_NEXT(hash, lock))) {
4574
4575 n_locks++;
4576 }
4577 }
4578
4579 return(n_locks);
4580}
4581#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4582
4583/*********************************************************************//**
4584Prints info of locks for all transactions.
4585@return FALSE if not able to obtain lock mutex
4586and exits without printing info */
4587ibool
4588lock_print_info_summary(
4589/*====================*/
4590 FILE* file, /*!< in: file where to print */
	ibool	nowait)	/*!< in: TRUE if we should not wait for the
			lock mutex */
4592{
	/* If nowait is FALSE, wait on the lock mutex; otherwise return
	immediately if we fail to obtain the mutex. */
4596 if (!nowait) {
4597 lock_mutex_enter();
4598 } else if (lock_mutex_enter_nowait()) {
4599 fputs("FAIL TO OBTAIN LOCK MUTEX,"
4600 " SKIP LOCK INFO PRINTING\n", file);
4601 return(FALSE);
4602 }
4603
4604 if (lock_deadlock_found) {
4605 fputs("------------------------\n"
4606 "LATEST DETECTED DEADLOCK\n"
4607 "------------------------\n", file);
4608
4609 if (!srv_read_only_mode) {
4610 ut_copy_file(file, lock_latest_err_file);
4611 }
4612 }
4613
4614 fputs("------------\n"
4615 "TRANSACTIONS\n"
4616 "------------\n", file);
4617
4618 fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
4619 trx_sys.get_max_trx_id());
4620
4621 fprintf(file,
4622 "Purge done for trx's n:o < " TRX_ID_FMT
4623 " undo n:o < " TRX_ID_FMT " state: %s\n"
4624 "History list length " ULINTPF "\n",
4625 purge_sys.tail.trx_no(),
4626 purge_sys.tail.undo_no,
4627 purge_sys.enabled()
4628 ? (purge_sys.running() ? "running"
4629 : purge_sys.paused() ? "stopped" : "running but idle")
4630 : "disabled",
4631 trx_sys.history_size());
4632
4633#ifdef PRINT_NUM_OF_LOCK_STRUCTS
4634 fprintf(file,
4635 "Total number of lock structs in row lock hash table %lu\n",
4636 (ulong) lock_get_n_rec_locks());
4637#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4638 return(TRUE);
4639}
4640
4641/** Functor to print not-started transaction from the trx_list. */
4642
4643struct PrintNotStarted {
4644
4645 PrintNotStarted(FILE* file) : m_file(file) { }
4646
4647 void operator()(const trx_t* trx)
4648 {
4649 ut_ad(mutex_own(&trx_sys.mutex));
4650
4651 /* See state transitions and locking rules in trx0trx.h */
4652
4653 if (trx->mysql_thd
4654 && trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
4655
4656 fputs("---", m_file);
4657 trx_print_latched(m_file, trx, 600);
4658 }
4659 }
4660
4661 FILE* m_file;
4662};
4663
4664/** Prints transaction lock wait and MVCC state.
4665@param[in,out] file file where to print
4666@param[in] trx transaction */
4667void
4668lock_trx_print_wait_and_mvcc_state(
4669 FILE* file,
4670 const trx_t* trx)
4671{
4672 fprintf(file, "---");
4673
4674 trx_print_latched(file, trx, 600);
4675
	/* Note: the read_view->get_state() check is subject to a race
	condition. But it should "kind of work" because the read_view is
	freed only at shutdown. The worst that may happen is that the view
	gets transferred to another thread and wrong values are printed. */
4680
4681 if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN) {
4682 trx->read_view.print_limits(file);
4683 }
4684
4685 if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
4686
4687 fprintf(file,
4688 "------- TRX HAS BEEN WAITING %lu SEC"
4689 " FOR THIS LOCK TO BE GRANTED:\n",
4690 (ulong) difftime(ut_time(), trx->lock.wait_started));
4691
4692 if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
4693 lock_rec_print(file, trx->lock.wait_lock);
4694 } else {
4695 lock_table_print(file, trx->lock.wait_lock);
4696 }
4697
4698 fprintf(file, "------------------\n");
4699 }
4700}
4701
4702/*********************************************************************//**
4703Prints info of locks for a transaction. */
4704static
4705void
4706lock_trx_print_locks(
4707/*=================*/
4708 FILE* file, /*!< in/out: File to write */
4709 const trx_t* trx) /*!< in: current transaction */
4710{
4711 uint32_t i= 0;
4712 /* Iterate over the transaction's locks. */
4713 for (lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4714 lock != NULL;
4715 lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
4716 if (lock_get_type_low(lock) == LOCK_REC) {
4717
4718 lock_rec_print(file, lock);
4719 } else {
4720 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4721
4722 lock_table_print(file, lock);
4723 }
4724
4725 if (++i == 10) {
4726
4727 fprintf(file,
4728 "10 LOCKS PRINTED FOR THIS TRX:"
4729 " SUPPRESSING FURTHER PRINTS\n");
4730
4731 break;
4732 }
4733 }
4734}
4735
4736
4737static my_bool lock_print_info_all_transactions_callback(
4738 rw_trx_hash_element_t *element, FILE *file)
4739{
4740 mutex_enter(&element->mutex);
4741 if (trx_t *trx= element->trx)
4742 {
4743 check_trx_state(trx);
4744 lock_trx_print_wait_and_mvcc_state(file, trx);
4745
4746 if (srv_print_innodb_lock_monitor)
4747 {
4748 trx->reference();
4749 mutex_exit(&element->mutex);
4750 lock_trx_print_locks(file, trx);
4751 trx->release_reference();
4752 return 0;
4753 }
4754 }
4755 mutex_exit(&element->mutex);
4756 return 0;
4757}
4758
4759
4760/*********************************************************************//**
4761Prints info of locks for each transaction. This function assumes that the
caller holds the lock mutex and, more importantly, that it will release the
lock mutex on behalf of the caller. (This should be fixed in the future.) */
4764void
4765lock_print_info_all_transactions(
4766/*=============================*/
4767 FILE* file) /*!< in/out: file where to print */
4768{
4769 ut_ad(lock_mutex_own());
4770
4771 fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
4772
4773 /* First print info on non-active transactions */
4774
4775 /* NOTE: information of auto-commit non-locking read-only
4776 transactions will be omitted here. The information will be
4777 available from INFORMATION_SCHEMA.INNODB_TRX. */
4778
4779 PrintNotStarted print_not_started(file);
4780 mutex_enter(&trx_sys.mutex);
4781 ut_list_map(trx_sys.trx_list, print_not_started);
4782 mutex_exit(&trx_sys.mutex);
4783
4784 trx_sys.rw_trx_hash.iterate_no_dups(
4785 reinterpret_cast<my_hash_walk_action>
4786 (lock_print_info_all_transactions_callback), file);
4787 lock_mutex_exit();
4788
4789 ut_ad(lock_validate());
4790}
4791
4792#ifdef UNIV_DEBUG
4793/*********************************************************************//**
Find the lock in the trx_t::trx_lock_t::table_locks vector.
4795@return true if found */
4796static
4797bool
4798lock_trx_table_locks_find(
4799/*======================*/
4800 trx_t* trx, /*!< in: trx to validate */
4801 const lock_t* find_lock) /*!< in: lock to find */
4802{
4803 bool found = false;
4804
4805 trx_mutex_enter(trx);
4806
4807 typedef lock_pool_t::const_reverse_iterator iterator;
4808
4809 iterator end = trx->lock.table_locks.rend();
4810
4811 for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
4812
4813 const lock_t* lock = *it;
4814
4815 if (lock == NULL) {
4816
4817 continue;
4818
4819 } else if (lock == find_lock) {
4820
4821 /* Can't be duplicates. */
4822 ut_a(!found);
4823 found = true;
4824 }
4825
4826 ut_a(trx == lock->trx);
4827 ut_a(lock_get_type_low(lock) & LOCK_TABLE);
4828 ut_a(lock->un_member.tab_lock.table != NULL);
4829 }
4830
4831 trx_mutex_exit(trx);
4832
4833 return(found);
4834}
4835
4836/*********************************************************************//**
4837Validates the lock queue on a table.
4838@return TRUE if ok */
4839static
4840ibool
4841lock_table_queue_validate(
4842/*======================*/
4843 const dict_table_t* table) /*!< in: table */
4844{
4845 const lock_t* lock;
4846
4847 ut_ad(lock_mutex_own());
4848
4849 for (lock = UT_LIST_GET_FIRST(table->locks);
4850 lock != NULL;
4851 lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4852
4853 /* Transaction state may change from ACTIVE to PREPARED.
4854 State change to COMMITTED is not possible while we are
4855 holding lock_sys.mutex: it is done by lock_trx_release_locks()
4856 under lock_sys.mutex protection.
4857 Transaction in NOT_STARTED state cannot hold locks, and
4858 lock->trx->state can only move to NOT_STARTED from COMMITTED. */
4859 check_trx_state(lock->trx);
4860
4861 if (!lock_get_wait(lock)) {
4862
4863 ut_a(!lock_table_other_has_incompatible(
4864 lock->trx, 0, table,
4865 lock_get_mode(lock)));
4866 } else {
4867
4868 ut_a(lock_table_has_to_wait_in_queue(lock));
4869 }
4870
4871 ut_a(lock_trx_table_locks_find(lock->trx, lock));
4872 }
4873
4874 return(TRUE);
4875}
4876
4877/*********************************************************************//**
4878Validates the lock queue on a single record.
4879@return TRUE if ok */
4880static
4881bool
4882lock_rec_queue_validate(
4883/*====================*/
4884 bool locked_lock_trx_sys,
4885 /*!< in: if the caller holds
4886 both the lock mutex and
4887 trx_sys_t->lock. */
4888 const buf_block_t* block, /*!< in: buffer block containing rec */
4889 const rec_t* rec, /*!< in: record to look at */
4890 const dict_index_t* index, /*!< in: index, or NULL if not known */
4891 const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
4892{
4893 const lock_t* lock;
4894 ulint heap_no;
4895
4896 ut_a(rec);
4897 ut_a(block->frame == page_align(rec));
4898 ut_ad(rec_offs_validate(rec, index, offsets));
4899 ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
4900 ut_ad(page_rec_is_leaf(rec));
4901 ut_ad(lock_mutex_own() == locked_lock_trx_sys);
4902 ut_ad(!index || dict_index_is_clust(index)
4903 || !dict_index_is_online_ddl(index));
4904
4905 heap_no = page_rec_get_heap_no(rec);
4906
4907 if (!locked_lock_trx_sys) {
4908 lock_mutex_enter();
4909 }
4910
4911 if (!page_rec_is_user_rec(rec)) {
4912
4913 for (lock = lock_rec_get_first(lock_sys.rec_hash,
4914 block, heap_no);
4915 lock != NULL;
4916 lock = lock_rec_get_next_const(heap_no, lock)) {
4917
4918 ut_ad(!trx_is_ac_nl_ro(lock->trx));
4919
4920 if (lock_get_wait(lock)) {
4921 ut_a(lock_rec_has_to_wait_in_queue(lock));
4922 }
4923
4924 if (index != NULL) {
4925 ut_a(lock->index == index);
4926 }
4927 }
4928
4929 goto func_exit;
4930 }
4931
4932 if (index == NULL) {
4933
4934 /* Nothing we can do */
4935
4936 } else if (dict_index_is_clust(index)) {
4937 /* Unlike the non-debug code, this invariant can only succeed
4938 if the check and assertion are covered by the lock mutex. */
4939
4940 const trx_t *impl_trx = trx_sys.rw_trx_hash.find(current_trx(),
4941 lock_clust_rec_some_has_impl(rec, index, offsets));
4942
4943 ut_ad(lock_mutex_own());
4944 /* impl_trx cannot be committed until lock_mutex_exit()
4945 because lock_trx_release_locks() acquires lock_sys.mutex */
4946
4947 if (!impl_trx) {
4948 } else if (const lock_t* other_lock
4949 = lock_rec_other_has_expl_req(
4950 LOCK_S, block, true, heap_no,
4951 impl_trx)) {
4952 /* The impl_trx is holding an implicit lock on the
4953 given record 'rec'. So there cannot be another
4954 explicit granted lock. Also, there can be another
4955 explicit waiting lock only if the impl_trx has an
4956 explicit granted lock. */
4957
4958#ifdef WITH_WSREP
4959 if (wsrep_on(other_lock->trx->mysql_thd)) {
			if (!lock_get_wait(other_lock)) {
4961 ib::info() << "WSREP impl BF lock conflict for my impl lock:\n BF:" <<
4962 ((wsrep_thd_is_BF(impl_trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
4963 wsrep_thd_exec_mode(impl_trx->mysql_thd) << " conflict: " <<
4964 wsrep_thd_conflict_state(impl_trx->mysql_thd, false) << " seqno: " <<
4965 wsrep_thd_trx_seqno(impl_trx->mysql_thd) << " SQL: " <<
4966 wsrep_thd_query(impl_trx->mysql_thd);
4967
4968 trx_t* otrx = other_lock->trx;
4969
4970 ib::info() << "WSREP other lock:\n BF:" <<
4971 ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
4972 wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
4973 wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " <<
4974 wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
4975 wsrep_thd_query(otrx->mysql_thd);
4976 }
4977
4978 if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
4979 block, heap_no,
4980 impl_trx)) {
4981 ib::info() << "WSREP impl BF lock conflict";
4982 }
		} else
#endif /* WITH_WSREP */
		{
			ut_ad(lock_get_wait(other_lock));
			ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
						block, heap_no, impl_trx));
		}
4988 }
4989 }
4990
4991 for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
4992 lock != NULL;
4993 lock = lock_rec_get_next_const(heap_no, lock)) {
4994
4995 ut_ad(!trx_is_ac_nl_ro(lock->trx));
4996 ut_ad(!page_rec_is_default_row(rec));
4997
4998 if (index) {
4999 ut_a(lock->index == index);
5000 }
5001
5002 if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
5003
5004 lock_mode mode;
5005
5006 if (lock_get_mode(lock) == LOCK_S) {
5007 mode = LOCK_X;
5008 } else {
5009 mode = LOCK_S;
5010 }
5011
5012 const lock_t* other_lock
5013 = lock_rec_other_has_expl_req(
5014 mode, block, false, heap_no,
5015 lock->trx);
5016#ifdef WITH_WSREP
5017 ut_a(!other_lock
5018 || wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE)
5019 || wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE));
5020
5021#else
5022 ut_a(!other_lock);
5023#endif /* WITH_WSREP */
5024 } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
5025
5026 ut_a(lock_rec_has_to_wait_in_queue(lock));
5027 }
5028 }
5029
5030 ut_ad(innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
5031 lock_queue_validate(lock));
5032
5033func_exit:
5034 if (!locked_lock_trx_sys) {
5035 lock_mutex_exit();
5036 }
5037
5038 return(TRUE);
5039}
5040
5041/*********************************************************************//**
5042Validates the record lock queues on a page.
5043@return TRUE if ok */
5044static
5045ibool
5046lock_rec_validate_page(
5047/*===================*/
5048 const buf_block_t* block) /*!< in: buffer block */
5049{
5050 const lock_t* lock;
5051 const rec_t* rec;
5052 ulint nth_lock = 0;
5053 ulint nth_bit = 0;
5054 ulint i;
5055 mem_heap_t* heap = NULL;
5056 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5057 ulint* offsets = offsets_;
5058 rec_offs_init(offsets_);
5059
5060 ut_ad(!lock_mutex_own());
5061
5062 lock_mutex_enter();
5063loop:
5064 lock = lock_rec_get_first_on_page_addr(
5065 lock_sys.rec_hash,
5066 block->page.id.space(), block->page.id.page_no());
5067
5068 if (!lock) {
5069 goto function_exit;
5070 }
5071
5072 ut_ad(!block->page.file_page_was_freed);
5073
5074 for (i = 0; i < nth_lock; i++) {
5075
5076 lock = lock_rec_get_next_on_page_const(lock);
5077
5078 if (!lock) {
5079 goto function_exit;
5080 }
5081 }
5082
5083 ut_ad(!trx_is_ac_nl_ro(lock->trx));
5084
5085 /* Only validate the record queues when this thread is not
5086 holding a space->latch. */
5087 if (!sync_check_find(SYNC_FSP))
5088 for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
5089
5090 if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
5091
5092 rec = page_find_rec_with_heap_no(block->frame, i);
5093 ut_a(rec);
5094 ut_ad(page_rec_is_leaf(rec));
5095 offsets = rec_get_offsets(rec, lock->index, offsets,
5096 true, ULINT_UNDEFINED,
5097 &heap);
5098
5099 /* If this thread is holding the file space
5100 latch (fil_space_t::latch), the following
5101 check WILL break the latching order and may
5102 cause a deadlock of threads. */
5103
5104 lock_rec_queue_validate(
5105 TRUE, block, rec, lock->index, offsets);
5106
5107 nth_bit = i + 1;
5108
5109 goto loop;
5110 }
5111 }
5112
5113 nth_bit = 0;
5114 nth_lock++;
5115
5116 goto loop;
5117
5118function_exit:
5119 lock_mutex_exit();
5120
5121 if (heap != NULL) {
5122 mem_heap_free(heap);
5123 }
5124 return(TRUE);
5125}
5126
5127/*********************************************************************//**
5128Validate record locks up to a limit.
5129@return lock at limit or NULL if no more locks in the hash bucket */
5130static MY_ATTRIBUTE((warn_unused_result))
5131const lock_t*
5132lock_rec_validate(
5133/*==============*/
5134 ulint start, /*!< in: lock_sys.rec_hash
5135 bucket */
5136 ib_uint64_t* limit) /*!< in/out: upper limit of
5137 (space, page_no) */
5138{
5139 ut_ad(lock_mutex_own());
5140
5141 for (const lock_t* lock = static_cast<const lock_t*>(
5142 HASH_GET_FIRST(lock_sys.rec_hash, start));
5143 lock != NULL;
5144 lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) {
5145
5146 ib_uint64_t current;
5147
5148 ut_ad(!trx_is_ac_nl_ro(lock->trx));
5149 ut_ad(lock_get_type(lock) == LOCK_REC);
5150
5151 current = ut_ull_create(
5152 lock->un_member.rec_lock.space,
5153 lock->un_member.rec_lock.page_no);
5154
5155 if (current > *limit) {
5156 *limit = current + 1;
5157 return(lock);
5158 }
5159 }
5160
5161 return(0);
5162}
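
/* Illustrative example (not part of the original sources):
ut_ull_create(space, page_no) packs the page address into one 64-bit
key, roughly ((ib_uint64_t) space << 32) | page_no, so a caller can
resume the scan of a hash bucket from where it left off:

	ib_uint64_t limit = 0;
	while (const lock_t* lock = lock_rec_validate(i, &limit)) {
		// process "lock"; the next call returns the first
		// lock in the bucket whose packed address is > limit
	}
*/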
5163
5164/*********************************************************************//**
5165Validate a record lock's block */
5166static
5167void
5168lock_rec_block_validate(
5169/*====================*/
5170 ulint space_id,
5171 ulint page_no)
5172{
5173 /* The lock and the block that it is referring to may be freed at
5174 this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check.
5175 If the lock exists in lock_rec_validate_page() we assert
5176 !block->page.file_page_was_freed. */
5177
5178 buf_block_t* block;
5179 mtr_t mtr;
5180
5181 /* Transactional locks should never refer to dropped
5182 tablespaces, because all DDL operations that would drop or
5183 discard or rebuild a tablespace do hold an exclusive table
5184 lock, which would conflict with any locks referring to the
5185 tablespace from other transactions. */
5186 if (fil_space_t* space = fil_space_acquire(space_id)) {
5187 dberr_t err = DB_SUCCESS;
5188 mtr_start(&mtr);
5189
5190 block = buf_page_get_gen(
5191 page_id_t(space_id, page_no),
5192 page_size_t(space->flags),
5193 RW_X_LATCH, NULL,
5194 BUF_GET_POSSIBLY_FREED,
5195 __FILE__, __LINE__, &mtr, &err);
5196
5197 if (err != DB_SUCCESS) {
5198 ib::error() << "Lock rec block validate failed for tablespace "
5199 << space->name
5200 << " space_id " << space_id
5201 << " page_no " << page_no << " err " << err;
5202 }
5203
5204 if (block) {
5205 buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
5206
5207 ut_ad(lock_rec_validate_page(block));
5208 }
5209
5210 mtr_commit(&mtr);
5211
5212 space->release();
5213 }
5214}
5215
5216
5217static my_bool lock_validate_table_locks(rw_trx_hash_element_t *element, void*)
5218{
5219 ut_ad(lock_mutex_own());
5220 mutex_enter(&element->mutex);
5221 if (element->trx)
5222 {
5223 check_trx_state(element->trx);
5224 for (const lock_t *lock= UT_LIST_GET_FIRST(element->trx->lock.trx_locks);
5225 lock != NULL;
5226 lock= UT_LIST_GET_NEXT(trx_locks, lock))
5227 {
5228 if (lock_get_type_low(lock) & LOCK_TABLE)
5229 lock_table_queue_validate(lock->un_member.tab_lock.table);
5230 }
5231 }
5232 mutex_exit(&element->mutex);
5233 return 0;
5234}
5235
5236
5237/*********************************************************************//**
5238Validates the lock system.
5239@return TRUE if ok */
5240static
5241bool
5242lock_validate()
5243/*===========*/
5244{
5245 typedef std::pair<ulint, ulint> page_addr_t;
5246 typedef std::set<
5247 page_addr_t,
5248 std::less<page_addr_t>,
5249 ut_allocator<page_addr_t> > page_addr_set;
5250
5251 page_addr_set pages;
5252
5253 lock_mutex_enter();
5254
5255 /* Validate table locks */
5256 trx_sys.rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
5257 (lock_validate_table_locks), 0);
5258
	/* Iterate over all the record locks and validate the locks. We
	don't want to hog lock_sys.mutex, so we collect the page
	addresses here and validate the blocks only after releasing
	the mutex. */
5262
5263 for (ulint i = 0; i < hash_get_n_cells(lock_sys.rec_hash); i++) {
5264 ib_uint64_t limit = 0;
5265
5266 while (const lock_t* lock = lock_rec_validate(i, &limit)) {
5267 if (lock_rec_find_set_bit(lock) == ULINT_UNDEFINED) {
5268 /* The lock bitmap is empty; ignore it. */
5269 continue;
5270 }
5271 const lock_rec_t& l = lock->un_member.rec_lock;
5272 pages.insert(std::make_pair(l.space, l.page_no));
5273 }
5274 }
5275
5276 lock_mutex_exit();
5277
5278 for (page_addr_set::const_iterator it = pages.begin();
5279 it != pages.end();
5280 ++it) {
5281 lock_rec_block_validate((*it).first, (*it).second);
5282 }
5283
5284 return(true);
5285}
5286#endif /* UNIV_DEBUG */
5287/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
5288
5289/*********************************************************************//**
5290Checks if locks of other transactions prevent an immediate insert of
5291a record. If they do, first tests if the query thread should anyway
5292be suspended for some reason; if not, then puts the transaction and
5293the query thread to the lock wait state and inserts a waiting request
5294for a gap x-lock to the lock queue.
5295@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5296dberr_t
5297lock_rec_insert_check_and_lock(
5298/*===========================*/
5299 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
5300 set, does nothing */
5301 const rec_t* rec, /*!< in: record after which to insert */
5302 buf_block_t* block, /*!< in/out: buffer block of rec */
5303 dict_index_t* index, /*!< in: index */
5304 que_thr_t* thr, /*!< in: query thread */
5305 mtr_t* mtr, /*!< in/out: mini-transaction */
5306 bool* inherit)/*!< out: set to true if the new
5307 inserted record maybe should inherit
5308 LOCK_GAP type locks from the successor
5309 record */
5310{
5311 ut_ad(block->frame == page_align(rec));
5312 ut_ad(!dict_index_is_online_ddl(index)
5313 || dict_index_is_clust(index)
5314 || (flags & BTR_CREATE_FLAG));
5315 ut_ad(mtr->is_named_space(index->table->space));
5316 ut_ad(page_rec_is_leaf(rec));
5317
5318 if (flags & BTR_NO_LOCKING_FLAG) {
5319
5320 return(DB_SUCCESS);
5321 }
5322
5323 ut_ad(!index->table->is_temporary());
5324
5325 dberr_t err;
5326 lock_t* lock;
5327 bool inherit_in = *inherit;
5328 trx_t* trx = thr_get_trx(thr);
5329 const rec_t* next_rec = page_rec_get_next_const(rec);
5330 ulint heap_no = page_rec_get_heap_no(next_rec);
5331 ut_ad(!rec_is_default_row(next_rec, index));
5332
5333 lock_mutex_enter();
5334 /* Because this code is invoked for a running transaction by
5335 the thread that is serving the transaction, it is not necessary
5336 to hold trx->mutex here. */
5337
5338 /* When inserting a record into an index, the table must be at
5339 least IX-locked. When we are building an index, we would pass
5340 BTR_NO_LOCKING_FLAG and skip the locking altogether. */
5341 ut_ad(lock_table_has(trx, index->table, LOCK_IX));
5342
5343 lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
5344
5345 if (lock == NULL) {
5346 /* We optimize CPU time usage in the simplest case */
5347
5348 lock_mutex_exit();
5349
5350 if (inherit_in && !dict_index_is_clust(index)) {
5351 /* Update the page max trx id field */
5352 page_update_max_trx_id(block,
5353 buf_block_get_page_zip(block),
5354 trx->id, mtr);
5355 }
5356
5357 *inherit = false;
5358
5359 return(DB_SUCCESS);
5360 }
5361
5362 /* Spatial index does not use GAP lock protection. It uses
5363 "predicate lock" to protect the "range" */
	if (dict_index_is_spatial(index)) {
		lock_mutex_exit();
		return(DB_SUCCESS);
	}
5367
5368 *inherit = true;
5369
5370 /* If another transaction has an explicit lock request which locks
5371 the gap, waiting or granted, on the successor, the insert has to wait.
5372
5373 An exception is the case where the lock by the another transaction
5374 is a gap type lock which it placed to wait for its turn to insert. We
5375 do not consider that kind of a lock conflicting with our insert. This
5376 eliminates an unnecessary deadlock which resulted when 2 transactions
5377 had to wait for their insert. Both had waiting gap type lock requests
5378 on the successor, which produced an unnecessary deadlock. */
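
	/* Worked example (not part of the original sources): T3 holds a
	next-key S lock on the successor record; T1 and T2 both try to
	insert into the gap before it, and both enqueue waiting
	LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION requests. Because each
	inserter ignores the other's waiting gap-type request, T1 and T2
	wait only for T3, not for each other, so no deadlock forms
	between the two inserters. */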
5379
5380 const ulint type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
5381
5382 if (
5383#ifdef WITH_WSREP
5384 lock_t* c_lock =
5385#endif /* WITH_WSREP */
5386 lock_rec_other_has_conflicting(type_mode, block, heap_no, trx)) {
5387 /* Note that we may get DB_SUCCESS also here! */
5388 trx_mutex_enter(trx);
5389
5390 err = lock_rec_enqueue_waiting(
5391#ifdef WITH_WSREP
5392 c_lock,
5393#endif /* WITH_WSREP */
5394 type_mode, block, heap_no, index, thr, NULL);
5395
5396 trx_mutex_exit(trx);
5397 } else {
5398 err = DB_SUCCESS;
5399 }
5400
5401 lock_mutex_exit();
5402
5403 switch (err) {
5404 case DB_SUCCESS_LOCKED_REC:
5405 err = DB_SUCCESS;
5406 /* fall through */
5407 case DB_SUCCESS:
5408 if (!inherit_in || dict_index_is_clust(index)) {
5409 break;
5410 }
5411
		/* Update the page max trx id field */
		page_update_max_trx_id(
			block, buf_block_get_page_zip(block), trx->id, mtr);
		/* fall through */
	default:
5416 /* We only care about the two return values. */
5417 break;
5418 }
5419
5420#ifdef UNIV_DEBUG
5421 {
5422 mem_heap_t* heap = NULL;
5423 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5424 const ulint* offsets;
5425 rec_offs_init(offsets_);
5426
5427 offsets = rec_get_offsets(next_rec, index, offsets_, true,
5428 ULINT_UNDEFINED, &heap);
5429
5430 ut_ad(lock_rec_queue_validate(
5431 FALSE, block, next_rec, index, offsets));
5432
5433 if (heap != NULL) {
5434 mem_heap_free(heap);
5435 }
5436 }
5437#endif /* UNIV_DEBUG */
5438
5439 return(err);
5440}
5441
5442/*********************************************************************//**
5443Creates an explicit record lock for a running transaction that currently only
5444has an implicit lock on the record. The transaction instance must have a
5445reference count > 0 so that it can't be committed and freed before this
5446function has completed. */
5447static
5448void
5449lock_rec_convert_impl_to_expl_for_trx(
5450/*==================================*/
5451 const buf_block_t* block, /*!< in: buffer block of rec */
5452 const rec_t* rec, /*!< in: user record on page */
5453 dict_index_t* index, /*!< in: index of record */
5454 trx_t* trx, /*!< in/out: active transaction */
5455 ulint heap_no)/*!< in: rec heap number to lock */
5456{
5457 ut_ad(trx->is_referenced());
5458 ut_ad(page_rec_is_leaf(rec));
5459 ut_ad(!rec_is_default_row(rec, index));
5460
5461 DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
5462
5463 lock_mutex_enter();
5464
5465 ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
5466
5467 if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
5468 && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
5469 block, heap_no, trx)) {
5470
5471 ulint type_mode;
5472
5473 type_mode = (LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP);
5474
5475 lock_rec_add_to_queue(
5476 type_mode, block, heap_no, index, trx, FALSE);
5477 }
5478
5479 lock_mutex_exit();
5480
5481 trx->release_reference();
5482
5483 DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
5484}
5485
5486
5487#ifdef UNIV_DEBUG
5488struct lock_rec_other_trx_holds_expl_arg
5489{
5490 const ulint heap_no;
5491 const buf_block_t * const block;
5492 const trx_t *impl_trx;
5493};
5494
5495
5496static my_bool lock_rec_other_trx_holds_expl_callback(
5497 rw_trx_hash_element_t *element,
5498 lock_rec_other_trx_holds_expl_arg *arg)
5499{
5500 mutex_enter(&element->mutex);
5501 if (element->trx)
5502 {
5503 lock_t *expl_lock= lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, arg->block,
5504 arg->heap_no, element->trx);
    /*
      No explicit lock may be held on the record by any transaction
      other than the one holding the implicit lock.
    */
5509 ut_ad(!expl_lock || expl_lock->trx == arg->impl_trx);
5510 }
5511 mutex_exit(&element->mutex);
5512 return 0;
5513}
5514
5515
5516/**
 Asserts that no transaction, other than the given trx, holds an
 explicit lock on the given rec (debug builds only).
5519
5520 FIXME: if the current transaction holds implicit lock from INSERT, a
5521 subsequent locking read should not convert it to explicit. See also
5522 MDEV-11215.
5523
5524 @param caller_trx trx of current thread
5525 @param[in] trx trx holding implicit lock on rec
5526 @param[in] rec user record
5527 @param[in] block buffer block containing the record
5528*/
5529
5530static void lock_rec_other_trx_holds_expl(trx_t *caller_trx, trx_t *trx,
5531 const rec_t *rec,
5532 const buf_block_t *block)
5533{
5534 if (trx)
5535 {
5536 ut_ad(!page_rec_is_default_row(rec));
5537 lock_mutex_enter();
5538 lock_rec_other_trx_holds_expl_arg arg= { page_rec_get_heap_no(rec), block,
5539 trx };
5540 trx_sys.rw_trx_hash.iterate(caller_trx,
5541 reinterpret_cast<my_hash_walk_action>
5542 (lock_rec_other_trx_holds_expl_callback),
5543 &arg);
5544 lock_mutex_exit();
5545 }
5546}
5547#endif /* UNIV_DEBUG */
5548
5549
5550/*********************************************************************//**
5551If a transaction has an implicit x-lock on a record, but no explicit x-lock
5552set on the record, sets one for it. */
5553static
5554void
5555lock_rec_convert_impl_to_expl(
5556/*==========================*/
5557 trx_t* caller_trx,/*!<in/out: trx of current thread */
5558 const buf_block_t* block, /*!< in: buffer block of rec */
5559 const rec_t* rec, /*!< in: user record on page */
5560 dict_index_t* index, /*!< in: index of record */
5561 const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
5562{
5563 trx_t* trx;
5564
5565 ut_ad(!lock_mutex_own());
5566 ut_ad(page_rec_is_user_rec(rec));
5567 ut_ad(rec_offs_validate(rec, index, offsets));
5568 ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5569 ut_ad(page_rec_is_leaf(rec));
5570 ut_ad(!rec_is_default_row(rec, index));
5571
5572 if (dict_index_is_clust(index)) {
5573 trx_id_t trx_id;
5574
5575 trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
5576
5577 trx = trx_sys.find(caller_trx, trx_id);
5578 } else {
5579 ut_ad(!dict_index_is_online_ddl(index));
5580
5581 trx = lock_sec_rec_some_has_impl(caller_trx, rec, index,
5582 offsets);
5583
5584 ut_d(lock_rec_other_trx_holds_expl(caller_trx, trx, rec,
5585 block));
5586 }
5587
5588 if (trx != 0) {
5589 ulint heap_no = page_rec_get_heap_no(rec);
5590
5591 ut_ad(trx->is_referenced());
5592
5593 /* If the transaction is still active and has no
5594 explicit x-lock set on the record, set one for it.
5595 trx cannot be committed until the ref count is zero. */
5596
5597 lock_rec_convert_impl_to_expl_for_trx(
5598 block, rec, index, trx, heap_no);
5599 }
5600}
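
/* Worked example (not part of the original sources): T1 has inserted or
updated a clustered index record and holds only an implicit lock on it
(T1's id is in the record's trx id field). When T2 later requests a
conflicting lock on that record, this function, running in T2's thread,
first materializes T1's implicit lock as an explicit
LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP entry in the queue, so that T2's
request is enqueued behind it and waits. */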
5601
5602/*********************************************************************//**
5603Checks if locks of other transactions prevent an immediate modify (update,
5604delete mark, or delete unmark) of a clustered index record. If they do,
5605first tests if the query thread should anyway be suspended for some
5606reason; if not, then puts the transaction and the query thread to the
5607lock wait state and inserts a waiting request for a record x-lock to the
5608lock queue.
5609@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5610dberr_t
5611lock_clust_rec_modify_check_and_lock(
5612/*=================================*/
5613 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5614 bit is set, does nothing */
5615 const buf_block_t* block, /*!< in: buffer block of rec */
5616 const rec_t* rec, /*!< in: record which should be
5617 modified */
5618 dict_index_t* index, /*!< in: clustered index */
5619 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
5620 que_thr_t* thr) /*!< in: query thread */
5621{
5622 dberr_t err;
5623 ulint heap_no;
5624
5625 ut_ad(rec_offs_validate(rec, index, offsets));
5626 ut_ad(page_rec_is_leaf(rec));
5627 ut_ad(dict_index_is_clust(index));
5628 ut_ad(block->frame == page_align(rec));
5629
5630 if (flags & BTR_NO_LOCKING_FLAG) {
5631
5632 return(DB_SUCCESS);
5633 }
5634 ut_ad(!rec_is_default_row(rec, index));
5635 ut_ad(!index->table->is_temporary());
5636
5637 heap_no = rec_offs_comp(offsets)
5638 ? rec_get_heap_no_new(rec)
5639 : rec_get_heap_no_old(rec);
5640
5641 /* If a transaction has no explicit x-lock set on the record, set one
5642 for it */
5643
5644 lock_rec_convert_impl_to_expl(thr_get_trx(thr), block, rec, index,
5645 offsets);
5646
5647 err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
5648 block, heap_no, index, thr);
5649
5650 ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
5651
5652 if (err == DB_SUCCESS_LOCKED_REC) {
5653 err = DB_SUCCESS;
5654 }
5655
5656 return(err);
5657}
5658
5659/*********************************************************************//**
5660Checks if locks of other transactions prevent an immediate modify (delete
5661mark or delete unmark) of a secondary index record.
5662@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5663dberr_t
5664lock_sec_rec_modify_check_and_lock(
5665/*===============================*/
5666 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5667 bit is set, does nothing */
5668 buf_block_t* block, /*!< in/out: buffer block of rec */
5669 const rec_t* rec, /*!< in: record which should be
5670 modified; NOTE: as this is a secondary
5671 index, we always have to modify the
5672 clustered index record first: see the
5673 comment below */
5674 dict_index_t* index, /*!< in: secondary index */
5675 que_thr_t* thr, /*!< in: query thread
5676 (can be NULL if BTR_NO_LOCKING_FLAG) */
5677 mtr_t* mtr) /*!< in/out: mini-transaction */
5678{
5679 dberr_t err;
5680 ulint heap_no;
5681
5682 ut_ad(!dict_index_is_clust(index));
5683 ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
5684 ut_ad(block->frame == page_align(rec));
5685 ut_ad(mtr->is_named_space(index->table->space));
5686 ut_ad(page_rec_is_leaf(rec));
5687 ut_ad(!rec_is_default_row(rec, index));
5688
5689 if (flags & BTR_NO_LOCKING_FLAG) {
5690
5691 return(DB_SUCCESS);
5692 }
5693 ut_ad(!index->table->is_temporary());
5694
5695 heap_no = page_rec_get_heap_no(rec);
5696
5697 /* Another transaction cannot have an implicit lock on the record,
5698 because when we come here, we already have modified the clustered
5699 index record, and this would not have been possible if another active
5700 transaction had modified this secondary index record. */
5701
5702 err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
5703 block, heap_no, index, thr);
5704
5705#ifdef UNIV_DEBUG
5706 {
5707 mem_heap_t* heap = NULL;
5708 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5709 const ulint* offsets;
5710 rec_offs_init(offsets_);
5711
5712 offsets = rec_get_offsets(rec, index, offsets_, true,
5713 ULINT_UNDEFINED, &heap);
5714
5715 ut_ad(lock_rec_queue_validate(
5716 FALSE, block, rec, index, offsets));
5717
5718 if (heap != NULL) {
5719 mem_heap_free(heap);
5720 }
5721 }
5722#endif /* UNIV_DEBUG */
5723
5724 if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
5725 /* Update the page max trx id field */
5726 /* It might not be necessary to do this if
5727 err == DB_SUCCESS (no new lock created),
5728 but it should not cost too much performance. */
5729 page_update_max_trx_id(block,
5730 buf_block_get_page_zip(block),
5731 thr_get_trx(thr)->id, mtr);
5732 err = DB_SUCCESS;
5733 }
5734
5735 return(err);
5736}
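
/* Note: the PAGE_MAX_TRX_ID updated above is what
lock_sec_rec_read_check_and_lock() compares against
trx_sys.get_min_trx_id(): a reader needs to consider implicit locks on
a secondary index page only if the page was modified by a transaction
that may still be active. */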
5737
5738/*********************************************************************//**
5739Like lock_clust_rec_read_check_and_lock(), but reads a
5740secondary index record.
5741@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
5742dberr_t
5743lock_sec_rec_read_check_and_lock(
5744/*=============================*/
5745 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5746 bit is set, does nothing */
5747 const buf_block_t* block, /*!< in: buffer block of rec */
5748 const rec_t* rec, /*!< in: user record or page
5749 supremum record which should
5750 be read or passed over by a
5751 read cursor */
5752 dict_index_t* index, /*!< in: secondary index */
5753 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
5754 lock_mode mode, /*!< in: mode of the lock which
5755 the read cursor should set on
5756 records: LOCK_S or LOCK_X; the
5757 latter is possible in
5758 SELECT FOR UPDATE */
5759 ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
5760 LOCK_REC_NOT_GAP */
5761 que_thr_t* thr) /*!< in: query thread */
5762{
5763 dberr_t err;
5764 ulint heap_no;
5765
5766 ut_ad(!dict_index_is_clust(index));
5767 ut_ad(!dict_index_is_online_ddl(index));
5768 ut_ad(block->frame == page_align(rec));
5769 ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
5770 ut_ad(rec_offs_validate(rec, index, offsets));
5771 ut_ad(page_rec_is_leaf(rec));
5772 ut_ad(mode == LOCK_X || mode == LOCK_S);
5773
5774 if ((flags & BTR_NO_LOCKING_FLAG)
5775 || srv_read_only_mode
5776 || index->table->is_temporary()) {
5777
5778 return(DB_SUCCESS);
5779 }
5780
5781 ut_ad(!rec_is_default_row(rec, index));
5782 heap_no = page_rec_get_heap_no(rec);
5783
5784 /* Some transaction may have an implicit x-lock on the record only
5785 if the max trx id for the page >= min trx id for the trx list or a
5786 database recovery is running. */
5787
5788 if (!page_rec_is_supremum(rec)
5789 && page_get_max_trx_id(block->frame) >= trx_sys.get_min_trx_id()) {
5790
5791 lock_rec_convert_impl_to_expl(thr_get_trx(thr), block, rec,
5792 index, offsets);
5793 }
5794
5795 err = lock_rec_lock(FALSE, ulint(mode) | gap_mode,
5796 block, heap_no, index, thr);
5797
5798 ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
5799
5800 return(err);
5801}
5802
5803/*********************************************************************//**
5804Checks if locks of other transactions prevent an immediate read, or passing
5805over by a read cursor, of a clustered index record. If they do, first tests
5806if the query thread should anyway be suspended for some reason; if not, then
5807puts the transaction and the query thread to the lock wait state and inserts a
5808waiting request for a record lock to the lock queue. Sets the requested mode
5809lock on the record.
5810@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
5811dberr_t
5812lock_clust_rec_read_check_and_lock(
5813/*===============================*/
5814 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5815 bit is set, does nothing */
5816 const buf_block_t* block, /*!< in: buffer block of rec */
5817 const rec_t* rec, /*!< in: user record or page
5818 supremum record which should
5819 be read or passed over by a
5820 read cursor */
5821 dict_index_t* index, /*!< in: clustered index */
5822 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
5823 lock_mode mode, /*!< in: mode of the lock which
5824 the read cursor should set on
5825 records: LOCK_S or LOCK_X; the
5826 latter is possible in
5827 SELECT FOR UPDATE */
5828 ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
5829 LOCK_REC_NOT_GAP */
5830 que_thr_t* thr) /*!< in: query thread */
5831{
5832 dberr_t err;
5833 ulint heap_no;
5834
5835 ut_ad(dict_index_is_clust(index));
5836 ut_ad(block->frame == page_align(rec));
5837 ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
5838 ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
5839 || gap_mode == LOCK_REC_NOT_GAP);
5840 ut_ad(rec_offs_validate(rec, index, offsets));
5841 ut_ad(page_rec_is_leaf(rec));
5842 ut_ad(!rec_is_default_row(rec, index));
5843
5844 if ((flags & BTR_NO_LOCKING_FLAG)
5845 || srv_read_only_mode
5846 || index->table->is_temporary()) {
5847
5848 return(DB_SUCCESS);
5849 }
5850
5851 heap_no = page_rec_get_heap_no(rec);
5852
5853 if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
5854
5855 lock_rec_convert_impl_to_expl(thr_get_trx(thr), block, rec,
5856 index, offsets);
5857 }
5858
5859 err = lock_rec_lock(FALSE, ulint(mode) | gap_mode,
5860 block, heap_no, index, thr);
5861
5862 ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
5863
5864 DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock");
5865
5866 return(err);
5867}
5868/*********************************************************************//**
5869Checks if locks of other transactions prevent an immediate read, or passing
5870over by a read cursor, of a clustered index record. If they do, first tests
5871if the query thread should anyway be suspended for some reason; if not, then
5872puts the transaction and the query thread to the lock wait state and inserts a
5873waiting request for a record lock to the lock queue. Sets the requested mode
5874lock on the record. This is an alternative version of
5875lock_clust_rec_read_check_and_lock() that does not require the parameter
5876"offsets".
5877@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5878dberr_t
5879lock_clust_rec_read_check_and_lock_alt(
5880/*===================================*/
5881 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5882 bit is set, does nothing */
5883 const buf_block_t* block, /*!< in: buffer block of rec */
5884 const rec_t* rec, /*!< in: user record or page
5885 supremum record which should
5886 be read or passed over by a
5887 read cursor */
5888 dict_index_t* index, /*!< in: clustered index */
5889 lock_mode mode, /*!< in: mode of the lock which
5890 the read cursor should set on
5891 records: LOCK_S or LOCK_X; the
5892 latter is possible in
5893 SELECT FOR UPDATE */
5894 ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
5895 LOCK_REC_NOT_GAP */
5896 que_thr_t* thr) /*!< in: query thread */
5897{
5898 mem_heap_t* tmp_heap = NULL;
5899 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5900 ulint* offsets = offsets_;
5901 dberr_t err;
5902 rec_offs_init(offsets_);
5903
5904 ut_ad(page_rec_is_leaf(rec));
5905 offsets = rec_get_offsets(rec, index, offsets, true,
5906 ULINT_UNDEFINED, &tmp_heap);
5907 err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
5908 offsets, mode, gap_mode, thr);
5909 if (tmp_heap) {
5910 mem_heap_free(tmp_heap);
5911 }
5912
5913 if (err == DB_SUCCESS_LOCKED_REC) {
5914 err = DB_SUCCESS;
5915 }
5916
5917 return(err);
5918}
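
/* Note: offsets_[REC_OFFS_NORMAL_SIZE] above is the usual stack-based
scratch buffer for rec_get_offsets(); tmp_heap is allocated, and must be
freed, only if the record has too many fields to fit in the buffer. */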
5919
5920/*******************************************************************//**
5921Release the last lock from the transaction's autoinc locks. */
5922UNIV_INLINE
5923void
5924lock_release_autoinc_last_lock(
5925/*===========================*/
5926 ib_vector_t* autoinc_locks) /*!< in/out: vector of AUTOINC locks */
5927{
5928 ulint last;
5929 lock_t* lock;
5930
5931 ut_ad(lock_mutex_own());
5932 ut_a(!ib_vector_is_empty(autoinc_locks));
5933
	/* The lock to be released must be the last lock acquired. */
5935 last = ib_vector_size(autoinc_locks) - 1;
5936 lock = *static_cast<lock_t**>(ib_vector_get(autoinc_locks, last));
5937
5938 /* Should have only AUTOINC locks in the vector. */
5939 ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
5940 ut_a(lock_get_type(lock) == LOCK_TABLE);
5941
5942 ut_a(lock->un_member.tab_lock.table != NULL);
5943
5944 /* This will remove the lock from the trx autoinc_locks too. */
5945 lock_table_dequeue(lock);
5946
5947 /* Remove from the table vector too. */
5948 lock_trx_table_locks_remove(lock);
5949}
5950
5951/*******************************************************************//**
5952Check if a transaction holds any autoinc locks.
5953@return TRUE if the transaction holds any AUTOINC locks. */
5954static
5955ibool
5956lock_trx_holds_autoinc_locks(
5957/*=========================*/
5958 const trx_t* trx) /*!< in: transaction */
5959{
5960 ut_a(trx->autoinc_locks != NULL);
5961
5962 return(!ib_vector_is_empty(trx->autoinc_locks));
5963}
5964
5965/*******************************************************************//**
5966Release all the transaction's autoinc locks. */
5967static
5968void
5969lock_release_autoinc_locks(
5970/*=======================*/
5971 trx_t* trx) /*!< in/out: transaction */
5972{
5973 ut_ad(lock_mutex_own());
5974 /* If this is invoked for a running transaction by the thread
5975 that is serving the transaction, then it is not necessary to
5976 hold trx->mutex here. */
5977
5978 ut_a(trx->autoinc_locks != NULL);
5979
5980 /* We release the locks in the reverse order. This is to
5981 avoid searching the vector for the element to delete at
5982 the lower level. See (lock_table_remove_low()) for details. */
5983 while (!ib_vector_is_empty(trx->autoinc_locks)) {
5984
5985 /* lock_table_remove_low() will also remove the lock from
5986 the transaction's autoinc_locks vector. */
5987 lock_release_autoinc_last_lock(trx->autoinc_locks);
5988 }
5989
5990 /* Should release all locks. */
5991 ut_a(ib_vector_is_empty(trx->autoinc_locks));
5992}
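
/* Note: releasing from the back of the vector means that each
lock_release_autoinc_last_lock() call removes the most recently
acquired AUTOINC lock, so lock_table_remove_low() can pop the last
element instead of searching the vector (see the comment above). */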
5993
5994/*******************************************************************//**
5995Gets the type of a lock. Non-inline version for using outside of the
5996lock module.
5997@return LOCK_TABLE or LOCK_REC */
5998ulint
5999lock_get_type(
6000/*==========*/
6001 const lock_t* lock) /*!< in: lock */
6002{
6003 return(lock_get_type_low(lock));
6004}
6005
6006/*******************************************************************//**
6007Gets the id of the transaction owning a lock.
6008@return transaction id */
6009trx_id_t
6010lock_get_trx_id(
6011/*============*/
6012 const lock_t* lock) /*!< in: lock */
6013{
6014 return(trx_get_id_for_print(lock->trx));
6015}
6016
6017/*******************************************************************//**
6018Gets the mode of a lock in a human readable string.
6019The string should not be free()'d or modified.
6020@return lock mode */
6021const char*
6022lock_get_mode_str(
6023/*==============*/
6024 const lock_t* lock) /*!< in: lock */
6025{
6026 ibool is_gap_lock;
6027
6028 is_gap_lock = lock_get_type_low(lock) == LOCK_REC
6029 && lock_rec_get_gap(lock);
6030
6031 switch (lock_get_mode(lock)) {
6032 case LOCK_S:
6033 if (is_gap_lock) {
6034 return("S,GAP");
6035 } else {
6036 return("S");
6037 }
6038 case LOCK_X:
6039 if (is_gap_lock) {
6040 return("X,GAP");
6041 } else {
6042 return("X");
6043 }
6044 case LOCK_IS:
6045 if (is_gap_lock) {
6046 return("IS,GAP");
6047 } else {
6048 return("IS");
6049 }
6050 case LOCK_IX:
6051 if (is_gap_lock) {
6052 return("IX,GAP");
6053 } else {
6054 return("IX");
6055 }
6056 case LOCK_AUTO_INC:
6057 return("AUTO_INC");
6058 default:
6059 return("UNKNOWN");
6060 }
6061}
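
/* The strings above are what external monitoring sees, e.g. in the
LOCK_MODE column of INFORMATION_SCHEMA.INNODB_LOCKS: "S", "S,GAP",
"X", "X,GAP", "IS", "IS,GAP", "IX", "IX,GAP" or "AUTO_INC". Note that
only the gap flag is reflected; the insert-intention flag is not. */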
6062
6063/*******************************************************************//**
6064Gets the type of a lock in a human readable string.
6065The string should not be free()'d or modified.
6066@return lock type */
6067const char*
6068lock_get_type_str(
6069/*==============*/
6070 const lock_t* lock) /*!< in: lock */
6071{
6072 switch (lock_get_type_low(lock)) {
6073 case LOCK_REC:
6074 return("RECORD");
6075 case LOCK_TABLE:
6076 return("TABLE");
6077 default:
6078 return("UNKNOWN");
6079 }
6080}
6081
6082/*******************************************************************//**
6083Gets the table on which the lock is.
6084@return table */
6085UNIV_INLINE
6086dict_table_t*
6087lock_get_table(
6088/*===========*/
6089 const lock_t* lock) /*!< in: lock */
6090{
6091 switch (lock_get_type_low(lock)) {
6092 case LOCK_REC:
6093 ut_ad(dict_index_is_clust(lock->index)
6094 || !dict_index_is_online_ddl(lock->index));
6095 return(lock->index->table);
6096 case LOCK_TABLE:
6097 return(lock->un_member.tab_lock.table);
6098 default:
6099 ut_error;
6100 return(NULL);
6101 }
6102}
6103
6104/*******************************************************************//**
6105Gets the id of the table on which the lock is.
6106@return id of the table */
6107table_id_t
6108lock_get_table_id(
6109/*==============*/
6110 const lock_t* lock) /*!< in: lock */
6111{
6112 dict_table_t* table;
6113
6114 table = lock_get_table(lock);
6115
6116 return(table->id);
6117}
6118
6119/** Determine which table a lock is associated with.
6120@param[in] lock the lock
6121@return name of the table */
6122const table_name_t&
6123lock_get_table_name(
6124 const lock_t* lock)
6125{
6126 return(lock_get_table(lock)->name);
6127}
6128
6129/*******************************************************************//**
6130For a record lock, gets the index on which the lock is.
6131@return index */
6132const dict_index_t*
6133lock_rec_get_index(
6134/*===============*/
6135 const lock_t* lock) /*!< in: lock */
6136{
6137 ut_a(lock_get_type_low(lock) == LOCK_REC);
6138 ut_ad(dict_index_is_clust(lock->index)
6139 || !dict_index_is_online_ddl(lock->index));
6140
6141 return(lock->index);
6142}
6143
6144/*******************************************************************//**
6145For a record lock, gets the name of the index on which the lock is.
6146The string should not be free()'d or modified.
6147@return name of the index */
6148const char*
6149lock_rec_get_index_name(
6150/*====================*/
6151 const lock_t* lock) /*!< in: lock */
6152{
6153 ut_a(lock_get_type_low(lock) == LOCK_REC);
6154 ut_ad(dict_index_is_clust(lock->index)
6155 || !dict_index_is_online_ddl(lock->index));
6156
6157 return(lock->index->name);
6158}
6159
6160/*******************************************************************//**
6161For a record lock, gets the tablespace number on which the lock is.
6162@return tablespace number */
6163ulint
6164lock_rec_get_space_id(
6165/*==================*/
6166 const lock_t* lock) /*!< in: lock */
6167{
6168 ut_a(lock_get_type_low(lock) == LOCK_REC);
6169
6170 return(lock->un_member.rec_lock.space);
6171}
6172
6173/*******************************************************************//**
6174For a record lock, gets the page number on which the lock is.
6175@return page number */
6176ulint
6177lock_rec_get_page_no(
6178/*=================*/
6179 const lock_t* lock) /*!< in: lock */
6180{
6181 ut_a(lock_get_type_low(lock) == LOCK_REC);
6182
6183 return(lock->un_member.rec_lock.page_no);
6184}
6185
6186/*********************************************************************//**
6187Cancels a waiting lock request and releases possible other transactions
6188waiting behind it. */
6189void
6190lock_cancel_waiting_and_release(
6191/*============================*/
6192 lock_t* lock) /*!< in/out: waiting lock request */
6193{
6194 que_thr_t* thr;
6195
6196 ut_ad(lock_mutex_own());
6197 ut_ad(trx_mutex_own(lock->trx));
6198
6199 lock->trx->lock.cancel = true;
6200
6201 if (lock_get_type_low(lock) == LOCK_REC) {
6202
6203 lock_rec_dequeue_from_page(lock);
6204 } else {
6205 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
6206
6207 if (lock->trx->autoinc_locks != NULL) {
6208 /* Release the transaction's AUTOINC locks. */
6209 lock_release_autoinc_locks(lock->trx);
6210 }
6211
6212 lock_table_dequeue(lock);
6213 }
6214
6215 /* Reset the wait flag and the back pointer to lock in trx. */
6216
6217 lock_reset_lock_and_trx_wait(lock);
6218
6219 /* The following function releases the trx from lock wait. */
6220
6221 thr = que_thr_end_lock_wait(lock->trx);
6222
6223 if (thr != NULL) {
6224 lock_wait_release_thread_if_suspended(thr);
6225 }
6226
6227 lock->trx->lock.cancel = false;
6228}
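
/* Note: this is the common cancellation path; it is reached e.g. when
a lock wait times out, when the waiting statement is killed, or when
the transaction is chosen as a deadlock victim (see
DeadlockChecker::trx_rollback() below). */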
6229
6230/*********************************************************************//**
6231Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
function should be called at the end of an SQL statement, by the
6233connection thread that owns the transaction (trx->mysql_thd). */
6234void
6235lock_unlock_table_autoinc(
6236/*======================*/
6237 trx_t* trx) /*!< in/out: transaction */
6238{
6239 ut_ad(!lock_mutex_own());
6240 ut_ad(!trx_mutex_own(trx));
6241 ut_ad(!trx->lock.wait_lock);
6242
6243 /* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
6244 but not COMMITTED transactions. */
6245
6246 ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
6247 || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
6248
6249 /* This function is invoked for a running transaction by the
6250 thread that is serving the transaction. Therefore it is not
6251 necessary to hold trx->mutex here. */
6252
6253 if (lock_trx_holds_autoinc_locks(trx)) {
6254 lock_mutex_enter();
6255
6256 lock_release_autoinc_locks(trx);
6257
6258 lock_mutex_exit();
6259 }
6260}
6261
6262/*********************************************************************//**
6263Releases a transaction's locks, and releases possible other transactions
6264waiting because of these locks. Change the state of the transaction to
6265TRX_STATE_COMMITTED_IN_MEMORY. */
6266void
6267lock_trx_release_locks(
6268/*===================*/
6269 trx_t* trx) /*!< in/out: transaction */
6270{
6271 check_trx_state(trx);
6272 ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED)
6273 || trx_state_eq(trx, TRX_STATE_ACTIVE));
6274
6275 bool release_lock = UT_LIST_GET_LEN(trx->lock.trx_locks) > 0;
6276
6277 /* Don't take lock_sys.mutex if trx didn't acquire any lock. */
6278 if (release_lock) {
6279
6280 /* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
6281 is protected by both the lock_sys.mutex and the trx->mutex. */
6282 lock_mutex_enter();
6283 }
6284
6285 /* The following assignment makes the transaction committed in memory
6286 and makes its changes to data visible to other transactions.
6287 NOTE that there is a small discrepancy from the strict formal
6288 visibility rules here: a human user of the database can see
6289 modifications made by another transaction T even before the necessary
6290 log segment has been flushed to the disk. If the database happens to
6291 crash before the flush, the user has seen modifications from T which
6292 will never be a committed transaction. However, any transaction T2
6293 which sees the modifications of the committing transaction T, and
6294 which also itself makes modifications to the database, will get an lsn
6295 larger than the committing transaction T. In the case where the log
6296 flush fails, and T never gets committed, also T2 will never get
6297 committed. */
6298
6299 /*--------------------------------------*/
6300 trx_mutex_enter(trx);
6301 trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
6302 trx_mutex_exit(trx);
6303 /*--------------------------------------*/
6304
6305 if (trx->is_referenced()) {
6306
6307 ut_a(release_lock);
6308
6309 lock_mutex_exit();
6310
6311 while (trx->is_referenced()) {
6312
6313 DEBUG_SYNC_C("waiting_trx_is_not_referenced");
6314
			/* Doing an implicit-to-explicit conversion
			should not be expensive. */
6317 ut_delay(srv_spin_wait_delay);
6318 }
6319
6320 lock_mutex_enter();
6321 }
6322
6323 ut_ad(!trx->is_referenced());
6324
6325 if (release_lock) {
6326
6327 lock_release(trx);
6328
6329 lock_mutex_exit();
6330 }
6331
6332 trx->lock.n_rec_locks = 0;
6333
6334 /* We don't remove the locks one by one from the vector for
6335 efficiency reasons. We simply reset it because we would have
6336 released all the locks anyway. */
6337
6338 trx->lock.table_locks.clear();
6339
6340 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
6341 ut_a(ib_vector_is_empty(trx->autoinc_locks));
6342 ut_a(trx->lock.table_locks.empty());
6343
6344 mem_heap_empty(trx->lock.lock_heap);
6345}
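
/* Note: the busy-wait on trx->is_referenced() above pairs with the
reference acquired in lock_rec_convert_impl_to_expl(): a thread that is
converting this transaction's implicit lock into an explicit one holds
a reference, and the commit must not complete until that conversion has
finished. */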
6346
6347static inline dberr_t lock_trx_handle_wait_low(trx_t* trx)
6348{
6349 ut_ad(lock_mutex_own());
6350 ut_ad(trx_mutex_own(trx));
6351
6352 if (trx->lock.was_chosen_as_deadlock_victim) {
6353 return DB_DEADLOCK;
6354 }
6355 if (!trx->lock.wait_lock) {
6356 /* The lock was probably granted before we got here. */
6357 return DB_SUCCESS;
6358 }
6359
6360 lock_cancel_waiting_and_release(trx->lock.wait_lock);
6361 return DB_LOCK_WAIT;
6362}
6363
6364/*********************************************************************//**
Check whether the transaction has already been rolled back because it
was selected as a deadlock victim; if it is instead still waiting for
a lock, cancel that wait.
6368@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
6369dberr_t
6370lock_trx_handle_wait(
6371/*=================*/
6372 trx_t* trx) /*!< in/out: trx lock state */
6373{
6374 lock_mutex_enter();
6375 trx_mutex_enter(trx);
6376 dberr_t err = lock_trx_handle_wait_low(trx);
6377 lock_mutex_exit();
6378 trx_mutex_exit(trx);
6379 return err;
6380}
6381
6382/*********************************************************************//**
6383Get the number of locks on a table.
6384@return number of locks */
6385ulint
6386lock_table_get_n_locks(
6387/*===================*/
6388 const dict_table_t* table) /*!< in: table */
6389{
6390 ulint n_table_locks;
6391
6392 lock_mutex_enter();
6393
6394 n_table_locks = UT_LIST_GET_LEN(table->locks);
6395
6396 lock_mutex_exit();
6397
6398 return(n_table_locks);
6399}
6400
6401#ifdef UNIV_DEBUG
/**
  Do an exhaustive check for any locks (table or rec) held by a single
  transaction against the given table.

  @param[in] element rw_trx_hash element (transaction) whose locks are checked
  @param[in] table check if there are any locks held on records in this table
                   or on the table itself
  @return 0, so that the iteration over the hash continues
*/
6408
6409static my_bool lock_table_locks_lookup(rw_trx_hash_element_t *element,
6410 const dict_table_t *table)
6411{
6412 ut_ad(lock_mutex_own());
6413 mutex_enter(&element->mutex);
6414 if (element->trx)
6415 {
6416 check_trx_state(element->trx);
6417 for (const lock_t *lock= UT_LIST_GET_FIRST(element->trx->lock.trx_locks);
6418 lock != NULL;
6419 lock= UT_LIST_GET_NEXT(trx_locks, lock))
6420 {
6421 ut_ad(lock->trx == element->trx);
6422 if (lock_get_type_low(lock) == LOCK_REC)
6423 {
6424 ut_ad(!dict_index_is_online_ddl(lock->index) ||
6425 dict_index_is_clust(lock->index));
6426 ut_ad(lock->index->table != table);
6427 }
6428 else
6429 ut_ad(lock->un_member.tab_lock.table != table);
6430 }
6431 }
6432 mutex_exit(&element->mutex);
6433 return 0;
6434}
6435#endif /* UNIV_DEBUG */
6436
6437/*******************************************************************//**
6438Check if there are any locks (table or rec) against table.
6439@return true if table has either table or record locks. */
6440bool
6441lock_table_has_locks(
6442/*=================*/
6443 const dict_table_t* table) /*!< in: check if there are any locks
6444 held on records in this table or on the
6445 table itself */
6446{
6447 ibool has_locks;
6448
6449 ut_ad(table != NULL);
6450 lock_mutex_enter();
6451
6452 has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;
6453
6454#ifdef UNIV_DEBUG
6455 if (!has_locks) {
6456 trx_sys.rw_trx_hash.iterate(
6457 reinterpret_cast<my_hash_walk_action>
6458 (lock_table_locks_lookup),
6459 const_cast<dict_table_t*>(table));
6460 }
6461#endif /* UNIV_DEBUG */
6462
6463 lock_mutex_exit();
6464
6465 return(has_locks);
6466}
6467
6468/*******************************************************************//**
6469Initialise the table lock list. */
6470void
6471lock_table_lock_list_init(
6472/*======================*/
6473 table_lock_list_t* lock_list) /*!< List to initialise */
6474{
6475 UT_LIST_INIT(*lock_list, &lock_table_t::locks);
6476}
6477
6478/*******************************************************************//**
6479Initialise the trx lock list. */
6480void
6481lock_trx_lock_list_init(
6482/*====================*/
6483 trx_lock_list_t* lock_list) /*!< List to initialise */
6484{
6485 UT_LIST_INIT(*lock_list, &lock_t::trx_locks);
6486}
6487
6488/*******************************************************************//**
6489Set the lock system timeout event. */
6490void
6491lock_set_timeout_event()
6492/*====================*/
6493{
6494 os_event_set(lock_sys.timeout_event);
6495}
6496
6497#ifdef UNIV_DEBUG
6498/*******************************************************************//**
6499Check if the transaction holds any locks on the sys tables
6500or its records.
6501@return the strongest lock found on any sys table or 0 for none */
6502const lock_t*
6503lock_trx_has_sys_table_locks(
6504/*=========================*/
6505 const trx_t* trx) /*!< in: transaction to check */
6506{
6507 const lock_t* strongest_lock = 0;
6508 lock_mode strongest = LOCK_NONE;
6509
6510 lock_mutex_enter();
6511
6512 typedef lock_pool_t::const_reverse_iterator iterator;
6513
6514 iterator end = trx->lock.table_locks.rend();
6515 iterator it = trx->lock.table_locks.rbegin();
6516
	/* Find a valid mode. Note: trx->lock.table_locks can be empty. */
6518
6519 for (/* No op */; it != end; ++it) {
6520 const lock_t* lock = *it;
6521
6522 if (lock != NULL
6523 && dict_is_sys_table(lock->un_member.tab_lock.table->id)) {
6524
6525 strongest = lock_get_mode(lock);
6526 ut_ad(strongest != LOCK_NONE);
6527 strongest_lock = lock;
6528 break;
6529 }
6530 }
6531
6532 if (strongest == LOCK_NONE) {
6533 lock_mutex_exit();
6534 return(NULL);
6535 }
6536
6537 for (/* No op */; it != end; ++it) {
6538 const lock_t* lock = *it;
6539
6540 if (lock == NULL) {
6541 continue;
6542 }
6543
6544 ut_ad(trx == lock->trx);
6545 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
6546 ut_ad(lock->un_member.tab_lock.table != NULL);
6547
6548 lock_mode mode = lock_get_mode(lock);
6549
6550 if (dict_is_sys_table(lock->un_member.tab_lock.table->id)
6551 && lock_mode_stronger_or_eq(mode, strongest)) {
6552
6553 strongest = mode;
6554 strongest_lock = lock;
6555 }
6556 }
6557
6558 lock_mutex_exit();
6559
6560 return(strongest_lock);
6561}
6562
6563/*******************************************************************//**
6564Check if the transaction holds an exclusive lock on a record.
6565@return whether the locks are held */
6566bool
6567lock_trx_has_rec_x_lock(
6568/*====================*/
6569 const trx_t* trx, /*!< in: transaction to check */
6570 const dict_table_t* table, /*!< in: table to check */
6571 const buf_block_t* block, /*!< in: buffer block of the record */
6572 ulint heap_no)/*!< in: record heap number */
6573{
6574 ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);
6575
6576 lock_mutex_enter();
6577 ut_a(lock_table_has(trx, table, LOCK_IX)
6578 || table->is_temporary());
6579 ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
6580 block, heap_no, trx)
6581 || table->is_temporary());
6582 lock_mutex_exit();
6583 return(true);
6584}
6585#endif /* UNIV_DEBUG */
6586
6587/** rewind(3) the file used for storing the latest detected deadlock and
6588print a heading message to stderr if printing of all deadlocks to stderr
6589is enabled. */
6590void
6591DeadlockChecker::start_print()
6592{
6593 ut_ad(lock_mutex_own());
6594
6595 rewind(lock_latest_err_file);
6596 ut_print_timestamp(lock_latest_err_file);
6597
6598 if (srv_print_all_deadlocks) {
6599 ib::info() << "Transactions deadlock detected, dumping"
6600 << " detailed information.";
6601 }
6602}
6603
6604/** Print a message to the deadlock file and possibly to stderr.
6605@param msg message to print */
6606void
6607DeadlockChecker::print(const char* msg)
6608{
6609 fputs(msg, lock_latest_err_file);
6610
6611 if (srv_print_all_deadlocks) {
6612 ib::info() << msg;
6613 }
6614}
6615
6616/** Print transaction data to the deadlock file and possibly to stderr.
6617@param trx transaction
6618@param max_query_len max query length to print */
6619void
6620DeadlockChecker::print(const trx_t* trx, ulint max_query_len)
6621{
6622 ut_ad(lock_mutex_own());
6623
6624 ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock);
6625 ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
6626 ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
6627
6628 trx_print_low(lock_latest_err_file, trx, max_query_len,
6629 n_rec_locks, n_trx_locks, heap_size);
6630
6631 if (srv_print_all_deadlocks) {
6632 trx_print_low(stderr, trx, max_query_len,
6633 n_rec_locks, n_trx_locks, heap_size);
6634 }
6635}
6636
6637/** Print lock data to the deadlock file and possibly to stderr.
6638@param lock record or table type lock */
6639void
6640DeadlockChecker::print(const lock_t* lock)
6641{
6642 ut_ad(lock_mutex_own());
6643
6644 if (lock_get_type_low(lock) == LOCK_REC) {
6645 lock_rec_print(lock_latest_err_file, lock);
6646
6647 if (srv_print_all_deadlocks) {
6648 lock_rec_print(stderr, lock);
6649 }
6650 } else {
6651 lock_table_print(lock_latest_err_file, lock);
6652
6653 if (srv_print_all_deadlocks) {
6654 lock_table_print(stderr, lock);
6655 }
6656 }
6657}
6658
6659/** Get the next lock in the queue that is owned by a transaction whose
6660sub-tree has not already been searched.
6661Note: "next" here means PREV for table locks.
6662
6663@param lock Lock in queue
@param heap_no heap number if lock is a record lock, else ULINT_UNDEFINED
6665
6666@return next lock or NULL if at end of queue */
6667const lock_t*
6668DeadlockChecker::get_next_lock(const lock_t* lock, ulint heap_no) const
6669{
6670 ut_ad(lock_mutex_own());
6671
6672 do {
6673 if (lock_get_type_low(lock) == LOCK_REC) {
6674 ut_ad(heap_no != ULINT_UNDEFINED);
6675 lock = lock_rec_get_next_const(heap_no, lock);
6676 } else {
6677 ut_ad(heap_no == ULINT_UNDEFINED);
6678 ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
6679
6680 lock = UT_LIST_GET_NEXT(
6681 un_member.tab_lock.locks, lock);
6682 }
6683
6684 } while (lock != NULL && is_visited(lock));
6685
6686 ut_ad(lock == NULL
6687 || lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));
6688
6689 return(lock);
6690}
6691
6692/** Get the first lock to search. The search starts from the current
6693wait_lock. What we are really interested in is an edge from the
6694current wait_lock's owning transaction to another transaction that has
6695a lock ahead in the queue. We skip locks where the owning transaction's
6696sub-tree has already been searched.
6697
6698Note: The record locks are traversed from the oldest lock to the
6699latest. For table locks we go from latest to oldest.
6700
6701For record locks, we first position the "iterator" on the first lock on
6702the page and then reposition on the actual heap_no. This is required
due to the way the record lock hash is implemented.
6704
@param[out] heap_no heap number of the record if this is a record lock, else ULINT_UNDEFINED.
6706@return first lock or NULL */
6707const lock_t*
6708DeadlockChecker::get_first_lock(ulint* heap_no) const
6709{
6710 ut_ad(lock_mutex_own());
6711
6712 const lock_t* lock = m_wait_lock;
6713
6714 if (lock_get_type_low(lock) == LOCK_REC) {
6715 hash_table_t* lock_hash;
6716
6717 lock_hash = lock->type_mode & LOCK_PREDICATE
6718 ? lock_sys.prdt_hash
6719 : lock_sys.rec_hash;
6720
6721 /* We are only interested in records that match the heap_no. */
6722 *heap_no = lock_rec_find_set_bit(lock);
6723
6724 ut_ad(*heap_no <= 0xffff);
6725 ut_ad(*heap_no != ULINT_UNDEFINED);
6726
6727 /* Find the locks on the page. */
6728 lock = lock_rec_get_first_on_page_addr(
6729 lock_hash,
6730 lock->un_member.rec_lock.space,
6731 lock->un_member.rec_lock.page_no);
6732
6733 /* Position on the first lock on the physical record.*/
6734 if (!lock_rec_get_nth_bit(lock, *heap_no)) {
6735 lock = lock_rec_get_next_const(*heap_no, lock);
6736 }
6737
6738 ut_a(!lock_get_wait(lock));
6739 } else {
6740 /* Table locks don't care about the heap_no. */
6741 *heap_no = ULINT_UNDEFINED;
6742 ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
6743 dict_table_t* table = lock->un_member.tab_lock.table;
6744 lock = UT_LIST_GET_FIRST(table->locks);
6745 }
6746
	/* We must find at least two locks, otherwise there cannot be a
	waiting lock; also, the first lock cannot be the wait_lock
	(except under the VATS scheduling algorithm). */
6749 ut_a(lock != NULL);
6750 ut_a(lock != m_wait_lock ||
6751 (innodb_lock_schedule_algorithm
6752 == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
6753 && !thd_is_replication_slave_thread(lock->trx->mysql_thd)));
6754
6755 /* Check that the lock type doesn't change. */
6756 ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));
6757
6758 return(lock);
6759}
6760
6761/** Notify that a deadlock has been detected and print the conflicting
6762transaction info.
6763@param lock lock causing deadlock */
6764void
6765DeadlockChecker::notify(const lock_t* lock) const
6766{
6767 ut_ad(lock_mutex_own());
6768
6769 start_print();
6770
6771 print("\n*** (1) TRANSACTION:\n");
6772
6773 print(m_wait_lock->trx, 3000);
6774
6775 print("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");
6776
6777 print(m_wait_lock);
6778
6779 print("*** (2) TRANSACTION:\n");
6780
6781 print(lock->trx, 3000);
6782
6783 print("*** (2) HOLDS THE LOCK(S):\n");
6784
6785 print(lock);
6786
6787 /* It is possible that the joining transaction was granted its
6788 lock when we rolled back some other waiting transaction. */
6789
6790 if (m_start->lock.wait_lock != 0) {
6791 print("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
6792
6793 print(m_start->lock.wait_lock);
6794 }
6795
6796 DBUG_PRINT("ib_lock", ("deadlock detected"));
6797}
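
/* Note: the output produced above goes to lock_latest_err_file, whose
contents are displayed in the LATEST DETECTED DEADLOCK section of
SHOW ENGINE INNODB STATUS; if innodb_print_all_deadlocks is enabled
(srv_print_all_deadlocks), a copy is also written to the error log. */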
6798
/** Select the victim transaction that should be rolled back.
6800@return victim transaction */
6801const trx_t*
6802DeadlockChecker::select_victim() const
6803{
6804 ut_ad(lock_mutex_own());
6805 ut_ad(m_start->lock.wait_lock != 0);
6806 ut_ad(m_wait_lock->trx != m_start);
6807
6808 if (trx_weight_ge(m_wait_lock->trx, m_start)) {
6809 /* The joining transaction is 'smaller',
6810 choose it as the victim and roll it back. */
6811#ifdef WITH_WSREP
6812 if (wsrep_thd_is_BF(m_start->mysql_thd, TRUE)) {
6813 return(m_wait_lock->trx);
6814 }
6815#endif /* WITH_WSREP */
6816 return(m_start);
6817 }
6818
6819#ifdef WITH_WSREP
6820 if (wsrep_thd_is_BF(m_wait_lock->trx->mysql_thd, TRUE)) {
6821 return(m_start);
6822 }
6823#endif /* WITH_WSREP */
6824
6825 return(m_wait_lock->trx);
6826}
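
/* Note: trx_weight_ge() above compares transaction "weights", which in
this implementation grow roughly with the number of locks held and the
amount of undo log written, so the victim is normally the transaction
that has done less work. Under Galera, a brute-force (BF) transaction
is never chosen as the victim. */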
6827
6828/** Looks iteratively for a deadlock. Note: the joining transaction may
6829have been granted its lock by the deadlock checks.
6830@return 0 if no deadlock else the victim transaction instance.*/
6831const trx_t*
6832DeadlockChecker::search()
6833{
6834 ut_ad(lock_mutex_own());
6835 ut_ad(!trx_mutex_own(m_start));
6836
6837 ut_ad(m_start != NULL);
6838 ut_ad(m_wait_lock != NULL);
6839 check_trx_state(m_wait_lock->trx);
6840 ut_ad(m_mark_start <= s_lock_mark_counter);
6841
6842 /* Look at the locks ahead of wait_lock in the lock queue. */
6843 ulint heap_no;
6844 const lock_t* lock = get_first_lock(&heap_no);
6845
6846 for (;;) {
6847 /* We should never visit the same sub-tree more than once. */
6848 ut_ad(lock == NULL || !is_visited(lock));
6849
6850 while (m_n_elems > 0 && lock == NULL) {
6851
6852 /* Restore previous search state. */
6853
6854 pop(lock, heap_no);
6855
6856 lock = get_next_lock(lock, heap_no);
6857 }
6858
6859 if (lock == NULL) {
6860 break;
6861 }
6862
6863 if (lock == m_wait_lock) {
6864
6865 /* We can mark this subtree as searched */
6866 ut_ad(lock->trx->lock.deadlock_mark <= m_mark_start);
6867
6868 lock->trx->lock.deadlock_mark = ++s_lock_mark_counter;
6869
6870 /* We are not prepared for an overflow. This 64-bit
6871 counter should never wrap around. At 10^9 increments
6872 per second, it would take 10^3 years of uptime. */
6873
6874 ut_ad(s_lock_mark_counter > 0);
6875
6876 /* Backtrack */
6877 lock = NULL;
6878 continue;
6879 }
6880
6881 if (!lock_has_to_wait(m_wait_lock, lock)) {
6882 /* No conflict, next lock */
6883 lock = get_next_lock(lock, heap_no);
6884 continue;
6885 }
6886
6887 if (lock->trx == m_start) {
6888 /* Found a cycle. */
6889 notify(lock);
6890 return select_victim();
6891 }
6892
6893 if (is_too_deep()) {
6894 /* Search too deep to continue. */
6895 m_too_deep = true;
6896 return m_start;
6897 }
6898
		/* We do not need to report autoinc locks to the upper
		layer. These locks are released before commit, so they
		cannot cause deadlocks with binlog-fixed commit
		order. */
6903 if (m_report_waiters
6904 && (lock_get_type_low(lock) != LOCK_TABLE
6905 || lock_get_mode(lock) != LOCK_AUTO_INC)) {
6906 thd_rpl_deadlock_check(m_start->mysql_thd,
6907 lock->trx->mysql_thd);
6908 }
6909
6910 if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
6911 /* Another trx ahead has requested a lock in an
6912 incompatible mode, and is itself waiting for a lock. */
6913
6914 ++m_cost;
6915
6916 if (!push(lock, heap_no)) {
6917 m_too_deep = true;
6918 return m_start;
6919 }
6920
6921 m_wait_lock = lock->trx->lock.wait_lock;
6922
6923 lock = get_first_lock(&heap_no);
6924
6925 if (is_visited(lock)) {
6926 lock = get_next_lock(lock, heap_no);
6927 }
6928 } else {
6929 lock = get_next_lock(lock, heap_no);
6930 }
6931 }
6932
6933 ut_a(lock == NULL && m_n_elems == 0);
6934
6935 /* No deadlock found. */
6936 return(0);
6937}
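
/* Illustrative wait-for cycle that the search above detects, assuming
two transactions and two rows:

	trx A: UPDATE t SET v = 1 WHERE id = 1;  -- x-lock on row 1
	trx B: UPDATE t SET v = 2 WHERE id = 2;  -- x-lock on row 2
	trx A: UPDATE t SET v = 1 WHERE id = 2;  -- waits for trx B
	trx B: UPDATE t SET v = 2 WHERE id = 1;  -- waits for trx A

When trx B enqueues its waiting request, the depth-first search
starting from its wait_lock reaches a conflicting lock owned by trx B
itself (lock->trx == m_start) and reports the cycle. The
deadlock_mark/s_lock_mark_counter bookkeeping lets subtrees that were
already searched be skipped. */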
6938
6939/** Print info about transaction that was rolled back.
6940@param trx transaction rolled back
6941@param lock lock trx wants */
6942void
6943DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock)
6944{
6945 ut_ad(lock_mutex_own());
6946
6947 /* If the lock search exceeds the max step
6948 or the max depth, the current trx will be
6949 the victim. Print its information. */
6950 start_print();
6951
6952 print("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
6953 " WAITS-FOR GRAPH, WE WILL ROLL BACK"
6954 " FOLLOWING TRANSACTION \n\n"
6955 "*** TRANSACTION:\n");
6956
6957 print(trx, 3000);
6958
6959 print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
6960
6961 print(lock);
6962}
6963
6964/** Rollback transaction selected as the victim. */
6965void
6966DeadlockChecker::trx_rollback()
6967{
6968 ut_ad(lock_mutex_own());
6969
6970 trx_t* trx = m_wait_lock->trx;
6971
6972 print("*** WE ROLL BACK TRANSACTION (1)\n");
6973
6974 trx_mutex_enter(trx);
6975
6976 trx->lock.was_chosen_as_deadlock_victim = true;
6977
6978 lock_cancel_waiting_and_release(trx->lock.wait_lock);
6979
6980 trx_mutex_exit(trx);
6981}
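
/* Note: only the victim's waiting lock request is cancelled here. The
victim notices trx->lock.was_chosen_as_deadlock_victim when it resumes
(see lock_trx_handle_wait_low() above), receives DB_DEADLOCK, and the
actual rollback is then performed by the upper layer. */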
6982
/** Checks if a joining lock request results in a deadlock. If a deadlock is
found, this function will resolve the deadlock by choosing a victim transaction
and rolling it back. It will attempt to resolve all deadlocks. The joining
transaction is returned if it was chosen as the victim; NULL is returned if
some other transaction was chosen as a victim and rolled back, or if no
deadlock was found.
6988
6989@param[in] lock lock the transaction is requesting
6990@param[in,out] trx transaction requesting the lock
6991
@return transaction instance chosen as victim or 0 */
6993const trx_t*
6994DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx)
6995{
6996 ut_ad(lock_mutex_own());
6997 ut_ad(trx_mutex_own(trx));
6998 check_trx_state(trx);
6999 ut_ad(!srv_read_only_mode);
7000
7001 if (!innobase_deadlock_detect) {
7002 return(NULL);
7003 }
7004
7005 /* Release the mutex to obey the latching order.
7006 This is safe, because DeadlockChecker::check_and_resolve()
7007 is invoked when a lock wait is enqueued for the currently
7008 running transaction. Because m_trx is a running transaction
7009 (it is not currently suspended because of a lock wait),
7010 its state can only be changed by this thread, which is
7011 currently associated with the transaction. */
7012
7013 trx_mutex_exit(trx);
7014
7015 const trx_t* victim_trx;
7016 const bool report_waiters = trx->mysql_thd
7017 && thd_need_wait_reports(trx->mysql_thd);
7018
7019 /* Try and resolve as many deadlocks as possible. */
7020 do {
7021 DeadlockChecker checker(trx, lock, s_lock_mark_counter,
7022 report_waiters);
7023
7024 victim_trx = checker.search();
7025
		/* The search was too deep to complete: the joining
		transaction itself is chosen as the victim. We only
		print the diagnostics here; the caller performs the
		rollback. */
7030 if (checker.is_too_deep()) {
7031
7032 ut_ad(trx == checker.m_start);
7033 ut_ad(trx == victim_trx);
7034
7035 rollback_print(victim_trx, lock);
7036
7037 MONITOR_INC(MONITOR_DEADLOCK);
7038
7039 break;
7040
7041 } else if (victim_trx != NULL && victim_trx != trx) {
7042
7043 ut_ad(victim_trx == checker.m_wait_lock->trx);
7044
7045 checker.trx_rollback();
7046
7047 lock_deadlock_found = true;
7048
7049 MONITOR_INC(MONITOR_DEADLOCK);
7050 }
7051
7052 } while (victim_trx != NULL && victim_trx != trx);
7053
7054 /* If the joining transaction was selected as the victim. */
7055 if (victim_trx != NULL) {
7056
7057 print("*** WE ROLL BACK TRANSACTION (2)\n");
7058
7059 lock_deadlock_found = true;
7060 }
7061
7062 trx_mutex_enter(trx);
7063
7064 return(victim_trx);
7065}
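
/* Note: with innodb_deadlock_detect=OFF (innobase_deadlock_detect
above), no waits-for graph search is performed at all; lock waits are
then broken only by innodb_lock_wait_timeout. */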
7066
7067/**
7068Allocate cached locks for the transaction.
7069@param trx allocate cached record locks for this transaction */
7070void
7071lock_trx_alloc_locks(trx_t* trx)
7072{
7073 ulint sz = REC_LOCK_SIZE * REC_LOCK_CACHE;
7074 byte* ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));
7075
7076 /* We allocate one big chunk and then distribute it among
7077 the rest of the elements. The allocated chunk pointer is always
7078 at index 0. */
7079
7080 for (ulint i = 0; i < REC_LOCK_CACHE; ++i, ptr += REC_LOCK_SIZE) {
7081 trx->lock.rec_pool.push_back(
7082 reinterpret_cast<ib_lock_t*>(ptr));
7083 }
7084
7085 sz = TABLE_LOCK_SIZE * TABLE_LOCK_CACHE;
7086 ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));
7087
7088 for (ulint i = 0; i < TABLE_LOCK_CACHE; ++i, ptr += TABLE_LOCK_SIZE) {
7089 trx->lock.table_pool.push_back(
7090 reinterpret_cast<ib_lock_t*>(ptr));
7091 }
7092
7093}
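
/* Note: the REC_LOCK_CACHE and TABLE_LOCK_CACHE pools above let a
transaction create its first few locks without a heap allocation per
lock. Because each pool is carved out of one malloc'ed chunk whose
pointer is kept at index 0, the whole cache can later be released by
freeing that first element. */
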
/*************************************************************//**
Updates the lock table when, after a page split, records are
merged from the right page onto the left page. */
7097UNIV_INTERN
7098void
7099lock_update_split_and_merge(
7100 const buf_block_t* left_block, /*!< in: left page to which merged */
7101 const rec_t* orig_pred, /*!< in: original predecessor of
7102 supremum on the left page before merge*/
7103 const buf_block_t* right_block) /*!< in: right page from which merged */
7104{
7105 const rec_t* left_next_rec;
7106
7107 ut_ad(page_is_leaf(left_block->frame));
7108 ut_ad(page_is_leaf(right_block->frame));
7109 ut_ad(page_align(orig_pred) == left_block->frame);
7110
7111 lock_mutex_enter();
7112
7113 left_next_rec = page_rec_get_next_const(orig_pred);
7114 ut_ad(!page_rec_is_default_row(left_next_rec));
7115
7116 /* Inherit the locks on the supremum of the left page to the
7117 first record which was moved from the right page */
7118 lock_rec_inherit_to_gap(
7119 left_block, left_block,
7120 page_rec_get_heap_no(left_next_rec),
7121 PAGE_HEAP_NO_SUPREMUM);
7122
7123 /* Reset the locks on the supremum of the left page,
7124 releasing waiting transactions */
7125 lock_rec_reset_and_release_wait(left_block,
7126 PAGE_HEAP_NO_SUPREMUM);
7127
7128 /* Inherit the locks to the supremum of the left page from the
7129 successor of the infimum on the right page */
7130 lock_rec_inherit_to_gap(left_block, right_block,
7131 PAGE_HEAP_NO_SUPREMUM,
7132 lock_get_min_heap_no(right_block));
7133
7134 lock_mutex_exit();
7135}
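
/* Note: the three steps above preserve gap lock semantics across the
change of page boundaries: locks on the gap before the left page's
supremum must follow the records moved in from the right page, and the
gap at the end of the left page must in turn inherit the locks from the
gap before the right page's first user record. */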
7136