/*****************************************************************************

Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2018, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file lock/lock0lock.cc
The transaction lock system

Created 5/7/1996 Heikki Tuuri
*******************************************************/

#define LOCK_MODULE_IMPLEMENTATION


#include "ha_prototypes.h"

#include <mysql/service_thd_error_context.h>
#include <sql_class.h>

#include "lock0lock.h"
#include "lock0priv.h"
#include "dict0mem.h"
#include "trx0purge.h"
#include "trx0sys.h"
#include "srv0mon.h"
#include "ut0vec.h"
#include "btr0btr.h"
#include "dict0boot.h"
#include "ut0new.h"
#include "row0sel.h"
#include "row0mysql.h"
#include "row0vers.h"
#include "pars0pars.h"

#include <set>

#ifdef WITH_WSREP
#include <mysql/service_wsrep.h>
#endif /* WITH_WSREP */

/** Lock scheduling algorithm */
ulong innodb_lock_schedule_algorithm;

/** The value of innodb_deadlock_detect */
my_bool innobase_deadlock_detect;

/** Total number of cached record locks */
static const ulint	REC_LOCK_CACHE = 8;

/** Maximum record lock size in bytes */
static const ulint	REC_LOCK_SIZE = sizeof(ib_lock_t) + 256;

/** Total number of cached table locks */
static const ulint	TABLE_LOCK_CACHE = 8;

/** Size in bytes of a table lock instance */
static const ulint	TABLE_LOCK_SIZE = sizeof(ib_lock_t);

/*********************************************************************//**
Checks if a waiting record lock request still has to wait in a queue.
@return lock that is causing the wait */
static
const lock_t*
lock_rec_has_to_wait_in_queue(
/*==========================*/
	const lock_t*	wait_lock);	/*!< in: waiting record lock */

/** Grant a lock to a waiting lock request and release the waiting transaction
after lock_reset_lock_and_trx_wait() has been called. */
static void lock_grant_after_reset(lock_t* lock);

extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd);
extern "C" int thd_need_wait_reports(const MYSQL_THD thd);
extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);

/** Print info of a table lock.
@param[in,out]	file	output stream
@param[in]	lock	table lock */
static
void
lock_table_print(FILE* file, const lock_t* lock);

/** Print info of a record lock.
@param[in,out]	file	output stream
@param[in]	lock	record lock */
static
void
lock_rec_print(FILE* file, const lock_t* lock);

/** Deadlock checker. */
class DeadlockChecker {
public:
	/** Check if a joining lock request results in a deadlock.
	If a deadlock is found, this function will resolve the deadlock
	by choosing a victim transaction and rolling it back. It will
	attempt to resolve all deadlocks. The returned transaction
	will be the joining transaction, or NULL if some other
	transaction was chosen as a victim and rolled back, or if no
	deadlock was found.

	@param lock lock the transaction is requesting
	@param trx transaction requesting the lock

	@return transaction chosen as victim, or NULL */
	static const trx_t* check_and_resolve(
		const lock_t*	lock,
		trx_t*		trx);

private:
	/** Do a shallow copy. Default destructor OK.
	@param trx the start transaction (start node)
	@param wait_lock lock that a transaction wants
	@param mark_start visited node counter
	@param report_waiters whether to call thd_rpl_deadlock_check() */
	DeadlockChecker(
		const trx_t*	trx,
		const lock_t*	wait_lock,
		ib_uint64_t	mark_start,
		bool		report_waiters)
		:
		m_cost(),
		m_start(trx),
		m_too_deep(),
		m_wait_lock(wait_lock),
		m_mark_start(mark_start),
		m_n_elems(),
		m_report_waiters(report_waiters)
	{
	}

	/** Check if the search is too deep. */
	bool is_too_deep() const
	{
		return(m_n_elems > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
		       || m_cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK);
	}

	/** Save current state.
	@param lock lock to push on the stack.
	@param heap_no the heap number to push on the stack.
	@return false if stack is full. */
	bool push(const lock_t* lock, ulint heap_no)
	{
		ut_ad((lock_get_type_low(lock) & LOCK_REC)
		      || (lock_get_type_low(lock) & LOCK_TABLE));

		ut_ad(((lock_get_type_low(lock) & LOCK_TABLE) != 0)
		      == (heap_no == ULINT_UNDEFINED));

		/* Ensure that the stack is bounded. */
		if (m_n_elems >= UT_ARR_SIZE(s_states)) {
			return(false);
		}

		state_t&	state = s_states[m_n_elems++];

		state.m_lock = lock;
		state.m_wait_lock = m_wait_lock;
		state.m_heap_no = heap_no;

		return(true);
	}

	/** Restore state.
	@param[out] lock current lock
	@param[out] heap_no current heap_no */
	void pop(const lock_t*& lock, ulint& heap_no)
	{
		ut_a(m_n_elems > 0);

		const state_t&	state = s_states[--m_n_elems];

		lock = state.m_lock;
		heap_no = state.m_heap_no;
		m_wait_lock = state.m_wait_lock;
	}

	/** Check whether the node has been visited.
	@param lock lock to check
	@return true if the node has been visited */
	bool is_visited(const lock_t* lock) const
	{
		return(lock->trx->lock.deadlock_mark > m_mark_start);
	}

	/** Get the next lock in the queue that is owned by a transaction
	whose sub-tree has not already been searched.
	Note: "next" here means PREV for table locks.
	@param lock Lock in queue
	@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
	@return next lock or NULL if at end of queue */
	const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const;

	/** Get the first lock to search. The search starts from the current
	wait_lock. What we are really interested in is an edge from the
	current wait_lock's owning transaction to another transaction that has
	a lock ahead in the queue. We skip locks where the owning transaction's
	sub-tree has already been searched.

	Note: The record locks are traversed from the oldest lock to the
	latest. For table locks we go from latest to oldest.

	For record locks, we first position the iterator on the first lock on
	the page and then reposition on the actual heap_no. This is required
	due to the way the record lock hash is implemented.

	@param[out] heap_no if rec lock, else ULINT_UNDEFINED.

	@return first lock or NULL */
	const lock_t* get_first_lock(ulint* heap_no) const;

	/** Notify that a deadlock has been detected and print the conflicting
	transaction info.
	@param lock lock causing deadlock */
	void notify(const lock_t* lock) const;

	/** Select the victim transaction that should be rolled back.
	@return victim transaction */
	const trx_t* select_victim() const;

	/** Rollback transaction selected as the victim. */
	void trx_rollback();

	/** Looks iteratively for a deadlock. Note: the joining transaction
	may have been granted its lock by the deadlock checks.

	@return 0 if no deadlock else the victim transaction.*/
	const trx_t* search();

	/** Print transaction data to the deadlock file and possibly to stderr.
	@param trx transaction
	@param max_query_len max query length to print */
	static void print(const trx_t* trx, ulint max_query_len);

	/** rewind(3) the file used for storing the latest detected deadlock
	and print a heading message to stderr if printing of all deadlocks to
	stderr is enabled. */
	static void start_print();

	/** Print lock data to the deadlock file and possibly to stderr.
	@param lock record or table type lock */
	static void print(const lock_t* lock);

	/** Print a message to the deadlock file and possibly to stderr.
	@param msg message to print */
	static void print(const char* msg);

	/** Print info about transaction that was rolled back.
	@param trx transaction rolled back
	@param lock lock trx wants */
	static void rollback_print(const trx_t* trx, const lock_t* lock);

private:
	/** DFS state information, used during deadlock checking. */
	struct state_t {
		const lock_t*	m_lock;		/*!< Current lock */
		const lock_t*	m_wait_lock;	/*!< Waiting for lock */
		ulint		m_heap_no;	/*!< heap number if rec lock */
	};

	/** Used in deadlock tracking. Protected by lock_sys.mutex. */
	static ib_uint64_t	s_lock_mark_counter;

	/** Calculation steps thus far. It is the count of the nodes visited. */
	ulint			m_cost;

	/** Joining transaction that is requesting a lock in an
	incompatible mode */
	const trx_t*		m_start;

	/** TRUE if search was too deep and was aborted */
	bool			m_too_deep;

	/** Lock that trx wants */
	const lock_t*		m_wait_lock;

	/** Value of lock_mark_count at the start of the deadlock check. */
	ib_uint64_t		m_mark_start;

	/** Number of states pushed onto the stack */
	size_t			m_n_elems;

	/** This is to avoid malloc/free calls. */
	static state_t		s_states[MAX_STACK_SIZE];

	/** Set if thd_rpl_deadlock_check() should be called for waits. */
	const bool		m_report_waiters;
};

/** Counter to mark visited nodes during deadlock search. */
ib_uint64_t	DeadlockChecker::s_lock_mark_counter = 0;

/** The stack used for deadlock searches. */
DeadlockChecker::state_t	DeadlockChecker::s_states[MAX_STACK_SIZE];
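
/* Illustrative sketch (simplified, not part of the server code): the
deadlock check is a bounded DFS over the wait-for graph, driven by the
explicit push()/pop() stack above instead of recursion. Assuming a caller
that already holds lock_sys.mutex and trx->mutex, the entry point is used
roughly like this:

	const trx_t*	victim = DeadlockChecker::check_and_resolve(lock, trx);

	if (victim == trx) {
		// the joining transaction itself was chosen as victim;
		// the caller must report DB_DEADLOCK to the SQL layer
	} else {
		// victim == NULL: either no deadlock was found, or some
		// other transaction was rolled back and the wait can proceed
	}
*/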

#ifdef UNIV_DEBUG
/*********************************************************************//**
Validates the lock system.
@return TRUE if ok */
static
bool
lock_validate();
/*============*/

/*********************************************************************//**
Validates the record lock queues on a page.
@return TRUE if ok */
static
ibool
lock_rec_validate_page(
/*===================*/
	const buf_block_t*	block)	/*!< in: buffer block */
	MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */

/* The lock system */
lock_sys_t lock_sys;

/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
static bool	lock_deadlock_found = false;

/** Only created if !srv_read_only_mode */
static FILE*	lock_latest_err_file;

/*********************************************************************//**
Reports that a transaction id is insensible, i.e., in the future. */
void
lock_report_trx_id_insanity(
/*========================*/
	trx_id_t	trx_id,		/*!< in: trx id */
	const rec_t*	rec,		/*!< in: user record */
	dict_index_t*	index,		/*!< in: index */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	trx_id_t	max_trx_id)	/*!< in: trx_sys.get_max_trx_id() */
{
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!rec_is_default_row(rec, index));

	ib::error()
		<< "Transaction id " << trx_id
		<< " associated with record" << rec_offsets_print(rec, offsets)
		<< " in index " << index->name
		<< " of table " << index->table->name
		<< " is greater than the global counter " << max_trx_id
		<< "! The table is corrupted.";
}

/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
@return true if ok */
bool
lock_check_trx_id_sanity(
/*=====================*/
	trx_id_t	trx_id,		/*!< in: trx id */
	const rec_t*	rec,		/*!< in: user record */
	dict_index_t*	index,		/*!< in: index */
	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
{
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!rec_is_default_row(rec, index));

	trx_id_t	max_trx_id = trx_sys.get_max_trx_id();
	ut_ad(max_trx_id || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN);

	if (max_trx_id && trx_id >= max_trx_id) {
		lock_report_trx_id_insanity(
			trx_id, rec, index, offsets, max_trx_id);
		return false;
	}
	return(true);
}

/*********************************************************************//**
Checks that a record is seen in a consistent read.
@return true if sees, or false if an earlier version of the record
should be retrieved */
bool
lock_clust_rec_cons_read_sees(
/*==========================*/
	const rec_t*	rec,	/*!< in: user record which should be read or
				passed over by a read cursor */
	dict_index_t*	index,	/*!< in: clustered index */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
	ReadView*	view)	/*!< in: consistent read view */
{
	ut_ad(dict_index_is_clust(index));
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!rec_is_default_row(rec, index));

	/* Temp-tables are not shared across connections, and multiple
	transactions from different connections cannot simultaneously
	operate on the same temp-table, so a read of a temp-table is
	always a consistent read. */
	if (index->table->is_temporary()) {
		return(true);
	}

	/* NOTE that we call this function while holding the search
	system latch. */

	trx_id_t	trx_id = row_get_rec_trx_id(rec, index, offsets);

	return(view->changes_visible(trx_id, index->table->name));
}

/*********************************************************************//**
Checks that a non-clustered index record is seen in a consistent read.

NOTE that a non-clustered index page contains so little information on
its modifications that even when this function returns false, the present
version of rec may still be the right one; we must check that from the
clustered index record.

@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
bool
lock_sec_rec_cons_read_sees(
/*========================*/
	const rec_t*		rec,	/*!< in: user record which
					should be read or passed over
					by a read cursor */
	const dict_index_t*	index,	/*!< in: index */
	const ReadView*		view)	/*!< in: consistent read view */
{
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(!index->is_primary());
	ut_ad(!rec_is_default_row(rec, index));

	/* NOTE that we might call this function while holding the search
	system latch. */

	if (index->table->is_temporary()) {

		/* Temp-tables are not shared across connections, and
		multiple transactions from different connections cannot
		simultaneously operate on the same temp-table, so a read
		of a temp-table is always a consistent read. */

		return(true);
	}

	trx_id_t	max_trx_id = page_get_max_trx_id(page_align(rec));

	ut_ad(max_trx_id > 0);

	return(view->sees(max_trx_id));
}
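
/* Illustrative note (simplified, not part of the server code): the
secondary-index check above is a conservative page-level shortcut.
PAGE_MAX_TRX_ID is the largest trx id that modified any record on the
page, so:

	if (view->sees(page_get_max_trx_id(page)))
		// every change on the page is visible: rec is usable as-is
	else
		// fall back to the clustered index record and its undo-log
		// versions to find the visible version of the row

A false return is therefore not an error; it merely forces the caller to
do the precise per-record visibility check in the clustered index. */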

/**
  Creates the lock system at database start.

  @param[in] n_cells number of slots in lock hash table
*/
void lock_sys_t::create(ulint n_cells)
{
	ut_ad(this == &lock_sys);

	m_initialised= true;

	waiting_threads = static_cast<srv_slot_t*>
		(ut_zalloc_nokey(srv_max_n_threads * sizeof *waiting_threads));
	last_slot = waiting_threads;

	mutex_create(LATCH_ID_LOCK_SYS, &mutex);

	mutex_create(LATCH_ID_LOCK_SYS_WAIT, &wait_mutex);

	timeout_event = os_event_create(0);

	rec_hash = hash_create(n_cells);
	prdt_hash = hash_create(n_cells);
	prdt_page_hash = hash_create(n_cells);

	if (!srv_read_only_mode) {
		lock_latest_err_file = os_file_create_tmpfile();
		ut_a(lock_latest_err_file);
	}
}

/** Calculates the fold value of a lock: used in migrating the hash table.
@param[in]	lock	record lock object
@return folded value */
static
ulint
lock_rec_lock_fold(
	const lock_t*	lock)
{
	return(lock_rec_fold(lock->un_member.rec_lock.space,
			     lock->un_member.rec_lock.page_no));
}
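
/* Illustrative example (values made up): the fold collapses the
(space, page_no) pair into one hash key, so all record locks of a page
land in the same hash chain. Assuming space=5 and page_no=42:

	ulint	fold = lock_rec_fold(5, 42);
	// the same key is used when inserting a lock into the hash,
	// e.g. HASH_INSERT(lock_t, hash, lock_sys.rec_hash, fold, lock),
	// and by HASH_MIGRATE() in lock_sys_t::resize() below
*/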

/**
  Resize the lock hash table.

  @param[in] n_cells number of slots in lock hash table
*/
void lock_sys_t::resize(ulint n_cells)
{
	ut_ad(this == &lock_sys);

	mutex_enter(&mutex);

	hash_table_t* old_hash = rec_hash;
	rec_hash = hash_create(n_cells);
	HASH_MIGRATE(old_hash, rec_hash, lock_t, hash,
		     lock_rec_lock_fold);
	hash_table_free(old_hash);

	old_hash = prdt_hash;
	prdt_hash = hash_create(n_cells);
	HASH_MIGRATE(old_hash, prdt_hash, lock_t, hash,
		     lock_rec_lock_fold);
	hash_table_free(old_hash);

	old_hash = prdt_page_hash;
	prdt_page_hash = hash_create(n_cells);
	HASH_MIGRATE(old_hash, prdt_page_hash, lock_t, hash,
		     lock_rec_lock_fold);
	hash_table_free(old_hash);

	/* need to update block->lock_hash_val */
	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
		buf_pool_t*	buf_pool = buf_pool_from_array(i);

		buf_pool_mutex_enter(buf_pool);
		buf_page_t*	bpage;
		bpage = UT_LIST_GET_FIRST(buf_pool->LRU);

		while (bpage != NULL) {
			if (buf_page_get_state(bpage)
			    == BUF_BLOCK_FILE_PAGE) {
				buf_block_t*	block;
				block = reinterpret_cast<buf_block_t*>(
					bpage);

				block->lock_hash_val
					= lock_rec_hash(
						bpage->id.space(),
						bpage->id.page_no());
			}
			bpage = UT_LIST_GET_NEXT(LRU, bpage);
		}
		buf_pool_mutex_exit(buf_pool);
	}

	mutex_exit(&mutex);
}

/** Closes the lock system at database shutdown. */
void lock_sys_t::close()
{
	ut_ad(this == &lock_sys);

	if (!m_initialised) return;

	if (lock_latest_err_file != NULL) {
		fclose(lock_latest_err_file);
		lock_latest_err_file = NULL;
	}

	hash_table_free(rec_hash);
	hash_table_free(prdt_hash);
	hash_table_free(prdt_page_hash);

	os_event_destroy(timeout_event);

	mutex_destroy(&mutex);
	mutex_destroy(&wait_mutex);

	for (ulint i = srv_max_n_threads; i--; ) {
		if (os_event_t& event = waiting_threads[i].event) {
			os_event_destroy(event);
		}
	}

	ut_free(waiting_threads);
	m_initialised= false;
}
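
/* Illustrative lifecycle sketch (simplified; the actual call sites live
in the startup/shutdown and buffer pool resizing code outside this file):
lock_sys is a single global object whose hash tables can be rebuilt
online:

	lock_sys.create(n_cells);	// once, at database start
	lock_sys.resize(new_n_cells);	// when the buffer pool is resized
	lock_sys.close();		// once, at shutdown

resize() rebuilds rec_hash, prdt_hash and prdt_page_hash, then walks every
buffer pool LRU list to refresh the cached block->lock_hash_val values. */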

/*********************************************************************//**
Gets the size of a lock struct.
@return size in bytes */
ulint
lock_get_size(void)
/*===============*/
{
	return((ulint) sizeof(lock_t));
}

static inline void lock_grant_have_trx_mutex(lock_t* lock)
{
	lock_reset_lock_and_trx_wait(lock);
	lock_grant_after_reset(lock);
}

/*********************************************************************//**
Gets the gap flag of a record lock.
@return LOCK_GAP or 0 */
UNIV_INLINE
ulint
lock_rec_get_gap(
/*=============*/
	const lock_t*	lock)	/*!< in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	return(lock->type_mode & LOCK_GAP);
}

/*********************************************************************//**
Gets the LOCK_REC_NOT_GAP flag of a record lock.
@return LOCK_REC_NOT_GAP or 0 */
UNIV_INLINE
ulint
lock_rec_get_rec_not_gap(
/*=====================*/
	const lock_t*	lock)	/*!< in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	return(lock->type_mode & LOCK_REC_NOT_GAP);
}

/*********************************************************************//**
Gets the waiting insert flag of a record lock.
@return LOCK_INSERT_INTENTION or 0 */
UNIV_INLINE
ulint
lock_rec_get_insert_intention(
/*==========================*/
	const lock_t*	lock)	/*!< in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	return(lock->type_mode & LOCK_INSERT_INTENTION);
}

/*********************************************************************//**
Checks if a lock request for a new lock has to wait for request lock2.
@return TRUE if new lock has to wait for lock2 to be removed */
UNIV_INLINE
bool
lock_rec_has_to_wait(
/*=================*/
	bool		for_locking,
				/*!< in: whether called for locking
				or releasing */
	const trx_t*	trx,	/*!< in: trx of new lock */
	ulint		type_mode,/*!< in: precise mode of the new lock
				to set: LOCK_S or LOCK_X, possibly
				ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
				LOCK_INSERT_INTENTION */
	const lock_t*	lock2,	/*!< in: another record lock; NOTE that
				it is assumed that this has a lock bit
				set on the same record as in the new
				lock we are setting */
	bool		lock_is_on_supremum)
				/*!< in: TRUE if we are setting the
				lock on the 'supremum' record of an
				index page: we know then that the lock
				request is really for a 'gap' type lock */
{
	ut_ad(trx && lock2);
	ut_ad(lock_get_type_low(lock2) == LOCK_REC);

	if (trx == lock2->trx
	    || lock_mode_compatible(
		    static_cast<lock_mode>(LOCK_MODE_MASK & type_mode),
		    lock_get_mode(lock2))) {
		return false;
	}

	/* We have somewhat complex rules when gap type record locks
	cause waits */

	if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
	    && !(type_mode & LOCK_INSERT_INTENTION)) {

		/* Gap type locks without LOCK_INSERT_INTENTION flag
		do not need to wait for anything. This is because
		different users can have conflicting lock types
		on gaps. */

		return false;
	}

	if (!(type_mode & LOCK_INSERT_INTENTION) && lock_rec_get_gap(lock2)) {

		/* A record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
		does not need to wait for a gap type lock */

		return false;
	}

	if ((type_mode & LOCK_GAP) && lock_rec_get_rec_not_gap(lock2)) {

		/* Lock on gap does not need to wait for
		a LOCK_REC_NOT_GAP type lock */

		return false;
	}

	if (lock_rec_get_insert_intention(lock2)) {

		/* No lock request needs to wait for an insert
		intention lock to be removed. This is ok since our
		rules allow conflicting locks on gaps. This eliminates
		a spurious deadlock caused by a next-key lock waiting
		for an insert intention lock; when the insert
		intention lock was granted, the insert deadlocked on
		the waiting next-key lock.

		Also, insert intention locks do not disturb each
		other. */

		return false;
	}

	if ((type_mode & LOCK_GAP || lock_rec_get_gap(lock2))
	    && !thd_need_ordering_with(trx->mysql_thd, lock2->trx->mysql_thd)) {
		/* If the upper server layer has already decided on the
		commit order between the transaction requesting the
		lock and the transaction owning the lock, we do not
		need to wait for gap locks. Such ordering by the upper
		server layer happens in parallel replication, where the
		commit order is fixed to match the original order on the
		master.

		Such gap locks are mainly needed to get serialisability
		between transactions so that they will be binlogged in
		the correct order so that statement-based replication
		will give the correct results. Since the right order
		was already determined on the master, we do not need
		to enforce it again here.

		Skipping the locks is not essential for correctness,
		since in case of deadlock we will just kill the later
		transaction and retry it. But it can save some
		unnecessary rollbacks and retries. */

		return false;
	}

#ifdef WITH_WSREP
	/* if BF thread is locking and has conflict with another BF
	thread, we need to look at trx ordering and lock types */
	if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)
	    && wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) {

		if (wsrep_debug) {
			ib::info() << "BF-BF lock conflict, locking: "
				   << for_locking;
			lock_rec_print(stderr, lock2);
			ib::info()
				<< " SQL1: " << wsrep_thd_query(trx->mysql_thd)
				<< " SQL2: "
				<< wsrep_thd_query(lock2->trx->mysql_thd);
		}

		if (wsrep_trx_order_before(trx->mysql_thd,
					   lock2->trx->mysql_thd)
		    && (type_mode & LOCK_MODE_MASK) == LOCK_X
		    && (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) {
			if (for_locking || wsrep_debug) {
				/* exclusive lock conflicts are not
				accepted */
				ib::info()
					<< "BF-BF X lock conflict, mode: "
					<< type_mode
					<< " supremum: " << lock_is_on_supremum
					<< " conflict states: my "
					<< wsrep_thd_conflict_state(
						trx->mysql_thd, FALSE)
					<< " locked "
					<< wsrep_thd_conflict_state(
						lock2->trx->mysql_thd,
						FALSE);
				lock_rec_print(stderr, lock2);
				ib::info() << " SQL1: "
					   << wsrep_thd_query(trx->mysql_thd)
					   << " SQL2: "
					   << wsrep_thd_query(
						   lock2->trx->mysql_thd);

				if (for_locking) {
					return false;
				}
			}
		} else {
			/* if lock2->index->n_uniq <=
			lock2->index->n_user_defined_cols
			operation is on uniq index
			*/
			if (wsrep_debug) {
				ib::info()
					<< "BF conflict, modes: " << type_mode
					<< ":" << lock2->type_mode
					<< " idx: " << lock2->index->name()
					<< " table: "
					<< lock2->index->table->name.m_name
					<< " n_uniq: " << lock2->index->n_uniq
					<< " n_user: "
					<< lock2->index->n_user_defined_cols
					<< " SQL1: "
					<< wsrep_thd_query(trx->mysql_thd)
					<< " SQL2: "
					<< wsrep_thd_query(
						lock2->trx->mysql_thd);
			}
			return false;
		}
	}
#endif /* WITH_WSREP */

	return true;
}
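
/* Illustrative summary (informal) of the gap rules implemented above,
applied only after the basic mode compatibility check has already failed:

	requested lock				waits for held lock?
	---------------------------------	----------------------------
	gap, without insert intention		never (gap locks coexist)
	anything, if held lock is II		never (nothing waits for II)
	not-gap (or next-key)			only if held lock covers
						the record itself
	insert intention			only if held lock covers
						the gap

In words: plain gap locks never block each other, no request ever waits
for an insert intention (II) lock, and an insert intention request waits
only for gap or next-key locks held by other transactions. */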

/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
@return TRUE if lock1 has to wait for lock2 to be removed */
bool
lock_has_to_wait(
/*=============*/
	const lock_t*	lock1,	/*!< in: waiting lock */
	const lock_t*	lock2)	/*!< in: another lock; NOTE that it is
				assumed that this has a lock bit set
				on the same record as in lock1 if the
				locks are record locks */
{
	ut_ad(lock1 && lock2);

	if (lock1->trx == lock2->trx
	    || lock_mode_compatible(lock_get_mode(lock1),
				    lock_get_mode(lock2))) {
		return false;
	}

	if (lock_get_type_low(lock1) != LOCK_REC) {
		return true;
	}

	ut_ad(lock_get_type_low(lock2) == LOCK_REC);

	if (lock1->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
		return lock_prdt_has_to_wait(lock1->trx, lock1->type_mode,
					     lock_get_prdt_from_lock(lock1),
					     lock2);
	}

	return lock_rec_has_to_wait(
		false, lock1->trx, lock1->type_mode, lock2,
		lock_rec_get_nth_bit(lock1, PAGE_HEAP_NO_SUPREMUM));
}

/*============== RECORD LOCK BASIC FUNCTIONS ============================*/

/**********************************************************************//**
Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED
if none is found.
@return bit index == heap number of the record, or ULINT_UNDEFINED if
none found */
ulint
lock_rec_find_set_bit(
/*==================*/
	const lock_t*	lock)	/*!< in: record lock with at least one
				bit set */
{
	for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {

		if (lock_rec_get_nth_bit(lock, i)) {

			return(i);
		}
	}

	return(ULINT_UNDEFINED);
}
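
/* Illustrative layout note (simplified): a single lock_t covers a whole
page; the bitmap stored immediately after the struct has one bit per
record heap number. Bit i set means "this lock applies to the record with
heap_no i". For example, one lock object holding heap numbers 2 and 5 of
a page looks like:

	[lock_t header][bitmap: 0 0 1 0 0 1 0 ...]
	                        bit 2 ^     ^ bit 5

lock_rec_find_set_bit() above simply scans for the lowest such bit. */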

/*********************************************************************//**
Determines if there are explicit record locks on a page.
@return an explicit record lock on the page, or NULL if there are none */
lock_t*
lock_rec_expl_exist_on_page(
/*========================*/
	ulint	space,	/*!< in: space id */
	ulint	page_no)/*!< in: page number */
{
	lock_t*	lock;

	lock_mutex_enter();
	/* Only used in ibuf pages, so rec_hash is good enough */
	lock = lock_rec_get_first_on_page_addr(lock_sys.rec_hash,
					       space, page_no);
	lock_mutex_exit();

	return(lock);
}

/*********************************************************************//**
Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
pointer in the transaction! This function is used in lock object creation
and resetting. */
static
void
lock_rec_bitmap_reset(
/*==================*/
	lock_t*	lock)	/*!< in: record lock */
{
	ulint	n_bytes;

	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	/* Reset to zero the bitmap which resides immediately after the lock
	struct */

	n_bytes = lock_rec_get_n_bits(lock) / 8;

	ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);

	memset(&lock[1], 0, n_bytes);
}

/*********************************************************************//**
Copies a record lock to heap.
@return copy of lock */
static
lock_t*
lock_rec_copy(
/*==========*/
	const lock_t*	lock,	/*!< in: record lock */
	mem_heap_t*	heap)	/*!< in: memory heap */
{
	ulint	size;

	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;

	return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size)));
}

/*********************************************************************//**
Gets the previous record lock set on a record.
@return previous lock on the same record, NULL if none exists */
const lock_t*
lock_rec_get_prev(
/*==============*/
	const lock_t*	in_lock,/*!< in: record lock */
	ulint		heap_no)/*!< in: heap number of the record */
{
	lock_t*		lock;
	ulint		space;
	ulint		page_no;
	lock_t*		found_lock	= NULL;
	hash_table_t*	hash;

	ut_ad(lock_mutex_own());
	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;

	hash = lock_hash_get(in_lock->type_mode);

	for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
	     /* No op */;
	     lock = lock_rec_get_next_on_page(lock)) {

		ut_ad(lock);

		if (lock == in_lock) {

			return(found_lock);
		}

		if (lock_rec_get_nth_bit(lock, heap_no)) {

			found_lock = lock;
		}
	}
}

/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/

/*********************************************************************//**
Checks if a transaction has a GRANTED explicit lock on rec that is stronger
than or equal to precise_mode.
@return lock or NULL */
UNIV_INLINE
lock_t*
lock_rec_has_expl(
/*==============*/
	ulint			precise_mode,/*!< in: LOCK_S or LOCK_X
					possibly ORed to LOCK_GAP or
					LOCK_REC_NOT_GAP, for a
					supremum record we regard this
					always a gap type request */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: transaction */
{
	lock_t*	lock;

	ut_ad(lock_mutex_own());
	ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
	      || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
	ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));

	for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		if (lock->trx == trx
		    && !lock_rec_get_insert_intention(lock)
		    && lock_mode_stronger_or_eq(
			    lock_get_mode(lock),
			    static_cast<lock_mode>(
				    precise_mode & LOCK_MODE_MASK))
		    && !lock_get_wait(lock)
		    && (!lock_rec_get_rec_not_gap(lock)
			|| (precise_mode & LOCK_REC_NOT_GAP)
			|| heap_no == PAGE_HEAP_NO_SUPREMUM)
		    && (!lock_rec_get_gap(lock)
			|| (precise_mode & LOCK_GAP)
			|| heap_no == PAGE_HEAP_NO_SUPREMUM)) {

			return(lock);
		}
	}

	return(NULL);
}
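
/* Illustrative example (informal): precise_mode asks "does trx already
hold something at least this strong on this record?". For instance,

	lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, block, heap_no, trx)

is satisfied by a granted LOCK_X next-key lock of trx on the same record
(X is stronger than S, and an ordinary lock covers the record itself),
but not by a waiting lock or by a pure gap lock of the same trx. */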

#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some other transaction has a lock request in the queue.
@return lock or NULL */
static
lock_t*
lock_rec_other_has_expl_req(
/*========================*/
	lock_mode		mode,	/*!< in: LOCK_S or LOCK_X */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	bool			wait,	/*!< in: whether also waiting locks
					are taken into account */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: transaction, or NULL if
					requests by all transactions
					are taken into account */
{

	ut_ad(lock_mutex_own());
	ut_ad(mode == LOCK_X || mode == LOCK_S);

	/* Only a GAP lock can be on the supremum, and we are not looking
	for GAP locks */
	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
		return(NULL);
	}

	for (lock_t* lock = lock_rec_get_first(lock_sys.rec_hash,
					       block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		if (lock->trx != trx
		    && !lock_rec_get_gap(lock)
		    && (wait || !lock_get_wait(lock))
		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {

			return(lock);
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */

#ifdef WITH_WSREP
static
void
wsrep_kill_victim(
/*==============*/
	const trx_t* const	trx,
	const lock_t*		lock)
{
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(lock->trx));

	/* quit for native mysql */
	if (!wsrep_on(trx->mysql_thd)) {
		return;
	}

	my_bool	bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE);
	my_bool	bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE);

	if ((bf_this && !bf_other) ||
	    (bf_this && bf_other && wsrep_trx_order_before(
		    trx->mysql_thd, lock->trx->mysql_thd))) {

		if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
			if (wsrep_debug) {
				ib::info() << "WSREP: BF victim waiting";
			}
			/* cannot release lock, until our lock
			is in the queue*/
		} else if (lock->trx != trx) {
			if (wsrep_log_conflicts) {
				if (bf_this) {
					ib::info() << "*** Priority TRANSACTION:";
				} else {
					ib::info() << "*** Victim TRANSACTION:";
				}

				trx_print_latched(stderr, trx, 3000);

				if (bf_other) {
					ib::info() << "*** Priority TRANSACTION:";
				} else {
					ib::info() << "*** Victim TRANSACTION:";
				}
				trx_print_latched(stderr, lock->trx, 3000);

				ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:";

				if (lock_get_type(lock) == LOCK_REC) {
					lock_rec_print(stderr, lock);
				} else {
					lock_table_print(stderr, lock);
				}

				ib::info() << " SQL1: "
					   << wsrep_thd_query(trx->mysql_thd);
				ib::info() << " SQL2: "
					   << wsrep_thd_query(lock->trx->mysql_thd);
			}

			wsrep_innobase_kill_one_trx(trx->mysql_thd,
						    trx, lock->trx, TRUE);
		}
	}
}
#endif /* WITH_WSREP */

/*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
@return lock or NULL */
static
lock_t*
lock_rec_other_has_conflicting(
/*===========================*/
	ulint			mode,	/*!< in: LOCK_S or LOCK_X,
					possibly ORed to LOCK_GAP or
					LOCK_REC_NOT_GAP,
					LOCK_INSERT_INTENTION */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: our transaction */
{
	lock_t*	lock;

	ut_ad(lock_mutex_own());

	bool	is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);

	for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) {
#ifdef WITH_WSREP
			if (wsrep_on_trx(trx)) {
				trx_mutex_enter(lock->trx);
				/* Below function will roll back either trx
				or lock->trx depending on priority of the
				transaction. */
				wsrep_kill_victim(const_cast<trx_t*>(trx), lock);
				trx_mutex_exit(lock->trx);
			}
#endif /* WITH_WSREP */
			return(lock);
		}
	}

	return(NULL);
}

/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a secondary
index.
@return transaction id of the transaction which has the x-lock, or 0;
NOTE that this function can return false positives but never false
negatives. The caller must confirm all positive results by calling
trx_is_active(). */
static
trx_t*
lock_sec_rec_some_has_impl(
/*=======================*/
	trx_t*		caller_trx,/*!< in/out: trx of current thread */
	const rec_t*	rec,	/*!< in: user record */
	dict_index_t*	index,	/*!< in: secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
	trx_t*		trx;
	trx_id_t	max_trx_id;
	const page_t*	page = page_align(rec);

	ut_ad(!lock_mutex_own());
	ut_ad(!dict_index_is_clust(index));
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!rec_is_default_row(rec, index));

	max_trx_id = page_get_max_trx_id(page);

	/* Some transaction may have an implicit x-lock on the record only
	if the max trx id for the page >= min trx id for the trx list, or
	database recovery is running. We do not write the changes of a page
	max trx id to the log, and therefore during recovery, this value
	for a page may be incorrect. */

	if (max_trx_id < trx_sys.get_min_trx_id()) {

		trx = 0;

	} else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {

		/* The page is corrupt: try to avoid a crash by returning 0 */
		trx = 0;

	} else {

		/* In this case it is possible that some transaction has an
		implicit x-lock. We have to look in the clustered index. */

		trx = row_vers_impl_x_locked(caller_trx, rec, index, offsets);
	}

	return(trx);
}
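
/* Illustrative flow (simplified) of the implicit-lock heuristic above,
where m = PAGE_MAX_TRX_ID of the page and min_id = oldest trx id that
could still be active:

	if (m < min_id)         -> no active trx touched the page: no lock
	else if (m is insane)   -> corrupted page: report it, assume none
	else                    -> maybe locked: do the expensive undo-log
	                           lookup via row_vers_impl_x_locked()

This is why callers must treat a non-NULL result as "possibly locked" and
confirm it with trx_is_active(), never as proof of an implicit lock. */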

/*********************************************************************//**
Return the approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise.
The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_rows_locked(
/*=======================*/
	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
{
	ut_ad(lock_mutex_own());

	return(trx_lock->n_rec_locks);
}

/*********************************************************************//**
Return the number of table locks for a transaction.
The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_tables_locked(
/*=========================*/
	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
{
	const lock_t*	lock;
	ulint		n_tables = 0;

	ut_ad(lock_mutex_own());

	for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks);
	     lock != NULL;
	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {

		if (lock_get_type_low(lock) == LOCK_TABLE) {
			n_tables++;
		}
	}

	return(n_tables);
}

/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/

#ifdef WITH_WSREP
static
void
wsrep_print_wait_locks(
/*===================*/
	lock_t*	c_lock)	/* conflicting lock to print */
{
	if (wsrep_debug && c_lock->trx->lock.wait_lock != c_lock) {
		ib::info() << "WSREP: c_lock != wait lock";
		ib::info() << " SQL: "
			   << wsrep_thd_query(c_lock->trx->mysql_thd);

		if (lock_get_type_low(c_lock) & LOCK_TABLE) {
			lock_table_print(stderr, c_lock);
		} else {
			lock_rec_print(stderr, c_lock);
		}

		if (lock_get_type_low(c_lock->trx->lock.wait_lock) & LOCK_TABLE) {
			lock_table_print(stderr, c_lock->trx->lock.wait_lock);
		} else {
			lock_rec_print(stderr, c_lock->trx->lock.wait_lock);
		}
	}
}
#endif /* WITH_WSREP */

/** Create a new record lock and insert it into the lock queue,
without checking for deadlocks or conflicts.
@param[in]	type_mode	lock mode and wait flag; type will be replaced
				with LOCK_REC
@param[in]	space		tablespace id
@param[in]	page_no		index page number
@param[in]	page		R-tree index page, or NULL
@param[in]	heap_no		record heap number in the index page
@param[in]	index		the index tree
@param[in,out]	trx		transaction
@param[in]	holds_trx_mutex	whether the caller holds trx->mutex
@return created lock */
lock_t*
lock_rec_create_low(
#ifdef WITH_WSREP
	lock_t*		c_lock,	/*!< conflicting lock */
	que_thr_t*	thr,	/*!< thread owning trx */
#endif
	ulint		type_mode,
	ulint		space,
	ulint		page_no,
	const page_t*	page,
	ulint		heap_no,
	dict_index_t*	index,
	trx_t*		trx,
	bool		holds_trx_mutex)
{
	lock_t*	lock;
	ulint	n_bits;
	ulint	n_bytes;

	ut_ad(lock_mutex_own());
	ut_ad(holds_trx_mutex == trx_mutex_own(trx));
	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));

#ifdef UNIV_DEBUG
	/* Non-locking autocommit read-only transactions should not set
	any locks. See comment in trx_set_rw_mode explaining why this
	conditional check is required in debug code. */
	if (holds_trx_mutex) {
		check_trx_state(trx);
	}
#endif /* UNIV_DEBUG */

	/* If rec is the supremum record, then we reset the gap and
	LOCK_REC_NOT_GAP bits, as all locks on the supremum are
	automatically of the gap type */

	if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
		type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
	}

	if (UNIV_LIKELY(!(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)))) {
		/* Make lock bitmap bigger by a safety margin */
		n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
		n_bytes = 1 + n_bits / 8;
	} else {
		ut_ad(heap_no == PRDT_HEAPNO);

		/* The lock is always on PAGE_HEAP_NO_INFIMUM (0), so
		we only need 1 bit (which rounds up to 1 byte) for
		lock bit setting */
		n_bytes = 1;

		if (type_mode & LOCK_PREDICATE) {
			ulint	tmp = UNIV_WORD_SIZE - 1;

			/* We will attach the predicate structure after the
			lock. Make sure the memory is aligned on 8 bytes;
			the mem_heap_alloc will align it with
			MEM_SPACE_NEEDED anyway. */
			n_bytes = (n_bytes + sizeof(lock_prdt_t) + tmp) & ~tmp;
			ut_ad(n_bytes == sizeof(lock_prdt_t) + UNIV_WORD_SIZE);
		}
	}

	if (trx->lock.rec_cached >= trx->lock.rec_pool.size()
	    || sizeof *lock + n_bytes > REC_LOCK_SIZE) {
		lock = static_cast<lock_t*>(
			mem_heap_alloc(trx->lock.lock_heap,
				       sizeof *lock + n_bytes));
	} else {
		lock = trx->lock.rec_pool[trx->lock.rec_cached++];
	}

	lock->trx = trx;
	lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
	lock->index = index;
	lock->un_member.rec_lock.space = uint32_t(space);
	lock->un_member.rec_lock.page_no = uint32_t(page_no);

	if (UNIV_LIKELY(!(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)))) {
		lock->un_member.rec_lock.n_bits = uint32_t(n_bytes * 8);
	} else {
		/* Predicate lock always on INFIMUM (0) */
		lock->un_member.rec_lock.n_bits = 8;
	}
	lock_rec_bitmap_reset(lock);
	lock_rec_set_nth_bit(lock, heap_no);
	index->table->n_rec_locks++;
	ut_ad(index->table->n_ref_count > 0 || !index->table->can_be_evicted);

#ifdef WITH_WSREP
	if (c_lock && wsrep_on_trx(trx)
	    && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
		lock_t*	hash	= (lock_t*) c_lock->hash;
		lock_t*	prev	= NULL;

		while (hash && wsrep_thd_is_BF(hash->trx->mysql_thd, TRUE)
		       && wsrep_trx_order_before(hash->trx->mysql_thd,
						 trx->mysql_thd)) {
			prev = hash;
			hash = (lock_t*) hash->hash;
		}
		lock->hash = hash;
		if (prev) {
			prev->hash = lock;
		} else {
			c_lock->hash = lock;
		}
		/*
		 * delayed conflict resolution '...kill_one_trx' was not called,
		 * if victim was waiting for some other lock
		 */
		trx_mutex_enter(c_lock->trx);
		if (c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

			c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;

			if (wsrep_debug) {
				wsrep_print_wait_locks(c_lock);
			}

			trx->lock.que_state = TRX_QUE_LOCK_WAIT;
			lock_set_lock_and_trx_wait(lock, trx);
			UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);

			trx->lock.wait_thr = thr;
			thr->state = QUE_THR_LOCK_WAIT;

			/* have to release trx mutex for the duration of
			victim lock release. This will eventually call
			lock_grant, which wants to grant trx mutex again
			*/
			if (holds_trx_mutex) {
				trx_mutex_exit(trx);
			}
			lock_cancel_waiting_and_release(
				c_lock->trx->lock.wait_lock);

			if (holds_trx_mutex) {
				trx_mutex_enter(trx);
			}

			trx_mutex_exit(c_lock->trx);

			if (wsrep_debug) {
				ib::info() << "WSREP: c_lock canceled "
					   << ib::hex(c_lock->trx->id)
					   << " SQL: "
					   << wsrep_thd_query(
						   c_lock->trx->mysql_thd);
			}

			/* have to bail out here to avoid lock_set_lock... */
			return(lock);
		}
		trx_mutex_exit(c_lock->trx);
	} else
#endif /* WITH_WSREP */
	if (!(type_mode & (LOCK_WAIT | LOCK_PREDICATE | LOCK_PRDT_PAGE))
	    && innodb_lock_schedule_algorithm
	    == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
	    && !thd_is_replication_slave_thread(trx->mysql_thd)) {
		HASH_PREPEND(lock_t, hash, lock_sys.rec_hash,
			     lock_rec_fold(space, page_no), lock);
	} else {
		HASH_INSERT(lock_t, hash, lock_hash_get(type_mode),
			    lock_rec_fold(space, page_no), lock);
	}

	if (!holds_trx_mutex) {
		trx_mutex_enter(trx);
	}
	ut_ad(trx_mutex_own(trx));
	if (type_mode & LOCK_WAIT) {
		lock_set_lock_and_trx_wait(lock, trx);
	}
	UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
	if (!holds_trx_mutex) {
		trx_mutex_exit(trx);
	}
	MONITOR_INC(MONITOR_RECLOCK_CREATED);
	MONITOR_INC(MONITOR_NUM_RECLOCK);

	return lock;
}
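
/* Illustrative sizing example (record count made up): for an ordinary
record lock on a page whose directory currently has 100 heap entries,
the allocation computed above is

	n_bits  = 100 + LOCK_PAGE_BITMAP_MARGIN;  // margin for future inserts
	n_bytes = 1 + n_bits / 8;                 // the +1 byte rounds up
	alloc   = sizeof(lock_t) + n_bytes;       // bitmap follows the struct

so later locks on other records of the same page can usually reuse this
object by setting one more bit instead of allocating a new lock_t. */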

/*********************************************************************//**
Check if lock1 has higher priority than lock2.
NULL has the lowest priority.
If neither of them is a wait lock, the first one has higher priority.
If only one of them is a wait lock, it has lower priority.
Otherwise, the one with the older transaction has higher priority.
@returns true if lock1 has higher priority, false otherwise. */
static
bool
has_higher_priority(
	lock_t*	lock1,
	lock_t*	lock2)
{
	if (lock1 == NULL) {
		return false;
	} else if (lock2 == NULL) {
		return true;
	}
	// Granted locks have higher priority.
	if (!lock_get_wait(lock1)) {
		return true;
	} else if (!lock_get_wait(lock2)) {
		return false;
	}
	return lock1->trx->start_time_micro <= lock2->trx->start_time_micro;
}
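
/* Illustrative example (made-up values): under VATS scheduling the queue
keeps granted locks first, then waiting locks ordered by transaction age.
Given waiting locks W1 (trx started at t=100us) and W2 (t=200us) and a
granted lock G:

	has_higher_priority(W1, W2) == true	// the older trx wins
	has_higher_priority(G,  W1) == true	// granted beats any waiter

lock_rec_insert_by_trx_age() below relies on exactly this ordering when
walking the hash chain to find an insertion point. */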

/*********************************************************************//**
Insert a lock into the hash list according to the mode (whether it is a
wait lock) and the age of the transaction that it is associated with.
If the lock is not a wait lock, insert it at the head of the hash list.
Otherwise, insert it into the middle of the wait locks according to the
age of the transaction. */
static
dberr_t
lock_rec_insert_by_trx_age(
	lock_t*	in_lock)	/*!< in: lock to be inserted */
{
	ulint		space;
	ulint		page_no;
	ulint		rec_fold;
	lock_t*		node;
	lock_t*		next;
	hash_table_t*	hash;
	hash_cell_t*	cell;

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;
	rec_fold = lock_rec_fold(space, page_no);
	hash = lock_hash_get(in_lock->type_mode);
	cell = hash_get_nth_cell(hash,
				 hash_calc_hash(rec_fold, hash));

	node = (lock_t*) cell->node;
	// If in_lock is not a wait lock, we insert it to the head of the list.
	if (node == NULL || !lock_get_wait(in_lock)
	    || has_higher_priority(in_lock, node)) {
		cell->node = in_lock;
		in_lock->hash = node;
		if (lock_get_wait(in_lock)) {
			lock_grant_have_trx_mutex(in_lock);
			return DB_SUCCESS_LOCKED_REC;
		}
		return DB_SUCCESS;
	}
	while (node != NULL && has_higher_priority((lock_t*) node->hash,
						   in_lock)) {
		node = (lock_t*) node->hash;
	}
	next = (lock_t*) node->hash;
	node->hash = in_lock;
	in_lock->hash = next;

	if (lock_get_wait(in_lock) && !lock_rec_has_to_wait_in_queue(in_lock)) {
		lock_grant_have_trx_mutex(in_lock);
		if (cell->node != in_lock) {
			// Move it to the front of the queue
			node->hash = in_lock->hash;
			next = (lock_t*) cell->node;
			cell->node = in_lock;
			in_lock->hash = next;
		}
		return DB_SUCCESS_LOCKED_REC;
	}

	return DB_SUCCESS;
}
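
/* Illustrative walk-through (simplified): assume the hash chain for a
page is G1 -> W_old -> W_new (one granted lock, then waiters ordered by
age) and a waiting lock W_mid of intermediate age is inserted. The scan
above stops as soon as the next node no longer has higher priority,
yielding

	G1 -> W_old -> W_mid -> W_new

and if W_mid then turns out not to conflict with anything ahead of it
(lock_rec_has_to_wait_in_queue() returns NULL), it is granted immediately
and moved to the head of the chain. */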

#ifdef UNIV_DEBUG
static
bool
lock_queue_validate(
	const lock_t*	in_lock)	/*!< in: lock whose hash list is to
					be validated */
{
	ulint		space;
	ulint		page_no;
	ulint		rec_fold;
	hash_table_t*	hash;
	hash_cell_t*	cell;
	lock_t*		next;
	bool		wait_lock __attribute__((unused)) = false;

	if (in_lock == NULL) {
		return true;
	}

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;
	rec_fold = lock_rec_fold(space, page_no);
	hash = lock_hash_get(in_lock->type_mode);
	cell = hash_get_nth_cell(hash,
				 hash_calc_hash(rec_fold, hash));
	next = (lock_t*) cell->node;
	while (next != NULL) {
		// If this is a granted lock, check that there's no wait
		// lock before it.
		if (!lock_get_wait(next)) {
			ut_ad(!wait_lock);
		} else {
			wait_lock = true;
		}
		next = next->hash;
	}
	return true;
}
#endif /* UNIV_DEBUG */

static
void
lock_rec_insert_to_head(
	lock_t*	in_lock,	/*!< in: lock to be inserted */
	ulint	rec_fold)	/*!< in: rec_fold of the page */
{
	hash_table_t*	hash;
	hash_cell_t*	cell;
	lock_t*		node;

	if (in_lock == NULL) {
		return;
	}

	hash = lock_hash_get(in_lock->type_mode);
	cell = hash_get_nth_cell(hash,
				 hash_calc_hash(rec_fold, hash));
	node = (lock_t*) cell->node;
	if (node != in_lock) {
		cell->node = in_lock;
		in_lock->hash = node;
	}
}

/** Enqueue a waiting request for a lock which cannot be granted immediately.
Check for deadlocks.
@param[in]	type_mode	the requested lock mode (LOCK_S or LOCK_X)
				possibly ORed with LOCK_GAP or
				LOCK_REC_NOT_GAP, ORed with
				LOCK_INSERT_INTENTION if this
				waiting lock request is set
				when performing an insert of
				an index record
@param[in]	block		leaf page in the index
@param[in]	heap_no		record heap number in the block
@param[in]	index		index tree
@param[in,out]	thr		query thread
@param[in]	prdt		minimum bounding box (spatial index)
@retval	DB_LOCK_WAIT		if the waiting lock was enqueued
@retval	DB_DEADLOCK		if this transaction was chosen as the victim
@retval	DB_SUCCESS_LOCKED_REC	if the other transaction was chosen as a victim
				(or it happened to commit) */
dberr_t
lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
	lock_t*			c_lock,	/*!< conflicting lock */
#endif
	ulint			type_mode,
	const buf_block_t*	block,
	ulint			heap_no,
	dict_index_t*		index,
	que_thr_t*		thr,
	lock_prdt_t*		prdt)
{
	ut_ad(lock_mutex_own());
	ut_ad(!srv_read_only_mode);
	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));

	trx_t* trx = thr_get_trx(thr);

	ut_ad(trx_mutex_own(trx));
	ut_a(!que_thr_stop(thr));

	switch (trx_get_dict_operation(trx)) {
	case TRX_DICT_OP_NONE:
		break;
	case TRX_DICT_OP_TABLE:
	case TRX_DICT_OP_INDEX:
		ib::error() << "A record lock wait happens in a dictionary"
			" operation. index "
			<< index->name
			<< " of table "
			<< index->table->name
			<< ". " << BUG_REPORT_MSG;
		ut_ad(0);
	}

	if (trx->mysql_thd && thd_lock_wait_timeout(trx->mysql_thd) == 0) {
		trx->error_state = DB_LOCK_WAIT_TIMEOUT;
		return DB_LOCK_WAIT_TIMEOUT;
	}

	/* Enqueue the lock request that will wait to be granted, note that
	we already own the trx mutex. */
	lock_t* lock = lock_rec_create(
#ifdef WITH_WSREP
		c_lock, thr,
#endif
		type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE);

	if (prdt && type_mode & LOCK_PREDICATE) {
		lock_prdt_set_prdt(lock, prdt);
	}

	if (const trx_t* victim =
	    DeadlockChecker::check_and_resolve(lock, trx)) {
		ut_ad(victim == trx);
		lock_reset_lock_and_trx_wait(lock);
		lock_rec_reset_nth_bit(lock, heap_no);
		return DB_DEADLOCK;
	}

	if (!trx->lock.wait_lock) {
		/* If there was a deadlock but we chose another
		transaction as a victim, it is possible that we
		already have the lock now granted! */
#ifdef WITH_WSREP
		if (wsrep_debug) {
			ib::info() << "WSREP: BF thread got lock granted early, ID "
				   << ib::hex(trx->id)
				   << " query: " << wsrep_thd_query(trx->mysql_thd);
		}
#endif
		return DB_SUCCESS_LOCKED_REC;
	}

	trx->lock.que_state = TRX_QUE_LOCK_WAIT;

	trx->lock.was_chosen_as_deadlock_victim = false;
	trx->lock.wait_started = ut_time();

	ut_a(que_thr_stop(thr));

	DBUG_LOG("ib_lock", "trx " << ib::hex(trx->id)
		 << " waits for lock in index " << index->name
		 << " of table " << index->table->name);

	MONITOR_INC(MONITOR_LOCKREC_WAIT);

	if (innodb_lock_schedule_algorithm
	    == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
	    && !prdt
	    && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
		HASH_DELETE(lock_t, hash, lock_sys.rec_hash,
			    lock_rec_lock_fold(lock), lock);
		dberr_t	res = lock_rec_insert_by_trx_age(lock);
		if (res != DB_SUCCESS) {
			return res;
		}
	}

	return DB_LOCK_WAIT;
}

/*********************************************************************//**
Adds a record lock request to the record queue. The request is normally
added as the last in the queue, but if there are no waiting lock requests
on the record, and the request to be added is not a waiting request, we
can reuse a suitable record lock object already existing on the same page,
just setting the appropriate bit in its bitmap. This is a low-level function
which does NOT check for deadlocks or lock compatibility! */
static
void
lock_rec_add_to_queue(
/*==================*/
	ulint			type_mode,/*!< in: lock mode, wait, gap
					etc. flags; type is ignored
					and replaced by LOCK_REC */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	dict_index_t*		index,	/*!< in: index of record */
	trx_t*			trx,	/*!< in/out: transaction */
	bool			caller_owns_trx_mutex)
					/*!< in: TRUE if caller owns the
					transaction mutex */
{
#ifdef UNIV_DEBUG
	ut_ad(lock_mutex_own());
	ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
	ut_ad(dict_index_is_clust(index)
	      || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
	switch (type_mode & LOCK_MODE_MASK) {
	case LOCK_X:
	case LOCK_S:
		break;
	default:
		ut_error;
	}

	if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
		lock_mode	mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
			? LOCK_X
			: LOCK_S;
		const lock_t*	other_lock
			= lock_rec_other_has_expl_req(
				mode, block, false, heap_no, trx);
#ifdef WITH_WSREP
		//ut_a(!other_lock || (wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
		//	wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)));
		if (other_lock &&
		    wsrep_on(trx->mysql_thd) &&
		    !wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
		    !wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)) {

			ib::info() << "WSREP BF lock conflict for my lock:\n BF:" <<
				((wsrep_thd_is_BF(trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
				wsrep_thd_exec_mode(trx->mysql_thd) << " conflict: " <<
				wsrep_thd_conflict_state(trx->mysql_thd, false) << " seqno: " <<
				wsrep_thd_trx_seqno(trx->mysql_thd) << " SQL: " <<
				wsrep_thd_query(trx->mysql_thd);
			trx_t* otrx = other_lock->trx;
			ib::info() << "WSREP other lock:\n BF:" <<
				((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
				wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
				wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " <<
				wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
				wsrep_thd_query(otrx->mysql_thd);
		}
#else
		ut_a(!other_lock);
#endif /* WITH_WSREP */
	}
#endif /* UNIV_DEBUG */

	type_mode |= LOCK_REC;

	/* If rec is the supremum record, then we can reset the gap bit, as
	all locks on the supremum are automatically of the gap type, and we
	try to avoid unnecessary memory consumption of a new record lock
	struct for a gap type lock */

	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));

		/* There should never be LOCK_REC_NOT_GAP on a supremum
		record, but let us play safe */

		type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
	}

	lock_t*		lock;
	lock_t*		first_lock;
	hash_table_t*	hash = lock_hash_get(type_mode);

	/* Look for a waiting lock request on the same record or on a gap */

	for (first_lock = lock = lock_rec_get_first_on_page(hash, block);
	     lock != NULL;
	     lock = lock_rec_get_next_on_page(lock)) {

		if (lock_get_wait(lock)
		    && lock_rec_get_nth_bit(lock, heap_no)) {

			break;
		}
	}

	if (lock == NULL && !(type_mode & LOCK_WAIT)) {

		/* Look for a similar record lock on the same page:
		if one is found and there are no waiting lock requests,
		we can just set the bit */

		lock = lock_rec_find_similar_on_page(
			type_mode, heap_no, first_lock, trx);

		if (lock != NULL) {

			lock_rec_set_nth_bit(lock, heap_no);

			return;
		}
	}

	lock_rec_create(
#ifdef WITH_WSREP
		NULL, NULL,
#endif
		type_mode, block, heap_no, index, trx, caller_owns_trx_mutex);
}
1930
1931/*********************************************************************//**
1932Tries to lock the specified record in the mode requested. If not immediately
1933possible, enqueues a waiting lock request. This is a low-level function
1934which does NOT look at implicit locks! Checks lock compatibility within
1935explicit locks. This function sets a normal next-key lock, or in the case
1936of a page supremum record, a gap type lock.
1937@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
1938static
1939dberr_t
1940lock_rec_lock(
1941/*==========*/
1942 bool impl, /*!< in: if true, no lock is set
1943 if no wait is necessary: we
1944 assume that the caller will
1945 set an implicit lock */
1946 ulint mode, /*!< in: lock mode: LOCK_X or
1947 LOCK_S possibly ORed to either
1948 LOCK_GAP or LOCK_REC_NOT_GAP */
1949 const buf_block_t* block, /*!< in: buffer block containing
1950 the record */
1951 ulint heap_no,/*!< in: heap number of record */
1952 dict_index_t* index, /*!< in: index of record */
1953 que_thr_t* thr) /*!< in: query thread */
1954{
1955 trx_t *trx= thr_get_trx(thr);
1956 dberr_t err= DB_SUCCESS;
1957
1958 ut_ad(!srv_read_only_mode);
1959 ut_ad((LOCK_MODE_MASK & mode) == LOCK_S ||
1960 (LOCK_MODE_MASK & mode) == LOCK_X);
1961 ut_ad((mode & LOCK_TYPE_MASK) == LOCK_GAP ||
1962 (mode & LOCK_TYPE_MASK) == LOCK_REC_NOT_GAP ||
1963 (mode & LOCK_TYPE_MASK) == 0);
1964 ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
1965 DBUG_EXECUTE_IF("innodb_report_deadlock", return DB_DEADLOCK;);
1966
1967 lock_mutex_enter();
1968 ut_ad((LOCK_MODE_MASK & mode) != LOCK_S ||
1969 lock_table_has(trx, index->table, LOCK_IS));
1970 ut_ad((LOCK_MODE_MASK & mode) != LOCK_X ||
1971 lock_table_has(trx, index->table, LOCK_IX));
1972
1973 if (lock_t *lock= lock_rec_get_first_on_page(lock_sys.rec_hash, block))
1974 {
1975 trx_mutex_enter(trx);
1976 if (lock_rec_get_next_on_page(lock) ||
1977 lock->trx != trx ||
1978 lock->type_mode != (ulint(mode) | LOCK_REC) ||
1979 lock_rec_get_n_bits(lock) <= heap_no)
1980 {
1981 /* Do nothing if the trx already has a strong enough lock on rec */
1982 if (!lock_rec_has_expl(mode, block, heap_no, trx))
1983 {
1984 if (
1985#ifdef WITH_WSREP
1986 lock_t *c_lock=
1987#endif
1988 lock_rec_other_has_conflicting(mode, block, heap_no, trx))
1989 {
        /*
          If another transaction has a conflicting non-gap lock request
          in the queue, and this transaction does not already have a
          strong enough lock granted on the record, we have to wait. */
1995 err = lock_rec_enqueue_waiting(
1996#ifdef WITH_WSREP
1997 c_lock,
1998#endif /* WITH_WSREP */
1999 mode, block, heap_no, index, thr, NULL);
2000 }
2001 else if (!impl)
2002 {
2003 /* Set the requested lock on the record. */
2004 lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx,
2005 true);
2006 err= DB_SUCCESS_LOCKED_REC;
2007 }
2008 }
2009 }
2010 else if (!impl)
2011 {
      /*
        If the nth bit of the record lock is already set, we do not set
        a new lock bit; otherwise we set it.
      */
2016 if (!lock_rec_get_nth_bit(lock, heap_no))
2017 {
2018 lock_rec_set_nth_bit(lock, heap_no);
2019 err= DB_SUCCESS_LOCKED_REC;
2020 }
2021 }
2022 trx_mutex_exit(trx);
2023 }
2024 else
2025 {
    /*
      Simplified and faster path for the most common cases.
      Note that we don't own the trx mutex.
    */
2030 if (!impl)
2031 lock_rec_create(
2032#ifdef WITH_WSREP
2033 NULL, NULL,
2034#endif
2035 mode, block, heap_no, index, trx, false);
2036
2037 err= DB_SUCCESS_LOCKED_REC;
2038 }
2039 lock_mutex_exit();
2040 MONITOR_ATOMIC_INC(MONITOR_NUM_RECLOCK_REQ);
2041 return err;
2042}
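
/* Illustrative sketch (not part of the server): the "set one more bit"
shortcut above applies only when the page carries exactly one lock, owned
by this transaction, with exactly the requested mode, and with a bitmap
wide enough for heap_no. A standalone model of that test, with invented
simplified types:

   #include <cstdint>

   struct ToyLock {
           const void*     trx;            // owning transaction
           uint32_t        type_mode;      // full mode bits
           uint32_t        n_bits;         // bitmap width
           ToyLock*        next_on_page;   // hash chain on the same page
   };

   // True if we may simply set one more bit in the existing lock object.
   static bool can_reuse_single_lock(const ToyLock* first, const void* trx,
                                     uint32_t wanted_mode, uint32_t heap_no)
   {
           return first->next_on_page == nullptr
                  && first->trx == trx
                  && first->type_mode == wanted_mode
                  && first->n_bits > heap_no;
   }
*/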
2043
2044/*********************************************************************//**
2045Checks if a waiting record lock request still has to wait in a queue.
2046@return lock that is causing the wait */
2047static
2048const lock_t*
2049lock_rec_has_to_wait_in_queue(
2050/*==========================*/
2051 const lock_t* wait_lock) /*!< in: waiting record lock */
2052{
2053 const lock_t* lock;
2054 ulint space;
2055 ulint page_no;
2056 ulint heap_no;
2057 ulint bit_mask;
2058 ulint bit_offset;
2059 hash_table_t* hash;
2060
2061 ut_ad(lock_mutex_own());
2062 ut_ad(lock_get_wait(wait_lock));
2063 ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
2064
2065 space = wait_lock->un_member.rec_lock.space;
2066 page_no = wait_lock->un_member.rec_lock.page_no;
2067 heap_no = lock_rec_find_set_bit(wait_lock);
2068
2069 bit_offset = heap_no / 8;
2070 bit_mask = static_cast<ulint>(1) << (heap_no % 8);
2071
2072 hash = lock_hash_get(wait_lock->type_mode);
2073
2074 for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
2075 lock != wait_lock;
2076 lock = lock_rec_get_next_on_page_const(lock)) {
2077
2078 const byte* p = (const byte*) &lock[1];
2079
2080 if (heap_no < lock_rec_get_n_bits(lock)
2081 && (p[bit_offset] & bit_mask)
2082 && lock_has_to_wait(wait_lock, lock)) {
2083#ifdef WITH_WSREP
2084 if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) &&
2085 wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) {
2086 if (wsrep_debug) {
2087 ib::info() << "WSREP: waiting BF trx: " << ib::hex(wait_lock->trx->id)
2088 << " query: " << wsrep_thd_query(wait_lock->trx->mysql_thd);
2089 lock_rec_print(stderr, wait_lock);
2090 ib::info() << "WSREP: do not wait another BF trx: " << ib::hex(lock->trx->id)
2091 << " query: " << wsrep_thd_query(lock->trx->mysql_thd);
2092 lock_rec_print(stderr, lock);
2093 }
2094 /* don't wait for another BF lock */
2095 continue;
2096 }
2097#endif /* WITH_WSREP */
2098
2099 return(lock);
2100 }
2101 }
2102
2103 return(NULL);
2104}
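
/* Illustrative sketch (not part of the server): a record lock covers many
records of one page through a bitmap stored directly after the lock
struct; heap_no is mapped to a byte offset and bit mask exactly as in the
loop above. A standalone model of that addressing:

   #include <cstddef>
   #include <cstdint>

   // Check whether bit heap_no is set in a lock bitmap of n_bits bits.
   static bool bit_is_set(const uint8_t* bitmap, size_t n_bits,
                          size_t heap_no)
   {
           if (heap_no >= n_bits) {
                   return false;   // bitmap too small for this record
           }
           const size_t  bit_offset = heap_no / 8;
           const uint8_t bit_mask   = uint8_t(1U << (heap_no % 8));
           return (bitmap[bit_offset] & bit_mask) != 0;
   }
*/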
2105
2106/** Grant a lock to a waiting lock request and release the waiting transaction
2107after lock_reset_lock_and_trx_wait() has been called. */
2108static void lock_grant_after_reset(lock_t* lock)
2109{
2110 ut_ad(lock_mutex_own());
2111 ut_ad(trx_mutex_own(lock->trx));
2112
2113 if (lock_get_mode(lock) == LOCK_AUTO_INC) {
2114 dict_table_t* table = lock->un_member.tab_lock.table;
2115
2116 if (table->autoinc_trx == lock->trx) {
2117 ib::error() << "Transaction already had an"
2118 << " AUTO-INC lock!";
2119 } else {
2120 table->autoinc_trx = lock->trx;
2121
2122 ib_vector_push(lock->trx->autoinc_locks, &lock);
2123 }
2124 }
2125
2126 DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends",
2127 trx_get_id_for_print(lock->trx)));
2128
2129 /* If we are resolving a deadlock by choosing another transaction
2130 as a victim, then our original transaction may not be in the
2131 TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
2132 for it */
2133
2134 if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
2135 que_thr_t* thr;
2136
2137 thr = que_thr_end_lock_wait(lock->trx);
2138
2139 if (thr != NULL) {
2140 lock_wait_release_thread_if_suspended(thr);
2141 }
2142 }
2143}
2144
2145/** Grant a lock to a waiting lock request and release the waiting transaction. */
2146static void lock_grant(lock_t* lock)
2147{
2148 lock_reset_lock_and_trx_wait(lock);
2149 trx_mutex_enter(lock->trx);
2150 lock_grant_after_reset(lock);
2151 trx_mutex_exit(lock->trx);
2152}
2153
2154/*************************************************************//**
2155Cancels a waiting record lock request and releases the waiting transaction
2156that requested it. NOTE: does NOT check if waiting lock requests behind this
2157one can now be granted! */
2158static
2159void
2160lock_rec_cancel(
2161/*============*/
2162 lock_t* lock) /*!< in: waiting record lock request */
2163{
2164 que_thr_t* thr;
2165
2166 ut_ad(lock_mutex_own());
2167 ut_ad(lock_get_type_low(lock) == LOCK_REC);
2168
2169 /* Reset the bit (there can be only one set bit) in the lock bitmap */
2170 lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
2171
2172 /* Reset the wait flag and the back pointer to lock in trx */
2173
2174 lock_reset_lock_and_trx_wait(lock);
2175
2176 /* The following function releases the trx from lock wait */
2177
2178 trx_mutex_enter(lock->trx);
2179
2180 thr = que_thr_end_lock_wait(lock->trx);
2181
2182 if (thr != NULL) {
2183 lock_wait_release_thread_if_suspended(thr);
2184 }
2185
2186 trx_mutex_exit(lock->trx);
2187}
2188
/** Grant locks on a page which no longer have to wait, and move any
lock that is granted to the front of its hash chain. Used by the VATS
lock scheduling algorithm.
@param[in]	rec_fold	fold value of the page address
@param[in]	space		tablespace id of the page
@param[in]	page_no		page number */
static
void
lock_grant_and_move_on_page(ulint rec_fold, ulint space, ulint page_no)
2192{
2193 lock_t* lock;
2194 lock_t* previous = static_cast<lock_t*>(
2195 hash_get_nth_cell(lock_sys.rec_hash,
2196 hash_calc_hash(rec_fold, lock_sys.rec_hash))
2197 ->node);
2198 if (previous == NULL) {
2199 return;
2200 }
2201 if (previous->un_member.rec_lock.space == space &&
2202 previous->un_member.rec_lock.page_no == page_no) {
2203 lock = previous;
2204 }
2205 else {
2206 while (previous->hash &&
2207 (previous->hash->un_member.rec_lock.space != space ||
2208 previous->hash->un_member.rec_lock.page_no != page_no)) {
2209 previous = previous->hash;
2210 }
2211 lock = previous->hash;
2212 }
2213
2214 ut_ad(previous->hash == lock || previous == lock);
2215 /* Grant locks if there are no conflicting locks ahead.
2216 Move granted locks to the head of the list. */
2217 while (lock) {
		/* If the lock is a waiting lock on this page and it no
		longer needs to wait, grant it. */
2219 if (lock_get_wait(lock)
2220 && lock->un_member.rec_lock.space == space
2221 && lock->un_member.rec_lock.page_no == page_no
2222 && !lock_rec_has_to_wait_in_queue(lock)) {
2223 lock_grant(lock);
2224
2225 if (previous != NULL) {
2226 /* Move the lock to the head of the list. */
2227 HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
2228 lock_rec_insert_to_head(lock, rec_fold);
2229 } else {
2230 /* Already at the head of the list. */
2231 previous = lock;
2232 }
2233 /* Move on to the next lock. */
2234 lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
2235 } else {
2236 previous = lock;
2237 lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
2238 }
2239 }
2240}
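
/* Illustrative sketch (not part of the server): moving a granted lock to
the head of its singly linked hash chain requires unlinking it through
its predecessor, which is why the loop above tracks "previous". The same
operation on an invented node type:

   struct ToyNode { ToyNode* next; };

   // Unlink *node (whose predecessor is prev) and push it to the front.
   static void move_to_head(ToyNode*& head, ToyNode* prev, ToyNode* node)
   {
           if (prev == nullptr) {
                   return;                 // node is already the head
           }
           prev->next = node->next;        // unlink from the chain
           node->next = head;              // relink at the front
           head = node;
   }
*/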
2241
2242/** Remove a record lock request, waiting or granted, from the queue and
2243grant locks to other transactions in the queue if they now are entitled
2244to a lock. NOTE: all record locks contained in in_lock are removed.
2245@param[in,out] in_lock record lock */
2246static void lock_rec_dequeue_from_page(lock_t* in_lock)
2247{
2248 ulint space;
2249 ulint page_no;
2250 hash_table_t* lock_hash;
2251
2252 ut_ad(lock_mutex_own());
2253 ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2254 /* We may or may not be holding in_lock->trx->mutex here. */
2255
2256 space = in_lock->un_member.rec_lock.space;
2257 page_no = in_lock->un_member.rec_lock.page_no;
2258
2259 in_lock->index->table->n_rec_locks--;
2260
2261 lock_hash = lock_hash_get(in_lock->type_mode);
2262
2263 ulint rec_fold = lock_rec_fold(space, page_no);
2264
2265 HASH_DELETE(lock_t, hash, lock_hash, rec_fold, in_lock);
2266 UT_LIST_REMOVE(in_lock->trx->lock.trx_locks, in_lock);
2267
2268 MONITOR_INC(MONITOR_RECLOCK_REMOVED);
2269 MONITOR_DEC(MONITOR_NUM_RECLOCK);
2270
2271 if (innodb_lock_schedule_algorithm
2272 == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS
2273 || lock_hash != lock_sys.rec_hash
2274 || thd_is_replication_slave_thread(in_lock->trx->mysql_thd)) {
2275 /* Check if waiting locks in the queue can now be granted:
2276 grant locks if there are no conflicting locks ahead. Stop at
2277 the first X lock that is waiting or has been granted. */
2278
2279 for (lock_t* lock = lock_rec_get_first_on_page_addr(
2280 lock_hash, space, page_no);
2281 lock != NULL;
2282 lock = lock_rec_get_next_on_page(lock)) {
2283
2284 if (lock_get_wait(lock)
2285 && !lock_rec_has_to_wait_in_queue(lock)) {
2286 /* Grant the lock */
2287 ut_ad(lock->trx != in_lock->trx);
2288 lock_grant(lock);
2289 }
2290 }
2291 } else {
2292 lock_grant_and_move_on_page(rec_fold, space, page_no);
2293 }
2294}
2295
2296/*************************************************************//**
2297Removes a record lock request, waiting or granted, from the queue. */
2298void
2299lock_rec_discard(
2300/*=============*/
2301 lock_t* in_lock) /*!< in: record lock object: all
2302 record locks which are contained
2303 in this lock object are removed */
2304{
2305 ulint space;
2306 ulint page_no;
2307 trx_lock_t* trx_lock;
2308
2309 ut_ad(lock_mutex_own());
2310 ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2311
2312 trx_lock = &in_lock->trx->lock;
2313
2314 space = in_lock->un_member.rec_lock.space;
2315 page_no = in_lock->un_member.rec_lock.page_no;
2316
2317 in_lock->index->table->n_rec_locks--;
2318
2319 HASH_DELETE(lock_t, hash, lock_hash_get(in_lock->type_mode),
2320 lock_rec_fold(space, page_no), in_lock);
2321
2322 UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);
2323
2324 MONITOR_INC(MONITOR_RECLOCK_REMOVED);
2325 MONITOR_DEC(MONITOR_NUM_RECLOCK);
2326}
2327
2328/*************************************************************//**
Removes record lock objects set on an index page which is discarded. This
function neither moves locks nor checks for waiting locks; therefore the
lock bitmaps must already be reset when this function is called. */
2332static
2333void
2334lock_rec_free_all_from_discard_page_low(
2335/*====================================*/
2336 ulint space,
2337 ulint page_no,
2338 hash_table_t* lock_hash)
2339{
2340 lock_t* lock;
2341 lock_t* next_lock;
2342
2343 lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
2344
2345 while (lock != NULL) {
2346 ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
2347 ut_ad(!lock_get_wait(lock));
2348
2349 next_lock = lock_rec_get_next_on_page(lock);
2350
2351 lock_rec_discard(lock);
2352
2353 lock = next_lock;
2354 }
2355}
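
/* Illustrative sketch (not part of the server): when every lock on a page
is discarded, the next pointer must be saved before the current element is
removed, as the loop above does. The same pattern over a plain singly
linked list that owns its nodes:

   struct ToyNode { ToyNode* next; };

   static void free_all(ToyNode*& head)
   {
           ToyNode* node = head;
           while (node != nullptr) {
                   ToyNode* next = node->next;     // save before freeing
                   delete node;
                   node = next;
           }
           head = nullptr;
   }
*/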
2356
2357/*************************************************************//**
Removes record lock objects set on an index page which is discarded. This
function neither moves locks nor checks for waiting locks; therefore the
lock bitmaps must already be reset when this function is called. */
2361void
2362lock_rec_free_all_from_discard_page(
2363/*================================*/
2364 const buf_block_t* block) /*!< in: page to be discarded */
2365{
2366 ulint space;
2367 ulint page_no;
2368
2369 ut_ad(lock_mutex_own());
2370
2371 space = block->page.id.space();
2372 page_no = block->page.id.page_no();
2373
2374 lock_rec_free_all_from_discard_page_low(
2375 space, page_no, lock_sys.rec_hash);
2376 lock_rec_free_all_from_discard_page_low(
2377 space, page_no, lock_sys.prdt_hash);
2378 lock_rec_free_all_from_discard_page_low(
2379 space, page_no, lock_sys.prdt_page_hash);
2380}
2381
2382/*============= RECORD LOCK MOVING AND INHERITING ===================*/
2383
2384/*************************************************************//**
2385Resets the lock bits for a single record. Releases transactions waiting for
2386lock requests here. */
2387static
2388void
2389lock_rec_reset_and_release_wait_low(
2390/*================================*/
2391 hash_table_t* hash, /*!< in: hash table */
2392 const buf_block_t* block, /*!< in: buffer block containing
2393 the record */
2394 ulint heap_no)/*!< in: heap number of record */
2395{
2396 lock_t* lock;
2397
2398 ut_ad(lock_mutex_own());
2399
2400 for (lock = lock_rec_get_first(hash, block, heap_no);
2401 lock != NULL;
2402 lock = lock_rec_get_next(heap_no, lock)) {
2403
2404 if (lock_get_wait(lock)) {
2405 lock_rec_cancel(lock);
2406 } else {
2407 lock_rec_reset_nth_bit(lock, heap_no);
2408 }
2409 }
2410}
2411
2412/*************************************************************//**
2413Resets the lock bits for a single record. Releases transactions waiting for
2414lock requests here. */
2415static
2416void
2417lock_rec_reset_and_release_wait(
2418/*============================*/
2419 const buf_block_t* block, /*!< in: buffer block containing
2420 the record */
2421 ulint heap_no)/*!< in: heap number of record */
2422{
2423 lock_rec_reset_and_release_wait_low(
2424 lock_sys.rec_hash, block, heap_no);
2425
2426 lock_rec_reset_and_release_wait_low(
2427 lock_sys.prdt_hash, block, PAGE_HEAP_NO_INFIMUM);
2428 lock_rec_reset_and_release_wait_low(
2429 lock_sys.prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
2430}
2431
2432/*************************************************************//**
Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of
the other record. Also waiting lock requests on rec are inherited as
GRANTED gap locks. */
2437static
2438void
2439lock_rec_inherit_to_gap(
2440/*====================*/
2441 const buf_block_t* heir_block, /*!< in: block containing the
2442 record which inherits */
2443 const buf_block_t* block, /*!< in: block containing the
2444 record from which inherited;
2445 does NOT reset the locks on
2446 this record */
2447 ulint heir_heap_no, /*!< in: heap_no of the
2448 inheriting record */
2449 ulint heap_no) /*!< in: heap_no of the
2450 donating record */
2451{
2452 lock_t* lock;
2453
2454 ut_ad(lock_mutex_own());
2455
	/* If srv_locks_unsafe_for_binlog is TRUE or the session is using
	the READ COMMITTED isolation level, we do not want locks set
	by an UPDATE or a DELETE to be inherited as gap type locks. But we
	DO want S-locks/X-locks (taken for REPLACE) set by a consistency
	constraint to be inherited also then. */
2461
2462 for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
2463 lock != NULL;
2464 lock = lock_rec_get_next(heap_no, lock)) {
2465
2466 if (!lock_rec_get_insert_intention(lock)
2467 && !((srv_locks_unsafe_for_binlog
2468 || lock->trx->isolation_level
2469 <= TRX_ISO_READ_COMMITTED)
2470 && lock_get_mode(lock) ==
2471 (lock->trx->duplicates ? LOCK_S : LOCK_X))) {
2472 lock_rec_add_to_queue(
2473 LOCK_REC | LOCK_GAP
2474 | ulint(lock_get_mode(lock)),
2475 heir_block, heir_heap_no, lock->index,
2476 lock->trx, FALSE);
2477 }
2478 }
2479}
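
/* Illustrative sketch (not part of the server): the loop above inherits a
lock as a gap lock unless it is an insert-intention lock, or it is an
UPDATE/DELETE-style lock (LOCK_X, or LOCK_S when REPLACE sets
trx->duplicates) under READ COMMITTED or srv_locks_unsafe_for_binlog. A
standalone model of that filter, with invented simplified flags:

   struct ToyLock {
           bool    is_insert_intention;
           bool    read_committed;         // isolation <= READ COMMITTED
           bool    is_update_delete_mode;  // the mode skipped above
   };

   static bool should_inherit_as_gap(const ToyLock& lk)
   {
           return !lk.is_insert_intention
                  && !(lk.read_committed && lk.is_update_delete_mode);
   }
*/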
2480
2481/*************************************************************//**
Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of the
other record. Also waiting lock requests are inherited as GRANTED gap locks. */
2485static
2486void
2487lock_rec_inherit_to_gap_if_gap_lock(
2488/*================================*/
2489 const buf_block_t* block, /*!< in: buffer block */
2490 ulint heir_heap_no, /*!< in: heap_no of
2491 record which inherits */
2492 ulint heap_no) /*!< in: heap_no of record
2493 from which inherited;
2494 does NOT reset the locks
2495 on this record */
2496{
2497 lock_t* lock;
2498
2499 lock_mutex_enter();
2500
2501 for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
2502 lock != NULL;
2503 lock = lock_rec_get_next(heap_no, lock)) {
2504
2505 if (!lock_rec_get_insert_intention(lock)
2506 && (heap_no == PAGE_HEAP_NO_SUPREMUM
2507 || !lock_rec_get_rec_not_gap(lock))) {
2508
2509 lock_rec_add_to_queue(
2510 LOCK_REC | LOCK_GAP
2511 | ulint(lock_get_mode(lock)),
2512 block, heir_heap_no, lock->index,
2513 lock->trx, FALSE);
2514 }
2515 }
2516
2517 lock_mutex_exit();
2518}
2519
2520/*************************************************************//**
2521Moves the locks of a record to another record and resets the lock bits of
2522the donating record. */
2523static
2524void
2525lock_rec_move_low(
2526/*==============*/
2527 hash_table_t* lock_hash, /*!< in: hash table to use */
2528 const buf_block_t* receiver, /*!< in: buffer block containing
2529 the receiving record */
2530 const buf_block_t* donator, /*!< in: buffer block containing
2531 the donating record */
2532 ulint receiver_heap_no,/*!< in: heap_no of the record
2533 which gets the locks; there
2534 must be no lock requests
2535 on it! */
2536 ulint donator_heap_no)/*!< in: heap_no of the record
2537 which gives the locks */
2538{
2539 lock_t* lock;
2540
2541 ut_ad(lock_mutex_own());
2542
	/* If the lock is a predicate lock, it resides on the infimum record */
2544 ut_ad(lock_rec_get_first(
2545 lock_hash, receiver, receiver_heap_no) == NULL
2546 || lock_hash == lock_sys.prdt_hash
2547 || lock_hash == lock_sys.prdt_page_hash);
2548
2549 for (lock = lock_rec_get_first(lock_hash,
2550 donator, donator_heap_no);
2551 lock != NULL;
2552 lock = lock_rec_get_next(donator_heap_no, lock)) {
2553
2554 const ulint type_mode = lock->type_mode;
2555
2556 lock_rec_reset_nth_bit(lock, donator_heap_no);
2557
2558 if (type_mode & LOCK_WAIT) {
2559 lock_reset_lock_and_trx_wait(lock);
2560 }
2561
		/* Note that we FIRST reset the bit, and then set the lock:
		this way the function also works when donator == receiver */
2564
2565 lock_rec_add_to_queue(
2566 type_mode, receiver, receiver_heap_no,
2567 lock->index, lock->trx, FALSE);
2568 }
2569
2570 ut_ad(lock_rec_get_first(lock_sys.rec_hash,
2571 donator, donator_heap_no) == NULL);
2572}
2573
2574/** Move all the granted locks to the front of the given lock list.
2575All the waiting locks will be at the end of the list.
2576@param[in,out] lock_list the given lock list. */
2577static
2578void
2579lock_move_granted_locks_to_front(
2580 UT_LIST_BASE_NODE_T(lock_t)& lock_list)
2581{
2582 lock_t* lock;
2583
2584 bool seen_waiting_lock = false;
2585
2586 for (lock = UT_LIST_GET_FIRST(lock_list); lock;
2587 lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
2588
2589 if (!seen_waiting_lock) {
2590 if (lock->is_waiting()) {
2591 seen_waiting_lock = true;
2592 }
2593 continue;
2594 }
2595
2596 ut_ad(seen_waiting_lock);
2597
2598 if (!lock->is_waiting()) {
2599 lock_t* prev = UT_LIST_GET_PREV(trx_locks, lock);
2600 ut_a(prev);
2601 UT_LIST_MOVE_TO_FRONT(lock_list, lock);
2602 lock = prev;
2603 }
2604 }
2605}
2606
2607/*************************************************************//**
2608Moves the locks of a record to another record and resets the lock bits of
2609the donating record. */
2610UNIV_INLINE
2611void
2612lock_rec_move(
2613/*==========*/
2614 const buf_block_t* receiver, /*!< in: buffer block containing
2615 the receiving record */
2616 const buf_block_t* donator, /*!< in: buffer block containing
2617 the donating record */
2618 ulint receiver_heap_no,/*!< in: heap_no of the record
2619 which gets the locks; there
2620 must be no lock requests
2621 on it! */
2622 ulint donator_heap_no)/*!< in: heap_no of the record
2623 which gives the locks */
2624{
2625 lock_rec_move_low(lock_sys.rec_hash, receiver, donator,
2626 receiver_heap_no, donator_heap_no);
2627}
2628
2629/*************************************************************//**
Updates the lock table when we have reorganized a page. NOTE: we also copy
the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
2634void
2635lock_move_reorganize_page(
2636/*======================*/
2637 const buf_block_t* block, /*!< in: old index page, now
2638 reorganized */
2639 const buf_block_t* oblock) /*!< in: copy of the old, not
2640 reorganized page */
2641{
2642 lock_t* lock;
2643 UT_LIST_BASE_NODE_T(lock_t) old_locks;
2644 mem_heap_t* heap = NULL;
2645 ulint comp;
2646
2647 lock_mutex_enter();
2648
	/* FIXME: This needs to deal with predicate locks too */
2650 lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block);
2651
2652 if (lock == NULL) {
2653 lock_mutex_exit();
2654
2655 return;
2656 }
2657
2658 heap = mem_heap_create(256);
2659
	/* First, copy all the locks on the page to the heap and reset the
	bitmaps in the original locks; chain the copies of the locks
	using the trx_locks field in them. */
2663
2664 UT_LIST_INIT(old_locks, &lock_t::trx_locks);
2665
2666 do {
2667 /* Make a copy of the lock */
2668 lock_t* old_lock = lock_rec_copy(lock, heap);
2669
2670 UT_LIST_ADD_LAST(old_locks, old_lock);
2671
2672 /* Reset bitmap of lock */
2673 lock_rec_bitmap_reset(lock);
2674
2675 if (lock_get_wait(lock)) {
2676
2677 lock_reset_lock_and_trx_wait(lock);
2678 }
2679
2680 lock = lock_rec_get_next_on_page(lock);
2681 } while (lock != NULL);
2682
2683 comp = page_is_comp(block->frame);
2684 ut_ad(comp == page_is_comp(oblock->frame));
2685
2686 lock_move_granted_locks_to_front(old_locks);
2687
2688 DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize",
2689 UT_LIST_REVERSE(old_locks););
2690
2691 for (lock = UT_LIST_GET_FIRST(old_locks); lock;
2692 lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
2693
		/* NOTE: we also copy the locks set on the infimum and
		supremum of the page; the infimum may carry locks if an
		update of a record is occurring on the page, and its locks
		were temporarily stored on the infimum */
2698 const rec_t* rec1 = page_get_infimum_rec(
2699 buf_block_get_frame(block));
2700 const rec_t* rec2 = page_get_infimum_rec(
2701 buf_block_get_frame(oblock));
2702
2703 /* Set locks according to old locks */
2704 for (;;) {
2705 ulint old_heap_no;
2706 ulint new_heap_no;
2707 ut_d(const rec_t* const orec = rec1);
2708 ut_ad(page_rec_is_default_row(rec1)
2709 == page_rec_is_default_row(rec2));
2710
2711 if (comp) {
2712 old_heap_no = rec_get_heap_no_new(rec2);
2713 new_heap_no = rec_get_heap_no_new(rec1);
2714
2715 rec1 = page_rec_get_next_low(rec1, TRUE);
2716 rec2 = page_rec_get_next_low(rec2, TRUE);
2717 } else {
2718 old_heap_no = rec_get_heap_no_old(rec2);
2719 new_heap_no = rec_get_heap_no_old(rec1);
2720 ut_ad(!memcmp(rec1, rec2,
2721 rec_get_data_size_old(rec2)));
2722
2723 rec1 = page_rec_get_next_low(rec1, FALSE);
2724 rec2 = page_rec_get_next_low(rec2, FALSE);
2725 }
2726
2727 /* Clear the bit in old_lock. */
2728 if (old_heap_no < lock->un_member.rec_lock.n_bits
2729 && lock_rec_reset_nth_bit(lock, old_heap_no)) {
2730 ut_ad(!page_rec_is_default_row(orec));
2731
2732 /* NOTE that the old lock bitmap could be too
2733 small for the new heap number! */
2734
2735 lock_rec_add_to_queue(
2736 lock->type_mode, block, new_heap_no,
2737 lock->index, lock->trx, FALSE);
2738 }
2739
2740 if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2741 ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
2742 break;
2743 }
2744 }
2745
2746 ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
2747 }
2748
2749 lock_mutex_exit();
2750
2751 mem_heap_free(heap);
2752
2753#ifdef UNIV_DEBUG_LOCK_VALIDATE
2754 ut_ad(lock_rec_validate_page(block));
2755#endif
2756}
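
/* Illustrative sketch (not part of the server): after a reorganize, the
old and the new page contain the same records in the same order, so
walking both record lists in lockstep yields the old-to-new heap number
mapping used above. A standalone model over plain arrays:

   #include <algorithm>
   #include <cstdint>
   #include <utility>
   #include <vector>

   // Given the heap numbers of the records in list order on the old and
   // on the new page, produce (old_heap_no, new_heap_no) pairs.
   static std::vector<std::pair<uint32_t, uint32_t> >
   map_heap_nos(const std::vector<uint32_t>& old_order,
                const std::vector<uint32_t>& new_order)
   {
           std::vector<std::pair<uint32_t, uint32_t> > mapping;
           const size_t n = std::min(old_order.size(), new_order.size());
           for (size_t i = 0; i < n; i++) {
                   mapping.emplace_back(old_order[i], new_order[i]);
           }
           return mapping;
   }
*/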
2757
2758/*************************************************************//**
2759Moves the explicit locks on user records to another page if a record
2760list end is moved to another page. */
2761void
2762lock_move_rec_list_end(
2763/*===================*/
2764 const buf_block_t* new_block, /*!< in: index page to move to */
2765 const buf_block_t* block, /*!< in: index page */
2766 const rec_t* rec) /*!< in: record on page: this
2767 is the first record moved */
2768{
2769 lock_t* lock;
2770 const ulint comp = page_rec_is_comp(rec);
2771
2772 ut_ad(buf_block_get_frame(block) == page_align(rec));
2773 ut_ad(comp == page_is_comp(buf_block_get_frame(new_block)));
2774
2775 lock_mutex_enter();
2776
	/* Note: when we move locks from record to record, waiting locks
	and possibly granted gap type locks behind them are enqueued in
	the original order, because new elements are inserted at the end
	of the hash chain, and lock_rec_add_to_queue does not reuse locks
	if there are waiters in the queue. */
2782
2783 for (lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block); lock;
2784 lock = lock_rec_get_next_on_page(lock)) {
2785 const rec_t* rec1 = rec;
2786 const rec_t* rec2;
2787 const ulint type_mode = lock->type_mode;
2788
2789 if (comp) {
2790 if (page_offset(rec1) == PAGE_NEW_INFIMUM) {
2791 rec1 = page_rec_get_next_low(rec1, TRUE);
2792 }
2793
2794 rec2 = page_rec_get_next_low(
2795 buf_block_get_frame(new_block)
2796 + PAGE_NEW_INFIMUM, TRUE);
2797 } else {
2798 if (page_offset(rec1) == PAGE_OLD_INFIMUM) {
2799 rec1 = page_rec_get_next_low(rec1, FALSE);
2800 }
2801
2802 rec2 = page_rec_get_next_low(
2803 buf_block_get_frame(new_block)
2804 + PAGE_OLD_INFIMUM, FALSE);
2805 }
2806
2807 /* Copy lock requests on user records to new page and
2808 reset the lock bits on the old */
2809
2810 for (;;) {
2811 ut_ad(page_rec_is_default_row(rec1)
2812 == page_rec_is_default_row(rec2));
2813 ut_d(const rec_t* const orec = rec1);
2814
2815 ulint rec1_heap_no;
2816 ulint rec2_heap_no;
2817
2818 if (comp) {
2819 rec1_heap_no = rec_get_heap_no_new(rec1);
2820
2821 if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2822 break;
2823 }
2824
2825 rec2_heap_no = rec_get_heap_no_new(rec2);
2826 rec1 = page_rec_get_next_low(rec1, TRUE);
2827 rec2 = page_rec_get_next_low(rec2, TRUE);
2828 } else {
2829 rec1_heap_no = rec_get_heap_no_old(rec1);
2830
2831 if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2832 break;
2833 }
2834
2835 rec2_heap_no = rec_get_heap_no_old(rec2);
2836
2837 ut_ad(rec_get_data_size_old(rec1)
2838 == rec_get_data_size_old(rec2));
2839
2840 ut_ad(!memcmp(rec1, rec2,
2841 rec_get_data_size_old(rec1)));
2842
2843 rec1 = page_rec_get_next_low(rec1, FALSE);
2844 rec2 = page_rec_get_next_low(rec2, FALSE);
2845 }
2846
2847 if (rec1_heap_no < lock->un_member.rec_lock.n_bits
2848 && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
2849 ut_ad(!page_rec_is_default_row(orec));
2850
2851 if (type_mode & LOCK_WAIT) {
2852 lock_reset_lock_and_trx_wait(lock);
2853 }
2854
2855 lock_rec_add_to_queue(
2856 type_mode, new_block, rec2_heap_no,
2857 lock->index, lock->trx, FALSE);
2858 }
2859 }
2860 }
2861
2862 lock_mutex_exit();
2863
2864#ifdef UNIV_DEBUG_LOCK_VALIDATE
2865 ut_ad(lock_rec_validate_page(block));
2866 ut_ad(lock_rec_validate_page(new_block));
2867#endif
2868}
2869
2870/*************************************************************//**
2871Moves the explicit locks on user records to another page if a record
2872list start is moved to another page. */
2873void
2874lock_move_rec_list_start(
2875/*=====================*/
2876 const buf_block_t* new_block, /*!< in: index page to
2877 move to */
2878 const buf_block_t* block, /*!< in: index page */
2879 const rec_t* rec, /*!< in: record on page:
2880 this is the first
2881 record NOT copied */
2882 const rec_t* old_end) /*!< in: old
2883 previous-to-last
2884 record on new_page
2885 before the records
2886 were copied */
2887{
2888 lock_t* lock;
2889 const ulint comp = page_rec_is_comp(rec);
2890
2891 ut_ad(block->frame == page_align(rec));
2892 ut_ad(new_block->frame == page_align(old_end));
2893 ut_ad(comp == page_rec_is_comp(old_end));
2894 ut_ad(!page_rec_is_default_row(rec));
2895
2896 lock_mutex_enter();
2897
2898 for (lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block); lock;
2899 lock = lock_rec_get_next_on_page(lock)) {
2900 const rec_t* rec1;
2901 const rec_t* rec2;
2902 const ulint type_mode = lock->type_mode;
2903
2904 if (comp) {
2905 rec1 = page_rec_get_next_low(
2906 buf_block_get_frame(block)
2907 + PAGE_NEW_INFIMUM, TRUE);
2908 rec2 = page_rec_get_next_low(old_end, TRUE);
2909 } else {
2910 rec1 = page_rec_get_next_low(
2911 buf_block_get_frame(block)
2912 + PAGE_OLD_INFIMUM, FALSE);
2913 rec2 = page_rec_get_next_low(old_end, FALSE);
2914 }
2915
2916 /* Copy lock requests on user records to new page and
2917 reset the lock bits on the old */
2918
2919 while (rec1 != rec) {
2920 ut_ad(page_rec_is_default_row(rec1)
2921 == page_rec_is_default_row(rec2));
2922 ut_d(const rec_t* const prev = rec1);
2923
2924 ulint rec1_heap_no;
2925 ulint rec2_heap_no;
2926
2927 if (comp) {
2928 rec1_heap_no = rec_get_heap_no_new(rec1);
2929 rec2_heap_no = rec_get_heap_no_new(rec2);
2930
2931 rec1 = page_rec_get_next_low(rec1, TRUE);
2932 rec2 = page_rec_get_next_low(rec2, TRUE);
2933 } else {
2934 rec1_heap_no = rec_get_heap_no_old(rec1);
2935 rec2_heap_no = rec_get_heap_no_old(rec2);
2936
2937 ut_ad(!memcmp(rec1, rec2,
2938 rec_get_data_size_old(rec2)));
2939
2940 rec1 = page_rec_get_next_low(rec1, FALSE);
2941 rec2 = page_rec_get_next_low(rec2, FALSE);
2942 }
2943
2944 if (rec1_heap_no < lock->un_member.rec_lock.n_bits
2945 && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
2946 ut_ad(!page_rec_is_default_row(prev));
2947
2948 if (type_mode & LOCK_WAIT) {
2949 lock_reset_lock_and_trx_wait(lock);
2950 }
2951
2952 lock_rec_add_to_queue(
2953 type_mode, new_block, rec2_heap_no,
2954 lock->index, lock->trx, FALSE);
2955 }
2956 }
2957
2958#ifdef UNIV_DEBUG
2959 if (page_rec_is_supremum(rec)) {
2960 ulint i;
2961
2962 for (i = PAGE_HEAP_NO_USER_LOW;
2963 i < lock_rec_get_n_bits(lock); i++) {
2964 if (lock_rec_get_nth_bit(lock, i)) {
				ib::fatal()
					<< "lock_move_rec_list_start(): "
					<< i << " not moved in "
					<< (void*) lock;
2969 }
2970 }
2971 }
2972#endif /* UNIV_DEBUG */
2973 }
2974
2975 lock_mutex_exit();
2976
2977#ifdef UNIV_DEBUG_LOCK_VALIDATE
2978 ut_ad(lock_rec_validate_page(block));
2979#endif
2980}
2981
2982/*************************************************************//**
Moves the explicit locks on user records to another page when R-tree
records are moved to that page. */
2985void
2986lock_rtr_move_rec_list(
2987/*===================*/
2988 const buf_block_t* new_block, /*!< in: index page to
2989 move to */
2990 const buf_block_t* block, /*!< in: index page */
2991 rtr_rec_move_t* rec_move, /*!< in: recording records
2992 moved */
2993 ulint num_move) /*!< in: num of rec to move */
2994{
2995 lock_t* lock;
2996 ulint comp;
2997
2998 if (!num_move) {
2999 return;
3000 }
3001
3002 comp = page_rec_is_comp(rec_move[0].old_rec);
3003
3004 ut_ad(block->frame == page_align(rec_move[0].old_rec));
3005 ut_ad(new_block->frame == page_align(rec_move[0].new_rec));
3006 ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec));
3007
3008 lock_mutex_enter();
3009
3010 for (lock = lock_rec_get_first_on_page(lock_sys.rec_hash, block); lock;
3011 lock = lock_rec_get_next_on_page(lock)) {
3012 ulint moved = 0;
3013 const rec_t* rec1;
3014 const rec_t* rec2;
3015 const ulint type_mode = lock->type_mode;
3016
3017 /* Copy lock requests on user records to new page and
3018 reset the lock bits on the old */
3019
3020 while (moved < num_move) {
3021 ulint rec1_heap_no;
3022 ulint rec2_heap_no;
3023
3024 rec1 = rec_move[moved].old_rec;
3025 rec2 = rec_move[moved].new_rec;
3026 ut_ad(!page_rec_is_default_row(rec1));
3027 ut_ad(!page_rec_is_default_row(rec2));
3028
3029 if (comp) {
3030 rec1_heap_no = rec_get_heap_no_new(rec1);
3031 rec2_heap_no = rec_get_heap_no_new(rec2);
3032
3033 } else {
3034 rec1_heap_no = rec_get_heap_no_old(rec1);
3035 rec2_heap_no = rec_get_heap_no_old(rec2);
3036
3037 ut_ad(!memcmp(rec1, rec2,
3038 rec_get_data_size_old(rec2)));
3039 }
3040
3041 if (rec1_heap_no < lock->un_member.rec_lock.n_bits
3042 && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
3043 if (type_mode & LOCK_WAIT) {
3044 lock_reset_lock_and_trx_wait(lock);
3045 }
3046
3047 lock_rec_add_to_queue(
3048 type_mode, new_block, rec2_heap_no,
3049 lock->index, lock->trx, FALSE);
3050
3051 rec_move[moved].moved = true;
3052 }
3053
3054 moved++;
3055 }
3056 }
3057
3058 lock_mutex_exit();
3059
3060#ifdef UNIV_DEBUG_LOCK_VALIDATE
3061 ut_ad(lock_rec_validate_page(block));
3062#endif
}

/*************************************************************//**
3065Updates the lock table when a page is split to the right. */
3066void
3067lock_update_split_right(
3068/*====================*/
3069 const buf_block_t* right_block, /*!< in: right page */
3070 const buf_block_t* left_block) /*!< in: left page */
3071{
3072 ulint heap_no = lock_get_min_heap_no(right_block);
3073
3074 lock_mutex_enter();
3075
3076 /* Move the locks on the supremum of the left page to the supremum
3077 of the right page */
3078
3079 lock_rec_move(right_block, left_block,
3080 PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3081
3082 /* Inherit the locks to the supremum of left page from the successor
3083 of the infimum on right page */
3084
3085 lock_rec_inherit_to_gap(left_block, right_block,
3086 PAGE_HEAP_NO_SUPREMUM, heap_no);
3087
3088 lock_mutex_exit();
3089}
3090
3091/*************************************************************//**
3092Updates the lock table when a page is merged to the right. */
3093void
3094lock_update_merge_right(
3095/*====================*/
3096 const buf_block_t* right_block, /*!< in: right page to
3097 which merged */
3098 const rec_t* orig_succ, /*!< in: original
3099 successor of infimum
3100 on the right page
3101 before merge */
3102 const buf_block_t* left_block) /*!< in: merged index
3103 page which will be
3104 discarded */
3105{
3106 ut_ad(!page_rec_is_default_row(orig_succ));
3107
3108 lock_mutex_enter();
3109
3110 /* Inherit the locks from the supremum of the left page to the
3111 original successor of infimum on the right page, to which the left
3112 page was merged */
3113
3114 lock_rec_inherit_to_gap(right_block, left_block,
3115 page_rec_get_heap_no(orig_succ),
3116 PAGE_HEAP_NO_SUPREMUM);
3117
3118 /* Reset the locks on the supremum of the left page, releasing
3119 waiting transactions */
3120
3121 lock_rec_reset_and_release_wait_low(
3122 lock_sys.rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
3123
	/* There should be no predicate page lock on the left page;
	otherwise the merge would have been blocked. */
3126 ut_ad(!lock_rec_get_first_on_page_addr(lock_sys.prdt_page_hash,
3127 left_block->page.id.space(),
3128 left_block->page.id.page_no()));
3129
3130 lock_rec_free_all_from_discard_page(left_block);
3131
3132 lock_mutex_exit();
3133}
3134
3135/*************************************************************//**
Updates the lock table when the root page is copied to another page in
btr_root_raise_and_insert. Note that we leave lock structs on the
root page, even though they do not make sense on pages other than leaf
pages: the reason is that in a pessimistic update the infimum record
of the root page will act as a dummy carrier of the locks of the record
to be updated. */
3142void
3143lock_update_root_raise(
3144/*===================*/
3145 const buf_block_t* block, /*!< in: index page to which copied */
3146 const buf_block_t* root) /*!< in: root page */
3147{
3148 lock_mutex_enter();
3149
3150 /* Move the locks on the supremum of the root to the supremum
3151 of block */
3152
3153 lock_rec_move(block, root,
3154 PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3155 lock_mutex_exit();
3156}
3157
3158/*************************************************************//**
3159Updates the lock table when a page is copied to another and the original page
3160is removed from the chain of leaf pages, except if page is the root! */
3161void
3162lock_update_copy_and_discard(
3163/*=========================*/
3164 const buf_block_t* new_block, /*!< in: index page to
3165 which copied */
3166 const buf_block_t* block) /*!< in: index page;
3167 NOT the root! */
3168{
3169 lock_mutex_enter();
3170
3171 /* Move the locks on the supremum of the old page to the supremum
3172 of new_page */
3173
3174 lock_rec_move(new_block, block,
3175 PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3176 lock_rec_free_all_from_discard_page(block);
3177
3178 lock_mutex_exit();
3179}
3180
3181/*************************************************************//**
3182Updates the lock table when a page is split to the left. */
3183void
3184lock_update_split_left(
3185/*===================*/
3186 const buf_block_t* right_block, /*!< in: right page */
3187 const buf_block_t* left_block) /*!< in: left page */
3188{
3189 ulint heap_no = lock_get_min_heap_no(right_block);
3190
3191 lock_mutex_enter();
3192
3193 /* Inherit the locks to the supremum of the left page from the
3194 successor of the infimum on the right page */
3195
3196 lock_rec_inherit_to_gap(left_block, right_block,
3197 PAGE_HEAP_NO_SUPREMUM, heap_no);
3198
3199 lock_mutex_exit();
3200}
3201
3202/*************************************************************//**
3203Updates the lock table when a page is merged to the left. */
3204void
3205lock_update_merge_left(
3206/*===================*/
3207 const buf_block_t* left_block, /*!< in: left page to
3208 which merged */
3209 const rec_t* orig_pred, /*!< in: original predecessor
3210 of supremum on the left page
3211 before merge */
3212 const buf_block_t* right_block) /*!< in: merged index page
3213 which will be discarded */
3214{
3215 const rec_t* left_next_rec;
3216
3217 ut_ad(left_block->frame == page_align(orig_pred));
3218
3219 lock_mutex_enter();
3220
3221 left_next_rec = page_rec_get_next_const(orig_pred);
3222
3223 if (!page_rec_is_supremum(left_next_rec)) {
3224
3225 /* Inherit the locks on the supremum of the left page to the
3226 first record which was moved from the right page */
3227
3228 lock_rec_inherit_to_gap(left_block, left_block,
3229 page_rec_get_heap_no(left_next_rec),
3230 PAGE_HEAP_NO_SUPREMUM);
3231
3232 /* Reset the locks on the supremum of the left page,
3233 releasing waiting transactions */
3234
3235 lock_rec_reset_and_release_wait_low(
3236 lock_sys.rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
3237 }
3238
3239 /* Move the locks from the supremum of right page to the supremum
3240 of the left page */
3241
3242 lock_rec_move(left_block, right_block,
3243 PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3244
	/* There should be no predicate page lock on the right page;
	otherwise the merge would have been blocked. */
3247 ut_ad(!lock_rec_get_first_on_page_addr(
3248 lock_sys.prdt_page_hash,
3249 right_block->page.id.space(),
3250 right_block->page.id.page_no()));
3251
3252 lock_rec_free_all_from_discard_page(right_block);
3253
3254 lock_mutex_exit();
3255}
3256
3257/*************************************************************//**
3258Resets the original locks on heir and replaces them with gap type locks
3259inherited from rec. */
3260void
3261lock_rec_reset_and_inherit_gap_locks(
3262/*=================================*/
3263 const buf_block_t* heir_block, /*!< in: block containing the
3264 record which inherits */
3265 const buf_block_t* block, /*!< in: block containing the
3266 record from which inherited;
3267 does NOT reset the locks on
3268 this record */
3269 ulint heir_heap_no, /*!< in: heap_no of the
3270 inheriting record */
3271 ulint heap_no) /*!< in: heap_no of the
3272 donating record */
3273{
3274 lock_mutex_enter();
3275
3276 lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
3277
3278 lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
3279
3280 lock_mutex_exit();
3281}
3282
3283/*************************************************************//**
3284Updates the lock table when a page is discarded. */
3285void
3286lock_update_discard(
3287/*================*/
3288 const buf_block_t* heir_block, /*!< in: index page
3289 which will inherit the locks */
3290 ulint heir_heap_no, /*!< in: heap_no of the record
3291 which will inherit the locks */
3292 const buf_block_t* block) /*!< in: index page
3293 which will be discarded */
3294{
3295 const page_t* page = block->frame;
3296 const rec_t* rec;
3297 ulint heap_no;
3298
3299 lock_mutex_enter();
3300
3301 if (!lock_rec_get_first_on_page(lock_sys.rec_hash, block)
3302 && (!lock_rec_get_first_on_page(lock_sys.prdt_hash, block))) {
3303 /* No locks exist on page, nothing to do */
3304
3305 lock_mutex_exit();
3306
3307 return;
3308 }
3309
3310 /* Inherit all the locks on the page to the record and reset all
3311 the locks on the page */
3312
3313 if (page_is_comp(page)) {
3314 rec = page + PAGE_NEW_INFIMUM;
3315
3316 do {
3317 heap_no = rec_get_heap_no_new(rec);
3318
3319 lock_rec_inherit_to_gap(heir_block, block,
3320 heir_heap_no, heap_no);
3321
3322 lock_rec_reset_and_release_wait(block, heap_no);
3323
3324 rec = page + rec_get_next_offs(rec, TRUE);
3325 } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3326 } else {
3327 rec = page + PAGE_OLD_INFIMUM;
3328
3329 do {
3330 heap_no = rec_get_heap_no_old(rec);
3331
3332 lock_rec_inherit_to_gap(heir_block, block,
3333 heir_heap_no, heap_no);
3334
3335 lock_rec_reset_and_release_wait(block, heap_no);
3336
3337 rec = page + rec_get_next_offs(rec, FALSE);
3338 } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3339 }
3340
3341 lock_rec_free_all_from_discard_page(block);
3342
3343 lock_mutex_exit();
3344}
3345
3346/*************************************************************//**
3347Updates the lock table when a new user record is inserted. */
3348void
3349lock_update_insert(
3350/*===============*/
3351 const buf_block_t* block, /*!< in: buffer block containing rec */
3352 const rec_t* rec) /*!< in: the inserted record */
3353{
3354 ulint receiver_heap_no;
3355 ulint donator_heap_no;
3356
3357 ut_ad(block->frame == page_align(rec));
3358 ut_ad(!page_rec_is_default_row(rec));
3359
	/* Inherit the gap locks for rec, in gap mode, from the next
	record */
3362
3363 if (page_rec_is_comp(rec)) {
3364 receiver_heap_no = rec_get_heap_no_new(rec);
3365 donator_heap_no = rec_get_heap_no_new(
3366 page_rec_get_next_low(rec, TRUE));
3367 } else {
3368 receiver_heap_no = rec_get_heap_no_old(rec);
3369 donator_heap_no = rec_get_heap_no_old(
3370 page_rec_get_next_low(rec, FALSE));
3371 }
3372
3373 lock_rec_inherit_to_gap_if_gap_lock(
3374 block, receiver_heap_no, donator_heap_no);
3375}
3376
3377/*************************************************************//**
3378Updates the lock table when a record is removed. */
3379void
3380lock_update_delete(
3381/*===============*/
3382 const buf_block_t* block, /*!< in: buffer block containing rec */
3383 const rec_t* rec) /*!< in: the record to be removed */
3384{
3385 const page_t* page = block->frame;
3386 ulint heap_no;
3387 ulint next_heap_no;
3388
3389 ut_ad(page == page_align(rec));
3390 ut_ad(!page_rec_is_default_row(rec));
3391
3392 if (page_is_comp(page)) {
3393 heap_no = rec_get_heap_no_new(rec);
3394 next_heap_no = rec_get_heap_no_new(page
3395 + rec_get_next_offs(rec,
3396 TRUE));
3397 } else {
3398 heap_no = rec_get_heap_no_old(rec);
3399 next_heap_no = rec_get_heap_no_old(page
3400 + rec_get_next_offs(rec,
3401 FALSE));
3402 }
3403
3404 lock_mutex_enter();
3405
3406 /* Let the next record inherit the locks from rec, in gap mode */
3407
3408 lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
3409
3410 /* Reset the lock bits on rec and release waiting transactions */
3411
3412 lock_rec_reset_and_release_wait(block, heap_no);
3413
3414 lock_mutex_exit();
3415}
3416
3417/*********************************************************************//**
3418Stores on the page infimum record the explicit locks of another record.
3419This function is used to store the lock state of a record when it is
3420updated and the size of the record changes in the update. The record
3421is moved in such an update, perhaps to another page. The infimum record
3422acts as a dummy carrier record, taking care of lock releases while the
3423actual record is being moved. */
3424void
3425lock_rec_store_on_page_infimum(
3426/*===========================*/
3427 const buf_block_t* block, /*!< in: buffer block containing rec */
3428 const rec_t* rec) /*!< in: record whose lock state
3429 is stored on the infimum
3430 record of the same page; lock
3431 bits are reset on the
3432 record */
3433{
3434 ulint heap_no = page_rec_get_heap_no(rec);
3435
3436 ut_ad(block->frame == page_align(rec));
3437
3438 lock_mutex_enter();
3439
3440 lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
3441
3442 lock_mutex_exit();
3443}
3444
3445/*********************************************************************//**
3446Restores the state of explicit lock requests on a single record, where the
3447state was stored on the infimum of the page. */
3448void
3449lock_rec_restore_from_page_infimum(
3450/*===============================*/
3451 const buf_block_t* block, /*!< in: buffer block containing rec */
3452 const rec_t* rec, /*!< in: record whose lock state
3453 is restored */
3454 const buf_block_t* donator)/*!< in: page (rec is not
3455 necessarily on this page)
3456 whose infimum stored the lock
3457 state; lock bits are reset on
3458 the infimum */
3459{
3460 ulint heap_no = page_rec_get_heap_no(rec);
3461
3462 lock_mutex_enter();
3463
3464 lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
3465
3466 lock_mutex_exit();
3467}
3468
3469/*========================= TABLE LOCKS ==============================*/
3470
3471/** Functor for accessing the embedded node within a table lock. */
3472struct TableLockGetNode {
3473 ut_list_node<lock_t>& operator() (lock_t& elem)
3474 {
3475 return(elem.un_member.tab_lock.locks);
3476 }
3477};
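
/* Illustrative sketch (not part of the server): the ut_list_* primitives
take such a functor so that one lock_t can sit on several intrusive lists
at once; the functor tells the list code where this particular list's
node lives inside the element. A standalone model with invented types:

   template <typename T>
   struct ToyNode { T* prev; T* next; };

   struct ToyLock {
           ToyNode<ToyLock> table_node;    // chain of locks on a table
           ToyNode<ToyLock> trx_node;      // chain of locks of a trx
   };

   struct TableNodeGetter {
           ToyNode<ToyLock>& operator()(ToyLock& elem)
           {
                   return elem.table_node;
           }
   };
*/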
3478
3479/*********************************************************************//**
3480Creates a table lock object and adds it as the last in the lock queue
3481of the table. Does NOT check for deadlocks or lock compatibility.
3482@return own: new lock object */
3483UNIV_INLINE
3484lock_t*
3485lock_table_create(
3486/*==============*/
3487 dict_table_t* table, /*!< in/out: database table
3488 in dictionary cache */
3489 ulint type_mode,/*!< in: lock mode possibly ORed with
3490 LOCK_WAIT */
3491 trx_t* trx /*!< in: trx */
3492#ifdef WITH_WSREP
3493 , lock_t* c_lock = NULL /*!< in: conflicting lock */
3494#endif
3495 )
3496{
3497 lock_t* lock;
3498
3499 ut_ad(table && trx);
3500 ut_ad(lock_mutex_own());
3501 ut_ad(trx_mutex_own(trx));
3502
3503 check_trx_state(trx);
3504
3505 if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
3506 ++table->n_waiting_or_granted_auto_inc_locks;
3507 }
3508
	/* For AUTOINC locking we reuse the lock instance only if
	there is no wait involved; otherwise we allocate the waiting lock
	from the transaction lock heap. */
3512 if (type_mode == LOCK_AUTO_INC) {
3513
3514 lock = table->autoinc_lock;
3515
3516 table->autoinc_trx = trx;
3517
3518 ib_vector_push(trx->autoinc_locks, &lock);
3519
3520 } else if (trx->lock.table_cached < trx->lock.table_pool.size()) {
3521 lock = trx->lock.table_pool[trx->lock.table_cached++];
3522 } else {
3523
3524 lock = static_cast<lock_t*>(
3525 mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));
3526
3527 }
3528
3529 lock->type_mode = ib_uint32_t(type_mode | LOCK_TABLE);
3530 lock->trx = trx;
3531
3532 lock->un_member.tab_lock.table = table;
3533
3534 ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);
3535
3536 UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
3537
3538#ifdef WITH_WSREP
3539 if (c_lock) {
3540 if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
3541 ut_list_insert(table->locks, c_lock, lock,
3542 TableLockGetNode());
3543 if (wsrep_debug) {
3544 ib::info() << "table lock BF conflict for "
3545 << ib::hex(c_lock->trx->id)
3546 << " SQL: "
3547 << wsrep_thd_query(
3548 c_lock->trx->mysql_thd);
3549 }
3550 } else {
3551 ut_list_append(table->locks, lock, TableLockGetNode());
3552 }
3553
3554 trx_mutex_enter(c_lock->trx);
3555
3556 if (c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
3557 c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;
3558
3559 if (wsrep_debug) {
3560 wsrep_print_wait_locks(c_lock);
3561 }
3562
3563 /* The lock release will call lock_grant(),
3564 which would acquire trx->mutex again. */
3565 trx_mutex_exit(trx);
3566 lock_cancel_waiting_and_release(
3567 c_lock->trx->lock.wait_lock);
3568 trx_mutex_enter(trx);
3569
3570 if (wsrep_debug) {
3571 ib::info() << "WSREP: c_lock canceled "
3572 << ib::hex(c_lock->trx->id)
3573 << " SQL: "
3574 << wsrep_thd_query(
3575 c_lock->trx->mysql_thd);
3576 }
3577 }
3578
3579 trx_mutex_exit(c_lock->trx);
3580 } else
3581#endif /* WITH_WSREP */
3582 ut_list_append(table->locks, lock, TableLockGetNode());
3583
3584 if (type_mode & LOCK_WAIT) {
3585
3586 lock_set_lock_and_trx_wait(lock, trx);
3587 }
3588
3589 lock->trx->lock.table_locks.push_back(lock);
3590
3591 MONITOR_INC(MONITOR_TABLELOCK_CREATED);
3592 MONITOR_INC(MONITOR_NUM_TABLELOCK);
3593
3594 return(lock);
3595}
3596
3597/*************************************************************//**
Pops autoinc lock requests from the transaction's autoinc_locks vector.
Handles the case where there are gaps (NULL entries) in the vector that
need to be popped off the stack. */
3601UNIV_INLINE
3602void
3603lock_table_pop_autoinc_locks(
3604/*=========================*/
3605 trx_t* trx) /*!< in/out: transaction that owns the AUTOINC locks */
3606{
3607 ut_ad(lock_mutex_own());
3608 ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
3609
3610 /* Skip any gaps, gaps are NULL lock entries in the
3611 trx->autoinc_locks vector. */
3612
3613 do {
3614 ib_vector_pop(trx->autoinc_locks);
3615
3616 if (ib_vector_is_empty(trx->autoinc_locks)) {
3617 return;
3618 }
3619
3620 } while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL);
3621}
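
/* Illustrative sketch (not part of the server): releasing an AUTOINC lock
from the middle of the vector leaves a NULL "gap" behind, so popping must
discard the top element together with any trailing gaps, as above. The
same invariant on a std::vector used as a stack:

   #include <vector>

   struct ToyLock;

   static void pop_autoinc(std::vector<ToyLock*>& stack)
   {
           if (stack.empty()) {
                   return;
           }
           do {
                   stack.pop_back();
           } while (!stack.empty() && stack.back() == nullptr);
   }
*/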
3622
3623/*************************************************************//**
3624Removes an autoinc lock request from the transaction's autoinc_locks. */
3625UNIV_INLINE
3626void
3627lock_table_remove_autoinc_lock(
3628/*===========================*/
3629 lock_t* lock, /*!< in: table lock */
3630 trx_t* trx) /*!< in/out: transaction that owns the lock */
3631{
3632 lock_t* autoinc_lock;
3633 lint i = ib_vector_size(trx->autoinc_locks) - 1;
3634
3635 ut_ad(lock_mutex_own());
3636 ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC);
3637 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
3638 ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
3639
3640 /* With stored functions and procedures the user may drop
3641 a table within the same "statement". This special case has
3642 to be handled by deleting only those AUTOINC locks that were
3643 held by the table being dropped. */
3644
3645 autoinc_lock = *static_cast<lock_t**>(
3646 ib_vector_get(trx->autoinc_locks, i));
3647
3648 /* This is the default fast case. */
3649
3650 if (autoinc_lock == lock) {
3651 lock_table_pop_autoinc_locks(trx);
3652 } else {
3653 /* The last element should never be NULL */
3654 ut_a(autoinc_lock != NULL);
3655
3656 /* Handle freeing the locks from within the stack. */
3657
3658 while (--i >= 0) {
3659 autoinc_lock = *static_cast<lock_t**>(
3660 ib_vector_get(trx->autoinc_locks, i));
3661
3662 if (autoinc_lock == lock) {
3663 void* null_var = NULL;
3664 ib_vector_set(trx->autoinc_locks, i, &null_var);
3665 return;
3666 }
3667 }
3668
3669 /* Must find the autoinc lock. */
3670 ut_error;
3671 }
3672}
3673
3674/*************************************************************//**
3675Removes a table lock request from the queue and the trx list of locks;
3676this is a low-level function which does NOT check if waiting requests
3677can now be granted. */
3678UNIV_INLINE
3679void
3680lock_table_remove_low(
3681/*==================*/
3682 lock_t* lock) /*!< in/out: table lock */
3683{
3684 trx_t* trx;
3685 dict_table_t* table;
3686
3687 ut_ad(lock_mutex_own());
3688
3689 trx = lock->trx;
3690 table = lock->un_member.tab_lock.table;
3691
	/* Remove the lock from the transaction's AUTOINC vector, if
	the lock that is being released is an AUTOINC lock. */
3694 if (lock_get_mode(lock) == LOCK_AUTO_INC) {
3695
3696 /* The table's AUTOINC lock can get transferred to
3697 another transaction before we get here. */
3698 if (table->autoinc_trx == trx) {
3699 table->autoinc_trx = NULL;
3700 }
3701
3702 /* The locks must be freed in the reverse order from
3703 the one in which they were acquired. This is to avoid
3704 traversing the AUTOINC lock vector unnecessarily.
3705
3706 We only store locks that were granted in the
3707 trx->autoinc_locks vector (see lock_table_create()
3708 and lock_grant()). Therefore it can be empty and we
3709 need to check for that. */
3710
3711 if (!lock_get_wait(lock)
3712 && !ib_vector_is_empty(trx->autoinc_locks)) {
3713
3714 lock_table_remove_autoinc_lock(lock, trx);
3715 }
3716
3717 ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
3718 table->n_waiting_or_granted_auto_inc_locks--;
3719 }
3720
3721 UT_LIST_REMOVE(trx->lock.trx_locks, lock);
3722 ut_list_remove(table->locks, lock, TableLockGetNode());
3723
3724 MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
3725 MONITOR_DEC(MONITOR_NUM_TABLELOCK);
3726}
3727
3728/*********************************************************************//**
3729Enqueues a waiting request for a table lock which cannot be granted
3730immediately. Checks for deadlocks.
3731@retval DB_LOCK_WAIT if the waiting lock was enqueued
3732@retval DB_DEADLOCK if this transaction was chosen as the victim
3733@retval DB_SUCCESS if the other transaction committed or aborted */
3734static
3735dberr_t
3736lock_table_enqueue_waiting(
3737/*=======================*/
3738 ulint mode, /*!< in: lock mode this transaction is
3739 requesting */
3740 dict_table_t* table, /*!< in/out: table */
3741 que_thr_t* thr /*!< in: query thread */
3742#ifdef WITH_WSREP
3743 , lock_t* c_lock /*!< in: conflicting lock or NULL */
3744#endif
3745)
3746{
3747 trx_t* trx;
3748 lock_t* lock;
3749
3750 ut_ad(lock_mutex_own());
3751 ut_ad(!srv_read_only_mode);
3752
3753 trx = thr_get_trx(thr);
3754 ut_ad(trx_mutex_own(trx));
3755 ut_a(!que_thr_stop(thr));
3756
3757 switch (trx_get_dict_operation(trx)) {
3758 case TRX_DICT_OP_NONE:
3759 break;
3760 case TRX_DICT_OP_TABLE:
3761 case TRX_DICT_OP_INDEX:
3762 ib::error() << "A table lock wait happens in a dictionary"
3763 " operation. Table " << table->name
3764 << ". " << BUG_REPORT_MSG;
3765 ut_ad(0);
3766 }
3767
3768#ifdef WITH_WSREP
3769 if (trx->lock.was_chosen_as_deadlock_victim) {
3770 return(DB_DEADLOCK);
3771 }
3772#endif /* WITH_WSREP */
3773
3774 /* Enqueue the lock request that will wait to be granted */
3775 lock = lock_table_create(table, ulint(mode) | LOCK_WAIT, trx
3776#ifdef WITH_WSREP
3777 , c_lock
3778#endif
3779 );
3780
3781 const trx_t* victim_trx =
3782 DeadlockChecker::check_and_resolve(lock, trx);
3783
3784 if (victim_trx != 0) {
3785 ut_ad(victim_trx == trx);
3786
		/* The order here is important: we don't want to
		lose the state of the lock before calling remove. */
3789 lock_table_remove_low(lock);
3790 lock_reset_lock_and_trx_wait(lock);
3791
3792 return(DB_DEADLOCK);
3793
3794 } else if (trx->lock.wait_lock == NULL) {
3795 /* Deadlock resolution chose another transaction as a victim,
3796 and we accidentally got our lock granted! */
3797
3798 return(DB_SUCCESS);
3799 }
3800
3801 trx->lock.que_state = TRX_QUE_LOCK_WAIT;
3802
3803 trx->lock.wait_started = ut_time();
3804 trx->lock.was_chosen_as_deadlock_victim = false;
3805
3806 ut_a(que_thr_stop(thr));
3807
3808 MONITOR_INC(MONITOR_TABLELOCK_WAIT);
3809
3810 return(DB_LOCK_WAIT);
3811}
3812
3813/*********************************************************************//**
3814Checks if other transactions have an incompatible mode lock request in
3815the lock queue.
3816@return lock or NULL */
3817UNIV_INLINE
3818lock_t*
3819lock_table_other_has_incompatible(
3820/*==============================*/
3821 const trx_t* trx, /*!< in: transaction, or NULL if all
3822 transactions should be included */
3823 ulint wait, /*!< in: LOCK_WAIT if also
3824 waiting locks are taken into
3825 account, or 0 if not */
3826 const dict_table_t* table, /*!< in: table */
3827 lock_mode mode) /*!< in: lock mode */
3828{
3829 lock_t* lock;
3830
3831 ut_ad(lock_mutex_own());
3832
3833 for (lock = UT_LIST_GET_LAST(table->locks);
3834 lock != NULL;
3835 lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
3836
3837 if (lock->trx != trx
3838 && !lock_mode_compatible(lock_get_mode(lock), mode)
3839 && (wait || !lock_get_wait(lock))) {
3840
3841#ifdef WITH_WSREP
3842 if (wsrep_on(lock->trx->mysql_thd)) {
3843 if (wsrep_debug) {
3844 ib::info() << "WSREP: table lock abort for table:"
3845 << table->name.m_name;
3846 ib::info() << " SQL: "
3847 << wsrep_thd_query(lock->trx->mysql_thd);
3848 }
3849 trx_mutex_enter(lock->trx);
3850 wsrep_kill_victim((trx_t *)trx, (lock_t *)lock);
3851 trx_mutex_exit(lock->trx);
3852 }
3853#endif /* WITH_WSREP */
3854
3855 return(lock);
3856 }
3857 }
3858
3859 return(NULL);
3860}
3861
3862/*********************************************************************//**
3863Locks the specified database table in the mode given. If the lock cannot
3864be granted immediately, the query thread is put to wait.
3865@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
3866dberr_t
3867lock_table(
3868/*=======*/
3869 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
3870 does nothing */
3871 dict_table_t* table, /*!< in/out: database table
3872 in dictionary cache */
3873 lock_mode mode, /*!< in: lock mode */
3874 que_thr_t* thr) /*!< in: query thread */
3875{
3876 trx_t* trx;
3877 dberr_t err;
3878 lock_t* wait_for;
3879
3880 ut_ad(table && thr);
3881
	/* Given the limited visibility of temporary tables, we can
	avoid the locking overhead. */
3884 if ((flags & BTR_NO_LOCKING_FLAG)
3885 || srv_read_only_mode
3886 || table->is_temporary()) {
3887
3888 return(DB_SUCCESS);
3889 }
3890
3891 ut_a(flags == 0);
3892
3893 trx = thr_get_trx(thr);
3894
	/* Look for equal or stronger locks the same trx already
	has on the table. No need to acquire the lock mutex here
	because only this transaction can add entries to
	trx_t::table_locks or read them. */
3899
3900 if (lock_table_has(trx, table, mode)) {
3901
3902 return(DB_SUCCESS);
3903 }
3904
	/* Read-only transactions can write to temporary tables; we
	don't want to promote them to RW transactions. Their updates
	cannot be visible to other transactions, so we can keep them
	out of the read views. */
3909
3910 if ((mode == LOCK_IX || mode == LOCK_X)
3911 && !trx->read_only
3912 && trx->rsegs.m_redo.rseg == 0) {
3913
3914 trx_set_rw_mode(trx);
3915 }
3916
3917 lock_mutex_enter();
3918
3919 DBUG_EXECUTE_IF("fatal-semaphore-timeout",
3920 { os_thread_sleep(3600000000LL); });
3921
3922 /* We have to check if the new lock is compatible with any locks
3923 other transactions have in the table lock queue. */
3924
3925 wait_for = lock_table_other_has_incompatible(
3926 trx, LOCK_WAIT, table, mode);
3927
3928 trx_mutex_enter(trx);
3929
3930 /* Another trx has a request on the table in an incompatible
3931 mode: this trx may have to wait */
3932
3933 if (wait_for != NULL) {
3934 err = lock_table_enqueue_waiting(ulint(mode) | flags, table,
3935 thr
3936#ifdef WITH_WSREP
3937 , wait_for
3938#endif
3939 );
3940 } else {
3941 lock_table_create(table, ulint(mode) | flags, trx);
3942
3943 ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
3944
3945 err = DB_SUCCESS;
3946 }
3947
3948 lock_mutex_exit();
3949
3950 trx_mutex_exit(trx);
3951
3952 return(err);
3953}
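
/* Illustrative caller sketch (not part of the original sources): a caller
typically records the result in trx->error_state and suspends the query
thread on DB_LOCK_WAIT, roughly:

	err = lock_table(0, table, mode, thr);
	trx->error_state = err;

	if (err == DB_SUCCESS) {
		que_thr_stop_for_mysql_no_error(thr, trx);
	} else {
		que_thr_stop_for_mysql(thr);
		// row_mysql_handle_errors() may suspend and retry
	}

See lock_table_for_trx() below for the complete pattern. */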
3954
3955/*********************************************************************//**
3956Creates a table IX lock object for a resurrected transaction. */
3957void
3958lock_table_ix_resurrect(
3959/*====================*/
3960 dict_table_t* table, /*!< in/out: table */
3961 trx_t* trx) /*!< in/out: transaction */
3962{
3963 ut_ad(trx->is_recovered);
3964
3965 if (lock_table_has(trx, table, LOCK_IX)) {
3966 return;
3967 }
3968
3969 lock_mutex_enter();
3970
3971 /* We have to check if the new lock is compatible with any locks
3972 other transactions have in the table lock queue. */
3973
3974 ut_ad(!lock_table_other_has_incompatible(
3975 trx, LOCK_WAIT, table, LOCK_IX));
3976
3977 trx_mutex_enter(trx);
3978 lock_table_create(table, LOCK_IX, trx);
3979 lock_mutex_exit();
3980 trx_mutex_exit(trx);
3981}
3982
3983/*********************************************************************//**
3984Checks if a waiting table lock request still has to wait in a queue.
3985@return TRUE if still has to wait */
3986static
3987bool
3988lock_table_has_to_wait_in_queue(
3989/*============================*/
3990 const lock_t* wait_lock) /*!< in: waiting table lock */
3991{
3992 const dict_table_t* table;
3993 const lock_t* lock;
3994
3995 ut_ad(lock_mutex_own());
3996 ut_ad(lock_get_wait(wait_lock));
3997
3998 table = wait_lock->un_member.tab_lock.table;
3999
4000 for (lock = UT_LIST_GET_FIRST(table->locks);
4001 lock != wait_lock;
4002 lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4003
4004 if (lock_has_to_wait(wait_lock, lock)) {
4005
4006 return(true);
4007 }
4008 }
4009
4010 return(false);
4011}
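
/* Worked example (not part of the original sources): with a queue
[T1: S granted, T2: X waiting, T3: S waiting], the check for T3's request
scans T1's S lock (compatible) and T2's X lock (incompatible) and returns
true. Waiting requests are thus served in queue order: T3 may not jump
ahead of T2's earlier incompatible request. */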
4012
4013/*************************************************************//**
4014Removes a table lock request, waiting or granted, from the queue and grants
4015locks to other transactions in the queue, if they now are entitled to a
4016lock. */
4017static
4018void
4019lock_table_dequeue(
4020/*===============*/
4021 lock_t* in_lock)/*!< in/out: table lock object; transactions waiting
4022 behind will get their lock requests granted, if
4023 they are now qualified to it */
4024{
4025 ut_ad(lock_mutex_own());
4026 ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
4027
4028 lock_t* lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
4029
4030 lock_table_remove_low(in_lock);
4031
4032 /* Check if waiting locks in the queue can now be granted: grant
4033 locks if there are no conflicting locks ahead. */
4034
4035 for (/* No op */;
4036 lock != NULL;
4037 lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4038
4039 if (lock_get_wait(lock)
4040 && !lock_table_has_to_wait_in_queue(lock)) {
4041
4042 /* Grant the lock */
4043 ut_ad(in_lock->trx != lock->trx);
4044 lock_grant(lock);
4045 }
4046 }
4047}
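
/* Worked example (not part of the original sources): when the X lock at
the head of [T1: X granted, T2: S waiting, T3: S waiting] is dequeued,
the loop above grants both waiting S requests, because neither has an
incompatible lock ahead of it in the queue any more. */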
4048
4049/** Sets a lock on a table based on the given mode.
4050@param[in] table table to lock
4051@param[in,out] trx transaction
4052@param[in] mode LOCK_X or LOCK_S
4053@return error code or DB_SUCCESS. */
4054dberr_t
4055lock_table_for_trx(
4056 dict_table_t* table,
4057 trx_t* trx,
4058 enum lock_mode mode)
4059{
4060 mem_heap_t* heap;
4061 que_thr_t* thr;
4062 dberr_t err;
4063 sel_node_t* node;
4064 heap = mem_heap_create(512);
4065
4066 node = sel_node_create(heap);
4067 thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
4068 thr->graph->state = QUE_FORK_ACTIVE;
4069
4070 /* We use the select query graph as the dummy graph needed
4071 in the lock module call */
4072
4073 thr = static_cast<que_thr_t*>(
4074 que_fork_get_first_thr(
4075 static_cast<que_fork_t*>(que_node_get_parent(thr))));
4076
4077 que_thr_move_to_run_state_for_mysql(thr, trx);
4078
4079run_again:
4080 thr->run_node = thr;
4081 thr->prev_node = thr->common.parent;
4082
4083 err = lock_table(0, table, mode, thr);
4084
4085 trx->error_state = err;
4086
4087 if (UNIV_LIKELY(err == DB_SUCCESS)) {
4088 que_thr_stop_for_mysql_no_error(thr, trx);
4089 } else {
4090 que_thr_stop_for_mysql(thr);
4091
4092 if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
4093 goto run_again;
4094 }
4095 }
4096
4097 que_graph_free(thr->graph);
4098 trx->op_info = "";
4099
4100 return(err);
4101}
4102
4103/*=========================== LOCK RELEASE ==============================*/
4104static
4105void
4106lock_grant_and_move_on_rec(
4107 hash_table_t* lock_hash,
4108 lock_t* first_lock,
4109 ulint heap_no)
4110{
4111 lock_t* lock;
4112 lock_t* previous;
4113 ulint space;
4114 ulint page_no;
4115 ulint rec_fold;
4116
4117 space = first_lock->un_member.rec_lock.space;
4118 page_no = first_lock->un_member.rec_lock.page_no;
4119 rec_fold = lock_rec_fold(space, page_no);
4120
4121 previous = (lock_t *) hash_get_nth_cell(lock_hash,
4122 hash_calc_hash(rec_fold, lock_hash))->node;
4123 if (previous == NULL) {
4124 return;
4125 }
4126 if (previous == first_lock) {
4127 lock = previous;
4128 } else {
4129 while (previous->hash &&
4130 previous->hash != first_lock) {
4131 previous = previous->hash;
4132 }
4133 lock = previous->hash;
4134 }
4135 /* Grant locks if there are no conflicting locks ahead.
4136 Move granted locks to the head of the list. */
	while (lock != NULL) {
4138
		/* Grant the lock if it is a waiting lock on this page
		and it no longer needs to wait. */
4140 if (lock->un_member.rec_lock.space == space
4141 && lock->un_member.rec_lock.page_no == page_no
4142 && lock_rec_get_nth_bit(lock, heap_no)
4143 && lock_get_wait(lock)
4144 && !lock_rec_has_to_wait_in_queue(lock)) {
4145
4146 lock_grant(lock);
4147
4148 if (previous != NULL) {
4149 /* Move the lock to the head of the list. */
4150 HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
4151 lock_rec_insert_to_head(lock, rec_fold);
4152 } else {
4153 /* Already at the head of the list. */
4154 previous = lock;
4155 }
4156 /* Move on to the next lock. */
4157 lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
4158 } else {
4159 previous = lock;
4160 lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
4161 }
4162 }
4163}
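
/* Illustrative example (not part of the original sources): this is the
non-FCFS scheduling path (see the INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS
checks in lock_rec_unlock() below). If the hash chain for the page is
[A granted, B waiting, C waiting] and B no longer has to wait, B is
granted, unlinked, and reinserted at the head, yielding
[B granted, A granted, C waiting]: granted locks accumulate in front of
the remaining waiters. */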
4164
4165/*************************************************************//**
4166Removes a granted record lock of a transaction from the queue and grants
4167locks to other transactions waiting in the queue if they now are entitled
4168to a lock. */
4169void
4170lock_rec_unlock(
4171/*============*/
4172 trx_t* trx, /*!< in/out: transaction that has
4173 set a record lock */
4174 const buf_block_t* block, /*!< in: buffer block containing rec */
4175 const rec_t* rec, /*!< in: record */
4176 lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */
4177{
4178 lock_t* first_lock;
4179 lock_t* lock;
4180 ulint heap_no;
4181
4182 ut_ad(trx);
4183 ut_ad(rec);
4184 ut_ad(block->frame == page_align(rec));
4185 ut_ad(!trx->lock.wait_lock);
4186 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
4187 ut_ad(!page_rec_is_default_row(rec));
4188
4189 heap_no = page_rec_get_heap_no(rec);
4190
4191 lock_mutex_enter();
4192 trx_mutex_enter(trx);
4193
4194 first_lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
4195
	/* Find the first lock with the same lock_mode and transaction
	on the record. */
4198
4199 for (lock = first_lock; lock != NULL;
4200 lock = lock_rec_get_next(heap_no, lock)) {
4201 if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
4202 goto released;
4203 }
4204 }
4205
4206 lock_mutex_exit();
4207 trx_mutex_exit(trx);
4208
4209 {
4210 ib::error err;
4211 err << "Unlock row could not find a " << lock_mode
4212 << " mode lock on the record. Current statement: ";
4213 size_t stmt_len;
4214 if (const char* stmt = innobase_get_stmt_unsafe(
4215 trx->mysql_thd, &stmt_len)) {
4216 err.write(stmt, stmt_len);
4217 }
4218 }
4219
4220 return;
4221
4222released:
4223 ut_a(!lock_get_wait(lock));
4224 lock_rec_reset_nth_bit(lock, heap_no);
4225
4226 if (innodb_lock_schedule_algorithm
4227 == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
4228 thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
4229
4230 /* Check if we can now grant waiting lock requests */
4231
4232 for (lock = first_lock; lock != NULL;
4233 lock = lock_rec_get_next(heap_no, lock)) {
4234 if (lock_get_wait(lock)
4235 && !lock_rec_has_to_wait_in_queue(lock)) {
4236
4237 /* Grant the lock */
4238 ut_ad(trx != lock->trx);
4239 lock_grant(lock);
4240 }
4241 }
4242 } else {
4243 lock_grant_and_move_on_rec(lock_sys.rec_hash, first_lock, heap_no);
4244 }
4245
4246 lock_mutex_exit();
4247 trx_mutex_exit(trx);
4248}
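
/* Example scenario (an assumption, not stated in this file): under
READ COMMITTED, row_unlock_for_mysql() may use this function to release
the record lock on a row that a locking SELECT examined but did not
match, which is why the transaction is asserted above to be still ACTIVE
and not waiting for any lock. */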
4249
4250#ifdef UNIV_DEBUG
4251/*********************************************************************//**
4252Check if a transaction that has X or IX locks has set the dict_op
4253code correctly. */
4254static
4255void
4256lock_check_dict_lock(
4257/*==================*/
4258 const lock_t* lock) /*!< in: lock to check */
4259{
4260 if (lock_get_type_low(lock) == LOCK_REC) {
4261
		/* Check if the transaction locked a record
		in a system table in X mode. It should have set
		the dict_op code correctly if it did. */
4265 if (lock->index->table->id < DICT_HDR_FIRST_ID
4266 && lock_get_mode(lock) == LOCK_X) {
4267
4268 ut_ad(lock_get_mode(lock) != LOCK_IX);
4269 ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
4270 }
4271 } else {
4272 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4273
4274 const dict_table_t* table;
4275
4276 table = lock->un_member.tab_lock.table;
4277
		/* Check if the transaction locked a system table
		in IX mode. It should have set the dict_op code
		correctly if it did. */
4281 if (table->id < DICT_HDR_FIRST_ID
4282 && (lock_get_mode(lock) == LOCK_X
4283 || lock_get_mode(lock) == LOCK_IX)) {
4284
4285 ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
4286 }
4287 }
4288}
4289#endif /* UNIV_DEBUG */
4290
4291/*********************************************************************//**
4292Releases transaction locks, and releases possible other transactions waiting
4293because of these locks. */
4294static
4295void
4296lock_release(
4297/*=========*/
4298 trx_t* trx) /*!< in/out: transaction */
4299{
4300 lock_t* lock;
4301 ulint count = 0;
4302 trx_id_t max_trx_id = trx_sys.get_max_trx_id();
4303
4304 ut_ad(lock_mutex_own());
4305 ut_ad(!trx_mutex_own(trx));
4306
4307 for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
4308 lock != NULL;
4309 lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
4310
4311 ut_d(lock_check_dict_lock(lock));
4312
4313 if (lock_get_type_low(lock) == LOCK_REC) {
4314
4315 lock_rec_dequeue_from_page(lock);
4316 } else {
4317 dict_table_t* table;
4318
4319 table = lock->un_member.tab_lock.table;
4320
4321 if (lock_get_mode(lock) != LOCK_IS
4322 && trx->undo_no != 0) {
4323
4324 /* The trx may have modified the table. We
4325 block the use of the MySQL query cache for
4326 all currently active transactions. */
4327
4328 table->query_cache_inv_id = max_trx_id;
4329 }
4330
4331 lock_table_dequeue(lock);
4332 }
4333
4334 if (count == LOCK_RELEASE_INTERVAL) {
			/* Release the mutex for a while, so that we
			do not monopolize it. This is safe because the
			loop refetches the tail of trx->lock.trx_locks
			on every iteration, so no list cursor survives
			across the release. */
4337
4338 lock_mutex_exit();
4339
4340 lock_mutex_enter();
4341
4342 count = 0;
4343 }
4344
4345 ++count;
4346 }
4347}
4348
4349/* True if a lock mode is S or X */
4350#define IS_LOCK_S_OR_X(lock) \
4351 (lock_get_mode(lock) == LOCK_S \
4352 || lock_get_mode(lock) == LOCK_X)
4353
4354/*********************************************************************//**
4355Removes table locks of the transaction on a table to be dropped. */
4356static
4357void
4358lock_trx_table_locks_remove(
4359/*========================*/
4360 const lock_t* lock_to_remove) /*!< in: lock to remove */
4361{
4362 trx_t* trx = lock_to_remove->trx;
4363
4364 ut_ad(lock_mutex_own());
4365
4366 /* It is safe to read this because we are holding the lock mutex */
4367 if (!trx->lock.cancel) {
4368 trx_mutex_enter(trx);
4369 } else {
4370 ut_ad(trx_mutex_own(trx));
4371 }
4372
4373 typedef lock_pool_t::reverse_iterator iterator;
4374
4375 iterator end = trx->lock.table_locks.rend();
4376
4377 for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
4378
4379 const lock_t* lock = *it;
4380
4381 if (lock == NULL) {
4382 continue;
4383 }
4384
4385 ut_a(trx == lock->trx);
4386 ut_a(lock_get_type_low(lock) & LOCK_TABLE);
4387 ut_a(lock->un_member.tab_lock.table != NULL);
4388
4389 if (lock == lock_to_remove) {
4390
4391 *it = NULL;
4392
4393 if (!trx->lock.cancel) {
4394 trx_mutex_exit(trx);
4395 }
4396
4397 return;
4398 }
4399 }
4400
4401 if (!trx->lock.cancel) {
4402 trx_mutex_exit(trx);
4403 }
4404
4405 /* Lock must exist in the vector. */
4406 ut_error;
4407}
4408
4409/*===================== VALIDATION AND DEBUGGING ====================*/
4410
4411/** Print info of a table lock.
4412@param[in,out] file output stream
4413@param[in] lock table lock */
4414static
4415void
4416lock_table_print(FILE* file, const lock_t* lock)
4417{
4418 ut_ad(lock_mutex_own());
4419 ut_a(lock_get_type_low(lock) == LOCK_TABLE);
4420
4421 fputs("TABLE LOCK table ", file);
4422 ut_print_name(file, lock->trx,
4423 lock->un_member.tab_lock.table->name.m_name);
4424 fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4425
4426 if (lock_get_mode(lock) == LOCK_S) {
4427 fputs(" lock mode S", file);
4428 } else if (lock_get_mode(lock) == LOCK_X) {
4429 ut_ad(lock->trx->id != 0);
4430 fputs(" lock mode X", file);
4431 } else if (lock_get_mode(lock) == LOCK_IS) {
4432 fputs(" lock mode IS", file);
4433 } else if (lock_get_mode(lock) == LOCK_IX) {
4434 ut_ad(lock->trx->id != 0);
4435 fputs(" lock mode IX", file);
4436 } else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
4437 fputs(" lock mode AUTO-INC", file);
4438 } else {
4439 fprintf(file, " unknown lock mode %lu",
4440 (ulong) lock_get_mode(lock));
4441 }
4442
4443 if (lock_get_wait(lock)) {
4444 fputs(" waiting", file);
4445 }
4446
4447 putc('\n', file);
4448}
4449
4450/** Print info of a record lock.
4451@param[in,out] file output stream
4452@param[in] lock record lock */
4453static
4454void
4455lock_rec_print(FILE* file, const lock_t* lock)
4456{
4457 ulint space;
4458 ulint page_no;
4459 mtr_t mtr;
4460 mem_heap_t* heap = NULL;
4461 ulint offsets_[REC_OFFS_NORMAL_SIZE];
4462 ulint* offsets = offsets_;
4463 rec_offs_init(offsets_);
4464
4465 ut_ad(lock_mutex_own());
4466 ut_a(lock_get_type_low(lock) == LOCK_REC);
4467
4468 space = lock->un_member.rec_lock.space;
4469 page_no = lock->un_member.rec_lock.page_no;
4470
4471 fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu "
4472 "index %s of table ",
4473 (ulong) space, (ulong) page_no,
4474 (ulong) lock_rec_get_n_bits(lock),
4475 lock->index->name());
4476 ut_print_name(file, lock->trx, lock->index->table->name.m_name);
4477 fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4478
4479 if (lock_get_mode(lock) == LOCK_S) {
4480 fputs(" lock mode S", file);
4481 } else if (lock_get_mode(lock) == LOCK_X) {
4482 fputs(" lock_mode X", file);
4483 } else {
4484 ut_error;
4485 }
4486
4487 if (lock_rec_get_gap(lock)) {
4488 fputs(" locks gap before rec", file);
4489 }
4490
4491 if (lock_rec_get_rec_not_gap(lock)) {
4492 fputs(" locks rec but not gap", file);
4493 }
4494
4495 if (lock_rec_get_insert_intention(lock)) {
4496 fputs(" insert intention", file);
4497 }
4498
4499 if (lock_get_wait(lock)) {
4500 fputs(" waiting", file);
4501 }
4502
4503 mtr_start(&mtr);
4504
4505 putc('\n', file);
4506
4507 const buf_block_t* block;
4508
4509 block = buf_page_try_get(page_id_t(space, page_no), &mtr);
4510
4511 for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
4512
4513 if (!lock_rec_get_nth_bit(lock, i)) {
4514 continue;
4515 }
4516
4517 fprintf(file, "Record lock, heap no %lu", (ulong) i);
4518
4519 if (block) {
4520 ut_ad(page_is_leaf(block->frame));
4521 const rec_t* rec;
4522
4523 rec = page_find_rec_with_heap_no(
4524 buf_block_get_frame(block), i);
4525 ut_ad(!page_rec_is_default_row(rec));
4526
4527 offsets = rec_get_offsets(
4528 rec, lock->index, offsets, true,
4529 ULINT_UNDEFINED, &heap);
4530
4531 putc(' ', file);
4532 rec_print_new(file, rec, offsets);
4533 }
4534
4535 putc('\n', file);
4536 }
4537
4538 mtr_commit(&mtr);
4539
4540 if (heap) {
4541 mem_heap_free(heap);
4542 }
4543}
4544
4545#ifdef UNIV_DEBUG
4546/* Print the number of lock structs from lock_print_info_summary() only
4547in non-production builds for performance reasons, see
4548http://bugs.mysql.com/36942 */
4549#define PRINT_NUM_OF_LOCK_STRUCTS
4550#endif /* UNIV_DEBUG */
4551
4552#ifdef PRINT_NUM_OF_LOCK_STRUCTS
4553/*********************************************************************//**
4554Calculates the number of record lock structs in the record lock hash table.
4555@return number of record locks */
4556static
4557ulint
4558lock_get_n_rec_locks(void)
4559/*======================*/
4560{
4561 ulint n_locks = 0;
4562 ulint i;
4563
4564 ut_ad(lock_mutex_own());
4565
4566 for (i = 0; i < hash_get_n_cells(lock_sys.rec_hash); i++) {
4567 const lock_t* lock;
4568
4569 for (lock = static_cast<const lock_t*>(
4570 HASH_GET_FIRST(lock_sys.rec_hash, i));
4571 lock != 0;
4572 lock = static_cast<const lock_t*>(
4573 HASH_GET_NEXT(hash, lock))) {
4574
4575 n_locks++;
4576 }
4577 }
4578
4579 return(n_locks);
4580}
4581#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4582
4583/*********************************************************************//**
4584Prints info of locks for all transactions.
4585@return FALSE if not able to obtain lock mutex
4586and exits without printing info */
4587ibool
4588lock_print_info_summary(
4589/*====================*/
4590 FILE* file, /*!< in: file where to print */
	ibool	nowait)	/*!< in: TRUE if we should not wait for the
			lock mutex */
4592{
	/* If nowait is FALSE, wait on the lock mutex; otherwise return
	immediately if we fail to obtain the mutex. */
4596 if (!nowait) {
4597 lock_mutex_enter();
4598 } else if (lock_mutex_enter_nowait()) {
4599 fputs("FAIL TO OBTAIN LOCK MUTEX,"
4600 " SKIP LOCK INFO PRINTING\n", file);
4601 return(FALSE);
4602 }
4603
4604 if (lock_deadlock_found) {
4605 fputs("------------------------\n"
4606 "LATEST DETECTED DEADLOCK\n"
4607 "------------------------\n", file);
4608
4609 if (!srv_read_only_mode) {
4610 ut_copy_file(file, lock_latest_err_file);
4611 }
4612 }
4613
4614 fputs("------------\n"
4615 "TRANSACTIONS\n"
4616 "------------\n", file);
4617
4618 fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
4619 trx_sys.get_max_trx_id());
4620
4621 fprintf(file,
4622 "Purge done for trx's n:o < " TRX_ID_FMT
4623 " undo n:o < " TRX_ID_FMT " state: %s\n"
4624 "History list length " ULINTPF "\n",
4625 purge_sys.tail.trx_no(),
4626 purge_sys.tail.undo_no,
4627 purge_sys.enabled()
4628 ? (purge_sys.running() ? "running"
4629 : purge_sys.paused() ? "stopped" : "running but idle")
4630 : "disabled",
4631 trx_sys.history_size());
4632
4633#ifdef PRINT_NUM_OF_LOCK_STRUCTS
4634 fprintf(file,
4635 "Total number of lock structs in row lock hash table %lu\n",
4636 (ulong) lock_get_n_rec_locks());
4637#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4638 return(TRUE);
4639}
4640
4641/** Functor to print not-started transaction from the trx_list. */
4642
4643struct PrintNotStarted {
4644
4645 PrintNotStarted(FILE* file) : m_file(file) { }
4646
4647 void operator()(const trx_t* trx)
4648 {
4649 ut_ad(mutex_own(&trx_sys.mutex));
4650
4651 /* See state transitions and locking rules in trx0trx.h */
4652
4653 if (trx->mysql_thd
4654 && trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
4655
4656 fputs("---", m_file);
4657 trx_print_latched(m_file, trx, 600);
4658 }
4659 }
4660
4661 FILE* m_file;
4662};
4663
4664/** Prints transaction lock wait and MVCC state.
4665@param[in,out] file file where to print
4666@param[in] trx transaction */
4667void
4668lock_trx_print_wait_and_mvcc_state(
4669 FILE* file,
4670 const trx_t* trx)
4671{
4672 fprintf(file, "---");
4673
4674 trx_print_latched(file, trx, 600);
4675
	/* Note: the read_view->get_state() check is subject to a race
	condition. But it should "kind of work" because the read_view is
	freed only at shutdown. The worst that may happen is that the view
	gets transferred to another thread and wrong values are printed. */
4680
4681 if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN) {
4682 trx->read_view.print_limits(file);
4683 }
4684
4685 if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
4686
4687 fprintf(file,
4688 "------- TRX HAS BEEN WAITING %lu SEC"
4689 " FOR THIS LOCK TO BE GRANTED:\n",
4690 (ulong) difftime(ut_time(), trx->lock.wait_started));
4691
4692 if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
4693 lock_rec_print(file, trx->lock.wait_lock);
4694 } else {
4695 lock_table_print(file, trx->lock.wait_lock);
4696 }
4697
4698 fprintf(file, "------------------\n");
4699 }
4700}
4701
4702/*********************************************************************//**
4703Prints info of locks for a transaction. */
4704static
4705void
4706lock_trx_print_locks(
4707/*=================*/
4708 FILE* file, /*!< in/out: File to write */
4709 const trx_t* trx) /*!< in: current transaction */
4710{
4711 uint32_t i= 0;
4712 /* Iterate over the transaction's locks. */
4713 for (lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4714 lock != NULL;
4715 lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
4716 if (lock_get_type_low(lock) == LOCK_REC) {
4717
4718 lock_rec_print(file, lock);
4719 } else {
4720 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4721
4722 lock_table_print(file, lock);
4723 }
4724
4725 if (++i == 10) {
4726
4727 fprintf(file,
4728 "10 LOCKS PRINTED FOR THIS TRX:"
4729 " SUPPRESSING FURTHER PRINTS\n");
4730
4731 break;
4732 }
4733 }
4734}
4735
4736
4737static my_bool lock_print_info_all_transactions_callback(
4738 rw_trx_hash_element_t *element, FILE *file)
4739{
4740 mutex_enter(&element->mutex);
4741 if (trx_t *trx= element->trx)
4742 {
4743 check_trx_state(trx);
4744 lock_trx_print_wait_and_mvcc_state(file, trx);
4745
4746 if (srv_print_innodb_lock_monitor)
4747 {
4748 trx->reference();
4749 mutex_exit(&element->mutex);
4750 lock_trx_print_locks(file, trx);
4751 trx->release_reference();
4752 return 0;
4753 }
4754 }
4755 mutex_exit(&element->mutex);
4756 return 0;
4757}
4758
4759
4760/*********************************************************************//**
4761Prints info of locks for each transaction. This function assumes that the
caller holds the lock mutex and, more importantly, that it will release the
lock mutex on behalf of the caller. (This should be fixed in the future.) */
4764void
4765lock_print_info_all_transactions(
4766/*=============================*/
4767 FILE* file) /*!< in/out: file where to print */
4768{
4769 ut_ad(lock_mutex_own());
4770
4771 fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
4772
4773 /* First print info on non-active transactions */
4774
4775 /* NOTE: information of auto-commit non-locking read-only
4776 transactions will be omitted here. The information will be
4777 available from INFORMATION_SCHEMA.INNODB_TRX. */
4778
4779 PrintNotStarted print_not_started(file);
4780 mutex_enter(&trx_sys.mutex);
4781 ut_list_map(trx_sys.trx_list, print_not_started);
4782 mutex_exit(&trx_sys.mutex);
4783
4784 trx_sys.rw_trx_hash.iterate_no_dups(
4785 reinterpret_cast<my_hash_walk_action>
4786 (lock_print_info_all_transactions_callback), file);
4787 lock_mutex_exit();
4788
4789 ut_ad(lock_validate());
4790}
4791
4792#ifdef UNIV_DEBUG
4793/*********************************************************************//**
Find the lock in the trx_t::trx_lock_t::table_locks vector.
4795@return true if found */
4796static
4797bool
4798lock_trx_table_locks_find(
4799/*======================*/
4800 trx_t* trx, /*!< in: trx to validate */
4801 const lock_t* find_lock) /*!< in: lock to find */
4802{
4803 bool found = false;
4804
4805 trx_mutex_enter(trx);
4806
4807 typedef lock_pool_t::const_reverse_iterator iterator;
4808
4809 iterator end = trx->lock.table_locks.rend();
4810
4811 for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
4812
4813 const lock_t* lock = *it;
4814
4815 if (lock == NULL) {
4816
4817 continue;
4818
4819 } else if (lock == find_lock) {
4820
4821 /* Can't be duplicates. */
4822 ut_a(!found);
4823 found = true;
4824 }
4825
4826 ut_a(trx == lock->trx);
4827 ut_a(lock_get_type_low(lock) & LOCK_TABLE);
4828 ut_a(lock->un_member.tab_lock.table != NULL);
4829 }
4830
4831 trx_mutex_exit(trx);
4832
4833 return(found);
4834}
4835
4836/*********************************************************************//**
4837Validates the lock queue on a table.
4838@return TRUE if ok */
4839static
4840ibool
4841lock_table_queue_validate(
4842/*======================*/
4843 const dict_table_t* table) /*!< in: table */
4844{
4845 const lock_t* lock;
4846
4847 ut_ad(lock_mutex_own());
4848
4849 for (lock = UT_LIST_GET_FIRST(table->locks);
4850 lock != NULL;
4851 lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4852
4853 /* Transaction state may change from ACTIVE to PREPARED.
4854 State change to COMMITTED is not possible while we are
4855 holding lock_sys.mutex: it is done by lock_trx_release_locks()
4856 under lock_sys.mutex protection.
4857 Transaction in NOT_STARTED state cannot hold locks, and
4858 lock->trx->state can only move to NOT_STARTED from COMMITTED. */
4859 check_trx_state(lock->trx);
4860
4861 if (!lock_get_wait(lock)) {
4862
4863 ut_a(!lock_table_other_has_incompatible(
4864 lock->trx, 0, table,
4865 lock_get_mode(lock)));
4866 } else {
4867
4868 ut_a(lock_table_has_to_wait_in_queue(lock));
4869 }
4870
4871 ut_a(lock_trx_table_locks_find(lock->trx, lock));
4872 }
4873
4874 return(TRUE);
4875}
4876
4877/*********************************************************************//**
4878Validates the lock queue on a single record.
4879@return TRUE if ok */
4880static
4881bool
4882lock_rec_queue_validate(
4883/*====================*/
4884 bool locked_lock_trx_sys,
4885 /*!< in: if the caller holds
4886 both the lock mutex and
4887 trx_sys_t->lock. */
4888 const buf_block_t* block, /*!< in: buffer block containing rec */
4889 const rec_t* rec, /*!< in: record to look at */
4890 const dict_index_t* index, /*!< in: index, or NULL if not known */
4891 const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
4892{
4893 const lock_t* lock;
4894 ulint heap_no;
4895
4896 ut_a(rec);
4897 ut_a(block->frame == page_align(rec));
4898 ut_ad(rec_offs_validate(rec, index, offsets));
4899 ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
4900 ut_ad(page_rec_is_leaf(rec));
4901 ut_ad(lock_mutex_own() == locked_lock_trx_sys);
4902 ut_ad(!index || dict_index_is_clust(index)
4903 || !dict_index_is_online_ddl(index));
4904
4905 heap_no = page_rec_get_heap_no(rec);
4906
4907 if (!locked_lock_trx_sys) {
4908 lock_mutex_enter();
4909 }
4910
4911 if (!page_rec_is_user_rec(rec)) {
4912
4913 for (lock = lock_rec_get_first(lock_sys.rec_hash,
4914 block, heap_no);
4915 lock != NULL;
4916 lock = lock_rec_get_next_const(heap_no, lock)) {
4917
4918 ut_ad(!trx_is_ac_nl_ro(lock->trx));
4919
4920 if (lock_get_wait(lock)) {
4921 ut_a(lock_rec_has_to_wait_in_queue(lock));
4922 }
4923
4924 if (index != NULL) {
4925 ut_a(lock->index == index);
4926 }
4927 }
4928
4929 goto func_exit;
4930 }
4931
4932 if (index == NULL) {
4933
4934 /* Nothing we can do */
4935
4936 } else if (dict_index_is_clust(index)) {
4937 /* Unlike the non-debug code, this invariant can only succeed
4938 if the check and assertion are covered by the lock mutex. */
4939
4940 const trx_t *impl_trx = trx_sys.rw_trx_hash.find(current_trx(),
4941 lock_clust_rec_some_has_impl(rec, index, offsets));
4942
4943 ut_ad(lock_mutex_own());
4944 /* impl_trx cannot be committed until lock_mutex_exit()
4945 because lock_trx_release_locks() acquires lock_sys.mutex */
4946
4947 if (!impl_trx) {
4948 } else if (const lock_t* other_lock
4949 = lock_rec_other_has_expl_req(
4950 LOCK_S, block, true, heap_no,
4951 impl_trx)) {
4952 /* The impl_trx is holding an implicit lock on the
4953 given record 'rec'. So there cannot be another
4954 explicit granted lock. Also, there can be another
4955 explicit waiting lock only if the impl_trx has an
4956 explicit granted lock. */
4957
4958#ifdef WITH_WSREP
4959 if (wsrep_on(other_lock->trx->mysql_thd)) {
			if (!lock_get_wait(other_lock)) {
4961 ib::info() << "WSREP impl BF lock conflict for my impl lock:\n BF:" <<
4962 ((wsrep_thd_is_BF(impl_trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
4963 wsrep_thd_exec_mode(impl_trx->mysql_thd) << " conflict: " <<
4964 wsrep_thd_conflict_state(impl_trx->mysql_thd, false) << " seqno: " <<
4965 wsrep_thd_trx_seqno(impl_trx->mysql_thd) << " SQL: " <<
4966 wsrep_thd_query(impl_trx->mysql_thd);
4967
4968 trx_t* otrx = other_lock->trx;
4969
4970 ib::info() << "WSREP other lock:\n BF:" <<
4971 ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
4972 wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
4973 wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " <<
4974 wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
4975 wsrep_thd_query(otrx->mysql_thd);
4976 }
4977
4978 if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
4979 block, heap_no,
4980 impl_trx)) {
4981 ib::info() << "WSREP impl BF lock conflict";
4982 }
		} else
#endif /* WITH_WSREP */
		{
			ut_ad(lock_get_wait(other_lock));
			ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
						block, heap_no, impl_trx));
		}
4988 }
4989 }
4990
4991 for (lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
4992 lock != NULL;
4993 lock = lock_rec_get_next_const(heap_no, lock)) {
4994
4995 ut_ad(!trx_is_ac_nl_ro(lock->trx));
4996 ut_ad(!page_rec_is_default_row(rec));
4997
4998 if (index) {
4999 ut_a(lock->index == index);
5000 }
5001
5002 if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
5003
5004 lock_mode mode;
5005
5006 if (lock_get_mode(lock) == LOCK_S) {
5007 mode = LOCK_X;
5008 } else {
5009 mode = LOCK_S;
5010 }
5011
5012 const lock_t* other_lock
5013 = lock_rec_other_has_expl_req(
5014 mode, block, false, heap_no,
5015 lock->trx);
5016#ifdef WITH_WSREP
5017 ut_a(!other_lock
5018 || wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE)
5019 || wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE));
5020
5021#else
5022 ut_a(!other_lock);
5023#endif /* WITH_WSREP */
5024 } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
5025
5026 ut_a(lock_rec_has_to_wait_in_queue(lock));
5027 }
5028 }
5029
5030 ut_ad(innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
5031 lock_queue_validate(lock));
5032
5033func_exit:
5034 if (!locked_lock_trx_sys) {
5035 lock_mutex_exit();
5036 }
5037
5038 return(TRUE);
5039}
5040
5041/*********************************************************************//**
5042Validates the record lock queues on a page.
5043@return TRUE if ok */
5044static
5045ibool
5046lock_rec_validate_page(
5047/*===================*/
5048 const buf_block_t* block) /*!< in: buffer block */
5049{
5050 const lock_t* lock;
5051 const rec_t* rec;
5052 ulint nth_lock = 0;
5053 ulint nth_bit = 0;
5054 ulint i;
5055 mem_heap_t* heap = NULL;
5056 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5057 ulint* offsets = offsets_;
5058 rec_offs_init(offsets_);
5059
5060 ut_ad(!lock_mutex_own());
5061
5062 lock_mutex_enter();
5063loop:
5064 lock = lock_rec_get_first_on_page_addr(
5065 lock_sys.rec_hash,
5066 block->page.id.space(), block->page.id.page_no());
5067
5068 if (!lock) {
5069 goto function_exit;
5070 }
5071
5072 ut_ad(!block->page.file_page_was_freed);
5073
5074 for (i = 0; i < nth_lock; i++) {
5075
5076 lock = lock_rec_get_next_on_page_const(lock);
5077
5078 if (!lock) {
5079 goto function_exit;
5080 }
5081 }
5082
5083 ut_ad(!trx_is_ac_nl_ro(lock->trx));
5084
5085 /* Only validate the record queues when this thread is not
5086 holding a space->latch. */
5087 if (!sync_check_find(SYNC_FSP))
5088 for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
5089
5090 if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
5091
5092 rec = page_find_rec_with_heap_no(block->frame, i);
5093 ut_a(rec);
5094 ut_ad(page_rec_is_leaf(rec));
5095 offsets = rec_get_offsets(rec, lock->index, offsets,
5096 true, ULINT_UNDEFINED,
5097 &heap);
5098
5099 /* If this thread is holding the file space
5100 latch (fil_space_t::latch), the following
5101 check WILL break the latching order and may
5102 cause a deadlock of threads. */
5103
5104 lock_rec_queue_validate(
5105 TRUE, block, rec, lock->index, offsets);
5106
5107 nth_bit = i + 1;
5108
5109 goto loop;
5110 }
5111 }
5112
5113 nth_bit = 0;
5114 nth_lock++;
5115
5116 goto loop;
5117
5118function_exit:
5119 lock_mutex_exit();
5120
5121 if (heap != NULL) {
5122 mem_heap_free(heap);
5123 }
5124 return(TRUE);
5125}
5126
5127/*********************************************************************//**
5128Validate record locks up to a limit.
5129@return lock at limit or NULL if no more locks in the hash bucket */
5130static MY_ATTRIBUTE((warn_unused_result))
5131const lock_t*
5132lock_rec_validate(
5133/*==============*/
5134 ulint start, /*!< in: lock_sys.rec_hash
5135 bucket */
5136 ib_uint64_t* limit) /*!< in/out: upper limit of
5137 (space, page_no) */
5138{
5139 ut_ad(lock_mutex_own());
5140
5141 for (const lock_t* lock = static_cast<const lock_t*>(
5142 HASH_GET_FIRST(lock_sys.rec_hash, start));
5143 lock != NULL;
5144 lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) {
5145
5146 ib_uint64_t current;
5147
5148 ut_ad(!trx_is_ac_nl_ro(lock->trx));
5149 ut_ad(lock_get_type(lock) == LOCK_REC);
5150
5151 current = ut_ull_create(
5152 lock->un_member.rec_lock.space,
5153 lock->un_member.rec_lock.page_no);
5154
5155 if (current > *limit) {
5156 *limit = current + 1;
5157 return(lock);
5158 }
5159 }
5160
5161 return(0);
5162}
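
/* Illustrative example (not part of the original sources):
ut_ull_create(space, page_no) packs the page address into one 64-bit
key, roughly ((ib_uint64_t) space << 32) | page_no, so a caller can
resume the scan of a hash bucket from where it left off:

	ib_uint64_t limit = 0;
	while (const lock_t* lock = lock_rec_validate(i, &limit)) {
		// process "lock"; the next call returns the first
		// lock in the bucket whose packed address is > limit
	}
*/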
5163
5164/*********************************************************************//**
5165Validate a record lock's block */
5166static
5167void
5168lock_rec_block_validate(
5169/*====================*/
5170 ulint space_id,
5171 ulint page_no)
5172{
5173 /* The lock and the block that it is referring to may be freed at
5174 this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check.
5175 If the lock exists in lock_rec_validate_page() we assert
5176 !block->page.file_page_was_freed. */
5177
5178 buf_block_t* block;
5179 mtr_t mtr;
5180
5181 /* Transactional locks should never refer to dropped
5182 tablespaces, because all DDL operations that would drop or
5183 discard or rebuild a tablespace do hold an exclusive table
5184 lock, which would conflict with any locks referring to the
5185 tablespace from other transactions. */
5186 if (fil_space_t* space = fil_space_acquire(space_id)) {
5187 dberr_t err = DB_SUCCESS;
5188 mtr_start(&mtr);
5189
5190 block = buf_page_get_gen(
5191 page_id_t(space_id, page_no),
5192 page_size_t(space->flags),
5193 RW_X_LATCH, NULL,
5194 BUF_GET_POSSIBLY_FREED,
5195 __FILE__, __LINE__, &mtr, &err);
5196
5197 if (err != DB_SUCCESS) {
5198 ib::error() << "Lock rec block validate failed for tablespace "
5199 << space->name
5200 << " space_id " << space_id
5201 << " page_no " << page_no << " err " << err;
5202 }
5203
5204 if (block) {
5205 buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
5206
5207 ut_ad(lock_rec_validate_page(block));
5208 }
5209
5210 mtr_commit(&mtr);
5211
5212 space->release();
5213 }
5214}
5215
5216
5217static my_bool lock_validate_table_locks(rw_trx_hash_element_t *element, void*)
5218{
5219 ut_ad(lock_mutex_own());
5220 mutex_enter(&element->mutex);
5221 if (element->trx)
5222 {
5223 check_trx_state(element->trx);
5224 for (const lock_t *lock= UT_LIST_GET_FIRST(element->trx->lock.trx_locks);
5225 lock != NULL;
5226 lock= UT_LIST_GET_NEXT(trx_locks, lock))
5227 {
5228 if (lock_get_type_low(lock) & LOCK_TABLE)
5229 lock_table_queue_validate(lock->un_member.tab_lock.table);
5230 }
5231 }
5232 mutex_exit(&element->mutex);
5233 return 0;
5234}
5235
5236
5237/*********************************************************************//**
5238Validates the lock system.
5239@return TRUE if ok */
5240static
5241bool
5242lock_validate()
5243/*===========*/
5244{
5245 typedef std::pair<ulint, ulint> page_addr_t;
5246 typedef std::set<
5247 page_addr_t,
5248 std::less<page_addr_t>,
5249 ut_allocator<page_addr_t> > page_addr_set;
5250
5251 page_addr_set pages;
5252
5253 lock_mutex_enter();
5254
5255 /* Validate table locks */
5256 trx_sys.rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
5257 (lock_validate_table_locks), 0);
5258
	/* Iterate over all the record locks and validate the locks. We
	don't want to hog lock_sys.mutex, so we collect the page
	addresses here and validate the blocks only after releasing
	the mutex. */
5262
5263 for (ulint i = 0; i < hash_get_n_cells(lock_sys.rec_hash); i++) {
5264 ib_uint64_t limit = 0;
5265
5266 while (const lock_t* lock = lock_rec_validate(i, &limit)) {
5267 if (lock_rec_find_set_bit(lock) == ULINT_UNDEFINED) {
5268 /* The lock bitmap is empty; ignore it. */
5269 continue;
5270 }
5271 const lock_rec_t& l = lock->un_member.rec_lock;
5272 pages.insert(std::make_pair(l.space, l.page_no));
5273 }
5274 }
5275
5276 lock_mutex_exit();
5277
5278 for (page_addr_set::const_iterator it = pages.begin();
5279 it != pages.end();
5280 ++it) {
5281 lock_rec_block_validate((*it).first, (*it).second);
5282 }
5283
5284 return(true);
5285}
5286#endif /* UNIV_DEBUG */
5287/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
5288
5289/*********************************************************************//**
5290Checks if locks of other transactions prevent an immediate insert of
5291a record. If they do, first tests if the query thread should anyway
5292be suspended for some reason; if not, then puts the transaction and
5293the query thread to the lock wait state and inserts a waiting request
5294for a gap x-lock to the lock queue.
5295@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5296dberr_t
5297lock_rec_insert_check_and_lock(
5298/*===========================*/
5299 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
5300 set, does nothing */
5301 const rec_t* rec, /*!< in: record after which to insert */
5302 buf_block_t* block, /*!< in/out: buffer block of rec */
5303 dict_index_t* index, /*!< in: index */
5304 que_thr_t* thr, /*!< in: query thread */
5305 mtr_t* mtr, /*!< in/out: mini-transaction */
5306 bool* inherit)/*!< out: set to true if the new
5307 inserted record maybe should inherit
5308 LOCK_GAP type locks from the successor
5309 record */
5310{
5311 ut_ad(block->frame == page_align(rec));
5312 ut_ad(!dict_index_is_online_ddl(index)
5313 || dict_index_is_clust(index)
5314 || (flags & BTR_CREATE_FLAG));
5315 ut_ad(mtr->is_named_space(index->table->space));
5316 ut_ad(page_rec_is_leaf(rec));
5317
5318 if (flags & BTR_NO_LOCKING_FLAG) {
5319
5320 return(DB_SUCCESS);
5321 }
5322
5323 ut_ad(!index->table->is_temporary());
5324
5325 dberr_t err;
5326 lock_t* lock;
5327 bool inherit_in = *inherit;
5328 trx_t* trx = thr_get_trx(thr);
5329 const rec_t* next_rec = page_rec_get_next_const(rec);
5330 ulint heap_no = page_rec_get_heap_no(next_rec);
5331 ut_ad(!rec_is_default_row(next_rec, index));
5332
5333 lock_mutex_enter();
5334 /* Because this code is invoked for a running transaction by
5335 the thread that is serving the transaction, it is not necessary
5336 to hold trx->mutex here. */
5337
5338 /* When inserting a record into an index, the table must be at
5339 least IX-locked. When we are building an index, we would pass
5340 BTR_NO_LOCKING_FLAG and skip the locking altogether. */
5341 ut_ad(lock_table_has(trx, index->table, LOCK_IX));
5342
5343 lock = lock_rec_get_first(lock_sys.rec_hash, block, heap_no);
5344
5345 if (lock == NULL) {
5346 /* We optimize CPU time usage in the simplest case */
5347
5348 lock_mutex_exit();
5349
5350 if (inherit_in && !dict_index_is_clust(index)) {
5351 /* Update the page max trx id field */
5352 page_update_max_trx_id(block,
5353 buf_block_get_page_zip(block),
5354 trx->id, mtr);
5355 }
5356
5357 *inherit = false;
5358
5359 return(DB_SUCCESS);
5360 }
5361
5362 /* Spatial index does not use GAP lock protection. It uses
5363 "predicate lock" to protect the "range" */
	if (dict_index_is_spatial(index)) {
		lock_mutex_exit();
		return(DB_SUCCESS);
	}
5367
5368 *inherit = true;
5369
5370 /* If another transaction has an explicit lock request which locks
5371 the gap, waiting or granted, on the successor, the insert has to wait.
5372
5373 An exception is the case where the lock by the another transaction
5374 is a gap type lock which it placed to wait for its turn to insert. We
5375 do not consider that kind of a lock conflicting with our insert. This
5376 eliminates an unnecessary deadlock which resulted when 2 transactions
5377 had to wait for their insert. Both had waiting gap type lock requests
5378 on the successor, which produced an unnecessary deadlock. */
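
	/* Worked example (not part of the original sources): T3 holds a
	next-key S lock on the successor record; T1 and T2 both try to
	insert into the gap before it, and both enqueue waiting
	LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION requests. Because each
	inserter ignores the other's waiting gap-type request, T1 and T2
	wait only for T3, not for each other, so no deadlock forms
	between the two inserters. */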
5379
5380 const ulint type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
5381
5382 if (
5383#ifdef WITH_WSREP
5384 lock_t* c_lock =
5385#endif /* WITH_WSREP */
5386 lock_rec_other_has_conflicting(type_mode, block, heap_no, trx)) {
5387 /* Note that we may get DB_SUCCESS also here! */
5388 trx_mutex_enter(trx);
5389
5390 err = lock_rec_enqueue_waiting(
5391#ifdef WITH_WSREP
5392 c_lock,
5393#endif /* WITH_WSREP */
5394 type_mode, block, heap_no, index, thr, NULL);
5395
5396 trx_mutex_exit(trx);
5397 } else {
5398 err = DB_SUCCESS;
5399 }
5400
5401 lock_mutex_exit();
5402
5403 switch (err) {
5404 case DB_SUCCESS_LOCKED_REC:
5405 err = DB_SUCCESS;
5406 /* fall through */
5407 case DB_SUCCESS:
5408 if (!inherit_in || dict_index_is_clust(index)) {
5409 break;
5410 }
5411
		/* Update the page max trx id field */
		page_update_max_trx_id(
			block, buf_block_get_page_zip(block), trx->id, mtr);
		/* fall through */
	default:
5416 /* We only care about the two return values. */
5417 break;
5418 }
5419
5420#ifdef UNIV_DEBUG
5421 {
5422 mem_heap_t* heap = NULL;
5423 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5424 const ulint* offsets;
5425 rec_offs_init(offsets_);
5426
5427 offsets = rec_get_offsets(next_rec, index, offsets_, true,
5428 ULINT_UNDEFINED, &heap);
5429
5430 ut_ad(lock_rec_queue_validate(
5431 FALSE, block, next_rec, index, offsets));
5432
5433 if (heap != NULL) {
5434 mem_heap_free(heap);
5435 }
5436 }
5437#endif /* UNIV_DEBUG */
5438
5439 return(err);
5440}
5441
5442/*********************************************************************//**
5443Creates an explicit record lock for a running transaction that currently only
5444has an implicit lock on the record. The transaction instance must have a
5445reference count > 0 so that it can't be committed and freed before this
5446function has completed. */
5447static
5448void
5449lock_rec_convert_impl_to_expl_for_trx(
5450/*==================================*/
5451 const buf_block_t* block, /*!< in: buffer block of rec */
5452 const rec_t* rec, /*!< in: user record on page */
5453 dict_index_t* index, /*!< in: index of record */
5454 trx_t* trx, /*!< in/out: active transaction */
5455 ulint heap_no)/*!< in: rec heap number to lock */
5456{
5457 ut_ad(trx->is_referenced());
5458 ut_ad(page_rec_is_leaf(rec));
5459 ut_ad(!rec_is_default_row(rec, index));
5460
5461 DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
5462
5463 lock_mutex_enter();
5464
5465 ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
5466
5467 if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
5468 && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
5469 block, heap_no, trx)) {
5470
5471 ulint type_mode;
5472
5473 type_mode = (LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP);
5474
5475 lock_rec_add_to_queue(
5476 type_mode, block, heap_no, index, trx, FALSE);
5477 }
5478
5479 lock_mutex_exit();
5480
5481 trx->release_reference();
5482
5483 DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
5484}
5485
5486
5487#ifdef UNIV_DEBUG
5488struct lock_rec_other_trx_holds_expl_arg
5489{
5490 const ulint heap_no;
5491 const buf_block_t * const block;
5492 const trx_t *impl_trx;
5493};
5494
5495
5496static my_bool lock_rec_other_trx_holds_expl_callback(
5497 rw_trx_hash_element_t *element,
5498 lock_rec_other_trx_holds_expl_arg *arg)
5499{
5500 mutex_enter(&element->mutex);
5501 if (element->trx)
5502 {
5503 lock_t *expl_lock= lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, arg->block,
5504 arg->heap_no, element->trx);
    /*
      No explicit lock may be held on the record by any transaction
      other than the one holding the implicit lock.
    */
5509 ut_ad(!expl_lock || expl_lock->trx == arg->impl_trx);
5510 }
5511 mutex_exit(&element->mutex);
5512 return 0;
5513}
5514
5515
5516/**
 Asserts that no transaction, other than the given trx, holds an
 explicit lock on the given rec (debug builds only).
5519
5520 FIXME: if the current transaction holds implicit lock from INSERT, a
5521 subsequent locking read should not convert it to explicit. See also
5522 MDEV-11215.
5523
5524 @param caller_trx trx of current thread
5525 @param[in] trx trx holding implicit lock on rec
5526 @param[in] rec user record
5527 @param[in] block buffer block containing the record
5528*/
5529
5530static void lock_rec_other_trx_holds_expl(trx_t *caller_trx, trx_t *trx,
5531 const rec_t *rec,
5532 const buf_block_t *block)
5533{
5534 if (trx)
5535 {
5536 ut_ad(!page_rec_is_default_row(rec));
5537 lock_mutex_enter();
5538 lock_rec_other_trx_holds_expl_arg arg= { page_rec_get_heap_no(rec), block,
5539 trx };
5540 trx_sys.rw_trx_hash.iterate(caller_trx,
5541 reinterpret_cast<my_hash_walk_action>
5542 (lock_rec_other_trx_holds_expl_callback),
5543 &arg);
5544 lock_mutex_exit();
5545 }
5546}
5547#endif /* UNIV_DEBUG */
5548
5549
5550/*********************************************************************//**
5551If a transaction has an implicit x-lock on a record, but no explicit x-lock
5552set on the record, sets one for it. */
5553static
5554void
5555lock_rec_convert_impl_to_expl(
5556/*==========================*/
5557 trx_t* caller_trx,/*!<in/out: trx of current thread */
5558 const buf_block_t* block, /*!< in: buffer block of rec */
5559 const rec_t* rec, /*!< in: user record on page */
5560 dict_index_t* index, /*!< in: index of record */
5561 const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
5562{
5563 trx_t* trx;
5564
5565 ut_ad(!lock_mutex_own());
5566 ut_ad(page_rec_is_user_rec(rec));
5567 ut_ad(rec_offs_validate(rec, index, offsets));
5568 ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5569 ut_ad(page_rec_is_leaf(rec));
5570 ut_ad(!rec_is_default_row(rec, index));
5571
5572 if (dict_index_is_clust(index)) {
5573 trx_id_t trx_id;
5574
5575 trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
5576
5577 trx = trx_sys.find(caller_trx, trx_id);
5578 } else {
5579 ut_ad(!dict_index_is_online_ddl(index));
5580
5581 trx = lock_sec_rec_some_has_impl(caller_trx, rec, index,
5582 offsets);
5583
5584 ut_d(lock_rec_other_trx_holds_expl(caller_trx, trx, rec,
5585 block));
5586 }
5587
5588 if (trx != 0) {
5589 ulint heap_no = page_rec_get_heap_no(rec);
5590
5591 ut_ad(trx->is_referenced());
5592
5593 /* If the transaction is still active and has no
5594 explicit x-lock set on the record, set one for it.
5595 trx cannot be committed until the ref count is zero. */
5596
5597 lock_rec_convert_impl_to_expl_for_trx(
5598 block, rec, index, trx, heap_no);
5599 }
5600}
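
/* Worked example (not part of the original sources): T1 has inserted or
updated a clustered index record and holds only an implicit lock on it
(T1's id is in the record's trx id field). When T2 later requests a
conflicting lock on that record, this function, running in T2's thread,
first materializes T1's implicit lock as an explicit
LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP entry in the queue, so that T2's
request is enqueued behind it and waits. */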
5601
5602/*********************************************************************//**
5603Checks if locks of other transactions prevent an immediate modify (update,
5604delete mark, or delete unmark) of a clustered index record. If they do,
5605first tests if the query thread should anyway be suspended for some
5606reason; if not, then puts the transaction and the query thread to the
5607lock wait state and inserts a waiting request for a record x-lock to the
5608lock queue.
5609@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5610dberr_t
5611lock_clust_rec_modify_check_and_lock(
5612/*=================================*/
5613 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5614 bit is set, does nothing */
5615 const buf_block_t* block, /*!< in: buffer block of rec */
5616 const rec_t* rec, /*!< in: record which should be
5617 modified */
5618 dict_index_t* index, /*!< in: clustered index */
5619 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
5620 que_thr_t* thr) /*!< in: query thread */
5621{
5622 dberr_t err;
5623 ulint heap_no;
5624
5625 ut_ad(rec_offs_validate(rec, index, offsets));
5626 ut_ad(page_rec_is_leaf(rec));
5627 ut_ad(dict_index_is_clust(index));
5628 ut_ad(block->frame == page_align(rec));
5629
5630 if (flags & BTR_NO_LOCKING_FLAG) {
5631
5632 return(DB_SUCCESS);
5633 }
5634 ut_ad(!rec_is_default_row(rec, index));
5635 ut_ad(!index->table->is_temporary());
5636
5637 heap_no = rec_offs_comp(offsets)
5638 ? rec_get_heap_no_new(rec)
5639 : rec_get_heap_no_old(rec);
5640
5641 /* If a transaction has no explicit x-lock set on the record, set one
5642 for it */
5643
5644 lock_rec_convert_impl_to_expl(thr_get_trx(thr), block, rec, index,
5645 offsets);
5646
5647 err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
5648 block, heap_no, index, thr);
5649
5650 ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
5651
5652 if (err == DB_SUCCESS_LOCKED_REC) {
5653 err = DB_SUCCESS;
5654 }
5655
5656 return(err);
5657}
5658
5659/*********************************************************************//**
5660Checks if locks of other transactions prevent an immediate modify (delete
5661mark or delete unmark) of a secondary index record.
5662@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5663dberr_t
5664lock_sec_rec_modify_check_and_lock(
5665/*===============================*/
5666 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5667 bit is set, does nothing */
5668 buf_block_t* block, /*!< in/out: buffer block of rec */
5669 const rec_t* rec, /*!< in: record which should be
5670 modified; NOTE: as this is a secondary
5671 index, we always have to modify the
5672 clustered index record first: see the
5673 comment below */
5674 dict_index_t* index, /*!< in: secondary index */
5675 que_thr_t* thr, /*!< in: query thread
5676 (can be NULL if BTR_NO_LOCKING_FLAG) */
5677 mtr_t* mtr) /*!< in/out: mini-transaction */
5678{
5679 dberr_t err;
5680 ulint heap_no;
5681
5682 ut_ad(!dict_index_is_clust(index));
5683 ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
5684 ut_ad(block->frame == page_align(rec));
5685 ut_ad(mtr->is_named_space(index->table->space));
5686 ut_ad(page_rec_is_leaf(rec));
5687 ut_ad(!rec_is_default_row(rec, index));
5688
5689 if (flags & BTR_NO_LOCKING_FLAG) {
5690
5691 return(DB_SUCCESS);
5692 }
5693 ut_ad(!index->table->is_temporary());
5694
5695 heap_no = page_rec_get_heap_no(rec);
5696
5697 /* Another transaction cannot have an implicit lock on the record,
5698 because when we come here, we already have modified the clustered
5699 index record, and this would not have been possible if another active
5700 transaction had modified this secondary index record. */
5701
5702 err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
5703 block, heap_no, index, thr);
5704
5705#ifdef UNIV_DEBUG
5706 {
5707 mem_heap_t* heap = NULL;
5708 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5709 const ulint* offsets;
5710 rec_offs_init(offsets_);
5711
5712 offsets = rec_get_offsets(rec, index, offsets_, true,
5713 ULINT_UNDEFINED, &heap);
5714
5715 ut_ad(lock_rec_queue_validate(
5716 FALSE, block, rec, index, offsets));
5717
5718 if (heap != NULL) {
5719 mem_heap_free(heap);
5720 }
5721 }
5722#endif /* UNIV_DEBUG */
5723
5724 if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
5725 /* Update the page max trx id field */
5726 /* It might not be necessary to do this if
5727 err == DB_SUCCESS (no new lock created),
5728 but it should not cost too much performance. */
5729 page_update_max_trx_id(block,
5730 buf_block_get_page_zip(block),
5731 thr_get_trx(thr)->id, mtr);
5732 err = DB_SUCCESS;
5733 }
5734
5735 return(err);
5736}
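
/* Note: the PAGE_MAX_TRX_ID updated above is what
lock_sec_rec_read_check_and_lock() compares against
trx_sys.get_min_trx_id(): a reader needs to consider implicit locks on
a secondary index page only if the page was modified by a transaction
that may still be active. */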
5737
5738/*********************************************************************//**
5739Like lock_clust_rec_read_check_and_lock(), but reads a
5740secondary index record.
5741@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
5742dberr_t
5743lock_sec_rec_read_check_and_lock(
5744/*=============================*/
5745 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5746 bit is set, does nothing */
5747 const buf_block_t* block, /*!< in: buffer block of rec */
5748 const rec_t* rec, /*!< in: user record or page
5749 supremum record which should
5750 be read or passed over by a
5751 read cursor */
5752 dict_index_t* index, /*!< in: secondary index */
5753 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
5754 lock_mode mode, /*!< in: mode of the lock which
5755 the read cursor should set on
5756 records: LOCK_S or LOCK_X; the
5757 latter is possible in
5758 SELECT FOR UPDATE */
5759 ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
5760 LOCK_REC_NOT_GAP */
5761 que_thr_t* thr) /*!< in: query thread */
5762{
5763 dberr_t err;
5764 ulint heap_no;
5765
5766 ut_ad(!dict_index_is_clust(index));
5767 ut_ad(!dict_index_is_online_ddl(index));
5768 ut_ad(block->frame == page_align(rec));
5769 ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
5770 ut_ad(rec_offs_validate(rec, index, offsets));
5771 ut_ad(page_rec_is_leaf(rec));
5772 ut_ad(mode == LOCK_X || mode == LOCK_S);
5773
5774 if ((flags & BTR_NO_LOCKING_FLAG)
5775 || srv_read_only_mode
5776 || index->table->is_temporary()) {
5777
5778 return(DB_SUCCESS);
5779 }
5780
5781 ut_ad(!rec_is_default_row(rec, index));
5782 heap_no = page_rec_get_heap_no(rec);
5783
5784 /* Some transaction may have an implicit x-lock on the record only
5785 if the max trx id for the page >= min trx id for the trx list or a
5786 database recovery is running. */
5787
5788 if (!page_rec_is_supremum(rec)
5789 && page_get_max_trx_id(block->frame) >= trx_sys.get_min_trx_id()) {
5790
5791 lock_rec_convert_impl_to_expl(thr_get_trx(thr), block, rec,
5792 index, offsets);
5793 }
5794
5795 err = lock_rec_lock(FALSE, ulint(mode) | gap_mode,
5796 block, heap_no, index, thr);
5797
5798 ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
5799
5800 return(err);
5801}
5802
5803/*********************************************************************//**
5804Checks if locks of other transactions prevent an immediate read, or passing
5805over by a read cursor, of a clustered index record. If they do, first tests
5806if the query thread should anyway be suspended for some reason; if not, then
5807puts the transaction and the query thread to the lock wait state and inserts a
5808waiting request for a record lock to the lock queue. Sets the requested mode
5809lock on the record.
5810@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */
5811dberr_t
5812lock_clust_rec_read_check_and_lock(
5813/*===============================*/
5814 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5815 bit is set, does nothing */
5816 const buf_block_t* block, /*!< in: buffer block of rec */
5817 const rec_t* rec, /*!< in: user record or page
5818 supremum record which should
5819 be read or passed over by a
5820 read cursor */
5821 dict_index_t* index, /*!< in: clustered index */
5822 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
5823 lock_mode mode, /*!< in: mode of the lock which
5824 the read cursor should set on
5825 records: LOCK_S or LOCK_X; the
5826 latter is possible in
5827 SELECT FOR UPDATE */
5828 ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
5829 LOCK_REC_NOT_GAP */
5830 que_thr_t* thr) /*!< in: query thread */
5831{
5832 dberr_t err;
5833 ulint heap_no;
5834
5835 ut_ad(dict_index_is_clust(index));
5836 ut_ad(block->frame == page_align(rec));
5837 ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
5838 ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
5839 || gap_mode == LOCK_REC_NOT_GAP);
5840 ut_ad(rec_offs_validate(rec, index, offsets));
5841 ut_ad(page_rec_is_leaf(rec));
5842 ut_ad(!rec_is_default_row(rec, index));
5843
5844 if ((flags & BTR_NO_LOCKING_FLAG)
5845 || srv_read_only_mode
5846 || index->table->is_temporary()) {
5847
5848 return(DB_SUCCESS);
5849 }
5850
5851 heap_no = page_rec_get_heap_no(rec);
5852
5853 if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
5854
5855 lock_rec_convert_impl_to_expl(thr_get_trx(thr), block, rec,
5856 index, offsets);
5857 }
5858
5859 err = lock_rec_lock(FALSE, ulint(mode) | gap_mode,
5860 block, heap_no, index, thr);
5861
5862 ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
5863
5864 DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock");
5865
5866 return(err);
5867}
5868/*********************************************************************//**
5869Checks if locks of other transactions prevent an immediate read, or passing
5870over by a read cursor, of a clustered index record. If they do, first tests
5871if the query thread should anyway be suspended for some reason; if not, then
5872puts the transaction and the query thread to the lock wait state and inserts a
5873waiting request for a record lock to the lock queue. Sets the requested mode
5874lock on the record. This is an alternative version of
5875lock_clust_rec_read_check_and_lock() that does not require the parameter
5876"offsets".
5877@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5878dberr_t
5879lock_clust_rec_read_check_and_lock_alt(
5880/*===================================*/
5881 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5882 bit is set, does nothing */
5883 const buf_block_t* block, /*!< in: buffer block of rec */
5884 const rec_t* rec, /*!< in: user record or page
5885 supremum record which should
5886 be read or passed over by a
5887 read cursor */
5888 dict_index_t* index, /*!< in: clustered index */
5889 lock_mode mode, /*!< in: mode of the lock which
5890 the read cursor should set on
5891 records: LOCK_S or LOCK_X; the
5892 latter is possible in
5893 SELECT FOR UPDATE */
5894 ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
5895 LOCK_REC_NOT_GAP */
5896 que_thr_t* thr) /*!< in: query thread */
5897{
5898 mem_heap_t* tmp_heap = NULL;
5899 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5900 ulint* offsets = offsets_;
5901 dberr_t err;
5902 rec_offs_init(offsets_);
5903
5904 ut_ad(page_rec_is_leaf(rec));
5905 offsets = rec_get_offsets(rec, index, offsets, true,
5906 ULINT_UNDEFINED, &tmp_heap);
5907 err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
5908 offsets, mode, gap_mode, thr);
5909 if (tmp_heap) {
5910 mem_heap_free(tmp_heap);
5911 }
5912
5913 if (err == DB_SUCCESS_LOCKED_REC) {
5914 err = DB_SUCCESS;
5915 }
5916
5917 return(err);
5918}
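
/* Note: offsets_[REC_OFFS_NORMAL_SIZE] above is the usual stack-based
scratch buffer for rec_get_offsets(); tmp_heap is allocated, and must be
freed, only if the record has too many fields to fit in the buffer. */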
5919
5920/*******************************************************************//**
5921Release the last lock from the transaction's autoinc locks. */
5922UNIV_INLINE
5923void
5924lock_release_autoinc_last_lock(
5925/*===========================*/
5926 ib_vector_t* autoinc_locks) /*!< in/out: vector of AUTOINC locks */
5927{
5928 ulint last;
5929 lock_t* lock;
5930
5931 ut_ad(lock_mutex_own());
5932 ut_a(!ib_vector_is_empty(autoinc_locks));
5933
	/* The lock to be released must be the last lock acquired. */
5935 last = ib_vector_size(autoinc_locks) - 1;
5936 lock = *static_cast<lock_t**>(ib_vector_get(autoinc_locks, last));
5937
5938 /* Should have only AUTOINC locks in the vector. */
5939 ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
5940 ut_a(lock_get_type(lock) == LOCK_TABLE);
5941
5942 ut_a(lock->un_member.tab_lock.table != NULL);
5943
5944 /* This will remove the lock from the trx autoinc_locks too. */
5945 lock_table_dequeue(lock);
5946
5947 /* Remove from the table vector too. */
5948 lock_trx_table_locks_remove(lock);
5949}
5950
5951/*******************************************************************//**
5952Check if a transaction holds any autoinc locks.
5953@return TRUE if the transaction holds any AUTOINC locks. */
5954static
5955ibool
5956lock_trx_holds_autoinc_locks(
5957/*=========================*/
5958 const trx_t* trx) /*!< in: transaction */
5959{
5960 ut_a(trx->autoinc_locks != NULL);
5961
5962 return(!ib_vector_is_empty(trx->autoinc_locks));
5963}
5964
5965/*******************************************************************//**
5966Release all the transaction's autoinc locks. */
5967static
5968void
5969lock_release_autoinc_locks(
5970/*=======================*/
5971 trx_t* trx) /*!< in/out: transaction */
5972{
5973 ut_ad(lock_mutex_own());
5974 /* If this is invoked for a running transaction by the thread
5975 that is serving the transaction, then it is not necessary to
5976 hold trx->mutex here. */
5977
5978 ut_a(trx->autoinc_locks != NULL);
5979
5980 /* We release the locks in the reverse order. This is to
5981 avoid searching the vector for the element to delete at
5982 the lower level. See (lock_table_remove_low()) for details. */
5983 while (!ib_vector_is_empty(trx->autoinc_locks)) {
5984
5985 /* lock_table_remove_low() will also remove the lock from
5986 the transaction's autoinc_locks vector. */
5987 lock_release_autoinc_last_lock(trx->autoinc_locks);
5988 }
5989
5990 /* Should release all locks. */
5991 ut_a(ib_vector_is_empty(trx->autoinc_locks));
5992}
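
/* Note: releasing from the back of the vector means that each
lock_release_autoinc_last_lock() call removes the most recently
acquired AUTOINC lock, so lock_table_remove_low() can pop the last
element instead of searching the vector (see the comment above). */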
5993
5994/*******************************************************************//**
5995Gets the type of a lock. Non-inline version for using outside of the
5996lock module.
5997@return LOCK_TABLE or LOCK_REC */
5998ulint
5999lock_get_type(
6000/*==========*/
6001 const lock_t* lock) /*!< in: lock */
6002{
6003 return(lock_get_type_low(lock));
6004}
6005
6006/*******************************************************************//**
6007Gets the id of the transaction owning a lock.
6008@return transaction id */
6009trx_id_t
6010lock_get_trx_id(
6011/*============*/
6012 const lock_t* lock) /*!< in: lock */
6013{
6014 return(trx_get_id_for_print(lock->trx));
6015}
6016
6017/*******************************************************************//**
6018Gets the mode of a lock in a human readable string.
6019The string should not be free()'d or modified.
6020@return lock mode */
6021const char*
6022lock_get_mode_str(
6023/*==============*/
6024 const lock_t* lock) /*!< in: lock */
6025{
6026 ibool is_gap_lock;
6027
6028 is_gap_lock = lock_get_type_low(lock) == LOCK_REC
6029 && lock_rec_get_gap(lock);
6030
6031 switch (lock_get_mode(lock)) {
6032 case LOCK_S:
6033 if (is_gap_lock) {
6034 return("S,GAP");
6035 } else {
6036 return("S");
6037 }
6038 case LOCK_X:
6039 if (is_gap_lock) {
6040 return("X,GAP");
6041 } else {
6042 return("X");
6043 }
6044 case LOCK_IS:
6045 if (is_gap_lock) {
6046 return("IS,GAP");
6047 } else {
6048 return("IS");
6049 }
6050 case LOCK_IX:
6051 if (is_gap_lock) {
6052 return("IX,GAP");
6053 } else {
6054 return("IX");
6055 }
6056 case LOCK_AUTO_INC:
6057 return("AUTO_INC");
6058 default:
6059 return("UNKNOWN");
6060 }
6061}
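
/* The strings above are what external monitoring sees, e.g. in the
LOCK_MODE column of INFORMATION_SCHEMA.INNODB_LOCKS: "S", "S,GAP",
"X", "X,GAP", "IS", "IS,GAP", "IX", "IX,GAP" or "AUTO_INC". Note that
only the gap flag is reflected; the insert-intention flag is not. */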
6062
6063/*******************************************************************//**
6064Gets the type of a lock in a human readable string.
6065The string should not be free()'d or modified.
6066@return lock type */
6067const char*
6068lock_get_type_str(
6069/*==============*/
6070 const lock_t* lock) /*!< in: lock */
6071{
6072 switch (lock_get_type_low(lock)) {
6073 case LOCK_REC:
6074 return("RECORD");
6075 case LOCK_TABLE:
6076 return("TABLE");
6077 default:
6078 return("UNKNOWN");
6079 }
6080}
6081
6082/*******************************************************************//**
6083Gets the table on which the lock is.
6084@return table */
6085UNIV_INLINE
6086dict_table_t*
6087lock_get_table(
6088/*===========*/
6089 const lock_t* lock) /*!< in: lock */
6090{
6091 switch (lock_get_type_low(lock)) {
6092 case LOCK_REC:
6093 ut_ad(dict_index_is_clust(lock->index)
6094 || !dict_index_is_online_ddl(lock->index));
6095 return(lock->index->table);
6096 case LOCK_TABLE:
6097 return(lock->un_member.tab_lock.table);
6098 default:
6099 ut_error;
6100 return(NULL);
6101 }
6102}
6103
6104/*******************************************************************//**
6105Gets the id of the table on which the lock is.
6106@return id of the table */
6107table_id_t
6108lock_get_table_id(
6109/*==============*/
6110 const lock_t* lock) /*!< in: lock */
6111{
6112 dict_table_t* table;
6113
6114 table = lock_get_table(lock);
6115
6116 return(table->id);
6117}
6118
6119/** Determine which table a lock is associated with.
6120@param[in] lock the lock
6121@return name of the table */
6122const table_name_t&
6123lock_get_table_name(
6124 const lock_t* lock)
6125{
6126 return(lock_get_table(lock)->name);
6127}
6128
6129/*******************************************************************//**
6130For a record lock, gets the index on which the lock is.
6131@return index */
6132const dict_index_t*
6133lock_rec_get_index(
6134/*===============*/
6135 const lock_t* lock) /*!< in: lock */
6136{
6137 ut_a(lock_get_type_low(lock) == LOCK_REC);
6138 ut_ad(dict_index_is_clust(lock->index)
6139 || !dict_index_is_online_ddl(lock->index));
6140
6141 return(lock->index);
6142}
6143
6144/*******************************************************************//**
6145For a record lock, gets the name of the index on which the lock is.
6146The string should not be free()'d or modified.
6147@return name of the index */
6148const char*
6149lock_rec_get_index_name(
6150/*====================*/
6151 const lock_t* lock) /*!< in: lock */
6152{
6153 ut_a(lock_get_type_low(lock) == LOCK_REC);
6154 ut_ad(dict_index_is_clust(lock->index)
6155 || !dict_index_is_online_ddl(lock->index));
6156
6157 return(lock->index->name);
6158}
6159
6160/*******************************************************************//**
6161For a record lock, gets the tablespace number on which the lock is.
6162@return tablespace number */
6163ulint
6164lock_rec_get_space_id(
6165/*==================*/
6166 const lock_t* lock) /*!< in: lock */
6167{
6168 ut_a(lock_get_type_low(lock) == LOCK_REC);
6169
6170 return(lock->un_member.rec_lock.space);
6171}
6172
6173/*******************************************************************//**
6174For a record lock, gets the page number on which the lock is.
6175@return page number */
6176ulint
6177lock_rec_get_page_no(
6178/*=================*/
6179 const lock_t* lock) /*!< in: lock */
6180{
6181 ut_a(lock_get_type_low(lock) == LOCK_REC);
6182
6183 return(lock->un_member.rec_lock.page_no);
6184}
6185
6186/*********************************************************************//**
6187Cancels a waiting lock request and releases possible other transactions
6188waiting behind it. */
6189void
6190lock_cancel_waiting_and_release(
6191/*============================*/
6192 lock_t* lock) /*!< in/out: waiting lock request */
6193{
6194 que_thr_t* thr;
6195
6196 ut_ad(lock_mutex_own());
6197 ut_ad(trx_mutex_own(lock->trx));
6198
6199 lock->trx->lock.cancel = true;
6200
6201 if (lock_get_type_low(lock) == LOCK_REC) {
6202
6203 lock_rec_dequeue_from_page(lock);
6204 } else {
6205 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
6206
6207 if (lock->trx->autoinc_locks != NULL) {
6208 /* Release the transaction's AUTOINC locks. */
6209 lock_release_autoinc_locks(lock->trx);
6210 }
6211
6212 lock_table_dequeue(lock);
6213 }
6214
6215 /* Reset the wait flag and the back pointer to lock in trx. */
6216
6217 lock_reset_lock_and_trx_wait(lock);
6218
6219 /* The following function releases the trx from lock wait. */
6220
6221 thr = que_thr_end_lock_wait(lock->trx);
6222
6223 if (thr != NULL) {
6224 lock_wait_release_thread_if_suspended(thr);
6225 }
6226
6227 lock->trx->lock.cancel = false;
6228}
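
/* Note: this is the common cancellation path; it is reached e.g. when
a lock wait times out, when the waiting statement is killed, or when
the transaction is chosen as a deadlock victim (see
DeadlockChecker::trx_rollback() below). */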
6229
6230/*********************************************************************//**
6231Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
function should be called at the end of an SQL statement, by the
6233connection thread that owns the transaction (trx->mysql_thd). */
6234void
6235lock_unlock_table_autoinc(
6236/*======================*/
6237 trx_t* trx) /*!< in/out: transaction */
6238{
6239 ut_ad(!lock_mutex_own());
6240 ut_ad(!trx_mutex_own(trx));
6241 ut_ad(!trx->lock.wait_lock);
6242
6243 /* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
6244 but not COMMITTED transactions. */
6245
6246 ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
6247 || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
6248
6249 /* This function is invoked for a running transaction by the
6250 thread that is serving the transaction. Therefore it is not
6251 necessary to hold trx->mutex here. */
6252
6253 if (lock_trx_holds_autoinc_locks(trx)) {
6254 lock_mutex_enter();
6255
6256 lock_release_autoinc_locks(trx);
6257
6258 lock_mutex_exit();
6259 }
6260}
6261
6262/*********************************************************************//**
6263Releases a transaction's locks, and releases possible other transactions
6264waiting because of these locks. Change the state of the transaction to
6265TRX_STATE_COMMITTED_IN_MEMORY. */
6266void
6267lock_trx_release_locks(
6268/*===================*/
6269 trx_t* trx) /*!< in/out: transaction */
6270{
6271 check_trx_state(trx);
6272 ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED)
6273 || trx_state_eq(trx, TRX_STATE_ACTIVE));
6274
6275 bool release_lock = UT_LIST_GET_LEN(trx->lock.trx_locks) > 0;
6276
6277 /* Don't take lock_sys.mutex if trx didn't acquire any lock. */
6278 if (release_lock) {
6279
6280 /* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
6281 is protected by both the lock_sys.mutex and the trx->mutex. */
6282 lock_mutex_enter();
6283 }
6284
6285 /* The following assignment makes the transaction committed in memory
6286 and makes its changes to data visible to other transactions.
6287 NOTE that there is a small discrepancy from the strict formal
6288 visibility rules here: a human user of the database can see
6289 modifications made by another transaction T even before the necessary
6290 log segment has been flushed to the disk. If the database happens to
6291 crash before the flush, the user has seen modifications from T which
6292 will never be a committed transaction. However, any transaction T2
6293 which sees the modifications of the committing transaction T, and
6294 which also itself makes modifications to the database, will get an lsn
6295 larger than the committing transaction T. In the case where the log
6296 flush fails, and T never gets committed, also T2 will never get
6297 committed. */
6298
6299 /*--------------------------------------*/
6300 trx_mutex_enter(trx);
6301 trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
6302 trx_mutex_exit(trx);
6303 /*--------------------------------------*/
6304
6305 if (trx->is_referenced()) {
6306
6307 ut_a(release_lock);
6308
6309 lock_mutex_exit();
6310
6311 while (trx->is_referenced()) {
6312
6313 DEBUG_SYNC_C("waiting_trx_is_not_referenced");
6314
			/* Doing an implicit-to-explicit conversion
			should not be expensive. */
6317 ut_delay(srv_spin_wait_delay);
6318 }
6319
6320 lock_mutex_enter();
6321 }
6322
6323 ut_ad(!trx->is_referenced());
6324
6325 if (release_lock) {
6326
6327 lock_release(trx);
6328
6329 lock_mutex_exit();
6330 }
6331
6332 trx->lock.n_rec_locks = 0;
6333
6334 /* We don't remove the locks one by one from the vector for
6335 efficiency reasons. We simply reset it because we would have
6336 released all the locks anyway. */
6337
6338 trx->lock.table_locks.clear();
6339
6340 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
6341 ut_a(ib_vector_is_empty(trx->autoinc_locks));
6342 ut_a(trx->lock.table_locks.empty());
6343
6344 mem_heap_empty(trx->lock.lock_heap);
6345}
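
/* Note: the busy-wait on trx->is_referenced() above pairs with the
reference acquired in lock_rec_convert_impl_to_expl(): a thread that is
converting this transaction's implicit lock into an explicit one holds
a reference, and the commit must not complete until that conversion has
finished. */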
6346
6347static inline dberr_t lock_trx_handle_wait_low(trx_t* trx)
6348{
6349 ut_ad(lock_mutex_own());
6350 ut_ad(trx_mutex_own(trx));
6351
6352 if (trx->lock.was_chosen_as_deadlock_victim) {
6353 return DB_DEADLOCK;
6354 }
6355 if (!trx->lock.wait_lock) {
6356 /* The lock was probably granted before we got here. */
6357 return DB_SUCCESS;
6358 }
6359
6360 lock_cancel_waiting_and_release(trx->lock.wait_lock);
6361 return DB_LOCK_WAIT;
6362}
6363
6364/*********************************************************************//**
Check whether the transaction has already been rolled back because it
was selected as a deadlock victim; if it is instead still waiting for
a lock, cancel that wait.
6368@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
6369dberr_t
6370lock_trx_handle_wait(
6371/*=================*/
6372 trx_t* trx) /*!< in/out: trx lock state */
6373{
6374 lock_mutex_enter();
6375 trx_mutex_enter(trx);
6376 dberr_t err = lock_trx_handle_wait_low(trx);
6377 lock_mutex_exit();
6378 trx_mutex_exit(trx);
6379 return err;
6380}
6381
6382/*********************************************************************//**
6383Get the number of locks on a table.
6384@return number of locks */
6385ulint
6386lock_table_get_n_locks(
6387/*===================*/
6388 const dict_table_t* table) /*!< in: table */
6389{
6390 ulint n_table_locks;
6391
6392 lock_mutex_enter();
6393
6394 n_table_locks = UT_LIST_GET_LEN(table->locks);
6395
6396 lock_mutex_exit();
6397
6398 return(n_table_locks);
6399}
6400
6401#ifdef UNIV_DEBUG
/**
  Do an exhaustive check for any locks (table or rec) held by a single
  transaction against the given table.

  @param[in] element rw_trx_hash element (transaction) whose locks are checked
  @param[in] table check if there are any locks held on records in this table
                   or on the table itself
  @return 0, so that the iteration over the hash continues
*/
6408
6409static my_bool lock_table_locks_lookup(rw_trx_hash_element_t *element,
6410 const dict_table_t *table)
6411{
6412 ut_ad(lock_mutex_own());
6413 mutex_enter(&element->mutex);
6414 if (element->trx)
6415 {
6416 check_trx_state(element->trx);
6417 for (const lock_t *lock= UT_LIST_GET_FIRST(element->trx->lock.trx_locks);
6418 lock != NULL;
6419 lock= UT_LIST_GET_NEXT(trx_locks, lock))
6420 {
6421 ut_ad(lock->trx == element->trx);
6422 if (lock_get_type_low(lock) == LOCK_REC)
6423 {
6424 ut_ad(!dict_index_is_online_ddl(lock->index) ||
6425 dict_index_is_clust(lock->index));
6426 ut_ad(lock->index->table != table);
6427 }
6428 else
6429 ut_ad(lock->un_member.tab_lock.table != table);
6430 }
6431 }
6432 mutex_exit(&element->mutex);
6433 return 0;
6434}
6435#endif /* UNIV_DEBUG */
6436
6437/*******************************************************************//**
6438Check if there are any locks (table or rec) against table.
6439@return true if table has either table or record locks. */
6440bool
6441lock_table_has_locks(
6442/*=================*/
6443 const dict_table_t* table) /*!< in: check if there are any locks
6444 held on records in this table or on the
6445 table itself */
6446{
6447 ibool has_locks;
6448
6449 ut_ad(table != NULL);
6450 lock_mutex_enter();
6451
6452 has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;
6453
6454#ifdef UNIV_DEBUG
6455 if (!has_locks) {
6456 trx_sys.rw_trx_hash.iterate(
6457 reinterpret_cast<my_hash_walk_action>
6458 (lock_table_locks_lookup),
6459 const_cast<dict_table_t*>(table));
6460 }
6461#endif /* UNIV_DEBUG */
6462
6463 lock_mutex_exit();
6464
6465 return(has_locks);
6466}
6467
6468/*******************************************************************//**
6469Initialise the table lock list. */
6470void
6471lock_table_lock_list_init(
6472/*======================*/
6473 table_lock_list_t* lock_list) /*!< List to initialise */
6474{
6475 UT_LIST_INIT(*lock_list, &lock_table_t::locks);
6476}
6477
6478/*******************************************************************//**
6479Initialise the trx lock list. */
6480void
6481lock_trx_lock_list_init(
6482/*====================*/
6483 trx_lock_list_t* lock_list) /*!< List to initialise */
6484{
6485 UT_LIST_INIT(*lock_list, &lock_t::trx_locks);
6486}
6487
6488/*******************************************************************//**
6489Set the lock system timeout event. */
6490void
6491lock_set_timeout_event()
6492/*====================*/
6493{
6494 os_event_set(lock_sys.timeout_event);
6495}
6496
6497#ifdef UNIV_DEBUG
6498/*******************************************************************//**
6499Check if the transaction holds any locks on the sys tables
6500or its records.
6501@return the strongest lock found on any sys table or 0 for none */
6502const lock_t*
6503lock_trx_has_sys_table_locks(
6504/*=========================*/
6505 const trx_t* trx) /*!< in: transaction to check */
6506{
6507 const lock_t* strongest_lock = 0;
6508 lock_mode strongest = LOCK_NONE;
6509
6510 lock_mutex_enter();
6511
6512 typedef lock_pool_t::const_reverse_iterator iterator;
6513
6514 iterator end = trx->lock.table_locks.rend();
6515 iterator it = trx->lock.table_locks.rbegin();
6516
	/* Find a valid mode. Note: trx->lock.table_locks can be empty. */
6518
6519 for (/* No op */; it != end; ++it) {
6520 const lock_t* lock = *it;
6521
6522 if (lock != NULL
6523 && dict_is_sys_table(lock->un_member.tab_lock.table->id)) {
6524
6525 strongest = lock_get_mode(lock);
6526 ut_ad(strongest != LOCK_NONE);
6527 strongest_lock = lock;
6528 break;
6529 }
6530 }
6531
6532 if (strongest == LOCK_NONE) {
6533 lock_mutex_exit();
6534 return(NULL);
6535 }
6536
6537 for (/* No op */; it != end; ++it) {
6538 const lock_t* lock = *it;
6539
6540 if (lock == NULL) {
6541 continue;
6542 }
6543
6544 ut_ad(trx == lock->trx);
6545 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
6546 ut_ad(lock->un_member.tab_lock.table != NULL);
6547
6548 lock_mode mode = lock_get_mode(lock);
6549
6550 if (dict_is_sys_table(lock->un_member.tab_lock.table->id)
6551 && lock_mode_stronger_or_eq(mode, strongest)) {
6552
6553 strongest = mode;
6554 strongest_lock = lock;
6555 }
6556 }
6557
6558 lock_mutex_exit();
6559
6560 return(strongest_lock);
6561}
6562
6563/*******************************************************************//**
6564Check if the transaction holds an exclusive lock on a record.
6565@return whether the locks are held */
6566bool
6567lock_trx_has_rec_x_lock(
6568/*====================*/
6569 const trx_t* trx, /*!< in: transaction to check */
6570 const dict_table_t* table, /*!< in: table to check */
6571 const buf_block_t* block, /*!< in: buffer block of the record */
6572 ulint heap_no)/*!< in: record heap number */
6573{
6574 ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);
6575
6576 lock_mutex_enter();
6577 ut_a(lock_table_has(trx, table, LOCK_IX)
6578 || table->is_temporary());
6579 ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
6580 block, heap_no, trx)
6581 || table->is_temporary());
6582 lock_mutex_exit();
6583 return(true);
6584}
6585#endif /* UNIV_DEBUG */
6586
6587/** rewind(3) the file used for storing the latest detected deadlock and
6588print a heading message to stderr if printing of all deadlocks to stderr
6589is enabled. */
6590void
6591DeadlockChecker::start_print()
6592{
6593 ut_ad(lock_mutex_own());
6594
6595 rewind(lock_latest_err_file);
6596 ut_print_timestamp(lock_latest_err_file);
6597
6598 if (srv_print_all_deadlocks) {
6599 ib::info() << "Transactions deadlock detected, dumping"
6600 << " detailed information.";
6601 }
6602}
6603
6604/** Print a message to the deadlock file and possibly to stderr.
6605@param msg message to print */
6606void
6607DeadlockChecker::print(const char* msg)
6608{
6609 fputs(msg, lock_latest_err_file);
6610
6611 if (srv_print_all_deadlocks) {
6612 ib::info() << msg;
6613 }
6614}
6615
6616/** Print transaction data to the deadlock file and possibly to stderr.
6617@param trx transaction
6618@param max_query_len max query length to print */
6619void
6620DeadlockChecker::print(const trx_t* trx, ulint max_query_len)
6621{
6622 ut_ad(lock_mutex_own());
6623
6624 ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock);
6625 ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
6626 ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
6627
6628 trx_print_low(lock_latest_err_file, trx, max_query_len,
6629 n_rec_locks, n_trx_locks, heap_size);
6630
6631 if (srv_print_all_deadlocks) {
6632 trx_print_low(stderr, trx, max_query_len,
6633 n_rec_locks, n_trx_locks, heap_size);
6634 }
6635}
6636
6637/** Print lock data to the deadlock file and possibly to stderr.
6638@param lock record or table type lock */
6639void
6640DeadlockChecker::print(const lock_t* lock)
6641{
6642 ut_ad(lock_mutex_own());
6643
6644 if (lock_get_type_low(lock) == LOCK_REC) {
6645 lock_rec_print(lock_latest_err_file, lock);
6646
6647 if (srv_print_all_deadlocks) {
6648 lock_rec_print(stderr, lock);
6649 }
6650 } else {
6651 lock_table_print(lock_latest_err_file, lock);
6652
6653 if (srv_print_all_deadlocks) {
6654 lock_table_print(stderr, lock);
6655 }
6656 }
6657}
6658
6659/** Get the next lock in the queue that is owned by a transaction whose
6660sub-tree has not already been searched.
6661Note: "next" here means PREV for table locks.
6662
6663@param lock Lock in queue
@param heap_no heap number if lock is a record lock, else ULINT_UNDEFINED
6665
6666@return next lock or NULL if at end of queue */
6667const lock_t*
6668DeadlockChecker::get_next_lock(const lock_t* lock, ulint heap_no) const
6669{
6670 ut_ad(lock_mutex_own());
6671
6672 do {
6673 if (lock_get_type_low(lock) == LOCK_REC) {
6674 ut_ad(heap_no != ULINT_UNDEFINED);
6675 lock = lock_rec_get_next_const(heap_no, lock);
6676 } else {
6677 ut_ad(heap_no == ULINT_UNDEFINED);
6678 ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
6679
6680 lock = UT_LIST_GET_NEXT(
6681 un_member.tab_lock.locks, lock);
6682 }
6683
6684 } while (lock != NULL && is_visited(lock));
6685
6686 ut_ad(lock == NULL
6687 || lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));
6688
6689 return(lock);
6690}
6691
6692/** Get the first lock to search. The search starts from the current
6693wait_lock. What we are really interested in is an edge from the
6694current wait_lock's owning transaction to another transaction that has
6695a lock ahead in the queue. We skip locks where the owning transaction's
6696sub-tree has already been searched.
6697
6698Note: The record locks are traversed from the oldest lock to the
6699latest. For table locks we go from latest to oldest.
6700
6701For record locks, we first position the "iterator" on the first lock on
6702the page and then reposition on the actual heap_no. This is required
due to the way the record lock hash is implemented.
6704
@param[out] heap_no heap number of the record if this is a record lock, else ULINT_UNDEFINED.
6706@return first lock or NULL */
6707const lock_t*
6708DeadlockChecker::get_first_lock(ulint* heap_no) const
6709{
6710 ut_ad(lock_mutex_own());
6711
6712 const lock_t* lock = m_wait_lock;
6713
6714 if (lock_get_type_low(lock) == LOCK_REC) {
6715 hash_table_t* lock_hash;
6716
6717 lock_hash = lock->type_mode & LOCK_PREDICATE
6718 ? lock_sys.prdt_hash
6719 : lock_sys.rec_hash;
6720
6721 /* We are only interested in records that match the heap_no. */
6722 *heap_no = lock_rec_find_set_bit(lock);
6723
6724 ut_ad(*heap_no <= 0xffff);
6725 ut_ad(*heap_no != ULINT_UNDEFINED);
6726
6727 /* Find the locks on the page. */
6728 lock = lock_rec_get_first_on_page_addr(
6729 lock_hash,
6730 lock->un_member.rec_lock.space,
6731 lock->un_member.rec_lock.page_no);
6732
6733 /* Position on the first lock on the physical record.*/
6734 if (!lock_rec_get_nth_bit(lock, *heap_no)) {
6735 lock = lock_rec_get_next_const(*heap_no, lock);
6736 }
6737
6738 ut_a(!lock_get_wait(lock));
6739 } else {
6740 /* Table locks don't care about the heap_no. */
6741 *heap_no = ULINT_UNDEFINED;
6742 ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
6743 dict_table_t* table = lock->un_member.tab_lock.table;
6744 lock = UT_LIST_GET_FIRST(table->locks);
6745 }
6746
	/* We must find at least two locks, otherwise there cannot be a
	waiting lock; also, the first lock cannot be the wait_lock
	(except under the VATS scheduling algorithm). */
6749 ut_a(lock != NULL);
6750 ut_a(lock != m_wait_lock ||
6751 (innodb_lock_schedule_algorithm
6752 == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
6753 && !thd_is_replication_slave_thread(lock->trx->mysql_thd)));
6754
6755 /* Check that the lock type doesn't change. */
6756 ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));
6757
6758 return(lock);
6759}
6760
6761/** Notify that a deadlock has been detected and print the conflicting
6762transaction info.
6763@param lock lock causing deadlock */
6764void
6765DeadlockChecker::notify(const lock_t* lock) const
6766{
6767 ut_ad(lock_mutex_own());
6768
6769 start_print();
6770
6771 print("\n*** (1) TRANSACTION:\n");
6772
6773 print(m_wait_lock->trx, 3000);
6774
6775 print("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");
6776
6777 print(m_wait_lock);
6778
6779 print("*** (2) TRANSACTION:\n");
6780
6781 print(lock->trx, 3000);
6782
6783 print("*** (2) HOLDS THE LOCK(S):\n");
6784
6785 print(lock);
6786
6787 /* It is possible that the joining transaction was granted its
6788 lock when we rolled back some other waiting transaction. */
6789
6790 if (m_start->lock.wait_lock != 0) {
6791 print("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
6792
6793 print(m_start->lock.wait_lock);
6794 }
6795
6796 DBUG_PRINT("ib_lock", ("deadlock detected"));
6797}
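
/* Note: the output produced above goes to lock_latest_err_file, whose
contents are displayed in the LATEST DETECTED DEADLOCK section of
SHOW ENGINE INNODB STATUS; if innodb_print_all_deadlocks is enabled
(srv_print_all_deadlocks), a copy is also written to the error log. */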
6798
/** Select the victim transaction that should be rolled back.
6800@return victim transaction */
6801const trx_t*
6802DeadlockChecker::select_victim() const
6803{
6804 ut_ad(lock_mutex_own());
6805 ut_ad(m_start->lock.wait_lock != 0);
6806 ut_ad(m_wait_lock->trx != m_start);
6807
6808 if (trx_weight_ge(m_wait_lock->trx, m_start)) {
6809 /* The joining transaction is 'smaller',
6810 choose it as the victim and roll it back. */
6811#ifdef WITH_WSREP
6812 if (wsrep_thd_is_BF(m_start->mysql_thd, TRUE)) {
6813 return(m_wait_lock->trx);
6814 }
6815#endif /* WITH_WSREP */
6816 return(m_start);
6817 }
6818
6819#ifdef WITH_WSREP
6820 if (wsrep_thd_is_BF(m_wait_lock->trx->mysql_thd, TRUE)) {
6821 return(m_start);
6822 }
6823#endif /* WITH_WSREP */
6824
6825 return(m_wait_lock->trx);
6826}
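
/* Note: trx_weight_ge() above compares transaction "weights", which in
this implementation grow roughly with the number of locks held and the
amount of undo log written, so the victim is normally the transaction
that has done less work. Under Galera, a brute-force (BF) transaction
is never chosen as the victim. */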
6827
6828/** Looks iteratively for a deadlock. Note: the joining transaction may
6829have been granted its lock by the deadlock checks.
6830@return 0 if no deadlock else the victim transaction instance.*/
6831const trx_t*
6832DeadlockChecker::search()
6833{
6834 ut_ad(lock_mutex_own());
6835 ut_ad(!trx_mutex_own(m_start));
6836
6837 ut_ad(m_start != NULL);
6838 ut_ad(m_wait_lock != NULL);
6839 check_trx_state(m_wait_lock->trx);
6840 ut_ad(m_mark_start <= s_lock_mark_counter);
6841
6842 /* Look at the locks ahead of wait_lock in the lock queue. */
6843 ulint heap_no;
6844 const lock_t* lock = get_first_lock(&heap_no);
6845
6846 for (;;) {
6847 /* We should never visit the same sub-tree more than once. */
6848 ut_ad(lock == NULL || !is_visited(lock));
6849
6850 while (m_n_elems > 0 && lock == NULL) {
6851
6852 /* Restore previous search state. */
6853
6854 pop(lock, heap_no);
6855
6856 lock = get_next_lock(lock, heap_no);
6857 }
6858
6859 if (lock == NULL) {
6860 break;
6861 }
6862
6863 if (lock == m_wait_lock) {
6864
6865 /* We can mark this subtree as searched */
6866 ut_ad(lock->trx->lock.deadlock_mark <= m_mark_start);
6867
6868 lock->trx->lock.deadlock_mark = ++s_lock_mark_counter;
6869
6870 /* We are not prepared for an overflow. This 64-bit
6871 counter should never wrap around. At 10^9 increments
6872 per second, it would take 10^3 years of uptime. */
6873
6874 ut_ad(s_lock_mark_counter > 0);
6875
6876 /* Backtrack */
6877 lock = NULL;
6878 continue;
6879 }
6880
6881 if (!lock_has_to_wait(m_wait_lock, lock)) {
6882 /* No conflict, next lock */
6883 lock = get_next_lock(lock, heap_no);
6884 continue;
6885 }
6886
6887 if (lock->trx == m_start) {
6888 /* Found a cycle. */
6889 notify(lock);
6890 return select_victim();
6891 }
6892
6893 if (is_too_deep()) {
6894 /* Search too deep to continue. */
6895 m_too_deep = true;
6896 return m_start;
6897 }
6898
		/* We do not need to report autoinc locks to the upper
		layer. These locks are released before commit, so they
		cannot cause deadlocks with binlog-fixed commit
		order. */
6903 if (m_report_waiters
6904 && (lock_get_type_low(lock) != LOCK_TABLE
6905 || lock_get_mode(lock) != LOCK_AUTO_INC)) {
6906 thd_rpl_deadlock_check(m_start->mysql_thd,
6907 lock->trx->mysql_thd);
6908 }
6909
6910 if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
6911 /* Another trx ahead has requested a lock in an
6912 incompatible mode, and is itself waiting for a lock. */
6913
6914 ++m_cost;
6915
6916 if (!push(lock, heap_no)) {
6917 m_too_deep = true;
6918 return m_start;
6919 }
6920
6921 m_wait_lock = lock->trx->lock.wait_lock;
6922
6923 lock = get_first_lock(&heap_no);
6924
6925 if (is_visited(lock)) {
6926 lock = get_next_lock(lock, heap_no);
6927 }
6928 } else {
6929 lock = get_next_lock(lock, heap_no);
6930 }
6931 }
6932
6933 ut_a(lock == NULL && m_n_elems == 0);
6934
6935 /* No deadlock found. */
6936 return(0);
6937}
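
/* Illustrative wait-for cycle that the search above detects, assuming
two transactions and two rows:

	trx A: UPDATE t SET v = 1 WHERE id = 1;  -- x-lock on row 1
	trx B: UPDATE t SET v = 2 WHERE id = 2;  -- x-lock on row 2
	trx A: UPDATE t SET v = 1 WHERE id = 2;  -- waits for trx B
	trx B: UPDATE t SET v = 2 WHERE id = 1;  -- waits for trx A

When trx B enqueues its waiting request, the depth-first search
starting from its wait_lock reaches a conflicting lock owned by trx B
itself (lock->trx == m_start) and reports the cycle. The
deadlock_mark/s_lock_mark_counter bookkeeping lets subtrees that were
already searched be skipped. */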
6938
6939/** Print info about transaction that was rolled back.
6940@param trx transaction rolled back
6941@param lock lock trx wants */
6942void
6943DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock)
6944{
6945 ut_ad(lock_mutex_own());
6946
6947 /* If the lock search exceeds the max step
6948 or the max depth, the current trx will be
6949 the victim. Print its information. */
6950 start_print();
6951
6952 print("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
6953 " WAITS-FOR GRAPH, WE WILL ROLL BACK"
6954 " FOLLOWING TRANSACTION \n\n"
6955 "*** TRANSACTION:\n");
6956
6957 print(trx, 3000);
6958
6959 print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
6960
6961 print(lock);
6962}
6963
6964/** Rollback transaction selected as the victim. */
6965void
6966DeadlockChecker::trx_rollback()
6967{
6968 ut_ad(lock_mutex_own());
6969
6970 trx_t* trx = m_wait_lock->trx;
6971
6972 print("*** WE ROLL BACK TRANSACTION (1)\n");
6973
6974 trx_mutex_enter(trx);
6975
6976 trx->lock.was_chosen_as_deadlock_victim = true;
6977
6978 lock_cancel_waiting_and_release(trx->lock.wait_lock);
6979
6980 trx_mutex_exit(trx);
6981}
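
/* Note: only the victim's waiting lock request is cancelled here. The
victim notices trx->lock.was_chosen_as_deadlock_victim when it resumes
(see lock_trx_handle_wait_low() above), receives DB_DEADLOCK, and the
actual rollback is then performed by the upper layer. */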
6982
/** Checks if a joining lock request results in a deadlock. If a deadlock is
found, this function will resolve the deadlock by choosing a victim transaction
and rolling it back. It will attempt to resolve all deadlocks. The joining
transaction is returned if it was chosen as the victim; NULL is returned if
some other transaction was chosen as a victim and rolled back, or if no
deadlock was found.
6988
6989@param[in] lock lock the transaction is requesting
6990@param[in,out] trx transaction requesting the lock
6991
@return transaction instance chosen as victim or 0 */
6993const trx_t*
6994DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx)
6995{
6996 ut_ad(lock_mutex_own());
6997 ut_ad(trx_mutex_own(trx));
6998 check_trx_state(trx);
6999 ut_ad(!srv_read_only_mode);
7000
7001 if (!innobase_deadlock_detect) {
7002 return(NULL);
7003 }
7004
7005 /* Release the mutex to obey the latching order.
7006 This is safe, because DeadlockChecker::check_and_resolve()
7007 is invoked when a lock wait is enqueued for the currently
7008 running transaction. Because m_trx is a running transaction
7009 (it is not currently suspended because of a lock wait),
7010 its state can only be changed by this thread, which is
7011 currently associated with the transaction. */
7012
7013 trx_mutex_exit(trx);
7014
7015 const trx_t* victim_trx;
7016 const bool report_waiters = trx->mysql_thd
7017 && thd_need_wait_reports(trx->mysql_thd);
7018
7019 /* Try and resolve as many deadlocks as possible. */
7020 do {
7021 DeadlockChecker checker(trx, lock, s_lock_mark_counter,
7022 report_waiters);
7023
7024 victim_trx = checker.search();
7025
		/* The search was too deep to complete: the joining
		transaction itself is chosen as the victim. We only
		print the diagnostics here; the caller performs the
		rollback. */
7030 if (checker.is_too_deep()) {
7031
7032 ut_ad(trx == checker.m_start);
7033 ut_ad(trx == victim_trx);
7034
7035 rollback_print(victim_trx, lock);
7036
7037 MONITOR_INC(MONITOR_DEADLOCK);
7038
7039 break;
7040
7041 } else if (victim_trx != NULL && victim_trx != trx) {
7042
7043 ut_ad(victim_trx == checker.m_wait_lock->trx);
7044
7045 checker.trx_rollback();
7046
7047 lock_deadlock_found = true;
7048
7049 MONITOR_INC(MONITOR_DEADLOCK);
7050 }
7051
7052 } while (victim_trx != NULL && victim_trx != trx);
7053
7054 /* If the joining transaction was selected as the victim. */
7055 if (victim_trx != NULL) {
7056
7057 print("*** WE ROLL BACK TRANSACTION (2)\n");
7058
7059 lock_deadlock_found = true;
7060 }
7061
7062 trx_mutex_enter(trx);
7063
7064 return(victim_trx);
7065}
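
/* Note: with innodb_deadlock_detect=OFF (innobase_deadlock_detect
above), no waits-for graph search is performed at all; lock waits are
then broken only by innodb_lock_wait_timeout. */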
7066
7067/**
7068Allocate cached locks for the transaction.
7069@param trx allocate cached record locks for this transaction */
7070void
7071lock_trx_alloc_locks(trx_t* trx)
7072{
7073 ulint sz = REC_LOCK_SIZE * REC_LOCK_CACHE;
7074 byte* ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));
7075
7076 /* We allocate one big chunk and then distribute it among
7077 the rest of the elements. The allocated chunk pointer is always
7078 at index 0. */
7079
7080 for (ulint i = 0; i < REC_LOCK_CACHE; ++i, ptr += REC_LOCK_SIZE) {
7081 trx->lock.rec_pool.push_back(
7082 reinterpret_cast<ib_lock_t*>(ptr));
7083 }
7084
7085 sz = TABLE_LOCK_SIZE * TABLE_LOCK_CACHE;
7086 ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));
7087
7088 for (ulint i = 0; i < TABLE_LOCK_CACHE; ++i, ptr += TABLE_LOCK_SIZE) {
7089 trx->lock.table_pool.push_back(
7090 reinterpret_cast<ib_lock_t*>(ptr));
7091 }
7092
7093}
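
/* Note: the REC_LOCK_CACHE and TABLE_LOCK_CACHE pools above let a
transaction create its first few locks without a heap allocation per
lock. Because each pool is carved out of one malloc'ed chunk whose
pointer is kept at index 0, the whole cache can later be released by
freeing that first element. */
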
/*************************************************************//**
Updates the lock table when, after a page split, records are
merged from the right page onto the left page. */
7097UNIV_INTERN
7098void
7099lock_update_split_and_merge(
7100 const buf_block_t* left_block, /*!< in: left page to which merged */
7101 const rec_t* orig_pred, /*!< in: original predecessor of
7102 supremum on the left page before merge*/
7103 const buf_block_t* right_block) /*!< in: right page from which merged */
7104{
7105 const rec_t* left_next_rec;
7106
7107 ut_ad(page_is_leaf(left_block->frame));
7108 ut_ad(page_is_leaf(right_block->frame));
7109 ut_ad(page_align(orig_pred) == left_block->frame);
7110
7111 lock_mutex_enter();
7112
7113 left_next_rec = page_rec_get_next_const(orig_pred);
7114 ut_ad(!page_rec_is_default_row(left_next_rec));
7115
7116 /* Inherit the locks on the supremum of the left page to the
7117 first record which was moved from the right page */
7118 lock_rec_inherit_to_gap(
7119 left_block, left_block,
7120 page_rec_get_heap_no(left_next_rec),
7121 PAGE_HEAP_NO_SUPREMUM);
7122
7123 /* Reset the locks on the supremum of the left page,
7124 releasing waiting transactions */
7125 lock_rec_reset_and_release_wait(left_block,
7126 PAGE_HEAP_NO_SUPREMUM);
7127
7128 /* Inherit the locks to the supremum of the left page from the
7129 successor of the infimum on the right page */
7130 lock_rec_inherit_to_gap(left_block, right_block,
7131 PAGE_HEAP_NO_SUPREMUM,
7132 lock_get_min_heap_no(right_block));
7133
7134 lock_mutex_exit();
7135}
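
/* Note: the three steps above preserve gap lock semantics across the
change of page boundaries: locks on the gap before the left page's
supremum must follow the records moved in from the right page, and the
gap at the end of the left page must in turn inherit the locks from the
gap before the right page's first user record. */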
7136