1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2014, 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file lock/lock0wait.cc |
22 | The transaction lock system |
23 | |
24 | Created 25/5/2010 Sunny Bains |
25 | *******************************************************/ |
26 | |
27 | #define LOCK_MODULE_IMPLEMENTATION |
28 | |
29 | #include "ha_prototypes.h" |
30 | #include <mysql/service_thd_wait.h> |
31 | #include <mysql/service_wsrep.h> |
32 | |
33 | #include "srv0mon.h" |
34 | #include "que0que.h" |
35 | #include "lock0lock.h" |
36 | #include "row0mysql.h" |
37 | #include "srv0start.h" |
38 | #include "lock0priv.h" |
39 | |
40 | /*********************************************************************//** |
41 | Print the contents of the lock_sys_t::waiting_threads array. */ |
42 | static |
43 | void |
44 | lock_wait_table_print(void) |
45 | /*=======================*/ |
46 | { |
47 | ut_ad(lock_wait_mutex_own()); |
48 | |
49 | const srv_slot_t* slot = lock_sys.waiting_threads; |
50 | |
51 | for (ulint i = 0; i < srv_max_n_threads; i++, ++slot) { |
52 | |
53 | fprintf(stderr, |
54 | "Slot %lu: thread type %lu," |
55 | " in use %lu, susp %lu, timeout %lu, time %lu\n" , |
56 | (ulong) i, |
57 | (ulong) slot->type, |
58 | (ulong) slot->in_use, |
59 | (ulong) slot->suspended, |
60 | slot->wait_timeout, |
61 | (ulong) difftime(ut_time(), slot->suspend_time)); |
62 | } |
63 | } |
64 | |
65 | /*********************************************************************//** |
66 | Release a slot in the lock_sys_t::waiting_threads. Adjust the array last pointer |
67 | if there are empty slots towards the end of the table. */ |
68 | static |
69 | void |
70 | lock_wait_table_release_slot( |
71 | /*=========================*/ |
72 | srv_slot_t* slot) /*!< in: slot to release */ |
73 | { |
74 | #ifdef UNIV_DEBUG |
75 | srv_slot_t* upper = lock_sys.waiting_threads + srv_max_n_threads; |
76 | #endif /* UNIV_DEBUG */ |
77 | |
78 | lock_wait_mutex_enter(); |
79 | |
80 | ut_ad(slot->in_use); |
81 | ut_ad(slot->thr != NULL); |
82 | ut_ad(slot->thr->slot != NULL); |
83 | ut_ad(slot->thr->slot == slot); |
84 | |
85 | /* Must be within the array boundaries. */ |
86 | ut_ad(slot >= lock_sys.waiting_threads); |
87 | ut_ad(slot < upper); |
88 | |
89 | /* Note: When we reserve the slot we use the trx_t::mutex to update |
90 | the slot values to change the state to reserved. Here we are using the |
91 | lock mutex to change the state of the slot to free. This is by design, |
92 | because when we query the slot state we always hold both the lock and |
93 | trx_t::mutex. To reduce contention on the lock mutex when reserving the |
94 | slot we avoid acquiring the lock mutex. */ |
95 | |
96 | lock_mutex_enter(); |
97 | |
98 | slot->thr->slot = NULL; |
99 | slot->thr = NULL; |
100 | slot->in_use = FALSE; |
101 | |
102 | lock_mutex_exit(); |
103 | |
104 | /* Scan backwards and adjust the last free slot pointer. */ |
105 | for (slot = lock_sys.last_slot; |
106 | slot > lock_sys.waiting_threads && !slot->in_use; |
107 | --slot) { |
108 | /* No op */ |
109 | } |
110 | |
111 | /* Either the array is empty or the last scanned slot is in use. */ |
112 | ut_ad(slot->in_use || slot == lock_sys.waiting_threads); |
113 | |
114 | lock_sys.last_slot = slot + 1; |
115 | |
116 | /* The last slot is either outside of the array boundary or it's |
117 | on an empty slot. */ |
118 | ut_ad(lock_sys.last_slot == upper || !lock_sys.last_slot->in_use); |
119 | |
120 | ut_ad(lock_sys.last_slot >= lock_sys.waiting_threads); |
121 | ut_ad(lock_sys.last_slot <= upper); |
122 | |
123 | lock_wait_mutex_exit(); |
124 | } |
125 | |
126 | /*********************************************************************//** |
127 | Reserves a slot in the thread table for the current user OS thread. |
128 | @return reserved slot */ |
129 | static |
130 | srv_slot_t* |
131 | lock_wait_table_reserve_slot( |
132 | /*=========================*/ |
133 | que_thr_t* thr, /*!< in: query thread associated |
134 | with the user OS thread */ |
135 | ulong wait_timeout) /*!< in: lock wait timeout value */ |
136 | { |
137 | ulint i; |
138 | srv_slot_t* slot; |
139 | |
140 | ut_ad(lock_wait_mutex_own()); |
141 | ut_ad(trx_mutex_own(thr_get_trx(thr))); |
142 | |
143 | slot = lock_sys.waiting_threads; |
144 | |
145 | for (i = srv_max_n_threads; i--; ++slot) { |
146 | if (!slot->in_use) { |
147 | slot->in_use = TRUE; |
148 | slot->thr = thr; |
149 | slot->thr->slot = slot; |
150 | |
151 | if (slot->event == NULL) { |
152 | slot->event = os_event_create(0); |
153 | ut_a(slot->event); |
154 | } |
155 | |
156 | os_event_reset(slot->event); |
157 | slot->suspended = TRUE; |
158 | slot->suspend_time = ut_time(); |
159 | slot->wait_timeout = wait_timeout; |
160 | |
161 | if (slot == lock_sys.last_slot) { |
162 | ++lock_sys.last_slot; |
163 | } |
164 | |
165 | ut_ad(lock_sys.last_slot |
166 | <= lock_sys.waiting_threads + srv_max_n_threads); |
167 | |
168 | return(slot); |
169 | } |
170 | } |
171 | |
172 | ib::error() << "There appear to be " << srv_max_n_threads << " user" |
173 | " threads currently waiting inside InnoDB, which is the upper" |
174 | " limit. Cannot continue operation. Before aborting, we print" |
175 | " a list of waiting threads." ; |
176 | lock_wait_table_print(); |
177 | |
178 | ut_error; |
179 | return(NULL); |
180 | } |
181 | |
#ifdef WITH_WSREP
/*********************************************************************//**
Check if the lock timeout was hit by a Galera high-priority (brute force)
thread; as a side effect, dump the transaction and trigger the InnoDB
monitor so the long BF wait can be diagnosed.
@param[in]	trx	transaction owning the lock
@param[in]	locked	true if the lock_sys.mutex is already owned by
			the caller (defaults to true)
@return true if this was a BF thread timeout (caller suppresses the
regular timeout handling), false for a regular lock timeout */
static
bool
wsrep_is_BF_lock_timeout(
	const trx_t*	trx,
	bool		locked = true)
{
	if (wsrep_on_trx(trx)
	    && wsrep_thd_is_BF(trx->mysql_thd, FALSE)
	    && trx->error_state != DB_DEADLOCK) {
		ib::info() << "WSREP: BF lock wait long for trx:" << ib::hex(trx->id)
			   << " query: " << wsrep_thd_query(trx->mysql_thd);
		/* trx_print_latched() requires the lock mutex; acquire
		it only if the caller does not hold it already. */
		if (!locked) {
			lock_mutex_enter();
		}

		ut_ad(lock_mutex_own());

		trx_print_latched(stderr, trx, 3000);

		if (!locked) {
			lock_mutex_exit();
		}

		/* Wake the monitor thread so that it prints lock
		diagnostics to the error log. */
		srv_print_innodb_monitor = TRUE;
		srv_print_innodb_lock_monitor = TRUE;
		os_event_set(srv_monitor_event);
		return true;
	}
	return false;
}
#endif /* WITH_WSREP */
220 | |
221 | /***************************************************************//** |
222 | Puts a user OS thread to wait for a lock to be released. If an error |
223 | occurs during the wait trx->error_state associated with thr is |
224 | != DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK |
225 | are possible errors. DB_DEADLOCK is returned if selective deadlock |
226 | resolution chose this transaction as a victim. */ |
227 | void |
228 | lock_wait_suspend_thread( |
229 | /*=====================*/ |
230 | que_thr_t* thr) /*!< in: query thread associated with the |
231 | user OS thread */ |
232 | { |
233 | srv_slot_t* slot; |
234 | double wait_time; |
235 | trx_t* trx; |
236 | ibool was_declared_inside_innodb; |
237 | int64_t start_time = 0; |
238 | int64_t finish_time; |
239 | ulint sec; |
240 | ulint ms; |
241 | ulong lock_wait_timeout; |
242 | |
243 | trx = thr_get_trx(thr); |
244 | |
245 | if (trx->mysql_thd != 0) { |
246 | DEBUG_SYNC_C("lock_wait_suspend_thread_enter" ); |
247 | } |
248 | |
249 | /* InnoDB system transactions (such as the purge, and |
250 | incomplete transactions that are being rolled back after crash |
251 | recovery) will use the global value of |
252 | innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */ |
253 | lock_wait_timeout = trx_lock_wait_timeout_get(trx); |
254 | |
255 | lock_wait_mutex_enter(); |
256 | |
257 | trx_mutex_enter(trx); |
258 | |
259 | trx->error_state = DB_SUCCESS; |
260 | |
261 | if (thr->state == QUE_THR_RUNNING) { |
262 | |
263 | ut_ad(thr->is_active); |
264 | |
265 | /* The lock has already been released or this transaction |
266 | was chosen as a deadlock victim: no need to suspend */ |
267 | |
268 | if (trx->lock.was_chosen_as_deadlock_victim) { |
269 | |
270 | trx->error_state = DB_DEADLOCK; |
271 | trx->lock.was_chosen_as_deadlock_victim = false; |
272 | } |
273 | |
274 | lock_wait_mutex_exit(); |
275 | trx_mutex_exit(trx); |
276 | return; |
277 | } |
278 | |
279 | ut_ad(!thr->is_active); |
280 | |
281 | slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout); |
282 | |
283 | lock_wait_mutex_exit(); |
284 | trx_mutex_exit(trx); |
285 | |
286 | if (thr->lock_state == QUE_THR_LOCK_ROW) { |
287 | srv_stats.n_lock_wait_count.inc(); |
288 | srv_stats.n_lock_wait_current_count.inc(); |
289 | |
290 | if (ut_usectime(&sec, &ms) == -1) { |
291 | start_time = -1; |
292 | } else { |
293 | start_time = int64_t(sec) * 1000000 + int64_t(ms); |
294 | } |
295 | } |
296 | |
297 | ulint lock_type = ULINT_UNDEFINED; |
298 | |
299 | /* The wait_lock can be cleared by another thread when the |
300 | lock is released. But the wait can only be initiated by the |
301 | current thread which owns the transaction. Only acquire the |
302 | mutex if the wait_lock is still active. */ |
303 | if (const lock_t* wait_lock = trx->lock.wait_lock) { |
304 | lock_mutex_enter(); |
305 | wait_lock = trx->lock.wait_lock; |
306 | if (wait_lock) { |
307 | lock_type = lock_get_type_low(wait_lock); |
308 | } |
309 | lock_mutex_exit(); |
310 | } |
311 | |
312 | ulint had_dict_lock = trx->dict_operation_lock_mode; |
313 | |
314 | switch (had_dict_lock) { |
315 | case 0: |
316 | break; |
317 | case RW_S_LATCH: |
318 | /* Release foreign key check latch */ |
319 | row_mysql_unfreeze_data_dictionary(trx); |
320 | |
321 | DEBUG_SYNC_C("lock_wait_release_s_latch_before_sleep" ); |
322 | break; |
323 | default: |
324 | /* There should never be a lock wait when the |
325 | dictionary latch is reserved in X mode. Dictionary |
326 | transactions should only acquire locks on dictionary |
327 | tables, not other tables. All access to dictionary |
328 | tables should be covered by dictionary |
329 | transactions. */ |
330 | ut_error; |
331 | } |
332 | |
333 | ut_a(trx->dict_operation_lock_mode == 0); |
334 | |
335 | /* Suspend this thread and wait for the event. */ |
336 | |
337 | was_declared_inside_innodb = trx->declared_to_be_inside_innodb; |
338 | |
339 | if (was_declared_inside_innodb) { |
340 | /* We must declare this OS thread to exit InnoDB, since a |
341 | possible other thread holding a lock which this thread waits |
342 | for must be allowed to enter, sooner or later */ |
343 | |
344 | srv_conc_force_exit_innodb(trx); |
345 | } |
346 | |
347 | /* Unknown is also treated like a record lock */ |
348 | if (lock_type == ULINT_UNDEFINED || lock_type == LOCK_REC) { |
349 | thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK); |
350 | } else { |
351 | ut_ad(lock_type == LOCK_TABLE); |
352 | thd_wait_begin(trx->mysql_thd, THD_WAIT_TABLE_LOCK); |
353 | } |
354 | |
355 | os_event_wait(slot->event); |
356 | |
357 | thd_wait_end(trx->mysql_thd); |
358 | |
359 | /* After resuming, reacquire the data dictionary latch if |
360 | necessary. */ |
361 | |
362 | if (was_declared_inside_innodb) { |
363 | |
364 | /* Return back inside InnoDB */ |
365 | |
366 | srv_conc_force_enter_innodb(trx); |
367 | } |
368 | |
369 | if (had_dict_lock) { |
370 | |
371 | row_mysql_freeze_data_dictionary(trx); |
372 | } |
373 | |
374 | wait_time = ut_difftime(ut_time(), slot->suspend_time); |
375 | |
376 | /* Release the slot for others to use */ |
377 | |
378 | lock_wait_table_release_slot(slot); |
379 | |
380 | if (thr->lock_state == QUE_THR_LOCK_ROW) { |
381 | int64_t diff_time; |
382 | if (start_time == -1 || ut_usectime(&sec, &ms) == -1) { |
383 | finish_time = -1; |
384 | diff_time = 0; |
385 | } else { |
386 | finish_time = int64_t(sec) * 1000000 + int64_t(ms); |
387 | diff_time = std::max<int64_t>( |
388 | 0, finish_time - start_time); |
389 | srv_stats.n_lock_wait_time.add(diff_time); |
390 | |
391 | /* Only update the variable if we successfully |
392 | retrieved the start and finish times. See Bug#36819. */ |
393 | if (ulint(diff_time) > lock_sys.n_lock_max_wait_time) { |
394 | lock_sys.n_lock_max_wait_time |
395 | = ulint(diff_time); |
396 | } |
397 | /* Record the lock wait time for this thread */ |
398 | thd_storage_lock_wait(trx->mysql_thd, diff_time); |
399 | } |
400 | |
401 | srv_stats.n_lock_wait_current_count.dec(); |
402 | |
403 | DBUG_EXECUTE_IF("lock_instrument_slow_query_log" , |
404 | os_thread_sleep(1000);); |
405 | } |
406 | |
407 | /* The transaction is chosen as deadlock victim during sleep. */ |
408 | if (trx->error_state == DB_DEADLOCK) { |
409 | return; |
410 | } |
411 | |
412 | if (lock_wait_timeout < 100000000 |
413 | && wait_time > (double) lock_wait_timeout |
414 | #ifdef WITH_WSREP |
415 | && (!wsrep_on_trx(trx) || |
416 | (!wsrep_is_BF_lock_timeout(trx, false) && trx->error_state != DB_DEADLOCK)) |
417 | #endif /* WITH_WSREP */ |
418 | ) { |
419 | |
420 | trx->error_state = DB_LOCK_WAIT_TIMEOUT; |
421 | |
422 | MONITOR_INC(MONITOR_TIMEOUT); |
423 | } |
424 | |
425 | if (trx_is_interrupted(trx)) { |
426 | |
427 | trx->error_state = DB_INTERRUPTED; |
428 | } |
429 | } |
430 | |
431 | /********************************************************************//** |
432 | Releases a user OS thread waiting for a lock to be released, if the |
433 | thread is already suspended. */ |
434 | void |
435 | lock_wait_release_thread_if_suspended( |
436 | /*==================================*/ |
437 | que_thr_t* thr) /*!< in: query thread associated with the |
438 | user OS thread */ |
439 | { |
440 | ut_ad(lock_mutex_own()); |
441 | ut_ad(trx_mutex_own(thr_get_trx(thr))); |
442 | |
443 | /* We own both the lock mutex and the trx_t::mutex but not the |
444 | lock wait mutex. This is OK because other threads will see the state |
445 | of this slot as being in use and no other thread can change the state |
446 | of the slot to free unless that thread also owns the lock mutex. */ |
447 | |
448 | if (thr->slot != NULL && thr->slot->in_use && thr->slot->thr == thr) { |
449 | trx_t* trx = thr_get_trx(thr); |
450 | |
451 | if (trx->lock.was_chosen_as_deadlock_victim) { |
452 | |
453 | trx->error_state = DB_DEADLOCK; |
454 | trx->lock.was_chosen_as_deadlock_victim = false; |
455 | } |
456 | |
457 | os_event_set(thr->slot->event); |
458 | } |
459 | } |
460 | |
461 | /*********************************************************************//** |
462 | Check if the thread lock wait has timed out. Release its locks if the |
463 | wait has actually timed out. */ |
464 | static |
465 | void |
466 | lock_wait_check_and_cancel( |
467 | /*=======================*/ |
468 | const srv_slot_t* slot) /*!< in: slot reserved by a user |
469 | thread when the wait started */ |
470 | { |
471 | trx_t* trx; |
472 | double wait_time; |
473 | ib_time_t suspend_time = slot->suspend_time; |
474 | |
475 | ut_ad(lock_wait_mutex_own()); |
476 | |
477 | ut_ad(slot->in_use); |
478 | |
479 | ut_ad(slot->suspended); |
480 | |
481 | wait_time = ut_difftime(ut_time(), suspend_time); |
482 | |
483 | trx = thr_get_trx(slot->thr); |
484 | |
485 | if (trx_is_interrupted(trx) |
486 | || (slot->wait_timeout < 100000000 |
487 | && (wait_time > (double) slot->wait_timeout |
488 | || wait_time < 0))) { |
489 | |
490 | /* Timeout exceeded or a wrap-around in system |
491 | time counter: cancel the lock request queued |
492 | by the transaction and release possible |
493 | other transactions waiting behind; it is |
494 | possible that the lock has already been |
495 | granted: in that case do nothing */ |
496 | |
497 | lock_mutex_enter(); |
498 | |
499 | trx_mutex_enter(trx); |
500 | |
501 | if (trx->lock.wait_lock != NULL) { |
502 | |
503 | ut_a(trx->lock.que_state == TRX_QUE_LOCK_WAIT); |
504 | |
505 | #ifdef WITH_WSREP |
506 | if (!wsrep_is_BF_lock_timeout(trx)) { |
507 | #endif /* WITH_WSREP */ |
508 | lock_cancel_waiting_and_release(trx->lock.wait_lock); |
509 | #ifdef WITH_WSREP |
510 | } |
511 | #endif /* WITH_WSREP */ |
512 | } |
513 | |
514 | lock_mutex_exit(); |
515 | |
516 | trx_mutex_exit(trx); |
517 | } |
518 | |
519 | } |
520 | |
521 | /*********************************************************************//** |
522 | A thread which wakes up threads whose lock wait may have lasted too long. |
523 | @return a dummy parameter */ |
524 | extern "C" |
525 | os_thread_ret_t |
526 | DECLARE_THREAD(lock_wait_timeout_thread)(void*) |
527 | { |
528 | int64_t sig_count = 0; |
529 | os_event_t event = lock_sys.timeout_event; |
530 | |
531 | ut_ad(!srv_read_only_mode); |
532 | |
533 | #ifdef UNIV_PFS_THREAD |
534 | pfs_register_thread(srv_lock_timeout_thread_key); |
535 | #endif /* UNIV_PFS_THREAD */ |
536 | |
537 | do { |
538 | srv_slot_t* slot; |
539 | |
540 | /* When someone is waiting for a lock, we wake up every second |
541 | and check if a timeout has passed for a lock wait */ |
542 | |
543 | os_event_wait_time_low(event, 1000000, sig_count); |
544 | sig_count = os_event_reset(event); |
545 | |
546 | if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { |
547 | break; |
548 | } |
549 | |
550 | lock_wait_mutex_enter(); |
551 | |
552 | /* Check all slots for user threads that are waiting |
553 | on locks, and if they have exceeded the time limit. */ |
554 | |
555 | for (slot = lock_sys.waiting_threads; |
556 | slot < lock_sys.last_slot; |
557 | ++slot) { |
558 | |
559 | /* We are doing a read without the lock mutex |
560 | and/or the trx mutex. This is OK because a slot |
561 | can't be freed or reserved without the lock wait |
562 | mutex. */ |
563 | |
564 | if (slot->in_use) { |
565 | lock_wait_check_and_cancel(slot); |
566 | } |
567 | } |
568 | |
569 | sig_count = os_event_reset(event); |
570 | |
571 | lock_wait_mutex_exit(); |
572 | |
573 | } while (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP); |
574 | |
575 | lock_sys.timeout_thread_active = false; |
576 | |
577 | /* We count the number of threads in os_thread_exit(). A created |
578 | thread should always use that to exit and not use return() to exit. */ |
579 | |
580 | os_thread_exit(); |
581 | |
582 | OS_THREAD_DUMMY_RETURN; |
583 | } |
584 | |
585 | |