/*****************************************************************************

Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2013, 2018, MariaDB Corporation.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file sync/sync0arr.cc
The wait array used in synchronization primitives

Created 9/5/1995 Heikki Tuuri
*******************************************************/

#include "ha_prototypes.h"
#include "sync0arr.h"
#include <mysqld_error.h>
#include <mysql/plugin.h>
#include <hash.h>
#include <myisampack.h>
#include <sql_acl.h>
#include <mysys_err.h>
#include <my_sys.h>
#include "srv0srv.h"
#include "srv0start.h"
#include "i_s.h"
#include <sql_plugin.h>
#include <innodb_priv.h>

#include "sync0sync.h"
#include "lock0lock.h"
#include "sync0rw.h"
#include "sync0debug.h"
#include "os0event.h"
#include "os0file.h"
#include "srv0srv.h"

/*
			WAIT ARRAY
			==========

The wait array consists of cells each of which has an event object created
for it. The threads waiting for a mutex, for example, can reserve a cell
in the array and suspend themselves to wait for the event to become signaled.
When using the wait array, remember to make sure that some thread holding
the synchronization object will eventually know that there is a waiter in
the array and signal the object, to prevent infinite wait. Why did we choose
to implement a wait array? First, to make mutexes fast, we had to code
our own implementation of them, which resorts to slow operating system
primitives only in uncommon cases. Then we had the choice of
assigning a unique OS event for each mutex, which would be simpler, or
using a global wait array. In some operating systems, the global wait
array solution is more efficient and flexible, because we can do with
a very small number of OS events, say 200. In NT 3.51, allocating events
seems to be a quadratic algorithm, because 10 000 events are created fast,
but 100 000 events take a couple of minutes to create.

As of 5.0.30 the above mentioned design was changed. Since the OS can now
handle millions of wait events efficiently, we no longer have the concept
of each cell of the wait array having one event. Instead, the event that a
thread wants to wait on is embedded in the wait object (mutex or rw_lock).
We still keep the global wait array for the sake of diagnostics and also to
avoid infinite wait. The error_monitor thread scans the global wait array to
signal any waiting threads who have missed the signal. */
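
/* A minimal sketch of the reservation protocol as seen from a waiting
thread. This is an illustration only: the caller, the latch variable and
still_locked() are hypothetical placeholders, not functions defined in
this file.

	sync_array_t*	arr  = ...;	// one of the sync_wait_array[] instances
	sync_cell_t*	cell = sync_array_reserve_cell(
		arr, &mutex, SYNC_MUTEX, __FILE__, __LINE__);

	if (cell != NULL) {
		if (still_locked(&mutex)) {
			// Blocks until the latch holder signals the event;
			// the cell is freed automatically on return.
			sync_array_wait_event(arr, cell);
		} else {
			sync_array_free_cell(arr, cell);
		}
	}

On release, the latch holder must signal the event embedded in the latch
(for example with os_event_set()); otherwise the error_monitor thread,
which scans this array, eventually wakes the waiter. */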

typedef SyncArrayMutex::MutexType WaitMutex;
typedef BlockSyncArrayMutex::MutexType BlockWaitMutex;

/** The latch types that use the sync array. */
union sync_object_t {

	/** RW lock instance */
	rw_lock_t*	lock;

	/** Mutex instance */
	WaitMutex*	mutex;

	/** Block mutex instance */
	BlockWaitMutex*	bpmutex;
};

/** A cell where an individual thread may wait suspended until a resource
is released. The suspending is implemented using an operating system
event semaphore. */

struct sync_cell_t {
	sync_object_t	latch;		/*!< pointer to the object the
					thread is waiting for; if NULL
					the cell is free for use */
	ulint		request_type;	/*!< lock type requested on the
					object */
	const char*	file;		/*!< in debug version file where
					requested */
	ulint		line;		/*!< in debug version line where
					requested, or ULINT_UNDEFINED */
	os_thread_id_t	thread_id;	/*!< thread id of this waiting
					thread */
	bool		waiting;	/*!< TRUE if the thread has already
					called sync_array_wait_event
					on this cell */
	int64_t		signal_count;	/*!< We capture the signal_count
					of the latch when we
					reset the event. This value is
					then passed on to os_event_wait
					and we wait only if the event
					has not been signalled in the
					period between the reset and
					wait call. */
	time_t		reservation_time;/*!< time when the thread reserved
					the wait cell */
};

/* NOTE: It is allowed for a thread to wait for an event allocated for
the array without owning the protecting mutex (depending on the case:
OS or database mutex), but all changes (set or reset) to the state of
the event must be made while owning the mutex. */

/** Synchronization array */
struct sync_array_t {

	/** Constructor
	Creates a synchronization wait array. It is protected by a mutex
	which is automatically reserved when the functions operating on it
	are called.
	@param[in]	num_cells	Number of cells to create */
	sync_array_t(ulint num_cells)
		UNIV_NOTHROW;

	/** Destructor */
	~sync_array_t()
		UNIV_NOTHROW;

	ulint		n_reserved;	/*!< number of currently reserved
					cells in the wait array */
	ulint		n_cells;	/*!< number of cells in the
					wait array */
	sync_cell_t*	array;		/*!< pointer to wait array */
	SysMutex	mutex;		/*!< System mutex protecting the
					data structure.  As this data
					structure is used in constructing
					the database mutex, to prevent
					infinite recursion in implementation,
					we fall back to an OS mutex. */
	ulint		res_count;	/*!< count of cell reservations
					since creation of the array */
	ulint		next_free_slot;	/*!< the next free cell in the array */
	ulint		first_free_slot;/*!< the last slot that was freed */
};

/** User configured sync array size */
ulong	srv_sync_array_size = 1;

/** Locally stored copy of srv_sync_array_size */
ulint	sync_array_size;

/** The global array of wait cells for implementation of the database's own
mutexes and read-write locks */
sync_array_t**	sync_wait_array;

/** count of how many times an object has been signalled */
static ulint	sg_count;

#define sync_array_exit(a)	mutex_exit(&(a)->mutex)
#define sync_array_enter(a)	mutex_enter(&(a)->mutex)

#ifdef UNIV_DEBUG
/******************************************************************//**
This function is called only in the debug version. Detects a deadlock
of one or more threads because of waits of semaphores.
@return TRUE if deadlock detected */
static
bool
sync_array_detect_deadlock(
/*=======================*/
	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
				own the mutex protecting the array */
	sync_cell_t*	start,	/*!< in: cell where recursive search started */
	sync_cell_t*	cell,	/*!< in: cell to search */
	ulint		depth);	/*!< in: recursion depth */
#endif /* UNIV_DEBUG */

/** Constructor
Creates a synchronization wait array. It is protected by a mutex
which is automatically reserved when the functions operating on it
are called.
@param[in]	num_cells	Number of cells to create */
sync_array_t::sync_array_t(ulint num_cells)
	UNIV_NOTHROW
	:
	n_reserved(),
	n_cells(num_cells),
	array(UT_NEW_ARRAY_NOKEY(sync_cell_t, num_cells)),
	mutex(),
	res_count(),
	next_free_slot(),
	first_free_slot(ULINT_UNDEFINED)
{
	ut_a(num_cells > 0);

	memset(array, 0x0, sizeof(sync_cell_t) * n_cells);

	/* Then create the mutex to protect the wait array */
	mutex_create(LATCH_ID_SYNC_ARRAY_MUTEX, &mutex);
}

/** Validate the integrity of the wait array. Check
that the number of reserved cells equals the count variable.
@param[in,out]	arr	sync wait array */
static
void
sync_array_validate(sync_array_t* arr)
{
	ulint		i;
	ulint		count	= 0;

	sync_array_enter(arr);

	for (i = 0; i < arr->n_cells; i++) {
		sync_cell_t*	cell;

		cell = sync_array_get_nth_cell(arr, i);

		if (cell->latch.mutex != NULL) {
			count++;
		}
	}

	ut_a(count == arr->n_reserved);

	sync_array_exit(arr);
}

/** Destructor */
sync_array_t::~sync_array_t()
	UNIV_NOTHROW
{
	ut_a(n_reserved == 0);

	sync_array_validate(this);

	/* Release the mutex protecting the wait array */

	mutex_free(&mutex);

	UT_DELETE_ARRAY(array);
}

/*****************************************************************//**
Gets the nth cell in array.
@return cell */
UNIV_INTERN
sync_cell_t*
sync_array_get_nth_cell(
/*====================*/
	sync_array_t*	arr,	/*!< in: sync array */
	ulint		n)	/*!< in: index */
{
	ut_a(n < arr->n_cells);

	return(arr->array + n);
}

/******************************************************************//**
Frees the resources in a wait array. */
static
void
sync_array_free(
/*============*/
	sync_array_t*	arr)	/*!< in, own: sync wait array */
{
	UT_DELETE(arr);
}

/*******************************************************************//**
Returns the event that the thread owning the cell waits for. */
static
os_event_t
sync_cell_get_event(
/*================*/
	sync_cell_t*	cell)	/*!< in: non-empty sync array cell */
{
	ulint	type = cell->request_type;

	if (type == SYNC_MUTEX) {

		return(cell->latch.mutex->event());

	} else if (type == SYNC_BUF_BLOCK) {

		return(cell->latch.bpmutex->event());

	} else if (type == RW_LOCK_X_WAIT) {

		return(cell->latch.lock->wait_ex_event);

	} else { /* RW_LOCK_S and RW_LOCK_X wait on the same event */

		return(cell->latch.lock->event);
	}
}

/******************************************************************//**
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state.
@return sync cell to wait on */
sync_cell_t*
sync_array_reserve_cell(
/*====================*/
	sync_array_t*	arr,	/*!< in: wait array */
	void*		object,	/*!< in: pointer to the object to wait for */
	ulint		type,	/*!< in: lock request type */
	const char*	file,	/*!< in: file where requested */
	unsigned	line)	/*!< in: line where requested */
{
	sync_cell_t*	cell;

	sync_array_enter(arr);

	if (arr->first_free_slot != ULINT_UNDEFINED) {
		/* Try and find a slot in the free list */
		ut_ad(arr->first_free_slot < arr->next_free_slot);
		cell = sync_array_get_nth_cell(arr, arr->first_free_slot);
		arr->first_free_slot = cell->line;
	} else if (arr->next_free_slot < arr->n_cells) {
		/* Try and find a slot after the currently allocated slots */
		cell = sync_array_get_nth_cell(arr, arr->next_free_slot);
		++arr->next_free_slot;
	} else {
		sync_array_exit(arr);

		// We should return NULL and if there is more than
		// one sync array, try another sync array instance.
		return(NULL);
	}

	++arr->res_count;

	ut_ad(arr->n_reserved < arr->n_cells);
	ut_ad(arr->next_free_slot <= arr->n_cells);

	++arr->n_reserved;

	/* Reserve the cell. */
	ut_ad(cell->latch.mutex == NULL);

	cell->request_type = type;

	if (cell->request_type == SYNC_MUTEX) {
		cell->latch.mutex = reinterpret_cast<WaitMutex*>(object);
	} else if (cell->request_type == SYNC_BUF_BLOCK) {
		cell->latch.bpmutex = reinterpret_cast<BlockWaitMutex*>(object);
	} else {
		cell->latch.lock = reinterpret_cast<rw_lock_t*>(object);
	}

	cell->waiting = false;

	cell->file = file;
	cell->line = line;

	sync_array_exit(arr);

	cell->thread_id = os_thread_get_curr_id();

	cell->reservation_time = ut_time();

	/* Make sure the event is reset and also store the value of
	signal_count at which the event was reset. */
	os_event_t	event = sync_cell_get_event(cell);
	cell->signal_count = os_event_reset(event);

	return(cell);
}
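
/* The reset-then-wait protocol above is what prevents a lost wakeup.
A sketch of the underlying mechanism (illustration only; the caller is
hypothetical, the os_event functions are the ones this file already uses):

	os_event_t	event = sync_cell_get_event(cell);
	int64_t		sig   = os_event_reset(event);	// remember generation

	// ... re-check the latch here; the holder may call os_event_set()
	// at any time after this point ...

	os_event_wait_low(event, sig);	// returns at once if the event was
					// signalled after the reset

Because os_event_wait_low() compares the captured signal count with the
event's current one, a signal arriving between the reset and the wait is
not lost. */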

/******************************************************************//**
Frees the cell. NOTE! sync_array_wait_event frees the cell
automatically! */
void
sync_array_free_cell(
/*=================*/
	sync_array_t*	arr,	/*!< in: wait array */
	sync_cell_t*&	cell)	/*!< in/out: the cell in the array */
{
	sync_array_enter(arr);

	ut_a(cell->latch.mutex != NULL);

	cell->waiting = false;
	cell->signal_count = 0;
	cell->latch.mutex = NULL;

	/* Setup the list of free slots in the array */
	cell->line = arr->first_free_slot;

	arr->first_free_slot = cell - arr->array;

	ut_a(arr->n_reserved > 0);
	arr->n_reserved--;

	if (arr->next_free_slot > arr->n_cells / 2 && arr->n_reserved == 0) {
#ifdef UNIV_DEBUG
		for (ulint i = 0; i < arr->next_free_slot; ++i) {
			cell = sync_array_get_nth_cell(arr, i);

			ut_ad(!cell->waiting);
			ut_ad(cell->latch.mutex == 0);
			ut_ad(cell->signal_count == 0);
		}
#endif /* UNIV_DEBUG */
		arr->next_free_slot = 0;
		arr->first_free_slot = ULINT_UNDEFINED;
	}
	sync_array_exit(arr);

	cell = 0;
}
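
/* The free list above is a LIFO stack threaded through the cells
themselves: the otherwise unused `line' field of a free cell stores the
index of the next free cell. A small worked example: if cells 0..2 are in
use (next_free_slot == 3, first_free_slot == ULINT_UNDEFINED) and cell 1
is freed followed by cell 0, then cell[1].line == ULINT_UNDEFINED,
cell[0].line == 1 and first_free_slot == 0; the next two reservations pop
cell 0 and then cell 1, after which first_free_slot is ULINT_UNDEFINED
again. */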

/******************************************************************//**
This function should be called when a thread starts to wait on
a wait array cell. In the debug version this function checks
if the wait for a semaphore will result in a deadlock, in which
case it prints info and asserts. */
void
sync_array_wait_event(
/*==================*/
	sync_array_t*	arr,	/*!< in: wait array */
	sync_cell_t*&	cell)	/*!< in: the reserved cell */
{
	sync_array_enter(arr);

	ut_ad(!cell->waiting);
	ut_ad(cell->latch.mutex);
	ut_ad(os_thread_get_curr_id() == cell->thread_id);

	cell->waiting = true;

#ifdef UNIV_DEBUG

	/* We use simple enter to the mutex below, because if
	we cannot acquire it at once, mutex_enter would call
	recursively sync_array routines, leading to trouble.
	rw_lock_debug_mutex freezes the debug lists. */

	rw_lock_debug_mutex_enter();

	if (sync_array_detect_deadlock(arr, cell, cell, 0)) {

		ib::fatal() << "########################################"
			" Deadlock Detected!";
	}

	rw_lock_debug_mutex_exit();
#endif /* UNIV_DEBUG */
	sync_array_exit(arr);

	os_event_wait_low(sync_cell_get_event(cell), cell->signal_count);

	sync_array_free_cell(arr, cell);

	cell = 0;
}

/******************************************************************//**
Reports info of a wait array cell. */
static
void
sync_array_cell_print(
/*==================*/
	FILE*		file,	/*!< in: file where to print */
	sync_cell_t*	cell)	/*!< in: sync cell */
{
	rw_lock_t*	rwlock;
	ulint		type;
	ulint		writer;

	type = cell->request_type;

	fprintf(file,
		"--Thread %lu has waited at %s line %lu"
		" for %.2f seconds the semaphore:\n",
		(ulong) os_thread_pf(cell->thread_id),
		innobase_basename(cell->file), (ulong) cell->line,
		difftime(time(NULL), cell->reservation_time));

	if (type == SYNC_MUTEX) {
		WaitMutex*	mutex = cell->latch.mutex;
		const WaitMutex::MutexPolicy&	policy = mutex->policy();
#ifdef UNIV_DEBUG
		const char*	name = policy.get_enter_filename();
		if (name == NULL) {
			/* The mutex might have been released. */
			name = "NULL";
		}
#endif /* UNIV_DEBUG */

		if (mutex) {
			fprintf(file,
				"Mutex at %p, %s, lock var %x\n"
#ifdef UNIV_DEBUG
				"Last time reserved in file %s line %u"
#endif /* UNIV_DEBUG */
				"\n",
				(void*) mutex,
				policy.to_string().c_str(),
				mutex->state()
#ifdef UNIV_DEBUG
				,name,
				policy.get_enter_line()
#endif /* UNIV_DEBUG */
				);
		}
	} else if (type == SYNC_BUF_BLOCK) {
		BlockWaitMutex*	mutex = cell->latch.bpmutex;

		const BlockWaitMutex::MutexPolicy&	policy =
			mutex->policy();
#ifdef UNIV_DEBUG
		const char*	name = policy.get_enter_filename();
		if (name == NULL) {
			/* The mutex might have been released. */
			name = "NULL";
		}
#endif /* UNIV_DEBUG */

		fprintf(file,
			"Mutex at %p, %s, lock var %lu\n"
#ifdef UNIV_DEBUG
			"Last time reserved in file %s line %lu"
#endif /* UNIV_DEBUG */
			"\n",
			(void*) mutex,
			policy.to_string().c_str(),
			(ulong) mutex->state()
#ifdef UNIV_DEBUG
			,name,
			(ulong) policy.get_enter_line()
#endif /* UNIV_DEBUG */
			);
	} else if (type == RW_LOCK_X
		   || type == RW_LOCK_X_WAIT
		   || type == RW_LOCK_SX
		   || type == RW_LOCK_S) {

		fputs(type == RW_LOCK_X ? "X-lock on"
		      : type == RW_LOCK_X_WAIT ? "X-lock (wait_ex) on"
		      : type == RW_LOCK_SX ? "SX-lock on"
		      : "S-lock on", file);

		rwlock = cell->latch.lock;

		if (rwlock) {
			fprintf(file,
				" RW-latch at %p created in file %s line %u\n",
				(void*) rwlock, innobase_basename(rwlock->cfile_name),
				rwlock->cline);

			writer = rw_lock_get_writer(rwlock);

			if (writer != RW_LOCK_NOT_LOCKED) {

				fprintf(file,
					"a writer (thread id " ULINTPF ") has"
					" reserved it in mode %s",
					os_thread_pf(rwlock->writer_thread),
					writer == RW_LOCK_X ? " exclusive\n"
					: writer == RW_LOCK_SX ? " SX\n"
					: " wait exclusive\n");
			}

			fprintf(file,
				"number of readers " ULINTPF
				", waiters flag %d, "
				"lock_word: %x\n"
				"Last time write locked in file %s line %u"
#if 0 /* JAN: TODO: FIX LATER */
				"\nHolder thread " ULINTPF
				" file %s line " ULINTPF
#endif
				"\n",
				rw_lock_get_reader_count(rwlock),
				my_atomic_load32_explicit(&rwlock->waiters, MY_MEMORY_ORDER_RELAXED),
				my_atomic_load32_explicit(&rwlock->lock_word, MY_MEMORY_ORDER_RELAXED),
				innobase_basename(rwlock->last_x_file_name),
				rwlock->last_x_line
#if 0 /* JAN: TODO: FIX LATER */
				, os_thread_pf(rwlock->thread_id),
				innobase_basename(rwlock->file_name),
				rwlock->line
#endif
				);
		}

	} else {
		ut_error;
	}

	if (!cell->waiting) {
		fputs("wait has ended\n", file);
	}
}

#ifdef UNIV_DEBUG
/******************************************************************//**
Looks for a cell with the given thread id.
@return pointer to cell or NULL if not found */
static
sync_cell_t*
sync_array_find_thread(
/*===================*/
	sync_array_t*	arr,	/*!< in: wait array */
	os_thread_id_t	thread)	/*!< in: thread id */
{
	ulint		i;

	for (i = 0; i < arr->n_cells; i++) {
		sync_cell_t*	cell;

		cell = sync_array_get_nth_cell(arr, i);

		if (cell->latch.mutex != NULL
		    && os_thread_eq(cell->thread_id, thread)) {

			return(cell);	/* Found */
		}
	}

	return(NULL);	/* Not found */
}

/******************************************************************//**
Recursion step for deadlock detection.
@return TRUE if deadlock detected */
static
ibool
sync_array_deadlock_step(
/*=====================*/
	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
				own the mutex protecting the array */
	sync_cell_t*	start,	/*!< in: cell where recursive search
				started */
	os_thread_id_t	thread,	/*!< in: thread to look at */
	ulint		pass,	/*!< in: pass value */
	ulint		depth)	/*!< in: recursion depth */
{
	sync_cell_t*	new_cell;

	if (pass != 0) {
		/* If pass != 0, then we do not know which threads are
		responsible for releasing the lock, and no deadlock can
		be detected. */

		return(FALSE);
	}

	new_cell = sync_array_find_thread(arr, thread);

	if (new_cell == start) {
		/* Deadlock */
		fputs("########################################\n"
		      "DEADLOCK of threads detected!\n", stderr);

		return(TRUE);

	} else if (new_cell) {
		return(sync_array_detect_deadlock(
			arr, start, new_cell, depth + 1));
	}
	return(FALSE);
}

/**
Report an error to stderr.
@param lock	rw-lock instance
@param debug	rw-lock debug information
@param cell	thread context */
static
void
sync_array_report_error(
	rw_lock_t*		lock,
	rw_lock_debug_t*	debug,
	sync_cell_t*		cell)
{
	fprintf(stderr, "rw-lock %p ", (void*) lock);
	sync_array_cell_print(stderr, cell);
	rw_lock_debug_print(stderr, debug);
}

/******************************************************************//**
This function is called only in the debug version. Detects a deadlock
of one or more threads because of waits of semaphores.
@return TRUE if deadlock detected */
static
bool
sync_array_detect_deadlock(
/*=======================*/
	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
				own the mutex protecting the array */
	sync_cell_t*	start,	/*!< in: cell where recursive search started */
	sync_cell_t*	cell,	/*!< in: cell to search */
	ulint		depth)	/*!< in: recursion depth */
{
	rw_lock_t*	lock;
	os_thread_id_t	thread;
	ibool		ret;
	rw_lock_debug_t*debug;

	ut_a(arr);
	ut_a(start);
	ut_a(cell);
	ut_ad(cell->latch.mutex != 0);
	ut_ad(os_thread_get_curr_id() == start->thread_id);
	ut_ad(depth < 100);

	depth++;

	if (!cell->waiting) {
		/* No deadlock here */
		return(false);
	}

	switch (cell->request_type) {
	case SYNC_MUTEX: {

		WaitMutex*	mutex = cell->latch.mutex;
		const WaitMutex::MutexPolicy&	policy = mutex->policy();

		if (mutex->state() != MUTEX_STATE_UNLOCKED) {
			thread = policy.get_thread_id();

			/* Note that mutex->thread_id above may also be
			OS_THREAD_ID_UNDEFINED, because the thread
			which held the mutex may not yet have updated
			the value, or it may have already released the
			mutex: in this case no deadlock can occur, as
			the wait array cannot contain a thread with
			ID_UNDEFINED value. */
			ret = sync_array_deadlock_step(
				arr, start, thread, 0, depth);

			if (ret) {
				const char*	name;

				name = policy.get_enter_filename();

				if (name == NULL) {
					/* The mutex might have been
					released. */
					name = "NULL";
				}

				ib::info()
					<< "Mutex " << mutex << " owned by"
					" thread " << os_thread_pf(thread)
					<< " file " << name << " line "
					<< policy.get_enter_line();

				sync_array_cell_print(stderr, cell);

				return(true);
			}
		}

		/* No deadlock */
		return(false);
	}

	case SYNC_BUF_BLOCK: {

		BlockWaitMutex*	mutex = cell->latch.bpmutex;

		const BlockWaitMutex::MutexPolicy&	policy =
			mutex->policy();

		if (mutex->state() != MUTEX_STATE_UNLOCKED) {
			thread = policy.get_thread_id();

			/* Note that mutex->thread_id above may also be
			OS_THREAD_ID_UNDEFINED, because the thread
			which held the mutex may not yet have updated
			the value, or it may have already released the
			mutex: in this case no deadlock can occur, as
			the wait array cannot contain a thread with
			ID_UNDEFINED value. */
			ret = sync_array_deadlock_step(
				arr, start, thread, 0, depth);

			if (ret) {
				const char*	name;

				name = policy.get_enter_filename();

				if (name == NULL) {
					/* The mutex might have been
					released. */
					name = "NULL";
				}

				ib::info()
					<< "Mutex " << mutex << " owned by"
					" thread " << os_thread_pf(thread)
					<< " file " << name << " line "
					<< policy.get_enter_line();

				return(true);
			}
		}

		/* No deadlock */
		return(false);
	}

	case RW_LOCK_X:
	case RW_LOCK_X_WAIT:

		lock = cell->latch.lock;

		for (debug = UT_LIST_GET_FIRST(lock->debug_list);
		     debug != NULL;
		     debug = UT_LIST_GET_NEXT(list, debug)) {

			thread = debug->thread_id;

			switch (debug->lock_type) {
			case RW_LOCK_X:
			case RW_LOCK_SX:
			case RW_LOCK_X_WAIT:
				if (os_thread_eq(thread, cell->thread_id)) {
					break;
				}
				/* fall through */
			case RW_LOCK_S:

				/* The (wait) x-lock request can block
				infinitely only if someone (possibly also
				the cell thread) is holding an s-lock, or
				someone (cannot be the cell thread) is
				holding a (wait) x-lock or sx-lock, and
				that thread is blocked by the start
				thread */

				ret = sync_array_deadlock_step(
					arr, start, thread, debug->pass,
					depth);

				if (ret) {
					sync_array_report_error(
						lock, debug, cell);
					rw_lock_debug_print(stderr, debug);
					return(TRUE);
				}
			}
		}

		return(false);

	case RW_LOCK_SX:

		lock = cell->latch.lock;

		for (debug = UT_LIST_GET_FIRST(lock->debug_list);
		     debug != 0;
		     debug = UT_LIST_GET_NEXT(list, debug)) {

			thread = debug->thread_id;

			switch (debug->lock_type) {
			case RW_LOCK_X:
			case RW_LOCK_SX:
			case RW_LOCK_X_WAIT:

				if (os_thread_eq(thread, cell->thread_id)) {
					break;
				}

				/* The sx-lock request can block infinitely
				only if someone (possibly also the cell
				thread) is holding a (wait) x-lock or
				sx-lock, and that thread is blocked by the
				start thread */

				ret = sync_array_deadlock_step(
					arr, start, thread, debug->pass,
					depth);

				if (ret) {
					sync_array_report_error(
						lock, debug, cell);
					return(TRUE);
				}
			}
		}

		return(false);

	case RW_LOCK_S:

		lock = cell->latch.lock;

		for (debug = UT_LIST_GET_FIRST(lock->debug_list);
		     debug != 0;
		     debug = UT_LIST_GET_NEXT(list, debug)) {

			thread = debug->thread_id;

			if (debug->lock_type == RW_LOCK_X
			    || debug->lock_type == RW_LOCK_X_WAIT) {

				/* The s-lock request can block infinitely
				only if someone (possibly also the cell
				thread) is holding a (wait) x-lock, and
				that thread is blocked by the start
				thread */

				ret = sync_array_deadlock_step(
					arr, start, thread, debug->pass,
					depth);

				if (ret) {
					sync_array_report_error(
						lock, debug, cell);
					return(TRUE);
				}
			}
		}

		return(false);

	default:
		ut_error;
	}

	return(true);
}
#endif /* UNIV_DEBUG */

/**********************************************************************//**
Increments the signalled count. */
void
sync_array_object_signalled()
/*=========================*/
{
	++sg_count;
}

/**********************************************************************//**
Prints warnings of long semaphore waits to stderr.
@return TRUE if fatal semaphore wait threshold was exceeded */
static
bool
sync_array_print_long_waits_low(
/*============================*/
	sync_array_t*	arr,	/*!< in: sync array instance */
	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
	const void**	sema,	/*!< out: longest-waited-for semaphore */
	ibool*		noticed)/*!< out: TRUE if long wait noticed */
{
	ulint		fatal_timeout = srv_fatal_semaphore_wait_threshold;
	ibool		fatal = FALSE;
	double		longest_diff = 0;
	ulint		i;

	/* For huge tables, skip the check during CHECK TABLE etc... */
	if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) {
		return(false);
	}

#ifdef UNIV_DEBUG_VALGRIND
	/* Increase the timeouts if running under valgrind because it executes
	extremely slowly. UNIV_DEBUG_VALGRIND does not necessarily mean that
	we are running under valgrind but we have no better way to tell.
	See Bug#58432 innodb.innodb_bug56143 fails under valgrind
	for an example */
# define SYNC_ARRAY_TIMEOUT	2400
	fatal_timeout *= 10;
#else
# define SYNC_ARRAY_TIMEOUT	240
#endif

	for (ulint i = 0; i < arr->n_cells; i++) {

		sync_cell_t*	cell;
		void*		latch;

		cell = sync_array_get_nth_cell(arr, i);

		latch = cell->latch.mutex;

		if (latch == NULL || !cell->waiting) {

			continue;
		}

		double	diff = difftime(time(NULL), cell->reservation_time);

		if (diff > SYNC_ARRAY_TIMEOUT) {
			ib::warn() << "A long semaphore wait:";
			sync_array_cell_print(stderr, cell);
			*noticed = TRUE;
		}

		if (diff > fatal_timeout) {
			fatal = TRUE;
		}

		if (diff > longest_diff) {
			longest_diff = diff;
			*sema = latch;
			*waiter = cell->thread_id;
		}
	}

	/* If we found a long semaphore wait, print all the threads that
	are waiting for a semaphore. */
	if (*noticed) {
		for (i = 0; i < arr->n_cells; i++) {
			void*		wait_object;
			sync_cell_t*	cell;

			cell = sync_array_get_nth_cell(arr, i);

			wait_object = cell->latch.mutex;

			if (wait_object == NULL || !cell->waiting) {

				continue;
			}

			ib::info() << "A semaphore wait:";
			sync_array_cell_print(stderr, cell);
		}
	}

#undef SYNC_ARRAY_TIMEOUT

	return(fatal);
}
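
/* A concrete (illustrative) timeline: assuming the fatal threshold
srv_fatal_semaphore_wait_threshold is at its usual default of 600 seconds
(the actual default is defined elsewhere, in srv0srv), a stuck waiter is
first reported as "A long semaphore wait" once it exceeds
SYNC_ARRAY_TIMEOUT (240 s, or 2400 s in UNIV_DEBUG_VALGRIND builds), and
this function starts returning TRUE once the wait exceeds the fatal
threshold, which the error monitor thread treats as grounds for an
intentional crash. */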

/**********************************************************************//**
Prints warnings of long semaphore waits to stderr.
@return TRUE if fatal semaphore wait threshold was exceeded */
ibool
sync_array_print_long_waits(
/*========================*/
	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
	const void**	sema)	/*!< out: longest-waited-for semaphore */
{
	ulint		i;
	ibool		fatal = FALSE;
	ibool		noticed = FALSE;

	for (i = 0; i < sync_array_size; ++i) {

		sync_array_t*	arr = sync_wait_array[i];

		sync_array_enter(arr);

		if (sync_array_print_long_waits_low(
				arr, waiter, sema, &noticed)) {

			fatal = TRUE;
		}

		sync_array_exit(arr);
	}

	if (noticed) {
		fprintf(stderr,
			"InnoDB: ###### Starts InnoDB Monitor"
			" for 30 secs to print diagnostic info:\n");

		my_bool	old_val = srv_print_innodb_monitor;

		/* If some crucial semaphore is reserved, then also the InnoDB
		Monitor can hang, and we do not get diagnostics. Since in
		many cases an InnoDB hang is caused by a pwrite() or a pread()
		call hanging inside the operating system, let us print right
		now the values of pending calls of these. */

		fprintf(stderr,
			"InnoDB: Pending reads " UINT64PF
			", writes " UINT64PF "\n",
			MONITOR_VALUE(MONITOR_OS_PENDING_READS),
			MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));

		srv_print_innodb_monitor = TRUE;

		lock_set_timeout_event();

		os_thread_sleep(30000000);

		srv_print_innodb_monitor = static_cast<my_bool>(old_val);
		fprintf(stderr,
			"InnoDB: ###### Diagnostic info printed"
			" to the standard error stream\n");
	}

	return(fatal);
}

/**********************************************************************//**
Prints info of the wait array. */
static
void
sync_array_print_info_low(
/*======================*/
	FILE*		file,	/*!< in: file where to print */
	sync_array_t*	arr)	/*!< in: wait array */
{
	ulint		i;
	ulint		count = 0;

	fprintf(file,
		"OS WAIT ARRAY INFO: reservation count " ULINTPF "\n",
		arr->res_count);

	for (i = 0; count < arr->n_reserved; ++i) {
		sync_cell_t*	cell;

		cell = sync_array_get_nth_cell(arr, i);

		if (cell->latch.mutex != 0) {
			count++;
			sync_array_cell_print(file, cell);
		}
	}
}

/**********************************************************************//**
Prints info of the wait array. */
static
void
sync_array_print_info(
/*==================*/
	FILE*		file,	/*!< in: file where to print */
	sync_array_t*	arr)	/*!< in: wait array */
{
	sync_array_enter(arr);

	sync_array_print_info_low(file, arr);

	sync_array_exit(arr);
}

/** Create the primary system wait arrays */
void sync_array_init()
{
	ut_a(sync_wait_array == NULL);
	ut_a(srv_sync_array_size > 0);
	ut_a(srv_max_n_threads > 0);

	sync_array_size = srv_sync_array_size;

	sync_wait_array = UT_NEW_ARRAY_NOKEY(sync_array_t*, sync_array_size);

	ulint n_slots = 1 + (srv_max_n_threads - 1) / sync_array_size;

	for (ulint i = 0; i < sync_array_size; ++i) {

		sync_wait_array[i] = UT_NEW_NOKEY(sync_array_t(n_slots));
	}
}
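
/* A quick worked example of the sizing above (illustrative numbers only):
with srv_max_n_threads = 10000 and srv_sync_array_size = 4, each of the 4
arrays gets n_slots = 1 + (10000 - 1) / 4 = 2500 cells, 10000 cells in
total, so every thread can hold a reserved cell even if all of them are
waiting at the same time. */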

/** Destroy the sync array wait sub-system. */
void sync_array_close()
{
	for (ulint i = 0; i < sync_array_size; ++i) {
		sync_array_free(sync_wait_array[i]);
	}

	UT_DELETE_ARRAY(sync_wait_array);
	sync_wait_array = NULL;
}

/**********************************************************************//**
Print info about the sync array(s). */
void
sync_array_print(
/*=============*/
	FILE*		file)	/*!< in/out: Print to this stream */
{
	for (ulint i = 0; i < sync_array_size; ++i) {
		sync_array_print_info(file, sync_wait_array[i]);
	}

	fprintf(file,
		"OS WAIT ARRAY INFO: signal count " ULINTPF "\n", sg_count);

}

/**********************************************************************//**
Prints info of the wait array without using any mutexes/semaphores. */
UNIV_INTERN
void
sync_array_print_innodb(void)
/*=========================*/
{
	ulint		i;
	sync_array_t*	arr = sync_array_get();

	fputs("InnoDB: Semaphore wait debug output started for InnoDB:\n", stderr);

	for (i = 0; i < arr->n_cells; i++) {
		void*		wait_object;
		sync_cell_t*	cell;

		cell = sync_array_get_nth_cell(arr, i);

		wait_object = cell->latch.mutex;

		if (wait_object == NULL || !cell->waiting) {

			continue;
		}

		fputs("InnoDB: Warning: semaphore wait:\n",
		      stderr);
		sync_array_cell_print(stderr, cell);
	}

	fputs("InnoDB: Semaphore wait debug output ended:\n", stderr);

}

/**********************************************************************//**
Get the number of items in the sync array. */
UNIV_INTERN
ulint
sync_arr_get_n_items(void)
/*======================*/
{
	sync_array_t*	sync_arr = sync_array_get();
	return (ulint) sync_arr->n_cells;
}

/******************************************************************//**
Get the specified item from the sync array if it is reserved. Sets the
given pointer to the array item if it is reserved.
@return true if item is reserved, false otherwise */
UNIV_INTERN
ibool
sync_arr_get_item(
/*==============*/
	ulint		i,	/*!< in: requested item */
	sync_cell_t	**cell)	/*!< out: cell contents if item
				reserved */
{
	sync_array_t*	sync_arr;
	sync_cell_t*	wait_cell;
	void*		wait_object;
	ibool		found = FALSE;

	sync_arr = sync_array_get();
	wait_cell = sync_array_get_nth_cell(sync_arr, i);

	if (wait_cell) {
		wait_object = wait_cell->latch.mutex;

		if (wait_object != NULL && wait_cell->waiting) {
			found = TRUE;
			*cell = wait_cell;
		}
	}

	return found;
}

/*******************************************************************//**
Function to populate the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
Loop through each item on the sync array, extract the column
information and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
@return 0 on success */
UNIV_INTERN
int
sync_arr_fill_sys_semphore_waits_table(
/*===================================*/
	THD*		thd,	/*!< in: thread */
	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
	Item*		)	/*!< in: condition (not used) */
{
	Field**		fields;
	ulint		n_items;

	DBUG_ENTER("i_s_sys_semaphore_waits_fill_table");
	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);

	/* deny access to users without PROCESS_ACL privilege */
	if (check_global_access(thd, PROCESS_ACL)) {
		DBUG_RETURN(0);
	}

	fields = tables->table->field;
	n_items = sync_arr_get_n_items();
	ulint type;

	for (ulint i = 0; i < n_items; i++) {
		sync_cell_t*	cell = NULL;
		if (sync_arr_get_item(i, &cell)) {
			WaitMutex* mutex;
			type = cell->request_type;
			/* JAN: FIXME
			OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_THREAD_ID],
			(longlong)os_thread_pf(cell->thread)));
			*/
			OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file)));
			OK(fields[SYS_SEMAPHORE_WAITS_LINE]->store(cell->line, true));
			fields[SYS_SEMAPHORE_WAITS_LINE]->set_notnull();
			OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME], (ulint)difftime(time(NULL), cell->reservation_time)));

			if (type == SYNC_MUTEX) {
				mutex = static_cast<WaitMutex*>(cell->latch.mutex);

				if (mutex) {
					// JAN: FIXME
					// OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name));
					OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)mutex));
					OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX"));
					//OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)mutex->thread_id));
					//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name)));
					//OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(mutex->line, true));
					//fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
					//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name)));
					//OK(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->store(mutex->cline, true));
					//fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->set_notnull();
					//OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)mutex->waiters));
					//OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)mutex->lock_word));
					//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name)));
					//OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(mutex->line, true));
					//fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
					//OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], mutex->count_os_wait));
				}
			} else if (type == RW_LOCK_X_WAIT
				   || type == RW_LOCK_X
				   || type == RW_LOCK_SX
				   || type == RW_LOCK_S) {
				rw_lock_t* rwlock = NULL;

				rwlock = static_cast<rw_lock_t*>(cell->latch.lock);

				if (rwlock) {
					ulint writer = rw_lock_get_writer(rwlock);

					OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)rwlock));
					if (type == RW_LOCK_X) {
						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X"));
					} else if (type == RW_LOCK_X_WAIT) {
						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X_WAIT"));
					} else if (type == RW_LOCK_S) {
						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_S"));
					} else if (type == RW_LOCK_SX) {
						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SX"));
					}

					if (writer != RW_LOCK_NOT_LOCKED) {
						// JAN: FIXME
						// OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name));
						OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WRITER_THREAD], (longlong)os_thread_pf(rwlock->writer_thread)));

						if (writer == RW_LOCK_X) {
							OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X"));
						} else if (writer == RW_LOCK_X_WAIT) {
							OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X_WAIT"));
						} else if (writer == RW_LOCK_SX) {
							OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_SX"));
						}

						//OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)rwlock->thread_id));
						//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name)));
						//OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(rwlock->line, true));
						//fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
						OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_READERS], rw_lock_get_reader_count(rwlock)));
						OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG],
							my_atomic_load32_explicit(&rwlock->waiters, MY_MEMORY_ORDER_RELAXED)));
						OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD],
							my_atomic_load32_explicit(&rwlock->lock_word, MY_MEMORY_ORDER_RELAXED)));
						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(rwlock->last_x_file_name)));
						OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(rwlock->last_x_line, true));
						fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
						OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], rwlock->count_os_wait));
					}
				}
			}

			OK(schema_table_store_record(thd, tables->table));
		}
	}

	DBUG_RETURN(0);
}

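/* The rows built above back the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS
table, so the usual way to inspect this data from SQL is a query along the
lines of (illustrative only):

	SELECT * FROM information_schema.INNODB_SYS_SEMAPHORE_WAITS;
*/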