1/*
2 Copyright (c) 2015, Facebook, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
17#include <my_global.h>
18
19/* This C++ file's header file */
20#include "./rdb_mutex_wrapper.h"
21
22/* The following are for THD_ENTER_COND: */
23#define MYSQL_SERVER 1
24#include "sql_priv.h"
25#include "my_decimal.h"
26#include "sql_class.h"
27//psergey-merge-todo: does MariaDB have/need: #include "../sql/replication.h"
28
29
30/* MyRocks header files */
31#include "./ha_rocksdb.h"
32#include "./rdb_utils.h"
33
34
35using namespace rocksdb;
36
37namespace myrocks {
38
39static PSI_stage_info stage_waiting_on_row_lock2 = {0, "Waiting for row lock",
40 0};
41
// Number of microseconds in one second.
static constexpr int64_t ONE_SECOND_IN_MICROSECS = 1000 * 1000;

// Number of microseconds in one non-leap year (365 days). A timeout this
// long is treated as being as good as an infinite timeout.
static constexpr int64_t ONE_YEAR_IN_MICROSECS =
    ONE_SECOND_IN_MICROSECS * (365LL * 24 * 60 * 60);
47
48Rdb_cond_var::Rdb_cond_var() { mysql_cond_init(0, &m_cond, nullptr); }
49
50Rdb_cond_var::~Rdb_cond_var() { mysql_cond_destroy(&m_cond); }
51
52Status Rdb_cond_var::Wait(const std::shared_ptr<TransactionDBMutex> mutex_arg) {
53 return WaitFor(mutex_arg, ONE_YEAR_IN_MICROSECS);
54}
55
56/*
57 @brief
58 Wait on condition variable. The caller must make sure that we own
59 *mutex_ptr. The mutex is released and re-acquired by the wait function.
60
61 @param
62 timeout_micros Timeout in microseconds. Negative value means no timeout.
63
64 @return
65 Status::OK() - Wait successfull
66 Status::TimedOut() - Timed out or wait killed (the caller can check
67 thd_killed() to determine which occurred)
68*/
69
70Status
71Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg,
72 int64_t timeout_micros) {
73 auto *mutex_obj = reinterpret_cast<Rdb_mutex *>(mutex_arg.get());
74 DBUG_ASSERT(mutex_obj != nullptr);
75
76 mysql_mutex_t *const mutex_ptr = &mutex_obj->m_mutex;
77
78 int res = 0;
79 struct timespec wait_timeout;
80
81 if (timeout_micros < 0)
82 timeout_micros = ONE_YEAR_IN_MICROSECS;
83 set_timespec_nsec(wait_timeout, timeout_micros * 1000);
84
85#ifndef STANDALONE_UNITTEST
86 PSI_stage_info old_stage;
87 mysql_mutex_assert_owner(mutex_ptr);
88
89 if (current_thd && mutex_obj->m_old_stage_info.count(current_thd) == 0) {
90 THD_ENTER_COND(current_thd, &m_cond, mutex_ptr, &stage_waiting_on_row_lock2,
91 &old_stage);
92 /*
93 After the mysql_cond_timedwait we need make this call
94
95 THD_EXIT_COND(thd, &old_stage);
96
97 to inform the SQL layer that KILLable wait has ended. However,
98 that will cause mutex to be released. Defer the release until the mutex
99 that is unlocked by RocksDB's Pessimistic Transactions system.
100 */
101 mutex_obj->set_unlock_action(&old_stage);
102 }
103
104#endif
105 bool killed = false;
106
107 do {
108 res = mysql_cond_timedwait(&m_cond, mutex_ptr, &wait_timeout);
109
110#ifndef STANDALONE_UNITTEST
111 if (current_thd)
112 killed= thd_killed(current_thd);
113#endif
114 } while (!killed && res == EINTR);
115
116 if (res || killed)
117 return Status::TimedOut();
118 else
119 return Status::OK();
120}
121
122/*
123
124 @note
125 This function may be called while not holding the mutex that is used to wait
126 on the condition variable.
127
128 The manual page says ( http://linux.die.net/man/3/pthread_cond_signal):
129
130 The pthread_cond_broadcast() or pthread_cond_signal() functions may be called
131 by a thread whether or not it currently owns the mutex that threads calling
132 pthread_cond_wait() or pthread_cond_timedwait() have associated with the
133 condition variable during their waits; however, IF PREDICTABLE SCHEDULING
134 BEHAVIOR IS REQUIRED, THEN THAT MUTEX SHALL BE LOCKED by the thread calling
135 pthread_cond_broadcast() or pthread_cond_signal().
136
  What's "predictable scheduling" and do we need it? The explanation is here:
138
139 https://groups.google.com/forum/?hl=ky#!msg/comp.programming.threads/wEUgPq541v8/ZByyyS8acqMJ
140 "The problem (from the realtime side) with condition variables is that
141 if you can signal/broadcast without holding the mutex, and any thread
142 currently running can acquire an unlocked mutex and check a predicate
143 without reference to the condition variable, then you can have an
144 indirect priority inversion."
145
146 Another possible consequence is that one can create spurious wake-ups when
147 there are multiple threads signaling the condition.
148
149 None of this looks like a problem for our use case.
150*/
151
152void Rdb_cond_var::Notify() { mysql_cond_signal(&m_cond); }
153
154/*
155 @note
156 This is called without holding the mutex that's used for waiting on the
157 condition. See ::Notify().
158*/
159void Rdb_cond_var::NotifyAll() { mysql_cond_broadcast(&m_cond); }
160
161Rdb_mutex::Rdb_mutex() {
162 mysql_mutex_init(0 /* Don't register in P_S. */, &m_mutex,
163 MY_MUTEX_INIT_FAST);
164}
165
166Rdb_mutex::~Rdb_mutex() { mysql_mutex_destroy(&m_mutex); }
167
168Status Rdb_mutex::Lock() {
169 RDB_MUTEX_LOCK_CHECK(m_mutex);
170 DBUG_ASSERT(m_old_stage_info.count(current_thd) == 0);
171 return Status::OK();
172}
173
174// Attempt to acquire lock. If timeout is non-negative, operation may be
175// failed after this many milliseconds.
176// If implementing a custom version of this class, the implementation may
177// choose to ignore the timeout.
178// Return OK on success, or other Status on failure.
179Status Rdb_mutex::TryLockFor(int64_t timeout_time MY_ATTRIBUTE((__unused__))) {
180 /*
181 Note: PThreads API has pthread_mutex_timedlock(), but mysql's
182 mysql_mutex_* wrappers do not wrap that function.
183 */
184 RDB_MUTEX_LOCK_CHECK(m_mutex);
185 return Status::OK();
186}
187
188#ifndef STANDALONE_UNITTEST
189void Rdb_mutex::set_unlock_action(const PSI_stage_info *const old_stage_arg) {
190 DBUG_ASSERT(old_stage_arg != nullptr);
191
192 mysql_mutex_assert_owner(&m_mutex);
193 DBUG_ASSERT(m_old_stage_info.count(current_thd) == 0);
194
195 m_old_stage_info[current_thd] =
196 std::make_shared<PSI_stage_info>(*old_stage_arg);
197}
198#endif
199
200// Unlock Mutex that was successfully locked by Lock() or TryLockUntil()
201void Rdb_mutex::UnLock() {
202#ifndef STANDALONE_UNITTEST
203 if (m_old_stage_info.count(current_thd) > 0) {
204 const std::shared_ptr<PSI_stage_info> old_stage =
205 m_old_stage_info[current_thd];
206 m_old_stage_info.erase(current_thd);
207 /* The following will call mysql_mutex_unlock */
208 THD_EXIT_COND(current_thd, old_stage.get());
209 return;
210 }
211#endif
212 RDB_MUTEX_UNLOCK_CHECK(m_mutex);
213}
214
215} // namespace myrocks
216