| 1 | /* |
| 2 | Copyright (c) 2015, Facebook, Inc. |
| 3 | |
| 4 | This program is free software; you can redistribute it and/or modify |
| 5 | it under the terms of the GNU General Public License as published by |
| 6 | the Free Software Foundation; version 2 of the License. |
| 7 | |
| 8 | This program is distributed in the hope that it will be useful, |
| 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License |
| 14 | along with this program; if not, write to the Free Software |
| 15 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ |
| 16 | |
| 17 | #include <my_global.h> |
| 18 | |
| 19 | /* This C++ file's header file */ |
| 20 | #include "./rdb_mutex_wrapper.h" |
| 21 | |
| 22 | /* The following are for THD_ENTER_COND: */ |
| 23 | #define MYSQL_SERVER 1 |
| 24 | #include "sql_priv.h" |
| 25 | #include "my_decimal.h" |
| 26 | #include "sql_class.h" |
| 27 | //psergey-merge-todo: does MariaDB have/need: #include "../sql/replication.h" |
| 28 | |
| 29 | |
| 30 | /* MyRocks header files */ |
| 31 | #include "./ha_rocksdb.h" |
| 32 | #include "./rdb_utils.h" |
| 33 | |
| 34 | |
| 35 | using namespace rocksdb; |
| 36 | |
| 37 | namespace myrocks { |
| 38 | |
| 39 | static PSI_stage_info stage_waiting_on_row_lock2 = {0, "Waiting for row lock" , |
| 40 | 0}; |
| 41 | |
// Number of microseconds in one second.
static const int64_t ONE_SECOND_IN_MICROSECS = 1000000;

// Stand-in for an "infinite" timeout: the number of microseconds in one
// non-leap (365-day) year. Waiting that long is as good as waiting forever.
static const int64_t ONE_YEAR_IN_MICROSECS =
    365 * 24 * 60 * 60 * ONE_SECOND_IN_MICROSECS;
| 47 | |
| 48 | Rdb_cond_var::Rdb_cond_var() { mysql_cond_init(0, &m_cond, nullptr); } |
| 49 | |
| 50 | Rdb_cond_var::~Rdb_cond_var() { mysql_cond_destroy(&m_cond); } |
| 51 | |
| 52 | Status Rdb_cond_var::Wait(const std::shared_ptr<TransactionDBMutex> mutex_arg) { |
| 53 | return WaitFor(mutex_arg, ONE_YEAR_IN_MICROSECS); |
| 54 | } |
| 55 | |
| 56 | /* |
| 57 | @brief |
| 58 | Wait on condition variable. The caller must make sure that we own |
| 59 | *mutex_ptr. The mutex is released and re-acquired by the wait function. |
| 60 | |
| 61 | @param |
| 62 | timeout_micros Timeout in microseconds. Negative value means no timeout. |
| 63 | |
| 64 | @return |
| 65 | Status::OK() - Wait successfull |
| 66 | Status::TimedOut() - Timed out or wait killed (the caller can check |
| 67 | thd_killed() to determine which occurred) |
| 68 | */ |
| 69 | |
| 70 | Status |
| 71 | Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg, |
| 72 | int64_t timeout_micros) { |
| 73 | auto *mutex_obj = reinterpret_cast<Rdb_mutex *>(mutex_arg.get()); |
| 74 | DBUG_ASSERT(mutex_obj != nullptr); |
| 75 | |
| 76 | mysql_mutex_t *const mutex_ptr = &mutex_obj->m_mutex; |
| 77 | |
| 78 | int res = 0; |
| 79 | struct timespec wait_timeout; |
| 80 | |
| 81 | if (timeout_micros < 0) |
| 82 | timeout_micros = ONE_YEAR_IN_MICROSECS; |
| 83 | set_timespec_nsec(wait_timeout, timeout_micros * 1000); |
| 84 | |
| 85 | #ifndef STANDALONE_UNITTEST |
| 86 | PSI_stage_info old_stage; |
| 87 | mysql_mutex_assert_owner(mutex_ptr); |
| 88 | |
| 89 | if (current_thd && mutex_obj->m_old_stage_info.count(current_thd) == 0) { |
| 90 | THD_ENTER_COND(current_thd, &m_cond, mutex_ptr, &stage_waiting_on_row_lock2, |
| 91 | &old_stage); |
| 92 | /* |
| 93 | After the mysql_cond_timedwait we need make this call |
| 94 | |
| 95 | THD_EXIT_COND(thd, &old_stage); |
| 96 | |
| 97 | to inform the SQL layer that KILLable wait has ended. However, |
| 98 | that will cause mutex to be released. Defer the release until the mutex |
| 99 | that is unlocked by RocksDB's Pessimistic Transactions system. |
| 100 | */ |
| 101 | mutex_obj->set_unlock_action(&old_stage); |
| 102 | } |
| 103 | |
| 104 | #endif |
| 105 | bool killed = false; |
| 106 | |
| 107 | do { |
| 108 | res = mysql_cond_timedwait(&m_cond, mutex_ptr, &wait_timeout); |
| 109 | |
| 110 | #ifndef STANDALONE_UNITTEST |
| 111 | if (current_thd) |
| 112 | killed= thd_killed(current_thd); |
| 113 | #endif |
| 114 | } while (!killed && res == EINTR); |
| 115 | |
| 116 | if (res || killed) |
| 117 | return Status::TimedOut(); |
| 118 | else |
| 119 | return Status::OK(); |
| 120 | } |
| 121 | |
| 122 | /* |
| 123 | |
| 124 | @note |
| 125 | This function may be called while not holding the mutex that is used to wait |
| 126 | on the condition variable. |
| 127 | |
| 128 | The manual page says ( http://linux.die.net/man/3/pthread_cond_signal): |
| 129 | |
| 130 | The pthread_cond_broadcast() or pthread_cond_signal() functions may be called |
| 131 | by a thread whether or not it currently owns the mutex that threads calling |
| 132 | pthread_cond_wait() or pthread_cond_timedwait() have associated with the |
| 133 | condition variable during their waits; however, IF PREDICTABLE SCHEDULING |
| 134 | BEHAVIOR IS REQUIRED, THEN THAT MUTEX SHALL BE LOCKED by the thread calling |
| 135 | pthread_cond_broadcast() or pthread_cond_signal(). |
| 136 | |
What's "predictable scheduling" and do we need it? The explanation is here:
| 138 | |
| 139 | https://groups.google.com/forum/?hl=ky#!msg/comp.programming.threads/wEUgPq541v8/ZByyyS8acqMJ |
| 140 | "The problem (from the realtime side) with condition variables is that |
| 141 | if you can signal/broadcast without holding the mutex, and any thread |
| 142 | currently running can acquire an unlocked mutex and check a predicate |
| 143 | without reference to the condition variable, then you can have an |
| 144 | indirect priority inversion." |
| 145 | |
| 146 | Another possible consequence is that one can create spurious wake-ups when |
| 147 | there are multiple threads signaling the condition. |
| 148 | |
| 149 | None of this looks like a problem for our use case. |
| 150 | */ |
| 151 | |
| 152 | void Rdb_cond_var::Notify() { mysql_cond_signal(&m_cond); } |
| 153 | |
| 154 | /* |
| 155 | @note |
| 156 | This is called without holding the mutex that's used for waiting on the |
| 157 | condition. See ::Notify(). |
| 158 | */ |
| 159 | void Rdb_cond_var::NotifyAll() { mysql_cond_broadcast(&m_cond); } |
| 160 | |
| 161 | Rdb_mutex::Rdb_mutex() { |
| 162 | mysql_mutex_init(0 /* Don't register in P_S. */, &m_mutex, |
| 163 | MY_MUTEX_INIT_FAST); |
| 164 | } |
| 165 | |
| 166 | Rdb_mutex::~Rdb_mutex() { mysql_mutex_destroy(&m_mutex); } |
| 167 | |
| 168 | Status Rdb_mutex::Lock() { |
| 169 | RDB_MUTEX_LOCK_CHECK(m_mutex); |
| 170 | DBUG_ASSERT(m_old_stage_info.count(current_thd) == 0); |
| 171 | return Status::OK(); |
| 172 | } |
| 173 | |
| 174 | // Attempt to acquire lock. If timeout is non-negative, operation may be |
| 175 | // failed after this many milliseconds. |
| 176 | // If implementing a custom version of this class, the implementation may |
| 177 | // choose to ignore the timeout. |
| 178 | // Return OK on success, or other Status on failure. |
| 179 | Status Rdb_mutex::TryLockFor(int64_t timeout_time MY_ATTRIBUTE((__unused__))) { |
| 180 | /* |
| 181 | Note: PThreads API has pthread_mutex_timedlock(), but mysql's |
| 182 | mysql_mutex_* wrappers do not wrap that function. |
| 183 | */ |
| 184 | RDB_MUTEX_LOCK_CHECK(m_mutex); |
| 185 | return Status::OK(); |
| 186 | } |
| 187 | |
| 188 | #ifndef STANDALONE_UNITTEST |
| 189 | void Rdb_mutex::set_unlock_action(const PSI_stage_info *const old_stage_arg) { |
| 190 | DBUG_ASSERT(old_stage_arg != nullptr); |
| 191 | |
| 192 | mysql_mutex_assert_owner(&m_mutex); |
| 193 | DBUG_ASSERT(m_old_stage_info.count(current_thd) == 0); |
| 194 | |
| 195 | m_old_stage_info[current_thd] = |
| 196 | std::make_shared<PSI_stage_info>(*old_stage_arg); |
| 197 | } |
| 198 | #endif |
| 199 | |
| 200 | // Unlock Mutex that was successfully locked by Lock() or TryLockUntil() |
| 201 | void Rdb_mutex::UnLock() { |
| 202 | #ifndef STANDALONE_UNITTEST |
| 203 | if (m_old_stage_info.count(current_thd) > 0) { |
| 204 | const std::shared_ptr<PSI_stage_info> old_stage = |
| 205 | m_old_stage_info[current_thd]; |
| 206 | m_old_stage_info.erase(current_thd); |
| 207 | /* The following will call mysql_mutex_unlock */ |
| 208 | THD_EXIT_COND(current_thd, old_stage.get()); |
| 209 | return; |
| 210 | } |
| 211 | #endif |
| 212 | RDB_MUTEX_UNLOCK_CHECK(m_mutex); |
| 213 | } |
| 214 | |
| 215 | } // namespace myrocks |
| 216 | |