| 1 | #include "RWLock.h" | 
|---|
| 2 | #include <Common/Stopwatch.h> | 
|---|
| 3 | #include <Common/Exception.h> | 
|---|
| 4 | #include <Common/CurrentMetrics.h> | 
|---|
| 5 | #include <Common/ProfileEvents.h> | 
|---|
| 6 |  | 
|---|
| 7 |  | 
|---|
| 8 | namespace ProfileEvents | 
|---|
| 9 | { | 
|---|
| 10 | extern const Event RWLockAcquiredReadLocks; | 
|---|
| 11 | extern const Event RWLockAcquiredWriteLocks; | 
|---|
| 12 | extern const Event RWLockReadersWaitMilliseconds; | 
|---|
| 13 | extern const Event RWLockWritersWaitMilliseconds; | 
|---|
| 14 | } | 
|---|
| 15 |  | 
|---|
| 16 |  | 
|---|
| 17 | namespace CurrentMetrics | 
|---|
| 18 | { | 
|---|
| 19 | extern const Metric RWLockWaitingReaders; | 
|---|
| 20 | extern const Metric RWLockWaitingWriters; | 
|---|
| 21 | extern const Metric RWLockActiveReaders; | 
|---|
| 22 | extern const Metric RWLockActiveWriters; | 
|---|
| 23 | } | 
|---|
| 24 |  | 
|---|
| 25 |  | 
|---|
| 26 | namespace DB | 
|---|
| 27 | { | 
|---|
| 28 |  | 
|---|
| 29 | namespace ErrorCodes | 
|---|
| 30 | { | 
|---|
| 31 | extern const int LOGICAL_ERROR; | 
|---|
| 32 | extern const int DEADLOCK_AVOIDED; | 
|---|
| 33 | } | 
|---|
| 34 |  | 
|---|
| 35 |  | 
|---|
| 36 | /** A single-use object that represents lock's ownership | 
|---|
| 37 | * For the purpose of exception safety guarantees LockHolder is to be used in two steps: | 
|---|
| 38 | *   1. Create an instance (allocating all the memory needed) | 
|---|
| 39 | *   2. Associate the instance with the lock (attach to the lock and locking request group) | 
|---|
| 40 | */ | 
|---|
| 41 | class RWLockImpl::LockHolderImpl | 
|---|
| 42 | { | 
|---|
| 43 | bool bound{false}; | 
|---|
| 44 | Type lock_type; | 
|---|
| 45 | String query_id; | 
|---|
| 46 | CurrentMetrics::Increment active_client_increment; | 
|---|
| 47 | RWLock parent; | 
|---|
| 48 | GroupsContainer::iterator it_group; | 
|---|
| 49 |  | 
|---|
| 50 | public: | 
|---|
| 51 | LockHolderImpl(const LockHolderImpl & other) = delete; | 
|---|
| 52 | LockHolderImpl& operator=(const LockHolderImpl & other) = delete; | 
|---|
| 53 |  | 
|---|
| 54 | /// Implicit memory allocation for query_id is done here | 
|---|
| 55 | LockHolderImpl(const String & query_id_, Type type) | 
|---|
| 56 | : lock_type{type}, query_id{query_id_}, | 
|---|
| 57 | active_client_increment{ | 
|---|
| 58 | type == Type::Read ? CurrentMetrics::RWLockActiveReaders : CurrentMetrics::RWLockActiveWriters} | 
|---|
| 59 | { | 
|---|
| 60 | } | 
|---|
| 61 |  | 
|---|
| 62 | ~LockHolderImpl(); | 
|---|
| 63 |  | 
|---|
| 64 | private: | 
|---|
| 65 | /// A separate method which binds the lock holder to the owned lock | 
|---|
| 66 | /// N.B. It is very important that this method produces no allocations | 
|---|
| 67 | bool bind_with(RWLock && parent_, GroupsContainer::iterator it_group_) noexcept | 
|---|
| 68 | { | 
|---|
| 69 | if (bound) | 
|---|
| 70 | return false; | 
|---|
| 71 | it_group = it_group_; | 
|---|
| 72 | parent = std::move(parent_); | 
|---|
| 73 | ++it_group->refererrs; | 
|---|
| 74 | bound = true; | 
|---|
| 75 | return true; | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 | friend class RWLockImpl; | 
|---|
| 79 | }; | 
|---|
| 80 |  | 
|---|
| 81 |  | 
|---|
| 82 | namespace | 
|---|
| 83 | { | 
|---|
| 84 | /// Global information about all read locks that query has. It is needed to avoid some type of deadlocks. | 
|---|
| 85 |  | 
|---|
| 86 | class QueryLockInfo | 
|---|
| 87 | { | 
|---|
| 88 | private: | 
|---|
| 89 | mutable std::mutex mutex; | 
|---|
| 90 | std::map<std::string, size_t> queries; | 
|---|
| 91 |  | 
|---|
| 92 | public: | 
|---|
| 93 | void add(const String & query_id) | 
|---|
| 94 | { | 
|---|
| 95 | std::lock_guard lock(mutex); | 
|---|
| 96 |  | 
|---|
| 97 | const auto res = queries.emplace(query_id, 1);  // may throw | 
|---|
| 98 | if (!res.second) | 
|---|
| 99 | ++res.first->second; | 
|---|
| 100 | } | 
|---|
| 101 |  | 
|---|
| 102 | void remove(const String & query_id) noexcept | 
|---|
| 103 | { | 
|---|
| 104 | std::lock_guard lock(mutex); | 
|---|
| 105 |  | 
|---|
| 106 | const auto query_it = queries.find(query_id); | 
|---|
| 107 | if (query_it != queries.cend() && --query_it->second == 0) | 
|---|
| 108 | queries.erase(query_it); | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | void check(const String & query_id) const | 
|---|
| 112 | { | 
|---|
| 113 | std::lock_guard lock(mutex); | 
|---|
| 114 |  | 
|---|
| 115 | if (queries.find(query_id) != queries.cend()) | 
|---|
| 116 | throw Exception( "Possible deadlock avoided. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED); | 
|---|
| 117 | } | 
|---|
| 118 | }; | 
|---|
| 119 |  | 
|---|
| 120 | QueryLockInfo all_read_locks; | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 |  | 
|---|
| 124 | /** To guarantee that we do not get any piece of our data corrupted: | 
|---|
| 125 | *   1. Perform all actions that include allocations before changing lock's internal state | 
|---|
| 126 | *   2. Roll back any changes that make the state inconsistent | 
|---|
| 127 | * | 
|---|
| 128 | * Note: "SM" in the commentaries below stands for STATE MODIFICATION | 
|---|
| 129 | */ | 
|---|
| 130 | RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id) | 
|---|
| 131 | { | 
|---|
| 132 | const bool request_has_query_id = query_id != NO_QUERY; | 
|---|
| 133 |  | 
|---|
| 134 | Stopwatch watch(CLOCK_MONOTONIC_COARSE); | 
|---|
| 135 | CurrentMetrics::Increment waiting_client_increment((type == Read) ? CurrentMetrics::RWLockWaitingReaders | 
|---|
| 136 | : CurrentMetrics::RWLockWaitingWriters); | 
|---|
| 137 | auto finalize_metrics = [type, &watch] () | 
|---|
| 138 | { | 
|---|
| 139 | ProfileEvents::increment((type == Read) ? ProfileEvents::RWLockAcquiredReadLocks | 
|---|
| 140 | : ProfileEvents::RWLockAcquiredWriteLocks); | 
|---|
| 141 | ProfileEvents::increment((type == Read) ? ProfileEvents::RWLockReadersWaitMilliseconds | 
|---|
| 142 | : ProfileEvents::RWLockWritersWaitMilliseconds, watch.elapsedMilliseconds()); | 
|---|
| 143 | }; | 
|---|
| 144 |  | 
|---|
| 145 | /// This object is placed above unique_lock, because it may lock in destructor. | 
|---|
| 146 | auto lock_holder = std::make_shared<LockHolderImpl>(query_id, type); | 
|---|
| 147 |  | 
|---|
| 148 | std::unique_lock lock(mutex); | 
|---|
| 149 |  | 
|---|
| 150 | /// The FastPath: | 
|---|
| 151 | /// Check if the same query_id already holds the required lock in which case we can proceed without waiting | 
|---|
| 152 | if (request_has_query_id) | 
|---|
| 153 | { | 
|---|
| 154 | const auto it_query = owner_queries.find(query_id); | 
|---|
| 155 | if (it_query != owner_queries.end()) | 
|---|
| 156 | { | 
|---|
| 157 | const auto current_owner_group = queue.begin(); | 
|---|
| 158 |  | 
|---|
| 159 | /// XXX: it means we can't upgrade lock from read to write! | 
|---|
| 160 | if (type == Write) | 
|---|
| 161 | throw Exception( | 
|---|
| 162 | "RWLockImpl::getLock(): Cannot acquire exclusive lock while RWLock is already locked", | 
|---|
| 163 | ErrorCodes::LOGICAL_ERROR); | 
|---|
| 164 |  | 
|---|
| 165 | if (current_owner_group->type == Write) | 
|---|
| 166 | throw Exception( | 
|---|
| 167 | "RWLockImpl::getLock(): RWLock is already locked in exclusive mode", | 
|---|
| 168 | ErrorCodes::LOGICAL_ERROR); | 
|---|
| 169 |  | 
|---|
| 170 | /// N.B. Type is Read here, query_id is not empty and it_query is a valid iterator | 
|---|
| 171 | all_read_locks.add(query_id);                                     /// SM1: may throw on insertion (nothing to roll back) | 
|---|
| 172 | ++it_query->second;                                               /// SM2: nothrow | 
|---|
| 173 | lock_holder->bind_with(shared_from_this(), current_owner_group);  /// SM3: nothrow | 
|---|
| 174 |  | 
|---|
| 175 | finalize_metrics(); | 
|---|
| 176 | return lock_holder; | 
|---|
| 177 | } | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | /** If the query already has any active read lock and tries to acquire another read lock | 
|---|
| 181 | *  but it is not in front of the queue and has to wait, deadlock is possible: | 
|---|
| 182 | * | 
|---|
| 183 | * Example (four queries, two RWLocks - 'a' and 'b'): | 
|---|
| 184 | * | 
|---|
| 185 | *     --> time --> | 
|---|
| 186 | * | 
|---|
| 187 | * q1: ra          rb | 
|---|
| 188 | * q2:    wa | 
|---|
| 189 | * q3:       rb       ra | 
|---|
| 190 | * q4:          wb | 
|---|
| 191 | * | 
|---|
| 192 | * We will throw an exception instead. | 
|---|
| 193 | */ | 
|---|
| 194 |  | 
|---|
| 195 | if (type == Type::Write || queue.empty() || queue.back().type == Type::Write) | 
|---|
| 196 | { | 
|---|
| 197 | if (type == Type::Read && request_has_query_id && !queue.empty()) | 
|---|
| 198 | all_read_locks.check(query_id); | 
|---|
| 199 |  | 
|---|
| 200 | /// Create a new group of locking requests | 
|---|
| 201 | queue.emplace_back(type);                        /// SM1: may throw (nothing to roll back) | 
|---|
| 202 | } | 
|---|
| 203 | else if (request_has_query_id && queue.size() > 1) | 
|---|
| 204 | all_read_locks.check(query_id); | 
|---|
| 205 |  | 
|---|
| 206 | GroupsContainer::iterator it_group = std::prev(queue.end()); | 
|---|
| 207 |  | 
|---|
| 208 | /// We need to reference the associated group before waiting to guarantee | 
|---|
| 209 | /// that this group does not get deleted prematurely | 
|---|
| 210 | ++it_group->refererrs; | 
|---|
| 211 |  | 
|---|
| 212 | /// Wait a notification until we will be the only in the group. | 
|---|
| 213 | it_group->cv.wait(lock, [&] () { return it_group == queue.begin(); }); | 
|---|
| 214 |  | 
|---|
| 215 | --it_group->refererrs; | 
|---|
| 216 |  | 
|---|
| 217 | if (request_has_query_id) | 
|---|
| 218 | { | 
|---|
| 219 | try | 
|---|
| 220 | { | 
|---|
| 221 | if (type == Type::Read) | 
|---|
| 222 | all_read_locks.add(query_id);              /// SM2: may throw on insertion | 
|---|
| 223 | ///      and is safe to roll back unconditionally | 
|---|
| 224 | const auto emplace_res = | 
|---|
| 225 | owner_queries.emplace(query_id, 1);    /// SM3: may throw on insertion | 
|---|
| 226 | if (!emplace_res.second) | 
|---|
| 227 | ++emplace_res.first->second;               /// SM4: nothrow | 
|---|
| 228 | } | 
|---|
| 229 | catch (...) | 
|---|
| 230 | { | 
|---|
| 231 | /// Methods std::list<>::emplace_back() and std::unordered_map<>::emplace() provide strong exception safety | 
|---|
| 232 | /// We only need to roll back the changes to these objects: all_read_locks and the locking queue | 
|---|
| 233 | if (type == Type::Read) | 
|---|
| 234 | all_read_locks.remove(query_id);           /// Rollback(SM2): nothrow | 
|---|
| 235 |  | 
|---|
| 236 | if (it_group->refererrs == 0) | 
|---|
| 237 | { | 
|---|
| 238 | const auto next = queue.erase(it_group);   /// Rollback(SM1): nothrow | 
|---|
| 239 | if (next != queue.end()) | 
|---|
| 240 | next->cv.notify_all(); | 
|---|
| 241 | } | 
|---|
| 242 |  | 
|---|
| 243 | throw; | 
|---|
| 244 | } | 
|---|
| 245 | } | 
|---|
| 246 |  | 
|---|
| 247 | lock_holder->bind_with(shared_from_this(), it_group);  /// SM: nothrow | 
|---|
| 248 |  | 
|---|
| 249 | finalize_metrics(); | 
|---|
| 250 | return lock_holder; | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 |  | 
|---|
| 254 | /** The sequence points of acquiring lock's ownership by an instance of LockHolderImpl: | 
|---|
| 255 | *   1. all_read_locks is updated | 
|---|
| 256 | *   2. owner_queries is updated | 
|---|
| 257 | *   3. request group is updated by LockHolderImpl which in turn becomes "bound" | 
|---|
| 258 | * | 
|---|
| 259 | * If by the time when destructor of LockHolderImpl is called the instance has been "bound", | 
|---|
| 260 | * it is guaranteed that all three steps have been executed successfully and the resulting state is consistent. | 
|---|
| 261 | * With the mutex locked the order of steps to restore the lock's state can be arbitrary | 
|---|
| 262 | * | 
|---|
| 263 | * We do not employ try-catch: if something bad happens, there is nothing we can do =( | 
|---|
| 264 | */ | 
|---|
| 265 | RWLockImpl::LockHolderImpl::~LockHolderImpl() | 
|---|
| 266 | { | 
|---|
| 267 | if (!bound || parent == nullptr) | 
|---|
| 268 | return; | 
|---|
| 269 |  | 
|---|
| 270 | std::lock_guard lock(parent->mutex); | 
|---|
| 271 |  | 
|---|
| 272 | /// The associated group must exist (and be the beginning of the queue?) | 
|---|
| 273 | if (parent->queue.empty() || it_group != parent->queue.begin()) | 
|---|
| 274 | return; | 
|---|
| 275 |  | 
|---|
| 276 | /// If query_id is not empty it must be listed in parent->owner_queries | 
|---|
| 277 | if (query_id != RWLockImpl::NO_QUERY) | 
|---|
| 278 | { | 
|---|
| 279 | const auto owner_it = parent->owner_queries.find(query_id); | 
|---|
| 280 | if (owner_it != parent->owner_queries.end()) | 
|---|
| 281 | { | 
|---|
| 282 | if (--owner_it->second == 0)                  /// SM: nothrow | 
|---|
| 283 | parent->owner_queries.erase(owner_it);    /// SM: nothrow | 
|---|
| 284 |  | 
|---|
| 285 | if (lock_type == RWLockImpl::Read) | 
|---|
| 286 | all_read_locks.remove(query_id);          /// SM: nothrow | 
|---|
| 287 | } | 
|---|
| 288 | } | 
|---|
| 289 |  | 
|---|
| 290 | /// If we are the last remaining referrer, remove the group and notify the next group | 
|---|
| 291 | if (--it_group->refererrs == 0)                       /// SM: nothrow | 
|---|
| 292 | { | 
|---|
| 293 | const auto next = parent->queue.erase(it_group);  /// SM: nothrow | 
|---|
| 294 | if (next != parent->queue.end()) | 
|---|
| 295 | next->cv.notify_all(); | 
|---|
| 296 | } | 
|---|
| 297 | } | 
|---|
| 298 |  | 
|---|
| 299 | } | 
|---|
| 300 |  | 
|---|