| 1 | // Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. |
| 2 | // An overview, including benchmark results, is provided here: |
| 3 | // http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ |
| 4 | // The full design is also described in excruciating detail at: |
| 5 | // http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue |
| 6 | |
| 7 | // Simplified BSD license: |
| 8 | // Copyright (c) 2013-2016, Cameron Desrochers. |
| 9 | // All rights reserved. |
| 10 | // |
| 11 | // Redistribution and use in source and binary forms, with or without modification, |
| 12 | // are permitted provided that the following conditions are met: |
| 13 | // |
| 14 | // - Redistributions of source code must retain the above copyright notice, this list of |
| 15 | // conditions and the following disclaimer. |
| 16 | // - Redistributions in binary form must reproduce the above copyright notice, this list of |
| 17 | // conditions and the following disclaimer in the documentation and/or other materials |
| 18 | // provided with the distribution. |
| 19 | // |
| 20 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY |
| 21 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
| 22 | // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
| 23 | // THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT |
| 25 | // OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 26 | // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR |
| 27 | // TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
| 28 | // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | |
| 30 | |
| 31 | #pragma once |
| 32 | |
| 33 | #if defined(__GNUC__) |
| 34 | // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and |
| 35 | // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings |
| 36 | // upon assigning any computed values) |
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
| 38 | #endif |
| 39 | |
| 40 | #if defined(__APPLE__) |
| 41 | #include <TargetConditionals.h> |
| 42 | #endif |
| 43 | |
| 44 | #include <atomic> // Requires C++11. Sorry VS2010. |
| 45 | #include <cassert> |
| 46 | #include <cstddef> // for max_align_t |
| 47 | #include <cstdint> |
| 48 | #include <cstdlib> |
| 49 | #include <type_traits> |
| 50 | #include <algorithm> |
| 51 | #include <utility> |
| 52 | #include <limits> |
| 53 | #include <climits> // for CHAR_BIT |
| 54 | #include <array> |
| 55 | #include <thread> // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading |
| 56 | |
| 57 | // Platform-specific definitions of a numeric thread ID type and an invalid value |
| 58 | namespace duckdb_moodycamel { namespace details { |
| 59 | template<typename thread_id_t> struct thread_id_converter { |
| 60 | typedef thread_id_t thread_id_numeric_size_t; |
| 61 | typedef thread_id_t thread_id_hash_t; |
| 62 | static thread_id_hash_t prehash(thread_id_t const& x) { return x; } |
| 63 | }; |
| 64 | } } |
| 65 | #if defined(MCDBGQ_USE_RELACY) |
| 66 | namespace duckdb_moodycamel { namespace details { |
| 67 | typedef std::uint32_t thread_id_t; |
| 68 | static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; |
| 69 | static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; |
| 70 | static inline thread_id_t thread_id() { return rl::thread_index(); } |
| 71 | } } |
| 72 | #elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) |
| 73 | // No sense pulling in windows.h in a header, we'll manually declare the function |
| 74 | // we use and rely on backwards-compatibility for this not to break |
| 75 | extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); |
| 76 | namespace duckdb_moodycamel { namespace details { |
| 77 | static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows" ); |
| 78 | typedef std::uint32_t thread_id_t; |
| 79 | static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx |
| 80 | static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. |
| 81 | static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); } |
| 82 | } } |
| 83 | #elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) |
| 84 | namespace duckdb_moodycamel { namespace details { |
| 85 | static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes" ); |
| 86 | |
| 87 | typedef std::thread::id thread_id_t; |
| 88 | static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID |
| 89 | |
// Note we don't define an invalid_thread_id2 since std::thread::id doesn't have one; it's
| 91 | // only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't |
| 92 | // be. |
| 93 | static inline thread_id_t thread_id() { return std::this_thread::get_id(); } |
| 94 | |
| 95 | template<std::size_t> struct thread_id_size { }; |
| 96 | template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; }; |
| 97 | template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; }; |
| 98 | |
| 99 | template<> struct thread_id_converter<thread_id_t> { |
| 100 | typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_size_t; |
| 101 | #ifndef __APPLE__ |
| 102 | typedef std::size_t thread_id_hash_t; |
| 103 | #else |
| 104 | typedef thread_id_numeric_size_t thread_id_hash_t; |
| 105 | #endif |
| 106 | |
| 107 | static thread_id_hash_t prehash(thread_id_t const& x) |
| 108 | { |
| 109 | #ifndef __APPLE__ |
| 110 | return std::hash<std::thread::id>()(x); |
| 111 | #else |
| 112 | return *reinterpret_cast<thread_id_hash_t const*>(&x); |
| 113 | #endif |
| 114 | } |
| 115 | }; |
| 116 | } } |
| 117 | #else |
| 118 | // Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 |
| 119 | // In order to get a numeric thread ID in a platform-independent way, we use a thread-local |
| 120 | // static variable's address as a thread identifier :-) |
| 121 | #if defined(__GNUC__) || defined(__INTEL_COMPILER) |
| 122 | #define MOODYCAMEL_THREADLOCAL __thread |
| 123 | #elif defined(_MSC_VER) |
| 124 | #define MOODYCAMEL_THREADLOCAL __declspec(thread) |
| 125 | #else |
| 126 | // Assume C++11 compliant compiler |
| 127 | #define MOODYCAMEL_THREADLOCAL thread_local |
| 128 | #endif |
| 129 | namespace duckdb_moodycamel { namespace details { |
| 130 | typedef std::uintptr_t thread_id_t; |
| 131 | static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr |
| 132 | #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 133 | static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. |
| 134 | #endif |
| 135 | inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); } |
| 136 | } } |
| 137 | #endif |
| 138 | |
| 139 | // Constexpr if |
| 140 | #ifndef MOODYCAMEL_CONSTEXPR_IF |
| 141 | #if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L |
| 142 | #define MOODYCAMEL_CONSTEXPR_IF if constexpr |
| 143 | #define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] |
| 144 | #else |
| 145 | #define MOODYCAMEL_CONSTEXPR_IF if |
| 146 | #define MOODYCAMEL_MAYBE_UNUSED |
| 147 | #endif |
| 148 | #endif |
| 149 | |
| 150 | // Exceptions |
| 151 | #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED |
| 152 | #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) |
| 153 | #define MOODYCAMEL_EXCEPTIONS_ENABLED |
| 154 | #endif |
| 155 | #endif |
| 156 | #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED |
| 157 | #define MOODYCAMEL_TRY try |
| 158 | #define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__) |
| 159 | #define MOODYCAMEL_RETHROW throw |
| 160 | #define MOODYCAMEL_THROW(expr) throw (expr) |
| 161 | #else |
| 162 | #define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true) |
| 163 | #define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false) |
| 164 | #define MOODYCAMEL_RETHROW |
| 165 | #define MOODYCAMEL_THROW(expr) |
| 166 | #endif |
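// Illustration only (not part of the original header): with exceptions disabled, a guarded
// region written as
//   MOODYCAMEL_TRY { do_work(); } MOODYCAMEL_CATCH (...) { handle(); }
// expands to roughly
//   if (true) { do_work(); } else if (false) { handle(); }
// (using `if constexpr` under C++17), so the "catch" body is never executed. The names
// do_work/handle are made up for the sketch.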
| 167 | |
| 168 | #ifndef MOODYCAMEL_NOEXCEPT |
| 169 | #if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) |
| 170 | #define MOODYCAMEL_NOEXCEPT |
| 171 | #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true |
| 172 | #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true |
| 173 | #elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 |
| 174 | // VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-( |
| 175 | // We have to assume *all* non-trivial constructors may throw on VS2012! |
| 176 | #define MOODYCAMEL_NOEXCEPT _NOEXCEPT |
| 177 | #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value) |
| 178 | #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) |
| 179 | #elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 |
| 180 | #define MOODYCAMEL_NOEXCEPT _NOEXCEPT |
| 181 | #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value) |
| 182 | #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) |
| 183 | #else |
| 184 | #define MOODYCAMEL_NOEXCEPT noexcept |
| 185 | #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) |
| 186 | #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) |
| 187 | #endif |
| 188 | #endif |
| 189 | |
| 190 | #ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 191 | #ifdef MCDBGQ_USE_RELACY |
| 192 | #define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 193 | #else |
| 194 | // VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 |
| 195 | // g++ <=4.7 doesn't support thread_local either. |
| 196 | // Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work |
| 197 | #if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) |
| 198 | // Assume `thread_local` is fully supported in all other C++11 compilers/platforms |
| 199 | //#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since several users report having problems with it on |
| 200 | #endif |
| 201 | #endif |
| 202 | #endif |
| 203 | |
| 204 | // VS2012 doesn't support deleted functions. |
| 205 | // In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. |
| 206 | #ifndef MOODYCAMEL_DELETE_FUNCTION |
| 207 | #if defined(_MSC_VER) && _MSC_VER < 1800 |
| 208 | #define MOODYCAMEL_DELETE_FUNCTION |
| 209 | #else |
| 210 | #define MOODYCAMEL_DELETE_FUNCTION = delete |
| 211 | #endif |
| 212 | #endif |
| 213 | |
| 214 | #ifndef MOODYCAMEL_ALIGNAS |
| 215 | // VS2013 doesn't support alignas or alignof |
| 216 | #if defined(_MSC_VER) && _MSC_VER <= 1800 |
| 217 | #define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) |
| 218 | #define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) |
| 219 | #else |
| 220 | #define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) |
| 221 | #define MOODYCAMEL_ALIGNOF(obj) alignof(obj) |
| 222 | #endif |
| 223 | #endif |
| 224 | |
| 225 | |
| 226 | |
| 227 | // Compiler-specific likely/unlikely hints |
| 228 | namespace duckdb_moodycamel { namespace details { |
| 229 | |
| 230 | #if defined(__GNUC__) |
| 231 | static inline bool (likely)(bool x) { return __builtin_expect((x), true); } |
| 232 | // static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } |
| 233 | #else |
| 234 | static inline bool (likely)(bool x) { return x; } |
| 235 | // static inline bool (unlikely)(bool x) { return x; } |
| 236 | #endif |
| 237 | } } |
| 238 | |
| 239 | namespace duckdb_moodycamel { |
| 240 | namespace details { |
| 241 | template<typename T> |
| 242 | struct const_numeric_max { |
| 243 | static_assert(std::is_integral<T>::value, "const_numeric_max can only be used with integers" ); |
| 244 | static const T value = std::numeric_limits<T>::is_signed |
| 245 | ? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1) |
| 246 | : static_cast<T>(-1); |
| 247 | }; |
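// Illustrative values (not present in the original source): for a signed type the shift
// expression above yields the usual two's-complement maximum, e.g.
//   const_numeric_max<std::int8_t>::value   == 127
//   const_numeric_max<std::uint16_t>::value == 65535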
| 248 | |
| 249 | #if defined(__GLIBCXX__) |
| 250 | typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while |
| 251 | #else |
| 252 | typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: |
| 253 | #endif |
| 254 | |
| 255 | // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting |
| 256 | // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. |
| 257 | typedef union { |
| 258 | std_max_align_t x; |
| 259 | long long y; |
| 260 | void* z; |
| 261 | } max_align_t; |
| 262 | } |
| 263 | |
| 264 | // Default traits for the ConcurrentQueue. To change some of the |
| 265 | // traits without re-implementing all of them, inherit from this |
| 266 | // struct and shadow the declarations you wish to be different; |
| 267 | // since the traits are used as a template type parameter, the |
| 268 | // shadowed declarations will be used where defined, and the defaults |
| 269 | // otherwise. |
| 270 | struct ConcurrentQueueDefaultTraits |
| 271 | { |
| 272 | // General-purpose size type. std::size_t is strongly recommended. |
| 273 | typedef std::size_t size_t; |
| 274 | |
| 275 | // The type used for the enqueue and dequeue indices. Must be at least as |
| 276 | // large as size_t. Should be significantly larger than the number of elements |
| 277 | // you expect to hold at once, especially if you have a high turnover rate; |
| 278 | // for example, on 32-bit x86, if you expect to have over a hundred million |
| 279 | // elements or pump several million elements through your queue in a very |
| 280 | // short space of time, using a 32-bit type *may* trigger a race condition. |
| 281 | // A 64-bit int type is recommended in that case, and in practice will |
| 282 | // prevent a race condition no matter the usage of the queue. Note that |
// whether the queue is lock-free with a 64-bit type depends on whether
| 284 | // std::atomic<std::uint64_t> is lock-free, which is platform-specific. |
| 285 | typedef std::size_t index_t; |
| 286 | |
| 287 | // Internally, all elements are enqueued and dequeued from multi-element |
| 288 | // blocks; this is the smallest controllable unit. If you expect few elements |
| 289 | // but many producers, a smaller block size should be favoured. For few producers |
| 290 | // and/or many elements, a larger block size is preferred. A sane default |
| 291 | // is provided. Must be a power of 2. |
| 292 | static const size_t BLOCK_SIZE = 32; |
| 293 | |
| 294 | // For explicit producers (i.e. when using a producer token), the block is |
| 295 | // checked for being empty by iterating through a list of flags, one per element. |
| 296 | // For large block sizes, this is too inefficient, and switching to an atomic |
| 297 | // counter-based approach is faster. The switch is made for block sizes strictly |
| 298 | // larger than this threshold. |
| 299 | static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; |
| 300 | |
| 301 | // How many full blocks can be expected for a single explicit producer? This should |
| 302 | // reflect that number's maximum for optimal performance. Must be a power of 2. |
| 303 | static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; |
| 304 | |
| 305 | // How many full blocks can be expected for a single implicit producer? This should |
| 306 | // reflect that number's maximum for optimal performance. Must be a power of 2. |
| 307 | static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; |
| 308 | |
| 309 | // The initial size of the hash table mapping thread IDs to implicit producers. |
| 310 | // Note that the hash is resized every time it becomes half full. |
// Must be either 0 or a power of two. If 0, implicit production
| 312 | // (using the enqueue methods without an explicit producer token) is disabled. |
| 313 | static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; |
| 314 | |
| 315 | // Controls the number of items that an explicit consumer (i.e. one with a token) |
| 316 | // must consume before it causes all consumers to rotate and move on to the next |
| 317 | // internal queue. |
| 318 | static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; |
| 319 | |
| 320 | // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. |
| 321 | // Enqueue operations that would cause this limit to be surpassed will fail. Note |
| 322 | // that this limit is enforced at the block level (for performance reasons), i.e. |
| 323 | // it's rounded up to the nearest block size. |
| 324 | static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value; |
| 325 | |
| 326 | |
| 327 | #ifndef MCDBGQ_USE_RELACY |
| 328 | // Memory allocation can be customized if needed. |
| 329 | // malloc should return nullptr on failure, and handle alignment like std::malloc. |
| 330 | #if defined(malloc) || defined(free) |
| 331 | // Gah, this is 2015, stop defining macros that break standard code already! |
| 332 | // Work around malloc/free being special macros: |
| 333 | static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } |
| 334 | static inline void WORKAROUND_free(void* ptr) { return free(ptr); } |
| 335 | static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); } |
| 336 | static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); } |
| 337 | #else |
static inline void* malloc(size_t size) { return std::malloc(size); }
static inline void free(void* ptr) { return std::free(ptr); }
| 340 | #endif |
| 341 | #else |
| 342 | // Debug versions when running under the Relacy race detector (ignore |
| 343 | // these in user code) |
| 344 | static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); } |
| 345 | static inline void free(void* ptr) { return rl::rl_free(ptr, $); } |
| 346 | #endif |
| 347 | }; |
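
// A minimal sketch (not part of the original header) of how these traits are meant to be
// customized: inherit from ConcurrentQueueDefaultTraits, shadow only the members you want
// to change, and pass the resulting type as the queue's second template parameter. The
// names below (MyTraits, the chosen BLOCK_SIZE) are purely illustrative.
//
//   struct MyTraits : public duckdb_moodycamel::ConcurrentQueueDefaultTraits
//   {
//       static const size_t BLOCK_SIZE = 256;    // must remain a power of 2
//   };
//   duckdb_moodycamel::ConcurrentQueue<int, MyTraits> queue;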
| 348 | |
| 349 | |
| 350 | // When producing or consuming many elements, the most efficient way is to: |
| 351 | // 1) Use one of the bulk-operation methods of the queue with a token |
| 352 | // 2) Failing that, use the bulk-operation methods without a token |
| 353 | // 3) Failing that, create a token and use that with the single-item methods |
| 354 | // 4) Failing that, use the single-parameter methods of the queue |
| 355 | // Having said that, don't create tokens willy-nilly -- ideally there should be |
| 356 | // a maximum of one token per thread (of each kind). |
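//
// Purely illustrative sketch of the tokened bulk pattern described above (the variable
// names and sizes are made up; ConcurrentQueue and the token types are defined below):
//
//   duckdb_moodycamel::ConcurrentQueue<int> q;
//   duckdb_moodycamel::ProducerToken ptok(q);
//   duckdb_moodycamel::ConsumerToken ctok(q);
//   int in[64] = {}, out[64];
//   q.enqueue_bulk(ptok, in, 64);
//   size_t dequeued = q.try_dequeue_bulk(ctok, out, 64);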
| 357 | struct ProducerToken; |
| 358 | struct ConsumerToken; |
| 359 | |
| 360 | template<typename T, typename Traits> class ConcurrentQueue; |
| 361 | template<typename T, typename Traits> class BlockingConcurrentQueue; |
| 362 | class ConcurrentQueueTests; |
| 363 | |
| 364 | |
| 365 | namespace details |
| 366 | { |
| 367 | struct ConcurrentQueueProducerTypelessBase |
| 368 | { |
| 369 | ConcurrentQueueProducerTypelessBase* next; |
| 370 | std::atomic<bool> inactive; |
| 371 | ProducerToken* token; |
| 372 | |
| 373 | ConcurrentQueueProducerTypelessBase() |
| 374 | : next(nullptr), inactive(false), token(nullptr) |
| 375 | { |
| 376 | } |
| 377 | }; |
| 378 | |
| 379 | template<bool use32> struct _hash_32_or_64 { |
| 380 | static inline std::uint32_t hash(std::uint32_t h) |
| 381 | { |
| 382 | // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp |
| 383 | // Since the thread ID is already unique, all we really want to do is propagate that |
| 384 | // uniqueness evenly across all the bits, so that we can use a subset of the bits while |
| 385 | // reducing collisions significantly |
| 386 | h ^= h >> 16; |
| 387 | h *= 0x85ebca6b; |
| 388 | h ^= h >> 13; |
| 389 | h *= 0xc2b2ae35; |
| 390 | return h ^ (h >> 16); |
| 391 | } |
| 392 | }; |
| 393 | template<> struct _hash_32_or_64<1> { |
| 394 | static inline std::uint64_t hash(std::uint64_t h) |
| 395 | { |
| 396 | h ^= h >> 33; |
| 397 | h *= 0xff51afd7ed558ccd; |
| 398 | h ^= h >> 33; |
| 399 | h *= 0xc4ceb9fe1a85ec53; |
| 400 | return h ^ (h >> 33); |
| 401 | } |
| 402 | }; |
| 403 | template<std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; |
| 404 | |
| 405 | static inline size_t hash_thread_id(thread_id_t id) |
| 406 | { |
| 407 | static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values" ); |
return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(
	thread_id_converter<thread_id_t>::prehash(id)));
| 410 | } |
| 411 | |
| 412 | template<typename T> |
| 413 | static inline bool circular_less_than(T a, T b) |
| 414 | { |
| 415 | #ifdef _MSC_VER |
| 416 | #pragma warning(push) |
| 417 | #pragma warning(disable: 4554) |
| 418 | #endif |
| 419 | static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "circular_less_than is intended to be used only with unsigned integer types" ); |
| 420 | return static_cast<T>(a - b) > static_cast<T>(static_cast<T>(1) << static_cast<T>(sizeof(T) * CHAR_BIT - 1)); |
| 421 | #ifdef _MSC_VER |
| 422 | #pragma warning(pop) |
| 423 | #endif |
| 424 | } |
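// For illustration (not from the original source): the subtraction wraps modulo the type's
// range, so e.g. with std::uint8_t indices
//   circular_less_than<std::uint8_t>(250, 5) == true    (5 is "ahead" of 250 after wrap-around)
//   circular_less_than<std::uint8_t>(5, 250) == false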
| 425 | |
| 426 | template<typename U> |
| 427 | static inline char* align_for(char* ptr) |
| 428 | { |
| 429 | const std::size_t alignment = std::alignment_of<U>::value; |
| 430 | return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment; |
| 431 | } |
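// Worked example (illustrative): on a platform where alignof(double) == 8, a pointer whose
// address is of the form 8k+3 is advanced by (8 - 3) % 8 == 5 bytes by align_for<double>,
// while an already-aligned pointer is returned unchanged.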
| 432 | |
| 433 | template<typename T> |
| 434 | static inline T ceil_to_pow_2(T x) |
| 435 | { |
| 436 | static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types" ); |
| 437 | |
| 438 | // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 |
| 439 | --x; |
| 440 | x |= x >> 1; |
| 441 | x |= x >> 2; |
| 442 | x |= x >> 4; |
| 443 | for (std::size_t i = 1; i < sizeof(T); i <<= 1) { |
| 444 | x |= x >> (i << 3); |
| 445 | } |
| 446 | ++x; |
| 447 | return x; |
| 448 | } |
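// E.g. (illustrative): ceil_to_pow_2<std::uint32_t>(37) == 64, ceil_to_pow_2<std::uint32_t>(64) == 64,
// and ceil_to_pow_2<std::uint32_t>(1) == 1.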
| 449 | |
| 450 | template<typename T> |
| 451 | static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right) |
| 452 | { |
| 453 | T temp = std::move(left.load(std::memory_order_relaxed)); |
| 454 | left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); |
| 455 | right.store(std::move(temp), std::memory_order_relaxed); |
| 456 | } |
| 457 | |
| 458 | template<typename T> |
| 459 | static inline T const& nomove(T const& x) |
| 460 | { |
| 461 | return x; |
| 462 | } |
| 463 | |
| 464 | template<bool Enable> |
| 465 | struct nomove_if |
| 466 | { |
| 467 | template<typename T> |
| 468 | static inline T const& eval(T const& x) |
| 469 | { |
| 470 | return x; |
| 471 | } |
| 472 | }; |
| 473 | |
| 474 | template<> |
| 475 | struct nomove_if<false> |
| 476 | { |
| 477 | template<typename U> |
| 478 | static inline auto eval(U&& x) |
| 479 | -> decltype(std::forward<U>(x)) |
| 480 | { |
| 481 | return std::forward<U>(x); |
| 482 | } |
| 483 | }; |
| 484 | |
| 485 | template<typename It> |
| 486 | static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) |
| 487 | { |
| 488 | return *it; |
| 489 | } |
| 490 | |
| 491 | #if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) |
| 492 | template<typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> { }; |
| 493 | #else |
| 494 | template<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { }; |
| 495 | #endif |
| 496 | |
| 497 | #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 498 | #ifdef MCDBGQ_USE_RELACY |
| 499 | typedef RelacyThreadExitListener ThreadExitListener; |
| 500 | typedef RelacyThreadExitNotifier ThreadExitNotifier; |
| 501 | #else |
| 502 | struct ThreadExitListener |
| 503 | { |
| 504 | typedef void (*callback_t)(void*); |
| 505 | callback_t callback; |
| 506 | void* userData; |
| 507 | |
| 508 | ThreadExitListener* next; // reserved for use by the ThreadExitNotifier |
| 509 | }; |
| 510 | |
| 511 | |
| 512 | class ThreadExitNotifier |
| 513 | { |
| 514 | public: |
| 515 | static void subscribe(ThreadExitListener* listener) |
| 516 | { |
| 517 | auto& tlsInst = instance(); |
| 518 | listener->next = tlsInst.tail; |
| 519 | tlsInst.tail = listener; |
| 520 | } |
| 521 | |
| 522 | static void unsubscribe(ThreadExitListener* listener) |
| 523 | { |
| 524 | auto& tlsInst = instance(); |
| 525 | ThreadExitListener** prev = &tlsInst.tail; |
| 526 | for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { |
| 527 | if (ptr == listener) { |
| 528 | *prev = ptr->next; |
| 529 | break; |
| 530 | } |
| 531 | prev = &ptr->next; |
| 532 | } |
| 533 | } |
| 534 | |
| 535 | private: |
| 536 | ThreadExitNotifier() : tail(nullptr) { } |
| 537 | ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; |
| 538 | ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; |
| 539 | |
| 540 | ~ThreadExitNotifier() |
| 541 | { |
| 542 | // This thread is about to exit, let everyone know! |
| 543 | assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined." ); |
| 544 | for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { |
| 545 | ptr->callback(ptr->userData); |
| 546 | } |
| 547 | } |
| 548 | |
| 549 | // Thread-local |
| 550 | static inline ThreadExitNotifier& instance() |
| 551 | { |
| 552 | static thread_local ThreadExitNotifier notifier; |
| 553 | return notifier; |
| 554 | } |
| 555 | |
| 556 | private: |
| 557 | ThreadExitListener* tail; |
| 558 | }; |
| 559 | #endif |
| 560 | #endif |
| 561 | |
| 562 | template<typename T> struct static_is_lock_free_num { enum { value = 0 }; }; |
| 563 | template<> struct static_is_lock_free_num<signed char> { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; |
| 564 | template<> struct static_is_lock_free_num<short> { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; |
| 565 | template<> struct static_is_lock_free_num<int> { enum { value = ATOMIC_INT_LOCK_FREE }; }; |
| 566 | template<> struct static_is_lock_free_num<long> { enum { value = ATOMIC_LONG_LOCK_FREE }; }; |
| 567 | template<> struct static_is_lock_free_num<long long> { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; |
| 568 | template<typename T> struct static_is_lock_free : static_is_lock_free_num<typename std::make_signed<T>::type> { }; |
| 569 | template<> struct static_is_lock_free<bool> { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; |
| 570 | template<typename U> struct static_is_lock_free<U*> { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; |
| 571 | } |
| 572 | |
| 573 | |
| 574 | struct ProducerToken |
| 575 | { |
| 576 | template<typename T, typename Traits> |
| 577 | explicit ProducerToken(ConcurrentQueue<T, Traits>& queue); |
| 578 | |
| 579 | template<typename T, typename Traits> |
| 580 | explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue); |
| 581 | |
| 582 | ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT |
| 583 | : producer(other.producer) |
| 584 | { |
| 585 | other.producer = nullptr; |
| 586 | if (producer != nullptr) { |
| 587 | producer->token = this; |
| 588 | } |
| 589 | } |
| 590 | |
| 591 | inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT |
| 592 | { |
| 593 | swap(other); |
| 594 | return *this; |
| 595 | } |
| 596 | |
| 597 | void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT |
| 598 | { |
std::swap(producer, other.producer);
| 600 | if (producer != nullptr) { |
| 601 | producer->token = this; |
| 602 | } |
| 603 | if (other.producer != nullptr) { |
| 604 | other.producer->token = &other; |
| 605 | } |
| 606 | } |
| 607 | |
| 608 | // A token is always valid unless: |
| 609 | // 1) Memory allocation failed during construction |
| 610 | // 2) It was moved via the move constructor |
| 611 | // (Note: assignment does a swap, leaving both potentially valid) |
| 612 | // 3) The associated queue was destroyed |
| 613 | // Note that if valid() returns true, that only indicates |
| 614 | // that the token is valid for use with a specific queue, |
| 615 | // but not which one; that's up to the user to track. |
| 616 | inline bool valid() const { return producer != nullptr; } |
| 617 | |
| 618 | ~ProducerToken() |
| 619 | { |
| 620 | if (producer != nullptr) { |
| 621 | producer->token = nullptr; |
producer->inactive.store(true, std::memory_order_release);
| 623 | } |
| 624 | } |
| 625 | |
| 626 | // Disable copying and assignment |
| 627 | ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; |
| 628 | ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; |
| 629 | |
| 630 | private: |
| 631 | template<typename T, typename Traits> friend class ConcurrentQueue; |
| 632 | friend class ConcurrentQueueTests; |
| 633 | |
| 634 | protected: |
| 635 | details::ConcurrentQueueProducerTypelessBase* producer; |
| 636 | }; |
| 637 | |
| 638 | |
| 639 | struct ConsumerToken |
| 640 | { |
| 641 | template<typename T, typename Traits> |
| 642 | explicit ConsumerToken(ConcurrentQueue<T, Traits>& q); |
| 643 | |
| 644 | template<typename T, typename Traits> |
| 645 | explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q); |
| 646 | |
| 647 | ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT |
| 648 | : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) |
| 649 | { |
| 650 | } |
| 651 | |
| 652 | inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT |
| 653 | { |
| 654 | swap(other); |
| 655 | return *this; |
| 656 | } |
| 657 | |
| 658 | void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT |
| 659 | { |
std::swap(initialOffset, other.initialOffset);
std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);
std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);
std::swap(currentProducer, other.currentProducer);
std::swap(desiredProducer, other.desiredProducer);
| 665 | } |
| 666 | |
| 667 | // Disable copying and assignment |
| 668 | ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; |
| 669 | ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; |
| 670 | |
| 671 | private: |
| 672 | template<typename T, typename Traits> friend class ConcurrentQueue; |
| 673 | friend class ConcurrentQueueTests; |
| 674 | |
| 675 | private: // but shared with ConcurrentQueue |
| 676 | std::uint32_t initialOffset; |
| 677 | std::uint32_t lastKnownGlobalOffset; |
| 678 | std::uint32_t itemsConsumedFromCurrent; |
| 679 | details::ConcurrentQueueProducerTypelessBase* currentProducer; |
| 680 | details::ConcurrentQueueProducerTypelessBase* desiredProducer; |
| 681 | }; |
| 682 | |
| 683 | // Need to forward-declare this swap because it's in a namespace. |
| 684 | // See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces |
| 685 | template<typename T, typename Traits> |
| 686 | inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT; |
| 687 | |
| 688 | |
| 689 | template<typename T, typename Traits = ConcurrentQueueDefaultTraits> |
| 690 | class ConcurrentQueue |
| 691 | { |
| 692 | public: |
| 693 | typedef ::duckdb_moodycamel::ProducerToken producer_token_t; |
| 694 | typedef ::duckdb_moodycamel::ConsumerToken consumer_token_t; |
| 695 | |
| 696 | typedef typename Traits::index_t index_t; |
| 697 | typedef typename Traits::size_t size_t; |
| 698 | |
| 699 | static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE); |
| 700 | static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); |
| 701 | static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE); |
| 702 | static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE); |
| 703 | static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); |
| 704 | static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast<std::uint32_t>(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); |
| 705 | #ifdef _MSC_VER |
| 706 | #pragma warning(push) |
| 707 | #pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) |
| 708 | #pragma warning(disable: 4309) // static_cast: Truncation of constant value |
| 709 | #endif |
| 710 | static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max<size_t>::value - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max<size_t>::value : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); |
| 711 | #ifdef _MSC_VER |
| 712 | #pragma warning(pop) |
| 713 | #endif |
| 714 | |
| 715 | static_assert(!std::numeric_limits<size_t>::is_signed && std::is_integral<size_t>::value, "Traits::size_t must be an unsigned integral type" ); |
| 716 | static_assert(!std::numeric_limits<index_t>::is_signed && std::is_integral<index_t>::value, "Traits::index_t must be an unsigned integral type" ); |
| 717 | static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t" ); |
| 718 | static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)" ); |
| 719 | static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)" ); |
| 720 | static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)" ); |
| 721 | static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)" ); |
| 722 | static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2" ); |
| 723 | static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)" ); |
| 724 | |
| 725 | public: |
| 726 | // Creates a queue with at least `capacity` element slots; note that the |
| 727 | // actual number of elements that can be inserted without additional memory |
| 728 | // allocation depends on the number of producers and the block size (e.g. if |
| 729 | // the block size is equal to `capacity`, only a single block will be allocated |
| 730 | // up-front, which means only a single producer will be able to enqueue elements |
| 731 | // without an extra allocation -- blocks aren't shared between producers). |
| 732 | // This method is not thread safe -- it is up to the user to ensure that the |
| 733 | // queue is fully constructed before it starts being used by other threads (this |
| 734 | // includes making the memory effects of construction visible, possibly with a |
| 735 | // memory barrier). |
| 736 | explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) |
| 737 | : producerListTail(nullptr), |
| 738 | producerCount(0), |
| 739 | initialBlockPoolIndex(0), |
| 740 | nextExplicitConsumerId(0), |
| 741 | globalExplicitConsumerOffset(0) |
| 742 | { |
implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
| 744 | populate_initial_implicit_producer_hash(); |
populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
| 746 | |
| 747 | #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG |
| 748 | // Track all the producers using a fully-resolved typed list for |
| 749 | // each kind; this makes it possible to debug them starting from |
| 750 | // the root queue object (otherwise wacky casts are needed that |
| 751 | // don't compile in the debugger's expression evaluator). |
| 752 | explicitProducers.store(nullptr, std::memory_order_relaxed); |
| 753 | implicitProducers.store(nullptr, std::memory_order_relaxed); |
| 754 | #endif |
| 755 | } |
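
// Illustrative note (not in the original source): with the default BLOCK_SIZE of 32,
// e.g. ConcurrentQueue<int> q(100) rounds the requested capacity up to whole blocks,
// i.e. ceil(100 / 32) = 4 blocks (128 element slots) in the initial block pool.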
| 756 | |
| 757 | // Computes the correct amount of pre-allocated blocks for you based |
| 758 | // on the minimum number of elements you want available at any given |
| 759 | // time, and the maximum concurrent number of each type of producer. |
| 760 | ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) |
| 761 | : producerListTail(nullptr), |
| 762 | producerCount(0), |
| 763 | initialBlockPoolIndex(0), |
| 764 | nextExplicitConsumerId(0), |
| 765 | globalExplicitConsumerOffset(0) |
| 766 | { |
implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
| 768 | populate_initial_implicit_producer_hash(); |
| 769 | size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); |
populate_initial_block_list(blocks);
| 771 | |
| 772 | #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG |
| 773 | explicitProducers.store(nullptr, std::memory_order_relaxed); |
| 774 | implicitProducers.store(nullptr, std::memory_order_relaxed); |
| 775 | #endif |
| 776 | } |
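
// Illustrative arithmetic for the block-count formula above (numbers are made up): with the
// default BLOCK_SIZE of 32, minCapacity = 256, maxExplicitProducers = 2 and
// maxImplicitProducers = 4 gives (ceil(256/32) - 1) * (2 + 1) + 2 * (2 + 4) = 7 * 3 + 12
// = 33 pre-allocated blocks.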
| 777 | |
| 778 | // Note: The queue should not be accessed concurrently while it's |
| 779 | // being deleted. It's up to the user to synchronize this. |
| 780 | // This method is not thread safe. |
| 781 | ~ConcurrentQueue() |
| 782 | { |
| 783 | // Destroy producers |
| 784 | auto ptr = producerListTail.load(std::memory_order_relaxed); |
| 785 | while (ptr != nullptr) { |
| 786 | auto next = ptr->next_prod(); |
| 787 | if (ptr->token != nullptr) { |
| 788 | ptr->token->producer = nullptr; |
| 789 | } |
| 790 | destroy(ptr); |
| 791 | ptr = next; |
| 792 | } |
| 793 | |
| 794 | // Destroy implicit producer hash tables |
| 795 | MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { |
| 796 | auto hash = implicitProducerHash.load(std::memory_order_relaxed); |
| 797 | while (hash != nullptr) { |
| 798 | auto prev = hash->prev; |
| 799 | if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically |
| 800 | for (size_t i = 0; i != hash->capacity; ++i) { |
| 801 | hash->entries[i].~ImplicitProducerKVP(); |
| 802 | } |
| 803 | hash->~ImplicitProducerHash(); |
| 804 | (Traits::free)(hash); |
| 805 | } |
| 806 | hash = prev; |
| 807 | } |
| 808 | } |
| 809 | |
| 810 | // Destroy global free list |
| 811 | auto block = freeList.head_unsafe(); |
| 812 | while (block != nullptr) { |
| 813 | auto next = block->freeListNext.load(std::memory_order_relaxed); |
| 814 | if (block->dynamicallyAllocated) { |
| 815 | destroy(block); |
| 816 | } |
| 817 | block = next; |
| 818 | } |
| 819 | |
| 820 | // Destroy initial free list |
| 821 | destroy_array(initialBlockPool, initialBlockPoolSize); |
| 822 | } |
| 823 | |
| 824 | // Disable copying and copy assignment |
| 825 | ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; |
| 826 | ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; |
| 827 | |
| 828 | // Moving is supported, but note that it is *not* a thread-safe operation. |
| 829 | // Nobody can use the queue while it's being moved, and the memory effects |
| 830 | // of that move must be propagated to other threads before they can use it. |
| 831 | // Note: When a queue is moved, its tokens are still valid but can only be |
| 832 | // used with the destination queue (i.e. semantically they are moved along |
| 833 | // with the queue itself). |
| 834 | ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT |
| 835 | : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), |
| 836 | producerCount(other.producerCount.load(std::memory_order_relaxed)), |
| 837 | initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), |
| 838 | initialBlockPool(other.initialBlockPool), |
| 839 | initialBlockPoolSize(other.initialBlockPoolSize), |
| 840 | freeList(std::move(other.freeList)), |
| 841 | nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), |
| 842 | globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) |
| 843 | { |
| 844 | // Move the other one into this, and leave the other one as an empty queue |
implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
| 846 | populate_initial_implicit_producer_hash(); |
| 847 | swap_implicit_producer_hashes(other); |
| 848 | |
| 849 | other.producerListTail.store(nullptr, std::memory_order_relaxed); |
| 850 | other.producerCount.store(0, std::memory_order_relaxed); |
| 851 | other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); |
| 852 | other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); |
| 853 | |
| 854 | #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG |
| 855 | explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); |
| 856 | other.explicitProducers.store(nullptr, std::memory_order_relaxed); |
| 857 | implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); |
| 858 | other.implicitProducers.store(nullptr, std::memory_order_relaxed); |
| 859 | #endif |
| 860 | |
| 861 | other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); |
| 862 | other.initialBlockPoolSize = 0; |
| 863 | other.initialBlockPool = nullptr; |
| 864 | |
| 865 | reown_producers(); |
| 866 | } |
| 867 | |
| 868 | inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT |
| 869 | { |
| 870 | return swap_internal(other); |
| 871 | } |
| 872 | |
| 873 | // Swaps this queue's state with the other's. Not thread-safe. |
| 874 | // Swapping two queues does not invalidate their tokens, however |
| 875 | // the tokens that were created for one queue must be used with |
| 876 | // only the swapped queue (i.e. the tokens are tied to the |
| 877 | // queue's movable state, not the object itself). |
| 878 | inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT |
| 879 | { |
| 880 | swap_internal(other); |
| 881 | } |
| 882 | |
| 883 | private: |
| 884 | ConcurrentQueue& swap_internal(ConcurrentQueue& other) |
| 885 | { |
| 886 | if (this == &other) { |
| 887 | return *this; |
| 888 | } |
| 889 | |
| 890 | details::swap_relaxed(producerListTail, other.producerListTail); |
| 891 | details::swap_relaxed(producerCount, other.producerCount); |
| 892 | details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); |
| 893 | std::swap(initialBlockPool, other.initialBlockPool); |
| 894 | std::swap(initialBlockPoolSize, other.initialBlockPoolSize); |
| 895 | freeList.swap(other.freeList); |
| 896 | details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); |
| 897 | details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); |
| 898 | |
| 899 | swap_implicit_producer_hashes(other); |
| 900 | |
| 901 | reown_producers(); |
| 902 | other.reown_producers(); |
| 903 | |
| 904 | #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG |
| 905 | details::swap_relaxed(explicitProducers, other.explicitProducers); |
| 906 | details::swap_relaxed(implicitProducers, other.implicitProducers); |
| 907 | #endif |
| 908 | |
| 909 | return *this; |
| 910 | } |
| 911 | |
| 912 | public: |
| 913 | // Enqueues a single item (by copying it). |
| 914 | // Allocates memory if required. Only fails if memory allocation fails (or implicit |
| 915 | // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, |
| 916 | // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). |
| 917 | // Thread-safe. |
| 918 | inline bool enqueue(T const& item) |
| 919 | { |
| 920 | MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; |
| 921 | else return inner_enqueue<CanAlloc>(item); |
| 922 | } |
| 923 | |
| 924 | // Enqueues a single item (by moving it, if possible). |
| 925 | // Allocates memory if required. Only fails if memory allocation fails (or implicit |
| 926 | // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, |
| 927 | // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). |
| 928 | // Thread-safe. |
| 929 | inline bool enqueue(T&& item) |
| 930 | { |
| 931 | MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; |
| 932 | else return inner_enqueue<CanAlloc>(std::move(item)); |
| 933 | } |
| 934 | |
| 935 | // Enqueues a single item (by copying it) using an explicit producer token. |
| 936 | // Allocates memory if required. Only fails if memory allocation fails (or |
| 937 | // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). |
| 938 | // Thread-safe. |
| 939 | inline bool enqueue(producer_token_t const& token, T const& item) |
| 940 | { |
| 941 | return inner_enqueue<CanAlloc>(token, item); |
| 942 | } |
| 943 | |
| 944 | // Enqueues a single item (by moving it, if possible) using an explicit producer token. |
| 945 | // Allocates memory if required. Only fails if memory allocation fails (or |
| 946 | // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). |
| 947 | // Thread-safe. |
| 948 | inline bool enqueue(producer_token_t const& token, T&& item) |
| 949 | { |
| 950 | return inner_enqueue<CanAlloc>(token, std::move(item)); |
| 951 | } |
| 952 | |
| 953 | // Enqueues several items. |
| 954 | // Allocates memory if required. Only fails if memory allocation fails (or |
| 955 | // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE |
| 956 | // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). |
| 957 | // Note: Use std::make_move_iterator if the elements should be moved instead of copied. |
| 958 | // Thread-safe. |
| 959 | template<typename It> |
| 960 | bool enqueue_bulk(It itemFirst, size_t count) |
| 961 | { |
| 962 | MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; |
| 963 | else return inner_enqueue_bulk<CanAlloc>(itemFirst, count); |
| 964 | } |
| 965 | |
| 966 | // Enqueues several items using an explicit producer token. |
| 967 | // Allocates memory if required. Only fails if memory allocation fails |
| 968 | // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). |
| 969 | // Note: Use std::make_move_iterator if the elements should be moved |
| 970 | // instead of copied. |
| 971 | // Thread-safe. |
| 972 | template<typename It> |
| 973 | bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) |
| 974 | { |
| 975 | return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count); |
| 976 | } |
| 977 | |
| 978 | // Enqueues a single item (by copying it). |
| 979 | // Does not allocate memory. Fails if not enough room to enqueue (or implicit |
| 980 | // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE |
| 981 | // is 0). |
| 982 | // Thread-safe. |
| 983 | inline bool try_enqueue(T const& item) |
| 984 | { |
| 985 | MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; |
| 986 | else return inner_enqueue<CannotAlloc>(item); |
| 987 | } |
| 988 | |
| 989 | // Enqueues a single item (by moving it, if possible). |
| 990 | // Does not allocate memory (except for one-time implicit producer). |
| 991 | // Fails if not enough room to enqueue (or implicit production is |
| 992 | // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). |
| 993 | // Thread-safe. |
| 994 | inline bool try_enqueue(T&& item) |
| 995 | { |
| 996 | MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; |
| 997 | else return inner_enqueue<CannotAlloc>(std::move(item)); |
| 998 | } |
| 999 | |
| 1000 | // Enqueues a single item (by copying it) using an explicit producer token. |
| 1001 | // Does not allocate memory. Fails if not enough room to enqueue. |
| 1002 | // Thread-safe. |
| 1003 | inline bool try_enqueue(producer_token_t const& token, T const& item) |
| 1004 | { |
| 1005 | return inner_enqueue<CannotAlloc>(token, item); |
| 1006 | } |
| 1007 | |
| 1008 | // Enqueues a single item (by moving it, if possible) using an explicit producer token. |
| 1009 | // Does not allocate memory. Fails if not enough room to enqueue. |
| 1010 | // Thread-safe. |
| 1011 | inline bool try_enqueue(producer_token_t const& token, T&& item) |
| 1012 | { |
| 1013 | return inner_enqueue<CannotAlloc>(token, std::move(item)); |
| 1014 | } |
| 1015 | |
| 1016 | // Enqueues several items. |
| 1017 | // Does not allocate memory (except for one-time implicit producer). |
| 1018 | // Fails if not enough room to enqueue (or implicit production is |
| 1019 | // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). |
| 1020 | // Note: Use std::make_move_iterator if the elements should be moved |
| 1021 | // instead of copied. |
| 1022 | // Thread-safe. |
| 1023 | template<typename It> |
| 1024 | bool try_enqueue_bulk(It itemFirst, size_t count) |
| 1025 | { |
| 1026 | MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; |
| 1027 | else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count); |
| 1028 | } |
| 1029 | |
| 1030 | // Enqueues several items using an explicit producer token. |
| 1031 | // Does not allocate memory. Fails if not enough room to enqueue. |
| 1032 | // Note: Use std::make_move_iterator if the elements should be moved |
| 1033 | // instead of copied. |
| 1034 | // Thread-safe. |
| 1035 | template<typename It> |
| 1036 | bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) |
| 1037 | { |
| 1038 | return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count); |
| 1039 | } |
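
// Illustrative usage note (not part of the original header): the try_* variants above
// generally avoid allocating (see the caveats in their comments), so a caller that prefers
// bounded memory use can fall back explicitly, e.g.
//   if (!q.try_enqueue(42)) {
//       bool ok = q.enqueue(42);   // may allocate a fresh block
//   }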
| 1040 | |
| 1041 | |
| 1042 | |
| 1043 | // Attempts to dequeue from the queue. |
| 1044 | // Returns false if all producer streams appeared empty at the time they |
| 1045 | // were checked (so, the queue is likely but not guaranteed to be empty). |
| 1046 | // Never allocates. Thread-safe. |
| 1047 | template<typename U> |
| 1048 | bool try_dequeue(U& item) |
| 1049 | { |
| 1050 | // Instead of simply trying each producer in turn (which could cause needless contention on the first |
| 1051 | // producer), we score them heuristically. |
| 1052 | size_t nonEmptyCount = 0; |
| 1053 | ProducerBase* best = nullptr; |
| 1054 | size_t bestSize = 0; |
| 1055 | for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) { |
| 1056 | auto size = ptr->size_approx(); |
| 1057 | if (size > 0) { |
| 1058 | if (size > bestSize) { |
| 1059 | bestSize = size; |
| 1060 | best = ptr; |
| 1061 | } |
| 1062 | ++nonEmptyCount; |
| 1063 | } |
| 1064 | } |
| 1065 | |
| 1066 | // If there was at least one non-empty queue but it appears empty at the time |
| 1067 | // we try to dequeue from it, we need to make sure every queue's been tried |
| 1068 | if (nonEmptyCount > 0) { |
if ((details::likely)(best->dequeue(item))) {
| 1070 | return true; |
| 1071 | } |
| 1072 | for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { |
| 1073 | if (ptr != best && ptr->dequeue(item)) { |
| 1074 | return true; |
| 1075 | } |
| 1076 | } |
| 1077 | } |
| 1078 | return false; |
| 1079 | } |
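
// Illustrative drain pattern (not from the original source); note that a false return only
// means the queue *appeared* empty at the time of the check:
//   int item;
//   while (q.try_dequeue(item)) {
//       // process item
//   }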
| 1080 | |
| 1081 | // Attempts to dequeue from the queue. |
| 1082 | // Returns false if all producer streams appeared empty at the time they |
| 1083 | // were checked (so, the queue is likely but not guaranteed to be empty). |
| 1084 | // This differs from the try_dequeue(item) method in that this one does |
| 1085 | // not attempt to reduce contention by interleaving the order that producer |
| 1086 | // streams are dequeued from. So, using this method can reduce overall throughput |
| 1087 | // under contention, but will give more predictable results in single-threaded |
| 1088 | // consumer scenarios. This is mostly only useful for internal unit tests. |
| 1089 | // Never allocates. Thread-safe. |
| 1090 | template<typename U> |
| 1091 | bool try_dequeue_non_interleaved(U& item) |
| 1092 | { |
| 1093 | for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { |
| 1094 | if (ptr->dequeue(item)) { |
| 1095 | return true; |
| 1096 | } |
| 1097 | } |
| 1098 | return false; |
| 1099 | } |
| 1100 | |
| 1101 | // Attempts to dequeue from the queue using an explicit consumer token. |
| 1102 | // Returns false if all producer streams appeared empty at the time they |
| 1103 | // were checked (so, the queue is likely but not guaranteed to be empty). |
| 1104 | // Never allocates. Thread-safe. |
| 1105 | template<typename U> |
| 1106 | bool try_dequeue(consumer_token_t& token, U& item) |
| 1107 | { |
| 1108 | // The idea is roughly as follows: |
| 1109 | // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less |
| 1110 | // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place |
| 1111 | // If there's no items where you're supposed to be, keep moving until you find a producer with some items |
// If the global offset has not changed but you've run out of items to consume, move over from your current position until you find a producer with something in it
| 1113 | |
if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
| 1115 | if (!update_current_producer_after_rotation(token)) { |
| 1116 | return false; |
| 1117 | } |
| 1118 | } |
| 1119 | |
| 1120 | // If there was at least one non-empty queue but it appears empty at the time |
| 1121 | // we try to dequeue from it, we need to make sure every queue's been tried |
| 1122 | if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) { |
| 1123 | if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { |
globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
| 1125 | } |
| 1126 | return true; |
| 1127 | } |
| 1128 | |
| 1129 | auto tail = producerListTail.load(std::memory_order_acquire); |
| 1130 | auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod(); |
| 1131 | if (ptr == nullptr) { |
| 1132 | ptr = tail; |
| 1133 | } |
| 1134 | while (ptr != static_cast<ProducerBase*>(token.currentProducer)) { |
| 1135 | if (ptr->dequeue(item)) { |
| 1136 | token.currentProducer = ptr; |
| 1137 | token.itemsConsumedFromCurrent = 1; |
| 1138 | return true; |
| 1139 | } |
| 1140 | ptr = ptr->next_prod(); |
| 1141 | if (ptr == nullptr) { |
| 1142 | ptr = tail; |
| 1143 | } |
| 1144 | } |
| 1145 | return false; |
| 1146 | } |
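// Usage sketch for the token-based overload (illustrative; `q` and `Widget`
// are hypothetical names). The token is constructed from the queue and should
// live as long as the consumer thread keeps dequeueing:
//
//     duckdb_moodycamel::ConcurrentQueue<Widget> q;
//     duckdb_moodycamel::ConsumerToken tok(q);
//     Widget w;
//     while (q.try_dequeue(tok, w)) {
//         // The token tracks which producer stream is being drained and
//         // rotates after EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE
//         // items, per the scheme described above.
//     }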
| 1147 | |
| 1148 | // Attempts to dequeue several elements from the queue. |
| 1149 | // Returns the number of items actually dequeued. |
| 1150 | // Returns 0 if all producer streams appeared empty at the time they |
| 1151 | // were checked (so, the queue is likely but not guaranteed to be empty). |
| 1152 | // Never allocates. Thread-safe. |
| 1153 | template<typename It> |
| 1154 | size_t try_dequeue_bulk(It itemFirst, size_t max) |
| 1155 | { |
| 1156 | size_t count = 0; |
| 1157 | for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { |
| 1158 | count += ptr->dequeue_bulk(itemFirst, max - count); |
| 1159 | if (count == max) { |
| 1160 | break; |
| 1161 | } |
| 1162 | } |
| 1163 | return count; |
| 1164 | } |
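// Bulk-dequeue sketch (hypothetical names): draining up to 64 items at a time
// amortizes the per-call overhead compared to dequeueing one item at a time:
//
//     Widget buffer[64];
//     size_t n;
//     while ((n = q.try_dequeue_bulk(buffer, 64)) != 0) {
//         for (size_t i = 0; i != n; ++i) {
//             process(buffer[i]);
//         }
//     }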
| 1165 | |
| 1166 | // Attempts to dequeue several elements from the queue using an explicit consumer token. |
| 1167 | // Returns the number of items actually dequeued. |
| 1168 | // Returns 0 if all producer streams appeared empty at the time they |
| 1169 | // were checked (so, the queue is likely but not guaranteed to be empty). |
| 1170 | // Never allocates. Thread-safe. |
| 1171 | template<typename It> |
| 1172 | size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) |
| 1173 | { |
if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
| 1175 | if (!update_current_producer_after_rotation(token)) { |
| 1176 | return 0; |
| 1177 | } |
| 1178 | } |
| 1179 | |
| 1180 | size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max); |
| 1181 | if (count == max) { |
| 1182 | if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { |
globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
| 1184 | } |
| 1185 | return max; |
| 1186 | } |
| 1187 | token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count); |
| 1188 | max -= count; |
| 1189 | |
| 1190 | auto tail = producerListTail.load(std::memory_order_acquire); |
| 1191 | auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod(); |
| 1192 | if (ptr == nullptr) { |
| 1193 | ptr = tail; |
| 1194 | } |
| 1195 | while (ptr != static_cast<ProducerBase*>(token.currentProducer)) { |
| 1196 | auto dequeued = ptr->dequeue_bulk(itemFirst, max); |
| 1197 | count += dequeued; |
| 1198 | if (dequeued != 0) { |
| 1199 | token.currentProducer = ptr; |
| 1200 | token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued); |
| 1201 | } |
| 1202 | if (dequeued == max) { |
| 1203 | break; |
| 1204 | } |
| 1205 | max -= dequeued; |
| 1206 | ptr = ptr->next_prod(); |
| 1207 | if (ptr == nullptr) { |
| 1208 | ptr = tail; |
| 1209 | } |
| 1210 | } |
| 1211 | return count; |
| 1212 | } |
| 1213 | |
| 1214 | |
| 1215 | |
| 1216 | // Attempts to dequeue from a specific producer's inner queue. |
| 1217 | // If you happen to know which producer you want to dequeue from, this |
| 1218 | // is significantly faster than using the general-case try_dequeue methods. |
| 1219 | // Returns false if the producer's queue appeared empty at the time it |
| 1220 | // was checked (so, the queue is likely but not guaranteed to be empty). |
| 1221 | // Never allocates. Thread-safe. |
| 1222 | template<typename U> |
| 1223 | inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) |
| 1224 | { |
| 1225 | return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item); |
| 1226 | } |
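// Sketch of dequeueing from one known producer stream (hypothetical names);
// the producer token is constructed from the queue and passed to both the
// enqueue and dequeue sides:
//
//     duckdb_moodycamel::ProducerToken ptok(q);
//     q.enqueue(ptok, Widget());
//     Widget w;
//     bool got = q.try_dequeue_from_producer(ptok, w);  // inspects only ptok's stream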
| 1227 | |
| 1228 | // Attempts to dequeue several elements from a specific producer's inner queue. |
| 1229 | // Returns the number of items actually dequeued. |
| 1230 | // If you happen to know which producer you want to dequeue from, this |
| 1231 | // is significantly faster than using the general-case try_dequeue methods. |
| 1232 | // Returns 0 if the producer's queue appeared empty at the time it |
| 1233 | // was checked (so, the queue is likely but not guaranteed to be empty). |
| 1234 | // Never allocates. Thread-safe. |
| 1235 | template<typename It> |
| 1236 | inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) |
| 1237 | { |
| 1238 | return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max); |
| 1239 | } |
| 1240 | |
| 1241 | |
| 1242 | // Returns an estimate of the total number of elements currently in the queue. This |
| 1243 | // estimate is only accurate if the queue has completely stabilized before it is called |
| 1244 | // (i.e. all enqueue and dequeue operations have completed and their memory effects are |
| 1245 | // visible on the calling thread, and no further operations start while this method is |
| 1246 | // being called). |
| 1247 | // Thread-safe. |
| 1248 | size_t size_approx() const |
| 1249 | { |
| 1250 | size_t size = 0; |
| 1251 | for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { |
| 1252 | size += ptr->size_approx(); |
| 1253 | } |
| 1254 | return size; |
| 1255 | } |
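// Illustrative use (hypothetical names): the estimate is only meaningful once
// all producers and consumers have quiesced and their effects are visible:
//
//     join_all_workers();              // hypothetical synchronization point
//     assert(q.size_approx() == 0);    // only now can the estimate be treated as exact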
| 1256 | |
| 1257 | |
| 1258 | // Returns true if the underlying atomic variables used by |
| 1259 | // the queue are lock-free (they should be on most platforms). |
| 1260 | // Thread-safe. |
| 1261 | static bool is_lock_free() |
| 1262 | { |
| 1263 | return |
| 1264 | details::static_is_lock_free<bool>::value == 2 && |
| 1265 | details::static_is_lock_free<size_t>::value == 2 && |
| 1266 | details::static_is_lock_free<std::uint32_t>::value == 2 && |
| 1267 | details::static_is_lock_free<index_t>::value == 2 && |
| 1268 | details::static_is_lock_free<void*>::value == 2 && |
| 1269 | details::static_is_lock_free<typename details::thread_id_converter<details::thread_id_t>::thread_id_numeric_size_t>::value == 2; |
| 1270 | } |
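// Sketch: code that must not fall back to lock-based atomics can check this
// once at startup (hypothetical usage):
//
//     if (!duckdb_moodycamel::ConcurrentQueue<Widget>::is_lock_free()) {
//         // fall back to a different queue, or accept the locking behaviour
//     }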
| 1271 | |
| 1272 | |
| 1273 | private: |
| 1274 | friend struct ProducerToken; |
| 1275 | friend struct ConsumerToken; |
| 1276 | struct ExplicitProducer; |
| 1277 | friend struct ExplicitProducer; |
| 1278 | struct ImplicitProducer; |
| 1279 | friend struct ImplicitProducer; |
| 1280 | friend class ConcurrentQueueTests; |
| 1281 | |
| 1282 | enum AllocationMode { CanAlloc, CannotAlloc }; |
| 1283 | |
| 1284 | |
| 1285 | /////////////////////////////// |
| 1286 | // Queue methods |
| 1287 | /////////////////////////////// |
| 1288 | |
| 1289 | template<AllocationMode canAlloc, typename U> |
| 1290 | inline bool inner_enqueue(producer_token_t const& token, U&& element) |
| 1291 | { |
| 1292 | return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element)); |
| 1293 | } |
| 1294 | |
| 1295 | template<AllocationMode canAlloc, typename U> |
| 1296 | inline bool inner_enqueue(U&& element) |
| 1297 | { |
| 1298 | auto producer = get_or_add_implicit_producer(); |
| 1299 | return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element)); |
| 1300 | } |
| 1301 | |
| 1302 | template<AllocationMode canAlloc, typename It> |
| 1303 | inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) |
| 1304 | { |
| 1305 | return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count); |
| 1306 | } |
| 1307 | |
| 1308 | template<AllocationMode canAlloc, typename It> |
| 1309 | inline bool inner_enqueue_bulk(It itemFirst, size_t count) |
| 1310 | { |
| 1311 | auto producer = get_or_add_implicit_producer(); |
| 1312 | return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count); |
| 1313 | } |
| 1314 | |
| 1315 | inline bool update_current_producer_after_rotation(consumer_token_t& token) |
| 1316 | { |
| 1317 | // Ah, there's been a rotation, figure out where we should be! |
| 1318 | auto tail = producerListTail.load(std::memory_order_acquire); |
| 1319 | if (token.desiredProducer == nullptr && tail == nullptr) { |
| 1320 | return false; |
| 1321 | } |
auto prodCount = producerCount.load(std::memory_order_relaxed);
auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed);
| 1324 | if (token.desiredProducer == nullptr) { |
| 1325 | // Aha, first time we're dequeueing anything. |
| 1326 | // Figure out our local position |
| 1327 | // Note: offset is from start, not end, but we're traversing from end -- subtract from count first |
| 1328 | std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); |
| 1329 | token.desiredProducer = tail; |
| 1330 | for (std::uint32_t i = 0; i != offset; ++i) { |
| 1331 | token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod(); |
| 1332 | if (token.desiredProducer == nullptr) { |
| 1333 | token.desiredProducer = tail; |
| 1334 | } |
| 1335 | } |
| 1336 | } |
| 1337 | |
| 1338 | std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; |
| 1339 | if (delta >= prodCount) { |
| 1340 | delta = delta % prodCount; |
| 1341 | } |
| 1342 | for (std::uint32_t i = 0; i != delta; ++i) { |
| 1343 | token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod(); |
| 1344 | if (token.desiredProducer == nullptr) { |
| 1345 | token.desiredProducer = tail; |
| 1346 | } |
| 1347 | } |
| 1348 | |
| 1349 | token.lastKnownGlobalOffset = globalOffset; |
| 1350 | token.currentProducer = token.desiredProducer; |
| 1351 | token.itemsConsumedFromCurrent = 0; |
| 1352 | return true; |
| 1353 | } |
| 1354 | |
| 1355 | |
| 1356 | /////////////////////////// |
| 1357 | // Free list |
| 1358 | /////////////////////////// |
| 1359 | |
| 1360 | template <typename N> |
| 1361 | struct FreeListNode |
| 1362 | { |
| 1363 | FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } |
| 1364 | |
| 1365 | std::atomic<std::uint32_t> freeListRefs; |
| 1366 | std::atomic<N*> freeListNext; |
| 1367 | }; |
| 1368 | |
| 1369 | // A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but |
| 1370 | // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly |
| 1371 | // speedy under low contention. |
| 1372 | template<typename N> // N must inherit FreeListNode or have the same fields (and initialization of them) |
| 1373 | struct FreeList |
| 1374 | { |
| 1375 | FreeList() : freeListHead(nullptr) { } |
| 1376 | FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } |
| 1377 | void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } |
| 1378 | |
| 1379 | FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; |
| 1380 | FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; |
| 1381 | |
| 1382 | inline void add(N* node) |
| 1383 | { |
| 1384 | #ifdef MCDBGQ_NOLOCKFREE_FREELIST |
| 1385 | debug::DebugLock lock(mutex); |
| 1386 | #endif |
| 1387 | // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to |
| 1388 | // set it using a fetch_add |
| 1389 | if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { |
| 1390 | // Oh look! We were the last ones referencing this node, and we know |
| 1391 | // we want to add it to the free list, so let's do it! |
| 1392 | add_knowing_refcount_is_zero(node); |
| 1393 | } |
| 1394 | } |
| 1395 | |
| 1396 | inline N* try_get() |
| 1397 | { |
| 1398 | #ifdef MCDBGQ_NOLOCKFREE_FREELIST |
| 1399 | debug::DebugLock lock(mutex); |
| 1400 | #endif |
| 1401 | auto head = freeListHead.load(std::memory_order_acquire); |
| 1402 | while (head != nullptr) { |
| 1403 | auto prevHead = head; |
| 1404 | auto refs = head->freeListRefs.load(std::memory_order_relaxed); |
| 1405 | if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) { |
| 1406 | head = freeListHead.load(std::memory_order_acquire); |
| 1407 | continue; |
| 1408 | } |
| 1409 | |
| 1410 | // Good, reference count has been incremented (it wasn't at zero), which means we can read the |
| 1411 | // next and not worry about it changing between now and the time we do the CAS |
| 1412 | auto next = head->freeListNext.load(std::memory_order_relaxed); |
| 1413 | if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { |
| 1414 | // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no |
| 1415 | // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). |
| 1416 | assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); |
| 1417 | |
| 1418 | // Decrease refcount twice, once for our ref, and once for the list's ref |
| 1419 | head->freeListRefs.fetch_sub(2, std::memory_order_release); |
| 1420 | return head; |
| 1421 | } |
| 1422 | |
| 1423 | // OK, the head must have changed on us, but we still need to decrease the refcount we increased. |
| 1424 | // Note that we don't need to release any memory effects, but we do need to ensure that the reference |
| 1425 | // count decrement happens-after the CAS on the head. |
| 1426 | refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); |
| 1427 | if (refs == SHOULD_BE_ON_FREELIST + 1) { |
add_knowing_refcount_is_zero(prevHead);
| 1429 | } |
| 1430 | } |
| 1431 | |
| 1432 | return nullptr; |
| 1433 | } |
| 1434 | |
| 1435 | // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) |
| 1436 | N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } |
| 1437 | |
| 1438 | private: |
| 1439 | inline void add_knowing_refcount_is_zero(N* node) |
| 1440 | { |
| 1441 | // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run |
| 1442 | // only one copy of this method per node at a time, i.e. the single thread case), then we know |
| 1443 | // we can safely change the next pointer of the node; however, once the refcount is back above |
| 1444 | // zero, then other threads could increase it (happens under heavy contention, when the refcount |
| 1445 | // goes to zero in between a load and a refcount increment of a node in try_get, then back up to |
| 1446 | // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS |
| 1447 | // to add the node to the actual list fails, decrease the refcount and leave the add operation to |
| 1448 | // the next thread who puts the refcount back at zero (which could be us, hence the loop). |
| 1449 | auto head = freeListHead.load(std::memory_order_relaxed); |
| 1450 | while (true) { |
| 1451 | node->freeListNext.store(head, std::memory_order_relaxed); |
| 1452 | node->freeListRefs.store(1, std::memory_order_release); |
| 1453 | if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { |
| 1454 | // Hmm, the add failed, but we can only try again when the refcount goes back to zero |
| 1455 | if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { |
| 1456 | continue; |
| 1457 | } |
| 1458 | } |
| 1459 | return; |
| 1460 | } |
| 1461 | } |
| 1462 | |
| 1463 | private: |
| 1464 | // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) |
| 1465 | std::atomic<N*> freeListHead; |
| 1466 | |
| 1467 | static const std::uint32_t REFS_MASK = 0x7FFFFFFF; |
| 1468 | static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; |
| 1469 | |
| 1470 | #ifdef MCDBGQ_NOLOCKFREE_FREELIST |
| 1471 | debug::DebugMutex mutex; |
| 1472 | #endif |
| 1473 | }; |
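// Worked example of the freeListRefs encoding used above (illustrative only):
// bit 31 (SHOULD_BE_ON_FREELIST) records the intent to put the node back on
// the list, while bits 0-30 (REFS_MASK) count outstanding references. A value
// of 0x80000001 (== SHOULD_BE_ON_FREELIST | 1) therefore means "one reference
// is still held, and whoever drops it to zero must call
// add_knowing_refcount_is_zero() to complete the deferred add".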
| 1474 | |
| 1475 | |
| 1476 | /////////////////////////// |
| 1477 | // Block |
| 1478 | /////////////////////////// |
| 1479 | |
| 1480 | enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; |
| 1481 | |
| 1482 | struct Block |
| 1483 | { |
| 1484 | Block() |
| 1485 | : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true) |
| 1486 | { |
| 1487 | #ifdef MCDBGQ_TRACKMEM |
| 1488 | owner = nullptr; |
| 1489 | #endif |
| 1490 | } |
| 1491 | |
| 1492 | template<InnerQueueContext context> |
| 1493 | inline bool is_empty() const |
| 1494 | { |
| 1495 | MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { |
| 1496 | // Check flags |
| 1497 | for (size_t i = 0; i < BLOCK_SIZE; ++i) { |
| 1498 | if (!emptyFlags[i].load(std::memory_order_relaxed)) { |
| 1499 | return false; |
| 1500 | } |
| 1501 | } |
| 1502 | |
| 1503 | // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set |
std::atomic_thread_fence(std::memory_order_acquire);
| 1505 | return true; |
| 1506 | } |
| 1507 | else { |
| 1508 | // Check counter |
| 1509 | if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { |
std::atomic_thread_fence(std::memory_order_acquire);
| 1511 | return true; |
| 1512 | } |
| 1513 | assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); |
| 1514 | return false; |
| 1515 | } |
| 1516 | } |
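// Note (assuming the default traits, where BLOCK_SIZE and
// EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD are both 32): explicit producers take
// the per-slot emptyFlags branch above, while implicit producers -- and any
// configuration with larger blocks -- fall back to the shared dequeue counter.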
| 1517 | |
| 1518 | // Returns true if the block is now empty (does not apply in explicit context) |
| 1519 | template<InnerQueueContext context> |
| 1520 | inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) |
| 1521 | { |
| 1522 | MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { |
| 1523 | // Set flag |
| 1524 | assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); |
| 1525 | emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(true, std::memory_order_release); |
| 1526 | return false; |
| 1527 | } |
| 1528 | else { |
| 1529 | // Increment counter |
| 1530 | auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); |
| 1531 | assert(prevVal < BLOCK_SIZE); |
| 1532 | return prevVal == BLOCK_SIZE - 1; |
| 1533 | } |
| 1534 | } |
| 1535 | |
| 1536 | // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). |
| 1537 | // Returns true if the block is now empty (does not apply in explicit context). |
| 1538 | template<InnerQueueContext context> |
| 1539 | inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) |
| 1540 | { |
| 1541 | MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { |
| 1542 | // Set flags |
std::atomic_thread_fence(std::memory_order_release);
| 1544 | i = BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1; |
| 1545 | for (size_t j = 0; j != count; ++j) { |
| 1546 | assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); |
| 1547 | emptyFlags[i + j].store(true, std::memory_order_relaxed); |
| 1548 | } |
| 1549 | return false; |
| 1550 | } |
| 1551 | else { |
| 1552 | // Increment counter |
| 1553 | auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); |
| 1554 | assert(prevVal + count <= BLOCK_SIZE); |
| 1555 | return prevVal + count == BLOCK_SIZE; |
| 1556 | } |
| 1557 | } |
| 1558 | |
| 1559 | template<InnerQueueContext context> |
| 1560 | inline void set_all_empty() |
| 1561 | { |
| 1562 | MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { |
| 1563 | // Set all flags |
| 1564 | for (size_t i = 0; i != BLOCK_SIZE; ++i) { |
| 1565 | emptyFlags[i].store(true, std::memory_order_relaxed); |
| 1566 | } |
| 1567 | } |
| 1568 | else { |
| 1569 | // Reset counter |
| 1570 | elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); |
| 1571 | } |
| 1572 | } |
| 1573 | |
| 1574 | template<InnerQueueContext context> |
| 1575 | inline void reset_empty() |
| 1576 | { |
| 1577 | MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { |
| 1578 | // Reset flags |
| 1579 | for (size_t i = 0; i != BLOCK_SIZE; ++i) { |
| 1580 | emptyFlags[i].store(false, std::memory_order_relaxed); |
| 1581 | } |
| 1582 | } |
| 1583 | else { |
| 1584 | // Reset counter |
| 1585 | elementsCompletelyDequeued.store(0, std::memory_order_relaxed); |
| 1586 | } |
| 1587 | } |
| 1588 | |
| 1589 | inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); } |
| 1590 | inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); } |
| 1591 | |
| 1592 | private: |
| 1593 | static_assert(std::alignment_of<T>::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time" ); |
| 1594 | MOODYCAMEL_ALIGNAS(MOODYCAMEL_ALIGNOF(T)) char elements[sizeof(T) * BLOCK_SIZE]; |
| 1595 | public: |
| 1596 | Block* next; |
| 1597 | std::atomic<size_t> elementsCompletelyDequeued; |
| 1598 | std::atomic<bool> emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1]; |
| 1599 | public: |
| 1600 | std::atomic<std::uint32_t> freeListRefs; |
| 1601 | std::atomic<Block*> freeListNext; |
| 1602 | std::atomic<bool> shouldBeOnFreeList; |
| 1603 | bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' |
| 1604 | |
| 1605 | #ifdef MCDBGQ_TRACKMEM |
| 1606 | void* owner; |
| 1607 | #endif |
| 1608 | }; |
| 1609 | static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping" ); |
| 1610 | |
| 1611 | |
| 1612 | #ifdef MCDBGQ_TRACKMEM |
| 1613 | public: |
| 1614 | struct MemStats; |
| 1615 | private: |
| 1616 | #endif |
| 1617 | |
| 1618 | /////////////////////////// |
| 1619 | // Producer base |
| 1620 | /////////////////////////// |
| 1621 | |
| 1622 | struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase |
| 1623 | { |
| 1624 | ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : |
| 1625 | tailIndex(0), |
| 1626 | headIndex(0), |
| 1627 | dequeueOptimisticCount(0), |
| 1628 | dequeueOvercommit(0), |
| 1629 | tailBlock(nullptr), |
| 1630 | isExplicit(isExplicit_), |
| 1631 | parent(parent_) |
| 1632 | { |
| 1633 | } |
| 1634 | |
| 1635 | virtual ~ProducerBase() { }; |
| 1636 | |
| 1637 | template<typename U> |
| 1638 | inline bool dequeue(U& element) |
| 1639 | { |
| 1640 | if (isExplicit) { |
| 1641 | return static_cast<ExplicitProducer*>(this)->dequeue(element); |
| 1642 | } |
| 1643 | else { |
| 1644 | return static_cast<ImplicitProducer*>(this)->dequeue(element); |
| 1645 | } |
| 1646 | } |
| 1647 | |
| 1648 | template<typename It> |
| 1649 | inline size_t dequeue_bulk(It& itemFirst, size_t max) |
| 1650 | { |
| 1651 | if (isExplicit) { |
| 1652 | return static_cast<ExplicitProducer*>(this)->dequeue_bulk(itemFirst, max); |
| 1653 | } |
| 1654 | else { |
| 1655 | return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max); |
| 1656 | } |
| 1657 | } |
| 1658 | |
| 1659 | inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); } |
| 1660 | |
| 1661 | inline size_t size_approx() const |
| 1662 | { |
| 1663 | auto tail = tailIndex.load(std::memory_order_relaxed); |
| 1664 | auto head = headIndex.load(std::memory_order_relaxed); |
| 1665 | return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0; |
| 1666 | } |
| 1667 | |
| 1668 | inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } |
| 1669 | protected: |
| 1670 | std::atomic<index_t> tailIndex; // Where to enqueue to next |
| 1671 | std::atomic<index_t> headIndex; // Where to dequeue from next |
| 1672 | |
| 1673 | std::atomic<index_t> dequeueOptimisticCount; |
| 1674 | std::atomic<index_t> dequeueOvercommit; |
| 1675 | |
| 1676 | Block* tailBlock; |
| 1677 | |
| 1678 | public: |
| 1679 | bool isExplicit; |
| 1680 | ConcurrentQueue* parent; |
| 1681 | |
| 1682 | protected: |
| 1683 | #ifdef MCDBGQ_TRACKMEM |
| 1684 | friend struct MemStats; |
| 1685 | #endif |
| 1686 | }; |
| 1687 | |
| 1688 | |
| 1689 | /////////////////////////// |
| 1690 | // Explicit queue |
| 1691 | /////////////////////////// |
| 1692 | |
| 1693 | struct ExplicitProducer : public ProducerBase |
| 1694 | { |
| 1695 | explicit ExplicitProducer(ConcurrentQueue* parent_) : |
| 1696 | ProducerBase(parent_, true), |
| 1697 | blockIndex(nullptr), |
| 1698 | pr_blockIndexSlotsUsed(0), |
| 1699 | pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), |
| 1700 | pr_blockIndexFront(0), |
| 1701 | pr_blockIndexEntries(nullptr), |
| 1702 | pr_blockIndexRaw(nullptr) |
| 1703 | { |
| 1704 | size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; |
| 1705 | if (poolBasedIndexSize > pr_blockIndexSize) { |
| 1706 | pr_blockIndexSize = poolBasedIndexSize; |
| 1707 | } |
| 1708 | |
new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
| 1710 | } |
| 1711 | |
| 1712 | ~ExplicitProducer() |
| 1713 | { |
| 1714 | // Destruct any elements not yet dequeued. |
| 1715 | // Since we're in the destructor, we can assume all elements |
| 1716 | // are either completely dequeued or completely not (no halfways). |
| 1717 | if (this->tailBlock != nullptr) { // Note this means there must be a block index too |
| 1718 | // First find the block that's partially dequeued, if any |
| 1719 | Block* halfDequeuedBlock = nullptr; |
| 1720 | if ((this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) { |
| 1721 | // The head's not on a block boundary, meaning a block somewhere is partially dequeued |
| 1722 | // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) |
| 1723 | size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); |
| 1724 | while (details::circular_less_than<index_t>(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) { |
| 1725 | i = (i + 1) & (pr_blockIndexSize - 1); |
| 1726 | } |
| 1727 | assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); |
| 1728 | halfDequeuedBlock = pr_blockIndexEntries[i].block; |
| 1729 | } |
| 1730 | |
| 1731 | // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) |
| 1732 | auto block = this->tailBlock; |
| 1733 | do { |
| 1734 | block = block->next; |
| 1735 | if (block->ConcurrentQueue::Block::template is_empty<explicit_context>()) { |
| 1736 | continue; |
| 1737 | } |
| 1738 | |
| 1739 | size_t i = 0; // Offset into block |
| 1740 | if (block == halfDequeuedBlock) { |
| 1741 | i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)); |
| 1742 | } |
| 1743 | |
| 1744 | // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index |
| 1745 | auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)); |
| 1746 | while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { |
| 1747 | (*block)[i++]->~T(); |
| 1748 | } |
| 1749 | } while (block != this->tailBlock); |
| 1750 | } |
| 1751 | |
| 1752 | // Destroy all blocks that we own |
| 1753 | if (this->tailBlock != nullptr) { |
| 1754 | auto block = this->tailBlock; |
| 1755 | do { |
| 1756 | auto nextBlock = block->next; |
| 1757 | if (block->dynamicallyAllocated) { |
| 1758 | destroy(block); |
| 1759 | } |
| 1760 | else { |
| 1761 | this->parent->add_block_to_free_list(block); |
| 1762 | } |
| 1763 | block = nextBlock; |
| 1764 | } while (block != this->tailBlock); |
| 1765 | } |
| 1766 | |
| 1767 | // Destroy the block indices |
auto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw);
| 1769 | while (header != nullptr) { |
| 1770 | auto prev = static_cast<BlockIndexHeader*>(header->prev); |
| 1771 | header->~BlockIndexHeader(); |
| 1772 | (Traits::free)(header); |
| 1773 | header = prev; |
| 1774 | } |
| 1775 | } |
| 1776 | |
| 1777 | template<AllocationMode allocMode, typename U> |
| 1778 | inline bool enqueue(U&& element) |
| 1779 | { |
| 1780 | index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); |
| 1781 | index_t newTailIndex = 1 + currentTailIndex; |
| 1782 | if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) { |
| 1783 | // We reached the end of a block, start a new one |
| 1784 | auto startBlock = this->tailBlock; |
| 1785 | auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; |
| 1786 | if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) { |
| 1787 | // We can re-use the block ahead of us, it's empty! |
| 1788 | this->tailBlock = this->tailBlock->next; |
| 1789 | this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>(); |
| 1790 | |
| 1791 | // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the |
| 1792 | // last block from it first -- except instead of removing then adding, we can just overwrite). |
| 1793 | // Note that there must be a valid block index here, since even if allocation failed in the ctor, |
| 1794 | // it would have been re-attempted when adding the first block to the queue; since there is such |
| 1795 | // a block, a block index must have been successfully allocated. |
| 1796 | } |
| 1797 | else { |
| 1798 | // Whatever head value we see here is >= the last value we saw here (relatively), |
| 1799 | // and <= its current value. Since we have the most recent tail, the head must be |
| 1800 | // <= to it. |
| 1801 | auto head = this->headIndex.load(std::memory_order_relaxed); |
| 1802 | assert(!details::circular_less_than<index_t>(currentTailIndex, head)); |
| 1803 | if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) |
| 1804 | || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { |
| 1805 | // We can't enqueue in another block because there's not enough leeway -- the |
| 1806 | // tail could surpass the head by the time the block fills up! (Or we'll exceed |
| 1807 | // the size limit, if the second part of the condition was true.) |
| 1808 | return false; |
| 1809 | } |
| 1810 | // We're going to need a new block; check that the block index has room |
| 1811 | if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { |
| 1812 | // Hmm, the circular block index is already full -- we'll need |
| 1813 | // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if |
| 1814 | // the initial allocation failed in the constructor. |
| 1815 | |
| 1816 | MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { |
| 1817 | return false; |
| 1818 | } |
else if (!new_block_index(pr_blockIndexSlotsUsed)) {
| 1820 | return false; |
| 1821 | } |
| 1822 | } |
| 1823 | |
| 1824 | // Insert a new block in the circular linked list |
| 1825 | auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>(); |
| 1826 | if (newBlock == nullptr) { |
| 1827 | return false; |
| 1828 | } |
| 1829 | #ifdef MCDBGQ_TRACKMEM |
| 1830 | newBlock->owner = this; |
| 1831 | #endif |
| 1832 | newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>(); |
| 1833 | if (this->tailBlock == nullptr) { |
| 1834 | newBlock->next = newBlock; |
| 1835 | } |
| 1836 | else { |
| 1837 | newBlock->next = this->tailBlock->next; |
| 1838 | this->tailBlock->next = newBlock; |
| 1839 | } |
| 1840 | this->tailBlock = newBlock; |
| 1841 | ++pr_blockIndexSlotsUsed; |
| 1842 | } |
| 1843 | |
| 1844 | if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) { |
| 1845 | // The constructor may throw. We want the element not to appear in the queue in |
| 1846 | // that case (without corrupting the queue): |
| 1847 | MOODYCAMEL_TRY { |
| 1848 | new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element)); |
| 1849 | } |
| 1850 | MOODYCAMEL_CATCH (...) { |
| 1851 | // Revert change to the current block, but leave the new block available |
| 1852 | // for next time |
| 1853 | pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; |
| 1854 | this->tailBlock = startBlock == nullptr ? this->tailBlock : startBlock; |
| 1855 | MOODYCAMEL_RETHROW; |
| 1856 | } |
| 1857 | } |
| 1858 | else { |
| 1859 | (void)startBlock; |
| 1860 | (void)originalBlockIndexSlotsUsed; |
| 1861 | } |
| 1862 | |
| 1863 | // Add block to block index |
| 1864 | auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; |
| 1865 | entry.base = currentTailIndex; |
| 1866 | entry.block = this->tailBlock; |
| 1867 | blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); |
| 1868 | pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); |
| 1869 | |
| 1870 | if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) { |
| 1871 | this->tailIndex.store(newTailIndex, std::memory_order_release); |
| 1872 | return true; |
| 1873 | } |
| 1874 | } |
| 1875 | |
| 1876 | // Enqueue |
| 1877 | new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element)); |
| 1878 | |
| 1879 | this->tailIndex.store(newTailIndex, std::memory_order_release); |
| 1880 | return true; |
| 1881 | } |
| 1882 | |
| 1883 | template<typename U> |
| 1884 | bool dequeue(U& element) |
| 1885 | { |
| 1886 | auto tail = this->tailIndex.load(std::memory_order_relaxed); |
| 1887 | auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); |
| 1888 | if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { |
| 1889 | // Might be something to dequeue, let's give it a try |
| 1890 | |
| 1891 | // Note that this if is purely for performance purposes in the common case when the queue is |
| 1892 | // empty and the values are eventually consistent -- we may enter here spuriously. |
| 1893 | |
| 1894 | // Note that whatever the values of overcommit and tail are, they are not going to change (unless we |
| 1895 | // change them) and must be the same value at this point (inside the if) as when the if condition was |
| 1896 | // evaluated. |
| 1897 | |
| 1898 | // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. |
// This ensures that whatever value was loaded into overcommit, the load of dequeueOptimisticCount in
| 1900 | // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). |
| 1901 | // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all |
| 1902 | // read-modify-write operations are guaranteed to work on the latest value in the modification order), but |
| 1903 | // unfortunately that can't be shown to be correct using only the C++11 standard. |
| 1904 | // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case |
std::atomic_thread_fence(std::memory_order_acquire);
| 1906 | |
| 1907 | // Increment optimistic counter, then check if it went over the boundary |
| 1908 | auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); |
| 1909 | |
| 1910 | // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever |
| 1911 | // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now |
| 1912 | // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon |
| 1913 | // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. |
| 1914 | // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) |
| 1915 | // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. |
| 1916 | |
| 1917 | // Note that we reload tail here in case it changed; it will be the same value as before or greater, since |
| 1918 | // this load is sequenced after (happens after) the earlier load above. This is supported by read-read |
| 1919 | // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order |
| 1920 | tail = this->tailIndex.load(std::memory_order_acquire); |
if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
| 1922 | // Guaranteed to be at least one element to dequeue! |
| 1923 | |
| 1924 | // Get the index. Note that since there's guaranteed to be at least one element, this |
| 1925 | // will never exceed tail. We need to do an acquire-release fence here since it's possible |
| 1926 | // that whatever condition got us to this point was for an earlier enqueued element (that |
| 1927 | // we already see the memory effects for), but that by the time we increment somebody else |
| 1928 | // has incremented it, and we need to see the memory effects for *that* element, which is |
| 1929 | // in such a case is necessarily visible on the thread that incremented it in the first |
| 1930 | // place with the more current condition (they must have acquired a tail that is at least |
| 1931 | // as recent). |
| 1932 | auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); |
| 1933 | |
| 1934 | |
| 1935 | // Determine which block the element is in |
| 1936 | |
| 1937 | auto localBlockIndex = blockIndex.load(std::memory_order_acquire); |
| 1938 | auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); |
| 1939 | |
| 1940 | // We need to be careful here about subtracting and dividing because of index wrap-around. |
| 1941 | // When an index wraps, we need to preserve the sign of the offset when dividing it by the |
| 1942 | // block size (in order to get a correct signed block count offset in all cases): |
| 1943 | auto headBase = localBlockIndex->entries[localBlockIndexHead].base; |
| 1944 | auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1); |
| 1945 | auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE); |
| 1946 | auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; |
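// Worked example of the signed offset above (illustrative, assuming a block
// size of 32): if headBase == 96 and index == 70, then blockBaseIndex == 64
// and the signed difference is -32, giving offset == -1 before the unsigned
// cast; adding it to localBlockIndexHead and masking with (size - 1) wraps
// correctly because the index size is a power of two.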
| 1947 | |
| 1948 | // Dequeue |
| 1949 | auto& el = *((*block)[index]); |
| 1950 | if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { |
| 1951 | // Make sure the element is still fully dequeued and destroyed even if the assignment |
| 1952 | // throws |
| 1953 | struct Guard { |
| 1954 | Block* block; |
| 1955 | index_t index; |
| 1956 | |
| 1957 | ~Guard() |
| 1958 | { |
| 1959 | (*block)[index]->~T(); |
| 1960 | block->ConcurrentQueue::Block::template set_empty<explicit_context>(index); |
| 1961 | } |
| 1962 | } guard = { block, index }; |
| 1963 | |
| 1964 | element = std::move(el); // NOLINT |
| 1965 | } |
| 1966 | else { |
| 1967 | element = std::move(el); // NOLINT |
| 1968 | el.~T(); // NOLINT |
| 1969 | block->ConcurrentQueue::Block::template set_empty<explicit_context>(index); |
| 1970 | } |
| 1971 | |
| 1972 | return true; |
| 1973 | } |
| 1974 | else { |
| 1975 | // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent |
| 1976 | this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write |
| 1977 | } |
| 1978 | } |
| 1979 | |
| 1980 | return false; |
| 1981 | } |
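// Worked example of the optimistic-count scheme (illustrative): suppose one
// element remains, so dequeueOptimisticCount - dequeueOvercommit == tail - 1,
// and two consumers race. Both fetch_add dequeueOptimisticCount; the first
// observes a pre-increment value of tail - 1 (still less than tail) and claims
// the element via headIndex, while the second observes tail, fails the check,
// and bumps dequeueOvercommit so the counters stay eventually consistent.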
| 1982 | |
| 1983 | template<AllocationMode allocMode, typename It> |
| 1984 | bool enqueue_bulk(It itemFirst, size_t count) |
| 1985 | { |
| 1986 | // First, we need to make sure we have enough room to enqueue all of the elements; |
| 1987 | // this means pre-allocating blocks and putting them in the block index (but only if |
| 1988 | // all the allocations succeeded). |
| 1989 | index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); |
| 1990 | auto startBlock = this->tailBlock; |
| 1991 | auto originalBlockIndexFront = pr_blockIndexFront; |
| 1992 | auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; |
| 1993 | |
| 1994 | Block* firstAllocatedBlock = nullptr; |
| 1995 | |
| 1996 | // Figure out how many blocks we'll need to allocate, and do so |
| 1997 | size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)); |
| 1998 | index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); |
| 1999 | if (blockBaseDiff > 0) { |
| 2000 | // Allocate as many blocks as possible from ahead |
| 2001 | while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) { |
| 2002 | blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE); |
| 2003 | currentTailIndex += static_cast<index_t>(BLOCK_SIZE); |
| 2004 | |
| 2005 | this->tailBlock = this->tailBlock->next; |
| 2006 | firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; |
| 2007 | |
| 2008 | auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; |
| 2009 | entry.base = currentTailIndex; |
| 2010 | entry.block = this->tailBlock; |
| 2011 | pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); |
| 2012 | } |
| 2013 | |
| 2014 | // Now allocate as many blocks as necessary from the block pool |
| 2015 | while (blockBaseDiff > 0) { |
| 2016 | blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE); |
| 2017 | currentTailIndex += static_cast<index_t>(BLOCK_SIZE); |
| 2018 | |
| 2019 | auto head = this->headIndex.load(std::memory_order_relaxed); |
| 2020 | assert(!details::circular_less_than<index_t>(currentTailIndex, head)); |
| 2021 | bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); |
| 2022 | if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { |
| 2023 | MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { |
| 2024 | // Failed to allocate, undo changes (but keep injected blocks) |
| 2025 | pr_blockIndexFront = originalBlockIndexFront; |
| 2026 | pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; |
| 2027 | this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; |
| 2028 | return false; |
| 2029 | } |
else if (full || !new_block_index(originalBlockIndexSlotsUsed)) {
| 2031 | // Failed to allocate, undo changes (but keep injected blocks) |
| 2032 | pr_blockIndexFront = originalBlockIndexFront; |
| 2033 | pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; |
| 2034 | this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; |
| 2035 | return false; |
| 2036 | } |
| 2037 | |
| 2038 | // pr_blockIndexFront is updated inside new_block_index, so we need to |
| 2039 | // update our fallback value too (since we keep the new index even if we |
| 2040 | // later fail) |
| 2041 | originalBlockIndexFront = originalBlockIndexSlotsUsed; |
| 2042 | } |
| 2043 | |
| 2044 | // Insert a new block in the circular linked list |
| 2045 | auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>(); |
| 2046 | if (newBlock == nullptr) { |
| 2047 | pr_blockIndexFront = originalBlockIndexFront; |
| 2048 | pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; |
| 2049 | this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; |
| 2050 | return false; |
| 2051 | } |
| 2052 | |
| 2053 | #ifdef MCDBGQ_TRACKMEM |
| 2054 | newBlock->owner = this; |
| 2055 | #endif |
| 2056 | newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>(); |
| 2057 | if (this->tailBlock == nullptr) { |
| 2058 | newBlock->next = newBlock; |
| 2059 | } |
| 2060 | else { |
| 2061 | newBlock->next = this->tailBlock->next; |
| 2062 | this->tailBlock->next = newBlock; |
| 2063 | } |
| 2064 | this->tailBlock = newBlock; |
| 2065 | firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; |
| 2066 | |
| 2067 | ++pr_blockIndexSlotsUsed; |
| 2068 | |
| 2069 | auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; |
| 2070 | entry.base = currentTailIndex; |
| 2071 | entry.block = this->tailBlock; |
| 2072 | pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); |
| 2073 | } |
| 2074 | |
| 2075 | // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and |
| 2076 | // publish the new block index front |
| 2077 | auto block = firstAllocatedBlock; |
| 2078 | while (true) { |
| 2079 | block->ConcurrentQueue::Block::template reset_empty<explicit_context>(); |
| 2080 | if (block == this->tailBlock) { |
| 2081 | break; |
| 2082 | } |
| 2083 | block = block->next; |
| 2084 | } |
| 2085 | |
| 2086 | if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { |
| 2087 | blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); |
| 2088 | } |
| 2089 | } |
| 2090 | |
| 2091 | // Enqueue, one block at a time |
| 2092 | index_t newTailIndex = startTailIndex + static_cast<index_t>(count); |
| 2093 | currentTailIndex = startTailIndex; |
| 2094 | auto endBlock = this->tailBlock; |
| 2095 | this->tailBlock = startBlock; |
| 2096 | assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); |
| 2097 | if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { |
| 2098 | this->tailBlock = firstAllocatedBlock; |
| 2099 | } |
| 2100 | while (true) { |
| 2101 | auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); |
| 2102 | if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) { |
| 2103 | stopIndex = newTailIndex; |
| 2104 | } |
| 2105 | if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { |
| 2106 | while (currentTailIndex != stopIndex) { |
| 2107 | new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); |
| 2108 | } |
| 2109 | } |
| 2110 | else { |
| 2111 | MOODYCAMEL_TRY { |
| 2112 | while (currentTailIndex != stopIndex) { |
| 2113 | // Must use copy constructor even if move constructor is available |
| 2114 | // because we may have to revert if there's an exception. |
| 2115 | // Sorry about the horrible templated next line, but it was the only way |
| 2116 | // to disable moving *at compile time*, which is important because a type |
| 2117 | // may only define a (noexcept) move constructor, and so calls to the |
| 2118 | // cctor will not compile, even if they are in an if branch that will never |
| 2119 | // be executed |
| 2120 | new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); |
| 2121 | ++currentTailIndex; |
| 2122 | ++itemFirst; |
| 2123 | } |
| 2124 | } |
| 2125 | MOODYCAMEL_CATCH (...) { |
| 2126 | // Oh dear, an exception's been thrown -- destroy the elements that |
| 2127 | // were enqueued so far and revert the entire bulk operation (we'll keep |
| 2128 | // any allocated blocks in our linked list for later, though). |
| 2129 | auto constructedStopIndex = currentTailIndex; |
| 2130 | auto lastBlockEnqueued = this->tailBlock; |
| 2131 | |
| 2132 | pr_blockIndexFront = originalBlockIndexFront; |
| 2133 | pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; |
| 2134 | this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; |
| 2135 | |
| 2136 | if (!details::is_trivially_destructible<T>::value) { |
| 2137 | auto block = startBlock; |
| 2138 | if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) { |
| 2139 | block = firstAllocatedBlock; |
| 2140 | } |
| 2141 | currentTailIndex = startTailIndex; |
| 2142 | while (true) { |
| 2143 | stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); |
| 2144 | if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) { |
| 2145 | stopIndex = constructedStopIndex; |
| 2146 | } |
| 2147 | while (currentTailIndex != stopIndex) { |
| 2148 | (*block)[currentTailIndex++]->~T(); |
| 2149 | } |
| 2150 | if (block == lastBlockEnqueued) { |
| 2151 | break; |
| 2152 | } |
| 2153 | block = block->next; |
| 2154 | } |
| 2155 | } |
| 2156 | MOODYCAMEL_RETHROW; |
| 2157 | } |
| 2158 | } |
| 2159 | |
| 2160 | if (this->tailBlock == endBlock) { |
| 2161 | assert(currentTailIndex == newTailIndex); |
| 2162 | break; |
| 2163 | } |
| 2164 | this->tailBlock = this->tailBlock->next; |
| 2165 | } |
| 2166 | |
| 2167 | if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) { |
| 2168 | blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); |
| 2169 | } |
| 2170 | |
| 2171 | this->tailIndex.store(newTailIndex, std::memory_order_release); |
| 2172 | return true; |
| 2173 | } |
| 2174 | |
| 2175 | template<typename It> |
| 2176 | size_t dequeue_bulk(It& itemFirst, size_t max) |
| 2177 | { |
| 2178 | auto tail = this->tailIndex.load(std::memory_order_relaxed); |
| 2179 | auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); |
| 2180 | auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); |
| 2181 | if (details::circular_less_than<size_t>(0, desiredCount)) { |
| 2182 | desiredCount = desiredCount < max ? desiredCount : max; |
std::atomic_thread_fence(std::memory_order_acquire);
| 2184 | |
auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
| 2186 | |
| 2187 | tail = this->tailIndex.load(std::memory_order_acquire); |
| 2188 | auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit)); |
| 2189 | if (details::circular_less_than<size_t>(0, actualCount)) { |
| 2190 | actualCount = desiredCount < actualCount ? desiredCount : actualCount; |
| 2191 | if (actualCount < desiredCount) { |
| 2192 | this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); |
| 2193 | } |
| 2194 | |
| 2195 | // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this |
| 2196 | // will never exceed tail. |
| 2197 | auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); |
| 2198 | |
| 2199 | // Determine which block the first element is in |
| 2200 | auto localBlockIndex = blockIndex.load(std::memory_order_acquire); |
| 2201 | auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); |
| 2202 | |
| 2203 | auto headBase = localBlockIndex->entries[localBlockIndexHead].base; |
| 2204 | auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1); |
| 2205 | auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE); |
| 2206 | auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); |
| 2207 | |
| 2208 | // Iterate the blocks and dequeue |
| 2209 | auto index = firstIndex; |
| 2210 | do { |
| 2211 | auto firstIndexInBlock = index; |
| 2212 | auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); |
| 2213 | endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex; |
| 2214 | auto block = localBlockIndex->entries[indexIndex].block; |
| 2215 | if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { |
| 2216 | while (index != endIndex) { |
| 2217 | auto& el = *((*block)[index]); |
| 2218 | *itemFirst++ = std::move(el); |
| 2219 | el.~T(); |
| 2220 | ++index; |
| 2221 | } |
| 2222 | } |
| 2223 | else { |
| 2224 | MOODYCAMEL_TRY { |
| 2225 | while (index != endIndex) { |
| 2226 | auto& el = *((*block)[index]); |
| 2227 | *itemFirst = std::move(el); |
| 2228 | ++itemFirst; |
| 2229 | el.~T(); |
| 2230 | ++index; |
| 2231 | } |
| 2232 | } |
| 2233 | MOODYCAMEL_CATCH (...) { |
| 2234 | // It's too late to revert the dequeue, but we can make sure that all |
| 2235 | // the dequeued objects are properly destroyed and the block index |
| 2236 | // (and empty count) are properly updated before we propagate the exception |
| 2237 | do { |
| 2238 | block = localBlockIndex->entries[indexIndex].block; |
| 2239 | while (index != endIndex) { |
| 2240 | (*block)[index++]->~T(); |
| 2241 | } |
| 2242 | block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock)); |
| 2243 | indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); |
| 2244 | |
| 2245 | firstIndexInBlock = index; |
| 2246 | endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); |
| 2247 | endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex; |
| 2248 | } while (index != firstIndex + actualCount); |
| 2249 | |
| 2250 | MOODYCAMEL_RETHROW; |
| 2251 | } |
| 2252 | } |
| 2253 | block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock)); |
| 2254 | indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); |
| 2255 | } while (index != firstIndex + actualCount); |
| 2256 | |
| 2257 | return actualCount; |
| 2258 | } |
| 2259 | else { |
| 2260 | // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent |
| 2261 | this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); |
| 2262 | } |
| 2263 | } |
| 2264 | |
| 2265 | return 0; |
| 2266 | } |
| 2267 | |
| 2268 | private: |
| 2269 | struct BlockIndexEntry |
| 2270 | { |
| 2271 | index_t base; |
| 2272 | Block* block; |
| 2273 | }; |
| 2274 | |
struct BlockIndexHeader
{
size_t size;
std::atomic<size_t> front; // Current slot (not next, like pr_blockIndexFront)
BlockIndexEntry* entries;
void* prev;
};
| 2282 | |
| 2283 | |
| 2284 | bool new_block_index(size_t numberOfFilledSlotsToExpose) |
| 2285 | { |
| 2286 | auto prevBlockSizeMask = pr_blockIndexSize - 1; |
| 2287 | |
| 2288 | // Create the new block |
| 2289 | pr_blockIndexSize <<= 1; |
| 2290 | auto newRawPtr = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); |
| 2291 | if (newRawPtr == nullptr) { |
| 2292 | pr_blockIndexSize >>= 1; // Reset to allow graceful retry |
| 2293 | return false; |
| 2294 | } |
| 2295 | |
| 2296 | auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader))); |
| 2297 | |
| 2298 | // Copy in all the old indices, if any |
| 2299 | size_t j = 0; |
| 2300 | if (pr_blockIndexSlotsUsed != 0) { |
| 2301 | auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; |
| 2302 | do { |
| 2303 | newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; |
| 2304 | i = (i + 1) & prevBlockSizeMask; |
| 2305 | } while (i != pr_blockIndexFront); |
| 2306 | } |
| 2307 | |
| 2308 | // Update everything |
auto header = new (newRawPtr) BlockIndexHeader;
| 2310 | header->size = pr_blockIndexSize; |
| 2311 | header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); |
| 2312 | header->entries = newBlockIndexEntries; |
| 2313 | header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later |
| 2314 | |
| 2315 | pr_blockIndexFront = j; |
| 2316 | pr_blockIndexEntries = newBlockIndexEntries; |
| 2317 | pr_blockIndexRaw = newRawPtr; |
| 2318 | blockIndex.store(header, std::memory_order_release); |
| 2319 | |
| 2320 | return true; |
| 2321 | } |
| 2322 | |
| 2323 | private: |
| 2324 | std::atomic<BlockIndexHeader*> blockIndex; |
| 2325 | |
// To be used by the producer only -- consumers must use the ones referenced by blockIndex
| 2327 | size_t pr_blockIndexSlotsUsed; |
| 2328 | size_t pr_blockIndexSize; |
| 2329 | size_t pr_blockIndexFront; // Next slot (not current) |
| 2330 | BlockIndexEntry* pr_blockIndexEntries; |
| 2331 | void* pr_blockIndexRaw; |
| 2332 | |
| 2333 | #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG |
| 2334 | public: |
| 2335 | ExplicitProducer* nextExplicitProducer; |
| 2336 | private: |
| 2337 | #endif |
| 2338 | |
| 2339 | #ifdef MCDBGQ_TRACKMEM |
| 2340 | friend struct MemStats; |
| 2341 | #endif |
| 2342 | }; |
| 2343 | |
| 2344 | |
| 2345 | ////////////////////////////////// |
| 2346 | // Implicit queue |
| 2347 | ////////////////////////////////// |
| 2348 | |
| 2349 | struct ImplicitProducer : public ProducerBase |
| 2350 | { |
| 2351 | ImplicitProducer(ConcurrentQueue* parent_) : |
| 2352 | ProducerBase(parent_, false), |
| 2353 | nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), |
| 2354 | blockIndex(nullptr) |
| 2355 | { |
| 2356 | new_block_index(); |
| 2357 | } |
| 2358 | |
| 2359 | ~ImplicitProducer() |
| 2360 | { |
| 2361 | // Note that since we're in the destructor we can assume that all enqueue/dequeue operations |
| 2362 | // completed already; this means that all undequeued elements are placed contiguously across |
| 2363 | // contiguous blocks, and that only the first and last remaining blocks can be only partially |
| 2364 | // empty (all other remaining blocks must be completely full). |
| 2365 | |
| 2366 | #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 2367 | // Unregister ourselves for thread termination notification |
| 2368 | if (!this->inactive.load(std::memory_order_relaxed)) { |
| 2369 | details::ThreadExitNotifier::unsubscribe(&threadExitListener); |
| 2370 | } |
| 2371 | #endif |
| 2372 | |
| 2373 | // Destroy all remaining elements! |
| 2374 | auto tail = this->tailIndex.load(std::memory_order_relaxed); |
| 2375 | auto index = this->headIndex.load(std::memory_order_relaxed); |
| 2376 | Block* block = nullptr; |
| 2377 | assert(index == tail || details::circular_less_than(index, tail)); |
| 2378 | bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed |
| 2379 | while (index != tail) { |
| 2380 | if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || block == nullptr) { |
| 2381 | if (block != nullptr) { |
| 2382 | // Free the old block |
| 2383 | this->parent->add_block_to_free_list(block); |
| 2384 | } |
| 2385 | |
| 2386 | block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); |
| 2387 | } |
| 2388 | |
| 2389 | ((*block)[index])->~T(); |
| 2390 | ++index; |
| 2391 | } |
| 2392 | // Even if the queue is empty, there's still one block that's not on the free list |
| 2393 | // (unless the head index reached the end of it, in which case the tail will be poised |
| 2394 | // to create a new block). |
| 2395 | if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) { |
| 2396 | this->parent->add_block_to_free_list(this->tailBlock); |
| 2397 | } |
| 2398 | |
| 2399 | // Destroy block index |
| 2400 | auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); |
| 2401 | if (localBlockIndex != nullptr) { |
| 2402 | for (size_t i = 0; i != localBlockIndex->capacity; ++i) { |
| 2403 | localBlockIndex->index[i]->~BlockIndexEntry(); |
| 2404 | } |
| 2405 | do { |
| 2406 | auto prev = localBlockIndex->prev; |
| 2407 | localBlockIndex->~BlockIndexHeader(); |
| 2408 | (Traits::free)(localBlockIndex); |
| 2409 | localBlockIndex = prev; |
| 2410 | } while (localBlockIndex != nullptr); |
| 2411 | } |
| 2412 | } |
| 2413 | |
| 2414 | template<AllocationMode allocMode, typename U> |
| 2415 | inline bool enqueue(U&& element) |
| 2416 | { |
| 2417 | index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); |
| 2418 | index_t newTailIndex = 1 + currentTailIndex; |
| 2419 | if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) { |
| 2420 | // We reached the end of a block, start a new one |
| 2421 | auto head = this->headIndex.load(std::memory_order_relaxed); |
| 2422 | assert(!details::circular_less_than<index_t>(currentTailIndex, head)); |
| 2423 | if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { |
| 2424 | return false; |
| 2425 | } |
| 2426 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX |
| 2427 | debug::DebugLock lock(mutex); |
| 2428 | #endif |
| 2429 | // Find out where we'll be inserting this block in the block index |
| 2430 | BlockIndexEntry* idxEntry; |
| 2431 | if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) { |
| 2432 | return false; |
| 2433 | } |
| 2434 | |
| 2435 | // Get ahold of a new block |
| 2436 | auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>(); |
| 2437 | if (newBlock == nullptr) { |
| 2438 | rewind_block_index_tail(); |
| 2439 | idxEntry->value.store(nullptr, std::memory_order_relaxed); |
| 2440 | return false; |
| 2441 | } |
| 2442 | #ifdef MCDBGQ_TRACKMEM |
| 2443 | newBlock->owner = this; |
| 2444 | #endif |
| 2445 | newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>(); |
| 2446 | |
| 2447 | if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) { |
| 2448 | // May throw, try to insert now before we publish the fact that we have this new block |
| 2449 | MOODYCAMEL_TRY { |
| 2450 | new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element)); |
| 2451 | } |
| 2452 | MOODYCAMEL_CATCH (...) { |
| 2453 | rewind_block_index_tail(); |
| 2454 | idxEntry->value.store(nullptr, std::memory_order_relaxed); |
| 2455 | this->parent->add_block_to_free_list(newBlock); |
| 2456 | MOODYCAMEL_RETHROW; |
| 2457 | } |
| 2458 | } |
| 2459 | |
| 2460 | // Insert the new block into the index |
| 2461 | idxEntry->value.store(newBlock, std::memory_order_relaxed); |
| 2462 | |
| 2463 | this->tailBlock = newBlock; |
| 2464 | |
| 2465 | if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) { |
| 2466 | this->tailIndex.store(newTailIndex, std::memory_order_release); |
| 2467 | return true; |
| 2468 | } |
| 2469 | } |
| 2470 | |
| 2471 | // Enqueue |
| 2472 | new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element)); |
| 2473 | |
| 2474 | this->tailIndex.store(newTailIndex, std::memory_order_release); |
| 2475 | return true; |
| 2476 | } |
| 2477 | |
| 2478 | template<typename U> |
| 2479 | bool dequeue(U& element) |
| 2480 | { |
| 2481 | // See ExplicitProducer::dequeue for rationale and explanation |
| 2482 | index_t tail = this->tailIndex.load(std::memory_order_relaxed); |
| 2483 | index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); |
| 2484 | if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { |
std::atomic_thread_fence(std::memory_order_acquire);
| 2486 | |
| 2487 | index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); |
| 2488 | tail = this->tailIndex.load(std::memory_order_acquire); |
if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
| 2490 | index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); |
| 2491 | |
| 2492 | // Determine which block the element is in |
| 2493 | auto entry = get_block_index_entry_for_index(index); |
| 2494 | |
| 2495 | // Dequeue |
| 2496 | auto block = entry->value.load(std::memory_order_relaxed); |
| 2497 | auto& el = *((*block)[index]); |
| 2498 | |
| 2499 | if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { |
| 2500 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX |
| 2501 | // Note: Acquiring the mutex with every dequeue instead of only when a block |
| 2502 | // is released is very sub-optimal, but it is, after all, purely debug code. |
| 2503 | debug::DebugLock lock(producer->mutex); |
| 2504 | #endif |
| 2505 | struct Guard { |
| 2506 | Block* block; |
| 2507 | index_t index; |
| 2508 | BlockIndexEntry* entry; |
| 2509 | ConcurrentQueue* parent; |
| 2510 | |
| 2511 | ~Guard() |
| 2512 | { |
| 2513 | (*block)[index]->~T(); |
| 2514 | if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) { |
| 2515 | entry->value.store(nullptr, std::memory_order_relaxed); |
| 2516 | parent->add_block_to_free_list(block); |
| 2517 | } |
| 2518 | } |
| 2519 | } guard = { block, index, entry, this->parent }; |
| 2520 | |
| 2521 | element = std::move(el); // NOLINT |
| 2522 | } |
| 2523 | else { |
| 2524 | element = std::move(el); // NOLINT |
| 2525 | el.~T(); // NOLINT |
| 2526 | |
| 2527 | if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) { |
| 2528 | { |
| 2529 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX |
| 2530 | debug::DebugLock lock(mutex); |
| 2531 | #endif |
| 2532 | // Add the block back into the global free pool (and remove from block index) |
| 2533 | entry->value.store(nullptr, std::memory_order_relaxed); |
| 2534 | } |
| 2535 | this->parent->add_block_to_free_list(block); // releases the above store |
| 2536 | } |
| 2537 | } |
| 2538 | |
| 2539 | return true; |
| 2540 | } |
| 2541 | else { |
| 2542 | this->dequeueOvercommit.fetch_add(1, std::memory_order_release); |
| 2543 | } |
| 2544 | } |
| 2545 | |
| 2546 | return false; |
| 2547 | } |
| 2548 | |
| 2549 | template<AllocationMode allocMode, typename It> |
| 2550 | bool enqueue_bulk(It itemFirst, size_t count) |
| 2551 | { |
| 2552 | // First, we need to make sure we have enough room to enqueue all of the elements; |
| 2553 | // this means pre-allocating blocks and putting them in the block index (but only if |
| 2554 | // all the allocations succeeded). |
| 2555 | |
| 2556 | // Note that the tailBlock we start off with may not be owned by us any more; |
| 2557 | // this happens if it was filled up exactly to the top (setting tailIndex to |
| 2558 | // the first index of the next block which is not yet allocated), then dequeued |
| 2559 | // completely (putting it on the free list) before we enqueue again. |
| 2560 | |
| 2561 | index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); |
| 2562 | auto startBlock = this->tailBlock; |
| 2563 | Block* firstAllocatedBlock = nullptr; |
| 2564 | auto endBlock = this->tailBlock; |
| 2565 | |
| 2566 | // Figure out how many blocks we'll need to allocate, and do so |
| 2567 | size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)); |
| 2568 | index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); |
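// E.g. (illustrative numbers only): with BLOCK_SIZE == 32, startTailIndex == 30 and count == 70,
// blockBaseDiff comes out to 96, so the loop below allocates three new blocks (bases 32, 64
// and 96); the first two elements (indices 30 and 31) land in the existing tail block.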
| 2569 | if (blockBaseDiff > 0) { |
| 2570 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX |
| 2571 | debug::DebugLock lock(mutex); |
| 2572 | #endif |
| 2573 | do { |
| 2574 | blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE); |
| 2575 | currentTailIndex += static_cast<index_t>(BLOCK_SIZE); |
| 2576 | |
| 2577 | // Find out where we'll be inserting this block in the block index |
| 2578 | BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell |
| 2579 | Block* newBlock; |
| 2580 | bool indexInserted = false; |
| 2581 | auto head = this->headIndex.load(std::memory_order_relaxed); |
| 2582 | assert(!details::circular_less_than<index_t>(currentTailIndex, head)); |
| 2583 | bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); |
| 2584 | if (full || !(indexInserted = insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>()) == nullptr) { |
| 2585 | // Index allocation or block allocation failed; revert any other allocations |
| 2586 | // and index insertions done so far for this operation |
| 2587 | if (indexInserted) { |
| 2588 | rewind_block_index_tail(); |
| 2589 | idxEntry->value.store(nullptr, std::memory_order_relaxed); |
| 2590 | } |
| 2591 | currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); |
| 2592 | for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { |
| 2593 | currentTailIndex += static_cast<index_t>(BLOCK_SIZE); |
idxEntry = get_block_index_entry_for_index(currentTailIndex);
| 2595 | idxEntry->value.store(nullptr, std::memory_order_relaxed); |
| 2596 | rewind_block_index_tail(); |
| 2597 | } |
| 2598 | this->parent->add_blocks_to_free_list(firstAllocatedBlock); |
| 2599 | this->tailBlock = startBlock; |
| 2600 | |
| 2601 | return false; |
| 2602 | } |
| 2603 | |
| 2604 | #ifdef MCDBGQ_TRACKMEM |
| 2605 | newBlock->owner = this; |
| 2606 | #endif |
| 2607 | newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>(); |
| 2608 | newBlock->next = nullptr; |
| 2609 | |
| 2610 | // Insert the new block into the index |
| 2611 | idxEntry->value.store(newBlock, std::memory_order_relaxed); |
| 2612 | |
| 2613 | // Store the chain of blocks so that we can undo if later allocations fail, |
| 2614 | // and so that we can find the blocks when we do the actual enqueueing |
| 2615 | if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { |
| 2616 | assert(this->tailBlock != nullptr); |
| 2617 | this->tailBlock->next = newBlock; |
| 2618 | } |
| 2619 | this->tailBlock = newBlock; |
| 2620 | endBlock = newBlock; |
| 2621 | firstAllocatedBlock = firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock; |
| 2622 | } while (blockBaseDiff > 0); |
| 2623 | } |
| 2624 | |
| 2625 | // Enqueue, one block at a time |
| 2626 | index_t newTailIndex = startTailIndex + static_cast<index_t>(count); |
| 2627 | currentTailIndex = startTailIndex; |
| 2628 | this->tailBlock = startBlock; |
| 2629 | assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); |
| 2630 | if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { |
| 2631 | this->tailBlock = firstAllocatedBlock; |
| 2632 | } |
| 2633 | while (true) { |
| 2634 | auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); |
| 2635 | if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) { |
| 2636 | stopIndex = newTailIndex; |
| 2637 | } |
| 2638 | if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { |
| 2639 | while (currentTailIndex != stopIndex) { |
| 2640 | new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); |
| 2641 | } |
| 2642 | } |
| 2643 | else { |
| 2644 | MOODYCAMEL_TRY { |
| 2645 | while (currentTailIndex != stopIndex) { |
| 2646 | new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); |
| 2647 | ++currentTailIndex; |
| 2648 | ++itemFirst; |
| 2649 | } |
| 2650 | } |
| 2651 | MOODYCAMEL_CATCH (...) { |
| 2652 | auto constructedStopIndex = currentTailIndex; |
| 2653 | auto lastBlockEnqueued = this->tailBlock; |
| 2654 | |
| 2655 | if (!details::is_trivially_destructible<T>::value) { |
| 2656 | auto block = startBlock; |
| 2657 | if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) { |
| 2658 | block = firstAllocatedBlock; |
| 2659 | } |
| 2660 | currentTailIndex = startTailIndex; |
| 2661 | while (true) { |
| 2662 | stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); |
| 2663 | if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) { |
| 2664 | stopIndex = constructedStopIndex; |
| 2665 | } |
| 2666 | while (currentTailIndex != stopIndex) { |
| 2667 | (*block)[currentTailIndex++]->~T(); |
| 2668 | } |
| 2669 | if (block == lastBlockEnqueued) { |
| 2670 | break; |
| 2671 | } |
| 2672 | block = block->next; |
| 2673 | } |
| 2674 | } |
| 2675 | |
| 2676 | currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); |
| 2677 | for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { |
| 2678 | currentTailIndex += static_cast<index_t>(BLOCK_SIZE); |
auto idxEntry = get_block_index_entry_for_index(currentTailIndex);
| 2680 | idxEntry->value.store(nullptr, std::memory_order_relaxed); |
| 2681 | rewind_block_index_tail(); |
| 2682 | } |
| 2683 | this->parent->add_blocks_to_free_list(firstAllocatedBlock); |
| 2684 | this->tailBlock = startBlock; |
| 2685 | MOODYCAMEL_RETHROW; |
| 2686 | } |
| 2687 | } |
| 2688 | |
| 2689 | if (this->tailBlock == endBlock) { |
| 2690 | assert(currentTailIndex == newTailIndex); |
| 2691 | break; |
| 2692 | } |
| 2693 | this->tailBlock = this->tailBlock->next; |
| 2694 | } |
| 2695 | this->tailIndex.store(newTailIndex, std::memory_order_release); |
| 2696 | return true; |
| 2697 | } |
| 2698 | |
| 2699 | template<typename It> |
| 2700 | size_t dequeue_bulk(It& itemFirst, size_t max) |
| 2701 | { |
| 2702 | auto tail = this->tailIndex.load(std::memory_order_relaxed); |
| 2703 | auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); |
| 2704 | auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); |
| 2705 | if (details::circular_less_than<size_t>(0, desiredCount)) { |
| 2706 | desiredCount = desiredCount < max ? desiredCount : max; |
std::atomic_thread_fence(std::memory_order_acquire);
| 2708 | |
| 2709 | auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); |
| 2710 | |
| 2711 | tail = this->tailIndex.load(std::memory_order_acquire); |
| 2712 | auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit)); |
| 2713 | if (details::circular_less_than<size_t>(0, actualCount)) { |
| 2714 | actualCount = desiredCount < actualCount ? desiredCount : actualCount; |
| 2715 | if (actualCount < desiredCount) { |
| 2716 | this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); |
| 2717 | } |
| 2718 | |
| 2719 | // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this |
| 2720 | // will never exceed tail. |
| 2721 | auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); |
| 2722 | |
| 2723 | // Iterate the blocks and dequeue |
| 2724 | auto index = firstIndex; |
| 2725 | BlockIndexHeader* localBlockIndex; |
| 2726 | auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); |
| 2727 | do { |
| 2728 | auto blockStartIndex = index; |
| 2729 | auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); |
| 2730 | endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex; |
| 2731 | |
| 2732 | auto entry = localBlockIndex->index[indexIndex]; |
| 2733 | auto block = entry->value.load(std::memory_order_relaxed); |
| 2734 | if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { |
| 2735 | while (index != endIndex) { |
| 2736 | auto& el = *((*block)[index]); |
| 2737 | *itemFirst++ = std::move(el); |
| 2738 | el.~T(); |
| 2739 | ++index; |
| 2740 | } |
| 2741 | } |
| 2742 | else { |
| 2743 | MOODYCAMEL_TRY { |
| 2744 | while (index != endIndex) { |
| 2745 | auto& el = *((*block)[index]); |
| 2746 | *itemFirst = std::move(el); |
| 2747 | ++itemFirst; |
| 2748 | el.~T(); |
| 2749 | ++index; |
| 2750 | } |
| 2751 | } |
| 2752 | MOODYCAMEL_CATCH (...) { |
| 2753 | do { |
| 2754 | entry = localBlockIndex->index[indexIndex]; |
| 2755 | block = entry->value.load(std::memory_order_relaxed); |
| 2756 | while (index != endIndex) { |
| 2757 | (*block)[index++]->~T(); |
| 2758 | } |
| 2759 | |
| 2760 | if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) { |
| 2761 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX |
| 2762 | debug::DebugLock lock(mutex); |
| 2763 | #endif |
| 2764 | entry->value.store(nullptr, std::memory_order_relaxed); |
| 2765 | this->parent->add_block_to_free_list(block); |
| 2766 | } |
| 2767 | indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); |
| 2768 | |
| 2769 | blockStartIndex = index; |
| 2770 | endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); |
| 2771 | endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex; |
| 2772 | } while (index != firstIndex + actualCount); |
| 2773 | |
| 2774 | MOODYCAMEL_RETHROW; |
| 2775 | } |
| 2776 | } |
| 2777 | if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) { |
| 2778 | { |
| 2779 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX |
| 2780 | debug::DebugLock lock(mutex); |
| 2781 | #endif |
| 2782 | // Note that the set_many_empty above did a release, meaning that anybody who acquires the block |
| 2783 | // we're about to free can use it safely since our writes (and reads!) will have happened-before then. |
| 2784 | entry->value.store(nullptr, std::memory_order_relaxed); |
| 2785 | } |
| 2786 | this->parent->add_block_to_free_list(block); // releases the above store |
| 2787 | } |
| 2788 | indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); |
| 2789 | } while (index != firstIndex + actualCount); |
| 2790 | |
| 2791 | return actualCount; |
| 2792 | } |
| 2793 | else { |
| 2794 | this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); |
| 2795 | } |
| 2796 | } |
| 2797 | |
| 2798 | return 0; |
| 2799 | } |
| 2800 | |
| 2801 | private: |
| 2802 | // The block size must be > 1, so any number with the low bit set is an invalid block base index |
| 2803 | static const index_t INVALID_BLOCK_BASE = 1; |
| 2804 | |
| 2805 | struct BlockIndexEntry |
| 2806 | { |
| 2807 | std::atomic<index_t> key; |
| 2808 | std::atomic<Block*> value; |
| 2809 | }; |
| 2810 | |
struct BlockIndexHeader
{
size_t capacity;
std::atomic<size_t> tail;
BlockIndexEntry* entries;
BlockIndexEntry** index;
BlockIndexHeader* prev;
};
| 2819 | |
| 2820 | template<AllocationMode allocMode> |
| 2821 | inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) |
| 2822 | { |
| 2823 | auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK |
| 2824 | if (localBlockIndex == nullptr) { |
| 2825 | return false; // this can happen if new_block_index failed in the constructor |
| 2826 | } |
| 2827 | auto newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); |
| 2828 | idxEntry = localBlockIndex->index[newTail]; |
| 2829 | if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || |
| 2830 | idxEntry->value.load(std::memory_order_relaxed) == nullptr) { |
| 2831 | |
| 2832 | idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); |
| 2833 | localBlockIndex->tail.store(newTail, std::memory_order_release); |
| 2834 | return true; |
| 2835 | } |
| 2836 | |
| 2837 | // No room in the old block index, try to allocate another one! |
| 2838 | MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { |
| 2839 | return false; |
| 2840 | } |
| 2841 | else if (!new_block_index()) { |
| 2842 | return false; |
| 2843 | } |
| 2844 | localBlockIndex = blockIndex.load(std::memory_order_relaxed); |
| 2845 | newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); |
| 2846 | idxEntry = localBlockIndex->index[newTail]; |
| 2847 | assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); |
| 2848 | idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); |
| 2849 | localBlockIndex->tail.store(newTail, std::memory_order_release); |
| 2850 | return true; |
| 2851 | } |
| 2852 | |
| 2853 | inline void rewind_block_index_tail() |
| 2854 | { |
| 2855 | auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); |
| 2856 | localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); |
| 2857 | } |
| 2858 | |
| 2859 | inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const |
| 2860 | { |
| 2861 | BlockIndexHeader* localBlockIndex; |
| 2862 | auto idx = get_block_index_index_for_index(index, localBlockIndex); |
| 2863 | return localBlockIndex->index[idx]; |
| 2864 | } |
| 2865 | |
inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const
| 2867 | { |
| 2868 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX |
| 2869 | debug::DebugLock lock(mutex); |
| 2870 | #endif |
| 2871 | index &= ~static_cast<index_t>(BLOCK_SIZE - 1); |
| 2872 | localBlockIndex = blockIndex.load(std::memory_order_acquire); |
| 2873 | auto tail = localBlockIndex->tail.load(std::memory_order_acquire); |
| 2874 | auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); |
| 2875 | assert(tailBase != INVALID_BLOCK_BASE); |
| 2876 | // Note: Must use division instead of shift because the index may wrap around, causing a negative |
| 2877 | // offset, whose negativity we want to preserve |
| 2878 | auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / BLOCK_SIZE); |
| 2879 | size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); |
| 2880 | assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); |
| 2881 | return idx; |
| 2882 | } |
| 2883 | |
| 2884 | bool new_block_index() |
| 2885 | { |
| 2886 | auto prev = blockIndex.load(std::memory_order_relaxed); |
| 2887 | size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; |
| 2888 | auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; |
| 2889 | auto raw = static_cast<char*>((Traits::malloc)( |
| 2890 | sizeof(BlockIndexHeader) + |
| 2891 | std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * entryCount + |
| 2892 | std::alignment_of<BlockIndexEntry*>::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); |
| 2893 | if (raw == nullptr) { |
| 2894 | return false; |
| 2895 | } |
| 2896 | |
auto header = new (raw) BlockIndexHeader;
| 2898 | auto entries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader))); |
| 2899 | auto index = reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(reinterpret_cast<char*>(entries) + sizeof(BlockIndexEntry) * entryCount)); |
| 2900 | if (prev != nullptr) { |
| 2901 | auto prevTail = prev->tail.load(std::memory_order_relaxed); |
| 2902 | auto prevPos = prevTail; |
| 2903 | size_t i = 0; |
| 2904 | do { |
| 2905 | prevPos = (prevPos + 1) & (prev->capacity - 1); |
| 2906 | index[i++] = prev->index[prevPos]; |
| 2907 | } while (prevPos != prevTail); |
| 2908 | assert(i == prevCapacity); |
| 2909 | } |
| 2910 | for (size_t i = 0; i != entryCount; ++i) { |
| 2911 | new (entries + i) BlockIndexEntry; |
| 2912 | entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); |
| 2913 | index[prevCapacity + i] = entries + i; |
| 2914 | } |
| 2915 | header->prev = prev; |
| 2916 | header->entries = entries; |
| 2917 | header->index = index; |
| 2918 | header->capacity = nextBlockIndexCapacity; |
| 2919 | header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); |
| 2920 | |
| 2921 | blockIndex.store(header, std::memory_order_release); |
| 2922 | |
| 2923 | nextBlockIndexCapacity <<= 1; |
| 2924 | |
| 2925 | return true; |
| 2926 | } |
| 2927 | |
| 2928 | private: |
| 2929 | size_t nextBlockIndexCapacity; |
| 2930 | std::atomic<BlockIndexHeader*> blockIndex; |
| 2931 | |
| 2932 | #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 2933 | public: |
| 2934 | details::ThreadExitListener threadExitListener; |
| 2935 | private: |
| 2936 | #endif |
| 2937 | |
| 2938 | #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG |
| 2939 | public: |
| 2940 | ImplicitProducer* nextImplicitProducer; |
| 2941 | private: |
| 2942 | #endif |
| 2943 | |
| 2944 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX |
| 2945 | mutable debug::DebugMutex mutex; |
| 2946 | #endif |
| 2947 | #ifdef MCDBGQ_TRACKMEM |
| 2948 | friend struct MemStats; |
| 2949 | #endif |
| 2950 | }; |
| 2951 | |
| 2952 | |
| 2953 | ////////////////////////////////// |
| 2954 | // Block pool manipulation |
| 2955 | ////////////////////////////////// |
| 2956 | |
| 2957 | void populate_initial_block_list(size_t blockCount) |
| 2958 | { |
| 2959 | initialBlockPoolSize = blockCount; |
| 2960 | if (initialBlockPoolSize == 0) { |
| 2961 | initialBlockPool = nullptr; |
| 2962 | return; |
| 2963 | } |
| 2964 | |
| 2965 | initialBlockPool = create_array<Block>(blockCount); |
| 2966 | if (initialBlockPool == nullptr) { |
| 2967 | initialBlockPoolSize = 0; |
| 2968 | } |
| 2969 | for (size_t i = 0; i < initialBlockPoolSize; ++i) { |
| 2970 | initialBlockPool[i].dynamicallyAllocated = false; |
| 2971 | } |
| 2972 | } |
| 2973 | |
| 2974 | inline Block* try_get_block_from_initial_pool() |
| 2975 | { |
| 2976 | if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { |
| 2977 | return nullptr; |
| 2978 | } |
| 2979 | |
| 2980 | auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); |
| 2981 | |
| 2982 | return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr; |
| 2983 | } |
| 2984 | |
| 2985 | inline void add_block_to_free_list(Block* block) |
| 2986 | { |
| 2987 | #ifdef MCDBGQ_TRACKMEM |
| 2988 | block->owner = nullptr; |
| 2989 | #endif |
| 2990 | freeList.add(block); |
| 2991 | } |
| 2992 | |
| 2993 | inline void add_blocks_to_free_list(Block* block) |
| 2994 | { |
| 2995 | while (block != nullptr) { |
| 2996 | auto next = block->next; |
| 2997 | add_block_to_free_list(block); |
| 2998 | block = next; |
| 2999 | } |
| 3000 | } |
| 3001 | |
| 3002 | inline Block* try_get_block_from_free_list() |
| 3003 | { |
| 3004 | return freeList.try_get(); |
| 3005 | } |
| 3006 | |
| 3007 | // Gets a free block from one of the memory pools, or allocates a new one (if applicable) |
| 3008 | template<AllocationMode canAlloc> |
| 3009 | Block* requisition_block() |
| 3010 | { |
| 3011 | auto block = try_get_block_from_initial_pool(); |
| 3012 | if (block != nullptr) { |
| 3013 | return block; |
| 3014 | } |
| 3015 | |
| 3016 | block = try_get_block_from_free_list(); |
| 3017 | if (block != nullptr) { |
| 3018 | return block; |
| 3019 | } |
| 3020 | |
| 3021 | MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { |
| 3022 | return create<Block>(); |
| 3023 | } |
| 3024 | else { |
| 3025 | return nullptr; |
| 3026 | } |
| 3027 | } |
| 3028 | |
| 3029 | |
| 3030 | #ifdef MCDBGQ_TRACKMEM |
| 3031 | public: |
| 3032 | struct MemStats { |
| 3033 | size_t allocatedBlocks; |
| 3034 | size_t usedBlocks; |
| 3035 | size_t freeBlocks; |
| 3036 | size_t ownedBlocksExplicit; |
| 3037 | size_t ownedBlocksImplicit; |
| 3038 | size_t implicitProducers; |
| 3039 | size_t explicitProducers; |
| 3040 | size_t elementsEnqueued; |
| 3041 | size_t blockClassBytes; |
| 3042 | size_t queueClassBytes; |
| 3043 | size_t implicitBlockIndexBytes; |
| 3044 | size_t explicitBlockIndexBytes; |
| 3045 | |
| 3046 | friend class ConcurrentQueue; |
| 3047 | |
| 3048 | private: |
| 3049 | static MemStats getFor(ConcurrentQueue* q) |
| 3050 | { |
| 3051 | MemStats stats = { 0 }; |
| 3052 | |
| 3053 | stats.elementsEnqueued = q->size_approx(); |
| 3054 | |
| 3055 | auto block = q->freeList.head_unsafe(); |
| 3056 | while (block != nullptr) { |
| 3057 | ++stats.allocatedBlocks; |
| 3058 | ++stats.freeBlocks; |
| 3059 | block = block->freeListNext.load(std::memory_order_relaxed); |
| 3060 | } |
| 3061 | |
| 3062 | for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { |
| 3063 | bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr; |
| 3064 | stats.implicitProducers += implicit ? 1 : 0; |
| 3065 | stats.explicitProducers += implicit ? 0 : 1; |
| 3066 | |
| 3067 | if (implicit) { |
| 3068 | auto prod = static_cast<ImplicitProducer*>(ptr); |
| 3069 | stats.queueClassBytes += sizeof(ImplicitProducer); |
| 3070 | auto head = prod->headIndex.load(std::memory_order_relaxed); |
| 3071 | auto tail = prod->tailIndex.load(std::memory_order_relaxed); |
| 3072 | auto hash = prod->blockIndex.load(std::memory_order_relaxed); |
| 3073 | if (hash != nullptr) { |
| 3074 | for (size_t i = 0; i != hash->capacity; ++i) { |
| 3075 | if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { |
| 3076 | ++stats.allocatedBlocks; |
| 3077 | ++stats.ownedBlocksImplicit; |
| 3078 | } |
| 3079 | } |
| 3080 | stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); |
| 3081 | for (; hash != nullptr; hash = hash->prev) { |
| 3082 | stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*); |
| 3083 | } |
| 3084 | } |
| 3085 | for (; details::circular_less_than<index_t>(head, tail); head += BLOCK_SIZE) { |
| 3086 | //auto block = prod->get_block_index_entry_for_index(head); |
| 3087 | ++stats.usedBlocks; |
| 3088 | } |
| 3089 | } |
| 3090 | else { |
| 3091 | auto prod = static_cast<ExplicitProducer*>(ptr); |
| 3092 | stats.queueClassBytes += sizeof(ExplicitProducer); |
| 3093 | auto tailBlock = prod->tailBlock; |
| 3094 | bool wasNonEmpty = false; |
| 3095 | if (tailBlock != nullptr) { |
| 3096 | auto block = tailBlock; |
| 3097 | do { |
| 3098 | ++stats.allocatedBlocks; |
| 3099 | if (!block->ConcurrentQueue::Block::template is_empty<explicit_context>() || wasNonEmpty) { |
| 3100 | ++stats.usedBlocks; |
| 3101 | wasNonEmpty = wasNonEmpty || block != tailBlock; |
| 3102 | } |
| 3103 | ++stats.ownedBlocksExplicit; |
| 3104 | block = block->next; |
| 3105 | } while (block != tailBlock); |
| 3106 | } |
| 3107 | auto index = prod->blockIndex.load(std::memory_order_relaxed); |
| 3108 | while (index != nullptr) { |
| 3109 | stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry); |
| 3110 | index = static_cast<typename ExplicitProducer::BlockIndexHeader*>(index->prev); |
| 3111 | } |
| 3112 | } |
| 3113 | } |
| 3114 | |
| 3115 | auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); |
| 3116 | stats.allocatedBlocks += freeOnInitialPool; |
| 3117 | stats.freeBlocks += freeOnInitialPool; |
| 3118 | |
| 3119 | stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; |
| 3120 | stats.queueClassBytes += sizeof(ConcurrentQueue); |
| 3121 | |
| 3122 | return stats; |
| 3123 | } |
| 3124 | }; |
| 3125 | |
| 3126 | // For debugging only. Not thread-safe. |
| 3127 | MemStats getMemStats() |
| 3128 | { |
| 3129 | return MemStats::getFor(this); |
| 3130 | } |
| 3131 | private: |
| 3132 | friend struct MemStats; |
| 3133 | #endif |
| 3134 | |
| 3135 | |
| 3136 | ////////////////////////////////// |
| 3137 | // Producer list manipulation |
| 3138 | ////////////////////////////////// |
| 3139 | |
| 3140 | ProducerBase* recycle_or_create_producer(bool isExplicit) |
| 3141 | { |
| 3142 | bool recycled; |
| 3143 | return recycle_or_create_producer(isExplicit, recycled); |
| 3144 | } |
| 3145 | |
| 3146 | ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled) |
| 3147 | { |
| 3148 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH |
| 3149 | debug::DebugLock lock(implicitProdMutex); |
| 3150 | #endif |
| 3151 | // Try to re-use one first |
| 3152 | for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { |
| 3153 | if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { |
| 3154 | bool expected = true; |
| 3155 | if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { |
| 3156 | // We caught one! It's been marked as activated, the caller can have it |
| 3157 | recycled = true; |
| 3158 | return ptr; |
| 3159 | } |
| 3160 | } |
| 3161 | } |
| 3162 | |
| 3163 | recycled = false; |
return add_producer(isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this)) : create<ImplicitProducer>(this));
| 3165 | } |
| 3166 | |
| 3167 | ProducerBase* add_producer(ProducerBase* producer) |
| 3168 | { |
| 3169 | // Handle failed memory allocation |
| 3170 | if (producer == nullptr) { |
| 3171 | return nullptr; |
| 3172 | } |
| 3173 | |
producerCount.fetch_add(1, std::memory_order_relaxed);
| 3175 | |
| 3176 | // Add it to the lock-free list |
| 3177 | auto prevTail = producerListTail.load(std::memory_order_relaxed); |
| 3178 | do { |
| 3179 | producer->next = prevTail; |
| 3180 | } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); |
| 3181 | |
| 3182 | #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG |
| 3183 | if (producer->isExplicit) { |
| 3184 | auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); |
| 3185 | do { |
| 3186 | static_cast<ExplicitProducer*>(producer)->nextExplicitProducer = prevTailExplicit; |
| 3187 | } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast<ExplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed)); |
| 3188 | } |
| 3189 | else { |
| 3190 | auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); |
| 3191 | do { |
| 3192 | static_cast<ImplicitProducer*>(producer)->nextImplicitProducer = prevTailImplicit; |
| 3193 | } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast<ImplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed)); |
| 3194 | } |
| 3195 | #endif |
| 3196 | |
| 3197 | return producer; |
| 3198 | } |
| 3199 | |
| 3200 | void reown_producers() |
| 3201 | { |
| 3202 | // After another instance is moved-into/swapped-with this one, all the |
| 3203 | // producers we stole still think their parents are the other queue. |
| 3204 | // So fix them up! |
| 3205 | for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { |
| 3206 | ptr->parent = this; |
| 3207 | } |
| 3208 | } |
| 3209 | |
| 3210 | |
| 3211 | ////////////////////////////////// |
| 3212 | // Implicit producer hash |
| 3213 | ////////////////////////////////// |
| 3214 | |
| 3215 | struct ImplicitProducerKVP |
| 3216 | { |
| 3217 | std::atomic<details::thread_id_t> key; |
| 3218 | ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place |
| 3219 | |
| 3220 | ImplicitProducerKVP() : value(nullptr) { } |
| 3221 | |
| 3222 | ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT |
| 3223 | { |
| 3224 | key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); |
| 3225 | value = other.value; |
| 3226 | } |
| 3227 | |
| 3228 | inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT |
| 3229 | { |
| 3230 | swap(other); |
| 3231 | return *this; |
| 3232 | } |
| 3233 | |
| 3234 | inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT |
| 3235 | { |
| 3236 | if (this != &other) { |
| 3237 | details::swap_relaxed(key, other.key); |
| 3238 | std::swap(value, other.value); |
| 3239 | } |
| 3240 | } |
| 3241 | }; |
| 3242 | |
| 3243 | template<typename XT, typename XTraits> |
| 3244 | friend void duckdb_moodycamel::swap(typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&, typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; |
| 3245 | |
| 3246 | struct ImplicitProducerHash |
| 3247 | { |
| 3248 | size_t capacity; |
| 3249 | ImplicitProducerKVP* entries; |
| 3250 | ImplicitProducerHash* prev; |
| 3251 | }; |
| 3252 | |
| 3253 | inline void populate_initial_implicit_producer_hash() |
| 3254 | { |
| 3255 | MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { |
| 3256 | return; |
| 3257 | } |
| 3258 | else { |
| 3259 | implicitProducerHashCount.store(0, std::memory_order_relaxed); |
| 3260 | auto hash = &initialImplicitProducerHash; |
| 3261 | hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; |
| 3262 | hash->entries = &initialImplicitProducerHashEntries[0]; |
| 3263 | for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { |
| 3264 | initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); |
| 3265 | } |
| 3266 | hash->prev = nullptr; |
| 3267 | implicitProducerHash.store(hash, std::memory_order_relaxed); |
| 3268 | } |
| 3269 | } |
| 3270 | |
| 3271 | void swap_implicit_producer_hashes(ConcurrentQueue& other) |
| 3272 | { |
| 3273 | MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { |
| 3274 | return; |
| 3275 | } |
| 3276 | else { |
| 3277 | // Swap (assumes our implicit producer hash is initialized) |
| 3278 | initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); |
| 3279 | initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; |
| 3280 | other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; |
| 3281 | |
| 3282 | details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); |
| 3283 | |
| 3284 | details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); |
| 3285 | if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { |
| 3286 | implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); |
| 3287 | } |
| 3288 | else { |
| 3289 | ImplicitProducerHash* hash; |
| 3290 | for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { |
| 3291 | continue; |
| 3292 | } |
| 3293 | hash->prev = &initialImplicitProducerHash; |
| 3294 | } |
| 3295 | if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { |
| 3296 | other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); |
| 3297 | } |
| 3298 | else { |
| 3299 | ImplicitProducerHash* hash; |
| 3300 | for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { |
| 3301 | continue; |
| 3302 | } |
| 3303 | hash->prev = &other.initialImplicitProducerHash; |
| 3304 | } |
| 3305 | } |
| 3306 | } |
| 3307 | |
| 3308 | // Only fails (returns nullptr) if memory allocation fails |
| 3309 | ImplicitProducer* get_or_add_implicit_producer() |
| 3310 | { |
| 3311 | // Note that since the data is essentially thread-local (key is thread ID), |
| 3312 | // there's a reduced need for fences (memory ordering is already consistent |
| 3313 | // for any individual thread), except for the current table itself. |
| 3314 | |
| 3315 | // Start by looking for the thread ID in the current and all previous hash tables. |
| 3316 | // If it's not found, it must not be in there yet, since this same thread would |
| 3317 | // have added it previously to one of the tables that we traversed. |
| 3318 | |
| 3319 | // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table |
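// Rough shape of the lookup below (a sketch, not the exact code): linear probing over a
// power-of-two-sized table,
//     size_t index = hashedId;
//     while (true) {
//         index &= hash->capacity - 1;
//         if (hash->entries[index].key == id) { /* found */ }
//         if (hash->entries[index].key == details::invalid_thread_id) break;  // not in this table
//         ++index;
//     }
// falling back to older tables via hash->prev when the id isn't found.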
| 3320 | |
| 3321 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH |
| 3322 | debug::DebugLock lock(implicitProdMutex); |
| 3323 | #endif |
| 3324 | |
| 3325 | auto id = details::thread_id(); |
| 3326 | auto hashedId = details::hash_thread_id(id); |
| 3327 | |
| 3328 | auto mainHash = implicitProducerHash.load(std::memory_order_acquire); |
| 3329 | assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) |
| 3330 | for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { |
| 3331 | // Look for the id in this hash |
| 3332 | auto index = hashedId; |
| 3333 | while (true) { // Not an infinite loop because at least one slot is free in the hash table |
| 3334 | index &= hash->capacity - 1; |
| 3335 | |
| 3336 | auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); |
| 3337 | if (probedKey == id) { |
| 3338 | // Found it! If we had to search several hashes deep, though, we should lazily add it |
| 3339 | // to the current main hash table to avoid the extended search next time. |
| 3340 | // Note there's guaranteed to be room in the current hash table since every subsequent |
| 3341 | // table implicitly reserves space for all previous tables (there's only one |
| 3342 | // implicitProducerHashCount). |
| 3343 | auto value = hash->entries[index].value; |
| 3344 | if (hash != mainHash) { |
| 3345 | index = hashedId; |
| 3346 | while (true) { |
| 3347 | index &= mainHash->capacity - 1; |
| 3348 | probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); |
| 3349 | auto empty = details::invalid_thread_id; |
| 3350 | #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 3351 | auto reusable = details::invalid_thread_id2; |
| 3352 | if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || |
| 3353 | (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { |
| 3354 | #else |
| 3355 | if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { |
| 3356 | #endif |
| 3357 | mainHash->entries[index].value = value; |
| 3358 | break; |
| 3359 | } |
| 3360 | ++index; |
| 3361 | } |
| 3362 | } |
| 3363 | |
| 3364 | return value; |
| 3365 | } |
| 3366 | if (probedKey == details::invalid_thread_id) { |
| 3367 | break; // Not in this hash table |
| 3368 | } |
| 3369 | ++index; |
| 3370 | } |
| 3371 | } |
| 3372 | |
| 3373 | // Insert! |
| 3374 | auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); |
| 3375 | while (true) { |
| 3376 | // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) |
if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) {
| 3378 | // We've acquired the resize lock, try to allocate a bigger hash table. |
| 3379 | // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when |
| 3380 | // we reload implicitProducerHash it must be the most recent version (it only gets changed within this |
| 3381 | // locked block). |
| 3382 | mainHash = implicitProducerHash.load(std::memory_order_acquire); |
| 3383 | if (newCount >= (mainHash->capacity >> 1)) { |
| 3384 | auto newCapacity = mainHash->capacity << 1; |
| 3385 | while (newCount >= (newCapacity >> 1)) { |
| 3386 | newCapacity <<= 1; |
| 3387 | } |
| 3388 | auto raw = static_cast<char*>((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of<ImplicitProducerKVP>::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); |
| 3389 | if (raw == nullptr) { |
| 3390 | // Allocation failed |
| 3391 | implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); |
implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
| 3393 | return nullptr; |
| 3394 | } |
| 3395 | |
| 3396 | auto newHash = new (raw) ImplicitProducerHash; |
| 3397 | newHash->capacity = newCapacity; |
| 3398 | newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash))); |
| 3399 | for (size_t i = 0; i != newCapacity; ++i) { |
| 3400 | new (newHash->entries + i) ImplicitProducerKVP; |
| 3401 | newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); |
| 3402 | } |
| 3403 | newHash->prev = mainHash; |
| 3404 | implicitProducerHash.store(newHash, std::memory_order_release); |
implicitProducerHashResizeInProgress.clear(std::memory_order_release);
| 3406 | mainHash = newHash; |
| 3407 | } |
| 3408 | else { |
implicitProducerHashResizeInProgress.clear(std::memory_order_release);
| 3410 | } |
| 3411 | } |
| 3412 | |
| 3413 | // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table |
| 3414 | // to finish being allocated by another thread (and if we just finished allocating above, the condition will |
| 3415 | // always be true) |
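// (Since the resize above kicks in at half capacity and inserts are only allowed below
// three-quarters, the table a producer is ultimately placed in should never be much more
// than ~75% full.)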
| 3416 | if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { |
| 3417 | bool recycled; |
| 3418 | auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false, recycled)); |
| 3419 | if (producer == nullptr) { |
| 3420 | implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); |
| 3421 | return nullptr; |
| 3422 | } |
| 3423 | if (recycled) { |
| 3424 | implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); |
| 3425 | } |
| 3426 | |
| 3427 | #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 3428 | producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; |
| 3429 | producer->threadExitListener.userData = producer; |
| 3430 | details::ThreadExitNotifier::subscribe(&producer->threadExitListener); |
| 3431 | #endif |
| 3432 | |
| 3433 | auto index = hashedId; |
| 3434 | while (true) { |
| 3435 | index &= mainHash->capacity - 1; |
| 3436 | auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); |
| 3437 | |
| 3438 | auto empty = details::invalid_thread_id; |
| 3439 | #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 3440 | auto reusable = details::invalid_thread_id2; |
| 3441 | if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || |
| 3442 | (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { |
| 3443 | #else |
| 3444 | if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { |
| 3445 | #endif |
| 3446 | mainHash->entries[index].value = producer; |
| 3447 | break; |
| 3448 | } |
| 3449 | ++index; |
| 3450 | } |
| 3451 | return producer; |
| 3452 | } |
| 3453 | |
| 3454 | // Hmm, the old hash is quite full and somebody else is busy allocating a new one. |
| 3455 | // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, |
| 3456 | // we try to allocate ourselves). |
| 3457 | mainHash = implicitProducerHash.load(std::memory_order_acquire); |
| 3458 | } |
| 3459 | } |
| 3460 | |
| 3461 | #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED |
| 3462 | void implicit_producer_thread_exited(ImplicitProducer* producer) |
| 3463 | { |
| 3464 | // Remove from thread exit listeners |
| 3465 | details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener); |
| 3466 | |
| 3467 | // Remove from hash |
| 3468 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH |
| 3469 | debug::DebugLock lock(implicitProdMutex); |
| 3470 | #endif |
| 3471 | auto hash = implicitProducerHash.load(std::memory_order_acquire); |
| 3472 | assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place |
| 3473 | auto id = details::thread_id(); |
| 3474 | auto hashedId = details::hash_thread_id(id); |
| 3475 | details::thread_id_t probedKey; |
| 3476 | |
| 3477 | // We need to traverse all the hashes just in case other threads aren't on the current one yet and are |
| 3478 | // trying to add an entry thinking there's a free slot (because they reused a producer) |
| 3479 | for (; hash != nullptr; hash = hash->prev) { |
| 3480 | auto index = hashedId; |
| 3481 | do { |
| 3482 | index &= hash->capacity - 1; |
| 3483 | probedKey = hash->entries[index].key.load(std::memory_order_relaxed); |
| 3484 | if (probedKey == id) { |
| 3485 | hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release); |
| 3486 | break; |
| 3487 | } |
| 3488 | ++index; |
| 3489 | } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place |
| 3490 | } |
| 3491 | |
| 3492 | // Mark the queue as being recyclable |
| 3493 | producer->inactive.store(true, std::memory_order_release); |
| 3494 | } |
| 3495 | |
| 3496 | static void implicit_producer_thread_exited_callback(void* userData) |
| 3497 | { |
| 3498 | auto producer = static_cast<ImplicitProducer*>(userData); |
| 3499 | auto queue = producer->parent; |
| 3500 | queue->implicit_producer_thread_exited(producer); |
| 3501 | } |
| 3502 | #endif |
| 3503 | |
| 3504 | ////////////////////////////////// |
| 3505 | // Utility functions |
| 3506 | ////////////////////////////////// |
| 3507 | |
| 3508 | template<typename TAlign> |
| 3509 | static inline void* aligned_malloc(size_t size) |
| 3510 | { |
| 3511 | if (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value) |
| 3512 | return (Traits::malloc)(size); |
| 3513 | size_t alignment = std::alignment_of<TAlign>::value; |
| 3514 | void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); |
| 3515 | if (!raw) |
| 3516 | return nullptr; |
| 3517 | char* ptr = details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*)); |
| 3518 | *(reinterpret_cast<void**>(ptr) - 1) = raw; |
| 3519 | return ptr; |
| 3520 | } |
| 3521 | |
| 3522 | template<typename TAlign> |
| 3523 | static inline void aligned_free(void* ptr) |
| 3524 | { |
| 3525 | if (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value) |
| 3526 | return (Traits::free)(ptr); |
| 3527 | (Traits::free)(ptr ? *(reinterpret_cast<void**>(ptr) - 1) : nullptr); |
| 3528 | } |
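| | |
| | // Layout sketch for the over-aligned path above (comment only): the raw allocation is padded so |
| | // that a suitably aligned pointer can be carved out of it, and the original pointer is stashed |
| | // in the void*-sized slot immediately before the address handed back to the caller: |
| | // |
| | //   [ raw ... padding ... | saved raw pointer | TAlign-aligned block returned to the caller ] |
| | //     ^                                         ^ |
| | //     result of Traits::malloc                  ptr |
| | // |
| | // aligned_free reads the saved pointer back via *(reinterpret_cast<void**>(ptr) - 1) and frees it. |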
| 3529 | |
| 3530 | template<typename U> |
| 3531 | static inline U* create_array(size_t count) |
| 3532 | { |
| 3533 | assert(count > 0); |
| 3534 | U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count)); |
| 3535 | if (p == nullptr) |
| 3536 | return nullptr; |
| 3537 | |
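| | // Default-construct each element in place via placement new |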
| 3538 | for (size_t i = 0; i != count; ++i) |
| 3539 | new (p + i) U(); |
| 3540 | return p; |
| 3541 | } |
| 3542 | |
| 3543 | template<typename U> |
| 3544 | static inline void destroy_array(U* p, size_t count) |
| 3545 | { |
| 3546 | if (p != nullptr) { |
| 3547 | assert(count > 0); |
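| | // Destroy elements in reverse order of construction |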
| 3548 | for (size_t i = count; i != 0; ) |
| 3549 | (p + --i)->~U(); |
| 3550 | } |
| 3551 | aligned_free<U>(p); |
| 3552 | } |
| 3553 | |
| 3554 | template<typename U> |
| 3555 | static inline U* create() |
| 3556 | { |
| 3557 | void* p = aligned_malloc<U>(sizeof(U)); |
| 3558 | return p != nullptr ? new (p) U : nullptr; |
| 3559 | } |
| 3560 | |
| 3561 | template<typename U, typename A1> |
| 3562 | static inline U* create(A1&& a1) |
| 3563 | { |
| 3564 | void* p = aligned_malloc<U>(sizeof(U)); |
| 3565 | return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr; |
| 3566 | } |
| 3567 | |
| 3568 | template<typename U> |
| 3569 | static inline void destroy(U* p) |
| 3570 | { |
| 3571 | if (p != nullptr) |
| 3572 | p->~U(); |
| 3573 | aligned_free<U>(p); |
| 3574 | } |
| 3575 | |
| 3576 | private: |
| 3577 | std::atomic<ProducerBase*> producerListTail; |
| 3578 | std::atomic<std::uint32_t> producerCount; |
| 3579 | |
| 3580 | std::atomic<size_t> initialBlockPoolIndex; |
| 3581 | Block* initialBlockPool; |
| 3582 | size_t initialBlockPoolSize; |
| 3583 | |
| 3584 | #ifndef MCDBGQ_USEDEBUGFREELIST |
| 3585 | FreeList<Block> freeList; |
| 3586 | #else |
| 3587 | debug::DebugFreeList<Block> freeList; |
| 3588 | #endif |
| 3589 | |
| 3590 | std::atomic<ImplicitProducerHash*> implicitProducerHash; |
| 3591 | std::atomic<size_t> implicitProducerHashCount; // Number of slots logically used |
| 3592 | ImplicitProducerHash initialImplicitProducerHash; |
| 3593 | std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries; |
| 3594 | std::atomic_flag implicitProducerHashResizeInProgress; |
| 3595 | |
| 3596 | std::atomic<std::uint32_t> nextExplicitConsumerId; |
| 3597 | std::atomic<std::uint32_t> globalExplicitConsumerOffset; |
| 3598 | |
| 3599 | #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH |
| 3600 | debug::DebugMutex implicitProdMutex; |
| 3601 | #endif |
| 3602 | |
| 3603 | #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG |
| 3604 | std::atomic<ExplicitProducer*> explicitProducers; |
| 3605 | std::atomic<ImplicitProducer*> implicitProducers; |
| 3606 | #endif |
| 3607 | }; |
| 3608 | |
| 3609 | |
| 3610 | template<typename T, typename Traits> |
| 3611 | ProducerToken::ProducerToken(ConcurrentQueue<T, Traits>& queue) |
| 3612 | : producer(queue.recycle_or_create_producer(true)) |
| 3613 | { |
| 3614 | if (producer != nullptr) { |
| 3615 | producer->token = this; |
| 3616 | } |
| 3617 | } |
| 3618 | |
| 3619 | template<typename T, typename Traits> |
| 3620 | ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits>& queue) |
| 3621 | : producer(reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->recycle_or_create_producer(true)) |
| 3622 | { |
| 3623 | if (producer != nullptr) { |
| 3624 | producer->token = this; |
| 3625 | } |
| 3626 | } |
| 3627 | |
| 3628 | template<typename T, typename Traits> |
| 3629 | ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue) |
| 3630 | : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) |
| 3631 | { |
| 3632 | initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); |
| 3633 | lastKnownGlobalOffset = static_cast<std::uint32_t>(-1); |
| 3634 | } |
| 3635 | |
| 3636 | template<typename T, typename Traits> |
| 3637 | ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue) |
| 3638 | : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) |
| 3639 | { |
| 3640 | initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); |
| 3641 | lastKnownGlobalOffset = static_cast<std::uint32_t>(-1); |
| 3642 | } |
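| | |
| | // Usage sketch (comment only): tokens are constructed from a queue and then passed to the |
| | // token-taking enqueue/dequeue overloads declared earlier in this header, e.g.: |
| | // |
| | //   duckdb_moodycamel::ConcurrentQueue<int> q; |
| | //   duckdb_moodycamel::ProducerToken ptok(q);   // pre-creates a dedicated explicit producer |
| | //   q.enqueue(ptok, 42); |
| | // |
| | //   duckdb_moodycamel::ConsumerToken ctok(q);   // claims the next explicit consumer id (see above) |
| | //   int item; |
| | //   bool ok = q.try_dequeue(ctok, item); |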
| 3643 | |
| 3644 | template<typename T, typename Traits> |
| 3645 | inline void swap(ConcurrentQueue<T, Traits>& a, ConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT |
| 3646 | { |
| 3647 | a.swap(b); |
| 3648 | } |
| 3649 | |
| 3650 | inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT |
| 3651 | { |
| 3652 | a.swap(b); |
| 3653 | } |
| 3654 | |
| 3655 | inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT |
| 3656 | { |
| 3657 | a.swap(b); |
| 3658 | } |
| 3659 | |
| 3660 | template<typename T, typename Traits> |
| 3661 | inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT |
| 3662 | { |
| 3663 | a.swap(b); |
| 3664 | } |
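| | |
| | // These non-member overloads let unqualified calls resolve via argument-dependent lookup, e.g. |
| | // (illustrative only): |
| | // |
| | //   using std::swap; |
| | //   swap(queueA, queueB);   // selects the ConcurrentQueue overload above |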
| 3665 | |
| 3666 | } |
| 3667 | |
| 3668 | |