| 1 | /* |
| 2 | Copyright (c) 2005-2019 Intel Corporation |
| 3 | |
| 4 | Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | you may not use this file except in compliance with the License. |
| 6 | You may obtain a copy of the License at |
| 7 | |
| 8 | http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | |
| 10 | Unless required by applicable law or agreed to in writing, software |
| 11 | distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | See the License for the specific language governing permissions and |
| 14 | limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef __TBB_tbbmalloc_internal_H |
| 18 | #define __TBB_tbbmalloc_internal_H |
| 19 | |
| 20 | |
| 21 | #include "TypeDefinitions.h" /* Also includes customization layer Customize.h */ |
| 22 | |
| 23 | #if USE_PTHREAD |
| 24 | // Some pthreads documentation says that <pthread.h> must be the first included header. |
| 25 | #include <pthread.h> |
| 26 | typedef pthread_key_t tls_key_t; |
| 27 | #elif USE_WINTHREAD |
| 28 | #include "tbb/machine/windows_api.h" |
| 29 | typedef DWORD tls_key_t; |
| 30 | #else |
| 31 | #error Must define USE_PTHREAD or USE_WINTHREAD |
| 32 | #endif |
| 33 | |
| 34 | // TODO: *BSD also has it |
| 35 | #define BACKEND_HAS_MREMAP __linux__ |
| 36 | #define CHECK_ALLOCATION_RANGE MALLOC_DEBUG || MALLOC_ZONE_OVERLOAD_ENABLED || MALLOC_UNIXLIKE_OVERLOAD_ENABLED |
| 37 | |
| 38 | #include "tbb/tbb_config.h" // for __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN |
| 39 | #if __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN |
| 40 | #define _EXCEPTION_PTR_H /* prevents exception_ptr.h inclusion */ |
| 41 | #define _GLIBCXX_NESTED_EXCEPTION_H /* prevents nested_exception.h inclusion */ |
| 42 | #endif |
| 43 | |
| 44 | #include <stdio.h> |
| 45 | #include <stdlib.h> |
| 46 | #include <limits.h> // for CHAR_BIT |
| 47 | #include <string.h> // for memset |
| 48 | #if MALLOC_CHECK_RECURSION |
| 49 | #include <new> /* for placement new */ |
| 50 | #endif |
| 51 | #include "tbb/scalable_allocator.h" |
| 52 | #include "tbbmalloc_internal_api.h" |
| 53 | |
| 54 | /********* Various compile-time options **************/ |
| 55 | |
| 56 | #if !__TBB_DEFINE_MIC && __TBB_MIC_NATIVE |
| 57 | #error Intel(R) Many Integrated Core Compiler does not define __MIC__ anymore. |
| 58 | #endif |
| 59 | |
| 60 | #define MALLOC_TRACE 0 |
| 61 | |
| 62 | #if MALLOC_TRACE |
| 63 | #define TRACEF(x) printf x |
| 64 | #else |
| 65 | #define TRACEF(x) ((void)0) |
| 66 | #endif /* MALLOC_TRACE */ |
| 67 | |
| 68 | #define ASSERT_TEXT NULL |
| 69 | |
| 70 | #define COLLECT_STATISTICS ( MALLOC_DEBUG && MALLOCENV_COLLECT_STATISTICS ) |
| 71 | #ifndef USE_INTERNAL_TID |
| 72 | #define USE_INTERNAL_TID COLLECT_STATISTICS || MALLOC_TRACE |
| 73 | #endif |
| 74 | |
| 75 | #include "Statistics.h" |
| 76 | |
| 77 | // call yield for whitebox testing, skip in real library |
| 78 | #ifndef WhiteboxTestingYield |
| 79 | #define WhiteboxTestingYield() ((void)0) |
| 80 | #endif |
| 81 | |
| 82 | |
| 83 | /********* End compile-time options **************/ |
| 84 | |
| 85 | namespace rml { |
| 86 | |
| 87 | namespace internal { |
| 88 | |
| 89 | #if __TBB_MALLOC_LOCACHE_STAT |
| 90 | extern intptr_t mallocCalls, cacheHits; |
| 91 | extern intptr_t memAllocKB, memHitKB; |
| 92 | #endif |
| 93 | |
| 94 | //! Utility template function to prevent "unused" warnings by various compilers. |
| 95 | template<typename T> |
| 96 | void suppress_unused_warning( const T& ) {} |
| 97 | |
| 98 | /********** Various global default constants ********/ |
| 99 | |
| 100 | /* |
| 101 | * Default huge page size |
| 102 | */ |
| 103 | static const size_t HUGE_PAGE_SIZE = 2 * 1024 * 1024; |
| 104 | |
| 105 | /********** End of global default constants *********/ |
| 106 | |
| 107 | /********** Various numeric parameters controlling allocations ********/ |
| 108 | |
| 109 | /* |
| 110 | * slabSize - the size of a block used for allocation of small objects; |
| 111 | * it must be larger than maxSegregatedObjectSize. |
| 112 | */ |
| 113 | const uintptr_t slabSize = 16*1024; |
| 114 | |
| 115 | /* |
| 116 | * Cleanup frequency of the large-block cache. |
| 117 | * It should be a power of 2 for fast checking. |
| 118 | */ |
| 119 | const unsigned cacheCleanupFreq = 256; |
| 120 | |
| 121 | /* |
| 122 | * Alignment of large (>= minLargeObjectSize) objects. |
| 123 | */ |
| 124 | const size_t largeObjectAlignment = estimatedCacheLineSize; |
| 125 | |
| 126 | /* |
| 127 | * The number of bins in the TLS that hold blocks we can allocate from. |
| 128 | */ |
| 129 | const uint32_t numBlockBinLimit = 31; |
| 130 | |
| 131 | /********** End of numeric parameters controlling allocations *********/ |
| 132 | |
| 133 | class BlockI; |
| 134 | class Block; |
| 135 | struct LargeMemoryBlock; |
| 136 | struct ExtMemoryPool; |
| 137 | struct MemRegion; |
| 138 | class FreeBlock; |
| 139 | class TLSData; |
| 140 | class Backend; |
| 141 | class MemoryPool; |
| 142 | struct CacheBinOperation; |
| 143 | extern const uint32_t minLargeObjectSize; |
| 144 | |
| 145 | enum DecreaseOrIncrease { |
| 146 | decrease, increase |
| 147 | }; |
| 148 | |
| 149 | class TLSKey { |
| 150 | tls_key_t TLS_pointer_key; |
| 151 | public: |
| 152 | bool init(); |
| 153 | bool destroy(); |
| 154 | TLSData* getThreadMallocTLS() const; |
| 155 | void setThreadMallocTLS( TLSData * newvalue ); |
| 156 | TLSData* createTLS(MemoryPool *memPool, Backend *backend); |
| 157 | }; |
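/* Illustrative sketch only, not the library's actual definitions (those live in the
   corresponding .cpp sources): under USE_PTHREAD the methods above would typically map
   onto the pthread TLS API roughly like this; the NULL destructor is an assumption.

   bool TLSKey::init() {
       return !pthread_key_create(&TLS_pointer_key, NULL);   // 0 means success
   }
   bool TLSKey::destroy() {
       return !pthread_key_delete(TLS_pointer_key);
   }
   TLSData* TLSKey::getThreadMallocTLS() const {
       return (TLSData*)pthread_getspecific(TLS_pointer_key);
   }
   void TLSKey::setThreadMallocTLS(TLSData *newvalue) {
       pthread_setspecific(TLS_pointer_key, newvalue);
   }
*/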
| 158 | |
| 159 | template<typename Arg, typename Compare> |
| 160 | inline void AtomicUpdate(Arg &location, Arg newVal, const Compare &cmp) |
| 161 | { |
| 162 | MALLOC_STATIC_ASSERT(sizeof(Arg) == sizeof(intptr_t), |
| 163 | "Type of argument must match AtomicCompareExchange type." ); |
| 164 | for (Arg old = location; cmp(old, newVal); ) { |
| 165 | Arg val = AtomicCompareExchange((intptr_t&)location, (intptr_t)newVal, old); |
| 166 | if (val == old) |
| 167 | break; |
| 168 | // TODO: do we need backoff after unsuccessful CAS? |
| 169 | old = val; |
| 170 | } |
| 171 | } |
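/* Illustrative usage sketch (not library code): AtomicUpdate can maintain a
   monotonically growing watermark, here with a hypothetical "less than" functor.
   The update is applied only while cmp(currentValue, newVal) holds, so concurrent
   writers can never move the watermark backwards.

   struct LessThan {
       bool operator()(const intptr_t &cur, const intptr_t &newVal) const {
           return cur < newVal;                       // update only if the new value is larger
       }
   };

   static intptr_t peakUsage;                         // zero-initialized
   void notePeak(size_t currentUsage) {
       AtomicUpdate(peakUsage, (intptr_t)currentUsage, LessThan());
   }
*/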
| 172 | |
| 173 | // TODO: make BitMaskBasic more general |
| 174 | // (currently, it fits BitMaskMin well, but is not as suitable for BitMaskMax) |
| 175 | template<unsigned NUM> |
| 176 | class BitMaskBasic { |
| 177 | static const unsigned SZ = (NUM-1)/(CHAR_BIT*sizeof(uintptr_t))+1; |
| 178 | static const unsigned WORD_LEN = CHAR_BIT*sizeof(uintptr_t); |
| 179 | uintptr_t mask[SZ]; |
| 180 | protected: |
| 181 | void set(size_t idx, bool val) { |
| 182 | MALLOC_ASSERT(idx<NUM, ASSERT_TEXT); |
| 183 | |
| 184 | size_t i = idx / WORD_LEN; |
| 185 | int pos = WORD_LEN - idx % WORD_LEN - 1; |
| 186 | if (val) |
| 187 | AtomicOr(&mask[i], 1ULL << pos); |
| 188 | else |
| 189 | AtomicAnd(&mask[i], ~(1ULL << pos)); |
| 190 | } |
| 191 | int getMinTrue(unsigned startIdx) const { |
| 192 | unsigned idx = startIdx / WORD_LEN; |
| 193 | int pos; |
| 194 | |
| 195 | if (startIdx % WORD_LEN) { |
| 196 | // only interested in part of a word, clear bits before startIdx |
| 197 | pos = WORD_LEN - startIdx % WORD_LEN; |
| 198 | uintptr_t actualMask = mask[idx] & (((uintptr_t)1<<pos) - 1); |
| 199 | idx++; |
| 200 | if (-1 != (pos = BitScanRev(actualMask))) |
| 201 | return idx*WORD_LEN - pos - 1; |
| 202 | } |
| 203 | |
| 204 | while (idx<SZ) |
| 205 | if (-1 != (pos = BitScanRev(mask[idx++]))) |
| 206 | return idx*WORD_LEN - pos - 1; |
| 207 | return -1; |
| 208 | } |
| 209 | public: |
| 210 | void reset() { for (unsigned i=0; i<SZ; i++) mask[i] = 0; } |
| 211 | }; |
| 212 | |
| 213 | template<unsigned NUM> |
| 214 | class BitMaskMin : public BitMaskBasic<NUM> { |
| 215 | public: |
| 216 | void set(size_t idx, bool val) { BitMaskBasic<NUM>::set(idx, val); } |
| 217 | int getMinTrue(unsigned startIdx) const { |
| 218 | return BitMaskBasic<NUM>::getMinTrue(startIdx); |
| 219 | } |
| 220 | }; |
| 221 | |
| 222 | template<unsigned NUM> |
| 223 | class BitMaskMax : public BitMaskBasic<NUM> { |
| 224 | public: |
| 225 | void set(size_t idx, bool val) { |
| 226 | BitMaskBasic<NUM>::set(NUM - 1 - idx, val); |
| 227 | } |
| 228 | int getMaxTrue(unsigned startIdx) const { |
| 229 | int p = BitMaskBasic<NUM>::getMinTrue(NUM-startIdx-1); |
| 230 | return -1==p? -1 : (int)NUM - 1 - p; |
| 231 | } |
| 232 | }; |
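/* Illustrative usage sketch (not library code): BitMaskMin answers "what is the
   lowest set index at or above startIdx", BitMaskMax answers "what is the highest
   set index at or below startIdx"; both return -1 if no such bit is set. The library
   keeps such masks in zero-initialized memory; reset() is called here only to make
   the standalone example well-defined.

   BitMaskMin<64> minMask;
   minMask.reset();
   minMask.set(5, true);
   minMask.set(12, true);
   int a = minMask.getMinTrue(0);    // 5
   int b = minMask.getMinTrue(6);    // 12
   int c = minMask.getMinTrue(13);   // -1

   BitMaskMax<64> maxMask;
   maxMask.reset();
   maxMask.set(5, true);
   maxMask.set(12, true);
   int d = maxMask.getMaxTrue(63);   // 12
   int e = maxMask.getMaxTrue(11);   // 5
   int f = maxMask.getMaxTrue(4);    // -1
*/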
| 233 | |
| 234 | |
| 235 | // The part of thread-specific data that can be modified by other threads. |
| 236 | // Such modifications must be protected by AllLocalCaches::listLock. |
| 237 | struct TLSRemote { |
| 238 | TLSRemote *next, |
| 239 | *prev; |
| 240 | }; |
| 241 | |
| 242 | // The list of all thread-local data; supporting cleanup of thread caches |
| 243 | class AllLocalCaches { |
| 244 | TLSRemote *head; |
| 245 | MallocMutex listLock; // protects operations in the list |
| 246 | public: |
| 247 | void registerThread(TLSRemote *tls); |
| 248 | void unregisterThread(TLSRemote *tls); |
| 249 | bool cleanup(bool cleanOnlyUnused); |
| 250 | void markUnused(); |
| 251 | void reset() { head = NULL; } |
| 252 | }; |
| 253 | |
| 254 | class LifoList { |
| 255 | public: |
| 256 | inline LifoList(); |
| 257 | inline void push(Block *block); |
| 258 | inline Block *pop(); |
| 259 | inline Block *grab(); |
| 260 | |
| 261 | private: |
| 262 | Block *top; |
| 263 | MallocMutex lock; |
| 264 | }; |
| 265 | |
| 266 | /* |
| 267 | * When a block that is not completely free is returned for reuse by other threads, |
| 268 | * this is where the block goes. |
| 269 | * |
| 270 | * LifoList assumes zero initialization; its constructors are therefore omitted |
| 271 | * below, to avoid linking with C++ libraries on Linux. |
| 272 | */ |
| 273 | |
| 274 | class OrphanedBlocks { |
| 275 | LifoList bins[numBlockBinLimit]; |
| 276 | public: |
| 277 | Block *get(TLSData *tls, unsigned int size); |
| 278 | void put(intptr_t binTag, Block *block); |
| 279 | void reset(); |
| 280 | bool cleanup(Backend* backend); |
| 281 | }; |
| 282 | |
| 283 | /* Large objects entities */ |
| 284 | #include "large_objects.h" |
| 285 | |
| 286 | // select the index size for BackRefMaster based on the word size: uint32_t by default, |
| 287 | // uint16_t for 32-bit platforms |
| 288 | template<bool> |
| 289 | struct MasterIndexSelect { |
| 290 | typedef uint32_t master_type; |
| 291 | }; |
| 292 | |
| 293 | template<> |
| 294 | struct MasterIndexSelect<false> { |
| 295 | typedef uint16_t master_type; |
| 296 | }; |
| 297 | |
| 298 | class BackRefIdx { // composite index to backreference array |
| 299 | public: |
| 300 | typedef MasterIndexSelect<4 < sizeof(uintptr_t)>::master_type master_t; |
| 301 | private: |
| 302 | static const master_t invalid = ~master_t(0); |
| 303 | master_t master; // index in BackRefMaster |
| 304 | uint16_t largeObj:1; // is this object "large"? |
| 305 | uint16_t offset :15; // offset from beginning of BackRefBlock |
| 306 | public: |
| 307 | BackRefIdx() : master(invalid), largeObj(0), offset(0) {} |
| 308 | bool isInvalid() const { return master == invalid; } |
| 309 | bool isLargeObject() const { return largeObj; } |
| 310 | master_t getMaster() const { return master; } |
| 311 | uint16_t getOffset() const { return offset; } |
| 312 | |
| 313 | // only newBackRef can modify BackRefIdx |
| 314 | static BackRefIdx newBackRef(bool largeObj); |
| 315 | }; |
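/* Illustrative note (an observation, not library code): on a 64-bit platform
   4 < sizeof(uintptr_t) is true, so master_t is uint32_t; on a 32-bit platform
   master_t is uint16_t, so the master index, the largeObj flag and the 15-bit
   offset pack into a single 32-bit word.

   MALLOC_STATIC_ASSERT(sizeof(BackRefIdx::master_t) == (sizeof(uintptr_t) > 4 ? 4 : 2),
                        "master index width follows the platform word size");
*/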
| 316 | |
| 317 | // Block header is used during block coalescing |
| 318 | // and must be preserved in used blocks. |
| 319 | class BlockI { |
| 320 | intptr_t blockState[2]; |
| 321 | }; |
| 322 | |
| 323 | struct LargeMemoryBlock : public BlockI { |
| 324 | MemoryPool *pool; // owner pool |
| 325 | LargeMemoryBlock *next, // ptrs in list of cached blocks |
| 326 | *prev, |
| 327 | // doubly-linked list of the pool's large objects |
| 328 | // Used to destroy backrefs on pool destroy (backrefs are global) |
| 329 | // and for releasing objects during pool reset. |
| 330 | *gPrev, |
| 331 | *gNext; |
| 332 | uintptr_t age; // age of block while in cache |
| 333 | size_t objectSize; // the size requested by a client |
| 334 | size_t unalignedSize; // the size requested from backend |
| 335 | BackRefIdx backRefIdx; // cached here, used copy is in LargeObjectHdr |
| 336 | }; |
| 337 | |
| 338 | // Classes and methods for backend.cpp |
| 339 | #include "backend.h" |
| 340 | |
| 341 | // A TBB allocator mode that can be controlled by the user |
| 342 | // via the API/environment variable. Must be placed in zero-initialized memory. |
| 343 | // External synchronization assumed. |
| 344 | // TODO: TBB_VERSION support |
| 345 | class AllocControlledMode { |
| 346 | intptr_t val; |
| 347 | bool setDone; |
| 348 | |
| 349 | public: |
| 350 | intptr_t get() const { |
| 351 | MALLOC_ASSERT(setDone, ASSERT_TEXT); |
| 352 | return val; |
| 353 | } |
| 354 | |
| 355 | // Note: set() can be called before init() |
| 356 | void set(intptr_t newVal) { |
| 357 | val = newVal; |
| 358 | setDone = true; |
| 359 | } |
| 360 | |
| 361 | bool ready() const { |
| 362 | return setDone; |
| 363 | } |
| 364 | |
| 365 | // envName - environment variable to get controlled mode |
| 366 | void initReadEnv(const char *envName, intptr_t defaultVal) { |
| 367 | if (!setDone) { |
| 368 | #if !__TBB_WIN8UI_SUPPORT |
| 369 | // TODO: use strtol to get the actual value of the environment variable |
| 370 | const char *envVal = getenv(envName); |
| 371 | if (envVal && !strcmp(envVal, "1" )) |
| 372 | val = 1; |
| 373 | else |
| 374 | #endif |
| 375 | val = defaultVal; |
| 376 | setDone = true; |
| 377 | } |
| 378 | } |
| 379 | }; |
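/* Illustrative usage sketch (the variable, the environment name and enableFeature()
   are hypothetical, not part of the library): an AllocControlledMode object must live
   in zero-initialized (e.g. static) storage; initReadEnv() fills it from the environment
   unless set() has already been called via the API.

   static AllocControlledMode exampleMode;              // zero-initialized
   void initExampleMode() {
       exampleMode.initReadEnv("TBB_EXAMPLE_MODE", 0);   // 0 is the default value
       if (exampleMode.get())                            // valid only once ready() is true
           enableFeature();                              // hypothetical action
   }
*/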
| 380 | |
| 381 | // Page type to be used inside MapMemory. |
| 382 | // Regular (4KB aligned), Huge and Transparent Huge Pages (2MB aligned). |
| 383 | enum PageType { |
| 384 | REGULAR = 0, |
| 385 | PREALLOCATED_HUGE_PAGE, |
| 386 | TRANSPARENT_HUGE_PAGE |
| 387 | }; |
| 388 | |
| 389 | // init() and printStatus() are called only under the global initialization lock. |
| 390 | // A race is possible between registerAllocation() and registerReleasing(); |
| 391 | // the harm is that the release of at most one huge page is missed (because failure |
| 392 | // to get a huge page is registered only the 1st time), which is negligible. |
| 393 | // setMode() can also be called concurrently. |
| 394 | // Object must reside in zero-initialized memory |
| 395 | // TODO: can we check for huge page presence during every 10th mmap() call |
| 396 | // in case huge page is released by another process? |
| 397 | class HugePagesStatus { |
| 398 | private: |
| 399 | AllocControlledMode requestedMode; // changed only by user |
| 400 | // to keep isEnabled and requestedMode consistent |
| 401 | MallocMutex setModeLock; |
| 402 | size_t pageSize; |
| 403 | intptr_t needActualStatusPrint; |
| 404 | |
| 405 | static void doPrintStatus(bool state, const char *stateName) { |
| 406 | // Under macOS* fprintf/snprintf acquires an internal lock, so when |
| 407 | // the 1st allocation is done under the lock, we get a deadlock. |
| 408 | // Do not use fprintf etc. during initialization. |
| 409 | fputs("TBBmalloc: huge pages\t" , stderr); |
| 410 | if (!state) |
| 411 | fputs("not " , stderr); |
| 412 | fputs(stateName, stderr); |
| 413 | fputs("\n" , stderr); |
| 414 | } |
| 415 | |
| 416 | void parseSystemMemInfo() { |
| 417 | bool hpAvailable = false; |
| 418 | bool thpAvailable = false; |
| 419 | unsigned long long hugePageSize = 0; |
| 420 | |
| 421 | #if __linux__ |
| 422 | // Check huge pages existence |
| 423 | unsigned long long meminfoHugePagesTotal = 0; |
| 424 | |
| 425 | parseFileItem meminfoItems[] = { |
| 426 | // Parse system huge page size |
| 427 | { "Hugepagesize: %llu kB" , hugePageSize }, |
| 428 | // Check if there are preallocated huge pages on the system |
| 429 | // https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt |
| 430 | { "HugePages_Total: %llu" , meminfoHugePagesTotal } }; |
| 431 | |
| 432 | parseFile</*BUFF_SIZE=*/100>("/proc/meminfo" , meminfoItems); |
| 433 | |
| 434 | // Double-check other system information regarding preallocated |
| 435 | // huge pages if there is no information in /proc/meminfo |
| 436 | unsigned long long vmHugePagesTotal = 0; |
| 437 | |
| 438 | parseFileItem vmItem[] = { { "%llu" , vmHugePagesTotal } }; |
| 439 | |
| 440 | // We parse a counter value; it can't be huge |
| 441 | parseFile</*BUFF_SIZE=*/100>("/proc/sys/vm/nr_hugepages" , vmItem); |
| 442 | |
| 443 | if (meminfoHugePagesTotal > 0 || vmHugePagesTotal > 0) { |
| 444 | MALLOC_ASSERT(hugePageSize != 0, "Huge page size can't be zero if preallocated huge pages were found." ); |
| 445 | |
| 446 | // Any non-zero value clearly indicates that there are preallocated |
| 447 | // huge pages on the system |
| 448 | hpAvailable = true; |
| 449 | } |
| 450 | |
| 451 | // Check if there is transparent huge page support on the system |
| 452 | unsigned long long thpPresent = 'n'; |
| 453 | parseFileItem thpItem[] = { { "[alwa%cs] madvise never\n" , thpPresent } }; |
| 454 | parseFile</*BUFF_SIZE=*/100>("/sys/kernel/mm/transparent_hugepage/enabled" , thpItem); |
| 455 | |
| 456 | if (thpPresent == 'y') { |
| 457 | MALLOC_ASSERT(hugePageSize != 0, "Huge page size can't be zero if THP support was found." ); |
| 458 | thpAvailable = true; |
| 459 | } |
| 460 | #endif |
| 461 | MALLOC_ASSERT(!pageSize, "Huge page size can't be set twice. Double initialization." ); |
| 462 | |
| 463 | // Initialize object variables |
| 464 | pageSize = hugePageSize * 1024; // was read in KB from meminfo |
| 465 | isHPAvailable = hpAvailable; |
| 466 | isTHPAvailable = thpAvailable; |
| 467 | } |
| 468 | |
| 469 | public: |
| 470 | |
| 471 | // System information |
| 472 | bool isHPAvailable; |
| 473 | bool isTHPAvailable; |
| 474 | |
| 475 | // User defined value |
| 476 | bool isEnabled; |
| 477 | |
| 478 | void init() { |
| 479 | parseSystemMemInfo(); |
| 480 | MallocMutex::scoped_lock lock(setModeLock); |
| 481 | requestedMode.initReadEnv("TBB_MALLOC_USE_HUGE_PAGES" , 0); |
| 482 | isEnabled = (isHPAvailable || isTHPAvailable) && requestedMode.get(); |
| 483 | } |
| 484 | |
| 485 | // Can be set from user code at any point. |
| 486 | // If init() has not been called by then, isEnabled will be false |
| 487 | void setMode(intptr_t newVal) { |
| 488 | MallocMutex::scoped_lock lock(setModeLock); |
| 489 | requestedMode.set(newVal); |
| 490 | isEnabled = (isHPAvailable || isTHPAvailable) && newVal; |
| 491 | } |
| 492 | |
| 493 | bool isRequested() const { |
| 494 | return requestedMode.ready() ? requestedMode.get() : false; |
| 495 | } |
| 496 | |
| 497 | void reset() { |
| 498 | pageSize = needActualStatusPrint = 0; |
| 499 | isEnabled = isHPAvailable = isTHPAvailable = false; |
| 500 | } |
| 501 | |
| 502 | // If the memory mapping size is a multiple of the huge page size, some OS kernels |
| 503 | // can use huge pages transparently. Use this granularity when huge pages are requested. |
| 504 | size_t getGranularity() const { |
| 505 | if (requestedMode.ready()) |
| 506 | return requestedMode.get() ? pageSize : 0; |
| 507 | else |
| 508 | return HUGE_PAGE_SIZE; // the mode is not yet known; assume typical 2MB huge pages |
| 509 | } |
| 510 | |
| 511 | void printStatus() { |
| 512 | doPrintStatus(requestedMode.get(), "requested" ); |
| 513 | if (requestedMode.get()) { // report actual status iff requested |
| 514 | if (pageSize) |
| 515 | FencedStore(needActualStatusPrint, 1); |
| 516 | else |
| 517 | doPrintStatus(/*state=*/false, "available" ); |
| 518 | } |
| 519 | } |
| 520 | }; |
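/* Illustrative usage sketch (not how the library wires it up; the instance and
   function names are hypothetical): the object must reside in zero-initialized memory,
   init() is called once under the global initialization lock, and the backend can then
   round mapping requests to the reported granularity.

   static HugePagesStatus exampleHugePages;              // must be zero-initialized
   void exampleBackendInit() {
       exampleHugePages.init();                          // reads /proc data and TBB_MALLOC_USE_HUGE_PAGES
       if (exampleHugePages.isEnabled) {
           size_t granularity = exampleHugePages.getGranularity();
           // round large mapping sizes up to 'granularity' so the kernel can back
           // them with (transparent) huge pages
           suppress_unused_warning(granularity);         // only a sketch
       }
   }
*/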
| 521 | |
| 522 | class AllLargeBlocksList { |
| 523 | MallocMutex largeObjLock; |
| 524 | LargeMemoryBlock *loHead; |
| 525 | public: |
| 526 | void add(LargeMemoryBlock *lmb); |
| 527 | void remove(LargeMemoryBlock *lmb); |
| 528 | template<bool poolDestroy> void releaseAll(Backend *backend); |
| 529 | }; |
| 530 | |
| 531 | struct ExtMemoryPool { |
| 532 | Backend backend; |
| 533 | LargeObjectCache loc; |
| 534 | AllLocalCaches allLocalCaches; |
| 535 | OrphanedBlocks orphanedBlocks; |
| 536 | |
| 537 | intptr_t poolId; |
| 538 | // To find all large objects. Used during user pool destruction, |
| 539 | // to release all backreferences in large blocks (slab blocks do not have them). |
| 540 | AllLargeBlocksList lmbList; |
| 541 | // Callbacks to be used instead of MapMemory/UnmapMemory. |
| 542 | rawAllocType rawAlloc; |
| 543 | rawFreeType rawFree; |
| 544 | size_t granularity; |
| 545 | bool keepAllMemory, |
| 546 | delayRegsReleasing, |
| 547 | // TODO: implement fixedPool by calling rawFree on destruction |
| 548 | fixedPool; |
| 549 | TLSKey tlsPointerKey; // per-pool TLS key |
| 550 | |
| 551 | bool init(intptr_t poolId, rawAllocType rawAlloc, rawFreeType rawFree, |
| 552 | size_t granularity, bool keepAllMemory, bool fixedPool); |
| 553 | bool initTLS(); |
| 554 | |
| 555 | // i.e., not the system default pool for scalable_malloc/scalable_free |
| 556 | bool userPool() const { return rawAlloc; } |
| 557 | |
| 558 | // true if something has been released |
| 559 | bool softCachesCleanup(); |
| 560 | bool releaseAllLocalCaches(); |
| 561 | bool hardCachesCleanup(); |
| 562 | void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment); |
| 563 | bool reset() { |
| 564 | loc.reset(); |
| 565 | allLocalCaches.reset(); |
| 566 | orphanedBlocks.reset(); |
| 567 | bool ret = tlsPointerKey.destroy(); |
| 568 | backend.reset(); |
| 569 | return ret; |
| 570 | } |
| 571 | bool destroy() { |
| 572 | MALLOC_ASSERT(isPoolValid(), |
| 573 | "Possible double pool_destroy or heap corruption" ); |
| 574 | if (!userPool()) { |
| 575 | loc.reset(); |
| 576 | allLocalCaches.reset(); |
| 577 | } |
| 578 | // pthread_key_dtors must be disabled before memory unmapping |
| 579 | // TODO: race-free solution |
| 580 | bool ret = tlsPointerKey.destroy(); |
| 581 | if (rawFree || !userPool()) |
| 582 | ret &= backend.destroy(); |
| 583 | // pool is not valid after this point |
| 584 | granularity = 0; |
| 585 | return ret; |
| 586 | } |
| 587 | void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; } |
| 588 | inline bool regionsAreReleaseable() const; |
| 589 | |
| 590 | LargeMemoryBlock *mallocLargeObject(MemoryPool *pool, size_t allocationSize); |
| 591 | void freeLargeObject(LargeMemoryBlock *lmb); |
| 592 | void freeLargeObjectList(LargeMemoryBlock *head); |
| 593 | // use granularity as a marker for pool validity |
| 594 | bool isPoolValid() const { return granularity; } |
| 595 | }; |
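/* Illustrative lifetime sketch (an assumption about usage, not the library's actual
   wiring; myRawAlloc and myRawFree are hypothetical callbacks): a user pool supplies
   raw memory callbacks at init() time, which makes userPool() true, and must end with
   destroy() so TLS keys, backreferences and backend regions are torn down in order.

   static ExtMemoryPool extPool;                  // must be zero-initialized
   bool createPool() {
       const size_t granularity = 2*1024*1024;
       return extPool.init((intptr_t)&extPool, myRawAlloc, myRawFree,
                           granularity, false, false)    // keepAllMemory=false, fixedPool=false
              && extPool.initTLS();
   }
   void destroyPool() {
       extPool.destroy();                         // isPoolValid() is false afterwards
   }
*/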
| 596 | |
| 597 | inline bool Backend::inUserPool() const { return extMemPool->userPool(); } |
| 598 | |
| 599 | struct LargeObjectHdr { |
| 600 | LargeMemoryBlock *memoryBlock; |
| 601 | /* Backreference points to LargeObjectHdr. |
| 602 | Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */ |
| 603 | BackRefIdx backRefIdx; |
| 604 | }; |
| 605 | |
| 606 | struct FreeObject { |
| 607 | FreeObject *next; |
| 608 | }; |
| 609 | |
| 610 | |
| 611 | /******* A helper class to support overriding malloc with scalable_malloc *******/ |
| 612 | #if MALLOC_CHECK_RECURSION |
| 613 | |
| 614 | class RecursiveMallocCallProtector { |
| 615 | // pointer to automatic data of the holding thread |
| 616 | static void *autoObjPtr; |
| 617 | static MallocMutex rmc_mutex; |
| 618 | static pthread_t owner_thread; |
| 619 | /* Under FreeBSD 8.0 the 1st call to any pthread function, including pthread_self, |
| 620 | leads to pthread initialization, which causes malloc calls. As the 1st usage of |
| 621 | RecursiveMallocCallProtector can happen before pthread is initialized, pthread calls |
| 622 | can't be used in the 1st instance of RecursiveMallocCallProtector. |
| 623 | RecursiveMallocCallProtector is used for the 1st time in checkInitialization(), |
| 624 | so it is guaranteed that pthread is initialized by the 2nd usage. |
| 625 | No such situation has been observed with other supported OSes. |
| 626 | */ |
| 627 | #if __FreeBSD__ |
| 628 | static bool canUsePthread; |
| 629 | #else |
| 630 | static const bool canUsePthread = true; |
| 631 | #endif |
| 632 | /* |
| 633 | The variable is modified only in checkInitialization(), |
| 634 | so it can be read without memory barriers. |
| 635 | */ |
| 636 | static bool mallocRecursionDetected; |
| 637 | |
| 638 | MallocMutex::scoped_lock* lock_acquired; |
| 639 | char scoped_lock_space[sizeof(MallocMutex::scoped_lock)+1]; |
| 640 | |
| 641 | static uintptr_t absDiffPtr(void *x, void *y) { |
| 642 | uintptr_t xi = (uintptr_t)x, yi = (uintptr_t)y; |
| 643 | return xi > yi ? xi - yi : yi - xi; |
| 644 | } |
| 645 | public: |
| 646 | |
| 647 | RecursiveMallocCallProtector() : lock_acquired(NULL) { |
| 648 | lock_acquired = new (scoped_lock_space) MallocMutex::scoped_lock( rmc_mutex ); |
| 649 | if (canUsePthread) |
| 650 | owner_thread = pthread_self(); |
| 651 | autoObjPtr = &scoped_lock_space; |
| 652 | } |
| 653 | ~RecursiveMallocCallProtector() { |
| 654 | if (lock_acquired) { |
| 655 | autoObjPtr = NULL; |
| 656 | lock_acquired->~scoped_lock(); |
| 657 | } |
| 658 | } |
| 659 | static bool sameThreadActive() { |
| 660 | if (!autoObjPtr) // fast path |
| 661 | return false; |
| 662 | // Some thread has an active recursive call protector; check whether it is the current one. |
| 663 | // Exact pthread_self based test |
| 664 | if (canUsePthread) { |
| 665 | if (pthread_equal( owner_thread, pthread_self() )) { |
| 666 | mallocRecursionDetected = true; |
| 667 | return true; |
| 668 | } else |
| 669 | return false; |
| 670 | } |
| 671 | // inexact stack size based test |
| 672 | const uintptr_t threadStackSz = 2*1024*1024; |
| 673 | int dummy; |
| 674 | return absDiffPtr(autoObjPtr, &dummy)<threadStackSz; |
| 675 | } |
| 676 | static bool noRecursion(); |
| 677 | /* The function is called on the 1st scalable_malloc call to check whether malloc calls |
| 678 | scalable_malloc (a nested call must set mallocRecursionDetected). */ |
| 679 | static void detectNaiveOverload() { |
| 680 | if (!malloc_proxy) { |
| 681 | #if __FreeBSD__ |
| 682 | /* If !canUsePthread, we couldn't call pthread_self() before, but now pthread |
| 683 | is already initialized, so we can do it. */ |
| 684 | if (!canUsePthread) { |
| 685 | canUsePthread = true; |
| 686 | owner_thread = pthread_self(); |
| 687 | } |
| 688 | #endif |
| 689 | free(malloc(1)); |
| 690 | } |
| 691 | } |
| 692 | }; |
| 693 | |
| 694 | #else |
| 695 | |
| 696 | class RecursiveMallocCallProtector { |
| 697 | public: |
| 698 | RecursiveMallocCallProtector() {} |
| 699 | ~RecursiveMallocCallProtector() {} |
| 700 | }; |
| 701 | |
| 702 | #endif /* MALLOC_CHECK_RECURSION */ |
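/* Illustrative usage sketch (hypothetical function names; the real call sites are in
   the allocator sources): the protector guards allocator initialization paths that may
   call into an overloaded malloc, and sameThreadActive() lets the malloc overload detect
   that it was re-entered from under such a guard.

   void exampleCheckInitialization() {
       RecursiveMallocCallProtector scoped;       // takes rmc_mutex, remembers this thread
       // ... perform initialization that might call malloc/free ...
   }                                              // protector released here

   void *exampleMallocOverload(size_t size) {
       if (RecursiveMallocCallProtector::sameThreadActive())
           return fallbackAllocator(size);        // hypothetical: avoid recursing into ourselves
       return scalable_malloc(size);
   }
*/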
| 703 | |
| 704 | bool isMallocInitializedExt(); |
| 705 | |
| 706 | unsigned int getThreadId(); |
| 707 | |
| 708 | bool initBackRefMaster(Backend *backend); |
| 709 | void destroyBackRefMaster(Backend *backend); |
| 710 | void removeBackRef(BackRefIdx backRefIdx); |
| 711 | void setBackRef(BackRefIdx backRefIdx, void *newPtr); |
| 712 | void *getBackRef(BackRefIdx backRefIdx); |
| 713 | |
| 714 | } // namespace internal |
| 715 | } // namespace rml |
| 716 | |
| 717 | #endif // __TBB_tbbmalloc_internal_H |
| 718 | |