/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
#error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_backend_H
#define __TBB_backend_H

// Included from namespace rml::internal

// Global state of blocks currently being processed
class BackendSync {
    // Class instances should reside in zero-initialized memory!
    // The number of blocks currently removed from a bin and not yet
    // returned back to another bin
    intptr_t inFlyBlocks;
    intptr_t binsModifications; // incremented on every bin modification
    Backend *backend;
public:
    void init(Backend *b) { backend = b; }
    void blockConsumed() { AtomicIncrement(inFlyBlocks); }
    void binsModified() { AtomicIncrement(binsModifications); }
    void blockReleased() {
#if __TBB_MALLOC_BACKEND_STAT
        MALLOC_ITT_SYNC_RELEASING(&inFlyBlocks);
#endif
        AtomicIncrement(binsModifications);
        intptr_t prev = AtomicAdd(inFlyBlocks, -1);
        MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
        suppress_unused_warning(prev);
    }
    intptr_t getNumOfMods() const { return FencedLoad(binsModifications); }
    // Returns true if the block search needs to be redone
    inline bool waitTillBlockReleased(intptr_t startModifiedCnt);
};
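
/* Illustrative usage sketch (not part of the interface; takeFromSomeBin() is a
   hypothetical accessor). A thread that removes a block from a bin brackets the
   work with blockConsumed()/blockReleased(), so a thread whose search failed can
   wait for the bins to change before rescanning:

       intptr_t startCnt = bkndSync.getNumOfMods();
       bkndSync.blockConsumed();
       FreeBlock *fb = takeFromSomeBin();   // hypothetical
       // ... split or coalesce fb, possibly re-adding parts to the bins ...
       bkndSync.blockReleased();            // also counts as a bin modification

       // elsewhere, after an unsuccessful search:
       if (bkndSync.waitTillBlockReleased(startCnt)) {
           // bins were modified while blocks were in flight: redo the search
       }
*/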

class CoalRequestQ { // queue of free blocks for which coalescing was delayed
private:
    FreeBlock *blocksToFree;
    BackendSync *bkndSync;
    // Counts blocks that are in blocksToFree as well as blocks that have
    // already left blocksToFree but are still being coalesced
    intptr_t inFlyBlocks;
public:
    void init(BackendSync *bSync) { bkndSync = bSync; }
    FreeBlock *getAll(); // return the current list of blocks and empty the queue
    void putBlock(FreeBlock *fBlock);
    inline void blockWasProcessed();
    intptr_t blocksInFly() const { return FencedLoad(inFlyBlocks); }
};
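
/* A minimal sketch of the intended flow, assumed from the interface above and the
   coalescing helpers declared in Backend below (nextToFree is a hypothetical link
   field): a releaser that cannot coalesce a block right away parks it here, and the
   backend drains the queue later:

       coalescQ.putBlock(fBlock);                // defer: a neighbour is busy

       // later, e.g. from Backend::scanCoalescQ():
       for (FreeBlock *fb = coalescQ.getAll(); fb; ) {
           FreeBlock *next = fb->nextToFree;     // hypothetical
           // coalesce fb with its neighbours, then return it to a bin
           coalescQ.blockWasProcessed();         // lets blocksInFly() drop back
           fb = next;
       }
*/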

class MemExtendingSema {
    intptr_t active;
public:
    bool wait() {
        bool rescanBins = false;
        // Up to 3 threads can add more memory from the OS simultaneously;
        // the rest of the threads have to wait
        for (;;) {
            intptr_t prevCnt = FencedLoad(active);
            if (prevCnt < 3) {
                intptr_t n = AtomicCompareExchange(active, prevCnt+1, prevCnt);
                if (n == prevCnt)
                    break;
            } else {
                SpinWaitWhileEq(active, prevCnt);
                rescanBins = true;
                break;
            }
        }
        return rescanBins;
    }
    void signal() { AtomicAdd(active, -1); }
};
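
/* Usage sketch (assumed, not copied from a caller): a thread that wants to grow the
   pool from the OS acquires the semaphore first. If it had to wait, another thread
   extended memory in the meantime, so the bins are worth rescanning before asking
   the OS again:

       if (memExtendingSema.wait()) {
           // memory was added while we waited; rescan the bins first
       }
       // ... request memory from the OS and register the new region ...
       memExtendingSema.signal();
*/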

enum MemRegionType {
    // The region holds only slabs
    MEMREG_SLAB_BLOCKS = 0,
    // The region can hold several large object blocks
    MEMREG_LARGE_BLOCKS,
    // The region holds only one block of the requested size
    MEMREG_ONE_BLOCK
};

class MemRegionList {
    MallocMutex regionListLock;
public:
    MemRegion *head;
    void add(MemRegion *r);
    void remove(MemRegion *r);
    int reportStat(FILE *f);
};

class Backend {
private:
    /* Blocks in the range [minBinnedSize; getMaxBinnedSize()] are kept in bins,
       and a region can contain several such blocks. Larger blocks are allocated
       directly, and their region always contains exactly one block.
    */
    enum {
        minBinnedSize = 8*1024UL,
        /* If huge pages are available, maxBinned_HugePage is used as the threshold.
           If not, maxBinned_SmallPage is the threshold.
           TODO: use the pool's granularity for setting the upper bound. */
        maxBinned_SmallPage = 1024*1024UL,
        // TODO: support other page sizes
        maxBinned_HugePage = 4*1024*1024UL
    };
    enum {
        VALID_BLOCK_IN_BIN = 1 // a valid block was added to a bin, not returned as the result
    };
public:
    // The backend bin step is the same as CacheStep for the large object cache
    static const size_t freeBinsStep = LargeObjectCache::LargeBSProps::CacheStep;
    static const unsigned freeBinsNum = (maxBinned_HugePage-minBinnedSize)/freeBinsStep + 1;

    // If the previous request missed the per-thread slab pool,
    // allocate numOfSlabAllocOnMiss blocks in advance
    static const int numOfSlabAllocOnMiss = 2;

    enum {
        NO_BIN = -1,
        // Special bin for blocks >= maxBinned_HugePage; blocks go to this bin
        // when the pool is created with the keepAllMemory policy.
        // TODO: currently this bin is scanned with "1st fit"; since it accumulates
        // blocks of different sizes, "best fit" would be preferable in terms of fragmentation
        HUGE_BIN = freeBinsNum-1
    };

    // A bin keeps a doubly linked list of free blocks. It must be doubly linked
    // because during coalescing a block is removed from the middle of the list.
    struct Bin {
        FreeBlock *head,
                  *tail;
        MallocMutex tLock;

        void removeBlock(FreeBlock *fBlock);
        void reset() { head = tail = 0; }
        bool empty() const { return !head; }

        size_t countFreeBlocks();
        size_t reportFreeBlocks(FILE *f);
        void reportStat(FILE *f);
    };

    typedef BitMaskMin<Backend::freeBinsNum> BitMaskBins;

    // Array of bins supplemented with a bitmask for fast lookup of non-empty bins
    class IndexedBins {
        BitMaskBins bitMask;
        Bin freeBins[Backend::freeBinsNum];
        FreeBlock *getFromBin(int binIdx, BackendSync *sync, size_t size,
                              bool needAlignedBlock, bool alignedBin, bool wait, int *resLocked);
    public:
        FreeBlock *findBlock(int nativeBin, BackendSync *sync, size_t size,
                             bool needAlignedBlock, bool alignedBin, int *numOfLockedBins);
        bool tryReleaseRegions(int binIdx, Backend *backend);
        void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
        void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
        bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
        int getMinNonemptyBin(unsigned startBin) const;
        void verify();
        void reset();
        void reportStat(FILE *f);
    };
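
    /* Illustrative sketch only (the real findBlock() also deals with bin locking,
       alignment requirements, and retries; it is assumed here that getMinNonemptyBin()
       mirrors BitMaskBins::getMinTrue() and returns -1 when no bin is set): the bitmask
       lets a lookup jump straight to candidate bins instead of scanning the whole array.

           for (int bin = getMinNonemptyBin(startBin); bin != -1;
                bin = getMinNonemptyBin(bin+1)) {
               if (FreeBlock *fb = getFromBin(bin, sync, size, needAlignedBlock,
                                              alignedBin, false, &locked)) // wait=false
                   return fb;
           }
           return NULL; // nothing suitable; the caller may ask the OS for more memory
    */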

private:
    class AdvRegionsBins {
        BitMaskBins bins;
    public:
        void registerBin(int regBin) { bins.set(regBin, 1); }
        int getMinUsedBin(int start) const { return bins.getMinTrue(start); }
        void reset() { bins.reset(); }
    };
    // Auxiliary class for atomically tracking the maximum request size
    class MaxRequestComparator {
        const Backend *backend;
    public:
        MaxRequestComparator(const Backend *be) : backend(be) {}
        inline bool operator()(size_t oldMaxReq, size_t requestSize) const;
    };

#if CHECK_ALLOCATION_RANGE
    // Keep the min and max of all addresses requested from the OS;
    // use them for checking memory possibly allocated by replaced allocators
    // and for debugging purposes. Valid only for the default memory pool.
    class UsedAddressRange {
        static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX;

        uintptr_t leftBound,
                  rightBound;
        MallocMutex mutex;
    public:
        // rightBound is zero-initialized
        void init() { leftBound = ADDRESS_UPPER_BOUND; }
        void registerAlloc(uintptr_t left, uintptr_t right);
        void registerFree(uintptr_t left, uintptr_t right);
        // Because only the left and right bounds are kept, this can return true
        // for a pointer not allocated by us if more than a single region
        // was requested from the OS
        bool inRange(void *ptr) const {
            const uintptr_t p = (uintptr_t)ptr;
            return leftBound<=p && p<=rightBound;
        }
    };
#else
    class UsedAddressRange {
    public:
        void init() { }
        void registerAlloc(uintptr_t, uintptr_t) {}
        void registerFree(uintptr_t, uintptr_t) {}
        bool inRange(void *) const { return true; }
    };
#endif

    ExtMemoryPool *extMemPool;
    // used to release every region when the pool is destroyed
    MemRegionList regionList;

    CoalRequestQ coalescQ; // queue of coalescing requests
    BackendSync bkndSync;
    // semaphore protecting the addition of more memory from the OS
    MemExtendingSema memExtendingSema;
    size_t totalMemSize,
           memSoftLimit;
    UsedAddressRange usedAddrRange;
    // To keep the 1st allocation larger than requested, track the bootstrapping status
    enum {
        bootsrapMemNotDone = 0,
        bootsrapMemInitializing,
        bootsrapMemDone
    };
    intptr_t bootsrapMemStatus;
    MallocMutex bootsrapMemStatusMutex;

    // Tracking the maximal observed requested size allows decreasing
    // memory consumption for small requests and decreasing fragmentation
    // for workloads where small and large allocation requests are mixed.
    // TODO: decrease it as well, not only increase it
    size_t maxRequestedSize;

    // register bins related to advance regions (regions allocated in advance)
    AdvRegionsBins advRegBins;
    // Storage for split FreeBlocks
    IndexedBins freeLargeBlockBins,
                freeSlabAlignedBins;

    // Our friends
    friend class BackendSync;
| 265 | /******************************** Backend methods ******************************/ |
| 266 | |
| 267 | /*--------------------------- Coalescing functions ----------------------------*/ |
| 268 | void coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned); |
| 269 | bool coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool reportBlocksProcessed); |
| 270 | |
| 271 | // Main coalescing operation |
| 272 | FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion); |
| 273 | |
| 274 | // Queue for conflicted blocks during coalescing |
| 275 | bool scanCoalescQ(bool forceCoalescQDrop); |
| 276 | intptr_t blocksInCoalescing() const { return coalescQ.blocksInFly(); } |
| 277 | |
| 278 | /*--------------------- FreeBlock backend accessors ---------------------------*/ |
| 279 | FreeBlock *genericGetBlock(int num, size_t size, bool slabAligned); |
| 280 | void genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned); |
| 281 | |
| 282 | // Split the block and return remaining parts to backend if possible |
| 283 | FreeBlock *splitBlock(FreeBlock *fBlock, int num, size_t size, bool isAligned, bool needAlignedBlock); |
| 284 | |
| 285 | void removeBlockFromBin(FreeBlock *fBlock); |
| 286 | |
| 287 | // TODO: combine with returnLargeObject |
| 288 | void putLargeBlock(LargeMemoryBlock *lmb); |
| 289 | |
| 290 | /*------------------- Starting point for OS allocation ------------------------*/ |
| 291 | void requestBootstrapMem(); |
| 292 | FreeBlock *askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt, |
| 293 | int *lockedBinsThreshold, int numOfLockedBins, |
| 294 | bool *splittable, bool needSlabRegion); |
| 295 | |
| 296 | /*---------------------- Memory regions allocation ----------------------------*/ |
| 297 | FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin); |
| 298 | void releaseRegion(MemRegion *region); |
| 299 | |
| 300 | // TODO: combine in one initMemoryRegion function |
| 301 | FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize); |
| 302 | void startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin); |
| 303 | |
| 304 | /*------------------------- Raw memory accessors ------------------------------*/ |
| 305 | void *allocRawMem(size_t &size); |
| 306 | bool freeRawMem(void *object, size_t size); |
| 307 | |
| 308 | /*------------------------------ Cleanup functions ----------------------------*/ |
| 309 | // Clean all memory from all caches (extMemPool hard cleanup) |
| 310 | FreeBlock *releaseMemInCaches(intptr_t startModifiedCnt, int *lockedBinsThreshold, int numOfLockedBins); |
| 311 | // Soft heap limit (regular cleanup, then maybe hard cleanup) |
| 312 | void releaseCachesToLimit(); |
| 313 | |
| 314 | /*---------------------------------- Utility ----------------------------------*/ |
| 315 | // TODO: move inside IndexedBins class |
| 316 | static int sizeToBin(size_t size) { |
| 317 | if (size >= maxBinned_HugePage) |
| 318 | return HUGE_BIN; |
| 319 | else if (size < minBinnedSize) |
| 320 | return NO_BIN; |
| 321 | |
| 322 | int bin = (size - minBinnedSize)/freeBinsStep; |
| 323 | |
| 324 | MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size." ); |
| 325 | return bin; |
| 326 | } |
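
    /* Worked example for sizeToBin() (illustrative; the bin width is freeBinsStep,
       i.e. LargeObjectCache::LargeBSProps::CacheStep, assumed to be 8K here):
           sizeToBin(4*1024)      == NO_BIN   // below minBinnedSize
           sizeToBin(8*1024)      == 0        // (8K - minBinnedSize)/freeBinsStep
           sizeToBin(16*1024)     == 1        // one step above minBinnedSize
           sizeToBin(4*1024*1024) == HUGE_BIN // at or above maxBinned_HugePage
    */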
    static bool toAlignedBin(FreeBlock *block, size_t size) {
        return isAligned((char*)block + size, slabSize) && size >= slabSize;
    }

public:
    /*--------------------- Init, reset, destroy, verify -------------------------*/
    void init(ExtMemoryPool *extMemoryPool);
    bool destroy();

    void verify();
    void reset();
    bool clean(); // clean on caches cleanup

    /*------------------------- Slab block request --------------------------------*/
    BlockI *getSlabBlock(int num);
    void putSlabBlock(BlockI *block);

    /*-------------------------- Large object request -----------------------------*/
    LargeMemoryBlock *getLargeBlock(size_t size);
    // TODO: make consistent with getLargeBlock
    void returnLargeObject(LargeMemoryBlock *lmb);

    /*-------------------------- Backreference memory request ----------------------*/
    void *getBackRefSpace(size_t size, bool *rawMemUsed);
    void putBackRefSpace(void *b, size_t size, bool rawMemUsed);

    /*----------------------------- Remap object ----------------------------------*/
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);

    /*---------------------------- Validation -------------------------------------*/
    bool inUserPool() const;
    bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }

    /*-------------------------- Configuration API --------------------------------*/
    // Soft heap limit
    void setRecommendedMaxSize(size_t softLimit) {
        memSoftLimit = softLimit;
        releaseCachesToLimit();
    }
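
    /* Note (an assumption about the call chain, not verified here): in the public
       tbbmalloc API a soft heap limit is typically requested via
       scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, <bytes>); this method
       is the backend-side hook that records the limit and immediately trims the
       caches toward it via releaseCachesToLimit(). */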

    /*------------------------------- Info ----------------------------------------*/
    size_t getMaxBinnedSize() const;

    /*-------------------------- Testing, statistics ------------------------------*/
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getTotalMemSize() const { return totalMemSize; }
#endif
#if __TBB_MALLOC_BACKEND_STAT
    void reportStat(FILE *f);
private:
    static size_t binToSize(int bin) {
        MALLOC_ASSERT(bin <= HUGE_BIN, "Invalid bin.");

        return bin*freeBinsStep + minBinnedSize;
    }
#endif
};

#endif // __TBB_backend_H