/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
#error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_backend_H
#define __TBB_backend_H

// Included from namespace rml::internal

// Global state of blocks currently being processed
class BackendSync {
    // Class instances should reside in zero-initialized memory!
    // The number of blocks currently removed from a bin and not yet
    // returned to another bin.
    intptr_t inFlyBlocks;
    intptr_t binsModifications;   // incremented on every bin modification
    Backend *backend;
public:
    void init(Backend *b) { backend = b; }
    void blockConsumed() { AtomicIncrement(inFlyBlocks); }
    void binsModified() { AtomicIncrement(binsModifications); }
    void blockReleased() {
#if __TBB_MALLOC_BACKEND_STAT
        MALLOC_ITT_SYNC_RELEASING(&inFlyBlocks);
#endif
        AtomicIncrement(binsModifications);
        intptr_t prev = AtomicAdd(inFlyBlocks, -1);
        MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
        suppress_unused_warning(prev);
    }
    intptr_t getNumOfMods() const { return FencedLoad(binsModifications); }
    // Returns true if the blocks search needs to be redone.
    inline bool waitTillBlockReleased(intptr_t startModifiedCnt);
};
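
// Illustrative sketch (kept in a comment, not part of the interface): one
// plausible retry loop around the counters above. The helper tryTakeFromBins
// is hypothetical; the point is that getNumOfMods() is sampled before the
// search, so waitTillBlockReleased() can tell whether the bins were modified
// meanwhile and the search is worth redoing.
//
//     FreeBlock *fb = NULL;
//     for (;;) {
//         intptr_t startCnt = sync->getNumOfMods();
//         if ((fb = tryTakeFromBins(size)) != NULL)
//             break;                                // got a block
//         if (!sync->waitTillBlockReleased(startCnt))
//             break;                                // nothing changed: ask the OS
//     }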

class CoalRequestQ { // queue of free blocks whose coalescing was delayed
private:
    FreeBlock   *blocksToFree;
    BackendSync *bkndSync;
    // Counts blocks that are in blocksToFree, plus blocks that have already
    // left blocksToFree but are still being coalesced.
    intptr_t     inFlyBlocks;
public:
    void init(BackendSync *bSync) { bkndSync = bSync; }
    FreeBlock *getAll(); // return the current list of blocks and empty the queue
    void putBlock(FreeBlock *fBlock);
    inline void blockWasProcessed();
    intptr_t blocksInFly() const { return FencedLoad(inFlyBlocks); }
};
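
// Illustrative sketch (hypothetical names, comment only): a thread that must
// delay coalescing parks the block; a later scan drains the queue. The link
// field `nextToFree` and the drain loop are assumptions, not this header's API.
//
//     coalescQ.putBlock(fBlock);          // producer: coalescing postponed
//
//     for (FreeBlock *fb = coalescQ.getAll(); fb; ) {  // consumer: drain all
//         FreeBlock *next = fb->nextToFree;
//         /* ... coalesce fb and return it to a bin ... */
//         coalescQ.blockWasProcessed();   // lets blocksInFly() drop back to zero
//         fb = next;
//     }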

class MemExtendingSema {
    intptr_t     active;
public:
    bool wait() {
        bool rescanBins = false;
        // Up to 3 threads can add more memory from the OS simultaneously;
        // the rest have to wait.
        for (;;) {
            intptr_t prevCnt = FencedLoad(active);
            if (prevCnt < 3) {
                intptr_t n = AtomicCompareExchange(active, prevCnt+1, prevCnt);
                if (n == prevCnt)
                    break;
            } else {
                SpinWaitWhileEq(active, prevCnt);
                rescanBins = true;
                break;
            }
        }
        return rescanBins;
    }
    void signal() { AtomicAdd(active, -1); }
};
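
// Illustrative usage sketch (simplified control flow, not the real call site).
// Note that a waiter returns true *without* taking a slot, so it must not
// call signal():
//
//     if (memExtendingSema.wait()) {
//         // Did not take a slot: another thread was extending memory while we
//         // waited, so re-scan the bins first; the needed block may be there.
//     } else {
//         // Took one of the <=3 extender slots.
//         // ... request memory from the OS, add the new blocks to bins ...
//         memExtendingSema.signal();      // release the slot
//     }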

enum MemRegionType {
    // The region holds only slabs
    MEMREG_SLAB_BLOCKS = 0,
    // The region can hold several large object blocks
    MEMREG_LARGE_BLOCKS,
    // The region holds only one block of the requested size
    MEMREG_ONE_BLOCK
};

class MemRegionList {
    MallocMutex regionListLock;
public:
    MemRegion  *head;
    void add(MemRegion *r);
    void remove(MemRegion *r);
    int reportStat(FILE *f);
};

class Backend {
private:
    /* Blocks in the range [minBinnedSize; getMaxBinnedSize()] are kept in bins;
       one region can contain several blocks. Larger blocks are allocated
       directly from the OS, and one region always contains exactly one block.
    */
    enum {
        minBinnedSize = 8*1024UL,
        /* If huge pages are available, maxBinned_HugePage is used as the
           threshold; otherwise maxBinned_SmallPage is.
           TODO: use the pool's granularity for setting the upper bound. */
        maxBinned_SmallPage = 1024*1024UL,
        // TODO: support other page sizes
        maxBinned_HugePage = 4*1024*1024UL
    };
    enum {
        VALID_BLOCK_IN_BIN = 1 // valid block added to a bin, not returned as a result
    };
public:
    // The backend bins step is the same as CacheStep for the large object cache
    static const size_t   freeBinsStep = LargeObjectCache::LargeBSProps::CacheStep;
    static const unsigned freeBinsNum = (maxBinned_HugePage-minBinnedSize)/freeBinsStep + 1;

    // If the previous access missed the per-thread slabs pool,
    // allocate numOfSlabAllocOnMiss blocks in advance.
    static const int numOfSlabAllocOnMiss = 2;

    enum {
        NO_BIN = -1,
        // Special bin for blocks >= maxBinned_HugePage; blocks go to this bin
        // when the pool is created with the keepAllMemory policy.
        // TODO: currently this bin is scanned using "first fit"; as it
        // accumulates blocks of different sizes, "best fit" would be
        // preferable in terms of fragmentation.
        HUGE_BIN = freeBinsNum-1
    };

    // A bin keeps a doubly-linked list of free blocks. It must be doubly-linked
    // because, during coalescing, a block may be removed from the middle of the list.
    struct Bin {
        FreeBlock   *head,
                    *tail;
        MallocMutex  tLock;

        void removeBlock(FreeBlock *fBlock);
        void reset() { head = tail = 0; }
        bool empty() const { return !head; }

        size_t countFreeBlocks();
        size_t reportFreeBlocks(FILE *f);
        void reportStat(FILE *f);
    };
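
    // Illustrative sketch of why the list is doubly-linked: removeBlock() may
    // be handed a block from the middle of the list during coalescing, so both
    // neighbors must be patchable in O(1). Hypothetical body, assuming
    // FreeBlock carries next/prev links:
    //
    //     void removeBlock(FreeBlock *fBlock) {
    //         if (fBlock->prev) fBlock->prev->next = fBlock->next;
    //         else              head = fBlock->next;
    //         if (fBlock->next) fBlock->next->prev = fBlock->prev;
    //         else              tail = fBlock->prev;
    //     }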

    typedef BitMaskMin<Backend::freeBinsNum> BitMaskBins;

    // Array of bins supplemented with a bitmask for fast finding of non-empty bins
    class IndexedBins {
        BitMaskBins bitMask;
        Bin         freeBins[Backend::freeBinsNum];
        FreeBlock *getFromBin(int binIdx, BackendSync *sync, size_t size,
                bool needAlignedBlock, bool alignedBin, bool wait, int *resLocked);
    public:
        FreeBlock *findBlock(int nativeBin, BackendSync *sync, size_t size,
                bool needAlignedBlock, bool alignedBin, int *numOfLockedBins);
        bool tryReleaseRegions(int binIdx, Backend *backend);
        void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
        void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
        bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
        int  getMinNonemptyBin(unsigned startBin) const;
        void verify();
        void reset();
        void reportStat(FILE *f);
    };
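
    // Illustrative sketch (assumptions noted inline): the bitmask mirrors bin
    // occupancy, so a search can jump straight to the first candidate bin
    // instead of probing every list:
    //
    //     int start = sizeToBin(size);                 // smallest bin that fits
    //     int idx   = bins.getMinNonemptyBin(start);   // first set bit >= start
    //     // idx is only a hint under concurrency: the bin's lock must still be
    //     // taken and its list re-checked before a block is removed.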

private:
    class AdvRegionsBins {
        BitMaskBins bins;
    public:
        void registerBin(int regBin) { bins.set(regBin, 1); }
        int getMinUsedBin(int start) const { return bins.getMinTrue(start); }
        void reset() { bins.reset(); }
    };
    // Auxiliary class for atomically tracking the maximum request size
    class MaxRequestComparator {
        const Backend *backend;
    public:
        MaxRequestComparator(const Backend *be) : backend(be) {}
        inline bool operator()(size_t oldMaxReq, size_t requestSize) const;
    };

#if CHECK_ALLOCATION_RANGE
    // Keep the min and max of all addresses requested from the OS;
    // used for checking memory possibly allocated by replaced allocators
    // and for debugging purposes. Valid only for the default memory pool.
    class UsedAddressRange {
        static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX;

        uintptr_t   leftBound,
                    rightBound;
        MallocMutex mutex;
    public:
        // rightBound is zero-initialized
        void init() { leftBound = ADDRESS_UPPER_BOUND; }
        void registerAlloc(uintptr_t left, uintptr_t right);
        void registerFree(uintptr_t left, uintptr_t right);
        // Since only the left and right bounds are kept, this can return true
        // for a pointer we did not allocate, if more than a single region
        // was requested from the OS.
        bool inRange(void *ptr) const {
            const uintptr_t p = (uintptr_t)ptr;
            return leftBound<=p && p<=rightBound;
        }
    };
#else
    class UsedAddressRange {
    public:
        void init() { }
        void registerAlloc(uintptr_t, uintptr_t) {}
        void registerFree(uintptr_t, uintptr_t) {}
        bool inRange(void *) const { return true; }
    };
#endif
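
    // Worked example of the caveat above: after regions around 0x1000..0x2000
    // and 0x9000..0xA000 are registered, inRange((void*)0x5000) returns true
    // even though 0x5000 lies in neither region, because only the outermost
    // left and right bounds are kept.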

    ExtMemoryPool   *extMemPool;
    // Used to release every region when the pool is destroyed
    MemRegionList    regionList;

    CoalRequestQ     coalescQ; // queue of coalescing requests
    BackendSync      bkndSync;
    // Semaphore limiting how many threads may add memory from the OS at once
    MemExtendingSema memExtendingSema;
    size_t           totalMemSize,
                     memSoftLimit;
    UsedAddressRange usedAddrRange;
    // To make the 1st allocation larger than requested, track the bootstrapping status
    enum {
        bootsrapMemNotDone = 0,
        bootsrapMemInitializing,
        bootsrapMemDone
    };
    intptr_t         bootsrapMemStatus;
    MallocMutex      bootsrapMemStatusMutex;

    // Tracking the maximal observed requested size allows decreasing memory
    // consumption for small requests and reducing fragmentation in workloads
    // where small and large allocation requests are mixed.
    // TODO: allow it to decrease, not only increase
    size_t           maxRequestedSize;

    // Bins holding blocks from regions allocated in advance
    AdvRegionsBins advRegBins;
    // Storage for split FreeBlocks
    IndexedBins freeLargeBlockBins,
                freeSlabAlignedBins;

    // Our friends
    friend class BackendSync;

    /******************************** Backend methods ******************************/

    /*--------------------------- Coalescing functions ----------------------------*/
    void coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned);
    bool coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool reportBlocksProcessed);

    // Main coalescing operation
    FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion);

    // Queue for conflicted blocks during coalescing
    bool scanCoalescQ(bool forceCoalescQDrop);
    intptr_t blocksInCoalescing() const { return coalescQ.blocksInFly(); }

    /*--------------------- FreeBlock backend accessors ---------------------------*/
    FreeBlock *genericGetBlock(int num, size_t size, bool slabAligned);
    void genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned);

    // Split the block and return the remaining parts to the backend if possible
    FreeBlock *splitBlock(FreeBlock *fBlock, int num, size_t size, bool isAligned, bool needAlignedBlock);

    void removeBlockFromBin(FreeBlock *fBlock);

    // TODO: combine with returnLargeObject
    void putLargeBlock(LargeMemoryBlock *lmb);

    /*------------------- Starting point for OS allocation ------------------------*/
    void requestBootstrapMem();
    FreeBlock *askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt,
                            int *lockedBinsThreshold, int numOfLockedBins,
                            bool *splittable, bool needSlabRegion);

    /*---------------------- Memory regions allocation ----------------------------*/
    FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin);
    void releaseRegion(MemRegion *region);

    // TODO: combine into one initMemoryRegion function
    FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize);
    void startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin);

    /*------------------------- Raw memory accessors ------------------------------*/
    void *allocRawMem(size_t &size);
    bool freeRawMem(void *object, size_t size);

    /*------------------------------ Cleanup functions ----------------------------*/
    // Clean all memory from all caches (extMemPool hard cleanup)
    FreeBlock *releaseMemInCaches(intptr_t startModifiedCnt, int *lockedBinsThreshold, int numOfLockedBins);
    // Soft heap limit (regular cleanup, then maybe hard cleanup)
    void releaseCachesToLimit();

    /*---------------------------------- Utility ----------------------------------*/
    // TODO: move inside the IndexedBins class
    static int sizeToBin(size_t size) {
        if (size >= maxBinned_HugePage)
            return HUGE_BIN;
        else if (size < minBinnedSize)
            return NO_BIN;

        int bin = (size - minBinnedSize)/freeBinsStep;

        MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size.");
        return bin;
    }
    static bool toAlignedBin(FreeBlock *block, size_t size) {
        return isAligned((char*)block + size, slabSize) && size >= slabSize;
    }
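
    // Worked example (assuming freeBinsStep is the 8K large-object CacheStep):
    //   sizeToBin(8*1024)      -> (8K-8K)/8K  == bin 0
    //   sizeToBin(24*1024)     -> (24K-8K)/8K == bin 2
    //   sizeToBin(4*1024)      -> NO_BIN      (below minBinnedSize, never binned)
    //   sizeToBin(8*1024*1024) -> HUGE_BIN    (>= maxBinned_HugePage)
    // toAlignedBin() tests the *end* of the block, plausibly because carving a
    // slab from the high end of such a block yields a slab-aligned piece.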

public:
    /*--------------------- Init, reset, destroy, verify  -------------------------*/
    void init(ExtMemoryPool *extMemoryPool);
    bool destroy();

    void verify();
    void reset();
    bool clean(); // clean on caches cleanup

    /*------------------------- Slab block request --------------------------------*/
    BlockI *getSlabBlock(int num);
    void putSlabBlock(BlockI *block);

    /*-------------------------- Large object request -----------------------------*/
    LargeMemoryBlock *getLargeBlock(size_t size);
    // TODO: make consistent with getLargeBlock
    void returnLargeObject(LargeMemoryBlock *lmb);

    /*-------------------------- Backreference memory request ----------------------*/
    void *getBackRefSpace(size_t size, bool *rawMemUsed);
    void putBackRefSpace(void *b, size_t size, bool rawMemUsed);

    /*----------------------------- Remap object ----------------------------------*/
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);

    /*---------------------------- Validation -------------------------------------*/
    bool inUserPool() const;
    bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }

    /*-------------------------- Configuration API --------------------------------*/
    // Soft heap limit
    void setRecommendedMaxSize(size_t softLimit) {
        memSoftLimit = softLimit;
        releaseCachesToLimit();
    }

    /*------------------------------- Info ----------------------------------------*/
    size_t getMaxBinnedSize() const;

    /*-------------------------- Testing, statistics ------------------------------*/
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getTotalMemSize() const { return totalMemSize; }
#endif
#if __TBB_MALLOC_BACKEND_STAT
    void reportStat(FILE *f);
private:
    static size_t binToSize(int bin) {
        MALLOC_ASSERT(bin <= HUGE_BIN, "Invalid bin.");

        return bin*freeBinsStep + minBinnedSize;
    }
#endif
};

#endif // __TBB_backend_H