/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
 #error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_backend_H
#define __TBB_backend_H

// Included from namespace rml::internal

// global state of blocks currently in processing
class BackendSync {
    // Class instances should reside in zero-initialized memory!
    // The number of blocks currently removed from a bin and not yet returned
    // back to another bin
    intptr_t inFlyBlocks;
    intptr_t binsModifications; // incremented on every bin modification
    Backend *backend;
public:
    void init(Backend *b) { backend = b; }
    void blockConsumed() { AtomicIncrement(inFlyBlocks); }
    void binsModified() { AtomicIncrement(binsModifications); }
    void blockReleased() {
#if __TBB_MALLOC_BACKEND_STAT
        MALLOC_ITT_SYNC_RELEASING(&inFlyBlocks);
#endif
        AtomicIncrement(binsModifications);
        intptr_t prev = AtomicAdd(inFlyBlocks, -1);
        MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
        suppress_unused_warning(prev);
    }
    intptr_t getNumOfMods() const { return FencedLoad(binsModifications); }
    // returns true if the block search needs to be redone
    inline bool waitTillBlockReleased(intptr_t startModifiedCnt);
};
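
// A minimal sketch of the intended BackendSync protocol, assuming a caller
// shaped like the backend's block search (the surrounding logic below is
// illustrative only, not part of this header):
//
//   sync->blockConsumed();                        // a block is being processed
//   intptr_t startCnt = sync->getNumOfMods();
//   FreeBlock *fBlock = NULL;                     // ... search the bins ...
//   if (!fBlock && sync->waitTillBlockReleased(startCnt))
//       ;                                         // bins changed: redo the search
//   sync->blockReleased();                        // processing finished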

class CoalRequestQ { // queue of free blocks whose coalescing was delayed
private:
    FreeBlock *blocksToFree;
    BackendSync *bkndSync;
    // counts the blocks that are in blocksToFree, plus those that have already
    // left blocksToFree but are still being coalesced
    intptr_t inFlyBlocks;
public:
    void init(BackendSync *bSync) { bkndSync = bSync; }
    FreeBlock *getAll(); // return the current list of blocks and make the queue empty
    void putBlock(FreeBlock *fBlock);
    inline void blockWasProcessed();
    intptr_t blocksInFly() const { return FencedLoad(inFlyBlocks); }
};

class MemExtendingSema {
    intptr_t active;
public:
    bool wait() {
        bool rescanBins = false;
        // up to 3 threads can add more memory from the OS simultaneously,
        // the remaining threads have to wait
        for (;;) {
            intptr_t prevCnt = FencedLoad(active);
            if (prevCnt < 3) {
                intptr_t n = AtomicCompareExchange(active, prevCnt+1, prevCnt);
                if (n == prevCnt)
                    break;
            } else {
                SpinWaitWhileEq(active, prevCnt);
                rescanBins = true;
                break;
            }
        }
        return rescanBins;
    }
    void signal() { AtomicAdd(active, -1); }
};
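
// A minimal usage sketch (illustrative only, assuming a Backend-style caller):
// wait() is paired with signal() around a request for more memory from the OS.
// A 'true' result means this thread had to wait for another thread that was
// already extending memory, so the free bins should be rescanned first, since
// that thread may have added exactly the memory this caller needs.
//
//   if (memExtendingSema.wait())
//       /* rescan the free bins before asking the OS */;
//   /* ... map a new region from the OS ... */
//   memExtendingSema.signal();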

enum MemRegionType {
    // The region holds only slabs
    MEMREG_SLAB_BLOCKS = 0,
    // The region can hold several large object blocks
    MEMREG_LARGE_BLOCKS,
    // The region holds only one block of the requested size
    MEMREG_ONE_BLOCK
};

class MemRegionList {
    MallocMutex regionListLock;
public:
    MemRegion *head;
    void add(MemRegion *r);
    void remove(MemRegion *r);
    int reportStat(FILE *f);
};

class Backend {
private:
/* Blocks in the range [minBinnedSize; getMaxBinnedSize()] are kept in bins,
   and one region can contain several such blocks. Larger blocks are allocated
   directly, and one region always contains exactly one such block.
*/
    enum {
        minBinnedSize = 8*1024UL,
        /* If huge pages are available, maxBinned_HugePage is used as the
           threshold; if not, maxBinned_SmallPage is.
           TODO: use the pool's granularity for setting the upper bound. */
        maxBinned_SmallPage = 1024*1024UL,
        // TODO: support other page sizes
        maxBinned_HugePage = 4*1024*1024UL
    };
    enum {
        VALID_BLOCK_IN_BIN = 1 // valid block added to a bin, not returned as a result
    };
public:
    // The backend bin step is the same as CacheStep of the large object cache
    static const size_t freeBinsStep = LargeObjectCache::LargeBSProps::CacheStep;
    static const unsigned freeBinsNum = (maxBinned_HugePage-minBinnedSize)/freeBinsStep + 1;
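    // A worked example, assuming LargeBSProps::CacheStep is 8K (an assumption
    // of this comment, not something this header guarantees):
    //   freeBinsNum = (4M - 8K)/8K + 1 = 511 + 1 = 512
    // i.e. one bin per 8K-wide size class in [8K, 4M), plus the last index,
    // which serves as HUGE_BIN (see below).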

    // if a previous access missed the per-thread slab pool,
    // allocate numOfSlabAllocOnMiss blocks in advance
    static const int numOfSlabAllocOnMiss = 2;

    enum {
        NO_BIN = -1,
        // Special bin for blocks >= maxBinned_HugePage; blocks go to this bin
        // when the pool is created with the keepAllMemory policy.
        // TODO: currently this bin is scanned using "1st fit"; since it accumulates
        // blocks of different sizes, "best fit" would be preferable in terms of fragmentation.
        HUGE_BIN = freeBinsNum-1
    };

    // A bin keeps a doubly linked list of free blocks. It must be doubly linked
    // because during coalescing a block is removed from the middle of the list.
    struct Bin {
        FreeBlock *head,
                  *tail;
        MallocMutex tLock;

        void removeBlock(FreeBlock *fBlock);
        void reset() { head = tail = 0; }
        bool empty() const { return !head; }

        size_t countFreeBlocks();
        size_t reportFreeBlocks(FILE *f);
        void reportStat(FILE *f);
    };

    typedef BitMaskMin<Backend::freeBinsNum> BitMaskBins;

    // array of bins supplemented with a bitmask for fast lookup of non-empty bins
    class IndexedBins {
        BitMaskBins bitMask;
        Bin freeBins[Backend::freeBinsNum];
        FreeBlock *getFromBin(int binIdx, BackendSync *sync, size_t size,
                              bool needAlignedBlock, bool alignedBin, bool wait, int *resLocked);
    public:
        FreeBlock *findBlock(int nativeBin, BackendSync *sync, size_t size,
                             bool needAlignedBlock, bool alignedBin, int *numOfLockedBins);
        bool tryReleaseRegions(int binIdx, Backend *backend);
        void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
        void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
        bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
        int getMinNonemptyBin(unsigned startBin) const;
        void verify();
        void reset();
        void reportStat(FILE *f);
    };
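
    // A minimal sketch of the intended lookup path through getMinNonemptyBin
    // (illustrative only; the sentinel it returns when every bin from startBin
    // upwards is empty is not specified in this header):
    //
    //   // jump straight to the first non-empty bin that can satisfy the
    //   // request instead of probing all freeBinsNum bins one by one
    //   int bin = bins.getMinNonemptyBin(sizeToBin(size));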

private:
    class AdvRegionsBins {
        BitMaskBins bins;
    public:
        void registerBin(int regBin) { bins.set(regBin, 1); }
        int getMinUsedBin(int start) const { return bins.getMinTrue(start); }
        void reset() { bins.reset(); }
    };
    // auxiliary class for atomically tracking the maximal request size
    class MaxRequestComparator {
        const Backend *backend;
    public:
        MaxRequestComparator(const Backend *be) : backend(be) {}
        inline bool operator()(size_t oldMaxReq, size_t requestSize) const;
    };

#if CHECK_ALLOCATION_RANGE
    // Keep the min and max of all addresses requested from the OS;
    // use them for checking memory possibly allocated by replaced allocators
    // and for debugging purposes. Valid only for the default memory pool.
    class UsedAddressRange {
        static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX;

        uintptr_t leftBound,
                  rightBound;
        MallocMutex mutex;
    public:
        // rightBound is zero-initialized
        void init() { leftBound = ADDRESS_UPPER_BOUND; }
        void registerAlloc(uintptr_t left, uintptr_t right);
        void registerFree(uintptr_t left, uintptr_t right);
        // As only the left and right bounds are kept, this can return true
        // for a pointer not allocated by us if more than a single region
        // was requested from the OS.
        bool inRange(void *ptr) const {
            const uintptr_t p = (uintptr_t)ptr;
            return leftBound<=p && p<=rightBound;
        }
    };
#else
    class UsedAddressRange {
    public:
        void init() { }
        void registerAlloc(uintptr_t, uintptr_t) {}
        void registerFree(uintptr_t, uintptr_t) {}
        bool inRange(void *) const { return true; }
    };
#endif

    ExtMemoryPool *extMemPool;
    // used to release every region when the pool is destroyed
    MemRegionList regionList;

    CoalRequestQ coalescQ; // queue of coalescing requests
    BackendSync bkndSync;
    // semaphore protecting the addition of more memory from the OS
    MemExtendingSema memExtendingSema;
    size_t totalMemSize,
           memSoftLimit;
    UsedAddressRange usedAddrRange;
    // to keep the 1st allocation larger than requested, keep the bootstrapping status
    enum {
        bootsrapMemNotDone = 0,
        bootsrapMemInitializing,
        bootsrapMemDone
    };
    intptr_t bootsrapMemStatus;
    MallocMutex bootsrapMemStatusMutex;

    // Using the maximal observed requested size allows decreasing
    // memory consumption for small requests and decreasing fragmentation
    // for workloads where small and large allocation requests are mixed.
    // TODO: decrease it as well, not only increase it
    size_t maxRequestedSize;

    // bins related to regions allocated in advance
    AdvRegionsBins advRegBins;
    // Storage for split FreeBlocks
    IndexedBins freeLargeBlockBins,
                freeSlabAlignedBins;

    // Our friends
    friend class BackendSync;

    /******************************** Backend methods ******************************/

    /*--------------------------- Coalescing functions ----------------------------*/
    void coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned);
    bool coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool reportBlocksProcessed);

    // Main coalescing operation
    FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion);

    // Queue for conflicted blocks during coalescing
    bool scanCoalescQ(bool forceCoalescQDrop);
    intptr_t blocksInCoalescing() const { return coalescQ.blocksInFly(); }

    /*--------------------- FreeBlock backend accessors ---------------------------*/
    FreeBlock *genericGetBlock(int num, size_t size, bool slabAligned);
    void genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned);

    // Split the block and return the remaining parts to the backend if possible
    FreeBlock *splitBlock(FreeBlock *fBlock, int num, size_t size, bool isAligned, bool needAlignedBlock);

    void removeBlockFromBin(FreeBlock *fBlock);

    // TODO: combine with returnLargeObject
    void putLargeBlock(LargeMemoryBlock *lmb);

    /*------------------- Starting point for OS allocation ------------------------*/
    void requestBootstrapMem();
    FreeBlock *askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt,
                            int *lockedBinsThreshold, int numOfLockedBins,
                            bool *splittable, bool needSlabRegion);

    /*---------------------- Memory regions allocation ----------------------------*/
    FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin);
    void releaseRegion(MemRegion *region);

    // TODO: combine into one initMemoryRegion function
    FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize);
    void startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin);

    /*------------------------- Raw memory accessors ------------------------------*/
    void *allocRawMem(size_t &size);
    bool freeRawMem(void *object, size_t size);

    /*------------------------------ Cleanup functions ----------------------------*/
    // Clean all memory from all caches (extMemPool hard cleanup)
    FreeBlock *releaseMemInCaches(intptr_t startModifiedCnt, int *lockedBinsThreshold, int numOfLockedBins);
    // Soft heap limit (regular cleanup, then maybe hard cleanup)
    void releaseCachesToLimit();

    /*---------------------------------- Utility ----------------------------------*/
    // TODO: move inside the IndexedBins class
    static int sizeToBin(size_t size) {
        if (size >= maxBinned_HugePage)
            return HUGE_BIN;
        else if (size < minBinnedSize)
            return NO_BIN;

        int bin = (size - minBinnedSize)/freeBinsStep;

        MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size.");
        return bin;
    }
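    // Worked examples for sizeToBin(), derived from the constants above:
    //   sizeToBin(minBinnedSize)                == 0
    //   sizeToBin(minBinnedSize + freeBinsStep) == 1
    //   sizeToBin(minBinnedSize - 1)            == NO_BIN
    //   sizeToBin(maxBinned_HugePage)           == HUGE_BIN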
    static bool toAlignedBin(FreeBlock *block, size_t size) {
        return isAligned((char*)block + size, slabSize) && size >= slabSize;
    }
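    // toAlignedBin() accepts, for example, a free block of at least slabSize
    // whose end address is slab-aligned: a slab-aligned sub-block can then be
    // carved from its tail, which is presumably why such blocks are kept in
    // the slab-aligned bins (freeSlabAlignedBins) rather than the plain
    // large-block bins.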

public:
    /*--------------------- Init, reset, destroy, verify -------------------------*/
    void init(ExtMemoryPool *extMemoryPool);
    bool destroy();

    void verify();
    void reset();
    bool clean(); // clean on caches cleanup

    /*------------------------- Slab block request --------------------------------*/
    BlockI *getSlabBlock(int num);
    void putSlabBlock(BlockI *block);

    /*-------------------------- Large object request -----------------------------*/
    LargeMemoryBlock *getLargeBlock(size_t size);
    // TODO: make consistent with getLargeBlock
    void returnLargeObject(LargeMemoryBlock *lmb);

    /*-------------------------- Backreference memory request ----------------------*/
    void *getBackRefSpace(size_t size, bool *rawMemUsed);
    void putBackRefSpace(void *b, size_t size, bool rawMemUsed);

    /*----------------------------- Remap object ----------------------------------*/
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);

    /*---------------------------- Validation -------------------------------------*/
    bool inUserPool() const;
    bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }

    /*-------------------------- Configuration API --------------------------------*/
    // Soft heap limit
    void setRecommendedMaxSize(size_t softLimit) {
        memSoftLimit = softLimit;
        releaseCachesToLimit();
    }

    /*------------------------------- Info ----------------------------------------*/
    size_t getMaxBinnedSize() const;

    /*-------------------------- Testing, statistics ------------------------------*/
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getTotalMemSize() const { return totalMemSize; }
#endif
#if __TBB_MALLOC_BACKEND_STAT
    void reportStat(FILE *f);
private:
    static size_t binToSize(int bin) {
        MALLOC_ASSERT(bin <= HUGE_BIN, "Invalid bin.");

        return bin*freeBinsStep + minBinnedSize;
    }
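    // Round-trip check for binToSize(), derived from sizeToBin() above:
    //   binToSize(b) == b*freeBinsStep + minBinnedSize, so
    //   sizeToBin(binToSize(b)) == b for every regular bin 0 <= b < HUGE_BIN.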
#endif
};

#endif // __TBB_backend_H