/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
#error tbbmalloc_internal.h must be included at this point
#endif

#ifndef __TBB_backend_H
#define __TBB_backend_H

// Included from namespace rml::internal

// global state of blocks currently being processed
class BackendSync {
    // Class instances should reside in zero-initialized memory!
    // The number of blocks currently removed from a bin and not yet returned
    // back to another bin
    intptr_t inFlyBlocks;
    intptr_t binsModifications; // incremented on every bin modification
    Backend *backend;
public:
    void init(Backend *b) { backend = b; }
    void blockConsumed() { AtomicIncrement(inFlyBlocks); }
    void binsModified() { AtomicIncrement(binsModifications); }
    void blockReleased() {
#if __TBB_MALLOC_BACKEND_STAT
        MALLOC_ITT_SYNC_RELEASING(&inFlyBlocks);
#endif
        AtomicIncrement(binsModifications);
        intptr_t prev = AtomicAdd(inFlyBlocks, -1);
        MALLOC_ASSERT(prev > 0, ASSERT_TEXT);
        suppress_unused_warning(prev);
    }
    intptr_t getNumOfMods() const { return FencedLoad(binsModifications); }
    // returns true if the search over the bins needs to be redone
    inline bool waitTillBlockReleased(intptr_t startModifiedCnt);
};
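
// Illustrative sketch (comment only, not compiled): how the BackendSync
// counters are meant to pair up around a bin search. The surrounding call,
// takeBlockFromBins(), and the free variable size are hypothetical names
// used only for this example.
//
//   intptr_t startModifiedCnt = bkndSync.getNumOfMods();
//   bkndSync.blockConsumed();                    // a block is about to leave a bin
//   FreeBlock *fBlock = takeBlockFromBins(size);
//   if (!fBlock && bkndSync.waitTillBlockReleased(startModifiedCnt)) {
//       // the bins were modified by another thread meanwhile: re-do the search
//   }
//   // ... split/use fBlock ...
//   bkndSync.blockReleased();                    // the block reached its destination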

class CoalRequestQ { // queue of free blocks whose coalescing was delayed
private:
    FreeBlock *blocksToFree;
    BackendSync *bkndSync;
    // counts blocks that are in blocksToFree, plus blocks that have already
    // left blocksToFree but are still being actively coalesced
    intptr_t inFlyBlocks;
public:
    void init(BackendSync *bSync) { bkndSync = bSync; }
    FreeBlock *getAll(); // return the current list of blocks and make the queue empty
    void putBlock(FreeBlock *fBlock);
    inline void blockWasProcessed();
    intptr_t blocksInFly() const { return FencedLoad(inFlyBlocks); }
};
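
// Illustrative sketch (comment only, not compiled) of the deferred-coalescing
// flow suggested by this interface; the draining loop and the nextToFree link
// name are assumptions made for the example, not the real implementation.
//
//   coalescQ.putBlock(fBlock);                 // coalescing of fBlock is postponed
//   ...
//   if (FreeBlock *list = coalescQ.getAll()) { // later: drain the whole queue
//       while (list) {
//           FreeBlock *next = list->nextToFree;
//           coalescAndPut(list, ...);          // perform the delayed coalescing
//           coalescQ.blockWasProcessed();      // the block left active coalescing
//           list = next;
//       }
//   }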

class MemExtendingSema {
    intptr_t active;
public:
    bool wait() {
        bool rescanBins = false;
        // up to 3 threads can add more memory from the OS simultaneously,
        // the rest of the threads have to wait
        for (;;) {
            intptr_t prevCnt = FencedLoad(active);
            if (prevCnt < 3) {
                intptr_t n = AtomicCompareExchange(active, prevCnt+1, prevCnt);
                if (n == prevCnt)
                    break;
            } else {
                SpinWaitWhileEq(active, prevCnt);
                rescanBins = true;
                break;
            }
        }
        return rescanBins;
    }
    void signal() { AtomicAdd(active, -1); }
};
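
// Illustrative sketch (comment only, not compiled): the intended wait/signal
// pairing around an OS request; getMoreMemFromOS() is a hypothetical name
// standing in for the code that actually extends memory.
//
//   if (memExtendingSema.wait()) {
//       // we had to wait for another thread that was adding memory, so the
//       // bins may already contain what we need: re-scan them before asking the OS
//   } else {
//       // we are one of at most 3 concurrent extenders
//       FreeBlock *block = getMoreMemFromOS(size);
//       memExtendingSema.signal();             // always release the slot we took
//   }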

enum MemRegionType {
    // The region holds only slabs
    MEMREG_SLAB_BLOCKS = 0,
    // The region can hold several large object blocks
    MEMREG_LARGE_BLOCKS,
    // The region holds only one block of the requested size
    MEMREG_ONE_BLOCK
};

class MemRegionList {
    MallocMutex regionListLock;
public:
    MemRegion *head;
    void add(MemRegion *r);
    void remove(MemRegion *r);
    int reportStat(FILE *f);
};

class Backend {
private:
    /* Blocks in the range [minBinnedSize; getMaxBinnedSize()] are kept in bins;
       one region can contain several blocks. Larger blocks are allocated directly,
       and one region always contains exactly one block.
    */
    enum {
        minBinnedSize = 8*1024UL,
        /* If huge pages are available, maxBinned_HugePage is used as the threshold.
           If not, maxBinned_SmallPage is the threshold.
           TODO: use the pool's granularity for setting the upper bound. */
        maxBinned_SmallPage = 1024*1024UL,
        // TODO: support other page sizes
        maxBinned_HugePage = 4*1024*1024UL
    };
    enum {
        VALID_BLOCK_IN_BIN = 1 // a valid block was added to a bin and not returned as the result
    };
public:
    // The backend bin step is the same as CacheStep of the large object cache
    static const size_t freeBinsStep = LargeObjectCache::LargeBSProps::CacheStep;
    static const unsigned freeBinsNum = (maxBinned_HugePage-minBinnedSize)/freeBinsStep + 1;

    // if the previous request missed the per-thread slab pool,
    // allocate numOfSlabAllocOnMiss blocks in advance
    static const int numOfSlabAllocOnMiss = 2;

    enum {
        NO_BIN = -1,
        // special bin for blocks >= maxBinned_HugePage; blocks go to this bin
        // when the pool is created with the keepAllMemory policy
        // TODO: currently this bin is scanned using "first fit"; since it accumulates
        // blocks of different sizes, "best fit" would be preferable in terms of fragmentation
        HUGE_BIN = freeBinsNum-1
    };

    // A bin keeps a doubly-linked list of free blocks. The list must be doubly-linked
    // because during coalescing a block is removed from the middle of the list.
    struct Bin {
        FreeBlock *head,
                  *tail;
        MallocMutex tLock;

        void removeBlock(FreeBlock *fBlock);
        void reset() { head = tail = 0; }
        bool empty() const { return !head; }

        size_t countFreeBlocks();
        size_t reportFreeBlocks(FILE *f);
        void reportStat(FILE *f);
    };
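
    // Illustrative sketch (comment only, not compiled) of why the list has to
    // be doubly-linked: coalescing must unlink a block from the middle of the
    // list in O(1). The prev/next field names are assumptions made for this
    // example, not the actual removeBlock() implementation.
    //
    //   // under tLock:
    //   if (fBlock->prev) fBlock->prev->next = fBlock->next; else head = fBlock->next;
    //   if (fBlock->next) fBlock->next->prev = fBlock->prev; else tail = fBlock->prev;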

    typedef BitMaskMin<Backend::freeBinsNum> BitMaskBins;

    // an array of bins supplemented with a bitmask for fast lookup of non-empty bins
    class IndexedBins {
        BitMaskBins bitMask;
        Bin freeBins[Backend::freeBinsNum];
        FreeBlock *getFromBin(int binIdx, BackendSync *sync, size_t size,
                              bool needAlignedBlock, bool alignedBin, bool wait, int *resLocked);
    public:
        FreeBlock *findBlock(int nativeBin, BackendSync *sync, size_t size,
                             bool needAlignedBlock, bool alignedBin, int *numOfLockedBins);
        bool tryReleaseRegions(int binIdx, Backend *backend);
        void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
        void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
        bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
        int getMinNonemptyBin(unsigned startBin) const;
        void verify();
        void reset();
        void reportStat(FILE *f);
    };
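
    // Illustrative sketch (comment only, not compiled): how the bitmask is
    // meant to speed up the search for a non-empty bin. The loop shape, the
    // out-of-range sentinel, and the free variables (size, sync,
    // needAlignedBlock) are assumptions made for this example.
    //
    //   int locked = 0;
    //   for (int bin = getMinNonemptyBin(sizeToBin(size));
    //        bin >= 0 && bin < (int)freeBinsNum;
    //        bin = getMinNonemptyBin(bin + 1)) {     // the bitmask skips empty bins
    //       if (FreeBlock *b = getFromBin(bin, sync, size, needAlignedBlock,
    //                                     /*alignedBin*/true, /*wait*/false, &locked))
    //           return b;                            // found a suitable free block
    //   }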

private:
    class AdvRegionsBins {
        BitMaskBins bins;
    public:
        void registerBin(int regBin) { bins.set(regBin, 1); }
        int getMinUsedBin(int start) const { return bins.getMinTrue(start); }
        void reset() { bins.reset(); }
    };
    // auxiliary comparator for atomically updating the maximal observed request size
    class MaxRequestComparator {
        const Backend *backend;
    public:
        MaxRequestComparator(const Backend *be) : backend(be) {}
        inline bool operator()(size_t oldMaxReq, size_t requestSize) const;
    };

#if CHECK_ALLOCATION_RANGE
    // Keep the min and max of all addresses requested from the OS;
    // used for checking memory possibly allocated by replaced allocators
    // and for debugging purposes. Valid only for the default memory pool.
    class UsedAddressRange {
        static const uintptr_t ADDRESS_UPPER_BOUND = UINTPTR_MAX;

        uintptr_t leftBound,
                  rightBound;
        MallocMutex mutex;
    public:
        // rightBound is zero-initialized
        void init() { leftBound = ADDRESS_UPPER_BOUND; }
        void registerAlloc(uintptr_t left, uintptr_t right);
        void registerFree(uintptr_t left, uintptr_t right);
        // as only the left and right bounds are kept, we may return true
        // for a pointer not allocated by us if more than a single region
        // was requested from the OS
        bool inRange(void *ptr) const {
            const uintptr_t p = (uintptr_t)ptr;
            return leftBound<=p && p<=rightBound;
        }
    };
#else
    class UsedAddressRange {
    public:
        void init() { }
        void registerAlloc(uintptr_t, uintptr_t) {}
        void registerFree(uintptr_t, uintptr_t) {}
        bool inRange(void *) const { return true; }
    };
#endif
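
    // Illustrative sketch (comment only, not compiled): the range bookkeeping
    // that ptrCanBeValid() relies on; the exact call sites shown here are
    // assumptions made for the example.
    //
    //   uintptr_t base = (uintptr_t)allocRawMem(size);
    //   usedAddrRange.registerAlloc(base, base + size);  // widen [leftBound, rightBound]
    //   ...
    //   // later, for a pointer of unknown origin:
    //   if (!usedAddrRange.inRange(ptr))
    //       ;  // ptr was certainly not allocated by this pool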

    ExtMemoryPool *extMemPool;
    // used to release every region when the pool is destroyed
    MemRegionList regionList;

    CoalRequestQ coalescQ; // queue of coalescing requests
    BackendSync bkndSync;
    // semaphore limiting the number of threads simultaneously adding more memory from the OS
    MemExtendingSema memExtendingSema;
    size_t totalMemSize,
           memSoftLimit;
    UsedAddressRange usedAddrRange;
    // to make the 1st allocation larger than requested, keep the bootstrapping status
    enum {
        bootsrapMemNotDone = 0,
        bootsrapMemInitializing,
        bootsrapMemDone
    };
    intptr_t bootsrapMemStatus;
    MallocMutex bootsrapMemStatusMutex;
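
    // Illustrative sketch (comment only, not compiled) of the bootstrap
    // protocol implied by the three states above; the body shown here is an
    // assumption for the example, not the real requestBootstrapMem().
    //
    //   if (FencedLoad(bootsrapMemStatus) == bootsrapMemDone)
    //       return;                              // fast path: already bootstrapped
    //   MallocMutex::scoped_lock lock(bootsrapMemStatusMutex);
    //   if (bootsrapMemStatus == bootsrapMemDone)
    //       return;                              // finished while we waited for the lock
    //   bootsrapMemStatus = bootsrapMemInitializing;
    //   // ... allocate a region larger than strictly required ...
    //   bootsrapMemStatus = bootsrapMemDone;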

    // Tracking the maximal observed requested size allows us to decrease
    // memory consumption for small requests and to decrease fragmentation
    // for workloads where small and large allocation requests are mixed.
    // TODO: decrease it as well, not only increase it
    size_t maxRequestedSize;

    // registered bins related to regions allocated in advance
    AdvRegionsBins advRegBins;
    // Storage for split FreeBlocks
    IndexedBins freeLargeBlockBins,
                freeSlabAlignedBins;

    // Our friends
    friend class BackendSync;

    /******************************** Backend methods ******************************/

    /*--------------------------- Coalescing functions ----------------------------*/
    void coalescAndPut(FreeBlock *fBlock, size_t blockSz, bool slabAligned);
    bool coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool reportBlocksProcessed);

    // Main coalescing operation
    FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion);

    // Queue for conflicted blocks during coalescing
    bool scanCoalescQ(bool forceCoalescQDrop);
    intptr_t blocksInCoalescing() const { return coalescQ.blocksInFly(); }

    /*--------------------- FreeBlock backend accessors ---------------------------*/
    FreeBlock *genericGetBlock(int num, size_t size, bool slabAligned);
    void genericPutBlock(FreeBlock *fBlock, size_t blockSz, bool slabAligned);

    // Split the block and return remaining parts to the backend if possible
    FreeBlock *splitBlock(FreeBlock *fBlock, int num, size_t size, bool isAligned, bool needAlignedBlock);

    void removeBlockFromBin(FreeBlock *fBlock);

    // TODO: combine with returnLargeObject
    void putLargeBlock(LargeMemoryBlock *lmb);

    /*------------------- Starting point for OS allocation ------------------------*/
    void requestBootstrapMem();
    FreeBlock *askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt,
                            int *lockedBinsThreshold, int numOfLockedBins,
                            bool *splittable, bool needSlabRegion);

    /*---------------------- Memory regions allocation ----------------------------*/
    FreeBlock *addNewRegion(size_t size, MemRegionType type, bool addToBin);
    void releaseRegion(MemRegion *region);

    // TODO: combine into one initMemoryRegion function
    FreeBlock *findBlockInRegion(MemRegion *region, size_t exactBlockSize);
    void startUseBlock(MemRegion *region, FreeBlock *fBlock, bool addToBin);

    /*------------------------- Raw memory accessors ------------------------------*/
    void *allocRawMem(size_t &size);
    bool freeRawMem(void *object, size_t size);

    /*------------------------------ Cleanup functions ----------------------------*/
    // Clean all memory from all caches (extMemPool hard cleanup)
    FreeBlock *releaseMemInCaches(intptr_t startModifiedCnt, int *lockedBinsThreshold, int numOfLockedBins);
    // Soft heap limit (regular cleanup, then maybe hard cleanup)
    void releaseCachesToLimit();

    /*---------------------------------- Utility ----------------------------------*/
    // TODO: move inside IndexedBins class
    static int sizeToBin(size_t size) {
        if (size >= maxBinned_HugePage)
            return HUGE_BIN;
        else if (size < minBinnedSize)
            return NO_BIN;

        int bin = (size - minBinnedSize)/freeBinsStep;

        MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size.");
        return bin;
    }
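
    // Worked example (comment only, not compiled) of the mapping above:
    //
    //   sizeToBin(minBinnedSize)                  == 0
    //   sizeToBin(minBinnedSize + freeBinsStep)   == 1
    //   sizeToBin(minBinnedSize + 2*freeBinsStep) == 2
    //   sizeToBin(maxBinned_HugePage)             == HUGE_BIN
    //   sizeToBin(minBinnedSize - 1)              == NO_BIN
    //
    // and binToSize(sizeToBin(s)) <= s holds for any binned size s.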
    static bool toAlignedBin(FreeBlock *block, size_t size) {
        return isAligned((char*)block + size, slabSize) && size >= slabSize;
    }
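
    // Worked example (comment only, not compiled), assuming for illustration a
    // 16 KB slabSize: a block at address 0x12000 with size 0x6000 ends at
    // 0x18000, which is 16 KB-aligned, and 0x6000 >= slabSize, so
    // toAlignedBin() returns true: an aligned slab can be carved off the tail.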

public:
    /*--------------------- Init, reset, destroy, verify -------------------------*/
    void init(ExtMemoryPool *extMemoryPool);
    bool destroy();

    void verify();
    void reset();
    bool clean(); // clean on caches cleanup

    /*------------------------- Slab block request --------------------------------*/
    BlockI *getSlabBlock(int num);
    void putSlabBlock(BlockI *block);

    /*-------------------------- Large object request -----------------------------*/
    LargeMemoryBlock *getLargeBlock(size_t size);
    // TODO: make consistent with getLargeBlock
    void returnLargeObject(LargeMemoryBlock *lmb);

    /*-------------------------- Backreference memory request ----------------------*/
    void *getBackRefSpace(size_t size, bool *rawMemUsed);
    void putBackRefSpace(void *b, size_t size, bool rawMemUsed);

    /*----------------------------- Remap object ----------------------------------*/
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);

    /*---------------------------- Validation -------------------------------------*/
    bool inUserPool() const;
    bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }

    /*-------------------------- Configuration API --------------------------------*/
    // Soft heap limit
    void setRecommendedMaxSize(size_t softLimit) {
        memSoftLimit = softLimit;
        releaseCachesToLimit();
    }
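
    // Illustrative sketch (comment only, not compiled): the kind of check the
    // soft limit implies; the exact condition releaseCachesToLimit() uses is
    // an assumption based on the fields above.
    //
    //   if (memSoftLimit && FencedLoad(totalMemSize) > memSoftLimit)
    //       ;  // release cached memory (regular cleanup, then hard cleanup)
    //          // until totalMemSize fits under memSoftLimit or the caches are empty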

    /*------------------------------- Info ----------------------------------------*/
    size_t getMaxBinnedSize() const;

    /*-------------------------- Testing, statistics ------------------------------*/
#if __TBB_MALLOC_WHITEBOX_TEST
    size_t getTotalMemSize() const { return totalMemSize; }
#endif
#if __TBB_MALLOC_BACKEND_STAT
    void reportStat(FILE *f);
private:
    static size_t binToSize(int bin) {
        MALLOC_ASSERT(bin <= HUGE_BIN, "Invalid bin.");

        return bin*freeBinsStep + minBinnedSize;
    }
#endif
};

#endif // __TBB_backend_H