1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #include "tbbmalloc_internal.h" |
18 | #include "tbb/tbb_environment.h" |
19 | |
20 | /******************************* Allocation of large objects *********************************************/ |
21 | |
22 | namespace rml { |
23 | namespace internal { |
24 | |
25 | /* ---------------------------- Large Object cache init section ---------------------------------------- */ |
26 | |
27 | void LargeObjectCache::init(ExtMemoryPool *memPool) |
28 | { |
29 | extMemPool = memPool; |
    // scalable_allocation_mode can be called before allocator initialization; respect this manual request
    if (hugeSizeThreshold == 0) {
        // Initialize the huge size threshold if the environment variable was set
        long requestedThreshold = tbb::internal::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Use a valid environment value, or initialize with the maximum possible value by default
35 | if (requestedThreshold != -1) { |
36 | setHugeSizeThreshold(requestedThreshold); |
37 | } else { |
38 | setHugeSizeThreshold(maxHugeSize); |
39 | } |
40 | } |
41 | } |
42 | |
43 | /* ----------------------------- Huge size threshold settings ----------------------------------------- */ |
44 | |
45 | void LargeObjectCache::setHugeSizeThreshold(size_t value) |
46 | { |
47 | // Valid in the huge cache range: [MaxLargeSize, MaxHugeSize]. |
48 | if (value <= maxHugeSize) { |
49 | hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize; |
50 | |
        // Calculate local indices for the global threshold size (for fast search during regular cleanup)
52 | largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins; |
53 | hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold); |
54 | } |
55 | } |
56 | |
57 | bool LargeObjectCache::sizeInCacheRange(size_t size) |
58 | { |
59 | return size <= maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold); |
60 | } |
61 | |
62 | /* ----------------------------------------------------------------------------------------------------- */ |
63 | |
64 | /* The functor called by the aggregator for the operation list */ |
65 | template<typename Props> |
66 | class CacheBinFunctor { |
67 | typename LargeObjectCacheImpl<Props>::CacheBin *const bin; |
68 | ExtMemoryPool *const extMemPool; |
69 | typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask; |
70 | const int idx; |
71 | |
72 | LargeMemoryBlock *toRelease; |
73 | bool needCleanup; |
74 | uintptr_t currTime; |
75 | |
    /* Do preprocessing over the operation list. */
    /* All OP_PUT_LIST operations are merged into a single operation.
       OP_GET operations are merged with OP_PUT_LIST operations where possible,
       which requires updating the moving average value in the bin.
       Only the last OP_CLEAN_TO_THRESHOLD operation is meaningful.
       The OP_CLEAN_ALL operation should also be performed only once;
       moreover, it cancels any OP_CLEAN_TO_THRESHOLD operation. */
83 | class OperationPreprocessor { |
84 | // TODO: remove the dependency on CacheBin. |
85 | typename LargeObjectCacheImpl<Props>::CacheBin *const bin; |
86 | |
        /* Contains the relative time within the operation list.
           It counts in reverse order, since the aggregator also
           provides operations in reverse order. */
90 | uintptr_t lclTime; |
91 | |
        /* opGet contains only those OP_GET operations that cannot be merged with OP_PUT operations;
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
94 | CacheBinOperation *opGet, *opClean; |
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation. */
96 | uintptr_t cleanTime; |
97 | |
98 | /* lastGetOpTime - the time of the last OP_GET operation. |
99 | lastGet - the same meaning as CacheBin::lastGet */ |
100 | uintptr_t lastGetOpTime, lastGet; |
101 | |
102 | /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */ |
103 | size_t updateUsedSize; |
104 | |
105 | /* The list of blocks for the OP_PUT_LIST operation. */ |
106 | LargeMemoryBlock *head, *tail; |
107 | int putListNum; |
108 | |
        /* Whether OP_CLEAN_ALL is requested. */
110 | bool isCleanAll; |
111 | |
112 | inline void commitOperation(CacheBinOperation *op) const; |
113 | inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const; |
114 | bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime); |
115 | void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num ); |
116 | |
117 | public: |
118 | OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) : |
119 | bin(bin), lclTime(0), opGet(NULL), opClean(NULL), cleanTime(0), |
120 | lastGetOpTime(0), updateUsedSize(0), head(NULL), isCleanAll(false) {} |
121 | void operator()(CacheBinOperation* opList); |
122 | uintptr_t getTimeRange() const { return -lclTime; } |
123 | |
124 | friend class CacheBinFunctor; |
125 | }; |
126 | |
127 | public: |
128 | CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool, |
129 | typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) : |
130 | bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(NULL), needCleanup(false) {} |
131 | void operator()(CacheBinOperation* opList); |
132 | |
133 | bool isCleanupNeeded() const { return needCleanup; } |
134 | LargeMemoryBlock *getToRelease() const { return toRelease; } |
135 | uintptr_t getCurrTime() const { return currTime; } |
136 | }; |
137 | |
138 | /* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */ |
139 | |
// The structures that describe the data of each operation type
141 | struct OpGet { |
142 | static const CacheBinOperationType type = CBOP_GET; |
143 | LargeMemoryBlock **res; |
144 | size_t size; |
145 | uintptr_t currTime; |
146 | }; |
147 | |
148 | struct OpPutList { |
149 | static const CacheBinOperationType type = CBOP_PUT_LIST; |
150 | LargeMemoryBlock *head; |
151 | }; |
152 | |
153 | struct OpCleanToThreshold { |
154 | static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD; |
155 | LargeMemoryBlock **res; |
156 | uintptr_t currTime; |
157 | }; |
158 | |
159 | struct OpCleanAll { |
160 | static const CacheBinOperationType type = CBOP_CLEAN_ALL; |
161 | LargeMemoryBlock **res; |
162 | }; |
163 | |
164 | struct OpUpdateUsedSize { |
165 | static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE; |
166 | size_t size; |
167 | }; |
168 | |
169 | union CacheBinOperationData { |
170 | private: |
171 | OpGet opGet; |
172 | OpPutList opPutList; |
173 | OpCleanToThreshold opCleanToThreshold; |
174 | OpCleanAll opCleanAll; |
175 | OpUpdateUsedSize opUpdateUsedSize; |
176 | }; |
177 | |
178 | // Forward declarations |
179 | template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op); |
180 | |
181 | // Describes the aggregator operation |
182 | struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type { |
183 | CacheBinOperationType type; |
184 | |
185 | template <typename OpTypeData> |
186 | CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) { |
187 | opCast<OpTypeData>(*this) = d; |
188 | type = OpTypeData::type; |
189 | MallocAggregatedOperation<CacheBinOperation>::type::status = st; |
190 | } |
191 | private: |
192 | CacheBinOperationData data; |
193 | |
194 | template <typename OpTypeData> |
195 | friend OpTypeData& opCast(CacheBinOperation &op); |
196 | }; |
197 | |
// The opCast function could be a member of CacheBinOperation, but that would
// introduce a slight stylistic ambiguity: it would look like a getter (with a
// cast) for the CacheBinOperation::data member, whereas it must return a
// reference to spare the code a lot of getter/setter calls. So a free-standing
// cast in the style of static_cast (or reinterpret_cast) is more readable and
// has more explicit semantics.
204 | template <typename OpTypeData> |
205 | OpTypeData& opCast(CacheBinOperation &op) { |
206 | return *reinterpret_cast<OpTypeData*>(&op.data); |
207 | } |
208 | |
209 | /* ------------------------------------------------------------------------ */ |
210 | |
211 | #if __TBB_MALLOC_LOCACHE_STAT |
212 | intptr_t mallocCalls, cacheHits; |
213 | intptr_t memAllocKB, memHitKB; |
214 | #endif |
215 | |
216 | inline bool lessThanWithOverflow(intptr_t a, intptr_t b) |
217 | { |
218 | return (a < b && (b - a < UINTPTR_MAX/2)) || |
219 | (a > b && (a - b > UINTPTR_MAX/2)); |
220 | } |
221 | |
222 | /* ----------------------------------- Operation processing methods ------------------------------------ */ |
223 | |
224 | template<typename Props> void CacheBinFunctor<Props>:: |
225 | OperationPreprocessor::commitOperation(CacheBinOperation *op) const |
226 | { |
227 | FencedStore( (intptr_t&)(op->status), CBST_DONE ); |
228 | } |
229 | |
230 | template<typename Props> void CacheBinFunctor<Props>:: |
231 | OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const |
232 | { |
233 | op->next = *opList; |
234 | *opList = op; |
235 | } |
236 | |
237 | template<typename Props> bool CacheBinFunctor<Props>:: |
238 | OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime) |
239 | { |
240 | if ( head ) { |
241 | uintptr_t age = head->age; |
242 | LargeMemoryBlock *next = head->next; |
243 | *opCast<OpGet>(*opGet).res = head; |
244 | commitOperation( opGet ); |
245 | head = next; |
246 | putListNum--; |
247 | MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT ); |
248 | |
249 | // use moving average with current hit interval |
250 | bin->updateMeanHitRange( currTime - age ); |
251 | return true; |
252 | } |
253 | return false; |
254 | } |
255 | |
256 | template<typename Props> void CacheBinFunctor<Props>:: |
257 | OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num) |
258 | { |
259 | if ( head ) { |
260 | MALLOC_ASSERT( tail, ASSERT_TEXT ); |
261 | tail->next = h; |
262 | h->prev = tail; |
263 | tail = t; |
264 | putListNum += num; |
265 | } else { |
266 | head = h; |
267 | tail = t; |
268 | putListNum = num; |
269 | } |
270 | } |
271 | |
272 | template<typename Props> void CacheBinFunctor<Props>:: |
273 | OperationPreprocessor::operator()(CacheBinOperation* opList) |
274 | { |
275 | for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) { |
276 | opNext = op->next; |
277 | switch ( op->type ) { |
278 | case CBOP_GET: |
279 | { |
280 | lclTime--; |
281 | if ( !lastGetOpTime ) { |
282 | lastGetOpTime = lclTime; |
283 | lastGet = 0; |
284 | } else if ( !lastGet ) lastGet = lclTime; |
285 | |
286 | if ( !getFromPutList(op,lclTime) ) { |
287 | opCast<OpGet>(*op).currTime = lclTime; |
288 | addOpToOpList( op, &opGet ); |
289 | } |
290 | } |
291 | break; |
292 | |
293 | case CBOP_PUT_LIST: |
294 | { |
295 | LargeMemoryBlock *head = opCast<OpPutList>(*op).head; |
296 | LargeMemoryBlock *curr = head, *prev = NULL; |
297 | |
298 | int num = 0; |
299 | do { |
                    // We did not keep prev pointers while assigning blocks to bins; set them now.
301 | curr->prev = prev; |
302 | |
303 | // Save the local times to the memory blocks. Local times are necessary |
304 | // for the getFromPutList function which updates the hit range value in |
305 | // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully. |
306 | // The age will be updated to the correct global time after preprocessing |
307 | // when global cache time is updated. |
308 | curr->age = --lclTime; |
309 | |
310 | prev = curr; |
311 | num += 1; |
312 | |
313 | STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj); |
314 | } while ((curr = curr->next) != NULL); |
315 | |
316 | LargeMemoryBlock *tail = prev; |
317 | addToPutList(head, tail, num); |
318 | |
319 | while ( opGet ) { |
320 | CacheBinOperation *next = opGet->next; |
321 | if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) ) |
322 | break; |
323 | opGet = next; |
324 | } |
325 | } |
326 | break; |
327 | |
328 | case CBOP_UPDATE_USED_SIZE: |
329 | updateUsedSize += opCast<OpUpdateUsedSize>(*op).size; |
330 | commitOperation( op ); |
331 | break; |
332 | |
333 | case CBOP_CLEAN_ALL: |
334 | isCleanAll = true; |
335 | addOpToOpList( op, &opClean ); |
336 | break; |
337 | |
338 | case CBOP_CLEAN_TO_THRESHOLD: |
339 | { |
340 | uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime; |
341 | // We don't worry about currTime overflow since it is a rare |
342 | // occurrence and doesn't affect correctness |
343 | cleanTime = cleanTime < currTime ? currTime : cleanTime; |
344 | addOpToOpList( op, &opClean ); |
345 | } |
346 | break; |
347 | |
348 | default: |
349 | MALLOC_ASSERT( false, "Unknown operation." ); |
350 | } |
351 | } |
352 | MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" ); |
353 | } |
354 | |
355 | template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList) |
356 | { |
357 | MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." ); |
358 | |
359 | OperationPreprocessor prep(bin); |
360 | prep(opList); |
361 | |
362 | if ( uintptr_t timeRange = prep.getTimeRange() ) { |
363 | uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange); |
364 | // endTime is used as the current (base) time since the local time is negative. |
365 | uintptr_t endTime = startTime + timeRange; |
366 | |
367 | if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime); |
368 | |
369 | if ( CacheBinOperation *opGet = prep.opGet ) { |
370 | bool isEmpty = false; |
371 | do { |
372 | #if __TBB_MALLOC_WHITEBOX_TEST |
373 | tbbmalloc_whitebox::locGetProcessed++; |
374 | #endif |
375 | const OpGet &opGetData = opCast<OpGet>(*opGet); |
376 | if ( !isEmpty ) { |
377 | if ( LargeMemoryBlock *res = bin->get() ) { |
378 | uintptr_t getTime = opGetData.currTime + endTime; |
379 | // use moving average with current hit interval |
380 | bin->updateMeanHitRange( getTime - res->age); |
381 | bin->updateCachedSize( -opGetData.size ); |
382 | *opGetData.res = res; |
383 | } else { |
384 | isEmpty = true; |
385 | uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime; |
386 | bin->forgetOutdatedState(lastGetOpTime); |
387 | bin->updateAgeThreshold(lastGetOpTime); |
388 | } |
389 | } |
390 | |
391 | CacheBinOperation *opNext = opGet->next; |
392 | bin->updateUsedSize( opGetData.size, bitMask, idx ); |
393 | prep.commitOperation( opGet ); |
394 | opGet = opNext; |
395 | } while ( opGet ); |
396 | if ( prep.lastGetOpTime ) |
397 | bin->setLastGet( prep.lastGetOpTime + endTime ); |
398 | } else if ( LargeMemoryBlock *curr = prep.head ) { |
399 | curr->prev = NULL; |
400 | while ( curr ) { |
401 | // Update local times to global times |
402 | curr->age += endTime; |
403 | curr=curr->next; |
404 | } |
405 | #if __TBB_MALLOC_WHITEBOX_TEST |
406 | tbbmalloc_whitebox::locPutProcessed+=prep.putListNum; |
407 | #endif |
408 | toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold); |
409 | } |
410 | needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime); |
411 | currTime = endTime - 1; |
412 | } |
413 | |
414 | if ( CacheBinOperation *opClean = prep.opClean ) { |
415 | if ( prep.isCleanAll ) |
416 | *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx); |
417 | else |
418 | *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx); |
419 | |
420 | CacheBinOperation *opNext = opClean->next; |
421 | prep.commitOperation( opClean ); |
422 | |
423 | while ((opClean = opNext) != NULL) { |
424 | opNext = opClean->next; |
425 | prep.commitOperation(opClean); |
426 | } |
427 | } |
428 | |
429 | if ( size_t size = prep.updateUsedSize ) |
430 | bin->updateUsedSize(size, bitMask, idx); |
431 | } |
432 | /* ----------------------------------------------------------------------------------------------------- */ |
433 | /* --------------------------- Methods for creating and executing operations --------------------------- */ |
434 | template<typename Props> void LargeObjectCacheImpl<Props>:: |
435 | CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime) |
436 | { |
437 | CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx ); |
438 | aggregator.execute( op, func, longLifeTime ); |
439 | |
440 | if ( LargeMemoryBlock *toRelease = func.getToRelease()) { |
441 | extMemPool->backend.returnLargeObject(toRelease); |
442 | } |
443 | |
444 | if ( func.isCleanupNeeded() ) { |
445 | extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false); |
446 | } |
447 | } |
448 | |
449 | template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: |
450 | CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx) |
451 | { |
452 | LargeMemoryBlock *lmb=NULL; |
453 | OpGet data = {&lmb, size}; |
454 | CacheBinOperation op(data); |
455 | ExecuteOperation( &op, extMemPool, bitMask, idx ); |
456 | return lmb; |
457 | } |
458 | |
459 | template<typename Props> void LargeObjectCacheImpl<Props>:: |
460 | CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx) |
461 | { |
462 | MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!" ); |
463 | |
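    // The operation descriptor is placement-new'ed into the cached block itself
    // (right past its LargeMemoryBlock header), so no extra allocation is
    // needed; CBST_NOWAIT means the caller does not wait for the aggregator to
    // process the operation.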
464 | OpPutList data = {head}; |
465 | CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT); |
466 | ExecuteOperation( op, extMemPool, bitMask, idx, false ); |
467 | } |
468 | |
469 | template<typename Props> bool LargeObjectCacheImpl<Props>:: |
470 | CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx) |
471 | { |
472 | LargeMemoryBlock *toRelease = NULL; |
473 | |
    /* oldest may be more recent than age, which is why the cast to a signed
       type is used. age overflow is also handled correctly. */
476 | if (last && (intptr_t)(currTime - oldest) > ageThreshold) { |
477 | OpCleanToThreshold data = {&toRelease, currTime}; |
478 | CacheBinOperation op(data); |
479 | ExecuteOperation( &op, extMemPool, bitMask, idx ); |
480 | } |
481 | bool released = toRelease; |
482 | |
483 | Backend *backend = &extMemPool->backend; |
484 | while ( toRelease ) { |
485 | LargeMemoryBlock *helper = toRelease->next; |
486 | backend->returnLargeObject(toRelease); |
487 | toRelease = helper; |
488 | } |
489 | return released; |
490 | } |
491 | |
492 | template<typename Props> bool LargeObjectCacheImpl<Props>:: |
493 | CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx) |
494 | { |
495 | LargeMemoryBlock *toRelease = NULL; |
496 | |
497 | if (last) { |
498 | OpCleanAll data = {&toRelease}; |
499 | CacheBinOperation op(data); |
500 | ExecuteOperation(&op, extMemPool, bitMask, idx); |
501 | } |
502 | bool released = toRelease; |
503 | |
504 | Backend *backend = &extMemPool->backend; |
505 | while ( toRelease ) { |
506 | LargeMemoryBlock *helper = toRelease->next; |
507 | MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age), |
508 | ASSERT_TEXT); |
509 | backend->returnLargeObject(toRelease); |
510 | toRelease = helper; |
511 | } |
512 | return released; |
513 | } |
514 | |
515 | template<typename Props> void LargeObjectCacheImpl<Props>:: |
516 | CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx) |
517 | { |
518 | OpUpdateUsedSize data = {size}; |
519 | CacheBinOperation op(data); |
520 | ExecuteOperation( &op, extMemPool, bitMask, idx ); |
521 | } |
522 | |
523 | /* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */ |
524 | |
525 | template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: |
526 | CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold) |
527 | { |
528 | size_t size = head->unalignedSize; |
529 | usedSize -= num*size; |
530 | MALLOC_ASSERT( !last || (last->age != 0 && last->age != -1U), ASSERT_TEXT ); |
531 | MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT ); |
532 | LargeMemoryBlock *toRelease = NULL; |
533 | if (size < hugeSizeThreshold && !lastCleanedAge) { |
        // The first object of this size was released.
        // Do not cache it, and remember when this occurred,
        // to take it into account on a cache miss.
537 | lastCleanedAge = tail->age; |
538 | toRelease = tail; |
539 | tail = tail->prev; |
540 | if (tail) |
541 | tail->next = NULL; |
542 | else |
543 | head = NULL; |
544 | num--; |
545 | } |
546 | if (num) { |
547 | // add [head;tail] list to cache |
548 | MALLOC_ASSERT( tail, ASSERT_TEXT ); |
549 | tail->next = first; |
550 | if (first) |
551 | first->prev = tail; |
552 | first = head; |
553 | if (!last) { |
554 | MALLOC_ASSERT(0 == oldest, ASSERT_TEXT); |
555 | oldest = tail->age; |
556 | last = tail; |
557 | } |
558 | |
559 | cachedSize += num*size; |
560 | } |
561 | |
    // No object is in use and nothing is cached; mark the bin as empty
563 | if (!usedSize && !first) |
564 | bitMask->set(idx, false); |
565 | |
566 | return toRelease; |
567 | } |
568 | |
569 | template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: |
570 | CacheBin::get() |
571 | { |
572 | LargeMemoryBlock *result=first; |
573 | if (result) { |
574 | first = result->next; |
575 | if (first) |
576 | first->prev = NULL; |
577 | else { |
578 | last = NULL; |
579 | oldest = 0; |
580 | } |
581 | } |
582 | |
583 | return result; |
584 | } |
585 | |
586 | template<typename Props> void LargeObjectCacheImpl<Props>:: |
587 | CacheBin::forgetOutdatedState(uintptr_t currTime) |
588 | { |
    // If the time since the last get is more than LongWaitFactor times the
    // bin's ageThreshold, treat the bin as rarely used and forget everything
    // we know about it.
    // If LongWaitFactor is too small, we forget too early, which prevents
    // good caching; if it is too high, we may cache blocks with unrelated
    // usage patterns.
595 | const uintptr_t sinceLastGet = currTime - lastGet; |
596 | bool doCleanup = false; |
597 | |
598 | if (ageThreshold) |
599 | doCleanup = sinceLastGet > Props::LongWaitFactor * ageThreshold; |
600 | else if (lastCleanedAge) |
601 | doCleanup = sinceLastGet > Props::LongWaitFactor * (lastCleanedAge - lastGet); |
602 | |
603 | if (doCleanup) { |
604 | lastCleanedAge = 0; |
605 | ageThreshold = 0; |
606 | } |
}
609 | |
610 | template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: |
611 | CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx) |
612 | { |
    /* oldest may be more recent than age, which is why the cast to a signed
       type is used. age overflow is also handled correctly. */
615 | if ( !last || (intptr_t)(currTime - last->age) < ageThreshold ) return NULL; |
616 | |
617 | #if MALLOC_DEBUG |
618 | uintptr_t nextAge = 0; |
619 | #endif |
620 | do { |
621 | #if MALLOC_DEBUG |
        // Check that the list is ordered
623 | MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last->age), |
624 | ASSERT_TEXT); |
625 | nextAge = last->age; |
626 | #endif |
627 | cachedSize -= last->unalignedSize; |
628 | last = last->prev; |
629 | } while (last && (intptr_t)(currTime - last->age) > ageThreshold); |
630 | |
631 | LargeMemoryBlock *toRelease = NULL; |
632 | if (last) { |
633 | toRelease = last->next; |
634 | oldest = last->age; |
635 | last->next = NULL; |
636 | } else { |
637 | toRelease = first; |
638 | first = NULL; |
639 | oldest = 0; |
640 | if (!usedSize) |
641 | bitMask->set(idx, false); |
642 | } |
643 | MALLOC_ASSERT( toRelease, ASSERT_TEXT ); |
644 | lastCleanedAge = toRelease->age; |
645 | |
646 | return toRelease; |
647 | } |
648 | |
649 | template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>:: |
650 | CacheBin::cleanAll(BinBitMask *bitMask, int idx) |
651 | { |
652 | if (!last) return NULL; |
653 | |
654 | LargeMemoryBlock *toRelease = first; |
655 | last = NULL; |
656 | first = NULL; |
657 | oldest = 0; |
658 | cachedSize = 0; |
659 | if (!usedSize) |
660 | bitMask->set(idx, false); |
661 | |
662 | return toRelease; |
663 | } |
664 | |
665 | /* ----------------------------------------------------------------------------------------------------- */ |
666 | |
667 | template<typename Props> size_t LargeObjectCacheImpl<Props>:: |
668 | CacheBin::reportStat(int num, FILE *f) |
669 | { |
670 | #if __TBB_MALLOC_LOCACHE_STAT |
    if (first)
        fprintf(f, "%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
                num, num*Props::CacheStep+Props::MinSize,
                cachedSize/1024, ageThreshold, lastCleanedAge, oldest);
675 | #else |
676 | suppress_unused_warning(num); |
677 | suppress_unused_warning(f); |
678 | #endif |
679 | return cachedSize; |
680 | } |
681 | |
682 | // Release objects from cache blocks that are older than ageThreshold |
683 | template<typename Props> |
684 | bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr) |
685 | { |
686 | bool released = false; |
687 | BinsSummary binsSummary; |
688 | |
    // The huge size threshold is at or below this cache's first bin, so no bins here can be cleaned
690 | if (hugeSizeThresholdIdx == 0) return false; |
691 | |
    // Start searching from the last bin below the huge size threshold (such bins can be cleaned up)
693 | int startSearchIdx = hugeSizeThresholdIdx - 1; |
694 | |
695 | for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) { |
696 | bin[i].updateBinsSummary(&binsSummary); |
697 | if (!doThreshDecr && tooLargeLOC > 2 && binsSummary.isLOCTooLarge()) { |
            // If the LOC has been too large for quite a long time, decrease
            // the threshold based on bin hit statistics.
            // To do this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not look too large
            // compared to the total cachedSz, since we have computed it only
            // partially so far. We are OK with that.
704 | i = bitMask.getMaxTrue(startSearchIdx)+1; |
705 | doThreshDecr = true; |
706 | binsSummary.reset(); |
707 | continue; |
708 | } |
709 | if (doThreshDecr) |
710 | bin[i].decreaseThreshold(); |
711 | |
712 | if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) { |
713 | released = true; |
714 | } |
715 | } |
    // We want to detect whether the LOC has been too large continuously for
    // some time, so races between incrementing and zeroing are acceptable,
    // but the increment itself must be atomic.
719 | if (binsSummary.isLOCTooLarge()) |
720 | AtomicIncrement(tooLargeLOC); |
721 | else |
722 | tooLargeLOC = 0; |
723 | return released; |
724 | } |
725 | |
726 | template<typename Props> |
727 | bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool) |
728 | { |
729 | bool released = false; |
730 | for (int i = numBins-1; i >= 0; i--) { |
731 | released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i); |
732 | } |
733 | return released; |
734 | } |
735 | |
736 | template<typename Props> |
737 | void LargeObjectCacheImpl<Props>::reset() { |
738 | tooLargeLOC = 0; |
739 | for (int i = numBins-1; i >= 0; i--) |
740 | bin[i].init(); |
741 | bitMask.reset(); |
742 | } |
743 | |
744 | #if __TBB_MALLOC_WHITEBOX_TEST |
745 | template<typename Props> |
746 | size_t LargeObjectCacheImpl<Props>::getLOCSize() const |
747 | { |
748 | size_t size = 0; |
749 | for (int i = numBins-1; i >= 0; i--) |
750 | size += bin[i].getSize(); |
751 | return size; |
752 | } |
753 | |
754 | size_t LargeObjectCache::getLOCSize() const |
755 | { |
756 | return largeCache.getLOCSize() + hugeCache.getLOCSize(); |
757 | } |
758 | |
759 | template<typename Props> |
760 | size_t LargeObjectCacheImpl<Props>::getUsedSize() const |
761 | { |
762 | size_t size = 0; |
763 | for (int i = numBins-1; i >= 0; i--) |
764 | size += bin[i].getUsedSize(); |
765 | return size; |
766 | } |
767 | |
768 | size_t LargeObjectCache::getUsedSize() const |
769 | { |
770 | return largeCache.getUsedSize() + hugeCache.getUsedSize(); |
771 | } |
772 | #endif // __TBB_MALLOC_WHITEBOX_TEST |
773 | |
774 | inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime) |
775 | { |
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow: time wraps past 0, which is a multiple of cacheCleanupFreq, so do cleanup
        // (prev; prev+range] contains a multiple of cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq)<currTime+range;
780 | } |
781 | |
782 | bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr) |
783 | { |
784 | if (!doThreshDecr) |
785 | extMemPool->allLocalCaches.markUnused(); |
786 | return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr) |
787 | | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr); |
788 | } |
789 | |
790 | bool LargeObjectCache::decreasingCleanup() |
791 | { |
792 | return doCleanup(FencedLoad((intptr_t&)cacheCurrTime), /*doThreshDecr=*/true); |
793 | } |
794 | |
795 | bool LargeObjectCache::regularCleanup() |
796 | { |
797 | return doCleanup(FencedLoad((intptr_t&)cacheCurrTime), /*doThreshDecr=*/false); |
798 | } |
799 | |
800 | bool LargeObjectCache::cleanAll() |
801 | { |
802 | return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool); |
803 | } |
804 | |
805 | void LargeObjectCache::reset() |
806 | { |
807 | largeCache.reset(); |
808 | hugeCache.reset(); |
809 | } |
810 | |
811 | template<typename Props> |
812 | LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size) |
813 | { |
814 | int idx = Props::sizeToIdx(size); |
815 | |
816 | LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx); |
817 | |
818 | if (lmb) { |
819 | MALLOC_ITT_SYNC_ACQUIRED(bin+idx); |
820 | STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj); |
821 | } |
822 | return lmb; |
823 | } |
824 | |
825 | template<typename Props> |
826 | void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size) |
827 | { |
828 | int idx = Props::sizeToIdx(size); |
829 | MALLOC_ASSERT(idx<numBins, ASSERT_TEXT); |
830 | bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx); |
831 | } |
832 | |
833 | #if __TBB_MALLOC_LOCACHE_STAT |
834 | template<typename Props> |
835 | void LargeObjectCacheImpl<Props>::reportStat(FILE *f) |
836 | { |
837 | size_t cachedSize = 0; |
838 | for (int i=0; i<numBins; i++) |
839 | cachedSize += bin[i].reportStat(i, f); |
840 | fprintf(f, "total LOC size %lu MB\n" , cachedSize/1024/1024); |
841 | } |
842 | |
843 | void LargeObjectCache::reportStat(FILE *f) |
844 | { |
845 | largeCache.reportStat(f); |
846 | hugeCache.reportStat(f); |
847 | fprintf(f, "cache time %lu\n" , cacheCurrTime); |
848 | } |
849 | #endif |
850 | |
851 | template<typename Props> |
852 | void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache) |
853 | { |
854 | int toBinIdx = Props::sizeToIdx(toCache->unalignedSize); |
855 | |
856 | MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx); |
857 | bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx); |
858 | } |
859 | |
860 | void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size) |
861 | { |
862 | if (size < maxLargeSize) |
863 | largeCache.updateCacheState(extMemPool, op, size); |
864 | else if (size < maxHugeSize) |
865 | hugeCache.updateCacheState(extMemPool, op, size); |
866 | } |
867 | |
868 | uintptr_t LargeObjectCache::getCurrTime() |
869 | { |
870 | return (uintptr_t)AtomicIncrement((intptr_t&)cacheCurrTime); |
871 | } |
872 | |
873 | uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range) |
874 | { |
875 | return (uintptr_t)AtomicAdd((intptr_t&)cacheCurrTime, range) + 1; |
876 | } |
877 | |
878 | void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize) |
879 | { |
880 | updateCacheState(decrease, oldSize); |
881 | updateCacheState(increase, alignToBin(newSize)); |
882 | } |
883 | |
884 | size_t LargeObjectCache::alignToBin(size_t size) { |
885 | return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size); |
886 | } |
887 | |
// Used for internal purposes
889 | int LargeObjectCache::sizeToIdx(size_t size) |
890 | { |
891 | MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT); |
892 | return size < maxLargeSize ? |
893 | LargeCacheType::sizeToIdx(size) : |
894 | LargeCacheType::numBins + HugeCacheType::sizeToIdx(size); |
895 | } |
896 | |
897 | void LargeObjectCache::putList(LargeMemoryBlock *list) |
898 | { |
899 | LargeMemoryBlock *toProcess, *n; |
900 | |
901 | for (LargeMemoryBlock *curr = list; curr; curr = toProcess) { |
902 | LargeMemoryBlock *tail = curr; |
903 | toProcess = curr->next; |
904 | if (!sizeInCacheRange(curr->unalignedSize)) { |
905 | extMemPool->backend.returnLargeObject(curr); |
906 | continue; |
907 | } |
908 | int currIdx = sizeToIdx(curr->unalignedSize); |
909 | |
        // Find all blocks that belong to the same bin. We do not use a more
        // efficient sorting algorithm because the list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
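        // For example, a list whose sizes map to bins {3, 5, 3, 5} is split
        // into sublists {3, 3} and {5, 5}, and each sublist is handed to its
        // bin with a single putList call.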
913 | for (LargeMemoryBlock *b = toProcess; b; b = n) { |
914 | n = b->next; |
915 | if (sizeToIdx(b->unalignedSize) == currIdx) { |
916 | tail->next = b; |
917 | tail = b; |
918 | if (toProcess == b) |
919 | toProcess = toProcess->next; |
920 | else { |
921 | b->prev->next = b->next; |
922 | if (b->next) |
923 | b->next->prev = b->prev; |
924 | } |
925 | } |
926 | } |
927 | tail->next = NULL; |
928 | if (curr->unalignedSize < maxLargeSize) |
929 | largeCache.putList(extMemPool, curr); |
930 | else |
931 | hugeCache.putList(extMemPool, curr); |
932 | } |
933 | } |
934 | |
935 | void LargeObjectCache::put(LargeMemoryBlock *largeBlock) |
936 | { |
937 | size_t blockSize = largeBlock->unalignedSize; |
938 | if (sizeInCacheRange(blockSize)) { |
939 | largeBlock->next = NULL; |
940 | if (blockSize < maxLargeSize) |
941 | largeCache.putList(extMemPool, largeBlock); |
942 | else |
943 | hugeCache.putList(extMemPool, largeBlock); |
944 | } else { |
945 | extMemPool->backend.returnLargeObject(largeBlock); |
946 | } |
947 | } |
948 | |
949 | LargeMemoryBlock *LargeObjectCache::get(size_t size) |
950 | { |
951 | MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT ); |
952 | if (sizeInCacheRange(size)) { |
953 | return size < maxLargeSize ? |
954 | largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size); |
955 | } |
956 | return NULL; |
957 | } |
958 | |
959 | LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize) |
960 | { |
961 | #if __TBB_MALLOC_LOCACHE_STAT |
962 | AtomicIncrement(mallocCalls); |
963 | AtomicAdd(memAllocKB, allocationSize/1024); |
964 | #endif |
965 | LargeMemoryBlock* lmb = loc.get(allocationSize); |
966 | if (!lmb) { |
967 | BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true); |
968 | if (backRefIdx.isInvalid()) |
969 | return NULL; |
970 | |
971 | // unalignedSize is set in getLargeBlock |
972 | lmb = backend.getLargeBlock(allocationSize); |
973 | if (!lmb) { |
974 | removeBackRef(backRefIdx); |
975 | loc.updateCacheState(decrease, allocationSize); |
976 | return NULL; |
977 | } |
978 | lmb->backRefIdx = backRefIdx; |
979 | lmb->pool = pool; |
980 | STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj); |
981 | } else { |
982 | #if __TBB_MALLOC_LOCACHE_STAT |
983 | AtomicIncrement(cacheHits); |
984 | AtomicAdd(memHitKB, allocationSize/1024); |
985 | #endif |
986 | } |
987 | return lmb; |
988 | } |
989 | |
990 | void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock) |
991 | { |
992 | loc.put(mBlock); |
993 | } |
994 | |
995 | void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head) |
996 | { |
997 | loc.putList(head); |
998 | } |
999 | |
1000 | bool ExtMemoryPool::softCachesCleanup() |
1001 | { |
1002 | return loc.regularCleanup(); |
1003 | } |
1004 | |
1005 | bool ExtMemoryPool::hardCachesCleanup() |
1006 | { |
    // Thread-local caches must be cleaned before the LOC,
    // because objects from thread-local caches can be released to the LOC.
1009 | bool ret = releaseAllLocalCaches(); |
1010 | ret |= orphanedBlocks.cleanup(&backend); |
1011 | ret |= loc.cleanAll(); |
1012 | ret |= backend.clean(); |
1013 | return ret; |
1014 | } |
1015 | |
1016 | #if BACKEND_HAS_MREMAP |
1017 | void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment) |
1018 | { |
1019 | const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize; |
1020 | void *o = backend.remap(ptr, oldSize, newSize, alignment); |
1021 | if (o) { |
1022 | LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock; |
1023 | loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize); |
1024 | } |
1025 | return o; |
1026 | } |
1027 | #endif /* BACKEND_HAS_MREMAP */ |
1028 | |
1029 | /*********** End allocation of large objects **********/ |
1030 | |
1031 | } // namespace internal |
1032 | } // namespace rml |
1033 | |
1034 | |