1/*
2 Copyright (c) 2005-2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
17#ifndef __TBB_tbbmalloc_internal_H
18 #error tbbmalloc_internal.h must be included at this point
19#endif
20
21#ifndef __TBB_large_objects_H
22#define __TBB_large_objects_H
23
24//! The list of possible Cache Bin Aggregator operations.
25/* Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */
26enum CacheBinOperationType {
27 CBOP_INVALID = 0,
28 CBOP_GET,
29 CBOP_PUT_LIST,
30 CBOP_CLEAN_TO_THRESHOLD,
31 CBOP_CLEAN_ALL,
32 CBOP_UPDATE_USED_SIZE
33};
34
35// The Cache Bin Aggregator operation status list.
36// CBST_NOWAIT can be specified for non-blocking operations.
37enum CacheBinOperationStatus {
38 CBST_WAIT = 0,
39 CBST_NOWAIT,
40 CBST_DONE
41};
42
43/*
44 * Bins that grow with arithmetic step
45 */
46template<size_t MIN_SIZE, size_t MAX_SIZE>
47struct LargeBinStructureProps {
48public:
49 static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
50 static const size_t CacheStep = 8 * 1024;
51 static const unsigned NumBins = (MaxSize - MinSize) / CacheStep;
52
53 static size_t alignToBin(size_t size) {
54 return alignUp(size, CacheStep);
55 }
56
57 static int sizeToIdx(size_t size) {
58 MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT);
59 MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT);
60 return (size - MinSize) / CacheStep;
61 }
62};
63
64/*
65 * Bins that grow with special geometric progression.
66 */
67template<size_t MIN_SIZE, size_t MAX_SIZE>
68struct HugeBinStructureProps {
69
70private:
71 // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor))
72 // There are StepFactor bins (8 be default) between each power of 2 bin
73 static const int MaxSizeExp = Log2<MAX_SIZE>::value;
74 static const int MinSizeExp = Log2<MIN_SIZE>::value;
75 static const int StepFactor = 8;
76 static const int StepFactorExp = Log2<StepFactor>::value;
77
78public:
79 static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
80 static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor;
81
82 static size_t alignToBin(size_t size) {
83 size_t minorStepExp = BitScanRev(size) - StepFactorExp;
84 return alignUp(size, 1ULL << minorStepExp);
85 }
86
87 // Sizes between the power of 2 values are aproximated to StepFactor.
88 static int sizeToIdx(size_t size) {
89 MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT);
90 int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2
91 size_t majorStepSize = 1ULL << sizeExp;
92 int minorStepExp = sizeExp - StepFactorExp;
93 int minorIdx = (size - majorStepSize) >> minorStepExp;
94 MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp),
95 "Size is not aligned on the bin");
96 return StepFactor * (sizeExp - MinSizeExp) + minorIdx;
97 }
98};
99
100/*
101 * Cache properties accessor
102 *
103 * TooLargeFactor -- when cache size treated "too large" in comparison to user data size
104 * OnMissFactor -- If cache miss occurred and cache was cleaned,
105 * set ageThreshold to OnMissFactor * the difference
106 * between current time and last time cache was cleaned.
107 * LongWaitFactor -- to detect rarely-used bins and forget about their usage history
108 */
109template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT>
110struct LargeObjectCacheProps : public StructureProps {
111 static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT;
112};
113
114template<typename Props>
115class LargeObjectCacheImpl {
116private:
117
118 // Current sizes of used and cached objects. It's calculated while we are
119 // traversing bins, and used for isLOCTooLarge() check at the same time.
120 class BinsSummary {
121 size_t usedSz;
122 size_t cachedSz;
123 public:
124 BinsSummary() : usedSz(0), cachedSz(0) {}
125 // "too large" criteria
126 bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; }
127 void update(size_t usedSize, size_t cachedSize) {
128 usedSz += usedSize;
129 cachedSz += cachedSize;
130 }
131 void reset() { usedSz = cachedSz = 0; }
132 };
133
134public:
135 // The number of bins to cache large/huge objects.
136 static const uint32_t numBins = Props::NumBins;
137
138 typedef BitMaskMax<numBins> BinBitMask;
139
140 // 2-linked list of same-size cached blocks ordered by age (oldest on top)
141 // TODO: are we really want the list to be 2-linked? This allows us
142 // reduce memory consumption and do less operations under lock.
143 // TODO: try to switch to 32-bit logical time to save space in CacheBin
144 // and move bins to different cache lines.
145 class CacheBin {
146 private:
147 LargeMemoryBlock *first,
148 *last;
149 /* age of an oldest block in the list; equal to last->age, if last defined,
150 used for quick checking it without acquiring the lock. */
151 uintptr_t oldest;
152 /* currAge when something was excluded out of list because of the age,
153 not because of cache hit */
154 uintptr_t lastCleanedAge;
155 /* Current threshold value for the blocks of a particular size.
156 Set on cache miss. */
157 intptr_t ageThreshold;
158
159 /* total size of all objects corresponding to the bin and allocated by user */
160 size_t usedSize,
161 /* total size of all objects cached in the bin */
162 cachedSize;
163 /* mean time of presence of block in the bin before successful reuse */
164 intptr_t meanHitRange;
165 /* time of last get called for the bin */
166 uintptr_t lastGet;
167
168 typename MallocAggregator<CacheBinOperation>::type aggregator;
169
170 void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true);
171
172 /* should be placed in zero-initialized memory, ctor not needed. */
173 CacheBin();
174
175 public:
176 void init() {
177 memset(this, 0, sizeof(CacheBin));
178 }
179
180 /* ---------- Cache accessors ---------- */
181 void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx);
182 LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
183
184 /* ---------- Cleanup functions -------- */
185 bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx);
186 bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx);
187 /* ------------------------------------- */
188
189 void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
190 void decreaseThreshold() {
191 if (ageThreshold)
192 ageThreshold = (ageThreshold + meanHitRange) / 2;
193 }
194 void updateBinsSummary(BinsSummary *binsSummary) const {
195 binsSummary->update(usedSize, cachedSize);
196 }
197 size_t getSize() const { return cachedSize; }
198 size_t getUsedSize() const { return usedSize; }
199 size_t reportStat(int num, FILE *f);
200
201 /* --------- Unsafe methods used with the aggregator ------- */
202 void forgetOutdatedState(uintptr_t currTime);
203 LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask,
204 int idx, int num, size_t hugeObjectThreshold);
205 LargeMemoryBlock *get();
206 LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx);
207 LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx);
208 void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) {
209 if (!usedSize) bitMask->set(idx, true);
210 usedSize += size;
211 if (!usedSize && !first) bitMask->set(idx, false);
212 }
213 void updateMeanHitRange( intptr_t hitRange ) {
214 hitRange = hitRange >= 0 ? hitRange : 0;
215 meanHitRange = meanHitRange ? (meanHitRange + hitRange) / 2 : hitRange;
216 }
217 void updateAgeThreshold( uintptr_t currTime ) {
218 if (lastCleanedAge)
219 ageThreshold = Props::OnMissFactor*(currTime - lastCleanedAge);
220 }
221 void updateCachedSize(size_t size) {
222 cachedSize += size;
223 }
224 void setLastGet( uintptr_t newLastGet ) {
225 lastGet = newLastGet;
226 }
227 /* -------------------------------------------------------- */
228 };
229
230 // Huge bins index for fast regular cleanup searching in case of
231 // the "huge size threshold" setting defined
232 intptr_t hugeSizeThresholdIdx;
233
234private:
235 // How many times LOC was "too large"
236 intptr_t tooLargeLOC;
237 // for fast finding of used bins and bins with non-zero usedSize;
238 // indexed from the end, as we need largest 1st
239 BinBitMask bitMask;
240 // bins with lists of recently freed large blocks cached for re-use
241 CacheBin bin[numBins];
242
243public:
244 /* ------------ CacheBin structure dependent stuff ------------ */
245 static size_t alignToBin(size_t size) {
246 return Props::alignToBin(size);
247 }
248 static int sizeToIdx(size_t size) {
249 return Props::sizeToIdx(size);
250 }
251
252 /* --------- Main cache functions (put, get object) ------------ */
253 void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
254 LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size);
255
256 /* ------------------------ Cleanup ---------------------------- */
257 bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr);
258 bool cleanAll(ExtMemoryPool *extMemPool);
259
260 /* -------------------------- Other ---------------------------- */
261 void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size);
262
263 void reset();
264 void reportStat(FILE *f);
265#if __TBB_MALLOC_WHITEBOX_TEST
266 size_t getLOCSize() const;
267 size_t getUsedSize() const;
268#endif
269};
270
271class LargeObjectCache {
272private:
273 // Large bins [minLargeSize, maxLargeSize)
274 // Huge bins [maxLargeSize, maxHugeSize)
275 static const size_t minLargeSize = 8 * 1024,
276 maxLargeSize = 8 * 1024 * 1024,
277 // Cache memory up to 1TB (or 2GB for 32-bit arch), but sieve objects from the special threshold
278 maxHugeSize = tbb::internal::select_size_t_constant<2147483648U, 1099511627776ULL>::value;
279
280public:
281 // Upper bound threshold for caching size. After that size all objects sieve through cache
282 // By default - 64MB, previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks)
283 static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL;
284 // After that size large object interpreted as huge and does not participate in regular cleanup.
285 // Can be changed during the program execution.
286 size_t hugeSizeThreshold;
287
288private:
289 // Large objects cache properties
290 typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps;
291 typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps;
292
293 // Huge objects cache properties
294 typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps;
295 typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps;
296
297 // Cache implementation type with properties
298 typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType;
299 typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType;
300
301 // Beginning of largeCache is more actively used and smaller than hugeCache,
302 // so put hugeCache first to prevent false sharing
303 // with LargeObjectCache's predecessor
304 HugeCacheType hugeCache;
305 LargeCacheType largeCache;
306
307 /* logical time, incremented on each put/get operation
308 To prevent starvation between pools, keep separately for each pool.
309 Overflow is OK, as we only want difference between
310 its current value and some recent.
311
312 Both malloc and free should increment logical time, as in
313 a different case multiple cached blocks would have same age,
314 and accuracy of predictors suffers.
315 */
316 uintptr_t cacheCurrTime;
317
318 // Memory pool that owns this LargeObjectCache.
319 // strict 1:1 relation, never changed
320 ExtMemoryPool *extMemPool;
321
322 // Returns artificial bin index,
323 // it's used only during sorting and never saved
324 static int sizeToIdx(size_t size);
325
326 // Our friends
327 friend class Backend;
328
329public:
330 void init(ExtMemoryPool *memPool);
331
332 // Item accessors
333 void put(LargeMemoryBlock *largeBlock);
334 void putList(LargeMemoryBlock *head);
335 LargeMemoryBlock *get(size_t size);
336
337 void updateCacheState(DecreaseOrIncrease op, size_t size);
338 bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime);
339
340 // Cleanup operations
341 bool doCleanup(uintptr_t currTime, bool doThreshDecr);
342 bool decreasingCleanup();
343 bool regularCleanup();
344 bool cleanAll();
345 void reset();
346
347 void reportStat(FILE *f);
348#if __TBB_MALLOC_WHITEBOX_TEST
349 size_t getLOCSize() const;
350 size_t getUsedSize() const;
351#endif
352
353 // Cache deals with exact-fit sizes, so need to align each size
354 // to the specific bin when put object to cache
355 static size_t alignToBin(size_t size);
356
357 void setHugeSizeThreshold(size_t value);
358
359 // Check if we should cache or sieve this size
360 bool sizeInCacheRange(size_t size);
361
362 uintptr_t getCurrTime();
363 uintptr_t getCurrTimeRange(uintptr_t range);
364 void registerRealloc(size_t oldSize, size_t newSize);
365};
366
367#endif // __TBB_large_objects_H
368
369