1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #ifndef __TBB_tbbmalloc_internal_H |
18 | #error tbbmalloc_internal.h must be included at this point |
19 | #endif |
20 | |
21 | #ifndef __TBB_large_objects_H |
22 | #define __TBB_large_objects_H |
23 | |
24 | //! The list of possible Cache Bin Aggregator operations. |
25 | /* Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */ |
26 | enum CacheBinOperationType { |
27 | CBOP_INVALID = 0, |
28 | CBOP_GET, |
29 | CBOP_PUT_LIST, |
30 | CBOP_CLEAN_TO_THRESHOLD, |
31 | CBOP_CLEAN_ALL, |
32 | CBOP_UPDATE_USED_SIZE |
33 | }; |
34 | |
35 | // The Cache Bin Aggregator operation status list. |
36 | // CBST_NOWAIT can be specified for non-blocking operations. |
37 | enum CacheBinOperationStatus { |
38 | CBST_WAIT = 0, |
39 | CBST_NOWAIT, |
40 | CBST_DONE |
41 | }; |
42 | |
43 | /* |
44 | * Bins that grow with arithmetic step |
45 | */ |
46 | template<size_t MIN_SIZE, size_t MAX_SIZE> |
47 | struct LargeBinStructureProps { |
48 | public: |
49 | static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE; |
50 | static const size_t CacheStep = 8 * 1024; |
51 | static const unsigned NumBins = (MaxSize - MinSize) / CacheStep; |
52 | |
53 | static size_t alignToBin(size_t size) { |
54 | return alignUp(size, CacheStep); |
55 | } |
56 | |
57 | static int sizeToIdx(size_t size) { |
58 | MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT); |
59 | MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT); |
60 | return (size - MinSize) / CacheStep; |
61 | } |
62 | }; |
63 | |
64 | /* |
65 | * Bins that grow with special geometric progression. |
66 | */ |
67 | template<size_t MIN_SIZE, size_t MAX_SIZE> |
68 | struct HugeBinStructureProps { |
69 | |
70 | private: |
71 | // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor)) |
72 | // There are StepFactor bins (8 be default) between each power of 2 bin |
73 | static const int MaxSizeExp = Log2<MAX_SIZE>::value; |
74 | static const int MinSizeExp = Log2<MIN_SIZE>::value; |
75 | static const int StepFactor = 8; |
76 | static const int StepFactorExp = Log2<StepFactor>::value; |
77 | |
78 | public: |
79 | static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE; |
80 | static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor; |
81 | |
82 | static size_t alignToBin(size_t size) { |
83 | size_t minorStepExp = BitScanRev(size) - StepFactorExp; |
84 | return alignUp(size, 1ULL << minorStepExp); |
85 | } |
86 | |
87 | // Sizes between the power of 2 values are aproximated to StepFactor. |
88 | static int sizeToIdx(size_t size) { |
89 | MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT); |
90 | int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2 |
91 | size_t majorStepSize = 1ULL << sizeExp; |
92 | int minorStepExp = sizeExp - StepFactorExp; |
93 | int minorIdx = (size - majorStepSize) >> minorStepExp; |
94 | MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp), |
95 | "Size is not aligned on the bin" ); |
96 | return StepFactor * (sizeExp - MinSizeExp) + minorIdx; |
97 | } |
98 | }; |
99 | |
100 | /* |
101 | * Cache properties accessor |
102 | * |
103 | * TooLargeFactor -- when cache size treated "too large" in comparison to user data size |
104 | * OnMissFactor -- If cache miss occurred and cache was cleaned, |
105 | * set ageThreshold to OnMissFactor * the difference |
106 | * between current time and last time cache was cleaned. |
107 | * LongWaitFactor -- to detect rarely-used bins and forget about their usage history |
108 | */ |
109 | template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT> |
110 | struct LargeObjectCacheProps : public StructureProps { |
111 | static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT; |
112 | }; |
113 | |
114 | template<typename Props> |
115 | class LargeObjectCacheImpl { |
116 | private: |
117 | |
118 | // Current sizes of used and cached objects. It's calculated while we are |
119 | // traversing bins, and used for isLOCTooLarge() check at the same time. |
120 | class BinsSummary { |
121 | size_t usedSz; |
122 | size_t cachedSz; |
123 | public: |
124 | BinsSummary() : usedSz(0), cachedSz(0) {} |
125 | // "too large" criteria |
126 | bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; } |
127 | void update(size_t usedSize, size_t cachedSize) { |
128 | usedSz += usedSize; |
129 | cachedSz += cachedSize; |
130 | } |
131 | void reset() { usedSz = cachedSz = 0; } |
132 | }; |
133 | |
134 | public: |
135 | // The number of bins to cache large/huge objects. |
136 | static const uint32_t numBins = Props::NumBins; |
137 | |
138 | typedef BitMaskMax<numBins> BinBitMask; |
139 | |
140 | // 2-linked list of same-size cached blocks ordered by age (oldest on top) |
141 | // TODO: are we really want the list to be 2-linked? This allows us |
142 | // reduce memory consumption and do less operations under lock. |
143 | // TODO: try to switch to 32-bit logical time to save space in CacheBin |
144 | // and move bins to different cache lines. |
145 | class CacheBin { |
146 | private: |
147 | LargeMemoryBlock *first, |
148 | *last; |
149 | /* age of an oldest block in the list; equal to last->age, if last defined, |
150 | used for quick checking it without acquiring the lock. */ |
151 | uintptr_t oldest; |
152 | /* currAge when something was excluded out of list because of the age, |
153 | not because of cache hit */ |
154 | uintptr_t lastCleanedAge; |
155 | /* Current threshold value for the blocks of a particular size. |
156 | Set on cache miss. */ |
157 | intptr_t ageThreshold; |
158 | |
159 | /* total size of all objects corresponding to the bin and allocated by user */ |
160 | size_t usedSize, |
161 | /* total size of all objects cached in the bin */ |
162 | cachedSize; |
163 | /* mean time of presence of block in the bin before successful reuse */ |
164 | intptr_t meanHitRange; |
165 | /* time of last get called for the bin */ |
166 | uintptr_t lastGet; |
167 | |
168 | typename MallocAggregator<CacheBinOperation>::type aggregator; |
169 | |
170 | void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true); |
171 | |
172 | /* should be placed in zero-initialized memory, ctor not needed. */ |
173 | CacheBin(); |
174 | |
175 | public: |
176 | void init() { |
177 | memset(this, 0, sizeof(CacheBin)); |
178 | } |
179 | |
180 | /* ---------- Cache accessors ---------- */ |
181 | void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx); |
182 | LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx); |
183 | |
184 | /* ---------- Cleanup functions -------- */ |
185 | bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx); |
186 | bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx); |
187 | /* ------------------------------------- */ |
188 | |
189 | void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx); |
190 | void decreaseThreshold() { |
191 | if (ageThreshold) |
192 | ageThreshold = (ageThreshold + meanHitRange) / 2; |
193 | } |
194 | void updateBinsSummary(BinsSummary *binsSummary) const { |
195 | binsSummary->update(usedSize, cachedSize); |
196 | } |
197 | size_t getSize() const { return cachedSize; } |
198 | size_t getUsedSize() const { return usedSize; } |
199 | size_t reportStat(int num, FILE *f); |
200 | |
201 | /* --------- Unsafe methods used with the aggregator ------- */ |
202 | void forgetOutdatedState(uintptr_t currTime); |
203 | LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, |
204 | int idx, int num, size_t hugeObjectThreshold); |
205 | LargeMemoryBlock *get(); |
206 | LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx); |
207 | LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx); |
208 | void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) { |
209 | if (!usedSize) bitMask->set(idx, true); |
210 | usedSize += size; |
211 | if (!usedSize && !first) bitMask->set(idx, false); |
212 | } |
213 | void updateMeanHitRange( intptr_t hitRange ) { |
214 | hitRange = hitRange >= 0 ? hitRange : 0; |
215 | meanHitRange = meanHitRange ? (meanHitRange + hitRange) / 2 : hitRange; |
216 | } |
217 | void updateAgeThreshold( uintptr_t currTime ) { |
218 | if (lastCleanedAge) |
219 | ageThreshold = Props::OnMissFactor*(currTime - lastCleanedAge); |
220 | } |
221 | void updateCachedSize(size_t size) { |
222 | cachedSize += size; |
223 | } |
224 | void setLastGet( uintptr_t newLastGet ) { |
225 | lastGet = newLastGet; |
226 | } |
227 | /* -------------------------------------------------------- */ |
228 | }; |
229 | |
230 | // Huge bins index for fast regular cleanup searching in case of |
231 | // the "huge size threshold" setting defined |
232 | intptr_t hugeSizeThresholdIdx; |
233 | |
234 | private: |
235 | // How many times LOC was "too large" |
236 | intptr_t tooLargeLOC; |
237 | // for fast finding of used bins and bins with non-zero usedSize; |
238 | // indexed from the end, as we need largest 1st |
239 | BinBitMask bitMask; |
240 | // bins with lists of recently freed large blocks cached for re-use |
241 | CacheBin bin[numBins]; |
242 | |
243 | public: |
244 | /* ------------ CacheBin structure dependent stuff ------------ */ |
245 | static size_t alignToBin(size_t size) { |
246 | return Props::alignToBin(size); |
247 | } |
248 | static int sizeToIdx(size_t size) { |
249 | return Props::sizeToIdx(size); |
250 | } |
251 | |
252 | /* --------- Main cache functions (put, get object) ------------ */ |
253 | void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock); |
254 | LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size); |
255 | |
256 | /* ------------------------ Cleanup ---------------------------- */ |
257 | bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr); |
258 | bool cleanAll(ExtMemoryPool *extMemPool); |
259 | |
260 | /* -------------------------- Other ---------------------------- */ |
261 | void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size); |
262 | |
263 | void reset(); |
264 | void reportStat(FILE *f); |
265 | #if __TBB_MALLOC_WHITEBOX_TEST |
266 | size_t getLOCSize() const; |
267 | size_t getUsedSize() const; |
268 | #endif |
269 | }; |
270 | |
271 | class LargeObjectCache { |
272 | private: |
273 | // Large bins [minLargeSize, maxLargeSize) |
274 | // Huge bins [maxLargeSize, maxHugeSize) |
275 | static const size_t minLargeSize = 8 * 1024, |
276 | maxLargeSize = 8 * 1024 * 1024, |
277 | // Cache memory up to 1TB (or 2GB for 32-bit arch), but sieve objects from the special threshold |
278 | maxHugeSize = tbb::internal::select_size_t_constant<2147483648U, 1099511627776ULL>::value; |
279 | |
280 | public: |
281 | // Upper bound threshold for caching size. After that size all objects sieve through cache |
282 | // By default - 64MB, previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks) |
283 | static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL; |
284 | // After that size large object interpreted as huge and does not participate in regular cleanup. |
285 | // Can be changed during the program execution. |
286 | size_t hugeSizeThreshold; |
287 | |
288 | private: |
289 | // Large objects cache properties |
290 | typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps; |
291 | typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps; |
292 | |
293 | // Huge objects cache properties |
294 | typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps; |
295 | typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps; |
296 | |
297 | // Cache implementation type with properties |
298 | typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType; |
299 | typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType; |
300 | |
301 | // Beginning of largeCache is more actively used and smaller than hugeCache, |
302 | // so put hugeCache first to prevent false sharing |
303 | // with LargeObjectCache's predecessor |
304 | HugeCacheType hugeCache; |
305 | LargeCacheType largeCache; |
306 | |
307 | /* logical time, incremented on each put/get operation |
308 | To prevent starvation between pools, keep separately for each pool. |
309 | Overflow is OK, as we only want difference between |
310 | its current value and some recent. |
311 | |
312 | Both malloc and free should increment logical time, as in |
313 | a different case multiple cached blocks would have same age, |
314 | and accuracy of predictors suffers. |
315 | */ |
316 | uintptr_t cacheCurrTime; |
317 | |
318 | // Memory pool that owns this LargeObjectCache. |
319 | // strict 1:1 relation, never changed |
320 | ExtMemoryPool *extMemPool; |
321 | |
322 | // Returns artificial bin index, |
323 | // it's used only during sorting and never saved |
324 | static int sizeToIdx(size_t size); |
325 | |
326 | // Our friends |
327 | friend class Backend; |
328 | |
329 | public: |
330 | void init(ExtMemoryPool *memPool); |
331 | |
332 | // Item accessors |
333 | void put(LargeMemoryBlock *largeBlock); |
334 | void putList(LargeMemoryBlock *head); |
335 | LargeMemoryBlock *get(size_t size); |
336 | |
337 | void updateCacheState(DecreaseOrIncrease op, size_t size); |
338 | bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime); |
339 | |
340 | // Cleanup operations |
341 | bool doCleanup(uintptr_t currTime, bool doThreshDecr); |
342 | bool decreasingCleanup(); |
343 | bool regularCleanup(); |
344 | bool cleanAll(); |
345 | void reset(); |
346 | |
347 | void reportStat(FILE *f); |
348 | #if __TBB_MALLOC_WHITEBOX_TEST |
349 | size_t getLOCSize() const; |
350 | size_t getUsedSize() const; |
351 | #endif |
352 | |
353 | // Cache deals with exact-fit sizes, so need to align each size |
354 | // to the specific bin when put object to cache |
355 | static size_t alignToBin(size_t size); |
356 | |
357 | void setHugeSizeThreshold(size_t value); |
358 | |
359 | // Check if we should cache or sieve this size |
360 | bool sizeInCacheRange(size_t size); |
361 | |
362 | uintptr_t getCurrTime(); |
363 | uintptr_t getCurrTimeRange(uintptr_t range); |
364 | void registerRealloc(size_t oldSize, size_t newSize); |
365 | }; |
366 | |
367 | #endif // __TBB_large_objects_H |
368 | |
369 | |