| 1 | /* |
| 2 | Copyright (c) 2005-2019 Intel Corporation |
| 3 | |
| 4 | Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | you may not use this file except in compliance with the License. |
| 6 | You may obtain a copy of the License at |
| 7 | |
| 8 | http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | |
| 10 | Unless required by applicable law or agreed to in writing, software |
| 11 | distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | See the License for the specific language governing permissions and |
| 14 | limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef __TBB_tbbmalloc_internal_H |
| 18 | #error tbbmalloc_internal.h must be included at this point |
| 19 | #endif |
| 20 | |
| 21 | #ifndef __TBB_large_objects_H |
| 22 | #define __TBB_large_objects_H |
| 23 | |
| 24 | //! The list of possible Cache Bin Aggregator operations. |
| 25 | /* Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */ |
| 26 | enum CacheBinOperationType { |
| 27 | CBOP_INVALID = 0, |
| 28 | CBOP_GET, |
| 29 | CBOP_PUT_LIST, |
| 30 | CBOP_CLEAN_TO_THRESHOLD, |
| 31 | CBOP_CLEAN_ALL, |
| 32 | CBOP_UPDATE_USED_SIZE |
| 33 | }; |
| 34 | |
| 35 | // The Cache Bin Aggregator operation status list. |
| 36 | // CBST_NOWAIT can be specified for non-blocking operations. |
| 37 | enum CacheBinOperationStatus { |
| 38 | CBST_WAIT = 0, |
| 39 | CBST_NOWAIT, |
| 40 | CBST_DONE |
| 41 | }; |
| 42 | |
| 43 | /* |
| 44 | * Bins that grow with arithmetic step |
| 45 | */ |
| 46 | template<size_t MIN_SIZE, size_t MAX_SIZE> |
| 47 | struct LargeBinStructureProps { |
| 48 | public: |
| 49 | static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE; |
| 50 | static const size_t CacheStep = 8 * 1024; |
| 51 | static const unsigned NumBins = (MaxSize - MinSize) / CacheStep; |
| 52 | |
| 53 | static size_t alignToBin(size_t size) { |
| 54 | return alignUp(size, CacheStep); |
| 55 | } |
| 56 | |
| 57 | static int sizeToIdx(size_t size) { |
| 58 | MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT); |
| 59 | MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT); |
| 60 | return (size - MinSize) / CacheStep; |
| 61 | } |
| 62 | }; |
| 63 | |
| 64 | /* |
| 65 | * Bins that grow with special geometric progression. |
| 66 | */ |
| 67 | template<size_t MIN_SIZE, size_t MAX_SIZE> |
| 68 | struct HugeBinStructureProps { |
| 69 | |
| 70 | private: |
| 71 | // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor)) |
| 72 | // There are StepFactor bins (8 be default) between each power of 2 bin |
| 73 | static const int MaxSizeExp = Log2<MAX_SIZE>::value; |
| 74 | static const int MinSizeExp = Log2<MIN_SIZE>::value; |
| 75 | static const int StepFactor = 8; |
| 76 | static const int StepFactorExp = Log2<StepFactor>::value; |
| 77 | |
| 78 | public: |
| 79 | static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE; |
| 80 | static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor; |
| 81 | |
| 82 | static size_t alignToBin(size_t size) { |
| 83 | size_t minorStepExp = BitScanRev(size) - StepFactorExp; |
| 84 | return alignUp(size, 1ULL << minorStepExp); |
| 85 | } |
| 86 | |
| 87 | // Sizes between the power of 2 values are aproximated to StepFactor. |
| 88 | static int sizeToIdx(size_t size) { |
| 89 | MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT); |
| 90 | int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2 |
| 91 | size_t majorStepSize = 1ULL << sizeExp; |
| 92 | int minorStepExp = sizeExp - StepFactorExp; |
| 93 | int minorIdx = (size - majorStepSize) >> minorStepExp; |
| 94 | MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp), |
| 95 | "Size is not aligned on the bin" ); |
| 96 | return StepFactor * (sizeExp - MinSizeExp) + minorIdx; |
| 97 | } |
| 98 | }; |
| 99 | |
| 100 | /* |
| 101 | * Cache properties accessor |
| 102 | * |
| 103 | * TooLargeFactor -- when cache size treated "too large" in comparison to user data size |
| 104 | * OnMissFactor -- If cache miss occurred and cache was cleaned, |
| 105 | * set ageThreshold to OnMissFactor * the difference |
| 106 | * between current time and last time cache was cleaned. |
| 107 | * LongWaitFactor -- to detect rarely-used bins and forget about their usage history |
| 108 | */ |
| 109 | template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT> |
| 110 | struct LargeObjectCacheProps : public StructureProps { |
| 111 | static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT; |
| 112 | }; |
| 113 | |
| 114 | template<typename Props> |
| 115 | class LargeObjectCacheImpl { |
| 116 | private: |
| 117 | |
| 118 | // Current sizes of used and cached objects. It's calculated while we are |
| 119 | // traversing bins, and used for isLOCTooLarge() check at the same time. |
| 120 | class BinsSummary { |
| 121 | size_t usedSz; |
| 122 | size_t cachedSz; |
| 123 | public: |
| 124 | BinsSummary() : usedSz(0), cachedSz(0) {} |
| 125 | // "too large" criteria |
| 126 | bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; } |
| 127 | void update(size_t usedSize, size_t cachedSize) { |
| 128 | usedSz += usedSize; |
| 129 | cachedSz += cachedSize; |
| 130 | } |
| 131 | void reset() { usedSz = cachedSz = 0; } |
| 132 | }; |
| 133 | |
| 134 | public: |
| 135 | // The number of bins to cache large/huge objects. |
| 136 | static const uint32_t numBins = Props::NumBins; |
| 137 | |
| 138 | typedef BitMaskMax<numBins> BinBitMask; |
| 139 | |
| 140 | // 2-linked list of same-size cached blocks ordered by age (oldest on top) |
| 141 | // TODO: are we really want the list to be 2-linked? This allows us |
| 142 | // reduce memory consumption and do less operations under lock. |
| 143 | // TODO: try to switch to 32-bit logical time to save space in CacheBin |
| 144 | // and move bins to different cache lines. |
| 145 | class CacheBin { |
| 146 | private: |
| 147 | LargeMemoryBlock *first, |
| 148 | *last; |
| 149 | /* age of an oldest block in the list; equal to last->age, if last defined, |
| 150 | used for quick checking it without acquiring the lock. */ |
| 151 | uintptr_t oldest; |
| 152 | /* currAge when something was excluded out of list because of the age, |
| 153 | not because of cache hit */ |
| 154 | uintptr_t lastCleanedAge; |
| 155 | /* Current threshold value for the blocks of a particular size. |
| 156 | Set on cache miss. */ |
| 157 | intptr_t ageThreshold; |
| 158 | |
| 159 | /* total size of all objects corresponding to the bin and allocated by user */ |
| 160 | size_t usedSize, |
| 161 | /* total size of all objects cached in the bin */ |
| 162 | cachedSize; |
| 163 | /* mean time of presence of block in the bin before successful reuse */ |
| 164 | intptr_t meanHitRange; |
| 165 | /* time of last get called for the bin */ |
| 166 | uintptr_t lastGet; |
| 167 | |
| 168 | typename MallocAggregator<CacheBinOperation>::type aggregator; |
| 169 | |
| 170 | void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true); |
| 171 | |
| 172 | /* should be placed in zero-initialized memory, ctor not needed. */ |
| 173 | CacheBin(); |
| 174 | |
| 175 | public: |
| 176 | void init() { |
| 177 | memset(this, 0, sizeof(CacheBin)); |
| 178 | } |
| 179 | |
| 180 | /* ---------- Cache accessors ---------- */ |
| 181 | void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx); |
| 182 | LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx); |
| 183 | |
| 184 | /* ---------- Cleanup functions -------- */ |
| 185 | bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx); |
| 186 | bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx); |
| 187 | /* ------------------------------------- */ |
| 188 | |
| 189 | void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx); |
| 190 | void decreaseThreshold() { |
| 191 | if (ageThreshold) |
| 192 | ageThreshold = (ageThreshold + meanHitRange) / 2; |
| 193 | } |
| 194 | void updateBinsSummary(BinsSummary *binsSummary) const { |
| 195 | binsSummary->update(usedSize, cachedSize); |
| 196 | } |
| 197 | size_t getSize() const { return cachedSize; } |
| 198 | size_t getUsedSize() const { return usedSize; } |
| 199 | size_t reportStat(int num, FILE *f); |
| 200 | |
| 201 | /* --------- Unsafe methods used with the aggregator ------- */ |
| 202 | void forgetOutdatedState(uintptr_t currTime); |
| 203 | LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, |
| 204 | int idx, int num, size_t hugeObjectThreshold); |
| 205 | LargeMemoryBlock *get(); |
| 206 | LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx); |
| 207 | LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx); |
| 208 | void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) { |
| 209 | if (!usedSize) bitMask->set(idx, true); |
| 210 | usedSize += size; |
| 211 | if (!usedSize && !first) bitMask->set(idx, false); |
| 212 | } |
| 213 | void updateMeanHitRange( intptr_t hitRange ) { |
| 214 | hitRange = hitRange >= 0 ? hitRange : 0; |
| 215 | meanHitRange = meanHitRange ? (meanHitRange + hitRange) / 2 : hitRange; |
| 216 | } |
| 217 | void updateAgeThreshold( uintptr_t currTime ) { |
| 218 | if (lastCleanedAge) |
| 219 | ageThreshold = Props::OnMissFactor*(currTime - lastCleanedAge); |
| 220 | } |
| 221 | void updateCachedSize(size_t size) { |
| 222 | cachedSize += size; |
| 223 | } |
| 224 | void setLastGet( uintptr_t newLastGet ) { |
| 225 | lastGet = newLastGet; |
| 226 | } |
| 227 | /* -------------------------------------------------------- */ |
| 228 | }; |
| 229 | |
| 230 | // Huge bins index for fast regular cleanup searching in case of |
| 231 | // the "huge size threshold" setting defined |
| 232 | intptr_t hugeSizeThresholdIdx; |
| 233 | |
| 234 | private: |
| 235 | // How many times LOC was "too large" |
| 236 | intptr_t tooLargeLOC; |
| 237 | // for fast finding of used bins and bins with non-zero usedSize; |
| 238 | // indexed from the end, as we need largest 1st |
| 239 | BinBitMask bitMask; |
| 240 | // bins with lists of recently freed large blocks cached for re-use |
| 241 | CacheBin bin[numBins]; |
| 242 | |
| 243 | public: |
| 244 | /* ------------ CacheBin structure dependent stuff ------------ */ |
| 245 | static size_t alignToBin(size_t size) { |
| 246 | return Props::alignToBin(size); |
| 247 | } |
| 248 | static int sizeToIdx(size_t size) { |
| 249 | return Props::sizeToIdx(size); |
| 250 | } |
| 251 | |
| 252 | /* --------- Main cache functions (put, get object) ------------ */ |
| 253 | void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock); |
| 254 | LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size); |
| 255 | |
| 256 | /* ------------------------ Cleanup ---------------------------- */ |
| 257 | bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr); |
| 258 | bool cleanAll(ExtMemoryPool *extMemPool); |
| 259 | |
| 260 | /* -------------------------- Other ---------------------------- */ |
| 261 | void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size); |
| 262 | |
| 263 | void reset(); |
| 264 | void reportStat(FILE *f); |
| 265 | #if __TBB_MALLOC_WHITEBOX_TEST |
| 266 | size_t getLOCSize() const; |
| 267 | size_t getUsedSize() const; |
| 268 | #endif |
| 269 | }; |
| 270 | |
| 271 | class LargeObjectCache { |
| 272 | private: |
| 273 | // Large bins [minLargeSize, maxLargeSize) |
| 274 | // Huge bins [maxLargeSize, maxHugeSize) |
| 275 | static const size_t minLargeSize = 8 * 1024, |
| 276 | maxLargeSize = 8 * 1024 * 1024, |
| 277 | // Cache memory up to 1TB (or 2GB for 32-bit arch), but sieve objects from the special threshold |
| 278 | maxHugeSize = tbb::internal::select_size_t_constant<2147483648U, 1099511627776ULL>::value; |
| 279 | |
| 280 | public: |
| 281 | // Upper bound threshold for caching size. After that size all objects sieve through cache |
| 282 | // By default - 64MB, previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks) |
| 283 | static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL; |
| 284 | // After that size large object interpreted as huge and does not participate in regular cleanup. |
| 285 | // Can be changed during the program execution. |
| 286 | size_t hugeSizeThreshold; |
| 287 | |
| 288 | private: |
| 289 | // Large objects cache properties |
| 290 | typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps; |
| 291 | typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps; |
| 292 | |
| 293 | // Huge objects cache properties |
| 294 | typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps; |
| 295 | typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps; |
| 296 | |
| 297 | // Cache implementation type with properties |
| 298 | typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType; |
| 299 | typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType; |
| 300 | |
| 301 | // Beginning of largeCache is more actively used and smaller than hugeCache, |
| 302 | // so put hugeCache first to prevent false sharing |
| 303 | // with LargeObjectCache's predecessor |
| 304 | HugeCacheType hugeCache; |
| 305 | LargeCacheType largeCache; |
| 306 | |
| 307 | /* logical time, incremented on each put/get operation |
| 308 | To prevent starvation between pools, keep separately for each pool. |
| 309 | Overflow is OK, as we only want difference between |
| 310 | its current value and some recent. |
| 311 | |
| 312 | Both malloc and free should increment logical time, as in |
| 313 | a different case multiple cached blocks would have same age, |
| 314 | and accuracy of predictors suffers. |
| 315 | */ |
| 316 | uintptr_t cacheCurrTime; |
| 317 | |
| 318 | // Memory pool that owns this LargeObjectCache. |
| 319 | // strict 1:1 relation, never changed |
| 320 | ExtMemoryPool *extMemPool; |
| 321 | |
| 322 | // Returns artificial bin index, |
| 323 | // it's used only during sorting and never saved |
| 324 | static int sizeToIdx(size_t size); |
| 325 | |
| 326 | // Our friends |
| 327 | friend class Backend; |
| 328 | |
| 329 | public: |
| 330 | void init(ExtMemoryPool *memPool); |
| 331 | |
| 332 | // Item accessors |
| 333 | void put(LargeMemoryBlock *largeBlock); |
| 334 | void putList(LargeMemoryBlock *head); |
| 335 | LargeMemoryBlock *get(size_t size); |
| 336 | |
| 337 | void updateCacheState(DecreaseOrIncrease op, size_t size); |
| 338 | bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime); |
| 339 | |
| 340 | // Cleanup operations |
| 341 | bool doCleanup(uintptr_t currTime, bool doThreshDecr); |
| 342 | bool decreasingCleanup(); |
| 343 | bool regularCleanup(); |
| 344 | bool cleanAll(); |
| 345 | void reset(); |
| 346 | |
| 347 | void reportStat(FILE *f); |
| 348 | #if __TBB_MALLOC_WHITEBOX_TEST |
| 349 | size_t getLOCSize() const; |
| 350 | size_t getUsedSize() const; |
| 351 | #endif |
| 352 | |
| 353 | // Cache deals with exact-fit sizes, so need to align each size |
| 354 | // to the specific bin when put object to cache |
| 355 | static size_t alignToBin(size_t size); |
| 356 | |
| 357 | void setHugeSizeThreshold(size_t value); |
| 358 | |
| 359 | // Check if we should cache or sieve this size |
| 360 | bool sizeInCacheRange(size_t size); |
| 361 | |
| 362 | uintptr_t getCurrTime(); |
| 363 | uintptr_t getCurrTimeRange(uintptr_t range); |
| 364 | void registerRealloc(size_t oldSize, size_t newSize); |
| 365 | }; |
| 366 | |
| 367 | #endif // __TBB_large_objects_H |
| 368 | |
| 369 | |