/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_tbbmalloc_internal_H
#define __TBB_tbbmalloc_internal_H


#include "TypeDefinitions.h" /* Also includes customization layer Customize.h */

#if USE_PTHREAD
    // Some pthreads documentation says that <pthread.h> must be the first header.
    #include <pthread.h>
    typedef pthread_key_t tls_key_t;
#elif USE_WINTHREAD
    #include "tbb/machine/windows_api.h"
    typedef DWORD tls_key_t;
#else
    #error Must define USE_PTHREAD or USE_WINTHREAD
#endif

// TODO: *BSD also has it
#define BACKEND_HAS_MREMAP __linux__
#define CHECK_ALLOCATION_RANGE MALLOC_DEBUG || MALLOC_ZONE_OVERLOAD_ENABLED || MALLOC_UNIXLIKE_OVERLOAD_ENABLED

#include "tbb/tbb_config.h" // for __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN
#if __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN
  #define _EXCEPTION_PTR_H /* prevents exception_ptr.h inclusion */
  #define _GLIBCXX_NESTED_EXCEPTION_H /* prevents nested_exception.h inclusion */
#endif

#include <stdio.h>
#include <stdlib.h>
#include <limits.h> // for CHAR_BIT
#include <string.h> // for memset
#if MALLOC_CHECK_RECURSION
#include <new>      /* for placement new */
#endif
#include "tbb/scalable_allocator.h"
#include "tbbmalloc_internal_api.h"

/********* Various compile-time options **************/

#if !__TBB_DEFINE_MIC && __TBB_MIC_NATIVE
#error Intel(R) Many Integrated Core Compiler does not define __MIC__ anymore.
#endif

#define MALLOC_TRACE 0

#if MALLOC_TRACE
#define TRACEF(x) printf x
#else
#define TRACEF(x) ((void)0)
#endif /* MALLOC_TRACE */

#define ASSERT_TEXT NULL

#define COLLECT_STATISTICS ( MALLOC_DEBUG && MALLOCENV_COLLECT_STATISTICS )
#ifndef USE_INTERNAL_TID
#define USE_INTERNAL_TID COLLECT_STATISTICS || MALLOC_TRACE
#endif

#include "Statistics.h"

// call yield for whitebox testing, skip in real library
#ifndef WhiteboxTestingYield
#define WhiteboxTestingYield() ((void)0)
#endif


/********* End compile-time options **************/

namespace rml {

namespace internal {

#if __TBB_MALLOC_LOCACHE_STAT
extern intptr_t mallocCalls, cacheHits;
extern intptr_t memAllocKB, memHitKB;
#endif

//! Utility template function to prevent "unused" warnings by various compilers.
template<typename T>
void suppress_unused_warning( const T& ) {}

/********** Various global default constants ********/

/*
 * Default huge page size
 */
static const size_t HUGE_PAGE_SIZE = 2 * 1024 * 1024;

/********** End of global default constants *********/

/********** Various numeric parameters controlling allocations ********/

/*
 * slabSize - the size of a block used for allocation of small objects;
 * it must be larger than maxSegregatedObjectSize.
 */
const uintptr_t slabSize = 16*1024;

/*
 * Cleanup frequency of the large-block cache.
 * It should be a power of two to allow fast checking.
 */
const unsigned cacheCleanupFreq = 256;

/*
 * Alignment of large (>= minLargeObjectSize) objects.
 */
const size_t largeObjectAlignment = estimatedCacheLineSize;

/*
 * The number of bins in the TLS that lead to blocks we can allocate from.
 */
const uint32_t numBlockBinLimit = 31;

/********** End of numeric parameters controlling allocations *********/

class BlockI;
class Block;
struct LargeMemoryBlock;
struct ExtMemoryPool;
struct MemRegion;
class FreeBlock;
class TLSData;
class Backend;
class MemoryPool;
struct CacheBinOperation;
extern const uint32_t minLargeObjectSize;

enum DecreaseOrIncrease {
    decrease, increase
};

class TLSKey {
    tls_key_t TLS_pointer_key;
public:
    bool init();
    bool destroy();
    TLSData* getThreadMallocTLS() const;
    void setThreadMallocTLS( TLSData * newvalue );
    TLSData* createTLS(MemoryPool *memPool, Backend *backend);
};
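
// A minimal usage sketch of the per-pool TLS key (illustrative only; the real
// call sites live in the allocator sources, error handling is elided, and
// memPool/backend stand for pointers the caller already owns):
//
//     TLSKey key;
//     if (key.init()) {                              // create the OS TLS slot
//         TLSData *tls = key.getThreadMallocTLS();   // NULL on a thread's first use
//         if (!tls)
//             tls = key.createTLS(memPool, backend); // build and register per-thread data
//         // ... use tls ...
//         key.destroy();                             // release the OS TLS slot
//     }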

template<typename Arg, typename Compare>
inline void AtomicUpdate(Arg &location, Arg newVal, const Compare &cmp)
{
    MALLOC_STATIC_ASSERT(sizeof(Arg) == sizeof(intptr_t),
                         "Type of argument must match AtomicCompareExchange type.");
    for (Arg old = location; cmp(old, newVal); ) {
        Arg val = AtomicCompareExchange((intptr_t&)location, (intptr_t)newVal, old);
        if (val == old)
            break;
        // TODO: do we need backoff after unsuccessful CAS?
        old = val;
    }
}
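
// A minimal sketch of AtomicUpdate with a hand-written comparator (illustrative
// only; highWaterMark and newValue stand for assumed caller-side variables).
// The comparator returns true while newVal should still replace the current
// value, so a "less than" functor turns the loop into a lock-free running-maximum
// update:
//
//     struct LessThan {
//         bool operator()(intptr_t current, intptr_t candidate) const {
//             return current < candidate;   // update only if candidate is larger
//         }
//     };
//
//     intptr_t highWaterMark = 0;
//     AtomicUpdate(highWaterMark, (intptr_t)newValue, LessThan());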

// TODO: make BitMaskBasic more general
// (currently, it fits BitMaskMin well, but not as suitable for BitMaskMax)
template<unsigned NUM>
class BitMaskBasic {
    static const unsigned SZ = (NUM-1)/(CHAR_BIT*sizeof(uintptr_t))+1;
    static const unsigned WORD_LEN = CHAR_BIT*sizeof(uintptr_t);
    uintptr_t mask[SZ];
protected:
    void set(size_t idx, bool val) {
        MALLOC_ASSERT(idx<NUM, ASSERT_TEXT);

        size_t i = idx / WORD_LEN;
        int pos = WORD_LEN - idx % WORD_LEN - 1;
        if (val)
            AtomicOr(&mask[i], 1ULL << pos);
        else
            AtomicAnd(&mask[i], ~(1ULL << pos));
    }
    int getMinTrue(unsigned startIdx) const {
        unsigned idx = startIdx / WORD_LEN;
        int pos;

        if (startIdx % WORD_LEN) {
            // only interested in part of a word, clear bits before startIdx
            pos = WORD_LEN - startIdx % WORD_LEN;
            uintptr_t actualMask = mask[idx] & (((uintptr_t)1<<pos) - 1);
            idx++;
            if (-1 != (pos = BitScanRev(actualMask)))
                return idx*WORD_LEN - pos - 1;
        }

        while (idx<SZ)
            if (-1 != (pos = BitScanRev(mask[idx++])))
                return idx*WORD_LEN - pos - 1;
        return -1;
    }
public:
    void reset() { for (unsigned i=0; i<SZ; i++) mask[i] = 0; }
};

template<unsigned NUM>
class BitMaskMin : public BitMaskBasic<NUM> {
public:
    void set(size_t idx, bool val) { BitMaskBasic<NUM>::set(idx, val); }
    int getMinTrue(unsigned startIdx) const {
        return BitMaskBasic<NUM>::getMinTrue(startIdx);
    }
};

template<unsigned NUM>
class BitMaskMax : public BitMaskBasic<NUM> {
public:
    void set(size_t idx, bool val) {
        BitMaskBasic<NUM>::set(NUM - 1 - idx, val);
    }
    int getMaxTrue(unsigned startIdx) const {
        int p = BitMaskBasic<NUM>::getMinTrue(NUM-startIdx-1);
        return -1==p? -1 : (int)NUM - 1 - p;
    }
};
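
// A short sketch of the bit mask semantics (illustrative only): for BitMaskMin,
// getMinTrue(i) returns the smallest set index >= i; for BitMaskMax, getMaxTrue(i)
// returns the largest set index <= i; both return -1 if no such bit is set.
//
//     BitMaskMin<64> minMask;
//     minMask.reset();                     // masks normally rely on zero-initialized memory
//     minMask.set(5, true);
//     minMask.set(9, true);
//     int lowest = minMask.getMinTrue(6);  // == 9 (index 5 is below the start index)
//
//     BitMaskMax<64> maxMask;
//     maxMask.reset();
//     maxMask.set(5, true);
//     maxMask.set(9, true);
//     int highest = maxMask.getMaxTrue(8); // == 5 (index 9 is above the start index)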


// The part of thread-specific data that can be modified by other threads.
// Such modifications must be protected by AllLocalCaches::listLock.
struct TLSRemote {
    TLSRemote *next,
              *prev;
};

// The list of all thread-local data; supports cleanup of thread caches
class AllLocalCaches {
    TLSRemote  *head;
    MallocMutex listLock; // protects operations in the list
public:
    void registerThread(TLSRemote *tls);
    void unregisterThread(TLSRemote *tls);
    bool cleanup(bool cleanOnlyUnused);
    void markUnused();
    void reset() { head = NULL; }
};

class LifoList {
public:
    inline LifoList();
    inline void push(Block *block);
    inline Block *pop();
    inline Block *grab();

private:
    Block *top;
    MallocMutex lock;
};

/*
 * When a block that is not completely free is returned for reuse by other threads,
 * this is where the block goes.
 *
 * LifoList assumes zero initialization, so its constructors are omitted below
 * to avoid linking with C++ libraries on Linux.
 */

class OrphanedBlocks {
    LifoList bins[numBlockBinLimit];
public:
    Block *get(TLSData *tls, unsigned int size);
    void put(intptr_t binTag, Block *block);
    void reset();
    bool cleanup(Backend* backend);
};

/* Large objects entities */
#include "large_objects.h"

// Select the index size for BackRefMaster based on word size: uint32_t by default,
// uint16_t on 32-bit platforms
template<bool>
struct MasterIndexSelect {
    typedef uint32_t master_type;
};

template<>
struct MasterIndexSelect<false> {
    typedef uint16_t master_type;
};

class BackRefIdx { // composite index to backreference array
public:
    typedef MasterIndexSelect<4 < sizeof(uintptr_t)>::master_type master_t;
private:
    static const master_t invalid = ~master_t(0);
    master_t master;      // index in BackRefMaster
    uint16_t largeObj:1;  // is this object "large"?
    uint16_t offset  :15; // offset from beginning of BackRefBlock
public:
    BackRefIdx() : master(invalid), largeObj(0), offset(0) {}
    bool isInvalid() const { return master == invalid; }
    bool isLargeObject() const { return largeObj; }
    master_t getMaster() const { return master; }
    uint16_t getOffset() const { return offset; }

    // only newBackRef can modify BackRefIdx
    static BackRefIdx newBackRef(bool largeObj);
};
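
// A minimal sketch of the back-reference life cycle (illustrative only; the free
// functions newBackRef/setBackRef/getBackRef/removeBackRef are declared at the end
// of this header, and blockPtr stands for an assumed block address):
//
//     BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
//     if (!idx.isInvalid()) {
//         setBackRef(idx, blockPtr);      // record what the index refers to
//         void *p = getBackRef(idx);      // p == blockPtr
//         removeBackRef(idx);             // release the slot for reuse
//     }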

// Block header is used during block coalescing
// and must be preserved in used blocks.
class BlockI {
    intptr_t blockState[2];
};

struct LargeMemoryBlock : public BlockI {
    MemoryPool       *pool;          // owner pool
    LargeMemoryBlock *next,          // ptrs in list of cached blocks
                     *prev,
    // Doubly-linked list of the pool's large objects.
    // Used to destroy backrefs on pool destroy (backrefs are global)
    // and for object releasing during pool reset.
                     *gPrev,
                     *gNext;
    uintptr_t         age;           // age of block while in cache
    size_t            objectSize;    // the size requested by a client
    size_t            unalignedSize; // the size requested from backend
    BackRefIdx        backRefIdx;    // cached here, used copy is in LargeObjectHdr
};

// Classes and methods for backend.cpp
#include "backend.h"

// A TBB allocator mode that can be controlled by the user
// via the API or an environment variable. Must be placed in zero-initialized memory.
// External synchronization is assumed.
// TODO: TBB_VERSION support
class AllocControlledMode {
    intptr_t val;
    bool     setDone;

public:
    intptr_t get() const {
        MALLOC_ASSERT(setDone, ASSERT_TEXT);
        return val;
    }

    // Note: set() can be called before init()
    void set(intptr_t newVal) {
        val = newVal;
        setDone = true;
    }

    bool ready() const {
        return setDone;
    }

    // envName - environment variable that supplies the controlled mode
    void initReadEnv(const char *envName, intptr_t defaultVal) {
        if (!setDone) {
#if !__TBB_WIN8UI_SUPPORT
            // TODO: use strtol to get the actual value of the environment variable
            const char *envVal = getenv(envName);
            if (envVal && !strcmp(envVal, "1"))
                val = 1;
            else
#endif
                val = defaultVal;
            setDone = true;
        }
    }
};
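
// A minimal usage sketch (illustrative only; the object must live in
// zero-initialized storage, e.g. as a static, and hugePagesMode is a
// hypothetical instance name; the environment variable is the one used
// elsewhere in this file):
//
//     static AllocControlledMode hugePagesMode;
//     hugePagesMode.initReadEnv("TBB_MALLOC_USE_HUGE_PAGES", 0);  // env value "1" enables the mode
//     if (hugePagesMode.ready() && hugePagesMode.get())
//         { /* the mode was enabled via set() or the environment */ }
//     hugePagesMode.set(1);   // explicit set() wins and marks the mode as decided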

// Page type to be used inside MapMemory.
// Regular (4KB aligned), Huge and Transparent Huge Pages (2MB aligned).
enum PageType {
    REGULAR = 0,
    PREALLOCATED_HUGE_PAGE,
    TRANSPARENT_HUGE_PAGE
};

// init() and printStatus() are called only under the global initialization lock.
// A race is possible between registerAllocation() and registerReleasing();
// the harm is that at most a single huge page release is missed (because a failure
// to get a huge page is registered only the 1st time), which is negligible.
// setMode() can also be called concurrently.
// Object must reside in zero-initialized memory.
// TODO: can we check for huge page presence during every 10th mmap() call
// in case a huge page is released by another process?
class HugePagesStatus {
private:
    AllocControlledMode requestedMode; // changed only by user
    // to keep enabled and requestedMode consistent
    MallocMutex setModeLock;
    size_t      pageSize;
    intptr_t    needActualStatusPrint;

    static void doPrintStatus(bool state, const char *stateName) {
        // Under macOS* fprintf/snprintf acquire an internal lock, so if the
        // 1st allocation is done under that lock, we get a deadlock.
        // Do not use fprintf etc. during initialization.
        fputs("TBBmalloc: huge pages\t", stderr);
        if (!state)
            fputs("not ", stderr);
        fputs(stateName, stderr);
        fputs("\n", stderr);
    }

    void parseSystemMemInfo() {
        bool hpAvailable  = false;
        bool thpAvailable = false;
        unsigned long long hugePageSize = 0;

#if __linux__
        // Check huge pages existence
        unsigned long long meminfoHugePagesTotal = 0;

        parseFileItem meminfoItems[] = {
            // Parse system huge page size
            { "Hugepagesize: %llu kB", hugePageSize },
            // Check if there are preallocated huge pages on the system
            // https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
            { "HugePages_Total: %llu", meminfoHugePagesTotal } };

        parseFile</*BUFF_SIZE=*/100>("/proc/meminfo", meminfoItems);

        // Double-check other system information regarding preallocated
        // huge pages if there is no information in /proc/meminfo
        unsigned long long vmHugePagesTotal = 0;

        parseFileItem vmItem[] = { { "%llu", vmHugePagesTotal } };

        // We parse a counter value; it can't be huge
        parseFile</*BUFF_SIZE=*/100>("/proc/sys/vm/nr_hugepages", vmItem);

        if (meminfoHugePagesTotal > 0 || vmHugePagesTotal > 0) {
            MALLOC_ASSERT(hugePageSize != 0, "Huge Page size can't be zero if we found preallocated.");

            // Any non-zero value clearly indicates that there are preallocated
            // huge pages on the system
            hpAvailable = true;
        }

        // Check if there is transparent huge page support on the system
        unsigned long long thpPresent = 'n';
        parseFileItem thpItem[] = { { "[alwa%cs] madvise never\n", thpPresent } };
        parseFile</*BUFF_SIZE=*/100>("/sys/kernel/mm/transparent_hugepage/enabled", thpItem);

        if (thpPresent == 'y') {
            MALLOC_ASSERT(hugePageSize != 0, "Huge Page size can't be zero if we found thp existence.");
            thpAvailable = true;
        }
#endif
        MALLOC_ASSERT(!pageSize, "Huge page size can't be set twice. Double initialization.");

        // Initialize object variables
        pageSize       = hugePageSize * 1024; // was read in KB from meminfo
        isHPAvailable  = hpAvailable;
        isTHPAvailable = thpAvailable;
    }

public:

    // System information
    bool isHPAvailable;
    bool isTHPAvailable;

    // User-defined value
    bool isEnabled;

    void init() {
        parseSystemMemInfo();
        MallocMutex::scoped_lock lock(setModeLock);
        requestedMode.initReadEnv("TBB_MALLOC_USE_HUGE_PAGES", 0);
        isEnabled = (isHPAvailable || isTHPAvailable) && requestedMode.get();
    }

    // Can be set from user code at any point.
    // If init() has not been called by that point, isEnabled will be false.
    void setMode(intptr_t newVal) {
        MallocMutex::scoped_lock lock(setModeLock);
        requestedMode.set(newVal);
        isEnabled = (isHPAvailable || isTHPAvailable) && newVal;
    }

    bool isRequested() const {
        return requestedMode.ready() ? requestedMode.get() : false;
    }

    void reset() {
        pageSize = needActualStatusPrint = 0;
        isEnabled = isHPAvailable = isTHPAvailable = false;
    }

    // If the memory mapping size is a multiple of the huge page size, some OS kernels
    // can use huge pages transparently. Use this when huge pages are requested.
    size_t getGranularity() const {
        if (requestedMode.ready())
            return requestedMode.get() ? pageSize : 0;
        else
            return HUGE_PAGE_SIZE; // the mode is not yet known; assume typical 2MB huge pages
    }

    void printStatus() {
        doPrintStatus(requestedMode.get(), "requested");
        if (requestedMode.get()) { // report actual status iff requested
            if (pageSize)
                FencedStore(needActualStatusPrint, 1);
            else
                doPrintStatus(/*state=*/false, "available");
        }
    }
};
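
// A rough sketch of the intended call sequence (illustrative only; the allocator
// keeps a single zero-initialized instance of this class elsewhere, referred to
// here as hugePages):
//
//     hugePages.init();                          // parse /proc data, read TBB_MALLOC_USE_HUGE_PAGES
//     size_t gran = hugePages.getGranularity();  // mapping-size multiple when huge pages are wanted
//     hugePages.setMode(1);                      // user requested huge pages through the API
//     hugePages.printStatus();                   // report requested/available status to stderr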

class AllLargeBlocksList {
    MallocMutex       largeObjLock;
    LargeMemoryBlock *loHead;
public:
    void add(LargeMemoryBlock *lmb);
    void remove(LargeMemoryBlock *lmb);
    template<bool poolDestroy> void releaseAll(Backend *backend);
};

struct ExtMemoryPool {
    Backend          backend;
    LargeObjectCache loc;
    AllLocalCaches   allLocalCaches;
    OrphanedBlocks   orphanedBlocks;

    intptr_t         poolId;
    // To find all large objects. Used during user pool destruction,
    // to release all backreferences in large blocks (slab blocks do not have them).
    AllLargeBlocksList lmbList;
    // Callbacks to be used instead of MapMemory/UnmapMemory.
    rawAllocType     rawAlloc;
    rawFreeType      rawFree;
    size_t           granularity;
    bool             keepAllMemory,
                     delayRegsReleasing,
    // TODO: implement fixedPool, calling rawFree on destruction
                     fixedPool;
    TLSKey           tlsPointerKey; // per-pool TLS key

    bool init(intptr_t poolId, rawAllocType rawAlloc, rawFreeType rawFree,
              size_t granularity, bool keepAllMemory, bool fixedPool);
    bool initTLS();

    // i.e., not the system default pool for scalable_malloc/scalable_free
    bool userPool() const { return rawAlloc; }

    // true if something has been released
    bool softCachesCleanup();
    bool releaseAllLocalCaches();
    bool hardCachesCleanup();
    void *remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment);
    bool reset() {
        loc.reset();
        allLocalCaches.reset();
        orphanedBlocks.reset();
        bool ret = tlsPointerKey.destroy();
        backend.reset();
        return ret;
    }
    bool destroy() {
        MALLOC_ASSERT(isPoolValid(),
                      "Possible double pool_destroy or heap corruption");
        if (!userPool()) {
            loc.reset();
            allLocalCaches.reset();
        }
        // pthread_key_dtors must be disabled before memory unmapping
        // TODO: race-free solution
        bool ret = tlsPointerKey.destroy();
        if (rawFree || !userPool())
            ret &= backend.destroy();
        // pool is not valid after this point
        granularity = 0;
        return ret;
    }
    void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; }
    inline bool regionsAreReleaseable() const;

    LargeMemoryBlock *mallocLargeObject(MemoryPool *pool, size_t allocationSize);
    void freeLargeObject(LargeMemoryBlock *lmb);
    void freeLargeObjectList(LargeMemoryBlock *head);
    // use granularity as a marker for pool validity
    bool isPoolValid() const { return granularity; }
};
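
// A rough sketch of the pool life cycle (illustrative only; the argument values
// are placeholders, memPool/size stand for assumed caller-side variables, and the
// object is assumed to live in zero-initialized memory, as the LifoList comment
// above requires for its members):
//
//     ExtMemoryPool extPool;
//     extPool.init(/*poolId=*/0, /*rawAlloc=*/NULL, /*rawFree=*/NULL,
//                  /*granularity=*/2*1024*1024, /*keepAllMemory=*/false,
//                  /*fixedPool=*/false);
//     LargeMemoryBlock *lmb = extPool.mallocLargeObject(memPool, size);
//     extPool.freeLargeObject(lmb);
//     extPool.softCachesCleanup();  // true if some cached memory was released
//     extPool.destroy();            // pool is invalid afterwards (granularity == 0)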

inline bool Backend::inUserPool() const { return extMemPool->userPool(); }

struct LargeObjectHdr {
    LargeMemoryBlock *memoryBlock;
    /* Backreference points to LargeObjectHdr.
       Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */
    BackRefIdx backRefIdx;
};

struct FreeObject {
    FreeObject *next;
};


/******* A helper class to support overriding malloc with scalable_malloc *******/
#if MALLOC_CHECK_RECURSION

class RecursiveMallocCallProtector {
    // pointer to automatic data of the holding thread
    static void       *autoObjPtr;
    static MallocMutex rmc_mutex;
    static pthread_t   owner_thread;
/* Under FreeBSD 8.0, the 1st call to any pthread function, including pthread_self,
   leads to pthread initialization, which causes malloc calls. As the 1st usage of
   RecursiveMallocCallProtector can happen before pthread is initialized, pthread calls
   can't be used in the 1st instance of RecursiveMallocCallProtector.
   RecursiveMallocCallProtector is used the 1st time in checkInitialization(),
   so there is a guarantee that by the 2nd usage pthread is initialized.
   No such situation has been observed with other supported OSes.
*/
#if __FreeBSD__
    static bool canUsePthread;
#else
    static const bool canUsePthread = true;
#endif
/*
   The variable is modified in checkInitialization(),
   so it can be read without memory barriers.
*/
    static bool mallocRecursionDetected;

    MallocMutex::scoped_lock* lock_acquired;
    char scoped_lock_space[sizeof(MallocMutex::scoped_lock)+1];

    static uintptr_t absDiffPtr(void *x, void *y) {
        uintptr_t xi = (uintptr_t)x, yi = (uintptr_t)y;
        return xi > yi ? xi - yi : yi - xi;
    }
public:

    RecursiveMallocCallProtector() : lock_acquired(NULL) {
        lock_acquired = new (scoped_lock_space) MallocMutex::scoped_lock( rmc_mutex );
        if (canUsePthread)
            owner_thread = pthread_self();
        autoObjPtr = &scoped_lock_space;
    }
    ~RecursiveMallocCallProtector() {
        if (lock_acquired) {
            autoObjPtr = NULL;
            lock_acquired->~scoped_lock();
        }
    }
    static bool sameThreadActive() {
        if (!autoObjPtr) // fast path
            return false;
        // Some thread has an active recursive call protector; check whether it is the current one.
        // Exact pthread_self-based test
        if (canUsePthread) {
            if (pthread_equal( owner_thread, pthread_self() )) {
                mallocRecursionDetected = true;
                return true;
            } else
                return false;
        }
        // Inexact stack-size-based test
        const uintptr_t threadStackSz = 2*1024*1024;
        int dummy;
        return absDiffPtr(autoObjPtr, &dummy)<threadStackSz;
    }
    static bool noRecursion();
    /* The function is called on the 1st scalable_malloc call to check if malloc calls
       scalable_malloc (a nested call must set mallocRecursionDetected). */
    static void detectNaiveOverload() {
        if (!malloc_proxy) {
#if __FreeBSD__
            /* If !canUsePthread, we could not call pthread_self() earlier, but pthread
               is initialized now, so we can do it. */
            if (!canUsePthread) {
                canUsePthread = true;
                owner_thread = pthread_self();
            }
#endif
            free(malloc(1));
        }
    }
};
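
// A minimal usage sketch (illustrative only): the protector is created on the
// stack around allocator code that may itself end up calling malloc, and
// sameThreadActive() lets the caller detect re-entry from the same thread;
// doRealAllocation is a hypothetical worker function:
//
//     void *allocateSomething(size_t size) {
//         if (RecursiveMallocCallProtector::sameThreadActive())
//             return NULL;                      // malloc called back into the allocator
//         RecursiveMallocCallProtector scoped;  // holds rmc_mutex for this scope
//         return doRealAllocation(size);
//     }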

#else

class RecursiveMallocCallProtector {
public:
    RecursiveMallocCallProtector() {}
    ~RecursiveMallocCallProtector() {}
};

#endif /* MALLOC_CHECK_RECURSION */

bool isMallocInitializedExt();

unsigned int getThreadId();

bool initBackRefMaster(Backend *backend);
void destroyBackRefMaster(Backend *backend);
void removeBackRef(BackRefIdx backRefIdx);
void setBackRef(BackRefIdx backRefIdx, void *newPtr);
void *getBackRef(BackRefIdx backRefIdx);

} // namespace internal
} // namespace rml

#endif // __TBB_tbbmalloc_internal_H