1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
/* prevent loading the dynamic TBBmalloc at startup; it is not needed
   for the whitebox test */
19 | #define __TBB_SOURCE_DIRECTLY_INCLUDED 1 |
20 | |
// According to the C99 standard, INTPTR_MIN is defined for C++
// only if __STDC_LIMIT_MACROS is pre-defined
23 | #define __STDC_LIMIT_MACROS 1 |
24 | |
25 | #define HARNESS_TBBMALLOC_THREAD_SHUTDOWN 1 |
26 | |
27 | #include "harness.h" |
28 | #include "harness_barrier.h" |
29 | |
// Do not depend on the ITT support machinery
31 | #ifdef DO_ITT_NOTIFY |
32 | #undef DO_ITT_NOTIFY |
33 | #endif |
34 | |
35 | #define __TBB_MALLOC_WHITEBOX_TEST 1 // to get access to allocator internals |
// help trigger rare race conditions
37 | #define WhiteboxTestingYield() (__TBB_Yield(), __TBB_Yield(), __TBB_Yield(), __TBB_Yield()) |
38 | |
39 | #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD |
// 2571: variable has not been declared with a compatible "target" attribute
// 3218: class/struct may fail when offloaded because this field is misaligned
//       or contains data that is misaligned
43 | #pragma warning(push) |
44 | #pragma warning(disable:2571 3218) |
45 | #endif |
46 | #define protected public |
47 | #define private public |
48 | #include "../tbbmalloc/frontend.cpp" |
49 | #undef protected |
50 | #undef private |
51 | #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD |
52 | #pragma warning(pop) |
53 | #endif |
54 | #include "../tbbmalloc/backend.cpp" |
55 | #include "../tbbmalloc/backref.cpp" |
56 | |
57 | namespace tbbmalloc_whitebox { |
58 | size_t locGetProcessed = 0; |
59 | size_t locPutProcessed = 0; |
60 | } |
61 | #include "../tbbmalloc/large_objects.cpp" |
62 | #include "../tbbmalloc/tbbmalloc.cpp" |
63 | |
64 | const int LARGE_MEM_SIZES_NUM = 10; |
65 | |
66 | class AllocInfo { |
67 | int *p; |
68 | int val; |
69 | int size; |
70 | public: |
71 | AllocInfo() : p(NULL), val(0), size(0) {} |
72 | explicit AllocInfo(int sz) : p((int*)scalable_malloc(sz*sizeof(int))), |
73 | val(rand()), size(sz) { |
74 | ASSERT(p, NULL); |
75 | for (int k=0; k<size; k++) |
76 | p[k] = val; |
77 | } |
78 | void check() const { |
79 | for (int k=0; k<size; k++) |
80 | ASSERT(p[k] == val, NULL); |
81 | } |
82 | void clear() { |
83 | scalable_free(p); |
84 | } |
85 | }; |
86 | |
87 | class SimpleBarrier: NoAssign { |
88 | protected: |
89 | static Harness::SpinBarrier barrier; |
90 | public: |
91 | static void initBarrier(unsigned thrds) { barrier.initialize(thrds); } |
92 | }; |
93 | |
94 | Harness::SpinBarrier SimpleBarrier::barrier; |
95 | |
96 | class TestLargeObjCache: public SimpleBarrier { |
97 | public: |
98 | static int largeMemSizes[LARGE_MEM_SIZES_NUM]; |
99 | |
100 | TestLargeObjCache( ) {} |
101 | |
102 | void operator()( int /*mynum*/ ) const { |
103 | AllocInfo allocs[LARGE_MEM_SIZES_NUM]; |
104 | |
105 | // push to maximal cache limit |
106 | for (int i=0; i<2; i++) { |
107 | const int sizes[] = { MByte/sizeof(int), |
108 | (MByte-2*LargeObjectCache::LargeBSProps::CacheStep)/sizeof(int) }; |
109 | for (int q=0; q<2; q++) { |
110 | size_t curr = 0; |
111 | for (int j=0; j<LARGE_MEM_SIZES_NUM; j++, curr++) |
112 | new (allocs+curr) AllocInfo(sizes[q]); |
113 | |
114 | for (size_t j=0; j<curr; j++) { |
115 | allocs[j].check(); |
116 | allocs[j].clear(); |
117 | } |
118 | } |
119 | } |
120 | |
121 | barrier.wait(); |
122 | |
123 | // check caching correctness |
124 | for (int i=0; i<1000; i++) { |
125 | size_t curr = 0; |
126 | for (int j=0; j<LARGE_MEM_SIZES_NUM-1; j++, curr++) |
127 | new (allocs+curr) AllocInfo(largeMemSizes[j]); |
128 | |
129 | new (allocs+curr) |
130 | AllocInfo((int)(4*minLargeObjectSize + |
131 | 2*minLargeObjectSize*(1.*rand()/RAND_MAX))); |
132 | curr++; |
133 | |
134 | for (size_t j=0; j<curr; j++) { |
135 | allocs[j].check(); |
136 | allocs[j].clear(); |
137 | } |
138 | } |
139 | } |
140 | }; |
141 | |
142 | int TestLargeObjCache::largeMemSizes[LARGE_MEM_SIZES_NUM]; |
143 | |
144 | void TestLargeObjectCache() |
145 | { |
146 | for (int i=0; i<LARGE_MEM_SIZES_NUM; i++) |
147 | TestLargeObjCache::largeMemSizes[i] = |
148 | (int)(minLargeObjectSize + 2*minLargeObjectSize*(1.*rand()/RAND_MAX)); |
149 | |
150 | for( int p=MaxThread; p>=MinThread; --p ) { |
151 | TestLargeObjCache::initBarrier( p ); |
152 | NativeParallelFor( p, TestLargeObjCache() ); |
153 | } |
154 | } |
155 | |
156 | #if MALLOC_CHECK_RECURSION |
157 | |
158 | class TestStartupAlloc: public SimpleBarrier { |
159 | struct TestBlock { |
160 | void *ptr; |
161 | size_t sz; |
162 | }; |
163 | static const int ITERS = 100; |
164 | public: |
165 | TestStartupAlloc() {} |
166 | void operator()(int) const { |
167 | TestBlock blocks1[ITERS], blocks2[ITERS]; |
168 | |
169 | barrier.wait(); |
170 | |
171 | for (int i=0; i<ITERS; i++) { |
172 | blocks1[i].sz = rand() % minLargeObjectSize; |
173 | blocks1[i].ptr = StartupBlock::allocate(blocks1[i].sz); |
174 | ASSERT(blocks1[i].ptr && StartupBlock::msize(blocks1[i].ptr)>=blocks1[i].sz |
175 | && 0==(uintptr_t)blocks1[i].ptr % sizeof(void*), NULL); |
176 | memset(blocks1[i].ptr, i, blocks1[i].sz); |
177 | } |
178 | for (int i=0; i<ITERS; i++) { |
179 | blocks2[i].sz = rand() % minLargeObjectSize; |
180 | blocks2[i].ptr = StartupBlock::allocate(blocks2[i].sz); |
181 | ASSERT(blocks2[i].ptr && StartupBlock::msize(blocks2[i].ptr)>=blocks2[i].sz |
182 | && 0==(uintptr_t)blocks2[i].ptr % sizeof(void*), NULL); |
183 | memset(blocks2[i].ptr, i, blocks2[i].sz); |
184 | |
185 | for (size_t j=0; j<blocks1[i].sz; j++) |
186 | ASSERT(*((char*)blocks1[i].ptr+j) == i, NULL); |
187 | Block *block = (Block *)alignDown(blocks1[i].ptr, slabSize); |
188 | ((StartupBlock *)block)->free(blocks1[i].ptr); |
189 | } |
190 | for (int i=ITERS-1; i>=0; i--) { |
191 | for (size_t j=0; j<blocks2[i].sz; j++) |
192 | ASSERT(*((char*)blocks2[i].ptr+j) == i, NULL); |
193 | Block *block = (Block *)alignDown(blocks2[i].ptr, slabSize); |
194 | ((StartupBlock *)block)->free(blocks2[i].ptr); |
195 | } |
196 | } |
197 | }; |
198 | |
199 | #endif /* MALLOC_CHECK_RECURSION */ |
200 | |
201 | #include <deque> |
202 | |
203 | template<int ITERS> |
204 | class BackRefWork: NoAssign { |
205 | struct TestBlock { |
206 | BackRefIdx idx; |
207 | char data; |
208 | TestBlock(BackRefIdx idx_) : idx(idx_) {} |
209 | }; |
210 | public: |
211 | BackRefWork() {} |
212 | void operator()(int) const { |
213 | size_t cnt; |
        // it's important not to invalidate pointers to the contents of the container
215 | std::deque<TestBlock> blocks; |
216 | |
217 | // for ITERS==0 consume all available backrefs |
218 | for (cnt=0; !ITERS || cnt<ITERS; cnt++) { |
219 | BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false); |
220 | if (idx.isInvalid()) |
221 | break; |
222 | blocks.push_back(TestBlock(idx)); |
223 | setBackRef(blocks.back().idx, &blocks.back().data); |
224 | } |
225 | for (size_t i=0; i<cnt; i++) |
226 | ASSERT((Block*)&blocks[i].data == getBackRef(blocks[i].idx), NULL); |
227 | for (size_t i=cnt; i>0; i--) |
228 | removeBackRef(blocks[i-1].idx); |
229 | } |
230 | }; |
231 | |
232 | class LocalCachesHit: NoAssign { |
233 | // set ITERS to trigger possible leak of backreferences |
234 | // during cleanup on cache overflow and on thread termination |
235 | static const int ITERS = 2*(FreeBlockPool::POOL_HIGH_MARK + |
236 | LocalLOC::LOC_HIGH_MARK); |
237 | public: |
238 | LocalCachesHit() {} |
239 | void operator()(int) const { |
240 | void *objsSmall[ITERS], *objsLarge[ITERS]; |
241 | |
242 | for (int i=0; i<ITERS; i++) { |
243 | objsSmall[i] = scalable_malloc(minLargeObjectSize-1); |
244 | objsLarge[i] = scalable_malloc(minLargeObjectSize); |
245 | } |
246 | for (int i=0; i<ITERS; i++) { |
247 | scalable_free(objsSmall[i]); |
248 | scalable_free(objsLarge[i]); |
249 | } |
250 | } |
251 | }; |
252 | |
253 | static size_t allocatedBackRefCount() |
254 | { |
255 | size_t cnt = 0; |
256 | for (int i=0; i<=backRefMaster->lastUsed; i++) |
257 | cnt += backRefMaster->backRefBl[i]->allocatedCount; |
258 | return cnt; |
259 | } |
260 | |
261 | class TestInvalidBackrefs: public SimpleBarrier { |
262 | #if __ANDROID__ |
263 | // Android requires lower iters due to lack of virtual memory. |
264 | static const int BACKREF_GROWTH_ITERS = 50*1024; |
265 | #else |
266 | static const int BACKREF_GROWTH_ITERS = 200*1024; |
267 | #endif |
268 | |
269 | static tbb::atomic<bool> backrefGrowthDone; |
270 | static void *ptrs[BACKREF_GROWTH_ITERS]; |
271 | public: |
272 | TestInvalidBackrefs() {} |
273 | void operator()(int id) const { |
274 | |
275 | if (!id) { |
276 | backrefGrowthDone = false; |
277 | barrier.wait(); |
278 | |
279 | for (int i=0; i<BACKREF_GROWTH_ITERS; i++) |
280 | ptrs[i] = scalable_malloc(minLargeObjectSize); |
281 | backrefGrowthDone = true; |
282 | for (int i=0; i<BACKREF_GROWTH_ITERS; i++) |
283 | scalable_free(ptrs[i]); |
284 | } else { |
285 | void *p2 = scalable_malloc(minLargeObjectSize-1); |
286 | char *p1 = (char*)scalable_malloc(minLargeObjectSize-1); |
287 | LargeObjectHdr *hdr = |
288 | (LargeObjectHdr*)(p1+minLargeObjectSize-1 - sizeof(LargeObjectHdr)); |
289 | hdr->backRefIdx.master = 7; |
290 | hdr->backRefIdx.largeObj = 1; |
291 | hdr->backRefIdx.offset = 2000; |
292 | |
293 | barrier.wait(); |
294 | |
295 | while (!backrefGrowthDone) { |
296 | scalable_free(p2); |
297 | p2 = scalable_malloc(minLargeObjectSize-1); |
298 | } |
299 | scalable_free(p1); |
300 | scalable_free(p2); |
301 | } |
302 | } |
303 | }; |
304 | |
305 | tbb::atomic<bool> TestInvalidBackrefs::backrefGrowthDone; |
306 | void *TestInvalidBackrefs::ptrs[BACKREF_GROWTH_ITERS]; |
307 | |
308 | void TestBackRef() { |
309 | size_t beforeNumBackRef, afterNumBackRef; |
310 | |
311 | beforeNumBackRef = allocatedBackRefCount(); |
312 | for( int p=MaxThread; p>=MinThread; --p ) |
313 | NativeParallelFor( p, BackRefWork<2*BR_MAX_CNT+2>() ); |
314 | afterNumBackRef = allocatedBackRefCount(); |
315 | ASSERT(beforeNumBackRef==afterNumBackRef, "backreference leak detected" ); |
316 | |
317 | // lastUsed marks peak resource consumption. As we allocate below the mark, |
318 | // it must not move up, otherwise there is a resource leak. |
319 | int sustLastUsed = backRefMaster->lastUsed; |
320 | NativeParallelFor( 1, BackRefWork<2*BR_MAX_CNT+2>() ); |
321 | ASSERT(sustLastUsed == backRefMaster->lastUsed, "backreference leak detected" ); |
322 | |
    // check for a leak of back references while per-thread caches are in use
    // a warm-up run is needed to cover the bootStrapMalloc call
325 | NativeParallelFor( 1, LocalCachesHit() ); |
326 | beforeNumBackRef = allocatedBackRefCount(); |
327 | NativeParallelFor( 2, LocalCachesHit() ); |
328 | int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL); |
329 | ASSERT(res == TBBMALLOC_OK, NULL); |
330 | afterNumBackRef = allocatedBackRefCount(); |
331 | ASSERT(beforeNumBackRef>=afterNumBackRef, "backreference leak detected" ); |
332 | |
    // This is a regression test against a race condition between backreference
    // extension and the check for an invalid BackRefIdx.
    // When detecting whether an object is large or small, scalable_free first checks
    // for large objects, so there is a chance to prepend a small object with a
    // seemingly valid BackRefIdx for large objects, and thus trigger the bug.
338 | TestInvalidBackrefs::initBarrier(MaxThread); |
339 | NativeParallelFor( MaxThread, TestInvalidBackrefs() ); |
340 | // Consume all available backrefs and check they work correctly. |
    // For now, test only 32-bit machines, because memory consumption is too high on 64-bit.
342 | if (sizeof(uintptr_t) == 4) |
343 | NativeParallelFor( MaxThread, BackRefWork<0>() ); |
344 | } |
345 | |
346 | void *getMem(intptr_t /*pool_id*/, size_t &bytes) |
347 | { |
348 | const size_t BUF_SIZE = 8*1024*1024; |
349 | static char space[BUF_SIZE]; |
350 | static size_t pos; |
351 | |
352 | if (pos + bytes > BUF_SIZE) |
353 | return NULL; |
354 | |
355 | void *ret = space + pos; |
356 | pos += bytes; |
357 | |
358 | return ret; |
359 | } |
360 | |
361 | int putMem(intptr_t /*pool_id*/, void* /*raw_ptr*/, size_t /*raw_bytes*/) |
362 | { |
363 | return 0; |
364 | } |
365 | |
struct MallocPoolHeader {
    void  *rawPtr;
    size_t userSize;
};
370 | |
371 | void *getMallocMem(intptr_t /*pool_id*/, size_t &bytes) |
372 | { |
373 | void *rawPtr = malloc(bytes+sizeof(MallocPoolHeader)); |
374 | void *ret = (void *)((uintptr_t)rawPtr+sizeof(MallocPoolHeader)); |
375 | |
376 | MallocPoolHeader *hdr = (MallocPoolHeader*)ret-1; |
377 | hdr->rawPtr = rawPtr; |
378 | hdr->userSize = bytes; |
379 | |
380 | return ret; |
381 | } |
382 | |
383 | int putMallocMem(intptr_t /*pool_id*/, void *ptr, size_t bytes) |
384 | { |
385 | MallocPoolHeader *hdr = (MallocPoolHeader*)ptr-1; |
386 | ASSERT(bytes == hdr->userSize, "Invalid size in pool callback." ); |
387 | free(hdr->rawPtr); |
388 | |
389 | return 0; |
390 | } |
391 | |
392 | class StressLOCacheWork: NoAssign { |
393 | rml::MemoryPool *my_mallocPool; |
394 | public: |
395 | StressLOCacheWork(rml::MemoryPool *mallocPool) : my_mallocPool(mallocPool) {} |
396 | void operator()(int) const { |
397 | for (size_t sz=minLargeObjectSize; sz<1*1024*1024; |
398 | sz+=LargeObjectCache::LargeBSProps::CacheStep) { |
399 | void *ptr = pool_malloc(my_mallocPool, sz); |
400 | ASSERT(ptr, "Memory was not allocated" ); |
401 | memset(ptr, sz, sz); |
402 | pool_free(my_mallocPool, ptr); |
403 | } |
404 | } |
405 | }; |
406 | |
407 | void TestPools() { |
408 | rml::MemPoolPolicy pol(getMem, putMem); |
409 | size_t beforeNumBackRef, afterNumBackRef; |
410 | |
411 | rml::MemoryPool *pool1; |
412 | rml::MemoryPool *pool2; |
413 | pool_create_v1(0, &pol, &pool1); |
414 | pool_create_v1(0, &pol, &pool2); |
415 | pool_destroy(pool1); |
416 | pool_destroy(pool2); |
417 | |
418 | scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL); |
419 | beforeNumBackRef = allocatedBackRefCount(); |
420 | rml::MemoryPool *fixedPool; |
421 | |
422 | pool_create_v1(0, &pol, &fixedPool); |
423 | pol.pAlloc = getMallocMem; |
424 | pol.pFree = putMallocMem; |
425 | pol.granularity = 8; |
426 | rml::MemoryPool *mallocPool; |
427 | |
428 | pool_create_v1(0, &pol, &mallocPool); |
    /* Check that the large object cache (LOC) returns the correct size for cached objects.
       passBackendSz-byte objects are cached in the LOC but bypass the backend, so their
       memory is requested directly from the allocation callback.
       anotherLOCBinSz-byte objects must fit into another LOC bin, so that allocating
       and releasing them leads to cache cleanup.
       All this is expected to lead to the release of the passBackendSz-byte object from
       the LOC during LOC cleanup, and putMallocMem checks that the returned size
       is correct.
    */
438 | const size_t passBackendSz = Backend::maxBinned_HugePage+1, |
439 | anotherLOCBinSz = minLargeObjectSize+1; |
440 | for (int i=0; i<10; i++) { // run long enough to be cached |
441 | void *p = pool_malloc(mallocPool, passBackendSz); |
442 | ASSERT(p, "Memory was not allocated" ); |
443 | pool_free(mallocPool, p); |
444 | } |
    // run long enough for the passBackendSz allocation to be cleaned from the cache
    // and returned back to putMallocMem for size checking
447 | for (int i=0; i<1000; i++) { |
448 | void *p = pool_malloc(mallocPool, anotherLOCBinSz); |
449 | ASSERT(p, "Memory was not allocated" ); |
450 | pool_free(mallocPool, p); |
451 | } |
452 | |
453 | void *smallObj = pool_malloc(fixedPool, 10); |
454 | ASSERT(smallObj, "Memory was not allocated" ); |
455 | memset(smallObj, 1, 10); |
456 | void *ptr = pool_malloc(fixedPool, 1024); |
457 | ASSERT(ptr, "Memory was not allocated" ); |
458 | memset(ptr, 1, 1024); |
459 | void *largeObj = pool_malloc(fixedPool, minLargeObjectSize); |
460 | ASSERT(largeObj, "Memory was not allocated" ); |
461 | memset(largeObj, 1, minLargeObjectSize); |
462 | ptr = pool_malloc(fixedPool, minLargeObjectSize); |
463 | ASSERT(ptr, "Memory was not allocated" ); |
464 | memset(ptr, minLargeObjectSize, minLargeObjectSize); |
465 | pool_malloc(fixedPool, 10*minLargeObjectSize); // no leak for unsuccessful allocations |
466 | pool_free(fixedPool, smallObj); |
467 | pool_free(fixedPool, largeObj); |
468 | |
    // provoke large object cache cleanup and hope no leaks occur
470 | for( int p=MaxThread; p>=MinThread; --p ) |
471 | NativeParallelFor( p, StressLOCacheWork(mallocPool) ); |
472 | pool_destroy(mallocPool); |
473 | pool_destroy(fixedPool); |
474 | |
475 | scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL); |
476 | afterNumBackRef = allocatedBackRefCount(); |
477 | ASSERT(beforeNumBackRef==afterNumBackRef, "backreference leak detected" ); |
478 | |
479 | { |
480 | // test usedSize/cachedSize and LOC bitmask correctness |
481 | void *p[5]; |
482 | pool_create_v1(0, &pol, &mallocPool); |
483 | const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc; |
484 | const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep; |
485 | p[3] = pool_malloc(mallocPool, minLargeObjectSize+2*LargeCacheStep); |
486 | for (int i=0; i<10; i++) { |
487 | p[0] = pool_malloc(mallocPool, minLargeObjectSize); |
488 | p[1] = pool_malloc(mallocPool, minLargeObjectSize+LargeCacheStep); |
489 | pool_free(mallocPool, p[0]); |
490 | pool_free(mallocPool, p[1]); |
491 | } |
492 | ASSERT(loc->getUsedSize(), NULL); |
493 | pool_free(mallocPool, p[3]); |
494 | ASSERT(loc->getLOCSize() < 3*(minLargeObjectSize+LargeCacheStep), NULL); |
495 | const size_t maxLocalLOCSize = LocalLOCImpl<3,30>::getMaxSize(); |
496 | ASSERT(loc->getUsedSize() <= maxLocalLOCSize, NULL); |
497 | for (int i=0; i<3; i++) |
498 | p[i] = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep); |
499 | size_t currUser = loc->getUsedSize(); |
500 | ASSERT(!loc->getLOCSize() && currUser >= 3*(minLargeObjectSize+LargeCacheStep), NULL); |
501 | p[4] = pool_malloc(mallocPool, minLargeObjectSize+3*LargeCacheStep); |
502 | ASSERT(loc->getUsedSize() - currUser >= minLargeObjectSize+3*LargeCacheStep, NULL); |
503 | pool_free(mallocPool, p[4]); |
504 | ASSERT(loc->getUsedSize() <= currUser+maxLocalLOCSize, NULL); |
505 | pool_reset(mallocPool); |
506 | ASSERT(!loc->getLOCSize() && !loc->getUsedSize(), NULL); |
507 | pool_destroy(mallocPool); |
508 | } |
    // To test the LOC we need bigger lists than the production LocalLOC releases,
    // so create a special LocalLOC.
511 | { |
512 | LocalLOCImpl<2, 20> lLOC; |
513 | pool_create_v1(0, &pol, &mallocPool); |
514 | rml::internal::ExtMemoryPool *mPool = &((rml::internal::MemoryPool*)mallocPool)->extMemPool; |
515 | const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc; |
516 | const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep; |
517 | for (int i=0; i<22; i++) { |
518 | void *o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep); |
519 | bool ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool); |
520 | ASSERT(ret, NULL); |
521 | |
522 | o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep); |
523 | ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool); |
524 | ASSERT(ret, NULL); |
525 | } |
526 | lLOC.externalCleanup(mPool); |
527 | ASSERT(!loc->getUsedSize(), NULL); |
528 | |
529 | pool_destroy(mallocPool); |
530 | } |
531 | } |
532 | |
533 | void TestObjectRecognition() { |
    size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
535 | unsigned falseObjectSize = 113; // unsigned is the type expected by getObjectSize |
536 | size_t obtainedSize; |
537 | |
538 | ASSERT(sizeof(BackRefIdx)==sizeof(uintptr_t), "Unexpected size of BackRefIdx" ); |
539 | ASSERT(getObjectSize(falseObjectSize)!=falseObjectSize, "Error in test: bad choice for false object size" ); |
540 | |
541 | void* mem = scalable_malloc(2*slabSize); |
542 | ASSERT(mem, "Memory was not allocated" ); |
543 | Block* falseBlock = (Block*)alignUp((uintptr_t)mem, slabSize); |
544 | falseBlock->objectSize = falseObjectSize; |
545 | char* falseSO = (char*)falseBlock + falseObjectSize*7; |
546 | ASSERT(alignDown(falseSO, slabSize)==(void*)falseBlock, "Error in test: false object offset is too big" ); |
547 | |
548 | void* bufferLOH = scalable_malloc(2*slabSize + headersSize); |
549 | ASSERT(bufferLOH, "Memory was not allocated" ); |
550 | LargeObjectHdr* falseLO = |
551 | (LargeObjectHdr*)alignUp((uintptr_t)bufferLOH + headersSize, slabSize); |
    LargeObjectHdr* headerLO = (LargeObjectHdr*)falseLO-1;
553 | headerLO->memoryBlock = (LargeMemoryBlock*)bufferLOH; |
554 | headerLO->memoryBlock->unalignedSize = 2*slabSize + headersSize; |
555 | headerLO->memoryBlock->objectSize = slabSize + headersSize; |
556 | headerLO->backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true); |
557 | setBackRef(headerLO->backRefIdx, headerLO); |
558 | ASSERT(scalable_msize(falseLO) == slabSize + headersSize, |
559 | "Error in test: LOH falsification failed" ); |
560 | removeBackRef(headerLO->backRefIdx); |
561 | |
562 | const int NUM_OF_IDX = BR_MAX_CNT+2; |
563 | BackRefIdx idxs[NUM_OF_IDX]; |
564 | for (int cnt=0; cnt<2; cnt++) { |
565 | for (int master = -10; master<10; master++) { |
566 | falseBlock->backRefIdx.master = (uint16_t)master; |
567 | headerLO->backRefIdx.master = (uint16_t)master; |
568 | |
569 | for (int bl = -10; bl<BR_MAX_CNT+10; bl++) { |
570 | falseBlock->backRefIdx.offset = (uint16_t)bl; |
571 | headerLO->backRefIdx.offset = (uint16_t)bl; |
572 | |
573 | for (int largeObj = 0; largeObj<2; largeObj++) { |
574 | falseBlock->backRefIdx.largeObj = largeObj; |
575 | headerLO->backRefIdx.largeObj = largeObj; |
576 | |
577 | obtainedSize = __TBB_malloc_safer_msize(falseSO, NULL); |
578 | ASSERT(obtainedSize==0, "Incorrect pointer accepted" ); |
579 | obtainedSize = __TBB_malloc_safer_msize(falseLO, NULL); |
580 | ASSERT(obtainedSize==0, "Incorrect pointer accepted" ); |
581 | } |
582 | } |
583 | } |
584 | if (cnt == 1) { |
585 | for (int i=0; i<NUM_OF_IDX; i++) |
586 | removeBackRef(idxs[i]); |
587 | break; |
588 | } |
589 | for (int i=0; i<NUM_OF_IDX; i++) { |
590 | idxs[i] = BackRefIdx::newBackRef(/*largeObj=*/false); |
591 | setBackRef(idxs[i], NULL); |
592 | } |
593 | } |
594 | char *smallPtr = (char*)scalable_malloc(falseObjectSize); |
595 | obtainedSize = __TBB_malloc_safer_msize(smallPtr, NULL); |
596 | ASSERT(obtainedSize==getObjectSize(falseObjectSize), "Correct pointer not accepted?" ); |
597 | scalable_free(smallPtr); |
598 | |
599 | obtainedSize = __TBB_malloc_safer_msize(mem, NULL); |
600 | ASSERT(obtainedSize>=2*slabSize, "Correct pointer not accepted?" ); |
601 | scalable_free(mem); |
602 | scalable_free(bufferLOH); |
603 | } |
604 | |
605 | class TestBackendWork: public SimpleBarrier { |
606 | struct TestBlock { |
607 | intptr_t data; |
608 | BackRefIdx idx; |
609 | }; |
610 | static const int ITERS = 20; |
611 | |
612 | rml::internal::Backend *backend; |
613 | public: |
614 | TestBackendWork(rml::internal::Backend *bknd) : backend(bknd) {} |
615 | void operator()(int) const { |
616 | barrier.wait(); |
617 | |
618 | for (int i=0; i<ITERS; i++) { |
619 | BlockI *slabBlock = backend->getSlabBlock(1); |
620 | ASSERT(slabBlock, "Memory was not allocated" ); |
621 | uintptr_t prevBlock = (uintptr_t)slabBlock; |
622 | backend->putSlabBlock(slabBlock); |
623 | |
624 | LargeMemoryBlock *largeBlock = backend->getLargeBlock(16*1024); |
625 | ASSERT(largeBlock, "Memory was not allocated" ); |
626 | ASSERT((uintptr_t)largeBlock != prevBlock, |
627 | "Large block cannot be reused from slab memory, only in fixed_pool case." ); |
628 | backend->putLargeBlock(largeBlock); |
629 | } |
630 | } |
631 | }; |
632 | |
633 | void TestBackend() |
634 | { |
635 | rml::MemPoolPolicy pol(getMallocMem, putMallocMem); |
636 | rml::MemoryPool *mPool; |
637 | pool_create_v1(0, &pol, &mPool); |
638 | rml::internal::ExtMemoryPool *ePool = &((rml::internal::MemoryPool*)mPool)->extMemPool; |
639 | rml::internal::Backend *backend = &ePool->backend; |
640 | |
641 | for( int p=MaxThread; p>=MinThread; --p ) { |
        // regression test against a race condition in backend synchronization,
        // triggered only when the WhiteboxTestingYield() call yields
644 | for (int i=0; i<100; i++) { |
645 | TestBackendWork::initBarrier(p); |
646 | NativeParallelFor( p, TestBackendWork(backend) ); |
647 | } |
648 | } |
649 | |
650 | BlockI *block = backend->getSlabBlock(1); |
651 | ASSERT(block, "Memory was not allocated" ); |
652 | backend->putSlabBlock(block); |
653 | |
    // Check that the backend increases the amount of allocated memory when memory
    // is allocated and decreases it when the memory is released.
655 | const size_t memSize0 = backend->getTotalMemSize(); |
656 | LargeMemoryBlock *lmb = backend->getLargeBlock(4*MByte); |
657 | ASSERT( lmb, ASSERT_TEXT ); |
658 | |
659 | const size_t memSize1 = backend->getTotalMemSize(); |
    ASSERT( (intptr_t)(memSize1-memSize0) >= 4*MByte, "The backend has not increased the amount of memory in use." );
661 | |
662 | backend->putLargeBlock(lmb); |
663 | const size_t memSize2 = backend->getTotalMemSize(); |
    ASSERT( memSize2 == memSize0, "The backend has not decreased the amount of memory in use." );
665 | |
666 | pool_destroy(mPool); |
667 | } |
668 | |
669 | void TestBitMask() |
670 | { |
671 | BitMaskMin<256> mask; |
672 | |
673 | mask.reset(); |
674 | mask.set(10, 1); |
675 | mask.set(5, 1); |
676 | mask.set(1, 1); |
677 | ASSERT(mask.getMinTrue(2) == 5, NULL); |
678 | |
679 | mask.reset(); |
680 | mask.set(0, 1); |
681 | mask.set(64, 1); |
682 | mask.set(63, 1); |
683 | mask.set(200, 1); |
684 | mask.set(255, 1); |
685 | ASSERT(mask.getMinTrue(0) == 0, NULL); |
686 | ASSERT(mask.getMinTrue(1) == 63, NULL); |
687 | ASSERT(mask.getMinTrue(63) == 63, NULL); |
688 | ASSERT(mask.getMinTrue(64) == 64, NULL); |
689 | ASSERT(mask.getMinTrue(101) == 200, NULL); |
690 | ASSERT(mask.getMinTrue(201) == 255, NULL); |
691 | mask.set(255, 0); |
692 | ASSERT(mask.getMinTrue(201) == -1, NULL); |
693 | } |
694 | |
695 | size_t getMemSize() |
696 | { |
697 | return defaultMemPool->extMemPool.backend.getTotalMemSize(); |
698 | } |
699 | |
700 | class CheckNotCached { |
701 | static size_t memSize; |
702 | public: |
703 | void operator() () const { |
704 | int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1); |
705 | ASSERT(res == TBBMALLOC_OK, NULL); |
706 | if (memSize==(size_t)-1) { |
707 | memSize = getMemSize(); |
708 | } else { |
709 | ASSERT(getMemSize() == memSize, NULL); |
710 | memSize=(size_t)-1; |
711 | } |
712 | } |
713 | }; |
714 | |
715 | size_t CheckNotCached::memSize = (size_t)-1; |
716 | |
717 | class RunTestHeapLimit: public SimpleBarrier { |
718 | public: |
719 | void operator()( int /*mynum*/ ) const { |
        // Provoke bootstrap heap initialization before recording memory size.
        // NOTE: The initialization must be done with a "large" object, since a
        // "small" object allocation pins a slab as the active block, and that
        // block cannot be released from a foreign thread.
725 | scalable_free(scalable_malloc(minLargeObjectSize)); |
726 | barrier.wait(CheckNotCached()); |
727 | for (size_t n = minLargeObjectSize; n < 5*1024*1024; n += 128*1024) |
728 | scalable_free(scalable_malloc(n)); |
729 | barrier.wait(CheckNotCached()); |
730 | } |
731 | }; |
732 | |
733 | void TestHeapLimit() |
734 | { |
735 | if(!isMallocInitialized()) doInitialization(); |
736 | // tiny limit to stop caching |
737 | int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1); |
738 | ASSERT(res == TBBMALLOC_OK, NULL); |
739 | // Provoke bootstrap heap initialization before recording memory size. |
740 | scalable_free(scalable_malloc(8)); |
741 | size_t n, sizeBefore = getMemSize(); |
742 | |
    // Try to provoke a call to the OS for memory, to check that
    // requests are not fulfilled from caches.
    // A single call is not enough here because of backend fragmentation.
746 | for (n = minLargeObjectSize; n < 10*1024*1024; n += 16*1024) { |
747 | void *p = scalable_malloc(n); |
748 | bool leave = (sizeBefore != getMemSize()); |
749 | scalable_free(p); |
750 | if (leave) |
751 | break; |
752 | ASSERT(sizeBefore == getMemSize(), "No caching expected" ); |
753 | } |
    ASSERT(n < 10*1024*1024, "scalable_malloc doesn't provoke an OS request for memory; "
           "is some internal cache still used?" );
756 | |
757 | for( int p=MaxThread; p>=MinThread; --p ) { |
758 | RunTestHeapLimit::initBarrier( p ); |
759 | NativeParallelFor( p, RunTestHeapLimit() ); |
760 | } |
    // setting the limit also tries to comply with it (releasing cached memory), so call it here
762 | res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1); |
763 | ASSERT(res == TBBMALLOC_OK, NULL); |
764 | size_t m = getMemSize(); |
765 | ASSERT(sizeBefore == m, NULL); |
766 | // restore default |
767 | res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 0); |
768 | ASSERT(res == TBBMALLOC_OK, NULL); |
769 | } |
770 | |
771 | void checkNoHugePages() |
772 | { |
773 | ASSERT(!hugePages.isEnabled, "scalable_allocation_mode " |
774 | "must have priority over environment variable" ); |
775 | } |
776 | |
777 | /*---------------------------------------------------------------------------*/ |
// A regression test against bugs in the TBBMALLOC_CLEAN_ALL_BUFFERS allocation command.
// The idea is to allocate and deallocate a set of objects randomly in parallel.
// For large sizes (16K), it forces conflicts in the backend during coalescing.
// For small sizes (4K), it forces cross-thread deallocations and thus orphaned slabs.
// Global cleanup should process orphaned slabs and the queue of postponed coalescing
// requests, otherwise it will not be able to unmap all unused memory.
784 | |
785 | const int num_allocs = 10*1024; |
786 | void *ptrs[num_allocs]; |
787 | tbb::atomic<int> alloc_counter; |
788 | |
789 | inline void multiThreadAlloc(size_t alloc_size) { |
790 | for( int i = alloc_counter++; i < num_allocs; i = alloc_counter++ ) { |
791 | ptrs[i] = scalable_malloc( alloc_size ); |
792 | ASSERT( ptrs[i] != NULL, "scalable_malloc returned zero." ); |
793 | } |
794 | } |
795 | inline void crossThreadDealloc() { |
796 | for( int i = --alloc_counter; i >= 0; i = --alloc_counter ) { |
797 | if (i < num_allocs) scalable_free( ptrs[i] ); |
798 | } |
799 | } |
800 | |
801 | template<int AllocSize> |
802 | struct TestCleanAllBuffersBody : public SimpleBarrier { |
803 | void operator() ( int ) const { |
804 | barrier.wait(); |
805 | multiThreadAlloc(AllocSize); |
806 | barrier.wait(); |
807 | crossThreadDealloc(); |
808 | } |
809 | }; |
810 | |
811 | template<int AllocSize> |
812 | void TestCleanAllBuffers() { |
813 | const int num_threads = 8; |
814 | // Clean up if something was allocated before the test |
815 | scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0); |
816 | |
817 | size_t memory_in_use_before = getMemSize(); |
818 | alloc_counter = 0; |
819 | TestCleanAllBuffersBody<AllocSize>::initBarrier(num_threads); |
820 | |
821 | NativeParallelFor(num_threads, TestCleanAllBuffersBody<AllocSize>()); |
822 | // TODO: reproduce the bug conditions more reliably |
823 | if ( defaultMemPool->extMemPool.backend.coalescQ.blocksToFree == NULL ) |
824 | REMARK( "Warning: The queue of postponed coalescing requests is empty. Unable to create the condition for bug reproduction.\n" ); |
825 | int result = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0); |
826 | ASSERT( result == TBBMALLOC_OK, "The cleanup request has not cleaned anything." ); |
827 | size_t memory_in_use_after = getMemSize(); |
828 | |
829 | size_t memory_leak = memory_in_use_after - memory_in_use_before; |
830 | REMARK( "memory_in_use_before = %ld\nmemory_in_use_after = %ld\n" , memory_in_use_before, memory_in_use_after ); |
831 | ASSERT( memory_leak == 0, "Cleanup was unable to release all allocated memory." ); |
832 | } |
833 | |
//! Force cross-thread deallocation of small objects to create a set of privatizable slab blocks.
//! The TBBMALLOC_CLEAN_THREAD_BUFFERS command has to privatize all of them.
836 | struct TestCleanThreadBuffersBody : public SimpleBarrier { |
837 | void operator() ( int ) const { |
838 | barrier.wait(); |
839 | multiThreadAlloc(2*1024); |
840 | barrier.wait(); |
841 | crossThreadDealloc(); |
842 | barrier.wait(); |
843 | int result = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS,0); |
844 | ASSERT(result == TBBMALLOC_OK, "Per-thread clean request has not cleaned anything." ); |
845 | |
846 | // Check that TLS was cleaned fully |
847 | TLSData *tlsCurr = defaultMemPool->getTLS(/*create=*/false); |
848 | for (int i = 0; i < numBlockBinLimit; i++) { |
849 | ASSERT(!(tlsCurr->bin[i].activeBlk), "Some bin was not cleaned." ); |
850 | } |
851 | ASSERT(!(tlsCurr->lloc.head), "Local LOC was not cleaned." ); |
852 | ASSERT(!(tlsCurr->freeSlabBlocks.head), "Free Block pool was not cleaned." ); |
853 | } |
854 | }; |
855 | |
856 | void TestCleanThreadBuffers() { |
857 | const int num_threads = 8; |
858 | // Clean up if something was allocated before the test |
859 | scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0); |
860 | |
861 | alloc_counter = 0; |
862 | TestCleanThreadBuffersBody::initBarrier(num_threads); |
863 | NativeParallelFor(num_threads, TestCleanThreadBuffersBody()); |
864 | } |
865 | |
866 | /*---------------------------------------------------------------------------*/ |
867 | /*------------------------- Large Object Cache tests ------------------------*/ |
868 | #if _MSC_VER==1600 || _MSC_VER==1500 |
869 | // ignore C4275: non dll-interface class 'stdext::exception' used as |
870 | // base for dll-interface class 'std::bad_cast' |
871 | #pragma warning (disable: 4275) |
872 | #endif |
873 | #include <vector> |
874 | #include <list> |
875 | #include __TBB_STD_SWAP_HEADER |
876 | |
877 | // default constructor of CacheBin |
878 | template<typename Props> |
879 | rml::internal::LargeObjectCacheImpl<Props>::CacheBin::CacheBin() {} |
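// The empty body is intentional: CacheBinModel (below) embeds an uninitialized
// CacheBin as its emulated state, copies every field from the real bin in its
// constructor, and then keeps the copy in sync via get()/putList() so that
// check() can compare it with the real bin.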
880 | |
881 | template<typename Props> |
882 | class CacheBinModel { |
883 | |
884 | typedef typename rml::internal::LargeObjectCacheImpl<Props>::CacheBin CacheBinType; |
885 | |
886 | // The emulated cache bin. |
887 | CacheBinType cacheBinModel; |
888 | // The reference to real cache bin inside the large object cache. |
889 | CacheBinType &cacheBin; |
890 | |
891 | const size_t size; |
892 | |
    // the model stores only the caching time of each object
894 | std::list<uintptr_t> objects; |
895 | |
896 | void doCleanup() { |
897 | if ( cacheBinModel.cachedSize > Props::TooLargeFactor*cacheBinModel.usedSize ) tooLargeLOC++; |
898 | else tooLargeLOC = 0; |
899 | |
900 | if (tooLargeLOC>3 && cacheBinModel.ageThreshold) |
901 | cacheBinModel.ageThreshold = (cacheBinModel.ageThreshold + cacheBinModel.meanHitRange)/2; |
902 | |
903 | uintptr_t currTime = cacheCurrTime; |
904 | while (!objects.empty() && (intptr_t)(currTime - objects.front()) > cacheBinModel.ageThreshold) { |
905 | cacheBinModel.cachedSize -= size; |
906 | cacheBinModel.lastCleanedAge = objects.front(); |
907 | objects.pop_front(); |
908 | } |
909 | |
910 | cacheBinModel.oldest = objects.empty() ? 0 : objects.front(); |
911 | } |
912 | |
913 | public: |
914 | CacheBinModel(CacheBinType &_cacheBin, size_t allocSize) : cacheBin(_cacheBin), size(allocSize) { |
915 | cacheBinModel.oldest = cacheBin.oldest; |
916 | cacheBinModel.lastCleanedAge = cacheBin.lastCleanedAge; |
917 | cacheBinModel.ageThreshold = cacheBin.ageThreshold; |
918 | cacheBinModel.usedSize = cacheBin.usedSize; |
919 | cacheBinModel.cachedSize = cacheBin.cachedSize; |
920 | cacheBinModel.meanHitRange = cacheBin.meanHitRange; |
921 | cacheBinModel.lastGet = cacheBin.lastGet; |
922 | } |
923 | void get() { |
924 | uintptr_t currTime = ++cacheCurrTime; |
925 | |
926 | if ( objects.empty() ) { |
927 | const uintptr_t sinceLastGet = currTime - cacheBinModel.lastGet; |
928 | if ( ( cacheBinModel.ageThreshold && sinceLastGet > Props::LongWaitFactor*cacheBinModel.ageThreshold ) || |
929 | ( cacheBinModel.lastCleanedAge && sinceLastGet > Props::LongWaitFactor*(cacheBinModel.lastCleanedAge - cacheBinModel.lastGet) ) ) |
930 | cacheBinModel.lastCleanedAge = cacheBinModel.ageThreshold = 0; |
931 | |
932 | if (cacheBinModel.lastCleanedAge) |
933 | cacheBinModel.ageThreshold = Props::OnMissFactor*(currTime - cacheBinModel.lastCleanedAge); |
934 | } else { |
935 | uintptr_t obj_age = objects.back(); |
936 | objects.pop_back(); |
937 | if ( objects.empty() ) cacheBinModel.oldest = 0; |
938 | |
939 | intptr_t hitRange = currTime - obj_age; |
940 | cacheBinModel.meanHitRange = cacheBinModel.meanHitRange? (cacheBinModel.meanHitRange + hitRange)/2 : hitRange; |
941 | |
942 | cacheBinModel.cachedSize -= size; |
943 | } |
944 | |
945 | cacheBinModel.usedSize += size; |
946 | cacheBinModel.lastGet = currTime; |
947 | |
948 | if ( currTime % rml::internal::cacheCleanupFreq == 0 ) doCleanup(); |
949 | } |
950 | |
951 | void putList( int num ) { |
952 | uintptr_t currTime = cacheCurrTime; |
953 | cacheCurrTime += num; |
954 | |
955 | cacheBinModel.usedSize -= num*size; |
956 | |
957 | bool cleanUpNeeded = false; |
958 | if ( !cacheBinModel.lastCleanedAge ) { |
959 | cacheBinModel.lastCleanedAge = ++currTime; |
960 | cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0; |
961 | num--; |
962 | } |
963 | |
964 | for ( int i=1; i<=num; ++i ) { |
965 | currTime+=1; |
966 | cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0; |
967 | if ( objects.empty() ) |
968 | cacheBinModel.oldest = currTime; |
969 | objects.push_back(currTime); |
970 | } |
971 | |
972 | cacheBinModel.cachedSize += num*size; |
973 | |
974 | if ( cleanUpNeeded ) doCleanup(); |
975 | } |
976 | |
977 | void check() { |
978 | ASSERT(cacheBinModel.oldest == cacheBin.oldest, ASSERT_TEXT); |
979 | ASSERT(cacheBinModel.lastCleanedAge == cacheBin.lastCleanedAge, ASSERT_TEXT); |
980 | ASSERT(cacheBinModel.ageThreshold == cacheBin.ageThreshold, ASSERT_TEXT); |
981 | ASSERT(cacheBinModel.usedSize == cacheBin.usedSize, ASSERT_TEXT); |
982 | ASSERT(cacheBinModel.cachedSize == cacheBin.cachedSize, ASSERT_TEXT); |
983 | ASSERT(cacheBinModel.meanHitRange == cacheBin.meanHitRange, ASSERT_TEXT); |
984 | ASSERT(cacheBinModel.lastGet == cacheBin.lastGet, ASSERT_TEXT); |
985 | } |
986 | |
987 | static uintptr_t cacheCurrTime; |
988 | static intptr_t tooLargeLOC; |
989 | }; |
990 | |
991 | template<typename Props> uintptr_t CacheBinModel<Props>::cacheCurrTime; |
992 | template<typename Props> intptr_t CacheBinModel<Props>::tooLargeLOC; |
993 | |
994 | template <typename Scenario> |
995 | void LOCModelTester() { |
996 | defaultMemPool->extMemPool.loc.cleanAll(); |
997 | defaultMemPool->extMemPool.loc.reset(); |
998 | |
999 | const size_t size = 16 * 1024; |
    const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1001 | const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment); |
1002 | const int binIdx = defaultMemPool->extMemPool.loc.largeCache.sizeToIdx( allocationSize ); |
1003 | |
1004 | CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::cacheCurrTime = defaultMemPool->extMemPool.loc.cacheCurrTime; |
1005 | CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::tooLargeLOC = defaultMemPool->extMemPool.loc.largeCache.tooLargeLOC; |
1006 | CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps> cacheBinModel(defaultMemPool->extMemPool.loc.largeCache.bin[binIdx], allocationSize); |
1007 | |
1008 | Scenario scen; |
1009 | for (rml::internal::LargeMemoryBlock *lmb = scen.next(); (intptr_t)lmb != (intptr_t)-1; lmb = scen.next()) { |
1010 | if ( lmb ) { |
1011 | int num=1; |
1012 | for (rml::internal::LargeMemoryBlock *curr = lmb; curr->next; curr=curr->next) num+=1; |
1013 | defaultMemPool->extMemPool.freeLargeObject(lmb); |
1014 | cacheBinModel.putList(num); |
1015 | } else { |
1016 | scen.saveLmb(defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize)); |
1017 | cacheBinModel.get(); |
1018 | } |
1019 | |
1020 | cacheBinModel.check(); |
1021 | } |
1022 | } |
1023 | |
1024 | class TestBootstrap { |
1025 | bool allocating; |
1026 | std::vector<rml::internal::LargeMemoryBlock*> lmbArray; |
1027 | public: |
1028 | TestBootstrap() : allocating(true) {} |
1029 | |
1030 | rml::internal::LargeMemoryBlock* next() { |
1031 | if ( allocating ) |
1032 | return NULL; |
1033 | if ( !lmbArray.empty() ) { |
1034 | rml::internal::LargeMemoryBlock *ret = lmbArray.back(); |
1035 | lmbArray.pop_back(); |
1036 | return ret; |
1037 | } |
1038 | return (rml::internal::LargeMemoryBlock*)-1; |
1039 | } |
1040 | |
1041 | void saveLmb( rml::internal::LargeMemoryBlock *lmb ) { |
1042 | lmb->next = NULL; |
1043 | lmbArray.push_back(lmb); |
1044 | if ( lmbArray.size() == 1000 ) allocating = false; |
1045 | } |
1046 | }; |
1047 | |
1048 | class TestRandom { |
1049 | std::vector<rml::internal::LargeMemoryBlock*> lmbArray; |
1050 | int numOps; |
1051 | public: |
1052 | TestRandom() : numOps(100000) { |
1053 | srand(1234); |
1054 | } |
1055 | |
1056 | rml::internal::LargeMemoryBlock* next() { |
1057 | if ( numOps-- ) { |
1058 | if ( lmbArray.empty() || rand() / (RAND_MAX>>1) == 0 ) |
1059 | return NULL; |
1060 | size_t ind = rand()%lmbArray.size(); |
1061 | if ( ind != lmbArray.size()-1 ) std::swap(lmbArray[ind],lmbArray[lmbArray.size()-1]); |
1062 | rml::internal::LargeMemoryBlock *lmb = lmbArray.back(); |
1063 | lmbArray.pop_back(); |
1064 | return lmb; |
1065 | } |
1066 | return (rml::internal::LargeMemoryBlock*)-1; |
1067 | } |
1068 | |
1069 | void saveLmb( rml::internal::LargeMemoryBlock *lmb ) { |
1070 | lmb->next = NULL; |
1071 | lmbArray.push_back(lmb); |
1072 | } |
1073 | }; |
1074 | |
1075 | class TestCollapsingMallocFree : public SimpleBarrier { |
1076 | public: |
1077 | static const int NUM_ALLOCS = 100000; |
1078 | const int num_threads; |
1079 | |
1080 | TestCollapsingMallocFree( int _num_threads ) : num_threads(_num_threads) { |
1081 | initBarrier( num_threads ); |
1082 | } |
1083 | |
1084 | void operator() ( int ) const { |
1085 | const size_t size = 16 * 1024; |
        const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1087 | const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment); |
1088 | |
1089 | barrier.wait(); |
1090 | for ( int i=0; i<NUM_ALLOCS; ++i ) { |
1091 | defaultMemPool->extMemPool.freeLargeObject( |
1092 | defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize) ); |
1093 | } |
1094 | } |
1095 | |
1096 | void check() { |
1097 | ASSERT( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed, ASSERT_TEXT ); |
        ASSERT( tbbmalloc_whitebox::locGetProcessed < num_threads*NUM_ALLOCS, "Not a single malloc/free pair was collapsed." );
1099 | } |
1100 | }; |
1101 | |
1102 | class TestCollapsingBootstrap : public SimpleBarrier { |
1103 | class CheckNumAllocs { |
1104 | const int num_threads; |
1105 | public: |
1106 | CheckNumAllocs( int _num_threads ) : num_threads(_num_threads) {} |
1107 | void operator()() const { |
1108 | ASSERT( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS, ASSERT_TEXT ); |
1109 | ASSERT( tbbmalloc_whitebox::locPutProcessed == 0, ASSERT_TEXT ); |
1110 | } |
1111 | }; |
1112 | public: |
1113 | static const int NUM_ALLOCS = 1000; |
1114 | const int num_threads; |
1115 | |
1116 | TestCollapsingBootstrap( int _num_threads ) : num_threads(_num_threads) { |
1117 | initBarrier( num_threads ); |
1118 | } |
1119 | |
1120 | void operator() ( int ) const { |
1121 | const size_t size = 16 * 1024; |
        size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1123 | size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment); |
1124 | |
1125 | barrier.wait(); |
1126 | rml::internal::LargeMemoryBlock *lmbArray[NUM_ALLOCS]; |
1127 | for ( int i=0; i<NUM_ALLOCS; ++i ) |
1128 | lmbArray[i] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize); |
1129 | |
1130 | barrier.wait(CheckNumAllocs(num_threads)); |
1131 | for ( int i=0; i<NUM_ALLOCS; ++i ) |
1132 | defaultMemPool->extMemPool.freeLargeObject( lmbArray[i] ); |
1133 | } |
1134 | |
1135 | void check() { |
1136 | ASSERT( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed, ASSERT_TEXT ); |
1137 | ASSERT( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS, ASSERT_TEXT ); |
1138 | } |
1139 | }; |
1140 | |
1141 | template <typename Scenario> |
1142 | void LOCCollapsingTester( int num_threads ) { |
1143 | tbbmalloc_whitebox::locGetProcessed = 0; |
1144 | tbbmalloc_whitebox::locPutProcessed = 0; |
1145 | defaultMemPool->extMemPool.loc.cleanAll(); |
1146 | defaultMemPool->extMemPool.loc.reset(); |
1147 | |
1148 | Scenario scen(num_threads); |
1149 | NativeParallelFor(num_threads, scen); |
1150 | |
1151 | scen.check(); |
1152 | } |
1153 | |
1154 | void TestLOC() { |
1155 | LOCModelTester<TestBootstrap>(); |
1156 | LOCModelTester<TestRandom>(); |
1157 | |
1158 | const int num_threads = 16; |
1159 | LOCCollapsingTester<TestCollapsingBootstrap>( num_threads ); |
1160 | if ( num_threads > 1 ) { |
1161 | REMARK( "num_threads = %d\n" , num_threads ); |
1162 | LOCCollapsingTester<TestCollapsingMallocFree>( num_threads ); |
1163 | } else { |
1164 | REPORT( "Warning: concurrency is too low for TestMallocFreeCollapsing ( num_threads = %d )\n" , num_threads ); |
1165 | } |
1166 | } |
1167 | /*---------------------------------------------------------------------------*/ |
1168 | |
1169 | void *findCacheLine(void *p) { |
1170 | return (void*)alignDown((uintptr_t)p, estimatedCacheLineSize); |
1171 | } |
1172 | |
1173 | // test that internals of Block are at expected cache lines |
1174 | void TestSlabAlignment() { |
1175 | const size_t min_sz = 8; |
1176 | const int space = 2*16*1024; // fill at least 2 slabs |
1177 | void *pointers[space / min_sz]; // the worst case is min_sz byte object |
1178 | |
1179 | for (size_t sz = min_sz; sz <= 64; sz *= 2) { |
1180 | for (size_t i = 0; i < space/sz; i++) { |
1181 | pointers[i] = scalable_malloc(sz); |
1182 | Block *block = (Block *)alignDown(pointers[i], slabSize); |
1183 | MALLOC_ASSERT(findCacheLine(&block->isFull) != findCacheLine(pointers[i]), |
1184 | "A user object must not share a cache line with slab control structures." ); |
1185 | MALLOC_ASSERT(findCacheLine(&block->next) != findCacheLine(&block->nextPrivatizable), |
1186 | "GlobalBlockFields and LocalBlockFields must be on different cache lines." ); |
1187 | } |
1188 | for (size_t i = 0; i < space/sz; i++) |
1189 | scalable_free(pointers[i]); |
1190 | } |
1191 | } |
1192 | |
1193 | #include "harness_memory.h" |
1194 | |
1195 | // TODO: Consider adding Huge Pages support on macOS (special mmap flag). |
// Transparent Huge Pages support would require a different system parsing mechanism,
// because there is no /proc/meminfo on macOS.
1198 | #if __linux__ |
1199 | void TestTHP() { |
1200 | // Get backend from default memory pool |
1201 | rml::internal::Backend *backend = &(defaultMemPool->extMemPool.backend); |
1202 | |
1203 | // Configure malloc to use huge pages |
1204 | scalable_allocation_mode(USE_HUGE_PAGES, 1); |
1205 | MALLOC_ASSERT(hugePages.isEnabled, "Huge pages should be enabled via scalable_allocation_mode" ); |
1206 | |
1207 | const int HUGE_PAGE_SIZE = 2 * 1024 * 1024; |
1208 | |
1209 | // allocCount transparent huge pages should be allocated |
1210 | const int allocCount = 10; |
1211 | |
1212 | // Allocate huge page aligned memory regions to track system |
1213 | // counters for transparent huge pages |
1214 | void* allocPtrs[allocCount]; |
1215 | |
1216 | // Wait for the system to update process memory info files after other tests |
1217 | Harness::Sleep(4000); |
1218 | |
1219 | // Parse system info regarding current THP status |
1220 | size_t currentSystemTHPCount = getSystemTHPCount(); |
1221 | size_t currentSystemTHPAllocatedSize = getSystemTHPAllocatedSize(); |
1222 | |
1223 | for (int i = 0; i < allocCount; i++) { |
        // The requested size is not huge-page aligned; the backend is expected to align it internally
1225 | size_t allocSize = HUGE_PAGE_SIZE - (i * 1000); |
1226 | |
1227 | // Map memory |
1228 | allocPtrs[i] = backend->allocRawMem(allocSize); |
1229 | |
1230 | MALLOC_ASSERT(allocPtrs[i], "Allocation not succeeded." ); |
        MALLOC_ASSERT(allocSize == HUGE_PAGE_SIZE,
                "Allocation size has to be aligned on the huge page size internally." );
1233 | |
1234 | // First touch policy - no real pages allocated by OS without accessing the region |
1235 | memset(allocPtrs[i], 1, allocSize); |
1236 | |
1237 | MALLOC_ASSERT(isAligned(allocPtrs[i], HUGE_PAGE_SIZE), |
1238 | "The pointer returned by scalable_malloc is not aligned on huge page size." ); |
1239 | } |
1240 | |
1241 | // Wait for the system to update process memory info files after allocations |
1242 | Harness::Sleep(4000); |
1243 | |
    // Generally, the kernel tries to allocate transparent huge pages, but sometimes it cannot
    // (observed on SLES 11/12), so treat these system info checks only as a remark.
    // Also, some systems can allocate more memory than needed in the background (observed on Ubuntu 14.04).
1247 | size_t newSystemTHPCount = getSystemTHPCount(); |
1248 | size_t newSystemTHPAllocatedSize = getSystemTHPAllocatedSize(); |
1249 | if ((newSystemTHPCount - currentSystemTHPCount) < allocCount |
1250 | && (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) { |
1251 | REPORT( "Warning: the system didn't allocate needed amount of THPs.\n" ); |
1252 | } |
1253 | |
1254 | // Test memory unmap |
1255 | for (int i = 0; i < allocCount; i++) { |
1256 | MALLOC_ASSERT(backend->freeRawMem(allocPtrs[i], HUGE_PAGE_SIZE), |
1257 | "Something went wrong during raw memory free" ); |
1258 | } |
1259 | } |
1260 | #endif // __linux__ |
1261 | |
1262 | inline size_t getStabilizedMemUsage() { |
1263 | for (int i = 0; i < 3; i++) GetMemoryUsage(); |
1264 | return GetMemoryUsage(); |
1265 | } |
1266 | |
1267 | inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) { |
1268 | rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock; |
1269 | origBlockSize = origLmb->unalignedSize; |
1270 | |
1271 | void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0); |
1272 | |
    // Retrieve the reallocated block information
1274 | rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock; |
1275 | reallocBlockSize = reallocLmb->unalignedSize; |
1276 | |
1277 | return reallocPtr; |
1278 | } |
1279 | |
1280 | void TestReallocDecreasing() { |
1281 | |
1282 | /* Testing that actual reallocation happens for large objects that do not fit the backend cache |
1283 | but decrease in size by a factor of >= 2. */ |
1284 | |
1285 | size_t startSize = 100 * 1024 * 1024; |
1286 | size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize(); |
1287 | void* origPtr = scalable_malloc(startSize); |
1288 | void* reallocPtr = NULL; |
1289 | |
    // Realloc to a size 1MB smaller
1291 | size_t origBlockSize = 42; |
1292 | size_t reallocBlockSize = 43; |
1293 | reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize); |
1294 | MALLOC_ASSERT(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change" ); |
1295 | MALLOC_ASSERT(reallocPtr == origPtr, "Original pointer shouldn't change" ); |
1296 | |
    // Repeatedly realloc to decreasing sizes while the size stays above the maximum binned size
1298 | size_t reallocSize = (startSize / 2) - 1000; // exact realloc |
1299 | while(reallocSize > maxBinnedSize) { |
1300 | |
1301 | // Prevent huge/large objects caching |
1302 | defaultMemPool->extMemPool.loc.cleanAll(); |
1303 | // Prevent local large object caching |
1304 | TLSData *tls = defaultMemPool->getTLS(/*create=*/false); |
1305 | tls->lloc.externalCleanup(&defaultMemPool->extMemPool); |
1306 | |
1307 | size_t sysMemUsageBefore = getStabilizedMemUsage(); |
1308 | size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize(); |
1309 | |
1310 | reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize); |
1311 | |
        MALLOC_ASSERT(origBlockSize > reallocBlockSize, "Reallocated block size should decrease." );
1313 | |
1314 | size_t sysMemUsageAfter = getStabilizedMemUsage(); |
1315 | size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize(); |
1316 | |
        // Skip the check when backend caching occurred or the system memory usage info could not be read
1318 | if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) { |
            MALLOC_ASSERT(sysMemUsageBefore > sysMemUsageAfter, "Memory was not released" );
1320 | } |
1321 | |
1322 | origPtr = reallocPtr; |
1323 | reallocSize = (reallocSize / 2) - 1000; // exact realloc |
1324 | } |
1325 | scalable_free(reallocPtr); |
1326 | |
1327 | /* TODO: Decreasing reallocation of large objects that fit backend cache */ |
1328 | /* TODO: Small objects decreasing reallocation test */ |
1329 | } |
1330 | #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32) |
1331 | |
1332 | #include "../src/tbbmalloc/tbb_function_replacement.cpp" |
1333 | #include <string> |
1334 | namespace FunctionReplacement { |
1335 | FunctionInfo funcInfo = { "funcname" ,"dllname" }; |
1336 | char **func_replacement_log; |
1337 | int status; |
1338 | |
1339 | void LogCleanup() { |
1340 | // Free all allocated memory |
1341 | for (unsigned i = 0; i < Log::record_number; i++){ |
1342 | HeapFree(GetProcessHeap(), 0, Log::records[i]); |
1343 | } |
1344 | for (unsigned i = 0; i < Log::RECORDS_COUNT + 1; i++){ |
1345 | Log::records[i] = NULL; |
1346 | } |
1347 | Log::replacement_status = true; |
1348 | Log::record_number = 0; |
1349 | } |
1350 | |
1351 | void TestEmptyLog() { |
1352 | status = TBB_malloc_replacement_log(&func_replacement_log); |
1353 | |
1354 | ASSERT(status == -1, "Status is true, but log is empty" ); |
1355 | ASSERT(*func_replacement_log == NULL, "Log must be empty" ); |
1356 | } |
1357 | |
1358 | void TestLogOverload() { |
1359 | for (int i = 0; i < 1000; i++) |
1360 | Log::record(funcInfo, "opcode string" , true); |
1361 | |
1362 | status = TBB_malloc_replacement_log(&func_replacement_log); |
1363 | // Find last record |
1364 | for (; *(func_replacement_log + 1) != 0; func_replacement_log++) {} |
1365 | |
1366 | std::string last_line(*func_replacement_log); |
1367 | ASSERT(status == 0, "False status, but all functions found" ); |
1368 | ASSERT(last_line.compare("Log was truncated." ) == 0, "Log overflow was not handled" ); |
1369 | |
1370 | // Change status |
1371 | Log::record(funcInfo, "opcode string" , false); |
1372 | status = TBB_malloc_replacement_log(NULL); |
1373 | ASSERT(status == -1, "Status is true, but we have false search case" ); |
1374 | |
1375 | LogCleanup(); |
1376 | } |
1377 | |
1378 | void TestFalseSearchCase() { |
1379 | Log::record(funcInfo, "opcode string" , false); |
1380 | std::string expected_line = "Fail: " + std::string(funcInfo.funcName) + " (" + |
1381 | std::string(funcInfo.dllName) + "), byte pattern: <opcode string>" ; |
1382 | |
1383 | status = TBB_malloc_replacement_log(&func_replacement_log); |
1384 | |
        ASSERT(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content" );
1386 | ASSERT(status == -1, "Status is true, but we have false search case" ); |
1387 | LogCleanup(); |
1388 | } |
1389 | |
1390 | void TestWrongFunctionInDll(){ |
1391 | HMODULE ucrtbase_handle = GetModuleHandle("ucrtbase.dll" ); |
1392 | if (ucrtbase_handle) { |
1393 | IsPrologueKnown("ucrtbase.dll" , "fake_function" , NULL, ucrtbase_handle); |
1394 | std::string expected_line = "Fail: fake_function (ucrtbase.dll), byte pattern: <unknown>" ; |
1395 | |
1396 | status = TBB_malloc_replacement_log(&func_replacement_log); |
1397 | |
            ASSERT(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content" );
            ASSERT(status == -1, "Status reports success, but there is a failed search case" );
1400 | LogCleanup(); |
1401 | } else { |
            REMARK("Cannot find ucrtbase.dll on the system, test skipped!\n" );
1403 | } |
1404 | } |
1405 | } |
1406 | |
void TestFunctionReplacementLog() {
1408 | using namespace FunctionReplacement; |
1409 | // Do not reorder the test cases |
1410 | TestEmptyLog(); |
1411 | TestLogOverload(); |
1412 | TestFalseSearchCase(); |
1413 | TestWrongFunctionInDll(); |
1414 | } |
1415 | |
1416 | #endif /*!__TBB_WIN8UI_SUPPORT && defined(_WIN32)*/ |
1417 | |
1418 | #include <cmath> // pow function |
1419 | |
// Huge objects cache: the formula Size = MinSize * 2^(Index / StepFactor) gives the bin size,
// but it does not match our sizeToIdx approximation algorithm, where step sizes between major
// (power-of-2) sizes are equal. Used internally for the test. Static cast to avoid warnings.
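// Consequently, consecutive huge-cache bin sizes grow geometrically: each step multiplies the size by 2^(1/StepFactor).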
1423 | inline size_t hocIdxToSizeFormula(int idx) { |
1424 | return static_cast<size_t>(float(rml::internal::LargeObjectCache::maxLargeSize) * |
1425 | pow(2, float(idx) / float(rml::internal::LargeObjectCache::HugeBSProps::StepFactor))); |
1426 | } |
// Large objects cache: bin sizes form an arithmetic progression, MinSize + idx * CacheStep
1428 | inline size_t locIdxToSizeFormula(int idx) { |
1429 | return rml::internal::LargeObjectCache::LargeBSProps::MinSize + |
1430 | (idx * rml::internal::LargeObjectCache::LargeBSProps::CacheStep); |
1431 | } |
1432 | |
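// Round the checked size up to its cache bin and verify that sizeToIdx maps the aligned size back to the expected bin index.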
1433 | template <typename CacheType> |
1434 | void TestLOCacheBinsConverterImpl(int idx, size_t checkingSize) { |
1435 | size_t alignedSize = CacheType::alignToBin(checkingSize); |
1436 | MALLOC_ASSERT(alignedSize >= checkingSize, "Size is not correctly aligned" ); |
1437 | int calcIdx = CacheType::sizeToIdx(alignedSize); |
    MALLOC_ASSERT(calcIdx == idx, "Index was not calculated correctly from the size" );
1439 | } |
1440 | |
1441 | void TestLOCacheBinsConverter(){ |
1442 | typedef rml::internal::LargeObjectCache::LargeCacheType LargeCacheType; |
1443 | typedef rml::internal::LargeObjectCache::HugeCacheType HugeCacheType; |
1444 | |
1445 | size_t checkingSize = 0; |
1446 | for (int idx = 0; idx < LargeCacheType::numBins; idx++) { |
1447 | checkingSize = locIdxToSizeFormula(idx); |
1448 | TestLOCacheBinsConverterImpl<LargeCacheType>(idx, checkingSize); |
1449 | } |
1450 | for (int idx = 0; idx < HugeCacheType::numBins; idx++) { |
1451 | checkingSize = hocIdxToSizeFormula(idx); |
1452 | TestLOCacheBinsConverterImpl<HugeCacheType>(idx, checkingSize); |
1453 | } |
1454 | } |
1455 | |
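// Helper that populates the huge object cache around the sieve threshold and verifies
// which bins survive each cleanup type.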
1456 | struct HOThresholdTester { |
1457 | LargeObjectCache* loc; |
1458 | size_t hugeSize; |
1459 | |
1460 | static const size_t sieveSize = LargeObjectCache::defaultMaxHugeSize; |
    // The sieve starts at 64MB (the 24th cache bin); checking a 4-bin radius around it is enough
    // while keeping memory consumption decent (especially for 32-bit architectures)
1463 | static const int MIN_BIN_IDX = 20; |
1464 | static const int MAX_BIN_IDX = 28; |
1465 | |
1466 | enum CleanupType { |
1467 | NO_CLEANUP, |
1468 | REGULAR_CLEANUP, |
1469 | HARD_CLEANUP |
1470 | }; |
1471 | |
1472 | void populateCache() { |
1473 | LargeMemoryBlock* loArray[MAX_BIN_IDX - MIN_BIN_IDX]; |
        // To avoid backend::softCacheCleanup consequences (cleanup by isLOCToolarge),
        // first allocate all objects and then cache them at once.
        // Moreover, because the first cached item is still dropped from the cache due to the lack of history,
        // redo the allocation twice.
1478 | for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) { |
1479 | size_t allocationSize = alignedSizeFromIdx(idx); |
1480 | int localIdx = idx - MIN_BIN_IDX; |
1481 | loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize); |
1482 | MALLOC_ASSERT(loArray[localIdx], "Large object was not allocated." ); |
1483 | loc->put(loArray[localIdx]); |
1484 | loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize); |
1485 | } |
1486 | for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) { |
1487 | loc->put(loArray[idx - MIN_BIN_IDX]); |
1488 | } |
1489 | } |
1490 | void clean(bool all) { |
1491 | if (all) { |
1492 | // Should avoid any threshold and clean all bins |
1493 | loc->cleanAll(); |
1494 | } else { |
1495 | // Regular cleanup should do nothing for bins above threshold. Decreasing option used |
1496 | // for the test to be sure that all objects below defaultMaxHugeSize (sieveSize) were cleaned |
1497 | loc->regularCleanup(); |
1498 | loc->decreasingCleanup(); |
1499 | } |
1500 | } |
1501 | void check(CleanupType type) { |
1502 | for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) { |
1503 | size_t objectSize = alignedSizeFromIdx(idx); |
            // Objects below the sieve threshold and at or above the huge object threshold should stay cached
            // (others should be sieved), unless the whole cache is dropped. Regular cleanup drops only objects below the sieve size.
            if (type == NO_CLEANUP && sizeInCacheRange(objectSize)) {
                MALLOC_ASSERT(objectInCacheBin(idx, objectSize), "Object was released from the cache, but it shouldn't have been." );
            } else if (type == REGULAR_CLEANUP && (objectSize >= hugeSize)) {
                MALLOC_ASSERT(objectInCacheBin(idx, objectSize), "Object was released from the cache, but it shouldn't have been." );
1510 | } else { // HARD_CLEANUP |
1511 | MALLOC_ASSERT(cacheBinEmpty(idx), "Object is still cached." ); |
1512 | } |
1513 | } |
1514 | } |
1515 | |
1516 | private: |
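    // A bin is considered empty when it reports no cached size and returns no block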
1517 | bool cacheBinEmpty(int idx) { |
1518 | return (loc->hugeCache.bin[idx].cachedSize == 0 && loc->hugeCache.bin[idx].get() == NULL); |
1519 | } |
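    // The bin holds the cached objects if its cached size is a non-zero multiple of the object size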
1520 | bool objectInCacheBin(int idx, size_t size) { |
1521 | return (loc->hugeCache.bin[idx].cachedSize != 0 && loc->hugeCache.bin[idx].cachedSize % size == 0); |
1522 | } |
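    // Sizes below the sieve threshold or at/above the huge size threshold are expected to be cached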
1523 | bool sizeInCacheRange(size_t size) { |
1524 | return size <= sieveSize || size >= hugeSize; |
1525 | } |
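    // Map a huge-cache bin index to a bin-aligned allocation size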
1526 | size_t alignedSizeFromIdx(int idx) { |
1527 | return rml::internal::LargeObjectCache::alignToBin(hocIdxToSizeFormula(idx)); |
1528 | } |
1529 | }; |
1530 | |
// The TBBMALLOC_SET_HUGE_SIZE_THRESHOLD value should be set before the test,
// through the scalable API or the environment variable
1533 | void TestHugeSizeThresholdImpl(LargeObjectCache* loc, size_t hugeSize, bool fullTesting) { |
1534 | HOThresholdTester test = {loc, hugeSize}; |
1535 | test.populateCache(); |
1536 | // Check the default sieve value |
1537 | test.check(HOThresholdTester::NO_CLEANUP); |
1538 | |
1539 | if(fullTesting) { |
1540 | // Check that objects above threshold stay in cache after regular cleanup |
1541 | test.clean(/*all*/false); |
1542 | test.check(HOThresholdTester::REGULAR_CLEANUP); |
1543 | } |
    // Check that all objects are dropped from the cache after a hard cleanup (ignoring the huge objects threshold)
1545 | test.clean(/*all*/true); |
1546 | test.check(HOThresholdTester::HARD_CLEANUP); |
1547 | // Restore previous settings |
1548 | loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize); |
1549 | loc->reset(); |
1550 | } |
1551 | |
/*
 * Test for the default huge size and for the behaviour when huge object settings are defined
 */
1555 | void TestHugeSizeThreshold() { |
1556 | // Clean up if something was allocated before the test and reset cache state |
1557 | scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, 0); |
1558 | LargeObjectCache* loc = &defaultMemPool->extMemPool.loc; |
1559 | // Restore default settings just in case |
1560 | loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize); |
1561 | loc->reset(); |
    // First, check the default huge size value (with the max huge object threshold).
    // Everything larger than this value should be released to the OS without caching.
1564 | TestHugeSizeThresholdImpl(loc, loc->hugeSizeThreshold, false); |
    // Then set the huge object threshold.
    // All objects with sizes above the threshold will be released only after a hard cleanup.
1567 | #if !__TBB_WIN8UI_SUPPORT |
1568 | // Unit testing for environment variable |
1569 | Harness::SetEnv("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD" ,"67108864" ); |
    // The large object cache reads the threshold environment variable during initialization.
    // Reset the value before the test.
1572 | loc->hugeSizeThreshold = 0; |
1573 | loc->init(&defaultMemPool->extMemPool); |
1574 | TestHugeSizeThresholdImpl(loc, 64 * MByte, true); |
1575 | #endif |
1576 | // Unit testing for scalable_allocation_command |
1577 | scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 56 * MByte); |
1578 | TestHugeSizeThresholdImpl(loc, 56 * MByte, true); |
1579 | } |
1580 | |
1581 | int TestMain () { |
1582 | scalable_allocation_mode(USE_HUGE_PAGES, 0); |
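    // Huge pages are disabled via the API call above; the environment variable set below (where supported)
    // is expected not to re-enable them, which checkNoHugePages verifies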
1583 | #if !__TBB_WIN8UI_SUPPORT |
1584 | Harness::SetEnv("TBB_MALLOC_USE_HUGE_PAGES" ,"yes" ); |
1585 | #endif |
1586 | checkNoHugePages(); |
1587 | // backreference requires that initialization was done |
1588 | if(!isMallocInitialized()) doInitialization(); |
1589 | checkNoHugePages(); |
1590 | // to succeed, leak detection must be the 1st memory-intensive test |
1591 | TestBackRef(); |
1592 | TestCleanAllBuffers<4*1024>(); |
1593 | TestCleanAllBuffers<16*1024>(); |
1594 | TestCleanThreadBuffers(); |
1595 | TestPools(); |
1596 | TestBackend(); |
1597 | |
1598 | #if MALLOC_CHECK_RECURSION |
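    // Exercise the startup (bootstrap) allocator concurrently and verify that no startup blocks leak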
1599 | for( int p=MaxThread; p>=MinThread; --p ) { |
1600 | TestStartupAlloc::initBarrier( p ); |
1601 | NativeParallelFor( p, TestStartupAlloc() ); |
1602 | ASSERT(!firstStartupBlock, "Startup heap memory leak detected" ); |
1603 | } |
1604 | #endif |
1605 | |
1606 | TestLargeObjectCache(); |
1607 | TestObjectRecognition(); |
1608 | TestBitMask(); |
1609 | TestHeapLimit(); |
1610 | TestLOC(); |
1611 | TestSlabAlignment(); |
1612 | TestReallocDecreasing(); |
1613 | TestLOCacheBinsConverter(); |
1614 | TestHugeSizeThreshold(); |
1615 | |
1616 | #if __linux__ |
1617 | if (isTHPEnabledOnMachine()) { |
1618 | TestTHP(); |
1619 | } else { |
        REMARK("Transparent Huge Pages are not supported on the system - test skipped\n" );
1621 | } |
1622 | #endif |
1623 | |
1624 | #if !__TBB_WIN8UI_SUPPORT && defined(_WIN32) |
    TestFunctionReplacementLog();
1626 | #endif |
1627 | return Harness::Done; |
1628 | } |
1629 | |
1630 | |