1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #if __linux__ && !__ANDROID__ |
18 | // include <bits/c++config.h> indirectly so that <cstdlib> is not included |
19 | #include <cstddef> |
20 | // include <features.h> indirectly so that <stdlib.h> is not included |
21 | #include <unistd.h> |
// Work around a compiler issue with Anaconda's gcc 7.3 compiler package.
// A new gcc ported to an old libc may provide its own inline implementation
// of aligned_alloc, as required by the newer C++ standard, which makes it hard
// to redefine aligned_alloc here. However, when running on a system with a newer
// libc, aligned_alloc still needs to be redefined, hence the trick on system headers below.
27 | #if defined(__GLIBC_PREREQ) && !__GLIBC_PREREQ(2, 16) && _GLIBCXX_HAVE_ALIGNED_ALLOC |
28 | // tell <cstdlib> that there is no aligned_alloc |
29 | #undef _GLIBCXX_HAVE_ALIGNED_ALLOC |
30 | // trick <stdlib.h> to define another symbol instead |
31 | #define aligned_alloc __hidden_redefined_aligned_alloc |
32 | // Fix the state and undefine the trick |
33 | #include <cstdlib> |
34 | #undef aligned_alloc |
35 | #endif // defined(__GLIBC_PREREQ)&&!__GLIBC_PREREQ(2, 16)&&_GLIBCXX_HAVE_ALIGNED_ALLOC |
36 | #endif // __linux__ && !__ANDROID__ |
37 | |
38 | #include "proxy.h" |
39 | #include "tbb/tbb_config.h" |
40 | #include "tbb/tbb_environment.h" |
41 | |
// Settle the value of TBB_USE_EXCEPTIONS.
// When none of __EXCEPTIONS, _CPPUNWIND or __SUNPRO_CC is defined, a non-zero
// TBB_USE_EXCEPTIONS is rejected with #error and an undefined one becomes 0.
// Otherwise an undefined TBB_USE_EXCEPTIONS becomes 1.
#if !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) && !defined(__SUNPRO_CC)
    #if TBB_USE_EXCEPTIONS
        #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0.
    #elif !defined(TBB_USE_EXCEPTIONS)
        #define TBB_USE_EXCEPTIONS 0
    #endif
#elif !defined(TBB_USE_EXCEPTIONS)
    #define TBB_USE_EXCEPTIONS 1
#endif

// Exception specifications appended to the operator new/delete replacements:
// empty / noexcept when __TBB_CPP11_PRESENT is set, dynamic exception
// specifications (throw(...)) otherwise.
#if __TBB_CPP11_PRESENT
    #define __TBB_THROW_BAD_ALLOC
    #define __TBB_NO_THROW noexcept
#else
    #define __TBB_THROW_BAD_ALLOC throw(std::bad_alloc)
    #define __TBB_NO_THROW throw()
#endif
59 | |
60 | #if MALLOC_UNIXLIKE_OVERLOAD_ENABLED || _WIN32 && !__TBB_WIN8UI_SUPPORT |
61 | /*** internal global operator new implementation (Linux, Windows) ***/ |
62 | #include <new> |
63 | |
64 | // Synchronization primitives to protect original library pointers and new_handler |
65 | #include "Synchronize.h" |
66 | |
67 | #if __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT |
68 | // Use MallocMutex implementation |
69 | typedef MallocMutex ProxyMutex; |
70 | #else |
71 | // One byte atomic intrinsics are not available, |
72 | // so use simple pointer based spin mutex |
73 | class SimpleSpinMutex : tbb::internal::no_copy { |
74 | intptr_t flag; |
75 | public: |
76 | class scoped_lock : tbb::internal::no_copy { |
77 | SimpleSpinMutex& mutex; |
78 | public: |
79 | scoped_lock( SimpleSpinMutex& m ) : mutex(m) { |
80 | while( !(AtomicFetchStore( &(m.flag), 1 ) == 0) ); |
81 | } |
82 | ~scoped_lock() { |
83 | FencedStore(mutex.flag, 0); |
84 | } |
85 | }; |
86 | friend class scoped_lock; |
87 | }; |
88 | typedef SimpleSpinMutex ProxyMutex; |
89 | #endif /* __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT */ |
90 | |
91 | // In case there is no std::get_new_handler function |
92 | // which provides synchronized access to std::new_handler |
93 | #if !__TBB_CPP11_GET_NEW_HANDLER_PRESENT |
94 | static ProxyMutex new_lock; |
95 | #endif |
96 | |
97 | static inline void* InternalOperatorNew(size_t sz) { |
98 | void* res = scalable_malloc(sz); |
99 | #if TBB_USE_EXCEPTIONS |
100 | while (!res) { |
101 | std::new_handler handler; |
102 | #if __TBB_CPP11_GET_NEW_HANDLER_PRESENT |
103 | handler = std::get_new_handler(); |
104 | #else |
105 | { |
106 | ProxyMutex::scoped_lock lock(new_lock); |
107 | handler = std::set_new_handler(0); |
108 | std::set_new_handler(handler); |
109 | } |
110 | #endif |
111 | if (handler) { |
112 | (*handler)(); |
113 | } else { |
114 | throw std::bad_alloc(); |
115 | } |
116 | res = scalable_malloc(sz); |
117 | } |
118 | #endif /* TBB_USE_EXCEPTIONS */ |
119 | return res; |
120 | } |
121 | /*** end of internal global operator new implementation ***/ |
122 | #endif // MALLOC_UNIXLIKE_OVERLOAD_ENABLED || _WIN32 && !__TBB_WIN8UI_SUPPORT |
123 | |
124 | #if MALLOC_UNIXLIKE_OVERLOAD_ENABLED || MALLOC_ZONE_OVERLOAD_ENABLED |
125 | |
// __THROW decorates the replacement prototypes below; define it as empty
// when no earlier header has provided it.
#ifndef __THROW
#define __THROW
#endif
129 | |
130 | /*** service functions and variables ***/ |
131 | #include <string.h> // for memset |
132 | #include <unistd.h> // for sysconf |
133 | |
// Cached page size; zero (static initialization) until initPageSize() has run.
static long memoryPageSize;

// Ask the system for its page size via sysconf() and cache the answer
// in memoryPageSize.
static inline void initPageSize()
{
    const long queriedPageSize = sysconf(_SC_PAGESIZE);
    memoryPageSize = queriedPageSize;
}
140 | |
141 | #if MALLOC_UNIXLIKE_OVERLOAD_ENABLED |
142 | #include <dlfcn.h> |
143 | #include <malloc.h> // mallinfo |
144 | |
/* __TBB_malloc_proxy is used as a weak symbol by libtbbmalloc for:
   1) detecting that the proxy library is loaded
   2) checking that dlsym("malloc") found something different from our replacement malloc
   It is declared here as an alias of this file's malloc.
*/
extern "C" void *__TBB_malloc_proxy(size_t) __attribute__ ((alias ("malloc")));

// Address of the next "malloc_usable_size" in lookup order; filled in by InitOrigPointers().
static void *orig_msize;
152 | |
153 | #elif MALLOC_ZONE_OVERLOAD_ENABLED |
154 | |
155 | #include "proxy_overload_osx.h" |
156 | |
157 | #endif // MALLOC_ZONE_OVERLOAD_ENABLED |
158 | |
// Original (i.e., replaced) functions;
// they are never changed for MALLOC_ZONE_OVERLOAD_ENABLED
// (InitOrigPointers() is empty in that configuration, so they stay null).
static void *orig_free,
            *orig_realloc;
163 | |
164 | #if MALLOC_UNIXLIKE_OVERLOAD_ENABLED |
// For the Unix-like overload the replacements carry the plain libc names
// and take no extra leading argument.
#define ZONE_ARG
#define PREFIX(name) name
167 | |
168 | static void *orig_libc_free, |
169 | *orig_libc_realloc; |
170 | |
171 | // We already tried to find ptr to original functions. |
172 | static intptr_t origFuncSearched; |
173 | |
174 | inline void InitOrigPointers() |
175 | { |
176 | // race is OK here, as different threads found same functions |
177 | if (!FencedLoad(origFuncSearched)) { |
178 | orig_free = dlsym(RTLD_NEXT, "free" ); |
179 | orig_realloc = dlsym(RTLD_NEXT, "realloc" ); |
180 | orig_msize = dlsym(RTLD_NEXT, "malloc_usable_size" ); |
181 | orig_libc_free = dlsym(RTLD_NEXT, "__libc_free" ); |
182 | orig_libc_realloc = dlsym(RTLD_NEXT, "__libc_realloc" ); |
183 | |
184 | FencedStore(origFuncSearched, 1); |
185 | } |
186 | } |
187 | |
188 | /*** replacements for malloc and the family ***/ |
189 | extern "C" { |
190 | #elif MALLOC_ZONE_OVERLOAD_ENABLED |
191 | |
// Every impl_* function receives a zone pointer as its first argument; it is unused.
#define ZONE_ARG struct _malloc_zone_t *,
#define PREFIX(name) impl_##name
// The zone overload has no use for the original functions,
// so there is nothing to look up.
inline void InitOrigPointers()
{
}
197 | |
198 | #endif // MALLOC_UNIXLIKE_OVERLOAD_ENABLED and MALLOC_ZONE_OVERLOAD_ENABLED |
199 | |
200 | void *PREFIX(malloc)(ZONE_ARG size_t size) __THROW |
201 | { |
202 | return scalable_malloc(size); |
203 | } |
204 | |
205 | void *PREFIX(calloc)(ZONE_ARG size_t num, size_t size) __THROW |
206 | { |
207 | return scalable_calloc(num, size); |
208 | } |
209 | |
210 | void PREFIX(free)(ZONE_ARG void *object) __THROW |
211 | { |
212 | InitOrigPointers(); |
213 | __TBB_malloc_safer_free(object, (void (*)(void*))orig_free); |
214 | } |
215 | |
216 | void *PREFIX(realloc)(ZONE_ARG void* ptr, size_t sz) __THROW |
217 | { |
218 | InitOrigPointers(); |
219 | return __TBB_malloc_safer_realloc(ptr, sz, orig_realloc); |
220 | } |
221 | |
222 | /* The older *NIX interface for aligned allocations; |
223 | it's formally substituted by posix_memalign and deprecated, |
224 | so we do not expect it to cause cyclic dependency with C RTL. */ |
225 | void *PREFIX(memalign)(ZONE_ARG size_t alignment, size_t size) __THROW |
226 | { |
227 | return scalable_aligned_malloc(size, alignment); |
228 | } |
229 | |
230 | /* valloc allocates memory aligned on a page boundary */ |
231 | void *PREFIX(valloc)(ZONE_ARG size_t size) __THROW |
232 | { |
233 | if (! memoryPageSize) initPageSize(); |
234 | |
235 | return scalable_aligned_malloc(size, memoryPageSize); |
236 | } |
237 | |
238 | #undef ZONE_ARG |
239 | #undef PREFIX |
240 | |
241 | #if MALLOC_UNIXLIKE_OVERLOAD_ENABLED |
242 | |
243 | // match prototype from system headers |
244 | #if __ANDROID__ |
245 | size_t malloc_usable_size(const void *ptr) __THROW |
246 | #else |
247 | size_t malloc_usable_size(void *ptr) __THROW |
248 | #endif |
249 | { |
250 | InitOrigPointers(); |
251 | return __TBB_malloc_safer_msize(const_cast<void*>(ptr), (size_t (*)(void*))orig_msize); |
252 | } |
253 | |
254 | int posix_memalign(void **memptr, size_t alignment, size_t size) __THROW |
255 | { |
256 | return scalable_posix_memalign(memptr, alignment, size); |
257 | } |
258 | |
259 | /* pvalloc allocates smallest set of complete pages which can hold |
260 | the requested number of bytes. Result is aligned on page boundary. */ |
261 | void *pvalloc(size_t size) __THROW |
262 | { |
263 | if (! memoryPageSize) initPageSize(); |
264 | // align size up to the page size, |
265 | // pvalloc(0) returns 1 page, see man libmpatrol |
266 | size = size? ((size-1) | (memoryPageSize-1)) + 1 : memoryPageSize; |
267 | |
268 | return scalable_aligned_malloc(size, memoryPageSize); |
269 | } |
270 | |
271 | int mallopt(int /*param*/, int /*value*/) __THROW |
272 | { |
273 | return 1; |
274 | } |
275 | |
// mallinfo replacement: returns a structure with every byte set to zero.
struct mallinfo mallinfo() __THROW
{
    struct mallinfo zeroed;
    memset(&zeroed, 0, sizeof zeroed);
    return zeroed;
}
283 | |
284 | #if __ANDROID__ |
// Android doesn't have malloc_usable_size, so it is provided above for
// compatibility with Linux. In addition, dlmalloc_usable_size(), which is
// present under Android, is overloaded here as an alias of it.
size_t dlmalloc_usable_size(const void *ptr) __attribute__ ((alias ("malloc_usable_size")));
289 | #else // __ANDROID__ |
// C11 function, supported starting GLIBC 2.16; aliased to the memalign replacement.
void *aligned_alloc(size_t alignment, size_t size) __attribute__ ((alias ("memalign")));
// These non-standard functions are exported by GLIBC and might be used
// in conjunction with standard malloc/free, so we must overload them.
// Bionic doesn't have them. They are not removed from the linker scripts,
// as absent entry points are ignored by the linker.
void *__libc_malloc(size_t size) __attribute__ ((alias ("malloc")));
void *__libc_calloc(size_t num, size_t size) __attribute__ ((alias ("calloc")));
void *__libc_memalign(size_t alignment, size_t size) __attribute__ ((alias ("memalign")));
void *__libc_pvalloc(size_t size) __attribute__ ((alias ("pvalloc")));
void *__libc_valloc(size_t size) __attribute__ ((alias ("valloc")));
301 | |
302 | // call original __libc_* to support naive replacement of free via __libc_free etc |
303 | void __libc_free(void *ptr) |
304 | { |
305 | InitOrigPointers(); |
306 | __TBB_malloc_safer_free(ptr, (void (*)(void*))orig_libc_free); |
307 | } |
308 | |
309 | void *__libc_realloc(void *ptr, size_t size) |
310 | { |
311 | InitOrigPointers(); |
312 | return __TBB_malloc_safer_realloc(ptr, size, orig_libc_realloc); |
313 | } |
314 | #endif // !__ANDROID__ |
315 | |
316 | } /* extern "C" */ |
317 | |
318 | /*** replacements for global operators new and delete ***/ |
319 | |
320 | void* operator new(size_t sz) __TBB_THROW_BAD_ALLOC { |
321 | return InternalOperatorNew(sz); |
322 | } |
323 | void* operator new[](size_t sz) __TBB_THROW_BAD_ALLOC { |
324 | return InternalOperatorNew(sz); |
325 | } |
326 | void operator delete(void* ptr) __TBB_NO_THROW { |
327 | InitOrigPointers(); |
328 | __TBB_malloc_safer_free(ptr, (void (*)(void*))orig_free); |
329 | } |
330 | void operator delete[](void* ptr) __TBB_NO_THROW { |
331 | InitOrigPointers(); |
332 | __TBB_malloc_safer_free(ptr, (void (*)(void*))orig_free); |
333 | } |
334 | void* operator new(size_t sz, const std::nothrow_t&) __TBB_NO_THROW { |
335 | return scalable_malloc(sz); |
336 | } |
337 | void* operator new[](std::size_t sz, const std::nothrow_t&) __TBB_NO_THROW { |
338 | return scalable_malloc(sz); |
339 | } |
340 | void operator delete(void* ptr, const std::nothrow_t&) __TBB_NO_THROW { |
341 | InitOrigPointers(); |
342 | __TBB_malloc_safer_free(ptr, (void (*)(void*))orig_free); |
343 | } |
344 | void operator delete[](void* ptr, const std::nothrow_t&) __TBB_NO_THROW { |
345 | InitOrigPointers(); |
346 | __TBB_malloc_safer_free(ptr, (void (*)(void*))orig_free); |
347 | } |
348 | |
349 | #endif /* MALLOC_UNIXLIKE_OVERLOAD_ENABLED */ |
350 | #endif /* MALLOC_UNIXLIKE_OVERLOAD_ENABLED || MALLOC_ZONE_OVERLOAD_ENABLED */ |
351 | |
352 | #ifdef _WIN32 |
353 | #include <windows.h> |
354 | |
355 | #if !__TBB_WIN8UI_SUPPORT |
356 | |
357 | #include <stdio.h> |
358 | #include "tbb_function_replacement.h" |
359 | #include "shared_utils.h" |
360 | |
361 | void __TBB_malloc_safer_delete( void *ptr) |
362 | { |
363 | __TBB_malloc_safer_free( ptr, NULL ); |
364 | } |
365 | |
366 | void* safer_aligned_malloc( size_t size, size_t alignment ) |
367 | { |
368 | // workaround for "is power of 2 pow N" bug that accepts zeros |
369 | return scalable_aligned_malloc( size, alignment>sizeof(size_t*)?alignment:sizeof(size_t*) ); |
370 | } |
371 | |
// _expand() is not supported: this dummy replacement ignores both
// arguments and always reports failure by returning a null pointer.
void* safer_expand( void * /*ptr*/, size_t /*size*/ )
{
    void* const notSupported = NULL;
    return notSupported;
}
377 | |
// Generates, for one CRT DLL identified by the token CRTLIB:
//  - pointers orig_free_CRTLIB, orig__aligned_free_CRTLIB, orig__msize_CRTLIB and
//    orig__aligned_msize_CRTLIB that hold the original entry points of that DLL;
//  - wrappers __TBB_malloc_safer_{free,_aligned_free,_msize,_aligned_msize}_CRTLIB
//    that pass the matching original pointer to the generic __TBB_malloc_safer_* routine;
//  - wrappers __TBB_malloc_safer_{realloc,_aligned_realloc}_CRTLIB that bundle the
//    original free/msize pointers into orig_ptrs / orig_aligned_ptrs for the generic
//    realloc routines.
// No comments can be placed inside the body because of the line continuations.
#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(CRTLIB) \
void (*orig_free_##CRTLIB)(void*); \
void __TBB_malloc_safer_free_##CRTLIB(void *ptr) \
{ \
    __TBB_malloc_safer_free( ptr, orig_free_##CRTLIB ); \
} \
 \
void (*orig__aligned_free_##CRTLIB)(void*); \
void __TBB_malloc_safer__aligned_free_##CRTLIB(void *ptr) \
{ \
    __TBB_malloc_safer_free( ptr, orig__aligned_free_##CRTLIB ); \
} \
 \
size_t (*orig__msize_##CRTLIB)(void*); \
size_t __TBB_malloc_safer__msize_##CRTLIB(void *ptr) \
{ \
    return __TBB_malloc_safer_msize( ptr, orig__msize_##CRTLIB ); \
} \
 \
size_t (*orig__aligned_msize_##CRTLIB)(void*, size_t, size_t); \
size_t __TBB_malloc_safer__aligned_msize_##CRTLIB( void *ptr, size_t alignment, size_t offset) \
{ \
    return __TBB_malloc_safer_aligned_msize( ptr, alignment, offset, orig__aligned_msize_##CRTLIB ); \
} \
 \
void* __TBB_malloc_safer_realloc_##CRTLIB( void *ptr, size_t size ) \
{ \
    orig_ptrs func_ptrs = {orig_free_##CRTLIB, orig__msize_##CRTLIB}; \
    return __TBB_malloc_safer_realloc( ptr, size, &func_ptrs ); \
} \
 \
void* __TBB_malloc_safer__aligned_realloc_##CRTLIB( void *ptr, size_t size, size_t alignment ) \
{ \
    orig_aligned_ptrs func_ptrs = {orig__aligned_free_##CRTLIB, orig__aligned_msize_##CRTLIB}; \
    return __TBB_malloc_safer_aligned_realloc( ptr, size, alignment, &func_ptrs ); \
}
414 | |
415 | // Only for ucrtbase: substitution for _o_free |
416 | void (*orig__o_free)(void*); |
417 | void __TBB_malloc__o_free(void *ptr) |
418 | { |
419 | __TBB_malloc_safer_free( ptr, orig__o_free ); |
420 | } |
421 | // Only for ucrtbase: substitution for _free_base |
422 | void(*orig__free_base)(void*); |
423 | void __TBB_malloc__free_base(void *ptr) |
424 | { |
425 | __TBB_malloc_safer_free(ptr, orig__free_base); |
426 | } |
427 | |
// NULL-terminated table of function prologue patterns, written as hex text.
// It is passed to IsPrologueKnown() and ReplaceFunctionWithStore() below.
// Size limit is MAX_PATTERN_SIZE (28) byte codes / 56 symbols per line.
// * can be used to match any digit in byte codes.
// # followed by several * indicate a relative address that needs to be corrected.
// Purpose of the pattern is to mark an instruction bound; it should consist of several
// full instructions plus one extra byte code. It's not required for the patterns
// to be unique (i.e., it's OK to have same pattern for unrelated functions).
// TODO: use hot patch prologues if exist
const char* known_bytecodes[] = {
#if _WIN64
//  "========================================================" - 56 symbols
    "4883EC284885C974",       // release free()
    "4883EC284885C975",       // release _msize()
    "4885C974375348",         // release free() 8.0.50727.42, 10.0
    "E907000000CCCC",         // release _aligned_msize(), _aligned_free() ucrtbase.dll
    "C7442410000000008B",     // release free() ucrtbase.dll 10.0.14393.33
    "E90B000000CCCC",         // release _msize() ucrtbase.dll 10.0.14393.33
    "48895C24085748",         // release _aligned_msize() ucrtbase.dll 10.0.14393.33
    "E903000000CCCC",         // release _aligned_msize() ucrtbase.dll 10.0.16299.522
    "48894C24084883EC28BA",   // debug prologue
    "4C894424184889542410",   // debug _aligned_msize() 10.0
    "48894C24084883EC2848",   // debug _aligned_free 10.0
    "488BD1488D0D#*******E9", // _o_free(), ucrtbase.dll
 #if __TBB_OVERLOAD_OLD_MSVCR
    "48895C2408574883EC3049", // release _aligned_msize 9.0
    "4883EC384885C975",       // release _msize() 9.0
    "4C8BC1488B0DA6E4040033", // an old win64 SDK
 #endif
#else // _WIN32
//  "========================================================" - 56 symbols
    "8BFF558BEC8B",           // multiple
    "8BFF558BEC83",           // release free() & _msize() 10.0.40219.325, _msize() ucrtbase.dll
    "8BFF558BECFF",           // release _aligned_msize ucrtbase.dll
    "8BFF558BEC51",           // release free() & _msize() ucrtbase.dll 10.0.14393.33
    "558BEC8B450885C074",     // release _aligned_free 11.0
    "558BEC837D08000F",       // release _msize() 11.0.51106.1
    "558BEC837D08007419FF",   // release free() 11.0.50727.1
    "558BEC8B450885C075",     // release _aligned_msize() 11.0.50727.1
    "558BEC6A018B",           // debug free() & _msize() 11.0
    "558BEC8B451050",         // debug _aligned_msize() 11.0
    "558BEC8B450850",         // debug _aligned_free 11.0
    "8BFF558BEC6A",           // debug free() & _msize() 10.0.40219.325
 #if __TBB_OVERLOAD_OLD_MSVCR
    "6A1868********E8",       // release free() 8.0.50727.4053, 9.0
    "6A1C68********E8",       // release _msize() 8.0.50727.4053, 9.0
 #endif
#endif // _WIN64/_WIN32
    NULL
    };
476 | |
// Replace function_name in "<CRT_VER><dbgsuffix>.dll" with the matching
// __TBB_malloc_safer_* wrapper, checking the prologue against known_bytecodes and
// storing the original entry point in orig_<function_name>_<CRT_VER><dbgsuffix>.
#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY(CRT_VER,function_name,dbgsuffix) \
    ReplaceFunctionWithStore( #CRT_VER #dbgsuffix ".dll", #function_name, \
      (FUNCPTR)__TBB_malloc_safer_##function_name##_##CRT_VER##dbgsuffix, \
      known_bytecodes, (FUNCPTR*)&orig_##function_name##_##CRT_VER##dbgsuffix );

// Same replacement, but with no prologue patterns and no place to store the original.
#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY_NO_FALLBACK(CRT_VER,function_name,dbgsuffix) \
    ReplaceFunctionWithStore( #CRT_VER #dbgsuffix ".dll", #function_name, \
      (FUNCPTR)__TBB_malloc_safer_##function_name##_##CRT_VER##dbgsuffix, 0, NULL );

// Redirect function_name to the wrapper generated for a different function (dest_func).
// NOTE(review): no use of this macro is visible in this file.
#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY_REDIRECT(CRT_VER,function_name,dest_func,dbgsuffix) \
    ReplaceFunctionWithStore( #CRT_VER #dbgsuffix ".dll", #function_name, \
      (FUNCPTR)__TBB_malloc_safer_##dest_func##_##CRT_VER##dbgsuffix, 0, NULL );

// For one DLL: if BytecodesAreKnown() accepts it, replace the six runtime-specific
// functions; otherwise mark the DLL via SkipReplacement() so that the generic
// replacement loop in doMallocReplacement() leaves it alone.
#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_IMPL(CRT_VER,dbgsuffix) \
    if (BytecodesAreKnown(#CRT_VER #dbgsuffix ".dll")) { \
      __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY(CRT_VER,free,dbgsuffix) \
      __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY(CRT_VER,_msize,dbgsuffix) \
      __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY_NO_FALLBACK(CRT_VER,realloc,dbgsuffix) \
      __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY(CRT_VER,_aligned_free,dbgsuffix) \
      __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY(CRT_VER,_aligned_msize,dbgsuffix) \
      __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_ENTRY_NO_FALLBACK(CRT_VER,_aligned_realloc,dbgsuffix) \
    } else \
        SkipReplacement(#CRT_VER #dbgsuffix ".dll");

// Release variant uses the plain DLL name, debug variant appends "d" to it.
#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_RELEASE(CRT_VER) __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_IMPL(CRT_VER,)
#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_DEBUG(CRT_VER) __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_IMPL(CRT_VER,d)

// Process both the release and the debug DLL of one CRT version.
#define __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(CRT_VER) \
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_RELEASE(CRT_VER) \
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_DEBUG(CRT_VER)
507 | |
// Instantiate the original-pointer storage and the safer_* wrappers for every
// supported CRT DLL. The old runtimes (msvcr70..msvcr90) are included only
// when __TBB_OVERLOAD_OLD_MSVCR is set; ucrtbase has a release instance only.
#if __TBB_OVERLOAD_OLD_MSVCR
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr70d);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr70);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr71d);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr71);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr80d);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr80);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr90d);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr90);
#endif
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr100d);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr100);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr110d);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr110);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr120d);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(msvcr120);
__TBB_ORIG_ALLOCATOR_REPLACEMENT_WRAPPER(ucrtbase);
525 | |
526 | /*** replacements for global operators new and delete ***/ |
527 | |
528 | #if _MSC_VER && !defined(__INTEL_COMPILER) |
529 | #pragma warning( push ) |
530 | #pragma warning( disable : 4290 ) |
531 | #endif |
532 | |
533 | /*** operator new overloads internals (Linux, Windows) ***/ |
534 | |
535 | void* operator_new(size_t sz) __TBB_THROW_BAD_ALLOC { |
536 | return InternalOperatorNew(sz); |
537 | } |
538 | void* operator_new_arr(size_t sz) __TBB_THROW_BAD_ALLOC { |
539 | return InternalOperatorNew(sz); |
540 | } |
541 | void operator_delete(void* ptr) __TBB_NO_THROW { |
542 | __TBB_malloc_safer_delete(ptr); |
543 | } |
544 | #if _MSC_VER && !defined(__INTEL_COMPILER) |
545 | #pragma warning( pop ) |
546 | #endif |
547 | |
548 | void operator_delete_arr(void* ptr) __TBB_NO_THROW { |
549 | __TBB_malloc_safer_delete(ptr); |
550 | } |
551 | void* operator_new_t(size_t sz, const std::nothrow_t&) __TBB_NO_THROW { |
552 | return scalable_malloc(sz); |
553 | } |
554 | void* operator_new_arr_t(std::size_t sz, const std::nothrow_t&) __TBB_NO_THROW { |
555 | return scalable_malloc(sz); |
556 | } |
557 | void operator_delete_t(void* ptr, const std::nothrow_t&) __TBB_NO_THROW { |
558 | __TBB_malloc_safer_delete(ptr); |
559 | } |
560 | void operator_delete_arr_t(void* ptr, const std::nothrow_t&) __TBB_NO_THROW { |
561 | __TBB_malloc_safer_delete(ptr); |
562 | } |
563 | |
// One CRT DLL that is a candidate for function replacement.
struct Module {
    const char *name;       // DLL file name, e.g. "msvcr100.dll"
    bool doFuncReplacement; // do replacement in the DLL; cleared by SkipReplacement()
};
568 | |
// DLLs processed by the generic replacement loop in doMallocReplacement().
// Every entry starts enabled; SkipReplacement() disables the ones whose
// prologues were not recognized.
Module modules_to_replace[] = {
    {"msvcr100d.dll", true},
    {"msvcr100.dll", true},
    {"msvcr110d.dll", true},
    {"msvcr110.dll", true},
    {"msvcr120d.dll", true},
    {"msvcr120.dll", true},
    {"ucrtbase.dll", true},
//    "ucrtbased.dll" is not supported because of problems with _dbg functions
#if __TBB_OVERLOAD_OLD_MSVCR
    {"msvcr90d.dll", true},
    {"msvcr90.dll", true},
    {"msvcr80d.dll", true},
    {"msvcr80.dll", true},
    {"msvcr70d.dll", true},
    {"msvcr70.dll", true},
    {"msvcr71d.dll", true},
    {"msvcr71.dll", true},
#endif
#if __TBB_TODO
    // TODO: Try enabling replacement for non-versioned system binaries below
    {"msvcrtd.dll", true},
    {"msvcrt.dll", true},
#endif
    };
594 | |
595 | /* |
596 | We need to replace following functions: |
597 | malloc |
598 | calloc |
599 | _aligned_malloc |
600 | _expand (by dummy implementation) |
601 | ??2@YAPAXI@Z operator new (ia32) |
602 | ??_U@YAPAXI@Z void * operator new[] (size_t size) (ia32) |
603 | ??3@YAXPAX@Z operator delete (ia32) |
604 | ??_V@YAXPAX@Z operator delete[] (ia32) |
605 | ??2@YAPEAX_K@Z void * operator new(unsigned __int64) (intel64) |
    ??_U@YAPEAX_K@Z void * operator new[](unsigned __int64) (intel64)
607 | ??3@YAXPEAX@Z operator delete (intel64) |
608 | ??_V@YAXPEAX@Z operator delete[] (intel64) |
609 | ??2@YAPAXIABUnothrow_t@std@@@Z void * operator new (size_t sz, const std::nothrow_t&) throw() (optional) |
610 | ??_U@YAPAXIABUnothrow_t@std@@@Z void * operator new[] (size_t sz, const std::nothrow_t&) throw() (optional) |
611 | |
612 | and these functions have runtime-specific replacement: |
613 | realloc |
614 | free |
615 | _msize |
616 | _aligned_realloc |
617 | _aligned_free |
618 | _aligned_msize |
619 | */ |
620 | |
// Describes one function to be replaced in every enabled module.
typedef struct FRData_t {
    //char *_module;
    const char *_func;      // exported name to look up (decorated name for C++ operators)
    FUNCPTR _fptr;          // replacement to install
    FRR_ON_ERROR _on_error; // passed to ReplaceFunctionWithStore(): FRR_IGNORE tolerates a missing function
} FRDATA;
627 | |
// C allocation routines replaced in every enabled module without keeping the original code.
// Only _expand may be absent (FRR_IGNORE); the others are required (FRR_FAIL).
FRDATA c_routines_to_replace[] = {
    { "malloc", (FUNCPTR)scalable_malloc, FRR_FAIL },
    { "calloc", (FUNCPTR)scalable_calloc, FRR_FAIL },
    { "_aligned_malloc", (FUNCPTR)safer_aligned_malloc, FRR_FAIL },
    { "_expand", (FUNCPTR)safer_expand, FRR_IGNORE },
};
634 | |
// Decorated names of the global operators new/delete replaced in every enabled
// module except ucrtbase.dll. The throwing forms are required (FRR_FAIL);
// the nothrow forms are optional (FRR_IGNORE).
FRDATA cxx_routines_to_replace[] = {
#if _WIN64
    { "??2@YAPEAX_K@Z", (FUNCPTR)operator_new, FRR_FAIL },
    { "??_U@YAPEAX_K@Z", (FUNCPTR)operator_new_arr, FRR_FAIL },
    { "??3@YAXPEAX@Z", (FUNCPTR)operator_delete, FRR_FAIL },
    { "??_V@YAXPEAX@Z", (FUNCPTR)operator_delete_arr, FRR_FAIL },
#else
    { "??2@YAPAXI@Z", (FUNCPTR)operator_new, FRR_FAIL },
    { "??_U@YAPAXI@Z", (FUNCPTR)operator_new_arr, FRR_FAIL },
    { "??3@YAXPAX@Z", (FUNCPTR)operator_delete, FRR_FAIL },
    { "??_V@YAXPAX@Z", (FUNCPTR)operator_delete_arr, FRR_FAIL },
#endif
    // NOTE(review): the two nothrow names below use the same "PAX"/"I" encoding as the
    // ia32 entries above even when _WIN64 is set; presumably they are never found in
    // 64-bit runtimes and are silently skipped through FRR_IGNORE - confirm whether
    // 64-bit decorated names should be listed as well.
    { "??2@YAPAXIABUnothrow_t@std@@@Z", (FUNCPTR)operator_new_t, FRR_IGNORE },
    { "??_U@YAPAXIABUnothrow_t@std@@@Z", (FUNCPTR)operator_new_arr_t, FRR_IGNORE }
};
650 | |
// Character type of DLL names as passed to the Win32 API, together with the
// printf conversion that prints a string of that type: narrow without
// UNICODE, wide with it.
#ifndef UNICODE
typedef char unicode_char_t;
#define WCHAR_SPEC "%s"
#else
typedef wchar_t unicode_char_t;
#define WCHAR_SPEC "%ls"
#endif
658 | |
659 | // Check that we recognize bytecodes that should be replaced by trampolines. |
660 | // If some functions have unknown prologue patterns, replacement should not be done. |
661 | bool BytecodesAreKnown(const unicode_char_t *dllName) |
662 | { |
663 | const char *funcName[] = {"free" , "_msize" , "_aligned_free" , "_aligned_msize" , 0}; |
664 | HMODULE module = GetModuleHandle(dllName); |
665 | |
666 | if (!module) |
667 | return false; |
668 | for (int i=0; funcName[i]; i++) |
669 | if (! IsPrologueKnown(dllName, funcName[i], known_bytecodes, module)) { |
670 | fprintf(stderr, "TBBmalloc: skip allocation functions replacement in " WCHAR_SPEC |
671 | ": unknown prologue for function " WCHAR_SPEC "\n" , dllName, funcName[i]); |
672 | return false; |
673 | } |
674 | return true; |
675 | } |
676 | |
677 | void SkipReplacement(const unicode_char_t *dllName) |
678 | { |
679 | #ifndef UNICODE |
680 | const char *dllStr = dllName; |
681 | #else |
682 | const size_t sz = 128; // all DLL name must fit |
683 | |
684 | char buffer[sz]; |
685 | size_t real_sz; |
686 | char *dllStr = buffer; |
687 | |
688 | errno_t ret = wcstombs_s(&real_sz, dllStr, sz, dllName, sz-1); |
689 | __TBB_ASSERT(!ret, "Dll name conversion failed" ); |
690 | #endif |
691 | |
692 | for (size_t i=0; i<arrayLength(modules_to_replace); i++) |
693 | if (!strcmp(modules_to_replace[i].name, dllStr)) { |
694 | modules_to_replace[i].doFuncReplacement = false; |
695 | break; |
696 | } |
697 | } |
698 | |
699 | void ReplaceFunctionWithStore( const unicode_char_t *dllName, const char *funcName, FUNCPTR newFunc, const char ** opcodes, FUNCPTR* origFunc, FRR_ON_ERROR on_error = FRR_FAIL ) |
700 | { |
701 | FRR_TYPE res = ReplaceFunction( dllName, funcName, newFunc, opcodes, origFunc ); |
702 | |
703 | if (res == FRR_OK || res == FRR_NODLL || (res == FRR_NOFUNC && on_error == FRR_IGNORE)) |
704 | return; |
705 | |
706 | fprintf(stderr, "Failed to %s function %s in module %s\n" , |
707 | res==FRR_NOFUNC? "find" : "replace" , funcName, dllName); |
708 | |
709 | // Unable to replace a required function |
710 | // Aborting because incomplete replacement of memory management functions |
711 | // may leave the program in an invalid state |
712 | abort(); |
713 | } |
714 | |
// Patch the memory management functions of every supported CRT DLL.
// Step 1 replaces the runtime-specific functions (free, _msize, realloc and their
// _aligned_ forms) per DLL, keeping the original entry points; DLLs with
// unrecognized prologues are disabled through SkipReplacement().
// Step 2 walks modules_to_replace and, for each DLL still enabled, replaces the
// generic C routines and the global operators new/delete.
void doMallocReplacement()
{
    // Replace functions and keep backup of original code (separate for each runtime)
#if __TBB_OVERLOAD_OLD_MSVCR
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr70)
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr71)
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr80)
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr90)
#endif
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr100)
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr110)
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL(msvcr120)
    // ucrtbase is handled in its release form only
    __TBB_ORIG_ALLOCATOR_REPLACEMENT_CALL_RELEASE(ucrtbase)

    // Replace functions without storing original code
    for (size_t j = 0; j < arrayLength(modules_to_replace); j++) {
        // skip DLLs disabled by SkipReplacement()
        if (!modules_to_replace[j].doFuncReplacement)
            continue;
        for (size_t i = 0; i < arrayLength(c_routines_to_replace); i++)
        {
            ReplaceFunctionWithStore( modules_to_replace[j].name, c_routines_to_replace[i]._func, c_routines_to_replace[i]._fptr, NULL, NULL,  c_routines_to_replace[i]._on_error );
        }
        if ( strcmp(modules_to_replace[j].name, "ucrtbase.dll") == 0 ) {
            HMODULE ucrtbase_handle = GetModuleHandle("ucrtbase.dll");
            if (!ucrtbase_handle)
                continue;
            // If _o_free function is present and patchable, redirect it to tbbmalloc as well
            // This prevents issues with other _o_* functions which might allocate memory with malloc
            if ( IsPrologueKnown("ucrtbase.dll", "_o_free", known_bytecodes, ucrtbase_handle)) {
                ReplaceFunctionWithStore( "ucrtbase.dll", "_o_free", (FUNCPTR)__TBB_malloc__o_free, known_bytecodes, (FUNCPTR*)&orig__o_free,  FRR_FAIL );
            }
            // Similarly for _free_base
            if (IsPrologueKnown("ucrtbase.dll", "_free_base", known_bytecodes, ucrtbase_handle)) {
                ReplaceFunctionWithStore("ucrtbase.dll", "_free_base", (FUNCPTR)__TBB_malloc__free_base, known_bytecodes, (FUNCPTR*)&orig__free_base, FRR_FAIL);
            }
            // ucrtbase.dll does not export operator new/delete, so skip the rest of the loop.
            continue;
        }

        for (size_t i = 0; i < arrayLength(cxx_routines_to_replace); i++)
        {
#if !_WIN64
            // in Microsoft* Visual Studio* 2012 and 2013 32-bit operator delete consists of 2 bytes only: short jump to free(ptr);
            // replacement should be skipped for this particular case.
            if ( ((strcmp(modules_to_replace[j].name, "msvcr110.dll") == 0) || (strcmp(modules_to_replace[j].name, "msvcr120.dll") == 0)) && (strcmp(cxx_routines_to_replace[i]._func, "??3@YAXPAX@Z") == 0) ) continue;
            // in Microsoft* Visual Studio* 2013 32-bit operator delete[] consists of 2 bytes only: short jump to free(ptr);
            // replacement should be skipped for this particular case.
            if ( (strcmp(modules_to_replace[j].name, "msvcr120.dll") == 0) && (strcmp(cxx_routines_to_replace[i]._func, "??_V@YAXPAX@Z") == 0) ) continue;
#endif
            ReplaceFunctionWithStore( modules_to_replace[j].name, cxx_routines_to_replace[i]._func, cxx_routines_to_replace[i]._fptr, NULL, NULL,  cxx_routines_to_replace[i]._on_error );
        }
    }
}
768 | |
769 | #endif // !__TBB_WIN8UI_SUPPORT |
770 | |
771 | extern "C" BOOL WINAPI DllMain( HINSTANCE hInst, DWORD callReason, LPVOID reserved ) |
772 | { |
773 | |
774 | if ( callReason==DLL_PROCESS_ATTACH && reserved && hInst ) { |
775 | #if !__TBB_WIN8UI_SUPPORT |
776 | if (!tbb::internal::GetBoolEnvironmentVariable("TBB_MALLOC_DISABLE_REPLACEMENT" )) |
777 | { |
778 | doMallocReplacement(); |
779 | } |
780 | #endif // !__TBB_WIN8UI_SUPPORT |
781 | } |
782 | |
783 | return TRUE; |
784 | } |
785 | |
786 | // Just to make the linker happy and link the DLL to the application |
787 | extern "C" __declspec(dllexport) void __TBB_malloc_proxy() |
788 | { |
789 | |
790 | } |
791 | |
792 | #endif //_WIN32 |
793 | |