1/*
2 Copyright (c) 2005-2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
17#include "tbb/tbb_config.h"
18
19#if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)
20
21#ifndef _CRT_SECURE_NO_DEPRECATE
22#define _CRT_SECURE_NO_DEPRECATE 1
23#endif
24#define __TBB_NO_IMPLICIT_LINKAGE 1
25
26// no standard-conforming implementation of snprintf prior to VS 2015
27#if !defined(_MSC_VER) || _MSC_VER>=1900
28#define LOG_PRINT(s, n, format, ...) snprintf(s, n, format, __VA_ARGS__)
29#else
30#define LOG_PRINT(s, n, format, ...) _snprintf_s(s, n, _TRUNCATE, format, __VA_ARGS__)
31#endif
32
33#include <windows.h>
34#include <new>
35#include <stdio.h>
36#include <string.h>
37#include "tbb_function_replacement.h"
38
39#include "tbb/tbb_stddef.h"
40#include "../tbb/tbb_assert_impl.h"
41
// Describes one standard memory allocation function for the replacement log.
// The member order is significant: instances are aggregate-initialized
// as { funcName, dllName }.
struct FunctionInfo {
    char const* funcName;   // name of the function
    char const* dllName;    // name of the DLL the function belongs to
};
47
48// Namespace that processes and manages the output of records to the Log journal
49// that will be provided to user by TBB_malloc_replacement_log()
50namespace Log {
51 // Value of RECORDS_COUNT is set due to the fact that we maximally
52 // scan 8 modules, and in every module we can swap 6 opcodes. (rounded to 8)
53 static const unsigned RECORDS_COUNT = 8 * 8;
54 static const unsigned RECORD_LENGTH = MAX_PATH;
55
56 // Need to add 1 to count of records, because last record must be always NULL
57 static char *records[RECORDS_COUNT + 1];
58 static bool replacement_status = true;
59
60 // Internal counter that contains number of next string for record
61 static unsigned record_number = 0;
62
63 // Function that writes info about (not)found opcodes to the Log journal
64 // functionInfo - information about a standard memory allocation function for the replacement log
65 // opcodeString - string, that contain byte code of this function
66 // status - information about function replacement status
67 static void record(FunctionInfo functionInfo, const char * opcodeString, bool status) {
68 __TBB_ASSERT(functionInfo.dllName, "Empty DLL name value");
69 __TBB_ASSERT(functionInfo.funcName, "Empty function name value");
70 __TBB_ASSERT(opcodeString, "Empty opcode");
71 __TBB_ASSERT(record_number <= RECORDS_COUNT, "Incorrect record number");
72
73 //If some replacement failed -> set status to false
74 replacement_status &= status;
75
76 // If we reach the end of the log, write this message to the last line
77 if (record_number == RECORDS_COUNT) {
78 // %s - workaround to fix empty variable argument parsing behavior in GCC
79 LOG_PRINT(records[RECORDS_COUNT - 1], RECORD_LENGTH, "%s", "Log was truncated.");
80 return;
81 }
82
83 char* entry = (char*)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, RECORD_LENGTH);
84 __TBB_ASSERT(entry, "Invalid memory was returned");
85
86 LOG_PRINT(entry, RECORD_LENGTH, "%s: %s (%s), byte pattern: <%s>",
87 status ? "Success" : "Fail", functionInfo.funcName, functionInfo.dllName, opcodeString);
88
89 records[record_number++] = entry;
90 }
91};
92
93inline UINT_PTR Ptr2Addrint(LPVOID ptr)
94{
95 Int2Ptr i2p;
96 i2p.lpv = ptr;
97 return i2p.uip;
98}
99
100inline LPVOID Addrint2Ptr(UINT_PTR ptr)
101{
102 Int2Ptr i2p;
103 i2p.uip = ptr;
104 return i2p.lpv;
105}
106
107// Is the distance between addr1 and addr2 smaller than dist
108inline bool IsInDistance(UINT_PTR addr1, UINT_PTR addr2, __int64 dist)
109{
110 __int64 diff = addr1>addr2 ? addr1-addr2 : addr2-addr1;
111 return diff<dist;
112}
113
114/*
115 * When inserting a probe in 64 bits process the distance between the insertion
116 * point and the target may be bigger than 2^32. In this case we are using
117 * indirect jump through memory where the offset to this memory location
118 * is smaller than 2^32 and it contains the absolute address (8 bytes).
119 *
120 * This class is used to hold the pages used for the above trampolines.
121 * Since this utility will be used to replace malloc functions this implementation
122 * doesn't allocate memory dynamically.
123 *
124 * The struct MemoryBuffer holds the data about a page in the memory used for
125 * replacing functions in 64-bit code where the target is too far to be replaced
126 * with a short jump. All the calculations of m_base and m_next are in a multiple
127 * of SIZE_OF_ADDRESS (which is 8 in Win64).
128 */
129class MemoryProvider {
130private:
131 struct MemoryBuffer {
132 UINT_PTR m_base; // base address of the buffer
133 UINT_PTR m_next; // next free location in the buffer
134 DWORD m_size; // size of buffer
135
136 // Default constructor
137 MemoryBuffer() : m_base(0), m_next(0), m_size(0) {}
138
139 // Constructor
140 MemoryBuffer(void *base, DWORD size)
141 {
142 m_base = Ptr2Addrint(base);
143 m_next = m_base;
144 m_size = size;
145 }
146 };
147
148MemoryBuffer *CreateBuffer(UINT_PTR addr)
149 {
150 // No more room in the pages database
151 if (m_lastBuffer - m_pages == MAX_NUM_BUFFERS)
152 return 0;
153
154 void *newAddr = Addrint2Ptr(addr);
155 // Get information for the region which the given address belongs to
156 MEMORY_BASIC_INFORMATION memInfo;
157 if (VirtualQuery(newAddr, &memInfo, sizeof(memInfo)) != sizeof(memInfo))
158 return 0;
159
160 for(;;) {
161 // The new address to check is beyond the current region and aligned to allocation size
162 newAddr = Addrint2Ptr( (Ptr2Addrint(memInfo.BaseAddress) + memInfo.RegionSize + m_allocSize) & ~(UINT_PTR)(m_allocSize-1) );
163
164 // Check that the address is in the right distance.
165 // VirtualAlloc can only round the address down; so it will remain in the right distance
166 if (!IsInDistance(addr, Ptr2Addrint(newAddr), MAX_DISTANCE))
167 break;
168
169 if (VirtualQuery(newAddr, &memInfo, sizeof(memInfo)) != sizeof(memInfo))
170 break;
171
172 if (memInfo.State == MEM_FREE && memInfo.RegionSize >= m_allocSize)
173 {
174 // Found a free region, try to allocate a page in this region
175 void *newPage = VirtualAlloc(newAddr, m_allocSize, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
176 if (!newPage)
177 break;
178
179 // Add the new page to the pages database
180 MemoryBuffer *pBuff = new (m_lastBuffer) MemoryBuffer(newPage, m_allocSize);
181 ++m_lastBuffer;
182 return pBuff;
183 }
184 }
185
186 // Failed to find a buffer in the distance
187 return 0;
188 }
189
190public:
191 MemoryProvider()
192 {
193 SYSTEM_INFO sysInfo;
194 GetSystemInfo(&sysInfo);
195 m_allocSize = sysInfo.dwAllocationGranularity;
196 m_lastBuffer = &m_pages[0];
197 }
198
199 // We can't free the pages in the destructor because the trampolines
200 // are using these memory locations and a replaced function might be called
201 // after the destructor was called.
202 ~MemoryProvider()
203 {
204 }
205
206 // Return a memory location in distance less than 2^31 from input address
207 UINT_PTR GetLocation(UINT_PTR addr)
208 {
209 MemoryBuffer *pBuff = m_pages;
210 for (; pBuff<m_lastBuffer && IsInDistance(pBuff->m_next, addr, MAX_DISTANCE); ++pBuff)
211 {
212 if (pBuff->m_next < pBuff->m_base + pBuff->m_size)
213 {
214 UINT_PTR loc = pBuff->m_next;
215 pBuff->m_next += MAX_PROBE_SIZE;
216 return loc;
217 }
218 }
219
220 pBuff = CreateBuffer(addr);
221 if(!pBuff)
222 return 0;
223
224 UINT_PTR loc = pBuff->m_next;
225 pBuff->m_next += MAX_PROBE_SIZE;
226 return loc;
227 }
228
229private:
230 MemoryBuffer m_pages[MAX_NUM_BUFFERS];
231 MemoryBuffer *m_lastBuffer;
232 DWORD m_allocSize;
233};
234
235static MemoryProvider memProvider;
236
// Compare opcodes from dictionary (str1) and opcodes from code (str2).
// str1 might contain '*' and '#' to mask addresses: these characters match
// any character of str2. Only the first strlen(str1) characters of str2 are
// examined, so str2 may be longer than str1.
// RETURN: 0 if opcodes did not match (including the case when str2 is
//         shorter than str1), 1 on success
size_t compareStrings( const char *str1, const char *str2 )
{
    for (size_t i=0; str1[i]!=0; i++){
        // str2 ended before the pattern did: it cannot match, and going on
        // would read beyond the terminator of str2.
        if( str2[i]==0 ) return 0;
        if( str1[i]!='*' && str1[i]!='#' && str1[i]!=str2[i] ) return 0;
    }
    return 1;
}
247
248// Check function prologue with known prologues from the dictionary
249// opcodes - dictionary
250// inpAddr - pointer to function prologue
251// Dictionary contains opcodes for several full asm instructions
252// + one opcode byte for the next asm instruction for safe address processing
253// RETURN: 1 + the index of the matched pattern, or 0 if no match found.
254static UINT CheckOpcodes( const char ** opcodes, void *inpAddr, bool abortOnError, const FunctionInfo* functionInfo = NULL)
255{
256 static size_t opcodesStringsCount = 0;
257 static size_t maxOpcodesLength = 0;
258 static size_t opcodes_pointer = (size_t)opcodes;
259 char opcodeString[2*MAX_PATTERN_SIZE+1];
260 size_t i;
261 size_t result = 0;
262
263 // Get the values for static variables
264 // max length and number of patterns
265 if( !opcodesStringsCount || opcodes_pointer != (size_t)opcodes ){
266 while( *(opcodes + opcodesStringsCount)!= NULL ){
267 if( (i=strlen(*(opcodes + opcodesStringsCount))) > maxOpcodesLength )
268 maxOpcodesLength = i;
269 opcodesStringsCount++;
270 }
271 opcodes_pointer = (size_t)opcodes;
272 __TBB_ASSERT( maxOpcodesLength/2 <= MAX_PATTERN_SIZE, "Pattern exceeded the limit of 28 opcodes/56 symbols" );
273 }
274
275 // Translate prologue opcodes to string format to compare
276 for( i=0; i<maxOpcodesLength/2 && i<MAX_PATTERN_SIZE; ++i ){
277 sprintf( opcodeString + 2*i, "%.2X", *((unsigned char*)inpAddr+i) );
278 }
279 opcodeString[2*i] = 0;
280
281 // Compare translated opcodes with patterns
282 for( UINT idx=0; idx<opcodesStringsCount; ++idx ){
283 result = compareStrings( opcodes[idx],opcodeString );
284 if( result ) {
285 if (functionInfo) {
286 Log::record(*functionInfo, opcodeString, /*status*/ true);
287 }
288 return idx + 1; // avoid 0 which indicates a failure
289 }
290 }
291 if (functionInfo) {
292 Log::record(*functionInfo, opcodeString, /*status*/ false);
293 }
294 if (abortOnError) {
295 // Impossibility to find opcodes in the dictionary is a serious issue,
296 // as if we unable to call original function, leak or crash is expected result.
297 __TBB_ASSERT_RELEASE( false, "CheckOpcodes failed" );
298 }
299 return 0;
300}
301
302// Modify offsets in original code after moving it to a trampoline.
303// We do not have more than one offset to correct in existing opcode patterns.
304static void CorrectOffset( UINT_PTR address, const char* pattern, UINT distance )
305{
306 const char* pos = strstr(pattern, "#*******");
307 if( pos ) {
308 address += (pos - pattern)/2; // compute the offset position
309 UINT value;
310 // UINT assignment is not used to avoid potential alignment issues
311 memcpy(&value, Addrint2Ptr(address), sizeof(value));
312 value += distance;
313 memcpy(Addrint2Ptr(address), &value, sizeof(value));
314 }
315}
316
317// Insert jump relative instruction to the input address
318// RETURN: the size of the trampoline or 0 on failure
319static DWORD InsertTrampoline32(void *inpAddr, void *targetAddr, const char* pattern, void** storedAddr)
320{
321 size_t bytesToMove = SIZE_OF_RELJUMP;
322 UINT_PTR srcAddr = Ptr2Addrint(inpAddr);
323 UINT_PTR tgtAddr = Ptr2Addrint(targetAddr);
324 // Check that the target fits in 32 bits
325 if (!IsInDistance(srcAddr, tgtAddr, MAX_DISTANCE))
326 return 0;
327
328 UINT_PTR offset;
329 UINT offset32;
330 UCHAR *codePtr = (UCHAR *)inpAddr;
331
332 if ( storedAddr ){ // If requested, store original function code
333 bytesToMove = strlen(pattern)/2-1; // The last byte matching the pattern must not be copied
334 __TBB_ASSERT_RELEASE( bytesToMove >= SIZE_OF_RELJUMP, "Incorrect bytecode pattern?" );
335 UINT_PTR trampAddr = memProvider.GetLocation(srcAddr);
336 if (!trampAddr)
337 return 0;
338 *storedAddr = Addrint2Ptr(trampAddr);
339 // Set 'executable' flag for original instructions in the new place
340 DWORD pageFlags = PAGE_EXECUTE_READWRITE;
341 if (!VirtualProtect(*storedAddr, MAX_PROBE_SIZE, pageFlags, &pageFlags)) return 0;
342 // Copy original instructions to the new place
343 memcpy(*storedAddr, codePtr, bytesToMove);
344 offset = srcAddr - trampAddr;
345 offset32 = (UINT)(offset & 0xFFFFFFFF);
346 CorrectOffset( trampAddr, pattern, offset32 );
347 // Set jump to the code after replacement
348 offset32 -= SIZE_OF_RELJUMP;
349 *(UCHAR*)(trampAddr+bytesToMove) = 0xE9;
350 memcpy((UCHAR*)(trampAddr+bytesToMove+1), &offset32, sizeof(offset32));
351 }
352
353 // The following will work correctly even if srcAddr>tgtAddr, as long as
354 // address difference is less than 2^31, which is guaranteed by IsInDistance.
355 offset = tgtAddr - srcAddr - SIZE_OF_RELJUMP;
356 offset32 = (UINT)(offset & 0xFFFFFFFF);
357 // Insert the jump to the new code
358 *codePtr = 0xE9;
359 memcpy(codePtr+1, &offset32, sizeof(offset32));
360
361 // Fill the rest with NOPs to correctly see disassembler of old code in debugger.
362 for( unsigned i=SIZE_OF_RELJUMP; i<bytesToMove; i++ ){
363 *(codePtr+i) = 0x90;
364 }
365
366 return SIZE_OF_RELJUMP;
367}
368
369// This function is called when the offset doesn't fit in 32 bits
370// 1 Find and allocate a page in the small distance (<2^31) from input address
371// 2 Put jump RIP relative indirect through the address in the close page
372// 3 Put the absolute address of the target in the allocated location
373// RETURN: the size of the trampoline or 0 on failure
374static DWORD InsertTrampoline64(void *inpAddr, void *targetAddr, const char* pattern, void** storedAddr)
375{
376 size_t bytesToMove = SIZE_OF_INDJUMP;
377
378 UINT_PTR srcAddr = Ptr2Addrint(inpAddr);
379 UINT_PTR tgtAddr = Ptr2Addrint(targetAddr);
380
381 // Get a location close to the source address
382 UINT_PTR location = memProvider.GetLocation(srcAddr);
383 if (!location)
384 return 0;
385
386 UINT_PTR offset;
387 UINT offset32;
388 UCHAR *codePtr = (UCHAR *)inpAddr;
389
390 // Fill the location
391 UINT_PTR *locPtr = (UINT_PTR *)Addrint2Ptr(location);
392 *locPtr = tgtAddr;
393
394 if ( storedAddr ){ // If requested, store original function code
395 bytesToMove = strlen(pattern)/2-1; // The last byte matching the pattern must not be copied
396 __TBB_ASSERT_RELEASE( bytesToMove >= SIZE_OF_INDJUMP, "Incorrect bytecode pattern?" );
397 UINT_PTR trampAddr = memProvider.GetLocation(srcAddr);
398 if (!trampAddr)
399 return 0;
400 *storedAddr = Addrint2Ptr(trampAddr);
401 // Set 'executable' flag for original instructions in the new place
402 DWORD pageFlags = PAGE_EXECUTE_READWRITE;
403 if (!VirtualProtect(*storedAddr, MAX_PROBE_SIZE, pageFlags, &pageFlags)) return 0;
404 // Copy original instructions to the new place
405 memcpy(*storedAddr, codePtr, bytesToMove);
406 offset = srcAddr - trampAddr;
407 offset32 = (UINT)(offset & 0xFFFFFFFF);
408 CorrectOffset( trampAddr, pattern, offset32 );
409 // Set jump to the code after replacement. It is within the distance of relative jump!
410 offset32 -= SIZE_OF_RELJUMP;
411 *(UCHAR*)(trampAddr+bytesToMove) = 0xE9;
412 memcpy((UCHAR*)(trampAddr+bytesToMove+1), &offset32, sizeof(offset32));
413 }
414
415 // Fill the buffer
416 offset = location - srcAddr - SIZE_OF_INDJUMP;
417 offset32 = (UINT)(offset & 0xFFFFFFFF);
418 *(codePtr) = 0xFF;
419 *(codePtr+1) = 0x25;
420 memcpy(codePtr+2, &offset32, sizeof(offset32));
421
422 // Fill the rest with NOPs to correctly see disassembler of old code in debugger.
423 for( unsigned i=SIZE_OF_INDJUMP; i<bytesToMove; i++ ){
424 *(codePtr+i) = 0x90;
425 }
426
427 return SIZE_OF_INDJUMP;
428}
429
430// Insert a jump instruction in the inpAddr to the targetAddr
431// 1. Get the memory protection of the page containing the input address
432// 2. Change the memory protection to writable
433// 3. Call InsertTrampoline32 or InsertTrampoline64
434// 4. Restore memory protection
435// RETURN: FALSE on failure, TRUE on success
436static bool InsertTrampoline(void *inpAddr, void *targetAddr, const char ** opcodes, void** origFunc)
437{
438 DWORD probeSize;
439 // Change page protection to EXECUTE+WRITE
440 DWORD origProt = 0;
441 if (!VirtualProtect(inpAddr, MAX_PROBE_SIZE, PAGE_EXECUTE_WRITECOPY, &origProt))
442 return FALSE;
443
444 const char* pattern = NULL;
445 if ( origFunc ){ // Need to store original function code
446 UCHAR * const codePtr = (UCHAR *)inpAddr;
447 if ( *codePtr == 0xE9 ){ // JMP relative instruction
448 // For the special case when a system function consists of a single near jump,
449 // instead of moving it somewhere we use the target of the jump as the original function.
450 unsigned offsetInJmp = *(unsigned*)(codePtr + 1);
451 *origFunc = (void*)(Ptr2Addrint(inpAddr) + offsetInJmp + SIZE_OF_RELJUMP);
452 origFunc = NULL; // now it must be ignored by InsertTrampoline32/64
453 } else {
454 // find the right opcode pattern
455 UINT opcodeIdx = CheckOpcodes( opcodes, inpAddr, /*abortOnError=*/true );
456 __TBB_ASSERT( opcodeIdx > 0, "abortOnError ignored in CheckOpcodes?" );
457 pattern = opcodes[opcodeIdx-1]; // -1 compensates for +1 in CheckOpcodes
458 }
459 }
460
461 probeSize = InsertTrampoline32(inpAddr, targetAddr, pattern, origFunc);
462 if (!probeSize)
463 probeSize = InsertTrampoline64(inpAddr, targetAddr, pattern, origFunc);
464
465 // Restore original protection
466 VirtualProtect(inpAddr, MAX_PROBE_SIZE, origProt, &origProt);
467
468 if (!probeSize)
469 return FALSE;
470
471 FlushInstructionCache(GetCurrentProcess(), inpAddr, probeSize);
472 FlushInstructionCache(GetCurrentProcess(), origFunc, probeSize);
473
474 return TRUE;
475}
476
477// Routine to replace the functions
478// TODO: replace opcodesNumber with opcodes and opcodes number to check if we replace right code.
479FRR_TYPE ReplaceFunctionA(const char *dllName, const char *funcName, FUNCPTR newFunc, const char ** opcodes, FUNCPTR* origFunc)
480{
481 // Cache the results of the last search for the module
482 // Assume that there was no DLL unload between
483 static char cachedName[MAX_PATH+1];
484 static HMODULE cachedHM = 0;
485
486 if (!dllName || !*dllName)
487 return FRR_NODLL;
488
489 if (!cachedHM || strncmp(dllName, cachedName, MAX_PATH) != 0)
490 {
491 // Find the module handle for the input dll
492 HMODULE hModule = GetModuleHandleA(dllName);
493 if (hModule == 0)
494 {
495 // Couldn't find the module with the input name
496 cachedHM = 0;
497 return FRR_NODLL;
498 }
499
500 cachedHM = hModule;
501 strncpy(cachedName, dllName, MAX_PATH);
502 }
503
504 FARPROC inpFunc = GetProcAddress(cachedHM, funcName);
505 if (inpFunc == 0)
506 {
507 // Function was not found
508 return FRR_NOFUNC;
509 }
510
511 if (!InsertTrampoline((void*)inpFunc, (void*)newFunc, opcodes, (void**)origFunc)){
512 // Failed to insert the trampoline to the target address
513 return FRR_FAILED;
514 }
515
516 return FRR_OK;
517}
518
519FRR_TYPE ReplaceFunctionW(const wchar_t *dllName, const char *funcName, FUNCPTR newFunc, const char ** opcodes, FUNCPTR* origFunc)
520{
521 // Cache the results of the last search for the module
522 // Assume that there was no DLL unload between
523 static wchar_t cachedName[MAX_PATH+1];
524 static HMODULE cachedHM = 0;
525
526 if (!dllName || !*dllName)
527 return FRR_NODLL;
528
529 if (!cachedHM || wcsncmp(dllName, cachedName, MAX_PATH) != 0)
530 {
531 // Find the module handle for the input dll
532 HMODULE hModule = GetModuleHandleW(dllName);
533 if (hModule == 0)
534 {
535 // Couldn't find the module with the input name
536 cachedHM = 0;
537 return FRR_NODLL;
538 }
539
540 cachedHM = hModule;
541 wcsncpy(cachedName, dllName, MAX_PATH);
542 }
543
544 FARPROC inpFunc = GetProcAddress(cachedHM, funcName);
545 if (inpFunc == 0)
546 {
547 // Function was not found
548 return FRR_NOFUNC;
549 }
550
551 if (!InsertTrampoline((void*)inpFunc, (void*)newFunc, opcodes, (void**)origFunc)){
552 // Failed to insert the trampoline to the target address
553 return FRR_FAILED;
554 }
555
556 return FRR_OK;
557}
558
559bool IsPrologueKnown(const char* dllName, const char *funcName, const char **opcodes, HMODULE module)
560{
561 FARPROC inpFunc = GetProcAddress(module, funcName);
562 FunctionInfo functionInfo = { funcName, dllName };
563
564 if (!inpFunc) {
565 Log::record(functionInfo, "unknown", /*status*/ false);
566 return false;
567 }
568
569 return CheckOpcodes( opcodes, (void*)inpFunc, /*abortOnError=*/false, &functionInfo) != 0;
570}
571
572// Public Windows API
573extern "C" __declspec(dllexport) int TBB_malloc_replacement_log(char *** function_replacement_log_ptr)
574{
575 if (function_replacement_log_ptr != NULL) {
576 *function_replacement_log_ptr = Log::records;
577 }
578
579 // If we have no logs -> return false status
580 return Log::replacement_status && Log::records[0] != NULL ? 0 : -1;
581}
582
583#endif /* !__TBB_WIN8UI_SUPPORT && defined(_WIN32) */
584