1//
2// Copyright (c) Microsoft. All rights reserved.
3// Licensed under the MIT license. See LICENSE file in the project root for full license information.
4//
5
6//----------------------------------------------------------
7// nearDiffer.cpp - differ that handles code that is very similar
8//----------------------------------------------------------
9
10#include "standardpch.h"
11
12#ifdef USE_COREDISTOOLS
13#include "coredistools.h"
14#endif // USE_COREDISTOOLS
15
16#include "logging.h"
17#include "neardiffer.h"
18
19#ifdef USE_COREDISTOOLS
20
21//
22// Helper functions to print messages from CoreDisTools Library
23// The file/linenumber information is from this helper itself,
24// since we are only linking with the CoreDisTools library.
25//
26static void LogFromCoreDisToolsHelper(LogLevel level, const char* msg, va_list argList)
27{
28 Logger::LogVprintf(__func__, __FILE__, __LINE__, level, argList, msg);
29}
30
31#define LOGGER(L) \
32 \
33static void __cdecl CorDisToolsLog##L(const char* msg, ...) \
34 \
35{ \
36 va_list argList; \
37 va_start(argList, msg); \
38 LogFromCoreDisToolsHelper(LOGLEVEL_##L, msg, argList); \
39 va_end(argList); \
40 \
41}
42
43LOGGER(VERBOSE)
44LOGGER(ERROR)
45LOGGER(WARNING)
46
47const PrintControl CorPrinter = {CorDisToolsLogERROR, CorDisToolsLogWARNING, CorDisToolsLogVERBOSE,
48 CorDisToolsLogVERBOSE};
49
50#endif // USE_COREDISTOOLS
51
52#ifdef USE_COREDISTOOLS
53NewDiffer_t* g_PtrNewDiffer = nullptr;
54FinishDiff_t* g_PtrFinishDiff = nullptr;
55NearDiffCodeBlocks_t* g_PtrNearDiffCodeBlocks = nullptr;
56DumpDiffBlocks_t* g_PtrDumpDiffBlocks = nullptr;
57#endif // USE_COREDISTOOLS
58
59//
60// The NearDiff Disassembler initialization.
61//
62// Returns true on success, false on failure.
63//
64bool NearDiffer::InitAsmDiff()
65{
66#ifdef USE_COREDISTOOLS
67
68 if (UseCoreDisTools)
69 {
70 const char* coreDisToolsLibrary = MAKEDLLNAME_A("coredistools");
71
72 HMODULE hCoreDisToolsLib = ::LoadLibraryA(coreDisToolsLibrary);
73 if (hCoreDisToolsLib == 0)
74 {
75 LogError("LoadLibrary(%s) failed (0x%08x)", coreDisToolsLibrary, ::GetLastError());
76 return false;
77 }
78 g_PtrNewDiffer = (NewDiffer_t*)::GetProcAddress(hCoreDisToolsLib, "NewDiffer");
79 if (g_PtrNewDiffer == nullptr)
80 {
81 LogError("GetProcAddress 'NewDiffer' failed (0x%08x)", ::GetLastError());
82 return false;
83 }
84 g_PtrFinishDiff = (FinishDiff_t*)::GetProcAddress(hCoreDisToolsLib, "FinishDiff");
85 if (g_PtrFinishDiff == nullptr)
86 {
87 LogError("GetProcAddress 'FinishDiff' failed (0x%08x)", ::GetLastError());
88 return false;
89 }
90 g_PtrNearDiffCodeBlocks = (NearDiffCodeBlocks_t*)::GetProcAddress(hCoreDisToolsLib, "NearDiffCodeBlocks");
91 if (g_PtrNearDiffCodeBlocks == nullptr)
92 {
93 LogError("GetProcAddress 'NearDiffCodeBlocks' failed (0x%08x)", ::GetLastError());
94 return false;
95 }
96 g_PtrDumpDiffBlocks = (DumpDiffBlocks_t*)::GetProcAddress(hCoreDisToolsLib, "DumpDiffBlocks");
97 if (g_PtrDumpDiffBlocks == nullptr)
98 {
99 LogError("GetProcAddress 'DumpDiffBlocks' failed (0x%08x)", ::GetLastError());
100 return false;
101 }
102
103 corAsmDiff = (*g_PtrNewDiffer)(Target_Host, &CorPrinter, NearDiffer::CoreDisCompareOffsetsCallback);
104 }
105#endif // USE_COREDISTOOLS
106
107 return true;
108}
109
110#ifdef USE_COREDISTOOLS
111// static
112bool __cdecl NearDiffer::CoreDisCompareOffsetsCallback(
113 const void* payload, size_t blockOffset, size_t instrLen, uint64_t offset1, uint64_t offset2)
114{
115 return compareOffsets(payload, blockOffset, instrLen, offset1, offset2);
116}
117#endif // USE_COREDISTOOLS
118
119//
120// The NearDiff destructor
121//
122NearDiffer::~NearDiffer()
123{
124#ifdef USE_COREDISTOOLS
125 if (corAsmDiff != nullptr)
126 {
127 (*g_PtrFinishDiff)(corAsmDiff);
128 }
129#endif // USE_COREDISTOOLS
130}
131
132// At a high level, the near differ takes in a method context and two compile results, performs
133// some simple fixups, and then compares the main artifacts of the compile result (i.e. generated
134// code, GC info, EH info, debug info, etc.) for equality. In order to be fast, the fixups and
135// definitions of "equality" are minimal; for example, the GC info check just does a simple memcmp.
136//
// The entrypoint into the near differ is NearDiffer::compare; its doc comments will have more
// details on what it does. That function in turn fans out to various other components. For asm
// diffing, the main function of interest will be NearDiffer::compareCodeSection.
140//
141// Most of the diffing logic is architecture-independent, with the following exceptions:
142//
143// - The MSDIS instance must be created with knowledge of the architecture it is working with.
// - The heuristics to compare different literal operand values have some architecture-specific
//   assumptions.
146// - The code stream is fixed up using relocations recorded during compilation time. The logic
147// for applying these should, in theory, be architecture independent, but depending on how
148// the runtime implements this from platform to platform, there might be subtle differences here.
149//
150
151#ifdef USE_MSVCDIS
152
153DIS* NearDiffer::GetMsVcDis()
154{
155 DIS* disasm;
156
157#ifdef _TARGET_AMD64_
158 if ((TargetArchitecture != nullptr) && (0 == _stricmp(TargetArchitecture, "arm64")))
159 {
160 disasm = DIS::PdisNew(DIS::distArm64);
161 }
162 else
163 {
164 disasm = DIS::PdisNew(DIS::distX8664);
165 }
166#elif defined(_TARGET_X86_)
167 disasm = DIS::PdisNew(DIS::distX86);
168#endif
169
170 return disasm;
171}
172
173#endif // USE_MSVCDIS
174
175//
176// Simple, quick-and-dirty disassembler. If NearDiffer::compareCodeSection finds that two code
177// streams differ, it will call this to dump the two differing code blocks to the log. The dump
178// is logged under the verbose logging level.
179//
180// The output format is in MSDIS's disassembly format.
181//
182// Arguments:
183// block - A pointer to the code block to disassemble.
184// blocksize - The size of the code block to disassemble.
185// originalAddr - The original base address of the code block.
186//
187void NearDiffer::DumpCodeBlock(unsigned char* block, ULONG blocksize, void* originalAddr)
188{
189#ifdef USE_MSVCDIS
190 DIS* disasm = GetMsVcDis();
191 size_t offset = 0;
192 std::string codeBlock;
193
194 while (offset < blocksize)
195 {
196 DIS::INSTRUCTION instr;
197 DIS::OPERAND ops[3];
198
199 size_t instrSize = disasm->CbDisassemble((DIS::ADDR)originalAddr + offset, (void*)(block + offset), 15);
200 if (instrSize == 0)
201 {
202 LogWarning("Zero sized instruction");
203 break;
204 }
205 disasm->FDecode(&instr, ops, 3);
206
207 wchar_t instrMnemonicWide[64]; // I never know how much to allocate...
208 disasm->CchFormatInstr(instrMnemonicWide, 64);
209 char instrMnemonic[128];
210 size_t count;
211 wcstombs_s(&count, instrMnemonic, 128, instrMnemonicWide, 64);
212
213 const size_t minInstrBytes = 7;
214 size_t instrBytes = max(instrSize, minInstrBytes);
215 size_t buffSize = sizeof("%p %s\n") + 10 + count + 3 * instrBytes + 1;
216 char* buff = new char[buffSize];
217 int written = 0;
218 written += sprintf_s(buff, buffSize, "%p ", (void*)((size_t)originalAddr + offset));
219 for (size_t i = 0; i < instrBytes; i++)
220 {
221 if (i < instrSize)
222 {
223 written +=
224 sprintf_s(buff + written, buffSize - written, "%02X ", *(const uint8_t*)(block + offset + i));
225 }
226 else
227 {
228 written += sprintf_s(buff + written, buffSize - written, " ");
229 }
230 }
231 written += sprintf_s(buff + written, buffSize - written, "%s\n", instrMnemonic);
232 codeBlock += buff;
233 delete[] buff;
234 offset += instrSize;
235 }
236 LogVerbose("Code dump:\n%s", codeBlock.c_str());
237 delete disasm;
238#else // !USE_MSVCDIS
239 LogVerbose("No disassembler");
240#endif // !USE_MSVCDIS
241}
242
243//
244// Struct to capture the information required by offset comparator.
245//
246struct DiffData
247{
248 // Common Data
249 CompileResult* cr;
250
251 // Details of the first block
252 size_t blocksize1;
253 size_t datablock1;
254 size_t datablockSize1;
255 size_t originalBlock1;
256 size_t originalDataBlock1;
257 size_t otherCodeBlock1;
258 size_t otherCodeBlockSize1;
259
260 // Details of the second block
261 size_t blocksize2;
262 size_t datablock2;
263 size_t datablockSize2;
264 size_t originalBlock2;
265 size_t originalDataBlock2;
266 size_t otherCodeBlock2;
267 size_t otherCodeBlockSize2;
268};
269
270//
271// NearDiff Offset Comparator.
272// Determine whether two syntactically different constants are
273// semantically equivalent, using certain heuristics.
274//
275bool NearDiffer::compareOffsets(
276 const void* payload, size_t blockOffset, size_t instrLen, uint64_t offset1, uint64_t offset2)
277{
278 // The trivial case
279 if (offset1 == offset2)
280 {
281 return true;
282 }
283
284 const DiffData* data = (const DiffData*)payload;
285 size_t ip1 = data->originalBlock1 + blockOffset;
286 size_t ip2 = data->originalBlock2 + blockOffset;
287 size_t ipRelOffset1 = ip1 + instrLen + (size_t)offset1;
288 size_t ipRelOffset2 = ip2 + instrLen + (size_t)offset2;
289
290 // Case where we have a call into flat address -- the most common case.
291 size_t gOffset1 = ipRelOffset1;
292 size_t gOffset2 = ipRelOffset2;
293 if ((DWORD)gOffset1 ==
294 (DWORD)gOffset2) // make sure the lower 32bits match (best we can do in the current replay form)
295 return true;
296
297 // Case where we have an offset into the read only section (e.g. loading a float value)
298 size_t roOffset1a = (size_t)offset1 - data->originalDataBlock1;
299 size_t roOffset2a = (size_t)offset2 - data->originalDataBlock2;
300 if ((roOffset1a == roOffset2a) &&
301 (roOffset1a < data->datablockSize1)) // Confirm its an offset that fits inside our RoRegion
302 return true;
303
304 // This case is written to catch IP-relative offsets to the RO data-section
305 // For example:
306 //
307 size_t roOffset1b = ipRelOffset1 - data->originalDataBlock1;
308 size_t roOffset2b = ipRelOffset2 - data->originalDataBlock2;
309 if ((roOffset1b == roOffset2b) &&
310 (roOffset1b < data->datablockSize1)) // Confirm its an offset that fits inside our RoRegion
311 return true;
312
313 // Case where we push an address to our own code section.
314 size_t gOffset1a = (size_t)offset1 - data->originalBlock1;
315 size_t gOffset2a = (size_t)offset2 - data->originalBlock2;
316 if ((gOffset1a == gOffset2a) && (gOffset1a < data->blocksize1)) // Confirm its in our code region
317 return true;
318
319 // Case where we push an address in the other codeblock.
320 size_t gOffset1b = (size_t)offset1 - data->otherCodeBlock1;
321 size_t gOffset2b = (size_t)offset2 - data->otherCodeBlock2;
322 if ((gOffset1b == gOffset2b) && (gOffset1b < data->otherCodeBlockSize1)) // Confirm it's in the other code region
323 return true;
324
325 // Case where we have an offset into the hot codeblock from the cold code block (why?)
326 size_t ocOffset1 = ipRelOffset1 - data->otherCodeBlock1;
327 size_t ocOffset2 = ipRelOffset2 - data->otherCodeBlock2;
328 if (ocOffset1 == ocOffset2) // Would be nice to check to see if it fits in the other code block
329 return true;
330
331 // VSD calling case.
332 size_t Offset1 = (ipRelOffset1 - 8);
333 if (data->cr->CallTargetTypes->GetIndex((DWORDLONG)Offset1) != (DWORD)-1)
334 {
335 // This logging is too noisy, so disable it.
336 // LogVerbose("Found VSD callsite, did softer compare than ideal");
337 return true;
338 }
339
340 // x86 VSD calling cases.
341 size_t Offset1b = (size_t)offset1 - 4;
342 size_t Offset2b = (size_t)offset2;
343 if (data->cr->CallTargetTypes->GetIndex((DWORDLONG)Offset1b) != (DWORD)-1)
344 {
345 // This logging is too noisy, so disable it.
346 // LogVerbose("Found VSD callsite, did softer compare than ideal");
347 return true;
348 }
349 if (data->cr->CallTargetTypes->GetIndex((DWORDLONG)Offset2b) != (DWORD)-1)
350 {
351 // This logging is too noisy, so disable it.
352 // LogVerbose("Found VSD callsite, did softer compare than ideal");
353 return true;
354 }
355
356 // Case might be a field address that we handed out to handle inlined values being loaded into
357 // a register as an immediate value (and where the address is encoded as an indirect immediate load)
358 size_t realTargetAddr = (size_t)data->cr->searchAddressMap((void*)gOffset2);
359 if (realTargetAddr == gOffset1)
360 return true;
361
362 // Case might be a field address that we handed out to handle inlined values being loaded into
363 // a register as an immediate value (and where the address is encoded and loaded by immediate into a register)
364 realTargetAddr = (size_t)data->cr->searchAddressMap((void*)offset2);
365 if (realTargetAddr == offset1)
366 return true;
367 if (realTargetAddr == 0x424242) // this offset matches what we got back from a getTailCallCopyArgsThunk
368 return true;
369
370 realTargetAddr = (size_t)data->cr->searchAddressMap((void*)(gOffset2));
371 if (realTargetAddr != -1) // we know this was passed out as a bbloc
372 return true;
373
374 return false;
375}
376
377//
378// Compares two code sections for syntactic equality. This is the core of the asm diffing logic.
379//
380// This mostly relies on MSDIS's decoded representation of an instruction to compare for equality.
381// That is, using MSDIS's internal IR, this goes through the code stream and compares, instruction
382// by instruction, op code and operand values for equality.
383//
384// Obviously, just blindly comparing operand values will raise a lot of false alarms. In order to
385// compensate for phenomena like literal pointer addresses in the code stream changing, this applies
386// some heuristics on mismatching operand values to try to normalize them a little bit. Essentially,
387// if operand values don't match, they are re-interpreted as various relative deltas from known base
388// addresses. For example, a common case is a pointer into the read-only data section. One of the
389// heuristics subtracts both operand values from the base address of the read-only data section and
390// checks to see if they are the same distance away from their respective read-only base addresses.
391//
392// Notes:
393// - The core syntactic comparison is platform agnostic; we compare op codes and operand values
394// using MSDIS's architecture-independent IR (i.e. the data structures defined in msvcdis.h).
395// Only the disassembler instance itself is initialized differently based on the target arch-
396// itecture.
397// - That being said, the heuristics themselves are not guaranteed to be platform agnostic. For
398// instance, there is a case that applies only to x86 VSD calls. When porting the near differ
399// to new platforms, these special cases should be examined and ported with care.
400//
401// Arguments:
402// mc - The method context of the method to diff. Unused.
403// cr1 - The first compile result to compare. Unused.
404// cr2 - The second compile result to compare. Unused.
405// block1 - A pointer to the first code block to disassemble.
406// blocksize1 - The size of the first code block to compare.
407// datablock1 - A pointer to the first read-only data block to compare. Unused.
408// datablockSize1 - The size of the first read-only data block to compare.
409// originalBlock1 - The original base address of the first code block.
410// originalDataBlock1 - The original base address of the first read-only data block.
411// otherCodeBlock1 - The original base address of the first cold code block. Note that this is
412// just an address; we don't need the cold code buffer.
413// otherCodeBlockSize1- The size of the first cold code block.
414// block2 - A pointer to the second code block to disassemble.
415// blocksize2 - The size of the second code block to compare.
416// datablock2 - A pointer to the second read-only data block to compare.
417// datablockSize2 - The size of the second read-only data block to compare.
418// originalBlock2 - The original base address of the second code block.
419// originalDataBlock2 - The original base address of the second read-only data block.
420// otherCodeBlock2 - The original base address of the second cold code block. Note that this is
421// just an address; we don't need the cold code buffer.
422// otherCodeBlockSize2- The size of the second cold code block.
423//
424// Return Value:
425// True if the code sections are syntactically identical; false otherwise.
426//
427
428bool NearDiffer::compareCodeSection(MethodContext* mc,
429 CompileResult* cr1,
430 CompileResult* cr2,
431 unsigned char* block1,
432 ULONG blocksize1,
433 unsigned char* datablock1,
434 ULONG datablockSize1,
435 void* originalBlock1,
436 void* originalDataBlock1,
437 void* otherCodeBlock1,
438 ULONG otherCodeBlockSize1,
439 unsigned char* block2,
440 ULONG blocksize2,
441 unsigned char* datablock2,
442 ULONG datablockSize2,
443 void* originalBlock2,
444 void* originalDataBlock2,
445 void* otherCodeBlock2,
446 ULONG otherCodeBlockSize2)
447{
448 DiffData data = {cr2,
449
450 // Details of the first block
451 (size_t)blocksize1, (size_t)datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
452 (size_t)originalDataBlock1, (size_t)otherCodeBlock1, (size_t)otherCodeBlockSize1,
453
454 // Details of the second block
455 (size_t)blocksize2, (size_t)datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
456 (size_t)originalDataBlock2, (size_t)otherCodeBlock2, (size_t)otherCodeBlockSize2};
457
458#ifdef USE_COREDISTOOLS
459 if (UseCoreDisTools)
460 {
461 bool areSame = (*g_PtrNearDiffCodeBlocks)(corAsmDiff, &data, (const uint8_t*)originalBlock1, block1, blocksize1,
462 (const uint8_t*)originalBlock2, block2, blocksize2);
463
464 if (!areSame)
465 {
466 (*g_PtrDumpDiffBlocks)(corAsmDiff, (const uint8_t*)originalBlock1, block1, blocksize1,
467 (const uint8_t*)originalBlock2, block2, blocksize2);
468 }
469
470 return areSame;
471 }
472#endif // USE_COREDISTOOLS
473
474#ifdef USE_MSVCDIS
475 bool haveSeenRet = false;
476 DIS* disasm_1 = GetMsVcDis();
477 DIS* disasm_2 = GetMsVcDis();
478
479 size_t offset = 0;
480
481 if (blocksize1 != blocksize2)
482 {
483 LogVerbose("Code sizes don't match %u != %u", blocksize1, blocksize2);
484 goto DumpDetails;
485 }
486
487 while (offset < blocksize1)
488 {
489 DIS::INSTRUCTION instr_1;
490 DIS::INSTRUCTION instr_2;
491 const int MaxOperandCount = 5;
492 DIS::OPERAND ops_1[MaxOperandCount];
493 DIS::OPERAND ops_2[MaxOperandCount];
494
495 // Zero out the locals, just in case.
496 memset(&instr_1, 0, sizeof(instr_1));
497 memset(&instr_2, 0, sizeof(instr_2));
498 memset(&ops_1, 0, sizeof(ops_1));
499 memset(&ops_2, 0, sizeof(ops_2));
500
501 size_t instrSize_1 = disasm_1->CbDisassemble((DIS::ADDR)originalBlock1 + offset, (void*)(block1 + offset), 15);
502 size_t instrSize_2 = disasm_2->CbDisassemble((DIS::ADDR)originalBlock2 + offset, (void*)(block2 + offset), 15);
503
504 if (instrSize_1 != instrSize_2)
505 {
506 LogVerbose("Different instruction sizes %llu %llu", instrSize_1, instrSize_2);
507 goto DumpDetails;
508 }
509 if (instrSize_1 == 0)
510 {
511 if (haveSeenRet)
512 {
513 // This logging is pretty noisy, so disable it.
514 // LogVerbose("instruction size of zero after seeing a ret (soft issue?).");
515 break;
516 }
517 LogWarning("instruction size of zero.");
518 goto DumpDetails;
519 }
520
521 bool FDecodeError = false;
522 if (!disasm_1->FDecode(&instr_1, ops_1, MaxOperandCount))
523 {
524 LogWarning("FDecode of instr_1 returned false.");
525 FDecodeError = true;
526 }
527 if (!disasm_2->FDecode(&instr_2, ops_2, MaxOperandCount))
528 {
529 LogWarning("FDecode of instr_2 returned false.");
530 FDecodeError = true;
531 }
532
533 wchar_t instrMnemonic_1[64]; // I never know how much to allocate...
534 disasm_1->CchFormatInstr(instrMnemonic_1, 64);
535 wchar_t instrMnemonic_2[64]; // I never know how much to allocate...
536 disasm_2->CchFormatInstr(instrMnemonic_2, 64);
537 if (wcscmp(instrMnemonic_1, L"ret") == 0)
538 haveSeenRet = true;
539 if (wcscmp(instrMnemonic_1, L"rep ret") == 0)
540 haveSeenRet = true;
541
542 // First, check to see if these instructions are actually identical.
543 // This is done 1) to avoid the detailed comparison of the fields of instr_1
544 // and instr_2 if they are identical, and 2) because in the event that
545 // there are bugs or unimplemented instructions in FDecode, we don't want
546 // to count them as diffs if they are bitwise identical.
547
548 if (memcmp((block1 + offset), (block2 + offset), instrSize_1) != 0)
549 {
550 if (FDecodeError)
551 {
552 LogWarning("FDecode returned false.");
553 goto DumpDetails;
554 }
555
556 if (instr_1.opa != instr_2.opa)
557 {
558 LogVerbose("different opa %d %d", instr_1.opa, instr_2.opa);
559 goto DumpDetails;
560 }
561 if (instr_1.coperand != instr_2.coperand)
562 {
563 LogVerbose("different coperand %u %u", (unsigned int)instr_1.coperand, (unsigned int)instr_2.coperand);
564 goto DumpDetails;
565 }
566 if (instr_1.dwModifiers != instr_2.dwModifiers)
567 {
568 LogVerbose("different dwModifiers %u %u", instr_1.dwModifiers, instr_2.dwModifiers);
569 goto DumpDetails;
570 }
571
572 for (size_t i = 0; i < instr_1.coperand; i++)
573 {
574 if (ops_1[i].cb != ops_2[i].cb)
575 {
576 LogVerbose("different cb %llu %llu", ops_1[i].cb, ops_2[i].cb);
577 goto DumpDetails;
578 }
579 if (ops_1[i].imcls != ops_2[i].imcls)
580 {
581 LogVerbose("different imcls %d %d", ops_1[i].imcls, ops_2[i].imcls);
582 goto DumpDetails;
583 }
584 if (ops_1[i].opcls != ops_2[i].opcls)
585 {
586 LogVerbose("different opcls %d %d", ops_1[i].opcls, ops_2[i].opcls);
587 goto DumpDetails;
588 }
589 if (ops_1[i].rega1 != ops_2[i].rega1)
590 {
591 LogVerbose("different rega1 %d %d", ops_1[i].rega1, ops_2[i].rega1);
592 goto DumpDetails;
593 }
594 if (ops_1[i].rega2 != ops_2[i].rega2)
595 {
596 LogVerbose("different rega2 %d %d", ops_1[i].rega2, ops_2[i].rega2);
597 goto DumpDetails;
598 }
599 if (ops_1[i].rega3 != ops_2[i].rega3)
600 {
601 LogVerbose("different rega3 %d %d", ops_1[i].rega3, ops_2[i].rega3);
602 goto DumpDetails;
603 }
604 if (ops_1[i].wScale != ops_2[i].wScale)
605 {
606 LogVerbose("different wScale %u %u", ops_1[i].wScale, ops_2[i].wScale);
607 goto DumpDetails;
608 }
609
610 //
611 // These are special.. we can often reason out exactly why these values
612 // are different using heuristics.
613 //
614 // Why is Instruction size passed as zero?
615 // Ans: Because the implementation of areOffsetsEquivalent() uses
616 // the instruction size to compute absolute offsets in the case of
617 // PC-relative addressing, and MSVCDis already reports the
618 // absolute offsets! For example:
619 // 0F 2E 05 67 00 9A FD ucomiss xmm0, dword ptr[FFFFFFFFFD9A006Eh]
620 //
621
622 if (compareOffsets(&data, offset, 0, ops_1[i].dwl, ops_2[i].dwl))
623 {
624 continue;
625 }
626 else
627 {
628 size_t gOffset1 = (size_t)originalBlock1 + offset + (size_t)ops_1[i].dwl;
629 size_t gOffset2 = (size_t)originalBlock2 + offset + (size_t)ops_2[i].dwl;
630
631 LogVerbose("operand %d dwl is different", i);
632#ifdef _TARGET_AMD64_
633 LogVerbose("gOffset1 %016llX", gOffset1);
634 LogVerbose("gOffset2 %016llX", gOffset2);
635 LogVerbose("gOffset1 - gOffset2 %016llX", gOffset1 - gOffset2);
636#elif defined(_TARGET_X86_)
637 LogVerbose("gOffset1 %08X", gOffset1);
638 LogVerbose("gOffset2 %08X", gOffset2);
639 LogVerbose("gOffset1 - gOffset2 %08X", gOffset1 - gOffset2);
640#endif
641 LogVerbose("dwl1 %016llX", ops_1[i].dwl);
642 LogVerbose("dwl2 %016llX", ops_2[i].dwl);
643 goto DumpDetails;
644 }
645 }
646 }
647 offset += instrSize_1;
648 }
649 delete disasm_1;
650 delete disasm_2;
651 return true;
652
653DumpDetails:
654 LogVerbose("block1 %p", block1);
655 LogVerbose("block2 %p", block2);
656 LogVerbose("originalBlock1 [%p,%p)", originalBlock1, (const uint8_t*)originalBlock1 + blocksize1);
657 LogVerbose("originalBlock2 [%p,%p)", originalBlock2, (const uint8_t*)originalBlock2 + blocksize2);
658 LogVerbose("blocksize1 %08X", blocksize1);
659 LogVerbose("blocksize2 %08X", blocksize2);
660 LogVerbose("dataBlock1 [%p,%p)", originalDataBlock1, (const uint8_t*)originalDataBlock1 + datablockSize1);
661 LogVerbose("dataBlock2 [%p,%p)", originalDataBlock2, (const uint8_t*)originalDataBlock2 + datablockSize2);
662 LogVerbose("datablockSize1 %08X", datablockSize1);
663 LogVerbose("datablockSize2 %08X", datablockSize2);
664 LogVerbose("otherCodeBlock1 [%p,%p)", otherCodeBlock1, (const uint8_t*)otherCodeBlock1 + otherCodeBlockSize1);
665 LogVerbose("otherCodeBlock2 [%p,%p)", otherCodeBlock2, (const uint8_t*)otherCodeBlock2 + otherCodeBlockSize2);
666 LogVerbose("otherCodeBlockSize1 %08X", otherCodeBlockSize1);
667 LogVerbose("otherCodeBlockSize2 %08X", otherCodeBlockSize2);
668
669#ifdef _TARGET_AMD64_
670 LogVerbose("offset %016llX", offset);
671 LogVerbose("addr1 %016llX", (size_t)originalBlock1 + offset);
672 LogVerbose("addr2 %016llX", (size_t)originalBlock2 + offset);
673#elif defined(_TARGET_X86_)
674 LogVerbose("offset %08X", offset);
675 LogVerbose("addr1 %08X", (size_t)originalBlock1 + offset);
676 LogVerbose("addr2 %08X", (size_t)originalBlock2 + offset);
677#endif
678
679 LogVerbose("Block1:");
680 DumpCodeBlock(block1, blocksize1, originalBlock1);
681 LogVerbose("Block2:");
682 DumpCodeBlock(block2, blocksize2, originalBlock2);
683
684 if (disasm_1 != nullptr)
685 delete disasm_1;
686 if (disasm_2 != nullptr)
687 delete disasm_2;
688 return false;
689#else // !USE_MSVCDIS
690 return false; // No disassembler; assume there are differences
691#endif // !USE_MSVCDIS
692}
693
694//
695// Compares two read-only data sections for equality.
696//
697// Arguments:
698// mc - The method context of the method to diff.
699// cr1 - The first compile result to compare.
700// cr2 - The second compile result to compare.
701// block1 - A pointer to the first code block to disassemble.
702// blocksize1 - The size of the first code block to compare.
703// originalDataBlock1 - The original base address of the first read-only data block.
704// block2 - A pointer to the second code block to disassemble.
705// blocksize2 - The size of the second code block to compare.
706// originalDataBlock2 - The original base address of the second read-only data block.
707//
708// Return Value:
709// True if the read-only data sections are identical; false otherwise.
710//
711bool NearDiffer::compareReadOnlyDataBlock(MethodContext* mc,
712 CompileResult* cr1,
713 CompileResult* cr2,
714 unsigned char* block1,
715 ULONG blocksize1,
716 void* originalDataBlock1,
717 unsigned char* block2,
718 ULONG blocksize2,
719 void* originalDataBlock2)
720{
721 // no rodata
722 if (blocksize1 == 0 && blocksize2 == 0)
723 return true;
724
725 if (blocksize1 != blocksize2)
726 {
727 LogVerbose("compareReadOnlyDataBlock found non-matching sizes %u %u", blocksize1, blocksize2);
728 return false;
729 }
730
731 // TODO-Cleanup: The values on the datablock seem to wobble. Need further investigation to evaluate a good near
732 // comparison for these
733 return true;
734}
735
736//
737// Compares two EH info blocks for equality.
738//
739// Arguments:
740// mc - The method context of the method to diff.
741// cr1 - The first compile result to compare.
742// cr2 - The second compile result to compare.
743//
744// Return Value:
745// True if the EH info blocks are identical; false otherwise.
746//
747bool NearDiffer::compareEHInfo(MethodContext* mc, CompileResult* cr1, CompileResult* cr2)
748{
749 ULONG cEHSize_1;
750 ULONG ehFlags_1;
751 ULONG tryOffset_1;
752 ULONG tryLength_1;
753 ULONG handlerOffset_1;
754 ULONG handlerLength_1;
755 ULONG classToken_1;
756
757 ULONG cEHSize_2;
758 ULONG ehFlags_2;
759 ULONG tryOffset_2;
760 ULONG tryLength_2;
761 ULONG handlerOffset_2;
762 ULONG handlerLength_2;
763 ULONG classToken_2;
764
765 cEHSize_1 = cr1->repSetEHcount();
766 cEHSize_2 = cr2->repSetEHcount();
767
768 // no exception
769 if (cEHSize_1 == 0 && cEHSize_2 == 0)
770 return true;
771
772 if (cEHSize_1 != cEHSize_2)
773 {
774 LogVerbose("compareEHInfo found non-matching sizes %u %u", cEHSize_1, cEHSize_2);
775 return false;
776 }
777
778 for (unsigned int i = 0; i < cEHSize_1; i++)
779 {
780 cr1->repSetEHinfo(i, &ehFlags_1, &tryOffset_1, &tryLength_1, &handlerOffset_1, &handlerLength_1, &classToken_1);
781 cr2->repSetEHinfo(i, &ehFlags_2, &tryOffset_2, &tryLength_2, &handlerOffset_2, &handlerLength_2, &classToken_2);
782 if (ehFlags_1 != ehFlags_2)
783 {
784 LogVerbose("EH flags don't match %u != %u", ehFlags_1, ehFlags_2);
785 return false;
786 }
787 if ((tryOffset_1 != tryOffset_2) || (tryLength_1 != tryLength_2))
788 {
789 LogVerbose("EH try information don't match, offset: %u %u, length: %u %u", tryOffset_1, tryOffset_2,
790 tryLength_1, tryLength_2);
791 return false;
792 }
793 if ((handlerOffset_1 != handlerOffset_2) || (handlerLength_1 != handlerLength_2))
794 {
795 LogVerbose("EH handler information don't match, offset: %u %u, length: %u %u", handlerOffset_1,
796 handlerOffset_2, handlerLength_1, handlerLength_2);
797 return false;
798 }
799 if (classToken_1 != classToken_2)
800 {
801 LogVerbose("EH class tokens don't match %u!=%u", classToken_1, classToken_2);
802 return false;
803 }
804 }
805
806 return true;
807}
808
809//
810// Compares two GC info blocks for equality.
811//
812// Arguments:
813// mc - The method context of the method to diff.
814// cr1 - The first compile result to compare.
815// cr2 - The second compile result to compare.
816//
817// Return Value:
818// True if the GC info blocks are identical; false otherwise.
819//
820bool NearDiffer::compareGCInfo(MethodContext* mc, CompileResult* cr1, CompileResult* cr2)
821{
822 void* gcInfo1;
823 size_t gcInfo1Size;
824 void* gcInfo2;
825 size_t gcInfo2Size;
826
827 cr1->repAllocGCInfo(&gcInfo1Size, &gcInfo1);
828 cr2->repAllocGCInfo(&gcInfo2Size, &gcInfo2);
829
830 if (gcInfo1Size != gcInfo2Size)
831 {
832 LogVerbose("Reported GCInfo sizes don't match: %u != %u", (unsigned int)gcInfo1Size, (unsigned int)gcInfo2Size);
833 return false;
834 }
835
836 if (memcmp(gcInfo1, gcInfo2, gcInfo1Size) != 0)
837 {
838 LogVerbose("GCInfo doesn't match.");
839 return false;
840 }
841
842 return true;
843}
844
845//
846// Compares two sets of native var info for equality.
847//
848// Arguments:
849// mc - The method context of the method to diff.
850// cr1 - The first compile result to compare.
851// cr2 - The second compile result to compare.
852//
853// Return Value:
854// True if the native var info is identical; false otherwise.
855//
856bool NearDiffer::compareVars(MethodContext* mc, CompileResult* cr1, CompileResult* cr2)
857{
858 CORINFO_METHOD_HANDLE ftn_1;
859 ULONG32 cVars_1;
860 ICorDebugInfo::NativeVarInfo* vars_1;
861
862 CORINFO_METHOD_HANDLE ftn_2;
863 ULONG32 cVars_2;
864 ICorDebugInfo::NativeVarInfo* vars_2;
865
866 CORINFO_METHOD_INFO info;
867 unsigned flags = 0;
868 mc->repCompileMethod(&info, &flags);
869
870 bool set1 = cr1->repSetVars(&ftn_1, &cVars_1, &vars_1);
871 bool set2 = cr2->repSetVars(&ftn_2, &cVars_2, &vars_2);
872 if ((set1 == false) && (set2 == false))
873 return true; // we don't have boundaries for either of these.
874 if (((set1 == true) && (set2 == false)) || ((set1 == false) && (set2 == true)))
875 {
876 LogVerbose("missing matching vars sets");
877 return false;
878 }
879
880 // no vars
881 if (cVars_1 == 0 && cVars_2 == 0)
882 {
883 return true;
884 }
885
886 if (ftn_1 != ftn_2)
887 {
888 // We would like to find out this situation
889 __debugbreak();
890 LogVerbose("compareVars found non-matching CORINFO_METHOD_HANDLE %p %p", ftn_1, ftn_2);
891 return false;
892 }
893 if (ftn_1 != info.ftn)
894 {
895 LogVerbose("compareVars found issues with the CORINFO_METHOD_HANDLE %p %p", ftn_1, info.ftn);
896 return false;
897 }
898
899 if (cVars_1 != cVars_2)
900 {
901 LogVerbose("compareVars found non-matching var count %u %u", cVars_1, cVars_2);
902 return false;
903 }
904
905 // TODO-Cleanup: The values on the NativeVarInfo array seem to wobble. Need further investigation to evaluate a good
906 // near comparison for these for(unsigned int i=0;i<cVars_1;i++)
907 //{
908 // if(vars_1[i].startOffset!=vars_2[i].startOffset)
909 // {
910 // LogVerbose("compareVars found non-matching startOffsets %u %u for var: %u", vars_1[i].startOffset,
911 // vars_2[i].startOffset, i); return false;
912 // }
913 //}
914
915 return true;
916}
917
918//
919// Compares two sets of native offset mappings for equality.
920//
921// Arguments:
922// mc - The method context of the method to diff.
923// cr1 - The first compile result to compare.
924// cr2 - The second compile result to compare.
925//
926// Return Value:
927// True if the native offset mappings are identical; false otherwise.
928//
929bool NearDiffer::compareBoundaries(MethodContext* mc, CompileResult* cr1, CompileResult* cr2)
930{
931 CORINFO_METHOD_HANDLE ftn_1;
932 ULONG32 cMap_1;
933 ICorDebugInfo::OffsetMapping* map_1;
934
935 CORINFO_METHOD_HANDLE ftn_2;
936 ULONG32 cMap_2;
937 ICorDebugInfo::OffsetMapping* map_2;
938
939 CORINFO_METHOD_INFO info;
940 unsigned flags = 0;
941 mc->repCompileMethod(&info, &flags);
942
943 bool set1 = cr1->repSetBoundaries(&ftn_1, &cMap_1, &map_1);
944 bool set2 = cr2->repSetBoundaries(&ftn_2, &cMap_2, &map_2);
945 if ((set1 == false) && (set2 == false))
946 return true; // we don't have boundaries for either of these.
947 if (((set1 == true) && (set2 == false)) || ((set1 == false) && (set2 == true)))
948 {
949 LogVerbose("missing matching boundary sets");
950 return false;
951 }
952
953 if (ftn_1 != ftn_2)
954 {
955 LogVerbose("compareBoundaries found non-matching CORINFO_METHOD_HANDLE %p %p", ftn_1, ftn_2);
956 return false;
957 }
958
959 // no maps
960 if (cMap_1 == 0 && cMap_2 == 0)
961 return true;
962
963 if (cMap_1 != cMap_2)
964 {
965 LogVerbose("compareBoundaries found non-matching var count %u %u", cMap_1, cMap_2);
966 return false;
967 }
968
969 for (unsigned int i = 0; i < cMap_1; i++)
970 {
971 if (map_1[i].ilOffset != map_2[i].ilOffset)
972 {
973 LogVerbose("compareBoundaries found non-matching ilOffset %u %u for map: %u", map_1[i].ilOffset,
974 map_2[i].ilOffset, i);
975 return false;
976 }
977 if (map_1[i].nativeOffset != map_2[i].nativeOffset)
978 {
979 LogVerbose("compareBoundaries found non-matching nativeOffset %u %u for map: %u", map_1[i].nativeOffset,
980 map_2[i].nativeOffset, i);
981 return false;
982 }
983 if (map_1[i].source != map_2[i].source)
984 {
985 LogVerbose("compareBoundaries found non-matching source %u %u for map: %u", (unsigned int)map_1[i].source,
986 (unsigned int)map_2[i].source, i);
987 return false;
988 }
989 }
990
991 return true;
992}
993
994//
995// Compares two compiled versions of a method for equality. This is the main driver for the various
996// components of near diffing.
997//
998// Before starting the diffing process, this applies some fixups to the code stream based on relocations
999// recorded during compilation, using the original base address that was used when compiling the method.
1000//
1001// Arguments:
1002// mc - The method context of the method to diff.
1003// cr1 - The first compile result to compare.
1004// cr2 - The second compile result to compare.
1005//
1006// Return Value:
1007// True if the compile results are identical; false otherwise.
1008//
1009bool NearDiffer::compare(MethodContext* mc, CompileResult* cr1, CompileResult* cr2)
1010{
1011 ULONG hotCodeSize_1;
1012 ULONG coldCodeSize_1;
1013 ULONG roDataSize_1;
1014 ULONG xcptnsCount_1;
1015 CorJitAllocMemFlag flag_1;
1016 unsigned char* hotCodeBlock_1;
1017 unsigned char* coldCodeBlock_1;
1018 unsigned char* roDataBlock_1;
1019 void* orig_hotCodeBlock_1;
1020 void* orig_coldCodeBlock_1;
1021 void* orig_roDataBlock_1;
1022
1023 ULONG hotCodeSize_2;
1024 ULONG coldCodeSize_2;
1025 ULONG roDataSize_2;
1026 ULONG xcptnsCount_2;
1027 CorJitAllocMemFlag flag_2;
1028 unsigned char* hotCodeBlock_2;
1029 unsigned char* coldCodeBlock_2;
1030 unsigned char* roDataBlock_2;
1031 void* orig_hotCodeBlock_2;
1032 void* orig_coldCodeBlock_2;
1033 void* orig_roDataBlock_2;
1034
1035 cr1->repAllocMem(&hotCodeSize_1, &coldCodeSize_1, &roDataSize_1, &xcptnsCount_1, &flag_1, &hotCodeBlock_1,
1036 &coldCodeBlock_1, &roDataBlock_1, &orig_hotCodeBlock_1, &orig_coldCodeBlock_1,
1037 &orig_roDataBlock_1);
1038 cr2->repAllocMem(&hotCodeSize_2, &coldCodeSize_2, &roDataSize_2, &xcptnsCount_2, &flag_2, &hotCodeBlock_2,
1039 &coldCodeBlock_2, &roDataBlock_2, &orig_hotCodeBlock_2, &orig_coldCodeBlock_2,
1040 &orig_roDataBlock_2);
1041
1042 LogDebug("HCS1 %d CCS1 %d RDS1 %d xcpnt1 %d flag1 %08X, HCB %p CCB %p RDB %p ohcb %p occb %p odb %p", hotCodeSize_1,
1043 coldCodeSize_1, roDataSize_1, xcptnsCount_1, flag_1, hotCodeBlock_1, coldCodeBlock_1, roDataBlock_1,
1044 orig_hotCodeBlock_1, orig_coldCodeBlock_1, orig_roDataBlock_1);
1045 LogDebug("HCS2 %d CCS2 %d RDS2 %d xcpnt2 %d flag2 %08X, HCB %p CCB %p RDB %p ohcb %p occb %p odb %p", hotCodeSize_2,
1046 coldCodeSize_2, roDataSize_2, xcptnsCount_2, flag_2, hotCodeBlock_2, coldCodeBlock_2, roDataBlock_2,
1047 orig_hotCodeBlock_2, orig_coldCodeBlock_2, orig_roDataBlock_2);
1048
1049 cr1->applyRelocs(hotCodeBlock_1, hotCodeSize_1, orig_hotCodeBlock_1);
1050 cr2->applyRelocs(hotCodeBlock_2, hotCodeSize_2, orig_hotCodeBlock_2);
1051 cr1->applyRelocs(coldCodeBlock_1, coldCodeSize_1, orig_coldCodeBlock_1);
1052 cr2->applyRelocs(coldCodeBlock_2, coldCodeSize_2, orig_coldCodeBlock_2);
1053 cr1->applyRelocs(roDataBlock_1, roDataSize_1, orig_roDataBlock_1);
1054 cr2->applyRelocs(roDataBlock_2, roDataSize_2, orig_roDataBlock_2);
1055
1056 if (!compareCodeSection(mc, cr1, cr2, hotCodeBlock_1, hotCodeSize_1, roDataBlock_1, roDataSize_1,
1057 orig_hotCodeBlock_1, orig_roDataBlock_1, orig_coldCodeBlock_1, coldCodeSize_1,
1058 hotCodeBlock_2, hotCodeSize_2, roDataBlock_2, roDataSize_2, orig_hotCodeBlock_2,
1059 orig_roDataBlock_2, orig_coldCodeBlock_2, coldCodeSize_2))
1060 return false;
1061
1062 if (!compareCodeSection(mc, cr1, cr2, coldCodeBlock_1, coldCodeSize_1, roDataBlock_1, roDataSize_1,
1063 orig_coldCodeBlock_1, orig_roDataBlock_1, orig_hotCodeBlock_1, hotCodeSize_1,
1064 coldCodeBlock_2, coldCodeSize_2, roDataBlock_2, roDataSize_2, orig_coldCodeBlock_2,
1065 orig_roDataBlock_2, orig_hotCodeBlock_2, hotCodeSize_2))
1066 return false;
1067
1068 if (!compareReadOnlyDataBlock(mc, cr1, cr2, roDataBlock_1, roDataSize_1, orig_roDataBlock_1, roDataBlock_2,
1069 roDataSize_2, orig_roDataBlock_2))
1070 return false;
1071
1072 if (!compareEHInfo(mc, cr1, cr2))
1073 return false;
1074
1075 if (!compareGCInfo(mc, cr1, cr2))
1076 return false;
1077
1078 if (!compareVars(mc, cr1, cr2))
1079 return false;
1080
1081 if (!compareBoundaries(mc, cr1, cr2))
1082 return false;
1083
1084 return true;
1085}
1086