1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7XX XX
8XX Arm64 Code Generator XX
9XX XX
10XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12*/
13#include "jitpch.h"
14#ifdef _MSC_VER
15#pragma hdrstop
16#endif
17
18#ifdef _TARGET_ARM64_
19#include "emit.h"
20#include "codegen.h"
21#include "lower.h"
22#include "gcinfo.h"
23#include "gcinfoencoder.h"
24
25/*
26XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
27XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
28XX XX
29XX Prolog / Epilog XX
30XX XX
31XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
32XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
33*/
34
35//------------------------------------------------------------------------
36// genInstrWithConstant: we will typically generate one instruction
37//
38// ins reg1, reg2, imm
39//
40// However the imm might not fit as a directly encodable immediate,
41// when it doesn't fit we generate extra instruction(s) that sets up
42// the 'regTmp' with the proper immediate value.
43//
44// mov regTmp, imm
45// ins reg1, reg2, regTmp
46//
47// Arguments:
48// ins - instruction
49// attr - operation size and GC attribute
50// reg1, reg2 - first and second register operands
51// imm - immediate value (third operand when it fits)
52// tmpReg - temp register to use when the 'imm' doesn't fit
53// inUnwindRegion - true if we are in a prolog/epilog region with unwind codes
54//
55// Return Value:
56// returns true if the immediate was too large and tmpReg was used and modified.
57//
58bool CodeGen::genInstrWithConstant(instruction ins,
59 emitAttr attr,
60 regNumber reg1,
61 regNumber reg2,
62 ssize_t imm,
63 regNumber tmpReg,
64 bool inUnwindRegion /* = false */)
65{
66 bool immFitsInIns = false;
67 emitAttr size = EA_SIZE(attr);
68
69 // reg1 is usually a dest register
70 // reg2 is always source register
71 assert(tmpReg != reg2); // regTmp can not match any source register
72
73 switch (ins)
74 {
75 case INS_add:
76 case INS_sub:
77 if (imm < 0)
78 {
79 imm = -imm;
80 ins = (ins == INS_add) ? INS_sub : INS_add;
81 }
82 immFitsInIns = emitter::emitIns_valid_imm_for_add(imm, size);
83 break;
84
85 case INS_strb:
86 case INS_strh:
87 case INS_str:
88 // reg1 is a source register for store instructions
89 assert(tmpReg != reg1); // regTmp can not match any source register
90 immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
91 break;
92
93 case INS_ldrsb:
94 case INS_ldrsh:
95 case INS_ldrsw:
96 case INS_ldrb:
97 case INS_ldrh:
98 case INS_ldr:
99 immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
100 break;
101
102 default:
103 assert(!"Unexpected instruction in genInstrWithConstant");
104 break;
105 }
106
107 if (immFitsInIns)
108 {
109 // generate a single instruction that encodes the immediate directly
110 getEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm);
111 }
112 else
113 {
114 // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit
115 assert(tmpReg != REG_NA);
116
117 // generate two or more instructions
118
119 // first we load the immediate into tmpReg
120 instGen_Set_Reg_To_Imm(size, tmpReg, imm);
121 regSet.verifyRegUsed(tmpReg);
122
123 // when we are in an unwind code region
124 // we record the extra instructions using unwindPadding()
125 if (inUnwindRegion)
126 {
127 compiler->unwindPadding();
128 }
129
130 // generate the instruction using a three register encoding with the immediate in tmpReg
131 getEmitter()->emitIns_R_R_R(ins, attr, reg1, reg2, tmpReg);
132 }
133 return immFitsInIns;
134}
135
136//------------------------------------------------------------------------
137// genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog
138// or the epilog. The unwind codes for the generated instructions are produced. An available temporary
139// register is required to be specified, in case the constant is too large to encode in an "add"
140// instruction (or "sub" instruction if we choose to use one), such that we need to load the constant
141// into a register first, before using it.
142//
143// Arguments:
144// spDelta - the value to add to SP (can be negative)
145// tmpReg - an available temporary register
146// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
147// Otherwise, we don't touch it.
148//
149// Return Value:
150// None.
151
152void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
153{
154 // Even though INS_add is specified here, the encoder will choose either
155 // an INS_add or an INS_sub and encode the immediate as a positive value
156 //
157 if (genInstrWithConstant(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true))
158 {
159 if (pTmpRegIsZero != nullptr)
160 {
161 *pTmpRegIsZero = false;
162 }
163 }
164
165 // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive
166 // value.
167 ssize_t spDeltaAbs = abs(spDelta);
168 unsigned unwindSpDelta = (unsigned)spDeltaAbs;
169 assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in a unsigned
170
171 compiler->unwindAllocStack(unwindSpDelta);
172}
173
174//------------------------------------------------------------------------
175// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet
176// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
177// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that
178// instruction.
179//
180// Arguments:
181// reg1 - First register of pair to save.
182// reg2 - Second register of pair to save.
183// spOffset - The offset from SP to store reg1 (must be positive or zero).
184// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
185// zero).
186// lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This
187// allows us to emit the "save_next" unwind code.
188// tmpReg - An available temporary register. Needed for the case of large frames.
189// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
190// Otherwise, we don't touch it.
191//
192// Return Value:
193// None.
194
195void CodeGen::genPrologSaveRegPair(regNumber reg1,
196 regNumber reg2,
197 int spOffset,
198 int spDelta,
199 bool lastSavedWasPreviousPair,
200 regNumber tmpReg,
201 bool* pTmpRegIsZero)
202{
203 assert(spOffset >= 0);
204 assert(spDelta <= 0);
205 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
206 assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both
207 // FP/SIMD
208
209 bool needToSaveRegs = true;
210 if (spDelta != 0)
211 {
212 if ((spOffset == 0) && (spDelta >= -512))
213 {
214 // We can use pre-indexed addressing.
215 // stp REG, REG + 1, [SP, #spDelta]!
216 // 64-bit STP offset range: -512 to 504, multiple of 8.
217 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX);
218 compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta);
219
220 needToSaveRegs = false;
221 }
222 else // (spDelta < -512))
223 {
224 // We need to do SP adjustment separately from the store; we can't fold in a pre-indexed addressing and the
225 // non-zero offset.
226
227 // generate sub SP,SP,imm
228 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
229 }
230 }
231
232 if (needToSaveRegs)
233 {
234 // stp REG, REG + 1, [SP, #offset]
235 // 64-bit STP offset range: -512 to 504, multiple of 8.
236 assert(spOffset <= 504);
237 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
238
239 if (lastSavedWasPreviousPair)
240 {
241 // This works as long as we've only been saving pairs, in order, and we've saved the previous one just
242 // before this one.
243 compiler->unwindSaveNext();
244 }
245 else
246 {
247 compiler->unwindSaveRegPair(reg1, reg2, spOffset);
248 }
249 }
250}
251
252//------------------------------------------------------------------------
253// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or
254// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0),
255// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not
256// below it, which we currently don't support. This restriction could be loosened if the callers change to handle it
257// (and this function changes to support using pre-indexed STR addressing). The caller must ensure that we can use the
258// STR instruction, and that spOffset will be in the legal range for that instruction.
259//
260// Arguments:
261// reg1 - Register to save.
262// spOffset - The offset from SP to store reg1 (must be positive or zero).
263// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
264// zero).
265// tmpReg - An available temporary register. Needed for the case of large frames.
266// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
267// Otherwise, we don't touch it.
268//
269// Return Value:
270// None.
271
272void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
273{
274 assert(spOffset >= 0);
275 assert(spDelta <= 0);
276 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
277
278 if (spDelta != 0)
279 {
280 // generate sub SP,SP,imm
281 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
282 }
283
284 // str REG, [SP, #offset]
285 // 64-bit STR offset range: 0 to 32760, multiple of 8.
286 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
287 compiler->unwindSaveReg(reg1, spOffset);
288}
289
290//------------------------------------------------------------------------
291// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog.
292// The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing.
293// The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that
294// instruction.
295//
296// Arguments:
297// reg1 - First register of pair to restore.
298// reg2 - Second register of pair to restore.
299// spOffset - The offset from SP to load reg1 (must be positive or zero).
300// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
301// zero).
302// tmpReg - An available temporary register. Needed for the case of large frames.
303// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
304// Otherwise, we don't touch it.
305//
306// Return Value:
307// None.
308
309void CodeGen::genEpilogRestoreRegPair(
310 regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
311{
312 assert(spOffset >= 0);
313 assert(spDelta >= 0);
314 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
315
316 if (spDelta != 0)
317 {
318 if ((spOffset == 0) && (spDelta <= 504))
319 {
320 // Fold the SP change into this instruction.
321 // ldp reg1, reg2, [SP], #spDelta
322 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX);
323 compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta);
324 }
325 else // (spDelta > 504))
326 {
327 // Can't fold in the SP change; need to use a separate ADD instruction.
328
329 // ldp reg1, reg2, [SP, #offset]
330 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
331 compiler->unwindSaveRegPair(reg1, reg2, spOffset);
332
333 // generate add SP,SP,imm
334 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
335 }
336 }
337 else
338 {
339 // ldp reg1, reg2, [SP, #offset]
340 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
341 compiler->unwindSaveRegPair(reg1, reg2, spOffset);
342 }
343}
344
345//------------------------------------------------------------------------
346// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog.
347//
348// Arguments:
349// reg1 - Register to restore.
350// spOffset - The offset from SP to restore reg1 (must be positive or zero).
351// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
352// zero).
353// tmpReg - An available temporary register. Needed for the case of large frames.
354// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
355// Otherwise, we don't touch it.
356//
357// Return Value:
358// None.
359
360void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
361{
362 assert(spOffset >= 0);
363 assert(spDelta >= 0);
364 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
365
366 // ldr reg1, [SP, #offset]
367 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
368 compiler->unwindSaveReg(reg1, spOffset);
369
370 if (spDelta != 0)
371 {
372 // generate add SP,SP,imm
373 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
374 }
375}
376
#ifdef DEBUG
//------------------------------------------------------------------------
// genCheckSPOffset: Assert that the Stack Pointer (SP) offset has the value expected for the
//                   number of registers being saved/restored: one slot (to account for the
//                   alignment padding) when the count is odd, zero when the count is even.
//
// Arguments:
//   isRegsCountOdd - true if number of registers to save/restore is odd;
//   spOffset       - stack pointer offset value;
//   slotSize       - stack slot size in bytes.
//
// static
void CodeGen::genCheckSPOffset(bool isRegsCountOdd, int spOffset, int slotSize)
{
    if (!isRegsCountOdd)
    {
        // Even count: no alignment slot, so the offset must be 0.
        assert(spOffset == 0);
        return;
    }

    // Odd count: the offset must skip exactly one (alignment) slot.
    assert(spOffset == slotSize);
}
#endif // DEBUG
403
404//------------------------------------------------------------------------
405// genBuildRegPairsStack: Build a stack of register pairs for prolog/epilog save/restore for the given mask.
406// The first register pair will contain the lowest register. Register pairs will combine neighbor
407// registers in pairs. If it can't be done (for example if we have a hole or this is the last reg in a mask with
408// odd number of regs) then the second element of that RegPair will be REG_NA.
409//
410// Arguments:
411// regsMask - a mask of registers for prolog/epilog generation;
412// regStack - a regStack instance to build the stack in, used to save temp copyings.
413//
414// Return value:
415// no return value; the regStack argument is modified.
416//
417// static
418void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* regStack)
419{
420 assert(regStack != nullptr);
421 assert(regStack->Height() == 0);
422
423 unsigned regsCount = genCountBits(regsMask);
424
425 while (regsMask != RBM_NONE)
426 {
427 regMaskTP reg1Mask = genFindLowestBit(regsMask);
428 regNumber reg1 = genRegNumFromMask(reg1Mask);
429 regsMask &= ~reg1Mask;
430 regsCount -= 1;
431
432 bool isPairSave = false;
433 if (regsCount > 0)
434 {
435 regMaskTP reg2Mask = genFindLowestBit(regsMask);
436 regNumber reg2 = genRegNumFromMask(reg2Mask);
437 if (reg2 == REG_NEXT(reg1))
438 {
439 isPairSave = true;
440
441 regsMask &= ~reg2Mask;
442 regsCount -= 1;
443
444 regStack->Push(RegPair(reg1, reg2));
445 }
446 }
447 if (!isPairSave)
448 {
449 regStack->Push(RegPair(reg1));
450 }
451 }
452 assert(regsCount == 0 && regsMask == RBM_NONE);
453}
454
455//------------------------------------------------------------------------
456// genGetSlotSizeForRegsInMask: Get the stack slot size appropriate for the register type from the mask.
457//
458// Arguments:
459// regsMask - a mask of registers for prolog/epilog generation.
460//
461// Return value:
462// stack slot size in bytes.
463//
464// static
465int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
466{
467 assert((regsMask & (RBM_CALLEE_SAVED | RBM_LR)) == regsMask); // Do not expect anything else.
468
469 bool isIntMask = ((regsMask & RBM_ALLFLOAT) == 0);
470#ifdef DEBUG
471 bool isFloatMask = ((regsMask & RBM_ALLFLOAT) == regsMask);
472 // Has to be either int or float.
473 assert(isIntMask != isFloatMask);
474#endif // DEBUG
475
476 int slotSize = isIntMask ? REGSIZE_BYTES : FPSAVE_REGSIZE_BYTES;
477 return slotSize;
478}
479
480//------------------------------------------------------------------------
481// genSaveCalleeSavedRegisterGroup: Saves the group of registers described by the mask.
482// All registers in the mask must be the same type (int or float).
483//
484// Arguments:
485// regsMask - a mask of registers for prolog generation;
486// spDelta - if non-zero, the amount to add to SP before the first register save (or together with it);
487// spOffset - the offset from SP that is the beginning of the callee-saved register area;
488// isRegsToSaveCountOdd - (DEBUG only) true if number of registers to save is odd.
489//
490// Return Value:
491// SP offset after saving registers from this group.
492//
493int CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask,
494 int spDelta,
495 int spOffset DEBUGARG(bool isRegsToSaveCountOdd))
496{
497 const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
498
499#ifdef DEBUG
500 if (spDelta != 0) // The first store change SP offset, check its value before.
501 {
502 genCheckSPOffset(isRegsToSaveCountOdd, spOffset, slotSize);
503 }
504#endif // DEBUG
505
506 ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
507 genBuildRegPairsStack(regsMask, &regStack);
508
509 bool lastSavedWasPair = false; // currently unused, see the comment below.
510 for (int i = 0; i < regStack.Height(); ++i)
511 {
512 RegPair regPair = regStack.Bottom(i);
513 if (regPair.reg2 != REG_NA)
514 {
515 // We can use a STP instruction.
516 genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
517
518 // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
519 // this epilog, to get the codes to match. Turn this off until that is better understood.
520 // lastSavedWasPair = true;
521
522 spOffset += 2 * slotSize;
523 }
524 else
525 {
526 // No register pair; we use a STR instruction.
527 genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_IP0, nullptr);
528
529 lastSavedWasPair = false;
530 spOffset += slotSize;
531 }
532
533 spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
534 }
535 return spOffset;
536}
537
538//------------------------------------------------------------------------
539// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
540// in the function or funclet prolog. The save set does not contain FP, since that is
541// guaranteed to be saved separately, so we can set up chaining. We can only use the instructions
542// that are allowed by the unwind codes. Integer registers are stored at lower addresses,
543// FP/SIMD registers are stored at higher addresses. The caller ensures that
544// there is enough space on the frame to store these registers, and that the store instructions
545// we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to
546// use. The caller can tell us to fold in a stack pointer adjustment, which we will do with the first instruction. Note
547// that the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
548// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
549// registers, though, we will have an empty aligment slot somewhere. It turns out we will put
550// it below (at a lower address) the callee-saved registers, as that is currently how we
551// do frame layout. This means that the first stack offset will be 8 and the stack pointer
552// adjustment must be done by a SUB, and not folded in to a pre-indexed store.
553//
554// Arguments:
555// regsToSaveMask - The mask of callee-saved registers to save. If empty, this function does nothing.
556// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
557// if non-zero spDelta, then this is the offset of the first save *after* that
558// SP adjustment.
559// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
560// zero).
561//
562// Notes:
563// the save set can contain LR in which case LR is saved along with the other callee-saved registers.
564// But currently Jit doesn't use frames without frame pointer on arm64.
565//
566void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
567{
568 assert(spDelta <= 0);
569 unsigned regsToSaveCount = genCountBits(regsToSaveMask);
570 if (regsToSaveCount == 0)
571 {
572 if (spDelta != 0)
573 {
574 // Currently this is the case for varargs only
575 // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
576 genStackPointerAdjustment(spDelta, REG_NA, nullptr);
577 }
578 return;
579 }
580
581 assert((spDelta % 16) == 0);
582 assert((regsToSaveMask & RBM_FP) == 0); // We never save FP here.
583
584 // We also save LR, even though it is not in RBM_CALLEE_SAVED.
585 assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR));
586
587#ifdef DEBUG
588 bool isRegsToSaveCountOdd = ((regsToSaveCount % 2) != 0);
589#endif // DEBUG
590
591 int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP.
592
593 regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
594 regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
595
596 bool floatSavesSp = (maskSaveRegsInt == 0);
597
598 if (maskSaveRegsInt != 0)
599 {
600 assert(!floatSavesSp); // We always change SP only once with the first save/last load.
601
602 // Save the integer registers.
603 spOffset = genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, spOffset DEBUGARG(isRegsToSaveCountOdd));
604 }
605
606 if (maskSaveRegsFloat != 0)
607 {
608 int floatSpDelta = floatSavesSp ? spDelta : 0;
609
610 // Save the floating-point/SIMD registers
611 spOffset =
612 genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, floatSpDelta, spOffset DEBUGARG(isRegsToSaveCountOdd));
613 spDelta = 0;
614 }
615}
616
617//------------------------------------------------------------------------
618// genRestoreCalleeSavedRegisterGroup: Saves the group of registers described by the mask.
619// All registers in the mask must be the same type (int or float).
620//
621// Arguments:
622// regsMask - a mask of registers for epilog generation;
623// spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it);
624// spOffset - the offset from SP that is the beginning of the callee-saved register area;
625//
626// Return Value:
627// SP offset after restoring registers from this group.
628//
629int CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask,
630 int spDelta,
631 int spOffset DEBUGARG(bool isRegsToRestoreCountOdd))
632{
633 const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
634
635 ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
636 genBuildRegPairsStack(regsMask, &regStack);
637
638 int stackDelta = 0;
639 for (int i = 0; i < regStack.Height(); ++i)
640 {
641 bool lastRestoreInTheGroup = (i == regStack.Height() - 1);
642 bool updateStackDelta = lastRestoreInTheGroup && (spDelta != 0);
643 if (updateStackDelta)
644 {
645 // Update stack delta only if it is the last restore (the first save).
646 assert(stackDelta == 0);
647 stackDelta = spDelta;
648 }
649
650 RegPair regPair = regStack.Index(i);
651 if (regPair.reg2 != REG_NA)
652 {
653 spOffset -= 2 * slotSize;
654
655 genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, REG_IP1, nullptr);
656 }
657 else
658 {
659 spOffset -= slotSize;
660 genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_IP1, nullptr);
661 }
662 }
663
664#ifdef DEBUG
665 if (stackDelta != 0) // The last restore (the first save) changes SP offset, check its value after.
666 {
667 genCheckSPOffset(isRegsToRestoreCountOdd, spOffset, slotSize);
668 }
669#endif // DEBUG
670 return spOffset;
671}
672
673//------------------------------------------------------------------------
674// genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame
675// in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp().
676//
677// Arguments:
678// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing.
679// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
680// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
681// zero).
682//
683// Here's an example restore sequence:
684// ldp x27, x28, [sp,#96]
685// ldp x25, x26, [sp,#80]
686// ldp x23, x24, [sp,#64]
687// ldp x21, x22, [sp,#48]
688// ldp x19, x20, [sp,#32]
689//
690// For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and
691// the last restore adjusts SP by the specified amount. For example:
692// ldp x27, x28, [sp,#64]
693// ldp x25, x26, [sp,#48]
694// ldp x23, x24, [sp,#32]
695// ldp x21, x22, [sp,#16]
696// ldp x19, x20, [sp], #80
697//
698// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when
699// generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store.
700//
701// Return Value:
702// None.
703
704void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta)
705{
706 assert(spDelta >= 0);
707 unsigned regsToRestoreCount = genCountBits(regsToRestoreMask);
708 if (regsToRestoreCount == 0)
709 {
710 if (spDelta != 0)
711 {
712 // Currently this is the case for varargs only
713 // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
714 genStackPointerAdjustment(spDelta, REG_NA, nullptr);
715 }
716 return;
717 }
718
719 assert((spDelta % 16) == 0);
720 assert((regsToRestoreMask & RBM_FP) == 0); // We never restore FP here.
721
722 // We also restore LR, even though it is not in RBM_CALLEE_SAVED.
723 assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR));
724
725#ifdef DEBUG
726 bool isRegsToRestoreCountOdd = ((regsToRestoreCount % 2) != 0);
727#endif // DEBUG
728
729 assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
730 int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; // Point past the end, to start. We
731 // predecrement to find the offset to
732 // load from.
733
734 // We want to restore in the opposite order we saved, so the unwind codes match. Be careful to handle odd numbers of
735 // callee-saved registers properly.
736
737 regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
738 regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
739
740 bool floatRestoresSp = (maskRestoreRegsInt == 0);
741
742 if (maskRestoreRegsFloat != 0)
743 {
744 int floatSpDelta = floatRestoresSp ? spDelta : 0;
745 // Restore the floating-point/SIMD registers
746 spOffset = genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, floatSpDelta,
747 spOffset DEBUGARG(isRegsToRestoreCountOdd));
748 }
749
750 if (maskRestoreRegsInt != 0)
751 {
752 assert(!floatRestoresSp); // We always change SP only once with the first save/last load.
753
754 // Restore the integer registers
755 spOffset =
756 genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spDelta, spOffset DEBUGARG(isRegsToRestoreCountOdd));
757 }
758}
759
760// clang-format off
761/*****************************************************************************
762 *
763 * Generates code for an EH funclet prolog.
764 *
765 * Funclets have the following incoming arguments:
766 *
767 * catch: x0 = the exception object that was caught (see GT_CATCH_ARG)
768 * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function
769 * finally/fault: none
770 *
771 * Funclets set the following registers on exit:
772 *
773 * catch: x0 = the address at which execution should resume (see BBJ_EHCATCHRET)
774 * filter: x0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
775 * finally/fault: none
776 *
777 * The ARM64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size,
778 * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16):
779 *
780 * Frame type 1:
781 * For #outsz == 0 and #framesz <= 512:
782 * stp fp,lr,[sp,-#framesz]! ; establish the frame, save FP/LR
783 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
784 *
785 * The funclet frame is thus:
786 *
787 * | |
788 * |-----------------------|
789 * | incoming |
790 * | arguments |
791 * +=======================+ <---- Caller's SP
792 * |Callee saved registers | // multiple of 8 bytes
793 * |-----------------------|
794 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
795 * |-----------------------|
796 * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
797 * |-----------------------|
798 * | Saved FP, LR | // 16 bytes
799 * |-----------------------| <---- Ambient SP
800 * | | |
801 * ~ | Stack grows ~
802 * | | downward |
803 * V
804 *
805 * Frame type 2:
806 * For #outsz != 0 and #framesz <= 512:
807 * sub sp,sp,#framesz ; establish the frame
808 * stp fp,lr,[sp,#outsz] ; save FP/LR.
809 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
810 *
811 * The funclet frame is thus:
812 *
813 * | |
814 * |-----------------------|
815 * | incoming |
816 * | arguments |
817 * +=======================+ <---- Caller's SP
818 * |Callee saved registers | // multiple of 8 bytes
819 * |-----------------------|
820 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
821 * |-----------------------|
822 * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
823 * |-----------------------|
824 * | Saved FP, LR | // 16 bytes
825 * |-----------------------|
826 * | Outgoing arg space | // multiple of 8 bytes
827 * |-----------------------| <---- Ambient SP
828 * | | |
829 * ~ | Stack grows ~
830 * | | downward |
831 * V
832 *
833 * Frame type 3:
834 * For #framesz > 512:
835 * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168
836 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
837 * sub sp,sp,#outsz ; create space for outgoing argument space
838 *
839 * The funclet frame is thus:
840 *
841 * | |
842 * |-----------------------|
843 * | incoming |
844 * | arguments |
845 * +=======================+ <---- Caller's SP
846 * |Callee saved registers | // multiple of 8 bytes
847 * |-----------------------|
848 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
849 * |-----------------------|
850 * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
851 * |-----------------------|
852 * | Saved FP, LR | // 16 bytes
853 * |-----------------------|
854 * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space).
855 * |-----------------------|
856 * | Outgoing arg space | // multiple of 8 bytes
857 * |-----------------------| <---- Ambient SP
858 * | | |
859 * ~ | Stack grows ~
860 * | | downward |
861 * V
862 *
863 * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3,
864 * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack
865 * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes:
866 * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a
867 * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main
868 * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call).
869 * In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym,
870 * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the
871 * outgoing argument space. Both changes to SP might need to add alignment padding.
872 *
873 * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP
874 * as in the main function.
875 *
876 * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
877 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
878 *
879 * if (this is a filter funclet)
880 * {
881 * // x1 on entry to a filter funclet is CallerSP of the containing function:
882 * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
883 * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
884 * // a funclet. Consider:
885 * //
886 * // try {
887 * // try {
888 * // throw new Exception();
889 * // } catch(Exception) {
890 * // throw new Exception(); // The exception thrown here ...
891 * // }
892 * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack
893 * // } filter-handler {
894 * // }
895 * //
896 * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will
897 * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always
898 * // create a main function PSP for any function with a filter.
899 *
900 * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
901 * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP
902 * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer
903 * }
904 * else
905 * {
906 * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
907 * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction.
908 *
909 * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch.
910 * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP
911 * }
912 *
913 * An example epilog sequence is then:
914 *
915 * add sp,sp,#outsz ; if any outgoing argument space
916 * ... ; restore callee-saved registers
917 * ldp x19,x20,[sp,#xxx]
918 * ldp fp,lr,[sp],#framesz
919 * ret lr
920 *
921 * The funclet frame is thus:
922 *
923 * | |
924 * |-----------------------|
925 * | incoming |
926 * | arguments |
927 * +=======================+ <---- Caller's SP
928 * |Callee saved registers | // multiple of 8 bytes
929 * |-----------------------|
930 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
931 * |-----------------------|
932 * | Saved FP, LR | // 16 bytes
933 * |-----------------------|
934 * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
935 * |-----------------------|
936 * | Outgoing arg space | // multiple of 8 bytes
937 * |-----------------------| <---- Ambient SP
938 * | | |
939 * ~ | Stack grows ~
940 * | | downward |
941 * V
942 */
943// clang-format on
944
945void CodeGen::genFuncletProlog(BasicBlock* block)
946{
947#ifdef DEBUG
948 if (verbose)
949 printf("*************** In genFuncletProlog()\n");
950#endif
951
952 assert(block != NULL);
953 assert(block->bbFlags & BBF_FUNCLET_BEG);
954
955 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
956
957 gcInfo.gcResetForBB();
958
959 compiler->unwindBegProlog();
960
961 regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
962 regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat;
963
964 // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
965 assert((maskSaveRegsInt & RBM_LR) != 0);
966 assert((maskSaveRegsInt & RBM_FP) != 0);
967
968 bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
969
970 regMaskTP maskArgRegsLiveIn;
971 if (isFilter)
972 {
973 maskArgRegsLiveIn = RBM_R0 | RBM_R1;
974 }
975 else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
976 {
977 maskArgRegsLiveIn = RBM_NONE;
978 }
979 else
980 {
981 maskArgRegsLiveIn = RBM_R0;
982 }
983
984 int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
985
986 if (genFuncletInfo.fiFrameType == 1)
987 {
988 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
989 INS_OPTS_PRE_INDEX);
990 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
991
992 assert(genFuncletInfo.fiSpDelta2 == 0);
993 assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
994 }
995 else if (genFuncletInfo.fiFrameType == 2)
996 {
997 // fiFrameType==2 constraints:
998 assert(genFuncletInfo.fiSpDelta1 < 0);
999 assert(genFuncletInfo.fiSpDelta1 >= -512);
1000
1001 // generate sub SP,SP,imm
1002 genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
1003
1004 assert(genFuncletInfo.fiSpDelta2 == 0);
1005
1006 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
1007 genFuncletInfo.fiSP_to_FPLR_save_delta);
1008 compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
1009 }
1010 else
1011 {
1012 assert(genFuncletInfo.fiFrameType == 3);
1013 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
1014 INS_OPTS_PRE_INDEX);
1015 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
1016
1017 lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet.
1018 }
1019 maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
1020
1021 genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0);
1022
1023 if (genFuncletInfo.fiFrameType == 3)
1024 {
1025 // Note that genFuncletInfo.fiSpDelta2 is always a negative value
1026 assert(genFuncletInfo.fiSpDelta2 < 0);
1027
1028 // generate sub SP,SP,imm
1029 genStackPointerAdjustment(genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
1030 }
1031
1032 // This is the end of the OS-reported prolog for purposes of unwinding
1033 compiler->unwindEndProlog();
1034
1035 // If there is no PSPSym (CoreRT ABI), we are done.
1036 if (compiler->lvaPSPSym == BAD_VAR_NUM)
1037 {
1038 return;
1039 }
1040
1041 if (isFilter)
1042 {
1043 // This is the first block of a filter
1044 // Note that register x1 = CallerSP of the containing function
1045 // X1 is overwritten by the first Load (new callerSP)
1046 // X2 is scratch when we have a large constant offset
1047
1048 // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
1049 genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
1050 genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false);
1051 regSet.verifyRegUsed(REG_R1);
1052
1053 // Store the PSP value (aka CallerSP)
1054 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
1055 genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
1056
1057 // re-establish the frame pointer
1058 genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
1059 REG_R2, false);
1060 }
1061 else // This is a non-filter funclet
1062 {
1063 // X3 is scratch, X2 can also become scratch
1064
1065 // compute the CallerSP, given the frame pointer. x3 is scratch.
1066 genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
1067 REG_R2, false);
1068 regSet.verifyRegUsed(REG_R3);
1069
1070 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
1071 genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
1072 }
1073}
1074
1075/*****************************************************************************
1076 *
1077 * Generates code for an EH funclet epilog.
1078 */
1079
1080void CodeGen::genFuncletEpilog()
1081{
1082#ifdef DEBUG
1083 if (verbose)
1084 printf("*************** In genFuncletEpilog()\n");
1085#endif
1086
1087 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
1088
1089 bool unwindStarted = false;
1090
1091 if (!unwindStarted)
1092 {
1093 // We can delay this until we know we'll generate an unwindable instruction, if necessary.
1094 compiler->unwindBegEpilog();
1095 unwindStarted = true;
1096 }
1097
1098 regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
1099 regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat;
1100
1101 // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
1102 assert((maskRestoreRegsInt & RBM_LR) != 0);
1103 assert((maskRestoreRegsInt & RBM_FP) != 0);
1104
1105 maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
1106
1107 int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
1108
1109 if (genFuncletInfo.fiFrameType == 3)
1110 {
1111 // Note that genFuncletInfo.fiSpDelta2 is always a negative value
1112 assert(genFuncletInfo.fiSpDelta2 < 0);
1113
1114 // generate add SP,SP,imm
1115 genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
1116
1117 lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2;
1118 }
1119
1120 regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
1121 genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0);
1122
1123 if (genFuncletInfo.fiFrameType == 1)
1124 {
1125 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
1126 INS_OPTS_POST_INDEX);
1127 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
1128
1129 assert(genFuncletInfo.fiSpDelta2 == 0);
1130 assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
1131 }
1132 else if (genFuncletInfo.fiFrameType == 2)
1133 {
1134 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
1135 genFuncletInfo.fiSP_to_FPLR_save_delta);
1136 compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
1137
1138 // fiFrameType==2 constraints:
1139 assert(genFuncletInfo.fiSpDelta1 < 0);
1140 assert(genFuncletInfo.fiSpDelta1 >= -512);
1141
1142 // generate add SP,SP,imm
1143 genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
1144
1145 assert(genFuncletInfo.fiSpDelta2 == 0);
1146 }
1147 else
1148 {
1149 assert(genFuncletInfo.fiFrameType == 3);
1150
1151 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
1152 INS_OPTS_POST_INDEX);
1153 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
1154 }
1155
1156 inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
1157 compiler->unwindReturn(REG_LR);
1158
1159 compiler->unwindEndEpilog();
1160}
1161
1162/*****************************************************************************
1163 *
1164 * Capture the information used to generate the funclet prologs and epilogs.
1165 * Note that all funclet prologs are identical, and all funclet epilogs are
1166 * identical (per type: filters are identical, and non-filters are identical).
1167 * Thus, we compute the data used for these just once.
1168 *
1169 * See genFuncletProlog() for more information about the prolog/epilog sequences.
1170 */
1171
1172void CodeGen::genCaptureFuncletPrologEpilogInfo()
1173{
1174 if (!compiler->ehAnyFunclets())
1175 return;
1176
1177 assert(isFramePointerUsed());
1178 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
1179 // finalized
1180
1181 genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
1182
1183 regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
1184 assert((rsMaskSaveRegs & RBM_LR) != 0);
1185 assert((rsMaskSaveRegs & RBM_FP) != 0);
1186
1187 unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
1188
1189 unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
1190 unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize;
1191 if (compiler->info.compIsVarArgs)
1192 {
1193 // For varargs we always save all of the integer register arguments
1194 // so that they are contiguous with the incoming stack arguments.
1195 saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES;
1196 }
1197 unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN);
1198
1199 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
1200 unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
1201
1202 unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned;
1203 assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0);
1204
1205 int SP_to_FPLR_save_delta;
1206 int SP_to_PSP_slot_delta;
1207 int CallerSP_to_PSP_slot_delta;
1208
1209 if (maxFuncletFrameSizeAligned <= 512)
1210 {
1211 unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
1212 unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
1213 assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);
1214
1215 unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
1216 assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
1217
1218 SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
1219 SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
1220 CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);
1221
1222 if (compiler->lvaOutgoingArgSpaceSize == 0)
1223 {
1224 genFuncletInfo.fiFrameType = 1;
1225 }
1226 else
1227 {
1228 genFuncletInfo.fiFrameType = 2;
1229 }
1230 genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned;
1231 genFuncletInfo.fiSpDelta2 = 0;
1232
1233 assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned);
1234 }
1235 else
1236 {
1237 unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
1238 assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES));
1239
1240 SP_to_FPLR_save_delta = outgoingArgSpaceAligned;
1241 SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad;
1242 CallerSP_to_PSP_slot_delta =
1243 -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad);
1244
1245 genFuncletInfo.fiFrameType = 3;
1246 genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned;
1247 genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;
1248
1249 assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned);
1250 }
1251
1252 /* Now save it for future use */
1253
1254 genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
1255 genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta;
1256 genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta;
1257 genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES;
1258 genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta;
1259
1260#ifdef DEBUG
1261 if (verbose)
1262 {
1263 printf("\n");
1264 printf("Funclet prolog / epilog info\n");
1265 printf(" Save regs: ");
1266 dspRegMask(genFuncletInfo.fiSaveRegs);
1267 printf("\n");
1268 printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
1269 printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta);
1270 printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta);
1271 printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta);
1272 printf(" Caller SP to PSP slot delta: %d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta);
1273 printf(" Frame type: %d\n", genFuncletInfo.fiFrameType);
1274 printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1);
1275 printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2);
1276
1277 if (compiler->lvaPSPSym != BAD_VAR_NUM)
1278 {
1279 if (CallerSP_to_PSP_slot_delta !=
1280 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
1281 {
1282 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
1283 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
1284 }
1285 }
1286 }
1287
1288 assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0);
1289 assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0);
1290 assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0);
1291 assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0);
1292
1293 if (compiler->lvaPSPSym != BAD_VAR_NUM)
1294 {
1295 assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta ==
1296 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
1297 // funclet!
1298 }
1299#endif // DEBUG
1300}
1301
1302/*
1303XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1304XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1305XX XX
1306XX End Prolog / Epilog XX
1307XX XX
1308XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1309XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1310*/
1311
1312BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
1313{
1314 // Generate a call to the finally, like this:
1315 // mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used
1316 // bl finally-funclet
1317 // b finally-return // Only for non-retless finally calls
1318 // The 'b' can be a NOP if we're going to the next block.
1319
1320 if (compiler->lvaPSPSym != BAD_VAR_NUM)
1321 {
1322 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0);
1323 }
1324 else
1325 {
1326 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE);
1327 }
1328 getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest);
1329
1330 if (block->bbFlags & BBF_RETLESS_CALL)
1331 {
1332 // We have a retless call, and the last instruction generated was a call.
1333 // If the next block is in a different EH region (or is the end of the code
1334 // block), then we need to generate a breakpoint here (since it will never
1335 // get executed) to get proper unwind behavior.
1336
1337 if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
1338 {
1339 instGen(INS_bkpt); // This should never get executed
1340 }
1341 }
1342 else
1343 {
1344 // Because of the way the flowgraph is connected, the liveness info for this one instruction
1345 // after the call is not (can not be) correct in cases where a variable has a last use in the
1346 // handler. So turn off GC reporting for this single instruction.
1347 getEmitter()->emitDisableGC();
1348
1349 // Now go to where the finally funclet needs to return to.
1350 if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
1351 {
1352 // Fall-through.
1353 // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly
1354 // to the next instruction? This would depend on stack walking from within the finally
1355 // handler working without this instruction being in this special EH region.
1356 instGen(INS_nop);
1357 }
1358 else
1359 {
1360 inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
1361 }
1362
1363 getEmitter()->emitEnableGC();
1364 }
1365
1366 // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
1367 // jump target using bbJumpDest - that is already used to point
1368 // to the finally block. So just skip past the BBJ_ALWAYS unless the
1369 // block is RETLESS.
1370 if (!(block->bbFlags & BBF_RETLESS_CALL))
1371 {
1372 assert(block->isBBCallAlwaysPair());
1373 block = block->bbNext;
1374 }
1375 return block;
1376}
1377
1378void CodeGen::genEHCatchRet(BasicBlock* block)
1379{
1380 // For long address (default): `adrp + add` will be emitted.
1381 // For short address (proven later): `adr` will be emitted.
1382 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
1383}
1384
1385// move an immediate value into an integer register
1386
1387void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
1388{
1389 // reg cannot be a FP register
1390 assert(!genIsValidFloatReg(reg));
1391 if (!compiler->opts.compReloc)
1392 {
1393 size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
1394 }
1395
1396 if (EA_IS_RELOC(size))
1397 {
1398 // This emits a pair of adrp/add (two instructions) with fix-ups.
1399 getEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm);
1400 }
1401 else if (imm == 0)
1402 {
1403 instGen_Set_Reg_To_Zero(size, reg, flags);
1404 }
1405 else
1406 {
1407 if (emitter::emitIns_valid_imm_for_mov(imm, size))
1408 {
1409 getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
1410 }
1411 else
1412 {
1413 // Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword
1414 // There are three forms
1415 // movk which loads into any halfword preserving the remaining halfwords
1416 // movz which loads into any halfword zeroing the remaining halfwords
1417 // movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting the register
1418 // In some cases it is preferable to use movn, because it has the side effect of filling the other halfwords
1419 // with ones
1420
1421 // Determine whether movn or movz will require the fewest instructions to populate the immediate
1422 int preferMovn = 0;
1423
1424 for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16)
1425 {
1426 if (uint16_t(imm >> i) == 0xffff)
1427 ++preferMovn; // a single movk 0xffff could be skipped if movn was used
1428 else if (uint16_t(imm >> i) == 0x0000)
1429 --preferMovn; // a single movk 0 could be skipped if movz was used
1430 }
1431
1432 // Select the first instruction. Any additional instruction will use movk
1433 instruction ins = (preferMovn > 0) ? INS_movn : INS_movz;
1434
1435 // Initial movz or movn will fill the remaining bytes with the skipVal
1436 // This can allow skipping filling a halfword
1437 uint16_t skipVal = (preferMovn > 0) ? 0xffff : 0;
1438
1439 unsigned bits = (size == EA_8BYTE) ? 64 : 32;
1440
1441 // Iterate over imm examining 16 bits at a time
1442 for (unsigned i = 0; i < bits; i += 16)
1443 {
1444 uint16_t imm16 = uint16_t(imm >> i);
1445
1446 if (imm16 != skipVal)
1447 {
1448 if (ins == INS_movn)
1449 {
1450 // For the movn case, we need to bitwise invert the immediate. This is because
1451 // (movn x0, ~imm16) === (movz x0, imm16; or x0, x0, #0xffff`ffff`ffff`0000)
1452 imm16 = ~imm16;
1453 }
1454
1455 getEmitter()->emitIns_R_I_I(ins, size, reg, imm16, i, INS_OPTS_LSL);
1456
1457 // Once the initial movz/movn is emitted the remaining instructions will all use movk
1458 ins = INS_movk;
1459 }
1460 }
1461
1462 // We must emit a movn or movz or we have not done anything
1463 // The cases which hit this assert should be (emitIns_valid_imm_for_mov() == true) and
1464 // should not be in this else condition
1465 assert(ins == INS_movk);
1466 }
1467 // The caller may have requested that the flags be set on this mov (rarely/never)
1468 if (flags == INS_FLAGS_SET)
1469 {
1470 getEmitter()->emitIns_R_I(INS_tst, size, reg, 0);
1471 }
1472 }
1473
1474 regSet.verifyRegUsed(reg);
1475}
1476
1477/***********************************************************************************
1478 *
1479 * Generate code to set a register 'targetReg' of type 'targetType' to the constant
1480 * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
1481 * genProduceReg() on the target register.
1482 */
1483void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree)
1484{
1485 switch (tree->gtOper)
1486 {
1487 case GT_CNS_INT:
1488 {
1489 // relocatable values tend to come down as a CNS_INT of native int type
1490 // so the line between these two opcodes is kind of blurry
1491 GenTreeIntConCommon* con = tree->AsIntConCommon();
1492 ssize_t cnsVal = con->IconValue();
1493
1494 if (con->ImmedValNeedsReloc(compiler))
1495 {
1496 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
1497 regSet.verifyRegUsed(targetReg);
1498 }
1499 else
1500 {
1501 genSetRegToIcon(targetReg, cnsVal, targetType);
1502 }
1503 }
1504 break;
1505
1506 case GT_CNS_DBL:
1507 {
1508 emitter* emit = getEmitter();
1509 emitAttr size = emitActualTypeSize(tree);
1510 double constValue = tree->AsDblCon()->gtDconVal;
1511
1512 // Make sure we use "movi reg, 0x00" only for positive zero (0.0) and not for negative zero (-0.0)
1513 if (*(__int64*)&constValue == 0)
1514 {
1515 // A faster/smaller way to generate 0.0
1516 // We will just zero out the entire vector register for both float and double
1517 emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B);
1518 }
1519 else if (emitter::emitIns_valid_imm_for_fmov(constValue))
1520 {
1521 // We can load the FP constant using the fmov FP-immediate for this constValue
1522 emit->emitIns_R_F(INS_fmov, size, targetReg, constValue);
1523 }
1524 else
1525 {
1526 // Get a temp integer register to compute long address.
1527 regNumber addrReg = tree->GetSingleTempReg();
1528
1529 // We must load the FP constant from the constant pool
1530 // Emit a data section constant for the float or double constant.
1531 CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size);
1532 // For long address (default): `adrp + ldr + fmov` will be emitted.
1533 // For short address (proven later), `ldr` will be emitted.
1534 emit->emitIns_R_C(INS_ldr, size, targetReg, addrReg, hnd, 0);
1535 }
1536 }
1537 break;
1538
1539 default:
1540 unreached();
1541 }
1542}
1543
1544// Generate code to get the high N bits of a N*N=2N bit multiplication result
1545void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
1546{
1547 assert(!treeNode->gtOverflowEx());
1548
1549 genConsumeOperands(treeNode);
1550
1551 regNumber targetReg = treeNode->gtRegNum;
1552 var_types targetType = treeNode->TypeGet();
1553 emitter* emit = getEmitter();
1554 emitAttr attr = emitActualTypeSize(treeNode);
1555 unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED);
1556
1557 GenTree* op1 = treeNode->gtGetOp1();
1558 GenTree* op2 = treeNode->gtGetOp2();
1559
1560 assert(!varTypeIsFloating(targetType));
1561
1562 // The arithmetic node must be sitting in a register (since it's not contained)
1563 assert(targetReg != REG_NA);
1564
1565 if (EA_SIZE(attr) == EA_8BYTE)
1566 {
1567 instruction ins = isUnsigned ? INS_umulh : INS_smulh;
1568
1569 regNumber r = emit->emitInsTernary(ins, attr, treeNode, op1, op2);
1570
1571 assert(r == targetReg);
1572 }
1573 else
1574 {
1575 assert(EA_SIZE(attr) == EA_4BYTE);
1576
1577 instruction ins = isUnsigned ? INS_umull : INS_smull;
1578
1579 regNumber r = emit->emitInsTernary(ins, EA_4BYTE, treeNode, op1, op2);
1580
1581 emit->emitIns_R_R_I(isUnsigned ? INS_lsr : INS_asr, EA_8BYTE, targetReg, targetReg, 32);
1582 }
1583
1584 genProduceReg(treeNode);
1585}
1586
1587// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
1588// This method is expected to have called genConsumeOperands() before calling it.
1589void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
1590{
1591 const genTreeOps oper = treeNode->OperGet();
1592 regNumber targetReg = treeNode->gtRegNum;
1593 var_types targetType = treeNode->TypeGet();
1594 emitter* emit = getEmitter();
1595
1596 assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND ||
1597 oper == GT_OR || oper == GT_XOR);
1598
1599 GenTree* op1 = treeNode->gtGetOp1();
1600 GenTree* op2 = treeNode->gtGetOp2();
1601 instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
1602
1603 if ((treeNode->gtFlags & GTF_SET_FLAGS) != 0)
1604 {
1605 switch (oper)
1606 {
1607 case GT_ADD:
1608 ins = INS_adds;
1609 break;
1610 case GT_SUB:
1611 ins = INS_subs;
1612 break;
1613 case GT_AND:
1614 ins = INS_ands;
1615 break;
1616 default:
1617 noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set");
1618 }
1619 }
1620
1621 // The arithmetic node must be sitting in a register (since it's not contained)
1622 assert(targetReg != REG_NA);
1623
1624 regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2);
1625 assert(r == targetReg);
1626
1627 genProduceReg(treeNode);
1628}
1629
1630//------------------------------------------------------------------------
1631// genCodeForLclVar: Produce code for a GT_LCL_VAR node.
1632//
1633// Arguments:
1634// tree - the GT_LCL_VAR node
1635//
1636void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
1637{
1638 var_types targetType = tree->TypeGet();
1639 emitter* emit = getEmitter();
1640
1641 unsigned varNum = tree->gtLclNum;
1642 assert(varNum < compiler->lvaCount);
1643 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1644 bool isRegCandidate = varDsc->lvIsRegCandidate();
1645
1646 // lcl_vars are not defs
1647 assert((tree->gtFlags & GTF_VAR_DEF) == 0);
1648
1649 // If this is a register candidate that has been spilled, genConsumeReg() will
1650 // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
1651
1652 if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED))
1653 {
1654 // targetType must be a normal scalar type and not a TYP_STRUCT
1655 assert(targetType != TYP_STRUCT);
1656
1657 instruction ins = ins_Load(targetType);
1658 emitAttr attr = emitTypeSize(targetType);
1659
1660 attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr);
1661
1662 emit->emitIns_R_S(ins, attr, tree->gtRegNum, varNum, 0);
1663 genProduceReg(tree);
1664 }
1665}
1666
1667//------------------------------------------------------------------------
1668// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node.
1669//
1670// Arguments:
1671// tree - the GT_STORE_LCL_FLD node
1672//
1673void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
1674{
1675 var_types targetType = tree->TypeGet();
1676 regNumber targetReg = tree->gtRegNum;
1677 emitter* emit = getEmitter();
1678 noway_assert(targetType != TYP_STRUCT);
1679
1680#ifdef FEATURE_SIMD
1681 // storing of TYP_SIMD12 (i.e. Vector3) field
1682 if (tree->TypeGet() == TYP_SIMD12)
1683 {
1684 genStoreLclTypeSIMD12(tree);
1685 return;
1686 }
1687#endif // FEATURE_SIMD
1688
1689 // record the offset
1690 unsigned offset = tree->gtLclOffs;
1691
1692 // We must have a stack store with GT_STORE_LCL_FLD
1693 noway_assert(targetReg == REG_NA);
1694
1695 unsigned varNum = tree->gtLclNum;
1696 assert(varNum < compiler->lvaCount);
1697 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1698
1699 // Ensure that lclVar nodes are typed correctly.
1700 assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
1701
1702 GenTree* data = tree->gtOp1;
1703 genConsumeRegs(data);
1704
1705 regNumber dataReg = REG_NA;
1706 if (data->isContainedIntOrIImmed())
1707 {
1708 assert(data->IsIntegralConst(0));
1709 dataReg = REG_ZR;
1710 }
1711 else
1712 {
1713 assert(!data->isContained());
1714 dataReg = data->gtRegNum;
1715 }
1716 assert(dataReg != REG_NA);
1717
1718 instruction ins = ins_Store(targetType);
1719
1720 emitAttr attr = emitTypeSize(targetType);
1721
1722 attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr);
1723
1724 emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
1725
1726 genUpdateLife(tree);
1727
1728 varDsc->lvRegNum = REG_STK;
1729}
1730
1731//------------------------------------------------------------------------
1732// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node.
1733//
1734// Arguments:
1735// tree - the GT_STORE_LCL_VAR node
1736//
1737void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
1738{
1739 var_types targetType = tree->TypeGet();
1740 regNumber targetReg = tree->gtRegNum;
1741 emitter* emit = getEmitter();
1742
1743 unsigned varNum = tree->gtLclNum;
1744 assert(varNum < compiler->lvaCount);
1745 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1746
1747 // Ensure that lclVar nodes are typed correctly.
1748 assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
1749
1750 GenTree* data = tree->gtOp1;
1751
1752 // var = call, where call returns a multi-reg return value
1753 // case is handled separately.
1754 if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
1755 {
1756 genMultiRegCallStoreToLocal(tree);
1757 }
1758 else
1759 {
1760#ifdef FEATURE_SIMD
1761 // storing of TYP_SIMD12 (i.e. Vector3) field
1762 if (tree->TypeGet() == TYP_SIMD12)
1763 {
1764 genStoreLclTypeSIMD12(tree);
1765 return;
1766 }
1767#endif // FEATURE_SIMD
1768
1769 genConsumeRegs(data);
1770
1771 regNumber dataReg = REG_NA;
1772 if (data->isContainedIntOrIImmed())
1773 {
1774 // This is only possible for a zero-init.
1775 assert(data->IsIntegralConst(0));
1776
1777 if (varTypeIsSIMD(targetType))
1778 {
1779 assert(targetReg != REG_NA);
1780 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B);
1781 genProduceReg(tree);
1782 return;
1783 }
1784
1785 dataReg = REG_ZR;
1786 }
1787 else
1788 {
1789 assert(!data->isContained());
1790 dataReg = data->gtRegNum;
1791 }
1792 assert(dataReg != REG_NA);
1793
1794 if (targetReg == REG_NA) // store into stack based LclVar
1795 {
1796 inst_set_SV_var(tree);
1797
1798 instruction ins = ins_Store(targetType);
1799 emitAttr attr = emitTypeSize(targetType);
1800
1801 attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr);
1802
1803 emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0);
1804
1805 genUpdateLife(tree);
1806
1807 varDsc->lvRegNum = REG_STK;
1808 }
1809 else // store into register (i.e move into register)
1810 {
1811 if (dataReg != targetReg)
1812 {
1813 // Assign into targetReg when dataReg (from op1) is not the same register
1814 inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
1815 }
1816 genProduceReg(tree);
1817 }
1818 }
1819}
1820
1821//------------------------------------------------------------------------
1822// genSimpleReturn: Generates code for simple return statement for arm64.
1823//
1824// Note: treeNode's and op1's registers are already consumed.
1825//
1826// Arguments:
1827// treeNode - The GT_RETURN or GT_RETFILT tree node with non-struct and non-void type
1828//
1829// Return Value:
1830// None
1831//
1832void CodeGen::genSimpleReturn(GenTree* treeNode)
1833{
1834 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
1835 GenTree* op1 = treeNode->gtGetOp1();
1836 var_types targetType = treeNode->TypeGet();
1837
1838 assert(!isStructReturn(treeNode));
1839 assert(targetType != TYP_VOID);
1840
1841 regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
1842
1843 bool movRequired = (op1->gtRegNum != retReg);
1844
1845 if (!movRequired)
1846 {
1847 if (op1->OperGet() == GT_LCL_VAR)
1848 {
1849 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
1850 bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
1851 if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0))
1852 {
1853 // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR
1854
1855 unsigned lclNum = lcl->gtLclNum;
1856 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
1857 var_types op1Type = genActualType(op1->TypeGet());
1858 var_types lclType = genActualType(varDsc->TypeGet());
1859
1860 if (genTypeSize(op1Type) < genTypeSize(lclType))
1861 {
1862 movRequired = true;
1863 }
1864 }
1865 }
1866 }
1867 if (movRequired)
1868 {
1869 emitAttr attr = emitActualTypeSize(targetType);
1870 getEmitter()->emitIns_R_R(INS_mov, attr, retReg, op1->gtRegNum);
1871 }
1872}
1873
1874/***********************************************************************************************
1875 * Generate code for localloc
1876 */
1877void CodeGen::genLclHeap(GenTree* tree)
1878{
1879 assert(tree->OperGet() == GT_LCLHEAP);
1880 assert(compiler->compLocallocUsed);
1881
1882 GenTree* size = tree->gtOp.gtOp1;
1883 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
1884
1885 regNumber targetReg = tree->gtRegNum;
1886 regNumber regCnt = REG_NA;
1887 regNumber pspSymReg = REG_NA;
1888 var_types type = genActualType(size->gtType);
1889 emitAttr easz = emitTypeSize(type);
1890 BasicBlock* endLabel = nullptr;
1891 BasicBlock* loop = nullptr;
1892 unsigned stackAdjustment = 0;
1893
1894 noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
1895 noway_assert(genStackLevel == 0); // Can't have anything on the stack
1896
1897 // compute the amount of memory to allocate to properly STACK_ALIGN.
1898 size_t amount = 0;
1899 if (size->IsCnsIntOrI())
1900 {
1901 // If size is a constant, then it must be contained.
1902 assert(size->isContained());
1903
1904 // If amount is zero then return null in targetReg
1905 amount = size->gtIntCon.gtIconVal;
1906 if (amount == 0)
1907 {
1908 instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
1909 goto BAILOUT;
1910 }
1911
1912 // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
1913 amount = AlignUp(amount, STACK_ALIGN);
1914 }
1915 else
1916 {
1917 // If 0 bail out by returning null in targetReg
1918 genConsumeRegAndCopy(size, targetReg);
1919 endLabel = genCreateTempLabel();
1920 getEmitter()->emitIns_R_R(INS_tst, easz, targetReg, targetReg);
1921 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
1922 inst_JMP(jmpEqual, endLabel);
1923
1924 // Compute the size of the block to allocate and perform alignment.
1925 // If compInitMem=true, we can reuse targetReg as regcnt,
1926 // since we don't need any internal registers.
1927 if (compiler->info.compInitMem)
1928 {
1929 assert(tree->AvailableTempRegCount() == 0);
1930 regCnt = targetReg;
1931 }
1932 else
1933 {
1934 regCnt = tree->ExtractTempReg();
1935 if (regCnt != targetReg)
1936 {
1937 inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
1938 }
1939 }
1940
1941 // Align to STACK_ALIGN
1942 // regCnt will be the total number of bytes to localloc
1943 inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
1944 inst_RV_IV(INS_and, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
1945 }
1946
1947 stackAdjustment = 0;
1948
1949 // If we have an outgoing arg area then we must adjust the SP by popping off the
1950 // outgoing arg area. We will restore it right before we return from this method.
1951 //
1952 // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following
1953 // are the cases that need to be handled:
1954 // i) Method has out-going arg area.
1955 // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
1956 // Therefore, we will pop off the out-going arg area from the stack pointer before allocating the localloc
1957 // space.
1958 // ii) Method has no out-going arg area.
1959 // Nothing to pop off from the stack.
1960 if (compiler->lvaOutgoingArgSpaceSize > 0)
1961 {
1962 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
1963 // aligned
1964 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
1965 stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
1966 }
1967
1968 if (size->IsCnsIntOrI())
1969 {
1970 // We should reach here only for non-zero, constant size allocations.
1971 assert(amount > 0);
1972
1973 // For small allocations we will generate up to four stp instructions, to zero 16 to 64 bytes.
1974 static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
1975 assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time
1976 size_t stpCount = amount / (REGSIZE_BYTES * 2);
1977 if (stpCount <= 4)
1978 {
1979 while (stpCount != 0)
1980 {
1981 // We can use pre-indexed addressing.
1982 // stp ZR, ZR, [SP, #-16]! // STACK_ALIGN is 16
1983 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
1984 stpCount -= 1;
1985 }
1986
1987 goto ALLOC_DONE;
1988 }
1989 else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <=
1990 {
1991 // Since the size is less than a page, simply adjust the SP value.
1992 // The SP might already be in the guard page, so we must touch it BEFORE
1993 // the alloc, not after.
1994
1995 // ldr wz, [SP, #0]
1996 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SP, 0);
1997
1998 inst_RV_IV(INS_sub, REG_SP, amount, EA_PTRSIZE);
1999
2000 goto ALLOC_DONE;
2001 }
2002
2003 // else, "mov regCnt, amount"
2004 // If compInitMem=true, we can reuse targetReg as regcnt.
2005 // Since size is a constant, regCnt is not yet initialized.
2006 assert(regCnt == REG_NA);
2007 if (compiler->info.compInitMem)
2008 {
2009 assert(tree->AvailableTempRegCount() == 0);
2010 regCnt = targetReg;
2011 }
2012 else
2013 {
2014 regCnt = tree->ExtractTempReg();
2015 }
2016 genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
2017 }
2018
2019 if (compiler->info.compInitMem)
2020 {
2021 BasicBlock* loop = genCreateTempLabel();
2022
2023 // At this point 'regCnt' is set to the total number of bytes to locAlloc.
2024 // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid
2025 // by tickling the pages, we will just push 0's on the stack.
2026 //
2027 // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2
2028 // and localloc size is a multiple of STACK_ALIGN.
2029
2030 // Loop:
2031 genDefineTempLabel(loop);
2032
2033 // We can use pre-indexed addressing.
2034 // stp ZR, ZR, [SP, #-16]!
2035 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
2036
2037 // If not done, loop
2038 // Note that regCnt is the number of bytes to stack allocate.
2039 // Therefore we need to subtract 16 from regcnt here.
2040 assert(genIsValidIntReg(regCnt));
2041 inst_RV_IV(INS_subs, regCnt, 16, emitActualTypeSize(type));
2042 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
2043 inst_JMP(jmpNotEqual, loop);
2044 }
2045 else
2046 {
2047 // At this point 'regCnt' is set to the total number of bytes to localloc.
2048 //
2049 // We don't need to zero out the allocated memory. However, we do have
2050 // to tickle the pages to ensure that SP is always valid and is
2051 // in sync with the "stack guard page". Note that in the worst
2052 // case SP is on the last byte of the guard page. Thus you must
2053 // touch SP-0 first not SP-0x1000.
2054 //
2055 // Another subtlety is that you don't want SP to be exactly on the
2056 // boundary of the guard page because PUSH is predecrement, thus
2057 // call setup would not touch the guard page but just beyond it
2058 //
2059 // Note that we go through a few hoops so that SP never points to
2060 // illegal pages at any time during the tickling process
2061 //
2062 // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
2063 // bvc Loop // result is smaller than orignial SP (no wrap around)
2064 // mov regCnt, #0 // Overflow, pick lowest possible value
2065 //
2066 // Loop:
2067 // ldr wzr, [SP + 0] // tickle the page - read from the page
2068 // sub regTmp, SP, PAGE_SIZE // decrement SP by eeGetPageSize()
2069 // cmp regTmp, regCnt
2070 // jb Done
2071 // mov SP, regTmp
2072 // j Loop
2073 //
2074 // Done:
2075 // mov SP, regCnt
2076 //
2077
2078 // Setup the regTmp
2079 regNumber regTmp = tree->GetSingleTempReg();
2080
2081 BasicBlock* loop = genCreateTempLabel();
2082 BasicBlock* done = genCreateTempLabel();
2083
2084 // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
2085 getEmitter()->emitIns_R_R_R(INS_subs, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
2086
2087 inst_JMP(EJ_vc, loop); // branch if the V flag is not set
2088
2089 // Overflow, set regCnt to lowest possible value
2090 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
2091
2092 genDefineTempLabel(loop);
2093
2094 // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
2095 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, 0);
2096
2097 // decrement SP by eeGetPageSize()
2098 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regTmp, REG_SPBASE, compiler->eeGetPageSize());
2099
2100 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regCnt);
2101 emitJumpKind jmpLTU = genJumpKindForOper(GT_LT, CK_UNSIGNED);
2102 inst_JMP(jmpLTU, done);
2103
2104 // Update SP to be at the next page of stack that we will tickle
2105 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regTmp);
2106
2107 // Jump to loop and tickle new stack address
2108 inst_JMP(EJ_jmp, loop);
2109
2110 // Done with stack tickle loop
2111 genDefineTempLabel(done);
2112
2113 // Now just move the final value to SP
2114 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
2115 }
2116
2117ALLOC_DONE:
2118 // Re-adjust SP to allocate out-going arg area
2119 if (stackAdjustment != 0)
2120 {
2121 assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
2122 assert(stackAdjustment > 0);
2123 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int)stackAdjustment);
2124
2125 // Return the stackalloc'ed address in result register.
2126 // TargetReg = SP + stackAdjustment.
2127 //
2128 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, targetReg, REG_SPBASE, (int)stackAdjustment);
2129 }
2130 else // stackAdjustment == 0
2131 {
2132 // Move the final value of SP to targetReg
2133 inst_RV_RV(INS_mov, targetReg, REG_SPBASE);
2134 }
2135
2136BAILOUT:
2137 if (endLabel != nullptr)
2138 genDefineTempLabel(endLabel);
2139
2140#if STACK_PROBES
2141 if (compiler->opts.compNeedStackProbes)
2142 {
2143 genGenerateStackProbe();
2144 }
2145#endif
2146
2147 genProduceReg(tree);
2148}
2149
2150//------------------------------------------------------------------------
2151// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node.
2152//
2153// Arguments:
2154// tree - the node
2155//
2156void CodeGen::genCodeForNegNot(GenTree* tree)
2157{
2158 assert(tree->OperIs(GT_NEG, GT_NOT));
2159
2160 var_types targetType = tree->TypeGet();
2161
2162 assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType));
2163
2164 regNumber targetReg = tree->gtRegNum;
2165 instruction ins = genGetInsForOper(tree->OperGet(), targetType);
2166
2167 // The arithmetic node must be sitting in a register (since it's not contained)
2168 assert(!tree->isContained());
2169 // The dst can only be a register.
2170 assert(targetReg != REG_NA);
2171
2172 GenTree* operand = tree->gtGetOp1();
2173 assert(!operand->isContained());
2174 // The src must be a register.
2175 regNumber operandReg = genConsumeReg(operand);
2176
2177 getEmitter()->emitIns_R_R(ins, emitActualTypeSize(tree), targetReg, operandReg);
2178
2179 genProduceReg(tree);
2180}
2181
2182//------------------------------------------------------------------------
2183// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD:
2184// (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph;
2185// (2) float/double MOD is morphed into a helper call by front-end.
2186//
2187// Arguments:
2188// tree - the node
2189//
2190void CodeGen::genCodeForDivMod(GenTreeOp* tree)
2191{
2192 assert(tree->OperIs(GT_DIV, GT_UDIV));
2193
2194 var_types targetType = tree->TypeGet();
2195 emitter* emit = getEmitter();
2196
2197 genConsumeOperands(tree);
2198
2199 if (varTypeIsFloating(targetType))
2200 {
2201 // Floating point divide never raises an exception
2202 genCodeForBinary(tree);
2203 }
2204 else // an integer divide operation
2205 {
2206 GenTree* divisorOp = tree->gtGetOp2();
2207 emitAttr size = EA_ATTR(genTypeSize(genActualType(tree->TypeGet())));
2208
2209 if (divisorOp->IsIntegralConst(0))
2210 {
2211 // We unconditionally throw a divide by zero exception
2212 genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO);
2213
2214 // We still need to call genProduceReg
2215 genProduceReg(tree);
2216 }
2217 else // the divisor is not the constant zero
2218 {
2219 regNumber divisorReg = divisorOp->gtRegNum;
2220
2221 // Generate the require runtime checks for GT_DIV or GT_UDIV
2222 if (tree->gtOper == GT_DIV)
2223 {
2224 BasicBlock* sdivLabel = genCreateTempLabel();
2225
2226 // Two possible exceptions:
2227 // (AnyVal / 0) => DivideByZeroException
2228 // (MinInt / -1) => ArithmeticException
2229 //
2230 bool checkDividend = true;
2231
2232 // Do we have an immediate for the 'divisorOp'?
2233 //
2234 if (divisorOp->IsCnsIntOrI())
2235 {
2236 GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon();
2237 ssize_t intConstValue = intConstTree->IconValue();
2238 assert(intConstValue != 0); // already checked above by IsIntegralConst(0)
2239 if (intConstValue != -1)
2240 {
2241 checkDividend = false; // We statically know that the dividend is not -1
2242 }
2243 }
2244 else // insert check for divison by zero
2245 {
2246 // Check if the divisor is zero throw a DivideByZeroException
2247 emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
2248 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2249 genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
2250 }
2251
2252 if (checkDividend)
2253 {
2254 // Check if the divisor is not -1 branch to 'sdivLabel'
2255 emit->emitIns_R_I(INS_cmp, size, divisorReg, -1);
2256
2257 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
2258 inst_JMP(jmpNotEqual, sdivLabel);
2259 // If control flow continues past here the 'divisorReg' is known to be -1
2260
2261 regNumber dividendReg = tree->gtGetOp1()->gtRegNum;
2262 // At this point the divisor is known to be -1
2263 //
2264 // Issue the 'adds zr, dividendReg, dividendReg' instruction
2265 // this will set both the Z and V flags only when dividendReg is MinInt
2266 //
2267 emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg);
2268 inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear
2269 genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flags is set throw
2270 // ArithmeticException
2271
2272 genDefineTempLabel(sdivLabel);
2273 }
2274 genCodeForBinary(tree); // Generate the sdiv instruction
2275 }
2276 else // (tree->gtOper == GT_UDIV)
2277 {
2278 // Only one possible exception
2279 // (AnyVal / 0) => DivideByZeroException
2280 //
2281 // Note that division by the constant 0 was already checked for above by the
2282 // op2->IsIntegralConst(0) check
2283 //
2284 if (!divisorOp->IsCnsIntOrI())
2285 {
2286 // divisorOp is not a constant, so it could be zero
2287 //
2288 emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
2289 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2290 genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
2291 }
2292 genCodeForBinary(tree);
2293 }
2294 }
2295 }
2296}
2297
2298// Generate code for InitBlk by performing a loop unroll
2299// Preconditions:
2300// a) Both the size and fill byte value are integer constants.
2301// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
2302void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
2303{
2304 // Make sure we got the arguments of the initblk/initobj operation in the right registers
2305 unsigned size = initBlkNode->Size();
2306 GenTree* dstAddr = initBlkNode->Addr();
2307 GenTree* initVal = initBlkNode->Data();
2308 if (initVal->OperIsInitVal())
2309 {
2310 initVal = initVal->gtGetOp1();
2311 }
2312
2313 assert(dstAddr->isUsedFromReg());
2314 assert(initVal->isUsedFromReg() && !initVal->IsIntegralConst(0) || initVal->IsIntegralConst(0));
2315 assert(size != 0);
2316 assert(size <= INITBLK_UNROLL_LIMIT);
2317
2318 emitter* emit = getEmitter();
2319
2320 genConsumeOperands(initBlkNode);
2321
2322 if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
2323 {
2324 // issue a full memory barrier before a volatile initBlockUnroll operation
2325 instGen_MemoryBarrier();
2326 }
2327
2328 regNumber valReg = initVal->IsIntegralConst(0) ? REG_ZR : initVal->gtRegNum;
2329
2330 assert(!initVal->IsIntegralConst(0) || (valReg == REG_ZR));
2331
2332 unsigned offset = 0;
2333
2334 // Perform an unroll using stp.
2335 if (size >= 2 * REGSIZE_BYTES)
2336 {
2337 // Determine how many 16 byte slots
2338 size_t slots = size / (2 * REGSIZE_BYTES);
2339
2340 while (slots-- > 0)
2341 {
2342 emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, valReg, valReg, dstAddr->gtRegNum, offset);
2343 offset += (2 * REGSIZE_BYTES);
2344 }
2345 }
2346
2347 // Fill the remainder (15 bytes or less) if there's any.
2348 if ((size & 0xf) != 0)
2349 {
2350 if ((size & 8) != 0)
2351 {
2352 emit->emitIns_R_R_I(INS_str, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
2353 offset += 8;
2354 }
2355 if ((size & 4) != 0)
2356 {
2357 emit->emitIns_R_R_I(INS_str, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
2358 offset += 4;
2359 }
2360 if ((size & 2) != 0)
2361 {
2362 emit->emitIns_R_R_I(INS_strh, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
2363 offset += 2;
2364 }
2365 if ((size & 1) != 0)
2366 {
2367 emit->emitIns_R_R_I(INS_strb, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
2368 }
2369 }
2370}
2371
2372// Generate code for a load pair from some address + offset
2373// base: tree node which can be either a local address or arbitrary node
2374// offset: distance from the base from which to load
2375void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* base, unsigned offset)
2376{
2377 emitter* emit = getEmitter();
2378
2379 if (base->OperIsLocalAddr())
2380 {
2381 if (base->gtOper == GT_LCL_FLD_ADDR)
2382 offset += base->gtLclFld.gtLclOffs;
2383
2384 emit->emitIns_R_R_S_S(INS_ldp, EA_8BYTE, EA_8BYTE, dst, dst2, base->gtLclVarCommon.gtLclNum, offset);
2385 }
2386 else
2387 {
2388 emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, dst, dst2, base->gtRegNum, offset);
2389 }
2390}
2391
2392// Generate code for a store pair to some address + offset
2393// base: tree node which can be either a local address or arbitrary node
2394// offset: distance from the base from which to load
2395void CodeGen::genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* base, unsigned offset)
2396{
2397 emitter* emit = getEmitter();
2398
2399 if (base->OperIsLocalAddr())
2400 {
2401 if (base->gtOper == GT_LCL_FLD_ADDR)
2402 offset += base->gtLclFld.gtLclOffs;
2403
2404 emit->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, src, src2, base->gtLclVarCommon.gtLclNum, offset);
2405 }
2406 else
2407 {
2408 emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, src, src2, base->gtRegNum, offset);
2409 }
2410}
2411
2412// Generate code for CpObj nodes wich copy structs that have interleaved
2413// GC pointers.
2414// For this case we'll generate a sequence of loads/stores in the case of struct
2415// slots that don't contain GC pointers. The generated code will look like:
2416// ldr tempReg, [R13, #8]
2417// str tempReg, [R14, #8]
2418//
2419// In the case of a GC-Pointer we'll call the ByRef write barrier helper
2420// who happens to use the same registers as the previous call to maintain
2421// the same register requirements and register killsets:
2422// bl CORINFO_HELP_ASSIGN_BYREF
2423//
2424// So finally an example would look like this:
2425// ldr tempReg, [R13, #8]
2426// str tempReg, [R14, #8]
2427// bl CORINFO_HELP_ASSIGN_BYREF
2428// ldr tempReg, [R13, #8]
2429// str tempReg, [R14, #8]
2430// bl CORINFO_HELP_ASSIGN_BYREF
2431// ldr tempReg, [R13, #8]
2432// str tempReg, [R14, #8]
2433void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
2434{
2435 GenTree* dstAddr = cpObjNode->Addr();
2436 GenTree* source = cpObjNode->Data();
2437 var_types srcAddrType = TYP_BYREF;
2438 bool sourceIsLocal = false;
2439
2440 assert(source->isContained());
2441 if (source->gtOper == GT_IND)
2442 {
2443 GenTree* srcAddr = source->gtGetOp1();
2444 assert(!srcAddr->isContained());
2445 srcAddrType = srcAddr->TypeGet();
2446 }
2447 else
2448 {
2449 noway_assert(source->IsLocal());
2450 sourceIsLocal = true;
2451 }
2452
2453 bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr();
2454
2455#ifdef DEBUG
2456 assert(!dstAddr->isContained());
2457
2458 // This GenTree node has data about GC pointers, this means we're dealing
2459 // with CpObj.
2460 assert(cpObjNode->gtGcPtrCount > 0);
2461#endif // DEBUG
2462
2463 // Consume the operands and get them into the right registers.
2464 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
2465 genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA);
2466 gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType);
2467 gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
2468
2469 unsigned slots = cpObjNode->gtSlots;
2470
2471 // Temp register(s) used to perform the sequence of loads and stores.
2472 regNumber tmpReg = cpObjNode->ExtractTempReg();
2473 regNumber tmpReg2 = REG_NA;
2474
2475 assert(genIsValidIntReg(tmpReg));
2476 assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF);
2477 assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF);
2478
2479 if (slots > 1)
2480 {
2481 tmpReg2 = cpObjNode->GetSingleTempReg();
2482 assert(tmpReg2 != tmpReg);
2483 assert(genIsValidIntReg(tmpReg2));
2484 assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
2485 assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF);
2486 }
2487
2488 if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
2489 {
2490 // issue a full memory barrier before a volatile CpObj operation
2491 instGen_MemoryBarrier();
2492 }
2493
2494 emitter* emit = getEmitter();
2495
2496 BYTE* gcPtrs = cpObjNode->gtGcPtrs;
2497
2498 // If we can prove it's on the stack we don't need to use the write barrier.
2499 if (dstOnStack)
2500 {
2501 unsigned i = 0;
2502 // Check if two or more remaining slots and use a ldp/stp sequence
2503 while (i < slots - 1)
2504 {
2505 emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0]));
2506 emitAttr attr1 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 1]));
2507
2508 emit->emitIns_R_R_R_I(INS_ldp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE,
2509 INS_OPTS_POST_INDEX, attr1);
2510 emit->emitIns_R_R_R_I(INS_stp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE,
2511 INS_OPTS_POST_INDEX, attr1);
2512 i += 2;
2513 }
2514
2515 // Use a ldr/str sequence for the last remainder
2516 if (i < slots)
2517 {
2518 emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0]));
2519
2520 emit->emitIns_R_R_I(INS_ldr, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
2521 INS_OPTS_POST_INDEX);
2522 emit->emitIns_R_R_I(INS_str, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
2523 INS_OPTS_POST_INDEX);
2524 }
2525 }
2526 else
2527 {
2528 unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
2529
2530 unsigned i = 0;
2531 while (i < slots)
2532 {
2533 switch (gcPtrs[i])
2534 {
2535 case TYPE_GC_NONE:
2536 // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
2537 if ((i + 1 < slots) && (gcPtrs[i + 1] == TYPE_GC_NONE))
2538 {
2539 emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
2540 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
2541 emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF,
2542 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
2543 ++i; // extra increment of i, since we are copying two items
2544 }
2545 else
2546 {
2547 emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
2548 INS_OPTS_POST_INDEX);
2549 emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
2550 INS_OPTS_POST_INDEX);
2551 }
2552 break;
2553
2554 default:
2555 // In the case of a GC-Pointer we'll call the ByRef write barrier helper
2556 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
2557
2558 gcPtrCount--;
2559 break;
2560 }
2561 ++i;
2562 }
2563 assert(gcPtrCount == 0);
2564 }
2565
2566 if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
2567 {
2568 // issue a INS_BARRIER_ISHLD after a volatile CpObj operation
2569 instGen_MemoryBarrier(INS_BARRIER_ISHLD);
2570 }
2571
2572 // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
2573 // While we normally update GC info prior to the last instruction that uses them,
2574 // these actually live into the helper call.
2575 gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
2576}
2577
2578// generate code do a switch statement based on a table of ip-relative offsets
2579void CodeGen::genTableBasedSwitch(GenTree* treeNode)
2580{
2581 genConsumeOperands(treeNode->AsOp());
2582 regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
2583 regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
2584
2585 regNumber tmpReg = treeNode->GetSingleTempReg();
2586
2587 // load the ip-relative offset (which is relative to start of fgFirstBB)
2588 getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL);
2589
2590 // add it to the absolute address of fgFirstBB
2591 compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
2592 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, compiler->fgFirstBB, tmpReg);
2593 getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, baseReg, baseReg, tmpReg);
2594
2595 // br baseReg
2596 getEmitter()->emitIns_R(INS_br, emitActualTypeSize(TYP_I_IMPL), baseReg);
2597}
2598
2599// emits the table and an instruction to get the address of the first element
2600void CodeGen::genJumpTable(GenTree* treeNode)
2601{
2602 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
2603 assert(treeNode->OperGet() == GT_JMPTABLE);
2604
2605 unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
2606 BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
2607 unsigned jmpTabOffs;
2608 unsigned jmpTabBase;
2609
2610 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
2611
2612 jmpTabOffs = 0;
2613
2614 JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
2615
2616 for (unsigned i = 0; i < jumpCount; i++)
2617 {
2618 BasicBlock* target = *jumpTable++;
2619 noway_assert(target->bbFlags & BBF_JMP_TARGET);
2620
2621 JITDUMP(" DD L_M%03u_" FMT_BB "\n", Compiler::s_compMethodsCount, target->bbNum);
2622
2623 getEmitter()->emitDataGenData(i, target);
2624 };
2625
2626 getEmitter()->emitDataGenEnd();
2627
2628 // Access to inline data is 'abstracted' by a special type of static member
2629 // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
2630 // to constant data, not a real static field.
2631 getEmitter()->emitIns_R_C(INS_adr, emitActualTypeSize(TYP_I_IMPL), treeNode->gtRegNum, REG_NA,
2632 compiler->eeFindJitDataOffs(jmpTabBase), 0);
2633 genProduceReg(treeNode);
2634}
2635
2636//------------------------------------------------------------------------
2637// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
2638//
2639// Arguments:
2640// treeNode - the GT_XADD/XCHG node
2641//
2642void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
2643{
2644 GenTree* data = treeNode->gtOp.gtOp2;
2645 GenTree* addr = treeNode->gtOp.gtOp1;
2646 regNumber targetReg = treeNode->gtRegNum;
2647 regNumber dataReg = data->gtRegNum;
2648 regNumber addrReg = addr->gtRegNum;
2649
2650 genConsumeAddress(addr);
2651 genConsumeRegs(data);
2652
2653 emitAttr dataSize = emitActualTypeSize(data);
2654
2655 if (compiler->compSupports(InstructionSet_Atomics))
2656 {
2657 assert(!data->isContainedIntOrIImmed());
2658
2659 switch (treeNode->gtOper)
2660 {
2661 case GT_XCHG:
2662 getEmitter()->emitIns_R_R_R(INS_swpal, dataSize, dataReg, targetReg, addrReg);
2663 break;
2664 case GT_XADD:
2665 if ((targetReg == REG_NA) || (targetReg == REG_ZR))
2666 {
2667 getEmitter()->emitIns_R_R(INS_staddl, dataSize, dataReg, addrReg);
2668 }
2669 else
2670 {
2671 getEmitter()->emitIns_R_R_R(INS_ldaddal, dataSize, dataReg, targetReg, addrReg);
2672 }
2673 break;
2674 default:
2675 assert(!"Unexpected treeNode->gtOper");
2676 }
2677
2678 instGen_MemoryBarrier(INS_BARRIER_ISH);
2679 }
2680 else
2681 {
2682 regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
2683 regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT);
2684 regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg;
2685
2686 // Check allocator assumptions
2687 //
2688 // The register allocator should have extended the lifetimes of all input and internal registers so that
2689 // none interfere with the target.
2690 noway_assert(addrReg != targetReg);
2691
2692 noway_assert(addrReg != loadReg);
2693 noway_assert(dataReg != loadReg);
2694
2695 noway_assert(addrReg != storeDataReg);
2696 noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg));
2697
2698 assert(addr->isUsedFromReg());
2699 noway_assert(exResultReg != REG_NA);
2700 noway_assert(exResultReg != targetReg);
2701 noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG));
2702
2703 // Store exclusive unpredictable cases must be avoided
2704 noway_assert(exResultReg != storeDataReg);
2705 noway_assert(exResultReg != addrReg);
2706
2707 // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input
2708 // registers
2709 // die at the first instruction generated by the node. This is not the case for these atomics as the input
2710 // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until
2711 // we are finished generating the code for this node.
2712
2713 gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet());
2714
2715 // Emit code like this:
2716 // retry:
2717 // ldxr loadReg, [addrReg]
2718 // add storeDataReg, loadReg, dataReg # Only for GT_XADD
2719 // # GT_XCHG storeDataReg === dataReg
2720 // stxr exResult, storeDataReg, [addrReg]
2721 // cbnz exResult, retry
2722 // dmb ish
2723
2724 BasicBlock* labelRetry = genCreateTempLabel();
2725 genDefineTempLabel(labelRetry);
2726
2727 // The following instruction includes a acquire half barrier
2728 getEmitter()->emitIns_R_R(INS_ldaxr, dataSize, loadReg, addrReg);
2729
2730 switch (treeNode->OperGet())
2731 {
2732 case GT_XADD:
2733 if (data->isContainedIntOrIImmed())
2734 {
2735 // Even though INS_add is specified here, the encoder will choose either
2736 // an INS_add or an INS_sub and encode the immediate as a positive value
2737 genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(),
2738 REG_NA);
2739 }
2740 else
2741 {
2742 getEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg);
2743 }
2744 break;
2745 case GT_XCHG:
2746 assert(!data->isContained());
2747 storeDataReg = dataReg;
2748 break;
2749 default:
2750 unreached();
2751 }
2752
2753 // The following instruction includes a release half barrier
2754 getEmitter()->emitIns_R_R_R(INS_stlxr, dataSize, exResultReg, storeDataReg, addrReg);
2755
2756 getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);
2757
2758 instGen_MemoryBarrier(INS_BARRIER_ISH);
2759
2760 gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
2761 }
2762
2763 if (treeNode->gtRegNum != REG_NA)
2764 {
2765 genProduceReg(treeNode);
2766 }
2767}
2768
2769//------------------------------------------------------------------------
2770// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node.
2771//
2772// Arguments:
2773// tree - the GT_CMPXCHG node
2774//
2775void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
2776{
2777 assert(treeNode->OperIs(GT_CMPXCHG));
2778
2779 GenTree* addr = treeNode->gtOpLocation; // arg1
2780 GenTree* data = treeNode->gtOpValue; // arg2
2781 GenTree* comparand = treeNode->gtOpComparand; // arg3
2782
2783 regNumber targetReg = treeNode->gtRegNum;
2784 regNumber dataReg = data->gtRegNum;
2785 regNumber addrReg = addr->gtRegNum;
2786 regNumber comparandReg = comparand->gtRegNum;
2787
2788 genConsumeAddress(addr);
2789 genConsumeRegs(data);
2790 genConsumeRegs(comparand);
2791
2792 if (compiler->compSupports(InstructionSet_Atomics))
2793 {
2794 emitAttr dataSize = emitActualTypeSize(data);
2795
2796 // casal use the comparand as the target reg
2797 if (targetReg != comparandReg)
2798 {
2799 getEmitter()->emitIns_R_R(INS_mov, dataSize, targetReg, comparandReg);
2800
2801 // Catch case we destroyed data or address before use
2802 noway_assert(addrReg != targetReg);
2803 noway_assert(dataReg != targetReg);
2804 }
2805 getEmitter()->emitIns_R_R_R(INS_casal, dataSize, targetReg, dataReg, addrReg);
2806
2807 instGen_MemoryBarrier(INS_BARRIER_ISH);
2808 }
2809 else
2810 {
2811 regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
2812
2813 // Check allocator assumptions
2814 //
2815 // The register allocator should have extended the lifetimes of all input and internal registers so that
2816 // none interfere with the target.
2817 noway_assert(addrReg != targetReg);
2818 noway_assert(dataReg != targetReg);
2819 noway_assert(comparandReg != targetReg);
2820 noway_assert(addrReg != dataReg);
2821 noway_assert(targetReg != REG_NA);
2822 noway_assert(exResultReg != REG_NA);
2823 noway_assert(exResultReg != targetReg);
2824
2825 assert(addr->isUsedFromReg());
2826 assert(data->isUsedFromReg());
2827 assert(!comparand->isUsedFromMemory());
2828
2829 // Store exclusive unpredictable cases must be avoided
2830 noway_assert(exResultReg != dataReg);
2831 noway_assert(exResultReg != addrReg);
2832
2833 // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input
2834 // registers
2835 // die at the first instruction generated by the node. This is not the case for these atomics as the input
2836 // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until
2837 // we are finished generating the code for this node.
2838
2839 gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet());
2840
2841 // TODO-ARM64-CQ Use ARMv8.1 atomics if available
2842 // https://github.com/dotnet/coreclr/issues/11881
2843
2844 // Emit code like this:
2845 // retry:
2846 // ldxr targetReg, [addrReg]
2847 // cmp targetReg, comparandReg
2848 // bne compareFail
2849 // stxr exResult, dataReg, [addrReg]
2850 // cbnz exResult, retry
2851 // compareFail:
2852 // dmb ish
2853
2854 BasicBlock* labelRetry = genCreateTempLabel();
2855 BasicBlock* labelCompareFail = genCreateTempLabel();
2856 genDefineTempLabel(labelRetry);
2857
2858 // The following instruction includes a acquire half barrier
2859 getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), targetReg, addrReg);
2860
2861 if (comparand->isContainedIntOrIImmed())
2862 {
2863 if (comparand->IsIntegralConst(0))
2864 {
2865 getEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg);
2866 }
2867 else
2868 {
2869 getEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg,
2870 comparand->AsIntConCommon()->IconValue());
2871 getEmitter()->emitIns_J(INS_bne, labelCompareFail);
2872 }
2873 }
2874 else
2875 {
2876 getEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg);
2877 getEmitter()->emitIns_J(INS_bne, labelCompareFail);
2878 }
2879
2880 // The following instruction includes a release half barrier
2881 getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, dataReg, addrReg);
2882
2883 getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);
2884
2885 genDefineTempLabel(labelCompareFail);
2886
2887 instGen_MemoryBarrier(INS_BARRIER_ISH);
2888
2889 gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
2890 }
2891
2892 genProduceReg(treeNode);
2893}
2894
2895instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
2896{
2897 instruction ins = INS_brk;
2898
2899 if (varTypeIsFloating(type))
2900 {
2901 switch (oper)
2902 {
2903 case GT_ADD:
2904 ins = INS_fadd;
2905 break;
2906 case GT_SUB:
2907 ins = INS_fsub;
2908 break;
2909 case GT_MUL:
2910 ins = INS_fmul;
2911 break;
2912 case GT_DIV:
2913 ins = INS_fdiv;
2914 break;
2915 case GT_NEG:
2916 ins = INS_fneg;
2917 break;
2918
2919 default:
2920 NYI("Unhandled oper in genGetInsForOper() - float");
2921 unreached();
2922 break;
2923 }
2924 }
2925 else
2926 {
2927 switch (oper)
2928 {
2929 case GT_ADD:
2930 ins = INS_add;
2931 break;
2932 case GT_AND:
2933 ins = INS_and;
2934 break;
2935 case GT_DIV:
2936 ins = INS_sdiv;
2937 break;
2938 case GT_UDIV:
2939 ins = INS_udiv;
2940 break;
2941 case GT_MUL:
2942 ins = INS_mul;
2943 break;
2944 case GT_LSH:
2945 ins = INS_lsl;
2946 break;
2947 case GT_NEG:
2948 ins = INS_neg;
2949 break;
2950 case GT_NOT:
2951 ins = INS_mvn;
2952 break;
2953 case GT_OR:
2954 ins = INS_orr;
2955 break;
2956 case GT_ROR:
2957 ins = INS_ror;
2958 break;
2959 case GT_RSH:
2960 ins = INS_asr;
2961 break;
2962 case GT_RSZ:
2963 ins = INS_lsr;
2964 break;
2965 case GT_SUB:
2966 ins = INS_sub;
2967 break;
2968 case GT_XOR:
2969 ins = INS_eor;
2970 break;
2971
2972 default:
2973 NYI("Unhandled oper in genGetInsForOper() - integer");
2974 unreached();
2975 break;
2976 }
2977 }
2978 return ins;
2979}
2980
2981//------------------------------------------------------------------------
2982// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node.
2983//
2984// Arguments:
2985// tree - the GT_RETURNTRAP node
2986//
2987void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
2988{
2989 assert(tree->OperGet() == GT_RETURNTRAP);
2990
2991 // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
2992 // based on the contents of 'data'
2993
2994 GenTree* data = tree->gtOp1;
2995 genConsumeRegs(data);
2996 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0);
2997
2998 BasicBlock* skipLabel = genCreateTempLabel();
2999
3000 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
3001 inst_JMP(jmpEqual, skipLabel);
3002 // emit the call to the EE-helper that stops for GC (or other reasons)
3003
3004 genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
3005 genDefineTempLabel(skipLabel);
3006}
3007
3008//------------------------------------------------------------------------
3009// genCodeForStoreInd: Produce code for a GT_STOREIND node.
3010//
3011// Arguments:
3012// tree - the GT_STOREIND node
3013//
3014void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
3015{
3016 GenTree* data = tree->Data();
3017 GenTree* addr = tree->Addr();
3018 var_types targetType = tree->TypeGet();
3019 emitter* emit = getEmitter();
3020 emitAttr attr = emitTypeSize(tree);
3021 instruction ins = ins_Store(targetType);
3022
3023#ifdef FEATURE_SIMD
3024 // Storing Vector3 of size 12 bytes through indirection
3025 if (tree->TypeGet() == TYP_SIMD12)
3026 {
3027 genStoreIndTypeSIMD12(tree);
3028 return;
3029 }
3030#endif // FEATURE_SIMD
3031
3032 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data);
3033 if (writeBarrierForm != GCInfo::WBF_NoBarrier)
3034 {
3035 // data and addr must be in registers.
3036 // Consume both registers so that any copies of interfering
3037 // registers are taken care of.
3038 genConsumeOperands(tree);
3039
3040 // At this point, we should not have any interference.
3041 // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
3042 // as that is where 'addr' must go.
3043 noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF);
3044
3045 // 'addr' goes into x14 (REG_WRITE_BARRIER_DST)
3046 genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST);
3047
3048 // 'data' goes into x15 (REG_WRITE_BARRIER_SRC)
3049 genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC);
3050
3051 genGCWriteBarrier(tree, writeBarrierForm);
3052 }
3053 else // A normal store, not a WriteBarrier store
3054 {
3055 bool dataIsUnary = false;
3056 GenTree* nonRMWsrc = nullptr;
3057 // We must consume the operands in the proper execution order,
3058 // so that liveness is updated appropriately.
3059 genConsumeAddress(addr);
3060
3061 if (!data->isContained())
3062 {
3063 genConsumeRegs(data);
3064 }
3065
3066 regNumber dataReg = REG_NA;
3067 if (data->isContainedIntOrIImmed())
3068 {
3069 assert(data->IsIntegralConst(0));
3070 dataReg = REG_ZR;
3071 }
3072 else // data is not contained, so evaluate it into a register
3073 {
3074 assert(!data->isContained());
3075 dataReg = data->gtRegNum;
3076 }
3077
3078 assert((attr != EA_1BYTE) || !(tree->gtFlags & GTF_IND_UNALIGNED));
3079
3080 if (tree->gtFlags & GTF_IND_VOLATILE)
3081 {
3082 bool useStoreRelease =
3083 genIsValidIntReg(dataReg) && !addr->isContained() && !(tree->gtFlags & GTF_IND_UNALIGNED);
3084
3085 if (useStoreRelease)
3086 {
3087 switch (EA_SIZE(attr))
3088 {
3089 case EA_1BYTE:
3090 assert(ins == INS_strb);
3091 ins = INS_stlrb;
3092 break;
3093 case EA_2BYTE:
3094 assert(ins == INS_strh);
3095 ins = INS_stlrh;
3096 break;
3097 case EA_4BYTE:
3098 case EA_8BYTE:
3099 assert(ins == INS_str);
3100 ins = INS_stlr;
3101 break;
3102 default:
3103 assert(false); // We should not get here
3104 }
3105 }
3106 else
3107 {
3108 // issue a full memory barrier before a volatile StInd
3109 instGen_MemoryBarrier();
3110 }
3111 }
3112
3113 emit->emitInsLoadStoreOp(ins, attr, dataReg, tree);
3114 }
3115}
3116
3117//------------------------------------------------------------------------
3118// genCodeForSwap: Produce code for a GT_SWAP node.
3119//
3120// Arguments:
3121// tree - the GT_SWAP node
3122//
3123void CodeGen::genCodeForSwap(GenTreeOp* tree)
3124{
3125 assert(tree->OperIs(GT_SWAP));
3126
3127 // Swap is only supported for lclVar operands that are enregistered
3128 // We do not consume or produce any registers. Both operands remain enregistered.
3129 // However, the gc-ness may change.
3130 assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2));
3131
3132 GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon();
3133 LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
3134 var_types type1 = varDsc1->TypeGet();
3135 GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon();
3136 LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
3137 var_types type2 = varDsc2->TypeGet();
3138
3139 // We must have both int or both fp regs
3140 assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
3141
3142 // FP swap is not yet implemented (and should have NYI'd in LSRA)
3143 assert(!varTypeIsFloating(type1));
3144
3145 regNumber oldOp1Reg = lcl1->gtRegNum;
3146 regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
3147 regNumber oldOp2Reg = lcl2->gtRegNum;
3148 regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
3149
3150 // We don't call genUpdateVarReg because we don't have a tree node with the new register.
3151 varDsc1->lvRegNum = oldOp2Reg;
3152 varDsc2->lvRegNum = oldOp1Reg;
3153
3154 // Do the xchg
3155 emitAttr size = EA_PTRSIZE;
3156 if (varTypeGCtype(type1) != varTypeGCtype(type2))
3157 {
3158 // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
3159 // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
3160 size = EA_GCREF;
3161 }
3162
3163 NYI("register swap");
3164 // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
3165
3166 // Update the gcInfo.
3167 // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
3168 gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
3169 gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
3170
3171 // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
3172 // It will also dump the updates.
3173 gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
3174 gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
3175}
3176
3177//-------------------------------------------------------------------------------------------
3178// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
3179// corresponding to a binary Relational operator result.
3180//
3181// Arguments:
3182// dstReg - The target register to set to 1 or 0
3183// tree - The GenTree Relop node that was used to set the Condition codes
3184//
3185// Return Value: none
3186//
3187// Notes:
3188// A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
3189//-------------------------------------------------------------------------------------------
3190
3191void CodeGen::genSetRegToCond(regNumber dstReg, GenTree* tree)
3192{
3193 emitJumpKind jumpKind[2];
3194 bool branchToTrueLabel[2];
3195 genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
3196 assert(jumpKind[0] != EJ_NONE);
3197
3198 // Set the reg according to the flags
3199 inst_SET(jumpKind[0], dstReg);
3200
3201 // Do we need to use two operation to set the flags?
3202 //
3203 if (jumpKind[1] != EJ_NONE)
3204 {
3205 emitter* emit = getEmitter();
3206 bool ordered = ((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
3207 insCond secondCond;
3208
3209 // The only ones that require two operations are the
3210 // floating point compare operations of BEQ or BNE.UN
3211 //
3212 if (tree->gtOper == GT_EQ)
3213 {
3214 // This must be an ordered comparison.
3215 assert(ordered);
3216 assert(jumpKind[1] == EJ_vs); // We complement this value
3217 secondCond = INS_COND_VC; // for the secondCond
3218 }
3219 else // gtOper == GT_NE
3220 {
3221 // This must be BNE.UN (unordered comparison)
3222 assert((tree->gtOper == GT_NE) && !ordered);
3223 assert(jumpKind[1] == EJ_lo); // We complement this value
3224 secondCond = INS_COND_HS; // for the secondCond
3225 }
3226
3227 // The second instruction is a 'csinc' instruction that either selects the previous dstReg
3228 // or increments the ZR register, which produces a 1 result.
3229
3230 emit->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, dstReg, dstReg, REG_ZR, secondCond);
3231 }
3232}
3233
3234//------------------------------------------------------------------------
3235// genIntToFloatCast: Generate code to cast an int/long to float/double
3236//
3237// Arguments:
3238// treeNode - The GT_CAST node
3239//
3240// Return Value:
3241// None.
3242//
3243// Assumptions:
3244// Cast is a non-overflow conversion.
3245// The treeNode must have an assigned register.
3246// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
3247//
3248void CodeGen::genIntToFloatCast(GenTree* treeNode)
3249{
3250 // int type --> float/double conversions are always non-overflow ones
3251 assert(treeNode->OperGet() == GT_CAST);
3252 assert(!treeNode->gtOverflow());
3253
3254 regNumber targetReg = treeNode->gtRegNum;
3255 assert(genIsValidFloatReg(targetReg));
3256
3257 GenTree* op1 = treeNode->gtOp.gtOp1;
3258 assert(!op1->isContained()); // Cannot be contained
3259 assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg.
3260
3261 var_types dstType = treeNode->CastToType();
3262 var_types srcType = genActualType(op1->TypeGet());
3263 assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
3264
3265 // force the srcType to unsigned if GT_UNSIGNED flag is set
3266 if (treeNode->gtFlags & GTF_UNSIGNED)
3267 {
3268 srcType = genUnsignedType(srcType);
3269 }
3270
3271 // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE
3272 emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
3273 noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));
3274
3275 instruction ins = varTypeIsUnsigned(srcType) ? INS_ucvtf : INS_scvtf;
3276 insOpts cvtOption = INS_OPTS_NONE; // invalid value
3277
3278 if (dstType == TYP_DOUBLE)
3279 {
3280 if (srcSize == EA_4BYTE)
3281 {
3282 cvtOption = INS_OPTS_4BYTE_TO_D;
3283 }
3284 else
3285 {
3286 assert(srcSize == EA_8BYTE);
3287 cvtOption = INS_OPTS_8BYTE_TO_D;
3288 }
3289 }
3290 else
3291 {
3292 assert(dstType == TYP_FLOAT);
3293 if (srcSize == EA_4BYTE)
3294 {
3295 cvtOption = INS_OPTS_4BYTE_TO_S;
3296 }
3297 else
3298 {
3299 assert(srcSize == EA_8BYTE);
3300 cvtOption = INS_OPTS_8BYTE_TO_S;
3301 }
3302 }
3303
3304 genConsumeOperands(treeNode->AsOp());
3305
3306 getEmitter()->emitIns_R_R(ins, emitActualTypeSize(dstType), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
3307
3308 genProduceReg(treeNode);
3309}
3310
3311//------------------------------------------------------------------------
3312// genFloatToIntCast: Generate code to cast float/double to int/long
3313//
3314// Arguments:
3315// treeNode - The GT_CAST node
3316//
3317// Return Value:
3318// None.
3319//
3320// Assumptions:
3321// Cast is a non-overflow conversion.
3322// The treeNode must have an assigned register.
3323// SrcType=float/double and DstType= int32/uint32/int64/uint64
3324//
3325void CodeGen::genFloatToIntCast(GenTree* treeNode)
3326{
3327 // we don't expect to see overflow detecting float/double --> int type conversions here
3328 // as they should have been converted into helper calls by front-end.
3329 assert(treeNode->OperGet() == GT_CAST);
3330 assert(!treeNode->gtOverflow());
3331
3332 regNumber targetReg = treeNode->gtRegNum;
3333 assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.
3334
3335 GenTree* op1 = treeNode->gtOp.gtOp1;
3336 assert(!op1->isContained()); // Cannot be contained
3337 assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
3338
3339 var_types dstType = treeNode->CastToType();
3340 var_types srcType = op1->TypeGet();
3341 assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
3342
3343 // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE
3344 // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
3345 // we expect the front-end or lowering phase to have generated two levels of cast.
3346 //
3347 emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
3348 noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));
3349
3350 instruction ins = INS_fcvtzs; // default to sign converts
3351 insOpts cvtOption = INS_OPTS_NONE; // invalid value
3352
3353 if (varTypeIsUnsigned(dstType))
3354 {
3355 ins = INS_fcvtzu; // use unsigned converts
3356 }
3357
3358 if (srcType == TYP_DOUBLE)
3359 {
3360 if (dstSize == EA_4BYTE)
3361 {
3362 cvtOption = INS_OPTS_D_TO_4BYTE;
3363 }
3364 else
3365 {
3366 assert(dstSize == EA_8BYTE);
3367 cvtOption = INS_OPTS_D_TO_8BYTE;
3368 }
3369 }
3370 else
3371 {
3372 assert(srcType == TYP_FLOAT);
3373 if (dstSize == EA_4BYTE)
3374 {
3375 cvtOption = INS_OPTS_S_TO_4BYTE;
3376 }
3377 else
3378 {
3379 assert(dstSize == EA_8BYTE);
3380 cvtOption = INS_OPTS_S_TO_8BYTE;
3381 }
3382 }
3383
3384 genConsumeOperands(treeNode->AsOp());
3385
3386 getEmitter()->emitIns_R_R(ins, dstSize, treeNode->gtRegNum, op1->gtRegNum, cvtOption);
3387
3388 genProduceReg(treeNode);
3389}
3390
3391//------------------------------------------------------------------------
3392// genCkfinite: Generate code for ckfinite opcode.
3393//
3394// Arguments:
3395// treeNode - The GT_CKFINITE node
3396//
3397// Return Value:
3398// None.
3399//
3400// Assumptions:
3401// GT_CKFINITE node has reserved an internal register.
3402//
3403void CodeGen::genCkfinite(GenTree* treeNode)
3404{
3405 assert(treeNode->OperGet() == GT_CKFINITE);
3406
3407 GenTree* op1 = treeNode->gtOp.gtOp1;
3408 var_types targetType = treeNode->TypeGet();
3409 int expMask = (targetType == TYP_FLOAT) ? 0x7F8 : 0x7FF; // Bit mask to extract exponent.
3410 int shiftAmount = targetType == TYP_FLOAT ? 20 : 52;
3411
3412 emitter* emit = getEmitter();
3413
3414 // Extract exponent into a register.
3415 regNumber intReg = treeNode->GetSingleTempReg();
3416 regNumber fpReg = genConsumeReg(op1);
3417
3418 emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), intReg, fpReg);
3419 emit->emitIns_R_R_I(INS_lsr, emitActualTypeSize(targetType), intReg, intReg, shiftAmount);
3420
3421 // Mask of exponent with all 1's and check if the exponent is all 1's
3422 emit->emitIns_R_R_I(INS_and, EA_4BYTE, intReg, intReg, expMask);
3423 emit->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, expMask);
3424
3425 // If exponent is all 1's, throw ArithmeticException
3426 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
3427 genJumpToThrowHlpBlk(jmpEqual, SCK_ARITH_EXCPN);
3428
3429 // if it is a finite value copy it to targetReg
3430 if (treeNode->gtRegNum != fpReg)
3431 {
3432 emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), treeNode->gtRegNum, fpReg);
3433 }
3434 genProduceReg(treeNode);
3435}
3436
3437//------------------------------------------------------------------------
3438// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT/GT_TEST_EQ/GT_TEST_NE node.
3439//
3440// Arguments:
3441// tree - the node
3442//
3443void CodeGen::genCodeForCompare(GenTreeOp* tree)
3444{
3445 regNumber targetReg = tree->gtRegNum;
3446 emitter* emit = getEmitter();
3447
3448 GenTree* op1 = tree->gtOp1;
3449 GenTree* op2 = tree->gtOp2;
3450 var_types op1Type = genActualType(op1->TypeGet());
3451 var_types op2Type = genActualType(op2->TypeGet());
3452
3453 assert(!op1->isUsedFromMemory());
3454 assert(!op2->isUsedFromMemory());
3455
3456 genConsumeOperands(tree);
3457
3458 emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));
3459
3460 assert(genTypeSize(op1Type) == genTypeSize(op2Type));
3461
3462 if (varTypeIsFloating(op1Type))
3463 {
3464 assert(varTypeIsFloating(op2Type));
3465 assert(!op1->isContained());
3466 assert(op1Type == op2Type);
3467
3468 if (op2->IsIntegralConst(0))
3469 {
3470 assert(op2->isContained());
3471 emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0);
3472 }
3473 else
3474 {
3475 assert(!op2->isContained());
3476 emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
3477 }
3478 }
3479 else
3480 {
3481 assert(!varTypeIsFloating(op2Type));
3482 // We don't support swapping op1 and op2 to generate cmp reg, imm
3483 assert(!op1->isContainedIntOrIImmed());
3484
3485 instruction ins = tree->OperIs(GT_TEST_EQ, GT_TEST_NE) ? INS_tst : INS_cmp;
3486
3487 if (op2->isContainedIntOrIImmed())
3488 {
3489 GenTreeIntConCommon* intConst = op2->AsIntConCommon();
3490 emit->emitIns_R_I(ins, cmpSize, op1->gtRegNum, intConst->IconValue());
3491 }
3492 else
3493 {
3494 emit->emitIns_R_R(ins, cmpSize, op1->gtRegNum, op2->gtRegNum);
3495 }
3496 }
3497
3498 // Are we evaluating this into a register?
3499 if (targetReg != REG_NA)
3500 {
3501 genSetRegToCond(targetReg, tree);
3502 genProduceReg(tree);
3503 }
3504}
3505
3506//------------------------------------------------------------------------
3507// genCodeForJumpCompare: Generates code for jmpCompare statement.
3508//
3509// A GT_JCMP node is created when a comparison and conditional branch
3510// can be executed in a single instruction.
3511//
3512// Arm64 has a few instructions with this behavior.
3513// - cbz/cbnz -- Compare and branch register zero/not zero
3514// - tbz/tbnz -- Test and branch register bit zero/not zero
3515//
3516// The cbz/cbnz supports the normal +/- 1MB branch range for conditional branches
3517// The tbz/tbnz supports a smaller +/- 32KB branch range
3518//
3519// A GT_JCMP cbz/cbnz node is created when there is a GT_EQ or GT_NE
3520// integer/unsigned comparison against #0 which is used by a GT_JTRUE
3521// condition jump node.
3522//
3523// A GT_JCMP tbz/tbnz node is created when there is a GT_TEST_EQ or GT_TEST_NE
3524// integer/unsigned comparison against against a mask with a single bit set
3525// which is used by a GT_JTRUE condition jump node.
3526//
3527// This node is repsonsible for consuming the register, and emitting the
3528// appropriate fused compare/test and branch instruction
3529//
3530// Two flags guide code generation
3531// GTF_JCMP_TST -- Set if this is a tbz/tbnz rather than cbz/cbnz
3532// GTF_JCMP_EQ -- Set if this is cbz/tbz rather than cbnz/tbnz
3533//
3534// Arguments:
3535// tree - The GT_JCMP tree node.
3536//
3537// Return Value:
3538// None
3539//
3540void CodeGen::genCodeForJumpCompare(GenTreeOp* tree)
3541{
3542 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
3543
3544 GenTree* op1 = tree->gtGetOp1();
3545 GenTree* op2 = tree->gtGetOp2();
3546
3547 assert(tree->OperIs(GT_JCMP));
3548 assert(!varTypeIsFloating(tree));
3549 assert(!op1->isUsedFromMemory());
3550 assert(!op2->isUsedFromMemory());
3551 assert(op2->IsCnsIntOrI());
3552 assert(op2->isContained());
3553
3554 genConsumeOperands(tree);
3555
3556 regNumber reg = op1->gtRegNum;
3557 emitAttr attr = emitActualTypeSize(op1->TypeGet());
3558
3559 if (tree->gtFlags & GTF_JCMP_TST)
3560 {
3561 ssize_t compareImm = op2->gtIntCon.IconValue();
3562
3563 assert(isPow2(compareImm));
3564
3565 instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_tbz : INS_tbnz;
3566 int imm = genLog2((size_t)compareImm);
3567
3568 getEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm);
3569 }
3570 else
3571 {
3572 assert(op2->IsIntegralConst(0));
3573
3574 instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_cbz : INS_cbnz;
3575
3576 getEmitter()->emitIns_J_R(ins, attr, compiler->compCurBB->bbJumpDest, reg);
3577 }
3578}
3579
3580int CodeGenInterface::genSPtoFPdelta()
3581{
3582 int delta;
3583
3584 // We place the saved frame pointer immediately above the outgoing argument space.
3585 delta = (int)compiler->lvaOutgoingArgSpaceSize;
3586
3587 assert(delta >= 0);
3588 return delta;
3589}
3590
3591//---------------------------------------------------------------------
3592// genTotalFrameSize - return the total size of the stack frame, including local size,
3593// callee-saved register size, etc.
3594//
3595// Return value:
3596// Total frame size
3597//
3598
3599int CodeGenInterface::genTotalFrameSize()
3600{
3601 // For varargs functions, we home all the incoming register arguments. They are not
3602 // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but
3603 // since we don't use "push" instructions to save them, we don't have to do the
3604 // save of these varargs register arguments as the first thing in the prolog.
3605
3606 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
3607
3608 int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) +
3609 compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
3610
3611 assert(totalFrameSize >= 0);
3612 return totalFrameSize;
3613}
3614
3615//---------------------------------------------------------------------
3616// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
3617// This number is going to be negative, since the Caller-SP is at a higher
3618// address than the frame pointer.
3619//
3620// There must be a frame pointer to call this function!
3621
3622int CodeGenInterface::genCallerSPtoFPdelta()
3623{
3624 assert(isFramePointerUsed());
3625 int callerSPtoFPdelta;
3626
3627 callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
3628
3629 assert(callerSPtoFPdelta <= 0);
3630 return callerSPtoFPdelta;
3631}
3632
3633//---------------------------------------------------------------------
3634// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
3635//
3636// This number will be negative.
3637
3638int CodeGenInterface::genCallerSPtoInitialSPdelta()
3639{
3640 int callerSPtoSPdelta = 0;
3641
3642 callerSPtoSPdelta -= genTotalFrameSize();
3643
3644 assert(callerSPtoSPdelta <= 0);
3645 return callerSPtoSPdelta;
3646}
3647
3648/*****************************************************************************
3649 * Emit a call to a helper function.
3650 *
3651 */
3652
3653void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */)
3654{
3655 void* addr = nullptr;
3656 void* pAddr = nullptr;
3657
3658 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
3659 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
3660 regNumber callTarget = REG_NA;
3661
3662 if (addr == nullptr)
3663 {
3664 // This is call to a runtime helper.
3665 // adrp x, [reloc:rel page addr]
3666 // add x, x, [reloc:page offset]
3667 // ldr x, [x]
3668 // br x
3669
3670 if (callTargetReg == REG_NA)
3671 {
3672 // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
3673 // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
3674 callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
3675 }
3676
3677 regMaskTP callTargetMask = genRegMask(callTargetReg);
3678 regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
3679
3680 // assert that all registers in callTargetMask are in the callKillSet
3681 noway_assert((callTargetMask & callKillSet) == callTargetMask);
3682
3683 callTarget = callTargetReg;
3684
3685 // adrp + add with relocations will be emitted
3686 getEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
3687 getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
3688 callType = emitter::EC_INDIR_R;
3689 }
3690
3691 getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
3692 retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
3693 gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, /* IL offset */
3694 callTarget, /* ireg */
3695 REG_NA, 0, 0, /* xreg, xmul, disp */
3696 false /* isJump */
3697 );
3698
3699 regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
3700 regSet.verifyRegistersUsed(killMask);
3701}
3702
3703#ifdef FEATURE_SIMD
3704
3705//------------------------------------------------------------------------
3706// genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main
3707// routine which in turn calls appropriate genSIMDIntrinsicXXX() routine.
3708//
3709// Arguments:
3710// simdNode - The GT_SIMD node
3711//
3712// Return Value:
3713// None.
3714//
3715// Notes:
3716// Currently, we only recognize SIMDVector<float> and SIMDVector<int>, and
3717// a limited set of methods.
3718//
3719// TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp.
3720void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
3721{
3722 // NYI for unsupported base types
3723 if (simdNode->gtSIMDBaseType != TYP_INT && simdNode->gtSIMDBaseType != TYP_LONG &&
3724 simdNode->gtSIMDBaseType != TYP_FLOAT && simdNode->gtSIMDBaseType != TYP_DOUBLE &&
3725 simdNode->gtSIMDBaseType != TYP_USHORT && simdNode->gtSIMDBaseType != TYP_UBYTE &&
3726 simdNode->gtSIMDBaseType != TYP_SHORT && simdNode->gtSIMDBaseType != TYP_BYTE &&
3727 simdNode->gtSIMDBaseType != TYP_UINT && simdNode->gtSIMDBaseType != TYP_ULONG)
3728 {
3729 noway_assert(!"SIMD intrinsic with unsupported base type.");
3730 }
3731
3732 switch (simdNode->gtSIMDIntrinsicID)
3733 {
3734 case SIMDIntrinsicInit:
3735 genSIMDIntrinsicInit(simdNode);
3736 break;
3737
3738 case SIMDIntrinsicInitN:
3739 genSIMDIntrinsicInitN(simdNode);
3740 break;
3741
3742 case SIMDIntrinsicSqrt:
3743 case SIMDIntrinsicAbs:
3744 case SIMDIntrinsicCast:
3745 case SIMDIntrinsicConvertToSingle:
3746 case SIMDIntrinsicConvertToInt32:
3747 case SIMDIntrinsicConvertToDouble:
3748 case SIMDIntrinsicConvertToInt64:
3749 genSIMDIntrinsicUnOp(simdNode);
3750 break;
3751
3752 case SIMDIntrinsicWidenLo:
3753 case SIMDIntrinsicWidenHi:
3754 genSIMDIntrinsicWiden(simdNode);
3755 break;
3756
3757 case SIMDIntrinsicNarrow:
3758 genSIMDIntrinsicNarrow(simdNode);
3759 break;
3760
3761 case SIMDIntrinsicAdd:
3762 case SIMDIntrinsicSub:
3763 case SIMDIntrinsicMul:
3764 case SIMDIntrinsicDiv:
3765 case SIMDIntrinsicBitwiseAnd:
3766 case SIMDIntrinsicBitwiseAndNot:
3767 case SIMDIntrinsicBitwiseOr:
3768 case SIMDIntrinsicBitwiseXor:
3769 case SIMDIntrinsicMin:
3770 case SIMDIntrinsicMax:
3771 case SIMDIntrinsicEqual:
3772 case SIMDIntrinsicLessThan:
3773 case SIMDIntrinsicGreaterThan:
3774 case SIMDIntrinsicLessThanOrEqual:
3775 case SIMDIntrinsicGreaterThanOrEqual:
3776 genSIMDIntrinsicBinOp(simdNode);
3777 break;
3778
3779 case SIMDIntrinsicOpEquality:
3780 case SIMDIntrinsicOpInEquality:
3781 genSIMDIntrinsicRelOp(simdNode);
3782 break;
3783
3784 case SIMDIntrinsicDotProduct:
3785 genSIMDIntrinsicDotProduct(simdNode);
3786 break;
3787
3788 case SIMDIntrinsicGetItem:
3789 genSIMDIntrinsicGetItem(simdNode);
3790 break;
3791
3792 case SIMDIntrinsicSetX:
3793 case SIMDIntrinsicSetY:
3794 case SIMDIntrinsicSetZ:
3795 case SIMDIntrinsicSetW:
3796 genSIMDIntrinsicSetItem(simdNode);
3797 break;
3798
3799 case SIMDIntrinsicUpperSave:
3800 genSIMDIntrinsicUpperSave(simdNode);
3801 break;
3802
3803 case SIMDIntrinsicUpperRestore:
3804 genSIMDIntrinsicUpperRestore(simdNode);
3805 break;
3806
3807 case SIMDIntrinsicSelect:
3808 NYI("SIMDIntrinsicSelect lowered during import to (a & sel) | (b & ~sel)");
3809 break;
3810
3811 default:
3812 noway_assert(!"Unimplemented SIMD intrinsic.");
3813 unreached();
3814 }
3815}
3816
3817insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType)
3818{
3819 assert((size == EA_16BYTE) || (size == EA_8BYTE));
3820 insOpts result = INS_OPTS_NONE;
3821
3822 switch (elementType)
3823 {
3824 case TYP_DOUBLE:
3825 case TYP_ULONG:
3826 case TYP_LONG:
3827 result = (size == EA_16BYTE) ? INS_OPTS_2D : INS_OPTS_1D;
3828 break;
3829 case TYP_FLOAT:
3830 case TYP_UINT:
3831 case TYP_INT:
3832 result = (size == EA_16BYTE) ? INS_OPTS_4S : INS_OPTS_2S;
3833 break;
3834 case TYP_USHORT:
3835 case TYP_SHORT:
3836 result = (size == EA_16BYTE) ? INS_OPTS_8H : INS_OPTS_4H;
3837 break;
3838 case TYP_UBYTE:
3839 case TYP_BYTE:
3840 result = (size == EA_16BYTE) ? INS_OPTS_16B : INS_OPTS_8B;
3841 break;
3842 default:
3843 assert(!"Unsupported element type");
3844 unreached();
3845 }
3846
3847 return result;
3848}
3849
3850// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic
3851//
3852// Arguments:
3853// intrinsicId - SIMD intrinsic Id
3854// baseType - Base type of the SIMD vector
3855// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode
3856//
3857//
3858// Return Value:
3859// Instruction (op) to be used, and immed is set if instruction requires an immediate operand.
3860//
3861instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/)
3862{
3863 instruction result = INS_invalid;
3864 if (varTypeIsFloating(baseType))
3865 {
3866 switch (intrinsicId)
3867 {
3868 case SIMDIntrinsicAbs:
3869 result = INS_fabs;
3870 break;
3871 case SIMDIntrinsicAdd:
3872 result = INS_fadd;
3873 break;
3874 case SIMDIntrinsicBitwiseAnd:
3875 result = INS_and;
3876 break;
3877 case SIMDIntrinsicBitwiseAndNot:
3878 result = INS_bic;
3879 break;
3880 case SIMDIntrinsicBitwiseOr:
3881 result = INS_orr;
3882 break;
3883 case SIMDIntrinsicBitwiseXor:
3884 result = INS_eor;
3885 break;
3886 case SIMDIntrinsicCast:
3887 result = INS_mov;
3888 break;
3889 case SIMDIntrinsicConvertToInt32:
3890 case SIMDIntrinsicConvertToInt64:
3891 result = INS_fcvtns;
3892 break;
3893 case SIMDIntrinsicDiv:
3894 result = INS_fdiv;
3895 break;
3896 case SIMDIntrinsicEqual:
3897 result = INS_fcmeq;
3898 break;
3899 case SIMDIntrinsicGreaterThan:
3900 result = INS_fcmgt;
3901 break;
3902 case SIMDIntrinsicGreaterThanOrEqual:
3903 result = INS_fcmge;
3904 break;
3905 case SIMDIntrinsicLessThan:
3906 result = INS_fcmlt;
3907 break;
3908 case SIMDIntrinsicLessThanOrEqual:
3909 result = INS_fcmle;
3910 break;
3911 case SIMDIntrinsicMax:
3912 result = INS_fmax;
3913 break;
3914 case SIMDIntrinsicMin:
3915 result = INS_fmin;
3916 break;
3917 case SIMDIntrinsicMul:
3918 result = INS_fmul;
3919 break;
3920 case SIMDIntrinsicNarrow:
3921 // Use INS_fcvtn lower bytes of result followed by INS_fcvtn2 for upper bytes
3922 // Return lower bytes instruction here
3923 result = INS_fcvtn;
3924 break;
3925 case SIMDIntrinsicSelect:
3926 result = INS_bsl;
3927 break;
3928 case SIMDIntrinsicSqrt:
3929 result = INS_fsqrt;
3930 break;
3931 case SIMDIntrinsicSub:
3932 result = INS_fsub;
3933 break;
3934 case SIMDIntrinsicWidenLo:
3935 result = INS_fcvtl;
3936 break;
3937 case SIMDIntrinsicWidenHi:
3938 result = INS_fcvtl2;
3939 break;
3940 default:
3941 assert(!"Unsupported SIMD intrinsic");
3942 unreached();
3943 }
3944 }
3945 else
3946 {
3947 bool isUnsigned = varTypeIsUnsigned(baseType);
3948
3949 switch (intrinsicId)
3950 {
3951 case SIMDIntrinsicAbs:
3952 assert(!isUnsigned);
3953 result = INS_abs;
3954 break;
3955 case SIMDIntrinsicAdd:
3956 result = INS_add;
3957 break;
3958 case SIMDIntrinsicBitwiseAnd:
3959 result = INS_and;
3960 break;
3961 case SIMDIntrinsicBitwiseAndNot:
3962 result = INS_bic;
3963 break;
3964 case SIMDIntrinsicBitwiseOr:
3965 result = INS_orr;
3966 break;
3967 case SIMDIntrinsicBitwiseXor:
3968 result = INS_eor;
3969 break;
3970 case SIMDIntrinsicCast:
3971 result = INS_mov;
3972 break;
3973 case SIMDIntrinsicConvertToDouble:
3974 case SIMDIntrinsicConvertToSingle:
3975 result = isUnsigned ? INS_ucvtf : INS_scvtf;
3976 break;
3977 case SIMDIntrinsicEqual:
3978 result = INS_cmeq;
3979 break;
3980 case SIMDIntrinsicGreaterThan:
3981 result = isUnsigned ? INS_cmhi : INS_cmgt;
3982 break;
3983 case SIMDIntrinsicGreaterThanOrEqual:
3984 result = isUnsigned ? INS_cmhs : INS_cmge;
3985 break;
3986 case SIMDIntrinsicLessThan:
3987 assert(!isUnsigned);
3988 result = INS_cmlt;
3989 break;
3990 case SIMDIntrinsicLessThanOrEqual:
3991 assert(!isUnsigned);
3992 result = INS_cmle;
3993 break;
3994 case SIMDIntrinsicMax:
3995 result = isUnsigned ? INS_umax : INS_smax;
3996 break;
3997 case SIMDIntrinsicMin:
3998 result = isUnsigned ? INS_umin : INS_smin;
3999 break;
4000 case SIMDIntrinsicMul:
4001 result = INS_mul;
4002 break;
4003 case SIMDIntrinsicNarrow:
4004 // Use INS_xtn lower bytes of result followed by INS_xtn2 for upper bytes
4005 // Return lower bytes instruction here
4006 result = INS_xtn;
4007 break;
4008 case SIMDIntrinsicSelect:
4009 result = INS_bsl;
4010 break;
4011 case SIMDIntrinsicSub:
4012 result = INS_sub;
4013 break;
4014 case SIMDIntrinsicWidenLo:
4015 result = isUnsigned ? INS_uxtl : INS_sxtl;
4016 break;
4017 case SIMDIntrinsicWidenHi:
4018 result = isUnsigned ? INS_uxtl2 : INS_sxtl2;
4019 break;
4020 default:
4021 assert(!"Unsupported SIMD intrinsic");
4022 unreached();
4023 }
4024 }
4025
4026 noway_assert(result != INS_invalid);
4027 return result;
4028}
4029
4030//------------------------------------------------------------------------
4031// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize.
4032//
4033// Arguments:
4034// simdNode - The GT_SIMD node
4035//
4036// Return Value:
4037// None.
4038//
4039void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
4040{
4041 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInit);
4042
4043 GenTree* op1 = simdNode->gtGetOp1();
4044 var_types baseType = simdNode->gtSIMDBaseType;
4045 regNumber targetReg = simdNode->gtRegNum;
4046 assert(targetReg != REG_NA);
4047 var_types targetType = simdNode->TypeGet();
4048
4049 genConsumeOperands(simdNode);
4050 regNumber op1Reg = op1->IsIntegralConst(0) ? REG_ZR : op1->gtRegNum;
4051
4052 // TODO-ARM64-CQ Add LD1R to allow SIMDIntrinsicInit from contained memory
4053 // TODO-ARM64-CQ Add MOVI to allow SIMDIntrinsicInit from contained immediate small constants
4054
4055 assert(op1->isContained() == op1->IsIntegralConst(0));
4056 assert(!op1->isUsedFromMemory());
4057
4058 assert(genIsValidFloatReg(targetReg));
4059 assert(genIsValidIntReg(op1Reg) || genIsValidFloatReg(op1Reg));
4060
4061 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
4062 insOpts opt = genGetSimdInsOpt(attr, baseType);
4063
4064 if (genIsValidIntReg(op1Reg))
4065 {
4066 getEmitter()->emitIns_R_R(INS_dup, attr, targetReg, op1Reg, opt);
4067 }
4068 else
4069 {
4070 getEmitter()->emitIns_R_R_I(INS_dup, attr, targetReg, op1Reg, 0, opt);
4071 }
4072
4073 genProduceReg(simdNode);
4074}
4075
4076//-------------------------------------------------------------------------------------------
4077// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes
4078// a number of arguments equal to the length of the Vector.
4079//
4080// Arguments:
4081// simdNode - The GT_SIMD node
4082//
4083// Return Value:
4084// None.
4085//
4086void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
4087{
4088 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN);
4089
4090 regNumber targetReg = simdNode->gtRegNum;
4091 assert(targetReg != REG_NA);
4092
4093 var_types targetType = simdNode->TypeGet();
4094
4095 var_types baseType = simdNode->gtSIMDBaseType;
4096
4097 regNumber vectorReg = targetReg;
4098
4099 if (varTypeIsFloating(baseType))
4100 {
4101 // Note that we cannot use targetReg before consuming all float source operands.
4102 // Therefore use an internal temp register
4103 vectorReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
4104 }
4105
4106 emitAttr baseTypeSize = emitTypeSize(baseType);
4107
4108 // We will first consume the list items in execution (left to right) order,
4109 // and record the registers.
4110 regNumber operandRegs[FP_REGSIZE_BYTES];
4111 unsigned initCount = 0;
4112 for (GenTree* list = simdNode->gtGetOp1(); list != nullptr; list = list->gtGetOp2())
4113 {
4114 assert(list->OperGet() == GT_LIST);
4115 GenTree* listItem = list->gtGetOp1();
4116 assert(listItem->TypeGet() == baseType);
4117 assert(!listItem->isContained());
4118 regNumber operandReg = genConsumeReg(listItem);
4119 operandRegs[initCount] = operandReg;
4120 initCount++;
4121 }
4122
4123 assert((initCount * baseTypeSize) <= simdNode->gtSIMDSize);
4124
4125 if (initCount * baseTypeSize < EA_16BYTE)
4126 {
4127 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, vectorReg, 0x00, INS_OPTS_16B);
4128 }
4129
4130 if (varTypeIsIntegral(baseType))
4131 {
4132 for (unsigned i = 0; i < initCount; i++)
4133 {
4134 getEmitter()->emitIns_R_R_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i);
4135 }
4136 }
4137 else
4138 {
4139 for (unsigned i = 0; i < initCount; i++)
4140 {
4141 getEmitter()->emitIns_R_R_I_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i, 0);
4142 }
4143 }
4144
4145 // Load the initialized value.
4146 if (targetReg != vectorReg)
4147 {
4148 getEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, vectorReg);
4149 }
4150
4151 genProduceReg(simdNode);
4152}
4153
4154//----------------------------------------------------------------------------------
4155// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt.
4156//
4157// Arguments:
4158// simdNode - The GT_SIMD node
4159//
4160// Return Value:
4161// None.
4162//
4163void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
4164{
4165 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast ||
4166 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs ||
4167 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle ||
4168 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 ||
4169 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble ||
4170 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64);
4171
4172 GenTree* op1 = simdNode->gtGetOp1();
4173 var_types baseType = simdNode->gtSIMDBaseType;
4174 regNumber targetReg = simdNode->gtRegNum;
4175 assert(targetReg != REG_NA);
4176 var_types targetType = simdNode->TypeGet();
4177
4178 genConsumeOperands(simdNode);
4179 regNumber op1Reg = op1->gtRegNum;
4180
4181 assert(genIsValidFloatReg(op1Reg));
4182 assert(genIsValidFloatReg(targetReg));
4183
4184 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
4185 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
4186 insOpts opt = (ins == INS_mov) ? INS_OPTS_NONE : genGetSimdInsOpt(attr, baseType);
4187
4188 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
4189
4190 genProduceReg(simdNode);
4191}
4192
4193//--------------------------------------------------------------------------------
4194// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations
4195//
4196// Arguments:
4197// simdNode - The GT_SIMD node
4198//
4199// Notes:
4200// The Widen intrinsics are broken into separate intrinsics for the two results.
4201//
4202void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
4203{
4204 assert((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenLo) ||
4205 (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi));
4206
4207 GenTree* op1 = simdNode->gtGetOp1();
4208 var_types baseType = simdNode->gtSIMDBaseType;
4209 regNumber targetReg = simdNode->gtRegNum;
4210 assert(targetReg != REG_NA);
4211 var_types simdType = simdNode->TypeGet();
4212
4213 genConsumeOperands(simdNode);
4214 regNumber op1Reg = op1->gtRegNum;
4215 regNumber srcReg = op1Reg;
4216 emitAttr emitSize = emitActualTypeSize(simdType);
4217
4218 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
4219
4220 if (varTypeIsFloating(baseType))
4221 {
4222 getEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg);
4223 }
4224 else
4225 {
4226 emitAttr attr = (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi) ? EA_16BYTE : EA_8BYTE;
4227 insOpts opt = genGetSimdInsOpt(attr, baseType);
4228
4229 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
4230 }
4231
4232 genProduceReg(simdNode);
4233}
4234
4235//--------------------------------------------------------------------------------
4236// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations
4237//
4238// Arguments:
4239// simdNode - The GT_SIMD node
4240//
4241// Notes:
4242// This intrinsic takes two arguments. The first operand is narrowed to produce the
4243// lower elements of the results, and the second operand produces the high elements.
4244//
4245void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
4246{
4247 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicNarrow);
4248
4249 GenTree* op1 = simdNode->gtGetOp1();
4250 GenTree* op2 = simdNode->gtGetOp2();
4251 var_types baseType = simdNode->gtSIMDBaseType;
4252 regNumber targetReg = simdNode->gtRegNum;
4253 assert(targetReg != REG_NA);
4254 var_types simdType = simdNode->TypeGet();
4255 emitAttr emitSize = emitTypeSize(simdType);
4256
4257 genConsumeOperands(simdNode);
4258 regNumber op1Reg = op1->gtRegNum;
4259 regNumber op2Reg = op2->gtRegNum;
4260
4261 assert(genIsValidFloatReg(op1Reg));
4262 assert(genIsValidFloatReg(op2Reg));
4263 assert(genIsValidFloatReg(targetReg));
4264 assert(op2Reg != targetReg);
4265 assert(simdNode->gtSIMDSize == 16);
4266
4267 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
4268 assert((ins == INS_fcvtn) || (ins == INS_xtn));
4269
4270 if (ins == INS_fcvtn)
4271 {
4272 getEmitter()->emitIns_R_R(INS_fcvtn, EA_8BYTE, targetReg, op1Reg);
4273 getEmitter()->emitIns_R_R(INS_fcvtn2, EA_8BYTE, targetReg, op2Reg);
4274 }
4275 else
4276 {
4277 insOpts opt = INS_OPTS_NONE;
4278 insOpts opt2 = INS_OPTS_NONE;
4279
4280 // This is not the same as genGetSimdInsOpt()
4281 // Basetype is the soure operand type
4282 // However encoding is based on the destination operand type which is 1/2 the basetype.
4283 switch (baseType)
4284 {
4285 case TYP_ULONG:
4286 case TYP_LONG:
4287 opt = INS_OPTS_2S;
4288 opt2 = INS_OPTS_4S;
4289 break;
4290 case TYP_UINT:
4291 case TYP_INT:
4292 opt = INS_OPTS_4H;
4293 opt2 = INS_OPTS_8H;
4294 break;
4295 case TYP_USHORT:
4296 case TYP_SHORT:
4297 opt = INS_OPTS_8B;
4298 opt2 = INS_OPTS_16B;
4299 break;
4300 default:
4301 assert(!"Unsupported narrowing element type");
4302 unreached();
4303 }
4304 getEmitter()->emitIns_R_R(INS_xtn, EA_8BYTE, targetReg, op1Reg, opt);
4305 getEmitter()->emitIns_R_R(INS_xtn2, EA_16BYTE, targetReg, op2Reg, opt2);
4306 }
4307
4308 genProduceReg(simdNode);
4309}
4310
4311//--------------------------------------------------------------------------------
4312// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations
4313// add, sub, mul, bit-wise And, AndNot and Or.
4314//
4315// Arguments:
4316// simdNode - The GT_SIMD node
4317//
4318// Return Value:
4319// None.
4320//
4321void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
4322{
4323 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub ||
4324 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv ||
4325 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
4326 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot ||
4327 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr ||
4328 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin ||
4329 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual ||
4330 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan ||
4331 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan ||
4332 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThanOrEqual ||
4333 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThanOrEqual);
4334
4335 GenTree* op1 = simdNode->gtGetOp1();
4336 GenTree* op2 = simdNode->gtGetOp2();
4337 var_types baseType = simdNode->gtSIMDBaseType;
4338 regNumber targetReg = simdNode->gtRegNum;
4339 assert(targetReg != REG_NA);
4340 var_types targetType = simdNode->TypeGet();
4341
4342 genConsumeOperands(simdNode);
4343 regNumber op1Reg = op1->gtRegNum;
4344 regNumber op2Reg = op2->gtRegNum;
4345
4346 assert(genIsValidFloatReg(op1Reg));
4347 assert(genIsValidFloatReg(op2Reg));
4348 assert(genIsValidFloatReg(targetReg));
4349
4350 // TODO-ARM64-CQ Contain integer constants where posible
4351
4352 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
4353 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
4354 insOpts opt = genGetSimdInsOpt(attr, baseType);
4355
4356 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, opt);
4357
4358 genProduceReg(simdNode);
4359}
4360
4361//--------------------------------------------------------------------------------
4362// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operater
4363// == and !=
4364//
4365// Arguments:
4366// simdNode - The GT_SIMD node
4367//
4368// Return Value:
4369// None.
4370//
4371void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
4372{
4373 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality ||
4374 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
4375
4376 GenTree* op1 = simdNode->gtGetOp1();
4377 GenTree* op2 = simdNode->gtGetOp2();
4378 var_types baseType = simdNode->gtSIMDBaseType;
4379 regNumber targetReg = simdNode->gtRegNum;
4380 var_types targetType = simdNode->TypeGet();
4381
4382 genConsumeOperands(simdNode);
4383 regNumber op1Reg = op1->gtRegNum;
4384 regNumber op2Reg = op2->gtRegNum;
4385 regNumber otherReg = op2Reg;
4386
4387 instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, baseType);
4388 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
4389 insOpts opt = genGetSimdInsOpt(attr, baseType);
4390
4391 // TODO-ARM64-CQ Contain integer constants where posible
4392
4393 regNumber tmpFloatReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
4394
4395 getEmitter()->emitIns_R_R_R(ins, attr, tmpFloatReg, op1Reg, op2Reg, opt);
4396
4397 if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
4398 {
4399 // For 12Byte vectors we must set upper bits to get correct comparison
4400 // We do not assume upper bits are zero.
4401 instGen_Set_Reg_To_Imm(EA_4BYTE, targetReg, -1);
4402 getEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpFloatReg, targetReg, 3);
4403 }
4404
4405 getEmitter()->emitIns_R_R(INS_uminv, attr, tmpFloatReg, tmpFloatReg,
4406 (simdNode->gtSIMDSize > 8) ? INS_OPTS_16B : INS_OPTS_8B);
4407
4408 getEmitter()->emitIns_R_R_I(INS_mov, EA_1BYTE, targetReg, tmpFloatReg, 0);
4409
4410 if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality)
4411 {
4412 getEmitter()->emitIns_R_R_I(INS_eor, EA_4BYTE, targetReg, targetReg, 0x1);
4413 }
4414
4415 getEmitter()->emitIns_R_R_I(INS_and, EA_4BYTE, targetReg, targetReg, 0x1);
4416
4417 genProduceReg(simdNode);
4418}
4419
4420//--------------------------------------------------------------------------------
4421// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product.
4422//
4423// Arguments:
4424// simdNode - The GT_SIMD node
4425//
4426// Return Value:
4427// None.
4428//
4429void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
4430{
4431 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDotProduct);
4432
4433 GenTree* op1 = simdNode->gtGetOp1();
4434 GenTree* op2 = simdNode->gtGetOp2();
4435 var_types baseType = simdNode->gtSIMDBaseType;
4436 var_types simdType = op1->TypeGet();
4437
4438 regNumber targetReg = simdNode->gtRegNum;
4439 assert(targetReg != REG_NA);
4440
4441 var_types targetType = simdNode->TypeGet();
4442 assert(targetType == baseType);
4443
4444 genConsumeOperands(simdNode);
4445 regNumber op1Reg = op1->gtRegNum;
4446 regNumber op2Reg = op2->gtRegNum;
4447 regNumber tmpReg = targetReg;
4448
4449 if (!varTypeIsFloating(baseType))
4450 {
4451 tmpReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
4452 }
4453
4454 instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicMul, baseType);
4455 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
4456 insOpts opt = genGetSimdInsOpt(attr, baseType);
4457
4458 // Vector multiply
4459 getEmitter()->emitIns_R_R_R(ins, attr, tmpReg, op1Reg, op2Reg, opt);
4460
4461 if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
4462 {
4463 // For 12Byte vectors we must zero upper bits to get correct dot product
4464 // We do not assume upper bits are zero.
4465 getEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpReg, REG_ZR, 3);
4466 }
4467
4468 // Vector add horizontal
4469 if (varTypeIsFloating(baseType))
4470 {
4471 if (baseType == TYP_FLOAT)
4472 {
4473 if (opt == INS_OPTS_4S)
4474 {
4475 getEmitter()->emitIns_R_R_R(INS_faddp, attr, tmpReg, tmpReg, tmpReg, INS_OPTS_4S);
4476 }
4477 getEmitter()->emitIns_R_R(INS_faddp, EA_4BYTE, targetReg, tmpReg);
4478 }
4479 else
4480 {
4481 getEmitter()->emitIns_R_R(INS_faddp, EA_8BYTE, targetReg, tmpReg);
4482 }
4483 }
4484 else
4485 {
4486 ins = varTypeIsUnsigned(baseType) ? INS_uaddlv : INS_saddlv;
4487
4488 getEmitter()->emitIns_R_R(ins, attr, tmpReg, tmpReg, opt);
4489
4490 // Mov to integer register
4491 if (varTypeIsUnsigned(baseType) || (genTypeSize(baseType) < 4))
4492 {
4493 getEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(baseType), targetReg, tmpReg, 0);
4494 }
4495 else
4496 {
4497 getEmitter()->emitIns_R_R_I(INS_smov, emitActualTypeSize(baseType), targetReg, tmpReg, 0);
4498 }
4499 }
4500
4501 genProduceReg(simdNode);
4502}
4503
4504//------------------------------------------------------------------------------------
4505// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i.
4506//
4507// Arguments:
4508// simdNode - The GT_SIMD node
4509//
4510// Return Value:
4511// None.
4512//
4513void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
4514{
4515 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem);
4516
4517 GenTree* op1 = simdNode->gtGetOp1();
4518 GenTree* op2 = simdNode->gtGetOp2();
4519 var_types simdType = op1->TypeGet();
4520 assert(varTypeIsSIMD(simdType));
4521
4522 // op1 of TYP_SIMD12 should be considered as TYP_SIMD16
4523 if (simdType == TYP_SIMD12)
4524 {
4525 simdType = TYP_SIMD16;
4526 }
4527
4528 var_types baseType = simdNode->gtSIMDBaseType;
4529 regNumber targetReg = simdNode->gtRegNum;
4530 assert(targetReg != REG_NA);
4531 var_types targetType = simdNode->TypeGet();
4532 assert(targetType == genActualType(baseType));
4533
4534 // GetItem has 2 operands:
4535 // - the source of SIMD type (op1)
4536 // - the index of the value to be returned.
4537 genConsumeOperands(simdNode);
4538
4539 emitAttr baseTypeSize = emitTypeSize(baseType);
4540 unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
4541
4542 if (op2->IsCnsIntOrI())
4543 {
4544 assert(op2->isContained());
4545
4546 ssize_t index = op2->gtIntCon.gtIconVal;
4547
4548 // We only need to generate code for the get if the index is valid
4549 // If the index is invalid, previously generated for the range check will throw
4550 if (getEmitter()->isValidVectorIndex(emitTypeSize(simdType), baseTypeSize, index))
4551 {
4552 if (op1->isContained())
4553 {
4554 int offset = (int)index * genTypeSize(baseType);
4555 instruction ins = ins_Load(baseType);
4556 baseTypeSize = varTypeIsFloating(baseType)
4557 ? baseTypeSize
4558 : getEmitter()->emitInsAdjustLoadStoreAttr(ins, baseTypeSize);
4559
4560 assert(!op1->isUsedFromReg());
4561
4562 if (op1->OperIsLocal())
4563 {
4564 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
4565
4566 getEmitter()->emitIns_R_S(ins, baseTypeSize, targetReg, varNum, offset);
4567 }
4568 else
4569 {
4570 assert(op1->OperGet() == GT_IND);
4571
4572 GenTree* addr = op1->AsIndir()->Addr();
4573 assert(!addr->isContained());
4574 regNumber baseReg = addr->gtRegNum;
4575
4576 // ldr targetReg, [baseReg, #offset]
4577 getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, baseReg, offset);
4578 }
4579 }
4580 else
4581 {
4582 assert(op1->isUsedFromReg());
4583 regNumber srcReg = op1->gtRegNum;
4584
4585 instruction ins;
4586 if (varTypeIsFloating(baseType))
4587 {
4588 assert(genIsValidFloatReg(targetReg));
4589 // dup targetReg, srcReg[#index]
4590 ins = INS_dup;
4591 }
4592 else
4593 {
4594 assert(genIsValidIntReg(targetReg));
4595 if (varTypeIsUnsigned(baseType) || (baseTypeSize == EA_8BYTE))
4596 {
4597 // umov targetReg, srcReg[#index]
4598 ins = INS_umov;
4599 }
4600 else
4601 {
4602 // smov targetReg, srcReg[#index]
4603 ins = INS_smov;
4604 }
4605 }
4606 getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, srcReg, index);
4607 }
4608 }
4609 }
4610 else
4611 {
4612 assert(!op2->isContained());
4613
4614 regNumber baseReg = REG_NA;
4615 regNumber indexReg = op2->gtRegNum;
4616
4617 if (op1->isContained())
4618 {
4619 // Optimize the case of op1 is in memory and trying to access ith element.
4620 assert(!op1->isUsedFromReg());
4621 if (op1->OperIsLocal())
4622 {
4623 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
4624
4625 baseReg = simdNode->ExtractTempReg();
4626
4627 // Load the address of varNum
4628 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0);
4629 }
4630 else
4631 {
4632 // Require GT_IND addr to be not contained.
4633 assert(op1->OperGet() == GT_IND);
4634
4635 GenTree* addr = op1->AsIndir()->Addr();
4636 assert(!addr->isContained());
4637
4638 baseReg = addr->gtRegNum;
4639 }
4640 }
4641 else
4642 {
4643 assert(op1->isUsedFromReg());
4644 regNumber srcReg = op1->gtRegNum;
4645
4646 unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
4647 noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);
4648
4649 baseReg = simdNode->ExtractTempReg();
4650
4651 // Load the address of simdInitTempVarNum
4652 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0);
4653
4654 // Store the vector to simdInitTempVarNum
4655 getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, baseReg);
4656 }
4657
4658 assert(genIsValidIntReg(indexReg));
4659 assert(genIsValidIntReg(baseReg));
4660 assert(baseReg != indexReg);
4661
4662 // Load item at baseReg[index]
4663 getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, baseReg, indexReg, INS_OPTS_LSL,
4664 baseTypeScale);
4665 }
4666
4667 genProduceReg(simdNode);
4668}
4669
4670//------------------------------------------------------------------------------------
4671// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i.
4672//
4673// Arguments:
4674// simdNode - The GT_SIMD node
4675//
4676// Return Value:
4677// None.
4678//
4679void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
4680{
4681 // Determine index based on intrinsic ID
4682 int index = -1;
4683 switch (simdNode->gtSIMDIntrinsicID)
4684 {
4685 case SIMDIntrinsicSetX:
4686 index = 0;
4687 break;
4688 case SIMDIntrinsicSetY:
4689 index = 1;
4690 break;
4691 case SIMDIntrinsicSetZ:
4692 index = 2;
4693 break;
4694 case SIMDIntrinsicSetW:
4695 index = 3;
4696 break;
4697
4698 default:
4699 unreached();
4700 }
4701 assert(index != -1);
4702
4703 // op1 is the SIMD vector
4704 // op2 is the value to be set
4705 GenTree* op1 = simdNode->gtGetOp1();
4706 GenTree* op2 = simdNode->gtGetOp2();
4707
4708 var_types baseType = simdNode->gtSIMDBaseType;
4709 regNumber targetReg = simdNode->gtRegNum;
4710 assert(targetReg != REG_NA);
4711 var_types targetType = simdNode->TypeGet();
4712 assert(varTypeIsSIMD(targetType));
4713
4714 assert(op2->TypeGet() == baseType);
4715 assert(simdNode->gtSIMDSize >= ((index + 1) * genTypeSize(baseType)));
4716
4717 genConsumeOperands(simdNode);
4718 regNumber op1Reg = op1->gtRegNum;
4719 regNumber op2Reg = op2->gtRegNum;
4720
4721 assert(genIsValidFloatReg(targetReg));
4722 assert(genIsValidFloatReg(op1Reg));
4723 assert(genIsValidIntReg(op2Reg) || genIsValidFloatReg(op2Reg));
4724 assert(targetReg != op2Reg);
4725
4726 emitAttr attr = emitTypeSize(baseType);
4727
4728 // Insert mov if register assignment requires it
4729 getEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, op1Reg);
4730
4731 if (genIsValidIntReg(op2Reg))
4732 {
4733 getEmitter()->emitIns_R_R_I(INS_ins, attr, targetReg, op2Reg, index);
4734 }
4735 else
4736 {
4737 getEmitter()->emitIns_R_R_I_I(INS_ins, attr, targetReg, op2Reg, index, 0);
4738 }
4739
4740 genProduceReg(simdNode);
4741}
4742
4743//-----------------------------------------------------------------------------
4744// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to
4745// the given register, if any, or to memory.
4746//
4747// Arguments:
4748// simdNode - The GT_SIMD node
4749//
4750// Return Value:
4751// None.
4752//
4753// Notes:
4754// The upper half of all SIMD registers are volatile, even the callee-save registers.
4755// When a 16-byte SIMD value is live across a call, the register allocator will use this intrinsic
4756// to cause the upper half to be saved. It will first attempt to find another, unused, callee-save
4757// register. If such a register cannot be found, it will save it to an available caller-save register.
4758// In that case, this node will be marked GTF_SPILL, which will cause genProduceReg to save the 8 byte
4759// value to the stack. (Note that if there are no caller-save registers available, the entire 16 byte
4760// value will be spilled to the stack.)
4761//
4762void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
4763{
4764 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave);
4765
4766 GenTree* op1 = simdNode->gtGetOp1();
4767 assert(op1->IsLocal());
4768 assert(emitTypeSize(op1->TypeGet()) == 16);
4769 regNumber targetReg = simdNode->gtRegNum;
4770 regNumber op1Reg = genConsumeReg(op1);
4771 assert(op1Reg != REG_NA);
4772 assert(targetReg != REG_NA);
4773 getEmitter()->emitIns_R_R_I_I(INS_mov, EA_8BYTE, targetReg, op1Reg, 0, 1);
4774
4775 genProduceReg(simdNode);
4776}
4777
4778//-----------------------------------------------------------------------------
4779// genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD16 vector to
4780// the given register, if any, or to memory.
4781//
4782// Arguments:
4783// simdNode - The GT_SIMD node
4784//
4785// Return Value:
4786// None.
4787//
4788// Notes:
4789// For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always
4790// have their home register, this node has its targetReg on the lclVar child, and its source
4791// on the simdNode.
4792// Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled
4793// an upper-half to a caller save register, this node will be marked GTF_SPILLED. However, unlike
4794// most spill scenarios, the saved tree will be different from the restored tree, but the spill
4795// restore logic, which is triggered by the call to genConsumeReg, requires us to provide the
4796// spilled tree (saveNode) in order to perform the reload. We can easily find that tree,
4797// as it is in the spill descriptor for the register from which it was saved.
4798//
4799void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
4800{
4801 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
4802
4803 GenTree* op1 = simdNode->gtGetOp1();
4804 assert(op1->IsLocal());
4805 assert(emitTypeSize(op1->TypeGet()) == 16);
4806 regNumber srcReg = simdNode->gtRegNum;
4807 regNumber lclVarReg = genConsumeReg(op1);
4808 unsigned varNum = op1->AsLclVarCommon()->gtLclNum;
4809 assert(lclVarReg != REG_NA);
4810 assert(srcReg != REG_NA);
4811 if (simdNode->gtFlags & GTF_SPILLED)
4812 {
4813 GenTree* saveNode = regSet.rsSpillDesc[srcReg]->spillTree;
4814 noway_assert(saveNode != nullptr && (saveNode->gtRegNum == srcReg));
4815 genConsumeReg(saveNode);
4816 }
4817 getEmitter()->emitIns_R_R_I_I(INS_mov, EA_8BYTE, lclVarReg, srcReg, 1, 0);
4818}
4819
4820//-----------------------------------------------------------------------------
4821// genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory.
4822// Since Vector3 is not a hardware supported write size, it is performed
4823// as two writes: 8 byte followed by 4-byte.
4824//
4825// Arguments:
4826// treeNode - tree node that is attempting to store indirect
4827//
4828//
4829// Return Value:
4830// None.
4831//
4832void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
4833{
4834 assert(treeNode->OperGet() == GT_STOREIND);
4835
4836 GenTree* addr = treeNode->gtOp.gtOp1;
4837 GenTree* data = treeNode->gtOp.gtOp2;
4838
4839 // addr and data should not be contained.
4840 assert(!data->isContained());
4841 assert(!addr->isContained());
4842
4843#ifdef DEBUG
4844 // Should not require a write barrier
4845 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
4846 assert(writeBarrierForm == GCInfo::WBF_NoBarrier);
4847#endif
4848
4849 genConsumeOperands(treeNode->AsOp());
4850
4851 // Need an addtional integer register to extract upper 4 bytes from data.
4852 regNumber tmpReg = treeNode->GetSingleTempReg();
4853 assert(tmpReg != addr->gtRegNum);
4854
4855 // 8-byte write
4856 getEmitter()->emitIns_R_R(ins_Store(TYP_DOUBLE), EA_8BYTE, data->gtRegNum, addr->gtRegNum);
4857
4858 // Extract upper 4-bytes from data
4859 getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, data->gtRegNum, 2);
4860
4861 // 4-byte write
4862 getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, tmpReg, addr->gtRegNum, 8);
4863}
4864
4865//-----------------------------------------------------------------------------
4866// genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value.
4867// Since Vector3 is not a hardware supported write size, it is performed
4868// as two loads: 8 byte followed by 4-byte.
4869//
4870// Arguments:
4871// treeNode - tree node of GT_IND
4872//
4873//
4874// Return Value:
4875// None.
4876//
4877void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
4878{
4879 assert(treeNode->OperGet() == GT_IND);
4880
4881 GenTree* addr = treeNode->gtOp.gtOp1;
4882 regNumber targetReg = treeNode->gtRegNum;
4883
4884 assert(!addr->isContained());
4885
4886 regNumber operandReg = genConsumeReg(addr);
4887
4888 // Need an addtional int register to read upper 4 bytes, which is different from targetReg
4889 regNumber tmpReg = treeNode->GetSingleTempReg();
4890
4891 // 8-byte read
4892 getEmitter()->emitIns_R_R(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, addr->gtRegNum);
4893
4894 // 4-byte read
4895 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, addr->gtRegNum, 8);
4896
4897 // Insert upper 4-bytes into data
4898 getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, targetReg, tmpReg, 2);
4899
4900 genProduceReg(treeNode);
4901}
4902
4903//-----------------------------------------------------------------------------
4904// genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field.
4905// Since Vector3 is not a hardware supported write size, it is performed
4906// as two stores: 8 byte followed by 4-byte.
4907//
4908// Arguments:
4909// treeNode - tree node that is attempting to store TYP_SIMD12 field
4910//
4911// Return Value:
4912// None.
4913//
4914void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
4915{
4916 assert((treeNode->OperGet() == GT_STORE_LCL_FLD) || (treeNode->OperGet() == GT_STORE_LCL_VAR));
4917
4918 unsigned offs = 0;
4919 unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
4920 assert(varNum < compiler->lvaCount);
4921
4922 if (treeNode->OperGet() == GT_LCL_FLD)
4923 {
4924 offs = treeNode->gtLclFld.gtLclOffs;
4925 }
4926
4927 GenTree* op1 = treeNode->gtOp.gtOp1;
4928 assert(!op1->isContained());
4929 regNumber operandReg = genConsumeReg(op1);
4930
4931 // Need an addtional integer register to extract upper 4 bytes from data.
4932 regNumber tmpReg = treeNode->GetSingleTempReg();
4933
4934 // store lower 8 bytes
4935 getEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs);
4936
4937 // Extract upper 4-bytes from data
4938 getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, operandReg, 2);
4939
4940 // 4-byte write
4941 getEmitter()->emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offs + 8);
4942}
4943
4944#endif // FEATURE_SIMD
4945
4946#ifdef FEATURE_HW_INTRINSICS
4947#include "hwintrinsic.h"
4948
4949instruction CodeGen::getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType)
4950{
4951 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
4952
4953 unsigned int instrTypeIndex = varTypeIsFloating(instrType) ? 0 : varTypeIsUnsigned(instrType) ? 2 : 1;
4954
4955 instruction ins = HWIntrinsicInfo::lookup(intrinsicID).instrs[instrTypeIndex];
4956 assert(ins != INS_invalid);
4957
4958 return ins;
4959}
4960
4961//------------------------------------------------------------------------
4962// genHWIntrinsic: Produce code for a GT_HWIntrinsic node.
4963//
4964// This is the main routine which in turn calls the genHWIntrinsicXXX() routines.
4965//
4966// Arguments:
4967// node - the GT_HWIntrinsic node
4968//
4969// Return Value:
4970// None.
4971//
4972void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
4973{
4974 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
4975
4976 switch (HWIntrinsicInfo::lookup(intrinsicID).form)
4977 {
4978 case HWIntrinsicInfo::UnaryOp:
4979 genHWIntrinsicUnaryOp(node);
4980 break;
4981 case HWIntrinsicInfo::CrcOp:
4982 genHWIntrinsicCrcOp(node);
4983 break;
4984 case HWIntrinsicInfo::SimdBinaryOp:
4985 genHWIntrinsicSimdBinaryOp(node);
4986 break;
4987 case HWIntrinsicInfo::SimdExtractOp:
4988 genHWIntrinsicSimdExtractOp(node);
4989 break;
4990 case HWIntrinsicInfo::SimdInsertOp:
4991 genHWIntrinsicSimdInsertOp(node);
4992 break;
4993 case HWIntrinsicInfo::SimdSelectOp:
4994 genHWIntrinsicSimdSelectOp(node);
4995 break;
4996 case HWIntrinsicInfo::SimdSetAllOp:
4997 genHWIntrinsicSimdSetAllOp(node);
4998 break;
4999 case HWIntrinsicInfo::SimdUnaryOp:
5000 genHWIntrinsicSimdUnaryOp(node);
5001 break;
5002 case HWIntrinsicInfo::SimdBinaryRMWOp:
5003 genHWIntrinsicSimdBinaryRMWOp(node);
5004 break;
5005 case HWIntrinsicInfo::SimdTernaryRMWOp:
5006 genHWIntrinsicSimdTernaryRMWOp(node);
5007 break;
5008 case HWIntrinsicInfo::Sha1HashOp:
5009 genHWIntrinsicShaHashOp(node);
5010 break;
5011 case HWIntrinsicInfo::Sha1RotateOp:
5012 genHWIntrinsicShaRotateOp(node);
5013 break;
5014
5015 default:
5016 NYI("HWIntrinsic form not implemented");
5017 }
5018}
5019
5020//------------------------------------------------------------------------
5021// genHWIntrinsicUnaryOp:
5022//
5023// Produce code for a GT_HWIntrinsic node with form UnaryOp.
5024//
5025// Consumes one scalar operand produces a scalar
5026//
5027// Arguments:
5028// node - the GT_HWIntrinsic node
5029//
5030// Return Value:
5031// None.
5032//
5033void CodeGen::genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node)
5034{
5035 GenTree* op1 = node->gtGetOp1();
5036 regNumber targetReg = node->gtRegNum;
5037 emitAttr attr = emitActualTypeSize(op1->TypeGet());
5038
5039 assert(targetReg != REG_NA);
5040 var_types targetType = node->TypeGet();
5041
5042 genConsumeOperands(node);
5043
5044 regNumber op1Reg = op1->gtRegNum;
5045
5046 instruction ins = getOpForHWIntrinsic(node, node->TypeGet());
5047
5048 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg);
5049
5050 genProduceReg(node);
5051}
5052
5053//------------------------------------------------------------------------
5054// genHWIntrinsicCrcOp:
5055//
5056// Produce code for a GT_HWIntrinsic node with form CrcOp.
5057//
5058// Consumes two scalar operands and produces a scalar result
5059//
5060// This form differs from BinaryOp because the attr depends on the size of op2
5061//
5062// Arguments:
5063// node - the GT_HWIntrinsic node
5064//
5065// Return Value:
5066// None.
5067//
5068void CodeGen::genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node)
5069{
5070 NYI("genHWIntrinsicCrcOp not implemented");
5071}
5072
5073//------------------------------------------------------------------------
5074// genHWIntrinsicSimdBinaryOp:
5075//
5076// Produce code for a GT_HWIntrinsic node with form SimdBinaryOp.
5077//
5078// Consumes two SIMD operands and produces a SIMD result
5079//
5080// Arguments:
5081// node - the GT_HWIntrinsic node
5082//
5083// Return Value:
5084// None.
5085//
5086void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node)
5087{
5088 GenTree* op1 = node->gtGetOp1();
5089 GenTree* op2 = node->gtGetOp2();
5090 var_types baseType = node->gtSIMDBaseType;
5091 regNumber targetReg = node->gtRegNum;
5092
5093 assert(targetReg != REG_NA);
5094 var_types targetType = node->TypeGet();
5095
5096 genConsumeOperands(node);
5097
5098 regNumber op1Reg = op1->gtRegNum;
5099 regNumber op2Reg = op2->gtRegNum;
5100
5101 assert(genIsValidFloatReg(op1Reg));
5102 assert(genIsValidFloatReg(op2Reg));
5103 assert(genIsValidFloatReg(targetReg));
5104
5105 instruction ins = getOpForHWIntrinsic(node, baseType);
5106 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5107 insOpts opt = genGetSimdInsOpt(attr, baseType);
5108
5109 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, opt);
5110
5111 genProduceReg(node);
5112}
5113
5114//------------------------------------------------------------------------
5115// genHWIntrinsicSwitchTable:
5116//
5117// Generate code for an immediate switch table
5118//
5119// In cases where an instruction only supports const immediate operands, we
5120// need to generate functionally correct code when the operand is not constant
5121//
5122// This is required by the HW Intrinsic design to handle indirect calls, such as:
5123// debugger calls
5124// reflection
5125// call backs
5126//
5127// Generated code implements a switch of this form
5128//
5129// switch (swReg)
5130// {
5131// case 0:
5132// ins0; // emitSwCase(0)
5133// break;
5134// case 1:
5135// ins1; // emitSwCase(1)
5136// break;
5137// ...
5138// ...
5139// ...
5140// case swMax - 1:
5141// insLast; // emitSwCase(swMax - 1)
5142// break;
5143// default:
5144// throw ArgumentOutOfRangeException
5145// }
5146//
5147// Generated code looks like:
5148//
5149// cmp swReg, #swMax
5150// b.hs ThrowArgumentOutOfRangeExceptionHelper
5151// adr tmpReg, labelFirst
5152// add tmpReg, tmpReg, swReg, LSL #3
5153// b [tmpReg]
5154// labelFirst:
5155// ins0
5156// b labelBreakTarget
5157// ins1
5158// b labelBreakTarget
5159// ...
5160// ...
5161// ...
5162// insLast
5163// b labelBreakTarget
5164// labelBreakTarget:
5165//
5166//
5167// Arguments:
5168// swReg - register containing the switch case to execute
5169// tmpReg - temporary integer register for calculating the switch indirect branch target
5170// swMax - the number of switch cases. If swReg >= swMax throw SCK_ARG_RNG_EXCPN
5171// emitSwCase - function like argument taking an immediate value and emitting one instruction
5172//
5173// Return Value:
5174// None.
5175//
5176template <typename HWIntrinsicSwitchCaseBody>
5177void CodeGen::genHWIntrinsicSwitchTable(regNumber swReg,
5178 regNumber tmpReg,
5179 int swMax,
5180 HWIntrinsicSwitchCaseBody emitSwCase)
5181{
5182 assert(swMax > 0);
5183 assert(swMax <= 256);
5184
5185 assert(genIsValidIntReg(tmpReg));
5186 assert(genIsValidIntReg(swReg));
5187
5188 BasicBlock* labelFirst = genCreateTempLabel();
5189 BasicBlock* labelBreakTarget = genCreateTempLabel();
5190
5191 // Detect and throw out of range exception
5192 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, swReg, swMax);
5193
5194 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
5195 genJumpToThrowHlpBlk(jmpGEU, SCK_ARG_RNG_EXCPN);
5196
5197 // Calculate switch target
5198 labelFirst->bbFlags |= BBF_JMP_TARGET;
5199
5200 // tmpReg = labelFirst
5201 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, labelFirst, tmpReg);
5202
5203 // tmpReg = labelFirst + swReg * 8
5204 getEmitter()->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, tmpReg, swReg, 3, INS_OPTS_LSL);
5205
5206 // br tmpReg
5207 getEmitter()->emitIns_R(INS_br, EA_PTRSIZE, tmpReg);
5208
5209 genDefineTempLabel(labelFirst);
5210 for (int i = 0; i < swMax; ++i)
5211 {
5212 unsigned prevInsCount = getEmitter()->emitInsCount;
5213
5214 emitSwCase(i);
5215
5216 assert(getEmitter()->emitInsCount == prevInsCount + 1);
5217
5218 inst_JMP(EJ_jmp, labelBreakTarget);
5219
5220 assert(getEmitter()->emitInsCount == prevInsCount + 2);
5221 }
5222 genDefineTempLabel(labelBreakTarget);
5223}
5224
5225//------------------------------------------------------------------------
5226// genHWIntrinsicSimdExtractOp:
5227//
5228// Produce code for a GT_HWIntrinsic node with form SimdExtractOp.
5229//
5230// Consumes one SIMD operand and one scalar
5231//
5232// The element index operand is typically a const immediate
5233// When it is not, a switch table is generated
5234//
5235// See genHWIntrinsicSwitchTable comments
5236//
5237// Arguments:
5238// node - the GT_HWIntrinsic node
5239//
5240// Return Value:
5241// None.
5242//
5243void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node)
5244{
5245 GenTree* op1 = node->gtGetOp1();
5246 GenTree* op2 = node->gtGetOp2();
5247 var_types simdType = op1->TypeGet();
5248 var_types targetType = node->TypeGet();
5249 regNumber targetReg = node->gtRegNum;
5250
5251 assert(targetReg != REG_NA);
5252
5253 genConsumeOperands(node);
5254
5255 regNumber op1Reg = op1->gtRegNum;
5256
5257 assert(genIsValidFloatReg(op1Reg));
5258
5259 emitAttr baseTypeSize = emitTypeSize(targetType);
5260
5261 int elements = emitTypeSize(simdType) / baseTypeSize;
5262
5263 auto emitSwCase = [&](int element) {
5264 assert(element >= 0);
5265 assert(element < elements);
5266
5267 if (varTypeIsFloating(targetType))
5268 {
5269 assert(genIsValidFloatReg(targetReg));
5270 getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op1Reg, 0, element);
5271 }
5272 else if (varTypeIsUnsigned(targetType) || (baseTypeSize == EA_8BYTE))
5273 {
5274 assert(genIsValidIntReg(targetReg));
5275 getEmitter()->emitIns_R_R_I(INS_umov, baseTypeSize, targetReg, op1Reg, element);
5276 }
5277 else
5278 {
5279 assert(genIsValidIntReg(targetReg));
5280 getEmitter()->emitIns_R_R_I(INS_smov, baseTypeSize, targetReg, op1Reg, element);
5281 }
5282 };
5283
5284 if (op2->isContainedIntOrIImmed())
5285 {
5286 int element = (int)op2->AsIntConCommon()->IconValue();
5287
5288 emitSwCase(element);
5289 }
5290 else
5291 {
5292 regNumber elementReg = op2->gtRegNum;
5293 regNumber tmpReg = node->GetSingleTempReg();
5294
5295 genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase);
5296 }
5297
5298 genProduceReg(node);
5299}
5300
5301//------------------------------------------------------------------------
5302// genHWIntrinsicSimdInsertOp:
5303//
5304// Produce code for a GT_HWIntrinsic node with form SimdInsertOp.
5305//
5306// Consumes one SIMD operand and two scalars
5307//
5308// The element index operand is typically a const immediate
5309// When it is not, a switch table is generated
5310//
5311// See genHWIntrinsicSwitchTable comments
5312//
5313// Arguments:
5314// node - the GT_HWIntrinsic node
5315//
5316// Return Value:
5317// None.
5318//
5319void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node)
5320{
5321 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5322 GenTree* op1 = argList->Current();
5323 GenTree* op2 = argList->Rest()->Current();
5324 GenTree* op3 = argList->Rest()->Rest()->Current();
5325 var_types simdType = op1->TypeGet();
5326 var_types baseType = node->gtSIMDBaseType;
5327 regNumber targetReg = node->gtRegNum;
5328
5329 assert(targetReg != REG_NA);
5330
5331 genConsumeRegs(op1);
5332 genConsumeRegs(op2);
5333 genConsumeRegs(op3);
5334
5335 regNumber op1Reg = op1->gtRegNum;
5336
5337 assert(genIsValidFloatReg(targetReg));
5338 assert(genIsValidFloatReg(op1Reg));
5339
5340 emitAttr baseTypeSize = emitTypeSize(baseType);
5341
5342 int elements = emitTypeSize(simdType) / baseTypeSize;
5343
5344 if (targetReg != op1Reg)
5345 {
5346 getEmitter()->emitIns_R_R(INS_mov, baseTypeSize, targetReg, op1Reg);
5347 }
5348
5349 if (op3->isContained())
5350 {
5351 // Handle vector element to vector element case
5352 //
5353 // If op3 is contained this is because lowering found an opportunity to contain a Simd.Extract in a Simd.Insert
5354 //
5355 regNumber op3Reg = op3->gtGetOp1()->gtRegNum;
5356
5357 assert(genIsValidFloatReg(op3Reg));
5358
5359 // op3 containment currently only occurs when
5360 // + op3 is a Simd.Extract() (gtHWIntrinsicId == NI_ARM64_SIMD_GetItem)
5361 // + element & srcLane are immediate constants
5362 assert(op2->isContainedIntOrIImmed());
5363 assert(op3->OperIs(GT_HWIntrinsic));
5364 assert(op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_ARM64_SIMD_GetItem);
5365 assert(op3->gtGetOp2()->isContainedIntOrIImmed());
5366
5367 int element = (int)op2->AsIntConCommon()->IconValue();
5368 int srcLane = (int)op3->gtGetOp2()->AsIntConCommon()->IconValue();
5369
5370 // Emit mov targetReg[element], op3Reg[srcLane]
5371 getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, srcLane);
5372 }
5373 else
5374 {
5375 // Handle scalar to vector element case
5376 // TODO-ARM64-CQ handle containing op3 scalar const where possible
5377 regNumber op3Reg = op3->gtRegNum;
5378
5379 auto emitSwCase = [&](int element) {
5380 assert(element >= 0);
5381 assert(element < elements);
5382
5383 if (varTypeIsFloating(baseType))
5384 {
5385 assert(genIsValidFloatReg(op3Reg));
5386 getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, 0);
5387 }
5388 else
5389 {
5390 assert(genIsValidIntReg(op3Reg));
5391 getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, op3Reg, element);
5392 }
5393 };
5394
5395 if (op2->isContainedIntOrIImmed())
5396 {
5397 int element = (int)op2->AsIntConCommon()->IconValue();
5398
5399 emitSwCase(element);
5400 }
5401 else
5402 {
5403 regNumber elementReg = op2->gtRegNum;
5404 regNumber tmpReg = node->GetSingleTempReg();
5405
5406 genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase);
5407 }
5408 }
5409
5410 genProduceReg(node);
5411}
5412
5413//------------------------------------------------------------------------
5414// genHWIntrinsicSimdSelectOp:
5415//
5416// Produce code for a GT_HWIntrinsic node with form SimdSelectOp.
5417//
5418// Consumes three SIMD operands and produces a SIMD result
5419//
5420// This intrinsic form requires one of the source registers to be the
5421// destination register. Inserts a INS_mov if this requirement is not met.
5422//
5423// Arguments:
5424// node - the GT_HWIntrinsic node
5425//
5426// Return Value:
5427// None.
5428//
5429void CodeGen::genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node)
5430{
5431 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5432 GenTree* op1 = argList->Current();
5433 GenTree* op2 = argList->Rest()->Current();
5434 GenTree* op3 = argList->Rest()->Rest()->Current();
5435 var_types baseType = node->gtSIMDBaseType;
5436 regNumber targetReg = node->gtRegNum;
5437
5438 assert(targetReg != REG_NA);
5439 var_types targetType = node->TypeGet();
5440
5441 genConsumeRegs(op1);
5442 genConsumeRegs(op2);
5443 genConsumeRegs(op3);
5444
5445 regNumber op1Reg = op1->gtRegNum;
5446 regNumber op2Reg = op2->gtRegNum;
5447 regNumber op3Reg = op3->gtRegNum;
5448
5449 assert(genIsValidFloatReg(op1Reg));
5450 assert(genIsValidFloatReg(op2Reg));
5451 assert(genIsValidFloatReg(op3Reg));
5452 assert(genIsValidFloatReg(targetReg));
5453
5454 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5455
5456 // Arm64 has three bit select forms; each uses three source registers
5457 // One of the sources is also the destination
5458 if (targetReg == op3Reg)
5459 {
5460 // op3 is target use bit insert if true
5461 // op3 = op3 ^ (op1 & (op2 ^ op3))
5462 getEmitter()->emitIns_R_R_R(INS_bit, attr, op3Reg, op2Reg, op1Reg);
5463 }
5464 else if (targetReg == op2Reg)
5465 {
5466 // op2 is target use bit insert if false
5467 // op2 = op2 ^ (~op1 & (op2 ^ op3))
5468 getEmitter()->emitIns_R_R_R(INS_bif, attr, op2Reg, op3Reg, op1Reg);
5469 }
5470 else
5471 {
5472 if (targetReg != op1Reg)
5473 {
5474 // target is not one of the sources, copy op1 to use bit select form
5475 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5476 }
5477 // use bit select
5478 // targetReg = op3 ^ (targetReg & (op2 ^ op3))
5479 getEmitter()->emitIns_R_R_R(INS_bsl, attr, targetReg, op2Reg, op3Reg);
5480 }
5481
5482 genProduceReg(node);
5483}
5484
5485//------------------------------------------------------------------------
5486// genHWIntrinsicSimdSetAllOp:
5487//
5488// Produce code for a GT_HWIntrinsic node with form SimdSetAllOp.
5489//
5490// Consumes single scalar operand and produces a SIMD result
5491//
5492// Arguments:
5493// node - the GT_HWIntrinsic node
5494//
5495// Return Value:
5496// None.
5497//
5498void CodeGen::genHWIntrinsicSimdSetAllOp(GenTreeHWIntrinsic* node)
5499{
5500 GenTree* op1 = node->gtGetOp1();
5501 var_types baseType = node->gtSIMDBaseType;
5502 regNumber targetReg = node->gtRegNum;
5503
5504 assert(targetReg != REG_NA);
5505 var_types targetType = node->TypeGet();
5506
5507 genConsumeOperands(node);
5508
5509 regNumber op1Reg = op1->gtRegNum;
5510
5511 assert(genIsValidFloatReg(targetReg));
5512 assert(genIsValidIntReg(op1Reg) || genIsValidFloatReg(op1Reg));
5513
5514 instruction ins = getOpForHWIntrinsic(node, baseType);
5515 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5516 insOpts opt = genGetSimdInsOpt(attr, baseType);
5517
5518 // TODO-ARM64-CQ Support contained immediate cases
5519
5520 if (genIsValidIntReg(op1Reg))
5521 {
5522 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
5523 }
5524 else
5525 {
5526 getEmitter()->emitIns_R_R_I(ins, attr, targetReg, op1Reg, 0, opt);
5527 }
5528
5529 genProduceReg(node);
5530}
5531
5532//------------------------------------------------------------------------
5533// genHWIntrinsicSimdUnaryOp:
5534//
5535// Produce code for a GT_HWIntrinsic node with form SimdUnaryOp.
5536//
5537// Consumes single SIMD operand and produces a SIMD result
5538//
5539// Arguments:
5540// node - the GT_HWIntrinsic node
5541//
5542// Return Value:
5543// None.
5544//
5545void CodeGen::genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node)
5546{
5547 GenTree* op1 = node->gtGetOp1();
5548 var_types baseType = node->gtSIMDBaseType;
5549 regNumber targetReg = node->gtRegNum;
5550
5551 assert(targetReg != REG_NA);
5552 var_types targetType = node->TypeGet();
5553
5554 genConsumeOperands(node);
5555
5556 regNumber op1Reg = op1->gtRegNum;
5557
5558 assert(genIsValidFloatReg(op1Reg));
5559 assert(genIsValidFloatReg(targetReg));
5560
5561 instruction ins = getOpForHWIntrinsic(node, baseType);
5562 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5563 insOpts opt = genGetSimdInsOpt(attr, baseType);
5564
5565 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
5566
5567 genProduceReg(node);
5568}
5569
5570//------------------------------------------------------------------------
5571// genHWIntrinsicSimdBinaryRMWOp:
5572//
5573// Produce code for a GT_HWIntrinsic node with form SimdBinaryRMWOp.
5574//
5575// Consumes two SIMD operands and produces a SIMD result.
5576// First operand is both source and destination.
5577//
5578// Arguments:
5579// node - the GT_HWIntrinsic node
5580//
5581// Return Value:
5582// None.
5583//
5584void CodeGen::genHWIntrinsicSimdBinaryRMWOp(GenTreeHWIntrinsic* node)
5585{
5586 GenTree* op1 = node->gtGetOp1();
5587 GenTree* op2 = node->gtGetOp2();
5588 var_types baseType = node->gtSIMDBaseType;
5589 regNumber targetReg = node->gtRegNum;
5590
5591 assert(targetReg != REG_NA);
5592
5593 genConsumeOperands(node);
5594
5595 regNumber op1Reg = op1->gtRegNum;
5596 regNumber op2Reg = op2->gtRegNum;
5597
5598 assert(genIsValidFloatReg(op1Reg));
5599 assert(genIsValidFloatReg(op2Reg));
5600 assert(genIsValidFloatReg(targetReg));
5601
5602 instruction ins = getOpForHWIntrinsic(node, baseType);
5603 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5604 insOpts opt = genGetSimdInsOpt(attr, baseType);
5605
5606 if (targetReg != op1Reg)
5607 {
5608 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5609 }
5610 getEmitter()->emitIns_R_R(ins, attr, targetReg, op2Reg, opt);
5611
5612 genProduceReg(node);
5613}
5614
5615//------------------------------------------------------------------------
5616// genHWIntrinsicSimdTernaryRMWOp:
5617//
5618// Produce code for a GT_HWIntrinsic node with form SimdTernaryRMWOp
5619//
5620// Consumes three SIMD operands and produces a SIMD result.
5621// First operand is both source and destination.
5622//
5623// Arguments:
5624// node - the GT_HWIntrinsic node
5625//
5626// Return Value:
5627// None.
5628//
5629void CodeGen::genHWIntrinsicSimdTernaryRMWOp(GenTreeHWIntrinsic* node)
5630{
5631 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5632 GenTree* op1 = argList->Current();
5633 GenTree* op2 = argList->Rest()->Current();
5634 GenTree* op3 = argList->Rest()->Rest()->Current();
5635 var_types baseType = node->gtSIMDBaseType;
5636 regNumber targetReg = node->gtRegNum;
5637
5638 assert(targetReg != REG_NA);
5639 var_types targetType = node->TypeGet();
5640
5641 genConsumeRegs(op1);
5642 genConsumeRegs(op2);
5643 genConsumeRegs(op3);
5644
5645 regNumber op1Reg = op1->gtRegNum;
5646 regNumber op2Reg = op2->gtRegNum;
5647 regNumber op3Reg = op3->gtRegNum;
5648
5649 assert(genIsValidFloatReg(op1Reg));
5650 assert(genIsValidFloatReg(op2Reg));
5651 assert(genIsValidFloatReg(op3Reg));
5652 assert(genIsValidFloatReg(targetReg));
5653 assert(targetReg != op2Reg);
5654 assert(targetReg != op3Reg);
5655
5656 instruction ins = getOpForHWIntrinsic(node, baseType);
5657 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5658
5659 if (targetReg != op1Reg)
5660 {
5661 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5662 }
5663
5664 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg);
5665
5666 genProduceReg(node);
5667}
5668
5669//------------------------------------------------------------------------
5670// genHWIntrinsicShaHashOp:
5671//
5672// Produce code for a GT_HWIntrinsic node with form Sha1HashOp.
5673// Used in Arm64 SHA1 Hash operations.
5674//
5675// Consumes three operands and returns a Simd result.
5676// First Simd operand is both source and destination.
5677// Second Operand is an unsigned int.
5678// Third operand is a simd operand.
5679
5680// Arguments:
5681// node - the GT_HWIntrinsic node
5682//
5683// Return Value:
5684// None.
5685//
5686void CodeGen::genHWIntrinsicShaHashOp(GenTreeHWIntrinsic* node)
5687{
5688 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5689 GenTree* op1 = argList->Current();
5690 GenTree* op2 = argList->Rest()->Current();
5691 GenTree* op3 = argList->Rest()->Rest()->Current();
5692 var_types baseType = node->gtSIMDBaseType;
5693 regNumber targetReg = node->gtRegNum;
5694
5695 assert(targetReg != REG_NA);
5696 var_types targetType = node->TypeGet();
5697
5698 genConsumeRegs(op1);
5699 genConsumeRegs(op2);
5700 genConsumeRegs(op3);
5701
5702 regNumber op1Reg = op1->gtRegNum;
5703 regNumber op2Reg = op2->gtRegNum;
5704 regNumber op3Reg = op3->gtRegNum;
5705
5706 assert(genIsValidFloatReg(op1Reg));
5707 assert(genIsValidFloatReg(op3Reg));
5708 assert(targetReg != op2Reg);
5709 assert(targetReg != op3Reg);
5710
5711 instruction ins = getOpForHWIntrinsic(node, baseType);
5712 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5713
5714 assert(genIsValidIntReg(op2Reg));
5715 regNumber elementReg = op2->gtRegNum;
5716 regNumber tmpReg = node->GetSingleTempReg(RBM_ALLFLOAT);
5717
5718 getEmitter()->emitIns_R_R(INS_fmov, EA_4BYTE, tmpReg, elementReg);
5719
5720 if (targetReg != op1Reg)
5721 {
5722 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5723 }
5724
5725 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, tmpReg, op3Reg);
5726
5727 genProduceReg(node);
5728}
5729
5730//------------------------------------------------------------------------
5731// genHWIntrinsicShaRotateOp:
5732//
5733// Produce code for a GT_HWIntrinsic node with form Sha1RotateOp.
5734// Used in Arm64 SHA1 Rotate operations.
5735//
5736// Consumes one integer operand and returns unsigned int result.
5737//
5738// Arguments:
5739// node - the GT_HWIntrinsic node
5740//
5741// Return Value:
5742// None.
5743//
5744void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node)
5745{
5746 GenTree* op1 = node->gtGetOp1();
5747 regNumber targetReg = node->gtRegNum;
5748 emitAttr attr = emitActualTypeSize(node);
5749
5750 assert(targetReg != REG_NA);
5751 var_types targetType = node->TypeGet();
5752
5753 genConsumeOperands(node);
5754
5755 instruction ins = getOpForHWIntrinsic(node, node->TypeGet());
5756 regNumber elementReg = op1->gtRegNum;
5757 regNumber tmpReg = node->GetSingleTempReg(RBM_ALLFLOAT);
5758
5759 getEmitter()->emitIns_R_R(INS_fmov, EA_4BYTE, tmpReg, elementReg);
5760 getEmitter()->emitIns_R_R(ins, EA_4BYTE, tmpReg, tmpReg);
5761 getEmitter()->emitIns_R_R(INS_fmov, attr, targetReg, tmpReg);
5762
5763 genProduceReg(node);
5764}
5765
5766#endif // FEATURE_HW_INTRINSICS
5767
5768/*****************************************************************************
5769 * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
5770 * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
 * disassembler thinks the instructions are the same as we do.
5772 */
5773
5774// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
5775// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
5776//#define ALL_ARM64_EMITTER_UNIT_TESTS
5777
5778#if defined(DEBUG)
5779void CodeGen::genArm64EmitterUnitTests()
5780{
5781 if (!verbose)
5782 {
5783 return;
5784 }
5785
5786 if (!compiler->opts.altJit)
5787 {
5788 // No point doing this in a "real" JIT.
5789 return;
5790 }
5791
5792 // Mark the "fake" instructions in the output.
5793 printf("*************** In genArm64EmitterUnitTests()\n");
5794
5795 emitter* theEmitter = getEmitter();
5796
5797#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5798 // We use this:
5799 // genDefineTempLabel(genCreateTempLabel());
5800 // to create artificial labels to help separate groups of tests.
5801
5802 //
5803 // Loads/Stores basic general register
5804 //
5805
5806 genDefineTempLabel(genCreateTempLabel());
5807
5808 // ldr/str Xt, [reg]
5809 theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_R8, REG_R9);
5810 theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
5811 theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
5812 theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_R8, REG_R9);
5813 theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
5814 theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
5815
5816 // ldr/str Wt, [reg]
5817 theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_R8, REG_R9);
5818 theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
5819 theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
5820 theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_R8, REG_R9);
5821 theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
5822 theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
5823
5824 theEmitter->emitIns_R_R(INS_ldrsb, EA_4BYTE, REG_R8, REG_R9); // target Wt
5825 theEmitter->emitIns_R_R(INS_ldrsh, EA_4BYTE, REG_R8, REG_R9); // target Wt
5826 theEmitter->emitIns_R_R(INS_ldrsb, EA_8BYTE, REG_R8, REG_R9); // target Xt
5827 theEmitter->emitIns_R_R(INS_ldrsh, EA_8BYTE, REG_R8, REG_R9); // target Xt
5828 theEmitter->emitIns_R_R(INS_ldrsw, EA_8BYTE, REG_R8, REG_R9); // target Xt
5829
5830 theEmitter->emitIns_R_R_I(INS_ldurb, EA_4BYTE, REG_R8, REG_R9, 1);
5831 theEmitter->emitIns_R_R_I(INS_ldurh, EA_4BYTE, REG_R8, REG_R9, 1);
5832 theEmitter->emitIns_R_R_I(INS_sturb, EA_4BYTE, REG_R8, REG_R9, 1);
5833 theEmitter->emitIns_R_R_I(INS_sturh, EA_4BYTE, REG_R8, REG_R9, 1);
5834 theEmitter->emitIns_R_R_I(INS_ldursb, EA_4BYTE, REG_R8, REG_R9, 1);
5835 theEmitter->emitIns_R_R_I(INS_ldursb, EA_8BYTE, REG_R8, REG_R9, 1);
5836 theEmitter->emitIns_R_R_I(INS_ldursh, EA_4BYTE, REG_R8, REG_R9, 1);
5837 theEmitter->emitIns_R_R_I(INS_ldursh, EA_8BYTE, REG_R8, REG_R9, 1);
5838 theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_R9, 1);
5839 theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_R8, REG_R9, 1);
5840 theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_R8, REG_R9, 1);
5841 theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1);
5842 theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1);
5843
5844 // SP and ZR tests
5845 theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1);
5846 theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1);
5847 theEmitter->emitIns_R_R_I(INS_ldurh, EA_8BYTE, REG_ZR, REG_SP, 1);
5848
5849 // scaled
5850 theEmitter->emitIns_R_R_I(INS_ldrb, EA_1BYTE, REG_R8, REG_R9, 1);
5851 theEmitter->emitIns_R_R_I(INS_ldrh, EA_2BYTE, REG_R8, REG_R9, 2);
5852 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 4);
5853 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 8);
5854
5855 // pre-/post-indexed (unscaled)
5856 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
5857 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
5858 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
5859 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
5860
5861 // ldar/stlr Rt, [reg]
5862 theEmitter->emitIns_R_R(INS_ldar, EA_8BYTE, REG_R9, REG_R8);
5863 theEmitter->emitIns_R_R(INS_ldar, EA_4BYTE, REG_R7, REG_R10);
5864 theEmitter->emitIns_R_R(INS_ldarb, EA_4BYTE, REG_R5, REG_R11);
5865 theEmitter->emitIns_R_R(INS_ldarh, EA_4BYTE, REG_R5, REG_R12);
5866
5867 theEmitter->emitIns_R_R(INS_stlr, EA_8BYTE, REG_R9, REG_R8);
5868 theEmitter->emitIns_R_R(INS_stlr, EA_4BYTE, REG_R7, REG_R13);
5869 theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14);
5870 theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15);
5871
5872 // ldaxr Rt, [reg]
5873 theEmitter->emitIns_R_R(INS_ldaxr, EA_8BYTE, REG_R9, REG_R8);
5874 theEmitter->emitIns_R_R(INS_ldaxr, EA_4BYTE, REG_R7, REG_R10);
5875 theEmitter->emitIns_R_R(INS_ldaxrb, EA_4BYTE, REG_R5, REG_R11);
5876 theEmitter->emitIns_R_R(INS_ldaxrh, EA_4BYTE, REG_R5, REG_R12);
5877
5878 // ldxr Rt, [reg]
5879 theEmitter->emitIns_R_R(INS_ldxr, EA_8BYTE, REG_R9, REG_R8);
5880 theEmitter->emitIns_R_R(INS_ldxr, EA_4BYTE, REG_R7, REG_R10);
5881 theEmitter->emitIns_R_R(INS_ldxrb, EA_4BYTE, REG_R5, REG_R11);
5882 theEmitter->emitIns_R_R(INS_ldxrh, EA_4BYTE, REG_R5, REG_R12);
5883
5884 // stxr Ws, Rt, [reg]
5885 theEmitter->emitIns_R_R_R(INS_stxr, EA_8BYTE, REG_R1, REG_R9, REG_R8);
5886 theEmitter->emitIns_R_R_R(INS_stxr, EA_4BYTE, REG_R3, REG_R7, REG_R13);
5887 theEmitter->emitIns_R_R_R(INS_stxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14);
5888 theEmitter->emitIns_R_R_R(INS_stxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15);
5889
5890 // stlxr Ws, Rt, [reg]
5891 theEmitter->emitIns_R_R_R(INS_stlxr, EA_8BYTE, REG_R1, REG_R9, REG_R8);
5892 theEmitter->emitIns_R_R_R(INS_stlxr, EA_4BYTE, REG_R3, REG_R7, REG_R13);
5893 theEmitter->emitIns_R_R_R(INS_stlxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14);
5894 theEmitter->emitIns_R_R_R(INS_stlxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15);
5895
5896#endif // ALL_ARM64_EMITTER_UNIT_TESTS
5897
5898#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5899 //
5900 // Compares
5901 //
5902
5903 genDefineTempLabel(genCreateTempLabel());
5904
5905 // cmp reg, reg
5906 theEmitter->emitIns_R_R(INS_cmp, EA_8BYTE, REG_R8, REG_R9);
5907 theEmitter->emitIns_R_R(INS_cmn, EA_8BYTE, REG_R8, REG_R9);
5908
5909 // cmp reg, imm
5910 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0);
5911 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095);
5912 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 1 << 12);
5913 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095 << 12);
5914
5915 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0);
5916 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095);
5917 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 1 << 12);
5918 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095 << 12);
5919
5920 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -1);
5921 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -0xfff);
5922 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
5923 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
5924
5925 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -1);
5926 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -0xfff);
5927 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
5928 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
5929
5930#endif // ALL_ARM64_EMITTER_UNIT_TESTS
5931
5932#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5933 // R_R
5934 //
5935
5936 genDefineTempLabel(genCreateTempLabel());
5937
5938 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12);
5939 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_R2, REG_R13);
5940 theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_R3, REG_R14);
5941 theEmitter->emitIns_R_R(INS_rev, EA_8BYTE, REG_R4, REG_R15);
5942 theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_R5, REG_R0);
5943 theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_R6, REG_R1);
5944
5945 theEmitter->emitIns_R_R(INS_cls, EA_4BYTE, REG_R7, REG_R2);
5946 theEmitter->emitIns_R_R(INS_clz, EA_4BYTE, REG_R8, REG_R3);
5947 theEmitter->emitIns_R_R(INS_rbit, EA_4BYTE, REG_R9, REG_R4);
5948 theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5);
5949 theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6);
5950
5951#endif // ALL_ARM64_EMITTER_UNIT_TESTS
5952
5953#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5954 //
5955 // R_I
5956 //
5957
5958 genDefineTempLabel(genCreateTempLabel());
5959
5960 // mov reg, imm(i16,hw)
5961 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000000001234);
5962 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000043210000);
5963 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000567800000000);
5964 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765000000000000);
5965 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFFFFFF1234);
5966 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFF4321FFFF);
5967 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFF5678FFFFFFFF);
5968 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765FFFFFFFFFFFF);
5969
5970 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00001234);
5971 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x87650000);
5972 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xFFFF1234);
5973 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x4567FFFF);
5974
5975 // mov reg, imm(N,r,s)
5976 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
5977 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x6666666666666666);
5978 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_SP, 0x7FFF00007FFF0000);
5979 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x5555555555555555);
5980 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xE003E003E003E003);
5981 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0707070707070707);
5982
5983 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00FFFFF0);
5984 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x66666666);
5985 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x03FFC000);
5986 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x55555555);
5987 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xE003E003);
5988 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x07070707);
5989
5990 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0xE003E003E003E003);
5991 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
5992 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x6666666666666666);
5993 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x0707070707070707);
5994 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x7FFF00007FFF0000);
5995 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x5555555555555555);
5996
5997 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xE003E003);
5998 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x00FFFFF0);
5999 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x66666666);
6000 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x07070707);
6001 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000);
6002 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555);
6003
6004#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6005
6006#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6007 //
6008 // R_R
6009 //
6010
6011 genDefineTempLabel(genCreateTempLabel());
6012
6013 // tst reg, reg
6014 theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10);
6015
6016 // mov reg, reg
6017 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10);
6018 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R8, REG_SP);
6019 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9);
6020
6021 theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11);
6022 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12);
6023 theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13);
6024
6025 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10);
6026 theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11);
6027 theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12);
6028 theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13);
6029
6030 theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10);
6031 theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11);
6032 theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12);
6033 theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt
6034 theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt
6035
6036 theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10);
6037 theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11);
6038 theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13);
6039 theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14);
6040
6041#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6042
6043#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6044 //
6045 // R_I_I
6046 //
6047
6048 genDefineTempLabel(genCreateTempLabel());
6049
6050 // mov reg, imm(i16,hw)
6051 theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x1234, 0, INS_OPTS_LSL);
6052 theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
6053
6054 theEmitter->emitIns_R_I_I(INS_movk, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
6055 theEmitter->emitIns_R_I_I(INS_movn, EA_8BYTE, REG_R8, 0x5678, 32, INS_OPTS_LSL);
6056 theEmitter->emitIns_R_I_I(INS_movz, EA_8BYTE, REG_R8, 0x8765, 48, INS_OPTS_LSL);
6057
6058 theEmitter->emitIns_R_I_I(INS_movk, EA_4BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
6059 theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL);
6060 theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL);
6061
6062#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6063
6064#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6065 //
6066 // R_R_I
6067 //
6068
6069 genDefineTempLabel(genCreateTempLabel());
6070
6071 theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1);
6072 theEmitter->emitIns_R_R_I(INS_lsl, EA_4BYTE, REG_R9, REG_R3, 18);
6073 theEmitter->emitIns_R_R_I(INS_lsr, EA_8BYTE, REG_R7, REG_R0, 37);
6074 theEmitter->emitIns_R_R_I(INS_lsr, EA_4BYTE, REG_R0, REG_R1, 2);
6075 theEmitter->emitIns_R_R_I(INS_asr, EA_8BYTE, REG_R2, REG_R3, 53);
6076 theEmitter->emitIns_R_R_I(INS_asr, EA_4BYTE, REG_R9, REG_R3, 18);
6077
6078 theEmitter->emitIns_R_R_I(INS_and, EA_8BYTE, REG_R2, REG_R3, 0x5555555555555555);
6079 theEmitter->emitIns_R_R_I(INS_ands, EA_8BYTE, REG_R1, REG_R5, 0x6666666666666666);
6080 theEmitter->emitIns_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, 0x0707070707070707);
6081 theEmitter->emitIns_R_R_I(INS_orr, EA_8BYTE, REG_SP, REG_R3, 0xFFFC000000000000);
6082 theEmitter->emitIns_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, 0xE003E003);
6083
6084 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 1);
6085 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 31);
6086 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 32);
6087 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 63);
6088
6089 theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 1);
6090 theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 31);
6091
6092 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6093 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 1);
6094 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -1);
6095 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6096 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6097 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6098 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6099 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6100 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6101
6102 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6103 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 1);
6104 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -1);
6105 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6106 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6107 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6108 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6109 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6110 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6111
6112 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6113 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 1);
6114 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -1);
6115 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6116 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6117 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6118 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6119 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6120 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6121
6122 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6123 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 1);
6124 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -1);
6125 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6126 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6127 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6128 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6129 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6130 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6131
6132 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6133 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 1);
6134 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -1);
6135 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6136 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6137 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6138 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6139 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6140 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6141
6142 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6143 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 1);
6144 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -1);
6145 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6146 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6147 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6148 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6149 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6150 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6151
6152 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6153 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 1);
6154 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -1);
6155 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6156 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6157 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6158 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6159 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6160 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6161
6162 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6163 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 1);
6164 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -1);
6165 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6166 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6167 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6168 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6169 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6170 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6171
6172#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6173
6174#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6175 //
6176 // R_R_I cmp/txt
6177 //
6178
6179 // cmp
6180 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0);
6181 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0);
6182
6183 // CMP (shifted register)
6184 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
6185 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
6186 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
6187
6188 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
6189 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
6190 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
6191
6192 // TST (shifted register)
6193 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
6194 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
6195 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
6196 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 34, INS_OPTS_ROR);
6197
6198 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
6199 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
6200 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
6201 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 24, INS_OPTS_ROR);
6202
6203 // CMP (extended register)
6204 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
6205 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTH);
6206 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTW); // "cmp x8, x9, UXTW"; msdis
6207 // disassembles this "cmp x8,x9",
6208 // which looks like an msdis issue.
6209 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTX);
6210
6211 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
6212 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTH);
6213 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTW);
6214 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTX);
6215
6216 // CMP 64-bit (extended register) and left shift
6217 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_UXTB);
6218 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
6219 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_UXTW);
6220 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTX);
6221
6222 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_SXTB);
6223 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
6224 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_SXTW);
6225 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTX);
6226
6227 // CMP 32-bit (extended register) and left shift
6228 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
6229 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
6230 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTW);
6231
6232 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
6233 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
6234 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW);
6235
6236#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6237
6238#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6239 //
6240 // R_R_R
6241 //
6242
6243 genDefineTempLabel(genCreateTempLabel());
6244
6245 theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6246 theEmitter->emitIns_R_R_R(INS_lsr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6247 theEmitter->emitIns_R_R_R(INS_asr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6248 theEmitter->emitIns_R_R_R(INS_ror, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6249 theEmitter->emitIns_R_R_R(INS_adc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6250 theEmitter->emitIns_R_R_R(INS_adcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6251 theEmitter->emitIns_R_R_R(INS_sbc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6252 theEmitter->emitIns_R_R_R(INS_sbcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6253 theEmitter->emitIns_R_R_R(INS_udiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6254 theEmitter->emitIns_R_R_R(INS_sdiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6255 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6256 theEmitter->emitIns_R_R_R(INS_mneg, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6257 theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6258 theEmitter->emitIns_R_R_R(INS_smnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6259 theEmitter->emitIns_R_R_R(INS_smulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6260 theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6261 theEmitter->emitIns_R_R_R(INS_umnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6262 theEmitter->emitIns_R_R_R(INS_umulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6263 theEmitter->emitIns_R_R_R(INS_lslv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6264 theEmitter->emitIns_R_R_R(INS_lsrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6265 theEmitter->emitIns_R_R_R(INS_asrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6266 theEmitter->emitIns_R_R_R(INS_rorv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6267
6268 theEmitter->emitIns_R_R_R(INS_lsl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6269 theEmitter->emitIns_R_R_R(INS_lsr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6270 theEmitter->emitIns_R_R_R(INS_asr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6271 theEmitter->emitIns_R_R_R(INS_ror, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6272 theEmitter->emitIns_R_R_R(INS_adc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6273 theEmitter->emitIns_R_R_R(INS_adcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6274 theEmitter->emitIns_R_R_R(INS_sbc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6275 theEmitter->emitIns_R_R_R(INS_sbcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6276 theEmitter->emitIns_R_R_R(INS_udiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6277 theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6278 theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6279 theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6280 theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6281 theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6282 theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6283 theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6284 theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6285 theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6286 theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6287 theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6288 theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6289 theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6290
6291#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6292
6293#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6294 //
6295 // ARMv8.1 LSE Atomics
6296 //
6297 genDefineTempLabel(genCreateTempLabel());
6298
6299 theEmitter->emitIns_R_R_R(INS_casb, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6300 theEmitter->emitIns_R_R_R(INS_casab, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6301 theEmitter->emitIns_R_R_R(INS_casalb, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6302 theEmitter->emitIns_R_R_R(INS_caslb, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6303 theEmitter->emitIns_R_R_R(INS_cash, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6304 theEmitter->emitIns_R_R_R(INS_casah, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6305 theEmitter->emitIns_R_R_R(INS_casalh, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6306 theEmitter->emitIns_R_R_R(INS_caslh, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6307 theEmitter->emitIns_R_R_R(INS_cas, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6308 theEmitter->emitIns_R_R_R(INS_casa, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6309 theEmitter->emitIns_R_R_R(INS_casal, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6310 theEmitter->emitIns_R_R_R(INS_casl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6311 theEmitter->emitIns_R_R_R(INS_cas, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6312 theEmitter->emitIns_R_R_R(INS_casa, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6313 theEmitter->emitIns_R_R_R(INS_casal, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6314 theEmitter->emitIns_R_R_R(INS_casl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6315 theEmitter->emitIns_R_R_R(INS_ldaddb, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6316 theEmitter->emitIns_R_R_R(INS_ldaddab, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6317 theEmitter->emitIns_R_R_R(INS_ldaddalb, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6318 theEmitter->emitIns_R_R_R(INS_ldaddlb, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6319 theEmitter->emitIns_R_R_R(INS_ldaddh, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6320 theEmitter->emitIns_R_R_R(INS_ldaddah, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6321 theEmitter->emitIns_R_R_R(INS_ldaddalh, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6322 theEmitter->emitIns_R_R_R(INS_ldaddlh, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6323 theEmitter->emitIns_R_R_R(INS_ldadd, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6324 theEmitter->emitIns_R_R_R(INS_ldadda, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6325 theEmitter->emitIns_R_R_R(INS_ldaddal, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6326 theEmitter->emitIns_R_R_R(INS_ldaddl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6327 theEmitter->emitIns_R_R_R(INS_ldadd, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6328 theEmitter->emitIns_R_R_R(INS_ldadda, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6329 theEmitter->emitIns_R_R_R(INS_ldaddal, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6330 theEmitter->emitIns_R_R_R(INS_ldaddl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6331 theEmitter->emitIns_R_R_R(INS_swpb, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6332 theEmitter->emitIns_R_R_R(INS_swpab, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6333 theEmitter->emitIns_R_R_R(INS_swpalb, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6334 theEmitter->emitIns_R_R_R(INS_swplb, EA_1BYTE, REG_R8, REG_R9, REG_R10);
6335 theEmitter->emitIns_R_R_R(INS_swph, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6336 theEmitter->emitIns_R_R_R(INS_swpah, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6337 theEmitter->emitIns_R_R_R(INS_swpalh, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6338 theEmitter->emitIns_R_R_R(INS_swplh, EA_2BYTE, REG_R8, REG_R9, REG_R10);
6339 theEmitter->emitIns_R_R_R(INS_swp, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6340 theEmitter->emitIns_R_R_R(INS_swpa, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6341 theEmitter->emitIns_R_R_R(INS_swpal, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6342 theEmitter->emitIns_R_R_R(INS_swpl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6343 theEmitter->emitIns_R_R_R(INS_swp, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6344 theEmitter->emitIns_R_R_R(INS_swpa, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6345 theEmitter->emitIns_R_R_R(INS_swpal, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6346 theEmitter->emitIns_R_R_R(INS_swpl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6347
6348 theEmitter->emitIns_R_R(INS_staddb, EA_1BYTE, REG_R8, REG_R10);
6349 theEmitter->emitIns_R_R(INS_staddlb, EA_1BYTE, REG_R8, REG_R10);
6350 theEmitter->emitIns_R_R(INS_staddh, EA_2BYTE, REG_R8, REG_R10);
6351 theEmitter->emitIns_R_R(INS_staddlh, EA_2BYTE, REG_R8, REG_R10);
6352 theEmitter->emitIns_R_R(INS_stadd, EA_4BYTE, REG_R8, REG_R10);
6353 theEmitter->emitIns_R_R(INS_staddl, EA_4BYTE, REG_R8, REG_R10);
6354 theEmitter->emitIns_R_R(INS_stadd, EA_8BYTE, REG_R8, REG_R10);
6355 theEmitter->emitIns_R_R(INS_staddl, EA_8BYTE, REG_R8, REG_R10);
6356#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6357
6358#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6359 //
6360 // R_R_I_I
6361 //
6362
6363 genDefineTempLabel(genCreateTempLabel());
6364
6365 theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39);
6366 theEmitter->emitIns_R_R_I_I(INS_bfm, EA_8BYTE, REG_R1, REG_R5, 20, 23);
6367 theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_8BYTE, REG_R8, REG_R9, 36, 7);
6368
6369 theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_8BYTE, REG_R2, REG_R3, 7, 37);
6370 theEmitter->emitIns_R_R_I_I(INS_bfi, EA_8BYTE, REG_R1, REG_R5, 23, 21);
6371 theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_8BYTE, REG_R8, REG_R9, 39, 5);
6372
6373 theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_8BYTE, REG_R2, REG_R3, 10, 24);
6374 theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_8BYTE, REG_R1, REG_R5, 26, 16);
6375 theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, REG_R8, REG_R9, 42, 8);
6376
6377 theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_4BYTE, REG_R2, REG_R3, 4, 19);
6378 theEmitter->emitIns_R_R_I_I(INS_bfm, EA_4BYTE, REG_R1, REG_R5, 10, 13);
6379 theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_4BYTE, REG_R8, REG_R9, 16, 7);
6380
6381 theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_4BYTE, REG_R2, REG_R3, 5, 17);
6382 theEmitter->emitIns_R_R_I_I(INS_bfi, EA_4BYTE, REG_R1, REG_R5, 13, 11);
6383 theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_4BYTE, REG_R8, REG_R9, 19, 5);
6384
6385 theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, REG_R2, REG_R3, 3, 14);
6386 theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9);
6387 theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8);
6388
6389#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6390
6391#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6392 //
6393 // R_R_R_I
6394 //
6395
6396 genDefineTempLabel(genCreateTempLabel());
6397
6398 // ADD (extended register)
6399 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
6400 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
6401 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
6402 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
6403 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
6404 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
6405 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
6406 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
6407
6408 // ADD (extended register) and left shift
6409 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
6410 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
6411 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
6412 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
6413 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
6414 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
6415 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
6416 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
6417
6418 // ADD (shifted register)
6419 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6420 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31, INS_OPTS_LSL);
6421 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32, INS_OPTS_LSR);
6422 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 33, INS_OPTS_ASR);
6423
6424 // EXTR (extract field from register pair)
6425 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1);
6426 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31);
6427 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32);
6428 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 63);
6429
6430 theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1);
6431 theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 31);
6432
6433 // SUB (extended register)
6434 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
6435 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
6436 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
6437 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
6438 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
6439 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
6440 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
6441 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
6442
6443 // SUB (extended register) and left shift
6444 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
6445 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
6446 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
6447 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
6448 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
6449 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
6450 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
6451 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
6452
6453 // SUB (shifted register)
6454 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6455 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 27, INS_OPTS_LSL);
6456 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 28, INS_OPTS_LSR);
6457 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 29, INS_OPTS_ASR);
6458
6459 // bit operations
6460 theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6461 theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6462 theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6463 theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6464 theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6465 theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6466 theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6467 theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6468
6469 theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
6470 theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
6471 theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
6472 theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
6473 theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
6474 theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
6475 theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
6476 theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
6477
6478 theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6479 theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6480 theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6481 theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6482 theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6483 theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6484 theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6485 theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6486
6487 theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
6488 theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
6489 theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
6490 theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
6491 theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
6492 theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
6493 theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
6494 theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
6495
6496#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6497
6498#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6499 //
6500 // R_R_R_I -- load/store pair
6501 //
6502
6503 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6504 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6505 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
6506 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
6507
6508 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6509 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6510 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
6511 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
6512
6513 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6514 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6515 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
6516 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
6517 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6518 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6519 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6520 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6521
6522 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6523 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6524 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
6525 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
6526 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6527 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6528 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6529 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6530
6531 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6532 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16);
6533 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6534 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6535
6536 // SP and ZR tests
6537 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
6538 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
6539 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
6540 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
6541 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX);
6542 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX);
6543
6544#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6545
6546#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6547 //
6548 // R_R_R_Ext -- load/store shifted/extend
6549 //
6550
6551 genDefineTempLabel(genCreateTempLabel());
6552
6553 // LDR (register)
6554 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6555 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6556 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
6557 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6558 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
6559 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6560 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
6561 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6562 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
6563 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6564 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
6565
6566 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6567 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6568 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
6569 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6570 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
6571 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6572 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
6573 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6574 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
6575 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6576 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6577
6578 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
6579 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6580 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
6581 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6582 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
6583 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6584 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
6585 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6586 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
6587 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6588 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6589
6590 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
6591 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6592 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6593 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6594 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6595
6596 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6597 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6598 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
6599 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6600 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
6601 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6602 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
6603 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6604 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
6605 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6606 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6607
6608 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6609 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6610 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6611 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
6612 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6613 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
6614 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6615 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
6616 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6617 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
6618 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6619 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6620
6621 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6622 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6623 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6624 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6625 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6626 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6627
6628 // STR (register)
6629 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6630 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6631 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
6632 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6633 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
6634 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6635 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
6636 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6637 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
6638 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6639 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
6640
6641 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6642 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6643 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
6644 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6645 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
6646 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6647 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
6648 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6649 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
6650 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6651 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6652
6653 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
6654 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6655 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
6656 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6657 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
6658 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6659 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
6660 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6661 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
6662 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6663 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6664
6665 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
6666 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6667 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6668 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6669 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6670
6671#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6672
6673#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6674 //
6675 // R_R_R_R
6676 //
6677
6678 genDefineTempLabel(genCreateTempLabel());
6679
6680 theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10);
6681 theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11);
6682 theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12);
6683 theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13);
6684 theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14);
6685 theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15);
6686
6687 theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19);
6688 theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20);
6689 theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_8BYTE, REG_R8, REG_R23, REG_R6, REG_R21);
6690 theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_8BYTE, REG_R9, REG_R24, REG_R7, REG_R22);
6691 theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23);
6692 theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24);
6693
6694#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6695
6696#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6697 // R_COND
6698 //
6699
6700 // cset reg, cond
6701 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq
6702 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne
6703 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R7, INS_COND_HS); // hs
6704 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R6, INS_COND_LO); // lo
6705 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R5, INS_COND_MI); // mi
6706 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R4, INS_COND_PL); // pl
6707 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R3, INS_COND_VS); // vs
6708 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R2, INS_COND_VC); // vc
6709 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R1, INS_COND_HI); // hi
6710 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R0, INS_COND_LS); // ls
6711 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R9, INS_COND_GE); // ge
6712 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R8, INS_COND_LT); // lt
6713 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R7, INS_COND_GT); // gt
6714 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R6, INS_COND_LE); // le
6715
6716 // csetm reg, cond
6717 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R9, INS_COND_EQ); // eq
6718 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R8, INS_COND_NE); // ne
6719 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R7, INS_COND_HS); // hs
6720 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R6, INS_COND_LO); // lo
6721 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R5, INS_COND_MI); // mi
6722 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R4, INS_COND_PL); // pl
6723 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R3, INS_COND_VS); // vs
6724 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R2, INS_COND_VC); // vc
6725 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R1, INS_COND_HI); // hi
6726 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R0, INS_COND_LS); // ls
6727 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R9, INS_COND_GE); // ge
6728 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R8, INS_COND_LT); // lt
6729 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt
6730 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, INS_COND_LE); // le
6731
6732#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6733
6734#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6735 // R_R_COND
6736 //
6737
6738 // cinc reg, reg, cond
6739 // cinv reg, reg, cond
6740 // cneg reg, reg, cond
6741 theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R0, REG_R4, INS_COND_EQ); // eq
6742 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R1, REG_R5, INS_COND_NE); // ne
6743 theEmitter->emitIns_R_R_COND(INS_cneg, EA_4BYTE, REG_R2, REG_R6, INS_COND_HS); // hs
6744 theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R3, REG_R7, INS_COND_LO); // lo
6745 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R4, REG_R8, INS_COND_MI); // mi
6746 theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R5, REG_R9, INS_COND_PL); // pl
6747 theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R6, REG_R0, INS_COND_VS); // vs
6748 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R7, REG_R1, INS_COND_VC); // vc
6749 theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R8, REG_R2, INS_COND_HI); // hi
6750 theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R9, REG_R3, INS_COND_LS); // ls
6751 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R0, REG_R4, INS_COND_GE); // ge
6752 theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R2, REG_R5, INS_COND_LT); // lt
6753 theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt
6754 theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le
6755
6756#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6757
6758#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6759 // R_R_R_COND
6760 //
6761
6762 // csel reg, reg, reg, cond
6763 // csinc reg, reg, reg, cond
6764 // csinv reg, reg, reg, cond
6765 // csneg reg, reg, reg, cond
6766 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R0, REG_R4, REG_R8, INS_COND_EQ); // eq
6767 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R1, REG_R5, REG_R9, INS_COND_NE); // ne
6768 theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_HS); // hs
6769 theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LO); // lo
6770 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R4, REG_R8, REG_R2, INS_COND_MI); // mi
6771 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R5, REG_R9, REG_R3, INS_COND_PL); // pl
6772 theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_8BYTE, REG_R6, REG_R0, REG_R4, INS_COND_VS); // vs
6773 theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_4BYTE, REG_R7, REG_R1, REG_R5, INS_COND_VC); // vc
6774 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R8, REG_R2, REG_R6, INS_COND_HI); // hi
6775 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R9, REG_R3, REG_R7, INS_COND_LS); // ls
6776 theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R0, REG_R4, REG_R8, INS_COND_GE); // ge
6777 theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R2, REG_R5, REG_R9, INS_COND_LT); // lt
6778 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt
6779 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le
6780
6781#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6782
6783#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6784 // R_R_FLAGS_COND
6785 //
6786
6787 // ccmp reg1, reg2, nzcv, cond
6788 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
6789 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
6790 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
6791 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
6792 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
6793 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
6794 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
6795 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
6796 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
6797 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
6798 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
6799 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
6800 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
6801 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
6802
6803 // ccmp reg1, imm, nzcv, cond
6804 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
6805 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
6806 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
6807 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
6808 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
6809 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
6810 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
6811 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
6812 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
6813 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
6814 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
6815 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
6816 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
6817 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
6818
6819 // ccmp reg1, imm, nzcv, cond -- encoded as ccmn
6820 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, -3, INS_FLAGS_V, INS_COND_EQ); // eq
6821 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, -2, INS_FLAGS_C, INS_COND_NE); // ne
6822 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, -1, INS_FLAGS_Z, INS_COND_HS); // hs
6823 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, -5, INS_FLAGS_N, INS_COND_LO); // lo
6824 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, -31, INS_FLAGS_CV, INS_COND_MI); // mi
6825 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, -28, INS_FLAGS_ZV, INS_COND_PL); // pl
6826 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, -25, INS_FLAGS_ZC, INS_COND_VS); // vs
6827 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, -22, INS_FLAGS_NV, INS_COND_VC); // vc
6828 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, -19, INS_FLAGS_NC, INS_COND_HI); // hi
6829 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, -16, INS_FLAGS_NZ, INS_COND_LS); // ls
6830 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, -13, INS_FLAGS_NONE, INS_COND_GE); // ge
6831 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, -10, INS_FLAGS_NZV, INS_COND_LT); // lt
6832 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, -7, INS_FLAGS_NZC, INS_COND_GT); // gt
6833 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, -4, INS_FLAGS_NZCV, INS_COND_LE); // le
6834
6835 // ccmn reg1, reg2, nzcv, cond
6836 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
6837 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
6838 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
6839 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
6840 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
6841 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
6842 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
6843 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
6844 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
6845 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
6846 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
6847 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
6848 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
6849 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
6850
6851 // ccmn reg1, imm, nzcv, cond
6852 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
6853 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
6854 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
6855 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
6856 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
6857 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
6858 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
6859 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
6860 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
6861 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
6862 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
6863 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
6864 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
6865 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
6866
6867#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6868
6869#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6870 //
6871 // Branch to register
6872 //
6873
6874 genDefineTempLabel(genCreateTempLabel());
6875
6876 theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
6877 theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9);
6878 theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8);
6879 theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR);
6880
6881#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6882
6883#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6884 //
6885 // Misc
6886 //
6887
6888 genDefineTempLabel(genCreateTempLabel());
6889
6890 theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0);
6891 theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535);
6892
6893 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD);
6894 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST);
6895 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_OSH);
6896
6897 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_NSHLD);
6898 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_NSHST);
6899 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_NSH);
6900
6901 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_ISHLD);
6902 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_ISHST);
6903 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ISH);
6904
6905 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_LD);
6906 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST);
6907 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY);
6908
6909#endif // ALL_ARM64_EMITTER_UNIT_TESTS
6910
6911#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6912 ////////////////////////////////////////////////////////////////////////////////
6913 //
6914 // SIMD and Floating point
6915 //
6916 ////////////////////////////////////////////////////////////////////////////////
6917
6918 //
6919 // Load/Stores vector register
6920 //
6921
6922 genDefineTempLabel(genCreateTempLabel());
6923
6924 // ldr/str Vt, [reg]
6925 theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_V1, REG_R9);
6926 theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_V2, REG_R8);
6927 theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_V3, REG_R7);
6928 theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_V4, REG_R6);
6929 theEmitter->emitIns_R_R(INS_ldr, EA_2BYTE, REG_V5, REG_R5);
6930 theEmitter->emitIns_R_R(INS_str, EA_2BYTE, REG_V6, REG_R4);
6931 theEmitter->emitIns_R_R(INS_ldr, EA_1BYTE, REG_V7, REG_R3);
6932 theEmitter->emitIns_R_R(INS_str, EA_1BYTE, REG_V8, REG_R2);
6933 theEmitter->emitIns_R_R(INS_ldr, EA_16BYTE, REG_V9, REG_R1);
6934 theEmitter->emitIns_R_R(INS_str, EA_16BYTE, REG_V10, REG_R0);
6935
6936 // ldr/str Vt, [reg+cns] -- scaled
6937 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1);
6938 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 2);
6939 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 4);
6940 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 8);
6941 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 16);
6942
6943 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V7, REG_R10, 1);
6944 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V7, REG_R10, 2);
6945 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V7, REG_R10, 4);
6946 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V7, REG_R10, 8);
6947 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V7, REG_R10, 16);
6948
6949 // ldr/str Vt, [reg],cns -- post-indexed (unscaled)
6950 // ldr/str Vt, [reg+cns]! -- pre-indexed (unscaled)
6951 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6952 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6953 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6954 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6955 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6956
6957 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6958 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6959 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6960 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6961 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6962
6963 theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6964 theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6965 theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6966 theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6967 theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6968
6969 theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6970 theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6971 theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6972 theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6973 theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6974
6975 theEmitter->emitIns_R_R_I(INS_ldur, EA_1BYTE, REG_V8, REG_R9, 2);
6976 theEmitter->emitIns_R_R_I(INS_ldur, EA_2BYTE, REG_V8, REG_R9, 3);
6977 theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_V8, REG_R9, 5);
6978 theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_V8, REG_R9, 9);
6979 theEmitter->emitIns_R_R_I(INS_ldur, EA_16BYTE, REG_V8, REG_R9, 17);
6980
6981 theEmitter->emitIns_R_R_I(INS_stur, EA_1BYTE, REG_V7, REG_R10, 2);
6982 theEmitter->emitIns_R_R_I(INS_stur, EA_2BYTE, REG_V7, REG_R10, 3);
6983 theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_V7, REG_R10, 5);
6984 theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9);
6985 theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17);
6986
6987 // load/store pair
6988 theEmitter->emitIns_R_R_R(INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
6989 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V1, REG_V2, REG_R10, 0);
6990 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_V2, REG_V3, REG_R10, 8);
6991 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 24);
6992
6993 theEmitter->emitIns_R_R_R(INS_ldnp, EA_4BYTE, REG_V4, REG_V5, REG_SP);
6994 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 0);
6995 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 4);
6996 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V7, REG_V8, REG_SP, 12);
6997
6998 theEmitter->emitIns_R_R_R(INS_ldnp, EA_16BYTE, REG_V8, REG_V9, REG_R10);
6999 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V9, REG_V10, REG_R10, 0);
7000 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_16BYTE, REG_V10, REG_V11, REG_R10, 16);
7001 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V11, REG_V12, REG_R10, 48);
7002
7003 theEmitter->emitIns_R_R_R(INS_ldp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
7004 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V1, REG_V2, REG_SP, 0);
7005 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V2, REG_V3, REG_SP, 8);
7006 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 16);
7007 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V4, REG_V5, REG_R10, 24, INS_OPTS_POST_INDEX);
7008 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V5, REG_V6, REG_SP, 32, INS_OPTS_POST_INDEX);
7009 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V6, REG_V7, REG_SP, 40, INS_OPTS_PRE_INDEX);
7010 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V7, REG_V8, REG_R10, 48, INS_OPTS_PRE_INDEX);
7011
7012 theEmitter->emitIns_R_R_R(INS_ldp, EA_4BYTE, REG_V0, REG_V1, REG_R10);
7013 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V1, REG_V2, REG_SP, 0);
7014 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V2, REG_V3, REG_SP, 4);
7015 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V3, REG_V4, REG_R10, 8);
7016 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V4, REG_V5, REG_R10, 12, INS_OPTS_POST_INDEX);
7017 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 16, INS_OPTS_POST_INDEX);
7018 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 20, INS_OPTS_PRE_INDEX);
7019 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V7, REG_V8, REG_R10, 24, INS_OPTS_PRE_INDEX);
7020
7021 theEmitter->emitIns_R_R_R(INS_ldp, EA_16BYTE, REG_V0, REG_V1, REG_R10);
7022 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V1, REG_V2, REG_SP, 0);
7023 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V2, REG_V3, REG_SP, 16);
7024 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V3, REG_V4, REG_R10, 32);
7025 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V4, REG_V5, REG_R10, 48, INS_OPTS_POST_INDEX);
7026 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V5, REG_V6, REG_SP, 64, INS_OPTS_POST_INDEX);
7027 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V6, REG_V7, REG_SP, 80, INS_OPTS_PRE_INDEX);
7028 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V7, REG_V8, REG_R10, 96, INS_OPTS_PRE_INDEX);
7029
7030 // LDR (register)
7031 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V1, REG_SP, REG_R9);
7032 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
7033 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 3);
7034 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
7035 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 3);
7036 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
7037 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 3);
7038 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
7039 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 3);
7040 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
7041 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
7042
7043 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V1, REG_SP, REG_R9);
7044 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
7045 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 2);
7046 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
7047 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 2);
7048 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
7049 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 2);
7050 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
7051 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 2);
7052 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
7053 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
7054
7055 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V1, REG_SP, REG_R9);
7056 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
7057 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 4);
7058 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
7059 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 4);
7060 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
7061 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 4);
7062 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
7063 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 4);
7064 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
7065 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 4);
7066
7067 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V1, REG_SP, REG_R9);
7068 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
7069 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 1);
7070 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
7071 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 1);
7072 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
7073 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 1);
7074 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
7075 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 1);
7076 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
7077 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
7078
7079 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V1, REG_R7, REG_R9);
7080 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V2, REG_SP, REG_R9, INS_OPTS_SXTW);
7081 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_UXTW);
7082 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX);
7083 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX);
7084
7085#endif // ALL_ARM64_EMITTER_UNIT_TESTS
7086
7087#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7088 //
7089 // R_R mov and aliases for mov
7090 //
7091
7092 // mov vector to vector
7093 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1);
7094 theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3);
7095
7096 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13);
7097 theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15);
7098 theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17);
7099
7100 // mov vector to general
7101 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4);
7102 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5);
7103 theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6);
7104 theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7);
7105
7106 // mov general to vector
7107 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4);
7108 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5);
7109 theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6);
7110 theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7);
7111
7112 // mov vector[index] to vector
7113 theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1);
7114 theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V2, REG_V3, 3);
7115 theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V4, REG_V5, 7);
7116 theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V6, REG_V7, 15);
7117
7118 // mov to general from vector[index]
7119 theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_R8, REG_V16, 1);
7120 theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_R9, REG_V17, 2);
7121 theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_R10, REG_V18, 3);
7122 theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_R11, REG_V19, 4);
7123
7124 // mov to vector[index] from general
7125 theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V20, REG_R12, 1);
7126 theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V21, REG_R13, 2);
7127 theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V22, REG_R14, 6);
7128 theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V23, REG_R15, 8);
7129
7130 // mov vector[index] to vector[index2]
7131 theEmitter->emitIns_R_R_I_I(INS_mov, EA_8BYTE, REG_V8, REG_V9, 1, 0);
7132 theEmitter->emitIns_R_R_I_I(INS_mov, EA_4BYTE, REG_V10, REG_V11, 2, 1);
7133 theEmitter->emitIns_R_R_I_I(INS_mov, EA_2BYTE, REG_V12, REG_V13, 5, 2);
7134 theEmitter->emitIns_R_R_I_I(INS_mov, EA_1BYTE, REG_V14, REG_V15, 12, 3);
7135
7136 //////////////////////////////////////////////////////////////////////////////////
7137
7138 // mov/dup scalar
7139 theEmitter->emitIns_R_R_I(INS_dup, EA_8BYTE, REG_V24, REG_V25, 1);
7140 theEmitter->emitIns_R_R_I(INS_dup, EA_4BYTE, REG_V26, REG_V27, 3);
7141 theEmitter->emitIns_R_R_I(INS_dup, EA_2BYTE, REG_V28, REG_V29, 7);
7142 theEmitter->emitIns_R_R_I(INS_dup, EA_1BYTE, REG_V30, REG_V31, 15);
7143
7144 // mov/ins vector element
7145 theEmitter->emitIns_R_R_I_I(INS_ins, EA_8BYTE, REG_V0, REG_V1, 0, 1);
7146 theEmitter->emitIns_R_R_I_I(INS_ins, EA_4BYTE, REG_V2, REG_V3, 2, 2);
7147 theEmitter->emitIns_R_R_I_I(INS_ins, EA_2BYTE, REG_V4, REG_V5, 4, 3);
7148 theEmitter->emitIns_R_R_I_I(INS_ins, EA_1BYTE, REG_V6, REG_V7, 8, 4);
7149
7150 // umov to general from vector element
7151 theEmitter->emitIns_R_R_I(INS_umov, EA_8BYTE, REG_R0, REG_V8, 1);
7152 theEmitter->emitIns_R_R_I(INS_umov, EA_4BYTE, REG_R1, REG_V9, 2);
7153 theEmitter->emitIns_R_R_I(INS_umov, EA_2BYTE, REG_R2, REG_V10, 4);
7154 theEmitter->emitIns_R_R_I(INS_umov, EA_1BYTE, REG_R3, REG_V11, 8);
7155
7156 // ins to vector element from general
7157 theEmitter->emitIns_R_R_I(INS_ins, EA_8BYTE, REG_V12, REG_R4, 1);
7158 theEmitter->emitIns_R_R_I(INS_ins, EA_4BYTE, REG_V13, REG_R5, 3);
7159 theEmitter->emitIns_R_R_I(INS_ins, EA_2BYTE, REG_V14, REG_R6, 7);
7160 theEmitter->emitIns_R_R_I(INS_ins, EA_1BYTE, REG_V15, REG_R7, 15);
7161
7162 // smov to general from vector element
7163 theEmitter->emitIns_R_R_I(INS_smov, EA_4BYTE, REG_R5, REG_V17, 2);
7164 theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4);
7165 theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8);
7166
7167#endif // ALL_ARM64_EMITTER_UNIT_TESTS
7168
7169#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7170 //
7171 // R_I movi and mvni
7172 //
7173
7174 // movi imm8 (vector)
7175 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B);
7176 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B);
7177 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V2, 0x00, INS_OPTS_16B);
7178 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V3, 0xFF, INS_OPTS_16B);
7179
7180 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V4, 0x007F, INS_OPTS_4H);
7181 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V5, 0x7F00, INS_OPTS_4H); // LSL 8
7182 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V6, 0x003F, INS_OPTS_8H);
7183 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V7, 0x3F00, INS_OPTS_8H); // LSL 8
7184
7185 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V8, 0x1F, INS_OPTS_2S);
7186 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V9, 0x1F00, INS_OPTS_2S); // LSL 8
7187 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V10, 0x1F0000, INS_OPTS_2S); // LSL 16
7188 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V11, 0x1F000000, INS_OPTS_2S); // LSL 24
7189
7190 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V12, 0x1FFF, INS_OPTS_2S); // MSL 8
7191 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V13, 0x1FFFFF, INS_OPTS_2S); // MSL 16
7192
7193 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V14, 0x37, INS_OPTS_4S);
7194 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V15, 0x3700, INS_OPTS_4S); // LSL 8
7195 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V16, 0x370000, INS_OPTS_4S); // LSL 16
7196 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V17, 0x37000000, INS_OPTS_4S); // LSL 24
7197
7198 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V18, 0x37FF, INS_OPTS_4S); // MSL 8
7199 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V19, 0x37FFFF, INS_OPTS_4S); // MSL 16
7200
7201 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V20, 0xFF80, INS_OPTS_4H); // mvni
7202 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V21, 0xFFC0, INS_OPTS_8H); // mvni
7203
7204 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V22, 0xFFFFFFE0, INS_OPTS_2S); // mvni
7205 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V23, 0xFFFFF0FF, INS_OPTS_4S); // mvni LSL 8
7206 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V24, 0xFFF8FFFF, INS_OPTS_2S); // mvni LSL 16
7207 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V25, 0xFCFFFFFF, INS_OPTS_4S); // mvni LSL 24
7208
7209 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V26, 0xFFFFFE00, INS_OPTS_2S); // mvni MSL 8
7210 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V27, 0xFFFC0000, INS_OPTS_4S); // mvni MSL 16
7211
7212 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V28, 0x00FF00FF00FF00FF, INS_OPTS_1D);
7213 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V29, 0x00FFFF0000FFFF00, INS_OPTS_2D);
7214 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000);
7215 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D);
7216
7217 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
7218 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
7219 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
7220 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
7221
7222 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
7223 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
7224 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
7225 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
7226
7227 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V8, 0x42FF, INS_OPTS_2S); // MSL 8
7228 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V9, 0x42FFFF, INS_OPTS_2S); // MSL 16
7229
7230 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
7231 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
7232 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
7233 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
7234
7235 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8
7236 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16
7237
7238#endif // ALL_ARM64_EMITTER_UNIT_TESTS
7239
7240#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7241 //
7242 // R_I orr/bic vector immediate
7243 //
7244
7245 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
7246 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
7247 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
7248 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
7249
7250 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
7251 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
7252 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
7253 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
7254
7255 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
7256 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
7257 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
7258 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
7259
7260 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
7261 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
7262 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
7263 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
7264
7265 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
7266 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
7267 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
7268 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
7269
7270 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
7271 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
7272 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
7273 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
7274
7275#endif // ALL_ARM64_EMITTER_UNIT_TESTS
7276
7277#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7278 //
7279 // R_F fcmp/fmov immediate
7280 //
7281
7282 // fmov imm8 (scalar)
7283 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0);
7284 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0);
7285 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V0, 2.0); // encodes imm8 == 0
7286 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V16, 10.0);
7287 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V17, -10.0);
7288 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V18, 31); // Largest encodable value
7289 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V19, -31);
7290 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V20, 1.25);
7291 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V21, -1.25);
7292 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V22, 0.125); // Smallest encodable value
7293 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V23, -0.125);
7294
7295 // fmov imm8 (vector)
7296 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V0, 2.0, INS_OPTS_2S);
7297 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V24, 1.0, INS_OPTS_2S);
7298 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V25, 1.0, INS_OPTS_4S);
7299 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V26, 1.0, INS_OPTS_2D);
7300 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V27, -10.0, INS_OPTS_2S);
7301 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V28, -10.0, INS_OPTS_4S);
7302 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V29, -10.0, INS_OPTS_2D);
7303 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V30, 31.0, INS_OPTS_2S);
7304 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V31, 31.0, INS_OPTS_4S);
7305 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V0, 31.0, INS_OPTS_2D);
7306 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V1, -0.125, INS_OPTS_2S);
7307 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V2, -0.125, INS_OPTS_4S);
7308 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V3, -0.125, INS_OPTS_2D);
7309
7310 // fcmp with 0.0
7311 theEmitter->emitIns_R_F(INS_fcmp, EA_8BYTE, REG_V12, 0.0);
7312 theEmitter->emitIns_R_F(INS_fcmp, EA_4BYTE, REG_V13, 0.0);
7313 theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0);
7314 theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0);
7315
7316#endif // ALL_ARM64_EMITTER_UNIT_TESTS
7317
7318#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7319 //
7320 // R_R fmov/fcmp/fcvt
7321 //
7322
7323 // fmov vector to vector
7324 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2);
7325 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3);
7326
7327 // fmov vector to general
7328 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4);
7329 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5);
7330 // using the optional conversion specifier
7331 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE);
7332 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE);
7333
7334 // fmov general to vector
7335 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4);
7336 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5);
7337 // using the optional conversion specifier
7338 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D);
7339 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S);
7340
7341 // fcmp/fcmpe
7342 theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16);
7343 theEmitter->emitIns_R_R(INS_fcmp, EA_4BYTE, REG_V9, REG_V17);
7344 theEmitter->emitIns_R_R(INS_fcmpe, EA_8BYTE, REG_V10, REG_V18);
7345 theEmitter->emitIns_R_R(INS_fcmpe, EA_4BYTE, REG_V11, REG_V19);
7346
7347 // fcvt
7348 theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V24, REG_V25, INS_OPTS_S_TO_D); // Single to Double
7349 theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V26, REG_V27, INS_OPTS_D_TO_S); // Double to Single
7350
7351 theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V1, REG_V2, INS_OPTS_H_TO_S);
7352 theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V3, REG_V4, INS_OPTS_H_TO_D);
7353
7354 theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H);
7355 theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H);
7356
7357#endif // ALL_ARM64_EMITTER_UNIT_TESTS
7358
7359#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7360 //
7361 // R_R floating point conversions
7362 //
7363
7364 // fcvtas scalar
7365 theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1);
7366 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3);
7367
7368 // fcvtas scalar to general
7369 theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7370 theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7371 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7372 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7373
7374 // fcvtas vector
7375 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7376 theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7377 theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7378
7379 // fcvtau scalar
7380 theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_V0, REG_V1);
7381 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V2, REG_V3);
7382
7383 // fcvtau scalar to general
7384 theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7385 theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7386 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7387 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7388
7389 // fcvtau vector
7390 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7391 theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7392 theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7393
7394 ////////////////////////////////////////////////////////////////////////////////
7395
7396 // fcvtms scalar
7397 theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_V0, REG_V1);
7398 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V2, REG_V3);
7399
7400 // fcvtms scalar to general
7401 theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7402 theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7403 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7404 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7405
7406 // fcvtms vector
7407 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7408 theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7409 theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7410
7411 // fcvtmu scalar
7412 theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_V0, REG_V1);
7413 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V2, REG_V3);
7414
7415 // fcvtmu scalar to general
7416 theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7417 theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7418 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7419 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7420
7421 // fcvtmu vector
7422 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7423 theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7424 theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7425
7426 ////////////////////////////////////////////////////////////////////////////////
7427
7428 // fcvtns scalar
7429 theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_V0, REG_V1);
7430 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V2, REG_V3);
7431
7432 // fcvtns scalar to general
7433 theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7434 theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7435 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7436 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7437
7438 // fcvtns vector
7439 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7440 theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7441 theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7442
7443 // fcvtnu scalar
7444 theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_V0, REG_V1);
7445 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V2, REG_V3);
7446
7447 // fcvtnu scalar to general
7448 theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7449 theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7450 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7451 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7452
7453 // fcvtnu vector
7454 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7455 theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7456 theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7457
7458 ////////////////////////////////////////////////////////////////////////////////
7459
7460 // fcvtps scalar
7461 theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_V0, REG_V1);
7462 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V2, REG_V3);
7463
7464 // fcvtps scalar to general
7465 theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7466 theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7467 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7468 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7469
7470 // fcvtps vector
7471 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7472 theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7473 theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7474
7475 // fcvtpu scalar
7476 theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_V0, REG_V1);
7477 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V2, REG_V3);
7478
7479 // fcvtpu scalar to general
7480 theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7481 theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7482 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7483 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7484
7485 // fcvtpu vector
7486 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7487 theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7488 theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7489
7490 ////////////////////////////////////////////////////////////////////////////////
7491
7492 // fcvtzs scalar
7493 theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_V0, REG_V1);
7494 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V2, REG_V3);
7495
7496 // fcvtzs scalar to general
7497 theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7498 theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7499 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7500 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7501
7502 // fcvtzs vector
7503 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7504 theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7505 theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7506
7507 // fcvtzu scalar
7508 theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_V0, REG_V1);
7509 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V2, REG_V3);
7510
7511 // fcvtzu scalar to general
7512 theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7513 theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7514 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7515 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7516
7517 // fcvtzu vector
7518 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7519 theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7520 theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7521
7522 ////////////////////////////////////////////////////////////////////////////////
7523
7524 // scvtf scalar
7525 theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V0, REG_V1);
7526 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V2, REG_V3);
7527
7528 // scvtf scalar from general
7529 theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
7530 theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
7531 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
7532 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
7533
7534 // scvtf vector
7535 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7536 theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7537 theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7538
7539 // ucvtf scalar
7540 theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V0, REG_V1);
7541 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V2, REG_V3);
7542
7543 // ucvtf scalar from general
7544 theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
7545 theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
7546 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
7547 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
7548
7549 // ucvtf vector
7550 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7551 theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7552 theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7553
7554#endif // ALL_ARM64_EMITTER_UNIT_TESTS
7555
7556#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7557 //
7558 // R_R floating point operations, one dest, one source
7559 //
7560
7561 // fabs scalar
7562 theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1);
7563 theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3);
7564
7565 // fabs vector
7566 theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7567 theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7568 theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7569
7570 // fneg scalar
7571 theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1);
7572 theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3);
7573
7574 // fneg vector
7575 theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7576 theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7577 theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7578
7579 // fsqrt scalar
7580 theEmitter->emitIns_R_R(INS_fsqrt, EA_4BYTE, REG_V0, REG_V1);
7581 theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V2, REG_V3);
7582
7583 // fsqrt vector
7584 theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7585 theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7586 theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7587
7588 genDefineTempLabel(genCreateTempLabel());
7589
7590 // abs scalar
7591 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V2, REG_V3);
7592
7593 // abs vector
7594 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7595 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7596 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7597 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7598 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7599 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7600 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
7601
7602 // neg scalar
7603 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V2, REG_V3);
7604
7605 // neg vector
7606 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7607 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7608 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7609 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7610 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7611 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7612 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
7613
7614 // mvn vector
7615 theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V4, REG_V5);
7616 theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V6, REG_V7, INS_OPTS_8B);
7617 theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V8, REG_V9);
7618 theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_16B);
7619
7620 // cnt vector
7621 theEmitter->emitIns_R_R(INS_cnt, EA_8BYTE, REG_V22, REG_V23, INS_OPTS_8B);
7622 theEmitter->emitIns_R_R(INS_cnt, EA_16BYTE, REG_V24, REG_V25, INS_OPTS_16B);
7623
7624 // not vector (the same encoding as mvn)
7625 theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V12, REG_V13);
7626 theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V14, REG_V15, INS_OPTS_8B);
7627 theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V16, REG_V17);
7628 theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V18, REG_V19, INS_OPTS_16B);
7629
7630 // cls vector
7631 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7632 theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7633 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7634 theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7635 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7636 theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7637
7638 // clz vector
7639 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7640 theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7641 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7642 theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7643 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7644 theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7645
7646 // rbit vector
7647 theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
7648 theEmitter->emitIns_R_R(INS_rbit, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
7649
7650 // rev16 vector
7651 theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
7652 theEmitter->emitIns_R_R(INS_rev16, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
7653
7654 // rev32 vector
7655 theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7656 theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7657 theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7658 theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7659
7660 // rev64 vector
7661 theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7662 theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7663 theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7664 theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7665 theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7666 theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7667
7668 // addv vector
7669 theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7670 theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7671 theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7672 theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7673 theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7674 theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7675
7676 // saddlv vector
7677 theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7678 theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7679 theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7680 theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7681 theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7682 theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7683
7684 // smaxlv vector
7685 theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7686 theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7687 theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7688 theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7689 theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7690 theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7691
7692 // sminlv vector
7693 theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7694 theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7695 theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7696 theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7697 theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7698 theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7699
7700 // uaddlv vector
7701 theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7702 theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7703 theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7704 theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7705 theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7706 theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7707
7708 // umaxlv vector
7709 theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7710 theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7711 theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7712 theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7713 theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7714 theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7715
7716 // uminlv vector
7717 theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7718 theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7719 theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7720 theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7721 theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7722 theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7723
7724 // faddp scalar
7725 theEmitter->emitIns_R_R(INS_faddp, EA_4BYTE, REG_V0, REG_V1);
7726 theEmitter->emitIns_R_R(INS_faddp, EA_8BYTE, REG_V2, REG_V3);
7727
7728 // INS_fcvtl
7729 theEmitter->emitIns_R_R(INS_fcvtl, EA_4BYTE, REG_V0, REG_V1);
7730
7731 // INS_fcvtl2
7732 theEmitter->emitIns_R_R(INS_fcvtl2, EA_4BYTE, REG_V0, REG_V1);
7733
7734 // INS_fcvtn
7735 theEmitter->emitIns_R_R(INS_fcvtn, EA_8BYTE, REG_V0, REG_V1);
7736
7737 // INS_fcvtn2
7738 theEmitter->emitIns_R_R(INS_fcvtn2, EA_8BYTE, REG_V0, REG_V1);
7739#endif
7740
7741#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7742 //
7743 // R_R floating point round to int, one dest, one source
7744 //
7745
7746 // frinta scalar
7747 theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1);
7748 theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3);
7749
7750 // frinta vector
7751 theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7752 theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7753 theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7754
7755 // frinti scalar
7756 theEmitter->emitIns_R_R(INS_frinti, EA_4BYTE, REG_V0, REG_V1);
7757 theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V2, REG_V3);
7758
7759 // frinti vector
7760 theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7761 theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7762 theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7763
7764 // frintm scalar
7765 theEmitter->emitIns_R_R(INS_frintm, EA_4BYTE, REG_V0, REG_V1);
7766 theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V2, REG_V3);
7767
7768 // frintm vector
7769 theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7770 theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7771 theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7772
7773 // frintn scalar
7774 theEmitter->emitIns_R_R(INS_frintn, EA_4BYTE, REG_V0, REG_V1);
7775 theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V2, REG_V3);
7776
7777 // frintn vector
7778 theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7779 theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7780 theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7781
7782 // frintp scalar
7783 theEmitter->emitIns_R_R(INS_frintp, EA_4BYTE, REG_V0, REG_V1);
7784 theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V2, REG_V3);
7785
7786 // frintp vector
7787 theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7788 theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7789 theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7790
7791 // frintx scalar
7792 theEmitter->emitIns_R_R(INS_frintx, EA_4BYTE, REG_V0, REG_V1);
7793 theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V2, REG_V3);
7794
7795 // frintx vector
7796 theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7797 theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7798 theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7799
7800 // frintz scalar
7801 theEmitter->emitIns_R_R(INS_frintz, EA_4BYTE, REG_V0, REG_V1);
7802 theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V2, REG_V3);
7803
7804 // frintz vector
7805 theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7806 theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7807 theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7808
7809#endif // ALL_ARM64_EMITTER_UNIT_TESTS
7810
7811#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7812 //
7813 // R_R_R floating point operations, one dest, two source
7814 //
7815
7816 genDefineTempLabel(genCreateTempLabel());
7817
7818 theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7819 theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7820 theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7821 theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7822 theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7823
7824 theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7825 theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7826 theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7827 theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7828 theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7829
7830 theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7831 theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7832 theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7833 theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7834 theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7835
7836 theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7837 theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7838 theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7839 theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7840 theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7841
7842 theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7843 theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7844 theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7845 theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7846 theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7847
7848 // fabd
7849 theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7850 theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7851 theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7852 theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7853 theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7854
7855 genDefineTempLabel(genCreateTempLabel());
7856
7857 theEmitter->emitIns_R_R_R(INS_fmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7858 theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7859 theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7860 theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7861 theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7862
7863 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
7864 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
7865 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
7866 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
7867 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
7868
7869 theEmitter->emitIns_R_R_R(INS_fmulx, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7870 theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7871 theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7872 theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7873 theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7874
7875 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
7876 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
7877 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
7878 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
7879 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
7880
7881 theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7882 theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7883
7884#endif // ALL_ARM64_EMITTER_UNIT_TESTS
7885
7886#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7887 //
7888 // R_R_I vector operations, one dest, one source reg, one immed
7889 //
7890
7891 genDefineTempLabel(genCreateTempLabel());
7892
7893 // 'sshr' scalar
7894 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1);
7895 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14);
7896 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27);
7897 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40);
7898 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63);
7899
7900 // 'sshr' vector
7901 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7902 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7903 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7904 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7905 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7906 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7907 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7908 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7909
7910 // 'ssra' scalar
7911 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1);
7912 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14);
7913 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27);
7914 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40);
7915 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63);
7916
7917 // 'ssra' vector
7918 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7919 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7920 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7921 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7922 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7923 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7924 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7925 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7926
7927 // 'srshr' scalar
7928 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1);
7929 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14);
7930 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27);
7931 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40);
7932 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63);
7933
7934 // 'srshr' vector
7935 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7936 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7937 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7938 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7939 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7940 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7941 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7942 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7943
7944 // 'srsra' scalar
7945 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1);
7946 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14);
7947 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27);
7948 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40);
7949 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63);
7950
7951 // 'srsra' vector
7952 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7953 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7954 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7955 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7956 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7957 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7958 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7959 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7960
7961 // 'shl' scalar
7962 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1);
7963 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14);
7964 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27);
7965 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40);
7966 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63);
7967
7968 // 'shl' vector
7969 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7970 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7971 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7972 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7973 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7974 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7975 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7976 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7977
7978 // 'ushr' scalar
7979 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1);
7980 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14);
7981 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27);
7982 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V6, REG_V7, 40);
7983 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63);
7984
7985 // 'ushr' vector
7986 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7987 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7988 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7989 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7990 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7991 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7992 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7993 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7994
7995 // 'usra' scalar
7996 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1);
7997 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14);
7998 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27);
7999 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40);
8000 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63);
8001
8002 // 'usra' vector
8003 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
8004 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
8005 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
8006 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
8007 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
8008 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
8009 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
8010 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
8011
8012 // 'urshr' scalar
8013 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1);
8014 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14);
8015 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 27);
8016 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40);
8017 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63);
8018
8019 // 'urshr' vector
8020 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
8021 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
8022 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
8023 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
8024 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
8025 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
8026 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
8027 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
8028
8029 // 'ursra' scalar
8030 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1);
8031 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14);
8032 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27);
8033 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40);
8034 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63);
8035
8036 // 'ursra' vector
8037 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
8038 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
8039 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
8040 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
8041 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
8042 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
8043 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
8044 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
8045
8046 // 'sri' scalar
8047 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1);
8048 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14);
8049 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27);
8050 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40);
8051 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63);
8052
8053 // 'sri' vector
8054 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
8055 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
8056 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
8057 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
8058 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
8059 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
8060 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
8061 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
8062
8063 // 'sli' scalar
8064 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1);
8065 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14);
8066 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27);
8067 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40);
8068 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63);
8069
8070 // 'sli' vector
8071 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
8072 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
8073 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
8074 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
8075 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
8076 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
8077 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
8078 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
8079
8080 // 'sshll' vector
8081 theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
8082 theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
8083 theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
8084 theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
8085 theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
8086 theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
8087
8088 // 'ushll' vector
8089 theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
8090 theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
8091 theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
8092 theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
8093 theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
8094 theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
8095
8096 // 'shrn' vector
8097 theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
8098 theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
8099 theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
8100 theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
8101 theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
8102 theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
8103
8104 // 'rshrn' vector
8105 theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
8106 theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
8107 theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
8108 theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
8109 theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
8110 theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
8111
8112 // 'sxtl' vector
8113 theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
8114 theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
8115 theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
8116 theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
8117 theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
8118 theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
8119
8120 // 'uxtl' vector
8121 theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
8122 theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
8123 theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
8124 theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
8125 theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
8126 theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
8127
8128#endif // ALL_ARM64_EMITTER_UNIT_TESTS
8129
8130#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8131 //
8132 // R_R_R vector operations, one dest, two source
8133 //
8134
8135 genDefineTempLabel(genCreateTempLabel());
8136
8137 // Specifying an Arrangement is optional
8138 //
8139 theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8);
8140 theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11);
8141 theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14);
8142 theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17);
8143 theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20);
8144 theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23);
8145 theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26);
8146 theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29);
8147 theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0);
8148 theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3);
8149
8150 theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6);
8151 theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9);
8152 theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12);
8153 theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15);
8154 theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18);
8155 theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21);
8156
8157 // Default Arrangement as per the ARM64 manual
8158 //
8159 theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_8B);
8160 theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8B);
8161 theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8B);
8162 theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8B);
8163 theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
8164 theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
8165 theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26, INS_OPTS_16B);
8166 theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29, INS_OPTS_16B);
8167 theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0, INS_OPTS_16B);
8168 theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3, INS_OPTS_16B);
8169
8170 theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
8171 theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_8B);
8172 theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_8B);
8173 theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
8174 theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_16B);
8175 theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_16B);
8176
8177 genDefineTempLabel(genCreateTempLabel());
8178
8179 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE
8180 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B);
8181 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8182 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_2S);
8183 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B);
8184 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H);
8185 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S);
8186 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D);
8187
8188 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE
8189 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
8190 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H);
8191 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_2S);
8192 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
8193 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_8H);
8194 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_4S);
8195 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V22, REG_V23, REG_V24, INS_OPTS_2D);
8196
8197 genDefineTempLabel(genCreateTempLabel());
8198
8199 // saba vector
8200 theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8201 theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8202 theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8203 theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8204 theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8205 theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8206
8207 // sabd vector
8208 theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8209 theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8210 theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8211 theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8212 theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8213 theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8214
8215 // uaba vector
8216 theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8217 theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8218 theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8219 theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8220 theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8221 theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8222
8223 // uabd vector
8224 theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8225 theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8226 theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8227 theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8228 theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8229 theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8230#endif // ALL_ARM64_EMITTER_UNIT_TESTS
8231
8232#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8233 // smax vector
8234 theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8235 theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8236 theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8237 theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8238 theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8239 theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8240
8241 // smin vector
8242 theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8243 theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8244 theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8245 theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8246 theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8247 theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8248
8249 // umax vector
8250 theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8251 theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8252 theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8253 theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8254 theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8255 theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8256
8257 // umin vector
8258 theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8259 theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8260 theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8261 theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8262 theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8263 theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8264
8265 // cmeq vector
8266 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8267 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8268 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8269 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8270 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8271 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8272 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8273 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8274
8275 // cmge vector
8276 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8277 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8278 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8279 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8280 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8281 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8282 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8283 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8284
8285 // cmgt vector
8286 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8287 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8288 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8289 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8290 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8291 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8292 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8293 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8294
8295 // cmhi vector
8296 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8297 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8298 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8299 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8300 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8301 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8302 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8303 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8304
8305 // cmhs vector
8306 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8307 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8308 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8309 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8310 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8311 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8312 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8313 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8314
8315 // ctst vector
8316 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8317 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8318 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8319 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8320 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8321 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8322 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8323 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8324
8325 // faddp vector
8326 theEmitter->emitIns_R_R_R(INS_faddp, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8327 theEmitter->emitIns_R_R_R(INS_faddp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8328 theEmitter->emitIns_R_R_R(INS_faddp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8329
8330 // fcmeq vector
8331 theEmitter->emitIns_R_R_R(INS_fcmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8332 theEmitter->emitIns_R_R_R(INS_fcmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8333 theEmitter->emitIns_R_R_R(INS_fcmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8334
8335 // fcmge vector
8336 theEmitter->emitIns_R_R_R(INS_fcmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8337 theEmitter->emitIns_R_R_R(INS_fcmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8338 theEmitter->emitIns_R_R_R(INS_fcmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8339
8340 // fcmgt vector
8341 theEmitter->emitIns_R_R_R(INS_fcmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8342 theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8343 theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8344#endif // ALL_ARM64_EMITTER_UNIT_TESTS
8345
8346#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8347 //
8348 // R_R_R vector multiply (mul/pmul) and R_R_R_I vector by elem (mul/mla/mls)
8349 //
8350
8351 genDefineTempLabel(genCreateTempLabel());
8352
8353 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8354 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
8355 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
8356 theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
8357 theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
8358 theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8359
8360 theEmitter->emitIns_R_R_R(INS_pmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
8361 theEmitter->emitIns_R_R_R(INS_pmul, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
8362
8363 // 'mul' vector by elem
8364 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
8365 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
8366 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
8367 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
8368 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
8369 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
8370 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
8371 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
8372 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
8373 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
8374 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
8375 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
8376
8377 // 'mla' vector by elem
8378 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
8379 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
8380 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
8381 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
8382 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
8383 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
8384 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
8385 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
8386 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
8387 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
8388 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
8389 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
8390
8391 // 'mls' vector by elem
8392 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
8393 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
8394 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
8395 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
8396 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
8397 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
8398 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
8399 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
8400 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
8401 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
8402 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
8403 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
8404
8405#endif // ALL_ARM64_EMITTER_UNIT_TESTS
8406
8407#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8408 //
8409 // R_R_R and R_R_R_I floating point operations (fmla/fmls), one source/dest, and two sources
8410 //
8411
8412 genDefineTempLabel(genCreateTempLabel());
8413
8414 theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
8415 theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
8416 theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
8417
8418 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
8419 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
8420 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
8421 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
8422 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
8423
8424 theEmitter->emitIns_R_R_R(INS_fmls, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
8425 theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
8426 theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
8427
8428 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
8429 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
8430 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
8431 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
8432 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
8433
8434#endif // ALL_ARM64_EMITTER_UNIT_TESTS
8435
8436#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8437 //
8438 // R_R_R_R floating point operations (fmadd/fmsub/fnmadd/fnmsub), one dest, and three sources
8439 //
8440
8441 theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24);
8442 theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25);
8443 theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26);
8444 theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_4BYTE, REG_V3, REG_V11, REG_V19, REG_V27);
8445
8446 theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_8BYTE, REG_V4, REG_V12, REG_V20, REG_V28);
8447 theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29);
8448 theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30);
8449 theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31);
8450
8451#endif // ALL_ARM64_EMITTER_UNIT_TESTS
8452
8453#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8454
8455 BasicBlock* label = genCreateTempLabel();
8456 genDefineTempLabel(label);
8457 instGen(INS_nop);
8458 instGen(INS_nop);
8459 instGen(INS_nop);
8460 instGen(INS_nop);
8461 theEmitter->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, label, REG_R0);
8462
8463#endif // ALL_ARM64_EMITTER_UNIT_TESTS
8464
8465 printf("*************** End of genArm64EmitterUnitTests()\n");
8466}
8467#endif // defined(DEBUG)
8468
8469#endif // _TARGET_ARM64_
8470