1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX Arm64 Code Generator XX |
9 | XX XX |
10 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
11 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
12 | */ |
13 | #include "jitpch.h" |
14 | #ifdef _MSC_VER |
15 | #pragma hdrstop |
16 | #endif |
17 | |
18 | #ifdef _TARGET_ARM64_ |
19 | #include "emit.h" |
20 | #include "codegen.h" |
21 | #include "lower.h" |
22 | #include "gcinfo.h" |
23 | #include "gcinfoencoder.h" |
24 | |
25 | /* |
26 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
27 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
28 | XX XX |
29 | XX Prolog / Epilog XX |
30 | XX XX |
31 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
32 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
33 | */ |
34 | |
35 | //------------------------------------------------------------------------ |
36 | // genInstrWithConstant: we will typically generate one instruction |
37 | // |
38 | // ins reg1, reg2, imm |
39 | // |
40 | // However the imm might not fit as a directly encodable immediate, |
41 | // when it doesn't fit we generate extra instruction(s) that sets up |
42 | // the 'regTmp' with the proper immediate value. |
43 | // |
44 | // mov regTmp, imm |
45 | // ins reg1, reg2, regTmp |
46 | // |
47 | // Arguments: |
48 | // ins - instruction |
49 | // attr - operation size and GC attribute |
50 | // reg1, reg2 - first and second register operands |
51 | // imm - immediate value (third operand when it fits) |
52 | // tmpReg - temp register to use when the 'imm' doesn't fit |
53 | // inUnwindRegion - true if we are in a prolog/epilog region with unwind codes |
54 | // |
55 | // Return Value: |
56 | // returns true if the immediate was too large and tmpReg was used and modified. |
57 | // |
58 | bool CodeGen::genInstrWithConstant(instruction ins, |
59 | emitAttr attr, |
60 | regNumber reg1, |
61 | regNumber reg2, |
62 | ssize_t imm, |
63 | regNumber tmpReg, |
64 | bool inUnwindRegion /* = false */) |
65 | { |
66 | bool immFitsInIns = false; |
67 | emitAttr size = EA_SIZE(attr); |
68 | |
69 | // reg1 is usually a dest register |
70 | // reg2 is always source register |
71 | assert(tmpReg != reg2); // regTmp can not match any source register |
72 | |
73 | switch (ins) |
74 | { |
75 | case INS_add: |
76 | case INS_sub: |
77 | if (imm < 0) |
78 | { |
79 | imm = -imm; |
80 | ins = (ins == INS_add) ? INS_sub : INS_add; |
81 | } |
82 | immFitsInIns = emitter::emitIns_valid_imm_for_add(imm, size); |
83 | break; |
84 | |
85 | case INS_strb: |
86 | case INS_strh: |
87 | case INS_str: |
88 | // reg1 is a source register for store instructions |
89 | assert(tmpReg != reg1); // regTmp can not match any source register |
90 | immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size); |
91 | break; |
92 | |
93 | case INS_ldrsb: |
94 | case INS_ldrsh: |
95 | case INS_ldrsw: |
96 | case INS_ldrb: |
97 | case INS_ldrh: |
98 | case INS_ldr: |
99 | immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size); |
100 | break; |
101 | |
102 | default: |
103 | assert(!"Unexpected instruction in genInstrWithConstant" ); |
104 | break; |
105 | } |
106 | |
107 | if (immFitsInIns) |
108 | { |
109 | // generate a single instruction that encodes the immediate directly |
110 | getEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm); |
111 | } |
112 | else |
113 | { |
114 | // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit |
115 | assert(tmpReg != REG_NA); |
116 | |
117 | // generate two or more instructions |
118 | |
119 | // first we load the immediate into tmpReg |
120 | instGen_Set_Reg_To_Imm(size, tmpReg, imm); |
121 | regSet.verifyRegUsed(tmpReg); |
122 | |
123 | // when we are in an unwind code region |
124 | // we record the extra instructions using unwindPadding() |
125 | if (inUnwindRegion) |
126 | { |
127 | compiler->unwindPadding(); |
128 | } |
129 | |
130 | // generate the instruction using a three register encoding with the immediate in tmpReg |
131 | getEmitter()->emitIns_R_R_R(ins, attr, reg1, reg2, tmpReg); |
132 | } |
133 | return immFitsInIns; |
134 | } |
135 | |
136 | //------------------------------------------------------------------------ |
137 | // genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog |
138 | // or the epilog. The unwind codes for the generated instructions are produced. An available temporary |
139 | // register is required to be specified, in case the constant is too large to encode in an "add" |
140 | // instruction (or "sub" instruction if we choose to use one), such that we need to load the constant |
141 | // into a register first, before using it. |
142 | // |
143 | // Arguments: |
144 | // spDelta - the value to add to SP (can be negative) |
145 | // tmpReg - an available temporary register |
146 | // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. |
147 | // Otherwise, we don't touch it. |
148 | // |
149 | // Return Value: |
150 | // None. |
151 | |
152 | void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero) |
153 | { |
154 | // Even though INS_add is specified here, the encoder will choose either |
155 | // an INS_add or an INS_sub and encode the immediate as a positive value |
156 | // |
157 | if (genInstrWithConstant(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true)) |
158 | { |
159 | if (pTmpRegIsZero != nullptr) |
160 | { |
161 | *pTmpRegIsZero = false; |
162 | } |
163 | } |
164 | |
165 | // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive |
166 | // value. |
167 | ssize_t spDeltaAbs = abs(spDelta); |
168 | unsigned unwindSpDelta = (unsigned)spDeltaAbs; |
169 | assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in a unsigned |
170 | |
171 | compiler->unwindAllocStack(unwindSpDelta); |
172 | } |
173 | |
174 | //------------------------------------------------------------------------ |
175 | // genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet |
176 | // prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction. |
177 | // The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that |
178 | // instruction. |
179 | // |
180 | // Arguments: |
181 | // reg1 - First register of pair to save. |
182 | // reg2 - Second register of pair to save. |
183 | // spOffset - The offset from SP to store reg1 (must be positive or zero). |
184 | // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or |
185 | // zero). |
186 | // lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This |
187 | // allows us to emit the "save_next" unwind code. |
188 | // tmpReg - An available temporary register. Needed for the case of large frames. |
189 | // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. |
190 | // Otherwise, we don't touch it. |
191 | // |
192 | // Return Value: |
193 | // None. |
194 | |
195 | void CodeGen::genPrologSaveRegPair(regNumber reg1, |
196 | regNumber reg2, |
197 | int spOffset, |
198 | int spDelta, |
199 | bool lastSavedWasPreviousPair, |
200 | regNumber tmpReg, |
201 | bool* pTmpRegIsZero) |
202 | { |
203 | assert(spOffset >= 0); |
204 | assert(spDelta <= 0); |
205 | assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned |
206 | assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both |
207 | // FP/SIMD |
208 | |
209 | bool needToSaveRegs = true; |
210 | if (spDelta != 0) |
211 | { |
212 | if ((spOffset == 0) && (spDelta >= -512)) |
213 | { |
214 | // We can use pre-indexed addressing. |
215 | // stp REG, REG + 1, [SP, #spDelta]! |
216 | // 64-bit STP offset range: -512 to 504, multiple of 8. |
217 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); |
218 | compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); |
219 | |
220 | needToSaveRegs = false; |
221 | } |
222 | else // (spDelta < -512)) |
223 | { |
224 | // We need to do SP adjustment separately from the store; we can't fold in a pre-indexed addressing and the |
225 | // non-zero offset. |
226 | |
227 | // generate sub SP,SP,imm |
228 | genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); |
229 | } |
230 | } |
231 | |
232 | if (needToSaveRegs) |
233 | { |
234 | // stp REG, REG + 1, [SP, #offset] |
235 | // 64-bit STP offset range: -512 to 504, multiple of 8. |
236 | assert(spOffset <= 504); |
237 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); |
238 | |
239 | if (lastSavedWasPreviousPair) |
240 | { |
241 | // This works as long as we've only been saving pairs, in order, and we've saved the previous one just |
242 | // before this one. |
243 | compiler->unwindSaveNext(); |
244 | } |
245 | else |
246 | { |
247 | compiler->unwindSaveRegPair(reg1, reg2, spOffset); |
248 | } |
249 | } |
250 | } |
251 | |
252 | //------------------------------------------------------------------------ |
253 | // genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or |
254 | // floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), |
255 | // then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not |
256 | // below it, which we currently don't support. This restriction could be loosened if the callers change to handle it |
257 | // (and this function changes to support using pre-indexed STR addressing). The caller must ensure that we can use the |
258 | // STR instruction, and that spOffset will be in the legal range for that instruction. |
259 | // |
260 | // Arguments: |
261 | // reg1 - Register to save. |
262 | // spOffset - The offset from SP to store reg1 (must be positive or zero). |
263 | // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or |
264 | // zero). |
265 | // tmpReg - An available temporary register. Needed for the case of large frames. |
266 | // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. |
267 | // Otherwise, we don't touch it. |
268 | // |
269 | // Return Value: |
270 | // None. |
271 | |
272 | void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) |
273 | { |
274 | assert(spOffset >= 0); |
275 | assert(spDelta <= 0); |
276 | assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned |
277 | |
278 | if (spDelta != 0) |
279 | { |
280 | // generate sub SP,SP,imm |
281 | genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); |
282 | } |
283 | |
284 | // str REG, [SP, #offset] |
285 | // 64-bit STR offset range: 0 to 32760, multiple of 8. |
286 | getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); |
287 | compiler->unwindSaveReg(reg1, spOffset); |
288 | } |
289 | |
290 | //------------------------------------------------------------------------ |
291 | // genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. |
292 | // The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. |
293 | // The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that |
294 | // instruction. |
295 | // |
296 | // Arguments: |
297 | // reg1 - First register of pair to restore. |
298 | // reg2 - Second register of pair to restore. |
299 | // spOffset - The offset from SP to load reg1 (must be positive or zero). |
300 | // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or |
301 | // zero). |
302 | // tmpReg - An available temporary register. Needed for the case of large frames. |
303 | // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. |
304 | // Otherwise, we don't touch it. |
305 | // |
306 | // Return Value: |
307 | // None. |
308 | |
309 | void CodeGen::genEpilogRestoreRegPair( |
310 | regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) |
311 | { |
312 | assert(spOffset >= 0); |
313 | assert(spDelta >= 0); |
314 | assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned |
315 | |
316 | if (spDelta != 0) |
317 | { |
318 | if ((spOffset == 0) && (spDelta <= 504)) |
319 | { |
320 | // Fold the SP change into this instruction. |
321 | // ldp reg1, reg2, [SP], #spDelta |
322 | getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); |
323 | compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); |
324 | } |
325 | else // (spDelta > 504)) |
326 | { |
327 | // Can't fold in the SP change; need to use a separate ADD instruction. |
328 | |
329 | // ldp reg1, reg2, [SP, #offset] |
330 | getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); |
331 | compiler->unwindSaveRegPair(reg1, reg2, spOffset); |
332 | |
333 | // generate add SP,SP,imm |
334 | genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); |
335 | } |
336 | } |
337 | else |
338 | { |
339 | // ldp reg1, reg2, [SP, #offset] |
340 | getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); |
341 | compiler->unwindSaveRegPair(reg1, reg2, spOffset); |
342 | } |
343 | } |
344 | |
345 | //------------------------------------------------------------------------ |
346 | // genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. |
347 | // |
348 | // Arguments: |
349 | // reg1 - Register to restore. |
350 | // spOffset - The offset from SP to restore reg1 (must be positive or zero). |
351 | // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or |
352 | // zero). |
353 | // tmpReg - An available temporary register. Needed for the case of large frames. |
354 | // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. |
355 | // Otherwise, we don't touch it. |
356 | // |
357 | // Return Value: |
358 | // None. |
359 | |
360 | void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) |
361 | { |
362 | assert(spOffset >= 0); |
363 | assert(spDelta >= 0); |
364 | assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned |
365 | |
366 | // ldr reg1, [SP, #offset] |
367 | getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); |
368 | compiler->unwindSaveReg(reg1, spOffset); |
369 | |
370 | if (spDelta != 0) |
371 | { |
372 | // generate add SP,SP,imm |
373 | genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); |
374 | } |
375 | } |
376 | |
377 | #ifdef DEBUG |
378 | //------------------------------------------------------------------------ |
379 | // genCheckSPOffset: Check Stack Pointer(SP) offset value, |
380 | // it must be 8 to account for alignment for the odd count |
381 | // or it must be 0 for the even count. |
382 | // |
383 | // Arguments: |
384 | // isRegsCountOdd - true if number of registers to save/restore is odd; |
385 | // spOffset - stack pointer offset value; |
386 | // slotSize - stack slot size in bytes. |
387 | // |
388 | // static |
389 | void CodeGen::genCheckSPOffset(bool isRegsCountOdd, int spOffset, int slotSize) |
390 | { |
391 | if (isRegsCountOdd) |
392 | { |
393 | // The offset must be 8 to account for alignment for the odd count. |
394 | assert(spOffset == slotSize); |
395 | } |
396 | else |
397 | { |
398 | // The offset must be 0 for the even count. |
399 | assert(spOffset == 0); |
400 | } |
401 | } |
402 | #endif // DEBUG |
403 | |
404 | //------------------------------------------------------------------------ |
405 | // genBuildRegPairsStack: Build a stack of register pairs for prolog/epilog save/restore for the given mask. |
406 | // The first register pair will contain the lowest register. Register pairs will combine neighbor |
407 | // registers in pairs. If it can't be done (for example if we have a hole or this is the last reg in a mask with |
408 | // odd number of regs) then the second element of that RegPair will be REG_NA. |
409 | // |
410 | // Arguments: |
411 | // regsMask - a mask of registers for prolog/epilog generation; |
412 | // regStack - a regStack instance to build the stack in, used to save temp copyings. |
413 | // |
414 | // Return value: |
415 | // no return value; the regStack argument is modified. |
416 | // |
417 | // static |
418 | void CodeGen::(regMaskTP regsMask, ArrayStack<RegPair>* regStack) |
419 | { |
420 | assert(regStack != nullptr); |
421 | assert(regStack->Height() == 0); |
422 | |
423 | unsigned regsCount = genCountBits(regsMask); |
424 | |
425 | while (regsMask != RBM_NONE) |
426 | { |
427 | regMaskTP reg1Mask = genFindLowestBit(regsMask); |
428 | regNumber reg1 = genRegNumFromMask(reg1Mask); |
429 | regsMask &= ~reg1Mask; |
430 | regsCount -= 1; |
431 | |
432 | bool isPairSave = false; |
433 | if (regsCount > 0) |
434 | { |
435 | regMaskTP reg2Mask = genFindLowestBit(regsMask); |
436 | regNumber reg2 = genRegNumFromMask(reg2Mask); |
437 | if (reg2 == REG_NEXT(reg1)) |
438 | { |
439 | isPairSave = true; |
440 | |
441 | regsMask &= ~reg2Mask; |
442 | regsCount -= 1; |
443 | |
444 | regStack->Push(RegPair(reg1, reg2)); |
445 | } |
446 | } |
447 | if (!isPairSave) |
448 | { |
449 | regStack->Push(RegPair(reg1)); |
450 | } |
451 | } |
452 | assert(regsCount == 0 && regsMask == RBM_NONE); |
453 | } |
454 | |
455 | //------------------------------------------------------------------------ |
456 | // genGetSlotSizeForRegsInMask: Get the stack slot size appropriate for the register type from the mask. |
457 | // |
458 | // Arguments: |
459 | // regsMask - a mask of registers for prolog/epilog generation. |
460 | // |
461 | // Return value: |
462 | // stack slot size in bytes. |
463 | // |
464 | // static |
465 | int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask) |
466 | { |
467 | assert((regsMask & (RBM_CALLEE_SAVED | RBM_LR)) == regsMask); // Do not expect anything else. |
468 | |
469 | bool isIntMask = ((regsMask & RBM_ALLFLOAT) == 0); |
470 | #ifdef DEBUG |
471 | bool isFloatMask = ((regsMask & RBM_ALLFLOAT) == regsMask); |
472 | // Has to be either int or float. |
473 | assert(isIntMask != isFloatMask); |
474 | #endif // DEBUG |
475 | |
476 | int slotSize = isIntMask ? REGSIZE_BYTES : FPSAVE_REGSIZE_BYTES; |
477 | return slotSize; |
478 | } |
479 | |
480 | //------------------------------------------------------------------------ |
481 | // genSaveCalleeSavedRegisterGroup: Saves the group of registers described by the mask. |
482 | // All registers in the mask must be the same type (int or float). |
483 | // |
484 | // Arguments: |
485 | // regsMask - a mask of registers for prolog generation; |
486 | // spDelta - if non-zero, the amount to add to SP before the first register save (or together with it); |
487 | // spOffset - the offset from SP that is the beginning of the callee-saved register area; |
488 | // isRegsToSaveCountOdd - (DEBUG only) true if number of registers to save is odd. |
489 | // |
490 | // Return Value: |
491 | // SP offset after saving registers from this group. |
492 | // |
493 | int CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, |
494 | int spDelta, |
495 | int spOffset DEBUGARG(bool isRegsToSaveCountOdd)) |
496 | { |
497 | const int slotSize = genGetSlotSizeForRegsInMask(regsMask); |
498 | |
499 | #ifdef DEBUG |
500 | if (spDelta != 0) // The first store change SP offset, check its value before. |
501 | { |
502 | genCheckSPOffset(isRegsToSaveCountOdd, spOffset, slotSize); |
503 | } |
504 | #endif // DEBUG |
505 | |
506 | ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen)); |
507 | genBuildRegPairsStack(regsMask, ®Stack); |
508 | |
509 | bool lastSavedWasPair = false; // currently unused, see the comment below. |
510 | for (int i = 0; i < regStack.Height(); ++i) |
511 | { |
512 | RegPair regPair = regStack.Bottom(i); |
513 | if (regPair.reg2 != REG_NA) |
514 | { |
515 | // We can use a STP instruction. |
516 | genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr); |
517 | |
518 | // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating |
519 | // this epilog, to get the codes to match. Turn this off until that is better understood. |
520 | // lastSavedWasPair = true; |
521 | |
522 | spOffset += 2 * slotSize; |
523 | } |
524 | else |
525 | { |
526 | // No register pair; we use a STR instruction. |
527 | genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_IP0, nullptr); |
528 | |
529 | lastSavedWasPair = false; |
530 | spOffset += slotSize; |
531 | } |
532 | |
533 | spDelta = 0; // We've now changed SP already, if necessary; don't do it again. |
534 | } |
535 | return spOffset; |
536 | } |
537 | |
538 | //------------------------------------------------------------------------ |
539 | // genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame |
540 | // in the function or funclet prolog. The save set does not contain FP, since that is |
541 | // guaranteed to be saved separately, so we can set up chaining. We can only use the instructions |
542 | // that are allowed by the unwind codes. Integer registers are stored at lower addresses, |
543 | // FP/SIMD registers are stored at higher addresses. The caller ensures that |
544 | // there is enough space on the frame to store these registers, and that the store instructions |
545 | // we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to |
546 | // use. The caller can tell us to fold in a stack pointer adjustment, which we will do with the first instruction. Note |
547 | // that the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the |
548 | // stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved |
549 | // registers, though, we will have an empty aligment slot somewhere. It turns out we will put |
550 | // it below (at a lower address) the callee-saved registers, as that is currently how we |
551 | // do frame layout. This means that the first stack offset will be 8 and the stack pointer |
552 | // adjustment must be done by a SUB, and not folded in to a pre-indexed store. |
553 | // |
554 | // Arguments: |
555 | // regsToSaveMask - The mask of callee-saved registers to save. If empty, this function does nothing. |
556 | // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that |
557 | // if non-zero spDelta, then this is the offset of the first save *after* that |
558 | // SP adjustment. |
559 | // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or |
560 | // zero). |
561 | // |
562 | // Notes: |
563 | // the save set can contain LR in which case LR is saved along with the other callee-saved registers. |
564 | // But currently Jit doesn't use frames without frame pointer on arm64. |
565 | // |
566 | void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) |
567 | { |
568 | assert(spDelta <= 0); |
569 | unsigned regsToSaveCount = genCountBits(regsToSaveMask); |
570 | if (regsToSaveCount == 0) |
571 | { |
572 | if (spDelta != 0) |
573 | { |
574 | // Currently this is the case for varargs only |
575 | // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. |
576 | genStackPointerAdjustment(spDelta, REG_NA, nullptr); |
577 | } |
578 | return; |
579 | } |
580 | |
581 | assert((spDelta % 16) == 0); |
582 | assert((regsToSaveMask & RBM_FP) == 0); // We never save FP here. |
583 | |
584 | // We also save LR, even though it is not in RBM_CALLEE_SAVED. |
585 | assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); |
586 | |
587 | #ifdef DEBUG |
588 | bool isRegsToSaveCountOdd = ((regsToSaveCount % 2) != 0); |
589 | #endif // DEBUG |
590 | |
591 | int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP. |
592 | |
593 | regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT; |
594 | regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat; |
595 | |
596 | bool floatSavesSp = (maskSaveRegsInt == 0); |
597 | |
598 | if (maskSaveRegsInt != 0) |
599 | { |
600 | assert(!floatSavesSp); // We always change SP only once with the first save/last load. |
601 | |
602 | // Save the integer registers. |
603 | spOffset = genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, spOffset DEBUGARG(isRegsToSaveCountOdd)); |
604 | } |
605 | |
606 | if (maskSaveRegsFloat != 0) |
607 | { |
608 | int floatSpDelta = floatSavesSp ? spDelta : 0; |
609 | |
610 | // Save the floating-point/SIMD registers |
611 | spOffset = |
612 | genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, floatSpDelta, spOffset DEBUGARG(isRegsToSaveCountOdd)); |
613 | spDelta = 0; |
614 | } |
615 | } |
616 | |
617 | //------------------------------------------------------------------------ |
618 | // genRestoreCalleeSavedRegisterGroup: Saves the group of registers described by the mask. |
619 | // All registers in the mask must be the same type (int or float). |
620 | // |
621 | // Arguments: |
622 | // regsMask - a mask of registers for epilog generation; |
623 | // spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it); |
624 | // spOffset - the offset from SP that is the beginning of the callee-saved register area; |
625 | // |
626 | // Return Value: |
627 | // SP offset after restoring registers from this group. |
628 | // |
629 | int CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, |
630 | int spDelta, |
631 | int spOffset DEBUGARG(bool isRegsToRestoreCountOdd)) |
632 | { |
633 | const int slotSize = genGetSlotSizeForRegsInMask(regsMask); |
634 | |
635 | ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen)); |
636 | genBuildRegPairsStack(regsMask, ®Stack); |
637 | |
638 | int stackDelta = 0; |
639 | for (int i = 0; i < regStack.Height(); ++i) |
640 | { |
641 | bool lastRestoreInTheGroup = (i == regStack.Height() - 1); |
642 | bool updateStackDelta = lastRestoreInTheGroup && (spDelta != 0); |
643 | if (updateStackDelta) |
644 | { |
645 | // Update stack delta only if it is the last restore (the first save). |
646 | assert(stackDelta == 0); |
647 | stackDelta = spDelta; |
648 | } |
649 | |
650 | RegPair regPair = regStack.Index(i); |
651 | if (regPair.reg2 != REG_NA) |
652 | { |
653 | spOffset -= 2 * slotSize; |
654 | |
655 | genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, REG_IP1, nullptr); |
656 | } |
657 | else |
658 | { |
659 | spOffset -= slotSize; |
660 | genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_IP1, nullptr); |
661 | } |
662 | } |
663 | |
664 | #ifdef DEBUG |
665 | if (stackDelta != 0) // The last restore (the first save) changes SP offset, check its value after. |
666 | { |
667 | genCheckSPOffset(isRegsToRestoreCountOdd, spOffset, slotSize); |
668 | } |
669 | #endif // DEBUG |
670 | return spOffset; |
671 | } |
672 | |
673 | //------------------------------------------------------------------------ |
674 | // genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame |
675 | // in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp(). |
676 | // |
677 | // Arguments: |
678 | // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. |
679 | // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. |
680 | // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or |
681 | // zero). |
682 | // |
683 | // Here's an example restore sequence: |
684 | // ldp x27, x28, [sp,#96] |
685 | // ldp x25, x26, [sp,#80] |
686 | // ldp x23, x24, [sp,#64] |
687 | // ldp x21, x22, [sp,#48] |
688 | // ldp x19, x20, [sp,#32] |
689 | // |
690 | // For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and |
691 | // the last restore adjusts SP by the specified amount. For example: |
692 | // ldp x27, x28, [sp,#64] |
693 | // ldp x25, x26, [sp,#48] |
694 | // ldp x23, x24, [sp,#32] |
695 | // ldp x21, x22, [sp,#16] |
696 | // ldp x19, x20, [sp], #80 |
697 | // |
698 | // Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when |
699 | // generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store. |
700 | // |
701 | // Return Value: |
702 | // None. |
703 | |
704 | void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) |
705 | { |
706 | assert(spDelta >= 0); |
707 | unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); |
708 | if (regsToRestoreCount == 0) |
709 | { |
710 | if (spDelta != 0) |
711 | { |
712 | // Currently this is the case for varargs only |
713 | // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. |
714 | genStackPointerAdjustment(spDelta, REG_NA, nullptr); |
715 | } |
716 | return; |
717 | } |
718 | |
719 | assert((spDelta % 16) == 0); |
720 | assert((regsToRestoreMask & RBM_FP) == 0); // We never restore FP here. |
721 | |
722 | // We also restore LR, even though it is not in RBM_CALLEE_SAVED. |
723 | assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); |
724 | |
725 | #ifdef DEBUG |
726 | bool isRegsToRestoreCountOdd = ((regsToRestoreCount % 2) != 0); |
727 | #endif // DEBUG |
728 | |
729 | assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); |
730 | int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; // Point past the end, to start. We |
731 | // predecrement to find the offset to |
732 | // load from. |
733 | |
734 | // We want to restore in the opposite order we saved, so the unwind codes match. Be careful to handle odd numbers of |
735 | // callee-saved registers properly. |
736 | |
737 | regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT; |
738 | regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat; |
739 | |
740 | bool floatRestoresSp = (maskRestoreRegsInt == 0); |
741 | |
742 | if (maskRestoreRegsFloat != 0) |
743 | { |
744 | int floatSpDelta = floatRestoresSp ? spDelta : 0; |
745 | // Restore the floating-point/SIMD registers |
746 | spOffset = genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, floatSpDelta, |
747 | spOffset DEBUGARG(isRegsToRestoreCountOdd)); |
748 | } |
749 | |
750 | if (maskRestoreRegsInt != 0) |
751 | { |
752 | assert(!floatRestoresSp); // We always change SP only once with the first save/last load. |
753 | |
754 | // Restore the integer registers |
755 | spOffset = |
756 | genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spDelta, spOffset DEBUGARG(isRegsToRestoreCountOdd)); |
757 | } |
758 | } |
759 | |
760 | // clang-format off |
761 | /***************************************************************************** |
762 | * |
763 | * Generates code for an EH funclet prolog. |
764 | * |
765 | * Funclets have the following incoming arguments: |
766 | * |
767 | * catch: x0 = the exception object that was caught (see GT_CATCH_ARG) |
768 | * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function |
769 | * finally/fault: none |
770 | * |
771 | * Funclets set the following registers on exit: |
772 | * |
773 | * catch: x0 = the address at which execution should resume (see BBJ_EHCATCHRET) |
774 | * filter: x0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) |
775 | * finally/fault: none |
776 | * |
777 | * The ARM64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size, |
778 | * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16): |
779 | * |
780 | * Frame type 1: |
781 | * For #outsz == 0 and #framesz <= 512: |
782 | * stp fp,lr,[sp,-#framesz]! ; establish the frame, save FP/LR |
783 | * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary |
784 | * |
785 | * The funclet frame is thus: |
786 | * |
787 | * | | |
788 | * |-----------------------| |
789 | * | incoming | |
790 | * | arguments | |
791 | * +=======================+ <---- Caller's SP |
792 | * |Callee saved registers | // multiple of 8 bytes |
793 | * |-----------------------| |
794 | * | PSP slot | // 8 bytes (omitted in CoreRT ABI) |
795 | * |-----------------------| |
796 | * ~ alignment padding ~ // To make the whole frame 16 byte aligned. |
797 | * |-----------------------| |
798 | * | Saved FP, LR | // 16 bytes |
799 | * |-----------------------| <---- Ambient SP |
800 | * | | | |
801 | * ~ | Stack grows ~ |
802 | * | | downward | |
803 | * V |
804 | * |
805 | * Frame type 2: |
806 | * For #outsz != 0 and #framesz <= 512: |
807 | * sub sp,sp,#framesz ; establish the frame |
808 | * stp fp,lr,[sp,#outsz] ; save FP/LR. |
809 | * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary |
810 | * |
811 | * The funclet frame is thus: |
812 | * |
813 | * | | |
814 | * |-----------------------| |
815 | * | incoming | |
816 | * | arguments | |
817 | * +=======================+ <---- Caller's SP |
818 | * |Callee saved registers | // multiple of 8 bytes |
819 | * |-----------------------| |
820 | * | PSP slot | // 8 bytes (omitted in CoreRT ABI) |
821 | * |-----------------------| |
822 | * ~ alignment padding ~ // To make the whole frame 16 byte aligned. |
823 | * |-----------------------| |
824 | * | Saved FP, LR | // 16 bytes |
825 | * |-----------------------| |
826 | * | Outgoing arg space | // multiple of 8 bytes |
827 | * |-----------------------| <---- Ambient SP |
828 | * | | | |
829 | * ~ | Stack grows ~ |
830 | * | | downward | |
831 | * V |
832 | * |
833 | * Frame type 3: |
834 | * For #framesz > 512: |
835 | * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168 |
836 | * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary |
837 | * sub sp,sp,#outsz ; create space for outgoing argument space |
838 | * |
839 | * The funclet frame is thus: |
840 | * |
841 | * | | |
842 | * |-----------------------| |
843 | * | incoming | |
844 | * | arguments | |
845 | * +=======================+ <---- Caller's SP |
846 | * |Callee saved registers | // multiple of 8 bytes |
847 | * |-----------------------| |
848 | * | PSP slot | // 8 bytes (omitted in CoreRT ABI) |
849 | * |-----------------------| |
850 | * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned |
851 | * |-----------------------| |
852 | * | Saved FP, LR | // 16 bytes |
853 | * |-----------------------| |
854 | * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space). |
855 | * |-----------------------| |
856 | * | Outgoing arg space | // multiple of 8 bytes |
857 | * |-----------------------| <---- Ambient SP |
858 | * | | | |
859 | * ~ | Stack grows ~ |
860 | * | | downward | |
861 | * V |
862 | * |
863 | * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3, |
864 | * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack |
865 | * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes: |
866 | * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a |
867 | * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main |
868 | * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call). |
869 | * In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym, |
870 | * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the |
871 | * outgoing argument space. Both changes to SP might need to add alignment padding. |
872 | * |
873 | * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP |
874 | * as in the main function. |
875 | * |
876 | * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. |
877 | * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. |
878 | * |
879 | * if (this is a filter funclet) |
880 | * { |
881 | * // x1 on entry to a filter funclet is CallerSP of the containing function: |
882 | * // either the main function, or the funclet for a handler that this filter is dynamically nested within. |
883 | * // Note that a filter can be dynamically nested within a funclet even if it is not statically within |
884 | * // a funclet. Consider: |
885 | * // |
886 | * // try { |
887 | * // try { |
888 | * // throw new Exception(); |
889 | * // } catch(Exception) { |
890 | * // throw new Exception(); // The exception thrown here ... |
891 | * // } |
892 | * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack |
893 | * // } filter-handler { |
894 | * // } |
895 | * // |
896 | * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will |
897 | * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always |
898 | * // create a main function PSP for any function with a filter. |
899 | * |
900 | * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) |
901 | * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP |
902 | * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer |
903 | * } |
904 | * else |
905 | * { |
906 | * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. |
907 | * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. |
908 | * |
909 | * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch. |
910 | * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP |
911 | * } |
912 | * |
913 | * An example epilog sequence is then: |
914 | * |
915 | * add sp,sp,#outsz ; if any outgoing argument space |
916 | * ... ; restore callee-saved registers |
917 | * ldp x19,x20,[sp,#xxx] |
918 | * ldp fp,lr,[sp],#framesz |
919 | * ret lr |
920 | * |
921 | * The funclet frame is thus: |
922 | * |
923 | * | | |
924 | * |-----------------------| |
925 | * | incoming | |
926 | * | arguments | |
927 | * +=======================+ <---- Caller's SP |
928 | * |Callee saved registers | // multiple of 8 bytes |
929 | * |-----------------------| |
930 | * | PSP slot | // 8 bytes (omitted in CoreRT ABI) |
931 | * |-----------------------| |
932 | * | Saved FP, LR | // 16 bytes |
933 | * |-----------------------| |
934 | * ~ alignment padding ~ // To make the whole frame 16 byte aligned. |
935 | * |-----------------------| |
936 | * | Outgoing arg space | // multiple of 8 bytes |
937 | * |-----------------------| <---- Ambient SP |
938 | * | | | |
939 | * ~ | Stack grows ~ |
940 | * | | downward | |
941 | * V |
942 | */ |
943 | // clang-format on |
944 | |
945 | void CodeGen::genFuncletProlog(BasicBlock* block) |
946 | { |
947 | #ifdef DEBUG |
948 | if (verbose) |
949 | printf("*************** In genFuncletProlog()\n" ); |
950 | #endif |
951 | |
952 | assert(block != NULL); |
953 | assert(block->bbFlags & BBF_FUNCLET_BEG); |
954 | |
955 | ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true); |
956 | |
957 | gcInfo.gcResetForBB(); |
958 | |
959 | compiler->unwindBegProlog(); |
960 | |
961 | regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; |
962 | regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; |
963 | |
964 | // Funclets must always save LR and FP, since when we have funclets we must have an FP frame. |
965 | assert((maskSaveRegsInt & RBM_LR) != 0); |
966 | assert((maskSaveRegsInt & RBM_FP) != 0); |
967 | |
968 | bool isFilter = (block->bbCatchTyp == BBCT_FILTER); |
969 | |
970 | regMaskTP maskArgRegsLiveIn; |
971 | if (isFilter) |
972 | { |
973 | maskArgRegsLiveIn = RBM_R0 | RBM_R1; |
974 | } |
975 | else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) |
976 | { |
977 | maskArgRegsLiveIn = RBM_NONE; |
978 | } |
979 | else |
980 | { |
981 | maskArgRegsLiveIn = RBM_R0; |
982 | } |
983 | |
984 | int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta; |
985 | |
986 | if (genFuncletInfo.fiFrameType == 1) |
987 | { |
988 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, |
989 | INS_OPTS_PRE_INDEX); |
990 | compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); |
991 | |
992 | assert(genFuncletInfo.fiSpDelta2 == 0); |
993 | assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0); |
994 | } |
995 | else if (genFuncletInfo.fiFrameType == 2) |
996 | { |
997 | // fiFrameType==2 constraints: |
998 | assert(genFuncletInfo.fiSpDelta1 < 0); |
999 | assert(genFuncletInfo.fiSpDelta1 >= -512); |
1000 | |
1001 | // generate sub SP,SP,imm |
1002 | genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr); |
1003 | |
1004 | assert(genFuncletInfo.fiSpDelta2 == 0); |
1005 | |
1006 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, |
1007 | genFuncletInfo.fiSP_to_FPLR_save_delta); |
1008 | compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); |
1009 | } |
1010 | else |
1011 | { |
1012 | assert(genFuncletInfo.fiFrameType == 3); |
1013 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, |
1014 | INS_OPTS_PRE_INDEX); |
1015 | compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); |
1016 | |
1017 | lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet. |
1018 | } |
1019 | maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now |
1020 | |
1021 | genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0); |
1022 | |
1023 | if (genFuncletInfo.fiFrameType == 3) |
1024 | { |
1025 | // Note that genFuncletInfo.fiSpDelta2 is always a negative value |
1026 | assert(genFuncletInfo.fiSpDelta2 < 0); |
1027 | |
1028 | // generate sub SP,SP,imm |
1029 | genStackPointerAdjustment(genFuncletInfo.fiSpDelta2, REG_R2, nullptr); |
1030 | } |
1031 | |
1032 | // This is the end of the OS-reported prolog for purposes of unwinding |
1033 | compiler->unwindEndProlog(); |
1034 | |
1035 | // If there is no PSPSym (CoreRT ABI), we are done. |
1036 | if (compiler->lvaPSPSym == BAD_VAR_NUM) |
1037 | { |
1038 | return; |
1039 | } |
1040 | |
1041 | if (isFilter) |
1042 | { |
1043 | // This is the first block of a filter |
1044 | // Note that register x1 = CallerSP of the containing function |
1045 | // X1 is overwritten by the first Load (new callerSP) |
1046 | // X2 is scratch when we have a large constant offset |
1047 | |
1048 | // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) |
1049 | genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1, |
1050 | genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false); |
1051 | regSet.verifyRegUsed(REG_R1); |
1052 | |
1053 | // Store the PSP value (aka CallerSP) |
1054 | genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE, |
1055 | genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false); |
1056 | |
1057 | // re-establish the frame pointer |
1058 | genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta, |
1059 | REG_R2, false); |
1060 | } |
1061 | else // This is a non-filter funclet |
1062 | { |
1063 | // X3 is scratch, X2 can also become scratch |
1064 | |
1065 | // compute the CallerSP, given the frame pointer. x3 is scratch. |
1066 | genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, |
1067 | REG_R2, false); |
1068 | regSet.verifyRegUsed(REG_R3); |
1069 | |
1070 | genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE, |
1071 | genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false); |
1072 | } |
1073 | } |
1074 | |
1075 | /***************************************************************************** |
1076 | * |
1077 | * Generates code for an EH funclet epilog. |
1078 | */ |
1079 | |
1080 | void CodeGen::genFuncletEpilog() |
1081 | { |
1082 | #ifdef DEBUG |
1083 | if (verbose) |
1084 | printf("*************** In genFuncletEpilog()\n" ); |
1085 | #endif |
1086 | |
1087 | ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); |
1088 | |
1089 | bool unwindStarted = false; |
1090 | |
1091 | if (!unwindStarted) |
1092 | { |
1093 | // We can delay this until we know we'll generate an unwindable instruction, if necessary. |
1094 | compiler->unwindBegEpilog(); |
1095 | unwindStarted = true; |
1096 | } |
1097 | |
1098 | regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; |
1099 | regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat; |
1100 | |
1101 | // Funclets must always save LR and FP, since when we have funclets we must have an FP frame. |
1102 | assert((maskRestoreRegsInt & RBM_LR) != 0); |
1103 | assert((maskRestoreRegsInt & RBM_FP) != 0); |
1104 | |
1105 | maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end |
1106 | |
1107 | int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta; |
1108 | |
1109 | if (genFuncletInfo.fiFrameType == 3) |
1110 | { |
1111 | // Note that genFuncletInfo.fiSpDelta2 is always a negative value |
1112 | assert(genFuncletInfo.fiSpDelta2 < 0); |
1113 | |
1114 | // generate add SP,SP,imm |
1115 | genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr); |
1116 | |
1117 | lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; |
1118 | } |
1119 | |
1120 | regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat; |
1121 | genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0); |
1122 | |
1123 | if (genFuncletInfo.fiFrameType == 1) |
1124 | { |
1125 | getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1, |
1126 | INS_OPTS_POST_INDEX); |
1127 | compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); |
1128 | |
1129 | assert(genFuncletInfo.fiSpDelta2 == 0); |
1130 | assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0); |
1131 | } |
1132 | else if (genFuncletInfo.fiFrameType == 2) |
1133 | { |
1134 | getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, |
1135 | genFuncletInfo.fiSP_to_FPLR_save_delta); |
1136 | compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); |
1137 | |
1138 | // fiFrameType==2 constraints: |
1139 | assert(genFuncletInfo.fiSpDelta1 < 0); |
1140 | assert(genFuncletInfo.fiSpDelta1 >= -512); |
1141 | |
1142 | // generate add SP,SP,imm |
1143 | genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_NA, nullptr); |
1144 | |
1145 | assert(genFuncletInfo.fiSpDelta2 == 0); |
1146 | } |
1147 | else |
1148 | { |
1149 | assert(genFuncletInfo.fiFrameType == 3); |
1150 | |
1151 | getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1, |
1152 | INS_OPTS_POST_INDEX); |
1153 | compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); |
1154 | } |
1155 | |
1156 | inst_RV(INS_ret, REG_LR, TYP_I_IMPL); |
1157 | compiler->unwindReturn(REG_LR); |
1158 | |
1159 | compiler->unwindEndEpilog(); |
1160 | } |
1161 | |
1162 | /***************************************************************************** |
1163 | * |
1164 | * Capture the information used to generate the funclet prologs and epilogs. |
1165 | * Note that all funclet prologs are identical, and all funclet epilogs are |
1166 | * identical (per type: filters are identical, and non-filters are identical). |
1167 | * Thus, we compute the data used for these just once. |
1168 | * |
1169 | * See genFuncletProlog() for more information about the prolog/epilog sequences. |
1170 | */ |
1171 | |
1172 | void CodeGen::genCaptureFuncletPrologEpilogInfo() |
1173 | { |
1174 | if (!compiler->ehAnyFunclets()) |
1175 | return; |
1176 | |
1177 | assert(isFramePointerUsed()); |
1178 | assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be |
1179 | // finalized |
1180 | |
1181 | genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); |
1182 | |
1183 | regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; |
1184 | assert((rsMaskSaveRegs & RBM_LR) != 0); |
1185 | assert((rsMaskSaveRegs & RBM_FP) != 0); |
1186 | |
1187 | unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0; |
1188 | |
1189 | unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); |
1190 | unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize; |
1191 | if (compiler->info.compIsVarArgs) |
1192 | { |
1193 | // For varargs we always save all of the integer register arguments |
1194 | // so that they are contiguous with the incoming stack arguments. |
1195 | saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES; |
1196 | } |
1197 | unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); |
1198 | |
1199 | assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); |
1200 | unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); |
1201 | |
1202 | unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; |
1203 | assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0); |
1204 | |
1205 | int SP_to_FPLR_save_delta; |
1206 | int SP_to_PSP_slot_delta; |
1207 | int CallerSP_to_PSP_slot_delta; |
1208 | |
1209 | if (maxFuncletFrameSizeAligned <= 512) |
1210 | { |
1211 | unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; |
1212 | unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); |
1213 | assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned); |
1214 | |
1215 | unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; |
1216 | assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); |
1217 | |
1218 | SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize; |
1219 | SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad; |
1220 | CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES); |
1221 | |
1222 | if (compiler->lvaOutgoingArgSpaceSize == 0) |
1223 | { |
1224 | genFuncletInfo.fiFrameType = 1; |
1225 | } |
1226 | else |
1227 | { |
1228 | genFuncletInfo.fiFrameType = 2; |
1229 | } |
1230 | genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; |
1231 | genFuncletInfo.fiSpDelta2 = 0; |
1232 | |
1233 | assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned); |
1234 | } |
1235 | else |
1236 | { |
1237 | unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; |
1238 | assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); |
1239 | |
1240 | SP_to_FPLR_save_delta = outgoingArgSpaceAligned; |
1241 | SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad; |
1242 | CallerSP_to_PSP_slot_delta = |
1243 | -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad); |
1244 | |
1245 | genFuncletInfo.fiFrameType = 3; |
1246 | genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned; |
1247 | genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned; |
1248 | |
1249 | assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned); |
1250 | } |
1251 | |
1252 | /* Now save it for future use */ |
1253 | |
1254 | genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; |
1255 | genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; |
1256 | genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; |
1257 | genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES; |
1258 | genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; |
1259 | |
1260 | #ifdef DEBUG |
1261 | if (verbose) |
1262 | { |
1263 | printf("\n" ); |
1264 | printf("Funclet prolog / epilog info\n" ); |
1265 | printf(" Save regs: " ); |
1266 | dspRegMask(genFuncletInfo.fiSaveRegs); |
1267 | printf("\n" ); |
1268 | printf(" Function CallerSP-to-FP delta: %d\n" , genFuncletInfo.fiFunction_CallerSP_to_FP_delta); |
1269 | printf(" SP to FP/LR save location delta: %d\n" , genFuncletInfo.fiSP_to_FPLR_save_delta); |
1270 | printf(" SP to PSP slot delta: %d\n" , genFuncletInfo.fiSP_to_PSP_slot_delta); |
1271 | printf(" SP to callee-saved area delta: %d\n" , genFuncletInfo.fiSP_to_CalleeSave_delta); |
1272 | printf(" Caller SP to PSP slot delta: %d\n" , genFuncletInfo.fiCallerSP_to_PSP_slot_delta); |
1273 | printf(" Frame type: %d\n" , genFuncletInfo.fiFrameType); |
1274 | printf(" SP delta 1: %d\n" , genFuncletInfo.fiSpDelta1); |
1275 | printf(" SP delta 2: %d\n" , genFuncletInfo.fiSpDelta2); |
1276 | |
1277 | if (compiler->lvaPSPSym != BAD_VAR_NUM) |
1278 | { |
1279 | if (CallerSP_to_PSP_slot_delta != |
1280 | compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging |
1281 | { |
1282 | printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n" , |
1283 | compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); |
1284 | } |
1285 | } |
1286 | } |
1287 | |
1288 | assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0); |
1289 | assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0); |
1290 | assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0); |
1291 | assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0); |
1292 | |
1293 | if (compiler->lvaPSPSym != BAD_VAR_NUM) |
1294 | { |
1295 | assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == |
1296 | compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and |
1297 | // funclet! |
1298 | } |
1299 | #endif // DEBUG |
1300 | } |
1301 | |
1302 | /* |
1303 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
1304 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
1305 | XX XX |
1306 | XX End Prolog / Epilog XX |
1307 | XX XX |
1308 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
1309 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
1310 | */ |
1311 | |
1312 | BasicBlock* CodeGen::genCallFinally(BasicBlock* block) |
1313 | { |
1314 | // Generate a call to the finally, like this: |
1315 | // mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used |
1316 | // bl finally-funclet |
1317 | // b finally-return // Only for non-retless finally calls |
1318 | // The 'b' can be a NOP if we're going to the next block. |
1319 | |
1320 | if (compiler->lvaPSPSym != BAD_VAR_NUM) |
1321 | { |
1322 | getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0); |
1323 | } |
1324 | else |
1325 | { |
1326 | getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE); |
1327 | } |
1328 | getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest); |
1329 | |
1330 | if (block->bbFlags & BBF_RETLESS_CALL) |
1331 | { |
1332 | // We have a retless call, and the last instruction generated was a call. |
1333 | // If the next block is in a different EH region (or is the end of the code |
1334 | // block), then we need to generate a breakpoint here (since it will never |
1335 | // get executed) to get proper unwind behavior. |
1336 | |
1337 | if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) |
1338 | { |
1339 | instGen(INS_bkpt); // This should never get executed |
1340 | } |
1341 | } |
1342 | else |
1343 | { |
1344 | // Because of the way the flowgraph is connected, the liveness info for this one instruction |
1345 | // after the call is not (can not be) correct in cases where a variable has a last use in the |
1346 | // handler. So turn off GC reporting for this single instruction. |
1347 | getEmitter()->emitDisableGC(); |
1348 | |
1349 | // Now go to where the finally funclet needs to return to. |
1350 | if (block->bbNext->bbJumpDest == block->bbNext->bbNext) |
1351 | { |
1352 | // Fall-through. |
1353 | // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly |
1354 | // to the next instruction? This would depend on stack walking from within the finally |
1355 | // handler working without this instruction being in this special EH region. |
1356 | instGen(INS_nop); |
1357 | } |
1358 | else |
1359 | { |
1360 | inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); |
1361 | } |
1362 | |
1363 | getEmitter()->emitEnableGC(); |
1364 | } |
1365 | |
1366 | // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the |
1367 | // jump target using bbJumpDest - that is already used to point |
1368 | // to the finally block. So just skip past the BBJ_ALWAYS unless the |
1369 | // block is RETLESS. |
1370 | if (!(block->bbFlags & BBF_RETLESS_CALL)) |
1371 | { |
1372 | assert(block->isBBCallAlwaysPair()); |
1373 | block = block->bbNext; |
1374 | } |
1375 | return block; |
1376 | } |
1377 | |
1378 | void CodeGen::genEHCatchRet(BasicBlock* block) |
1379 | { |
1380 | // For long address (default): `adrp + add` will be emitted. |
1381 | // For short address (proven later): `adr` will be emitted. |
1382 | getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); |
1383 | } |
1384 | |
1385 | // move an immediate value into an integer register |
1386 | |
1387 | void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags) |
1388 | { |
1389 | // reg cannot be a FP register |
1390 | assert(!genIsValidFloatReg(reg)); |
1391 | if (!compiler->opts.compReloc) |
1392 | { |
1393 | size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs |
1394 | } |
1395 | |
1396 | if (EA_IS_RELOC(size)) |
1397 | { |
1398 | // This emits a pair of adrp/add (two instructions) with fix-ups. |
1399 | getEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm); |
1400 | } |
1401 | else if (imm == 0) |
1402 | { |
1403 | instGen_Set_Reg_To_Zero(size, reg, flags); |
1404 | } |
1405 | else |
1406 | { |
1407 | if (emitter::emitIns_valid_imm_for_mov(imm, size)) |
1408 | { |
1409 | getEmitter()->emitIns_R_I(INS_mov, size, reg, imm); |
1410 | } |
1411 | else |
1412 | { |
1413 | // Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword |
1414 | // There are three forms |
1415 | // movk which loads into any halfword preserving the remaining halfwords |
1416 | // movz which loads into any halfword zeroing the remaining halfwords |
1417 | // movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting the register |
1418 | // In some cases it is preferable to use movn, because it has the side effect of filling the other halfwords |
1419 | // with ones |
1420 | |
1421 | // Determine whether movn or movz will require the fewest instructions to populate the immediate |
1422 | int preferMovn = 0; |
1423 | |
1424 | for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16) |
1425 | { |
1426 | if (uint16_t(imm >> i) == 0xffff) |
1427 | ++preferMovn; // a single movk 0xffff could be skipped if movn was used |
1428 | else if (uint16_t(imm >> i) == 0x0000) |
1429 | --preferMovn; // a single movk 0 could be skipped if movz was used |
1430 | } |
1431 | |
1432 | // Select the first instruction. Any additional instruction will use movk |
1433 | instruction ins = (preferMovn > 0) ? INS_movn : INS_movz; |
1434 | |
1435 | // Initial movz or movn will fill the remaining bytes with the skipVal |
1436 | // This can allow skipping filling a halfword |
1437 | uint16_t skipVal = (preferMovn > 0) ? 0xffff : 0; |
1438 | |
1439 | unsigned bits = (size == EA_8BYTE) ? 64 : 32; |
1440 | |
1441 | // Iterate over imm examining 16 bits at a time |
1442 | for (unsigned i = 0; i < bits; i += 16) |
1443 | { |
1444 | uint16_t imm16 = uint16_t(imm >> i); |
1445 | |
1446 | if (imm16 != skipVal) |
1447 | { |
1448 | if (ins == INS_movn) |
1449 | { |
1450 | // For the movn case, we need to bitwise invert the immediate. This is because |
1451 | // (movn x0, ~imm16) === (movz x0, imm16; or x0, x0, #0xffff`ffff`ffff`0000) |
1452 | imm16 = ~imm16; |
1453 | } |
1454 | |
1455 | getEmitter()->emitIns_R_I_I(ins, size, reg, imm16, i, INS_OPTS_LSL); |
1456 | |
1457 | // Once the initial movz/movn is emitted the remaining instructions will all use movk |
1458 | ins = INS_movk; |
1459 | } |
1460 | } |
1461 | |
1462 | // We must emit a movn or movz or we have not done anything |
1463 | // The cases which hit this assert should be (emitIns_valid_imm_for_mov() == true) and |
1464 | // should not be in this else condition |
1465 | assert(ins == INS_movk); |
1466 | } |
1467 | // The caller may have requested that the flags be set on this mov (rarely/never) |
1468 | if (flags == INS_FLAGS_SET) |
1469 | { |
1470 | getEmitter()->emitIns_R_I(INS_tst, size, reg, 0); |
1471 | } |
1472 | } |
1473 | |
1474 | regSet.verifyRegUsed(reg); |
1475 | } |
1476 | |
1477 | /*********************************************************************************** |
1478 | * |
1479 | * Generate code to set a register 'targetReg' of type 'targetType' to the constant |
1480 | * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call |
1481 | * genProduceReg() on the target register. |
1482 | */ |
1483 | void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree) |
1484 | { |
1485 | switch (tree->gtOper) |
1486 | { |
1487 | case GT_CNS_INT: |
1488 | { |
1489 | // relocatable values tend to come down as a CNS_INT of native int type |
1490 | // so the line between these two opcodes is kind of blurry |
1491 | GenTreeIntConCommon* con = tree->AsIntConCommon(); |
1492 | ssize_t cnsVal = con->IconValue(); |
1493 | |
1494 | if (con->ImmedValNeedsReloc(compiler)) |
1495 | { |
1496 | instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); |
1497 | regSet.verifyRegUsed(targetReg); |
1498 | } |
1499 | else |
1500 | { |
1501 | genSetRegToIcon(targetReg, cnsVal, targetType); |
1502 | } |
1503 | } |
1504 | break; |
1505 | |
1506 | case GT_CNS_DBL: |
1507 | { |
1508 | emitter* emit = getEmitter(); |
1509 | emitAttr size = emitActualTypeSize(tree); |
1510 | double constValue = tree->AsDblCon()->gtDconVal; |
1511 | |
1512 | // Make sure we use "movi reg, 0x00" only for positive zero (0.0) and not for negative zero (-0.0) |
1513 | if (*(__int64*)&constValue == 0) |
1514 | { |
1515 | // A faster/smaller way to generate 0.0 |
1516 | // We will just zero out the entire vector register for both float and double |
1517 | emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B); |
1518 | } |
1519 | else if (emitter::emitIns_valid_imm_for_fmov(constValue)) |
1520 | { |
1521 | // We can load the FP constant using the fmov FP-immediate for this constValue |
1522 | emit->emitIns_R_F(INS_fmov, size, targetReg, constValue); |
1523 | } |
1524 | else |
1525 | { |
1526 | // Get a temp integer register to compute long address. |
1527 | regNumber addrReg = tree->GetSingleTempReg(); |
1528 | |
1529 | // We must load the FP constant from the constant pool |
1530 | // Emit a data section constant for the float or double constant. |
1531 | CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size); |
1532 | // For long address (default): `adrp + ldr + fmov` will be emitted. |
1533 | // For short address (proven later), `ldr` will be emitted. |
1534 | emit->emitIns_R_C(INS_ldr, size, targetReg, addrReg, hnd, 0); |
1535 | } |
1536 | } |
1537 | break; |
1538 | |
1539 | default: |
1540 | unreached(); |
1541 | } |
1542 | } |
1543 | |
1544 | // Generate code to get the high N bits of a N*N=2N bit multiplication result |
1545 | void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) |
1546 | { |
1547 | assert(!treeNode->gtOverflowEx()); |
1548 | |
1549 | genConsumeOperands(treeNode); |
1550 | |
1551 | regNumber targetReg = treeNode->gtRegNum; |
1552 | var_types targetType = treeNode->TypeGet(); |
1553 | emitter* emit = getEmitter(); |
1554 | emitAttr attr = emitActualTypeSize(treeNode); |
1555 | unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED); |
1556 | |
1557 | GenTree* op1 = treeNode->gtGetOp1(); |
1558 | GenTree* op2 = treeNode->gtGetOp2(); |
1559 | |
1560 | assert(!varTypeIsFloating(targetType)); |
1561 | |
1562 | // The arithmetic node must be sitting in a register (since it's not contained) |
1563 | assert(targetReg != REG_NA); |
1564 | |
1565 | if (EA_SIZE(attr) == EA_8BYTE) |
1566 | { |
1567 | instruction ins = isUnsigned ? INS_umulh : INS_smulh; |
1568 | |
1569 | regNumber r = emit->emitInsTernary(ins, attr, treeNode, op1, op2); |
1570 | |
1571 | assert(r == targetReg); |
1572 | } |
1573 | else |
1574 | { |
1575 | assert(EA_SIZE(attr) == EA_4BYTE); |
1576 | |
1577 | instruction ins = isUnsigned ? INS_umull : INS_smull; |
1578 | |
1579 | regNumber r = emit->emitInsTernary(ins, EA_4BYTE, treeNode, op1, op2); |
1580 | |
1581 | emit->emitIns_R_R_I(isUnsigned ? INS_lsr : INS_asr, EA_8BYTE, targetReg, targetReg, 32); |
1582 | } |
1583 | |
1584 | genProduceReg(treeNode); |
1585 | } |
1586 | |
1587 | // Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR |
1588 | // This method is expected to have called genConsumeOperands() before calling it. |
1589 | void CodeGen::genCodeForBinary(GenTreeOp* treeNode) |
1590 | { |
1591 | const genTreeOps oper = treeNode->OperGet(); |
1592 | regNumber targetReg = treeNode->gtRegNum; |
1593 | var_types targetType = treeNode->TypeGet(); |
1594 | emitter* emit = getEmitter(); |
1595 | |
1596 | assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND || |
1597 | oper == GT_OR || oper == GT_XOR); |
1598 | |
1599 | GenTree* op1 = treeNode->gtGetOp1(); |
1600 | GenTree* op2 = treeNode->gtGetOp2(); |
1601 | instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); |
1602 | |
1603 | if ((treeNode->gtFlags & GTF_SET_FLAGS) != 0) |
1604 | { |
1605 | switch (oper) |
1606 | { |
1607 | case GT_ADD: |
1608 | ins = INS_adds; |
1609 | break; |
1610 | case GT_SUB: |
1611 | ins = INS_subs; |
1612 | break; |
1613 | case GT_AND: |
1614 | ins = INS_ands; |
1615 | break; |
1616 | default: |
1617 | noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set" ); |
1618 | } |
1619 | } |
1620 | |
1621 | // The arithmetic node must be sitting in a register (since it's not contained) |
1622 | assert(targetReg != REG_NA); |
1623 | |
1624 | regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); |
1625 | assert(r == targetReg); |
1626 | |
1627 | genProduceReg(treeNode); |
1628 | } |
1629 | |
1630 | //------------------------------------------------------------------------ |
1631 | // genCodeForLclVar: Produce code for a GT_LCL_VAR node. |
1632 | // |
1633 | // Arguments: |
1634 | // tree - the GT_LCL_VAR node |
1635 | // |
1636 | void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) |
1637 | { |
1638 | var_types targetType = tree->TypeGet(); |
1639 | emitter* emit = getEmitter(); |
1640 | |
1641 | unsigned varNum = tree->gtLclNum; |
1642 | assert(varNum < compiler->lvaCount); |
1643 | LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); |
1644 | bool isRegCandidate = varDsc->lvIsRegCandidate(); |
1645 | |
1646 | // lcl_vars are not defs |
1647 | assert((tree->gtFlags & GTF_VAR_DEF) == 0); |
1648 | |
1649 | // If this is a register candidate that has been spilled, genConsumeReg() will |
1650 | // reload it at the point of use. Otherwise, if it's not in a register, we load it here. |
1651 | |
1652 | if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED)) |
1653 | { |
1654 | // targetType must be a normal scalar type and not a TYP_STRUCT |
1655 | assert(targetType != TYP_STRUCT); |
1656 | |
1657 | instruction ins = ins_Load(targetType); |
1658 | emitAttr attr = emitTypeSize(targetType); |
1659 | |
1660 | attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr); |
1661 | |
1662 | emit->emitIns_R_S(ins, attr, tree->gtRegNum, varNum, 0); |
1663 | genProduceReg(tree); |
1664 | } |
1665 | } |
1666 | |
1667 | //------------------------------------------------------------------------ |
1668 | // genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. |
1669 | // |
1670 | // Arguments: |
1671 | // tree - the GT_STORE_LCL_FLD node |
1672 | // |
1673 | void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) |
1674 | { |
1675 | var_types targetType = tree->TypeGet(); |
1676 | regNumber targetReg = tree->gtRegNum; |
1677 | emitter* emit = getEmitter(); |
1678 | noway_assert(targetType != TYP_STRUCT); |
1679 | |
1680 | #ifdef FEATURE_SIMD |
1681 | // storing of TYP_SIMD12 (i.e. Vector3) field |
1682 | if (tree->TypeGet() == TYP_SIMD12) |
1683 | { |
1684 | genStoreLclTypeSIMD12(tree); |
1685 | return; |
1686 | } |
1687 | #endif // FEATURE_SIMD |
1688 | |
1689 | // record the offset |
1690 | unsigned offset = tree->gtLclOffs; |
1691 | |
1692 | // We must have a stack store with GT_STORE_LCL_FLD |
1693 | noway_assert(targetReg == REG_NA); |
1694 | |
1695 | unsigned varNum = tree->gtLclNum; |
1696 | assert(varNum < compiler->lvaCount); |
1697 | LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); |
1698 | |
1699 | // Ensure that lclVar nodes are typed correctly. |
1700 | assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); |
1701 | |
1702 | GenTree* data = tree->gtOp1; |
1703 | genConsumeRegs(data); |
1704 | |
1705 | regNumber dataReg = REG_NA; |
1706 | if (data->isContainedIntOrIImmed()) |
1707 | { |
1708 | assert(data->IsIntegralConst(0)); |
1709 | dataReg = REG_ZR; |
1710 | } |
1711 | else |
1712 | { |
1713 | assert(!data->isContained()); |
1714 | dataReg = data->gtRegNum; |
1715 | } |
1716 | assert(dataReg != REG_NA); |
1717 | |
1718 | instruction ins = ins_Store(targetType); |
1719 | |
1720 | emitAttr attr = emitTypeSize(targetType); |
1721 | |
1722 | attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr); |
1723 | |
1724 | emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); |
1725 | |
1726 | genUpdateLife(tree); |
1727 | |
1728 | varDsc->lvRegNum = REG_STK; |
1729 | } |
1730 | |
1731 | //------------------------------------------------------------------------ |
1732 | // genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. |
1733 | // |
1734 | // Arguments: |
1735 | // tree - the GT_STORE_LCL_VAR node |
1736 | // |
1737 | void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) |
1738 | { |
1739 | var_types targetType = tree->TypeGet(); |
1740 | regNumber targetReg = tree->gtRegNum; |
1741 | emitter* emit = getEmitter(); |
1742 | |
1743 | unsigned varNum = tree->gtLclNum; |
1744 | assert(varNum < compiler->lvaCount); |
1745 | LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); |
1746 | |
1747 | // Ensure that lclVar nodes are typed correctly. |
1748 | assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); |
1749 | |
1750 | GenTree* data = tree->gtOp1; |
1751 | |
1752 | // var = call, where call returns a multi-reg return value |
1753 | // case is handled separately. |
1754 | if (data->gtSkipReloadOrCopy()->IsMultiRegCall()) |
1755 | { |
1756 | genMultiRegCallStoreToLocal(tree); |
1757 | } |
1758 | else |
1759 | { |
1760 | #ifdef FEATURE_SIMD |
1761 | // storing of TYP_SIMD12 (i.e. Vector3) field |
1762 | if (tree->TypeGet() == TYP_SIMD12) |
1763 | { |
1764 | genStoreLclTypeSIMD12(tree); |
1765 | return; |
1766 | } |
1767 | #endif // FEATURE_SIMD |
1768 | |
1769 | genConsumeRegs(data); |
1770 | |
1771 | regNumber dataReg = REG_NA; |
1772 | if (data->isContainedIntOrIImmed()) |
1773 | { |
1774 | // This is only possible for a zero-init. |
1775 | assert(data->IsIntegralConst(0)); |
1776 | |
1777 | if (varTypeIsSIMD(targetType)) |
1778 | { |
1779 | assert(targetReg != REG_NA); |
1780 | getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B); |
1781 | genProduceReg(tree); |
1782 | return; |
1783 | } |
1784 | |
1785 | dataReg = REG_ZR; |
1786 | } |
1787 | else |
1788 | { |
1789 | assert(!data->isContained()); |
1790 | dataReg = data->gtRegNum; |
1791 | } |
1792 | assert(dataReg != REG_NA); |
1793 | |
1794 | if (targetReg == REG_NA) // store into stack based LclVar |
1795 | { |
1796 | inst_set_SV_var(tree); |
1797 | |
1798 | instruction ins = ins_Store(targetType); |
1799 | emitAttr attr = emitTypeSize(targetType); |
1800 | |
1801 | attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr); |
1802 | |
1803 | emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); |
1804 | |
1805 | genUpdateLife(tree); |
1806 | |
1807 | varDsc->lvRegNum = REG_STK; |
1808 | } |
1809 | else // store into register (i.e move into register) |
1810 | { |
1811 | if (dataReg != targetReg) |
1812 | { |
1813 | // Assign into targetReg when dataReg (from op1) is not the same register |
1814 | inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); |
1815 | } |
1816 | genProduceReg(tree); |
1817 | } |
1818 | } |
1819 | } |
1820 | |
1821 | //------------------------------------------------------------------------ |
1822 | // genSimpleReturn: Generates code for simple return statement for arm64. |
1823 | // |
1824 | // Note: treeNode's and op1's registers are already consumed. |
1825 | // |
1826 | // Arguments: |
1827 | // treeNode - The GT_RETURN or GT_RETFILT tree node with non-struct and non-void type |
1828 | // |
1829 | // Return Value: |
1830 | // None |
1831 | // |
1832 | void CodeGen::genSimpleReturn(GenTree* treeNode) |
1833 | { |
1834 | assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); |
1835 | GenTree* op1 = treeNode->gtGetOp1(); |
1836 | var_types targetType = treeNode->TypeGet(); |
1837 | |
1838 | assert(!isStructReturn(treeNode)); |
1839 | assert(targetType != TYP_VOID); |
1840 | |
1841 | regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET; |
1842 | |
1843 | bool movRequired = (op1->gtRegNum != retReg); |
1844 | |
1845 | if (!movRequired) |
1846 | { |
1847 | if (op1->OperGet() == GT_LCL_VAR) |
1848 | { |
1849 | GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); |
1850 | bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate(); |
1851 | if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0)) |
1852 | { |
1853 | // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR |
1854 | |
1855 | unsigned lclNum = lcl->gtLclNum; |
1856 | LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); |
1857 | var_types op1Type = genActualType(op1->TypeGet()); |
1858 | var_types lclType = genActualType(varDsc->TypeGet()); |
1859 | |
1860 | if (genTypeSize(op1Type) < genTypeSize(lclType)) |
1861 | { |
1862 | movRequired = true; |
1863 | } |
1864 | } |
1865 | } |
1866 | } |
1867 | if (movRequired) |
1868 | { |
1869 | emitAttr attr = emitActualTypeSize(targetType); |
1870 | getEmitter()->emitIns_R_R(INS_mov, attr, retReg, op1->gtRegNum); |
1871 | } |
1872 | } |
1873 | |
1874 | /*********************************************************************************************** |
1875 | * Generate code for localloc |
1876 | */ |
1877 | void CodeGen::genLclHeap(GenTree* tree) |
1878 | { |
1879 | assert(tree->OperGet() == GT_LCLHEAP); |
1880 | assert(compiler->compLocallocUsed); |
1881 | |
1882 | GenTree* size = tree->gtOp.gtOp1; |
1883 | noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); |
1884 | |
1885 | regNumber targetReg = tree->gtRegNum; |
1886 | regNumber regCnt = REG_NA; |
1887 | regNumber pspSymReg = REG_NA; |
1888 | var_types type = genActualType(size->gtType); |
1889 | emitAttr easz = emitTypeSize(type); |
1890 | BasicBlock* endLabel = nullptr; |
1891 | BasicBlock* loop = nullptr; |
1892 | unsigned stackAdjustment = 0; |
1893 | |
1894 | noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes |
1895 | noway_assert(genStackLevel == 0); // Can't have anything on the stack |
1896 | |
1897 | // compute the amount of memory to allocate to properly STACK_ALIGN. |
1898 | size_t amount = 0; |
1899 | if (size->IsCnsIntOrI()) |
1900 | { |
1901 | // If size is a constant, then it must be contained. |
1902 | assert(size->isContained()); |
1903 | |
1904 | // If amount is zero then return null in targetReg |
1905 | amount = size->gtIntCon.gtIconVal; |
1906 | if (amount == 0) |
1907 | { |
1908 | instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg); |
1909 | goto BAILOUT; |
1910 | } |
1911 | |
1912 | // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN |
1913 | amount = AlignUp(amount, STACK_ALIGN); |
1914 | } |
1915 | else |
1916 | { |
1917 | // If 0 bail out by returning null in targetReg |
1918 | genConsumeRegAndCopy(size, targetReg); |
1919 | endLabel = genCreateTempLabel(); |
1920 | getEmitter()->emitIns_R_R(INS_tst, easz, targetReg, targetReg); |
1921 | emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); |
1922 | inst_JMP(jmpEqual, endLabel); |
1923 | |
1924 | // Compute the size of the block to allocate and perform alignment. |
1925 | // If compInitMem=true, we can reuse targetReg as regcnt, |
1926 | // since we don't need any internal registers. |
1927 | if (compiler->info.compInitMem) |
1928 | { |
1929 | assert(tree->AvailableTempRegCount() == 0); |
1930 | regCnt = targetReg; |
1931 | } |
1932 | else |
1933 | { |
1934 | regCnt = tree->ExtractTempReg(); |
1935 | if (regCnt != targetReg) |
1936 | { |
1937 | inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); |
1938 | } |
1939 | } |
1940 | |
1941 | // Align to STACK_ALIGN |
1942 | // regCnt will be the total number of bytes to localloc |
1943 | inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type)); |
1944 | inst_RV_IV(INS_and, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); |
1945 | } |
1946 | |
1947 | stackAdjustment = 0; |
1948 | |
1949 | // If we have an outgoing arg area then we must adjust the SP by popping off the |
1950 | // outgoing arg area. We will restore it right before we return from this method. |
1951 | // |
1952 | // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following |
1953 | // are the cases that need to be handled: |
1954 | // i) Method has out-going arg area. |
1955 | // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs). |
1956 | // Therefore, we will pop off the out-going arg area from the stack pointer before allocating the localloc |
1957 | // space. |
1958 | // ii) Method has no out-going arg area. |
1959 | // Nothing to pop off from the stack. |
1960 | if (compiler->lvaOutgoingArgSpaceSize > 0) |
1961 | { |
1962 | assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain |
1963 | // aligned |
1964 | inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); |
1965 | stackAdjustment += compiler->lvaOutgoingArgSpaceSize; |
1966 | } |
1967 | |
1968 | if (size->IsCnsIntOrI()) |
1969 | { |
1970 | // We should reach here only for non-zero, constant size allocations. |
1971 | assert(amount > 0); |
1972 | |
1973 | // For small allocations we will generate up to four stp instructions, to zero 16 to 64 bytes. |
1974 | static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); |
1975 | assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time |
1976 | size_t stpCount = amount / (REGSIZE_BYTES * 2); |
1977 | if (stpCount <= 4) |
1978 | { |
1979 | while (stpCount != 0) |
1980 | { |
1981 | // We can use pre-indexed addressing. |
1982 | // stp ZR, ZR, [SP, #-16]! // STACK_ALIGN is 16 |
1983 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX); |
1984 | stpCount -= 1; |
1985 | } |
1986 | |
1987 | goto ALLOC_DONE; |
1988 | } |
1989 | else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <= |
1990 | { |
1991 | // Since the size is less than a page, simply adjust the SP value. |
1992 | // The SP might already be in the guard page, so we must touch it BEFORE |
1993 | // the alloc, not after. |
1994 | |
1995 | // ldr wz, [SP, #0] |
1996 | getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SP, 0); |
1997 | |
1998 | inst_RV_IV(INS_sub, REG_SP, amount, EA_PTRSIZE); |
1999 | |
2000 | goto ALLOC_DONE; |
2001 | } |
2002 | |
2003 | // else, "mov regCnt, amount" |
2004 | // If compInitMem=true, we can reuse targetReg as regcnt. |
2005 | // Since size is a constant, regCnt is not yet initialized. |
2006 | assert(regCnt == REG_NA); |
2007 | if (compiler->info.compInitMem) |
2008 | { |
2009 | assert(tree->AvailableTempRegCount() == 0); |
2010 | regCnt = targetReg; |
2011 | } |
2012 | else |
2013 | { |
2014 | regCnt = tree->ExtractTempReg(); |
2015 | } |
2016 | genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG); |
2017 | } |
2018 | |
2019 | if (compiler->info.compInitMem) |
2020 | { |
2021 | BasicBlock* loop = genCreateTempLabel(); |
2022 | |
2023 | // At this point 'regCnt' is set to the total number of bytes to locAlloc. |
2024 | // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid |
2025 | // by tickling the pages, we will just push 0's on the stack. |
2026 | // |
2027 | // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2 |
2028 | // and localloc size is a multiple of STACK_ALIGN. |
2029 | |
2030 | // Loop: |
2031 | genDefineTempLabel(loop); |
2032 | |
2033 | // We can use pre-indexed addressing. |
2034 | // stp ZR, ZR, [SP, #-16]! |
2035 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX); |
2036 | |
2037 | // If not done, loop |
2038 | // Note that regCnt is the number of bytes to stack allocate. |
2039 | // Therefore we need to subtract 16 from regcnt here. |
2040 | assert(genIsValidIntReg(regCnt)); |
2041 | inst_RV_IV(INS_subs, regCnt, 16, emitActualTypeSize(type)); |
2042 | emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); |
2043 | inst_JMP(jmpNotEqual, loop); |
2044 | } |
2045 | else |
2046 | { |
2047 | // At this point 'regCnt' is set to the total number of bytes to localloc. |
2048 | // |
2049 | // We don't need to zero out the allocated memory. However, we do have |
2050 | // to tickle the pages to ensure that SP is always valid and is |
2051 | // in sync with the "stack guard page". Note that in the worst |
2052 | // case SP is on the last byte of the guard page. Thus you must |
2053 | // touch SP-0 first not SP-0x1000. |
2054 | // |
2055 | // Another subtlety is that you don't want SP to be exactly on the |
2056 | // boundary of the guard page because PUSH is predecrement, thus |
2057 | // call setup would not touch the guard page but just beyond it |
2058 | // |
2059 | // Note that we go through a few hoops so that SP never points to |
2060 | // illegal pages at any time during the tickling process |
2061 | // |
2062 | // subs regCnt, SP, regCnt // regCnt now holds ultimate SP |
2063 | // bvc Loop // result is smaller than orignial SP (no wrap around) |
2064 | // mov regCnt, #0 // Overflow, pick lowest possible value |
2065 | // |
2066 | // Loop: |
2067 | // ldr wzr, [SP + 0] // tickle the page - read from the page |
2068 | // sub regTmp, SP, PAGE_SIZE // decrement SP by eeGetPageSize() |
2069 | // cmp regTmp, regCnt |
2070 | // jb Done |
2071 | // mov SP, regTmp |
2072 | // j Loop |
2073 | // |
2074 | // Done: |
2075 | // mov SP, regCnt |
2076 | // |
2077 | |
2078 | // Setup the regTmp |
2079 | regNumber regTmp = tree->GetSingleTempReg(); |
2080 | |
2081 | BasicBlock* loop = genCreateTempLabel(); |
2082 | BasicBlock* done = genCreateTempLabel(); |
2083 | |
2084 | // subs regCnt, SP, regCnt // regCnt now holds ultimate SP |
2085 | getEmitter()->emitIns_R_R_R(INS_subs, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); |
2086 | |
2087 | inst_JMP(EJ_vc, loop); // branch if the V flag is not set |
2088 | |
2089 | // Overflow, set regCnt to lowest possible value |
2090 | instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); |
2091 | |
2092 | genDefineTempLabel(loop); |
2093 | |
2094 | // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page |
2095 | getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, 0); |
2096 | |
2097 | // decrement SP by eeGetPageSize() |
2098 | getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regTmp, REG_SPBASE, compiler->eeGetPageSize()); |
2099 | |
2100 | getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regCnt); |
2101 | emitJumpKind jmpLTU = genJumpKindForOper(GT_LT, CK_UNSIGNED); |
2102 | inst_JMP(jmpLTU, done); |
2103 | |
2104 | // Update SP to be at the next page of stack that we will tickle |
2105 | getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regTmp); |
2106 | |
2107 | // Jump to loop and tickle new stack address |
2108 | inst_JMP(EJ_jmp, loop); |
2109 | |
2110 | // Done with stack tickle loop |
2111 | genDefineTempLabel(done); |
2112 | |
2113 | // Now just move the final value to SP |
2114 | getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt); |
2115 | } |
2116 | |
2117 | ALLOC_DONE: |
2118 | // Re-adjust SP to allocate out-going arg area |
2119 | if (stackAdjustment != 0) |
2120 | { |
2121 | assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned |
2122 | assert(stackAdjustment > 0); |
2123 | getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int)stackAdjustment); |
2124 | |
2125 | // Return the stackalloc'ed address in result register. |
2126 | // TargetReg = SP + stackAdjustment. |
2127 | // |
2128 | getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, targetReg, REG_SPBASE, (int)stackAdjustment); |
2129 | } |
2130 | else // stackAdjustment == 0 |
2131 | { |
2132 | // Move the final value of SP to targetReg |
2133 | inst_RV_RV(INS_mov, targetReg, REG_SPBASE); |
2134 | } |
2135 | |
2136 | BAILOUT: |
2137 | if (endLabel != nullptr) |
2138 | genDefineTempLabel(endLabel); |
2139 | |
2140 | #if STACK_PROBES |
2141 | if (compiler->opts.compNeedStackProbes) |
2142 | { |
2143 | genGenerateStackProbe(); |
2144 | } |
2145 | #endif |
2146 | |
2147 | genProduceReg(tree); |
2148 | } |
2149 | |
2150 | //------------------------------------------------------------------------ |
2151 | // genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node. |
2152 | // |
2153 | // Arguments: |
2154 | // tree - the node |
2155 | // |
2156 | void CodeGen::genCodeForNegNot(GenTree* tree) |
2157 | { |
2158 | assert(tree->OperIs(GT_NEG, GT_NOT)); |
2159 | |
2160 | var_types targetType = tree->TypeGet(); |
2161 | |
2162 | assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType)); |
2163 | |
2164 | regNumber targetReg = tree->gtRegNum; |
2165 | instruction ins = genGetInsForOper(tree->OperGet(), targetType); |
2166 | |
2167 | // The arithmetic node must be sitting in a register (since it's not contained) |
2168 | assert(!tree->isContained()); |
2169 | // The dst can only be a register. |
2170 | assert(targetReg != REG_NA); |
2171 | |
2172 | GenTree* operand = tree->gtGetOp1(); |
2173 | assert(!operand->isContained()); |
2174 | // The src must be a register. |
2175 | regNumber operandReg = genConsumeReg(operand); |
2176 | |
2177 | getEmitter()->emitIns_R_R(ins, emitActualTypeSize(tree), targetReg, operandReg); |
2178 | |
2179 | genProduceReg(tree); |
2180 | } |
2181 | |
2182 | //------------------------------------------------------------------------ |
2183 | // genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD: |
2184 | // (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph; |
2185 | // (2) float/double MOD is morphed into a helper call by front-end. |
2186 | // |
2187 | // Arguments: |
2188 | // tree - the node |
2189 | // |
2190 | void CodeGen::genCodeForDivMod(GenTreeOp* tree) |
2191 | { |
2192 | assert(tree->OperIs(GT_DIV, GT_UDIV)); |
2193 | |
2194 | var_types targetType = tree->TypeGet(); |
2195 | emitter* emit = getEmitter(); |
2196 | |
2197 | genConsumeOperands(tree); |
2198 | |
2199 | if (varTypeIsFloating(targetType)) |
2200 | { |
2201 | // Floating point divide never raises an exception |
2202 | genCodeForBinary(tree); |
2203 | } |
2204 | else // an integer divide operation |
2205 | { |
2206 | GenTree* divisorOp = tree->gtGetOp2(); |
2207 | emitAttr size = EA_ATTR(genTypeSize(genActualType(tree->TypeGet()))); |
2208 | |
2209 | if (divisorOp->IsIntegralConst(0)) |
2210 | { |
2211 | // We unconditionally throw a divide by zero exception |
2212 | genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO); |
2213 | |
2214 | // We still need to call genProduceReg |
2215 | genProduceReg(tree); |
2216 | } |
2217 | else // the divisor is not the constant zero |
2218 | { |
2219 | regNumber divisorReg = divisorOp->gtRegNum; |
2220 | |
2221 | // Generate the require runtime checks for GT_DIV or GT_UDIV |
2222 | if (tree->gtOper == GT_DIV) |
2223 | { |
2224 | BasicBlock* sdivLabel = genCreateTempLabel(); |
2225 | |
2226 | // Two possible exceptions: |
2227 | // (AnyVal / 0) => DivideByZeroException |
2228 | // (MinInt / -1) => ArithmeticException |
2229 | // |
2230 | bool checkDividend = true; |
2231 | |
2232 | // Do we have an immediate for the 'divisorOp'? |
2233 | // |
2234 | if (divisorOp->IsCnsIntOrI()) |
2235 | { |
2236 | GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon(); |
2237 | ssize_t intConstValue = intConstTree->IconValue(); |
2238 | assert(intConstValue != 0); // already checked above by IsIntegralConst(0) |
2239 | if (intConstValue != -1) |
2240 | { |
2241 | checkDividend = false; // We statically know that the dividend is not -1 |
2242 | } |
2243 | } |
2244 | else // insert check for divison by zero |
2245 | { |
2246 | // Check if the divisor is zero throw a DivideByZeroException |
2247 | emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); |
2248 | emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); |
2249 | genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); |
2250 | } |
2251 | |
2252 | if (checkDividend) |
2253 | { |
2254 | // Check if the divisor is not -1 branch to 'sdivLabel' |
2255 | emit->emitIns_R_I(INS_cmp, size, divisorReg, -1); |
2256 | |
2257 | emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); |
2258 | inst_JMP(jmpNotEqual, sdivLabel); |
2259 | // If control flow continues past here the 'divisorReg' is known to be -1 |
2260 | |
2261 | regNumber dividendReg = tree->gtGetOp1()->gtRegNum; |
2262 | // At this point the divisor is known to be -1 |
2263 | // |
2264 | // Issue the 'adds zr, dividendReg, dividendReg' instruction |
2265 | // this will set both the Z and V flags only when dividendReg is MinInt |
2266 | // |
2267 | emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg); |
2268 | inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear |
2269 | genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flags is set throw |
2270 | // ArithmeticException |
2271 | |
2272 | genDefineTempLabel(sdivLabel); |
2273 | } |
2274 | genCodeForBinary(tree); // Generate the sdiv instruction |
2275 | } |
2276 | else // (tree->gtOper == GT_UDIV) |
2277 | { |
2278 | // Only one possible exception |
2279 | // (AnyVal / 0) => DivideByZeroException |
2280 | // |
2281 | // Note that division by the constant 0 was already checked for above by the |
2282 | // op2->IsIntegralConst(0) check |
2283 | // |
2284 | if (!divisorOp->IsCnsIntOrI()) |
2285 | { |
2286 | // divisorOp is not a constant, so it could be zero |
2287 | // |
2288 | emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); |
2289 | emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); |
2290 | genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); |
2291 | } |
2292 | genCodeForBinary(tree); |
2293 | } |
2294 | } |
2295 | } |
2296 | } |
2297 | |
2298 | // Generate code for InitBlk by performing a loop unroll |
2299 | // Preconditions: |
2300 | // a) Both the size and fill byte value are integer constants. |
2301 | // b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes. |
2302 | void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) |
2303 | { |
2304 | // Make sure we got the arguments of the initblk/initobj operation in the right registers |
2305 | unsigned size = initBlkNode->Size(); |
2306 | GenTree* dstAddr = initBlkNode->Addr(); |
2307 | GenTree* initVal = initBlkNode->Data(); |
2308 | if (initVal->OperIsInitVal()) |
2309 | { |
2310 | initVal = initVal->gtGetOp1(); |
2311 | } |
2312 | |
2313 | assert(dstAddr->isUsedFromReg()); |
2314 | assert(initVal->isUsedFromReg() && !initVal->IsIntegralConst(0) || initVal->IsIntegralConst(0)); |
2315 | assert(size != 0); |
2316 | assert(size <= INITBLK_UNROLL_LIMIT); |
2317 | |
2318 | emitter* emit = getEmitter(); |
2319 | |
2320 | genConsumeOperands(initBlkNode); |
2321 | |
2322 | if (initBlkNode->gtFlags & GTF_BLK_VOLATILE) |
2323 | { |
2324 | // issue a full memory barrier before a volatile initBlockUnroll operation |
2325 | instGen_MemoryBarrier(); |
2326 | } |
2327 | |
2328 | regNumber valReg = initVal->IsIntegralConst(0) ? REG_ZR : initVal->gtRegNum; |
2329 | |
2330 | assert(!initVal->IsIntegralConst(0) || (valReg == REG_ZR)); |
2331 | |
2332 | unsigned offset = 0; |
2333 | |
2334 | // Perform an unroll using stp. |
2335 | if (size >= 2 * REGSIZE_BYTES) |
2336 | { |
2337 | // Determine how many 16 byte slots |
2338 | size_t slots = size / (2 * REGSIZE_BYTES); |
2339 | |
2340 | while (slots-- > 0) |
2341 | { |
2342 | emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, valReg, valReg, dstAddr->gtRegNum, offset); |
2343 | offset += (2 * REGSIZE_BYTES); |
2344 | } |
2345 | } |
2346 | |
2347 | // Fill the remainder (15 bytes or less) if there's any. |
2348 | if ((size & 0xf) != 0) |
2349 | { |
2350 | if ((size & 8) != 0) |
2351 | { |
2352 | emit->emitIns_R_R_I(INS_str, EA_8BYTE, valReg, dstAddr->gtRegNum, offset); |
2353 | offset += 8; |
2354 | } |
2355 | if ((size & 4) != 0) |
2356 | { |
2357 | emit->emitIns_R_R_I(INS_str, EA_4BYTE, valReg, dstAddr->gtRegNum, offset); |
2358 | offset += 4; |
2359 | } |
2360 | if ((size & 2) != 0) |
2361 | { |
2362 | emit->emitIns_R_R_I(INS_strh, EA_2BYTE, valReg, dstAddr->gtRegNum, offset); |
2363 | offset += 2; |
2364 | } |
2365 | if ((size & 1) != 0) |
2366 | { |
2367 | emit->emitIns_R_R_I(INS_strb, EA_1BYTE, valReg, dstAddr->gtRegNum, offset); |
2368 | } |
2369 | } |
2370 | } |
2371 | |
2372 | // Generate code for a load pair from some address + offset |
2373 | // base: tree node which can be either a local address or arbitrary node |
2374 | // offset: distance from the base from which to load |
2375 | void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* base, unsigned offset) |
2376 | { |
2377 | emitter* emit = getEmitter(); |
2378 | |
2379 | if (base->OperIsLocalAddr()) |
2380 | { |
2381 | if (base->gtOper == GT_LCL_FLD_ADDR) |
2382 | offset += base->gtLclFld.gtLclOffs; |
2383 | |
2384 | emit->emitIns_R_R_S_S(INS_ldp, EA_8BYTE, EA_8BYTE, dst, dst2, base->gtLclVarCommon.gtLclNum, offset); |
2385 | } |
2386 | else |
2387 | { |
2388 | emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, dst, dst2, base->gtRegNum, offset); |
2389 | } |
2390 | } |
2391 | |
2392 | // Generate code for a store pair to some address + offset |
2393 | // base: tree node which can be either a local address or arbitrary node |
2394 | // offset: distance from the base from which to load |
2395 | void CodeGen::genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* base, unsigned offset) |
2396 | { |
2397 | emitter* emit = getEmitter(); |
2398 | |
2399 | if (base->OperIsLocalAddr()) |
2400 | { |
2401 | if (base->gtOper == GT_LCL_FLD_ADDR) |
2402 | offset += base->gtLclFld.gtLclOffs; |
2403 | |
2404 | emit->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, src, src2, base->gtLclVarCommon.gtLclNum, offset); |
2405 | } |
2406 | else |
2407 | { |
2408 | emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, src, src2, base->gtRegNum, offset); |
2409 | } |
2410 | } |
2411 | |
2412 | // Generate code for CpObj nodes wich copy structs that have interleaved |
2413 | // GC pointers. |
2414 | // For this case we'll generate a sequence of loads/stores in the case of struct |
2415 | // slots that don't contain GC pointers. The generated code will look like: |
2416 | // ldr tempReg, [R13, #8] |
2417 | // str tempReg, [R14, #8] |
2418 | // |
2419 | // In the case of a GC-Pointer we'll call the ByRef write barrier helper |
2420 | // who happens to use the same registers as the previous call to maintain |
2421 | // the same register requirements and register killsets: |
2422 | // bl CORINFO_HELP_ASSIGN_BYREF |
2423 | // |
2424 | // So finally an example would look like this: |
2425 | // ldr tempReg, [R13, #8] |
2426 | // str tempReg, [R14, #8] |
2427 | // bl CORINFO_HELP_ASSIGN_BYREF |
2428 | // ldr tempReg, [R13, #8] |
2429 | // str tempReg, [R14, #8] |
2430 | // bl CORINFO_HELP_ASSIGN_BYREF |
2431 | // ldr tempReg, [R13, #8] |
2432 | // str tempReg, [R14, #8] |
2433 | void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) |
2434 | { |
2435 | GenTree* dstAddr = cpObjNode->Addr(); |
2436 | GenTree* source = cpObjNode->Data(); |
2437 | var_types srcAddrType = TYP_BYREF; |
2438 | bool sourceIsLocal = false; |
2439 | |
2440 | assert(source->isContained()); |
2441 | if (source->gtOper == GT_IND) |
2442 | { |
2443 | GenTree* srcAddr = source->gtGetOp1(); |
2444 | assert(!srcAddr->isContained()); |
2445 | srcAddrType = srcAddr->TypeGet(); |
2446 | } |
2447 | else |
2448 | { |
2449 | noway_assert(source->IsLocal()); |
2450 | sourceIsLocal = true; |
2451 | } |
2452 | |
2453 | bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr(); |
2454 | |
2455 | #ifdef DEBUG |
2456 | assert(!dstAddr->isContained()); |
2457 | |
2458 | // This GenTree node has data about GC pointers, this means we're dealing |
2459 | // with CpObj. |
2460 | assert(cpObjNode->gtGcPtrCount > 0); |
2461 | #endif // DEBUG |
2462 | |
2463 | // Consume the operands and get them into the right registers. |
2464 | // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). |
2465 | genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA); |
2466 | gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); |
2467 | gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); |
2468 | |
2469 | unsigned slots = cpObjNode->gtSlots; |
2470 | |
2471 | // Temp register(s) used to perform the sequence of loads and stores. |
2472 | regNumber tmpReg = cpObjNode->ExtractTempReg(); |
2473 | regNumber tmpReg2 = REG_NA; |
2474 | |
2475 | assert(genIsValidIntReg(tmpReg)); |
2476 | assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF); |
2477 | assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF); |
2478 | |
2479 | if (slots > 1) |
2480 | { |
2481 | tmpReg2 = cpObjNode->GetSingleTempReg(); |
2482 | assert(tmpReg2 != tmpReg); |
2483 | assert(genIsValidIntReg(tmpReg2)); |
2484 | assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF); |
2485 | assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF); |
2486 | } |
2487 | |
2488 | if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) |
2489 | { |
2490 | // issue a full memory barrier before a volatile CpObj operation |
2491 | instGen_MemoryBarrier(); |
2492 | } |
2493 | |
2494 | emitter* emit = getEmitter(); |
2495 | |
2496 | BYTE* gcPtrs = cpObjNode->gtGcPtrs; |
2497 | |
2498 | // If we can prove it's on the stack we don't need to use the write barrier. |
2499 | if (dstOnStack) |
2500 | { |
2501 | unsigned i = 0; |
2502 | // Check if two or more remaining slots and use a ldp/stp sequence |
2503 | while (i < slots - 1) |
2504 | { |
2505 | emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0])); |
2506 | emitAttr attr1 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 1])); |
2507 | |
2508 | emit->emitIns_R_R_R_I(INS_ldp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE, |
2509 | INS_OPTS_POST_INDEX, attr1); |
2510 | emit->emitIns_R_R_R_I(INS_stp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE, |
2511 | INS_OPTS_POST_INDEX, attr1); |
2512 | i += 2; |
2513 | } |
2514 | |
2515 | // Use a ldr/str sequence for the last remainder |
2516 | if (i < slots) |
2517 | { |
2518 | emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0])); |
2519 | |
2520 | emit->emitIns_R_R_I(INS_ldr, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, |
2521 | INS_OPTS_POST_INDEX); |
2522 | emit->emitIns_R_R_I(INS_str, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, |
2523 | INS_OPTS_POST_INDEX); |
2524 | } |
2525 | } |
2526 | else |
2527 | { |
2528 | unsigned gcPtrCount = cpObjNode->gtGcPtrCount; |
2529 | |
2530 | unsigned i = 0; |
2531 | while (i < slots) |
2532 | { |
2533 | switch (gcPtrs[i]) |
2534 | { |
2535 | case TYPE_GC_NONE: |
2536 | // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp |
2537 | if ((i + 1 < slots) && (gcPtrs[i + 1] == TYPE_GC_NONE)) |
2538 | { |
2539 | emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, |
2540 | 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); |
2541 | emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, |
2542 | 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); |
2543 | ++i; // extra increment of i, since we are copying two items |
2544 | } |
2545 | else |
2546 | { |
2547 | emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, |
2548 | INS_OPTS_POST_INDEX); |
2549 | emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, |
2550 | INS_OPTS_POST_INDEX); |
2551 | } |
2552 | break; |
2553 | |
2554 | default: |
2555 | // In the case of a GC-Pointer we'll call the ByRef write barrier helper |
2556 | genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); |
2557 | |
2558 | gcPtrCount--; |
2559 | break; |
2560 | } |
2561 | ++i; |
2562 | } |
2563 | assert(gcPtrCount == 0); |
2564 | } |
2565 | |
2566 | if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) |
2567 | { |
2568 | // issue a INS_BARRIER_ISHLD after a volatile CpObj operation |
2569 | instGen_MemoryBarrier(INS_BARRIER_ISHLD); |
2570 | } |
2571 | |
2572 | // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF. |
2573 | // While we normally update GC info prior to the last instruction that uses them, |
2574 | // these actually live into the helper call. |
2575 | gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF); |
2576 | } |
2577 | |
2578 | // generate code do a switch statement based on a table of ip-relative offsets |
2579 | void CodeGen::genTableBasedSwitch(GenTree* treeNode) |
2580 | { |
2581 | genConsumeOperands(treeNode->AsOp()); |
2582 | regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum; |
2583 | regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum; |
2584 | |
2585 | regNumber tmpReg = treeNode->GetSingleTempReg(); |
2586 | |
2587 | // load the ip-relative offset (which is relative to start of fgFirstBB) |
2588 | getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL); |
2589 | |
2590 | // add it to the absolute address of fgFirstBB |
2591 | compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; |
2592 | getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); |
2593 | getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, baseReg, baseReg, tmpReg); |
2594 | |
2595 | // br baseReg |
2596 | getEmitter()->emitIns_R(INS_br, emitActualTypeSize(TYP_I_IMPL), baseReg); |
2597 | } |
2598 | |
2599 | // emits the table and an instruction to get the address of the first element |
2600 | void CodeGen::genJumpTable(GenTree* treeNode) |
2601 | { |
2602 | noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); |
2603 | assert(treeNode->OperGet() == GT_JMPTABLE); |
2604 | |
2605 | unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount; |
2606 | BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab; |
2607 | unsigned jmpTabOffs; |
2608 | unsigned jmpTabBase; |
2609 | |
2610 | jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true); |
2611 | |
2612 | jmpTabOffs = 0; |
2613 | |
2614 | JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n" , Compiler::s_compMethodsCount, jmpTabBase); |
2615 | |
2616 | for (unsigned i = 0; i < jumpCount; i++) |
2617 | { |
2618 | BasicBlock* target = *jumpTable++; |
2619 | noway_assert(target->bbFlags & BBF_JMP_TARGET); |
2620 | |
2621 | JITDUMP(" DD L_M%03u_" FMT_BB "\n" , Compiler::s_compMethodsCount, target->bbNum); |
2622 | |
2623 | getEmitter()->emitDataGenData(i, target); |
2624 | }; |
2625 | |
2626 | getEmitter()->emitDataGenEnd(); |
2627 | |
2628 | // Access to inline data is 'abstracted' by a special type of static member |
2629 | // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference |
2630 | // to constant data, not a real static field. |
2631 | getEmitter()->emitIns_R_C(INS_adr, emitActualTypeSize(TYP_I_IMPL), treeNode->gtRegNum, REG_NA, |
2632 | compiler->eeFindJitDataOffs(jmpTabBase), 0); |
2633 | genProduceReg(treeNode); |
2634 | } |
2635 | |
2636 | //------------------------------------------------------------------------ |
2637 | // genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node. |
2638 | // |
2639 | // Arguments: |
2640 | // treeNode - the GT_XADD/XCHG node |
2641 | // |
2642 | void CodeGen::genLockedInstructions(GenTreeOp* treeNode) |
2643 | { |
2644 | GenTree* data = treeNode->gtOp.gtOp2; |
2645 | GenTree* addr = treeNode->gtOp.gtOp1; |
2646 | regNumber targetReg = treeNode->gtRegNum; |
2647 | regNumber dataReg = data->gtRegNum; |
2648 | regNumber addrReg = addr->gtRegNum; |
2649 | |
2650 | genConsumeAddress(addr); |
2651 | genConsumeRegs(data); |
2652 | |
2653 | emitAttr dataSize = emitActualTypeSize(data); |
2654 | |
2655 | if (compiler->compSupports(InstructionSet_Atomics)) |
2656 | { |
2657 | assert(!data->isContainedIntOrIImmed()); |
2658 | |
2659 | switch (treeNode->gtOper) |
2660 | { |
2661 | case GT_XCHG: |
2662 | getEmitter()->emitIns_R_R_R(INS_swpal, dataSize, dataReg, targetReg, addrReg); |
2663 | break; |
2664 | case GT_XADD: |
2665 | if ((targetReg == REG_NA) || (targetReg == REG_ZR)) |
2666 | { |
2667 | getEmitter()->emitIns_R_R(INS_staddl, dataSize, dataReg, addrReg); |
2668 | } |
2669 | else |
2670 | { |
2671 | getEmitter()->emitIns_R_R_R(INS_ldaddal, dataSize, dataReg, targetReg, addrReg); |
2672 | } |
2673 | break; |
2674 | default: |
2675 | assert(!"Unexpected treeNode->gtOper" ); |
2676 | } |
2677 | |
2678 | instGen_MemoryBarrier(INS_BARRIER_ISH); |
2679 | } |
2680 | else |
2681 | { |
2682 | regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT); |
2683 | regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT); |
2684 | regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg; |
2685 | |
2686 | // Check allocator assumptions |
2687 | // |
2688 | // The register allocator should have extended the lifetimes of all input and internal registers so that |
2689 | // none interfere with the target. |
2690 | noway_assert(addrReg != targetReg); |
2691 | |
2692 | noway_assert(addrReg != loadReg); |
2693 | noway_assert(dataReg != loadReg); |
2694 | |
2695 | noway_assert(addrReg != storeDataReg); |
2696 | noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); |
2697 | |
2698 | assert(addr->isUsedFromReg()); |
2699 | noway_assert(exResultReg != REG_NA); |
2700 | noway_assert(exResultReg != targetReg); |
2701 | noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); |
2702 | |
2703 | // Store exclusive unpredictable cases must be avoided |
2704 | noway_assert(exResultReg != storeDataReg); |
2705 | noway_assert(exResultReg != addrReg); |
2706 | |
2707 | // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input |
2708 | // registers |
2709 | // die at the first instruction generated by the node. This is not the case for these atomics as the input |
2710 | // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until |
2711 | // we are finished generating the code for this node. |
2712 | |
2713 | gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); |
2714 | |
2715 | // Emit code like this: |
2716 | // retry: |
2717 | // ldxr loadReg, [addrReg] |
2718 | // add storeDataReg, loadReg, dataReg # Only for GT_XADD |
2719 | // # GT_XCHG storeDataReg === dataReg |
2720 | // stxr exResult, storeDataReg, [addrReg] |
2721 | // cbnz exResult, retry |
2722 | // dmb ish |
2723 | |
2724 | BasicBlock* labelRetry = genCreateTempLabel(); |
2725 | genDefineTempLabel(labelRetry); |
2726 | |
2727 | // The following instruction includes a acquire half barrier |
2728 | getEmitter()->emitIns_R_R(INS_ldaxr, dataSize, loadReg, addrReg); |
2729 | |
2730 | switch (treeNode->OperGet()) |
2731 | { |
2732 | case GT_XADD: |
2733 | if (data->isContainedIntOrIImmed()) |
2734 | { |
2735 | // Even though INS_add is specified here, the encoder will choose either |
2736 | // an INS_add or an INS_sub and encode the immediate as a positive value |
2737 | genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), |
2738 | REG_NA); |
2739 | } |
2740 | else |
2741 | { |
2742 | getEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg); |
2743 | } |
2744 | break; |
2745 | case GT_XCHG: |
2746 | assert(!data->isContained()); |
2747 | storeDataReg = dataReg; |
2748 | break; |
2749 | default: |
2750 | unreached(); |
2751 | } |
2752 | |
2753 | // The following instruction includes a release half barrier |
2754 | getEmitter()->emitIns_R_R_R(INS_stlxr, dataSize, exResultReg, storeDataReg, addrReg); |
2755 | |
2756 | getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); |
2757 | |
2758 | instGen_MemoryBarrier(INS_BARRIER_ISH); |
2759 | |
2760 | gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); |
2761 | } |
2762 | |
2763 | if (treeNode->gtRegNum != REG_NA) |
2764 | { |
2765 | genProduceReg(treeNode); |
2766 | } |
2767 | } |
2768 | |
2769 | //------------------------------------------------------------------------ |
2770 | // genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. |
2771 | // |
2772 | // Arguments: |
2773 | // tree - the GT_CMPXCHG node |
2774 | // |
2775 | void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) |
2776 | { |
2777 | assert(treeNode->OperIs(GT_CMPXCHG)); |
2778 | |
2779 | GenTree* addr = treeNode->gtOpLocation; // arg1 |
2780 | GenTree* data = treeNode->gtOpValue; // arg2 |
2781 | GenTree* comparand = treeNode->gtOpComparand; // arg3 |
2782 | |
2783 | regNumber targetReg = treeNode->gtRegNum; |
2784 | regNumber dataReg = data->gtRegNum; |
2785 | regNumber addrReg = addr->gtRegNum; |
2786 | regNumber comparandReg = comparand->gtRegNum; |
2787 | |
2788 | genConsumeAddress(addr); |
2789 | genConsumeRegs(data); |
2790 | genConsumeRegs(comparand); |
2791 | |
2792 | if (compiler->compSupports(InstructionSet_Atomics)) |
2793 | { |
2794 | emitAttr dataSize = emitActualTypeSize(data); |
2795 | |
2796 | // casal use the comparand as the target reg |
2797 | if (targetReg != comparandReg) |
2798 | { |
2799 | getEmitter()->emitIns_R_R(INS_mov, dataSize, targetReg, comparandReg); |
2800 | |
2801 | // Catch case we destroyed data or address before use |
2802 | noway_assert(addrReg != targetReg); |
2803 | noway_assert(dataReg != targetReg); |
2804 | } |
2805 | getEmitter()->emitIns_R_R_R(INS_casal, dataSize, targetReg, dataReg, addrReg); |
2806 | |
2807 | instGen_MemoryBarrier(INS_BARRIER_ISH); |
2808 | } |
2809 | else |
2810 | { |
2811 | regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT); |
2812 | |
2813 | // Check allocator assumptions |
2814 | // |
2815 | // The register allocator should have extended the lifetimes of all input and internal registers so that |
2816 | // none interfere with the target. |
2817 | noway_assert(addrReg != targetReg); |
2818 | noway_assert(dataReg != targetReg); |
2819 | noway_assert(comparandReg != targetReg); |
2820 | noway_assert(addrReg != dataReg); |
2821 | noway_assert(targetReg != REG_NA); |
2822 | noway_assert(exResultReg != REG_NA); |
2823 | noway_assert(exResultReg != targetReg); |
2824 | |
2825 | assert(addr->isUsedFromReg()); |
2826 | assert(data->isUsedFromReg()); |
2827 | assert(!comparand->isUsedFromMemory()); |
2828 | |
2829 | // Store exclusive unpredictable cases must be avoided |
2830 | noway_assert(exResultReg != dataReg); |
2831 | noway_assert(exResultReg != addrReg); |
2832 | |
2833 | // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input |
2834 | // registers |
2835 | // die at the first instruction generated by the node. This is not the case for these atomics as the input |
2836 | // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until |
2837 | // we are finished generating the code for this node. |
2838 | |
2839 | gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); |
2840 | |
2841 | // TODO-ARM64-CQ Use ARMv8.1 atomics if available |
2842 | // https://github.com/dotnet/coreclr/issues/11881 |
2843 | |
2844 | // Emit code like this: |
2845 | // retry: |
2846 | // ldxr targetReg, [addrReg] |
2847 | // cmp targetReg, comparandReg |
2848 | // bne compareFail |
2849 | // stxr exResult, dataReg, [addrReg] |
2850 | // cbnz exResult, retry |
2851 | // compareFail: |
2852 | // dmb ish |
2853 | |
2854 | BasicBlock* labelRetry = genCreateTempLabel(); |
2855 | BasicBlock* labelCompareFail = genCreateTempLabel(); |
2856 | genDefineTempLabel(labelRetry); |
2857 | |
2858 | // The following instruction includes a acquire half barrier |
2859 | getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), targetReg, addrReg); |
2860 | |
2861 | if (comparand->isContainedIntOrIImmed()) |
2862 | { |
2863 | if (comparand->IsIntegralConst(0)) |
2864 | { |
2865 | getEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg); |
2866 | } |
2867 | else |
2868 | { |
2869 | getEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg, |
2870 | comparand->AsIntConCommon()->IconValue()); |
2871 | getEmitter()->emitIns_J(INS_bne, labelCompareFail); |
2872 | } |
2873 | } |
2874 | else |
2875 | { |
2876 | getEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg); |
2877 | getEmitter()->emitIns_J(INS_bne, labelCompareFail); |
2878 | } |
2879 | |
2880 | // The following instruction includes a release half barrier |
2881 | getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, dataReg, addrReg); |
2882 | |
2883 | getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); |
2884 | |
2885 | genDefineTempLabel(labelCompareFail); |
2886 | |
2887 | instGen_MemoryBarrier(INS_BARRIER_ISH); |
2888 | |
2889 | gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); |
2890 | } |
2891 | |
2892 | genProduceReg(treeNode); |
2893 | } |
2894 | |
2895 | instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) |
2896 | { |
2897 | instruction ins = INS_brk; |
2898 | |
2899 | if (varTypeIsFloating(type)) |
2900 | { |
2901 | switch (oper) |
2902 | { |
2903 | case GT_ADD: |
2904 | ins = INS_fadd; |
2905 | break; |
2906 | case GT_SUB: |
2907 | ins = INS_fsub; |
2908 | break; |
2909 | case GT_MUL: |
2910 | ins = INS_fmul; |
2911 | break; |
2912 | case GT_DIV: |
2913 | ins = INS_fdiv; |
2914 | break; |
2915 | case GT_NEG: |
2916 | ins = INS_fneg; |
2917 | break; |
2918 | |
2919 | default: |
2920 | NYI("Unhandled oper in genGetInsForOper() - float" ); |
2921 | unreached(); |
2922 | break; |
2923 | } |
2924 | } |
2925 | else |
2926 | { |
2927 | switch (oper) |
2928 | { |
2929 | case GT_ADD: |
2930 | ins = INS_add; |
2931 | break; |
2932 | case GT_AND: |
2933 | ins = INS_and; |
2934 | break; |
2935 | case GT_DIV: |
2936 | ins = INS_sdiv; |
2937 | break; |
2938 | case GT_UDIV: |
2939 | ins = INS_udiv; |
2940 | break; |
2941 | case GT_MUL: |
2942 | ins = INS_mul; |
2943 | break; |
2944 | case GT_LSH: |
2945 | ins = INS_lsl; |
2946 | break; |
2947 | case GT_NEG: |
2948 | ins = INS_neg; |
2949 | break; |
2950 | case GT_NOT: |
2951 | ins = INS_mvn; |
2952 | break; |
2953 | case GT_OR: |
2954 | ins = INS_orr; |
2955 | break; |
2956 | case GT_ROR: |
2957 | ins = INS_ror; |
2958 | break; |
2959 | case GT_RSH: |
2960 | ins = INS_asr; |
2961 | break; |
2962 | case GT_RSZ: |
2963 | ins = INS_lsr; |
2964 | break; |
2965 | case GT_SUB: |
2966 | ins = INS_sub; |
2967 | break; |
2968 | case GT_XOR: |
2969 | ins = INS_eor; |
2970 | break; |
2971 | |
2972 | default: |
2973 | NYI("Unhandled oper in genGetInsForOper() - integer" ); |
2974 | unreached(); |
2975 | break; |
2976 | } |
2977 | } |
2978 | return ins; |
2979 | } |
2980 | |
2981 | //------------------------------------------------------------------------ |
2982 | // genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. |
2983 | // |
2984 | // Arguments: |
2985 | // tree - the GT_RETURNTRAP node |
2986 | // |
2987 | void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) |
2988 | { |
2989 | assert(tree->OperGet() == GT_RETURNTRAP); |
2990 | |
2991 | // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC |
2992 | // based on the contents of 'data' |
2993 | |
2994 | GenTree* data = tree->gtOp1; |
2995 | genConsumeRegs(data); |
2996 | getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0); |
2997 | |
2998 | BasicBlock* skipLabel = genCreateTempLabel(); |
2999 | |
3000 | emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); |
3001 | inst_JMP(jmpEqual, skipLabel); |
3002 | // emit the call to the EE-helper that stops for GC (or other reasons) |
3003 | |
3004 | genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); |
3005 | genDefineTempLabel(skipLabel); |
3006 | } |
3007 | |
3008 | //------------------------------------------------------------------------ |
3009 | // genCodeForStoreInd: Produce code for a GT_STOREIND node. |
3010 | // |
3011 | // Arguments: |
3012 | // tree - the GT_STOREIND node |
3013 | // |
3014 | void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) |
3015 | { |
3016 | GenTree* data = tree->Data(); |
3017 | GenTree* addr = tree->Addr(); |
3018 | var_types targetType = tree->TypeGet(); |
3019 | emitter* emit = getEmitter(); |
3020 | emitAttr attr = emitTypeSize(tree); |
3021 | instruction ins = ins_Store(targetType); |
3022 | |
3023 | #ifdef FEATURE_SIMD |
3024 | // Storing Vector3 of size 12 bytes through indirection |
3025 | if (tree->TypeGet() == TYP_SIMD12) |
3026 | { |
3027 | genStoreIndTypeSIMD12(tree); |
3028 | return; |
3029 | } |
3030 | #endif // FEATURE_SIMD |
3031 | |
3032 | GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); |
3033 | if (writeBarrierForm != GCInfo::WBF_NoBarrier) |
3034 | { |
3035 | // data and addr must be in registers. |
3036 | // Consume both registers so that any copies of interfering |
3037 | // registers are taken care of. |
3038 | genConsumeOperands(tree); |
3039 | |
3040 | // At this point, we should not have any interference. |
3041 | // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, |
3042 | // as that is where 'addr' must go. |
3043 | noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF); |
3044 | |
3045 | // 'addr' goes into x14 (REG_WRITE_BARRIER_DST) |
3046 | genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST); |
3047 | |
3048 | // 'data' goes into x15 (REG_WRITE_BARRIER_SRC) |
3049 | genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC); |
3050 | |
3051 | genGCWriteBarrier(tree, writeBarrierForm); |
3052 | } |
3053 | else // A normal store, not a WriteBarrier store |
3054 | { |
3055 | bool dataIsUnary = false; |
3056 | GenTree* nonRMWsrc = nullptr; |
3057 | // We must consume the operands in the proper execution order, |
3058 | // so that liveness is updated appropriately. |
3059 | genConsumeAddress(addr); |
3060 | |
3061 | if (!data->isContained()) |
3062 | { |
3063 | genConsumeRegs(data); |
3064 | } |
3065 | |
3066 | regNumber dataReg = REG_NA; |
3067 | if (data->isContainedIntOrIImmed()) |
3068 | { |
3069 | assert(data->IsIntegralConst(0)); |
3070 | dataReg = REG_ZR; |
3071 | } |
3072 | else // data is not contained, so evaluate it into a register |
3073 | { |
3074 | assert(!data->isContained()); |
3075 | dataReg = data->gtRegNum; |
3076 | } |
3077 | |
3078 | assert((attr != EA_1BYTE) || !(tree->gtFlags & GTF_IND_UNALIGNED)); |
3079 | |
3080 | if (tree->gtFlags & GTF_IND_VOLATILE) |
3081 | { |
3082 | bool useStoreRelease = |
3083 | genIsValidIntReg(dataReg) && !addr->isContained() && !(tree->gtFlags & GTF_IND_UNALIGNED); |
3084 | |
3085 | if (useStoreRelease) |
3086 | { |
3087 | switch (EA_SIZE(attr)) |
3088 | { |
3089 | case EA_1BYTE: |
3090 | assert(ins == INS_strb); |
3091 | ins = INS_stlrb; |
3092 | break; |
3093 | case EA_2BYTE: |
3094 | assert(ins == INS_strh); |
3095 | ins = INS_stlrh; |
3096 | break; |
3097 | case EA_4BYTE: |
3098 | case EA_8BYTE: |
3099 | assert(ins == INS_str); |
3100 | ins = INS_stlr; |
3101 | break; |
3102 | default: |
3103 | assert(false); // We should not get here |
3104 | } |
3105 | } |
3106 | else |
3107 | { |
3108 | // issue a full memory barrier before a volatile StInd |
3109 | instGen_MemoryBarrier(); |
3110 | } |
3111 | } |
3112 | |
3113 | emit->emitInsLoadStoreOp(ins, attr, dataReg, tree); |
3114 | } |
3115 | } |
3116 | |
3117 | //------------------------------------------------------------------------ |
3118 | // genCodeForSwap: Produce code for a GT_SWAP node. |
3119 | // |
3120 | // Arguments: |
3121 | // tree - the GT_SWAP node |
3122 | // |
3123 | void CodeGen::genCodeForSwap(GenTreeOp* tree) |
3124 | { |
3125 | assert(tree->OperIs(GT_SWAP)); |
3126 | |
3127 | // Swap is only supported for lclVar operands that are enregistered |
3128 | // We do not consume or produce any registers. Both operands remain enregistered. |
3129 | // However, the gc-ness may change. |
3130 | assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2)); |
3131 | |
3132 | GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon(); |
3133 | LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]); |
3134 | var_types type1 = varDsc1->TypeGet(); |
3135 | GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon(); |
3136 | LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]); |
3137 | var_types type2 = varDsc2->TypeGet(); |
3138 | |
3139 | // We must have both int or both fp regs |
3140 | assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2)); |
3141 | |
3142 | // FP swap is not yet implemented (and should have NYI'd in LSRA) |
3143 | assert(!varTypeIsFloating(type1)); |
3144 | |
3145 | regNumber oldOp1Reg = lcl1->gtRegNum; |
3146 | regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); |
3147 | regNumber oldOp2Reg = lcl2->gtRegNum; |
3148 | regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); |
3149 | |
3150 | // We don't call genUpdateVarReg because we don't have a tree node with the new register. |
3151 | varDsc1->lvRegNum = oldOp2Reg; |
3152 | varDsc2->lvRegNum = oldOp1Reg; |
3153 | |
3154 | // Do the xchg |
3155 | emitAttr size = EA_PTRSIZE; |
3156 | if (varTypeGCtype(type1) != varTypeGCtype(type2)) |
3157 | { |
3158 | // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. |
3159 | // Otherwise it will leave them alone, which is correct if they have the same GC-ness. |
3160 | size = EA_GCREF; |
3161 | } |
3162 | |
3163 | NYI("register swap" ); |
3164 | // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); |
3165 | |
3166 | // Update the gcInfo. |
3167 | // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) |
3168 | gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); |
3169 | gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); |
3170 | |
3171 | // gcMarkRegPtrVal will do the appropriate thing for non-gc types. |
3172 | // It will also dump the updates. |
3173 | gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); |
3174 | gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); |
3175 | } |
3176 | |
3177 | //------------------------------------------------------------------------------------------- |
3178 | // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value |
3179 | // corresponding to a binary Relational operator result. |
3180 | // |
3181 | // Arguments: |
3182 | // dstReg - The target register to set to 1 or 0 |
3183 | // tree - The GenTree Relop node that was used to set the Condition codes |
3184 | // |
3185 | // Return Value: none |
3186 | // |
3187 | // Notes: |
3188 | // A full 64-bit value of either 1 or 0 is setup in the 'dstReg' |
3189 | //------------------------------------------------------------------------------------------- |
3190 | |
3191 | void CodeGen::genSetRegToCond(regNumber dstReg, GenTree* tree) |
3192 | { |
3193 | emitJumpKind jumpKind[2]; |
3194 | bool branchToTrueLabel[2]; |
3195 | genJumpKindsForTree(tree, jumpKind, branchToTrueLabel); |
3196 | assert(jumpKind[0] != EJ_NONE); |
3197 | |
3198 | // Set the reg according to the flags |
3199 | inst_SET(jumpKind[0], dstReg); |
3200 | |
3201 | // Do we need to use two operation to set the flags? |
3202 | // |
3203 | if (jumpKind[1] != EJ_NONE) |
3204 | { |
3205 | emitter* emit = getEmitter(); |
3206 | bool ordered = ((tree->gtFlags & GTF_RELOP_NAN_UN) == 0); |
3207 | insCond secondCond; |
3208 | |
3209 | // The only ones that require two operations are the |
3210 | // floating point compare operations of BEQ or BNE.UN |
3211 | // |
3212 | if (tree->gtOper == GT_EQ) |
3213 | { |
3214 | // This must be an ordered comparison. |
3215 | assert(ordered); |
3216 | assert(jumpKind[1] == EJ_vs); // We complement this value |
3217 | secondCond = INS_COND_VC; // for the secondCond |
3218 | } |
3219 | else // gtOper == GT_NE |
3220 | { |
3221 | // This must be BNE.UN (unordered comparison) |
3222 | assert((tree->gtOper == GT_NE) && !ordered); |
3223 | assert(jumpKind[1] == EJ_lo); // We complement this value |
3224 | secondCond = INS_COND_HS; // for the secondCond |
3225 | } |
3226 | |
3227 | // The second instruction is a 'csinc' instruction that either selects the previous dstReg |
3228 | // or increments the ZR register, which produces a 1 result. |
3229 | |
3230 | emit->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, dstReg, dstReg, REG_ZR, secondCond); |
3231 | } |
3232 | } |
3233 | |
3234 | //------------------------------------------------------------------------ |
3235 | // genIntToFloatCast: Generate code to cast an int/long to float/double |
3236 | // |
3237 | // Arguments: |
3238 | // treeNode - The GT_CAST node |
3239 | // |
3240 | // Return Value: |
3241 | // None. |
3242 | // |
3243 | // Assumptions: |
3244 | // Cast is a non-overflow conversion. |
3245 | // The treeNode must have an assigned register. |
3246 | // SrcType= int32/uint32/int64/uint64 and DstType=float/double. |
3247 | // |
3248 | void CodeGen::genIntToFloatCast(GenTree* treeNode) |
3249 | { |
3250 | // int type --> float/double conversions are always non-overflow ones |
3251 | assert(treeNode->OperGet() == GT_CAST); |
3252 | assert(!treeNode->gtOverflow()); |
3253 | |
3254 | regNumber targetReg = treeNode->gtRegNum; |
3255 | assert(genIsValidFloatReg(targetReg)); |
3256 | |
3257 | GenTree* op1 = treeNode->gtOp.gtOp1; |
3258 | assert(!op1->isContained()); // Cannot be contained |
3259 | assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg. |
3260 | |
3261 | var_types dstType = treeNode->CastToType(); |
3262 | var_types srcType = genActualType(op1->TypeGet()); |
3263 | assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); |
3264 | |
3265 | // force the srcType to unsigned if GT_UNSIGNED flag is set |
3266 | if (treeNode->gtFlags & GTF_UNSIGNED) |
3267 | { |
3268 | srcType = genUnsignedType(srcType); |
3269 | } |
3270 | |
3271 | // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE |
3272 | emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); |
3273 | noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE)); |
3274 | |
3275 | instruction ins = varTypeIsUnsigned(srcType) ? INS_ucvtf : INS_scvtf; |
3276 | insOpts cvtOption = INS_OPTS_NONE; // invalid value |
3277 | |
3278 | if (dstType == TYP_DOUBLE) |
3279 | { |
3280 | if (srcSize == EA_4BYTE) |
3281 | { |
3282 | cvtOption = INS_OPTS_4BYTE_TO_D; |
3283 | } |
3284 | else |
3285 | { |
3286 | assert(srcSize == EA_8BYTE); |
3287 | cvtOption = INS_OPTS_8BYTE_TO_D; |
3288 | } |
3289 | } |
3290 | else |
3291 | { |
3292 | assert(dstType == TYP_FLOAT); |
3293 | if (srcSize == EA_4BYTE) |
3294 | { |
3295 | cvtOption = INS_OPTS_4BYTE_TO_S; |
3296 | } |
3297 | else |
3298 | { |
3299 | assert(srcSize == EA_8BYTE); |
3300 | cvtOption = INS_OPTS_8BYTE_TO_S; |
3301 | } |
3302 | } |
3303 | |
3304 | genConsumeOperands(treeNode->AsOp()); |
3305 | |
3306 | getEmitter()->emitIns_R_R(ins, emitActualTypeSize(dstType), treeNode->gtRegNum, op1->gtRegNum, cvtOption); |
3307 | |
3308 | genProduceReg(treeNode); |
3309 | } |
3310 | |
3311 | //------------------------------------------------------------------------ |
3312 | // genFloatToIntCast: Generate code to cast float/double to int/long |
3313 | // |
3314 | // Arguments: |
3315 | // treeNode - The GT_CAST node |
3316 | // |
3317 | // Return Value: |
3318 | // None. |
3319 | // |
3320 | // Assumptions: |
3321 | // Cast is a non-overflow conversion. |
3322 | // The treeNode must have an assigned register. |
3323 | // SrcType=float/double and DstType= int32/uint32/int64/uint64 |
3324 | // |
3325 | void CodeGen::genFloatToIntCast(GenTree* treeNode) |
3326 | { |
3327 | // we don't expect to see overflow detecting float/double --> int type conversions here |
3328 | // as they should have been converted into helper calls by front-end. |
3329 | assert(treeNode->OperGet() == GT_CAST); |
3330 | assert(!treeNode->gtOverflow()); |
3331 | |
3332 | regNumber targetReg = treeNode->gtRegNum; |
3333 | assert(genIsValidIntReg(targetReg)); // Must be a valid int reg. |
3334 | |
3335 | GenTree* op1 = treeNode->gtOp.gtOp1; |
3336 | assert(!op1->isContained()); // Cannot be contained |
3337 | assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg. |
3338 | |
3339 | var_types dstType = treeNode->CastToType(); |
3340 | var_types srcType = op1->TypeGet(); |
3341 | assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); |
3342 | |
3343 | // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE |
3344 | // For conversions to small types (byte/sbyte/int16/uint16) from float/double, |
3345 | // we expect the front-end or lowering phase to have generated two levels of cast. |
3346 | // |
3347 | emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); |
3348 | noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE)); |
3349 | |
3350 | instruction ins = INS_fcvtzs; // default to sign converts |
3351 | insOpts cvtOption = INS_OPTS_NONE; // invalid value |
3352 | |
3353 | if (varTypeIsUnsigned(dstType)) |
3354 | { |
3355 | ins = INS_fcvtzu; // use unsigned converts |
3356 | } |
3357 | |
3358 | if (srcType == TYP_DOUBLE) |
3359 | { |
3360 | if (dstSize == EA_4BYTE) |
3361 | { |
3362 | cvtOption = INS_OPTS_D_TO_4BYTE; |
3363 | } |
3364 | else |
3365 | { |
3366 | assert(dstSize == EA_8BYTE); |
3367 | cvtOption = INS_OPTS_D_TO_8BYTE; |
3368 | } |
3369 | } |
3370 | else |
3371 | { |
3372 | assert(srcType == TYP_FLOAT); |
3373 | if (dstSize == EA_4BYTE) |
3374 | { |
3375 | cvtOption = INS_OPTS_S_TO_4BYTE; |
3376 | } |
3377 | else |
3378 | { |
3379 | assert(dstSize == EA_8BYTE); |
3380 | cvtOption = INS_OPTS_S_TO_8BYTE; |
3381 | } |
3382 | } |
3383 | |
3384 | genConsumeOperands(treeNode->AsOp()); |
3385 | |
3386 | getEmitter()->emitIns_R_R(ins, dstSize, treeNode->gtRegNum, op1->gtRegNum, cvtOption); |
3387 | |
3388 | genProduceReg(treeNode); |
3389 | } |
3390 | |
3391 | //------------------------------------------------------------------------ |
3392 | // genCkfinite: Generate code for ckfinite opcode. |
3393 | // |
3394 | // Arguments: |
3395 | // treeNode - The GT_CKFINITE node |
3396 | // |
3397 | // Return Value: |
3398 | // None. |
3399 | // |
3400 | // Assumptions: |
3401 | // GT_CKFINITE node has reserved an internal register. |
3402 | // |
3403 | void CodeGen::genCkfinite(GenTree* treeNode) |
3404 | { |
3405 | assert(treeNode->OperGet() == GT_CKFINITE); |
3406 | |
3407 | GenTree* op1 = treeNode->gtOp.gtOp1; |
3408 | var_types targetType = treeNode->TypeGet(); |
3409 | int expMask = (targetType == TYP_FLOAT) ? 0x7F8 : 0x7FF; // Bit mask to extract exponent. |
3410 | int shiftAmount = targetType == TYP_FLOAT ? 20 : 52; |
3411 | |
3412 | emitter* emit = getEmitter(); |
3413 | |
3414 | // Extract exponent into a register. |
3415 | regNumber intReg = treeNode->GetSingleTempReg(); |
3416 | regNumber fpReg = genConsumeReg(op1); |
3417 | |
3418 | emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), intReg, fpReg); |
3419 | emit->emitIns_R_R_I(INS_lsr, emitActualTypeSize(targetType), intReg, intReg, shiftAmount); |
3420 | |
3421 | // Mask of exponent with all 1's and check if the exponent is all 1's |
3422 | emit->emitIns_R_R_I(INS_and, EA_4BYTE, intReg, intReg, expMask); |
3423 | emit->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, expMask); |
3424 | |
3425 | // If exponent is all 1's, throw ArithmeticException |
3426 | emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); |
3427 | genJumpToThrowHlpBlk(jmpEqual, SCK_ARITH_EXCPN); |
3428 | |
3429 | // if it is a finite value copy it to targetReg |
3430 | if (treeNode->gtRegNum != fpReg) |
3431 | { |
3432 | emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), treeNode->gtRegNum, fpReg); |
3433 | } |
3434 | genProduceReg(treeNode); |
3435 | } |
3436 | |
3437 | //------------------------------------------------------------------------ |
3438 | // genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT/GT_TEST_EQ/GT_TEST_NE node. |
3439 | // |
3440 | // Arguments: |
3441 | // tree - the node |
3442 | // |
3443 | void CodeGen::genCodeForCompare(GenTreeOp* tree) |
3444 | { |
3445 | regNumber targetReg = tree->gtRegNum; |
3446 | emitter* emit = getEmitter(); |
3447 | |
3448 | GenTree* op1 = tree->gtOp1; |
3449 | GenTree* op2 = tree->gtOp2; |
3450 | var_types op1Type = genActualType(op1->TypeGet()); |
3451 | var_types op2Type = genActualType(op2->TypeGet()); |
3452 | |
3453 | assert(!op1->isUsedFromMemory()); |
3454 | assert(!op2->isUsedFromMemory()); |
3455 | |
3456 | genConsumeOperands(tree); |
3457 | |
3458 | emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); |
3459 | |
3460 | assert(genTypeSize(op1Type) == genTypeSize(op2Type)); |
3461 | |
3462 | if (varTypeIsFloating(op1Type)) |
3463 | { |
3464 | assert(varTypeIsFloating(op2Type)); |
3465 | assert(!op1->isContained()); |
3466 | assert(op1Type == op2Type); |
3467 | |
3468 | if (op2->IsIntegralConst(0)) |
3469 | { |
3470 | assert(op2->isContained()); |
3471 | emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0); |
3472 | } |
3473 | else |
3474 | { |
3475 | assert(!op2->isContained()); |
3476 | emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum); |
3477 | } |
3478 | } |
3479 | else |
3480 | { |
3481 | assert(!varTypeIsFloating(op2Type)); |
3482 | // We don't support swapping op1 and op2 to generate cmp reg, imm |
3483 | assert(!op1->isContainedIntOrIImmed()); |
3484 | |
3485 | instruction ins = tree->OperIs(GT_TEST_EQ, GT_TEST_NE) ? INS_tst : INS_cmp; |
3486 | |
3487 | if (op2->isContainedIntOrIImmed()) |
3488 | { |
3489 | GenTreeIntConCommon* intConst = op2->AsIntConCommon(); |
3490 | emit->emitIns_R_I(ins, cmpSize, op1->gtRegNum, intConst->IconValue()); |
3491 | } |
3492 | else |
3493 | { |
3494 | emit->emitIns_R_R(ins, cmpSize, op1->gtRegNum, op2->gtRegNum); |
3495 | } |
3496 | } |
3497 | |
3498 | // Are we evaluating this into a register? |
3499 | if (targetReg != REG_NA) |
3500 | { |
3501 | genSetRegToCond(targetReg, tree); |
3502 | genProduceReg(tree); |
3503 | } |
3504 | } |
3505 | |
3506 | //------------------------------------------------------------------------ |
3507 | // genCodeForJumpCompare: Generates code for jmpCompare statement. |
3508 | // |
3509 | // A GT_JCMP node is created when a comparison and conditional branch |
3510 | // can be executed in a single instruction. |
3511 | // |
3512 | // Arm64 has a few instructions with this behavior. |
3513 | // - cbz/cbnz -- Compare and branch register zero/not zero |
3514 | // - tbz/tbnz -- Test and branch register bit zero/not zero |
3515 | // |
3516 | // The cbz/cbnz supports the normal +/- 1MB branch range for conditional branches |
3517 | // The tbz/tbnz supports a smaller +/- 32KB branch range |
3518 | // |
3519 | // A GT_JCMP cbz/cbnz node is created when there is a GT_EQ or GT_NE |
3520 | // integer/unsigned comparison against #0 which is used by a GT_JTRUE |
3521 | // condition jump node. |
3522 | // |
3523 | // A GT_JCMP tbz/tbnz node is created when there is a GT_TEST_EQ or GT_TEST_NE |
3524 | // integer/unsigned comparison against against a mask with a single bit set |
3525 | // which is used by a GT_JTRUE condition jump node. |
3526 | // |
3527 | // This node is repsonsible for consuming the register, and emitting the |
3528 | // appropriate fused compare/test and branch instruction |
3529 | // |
3530 | // Two flags guide code generation |
3531 | // GTF_JCMP_TST -- Set if this is a tbz/tbnz rather than cbz/cbnz |
3532 | // GTF_JCMP_EQ -- Set if this is cbz/tbz rather than cbnz/tbnz |
3533 | // |
3534 | // Arguments: |
3535 | // tree - The GT_JCMP tree node. |
3536 | // |
3537 | // Return Value: |
3538 | // None |
3539 | // |
3540 | void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) |
3541 | { |
3542 | assert(compiler->compCurBB->bbJumpKind == BBJ_COND); |
3543 | |
3544 | GenTree* op1 = tree->gtGetOp1(); |
3545 | GenTree* op2 = tree->gtGetOp2(); |
3546 | |
3547 | assert(tree->OperIs(GT_JCMP)); |
3548 | assert(!varTypeIsFloating(tree)); |
3549 | assert(!op1->isUsedFromMemory()); |
3550 | assert(!op2->isUsedFromMemory()); |
3551 | assert(op2->IsCnsIntOrI()); |
3552 | assert(op2->isContained()); |
3553 | |
3554 | genConsumeOperands(tree); |
3555 | |
3556 | regNumber reg = op1->gtRegNum; |
3557 | emitAttr attr = emitActualTypeSize(op1->TypeGet()); |
3558 | |
3559 | if (tree->gtFlags & GTF_JCMP_TST) |
3560 | { |
3561 | ssize_t compareImm = op2->gtIntCon.IconValue(); |
3562 | |
3563 | assert(isPow2(compareImm)); |
3564 | |
3565 | instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_tbz : INS_tbnz; |
3566 | int imm = genLog2((size_t)compareImm); |
3567 | |
3568 | getEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm); |
3569 | } |
3570 | else |
3571 | { |
3572 | assert(op2->IsIntegralConst(0)); |
3573 | |
3574 | instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_cbz : INS_cbnz; |
3575 | |
3576 | getEmitter()->emitIns_J_R(ins, attr, compiler->compCurBB->bbJumpDest, reg); |
3577 | } |
3578 | } |
3579 | |
3580 | int CodeGenInterface::genSPtoFPdelta() |
3581 | { |
3582 | int delta; |
3583 | |
3584 | // We place the saved frame pointer immediately above the outgoing argument space. |
3585 | delta = (int)compiler->lvaOutgoingArgSpaceSize; |
3586 | |
3587 | assert(delta >= 0); |
3588 | return delta; |
3589 | } |
3590 | |
3591 | //--------------------------------------------------------------------- |
3592 | // genTotalFrameSize - return the total size of the stack frame, including local size, |
3593 | // callee-saved register size, etc. |
3594 | // |
3595 | // Return value: |
3596 | // Total frame size |
3597 | // |
3598 | |
3599 | int CodeGenInterface::genTotalFrameSize() |
3600 | { |
3601 | // For varargs functions, we home all the incoming register arguments. They are not |
3602 | // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but |
3603 | // since we don't use "push" instructions to save them, we don't have to do the |
3604 | // save of these varargs register arguments as the first thing in the prolog. |
3605 | |
3606 | assert(!IsUninitialized(compiler->compCalleeRegsPushed)); |
3607 | |
3608 | int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) + |
3609 | compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; |
3610 | |
3611 | assert(totalFrameSize >= 0); |
3612 | return totalFrameSize; |
3613 | } |
3614 | |
3615 | //--------------------------------------------------------------------- |
3616 | // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. |
3617 | // This number is going to be negative, since the Caller-SP is at a higher |
3618 | // address than the frame pointer. |
3619 | // |
3620 | // There must be a frame pointer to call this function! |
3621 | |
3622 | int CodeGenInterface::genCallerSPtoFPdelta() |
3623 | { |
3624 | assert(isFramePointerUsed()); |
3625 | int callerSPtoFPdelta; |
3626 | |
3627 | callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); |
3628 | |
3629 | assert(callerSPtoFPdelta <= 0); |
3630 | return callerSPtoFPdelta; |
3631 | } |
3632 | |
3633 | //--------------------------------------------------------------------- |
3634 | // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. |
3635 | // |
3636 | // This number will be negative. |
3637 | |
3638 | int CodeGenInterface::genCallerSPtoInitialSPdelta() |
3639 | { |
3640 | int callerSPtoSPdelta = 0; |
3641 | |
3642 | callerSPtoSPdelta -= genTotalFrameSize(); |
3643 | |
3644 | assert(callerSPtoSPdelta <= 0); |
3645 | return callerSPtoSPdelta; |
3646 | } |
3647 | |
3648 | /***************************************************************************** |
3649 | * Emit a call to a helper function. |
3650 | * |
3651 | */ |
3652 | |
3653 | void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) |
3654 | { |
3655 | void* addr = nullptr; |
3656 | void* pAddr = nullptr; |
3657 | |
3658 | emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; |
3659 | addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); |
3660 | regNumber callTarget = REG_NA; |
3661 | |
3662 | if (addr == nullptr) |
3663 | { |
3664 | // This is call to a runtime helper. |
3665 | // adrp x, [reloc:rel page addr] |
3666 | // add x, x, [reloc:page offset] |
3667 | // ldr x, [x] |
3668 | // br x |
3669 | |
3670 | if (callTargetReg == REG_NA) |
3671 | { |
3672 | // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but |
3673 | // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. |
3674 | callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; |
3675 | } |
3676 | |
3677 | regMaskTP callTargetMask = genRegMask(callTargetReg); |
3678 | regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); |
3679 | |
3680 | // assert that all registers in callTargetMask are in the callKillSet |
3681 | noway_assert((callTargetMask & callKillSet) == callTargetMask); |
3682 | |
3683 | callTarget = callTargetReg; |
3684 | |
3685 | // adrp + add with relocations will be emitted |
3686 | getEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); |
3687 | getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget); |
3688 | callType = emitter::EC_INDIR_R; |
3689 | } |
3690 | |
3691 | getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, |
3692 | retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, |
3693 | gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, /* IL offset */ |
3694 | callTarget, /* ireg */ |
3695 | REG_NA, 0, 0, /* xreg, xmul, disp */ |
3696 | false /* isJump */ |
3697 | ); |
3698 | |
3699 | regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); |
3700 | regSet.verifyRegistersUsed(killMask); |
3701 | } |
3702 | |
3703 | #ifdef FEATURE_SIMD |
3704 | |
3705 | //------------------------------------------------------------------------ |
3706 | // genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main |
3707 | // routine which in turn calls appropriate genSIMDIntrinsicXXX() routine. |
3708 | // |
3709 | // Arguments: |
3710 | // simdNode - The GT_SIMD node |
3711 | // |
3712 | // Return Value: |
3713 | // None. |
3714 | // |
3715 | // Notes: |
3716 | // Currently, we only recognize SIMDVector<float> and SIMDVector<int>, and |
3717 | // a limited set of methods. |
3718 | // |
3719 | // TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp. |
3720 | void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) |
3721 | { |
3722 | // NYI for unsupported base types |
3723 | if (simdNode->gtSIMDBaseType != TYP_INT && simdNode->gtSIMDBaseType != TYP_LONG && |
3724 | simdNode->gtSIMDBaseType != TYP_FLOAT && simdNode->gtSIMDBaseType != TYP_DOUBLE && |
3725 | simdNode->gtSIMDBaseType != TYP_USHORT && simdNode->gtSIMDBaseType != TYP_UBYTE && |
3726 | simdNode->gtSIMDBaseType != TYP_SHORT && simdNode->gtSIMDBaseType != TYP_BYTE && |
3727 | simdNode->gtSIMDBaseType != TYP_UINT && simdNode->gtSIMDBaseType != TYP_ULONG) |
3728 | { |
3729 | noway_assert(!"SIMD intrinsic with unsupported base type." ); |
3730 | } |
3731 | |
3732 | switch (simdNode->gtSIMDIntrinsicID) |
3733 | { |
3734 | case SIMDIntrinsicInit: |
3735 | genSIMDIntrinsicInit(simdNode); |
3736 | break; |
3737 | |
3738 | case SIMDIntrinsicInitN: |
3739 | genSIMDIntrinsicInitN(simdNode); |
3740 | break; |
3741 | |
3742 | case SIMDIntrinsicSqrt: |
3743 | case SIMDIntrinsicAbs: |
3744 | case SIMDIntrinsicCast: |
3745 | case SIMDIntrinsicConvertToSingle: |
3746 | case SIMDIntrinsicConvertToInt32: |
3747 | case SIMDIntrinsicConvertToDouble: |
3748 | case SIMDIntrinsicConvertToInt64: |
3749 | genSIMDIntrinsicUnOp(simdNode); |
3750 | break; |
3751 | |
3752 | case SIMDIntrinsicWidenLo: |
3753 | case SIMDIntrinsicWidenHi: |
3754 | genSIMDIntrinsicWiden(simdNode); |
3755 | break; |
3756 | |
3757 | case SIMDIntrinsicNarrow: |
3758 | genSIMDIntrinsicNarrow(simdNode); |
3759 | break; |
3760 | |
3761 | case SIMDIntrinsicAdd: |
3762 | case SIMDIntrinsicSub: |
3763 | case SIMDIntrinsicMul: |
3764 | case SIMDIntrinsicDiv: |
3765 | case SIMDIntrinsicBitwiseAnd: |
3766 | case SIMDIntrinsicBitwiseAndNot: |
3767 | case SIMDIntrinsicBitwiseOr: |
3768 | case SIMDIntrinsicBitwiseXor: |
3769 | case SIMDIntrinsicMin: |
3770 | case SIMDIntrinsicMax: |
3771 | case SIMDIntrinsicEqual: |
3772 | case SIMDIntrinsicLessThan: |
3773 | case SIMDIntrinsicGreaterThan: |
3774 | case SIMDIntrinsicLessThanOrEqual: |
3775 | case SIMDIntrinsicGreaterThanOrEqual: |
3776 | genSIMDIntrinsicBinOp(simdNode); |
3777 | break; |
3778 | |
3779 | case SIMDIntrinsicOpEquality: |
3780 | case SIMDIntrinsicOpInEquality: |
3781 | genSIMDIntrinsicRelOp(simdNode); |
3782 | break; |
3783 | |
3784 | case SIMDIntrinsicDotProduct: |
3785 | genSIMDIntrinsicDotProduct(simdNode); |
3786 | break; |
3787 | |
3788 | case SIMDIntrinsicGetItem: |
3789 | genSIMDIntrinsicGetItem(simdNode); |
3790 | break; |
3791 | |
3792 | case SIMDIntrinsicSetX: |
3793 | case SIMDIntrinsicSetY: |
3794 | case SIMDIntrinsicSetZ: |
3795 | case SIMDIntrinsicSetW: |
3796 | genSIMDIntrinsicSetItem(simdNode); |
3797 | break; |
3798 | |
3799 | case SIMDIntrinsicUpperSave: |
3800 | genSIMDIntrinsicUpperSave(simdNode); |
3801 | break; |
3802 | |
3803 | case SIMDIntrinsicUpperRestore: |
3804 | genSIMDIntrinsicUpperRestore(simdNode); |
3805 | break; |
3806 | |
3807 | case SIMDIntrinsicSelect: |
3808 | NYI("SIMDIntrinsicSelect lowered during import to (a & sel) | (b & ~sel)" ); |
3809 | break; |
3810 | |
3811 | default: |
3812 | noway_assert(!"Unimplemented SIMD intrinsic." ); |
3813 | unreached(); |
3814 | } |
3815 | } |
3816 | |
3817 | insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) |
3818 | { |
3819 | assert((size == EA_16BYTE) || (size == EA_8BYTE)); |
3820 | insOpts result = INS_OPTS_NONE; |
3821 | |
3822 | switch (elementType) |
3823 | { |
3824 | case TYP_DOUBLE: |
3825 | case TYP_ULONG: |
3826 | case TYP_LONG: |
3827 | result = (size == EA_16BYTE) ? INS_OPTS_2D : INS_OPTS_1D; |
3828 | break; |
3829 | case TYP_FLOAT: |
3830 | case TYP_UINT: |
3831 | case TYP_INT: |
3832 | result = (size == EA_16BYTE) ? INS_OPTS_4S : INS_OPTS_2S; |
3833 | break; |
3834 | case TYP_USHORT: |
3835 | case TYP_SHORT: |
3836 | result = (size == EA_16BYTE) ? INS_OPTS_8H : INS_OPTS_4H; |
3837 | break; |
3838 | case TYP_UBYTE: |
3839 | case TYP_BYTE: |
3840 | result = (size == EA_16BYTE) ? INS_OPTS_16B : INS_OPTS_8B; |
3841 | break; |
3842 | default: |
3843 | assert(!"Unsupported element type" ); |
3844 | unreached(); |
3845 | } |
3846 | |
3847 | return result; |
3848 | } |
3849 | |
3850 | // getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic |
3851 | // |
3852 | // Arguments: |
3853 | // intrinsicId - SIMD intrinsic Id |
3854 | // baseType - Base type of the SIMD vector |
3855 | // immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode |
3856 | // |
3857 | // |
3858 | // Return Value: |
3859 | // Instruction (op) to be used, and immed is set if instruction requires an immediate operand. |
3860 | // |
3861 | instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) |
3862 | { |
3863 | instruction result = INS_invalid; |
3864 | if (varTypeIsFloating(baseType)) |
3865 | { |
3866 | switch (intrinsicId) |
3867 | { |
3868 | case SIMDIntrinsicAbs: |
3869 | result = INS_fabs; |
3870 | break; |
3871 | case SIMDIntrinsicAdd: |
3872 | result = INS_fadd; |
3873 | break; |
3874 | case SIMDIntrinsicBitwiseAnd: |
3875 | result = INS_and; |
3876 | break; |
3877 | case SIMDIntrinsicBitwiseAndNot: |
3878 | result = INS_bic; |
3879 | break; |
3880 | case SIMDIntrinsicBitwiseOr: |
3881 | result = INS_orr; |
3882 | break; |
3883 | case SIMDIntrinsicBitwiseXor: |
3884 | result = INS_eor; |
3885 | break; |
3886 | case SIMDIntrinsicCast: |
3887 | result = INS_mov; |
3888 | break; |
3889 | case SIMDIntrinsicConvertToInt32: |
3890 | case SIMDIntrinsicConvertToInt64: |
3891 | result = INS_fcvtns; |
3892 | break; |
3893 | case SIMDIntrinsicDiv: |
3894 | result = INS_fdiv; |
3895 | break; |
3896 | case SIMDIntrinsicEqual: |
3897 | result = INS_fcmeq; |
3898 | break; |
3899 | case SIMDIntrinsicGreaterThan: |
3900 | result = INS_fcmgt; |
3901 | break; |
3902 | case SIMDIntrinsicGreaterThanOrEqual: |
3903 | result = INS_fcmge; |
3904 | break; |
3905 | case SIMDIntrinsicLessThan: |
3906 | result = INS_fcmlt; |
3907 | break; |
3908 | case SIMDIntrinsicLessThanOrEqual: |
3909 | result = INS_fcmle; |
3910 | break; |
3911 | case SIMDIntrinsicMax: |
3912 | result = INS_fmax; |
3913 | break; |
3914 | case SIMDIntrinsicMin: |
3915 | result = INS_fmin; |
3916 | break; |
3917 | case SIMDIntrinsicMul: |
3918 | result = INS_fmul; |
3919 | break; |
3920 | case SIMDIntrinsicNarrow: |
3921 | // Use INS_fcvtn lower bytes of result followed by INS_fcvtn2 for upper bytes |
3922 | // Return lower bytes instruction here |
3923 | result = INS_fcvtn; |
3924 | break; |
3925 | case SIMDIntrinsicSelect: |
3926 | result = INS_bsl; |
3927 | break; |
3928 | case SIMDIntrinsicSqrt: |
3929 | result = INS_fsqrt; |
3930 | break; |
3931 | case SIMDIntrinsicSub: |
3932 | result = INS_fsub; |
3933 | break; |
3934 | case SIMDIntrinsicWidenLo: |
3935 | result = INS_fcvtl; |
3936 | break; |
3937 | case SIMDIntrinsicWidenHi: |
3938 | result = INS_fcvtl2; |
3939 | break; |
3940 | default: |
3941 | assert(!"Unsupported SIMD intrinsic" ); |
3942 | unreached(); |
3943 | } |
3944 | } |
3945 | else |
3946 | { |
3947 | bool isUnsigned = varTypeIsUnsigned(baseType); |
3948 | |
3949 | switch (intrinsicId) |
3950 | { |
3951 | case SIMDIntrinsicAbs: |
3952 | assert(!isUnsigned); |
3953 | result = INS_abs; |
3954 | break; |
3955 | case SIMDIntrinsicAdd: |
3956 | result = INS_add; |
3957 | break; |
3958 | case SIMDIntrinsicBitwiseAnd: |
3959 | result = INS_and; |
3960 | break; |
3961 | case SIMDIntrinsicBitwiseAndNot: |
3962 | result = INS_bic; |
3963 | break; |
3964 | case SIMDIntrinsicBitwiseOr: |
3965 | result = INS_orr; |
3966 | break; |
3967 | case SIMDIntrinsicBitwiseXor: |
3968 | result = INS_eor; |
3969 | break; |
3970 | case SIMDIntrinsicCast: |
3971 | result = INS_mov; |
3972 | break; |
3973 | case SIMDIntrinsicConvertToDouble: |
3974 | case SIMDIntrinsicConvertToSingle: |
3975 | result = isUnsigned ? INS_ucvtf : INS_scvtf; |
3976 | break; |
3977 | case SIMDIntrinsicEqual: |
3978 | result = INS_cmeq; |
3979 | break; |
3980 | case SIMDIntrinsicGreaterThan: |
3981 | result = isUnsigned ? INS_cmhi : INS_cmgt; |
3982 | break; |
3983 | case SIMDIntrinsicGreaterThanOrEqual: |
3984 | result = isUnsigned ? INS_cmhs : INS_cmge; |
3985 | break; |
3986 | case SIMDIntrinsicLessThan: |
3987 | assert(!isUnsigned); |
3988 | result = INS_cmlt; |
3989 | break; |
3990 | case SIMDIntrinsicLessThanOrEqual: |
3991 | assert(!isUnsigned); |
3992 | result = INS_cmle; |
3993 | break; |
3994 | case SIMDIntrinsicMax: |
3995 | result = isUnsigned ? INS_umax : INS_smax; |
3996 | break; |
3997 | case SIMDIntrinsicMin: |
3998 | result = isUnsigned ? INS_umin : INS_smin; |
3999 | break; |
4000 | case SIMDIntrinsicMul: |
4001 | result = INS_mul; |
4002 | break; |
4003 | case SIMDIntrinsicNarrow: |
4004 | // Use INS_xtn lower bytes of result followed by INS_xtn2 for upper bytes |
4005 | // Return lower bytes instruction here |
4006 | result = INS_xtn; |
4007 | break; |
4008 | case SIMDIntrinsicSelect: |
4009 | result = INS_bsl; |
4010 | break; |
4011 | case SIMDIntrinsicSub: |
4012 | result = INS_sub; |
4013 | break; |
4014 | case SIMDIntrinsicWidenLo: |
4015 | result = isUnsigned ? INS_uxtl : INS_sxtl; |
4016 | break; |
4017 | case SIMDIntrinsicWidenHi: |
4018 | result = isUnsigned ? INS_uxtl2 : INS_sxtl2; |
4019 | break; |
4020 | default: |
4021 | assert(!"Unsupported SIMD intrinsic" ); |
4022 | unreached(); |
4023 | } |
4024 | } |
4025 | |
4026 | noway_assert(result != INS_invalid); |
4027 | return result; |
4028 | } |
4029 | |
4030 | //------------------------------------------------------------------------ |
4031 | // genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize. |
4032 | // |
4033 | // Arguments: |
4034 | // simdNode - The GT_SIMD node |
4035 | // |
4036 | // Return Value: |
4037 | // None. |
4038 | // |
4039 | void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) |
4040 | { |
4041 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInit); |
4042 | |
4043 | GenTree* op1 = simdNode->gtGetOp1(); |
4044 | var_types baseType = simdNode->gtSIMDBaseType; |
4045 | regNumber targetReg = simdNode->gtRegNum; |
4046 | assert(targetReg != REG_NA); |
4047 | var_types targetType = simdNode->TypeGet(); |
4048 | |
4049 | genConsumeOperands(simdNode); |
4050 | regNumber op1Reg = op1->IsIntegralConst(0) ? REG_ZR : op1->gtRegNum; |
4051 | |
4052 | // TODO-ARM64-CQ Add LD1R to allow SIMDIntrinsicInit from contained memory |
4053 | // TODO-ARM64-CQ Add MOVI to allow SIMDIntrinsicInit from contained immediate small constants |
4054 | |
4055 | assert(op1->isContained() == op1->IsIntegralConst(0)); |
4056 | assert(!op1->isUsedFromMemory()); |
4057 | |
4058 | assert(genIsValidFloatReg(targetReg)); |
4059 | assert(genIsValidIntReg(op1Reg) || genIsValidFloatReg(op1Reg)); |
4060 | |
4061 | emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
4062 | insOpts opt = genGetSimdInsOpt(attr, baseType); |
4063 | |
4064 | if (genIsValidIntReg(op1Reg)) |
4065 | { |
4066 | getEmitter()->emitIns_R_R(INS_dup, attr, targetReg, op1Reg, opt); |
4067 | } |
4068 | else |
4069 | { |
4070 | getEmitter()->emitIns_R_R_I(INS_dup, attr, targetReg, op1Reg, 0, opt); |
4071 | } |
4072 | |
4073 | genProduceReg(simdNode); |
4074 | } |
4075 | |
4076 | //------------------------------------------------------------------------------------------- |
4077 | // genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes |
4078 | // a number of arguments equal to the length of the Vector. |
4079 | // |
4080 | // Arguments: |
4081 | // simdNode - The GT_SIMD node |
4082 | // |
4083 | // Return Value: |
4084 | // None. |
4085 | // |
4086 | void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) |
4087 | { |
4088 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN); |
4089 | |
4090 | regNumber targetReg = simdNode->gtRegNum; |
4091 | assert(targetReg != REG_NA); |
4092 | |
4093 | var_types targetType = simdNode->TypeGet(); |
4094 | |
4095 | var_types baseType = simdNode->gtSIMDBaseType; |
4096 | |
4097 | regNumber vectorReg = targetReg; |
4098 | |
4099 | if (varTypeIsFloating(baseType)) |
4100 | { |
4101 | // Note that we cannot use targetReg before consuming all float source operands. |
4102 | // Therefore use an internal temp register |
4103 | vectorReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); |
4104 | } |
4105 | |
4106 | emitAttr baseTypeSize = emitTypeSize(baseType); |
4107 | |
4108 | // We will first consume the list items in execution (left to right) order, |
4109 | // and record the registers. |
4110 | regNumber operandRegs[FP_REGSIZE_BYTES]; |
4111 | unsigned initCount = 0; |
4112 | for (GenTree* list = simdNode->gtGetOp1(); list != nullptr; list = list->gtGetOp2()) |
4113 | { |
4114 | assert(list->OperGet() == GT_LIST); |
4115 | GenTree* listItem = list->gtGetOp1(); |
4116 | assert(listItem->TypeGet() == baseType); |
4117 | assert(!listItem->isContained()); |
4118 | regNumber operandReg = genConsumeReg(listItem); |
4119 | operandRegs[initCount] = operandReg; |
4120 | initCount++; |
4121 | } |
4122 | |
4123 | assert((initCount * baseTypeSize) <= simdNode->gtSIMDSize); |
4124 | |
4125 | if (initCount * baseTypeSize < EA_16BYTE) |
4126 | { |
4127 | getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, vectorReg, 0x00, INS_OPTS_16B); |
4128 | } |
4129 | |
4130 | if (varTypeIsIntegral(baseType)) |
4131 | { |
4132 | for (unsigned i = 0; i < initCount; i++) |
4133 | { |
4134 | getEmitter()->emitIns_R_R_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i); |
4135 | } |
4136 | } |
4137 | else |
4138 | { |
4139 | for (unsigned i = 0; i < initCount; i++) |
4140 | { |
4141 | getEmitter()->emitIns_R_R_I_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i, 0); |
4142 | } |
4143 | } |
4144 | |
4145 | // Load the initialized value. |
4146 | if (targetReg != vectorReg) |
4147 | { |
4148 | getEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, vectorReg); |
4149 | } |
4150 | |
4151 | genProduceReg(simdNode); |
4152 | } |
4153 | |
4154 | //---------------------------------------------------------------------------------- |
4155 | // genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt. |
4156 | // |
4157 | // Arguments: |
4158 | // simdNode - The GT_SIMD node |
4159 | // |
4160 | // Return Value: |
4161 | // None. |
4162 | // |
4163 | void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) |
4164 | { |
4165 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || |
4166 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs || |
4167 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle || |
4168 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 || |
4169 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble || |
4170 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64); |
4171 | |
4172 | GenTree* op1 = simdNode->gtGetOp1(); |
4173 | var_types baseType = simdNode->gtSIMDBaseType; |
4174 | regNumber targetReg = simdNode->gtRegNum; |
4175 | assert(targetReg != REG_NA); |
4176 | var_types targetType = simdNode->TypeGet(); |
4177 | |
4178 | genConsumeOperands(simdNode); |
4179 | regNumber op1Reg = op1->gtRegNum; |
4180 | |
4181 | assert(genIsValidFloatReg(op1Reg)); |
4182 | assert(genIsValidFloatReg(targetReg)); |
4183 | |
4184 | instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); |
4185 | emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
4186 | insOpts opt = (ins == INS_mov) ? INS_OPTS_NONE : genGetSimdInsOpt(attr, baseType); |
4187 | |
4188 | getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt); |
4189 | |
4190 | genProduceReg(simdNode); |
4191 | } |
4192 | |
4193 | //-------------------------------------------------------------------------------- |
4194 | // genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations |
4195 | // |
4196 | // Arguments: |
4197 | // simdNode - The GT_SIMD node |
4198 | // |
4199 | // Notes: |
4200 | // The Widen intrinsics are broken into separate intrinsics for the two results. |
4201 | // |
4202 | void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) |
4203 | { |
4204 | assert((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenLo) || |
4205 | (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi)); |
4206 | |
4207 | GenTree* op1 = simdNode->gtGetOp1(); |
4208 | var_types baseType = simdNode->gtSIMDBaseType; |
4209 | regNumber targetReg = simdNode->gtRegNum; |
4210 | assert(targetReg != REG_NA); |
4211 | var_types simdType = simdNode->TypeGet(); |
4212 | |
4213 | genConsumeOperands(simdNode); |
4214 | regNumber op1Reg = op1->gtRegNum; |
4215 | regNumber srcReg = op1Reg; |
4216 | emitAttr emitSize = emitActualTypeSize(simdType); |
4217 | |
4218 | instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); |
4219 | |
4220 | if (varTypeIsFloating(baseType)) |
4221 | { |
4222 | getEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg); |
4223 | } |
4224 | else |
4225 | { |
4226 | emitAttr attr = (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi) ? EA_16BYTE : EA_8BYTE; |
4227 | insOpts opt = genGetSimdInsOpt(attr, baseType); |
4228 | |
4229 | getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt); |
4230 | } |
4231 | |
4232 | genProduceReg(simdNode); |
4233 | } |
4234 | |
4235 | //-------------------------------------------------------------------------------- |
4236 | // genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations |
4237 | // |
4238 | // Arguments: |
4239 | // simdNode - The GT_SIMD node |
4240 | // |
4241 | // Notes: |
4242 | // This intrinsic takes two arguments. The first operand is narrowed to produce the |
4243 | // lower elements of the results, and the second operand produces the high elements. |
4244 | // |
4245 | void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) |
4246 | { |
4247 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicNarrow); |
4248 | |
4249 | GenTree* op1 = simdNode->gtGetOp1(); |
4250 | GenTree* op2 = simdNode->gtGetOp2(); |
4251 | var_types baseType = simdNode->gtSIMDBaseType; |
4252 | regNumber targetReg = simdNode->gtRegNum; |
4253 | assert(targetReg != REG_NA); |
4254 | var_types simdType = simdNode->TypeGet(); |
4255 | emitAttr emitSize = emitTypeSize(simdType); |
4256 | |
4257 | genConsumeOperands(simdNode); |
4258 | regNumber op1Reg = op1->gtRegNum; |
4259 | regNumber op2Reg = op2->gtRegNum; |
4260 | |
4261 | assert(genIsValidFloatReg(op1Reg)); |
4262 | assert(genIsValidFloatReg(op2Reg)); |
4263 | assert(genIsValidFloatReg(targetReg)); |
4264 | assert(op2Reg != targetReg); |
4265 | assert(simdNode->gtSIMDSize == 16); |
4266 | |
4267 | instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); |
4268 | assert((ins == INS_fcvtn) || (ins == INS_xtn)); |
4269 | |
4270 | if (ins == INS_fcvtn) |
4271 | { |
4272 | getEmitter()->emitIns_R_R(INS_fcvtn, EA_8BYTE, targetReg, op1Reg); |
4273 | getEmitter()->emitIns_R_R(INS_fcvtn2, EA_8BYTE, targetReg, op2Reg); |
4274 | } |
4275 | else |
4276 | { |
4277 | insOpts opt = INS_OPTS_NONE; |
4278 | insOpts opt2 = INS_OPTS_NONE; |
4279 | |
4280 | // This is not the same as genGetSimdInsOpt() |
4281 | // Basetype is the soure operand type |
4282 | // However encoding is based on the destination operand type which is 1/2 the basetype. |
4283 | switch (baseType) |
4284 | { |
4285 | case TYP_ULONG: |
4286 | case TYP_LONG: |
4287 | opt = INS_OPTS_2S; |
4288 | opt2 = INS_OPTS_4S; |
4289 | break; |
4290 | case TYP_UINT: |
4291 | case TYP_INT: |
4292 | opt = INS_OPTS_4H; |
4293 | opt2 = INS_OPTS_8H; |
4294 | break; |
4295 | case TYP_USHORT: |
4296 | case TYP_SHORT: |
4297 | opt = INS_OPTS_8B; |
4298 | opt2 = INS_OPTS_16B; |
4299 | break; |
4300 | default: |
4301 | assert(!"Unsupported narrowing element type" ); |
4302 | unreached(); |
4303 | } |
4304 | getEmitter()->emitIns_R_R(INS_xtn, EA_8BYTE, targetReg, op1Reg, opt); |
4305 | getEmitter()->emitIns_R_R(INS_xtn2, EA_16BYTE, targetReg, op2Reg, opt2); |
4306 | } |
4307 | |
4308 | genProduceReg(simdNode); |
4309 | } |
4310 | |
4311 | //-------------------------------------------------------------------------------- |
4312 | // genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations |
4313 | // add, sub, mul, bit-wise And, AndNot and Or. |
4314 | // |
4315 | // Arguments: |
4316 | // simdNode - The GT_SIMD node |
4317 | // |
4318 | // Return Value: |
4319 | // None. |
4320 | // |
4321 | void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) |
4322 | { |
4323 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub || |
4324 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv || |
4325 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || |
4326 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot || |
4327 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || |
4328 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin || |
4329 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual || |
4330 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan || |
4331 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan || |
4332 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThanOrEqual || |
4333 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThanOrEqual); |
4334 | |
4335 | GenTree* op1 = simdNode->gtGetOp1(); |
4336 | GenTree* op2 = simdNode->gtGetOp2(); |
4337 | var_types baseType = simdNode->gtSIMDBaseType; |
4338 | regNumber targetReg = simdNode->gtRegNum; |
4339 | assert(targetReg != REG_NA); |
4340 | var_types targetType = simdNode->TypeGet(); |
4341 | |
4342 | genConsumeOperands(simdNode); |
4343 | regNumber op1Reg = op1->gtRegNum; |
4344 | regNumber op2Reg = op2->gtRegNum; |
4345 | |
4346 | assert(genIsValidFloatReg(op1Reg)); |
4347 | assert(genIsValidFloatReg(op2Reg)); |
4348 | assert(genIsValidFloatReg(targetReg)); |
4349 | |
4350 | // TODO-ARM64-CQ Contain integer constants where posible |
4351 | |
4352 | instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); |
4353 | emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
4354 | insOpts opt = genGetSimdInsOpt(attr, baseType); |
4355 | |
4356 | getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, opt); |
4357 | |
4358 | genProduceReg(simdNode); |
4359 | } |
4360 | |
4361 | //-------------------------------------------------------------------------------- |
// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator
4363 | // == and != |
4364 | // |
4365 | // Arguments: |
4366 | // simdNode - The GT_SIMD node |
4367 | // |
4368 | // Return Value: |
4369 | // None. |
4370 | // |
4371 | void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) |
4372 | { |
4373 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality || |
4374 | simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality); |
4375 | |
4376 | GenTree* op1 = simdNode->gtGetOp1(); |
4377 | GenTree* op2 = simdNode->gtGetOp2(); |
4378 | var_types baseType = simdNode->gtSIMDBaseType; |
4379 | regNumber targetReg = simdNode->gtRegNum; |
4380 | var_types targetType = simdNode->TypeGet(); |
4381 | |
4382 | genConsumeOperands(simdNode); |
4383 | regNumber op1Reg = op1->gtRegNum; |
4384 | regNumber op2Reg = op2->gtRegNum; |
4385 | regNumber otherReg = op2Reg; |
4386 | |
4387 | instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, baseType); |
4388 | emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
4389 | insOpts opt = genGetSimdInsOpt(attr, baseType); |
4390 | |
4391 | // TODO-ARM64-CQ Contain integer constants where posible |
4392 | |
4393 | regNumber tmpFloatReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); |
4394 | |
4395 | getEmitter()->emitIns_R_R_R(ins, attr, tmpFloatReg, op1Reg, op2Reg, opt); |
4396 | |
4397 | if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) |
4398 | { |
4399 | // For 12Byte vectors we must set upper bits to get correct comparison |
4400 | // We do not assume upper bits are zero. |
4401 | instGen_Set_Reg_To_Imm(EA_4BYTE, targetReg, -1); |
4402 | getEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpFloatReg, targetReg, 3); |
4403 | } |
4404 | |
4405 | getEmitter()->emitIns_R_R(INS_uminv, attr, tmpFloatReg, tmpFloatReg, |
4406 | (simdNode->gtSIMDSize > 8) ? INS_OPTS_16B : INS_OPTS_8B); |
4407 | |
4408 | getEmitter()->emitIns_R_R_I(INS_mov, EA_1BYTE, targetReg, tmpFloatReg, 0); |
4409 | |
4410 | if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality) |
4411 | { |
4412 | getEmitter()->emitIns_R_R_I(INS_eor, EA_4BYTE, targetReg, targetReg, 0x1); |
4413 | } |
4414 | |
4415 | getEmitter()->emitIns_R_R_I(INS_and, EA_4BYTE, targetReg, targetReg, 0x1); |
4416 | |
4417 | genProduceReg(simdNode); |
4418 | } |
4419 | |
4420 | //-------------------------------------------------------------------------------- |
4421 | // genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. |
4422 | // |
4423 | // Arguments: |
4424 | // simdNode - The GT_SIMD node |
4425 | // |
4426 | // Return Value: |
4427 | // None. |
4428 | // |
4429 | void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) |
4430 | { |
4431 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDotProduct); |
4432 | |
4433 | GenTree* op1 = simdNode->gtGetOp1(); |
4434 | GenTree* op2 = simdNode->gtGetOp2(); |
4435 | var_types baseType = simdNode->gtSIMDBaseType; |
4436 | var_types simdType = op1->TypeGet(); |
4437 | |
4438 | regNumber targetReg = simdNode->gtRegNum; |
4439 | assert(targetReg != REG_NA); |
4440 | |
4441 | var_types targetType = simdNode->TypeGet(); |
4442 | assert(targetType == baseType); |
4443 | |
4444 | genConsumeOperands(simdNode); |
4445 | regNumber op1Reg = op1->gtRegNum; |
4446 | regNumber op2Reg = op2->gtRegNum; |
4447 | regNumber tmpReg = targetReg; |
4448 | |
4449 | if (!varTypeIsFloating(baseType)) |
4450 | { |
4451 | tmpReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); |
4452 | } |
4453 | |
4454 | instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicMul, baseType); |
4455 | emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
4456 | insOpts opt = genGetSimdInsOpt(attr, baseType); |
4457 | |
4458 | // Vector multiply |
4459 | getEmitter()->emitIns_R_R_R(ins, attr, tmpReg, op1Reg, op2Reg, opt); |
4460 | |
4461 | if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) |
4462 | { |
4463 | // For 12Byte vectors we must zero upper bits to get correct dot product |
4464 | // We do not assume upper bits are zero. |
4465 | getEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpReg, REG_ZR, 3); |
4466 | } |
4467 | |
4468 | // Vector add horizontal |
4469 | if (varTypeIsFloating(baseType)) |
4470 | { |
4471 | if (baseType == TYP_FLOAT) |
4472 | { |
4473 | if (opt == INS_OPTS_4S) |
4474 | { |
4475 | getEmitter()->emitIns_R_R_R(INS_faddp, attr, tmpReg, tmpReg, tmpReg, INS_OPTS_4S); |
4476 | } |
4477 | getEmitter()->emitIns_R_R(INS_faddp, EA_4BYTE, targetReg, tmpReg); |
4478 | } |
4479 | else |
4480 | { |
4481 | getEmitter()->emitIns_R_R(INS_faddp, EA_8BYTE, targetReg, tmpReg); |
4482 | } |
4483 | } |
4484 | else |
4485 | { |
4486 | ins = varTypeIsUnsigned(baseType) ? INS_uaddlv : INS_saddlv; |
4487 | |
4488 | getEmitter()->emitIns_R_R(ins, attr, tmpReg, tmpReg, opt); |
4489 | |
4490 | // Mov to integer register |
4491 | if (varTypeIsUnsigned(baseType) || (genTypeSize(baseType) < 4)) |
4492 | { |
4493 | getEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(baseType), targetReg, tmpReg, 0); |
4494 | } |
4495 | else |
4496 | { |
4497 | getEmitter()->emitIns_R_R_I(INS_smov, emitActualTypeSize(baseType), targetReg, tmpReg, 0); |
4498 | } |
4499 | } |
4500 | |
4501 | genProduceReg(simdNode); |
4502 | } |
4503 | |
4504 | //------------------------------------------------------------------------------------ |
4505 | // genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i. |
4506 | // |
4507 | // Arguments: |
4508 | // simdNode - The GT_SIMD node |
4509 | // |
4510 | // Return Value: |
4511 | // None. |
4512 | // |
4513 | void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) |
4514 | { |
4515 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem); |
4516 | |
4517 | GenTree* op1 = simdNode->gtGetOp1(); |
4518 | GenTree* op2 = simdNode->gtGetOp2(); |
4519 | var_types simdType = op1->TypeGet(); |
4520 | assert(varTypeIsSIMD(simdType)); |
4521 | |
4522 | // op1 of TYP_SIMD12 should be considered as TYP_SIMD16 |
4523 | if (simdType == TYP_SIMD12) |
4524 | { |
4525 | simdType = TYP_SIMD16; |
4526 | } |
4527 | |
4528 | var_types baseType = simdNode->gtSIMDBaseType; |
4529 | regNumber targetReg = simdNode->gtRegNum; |
4530 | assert(targetReg != REG_NA); |
4531 | var_types targetType = simdNode->TypeGet(); |
4532 | assert(targetType == genActualType(baseType)); |
4533 | |
4534 | // GetItem has 2 operands: |
4535 | // - the source of SIMD type (op1) |
4536 | // - the index of the value to be returned. |
4537 | genConsumeOperands(simdNode); |
4538 | |
4539 | emitAttr baseTypeSize = emitTypeSize(baseType); |
4540 | unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize)); |
4541 | |
4542 | if (op2->IsCnsIntOrI()) |
4543 | { |
4544 | assert(op2->isContained()); |
4545 | |
4546 | ssize_t index = op2->gtIntCon.gtIconVal; |
4547 | |
4548 | // We only need to generate code for the get if the index is valid |
4549 | // If the index is invalid, previously generated for the range check will throw |
4550 | if (getEmitter()->isValidVectorIndex(emitTypeSize(simdType), baseTypeSize, index)) |
4551 | { |
4552 | if (op1->isContained()) |
4553 | { |
4554 | int offset = (int)index * genTypeSize(baseType); |
4555 | instruction ins = ins_Load(baseType); |
4556 | baseTypeSize = varTypeIsFloating(baseType) |
4557 | ? baseTypeSize |
4558 | : getEmitter()->emitInsAdjustLoadStoreAttr(ins, baseTypeSize); |
4559 | |
4560 | assert(!op1->isUsedFromReg()); |
4561 | |
4562 | if (op1->OperIsLocal()) |
4563 | { |
4564 | unsigned varNum = op1->gtLclVarCommon.gtLclNum; |
4565 | |
4566 | getEmitter()->emitIns_R_S(ins, baseTypeSize, targetReg, varNum, offset); |
4567 | } |
4568 | else |
4569 | { |
4570 | assert(op1->OperGet() == GT_IND); |
4571 | |
4572 | GenTree* addr = op1->AsIndir()->Addr(); |
4573 | assert(!addr->isContained()); |
4574 | regNumber baseReg = addr->gtRegNum; |
4575 | |
4576 | // ldr targetReg, [baseReg, #offset] |
4577 | getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, baseReg, offset); |
4578 | } |
4579 | } |
4580 | else |
4581 | { |
4582 | assert(op1->isUsedFromReg()); |
4583 | regNumber srcReg = op1->gtRegNum; |
4584 | |
4585 | instruction ins; |
4586 | if (varTypeIsFloating(baseType)) |
4587 | { |
4588 | assert(genIsValidFloatReg(targetReg)); |
4589 | // dup targetReg, srcReg[#index] |
4590 | ins = INS_dup; |
4591 | } |
4592 | else |
4593 | { |
4594 | assert(genIsValidIntReg(targetReg)); |
4595 | if (varTypeIsUnsigned(baseType) || (baseTypeSize == EA_8BYTE)) |
4596 | { |
4597 | // umov targetReg, srcReg[#index] |
4598 | ins = INS_umov; |
4599 | } |
4600 | else |
4601 | { |
4602 | // smov targetReg, srcReg[#index] |
4603 | ins = INS_smov; |
4604 | } |
4605 | } |
4606 | getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, srcReg, index); |
4607 | } |
4608 | } |
4609 | } |
4610 | else |
4611 | { |
4612 | assert(!op2->isContained()); |
4613 | |
4614 | regNumber baseReg = REG_NA; |
4615 | regNumber indexReg = op2->gtRegNum; |
4616 | |
4617 | if (op1->isContained()) |
4618 | { |
4619 | // Optimize the case of op1 is in memory and trying to access ith element. |
4620 | assert(!op1->isUsedFromReg()); |
4621 | if (op1->OperIsLocal()) |
4622 | { |
4623 | unsigned varNum = op1->gtLclVarCommon.gtLclNum; |
4624 | |
4625 | baseReg = simdNode->ExtractTempReg(); |
4626 | |
4627 | // Load the address of varNum |
4628 | getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0); |
4629 | } |
4630 | else |
4631 | { |
4632 | // Require GT_IND addr to be not contained. |
4633 | assert(op1->OperGet() == GT_IND); |
4634 | |
4635 | GenTree* addr = op1->AsIndir()->Addr(); |
4636 | assert(!addr->isContained()); |
4637 | |
4638 | baseReg = addr->gtRegNum; |
4639 | } |
4640 | } |
4641 | else |
4642 | { |
4643 | assert(op1->isUsedFromReg()); |
4644 | regNumber srcReg = op1->gtRegNum; |
4645 | |
4646 | unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum; |
4647 | noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM); |
4648 | |
4649 | baseReg = simdNode->ExtractTempReg(); |
4650 | |
4651 | // Load the address of simdInitTempVarNum |
4652 | getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0); |
4653 | |
4654 | // Store the vector to simdInitTempVarNum |
4655 | getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, baseReg); |
4656 | } |
4657 | |
4658 | assert(genIsValidIntReg(indexReg)); |
4659 | assert(genIsValidIntReg(baseReg)); |
4660 | assert(baseReg != indexReg); |
4661 | |
4662 | // Load item at baseReg[index] |
4663 | getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, baseReg, indexReg, INS_OPTS_LSL, |
4664 | baseTypeScale); |
4665 | } |
4666 | |
4667 | genProduceReg(simdNode); |
4668 | } |
4669 | |
4670 | //------------------------------------------------------------------------------------ |
4671 | // genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i. |
4672 | // |
4673 | // Arguments: |
4674 | // simdNode - The GT_SIMD node |
4675 | // |
4676 | // Return Value: |
4677 | // None. |
4678 | // |
4679 | void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) |
4680 | { |
4681 | // Determine index based on intrinsic ID |
4682 | int index = -1; |
4683 | switch (simdNode->gtSIMDIntrinsicID) |
4684 | { |
4685 | case SIMDIntrinsicSetX: |
4686 | index = 0; |
4687 | break; |
4688 | case SIMDIntrinsicSetY: |
4689 | index = 1; |
4690 | break; |
4691 | case SIMDIntrinsicSetZ: |
4692 | index = 2; |
4693 | break; |
4694 | case SIMDIntrinsicSetW: |
4695 | index = 3; |
4696 | break; |
4697 | |
4698 | default: |
4699 | unreached(); |
4700 | } |
4701 | assert(index != -1); |
4702 | |
4703 | // op1 is the SIMD vector |
4704 | // op2 is the value to be set |
4705 | GenTree* op1 = simdNode->gtGetOp1(); |
4706 | GenTree* op2 = simdNode->gtGetOp2(); |
4707 | |
4708 | var_types baseType = simdNode->gtSIMDBaseType; |
4709 | regNumber targetReg = simdNode->gtRegNum; |
4710 | assert(targetReg != REG_NA); |
4711 | var_types targetType = simdNode->TypeGet(); |
4712 | assert(varTypeIsSIMD(targetType)); |
4713 | |
4714 | assert(op2->TypeGet() == baseType); |
4715 | assert(simdNode->gtSIMDSize >= ((index + 1) * genTypeSize(baseType))); |
4716 | |
4717 | genConsumeOperands(simdNode); |
4718 | regNumber op1Reg = op1->gtRegNum; |
4719 | regNumber op2Reg = op2->gtRegNum; |
4720 | |
4721 | assert(genIsValidFloatReg(targetReg)); |
4722 | assert(genIsValidFloatReg(op1Reg)); |
4723 | assert(genIsValidIntReg(op2Reg) || genIsValidFloatReg(op2Reg)); |
4724 | assert(targetReg != op2Reg); |
4725 | |
4726 | emitAttr attr = emitTypeSize(baseType); |
4727 | |
4728 | // Insert mov if register assignment requires it |
4729 | getEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, op1Reg); |
4730 | |
4731 | if (genIsValidIntReg(op2Reg)) |
4732 | { |
4733 | getEmitter()->emitIns_R_R_I(INS_ins, attr, targetReg, op2Reg, index); |
4734 | } |
4735 | else |
4736 | { |
4737 | getEmitter()->emitIns_R_R_I_I(INS_ins, attr, targetReg, op2Reg, index, 0); |
4738 | } |
4739 | |
4740 | genProduceReg(simdNode); |
4741 | } |
4742 | |
4743 | //----------------------------------------------------------------------------- |
4744 | // genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to |
4745 | // the given register, if any, or to memory. |
4746 | // |
4747 | // Arguments: |
4748 | // simdNode - The GT_SIMD node |
4749 | // |
4750 | // Return Value: |
4751 | // None. |
4752 | // |
4753 | // Notes: |
4754 | // The upper half of all SIMD registers are volatile, even the callee-save registers. |
4755 | // When a 16-byte SIMD value is live across a call, the register allocator will use this intrinsic |
4756 | // to cause the upper half to be saved. It will first attempt to find another, unused, callee-save |
4757 | // register. If such a register cannot be found, it will save it to an available caller-save register. |
4758 | // In that case, this node will be marked GTF_SPILL, which will cause genProduceReg to save the 8 byte |
4759 | // value to the stack. (Note that if there are no caller-save registers available, the entire 16 byte |
4760 | // value will be spilled to the stack.) |
4761 | // |
4762 | void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) |
4763 | { |
4764 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave); |
4765 | |
4766 | GenTree* op1 = simdNode->gtGetOp1(); |
4767 | assert(op1->IsLocal()); |
4768 | assert(emitTypeSize(op1->TypeGet()) == 16); |
4769 | regNumber targetReg = simdNode->gtRegNum; |
4770 | regNumber op1Reg = genConsumeReg(op1); |
4771 | assert(op1Reg != REG_NA); |
4772 | assert(targetReg != REG_NA); |
4773 | getEmitter()->emitIns_R_R_I_I(INS_mov, EA_8BYTE, targetReg, op1Reg, 0, 1); |
4774 | |
4775 | genProduceReg(simdNode); |
4776 | } |
4777 | |
4778 | //----------------------------------------------------------------------------- |
4779 | // genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD16 vector to |
4780 | // the given register, if any, or to memory. |
4781 | // |
4782 | // Arguments: |
4783 | // simdNode - The GT_SIMD node |
4784 | // |
4785 | // Return Value: |
4786 | // None. |
4787 | // |
4788 | // Notes: |
4789 | // For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always |
4790 | // have their home register, this node has its targetReg on the lclVar child, and its source |
4791 | // on the simdNode. |
4792 | // Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled |
4793 | // an upper-half to a caller save register, this node will be marked GTF_SPILLED. However, unlike |
4794 | // most spill scenarios, the saved tree will be different from the restored tree, but the spill |
4795 | // restore logic, which is triggered by the call to genConsumeReg, requires us to provide the |
4796 | // spilled tree (saveNode) in order to perform the reload. We can easily find that tree, |
4797 | // as it is in the spill descriptor for the register from which it was saved. |
4798 | // |
4799 | void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) |
4800 | { |
4801 | assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore); |
4802 | |
4803 | GenTree* op1 = simdNode->gtGetOp1(); |
4804 | assert(op1->IsLocal()); |
4805 | assert(emitTypeSize(op1->TypeGet()) == 16); |
4806 | regNumber srcReg = simdNode->gtRegNum; |
4807 | regNumber lclVarReg = genConsumeReg(op1); |
4808 | unsigned varNum = op1->AsLclVarCommon()->gtLclNum; |
4809 | assert(lclVarReg != REG_NA); |
4810 | assert(srcReg != REG_NA); |
4811 | if (simdNode->gtFlags & GTF_SPILLED) |
4812 | { |
4813 | GenTree* saveNode = regSet.rsSpillDesc[srcReg]->spillTree; |
4814 | noway_assert(saveNode != nullptr && (saveNode->gtRegNum == srcReg)); |
4815 | genConsumeReg(saveNode); |
4816 | } |
4817 | getEmitter()->emitIns_R_R_I_I(INS_mov, EA_8BYTE, lclVarReg, srcReg, 1, 0); |
4818 | } |
4819 | |
4820 | //----------------------------------------------------------------------------- |
4821 | // genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory. |
4822 | // Since Vector3 is not a hardware supported write size, it is performed |
4823 | // as two writes: 8 byte followed by 4-byte. |
4824 | // |
4825 | // Arguments: |
4826 | // treeNode - tree node that is attempting to store indirect |
4827 | // |
4828 | // |
4829 | // Return Value: |
4830 | // None. |
4831 | // |
4832 | void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) |
4833 | { |
4834 | assert(treeNode->OperGet() == GT_STOREIND); |
4835 | |
4836 | GenTree* addr = treeNode->gtOp.gtOp1; |
4837 | GenTree* data = treeNode->gtOp.gtOp2; |
4838 | |
4839 | // addr and data should not be contained. |
4840 | assert(!data->isContained()); |
4841 | assert(!addr->isContained()); |
4842 | |
4843 | #ifdef DEBUG |
4844 | // Should not require a write barrier |
4845 | GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data); |
4846 | assert(writeBarrierForm == GCInfo::WBF_NoBarrier); |
4847 | #endif |
4848 | |
4849 | genConsumeOperands(treeNode->AsOp()); |
4850 | |
4851 | // Need an addtional integer register to extract upper 4 bytes from data. |
4852 | regNumber tmpReg = treeNode->GetSingleTempReg(); |
4853 | assert(tmpReg != addr->gtRegNum); |
4854 | |
4855 | // 8-byte write |
4856 | getEmitter()->emitIns_R_R(ins_Store(TYP_DOUBLE), EA_8BYTE, data->gtRegNum, addr->gtRegNum); |
4857 | |
4858 | // Extract upper 4-bytes from data |
4859 | getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, data->gtRegNum, 2); |
4860 | |
4861 | // 4-byte write |
4862 | getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, tmpReg, addr->gtRegNum, 8); |
4863 | } |
4864 | |
4865 | //----------------------------------------------------------------------------- |
4866 | // genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value. |
// Since Vector3 is not a hardware supported read size, it is performed
4868 | // as two loads: 8 byte followed by 4-byte. |
4869 | // |
4870 | // Arguments: |
4871 | // treeNode - tree node of GT_IND |
4872 | // |
4873 | // |
4874 | // Return Value: |
4875 | // None. |
4876 | // |
4877 | void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) |
4878 | { |
4879 | assert(treeNode->OperGet() == GT_IND); |
4880 | |
4881 | GenTree* addr = treeNode->gtOp.gtOp1; |
4882 | regNumber targetReg = treeNode->gtRegNum; |
4883 | |
4884 | assert(!addr->isContained()); |
4885 | |
4886 | regNumber operandReg = genConsumeReg(addr); |
4887 | |
4888 | // Need an addtional int register to read upper 4 bytes, which is different from targetReg |
4889 | regNumber tmpReg = treeNode->GetSingleTempReg(); |
4890 | |
4891 | // 8-byte read |
4892 | getEmitter()->emitIns_R_R(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, addr->gtRegNum); |
4893 | |
4894 | // 4-byte read |
4895 | getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, addr->gtRegNum, 8); |
4896 | |
4897 | // Insert upper 4-bytes into data |
4898 | getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, targetReg, tmpReg, 2); |
4899 | |
4900 | genProduceReg(treeNode); |
4901 | } |
4902 | |
4903 | //----------------------------------------------------------------------------- |
4904 | // genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field. |
4905 | // Since Vector3 is not a hardware supported write size, it is performed |
4906 | // as two stores: 8 byte followed by 4-byte. |
4907 | // |
4908 | // Arguments: |
4909 | // treeNode - tree node that is attempting to store TYP_SIMD12 field |
4910 | // |
4911 | // Return Value: |
4912 | // None. |
4913 | // |
4914 | void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) |
4915 | { |
4916 | assert((treeNode->OperGet() == GT_STORE_LCL_FLD) || (treeNode->OperGet() == GT_STORE_LCL_VAR)); |
4917 | |
4918 | unsigned offs = 0; |
4919 | unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; |
4920 | assert(varNum < compiler->lvaCount); |
4921 | |
4922 | if (treeNode->OperGet() == GT_LCL_FLD) |
4923 | { |
4924 | offs = treeNode->gtLclFld.gtLclOffs; |
4925 | } |
4926 | |
4927 | GenTree* op1 = treeNode->gtOp.gtOp1; |
4928 | assert(!op1->isContained()); |
4929 | regNumber operandReg = genConsumeReg(op1); |
4930 | |
4931 | // Need an addtional integer register to extract upper 4 bytes from data. |
4932 | regNumber tmpReg = treeNode->GetSingleTempReg(); |
4933 | |
4934 | // store lower 8 bytes |
4935 | getEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs); |
4936 | |
4937 | // Extract upper 4-bytes from data |
4938 | getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, operandReg, 2); |
4939 | |
4940 | // 4-byte write |
4941 | getEmitter()->emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offs + 8); |
4942 | } |
4943 | |
4944 | #endif // FEATURE_SIMD |
4945 | |
4946 | #ifdef FEATURE_HW_INTRINSICS |
4947 | #include "hwintrinsic.h" |
4948 | |
4949 | instruction CodeGen::getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType) |
4950 | { |
4951 | NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; |
4952 | |
4953 | unsigned int instrTypeIndex = varTypeIsFloating(instrType) ? 0 : varTypeIsUnsigned(instrType) ? 2 : 1; |
4954 | |
4955 | instruction ins = HWIntrinsicInfo::lookup(intrinsicID).instrs[instrTypeIndex]; |
4956 | assert(ins != INS_invalid); |
4957 | |
4958 | return ins; |
4959 | } |
4960 | |
4961 | //------------------------------------------------------------------------ |
4962 | // genHWIntrinsic: Produce code for a GT_HWIntrinsic node. |
4963 | // |
4964 | // This is the main routine which in turn calls the genHWIntrinsicXXX() routines. |
4965 | // |
4966 | // Arguments: |
4967 | // node - the GT_HWIntrinsic node |
4968 | // |
4969 | // Return Value: |
4970 | // None. |
4971 | // |
4972 | void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) |
4973 | { |
4974 | NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; |
4975 | |
4976 | switch (HWIntrinsicInfo::lookup(intrinsicID).form) |
4977 | { |
4978 | case HWIntrinsicInfo::UnaryOp: |
4979 | genHWIntrinsicUnaryOp(node); |
4980 | break; |
4981 | case HWIntrinsicInfo::CrcOp: |
4982 | genHWIntrinsicCrcOp(node); |
4983 | break; |
4984 | case HWIntrinsicInfo::SimdBinaryOp: |
4985 | genHWIntrinsicSimdBinaryOp(node); |
4986 | break; |
4987 | case HWIntrinsicInfo::SimdExtractOp: |
4988 | genHWIntrinsicSimdExtractOp(node); |
4989 | break; |
4990 | case HWIntrinsicInfo::SimdInsertOp: |
4991 | genHWIntrinsicSimdInsertOp(node); |
4992 | break; |
4993 | case HWIntrinsicInfo::SimdSelectOp: |
4994 | genHWIntrinsicSimdSelectOp(node); |
4995 | break; |
4996 | case HWIntrinsicInfo::SimdSetAllOp: |
4997 | genHWIntrinsicSimdSetAllOp(node); |
4998 | break; |
4999 | case HWIntrinsicInfo::SimdUnaryOp: |
5000 | genHWIntrinsicSimdUnaryOp(node); |
5001 | break; |
5002 | case HWIntrinsicInfo::SimdBinaryRMWOp: |
5003 | genHWIntrinsicSimdBinaryRMWOp(node); |
5004 | break; |
5005 | case HWIntrinsicInfo::SimdTernaryRMWOp: |
5006 | genHWIntrinsicSimdTernaryRMWOp(node); |
5007 | break; |
5008 | case HWIntrinsicInfo::Sha1HashOp: |
5009 | genHWIntrinsicShaHashOp(node); |
5010 | break; |
5011 | case HWIntrinsicInfo::Sha1RotateOp: |
5012 | genHWIntrinsicShaRotateOp(node); |
5013 | break; |
5014 | |
5015 | default: |
5016 | NYI("HWIntrinsic form not implemented" ); |
5017 | } |
5018 | } |
5019 | |
5020 | //------------------------------------------------------------------------ |
5021 | // genHWIntrinsicUnaryOp: |
5022 | // |
5023 | // Produce code for a GT_HWIntrinsic node with form UnaryOp. |
5024 | // |
5025 | // Consumes one scalar operand produces a scalar |
5026 | // |
5027 | // Arguments: |
5028 | // node - the GT_HWIntrinsic node |
5029 | // |
5030 | // Return Value: |
5031 | // None. |
5032 | // |
5033 | void CodeGen::genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node) |
5034 | { |
5035 | GenTree* op1 = node->gtGetOp1(); |
5036 | regNumber targetReg = node->gtRegNum; |
5037 | emitAttr attr = emitActualTypeSize(op1->TypeGet()); |
5038 | |
5039 | assert(targetReg != REG_NA); |
5040 | var_types targetType = node->TypeGet(); |
5041 | |
5042 | genConsumeOperands(node); |
5043 | |
5044 | regNumber op1Reg = op1->gtRegNum; |
5045 | |
5046 | instruction ins = getOpForHWIntrinsic(node, node->TypeGet()); |
5047 | |
5048 | getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg); |
5049 | |
5050 | genProduceReg(node); |
5051 | } |
5052 | |
5053 | //------------------------------------------------------------------------ |
5054 | // genHWIntrinsicCrcOp: |
5055 | // |
5056 | // Produce code for a GT_HWIntrinsic node with form CrcOp. |
5057 | // |
5058 | // Consumes two scalar operands and produces a scalar result |
5059 | // |
5060 | // This form differs from BinaryOp because the attr depends on the size of op2 |
5061 | // |
5062 | // Arguments: |
5063 | // node - the GT_HWIntrinsic node |
5064 | // |
5065 | // Return Value: |
5066 | // None. |
5067 | // |
5068 | void CodeGen::genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node) |
5069 | { |
5070 | NYI("genHWIntrinsicCrcOp not implemented" ); |
5071 | } |
5072 | |
5073 | //------------------------------------------------------------------------ |
5074 | // genHWIntrinsicSimdBinaryOp: |
5075 | // |
5076 | // Produce code for a GT_HWIntrinsic node with form SimdBinaryOp. |
5077 | // |
5078 | // Consumes two SIMD operands and produces a SIMD result |
5079 | // |
5080 | // Arguments: |
5081 | // node - the GT_HWIntrinsic node |
5082 | // |
5083 | // Return Value: |
5084 | // None. |
5085 | // |
5086 | void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node) |
5087 | { |
5088 | GenTree* op1 = node->gtGetOp1(); |
5089 | GenTree* op2 = node->gtGetOp2(); |
5090 | var_types baseType = node->gtSIMDBaseType; |
5091 | regNumber targetReg = node->gtRegNum; |
5092 | |
5093 | assert(targetReg != REG_NA); |
5094 | var_types targetType = node->TypeGet(); |
5095 | |
5096 | genConsumeOperands(node); |
5097 | |
5098 | regNumber op1Reg = op1->gtRegNum; |
5099 | regNumber op2Reg = op2->gtRegNum; |
5100 | |
5101 | assert(genIsValidFloatReg(op1Reg)); |
5102 | assert(genIsValidFloatReg(op2Reg)); |
5103 | assert(genIsValidFloatReg(targetReg)); |
5104 | |
5105 | instruction ins = getOpForHWIntrinsic(node, baseType); |
5106 | emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
5107 | insOpts opt = genGetSimdInsOpt(attr, baseType); |
5108 | |
5109 | getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, opt); |
5110 | |
5111 | genProduceReg(node); |
5112 | } |
5113 | |
5114 | //------------------------------------------------------------------------ |
5115 | // genHWIntrinsicSwitchTable: |
5116 | // |
5117 | // Generate code for an immediate switch table |
5118 | // |
5119 | // In cases where an instruction only supports const immediate operands, we |
5120 | // need to generate functionally correct code when the operand is not constant |
5121 | // |
5122 | // This is required by the HW Intrinsic design to handle indirect calls, such as: |
5123 | // debugger calls |
5124 | // reflection |
5125 | // call backs |
5126 | // |
5127 | // Generated code implements a switch of this form |
5128 | // |
5129 | // switch (swReg) |
5130 | // { |
5131 | // case 0: |
5132 | // ins0; // emitSwCase(0) |
5133 | // break; |
5134 | // case 1: |
5135 | // ins1; // emitSwCase(1) |
5136 | // break; |
5137 | // ... |
5138 | // ... |
5139 | // ... |
5140 | // case swMax - 1: |
5141 | // insLast; // emitSwCase(swMax - 1) |
5142 | // break; |
5143 | // default: |
5144 | // throw ArgumentOutOfRangeException |
5145 | // } |
5146 | // |
5147 | // Generated code looks like: |
5148 | // |
5149 | // cmp swReg, #swMax |
5150 | // b.hs ThrowArgumentOutOfRangeExceptionHelper |
5151 | // adr tmpReg, labelFirst |
5152 | // add tmpReg, tmpReg, swReg, LSL #3 |
5153 | // b [tmpReg] |
5154 | // labelFirst: |
5155 | // ins0 |
5156 | // b labelBreakTarget |
5157 | // ins1 |
5158 | // b labelBreakTarget |
5159 | // ... |
5160 | // ... |
5161 | // ... |
5162 | // insLast |
5163 | // b labelBreakTarget |
5164 | // labelBreakTarget: |
5165 | // |
5166 | // |
5167 | // Arguments: |
5168 | // swReg - register containing the switch case to execute |
5169 | // tmpReg - temporary integer register for calculating the switch indirect branch target |
5170 | // swMax - the number of switch cases. If swReg >= swMax throw SCK_ARG_RNG_EXCPN |
5171 | // emitSwCase - function like argument taking an immediate value and emitting one instruction |
5172 | // |
5173 | // Return Value: |
5174 | // None. |
5175 | // |
5176 | template <typename HWIntrinsicSwitchCaseBody> |
5177 | void CodeGen::genHWIntrinsicSwitchTable(regNumber swReg, |
5178 | regNumber tmpReg, |
5179 | int swMax, |
5180 | HWIntrinsicSwitchCaseBody emitSwCase) |
5181 | { |
5182 | assert(swMax > 0); |
5183 | assert(swMax <= 256); |
5184 | |
5185 | assert(genIsValidIntReg(tmpReg)); |
5186 | assert(genIsValidIntReg(swReg)); |
5187 | |
5188 | BasicBlock* labelFirst = genCreateTempLabel(); |
5189 | BasicBlock* labelBreakTarget = genCreateTempLabel(); |
5190 | |
5191 | // Detect and throw out of range exception |
5192 | getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, swReg, swMax); |
5193 | |
5194 | emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); |
5195 | genJumpToThrowHlpBlk(jmpGEU, SCK_ARG_RNG_EXCPN); |
5196 | |
5197 | // Calculate switch target |
5198 | labelFirst->bbFlags |= BBF_JMP_TARGET; |
5199 | |
5200 | // tmpReg = labelFirst |
5201 | getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, labelFirst, tmpReg); |
5202 | |
5203 | // tmpReg = labelFirst + swReg * 8 |
5204 | getEmitter()->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, tmpReg, swReg, 3, INS_OPTS_LSL); |
5205 | |
5206 | // br tmpReg |
5207 | getEmitter()->emitIns_R(INS_br, EA_PTRSIZE, tmpReg); |
5208 | |
5209 | genDefineTempLabel(labelFirst); |
5210 | for (int i = 0; i < swMax; ++i) |
5211 | { |
5212 | unsigned prevInsCount = getEmitter()->emitInsCount; |
5213 | |
5214 | emitSwCase(i); |
5215 | |
5216 | assert(getEmitter()->emitInsCount == prevInsCount + 1); |
5217 | |
5218 | inst_JMP(EJ_jmp, labelBreakTarget); |
5219 | |
5220 | assert(getEmitter()->emitInsCount == prevInsCount + 2); |
5221 | } |
5222 | genDefineTempLabel(labelBreakTarget); |
5223 | } |
5224 | |
5225 | //------------------------------------------------------------------------ |
5226 | // genHWIntrinsicSimdExtractOp: |
5227 | // |
5228 | // Produce code for a GT_HWIntrinsic node with form SimdExtractOp. |
5229 | // |
5230 | // Consumes one SIMD operand and one scalar |
5231 | // |
5232 | // The element index operand is typically a const immediate |
5233 | // When it is not, a switch table is generated |
5234 | // |
5235 | // See genHWIntrinsicSwitchTable comments |
5236 | // |
5237 | // Arguments: |
5238 | // node - the GT_HWIntrinsic node |
5239 | // |
5240 | // Return Value: |
5241 | // None. |
5242 | // |
5243 | void CodeGen::(GenTreeHWIntrinsic* node) |
5244 | { |
5245 | GenTree* op1 = node->gtGetOp1(); |
5246 | GenTree* op2 = node->gtGetOp2(); |
5247 | var_types simdType = op1->TypeGet(); |
5248 | var_types targetType = node->TypeGet(); |
5249 | regNumber targetReg = node->gtRegNum; |
5250 | |
5251 | assert(targetReg != REG_NA); |
5252 | |
5253 | genConsumeOperands(node); |
5254 | |
5255 | regNumber op1Reg = op1->gtRegNum; |
5256 | |
5257 | assert(genIsValidFloatReg(op1Reg)); |
5258 | |
5259 | emitAttr baseTypeSize = emitTypeSize(targetType); |
5260 | |
5261 | int elements = emitTypeSize(simdType) / baseTypeSize; |
5262 | |
5263 | auto emitSwCase = [&](int element) { |
5264 | assert(element >= 0); |
5265 | assert(element < elements); |
5266 | |
5267 | if (varTypeIsFloating(targetType)) |
5268 | { |
5269 | assert(genIsValidFloatReg(targetReg)); |
5270 | getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op1Reg, 0, element); |
5271 | } |
5272 | else if (varTypeIsUnsigned(targetType) || (baseTypeSize == EA_8BYTE)) |
5273 | { |
5274 | assert(genIsValidIntReg(targetReg)); |
5275 | getEmitter()->emitIns_R_R_I(INS_umov, baseTypeSize, targetReg, op1Reg, element); |
5276 | } |
5277 | else |
5278 | { |
5279 | assert(genIsValidIntReg(targetReg)); |
5280 | getEmitter()->emitIns_R_R_I(INS_smov, baseTypeSize, targetReg, op1Reg, element); |
5281 | } |
5282 | }; |
5283 | |
5284 | if (op2->isContainedIntOrIImmed()) |
5285 | { |
5286 | int element = (int)op2->AsIntConCommon()->IconValue(); |
5287 | |
5288 | emitSwCase(element); |
5289 | } |
5290 | else |
5291 | { |
5292 | regNumber elementReg = op2->gtRegNum; |
5293 | regNumber tmpReg = node->GetSingleTempReg(); |
5294 | |
5295 | genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase); |
5296 | } |
5297 | |
5298 | genProduceReg(node); |
5299 | } |
5300 | |
5301 | //------------------------------------------------------------------------ |
5302 | // genHWIntrinsicSimdInsertOp: |
5303 | // |
5304 | // Produce code for a GT_HWIntrinsic node with form SimdInsertOp. |
5305 | // |
5306 | // Consumes one SIMD operand and two scalars |
5307 | // |
5308 | // The element index operand is typically a const immediate |
5309 | // When it is not, a switch table is generated |
5310 | // |
5311 | // See genHWIntrinsicSwitchTable comments |
5312 | // |
5313 | // Arguments: |
5314 | // node - the GT_HWIntrinsic node |
5315 | // |
5316 | // Return Value: |
5317 | // None. |
5318 | // |
5319 | void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node) |
5320 | { |
5321 | GenTreeArgList* argList = node->gtGetOp1()->AsArgList(); |
5322 | GenTree* op1 = argList->Current(); |
5323 | GenTree* op2 = argList->Rest()->Current(); |
5324 | GenTree* op3 = argList->Rest()->Rest()->Current(); |
5325 | var_types simdType = op1->TypeGet(); |
5326 | var_types baseType = node->gtSIMDBaseType; |
5327 | regNumber targetReg = node->gtRegNum; |
5328 | |
5329 | assert(targetReg != REG_NA); |
5330 | |
5331 | genConsumeRegs(op1); |
5332 | genConsumeRegs(op2); |
5333 | genConsumeRegs(op3); |
5334 | |
5335 | regNumber op1Reg = op1->gtRegNum; |
5336 | |
5337 | assert(genIsValidFloatReg(targetReg)); |
5338 | assert(genIsValidFloatReg(op1Reg)); |
5339 | |
5340 | emitAttr baseTypeSize = emitTypeSize(baseType); |
5341 | |
5342 | int elements = emitTypeSize(simdType) / baseTypeSize; |
5343 | |
5344 | if (targetReg != op1Reg) |
5345 | { |
5346 | getEmitter()->emitIns_R_R(INS_mov, baseTypeSize, targetReg, op1Reg); |
5347 | } |
5348 | |
5349 | if (op3->isContained()) |
5350 | { |
5351 | // Handle vector element to vector element case |
5352 | // |
5353 | // If op3 is contained this is because lowering found an opportunity to contain a Simd.Extract in a Simd.Insert |
5354 | // |
5355 | regNumber op3Reg = op3->gtGetOp1()->gtRegNum; |
5356 | |
5357 | assert(genIsValidFloatReg(op3Reg)); |
5358 | |
5359 | // op3 containment currently only occurs when |
5360 | // + op3 is a Simd.Extract() (gtHWIntrinsicId == NI_ARM64_SIMD_GetItem) |
5361 | // + element & srcLane are immediate constants |
5362 | assert(op2->isContainedIntOrIImmed()); |
5363 | assert(op3->OperIs(GT_HWIntrinsic)); |
5364 | assert(op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_ARM64_SIMD_GetItem); |
5365 | assert(op3->gtGetOp2()->isContainedIntOrIImmed()); |
5366 | |
5367 | int element = (int)op2->AsIntConCommon()->IconValue(); |
5368 | int srcLane = (int)op3->gtGetOp2()->AsIntConCommon()->IconValue(); |
5369 | |
5370 | // Emit mov targetReg[element], op3Reg[srcLane] |
5371 | getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, srcLane); |
5372 | } |
5373 | else |
5374 | { |
5375 | // Handle scalar to vector element case |
5376 | // TODO-ARM64-CQ handle containing op3 scalar const where possible |
5377 | regNumber op3Reg = op3->gtRegNum; |
5378 | |
5379 | auto emitSwCase = [&](int element) { |
5380 | assert(element >= 0); |
5381 | assert(element < elements); |
5382 | |
5383 | if (varTypeIsFloating(baseType)) |
5384 | { |
5385 | assert(genIsValidFloatReg(op3Reg)); |
5386 | getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, 0); |
5387 | } |
5388 | else |
5389 | { |
5390 | assert(genIsValidIntReg(op3Reg)); |
5391 | getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, op3Reg, element); |
5392 | } |
5393 | }; |
5394 | |
5395 | if (op2->isContainedIntOrIImmed()) |
5396 | { |
5397 | int element = (int)op2->AsIntConCommon()->IconValue(); |
5398 | |
5399 | emitSwCase(element); |
5400 | } |
5401 | else |
5402 | { |
5403 | regNumber elementReg = op2->gtRegNum; |
5404 | regNumber tmpReg = node->GetSingleTempReg(); |
5405 | |
5406 | genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase); |
5407 | } |
5408 | } |
5409 | |
5410 | genProduceReg(node); |
5411 | } |
5412 | |
5413 | //------------------------------------------------------------------------ |
5414 | // genHWIntrinsicSimdSelectOp: |
5415 | // |
5416 | // Produce code for a GT_HWIntrinsic node with form SimdSelectOp. |
5417 | // |
5418 | // Consumes three SIMD operands and produces a SIMD result |
5419 | // |
5420 | // This intrinsic form requires one of the source registers to be the |
5421 | // destination register. Inserts a INS_mov if this requirement is not met. |
5422 | // |
5423 | // Arguments: |
5424 | // node - the GT_HWIntrinsic node |
5425 | // |
5426 | // Return Value: |
5427 | // None. |
5428 | // |
5429 | void CodeGen::genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node) |
5430 | { |
5431 | GenTreeArgList* argList = node->gtGetOp1()->AsArgList(); |
5432 | GenTree* op1 = argList->Current(); |
5433 | GenTree* op2 = argList->Rest()->Current(); |
5434 | GenTree* op3 = argList->Rest()->Rest()->Current(); |
5435 | var_types baseType = node->gtSIMDBaseType; |
5436 | regNumber targetReg = node->gtRegNum; |
5437 | |
5438 | assert(targetReg != REG_NA); |
5439 | var_types targetType = node->TypeGet(); |
5440 | |
5441 | genConsumeRegs(op1); |
5442 | genConsumeRegs(op2); |
5443 | genConsumeRegs(op3); |
5444 | |
5445 | regNumber op1Reg = op1->gtRegNum; |
5446 | regNumber op2Reg = op2->gtRegNum; |
5447 | regNumber op3Reg = op3->gtRegNum; |
5448 | |
5449 | assert(genIsValidFloatReg(op1Reg)); |
5450 | assert(genIsValidFloatReg(op2Reg)); |
5451 | assert(genIsValidFloatReg(op3Reg)); |
5452 | assert(genIsValidFloatReg(targetReg)); |
5453 | |
5454 | emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
5455 | |
5456 | // Arm64 has three bit select forms; each uses three source registers |
5457 | // One of the sources is also the destination |
5458 | if (targetReg == op3Reg) |
5459 | { |
5460 | // op3 is target use bit insert if true |
5461 | // op3 = op3 ^ (op1 & (op2 ^ op3)) |
5462 | getEmitter()->emitIns_R_R_R(INS_bit, attr, op3Reg, op2Reg, op1Reg); |
5463 | } |
5464 | else if (targetReg == op2Reg) |
5465 | { |
5466 | // op2 is target use bit insert if false |
5467 | // op2 = op2 ^ (~op1 & (op2 ^ op3)) |
5468 | getEmitter()->emitIns_R_R_R(INS_bif, attr, op2Reg, op3Reg, op1Reg); |
5469 | } |
5470 | else |
5471 | { |
5472 | if (targetReg != op1Reg) |
5473 | { |
5474 | // target is not one of the sources, copy op1 to use bit select form |
5475 | getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg); |
5476 | } |
5477 | // use bit select |
5478 | // targetReg = op3 ^ (targetReg & (op2 ^ op3)) |
5479 | getEmitter()->emitIns_R_R_R(INS_bsl, attr, targetReg, op2Reg, op3Reg); |
5480 | } |
5481 | |
5482 | genProduceReg(node); |
5483 | } |
5484 | |
5485 | //------------------------------------------------------------------------ |
5486 | // genHWIntrinsicSimdSetAllOp: |
5487 | // |
5488 | // Produce code for a GT_HWIntrinsic node with form SimdSetAllOp. |
5489 | // |
5490 | // Consumes single scalar operand and produces a SIMD result |
5491 | // |
5492 | // Arguments: |
5493 | // node - the GT_HWIntrinsic node |
5494 | // |
5495 | // Return Value: |
5496 | // None. |
5497 | // |
5498 | void CodeGen::genHWIntrinsicSimdSetAllOp(GenTreeHWIntrinsic* node) |
5499 | { |
5500 | GenTree* op1 = node->gtGetOp1(); |
5501 | var_types baseType = node->gtSIMDBaseType; |
5502 | regNumber targetReg = node->gtRegNum; |
5503 | |
5504 | assert(targetReg != REG_NA); |
5505 | var_types targetType = node->TypeGet(); |
5506 | |
5507 | genConsumeOperands(node); |
5508 | |
5509 | regNumber op1Reg = op1->gtRegNum; |
5510 | |
5511 | assert(genIsValidFloatReg(targetReg)); |
5512 | assert(genIsValidIntReg(op1Reg) || genIsValidFloatReg(op1Reg)); |
5513 | |
5514 | instruction ins = getOpForHWIntrinsic(node, baseType); |
5515 | emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
5516 | insOpts opt = genGetSimdInsOpt(attr, baseType); |
5517 | |
5518 | // TODO-ARM64-CQ Support contained immediate cases |
5519 | |
5520 | if (genIsValidIntReg(op1Reg)) |
5521 | { |
5522 | getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt); |
5523 | } |
5524 | else |
5525 | { |
5526 | getEmitter()->emitIns_R_R_I(ins, attr, targetReg, op1Reg, 0, opt); |
5527 | } |
5528 | |
5529 | genProduceReg(node); |
5530 | } |
5531 | |
5532 | //------------------------------------------------------------------------ |
5533 | // genHWIntrinsicSimdUnaryOp: |
5534 | // |
5535 | // Produce code for a GT_HWIntrinsic node with form SimdUnaryOp. |
5536 | // |
5537 | // Consumes single SIMD operand and produces a SIMD result |
5538 | // |
5539 | // Arguments: |
5540 | // node - the GT_HWIntrinsic node |
5541 | // |
5542 | // Return Value: |
5543 | // None. |
5544 | // |
5545 | void CodeGen::genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node) |
5546 | { |
5547 | GenTree* op1 = node->gtGetOp1(); |
5548 | var_types baseType = node->gtSIMDBaseType; |
5549 | regNumber targetReg = node->gtRegNum; |
5550 | |
5551 | assert(targetReg != REG_NA); |
5552 | var_types targetType = node->TypeGet(); |
5553 | |
5554 | genConsumeOperands(node); |
5555 | |
5556 | regNumber op1Reg = op1->gtRegNum; |
5557 | |
5558 | assert(genIsValidFloatReg(op1Reg)); |
5559 | assert(genIsValidFloatReg(targetReg)); |
5560 | |
5561 | instruction ins = getOpForHWIntrinsic(node, baseType); |
5562 | emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
5563 | insOpts opt = genGetSimdInsOpt(attr, baseType); |
5564 | |
5565 | getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt); |
5566 | |
5567 | genProduceReg(node); |
5568 | } |
5569 | |
5570 | //------------------------------------------------------------------------ |
5571 | // genHWIntrinsicSimdBinaryRMWOp: |
5572 | // |
5573 | // Produce code for a GT_HWIntrinsic node with form SimdBinaryRMWOp. |
5574 | // |
5575 | // Consumes two SIMD operands and produces a SIMD result. |
5576 | // First operand is both source and destination. |
5577 | // |
5578 | // Arguments: |
5579 | // node - the GT_HWIntrinsic node |
5580 | // |
5581 | // Return Value: |
5582 | // None. |
5583 | // |
5584 | void CodeGen::genHWIntrinsicSimdBinaryRMWOp(GenTreeHWIntrinsic* node) |
5585 | { |
5586 | GenTree* op1 = node->gtGetOp1(); |
5587 | GenTree* op2 = node->gtGetOp2(); |
5588 | var_types baseType = node->gtSIMDBaseType; |
5589 | regNumber targetReg = node->gtRegNum; |
5590 | |
5591 | assert(targetReg != REG_NA); |
5592 | |
5593 | genConsumeOperands(node); |
5594 | |
5595 | regNumber op1Reg = op1->gtRegNum; |
5596 | regNumber op2Reg = op2->gtRegNum; |
5597 | |
5598 | assert(genIsValidFloatReg(op1Reg)); |
5599 | assert(genIsValidFloatReg(op2Reg)); |
5600 | assert(genIsValidFloatReg(targetReg)); |
5601 | |
5602 | instruction ins = getOpForHWIntrinsic(node, baseType); |
5603 | emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
5604 | insOpts opt = genGetSimdInsOpt(attr, baseType); |
5605 | |
5606 | if (targetReg != op1Reg) |
5607 | { |
5608 | getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg); |
5609 | } |
5610 | getEmitter()->emitIns_R_R(ins, attr, targetReg, op2Reg, opt); |
5611 | |
5612 | genProduceReg(node); |
5613 | } |
5614 | |
5615 | //------------------------------------------------------------------------ |
5616 | // genHWIntrinsicSimdTernaryRMWOp: |
5617 | // |
5618 | // Produce code for a GT_HWIntrinsic node with form SimdTernaryRMWOp |
5619 | // |
5620 | // Consumes three SIMD operands and produces a SIMD result. |
5621 | // First operand is both source and destination. |
5622 | // |
5623 | // Arguments: |
5624 | // node - the GT_HWIntrinsic node |
5625 | // |
5626 | // Return Value: |
5627 | // None. |
5628 | // |
5629 | void CodeGen::genHWIntrinsicSimdTernaryRMWOp(GenTreeHWIntrinsic* node) |
5630 | { |
5631 | GenTreeArgList* argList = node->gtGetOp1()->AsArgList(); |
5632 | GenTree* op1 = argList->Current(); |
5633 | GenTree* op2 = argList->Rest()->Current(); |
5634 | GenTree* op3 = argList->Rest()->Rest()->Current(); |
5635 | var_types baseType = node->gtSIMDBaseType; |
5636 | regNumber targetReg = node->gtRegNum; |
5637 | |
5638 | assert(targetReg != REG_NA); |
5639 | var_types targetType = node->TypeGet(); |
5640 | |
5641 | genConsumeRegs(op1); |
5642 | genConsumeRegs(op2); |
5643 | genConsumeRegs(op3); |
5644 | |
5645 | regNumber op1Reg = op1->gtRegNum; |
5646 | regNumber op2Reg = op2->gtRegNum; |
5647 | regNumber op3Reg = op3->gtRegNum; |
5648 | |
5649 | assert(genIsValidFloatReg(op1Reg)); |
5650 | assert(genIsValidFloatReg(op2Reg)); |
5651 | assert(genIsValidFloatReg(op3Reg)); |
5652 | assert(genIsValidFloatReg(targetReg)); |
5653 | assert(targetReg != op2Reg); |
5654 | assert(targetReg != op3Reg); |
5655 | |
5656 | instruction ins = getOpForHWIntrinsic(node, baseType); |
5657 | emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
5658 | |
5659 | if (targetReg != op1Reg) |
5660 | { |
5661 | getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg); |
5662 | } |
5663 | |
5664 | getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg); |
5665 | |
5666 | genProduceReg(node); |
5667 | } |
5668 | |
5669 | //------------------------------------------------------------------------ |
5670 | // genHWIntrinsicShaHashOp: |
5671 | // |
5672 | // Produce code for a GT_HWIntrinsic node with form Sha1HashOp. |
5673 | // Used in Arm64 SHA1 Hash operations. |
5674 | // |
5675 | // Consumes three operands and returns a Simd result. |
5676 | // First Simd operand is both source and destination. |
5677 | // Second Operand is an unsigned int. |
5678 | // Third operand is a simd operand. |
5679 | |
5680 | // Arguments: |
5681 | // node - the GT_HWIntrinsic node |
5682 | // |
5683 | // Return Value: |
5684 | // None. |
5685 | // |
5686 | void CodeGen::genHWIntrinsicShaHashOp(GenTreeHWIntrinsic* node) |
5687 | { |
5688 | GenTreeArgList* argList = node->gtGetOp1()->AsArgList(); |
5689 | GenTree* op1 = argList->Current(); |
5690 | GenTree* op2 = argList->Rest()->Current(); |
5691 | GenTree* op3 = argList->Rest()->Rest()->Current(); |
5692 | var_types baseType = node->gtSIMDBaseType; |
5693 | regNumber targetReg = node->gtRegNum; |
5694 | |
5695 | assert(targetReg != REG_NA); |
5696 | var_types targetType = node->TypeGet(); |
5697 | |
5698 | genConsumeRegs(op1); |
5699 | genConsumeRegs(op2); |
5700 | genConsumeRegs(op3); |
5701 | |
5702 | regNumber op1Reg = op1->gtRegNum; |
5703 | regNumber op2Reg = op2->gtRegNum; |
5704 | regNumber op3Reg = op3->gtRegNum; |
5705 | |
5706 | assert(genIsValidFloatReg(op1Reg)); |
5707 | assert(genIsValidFloatReg(op3Reg)); |
5708 | assert(targetReg != op2Reg); |
5709 | assert(targetReg != op3Reg); |
5710 | |
5711 | instruction ins = getOpForHWIntrinsic(node, baseType); |
5712 | emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; |
5713 | |
5714 | assert(genIsValidIntReg(op2Reg)); |
5715 | regNumber elementReg = op2->gtRegNum; |
5716 | regNumber tmpReg = node->GetSingleTempReg(RBM_ALLFLOAT); |
5717 | |
5718 | getEmitter()->emitIns_R_R(INS_fmov, EA_4BYTE, tmpReg, elementReg); |
5719 | |
5720 | if (targetReg != op1Reg) |
5721 | { |
5722 | getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg); |
5723 | } |
5724 | |
5725 | getEmitter()->emitIns_R_R_R(ins, attr, targetReg, tmpReg, op3Reg); |
5726 | |
5727 | genProduceReg(node); |
5728 | } |
5729 | |
5730 | //------------------------------------------------------------------------ |
5731 | // genHWIntrinsicShaRotateOp: |
5732 | // |
5733 | // Produce code for a GT_HWIntrinsic node with form Sha1RotateOp. |
5734 | // Used in Arm64 SHA1 Rotate operations. |
5735 | // |
5736 | // Consumes one integer operand and returns unsigned int result. |
5737 | // |
5738 | // Arguments: |
5739 | // node - the GT_HWIntrinsic node |
5740 | // |
5741 | // Return Value: |
5742 | // None. |
5743 | // |
5744 | void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node) |
5745 | { |
5746 | GenTree* op1 = node->gtGetOp1(); |
5747 | regNumber targetReg = node->gtRegNum; |
5748 | emitAttr attr = emitActualTypeSize(node); |
5749 | |
5750 | assert(targetReg != REG_NA); |
5751 | var_types targetType = node->TypeGet(); |
5752 | |
5753 | genConsumeOperands(node); |
5754 | |
5755 | instruction ins = getOpForHWIntrinsic(node, node->TypeGet()); |
5756 | regNumber elementReg = op1->gtRegNum; |
5757 | regNumber tmpReg = node->GetSingleTempReg(RBM_ALLFLOAT); |
5758 | |
5759 | getEmitter()->emitIns_R_R(INS_fmov, EA_4BYTE, tmpReg, elementReg); |
5760 | getEmitter()->emitIns_R_R(ins, EA_4BYTE, tmpReg, tmpReg); |
5761 | getEmitter()->emitIns_R_R(INS_fmov, attr, targetReg, tmpReg); |
5762 | |
5763 | genProduceReg(node); |
5764 | } |
5765 | |
5766 | #endif // FEATURE_HW_INTRINSICS |
5767 | |
5768 | /***************************************************************************** |
5769 | * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog |
5770 | * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late |
 * disassembler thinks the instructions are the same as we do.
5772 | */ |
5773 | |
5774 | // Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here. |
5775 | // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. |
5776 | //#define ALL_ARM64_EMITTER_UNIT_TESTS |
5777 | |
5778 | #if defined(DEBUG) |
5779 | void CodeGen::genArm64EmitterUnitTests() |
5780 | { |
5781 | if (!verbose) |
5782 | { |
5783 | return; |
5784 | } |
5785 | |
5786 | if (!compiler->opts.altJit) |
5787 | { |
5788 | // No point doing this in a "real" JIT. |
5789 | return; |
5790 | } |
5791 | |
5792 | // Mark the "fake" instructions in the output. |
5793 | printf("*************** In genArm64EmitterUnitTests()\n" ); |
5794 | |
5795 | emitter* theEmitter = getEmitter(); |
5796 | |
5797 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
5798 | // We use this: |
5799 | // genDefineTempLabel(genCreateTempLabel()); |
5800 | // to create artificial labels to help separate groups of tests. |
5801 | |
5802 | // |
5803 | // Loads/Stores basic general register |
5804 | // |
5805 | |
5806 | genDefineTempLabel(genCreateTempLabel()); |
5807 | |
5808 | // ldr/str Xt, [reg] |
5809 | theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_R8, REG_R9); |
5810 | theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9); |
5811 | theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9); |
5812 | theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_R8, REG_R9); |
5813 | theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9); |
5814 | theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9); |
5815 | |
5816 | // ldr/str Wt, [reg] |
5817 | theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_R8, REG_R9); |
5818 | theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9); |
5819 | theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9); |
5820 | theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_R8, REG_R9); |
5821 | theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9); |
5822 | theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9); |
5823 | |
5824 | theEmitter->emitIns_R_R(INS_ldrsb, EA_4BYTE, REG_R8, REG_R9); // target Wt |
5825 | theEmitter->emitIns_R_R(INS_ldrsh, EA_4BYTE, REG_R8, REG_R9); // target Wt |
5826 | theEmitter->emitIns_R_R(INS_ldrsb, EA_8BYTE, REG_R8, REG_R9); // target Xt |
5827 | theEmitter->emitIns_R_R(INS_ldrsh, EA_8BYTE, REG_R8, REG_R9); // target Xt |
5828 | theEmitter->emitIns_R_R(INS_ldrsw, EA_8BYTE, REG_R8, REG_R9); // target Xt |
5829 | |
5830 | theEmitter->emitIns_R_R_I(INS_ldurb, EA_4BYTE, REG_R8, REG_R9, 1); |
5831 | theEmitter->emitIns_R_R_I(INS_ldurh, EA_4BYTE, REG_R8, REG_R9, 1); |
5832 | theEmitter->emitIns_R_R_I(INS_sturb, EA_4BYTE, REG_R8, REG_R9, 1); |
5833 | theEmitter->emitIns_R_R_I(INS_sturh, EA_4BYTE, REG_R8, REG_R9, 1); |
5834 | theEmitter->emitIns_R_R_I(INS_ldursb, EA_4BYTE, REG_R8, REG_R9, 1); |
5835 | theEmitter->emitIns_R_R_I(INS_ldursb, EA_8BYTE, REG_R8, REG_R9, 1); |
5836 | theEmitter->emitIns_R_R_I(INS_ldursh, EA_4BYTE, REG_R8, REG_R9, 1); |
5837 | theEmitter->emitIns_R_R_I(INS_ldursh, EA_8BYTE, REG_R8, REG_R9, 1); |
5838 | theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_R9, 1); |
5839 | theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_R8, REG_R9, 1); |
5840 | theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_R8, REG_R9, 1); |
5841 | theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1); |
5842 | theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1); |
5843 | |
5844 | // SP and ZR tests |
5845 | theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1); |
5846 | theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1); |
5847 | theEmitter->emitIns_R_R_I(INS_ldurh, EA_8BYTE, REG_ZR, REG_SP, 1); |
5848 | |
5849 | // scaled |
5850 | theEmitter->emitIns_R_R_I(INS_ldrb, EA_1BYTE, REG_R8, REG_R9, 1); |
5851 | theEmitter->emitIns_R_R_I(INS_ldrh, EA_2BYTE, REG_R8, REG_R9, 2); |
5852 | theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 4); |
5853 | theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 8); |
5854 | |
5855 | // pre-/post-indexed (unscaled) |
5856 | theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX); |
5857 | theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
5858 | theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX); |
5859 | theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
5860 | |
5861 | // ldar/stlr Rt, [reg] |
5862 | theEmitter->emitIns_R_R(INS_ldar, EA_8BYTE, REG_R9, REG_R8); |
5863 | theEmitter->emitIns_R_R(INS_ldar, EA_4BYTE, REG_R7, REG_R10); |
5864 | theEmitter->emitIns_R_R(INS_ldarb, EA_4BYTE, REG_R5, REG_R11); |
5865 | theEmitter->emitIns_R_R(INS_ldarh, EA_4BYTE, REG_R5, REG_R12); |
5866 | |
5867 | theEmitter->emitIns_R_R(INS_stlr, EA_8BYTE, REG_R9, REG_R8); |
5868 | theEmitter->emitIns_R_R(INS_stlr, EA_4BYTE, REG_R7, REG_R13); |
5869 | theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14); |
5870 | theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15); |
5871 | |
5872 | // ldaxr Rt, [reg] |
5873 | theEmitter->emitIns_R_R(INS_ldaxr, EA_8BYTE, REG_R9, REG_R8); |
5874 | theEmitter->emitIns_R_R(INS_ldaxr, EA_4BYTE, REG_R7, REG_R10); |
5875 | theEmitter->emitIns_R_R(INS_ldaxrb, EA_4BYTE, REG_R5, REG_R11); |
5876 | theEmitter->emitIns_R_R(INS_ldaxrh, EA_4BYTE, REG_R5, REG_R12); |
5877 | |
5878 | // ldxr Rt, [reg] |
5879 | theEmitter->emitIns_R_R(INS_ldxr, EA_8BYTE, REG_R9, REG_R8); |
5880 | theEmitter->emitIns_R_R(INS_ldxr, EA_4BYTE, REG_R7, REG_R10); |
5881 | theEmitter->emitIns_R_R(INS_ldxrb, EA_4BYTE, REG_R5, REG_R11); |
5882 | theEmitter->emitIns_R_R(INS_ldxrh, EA_4BYTE, REG_R5, REG_R12); |
5883 | |
5884 | // stxr Ws, Rt, [reg] |
5885 | theEmitter->emitIns_R_R_R(INS_stxr, EA_8BYTE, REG_R1, REG_R9, REG_R8); |
5886 | theEmitter->emitIns_R_R_R(INS_stxr, EA_4BYTE, REG_R3, REG_R7, REG_R13); |
5887 | theEmitter->emitIns_R_R_R(INS_stxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14); |
5888 | theEmitter->emitIns_R_R_R(INS_stxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15); |
5889 | |
5890 | // stlxr Ws, Rt, [reg] |
5891 | theEmitter->emitIns_R_R_R(INS_stlxr, EA_8BYTE, REG_R1, REG_R9, REG_R8); |
5892 | theEmitter->emitIns_R_R_R(INS_stlxr, EA_4BYTE, REG_R3, REG_R7, REG_R13); |
5893 | theEmitter->emitIns_R_R_R(INS_stlxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14); |
5894 | theEmitter->emitIns_R_R_R(INS_stlxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15); |
5895 | |
5896 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
5897 | |
5898 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
5899 | // |
5900 | // Compares |
5901 | // |
5902 | |
5903 | genDefineTempLabel(genCreateTempLabel()); |
5904 | |
5905 | // cmp reg, reg |
5906 | theEmitter->emitIns_R_R(INS_cmp, EA_8BYTE, REG_R8, REG_R9); |
5907 | theEmitter->emitIns_R_R(INS_cmn, EA_8BYTE, REG_R8, REG_R9); |
5908 | |
5909 | // cmp reg, imm |
5910 | theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0); |
5911 | theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095); |
5912 | theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 1 << 12); |
5913 | theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095 << 12); |
5914 | |
5915 | theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0); |
5916 | theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095); |
5917 | theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 1 << 12); |
5918 | theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095 << 12); |
5919 | |
5920 | theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -1); |
5921 | theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -0xfff); |
5922 | theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xfffffffffffff000LL); |
5923 | theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xffffffffff800000LL); |
5924 | |
5925 | theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -1); |
5926 | theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -0xfff); |
5927 | theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL); |
5928 | theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL); |
5929 | |
5930 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
5931 | |
5932 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
5933 | // R_R |
5934 | // |
5935 | |
5936 | genDefineTempLabel(genCreateTempLabel()); |
5937 | |
5938 | theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12); |
5939 | theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_R2, REG_R13); |
5940 | theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_R3, REG_R14); |
5941 | theEmitter->emitIns_R_R(INS_rev, EA_8BYTE, REG_R4, REG_R15); |
5942 | theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_R5, REG_R0); |
5943 | theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_R6, REG_R1); |
5944 | |
5945 | theEmitter->emitIns_R_R(INS_cls, EA_4BYTE, REG_R7, REG_R2); |
5946 | theEmitter->emitIns_R_R(INS_clz, EA_4BYTE, REG_R8, REG_R3); |
5947 | theEmitter->emitIns_R_R(INS_rbit, EA_4BYTE, REG_R9, REG_R4); |
5948 | theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5); |
5949 | theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6); |
5950 | |
5951 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
5952 | |
5953 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
5954 | // |
5955 | // R_I |
5956 | // |
5957 | |
5958 | genDefineTempLabel(genCreateTempLabel()); |
5959 | |
5960 | // mov reg, imm(i16,hw) |
5961 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000000001234); |
5962 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000043210000); |
5963 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000567800000000); |
5964 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765000000000000); |
5965 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFFFFFF1234); |
5966 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFF4321FFFF); |
5967 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFF5678FFFFFFFF); |
5968 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765FFFFFFFFFFFF); |
5969 | |
5970 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00001234); |
5971 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x87650000); |
5972 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xFFFF1234); |
5973 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x4567FFFF); |
5974 | |
5975 | // mov reg, imm(N,r,s) |
5976 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x00FFFFF000000000); |
5977 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x6666666666666666); |
5978 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_SP, 0x7FFF00007FFF0000); |
5979 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x5555555555555555); |
5980 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xE003E003E003E003); |
5981 | theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0707070707070707); |
5982 | |
5983 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00FFFFF0); |
5984 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x66666666); |
5985 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x03FFC000); |
5986 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x55555555); |
5987 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xE003E003); |
5988 | theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x07070707); |
5989 | |
5990 | theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0xE003E003E003E003); |
5991 | theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x00FFFFF000000000); |
5992 | theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x6666666666666666); |
5993 | theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x0707070707070707); |
5994 | theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x7FFF00007FFF0000); |
5995 | theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x5555555555555555); |
5996 | |
5997 | theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xE003E003); |
5998 | theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x00FFFFF0); |
5999 | theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x66666666); |
6000 | theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x07070707); |
6001 | theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000); |
6002 | theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555); |
6003 | |
6004 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6005 | |
6006 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6007 | // |
6008 | // R_R |
6009 | // |
6010 | |
6011 | genDefineTempLabel(genCreateTempLabel()); |
6012 | |
6013 | // tst reg, reg |
6014 | theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10); |
6015 | |
6016 | // mov reg, reg |
6017 | theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10); |
6018 | theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R8, REG_SP); |
6019 | theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9); |
6020 | |
6021 | theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11); |
6022 | theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12); |
6023 | theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13); |
6024 | |
6025 | theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10); |
6026 | theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11); |
6027 | theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12); |
6028 | theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13); |
6029 | |
6030 | theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10); |
6031 | theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11); |
6032 | theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12); |
6033 | theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt |
6034 | theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt |
6035 | |
6036 | theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10); |
6037 | theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11); |
6038 | theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13); |
6039 | theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14); |
6040 | |
6041 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6042 | |
6043 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6044 | // |
6045 | // R_I_I |
6046 | // |
6047 | |
6048 | genDefineTempLabel(genCreateTempLabel()); |
6049 | |
6050 | // mov reg, imm(i16,hw) |
6051 | theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x1234, 0, INS_OPTS_LSL); |
6052 | theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL); |
6053 | |
6054 | theEmitter->emitIns_R_I_I(INS_movk, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL); |
6055 | theEmitter->emitIns_R_I_I(INS_movn, EA_8BYTE, REG_R8, 0x5678, 32, INS_OPTS_LSL); |
6056 | theEmitter->emitIns_R_I_I(INS_movz, EA_8BYTE, REG_R8, 0x8765, 48, INS_OPTS_LSL); |
6057 | |
6058 | theEmitter->emitIns_R_I_I(INS_movk, EA_4BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL); |
6059 | theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL); |
6060 | theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL); |
6061 | |
6062 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6063 | |
6064 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6065 | // |
6066 | // R_R_I |
6067 | // |
6068 | |
6069 | genDefineTempLabel(genCreateTempLabel()); |
6070 | |
6071 | theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1); |
6072 | theEmitter->emitIns_R_R_I(INS_lsl, EA_4BYTE, REG_R9, REG_R3, 18); |
6073 | theEmitter->emitIns_R_R_I(INS_lsr, EA_8BYTE, REG_R7, REG_R0, 37); |
6074 | theEmitter->emitIns_R_R_I(INS_lsr, EA_4BYTE, REG_R0, REG_R1, 2); |
6075 | theEmitter->emitIns_R_R_I(INS_asr, EA_8BYTE, REG_R2, REG_R3, 53); |
6076 | theEmitter->emitIns_R_R_I(INS_asr, EA_4BYTE, REG_R9, REG_R3, 18); |
6077 | |
6078 | theEmitter->emitIns_R_R_I(INS_and, EA_8BYTE, REG_R2, REG_R3, 0x5555555555555555); |
6079 | theEmitter->emitIns_R_R_I(INS_ands, EA_8BYTE, REG_R1, REG_R5, 0x6666666666666666); |
6080 | theEmitter->emitIns_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, 0x0707070707070707); |
6081 | theEmitter->emitIns_R_R_I(INS_orr, EA_8BYTE, REG_SP, REG_R3, 0xFFFC000000000000); |
6082 | theEmitter->emitIns_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, 0xE003E003); |
6083 | |
6084 | theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 1); |
6085 | theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 31); |
6086 | theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 32); |
6087 | theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 63); |
6088 | |
6089 | theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 1); |
6090 | theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 31); |
6091 | |
6092 | theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0); // == mov |
6093 | theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 1); |
6094 | theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -1); |
6095 | theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff); |
6096 | theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -0xfff); |
6097 | theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0x1000); |
6098 | theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff000); |
6099 | theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); |
6100 | theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); |
6101 | |
6102 | theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0); // == mov |
6103 | theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 1); |
6104 | theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -1); |
6105 | theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff); |
6106 | theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -0xfff); |
6107 | theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0x1000); |
6108 | theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff000); |
6109 | theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); |
6110 | theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); |
6111 | |
6112 | theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0); // == mov |
6113 | theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 1); |
6114 | theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -1); |
6115 | theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff); |
6116 | theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -0xfff); |
6117 | theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0x1000); |
6118 | theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff000); |
6119 | theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); |
6120 | theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); |
6121 | |
6122 | theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0); // == mov |
6123 | theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 1); |
6124 | theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -1); |
6125 | theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff); |
6126 | theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -0xfff); |
6127 | theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0x1000); |
6128 | theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff000); |
6129 | theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); |
6130 | theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); |
6131 | |
6132 | theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0); // == mov |
6133 | theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 1); |
6134 | theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -1); |
6135 | theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff); |
6136 | theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -0xfff); |
6137 | theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0x1000); |
6138 | theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff000); |
6139 | theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); |
6140 | theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); |
6141 | |
6142 | theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0); // == mov |
6143 | theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 1); |
6144 | theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -1); |
6145 | theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff); |
6146 | theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -0xfff); |
6147 | theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0x1000); |
6148 | theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff000); |
6149 | theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); |
6150 | theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); |
6151 | |
6152 | theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0); // == mov |
6153 | theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 1); |
6154 | theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -1); |
6155 | theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff); |
6156 | theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -0xfff); |
6157 | theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0x1000); |
6158 | theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff000); |
6159 | theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); |
6160 | theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); |
6161 | |
6162 | theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0); // == mov |
6163 | theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 1); |
6164 | theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -1); |
6165 | theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff); |
6166 | theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -0xfff); |
6167 | theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0x1000); |
6168 | theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff000); |
6169 | theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); |
6170 | theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); |
6171 | |
6172 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6173 | |
6174 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6175 | // |
6176 | // R_R_I cmp/tst |
6177 | // |
6178 | |
6179 | // cmp |
6180 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0); |
6181 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0); |
6182 | |
6183 | // CMP (shifted register) |
6184 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL); |
6185 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR); |
6186 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR); |
6187 | |
6188 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL); |
6189 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR); |
6190 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR); |
6191 | |
6192 | // TST (shifted register) |
6193 | theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL); |
6194 | theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR); |
6195 | theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR); |
6196 | theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 34, INS_OPTS_ROR); |
6197 | |
6198 | theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL); |
6199 | theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR); |
6200 | theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR); |
6201 | theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 24, INS_OPTS_ROR); |
6202 | |
6203 | // CMP (extended register) |
6204 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB); |
6205 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTH); |
6206 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTW); // "cmp x8, x9, UXTW"; msdis |
6207 | // disassembles this "cmp x8,x9", |
6208 | // which looks like an msdis issue. |
6209 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTX); |
6210 | |
6211 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB); |
6212 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTH); |
6213 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTW); |
6214 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTX); |
6215 | |
6216 | // CMP 64-bit (extended register) and left shift |
6217 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_UXTB); |
6218 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH); |
6219 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_UXTW); |
6220 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTX); |
6221 | |
6222 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_SXTB); |
6223 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH); |
6224 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_SXTW); |
6225 | theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTX); |
6226 | |
6227 | // CMP 32-bit (extended register) and left shift |
6228 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB); |
6229 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH); |
6230 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTW); |
6231 | |
6232 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB); |
6233 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH); |
6234 | theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW); |
6235 | |
6236 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6237 | |
6238 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6239 | // |
6240 | // R_R_R |
6241 | // |
6242 | |
6243 | genDefineTempLabel(genCreateTempLabel()); |
6244 | |
6245 | theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6246 | theEmitter->emitIns_R_R_R(INS_lsr, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6247 | theEmitter->emitIns_R_R_R(INS_asr, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6248 | theEmitter->emitIns_R_R_R(INS_ror, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6249 | theEmitter->emitIns_R_R_R(INS_adc, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6250 | theEmitter->emitIns_R_R_R(INS_adcs, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6251 | theEmitter->emitIns_R_R_R(INS_sbc, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6252 | theEmitter->emitIns_R_R_R(INS_sbcs, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6253 | theEmitter->emitIns_R_R_R(INS_udiv, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6254 | theEmitter->emitIns_R_R_R(INS_sdiv, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6255 | theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6256 | theEmitter->emitIns_R_R_R(INS_mneg, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6257 | theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6258 | theEmitter->emitIns_R_R_R(INS_smnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6259 | theEmitter->emitIns_R_R_R(INS_smulh, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6260 | theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6261 | theEmitter->emitIns_R_R_R(INS_umnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6262 | theEmitter->emitIns_R_R_R(INS_umulh, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6263 | theEmitter->emitIns_R_R_R(INS_lslv, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6264 | theEmitter->emitIns_R_R_R(INS_lsrv, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6265 | theEmitter->emitIns_R_R_R(INS_asrv, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6266 | theEmitter->emitIns_R_R_R(INS_rorv, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6267 | |
6268 | theEmitter->emitIns_R_R_R(INS_lsl, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6269 | theEmitter->emitIns_R_R_R(INS_lsr, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6270 | theEmitter->emitIns_R_R_R(INS_asr, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6271 | theEmitter->emitIns_R_R_R(INS_ror, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6272 | theEmitter->emitIns_R_R_R(INS_adc, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6273 | theEmitter->emitIns_R_R_R(INS_adcs, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6274 | theEmitter->emitIns_R_R_R(INS_sbc, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6275 | theEmitter->emitIns_R_R_R(INS_sbcs, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6276 | theEmitter->emitIns_R_R_R(INS_udiv, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6277 | theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6278 | theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6279 | theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6280 | theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6281 | theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6282 | theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6283 | theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6284 | theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6285 | theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6286 | theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6287 | theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6288 | theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6289 | theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6290 | |
6291 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6292 | |
6293 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6294 | // |
6295 | // ARMv8.1 LSE Atomics |
6296 | // |
6297 | genDefineTempLabel(genCreateTempLabel()); |
6298 | |
6299 | theEmitter->emitIns_R_R_R(INS_casb, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6300 | theEmitter->emitIns_R_R_R(INS_casab, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6301 | theEmitter->emitIns_R_R_R(INS_casalb, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6302 | theEmitter->emitIns_R_R_R(INS_caslb, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6303 | theEmitter->emitIns_R_R_R(INS_cash, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6304 | theEmitter->emitIns_R_R_R(INS_casah, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6305 | theEmitter->emitIns_R_R_R(INS_casalh, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6306 | theEmitter->emitIns_R_R_R(INS_caslh, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6307 | theEmitter->emitIns_R_R_R(INS_cas, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6308 | theEmitter->emitIns_R_R_R(INS_casa, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6309 | theEmitter->emitIns_R_R_R(INS_casal, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6310 | theEmitter->emitIns_R_R_R(INS_casl, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6311 | theEmitter->emitIns_R_R_R(INS_cas, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6312 | theEmitter->emitIns_R_R_R(INS_casa, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6313 | theEmitter->emitIns_R_R_R(INS_casal, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6314 | theEmitter->emitIns_R_R_R(INS_casl, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6315 | theEmitter->emitIns_R_R_R(INS_ldaddb, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6316 | theEmitter->emitIns_R_R_R(INS_ldaddab, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6317 | theEmitter->emitIns_R_R_R(INS_ldaddalb, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6318 | theEmitter->emitIns_R_R_R(INS_ldaddlb, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6319 | theEmitter->emitIns_R_R_R(INS_ldaddh, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6320 | theEmitter->emitIns_R_R_R(INS_ldaddah, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6321 | theEmitter->emitIns_R_R_R(INS_ldaddalh, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6322 | theEmitter->emitIns_R_R_R(INS_ldaddlh, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6323 | theEmitter->emitIns_R_R_R(INS_ldadd, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6324 | theEmitter->emitIns_R_R_R(INS_ldadda, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6325 | theEmitter->emitIns_R_R_R(INS_ldaddal, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6326 | theEmitter->emitIns_R_R_R(INS_ldaddl, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6327 | theEmitter->emitIns_R_R_R(INS_ldadd, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6328 | theEmitter->emitIns_R_R_R(INS_ldadda, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6329 | theEmitter->emitIns_R_R_R(INS_ldaddal, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6330 | theEmitter->emitIns_R_R_R(INS_ldaddl, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6331 | theEmitter->emitIns_R_R_R(INS_swpb, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6332 | theEmitter->emitIns_R_R_R(INS_swpab, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6333 | theEmitter->emitIns_R_R_R(INS_swpalb, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6334 | theEmitter->emitIns_R_R_R(INS_swplb, EA_1BYTE, REG_R8, REG_R9, REG_R10); |
6335 | theEmitter->emitIns_R_R_R(INS_swph, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6336 | theEmitter->emitIns_R_R_R(INS_swpah, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6337 | theEmitter->emitIns_R_R_R(INS_swpalh, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6338 | theEmitter->emitIns_R_R_R(INS_swplh, EA_2BYTE, REG_R8, REG_R9, REG_R10); |
6339 | theEmitter->emitIns_R_R_R(INS_swp, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6340 | theEmitter->emitIns_R_R_R(INS_swpa, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6341 | theEmitter->emitIns_R_R_R(INS_swpal, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6342 | theEmitter->emitIns_R_R_R(INS_swpl, EA_4BYTE, REG_R8, REG_R9, REG_R10); |
6343 | theEmitter->emitIns_R_R_R(INS_swp, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6344 | theEmitter->emitIns_R_R_R(INS_swpa, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6345 | theEmitter->emitIns_R_R_R(INS_swpal, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6346 | theEmitter->emitIns_R_R_R(INS_swpl, EA_8BYTE, REG_R8, REG_R9, REG_R10); |
6347 | |
6348 | theEmitter->emitIns_R_R(INS_staddb, EA_1BYTE, REG_R8, REG_R10); |
6349 | theEmitter->emitIns_R_R(INS_staddlb, EA_1BYTE, REG_R8, REG_R10); |
6350 | theEmitter->emitIns_R_R(INS_staddh, EA_2BYTE, REG_R8, REG_R10); |
6351 | theEmitter->emitIns_R_R(INS_staddlh, EA_2BYTE, REG_R8, REG_R10); |
6352 | theEmitter->emitIns_R_R(INS_stadd, EA_4BYTE, REG_R8, REG_R10); |
6353 | theEmitter->emitIns_R_R(INS_staddl, EA_4BYTE, REG_R8, REG_R10); |
6354 | theEmitter->emitIns_R_R(INS_stadd, EA_8BYTE, REG_R8, REG_R10); |
6355 | theEmitter->emitIns_R_R(INS_staddl, EA_8BYTE, REG_R8, REG_R10); |
6356 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6357 | |
6358 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6359 | // |
6360 | // R_R_I_I |
6361 | // |
6362 | |
6363 | genDefineTempLabel(genCreateTempLabel()); |
6364 | |
6365 | theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39); |
6366 | theEmitter->emitIns_R_R_I_I(INS_bfm, EA_8BYTE, REG_R1, REG_R5, 20, 23); |
6367 | theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_8BYTE, REG_R8, REG_R9, 36, 7); |
6368 | |
6369 | theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_8BYTE, REG_R2, REG_R3, 7, 37); |
6370 | theEmitter->emitIns_R_R_I_I(INS_bfi, EA_8BYTE, REG_R1, REG_R5, 23, 21); |
6371 | theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_8BYTE, REG_R8, REG_R9, 39, 5); |
6372 | |
6373 | theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_8BYTE, REG_R2, REG_R3, 10, 24); |
6374 | theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_8BYTE, REG_R1, REG_R5, 26, 16); |
6375 | theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, REG_R8, REG_R9, 42, 8); |
6376 | |
6377 | theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_4BYTE, REG_R2, REG_R3, 4, 19); |
6378 | theEmitter->emitIns_R_R_I_I(INS_bfm, EA_4BYTE, REG_R1, REG_R5, 10, 13); |
6379 | theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_4BYTE, REG_R8, REG_R9, 16, 7); |
6380 | |
6381 | theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_4BYTE, REG_R2, REG_R3, 5, 17); |
6382 | theEmitter->emitIns_R_R_I_I(INS_bfi, EA_4BYTE, REG_R1, REG_R5, 13, 11); |
6383 | theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_4BYTE, REG_R8, REG_R9, 19, 5); |
6384 | |
6385 | theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, REG_R2, REG_R3, 3, 14); |
6386 | theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9); |
6387 | theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8); |
6388 | |
6389 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6390 | |
6391 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6392 | // |
6393 | // R_R_R_I |
6394 | // |
6395 | |
6396 | genDefineTempLabel(genCreateTempLabel()); |
6397 | |
6398 | // ADD (extended register) |
6399 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB); |
6400 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH); |
6401 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW); |
6402 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX); |
6403 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB); |
6404 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH); |
6405 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW); |
6406 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX); |
6407 | |
6408 | // ADD (extended register) and left shift |
6409 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB); |
6410 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH); |
6411 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW); |
6412 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX); |
6413 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB); |
6414 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH); |
6415 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW); |
6416 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX); |
6417 | |
6418 | // ADD (shifted register) |
6419 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6420 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31, INS_OPTS_LSL); |
6421 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32, INS_OPTS_LSR); |
6422 | theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 33, INS_OPTS_ASR); |
6423 | |
6424 | // EXTR (extract field from register pair) |
6425 | theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1); |
6426 | theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31); |
6427 | theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32); |
6428 | theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 63); |
6429 | |
6430 | theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1); |
6431 | theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 31); |
6432 | |
6433 | // SUB (extended register) |
6434 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB); |
6435 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH); |
6436 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW); |
6437 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX); |
6438 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB); |
6439 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH); |
6440 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW); |
6441 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX); |
6442 | |
6443 | // SUB (extended register) and left shift |
6444 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB); |
6445 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH); |
6446 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW); |
6447 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX); |
6448 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB); |
6449 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH); |
6450 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW); |
6451 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX); |
6452 | |
6453 | // SUB (shifted register) |
6454 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6455 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 27, INS_OPTS_LSL); |
6456 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 28, INS_OPTS_LSR); |
6457 | theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 29, INS_OPTS_ASR); |
6458 | |
6459 | // bit operations |
6460 | theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6461 | theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6462 | theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6463 | theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6464 | theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6465 | theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6466 | theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6467 | theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6468 | |
6469 | theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL); |
6470 | theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR); |
6471 | theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR); |
6472 | theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR); |
6473 | theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL); |
6474 | theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR); |
6475 | theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR); |
6476 | theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR); |
6477 | |
6478 | theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6479 | theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6480 | theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6481 | theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6482 | theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6483 | theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6484 | theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6485 | theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6486 | |
6487 | theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL); |
6488 | theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR); |
6489 | theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR); |
6490 | theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR); |
6491 | theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL); |
6492 | theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR); |
6493 | theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR); |
6494 | theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR); |
6495 | |
6496 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6497 | |
6498 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6499 | // |
6500 | // R_R_R_I -- load/store pair |
6501 | // |
6502 | |
6503 | theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6504 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6505 | theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8); |
6506 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8); |
6507 | |
6508 | theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); |
6509 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); |
6510 | theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8); |
6511 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8); |
6512 | |
6513 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6514 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); |
6515 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16); |
6516 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16); |
6517 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); |
6518 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); |
6519 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); |
6520 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); |
6521 | |
6522 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); |
6523 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); |
6524 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16); |
6525 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16); |
6526 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); |
6527 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); |
6528 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); |
6529 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); |
6530 | |
6531 | theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); |
6532 | theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16); |
6533 | theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); |
6534 | theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); |
6535 | |
6536 | // SP and ZR tests |
6537 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0); |
6538 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16); |
6539 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0); |
6540 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16); |
6541 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX); |
6542 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX); |
6543 | |
6544 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6545 | |
6546 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6547 | // |
6548 | // R_R_R_Ext -- load/store shifted/extend |
6549 | // |
6550 | |
6551 | genDefineTempLabel(genCreateTempLabel()); |
6552 | |
6553 | // LDR (register) |
6554 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9); |
6555 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); |
6556 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3); |
6557 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6558 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3); |
6559 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6560 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3); |
6561 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6562 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3); |
6563 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6564 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3); |
6565 | |
6566 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9); |
6567 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); |
6568 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2); |
6569 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6570 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2); |
6571 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6572 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2); |
6573 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6574 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2); |
6575 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6576 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2); |
6577 | |
6578 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9); |
6579 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); |
6580 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1); |
6581 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6582 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1); |
6583 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6584 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1); |
6585 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6586 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1); |
6587 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6588 | theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1); |
6589 | |
6590 | theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9); |
6591 | theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6592 | theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6593 | theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6594 | theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6595 | |
6596 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9); |
6597 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); |
6598 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2); |
6599 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6600 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2); |
6601 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6602 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2); |
6603 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6604 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2); |
6605 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6606 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2); |
6607 | |
6608 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9); |
6609 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9); |
6610 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); |
6611 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1); |
6612 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6613 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1); |
6614 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6615 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1); |
6616 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6617 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1); |
6618 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6619 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1); |
6620 | |
6621 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9); |
6622 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9); |
6623 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6624 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6625 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6626 | theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6627 | |
6628 | // STR (register) |
6629 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9); |
6630 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); |
6631 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3); |
6632 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6633 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3); |
6634 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6635 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3); |
6636 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6637 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3); |
6638 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6639 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3); |
6640 | |
6641 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9); |
6642 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); |
6643 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2); |
6644 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6645 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2); |
6646 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6647 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2); |
6648 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6649 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2); |
6650 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6651 | theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2); |
6652 | |
6653 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9); |
6654 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); |
6655 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1); |
6656 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6657 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1); |
6658 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6659 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1); |
6660 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6661 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1); |
6662 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6663 | theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1); |
6664 | |
6665 | theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9); |
6666 | theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); |
6667 | theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); |
6668 | theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); |
6669 | theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); |
6670 | |
6671 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6672 | |
6673 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6674 | // |
6675 | // R_R_R_R |
6676 | // |
6677 | |
6678 | genDefineTempLabel(genCreateTempLabel()); |
6679 | |
6680 | theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10); |
6681 | theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11); |
6682 | theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12); |
6683 | theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13); |
6684 | theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14); |
6685 | theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15); |
6686 | |
6687 | theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19); |
6688 | theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20); |
6689 | theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_8BYTE, REG_R8, REG_R23, REG_R6, REG_R21); |
6690 | theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_8BYTE, REG_R9, REG_R24, REG_R7, REG_R22); |
6691 | theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23); |
6692 | theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24); |
6693 | |
6694 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6695 | |
6696 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6697 | // R_COND |
6698 | // |
6699 | |
6700 | // cset reg, cond |
6701 | theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq |
6702 | theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne |
6703 | theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R7, INS_COND_HS); // hs |
6704 | theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R6, INS_COND_LO); // lo |
6705 | theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R5, INS_COND_MI); // mi |
6706 | theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R4, INS_COND_PL); // pl |
6707 | theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R3, INS_COND_VS); // vs |
6708 | theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R2, INS_COND_VC); // vc |
6709 | theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R1, INS_COND_HI); // hi |
6710 | theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R0, INS_COND_LS); // ls |
6711 | theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R9, INS_COND_GE); // ge |
6712 | theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R8, INS_COND_LT); // lt |
6713 | theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R7, INS_COND_GT); // gt |
6714 | theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R6, INS_COND_LE); // le |
6715 | |
6716 | // csetm reg, cond |
6717 | theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R9, INS_COND_EQ); // eq |
6718 | theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R8, INS_COND_NE); // ne |
6719 | theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R7, INS_COND_HS); // hs |
6720 | theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R6, INS_COND_LO); // lo |
6721 | theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R5, INS_COND_MI); // mi |
6722 | theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R4, INS_COND_PL); // pl |
6723 | theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R3, INS_COND_VS); // vs |
6724 | theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R2, INS_COND_VC); // vc |
6725 | theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R1, INS_COND_HI); // hi |
6726 | theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R0, INS_COND_LS); // ls |
6727 | theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R9, INS_COND_GE); // ge |
6728 | theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R8, INS_COND_LT); // lt |
6729 | theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt |
6730 | theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, INS_COND_LE); // le |
6731 | |
6732 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6733 | |
6734 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6735 | // R_R_COND |
6736 | // |
6737 | |
6738 | // cinc reg, reg, cond |
6739 | // cinv reg, reg, cond |
6740 | // cneg reg, reg, cond |
6741 | theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R0, REG_R4, INS_COND_EQ); // eq |
6742 | theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R1, REG_R5, INS_COND_NE); // ne |
6743 | theEmitter->emitIns_R_R_COND(INS_cneg, EA_4BYTE, REG_R2, REG_R6, INS_COND_HS); // hs |
6744 | theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R3, REG_R7, INS_COND_LO); // lo |
6745 | theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R4, REG_R8, INS_COND_MI); // mi |
6746 | theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R5, REG_R9, INS_COND_PL); // pl |
6747 | theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R6, REG_R0, INS_COND_VS); // vs |
6748 | theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R7, REG_R1, INS_COND_VC); // vc |
6749 | theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R8, REG_R2, INS_COND_HI); // hi |
6750 | theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R9, REG_R3, INS_COND_LS); // ls |
6751 | theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R0, REG_R4, INS_COND_GE); // ge |
6752 | theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R2, REG_R5, INS_COND_LT); // lt |
6753 | theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt |
6754 | theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le |
6755 | |
6756 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6757 | |
6758 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6759 | // R_R_R_COND |
6760 | // |
6761 | |
6762 | // csel reg, reg, reg, cond |
6763 | // csinc reg, reg, reg, cond |
6764 | // csinv reg, reg, reg, cond |
6765 | // csneg reg, reg, reg, cond |
6766 | theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R0, REG_R4, REG_R8, INS_COND_EQ); // eq |
6767 | theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R1, REG_R5, REG_R9, INS_COND_NE); // ne |
6768 | theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_HS); // hs |
6769 | theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LO); // lo |
6770 | theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R4, REG_R8, REG_R2, INS_COND_MI); // mi |
6771 | theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R5, REG_R9, REG_R3, INS_COND_PL); // pl |
6772 | theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_8BYTE, REG_R6, REG_R0, REG_R4, INS_COND_VS); // vs |
6773 | theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_4BYTE, REG_R7, REG_R1, REG_R5, INS_COND_VC); // vc |
6774 | theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R8, REG_R2, REG_R6, INS_COND_HI); // hi |
6775 | theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R9, REG_R3, REG_R7, INS_COND_LS); // ls |
6776 | theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R0, REG_R4, REG_R8, INS_COND_GE); // ge |
6777 | theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R2, REG_R5, REG_R9, INS_COND_LT); // lt |
6778 | theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt |
6779 | theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le |
6780 | |
6781 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6782 | |
6783 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6784 | // R_R_FLAGS_COND |
6785 | // |
6786 | |
6787 | // ccmp reg1, reg2, nzcv, cond |
6788 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq |
6789 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne |
6790 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs |
6791 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo |
6792 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi |
6793 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl |
6794 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs |
6795 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc |
6796 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi |
6797 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls |
6798 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge |
6799 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt |
6800 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt |
6801 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le |
6802 | |
6803 | // ccmp reg1, imm, nzcv, cond |
6804 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq |
6805 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne |
6806 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs |
6807 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo |
6808 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi |
6809 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl |
6810 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs |
6811 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc |
6812 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi |
6813 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls |
6814 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge |
6815 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt |
6816 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt |
6817 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le |
6818 | |
6819 | // ccmp reg1, imm, nzcv, cond -- encoded as ccmn |
6820 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, -3, INS_FLAGS_V, INS_COND_EQ); // eq |
6821 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, -2, INS_FLAGS_C, INS_COND_NE); // ne |
6822 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, -1, INS_FLAGS_Z, INS_COND_HS); // hs |
6823 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, -5, INS_FLAGS_N, INS_COND_LO); // lo |
6824 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, -31, INS_FLAGS_CV, INS_COND_MI); // mi |
6825 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, -28, INS_FLAGS_ZV, INS_COND_PL); // pl |
6826 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, -25, INS_FLAGS_ZC, INS_COND_VS); // vs |
6827 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, -22, INS_FLAGS_NV, INS_COND_VC); // vc |
6828 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, -19, INS_FLAGS_NC, INS_COND_HI); // hi |
6829 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, -16, INS_FLAGS_NZ, INS_COND_LS); // ls |
6830 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, -13, INS_FLAGS_NONE, INS_COND_GE); // ge |
6831 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, -10, INS_FLAGS_NZV, INS_COND_LT); // lt |
6832 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, -7, INS_FLAGS_NZC, INS_COND_GT); // gt |
6833 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, -4, INS_FLAGS_NZCV, INS_COND_LE); // le |
6834 | |
6835 | // ccmn reg1, reg2, nzcv, cond |
6836 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq |
6837 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne |
6838 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs |
6839 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo |
6840 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi |
6841 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl |
6842 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs |
6843 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc |
6844 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi |
6845 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls |
6846 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge |
6847 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt |
6848 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt |
6849 | theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le |
6850 | |
6851 | // ccmn reg1, imm, nzcv, cond |
6852 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq |
6853 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne |
6854 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs |
6855 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo |
6856 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi |
6857 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl |
6858 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs |
6859 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc |
6860 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi |
6861 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls |
6862 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge |
6863 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt |
6864 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt |
6865 | theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le |
6866 | |
6867 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6868 | |
6869 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6870 | // |
6871 | // Branch to register |
6872 | // |
6873 | |
6874 | genDefineTempLabel(genCreateTempLabel()); |
6875 | |
6876 | theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8); |
6877 | theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9); |
6878 | theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8); |
6879 | theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR); |
6880 | |
6881 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6882 | |
6883 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6884 | // |
6885 | // Misc |
6886 | // |
6887 | |
6888 | genDefineTempLabel(genCreateTempLabel()); |
6889 | |
6890 | theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0); |
6891 | theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535); |
6892 | |
6893 | theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD); |
6894 | theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST); |
6895 | theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_OSH); |
6896 | |
6897 | theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_NSHLD); |
6898 | theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_NSHST); |
6899 | theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_NSH); |
6900 | |
6901 | theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_ISHLD); |
6902 | theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_ISHST); |
6903 | theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ISH); |
6904 | |
6905 | theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_LD); |
6906 | theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST); |
6907 | theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY); |
6908 | |
6909 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
6910 | |
6911 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
6912 | //////////////////////////////////////////////////////////////////////////////// |
6913 | // |
6914 | // SIMD and Floating point |
6915 | // |
6916 | //////////////////////////////////////////////////////////////////////////////// |
6917 | |
6918 | // |
6919 | // Load/Stores vector register |
6920 | // |
6921 | |
6922 | genDefineTempLabel(genCreateTempLabel()); |
6923 | |
6924 | // ldr/str Vt, [reg] |
6925 | theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_V1, REG_R9); |
6926 | theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_V2, REG_R8); |
6927 | theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_V3, REG_R7); |
6928 | theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_V4, REG_R6); |
6929 | theEmitter->emitIns_R_R(INS_ldr, EA_2BYTE, REG_V5, REG_R5); |
6930 | theEmitter->emitIns_R_R(INS_str, EA_2BYTE, REG_V6, REG_R4); |
6931 | theEmitter->emitIns_R_R(INS_ldr, EA_1BYTE, REG_V7, REG_R3); |
6932 | theEmitter->emitIns_R_R(INS_str, EA_1BYTE, REG_V8, REG_R2); |
6933 | theEmitter->emitIns_R_R(INS_ldr, EA_16BYTE, REG_V9, REG_R1); |
6934 | theEmitter->emitIns_R_R(INS_str, EA_16BYTE, REG_V10, REG_R0); |
6935 | |
6936 | // ldr/str Vt, [reg+cns] -- scaled |
6937 | theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1); |
6938 | theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 2); |
6939 | theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 4); |
6940 | theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 8); |
6941 | theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 16); |
6942 | |
6943 | theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V7, REG_R10, 1); |
6944 | theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V7, REG_R10, 2); |
6945 | theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V7, REG_R10, 4); |
6946 | theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V7, REG_R10, 8); |
6947 | theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V7, REG_R10, 16); |
6948 | |
6949 | // ldr/str Vt, [reg],cns -- post-indexed (unscaled) |
// ldr/str Vt, [reg+cns]! -- pre-indexed (unscaled)
6951 | theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6952 | theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6953 | theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6954 | theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6955 | theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6956 | |
6957 | theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6958 | theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6959 | theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6960 | theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6961 | theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6962 | |
6963 | theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6964 | theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6965 | theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6966 | theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6967 | theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); |
6968 | |
6969 | theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6970 | theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6971 | theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6972 | theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6973 | theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); |
6974 | |
6975 | theEmitter->emitIns_R_R_I(INS_ldur, EA_1BYTE, REG_V8, REG_R9, 2); |
6976 | theEmitter->emitIns_R_R_I(INS_ldur, EA_2BYTE, REG_V8, REG_R9, 3); |
6977 | theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_V8, REG_R9, 5); |
6978 | theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_V8, REG_R9, 9); |
6979 | theEmitter->emitIns_R_R_I(INS_ldur, EA_16BYTE, REG_V8, REG_R9, 17); |
6980 | |
6981 | theEmitter->emitIns_R_R_I(INS_stur, EA_1BYTE, REG_V7, REG_R10, 2); |
6982 | theEmitter->emitIns_R_R_I(INS_stur, EA_2BYTE, REG_V7, REG_R10, 3); |
6983 | theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_V7, REG_R10, 5); |
6984 | theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9); |
6985 | theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17); |
6986 | |
6987 | // load/store pair |
6988 | theEmitter->emitIns_R_R_R(INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10); |
6989 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V1, REG_V2, REG_R10, 0); |
6990 | theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_V2, REG_V3, REG_R10, 8); |
6991 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 24); |
6992 | |
6993 | theEmitter->emitIns_R_R_R(INS_ldnp, EA_4BYTE, REG_V4, REG_V5, REG_SP); |
6994 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 0); |
6995 | theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 4); |
6996 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V7, REG_V8, REG_SP, 12); |
6997 | |
6998 | theEmitter->emitIns_R_R_R(INS_ldnp, EA_16BYTE, REG_V8, REG_V9, REG_R10); |
6999 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V9, REG_V10, REG_R10, 0); |
7000 | theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_16BYTE, REG_V10, REG_V11, REG_R10, 16); |
7001 | theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V11, REG_V12, REG_R10, 48); |
7002 | |
7003 | theEmitter->emitIns_R_R_R(INS_ldp, EA_8BYTE, REG_V0, REG_V1, REG_R10); |
7004 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V1, REG_V2, REG_SP, 0); |
7005 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V2, REG_V3, REG_SP, 8); |
7006 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 16); |
7007 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V4, REG_V5, REG_R10, 24, INS_OPTS_POST_INDEX); |
7008 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V5, REG_V6, REG_SP, 32, INS_OPTS_POST_INDEX); |
7009 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V6, REG_V7, REG_SP, 40, INS_OPTS_PRE_INDEX); |
7010 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V7, REG_V8, REG_R10, 48, INS_OPTS_PRE_INDEX); |
7011 | |
7012 | theEmitter->emitIns_R_R_R(INS_ldp, EA_4BYTE, REG_V0, REG_V1, REG_R10); |
7013 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V1, REG_V2, REG_SP, 0); |
7014 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V2, REG_V3, REG_SP, 4); |
7015 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V3, REG_V4, REG_R10, 8); |
7016 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V4, REG_V5, REG_R10, 12, INS_OPTS_POST_INDEX); |
7017 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 16, INS_OPTS_POST_INDEX); |
7018 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 20, INS_OPTS_PRE_INDEX); |
7019 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V7, REG_V8, REG_R10, 24, INS_OPTS_PRE_INDEX); |
7020 | |
7021 | theEmitter->emitIns_R_R_R(INS_ldp, EA_16BYTE, REG_V0, REG_V1, REG_R10); |
7022 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V1, REG_V2, REG_SP, 0); |
7023 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V2, REG_V3, REG_SP, 16); |
7024 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V3, REG_V4, REG_R10, 32); |
7025 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V4, REG_V5, REG_R10, 48, INS_OPTS_POST_INDEX); |
7026 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V5, REG_V6, REG_SP, 64, INS_OPTS_POST_INDEX); |
7027 | theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V6, REG_V7, REG_SP, 80, INS_OPTS_PRE_INDEX); |
7028 | theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V7, REG_V8, REG_R10, 96, INS_OPTS_PRE_INDEX); |
7029 | |
7030 | // LDR (register) |
7031 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V1, REG_SP, REG_R9); |
7032 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); |
7033 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 3); |
7034 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); |
7035 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 3); |
7036 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); |
7037 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 3); |
7038 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); |
7039 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 3); |
7040 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); |
7041 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 3); |
7042 | |
7043 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V1, REG_SP, REG_R9); |
7044 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); |
7045 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 2); |
7046 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); |
7047 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 2); |
7048 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); |
7049 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 2); |
7050 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); |
7051 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 2); |
7052 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); |
7053 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 2); |
7054 | |
7055 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V1, REG_SP, REG_R9); |
7056 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); |
7057 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 4); |
7058 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); |
7059 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 4); |
7060 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); |
7061 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 4); |
7062 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); |
7063 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 4); |
7064 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); |
7065 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 4); |
7066 | |
7067 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V1, REG_SP, REG_R9); |
7068 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); |
7069 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 1); |
7070 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); |
7071 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 1); |
7072 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); |
7073 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 1); |
7074 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); |
7075 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 1); |
7076 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); |
7077 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 1); |
7078 | |
7079 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V1, REG_R7, REG_R9); |
7080 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V2, REG_SP, REG_R9, INS_OPTS_SXTW); |
7081 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_UXTW); |
7082 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX); |
7083 | theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX); |
7084 | |
7085 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
7086 | |
7087 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7088 | // |
7089 | // R_R mov and aliases for mov |
7090 | // |
7091 | |
7092 | // mov vector to vector |
7093 | theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1); |
7094 | theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3); |
7095 | |
7096 | theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13); |
7097 | theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15); |
7098 | theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17); |
7099 | |
7100 | // mov vector to general |
7101 | theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4); |
7102 | theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5); |
7103 | theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6); |
7104 | theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7); |
7105 | |
7106 | // mov general to vector |
7107 | theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4); |
7108 | theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5); |
7109 | theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6); |
7110 | theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7); |
7111 | |
7112 | // mov vector[index] to vector |
7113 | theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1); |
7114 | theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V2, REG_V3, 3); |
7115 | theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V4, REG_V5, 7); |
7116 | theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V6, REG_V7, 15); |
7117 | |
7118 | // mov to general from vector[index] |
7119 | theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_R8, REG_V16, 1); |
7120 | theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_R9, REG_V17, 2); |
7121 | theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_R10, REG_V18, 3); |
7122 | theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_R11, REG_V19, 4); |
7123 | |
7124 | // mov to vector[index] from general |
7125 | theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V20, REG_R12, 1); |
7126 | theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V21, REG_R13, 2); |
7127 | theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V22, REG_R14, 6); |
7128 | theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V23, REG_R15, 8); |
7129 | |
7130 | // mov vector[index] to vector[index2] |
7131 | theEmitter->emitIns_R_R_I_I(INS_mov, EA_8BYTE, REG_V8, REG_V9, 1, 0); |
7132 | theEmitter->emitIns_R_R_I_I(INS_mov, EA_4BYTE, REG_V10, REG_V11, 2, 1); |
7133 | theEmitter->emitIns_R_R_I_I(INS_mov, EA_2BYTE, REG_V12, REG_V13, 5, 2); |
7134 | theEmitter->emitIns_R_R_I_I(INS_mov, EA_1BYTE, REG_V14, REG_V15, 12, 3); |
7135 | |
7136 | ////////////////////////////////////////////////////////////////////////////////// |
7137 | |
7138 | // mov/dup scalar |
7139 | theEmitter->emitIns_R_R_I(INS_dup, EA_8BYTE, REG_V24, REG_V25, 1); |
7140 | theEmitter->emitIns_R_R_I(INS_dup, EA_4BYTE, REG_V26, REG_V27, 3); |
7141 | theEmitter->emitIns_R_R_I(INS_dup, EA_2BYTE, REG_V28, REG_V29, 7); |
7142 | theEmitter->emitIns_R_R_I(INS_dup, EA_1BYTE, REG_V30, REG_V31, 15); |
7143 | |
7144 | // mov/ins vector element |
7145 | theEmitter->emitIns_R_R_I_I(INS_ins, EA_8BYTE, REG_V0, REG_V1, 0, 1); |
7146 | theEmitter->emitIns_R_R_I_I(INS_ins, EA_4BYTE, REG_V2, REG_V3, 2, 2); |
7147 | theEmitter->emitIns_R_R_I_I(INS_ins, EA_2BYTE, REG_V4, REG_V5, 4, 3); |
7148 | theEmitter->emitIns_R_R_I_I(INS_ins, EA_1BYTE, REG_V6, REG_V7, 8, 4); |
7149 | |
7150 | // umov to general from vector element |
7151 | theEmitter->emitIns_R_R_I(INS_umov, EA_8BYTE, REG_R0, REG_V8, 1); |
7152 | theEmitter->emitIns_R_R_I(INS_umov, EA_4BYTE, REG_R1, REG_V9, 2); |
7153 | theEmitter->emitIns_R_R_I(INS_umov, EA_2BYTE, REG_R2, REG_V10, 4); |
7154 | theEmitter->emitIns_R_R_I(INS_umov, EA_1BYTE, REG_R3, REG_V11, 8); |
7155 | |
7156 | // ins to vector element from general |
7157 | theEmitter->emitIns_R_R_I(INS_ins, EA_8BYTE, REG_V12, REG_R4, 1); |
7158 | theEmitter->emitIns_R_R_I(INS_ins, EA_4BYTE, REG_V13, REG_R5, 3); |
7159 | theEmitter->emitIns_R_R_I(INS_ins, EA_2BYTE, REG_V14, REG_R6, 7); |
7160 | theEmitter->emitIns_R_R_I(INS_ins, EA_1BYTE, REG_V15, REG_R7, 15); |
7161 | |
7162 | // smov to general from vector element |
7163 | theEmitter->emitIns_R_R_I(INS_smov, EA_4BYTE, REG_R5, REG_V17, 2); |
7164 | theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4); |
7165 | theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8); |
7166 | |
7167 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
7168 | |
7169 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7170 | // |
7171 | // R_I movi and mvni |
7172 | // |
7173 | |
7174 | // movi imm8 (vector) |
7175 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B); |
7176 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B); |
7177 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V2, 0x00, INS_OPTS_16B); |
7178 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V3, 0xFF, INS_OPTS_16B); |
7179 | |
7180 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V4, 0x007F, INS_OPTS_4H); |
7181 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V5, 0x7F00, INS_OPTS_4H); // LSL 8 |
7182 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V6, 0x003F, INS_OPTS_8H); |
7183 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V7, 0x3F00, INS_OPTS_8H); // LSL 8 |
7184 | |
7185 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V8, 0x1F, INS_OPTS_2S); |
7186 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V9, 0x1F00, INS_OPTS_2S); // LSL 8 |
7187 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V10, 0x1F0000, INS_OPTS_2S); // LSL 16 |
7188 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V11, 0x1F000000, INS_OPTS_2S); // LSL 24 |
7189 | |
7190 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V12, 0x1FFF, INS_OPTS_2S); // MSL 8 |
7191 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V13, 0x1FFFFF, INS_OPTS_2S); // MSL 16 |
7192 | |
7193 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V14, 0x37, INS_OPTS_4S); |
7194 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V15, 0x3700, INS_OPTS_4S); // LSL 8 |
7195 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V16, 0x370000, INS_OPTS_4S); // LSL 16 |
7196 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V17, 0x37000000, INS_OPTS_4S); // LSL 24 |
7197 | |
7198 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V18, 0x37FF, INS_OPTS_4S); // MSL 8 |
7199 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V19, 0x37FFFF, INS_OPTS_4S); // MSL 16 |
7200 | |
7201 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V20, 0xFF80, INS_OPTS_4H); // mvni |
7202 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V21, 0xFFC0, INS_OPTS_8H); // mvni |
7203 | |
7204 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V22, 0xFFFFFFE0, INS_OPTS_2S); // mvni |
7205 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V23, 0xFFFFF0FF, INS_OPTS_4S); // mvni LSL 8 |
7206 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V24, 0xFFF8FFFF, INS_OPTS_2S); // mvni LSL 16 |
7207 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V25, 0xFCFFFFFF, INS_OPTS_4S); // mvni LSL 24 |
7208 | |
7209 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V26, 0xFFFFFE00, INS_OPTS_2S); // mvni MSL 8 |
7210 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V27, 0xFFFC0000, INS_OPTS_4S); // mvni MSL 16 |
7211 | |
7212 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V28, 0x00FF00FF00FF00FF, INS_OPTS_1D); |
7213 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V29, 0x00FFFF0000FFFF00, INS_OPTS_2D); |
7214 | theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000); |
7215 | theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D); |
7216 | |
7217 | theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); |
7218 | theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8 |
7219 | theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); |
7220 | theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8 |
7221 | |
7222 | theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S); |
7223 | theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8 |
7224 | theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16 |
7225 | theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24 |
7226 | |
7227 | theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V8, 0x42FF, INS_OPTS_2S); // MSL 8 |
7228 | theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V9, 0x42FFFF, INS_OPTS_2S); // MSL 16 |
7229 | |
7230 | theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S); |
7231 | theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8 |
7232 | theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 |
7233 | theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 |
7234 | |
7235 | theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8 |
7236 | theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16 |
7237 | |
7238 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
7239 | |
7240 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7241 | // |
7242 | // R_I orr/bic vector immediate |
7243 | // |
7244 | |
7245 | theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); |
7246 | theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8 |
7247 | theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); |
7248 | theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8 |
7249 | |
7250 | theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S); |
7251 | theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8 |
7252 | theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16 |
7253 | theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24 |
7254 | |
7255 | theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S); |
7256 | theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8 |
7257 | theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 |
7258 | theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 |
7259 | |
7260 | theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); |
7261 | theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8 |
7262 | theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); |
7263 | theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8 |
7264 | |
7265 | theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S); |
7266 | theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8 |
7267 | theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16 |
7268 | theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24 |
7269 | |
7270 | theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S); |
7271 | theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8 |
7272 | theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 |
7273 | theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 |
7274 | |
7275 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
7276 | |
7277 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7278 | // |
7279 | // R_F fcmp/fmov immediate |
7280 | // |
7281 | |
7282 | // fmov imm8 (scalar) |
7283 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0); |
7284 | theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0); |
7285 | theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V0, 2.0); // encodes imm8 == 0 |
7286 | theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V16, 10.0); |
7287 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V17, -10.0); |
7288 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V18, 31); // Largest encodable value |
7289 | theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V19, -31); |
7290 | theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V20, 1.25); |
7291 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V21, -1.25); |
7292 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V22, 0.125); // Smallest encodable value |
7293 | theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V23, -0.125); |
7294 | |
7295 | // fmov imm8 (vector) |
7296 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V0, 2.0, INS_OPTS_2S); |
7297 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V24, 1.0, INS_OPTS_2S); |
7298 | theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V25, 1.0, INS_OPTS_4S); |
7299 | theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V26, 1.0, INS_OPTS_2D); |
7300 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V27, -10.0, INS_OPTS_2S); |
7301 | theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V28, -10.0, INS_OPTS_4S); |
7302 | theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V29, -10.0, INS_OPTS_2D); |
7303 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V30, 31.0, INS_OPTS_2S); |
7304 | theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V31, 31.0, INS_OPTS_4S); |
7305 | theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V0, 31.0, INS_OPTS_2D); |
7306 | theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V1, -0.125, INS_OPTS_2S); |
7307 | theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V2, -0.125, INS_OPTS_4S); |
7308 | theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V3, -0.125, INS_OPTS_2D); |
7309 | |
7310 | // fcmp with 0.0 |
7311 | theEmitter->emitIns_R_F(INS_fcmp, EA_8BYTE, REG_V12, 0.0); |
7312 | theEmitter->emitIns_R_F(INS_fcmp, EA_4BYTE, REG_V13, 0.0); |
7313 | theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0); |
7314 | theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0); |
7315 | |
7316 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
7317 | |
7318 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7319 | // |
7320 | // R_R fmov/fcmp/fcvt |
7321 | // |
7322 | |
7323 | // fmov vector to vector |
7324 | theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2); |
7325 | theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3); |
7326 | |
7327 | // fmov vector to general |
7328 | theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4); |
7329 | theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5); |
7330 | // using the optional conversion specifier |
7331 | theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE); |
7332 | theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE); |
7333 | |
7334 | // fmov general to vector |
7335 | theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4); |
7336 | theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5); |
7337 | // using the optional conversion specifier |
7338 | theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D); |
7339 | theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S); |
7340 | |
7341 | // fcmp/fcmpe |
7342 | theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16); |
7343 | theEmitter->emitIns_R_R(INS_fcmp, EA_4BYTE, REG_V9, REG_V17); |
7344 | theEmitter->emitIns_R_R(INS_fcmpe, EA_8BYTE, REG_V10, REG_V18); |
7345 | theEmitter->emitIns_R_R(INS_fcmpe, EA_4BYTE, REG_V11, REG_V19); |
7346 | |
7347 | // fcvt |
7348 | theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V24, REG_V25, INS_OPTS_S_TO_D); // Single to Double |
7349 | theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V26, REG_V27, INS_OPTS_D_TO_S); // Double to Single |
7350 | |
7351 | theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V1, REG_V2, INS_OPTS_H_TO_S); |
7352 | theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V3, REG_V4, INS_OPTS_H_TO_D); |
7353 | |
7354 | theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H); |
7355 | theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H); |
7356 | |
7357 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
7358 | |
7359 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7360 | // |
7361 | // R_R floating point conversions |
7362 | // |
7363 | |
7364 | // fcvtas scalar |
7365 | theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1); |
7366 | theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3); |
7367 | |
7368 | // fcvtas scalar to general |
7369 | theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7370 | theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7371 | theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7372 | theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7373 | |
7374 | // fcvtas vector |
7375 | theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7376 | theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7377 | theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7378 | |
7379 | // fcvtau scalar |
7380 | theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_V0, REG_V1); |
7381 | theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V2, REG_V3); |
7382 | |
7383 | // fcvtau scalar to general |
7384 | theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7385 | theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7386 | theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7387 | theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7388 | |
7389 | // fcvtau vector |
7390 | theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7391 | theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7392 | theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7393 | |
7394 | //////////////////////////////////////////////////////////////////////////////// |
7395 | |
7396 | // fcvtms scalar |
7397 | theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_V0, REG_V1); |
7398 | theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V2, REG_V3); |
7399 | |
7400 | // fcvtms scalar to general |
7401 | theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7402 | theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7403 | theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7404 | theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7405 | |
7406 | // fcvtms vector |
7407 | theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7408 | theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7409 | theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7410 | |
7411 | // fcvtmu scalar |
7412 | theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_V0, REG_V1); |
7413 | theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V2, REG_V3); |
7414 | |
7415 | // fcvtmu scalar to general |
7416 | theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7417 | theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7418 | theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7419 | theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7420 | |
7421 | // fcvtmu vector |
7422 | theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7423 | theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7424 | theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7425 | |
7426 | //////////////////////////////////////////////////////////////////////////////// |
7427 | |
7428 | // fcvtns scalar |
7429 | theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_V0, REG_V1); |
7430 | theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V2, REG_V3); |
7431 | |
7432 | // fcvtns scalar to general |
7433 | theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7434 | theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7435 | theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7436 | theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7437 | |
7438 | // fcvtns vector |
7439 | theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7440 | theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7441 | theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7442 | |
7443 | // fcvtnu scalar |
7444 | theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_V0, REG_V1); |
7445 | theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V2, REG_V3); |
7446 | |
7447 | // fcvtnu scalar to general |
7448 | theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7449 | theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7450 | theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7451 | theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7452 | |
7453 | // fcvtnu vector |
7454 | theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7455 | theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7456 | theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7457 | |
7458 | //////////////////////////////////////////////////////////////////////////////// |
7459 | |
7460 | // fcvtps scalar |
7461 | theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_V0, REG_V1); |
7462 | theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V2, REG_V3); |
7463 | |
7464 | // fcvtps scalar to general |
7465 | theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7466 | theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7467 | theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7468 | theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7469 | |
7470 | // fcvtps vector |
7471 | theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7472 | theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7473 | theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7474 | |
7475 | // fcvtpu scalar |
7476 | theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_V0, REG_V1); |
7477 | theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V2, REG_V3); |
7478 | |
7479 | // fcvtpu scalar to general |
7480 | theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7481 | theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7482 | theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7483 | theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7484 | |
7485 | // fcvtpu vector |
7486 | theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7487 | theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7488 | theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7489 | |
7490 | //////////////////////////////////////////////////////////////////////////////// |
7491 | |
7492 | // fcvtzs scalar |
7493 | theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_V0, REG_V1); |
7494 | theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V2, REG_V3); |
7495 | |
7496 | // fcvtzs scalar to general |
7497 | theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7498 | theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7499 | theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7500 | theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7501 | |
7502 | // fcvtzs vector |
7503 | theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7504 | theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7505 | theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7506 | |
7507 | // fcvtzu scalar |
7508 | theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_V0, REG_V1); |
7509 | theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V2, REG_V3); |
7510 | |
7511 | // fcvtzu scalar to general |
7512 | theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); |
7513 | theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); |
7514 | theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); |
7515 | theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); |
7516 | |
7517 | // fcvtzu vector |
7518 | theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7519 | theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7520 | theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7521 | |
7522 | //////////////////////////////////////////////////////////////////////////////// |
7523 | |
7524 | // scvtf scalar |
7525 | theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V0, REG_V1); |
7526 | theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V2, REG_V3); |
7527 | |
7528 | // scvtf scalar from general |
7529 | theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S); |
7530 | theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S); |
7531 | theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D); |
7532 | theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D); |
7533 | |
7534 | // scvtf vector |
7535 | theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7536 | theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7537 | theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7538 | |
7539 | // ucvtf scalar |
7540 | theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V0, REG_V1); |
7541 | theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V2, REG_V3); |
7542 | |
7543 | // ucvtf scalar from general |
7544 | theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S); |
7545 | theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S); |
7546 | theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D); |
7547 | theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D); |
7548 | |
7549 | // ucvtf vector |
7550 | theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
7551 | theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
7552 | theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); |
7553 | |
7554 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
7555 | |
7556 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7557 | // |
7558 | // R_R floating point and vector integer operations, one dest, one source |
7559 | // |
7560 | |
7561 | // fabs scalar |
7562 | theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1); |
7563 | theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3); |
7564 | |
7565 | // fabs vector |
7566 | theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7567 | theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7568 | theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7569 | |
7570 | // fneg scalar |
7571 | theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1); |
7572 | theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3); |
7573 | |
7574 | // fneg vector |
7575 | theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7576 | theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7577 | theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7578 | |
7579 | // fsqrt scalar |
7580 | theEmitter->emitIns_R_R(INS_fsqrt, EA_4BYTE, REG_V0, REG_V1); |
7581 | theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V2, REG_V3); |
7582 | |
7583 | // fsqrt vector |
7584 | theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7585 | theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7586 | theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7587 | |
7588 | genDefineTempLabel(genCreateTempLabel()); |
7589 | |
7590 | // abs scalar |
7591 | theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V2, REG_V3); |
7592 | |
7593 | // abs vector |
7594 | theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7595 | theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7596 | theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7597 | theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7598 | theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7599 | theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7600 | theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D); |
7601 | |
7602 | // neg scalar |
7603 | theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V2, REG_V3); |
7604 | |
7605 | // neg vector |
7606 | theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7607 | theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7608 | theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7609 | theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7610 | theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7611 | theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7612 | theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D); |
7613 | |
7614 | // mvn vector |
7615 | theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V4, REG_V5); |
7616 | theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V6, REG_V7, INS_OPTS_8B); |
7617 | theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V8, REG_V9); |
7618 | theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_16B); |
7619 | |
7620 | // cnt vector |
7621 | theEmitter->emitIns_R_R(INS_cnt, EA_8BYTE, REG_V22, REG_V23, INS_OPTS_8B); |
7622 | theEmitter->emitIns_R_R(INS_cnt, EA_16BYTE, REG_V24, REG_V25, INS_OPTS_16B); |
7623 | |
7624 | // not vector (the same encoding as mvn) |
7625 | theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V12, REG_V13); |
7626 | theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V14, REG_V15, INS_OPTS_8B); |
7627 | theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V16, REG_V17); |
7628 | theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V18, REG_V19, INS_OPTS_16B); |
7629 | |
7630 | // cls vector |
7631 | theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7632 | theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7633 | theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7634 | theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7635 | theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7636 | theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7637 | |
7638 | // clz vector |
7639 | theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7640 | theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7641 | theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7642 | theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7643 | theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7644 | theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7645 | |
7646 | // rbit vector |
7647 | theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); |
7648 | theEmitter->emitIns_R_R(INS_rbit, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); |
7649 | |
7650 | // rev16 vector |
7651 | theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); |
7652 | theEmitter->emitIns_R_R(INS_rev16, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); |
7653 | |
7654 | // rev32 vector |
7655 | theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7656 | theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7657 | theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7658 | theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7659 | |
7660 | // rev64 vector |
7661 | theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7662 | theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7663 | theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7664 | theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7665 | theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7666 | theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7667 | |
7668 | // addv vector |
7669 | theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7670 | theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7671 | theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7672 | theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7673 | theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7674 | theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7675 | |
7676 | // saddlv vector |
7677 | theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7678 | theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7679 | theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7680 | theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7681 | theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7682 | theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7683 | |
7684 | // smaxlv vector |
7685 | theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7686 | theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7687 | theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7688 | theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7689 | theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7690 | theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7691 | |
7692 | // sminlv vector |
7693 | theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7694 | theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7695 | theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7696 | theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7697 | theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7698 | theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7699 | |
7700 | // uaddlv vector |
7701 | theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7702 | theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7703 | theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7704 | theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7705 | theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7706 | theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7707 | |
7708 | // umaxlv vector |
7709 | theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7710 | theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7711 | theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7712 | theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7713 | theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7714 | theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7715 | |
7716 | // uminlv vector |
7717 | theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); |
7718 | theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); |
7719 | theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); |
7720 | theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); |
7721 | theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); |
7722 | theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); |
7723 | |
7724 | // faddp scalar |
7725 | theEmitter->emitIns_R_R(INS_faddp, EA_4BYTE, REG_V0, REG_V1); |
7726 | theEmitter->emitIns_R_R(INS_faddp, EA_8BYTE, REG_V2, REG_V3); |
7727 | |
7728 | // INS_fcvtl |
7729 | theEmitter->emitIns_R_R(INS_fcvtl, EA_4BYTE, REG_V0, REG_V1); |
7730 | |
7731 | // INS_fcvtl2 |
7732 | theEmitter->emitIns_R_R(INS_fcvtl2, EA_4BYTE, REG_V0, REG_V1); |
7733 | |
7734 | // INS_fcvtn |
7735 | theEmitter->emitIns_R_R(INS_fcvtn, EA_8BYTE, REG_V0, REG_V1); |
7736 | |
7737 | // INS_fcvtn2 |
7738 | theEmitter->emitIns_R_R(INS_fcvtn2, EA_8BYTE, REG_V0, REG_V1); |
7739 | #endif |
7740 | |
7741 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7742 | // |
7743 | // R_R floating point round to int, one dest, one source |
7744 | // |
7745 | |
7746 | // frinta scalar |
7747 | theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1); |
7748 | theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3); |
7749 | |
7750 | // frinta vector |
7751 | theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7752 | theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7753 | theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7754 | |
7755 | // frinti scalar |
7756 | theEmitter->emitIns_R_R(INS_frinti, EA_4BYTE, REG_V0, REG_V1); |
7757 | theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V2, REG_V3); |
7758 | |
7759 | // frinti vector |
7760 | theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7761 | theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7762 | theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7763 | |
7764 | // frintm scalar |
7765 | theEmitter->emitIns_R_R(INS_frintm, EA_4BYTE, REG_V0, REG_V1); |
7766 | theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V2, REG_V3); |
7767 | |
7768 | // frintm vector |
7769 | theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7770 | theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7771 | theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7772 | |
7773 | // frintn scalar |
7774 | theEmitter->emitIns_R_R(INS_frintn, EA_4BYTE, REG_V0, REG_V1); |
7775 | theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V2, REG_V3); |
7776 | |
7777 | // frintn vector |
7778 | theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7779 | theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7780 | theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7781 | |
7782 | // frintp scalar |
7783 | theEmitter->emitIns_R_R(INS_frintp, EA_4BYTE, REG_V0, REG_V1); |
7784 | theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V2, REG_V3); |
7785 | |
7786 | // frintp vector |
7787 | theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7788 | theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7789 | theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7790 | |
7791 | // frintx scalar |
7792 | theEmitter->emitIns_R_R(INS_frintx, EA_4BYTE, REG_V0, REG_V1); |
7793 | theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V2, REG_V3); |
7794 | |
7795 | // frintx vector |
7796 | theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7797 | theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7798 | theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7799 | |
7800 | // frintz scalar |
7801 | theEmitter->emitIns_R_R(INS_frintz, EA_4BYTE, REG_V0, REG_V1); |
7802 | theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V2, REG_V3); |
7803 | |
7804 | // frintz vector |
7805 | theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); |
7806 | theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); |
7807 | theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); |
7808 | |
7809 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
7810 | |
7811 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7812 | // |
7813 | // R_R_R floating point operations, one dest, two source |
7814 | // |
7815 | |
7816 | genDefineTempLabel(genCreateTempLabel()); |
7817 | |
7818 | theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE |
7819 | theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE |
7820 | theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
7821 | theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
7822 | theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
7823 | |
7824 | theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE |
7825 | theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE |
7826 | theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
7827 | theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
7828 | theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
7829 | |
7830 | theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE |
7831 | theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE |
7832 | theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
7833 | theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
7834 | theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
7835 | |
7836 | theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE |
7837 | theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE |
7838 | theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
7839 | theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
7840 | theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
7841 | |
7842 | theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE |
7843 | theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE |
7844 | theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
7845 | theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
7846 | theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
7847 | |
7848 | // fabd |
7849 | theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE |
7850 | theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE |
7851 | theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
7852 | theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
7853 | theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
7854 | |
7855 | genDefineTempLabel(genCreateTempLabel()); |
7856 | |
7857 | theEmitter->emitIns_R_R_R(INS_fmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE |
7858 | theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE |
7859 | theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
7860 | theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
7861 | theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
7862 | |
7863 | theEmitter->emitIns_R_R_R_I(INS_fmul, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE |
7864 | theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE |
7865 | theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); |
7866 | theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); |
7867 | theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); |
7868 | |
7869 | theEmitter->emitIns_R_R_R(INS_fmulx, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE |
7870 | theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE |
7871 | theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
7872 | theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
7873 | theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
7874 | |
7875 | theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE |
7876 | theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE |
7877 | theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); |
7878 | theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); |
7879 | theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); |
7880 | |
7881 | theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE |
7882 | theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE |
7883 | |
7884 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
7885 | |
7886 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
7887 | // |
7888 | // R_R_I vector operations, one dest, one source reg, one immed |
7889 | // |
7890 | |
7891 | genDefineTempLabel(genCreateTempLabel()); |
7892 | |
7893 | // 'sshr' scalar |
7894 | theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1); |
7895 | theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14); |
7896 | theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27); |
7897 | theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40); |
7898 | theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63); |
7899 | |
7900 | // 'sshr' vector |
7901 | theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
7902 | theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
7903 | theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
7904 | theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
7905 | theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
7906 | theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
7907 | theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
7908 | theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
7909 | |
7910 | // 'ssra' scalar |
7911 | theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1); |
7912 | theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14); |
7913 | theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27); |
7914 | theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40); |
7915 | theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63); |
7916 | |
7917 | // 'ssra' vector |
7918 | theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
7919 | theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
7920 | theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
7921 | theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
7922 | theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
7923 | theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
7924 | theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
7925 | theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
7926 | |
7927 | // 'srshr' scalar |
7928 | theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1); |
7929 | theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14); |
7930 | theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27); |
7931 | theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40); |
7932 | theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63); |
7933 | |
7934 | // 'srshr' vector |
7935 | theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
7936 | theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
7937 | theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
7938 | theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
7939 | theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
7940 | theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
7941 | theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
7942 | theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
7943 | |
7944 | // 'srsra' scalar |
7945 | theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1); |
7946 | theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14); |
7947 | theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27); |
7948 | theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40); |
7949 | theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63); |
7950 | |
7951 | // 'srsra' vector |
7952 | theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
7953 | theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
7954 | theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
7955 | theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
7956 | theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
7957 | theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
7958 | theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
7959 | theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
7960 | |
7961 | // 'shl' scalar |
7962 | theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1); |
7963 | theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14); |
7964 | theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27); |
7965 | theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40); |
7966 | theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63); |
7967 | |
7968 | // 'shl' vector |
7969 | theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
7970 | theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
7971 | theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
7972 | theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
7973 | theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
7974 | theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
7975 | theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
7976 | theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
7977 | |
7978 | // 'ushr' scalar |
7979 | theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1); |
7980 | theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14); |
7981 | theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27); |
7982 | theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V6, REG_V7, 40); |
7983 | theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63); |
7984 | |
7985 | // 'ushr' vector |
7986 | theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
7987 | theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
7988 | theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
7989 | theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
7990 | theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
7991 | theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
7992 | theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
7993 | theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
7994 | |
7995 | // 'usra' scalar |
7996 | theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1); |
7997 | theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14); |
7998 | theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27); |
7999 | theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40); |
8000 | theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63); |
8001 | |
8002 | // 'usra' vector |
8003 | theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
8004 | theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
8005 | theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
8006 | theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
8007 | theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
8008 | theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
8009 | theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
8010 | theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
8011 | |
8012 | // 'urshr' scalar |
8013 | theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1); |
8014 | theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14); |
8015 | theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 27); |
8016 | theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40); |
8017 | theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63); |
8018 | |
8019 | // 'urshr' vector |
8020 | theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
8021 | theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
8022 | theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
8023 | theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
8024 | theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
8025 | theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
8026 | theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
8027 | theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
8028 | |
8029 | // 'ursra' scalar |
8030 | theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1); |
8031 | theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14); |
8032 | theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27); |
8033 | theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40); |
8034 | theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63); |
8035 | |
8036 | // 'ursra' vector |
8037 | theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
8038 | theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
8039 | theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
8040 | theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
8041 | theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
8042 | theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
8043 | theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
8044 | theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
8045 | |
8046 | // 'sri' scalar |
8047 | theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1); |
8048 | theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14); |
8049 | theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27); |
8050 | theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40); |
8051 | theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63); |
8052 | |
8053 | // 'sri' vector |
8054 | theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
8055 | theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
8056 | theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
8057 | theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
8058 | theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
8059 | theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
8060 | theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
8061 | theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
8062 | |
8063 | // 'sli' scalar |
8064 | theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1); |
8065 | theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14); |
8066 | theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27); |
8067 | theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40); |
8068 | theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63); |
8069 | |
8070 | // 'sli' vector |
8071 | theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
8072 | theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
8073 | theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
8074 | theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
8075 | theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
8076 | theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
8077 | theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); |
8078 | theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); |
8079 | |
8080 | // 'sshll' vector |
8081 | theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
8082 | theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
8083 | theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
8084 | theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
8085 | theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
8086 | theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
8087 | |
8088 | // 'ushll' vector |
8089 | theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
8090 | theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
8091 | theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
8092 | theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
8093 | theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
8094 | theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
8095 | |
8096 | // 'shrn' vector |
8097 | theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
8098 | theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
8099 | theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
8100 | theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
8101 | theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
8102 | theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
8103 | |
8104 | // 'rshrn' vector |
8105 | theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); |
8106 | theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); |
8107 | theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); |
8108 | theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); |
8109 | theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); |
8110 | theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); |
8111 | |
8112 | // 'sxtl' vector |
8113 | theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); |
8114 | theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); |
8115 | theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H); |
8116 | theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H); |
8117 | theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
8118 | theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
8119 | |
8120 | // 'uxtl' vector |
8121 | theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); |
8122 | theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); |
8123 | theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H); |
8124 | theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H); |
8125 | theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); |
8126 | theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); |
8127 | |
8128 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
8129 | |
8130 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
8131 | // |
8132 | // R_R_R vector operations, one dest, two source |
8133 | // |
8134 | |
8135 | genDefineTempLabel(genCreateTempLabel()); |
8136 | |
8137 | // Specifying an Arrangement is optional |
8138 | // |
8139 | theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8); |
8140 | theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11); |
8141 | theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14); |
8142 | theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17); |
8143 | theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20); |
8144 | theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23); |
8145 | theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26); |
8146 | theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29); |
8147 | theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0); |
8148 | theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3); |
8149 | |
8150 | theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6); |
8151 | theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9); |
8152 | theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12); |
8153 | theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15); |
8154 | theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18); |
8155 | theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21); |
8156 | |
8157 | // Default Arrangement as per the ARM64 manual |
8158 | // |
8159 | theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_8B); |
8160 | theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8B); |
8161 | theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8B); |
8162 | theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8B); |
8163 | theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B); |
8164 | theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B); |
8165 | theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26, INS_OPTS_16B); |
8166 | theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29, INS_OPTS_16B); |
8167 | theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0, INS_OPTS_16B); |
8168 | theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3, INS_OPTS_16B); |
8169 | |
8170 | theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B); |
8171 | theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_8B); |
8172 | theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_8B); |
8173 | theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B); |
8174 | theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_16B); |
8175 | theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_16B); |
8176 | |
8177 | genDefineTempLabel(genCreateTempLabel()); |
8178 | |
8179 | theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE |
8180 | theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B); |
8181 | theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8182 | theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_2S); |
8183 | theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B); |
8184 | theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H); |
8185 | theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S); |
8186 | theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D); |
8187 | |
8188 | theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE |
8189 | theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B); |
8190 | theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H); |
8191 | theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_2S); |
8192 | theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B); |
8193 | theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_8H); |
8194 | theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_4S); |
8195 | theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V22, REG_V23, REG_V24, INS_OPTS_2D); |
8196 | |
8197 | genDefineTempLabel(genCreateTempLabel()); |
8198 | |
8199 | // saba vector |
8200 | theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8201 | theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8202 | theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8203 | theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8204 | theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8205 | theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8206 | |
8207 | // sabd vector |
8208 | theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8209 | theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8210 | theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8211 | theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8212 | theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8213 | theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8214 | |
8215 | // uaba vector |
8216 | theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8217 | theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8218 | theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8219 | theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8220 | theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8221 | theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8222 | |
8223 | // uabd vector |
8224 | theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8225 | theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8226 | theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8227 | theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8228 | theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8229 | theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8230 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
8231 | |
8232 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
8233 | // smax vector |
8234 | theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8235 | theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8236 | theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8237 | theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8238 | theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8239 | theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8240 | |
8241 | // smin vector |
8242 | theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8243 | theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8244 | theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8245 | theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8246 | theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8247 | theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8248 | |
8249 | // umax vector |
8250 | theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8251 | theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8252 | theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8253 | theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8254 | theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8255 | theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8256 | |
8257 | // umin vector |
8258 | theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8259 | theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8260 | theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8261 | theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8262 | theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8263 | theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8264 | |
8265 | // cmeq vector |
8266 | theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8267 | theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8268 | theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8269 | theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8270 | theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8271 | theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8272 | theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); |
8273 | theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8274 | |
8275 | // cmge vector |
8276 | theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8277 | theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8278 | theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8279 | theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8280 | theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8281 | theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8282 | theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); |
8283 | theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8284 | |
8285 | // cmgt vector |
8286 | theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8287 | theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8288 | theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8289 | theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8290 | theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8291 | theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8292 | theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); |
8293 | theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8294 | |
8295 | // cmhi vector |
8296 | theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8297 | theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8298 | theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8299 | theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8300 | theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8301 | theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8302 | theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); |
8303 | theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8304 | |
8305 | // cmhs vector |
8306 | theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8307 | theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8308 | theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8309 | theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8310 | theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8311 | theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8312 | theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); |
8313 | theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8314 | |
8315 | // ctst vector |
8316 | theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8317 | theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); |
8318 | theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); |
8319 | theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); |
8320 | theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8321 | theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8322 | theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); |
8323 | theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8324 | |
8325 | // faddp vector |
8326 | theEmitter->emitIns_R_R_R(INS_faddp, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8327 | theEmitter->emitIns_R_R_R(INS_faddp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8328 | theEmitter->emitIns_R_R_R(INS_faddp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8329 | |
8330 | // fcmeq vector |
8331 | theEmitter->emitIns_R_R_R(INS_fcmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8332 | theEmitter->emitIns_R_R_R(INS_fcmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8333 | theEmitter->emitIns_R_R_R(INS_fcmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8334 | |
8335 | // fcmge vector |
8336 | theEmitter->emitIns_R_R_R(INS_fcmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8337 | theEmitter->emitIns_R_R_R(INS_fcmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8338 | theEmitter->emitIns_R_R_R(INS_fcmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8339 | |
8340 | // fcmgt vector |
8341 | theEmitter->emitIns_R_R_R(INS_fcmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); |
8342 | theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8343 | theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); |
8344 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
8345 | |
8346 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
8347 | // |
8348 | // R_R_R vector multiply (mul, pmul), and R_R_R_I vector multiply by element (mul, mla, mls) |
8349 | // |
8350 | |
8351 | genDefineTempLabel(genCreateTempLabel()); |
8352 | |
8353 | theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); |
8354 | theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H); |
8355 | theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
8356 | theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B); |
8357 | theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H); |
8358 | theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); |
8359 | |
8360 | theEmitter->emitIns_R_R_R(INS_pmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B); |
8361 | theEmitter->emitIns_R_R_R(INS_pmul, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B); |
8362 | |
8363 | // 'mul' vector by elem |
8364 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S); |
8365 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S); |
8366 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S); |
8367 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H); |
8368 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H); |
8369 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H); |
8370 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S); |
8371 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S); |
8372 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S); |
8373 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); |
8374 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); |
8375 | theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); |
8376 | |
8377 | // 'mla' vector by elem |
8378 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S); |
8379 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S); |
8380 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S); |
8381 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H); |
8382 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H); |
8383 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H); |
8384 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S); |
8385 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S); |
8386 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S); |
8387 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); |
8388 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); |
8389 | theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); |
8390 | |
8391 | // 'mls' vector by elem |
8392 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S); |
8393 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S); |
8394 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S); |
8395 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H); |
8396 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H); |
8397 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H); |
8398 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S); |
8399 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S); |
8400 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S); |
8401 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); |
8402 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); |
8403 | theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); |
8404 | |
8405 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
8406 | |
8407 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
8408 | // |
8409 | // R_R_R and R_R_R_I (by element) floating point operations (fmla, fmls), one source/dest, and two sources |
8410 | // |
8411 | |
8412 | genDefineTempLabel(genCreateTempLabel()); |
8413 | |
8414 | theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
8415 | theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
8416 | theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
8417 | |
8418 | theEmitter->emitIns_R_R_R_I(INS_fmla, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE |
8419 | theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE |
8420 | theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); |
8421 | theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); |
8422 | theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); |
8423 | |
8424 | theEmitter->emitIns_R_R_R(INS_fmls, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); |
8425 | theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); |
8426 | theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); |
8427 | |
8428 | theEmitter->emitIns_R_R_R_I(INS_fmls, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE |
8429 | theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE |
8430 | theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); |
8431 | theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); |
8432 | theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); |
8433 | |
8434 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
8435 | |
8436 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
8437 | // |
8438 | // R_R_R_R floating point operations, one dest, and three sources |
8439 | // |
8440 | |
8441 | theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24); |
8442 | theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25); |
8443 | theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26); |
8444 | theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_4BYTE, REG_V3, REG_V11, REG_V19, REG_V27); |
8445 | |
8446 | theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_8BYTE, REG_V4, REG_V12, REG_V20, REG_V28); |
8447 | theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29); |
8448 | theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30); |
8449 | theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31); |
8450 | |
8451 | #endif |
8452 | |
8453 | #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |
8454 | |
8455 | BasicBlock* label = genCreateTempLabel(); |
8456 | genDefineTempLabel(label); |
8457 | instGen(INS_nop); |
8458 | instGen(INS_nop); |
8459 | instGen(INS_nop); |
8460 | instGen(INS_nop); |
8461 | theEmitter->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, label, REG_R0); |
8462 | |
8463 | #endif // ALL_ARM64_EMITTER_UNIT_TESTS |
8464 | |
8465 | printf("*************** End of genArm64EmitterUnitTests()\n" ); |
8466 | } |
8467 | #endif // defined(DEBUG) |
8468 | |
8469 | #endif // _TARGET_ARM64_ |
8470 | |