/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "hw/semihosting/semihost.h"
#include "exec/gen-icount.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"

#include "trace-tcg.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);

/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

static inline int get_a64_user_mem_index(DisasContext *s)
{
    /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
     * if EL1, access as if EL0; otherwise access at current EL
     */
    ARMMMUIdx useridx;

    switch (s->mmu_idx) {
    case ARMMMUIdx_S12NSE1:
        useridx = ARMMMUIdx_S12NSE0;
        break;
    case ARMMMUIdx_S1SE1:
        useridx = ARMMMUIdx_S1SE0;
        break;
    case ARMMMUIdx_S2NS:
        g_assert_not_reached();
    default:
        useridx = s->mmu_idx;
        break;
    }
    return arm_to_core_mmu_idx(useridx);
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
        tcg_temp_free_i32(zero);
        s->btype = 0;
    }
}

static void set_btype(DisasContext *s, int val)
{
    TCGv_i32 tcg_val;

    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);

    tcg_val = tcg_const_i32(val);
    tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
    tcg_temp_free_i32(tcg_val);
    s->btype = -1;
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (s->current_el >= 2) {
        /* FIXME: ARMv8.1-VHE S2 translation regime. */
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        if (tbi != 3) {
            TCGv_i64 tcg_zero = tcg_const_i64(0);

            /*
             * The two TBI bits differ.
             * If tbi0, then !tbi1: only use the extension if positive.
             * if !tbi0, then tbi1: only use the extension if negative.
             */
            tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
                                dst, dst, tcg_zero, dst, src);
            tcg_temp_free_i64(tcg_zero);
        }
    }
}
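
/*
 * A worked example of the mixed-TBI case above (illustrative values):
 * with tbi == 1 (TBI0 set, TBI1 clear), a tagged address with bit 55
 * clear, e.g. 0x5a00_1234_5678_9abc, takes the sign-extension and
 * becomes 0x0000_1234_5678_9abc, while an address with bit 55 set is
 * left unmodified, because the movcond selects the original SRC for
 * the half of the address space whose TBI bit is clear.
 */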

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
}

/*
 * Return a "clean" address for ADDR according to TBID.
 * This is always a fresh temporary, as we need to be able to
 * increment this independently of a dirty write-back address.
 */
static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = new_tmp_a64(s);
    gen_top_byte_ignore(s, clean, addr, s->tbid);
    return clean;
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly. The NE/EQ comparisons are also fine with this choice. */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
{
    gen_a64_set_pc_im(pc);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(pc);
    gen_exception(excp, syndrome, target_el);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    TCGv_i32 tcg_syn;

    gen_a64_set_pc_im(s->pc_curr);
    tcg_syn = tcg_const_i32(syndrome);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
    tcg_temp_free_i32(tcg_syn);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep (either QEMU's or the ARM
     * debug architecture kind) or deterministic io
     */
    if (s->base.singlestep_enabled || s->ss_active ||
        (tb_cflags(s->base.tb) & CF_LAST_IO)) {
        return false;
    }

#ifndef CONFIG_USER_ONLY
    /* Only link tbs from inside the same guest page */
    if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }
#endif

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->base.tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb(tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->base.singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
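
/*
 * For example, "add sp, sp, #16" (ADD immediate) treats register 31 as
 * SP and must be accessed via cpu_reg_sp(), whereas "add x0, x1, xzr"
 * (ADD shifted register) treats 31 as XZR: reads come back as zero and
 * writes through cpu_reg() land in a discarded temporary.
 */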

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    if (!is_q) {
        TCGv_i64 tcg_zero = tcg_const_i64(0);
        tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
        tcg_temp_free_i64(tcg_zero);
    }
    if (vsz > 16) {
        tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
    }
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}

TCGv_ptr get_fpstatus_ptr(bool is_f16)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR; there
     * is no equivalent of the A32 Neon "standard FPSCR value".
     * However half-precision operations operate under a different
     * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
     */
    if (is_f16) {
        offset = offsetof(CPUARMState, vfp.fp_status_f16);
    } else {
        offset = offsetof(CPUARMState, vfp.fp_status);
    }
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}
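
/*
 * As a usage sketch (illustrative, not tied to any one decoder below):
 * a vector AND can be emitted with
 *
 *     gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
 *
 * The expander resolves the register offsets, limits the operation to
 * 8 or 16 bytes according to is_q, and passes vec_full_reg_size() so
 * that the gvec code also zeroes any bytes of the destination beyond
 * the operation width.
 */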

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an op descriptor.
 */
static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
                          int rn, int64_t imm, const GVecGen2i *gvec_op)
{
    tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
                    is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
}

/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
                         int rn, int rm, const GVecGen3 *gvec_op)
{
    tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
                   vec_full_reg_offset(s, rm), is_q ? 16 : 8,
                   vec_full_reg_size(s), gvec_op);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + env pointer operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, gen_helper_gvec_3_ptr *fn)
{
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), cpu_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

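        /*
         * A note on the trick below: add2 computes the 128-bit sum
         * {flag:result} = {0:t0} + {0:t1}, so the low bit of FLAG is
         * exactly the carry out of the 64-bit addition.
         */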
        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

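        /*
         * ARM's C flag for subtraction is the inverted borrow: C is set
         * when no borrow occurs, i.e. when t0 >= t1 as unsigned values,
         * which is what the GEU setcond below computes.
         */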
        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

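        /*
         * Two chained add2 ops: the first adds the incoming carry to t0,
         * the second adds t1; CF_64 carries the carry out of each step,
         * so it ends up holding the carry out of t0 + t1 + CF overall.
         */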
        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    MemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

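        /*
         * For a 128-bit big-endian store the most significant half of
         * the vector must land at the lower address, so each 64-bit
         * half picks its address according to s->be_data below.
         */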
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        MemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);

    clear_vec_high(s, true, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size, MemOp endian)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size, MemOp endian)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static inline bool fp_access_check(DisasContext *s)
{
    assert(!s->fp_access_checked);
    s->fp_access_checked = true;

    if (!s->fp_excp_el) {
        return true;
    }

    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                       syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
    return false;
}

/* Check that SVE access is enabled. If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 */
bool sve_access_check(DisasContext *s)
{
    if (s->sve_excp_el) {
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(),
                           s->sve_excp_el);
        return false;
    }
    return fp_access_check(s);
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
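
/*
 * For instance, "add x0, x1, w2, uxtw #2" is decoded with option == 2
 * and shift == 2, so the sequence above emits a 32-bit zero-extension
 * of the W2 value followed by a left shift by 2 before the addition.
 */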

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
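
/*
 * A sketch of how such a table is used (names and bit patterns here are
 * purely illustrative):
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
 *         { 0x00000000, 0x00000000, NULL }
 *     };
 *     AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);
 *     if (fn) {
 *         fn(s, insn);
 *     }
 */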

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, addr);
}

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    reset_btype(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->base.pc_next);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
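    /* e.g. "tbnz x3, #40, label" encodes b5 = 1 and b40 = 8, giving 40 */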
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();

    reset_btype(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->base.pc_next);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        TCGLabel *label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->base.pc_next);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;
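    /* e.g. PACIASP has CRm == 0b0011, op2 == 0b001: selector 0b11001 */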

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
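        /* e.g. "dmb ishst" (CRm == 0b1010) selects MBReqTypes_Writes */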
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        reset_btype(s);
        gen_goto_tb(s, 0, s->base.pc_next);
        return;

    case 7: /* SB */
        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
            goto do_unallocated;
        }
        /*
         * TODO: There is no speculation barrier opcode for TCG;
         * MB and end the TB instead.
         */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
        gen_goto_tb(s, 0, s->base.pc_next);
        return;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}

static void gen_xaflag(void)
{
    TCGv_i32 z = tcg_temp_new_i32();

    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);

    /*
     * (!C & !Z) << 31
     * (!(C | Z)) << 31
     * ~((C | Z) << 31)
     * ~-(C | Z)
     * (C | Z) - 1
     */
    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);

    /* !(Z & C) */
    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);

    /* (!C & Z) << 31 -> -(Z & ~C) */
    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);

    /* C | Z */
    tcg_gen_or_i32(cpu_CF, cpu_CF, z);

    tcg_temp_free_i32(z);
}

static void gen_axflag(void)
{
    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);     /* V ? -1 : 0 */
    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */

    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);

    tcg_gen_movi_i32(cpu_NF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGv_i32 t1;
    int op = op1 << 3 | op2;

    /* End the TB by default, chaining is ok. */
    s->base.is_jmp = DISAS_TOO_MANY;

    switch (op) {
    case 0x00: /* CFINV */
        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
            goto do_unallocated;
        }
        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x01: /* XAFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_xaflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x02: /* AXFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_axflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            goto do_unallocated;
        }
        t1 = tcg_const_i32(crm & PSTATE_SP);
        gen_helper_msr_i_spsel(cpu_env, t1);
        tcg_temp_free_i32(t1);
        break;

    case 0x1e: /* DAIFSet */
        t1 = tcg_const_i32(crm);
        gen_helper_msr_i_daifset(cpu_env, t1);
        tcg_temp_free_i32(t1);
        break;

    case 0x1f: /* DAIFClear */
        t1 = tcg_const_i32(crm);
        gen_helper_msr_i_daifclear(cpu_env, t1);
        tcg_temp_free_i32(t1);
        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
        s->base.is_jmp = DISAS_UPDATE;
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}

static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
    /* bit 30, Z */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}

/* MRS - move from system register
 * MSR (register) - move to system register
 * SYS
 * SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn, tcg_isread;
        uint32_t syndrome;

        gen_a64_set_pc_im(s->pc_curr);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        tcg_isread = tcg_const_i32(isread);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
        tcg_temp_free_i32(tcg_isread);
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_set_pc_im(s->pc_curr);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }
    if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
        return;
    } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
        return;
    }

    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        s->base.is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->base.is_jmp = DISAS_UPDATE;
    }
}

/* System
 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 */
static void disas_system(DisasContext *s, uint32_t insn)
{
    unsigned int l, op0, op1, crn, crm, op2, rt;
    l = extract32(insn, 21, 1);
    op0 = extract32(insn, 19, 2);
    op1 = extract32(insn, 16, 3);
    crn = extract32(insn, 12, 4);
    crm = extract32(insn, 8, 4);
    op2 = extract32(insn, 5, 3);
    rt = extract32(insn, 0, 5);

    if (op0 == 0) {
        if (l || rt != 31) {
            unallocated_encoding(s);
            return;
        }
        switch (crn) {
        case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
            handle_hint(s, insn, op1, op2, crm);
            break;
        case 3: /* CLREX, DSB, DMB, ISB */
            handle_sync(s, insn, op1, op2, crm);
            break;
        case 4: /* MSR (immediate) */
            handle_msr_i(s, insn, op1, op2, crm);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        return;
    }
    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
}

/* Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------+-----+------------------------+-----+----+
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1: /* SVC */
            gen_ss_advance(s);
            gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
                               syn_aa64_svc(imm16), default_exception_el(s));
            break;
        case 2: /* HVC */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s->pc_curr);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            gen_exception_insn(s, s->base.pc_next, EXCP_HVC,
                               syn_aa64_hvc(imm16), 2);
            break;
        case 3: /* SMC */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            gen_a64_set_pc_im(s->pc_curr);
            tmp = tcg_const_i32(syn_aa64_smc(imm16));
            gen_helper_pre_smc(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_ss_advance(s);
            gen_exception_insn(s, s->base.pc_next, EXCP_SMC,
                               syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally, it is an external halting debug instruction.
         * Since QEMU doesn't implement external debug, we treat this as
         * the architecture requires when halting debug is disabled: it
         * will UNDEF.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
1920 if (semihosting_enabled() && imm16 == 0xf000) {
1921#ifndef CONFIG_USER_ONLY
1922 /* In system mode, don't allow userspace access to semihosting,
1923 * to provide some semblance of security (and for consistency
1924 * with our 32-bit semihosting).
1925 */
1926 if (s->current_el == 0) {
1927 unsupported_encoding(s, insn);
1928 break;
1929 }
1930#endif
1931 gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
1932 } else {
1933 unsupported_encoding(s, insn);
1934 }
1935 break;
1936 case 5:
1937 if (op2_ll < 1 || op2_ll > 3) {
1938 unallocated_encoding(s);
1939 break;
1940 }
1941 /* DCPS1, DCPS2, DCPS3 */
1942 unsupported_encoding(s, insn);
1943 break;
1944 default:
1945 unallocated_encoding(s);
1946 break;
1947 }
1948}
1949
1950/* Unconditional branch (register)
1951 * 31 25 24 21 20 16 15 10 9 5 4 0
1952 * +---------------+-------+-------+-------+------+-------+
1953 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
1954 * +---------------+-------+-------+-------+------+-------+
1955 */
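/* Worked example: RET (i.e. "ret x30") is 0xd65f03c0: opc=2, op2=0x1f,
 * op3=0, Rn=30 and op4=0, taking the plain BR/BLR/RET path below.
 */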
1956static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1957{
1958 unsigned int opc, op2, op3, rn, op4;
1959 unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */
1960 TCGv_i64 dst;
1961 TCGv_i64 modifier;
1962
1963 opc = extract32(insn, 21, 4);
1964 op2 = extract32(insn, 16, 5);
1965 op3 = extract32(insn, 10, 6);
1966 rn = extract32(insn, 5, 5);
1967 op4 = extract32(insn, 0, 5);
1968
1969 if (op2 != 0x1f) {
1970 goto do_unallocated;
1971 }
1972
1973 switch (opc) {
1974 case 0: /* BR */
1975 case 1: /* BLR */
1976 case 2: /* RET */
1977 btype_mod = opc;
1978 switch (op3) {
1979 case 0:
1980 /* BR, BLR, RET */
1981 if (op4 != 0) {
1982 goto do_unallocated;
1983 }
1984 dst = cpu_reg(s, rn);
1985 break;
1986
1987 case 2:
1988 case 3:
1989 if (!dc_isar_feature(aa64_pauth, s)) {
1990 goto do_unallocated;
1991 }
1992 if (opc == 2) {
1993 /* RETAA, RETAB */
1994 if (rn != 0x1f || op4 != 0x1f) {
1995 goto do_unallocated;
1996 }
1997 rn = 30;
1998 modifier = cpu_X[31];
1999 } else {
2000 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2001 if (op4 != 0x1f) {
2002 goto do_unallocated;
2003 }
2004 modifier = new_tmp_a64_zero(s);
2005 }
2006 if (s->pauth_active) {
2007 dst = new_tmp_a64(s);
2008 if (op3 == 2) {
2009 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2010 } else {
2011 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2012 }
2013 } else {
2014 dst = cpu_reg(s, rn);
2015 }
2016 break;
2017
2018 default:
2019 goto do_unallocated;
2020 }
2021 gen_a64_set_pc(s, dst);
        /* BLR also needs to write the return address to x30 */
2023 if (opc == 1) {
2024 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2025 }
2026 break;
2027
2028 case 8: /* BRAA */
2029 case 9: /* BLRAA */
2030 if (!dc_isar_feature(aa64_pauth, s)) {
2031 goto do_unallocated;
2032 }
2033 if ((op3 & ~1) != 2) {
2034 goto do_unallocated;
2035 }
2036 btype_mod = opc & 1;
2037 if (s->pauth_active) {
2038 dst = new_tmp_a64(s);
2039 modifier = cpu_reg_sp(s, op4);
2040 if (op3 == 2) {
2041 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2042 } else {
2043 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2044 }
2045 } else {
2046 dst = cpu_reg(s, rn);
2047 }
2048 gen_a64_set_pc(s, dst);
        /* BLRAA also needs to write the return address to x30 */
2050 if (opc == 9) {
2051 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2052 }
2053 break;
2054
2055 case 4: /* ERET */
2056 if (s->current_el == 0) {
2057 goto do_unallocated;
2058 }
2059 switch (op3) {
2060 case 0: /* ERET */
2061 if (op4 != 0) {
2062 goto do_unallocated;
2063 }
2064 dst = tcg_temp_new_i64();
2065 tcg_gen_ld_i64(dst, cpu_env,
2066 offsetof(CPUARMState, elr_el[s->current_el]));
2067 break;
2068
2069 case 2: /* ERETAA */
2070 case 3: /* ERETAB */
2071 if (!dc_isar_feature(aa64_pauth, s)) {
2072 goto do_unallocated;
2073 }
2074 if (rn != 0x1f || op4 != 0x1f) {
2075 goto do_unallocated;
2076 }
2077 dst = tcg_temp_new_i64();
2078 tcg_gen_ld_i64(dst, cpu_env,
2079 offsetof(CPUARMState, elr_el[s->current_el]));
2080 if (s->pauth_active) {
2081 modifier = cpu_X[31];
2082 if (op3 == 2) {
2083 gen_helper_autia(dst, cpu_env, dst, modifier);
2084 } else {
2085 gen_helper_autib(dst, cpu_env, dst, modifier);
2086 }
2087 }
2088 break;
2089
2090 default:
2091 goto do_unallocated;
2092 }
2093 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2094 gen_io_start();
2095 }
2096
2097 gen_helper_exception_return(cpu_env, dst);
2098 tcg_temp_free_i64(dst);
        /* Must exit the loop to check for unmasked IRQs */
2100 s->base.is_jmp = DISAS_EXIT;
2101 return;
2102
2103 case 5: /* DRPS */
2104 if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2105 goto do_unallocated;
2106 } else {
2107 unsupported_encoding(s, insn);
2108 }
2109 return;
2110
2111 default:
2112 do_unallocated:
2113 unallocated_encoding(s);
2114 return;
2115 }
2116
2117 switch (btype_mod) {
2118 case 0: /* BR */
2119 if (dc_isar_feature(aa64_bti, s)) {
2120 /* BR to {x16,x17} or !guard -> 1, else 3. */
2121 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2122 }
2123 break;
2124
2125 case 1: /* BLR */
2126 if (dc_isar_feature(aa64_bti, s)) {
2127 /* BLR sets BTYPE to 2, regardless of source guarded page. */
2128 set_btype(s, 2);
2129 }
2130 break;
2131
2132 default: /* RET or none of the above. */
2133 /* BTYPE will be set to 0 by normal end-of-insn processing. */
2134 break;
2135 }
2136
2137 s->base.is_jmp = DISAS_JUMP;
2138}
2139
2140/* Branches, exception generating and system instructions */
2141static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2142{
2143 switch (extract32(insn, 25, 7)) {
2144 case 0x0a: case 0x0b:
2145 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2146 disas_uncond_b_imm(s, insn);
2147 break;
2148 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2149 disas_comp_b_imm(s, insn);
2150 break;
2151 case 0x1b: case 0x5b: /* Test & branch (immediate) */
2152 disas_test_b_imm(s, insn);
2153 break;
2154 case 0x2a: /* Conditional branch (immediate) */
2155 disas_cond_b_imm(s, insn);
2156 break;
2157 case 0x6a: /* Exception generation / System */
2158 if (insn & (1 << 24)) {
2159 if (extract32(insn, 22, 2) == 0) {
2160 disas_system(s, insn);
2161 } else {
2162 unallocated_encoding(s);
2163 }
2164 } else {
2165 disas_exc(s, insn);
2166 }
2167 break;
2168 case 0x6b: /* Unconditional branch (register) */
2169 disas_uncond_b_reg(s, insn);
2170 break;
2171 default:
2172 unallocated_encoding(s);
2173 break;
2174 }
2175}
2176
2177/*
2178 * Load/Store exclusive instructions are implemented by remembering
2179 * the value/address loaded, and seeing if these are the same
2180 * when the store is performed. This is not actually the architecturally
2181 * mandated semantics, but it works for typical guest code sequences
2182 * and avoids having to monitor regular stores.
2183 *
2184 * The store exclusive uses the atomic cmpxchg primitives to avoid
2185 * races in multi-threaded linux-user and when MTTCG softmmu is
2186 * enabled.
2187 */
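/* A typical guest sequence that this scheme handles correctly is an
 * atomic increment implemented as an exclusive retry loop:
 *
 *   retry: ldxr  x0, [x2]
 *          add   x0, x0, #1
 *          stxr  w1, x0, [x2]
 *          cbnz  w1, retry
 */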
2188static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2189 TCGv_i64 addr, int size, bool is_pair)
2190{
2191 int idx = get_mem_index(s);
2192 MemOp memop = s->be_data;
2193
2194 g_assert(size <= 3);
2195 if (is_pair) {
2196 g_assert(size >= 2);
2197 if (size == 2) {
2198 /* The pair must be single-copy atomic for the doubleword. */
2199 memop |= MO_64 | MO_ALIGN;
2200 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2201 if (s->be_data == MO_LE) {
2202 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2203 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2204 } else {
2205 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2206 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2207 }
2208 } else {
            /* The pair must be single-copy atomic for *each* doubleword,
               not for the entire quadword; it must, however, be quadword
               aligned. */
2211 memop |= MO_64;
2212 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2213 memop | MO_ALIGN_16);
2214
2215 TCGv_i64 addr2 = tcg_temp_new_i64();
2216 tcg_gen_addi_i64(addr2, addr, 8);
2217 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2218 tcg_temp_free_i64(addr2);
2219
2220 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2221 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2222 }
2223 } else {
2224 memop |= size | MO_ALIGN;
2225 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2226 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2227 }
2228 tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2229}
2230
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, bool is_pair)
2233{
2234 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2235 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
2236 * [addr] = {Rt};
2237 * if (is_pair) {
2238 * [addr + datasize] = {Rt2};
2239 * }
2240 * {Rd} = 0;
2241 * } else {
2242 * {Rd} = 1;
2243 * }
2244 * env->exclusive_addr = -1;
2245 */
2246 TCGLabel *fail_label = gen_new_label();
2247 TCGLabel *done_label = gen_new_label();
2248 TCGv_i64 tmp;
2249
2250 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2251
2252 tmp = tcg_temp_new_i64();
2253 if (is_pair) {
2254 if (size == 2) {
2255 if (s->be_data == MO_LE) {
2256 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2257 } else {
2258 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2259 }
2260 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2261 cpu_exclusive_val, tmp,
2262 get_mem_index(s),
2263 MO_64 | MO_ALIGN | s->be_data);
2264 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2265 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2266 if (!HAVE_CMPXCHG128) {
2267 gen_helper_exit_atomic(cpu_env);
2268 s->base.is_jmp = DISAS_NORETURN;
2269 } else if (s->be_data == MO_LE) {
2270 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2271 cpu_exclusive_addr,
2272 cpu_reg(s, rt),
2273 cpu_reg(s, rt2));
2274 } else {
2275 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2276 cpu_exclusive_addr,
2277 cpu_reg(s, rt),
2278 cpu_reg(s, rt2));
2279 }
2280 } else if (s->be_data == MO_LE) {
2281 gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2282 cpu_reg(s, rt), cpu_reg(s, rt2));
2283 } else {
2284 gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2285 cpu_reg(s, rt), cpu_reg(s, rt2));
2286 }
2287 } else {
2288 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2289 cpu_reg(s, rt), get_mem_index(s),
2290 size | MO_ALIGN | s->be_data);
2291 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2292 }
2293 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2294 tcg_temp_free_i64(tmp);
2295 tcg_gen_br(done_label);
2296
2297 gen_set_label(fail_label);
2298 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2299 gen_set_label(done_label);
2300 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2301}
2302
2303static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2304 int rn, int size)
2305{
2306 TCGv_i64 tcg_rs = cpu_reg(s, rs);
2307 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2308 int memidx = get_mem_index(s);
2309 TCGv_i64 clean_addr;
2310
2311 if (rn == 31) {
2312 gen_check_sp_alignment(s);
2313 }
2314 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2315 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2316 size | MO_ALIGN | s->be_data);
2317}
2318
2319static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2320 int rn, int size)
2321{
2322 TCGv_i64 s1 = cpu_reg(s, rs);
2323 TCGv_i64 s2 = cpu_reg(s, rs + 1);
2324 TCGv_i64 t1 = cpu_reg(s, rt);
2325 TCGv_i64 t2 = cpu_reg(s, rt + 1);
2326 TCGv_i64 clean_addr;
2327 int memidx = get_mem_index(s);
2328
2329 if (rn == 31) {
2330 gen_check_sp_alignment(s);
2331 }
2332 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2333
2334 if (size == 2) {
2335 TCGv_i64 cmp = tcg_temp_new_i64();
2336 TCGv_i64 val = tcg_temp_new_i64();
2337
2338 if (s->be_data == MO_LE) {
2339 tcg_gen_concat32_i64(val, t1, t2);
2340 tcg_gen_concat32_i64(cmp, s1, s2);
2341 } else {
2342 tcg_gen_concat32_i64(val, t2, t1);
2343 tcg_gen_concat32_i64(cmp, s2, s1);
2344 }
2345
2346 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2347 MO_64 | MO_ALIGN | s->be_data);
2348 tcg_temp_free_i64(val);
2349
2350 if (s->be_data == MO_LE) {
2351 tcg_gen_extr32_i64(s1, s2, cmp);
2352 } else {
2353 tcg_gen_extr32_i64(s2, s1, cmp);
2354 }
2355 tcg_temp_free_i64(cmp);
2356 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2357 if (HAVE_CMPXCHG128) {
2358 TCGv_i32 tcg_rs = tcg_const_i32(rs);
2359 if (s->be_data == MO_LE) {
2360 gen_helper_casp_le_parallel(cpu_env, tcg_rs,
2361 clean_addr, t1, t2);
2362 } else {
2363 gen_helper_casp_be_parallel(cpu_env, tcg_rs,
2364 clean_addr, t1, t2);
2365 }
2366 tcg_temp_free_i32(tcg_rs);
2367 } else {
2368 gen_helper_exit_atomic(cpu_env);
2369 s->base.is_jmp = DISAS_NORETURN;
2370 }
2371 } else {
2372 TCGv_i64 d1 = tcg_temp_new_i64();
2373 TCGv_i64 d2 = tcg_temp_new_i64();
2374 TCGv_i64 a2 = tcg_temp_new_i64();
2375 TCGv_i64 c1 = tcg_temp_new_i64();
2376 TCGv_i64 c2 = tcg_temp_new_i64();
2377 TCGv_i64 zero = tcg_const_i64(0);
2378
2379 /* Load the two words, in memory order. */
2380 tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
2381 MO_64 | MO_ALIGN_16 | s->be_data);
2382 tcg_gen_addi_i64(a2, clean_addr, 8);
2383 tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
2384
2385 /* Compare the two words, also in memory order. */
2386 tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2387 tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2388 tcg_gen_and_i64(c2, c2, c1);
2389
2390 /* If compare equal, write back new data, else write back old data. */
2391 tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2392 tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2393 tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
2394 tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2395 tcg_temp_free_i64(a2);
2396 tcg_temp_free_i64(c1);
2397 tcg_temp_free_i64(c2);
2398 tcg_temp_free_i64(zero);
2399
2400 /* Write back the data from memory to Rs. */
2401 tcg_gen_mov_i64(s1, d1);
2402 tcg_gen_mov_i64(s2, d2);
2403 tcg_temp_free_i64(d1);
2404 tcg_temp_free_i64(d2);
2405 }
2406}
2407
/* Compute the Sixty-Four bit (SF) register size field used in the ISS.
 * This logic is derived from the ARMv8 specs for LDR (shared decode
 * for all encodings).
 */
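/* For example: an unsigned LDRB (size=0, opc=01) yields regsize 32 and
 * so SF=0, while LDRSW (size=2, opc=10, opc<0>=0) yields regsize 64
 * and SF=1.
 */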
2411static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2412{
2413 int opc0 = extract32(opc, 0, 1);
2414 int regsize;
2415
2416 if (is_signed) {
2417 regsize = opc0 ? 32 : 64;
2418 } else {
2419 regsize = size == 3 ? 64 : 32;
2420 }
2421 return regsize == 64;
2422}
2423
2424/* Load/store exclusive
2425 *
2426 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
2427 * +-----+-------------+----+---+----+------+----+-------+------+------+
2428 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
2429 * +-----+-------------+----+---+----+------+----+-------+------+------+
2430 *
2431 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2432 * L: 0 -> store, 1 -> load
2433 * o2: 0 -> exclusive, 1 -> not
2434 * o1: 0 -> single register, 1 -> register pair
2435 * o0: 1 -> load-acquire/store-release, 0 -> not
2436 */
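/* Worked example: "ldaxr x0, [x1]" is 0xc85ffc20: size=3, o2=0, L=1,
 * o1=0, o0=1, so o2_L_o1_o0 below is 0x5 and the LDAXR case is taken.
 */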
2437static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2438{
2439 int rt = extract32(insn, 0, 5);
2440 int rn = extract32(insn, 5, 5);
2441 int rt2 = extract32(insn, 10, 5);
2442 int rs = extract32(insn, 16, 5);
2443 int is_lasr = extract32(insn, 15, 1);
2444 int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2445 int size = extract32(insn, 30, 2);
2446 TCGv_i64 clean_addr;
2447
2448 switch (o2_L_o1_o0) {
2449 case 0x0: /* STXR */
2450 case 0x1: /* STLXR */
2451 if (rn == 31) {
2452 gen_check_sp_alignment(s);
2453 }
2454 if (is_lasr) {
2455 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2456 }
2457 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2458 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2459 return;
2460
2461 case 0x4: /* LDXR */
2462 case 0x5: /* LDAXR */
2463 if (rn == 31) {
2464 gen_check_sp_alignment(s);
2465 }
2466 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2467 s->is_ldex = true;
2468 gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2469 if (is_lasr) {
2470 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2471 }
2472 return;
2473
2474 case 0x8: /* STLLR */
2475 if (!dc_isar_feature(aa64_lor, s)) {
2476 break;
2477 }
2478 /* StoreLORelease is the same as Store-Release for QEMU. */
2479 /* fall through */
2480 case 0x9: /* STLR */
2481 /* Generate ISS for non-exclusive accesses including LASR. */
2482 if (rn == 31) {
2483 gen_check_sp_alignment(s);
2484 }
2485 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2486 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2487 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
2488 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2489 return;
2490
2491 case 0xc: /* LDLAR */
2492 if (!dc_isar_feature(aa64_lor, s)) {
2493 break;
2494 }
2495 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
2496 /* fall through */
2497 case 0xd: /* LDAR */
2498 /* Generate ISS for non-exclusive accesses including LASR. */
2499 if (rn == 31) {
2500 gen_check_sp_alignment(s);
2501 }
2502 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2503 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
2504 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2505 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2506 return;
2507
2508 case 0x2: case 0x3: /* CASP / STXP */
2509 if (size & 2) { /* STXP / STLXP */
2510 if (rn == 31) {
2511 gen_check_sp_alignment(s);
2512 }
2513 if (is_lasr) {
2514 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2515 }
2516 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2517 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2518 return;
2519 }
2520 if (rt2 == 31
2521 && ((rt | rs) & 1) == 0
2522 && dc_isar_feature(aa64_atomics, s)) {
2523 /* CASP / CASPL */
2524 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2525 return;
2526 }
2527 break;
2528
2529 case 0x6: case 0x7: /* CASPA / LDXP */
2530 if (size & 2) { /* LDXP / LDAXP */
2531 if (rn == 31) {
2532 gen_check_sp_alignment(s);
2533 }
2534 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2535 s->is_ldex = true;
2536 gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2537 if (is_lasr) {
2538 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2539 }
2540 return;
2541 }
2542 if (rt2 == 31
2543 && ((rt | rs) & 1) == 0
2544 && dc_isar_feature(aa64_atomics, s)) {
2545 /* CASPA / CASPAL */
2546 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2547 return;
2548 }
2549 break;
2550
2551 case 0xa: /* CAS */
2552 case 0xb: /* CASL */
2553 case 0xe: /* CASA */
2554 case 0xf: /* CASAL */
2555 if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2556 gen_compare_and_swap(s, rs, rt, rn, size);
2557 return;
2558 }
2559 break;
2560 }
2561 unallocated_encoding(s);
2562}
2563
2564/*
2565 * Load register (literal)
2566 *
2567 * 31 30 29 27 26 25 24 23 5 4 0
2568 * +-----+-------+---+-----+-------------------+-------+
2569 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
2570 * +-----+-------+---+-----+-------------------+-------+
2571 *
2572 * V: 1 -> vector (simd/fp)
2573 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
 *                    10 -> 32 bit signed, 11 -> prefetch
2575 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2576 */
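/* The address is always PC-relative: an "ldr x0, <label>" whose
 * literal sits 8 bytes after the instruction encodes imm19 = 2, giving
 * imm = 8 below.
 */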
2577static void disas_ld_lit(DisasContext *s, uint32_t insn)
2578{
2579 int rt = extract32(insn, 0, 5);
2580 int64_t imm = sextract32(insn, 5, 19) << 2;
2581 bool is_vector = extract32(insn, 26, 1);
2582 int opc = extract32(insn, 30, 2);
2583 bool is_signed = false;
2584 int size = 2;
2585 TCGv_i64 tcg_rt, clean_addr;
2586
2587 if (is_vector) {
2588 if (opc == 3) {
2589 unallocated_encoding(s);
2590 return;
2591 }
2592 size = 2 + opc;
2593 if (!fp_access_check(s)) {
2594 return;
2595 }
2596 } else {
2597 if (opc == 3) {
2598 /* PRFM (literal) : prefetch */
2599 return;
2600 }
2601 size = 2 + extract32(opc, 0, 1);
2602 is_signed = extract32(opc, 1, 1);
2603 }
2604
2605 tcg_rt = cpu_reg(s, rt);
2606
2607 clean_addr = tcg_const_i64(s->pc_curr + imm);
2608 if (is_vector) {
2609 do_fp_ld(s, rt, clean_addr, size);
2610 } else {
2611 /* Only unsigned 32bit loads target 32bit registers. */
2612 bool iss_sf = opc != 0;
2613
2614 do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
2615 true, rt, iss_sf, false);
2616 }
2617 tcg_temp_free_i64(clean_addr);
2618}
2619
2620/*
2621 * LDNP (Load Pair - non-temporal hint)
2622 * LDP (Load Pair - non vector)
2623 * LDPSW (Load Pair Signed Word - non vector)
2624 * STNP (Store Pair - non-temporal hint)
2625 * STP (Store Pair - non vector)
2626 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2627 * LDP (Load Pair of SIMD&FP)
2628 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2629 * STP (Store Pair of SIMD&FP)
2630 *
2631 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2633 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt |
2634 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2635 *
2636 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
2637 * LDPSW 01
2638 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2639 * V: 0 -> GPR, 1 -> Vector
2640 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2641 * 10 -> signed offset, 11 -> pre-index
2642 * L: 0 -> Store 1 -> Load
2643 *
 * Rt, Rt2 = GPR or SIMD registers to be transferred
2645 * Rn = general purpose register containing address
2646 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2647 */
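/* For example "ldp x0, x1, [sp, #16]" encodes opc=10 (64 bit, size 3),
 * index=10 (signed offset) and imm7=2; the offset is scaled by the
 * size below to give the byte offset 16.
 */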
2648static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2649{
2650 int rt = extract32(insn, 0, 5);
2651 int rn = extract32(insn, 5, 5);
2652 int rt2 = extract32(insn, 10, 5);
2653 uint64_t offset = sextract64(insn, 15, 7);
2654 int index = extract32(insn, 23, 2);
2655 bool is_vector = extract32(insn, 26, 1);
2656 bool is_load = extract32(insn, 22, 1);
2657 int opc = extract32(insn, 30, 2);
2658
2659 bool is_signed = false;
2660 bool postindex = false;
2661 bool wback = false;
2662
2663 TCGv_i64 clean_addr, dirty_addr;
2664
2665 int size;
2666
2667 if (opc == 3) {
2668 unallocated_encoding(s);
2669 return;
2670 }
2671
2672 if (is_vector) {
2673 size = 2 + opc;
2674 } else {
2675 size = 2 + extract32(opc, 1, 1);
2676 is_signed = extract32(opc, 0, 1);
2677 if (!is_load && is_signed) {
2678 unallocated_encoding(s);
2679 return;
2680 }
2681 }
2682
2683 switch (index) {
2684 case 1: /* post-index */
2685 postindex = true;
2686 wback = true;
2687 break;
2688 case 0:
2689 /* signed offset with "non-temporal" hint. Since we don't emulate
2690 * caches we don't care about hints to the cache system about
2691 * data access patterns, and handle this identically to plain
2692 * signed offset.
2693 */
2694 if (is_signed) {
2695 /* There is no non-temporal-hint version of LDPSW */
2696 unallocated_encoding(s);
2697 return;
2698 }
2699 postindex = false;
2700 break;
2701 case 2: /* signed offset, rn not updated */
2702 postindex = false;
2703 break;
2704 case 3: /* pre-index */
2705 postindex = false;
2706 wback = true;
2707 break;
2708 }
2709
2710 if (is_vector && !fp_access_check(s)) {
2711 return;
2712 }
2713
2714 offset <<= size;
2715
2716 if (rn == 31) {
2717 gen_check_sp_alignment(s);
2718 }
2719
2720 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2721 if (!postindex) {
2722 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2723 }
2724 clean_addr = clean_data_tbi(s, dirty_addr);
2725
2726 if (is_vector) {
2727 if (is_load) {
2728 do_fp_ld(s, rt, clean_addr, size);
2729 } else {
2730 do_fp_st(s, rt, clean_addr, size);
2731 }
2732 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2733 if (is_load) {
2734 do_fp_ld(s, rt2, clean_addr, size);
2735 } else {
2736 do_fp_st(s, rt2, clean_addr, size);
2737 }
2738 } else {
2739 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2740 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2741
2742 if (is_load) {
2743 TCGv_i64 tmp = tcg_temp_new_i64();
2744
2745 /* Do not modify tcg_rt before recognizing any exception
2746 * from the second load.
2747 */
2748 do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
2749 false, 0, false, false);
2750 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2751 do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
2752 false, 0, false, false);
2753
2754 tcg_gen_mov_i64(tcg_rt, tmp);
2755 tcg_temp_free_i64(tmp);
2756 } else {
2757 do_gpr_st(s, tcg_rt, clean_addr, size,
2758 false, 0, false, false);
2759 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2760 do_gpr_st(s, tcg_rt2, clean_addr, size,
2761 false, 0, false, false);
2762 }
2763 }
2764
2765 if (wback) {
2766 if (postindex) {
2767 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2768 }
2769 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2770 }
2771}
2772
2773/*
2774 * Load/store (immediate post-indexed)
2775 * Load/store (immediate pre-indexed)
2776 * Load/store (unscaled immediate)
2777 *
2778 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
2779 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2780 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
2781 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2782 *
 * idx = 01 -> post-indexed, 11 -> pre-indexed,
 *       00 -> unscaled imm. (no writeback), 10 -> unprivileged
 * V = 0 -> non-vector
 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
 * opc: 00 -> store, 01 -> unsigned load, 10 -> signed load (to 64 bit),
 *      11 -> signed load (to 32 bit)
2788 */
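/* For example "ldr x0, [x1], #8" (post-indexed) has size=3, opc=01,
 * idx=01 and imm9=8: the load uses the unmodified base address and x1
 * is incremented by 8 afterwards on the writeback path below.
 */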
2789static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2790 int opc,
2791 int size,
2792 int rt,
2793 bool is_vector)
2794{
2795 int rn = extract32(insn, 5, 5);
2796 int imm9 = sextract32(insn, 12, 9);
2797 int idx = extract32(insn, 10, 2);
2798 bool is_signed = false;
2799 bool is_store = false;
2800 bool is_extended = false;
2801 bool is_unpriv = (idx == 2);
2802 bool iss_valid = !is_vector;
2803 bool post_index;
2804 bool writeback;
2805
2806 TCGv_i64 clean_addr, dirty_addr;
2807
2808 if (is_vector) {
2809 size |= (opc & 2) << 1;
2810 if (size > 4 || is_unpriv) {
2811 unallocated_encoding(s);
2812 return;
2813 }
2814 is_store = ((opc & 1) == 0);
2815 if (!fp_access_check(s)) {
2816 return;
2817 }
2818 } else {
2819 if (size == 3 && opc == 2) {
2820 /* PRFM - prefetch */
2821 if (idx != 0) {
2822 unallocated_encoding(s);
2823 return;
2824 }
2825 return;
2826 }
2827 if (opc == 3 && size > 1) {
2828 unallocated_encoding(s);
2829 return;
2830 }
2831 is_store = (opc == 0);
2832 is_signed = extract32(opc, 1, 1);
2833 is_extended = (size < 3) && extract32(opc, 0, 1);
2834 }
2835
2836 switch (idx) {
2837 case 0:
2838 case 2:
2839 post_index = false;
2840 writeback = false;
2841 break;
2842 case 1:
2843 post_index = true;
2844 writeback = true;
2845 break;
2846 case 3:
2847 post_index = false;
2848 writeback = true;
2849 break;
2850 default:
2851 g_assert_not_reached();
2852 }
2853
2854 if (rn == 31) {
2855 gen_check_sp_alignment(s);
2856 }
2857
2858 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2859 if (!post_index) {
2860 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2861 }
2862 clean_addr = clean_data_tbi(s, dirty_addr);
2863
2864 if (is_vector) {
2865 if (is_store) {
2866 do_fp_st(s, rt, clean_addr, size);
2867 } else {
2868 do_fp_ld(s, rt, clean_addr, size);
2869 }
2870 } else {
2871 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2872 int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2873 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2874
2875 if (is_store) {
2876 do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
2877 iss_valid, rt, iss_sf, false);
2878 } else {
2879 do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
2880 is_signed, is_extended, memidx,
2881 iss_valid, rt, iss_sf, false);
2882 }
2883 }
2884
2885 if (writeback) {
2886 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2887 if (post_index) {
2888 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2889 }
2890 tcg_gen_mov_i64(tcg_rn, dirty_addr);
2891 }
2892}
2893
2894/*
2895 * Load/store (register offset)
2896 *
2897 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2898 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2899 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
2900 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2901 *
2902 * For non-vector:
 * size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
 * opc: 00 -> store, 01 -> unsigned load, 10 -> signed load (to 64 bit),
 *      11 -> signed load (to 32 bit)
2905 * For vector:
2906 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2907 * opc<0>: 0 -> store, 1 -> load
2908 * V: 1 -> vector/simd
2909 * opt: extend encoding (see DecodeRegExtend)
2910 * S: if S=1 then scale (essentially index by sizeof(size))
2911 * Rt: register to transfer into/out of
2912 * Rn: address register or SP for base
2913 * Rm: offset register or ZR for offset
2914 */
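/* For example "ldr x0, [x1, x2, lsl #3]" has opt=011 (LSL), S=1 and
 * size=3, so ext_and_shift_reg() below scales the offset register by 8
 * before it is added to the base.
 */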
2915static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2916 int opc,
2917 int size,
2918 int rt,
2919 bool is_vector)
2920{
2921 int rn = extract32(insn, 5, 5);
2922 int shift = extract32(insn, 12, 1);
2923 int rm = extract32(insn, 16, 5);
2924 int opt = extract32(insn, 13, 3);
2925 bool is_signed = false;
2926 bool is_store = false;
2927 bool is_extended = false;
2928
2929 TCGv_i64 tcg_rm, clean_addr, dirty_addr;
2930
2931 if (extract32(opt, 1, 1) == 0) {
2932 unallocated_encoding(s);
2933 return;
2934 }
2935
2936 if (is_vector) {
2937 size |= (opc & 2) << 1;
2938 if (size > 4) {
2939 unallocated_encoding(s);
2940 return;
2941 }
2942 is_store = !extract32(opc, 0, 1);
2943 if (!fp_access_check(s)) {
2944 return;
2945 }
2946 } else {
2947 if (size == 3 && opc == 2) {
2948 /* PRFM - prefetch */
2949 return;
2950 }
2951 if (opc == 3 && size > 1) {
2952 unallocated_encoding(s);
2953 return;
2954 }
2955 is_store = (opc == 0);
2956 is_signed = extract32(opc, 1, 1);
2957 is_extended = (size < 3) && extract32(opc, 0, 1);
2958 }
2959
2960 if (rn == 31) {
2961 gen_check_sp_alignment(s);
2962 }
2963 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2964
2965 tcg_rm = read_cpu_reg(s, rm, 1);
2966 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2967
2968 tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
2969 clean_addr = clean_data_tbi(s, dirty_addr);
2970
2971 if (is_vector) {
2972 if (is_store) {
2973 do_fp_st(s, rt, clean_addr, size);
2974 } else {
2975 do_fp_ld(s, rt, clean_addr, size);
2976 }
2977 } else {
2978 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2979 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2980 if (is_store) {
2981 do_gpr_st(s, tcg_rt, clean_addr, size,
2982 true, rt, iss_sf, false);
2983 } else {
2984 do_gpr_ld(s, tcg_rt, clean_addr, size,
2985 is_signed, is_extended,
2986 true, rt, iss_sf, false);
2987 }
2988 }
2989}
2990
2991/*
2992 * Load/store (unsigned immediate)
2993 *
 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
2995 * +----+-------+---+-----+-----+------------+-------+------+
2996 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt |
2997 * +----+-------+---+-----+-----+------------+-------+------+
2998 *
2999 * For non-vector:
 * size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
 * opc: 00 -> store, 01 -> unsigned load, 10 -> signed load (to 64 bit),
 *      11 -> signed load (to 32 bit)
3002 * For vector:
3003 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3004 * opc<0>: 0 -> store, 1 -> load
3005 * Rn: base address register (inc SP)
3006 * Rt: target register
3007 */
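/* The immediate is scaled by the access size: "ldr x0, [x1, #32]"
 * encodes imm12=4 with size=3, and offset = imm12 << size = 32 below.
 */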
3008static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3009 int opc,
3010 int size,
3011 int rt,
3012 bool is_vector)
3013{
3014 int rn = extract32(insn, 5, 5);
3015 unsigned int imm12 = extract32(insn, 10, 12);
3016 unsigned int offset;
3017
3018 TCGv_i64 clean_addr, dirty_addr;
3019
3020 bool is_store;
3021 bool is_signed = false;
3022 bool is_extended = false;
3023
3024 if (is_vector) {
3025 size |= (opc & 2) << 1;
3026 if (size > 4) {
3027 unallocated_encoding(s);
3028 return;
3029 }
3030 is_store = !extract32(opc, 0, 1);
3031 if (!fp_access_check(s)) {
3032 return;
3033 }
3034 } else {
3035 if (size == 3 && opc == 2) {
3036 /* PRFM - prefetch */
3037 return;
3038 }
3039 if (opc == 3 && size > 1) {
3040 unallocated_encoding(s);
3041 return;
3042 }
3043 is_store = (opc == 0);
3044 is_signed = extract32(opc, 1, 1);
3045 is_extended = (size < 3) && extract32(opc, 0, 1);
3046 }
3047
3048 if (rn == 31) {
3049 gen_check_sp_alignment(s);
3050 }
3051 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3052 offset = imm12 << size;
3053 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3054 clean_addr = clean_data_tbi(s, dirty_addr);
3055
3056 if (is_vector) {
3057 if (is_store) {
3058 do_fp_st(s, rt, clean_addr, size);
3059 } else {
3060 do_fp_ld(s, rt, clean_addr, size);
3061 }
3062 } else {
3063 TCGv_i64 tcg_rt = cpu_reg(s, rt);
3064 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3065 if (is_store) {
3066 do_gpr_st(s, tcg_rt, clean_addr, size,
3067 true, rt, iss_sf, false);
3068 } else {
3069 do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended,
3070 true, rt, iss_sf, false);
3071 }
3072 }
3073}
3074
3075/* Atomic memory operations
3076 *
3077 * 31 30 27 26 24 22 21 16 15 12 10 5 0
3078 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3079 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3081 *
3082 * Rt: the result register
3083 * Rn: base address or SP
3084 * Rs: the source register for the operation
3085 * V: vector flag (always 0 as of v8.3)
3086 * A: acquire flag
3087 * R: release flag
3088 */
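/* For example "ldadd x1, x0, [x2]" has o3_opc=0: the atomic fetch-add
 * below adds x1 to [x2] and returns the old memory value in x0. Note
 * that the case labels below are octal, matching the o3:opc split.
 */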
3089static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3090 int size, int rt, bool is_vector)
3091{
3092 int rs = extract32(insn, 16, 5);
3093 int rn = extract32(insn, 5, 5);
3094 int o3_opc = extract32(insn, 12, 4);
3095 TCGv_i64 tcg_rs, clean_addr;
3096 AtomicThreeOpFn *fn;
3097
3098 if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3099 unallocated_encoding(s);
3100 return;
3101 }
3102 switch (o3_opc) {
3103 case 000: /* LDADD */
3104 fn = tcg_gen_atomic_fetch_add_i64;
3105 break;
3106 case 001: /* LDCLR */
3107 fn = tcg_gen_atomic_fetch_and_i64;
3108 break;
3109 case 002: /* LDEOR */
3110 fn = tcg_gen_atomic_fetch_xor_i64;
3111 break;
3112 case 003: /* LDSET */
3113 fn = tcg_gen_atomic_fetch_or_i64;
3114 break;
3115 case 004: /* LDSMAX */
3116 fn = tcg_gen_atomic_fetch_smax_i64;
3117 break;
3118 case 005: /* LDSMIN */
3119 fn = tcg_gen_atomic_fetch_smin_i64;
3120 break;
3121 case 006: /* LDUMAX */
3122 fn = tcg_gen_atomic_fetch_umax_i64;
3123 break;
3124 case 007: /* LDUMIN */
3125 fn = tcg_gen_atomic_fetch_umin_i64;
3126 break;
3127 case 010: /* SWP */
3128 fn = tcg_gen_atomic_xchg_i64;
3129 break;
3130 default:
3131 unallocated_encoding(s);
3132 return;
3133 }
3134
3135 if (rn == 31) {
3136 gen_check_sp_alignment(s);
3137 }
3138 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
3139 tcg_rs = read_cpu_reg(s, rs, true);
3140
3141 if (o3_opc == 1) { /* LDCLR */
3142 tcg_gen_not_i64(tcg_rs, tcg_rs);
3143 }
3144
3145 /* The tcg atomic primitives are all full barriers. Therefore we
3146 * can ignore the Acquire and Release bits of this instruction.
3147 */
3148 fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s),
3149 s->be_data | size | MO_ALIGN);
3150}
3151
3152/*
3153 * PAC memory operations
3154 *
3155 * 31 30 27 26 24 22 21 12 11 10 5 0
3156 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3157 * | size | 1 1 1 | V | 0 0 | M S | 1 | imm9 | W | 1 | Rn | Rt |
3158 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3159 *
3160 * Rt: the result register
3161 * Rn: base address or SP
3162 * V: vector flag (always 0 as of v8.3)
3163 * M: clear for key DA, set for key DB
3164 * W: pre-indexing flag
3165 * S: sign for imm9.
3166 */
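/* Since size must be 3 here, S:imm9 below forms a signed offset that
 * is scaled by 8, covering multiples of 8 in [-4096, +4088].
 */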
3167static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3168 int size, int rt, bool is_vector)
3169{
3170 int rn = extract32(insn, 5, 5);
3171 bool is_wback = extract32(insn, 11, 1);
3172 bool use_key_a = !extract32(insn, 23, 1);
3173 int offset;
3174 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3175
3176 if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3177 unallocated_encoding(s);
3178 return;
3179 }
3180
3181 if (rn == 31) {
3182 gen_check_sp_alignment(s);
3183 }
3184 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3185
3186 if (s->pauth_active) {
3187 if (use_key_a) {
3188 gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3189 } else {
3190 gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3191 }
3192 }
3193
3194 /* Form the 10-bit signed, scaled offset. */
3195 offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3196 offset = sextract32(offset << size, 0, 10 + size);
3197 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3198
3199 /* Note that "clean" and "dirty" here refer to TBI not PAC. */
3200 clean_addr = clean_data_tbi(s, dirty_addr);
3201
3202 tcg_rt = cpu_reg(s, rt);
3203 do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
3204 /* extend */ false, /* iss_valid */ !is_wback,
3205 /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3206
3207 if (is_wback) {
3208 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3209 }
3210}
3211
3212/* Load/store register (all forms) */
3213static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3214{
3215 int rt = extract32(insn, 0, 5);
3216 int opc = extract32(insn, 22, 2);
3217 bool is_vector = extract32(insn, 26, 1);
3218 int size = extract32(insn, 30, 2);
3219
3220 switch (extract32(insn, 24, 2)) {
3221 case 0:
3222 if (extract32(insn, 21, 1) == 0) {
3223 /* Load/store register (unscaled immediate)
3224 * Load/store immediate pre/post-indexed
3225 * Load/store register unprivileged
3226 */
3227 disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3228 return;
3229 }
3230 switch (extract32(insn, 10, 2)) {
3231 case 0:
3232 disas_ldst_atomic(s, insn, size, rt, is_vector);
3233 return;
3234 case 2:
3235 disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3236 return;
3237 default:
3238 disas_ldst_pac(s, insn, size, rt, is_vector);
3239 return;
3240 }
3241 break;
3242 case 1:
3243 disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3244 return;
3245 }
3246 unallocated_encoding(s);
3247}
3248
3249/* AdvSIMD load/store multiple structures
3250 *
3251 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
3252 * +---+---+---------------+---+-------------+--------+------+------+------+
3253 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
3254 * +---+---+---------------+---+-------------+--------+------+------+------+
3255 *
3256 * AdvSIMD load/store multiple structures (post-indexed)
3257 *
3258 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
3259 * +---+---+---------------+---+---+---------+--------+------+------+------+
3260 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
3261 * +---+---+---------------+---+---+---------+--------+------+------+------+
3262 *
3263 * Rt: first (or only) SIMD&FP register to be transferred
3264 * Rn: base address or SP
3265 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3266 */
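/* For example "ld4 {v0.16b-v3.16b}, [x0]" has opcode=0 (rpt=1,
 * selem=4: one pass over 4-element structures), whereas
 * "ld1 {v0.16b-v3.16b}, [x0]" has opcode=2 (rpt=4, selem=1).
 */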
3267static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3268{
3269 int rt = extract32(insn, 0, 5);
3270 int rn = extract32(insn, 5, 5);
3271 int rm = extract32(insn, 16, 5);
3272 int size = extract32(insn, 10, 2);
3273 int opcode = extract32(insn, 12, 4);
3274 bool is_store = !extract32(insn, 22, 1);
3275 bool is_postidx = extract32(insn, 23, 1);
3276 bool is_q = extract32(insn, 30, 1);
3277 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3278 MemOp endian = s->be_data;
3279
3280 int ebytes; /* bytes per element */
3281 int elements; /* elements per vector */
3282 int rpt; /* num iterations */
3283 int selem; /* structure elements */
3284 int r;
3285
3286 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3287 unallocated_encoding(s);
3288 return;
3289 }
3290
3291 if (!is_postidx && rm != 0) {
3292 unallocated_encoding(s);
3293 return;
3294 }
3295
3296 /* From the shared decode logic */
3297 switch (opcode) {
3298 case 0x0:
3299 rpt = 1;
3300 selem = 4;
3301 break;
3302 case 0x2:
3303 rpt = 4;
3304 selem = 1;
3305 break;
3306 case 0x4:
3307 rpt = 1;
3308 selem = 3;
3309 break;
3310 case 0x6:
3311 rpt = 3;
3312 selem = 1;
3313 break;
3314 case 0x7:
3315 rpt = 1;
3316 selem = 1;
3317 break;
3318 case 0x8:
3319 rpt = 1;
3320 selem = 2;
3321 break;
3322 case 0xa:
3323 rpt = 2;
3324 selem = 1;
3325 break;
3326 default:
3327 unallocated_encoding(s);
3328 return;
3329 }
3330
3331 if (size == 3 && !is_q && selem != 1) {
3332 /* reserved */
3333 unallocated_encoding(s);
3334 return;
3335 }
3336
3337 if (!fp_access_check(s)) {
3338 return;
3339 }
3340
3341 if (rn == 31) {
3342 gen_check_sp_alignment(s);
3343 }
3344
3345 /* For our purposes, bytes are always little-endian. */
3346 if (size == 0) {
3347 endian = MO_LE;
3348 }
3349
3350 /* Consecutive little-endian elements from a single register
3351 * can be promoted to a larger little-endian operation.
3352 */
3353 if (selem == 1 && endian == MO_LE) {
3354 size = 3;
3355 }
3356 ebytes = 1 << size;
3357 elements = (is_q ? 16 : 8) / ebytes;
3358
3359 tcg_rn = cpu_reg_sp(s, rn);
3360 clean_addr = clean_data_tbi(s, tcg_rn);
3361 tcg_ebytes = tcg_const_i64(ebytes);
3362
3363 for (r = 0; r < rpt; r++) {
3364 int e;
3365 for (e = 0; e < elements; e++) {
3366 int xs;
3367 for (xs = 0; xs < selem; xs++) {
3368 int tt = (rt + r + xs) % 32;
3369 if (is_store) {
3370 do_vec_st(s, tt, e, clean_addr, size, endian);
3371 } else {
3372 do_vec_ld(s, tt, e, clean_addr, size, endian);
3373 }
3374 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3375 }
3376 }
3377 }
3378 tcg_temp_free_i64(tcg_ebytes);
3379
3380 if (!is_store) {
3381 /* For non-quad operations, setting a slice of the low
3382 * 64 bits of the register clears the high 64 bits (in
3383 * the ARM ARM pseudocode this is implicit in the fact
3384 * that 'rval' is a 64 bit wide variable).
     * For quad operations, we might still need to zero the
     * high bits of the SVE register.
3387 */
3388 for (r = 0; r < rpt * selem; r++) {
3389 int tt = (rt + r) % 32;
3390 clear_vec_high(s, is_q, tt);
3391 }
3392 }
3393
3394 if (is_postidx) {
3395 if (rm == 31) {
3396 tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes);
3397 } else {
3398 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3399 }
3400 }
3401}
3402
3403/* AdvSIMD load/store single structure
3404 *
3405 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
3406 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3407 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
3408 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3409 *
3410 * AdvSIMD load/store single structure (post-indexed)
3411 *
3412 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
3413 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3414 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
3415 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3416 *
3417 * Rt: first (or only) SIMD&FP register to be transferred
3418 * Rn: base address or SP
3419 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3420 * index = encoded in Q:S:size dependent on size
3421 *
3422 * lane_size = encoded in R, opc
3423 * transfer width = encoded in opc, S, size
3424 */
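/* For example "ld1r {v0.4s}, [x0]" has opc=110: scale decodes to 3,
 * selecting the replicate path below, which loads a single size=2
 * (32-bit) element and broadcasts it to every element of v0.
 */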
3425static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3426{
3427 int rt = extract32(insn, 0, 5);
3428 int rn = extract32(insn, 5, 5);
3429 int rm = extract32(insn, 16, 5);
3430 int size = extract32(insn, 10, 2);
3431 int S = extract32(insn, 12, 1);
3432 int opc = extract32(insn, 13, 3);
3433 int R = extract32(insn, 21, 1);
3434 int is_load = extract32(insn, 22, 1);
3435 int is_postidx = extract32(insn, 23, 1);
3436 int is_q = extract32(insn, 30, 1);
3437
3438 int scale = extract32(opc, 1, 2);
3439 int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3440 bool replicate = false;
3441 int index = is_q << 3 | S << 2 | size;
3442 int ebytes, xs;
3443 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3444
3445 if (extract32(insn, 31, 1)) {
3446 unallocated_encoding(s);
3447 return;
3448 }
3449 if (!is_postidx && rm != 0) {
3450 unallocated_encoding(s);
3451 return;
3452 }
3453
3454 switch (scale) {
3455 case 3:
3456 if (!is_load || S) {
3457 unallocated_encoding(s);
3458 return;
3459 }
3460 scale = size;
3461 replicate = true;
3462 break;
3463 case 0:
3464 break;
3465 case 1:
3466 if (extract32(size, 0, 1)) {
3467 unallocated_encoding(s);
3468 return;
3469 }
3470 index >>= 1;
3471 break;
3472 case 2:
3473 if (extract32(size, 1, 1)) {
3474 unallocated_encoding(s);
3475 return;
3476 }
3477 if (!extract32(size, 0, 1)) {
3478 index >>= 2;
3479 } else {
3480 if (S) {
3481 unallocated_encoding(s);
3482 return;
3483 }
3484 index >>= 3;
3485 scale = 3;
3486 }
3487 break;
3488 default:
3489 g_assert_not_reached();
3490 }
3491
3492 if (!fp_access_check(s)) {
3493 return;
3494 }
3495
3496 ebytes = 1 << scale;
3497
3498 if (rn == 31) {
3499 gen_check_sp_alignment(s);
3500 }
3501
3502 tcg_rn = cpu_reg_sp(s, rn);
3503 clean_addr = clean_data_tbi(s, tcg_rn);
3504 tcg_ebytes = tcg_const_i64(ebytes);
3505
3506 for (xs = 0; xs < selem; xs++) {
3507 if (replicate) {
3508 /* Load and replicate to all elements */
3509 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3510
3511 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr,
3512 get_mem_index(s), s->be_data + scale);
3513 tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3514 (is_q + 1) * 8, vec_full_reg_size(s),
3515 tcg_tmp);
3516 tcg_temp_free_i64(tcg_tmp);
3517 } else {
3518 /* Load/store one element per register */
3519 if (is_load) {
3520 do_vec_ld(s, rt, index, clean_addr, scale, s->be_data);
3521 } else {
3522 do_vec_st(s, rt, index, clean_addr, scale, s->be_data);
3523 }
3524 }
3525 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3526 rt = (rt + 1) % 32;
3527 }
3528 tcg_temp_free_i64(tcg_ebytes);
3529
3530 if (is_postidx) {
3531 if (rm == 31) {
3532 tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes);
3533 } else {
3534 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3535 }
3536 }
3537}
3538
3539/* Loads and stores */
3540static void disas_ldst(DisasContext *s, uint32_t insn)
3541{
3542 switch (extract32(insn, 24, 6)) {
3543 case 0x08: /* Load/store exclusive */
3544 disas_ldst_excl(s, insn);
3545 break;
3546 case 0x18: case 0x1c: /* Load register (literal) */
3547 disas_ld_lit(s, insn);
3548 break;
3549 case 0x28: case 0x29:
3550 case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3551 disas_ldst_pair(s, insn);
3552 break;
3553 case 0x38: case 0x39:
3554 case 0x3c: case 0x3d: /* Load/store register (all forms) */
3555 disas_ldst_reg(s, insn);
3556 break;
3557 case 0x0c: /* AdvSIMD load/store multiple structures */
3558 disas_ldst_multiple_struct(s, insn);
3559 break;
3560 case 0x0d: /* AdvSIMD load/store single structure */
3561 disas_ldst_single_struct(s, insn);
3562 break;
3563 default:
3564 unallocated_encoding(s);
3565 break;
3566 }
3567}
3568
3569/* PC-rel. addressing
3570 * 31 30 29 28 24 23 5 4 0
3571 * +----+-------+-----------+-------------------+------+
3572 * | op | immlo | 1 0 0 0 0 | immhi | Rd |
3573 * +----+-------+-----------+-------------------+------+
3574 */
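/* For ADRP the result is page-aligned: at pc_curr 0x400123, an ADRP
 * with a +1 page offset computes (0x400123 & ~0xfff) + 0x1000 =
 * 0x401000.
 */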
3575static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3576{
3577 unsigned int page, rd;
3578 uint64_t base;
3579 uint64_t offset;
3580
3581 page = extract32(insn, 31, 1);
3582 /* SignExtend(immhi:immlo) -> offset */
3583 offset = sextract64(insn, 5, 19);
3584 offset = offset << 2 | extract32(insn, 29, 2);
3585 rd = extract32(insn, 0, 5);
3586 base = s->pc_curr;
3587
3588 if (page) {
3589 /* ADRP (page based) */
3590 base &= ~0xfff;
3591 offset <<= 12;
3592 }
3593
3594 tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3595}
3596
3597/*
3598 * Add/subtract (immediate)
3599 *
3600 * 31 30 29 28 24 23 22 21 10 9 5 4 0
3601 * +--+--+--+-----------+-----+-------------+-----+-----+
3602 * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd |
3603 * +--+--+--+-----------+-----+-------------+-----+-----+
3604 *
3605 * sf: 0 -> 32bit, 1 -> 64bit
3606 * op: 0 -> add , 1 -> sub
3607 * S: 1 -> set flags
3608 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3609 */
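/* For example "add x0, x1, #0x5000" is encoded with shift=01 and
 * imm12=5; the immediate is shifted left by 12 below before the add.
 */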
3610static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3611{
3612 int rd = extract32(insn, 0, 5);
3613 int rn = extract32(insn, 5, 5);
3614 uint64_t imm = extract32(insn, 10, 12);
3615 int shift = extract32(insn, 22, 2);
3616 bool setflags = extract32(insn, 29, 1);
3617 bool sub_op = extract32(insn, 30, 1);
3618 bool is_64bit = extract32(insn, 31, 1);
3619
3620 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3621 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3622 TCGv_i64 tcg_result;
3623
3624 switch (shift) {
3625 case 0x0:
3626 break;
3627 case 0x1:
3628 imm <<= 12;
3629 break;
3630 default:
3631 unallocated_encoding(s);
3632 return;
3633 }
3634
3635 tcg_result = tcg_temp_new_i64();
3636 if (!setflags) {
3637 if (sub_op) {
3638 tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3639 } else {
3640 tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3641 }
3642 } else {
3643 TCGv_i64 tcg_imm = tcg_const_i64(imm);
3644 if (sub_op) {
3645 gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3646 } else {
3647 gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3648 }
3649 tcg_temp_free_i64(tcg_imm);
3650 }
3651
3652 if (is_64bit) {
3653 tcg_gen_mov_i64(tcg_rd, tcg_result);
3654 } else {
3655 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3656 }
3657
3658 tcg_temp_free_i64(tcg_result);
3659}
3660
3661/* The input should be a value in the bottom e bits (with higher
3662 * bits zero); returns that value replicated into every element
3663 * of size e in a 64 bit integer.
3664 */
3665static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
3666{
3667 assert(e != 0);
3668 while (e < 64) {
3669 mask |= mask << e;
3670 e *= 2;
3671 }
3672 return mask;
3673}
3674
3675/* Return a value with the bottom len bits set (where 0 < len <= 64) */
3676static inline uint64_t bitmask64(unsigned int length)
3677{
3678 assert(length > 0 && length <= 64);
3679 return ~0ULL >> (64 - length);
3680}
3681
3682/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3683 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3684 * value (ie should cause a guest UNDEF exception), and true if they are
3685 * valid, in which case the decoded bit pattern is written to result.
3686 */
3687bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3688 unsigned int imms, unsigned int immr)
3689{
3690 uint64_t mask;
3691 unsigned e, levels, s, r;
3692 int len;
3693
3694 assert(immn < 2 && imms < 64 && immr < 64);
3695
3696 /* The bit patterns we create here are 64 bit patterns which
3697 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3698 * 64 bits each. Each element contains the same value: a run
3699 * of between 1 and e-1 non-zero bits, rotated within the
3700 * element by between 0 and e-1 bits.
3701 *
3702 * The element size and run length are encoded into immn (1 bit)
3703 * and imms (6 bits) as follows:
3704 * 64 bit elements: immn = 1, imms = <length of run - 1>
3705 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3706 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3707 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3708 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3709 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3710 * Notice that immn = 0, imms = 11111x is the only combination
3711 * not covered by one of the above options; this is reserved.
3712 * Further, <length of run - 1> all-ones is a reserved pattern.
3713 *
3714 * In all cases the rotation is by immr % e (and immr is 6 bits).
3715 */
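    /* Worked example: immn=0, imms=0b111100, immr=0 encodes 2-bit
     * elements (e=2), each containing a run of one set bit with no
     * rotation, which replicates to the wmask 0x5555555555555555.
     */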
3716
3717 /* First determine the element size */
3718 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3719 if (len < 1) {
        /* This is the immn == 0, imms == 0b11111x case */
3721 return false;
3722 }
3723 e = 1 << len;
3724
3725 levels = e - 1;
3726 s = imms & levels;
3727 r = immr & levels;
3728
3729 if (s == levels) {
3730 /* <length of run - 1> mustn't be all-ones. */
3731 return false;
3732 }
3733
3734 /* Create the value of one element: s+1 set bits rotated
3735 * by r within the element (which is e bits wide)...
3736 */
3737 mask = bitmask64(s + 1);
3738 if (r) {
3739 mask = (mask >> r) | (mask << (e - r));
3740 mask &= bitmask64(e);
3741 }
3742 /* ...then replicate the element over the whole 64 bit value */
3743 mask = bitfield_replicate(mask, e);
3744 *result = mask;
3745 return true;
3746}
3747
3748/* Logical (immediate)
3749 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
3750 * +----+-----+-------------+---+------+------+------+------+
3751 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd |
3752 * +----+-----+-------------+---+------+------+------+------+
3753 */
3754static void disas_logic_imm(DisasContext *s, uint32_t insn)
3755{
3756 unsigned int sf, opc, is_n, immr, imms, rn, rd;
3757 TCGv_i64 tcg_rd, tcg_rn;
3758 uint64_t wmask;
3759 bool is_and = false;
3760
3761 sf = extract32(insn, 31, 1);
3762 opc = extract32(insn, 29, 2);
3763 is_n = extract32(insn, 22, 1);
3764 immr = extract32(insn, 16, 6);
3765 imms = extract32(insn, 10, 6);
3766 rn = extract32(insn, 5, 5);
3767 rd = extract32(insn, 0, 5);
3768
3769 if (!sf && is_n) {
3770 unallocated_encoding(s);
3771 return;
3772 }
3773
3774 if (opc == 0x3) { /* ANDS */
3775 tcg_rd = cpu_reg(s, rd);
3776 } else {
3777 tcg_rd = cpu_reg_sp(s, rd);
3778 }
3779 tcg_rn = cpu_reg(s, rn);
3780
3781 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3782 /* some immediate field values are reserved */
3783 unallocated_encoding(s);
3784 return;
3785 }
3786
3787 if (!sf) {
3788 wmask &= 0xffffffff;
3789 }
3790
3791 switch (opc) {
3792 case 0x3: /* ANDS */
3793 case 0x0: /* AND */
3794 tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3795 is_and = true;
3796 break;
3797 case 0x1: /* ORR */
3798 tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3799 break;
3800 case 0x2: /* EOR */
3801 tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3802 break;
3803 default:
        g_assert_not_reached(); /* must handle all cases above */
3805 break;
3806 }
3807
3808 if (!sf && !is_and) {
3809 /* zero extend final result; we know we can skip this for AND
3810 * since the immediate had the high 32 bits clear.
3811 */
3812 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3813 }
3814
3815 if (opc == 3) { /* ANDS */
3816 gen_logic_CC(sf, tcg_rd);
3817 }
3818}
3819
3820/*
3821 * Move wide (immediate)
3822 *
3823 * 31 30 29 28 23 22 21 20 5 4 0
3824 * +--+-----+-------------+-----+----------------+------+
3825 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd |
3826 * +--+-----+-------------+-----+----------------+------+
3827 *
3828 * sf: 0 -> 32 bit, 1 -> 64 bit
3829 * opc: 00 -> N, 10 -> Z, 11 -> K
 * hw: shift amount / 16 (0 or 16; 32 and 48 only valid when sf=1)
3831 */
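/* A common idiom: "movz w0, #0x5678" (opc=10, hw=0) followed by
 * "movk w0, #0x1234, lsl #16" (opc=11, hw=1) builds 0x12345678; the
 * MOVK case below deposits imm16 at pos, leaving the other bits intact.
 */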
3832static void disas_movw_imm(DisasContext *s, uint32_t insn)
3833{
3834 int rd = extract32(insn, 0, 5);
3835 uint64_t imm = extract32(insn, 5, 16);
3836 int sf = extract32(insn, 31, 1);
3837 int opc = extract32(insn, 29, 2);
3838 int pos = extract32(insn, 21, 2) << 4;
3839 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3840 TCGv_i64 tcg_imm;
3841
3842 if (!sf && (pos >= 32)) {
3843 unallocated_encoding(s);
3844 return;
3845 }
3846
3847 switch (opc) {
3848 case 0: /* MOVN */
3849 case 2: /* MOVZ */
3850 imm <<= pos;
3851 if (opc == 0) {
3852 imm = ~imm;
3853 }
3854 if (!sf) {
3855 imm &= 0xffffffffu;
3856 }
3857 tcg_gen_movi_i64(tcg_rd, imm);
3858 break;
3859 case 3: /* MOVK */
3860 tcg_imm = tcg_const_i64(imm);
3861 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3862 tcg_temp_free_i64(tcg_imm);
3863 if (!sf) {
3864 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3865 }
3866 break;
3867 default:
3868 unallocated_encoding(s);
3869 break;
3870 }
3871}
3872
3873/* Bitfield
3874 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
3875 * +----+-----+-------------+---+------+------+------+------+
3876 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd |
3877 * +----+-----+-------------+---+------+------+------+------+
3878 */
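/* For example "ubfx x0, x1, #8, #4" is UBFM with immr=8 and imms=11:
 * since si >= ri this is recognised below as a simple extraction of
 * len = 4 bits starting at bit 8.
 */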
3879static void disas_bitfield(DisasContext *s, uint32_t insn)
3880{
3881 unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3882 TCGv_i64 tcg_rd, tcg_tmp;
3883
3884 sf = extract32(insn, 31, 1);
3885 opc = extract32(insn, 29, 2);
3886 n = extract32(insn, 22, 1);
3887 ri = extract32(insn, 16, 6);
3888 si = extract32(insn, 10, 6);
3889 rn = extract32(insn, 5, 5);
3890 rd = extract32(insn, 0, 5);
3891 bitsize = sf ? 64 : 32;
3892
3893 if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3894 unallocated_encoding(s);
3895 return;
3896 }
3897
3898 tcg_rd = cpu_reg(s, rd);
3899
3900 /* Suppress the zero-extend for !sf. Since RI and SI are constrained
3901 to be smaller than bitsize, we'll never reference data outside the
3902 low 32-bits anyway. */
3903 tcg_tmp = read_cpu_reg(s, rn, 1);
3904
3905 /* Recognize simple(r) extractions. */
3906 if (si >= ri) {
3907 /* Wd<s-r:0> = Wn<s:r> */
3908 len = (si - ri) + 1;
3909 if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3910 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3911 goto done;
3912 } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3913 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3914 return;
3915 }
        /* opc == 1, BFXIL: fall through to deposit */
3917 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3918 pos = 0;
3919 } else {
        /* Handle the ri > si case with a deposit:
         * Wd<32+s-r:32-r> = Wn<s:0> (shown for the 32-bit case)
         */
3923 len = si + 1;
3924 pos = (bitsize - ri) & (bitsize - 1);
3925 }
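
    /*
     * For instance, BFI w0, w1, #8, #4 is BFM with immr = 24 and
     * imms = 3, which takes this path with len = 4 and pos = 8; the
     * deposit below then inserts Wn<3:0> into Wd<11:8>.
     */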
3926
3927 if (opc == 0 && len < ri) {
3928 /* SBFM: sign extend the destination field from len to fill
3929 the balance of the word. Let the deposit below insert all
3930 of those sign bits. */
3931 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3932 len = ri;
3933 }
3934
3935 if (opc == 1) { /* BFM, BFXIL */
3936 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3937 } else {
3938 /* SBFM or UBFM: We start with zero, and we haven't modified
3939 any bits outside bitsize, therefore the zero-extension
3940 below is unneeded. */
3941 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3942 return;
3943 }
3944
3945 done:
3946 if (!sf) { /* zero extend final result */
3947 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3948 }
3949}
3950
3951/* Extract
3952 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0
3953 * +----+------+-------------+---+----+------+--------+------+------+
3954 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd |
3955 * +----+------+-------------+---+----+------+--------+------+------+
3956 */
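/*
 * EXTR Rd, Rn, Rm, #imms computes Rd = (Rn:Rm) >> imms truncated to the
 * register width, i.e. Rm shifted right by imms with the low imms bits
 * of Rn shifted in at the top. When Rn == Rm this degenerates to
 * ROR Rd, Rn, #imms, the preferred alias.
 */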
3957static void disas_extract(DisasContext *s, uint32_t insn)
3958{
3959 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3960
3961 sf = extract32(insn, 31, 1);
3962 n = extract32(insn, 22, 1);
3963 rm = extract32(insn, 16, 5);
3964 imm = extract32(insn, 10, 6);
3965 rn = extract32(insn, 5, 5);
3966 rd = extract32(insn, 0, 5);
3967 op21 = extract32(insn, 29, 2);
3968 op0 = extract32(insn, 21, 1);
3969 bitsize = sf ? 64 : 32;
3970
3971 if (sf != n || op21 || op0 || imm >= bitsize) {
3972 unallocated_encoding(s);
3973 } else {
3974 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3975
3976 tcg_rd = cpu_reg(s, rd);
3977
3978 if (unlikely(imm == 0)) {
3979 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3980 * so an extract from bit 0 is a special case.
3981 */
3982 if (sf) {
3983 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3984 } else {
3985 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3986 }
3987 } else {
3988 tcg_rm = cpu_reg(s, rm);
3989 tcg_rn = cpu_reg(s, rn);
3990
3991 if (sf) {
3992 /* Specialization to ROR happens in EXTRACT2. */
3993 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
3994 } else {
3995 TCGv_i32 t0 = tcg_temp_new_i32();
3996
3997 tcg_gen_extrl_i64_i32(t0, tcg_rm);
3998 if (rm == rn) {
3999 tcg_gen_rotri_i32(t0, t0, imm);
4000 } else {
4001 TCGv_i32 t1 = tcg_temp_new_i32();
4002 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4003 tcg_gen_extract2_i32(t0, t0, t1, imm);
4004 tcg_temp_free_i32(t1);
4005 }
4006 tcg_gen_extu_i32_i64(tcg_rd, t0);
4007 tcg_temp_free_i32(t0);
4008 }
4009 }
4010 }
4011}
4012
4013/* Data processing - immediate */
4014static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4015{
4016 switch (extract32(insn, 23, 6)) {
4017 case 0x20: case 0x21: /* PC-rel. addressing */
4018 disas_pc_rel_adr(s, insn);
4019 break;
4020 case 0x22: case 0x23: /* Add/subtract (immediate) */
4021 disas_add_sub_imm(s, insn);
4022 break;
4023 case 0x24: /* Logical (immediate) */
4024 disas_logic_imm(s, insn);
4025 break;
4026 case 0x25: /* Move wide (immediate) */
4027 disas_movw_imm(s, insn);
4028 break;
4029 case 0x26: /* Bitfield */
4030 disas_bitfield(s, insn);
4031 break;
4032 case 0x27: /* Extract */
4033 disas_extract(s, insn);
4034 break;
4035 default:
4036 unallocated_encoding(s);
4037 break;
4038 }
4039}
4040
4041/* Shift a TCGv src by TCGv shift_amount, put result in dst.
4042 * Note that it is the caller's responsibility to ensure that the
4043 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4044 * mandated semantics for out of range shifts.
4045 */
4046static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4047 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4048{
4049 switch (shift_type) {
4050 case A64_SHIFT_TYPE_LSL:
4051 tcg_gen_shl_i64(dst, src, shift_amount);
4052 break;
4053 case A64_SHIFT_TYPE_LSR:
4054 tcg_gen_shr_i64(dst, src, shift_amount);
4055 break;
4056 case A64_SHIFT_TYPE_ASR:
4057 if (!sf) {
4058 tcg_gen_ext32s_i64(dst, src);
4059 }
4060 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4061 break;
4062 case A64_SHIFT_TYPE_ROR:
4063 if (sf) {
4064 tcg_gen_rotr_i64(dst, src, shift_amount);
4065 } else {
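            /*
             * A 64-bit rotate of a zero-extended 32-bit value would
             * rotate bits out of the low word into bits 63:32 rather
             * than back into bit 31, so do the rotate at 32 bits and
             * widen the result afterwards.
             */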
4066 TCGv_i32 t0, t1;
4067 t0 = tcg_temp_new_i32();
4068 t1 = tcg_temp_new_i32();
4069 tcg_gen_extrl_i64_i32(t0, src);
4070 tcg_gen_extrl_i64_i32(t1, shift_amount);
4071 tcg_gen_rotr_i32(t0, t0, t1);
4072 tcg_gen_extu_i32_i64(dst, t0);
4073 tcg_temp_free_i32(t0);
4074 tcg_temp_free_i32(t1);
4075 }
4076 break;
4077 default:
        g_assert_not_reached(); /* all shift types should be handled */
4079 break;
4080 }
4081
4082 if (!sf) { /* zero extend final result */
4083 tcg_gen_ext32u_i64(dst, dst);
4084 }
4085}
4086
4087/* Shift a TCGv src by immediate, put result in dst.
4088 * The shift amount must be in range (this should always be true as the
4089 * relevant instructions will UNDEF on bad shift immediates).
4090 */
4091static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4092 enum a64_shift_type shift_type, unsigned int shift_i)
4093{
4094 assert(shift_i < (sf ? 64 : 32));
4095
4096 if (shift_i == 0) {
4097 tcg_gen_mov_i64(dst, src);
4098 } else {
4099 TCGv_i64 shift_const;
4100
4101 shift_const = tcg_const_i64(shift_i);
4102 shift_reg(dst, src, sf, shift_type, shift_const);
4103 tcg_temp_free_i64(shift_const);
4104 }
4105}
4106
4107/* Logical (shifted register)
4108 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
4109 * +----+-----+-----------+-------+---+------+--------+------+------+
4110 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
4111 * +----+-----+-----------+-------+---+------+--------+------+------+
4112 */
4113static void disas_logic_reg(DisasContext *s, uint32_t insn)
4114{
4115 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4116 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4117
4118 sf = extract32(insn, 31, 1);
4119 opc = extract32(insn, 29, 2);
4120 shift_type = extract32(insn, 22, 2);
4121 invert = extract32(insn, 21, 1);
4122 rm = extract32(insn, 16, 5);
4123 shift_amount = extract32(insn, 10, 6);
4124 rn = extract32(insn, 5, 5);
4125 rd = extract32(insn, 0, 5);
4126
4127 if (!sf && (shift_amount & (1 << 5))) {
4128 unallocated_encoding(s);
4129 return;
4130 }
4131
4132 tcg_rd = cpu_reg(s, rd);
4133
4134 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4135 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4136 * register-register MOV and MVN, so it is worth special casing.
4137 */
4138 tcg_rm = cpu_reg(s, rm);
4139 if (invert) {
4140 tcg_gen_not_i64(tcg_rd, tcg_rm);
4141 if (!sf) {
4142 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4143 }
4144 } else {
4145 if (sf) {
4146 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4147 } else {
4148 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4149 }
4150 }
4151 return;
4152 }
4153
4154 tcg_rm = read_cpu_reg(s, rm, sf);
4155
4156 if (shift_amount) {
4157 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4158 }
4159
4160 tcg_rn = cpu_reg(s, rn);
4161
4162 switch (opc | (invert << 2)) {
4163 case 0: /* AND */
4164 case 3: /* ANDS */
4165 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4166 break;
4167 case 1: /* ORR */
4168 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4169 break;
4170 case 2: /* EOR */
4171 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4172 break;
4173 case 4: /* BIC */
4174 case 7: /* BICS */
4175 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4176 break;
4177 case 5: /* ORN */
4178 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4179 break;
4180 case 6: /* EON */
4181 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4182 break;
4183 default:
        g_assert_not_reached();
4185 break;
4186 }
4187
4188 if (!sf) {
4189 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4190 }
4191
4192 if (opc == 3) {
4193 gen_logic_CC(sf, tcg_rd);
4194 }
4195}
4196
4197/*
4198 * Add/subtract (extended register)
4199 *
4200 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
4201 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4202 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
4203 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4204 *
4205 * sf: 0 -> 32bit, 1 -> 64bit
4206 * op: 0 -> add , 1 -> sub
4207 * S: 1 -> set flags
 * opt: 00 (other values are unallocated)
4209 * option: extension type (see DecodeRegExtend)
4210 * imm3: optional shift to Rm
4211 *
4212 * Rd = Rn + LSL(extend(Rm), amount)
4213 */
4214static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4215{
4216 int rd = extract32(insn, 0, 5);
4217 int rn = extract32(insn, 5, 5);
4218 int imm3 = extract32(insn, 10, 3);
4219 int option = extract32(insn, 13, 3);
4220 int rm = extract32(insn, 16, 5);
4221 int opt = extract32(insn, 22, 2);
4222 bool setflags = extract32(insn, 29, 1);
4223 bool sub_op = extract32(insn, 30, 1);
4224 bool sf = extract32(insn, 31, 1);
4225
4226 TCGv_i64 tcg_rm, tcg_rn; /* temps */
4227 TCGv_i64 tcg_rd;
4228 TCGv_i64 tcg_result;
4229
4230 if (imm3 > 4 || opt != 0) {
4231 unallocated_encoding(s);
4232 return;
4233 }
4234
4235 /* non-flag setting ops may use SP */
4236 if (!setflags) {
4237 tcg_rd = cpu_reg_sp(s, rd);
4238 } else {
4239 tcg_rd = cpu_reg(s, rd);
4240 }
4241 tcg_rn = read_cpu_reg_sp(s, rn, sf);
4242
4243 tcg_rm = read_cpu_reg(s, rm, sf);
4244 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4245
4246 tcg_result = tcg_temp_new_i64();
4247
4248 if (!setflags) {
4249 if (sub_op) {
4250 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4251 } else {
4252 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4253 }
4254 } else {
4255 if (sub_op) {
4256 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4257 } else {
4258 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4259 }
4260 }
4261
4262 if (sf) {
4263 tcg_gen_mov_i64(tcg_rd, tcg_result);
4264 } else {
4265 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4266 }
4267
4268 tcg_temp_free_i64(tcg_result);
4269}
4270
4271/*
4272 * Add/subtract (shifted register)
4273 *
4274 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
4275 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4276 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
4277 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4278 *
4279 * sf: 0 -> 32bit, 1 -> 64bit
4280 * op: 0 -> add , 1 -> sub
4281 * S: 1 -> set flags
4282 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4283 * imm6: Shift amount to apply to Rm before the add/sub
4284 */
4285static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4286{
4287 int rd = extract32(insn, 0, 5);
4288 int rn = extract32(insn, 5, 5);
4289 int imm6 = extract32(insn, 10, 6);
4290 int rm = extract32(insn, 16, 5);
4291 int shift_type = extract32(insn, 22, 2);
4292 bool setflags = extract32(insn, 29, 1);
4293 bool sub_op = extract32(insn, 30, 1);
4294 bool sf = extract32(insn, 31, 1);
4295
4296 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4297 TCGv_i64 tcg_rn, tcg_rm;
4298 TCGv_i64 tcg_result;
4299
4300 if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4301 unallocated_encoding(s);
4302 return;
4303 }
4304
4305 tcg_rn = read_cpu_reg(s, rn, sf);
4306 tcg_rm = read_cpu_reg(s, rm, sf);
4307
4308 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4309
4310 tcg_result = tcg_temp_new_i64();
4311
4312 if (!setflags) {
4313 if (sub_op) {
4314 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4315 } else {
4316 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4317 }
4318 } else {
4319 if (sub_op) {
4320 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4321 } else {
4322 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4323 }
4324 }
4325
4326 if (sf) {
4327 tcg_gen_mov_i64(tcg_rd, tcg_result);
4328 } else {
4329 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4330 }
4331
4332 tcg_temp_free_i64(tcg_result);
4333}
4334
4335/* Data-processing (3 source)
4336 *
4337 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
4338 * +--+------+-----------+------+------+----+------+------+------+
4339 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
4340 * +--+------+-----------+------+------+----+------+------+------+
4341 */
4342static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4343{
4344 int rd = extract32(insn, 0, 5);
4345 int rn = extract32(insn, 5, 5);
4346 int ra = extract32(insn, 10, 5);
4347 int rm = extract32(insn, 16, 5);
4348 int op_id = (extract32(insn, 29, 3) << 4) |
4349 (extract32(insn, 21, 3) << 1) |
4350 extract32(insn, 15, 1);
4351 bool sf = extract32(insn, 31, 1);
4352 bool is_sub = extract32(op_id, 0, 1);
4353 bool is_high = extract32(op_id, 2, 1);
4354 bool is_signed = false;
4355 TCGv_i64 tcg_op1;
4356 TCGv_i64 tcg_op2;
4357 TCGv_i64 tcg_tmp;
4358
4359 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4360 switch (op_id) {
4361 case 0x42: /* SMADDL */
4362 case 0x43: /* SMSUBL */
4363 case 0x44: /* SMULH */
4364 is_signed = true;
4365 break;
4366 case 0x0: /* MADD (32bit) */
4367 case 0x1: /* MSUB (32bit) */
4368 case 0x40: /* MADD (64bit) */
4369 case 0x41: /* MSUB (64bit) */
4370 case 0x4a: /* UMADDL */
4371 case 0x4b: /* UMSUBL */
4372 case 0x4c: /* UMULH */
4373 break;
4374 default:
4375 unallocated_encoding(s);
4376 return;
4377 }
4378
4379 if (is_high) {
4380 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4381 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4382 TCGv_i64 tcg_rn = cpu_reg(s, rn);
4383 TCGv_i64 tcg_rm = cpu_reg(s, rm);
4384
4385 if (is_signed) {
4386 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4387 } else {
4388 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4389 }
4390
4391 tcg_temp_free_i64(low_bits);
4392 return;
4393 }
4394
4395 tcg_op1 = tcg_temp_new_i64();
4396 tcg_op2 = tcg_temp_new_i64();
4397 tcg_tmp = tcg_temp_new_i64();
4398
4399 if (op_id < 0x42) {
4400 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4401 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4402 } else {
4403 if (is_signed) {
4404 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4405 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4406 } else {
4407 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4408 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4409 }
4410 }
4411
4412 if (ra == 31 && !is_sub) {
4413 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4414 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4415 } else {
4416 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4417 if (is_sub) {
4418 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4419 } else {
4420 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4421 }
4422 }
4423
4424 if (!sf) {
4425 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4426 }
4427
4428 tcg_temp_free_i64(tcg_op1);
4429 tcg_temp_free_i64(tcg_op2);
4430 tcg_temp_free_i64(tcg_tmp);
4431}
4432
4433/* Add/subtract (with carry)
4434 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
4435 * +--+--+--+------------------------+------+-------------+------+-----+
4436 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd |
4437 * +--+--+--+------------------------+------+-------------+------+-----+
4438 */
4439
4440static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4441{
4442 unsigned int sf, op, setflags, rm, rn, rd;
4443 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4444
4445 sf = extract32(insn, 31, 1);
4446 op = extract32(insn, 30, 1);
4447 setflags = extract32(insn, 29, 1);
4448 rm = extract32(insn, 16, 5);
4449 rn = extract32(insn, 5, 5);
4450 rd = extract32(insn, 0, 5);
4451
4452 tcg_rd = cpu_reg(s, rd);
4453 tcg_rn = cpu_reg(s, rn);
4454
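    /* SBC computes Rn + NOT(Rm) + C, so the subtract forms reuse the
     * ADC path below with the second operand inverted. */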
4455 if (op) {
4456 tcg_y = new_tmp_a64(s);
4457 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4458 } else {
4459 tcg_y = cpu_reg(s, rm);
4460 }
4461
4462 if (setflags) {
4463 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4464 } else {
4465 gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4466 }
4467}
4468
4469/*
4470 * Rotate right into flags
4471 * 31 30 29 21 15 10 5 4 0
4472 * +--+--+--+-----------------+--------+-----------+------+--+------+
4473 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask |
4474 * +--+--+--+-----------------+--------+-----------+------+--+------+
4475 */
4476static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4477{
4478 int mask = extract32(insn, 0, 4);
4479 int o2 = extract32(insn, 4, 1);
4480 int rn = extract32(insn, 5, 5);
4481 int imm6 = extract32(insn, 15, 6);
4482 int sf_op_s = extract32(insn, 29, 3);
4483 TCGv_i64 tcg_rn;
4484 TCGv_i32 nzcv;
4485
4486 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4487 unallocated_encoding(s);
4488 return;
4489 }
4490
4491 tcg_rn = read_cpu_reg(s, rn, 1);
4492 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4493
4494 nzcv = tcg_temp_new_i32();
4495 tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4496
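    /*
     * QEMU stores each flag separately: NF and VF hold their flag in
     * bit 31, CF holds it in bit 0, and ZF is zero iff Z is set, which
     * is why each bit of the rotated value is placed differently below.
     */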
4497 if (mask & 8) { /* N */
4498 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4499 }
4500 if (mask & 4) { /* Z */
4501 tcg_gen_not_i32(cpu_ZF, nzcv);
4502 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4503 }
4504 if (mask & 2) { /* C */
4505 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4506 }
4507 if (mask & 1) { /* V */
4508 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4509 }
4510
4511 tcg_temp_free_i32(nzcv);
4512}
4513
4514/*
4515 * Evaluate into flags
4516 * 31 30 29 21 15 14 10 5 4 0
4517 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4518 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask |
4519 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4520 */
4521static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4522{
4523 int o3_mask = extract32(insn, 0, 5);
4524 int rn = extract32(insn, 5, 5);
4525 int o2 = extract32(insn, 15, 6);
4526 int sz = extract32(insn, 14, 1);
4527 int sf_op_s = extract32(insn, 29, 3);
4528 TCGv_i32 tmp;
4529 int shift;
4530
4531 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4532 !dc_isar_feature(aa64_condm_4, s)) {
4533 unallocated_encoding(s);
4534 return;
4535 }
4536 shift = sz ? 16 : 24; /* SETF16 or SETF8 */
4537
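    /*
     * For SETF8: N = Rn<7>, Z = (Rn<7:0> == 0), V = Rn<8> ^ Rn<7>, and
     * C is left unchanged (SETF16 uses bits 15 and 16 analogously).
     * The shifts below line those bits up with QEMU's flag fields.
     */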
4538 tmp = tcg_temp_new_i32();
4539 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4540 tcg_gen_shli_i32(cpu_NF, tmp, shift);
4541 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4542 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4543 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4544 tcg_temp_free_i32(tmp);
4545}
4546
4547/* Conditional compare (immediate / register)
4548 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
4549 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4550 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
4551 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4552 * [1] y [0] [0]
4553 */
4554static void disas_cc(DisasContext *s, uint32_t insn)
4555{
4556 unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4557 TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4558 TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4559 DisasCompare c;
4560
4561 if (!extract32(insn, 29, 1)) {
4562 unallocated_encoding(s);
4563 return;
4564 }
4565 if (insn & (1 << 10 | 1 << 4)) {
4566 unallocated_encoding(s);
4567 return;
4568 }
4569 sf = extract32(insn, 31, 1);
4570 op = extract32(insn, 30, 1);
4571 is_imm = extract32(insn, 11, 1);
4572 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4573 cond = extract32(insn, 12, 4);
4574 rn = extract32(insn, 5, 5);
4575 nzcv = extract32(insn, 0, 4);
4576
4577 /* Set T0 = !COND. */
4578 tcg_t0 = tcg_temp_new_i32();
4579 arm_test_cc(&c, cond);
4580 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4581 arm_free_cc(&c);
4582
4583 /* Load the arguments for the new comparison. */
4584 if (is_imm) {
4585 tcg_y = new_tmp_a64(s);
4586 tcg_gen_movi_i64(tcg_y, y);
4587 } else {
4588 tcg_y = cpu_reg(s, y);
4589 }
4590 tcg_rn = cpu_reg(s, rn);
4591
4592 /* Set the flags for the new comparison. */
4593 tcg_tmp = tcg_temp_new_i64();
4594 if (op) {
4595 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4596 } else {
4597 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4598 }
4599 tcg_temp_free_i64(tcg_tmp);
4600
4601 /* If COND was false, force the flags to #nzcv. Compute two masks
4602 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4603 * For tcg hosts that support ANDC, we can make do with just T1.
4604 * In either case, allow the tcg optimizer to delete any unused mask.
4605 */
4606 tcg_t1 = tcg_temp_new_i32();
4607 tcg_t2 = tcg_temp_new_i32();
4608 tcg_gen_neg_i32(tcg_t1, tcg_t0);
4609 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
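    /* E.g. when COND is false, t0 = 1, so t1 = -1 and t2 = 0: OR-ing
     * with t1 forces a flag to 1 and AND-ing with t2 forces it to 0,
     * while COND true makes both masks no-ops. */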
4610
4611 if (nzcv & 8) { /* N */
4612 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4613 } else {
4614 if (TCG_TARGET_HAS_andc_i32) {
4615 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4616 } else {
4617 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4618 }
4619 }
4620 if (nzcv & 4) { /* Z */
4621 if (TCG_TARGET_HAS_andc_i32) {
4622 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4623 } else {
4624 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4625 }
4626 } else {
4627 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4628 }
4629 if (nzcv & 2) { /* C */
4630 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4631 } else {
4632 if (TCG_TARGET_HAS_andc_i32) {
4633 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4634 } else {
4635 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4636 }
4637 }
4638 if (nzcv & 1) { /* V */
4639 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4640 } else {
4641 if (TCG_TARGET_HAS_andc_i32) {
4642 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4643 } else {
4644 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4645 }
4646 }
4647 tcg_temp_free_i32(tcg_t0);
4648 tcg_temp_free_i32(tcg_t1);
4649 tcg_temp_free_i32(tcg_t2);
4650}
4651
4652/* Conditional select
4653 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
4654 * +----+----+---+-----------------+------+------+-----+------+------+
4655 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
4656 * +----+----+---+-----------------+------+------+-----+------+------+
4657 */
4658static void disas_cond_select(DisasContext *s, uint32_t insn)
4659{
4660 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4661 TCGv_i64 tcg_rd, zero;
4662 DisasCompare64 c;
4663
4664 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4665 /* S == 1 or op2<1> == 1 */
4666 unallocated_encoding(s);
4667 return;
4668 }
4669 sf = extract32(insn, 31, 1);
4670 else_inv = extract32(insn, 30, 1);
4671 rm = extract32(insn, 16, 5);
4672 cond = extract32(insn, 12, 4);
4673 else_inc = extract32(insn, 10, 1);
4674 rn = extract32(insn, 5, 5);
4675 rd = extract32(insn, 0, 5);
4676
4677 tcg_rd = cpu_reg(s, rd);
4678
4679 a64_test_cc(&c, cond);
4680 zero = tcg_const_i64(0);
4681
4682 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4683 /* CSET & CSETM. */
4684 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4685 if (else_inv) {
4686 tcg_gen_neg_i64(tcg_rd, tcg_rd);
4687 }
4688 } else {
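        /* CSEL/CSINC/CSINV/CSNEG: apply the increment/invert/negate to
         * the 'false' operand first, then one movcond selects between
         * Rn and the adjusted value. */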
4689 TCGv_i64 t_true = cpu_reg(s, rn);
4690 TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4691 if (else_inv && else_inc) {
4692 tcg_gen_neg_i64(t_false, t_false);
4693 } else if (else_inv) {
4694 tcg_gen_not_i64(t_false, t_false);
4695 } else if (else_inc) {
4696 tcg_gen_addi_i64(t_false, t_false, 1);
4697 }
4698 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4699 }
4700
4701 tcg_temp_free_i64(zero);
4702 a64_free_cc(&c);
4703
4704 if (!sf) {
4705 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4706 }
4707}
4708
4709static void handle_clz(DisasContext *s, unsigned int sf,
4710 unsigned int rn, unsigned int rd)
4711{
4712 TCGv_i64 tcg_rd, tcg_rn;
4713 tcg_rd = cpu_reg(s, rd);
4714 tcg_rn = cpu_reg(s, rn);
4715
4716 if (sf) {
4717 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4718 } else {
4719 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4720 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4721 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4722 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4723 tcg_temp_free_i32(tcg_tmp32);
4724 }
4725}
4726
4727static void handle_cls(DisasContext *s, unsigned int sf,
4728 unsigned int rn, unsigned int rd)
4729{
4730 TCGv_i64 tcg_rd, tcg_rn;
4731 tcg_rd = cpu_reg(s, rd);
4732 tcg_rn = cpu_reg(s, rn);
4733
4734 if (sf) {
4735 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4736 } else {
4737 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4738 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4739 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4740 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4741 tcg_temp_free_i32(tcg_tmp32);
4742 }
4743}
4744
4745static void handle_rbit(DisasContext *s, unsigned int sf,
4746 unsigned int rn, unsigned int rd)
4747{
4748 TCGv_i64 tcg_rd, tcg_rn;
4749 tcg_rd = cpu_reg(s, rd);
4750 tcg_rn = cpu_reg(s, rn);
4751
4752 if (sf) {
4753 gen_helper_rbit64(tcg_rd, tcg_rn);
4754 } else {
4755 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4756 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4757 gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4758 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4759 tcg_temp_free_i32(tcg_tmp32);
4760 }
4761}
4762
4763/* REV with sf==1, opcode==3 ("REV64") */
4764static void handle_rev64(DisasContext *s, unsigned int sf,
4765 unsigned int rn, unsigned int rd)
4766{
4767 if (!sf) {
4768 unallocated_encoding(s);
4769 return;
4770 }
4771 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4772}
4773
4774/* REV with sf==0, opcode==2
4775 * REV32 (sf==1, opcode==2)
4776 */
4777static void handle_rev32(DisasContext *s, unsigned int sf,
4778 unsigned int rn, unsigned int rd)
4779{
4780 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4781
4782 if (sf) {
4783 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4784 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4785
4786 /* bswap32_i64 requires zero high word */
4787 tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4788 tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4789 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4790 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4791 tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4792
4793 tcg_temp_free_i64(tcg_tmp);
4794 } else {
4795 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4796 tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4797 }
4798}
4799
4800/* REV16 (opcode==1) */
4801static void handle_rev16(DisasContext *s, unsigned int sf,
4802 unsigned int rn, unsigned int rd)
4803{
4804 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4805 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4806 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4807 TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4808
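    /*
     * Swap the bytes within each 16-bit halfword: mask out the even
     * bytes, shift them up, and OR in the odd bytes shifted down,
     * e.g. 0x11223344 -> 0x22114433 in the 32-bit case.
     */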
4809 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4810 tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4811 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4812 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4813 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4814
4815 tcg_temp_free_i64(mask);
4816 tcg_temp_free_i64(tcg_tmp);
4817}
4818
4819/* Data-processing (1 source)
4820 * 31 30 29 28 21 20 16 15 10 9 5 4 0
4821 * +----+---+---+-----------------+---------+--------+------+------+
4822 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
4823 * +----+---+---+-----------------+---------+--------+------+------+
4824 */
4825static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4826{
4827 unsigned int sf, opcode, opcode2, rn, rd;
4828 TCGv_i64 tcg_rd;
4829
4830 if (extract32(insn, 29, 1)) {
4831 unallocated_encoding(s);
4832 return;
4833 }
4834
4835 sf = extract32(insn, 31, 1);
4836 opcode = extract32(insn, 10, 6);
4837 opcode2 = extract32(insn, 16, 5);
4838 rn = extract32(insn, 5, 5);
4839 rd = extract32(insn, 0, 5);
4840
#define MAP(SF, O2, O1) ((SF) | ((O1) << 1) | ((O2) << 7))
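/* The key packs sf into bit 0, opcode into bits 6:1 and opcode2 into
 * bits 11:7, so every (sf, opcode2, opcode) triple maps to one case
 * label below. */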
4842
4843 switch (MAP(sf, opcode2, opcode)) {
4844 case MAP(0, 0x00, 0x00): /* RBIT */
4845 case MAP(1, 0x00, 0x00):
4846 handle_rbit(s, sf, rn, rd);
4847 break;
4848 case MAP(0, 0x00, 0x01): /* REV16 */
4849 case MAP(1, 0x00, 0x01):
4850 handle_rev16(s, sf, rn, rd);
4851 break;
4852 case MAP(0, 0x00, 0x02): /* REV/REV32 */
4853 case MAP(1, 0x00, 0x02):
4854 handle_rev32(s, sf, rn, rd);
4855 break;
4856 case MAP(1, 0x00, 0x03): /* REV64 */
4857 handle_rev64(s, sf, rn, rd);
4858 break;
4859 case MAP(0, 0x00, 0x04): /* CLZ */
4860 case MAP(1, 0x00, 0x04):
4861 handle_clz(s, sf, rn, rd);
4862 break;
4863 case MAP(0, 0x00, 0x05): /* CLS */
4864 case MAP(1, 0x00, 0x05):
4865 handle_cls(s, sf, rn, rd);
4866 break;
4867 case MAP(1, 0x01, 0x00): /* PACIA */
4868 if (s->pauth_active) {
4869 tcg_rd = cpu_reg(s, rd);
4870 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4871 } else if (!dc_isar_feature(aa64_pauth, s)) {
4872 goto do_unallocated;
4873 }
4874 break;
4875 case MAP(1, 0x01, 0x01): /* PACIB */
4876 if (s->pauth_active) {
4877 tcg_rd = cpu_reg(s, rd);
4878 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4879 } else if (!dc_isar_feature(aa64_pauth, s)) {
4880 goto do_unallocated;
4881 }
4882 break;
4883 case MAP(1, 0x01, 0x02): /* PACDA */
4884 if (s->pauth_active) {
4885 tcg_rd = cpu_reg(s, rd);
4886 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4887 } else if (!dc_isar_feature(aa64_pauth, s)) {
4888 goto do_unallocated;
4889 }
4890 break;
4891 case MAP(1, 0x01, 0x03): /* PACDB */
4892 if (s->pauth_active) {
4893 tcg_rd = cpu_reg(s, rd);
4894 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4895 } else if (!dc_isar_feature(aa64_pauth, s)) {
4896 goto do_unallocated;
4897 }
4898 break;
4899 case MAP(1, 0x01, 0x04): /* AUTIA */
4900 if (s->pauth_active) {
4901 tcg_rd = cpu_reg(s, rd);
4902 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4903 } else if (!dc_isar_feature(aa64_pauth, s)) {
4904 goto do_unallocated;
4905 }
4906 break;
4907 case MAP(1, 0x01, 0x05): /* AUTIB */
4908 if (s->pauth_active) {
4909 tcg_rd = cpu_reg(s, rd);
4910 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4911 } else if (!dc_isar_feature(aa64_pauth, s)) {
4912 goto do_unallocated;
4913 }
4914 break;
4915 case MAP(1, 0x01, 0x06): /* AUTDA */
4916 if (s->pauth_active) {
4917 tcg_rd = cpu_reg(s, rd);
4918 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4919 } else if (!dc_isar_feature(aa64_pauth, s)) {
4920 goto do_unallocated;
4921 }
4922 break;
4923 case MAP(1, 0x01, 0x07): /* AUTDB */
4924 if (s->pauth_active) {
4925 tcg_rd = cpu_reg(s, rd);
4926 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4927 } else if (!dc_isar_feature(aa64_pauth, s)) {
4928 goto do_unallocated;
4929 }
4930 break;
4931 case MAP(1, 0x01, 0x08): /* PACIZA */
4932 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4933 goto do_unallocated;
4934 } else if (s->pauth_active) {
4935 tcg_rd = cpu_reg(s, rd);
4936 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4937 }
4938 break;
4939 case MAP(1, 0x01, 0x09): /* PACIZB */
4940 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4941 goto do_unallocated;
4942 } else if (s->pauth_active) {
4943 tcg_rd = cpu_reg(s, rd);
4944 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4945 }
4946 break;
4947 case MAP(1, 0x01, 0x0a): /* PACDZA */
4948 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4949 goto do_unallocated;
4950 } else if (s->pauth_active) {
4951 tcg_rd = cpu_reg(s, rd);
4952 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4953 }
4954 break;
4955 case MAP(1, 0x01, 0x0b): /* PACDZB */
4956 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4957 goto do_unallocated;
4958 } else if (s->pauth_active) {
4959 tcg_rd = cpu_reg(s, rd);
4960 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4961 }
4962 break;
4963 case MAP(1, 0x01, 0x0c): /* AUTIZA */
4964 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4965 goto do_unallocated;
4966 } else if (s->pauth_active) {
4967 tcg_rd = cpu_reg(s, rd);
4968 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4969 }
4970 break;
4971 case MAP(1, 0x01, 0x0d): /* AUTIZB */
4972 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4973 goto do_unallocated;
4974 } else if (s->pauth_active) {
4975 tcg_rd = cpu_reg(s, rd);
4976 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4977 }
4978 break;
4979 case MAP(1, 0x01, 0x0e): /* AUTDZA */
4980 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4981 goto do_unallocated;
4982 } else if (s->pauth_active) {
4983 tcg_rd = cpu_reg(s, rd);
4984 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4985 }
4986 break;
4987 case MAP(1, 0x01, 0x0f): /* AUTDZB */
4988 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4989 goto do_unallocated;
4990 } else if (s->pauth_active) {
4991 tcg_rd = cpu_reg(s, rd);
4992 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4993 }
4994 break;
4995 case MAP(1, 0x01, 0x10): /* XPACI */
4996 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4997 goto do_unallocated;
4998 } else if (s->pauth_active) {
4999 tcg_rd = cpu_reg(s, rd);
5000 gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5001 }
5002 break;
5003 case MAP(1, 0x01, 0x11): /* XPACD */
5004 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5005 goto do_unallocated;
5006 } else if (s->pauth_active) {
5007 tcg_rd = cpu_reg(s, rd);
5008 gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5009 }
5010 break;
5011 default:
5012 do_unallocated:
5013 unallocated_encoding(s);
5014 break;
5015 }
5016
5017#undef MAP
5018}
5019
5020static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5021 unsigned int rm, unsigned int rn, unsigned int rd)
5022{
5023 TCGv_i64 tcg_n, tcg_m, tcg_rd;
5024 tcg_rd = cpu_reg(s, rd);
5025
5026 if (!sf && is_signed) {
5027 tcg_n = new_tmp_a64(s);
5028 tcg_m = new_tmp_a64(s);
5029 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5030 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5031 } else {
5032 tcg_n = read_cpu_reg(s, rn, sf);
5033 tcg_m = read_cpu_reg(s, rm, sf);
5034 }
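
    /* The div helpers are 64-bit only, so 32-bit SDIV must sign-extend
     * its operands first; 32-bit UDIV can use the zero-extended register
     * contents directly. */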
5035
5036 if (is_signed) {
5037 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5038 } else {
5039 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5040 }
5041
5042 if (!sf) { /* zero extend final result */
5043 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5044 }
5045}
5046
5047/* LSLV, LSRV, ASRV, RORV */
5048static void handle_shift_reg(DisasContext *s,
5049 enum a64_shift_type shift_type, unsigned int sf,
5050 unsigned int rm, unsigned int rn, unsigned int rd)
5051{
5052 TCGv_i64 tcg_shift = tcg_temp_new_i64();
5053 TCGv_i64 tcg_rd = cpu_reg(s, rd);
5054 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5055
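    /* The ARM ARM specifies that the variable shift amount is taken
     * modulo the register width; the AND below implements that before
     * handing off to shift_reg(). */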
5056 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5057 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5058 tcg_temp_free_i64(tcg_shift);
5059}
5060
5061/* CRC32[BHWX], CRC32C[BHWX] */
5062static void handle_crc32(DisasContext *s,
5063 unsigned int sf, unsigned int sz, bool crc32c,
5064 unsigned int rm, unsigned int rn, unsigned int rd)
5065{
5066 TCGv_i64 tcg_acc, tcg_val;
5067 TCGv_i32 tcg_bytes;
5068
5069 if (!dc_isar_feature(aa64_crc32, s)
5070 || (sf == 1 && sz != 3)
5071 || (sf == 0 && sz == 3)) {
5072 unallocated_encoding(s);
5073 return;
5074 }
5075
5076 if (sz == 3) {
5077 tcg_val = cpu_reg(s, rm);
5078 } else {
5079 uint64_t mask;
5080 switch (sz) {
5081 case 0:
5082 mask = 0xFF;
5083 break;
5084 case 1:
5085 mask = 0xFFFF;
5086 break;
5087 case 2:
5088 mask = 0xFFFFFFFF;
5089 break;
5090 default:
5091 g_assert_not_reached();
5092 }
5093 tcg_val = new_tmp_a64(s);
5094 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5095 }
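
    /* The CRC helpers consume 1 << sz bytes of the value, so narrower
     * widths are masked down here rather than in the helper. */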
5096
5097 tcg_acc = cpu_reg(s, rn);
5098 tcg_bytes = tcg_const_i32(1 << sz);
5099
5100 if (crc32c) {
5101 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5102 } else {
5103 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5104 }
5105
5106 tcg_temp_free_i32(tcg_bytes);
5107}
5108
5109/* Data-processing (2 source)
5110 * 31 30 29 28 21 20 16 15 10 9 5 4 0
5111 * +----+---+---+-----------------+------+--------+------+------+
5112 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
5113 * +----+---+---+-----------------+------+--------+------+------+
5114 */
5115static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5116{
5117 unsigned int sf, rm, opcode, rn, rd;
5118 sf = extract32(insn, 31, 1);
5119 rm = extract32(insn, 16, 5);
5120 opcode = extract32(insn, 10, 6);
5121 rn = extract32(insn, 5, 5);
5122 rd = extract32(insn, 0, 5);
5123
5124 if (extract32(insn, 29, 1)) {
5125 unallocated_encoding(s);
5126 return;
5127 }
5128
5129 switch (opcode) {
5130 case 2: /* UDIV */
5131 handle_div(s, false, sf, rm, rn, rd);
5132 break;
5133 case 3: /* SDIV */
5134 handle_div(s, true, sf, rm, rn, rd);
5135 break;
5136 case 8: /* LSLV */
5137 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5138 break;
5139 case 9: /* LSRV */
5140 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5141 break;
5142 case 10: /* ASRV */
5143 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5144 break;
5145 case 11: /* RORV */
5146 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5147 break;
5148 case 12: /* PACGA */
5149 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5150 goto do_unallocated;
5151 }
5152 gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5153 cpu_reg(s, rn), cpu_reg_sp(s, rm));
5154 break;
5155 case 16:
5156 case 17:
5157 case 18:
5158 case 19:
5159 case 20:
5160 case 21:
5161 case 22:
5162 case 23: /* CRC32 */
5163 {
5164 int sz = extract32(opcode, 0, 2);
5165 bool crc32c = extract32(opcode, 2, 1);
5166 handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5167 break;
5168 }
5169 default:
5170 do_unallocated:
5171 unallocated_encoding(s);
5172 break;
5173 }
5174}
5175
5176/*
5177 * Data processing - register
5178 * 31 30 29 28 25 21 20 16 10 0
5179 * +--+---+--+---+-------+-----+-------+-------+---------+
5180 * | |op0| |op1| 1 0 1 | op2 | | op3 | |
5181 * +--+---+--+---+-------+-----+-------+-------+---------+
5182 */
5183static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5184{
5185 int op0 = extract32(insn, 30, 1);
5186 int op1 = extract32(insn, 28, 1);
5187 int op2 = extract32(insn, 21, 4);
5188 int op3 = extract32(insn, 10, 6);
5189
5190 if (!op1) {
5191 if (op2 & 8) {
5192 if (op2 & 1) {
5193 /* Add/sub (extended register) */
5194 disas_add_sub_ext_reg(s, insn);
5195 } else {
5196 /* Add/sub (shifted register) */
5197 disas_add_sub_reg(s, insn);
5198 }
5199 } else {
5200 /* Logical (shifted register) */
5201 disas_logic_reg(s, insn);
5202 }
5203 return;
5204 }
5205
5206 switch (op2) {
5207 case 0x0:
5208 switch (op3) {
5209 case 0x00: /* Add/subtract (with carry) */
5210 disas_adc_sbc(s, insn);
5211 break;
5212
5213 case 0x01: /* Rotate right into flags */
5214 case 0x21:
5215 disas_rotate_right_into_flags(s, insn);
5216 break;
5217
5218 case 0x02: /* Evaluate into flags */
5219 case 0x12:
5220 case 0x22:
5221 case 0x32:
5222 disas_evaluate_into_flags(s, insn);
5223 break;
5224
5225 default:
5226 goto do_unallocated;
5227 }
5228 break;
5229
5230 case 0x2: /* Conditional compare */
5231 disas_cc(s, insn); /* both imm and reg forms */
5232 break;
5233
5234 case 0x4: /* Conditional select */
5235 disas_cond_select(s, insn);
5236 break;
5237
5238 case 0x6: /* Data-processing */
5239 if (op0) { /* (1 source) */
5240 disas_data_proc_1src(s, insn);
5241 } else { /* (2 source) */
5242 disas_data_proc_2src(s, insn);
5243 }
5244 break;
5245 case 0x8 ... 0xf: /* (3 source) */
5246 disas_data_proc_3src(s, insn);
5247 break;
5248
5249 default:
5250 do_unallocated:
5251 unallocated_encoding(s);
5252 break;
5253 }
5254}
5255
5256static void handle_fp_compare(DisasContext *s, int size,
5257 unsigned int rn, unsigned int rm,
5258 bool cmp_with_zero, bool signal_all_nans)
5259{
5260 TCGv_i64 tcg_flags = tcg_temp_new_i64();
5261 TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
5262
5263 if (size == MO_64) {
5264 TCGv_i64 tcg_vn, tcg_vm;
5265
5266 tcg_vn = read_fp_dreg(s, rn);
5267 if (cmp_with_zero) {
5268 tcg_vm = tcg_const_i64(0);
5269 } else {
5270 tcg_vm = read_fp_dreg(s, rm);
5271 }
5272 if (signal_all_nans) {
5273 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5274 } else {
5275 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5276 }
5277 tcg_temp_free_i64(tcg_vn);
5278 tcg_temp_free_i64(tcg_vm);
5279 } else {
5280 TCGv_i32 tcg_vn = tcg_temp_new_i32();
5281 TCGv_i32 tcg_vm = tcg_temp_new_i32();
5282
5283 read_vec_element_i32(s, tcg_vn, rn, 0, size);
5284 if (cmp_with_zero) {
5285 tcg_gen_movi_i32(tcg_vm, 0);
5286 } else {
5287 read_vec_element_i32(s, tcg_vm, rm, 0, size);
5288 }
5289
5290 switch (size) {
5291 case MO_32:
5292 if (signal_all_nans) {
5293 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5294 } else {
5295 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5296 }
5297 break;
5298 case MO_16:
5299 if (signal_all_nans) {
5300 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5301 } else {
5302 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5303 }
5304 break;
5305 default:
5306 g_assert_not_reached();
5307 }
5308
5309 tcg_temp_free_i32(tcg_vn);
5310 tcg_temp_free_i32(tcg_vm);
5311 }
5312
5313 tcg_temp_free_ptr(fpst);
5314
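    /* The a64 FP compare helpers return NZCV in the usual PSTATE layout
     * (bits 31:28); gen_set_nzcv() unpacks that into the flag fields. */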
5315 gen_set_nzcv(tcg_flags);
5316
5317 tcg_temp_free_i64(tcg_flags);
5318}
5319
5320/* Floating point compare
5321 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
5322 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5323 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
5324 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5325 */
5326static void disas_fp_compare(DisasContext *s, uint32_t insn)
5327{
5328 unsigned int mos, type, rm, op, rn, opc, op2r;
5329 int size;
5330
5331 mos = extract32(insn, 29, 3);
5332 type = extract32(insn, 22, 2);
5333 rm = extract32(insn, 16, 5);
5334 op = extract32(insn, 14, 2);
5335 rn = extract32(insn, 5, 5);
5336 opc = extract32(insn, 3, 2);
5337 op2r = extract32(insn, 0, 3);
5338
5339 if (mos || op || op2r) {
5340 unallocated_encoding(s);
5341 return;
5342 }
5343
5344 switch (type) {
5345 case 0:
5346 size = MO_32;
5347 break;
5348 case 1:
5349 size = MO_64;
5350 break;
5351 case 3:
5352 size = MO_16;
5353 if (dc_isar_feature(aa64_fp16, s)) {
5354 break;
5355 }
5356 /* fallthru */
5357 default:
5358 unallocated_encoding(s);
5359 return;
5360 }
5361
5362 if (!fp_access_check(s)) {
5363 return;
5364 }
5365
5366 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5367}
5368
5369/* Floating point conditional compare
5370 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
5371 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5372 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
5373 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5374 */
5375static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5376{
5377 unsigned int mos, type, rm, cond, rn, op, nzcv;
5378 TCGv_i64 tcg_flags;
5379 TCGLabel *label_continue = NULL;
5380 int size;
5381
5382 mos = extract32(insn, 29, 3);
5383 type = extract32(insn, 22, 2);
5384 rm = extract32(insn, 16, 5);
5385 cond = extract32(insn, 12, 4);
5386 rn = extract32(insn, 5, 5);
5387 op = extract32(insn, 4, 1);
5388 nzcv = extract32(insn, 0, 4);
5389
5390 if (mos) {
5391 unallocated_encoding(s);
5392 return;
5393 }
5394
5395 switch (type) {
5396 case 0:
5397 size = MO_32;
5398 break;
5399 case 1:
5400 size = MO_64;
5401 break;
5402 case 3:
5403 size = MO_16;
5404 if (dc_isar_feature(aa64_fp16, s)) {
5405 break;
5406 }
5407 /* fallthru */
5408 default:
5409 unallocated_encoding(s);
5410 return;
5411 }
5412
5413 if (!fp_access_check(s)) {
5414 return;
5415 }
5416
5417 if (cond < 0x0e) { /* not always */
5418 TCGLabel *label_match = gen_new_label();
5419 label_continue = gen_new_label();
5420 arm_gen_test_cc(cond, label_match);
5421 /* nomatch: */
5422 tcg_flags = tcg_const_i64(nzcv << 28);
5423 gen_set_nzcv(tcg_flags);
5424 tcg_temp_free_i64(tcg_flags);
5425 tcg_gen_br(label_continue);
5426 gen_set_label(label_match);
5427 }
5428
5429 handle_fp_compare(s, size, rn, rm, false, op);
5430
5431 if (cond < 0x0e) {
5432 gen_set_label(label_continue);
5433 }
5434}
5435
5436/* Floating point conditional select
5437 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
5438 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5439 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd |
5440 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5441 */
5442static void disas_fp_csel(DisasContext *s, uint32_t insn)
5443{
5444 unsigned int mos, type, rm, cond, rn, rd;
5445 TCGv_i64 t_true, t_false, t_zero;
5446 DisasCompare64 c;
5447 MemOp sz;
5448
5449 mos = extract32(insn, 29, 3);
5450 type = extract32(insn, 22, 2);
5451 rm = extract32(insn, 16, 5);
5452 cond = extract32(insn, 12, 4);
5453 rn = extract32(insn, 5, 5);
5454 rd = extract32(insn, 0, 5);
5455
5456 if (mos) {
5457 unallocated_encoding(s);
5458 return;
5459 }
5460
5461 switch (type) {
5462 case 0:
5463 sz = MO_32;
5464 break;
5465 case 1:
5466 sz = MO_64;
5467 break;
5468 case 3:
5469 sz = MO_16;
5470 if (dc_isar_feature(aa64_fp16, s)) {
5471 break;
5472 }
5473 /* fallthru */
5474 default:
5475 unallocated_encoding(s);
5476 return;
5477 }
5478
5479 if (!fp_access_check(s)) {
5480 return;
5481 }
5482
5483 /* Zero extend sreg & hreg inputs to 64 bits now. */
5484 t_true = tcg_temp_new_i64();
5485 t_false = tcg_temp_new_i64();
5486 read_vec_element(s, t_true, rn, 0, sz);
5487 read_vec_element(s, t_false, rm, 0, sz);
5488
5489 a64_test_cc(&c, cond);
5490 t_zero = tcg_const_i64(0);
5491 tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
5492 tcg_temp_free_i64(t_zero);
5493 tcg_temp_free_i64(t_false);
5494 a64_free_cc(&c);
5495
5496 /* Note that sregs & hregs write back zeros to the high bits,
5497 and we've already done the zero-extension. */
5498 write_fp_dreg(s, rd, t_true);
5499 tcg_temp_free_i64(t_true);
5500}
5501
5502/* Floating-point data-processing (1 source) - half precision */
5503static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5504{
5505 TCGv_ptr fpst = NULL;
5506 TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5507 TCGv_i32 tcg_res = tcg_temp_new_i32();
5508
5509 switch (opcode) {
5510 case 0x0: /* FMOV */
5511 tcg_gen_mov_i32(tcg_res, tcg_op);
5512 break;
5513 case 0x1: /* FABS */
5514 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5515 break;
5516 case 0x2: /* FNEG */
5517 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5518 break;
5519 case 0x3: /* FSQRT */
5520 fpst = get_fpstatus_ptr(true);
5521 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5522 break;
5523 case 0x8: /* FRINTN */
5524 case 0x9: /* FRINTP */
5525 case 0xa: /* FRINTM */
5526 case 0xb: /* FRINTZ */
5527 case 0xc: /* FRINTA */
5528 {
5529 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5530 fpst = get_fpstatus_ptr(true);
5531
5532 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5533 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5534
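        /* set_rmode returned the previous rounding mode in tcg_rmode,
         * so this second call restores it. */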
5535 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5536 tcg_temp_free_i32(tcg_rmode);
5537 break;
5538 }
5539 case 0xe: /* FRINTX */
5540 fpst = get_fpstatus_ptr(true);
5541 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5542 break;
5543 case 0xf: /* FRINTI */
5544 fpst = get_fpstatus_ptr(true);
5545 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5546 break;
5547 default:
        g_assert_not_reached();
5549 }
5550
5551 write_fp_sreg(s, rd, tcg_res);
5552
5553 if (fpst) {
5554 tcg_temp_free_ptr(fpst);
5555 }
5556 tcg_temp_free_i32(tcg_op);
5557 tcg_temp_free_i32(tcg_res);
5558}
5559
5560/* Floating-point data-processing (1 source) - single precision */
5561static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
5562{
5563 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
5564 TCGv_i32 tcg_op, tcg_res;
5565 TCGv_ptr fpst;
5566 int rmode = -1;
5567
5568 tcg_op = read_fp_sreg(s, rn);
5569 tcg_res = tcg_temp_new_i32();
5570
5571 switch (opcode) {
5572 case 0x0: /* FMOV */
5573 tcg_gen_mov_i32(tcg_res, tcg_op);
5574 goto done;
5575 case 0x1: /* FABS */
5576 gen_helper_vfp_abss(tcg_res, tcg_op);
5577 goto done;
5578 case 0x2: /* FNEG */
5579 gen_helper_vfp_negs(tcg_res, tcg_op);
5580 goto done;
5581 case 0x3: /* FSQRT */
5582 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
5583 goto done;
5584 case 0x8: /* FRINTN */
5585 case 0x9: /* FRINTP */
5586 case 0xa: /* FRINTM */
5587 case 0xb: /* FRINTZ */
5588 case 0xc: /* FRINTA */
5589 rmode = arm_rmode_to_sf(opcode & 7);
5590 gen_fpst = gen_helper_rints;
5591 break;
5592 case 0xe: /* FRINTX */
5593 gen_fpst = gen_helper_rints_exact;
5594 break;
5595 case 0xf: /* FRINTI */
5596 gen_fpst = gen_helper_rints;
5597 break;
5598 case 0x10: /* FRINT32Z */
5599 rmode = float_round_to_zero;
5600 gen_fpst = gen_helper_frint32_s;
5601 break;
5602 case 0x11: /* FRINT32X */
5603 gen_fpst = gen_helper_frint32_s;
5604 break;
5605 case 0x12: /* FRINT64Z */
5606 rmode = float_round_to_zero;
5607 gen_fpst = gen_helper_frint64_s;
5608 break;
5609 case 0x13: /* FRINT64X */
5610 gen_fpst = gen_helper_frint64_s;
5611 break;
5612 default:
5613 g_assert_not_reached();
5614 }
5615
5616 fpst = get_fpstatus_ptr(false);
5617 if (rmode >= 0) {
5618 TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5619 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5620 gen_fpst(tcg_res, tcg_op, fpst);
5621 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5622 tcg_temp_free_i32(tcg_rmode);
5623 } else {
5624 gen_fpst(tcg_res, tcg_op, fpst);
5625 }
5626 tcg_temp_free_ptr(fpst);
5627
5628 done:
5629 write_fp_sreg(s, rd, tcg_res);
5630 tcg_temp_free_i32(tcg_op);
5631 tcg_temp_free_i32(tcg_res);
5632}
5633
5634/* Floating-point data-processing (1 source) - double precision */
5635static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
5636{
5637 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
5638 TCGv_i64 tcg_op, tcg_res;
5639 TCGv_ptr fpst;
5640 int rmode = -1;
5641
5642 switch (opcode) {
5643 case 0x0: /* FMOV */
5644 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
5645 return;
5646 }
5647
5648 tcg_op = read_fp_dreg(s, rn);
5649 tcg_res = tcg_temp_new_i64();
5650
5651 switch (opcode) {
5652 case 0x1: /* FABS */
5653 gen_helper_vfp_absd(tcg_res, tcg_op);
5654 goto done;
5655 case 0x2: /* FNEG */
5656 gen_helper_vfp_negd(tcg_res, tcg_op);
5657 goto done;
5658 case 0x3: /* FSQRT */
5659 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
5660 goto done;
5661 case 0x8: /* FRINTN */
5662 case 0x9: /* FRINTP */
5663 case 0xa: /* FRINTM */
5664 case 0xb: /* FRINTZ */
5665 case 0xc: /* FRINTA */
5666 rmode = arm_rmode_to_sf(opcode & 7);
5667 gen_fpst = gen_helper_rintd;
5668 break;
5669 case 0xe: /* FRINTX */
5670 gen_fpst = gen_helper_rintd_exact;
5671 break;
5672 case 0xf: /* FRINTI */
5673 gen_fpst = gen_helper_rintd;
5674 break;
5675 case 0x10: /* FRINT32Z */
5676 rmode = float_round_to_zero;
5677 gen_fpst = gen_helper_frint32_d;
5678 break;
5679 case 0x11: /* FRINT32X */
5680 gen_fpst = gen_helper_frint32_d;
5681 break;
5682 case 0x12: /* FRINT64Z */
5683 rmode = float_round_to_zero;
5684 gen_fpst = gen_helper_frint64_d;
5685 break;
5686 case 0x13: /* FRINT64X */
5687 gen_fpst = gen_helper_frint64_d;
5688 break;
5689 default:
5690 g_assert_not_reached();
5691 }
5692
5693 fpst = get_fpstatus_ptr(false);
5694 if (rmode >= 0) {
5695 TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5696 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5697 gen_fpst(tcg_res, tcg_op, fpst);
5698 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5699 tcg_temp_free_i32(tcg_rmode);
5700 } else {
5701 gen_fpst(tcg_res, tcg_op, fpst);
5702 }
5703 tcg_temp_free_ptr(fpst);
5704
5705 done:
5706 write_fp_dreg(s, rd, tcg_res);
5707 tcg_temp_free_i64(tcg_op);
5708 tcg_temp_free_i64(tcg_res);
5709}
5710
5711static void handle_fp_fcvt(DisasContext *s, int opcode,
5712 int rd, int rn, int dtype, int ntype)
5713{
5714 switch (ntype) {
5715 case 0x0:
5716 {
5717 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5718 if (dtype == 1) {
5719 /* Single to double */
5720 TCGv_i64 tcg_rd = tcg_temp_new_i64();
5721 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
5722 write_fp_dreg(s, rd, tcg_rd);
5723 tcg_temp_free_i64(tcg_rd);
5724 } else {
5725 /* Single to half */
5726 TCGv_i32 tcg_rd = tcg_temp_new_i32();
5727 TCGv_i32 ahp = get_ahp_flag();
5728 TCGv_ptr fpst = get_fpstatus_ptr(false);
5729
5730 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5731 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5732 write_fp_sreg(s, rd, tcg_rd);
5733 tcg_temp_free_i32(tcg_rd);
5734 tcg_temp_free_i32(ahp);
5735 tcg_temp_free_ptr(fpst);
5736 }
5737 tcg_temp_free_i32(tcg_rn);
5738 break;
5739 }
5740 case 0x1:
5741 {
5742 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
5743 TCGv_i32 tcg_rd = tcg_temp_new_i32();
5744 if (dtype == 0) {
5745 /* Double to single */
5746 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
5747 } else {
5748 TCGv_ptr fpst = get_fpstatus_ptr(false);
5749 TCGv_i32 ahp = get_ahp_flag();
5750 /* Double to half */
5751 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5752 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5753 tcg_temp_free_ptr(fpst);
5754 tcg_temp_free_i32(ahp);
5755 }
5756 write_fp_sreg(s, rd, tcg_rd);
5757 tcg_temp_free_i32(tcg_rd);
5758 tcg_temp_free_i64(tcg_rn);
5759 break;
5760 }
5761 case 0x3:
5762 {
5763 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5764 TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
5765 TCGv_i32 tcg_ahp = get_ahp_flag();
5766 tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
5767 if (dtype == 0) {
5768 /* Half to single */
5769 TCGv_i32 tcg_rd = tcg_temp_new_i32();
5770 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5771 write_fp_sreg(s, rd, tcg_rd);
5772 tcg_temp_free_ptr(tcg_fpst);
5773 tcg_temp_free_i32(tcg_ahp);
5774 tcg_temp_free_i32(tcg_rd);
5775 } else {
5776 /* Half to double */
5777 TCGv_i64 tcg_rd = tcg_temp_new_i64();
5778 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5779 write_fp_dreg(s, rd, tcg_rd);
5780 tcg_temp_free_i64(tcg_rd);
5781 }
5782 tcg_temp_free_i32(tcg_rn);
5783 break;
5784 }
5785 default:
        g_assert_not_reached();
5787 }
5788}
5789
5790/* Floating point data-processing (1 source)
5791 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
5792 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5793 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
5794 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5795 */
5796static void disas_fp_1src(DisasContext *s, uint32_t insn)
5797{
5798 int mos = extract32(insn, 29, 3);
5799 int type = extract32(insn, 22, 2);
5800 int opcode = extract32(insn, 15, 6);
5801 int rn = extract32(insn, 5, 5);
5802 int rd = extract32(insn, 0, 5);
5803
5804 if (mos) {
5805 unallocated_encoding(s);
5806 return;
5807 }
5808
5809 switch (opcode) {
5810 case 0x4: case 0x5: case 0x7:
5811 {
5812 /* FCVT between half, single and double precision */
5813 int dtype = extract32(opcode, 0, 2);
5814 if (type == 2 || dtype == type) {
5815 unallocated_encoding(s);
5816 return;
5817 }
5818 if (!fp_access_check(s)) {
5819 return;
5820 }
5821
5822 handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
5823 break;
5824 }
5825
5826 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
5827 if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
5828 unallocated_encoding(s);
5829 return;
5830 }
5831 /* fall through */
5832 case 0x0 ... 0x3:
5833 case 0x8 ... 0xc:
5834 case 0xe ... 0xf:
5835 /* 32-to-32 and 64-to-64 ops */
5836 switch (type) {
5837 case 0:
5838 if (!fp_access_check(s)) {
5839 return;
5840 }
5841 handle_fp_1src_single(s, opcode, rd, rn);
5842 break;
5843 case 1:
5844 if (!fp_access_check(s)) {
5845 return;
5846 }
5847 handle_fp_1src_double(s, opcode, rd, rn);
5848 break;
5849 case 3:
5850 if (!dc_isar_feature(aa64_fp16, s)) {
5851 unallocated_encoding(s);
5852 return;
5853 }
5854
5855 if (!fp_access_check(s)) {
5856 return;
5857 }
5858 handle_fp_1src_half(s, opcode, rd, rn);
5859 break;
5860 default:
5861 unallocated_encoding(s);
5862 }
5863 break;
5864
5865 default:
5866 unallocated_encoding(s);
5867 break;
5868 }
5869}
5870
5871/* Floating-point data-processing (2 source) - single precision */
5872static void handle_fp_2src_single(DisasContext *s, int opcode,
5873 int rd, int rn, int rm)
5874{
5875 TCGv_i32 tcg_op1;
5876 TCGv_i32 tcg_op2;
5877 TCGv_i32 tcg_res;
5878 TCGv_ptr fpst;
5879
5880 tcg_res = tcg_temp_new_i32();
5881 fpst = get_fpstatus_ptr(false);
5882 tcg_op1 = read_fp_sreg(s, rn);
5883 tcg_op2 = read_fp_sreg(s, rm);
5884
5885 switch (opcode) {
5886 case 0x0: /* FMUL */
5887 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5888 break;
5889 case 0x1: /* FDIV */
5890 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
5891 break;
5892 case 0x2: /* FADD */
5893 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
5894 break;
5895 case 0x3: /* FSUB */
5896 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
5897 break;
5898 case 0x4: /* FMAX */
5899 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
5900 break;
5901 case 0x5: /* FMIN */
5902 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
5903 break;
5904 case 0x6: /* FMAXNM */
5905 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
5906 break;
5907 case 0x7: /* FMINNM */
5908 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
5909 break;
5910 case 0x8: /* FNMUL */
5911 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5912 gen_helper_vfp_negs(tcg_res, tcg_res);
5913 break;
5914 }
5915
5916 write_fp_sreg(s, rd, tcg_res);
5917
5918 tcg_temp_free_ptr(fpst);
5919 tcg_temp_free_i32(tcg_op1);
5920 tcg_temp_free_i32(tcg_op2);
5921 tcg_temp_free_i32(tcg_res);
5922}
5923
5924/* Floating-point data-processing (2 source) - double precision */
5925static void handle_fp_2src_double(DisasContext *s, int opcode,
5926 int rd, int rn, int rm)
5927{
5928 TCGv_i64 tcg_op1;
5929 TCGv_i64 tcg_op2;
5930 TCGv_i64 tcg_res;
5931 TCGv_ptr fpst;
5932
5933 tcg_res = tcg_temp_new_i64();
5934 fpst = get_fpstatus_ptr(false);
5935 tcg_op1 = read_fp_dreg(s, rn);
5936 tcg_op2 = read_fp_dreg(s, rm);
5937
5938 switch (opcode) {
5939 case 0x0: /* FMUL */
5940 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5941 break;
5942 case 0x1: /* FDIV */
5943 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
5944 break;
5945 case 0x2: /* FADD */
5946 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5947 break;
5948 case 0x3: /* FSUB */
5949 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
5950 break;
5951 case 0x4: /* FMAX */
5952 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5953 break;
5954 case 0x5: /* FMIN */
5955 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5956 break;
5957 case 0x6: /* FMAXNM */
5958 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5959 break;
5960 case 0x7: /* FMINNM */
5961 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5962 break;
5963 case 0x8: /* FNMUL */
5964 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5965 gen_helper_vfp_negd(tcg_res, tcg_res);
5966 break;
5967 }
5968
5969 write_fp_dreg(s, rd, tcg_res);
5970
5971 tcg_temp_free_ptr(fpst);
5972 tcg_temp_free_i64(tcg_op1);
5973 tcg_temp_free_i64(tcg_op2);
5974 tcg_temp_free_i64(tcg_res);
5975}
5976
5977/* Floating-point data-processing (2 source) - half precision */
5978static void handle_fp_2src_half(DisasContext *s, int opcode,
5979 int rd, int rn, int rm)
5980{
5981 TCGv_i32 tcg_op1;
5982 TCGv_i32 tcg_op2;
5983 TCGv_i32 tcg_res;
5984 TCGv_ptr fpst;
5985
5986 tcg_res = tcg_temp_new_i32();
5987 fpst = get_fpstatus_ptr(true);
5988 tcg_op1 = read_fp_hreg(s, rn);
5989 tcg_op2 = read_fp_hreg(s, rm);
5990
5991 switch (opcode) {
5992 case 0x0: /* FMUL */
5993 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
5994 break;
5995 case 0x1: /* FDIV */
5996 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
5997 break;
5998 case 0x2: /* FADD */
5999 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6000 break;
6001 case 0x3: /* FSUB */
6002 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6003 break;
6004 case 0x4: /* FMAX */
6005 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6006 break;
6007 case 0x5: /* FMIN */
6008 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6009 break;
6010 case 0x6: /* FMAXNM */
6011 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6012 break;
6013 case 0x7: /* FMINNM */
6014 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6015 break;
6016 case 0x8: /* FNMUL */
6017 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
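        /* negate by flipping the half-precision sign bit */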
6018 tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6019 break;
6020 default:
6021 g_assert_not_reached();
6022 }
6023
6024 write_fp_sreg(s, rd, tcg_res);
6025
6026 tcg_temp_free_ptr(fpst);
6027 tcg_temp_free_i32(tcg_op1);
6028 tcg_temp_free_i32(tcg_op2);
6029 tcg_temp_free_i32(tcg_res);
6030}
6031
6032/* Floating point data-processing (2 source)
6033 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
6034 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6035 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
6036 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6037 */
6038static void disas_fp_2src(DisasContext *s, uint32_t insn)
6039{
6040 int mos = extract32(insn, 29, 3);
6041 int type = extract32(insn, 22, 2);
6042 int rd = extract32(insn, 0, 5);
6043 int rn = extract32(insn, 5, 5);
6044 int rm = extract32(insn, 16, 5);
6045 int opcode = extract32(insn, 12, 4);
6046
6047 if (opcode > 8 || mos) {
6048 unallocated_encoding(s);
6049 return;
6050 }
6051
6052 switch (type) {
6053 case 0:
6054 if (!fp_access_check(s)) {
6055 return;
6056 }
6057 handle_fp_2src_single(s, opcode, rd, rn, rm);
6058 break;
6059 case 1:
6060 if (!fp_access_check(s)) {
6061 return;
6062 }
6063 handle_fp_2src_double(s, opcode, rd, rn, rm);
6064 break;
6065 case 3:
6066 if (!dc_isar_feature(aa64_fp16, s)) {
6067 unallocated_encoding(s);
6068 return;
6069 }
6070 if (!fp_access_check(s)) {
6071 return;
6072 }
6073 handle_fp_2src_half(s, opcode, rd, rn, rm);
6074 break;
6075 default:
6076 unallocated_encoding(s);
6077 }
6078}
6079
6080/* Floating-point data-processing (3 source) - single precision */
6081static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6082 int rd, int rn, int rm, int ra)
6083{
6084 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6085 TCGv_i32 tcg_res = tcg_temp_new_i32();
6086 TCGv_ptr fpst = get_fpstatus_ptr(false);
6087
6088 tcg_op1 = read_fp_sreg(s, rn);
6089 tcg_op2 = read_fp_sreg(s, rm);
6090 tcg_op3 = read_fp_sreg(s, ra);
6091
    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
    if (o1) {
        gen_helper_vfp_negs(tcg_op3, tcg_op3);
    }
6102
6103 if (o0 != o1) {
6104 gen_helper_vfp_negs(tcg_op1, tcg_op1);
6105 }
6106
6107 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6108
6109 write_fp_sreg(s, rd, tcg_res);
6110
6111 tcg_temp_free_ptr(fpst);
6112 tcg_temp_free_i32(tcg_op1);
6113 tcg_temp_free_i32(tcg_op2);
6114 tcg_temp_free_i32(tcg_op3);
6115 tcg_temp_free_i32(tcg_res);
6116}
6117
6118/* Floating-point data-processing (3 source) - double precision */
6119static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6120 int rd, int rn, int rm, int ra)
6121{
6122 TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6123 TCGv_i64 tcg_res = tcg_temp_new_i64();
6124 TCGv_ptr fpst = get_fpstatus_ptr(false);
6125
6126 tcg_op1 = read_fp_dreg(s, rn);
6127 tcg_op2 = read_fp_dreg(s, rm);
6128 tcg_op3 = read_fp_dreg(s, ra);
6129
    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
    if (o1) {
        gen_helper_vfp_negd(tcg_op3, tcg_op3);
    }
6140
6141 if (o0 != o1) {
6142 gen_helper_vfp_negd(tcg_op1, tcg_op1);
6143 }
6144
6145 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6146
6147 write_fp_dreg(s, rd, tcg_res);
6148
6149 tcg_temp_free_ptr(fpst);
6150 tcg_temp_free_i64(tcg_op1);
6151 tcg_temp_free_i64(tcg_op2);
6152 tcg_temp_free_i64(tcg_op3);
6153 tcg_temp_free_i64(tcg_res);
6154}
6155
6156/* Floating-point data-processing (3 source) - half precision */
6157static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6158 int rd, int rn, int rm, int ra)
6159{
6160 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6161 TCGv_i32 tcg_res = tcg_temp_new_i32();
6162 TCGv_ptr fpst = get_fpstatus_ptr(true);
6163
6164 tcg_op1 = read_fp_hreg(s, rn);
6165 tcg_op2 = read_fp_hreg(s, rm);
6166 tcg_op3 = read_fp_hreg(s, ra);
6167
    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
    if (o1) {
        tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
    }
6178
6179 if (o0 != o1) {
6180 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6181 }
6182
6183 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6184
6185 write_fp_sreg(s, rd, tcg_res);
6186
6187 tcg_temp_free_ptr(fpst);
6188 tcg_temp_free_i32(tcg_op1);
6189 tcg_temp_free_i32(tcg_op2);
6190 tcg_temp_free_i32(tcg_op3);
6191 tcg_temp_free_i32(tcg_res);
6192}
6193
6194/* Floating point data-processing (3 source)
6195 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
6196 * +---+---+---+-----------+------+----+------+----+------+------+------+
6197 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd |
6198 * +---+---+---+-----------+------+----+------+----+------+------+------+
6199 */
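/* The o1:o0 pair selects the fused multiply-add variant implemented by
 * the handle_fp_3src_* functions above:
 *   o1 o0
 *    0  0   FMADD:  Rd =  (Rn * Rm) + Ra
 *    0  1   FMSUB:  Rd = -(Rn * Rm) + Ra
 *    1  0   FNMADD: Rd = -(Rn * Rm) - Ra
 *    1  1   FNMSUB: Rd =  (Rn * Rm) - Ra
 */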
6200static void disas_fp_3src(DisasContext *s, uint32_t insn)
6201{
6202 int mos = extract32(insn, 29, 3);
6203 int type = extract32(insn, 22, 2);
6204 int rd = extract32(insn, 0, 5);
6205 int rn = extract32(insn, 5, 5);
6206 int ra = extract32(insn, 10, 5);
6207 int rm = extract32(insn, 16, 5);
6208 bool o0 = extract32(insn, 15, 1);
6209 bool o1 = extract32(insn, 21, 1);
6210
6211 if (mos) {
6212 unallocated_encoding(s);
6213 return;
6214 }
6215
6216 switch (type) {
6217 case 0:
6218 if (!fp_access_check(s)) {
6219 return;
6220 }
6221 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6222 break;
6223 case 1:
6224 if (!fp_access_check(s)) {
6225 return;
6226 }
6227 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6228 break;
6229 case 3:
6230 if (!dc_isar_feature(aa64_fp16, s)) {
6231 unallocated_encoding(s);
6232 return;
6233 }
6234 if (!fp_access_check(s)) {
6235 return;
6236 }
6237 handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6238 break;
6239 default:
6240 unallocated_encoding(s);
6241 }
6242}
6243
6244/* Floating point immediate
6245 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
6246 * +---+---+---+-----------+------+---+------------+-------+------+------+
6247 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
6248 * +---+---+---+-----------+------+---+------------+-------+------+------+
6249 */
6250static void disas_fp_imm(DisasContext *s, uint32_t insn)
6251{
6252 int rd = extract32(insn, 0, 5);
6253 int imm5 = extract32(insn, 5, 5);
6254 int imm8 = extract32(insn, 13, 8);
6255 int type = extract32(insn, 22, 2);
6256 int mos = extract32(insn, 29, 3);
6257 uint64_t imm;
6258 TCGv_i64 tcg_res;
6259 MemOp sz;
6260
6261 if (mos || imm5) {
6262 unallocated_encoding(s);
6263 return;
6264 }
6265
6266 switch (type) {
6267 case 0:
6268 sz = MO_32;
6269 break;
6270 case 1:
6271 sz = MO_64;
6272 break;
6273 case 3:
6274 sz = MO_16;
6275 if (dc_isar_feature(aa64_fp16, s)) {
6276 break;
6277 }
6278 /* fallthru */
6279 default:
6280 unallocated_encoding(s);
6281 return;
6282 }
6283
6284 if (!fp_access_check(s)) {
6285 return;
6286 }
6287
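    /* e.g. imm8 == 0x70 expands to 1.0 at any of the three sizes:
     * 0x3c00, 0x3f800000 or 0x3ff0000000000000.
     */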
6288 imm = vfp_expand_imm(sz, imm8);
6289
6290 tcg_res = tcg_const_i64(imm);
6291 write_fp_dreg(s, rd, tcg_res);
6292 tcg_temp_free_i64(tcg_res);
6293}
6294
6295/* Handle floating point <=> fixed point conversions. Note that we can
6296 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * also deal with fp <=> integer conversions as a special case (scale == 64).
6298 * the call to scalbn in the helpers for zero shifts.
6299 */
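/* The helpers treat tcg_shift (== 64 - scale) as a count of fractional
 * bits, so the integer case (scale == 64) passes a shift of zero.
 */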
6300static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6301 bool itof, int rmode, int scale, int sf, int type)
6302{
6303 bool is_signed = !(opcode & 1);
6304 TCGv_ptr tcg_fpstatus;
6305 TCGv_i32 tcg_shift, tcg_single;
6306 TCGv_i64 tcg_double;
6307
6308 tcg_fpstatus = get_fpstatus_ptr(type == 3);
6309
6310 tcg_shift = tcg_const_i32(64 - scale);
6311
6312 if (itof) {
6313 TCGv_i64 tcg_int = cpu_reg(s, rn);
6314 if (!sf) {
6315 TCGv_i64 tcg_extend = new_tmp_a64(s);
6316
6317 if (is_signed) {
6318 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6319 } else {
6320 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6321 }
6322
6323 tcg_int = tcg_extend;
6324 }
6325
6326 switch (type) {
6327 case 1: /* float64 */
6328 tcg_double = tcg_temp_new_i64();
6329 if (is_signed) {
6330 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6331 tcg_shift, tcg_fpstatus);
6332 } else {
6333 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6334 tcg_shift, tcg_fpstatus);
6335 }
6336 write_fp_dreg(s, rd, tcg_double);
6337 tcg_temp_free_i64(tcg_double);
6338 break;
6339
6340 case 0: /* float32 */
6341 tcg_single = tcg_temp_new_i32();
6342 if (is_signed) {
6343 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6344 tcg_shift, tcg_fpstatus);
6345 } else {
6346 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6347 tcg_shift, tcg_fpstatus);
6348 }
6349 write_fp_sreg(s, rd, tcg_single);
6350 tcg_temp_free_i32(tcg_single);
6351 break;
6352
6353 case 3: /* float16 */
6354 tcg_single = tcg_temp_new_i32();
6355 if (is_signed) {
6356 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6357 tcg_shift, tcg_fpstatus);
6358 } else {
6359 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6360 tcg_shift, tcg_fpstatus);
6361 }
6362 write_fp_sreg(s, rd, tcg_single);
6363 tcg_temp_free_i32(tcg_single);
6364 break;
6365
6366 default:
6367 g_assert_not_reached();
6368 }
6369 } else {
6370 TCGv_i64 tcg_int = cpu_reg(s, rd);
6371 TCGv_i32 tcg_rmode;
6372
6373 if (extract32(opcode, 2, 1)) {
6374 /* There are too many rounding modes to all fit into rmode,
6375 * so FCVTA[US] is a special case.
6376 */
6377 rmode = FPROUNDING_TIEAWAY;
6378 }
6379
6380 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6381
6382 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6383
6384 switch (type) {
6385 case 1: /* float64 */
6386 tcg_double = read_fp_dreg(s, rn);
6387 if (is_signed) {
6388 if (!sf) {
6389 gen_helper_vfp_tosld(tcg_int, tcg_double,
6390 tcg_shift, tcg_fpstatus);
6391 } else {
6392 gen_helper_vfp_tosqd(tcg_int, tcg_double,
6393 tcg_shift, tcg_fpstatus);
6394 }
6395 } else {
6396 if (!sf) {
6397 gen_helper_vfp_tould(tcg_int, tcg_double,
6398 tcg_shift, tcg_fpstatus);
6399 } else {
6400 gen_helper_vfp_touqd(tcg_int, tcg_double,
6401 tcg_shift, tcg_fpstatus);
6402 }
6403 }
6404 if (!sf) {
6405 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6406 }
6407 tcg_temp_free_i64(tcg_double);
6408 break;
6409
6410 case 0: /* float32 */
6411 tcg_single = read_fp_sreg(s, rn);
6412 if (sf) {
6413 if (is_signed) {
6414 gen_helper_vfp_tosqs(tcg_int, tcg_single,
6415 tcg_shift, tcg_fpstatus);
6416 } else {
6417 gen_helper_vfp_touqs(tcg_int, tcg_single,
6418 tcg_shift, tcg_fpstatus);
6419 }
6420 } else {
6421 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6422 if (is_signed) {
6423 gen_helper_vfp_tosls(tcg_dest, tcg_single,
6424 tcg_shift, tcg_fpstatus);
6425 } else {
6426 gen_helper_vfp_touls(tcg_dest, tcg_single,
6427 tcg_shift, tcg_fpstatus);
6428 }
6429 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6430 tcg_temp_free_i32(tcg_dest);
6431 }
6432 tcg_temp_free_i32(tcg_single);
6433 break;
6434
6435 case 3: /* float16 */
6436 tcg_single = read_fp_sreg(s, rn);
6437 if (sf) {
6438 if (is_signed) {
6439 gen_helper_vfp_tosqh(tcg_int, tcg_single,
6440 tcg_shift, tcg_fpstatus);
6441 } else {
6442 gen_helper_vfp_touqh(tcg_int, tcg_single,
6443 tcg_shift, tcg_fpstatus);
6444 }
6445 } else {
6446 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6447 if (is_signed) {
6448 gen_helper_vfp_toslh(tcg_dest, tcg_single,
6449 tcg_shift, tcg_fpstatus);
6450 } else {
6451 gen_helper_vfp_toulh(tcg_dest, tcg_single,
6452 tcg_shift, tcg_fpstatus);
6453 }
6454 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6455 tcg_temp_free_i32(tcg_dest);
6456 }
6457 tcg_temp_free_i32(tcg_single);
6458 break;
6459
6460 default:
6461 g_assert_not_reached();
6462 }
6463
6464 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6465 tcg_temp_free_i32(tcg_rmode);
6466 }
6467
6468 tcg_temp_free_ptr(tcg_fpstatus);
6469 tcg_temp_free_i32(tcg_shift);
6470}
6471
6472/* Floating point <-> fixed point conversions
6473 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
6474 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6475 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
6476 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6477 */
6478static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6479{
6480 int rd = extract32(insn, 0, 5);
6481 int rn = extract32(insn, 5, 5);
6482 int scale = extract32(insn, 10, 6);
6483 int opcode = extract32(insn, 16, 3);
6484 int rmode = extract32(insn, 19, 2);
6485 int type = extract32(insn, 22, 2);
6486 bool sbit = extract32(insn, 29, 1);
6487 bool sf = extract32(insn, 31, 1);
6488 bool itof;
6489
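    /* The number of fraction bits is 64 - scale, so a 32-bit operation
     * cannot use a scale below 32 (more than 32 fraction bits).
     */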
6490 if (sbit || (!sf && scale < 32)) {
6491 unallocated_encoding(s);
6492 return;
6493 }
6494
6495 switch (type) {
6496 case 0: /* float32 */
6497 case 1: /* float64 */
6498 break;
6499 case 3: /* float16 */
6500 if (dc_isar_feature(aa64_fp16, s)) {
6501 break;
6502 }
6503 /* fallthru */
6504 default:
6505 unallocated_encoding(s);
6506 return;
6507 }
6508
6509 switch ((rmode << 3) | opcode) {
6510 case 0x2: /* SCVTF */
6511 case 0x3: /* UCVTF */
6512 itof = true;
6513 break;
6514 case 0x18: /* FCVTZS */
6515 case 0x19: /* FCVTZU */
6516 itof = false;
6517 break;
6518 default:
6519 unallocated_encoding(s);
6520 return;
6521 }
6522
6523 if (!fp_access_check(s)) {
6524 return;
6525 }
6526
6527 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6528}
6529
6530static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6531{
6532 /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6533 * without conversion.
6534 */
6535
6536 if (itof) {
6537 TCGv_i64 tcg_rn = cpu_reg(s, rn);
6538 TCGv_i64 tmp;
6539
6540 switch (type) {
6541 case 0:
6542 /* 32 bit */
6543 tmp = tcg_temp_new_i64();
6544 tcg_gen_ext32u_i64(tmp, tcg_rn);
6545 write_fp_dreg(s, rd, tmp);
6546 tcg_temp_free_i64(tmp);
6547 break;
6548 case 1:
6549 /* 64 bit */
6550 write_fp_dreg(s, rd, tcg_rn);
6551 break;
6552 case 2:
6553 /* 64 bit to top half. */
6554 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6555 clear_vec_high(s, true, rd);
6556 break;
6557 case 3:
6558 /* 16 bit */
6559 tmp = tcg_temp_new_i64();
6560 tcg_gen_ext16u_i64(tmp, tcg_rn);
6561 write_fp_dreg(s, rd, tmp);
6562 tcg_temp_free_i64(tmp);
6563 break;
6564 default:
6565 g_assert_not_reached();
6566 }
6567 } else {
6568 TCGv_i64 tcg_rd = cpu_reg(s, rd);
6569
6570 switch (type) {
6571 case 0:
6572 /* 32 bit */
6573 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6574 break;
6575 case 1:
6576 /* 64 bit */
6577 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6578 break;
6579 case 2:
6580 /* 64 bits from top half */
6581 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
6582 break;
6583 case 3:
6584 /* 16 bit */
6585 tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
6586 break;
6587 default:
6588 g_assert_not_reached();
6589 }
6590 }
6591}
6592
6593static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6594{
6595 TCGv_i64 t = read_fp_dreg(s, rn);
6596 TCGv_ptr fpstatus = get_fpstatus_ptr(false);
6597
6598 gen_helper_fjcvtzs(t, t, fpstatus);
6599
6600 tcg_temp_free_ptr(fpstatus);
6601
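    /* The helper leaves the 32-bit result in the low half of t and the
     * value for cpu_ZF in the high half: Z ends up set (cpu_ZF == 0)
     * only when the conversion was exact. N, C and V are all cleared.
     */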
6602 tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6603 tcg_gen_extrh_i64_i32(cpu_ZF, t);
6604 tcg_gen_movi_i32(cpu_CF, 0);
6605 tcg_gen_movi_i32(cpu_NF, 0);
6606 tcg_gen_movi_i32(cpu_VF, 0);
6607
6608 tcg_temp_free_i64(t);
6609}
6610
6611/* Floating point <-> integer conversions
6612 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
6613 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6614 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6615 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6616 */
6617static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6618{
6619 int rd = extract32(insn, 0, 5);
6620 int rn = extract32(insn, 5, 5);
6621 int opcode = extract32(insn, 16, 3);
6622 int rmode = extract32(insn, 19, 2);
6623 int type = extract32(insn, 22, 2);
6624 bool sbit = extract32(insn, 29, 1);
6625 bool sf = extract32(insn, 31, 1);
6626 bool itof = false;
6627
6628 if (sbit) {
6629 goto do_unallocated;
6630 }
6631
6632 switch (opcode) {
6633 case 2: /* SCVTF */
6634 case 3: /* UCVTF */
6635 itof = true;
6636 /* fallthru */
6637 case 4: /* FCVTAS */
6638 case 5: /* FCVTAU */
6639 if (rmode != 0) {
6640 goto do_unallocated;
6641 }
6642 /* fallthru */
6643 case 0: /* FCVT[NPMZ]S */
6644 case 1: /* FCVT[NPMZ]U */
6645 switch (type) {
6646 case 0: /* float32 */
6647 case 1: /* float64 */
6648 break;
6649 case 3: /* float16 */
6650 if (!dc_isar_feature(aa64_fp16, s)) {
6651 goto do_unallocated;
6652 }
6653 break;
6654 default:
6655 goto do_unallocated;
6656 }
6657 if (!fp_access_check(s)) {
6658 return;
6659 }
6660 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6661 break;
6662
6663 default:
6664 switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
6665 case 0b01100110: /* FMOV half <-> 32-bit int */
6666 case 0b01100111:
6667 case 0b11100110: /* FMOV half <-> 64-bit int */
6668 case 0b11100111:
6669 if (!dc_isar_feature(aa64_fp16, s)) {
6670 goto do_unallocated;
6671 }
6672 /* fallthru */
6673 case 0b00000110: /* FMOV 32-bit */
6674 case 0b00000111:
6675 case 0b10100110: /* FMOV 64-bit */
6676 case 0b10100111:
6677 case 0b11001110: /* FMOV top half of 128-bit */
6678 case 0b11001111:
6679 if (!fp_access_check(s)) {
6680 return;
6681 }
6682 itof = opcode & 1;
6683 handle_fmov(s, rd, rn, type, itof);
6684 break;
6685
6686 case 0b00111110: /* FJCVTZS */
6687 if (!dc_isar_feature(aa64_jscvt, s)) {
6688 goto do_unallocated;
6689 } else if (fp_access_check(s)) {
6690 handle_fjcvtzs(s, rd, rn);
6691 }
6692 break;
6693
6694 default:
6695 do_unallocated:
6696 unallocated_encoding(s);
6697 return;
6698 }
6699 break;
6700 }
6701}
6702
6703/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6704 * 31 30 29 28 25 24 0
6705 * +---+---+---+---------+-----------------------------+
6706 * | | 0 | | 1 1 1 1 | |
6707 * +---+---+---+---------+-----------------------------+
6708 */
6709static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
6710{
6711 if (extract32(insn, 24, 1)) {
6712 /* Floating point data-processing (3 source) */
6713 disas_fp_3src(s, insn);
6714 } else if (extract32(insn, 21, 1) == 0) {
6715 /* Floating point to fixed point conversions */
6716 disas_fp_fixed_conv(s, insn);
6717 } else {
6718 switch (extract32(insn, 10, 2)) {
6719 case 1:
6720 /* Floating point conditional compare */
6721 disas_fp_ccomp(s, insn);
6722 break;
6723 case 2:
6724 /* Floating point data-processing (2 source) */
6725 disas_fp_2src(s, insn);
6726 break;
6727 case 3:
6728 /* Floating point conditional select */
6729 disas_fp_csel(s, insn);
6730 break;
6731 case 0:
6732 switch (ctz32(extract32(insn, 12, 4))) {
6733 case 0: /* [15:12] == xxx1 */
6734 /* Floating point immediate */
6735 disas_fp_imm(s, insn);
6736 break;
6737 case 1: /* [15:12] == xx10 */
6738 /* Floating point compare */
6739 disas_fp_compare(s, insn);
6740 break;
6741 case 2: /* [15:12] == x100 */
6742 /* Floating point data-processing (1 source) */
6743 disas_fp_1src(s, insn);
6744 break;
6745 case 3: /* [15:12] == 1000 */
6746 unallocated_encoding(s);
6747 break;
6748 default: /* [15:12] == 0000 */
6749 /* Floating point <-> integer conversions */
6750 disas_fp_int_conv(s, insn);
6751 break;
6752 }
6753 break;
6754 }
6755 }
6756}
6757
6758static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
6759 int pos)
6760{
6761 /* Extract 64 bits from the middle of two concatenated 64 bit
6762 * vector register slices left:right. The extracted bits start
6763 * at 'pos' bits into the right (least significant) side.
6764 * We return the result in tcg_right, and guarantee not to
6765 * trash tcg_left.
6766 */
6767 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6768 assert(pos > 0 && pos < 64);
6769
6770 tcg_gen_shri_i64(tcg_right, tcg_right, pos);
6771 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
6772 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
6773
6774 tcg_temp_free_i64(tcg_tmp);
6775}
6776
6777/* EXT
6778 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
6779 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6780 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
6781 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6782 */
6783static void disas_simd_ext(DisasContext *s, uint32_t insn)
6784{
6785 int is_q = extract32(insn, 30, 1);
6786 int op2 = extract32(insn, 22, 2);
6787 int imm4 = extract32(insn, 11, 4);
6788 int rm = extract32(insn, 16, 5);
6789 int rn = extract32(insn, 5, 5);
6790 int rd = extract32(insn, 0, 5);
6791 int pos = imm4 << 3;
6792 TCGv_i64 tcg_resl, tcg_resh;
6793
6794 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
6795 unallocated_encoding(s);
6796 return;
6797 }
6798
6799 if (!fp_access_check(s)) {
6800 return;
6801 }
6802
6803 tcg_resh = tcg_temp_new_i64();
6804 tcg_resl = tcg_temp_new_i64();
6805
6806 /* Vd gets bits starting at pos bits into Vm:Vn. This is
6807 * either extracting 128 bits from a 128:128 concatenation, or
6808 * extracting 64 bits from a 64:64 concatenation.
6809 */
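    /* For example, a 128-bit EXT with pos == 96 skips the first
     * doubleword of Vn and combines three 64-bit slices: Vn[1], Vm[0]
     * and Vm[1], with a residual shift of 32 bits.
     */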
6810 if (!is_q) {
6811 read_vec_element(s, tcg_resl, rn, 0, MO_64);
6812 if (pos != 0) {
6813 read_vec_element(s, tcg_resh, rm, 0, MO_64);
6814 do_ext64(s, tcg_resh, tcg_resl, pos);
6815 }
6816 tcg_gen_movi_i64(tcg_resh, 0);
6817 } else {
6818 TCGv_i64 tcg_hh;
6819 typedef struct {
6820 int reg;
6821 int elt;
6822 } EltPosns;
6823 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
6824 EltPosns *elt = eltposns;
6825
6826 if (pos >= 64) {
6827 elt++;
6828 pos -= 64;
6829 }
6830
6831 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
6832 elt++;
6833 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
6834 elt++;
6835 if (pos != 0) {
6836 do_ext64(s, tcg_resh, tcg_resl, pos);
6837 tcg_hh = tcg_temp_new_i64();
6838 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
6839 do_ext64(s, tcg_hh, tcg_resh, pos);
6840 tcg_temp_free_i64(tcg_hh);
6841 }
6842 }
6843
6844 write_vec_element(s, tcg_resl, rd, 0, MO_64);
6845 tcg_temp_free_i64(tcg_resl);
6846 write_vec_element(s, tcg_resh, rd, 1, MO_64);
6847 tcg_temp_free_i64(tcg_resh);
6848}
6849
6850/* TBL/TBX
6851 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
6852 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6853 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
6854 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6855 */
6856static void disas_simd_tb(DisasContext *s, uint32_t insn)
6857{
6858 int op2 = extract32(insn, 22, 2);
6859 int is_q = extract32(insn, 30, 1);
6860 int rm = extract32(insn, 16, 5);
6861 int rn = extract32(insn, 5, 5);
6862 int rd = extract32(insn, 0, 5);
6863 int is_tblx = extract32(insn, 12, 1);
6864 int len = extract32(insn, 13, 2);
6865 TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
6866 TCGv_i32 tcg_regno, tcg_numregs;
6867
6868 if (op2 != 0) {
6869 unallocated_encoding(s);
6870 return;
6871 }
6872
6873 if (!fp_access_check(s)) {
6874 return;
6875 }
6876
6877 /* This does a table lookup: for every byte element in the input
6878 * we index into a table formed from up to four vector registers,
6879 * and then the output is the result of the lookups. Our helper
6880 * function does the lookup operation for a single 64 bit part of
6881 * the input.
6882 */
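    /* TBL zeroes the result bytes for out-of-range indices, while TBX
     * leaves them unchanged, which is why TBX seeds the result with
     * the current contents of Rd below.
     */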
6883 tcg_resl = tcg_temp_new_i64();
6884 tcg_resh = tcg_temp_new_i64();
6885
6886 if (is_tblx) {
6887 read_vec_element(s, tcg_resl, rd, 0, MO_64);
6888 } else {
6889 tcg_gen_movi_i64(tcg_resl, 0);
6890 }
6891 if (is_tblx && is_q) {
6892 read_vec_element(s, tcg_resh, rd, 1, MO_64);
6893 } else {
6894 tcg_gen_movi_i64(tcg_resh, 0);
6895 }
6896
6897 tcg_idx = tcg_temp_new_i64();
6898 tcg_regno = tcg_const_i32(rn);
6899 tcg_numregs = tcg_const_i32(len + 1);
6900 read_vec_element(s, tcg_idx, rm, 0, MO_64);
6901 gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
6902 tcg_regno, tcg_numregs);
6903 if (is_q) {
6904 read_vec_element(s, tcg_idx, rm, 1, MO_64);
6905 gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
6906 tcg_regno, tcg_numregs);
6907 }
6908 tcg_temp_free_i64(tcg_idx);
6909 tcg_temp_free_i32(tcg_regno);
6910 tcg_temp_free_i32(tcg_numregs);
6911
6912 write_vec_element(s, tcg_resl, rd, 0, MO_64);
6913 tcg_temp_free_i64(tcg_resl);
6914 write_vec_element(s, tcg_resh, rd, 1, MO_64);
6915 tcg_temp_free_i64(tcg_resh);
6916}
6917
6918/* ZIP/UZP/TRN
6919 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
6920 * +---+---+-------------+------+---+------+---+------------------+------+
6921 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
6922 * +---+---+-------------+------+---+------+---+------------------+------+
6923 */
6924static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
6925{
6926 int rd = extract32(insn, 0, 5);
6927 int rn = extract32(insn, 5, 5);
6928 int rm = extract32(insn, 16, 5);
6929 int size = extract32(insn, 22, 2);
6930 /* opc field bits [1:0] indicate ZIP/UZP/TRN;
6931 * bit 2 indicates 1 vs 2 variant of the insn.
6932 */
6933 int opcode = extract32(insn, 12, 2);
6934 bool part = extract32(insn, 14, 1);
6935 bool is_q = extract32(insn, 30, 1);
6936 int esize = 8 << size;
6937 int i, ofs;
6938 int datasize = is_q ? 128 : 64;
6939 int elements = datasize / esize;
6940 TCGv_i64 tcg_res, tcg_resl, tcg_resh;
6941
6942 if (opcode == 0 || (size == 3 && !is_q)) {
6943 unallocated_encoding(s);
6944 return;
6945 }
6946
6947 if (!fp_access_check(s)) {
6948 return;
6949 }
6950
6951 tcg_resl = tcg_const_i64(0);
6952 tcg_resh = tcg_const_i64(0);
6953 tcg_res = tcg_temp_new_i64();
6954
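    /* Worked example with four elements per input:
     *   UZP1: Vd = { Vn[0], Vn[2], Vm[0], Vm[2] }
     *   TRN1: Vd = { Vn[0], Vm[0], Vn[2], Vm[2] }
     *   ZIP1: Vd = { Vn[0], Vm[0], Vn[1], Vm[1] }
     * The "2" variants (part == 1) take the odd-numbered (UZP, TRN) or
     * upper-half (ZIP) elements instead.
     */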
6955 for (i = 0; i < elements; i++) {
6956 switch (opcode) {
6957 case 1: /* UZP1/2 */
6958 {
6959 int midpoint = elements / 2;
6960 if (i < midpoint) {
6961 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
6962 } else {
6963 read_vec_element(s, tcg_res, rm,
6964 2 * (i - midpoint) + part, size);
6965 }
6966 break;
6967 }
6968 case 2: /* TRN1/2 */
6969 if (i & 1) {
6970 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
6971 } else {
6972 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
6973 }
6974 break;
6975 case 3: /* ZIP1/2 */
6976 {
6977 int base = part * elements / 2;
6978 if (i & 1) {
6979 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
6980 } else {
6981 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
6982 }
6983 break;
6984 }
6985 default:
6986 g_assert_not_reached();
6987 }
6988
6989 ofs = i * esize;
6990 if (ofs < 64) {
6991 tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
6992 tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
6993 } else {
6994 tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
6995 tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
6996 }
6997 }
6998
6999 tcg_temp_free_i64(tcg_res);
7000
7001 write_vec_element(s, tcg_resl, rd, 0, MO_64);
7002 tcg_temp_free_i64(tcg_resl);
7003 write_vec_element(s, tcg_resh, rd, 1, MO_64);
7004 tcg_temp_free_i64(tcg_resh);
7005}
7006
7007/*
7008 * do_reduction_op helper
7009 *
7010 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7011 * important for correct NaN propagation that we do these
7012 * operations in exactly the order specified by the pseudocode.
7013 *
 * This is a recursive function; TCG temps should be freed by the
 * calling function once it is done with the values.
7016 */
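/* For example, a 4-element reduction starts with vmap == 0b1111; the
 * first split gives vmap_lo == 0b0011 and vmap_hi == 0b1100, so
 * elements {0,1} and {2,3} are reduced separately before the final
 * combine, matching the pairing used by Reduce().
 */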
7017static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7018 int esize, int size, int vmap, TCGv_ptr fpst)
7019{
7020 if (esize == size) {
7021 int element;
7022 MemOp msize = esize == 16 ? MO_16 : MO_32;
7023 TCGv_i32 tcg_elem;
7024
7025 /* We should have one register left here */
7026 assert(ctpop8(vmap) == 1);
7027 element = ctz32(vmap);
7028 assert(element < 8);
7029
7030 tcg_elem = tcg_temp_new_i32();
7031 read_vec_element_i32(s, tcg_elem, rn, element, msize);
7032 return tcg_elem;
7033 } else {
7034 int bits = size / 2;
7035 int shift = ctpop8(vmap) / 2;
7036 int vmap_lo = (vmap >> shift) & vmap;
7037 int vmap_hi = (vmap & ~vmap_lo);
7038 TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7039
7040 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7041 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7042 tcg_res = tcg_temp_new_i32();
7043
7044 switch (fpopcode) {
7045 case 0x0c: /* fmaxnmv half-precision */
7046 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7047 break;
7048 case 0x0f: /* fmaxv half-precision */
7049 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7050 break;
7051 case 0x1c: /* fminnmv half-precision */
7052 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7053 break;
7054 case 0x1f: /* fminv half-precision */
7055 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7056 break;
7057 case 0x2c: /* fmaxnmv */
7058 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7059 break;
7060 case 0x2f: /* fmaxv */
7061 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7062 break;
7063 case 0x3c: /* fminnmv */
7064 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7065 break;
7066 case 0x3f: /* fminv */
7067 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7068 break;
7069 default:
7070 g_assert_not_reached();
7071 }
7072
7073 tcg_temp_free_i32(tcg_hi);
7074 tcg_temp_free_i32(tcg_lo);
7075 return tcg_res;
7076 }
7077}
7078
7079/* AdvSIMD across lanes
7080 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7081 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7082 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
7083 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7084 */
7085static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7086{
7087 int rd = extract32(insn, 0, 5);
7088 int rn = extract32(insn, 5, 5);
7089 int size = extract32(insn, 22, 2);
7090 int opcode = extract32(insn, 12, 5);
7091 bool is_q = extract32(insn, 30, 1);
7092 bool is_u = extract32(insn, 29, 1);
7093 bool is_fp = false;
7094 bool is_min = false;
7095 int esize;
7096 int elements;
7097 int i;
7098 TCGv_i64 tcg_res, tcg_elt;
7099
7100 switch (opcode) {
7101 case 0x1b: /* ADDV */
7102 if (is_u) {
7103 unallocated_encoding(s);
7104 return;
7105 }
7106 /* fall through */
7107 case 0x3: /* SADDLV, UADDLV */
7108 case 0xa: /* SMAXV, UMAXV */
7109 case 0x1a: /* SMINV, UMINV */
7110 if (size == 3 || (size == 2 && !is_q)) {
7111 unallocated_encoding(s);
7112 return;
7113 }
7114 break;
7115 case 0xc: /* FMAXNMV, FMINNMV */
7116 case 0xf: /* FMAXV, FMINV */
        /* Bit 1 of the size field encodes min vs max, and the actual
         * size depends on the encoding of the U bit. If the U bit is
         * not set (and FP16 is enabled) then we do half-precision float
         * instead of single precision.
         */
7122 is_min = extract32(size, 1, 1);
7123 is_fp = true;
7124 if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7125 size = 1;
7126 } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7127 unallocated_encoding(s);
7128 return;
7129 } else {
7130 size = 2;
7131 }
7132 break;
7133 default:
7134 unallocated_encoding(s);
7135 return;
7136 }
7137
7138 if (!fp_access_check(s)) {
7139 return;
7140 }
7141
7142 esize = 8 << size;
7143 elements = (is_q ? 128 : 64) / esize;
7144
7145 tcg_res = tcg_temp_new_i64();
7146 tcg_elt = tcg_temp_new_i64();
7147
7148 /* These instructions operate across all lanes of a vector
7149 * to produce a single result. We can guarantee that a 64
7150 * bit intermediate is sufficient:
7151 * + for [US]ADDLV the maximum element size is 32 bits, and
7152 * the result type is 64 bits
7153 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7154 * same as the element size, which is 32 bits at most
7155 * For the integer operations we can choose to work at 64
7156 * or 32 bits and truncate at the end; for simplicity
7157 * we use 64 bits always. The floating point
7158 * ops do require 32 bit intermediates, though.
7159 */
7160 if (!is_fp) {
7161 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7162
7163 for (i = 1; i < elements; i++) {
7164 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7165
7166 switch (opcode) {
7167 case 0x03: /* SADDLV / UADDLV */
7168 case 0x1b: /* ADDV */
7169 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7170 break;
7171 case 0x0a: /* SMAXV / UMAXV */
7172 if (is_u) {
7173 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7174 } else {
7175 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7176 }
7177 break;
7178 case 0x1a: /* SMINV / UMINV */
7179 if (is_u) {
7180 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7181 } else {
7182 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7183 }
7184 break;
7185 default:
7186 g_assert_not_reached();
7187 }
7188
7189 }
7190 } else {
7191 /* Floating point vector reduction ops which work across 32
7192 * bit (single) or 16 bit (half-precision) intermediates.
7193 * Note that correct NaN propagation requires that we do these
7194 * operations in exactly the order specified by the pseudocode.
7195 */
7196 TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
7197 int fpopcode = opcode | is_min << 4 | is_u << 5;
7198 int vmap = (1 << elements) - 1;
7199 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7200 (is_q ? 128 : 64), vmap, fpst);
7201 tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7202 tcg_temp_free_i32(tcg_res32);
7203 tcg_temp_free_ptr(fpst);
7204 }
7205
7206 tcg_temp_free_i64(tcg_elt);
7207
7208 /* Now truncate the result to the width required for the final output */
7209 if (opcode == 0x03) {
7210 /* SADDLV, UADDLV: result is 2*esize */
7211 size++;
7212 }
7213
7214 switch (size) {
7215 case 0:
7216 tcg_gen_ext8u_i64(tcg_res, tcg_res);
7217 break;
7218 case 1:
7219 tcg_gen_ext16u_i64(tcg_res, tcg_res);
7220 break;
7221 case 2:
7222 tcg_gen_ext32u_i64(tcg_res, tcg_res);
7223 break;
7224 case 3:
7225 break;
7226 default:
7227 g_assert_not_reached();
7228 }
7229
7230 write_fp_dreg(s, rd, tcg_res);
7231 tcg_temp_free_i64(tcg_res);
7232}
7233
7234/* DUP (Element, Vector)
7235 *
7236 * 31 30 29 21 20 16 15 10 9 5 4 0
7237 * +---+---+-------------------+--------+-------------+------+------+
7238 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
7239 * +---+---+-------------------+--------+-------------+------+------+
7240 *
7241 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7242 */
7243static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7244 int imm5)
7245{
7246 int size = ctz32(imm5);
7247 int index = imm5 >> (size + 1);
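    /* e.g. imm5 == 0b01100 gives size 2 (32-bit elements) and
     * index imm5<4:3> == 1.
     */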
7248
7249 if (size > 3 || (size == 3 && !is_q)) {
7250 unallocated_encoding(s);
7251 return;
7252 }
7253
7254 if (!fp_access_check(s)) {
7255 return;
7256 }
7257
7258 tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7259 vec_reg_offset(s, rn, index, size),
7260 is_q ? 16 : 8, vec_full_reg_size(s));
7261}
7262
7263/* DUP (element, scalar)
7264 * 31 21 20 16 15 10 9 5 4 0
7265 * +-----------------------+--------+-------------+------+------+
7266 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
7267 * +-----------------------+--------+-------------+------+------+
7268 */
7269static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7270 int imm5)
7271{
7272 int size = ctz32(imm5);
7273 int index;
7274 TCGv_i64 tmp;
7275
7276 if (size > 3) {
7277 unallocated_encoding(s);
7278 return;
7279 }
7280
7281 if (!fp_access_check(s)) {
7282 return;
7283 }
7284
7285 index = imm5 >> (size + 1);
7286
7287 /* This instruction just extracts the specified element and
7288 * zero-extends it into the bottom of the destination register.
7289 */
7290 tmp = tcg_temp_new_i64();
7291 read_vec_element(s, tmp, rn, index, size);
7292 write_fp_dreg(s, rd, tmp);
7293 tcg_temp_free_i64(tmp);
7294}
7295
7296/* DUP (General)
7297 *
7298 * 31 30 29 21 20 16 15 10 9 5 4 0
7299 * +---+---+-------------------+--------+-------------+------+------+
7300 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
7301 * +---+---+-------------------+--------+-------------+------+------+
7302 *
7303 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7304 */
7305static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7306 int imm5)
7307{
7308 int size = ctz32(imm5);
7309 uint32_t dofs, oprsz, maxsz;
7310
7311 if (size > 3 || ((size == 3) && !is_q)) {
7312 unallocated_encoding(s);
7313 return;
7314 }
7315
7316 if (!fp_access_check(s)) {
7317 return;
7318 }
7319
7320 dofs = vec_full_reg_offset(s, rd);
7321 oprsz = is_q ? 16 : 8;
7322 maxsz = vec_full_reg_size(s);
7323
7324 tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7325}
7326
7327/* INS (Element)
7328 *
7329 * 31 21 20 16 15 14 11 10 9 5 4 0
7330 * +-----------------------+--------+------------+---+------+------+
7331 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7332 * +-----------------------+--------+------------+---+------+------+
7333 *
7334 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7335 * index: encoded in imm5<4:size+1>
7336 */
7337static void handle_simd_inse(DisasContext *s, int rd, int rn,
7338 int imm4, int imm5)
7339{
7340 int size = ctz32(imm5);
7341 int src_index, dst_index;
7342 TCGv_i64 tmp;
7343
7344 if (size > 3) {
7345 unallocated_encoding(s);
7346 return;
7347 }
7348
7349 if (!fp_access_check(s)) {
7350 return;
7351 }
7352
7353 dst_index = extract32(imm5, 1+size, 5);
7354 src_index = extract32(imm4, size, 4);
7355
7356 tmp = tcg_temp_new_i64();
7357
7358 read_vec_element(s, tmp, rn, src_index, size);
7359 write_vec_element(s, tmp, rd, dst_index, size);
7360
7361 tcg_temp_free_i64(tmp);
7362}
7363
7364
7365/* INS (General)
7366 *
7367 * 31 21 20 16 15 10 9 5 4 0
7368 * +-----------------------+--------+-------------+------+------+
7369 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
7370 * +-----------------------+--------+-------------+------+------+
7371 *
7372 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7373 * index: encoded in imm5<4:size+1>
7374 */
7375static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7376{
7377 int size = ctz32(imm5);
7378 int idx;
7379
7380 if (size > 3) {
7381 unallocated_encoding(s);
7382 return;
7383 }
7384
7385 if (!fp_access_check(s)) {
7386 return;
7387 }
7388
7389 idx = extract32(imm5, 1 + size, 4 - size);
7390 write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7391}
7392
7393/*
7394 * UMOV (General)
7395 * SMOV (General)
7396 *
7397 * 31 30 29 21 20 16 15 12 10 9 5 4 0
7398 * +---+---+-------------------+--------+-------------+------+------+
7399 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
7400 * +---+---+-------------------+--------+-------------+------+------+
7401 *
7402 * U: unsigned when set
7403 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7404 */
7405static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7406 int rn, int rd, int imm5)
7407{
7408 int size = ctz32(imm5);
7409 int element;
7410 TCGv_i64 tcg_rd;
7411
7412 /* Check for UnallocatedEncodings */
7413 if (is_signed) {
7414 if (size > 2 || (size == 2 && !is_q)) {
7415 unallocated_encoding(s);
7416 return;
7417 }
7418 } else {
7419 if (size > 3
7420 || (size < 3 && is_q)
7421 || (size == 3 && !is_q)) {
7422 unallocated_encoding(s);
7423 return;
7424 }
7425 }
7426
7427 if (!fp_access_check(s)) {
7428 return;
7429 }
7430
7431 element = extract32(imm5, 1+size, 4);
7432
7433 tcg_rd = cpu_reg(s, rd);
7434 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7435 if (is_signed && !is_q) {
7436 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7437 }
7438}
7439
7440/* AdvSIMD copy
7441 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
7442 * +---+---+----+-----------------+------+---+------+---+------+------+
7443 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7444 * +---+---+----+-----------------+------+---+------+---+------+------+
7445 */
7446static void disas_simd_copy(DisasContext *s, uint32_t insn)
7447{
7448 int rd = extract32(insn, 0, 5);
7449 int rn = extract32(insn, 5, 5);
7450 int imm4 = extract32(insn, 11, 4);
7451 int op = extract32(insn, 29, 1);
7452 int is_q = extract32(insn, 30, 1);
7453 int imm5 = extract32(insn, 16, 5);
7454
7455 if (op) {
7456 if (is_q) {
7457 /* INS (element) */
7458 handle_simd_inse(s, rd, rn, imm4, imm5);
7459 } else {
7460 unallocated_encoding(s);
7461 }
7462 } else {
7463 switch (imm4) {
7464 case 0:
7465 /* DUP (element - vector) */
7466 handle_simd_dupe(s, is_q, rd, rn, imm5);
7467 break;
7468 case 1:
7469 /* DUP (general) */
7470 handle_simd_dupg(s, is_q, rd, rn, imm5);
7471 break;
7472 case 3:
7473 if (is_q) {
7474 /* INS (general) */
7475 handle_simd_insg(s, rd, rn, imm5);
7476 } else {
7477 unallocated_encoding(s);
7478 }
7479 break;
7480 case 5:
7481 case 7:
7482 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7483 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7484 break;
7485 default:
7486 unallocated_encoding(s);
7487 break;
7488 }
7489 }
7490}
7491
7492/* AdvSIMD modified immediate
7493 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
7494 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7495 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
7496 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7497 *
7498 * There are a number of operations that can be carried out here:
7499 * MOVI - move (shifted) imm into register
7500 * MVNI - move inverted (shifted) imm into register
7501 * ORR - bitwise OR of (shifted) imm with register
7502 * BIC - bitwise clear of (shifted) imm with register
7503 * With ARMv8.2 we also have:
7504 * FMOV half-precision
7505 */
7506static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7507{
7508 int rd = extract32(insn, 0, 5);
7509 int cmode = extract32(insn, 12, 4);
7510 int cmode_3_1 = extract32(cmode, 1, 3);
7511 int cmode_0 = extract32(cmode, 0, 1);
7512 int o2 = extract32(insn, 11, 1);
7513 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7514 bool is_neg = extract32(insn, 29, 1);
7515 bool is_q = extract32(insn, 30, 1);
7516 uint64_t imm = 0;
7517
7518 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7519 /* Check for FMOV (vector, immediate) - half-precision */
7520 if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7521 unallocated_encoding(s);
7522 return;
7523 }
7524 }
7525
7526 if (!fp_access_check(s)) {
7527 return;
7528 }
7529
7530 /* See AdvSIMDExpandImm() in ARM ARM */
7531 switch (cmode_3_1) {
7532 case 0: /* Replicate(Zeros(24):imm8, 2) */
7533 case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
7534 case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
7535 case 3: /* Replicate(imm8:Zeros(24), 2) */
7536 {
7537 int shift = cmode_3_1 * 8;
7538 imm = bitfield_replicate(abcdefgh << shift, 32);
7539 break;
7540 }
7541 case 4: /* Replicate(Zeros(8):imm8, 4) */
7542 case 5: /* Replicate(imm8:Zeros(8), 4) */
7543 {
7544 int shift = (cmode_3_1 & 0x1) * 8;
7545 imm = bitfield_replicate(abcdefgh << shift, 16);
7546 break;
7547 }
7548 case 6:
7549 if (cmode_0) {
7550 /* Replicate(Zeros(8):imm8:Ones(16), 2) */
7551 imm = (abcdefgh << 16) | 0xffff;
7552 } else {
7553 /* Replicate(Zeros(16):imm8:Ones(8), 2) */
7554 imm = (abcdefgh << 8) | 0xff;
7555 }
7556 imm = bitfield_replicate(imm, 32);
7557 break;
7558 case 7:
7559 if (!cmode_0 && !is_neg) {
7560 imm = bitfield_replicate(abcdefgh, 8);
7561 } else if (!cmode_0 && is_neg) {
7562 int i;
7563 imm = 0;
7564 for (i = 0; i < 8; i++) {
7565 if ((abcdefgh) & (1 << i)) {
7566 imm |= 0xffULL << (i * 8);
7567 }
7568 }
7569 } else if (cmode_0) {
7570 if (is_neg) {
7571 imm = (abcdefgh & 0x3f) << 48;
7572 if (abcdefgh & 0x80) {
7573 imm |= 0x8000000000000000ULL;
7574 }
7575 if (abcdefgh & 0x40) {
7576 imm |= 0x3fc0000000000000ULL;
7577 } else {
7578 imm |= 0x4000000000000000ULL;
7579 }
7580 } else {
7581 if (o2) {
7582 /* FMOV (vector, immediate) - half-precision */
7583 imm = vfp_expand_imm(MO_16, abcdefgh);
7584 /* now duplicate across the lanes */
7585 imm = bitfield_replicate(imm, 16);
7586 } else {
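                    /* Single-precision expansion: e.g. imm8 == 0x70
                     * gives (0x30 << 19) | 0x3e000000 == 0x3f800000,
                     * i.e. 1.0f, duplicated into both halves below.
                     */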
7587 imm = (abcdefgh & 0x3f) << 19;
7588 if (abcdefgh & 0x80) {
7589 imm |= 0x80000000;
7590 }
7591 if (abcdefgh & 0x40) {
7592 imm |= 0x3e000000;
7593 } else {
7594 imm |= 0x40000000;
7595 }
7596 imm |= (imm << 32);
7597 }
7598 }
7599 }
7600 break;
7601 default:
7602 fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
7603 g_assert_not_reached();
7604 }
7605
7606 if (cmode_3_1 != 7 && is_neg) {
7607 imm = ~imm;
7608 }
7609
7610 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7611 /* MOVI or MVNI, with MVNI negation handled above. */
7612 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7613 vec_full_reg_size(s), imm);
7614 } else {
7615 /* ORR or BIC, with BIC negation to AND handled above. */
7616 if (is_neg) {
7617 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7618 } else {
7619 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7620 }
7621 }
7622}
7623
7624/* AdvSIMD scalar copy
7625 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
7626 * +-----+----+-----------------+------+---+------+---+------+------+
7627 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7628 * +-----+----+-----------------+------+---+------+---+------+------+
7629 */
7630static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7631{
7632 int rd = extract32(insn, 0, 5);
7633 int rn = extract32(insn, 5, 5);
7634 int imm4 = extract32(insn, 11, 4);
7635 int imm5 = extract32(insn, 16, 5);
7636 int op = extract32(insn, 29, 1);
7637
7638 if (op != 0 || imm4 != 0) {
7639 unallocated_encoding(s);
7640 return;
7641 }
7642
7643 /* DUP (element, scalar) */
7644 handle_simd_dupes(s, rd, rn, imm5);
7645}
7646
7647/* AdvSIMD scalar pairwise
7648 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7649 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7650 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
7651 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7652 */
7653static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7654{
7655 int u = extract32(insn, 29, 1);
7656 int size = extract32(insn, 22, 2);
7657 int opcode = extract32(insn, 12, 5);
7658 int rn = extract32(insn, 5, 5);
7659 int rd = extract32(insn, 0, 5);
7660 TCGv_ptr fpst;
7661
    /* For some ops (the FP ones), size[1] is part of the encoding.
     * For ADDP it strictly is not, but size[1] is always 1 for valid
     * encodings.
     */
7666 opcode |= (extract32(size, 1, 1) << 5);
7667
7668 switch (opcode) {
7669 case 0x3b: /* ADDP */
7670 if (u || size != 3) {
7671 unallocated_encoding(s);
7672 return;
7673 }
7674 if (!fp_access_check(s)) {
7675 return;
7676 }
7677
7678 fpst = NULL;
7679 break;
7680 case 0xc: /* FMAXNMP */
7681 case 0xd: /* FADDP */
7682 case 0xf: /* FMAXP */
7683 case 0x2c: /* FMINNMP */
7684 case 0x2f: /* FMINP */
        /* FP op; size[0] selects between 32-bit and 64-bit */
7686 if (!u) {
7687 if (!dc_isar_feature(aa64_fp16, s)) {
7688 unallocated_encoding(s);
7689 return;
7690 } else {
7691 size = MO_16;
7692 }
7693 } else {
7694 size = extract32(size, 0, 1) ? MO_64 : MO_32;
7695 }
7696
7697 if (!fp_access_check(s)) {
7698 return;
7699 }
7700
7701 fpst = get_fpstatus_ptr(size == MO_16);
7702 break;
7703 default:
7704 unallocated_encoding(s);
7705 return;
7706 }
7707
7708 if (size == MO_64) {
7709 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7710 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7711 TCGv_i64 tcg_res = tcg_temp_new_i64();
7712
7713 read_vec_element(s, tcg_op1, rn, 0, MO_64);
7714 read_vec_element(s, tcg_op2, rn, 1, MO_64);
7715
7716 switch (opcode) {
7717 case 0x3b: /* ADDP */
7718 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7719 break;
7720 case 0xc: /* FMAXNMP */
7721 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7722 break;
7723 case 0xd: /* FADDP */
7724 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7725 break;
7726 case 0xf: /* FMAXP */
7727 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7728 break;
7729 case 0x2c: /* FMINNMP */
7730 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7731 break;
7732 case 0x2f: /* FMINP */
7733 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7734 break;
7735 default:
7736 g_assert_not_reached();
7737 }
7738
7739 write_fp_dreg(s, rd, tcg_res);
7740
7741 tcg_temp_free_i64(tcg_op1);
7742 tcg_temp_free_i64(tcg_op2);
7743 tcg_temp_free_i64(tcg_res);
7744 } else {
7745 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7746 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7747 TCGv_i32 tcg_res = tcg_temp_new_i32();
7748
7749 read_vec_element_i32(s, tcg_op1, rn, 0, size);
7750 read_vec_element_i32(s, tcg_op2, rn, 1, size);
7751
7752 if (size == MO_16) {
7753 switch (opcode) {
7754 case 0xc: /* FMAXNMP */
7755 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7756 break;
7757 case 0xd: /* FADDP */
7758 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
7759 break;
7760 case 0xf: /* FMAXP */
7761 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
7762 break;
7763 case 0x2c: /* FMINNMP */
7764 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7765 break;
7766 case 0x2f: /* FMINP */
7767 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
7768 break;
7769 default:
7770 g_assert_not_reached();
7771 }
7772 } else {
7773 switch (opcode) {
7774 case 0xc: /* FMAXNMP */
7775 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7776 break;
7777 case 0xd: /* FADDP */
7778 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7779 break;
7780 case 0xf: /* FMAXP */
7781 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7782 break;
7783 case 0x2c: /* FMINNMP */
7784 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7785 break;
7786 case 0x2f: /* FMINP */
7787 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7788 break;
7789 default:
7790 g_assert_not_reached();
7791 }
7792 }
7793
7794 write_fp_sreg(s, rd, tcg_res);
7795
7796 tcg_temp_free_i32(tcg_op1);
7797 tcg_temp_free_i32(tcg_op2);
7798 tcg_temp_free_i32(tcg_res);
7799 }
7800
7801 if (fpst) {
7802 tcg_temp_free_ptr(fpst);
7803 }
7804}
7805
7806/*
7807 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7808 *
 * This handles the common shifting code and is used by both
 * the vector and scalar code.
7811 */
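/* Rounding adds 1 << (shift - 1) to the source before shifting, so a
 * rounding right shift of 6 by 2 yields (6 + 2) >> 2 == 2 where the
 * plain shift gives 1. For 64-bit elements that addition can carry out
 * of 64 bits, hence the widened intermediate used below.
 */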
7812static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7813 TCGv_i64 tcg_rnd, bool accumulate,
7814 bool is_u, int size, int shift)
7815{
7816 bool extended_result = false;
7817 bool round = tcg_rnd != NULL;
7818 int ext_lshift = 0;
7819 TCGv_i64 tcg_src_hi;
7820
7821 if (round && size == 3) {
7822 extended_result = true;
7823 ext_lshift = 64 - shift;
7824 tcg_src_hi = tcg_temp_new_i64();
7825 } else if (shift == 64) {
7826 if (!accumulate && is_u) {
7827 /* result is zero */
7828 tcg_gen_movi_i64(tcg_res, 0);
7829 return;
7830 }
7831 }
7832
7833 /* Deal with the rounding step */
7834 if (round) {
7835 if (extended_result) {
7836 TCGv_i64 tcg_zero = tcg_const_i64(0);
7837 if (!is_u) {
7838 /* take care of sign extending tcg_res */
7839 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
7840 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7841 tcg_src, tcg_src_hi,
7842 tcg_rnd, tcg_zero);
7843 } else {
7844 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7845 tcg_src, tcg_zero,
7846 tcg_rnd, tcg_zero);
7847 }
7848 tcg_temp_free_i64(tcg_zero);
7849 } else {
7850 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
7851 }
7852 }
7853
7854 /* Now do the shift right */
7855 if (round && extended_result) {
7856 /* extended case, >64 bit precision required */
7857 if (ext_lshift == 0) {
7858 /* special case, only high bits matter */
7859 tcg_gen_mov_i64(tcg_src, tcg_src_hi);
7860 } else {
7861 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7862 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
7863 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
7864 }
7865 } else {
7866 if (is_u) {
7867 if (shift == 64) {
7868 /* essentially shifting in 64 zeros */
7869 tcg_gen_movi_i64(tcg_src, 0);
7870 } else {
7871 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7872 }
7873 } else {
7874 if (shift == 64) {
7875 /* effectively extending the sign-bit */
7876 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
7877 } else {
7878 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
7879 }
7880 }
7881 }
7882
7883 if (accumulate) {
7884 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
7885 } else {
7886 tcg_gen_mov_i64(tcg_res, tcg_src);
7887 }
7888
7889 if (extended_result) {
7890 tcg_temp_free_i64(tcg_src_hi);
7891 }
7892}
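
/*
 * Illustrative example of the rounding step above: URSHR #4 on the
 * 8-bit value 0x17 computes (0x17 + (1 << 3)) >> 4 = 0x01, i.e. the
 * round constant 1 << (shift - 1) is added before shifting.  For
 * size == 3 the addition may carry out of 64 bits, which is why the
 * extended_result path tracks the carry in tcg_src_hi.
 */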
7893
7894/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
7895static void handle_scalar_simd_shri(DisasContext *s,
7896 bool is_u, int immh, int immb,
7897 int opcode, int rn, int rd)
7898{
7899 const int size = 3;
7900 int immhb = immh << 3 | immb;
7901 int shift = 2 * (8 << size) - immhb;
7902 bool accumulate = false;
7903 bool round = false;
7904 bool insert = false;
7905 TCGv_i64 tcg_rn;
7906 TCGv_i64 tcg_rd;
7907 TCGv_i64 tcg_round;
7908
7909 if (!extract32(immh, 3, 1)) {
7910 unallocated_encoding(s);
7911 return;
7912 }
7913
7914 if (!fp_access_check(s)) {
7915 return;
7916 }
7917
7918 switch (opcode) {
7919 case 0x02: /* SSRA / USRA (accumulate) */
7920 accumulate = true;
7921 break;
7922 case 0x04: /* SRSHR / URSHR (rounding) */
7923 round = true;
7924 break;
7925 case 0x06: /* SRSRA / URSRA (accum + rounding) */
7926 accumulate = round = true;
7927 break;
7928 case 0x08: /* SRI */
7929 insert = true;
7930 break;
7931 }
7932
7933 if (round) {
7934 uint64_t round_const = 1ULL << (shift - 1);
7935 tcg_round = tcg_const_i64(round_const);
7936 } else {
7937 tcg_round = NULL;
7938 }
7939
7940 tcg_rn = read_fp_dreg(s, rn);
7941 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7942
7943 if (insert) {
        /* A shift count equal to the element size is valid but does
         * nothing; special case it to avoid a potential shift by 64.
         */
7947 int esize = 8 << size;
7948 if (shift != esize) {
7949 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
7950 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
7951 }
7952 } else {
7953 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7954 accumulate, is_u, size, shift);
7955 }
7956
7957 write_fp_dreg(s, rd, tcg_rd);
7958
7959 tcg_temp_free_i64(tcg_rn);
7960 tcg_temp_free_i64(tcg_rd);
7961 if (round) {
7962 tcg_temp_free_i64(tcg_round);
7963 }
7964}
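
/*
 * Note on the immediate encoding used above: for right shifts the
 * count is 2 * esize - immh:immb, so with esize == 64, immh:immb ==
 * 0b1111111 encodes a shift of 1 and 0b1000000 encodes a shift of 64.
 */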
7965
7966/* SHL/SLI - Scalar shift left */
7967static void handle_scalar_simd_shli(DisasContext *s, bool insert,
7968 int immh, int immb, int opcode,
7969 int rn, int rd)
7970{
7971 int size = 32 - clz32(immh) - 1;
7972 int immhb = immh << 3 | immb;
7973 int shift = immhb - (8 << size);
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;
7976
7977 if (!extract32(immh, 3, 1)) {
7978 unallocated_encoding(s);
7979 return;
7980 }
7981
7982 if (!fp_access_check(s)) {
7983 return;
7984 }
7985
7986 tcg_rn = read_fp_dreg(s, rn);
7987 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7988
7989 if (insert) {
7990 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
7991 } else {
7992 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
7993 }
7994
7995 write_fp_dreg(s, rd, tcg_rd);
7996
7997 tcg_temp_free_i64(tcg_rn);
7998 tcg_temp_free_i64(tcg_rd);
7999}
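
/*
 * For example (illustrative), SLI #8 with 64-bit elements deposits
 * bits [55:0] of Rn into bits [63:8] of Rd while preserving Rd[7:0];
 * plain SHL simply writes Rn << 8 into Rd.
 */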
8000
8001/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8002 * (signed/unsigned) narrowing */
8003static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8004 bool is_u_shift, bool is_u_narrow,
8005 int immh, int immb, int opcode,
8006 int rn, int rd)
8007{
8008 int immhb = immh << 3 | immb;
8009 int size = 32 - clz32(immh) - 1;
8010 int esize = 8 << size;
8011 int shift = (2 * esize) - immhb;
8012 int elements = is_scalar ? 1 : (64 / esize);
8013 bool round = extract32(opcode, 0, 1);
8014 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8015 TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8016 TCGv_i32 tcg_rd_narrowed;
8017 TCGv_i64 tcg_final;
8018
8019 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8020 { gen_helper_neon_narrow_sat_s8,
8021 gen_helper_neon_unarrow_sat8 },
8022 { gen_helper_neon_narrow_sat_s16,
8023 gen_helper_neon_unarrow_sat16 },
8024 { gen_helper_neon_narrow_sat_s32,
8025 gen_helper_neon_unarrow_sat32 },
8026 { NULL, NULL },
8027 };
8028 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8029 gen_helper_neon_narrow_sat_u8,
8030 gen_helper_neon_narrow_sat_u16,
8031 gen_helper_neon_narrow_sat_u32,
8032 NULL
8033 };
8034 NeonGenNarrowEnvFn *narrowfn;
8035
8036 int i;
8037
8038 assert(size < 4);
8039
8040 if (extract32(immh, 3, 1)) {
8041 unallocated_encoding(s);
8042 return;
8043 }
8044
8045 if (!fp_access_check(s)) {
8046 return;
8047 }
8048
8049 if (is_u_shift) {
8050 narrowfn = unsigned_narrow_fns[size];
8051 } else {
8052 narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8053 }
8054
8055 tcg_rn = tcg_temp_new_i64();
8056 tcg_rd = tcg_temp_new_i64();
8057 tcg_rd_narrowed = tcg_temp_new_i32();
8058 tcg_final = tcg_const_i64(0);
8059
8060 if (round) {
8061 uint64_t round_const = 1ULL << (shift - 1);
8062 tcg_round = tcg_const_i64(round_const);
8063 } else {
8064 tcg_round = NULL;
8065 }
8066
8067 for (i = 0; i < elements; i++) {
8068 read_vec_element(s, tcg_rn, rn, i, ldop);
8069 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8070 false, is_u_shift, size+1, shift);
8071 narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8072 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8073 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8074 }
8075
8076 if (!is_q) {
8077 write_vec_element(s, tcg_final, rd, 0, MO_64);
8078 } else {
8079 write_vec_element(s, tcg_final, rd, 1, MO_64);
8080 }
8081
8082 if (round) {
8083 tcg_temp_free_i64(tcg_round);
8084 }
8085 tcg_temp_free_i64(tcg_rn);
8086 tcg_temp_free_i64(tcg_rd);
8087 tcg_temp_free_i32(tcg_rd_narrowed);
8088 tcg_temp_free_i64(tcg_final);
8089
8090 clear_vec_high(s, is_q, rd);
8091}
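
/*
 * Worked example (illustrative): SQSHRN #8 with 32-bit source elements
 * shifts each element right by 8 and then saturates it to the signed
 * 16-bit range, packing the results into the low (or, for the "2"
 * variant, high) 64 bits of Vd.
 */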
8092
8093/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8094static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8095 bool src_unsigned, bool dst_unsigned,
8096 int immh, int immb, int rn, int rd)
8097{
8098 int immhb = immh << 3 | immb;
8099 int size = 32 - clz32(immh) - 1;
8100 int shift = immhb - (8 << size);
8101 int pass;
8102
8103 assert(immh != 0);
8104 assert(!(scalar && is_q));
8105
8106 if (!scalar) {
8107 if (!is_q && extract32(immh, 3, 1)) {
8108 unallocated_encoding(s);
8109 return;
8110 }
8111
8112 /* Since we use the variable-shift helpers we must
8113 * replicate the shift count into each element of
8114 * the tcg_shift value.
8115 */
8116 switch (size) {
8117 case 0:
8118 shift |= shift << 8;
8119 /* fall through */
8120 case 1:
8121 shift |= shift << 16;
8122 break;
8123 case 2:
8124 case 3:
8125 break;
8126 default:
8127 g_assert_not_reached();
8128 }
8129 }
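    /*
     * Illustrative: for size == 0 a shift count of 3 becomes 0x03030303
     * after the replication above, matching the packed 8-bit layout the
     * variable-shift helpers expect.
     */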
8130
8131 if (!fp_access_check(s)) {
8132 return;
8133 }
8134
8135 if (size == 3) {
8136 TCGv_i64 tcg_shift = tcg_const_i64(shift);
8137 static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8138 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8139 { NULL, gen_helper_neon_qshl_u64 },
8140 };
8141 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8142 int maxpass = is_q ? 2 : 1;
8143
8144 for (pass = 0; pass < maxpass; pass++) {
8145 TCGv_i64 tcg_op = tcg_temp_new_i64();
8146
8147 read_vec_element(s, tcg_op, rn, pass, MO_64);
8148 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8149 write_vec_element(s, tcg_op, rd, pass, MO_64);
8150
8151 tcg_temp_free_i64(tcg_op);
8152 }
8153 tcg_temp_free_i64(tcg_shift);
8154 clear_vec_high(s, is_q, rd);
8155 } else {
8156 TCGv_i32 tcg_shift = tcg_const_i32(shift);
8157 static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8158 {
8159 { gen_helper_neon_qshl_s8,
8160 gen_helper_neon_qshl_s16,
8161 gen_helper_neon_qshl_s32 },
8162 { gen_helper_neon_qshlu_s8,
8163 gen_helper_neon_qshlu_s16,
8164 gen_helper_neon_qshlu_s32 }
8165 }, {
8166 { NULL, NULL, NULL },
8167 { gen_helper_neon_qshl_u8,
8168 gen_helper_neon_qshl_u16,
8169 gen_helper_neon_qshl_u32 }
8170 }
8171 };
8172 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8173 MemOp memop = scalar ? size : MO_32;
8174 int maxpass = scalar ? 1 : is_q ? 4 : 2;
8175
8176 for (pass = 0; pass < maxpass; pass++) {
8177 TCGv_i32 tcg_op = tcg_temp_new_i32();
8178
8179 read_vec_element_i32(s, tcg_op, rn, pass, memop);
8180 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8181 if (scalar) {
8182 switch (size) {
8183 case 0:
8184 tcg_gen_ext8u_i32(tcg_op, tcg_op);
8185 break;
8186 case 1:
8187 tcg_gen_ext16u_i32(tcg_op, tcg_op);
8188 break;
8189 case 2:
8190 break;
8191 default:
8192 g_assert_not_reached();
8193 }
8194 write_fp_sreg(s, rd, tcg_op);
8195 } else {
8196 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8197 }
8198
8199 tcg_temp_free_i32(tcg_op);
8200 }
8201 tcg_temp_free_i32(tcg_shift);
8202
8203 if (!scalar) {
8204 clear_vec_high(s, is_q, rd);
8205 }
8206 }
8207}
8208
8209/* Common vector code for handling integer to FP conversion */
8210static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8211 int elements, int is_signed,
8212 int fracbits, int size)
8213{
8214 TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
8215 TCGv_i32 tcg_shift = NULL;
8216
8217 MemOp mop = size | (is_signed ? MO_SIGN : 0);
8218 int pass;
8219
8220 if (fracbits || size == MO_64) {
8221 tcg_shift = tcg_const_i32(fracbits);
8222 }
8223
8224 if (size == MO_64) {
8225 TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8226 TCGv_i64 tcg_double = tcg_temp_new_i64();
8227
8228 for (pass = 0; pass < elements; pass++) {
8229 read_vec_element(s, tcg_int64, rn, pass, mop);
8230
8231 if (is_signed) {
8232 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8233 tcg_shift, tcg_fpst);
8234 } else {
8235 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8236 tcg_shift, tcg_fpst);
8237 }
8238 if (elements == 1) {
8239 write_fp_dreg(s, rd, tcg_double);
8240 } else {
8241 write_vec_element(s, tcg_double, rd, pass, MO_64);
8242 }
8243 }
8244
8245 tcg_temp_free_i64(tcg_int64);
8246 tcg_temp_free_i64(tcg_double);
8247
8248 } else {
8249 TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8250 TCGv_i32 tcg_float = tcg_temp_new_i32();
8251
8252 for (pass = 0; pass < elements; pass++) {
8253 read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8254
8255 switch (size) {
8256 case MO_32:
8257 if (fracbits) {
8258 if (is_signed) {
8259 gen_helper_vfp_sltos(tcg_float, tcg_int32,
8260 tcg_shift, tcg_fpst);
8261 } else {
8262 gen_helper_vfp_ultos(tcg_float, tcg_int32,
8263 tcg_shift, tcg_fpst);
8264 }
8265 } else {
8266 if (is_signed) {
8267 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8268 } else {
8269 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8270 }
8271 }
8272 break;
8273 case MO_16:
8274 if (fracbits) {
8275 if (is_signed) {
8276 gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8277 tcg_shift, tcg_fpst);
8278 } else {
8279 gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8280 tcg_shift, tcg_fpst);
8281 }
8282 } else {
8283 if (is_signed) {
8284 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8285 } else {
8286 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8287 }
8288 }
8289 break;
8290 default:
8291 g_assert_not_reached();
8292 }
8293
8294 if (elements == 1) {
8295 write_fp_sreg(s, rd, tcg_float);
8296 } else {
8297 write_vec_element_i32(s, tcg_float, rd, pass, size);
8298 }
8299 }
8300
8301 tcg_temp_free_i32(tcg_int32);
8302 tcg_temp_free_i32(tcg_float);
8303 }
8304
8305 tcg_temp_free_ptr(tcg_fpst);
8306 if (tcg_shift) {
8307 tcg_temp_free_i32(tcg_shift);
8308 }
8309
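    /* elements << size is the number of bytes written, so the high
     * half of the register is zeroed unless a full 128-bit vector
     * was produced.
     */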
8310 clear_vec_high(s, elements << size == 16, rd);
8311}
8312
8313/* UCVTF/SCVTF - Integer to FP conversion */
8314static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8315 bool is_q, bool is_u,
8316 int immh, int immb, int opcode,
8317 int rn, int rd)
8318{
8319 int size, elements, fracbits;
8320 int immhb = immh << 3 | immb;
8321
8322 if (immh & 8) {
8323 size = MO_64;
8324 if (!is_scalar && !is_q) {
8325 unallocated_encoding(s);
8326 return;
8327 }
8328 } else if (immh & 4) {
8329 size = MO_32;
8330 } else if (immh & 2) {
8331 size = MO_16;
8332 if (!dc_isar_feature(aa64_fp16, s)) {
8333 unallocated_encoding(s);
8334 return;
8335 }
8336 } else {
8337 /* immh == 0 would be a failure of the decode logic */
8338 g_assert(immh == 1);
8339 unallocated_encoding(s);
8340 return;
8341 }
8342
8343 if (is_scalar) {
8344 elements = 1;
8345 } else {
8346 elements = (8 << is_q) >> size;
8347 }
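    /* The immediate encodes 2 * esize - fracbits, e.g. for 32-bit
     * elements immh:immb == 64 - fracbits.
     */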
8348 fracbits = (16 << size) - immhb;
8349
8350 if (!fp_access_check(s)) {
8351 return;
8352 }
8353
8354 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8355}
8356
/* FCVTZS, FCVTZU - FP to fixed-point conversion */
8358static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8359 bool is_q, bool is_u,
8360 int immh, int immb, int rn, int rd)
8361{
8362 int immhb = immh << 3 | immb;
8363 int pass, size, fracbits;
8364 TCGv_ptr tcg_fpstatus;
8365 TCGv_i32 tcg_rmode, tcg_shift;
8366
8367 if (immh & 0x8) {
8368 size = MO_64;
8369 if (!is_scalar && !is_q) {
8370 unallocated_encoding(s);
8371 return;
8372 }
8373 } else if (immh & 0x4) {
8374 size = MO_32;
8375 } else if (immh & 0x2) {
8376 size = MO_16;
8377 if (!dc_isar_feature(aa64_fp16, s)) {
8378 unallocated_encoding(s);
8379 return;
8380 }
8381 } else {
8382 /* Should have split out AdvSIMD modified immediate earlier. */
8383 assert(immh == 1);
8384 unallocated_encoding(s);
8385 return;
8386 }
8387
8388 if (!fp_access_check(s)) {
8389 return;
8390 }
8391
8392 assert(!(is_scalar && is_q));
8393
8394 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
8395 tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
8396 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8397 fracbits = (16 << size) - immhb;
8398 tcg_shift = tcg_const_i32(fracbits);
8399
8400 if (size == MO_64) {
8401 int maxpass = is_scalar ? 1 : 2;
8402
8403 for (pass = 0; pass < maxpass; pass++) {
8404 TCGv_i64 tcg_op = tcg_temp_new_i64();
8405
8406 read_vec_element(s, tcg_op, rn, pass, MO_64);
8407 if (is_u) {
8408 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8409 } else {
8410 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8411 }
8412 write_vec_element(s, tcg_op, rd, pass, MO_64);
8413 tcg_temp_free_i64(tcg_op);
8414 }
8415 clear_vec_high(s, is_q, rd);
8416 } else {
8417 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8418 int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8419
8420 switch (size) {
8421 case MO_16:
8422 if (is_u) {
8423 fn = gen_helper_vfp_touhh;
8424 } else {
8425 fn = gen_helper_vfp_toshh;
8426 }
8427 break;
8428 case MO_32:
8429 if (is_u) {
8430 fn = gen_helper_vfp_touls;
8431 } else {
8432 fn = gen_helper_vfp_tosls;
8433 }
8434 break;
8435 default:
8436 g_assert_not_reached();
8437 }
8438
8439 for (pass = 0; pass < maxpass; pass++) {
8440 TCGv_i32 tcg_op = tcg_temp_new_i32();
8441
8442 read_vec_element_i32(s, tcg_op, rn, pass, size);
8443 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8444 if (is_scalar) {
8445 write_fp_sreg(s, rd, tcg_op);
8446 } else {
8447 write_vec_element_i32(s, tcg_op, rd, pass, size);
8448 }
8449 tcg_temp_free_i32(tcg_op);
8450 }
8451 if (!is_scalar) {
8452 clear_vec_high(s, is_q, rd);
8453 }
8454 }
8455
    /* Restore the original rounding mode before freeing tcg_fpstatus,
     * since the helper call still uses it.
     */
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_shift);
    tcg_temp_free_i32(tcg_rmode);
8460}
8461
8462/* AdvSIMD scalar shift by immediate
8463 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
8464 * +-----+---+-------------+------+------+--------+---+------+------+
8465 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
8466 * +-----+---+-------------+------+------+--------+---+------+------+
8467 *
 * This is the scalar version, so it works on fixed-size registers.
8469 */
8470static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8471{
8472 int rd = extract32(insn, 0, 5);
8473 int rn = extract32(insn, 5, 5);
8474 int opcode = extract32(insn, 11, 5);
8475 int immb = extract32(insn, 16, 3);
8476 int immh = extract32(insn, 19, 4);
8477 bool is_u = extract32(insn, 29, 1);
8478
8479 if (immh == 0) {
8480 unallocated_encoding(s);
8481 return;
8482 }
8483
8484 switch (opcode) {
8485 case 0x08: /* SRI */
8486 if (!is_u) {
8487 unallocated_encoding(s);
8488 return;
8489 }
8490 /* fall through */
8491 case 0x00: /* SSHR / USHR */
8492 case 0x02: /* SSRA / USRA */
8493 case 0x04: /* SRSHR / URSHR */
8494 case 0x06: /* SRSRA / URSRA */
8495 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8496 break;
8497 case 0x0a: /* SHL / SLI */
8498 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8499 break;
8500 case 0x1c: /* SCVTF, UCVTF */
8501 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8502 opcode, rn, rd);
8503 break;
8504 case 0x10: /* SQSHRUN, SQSHRUN2 */
8505 case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8506 if (!is_u) {
8507 unallocated_encoding(s);
8508 return;
8509 }
8510 handle_vec_simd_sqshrn(s, true, false, false, true,
8511 immh, immb, opcode, rn, rd);
8512 break;
    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
8514 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8515 handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8516 immh, immb, opcode, rn, rd);
8517 break;
8518 case 0xc: /* SQSHLU */
8519 if (!is_u) {
8520 unallocated_encoding(s);
8521 return;
8522 }
8523 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8524 break;
8525 case 0xe: /* SQSHL, UQSHL */
8526 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8527 break;
8528 case 0x1f: /* FCVTZS, FCVTZU */
8529 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8530 break;
8531 default:
8532 unallocated_encoding(s);
8533 break;
8534 }
8535}
8536
8537/* AdvSIMD scalar three different
8538 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
8539 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8540 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
8541 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8542 */
8543static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8544{
8545 bool is_u = extract32(insn, 29, 1);
8546 int size = extract32(insn, 22, 2);
8547 int opcode = extract32(insn, 12, 4);
8548 int rm = extract32(insn, 16, 5);
8549 int rn = extract32(insn, 5, 5);
8550 int rd = extract32(insn, 0, 5);
8551
8552 if (is_u) {
8553 unallocated_encoding(s);
8554 return;
8555 }
8556
8557 switch (opcode) {
8558 case 0x9: /* SQDMLAL, SQDMLAL2 */
8559 case 0xb: /* SQDMLSL, SQDMLSL2 */
8560 case 0xd: /* SQDMULL, SQDMULL2 */
8561 if (size == 0 || size == 3) {
8562 unallocated_encoding(s);
8563 return;
8564 }
8565 break;
8566 default:
8567 unallocated_encoding(s);
8568 return;
8569 }
8570
8571 if (!fp_access_check(s)) {
8572 return;
8573 }
8574
8575 if (size == 2) {
8576 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8577 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8578 TCGv_i64 tcg_res = tcg_temp_new_i64();
8579
8580 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8581 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8582
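        /* The "doubling" in SQDMULL is implemented by saturating-adding
         * the 64-bit product to itself rather than shifting left.
         */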
8583 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8584 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8585
8586 switch (opcode) {
8587 case 0xd: /* SQDMULL, SQDMULL2 */
8588 break;
8589 case 0xb: /* SQDMLSL, SQDMLSL2 */
8590 tcg_gen_neg_i64(tcg_res, tcg_res);
8591 /* fall through */
8592 case 0x9: /* SQDMLAL, SQDMLAL2 */
8593 read_vec_element(s, tcg_op1, rd, 0, MO_64);
8594 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8595 tcg_res, tcg_op1);
8596 break;
8597 default:
8598 g_assert_not_reached();
8599 }
8600
8601 write_fp_dreg(s, rd, tcg_res);
8602
8603 tcg_temp_free_i64(tcg_op1);
8604 tcg_temp_free_i64(tcg_op2);
8605 tcg_temp_free_i64(tcg_res);
8606 } else {
8607 TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8608 TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8609 TCGv_i64 tcg_res = tcg_temp_new_i64();
8610
8611 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8612 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8613
8614 switch (opcode) {
8615 case 0xd: /* SQDMULL, SQDMULL2 */
8616 break;
8617 case 0xb: /* SQDMLSL, SQDMLSL2 */
8618 gen_helper_neon_negl_u32(tcg_res, tcg_res);
8619 /* fall through */
8620 case 0x9: /* SQDMLAL, SQDMLAL2 */
8621 {
8622 TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8623 read_vec_element(s, tcg_op3, rd, 0, MO_32);
8624 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8625 tcg_res, tcg_op3);
8626 tcg_temp_free_i64(tcg_op3);
8627 break;
8628 }
8629 default:
8630 g_assert_not_reached();
8631 }
8632
8633 tcg_gen_ext32u_i64(tcg_res, tcg_res);
8634 write_fp_dreg(s, rd, tcg_res);
8635
8636 tcg_temp_free_i32(tcg_op1);
8637 tcg_temp_free_i32(tcg_op2);
8638 tcg_temp_free_i64(tcg_res);
8639 }
8640}
8641
8642static void handle_3same_64(DisasContext *s, int opcode, bool u,
8643 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8644{
8645 /* Handle 64x64->64 opcodes which are shared between the scalar
8646 * and vector 3-same groups. We cover every opcode where size == 3
8647 * is valid in either the three-reg-same (integer, not pairwise)
8648 * or scalar-three-reg-same groups.
8649 */
8650 TCGCond cond;
8651
8652 switch (opcode) {
8653 case 0x1: /* SQADD */
8654 if (u) {
8655 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8656 } else {
8657 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8658 }
8659 break;
8660 case 0x5: /* SQSUB */
8661 if (u) {
8662 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8663 } else {
8664 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8665 }
8666 break;
8667 case 0x6: /* CMGT, CMHI */
8668 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8669 * We implement this using setcond (test) and then negating.
8670 */
8671 cond = u ? TCG_COND_GTU : TCG_COND_GT;
8672 do_cmop:
8673 tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8674 tcg_gen_neg_i64(tcg_rd, tcg_rd);
8675 break;
8676 case 0x7: /* CMGE, CMHS */
8677 cond = u ? TCG_COND_GEU : TCG_COND_GE;
8678 goto do_cmop;
8679 case 0x11: /* CMTST, CMEQ */
8680 if (u) {
8681 cond = TCG_COND_EQ;
8682 goto do_cmop;
8683 }
8684 gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8685 break;
8686 case 0x8: /* SSHL, USHL */
8687 if (u) {
8688 gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
8689 } else {
8690 gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
8691 }
8692 break;
8693 case 0x9: /* SQSHL, UQSHL */
8694 if (u) {
8695 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8696 } else {
8697 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8698 }
8699 break;
8700 case 0xa: /* SRSHL, URSHL */
8701 if (u) {
8702 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8703 } else {
8704 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8705 }
8706 break;
8707 case 0xb: /* SQRSHL, UQRSHL */
8708 if (u) {
8709 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8710 } else {
8711 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8712 }
8713 break;
8714 case 0x10: /* ADD, SUB */
8715 if (u) {
8716 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8717 } else {
8718 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8719 }
8720 break;
8721 default:
8722 g_assert_not_reached();
8723 }
8724}
8725
8726/* Handle the 3-same-operands float operations; shared by the scalar
8727 * and vector encodings. The caller must filter out any encodings
8728 * not allocated for the encoding it is dealing with.
8729 */
8730static void handle_3same_float(DisasContext *s, int size, int elements,
8731 int fpopcode, int rd, int rn, int rm)
8732{
8733 int pass;
8734 TCGv_ptr fpst = get_fpstatus_ptr(false);
8735
8736 for (pass = 0; pass < elements; pass++) {
8737 if (size) {
8738 /* Double */
8739 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8740 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8741 TCGv_i64 tcg_res = tcg_temp_new_i64();
8742
8743 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8744 read_vec_element(s, tcg_op2, rm, pass, MO_64);
8745
8746 switch (fpopcode) {
8747 case 0x39: /* FMLS */
8748 /* As usual for ARM, separate negation for fused multiply-add */
8749 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8750 /* fall through */
8751 case 0x19: /* FMLA */
8752 read_vec_element(s, tcg_res, rd, pass, MO_64);
8753 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8754 tcg_res, fpst);
8755 break;
8756 case 0x18: /* FMAXNM */
8757 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8758 break;
8759 case 0x1a: /* FADD */
8760 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8761 break;
8762 case 0x1b: /* FMULX */
8763 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8764 break;
8765 case 0x1c: /* FCMEQ */
8766 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8767 break;
8768 case 0x1e: /* FMAX */
8769 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8770 break;
8771 case 0x1f: /* FRECPS */
8772 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8773 break;
8774 case 0x38: /* FMINNM */
8775 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8776 break;
8777 case 0x3a: /* FSUB */
8778 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8779 break;
8780 case 0x3e: /* FMIN */
8781 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8782 break;
8783 case 0x3f: /* FRSQRTS */
8784 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8785 break;
8786 case 0x5b: /* FMUL */
8787 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8788 break;
8789 case 0x5c: /* FCMGE */
8790 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8791 break;
8792 case 0x5d: /* FACGE */
8793 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8794 break;
8795 case 0x5f: /* FDIV */
8796 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8797 break;
8798 case 0x7a: /* FABD */
8799 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8800 gen_helper_vfp_absd(tcg_res, tcg_res);
8801 break;
8802 case 0x7c: /* FCMGT */
8803 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8804 break;
8805 case 0x7d: /* FACGT */
8806 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8807 break;
8808 default:
8809 g_assert_not_reached();
8810 }
8811
8812 write_vec_element(s, tcg_res, rd, pass, MO_64);
8813
8814 tcg_temp_free_i64(tcg_res);
8815 tcg_temp_free_i64(tcg_op1);
8816 tcg_temp_free_i64(tcg_op2);
8817 } else {
8818 /* Single */
8819 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8820 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8821 TCGv_i32 tcg_res = tcg_temp_new_i32();
8822
8823 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
8824 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
8825
8826 switch (fpopcode) {
8827 case 0x39: /* FMLS */
8828 /* As usual for ARM, separate negation for fused multiply-add */
8829 gen_helper_vfp_negs(tcg_op1, tcg_op1);
8830 /* fall through */
8831 case 0x19: /* FMLA */
8832 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8833 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
8834 tcg_res, fpst);
8835 break;
8836 case 0x1a: /* FADD */
8837 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8838 break;
8839 case 0x1b: /* FMULX */
8840 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
8841 break;
8842 case 0x1c: /* FCMEQ */
8843 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8844 break;
8845 case 0x1e: /* FMAX */
8846 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8847 break;
8848 case 0x1f: /* FRECPS */
8849 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8850 break;
8851 case 0x18: /* FMAXNM */
8852 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8853 break;
8854 case 0x38: /* FMINNM */
8855 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8856 break;
8857 case 0x3a: /* FSUB */
8858 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8859 break;
8860 case 0x3e: /* FMIN */
8861 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8862 break;
8863 case 0x3f: /* FRSQRTS */
8864 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8865 break;
8866 case 0x5b: /* FMUL */
8867 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
8868 break;
8869 case 0x5c: /* FCMGE */
8870 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8871 break;
8872 case 0x5d: /* FACGE */
8873 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8874 break;
8875 case 0x5f: /* FDIV */
8876 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
8877 break;
8878 case 0x7a: /* FABD */
8879 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8880 gen_helper_vfp_abss(tcg_res, tcg_res);
8881 break;
8882 case 0x7c: /* FCMGT */
8883 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8884 break;
8885 case 0x7d: /* FACGT */
8886 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8887 break;
8888 default:
8889 g_assert_not_reached();
8890 }
8891
8892 if (elements == 1) {
8893 /* scalar single so clear high part */
8894 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8895
8896 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
8897 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
8898 tcg_temp_free_i64(tcg_tmp);
8899 } else {
8900 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8901 }
8902
8903 tcg_temp_free_i32(tcg_res);
8904 tcg_temp_free_i32(tcg_op1);
8905 tcg_temp_free_i32(tcg_op2);
8906 }
8907 }
8908
8909 tcg_temp_free_ptr(fpst);
8910
8911 clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
8912}
8913
8914/* AdvSIMD scalar three same
8915 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
8916 * +-----+---+-----------+------+---+------+--------+---+------+------+
8917 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
8918 * +-----+---+-----------+------+---+------+--------+---+------+------+
8919 */
8920static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
8921{
8922 int rd = extract32(insn, 0, 5);
8923 int rn = extract32(insn, 5, 5);
8924 int opcode = extract32(insn, 11, 5);
8925 int rm = extract32(insn, 16, 5);
8926 int size = extract32(insn, 22, 2);
8927 bool u = extract32(insn, 29, 1);
8928 TCGv_i64 tcg_rd;
8929
8930 if (opcode >= 0x18) {
8931 /* Floating point: U, size[1] and opcode indicate operation */
8932 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
8933 switch (fpopcode) {
8934 case 0x1b: /* FMULX */
8935 case 0x1f: /* FRECPS */
8936 case 0x3f: /* FRSQRTS */
8937 case 0x5d: /* FACGE */
8938 case 0x7d: /* FACGT */
8939 case 0x1c: /* FCMEQ */
8940 case 0x5c: /* FCMGE */
8941 case 0x7c: /* FCMGT */
8942 case 0x7a: /* FABD */
8943 break;
8944 default:
8945 unallocated_encoding(s);
8946 return;
8947 }
8948
8949 if (!fp_access_check(s)) {
8950 return;
8951 }
8952
8953 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
8954 return;
8955 }
8956
8957 switch (opcode) {
8958 case 0x1: /* SQADD, UQADD */
8959 case 0x5: /* SQSUB, UQSUB */
8960 case 0x9: /* SQSHL, UQSHL */
8961 case 0xb: /* SQRSHL, UQRSHL */
8962 break;
8963 case 0x8: /* SSHL, USHL */
8964 case 0xa: /* SRSHL, URSHL */
8965 case 0x6: /* CMGT, CMHI */
8966 case 0x7: /* CMGE, CMHS */
8967 case 0x11: /* CMTST, CMEQ */
8968 case 0x10: /* ADD, SUB (vector) */
8969 if (size != 3) {
8970 unallocated_encoding(s);
8971 return;
8972 }
8973 break;
8974 case 0x16: /* SQDMULH, SQRDMULH (vector) */
8975 if (size != 1 && size != 2) {
8976 unallocated_encoding(s);
8977 return;
8978 }
8979 break;
8980 default:
8981 unallocated_encoding(s);
8982 return;
8983 }
8984
8985 if (!fp_access_check(s)) {
8986 return;
8987 }
8988
8989 tcg_rd = tcg_temp_new_i64();
8990
8991 if (size == 3) {
8992 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8993 TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
8994
8995 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
8996 tcg_temp_free_i64(tcg_rn);
8997 tcg_temp_free_i64(tcg_rm);
8998 } else {
8999 /* Do a single operation on the lowest element in the vector.
9000 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9001 * no side effects for all these operations.
9002 * OPTME: special-purpose helpers would avoid doing some
9003 * unnecessary work in the helper for the 8 and 16 bit cases.
9004 */
9005 NeonGenTwoOpEnvFn *genenvfn;
9006 TCGv_i32 tcg_rn = tcg_temp_new_i32();
9007 TCGv_i32 tcg_rm = tcg_temp_new_i32();
9008 TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9009
9010 read_vec_element_i32(s, tcg_rn, rn, 0, size);
9011 read_vec_element_i32(s, tcg_rm, rm, 0, size);
9012
9013 switch (opcode) {
9014 case 0x1: /* SQADD, UQADD */
9015 {
9016 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9017 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9018 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9019 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9020 };
9021 genenvfn = fns[size][u];
9022 break;
9023 }
9024 case 0x5: /* SQSUB, UQSUB */
9025 {
9026 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9027 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9028 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9029 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9030 };
9031 genenvfn = fns[size][u];
9032 break;
9033 }
9034 case 0x9: /* SQSHL, UQSHL */
9035 {
9036 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9037 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9038 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9039 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9040 };
9041 genenvfn = fns[size][u];
9042 break;
9043 }
9044 case 0xb: /* SQRSHL, UQRSHL */
9045 {
9046 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9047 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9048 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9049 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9050 };
9051 genenvfn = fns[size][u];
9052 break;
9053 }
9054 case 0x16: /* SQDMULH, SQRDMULH */
9055 {
9056 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9057 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9058 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9059 };
9060 assert(size == 1 || size == 2);
9061 genenvfn = fns[size - 1][u];
9062 break;
9063 }
9064 default:
9065 g_assert_not_reached();
9066 }
9067
9068 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9069 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9070 tcg_temp_free_i32(tcg_rd32);
9071 tcg_temp_free_i32(tcg_rn);
9072 tcg_temp_free_i32(tcg_rm);
9073 }
9074
9075 write_fp_dreg(s, rd, tcg_rd);
9076
9077 tcg_temp_free_i64(tcg_rd);
9078}
9079
9080/* AdvSIMD scalar three same FP16
9081 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
9082 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9083 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
9084 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9085 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9086 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9087 */
9088static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9089 uint32_t insn)
9090{
9091 int rd = extract32(insn, 0, 5);
9092 int rn = extract32(insn, 5, 5);
9093 int opcode = extract32(insn, 11, 3);
9094 int rm = extract32(insn, 16, 5);
9095 bool u = extract32(insn, 29, 1);
9096 bool a = extract32(insn, 23, 1);
9097 int fpopcode = opcode | (a << 3) | (u << 4);
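    /* e.g. FABD encodes U == 1, a == 1, opcode == 0b010, giving
     * fpopcode 0x1a below.
     */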
9098 TCGv_ptr fpst;
9099 TCGv_i32 tcg_op1;
9100 TCGv_i32 tcg_op2;
9101 TCGv_i32 tcg_res;
9102
9103 switch (fpopcode) {
9104 case 0x03: /* FMULX */
9105 case 0x04: /* FCMEQ (reg) */
9106 case 0x07: /* FRECPS */
9107 case 0x0f: /* FRSQRTS */
9108 case 0x14: /* FCMGE (reg) */
9109 case 0x15: /* FACGE */
9110 case 0x1a: /* FABD */
9111 case 0x1c: /* FCMGT (reg) */
9112 case 0x1d: /* FACGT */
9113 break;
9114 default:
9115 unallocated_encoding(s);
9116 return;
9117 }
9118
    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }
9122
9123 if (!fp_access_check(s)) {
9124 return;
9125 }
9126
9127 fpst = get_fpstatus_ptr(true);
9128
9129 tcg_op1 = read_fp_hreg(s, rn);
9130 tcg_op2 = read_fp_hreg(s, rm);
9131 tcg_res = tcg_temp_new_i32();
9132
9133 switch (fpopcode) {
9134 case 0x03: /* FMULX */
9135 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9136 break;
9137 case 0x04: /* FCMEQ (reg) */
9138 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9139 break;
9140 case 0x07: /* FRECPS */
9141 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9142 break;
9143 case 0x0f: /* FRSQRTS */
9144 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9145 break;
9146 case 0x14: /* FCMGE (reg) */
9147 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9148 break;
9149 case 0x15: /* FACGE */
9150 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9151 break;
9152 case 0x1a: /* FABD */
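        /* abs(a - b): clearing bit 15 of the half-precision result
         * implements the absolute value.
         */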
9153 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9154 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9155 break;
9156 case 0x1c: /* FCMGT (reg) */
9157 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9158 break;
9159 case 0x1d: /* FACGT */
9160 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9161 break;
9162 default:
9163 g_assert_not_reached();
9164 }
9165
9166 write_fp_sreg(s, rd, tcg_res);
9167
9169 tcg_temp_free_i32(tcg_res);
9170 tcg_temp_free_i32(tcg_op1);
9171 tcg_temp_free_i32(tcg_op2);
9172 tcg_temp_free_ptr(fpst);
9173}
9174
9175/* AdvSIMD scalar three same extra
9176 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
9177 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9178 * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
9179 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9180 */
9181static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9182 uint32_t insn)
9183{
9184 int rd = extract32(insn, 0, 5);
9185 int rn = extract32(insn, 5, 5);
9186 int opcode = extract32(insn, 11, 4);
9187 int rm = extract32(insn, 16, 5);
9188 int size = extract32(insn, 22, 2);
9189 bool u = extract32(insn, 29, 1);
9190 TCGv_i32 ele1, ele2, ele3;
9191 TCGv_i64 res;
9192 bool feature;
9193
9194 switch (u * 16 + opcode) {
9195 case 0x10: /* SQRDMLAH (vector) */
9196 case 0x11: /* SQRDMLSH (vector) */
9197 if (size != 1 && size != 2) {
9198 unallocated_encoding(s);
9199 return;
9200 }
9201 feature = dc_isar_feature(aa64_rdm, s);
9202 break;
9203 default:
9204 unallocated_encoding(s);
9205 return;
9206 }
9207 if (!feature) {
9208 unallocated_encoding(s);
9209 return;
9210 }
9211 if (!fp_access_check(s)) {
9212 return;
9213 }
9214
9215 /* Do a single operation on the lowest element in the vector.
9216 * We use the standard Neon helpers and rely on 0 OP 0 == 0
9217 * with no side effects for all these operations.
9218 * OPTME: special-purpose helpers would avoid doing some
9219 * unnecessary work in the helper for the 16 bit cases.
9220 */
9221 ele1 = tcg_temp_new_i32();
9222 ele2 = tcg_temp_new_i32();
9223 ele3 = tcg_temp_new_i32();
9224
9225 read_vec_element_i32(s, ele1, rn, 0, size);
9226 read_vec_element_i32(s, ele2, rm, 0, size);
9227 read_vec_element_i32(s, ele3, rd, 0, size);
9228
9229 switch (opcode) {
9230 case 0x0: /* SQRDMLAH */
9231 if (size == 1) {
9232 gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9233 } else {
9234 gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9235 }
9236 break;
9237 case 0x1: /* SQRDMLSH */
9238 if (size == 1) {
9239 gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9240 } else {
9241 gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9242 }
9243 break;
9244 default:
9245 g_assert_not_reached();
9246 }
9247 tcg_temp_free_i32(ele1);
9248 tcg_temp_free_i32(ele2);
9249
9250 res = tcg_temp_new_i64();
9251 tcg_gen_extu_i32_i64(res, ele3);
9252 tcg_temp_free_i32(ele3);
9253
9254 write_fp_dreg(s, rd, res);
9255 tcg_temp_free_i64(res);
9256}
9257
9258static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9259 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9260 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9261{
9262 /* Handle 64->64 opcodes which are shared between the scalar and
9263 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9264 * is valid in either group and also the double-precision fp ops.
 * The caller need only provide tcg_rmode and tcg_fpstatus if the op
 * requires them.
9267 */
9268 TCGCond cond;
9269
9270 switch (opcode) {
9271 case 0x4: /* CLS, CLZ */
9272 if (u) {
9273 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9274 } else {
9275 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9276 }
9277 break;
9278 case 0x5: /* NOT */
9279 /* This opcode is shared with CNT and RBIT but we have earlier
9280 * enforced that size == 3 if and only if this is the NOT insn.
9281 */
9282 tcg_gen_not_i64(tcg_rd, tcg_rn);
9283 break;
9284 case 0x7: /* SQABS, SQNEG */
9285 if (u) {
9286 gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9287 } else {
9288 gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9289 }
9290 break;
9291 case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement this using setcond (test)
         * and then negating, as for the 3-same ops above.
         */
9296 cond = TCG_COND_LT;
9297 do_cmop:
9298 tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9299 tcg_gen_neg_i64(tcg_rd, tcg_rd);
9300 break;
9301 case 0x8: /* CMGT, CMGE */
9302 cond = u ? TCG_COND_GE : TCG_COND_GT;
9303 goto do_cmop;
9304 case 0x9: /* CMEQ, CMLE */
9305 cond = u ? TCG_COND_LE : TCG_COND_EQ;
9306 goto do_cmop;
9307 case 0xb: /* ABS, NEG */
9308 if (u) {
9309 tcg_gen_neg_i64(tcg_rd, tcg_rn);
9310 } else {
9311 tcg_gen_abs_i64(tcg_rd, tcg_rn);
9312 }
9313 break;
9314 case 0x2f: /* FABS */
9315 gen_helper_vfp_absd(tcg_rd, tcg_rn);
9316 break;
9317 case 0x6f: /* FNEG */
9318 gen_helper_vfp_negd(tcg_rd, tcg_rn);
9319 break;
9320 case 0x7f: /* FSQRT */
9321 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9322 break;
9323 case 0x1a: /* FCVTNS */
9324 case 0x1b: /* FCVTMS */
9325 case 0x1c: /* FCVTAS */
9326 case 0x3a: /* FCVTPS */
9327 case 0x3b: /* FCVTZS */
9328 {
9329 TCGv_i32 tcg_shift = tcg_const_i32(0);
9330 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9331 tcg_temp_free_i32(tcg_shift);
9332 break;
9333 }
9334 case 0x5a: /* FCVTNU */
9335 case 0x5b: /* FCVTMU */
9336 case 0x5c: /* FCVTAU */
9337 case 0x7a: /* FCVTPU */
9338 case 0x7b: /* FCVTZU */
9339 {
9340 TCGv_i32 tcg_shift = tcg_const_i32(0);
9341 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9342 tcg_temp_free_i32(tcg_shift);
9343 break;
9344 }
9345 case 0x18: /* FRINTN */
9346 case 0x19: /* FRINTM */
9347 case 0x38: /* FRINTP */
9348 case 0x39: /* FRINTZ */
9349 case 0x58: /* FRINTA */
9350 case 0x79: /* FRINTI */
9351 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9352 break;
9353 case 0x59: /* FRINTX */
9354 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9355 break;
9356 case 0x1e: /* FRINT32Z */
9357 case 0x5e: /* FRINT32X */
9358 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9359 break;
9360 case 0x1f: /* FRINT64Z */
9361 case 0x5f: /* FRINT64X */
9362 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9363 break;
9364 default:
9365 g_assert_not_reached();
9366 }
9367}
9368
9369static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9370 bool is_scalar, bool is_u, bool is_q,
9371 int size, int rn, int rd)
9372{
9373 bool is_double = (size == MO_64);
9374 TCGv_ptr fpst;
9375
9376 if (!fp_access_check(s)) {
9377 return;
9378 }
9379
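    /* FCMLT/FCMLE (zero) are implemented by swapping the operands of
     * the FCMGT/FCMGE helpers: "0 > x" computes "x < 0".
     */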
9380 fpst = get_fpstatus_ptr(size == MO_16);
9381
9382 if (is_double) {
9383 TCGv_i64 tcg_op = tcg_temp_new_i64();
9384 TCGv_i64 tcg_zero = tcg_const_i64(0);
9385 TCGv_i64 tcg_res = tcg_temp_new_i64();
9386 NeonGenTwoDoubleOPFn *genfn;
9387 bool swap = false;
9388 int pass;
9389
9390 switch (opcode) {
9391 case 0x2e: /* FCMLT (zero) */
9392 swap = true;
            /* fall through */
9394 case 0x2c: /* FCMGT (zero) */
9395 genfn = gen_helper_neon_cgt_f64;
9396 break;
9397 case 0x2d: /* FCMEQ (zero) */
9398 genfn = gen_helper_neon_ceq_f64;
9399 break;
9400 case 0x6d: /* FCMLE (zero) */
9401 swap = true;
9402 /* fall through */
9403 case 0x6c: /* FCMGE (zero) */
9404 genfn = gen_helper_neon_cge_f64;
9405 break;
9406 default:
9407 g_assert_not_reached();
9408 }
9409
9410 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9411 read_vec_element(s, tcg_op, rn, pass, MO_64);
9412 if (swap) {
9413 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9414 } else {
9415 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9416 }
9417 write_vec_element(s, tcg_res, rd, pass, MO_64);
9418 }
9419 tcg_temp_free_i64(tcg_res);
9420 tcg_temp_free_i64(tcg_zero);
9421 tcg_temp_free_i64(tcg_op);
9422
9423 clear_vec_high(s, !is_scalar, rd);
9424 } else {
9425 TCGv_i32 tcg_op = tcg_temp_new_i32();
9426 TCGv_i32 tcg_zero = tcg_const_i32(0);
9427 TCGv_i32 tcg_res = tcg_temp_new_i32();
9428 NeonGenTwoSingleOPFn *genfn;
9429 bool swap = false;
9430 int pass, maxpasses;
9431
9432 if (size == MO_16) {
9433 switch (opcode) {
9434 case 0x2e: /* FCMLT (zero) */
9435 swap = true;
9436 /* fall through */
9437 case 0x2c: /* FCMGT (zero) */
9438 genfn = gen_helper_advsimd_cgt_f16;
9439 break;
9440 case 0x2d: /* FCMEQ (zero) */
9441 genfn = gen_helper_advsimd_ceq_f16;
9442 break;
9443 case 0x6d: /* FCMLE (zero) */
9444 swap = true;
9445 /* fall through */
9446 case 0x6c: /* FCMGE (zero) */
9447 genfn = gen_helper_advsimd_cge_f16;
9448 break;
9449 default:
9450 g_assert_not_reached();
9451 }
9452 } else {
9453 switch (opcode) {
9454 case 0x2e: /* FCMLT (zero) */
9455 swap = true;
9456 /* fall through */
9457 case 0x2c: /* FCMGT (zero) */
9458 genfn = gen_helper_neon_cgt_f32;
9459 break;
9460 case 0x2d: /* FCMEQ (zero) */
9461 genfn = gen_helper_neon_ceq_f32;
9462 break;
9463 case 0x6d: /* FCMLE (zero) */
9464 swap = true;
9465 /* fall through */
9466 case 0x6c: /* FCMGE (zero) */
9467 genfn = gen_helper_neon_cge_f32;
9468 break;
9469 default:
9470 g_assert_not_reached();
9471 }
9472 }
9473
9474 if (is_scalar) {
9475 maxpasses = 1;
9476 } else {
9477 int vector_size = 8 << is_q;
9478 maxpasses = vector_size >> size;
9479 }
9480
9481 for (pass = 0; pass < maxpasses; pass++) {
9482 read_vec_element_i32(s, tcg_op, rn, pass, size);
9483 if (swap) {
9484 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9485 } else {
9486 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9487 }
9488 if (is_scalar) {
9489 write_fp_sreg(s, rd, tcg_res);
9490 } else {
9491 write_vec_element_i32(s, tcg_res, rd, pass, size);
9492 }
9493 }
9494 tcg_temp_free_i32(tcg_res);
9495 tcg_temp_free_i32(tcg_zero);
9496 tcg_temp_free_i32(tcg_op);
9497 if (!is_scalar) {
9498 clear_vec_high(s, is_q, rd);
9499 }
9500 }
9501
9502 tcg_temp_free_ptr(fpst);
9503}
9504
9505static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9506 bool is_scalar, bool is_u, bool is_q,
9507 int size, int rn, int rd)
9508{
9509 bool is_double = (size == 3);
9510 TCGv_ptr fpst = get_fpstatus_ptr(false);
9511
9512 if (is_double) {
9513 TCGv_i64 tcg_op = tcg_temp_new_i64();
9514 TCGv_i64 tcg_res = tcg_temp_new_i64();
9515 int pass;
9516
9517 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9518 read_vec_element(s, tcg_op, rn, pass, MO_64);
9519 switch (opcode) {
9520 case 0x3d: /* FRECPE */
9521 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9522 break;
9523 case 0x3f: /* FRECPX */
9524 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9525 break;
9526 case 0x7d: /* FRSQRTE */
9527 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9528 break;
9529 default:
9530 g_assert_not_reached();
9531 }
9532 write_vec_element(s, tcg_res, rd, pass, MO_64);
9533 }
9534 tcg_temp_free_i64(tcg_res);
9535 tcg_temp_free_i64(tcg_op);
9536 clear_vec_high(s, !is_scalar, rd);
9537 } else {
9538 TCGv_i32 tcg_op = tcg_temp_new_i32();
9539 TCGv_i32 tcg_res = tcg_temp_new_i32();
9540 int pass, maxpasses;
9541
9542 if (is_scalar) {
9543 maxpasses = 1;
9544 } else {
9545 maxpasses = is_q ? 4 : 2;
9546 }
9547
9548 for (pass = 0; pass < maxpasses; pass++) {
9549 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9550
9551 switch (opcode) {
9552 case 0x3c: /* URECPE */
9553 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
9554 break;
9555 case 0x3d: /* FRECPE */
9556 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9557 break;
9558 case 0x3f: /* FRECPX */
9559 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9560 break;
9561 case 0x7d: /* FRSQRTE */
9562 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9563 break;
9564 default:
9565 g_assert_not_reached();
9566 }
9567
9568 if (is_scalar) {
9569 write_fp_sreg(s, rd, tcg_res);
9570 } else {
9571 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9572 }
9573 }
9574 tcg_temp_free_i32(tcg_res);
9575 tcg_temp_free_i32(tcg_op);
9576 if (!is_scalar) {
9577 clear_vec_high(s, is_q, rd);
9578 }
9579 }
9580 tcg_temp_free_ptr(fpst);
9581}
9582
9583static void handle_2misc_narrow(DisasContext *s, bool scalar,
9584 int opcode, bool u, bool is_q,
9585 int size, int rn, int rd)
9586{
9587 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9588 * in the source becomes a size element in the destination).
9589 */
9590 int pass;
9591 TCGv_i32 tcg_res[2];
9592 int destelt = is_q ? 2 : 0;
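    /* The "2" variants (is_q) write the upper 64 bits of Vd and leave
     * the lower half intact; otherwise results go to the lower half.
     */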
9593 int passes = scalar ? 1 : 2;
9594
9595 if (scalar) {
9596 tcg_res[1] = tcg_const_i32(0);
9597 }
9598
9599 for (pass = 0; pass < passes; pass++) {
9600 TCGv_i64 tcg_op = tcg_temp_new_i64();
9601 NeonGenNarrowFn *genfn = NULL;
9602 NeonGenNarrowEnvFn *genenvfn = NULL;
9603
9604 if (scalar) {
9605 read_vec_element(s, tcg_op, rn, pass, size + 1);
9606 } else {
9607 read_vec_element(s, tcg_op, rn, pass, MO_64);
9608 }
9609 tcg_res[pass] = tcg_temp_new_i32();
9610
9611 switch (opcode) {
9612 case 0x12: /* XTN, SQXTUN */
9613 {
9614 static NeonGenNarrowFn * const xtnfns[3] = {
9615 gen_helper_neon_narrow_u8,
9616 gen_helper_neon_narrow_u16,
9617 tcg_gen_extrl_i64_i32,
9618 };
9619 static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9620 gen_helper_neon_unarrow_sat8,
9621 gen_helper_neon_unarrow_sat16,
9622 gen_helper_neon_unarrow_sat32,
9623 };
9624 if (u) {
9625 genenvfn = sqxtunfns[size];
9626 } else {
9627 genfn = xtnfns[size];
9628 }
9629 break;
9630 }
9631 case 0x14: /* SQXTN, UQXTN */
9632 {
9633 static NeonGenNarrowEnvFn * const fns[3][2] = {
9634 { gen_helper_neon_narrow_sat_s8,
9635 gen_helper_neon_narrow_sat_u8 },
9636 { gen_helper_neon_narrow_sat_s16,
9637 gen_helper_neon_narrow_sat_u16 },
9638 { gen_helper_neon_narrow_sat_s32,
9639 gen_helper_neon_narrow_sat_u32 },
9640 };
9641 genenvfn = fns[size][u];
9642 break;
9643 }
9644 case 0x16: /* FCVTN, FCVTN2 */
9645 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9646 if (size == 2) {
9647 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9648 } else {
9649 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9650 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9651 TCGv_ptr fpst = get_fpstatus_ptr(false);
9652 TCGv_i32 ahp = get_ahp_flag();
9653
9654 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9655 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9656 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9657 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9658 tcg_temp_free_i32(tcg_lo);
9659 tcg_temp_free_i32(tcg_hi);
9660 tcg_temp_free_ptr(fpst);
9661 tcg_temp_free_i32(ahp);
9662 }
9663 break;
9664 case 0x56: /* FCVTXN, FCVTXN2 */
9665 /* 64 bit to 32 bit float conversion
9666 * with von Neumann rounding (round to odd)
9667 */
9668 assert(size == 2);
9669 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9670 break;
9671 default:
9672 g_assert_not_reached();
9673 }
9674
9675 if (genfn) {
9676 genfn(tcg_res[pass], tcg_op);
9677 } else if (genenvfn) {
9678 genenvfn(tcg_res[pass], cpu_env, tcg_op);
9679 }
9680
9681 tcg_temp_free_i64(tcg_op);
9682 }
9683
9684 for (pass = 0; pass < 2; pass++) {
9685 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9686 tcg_temp_free_i32(tcg_res[pass]);
9687 }
9688 clear_vec_high(s, is_q, rd);
9689}
9690
9691/* Remaining saturating accumulating ops */
9692static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9693 bool is_q, int size, int rn, int rd)
9694{
9695 bool is_double = (size == 3);
9696
9697 if (is_double) {
9698 TCGv_i64 tcg_rn = tcg_temp_new_i64();
9699 TCGv_i64 tcg_rd = tcg_temp_new_i64();
9700 int pass;
9701
9702 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9703 read_vec_element(s, tcg_rn, rn, pass, MO_64);
9704 read_vec_element(s, tcg_rd, rd, pass, MO_64);
9705
9706 if (is_u) { /* USQADD */
9707 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9708 } else { /* SUQADD */
9709 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9710 }
9711 write_vec_element(s, tcg_rd, rd, pass, MO_64);
9712 }
9713 tcg_temp_free_i64(tcg_rd);
9714 tcg_temp_free_i64(tcg_rn);
9715 clear_vec_high(s, !is_scalar, rd);
9716 } else {
9717 TCGv_i32 tcg_rn = tcg_temp_new_i32();
9718 TCGv_i32 tcg_rd = tcg_temp_new_i32();
9719 int pass, maxpasses;
9720
9721 if (is_scalar) {
9722 maxpasses = 1;
9723 } else {
9724 maxpasses = is_q ? 4 : 2;
9725 }
9726
9727 for (pass = 0; pass < maxpasses; pass++) {
9728 if (is_scalar) {
9729 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9730 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9731 } else {
9732 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9733 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9734 }
9735
9736 if (is_u) { /* USQADD */
9737 switch (size) {
9738 case 0:
9739 gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9740 break;
9741 case 1:
9742 gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9743 break;
9744 case 2:
9745 gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9746 break;
9747 default:
9748 g_assert_not_reached();
9749 }
9750 } else { /* SUQADD */
9751 switch (size) {
9752 case 0:
9753 gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9754 break;
9755 case 1:
9756 gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9757 break;
9758 case 2:
9759 gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9760 break;
9761 default:
9762 g_assert_not_reached();
9763 }
9764 }
9765
9766 if (is_scalar) {
9767 TCGv_i64 tcg_zero = tcg_const_i64(0);
9768 write_vec_element(s, tcg_zero, rd, 0, MO_64);
9769 tcg_temp_free_i64(tcg_zero);
9770 }
9771 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9772 }
9773 tcg_temp_free_i32(tcg_rd);
9774 tcg_temp_free_i32(tcg_rn);
9775 clear_vec_high(s, is_q, rd);
9776 }
9777}
9778
9779/* AdvSIMD scalar two reg misc
9780 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
9781 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9782 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
9783 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9784 */
9785static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9786{
9787 int rd = extract32(insn, 0, 5);
9788 int rn = extract32(insn, 5, 5);
9789 int opcode = extract32(insn, 12, 5);
9790 int size = extract32(insn, 22, 2);
9791 bool u = extract32(insn, 29, 1);
9792 bool is_fcvt = false;
9793 int rmode;
9794 TCGv_i32 tcg_rmode;
9795 TCGv_ptr tcg_fpstatus;
9796
9797 switch (opcode) {
    case 0x3: /* USQADD / SUQADD */
9799 if (!fp_access_check(s)) {
9800 return;
9801 }
9802 handle_2misc_satacc(s, true, u, false, size, rn, rd);
9803 return;
9804 case 0x7: /* SQABS / SQNEG */
9805 break;
9806 case 0xa: /* CMLT */
9807 if (u) {
9808 unallocated_encoding(s);
9809 return;
9810 }
9811 /* fall through */
9812 case 0x8: /* CMGT, CMGE */
9813 case 0x9: /* CMEQ, CMLE */
9814 case 0xb: /* ABS, NEG */
9815 if (size != 3) {
9816 unallocated_encoding(s);
9817 return;
9818 }
9819 break;
9820 case 0x12: /* SQXTUN */
9821 if (!u) {
9822 unallocated_encoding(s);
9823 return;
9824 }
9825 /* fall through */
9826 case 0x14: /* SQXTN, UQXTN */
9827 if (size == 3) {
9828 unallocated_encoding(s);
9829 return;
9830 }
9831 if (!fp_access_check(s)) {
9832 return;
9833 }
9834 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9835 return;
9836 case 0xc ... 0xf:
9837 case 0x16 ... 0x1d:
9838 case 0x1f:
9839 /* Floating point: U, size[1] and opcode indicate operation;
9840 * size[0] indicates single or double precision.
9841 */
9842 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9843 size = extract32(size, 0, 1) ? 3 : 2;
9844 switch (opcode) {
9845 case 0x2c: /* FCMGT (zero) */
9846 case 0x2d: /* FCMEQ (zero) */
9847 case 0x2e: /* FCMLT (zero) */
9848 case 0x6c: /* FCMGE (zero) */
9849 case 0x6d: /* FCMLE (zero) */
9850 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9851 return;
9852 case 0x1d: /* SCVTF */
9853 case 0x5d: /* UCVTF */
9854 {
9855 bool is_signed = (opcode == 0x1d);
9856 if (!fp_access_check(s)) {
9857 return;
9858 }
9859 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
9860 return;
9861 }
9862 case 0x3d: /* FRECPE */
9863 case 0x3f: /* FRECPX */
9864 case 0x7d: /* FRSQRTE */
9865 if (!fp_access_check(s)) {
9866 return;
9867 }
9868 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
9869 return;
9870 case 0x1a: /* FCVTNS */
9871 case 0x1b: /* FCVTMS */
9872 case 0x3a: /* FCVTPS */
9873 case 0x3b: /* FCVTZS */
9874 case 0x5a: /* FCVTNU */
9875 case 0x5b: /* FCVTMU */
9876 case 0x7a: /* FCVTPU */
9877 case 0x7b: /* FCVTZU */
9878 is_fcvt = true;
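            /* Bits {5, 0} of the FP opcode encode the rounding mode in
             * the FPRounding order: N (even) = 0, P (+inf) = 1,
             * M (-inf) = 2, Z (zero) = 3.
             */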
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            break;
        case 0x1c: /* FCVTAS */
        case 0x5c: /* FCVTAU */
            /* FCVTAS/FCVTAU round to nearest with ties away from zero
             * (TIEAWAY), which doesn't fit in the usual rounding mode
             * encoding.
             */
            is_fcvt = true;
            rmode = FPROUNDING_TIEAWAY;
            break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_fcvt) {
        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
        tcg_fpstatus = get_fpstatus_ptr(false);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    } else {
        tcg_rmode = NULL;
        tcg_fpstatus = NULL;
    }

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
        write_fp_dreg(s, rd, tcg_rd);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    } else {
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);

        switch (opcode) {
        case 0x7: /* SQABS, SQNEG */
        {
            NeonGenOneOpEnvFn *genfn;
            static NeonGenOneOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
            };
            genfn = fns[size][u];
            genfn(tcg_rd, cpu_env, tcg_rn);
            break;
        }
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        {
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
        {
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
    }

    if (is_fcvt) {
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
        tcg_temp_free_i32(tcg_rmode);
        tcg_temp_free_ptr(tcg_fpstatus);
    }
}

/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
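    /*
     * immh:immb encode both the element size and the shift amount: the
     * position of immh's leading set bit selects the size, and a right
     * shift by 1..esize is encoded as 2 * esize - immh:immb.  For
     * example immh = 0100, immb = 010 gives 32-bit elements shifted
     * right by 2 * 32 - 0b0100010 = 30.
     */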
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    int dsize = is_q ? 128 : 64;
    int esize = 8 << size;
    int elements = dsize / esize;
    MemOp memop = size | (is_u ? 0 : MO_SIGN);
    TCGv_i64 tcg_rn = new_tmp_a64(s);
    TCGv_i64 tcg_rd = new_tmp_a64(s);
    TCGv_i64 tcg_round;
    uint64_t round_const;
    int i;

    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }
    tcg_debug_assert(size <= 3);

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        if (is_u) {
            /* Shift count same as element size produces zero to add. */
            if (shift == 8 << size) {
                goto done;
            }
            gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
        } else {
            /* Shift count same as element size produces all sign to add. */
            if (shift == 8 << size) {
                shift -= 1;
            }
            gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
        }
        return;
    case 0x08: /* SRI */
        /* Shift count same as element size is valid but does nothing. */
        if (shift == 8 << size) {
            goto done;
        }
        gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
        return;

    case 0x00: /* SSHR / USHR */
        if (is_u) {
            if (shift == 8 << size) {
                /* Shift count the same size as element size produces zero. */
                tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
                                   is_q ? 16 : 8, vec_full_reg_size(s), 0);
            } else {
                gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
            }
        } else {
            /* Shift count the same size as element size produces all sign. */
            if (shift == 8 << size) {
                shift -= 1;
            }
            gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
        }
        return;

    case 0x04: /* SRSHR / URSHR (rounding) */
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = true;
        break;
    default:
        g_assert_not_reached();
    }

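    /* For the rounding variants, add half of the least significant bit
     * that survives the shift before shifting; e.g. a rounding shift
     * right by 3 adds 4 (1 << 2) first.
     */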
    round_const = 1ULL << (shift - 1);
    tcg_round = tcg_const_i64(round_const);

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, memop);
        if (accumulate) {
            read_vec_element(s, tcg_rd, rd, i, memop);
        }

        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                accumulate, is_u, size, shift);

        write_vec_element(s, tcg_rd, rd, i, size);
    }
    tcg_temp_free_i64(tcg_round);

 done:
    clear_vec_high(s, is_q, rd);
}

/* SHL/SLI - Vector shift left */
static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
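    /* Left shifts encode the shift amount directly as immh:immb - esize,
     * i.e. 0..esize-1, the mirror image of the right-shift encoding.
     */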

    /* Range of size is limited by decode: immh is a non-zero 4 bit field */
    assert(size >= 0 && size <= 3);

    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (insert) {
        gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
    } else {
        gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
    }
}

/* USHLL/SHLL - Vector shift left with widening */
static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
                                  int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize / esize;
    TCGv_i64 tcg_rn = new_tmp_a64(s);
    TCGv_i64 tcg_rd = new_tmp_a64(s);
    int i;

    if (size >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* For the LL variants the store is larger than the load, so if
     * rd == rn we would overwrite parts of our input. Load the whole
     * input up front and extract each element with shifts in the loop.
     */
    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);

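    /* Extract each narrow element with a right shift, widen it (the
     * third argument of ext_and_shift_reg() selects UXTB..UXTW for
     * unsigned input or SXTB..SXTW for signed), apply the left shift
     * and store at twice the input element size.
     */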
    for (i = 0; i < elements; i++) {
        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
        write_vec_element(s, tcg_rd, rd, i, size + 1);
    }
}

/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int size = 32 - clz32(immh) - 1;
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize / esize;
    int shift = (2 * esize) - immhb;
    bool round = extract32(opcode, 0, 1);
    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
    TCGv_i64 tcg_round;
    int i;

    if (extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rn = tcg_temp_new_i64();
    tcg_rd = tcg_temp_new_i64();
    tcg_final = tcg_temp_new_i64();
    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);

    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(round_const);
    } else {
        tcg_round = NULL;
    }

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, size + 1);
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, true, size + 1, shift);

        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
    }

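    /* SHRN writes the low half of Vd and the high half is cleared
     * below; SHRN2 inserts into the high half, preserving the low half.
     */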
    if (!is_q) {
        write_vec_element(s, tcg_final, rd, 0, MO_64);
    } else {
        write_vec_element(s, tcg_final, rd, 1, MO_64);
    }
    if (round) {
        tcg_temp_free_i64(tcg_round);
    }
    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
    tcg_temp_free_i64(tcg_final);

    clear_vec_high(s, is_q, rd);
}

/* AdvSIMD shift by immediate
 *  31  30  29 28          23 22  19 18  16 15    11 10 9    5 4    0
 * +---+---+---+-------------+------+------+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-------------+------+------+--------+---+------+------+
 */
static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);

    switch (opcode) {
    case 0x08: /* SRI */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA (accumulate) */
    case 0x04: /* SRSHR / URSHR (rounding) */
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x10: /* SHRN / SQSHRUN */
    case 0x11: /* RSHRN / SQRSHRUN */
        if (is_u) {
            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
                                   opcode, rn, rd);
        } else {
            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
        }
        break;
    case 0x12: /* SQSHRN / UQSHRN */
    case 0x13: /* SQRSHRN / UQRSHRN */
        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
                               opcode, rn, rd);
        break;
    case 0x14: /* SSHLL / USHLL */
        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x1c: /* SCVTF / UCVTF */
        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
                                     opcode, rn, rd);
        break;
    case 0xc: /* SQSHLU */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
        break;
    case 0xe: /* SQSHL, UQSHL */
        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
        break;
    case 0x1f: /* FCVTZS / FCVTZU */
        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}

/* Generate code to do a "long" addition or subtraction, ie one done in
 * TCGv_i64 on vector lanes twice the width specified by size.
 */
static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
{
    static NeonGenTwo64OpFn * const fns[3][2] = {
        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
        { tcg_gen_add_i64, tcg_gen_sub_i64 },
    };
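    /* For size 0 and 1 the helpers treat the i64 as four 16-bit or two
     * 32-bit packed lanes; size 2 is a plain 64-bit add or subtract.
     */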
    NeonGenTwo64OpFn *genfn;
    assert(size < 3);

    genfn = fns[size][is_sub];
    genfn(tcg_res, tcg_op1, tcg_op2);
}

static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    if (accop != 0) {
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

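                /* Branchless absolute difference: compute both a - b
                 * and b - a, then select whichever is non-negative.
                 */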
                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                tcg_temp_free_i64(tcg_tmp1);
                tcg_temp_free_i64(tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (opcode == 9 || opcode == 11) {
                /* saturating accumulate ops */
                if (accop < 0) {
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                }
                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                  tcg_res[pass], tcg_passres);
            } else if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            }

            if (accop != 0) {
                tcg_temp_free_i64(tcg_passres);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            int elt = pass + is_q * 2;

            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
            {
                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
                static NeonGenWidenFn * const widenfns[2][2] = {
                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
                };
                NeonGenWidenFn *widenfn = widenfns[size][is_u];

                widenfn(tcg_op2_64, tcg_op2);
                widenfn(tcg_passres, tcg_op1);
                gen_neon_addl(size, (opcode == 2), tcg_passres,
                              tcg_passres, tcg_op2_64);
                tcg_temp_free_i64(tcg_op2_64);
                break;
            }
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                assert(size == 1);
                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            case 14: /* PMULL */
                assert(size == 0);
                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
                break;
            default:
                g_assert_not_reached();
            }
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);

            if (accop != 0) {
                if (opcode == 9 || opcode == 11) {
                    /* saturating accumulate ops */
                    if (accop < 0) {
                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    }
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                } else {
                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
                                  tcg_res[pass], tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            }
        }
    }

    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}

static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
                            int opcode, int rd, int rn, int rm)
{
    TCGv_i64 tcg_res[2];
    int part = is_q ? 2 : 0;
    int pass;

    for (pass = 0; pass < 2; pass++) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
        static NeonGenWidenFn * const widenfns[3][2] = {
            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
        };
        NeonGenWidenFn *widenfn = widenfns[size][is_u];

        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
        widenfn(tcg_op2_wide, tcg_op2);
        tcg_temp_free_i32(tcg_op2);
        tcg_res[pass] = tcg_temp_new_i64();
        gen_neon_addl(size, (opcode == 3),
                      tcg_res[pass], tcg_op1, tcg_op2_wide);
        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2_wide);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
        tcg_temp_free_i64(tcg_res[pass]);
    }
}

static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
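    /* Round to nearest by adding half of the discarded low 32 bits,
     * then narrow by taking the high half.  Note this modifies 'in'.
     */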
    tcg_gen_addi_i64(in, in, 1U << 31);
    tcg_gen_extrh_i64_i32(res, in);
}

static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
                                 int opcode, int rd, int rn, int rm)
{
    TCGv_i32 tcg_res[2];
    int part = is_q ? 2 : 0;
    int pass;

    for (pass = 0; pass < 2; pass++) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
        static NeonGenNarrowFn * const narrowfns[3][2] = {
            { gen_helper_neon_narrow_high_u8,
              gen_helper_neon_narrow_round_high_u8 },
            { gen_helper_neon_narrow_high_u16,
              gen_helper_neon_narrow_round_high_u16 },
            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
        };
        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];

        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element(s, tcg_op2, rm, pass, MO_64);

        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);

        tcg_res[pass] = tcg_temp_new_i32();
        gennarrow(tcg_res[pass], tcg_wideres);
        tcg_temp_free_i64(tcg_wideres);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
        tcg_temp_free_i32(tcg_res[pass]);
    }
    clear_vec_high(s, is_q, rd);
}

static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
{
    /* PMULL of 64 x 64 -> 128 is an odd special case because it
     * is the only three-reg-diff instruction which produces a
     * 128-bit wide result from a single operation. However, since
     * it's possible to calculate the two halves more or less
     * separately, we just use two helper calls.
     */
    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
    TCGv_i64 tcg_res = tcg_temp_new_i64();

    read_vec_element(s, tcg_op1, rn, is_q, MO_64);
    read_vec_element(s, tcg_op2, rm, is_q, MO_64);
    gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
    write_vec_element(s, tcg_res, rd, 0, MO_64);
    gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
    write_vec_element(s, tcg_res, rd, 1, MO_64);

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_res);
}

/* AdvSIMD three different
 *  31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
{
    /* Instructions in this group fall into three basic classes
     * (in each case with the operation working on each element in
     * the input vectors):
     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
     *     128 bit input)
     * (2) wide 64 x 128 -> 128
     * (3) narrowing 128 x 128 -> 64
     * Here we do initial decode, catch unallocated cases and
     * dispatch to separate functions for each class.
     */
    int is_q = extract32(insn, 30, 1);
    int is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
        /* 64 x 128 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
        /* 128 x 128 -> 64 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 14: /* PMULL, PMULL2 */
        if (is_u || size == 1 || size == 2) {
            unallocated_encoding(s);
            return;
        }
        if (size == 3) {
            if (!dc_isar_feature(aa64_pmull, s)) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_pmull_64(s, is_q, rd, rn, rm);
            return;
        }
        goto is_widening;
    case 9: /* SQDMLAL, SQDMLAL2 */
    case 11: /* SQDMLSL, SQDMLSL2 */
    case 13: /* SQDMULL, SQDMULL2 */
        if (is_u || size == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
        /* 64 x 64 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
    is_widening:
        if (!fp_access_check(s)) {
            return;
        }

        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    default:
        /* opcode 15 not allocated */
        unallocated_encoding(s);
        break;
    }
}

/* Logic op (opcode == 3) subgroup of C3.6.16. */
static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);

    if (!fp_access_check(s)) {
        return;
    }

    switch (size + 4 * is_u) {
    case 0: /* AND */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
        return;
    case 1: /* BIC */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
        return;
    case 2: /* ORR */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
        return;
    case 3: /* ORN */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
        return;
    case 4: /* EOR */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
        return;

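    /* BSL, BIT and BIF are all d = (a & b) | (~a & c), i.e.
     * tcg_gen_gvec_bitsel(); they differ only in which register
     * supplies the selector mask a.
     */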
    case 5: /* BSL bitwise select */
        gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
        return;
    case 6: /* BIT, bitwise insert if true */
        gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
        return;
    case 7: /* BIF, bitwise insert if false */
        gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
        return;

    default:
        g_assert_not_reached();
    }
}

/* Pairwise op subgroup of C3.6.16.
 *
 * This is called directly, or via disas_simd_3same_float for float
 * pairwise operations where the opcode and size are decoded differently.
 */
static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
                                   int size, int rn, int rm, int rd)
{
    TCGv_ptr fpst;
    int pass;

    /* Floating point operations need fpst */
    if (opcode >= 0x58) {
        fpst = get_fpstatus_ptr(false);
    } else {
        fpst = NULL;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* These operations work on the concatenated rm:rn, with each pair of
     * adjacent elements being operated on to produce an element in the result.
     */
    if (size == 3) {
        TCGv_i64 tcg_res[2];

        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            int passreg = (pass == 0) ? rn : rm;

            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
            tcg_res[pass] = tcg_temp_new_i64();

            switch (opcode) {
            case 0x17: /* ADDP */
                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 0x58: /* FMAXNMP */
                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5a: /* FADDP */
                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5e: /* FMAXP */
                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x78: /* FMINNMP */
                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x7e: /* FMINP */
                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }

        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            tcg_temp_free_i64(tcg_res[pass]);
        }
    } else {
        int maxpass = is_q ? 4 : 2;
        TCGv_i32 tcg_res[4];

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            NeonGenTwoOpFn *genfn = NULL;
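            /* The first maxpass/2 results come from pairs in Rn, the
             * rest from Rm; with Q=1 the odd passes take the upper pair.
             */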
            int passreg = pass < (maxpass / 2) ? rn : rm;
            int passelt = (is_q && (pass & 1)) ? 2 : 0;

            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
            tcg_res[pass] = tcg_temp_new_i32();

            switch (opcode) {
            case 0x17: /* ADDP */
            {
                static NeonGenTwoOpFn * const fns[3] = {
                    gen_helper_neon_padd_u8,
                    gen_helper_neon_padd_u16,
                    tcg_gen_add_i32,
                };
                genfn = fns[size];
                break;
            }
            case 0x14: /* SMAXP, UMAXP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x15: /* SMINP, UMINP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            /* The FP operations are all on single floats (32 bit) */
            case 0x58: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5a: /* FADDP */
                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5e: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x78: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x7e: /* FMINP */
                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            /* The FP ops above were generated directly; for the integer
             * ops, call the selected helper now.
             */
            if (genfn) {
                genfn(tcg_res[pass], tcg_op1, tcg_op2);
            }

            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }

        for (pass = 0; pass < maxpass; pass++) {
            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
            tcg_temp_free_i32(tcg_res[pass]);
        }
        clear_vec_high(s, is_q, rd);
    }

    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
}

/* Floating point op subgroup of C3.6.16. */
static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
{
    /* For floating point ops, the U, size[1] and opcode bits
     * together indicate the operation. size[0] indicates single
     * or double.
     */
    int fpopcode = extract32(insn, 11, 5)
        | (extract32(insn, 23, 1) << 5)
        | (extract32(insn, 29, 1) << 6);
    int is_q = extract32(insn, 30, 1);
    int size = extract32(insn, 22, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    int datasize = is_q ? 128 : 64;
    int esize = 32 << size;
    int elements = datasize / esize;

    if (size == 1 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    switch (fpopcode) {
    case 0x58: /* FMAXNMP */
    case 0x5a: /* FADDP */
    case 0x5e: /* FMAXP */
    case 0x78: /* FMINNMP */
    case 0x7e: /* FMINP */
        if (size && !is_q) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
                               rn, rm, rd);
        return;
    case 0x1b: /* FMULX */
    case 0x1f: /* FRECPS */
    case 0x3f: /* FRSQRTS */
    case 0x5d: /* FACGE */
    case 0x7d: /* FACGT */
    case 0x19: /* FMLA */
    case 0x39: /* FMLS */
    case 0x18: /* FMAXNM */
    case 0x1a: /* FADD */
    case 0x1c: /* FCMEQ */
    case 0x1e: /* FMAX */
    case 0x38: /* FMINNM */
    case 0x3a: /* FSUB */
    case 0x3e: /* FMIN */
    case 0x5b: /* FMUL */
    case 0x5c: /* FCMGE */
    case 0x5f: /* FDIV */
    case 0x7a: /* FABD */
    case 0x7c: /* FCMGT */
        if (!fp_access_check(s)) {
            return;
        }
        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
        return;

    case 0x1d: /* FMLAL */
    case 0x3d: /* FMLSL */
    case 0x59: /* FMLAL2 */
    case 0x79: /* FMLSL2 */
        if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
            unallocated_encoding(s);
            return;
        }
        if (fp_access_check(s)) {
            int is_s = extract32(insn, 23, 1);
            int is_2 = extract32(insn, 29, 1);
            int data = (is_2 << 1) | is_s;
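            /* data bit 0 selects the subtracting (FMLSL) forms and
             * bit 1 the high-half (FMLAL2/FMLSL2) forms.
             */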
            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                               vec_full_reg_offset(s, rn),
                               vec_full_reg_offset(s, rm), cpu_env,
                               is_q ? 16 : 8, vec_full_reg_size(s),
                               data, gen_helper_gvec_fmlal_a64);
        }
        return;

    default:
        unallocated_encoding(s);
        return;
    }
}

/* Integer op subgroup of C3.6.16. */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pass;
    TCGCond cond;

    switch (opcode) {
    case 0x13: /* MUL, PMUL */
        if (u && size != 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x0: /* SHADD, UHADD */
    case 0x2: /* SRHADD, URHADD */
    case 0x4: /* SHSUB, UHSUB */
    case 0xc: /* SMAX, UMAX */
    case 0xd: /* SMIN, UMIN */
    case 0xe: /* SABD, UABD */
    case 0xf: /* SABA, UABA */
    case 0x12: /* MLA, MLS */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    }

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
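    /* The saturating ops use the four-operand gvec expansion: the extra
     * operand is vfp.qc, the cumulative saturation (QC) flag word.
     */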
    case 0x01: /* SQADD, UQADD */
        tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
                       offsetof(CPUARMState, vfp.qc),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s),
                       (u ? uqadd_op : sqadd_op) + size);
        return;
    case 0x05: /* SQSUB, UQSUB */
        tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
                       offsetof(CPUARMState, vfp.qc),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s),
                       (u ? uqsub_op : sqsub_op) + size);
        return;
    case 0x0c: /* SMAX, UMAX */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
        }
        return;
    case 0x0d: /* SMIN, UMIN */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
        }
        return;
    case 0x10: /* ADD, SUB */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
        }
        return;
    case 0x13: /* MUL, PMUL */
        if (!u) { /* MUL */
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
            return;
        }
        break;
    case 0x12: /* MLA, MLS */
        if (u) {
            gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
        } else {
            gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
        }
        return;
    case 0x11:
        if (!u) { /* CMTST */
            gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
            return;
        }
        /* else CMEQ */
        cond = TCG_COND_EQ;
        goto do_gvec_cmp;
    case 0x06: /* CMGT, CMHI */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
        goto do_gvec_cmp;
    case 0x07: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
    do_gvec_cmp:
        tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn),
                         vec_full_reg_offset(s, rm),
                         is_q ? 16 : 8, vec_full_reg_size(s));
        return;
    }

    if (size == 3) {
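        /* The decode above rejected size 3 with Q=0, so only the
         * all-64-bit case remains here.
         */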
        assert(is_q);
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            NeonGenTwoOpFn *genfn = NULL;
            NeonGenTwoOpEnvFn *genenvfn = NULL;

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (opcode) {
            case 0x0: /* SHADD, UHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x2: /* SRHADD, URHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x4: /* SHSUB, UHSUB */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x8: /* SSHL, USHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x9: /* SQSHL, UQSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xa: /* SRSHL, URSHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xb: /* SQRSHL, UQRSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xe: /* SABD, UABD */
            case 0xf: /* SABA, UABA */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x13: /* MUL, PMUL */
                assert(u); /* PMUL */
                assert(size == 0);
                genfn = gen_helper_neon_mul_p8;
                break;
            case 0x16: /* SQDMULH, SQRDMULH */
            {
                static NeonGenTwoOpEnvFn * const fns[2][2] = {
                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
                };
                assert(size == 1 || size == 2);
                genenvfn = fns[size - 1][u];
                break;
            }
            default:
                g_assert_not_reached();
            }

            if (genenvfn) {
                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
            } else {
                genfn(tcg_res, tcg_op1, tcg_op2);
            }

            if (opcode == 0xf) {
                /* SABA, UABA: accumulating ops */
                static NeonGenTwoOpFn * const fns[3] = {
                    gen_helper_neon_add_u8,
                    gen_helper_neon_add_u16,
                    tcg_gen_add_i32,
                };

                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
                fns[size](tcg_res, tcg_op1, tcg_res);
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }
    clear_vec_high(s, is_q, rd);
}

/* AdvSIMD three same
 *  31  30  29  28       24 23  22  21 20  16 15    11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 11, 5);

    switch (opcode) {
    case 0x3: /* logic ops */
        disas_simd_3same_logic(s, insn);
        break;
    case 0x17: /* ADDP */
    case 0x14: /* SMAXP, UMAXP */
    case 0x15: /* SMINP, UMINP */
    {
        /* Pairwise operations */
        int is_q = extract32(insn, 30, 1);
        int u = extract32(insn, 29, 1);
        int size = extract32(insn, 22, 2);
        int rm = extract32(insn, 16, 5);
        int rn = extract32(insn, 5, 5);
        int rd = extract32(insn, 0, 5);
        if (opcode == 0x17) {
            if (u || (size == 3 && !is_q)) {
                unallocated_encoding(s);
                return;
            }
        } else {
            if (size == 3) {
                unallocated_encoding(s);
                return;
            }
        }
        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
        break;
    }
    case 0x18 ... 0x31:
        /* floating point ops, sz[1] and U are part of opcode */
        disas_simd_3same_float(s, insn);
        break;
    default:
        disas_simd_3same_int(s, insn);
        break;
    }
}

/*
 * Advanced SIMD three same (ARMv8.2 FP16 variants)
 *
 *  31  30  29  28       24 23 22 21 20  16 15 14 13    11 10 9    5 4    0
 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
 *
 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
 * (register), FACGE, FABD, FCMGT (register) and FACGT.
 *
 */
static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
{
    int opcode, fpopcode;
    int is_q, u, a, rm, rn, rd;
    int datasize, elements;
    int pass;
    TCGv_ptr fpst;
    bool pairwise = false;

    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* For these floating point ops, the U, a and opcode bits
     * together indicate the operation.
     */
    opcode = extract32(insn, 11, 3);
    u = extract32(insn, 29, 1);
    a = extract32(insn, 23, 1);
    is_q = extract32(insn, 30, 1);
    rm = extract32(insn, 16, 5);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    fpopcode = opcode | (a << 3) | (u << 4);
    datasize = is_q ? 128 : 64;
    elements = datasize / 16;

    switch (fpopcode) {
    case 0x10: /* FMAXNMP */
    case 0x12: /* FADDP */
    case 0x16: /* FMAXP */
    case 0x18: /* FMINNMP */
    case 0x1e: /* FMINP */
        pairwise = true;
        break;
    }

    fpst = get_fpstatus_ptr(true);

    if (pairwise) {
        int maxpass = is_q ? 8 : 4;
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res[8];

        for (pass = 0; pass < maxpass; pass++) {
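            /* Pairs come from Rn for the first maxpass/2 passes and
             * from Rm after that; & (maxpass - 1) wraps passelt to 0.
             */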
            int passreg = pass < (maxpass / 2) ? rn : rm;
            int passelt = (pass << 1) & (maxpass - 1);

            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
            tcg_res[pass] = tcg_temp_new_i32();

            switch (fpopcode) {
            case 0x10: /* FMAXNMP */
                gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
                                           fpst);
                break;
            case 0x12: /* FADDP */
                gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x16: /* FMAXP */
                gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMINNMP */
                gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
                                           fpst);
                break;
            case 0x1e: /* FMINP */
                gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        for (pass = 0; pass < maxpass; pass++) {
            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
            tcg_temp_free_i32(tcg_res[pass]);
        }

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);

    } else {
        for (pass = 0; pass < elements; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);

            switch (fpopcode) {
            case 0x0: /* FMAXNM */
                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
                                           fpst);
                break;
            case 0x2: /* FADD */
                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3: /* FMULX */
                gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x4: /* FCMEQ */
                gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x6: /* FMAX */
                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7: /* FRECPS */
                gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x8: /* FMINNM */
                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x9: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
                                           fpst);
                break;
            case 0xa: /* FSUB */
                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xe: /* FMIN */
                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FRSQRTS */
                gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x13: /* FMUL */
                gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x14: /* FCMGE */
                gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x15: /* FACGE */
                gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x17: /* FDIV */
                gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FABD */
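                /* FABD = abs(a - b): subtract, then clear the
                 * half-precision sign bit.
                 */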
                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
                tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
                break;
            case 0x1c: /* FCMGT */
                gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1d: /* FACGT */
                gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
                        __func__, insn, fpopcode, s->pc_curr);
                g_assert_not_reached();
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    clear_vec_high(s, is_q, rd);
}

/* AdvSIMD three same extra
 *  31 30 29 28       24 23  22  21 20  16 15 14    11 10 9  5 4  0
 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
 * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
 */
static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    bool feature;
    int rot;

    switch (u * 16 + opcode) {
    case 0x10: /* SQRDMLAH (vector) */
    case 0x11: /* SQRDMLSH (vector) */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_rdm, s);
        break;
    case 0x02: /* SDOT (vector) */
    case 0x12: /* UDOT (vector) */
        if (size != MO_32) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_dp, s);
        break;
    case 0x18: /* FCMLA, #0 */
    case 0x19: /* FCMLA, #90 */
    case 0x1a: /* FCMLA, #180 */
    case 0x1b: /* FCMLA, #270 */
    case 0x1c: /* FCADD, #90 */
    case 0x1e: /* FCADD, #270 */
        if (size == 0
            || (size == 1 && !dc_isar_feature(aa64_fp16, s))
            || (size == 3 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_fcma, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }
    if (!feature) {
        unallocated_encoding(s);
        return;
    }
    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0x0: /* SQRDMLAH (vector) */
        switch (size) {
        case 1:
            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
            break;
        case 2:
            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    case 0x1: /* SQRDMLSH (vector) */
        switch (size) {
        case 1:
            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
            break;
        case 2:
            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    case 0x2: /* SDOT / UDOT */
        gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
                         u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
        return;

    case 0x8: /* FCMLA, #0 */
    case 0x9: /* FCMLA, #90 */
    case 0xa: /* FCMLA, #180 */
    case 0xb: /* FCMLA, #270 */
        rot = extract32(opcode, 0, 2);
        switch (size) {
        case 1:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
                              gen_helper_gvec_fcmlah);
            break;
        case 2:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
                              gen_helper_gvec_fcmlas);
            break;
        case 3:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
                              gen_helper_gvec_fcmlad);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    case 0xc: /* FCADD, #90 */
    case 0xe: /* FCADD, #270 */
        rot = extract32(opcode, 1, 1);
        switch (size) {
        case 1:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
                              gen_helper_gvec_fcaddh);
            break;
        case 2:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
                              gen_helper_gvec_fcadds);
            break;
        case 3:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
                              gen_helper_gvec_fcaddd);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    default:
        g_assert_not_reached();
    }
}

static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
                                  int size, int rn, int rd)
{
    /* Handle 2-reg-misc ops which are widening (so each size element
     * in the source becomes a 2*size element in the destination).
     * The only instruction like this is FCVTL.
     */
    int pass;

    if (size == 3) {
        /* 32 -> 64 bit fp conversion */
        TCGv_i64 tcg_res[2];
        int srcelt = is_q ? 2 : 0;
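        /* FCVTL converts the low half of Vn; FCVTL2 (Q=1) the high half. */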
11696
11697 for (pass = 0; pass < 2; pass++) {
11698 TCGv_i32 tcg_op = tcg_temp_new_i32();
11699 tcg_res[pass] = tcg_temp_new_i64();
11700
11701 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11702 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11703 tcg_temp_free_i32(tcg_op);
11704 }
11705 for (pass = 0; pass < 2; pass++) {
11706 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11707 tcg_temp_free_i64(tcg_res[pass]);
11708 }
11709 } else {
11710 /* 16 -> 32 bit fp conversion */
11711 int srcelt = is_q ? 4 : 0;
11712 TCGv_i32 tcg_res[4];
11713 TCGv_ptr fpst = get_fpstatus_ptr(false);
11714 TCGv_i32 ahp = get_ahp_flag();
11715
11716 for (pass = 0; pass < 4; pass++) {
11717 tcg_res[pass] = tcg_temp_new_i32();
11718
11719 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11720 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11721 fpst, ahp);
11722 }
11723 for (pass = 0; pass < 4; pass++) {
11724 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11725 tcg_temp_free_i32(tcg_res[pass]);
11726 }
11727
11728 tcg_temp_free_ptr(fpst);
11729 tcg_temp_free_i32(ahp);
11730 }
11731}
11732
11733static void handle_rev(DisasContext *s, int opcode, bool u,
11734 bool is_q, int size, int rn, int rd)
11735{
11736 int op = (opcode << 1) | u;
11737 int opsz = op + size;
11738 int grp_size = 3 - opsz;
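    /* op is 0 for REV64, 1 for REV32 and 2 for REV16, so grp_size is
     * log2 of the byte span being reversed: e.g. REV64 on byte elements
     * has opsz = 0 and grp_size = 3, i.e. reversal within 8-byte groups.
     * Combinations with opsz >= 3 do not exist and are rejected below.
     */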
11739 int dsize = is_q ? 128 : 64;
11740 int i;
11741
11742 if (opsz >= 3) {
11743 unallocated_encoding(s);
11744 return;
11745 }
11746
11747 if (!fp_access_check(s)) {
11748 return;
11749 }
11750
11751 if (size == 0) {
        /* Special case for byte elements: use a bswap op on each group
         * of elements.
         */
11753 int groups = dsize / (8 << grp_size);
11754
11755 for (i = 0; i < groups; i++) {
11756 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11757
11758 read_vec_element(s, tcg_tmp, rn, i, grp_size);
11759 switch (grp_size) {
11760 case MO_16:
11761 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
11762 break;
11763 case MO_32:
11764 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
11765 break;
11766 case MO_64:
11767 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11768 break;
11769 default:
11770 g_assert_not_reached();
11771 }
11772 write_vec_element(s, tcg_tmp, rd, i, grp_size);
11773 tcg_temp_free_i64(tcg_tmp);
11774 }
11775 clear_vec_high(s, is_q, rd);
11776 } else {
11777 int revmask = (1 << grp_size) - 1;
11778 int esize = 8 << size;
11779 int elements = dsize / esize;
11780 TCGv_i64 tcg_rn = tcg_temp_new_i64();
11781 TCGv_i64 tcg_rd = tcg_const_i64(0);
11782 TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11783
11784 for (i = 0; i < elements; i++) {
11785 int e_rev = (i & 0xf) ^ revmask;
11786 int off = e_rev * esize;
11787 read_vec_element(s, tcg_rn, rn, i, size);
11788 if (off >= 64) {
11789 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11790 tcg_rn, off - 64, esize);
11791 } else {
11792 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11793 }
11794 }
11795 write_vec_element(s, tcg_rd, rd, 0, MO_64);
11796 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11797
11798 tcg_temp_free_i64(tcg_rd_hi);
11799 tcg_temp_free_i64(tcg_rd);
11800 tcg_temp_free_i64(tcg_rn);
11801 }
11802}
11803
11804static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11805 bool is_q, int size, int rn, int rd)
11806{
11807 /* Implement the pairwise operations from 2-misc:
11808 * SADDLP, UADDLP, SADALP, UADALP.
11809 * These all add pairs of elements in the input to produce a
11810 * double-width result element in the output (possibly accumulating).
11811 */
11812 bool accum = (opcode == 0x6);
11813 int maxpass = is_q ? 2 : 1;
11814 int pass;
11815 TCGv_i64 tcg_res[2];
11816
11817 if (size == 2) {
11818 /* 32 + 32 -> 64 op */
11819 MemOp memop = size + (u ? 0 : MO_SIGN);
11820
11821 for (pass = 0; pass < maxpass; pass++) {
11822 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11823 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11824
11825 tcg_res[pass] = tcg_temp_new_i64();
11826
11827 read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11828 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11829 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11830 if (accum) {
11831 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11832 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11833 }
11834
11835 tcg_temp_free_i64(tcg_op1);
11836 tcg_temp_free_i64(tcg_op2);
11837 }
11838 } else {
11839 for (pass = 0; pass < maxpass; pass++) {
11840 TCGv_i64 tcg_op = tcg_temp_new_i64();
11841 NeonGenOneOpFn *genfn;
11842 static NeonGenOneOpFn * const fns[2][2] = {
11843 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
11844 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
11845 };
11846
11847 genfn = fns[size][u];
11848
11849 tcg_res[pass] = tcg_temp_new_i64();
11850
11851 read_vec_element(s, tcg_op, rn, pass, MO_64);
11852 genfn(tcg_res[pass], tcg_op);
11853
11854 if (accum) {
11855 read_vec_element(s, tcg_op, rd, pass, MO_64);
11856 if (size == 0) {
11857 gen_helper_neon_addl_u16(tcg_res[pass],
11858 tcg_res[pass], tcg_op);
11859 } else {
11860 gen_helper_neon_addl_u32(tcg_res[pass],
11861 tcg_res[pass], tcg_op);
11862 }
11863 }
11864 tcg_temp_free_i64(tcg_op);
11865 }
11866 }
11867 if (!is_q) {
11868 tcg_res[1] = tcg_const_i64(0);
11869 }
11870 for (pass = 0; pass < 2; pass++) {
11871 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11872 tcg_temp_free_i64(tcg_res[pass]);
11873 }
11874}
11875
11876static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11877{
11878 /* Implement SHLL and SHLL2 */
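    /* The shift amount is implicit in SHLL: each source element is
     * widened and then shifted left by its own width (8 << size),
     * e.g. SHLL Vd.8H, Vn.8B, #8 for byte elements.
     */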
11879 int pass;
11880 int part = is_q ? 2 : 0;
11881 TCGv_i64 tcg_res[2];
11882
11883 for (pass = 0; pass < 2; pass++) {
11884 static NeonGenWidenFn * const widenfns[3] = {
11885 gen_helper_neon_widen_u8,
11886 gen_helper_neon_widen_u16,
11887 tcg_gen_extu_i32_i64,
11888 };
11889 NeonGenWidenFn *widenfn = widenfns[size];
11890 TCGv_i32 tcg_op = tcg_temp_new_i32();
11891
11892 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11893 tcg_res[pass] = tcg_temp_new_i64();
11894 widenfn(tcg_res[pass], tcg_op);
11895 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11896
11897 tcg_temp_free_i32(tcg_op);
11898 }
11899
11900 for (pass = 0; pass < 2; pass++) {
11901 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11902 tcg_temp_free_i64(tcg_res[pass]);
11903 }
11904}
11905
11906/* AdvSIMD two reg misc
11907 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
11908 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11909 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
11910 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11911 */
11912static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11913{
11914 int size = extract32(insn, 22, 2);
11915 int opcode = extract32(insn, 12, 5);
11916 bool u = extract32(insn, 29, 1);
11917 bool is_q = extract32(insn, 30, 1);
11918 int rn = extract32(insn, 5, 5);
11919 int rd = extract32(insn, 0, 5);
11920 bool need_fpstatus = false;
11921 bool need_rmode = false;
11922 int rmode = -1;
11923 TCGv_i32 tcg_rmode;
11924 TCGv_ptr tcg_fpstatus;
11925
11926 switch (opcode) {
11927 case 0x0: /* REV64, REV32 */
11928 case 0x1: /* REV16 */
11929 handle_rev(s, opcode, u, is_q, size, rn, rd);
11930 return;
11931 case 0x5: /* CNT, NOT, RBIT */
11932 if (u && size == 0) {
11933 /* NOT */
11934 break;
11935 } else if (u && size == 1) {
11936 /* RBIT */
11937 break;
11938 } else if (!u && size == 0) {
11939 /* CNT */
11940 break;
11941 }
11942 unallocated_encoding(s);
11943 return;
11944 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11945 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11946 if (size == 3) {
11947 unallocated_encoding(s);
11948 return;
11949 }
11950 if (!fp_access_check(s)) {
11951 return;
11952 }
11953
11954 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11955 return;
11956 case 0x4: /* CLS, CLZ */
11957 if (size == 3) {
11958 unallocated_encoding(s);
11959 return;
11960 }
11961 break;
11962 case 0x2: /* SADDLP, UADDLP */
11963 case 0x6: /* SADALP, UADALP */
11964 if (size == 3) {
11965 unallocated_encoding(s);
11966 return;
11967 }
11968 if (!fp_access_check(s)) {
11969 return;
11970 }
11971 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11972 return;
11973 case 0x13: /* SHLL, SHLL2 */
11974 if (u == 0 || size == 3) {
11975 unallocated_encoding(s);
11976 return;
11977 }
11978 if (!fp_access_check(s)) {
11979 return;
11980 }
11981 handle_shll(s, is_q, size, rn, rd);
11982 return;
11983 case 0xa: /* CMLT */
11984 if (u == 1) {
11985 unallocated_encoding(s);
11986 return;
11987 }
11988 /* fall through */
11989 case 0x8: /* CMGT, CMGE */
11990 case 0x9: /* CMEQ, CMLE */
11991 case 0xb: /* ABS, NEG */
11992 if (size == 3 && !is_q) {
11993 unallocated_encoding(s);
11994 return;
11995 }
11996 break;
11997 case 0x3: /* SUQADD, USQADD */
11998 if (size == 3 && !is_q) {
11999 unallocated_encoding(s);
12000 return;
12001 }
12002 if (!fp_access_check(s)) {
12003 return;
12004 }
12005 handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12006 return;
12007 case 0x7: /* SQABS, SQNEG */
12008 if (size == 3 && !is_q) {
12009 unallocated_encoding(s);
12010 return;
12011 }
12012 break;
12013 case 0xc ... 0xf:
12014 case 0x16 ... 0x1f:
12015 {
12016 /* Floating point: U, size[1] and opcode indicate operation;
12017 * size[0] indicates single or double precision.
12018 */
12019 int is_double = extract32(size, 0, 1);
12020 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12021 size = is_double ? 3 : 2;
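        /* After this re-encoding the switch value below is u:size[1]:opcode,
         * so e.g. FABS decodes as 0x2f (opcode 0xf with size[1] set) and
         * FNEG as 0x6f (the same encoding with U also set).
         */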
12022 switch (opcode) {
12023 case 0x2f: /* FABS */
12024 case 0x6f: /* FNEG */
12025 if (size == 3 && !is_q) {
12026 unallocated_encoding(s);
12027 return;
12028 }
12029 break;
12030 case 0x1d: /* SCVTF */
12031 case 0x5d: /* UCVTF */
12032 {
            bool is_signed = (opcode == 0x1d);
12034 int elements = is_double ? 2 : is_q ? 4 : 2;
12035 if (is_double && !is_q) {
12036 unallocated_encoding(s);
12037 return;
12038 }
12039 if (!fp_access_check(s)) {
12040 return;
12041 }
12042 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12043 return;
12044 }
12045 case 0x2c: /* FCMGT (zero) */
12046 case 0x2d: /* FCMEQ (zero) */
12047 case 0x2e: /* FCMLT (zero) */
12048 case 0x6c: /* FCMGE (zero) */
12049 case 0x6d: /* FCMLE (zero) */
12050 if (size == 3 && !is_q) {
12051 unallocated_encoding(s);
12052 return;
12053 }
12054 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12055 return;
12056 case 0x7f: /* FSQRT */
12057 if (size == 3 && !is_q) {
12058 unallocated_encoding(s);
12059 return;
12060 }
12061 break;
12062 case 0x1a: /* FCVTNS */
12063 case 0x1b: /* FCVTMS */
12064 case 0x3a: /* FCVTPS */
12065 case 0x3b: /* FCVTZS */
12066 case 0x5a: /* FCVTNU */
12067 case 0x5b: /* FCVTMU */
12068 case 0x7a: /* FCVTPU */
12069 case 0x7b: /* FCVTZU */
12070 need_fpstatus = true;
12071 need_rmode = true;
12072 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
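            /* Bits 5 and 0 of the re-encoded opcode select the rounding
             * mode: FCVTN* (0,0) -> 0, FCVTP* (1,0) -> 1, FCVTM* (0,1) -> 2
             * and FCVTZ* (1,1) -> 3. This relies on FPROUNDING_TIEEVEN,
             * POSINF, NEGINF and ZERO having exactly those values.
             */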
12073 if (size == 3 && !is_q) {
12074 unallocated_encoding(s);
12075 return;
12076 }
12077 break;
12078 case 0x5c: /* FCVTAU */
12079 case 0x1c: /* FCVTAS */
12080 need_fpstatus = true;
12081 need_rmode = true;
12082 rmode = FPROUNDING_TIEAWAY;
12083 if (size == 3 && !is_q) {
12084 unallocated_encoding(s);
12085 return;
12086 }
12087 break;
12088 case 0x3c: /* URECPE */
12089 if (size == 3) {
12090 unallocated_encoding(s);
12091 return;
12092 }
12093 /* fall through */
12094 case 0x3d: /* FRECPE */
12095 case 0x7d: /* FRSQRTE */
12096 if (size == 3 && !is_q) {
12097 unallocated_encoding(s);
12098 return;
12099 }
12100 if (!fp_access_check(s)) {
12101 return;
12102 }
12103 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12104 return;
12105 case 0x56: /* FCVTXN, FCVTXN2 */
12106 if (size == 2) {
12107 unallocated_encoding(s);
12108 return;
12109 }
12110 /* fall through */
12111 case 0x16: /* FCVTN, FCVTN2 */
12112 /* handle_2misc_narrow does a 2*size -> size operation, but these
12113 * instructions encode the source size rather than dest size.
12114 */
12115 if (!fp_access_check(s)) {
12116 return;
12117 }
12118 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12119 return;
12120 case 0x17: /* FCVTL, FCVTL2 */
12121 if (!fp_access_check(s)) {
12122 return;
12123 }
12124 handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12125 return;
12126 case 0x18: /* FRINTN */
12127 case 0x19: /* FRINTM */
12128 case 0x38: /* FRINTP */
12129 case 0x39: /* FRINTZ */
12130 need_rmode = true;
12131 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
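            /* Same bit trick as the FCVT group above: FRINTN/P/M/Z map to
             * FPROUNDING_TIEEVEN/POSINF/NEGINF/ZERO respectively.
             */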
12132 /* fall through */
12133 case 0x59: /* FRINTX */
12134 case 0x79: /* FRINTI */
12135 need_fpstatus = true;
12136 if (size == 3 && !is_q) {
12137 unallocated_encoding(s);
12138 return;
12139 }
12140 break;
12141 case 0x58: /* FRINTA */
12142 need_rmode = true;
12143 rmode = FPROUNDING_TIEAWAY;
12144 need_fpstatus = true;
12145 if (size == 3 && !is_q) {
12146 unallocated_encoding(s);
12147 return;
12148 }
12149 break;
12150 case 0x7c: /* URSQRTE */
12151 if (size == 3) {
12152 unallocated_encoding(s);
12153 return;
12154 }
12155 need_fpstatus = true;
12156 break;
12157 case 0x1e: /* FRINT32Z */
12158 case 0x1f: /* FRINT64Z */
12159 need_rmode = true;
12160 rmode = FPROUNDING_ZERO;
12161 /* fall through */
12162 case 0x5e: /* FRINT32X */
12163 case 0x5f: /* FRINT64X */
12164 need_fpstatus = true;
12165 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12166 unallocated_encoding(s);
12167 return;
12168 }
12169 break;
12170 default:
12171 unallocated_encoding(s);
12172 return;
12173 }
12174 break;
12175 }
12176 default:
12177 unallocated_encoding(s);
12178 return;
12179 }
12180
12181 if (!fp_access_check(s)) {
12182 return;
12183 }
12184
12185 if (need_fpstatus || need_rmode) {
12186 tcg_fpstatus = get_fpstatus_ptr(false);
12187 } else {
12188 tcg_fpstatus = NULL;
12189 }
12190 if (need_rmode) {
12191 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12192 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12193 } else {
12194 tcg_rmode = NULL;
12195 }
12196
12197 switch (opcode) {
12198 case 0x5:
12199 if (u && size == 0) { /* NOT */
12200 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12201 return;
12202 }
12203 break;
12204 case 0xb:
12205 if (u) { /* ABS, NEG */
12206 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12207 } else {
12208 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12209 }
12210 return;
12211 }
12212
12213 if (size == 3) {
12214 /* All 64-bit element operations can be shared with scalar 2misc */
12215 int pass;
12216
12217 /* Coverity claims (size == 3 && !is_q) has been eliminated
12218 * from all paths leading to here.
12219 */
12220 tcg_debug_assert(is_q);
12221 for (pass = 0; pass < 2; pass++) {
12222 TCGv_i64 tcg_op = tcg_temp_new_i64();
12223 TCGv_i64 tcg_res = tcg_temp_new_i64();
12224
12225 read_vec_element(s, tcg_op, rn, pass, MO_64);
12226
12227 handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12228 tcg_rmode, tcg_fpstatus);
12229
12230 write_vec_element(s, tcg_res, rd, pass, MO_64);
12231
12232 tcg_temp_free_i64(tcg_res);
12233 tcg_temp_free_i64(tcg_op);
12234 }
12235 } else {
12236 int pass;
12237
12238 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12239 TCGv_i32 tcg_op = tcg_temp_new_i32();
12240 TCGv_i32 tcg_res = tcg_temp_new_i32();
12241 TCGCond cond;
12242
12243 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12244
12245 if (size == 2) {
12246 /* Special cases for 32 bit elements */
12247 switch (opcode) {
12248 case 0xa: /* CMLT */
                /* 32 bit integer comparison against zero, result is
                 * test ? (2^32 - 1) : 0. We implement via setcond(test)
                 * and then negating, which turns a result of 1 into
                 * all-ones.
                 */
12253 cond = TCG_COND_LT;
12254 do_cmop:
12255 tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
12256 tcg_gen_neg_i32(tcg_res, tcg_res);
12257 break;
12258 case 0x8: /* CMGT, CMGE */
12259 cond = u ? TCG_COND_GE : TCG_COND_GT;
12260 goto do_cmop;
12261 case 0x9: /* CMEQ, CMLE */
12262 cond = u ? TCG_COND_LE : TCG_COND_EQ;
12263 goto do_cmop;
12264 case 0x4: /* CLS */
12265 if (u) {
12266 tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12267 } else {
12268 tcg_gen_clrsb_i32(tcg_res, tcg_op);
12269 }
12270 break;
12271 case 0x7: /* SQABS, SQNEG */
12272 if (u) {
12273 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12274 } else {
12275 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12276 }
12277 break;
12278 case 0x2f: /* FABS */
12279 gen_helper_vfp_abss(tcg_res, tcg_op);
12280 break;
12281 case 0x6f: /* FNEG */
12282 gen_helper_vfp_negs(tcg_res, tcg_op);
12283 break;
12284 case 0x7f: /* FSQRT */
12285 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12286 break;
12287 case 0x1a: /* FCVTNS */
12288 case 0x1b: /* FCVTMS */
12289 case 0x1c: /* FCVTAS */
12290 case 0x3a: /* FCVTPS */
12291 case 0x3b: /* FCVTZS */
12292 {
12293 TCGv_i32 tcg_shift = tcg_const_i32(0);
12294 gen_helper_vfp_tosls(tcg_res, tcg_op,
12295 tcg_shift, tcg_fpstatus);
12296 tcg_temp_free_i32(tcg_shift);
12297 break;
12298 }
12299 case 0x5a: /* FCVTNU */
12300 case 0x5b: /* FCVTMU */
12301 case 0x5c: /* FCVTAU */
12302 case 0x7a: /* FCVTPU */
12303 case 0x7b: /* FCVTZU */
12304 {
12305 TCGv_i32 tcg_shift = tcg_const_i32(0);
12306 gen_helper_vfp_touls(tcg_res, tcg_op,
12307 tcg_shift, tcg_fpstatus);
12308 tcg_temp_free_i32(tcg_shift);
12309 break;
12310 }
12311 case 0x18: /* FRINTN */
12312 case 0x19: /* FRINTM */
12313 case 0x38: /* FRINTP */
12314 case 0x39: /* FRINTZ */
12315 case 0x58: /* FRINTA */
12316 case 0x79: /* FRINTI */
12317 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12318 break;
12319 case 0x59: /* FRINTX */
12320 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12321 break;
12322 case 0x7c: /* URSQRTE */
12323 gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
12324 break;
12325 case 0x1e: /* FRINT32Z */
12326 case 0x5e: /* FRINT32X */
12327 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12328 break;
12329 case 0x1f: /* FRINT64Z */
12330 case 0x5f: /* FRINT64X */
12331 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12332 break;
12333 default:
12334 g_assert_not_reached();
12335 }
12336 } else {
12337 /* Use helpers for 8 and 16 bit elements */
12338 switch (opcode) {
12339 case 0x5: /* CNT, RBIT */
12340 /* For these two insns size is part of the opcode specifier
12341 * (handled earlier); they always operate on byte elements.
12342 */
12343 if (u) {
12344 gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12345 } else {
12346 gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12347 }
12348 break;
12349 case 0x7: /* SQABS, SQNEG */
12350 {
12351 NeonGenOneOpEnvFn *genfn;
12352 static NeonGenOneOpEnvFn * const fns[2][2] = {
12353 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12354 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12355 };
12356 genfn = fns[size][u];
12357 genfn(tcg_res, cpu_env, tcg_op);
12358 break;
12359 }
12360 case 0x8: /* CMGT, CMGE */
12361 case 0x9: /* CMEQ, CMLE */
12362 case 0xa: /* CMLT */
12363 {
12364 static NeonGenTwoOpFn * const fns[3][2] = {
12365 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
12366 { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
12367 { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
12368 };
12369 NeonGenTwoOpFn *genfn;
12370 int comp;
12371 bool reverse;
12372 TCGv_i32 tcg_zero = tcg_const_i32(0);
12373
12374 /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
12375 comp = (opcode - 0x8) * 2 + u;
12376 /* ...but LE, LT are implemented as reverse GE, GT */
12377 reverse = (comp > 2);
12378 if (reverse) {
12379 comp = 4 - comp;
12380 }
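                    /* e.g. CMGE is opcode 0x8 with U set, giving comp = 1,
                     * while CMLT is opcode 0xa with U clear, giving comp = 4,
                     * which reverses to GT (comp = 0) with operands swapped.
                     */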
12381 genfn = fns[comp][size];
12382 if (reverse) {
12383 genfn(tcg_res, tcg_zero, tcg_op);
12384 } else {
12385 genfn(tcg_res, tcg_op, tcg_zero);
12386 }
12387 tcg_temp_free_i32(tcg_zero);
12388 break;
12389 }
12390 case 0x4: /* CLS, CLZ */
12391 if (u) {
12392 if (size == 0) {
12393 gen_helper_neon_clz_u8(tcg_res, tcg_op);
12394 } else {
12395 gen_helper_neon_clz_u16(tcg_res, tcg_op);
12396 }
12397 } else {
12398 if (size == 0) {
12399 gen_helper_neon_cls_s8(tcg_res, tcg_op);
12400 } else {
12401 gen_helper_neon_cls_s16(tcg_res, tcg_op);
12402 }
12403 }
12404 break;
12405 default:
12406 g_assert_not_reached();
12407 }
12408 }
12409
12410 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12411
12412 tcg_temp_free_i32(tcg_res);
12413 tcg_temp_free_i32(tcg_op);
12414 }
12415 }
12416 clear_vec_high(s, is_q, rd);
12417
12418 if (need_rmode) {
12419 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12420 tcg_temp_free_i32(tcg_rmode);
12421 }
12422 if (need_fpstatus) {
12423 tcg_temp_free_ptr(tcg_fpstatus);
12424 }
12425}
12426
12427/* AdvSIMD [scalar] two register miscellaneous (FP16)
12428 *
12429 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0
12430 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12431 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd |
12432 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12433 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12434 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12435 *
 * This actually covers two groups where scalar access is governed by
 * bit 28. A number of the instructions (the float-to-integral
 * conversions) exist only in the vector form and are unallocated for
 * the scalar decode. Also, in the scalar decode Q is always 1.
12440 */
12441static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12442{
12443 int fpop, opcode, a, u;
12444 int rn, rd;
12445 bool is_q;
12446 bool is_scalar;
12447 bool only_in_vector = false;
12448
12449 int pass;
12450 TCGv_i32 tcg_rmode = NULL;
12451 TCGv_ptr tcg_fpstatus = NULL;
12452 bool need_rmode = false;
12453 bool need_fpst = true;
12454 int rmode;
12455
12456 if (!dc_isar_feature(aa64_fp16, s)) {
12457 unallocated_encoding(s);
12458 return;
12459 }
12460
12461 rd = extract32(insn, 0, 5);
12462 rn = extract32(insn, 5, 5);
12463
12464 a = extract32(insn, 23, 1);
12465 u = extract32(insn, 29, 1);
12466 is_scalar = extract32(insn, 28, 1);
12467 is_q = extract32(insn, 30, 1);
12468
12469 opcode = extract32(insn, 12, 5);
12470 fpop = deposit32(opcode, 5, 1, a);
12471 fpop = deposit32(fpop, 6, 1, u);
12472
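    /* fpop is u:a:opcode, mirroring the u:size[1]:opcode re-encoding used
     * by disas_simd_two_reg_misc() above, so case values such as 0x2f
     * (FABS) and 0x6f (FNEG) line up with the non-fp16 decoder.
     */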
12476 switch (fpop) {
12477 case 0x1d: /* SCVTF */
12478 case 0x5d: /* UCVTF */
12479 {
12480 int elements;
12481
12482 if (is_scalar) {
12483 elements = 1;
12484 } else {
12485 elements = (is_q ? 8 : 4);
12486 }
12487
12488 if (!fp_access_check(s)) {
12489 return;
12490 }
12491 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12492 return;
    }
12495 case 0x2c: /* FCMGT (zero) */
12496 case 0x2d: /* FCMEQ (zero) */
12497 case 0x2e: /* FCMLT (zero) */
12498 case 0x6c: /* FCMGE (zero) */
12499 case 0x6d: /* FCMLE (zero) */
12500 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12501 return;
12502 case 0x3d: /* FRECPE */
12503 case 0x3f: /* FRECPX */
12504 break;
12505 case 0x18: /* FRINTN */
12506 need_rmode = true;
12507 only_in_vector = true;
12508 rmode = FPROUNDING_TIEEVEN;
12509 break;
12510 case 0x19: /* FRINTM */
12511 need_rmode = true;
12512 only_in_vector = true;
12513 rmode = FPROUNDING_NEGINF;
12514 break;
12515 case 0x38: /* FRINTP */
12516 need_rmode = true;
12517 only_in_vector = true;
12518 rmode = FPROUNDING_POSINF;
12519 break;
12520 case 0x39: /* FRINTZ */
12521 need_rmode = true;
12522 only_in_vector = true;
12523 rmode = FPROUNDING_ZERO;
12524 break;
12525 case 0x58: /* FRINTA */
12526 need_rmode = true;
12527 only_in_vector = true;
12528 rmode = FPROUNDING_TIEAWAY;
12529 break;
12530 case 0x59: /* FRINTX */
12531 case 0x79: /* FRINTI */
12532 only_in_vector = true;
12533 /* current rounding mode */
12534 break;
12535 case 0x1a: /* FCVTNS */
12536 need_rmode = true;
12537 rmode = FPROUNDING_TIEEVEN;
12538 break;
12539 case 0x1b: /* FCVTMS */
12540 need_rmode = true;
12541 rmode = FPROUNDING_NEGINF;
12542 break;
12543 case 0x1c: /* FCVTAS */
12544 need_rmode = true;
12545 rmode = FPROUNDING_TIEAWAY;
12546 break;
12547 case 0x3a: /* FCVTPS */
12548 need_rmode = true;
12549 rmode = FPROUNDING_POSINF;
12550 break;
12551 case 0x3b: /* FCVTZS */
12552 need_rmode = true;
12553 rmode = FPROUNDING_ZERO;
12554 break;
12555 case 0x5a: /* FCVTNU */
12556 need_rmode = true;
12557 rmode = FPROUNDING_TIEEVEN;
12558 break;
12559 case 0x5b: /* FCVTMU */
12560 need_rmode = true;
12561 rmode = FPROUNDING_NEGINF;
12562 break;
12563 case 0x5c: /* FCVTAU */
12564 need_rmode = true;
12565 rmode = FPROUNDING_TIEAWAY;
12566 break;
12567 case 0x7a: /* FCVTPU */
12568 need_rmode = true;
12569 rmode = FPROUNDING_POSINF;
12570 break;
12571 case 0x7b: /* FCVTZU */
12572 need_rmode = true;
12573 rmode = FPROUNDING_ZERO;
12574 break;
12575 case 0x2f: /* FABS */
12576 case 0x6f: /* FNEG */
12577 need_fpst = false;
12578 break;
12579 case 0x7d: /* FRSQRTE */
12580 case 0x7f: /* FSQRT (vector) */
12581 break;
12582 default:
12583 fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
12584 g_assert_not_reached();
12585 }
12586
12588 /* Check additional constraints for the scalar encoding */
12589 if (is_scalar) {
12590 if (!is_q) {
12591 unallocated_encoding(s);
12592 return;
12593 }
12594 /* FRINTxx is only in the vector form */
12595 if (only_in_vector) {
12596 unallocated_encoding(s);
12597 return;
12598 }
12599 }
12600
12601 if (!fp_access_check(s)) {
12602 return;
12603 }
12604
12605 if (need_rmode || need_fpst) {
12606 tcg_fpstatus = get_fpstatus_ptr(true);
12607 }
12608
12609 if (need_rmode) {
12610 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12611 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12612 }
12613
12614 if (is_scalar) {
12615 TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12616 TCGv_i32 tcg_res = tcg_temp_new_i32();
12617
12618 switch (fpop) {
12619 case 0x1a: /* FCVTNS */
12620 case 0x1b: /* FCVTMS */
12621 case 0x1c: /* FCVTAS */
12622 case 0x3a: /* FCVTPS */
12623 case 0x3b: /* FCVTZS */
12624 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12625 break;
12626 case 0x3d: /* FRECPE */
12627 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12628 break;
12629 case 0x3f: /* FRECPX */
12630 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12631 break;
12632 case 0x5a: /* FCVTNU */
12633 case 0x5b: /* FCVTMU */
12634 case 0x5c: /* FCVTAU */
12635 case 0x7a: /* FCVTPU */
12636 case 0x7b: /* FCVTZU */
12637 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12638 break;
12639 case 0x6f: /* FNEG */
12640 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12641 break;
12642 case 0x7d: /* FRSQRTE */
12643 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12644 break;
12645 default:
12646 g_assert_not_reached();
12647 }
12648
12649 /* limit any sign extension going on */
12650 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12651 write_fp_sreg(s, rd, tcg_res);
12652
12653 tcg_temp_free_i32(tcg_res);
12654 tcg_temp_free_i32(tcg_op);
12655 } else {
12656 for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12657 TCGv_i32 tcg_op = tcg_temp_new_i32();
12658 TCGv_i32 tcg_res = tcg_temp_new_i32();
12659
12660 read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12661
12662 switch (fpop) {
12663 case 0x1a: /* FCVTNS */
12664 case 0x1b: /* FCVTMS */
12665 case 0x1c: /* FCVTAS */
12666 case 0x3a: /* FCVTPS */
12667 case 0x3b: /* FCVTZS */
12668 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12669 break;
12670 case 0x3d: /* FRECPE */
12671 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12672 break;
12673 case 0x5a: /* FCVTNU */
12674 case 0x5b: /* FCVTMU */
12675 case 0x5c: /* FCVTAU */
12676 case 0x7a: /* FCVTPU */
12677 case 0x7b: /* FCVTZU */
12678 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12679 break;
12680 case 0x18: /* FRINTN */
12681 case 0x19: /* FRINTM */
12682 case 0x38: /* FRINTP */
12683 case 0x39: /* FRINTZ */
12684 case 0x58: /* FRINTA */
12685 case 0x79: /* FRINTI */
12686 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12687 break;
12688 case 0x59: /* FRINTX */
12689 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12690 break;
12691 case 0x2f: /* FABS */
12692 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12693 break;
12694 case 0x6f: /* FNEG */
12695 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12696 break;
12697 case 0x7d: /* FRSQRTE */
12698 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12699 break;
12700 case 0x7f: /* FSQRT */
12701 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12702 break;
12703 default:
12704 g_assert_not_reached();
12705 }
12706
12707 write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12708
12709 tcg_temp_free_i32(tcg_res);
12710 tcg_temp_free_i32(tcg_op);
12711 }
12712
12713 clear_vec_high(s, is_q, rd);
12714 }
12715
12716 if (tcg_rmode) {
12717 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12718 tcg_temp_free_i32(tcg_rmode);
12719 }
12720
12721 if (tcg_fpstatus) {
12722 tcg_temp_free_ptr(tcg_fpstatus);
12723 }
12724}
12725
12726/* AdvSIMD scalar x indexed element
12727 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
12728 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12729 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
12730 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12731 * AdvSIMD vector x indexed element
12732 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
12733 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12734 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
12735 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12736 */
12737static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12738{
12739 /* This encoding has two kinds of instruction:
12740 * normal, where we perform elt x idxelt => elt for each
12741 * element in the vector
12742 * long, where we perform elt x idxelt and generate a result of
12743 * double the width of the input element
12744 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12745 */
12746 bool is_scalar = extract32(insn, 28, 1);
12747 bool is_q = extract32(insn, 30, 1);
12748 bool u = extract32(insn, 29, 1);
12749 int size = extract32(insn, 22, 2);
12750 int l = extract32(insn, 21, 1);
12751 int m = extract32(insn, 20, 1);
12752 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12753 int rm = extract32(insn, 16, 4);
12754 int opcode = extract32(insn, 12, 4);
12755 int h = extract32(insn, 11, 1);
12756 int rn = extract32(insn, 5, 5);
12757 int rd = extract32(insn, 0, 5);
12758 bool is_long = false;
12759 int is_fp = 0;
12760 bool is_fp16 = false;
12761 int index;
12762 TCGv_ptr fpst;
12763
12764 switch (16 * u + opcode) {
12765 case 0x08: /* MUL */
12766 case 0x10: /* MLA */
12767 case 0x14: /* MLS */
12768 if (is_scalar) {
12769 unallocated_encoding(s);
12770 return;
12771 }
12772 break;
12773 case 0x02: /* SMLAL, SMLAL2 */
12774 case 0x12: /* UMLAL, UMLAL2 */
12775 case 0x06: /* SMLSL, SMLSL2 */
12776 case 0x16: /* UMLSL, UMLSL2 */
12777 case 0x0a: /* SMULL, SMULL2 */
12778 case 0x1a: /* UMULL, UMULL2 */
12779 if (is_scalar) {
12780 unallocated_encoding(s);
12781 return;
12782 }
12783 is_long = true;
12784 break;
12785 case 0x03: /* SQDMLAL, SQDMLAL2 */
12786 case 0x07: /* SQDMLSL, SQDMLSL2 */
12787 case 0x0b: /* SQDMULL, SQDMULL2 */
12788 is_long = true;
12789 break;
12790 case 0x0c: /* SQDMULH */
12791 case 0x0d: /* SQRDMULH */
12792 break;
12793 case 0x01: /* FMLA */
12794 case 0x05: /* FMLS */
12795 case 0x09: /* FMUL */
12796 case 0x19: /* FMULX */
12797 is_fp = 1;
12798 break;
12799 case 0x1d: /* SQRDMLAH */
12800 case 0x1f: /* SQRDMLSH */
12801 if (!dc_isar_feature(aa64_rdm, s)) {
12802 unallocated_encoding(s);
12803 return;
12804 }
12805 break;
12806 case 0x0e: /* SDOT */
12807 case 0x1e: /* UDOT */
12808 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12809 unallocated_encoding(s);
12810 return;
12811 }
12812 break;
12813 case 0x11: /* FCMLA #0 */
12814 case 0x13: /* FCMLA #90 */
12815 case 0x15: /* FCMLA #180 */
12816 case 0x17: /* FCMLA #270 */
12817 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12818 unallocated_encoding(s);
12819 return;
12820 }
12821 is_fp = 2;
12822 break;
12823 case 0x00: /* FMLAL */
12824 case 0x04: /* FMLSL */
12825 case 0x18: /* FMLAL2 */
12826 case 0x1c: /* FMLSL2 */
12827 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12828 unallocated_encoding(s);
12829 return;
12830 }
12831 size = MO_16;
12832 /* is_fp, but we pass cpu_env not fp_status. */
12833 break;
12834 default:
12835 unallocated_encoding(s);
12836 return;
12837 }
12838
12839 switch (is_fp) {
12840 case 1: /* normal fp */
12841 /* convert insn encoded size to MemOp size */
12842 switch (size) {
12843 case 0: /* half-precision */
12844 size = MO_16;
12845 is_fp16 = true;
12846 break;
12847 case MO_32: /* single precision */
12848 case MO_64: /* double precision */
12849 break;
12850 default:
12851 unallocated_encoding(s);
12852 return;
12853 }
12854 break;
12855
12856 case 2: /* complex fp */
12857 /* Each indexable element is a complex pair. */
12858 size += 1;
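        /* A pair of fp16 values therefore indexes as one 32-bit unit
         * (MO_32) and a pair of fp32 values as one 64-bit unit (MO_64);
         * there is no encodable fp64 pair.
         */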
12859 switch (size) {
12860 case MO_32:
12861 if (h && !is_q) {
12862 unallocated_encoding(s);
12863 return;
12864 }
12865 is_fp16 = true;
12866 break;
12867 case MO_64:
12868 break;
12869 default:
12870 unallocated_encoding(s);
12871 return;
12872 }
12873 break;
12874
12875 default: /* integer */
12876 switch (size) {
12877 case MO_8:
12878 case MO_64:
12879 unallocated_encoding(s);
12880 return;
12881 }
12882 break;
12883 }
12884 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12885 unallocated_encoding(s);
12886 return;
12887 }
12888
12889 /* Given MemOp size, adjust register and indexing. */
12890 switch (size) {
12891 case MO_16:
12892 index = h << 2 | l << 1 | m;
12893 break;
12894 case MO_32:
12895 index = h << 1 | l;
12896 rm |= m << 4;
12897 break;
12898 case MO_64:
12899 if (l || !is_q) {
12900 unallocated_encoding(s);
12901 return;
12902 }
12903 index = h;
12904 rm |= m << 4;
12905 break;
12906 default:
12907 g_assert_not_reached();
12908 }
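    /* For example a 16-bit element is indexed by the three bits H:L:M
     * and keeps Rm at 4 bits (only V0-V15 are addressable), while the
     * 32- and 64-bit element sizes fold M back in as the top bit of Rm
     * and index with H:L or just H.
     */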
12909
12910 if (!fp_access_check(s)) {
12911 return;
12912 }
12913
12914 if (is_fp) {
12915 fpst = get_fpstatus_ptr(is_fp16);
12916 } else {
12917 fpst = NULL;
12918 }
12919
12920 switch (16 * u + opcode) {
12921 case 0x0e: /* SDOT */
12922 case 0x1e: /* UDOT */
12923 gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
12924 u ? gen_helper_gvec_udot_idx_b
12925 : gen_helper_gvec_sdot_idx_b);
12926 return;
12927 case 0x11: /* FCMLA #0 */
12928 case 0x13: /* FCMLA #90 */
12929 case 0x15: /* FCMLA #180 */
12930 case 0x17: /* FCMLA #270 */
12931 {
12932 int rot = extract32(insn, 13, 2);
12933 int data = (index << 2) | rot;
12934 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12935 vec_full_reg_offset(s, rn),
12936 vec_full_reg_offset(s, rm), fpst,
12937 is_q ? 16 : 8, vec_full_reg_size(s), data,
12938 size == MO_64
12939 ? gen_helper_gvec_fcmlas_idx
12940 : gen_helper_gvec_fcmlah_idx);
12941 tcg_temp_free_ptr(fpst);
12942 }
12943 return;
12944
12945 case 0x00: /* FMLAL */
12946 case 0x04: /* FMLSL */
12947 case 0x18: /* FMLAL2 */
12948 case 0x1c: /* FMLSL2 */
12949 {
12950 int is_s = extract32(opcode, 2, 1);
12951 int is_2 = u;
12952 int data = (index << 2) | (is_2 << 1) | is_s;
12953 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12954 vec_full_reg_offset(s, rn),
12955 vec_full_reg_offset(s, rm), cpu_env,
12956 is_q ? 16 : 8, vec_full_reg_size(s),
12957 data, gen_helper_gvec_fmlal_idx_a64);
12958 }
12959 return;
12960 }
12961
12962 if (size == 3) {
12963 TCGv_i64 tcg_idx = tcg_temp_new_i64();
12964 int pass;
12965
12966 assert(is_fp && is_q && !is_long);
12967
12968 read_vec_element(s, tcg_idx, rm, index, MO_64);
12969
12970 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12971 TCGv_i64 tcg_op = tcg_temp_new_i64();
12972 TCGv_i64 tcg_res = tcg_temp_new_i64();
12973
12974 read_vec_element(s, tcg_op, rn, pass, MO_64);
12975
12976 switch (16 * u + opcode) {
12977 case 0x05: /* FMLS */
12978 /* As usual for ARM, separate negation for fused multiply-add */
12979 gen_helper_vfp_negd(tcg_op, tcg_op);
12980 /* fall through */
12981 case 0x01: /* FMLA */
12982 read_vec_element(s, tcg_res, rd, pass, MO_64);
12983 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12984 break;
12985 case 0x09: /* FMUL */
12986 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12987 break;
12988 case 0x19: /* FMULX */
12989 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12990 break;
12991 default:
12992 g_assert_not_reached();
12993 }
12994
12995 write_vec_element(s, tcg_res, rd, pass, MO_64);
12996 tcg_temp_free_i64(tcg_op);
12997 tcg_temp_free_i64(tcg_res);
12998 }
12999
13000 tcg_temp_free_i64(tcg_idx);
13001 clear_vec_high(s, !is_scalar, rd);
13002 } else if (!is_long) {
13003 /* 32 bit floating point, or 16 or 32 bit integer.
13004 * For the 16 bit scalar case we use the usual Neon helpers and
13005 * rely on the fact that 0 op 0 == 0 with no side effects.
13006 */
13007 TCGv_i32 tcg_idx = tcg_temp_new_i32();
13008 int pass, maxpasses;
13009
13010 if (is_scalar) {
13011 maxpasses = 1;
13012 } else {
13013 maxpasses = is_q ? 4 : 2;
13014 }
13015
13016 read_vec_element_i32(s, tcg_idx, rm, index, size);
13017
13018 if (size == 1 && !is_scalar) {
13019 /* The simplest way to handle the 16x16 indexed ops is to duplicate
13020 * the index into both halves of the 32 bit tcg_idx and then use
13021 * the usual Neon helpers.
13022 */
13023 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13024 }
13025
13026 for (pass = 0; pass < maxpasses; pass++) {
13027 TCGv_i32 tcg_op = tcg_temp_new_i32();
13028 TCGv_i32 tcg_res = tcg_temp_new_i32();
13029
13030 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13031
13032 switch (16 * u + opcode) {
13033 case 0x08: /* MUL */
13034 case 0x10: /* MLA */
13035 case 0x14: /* MLS */
13036 {
13037 static NeonGenTwoOpFn * const fns[2][2] = {
13038 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13039 { tcg_gen_add_i32, tcg_gen_sub_i32 },
13040 };
13041 NeonGenTwoOpFn *genfn;
13042 bool is_sub = opcode == 0x4;
13043
13044 if (size == 1) {
13045 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13046 } else {
13047 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13048 }
13049 if (opcode == 0x8) {
13050 break;
13051 }
13052 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13053 genfn = fns[size - 1][is_sub];
13054 genfn(tcg_res, tcg_op, tcg_res);
13055 break;
13056 }
13057 case 0x05: /* FMLS */
13058 case 0x01: /* FMLA */
13059 read_vec_element_i32(s, tcg_res, rd, pass,
13060 is_scalar ? size : MO_32);
13061 switch (size) {
13062 case 1:
13063 if (opcode == 0x5) {
13064 /* As usual for ARM, separate negation for fused
13065 * multiply-add */
13066 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13067 }
13068 if (is_scalar) {
13069 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13070 tcg_res, fpst);
13071 } else {
13072 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13073 tcg_res, fpst);
13074 }
13075 break;
13076 case 2:
13077 if (opcode == 0x5) {
13078 /* As usual for ARM, separate negation for
13079 * fused multiply-add */
13080 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13081 }
13082 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13083 tcg_res, fpst);
13084 break;
13085 default:
13086 g_assert_not_reached();
13087 }
13088 break;
13089 case 0x09: /* FMUL */
13090 switch (size) {
13091 case 1:
13092 if (is_scalar) {
13093 gen_helper_advsimd_mulh(tcg_res, tcg_op,
13094 tcg_idx, fpst);
13095 } else {
13096 gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13097 tcg_idx, fpst);
13098 }
13099 break;
13100 case 2:
13101 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13102 break;
13103 default:
13104 g_assert_not_reached();
13105 }
13106 break;
13107 case 0x19: /* FMULX */
13108 switch (size) {
13109 case 1:
13110 if (is_scalar) {
13111 gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13112 tcg_idx, fpst);
13113 } else {
13114 gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13115 tcg_idx, fpst);
13116 }
13117 break;
13118 case 2:
13119 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13120 break;
13121 default:
13122 g_assert_not_reached();
13123 }
13124 break;
13125 case 0x0c: /* SQDMULH */
13126 if (size == 1) {
13127 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13128 tcg_op, tcg_idx);
13129 } else {
13130 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13131 tcg_op, tcg_idx);
13132 }
13133 break;
13134 case 0x0d: /* SQRDMULH */
13135 if (size == 1) {
13136 gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13137 tcg_op, tcg_idx);
13138 } else {
13139 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13140 tcg_op, tcg_idx);
13141 }
13142 break;
13143 case 0x1d: /* SQRDMLAH */
13144 read_vec_element_i32(s, tcg_res, rd, pass,
13145 is_scalar ? size : MO_32);
13146 if (size == 1) {
13147 gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13148 tcg_op, tcg_idx, tcg_res);
13149 } else {
13150 gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13151 tcg_op, tcg_idx, tcg_res);
13152 }
13153 break;
13154 case 0x1f: /* SQRDMLSH */
13155 read_vec_element_i32(s, tcg_res, rd, pass,
13156 is_scalar ? size : MO_32);
13157 if (size == 1) {
13158 gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13159 tcg_op, tcg_idx, tcg_res);
13160 } else {
13161 gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13162 tcg_op, tcg_idx, tcg_res);
13163 }
13164 break;
13165 default:
13166 g_assert_not_reached();
13167 }
13168
13169 if (is_scalar) {
13170 write_fp_sreg(s, rd, tcg_res);
13171 } else {
13172 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13173 }
13174
13175 tcg_temp_free_i32(tcg_op);
13176 tcg_temp_free_i32(tcg_res);
13177 }
13178
13179 tcg_temp_free_i32(tcg_idx);
13180 clear_vec_high(s, is_q, rd);
13181 } else {
13182 /* long ops: 16x16->32 or 32x32->64 */
13183 TCGv_i64 tcg_res[2];
13184 int pass;
13185 bool satop = extract32(opcode, 0, 1);
13186 MemOp memop = MO_32;
13187
13188 if (satop || !u) {
13189 memop |= MO_SIGN;
13190 }
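        /* The saturating-doubling ops (SQDML*) are always signed, and for
         * the plain widening multiplies u distinguishes the U* forms from
         * the sign-extending S* forms.
         */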
13191
13192 if (size == 2) {
13193 TCGv_i64 tcg_idx = tcg_temp_new_i64();
13194
13195 read_vec_element(s, tcg_idx, rm, index, memop);
13196
13197 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13198 TCGv_i64 tcg_op = tcg_temp_new_i64();
13199 TCGv_i64 tcg_passres;
13200 int passelt;
13201
13202 if (is_scalar) {
13203 passelt = 0;
13204 } else {
13205 passelt = pass + (is_q * 2);
13206 }
13207
13208 read_vec_element(s, tcg_op, rn, passelt, memop);
13209
13210 tcg_res[pass] = tcg_temp_new_i64();
13211
13212 if (opcode == 0xa || opcode == 0xb) {
13213 /* Non-accumulating ops */
13214 tcg_passres = tcg_res[pass];
13215 } else {
13216 tcg_passres = tcg_temp_new_i64();
13217 }
13218
13219 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13220 tcg_temp_free_i64(tcg_op);
13221
13222 if (satop) {
                    /* saturating, doubling: the doubling is performed as
                     * a saturating add of the product to itself
                     */
13224 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13225 tcg_passres, tcg_passres);
13226 }
13227
13228 if (opcode == 0xa || opcode == 0xb) {
13229 continue;
13230 }
13231
13232 /* Accumulating op: handle accumulate step */
13233 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13234
13235 switch (opcode) {
13236 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13237 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13238 break;
13239 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13240 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13241 break;
13242 case 0x7: /* SQDMLSL, SQDMLSL2 */
13243 tcg_gen_neg_i64(tcg_passres, tcg_passres);
13244 /* fall through */
13245 case 0x3: /* SQDMLAL, SQDMLAL2 */
13246 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13247 tcg_res[pass],
13248 tcg_passres);
13249 break;
13250 default:
13251 g_assert_not_reached();
13252 }
13253 tcg_temp_free_i64(tcg_passres);
13254 }
13255 tcg_temp_free_i64(tcg_idx);
13256
13257 clear_vec_high(s, !is_scalar, rd);
13258 } else {
13259 TCGv_i32 tcg_idx = tcg_temp_new_i32();
13260
13261 assert(size == 1);
13262 read_vec_element_i32(s, tcg_idx, rm, index, size);
13263
13264 if (!is_scalar) {
13265 /* The simplest way to handle the 16x16 indexed ops is to
13266 * duplicate the index into both halves of the 32 bit tcg_idx
13267 * and then use the usual Neon helpers.
13268 */
13269 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13270 }
13271
13272 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13273 TCGv_i32 tcg_op = tcg_temp_new_i32();
13274 TCGv_i64 tcg_passres;
13275
13276 if (is_scalar) {
13277 read_vec_element_i32(s, tcg_op, rn, pass, size);
13278 } else {
13279 read_vec_element_i32(s, tcg_op, rn,
13280 pass + (is_q * 2), MO_32);
13281 }
13282
13283 tcg_res[pass] = tcg_temp_new_i64();
13284
13285 if (opcode == 0xa || opcode == 0xb) {
13286 /* Non-accumulating ops */
13287 tcg_passres = tcg_res[pass];
13288 } else {
13289 tcg_passres = tcg_temp_new_i64();
13290 }
13291
13292 if (memop & MO_SIGN) {
13293 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13294 } else {
13295 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13296 }
13297 if (satop) {
13298 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13299 tcg_passres, tcg_passres);
13300 }
13301 tcg_temp_free_i32(tcg_op);
13302
13303 if (opcode == 0xa || opcode == 0xb) {
13304 continue;
13305 }
13306
13307 /* Accumulating op: handle accumulate step */
13308 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13309
13310 switch (opcode) {
13311 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13312 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13313 tcg_passres);
13314 break;
13315 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13316 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13317 tcg_passres);
13318 break;
13319 case 0x7: /* SQDMLSL, SQDMLSL2 */
13320 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13321 /* fall through */
13322 case 0x3: /* SQDMLAL, SQDMLAL2 */
13323 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13324 tcg_res[pass],
13325 tcg_passres);
13326 break;
13327 default:
13328 g_assert_not_reached();
13329 }
13330 tcg_temp_free_i64(tcg_passres);
13331 }
13332 tcg_temp_free_i32(tcg_idx);
13333
13334 if (is_scalar) {
13335 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13336 }
13337 }
13338
13339 if (is_scalar) {
13340 tcg_res[1] = tcg_const_i64(0);
13341 }
13342
13343 for (pass = 0; pass < 2; pass++) {
13344 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13345 tcg_temp_free_i64(tcg_res[pass]);
13346 }
13347 }
13348
13349 if (fpst) {
13350 tcg_temp_free_ptr(fpst);
13351 }
13352}
13353
13354/* Crypto AES
13355 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
13356 * +-----------------+------+-----------+--------+-----+------+------+
13357 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
13358 * +-----------------+------+-----------+--------+-----+------+------+
13359 */
13360static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13361{
13362 int size = extract32(insn, 22, 2);
13363 int opcode = extract32(insn, 12, 5);
13364 int rn = extract32(insn, 5, 5);
13365 int rd = extract32(insn, 0, 5);
13366 int decrypt;
13367 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13368 TCGv_i32 tcg_decrypt;
13369 CryptoThreeOpIntFn *genfn;
13370
13371 if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13372 unallocated_encoding(s);
13373 return;
13374 }
13375
13376 switch (opcode) {
13377 case 0x4: /* AESE */
13378 decrypt = 0;
13379 genfn = gen_helper_crypto_aese;
13380 break;
13381 case 0x6: /* AESMC */
13382 decrypt = 0;
13383 genfn = gen_helper_crypto_aesmc;
13384 break;
13385 case 0x5: /* AESD */
13386 decrypt = 1;
13387 genfn = gen_helper_crypto_aese;
13388 break;
13389 case 0x7: /* AESIMC */
13390 decrypt = 1;
13391 genfn = gen_helper_crypto_aesmc;
13392 break;
13393 default:
13394 unallocated_encoding(s);
13395 return;
13396 }
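    /* AESE/AESD share one helper and AESMC/AESIMC another; the decrypt
     * flag selects the encrypt or decrypt variant inside the helper.
     */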
13397
13398 if (!fp_access_check(s)) {
13399 return;
13400 }
13401
13402 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13403 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13404 tcg_decrypt = tcg_const_i32(decrypt);
13405
13406 genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
13407
13408 tcg_temp_free_ptr(tcg_rd_ptr);
13409 tcg_temp_free_ptr(tcg_rn_ptr);
13410 tcg_temp_free_i32(tcg_decrypt);
13411}
13412
13413/* Crypto three-reg SHA
13414 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
13415 * +-----------------+------+---+------+---+--------+-----+------+------+
13416 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
13417 * +-----------------+------+---+------+---+--------+-----+------+------+
13418 */
13419static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13420{
13421 int size = extract32(insn, 22, 2);
13422 int opcode = extract32(insn, 12, 3);
13423 int rm = extract32(insn, 16, 5);
13424 int rn = extract32(insn, 5, 5);
13425 int rd = extract32(insn, 0, 5);
13426 CryptoThreeOpFn *genfn;
13427 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13428 bool feature;
13429
13430 if (size != 0) {
13431 unallocated_encoding(s);
13432 return;
13433 }
13434
13435 switch (opcode) {
13436 case 0: /* SHA1C */
13437 case 1: /* SHA1P */
13438 case 2: /* SHA1M */
13439 case 3: /* SHA1SU0 */
13440 genfn = NULL;
13441 feature = dc_isar_feature(aa64_sha1, s);
13442 break;
13443 case 4: /* SHA256H */
13444 genfn = gen_helper_crypto_sha256h;
13445 feature = dc_isar_feature(aa64_sha256, s);
13446 break;
13447 case 5: /* SHA256H2 */
13448 genfn = gen_helper_crypto_sha256h2;
13449 feature = dc_isar_feature(aa64_sha256, s);
13450 break;
13451 case 6: /* SHA256SU1 */
13452 genfn = gen_helper_crypto_sha256su1;
13453 feature = dc_isar_feature(aa64_sha256, s);
13454 break;
13455 default:
13456 unallocated_encoding(s);
13457 return;
13458 }
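    /* For the SHA1 ops genfn stays NULL and we dispatch below through
     * gen_helper_crypto_sha1_3reg(), which takes the opcode value as an
     * immediate to pick C/P/M/SU0.
     */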
13459
13460 if (!feature) {
13461 unallocated_encoding(s);
13462 return;
13463 }
13464
13465 if (!fp_access_check(s)) {
13466 return;
13467 }
13468
13469 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13470 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13471 tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13472
13473 if (genfn) {
13474 genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13475 } else {
13476 TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
13477
13478 gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
13479 tcg_rm_ptr, tcg_opcode);
13480 tcg_temp_free_i32(tcg_opcode);
13481 }
13482
13483 tcg_temp_free_ptr(tcg_rd_ptr);
13484 tcg_temp_free_ptr(tcg_rn_ptr);
13485 tcg_temp_free_ptr(tcg_rm_ptr);
13486}
13487
13488/* Crypto two-reg SHA
13489 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
13490 * +-----------------+------+-----------+--------+-----+------+------+
13491 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
13492 * +-----------------+------+-----------+--------+-----+------+------+
13493 */
13494static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13495{
13496 int size = extract32(insn, 22, 2);
13497 int opcode = extract32(insn, 12, 5);
13498 int rn = extract32(insn, 5, 5);
13499 int rd = extract32(insn, 0, 5);
13500 CryptoTwoOpFn *genfn;
13501 bool feature;
13502 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13503
13504 if (size != 0) {
13505 unallocated_encoding(s);
13506 return;
13507 }
13508
13509 switch (opcode) {
13510 case 0: /* SHA1H */
13511 feature = dc_isar_feature(aa64_sha1, s);
13512 genfn = gen_helper_crypto_sha1h;
13513 break;
13514 case 1: /* SHA1SU1 */
13515 feature = dc_isar_feature(aa64_sha1, s);
13516 genfn = gen_helper_crypto_sha1su1;
13517 break;
13518 case 2: /* SHA256SU0 */
13519 feature = dc_isar_feature(aa64_sha256, s);
13520 genfn = gen_helper_crypto_sha256su0;
13521 break;
13522 default:
13523 unallocated_encoding(s);
13524 return;
13525 }
13526
13527 if (!feature) {
13528 unallocated_encoding(s);
13529 return;
13530 }
13531
13532 if (!fp_access_check(s)) {
13533 return;
13534 }
13535
13536 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13537 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13538
13539 genfn(tcg_rd_ptr, tcg_rn_ptr);
13540
13541 tcg_temp_free_ptr(tcg_rd_ptr);
13542 tcg_temp_free_ptr(tcg_rn_ptr);
13543}
13544
13545/* Crypto three-reg SHA512
13546 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
13547 * +-----------------------+------+---+---+-----+--------+------+------+
13548 * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd |
13549 * +-----------------------+------+---+---+-----+--------+------+------+
13550 */
13551static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13552{
13553 int opcode = extract32(insn, 10, 2);
13554 int o = extract32(insn, 14, 1);
13555 int rm = extract32(insn, 16, 5);
13556 int rn = extract32(insn, 5, 5);
13557 int rd = extract32(insn, 0, 5);
13558 bool feature;
13559 CryptoThreeOpFn *genfn;
13560
13561 if (o == 0) {
13562 switch (opcode) {
13563 case 0: /* SHA512H */
13564 feature = dc_isar_feature(aa64_sha512, s);
13565 genfn = gen_helper_crypto_sha512h;
13566 break;
13567 case 1: /* SHA512H2 */
13568 feature = dc_isar_feature(aa64_sha512, s);
13569 genfn = gen_helper_crypto_sha512h2;
13570 break;
13571 case 2: /* SHA512SU1 */
13572 feature = dc_isar_feature(aa64_sha512, s);
13573 genfn = gen_helper_crypto_sha512su1;
13574 break;
13575 case 3: /* RAX1 */
13576 feature = dc_isar_feature(aa64_sha3, s);
13577 genfn = NULL;
13578 break;
13579 }
13580 } else {
13581 switch (opcode) {
13582 case 0: /* SM3PARTW1 */
13583 feature = dc_isar_feature(aa64_sm3, s);
13584 genfn = gen_helper_crypto_sm3partw1;
13585 break;
13586 case 1: /* SM3PARTW2 */
13587 feature = dc_isar_feature(aa64_sm3, s);
13588 genfn = gen_helper_crypto_sm3partw2;
13589 break;
13590 case 2: /* SM4EKEY */
13591 feature = dc_isar_feature(aa64_sm4, s);
13592 genfn = gen_helper_crypto_sm4ekey;
13593 break;
13594 default:
13595 unallocated_encoding(s);
13596 return;
13597 }
13598 }
13599
13600 if (!feature) {
13601 unallocated_encoding(s);
13602 return;
13603 }
13604
13605 if (!fp_access_check(s)) {
13606 return;
13607 }
13608
13609 if (genfn) {
13610 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13611
13612 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13613 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13614 tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13615
13616 genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13617
13618 tcg_temp_free_ptr(tcg_rd_ptr);
13619 tcg_temp_free_ptr(tcg_rn_ptr);
13620 tcg_temp_free_ptr(tcg_rm_ptr);
13621 } else {
13622 TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13623 int pass;
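        /* genfn == NULL here means RAX1, which has no helper: it is
         * open-coded below as Vd = Vn ^ rol64(Vm, 1) per 64-bit lane.
         */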
13624
13625 tcg_op1 = tcg_temp_new_i64();
13626 tcg_op2 = tcg_temp_new_i64();
13627 tcg_res[0] = tcg_temp_new_i64();
13628 tcg_res[1] = tcg_temp_new_i64();
13629
13630 for (pass = 0; pass < 2; pass++) {
13631 read_vec_element(s, tcg_op1, rn, pass, MO_64);
13632 read_vec_element(s, tcg_op2, rm, pass, MO_64);
13633
13634 tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
13635 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13636 }
13637 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13638 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13639
13640 tcg_temp_free_i64(tcg_op1);
13641 tcg_temp_free_i64(tcg_op2);
13642 tcg_temp_free_i64(tcg_res[0]);
13643 tcg_temp_free_i64(tcg_res[1]);
13644 }
13645}
13646
13647/* Crypto two-reg SHA512
13648 * 31 12 11 10 9 5 4 0
13649 * +-----------------------------------------+--------+------+------+
13650 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd |
13651 * +-----------------------------------------+--------+------+------+
13652 */
13653static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13654{
13655 int opcode = extract32(insn, 10, 2);
13656 int rn = extract32(insn, 5, 5);
13657 int rd = extract32(insn, 0, 5);
13658 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13659 bool feature;
13660 CryptoTwoOpFn *genfn;
13661
13662 switch (opcode) {
13663 case 0: /* SHA512SU0 */
13664 feature = dc_isar_feature(aa64_sha512, s);
13665 genfn = gen_helper_crypto_sha512su0;
13666 break;
13667 case 1: /* SM4E */
13668 feature = dc_isar_feature(aa64_sm4, s);
13669 genfn = gen_helper_crypto_sm4e;
13670 break;
13671 default:
13672 unallocated_encoding(s);
13673 return;
13674 }
13675
13676 if (!feature) {
13677 unallocated_encoding(s);
13678 return;
13679 }
13680
13681 if (!fp_access_check(s)) {
13682 return;
13683 }
13684
13685 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13686 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13687
13688 genfn(tcg_rd_ptr, tcg_rn_ptr);
13689
13690 tcg_temp_free_ptr(tcg_rd_ptr);
13691 tcg_temp_free_ptr(tcg_rn_ptr);
13692}
13693
13694/* Crypto four-register
13695 * 31 23 22 21 20 16 15 14 10 9 5 4 0
13696 * +-------------------+-----+------+---+------+------+------+
13697 * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd |
13698 * +-------------------+-----+------+---+------+------+------+
13699 */
13700static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13701{
13702 int op0 = extract32(insn, 21, 2);
13703 int rm = extract32(insn, 16, 5);
13704 int ra = extract32(insn, 10, 5);
13705 int rn = extract32(insn, 5, 5);
13706 int rd = extract32(insn, 0, 5);
13707 bool feature;
13708
13709 switch (op0) {
13710 case 0: /* EOR3 */
13711 case 1: /* BCAX */
13712 feature = dc_isar_feature(aa64_sha3, s);
13713 break;
13714 case 2: /* SM3SS1 */
13715 feature = dc_isar_feature(aa64_sm3, s);
13716 break;
13717 default:
13718 unallocated_encoding(s);
13719 return;
13720 }
13721
13722 if (!feature) {
13723 unallocated_encoding(s);
13724 return;
13725 }
13726
13727 if (!fp_access_check(s)) {
13728 return;
13729 }
13730
13731 if (op0 < 2) {
13732 TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13733 int pass;
13734
13735 tcg_op1 = tcg_temp_new_i64();
13736 tcg_op2 = tcg_temp_new_i64();
13737 tcg_op3 = tcg_temp_new_i64();
13738 tcg_res[0] = tcg_temp_new_i64();
13739 tcg_res[1] = tcg_temp_new_i64();
13740
        for (pass = 0; pass < 2; pass++) {
            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);
            read_vec_element(s, tcg_op3, ra, pass, MO_64);

            if (op0 == 0) {
                /* EOR3 */
                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
            } else {
                /* BCAX */
                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
            }
            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
        }
        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
        write_vec_element(s, tcg_res[1], rd, 1, MO_64);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_op3);
        tcg_temp_free_i64(tcg_res[0]);
        tcg_temp_free_i64(tcg_res[1]);
    } else {
        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;

        tcg_op1 = tcg_temp_new_i32();
        tcg_op2 = tcg_temp_new_i32();
        tcg_op3 = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        tcg_zero = tcg_const_i32(0);

        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);

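        /*
         * SM3SS1: lane 3 of Vd gets ROL(ROL(Vn.S[3], 12) + Vm.S[3]
         * + Va.S[3], 7), expressed below as rotate-right by 20 and
         * by 25; the other three lanes are zeroed.
         */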
        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);

        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_op3);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_zero);
    }
}

/* Crypto XAR
 *  31                   21 20  16 15    10 9    5 4    0
 * +-----------------------+------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
 * +-----------------------+------+--------+------+------+
 */
static void disas_crypto_xar(DisasContext *s, uint32_t insn)
{
    int rm = extract32(insn, 16, 5);
    int imm6 = extract32(insn, 10, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
    int pass;

    if (!dc_isar_feature(aa64_sha3, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

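    /* XAR: Vd.2D = ROR(Vn.2D ^ Vm.2D, imm6), again per 64-bit lane. */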
    for (pass = 0; pass < 2; pass++) {
        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element(s, tcg_op2, rm, pass, MO_64);

        tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
        tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
    }
    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}

/* Crypto three-reg imm2
 *  31                   21 20  16 15 14 13 12 11  10 9    5 4    0
 * +-----------------------+------+-----+------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
 * +-----------------------+------+-----+------+--------+------+------+
 */
static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 10, 2);
    int imm2 = extract32(insn, 12, 2);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
    TCGv_i32 tcg_imm2, tcg_opcode;

    if (!dc_isar_feature(aa64_sm3, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
    tcg_rm_ptr = vec_full_reg_ptr(s, rm);
    tcg_imm2 = tcg_const_i32(imm2);
    tcg_opcode = tcg_const_i32(opcode);

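    /*
     * The helper implements all four SM3TT variants: opcode selects
     * among SM3TT1A/SM3TT1B/SM3TT2A/SM3TT2B, and imm2 is the Vm lane
     * index the helper reads from.
     */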
    gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
                            tcg_opcode);

    tcg_temp_free_ptr(tcg_rd_ptr);
    tcg_temp_free_ptr(tcg_rn_ptr);
    tcg_temp_free_ptr(tcg_rm_ptr);
    tcg_temp_free_i32(tcg_imm2);
    tcg_temp_free_i32(tcg_opcode);
}

/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 */
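/*
 * An entry matches when (insn & mask) == pattern; lookup_disas_fn
 * (defined earlier in this file) returns the first match in table
 * order, so more specific encodings must precede more general ones
 * they overlap, and the table ends at the all-zero-mask entry.
 */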
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    { 0x00000000, 0x00000000, NULL }
};

static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
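    /* Bit 28 set with bit 30 clear selects the scalar FP subgroup;
     * everything else in this space, including crypto, is SIMD.
     */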
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

/**
 * is_guarded_page:
 * @env: The cpu environment
 * @s: The DisasContext
 *
 * Return true if the page is guarded.
 */
static bool is_guarded_page(CPUARMState *env, DisasContext *s)
{
#ifdef CONFIG_USER_ONLY
    return false;  /* FIXME */
#else
    uint64_t addr = s->base.pc_first;
    int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
    unsigned int index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);

    /*
     * We test this immediately after reading an insn, which means
     * that any normal page must be in the TLB.  The only exception
     * would be for executing from flash or device memory, which
     * does not retain the TLB entry.
     *
     * FIXME: Assume false for those, for now.  We could use
     * arm_cpu_get_phys_page_attrs_debug to re-read the page
     * table entry even for that case.
     */
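    /* target_tlb_bit0 is where the GP (guarded page) bit from the
     * translation table entry is cached for this TLB entry.
     */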
    return (tlb_hit(entry->addr_code, addr) &&
            env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0);
#endif
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *  - branch target identifiers,
 *  - paciasp, pacibsp,
 *  - BRK insn,
 *  - HLT insn.
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype.  */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype.  */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception.  */
            return true;
        }
    }
    return false;
}

/* C3.1 A64 instruction index by encoding */
static void disas_a64_insn(CPUARMState *env, DisasContext *s)
{
    uint32_t insn;

    s->pc_curr = s->base.pc_next;
    insn = arm_ldl_code(env, s->base.pc_next, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next += 4;

    s->fp_access_checked = false;

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /*
             * At the first insn of the TB, compute s->guarded_page.
             * We delayed computing this until successfully reading
             * the first insn of the TB, above.  This (mostly) ensures
             * that the softmmu tlb entry has been populated, and the
             * page table GP bit is available.
             *
             * Note that we need to compute this even if btype == 0,
             * because this value is used for BR instructions later
             * where ENV is not available.
             */
            s->guarded_page = is_guarded_page(env, s);

            /* First insn can have btype set to non-zero.  */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             */
            if (s->btype != 0
                && s->guarded_page
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                                   syn_btitrap(s->btype),
                                   default_exception_el(s));
                return;
            }
        } else {
            /* Not the first insn: btype must be 0.  */
            tcg_debug_assert(s->btype == 0);
        }
    }

    switch (extract32(insn, 25, 4)) {
    case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
        unallocated_encoding(s);
        break;
    case 0x2:
        if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
            unallocated_encoding(s);
        }
        break;
    case 0x8: case 0x9: /* Data processing - immediate */
        disas_data_proc_imm(s, insn);
        break;
    case 0xa: case 0xb: /* Branch, exception generation and system insns */
        disas_b_exc_sys(s, insn);
        break;
    case 0x4:
    case 0x6:
    case 0xc:
    case 0xe: /* Loads and stores */
        disas_ldst(s, insn);
        break;
    case 0x5:
    case 0xd: /* Data processing - register */
        disas_data_proc_reg(s, insn);
        break;
    case 0x7:
    case 0xf: /* Data processing - SIMD and floating point */
        disas_data_proc_simd_fp(s, insn);
        break;
    default:
        assert(FALSE); /* all 16 cases should be handled above */
        break;
    }

    /* if we allocated any temporaries, free them here */
    free_tmp_a64(s);

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    ARMCPU *arm_cpu = env_archcpu(env);
    uint32_t tb_flags = dc->base.tb->flags;
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;

    dc->aarch64 = 1;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    dc->thumb = 0;
    dc->sctlr_b = 0;
    dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
    dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII);
    dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
    dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL);
    dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16;
    dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE);
    dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT);
    dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
    dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
    dc->is_ldex = false;
    dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);

    /* Bound the number of insns to execute to those left on the page.  */
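    /*
     * -(pc_first | TARGET_PAGE_MASK) is the number of bytes from
     * pc_first to the end of its page, and each A64 insn is 4 bytes.
     */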
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1.  */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);

    init_tmp_a64_array(dc);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    tcg_gen_insn_start(dc->base.pc_next, 0, 0);
    dc->insn_start = tcg_last_op();
}

static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
                                        const CPUBreakpoint *bp)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (bp->flags & BP_CPU) {
        gen_a64_set_pc_im(dc->base.pc_next);
        gen_helper_check_breakpoints(cpu_env);
        /* End the TB early; it likely won't be executed */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else {
        gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
        /* The address covered by the breakpoint must be
           included in [tb->pc, tb->pc + tb->size) in order
           for it to be properly cleared -- thus we
           increment the PC here so that the logic setting
           tb->size below does the right thing.  */
        dc->base.pc_next += 4;
        dc->base.is_jmp = DISAS_NORETURN;
    }

    return true;
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;

    if (dc->ss_active && !dc->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(dc->base.num_insns == 1);
        gen_swstep_exception(dc, 0, 0);
        dc->base.is_jmp = DISAS_NORETURN;
    } else {
        disas_a64_insn(env, dc);
    }

    translator_loop_temp_check(&dc->base);
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_set_pc_im(dc->base.pc_next);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            if (dc->base.singlestep_enabled) {
                gen_exception_internal(EXCP_DEBUG);
            } else {
                gen_step_complete_exception(dc);
            }
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, dc->base.pc_next);
            break;
        default:
        case DISAS_UPDATE:
            gen_a64_set_pc_im(dc->base.pc_next);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_set_pc_im(dc->base.pc_next);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_set_pc_im(dc->base.pc_next);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
        {
            /* This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            TCGv_i32 tmp = tcg_const_i32(4);
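            /* The constant 4 is this WFI insn's length, which the
             * helper needs in case the WFI traps to a higher EL.
             */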

            gen_a64_set_pc_im(dc->base.pc_next);
            gen_helper_wfi(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
        }
    }
}

static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .breakpoint_check   = aarch64_tr_breakpoint_check,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};